def trainWord2Vec(latentFactorN = 10, weightSave = './TrainingTemp/Word2vecWeight.npy', epoch=20):
    '''
    Train a word2vec-style model on Shakespeare's sonnets and save the
    learned embedding weights to the TrainingTemp folder.

    The model is a two-layer network: a linear projection to the latent
    space (the embedding layer) followed by a softmax over the vocabulary.
    After training, the first layer's weight matrix is normalized to the
    range [-1, 1] and saved.

    Parameters:
        latentFactorN: dimensionality of the latent embedding space.
        weightSave:    path of the .npy file the weight matrix is saved to.
        epoch:         number of training epochs.

    Returns:
        The normalized embedding weight matrix,
        shape (vocabulary size, latentFactorN).
    '''
    encodedSonnets, encodedSyllaDict, code2word, punc2code = encodedShake()
    X, y = gnerateEmbedTrain(encodedSonnets)

    vocabSize = len(code2word)
    model = Sequential()
    # Embedding layer: linear projection from one-hot vocab to latent space.
    model.add(Dense(units=latentFactorN, input_dim=vocabSize))
    model.add(Dense(units=vocabSize, activation='softmax'))
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    model.summary()

    # Pass num_classes explicitly: to_categorical otherwise infers the width
    # from the max code present in the data, which mismatches input_dim /
    # the softmax width whenever the highest word code never occurs.
    model.fit(to_categorical(X, num_classes=vocabSize),
              to_categorical(y, num_classes=vocabSize),
              epochs=epoch, verbose=1)

    # Extract the embedding matrix and scale it into [-1, 1].
    weights = model.layers[0].get_weights()[0]
    weights = weights / np.max(np.abs(weights))
    np.save(weightSave, weights)
    return weights
import numpy as np from HMM_fast import HiddenMarkovModel, unsupervised_HMM from DataProcessing import encodedShake, Convert2SonnetNaive if __name__ == '__main__': encodedSonnets, encodedSyllaDict, code2word, punc2code = encodedShake() HMmodel = unsupervised_HMM(encodedSonnets, 40, 200) print(Convert2SonnetNaive(encodedSonnets[0], code2word)[0]) print(Convert2SonnetNaive(encodedSonnets[0], code2word)[1]) sonnet = HMmodel.generate_emission(300)[0] print(Convert2SonnetNaive(sonnet, code2word)[1]) np.save('./TrainingTemp/HMM.A.npy', HMmodel.A) np.save('./TrainingTemp/HMM.O.npy', HMmodel.O)