def testTrain():
    """Smoke-test the training pipeline.

    Tokenizes the corpus, builds an RNN over the resulting vocabulary,
    times a single SGD step, then runs a short training loop and saves
    the trained weights to disk.
    """
    print("Starting Test")
    np.random.seed(10)  # fixed seed so runs are reproducible

    print("Starting Tokenization")
    t = Tokenizer(vocabSize=15000)
    print("Tokenizer Complete")
    vocabSize = t.getVocabSize()
    print("Vocab Size: " + str(vocabSize))
    xTrain, yTrain = t.getData()

    print("Constructing Model")
    model = RNN(vocabSize)

    print("Starting Timer")
    # BUG FIX: time.clock() was removed in Python 3.8;
    # time.perf_counter() is the recommended replacement for
    # measuring short elapsed intervals.
    start = time.perf_counter()
    model.sgdStep(xTrain[10], yTrain[10], .005)
    end = time.perf_counter()
    print("One Step Time: " + str(end - start))

    print("Starting Training")
    # BUG FIX: the original opened the log file and never closed it;
    # 'with' truncates the log and guarantees the handle is released.
    with open("Data/Log.txt", "w") as reset:
        reset.write("")
    losses = trainWithSGD(model, xTrain, yTrain, cycles=50, evalAfterLoss=1)
    save("Data/Fakespeare.npz", model)
def __init__(self, fileName):
    """Restore a trained model from *fileName*.

    Rebuilds the word/index vocabulary mappings from a fresh Tokenizer,
    constructs an RNN sized to that vocabulary, and loads the saved
    weights into it.
    """
    tokenizer = Tokenizer()
    self.wordToInd = tokenizer.getWordToInd()
    self.indexToWord = tokenizer.getIndToWord()
    self.model = RNN(tokenizer.getVocabSize())
    load(fileName, self.model)