def computeDevLoss():
    """Evaluate the model on the dev partition and return averaged statistics.

    Batches are drawn from ``prepareDatasetChunks`` over the dev corpus of
    ``args.language`` and passed to ``forward`` with dropout disabled, inside
    ``torch.no_grad()``.

    Side effects:
        Sets the module-level ``printHere`` flag before each forward pass
        (true on every 50th batch) and prints a progress line on those
        batches. ``surprisalTable`` is handed to ``forward``; it is presumably
        accumulated into in place there (``forward`` is not visible here --
        confirm).

    Returns:
        A 3-tuple of
        * the summed loss divided by the summed word count,
        * each ``surprisalTable`` entry divided by
          ``devCounter * args.batchSize``,
        * the summed memory value divided by the number of batches.

    Raises:
        ZeroDivisionError: if the dev stream yields no batch at all.
    """
    global printHere
    devLoss = 0.0
    devWords = 0
    corpusDev = corpusIteratorWikiWords.dev(args.language)
    stream = prepareDatasetChunks(corpusDev, train=False)
    surprisalTable = [0 for _ in range(args.horizon)]
    devCounter = 0
    devMemory = 0
    while True:
        # Fetch first and count afterwards: the counter must only reflect
        # batches that were actually evaluated. Incrementing before the fetch
        # left it one too high once the stream was exhausted, which skewed
        # both averages that divide by devCounter.
        try:
            batch = next(stream)
        except StopIteration:
            break
        devCounter += 1
        # printHere is a module-level flag; it is set before the forward pass
        # so that code reading it during this batch sees the current value.
        printHere = (devCounter % 50 == 0)
        with torch.no_grad():
            _, _, _, newLoss, newWords, devMemoryHere = forward(
                batch,
                surprisalTable=surprisalTable,
                doDropout=False,
                batchSizeHere=args.batchSize)
        devMemory += devMemoryHere.data.cpu().numpy()
        devLoss += newLoss
        devWords += newWords
        if printHere:
            # Single-argument parenthesised form prints identically under
            # the Python 2 print statement and the Python 3 print function.
            print("Dev examples " + str(devCounter))
    devSurprisalTableHere = [
        surp / (devCounter * args.batchSize) for surp in surprisalTable
    ]
    return devLoss / devWords, devSurprisalTableHere, devMemory / devCounter
def computeDevLoss():
    """Run one pass over the dev corpus and return averaged statistics.

    Items are pulled from ``createStream`` over the dev corpus of
    ``args.language`` and grouped into batches of ``args.batchSize``. Each
    full batch goes through ``doForwardPass`` with dropout off and gradient
    tracking disabled. When the stream runs out while a batch is being
    filled, that partial batch is discarded and the loop ends.

    Side effects:
        Sets the module-level ``printHere`` flag before each forward pass
        (true on every 50th batch) and prints a progress line on those
        batches.

    Returns:
        A 3-tuple of
        * the summed loss divided by the summed word count,
        * each surprisal-table entry divided by
          ``number of batches * args.batchSize``,
        * the summed memory value divided by the number of batches.
    """
    global printHere

    dev_corpus = corpusIteratorWikiWords.dev(args.language)
    stream = createStream(dev_corpus)

    surprisal_sums = [0] * args.horizon
    total_loss = 0.0
    total_words = 0
    total_memory = 0
    num_batches = 0

    while True:
        batch_indices = []
        batch_word_starts = []
        try:
            for _ in range(args.batchSize):
                indices, word_starts = next(stream)
                batch_indices.append(indices)
                batch_word_starts.append(word_starts)
        except StopIteration:
            # Stream exhausted; whatever was collected for this batch is
            # dropped, so only complete batches are ever evaluated.
            break

        num_batches += 1
        printHere = num_batches % 50 == 0

        with torch.no_grad():
            _, _, _, batch_loss, batch_words, batch_memory = doForwardPass(
                batch_indices,
                batch_word_starts,
                surprisalTable=surprisal_sums,
                doDropout=False,
                batchSizeHere=args.batchSize)

        total_memory += batch_memory.data.cpu().numpy()
        total_loss += batch_loss
        total_words += batch_words

        if printHere:
            print("Dev examples " + str(num_batches))

    # Normaliser for the surprisal table: total number of batch slots seen.
    examples_seen = num_batches * args.batchSize
    avg_surprisal = [total / examples_seen for total in surprisal_sums]
    return total_loss / total_words, avg_surprisal, total_memory / num_batches
assert False #################################### from torch.autograd import Variable #data = AcqdivReaderPartition(acqdivCorpusReader, partition="train").reshuffledIterator(blankBeforeEOS=False) rnn_drop.train(False) data = corpusIteratorWikiWords.dev(args.language) print("Got data") numeric_with_blanks = [] count = 0 print("Prepare chunks") for chunk in data: for word in chunk: numeric_with_blanks.append(stoi[" "]+3) for char in word: # print((char if char != "\n" else "\\n", stoi[char]+3 if char in stoi else 2)) count += 1 if char not in stoi: print(char)
if (labels[-1] == 1) and relevantNextWords[-1].startswith( relevantWords[-1] ): # this is actually not a hard assertion, it should just be quite unlikely in languages such as English print("WARNING", list(zip(boundaries[j][i:], boundariesAll[j][i:]))) # if len(relevantWords[-1]) > 1: # assert False import time devLosses = [] #for epoch in range(10000): if True: training_data = corpusIteratorWikiWords.dev(args.language, removeMarkup=False) training_data_c = corpusIteratorWiki.dev(args.language, doShuffling=False) print("Got data") training_chars = prepareDatasetChunks(training_data, training_data_c, train=False) rnn_drop.train(False) startTime = time.time() trainChars = 0 counter = 0 while True: counter += 1 try: numeric = [next(training_chars) for _ in range(args.batchSize)]
"components": [c.state_dict() for c in modules] } torch.save( state, "/u/scr/mhahn/CODEBOOKS/" + args.language1 + "AND" + args.language2 + "_" + __file__ + "_code_" + str(args.myID) + ".txt") if (time.time() - totalStartTime) / 60 > 4000: print("Breaking early to get some result within 72 hours") totalStartTime = time.time() break # break rnn_drop.train(False) dev_data_1 = corpusIteratorWikiWords.dev(args.language1) dev_data_2 = corpusIteratorWikiWords.dev(args.language2) print("Got data") dev_chars = prepareDatasetChunksTwo(dev_data_1, dev_data_2, train=False) dev_loss = 0 dev_char_count = 0 counter = 0 hidden, beginning = None, None while True: counter += 1 try: numeric = next(dev_chars) except StopIteration: