# NOTE(review): this fragment opens partway through a function whose header
# (and any enclosing loop header) is not visible here. The nesting below is
# reconstructed from the statements themselves -- confirm against the full file.
        # Take the slice of `batch` selected by `partition`.
        current = batch[partition * batchSize:(partition + 1) * batchSize]
        # Forward pass with train=False; the first three results are discarded.
        _, _, _, newLoss, newWords, lossWords, lossPOS = doForwardPass(
            current, train=False)
        # Add this slice's results to the running totals.
        devLoss += newLoss
        devWords += newWords
        devLossWords += lossWords
        devLossPOS += lossPOS
    # Return each accumulated total divided by devWords.
    return devLoss / devWords, devLossWords / devWords, devLossPOS / devWords


# Training driver: no exit from the outer loop is visible in this fragment.
while True:
    # corpus = getNextSentence("train")
    # Build a fresh corpus object on every pass, permute it, and iterate it
    # with rejectShortSentences=True.
    corpus = CorpusIteratorFuncHead(language)
    corpus.permute()
    corpus = corpus.iterator(rejectShortSentences=True)

    while True:
        # Pull 10 * batchSize items from the iterator; StopIteration ends this
        # inner loop, so an incomplete final group is not processed.
        # NOTE(review): relies on Python 2 semantics -- `10 * range(batchSize)`
        # needs range() to return a list, and map() must run eagerly for the
        # StopIteration to be raised inside this try.
        try:
            batch = map(lambda x: next(corpus), 10 * range(batchSize))
        except StopIteration:
            break
        # Sort by len() so contiguous slices hold items of neighbouring length.
        batch = sorted(batch, key=len)
        # Visit the 10 slices in shuffled order (presumably random.shuffle --
        # the import is not visible here; it also needs range() to be a list).
        partitions = range(10)
        shuffle(partitions)
        for partition in partitions:
            counter += 1
            # True on every 100th step; where the flag is consumed is not
            # visible in this fragment.
            printHere = (counter % 100 == 0)
            current = batch[partition * batchSize:(partition + 1) * batchSize]
            # NOTE(review): the fragment ends inside this call; its arguments
            # lie beyond the visible source.
            loss, baselineLoss, policy_related_loss, _, wordNumInPass, lossWords, lossPOS = doForwardPass(
# NOTE(review): this fragment opens partway through a function whose header is
# not visible here. The nesting below is reconstructed from the statements
# themselves -- confirm against the full file.
    if wordNum > 0:
        # Moving average: keep 99% of the old value and mix in 1% of
        # totalDepLength / wordNum.
        # NOTE(review): under Python 2 this `/` truncates when both operands
        # are ints -- confirm the operand types.
        crossEntropy = 0.99 * crossEntropy + 0.01 * (totalDepLength/wordNum)
    else:
        # With no words counted, the total must be zero as well.
        assert totalDepLength == 0
    numberOfWords = wordNum
    return (totalDepLength, numberOfWords, byType)


# The slicing below is written for a batch size of exactly one.
assert batchSize == 1

depLengths = []
# `if True:` is an unconditional wrapper; it only adds an indentation level.
if True:
    corpus = CorpusIteratorFuncHead(args.language,"train")
    corpusIterator = corpus.iterator()
    # Exit the interpreter when the corpus reports length 0.
    if corpus.length() == 0:
        quit()
    while True:
        # One item per batch; StopIteration from the iterator ends the loop.
        try:
            batch = [next(corpusIterator)]
        except StopIteration:
            break
        # range(1) yields only partition 0, so this loop body runs once per
        # batch.
        partitions = range(1)
        for partition in partitions:
            counter += 1
            # True on every 200th step; where the flag is consumed is not
            # visible in this fragment.
            printHere = (counter % 200 == 0)
            current = batch[partition*batchSize:(partition+1)*batchSize]
            # Skip empty slices.
            # NOTE(review): the loop body may continue beyond this fragment.
            if len(current) == 0:
                continue
# NOTE(review): the original line had its line breaks collapsed; the layout
# below is reconstructed from the statements -- confirm against the full file.

# Presumably a learning rate; its consumer is not visible in this fragment.
lr_lm = 0.1

# Initial value of crossEntropy; it is not updated within this fragment.
crossEntropy = 10.0


# Map word `w` to an integer id: stoi[w] + 3 when stoi[w] < vocab_size,
# otherwise the fixed id 1. The conditional expression binds looser than `+`,
# so the +3 applies only to the in-vocabulary branch. A `w` missing from
# `stoi` raises whatever `stoi[w]` raises; no fallback is provided here.
# NOTE(review): the +3 offset and the fallback id 1 suggest the low ids are
# reserved for special symbols -- TODO confirm.
def encodeWord(w):
    return stoi[w] + 3 if stoi[w] < vocab_size else 1


import torch.nn.functional

# Number of sentences processed so far, counted across passes of the outer
# loop.
counter = 0
while True:
    # Build a fresh corpus object on every pass, permute it, and iterate it
    # with rejectShortSentences=False.
    corpus = CorpusIterator(args.language, partition="together")
    corpus.permute()
    corpus = corpus.iterator(rejectShortSentences=False)

    for current in corpus:
        # Hard stop: exit the interpreter once counter exceeds 50,000,000.
        if counter > 50000000:
            print("Quitting at counter " + str(counter))
            quit()
        counter += 1
        # True on every 50th step; passed to orderSentence below.
        printHere = (counter % 50 == 0)
        # Wrap the single item in a list so it is indexed as current[0].
        current = [current]
        batchOrdered, logits = orderSentence(current[0], dhLogits, printHere)
        # Second element of the corpus item (presumably sentence metadata, as
        # the name suggests -- confirm against the corpus iterator).
        metadata = current[0][1]
        # Length is measured before batchOrdered is wrapped in a list.
        maxLength = len(batchOrdered)
        batchOrdered = [batchOrdered]
        # NOTE(review): the fragment ends at this condition; its body is not
        # visible here.
        if maxLength <= 2:
# print(chart[0][-1]) fullProb = log(sum([exp(x) if x is not None else 0 for x in chart[0][-1]])) goldProb = goldProbability conditional = (fullProb - goldProb) return conditional, len(batchOrdered[0]), fullProb, goldProb corpusDev = CorpusIteratorFuncHead(language, "dev") conditionalTotal = 0 marginalTotal = 0 goldTotal = 0 lengthTotal = 0 for i, sentence in enumerate(corpusDev.iterator(rejectShortSentences=True)): conditional, length, marginal, gold = forward([sentence]) conditionalTotal += conditional marginalTotal += marginal goldTotal += gold lengthTotal += length print(language, i, conditionalTotal / lengthTotal, marginalTotal / lengthTotal, goldTotal / lengthTotal) if i > 500: break with open( "/u/scr/mhahn/cky/" + __file__ + "_" + language + "_" + model + "_" + BASE_DIR + ".txt", "w") as outFile: print >> outFile, conditionalTotal / lengthTotal print >> outFile, marginalTotal / lengthTotal print >> outFile, goldTotal / lengthTotal