class CorpusIteratorFuncHead_V:
    """Corpus reader that runs each sentence through ``reverse_content_head``.

    All corpus access is delegated to an internal ``CorpusIterator_V`` stored
    in ``self.basis``; this class only adds the ``reverse_content_head`` call
    on the way out.
    """

    def __init__(self,
                 language,
                 partition="train",
                 storeMorph=False,
                 splitLemmas=False,
                 shuffleDataSeed=None):
        """Create the underlying ``CorpusIterator_V``.

        Every argument is handed on unchanged to ``CorpusIterator_V``.
        """
        reader_options = dict(
            partition=partition,
            storeMorph=storeMorph,
            splitLemmas=splitLemmas,
            shuffleDataSeed=shuffleDataSeed,
        )
        self.basis = CorpusIterator_V(language, **reader_options)

    def permute(self):
        """Ask the underlying reader to permute its data."""
        self.basis.permute()

    def length(self):
        """Return whatever length the underlying reader reports."""
        size = self.basis.length()
        return size

    def iterator(self, rejectShortSentences=False):
        """Yield sentences from the underlying reader, one at a time.

        ``reverse_content_head`` is called on each sentence and its return
        value is ignored; the sentence object itself is then yielded, so the
        helper is expected to work in place.
        """
        source = self.basis.iterator(
            rejectShortSentences=rejectShortSentences)
        for parsed in source:
            reverse_content_head(parsed)
            yield parsed

    def getSentence(self, index):
        """Return ``reverse_content_head`` applied to the sentence at ``index``.

        Unlike ``iterator``, the helper's return value is what the caller
        receives here.
        """
        raw_sentence = self.basis.getSentence(index)
        return reverse_content_head(raw_sentence)
class CorpusIteratorFuncHeadFraction_V():
    """Corpus reader over a leading fraction of the data.

    Wraps a ``CorpusIterator_V`` (stored in ``self.basis``) that is always
    built with ``shuffleDataSeed=4``.  At construction time the reader's
    ``data`` list is cut down to its first ``int(fraction * len(data))``
    entries and then permuted once.  Every sentence handed out is passed
    through ``reverse_content_head``.

    NOTE(review): the fixed seed presumably keeps the retained subset
    reproducible across runs -- confirm against ``CorpusIterator_V``.
    """

    def __init__(self, language, partition="train", fraction=1.0,
                 storeMorph=False, splitLemmas=False):
        """Build the underlying reader and keep only ``fraction`` of its data.

        Args:
            language: Passed through to ``CorpusIterator_V``.
            partition: Passed through to ``CorpusIterator_V``.
            fraction: Share of the data to keep; the cut-off index is
                ``int(fraction * len(data))``.  The value is not validated.
            storeMorph: Passed through to ``CorpusIterator_V``.
            splitLemmas: Passed through to ``CorpusIterator_V``.
        """
        self.fraction = fraction
        self.basis = CorpusIterator_V(language,
                                      partition=partition,
                                      storeMorph=storeMorph,
                                      splitLemmas=splitLemmas,
                                      shuffleDataSeed=4)
        # Truncate first, permute afterwards: the permutation only ever
        # reorders the retained prefix.
        keep = int(fraction * len(self.basis.data))
        self.basis.data = self.basis.data[:keep]
        self.permute()

    def permute(self):
        """Ask the underlying reader to permute its (already truncated) data."""
        self.basis.permute()

    def length(self):
        """Return whatever length the underlying reader reports."""
        return self.basis.length()

    def iterator(self, rejectShortSentences=False):
        """Yield sentences from the underlying reader, one at a time.

        Prints ``"Actual length"`` and ``self.length()`` once, when iteration
        starts.  ``reverse_content_head`` is called on each sentence with its
        return value ignored, and the sentence object itself is yielded.
        """
        iterator = self.basis.iterator(
            rejectShortSentences=rejectShortSentences)
        print("Actual length", self.length())
        for sentence in iterator:
            reverse_content_head(sentence)
            yield sentence

    def getSentence(self, index):
        """Return ``reverse_content_head`` applied to the sentence at ``index``."""
        return reverse_content_head(self.basis.getSentence(index))
Example #3
0
        devWords += newWords
        if printHere:
            print "Dev examples " + str(devCounter)
        devCounterTimesBatchSize += devBatchSize
    devSurprisalTableHere = [
        surp / (devCounterTimesBatchSize) for surp in surprisalTable
    ]
    return devLoss / devWords, devSurprisalTableHere

# Top-level training driver (Python 2 print statements).
# DEV_PERIOD is declared here; in the visible code it is only referenced from
# a commented-out condition further down.
DEV_PERIOD = 5000
epochCount = 0
# Base corpus reader, constructed with storeMorph=True.
corpusBase = CorpusIterator_V(language, storeMorph=True)
# Each pass of this loop is one epoch.  Nothing in the visible body modifies
# failedDevRuns, so termination must come from code outside this view.
while failedDevRuns == 0:
    epochCount += 1
    print "Starting new epoch, permuting corpus"
    corpusBase.permute()
    #  corpus = getNextSentence("train")
    # Fresh sentence iterator for this epoch, wrapped by createStream.
    corpus = corpusBase.iterator(rejectShortSentences=False)
    stream = createStream(corpus)

    # Dev evaluation is gated on counter, which is not updated anywhere in the
    # visible body.
    if counter > 5:
        #       if counter % DEV_PERIOD == 0:
        newDevLoss, devSurprisalTableHere = computeDevLoss()
        #             devLosses.append(
        devLosses.append(newDevLoss)
        print "New dev loss " + str(newDevLoss) + ". previous was: " + str(
            lastDevLoss)
        # Triggered when the dev loss exceeds 15 or more than 99 dev losses
        # have been recorded.
        if newDevLoss > 15 or len(devLosses) > 99:
            print "Abort, training too slow?"
            # NOTE(review): records a second, slightly larger loss; presumably
            # this is meant to trip a "dev loss got worse" stopping check
            # elsewhere -- confirm.
            devLosses.append(newDevLoss + 0.001)
    # Runs once per loop pass, outside the dev-evaluation branch.
    # NOTE(review): no forward/backward call is visible in this loop; the
    # snippet looks abridged -- confirm against the full script.
    optim.step()

# Show the document inventory before building the per-document time vector.
print(docs)

# Numeric time value for each document, keyed by document name.  The values
# look like (approximate) years matching the date embedded in each key.
times = {
    'Graal_1225_prose': 1225,
    'Aucassin_early13_verse-prose': 1210,
    'QuatreLivresReis_late12_prose': 1180,
    'TroyesYvain_1180_verse': 1180,
    'Roland_1100_verse': 1100,
    'BeroulTristan_late12_verse': 1180,
    'StLegier_1000_verse': 1000,
    'StAlexis_1050_verse': 1050,
    'Strasbourg_842_prose': 842,
    'Lapidaire_mid12_prose': 1150,
}

# One entry per document index: the index is mapped to a document name through
# itos_docs, looked up in the table above, and the result is divided by 100.
timesVector = torch.FloatTensor(
    [times[itos_docs[doc_index]] for doc_index in range(len(docs))]
) / 100.0

# (The original left an empty, commented-out "covarianceMatrix =" stub here.)

print(timesVector)
print(times)

# Stop the script at this point; nothing below this call is executed.
quit()

# Main optimisation loop: 100 epochs.  In each epoch the corpus is permuted,
# then forward/backward are called once per index in range(corpus.length()).
# NOTE(review): in this view a quit() call appears before this loop; if both
# belong to the same script, the loop is unreachable -- confirm.
for epoch in range(100):
   corpus.permute()
   for q in range(corpus.length()):
      loss = forward(corpus, q)
      backward(loss)


   # epoch % 1 is always 0, so the results file is rewritten after every epoch.
   if epoch % 1 == 0:
       print("Saving")
       save_path = "../raw-results/"
       #save_path = "/afs/cs.stanford.edu/u/mhahn/scr/deps/"
       # Output file name is built from args.language, __file__ and myID; the
       # file is opened in "w" mode, so earlier contents are overwritten.
       with open(save_path+"/manual_output_ground_coarse/"+args.language+"_"+__file__+"_model_"+str(myID)+".tsv", "w") as outFile:
          # Tab-separated header row.
          print("\t".join(list(map(str,["Epochs", "DH_Mean_NoPunct", "DH_Sigma_NoPunct", "Distance_Mean_NoPunct", "Distance_Sigma_NoPunct", "Dependency"]))), file=outFile)
          # Fetch the four parameters from the Pyro param store as NumPy arrays.
          dh_numpy = pyro.get_param_store().get_param("mu_DH").data.numpy()
          dh_sigma_numpy = pyro.get_param_store().get_param("sigma_DH").data.numpy()
          dist_numpy = pyro.get_param_store().get_param("mu_Dist").data.numpy()
          dist_sigma_numpy = pyro.get_param_store().get_param("sigma_Dist").data.numpy()