# Wrapper around CorpusIterator_V that applies reverse_content_head to every
# sentence it yields or returns.
class CorpusIteratorFuncHead_V():
    def __init__(self,
                 language,
                 partition="train",
                 storeMorph=False,
                 splitLemmas=False,
                 shuffleDataSeed=None):
        self.basis = CorpusIterator_V(language,
                                      partition=partition,
                                      storeMorph=storeMorph,
                                      splitLemmas=splitLemmas,
                                      shuffleDataSeed=shuffleDataSeed)

    def permute(self):
        self.basis.permute()

    def length(self):
        return self.basis.length()

    def iterator(self, rejectShortSentences=False):
        iterator = self.basis.iterator(
            rejectShortSentences=rejectShortSentences)
        for sentence in iterator:
            reverse_content_head(sentence)
            yield sentence

    def getSentence(self, index):
        return reverse_content_head(self.basis.getSentence(index))
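
# A minimal usage sketch (an illustrative addition, not part of the original code):
# build the function-head iterator and walk over the converted sentences.
# The language name and the loop body are placeholders.
def _demo_funchead_iteration(language="English"):
    corpus = CorpusIteratorFuncHead_V(language, partition="train")
    corpus.permute()
    for sentence in corpus.iterator(rejectShortSentences=True):
        # every yielded sentence has already been passed through reverse_content_head
        pass
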
# Like CorpusIteratorFuncHead_V, but keeps only the first `fraction` of the
# underlying data (loaded with a fixed shuffleDataSeed of 4).
class CorpusIteratorFuncHeadFraction_V():
    def __init__(self,
                 language,
                 partition="train",
                 fraction=1.0,
                 storeMorph=False,
                 splitLemmas=False):
        self.basis = CorpusIterator_V(language,
                                      partition=partition,
                                      storeMorph=storeMorph,
                                      splitLemmas=splitLemmas,
                                      shuffleDataSeed=4)
        # Truncate the underlying data to the requested fraction.
        self.basis.data = self.basis.data[:int(fraction * len(self.basis.data))]
        self.permute()
        self.fraction = fraction

    def permute(self):
        self.basis.permute()

    def length(self):
        return self.basis.length()

    def iterator(self, rejectShortSentences=False):
        iterator = self.basis.iterator(
            rejectShortSentences=rejectShortSentences)
        counter = 0
        print("Actual length", self.length())
        for sentence in iterator:
            # if counter > self.fraction * self.length():
            #     break
            # counter += 1
            reverse_content_head(sentence)
            yield sentence

    def getSentence(self, index):
        return reverse_content_head(self.basis.getSentence(index))
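
# Illustrative sketch (an addition, not from the original code): keep only 10% of
# the data for a smaller training run. The language name and fraction are placeholders.
def _demo_fraction_iteration(language="English"):
    tenth = CorpusIteratorFuncHeadFraction_V(language, partition="train", fraction=0.1)
    print("Sentences retained:", tenth.length())
    for sentence in tenth.iterator():
        pass
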
Example #3
       return (totalDepLength, numberOfWords, byType)



assert batchSize == 1

depLengths = []
#while True:
outpath = "/u/scr/mhahn/japanese/"+str(myID)
with open(outpath, "w") as outFile:
 print("\t".join(["Sent", "Length"]), file=outFile)  # header row
 counter = 0
 if True:
   corpus = CorpusIterator_V(language,"train", shuffleDataSeed=40)
   corpusIterator = corpus.iterator()
   if corpus.length() == 0:
      quit()
   while True:
     try:
        # Pull 10 * batchSize sentences; StopIteration from the exhausted iterator
        # is caught below and ends the while loop.
        batch = [next(corpusIterator) for _ in range(10 * batchSize)]
     except StopIteration:
        break
     batch = sorted(batch, key=len)
     partitions = range(10)
     
     for partition in partitions:
        counter += 1
        printHere = (counter % 100 == 0)
        current = batch[partition*batchSize:(partition+1)*batchSize]
 
        depLength = doForwardPass(current)
Example #4
    numberOfWords = wordNum
    return (totalDepLength, numberOfWords, byType)


assert batchSize == 1

depLengths = []
#while True:
outpath = "/u/scr/mhahn/japanese/" + str(myID)
with open(outpath, "w") as outFile:
    print("\t".join(["Sent", "Length"]), file=outFile)  # header row
    counter = 0
    if True:
        corpus = CorpusIterator_V(language, "train", shuffleDataSeed=40)
        corpusIterator = corpus.iterator()
        if corpus.length() == 0:
            quit()
        while True:
            try:
                # Pull 10 * batchSize sentences; StopIteration from the exhausted
                # iterator is caught below and ends the while loop.
                batch = [next(corpusIterator) for _ in range(10 * batchSize)]
            except StopIteration:
                break
            batch = sorted(batch, key=len)
            partitions = range(10)

            for partition in partitions:
                counter += 1
                printHere = (counter % 100 == 0)
                current = batch[partition * batchSize:(partition + 1) *
                                batchSize]
print(docs)
# Approximate composition year of each document.
times = {
    'Graal_1225_prose': 1225,
    'Aucassin_early13_verse-prose': 1210,
    'QuatreLivresReis_late12_prose': 1180,
    'TroyesYvain_1180_verse': 1180,
    'Roland_1100_verse': 1100,
    'BeroulTristan_late12_verse': 1180,
    'StLegier_1000_verse': 1000,
    'StAlexis_1050_verse': 1050,
    'Strasbourg_842_prose': 842,
    'Lapidaire_mid12_prose': 1150,
}

# Per-document composition dates, divided by 100.
timesVector = torch.FloatTensor([times[itos_docs[i]] for i in range(len(docs))]) / 100.0

#covarianceMatrix = 

print(timesVector)

print(times)
quit()

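# Training loop from the original snippet: one pass over the permuted corpus per
# epoch; forward() and backward() are presumably defined earlier in the source file.
# (Note: the quit() above exits before this loop is reached in the snippet as given.)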
for epoch in range(100):
   corpus.permute()
   for q in range(corpus.length()):
      loss = forward(corpus, q)
      backward(loss)


   if epoch % 1 == 0:
       print("Saving")
       save_path = "../raw-results/"
       #save_path = "/afs/cs.stanford.edu/u/mhahn/scr/deps/"
       with open(save_path + "/manual_output_ground_coarse/" + args.language + "_"
                 + __file__ + "_model_" + str(myID) + ".tsv", "w") as outFile:
          print("\t".join(list(map(str,["Epochs", "DH_Mean_NoPunct", "DH_Sigma_NoPunct", "Distance_Mean_NoPunct", "Distance_Sigma_NoPunct", "Dependency"]))), file=outFile)
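          # Read the fitted values from Pyro's parameter store: means and standard
          # deviations for the DH and Distance parameters, matching the header above.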
          dh_numpy = pyro.get_param_store().get_param("mu_DH").data.numpy()
          dh_sigma_numpy = pyro.get_param_store().get_param("sigma_DH").data.numpy()
          dist_numpy = pyro.get_param_store().get_param("mu_Dist").data.numpy()
          dist_sigma_numpy = pyro.get_param_store().get_param("sigma_Dist").data.numpy()