def main():
    """Run perceptron training on the gene training set.

    Reads the training set from "gene.train", wraps an empty weight dict in
    a FeatureData object and hands both to `perceptron`. Start and finish
    timestamps (from `u.now()`) are printed to stdout.

    Whatever `perceptron` does with the trained weights (e.g. persisting
    them) happens inside that function; nothing is returned from here.
    """
    # Single-argument print(...) emits the same text as the Python 2
    # statement `print a, b` and also parses under Python 3.
    print("%s %s" % (u.now(), "Start"))

    trainingFile = "gene.train"

    # Empty weight vector means a vector of 0s
    weightVector = dict()
    featureData = FeatureData(weightVector)

    trainingSet = readTrainingSetFile(trainingFile)
    # NOTE(review): the third argument is presumably the number of training
    # passes over the data -- confirm against perceptron's signature.
    perceptron(trainingSet, featureData, 5)

    print("%s %s" % (u.now(), "Done"))
def main():
    """Tag every sentence of the input file and save "word tag" lines.

    Loads the weight vector from "tag.model", splits "gene.test" into
    sentences with `u.GetNextSentence`, decodes a tag sequence for each one
    with `ViterbiGLM`, and writes the result to "gene_test.p1.out": one
    "word tag" line per token and one empty line after each sentence.
    Start and finish timestamps (from `u.now()`) are printed to stdout.
    """
    # Single-argument print(...) emits the same text as the Python 2
    # statement `print a, b` and also parses under Python 3.
    print("%s %s" % (u.now(), "Start"))

    modelFile = "tag.model"
    # Other input/output pairs that have been used with this script:
    #   "gene.dev"  -> "dev.p1.out"
    #   "short.dev" -> "dev.short.out"
    inputFile = "gene.test"
    outputFile = "gene_test.p1.out"

    weightVector = readPreTrainedModel(modelFile)
    lines = u.readFromFile(inputFile)

    # Decode one sentence at a time; GetNextSentence returns the sentence
    # together with the lines that remain to be processed.
    tagSequences = []
    while len(lines) > 0:
        sentence, lines = u.GetNextSentence(lines)
        featureData = FeatureData(weightVector)
        tagSequence = ViterbiGLM(featureData, sentence)
        tagSequences.append((tagSequence, sentence))

    # The first sentence and its tag sequence each lose their leading
    # element before output.
    # NOTE(review): presumably this strips a padding/start entry that only
    # the first sentence carries -- confirm against GetNextSentence.
    # Guarded so that an empty input file yields an empty output file
    # instead of an IndexError.
    if tagSequences:
        firstTags, firstSentence = tagSequences[0]
        if firstTags:
            del firstTags[0]
        if firstSentence:
            del firstSentence[0]

    outputText = []
    for tagSequence, sentence in tagSequences:
        # A sentence whose tag count does not match its word count emits
        # no token lines, only the separator below.
        if len(tagSequence) == len(sentence):
            for word, tag in zip(sentence, tagSequence):
                outputText.append("%s %s" % (word, tag))
        # Empty line separating this sentence from the next.
        # NOTE(review): nesting of this append was reconstructed from a
        # whitespace-collapsed source; confirm it belongs per sentence.
        outputText.append("")
    # Final empty entry so the joined text ends with a newline.
    outputText.append("")

    outputText = '\n'.join(outputText)
    u.saveToFile(outputFile, outputText)

    print("%s %s" % (u.now(), "Done"))