Ejemplo n.º 1
0
    def ComputeParseTree(self, sentence):
        
        print u.now(), "Initializing tables"
        self.__PiTable.Initialize(sentence, self)
        print u.now(), "Tables initialized"
        
        splitSentence = sentence.split()
        n = len(splitSentence)
        
        for l in xrange(1, n):
            for i in xrange(1, n - l + 1):
                j = i + l

                bestPi = MaxProbability(0, None, None)
        
                for X in self.N:
                    
                    #print l, i, j, X
                    
                    pi = self.FindMaxPi(i, j, X, splitSentence)
                    
                    if bestPi.P <= pi.P:
                        bestPi = pi
                        
                #print bestPi
                    
        print u.now(), "Tables computed"
        
        tree = self.__BuildTree(splitSentence)

        print u.now(), tree
        
        return tree
Ejemplo n.º 2
0
def Preprocess(countsFile="", trainingFile="", adjustedTrainingFile=""):
    if countsFile=="" or trainingFile=="" or adjustedTrainingFile=="":
        return

    RARETAG = '_RARE_'

    print "Preprocess started at", u.now()

    rareWords = readCounts(countsFile)[4]
    
    lines = u.readFromFile(trainingFile)
    
    newTrees = []
    
    processed = 0
    progressThreshold = 100
    
    for line in lines:
        #print "Processing:", line
        
        tree = json.loads(line)
        newTree = replaceLeaves(tree, rareWords, RARETAG)
        outputLine = json.dumps(newTree)
        
        newTrees.append(outputLine)
        
        processed += 1
        if progressThreshold <= processed:
            print u.now(), "Processed", processed
            progressThreshold += 100
        
    outputText = '\n'.join(newTrees)

    u.saveToFile(adjustedTrainingFile, outputText)

    
    print "Preprocess ended at", u.now()
Ejemplo n.º 3
0
def ParseFile(fileToParse="", adjustedCountsFile="", resultFile=""):
    if fileToParse == "" or adjustedCountsFile == "" or resultFile == "":
        return

    counts = a2p1.readCounts(adjustedCountsFile)

    nonTerminalCounts = counts[0]
    binaryCounts = counts[1]
    unaryCounts = counts[2]
    wordCounts = counts[3]

    print u.now(), "Building Grammar"
    grammar = VertMarkovGrammar(nonTerminalCounts, binaryCounts, unaryCounts,
                                wordCounts)
    print u.now(), "Grammar created"

    lines = u.readFromFile(fileToParse)

    outputLines = []

    progress = 0

    for sentence in lines:

        progress += 1
        print "%s Parsing (%s/%s): %s" % (u.now(), progress, len(lines),
                                          sentence[:-1])

        tree = grammar.ComputeParseTree(sentence)
        outputLine = json.dumps(tree)
        outputLines.append(outputLine)

    outputText = '\n'.join(outputLines)

    print u.now(), "Saving output to", resultFile
    u.saveToFile(resultFile, outputText)

    print u.now(), "Done"
Ejemplo n.º 4
0
def ParseFile(fileToParse="", adjustedCountsFile="", resultFile=""):
    if fileToParse=="" or adjustedCountsFile=="" or resultFile=="":
        return
    
    counts = a2p1.readCounts(adjustedCountsFile)
    
    nonTerminalCounts = counts[0]
    binaryCounts = counts[1]
    unaryCounts = counts[2]
    wordCounts = counts[3]
    
    print u.now(), "Building Grammar"
    grammar = VertMarkovGrammar(nonTerminalCounts, binaryCounts, unaryCounts, wordCounts)
    print u.now(), "Grammar created"
    
    lines = u.readFromFile(fileToParse)
    
    outputLines = []

    progress = 0
    
    for sentence in lines:
        
        progress += 1
        print "%s Parsing (%s/%s): %s" % ( u.now(), progress, len(lines), sentence[:-1]) 
        
        tree = grammar.ComputeParseTree(sentence)
        outputLine = json.dumps(tree)
        outputLines.append(outputLine)
        
        
    outputText = '\n'.join(outputLines)

    print u.now(), "Saving output to", resultFile
    u.saveToFile(resultFile, outputText)
    
    print u.now(), "Done"
Ejemplo n.º 5
0
    #    a2p1.Preprocess(countsFile, trainingFile, adjustedTrainingFile)
    #
    #    targetFile = "parse_dev.dat"
    #    adjustedCountsFile = "cfg.vert.adjusted.counts"
    #    resultFile = "parse_dev.vert.out"

    # Active configuration: these three values are the arguments passed to
    # ParseFile below (input file, counts file, output file). The commented
    # assignments around them are alternative configurations left disabled.
    targetFile = "parse_test.dat"
    adjustedCountsFile = "cfg.vert.adjusted.counts"
    resultFile = "parse_test.p3.out"

    #
    #    targetFile = "test.dat"
    #    adjustedCountsFile = "cfg.vert.adjusted.counts"
    #    resultFile = "test.vert.out"

    # Value of u.now() taken before the parse starts; presumably paired with
    # `end` for a timing report outside this excerpt -- TODO confirm.
    start = u.now()

    #    targetFile = "shorttest.dat"
    #    adjustedCountsFile = "cfg.vert.adjusted.counts"
    #    resultFile = "shorttest.vert.out"

    #    targetFile = "shorttest.dat"
    #    adjustedCountsFile = "parse_train.counts.out"
    #    resultFile = "shorttest.out"

    # Disabled call to a2p2's ParseFile; the ParseFile in scope here is used
    # instead.
    #a2p2.ParseFile(targetFile, adjustedCountsFile, resultFile)

    ParseFile(targetFile, adjustedCountsFile, resultFile)

    # Value of u.now() taken after ParseFile returns.
    end = u.now()
Ejemplo n.º 6
0
#    a2p1.Preprocess(countsFile, trainingFile, adjustedTrainingFile)
#
#    targetFile = "parse_dev.dat"
#    adjustedCountsFile = "cfg.vert.adjusted.counts"
#    resultFile = "parse_dev.vert.out"

    # Active configuration: these three values are the arguments passed to
    # ParseFile below (input file, counts file, output file). The commented
    # assignments around them are alternative configurations left disabled.
    targetFile = "parse_test.dat"
    adjustedCountsFile = "cfg.vert.adjusted.counts"
    resultFile = "parse_test.p3.out"

#
#    targetFile = "test.dat"
#    adjustedCountsFile = "cfg.vert.adjusted.counts"
#    resultFile = "test.vert.out"

    # Value of u.now() taken before the parse starts; presumably paired with
    # `end` for a timing report outside this excerpt -- TODO confirm.
    start = u.now()

#    targetFile = "shorttest.dat"
#    adjustedCountsFile = "cfg.vert.adjusted.counts"
#    resultFile = "shorttest.vert.out"

#    targetFile = "shorttest.dat"
#    adjustedCountsFile = "parse_train.counts.out"
#    resultFile = "shorttest.out"

    # Disabled call to a2p2's ParseFile; the ParseFile in scope here is used
    # instead.
    #a2p2.ParseFile(targetFile, adjustedCountsFile, resultFile)

    ParseFile(targetFile, adjustedCountsFile, resultFile)
    
    # Value of u.now() taken after ParseFile returns.
    end = u.now()