def mismatches(sentence, mismatches=False):
    """Report every word whose parsed head differs from its gold-standard head.

    For each key ``x`` of ``sentence.parsed`` that is also in
    ``sentence.goldstandard`` and whose ``.hd`` differs between the two, a
    rule string is built from the slice of ``sentence.leaves`` spanning
    ``x``, the parsed head and the gold-standard head.  Each leaf in that
    slice gets its ``mark`` attribute overwritten (a side effect on the
    leaves):

    * ``"/*"`` -- the mismatching word itself,
    * ``"/1"`` -- the head chosen in ``parsed``,
    * ``"/2"`` -- the head given in ``goldstandard``,
    * ``""``   -- any other leaf in the span.

    :param sentence: object exposing ``parsed``, ``goldstandard`` and
        ``leaves``.
    :param mismatches: if a ``dict``, the rule is recorded in it through
        ``incTable(rule, mismatches)``; otherwise (default ``False``) the
        two trees returned by ``commonancestor`` are printed.
    :return: None.
    """
    parsed = sentence.parsed
    goldstandard = sentence.goldstandard
    leaves = sentence.leaves
    for x in parsed:
        if x not in goldstandard or parsed[x].hd == goldstandard[x].hd:
            continue
        ca = commonancestor(x, parsed, goldstandard, leaves)
        if not ca:
            continue
        st1 = showDTree(ca[0])
        rewrite = showDTree(ca[1])
        # NOTE(review): the result of commonancestorX was never used; the
        # call is kept only in case it has side effects -- confirm and drop.
        commonancestorX(x, parsed, goldstandard, leaves)
        r1 = parsed[x]
        r2 = goldstandard[x]
        positions = [r1.hd, r2.hd, x]
        span = leaves[min(positions):max(positions) + 1]
        for w in span:
            if w.position == x:
                w.mark = "/*"
            elif w.position == r1.hd:
                w.mark = "/1"
            elif w.position == r2.hd:
                w.mark = "/2"
            else:
                w.mark = ""
        rule = " ".join(["%s%s" % (w.tag, w.mark) for w in span])
        # The previous check, type(mismatches) == "dict", compared a type
        # object with a string and was always False, so a table passed by
        # the caller was never updated.
        if isinstance(mismatches, dict):
            incTable(rule, mismatches)
        else:
            print("************************%s%s" % (st1, rewrite))
def showErrors(sentences):
    """Print a comparison for each sentence whose parse differs from gold.

    Sentences whose ``goldstandard`` equals ``parsed`` are skipped.  For the
    others this prints, in order: a separator line, the gold-standard tree,
    the parsed tree, and the value returned by ``errors`` for that sentence.

    :param sentences: iterable of objects exposing ``goldstandard``,
        ``parsed`` and ``leaves``.
    :return: None.
    """
    for sentence in sentences:
        gold = sentence.goldstandard
        if gold == sentence.parsed:
            continue
        print("**************************************")
        print("GOLD STANDARD")
        gold_tree = buildtree(gold, sentence.leaves)
        print(showDTree(gold_tree))
        print("PARSED")
        parsed_tree = buildtree(sentence.parsed, sentence.leaves)
        print(showDTree(parsed_tree))
        print(errors(sentence.parsed, sentence.goldstandard, sentence.leaves))
def nfold(trees, n=5, stackwindow=2, qwindow=2, tagsize=2, precision=0.95, threshold=500):
    """Run ``n`` folds over ``trees`` and return the averaged accuracies.

    For each fold index the data is split with ``makefold``; if the training
    part is an empty list the two parts are swapped.  A parser is obtained
    from ``onefold`` and some debugging output about the first test item is
    printed.

    :param trees: data handed to ``makefold``.
    :param n: number of folds; also the divisor of the returned totals, so
        it is expected to be positive.
    :param stackwindow, qwindow, tagsize, precision, threshold: forwarded
        unchanged to ``onefold`` as keyword arguments.
    :return: tuple ``(summed parser.parseraccuracy / n,
        summed classifier.accuracy / n, classifier, parser, training,
        testing)`` where the last four items come from the final fold.
    """
    print("nfold, tagsize=%s" % (tagsize))
    parser_total = 0
    classifier_total = 0
    fold_options = dict(
        stackwindow=stackwindow,
        qwindow=qwindow,
        tagsize=tagsize,
        precision=precision,
        threshold=threshold,
    )
    for fold in range(n):
        training, testing = makefold(fold, n, trees)
        if training == []:
            # No training data in this split: use the other part instead.
            training, testing = testing, training
        print("makeclassifier(%s)" % (fold))
        parser = onefold(fold, n, training, testing, **fold_options)
        first = testing[0]
        print("testing[0].dtree before testparser")
        first.showDTree()
        print("testing[0].parsed")
        print(tb.showDTree(malt.buildtree(first.parsed, first.leaves)))
        parser_total += parser.parseraccuracy
        c = parser.classifier
        classifier_total += c.accuracy
    return parser_total / n, classifier_total / n, c, parser, training, testing
def showChanged(sentences):
    """Print a report for each sentence whose ``tbr`` differs from ``parsed``.

    Sentences whose ``tbr`` equals ``parsed`` are skipped.  For the others
    this prints, in order: a separator line, the change in accuracy between
    the ``tbr`` and ``parsed`` analyses, the gold-standard tree, the ``tbr``
    tree with its ``errors`` value, and the ``parsed`` tree with its
    ``errors`` value.

    :param sentences: iterable of objects exposing ``tbr``, ``parsed``,
        ``goldstandard`` and ``leaves``.
    :return: None.
    """
    for sentence in sentences:
        if sentence.tbr == sentence.parsed:
            continue
        tbr_score = scoreSentence(sentence, lambda x: x.tbr)
        parsed_score = scoreSentence(sentence, lambda x: x.parsed)
        print("**************************************")
        # NOTE(review): scoreSentence presumably returns a pair of counts;
        # confirm.  float() forces true division so the ratios are not
        # truncated to 0 or 1 by Python 2 integer division.
        tbr_ratio = float(tbr_score[0]) / tbr_score[1]
        parsed_ratio = float(parsed_score[0]) / parsed_score[1]
        print("Change in accuracy %.3f (%s)" % (
            tbr_ratio - parsed_ratio, tbr_score[0] - parsed_score[0]))
        print(showDTree(buildtree(sentence.goldstandard, sentence.leaves)))
        print(showDTree(buildtree(sentence.tbr, sentence.leaves)))
        print(errors(sentence.tbr, sentence.goldstandard, sentence.leaves))
        print(showDTree(buildtree(sentence.parsed, sentence.leaves)))
        print(errors(sentence.parsed, sentence.goldstandard, sentence.leaves))