예제 #1
0
def readandfixRound1(taglength=2):
    """Load both tagged corpora, merge them and run the clean-up passes.

    The CoNLL data comes from ``readconll()`` and the treebank data from
    ``tb.readtb()``; only the pretagged half of each result is used here.
    ``tagsandstuff`` is run on each corpus separately, the two tagsets are
    combined with ``alltags``, and ``fixnames``, ``fixdates`` and
    ``fixlower`` are then applied, in that order, to the concatenated
    pretagged data.  Their return values are ignored, so they are
    presumably in-place repairs -- confirm against their definitions.

    ``taglength`` is accepted but never read in this function.

    Returns a 5-tuple::

        (pretagged, [contexts_conll, contexts_tb], tags,
         [tagset_conll, tagset_tb], names)
    """
    # The sentence lists are not needed in this round; only the tagged data is.
    _, conll_tagged = readconll()
    _, treebank_tagged = tb.readtb()

    conll_tagset, conll_contexts = tagsandstuff(conll_tagged)
    treebank_tagset, treebank_contexts = tagsandstuff(treebank_tagged)

    tags = alltags([conll_tagset, treebank_tagset])
    pretagged = conll_tagged + treebank_tagged
    names = reallynames(tags)

    # Clean-up passes over the merged corpus (order kept as in the original).
    fixnames(pretagged, names)
    fixdates(pretagged, tags)
    fixlower(pretagged)

    contexts = [conll_contexts, treebank_contexts]
    tagsets = [conll_tagset, treebank_tagset]
    return pretagged, contexts, tags, tagsets, names
예제 #2
0
def _elapsed_seconds(started):
    """Return the whole number of seconds elapsed since datetime *started*."""
    elapsed = datetime.datetime.now() - started
    # timedelta.seconds is only the within-day remainder (0..86399); the days
    # component has to be folded in or runs of 24h+ are under-reported.
    return elapsed.days * 86400 + elapsed.seconds


def comp34411(metering=0, brilltagger=False, sentences=False, pretagged=False, qwindow=2, stackwindow=False, folds=5, tagsize=2, atagsize=False, forced=False, precision=0.97, threshold=500):
    """Run the full experiment: read data, train a tagger, force-parse, n-fold.

    Progress and timing information is printed along the way.  The
    single-argument ``print(...)`` form is used throughout; it produces the
    same output as the print statement under Python 2.

    Parameters:
        metering: value assigned to ``tb.METERING``, ``tbl.METERING``,
            ``tbl.tag.METERING`` and ``fp.METERING``.
        brilltagger: an existing tagger; when falsy a new one is trained
            with ``tbl.trainwithmxlandbrill``.
        sentences, pretagged: the data to work on.  When ``sentences`` is
            falsy both are loaded from ``tb.readtb()``.  If ``sentences`` is
            supplied, ``pretagged`` must be supplied too -- it is sliced and
            handed to the trainer without further checks.
        qwindow, stackwindow: window sizes forwarded to
            ``fp.forceparseall`` and ``fp.nfold``; ``stackwindow`` falls
            back to ``qwindow`` when left at its default.
        folds: passed to ``fp.nfold`` as ``n``.
        tagsize: forwarded to the trainer (as both ``mxltagsize`` and
            ``tagsize``), to ``fp.forceparseall`` and to ``fp.nfold``.
        atagsize: forwarded to the trainer; defaults to ``tagsize`` when
            falsy.
        forced: when truthy the ``fp.forceparseall`` step is skipped.
        precision, threshold: forwarded to ``fp.nfold``.

    Returns:
        ``(sentences, pretagged, brilltagger, parser, training, testing)``
        where the last three are taken from the result of ``fp.nfold``.
    """
    print("reading data")
    tb.METERING = metering
    if not atagsize:
        atagsize = tagsize
    # NOTE: `== False` is also true for 0, so stackwindow=0 is replaced too.
    if stackwindow == False:
        stackwindow = qwindow
    if not sentences:
        sentences, pretagged = tb.readtb()

    print("initial values for pretagged[:5], sentences[0]")
    print(pretagged[:5])
    sentences[0].showDTree()

    tbl.METERING = metering
    tbl.tag.METERING = metering
    if not brilltagger:
        # Fixed values handed to the trainer as its N and K arguments.
        N = 10000
        K = 10000
        print("making tagger")
        t0 = datetime.datetime.now()
        brilltagger = tbl.trainwithmxlandbrill(pretagged=pretagged, N=N, K=K, mxltagsize=tagsize, tagsize=tagsize, atagsize=atagsize)
        print("training the tagger took %s seconds" % _elapsed_seconds(t0))
        print("pretagged[:5], sentences[0] after training the tagger")
        print(pretagged[:5])
        sentences[0].showDTree()

    fp.METERING = metering
    if not forced:
        print("forceparse")
        t0 = datetime.datetime.now()
        fp.forceparseall(sentences, stackwindow=stackwindow, qwindow=qwindow, tagsize=tagsize)
        # NOTE(review): this header is printed without the data it announces;
        # kept as-is to leave the log output unchanged.
        print("pretagged[:5], sentences[0] after forceparseall")
        print("forced parsing took %s seconds" % _elapsed_seconds(t0))

    print("nfold %s (precision = %s)" % (folds, precision))
    # fp.nfold yields six values; the third one is not used here.
    a, ca, _, parser, training, testing = fp.nfold(sentences, n=folds, stackwindow=stackwindow, qwindow=qwindow, tagsize=tagsize, precision=precision, threshold=threshold)
    print("average accuracy over all folds (precision = %s): %.3f\naverage classifier accuracy %.3f" % (precision, float(a), float(ca)))
    print("""
return the parser from the last fold & the training and testing sets
for that parser so we can do subsequent experiments soundly
""")
    return sentences, pretagged, brilltagger, parser, training, testing