def test_custom_feat_avg_perceptron():
    """Dev-set accuracy of the custom-feature averaged perceptron must exceed 0.810.

    The response file was generated offline, roughly via:
    # w, tr_acc, dv_acc =  avg_perceptron.trainAvgPerceptron(10,tr_all,features.yourFeatures,alltags)
    # confusion = tagger_base.evalTagger(lambda words,alltags : tagger_base.classifierTagger(words,features.yourFeatures,w,alltags),'classifier')
    """
    threshold = 0.810
    confusion = scorer.getConfusion(constants.DEV_FILE, 'avg_perceptron_custom.response')
    score = scorer.accuracy(confusion)
    ok_(threshold < score, msg="NOT_IN_RANGE Expected:%f, Actual:%f" % (threshold, score))
def test_custom_str_perceptron():
    """Dev-set accuracy of the custom-feature structured perceptron must exceed 0.810.

    The response file was generated offline, roughly via:
    # w,tr_acc,dv_acc = str_perceptron.trainAvgStructPerceptron(10,tr_all,features.yourHMMFeatures,alltags)
    # confusion = tagger_base.evalTagger(lambda words,alltags : viterbi.viterbiTagger(words,features.yourHMMFeatures,w,alltags)[0],'custom_str_classifier')
    """
    threshold = 0.810
    confusion = scorer.getConfusion(DEV_FILE, 'str_avg_perceptron_custom.response')
    score = scorer.accuracy(confusion)
    ok_(threshold < score, msg="NOT_IN_RANGE Expected:%f, Actual:%f" % (threshold, score))
def test_str_perceptron():
    """Dev-set accuracy of the word/transition structured perceptron must exceed 0.749.

    The response file was generated offline, roughly via:
    # w,tr_acc,dv_acc = str_perceptron.trainAvgStructPerceptron(10,tr_all,features.wordTransFeatures,alltags)
    # confusion = tagger_base.evalTagger(lambda words,alltags : viterbi.viterbiTagger(words,features.wordTransFeatures,w,alltags)[0],'str_classifier')
    """
    threshold = 0.749
    confusion = scorer.getConfusion(DEV_FILE, 'str_avg_perceptron.response')
    score = scorer.accuracy(confusion)
    ok_(threshold < score,
        msg="NOT_IN_RANGE Expected:%f, Actual:%f" % (threshold, score))
def test_avg_perceptron():
    """Dev-set accuracy of the word/char averaged perceptron must exceed 0.740.

    The response file was generated offline, roughly via:
    # w, tr_acc, dv_acc =  avg_perceptron.trainAvgPerceptron(10,tr_all,features.wordCharFeatures,alltags)
    # confusion = tagger_base.evalTagger(lambda words,alltags : tagger_base.classifierTagger(words,features.wordCharFeatures,w,alltags),'classifier')
    """
    threshold = 0.740
    confusion = scorer.getConfusion(constants.DEV_FILE,
                                    'avg_perceptron.response')
    score = scorer.accuracy(confusion)
    ok_(threshold < score,
        msg="NOT_IN_RANGE Expected:%f, Actual:%f" % (threshold, score))
# Example #5 (score: 0)
def evalTagger(tagger,outfilename,testfile=DEV_FILE):
    """Run a tagger over a test file, write its predictions, and score them.

    Parameters:
    tagger -- callable (words, alltags) -> sequence of predicted tags
    outfilename -- file to receive one predicted tag per line, with a
                   blank line after each sentence
    testfile -- (optional) CoNLL-format file holding the true labels

    Returns:
    confusion matrix from scorer.getConfusion (counts of
    (true_label, pred_label) pairs)
    """
    # Build the candidate tag inventory from the *training* data, so the
    # tagger can consider tags that never occur in the test file.
    alltags = set()
    for words, tags in preproc.conllSeqGenerator(TRAIN_FILE):
        alltags.update(tags)
    with open(outfilename,'w') as outfile:
        for words,_ in preproc.conllSeqGenerator(testfile):
            pred_tags = tagger(words,alltags)
            for tag in pred_tags:
                # write() works on both Python 2 and 3, unlike the
                # Python-2-only "print >>outfile" statement used before;
                # "%s\n" reproduces print's str() coercion + newline.
                outfile.write("%s\n" % tag)
            outfile.write("\n")  # blank line terminates the sentence
    return scorer.getConfusion(testfile,outfilename) #run the scorer on the prediction file
# Example #6 (score: 0)
def evalTagger(tagger,outfilename,testfile=DEV_FILE):
    """Run a tagger over a test file, write its predictions, and score them.

    Parameters:
    tagger -- callable (words, alltags) -> sequence of predicted tags
    outfilename -- file to receive one predicted tag per line, with a
                   blank line after each sentence
    testfile -- (optional) CoNLL-format file holding the true labels

    Returns:
    confusion matrix from scorer.getConfusion (counts of
    (true_label, pred_label) pairs)
    """
    # The tag inventory comes from the training data so the tagger can
    # propose tags absent from the test file.
    alltags = set()
    for words, tags in preproc.conllSeqGenerator(TRAIN_FILE):
        alltags.update(tags)
    with open(outfilename,'w') as outfile:
        for words,_ in preproc.conllSeqGenerator(testfile):
            pred_tags = tagger(words,alltags)
            for tag in pred_tags:
                # write() is portable across Python 2/3; the original
                # "print >>outfile" statement is a syntax error on Python 3.
                outfile.write("%s\n" % tag)
            outfile.write("\n")  # blank line terminates the sentence
    return scorer.getConfusion(testfile,outfilename) #run the scorer on the prediction file
def evalTagger(tagger,outfilename,testfile=DEV_FILE):
    """Calculate confusion_matrix for a given tagger

    Parameters:
    tagger -- Function mapping (words, possible_tags) to an optimal
              sequence of tags for the words
    outfilename -- Filename to write tagger predictions to
    testfile -- (optional) Filename containing true labels

    Returns:
    confusion_matrix -- dict of occurences of (true_label, pred_label)
    """
    # Collect the full tag inventory from the training data; the tagger
    # needs the candidate tag set, not just tags seen in testfile.
    alltags = set()
    for words, tags in preproc.conllSeqGenerator(TRAIN_FILE):
        alltags.update(tags)
    with open(outfilename,'w') as outfile:
        for words,_ in preproc.conllSeqGenerator(testfile):
            pred_tags = tagger(words,alltags)
            for tag in pred_tags:
                # write() replaces the Python-2-only "print >>outfile"
                # statement; "%s\n" preserves the str() coercion + newline.
                outfile.write("%s\n" % tag)
            outfile.write("\n")  # blank line terminates the sentence
    return scorer.getConfusion(testfile,outfilename) #run the scorer on the prediction file
# Example #8 (score: 0)
def evalTagger(tagger, outfilename, testfile=DEV_FILE):
    """Calculate confusion_matrix for a given tagger

    Parameters:
    tagger -- Function mapping (words, possible_tags) to an optimal
              sequence of tags for the words
    outfilename -- Filename to write tagger predictions to
    testfile -- (optional) Filename containing true labels

    Returns:
    confusion_matrix -- dict of occurences of (true_label, pred_label)
    """
    # Candidate tags come from the training data so the tagger can use
    # tags that never appear in testfile.
    alltags = set()
    for words, tags in preproc.conllSeqGenerator(TRAIN_FILE):
        alltags.update(tags)
    with open(outfilename, 'w') as outfile:
        for words, _ in preproc.conllSeqGenerator(testfile):
            pred_tags = tagger(words, alltags)
            for tag in pred_tags:
                # write() is valid on Python 2 and 3; "print >> outfile"
                # is a Python-3 syntax error. "%s\n" matches print's output.
                outfile.write("%s\n" % tag)
            outfile.write("\n")  # blank line terminates the sentence
    return scorer.getConfusion(
        testfile, outfilename)  #run the scorer on the prediction file