def generateKaggleSubmission(tagger, outfilename):
    """Write tagger predictions for the test and dev sets in Kaggle CSV format.

    Parameters:
    tagger -- function mapping (words, possible_tags) to a tag sequence,
              or to a tuple whose first element is the tag sequence
    outfilename -- CSV file to write, with header columns Id and Prediction
    """
    # The candidate tag inventory comes from the training data.
    alltags = set()
    for words, tags in preproc.conllSeqGenerator(TRAIN_FILE):
        alltags.update(tags)
    with open(outfilename, 'w') as f:
        writer = csv.DictWriter(f, fieldnames=['Id', 'Prediction'])
        writer.writeheader()
        # The original test-set and dev-set loops were identical except for
        # the input file and the Id prefix, so they share one helper.
        _writeKagglePredictions(writer, tagger, alltags, TEST_FILE, 'test')
        _writeKagglePredictions(writer, tagger, alltags, DEV_FILE, 'dev')

def _writeKagglePredictions(writer, tagger, alltags, infile, prefix):
    """Tag every sentence in infile and emit one '<prefix>-<i>' row per token."""
    i = 0
    for words, _ in preproc.conllSeqGenerator(infile):
        pred_tags = tagger(words, alltags)
        # Some taggers return (tags, score); keep only the tag sequence.
        if isinstance(pred_tags, tuple):
            pred_tags = pred_tags[0]
        for tag in pred_tags:
            writer.writerow({'Id': '{}-{}'.format(prefix, i), 'Prediction': tag})
            i += 1
def evalTagger(tagger, outfilename, testfile=DEV_FILE):
    """Calculate confusion_matrix for a given tagger

    Parameters:
    tagger -- Function mapping (words, possible_tags) to an optimal
              sequence of tags for the words
    outfilename -- Filename to write tagger predictions to
    testfile -- (optional) Filename containing true labels

    Returns:
    confusion_matrix -- dict of occurences of (true_label, pred_label)
    """
    # Tag inventory is built from the training data, not the eval data.
    # (The original wrapped this in enumerate() but never used the index.)
    alltags = set()
    for words, tags in preproc.conllSeqGenerator(TRAIN_FILE):
        alltags.update(tags)
    with open(outfilename, 'w') as outfile:
        for words, _ in preproc.conllSeqGenerator(testfile):
            pred_tags = tagger(words, alltags)
            for tag in pred_tags:
                print >>outfile, tag
            # Blank line marks the end of each sentence in the output.
            print >>outfile, ""
    return scorer.getConfusion(testfile, outfilename)  # run the scorer on the prediction file
def evalTagger(tagger, outfilename, testfile=DEV_FILE):
    """Write a tagger's predictions to a file and return its confusion matrix.

    Parameters:
    tagger -- function mapping (words, possible_tags) to a tag sequence
    outfilename -- file that receives one predicted tag per line
    testfile -- (optional) file holding the gold-standard labels
    """
    # Collect every tag observed in the training corpus.
    alltags = set()
    for i, (words, tags) in enumerate(preproc.conllSeqGenerator(TRAIN_FILE)):
        for t in tags:
            alltags.add(t)
    with open(outfilename, 'w') as outfile:
        for sentence, _ in preproc.conllSeqGenerator(testfile):
            for t in tagger(sentence, alltags):
                print >>outfile, t
            print >>outfile, ""
    return scorer.getConfusion(testfile, outfilename)  # run the scorer on the prediction file
def test_nb_prob_mass():
    """Check that each tag's naive-Bayes emission distribution sums to 1.

    NOTE(review): relies on module-level globals `alltags` (tag inventory)
    and `weights_nb` (log-weights keyed by (tag, word)) being defined
    elsewhere in this file — confirm they are populated before this runs.
    """
    # The original allocated `probability_masses = defaultdict(float)` and
    # never used it; the dead local has been removed.
    # Vocabulary is every word type in the training data.
    allwords = set()
    for words, _ in preproc.conllSeqGenerator(TRAIN_FILE):
        allwords.update(words)
    for tag in alltags:
        # exp() converts log-weights back to probabilities before summing.
        total_prob = sum(np.exp(weights_nb[(tag, word)]) for word in allwords)
        assert_almost_equals(1.0, total_prob, places=2, msg="UNEQUAL Expected tag %s to have total prob of 1.0, but instead has %s" % (tag, total_prob))
def evalTagger(tagger, outfilename, testfile=DEV_FILE):
    """Score a tagger against a labeled file.

    Parameters:
    tagger -- Function mapping (words, possible_tags) to an optimal
              sequence of tags for the words
    outfilename -- Filename to write tagger predictions to
    testfile -- (optional) Filename containing true labels

    Returns:
    confusion_matrix -- dict of occurences of (true_label, pred_label)
    """
    # Candidate tags are harvested from the training corpus.
    alltags = set()
    for i, (sent_words, sent_tags) in enumerate(preproc.conllSeqGenerator(TRAIN_FILE)):
        for t in sent_tags:
            alltags.add(t)
    with open(outfilename, 'w') as outfile:
        for sentence, _ in preproc.conllSeqGenerator(testfile):
            predicted = tagger(sentence, alltags)
            for t in predicted:
                print >>outfile, t
            print >>outfile, ""
    return scorer.getConfusion(testfile, outfilename)  # run the scorer on the prediction file
def evalTagger(tagger, outfilename, testfile=DEV_FILE):
    """Evaluate a tagger and report its confusion matrix.

    Parameters:
    tagger -- Function mapping (words, possible_tags) to an optimal
              sequence of tags for the words
    outfilename -- Filename to write tagger predictions to
    testfile -- (optional) Filename containing true labels

    Returns:
    confusion_matrix -- dict of occurences of (true_label, pred_label)
    """
    # Build the full tag inventory from the training data.
    tag_inventory = set()
    for idx, (train_words, train_tags) in enumerate(
            preproc.conllSeqGenerator(TRAIN_FILE)):
        for train_tag in train_tags:
            tag_inventory.add(train_tag)
    with open(outfilename, 'w') as outfile:
        for eval_words, _ in preproc.conllSeqGenerator(testfile):
            hypothesis = tagger(eval_words, tag_inventory)
            for hyp_tag in hypothesis:
                print >> outfile, hyp_tag
            print >> outfile, ""
    return scorer.getConfusion(
        testfile, outfilename)  #run the scorer on the prediction file
def test_nb_prob_mass():
    """Verify each tag's naive-Bayes emission probabilities total 1.0.

    NOTE(review): depends on module-level globals `alltags` and
    `weights_nb` ((tag, word) -> log-weight) defined elsewhere in this
    file — confirm both are populated before this test executes.
    """
    # Dead local removed: the original created a defaultdict(float) named
    # `probability_masses` and never touched it again.
    allwords = set()
    for words, _ in preproc.conllSeqGenerator(TRAIN_FILE):
        for word in words:
            allwords.add(word)
    for tag in alltags:
        # Exponentiate the stored log-weights to recover probabilities.
        total_prob = sum(np.exp(weights_nb[(tag, word)]) for word in allwords)
        assert_almost_equals(
            1.0,
            total_prob,
            places=2,
            msg=
            "UNEQUAL Expected tag %s to have total prob of 1.0, but instead has %s"
            % (tag, total_prob))