def tag_dir(model, input_dir, output_dir, tokenized_input, string=None, **kwargs):
    """
    Tag every ``.txt`` file in a directory and write one ``.tsv`` per file.

    :param model: Path to a model directory to load the Tagger from
    :param input_dir: Path to a directory containing ``.txt`` files
    :param output_dir: Path to the directory receiving tagged ``.tsv`` files
    :param tokenized_input: Whether the input files are already tokenized
        (forwarded to ``pandora.utils.load_unannotated_file``)
    :param string: Unused; kept for signature compatibility with ``tag_string``
    :param kwargs: Extra settings forwarded to the Tagger as overrides
    """
    print('::: started :::')
    tagger = Tagger(load=True, model_dir=model, overwrite=kwargs)
    print('Tagger loaded, now annotating...')
    for filename in os.listdir(input_dir):
        if not filename.endswith('.txt'):
            continue
        print('\t +', filename)
        unseen_tokens = pandora.utils.load_unannotated_file(
            # os.path.join is robust whether or not input_dir ends with '/'
            # (the original string concatenation required a trailing slash)
            os.path.join(input_dir, filename),
            nb_instances=None,
            tokenized_input=tokenized_input
        )
        annotations = tagger.annotate(unseen_tokens)
        keys = list(annotations.keys())
        print("Keys :" + "\t".join(keys))
        # One header row of annotation keys, then one row per token;
        # columns follow the order of `keys`.
        with codecs.open(os.path.join(output_dir, filename + ".tsv"),
                         'w', 'utf8') as f:
            f.write("\t".join(keys) + "\n")
            for row in zip(*(annotations[k] for k in keys)):
                f.write('\t'.join(row) + '\n')
    print('::: ended :::')
def main():
    """Annotate every ``.txt`` file in the Wilhelmus source folder and
    write token / lemma / POS triples, tab-separated, to the tagged folder."""
    print('::: started :::')
    tagger = Tagger(load=True, model_dir='models/wilhelmus_full')
    print('Tagger loaded, now annotating...')
    orig_path = 'data/wilhelmus/orig/'
    new_path = 'data/wilhelmus/tagged/'
    # Only plain-text sources are eligible for tagging.
    text_files = (name for name in os.listdir(orig_path)
                  if name.endswith('.txt'))
    for name in text_files:
        print('\t +', name)
        tokens = pandora.utils.load_unannotated_file(
            orig_path + name, nb_instances=None, tokenized_input=False)
        annotations = tagger.annotate(tokens)
        rows = zip(annotations['tokens'],
                   annotations['lemmas'],
                   annotations['pos'])
        with codecs.open(new_path + name, 'w', 'utf8') as out:
            for token, lemma, pos in rows:
                out.write('\t'.join((token, lemma, pos)) + '\n')
    print('::: ended :::')
def tag_string(model, input_dir, output_dir=None, string=None, **kwargs):
    """
    Tag a single untokenized string and print the annotations as TSV.

    NOTE: despite its name, ``input_dir`` carries the raw string to tag —
    the signature mirrors ``tag_dir`` for interface compatibility.
    (The original docstring wrongly described this as tagging a directory.)

    :param model: Path to a model directory to load the Tagger from
    :param input_dir: Untokenized string to tag
    :param output_dir: Unused; kept for signature compatibility with ``tag_dir``
    :param string: Unused; kept for signature compatibility
    :param kwargs: Extra settings forwarded to the Tagger as overrides
    """
    print('::: started :::')
    tagger = Tagger(load=True, model_dir=model, overwrite=kwargs)
    print('Tagger loaded, now annotating...')
    # `tokenize` is presumably a module-level compiled regex — TODO confirm.
    unseen_tokens = tokenize.split(input_dir)
    print(unseen_tokens)
    annotations = tagger.annotate(unseen_tokens)
    keys = list(annotations.keys())
    print("--------------------")
    print('\t'.join(keys))
    print("--------------------")
    # One line per token; columns follow the order of `keys`.
    for row in zip(*(annotations[k] for k in keys)):
        print('\t'.join(row))
    print('::: ended :::')
def main():
    """Tag the Wilhelmus corpus: for each ``.txt`` source file, emit a
    tab-separated token / lemma / POS file into the tagged directory."""
    print('::: started :::')
    tagger = Tagger(load=True, model_dir='models/wilhelmus_full')
    print('Tagger loaded, now annotating...')
    orig_path = 'data/wilhelmus/orig/'
    new_path = 'data/wilhelmus/tagged/'
    for fname in os.listdir(orig_path):
        # Skip anything that is not a plain-text source.
        if not fname.endswith('.txt'):
            continue
        print('\t +', fname)
        tokens = pandora.utils.load_unannotated_file(
            orig_path + fname,
            nb_instances=None,
            tokenized_input=False)
        result = tagger.annotate(tokens)
        lines = ('\t'.join(triple) + '\n'
                 for triple in zip(result['tokens'],
                                   result['lemmas'],
                                   result['pos']))
        with codecs.open(new_path + fname, 'w', 'utf8') as handle:
            handle.writelines(lines)
    print('::: ended :::')