Python read_json_file 예제들, spacy.gold.read_json_file Python 예제들

예제 #1

0

파일 보기

def main(language, train_loc, dev_loc, model_dir, n_sents=0, n_iter=15, out_loc="", verbose=False,
         debug=False, corruption_level=0.0, gold_preproc=False, eval_only=False, pseudoprojective=False,
         L1=1e-6):
    """Train (unless ``eval_only``) and evaluate a pipeline, then print its scores.

    Args:
        language: Language identifier handed to ``spacy.util.get_lang_class``.
        train_loc: Path of the JSON training data (read with ``read_json_file``).
        dev_loc: Path of the JSON development data; used for training-time
            evaluation, for ``write_parses`` and for the final evaluation.
        model_dir: Model directory forwarded to ``train``, ``write_parses``
            and ``evaluate``.
        n_sents: When greater than 0, only the first ``n_sents`` training
            items are kept; the value is also forwarded to ``train``.
        n_iter: Number of iterations forwarded to ``train``.
        out_loc: When non-empty, ``write_parses`` writes its output there.
        verbose: Forwarded to ``evaluate``.
        debug, pseudoprojective, L1: Not used directly here; they only reach
            the components through the config dictionaries.
        corruption_level: Forwarded to ``train``.
        gold_preproc: Forwarded to ``train`` and ``evaluate``.
        eval_only: Skip training and only evaluate.
    """
    # Snapshot the call arguments exactly once, before any other local exists.
    # Calling ``locals()`` again for every config would leak the previously
    # created config dicts into the later ones (e.g. ``tagger_cfg['parser_cfg']``).
    base_cfg = dict(locals())
    parser_cfg = dict(base_cfg)
    tagger_cfg = dict(base_cfg)
    entity_cfg = dict(base_cfg)

    lang = spacy.util.get_lang_class(language)

    parser_cfg['features'] = lang.Defaults.parser_features
    entity_cfg['features'] = lang.Defaults.entity_features

    if not eval_only:
        gold_train = list(read_json_file(train_loc))
        gold_dev = list(read_json_file(dev_loc))
        if n_sents > 0:
            gold_train = gold_train[:n_sents]
        train(lang, gold_train, gold_dev, model_dir, tagger_cfg, parser_cfg, entity_cfg,
              n_sents=n_sents, gold_preproc=gold_preproc, corruption_level=corruption_level,
              n_iter=n_iter)
    if out_loc:
        write_parses(lang, dev_loc, model_dir, out_loc)
    scorer = evaluate(lang, list(read_json_file(dev_loc)),
                      model_dir, gold_preproc=gold_preproc, verbose=verbose)
    print('TOK', scorer.token_acc)
    print('POS', scorer.tags_acc)
    print('UAS', scorer.uas)
    print('LAS', scorer.las)

    print('NER P', scorer.ents_p)
    print('NER R', scorer.ents_r)
    print('NER F', scorer.ents_f)

예제 #2

0

파일 보기

파일: train.py 프로젝트: paolodedios/spaCy

def main(language, train_loc, dev_loc, model_dir, n_sents=0, n_iter=15, out_loc="", verbose=False,
         debug=False, corruption_level=0.0, gold_preproc=False, eval_only=False, pseudoprojective=False):
    """Train (unless ``eval_only``) and evaluate a pipeline, then print its scores.

    Args:
        language: Language identifier handed to ``spacy.util.get_lang_class``.
        train_loc: Path of the JSON training data (read with ``read_json_file``).
        dev_loc: Path of the JSON development data.
        model_dir: Model directory forwarded to ``train``, ``write_parses``
            and ``evaluate``.
        n_sents, n_iter, corruption_level: Forwarded to ``train``.
        out_loc: When non-empty, ``write_parses`` writes its output there.
        verbose: Forwarded to ``evaluate``.
        debug, pseudoprojective: Not used directly here; they only reach the
            components through the config dictionaries.
        gold_preproc: Forwarded to ``train`` and ``evaluate``.
        eval_only: Skip training and only evaluate.
    """
    # Take a single snapshot of the arguments while they are the only locals.
    # Repeated ``locals()`` calls would nest the earlier config dicts inside
    # the later ones (``tagger_cfg`` would contain ``parser_cfg`` and so on).
    base_cfg = dict(locals())
    parser_cfg = dict(base_cfg)
    tagger_cfg = dict(base_cfg)
    entity_cfg = dict(base_cfg)

    lang = spacy.util.get_lang_class(language)

    parser_cfg['features'] = lang.Defaults.parser_features
    entity_cfg['features'] = lang.Defaults.entity_features

    if not eval_only:
        gold_train = list(read_json_file(train_loc))
        gold_dev = list(read_json_file(dev_loc))
        train(lang, gold_train, gold_dev, model_dir, tagger_cfg, parser_cfg, entity_cfg,
              n_sents=n_sents, gold_preproc=gold_preproc, corruption_level=corruption_level,
              n_iter=n_iter)
    if out_loc:
        write_parses(lang, dev_loc, model_dir, out_loc)
    scorer = evaluate(lang, list(read_json_file(dev_loc)),
                      model_dir, gold_preproc=gold_preproc, verbose=verbose)
    print('TOK', scorer.token_acc)
    print('POS', scorer.tags_acc)
    print('UAS', scorer.uas)
    print('LAS', scorer.las)

    print('NER P', scorer.ents_p)
    print('NER R', scorer.ents_r)
    print('NER F', scorer.ents_f)

예제 #3

0

파일 보기

파일: nn_train.py 프로젝트: Arttii/spaCy

def main(train_loc, dev_loc, model_dir, n_sents=0, n_iter=15, verbose=False,
         nv_word=10, nv_tag=10, nv_label=10, nv_hidden=10,
         eta=0.1, mu=0.9, eval_only=False):
    """Train an English model with the 'embed' feature set and print dev scores.

    Args:
        train_loc: Path of the JSON training data; only documents whose
            ``'id'`` contains ``'wsj'`` are loaded.
        dev_loc: Path of the JSON development data used for evaluation.
        model_dir: Model directory forwarded to ``train``.
        n_sents, n_iter, verbose: Forwarded to ``train``.
        nv_word, nv_tag, nv_label, nv_hidden: Forwarded to ``train``.
        eta, mu: Forwarded to ``train``.
        eval_only: Accepted but not consulted; training always runs.
    """
    gold_train = list(read_json_file(train_loc, lambda doc: 'wsj' in doc['id']))

    nlp = train(English, gold_train, model_dir,
                feat_set='embed',
                eta=eta, mu=mu,
                nv_word=nv_word, nv_tag=nv_tag, nv_label=nv_label, nv_hidden=nv_hidden,
                n_sents=n_sents, n_iter=n_iter,
                verbose=verbose)

    scorer = evaluate(nlp, list(read_json_file(dev_loc)))

    # A single pre-formatted argument prints identically with the Python 2
    # print statement and the Python 3 print function.
    # The first line reports 100 minus the token accuracy.
    print('TOK %s' % (100 - scorer.token_acc))
    print('POS %s' % scorer.tags_acc)
    print('UAS %s' % scorer.uas)
    print('LAS %s' % scorer.las)

    print('NER P %s' % scorer.ents_p)
    print('NER R %s' % scorer.ents_r)
    print('NER F %s' % scorer.ents_f)

예제 #4

0

파일 보기

파일: train.py 프로젝트: domsooch/spaCy

def main(train_loc,
         dev_loc,
         model_dir,
         n_sents=0,
         n_iter=15,
         out_loc="",
         verbose=False,
         debug=False,
         corruption_level=0.0,
         gold_preproc=False,
         beam_width=1,
         eval_only=False,
         use_orig_arc_eager=False):
    """Train (unless ``eval_only``) and evaluate an English model, printing scores.

    Args:
        train_loc: Path of the JSON training data.
        dev_loc: Path of the JSON development data used for evaluation.
        model_dir: Model directory forwarded to ``train`` and ``evaluate``.
        n_sents, n_iter, corruption_level: Forwarded to ``train``.
        out_loc: Accepted but currently unused (parse writing is disabled).
        verbose: Forwarded to ``train`` and ``evaluate``.
        debug: Selects the ``'debug'`` feature set instead of ``'basic'``.
        gold_preproc: Forwarded to ``train`` and ``evaluate``.
        beam_width: Forwarded to ``train`` and ``evaluate``.
        eval_only: Skip training and only evaluate.
        use_orig_arc_eager: When true, ``English.ParserTransitionSystem`` is
            reassigned to ``TreeArcEager``; the flag is also forwarded to
            ``train``.
    """
    if use_orig_arc_eager:
        # Class-level assignment: it stays in effect after this call returns.
        English.ParserTransitionSystem = TreeArcEager
    if not eval_only:
        gold_train = list(read_json_file(train_loc))
        train(English,
              gold_train,
              model_dir,
              feat_set='basic' if not debug else 'debug',
              gold_preproc=gold_preproc,
              n_sents=n_sents,
              corruption_level=corruption_level,
              n_iter=n_iter,
              beam_width=beam_width,
              verbose=verbose,
              use_orig_arc_eager=use_orig_arc_eager)
    scorer = evaluate(English,
                      list(read_json_file(dev_loc)),
                      model_dir,
                      gold_preproc=gold_preproc,
                      verbose=verbose,
                      beam_width=beam_width)
    # A single pre-formatted argument prints identically with the Python 2
    # print statement and the Python 3 print function.
    print('TOK %s' % scorer.token_acc)
    print('POS %s' % scorer.tags_acc)
    print('UAS %s' % scorer.uas)
    print('LAS %s' % scorer.las)

    print('NER P %s' % scorer.ents_p)
    print('NER R %s' % scorer.ents_r)
    print('NER F %s' % scorer.ents_f)

예제 #5

0

파일 보기

파일: train.py 프로젝트: anukat2015/spaCy

def main(language,
         train_loc,
         dev_loc,
         model_dir,
         n_sents=0,
         n_iter=15,
         out_loc="",
         verbose=False,
         debug=False,
         corruption_level=0.0,
         gold_preproc=False,
         eval_only=False,
         pseudoprojective=False):
    """Train (unless ``eval_only``) and evaluate a model, then print its scores.

    Args:
        language: ``'en'`` or ``'de'``; selects ``English`` or ``German``.
        train_loc: Path of the JSON training data.
        dev_loc: Path of the JSON development data.
        model_dir: Model directory forwarded to ``train``, ``write_parses``
            and ``evaluate``.
        n_sents, n_iter, corruption_level, pseudoprojective: Forwarded to
            ``train``.
        out_loc: When non-empty, ``write_parses`` writes its output there.
        verbose: Forwarded to ``train`` and ``evaluate``.
        debug: Selects the ``'debug'`` feature set instead of ``'basic'``.
        gold_preproc: Forwarded to ``train`` and ``evaluate``.
        eval_only: Skip training and only evaluate.

    Raises:
        ValueError: If ``language`` is not one of the supported codes.
    """
    languages = {'en': English, 'de': German}
    lang = languages.get(language)
    if lang is None:
        # Fail here with a clear message instead of passing None downstream.
        raise ValueError(
            'Unsupported language %r; expected one of: %s'
            % (language, ', '.join(sorted(languages))))

    if not eval_only:
        gold_train = list(read_json_file(train_loc))
        train(lang,
              gold_train,
              model_dir,
              feat_set='basic' if not debug else 'debug',
              gold_preproc=gold_preproc,
              n_sents=n_sents,
              corruption_level=corruption_level,
              n_iter=n_iter,
              verbose=verbose,
              pseudoprojective=pseudoprojective)
    if out_loc:
        write_parses(lang, dev_loc, model_dir, out_loc)
    scorer = evaluate(lang,
                      list(read_json_file(dev_loc)),
                      model_dir,
                      gold_preproc=gold_preproc,
                      verbose=verbose)
    print('TOK', scorer.token_acc)
    print('POS', scorer.tags_acc)
    print('UAS', scorer.uas)
    print('LAS', scorer.las)

    print('NER P', scorer.ents_p)
    print('NER R', scorer.ents_r)
    print('NER F', scorer.ents_f)

예제 #6

0

파일 보기

def main(train_loc,
         dev_loc,
         model_dir,
         n_sents=0,
         n_iter=15,
         verbose=False,
         nv_word=10,
         nv_tag=10,
         nv_label=10,
         nv_hidden=10,
         eta=0.1,
         mu=0.9,
         eval_only=False):
    """Train an English model with the 'embed' feature set and print dev scores.

    Args:
        train_loc: Path of the JSON training data; only documents whose
            ``'id'`` contains ``'wsj'`` are loaded.
        dev_loc: Path of the JSON development data used for evaluation.
        model_dir: Model directory forwarded to ``train``.
        n_sents, n_iter, verbose: Forwarded to ``train``.
        nv_word, nv_tag, nv_label, nv_hidden: Forwarded to ``train``.
        eta, mu: Forwarded to ``train``.
        eval_only: Accepted but not consulted; training always runs.
    """
    gold_train = list(read_json_file(train_loc,
                                     lambda doc: 'wsj' in doc['id']))

    nlp = train(English,
                gold_train,
                model_dir,
                feat_set='embed',
                eta=eta,
                mu=mu,
                nv_word=nv_word,
                nv_tag=nv_tag,
                nv_label=nv_label,
                nv_hidden=nv_hidden,
                n_sents=n_sents,
                n_iter=n_iter,
                verbose=verbose)

    scorer = evaluate(nlp, list(read_json_file(dev_loc)))

    # A single pre-formatted argument prints identically with the Python 2
    # print statement and the Python 3 print function.
    # The first line reports 100 minus the token accuracy.
    print('TOK %s' % (100 - scorer.token_acc))
    print('POS %s' % scorer.tags_acc)
    print('UAS %s' % scorer.uas)
    print('LAS %s' % scorer.las)

    print('NER P %s' % scorer.ents_p)
    print('NER R %s' % scorer.ents_r)
    print('NER F %s' % scorer.ents_f)

예제 #7

0

파일 보기

파일: train.py 프로젝트: michigan-com/spaCy

def main(train_loc, dev_loc, model_dir, n_sents=0, n_iter=15, out_loc="", verbose=False,
         debug=False, corruption_level=0.0, gold_preproc=False, eval_only=False):
    """Optionally train an English model, then evaluate it and print the scores.

    Training is skipped when ``eval_only`` is true. ``debug`` switches the
    feature set from ``'basic'`` to ``'debug'``. ``out_loc`` is accepted but
    not used: writing the parses is disabled in this version.
    """
    if not eval_only:
        training_docs = list(read_json_file(train_loc))
        feature_set = 'debug' if debug else 'basic'
        train(
            English,
            training_docs,
            model_dir,
            feat_set=feature_set,
            gold_preproc=gold_preproc,
            n_sents=n_sents,
            corruption_level=corruption_level,
            n_iter=n_iter,
            verbose=verbose,
        )

    dev_docs = list(read_json_file(dev_loc))
    scorer = evaluate(English, dev_docs, model_dir,
                      gold_preproc=gold_preproc, verbose=verbose)

    # (label, scorer attribute) pairs, reported in this order.
    report = (
        ('TOK', 'token_acc'),
        ('POS', 'tags_acc'),
        ('UAS', 'uas'),
        ('LAS', 'las'),
        ('NER P', 'ents_p'),
        ('NER R', 'ents_r'),
        ('NER F', 'ents_f'),
    )
    for label, attr in report:
        print(label, getattr(scorer, attr))

예제 #8

0

파일 보기

파일: train.py 프로젝트: pratikmehta14/spaCy

def main(train_loc, dev_loc, model_dir, n_sents=0, n_iter=15, out_loc="", verbose=False,
         debug=False, corruption_level=0.0, gold_preproc=False, eval_only=False):
    """Run the English train/evaluate cycle and print the evaluation metrics.

    Unless ``eval_only`` is set, the model is first trained on ``train_loc``
    (with the ``'debug'`` feature set when ``debug`` is true, ``'basic'``
    otherwise). The dev data at ``dev_loc`` is then scored. ``out_loc`` is
    accepted but ignored because parse writing is switched off here.
    """
    if not eval_only:
        examples = list(read_json_file(train_loc))
        train_options = {
            'feat_set': 'debug' if debug else 'basic',
            'gold_preproc': gold_preproc,
            'n_sents': n_sents,
            'corruption_level': corruption_level,
            'n_iter': n_iter,
            'verbose': verbose,
        }
        train(English, examples, model_dir, **train_options)

    scorer = evaluate(
        English,
        list(read_json_file(dev_loc)),
        model_dir,
        gold_preproc=gold_preproc,
        verbose=verbose,
    )

    # Tokenization, tagging and parsing metrics.
    for label, attr in [('TOK', 'token_acc'), ('POS', 'tags_acc'),
                        ('UAS', 'uas'), ('LAS', 'las')]:
        print(label, getattr(scorer, attr))

    # Named-entity precision / recall / F-score.
    for label, attr in [('NER P', 'ents_p'), ('NER R', 'ents_r'),
                        ('NER F', 'ents_f')]:
        print(label, getattr(scorer, attr))

예제 #9

0

파일 보기

파일: train.py 프로젝트: Develer/spaCy

def main(language, train_loc, dev_loc, model_dir, n_sents=0, n_iter=15, out_loc="", verbose=False,
         debug=False, corruption_level=0.0, gold_preproc=False, eval_only=False, pseudoprojective=False):
    """Train (unless ``eval_only``) and evaluate a model, then print its scores.

    Args:
        language: ``'en'`` or ``'de'``; selects ``English`` or ``German``.
        train_loc: Path of the JSON training data.
        dev_loc: Path of the JSON development data.
        model_dir: Model directory forwarded to ``train``, ``write_parses``
            and ``evaluate``.
        n_sents, n_iter, corruption_level, pseudoprojective: Forwarded to
            ``train``.
        out_loc: When non-empty, ``write_parses`` writes its output there.
        verbose: Forwarded to ``train`` and ``evaluate``.
        debug: Selects the ``'debug'`` feature set instead of ``'basic'``.
        gold_preproc: Forwarded to ``train`` and ``evaluate``.
        eval_only: Skip training and only evaluate.

    Raises:
        ValueError: If ``language`` is not one of the supported codes.
    """
    languages = {'en': English, 'de': German}
    lang = languages.get(language)
    if lang is None:
        # Fail here with a clear message instead of passing None downstream.
        raise ValueError(
            'Unsupported language %r; expected one of: %s'
            % (language, ', '.join(sorted(languages))))

    if not eval_only:
        gold_train = list(read_json_file(train_loc))
        train(lang, gold_train, model_dir,
              feat_set='basic' if not debug else 'debug',
              gold_preproc=gold_preproc, n_sents=n_sents,
              corruption_level=corruption_level, n_iter=n_iter,
              verbose=verbose, pseudoprojective=pseudoprojective)
    if out_loc:
        write_parses(lang, dev_loc, model_dir, out_loc)
    scorer = evaluate(lang, list(read_json_file(dev_loc)),
                      model_dir, gold_preproc=gold_preproc, verbose=verbose)
    print('TOK', scorer.token_acc)
    print('POS', scorer.tags_acc)
    print('UAS', scorer.uas)
    print('LAS', scorer.las)

    print('NER P', scorer.ents_p)
    print('NER R', scorer.ents_r)
    print('NER F', scorer.ents_f)

예제 #10

0

파일 보기

파일: train.py 프로젝트: nournia/spaCy

def write_parses(Language, dev_loc, model_dir, out_loc, beam_width=None):
    """Parse the dev data, write the parses to ``out_loc`` and score them.

    Each token is written as one tab-separated line:
    ``orth, tag, head orth, dependency label``.

    Args:
        Language: Callable invoked as ``Language(data_dir=model_dir)`` to
            build the pipeline.
        dev_loc: Path of the JSON development data (read with
            ``read_json_file``).
        model_dir: Directory the pipeline is loaded from.
        out_loc: Path of the UTF-8 output file (overwritten).
        beam_width: When not ``None``, assigned to
            ``nlp.parser.cfg.beam_width`` before parsing.

    Returns:
        The ``Scorer`` that was fed every (parse, gold) pair.
    """
    nlp = Language(data_dir=model_dir)
    if beam_width is not None:
        nlp.parser.cfg.beam_width = beam_width
    gold_tuples = read_json_file(dev_loc)
    scorer = Scorer()
    # The context manager guarantees the output is flushed and closed, even
    # if parsing or scoring raises part-way through.
    with codecs.open(out_loc, "w", "utf8") as out_file:
        for raw_text, sents in gold_tuples:
            sents = _merge_sents(sents)
            for annot_tuples, brackets in sents:
                if raw_text is None:
                    # No raw text: build the doc from the gold word list and
                    # run the tagger, entity recognizer and parser by hand.
                    tokens = nlp.tokenizer.tokens_from_list(annot_tuples[1])
                    nlp.tagger(tokens)
                    nlp.entity(tokens)
                    nlp.parser(tokens)
                else:
                    tokens = nlp(raw_text, merge_mwes=False)
                gold = GoldParse(tokens, annot_tuples)
                scorer.score(tokens, gold, verbose=False)
                for t in tokens:
                    out_file.write("%s\t%s\t%s\t%s\n" % (t.orth_, t.tag_, t.head.orth_, t.dep_))
    return scorer

예제 #11

0

파일 보기

파일: train.py 프로젝트: domsooch/spaCy

def write_parses(Language, dev_loc, model_dir, out_loc, beam_width=None):
    """Parse the dev data, write the parses to ``out_loc`` and score them.

    Each token is written as one tab-separated line:
    ``orth, tag, head orth, dependency label``.

    Args:
        Language: Callable invoked as ``Language(data_dir=model_dir)`` to
            build the pipeline.
        dev_loc: Path of the JSON development data (read with
            ``read_json_file``).
        model_dir: Directory the pipeline is loaded from.
        out_loc: Path of the UTF-8 output file (overwritten).
        beam_width: When not ``None``, assigned to
            ``nlp.parser.cfg.beam_width`` before parsing.

    Returns:
        The ``Scorer`` that was fed every (parse, gold) pair.
    """
    nlp = Language(data_dir=model_dir)
    if beam_width is not None:
        nlp.parser.cfg.beam_width = beam_width
    gold_tuples = read_json_file(dev_loc)
    scorer = Scorer()
    # Use a context manager so the file is closed (and its buffer flushed)
    # on every exit path, including exceptions.
    with codecs.open(out_loc, 'w', 'utf8') as out_file:
        for raw_text, sents in gold_tuples:
            sents = _merge_sents(sents)
            for annot_tuples, brackets in sents:
                if raw_text is None:
                    # No raw text: build the doc from the gold word list and
                    # run the tagger, entity recognizer and parser by hand.
                    tokens = nlp.tokenizer.tokens_from_list(annot_tuples[1])
                    nlp.tagger(tokens)
                    nlp.entity(tokens)
                    nlp.parser(tokens)
                else:
                    tokens = nlp(raw_text, merge_mwes=False)
                gold = GoldParse(tokens, annot_tuples)
                scorer.score(tokens, gold, verbose=False)
                for t in tokens:
                    out_file.write('%s\t%s\t%s\t%s\n' %
                                   (t.orth_, t.tag_, t.head.orth_, t.dep_))
    return scorer

예제 #12

0

파일 보기

파일: train.py 프로젝트: anukat2015/spaCy

def write_parses(Language, dev_loc, model_dir, out_loc):
    """Parse the dev data and write the parses to ``out_loc``.

    Non-space tokens are written one per line as tab-separated
    ``index, orth, tag, head orth, dependency label``; a blank line follows
    every sentence. Nothing is scored and nothing is returned.

    Args:
        Language: Callable invoked as ``Language(data_dir=model_dir)`` to
            build the pipeline.
        dev_loc: Path of the JSON development data (read with
            ``read_json_file``).
        model_dir: Directory the pipeline is loaded from.
        out_loc: Path of the UTF-8 output file (overwritten).
    """
    nlp = Language(data_dir=model_dir)
    gold_tuples = read_json_file(dev_loc)
    # ``encoding`` must be passed by keyword: the third positional parameter
    # of ``io.open`` is ``buffering``, so a positional 'utf8' raises TypeError.
    # The context manager also makes sure the file is closed.
    with io.open(out_loc, 'w', encoding='utf8') as out_file:
        for raw_text, sents in gold_tuples:
            sents = _merge_sents(sents)
            for annot_tuples, brackets in sents:
                if raw_text is None:
                    # No raw text: build the doc from the gold word list and
                    # run the tagger, entity recognizer and parser by hand.
                    tokens = nlp.tokenizer.tokens_from_list(annot_tuples[1])
                    nlp.tagger(tokens)
                    nlp.entity(tokens)
                    nlp.parser(tokens)
                else:
                    tokens = nlp(raw_text)
                for sent in tokens.sents:
                    for t in sent:
                        if not t.is_space:
                            out_file.write(
                                '%d\t%s\t%s\t%s\t%s\n' %
                                (t.i, t.orth_, t.tag_, t.head.orth_, t.dep_))
                    out_file.write('\n')

예제 #13

0

파일 보기

파일: train.py 프로젝트: Develer/spaCy

def write_parses(Language, dev_loc, model_dir, out_loc):
    """Parse the dev data and write the parses to ``out_loc``.

    Non-space tokens are written one per line as tab-separated
    ``index, orth, tag, head orth, dependency label``; a blank line follows
    every sentence. Nothing is scored and nothing is returned.

    Args:
        Language: Callable invoked as ``Language(data_dir=model_dir)`` to
            build the pipeline.
        dev_loc: Path of the JSON development data (read with
            ``read_json_file``).
        model_dir: Directory the pipeline is loaded from.
        out_loc: Path of the UTF-8 output file (overwritten).
    """
    nlp = Language(data_dir=model_dir)
    gold_tuples = read_json_file(dev_loc)
    # Pass the encoding by keyword. Positionally, 'utf8' would land in the
    # ``buffering`` parameter of ``io.open`` and raise TypeError. Using
    # ``with`` closes the file on every exit path.
    with io.open(out_loc, 'w', encoding='utf8') as out_file:
        for raw_text, sents in gold_tuples:
            sents = _merge_sents(sents)
            for annot_tuples, brackets in sents:
                if raw_text is None:
                    # No raw text: build the doc from the gold word list and
                    # run the tagger, entity recognizer and parser by hand.
                    tokens = nlp.tokenizer.tokens_from_list(annot_tuples[1])
                    nlp.tagger(tokens)
                    nlp.entity(tokens)
                    nlp.parser(tokens)
                else:
                    tokens = nlp(raw_text)
                for sent in tokens.sents:
                    for t in sent:
                        if not t.is_space:
                            out_file.write(
                                '%d\t%s\t%s\t%s\t%s\n' % (t.i, t.orth_, t.tag_, t.head.orth_, t.dep_)
                            )
                    out_file.write('\n')

예제 #14

0

파일 보기

"""
Developed and tested for spaCy 2.0.6. Updated for v2.2.2
"""
import random
import plac
import spacy
import os.path
from spacy.tokens import Doc
from spacy.gold import read_json_file, GoldParse

# Fix the seed of Python's `random` module so runs are repeatable.
random.seed(0)

# Directory that contains this script.
PWD = os.path.dirname(__file__)

# Training examples, loaded eagerly at import time from
# ner_example_data/ner-sent-per-line.json next to this script.
TRAIN_DATA = list(
    read_json_file(
        os.path.join(PWD, "ner_example_data", "ner-sent-per-line.json")))


def get_position_label(i, words, tags, heads, labels, ents):
    """Return a label indicating the position of word ``i`` in the document.

    Documents with fewer than 20 words always yield ``"short-doc"``.
    Otherwise the label depends on the index: ``"first-word"`` (0),
    ``"early-word"`` (1-9), ``"mid-word"`` (10-19), ``"last-word"`` (final
    index) and ``"late-word"`` for everything else.

    Args:
        i: Index of the word within ``words``.
        words: Sequence of the document's words; only its length is used.
        tags, heads, labels, ents: Unused.

    Returns:
        One of the label strings listed above.
    """
    if len(words) < 20:
        return "short-doc"
    elif i == 0:
        return "first-word"
    elif i < 10:
        return "early-word"
    elif i < 20:
        return "mid-word"
    elif i == len(words) - 1:
        return "last-word"
    else:
        # Without this branch, words past index 19 that are not the last
        # word would fall through and silently get ``None`` as their label.
        return "late-word"

예제 #15

0

파일 보기

'''
The specific example here is not necessarily a good idea --- but it shows
how an arbitrary objective function for some word can be used.

Developed and tested for spaCy 2.0.6
'''
import random
import plac
import spacy
import os.path
from spacy.gold import read_json_file, GoldParse

# Fix the seed of Python's `random` module so runs are repeatable.
random.seed(0)

# Directory that contains this script.
PWD = os.path.dirname(__file__)

# Training examples, loaded eagerly at import time from training-data.json
# next to this script.
TRAIN_DATA = list(read_json_file(os.path.join(PWD, 'training-data.json')))


def get_position_label(i, words, tags, heads, labels, ents):
    '''Return a label indicating the position of word ``i`` in the document.

    Documents with fewer than 20 words always yield ``'short-doc'``.
    Otherwise the label depends on the index: ``'first-word'`` (0),
    ``'early-word'`` (1-9), ``'mid-word'`` (10-19), ``'last-word'`` (final
    index) and ``'late-word'`` for everything else.

    Args:
        i: Index of the word within ``words``.
        words: Sequence of the document's words; only its length is used.
        tags, heads, labels, ents: Unused.

    Returns:
        One of the label strings listed above.
    '''
    if len(words) < 20:
        return 'short-doc'
    elif i == 0:
        return 'first-word'
    elif i < 10:
        return 'early-word'
    elif i < 20:
        return 'mid-word'
    elif i == len(words) - 1:
        return 'last-word'
    else:
        # Without this branch, words past index 19 that are not the last
        # word would fall through and silently get ``None`` as their label.
        return 'late-word'

예제 #16

0

파일 보기

파일: ner_multitask_objective.py 프로젝트: IndicoDataSolutions/spaCy

'''
The specific example here is not necessarily a good idea --- but it shows
how an arbitrary objective function for some word can be used.

Developed and tested for spaCy 2.0.6
'''
import random
import plac
import spacy
import os.path
from spacy.gold import read_json_file, GoldParse

# Fix the seed of Python's `random` module so runs are repeatable.
random.seed(0)

# Directory that contains this script.
PWD = os.path.dirname(__file__)

# Training examples, loaded eagerly at import time from training-data.json
# next to this script.
TRAIN_DATA = list(read_json_file(os.path.join(PWD, 'training-data.json')))



def get_position_label(i, words, tags, heads, labels, ents):
    '''Return labels indicating the position of the word in the document.
    '''
    if len(words) < 20:
        return 'short-doc'
    elif i == 0:
        return 'first-word'
    elif i < 10:
        return 'early-word'
    elif i < 20:
        return 'mid-word'
    elif i == len(words)-1: