def test_load(self):
    """Load the English vocab and dependency parser when their data exists.

    Nothing is loaded when the 'vocab' directory is missing. The parser is
    loaded only when the 'deps' directory exists as well, because
    Parser.from_dir is given the vocab's string store.
    """
    data_dir = English.default_data_dir()
    vocab_dir = path.join(data_dir, 'vocab')
    deps_dir = path.join(data_dir, 'deps')
    # The parser load below reads vocab.strings, so stop here when the vocab
    # cannot be loaded; this keeps `vocab` from ever being referenced unbound.
    if not path.exists(vocab_dir):
        return
    vocab = Vocab.from_dir(vocab_dir)
    if path.exists(deps_dir):
        parser = Parser.from_dir(deps_dir, vocab.strings, ArcEager)
def test_thinc_load(self):
    """Load the stored 'deps/model' weights into a freshly built LinearModel."""
    model_loc = path.join(English.default_data_dir(), 'deps', 'model')
    # Hard-coded dimensions. Per the original notes they stand for the number
    # of classes (moves.n_moves) and the number of features
    # (len(templates) + 1) -- presumably in that argument order; confirm
    # against LinearModel's signature.
    n_classes = 92
    n_features = 116
    model = LinearModel(n_classes, n_features)
    model.load(model_loc)
def test_thinc_load(self):
    """Load the stored 'deps/model' weights into a LinearModel, if present.

    The test does nothing when the model file does not exist.
    """
    model_loc = path.join(English.default_data_dir(), 'deps', 'model')
    if not path.exists(model_loc):
        return
    # Hard-coded dimensions. Per the original notes they stand for the number
    # of classes (moves.n_moves) and the number of features
    # (len(templates) + 1) -- presumably in that argument order; confirm
    # against LinearModel's signature.
    n_classes = 92
    n_features = 116
    model = LinearModel(n_classes, n_features)
    model.load(model_loc)
def test_load_careful(self):
    """Assemble a Parser step by step from an explicit, inline configuration.

    Builds the vocab, the ArcEager moves, the feature templates and the Model
    separately, then hands them to the Parser constructor.
    """
    # Label names stored under keys "2" and "3" of the config, kept in their
    # original order.
    labels_2 = (
        "cc", "agent", "ccomp", "prt", "meta", "nsubjpass", "csubj", "conj",
        "dobj", "neg", "csubjpass", "mark", "auxpass", "advcl", "aux", "ROOT",
        "prep", "parataxis", "xcomp", "nsubj", "nummod", "advmod", "punct",
        "relcl", "quantmod", "acomp", "compound", "pcomp", "intj", "poss",
        "npadvmod", "case", "attr", "dep", "appos", "det", "nmod", "amod",
        "dative", "pobj", "expl", "predet", "preconj", "oprd", "acl",
    )
    labels_3 = (
        "cc", "agent", "ccomp", "prt", "meta", "nsubjpass", "csubj", "conj",
        "acl", "poss", "neg", "mark", "auxpass", "advcl", "aux", "amod",
        "ROOT", "prep", "parataxis", "xcomp", "nsubj", "nummod", "advmod",
        "punct", "quantmod", "acomp", "pcomp", "intj", "relcl", "npadvmod",
        "case", "attr", "dep", "appos", "det", "nmod", "dobj", "dative",
        "pobj", "iobj", "expl", "predet", "preconj", "oprd",
    )
    config_data = {
        "labels": {
            "0": {"": True},
            "1": {"": True},
            "2": dict.fromkeys(labels_2, True),
            "3": dict.fromkeys(labels_3, True),
            "4": {"ROOT": True},
        },
        "seed": 0,
        "features": "basic",
        "beam_width": 1,
    }

    data_dir = English.default_data_dir()
    vocab = Vocab.from_dir(path.join(data_dir, 'vocab'))
    moves = ArcEager(vocab.strings, config_data['labels'])
    templates = get_templates(config_data['features'])
    deps_dir = path.join(data_dir, 'deps')
    model = Model(moves.n_moves, templates, deps_dir)
    parser = Parser(vocab.strings, moves, model)
def count_freqs(input_loc, output_loc):
    """Count token frequencies over the comments in input_loc.

    Each comment's 'body' text is tokenized and its ORTH counts are added to
    a shared PreshCounter. The result is written to output_loc as UTF-8, one
    "frequency<TAB>string" line per entry; whitespace-only strings are
    left out.
    """
    print(output_loc)
    vocab = English.default_vocab(get_lex_attr=None)
    tokenizer_dir = path.join(English.default_data_dir(), 'tokenizer')
    tokenizer = Tokenizer.from_dir(vocab, tokenizer_dir)

    counts = PreshCounter()
    for comment in iter_comments(input_loc):
        tokenizer(comment['body']).count_by(ORTH, counts=counts)

    with codecs.open(output_loc, 'w', 'utf8') as out_file:
        for orth_id, frequency in counts:
            text = tokenizer.vocab.strings[orth_id]
            if text.isspace():
                continue
            out_file.write('%d\t%s\n' % (frequency, text))
def count_freqs(input_loc, output_loc):
    """Count token frequencies over the comments in input_loc.

    Each comment's 'body' text is tokenized and its ORTH counts are added to
    a shared PreshCounter. The result is written to output_loc as UTF-8, one
    "frequency<TAB>string" line per entry; whitespace-only strings are
    left out.

    Args:
        input_loc: Location passed to iter_comments to read the comments.
        output_loc: Path of the frequency file to write (also printed).
    """
    print(output_loc)
    vocab = English.default_vocab(get_lex_attr=None)
    tokenizer = Tokenizer.from_dir(
        vocab, path.join(English.default_data_dir(), 'tokenizer'))
    counts = PreshCounter()
    for json_comment in iter_comments(input_loc):
        doc = tokenizer(json_comment['body'])
        doc.count_by(ORTH, counts=counts)
    # Unlike codecs.open, io.open's third positional parameter is `buffering`,
    # so passing 'utf8' positionally raises TypeError. The encoding has to be
    # given by keyword.
    with io.open(output_loc, 'w', encoding='utf8') as file_:
        for orth, freq in counts:
            string = tokenizer.vocab.strings[orth]
            if not string.isspace():
                file_.write('%d\t%s\n' % (freq, string))
def test_load(self):
    """Load the English vocab, then the dependency parser from 'deps'."""
    data_dir = English.default_data_dir()
    vocab_dir = path.join(data_dir, 'vocab')
    deps_dir = path.join(data_dir, 'deps')
    vocab = Vocab.from_dir(vocab_dir)
    # The parser shares the vocab's string store and uses ArcEager moves.
    parser = Parser.from_dir(deps_dir, vocab.strings, ArcEager)
def test_load(self):
    """Load the English vocab, then the tagger from the 'tagger' data."""
    data_dir = English.default_data_dir()
    vocab_dir = path.join(data_dir, 'vocab')
    tagger_dir = path.join(data_dir, 'tagger')
    vocab = Vocab.from_dir(vocab_dir)
    tagger = Tagger.from_dir(tagger_dir, vocab)
def test_load(self):
    """Load the English vocab, then the tokenizer from the 'tokenizer' data."""
    data_dir = English.default_data_dir()
    vocab_dir = path.join(data_dir, 'vocab')
    tokenizer_dir = path.join(data_dir, 'tokenizer')
    vocab = Vocab.from_dir(vocab_dir)
    # Note the argument order: the vocab comes first for Tokenizer.from_dir.
    tokenizer = Tokenizer.from_dir(vocab, tokenizer_dir)
def test_load(self):
    """Load the vocab from the 'vocab' directory of the default English data."""
    data_dir = English.default_data_dir()
    vocab_dir = path.join(data_dir, 'vocab')
    vocab = Vocab.from_dir(vocab_dir)
def test_load_careful(self):
    """Construct a Parser manually from an inline config rather than from_dir.

    The vocab, ArcEager moves, feature templates and Model are each created
    explicitly and then passed to the Parser constructor.
    """
    # Label names for config keys "2" and "3", listed in their original order.
    names_for_2 = [
        "cc", "agent", "ccomp", "prt", "meta", "nsubjpass", "csubj", "conj",
        "dobj", "neg", "csubjpass", "mark", "auxpass", "advcl", "aux", "ROOT",
        "prep", "parataxis", "xcomp", "nsubj", "nummod", "advmod", "punct",
        "relcl", "quantmod", "acomp", "compound", "pcomp", "intj", "poss",
        "npadvmod", "case", "attr", "dep", "appos", "det", "nmod", "amod",
        "dative", "pobj", "expl", "predet", "preconj", "oprd", "acl",
    ]
    names_for_3 = [
        "cc", "agent", "ccomp", "prt", "meta", "nsubjpass", "csubj", "conj",
        "acl", "poss", "neg", "mark", "auxpass", "advcl", "aux", "amod",
        "ROOT", "prep", "parataxis", "xcomp", "nsubj", "nummod", "advmod",
        "punct", "quantmod", "acomp", "pcomp", "intj", "relcl", "npadvmod",
        "case", "attr", "dep", "appos", "det", "nmod", "dobj", "dative",
        "pobj", "iobj", "expl", "predet", "preconj", "oprd",
    ]
    label_table = {
        "0": {"": True},
        "1": {"": True},
        "2": {name: True for name in names_for_2},
        "3": {name: True for name in names_for_3},
        "4": {"ROOT": True},
    }
    config_data = {
        "labels": label_table,
        "seed": 0,
        "features": "basic",
        "beam_width": 1,
    }

    data_dir = English.default_data_dir()
    vocab = Vocab.from_dir(path.join(data_dir, 'vocab'))
    string_store = vocab.strings
    moves = ArcEager(string_store, config_data['labels'])
    templates = get_templates(config_data['features'])
    model = Model(moves.n_moves, templates, path.join(data_dir, 'deps'))
    parser = Parser(vocab.strings, moves, model)
def test_load(self):
    """Load the English vocab and tagger when the vocab data is present.

    The test does nothing when the 'vocab' directory does not exist.
    """
    data_dir = English.default_data_dir()
    vocab_dir = path.join(data_dir, 'vocab')
    if not path.exists(vocab_dir):
        return
    vocab = Vocab.from_dir(vocab_dir)
    tagger_dir = path.join(data_dir, 'tagger')
    tagger = Tagger.from_dir(tagger_dir, vocab)
def test_load(self):
    """Load the English vocab and tokenizer when the vocab data is present.

    The test does nothing when the 'vocab' directory does not exist.
    """
    data_dir = English.default_data_dir()
    vocab_dir = path.join(data_dir, 'vocab')
    if not path.exists(vocab_dir):
        return
    vocab = Vocab.from_dir(vocab_dir)
    tokenizer_dir = path.join(data_dir, 'tokenizer')
    tokenizer = Tokenizer.from_dir(vocab, tokenizer_dir)
def test_load(self):
    """Load the English vocab when its data directory is present.

    The test does nothing when the 'vocab' directory does not exist.
    """
    vocab_dir = path.join(English.default_data_dir(), 'vocab')
    if not path.exists(vocab_dir):
        return
    vocab = Vocab.from_dir(
        path.join(English.default_data_dir(), 'vocab'))