Example #1
def test_parser(args):
    parser = create_parser(lang=args.lang, parser_class=args.parser)

    total = 0
    wrong = 0

    # the test file alternates sentence lines with expected-edge lines
    sentence = None
    with open(args.infile) as f:
        for line in f:
            if sentence:
                total += 1
                correct_edge = hedge(line.strip())
                parser_output = parser.parse(sentence)
                parsed_sentence = parser_output['parses'][0]
                edge = parsed_sentence['main_edge']
                sent = parsed_sentence['spacy_sentence']
                if edge != correct_edge:
                    wrong += 1
                    print_tree(sent.root)
                    print('expected:')
                    print(correct_edge)
                    print('result:')
                    print(edge)
                sentence = None
            else:
                sentence = line.strip()

    print('%s wrong out of %s.' % (wrong, total))
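A minimal way to drive test_parser from the command line, assuming only the three attributes the function actually reads (lang, parser, infile); the flag names and defaults here are illustrative:

import argparse

if __name__ == '__main__':
    cli = argparse.ArgumentParser()
    cli.add_argument('--lang', default='en')
    cli.add_argument('--parser', default=None)  # optional parser class name
    cli.add_argument('--infile', required=True)  # two-line records, as above
    test_parser(cli.parse_args())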
Example #2
    def __init__(self,
                 hg=None,
                 sequence=None,
                 lang=None,
                 corefs=False,
                 parser=None,
                 parser_class=None):
        self.hg = hg
        self.sequence = sequence
        self.lang = lang

        if parser_class:
            plang = parser_lang(parser_class)
            if lang:
                if lang != plang:
                    msg = 'specified language ({}) and parser language ({}) '\
                          'do not match'.format(lang, plang)
                    raise RuntimeError(msg)
            else:
                self.lang = plang

        if parser is None:
            self.parser = create_parser(lang=lang,
                                        parser_class=parser_class,
                                        lemmas=True,
                                        corefs=corefs)
        else:
            self.parser = parser
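The language consistency check is the subtle part of this constructor. A standalone sketch of the same guard, with parser_lang stubbed out (the stub is hypothetical; the real lookup ships with graphbrain):

def resolve_lang(lang, parser_class, parser_lang=lambda pc: 'en'):
    # mirrors the __init__ logic above: an explicit lang must match the
    # language implied by parser_class; otherwise it is inherited from it
    if parser_class:
        plang = parser_lang(parser_class)
        if lang and lang != plang:
            raise RuntimeError(
                'specified language ({}) and parser language ({}) '
                'do not match'.format(lang, plang))
        return lang or plang
    return lang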
Example #3
    def get_parser(self, agent):
        if self.parser is None:
            corefs = self.corefs in {'resolve', 'replace'}
            self.parser = create_parser(lang=self.lang,
                                        parser_class=self.parser_class,
                                        lemmas=True,
                                        resolve_corefs=corefs)
        return self.parser
Example #4
def manual_test(args):
    parser = create_parser(lang=args.lang, parser_class=args.parser)

    he = ManualEvaluation()

    sentences = []

    # read existing tests
    try:
        with open(args.outfile, 'r') as f:
            for line in f:
                parts = line.split('\t')
                if len(parts) == 4:
                    sentence = parts[0].strip()
                    sentences.append(sentence)
                    edge = hedge(parts[1].strip())
                    answer = parts[2].strip()
                    defects_str = parts[3].strip()
                    if defects_str:
                        defects = [hedge(d)
                                   for d in defects_str.split('&')]
                    else:
                        defects = []

                    he.apply_evaluation(answer, edge, defects)
    except FileNotFoundError:
        pass

    with open(args.infile, 'r') as f:
        for line in f:
            print('GLOBAL:')
            print(colored(str(he), 'white'))

            sentence = line.strip()

            if sentence not in sentences:
                sentences.append(sentence)
                parser_output = parser.parse(sentence)
                parsed_sentence = parser_output['parses'][0]
                edge = parsed_sentence['main_edge']

                if edge:
                    print('\n{}\n{}\n'.format(sentence, indented(edge)))

                    answer = he.input()
                    if answer == 'd':
                        defects = input_defects(sentence, edge)
                    else:
                        defects = []
                    he.apply_evaluation(answer, edge, defects)

                    defect_str = '&'.join(
                        [defect.to_str() for defect in defects])
                    row_str = '\t'.join(
                        (sentence, edge.to_str(), answer, defect_str))
                    with open(args.outfile, 'a') as of:
                        of.write('{}\n'.format(row_str))
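manual_test appends each evaluated sentence to outfile as a four-column, tab-separated row: sentence, edge, answer, and '&'-joined defect edges. A sketch of reading one row back, mirroring the parsing at the top of the function:

from graphbrain import hedge

def read_row(line):
    sentence, edge_str, answer, defect_str = line.rstrip('\n').split('\t')
    defect_str = defect_str.strip()
    defects = [hedge(d) for d in defect_str.split('&')] if defect_str else []
    return sentence, hedge(edge_str), answer, defects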
Example #5
    def __init__(self, lang):
        self.parser = create_parser(name=lang)

        self.sentences = set()
        self.tokens = 0
        self.correct_edges = 0
        self.ignored = 0

        self.input_files = None

        self.sentence = None
        self.source = None
        self.atoms = None
        self.spacy_sentence = None
        self.token2atom = None
Example #6
def extract_sentences(args):
    parser = create_parser(name=args.lang)
    sentences = []

    count = 0
    # outfile is only opened below, after shuffling
    with open(args.infile, 'r') as infile:
        for line in infile:
            paragraph = line.strip()
            if len(paragraph) > 0:
                parse_results = parser.parse(paragraph)
                for parse in parse_results['parses']:
                    sentences.append(parse['text'])
                    count += 1
                    if count % 100 == 0:
                        print('{} sentences found'.format(count))

    random.shuffle(sentences)

    with open(args.outfile, 'w') as outfile:
        for sentence in sentences:
            outfile.write('{}\n'.format(sentence))
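random.shuffle randomizes sentence order before writing, which keeps any downstream annotation sample unbiased. If runs need to be reproducible, seed the generator first (a sketch, not in the original):

import random

random.seed(0)  # any fixed seed makes the shuffle order repeatable
random.shuffle(sentences)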
Example #7
def generate_alpha_training_data(args):
    total_sentences = 0
    ignored_sentences = 0
    failed_parses = 0
    total_atoms = 0

    parser = create_parser(name=args.lang)

    with open(args.infile, 'r') as infile, open(args.outfile, 'w') as outfile:
        for line in infile:
            case = json.loads(line)
            sentence = case['sentence']
            atoms = case['atoms']
            parse_results = parser.parse(sentence)
            parse = parse_results['parses'][0]
            spacy_sentence = parse['spacy_sentence']
            if case['ignore']:
                ignored_sentences += 1
            elif len(atoms) == len(spacy_sentence):
                total_sentences += 1
                total_atoms += len(atoms)

                for i in range(len(atoms)):
                    atom = atoms[i]
                    token = spacy_sentence[i]

                    word_before = ''
                    word_after = ''
                    pos_before = ''
                    pos_after = ''
                    dep_before = ''
                    dep_after = ''
                    punct_before = False
                    punct_after = False
                    if i > 0:
                        word_before = str(spacy_sentence[i - 1])
                        pos_before = spacy_sentence[i - 1].pos_
                        dep_before = spacy_sentence[i - 1].dep_
                        if spacy_sentence[i - 1].pos_ == 'PUNCT':
                            punct_before = True
                    if i < len(atoms) - 1:
                        word_after = str(spacy_sentence[i + 1])
                        pos_after = spacy_sentence[i + 1].pos_
                        dep_after = spacy_sentence[i + 1].dep_
                        if spacy_sentence[i + 1].pos_ == 'PUNCT':
                            punct_after = True

                    head = token.head
                    # spaCy makes the root its own head, so compare directly
                    is_root = head == token
                    has_lefts = token.n_lefts > 0
                    has_rights = token.n_rights > 0
                    outfile.write(('{}' + '\t{}' * 23 + '\n').format(
                        hedge(atom).type()[0], str(token), token.pos_,
                        token.tag_, token.dep_,
                        str(head) if head else '', head.pos_ if head else '',
                        head.tag_ if head else '', head.dep_ if head else '',
                        is_root, has_lefts, has_rights, token.ent_type_,
                        token.shape_[:2], word_before, word_after,
                        punct_before, punct_after, pos_before, pos_after,
                        dep_before, dep_after, case['correct'],
                        case['source']))
            else:
                failed_parses += 1
            print('sentences: {}; ignored: {}; failed: {}; atoms: {}'.format(
                total_sentences, ignored_sentences, failed_parses,
                total_atoms))
        print('done.')
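The writer above emits 24 tab-separated columns per atom. An explicit column list makes the training file self-documenting; the names below are inferred from the format call and are illustrative, not from the original code:

ALPHA_COLUMNS = (
    'atom_type', 'word', 'pos', 'tag', 'dep',
    'head_word', 'head_pos', 'head_tag', 'head_dep',
    'is_root', 'has_lefts', 'has_rights', 'ent_type', 'shape2',
    'word_before', 'word_after', 'punct_before', 'punct_after',
    'pos_before', 'pos_after', 'dep_before', 'dep_after',
    'correct', 'source')

# for example, write a header line before the data rows:
# outfile.write('\t'.join(ALPHA_COLUMNS) + '\n')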
Example #8
    def get_parser(self, agent):
        if self.parser is None:
            self.parser = create_parser(
                name=self.lang, lemmas=True, resolve_corefs=True)
        return self.parser
Example #9
from graphbrain.parsers import create_parser, print_tree


if __name__ == '__main__':
    text = """
    Satellites from NASA and other agencies have been tracking sea ice changes
    since 1979.
    """

    parser = create_parser(lang='en', lemmas=True)
    parse_results = parser.parse(text)

    for parse in parse_results['parses']:
        print_tree(parse['spacy_sentence'].root)
        print(parse['main_edge'])
        print('>> Extra edges:')
        for edge in parse['extra_edges']:
            print(edge)
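The same pattern scales from one string to a file of inputs. A minimal sketch combining this example with the line-by-line loop used in extract_sentences above (sentences.txt is a placeholder path):

from graphbrain.parsers import create_parser

parser = create_parser(lang='en', lemmas=True)

with open('sentences.txt', 'r') as f:
    for line in f:
        text = line.strip()
        if text:
            for parse in parser.parse(text)['parses']:
                print(parse['main_edge'])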