예제 #1
0
    def run(self):
        """Parse the input file paragraph by paragraph and yield edge ops.

        For every parse with a non-empty ``resolved_corefs`` edge, one op is
        yielded for that edge (carrying the sequence, its position and the
        source text), followed by one op per extra edge of the parse. After
        the parses of a paragraph, each inferred edge is printed and yielded
        as a counted op. ``self.edges`` is incremented once per main edge.
        """
        source = self.system.get_infile(self)
        parser = self.system.get_parser(self)
        sequence = self.system.get_sequence(self)

        position = 0
        for paragraph in paragraphs(source):
            results = parser.parse(paragraph)

            for parse in results['parses']:
                main_edge = parse['resolved_corefs']
                if not main_edge:
                    # this parse produced no main edge, nothing to emit
                    continue

                # the main edge carries the original text as an attribute
                yield create_op(main_edge,
                                sequence=sequence,
                                position=position,
                                attributes={'text': parse['text']})
                self.edges += 1
                position += 1

                # edges produced alongside the main edge
                for extra in parse['extra_edges']:
                    yield create_op(extra)

            for inferred in results['inferred_edges']:
                print('inferred edge: {}'.format(inferred.to_str()))
                yield create_op(inferred, count=True)
예제 #2
0
    def process_edge(self, edge, depth):
        """Yield conflict ops for an edge whose predicate is a conflict lemma.

        The edge must be a non-atom whose connector type starts with ``Pd``,
        have more than two elements, a predicate whose deep lemma root is in
        ``CONFLICT_PRED_LEMMAS``, and exactly one subject ('s') and one object
        ('o') argument. Both arguments, once stripped, must contain a proper
        concept and their main coreferences must both be actors. When all of
        that holds, a ``conflict/P/.`` op is yielded, followed by the ops
        from ``self._topics``; ``self.conflicts`` is then incremented.

        ``depth`` is not used by this method.
        """
        hg = self.system.get_hg(self)

        if edge.is_atom():
            return
        if edge.connector_type()[:2] != 'Pd':
            return

        predicate = edge[0]
        if len(edge) <= 2:
            return
        if deep_lemma(hg, predicate).root() not in CONFLICT_PRED_LEMMAS:
            return

        subjects = edge.edges_with_argrole('s')
        objects = edge.edges_with_argrole('o')
        if len(subjects) != 1 or len(objects) != 1:
            return

        subject = strip_concept(subjects[0])
        obj = strip_concept(objects[0])
        if not (subject and obj):
            return
        if not (has_proper_concept(subject) and has_proper_concept(obj)):
            return

        actor_orig = main_coref(hg, subject)
        actor_targ = main_coref(hg, obj)
        # the edge is built before the actor checks, as in the original flow
        conflict_edge = hedge(
            ('conflict/P/.', actor_orig, actor_targ, edge))
        if not (is_actor(hg, actor_orig) and is_actor(hg, actor_targ)):
            return

        yield create_op(conflict_edge)
        yield from self._topics(hg, actor_orig, actor_targ, edge)
        self.conflicts += 1
예제 #3
0
    def _parse_title(self, text, author):
        """Parse a title and yield the ops describing it.

        Each part returned by ``title_parts(text)`` is parsed; the ops that
        ``self.system.parse_results2ops`` derives from the parse results are
        yielded first. The main edge of every parse (``resolved_corefs`` when
        that key is present, ``main_edge`` otherwise) is collected into a
        title edge starting with ``'title/P/.reddit'`` and ``author``. The
        title edge is yielded only if at least one main edge was collected,
        in which case ``self.titles_added`` is incremented.
        ``self.titles_parsed`` is incremented at the end in every case.
        """
        parser = self.system.get_parser(self)

        title_edge = ['title/P/.reddit', author]
        for part in title_parts(text):
            results = parser.parse(part)
            yield from self.system.parse_results2ops(results)

            for parse in results['parses']:
                # prefer the coreference-resolved edge when the parse has one
                key = ('resolved_corefs' if 'resolved_corefs' in parse
                       else 'main_edge')
                main_edge = parse[key]
                if main_edge:
                    title_edge.append(main_edge)

        # more than the two seed elements means something was collected
        if len(title_edge) > 2:
            yield create_op(title_edge)
            self.titles_added += 1

        self.titles_parsed += 1
예제 #4
0
    def process_edge(self, edge, depth):
        """Yield one ``claim-actor/P/.`` op per actor found in a claim.

        ``edge`` is unpacked into exactly four elements: an ignored first
        element, the main actor, the claim and the main edge. For every actor
        that ``find_actors`` returns for the claim, an op is yielded linking
        the main actor, that actor, the claim and the main edge.

        ``depth`` is not used by this method.
        """
        # NOTE(review): the hypergraph is obtained through ``self.hg`` here;
        # confirm that attribute provides ``get_hg`` on this object.
        hg = self.hg.get_hg(self)

        _head, main_actor, claim, main_edge = edge
        for actor in find_actors(hg, claim):
            relation = ('claim-actor/P/.', main_actor, actor, claim, main_edge)
            yield create_op(relation)
예제 #5
0
 def _topics(self, hg, actor_orig, actor_targ, edge):
     """Yield ``conflict-topic/P/.`` ops for the topics mentioned in an edge.

     Every element of ``edge`` after the first is examined. Elements whose
     type starts with ``'s'`` and whose first item, as a string, is in
     ``CONFLICT_TOPIC_TRIGGERS`` have the concepts of their second item
     inspected; each concept with a hypergraph degree above 1 produces one
     op and increments ``self.conflict_topics``.
     """
     for argument in edge[1:]:
         if argument.type()[0] != 's':
             continue
         if argument[0].to_str() not in CONFLICT_TOPIC_TRIGGERS:
             continue

         for concept in all_concepts(argument[1]):
             if hg.degree(concept) > 1:
                 topic_edge = ('conflict-topic/P/.', actor_orig,
                               actor_targ, concept, edge)
                 yield create_op(topic_edge)
                 self.conflict_topics += 1
예제 #6
0
    def _parse_row(self, row):
        """Parse the text field of a row and yield the resulting edge ops.

        The text is read from ``row[self.text]`` and split with
        ``text_parts``. For every parse with a non-empty ``main_edge``, an op
        for that edge is yielded with the parsed text attached as the
        ``'text'`` attribute, followed by one op per extra edge of the parse.
        """
        parser = self.system.get_parser(self)

        for part in text_parts(row[self.text]):
            results = parser.parse(part)
            for parse in results['parses']:
                main_edge = parse['main_edge']
                if not main_edge:
                    # this parse produced no main edge, nothing to emit
                    continue

                # the main edge carries the original text as an attribute
                yield create_op(main_edge, attributes={'text': parse['text']})

                # edges produced alongside the main edge
                for extra in parse['extra_edges']:
                    yield create_op(extra)
예제 #7
0
    def _parse_title(self, text, author):
        """Parse a title and yield ops for its edges, the title and its tags.

        Each part returned by ``title_parts(text)`` is parsed. For every
        parse with a non-empty ``resolved_corefs`` edge, an op for that edge
        (with the parsed text attached) and ops for its extra edges are
        yielded. Main edges whose type starts with ``'R'`` are collected into
        the title edge; all other main edges are collected as tags. Inferred
        edges of each part are yielded as counted ops.

        If at least one edge was collected into the title edge, the title
        edge is yielded and ``self.titles_added`` incremented; tags, if any,
        are then yielded as a ``'tags/P/.reddit'`` edge that refers to the
        title edge. ``self.titles_parsed`` is incremented in every case.
        """
        parser = self.system.get_parser(self)

        title_edge = ['title/P/.reddit', author]
        tags = []
        for part in title_parts(text):
            results = parser.parse(part)

            for parse in results['parses']:
                main_edge = parse['resolved_corefs']
                if not main_edge:
                    continue

                # the main edge carries the original text as an attribute
                yield create_op(main_edge, attributes={'text': parse['text']})

                # edges produced alongside the main edge
                for extra in parse['extra_edges']:
                    yield create_op(extra)

                # edges of type 'R...' form the title, the rest become tags
                target = title_edge if main_edge.type()[0] == 'R' else tags
                target.append(main_edge)

            for inferred in results['inferred_edges']:
                yield create_op(inferred, count=True)

        # more than the two seed elements means something was collected
        if len(title_edge) > 2:
            yield create_op(title_edge)
            self.titles_added += 1

            # tags are only written when there is a title to attach them to
            if tags:
                yield create_op(['tags/P/.reddit', title_edge, *tags])

        self.titles_parsed += 1
예제 #8
0
def _update_main_coref_ops(hg, edge):
    """Yield the ops that point a coreference set at its main coreference.

    The main coreference is the member of ``coref_set(hg, edge)`` with the
    highest ``hg.degree``; on ties the first one encountered wins, and it is
    ``None`` when the set is empty. If the corresponding
    ``(main_coref_pred, <coref id>, <main coref>)`` edge already exists in
    the hypergraph nothing is yielded. Otherwise a ``'remove'`` op is yielded
    for every existing edge matching ``(main_coref_pred <coref id> *)``,
    followed by an op creating the new edge as non-primary.
    """
    cref_id = coref_id(hg, edge)
    corefs = coref_set(hg, edge)

    # max() keeps the first maximal element, like a strict '>' scan would
    best_coref = max(corefs, key=hg.degree, default=None)

    coref_edge = hedge((main_coref_pred, cref_id, best_coref))
    if hg.exists(coref_edge):
        return

    # collect the search results up front before yielding removals for them
    stale_edges = set(hg.search('({} {} *)'.format(main_coref_pred, cref_id)))
    for stale_edge in stale_edges:
        yield create_op(stale_edge, optype='remove')
    yield create_op(coref_edge, primary=False)
예제 #9
0
    def parse_text(self, infile, parser, sequence):
        """Parse every paragraph of ``infile`` and yield the edge ops.

        All paragraphs are read up front so their number can be given to the
        progress bar, which is only created and updated when
        ``self.progress_bar`` is truthy. For every parse with a non-empty
        ``resolved_corefs`` edge, an op for that edge is yielded (carrying
        ``sequence``, its position and the source text), followed by one op
        per extra edge. Inferred edges of each paragraph are yielded as
        counted ops.
        """
        all_paragraphs = list(read_paragraphs(infile))

        pbar = (progressbar.ProgressBar(max_value=len(all_paragraphs)).start()
                if self.progress_bar else None)

        position = 0
        for index, paragraph in enumerate(all_paragraphs):
            results = parser.parse(paragraph)

            for parse in results['parses']:
                main_edge = parse['resolved_corefs']
                if not main_edge:
                    # this parse produced no main edge, nothing to emit
                    continue

                # the main edge carries the original text as an attribute
                yield create_op(main_edge,
                                sequence=sequence,
                                position=position,
                                attributes={'text': parse['text']})
                position += 1

                # edges produced alongside the main edge
                for extra in parse['extra_edges']:
                    yield create_op(extra)

            for inferred in results['inferred_edges']:
                yield create_op(inferred, count=True)

            if self.progress_bar:
                pbar.update(index)

        if self.progress_bar:
            pbar.finish()
예제 #10
0
 def process_edge(self, edge, depth):
     """Yield a ``const.type_of_pred`` op linking a concept to its parent.

     Only non-atomic edges whose type starts with ``'C'`` are considered.
     When the connector type of the first element starts with ``'B'``, the
     parent is the single main concept of the edge (if there is exactly
     one). When it starts with ``'M'`` and the edge has two elements, the
     parent is the second element. If a truthy parent was found, a
     non-primary op for ``(const.type_of_pred, edge, parent)`` is yielded.

     ``depth`` is not used by this method.
     """
     if edge.is_atom():
         return
     if edge.type()[0] != 'C':
         return

     connector = edge[0].connector_type()
     parent = None
     if connector[0] == 'B':
         main_concepts = edge.main_concepts()
         if len(main_concepts) == 1:
             parent = main_concepts[0]
     elif connector[0] == 'M' and len(edge) == 2:
         parent = edge[1]

     if parent:
         yield create_op((const.type_of_pred, edge, parent), primary=False)
예제 #11
0
    def run(self):
        """Read a Wikipedia article, parse it line by line and yield edge ops.

        The article title and language are derived from the configured URL
        and the article text is fetched with ``read_wikipedia``. Each
        non-blank line (after stripping) is parsed as a paragraph. For every
        parse with a non-empty ``resolved_corefs`` edge, an op for that edge
        is yielded (carrying the sequence, its position and the source
        text), followed by one op per extra edge; ``self.edges`` is
        incremented once per main edge. Inferred edges of each paragraph are
        yielded as counted ops.
        """
        url = self.system.get_url(self)
        parser = self.system.get_parser(self)
        sequence = self.system.get_sequence(self)

        title, lang = url2title_and_lang(url)
        article = read_wikipedia(title, lang)

        position = 0
        for raw_line in article.split('\n'):
            paragraph = raw_line.strip()
            if not paragraph:
                # skip blank lines
                continue

            results = parser.parse(paragraph)
            for parse in results['parses']:
                main_edge = parse['resolved_corefs']
                if not main_edge:
                    continue

                # the main edge carries the original text as an attribute
                yield create_op(main_edge,
                                sequence=sequence,
                                position=position,
                                attributes={'text': parse['text']})
                self.edges += 1
                position += 1

                # edges produced alongside the main edge
                for extra in parse['extra_edges']:
                    yield create_op(extra)

            for inferred in results['inferred_edges']:
                yield create_op(inferred, count=True)
예제 #12
0
def make_corefs_ops(hg, edge1, edge2):
    """Yield the ops that put ``edge1`` and ``edge2`` in one coreference set.

    The coreference id to keep is chosen as follows: a fresh id when neither
    edge has one, the existing id when only one edge has one, and otherwise
    the id of the edge whose coreference set is larger (``edge1`` wins when
    the sizes are equal). Ops changing the id are yielded for each edge
    whose current id differs from the chosen one, then a non-primary op for
    ``(coref_pred, edge1, edge2)``. If any id was changed, the ops from
    ``_update_main_coref_ops`` for ``edge1`` are yielded last.
    """
    cref_id_1 = coref_id(hg, edge1)
    cref_id_2 = coref_id(hg, edge2)

    if cref_id_1 is None and cref_id_2 is None:
        new_cref_id = _new_coref_id()
    elif cref_id_1 is None:
        new_cref_id = cref_id_2
    elif cref_id_2 is None:
        new_cref_id = cref_id_1
    else:
        size_1 = len(coref_set(hg, edge1))
        size_2 = len(coref_set(hg, edge2))
        new_cref_id = cref_id_2 if size_2 > size_1 else cref_id_1

    changed = False
    for member, current_id in ((edge1, cref_id_1), (edge2, cref_id_2)):
        if current_id != new_cref_id:
            yield from _change_coref_id_ops(hg, member, new_cref_id)
            changed = True

    yield create_op((coref_pred, edge1, edge2), primary=False)

    if changed:
        yield from _update_main_coref_ops(hg, edge1)
예제 #13
0
    def parse_results2ops(self, parse_results, sequence=None, pos=-1):
        """Turn parser results into a stream of edge ops.

        Which edge is treated as the main edge depends on ``self.corefs``:
        ``'replace'`` uses ``resolved_corefs``; any other value uses
        ``main_edge``. With ``'resolve'`` the ``resolved_corefs`` edge is
        additionally written, together with a ``const.coref_res_pred`` edge
        linking the main edge to it.

        For every parse with a non-empty main edge, an op for it is yielded
        with the parsed text attached; ``sequence`` and the current position
        are included only when ``sequence`` is truthy. The position is
        advanced after every main edge. Extra edges of the parse follow, and
        inferred edges are yielded last as counted ops.
        """
        for parse in parse_results['parses']:
            mode = self.corefs
            main_key = 'resolved_corefs' if mode == 'replace' else 'main_edge'
            main_edge = parse[main_key]
            resolved_edge = (parse['resolved_corefs'] if mode == 'resolve'
                             else None)

            if not main_edge:
                continue

            # the main edge carries the original text as an attribute
            attr = {'text': parse['text']}
            main_kwargs = {'attributes': attr}
            if sequence:
                main_kwargs.update(sequence=sequence, position=pos)
            yield create_op(main_edge, **main_kwargs)
            pos += 1

            if self.corefs == 'resolve':
                yield create_op(resolved_edge, attributes=attr)
                yield create_op(hedge(
                    (const.coref_res_pred, main_edge, resolved_edge)))

            # edges produced alongside the main edge
            for extra in parse['extra_edges']:
                yield create_op(extra)

        for inferred in parse_results['inferred_edges']:
            yield create_op(inferred, count=True)
예제 #14
0
def _set_coref_id_op(hg, edge, coref_id):
    """Return a ``'set_attributes'`` op storing ``coref_id`` on ``edge``.

    The id is stored under the ``coref_set_id_key`` attribute. ``hg`` is not
    used by this function.
    """
    return create_op(edge,
                     optype='set_attributes',
                     attributes={coref_set_id_key: coref_id})
예제 #15
0
 def on_end(self):
     """Yield an ``actor/P/.`` op for every actor with a positive count.

     Counts are read from ``self.actor_counter``.
     """
     for actor, count in self.actor_counter.items():
         if count > 0:
             yield create_op(('actor/P/.', actor))
예제 #16
0
def make_singular_plural_ops(hg, single, plural):
    """Yield one non-primary op relating ``single`` to ``plural``.

    The relation edge is ``(singular_plural_pred, single, plural)``. ``hg``
    is not used by this function.
    """
    relation = (singular_plural_pred, single, plural)
    yield create_op(relation, primary=False)
예제 #17
0
 def process_edge(self, edge, depth):
     """Yield a single op for ``edge``; ``depth`` is not used."""
     op = create_op(edge)
     yield op
예제 #18
0
 def process_edge(self, edge, depth):
     """Yield one op per edge from ``conjunctions_decomposition(edge)``.

     ``depth`` is not used by this method.
     """
     yield from (create_op(part)
                 for part in conjunctions_decomposition(edge))
예제 #19
0
 def process_edge(self, edge, depth):
     """Yield one op per edge from ``conjunctions_resolution(edge)``.

     ``depth`` is not used by this method.
     """
     for resolved in conjunctions_resolution(edge):
         yield create_op(resolved)
예제 #20
0
파일: claims.py 프로젝트: dzynin/graphbrain
    def on_end(self):
        """Assign genders to the collected actors, then write all claims.

        First pass: every actor in ``self.actors`` is classified with
        ``self._gender`` and stored in the matching set (``self.female``,
        ``self.group``, ``self.male`` or ``self.non_human``); when a gender
        was found, a ``(<gender>/P/., actor)`` op is yielded.

        Second pass: for every entry of ``self.claims``, the value returned
        by ``_subject_preposition`` for the claim is checked. If it is
        'she', 'they', 'he' or 'it' and the actor belongs to the
        corresponding set, the subject of the claim is replaced by the actor
        and ``self.anaphoras`` is incremented. A ``claim/P/.`` op is then
        yielded for the (possibly rewritten) claim.

        Both passes report progress through a progress bar.
        """
        # assign genders
        self.female = set()
        self.group = set()
        self.male = set()
        self.non_human = set()

        members_by_gender = {
            'female': self.female,
            'group': self.group,
            'male': self.male,
            'non-human': self.non_human,
        }
        members_by_pronoun = {
            'she': self.female,
            'they': self.group,
            'he': self.male,
            'it': self.non_human,
        }

        self.logger.debug('assigning genders')
        with progressbar.ProgressBar(max_value=len(self.actors)) as bar:
            for done, actor in enumerate(self.actors, start=1):
                gender = self._gender(actor)
                members = members_by_gender.get(gender)
                if members is not None:
                    members.add(actor)

                # write gender
                if gender:
                    yield create_op(('{}/P/.'.format(gender), actor))

                bar.update(done)

        # write claims
        self.logger.debug('writing claims')
        with progressbar.ProgressBar(max_value=len(self.claims)) as bar:
            for done, claim_data in enumerate(self.claims, start=1):
                actor = claim_data['actor']
                claim = claim_data['claim']
                edge = claim_data['edge']

                # anaphora resolution: only when the actor is in the set
                # associated with the value found for the claim's subject
                pronoun = _subject_preposition(claim)
                candidates = (members_by_pronoun.get(pronoun)
                              if pronoun else None)
                if candidates is not None and actor in candidates:
                    self.logger.debug('ANAPHORA')
                    self.logger.debug('actor: {}'.format(actor))
                    self.logger.debug('before: {}'.format(claim))
                    claim = replace_subject(claim, actor)
                    self.logger.debug('after: {}'.format(claim))
                    self.anaphoras += 1

                # write claim
                yield create_op(('claim/P/.', actor, claim, edge))

                bar.update(done)