def test_incremental_assembler_constructor():
    """Check the initial state of an assembler built from ``[s1, s2]``.

    ``s1``, ``s2``, their hashes ``s1h``/``s2h`` and evidences ``ev1``/``ev2``
    are fixtures defined outside this function.
    """
    assembler = IncrementalAssembler([s1, s2])

    # The inputs are kept as given and are also indexed by statement hash.
    assert assembler.prepared_stmts == [s1, s2]
    assert assembler.stmts_by_hash == {s1h: s1, s2h: s2}
    evs_by_hash = assembler.evs_by_stmt_hash
    assert evs_by_hash == {s1h: [ev1], s2h: [ev2]}, evs_by_hash

    # Exactly one refinement edge is expected, and both hashes are graph nodes.
    assert assembler.refinement_edges == {(s1h, s2h)}
    graph_nodes = set(assembler.refinements_graph.nodes())
    assert graph_nodes == {s1h, s2h}

    # The evidence reported for s1 is expected to include s2's evidence,
    # while s2 only reports its own.
    s1_support = set(assembler.get_all_supporting_evidence(s1h))
    s2_support = set(assembler.get_all_supporting_evidence(s2h))
    assert s1_support == {ev1, ev2}
    assert s2_support == {ev2}
def test_incremental_assembler_add_statement_duplicate():
    """Add a statement that is expected to contribute only new evidence.

    ``s3`` is built from the fixtures ``e1`` and ``e2`` (defined outside this
    function) with a fresh evidence ``ev3``. The resulting delta should list
    no new statements and no new refinements, only ``ev3`` under ``s3``'s hash.
    """
    ev3 = Evidence('eidos', text='3')
    s3 = Influence(e1, e2, ev3)
    s3h = s3.get_hash(matches_fun=location_matches_compositional)

    assembler = IncrementalAssembler([s1, s2])
    delta = assembler.add_statements([s3])

    new_stmts = delta.new_stmts
    assert not new_stmts, new_stmts
    new_evidences = delta.new_evidences
    assert new_evidences == {s3h: [ev3]}, new_evidences
    new_refinements = delta.new_refinements
    assert not new_refinements, new_refinements
    # TODO: test beliefs

    # ev3 should now be reported for s1 but not for s2.
    s1_support = set(assembler.get_all_supporting_evidence(s1h))
    s2_support = set(assembler.get_all_supporting_evidence(s2h))
    assert s1_support == {ev1, ev2, ev3}
    assert s2_support == {ev2}
def test_post_processing_new_stmts():
    """Check the statement reported as new after an incremental add.

    Works on deep copies of the ``s1``/``s2`` fixtures so that the shared
    module-level statements are not touched by the assembler.
    """
    initial_stmt, added_stmt = copy.deepcopy([s1, s2])
    assembler = IncrementalAssembler([initial_stmt])
    delta = assembler.add_statements([added_stmt])

    new_stmts = list(delta.new_stmts.values())
    assert len(new_stmts) == 1
    stmt = new_stmts[0]
    assert stmt.subj.concept.name == 'crop'

    # Check that we added annotations
    annotations = stmt.evidence[0].annotations
    assert 'agents' in annotations
    expected_agents = {'raw_text': ['some_text2', 'some_text2']}
    assert annotations['agents'] == expected_agents, annotations['agents']
def test_incremental_assembler_add_statement_new_refinement():
    """Add a statement that is new and introduces new refinement edges.

    ``s4`` is built from the fixtures ``e2`` and ``e4`` (defined outside this
    function) with a fresh evidence ``ev4``. The delta is expected to report
    ``s4`` as a new statement, ``ev4`` as its evidence, and refinement edges
    pairing both ``s1`` and ``s2`` with ``s4``.
    """
    ev4 = Evidence('eidos', text='4')
    s4 = Influence(e2, e4, ev4)
    s4h = s4.get_hash(matches_fun=location_matches_compositional)
    ia = IncrementalAssembler([s1, s2])
    delta = ia.add_statements([s4])
    # The original line read `assert delta.new_stmts, {s4h: s4}`, which only
    # checked truthiness and used the expected dict as the failure message.
    # Compare the hash keys so the assertion actually pins which statement is
    # new without relying on how statement objects compare for equality.
    assert set(delta.new_stmts) == {s4h}, delta.new_stmts
    assert delta.new_evidences == {s4h: [ev4]}, delta.new_evidences
    assert delta.new_refinements == {(s1h, s4h), (s2h, s4h)}, \
        delta.new_refinements
    # TODO: test beliefs
    # ev4 should now be reported for s1 and s2 as well as for s4 itself.
    assert set(ia.get_all_supporting_evidence(s1h)) == {ev1, ev2, ev4}
    assert set(ia.get_all_supporting_evidence(s2h)) == {ev2, ev4}
    assert set(ia.get_all_supporting_evidence(s4h)) == {ev4}
def test_post_processing_all_stmts():
    """Check the first statement returned by ``get_statements``.

    Works on deep copies of the ``s1``/``s2`` fixtures, which are defined
    outside this function.
    """
    assembler = IncrementalAssembler(copy.deepcopy([s1, s2]))
    stmts_out = assembler.get_statements()

    # Check that we normalized concept names
    assert stmts_out[0].subj.concept.name == 'agriculture'

    # Check that we added flattened groundings
    flat_grounding = [
        {
            'grounding': 'wm/concept/agriculture',
            'name': 'agriculture',
            'score': 1.0,
        },
    ]
    subj_refs = stmts_out[0].subj.concept.db_refs
    assert subj_refs['WM_FLAT'] == flat_grounding, flat_grounding

    # Check that we added annotations
    annotations = stmts_out[0].evidence[0].annotations
    assert 'agents' in annotations
    expected_agents = {'raw_text': ['some_text1', 'some_text2']}
    assert annotations['agents'] == expected_agents, annotations['agents']
Ejemplo n.º 6
0
    def assemble(self):
        """Assemble the prepared statements of every DART record.

        For each entry of ``self.dart_records`` the prepared statements are
        fetched from ``self.sc.db`` using the record's document ID, reader
        and reader version. All of them are passed to a single
        ``IncrementalAssembler``; its output is stored in
        ``self.assembled_stmts`` and the number of assembled statements is
        recorded under ``self.metadata['num_statements']``.
        """
        prepared_stmts = [
            stmt
            for rec in self.dart_records
            for stmt in self.sc.db.get_statements_for_document(
                document_id=rec['document_id'],
                reader=rec['reader'],
                reader_version=rec['reader_version'],
            )
        ]
        assembler = IncrementalAssembler(prepared_stmts)
        assembled = assembler.get_statements()
        self.assembled_stmts = assembled
        self.metadata['num_statements'] = len(assembled)
def test_apply_grounding_curation():
    """Apply a grounding curation to a statement's subject and check it.

    The curation swaps the subject grounding from a scored entry list to a
    list of plain names; afterwards the subject's first ``WM`` grounding is
    expected to carry the new names, each with a score of 1.0.
    """
    old_grounding = [('theme1', 0.8), None, ('process', 0.7), None]
    new_grounding = ['theme2', 'property2', None, None]

    def curation_side(subj_grounding):
        # Build one side ("before"/"after") of the curation; only the
        # subject grounding differs between the two sides.
        return {
            "subj": {"factor": 'x', "concept": subj_grounding},
            "obj": {"factor": 'y', "concept": 'z'},
        }

    cur = {
        "before": curation_side(old_grounding),
        "after": curation_side(new_grounding),
    }

    subj_concept = Concept('x', db_refs={'WM': [old_grounding]})
    stmt = Influence(Event(subj_concept), Event('y'))
    IncrementalAssembler.apply_grounding_curation(stmt, cur)

    expected = [('theme2', 1.0), ('property2', 1.0), None, None]
    assert stmt.subj.concept.db_refs['WM'][0] == expected
Ejemplo n.º 8
0
    def assemble(self):
        """Run assembly on the prepared statements.

        Loads the prepared statements for every entry of
        ``self.dart_records`` (looked up by the record's ``storage_key``),
        runs them through a single ``IncrementalAssembler``, stores the
        result in ``self.assembled_stmts`` and records the number of
        assembled statements under ``self.metadata['num_statements']``.
        Progress is reported through the module logger and a tqdm bar.
        """
        all_stmts = []
        # Logger arguments are passed separately so that the message is only
        # formatted if the record is actually emitted.
        logger.info('Loading statements from DB for %d records',
                    len(self.dart_records))
        for record in tqdm.tqdm(self.dart_records):
            stmts = self.sc.db.get_statements_for_record(record['storage_key'])
            all_stmts += stmts
        logger.info('Instantiating incremental assembler with %d statements',
                    len(all_stmts))
        ia = IncrementalAssembler(all_stmts)
        logger.info('Getting assembled statements')
        self.assembled_stmts = ia.get_statements()
        logger.info('Got %d assembled statements', len(self.assembled_stmts))
        self.metadata['num_statements'] = len(self.assembled_stmts)
Ejemplo n.º 9
0
 def load_project(self, project_id, record_keys=None):
     """Create and register an assembler for a project.

     When ``record_keys`` is None, the keys are looked up in ``self.db``
     for ``project_id``. The prepared statements of those records and the
     project's curations are used to build an ``IncrementalAssembler``,
     which is stored in ``self.assemblers`` under ``project_id``.
     """
     # Fall back to the records associated with the project
     keys = record_keys
     if keys is None:
         keys = self.db.get_records_for_project(project_id)
     # Gather the prepared statements of every selected record
     prepared_stmts = [
         stmt
         for key in keys
         for stmt in self.db.get_statements_for_record(key)
     ]
     # Fetch the project's curations and initiate the assembler with both
     project_curations = self.get_project_curations(project_id)
     self.assemblers[project_id] = IncrementalAssembler(
         prepared_stmts, curations=project_curations)