Ejemplo n.º 1
0
def assemble_one_corpus():
    """For assembling one of the four corpora."""
    # Location and size tag identifying which corpus to assemble.
    base_path = '/home/bmg16/data/wm/2-Jsonld'
    size_label = '16k'
    prefix = '%s%s' % (base_path, size_label)
    json_files = glob.glob('%s/*.jsonld' % prefix)

    # Process each JSON-LD file and tag every evidence with the name of
    # the document it came from.
    all_statements = []
    for file_idx, json_file in enumerate(json_files):
        doc_name = os.path.basename(json_file)
        ep = eidos.process_json_file(json_file)
        for statement in ep.statements:
            for evidence in statement.evidence:
                evidence.annotations['provenance'][0]['document']['@id'] = \
                    doc_name
        all_statements.extend(ep.statements)
        print('%d: %d' % (file_idx, len(all_statements)))

    # Persist the raw (pre-assembly) statements as a pickle.
    with open('%s/3-Indra%s.pkl' % (prefix, size_label), 'wb') as fh:
        pickle.dump(all_statements, fh)

    # Preassemble with the Eidos-specific belief scorer and dump the
    # assembled statements as JSON.
    scorer = get_eidos_scorer()
    assembled_stmts = ac.run_preassembly(
        all_statements, belief_scorer=scorer, return_toplevel=False)
    jd = stmts_to_json(assembled_stmts, use_sbo=False)
    with open('%s/3-Indra%s.json' % (prefix, size_label), 'w') as fh:
        json.dump(jd, fh, indent=1)
Ejemplo n.º 2
0
 def process_args(self, args_json):
     """Resolve JSON-serialized pipeline arguments into runtime objects.

     Mutates `args_json` in place, converting known keys (statement type
     names, function names, curation dicts, scorer/ontology selectors,
     whitelist/mutation mod lists) into their corresponding objects, and
     returns the same dict.
     """
     for key in args_json:
         value = args_json[key]
         if key == 'stmt_type':
             args_json[key] = get_statement_by_name(value)
         elif key in ('matches_fun', 'refinement_fun'):
             # Function names map to registered pipeline callables.
             args_json[key] = pipeline_functions[value]
         elif key == 'curations':
             # Curation dicts become lightweight named tuples.
             Curation = namedtuple(
                 'Curation', ['pa_hash', 'source_hash', 'tag'])
             curations = []
             for cur in value:
                 curations.append(
                     Curation(cur['pa_hash'], cur['source_hash'], cur['tag']))
             args_json[key] = curations
         elif key == 'belief_scorer':
             # Only the WM scorer is supported; anything else means default.
             args_json[key] = get_eidos_scorer() if value == 'wm' else None
         elif key == 'ontology':
             args_json[key] = world_ontology if value == 'wm' else bio_ontology
         elif key in ('whitelist', 'mutations'):
             # Inner mod lists become hashable tuples.
             converted = {}
             for gene, mods in value.items():
                 converted[gene] = [tuple(mod) for mod in mods]
             args_json[key] = converted
     return args_json
Ejemplo n.º 3
0
    def run_assembly(self):
        """Run INDRA's assembly pipeline on the Statements.

        Reads options from ``self.assembly_config`` and applies the
        corresponding ``assemble_corpus`` steps in a fixed order; the final
        result is stored in ``self.assembled_stmts``.
        """
        # Remove duplicate copies before collecting the raw statements.
        self.eliminate_copies()
        stmts = self.get_indra_stmts()
        stmts = self.filter_event_association(stmts)
        stmts = ac.filter_no_hypothesis(stmts)
        # Grounding mapping runs by default unless explicitly skipped.
        if not self.assembly_config.get('skip_map_grounding'):
            stmts = ac.map_grounding(stmts)
        if self.assembly_config.get('standardize_names'):
            # NOTE(review): return value is discarded here — presumably the
            # statements are modified in place; confirm against the INDRA API.
            ac.standardize_names_groundings(stmts)
        if self.assembly_config.get('filter_ungrounded'):
            # score_threshold may be None, in which case the filter's own
            # default applies.
            score_threshold = self.assembly_config.get('score_threshold')
            stmts = ac.filter_grounded_only(stmts,
                                            score_threshold=score_threshold)
        if self.assembly_config.get('merge_groundings'):
            stmts = ac.merge_groundings(stmts)
        if self.assembly_config.get('merge_deltas'):
            stmts = ac.merge_deltas(stmts)
        relevance_policy = self.assembly_config.get('filter_relevance')
        if relevance_policy:
            stmts = self.filter_relevance(stmts, relevance_policy)
        # Human-only filtering and sequence mapping run unless skipped.
        if not self.assembly_config.get('skip_filter_human'):
            stmts = ac.filter_human_only(stmts)
        if not self.assembly_config.get('skip_map_sequence'):
            stmts = ac.map_sequence(stmts)
        # Use WM hierarchies and belief scorer for WM preassembly
        preassembly_mode = self.assembly_config.get('preassembly_mode')
        if preassembly_mode == 'wm':
            hierarchies = get_wm_hierarchies()
            belief_scorer = get_eidos_scorer()
            stmts = ac.run_preassembly(stmts,
                                       return_toplevel=False,
                                       belief_scorer=belief_scorer,
                                       hierarchies=hierarchies)
        else:
            stmts = ac.run_preassembly(stmts, return_toplevel=False)
        # Optional belief cutoff, then keep only top-level statements.
        belief_cutoff = self.assembly_config.get('belief_cutoff')
        if belief_cutoff is not None:
            stmts = ac.filter_belief(stmts, belief_cutoff)
        stmts = ac.filter_top_level(stmts)

        if self.assembly_config.get('filter_direct'):
            # Restrict to direct statements and mechanistically plausible
            # modifications.
            stmts = ac.filter_direct(stmts)
            stmts = ac.filter_enzyme_kinase(stmts)
            stmts = ac.filter_mod_nokinase(stmts)
            stmts = ac.filter_transcription_factor(stmts)

        if self.assembly_config.get('mechanism_linking'):
            # Mechanism linking: gather/reduce activities and modifications,
            # then rewire activations and require active forms.
            ml = MechLinker(stmts)
            ml.gather_explicit_activities()
            ml.reduce_activities()
            ml.gather_modifications()
            ml.reduce_modifications()
            ml.gather_explicit_activities()
            ml.replace_activations()
            ml.require_active_forms()
            stmts = ml.statements

        self.assembled_stmts = stmts
Ejemplo n.º 4
0
def test_wm_scorer():
    # Build the Eidos scorer and a minimal Eidos-sourced Influence.
    scorer = wm_scorer.get_eidos_scorer()
    ev = Evidence(source_api='eidos')
    stmt = Influence(Concept('a'), Concept('b'), evidence=[ev])
    # Make sure other sources are still in the map
    assert 'hume' in scorer.prior_probs['rand']
    assert 'biopax' in scorer.prior_probs['syst']
    BeliefEngine(scorer).set_prior_probs([stmt])
Ejemplo n.º 5
0
def test_wm_scorer():
    # Exercise the Eidos scorer on a minimal two-concept Influence.
    scorer = wm_scorer.get_eidos_scorer()
    concepts = [Concept('a'), Concept('b')]
    stmt = Influence(concepts[0], concepts[1],
                     evidence=[Evidence(source_api='eidos')])
    # Make sure other sources are still in the map
    assert 'hume' in scorer.prior_probs['rand']
    assert 'biopax' in scorer.prior_probs['syst']
    engine = BeliefEngine(scorer)
    engine.set_prior_probs([stmt])
Ejemplo n.º 6
0
def assemble_stmts(stmts):
    """Preassemble a list of INDRA Statements with the Eidos belief scorer.

    Parameters
    ----------
    stmts : list
        Raw INDRA Statements to preassemble.

    Returns
    -------
    list
        Top-level preassembled Statements with evidence flattened in from
        their supported_by children.
    """
    print('Running preassembly')
    # NOTE(review): the original also assigned hm = get_wm_hierarchies()
    # but never used it — it was not passed to run_preassembly — so the
    # dead call is removed here. If WM hierarchies were intended, pass
    # them explicitly (cf. the hierarchies= keyword used elsewhere).
    scorer = get_eidos_scorer()
    stmts = ac.run_preassembly(stmts,
                               belief_scorer=scorer,
                               return_toplevel=True,
                               flatten_evidence=True,
                               flatten_evidence_collect_from='supported_by',
                               poolsize=2)
    return stmts
Ejemplo n.º 7
0
def default_assembly(stmts):
    """Run the default WM assembly pipeline on a list of Statements.

    Preassembles with the Eidos belief scorer, then merges groundings and
    deltas and standardizes names/groundings.

    Parameters
    ----------
    stmts : list
        Raw INDRA Statements to assemble.

    Returns
    -------
    list
        Assembled top-level Statements.
    """
    from indra.belief.wm_scorer import get_eidos_scorer
    # NOTE(review): removed the unused `hm = get_wm_hierarchies()` local
    # (and its now-unneeded import) — it was never passed to
    # run_preassembly, so the call was dead code.
    scorer = get_eidos_scorer()
    stmts = ac.run_preassembly(stmts, belief_scorer=scorer,
                               return_toplevel=True,
                               flatten_evidence=True,
                               flatten_evidence_collect_from='supported_by',
                               poolsize=4)
    stmts = ac.merge_groundings(stmts)
    stmts = ac.merge_deltas(stmts)
    stmts = ac.standardize_names_groundings(stmts)
    return stmts
Ejemplo n.º 8
0
def default_assembly(stmts):
    """Run the default WM assembly pipeline on a list of Statements.

    Preassembles with the Eidos belief scorer, then merges groundings and
    deltas and standardizes names/groundings.

    Parameters
    ----------
    stmts : list
        Raw INDRA Statements to assemble.

    Returns
    -------
    list
        Assembled top-level Statements.
    """
    from indra.belief.wm_scorer import get_eidos_scorer
    # NOTE(review): removed the unused `hm = get_wm_hierarchies()` local
    # (and its now-unneeded import) — it was never passed to
    # run_preassembly, so the call was dead code.
    scorer = get_eidos_scorer()
    stmts = ac.run_preassembly(stmts, belief_scorer=scorer,
                               return_toplevel=True,
                               flatten_evidence=True,
                               flatten_evidence_collect_from='supported_by',
                               poolsize=4)
    stmts = ac.merge_groundings(stmts)
    stmts = ac.merge_deltas(stmts)
    stmts = ac.standardize_names_groundings(stmts)
    return stmts
Ejemplo n.º 9
0
def test_readme_wm_pipeline():
    from indra.tools import assemble_corpus as ac
    from indra.belief.wm_scorer import get_eidos_scorer
    from indra.ontology.world import world_ontology
    raw = wm_raw_stmts
    # raw = ac.filter_grounded_only(raw)  # Does not work on test stmts
    # Preassemble against the WM ontology with opposite normalization.
    assembled = ac.run_preassembly(raw,
                                   return_toplevel=False,
                                   belief_scorer=get_eidos_scorer(),
                                   ontology=world_ontology,
                                   normalize_opposites=True,
                                   normalize_ns='WM')
    # Apply belief cutoff of e.g., 0.8
    filtered = ac.filter_belief(assembled, 0.8)
    assert filtered, 'Update example to yield statements list of non-zero length'
Ejemplo n.º 10
0
def run_preassembly():
    """Run preassembly on a list of INDRA Statements."""
    # CORS preflight gets an empty response.
    if request.method == 'OPTIONS':
        return {}
    # Decode the JSON request body and reconstruct the statements.
    body = json.loads(request.body.read().decode('utf-8'))
    stmts = stmts_from_json(body.get('statements'))
    return_toplevel = body.get('return_toplevel')
    # 'wm' selects the Eidos scorer; anything else uses the default.
    belief_scorer = get_eidos_scorer() if body.get('scorer') == 'wm' else None
    stmts_out = ac.run_preassembly(stmts, belief_scorer=belief_scorer,
                                   return_toplevel=return_toplevel)
    return _return_stmts(stmts_out)
Ejemplo n.º 11
0
               #'50': '/home/bmg16/Dropbox/postdoc/darpa/src/indra_apps/' + \
               #      'wm_fao/20181101/2-Jsonld50',
               '500': '/home/bmg16/Dropbox/postdoc/darpa/src/indra_apps/' + \
                      'wm_fao/20181101/2-Jsonld500',
                '16k': '/home/bmg16/data/wm/2-Jsonld16k',
                }
    # Parse every JSON-LD file in each corpus and collect the resulting
    # statements, tagging each evidence with its source document and corpus.
    all_statements = []
    for corpus_size, path in corpora.items():
        fnames = glob.glob('%s/*.jsonld' % path)
        for idx, fname in enumerate(fnames):
            ep = eidos.process_json_file(fname)
            for stmt in ep.statements:
                for ev in stmt.evidence:
                    # Record which document and which corpus the evidence
                    # came from in its provenance annotations.
                    ev.annotations['provenance'][0]['document']['@id'] = \
                        os.path.basename(fname)
                    ev.annotations['provenance'][0]['document']['corpus'] = \
                        corpus_size
            all_statements += ep.statements
            print('%d: %d' % (idx, len(all_statements)))

    # Preassemble the merged statement set with the Eidos belief scorer.
    scorer = get_eidos_scorer()
    assembled_stmts = ac.run_preassembly(all_statements,
                                         belief_scorer=scorer,
                                         return_toplevel=False)

    # Write the assembled statements out as JSON.
    jd = stmts_to_json(assembled_stmts, use_sbo=False)
    with open('3-Indra-merged-500-16k.json', 'w') as fh:
        json.dump(jd, fh, indent=1)

#    assemble_all()
Ejemplo n.º 12
0
from indra.tools import assemble_corpus as ac
from indra.sources.eidos import migration_table_processor as mtp
from indra.preassembler.hierarchy_manager import YamlHierarchyManager
from indra.preassembler.make_eidos_hume_ontologies import load_yaml_from_url, \
    rdf_graph_from_yaml
from indra.preassembler.custom_preassembly import location_time_delta_matches, \
    location_time_delta_refinement
from indra.belief.wm_scorer import get_eidos_scorer


wm_ont_url = ('https://raw.githubusercontent.com/WorldModelers/'\
              'Ontologies/master/wm.yml')

if __name__ == '__main__':
    # Read migration statements from the curated spreadsheet.
    fname = 'grounded CAG links - New Ontology.xlsx'
    stmts = mtp.process_workbook(fname)
    # Build an entity hierarchy manager from the WM ontology YAML.
    hm = YamlHierarchyManager(load_yaml_from_url(wm_ont_url),
                              rdf_graph_from_yaml, True)
    # BUG FIX: the keyword was misspelled `belief_score`, so the Eidos
    # scorer was not applied; the correct parameter name is
    # `belief_scorer`, consistent with every other run_preassembly call.
    stmts = ac.run_preassembly(stmts,
                               return_toplevel=False,
                               belief_scorer=get_eidos_scorer(),
                               hierarchies={'entity': hm},
                               matches_fun=location_time_delta_matches,
                               refinement_fun=location_time_delta_refinement)
    stmts = ac.standardize_names_groundings(stmts)