Code example #1
def test_filter_by_curation():
    new_st1 = deepcopy(st1)
    new_ev = Evidence(text='a -> b', source_api='new')
    new_st1.evidence.append(new_ev)
    stmts_in = [new_st1, st2, st3]
    assert len(new_st1.evidence) == 2
    assert all(st.belief != 1 for st in stmts_in)
    Curation = namedtuple('Curation', ['pa_hash', 'source_hash', 'tag'])
    cur1 = Curation(new_st1.get_hash(), new_st1.evidence[0].get_source_hash(),
                    'grounding')
    cur2 = Curation(new_st1.get_hash(), new_st1.evidence[1].get_source_hash(),
                    'wrong_relation')
    cur3 = Curation(new_st1.get_hash(), new_st1.evidence[0].get_source_hash(),
                    'correct')
    cur4 = Curation(st2.get_hash(), st2.evidence[0].get_source_hash(),
                    'correct')
    # With the 'any' policy, one incorrect curation is enough
    any_incorrect_one_cur = ac.filter_by_curation(stmts_in, [cur1], 'any')
    assert len(any_incorrect_one_cur) == 2
    assert new_st1 not in any_incorrect_one_cur
    # With the 'all' policy, every evidence must be curated as incorrect
    all_incorrect_one_cur = ac.filter_by_curation(stmts_in, [cur1], 'all')
    assert len(all_incorrect_one_cur) == 3, len(all_incorrect_one_cur)
    assert new_st1 in all_incorrect_one_cur
    all_incorrect_two_cur = ac.filter_by_curation(stmts_in, [cur1, cur2],
                                                  'all')
    assert len(all_incorrect_two_cur) == 2
    assert new_st1 not in all_incorrect_two_cur
    # Correct curation cancels out incorrect
    assert len(new_st1.evidence) == 2
    correct_incorrect = ac.filter_by_curation(stmts_in,
                                              [cur1, cur2, cur3, cur4],
                                              'all',
                                              update_belief=False)
    assert len(correct_incorrect) == 3, len(correct_incorrect)
    assert new_st1 in correct_incorrect
    # new_st1.evidence[1] should be filtered out because it only has an
    # incorrect curation (cur2); new_st1.evidence[0] stays because the
    # correct curation cancels out the incorrect one (cur1, cur3)
    assert len(new_st1.evidence) == 1
    assert new_st1.evidence[0].source_api == 'assertion'
    assert all(st.belief != 1 for st in correct_incorrect)
    # Optionally update belief to 1 for correct curation
    new_belief = ac.filter_by_curation(stmts_in, [cur1, cur2, cur3, cur4],
                                       'all',
                                       update_belief=True)
    assert new_belief[0].belief == 1
    assert new_belief[1].belief == 1
    assert new_belief[2].belief == 0.7
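
This test is taken from INDRA's own test suite, so the fixtures st1, st2, st3 and imports such as deepcopy, namedtuple, Evidence and ac (i.e. indra.tools.assemble_corpus) are defined at module level and not shown here. A minimal self-contained sketch of the same call pattern, using made-up statements, agent names and curations purely for illustration, might look like this:

from collections import namedtuple

from indra.statements import Activation, Agent, Evidence
from indra.tools import assemble_corpus as ac

# Two toy statements, each carrying a single piece of evidence
stmt_a = Activation(Agent('A'), Agent('B'),
                    evidence=[Evidence(text='A activates B.',
                                       source_api='reach')])
stmt_b = Activation(Agent('C'), Agent('D'),
                    evidence=[Evidence(text='C activates D.',
                                       source_api='sparser')])

# A curation only needs pa_hash, source_hash and tag attributes
Curation = namedtuple('Curation', ['pa_hash', 'source_hash', 'tag'])
bad_cur = Curation(stmt_a.get_hash(),
                   stmt_a.evidence[0].get_source_hash(),
                   'wrong_relation')

# With the 'any' policy, the single incorrect curation removes stmt_a
kept = ac.filter_by_curation([stmt_a, stmt_b], [bad_cur], 'any')
assert len(kept) == 1
assert stmt_b in kept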
Code example #2
def get_statements(target):
    #tas_stmts = get_tas_stmts(target)
    db_stmts = get_db_stmts(target)
    stmts = db_stmts
    #stmts = tas_stmts + db_stmts
    stmts = filter_misgrounding(target, stmts)
    stmts = ac.run_preassembly(stmts)
    stmts = ac.filter_by_curation(stmts, db_curations)
    stmts = filter_neg(stmts)
    return stmts
Code example #3
    def _filter_stmts(self, stmts):
        """This is an internal function that is applied to filter statements.

        In general, this does nothing, but some subclasses may want to limit
        the statements that are presented. This is applied both to the
        complete statements list (retrieved by `get_statements`) and to the
        sample (obtained through `get_sample`).
        """
        stmts = filter_by_curation(stmts, curations=curs)
        return stmts
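
In this fragment both filter_by_curation and curs are assumed to be available in the enclosing module: the former imported from indra.tools.assemble_corpus, the latter a list of curations, typically pulled from the INDRA database the way db_curations is in the other examples. A rough sketch of one way to wire this up (the wrapper name here is made up, and get_curations assumes the REST client is configured with any required credentials in your INDRA version):

from indra.sources import indra_db_rest
from indra.tools import assemble_corpus as ac


def filter_with_db_curations(stmts):
    # Pull the available curations from the INDRA DB REST service and use
    # them to drop statements curated as incorrect
    curs = indra_db_rest.get_curations()
    return ac.filter_by_curation(stmts, curations=curs)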
Code example #4
def get_statements(target):
    tas_stmts = get_tas_stmts(target)
    db_stmts = get_db_stmts(target)
    stmts = filter_misgrounding(target, tas_stmts + db_stmts)
    stmts = ac.run_preassembly(stmts)
    stmts = ac.filter_by_curation(stmts, db_curations)

    ev_counts = {s.get_hash(): len(s.evidence) for s in stmts}
    source_counts = {}
    for stmt in stmts:
        stmt_source_counts = get_source_counts_dict()
        for ev in stmt.evidence:
            stmt_source_counts[ev.source_api] += 1
        source_counts[stmt.get_hash()] = stmt_source_counts
    return stmts, ev_counts, source_counts
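
get_source_counts_dict is a helper defined elsewhere in this codebase; judging from how it is used in the loop above, it returns a fresh zero-initialized counter per source_api. A minimal stand-in under that assumption:

from collections import defaultdict


def get_source_counts_dict():
    # Stand-in for the project-specific helper: one zero-initialized counter
    # per source_api. The original presumably enumerates a fixed set of
    # sources; a defaultdict simply avoids KeyError for unexpected ones.
    return defaultdict(int)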
Code example #5
def assemble_statements(kinase, stmts, curs):
    """Run assembly steps on statements."""
    # Remove unary statements and ones with many agents
    stmts = [stmt for stmt in stmts if (1 < len(stmt.real_agent_list()) < 4)]
    stmts = replace_ctd(stmts, ctd_stmts_by_gene.get(kinase, []))
    # We do this at this point to make sure we capture the original DB
    # hashes before modifying statements to allow lookup
    for stmt in stmts:
        for ev in stmt.evidence:
            ev.annotations['prior_hash'] = stmt.get_hash()
    stmts = fix_invalidities(stmts)
    stmts = ac.filter_grounded_only(stmts)
    stmts = ac.filter_human_only(stmts)
    stmts = ac.filter_by_curation(stmts, curations=curs)
    stmts = unify_lspci(stmts)
    stmts = remove_contradictions(stmts)
    # Rename chemicals
    logger.info('Renaming chemicals')
    for stmt in stmts:
        for agent in stmt.real_agent_list():
            if agent.db_refs.get('CHEBI') and len(agent.name) > 25:
                rename_chemical(agent)
    # Remove long names
    logger.info('Removing statements with long names')
    stmts = [
        stmt for stmt in stmts if all(
            len(a.name) < 20 for a in stmt.real_agent_list())
    ]
    logger.info('%d statements remaining' % len(stmts))
    # Remove microRNAs
    logger.info('Removing microRNA statements')
    stmts = [
        stmt for stmt in stmts
        if not any('miR' in a.name for a in stmt.real_agent_list())
    ]
    logger.info('%d statements remaining' % len(stmts))
    stmts = add_source_urls(stmts)
    with open('data/assembled/%s.pkl' % kinase, 'wb') as fh:
        pickle.dump(stmts, fh)
    return stmts
Code example #6
def filter_incorrect_curations(stmts):
    # Filter incorrect curations
    indra_op_filtered = ac.filter_by_curation(stmts, curations=db_curations)
    return indra_op_filtered
Code example #7
        gilda_obj = gilda.ground(obj)
        gilda_obj = gilda_obj[0].term.entry_name if gilda_obj else 'NA'

        normalized_df.append({
            'Subject': subj,
            'Normalized subject': gilda_subj,
            'Object': obj,
            'Normalized object': gilda_obj
        })
        # Downloading statements using INDRA REST API
        idrp = idr.get_statements(subject=gilda_subj, object=gilda_obj)
        stmts = stmts + idrp.statements

    # Filtering out the indirect INDRA statements
    #indra_stmts = ac.filter_direct(stmts)
    indra_stmts = ac.run_preassembly(stmts, run_refinement=False)
    indra_filtered = ac.filter_by_curation(indra_stmts, curations=db_curations)

    indra_op_filtered = filter_complex_statements(indra_filtered, subj_set,
                                                  obj_set)

    indra_op_filtered = ac.run_preassembly(indra_op_filtered,
                                           run_refinement=False)

    html_assembler(indra_op_filtered,
                   os.path.join(OUTPUT, file + '_indra_report.html'))

    normalized_df = pd.DataFrame(normalized_df)
    normalized_df.to_csv(
        os.path.join(INPUT, file, file + '_normalized_names.csv'))
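
html_assembler here appears to be a local helper rather than part of the INDRA API. A plausible minimal version built on INDRA's HtmlAssembler (a sketch, assuming the default HTML output is sufficient):

from indra.assemblers.html import HtmlAssembler


def html_assembler(stmts, fname):
    # Render the statements into a standalone, browsable HTML report
    ha = HtmlAssembler(stmts)
    ha.make_model()
    ha.save_model(fname)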
Code example #8
    indra_db_stmts = list(stmts_by_hash.values())

    # Filtering out the indirect INDRA statements
    indra_db_stmts = ac.filter_direct(indra_db_stmts)

    # Fetch omnipath database biomolecular interactions and
    # process them into INDRA statements
    op = process_from_web()

    # Filter statements which are not ligands/receptors from
    # OmniPath database
    op_filtered = filter_op_stmts(op.statements, full_ligand_set,
                                  receptor_genes_go)
    op_filtered = ac.filter_direct(op_filtered)

    op_filtered = ac.filter_by_curation(op_filtered, curations=db_curations)

    # Merge omnipath/INDRA statements and run assembly
    indra_op_stmts = ac.run_preassembly(indra_db_stmts + op_filtered,
                                        run_refinement=False)
    # Filter incorrect curations
    indra_op_filtered = filter_incorrect_curations(indra_op_stmts)

    # Filter complex statements
    indra_op_filtered = filter_complex_statements(indra_op_filtered,
                                                  full_ligand_set,
                                                  receptor_genes_go)

    # We do this again because when removing complex members, we
    # end up with more duplicates
    # The original snippet is truncated here; run_refinement=False is
    # assumed, matching the earlier preassembly call
    indra_op_filtered = ac.run_preassembly(indra_op_filtered,
                                           run_refinement=False)
Code example #9
    with open('../../grounding_map.json', 'r') as fh:
        grounding_map = json.load(fh)
    #####################

    # Querying for and assembling statements
    all_stmts = []
    for db_ns, db_id, name in groundings:
        if db_id in black_list:
            print('Skipping %s in black list' % name)
            continue
        print('Looking up %s' % name)
        db_stmts = get_db_stmts_by_grounding(db_ns, db_id)
        tas_stmts = get_tas_stmts(db_ns, db_id) if db_ns == 'HGNC' else []
        stmts = db_stmts + tas_stmts
        stmts = ac.filter_by_curation(stmts, db_curations)
        stmts = reground_stmts(stmts, grounding_map, misgrounding_map)
        all_stmts += stmts
    all_stmts = make_unique_hashes(all_stmts)
    all_stmts = ac.run_preassembly(all_stmts)
    ########################################

    # Dump results
    with open('disease_map_indra_stmts_full.pkl', 'wb') as fh:
        pickle.dump(all_stmts, fh)

    stmts_to_json_file(all_stmts, 'disease_map_indra_stmts_full.json')

    filtered_stmts = filter_prior_all(all_stmts, groundings)
    with open('disease_map_indra_stmts_filtered.pkl', 'wb') as fh:
        pickle.dump(filtered_stmts, fh)