def get_agent_from_refs(db_refs):
    """Return an Agent built from db_refs, or None if no name is found.

    The name is the standardized name for the groundings, falling back
    to the raw 'TEXT' entry when standardization yields nothing. If
    neither a name nor any refs are available, None is returned
    implicitly.
    """
    agent_name = get_standard_name(db_refs) or db_refs.get('TEXT')
    if agent_name and db_refs:
        return Agent(agent_name, db_refs=db_refs)
def get_raw_statement_text_grounding_counts(stmts):
    """Tabulate grounding information for raw statement agent texts.

    For every agent of every statement, collect a tuple of
    (agent text, grounding, standard name, identifiers URL,
    gilda grounding) and count the unique tuples. Also record, for each
    agent text, the (pmid, text) of the first evidence it appeared with.

    Returns a (Counter, dict) pair.
    """
    rows = []
    evidence_for_text = {}
    for stmt in stmts:
        ev = stmt.evidence[0]
        for agent in stmt.agent_list():
            if agent is None:
                continue
            # Eidos stores the normalized raw text under TEXT_NORM,
            # other readers under TEXT.
            txt_key = 'TEXT_NORM' if ev.source_api == 'eidos' else 'TEXT'
            txt = agent.db_refs[txt_key]
            evidence_for_text[txt] = (ev.pmid, ev.text)
            assert txt, agent.db_refs
            grounding = agent.get_grounding()
            if grounding[0]:
                std_name = get_standard_name(*grounding)
                grounding_str = '%s:%s' % grounding
            else:
                std_name = ''
                grounding_str = ''
            url = get_identifiers_url(*grounding) \
                if grounding[0] is not None else ''
            # Re-ground the raw text with gilda, using the evidence
            # sentence as disambiguation context.
            matches = gilda.ground(txt, context=ev.text)
            if matches:
                gilda_str = '%s:%s' % (matches[0].term.db,
                                       matches[0].term.id)
            else:
                gilda_str = ''
            rows.append((txt, grounding_str, std_name, url, gilda_str))
    return Counter(rows), evidence_for_text
def get_eidos_gilda_grounding_counts(stmts):
    """Return normalized text counts (name in case of Eidos concepts) and
    evidence texts corresponding to each agent text.

    Each agent's name is grounded with gilda; unique
    (text, grounding, standard name, URL, '') tuples are counted, and
    each text is mapped to the (pmid, text) of the first evidence it
    was seen with. Returns a (Counter, dict) pair.
    """
    rows = []
    evidence_for_text = {}
    for stmt in stmts:
        for agent in stmt.agent_list():
            txt = agent.name
            matches = gilda.ground(txt)
            if matches:
                grounding = matches[0].term.db, matches[0].term.id
            else:
                grounding = None, None
            if grounding[0] is not None:
                std_name = get_standard_name(*grounding)
                url = get_identifiers_url(*grounding)
            else:
                std_name = ''
                url = ''
            ev = stmt.evidence[0]
            evidence_for_text[txt] = (ev.pmid, ev.text)
            grounding_str = ('%s:%s' % grounding) if grounding[0] else ''
            # Last tuple element (gilda grounding column) is always empty
            # here since gilda itself produced the grounding.
            rows.append((txt, grounding_str, std_name, url, ''))
    # Count the unique text-grounding entries
    return Counter(rows), evidence_for_text
        # --- tail of a function whose `def` line is outside this chunk.
        # NOTE(review): the original indentation was lost in extraction;
        # the assignment presumably sits inside a loop over
        # (db_ns, db_id) pairs, with standardization and return at
        # function level -- confirm against the full file.
        db_refs[db_ns] = db_id
    # Normalize the collected refs into standardized INDRA db_refs.
    db_refs = standardize_db_refs(db_refs)
    return db_refs


def get_prioritized_db_refs_key(db_refs):
    """Return the highest-priority (db_ns, db_id) pair from db_refs.

    Namespaces are tried in the order given by `default_ns_order`; the
    first namespace that has an entry in db_refs wins. Returns
    (None, None) if none of the prioritized namespaces is present.
    """
    for db_ns in default_ns_order:
        db_id = db_refs.get(db_ns)
        if db_id:
            return db_ns, db_id
    return None, None


def get_unique_prioritized_keys(map_name=default_map_name):
    """Return the set of unique prioritized (db_ns, db_id) keys for all
    valid element refs of the given Minerva map."""
    valid_element_refs = get_all_valid_element_refs(map_name)
    db_refs = [indra_db_refs_from_minerva_refs(refs)
               for refs in valid_element_refs]
    prioritized_keys = [get_prioritized_db_refs_key(db_ref)
                        for db_ref in db_refs]
    # Drop elements that had no grounding in any prioritized namespace.
    unique_prioritized_keys = {key for key in prioritized_keys
                               if key[0] is not None}
    return unique_prioritized_keys


if __name__ == '__main__':
    # Dump the map's unique prioritized groundings as CSV rows of
    # namespace, identifier, standardized name.
    keys = get_unique_prioritized_keys(default_map_name)
    with open('minerva_disease_map_indra_ids.csv', 'w') as fh:
        for db_ns, db_id in sorted(keys):
            name = get_standard_name({db_ns: db_id})
            fh.write('%s,%s,%s\n' % (db_ns, db_id, name))