def test_null_request():
    """Check that calling ``get_statements`` with no arguments fails.

    The test passes only if the call raises ``ValueError``; any other
    exception, or no exception at all, is reported as a failure.
    """
    try:
        dbr.get_statements()
    except ValueError:
        # Expected outcome: the empty request was rejected.
        return
    except Exception as e:
        # Catch Exception rather than BaseException so KeyboardInterrupt and
        # SystemExit still propagate instead of becoming assertion failures.
        raise AssertionError("Raised wrong exception: " + str(e)) from e
    # Raise explicitly: a bare `assert False` is stripped under `python -O`.
    raise AssertionError("Null request did not raise any exception.")
def get_from_source(entity, verb=None):
    """Query the database for statements having `entity` as subject.

    Parameters
    ----------
    entity : str
        Name handed to ``get_grounding_from_name`` to obtain a grounding.
    verb : str, optional
        If given and present in ``mod_map``, the mapped value is passed to
        the query as ``stmt_type``; otherwise no type filter is applied.

    Returns
    -------
    The result of ``indra_db_rest.get_statements``, unmodified.
    """
    db_name, db_id = get_grounding_from_name(entity)
    query = {'subject': '%s@%s' % (db_id, db_name)}
    # Only constrain the statement type for verbs with a known mapping.
    if verb and verb in mod_map:
        query['stmt_type'] = mod_map[verb]
    return indra_db_rest.get_statements(**query)
def test_famplex_namespace():
    """Check subject/object matching for a FamPlex-grounded query.

    Every returned statement must have its first agent grounded to
    FPLX 'PDGF' and its second agent named 'FOS'.
    """
    stmts = dbr.get_statements('PDGF@FPLX', 'FOS',
                               stmt_type='IncreaseAmount')
    print(len(stmts))

    subject_checks = [
        stmt.agent_list()[0].db_refs.get('FPLX') == 'PDGF'
        for stmt in stmts
    ]
    assert all(subject_checks), 'Not all subjects match.'

    object_checks = [stmt.agent_list()[1].name == 'FOS' for stmt in stmts]
    assert all(object_checks), 'Not all objects match.'
def __check_request(seconds, *args, **kwargs):
    """Run a ``get_statements`` query and check it is non-empty and timely.

    Parameters
    ----------
    seconds : int or float
        Upper bound (exclusive) on the wall-clock duration of the query.
    *args, **kwargs
        Forwarded unchanged to ``dbr.get_statements``.

    Returns
    -------
    The value returned by ``dbr.get_statements``.

    Raises
    ------
    AssertionError
        If the result is falsy or the query took `seconds` or longer.
    """
    start = datetime.now()
    stmts = dbr.get_statements(*args, **kwargs)
    # total_seconds() covers the whole interval; `.seconds` alone drops the
    # days component and truncates fractions of a second.
    elapsed = (datetime.now() - start).total_seconds()
    assert stmts, "Got no statements."
    assert elapsed < seconds, elapsed
    return stmts
def _make_processor(self):
    """Build one processor by querying each agent in turn.

    This method is overwritten to prevent excessive queries. Given N
    entities, the odds of finding common neighbors among them all decrease
    rapidly as N increases. Thus, if N is 100, you will likely run out of
    common neighbors after only a few queries. This implementation takes
    advantage of that fact by stopping as soon as nothing is left in
    common, preventing hangs in essentially trivial cases with large N.

    Returns
    -------
    The processor from the first query with the results of every later
    query merged into it, or None if no agent had a usable key.
    """
    # Start from the query settings and fill in any missing defaults.
    query_kwargs = self.query.settings.copy()
    for setting, default in (('max_stmts', 100),
                             ('ev_limit', 2),
                             ('persist', False)):
        query_kwargs.setdefault(setting, default)

    # Query agent by agent, accumulating a single processor and narrowing
    # `self.commons` down to the neighbors shared by all agents so far.
    merged = None
    for agent, agent_key in zip(self.query.agents, self.query.agent_keys):
        if agent_key is None:
            continue

        query_kwargs[self._role.lower()] = agent_key
        latest = idbr.get_statements(**query_kwargs)
        latest.wait_until_done()

        first_pass = merged is None
        for other_agent, stmt in self._iter_stmts(latest.statements):
            if other_agent is None or 'HGNC' not in other_agent.db_refs:
                continue
            other_name = other_agent.name
            if other_name in self.commons:
                # Known neighbor: extend (or create) this agent's list.
                by_agent = self.commons[other_name]
                by_agent.setdefault(agent.name, []).append(stmt)
            elif first_pass:
                # New neighbors may only be introduced by the first query.
                self.commons[other_name] = {agent.name: [stmt]}

        if first_pass:
            merged = latest
        else:
            # Drop every neighbor that found no match for this agent.
            self.commons = {name: data
                            for name, data in self.commons.items()
                            if agent.name in data}
            merged.merge_results(latest)

        # If there's nothing left in common, it won't get better.
        if not self.commons:
            break
    return merged
def test_regulate_amount():
    """Check that a 'RegulateAmount' query yields both amount subtypes.

    The class names of the returned statements must include both
    'IncreaseAmount' and 'DecreaseAmount'.
    """
    stmts = dbr.get_statements('FOS', stmt_type='RegulateAmount',
                               simple_response=True)
    print(len(stmts))
    stmt_types = {type(stmt).__name__ for stmt in stmts}
    print(stmt_types)
    required = {'IncreaseAmount', 'DecreaseAmount'}
    assert required <= stmt_types, stmt_types
def machine(manager: Manager, agents: List[str], local: bool, host: str):
    """Get content from the INDRA machine and upload to BEL Commons."""
    from indra.sources import indra_db_rest
    from pybel import from_indra_statements

    statements = indra_db_rest.get_statements(agents=agents)
    click.echo('got {} statements from INDRA'.format(len(statements)))

    # The graph is named after the sorted agents and versioned by date.
    graph_name = 'INDRA Machine for {}'.format(', '.join(sorted(agents)))
    graph_version = time.strftime('%Y%m%d')
    graph = from_indra_statements(
        statements,
        name=graph_name,
        version=graph_version,
    )

    node_count = graph.number_of_nodes()
    edge_count = graph.number_of_edges()
    click.echo('built BEL graph with {} nodes and {} edges'.format(
        node_count, edge_count))

    # Refuse to upload an empty graph; exit with a failure status.
    if len(graph) == 0:
        click.echo('not uploading empty graph')
        sys.exit(-1)

    if not local:
        # Remote upload: surface any HTTP error from the web service.
        resp = to_web(graph, host=host)
        resp.raise_for_status()
    else:
        to_database(graph, manager=manager)
def _get_matching_stmts(stmt_ref):
    """Look up database statements matching a reference statement.

    Parameters
    ----------
    stmt_ref
        Reference statement; its class name is used as the statement type
        and its agents (via ``_get_agent_ref``) as the query terms.

    Returns
    -------
    list
        The ``statements`` of the processor returned by the query, or an
        empty list if a binary statement has no usable subject or object.
    """
    # Filter by statement type.
    type_name = stmt_ref.__class__.__name__
    agent_refs = [_get_agent_ref(agent) for agent in stmt_ref.agent_list()]

    # TODO: We should look at more than just the agent name.
    # Doing so efficiently may require changes to the web api.
    if isinstance(stmt_ref, (Complex, SelfModification, ActiveForm)):
        # Non-binary statements: query by the set of agents, no roles.
        role_kwargs = {}
        agents = [ref for ref in agent_refs if ref is not None]
    else:
        # Binary statements: the first two agents are subject and object.
        role_kwargs = dict(zip(('subject', 'object'), agent_refs))
        if not any(role_kwargs.values()):
            return []
        agents = []

    print(agents)
    processor = get_statements(agents=agents, stmt_type=type_name,
                               **role_kwargs)
    return processor.statements
def get_db_stmts_by_grounding(db_ns, db_id):
    """Fetch statements for one grounded agent, minus some sources.

    Parameters
    ----------
    db_ns : str
        Namespace of the grounding.
    db_id : str
        Identifier within that namespace.

    Returns
    -------
    The statements left after ``filter_out_source_evidence`` has been
    applied with the sources 'medscan' and 'tas'.
    """
    agent_key = '%s@%s' % (db_id, db_ns)
    processor = indra_db_rest.get_statements(agents=[agent_key],
                                             ev_limit=100,
                                             max_stmts=5000)
    excluded_sources = {'medscan', 'tas'}
    filtered = filter_out_source_evidence(processor.statements,
                                          excluded_sources)
    print('%d statements for %s:%s' % (len(filtered), db_ns, db_id))
    return filtered
def get_drug_statements(groundings):
    """Collect Inhibition and Complex statements for a set of groundings.

    Parameters
    ----------
    groundings : iterable of (db_ns, db_id)
        Each pair is queried as the subject ``<db_id>@<db_ns>``.

    Returns
    -------
    The statements, de-duplicated by hash, after 'medscan' evidence has
    been removed and ``filter_db_support`` and ``fix_invalid`` applied.
    """
    stmts_by_hash = {}
    for db_ns, db_id in groundings:
        print('Searching for %s@%s' % (db_id, db_ns))
        processor = indra_db_rest.get_statements(
            subject='%s@%s' % (db_id, db_ns), ev_limit=100)
        fetched = processor.statements
        candidates = (ac.filter_by_type(fetched, Inhibition)
                      + ac.filter_by_type(fetched, Complex))

        # Strip medscan evidence in place; statements left with no
        # evidence at all are dropped.
        retained = []
        for stmt in candidates:
            evidence = [ev for ev in stmt.evidence
                        if ev.source_api != 'medscan']
            if evidence:
                stmt.evidence = evidence
                retained.append(stmt)

        # Keying by hash means later duplicates replace earlier ones.
        stmts_by_hash.update((stmt.get_hash(), stmt) for stmt in retained)

    return fix_invalid(filter_db_support(list(stmts_by_hash.values())))
def get_binary_directed(entity1, entity2, verb=None):
    """Query for statements with `entity1` as subject, `entity2` as object.

    Parameters
    ----------
    entity1, entity2 : str
        Names handed to ``get_grounding_from_name``.
    verb : str, optional
        If given and present in ``mod_map``, the mapped value is passed to
        the query as ``stmt_type``; otherwise no type filter is applied.

    Returns
    -------
    The result of ``indra_db_rest.get_statements``, unmodified.
    """
    keys = []
    for entity in (entity1, entity2):
        db_name, db_id = get_grounding_from_name(entity)
        keys.append('%s@%s' % (db_id, db_name))
    print(*keys)

    query = dict(zip(('subject', 'object'), keys))
    # Only constrain the statement type for verbs with a known mapping.
    if verb and verb in mod_map:
        query['stmt_type'] = mod_map[verb]
    stmts = indra_db_rest.get_statements(**query)
    print(len(stmts))
    return stmts
def suggest_statements(self, u_stmt, v_stmt, num_statements=5):
    """Suggest statements linking the end of `u_stmt` to the start of `v_stmt`.

    Parameters
    ----------
    u_stmt
        Statement whose second agent is used as the query subject.
    v_stmt
        Statement whose first agent is used as the query object.
    num_statements : int
        Maximum number of statements to return.

    Returns
    -------
    list or tuple
        An empty list if either agent lacks an HGNC, FPLX or CHEBI
        grounding. Otherwise a tuple ``(stmts, subj_agent, obj_agent)``
        where `stmts` holds at most `num_statements` statements ordered
        from most to least evidence.
    """
    def query_agent(ag):
        # Namespaces are tried in priority order; the first hit wins.
        for namespace in ('HGNC', 'FPLX', 'CHEBI'):
            if namespace in ag.db_refs:
                return '%s@%s' % (ag.db_refs[namespace], namespace)
        return None

    subj_agent = u_stmt.agent_list()[1]
    obj_agent = v_stmt.agent_list()[0]
    subj = query_agent(subj_agent)
    obj = query_agent(obj_agent)
    if not subj or not obj:
        return []

    processor = get_statements(subject=subj, object=obj, persist=False,
                               ev_limit=10)
    stmts = processor.statements
    # Sorts the processor's list in place, most evidence first.
    stmts.sort(key=lambda s: len(s.evidence), reverse=True)
    top = stmts[:min(len(stmts), num_statements)]
    return top, subj_agent, obj_agent
def test_sort_by_ev_count():
    """Check that ``sort_by='ev_count'`` yields descending evidence counts."""
    processor = dbr.get_statements(object="MEK", stmt_type="Inhibition",
                                   sort_by='ev_count', limit=10,
                                   ev_limit=None)
    assert processor.statements

    counts = [processor.get_ev_count(stmt)
              for stmt in processor.statements]
    descending = sorted(counts, reverse=True)
    # The message is built only on failure, as part of the assert itself.
    assert counts == descending,\
        f"Counts mis-ordered!\ncounts: {counts}\nev_counts: {processor.get_ev_counts()}"
def __check_request(seconds, *args, **kwargs):
    """Run a ``get_statements`` query and optionally check it is non-empty.

    Parameters
    ----------
    seconds : int or float
        Accepted for signature compatibility with callers. No time limit
        is enforced by this function.
    *args, **kwargs
        Forwarded to ``dbr.get_statements``, except for the keyword
        ``check_stmts`` (default True) which is consumed here.

    Returns
    -------
    The processor returned by ``dbr.get_statements``.

    Raises
    ------
    AssertionError
        If ``check_stmts`` is true and the processor has no statements.
    """
    check_stmts = kwargs.pop('check_stmts', True)
    # The elapsed time used to be measured here but was never checked, so
    # the dead timing code has been removed.
    resp = dbr.get_statements(*args, **kwargs)
    if check_stmts:
        assert resp.statements, "Got no statements."
    return resp
def test_sort_by_belief():
    """Check that ``sort_by='belief'`` yields descending belief scores."""
    processor = dbr.get_statements(object="MEK", stmt_type="Inhibition",
                                   sort_by='belief', limit=10)
    assert processor.statements

    beliefs = [stmt.belief for stmt in processor.statements]
    descending = sorted(beliefs, reverse=True)
    # The message is built only on failure, as part of the assert itself.
    assert beliefs == descending, \
        f"Beliefs mis-ordered!\nbeliefs: {beliefs}\n" \
        f"belief_dict: {processor.get_belief_scores()}"
def test_timeout_no_persist_agent():
    """Check a zero-timeout, non-persistent agent query.

    The query must still be running right after the call, and once it
    has been waited on it must hold more than 90% of
    ``EXPECTED_BATCH_SIZE`` statements.
    """
    candidates = ['TP53', 'NFkappaB@FPLX', 'AKT@FPLX']
    agent = random.choice(candidates)
    print(agent)

    resp = dbr.get_statements(agents=[agent], persist=False, timeout=0)
    assert resp.is_working(), "Lookup resolved too fast."

    resp.wait_until_done(70)
    num_stmts = len(resp.statements)
    assert num_stmts > 0.9*EXPECTED_BATCH_SIZE, num_stmts
def test_timeout_no_persist_agent():
    """Check a zero-timeout, non-persistent agent query.

    The query must still be running right after the call, and once it
    has been waited on it must hold exactly ``EXPECTED_BATCH_SIZE``
    statements.
    """
    candidates = ['TP53', 'NFkappaB@FPLX', 'AKT@FPLX']
    agent = random.choice(candidates)
    print(agent)

    resp = dbr.get_statements(agents=[agent], persist=False, timeout=0)
    assert resp.is_working(), "Lookup resolved too fast."

    resp.wait_until_done(70)
    num_stmts = len(resp.statements)
    assert num_stmts == EXPECTED_BATCH_SIZE, num_stmts
def test_famplex_namespace():
    """Check subject/object matching for a FamPlex-grounded query.

    Every returned statement must have its first agent grounded to
    FPLX 'PDGF' and its second agent named 'FOS'.
    """
    stmts = dbr.get_statements('PDGF@FPLX', 'FOS',
                               stmt_type='IncreaseAmount',
                               simple_response=True)
    print(len(stmts))

    subject_checks = [
        stmt.agent_list()[0].db_refs.get('FPLX') == 'PDGF'
        for stmt in stmts
    ]
    assert all(subject_checks), 'Not all subjects match.'

    object_checks = [stmt.agent_list()[1].name == 'FOS' for stmt in stmts]
    assert all(object_checks), 'Not all objects match.'
def test_regulate_amount():
    """Check a 'RegulateAmount' query and its source-count accessors.

    The per-hash source count must agree with the corresponding entry of
    the full source-count mapping, and the returned statement classes
    must include both 'IncreaseAmount' and 'DecreaseAmount'.
    """
    processor = dbr.get_statements('FOS', stmt_type='RegulateAmount')
    found_types = {type(stmt).__name__ for stmt in processor.statements}

    # Spot-check one arbitrary entry of the source counts.
    source_counts = processor.get_source_counts()
    sample_key = list(source_counts)[0]
    by_hash = processor.get_source_count_by_hash(sample_key)
    assert source_counts[sample_key] == by_hash

    required = {'IncreaseAmount', 'DecreaseAmount'}
    assert required <= found_types, \
        found_types
def get_binary_undirected(entity1, entity2):
    """Query for statements involving both entities in any role.

    Parameters
    ----------
    entity1, entity2 : str
        Names handed to ``get_grounding_from_name``.

    Returns
    -------
    The result of ``indra_db_rest.get_statements``, unmodified.
    """
    keys = []
    for entity in (entity1, entity2):
        db_name, db_id = get_grounding_from_name(entity)
        keys.append('%s@%s' % (db_id, db_name))
    print(*keys)

    stmts = indra_db_rest.get_statements(agents=keys)
    print(len(stmts))
    return stmts
def get_db_stmts(target):
    """Fetch Inhibition statements targeting `target` by small molecules.

    Parameters
    ----------
    target : str
        Passed to the query as the statement object.

    Returns
    -------
    The statements whose subject passes ``is_small_molecule``, after
    ``filter_out_source_evidence`` has been applied with the sources
    'tas' and 'medscan'.
    """
    processor = indra_db_rest.get_statements(object=target,
                                             stmt_type='Inhibition',
                                             ev_limit=10000)
    fetched = processor.statements
    print('Number of statements from DB: %s' % len(fetched))

    by_small_molecule = [stmt for stmt in fetched
                         if is_small_molecule(stmt.subj)]
    return filter_out_source_evidence(by_small_molecule,
                                      {'tas', 'medscan'})
def _make_processor(self):
    """Create an instance of an indra_db_rest processor.

    This method makes use of the `query` attribute: its subject key,
    object key, agent keys and settings are always passed to the query,
    and its statement type is added only when the query has a verb.
    """
    params = {
        'subject': self.query.subj_key,
        'object': self.query.obj_key,
        'agents': self.query.agent_keys,
    }
    # Without a verb there is no statement type to filter on.
    if self.query.verb:
        params['stmt_type'] = self.query.stmt_type
    return idbr.get_statements(**params, **self.query.settings)
def get_statements_for_kinase_db_api(kinase):
    """Fetch statements for a kinase, ordered by amount of evidence.

    Parameters
    ----------
    kinase : str
        Name handed to ``hgnc_client.get_current_hgnc_id``.

    Returns
    -------
    list or None
        None if no HGNC ID could be found. Otherwise the statements left
        by ``filter_out_medscan``, sorted from most to least evidence.
    """
    logger.info('Getting statements for %s' % kinase)
    hgnc_id = hgnc_client.get_current_hgnc_id(kinase)
    if hgnc_id is None:
        logger.warning('Could not get HGNC ID for %s' % kinase)
        return None

    agent_key = '%s@HGNC' % hgnc_id
    processor = get_statements(agents=[agent_key], ev_limit=10000)
    ranked = list(filter_out_medscan(processor.statements))
    ranked.sort(key=lambda stmt: len(stmt.evidence), reverse=True)
    return ranked
def test_get_statements_strict_stop_short():
    """Check ``strict_stop`` with a 1 second timeout.

    The call must return within [1, 1.5) seconds, the processor must not
    be working five seconds later, and it must hold neither statements
    nor a statement sample.
    """
    started = datetime.now()
    processor = dbr.get_statements("TNF", timeout=1, strict_stop=True)
    finished = datetime.now()
    elapsed = (finished - started).total_seconds()

    # Allow time for any lingering work before checking that it stopped.
    sleep(5)
    assert not processor.is_working()
    assert 1 <= elapsed < 1.5, elapsed
    assert not processor.statements
    assert not processor.statements_sample
def test_get_statements_strict_stop_long():
    """Check ``strict_stop`` with a 31 second timeout.

    The call must return within [timeout, timeout + 0.5) seconds, the
    processor must not be working five seconds later, and it must hold
    statements.
    """
    timeout = 31
    started = datetime.now()
    processor = dbr.get_statements("TNF", timeout=timeout,
                                   strict_stop=True)
    finished = datetime.now()
    elapsed = (finished - started).total_seconds()

    # Allow time for any lingering work before checking that it stopped.
    sleep(5)
    assert not processor.is_working()
    assert timeout <= elapsed < (timeout + 0.5), elapsed
    assert processor.statements
def get_channel_statements(channels, ev_limit=100):
    """Get all statements from the database for a list of gene symbols.

    Parameters
    ----------
    channels : iterable of str
        Each entry is queried on its own as the single agent.
    ev_limit : int
        Forwarded to ``get_statements`` as ``ev_limit``.

    Returns
    -------
    dict
        Maps each channel to the statements returned by
        ``filter_out_medscan`` for that channel's query.
    """
    def _fetch(channel):
        processor = get_statements(agents=[channel], ev_limit=ev_limit,
                                   best_first=False)
        source_counts = processor.get_source_counts()
        return filter_out_medscan(processor.statements, source_counts)

    return {channel: _fetch(channel) for channel in channels}
def test_timeout_no_persist_type_object():
    """Check a zero-timeout, non-persistent type-and-object query.

    The query must still be running right after the call, and once it
    has been waited on it must hold exactly 1000 statements.
    """
    candidates = ['TP53', 'NFkappaB@FPLX', 'AKT@FPLX']
    agent = random.choice(candidates)
    print(agent)

    resp = dbr.get_statements(stmt_type='phosphorylation', object=agent,
                              persist=False, timeout=0)
    assert resp.is_working(), "Lookup resolved too fast."

    resp.wait_until_done(70)
    num_stmts = len(resp.statements)
    assert num_stmts == 1000, num_stmts
def __check_request(seconds, *args, **kwargs):
    """Run a ``get_statements`` query and check its content and duration.

    Parameters
    ----------
    seconds : int or float
        Upper bound (exclusive) on the wall-clock duration of the query.
    *args, **kwargs
        Forwarded to ``dbr.get_statements``, except for the keyword
        ``check_stmts`` (default True) which is consumed here.

    Returns
    -------
    The value returned by ``dbr.get_statements``.

    Raises
    ------
    AssertionError
        If ``check_stmts`` is true and no statements came back, or if the
        query took `seconds` or longer.
    """
    check_stmts = kwargs.pop('check_stmts', True)
    start = datetime.now()
    resp = dbr.get_statements(*args, **kwargs)
    # total_seconds() covers the whole interval; `.seconds` alone drops the
    # days component and truncates fractions of a second.
    elapsed = (datetime.now() - start).total_seconds()
    if check_stmts:
        # When `simple_response` is absent or true the response itself is
        # treated as the statements; otherwise they are read from the
        # response's `statements` attribute.
        if kwargs.get('simple_response', True):
            stmts = resp
        else:
            stmts = resp.statements
        assert stmts, "Got no statements."
    assert elapsed < seconds, elapsed
    return resp
def _get_tas_stmts(self, drug_term=None, target_term=None):
    """Find Inhibition statements that carry 'tas' evidence.

    Parameters
    ----------
    drug_term, target_term
        Converted with ``_convert_term`` and used as the subject and
        object of the query, respectively.

    Returns
    -------
    generator
        Yields each statement having at least one evidence whose
        ``source_api`` is 'tas'.

    Raises
    ------
    DatabaseTimeoutError
        If the processor is still working once the query call returns.
    """
    timeout = 10
    drug = _convert_term(drug_term)
    target = _convert_term(target_term)
    processor = get_statements(subject=drug, object=target,
                               stmt_type='Inhibition', timeout=timeout)

    if processor.is_working():
        msg = ("Database has failed to respond after %d seconds looking "
               "up %s inhibits %s." % (timeout, drug, target))
        logger.error(msg)
        raise DatabaseTimeoutError(msg)

    def _has_tas_evidence(stmt):
        return any(ev.source_api == 'tas' for ev in stmt.evidence)

    return (stmt for stmt in processor.statements
            if _has_tas_evidence(stmt))
def __check_request(seconds, *args, **kwargs):
    """Run a ``get_statements`` query and check its content and duration.

    Parameters
    ----------
    seconds : int or float
        Upper bound (exclusive) on the wall-clock duration of the query.
    *args, **kwargs
        Forwarded to ``dbr.get_statements``. The keyword ``check_stmts``
        (default True) is consumed here, and ``simple_response`` is set to
        True when the caller did not supply it.

    Returns
    -------
    The value returned by ``dbr.get_statements``.

    Raises
    ------
    AssertionError
        If ``check_stmts`` is true and no statements came back, or if the
        query took `seconds` or longer.
    """
    check_stmts = kwargs.pop('check_stmts', True)
    # Always pass `simple_response` explicitly, defaulting it to True.
    simple_response = kwargs.setdefault('simple_response', True)

    start = datetime.now()
    resp = dbr.get_statements(*args, **kwargs)
    # total_seconds() covers the whole interval; `.seconds` alone drops the
    # days component and truncates fractions of a second.
    elapsed = (datetime.now() - start).total_seconds()

    if check_stmts:
        # A simple response is itself treated as the statements; otherwise
        # they are read from the response's `statements` attribute.
        stmts = resp if simple_response else resp.statements
        assert stmts, "Got no statements."
    assert elapsed < seconds, elapsed
    return resp
def test_get_statements_end_on_limit():
    """Check that a limited query stops working once its limit is used up.

    While the processor reports it is working, it is polled once per
    second for at most 100 polls. It may be seen still working with a
    next limit of 0 at most three times. The processor is always
    cancelled and waited on at the end.
    """
    max_violations = 3
    processor = dbr.get_statements(subject="TNF", limit=1400, timeout=1)
    try:
        polls = 0
        violations = 0
        while processor.is_working():
            assert polls < 100
            next_limit = processor._get_next_limit()
            # Still working although nothing more is left to request.
            if next_limit == 0 and processor.is_working():
                violations += 1
            assert violations <= max_violations
            sleep(1)
            polls += 1
    finally:
        processor.cancel()
        processor.wait_until_done()
def get_db_stmts(target):
    """Fetch Inhibition statements targeting `target` by small molecules.

    Parameters
    ----------
    target : str
        Passed to the query as the statement object.

    Returns
    -------
    The statements whose subject passes ``is_small_molecule``, after
    ``filter_out_source_evidence`` has been applied with the sources
    'tas' and 'medscan'. CHEBI groundings on their agents that do not
    start with 'CHEBI' are rewritten in place as ``CHEBI:<id>``.
    """
    processor = indra_db_rest.get_statements(object=target,
                                             stmt_type='Inhibition',
                                             ev_limit=10000)
    fetched = processor.statements
    print('Number of statements from DB: %s' % len(fetched))

    by_small_molecule = [stmt for stmt in fetched
                         if is_small_molecule(stmt.subj)]
    db_stmts = filter_out_source_evidence(by_small_molecule,
                                          {'tas', 'medscan'})

    # Make sure every CHEBI identifier carries the 'CHEBI:' prefix.
    for stmt in db_stmts:
        for agent in stmt.agent_list():
            if agent is None or 'CHEBI' not in agent.db_refs:
                continue
            chebi_id = agent.db_refs['CHEBI']
            if chebi_id.startswith('CHEBI'):
                continue
            agent.db_refs['CHEBI'] = 'CHEBI:%s' % chebi_id
    return db_stmts
def get_statements(**kwargs):
    """Query the database and rank the statements by evidence total.

    Parameters
    ----------
    **kwargs
        Forwarded to ``indra_db_rest.get_statements`` together with
        ``simple_response=False``.

    Returns
    -------
    sorted_stmts : list
        The statements ordered from most to least total evidence.
    ev_totals : dict
        Maps each statement hash (as an int) to the evidence count
        reported for it by the processor.
    """
    # Run the actual query, asking for the full (non-simple) response.
    res = indra_db_rest.get_statements(simple_response=False, **kwargs)

    # Statements keyed by hash, and from those the evidence total of each.
    hash_stmts_dict = res.get_hash_statements_dict()
    ev_totals = {int(stmt_hash): res.get_ev_count_by_hash(stmt_hash)
                 for stmt_hash in hash_stmts_dict}

    def _evidence_total(item):
        # Hashes without a recorded total count as zero.
        return ev_totals.get(int(item[0]), 0)

    ranked = sorted(hash_stmts_dict.items(), key=_evidence_total,
                    reverse=True)
    sorted_stmts = [stmt for _, stmt in ranked]
    return sorted_stmts, ev_totals
def _get_matching_stmts(stmt_ref):
    """Look up database statements matching a reference statement.

    Parameters
    ----------
    stmt_ref
        Reference statement; its class name is used as the statement type
        and its agents (via ``_get_agent_ref``) as the query terms.

    Returns
    -------
    An empty list if ``CAN_CHECK_STATEMENTS`` is false, otherwise the
    result of ``get_statements``.

    Raises
    ------
    BioagentException
        If a binary statement has neither a usable subject nor object.
    """
    if not CAN_CHECK_STATEMENTS:
        return []

    # Filter by statement type.
    type_name = stmt_ref.__class__.__name__
    agent_refs = [_get_agent_ref(agent) for agent in stmt_ref.agent_list()]

    if isinstance(stmt_ref, (Complex, SelfModification, ActiveForm)):
        # Non-binary statements: query by the set of agents, no roles.
        query = {'agents': [ref for ref in agent_refs if ref is not None]}
    else:
        # Binary statements: the first two agents are subject and object.
        query = dict(zip(('subject', 'object'), agent_refs))
        if not any(query.values()):
            raise BioagentException('Either subject or object must be '
                                    'something other than None.')

    query.update(ev_limit=2, persist=False)
    return get_statements(stmt_type=type_name, **query)
def suggest_statements(self, u_stmt, v_stmt, num_statements=5):
    """Suggest statements linking the end of `u_stmt` to the start of `v_stmt`.

    Parameters
    ----------
    u_stmt
        Statement whose second agent is used as the query subject.
    v_stmt
        Statement whose first agent is used as the query object.
    num_statements : int
        Maximum number of statements to return.

    Returns
    -------
    list
        At most `num_statements` statements ordered from most to least
        evidence; empty if either agent lacks an HGNC, FPLX or CHEBI
        grounding.
    """
    def query_agent(ag):
        # Namespaces are tried in priority order; the first hit wins.
        for namespace in ('HGNC', 'FPLX', 'CHEBI'):
            if namespace in ag.db_refs:
                return '%s@%s' % (ag.db_refs[namespace], namespace)
        return None

    subj = query_agent(u_stmt.agent_list()[1])
    obj = query_agent(v_stmt.agent_list()[0])
    if not subj or not obj:
        return []

    stmts = get_statements(subject=subj, object=obj, persist=False,
                           ev_limit=10, simple_response=True)
    # Sorts the returned list in place, most evidence first.
    stmts.sort(key=lambda s: len(s.evidence), reverse=True)
    return stmts[:min(len(stmts), num_statements)]
def _get_matching_stmts(stmt_ref):
    """Look up database statements matching a reference statement.

    Parameters
    ----------
    stmt_ref
        Reference statement; its class name is used as the statement type
        and its agents (via ``_get_agent_ref``) as the query terms.

    Returns
    -------
    An empty list if ``CAN_CHECK_STATEMENTS`` is false, otherwise the
    result of ``get_statements`` called with ``simple_response=True``.

    Raises
    ------
    BioagentException
        If a binary statement has neither a usable subject nor object.
    """
    if not CAN_CHECK_STATEMENTS:
        return []

    # Filter by statement type.
    type_name = stmt_ref.__class__.__name__
    agent_refs = [_get_agent_ref(agent) for agent in stmt_ref.agent_list()]

    if isinstance(stmt_ref, (Complex, SelfModification, ActiveForm)):
        # Non-binary statements: query by the set of agents, no roles.
        query = {'agents': [ref for ref in agent_refs if ref is not None]}
    else:
        # Binary statements: the first two agents are subject and object.
        query = dict(zip(('subject', 'object'), agent_refs))
        if not any(query.values()):
            raise BioagentException('Either subject or object must be '
                                    'something other than None.')

    query.update(ev_limit=2, persist=False, simple_response=True)
    return get_statements(stmt_type=type_name, **query)
def _get_matching_stmts(stmt_ref):
    """Look up database statements matching a reference statement.

    Parameters
    ----------
    stmt_ref
        Reference statement; its class name is used as the statement type
        and its agents (via ``_get_agent_ref``) as the query terms.

    Returns
    -------
    The result of ``get_statements`` called with ``simple_response=True``,
    or an empty list if a binary statement has no usable subject or
    object.
    """
    # Filter by statement type.
    type_name = stmt_ref.__class__.__name__
    agent_refs = [_get_agent_ref(agent) for agent in stmt_ref.agent_list()]

    # TODO: We should look at more than just the agent name.
    # Doing so efficiently may require changes to the web api.
    if isinstance(stmt_ref, (Complex, SelfModification, ActiveForm)):
        # Non-binary statements: query by the set of agents, no roles.
        role_kwargs = {}
        agents = [ref for ref in agent_refs if ref is not None]
    else:
        # Binary statements: the first two agents are subject and object.
        role_kwargs = dict(zip(('subject', 'object'), agent_refs))
        if not any(role_kwargs.values()):
            return []
        agents = []

    print(agents)
    return get_statements(agents=agents, stmt_type=type_name,
                          simple_response=True, **role_kwargs)
def is_mutation_activating(gene_name, mutation_status):
    """Checks the database for whether a mutation is activating.

    Parameters
    ----------
    gene_name : str
        The name of the gene; used as the only agent in the query.
    mutation_status : list<str>
        Mutations. NOTE(review): not used by the code visible here.

    Returns
    -------
    None
        NOTE(review): the intended contract appears to be True if the
        database says the mutation is activating, False if it says the
        mutation is inhibiting, and None if it doesn't say. The visible
        body only prints the retrieved ActiveForm statements and returns
        nothing -- confirm whether the classification is still to be
        implemented.
    """
    statements = get_statements(agents=[gene_name],
                                stmt_type='ActiveForm',
                                simple_response=True)
    print('ActiveForm statements involving', gene_name)
    for statement in statements:
        print(statement)