def test_num_evidence():
    """Check that HasNumEvidence restricts results to the requested counts.

    Queries for statements whose evidence count is one of ``range(5, 10)``
    and verifies both the reported evidence counts and the number of
    evidence objects actually attached when ``ev_limit=8``.
    """
    ro = get_ro('primary')
    q = HasNumEvidence(tuple(range(5, 10)))
    res = q.get_statements(ro, limit=5, ev_limit=8)
    assert all(5 <= n < 10 for n in res.evidence_counts.values())
    stmts = res.statements()
    # 5 is one of the requested counts, so the lower bound must be inclusive
    # (matching the check on the reported counts above); the upper bound is
    # the ev_limit passed to get_statements.
    assert all(5 <= len(s.evidence) <= 8 for s in stmts)
def dump(self, continuing=False):
    """Write the SIF dump to this object's S3 path.

    The database handle comes from ``get_db`` when ``self.use_principal``
    is truthy and from ``get_ro`` otherwise. ``continuing`` is accepted
    but not used in this method.
    """
    open_handle = get_db if self.use_principal else get_ro
    handle = open_handle(self.db_label)
    target = self.get_s3_path()
    dump_sif(target, ro=handle)
def test_evidence_filtering_trios():
    """Run statement queries with evidence filters built from query trios.

    No assertions are made on the results; the test only requires that
    every ``get_statements`` call completes without raising.
    """
    ro = get_ro('primary')
    agent_query = HasAgent('TP53')
    candidates = [
        ~HasOnlySource('medscan'),
        HasSources(['reach', 'sparser']),
        HasDatabases(),
        HasReadings(),
        FromMeshIds(['D001943']),
    ]

    # Unordered triples: filters joined with only `&`, then with only `|`.
    for qa, qb, qc in combinations(candidates, 3):
        query = agent_query | qa | qb | qc

        all_of = qa.ev_filter() & qb.ev_filter() & qc.ev_filter()
        query.get_statements(ro, limit=2, ev_limit=5,
                             evidence_filter=all_of)

        any_of = qa.ev_filter() | qb.ev_filter() | qc.ev_filter()
        query.get_statements(ro, limit=2, ev_limit=5,
                             evidence_filter=any_of)

    # Ordered triples: a filter that mixes `&` and `|`.
    for qa, qb, qc in permutations(candidates, 3):
        query = agent_query | qa | qb | qc
        mixed = qa.ev_filter() & qb.ev_filter() | qc.ev_filter()
        query.get_statements(ro, limit=2, ev_limit=5,
                             evidence_filter=mixed)
def test_evidence_bounds_basic_case():
    """Check that HasEvidenceBound keeps evidence counts within (5, 10]."""
    ro = get_ro("primary")
    agents = HasAgent('MEK') & HasAgent('ERK')
    query = agents & HasEvidenceBound(["<= 10", " > 5"])
    result = query.get_hashes(ro)
    counts = result.evidence_counts.values()
    assert all(5 < count <= 10 for count in counts)
def test_has_hash():
    """Check that HasHash only returns statements with the given hashes.

    Ten hashes are read from ``SourceMeta`` and queried with ``limit=5``;
    the result keys must be a proper subset of those hashes and must match
    the keys of the source counts.
    """
    ro = get_ro('primary')
    hash_rows = ro.session.query(ro.SourceMeta.mk_hash).limit(10)
    hashes = {mk_hash for (mk_hash,) in hash_rows}
    res = HasHash(hashes).get_statements(ro, limit=5, ev_limit=8)
    found = set(res.results.keys())
    assert found < hashes
    assert found == set(res.source_counts.keys())
def test_has_agent_namespace_only():
    """Check HasAgent when only a namespace is given.

    Every returned statement must have at least one agent with an 'HGNC'
    entry in its ``db_refs``.
    """
    ro = get_ro('primary')
    query = HasAgent(namespace='HGNC') & HasType(["Inhibition"])
    result = query.get_statements(ro, limit=100)
    stmts = result.statements()
    assert len(stmts) == 100
    for stmt in stmts:
        assert any('HGNC' in agent.db_refs for agent in stmt.agent_list())
def load_readonly_dump(db_label, ro_label, dump_file):
    """Load `dump_file` into the readonly database.

    Parameters
    ----------
    db_label :
        Label passed to ``get_db`` to obtain the principal database handle.
    ro_label :
        Label passed to ``get_ro`` to obtain the readonly database handle.
    dump_file :
        The dump to load; it is logged and handed to ``load_dump``.
    """
    principal_db = get_db(db_label)
    readonly_db = get_ro(ro_label)

    logger.info("Using dump_file = \"%s\"." % dump_file)
    started_at = datetime.now()
    logger.info("%s - Beginning upload of content (est. ~30 minutes)"
                % started_at)

    # The load itself runs inside the transfer environment context.
    with ReadonlyTransferEnv(principal_db, readonly_db):
        readonly_db.load_dump(dump_file)
def test_evidence_bounds_null_cases():
    """Check that evidence bounds below one yield no results."""
    ro = get_ro('primary')

    # The two bounds are checked one after another, in this order.
    for bound in ("< 1", "== 0"):
        query = HasAgent('MEK') & HasEvidenceBound([bound])
        result = query.get_hashes(ro, limit=10)
        assert len(result.results) == 0
def test_get_agents():
    """Check the result type, size limit and JSON form of ``get_agents``."""
    ro = get_ro('primary')
    res = HasAgent('TP53').get_agents(ro, limit=10)
    assert isinstance(res, QueryResult)

    n_results = len(res.results)
    assert n_results <= 10, n_results

    as_json = res.json()
    assert 'results' in as_json
    assert len(as_json['results']) == len(res.results)
def load_readonly(from_dump):
    """Load the readonly database with readonly schema dump.

    The dump is located through the manifest of the ``Start`` built from
    `from_dump`. If no dump path is found, an error is printed and nothing
    is loaded.
    """
    start = Start.from_date(from_dump)
    readonly_dump = Readonly.from_list(start.manifest)
    dump_file = readonly_dump.get_s3_path()

    if dump_file:
        principal = get_db('primary', protected=True)
        readonly = get_ro('primary', protected=False)
        load_readonly_dump(principal, readonly, dump_file)
    else:
        print(f"ERROR: No readonly dump for {start.date_stamp}")
def test_from_papers():
    """Check that FromPapers returns statements backed by the given PMID.

    At least one statement must come back, and each statement needs at
    least one evidence whose ``text_refs`` carries the queried PMID.
    """
    ro = get_ro('primary')
    pmid = '27014235'
    query = FromPapers([('pmid', pmid)])
    res = query.get_statements(ro, limit=5)
    assert res.statements()
    for stmt in res.statements():
        assert any(ev.text_refs.get('PMID') == pmid for ev in stmt.evidence)
def dump(self, continuing=False):
    """Upload the distinct ``pa_json`` entries as one JSON list to S3.

    The database handle comes from ``get_db`` when ``self.use_principal``
    is truthy and from ``get_ro`` otherwise. ``continuing`` is accepted
    but not used in this method.
    """
    open_handle = get_db if self.use_principal else get_ro
    handle = open_handle(self.db_label)

    rows = handle.session.query(handle.FastRawPaLink.pa_json.distinct())
    # Each row holds a single column containing the JSON text.
    json_list = [json.loads(row[0]) for row in rows.all()]

    s3 = boto3.client('s3')
    body = json.dumps(json_list)
    location = self.get_s3_path().kw()
    s3.put_object(Body=body, **location)
def test_evidence_count_is_10():
    """Check evidence accounting when ``ev_limit=10`` and ``limit=2``.

    Each statement may carry at most 10 evidence, the result must report
    exactly 20 returned evidence, and the total evidence counts must
    exceed 20.
    """
    ro = get_ro('primary')
    query = HasAgent('TP53') - HasOnlySource('medscan')
    res = query.get_statements(ro, limit=2, ev_limit=10)
    assert isinstance(res, StatementQueryResult)

    stmts = res.statements()
    assert len(stmts) == 2
    for stmt in stmts:
        assert len(stmt.evidence) <= 10

    assert res.returned_evidence == 20
    total_available = sum(res.evidence_counts.values())
    assert total_available > 20
def dump(self, continuing=False):
    """Upload the pickled (mk_hash, mesh_num) rows of ``MeshMeta`` to S3.

    The database handle comes from ``get_db`` when ``self.use_principal``
    is truthy and from ``get_ro`` otherwise. ``continuing`` is accepted
    but not used in this method.
    """
    open_handle = get_db if self.use_principal else get_ro
    handle = open_handle(self.db_label)

    columns = [handle.MeshMeta.mk_hash, handle.MeshMeta.mesh_num]
    selection = handle.select_all(columns)

    s3 = boto3.client('s3')
    body = pickle.dumps(selection.all())
    location = self.get_s3_path().kw()
    s3.put_object(Body=body, **location)
def test_has_sources():
    """Check that HasSources results have counts for every listed source."""
    ro = get_ro('primary')
    query = HasSources(['reach', 'sparser'])
    res = query.get_statements(ro, limit=5, ev_limit=8)
    assert len(res.results) == 5

    stmts = res.statements()
    res_json = res.json()
    assert 'results' in res_json
    assert len(stmts) == len(res.results)

    # Both sources must have a positive count for every statement.
    for counts in res.source_counts.values():
        for source in ['reach', 'sparser']:
            assert counts[source] > 0
def test_evidence_count_is_none():
    """Check evidence accounting when no ``ev_limit`` is passed.

    Each statement must carry as many evidence as its reported evidence
    count, and the returned evidence must equal the sum of those counts.
    """
    ro = get_ro('primary')
    query = HasAgent('TP53') - HasOnlySource('medscan')
    res = query.get_statements(ro, limit=2)
    assert isinstance(res, StatementQueryResult)

    stmts = res.statements()
    assert len(stmts) == 2

    first_evidence = stmts[0].evidence
    assert len(first_evidence) > 10

    for stmt in stmts:
        assert len(stmt.evidence) == res.evidence_counts[stmt.get_hash()]

    expected_total = sum(res.evidence_counts.values())
    assert res.returned_evidence == expected_total
def test_has_only_source():
    """Check that HasOnlySource results are supported by 'signor' alone."""
    ro = get_ro('primary')
    query = HasOnlySource('signor')
    res = query.get_statements(ro, limit=5, ev_limit=8)

    res_json = res.json()
    assert 'results' in res_json
    assert set(res.results.keys()) == set(res.source_counts.keys())

    stmts = res.statements()
    assert len(stmts) == len(res.results)

    # 'signor' must have a positive count; every other source must be zero.
    for counts in res.source_counts.values():
        for source, count in counts.items():
            if source == 'signor':
                assert count > 0
            else:
                assert count == 0
def test_from_mesh():
    """Check that FromMeshIds results are annotated with the queried term.

    The ``MeshTermMeta`` rows for the returned hashes are grouped by hash,
    and each group must contain mesh number 1943.
    """
    ro = get_ro('primary')
    query = FromMeshIds(['D001943'])
    res = query.get_statements(ro, limit=5, ev_limit=8)

    result_hashes = set(res.results.keys())
    rows = ro.select_all(
        [ro.MeshTermMeta.mk_hash, ro.MeshTermMeta.mesh_num],
        ro.MeshTermMeta.mk_hash.in_(result_hashes))

    mesh_nums_by_hash = defaultdict(list)
    for mk_hash, mesh_num in rows:
        mesh_nums_by_hash[mk_hash].append(mesh_num)

    for mesh_nums in mesh_nums_by_hash.values():
        assert 1943 in mesh_nums
def test_has_num_agents():
    """Check that HasNumAgents restricts the agent counts of results."""
    ro = get_ro('primary')

    # One or two agents: checked against the length of the agent list.
    res = HasNumAgents((1, 2)).get_statements(ro, limit=5, ev_limit=8)
    for stmt in res.statements():
        assert len(stmt.agent_list()) in (1, 2)

    # Exactly six agents: only the non-None agents are counted.
    res = HasNumAgents((6, )).get_statements(ro, limit=5, ev_limit=8)
    for stmt in res.statements():
        n_present = sum([agent is not None for agent in stmt.agent_list()])
        assert n_present == 6
def test_has_databases():
    """Check that every HasDatabases result has database support.

    Each statement's source counts must include at least one source from
    ``SOURCE_GROUPS['databases']`` with a positive count.
    """
    ro = get_ro('primary')
    query = HasDatabases()
    res = query.get_statements(ro, limit=5, ev_limit=8)

    for sc in res.source_counts.values():
        has_database = any(
            src in SOURCE_GROUPS['databases'] and cnt > 0
            for src, cnt in sc.items())
        assert has_database, f"No databases found in: {sc}"

    assert set(res.results.keys()) == set(res.source_counts.keys())
    stmts = res.statements()
    assert len(stmts) == len(res.results)
def _check_belief_sorted_result(query):
    """Assert that `query` gives belief-sorted results from each endpoint.

    Exercises ``get_statements``, ``get_hashes`` and ``get_agents`` with
    ``sort_by='belief'``, then two levels of ``AgentJsonExpander``, and
    checks every time that the belief scores never increase along the
    result order.
    """
    def non_increasing(values):
        # True when no value is larger than the value before it.
        return all(prev >= nxt for prev, nxt in zip(values[:-1], values[1:]))

    ro = get_ro('primary')

    # Test `get_statements`
    res = query.get_statements(ro, sort_by='belief', limit=50, ev_limit=3)
    stmts = res.statements()
    assert len(res.belief_scores) == 50
    assert len(stmts) == 50
    assert all(len(stmt.evidence) <= 3 for stmt in stmts)
    # Beliefs must also stay within [0, 1].
    assert all(0 <= later.belief <= earlier.belief <= 1
               for earlier, later in zip(stmts[:-1], stmts[1:]))

    # Test `get_hashes`
    res = query.get_hashes(ro, sort_by='belief', limit=500)
    assert len(res.belief_scores) <= 500, len(res.belief_scores)
    assert len(res.results) == len(res.belief_scores)
    assert non_increasing(list(res.belief_scores.values()))

    # Test `get_agents`
    res = query.get_agents(ro, limit=500, sort_by='belief', with_hashes=True)
    assert len(res.belief_scores) <= 500
    assert len(res.belief_scores) == len(res.results)
    assert non_increasing(list(res.belief_scores.values()))

    # Test AgentJsonExpander First level.
    top = next(iter(res.results.values()))
    expander = AgentJsonExpander(top['agents'], hashes=top['hashes'])
    res = expander.expand(ro, sort_by='belief')
    assert len(res.belief_scores) == len(res.results)
    assert all(entry['agents'] == top['agents']
               and set(entry['hashes']) <= set(top['hashes'])
               for entry in res.results.values())
    assert non_increasing(list(res.belief_scores.values()))

    # Test AgentJsonExpander Second level.
    top = next(iter(res.results.values()))
    expander = AgentJsonExpander(top['agents'], stmt_type=top['type'],
                                 hashes=top['hashes'])
    res = expander.expand(ro, sort_by='belief')
    assert len(res.belief_scores) == len(res.results.values())
    assert all(entry['agents'] == top['agents']
               and entry['type'] == top['type']
               and entry['hash'] in top['hashes']
               for entry in res.results.values())
    assert non_increasing(list(res.belief_scores.values()))
def test_real_world_examples():
    """Run two composite queries and require that each returns something."""
    ro = get_ro('primary')

    # `-` binds tighter than `&`, so the medscan exclusion is applied to the
    # HasType term; the parentheses below only make that grouping explicit.
    query = (
        HasAgent('MEK', namespace='FPLX', role='SUBJECT')
        & HasAgent('ERK', namespace='FPLX', role='OBJECT')
        & (HasType(['Phosphorylation']) - HasOnlySource('medscan'))
    )
    ev_filter = HasOnlySource('medscan').invert().ev_filter()
    res = query.get_statements(ro, limit=100, ev_limit=10,
                               evidence_filter=ev_filter)
    assert len(res.results)

    query = HasAgent('RAS') & HasAgent('RAF') & HasNumAgents((3, ))
    res = query.get_statements(ro, limit=100, ev_limit=10)
    stmts = res.statements()
    assert len(stmts)
def test_evidence_filtering_has_database():
    """Check evidence filtering with the HasDatabases evidence filter.

    None of the returned evidence may have a ``source_api`` listed in
    ``SOURCE_GROUPS['reading']``.
    """
    ro = get_ro('primary')
    agent_query = HasAgent('TP53')
    db_query = HasDatabases()
    query = agent_query & db_query
    res = query.get_statements(ro, limit=2, ev_limit=None,
                               evidence_filter=db_query.ev_filter())
    assert isinstance(res, StatementQueryResult)

    stmts = res.statements()
    assert len(stmts) == 2
    for stmt in stmts:
        for ev in stmt.evidence:
            assert ev.source_api not in SOURCE_GROUPS['reading']

    as_json = res.json()
    assert 'results' in as_json
    assert len(as_json['results']) == len(stmts)
def test_evidence_filtering_mesh():
    """Check evidence filtering with the FromMeshIds evidence filter.

    Each statement must carry fewer evidence than its reported evidence
    count.
    """
    ro = get_ro('primary')
    agent_query = HasAgent('TP53')
    mesh_query = FromMeshIds(['D001943'])
    query = agent_query & mesh_query
    res = query.get_statements(ro, limit=2, ev_limit=None,
                               evidence_filter=mesh_query.ev_filter())
    assert isinstance(res, StatementQueryResult)

    stmts = res.statements()
    assert len(stmts) == 2
    for stmt in stmts:
        assert len(stmt.evidence) < res.evidence_counts[stmt.get_hash()]

    as_json = res.json()
    assert 'results' in as_json
    assert len(as_json['results']) == len(stmts)
def test_has_type():
    """Check HasType with exact types and with subclasses included."""
    ro = get_ro('primary')

    # Exact types: compared by class name.
    query = HasType(['Phosphorylation', 'Activation'])
    res = query.get_statements(ro, limit=5, ev_limit=8)
    for stmt in res.statements():
        assert stmt.__class__.__name__ in ('Phosphorylation', 'Activation')

    # With subclasses: each named type plus all of its descendants.
    type_list = ['SelfModification', 'RegulateAmount', 'Translocation']
    query = HasType(type_list, include_subclasses=True)
    res = query.get_statements(ro, limit=5, ev_limit=8)
    stmts = res.statements()

    allowed_types = set()
    for type_name in type_list:
        base_type = get_statement_by_name(type_name)
        allowed_types.add(base_type)
        allowed_types.update(get_all_descendants(base_type))

    for stmt in stmts:
        assert type(stmt) in allowed_types
def run_all(continuing, delete_existing, load_only, dump_only,
            no_redirect_to_principal):
    """Generate new dumps and list existing dumps."""
    from indra_db import get_ro

    # The readonly handle is only created when this is not a dump-only run.
    ro_manager = None if dump_only else get_ro('primary', protected=False)

    principal = get_db('primary', protected=False)
    dump(principal, ro_manager, delete_existing, continuing, load_only,
         dump_only, no_redirect_to_principal=no_redirect_to_principal)
def test_evidence_filtering_has_only_source():
    """Check evidence filtering with the inverted HasOnlySource filter.

    Queries TP53 statements that are not supported only by medscan, applies
    the same condition as the evidence filter, and verifies that none of
    the returned evidence reports medscan as its reader.
    """
    ro = get_ro('primary')
    q1 = HasAgent('TP53')
    q2 = ~HasOnlySource('medscan')
    query = q1 & q2
    res = query.get_statements(ro, limit=2, ev_limit=None,
                               evidence_filter=q2.ev_filter())
    assert isinstance(res, StatementQueryResult)
    stmts = res.statements()
    assert len(stmts) == 2
    # Compare case-insensitively so the check cannot pass vacuously because
    # of a casing mismatch in the stored READER value; evidence without a
    # READER entry is treated as not coming from medscan.
    # NOTE(review): confirm the casing actually stored in text_refs['READER'].
    assert not any(
        (ev.text_refs.get('READER') or '').lower() == 'medscan'
        for s in stmts for ev in s.evidence)
    js = res.json()
    assert 'results' in js
    assert len(js['results']) == len(stmts)
def test_evidence_filtering_has_source():
    """Check evidence filtering with the HasSources evidence filter.

    Every returned evidence must report 'REACH' or 'SPARSER' as the READER
    in its ``text_refs``.
    """
    ro = get_ro('primary')
    agent_query = HasAgent('TP53')
    source_query = HasSources(['reach', 'sparser'])
    query = agent_query & source_query
    res = query.get_statements(ro, limit=2, ev_limit=None,
                               evidence_filter=source_query.ev_filter())
    assert isinstance(res, StatementQueryResult)

    stmts = res.statements()
    assert len(stmts) == 2
    for stmt in stmts:
        for ev in stmt.evidence:
            assert ev.text_refs.get('READER') in ['REACH', 'SPARSER']

    as_json = res.json()
    assert 'results' in as_json
    assert len(as_json['results']) == len(stmts)
def _choose_db(cls, **kwargs): # If we don't need a database handle, just keep on walking. if not cls.db_required: return None # If a database is required, the type must be specified. assert len(cls.db_options), \ "If db is required, db_options are too." # If a handle was given, use it, regardless of other inputs, if it # is at all permissible. if 'ro' in kwargs and 'db' in kwargs: raise ValueError("Only one database handle may be defined at" "a time.") if 'ro' in kwargs: if 'readonly' in cls.db_options: return kwargs['ro'] else: raise ValueError("Cannot use readonly database, but ro " "handle was given.") elif 'db' in kwargs: if 'principal' in cls.db_options: return kwargs['db'] else: raise ValueError("Cannot use principal database, but db " "handle was given.") # Otherwise, read or guess the database type, and make a new instance. if len(cls.db_options) == 1: db_opt = cls.db_options[0] else: if 'use_principal' in kwargs: if kwargs['use_principal']: db_opt = 'principal' else: db_opt = 'readonly' else: raise ValueError("No database specified.") if db_opt == 'principal': return get_db('primary', protected=False) else: # if db_opt == 'readonly' return get_ro('primary', protected=False)
def test_has_agent():
    """Check HasAgent by name, by namespace and role, and by agent number."""
    ro = get_ro('primary')

    # By name only: some non-None agent must be named 'RAS'.
    res = HasAgent('RAS').get_statements(ro, limit=5, ev_limit=8)
    for stmt in res.statements():
        agent_names = [ag.name for ag in stmt.agent_list() if ag is not None]
        assert 'RAS' in agent_names

    # By namespace and role: 'MEK' must be the FPLX id of the first agent,
    # or of any member when the statement is a Complex.
    query = HasAgent('MEK', namespace='FPLX', role='SUBJECT')
    res = query.get_statements(ro, limit=5, ev_limit=8)
    for stmt in res.statements():
        assert (
            stmt.agent_list(deep_sorted=True)[0].db_refs['FPLX'] == 'MEK'
            or (isinstance(stmt, Complex)
                and 'MEK' in {ag.db_refs.get('FPLX')
                              for ag in stmt.agent_list()})
        )

    # By agent number: the agent at index 3 must be vemurafenib.
    query = HasAgent('CHEBI:63637', namespace='CHEBI', agent_num=3)
    res = query.get_statements(ro, limit=5, ev_limit=8)
    for stmt in res.statements():
        agent = stmt.agent_list(deep_sorted=True)[3]
        assert agent.name == 'vemurafenib'
        assert agent.db_refs['CHEBI'] == 'CHEBI:63637'