def test_agent_text_storage():
    A1 = Agent('A', db_refs={'TEXT': 'A'})
    A2 = Agent('A', db_refs={'TEXT': 'alpha'})
    B1 = Agent('B', db_refs={'TEXT': 'bag'})
    B2 = Agent('B', db_refs={'TEXT': 'bug'})
    C = Agent('C')
    D = Agent('D')
    inp = [
        Complex([A1, B1], evidence=Evidence(text='A complex bag.')),
        Complex([B2, A2], evidence=Evidence(text='bug complex alpha once.')),
        Complex([B2, A2], evidence=Evidence(text='bug complex alpha again.')),
        Complex([A1, C, B2], evidence=Evidence(text='A complex C bug.')),
        Phosphorylation(A1, B1, evidence=Evidence(text='A phospho bags.')),
        Phosphorylation(A2, B2, evidence=Evidence(text='alpha phospho bugs.')),
        Conversion(D, [A1, B1], [C, D],
                   evidence=Evidence(text='D: A bag -> C D')),
        Conversion(D, [B1, A2], [C, D],
                   evidence=Evidence(text='D: bag a -> C D')),
        Conversion(D, [B2, A2], [D, C],
                   evidence=Evidence(text='D: bug a -> D C')),
        Conversion(D, [B1, A1], [C, D],
                   evidence=Evidence(text='D: bag A -> C D')),
        Conversion(D, [A1], [A1, C],
                   evidence=Evidence(text='D: A -> A C'))
    ]
    pa = Preassembler(hierarchies, inp)
    unq1 = pa.combine_duplicates()
    assert len(unq1) == 5, len(unq1)
    assert all([len(ev.annotations['prior_uuids']) == 1
                for s in unq1 for ev in s.evidence
                if len(s.evidence) > 1]),\
        'There can only be one prior evidence per uuid at this stage.'
    ev_uuid_dict = {ev.annotations['prior_uuids'][0]: ev.annotations['agents']
                    for s in unq1 for ev in s.evidence}
    for s in inp:
        raw_text = [ag.db_refs.get('TEXT')
                    for ag in s.agent_list(deep_sorted=True)]
        assert raw_text == ev_uuid_dict[s.uuid]['raw_text'],\
            str(raw_text) + '!=' + str(ev_uuid_dict[s.uuid]['raw_text'])

    # Now run pa on the above corpus plus another statement.
    inp2 = unq1 + [
        Complex([A1, C, B1], evidence=Evidence(text='A complex C bag.'))
    ]
    pa2 = Preassembler(hierarchies, inp2)
    unq2 = pa2.combine_duplicates()
    assert len(unq2) == 5, len(unq2)
    old_ev_list = []
    new_ev = None
    for s in unq2:
        for ev in s.evidence:
            if ev.text == inp2[-1].evidence[0].text:
                new_ev = ev
            else:
                old_ev_list.append(ev)
    assert all([len(ev.annotations['prior_uuids']) == 2
                for ev in old_ev_list])
    assert new_ev
    assert len(new_ev.annotations['prior_uuids']) == 1
def test_complexes_from_hierarchy():
    exp = ef.Expander(hierarchies)
    complexes = exp.complexes_from_hierarchy()
    keys = [c.matches_key() for c in complexes]
    probe_stmt = Complex([Agent('AMPK_alpha', db_refs={'FPLX': 'AMPK_alpha'}),
                          Agent('AMPK_beta', db_refs={'FPLX': 'AMPK_beta'}),
                          Agent('AMPK_gamma', db_refs={'FPLX': 'AMPK_gamma'})])
    assert probe_stmt.matches_key() in keys
def test_complex_refinement():
    ras = Agent('RAS')
    raf = Agent('RAF')
    mek = Agent('MEK')
    st1 = Complex([ras, raf])
    st2 = Complex([mek, ras, raf])
    pa = Preassembler(hierarchies, stmts=[st1, st2])
    pa.combine_related()
    assert len(pa.unique_stmts) == 2
    assert len(pa.related_stmts) == 2
def test_complex_refinement_order():
    st1 = Complex([Agent('MED23'), Agent('ELK1')])
    st2 = Complex([Agent('ELK1', mods=[ModCondition('phosphorylation')]),
                   Agent('MED23')])
    pa = Preassembler(hierarchies, stmts=[st1, st2])
    pa.combine_duplicates()
    pa.combine_related()
    assert len(pa.related_stmts) == 1
def test_homodimer_refinement():
    egfr = Agent('EGFR')
    erbb = Agent('ERBB2')
    st1 = Complex([erbb, erbb])
    st2 = Complex([erbb, egfr])
    pa = Preassembler(hierarchies, stmts=[st1, st2])
    pa.combine_duplicates()
    assert len(pa.unique_stmts) == 2
    pa.combine_related()
    assert len(pa.related_stmts) == 2
def test_grounding_aggregation_complex():
    mek = Agent('MEK')
    braf1 = Agent('BRAF', db_refs={'TEXT': 'braf', 'HGNC': '1097'})
    braf2 = Agent('BRAF', db_refs={'TEXT': 'BRAF', 'dummy': 'dummy'})
    braf3 = Agent('BRAF', db_refs={'TEXT': 'Braf', 'UP': 'P15056'})
    st1 = Complex([mek, braf1])
    st2 = Complex([braf2, mek])
    st3 = Complex([mek, braf3])
    pa = Preassembler(hierarchies, stmts=[st1, st2, st3])
    unique_stmts = pa.combine_duplicates()
    assert len(unique_stmts) == 3
def test_complex_agent_refinement():
    ras = Agent('RAS')
    raf1 = Agent('RAF', mods=[ModCondition('ubiquitination', None, None, True)])
    raf2 = Agent('RAF', mods=[ModCondition('ubiquitination', None, None, False)])
    st1 = Complex([ras, raf1])
    st2 = Complex([ras, raf2])
    pa = Preassembler(hierarchies, stmts=[st1, st2])
    pa.combine_related()
    assert len(pa.unique_stmts) == 2
    assert len(pa.related_stmts) == 2
def test_preassemble_related_complex():
    ras = Agent('RAS', db_refs={'FPLX': 'RAS'})
    kras = Agent('KRAS', db_refs={'HGNC': '6407'})
    hras = Agent('HRAS', db_refs={'HGNC': '5173'})
    st1 = Complex([kras, hras])
    st2 = Complex([kras, ras])
    st3 = Complex([hras, kras])
    st4 = Complex([ras, kras])
    pa = Preassembler(hierarchies, [st1, st2, st3, st4])
    uniq = pa.combine_duplicates()
    assert len(uniq) == 2
    top = pa.combine_related()
    assert len(top) == 1
def process_binding_statements(self):
    """Looks for Binding events in the graph and extracts them into INDRA
    statements.

    In particular, looks for a Binding event node with outgoing edges with
    relations Theme and Theme2 - the entities these edges point to are the
    two constituents of the Complex INDRA statement.
    """
    G = self.G
    statements = []

    binding_nodes = self.find_event_with_outgoing_edges('Binding',
                                                        ['Theme', 'Theme2'])
    for node in binding_nodes:
        theme1 = self.get_entity_text_for_relation(node, 'Theme')
        theme1_node = self.get_related_node(node, 'Theme')
        theme2 = self.get_entity_text_for_relation(node, 'Theme2')
        assert theme1 is not None
        assert theme2 is not None

        evidence = self.node_to_evidence(theme1_node, is_direct=True)
        statements.append(Complex([s2a(theme1), s2a(theme2)],
                                  evidence=evidence))
    return statements
def geneways_action_to_indra_statement_type(actiontype, plo):
    """Return INDRA Statement corresponding to Geneways action type.

    Parameters
    ----------
    actiontype : str
        The verb extracted by the Geneways processor
    plo : str
        A one character string designating whether Geneways classifies this
        verb as a physical, logical, or other interaction

    Returns
    -------
    statement_generator :
        If there is no mapping to INDRA statements from this action type
        the return value is None. If there is such a mapping,
        statement_generator is an anonymous function that takes in the
        subject agent, object agent, and evidence, in that order, and
        returns an INDRA statement object.
    """
    actiontype = actiontype.lower()

    statement_generator = None
    is_direct = (plo == 'P')

    if actiontype == 'bind':
        statement_generator = lambda substance1, substance2, evidence: \
            Complex([substance1, substance2], evidence=evidence)
        is_direct = True
    elif actiontype == 'phosphorylate':
        statement_generator = lambda substance1, substance2, evidence: \
            Phosphorylation(substance1, substance2, evidence=evidence)
        is_direct = True

    return (statement_generator, is_direct)
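# Usage sketch (illustrative, not from the source): the returned
# statement_generator, when not None, is called with the subject agent, the
# object agent, and an Evidence object, in that order. The agents and the
# evidence text below are made-up examples.
def _example_geneways_generator_usage():
    from indra.statements import Agent, Evidence
    statement_generator, is_direct = \
        geneways_action_to_indra_statement_type('bind', 'P')
    if statement_generator is not None:
        stmt = statement_generator(
            Agent('MEK'), Agent('ERK'),
            Evidence(source_api='geneways', text='MEK binds ERK.'))
        # stmt is a Complex of MEK and ERK; is_direct is True for 'bind'.
        return stmt, is_direct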
def test_map_agent():
    erk = Agent('ERK1', db_refs={'TEXT': 'ERK1'})
    p_erk = Agent('P-ERK', db_refs={'TEXT': 'p-ERK'})
    stmt = Complex([erk, p_erk])
    mapped_stmts = gm.map_stmts([stmt])
    mapped_ag = mapped_stmts[0].members[1]
    assert mapped_ag.name == 'ERK'
    assert mapped_ag.db_refs.get('FPLX') == 'ERK'
def _get_db_no_pa_stmts():
    db = get_temp_db(clear=True)

    db_builder = DbBuilder(db)
    db_builder.add_text_refs([
        ('12345', 'PMC54321'),
        ('24680', 'PMC08642'),
        ('97531',)
    ])
    db_builder.add_text_content([
        ['pubmed-ttl', 'pubmed-abs', 'pmc_oa'],
        ['pubmed-abs', 'manuscripts'],
        ['pubmed-ttl', 'pubmed-abs']
    ])
    db_builder.add_readings([
        ['REACH', 'TRIPS'],
        ['REACH', 'SPARSER'],
        ['REACH', 'ISI'],
        ['SPARSER'],
        ['REACH', 'SPARSER'],
        ['SPARSER', 'TRIPS', 'REACH'],
        ['REACH', 'EIDOS']
    ])
    db_builder.add_raw_reading_statements([
        [Phosphorylation(mek, erk)],                    # reach pubmed title
        [Phosphorylation(mek, erk, 'T', '124')],        # trips pubmed title
        [Phosphorylation(mek, erk), Inhibition(erk, ras),
         (Phosphorylation(mek, erk), 'in the body')],   # reach pubmed-abs
        [Complex([mek, erk]), Complex([erk, ras]),
         (Phosphorylation(None, erk), 'In the body')],  # sparser pubmed-abs
        [],                                             # reach pmc_oa
        [],                                             # ISI pmc_oa
        [Phosphorylation(map2k1, mapk1)],               # sparser pubmed-abs
        [],                                             # reach manuscripts
        [],                                             # sparser manuscripts
        [Inhibition(simvastatin_ng, raf),
         Activation(map2k1_mg, erk)],                   # sparser pubmed title
        [],                                             # TRIPS pubmed title
        [],                                             # reach pubmed title
        [],                                             # reach pubmed abs
        [],                                             # eidos pubmed abs
    ])
    db_builder.add_databases(['biopax', 'tas', 'bel'])
    db_builder.add_raw_database_statements([
        [Activation(mek, raf), Inhibition(erk, ras), Phosphorylation(mek, erk)],
        [Inhibition(simvastatin, raf)],
        [Phosphorylation(mek, erk, 'T', '124')]
    ])
    return db
def test_map_agent():
    erk = Agent('ERK1', db_refs={'TEXT': 'ERK1'})
    p_erk = Agent('P-ERK', db_refs={'TEXT': 'p-ERK'})
    stmt = Complex([erk, p_erk])
    gm = GroundingMapper(default_grounding_map, default_agent_map)
    mapped_stmts = gm.map_agents([stmt])
    mapped_ag = mapped_stmts[0].members[1]
    assert mapped_ag.name == 'ERK'
    assert mapped_ag.db_refs.get('BE') == 'ERK'
def process_row(row, up_web_fallback=False):
    """Process one row of the DataFrame into an INDRA Statement."""
    host_agent = get_agent_from_grounding(row['host_grounding'],
                                          up_web_fallback=up_web_fallback)
    vir_agent = get_agent_from_grounding(row['vir_grounding'],
                                         up_web_fallback=up_web_fallback)

    # There's a column that is always a - character
    assert row['dash'] == '-', row['dash']

    exp_method_id, exp_method_name = parse_psi_mi(row['exp_method'])
    int_type_id, int_type_name = parse_psi_mi(row['int_type'])

    assert row['host_tax'].startswith('taxid:'), row['host_tax']
    _, host_tax = row['host_tax'].split(':')
    assert row['vir_tax'].startswith('taxid:'), row['vir_tax']
    _, vir_tax = row['vir_tax'].split(':')

    assert row['score'].startswith('virhostnet-miscore:'), row['score']
    _, score = row['score'].split(':')
    score = float(score)

    source_ids = parse_source_ids(row['source_id'])

    annotations = {
        'exp_method': {'id': exp_method_id, 'name': exp_method_name},
        'int_type': {'id': int_type_id, 'name': int_type_name},
        'host_tax': host_tax,
        'vir_tax': vir_tax,
        'score': score,
        **source_ids,
    }
    text_refs = parse_text_refs(row['publication'])
    ev = Evidence(source_api='virhostnet', annotations=annotations,
                  text_refs=text_refs, pmid=text_refs.get('PMID'),
                  source_id=source_ids.get('virhostnet-rid'))
    stmt = Complex([host_agent, vir_agent], evidence=[ev])
    return stmt
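# Usage sketch (illustrative): process_row reads the columns referenced above
# from a dict-like row. The values below follow the PSI-MI TAB style that
# VirHostNet distributes, but they are made-up examples and the exact formats
# accepted by get_agent_from_grounding, parse_psi_mi, parse_source_ids and
# parse_text_refs are assumptions here.
def _example_virhostnet_row():
    row = {
        'host_grounding': 'uniprotkb:P04637',
        'vir_grounding': 'uniprotkb:P03070',
        'dash': '-',
        'exp_method': 'psi-mi:"MI:0018"(two hybrid)',
        'int_type': 'psi-mi:"MI:0915"(physical association)',
        'host_tax': 'taxid:9606',
        'vir_tax': 'taxid:1891767',
        'score': 'virhostnet-miscore:0.32',
        'source_id': 'virhostnet-rid:12345|virhostnet-nrid:67890',
        'publication': 'pubmed:15951572',
    }
    # Returns a Complex([host_agent, vir_agent]) with a single Evidence whose
    # annotations carry the experimental method, taxa, and score.
    return process_row(row)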
def get_statements(gene_list):
    res_dict = _send_request(gene_list, include_interactors=True)
    statements = []
    for int_id, interaction in res_dict.items():
        agent_a_name = interaction['OFFICIAL_SYMBOL_A']
        agent_b_name = interaction['OFFICIAL_SYMBOL_B']
        agent_a = Agent(agent_a_name, db_refs={'HGNC': agent_a_name})
        agent_b = Agent(agent_b_name, db_refs={'HGNC': agent_b_name})
        ev = Evidence(source_api='biogrid',
                      source_id=int_id,
                      pmid=interaction['PUBMED_ID'],
                      text=None,
                      annotations=interaction)
        stmt = Complex([agent_a, agent_b], evidence=ev)
        statements.append(stmt)
    return statements
def __init__(self, biogrid_file=None, physical_only=True):
    self.statements = []
    self.physical_only = physical_only

    # If a path to the file is included, process it, skipping the header
    if biogrid_file:
        rows = read_unicode_csv(biogrid_file, '\t', skiprows=1)
    # If no file is provided, download from web
    else:
        logger.info('No data file specified, downloading from BioGrid '
                    'at %s' % biogrid_file_url)
        rows = _download_biogrid_data(biogrid_file_url)

    # Process the rows into Statements
    for row in tqdm.tqdm(rows, desc='Processing BioGRID rows'):
        # Keep only the columns we need; dropping the extra columns saves
        # space in the annotations
        filt_row = [None if item == '-' else item
                    for item in row][:len(columns)]
        bg_row = _BiogridRow(*filt_row)
        # Filter out non-physical interactions if desired
        if self.physical_only and bg_row.exp_system_type != 'physical':
            continue
        # Ground agents
        agent_a = self._make_agent(bg_row.symbol_a, bg_row.entrez_a,
                                   bg_row.swissprot_a, bg_row.trembl_a)
        agent_b = self._make_agent(bg_row.symbol_b, bg_row.entrez_b,
                                   bg_row.swissprot_b, bg_row.trembl_b)
        # Skip any agents with neither HGNC grounding nor string name
        if agent_a is None or agent_b is None:
            continue
        # Get evidence
        pmid_match = re.match(r'PUBMED:(\d+)', bg_row.publication)
        doi_match = re.match(r'DOI:(.*)', bg_row.publication)
        text_refs = {}
        if pmid_match:
            text_refs['PMID'] = pmid_match.groups()[0]
        elif doi_match:
            text_refs['DOI'] = doi_match.groups()[0]
        ev = Evidence(source_api='biogrid',
                      source_id=bg_row.biogrid_int_id,
                      pmid=text_refs.get('PMID'),
                      text_refs=text_refs,
                      annotations=dict(bg_row._asdict()))
        # Make statement
        s = Complex([agent_a, agent_b], evidence=ev)
        self.statements.append(s)
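# Usage sketch (illustrative; the class name BiogridProcessor and the file
# name are assumptions, since only the __init__ is shown above). Processing a
# locally downloaded BioGRID TAB-delimited file into Complex statements; with
# no file argument the full dataset is downloaded from the web.
def _example_biogrid_processor_usage():
    bp = BiogridProcessor(biogrid_file='BIOGRID-ALL-LATEST.tab.txt',
                          physical_only=True)
    # bp.statements is a list of Complex statements, one per physical
    # interaction row that could be grounded.
    return bp.statements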
def complexes_from_hierarchy(self):
    # Iterate over the partof_closure to determine all of the complexes
    # and all of their members
    all_complexes = {}
    for subunit, complex in self.entities.partof_closure:
        complex_subunits = all_complexes.get(complex, [])
        complex_subunits.append(subunit)
        all_complexes[complex] = complex_subunits

    # Now iterate over all of the complexes and create Complex statements
    complex_stmts = []
    for complex, subunits in all_complexes.items():
        # Create an Evidence object for the statement with the URI of the
        # complex as the source_id
        ev = Evidence(source_api='famplex', source_id=complex)
        subunit_agents = [_agent_from_uri(su) for su in subunits]
        complex_stmt = Complex(subunit_agents, evidence=[ev])
        complex_stmts.append(complex_stmt)
    return complex_stmts
def get_statements(gene_list):
    res_dict = _send_request(gene_list, include_interactors=True)
    statements = []
    if res_dict is None:
        return statements

    def get_db_refs(egid):
        hgnc_id = hgnc_client.get_hgnc_from_entrez(egid)
        if not hgnc_id:
            logger.info("No HGNC ID for Entrez ID: %s" % egid)
            return (None, {})
        hgnc_name = hgnc_client.get_hgnc_name(hgnc_id)
        if not hgnc_name:
            logger.info("No HGNC name for HGNC ID: %s" % hgnc_id)
            return (None, {})
        up_id = hgnc_client.get_uniprot_id(hgnc_id)
        if not up_id:
            logger.info("No Uniprot ID for EGID / HGNC ID / Symbol "
                        "%s / %s / %s" % (egid, hgnc_id, hgnc_name))
            return (None, {})
        return (hgnc_name, {'HGNC': hgnc_id, 'UP': up_id})

    for int_id, interaction in res_dict.items():
        agent_a_egid = interaction['ENTREZ_GENE_A']
        agent_b_egid = interaction['ENTREZ_GENE_B']
        agent_a_name, agent_a_db_refs = get_db_refs(agent_a_egid)
        agent_b_name, agent_b_db_refs = get_db_refs(agent_b_egid)
        if agent_a_name is None or agent_b_name is None:
            continue
        if interaction['EXPERIMENTAL_SYSTEM_TYPE'] != 'physical':
            logger.info("Skipping non-physical interaction: %s"
                        % str(interaction))
            continue
        agent_a = Agent(agent_a_name, db_refs=agent_a_db_refs)
        agent_b = Agent(agent_b_name, db_refs=agent_b_db_refs)
        ev = Evidence(source_api='biogrid',
                      source_id=int_id,
                      pmid=interaction['PUBMED_ID'],
                      text=None,
                      annotations=interaction)
        stmt = Complex([agent_a, agent_b], evidence=ev)
        statements.append(stmt)
    return statements
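# Usage sketch (illustrative): querying the BioGRID web service for
# interactions among a small gene list. This requires network access (and,
# depending on configuration, a BioGRID API key); the gene symbols below are
# just examples.
def _example_biogrid_web_query():
    stmts = get_statements(['MAP2K1', 'MAPK1'])
    # Each returned statement is a Complex of two grounded Agents with a
    # 'biogrid' Evidence carrying the PMID and the raw interaction record.
    return stmts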
def complexes_from_hierarchy(self):
    # Iterate over the partof relations of FamPlex entries to determine all
    # of the complexes and all of their members
    fplx_nodes = [('FPLX', entry) for entry in get_famplex_entities()]
    all_complexes = {}
    for fplx_node in fplx_nodes:
        parts = self.ontology.ancestors_rel(*fplx_node, {'partof'})
        if not parts:
            continue
        complex_subunits = all_complexes.get(fplx_node, [])
        for part in parts:
            complex_subunits.append(part)
        all_complexes[fplx_node] = complex_subunits

    # Now iterate over all of the complexes and create Complex statements
    complex_stmts = []
    for complex, subunits in all_complexes.items():
        # Create an Evidence object for the statement with the node of the
        # complex as the source_id
        ev = Evidence(source_api='famplex', source_id=complex)
        subunit_agents = [_agent_from_ns_id(*su) for su in subunits]
        complex_stmt = Complex(subunit_agents, evidence=[ev])
        complex_stmts.append(complex_stmt)
    return complex_stmts
def _construct_database():
    db = get_temp_db(clear=True)
    db_builder = DbBuilder(db)
    db_builder.add_text_refs([
        ('12345', 'PMC54321'),
        ('24680', 'PMC08642')
    ])
    db_builder.add_text_content([['pubmed-abs', 'pmc_oa'], ['pubmed-abs']])
    db_builder.add_readings([['REACH'], ['REACH'], ['REACH', 'SPARSER']])

    mek = Agent('MEK', db_refs={'FPLX': 'MEK'})
    erk = Agent('ERK', db_refs={'FPLX': 'ERK'})
    raf = Agent('RAF', db_refs={'FPLX': 'RAF'})

    db_builder.add_raw_reading_statements([
        [Phosphorylation(mek, erk), Complex([mek, erk])],
        [Phosphorylation(mek, erk)],
        [Activation(mek, erk)],
        [Complex([mek, erk]), Complex([raf, erk])]
    ])
    db_builder.add_databases(['signor'])
    db_builder.add_raw_database_statements([[Complex([raf, erk])]])
    db_builder.add_pa_statements([
        (Phosphorylation(mek, erk), [0, 2]),
        (Complex([mek, erk]), [1, 4]),
        (Activation(mek, erk), [3]),
        (Complex([raf, erk]), [5, 6])
    ])
    return db
def _get_db_with_pa_stmts():
    db = get_temp_db(clear=True)

    db_builder = DbBuilder(db)
    db_builder.add_text_refs([
        ('12345', 'PMC54321'),
        ('24680', 'PMC08642'),
        ('97531',),
        ('87687',)
    ])
    db_builder.add_text_content([
        ['pubmed-ttl', 'pubmed-abs', 'pmc_oa'],
        ['pubmed-ttl', 'pubmed-abs', 'manuscripts'],
        ['pubmed-ttl', 'pubmed-abs', 'pmc_oa'],
        ['pubmed-ttl', 'pubmed-abs']
    ])
    db_builder.add_readings([
        # Ref 1
        ['REACH', 'TRIPS'],             # pubmed ttl
        ['REACH', 'SPARSER'],           # pubmed abs
        ['REACH', 'ISI'],               # pmc_oa
        # Ref 2
        ['REACH', 'TRIPS'],             # pubmed ttl (new)
        ['SPARSER'],                    # pubmed abs
        ['REACH', 'SPARSER'],           # manuscripts
        # Ref 3
        ['SPARSER', 'TRIPS', 'REACH'],  # pubmed ttl
        ['REACH', 'EIDOS', 'SPARSER'],  # pubmed abs
        ['SPARSER'],                    # pmc oa (new)
        # Ref 4
        ['TRIPS', 'REACH', 'SPARSER'],  # pubmed ttl (new)
        ['REACH', 'SPARSER'],           # pubmed abs (new)
    ])
    db_builder.add_raw_reading_statements([
        # Ref 1
        #   pubmed ttl
        [Phosphorylation(mek, erk)],                    # reach
        [Phosphorylation(mek, erk, 'T', '124')],        # trips
        #   pubmed abs
        [Phosphorylation(mek, erk), Inhibition(erk, ras),
         (Phosphorylation(mek, erk), 'in the body')],   # reach
        [Complex([mek, erk]), Complex([erk, ras]),
         (Phosphorylation(None, erk), 'In the body')],  # sparser
        #   pmc OA
        [],                                             # reach
        [],                                             # ISI
        # Ref 2
        #   pubmed ttl
        [Phosphorylation(map2k1, mapk1)],               # reach (new)
        [Phosphorylation(map2k1, mapk1, 'T', '124')],   # trips (new)
        #   pubmed abs
        [Phosphorylation(map2k1, mapk1)],               # sparser
        #   manuscript
        [],                                             # reach
        [],                                             # sparser
        # Ref 3
        #   pubmed ttl
        [],                                             # sparser
        [Inhibition(simvastatin, raf)],                 # TRIPS
        [],                                             # reach
        #   pubmed abs
        [],                                             # reach
        [],                                             # eidos
        [Activation(map2k1_mg, erk),
         Inhibition(simvastatin_ng, raf)],              # sparser (new)
        #   pmc oa
        [Inhibition(simvastatin_ng, raf), Inhibition(erk, ras),
         Activation(ras, raf)],                         # sparser (new)
        # Ref 4
        #   pubmed ttl
        [],                                             # trips (new)
        [],                                             # reach (new)
        [],                                             # sparser (new)
        #   pubmed abstract
        [Activation(kras, braf), Complex([map2k1, mapk1]),
         Complex([kras, braf])],                        # reach (new)
        [Complex([kras, braf]), Complex([mek, erk]),
         IncreaseAmount(kras, braf)],                   # sparser (new)
    ])
    db_builder.add_databases(['biopax', 'tas', 'bel'])
    db_builder.add_raw_database_statements([
        [Activation(mek, raf), Inhibition(erk, ras), Phosphorylation(mek, erk),
         Activation(ras, raf)],
        [Inhibition(simvastatin, raf)],
        [Phosphorylation(mek, erk, 'T', '124')]
    ])
    db_builder.add_pa_statements([
        (Phosphorylation(mek, erk), [0, 2, 4, 25], [1, 8]),
        (Phosphorylation(mek, erk, 'T', '124'), [1, 28]),
        (Phosphorylation(None, erk), [7], [0, 1, 8]),
        (Activation(mek, raf), [23]),
        (Inhibition(simvastatin, raf), [11, 27]),
        (Complex([mek, erk]), [5]),
        (Complex([erk, ras]), [6]),
        (Inhibition(erk, ras), [3, 24]),
        (Phosphorylation(map2k1, mapk1), [10])
    ])

    # Add the preassembly update.
    pu = db.PreassemblyUpdates(corpus_init=True)
    db.session.add(pu)
    db.session.commit()

    return db
def test_dump_build():
    """Test the dump pipeline.

    Method
    ------
    CREATE CONTEXT:
    - Create a local principal database with a small amount of content.
      Aim for representation of stmt motifs and sources.
    - Create a local readonly database.
    - Create a fake bucket (moto)

    RUN THE DUMP

    CHECK THE RESULTS
    """
    assert config.is_db_testing()

    # Create the dump locale.
    s3 = boto3.client('s3')
    dump_head = config.get_s3_dump()
    s3.create_bucket(Bucket=dump_head.bucket)
    assert dump_head.bucket == S3_DATA_LOC['bucket']

    # Create the principal database.
    db = get_temp_db(clear=True)

    db.copy('text_ref', [               # trid
        ('1', 1, 'PMC1', 1),            # 1
        ('2', 2, 'PMC2', 2),            # 2
        ('3', 3, None, None),           # 3
        (None, None, 'PMC4', 4)         # 4
    ], ('pmid', 'pmid_num', 'pmcid', 'pmcid_num'))

    db.copy('mesh_ref_annotations', [
        (1, 11, False),
        (1, 13, False),
        (1, 12, True),
        (2, 12, True),
        (3, 13, False),
        (3, 33, True)
    ], ('pmid_num', 'mesh_num', 'is_concept'))

    db.copy('text_content', [                 # tcid
        (1, 'pubmed', 'txt', 'abstract'),     # 1
        (1, 'pmc', 'xml', 'fulltext'),        # 2
        (2, 'pubmed', 'txt', 'title'),        # 3
        (3, 'pubmed', 'txt', 'abstract'),     # 4
        (3, 'pmc', 'xml', 'fulltext'),        # 5
        (4, 'pmc', 'xml', 'fulltext')         # 6
    ], ('text_ref_id', 'source', 'format', 'text_type'))

    db.copy('reading', [
        (tcid, rdr, 1, reader_versions[rdr][-1], 'empty')
        for tcid, rdr in [
            # 1            2             3
            (1, 'reach'), (1, 'eidos'), (1, 'isi'),
            # 4
            (2, 'reach'),
            # 5            6             7
            (3, 'reach'), (3, 'eidos'), (3, 'trips'),
            # 8
            (4, 'reach'),
            # 9
            (5, 'reach'),
            # 10
            (6, 'reach')
        ]
    ], ('text_content_id', 'reader', 'batch_id', 'reader_version', 'format'))

    db.copy('db_info', [
        ('signor', 'signor', 'Signor'),          # 1
        ('pc', 'biopax', 'Pathway Commons'),     # 2
        ('medscan', 'medscan', 'MedScan')        # 3
    ], ('db_name', 'source_api', 'db_full_name'))

    raw_stmts = {
        'reading': {
            2: [
                Inhibition(
                    Agent('Fever',
                          db_refs={'TEXT': 'fever', 'MESH': 'D005334'}),
                    Agent('Cough',
                          db_refs={'TEXT': 'cough', 'MESH': 'D003371'}),
                    evidence=Evidence(text="We found fever inhibits cough.")
                )
            ],
            4: [
                Phosphorylation(
                    Agent('MEK', db_refs={'FPLX': 'MEK', 'TEXT': 'mek'}),
                    Agent('ERK', db_refs={'FPLX': 'MEK', 'TEXT': 'erk'}),
                    evidence=Evidence(text="mek phosphorylates erk, so say I.")
                ),
                Activation(
                    Agent('MAP2K1', db_refs={'HGNC': '6840', 'TEXT': 'MEK1'}),
                    Agent('MAPK1', db_refs={'HGNC': '6871', 'TEXT': 'ERK1'}),
                    evidence=Evidence(text="MEK1 activates ERK1, or so I'm told.")
                ),
                Activation(
                    Agent('ERK', db_refs={'FPLX': 'ERK', 'TEXT': 'ERK'}),
                    Agent('JNK', db_refs={'FPLX': 'JNK', 'TEXT': 'JNK'}),
                    evidence=Evidence(text="ERK activates JNK, maybe.")
                ),
                Complex([
                    Agent('MEK', db_refs={'FPLX': 'MEK', 'TEXT': 'MAP2K'}),
                    Agent('ERK', db_refs={'FPLX': 'ERK', 'TEXT': 'MAPK'}),
                    Agent('RAF', db_refs={'FPLX': 'RAF', 'TEXT': 'RAF'})
                ], evidence=Evidence(
                    text="MAP2K, MAPK, and RAF form a complex."))
            ],
            7: [
                Activation(
                    Agent('ERK', db_refs={'FPLX': 'ERK', 'TEXT': 'ERK'}),
                    Agent('JNK', db_refs={'FPLX': 'JNK', 'TEXT': 'JNK'}),
                    evidence=Evidence(text='ERK activates JNK, maybe.')
                )
            ],
            8: [
                Complex([
                    Agent('MEK', db_refs={'FPLX': 'MEK', 'TEXT': 'mek'}),
                    Agent('ERK', db_refs={'FPLX': 'ERK', 'TEXT': 'erk'})
                ], evidence=Evidence(text="...in the mek-erk complex."))
            ],
        },
        'databases': {
            2: [
                Conversion(
                    Agent('FRK', db_refs={'HGNC': '3955'}),
                    [Agent('ATP', db_refs={'MESH': 'D000255'})],
                    [Agent('hydron', db_refs={'CHEBI': 'CHEBI:15378'})]
                )
            ],
            3: [
                Phosphorylation(
                    Agent('MEK', db_refs={'FPLX': 'MEK', 'TEXT': 'MEK'}),
                    Agent('ERK', db_refs={'FPLX': 'ERK', 'TEXT': 'ERK'}),
                    evidence=Evidence(
                        text="...MEK phosphorylates ERK medscan.")
                )
            ]
        }
    }
    simple_insert_stmts(db, raw_stmts)

    # Run preassembly.
    prass.create_corpus(db)

    # Do the dump procedure.
    ro = get_temp_ro(clear=True)
    dump(db, ro)

    # Check that the s3 dump exists.
    all_dumps = dm.list_dumps()
    assert len(all_dumps) == 1

    # Check to make sure all the dump files are present.
    dump_path = all_dumps[0]
    file_list = dump_path.list_objects(s3)
    assert dm.Start.from_list(file_list)
    assert dm.Readonly.from_list(file_list)
    assert dm.Belief.from_list(file_list)
    assert dm.Sif.from_list(file_list)
    assert dm.StatementHashMeshId.from_list(file_list)
    assert dm.FullPaStmts.from_list(file_list)
    assert dm.End.from_list(file_list)

    # Check what tables are active in the readonly database.
    active_tables = ro.get_active_tables()
    for tbl in ro.get_tables():
        if ro.tables[tbl]._temp:
            # If it was temp, it should be gone.
            assert tbl not in active_tables
        else:
            # Otherwise, it should be there.
            assert tbl in active_tables

    # Check that the principal db has no more ro schema.
    assert 'readonly' not in db.get_schemas()

    # Check contents of the readonly database.
    assert len(ro.select_all(ro.FastRawPaLink)) \
        == len(db.select_all(db.RawUniqueLinks))

    # Check that a query basically works.
    from indra_db.client.readonly import HasAgent
    res = HasAgent('MEK').get_statements(ro)
    assert len(res.statements()) == 2, len(res.statements())

    # Check that belief is represented in the table.
    bdict = {h: b for h, b in ro.select_all([ro.SourceMeta.mk_hash,
                                             ro.SourceMeta.belief])}
    assert all(1 >= b > 0 for b in bdict.values())

    # Check to make sure lambda was diverted correctly.
    call_records = config.get_test_call_records()
    assert len(call_records) == 2
    assert all(rec.func_name == '_set_lambda_env' for rec in call_records)
    assert all(isinstance(rec.args[1], dict) for rec in call_records)
    assert 'INDRAROOVERRIDE' in call_records[0].args[1]
    assert call_records[0].args[1]['INDRAROOVERRIDE'] == str(db.url)
    assert not call_records[1].args[1]
def _get_op_complex(self, source, target, evidence_list):
    ag_list = self._complex_agents_from_op_complex(source) + \
        self._complex_agents_from_op_complex(target)
    return Complex(members=ag_list, evidence=evidence_list)