def test_to_signed_graph():
    """Check the signed graph derived from an IndraNet.

    Builds an IndraNet from the a-b and b-c statement fixtures, converts it
    with ``to_signed_graph`` and verifies the node/edge counts, the statement
    types collected on each signed edge, and that every edge carries truthy,
    float-typed ``belief`` and ``weight`` attributes.
    """
    ia = IndraNetAssembler([ab1, ab2, ab3, ab4, bc1, bc2, bc3, bc4])
    df = ia.make_df()
    net = IndraNet.from_df(df)
    signed_graph = net.to_signed_graph(sign_dict=default_sign_dict,
                                       weight_mapping=_weight_mapping)
    assert len(signed_graph.nodes) == 3
    assert len(signed_graph.edges) == 4

    # Edge keys 0 and 1 presumably correspond to the two signs defined by
    # sign_dict -- confirm against IndraNet.to_signed_graph.
    assert {
        stmt['stmt_type']
        for stmt in signed_graph['a']['b'][0]['statements']
    } == {'Activation', 'IncreaseAmount'}
    assert {
        stmt['stmt_type']
        for stmt in signed_graph['a']['b'][1]['statements']
    } == {'Inhibition'}
    assert {
        stmt['stmt_type']
        for stmt in signed_graph['b']['c'][0]['statements']
    } == {'Activation', 'IncreaseAmount'}
    assert {
        stmt['stmt_type']
        for stmt in signed_graph['b']['c'][1]['statements']
    } == {'Inhibition', 'DecreaseAmount'}

    # np.longfloat was removed in NumPy 2.0; np.longdouble is the type it
    # aliased and exists in every NumPy release.
    float_types = (float, np.longdouble)
    assert all(signed_graph.edges[e].get('belief', False)
               for e in signed_graph.edges)
    assert all(isinstance(signed_graph.edges[e]['belief'], float_types)
               for e in signed_graph.edges)
    assert all(signed_graph.edges[e].get('weight', False)
               for e in signed_graph.edges)
    assert all(isinstance(signed_graph.edges[e]['weight'], float_types)
               for e in signed_graph.edges)
def test_make_df():
    """Verify the type, row count and column set of the assembled frame."""
    expected_columns = {
        'agA_name', 'agB_name',
        'agA_ns', 'agA_id',
        'agB_ns', 'agB_id',
        'stmt_type', 'evidence_count', 'stmt_hash',
        'belief', 'source_counts',
    }
    assembler = IndraNetAssembler([st1, st2, st3, st4, st5, st6])
    frame = assembler.make_df()
    assert isinstance(frame, pd.DataFrame)
    assert len(frame) == 9
    assert set(frame.columns) == expected_columns
def export_joint_tsv(all_stmts, fname):
    """Write the de-duplicated statements of all groups to one TSV file.

    Parameters
    ----------
    all_stmts : dict
        Mapping whose values are iterables of statements; the keys are not
        used.
    fname : str
        Path of the tab-separated file to write.
    """
    # Key by hash so a statement appearing under several keys is kept once;
    # later occurrences replace earlier ones, first-seen order is retained.
    unique_stmts = {
        stmt.get_hash(): stmt
        for stmt_group in all_stmts.values()
        for stmt in stmt_group
    }
    assembler = IndraNetAssembler(list(unique_stmts.values()))
    assembler.make_df().to_csv(fname, index=False, sep='\t')
def test_make_df():
    """Check ``make_df`` output, with and without extra columns.

    The default frame must be a DataFrame with ten rows and the expected
    column set, with ``residue`` and ``position`` populated on exactly one
    row. A second frame built with ``extra_columns`` must keep the same shape
    and gain only the requested column.
    """
    ia = IndraNetAssembler([st1, st2, st3, st4, st5, st6, st9])
    df = ia.make_df()
    assert isinstance(df, pd.DataFrame)
    assert len(df) == 10
    assert set(df.columns) == {
        'agA_name', 'agB_name', 'agA_ns', 'agA_id', 'agB_ns', 'agB_id',
        'stmt_type', 'evidence_count', 'stmt_hash', 'belief',
        'source_counts', 'initial_sign', 'residue', 'position',
    }
    # Check that all but one row is NaN
    assert df.residue.isna().sum() == 9
    assert df.position.isna().sum() == 9

    # Extra column
    df2 = ia.make_df(
        extra_columns=[
            ('stmt_type_upper', lambda stmt: type(stmt).__name__.upper()),
        ])
    # Validate the new frame itself; the original re-checked ``df`` here,
    # which left ``df2`` untested.
    assert isinstance(df2, pd.DataFrame)
    assert len(df2) == 10
    assert set(df2.columns) - set(df.columns) == {'stmt_type_upper'}
    assert df2.stmt_type_upper[0] == 'ACTIVATION'
def assemble_signed_graph(self, mode='local', bucket=EMMAA_BUCKET_NAME):
    """Build a signed graph from the assembled statements and return it.

    Assembly is run first if no assembled statements are available yet. When
    ``mode`` is ``'s3'`` and ``'indranet'`` is among the model's export
    formats, the IndraNet data frame is also written to a local TSV file and
    that file is uploaded to ``bucket`` under ``exports/<model name>/``.

    Parameters
    ----------
    mode : str
        ``'s3'`` enables the TSV export and upload; any other value skips it.
    bucket : str
        Name of the S3 bucket receiving the export.

    Returns
    -------
    The signed graph produced by ``IndraNetAssembler.make_model``.
    """
    if not self.assembled_stmts:
        self.run_assembly()

    assembler = IndraNetAssembler(self.assembled_stmts)
    signed_graph = assembler.make_model(graph_type='signed')

    # Nothing to export unless running against S3 with the format enabled.
    if mode != 's3' or 'indranet' not in self.export_formats:
        return signed_graph

    fname = f'indranet_{self.date_str}.tsv'
    assembler.make_df().to_csv(fname, sep='\t', index=False)
    logger.info(f'Uploading {fname}')
    s3_key = f'exports/{self.name}/{fname}'
    get_s3_client(unsigned=False).upload_file(fname, bucket, s3_key)
    return signed_graph
def test_from_df():
    """Build an IndraNet from an assembled frame and inspect its contents."""
    assembler = IndraNetAssembler([st1, st2, st3, st4, st5, st6, st7])
    graph = IndraNet.from_df(assembler.make_df())
    assert len(graph.nodes) == 6
    assert len(graph.edges) == 9

    # Stmt with 1 agent should not be added
    assert 'e' not in graph.nodes

    # Complex with more than 3 agents should not be added
    assert ('f', 'g', 0) in graph.edges
    assert ('h', 'i', 0) not in graph.edges

    # Test node attributes
    node_a = graph.nodes['a']
    assert node_a['ns'] == 'HGNC', node_a['ns']
    assert node_a['id'] == '1'

    # Test edge attributes
    edge_ac = graph['a']['c'][0]
    assert edge_ac['stmt_type'] == 'Inhibition'
    assert edge_ac['belief'] == 0.76
    assert edge_ac['evidence_count'] == 3
    edge_bd = graph['b']['d'][0]
    assert edge_bd['evidence_count'] == 0
def test_to_digraph():
    """Check the flattened directed graph derived from an IndraNet.

    Builds an IndraNet from the a-b and b-c statement fixtures, converts it
    with ``to_digraph`` and verifies the node/edge counts, the statement
    types merged onto the a->b edge, and that every edge carries truthy,
    float-typed ``belief`` and ``weight`` attributes. Finally confirms the
    result is isomorphic to the graph from ``IndraNet.digraph_from_df``.
    """
    ia = IndraNetAssembler([ab1, ab2, ab3, ab4, bc1, bc2, bc3, bc4])
    df = ia.make_df()
    net = IndraNet.from_df(df)
    assert len(net.nodes) == 3
    assert len(net.edges) == 8

    digraph = net.to_digraph(weight_mapping=_weight_mapping)
    assert len(digraph.nodes) == 3
    assert len(digraph.edges) == 2
    assert {
        stmt['stmt_type'] for stmt in digraph['a']['b']['statements']
    } == {'Activation', 'Phosphorylation', 'Inhibition', 'IncreaseAmount'}

    # np.longfloat was removed in NumPy 2.0; np.longdouble is the type it
    # aliased and exists in every NumPy release.
    float_types = (float, np.longdouble)
    assert all(digraph.edges[e].get('belief', False) for e in digraph.edges)
    assert all(isinstance(digraph.edges[e]['belief'], float_types)
               for e in digraph.edges)
    assert all(digraph.edges[e].get('weight', False) for e in digraph.edges)
    assert all(isinstance(digraph.edges[e]['weight'], float_types)
               for e in digraph.edges)

    digraph_from_df = IndraNet.digraph_from_df(df)
    assert nx.is_isomorphic(digraph, digraph_from_df)
def export_tsv(statements, fname):
    """Write the IndraNet data frame of ``statements`` to a TSV file.

    Parameters
    ----------
    statements : list
        Statements handed to ``IndraNetAssembler``.
    fname : str
        Path of the tab-separated file to write (no index column).
    """
    frame = IndraNetAssembler(statements).make_df()
    frame.to_csv(fname, index=False, sep='\t')