def get_relevant_signor_statements():
    """Fetch SIGNOR statements and index the regulatory ones by SIGNOR ID.

    Only Inhibition and Activation statements are retained. Each retained
    statement is filed under the ``source_id`` of its first evidence entry.

    Returns
    -------
    signor_stmts_by_id : dict
        Plain dict mapping each SIGNOR ID to the list of statements whose
        first evidence carries that ID.
    """
    print('Collecting SIGNOR statements from the web.')
    processor = signor.process_from_web()
    signor_stmts_by_id = {}
    for statement in processor.statements:
        # Skip everything that is not an activity regulation statement.
        if not isinstance(statement, (Inhibition, Activation)):
            continue
        signor_id = statement.evidence[0].source_id
        signor_stmts_by_id.setdefault(signor_id, []).append(statement)
    return signor_stmts_by_id
def _dump_test_data(filename, num_per_type=10):
    """Get corpus of statements for testing that has a range of stmt types.

    Statements are fetched with ``signor.process_from_web()``, grouped by
    their class, and at most ``num_per_type`` statements of each class are
    kept. A random 0/1 label is drawn for every sampled statement, and the
    tuple ``(stmt_sample, y_arr)`` is pickled to ``filename``.

    Parameters
    ----------
    filename : str
        Path of the pickle file to write.
    num_per_type : int
        Maximum number of statements to keep for each statement class.
        Classes with this many statements or fewer are kept in full.

    Returns
    -------
    stmt_sample : list
        The sampled statements (the labels are only written to the file).
    """
    sp = signor.process_from_web()
    # Group statements by type
    stmts_by_type = defaultdict(list)
    for stmt in sp.statements:
        stmts_by_type[stmt.__class__].append(stmt)
    # Sample statements of each type (without replacement)
    stmt_sample = []
    for stmt_list in stmts_by_type.values():
        if len(stmt_list) <= num_per_type:
            stmt_sample.extend(stmt_list)
        else:
            stmt_sample.extend(random.sample(stmt_list, num_per_type))
    # Make a random binary class vector for the stmt list
    y_arr = [random.choice((0, 1)) for _ in stmt_sample]
    # Write to the path the caller asked for, so the `filename` argument
    # actually controls where the corpus ends up.
    with open(filename, 'wb') as f:
        pickle.dump((stmt_sample, y_arr), f)
    return stmt_sample
def _get_statements(self):
    """Return the statements produced by the SIGNOR web processor."""
    # Function-scope import: indra is only loaded when this method runs.
    from indra.sources.signor import process_from_web

    return process_from_web().statements
def get_signor_stmts():
    """Fetch SIGNOR statements and split out the two regulation families.

    Returns
    -------
    tuple
        ``(activity_stmts, amount_stmts)``: the results of filtering the
        processed statements by RegulateActivity and by RegulateAmount,
        in that order.
    """
    processor = signor.process_from_web()
    activity_stmts = ac.filter_by_type(processor.statements, RegulateActivity)
    amount_stmts = ac.filter_by_type(processor.statements, RegulateAmount)
    return activity_stmts, amount_stmts
import sys
import pickle
from indra.sources import signor
from .util import get_mod_sites


if __name__ == '__main__':
    # The single command-line argument is the path of the pickle to write.
    destination = sys.argv[1]
    # Fetch SIGNOR statements and extract the modification sites from them.
    mod_sites = get_mod_sites(signor.process_from_web().statements)
    with open(destination, 'wb') as out:
        pickle.dump(mod_sites, out)