def test_assembly_cycle():
    stmts = stmts_from_json_file(
        os.path.join(HERE, 'data',
                     'compositional_refinement_cycle_test.json'))
    # 874 is a refinement of -534
    pipeline = AssemblyPipeline(comp_assembly_json)
    assembled_stmts = pipeline.run(stmts)
    assert assembled_stmts[0].supported_by == [assembled_stmts[1]]
def post(self):
    """Run an assembly pipeline for a list of Statements.

    Parameters
    ----------
    statements : list[indra.statements.Statement.to_json()]
        A list of INDRA Statements to run the pipeline on.
    pipeline : list[dict]
        A list of dictionaries representing steps in the pipeline. Each
        step should have a 'function' key and, if appropriate, 'args' and
        'kwargs' keys. For more documentation and examples, see
        https://indra.readthedocs.io/en/latest/modules/pipeline.html

    Returns
    -------
    statements : list[indra.statements.Statement.to_json()]
        The list of INDRA Statements resulting from running the pipeline
        on the list of input Statements.
    """
    args = request.json
    stmts = stmts_from_json(args.get('statements'))
    pipeline_steps = args.get('pipeline')
    ap = AssemblyPipeline(pipeline_steps)
    stmts_out = ap.run(stmts)
    return _return_stmts(stmts_out)
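# A hedged usage sketch for the endpoint above. The URL is an assumption
# (it depends on where the service is mounted), and the response is assumed
# to echo a 'statements' key; the payload format follows the docstring:
# 'statements' as Statement JSON and 'pipeline' as step dicts with a
# 'function' key and optional 'args'/'kwargs'. 'stmts' is assumed to be a
# list of INDRA Statements defined elsewhere.
import requests

payload = {
    'statements': [stmt.to_json() for stmt in stmts],
    'pipeline': [
        {'function': 'map_grounding'},
        {'function': 'run_preassembly', 'kwargs': {'return_toplevel': False}},
    ],
}
resp = requests.post('http://localhost:8080/preassembly/pipeline',
                     json=payload)
assembled = resp.json().get('statements')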
def preprocess_statements(
    raw_statements: List[Statement],
    steps: List[Dict[str, Any]],
) -> List[Statement]:
    """Run a preprocessing pipeline on raw statements.

    Parameters
    ----------
    raw_statements :
        A list of INDRA Statements to preprocess.
    steps :
        A list of AssemblyPipeline steps that define the preprocessing
        to apply.

    Returns
    -------
    preprocessed_statements :
        A list of preprocessed INDRA Statements.
    """
    logger.info('Running preprocessing on %d statements' %
                len(raw_statements))
    ap = AssemblyPipeline(steps)
    preprocessed_statements = ap.run(raw_statements)
    logger.info('%d statements after preprocessing' %
                len(preprocessed_statements))
    return preprocessed_statements
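# An illustrative call to preprocess_statements. map_grounding and
# filter_grounded_only are standard INDRA pipeline functions, but whether
# they are the actual preprocessing steps used here is an assumption.
steps = [
    {'function': 'map_grounding'},
    {'function': 'filter_grounded_only'},
]
preprocessed = preprocess_statements(raw_statements, steps)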
def run_assembly(self):
    """Run INDRA's assembly pipeline on the Statements."""
    self.eliminate_copies()
    stmts = self.get_indra_stmts()
    stnames = {s.name for s in self.search_terms}
    ap = AssemblyPipeline(self.assembly_config)
    self.assembled_stmts = ap.run(stmts, stnames=stnames)
def run_assembly(self):
    """Run INDRA's assembly pipeline on the Statements."""
    from indra_world.belief import get_eidos_scorer
    from indra_world.ontology import load_world_ontology
    self.eliminate_copies()
    stmts = self.get_indra_stmts()
    stnames = {s.name for s in self.search_terms}
    ap = AssemblyPipeline(self.assembly_config['main'])
    self.assembled_stmts = ap.run(stmts, stnames=stnames)
def get_statements(self):
    """Return a flat list of statements with their evidences."""
    stmts = []
    for sh, stmt in deepcopy(self.stmts_by_hash).items():
        stmt.evidence = self.evs_by_stmt_hash.get(sh, [])
        stmt.belief = self.beliefs[sh]
        stmts.append(stmt)
    # TODO: add refinement edges as supports/supported_by?
    # Here we run some post-processing steps on the statements
    ap = AssemblyPipeline(steps=self.post_processing_steps)
    stmts = ap.run(stmts)
    return stmts
def assemble_dynamic_pysb(self, **kwargs):
    """Assemble a version of a PySB model for dynamic simulation."""
    # First we need to run the regular assembly
    if not self.assembled_stmts:
        self.run_assembly()
    if 'dynamic' in self.assembly_config:
        logger.info('Assembling dynamic PySB model')
        ap = AssemblyPipeline(self.assembly_config['dynamic'])
        # Deep copy so that the assembled statements are not overwritten
        stmts = deepcopy(self.assembled_stmts)
        new_stmts = ap.run(stmts)
        pa = PysbAssembler()
        pa.add_statements(new_stmts)
        pysb_model = pa.make_model()
        return pysb_model
    logger.info('Did not find dynamic assembly steps')
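# A minimal sketch of what an assembly_config consumed by the methods above
# might contain, based on the 'main' and 'dynamic' keys seen in these
# snippets; the concrete steps listed are illustrative assumptions, not the
# actual configuration.
assembly_config = {
    'main': [
        {'function': 'map_grounding'},
        {'function': 'run_preassembly', 'kwargs': {'return_toplevel': False}},
    ],
    'dynamic': [
        {'function': 'filter_grounded_only'},
    ],
}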
def test_running_pipeline():
    # From json file
    ap = AssemblyPipeline.from_json_file(test_json)
    assert ap
    # AssemblyPipeline has methods for length and iteration
    assert len(ap) == 5
    for step in ap:
        assert step
    assembled_stmts = ap.run(stmts)
    assert assembled_stmts
    assert len(assembled_stmts) == 2
    # By manually adding steps
    ap2 = AssemblyPipeline()
    ap2.append(filter_no_hypothesis)
    ap2.append(map_grounding)
    ap2.append(filter_grounded_only)
    ap2.append(map_sequence)
    ap2.append(run_preassembly, return_toplevel=False)
    assembled_stmts2 = ap2.run(stmts)
    assert assembled_stmts2
    assert len(assembled_stmts2) == 2
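# Since len(ap) == 5 above and both pipelines produce the same result, the
# JSON file plausibly mirrors the five manually appended steps. This sketch
# of its contents is an assumption, not the actual test file.
test_json_contents = [
    {'function': 'filter_no_hypothesis'},
    {'function': 'map_grounding'},
    {'function': 'filter_grounded_only'},
    {'function': 'map_sequence'},
    {'function': 'run_preassembly', 'kwargs': {'return_toplevel': False}},
]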
def assemble_dynamic_pysb(self, mode='local', bucket=EMMAA_BUCKET_NAME):
    """Assemble a version of a PySB model for dynamic simulation."""
    # First we need to run the regular assembly
    if not self.assembled_stmts:
        self.run_assembly()
    if 'dynamic' in self.assembly_config:
        logger.info('Assembling dynamic PySB model')
        ap = AssemblyPipeline(self.assembly_config['dynamic'])
        # Deep copy so that the assembled statements are not overwritten
        stmts = deepcopy(self.assembled_stmts)
        self.dynamic_assembled_stmts = ap.run(stmts)
        pa = PysbAssembler()
        pa.add_statements(self.dynamic_assembled_stmts)
        pysb_model = pa.make_model()
        if mode == 's3' and 'gromet' in self.export_formats:
            fname = f'gromet_{self.date_str}.json'
            pysb_to_gromet(pysb_model, self.name,
                           self.dynamic_assembled_stmts, fname)
            logger.info(f'Uploading {fname}')
            client = get_s3_client(unsigned=False)
            client.upload_file(fname, bucket, f'exports/{self.name}/{fname}')
        return pysb_model
    logger.info('Did not find dynamic assembly steps')
def add_statements(self, stmts):
    """Add new statements for incremental assembly.

    Parameters
    ----------
    stmts : list[indra.statements.Statement]
        A list of new prepared statements to be incrementally assembled
        into the set of existing statements.

    Returns
    -------
    AssemblyDelta
        An AssemblyDelta object representing the changes to the assembly
        as a result of the newly added statements.
    """
    # We first organize statements by hash
    stmts_by_hash = defaultdict(list)
    for stmt in stmts:
        self.annotate_evidences(stmt)
        stmts_by_hash[stmt.get_hash(
            matches_fun=self.matches_fun)].append(stmt)
    stmts_by_hash = dict(stmts_by_hash)
    # We next create the new statements and new evidences data structures
    new_stmts = {}
    new_evidences = defaultdict(list)
    for sh, stmts_for_hash in stmts_by_hash.items():
        if sh not in self.stmts_by_hash:
            new_stmts[sh] = stmts_for_hash[0]
            self.stmts_by_hash[sh] = stmts_for_hash[0]
            self.evs_by_stmt_hash[sh] = []
        for stmt in stmts_for_hash:
            for ev in stmt.evidence:
                new_evidences[sh].append(ev)
                self.evs_by_stmt_hash[sh].append(ev)
    new_evidences = dict(new_evidences)
    # Here we run some post-processing steps on the new statements
    ap = AssemblyPipeline(steps=self.post_processing_steps)
    # NOTE: the assumption here is that the processing steps modify the
    # statement objects directly; this could be modified to return
    # statements that are then set in the hash-keyed dict
    ap.run(list(new_stmts.values()))
    # Next we extend refinements and re-calculate beliefs
    logger.info('Extending refinement filters')
    for filter in self.refinement_filters:
        filter.extend(new_stmts)
    new_refinements = set()
    logger.info('Finding refinements for new statements')
    for sh, stmt in tqdm.tqdm(new_stmts.items()):
        refinements = None
        for filter in self.refinement_filters:
            # Note that this gets less specific statements
            refinements = filter.get_related(stmt, refinements)
        # We order hashes with the less specific one first and the more
        # specific one second
        new_refinements |= {(ref, sh) for ref in refinements}
        # This expects a list of less specific hashes for the statement
        extend_refinements_graph(self.refinements_graph, stmt,
                                 list(refinements),
                                 matches_fun=self.matches_fun)
    logger.info('Getting beliefs')
    beliefs = self.get_beliefs()
    logger.info('Returning assembly delta')
    return AssemblyDelta(new_stmts, new_evidences, new_refinements,
                         beliefs, matches_fun=self.matches_fun)
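# A hedged usage sketch of incremental assembly with the method above.
# 'assembler' and 'new_prepared_stmts' are hypothetical names, and the
# attribute names on the returned AssemblyDelta are assumptions inferred
# from the constructor call in add_statements.
delta = assembler.add_statements(new_prepared_stmts)
print('%d new statements, %d new refinement edges' %
      (len(delta.new_stmts), len(delta.new_refinements)))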
def test_compositional_refinement_polarity_bug():
    stmts = stmts_from_json_file(
        os.path.join(HERE, 'data', 'test_missing_refinement.json'))
    pipeline = AssemblyPipeline(comp_assembly_json)
    assembled_stmts = pipeline.run(stmts)
    assert assembled_stmts[0].supported_by == [assembled_stmts[1]]