def preprocess_statements(
    raw_statements: List[Statement],
    steps: List[Dict[str, Any]],
) -> List[Statement]:
    """Run a preprocessing pipeline on raw statements.

    Parameters
    ----------
    raw_statements :
        A list of INDRA Statements to preprocess.
    steps :
        A list of AssemblyPipeline steps that define the steps of
        preprocessing.

    Returns
    -------
    preprocessed_statements :
        A list of preprocessed INDRA Statements.
    """
    logger.info('Running preprocessing on %d statements' %
                len(raw_statements))
    ap = AssemblyPipeline(steps)
    preprocessed_statements = ap.run(raw_statements)
    logger.info('%d statements after preprocessing' %
                len(preprocessed_statements))
    return preprocessed_statements
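# A minimal sketch of calling preprocess_statements as defined above. The step
# names refer to indra.tools.assemble_corpus functions that are registered
# with the pipeline; the single input Statement here is a hypothetical example.
from indra.statements import Activation, Agent, Evidence

example_steps = [
    {'function': 'filter_no_hypothesis'},
    {'function': 'map_grounding'},
    {'function': 'filter_grounded_only'},
]
raw_stmts = [Activation(Agent('MEK'), Agent('ERK'),
                        evidence=[Evidence(source_api='reach')])]
preprocessed = preprocess_statements(raw_stmts, example_steps)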
def post(self):
    """Run an assembly pipeline on a list of Statements.

    Parameters
    ----------
    statements : list[indra.statements.Statement.to_json()]
        A list of INDRA Statements on which to run the pipeline.
    pipeline : list[dict]
        A list of dictionaries representing steps in the pipeline. Each
        step should have a 'function' key and, if appropriate, 'args' and
        'kwargs' keys. For more documentation and examples, see
        https://indra.readthedocs.io/en/latest/modules/pipeline.html

    Returns
    -------
    statements : list[indra.statements.Statement.to_json()]
        The list of INDRA Statements resulting from running the pipeline
        on the list of input Statements.
    """
    args = request.json
    stmts = stmts_from_json(args.get('statements'))
    pipeline_steps = args.get('pipeline')
    ap = AssemblyPipeline(pipeline_steps)
    stmts_out = ap.run(stmts)
    return _return_stmts(stmts_out)
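# A hedged sketch of a client-side call to the endpoint above. The host, port
# and endpoint path are placeholders, and the input Statement is hypothetical;
# only the payload structure ('statements' and 'pipeline' keys) follows the
# docstring.
import requests
from indra.statements import Agent, Phosphorylation, stmts_to_json

payload = {
    'statements': stmts_to_json(
        [Phosphorylation(Agent('MAP2K1'), Agent('MAPK1'))]),
    'pipeline': [
        {'function': 'map_grounding'},
        {'function': 'run_preassembly', 'kwargs': {'return_toplevel': False}},
    ],
}
# resp = requests.post('http://localhost:8080/run_pipeline', json=payload)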
def test_assembly_cycle():
    stmts = stmts_from_json_file(
        os.path.join(HERE, 'data', 'compositional_refinement_cycle_test.json'))
    # 874 is a refinement of -534
    pipeline = AssemblyPipeline(comp_assembly_json)
    assembled_stmts = pipeline.run(stmts)
    assert assembled_stmts[0].supported_by == [assembled_stmts[1]]
def run_assembly(self):
    """Run INDRA's assembly pipeline on the Statements."""
    self.eliminate_copies()
    stmts = self.get_indra_stmts()
    stnames = {s.name for s in self.search_terms}
    ap = AssemblyPipeline(self.assembly_config)
    self.assembled_stmts = ap.run(stmts, stnames=stnames)
def run_assembly(self):
    """Run INDRA's assembly pipeline on the Statements."""
    from indra_world.belief import get_eidos_scorer
    from indra_world.ontology import load_world_ontology
    self.eliminate_copies()
    stmts = self.get_indra_stmts()
    stnames = {s.name for s in self.search_terms}
    ap = AssemblyPipeline(self.assembly_config['main'])
    self.assembled_stmts = ap.run(stmts, stnames=stnames)
def get_statements(self):
    """Return a flat list of statements with their evidences."""
    stmts = []
    for sh, stmt in deepcopy(self.stmts_by_hash).items():
        stmt.evidence = self.evs_by_stmt_hash.get(sh, [])
        stmt.belief = self.beliefs[sh]
        stmts.append(stmt)
    # TODO: add refinement edges as supports/supported_by?
    # Here we run some post-processing steps on the statements
    ap = AssemblyPipeline(steps=self.post_processing_steps)
    stmts = ap.run(stmts)
    return stmts
def assemble_dynamic_pysb(self, **kwargs):
    """Assemble a version of a PySB model for dynamic simulation."""
    # First we need to run the regular assembly
    if not self.assembled_stmts:
        self.run_assembly()
    if 'dynamic' in self.assembly_config:
        logger.info('Assembling dynamic PySB model')
        ap = AssemblyPipeline(self.assembly_config['dynamic'])
        # Work on a copy so as not to overwrite the assembled statements
        stmts = deepcopy(self.assembled_stmts)
        new_stmts = ap.run(stmts)
        pa = PysbAssembler()
        pa.add_statements(new_stmts)
        pysb_model = pa.make_model()
        return pysb_model
    logger.info('Did not find dynamic assembly steps')
def assemble_dynamic_pysb(self, mode='local', bucket=EMMAA_BUCKET_NAME):
    """Assemble a version of a PySB model for dynamic simulation."""
    # First we need to run the regular assembly
    if not self.assembled_stmts:
        self.run_assembly()
    if 'dynamic' in self.assembly_config:
        logger.info('Assembling dynamic PySB model')
        ap = AssemblyPipeline(self.assembly_config['dynamic'])
        # Work on a copy so as not to overwrite the assembled statements
        stmts = deepcopy(self.assembled_stmts)
        self.dynamic_assembled_stmts = ap.run(stmts)
        pa = PysbAssembler()
        pa.add_statements(self.dynamic_assembled_stmts)
        pysb_model = pa.make_model()
        if mode == 's3' and 'gromet' in self.export_formats:
            fname = f'gromet_{self.date_str}.json'
            pysb_to_gromet(pysb_model, self.name,
                           self.dynamic_assembled_stmts, fname)
            logger.info(f'Uploading {fname}')
            client = get_s3_client(unsigned=False)
            client.upload_file(fname, bucket, f'exports/{self.name}/{fname}')
        return pysb_model
    logger.info('Did not find dynamic assembly steps')
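# A short usage sketch, assuming `emmaa_model` is a hypothetical instance of
# the class above: the PySB model returned by assemble_dynamic_pysb can be
# simulated with PySB's ODE simulator. The time span and settings are
# illustrative only.
import numpy as np
from pysb.simulator import ScipyOdeSimulator

pysb_model = emmaa_model.assemble_dynamic_pysb(mode='local')
tspan = np.linspace(0, 100, 101)
sim = ScipyOdeSimulator(pysb_model, tspan=tspan)
simulation_result = sim.run()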
        corpus_id, grounding_mode='compositional', extract_filter=['influence'])
    '''
    stmts = []
    for reader in reader_versions['compositional']:
        logger.info('Loading %s' % reader)
        if os.path.exists('compositional_dec2020_%s_raw.pkl' % reader):
            with open('compositional_dec2020_%s_raw.pkl' % reader, 'rb') as fh:
                stmts += pickle.load(fh)
    '''
    logger.info('Got a total of %s statements' % len(stmts))
    assembly_config_file = os.path.join(
        HERE, os.pardir, 'indra_wm_service', 'resources',
        'assembly_compositional_phase3.json')
    pipeline = AssemblyPipeline.from_json_file(assembly_config_file)
    assembled_stmts = pipeline.run(stmts)
    num_docs = 472
    meta_data = {
        'corpus_id': corpus_id,
        'description': 'Compositional grounding assembly for the initial '
                       'Phase 3 documents, Eidos only.',
        'display_name': 'Compositional grounding assembly Phase 3 (Eidos)',
        'readers': list(reader_versions.keys()),
        'assembly': {
            'level': 'grounding_location',
            'grounding_threshold': 0.6,
        },
        'num_statements': len(assembled_stmts),
        'num_documents': num_docs
def test_pipeline_methods():
    ap = AssemblyPipeline()
    assert len(ap) == 0
    ap.append(filter_grounded_only)
    assert len(ap) == 1
    ap.insert(0, filter_no_hypothesis)
    assert len(ap) == 2
    assert ap.steps[0] == {'function': 'filter_no_hypothesis'}
    # Append functions with arguments and runnable arguments
    ap.append(filter_by_type, Activation)
    assert len(ap) == 3
    assert ap.steps[2] == {
        'function': 'filter_by_type',
        'args': [{'stmt_type': 'Activation'}]
    }, ap.steps[2]
    ap.append(run_preassembly, matches_fun=location_matches,
              refinement_fun=location_refinement,
              normalize_equivalences=True, normalize_opposites=True,
              normalize_ns='WM',
              belief_scorer=RunnableArgument(get_eidos_scorer),
              ontology=world_ontology)
    assert len(ap) == 4
    assert isinstance(ap.steps[3], dict)
    assert isinstance(ap.steps[3]['kwargs'], dict)
    assert len(ap.steps[3]['kwargs']) == 7
    # Run argument to get value
    assert isinstance(ap.get_argument_value({'function': 'get_eidos_scorer'}),
                      BeliefScorer)
    # Get a function object as argument
    assert ap.get_argument_value({
        'function': 'location_matches', 'no_run': True}) == location_matches
    # Get statement type as argument
    assert ap.get_argument_value({'stmt_type': 'Activation'}) == Activation
    # Get simple argument values
    assert ap.get_argument_value('test') == 'test'
    assert ap.get_argument_value(4) == 4
    assert ap.get_argument_value(True)
    assert not ap.get_argument_value(False)
    assert ap.get_argument_value([1, 2, 3]) == [1, 2, 3]
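# A hedged sketch of adding a custom step: a function decorated with
# indra.pipeline.register_pipeline can be appended to an AssemblyPipeline and
# is serialized by name like the built-in steps above. filter_min_evidence is
# a hypothetical helper, not part of INDRA.
from indra.pipeline import AssemblyPipeline, register_pipeline


@register_pipeline
def filter_min_evidence(stmts_in, min_evidence=2):
    """Keep statements that have at least min_evidence pieces of evidence."""
    return [s for s in stmts_in if len(s.evidence) >= min_evidence]


custom_ap = AssemblyPipeline()
custom_ap.append(filter_min_evidence, min_evidence=3)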
def test_running_pipeline():
    # From a JSON file
    ap = AssemblyPipeline.from_json_file(test_json)
    assert ap
    # AssemblyPipeline has methods for length and iteration
    assert len(ap) == 5
    for step in ap:
        assert step
    assembled_stmts = ap.run(stmts)
    assert assembled_stmts
    assert len(assembled_stmts) == 2
    # By manually adding steps
    ap2 = AssemblyPipeline()
    ap2.append(filter_no_hypothesis)
    ap2.append(map_grounding)
    ap2.append(filter_grounded_only)
    ap2.append(map_sequence)
    ap2.append(run_preassembly, return_toplevel=False)
    assembled_stmts2 = ap2.run(stmts)
    assert assembled_stmts2
    assert len(assembled_stmts2) == 2
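# A hedged sketch of the kind of steps a pipeline JSON file such as test_json
# could contain; this mirrors the manually built ap2 above and is illustrative
# rather than a copy of the actual test resource.
example_pipeline_steps = [
    {'function': 'filter_no_hypothesis'},
    {'function': 'map_grounding'},
    {'function': 'filter_grounded_only'},
    {'function': 'map_sequence'},
    {'function': 'run_preassembly', 'kwargs': {'return_toplevel': False}},
]
ap3 = AssemblyPipeline(example_pipeline_steps)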
import logging
import datetime

from indra_world.sources.dart import process_reader_output, DartClient
from indra_world.assembly.incremental_assembler import \
    IncrementalAssembler
from indra_world.resources import get_resource_file
from indra.pipeline import AssemblyPipeline
from indra_world.assembly.operations import *
from .db import DbManager

logger = logging.getLogger(__name__)

preparation_pipeline = AssemblyPipeline.from_json_file(
    get_resource_file('statement_preparation.json'))

expected_readers = {'eidos', 'hume', 'sofia'}


class ServiceController:
    def __init__(self, db_url, dart_client=None):
        self.db = DbManager(db_url)
        self.assemblers = {}
        self.assembly_triggers = {}
        if dart_client:
            self.dart_client = dart_client
        else:
            self.dart_client = DartClient(storage_mode='web')

    def new_project(self, project_id, name, corpus_id=None):
        res = self.db.add_project(project_id, name)
        if res is None:
def add_statements(self, stmts):
    """Add new statements for incremental assembly.

    Parameters
    ----------
    stmts : list[indra.statements.Statement]
        A list of new prepared statements to be incrementally assembled
        into the set of existing statements.

    Returns
    -------
    AssemblyDelta
        An AssemblyDelta object representing the changes to the assembly
        as a result of the newly added statements.
    """
    # We first organize statements by hash
    stmts_by_hash = defaultdict(list)
    for stmt in stmts:
        self.annotate_evidences(stmt)
        stmts_by_hash[stmt.get_hash(
            matches_fun=self.matches_fun)].append(stmt)
    stmts_by_hash = dict(stmts_by_hash)
    # We next create the new statements and new evidences data structures
    new_stmts = {}
    new_evidences = defaultdict(list)
    for sh, stmts_for_hash in stmts_by_hash.items():
        if sh not in self.stmts_by_hash:
            new_stmts[sh] = stmts_for_hash[0]
            self.stmts_by_hash[sh] = stmts_for_hash[0]
            self.evs_by_stmt_hash[sh] = []
        for stmt in stmts_for_hash:
            for ev in stmt.evidence:
                new_evidences[sh].append(ev)
                self.evs_by_stmt_hash[sh].append(ev)
    new_evidences = dict(new_evidences)
    # Here we run some post-processing steps on the new statements
    ap = AssemblyPipeline(steps=self.post_processing_steps)
    # NOTE: the assumption here is that the processing steps modify the
    # statement objects directly; this could be modified to return
    # statements that are then set in the hash-keyed dict
    ap.run(list(new_stmts.values()))
    # Next we extend refinements and re-calculate beliefs
    logger.info('Extending refinement filters')
    for filter in self.refinement_filters:
        filter.extend(new_stmts)
    new_refinements = set()
    logger.info('Finding refinements for new statements')
    for sh, stmt in tqdm.tqdm(new_stmts.items()):
        refinements = None
        for filter in self.refinement_filters:
            # Note that this gets the less specific statements
            refinements = filter.get_related(stmt, refinements)
        # We order hashes with the less specific first and the more
        # specific second
        new_refinements |= {(ref, sh) for ref in refinements}
        # This expects a list of less specific hashes for the statement
        extend_refinements_graph(self.refinements_graph, stmt,
                                 list(refinements),
                                 matches_fun=self.matches_fun)
    logger.info('Getting beliefs')
    beliefs = self.get_beliefs()
    logger.info('Returning assembly delta')
    return AssemblyDelta(new_stmts, new_evidences, new_refinements,
                         beliefs, matches_fun=self.matches_fun)
        ot = get_text(stmt.obj)
        if text_too_long(st, k) or text_too_long(ot, k):
            continue
        new_stmts.append(stmt)
    logger.info(f'{len(new_stmts)} statements after filter.')
    return new_stmts


if __name__ == '__main__':
    # Load all raw statements
    eidos_stmts = load_eidos()
    hume_stmts = load_hume()
    sofia_stmts = load_sofia()
    cwms_stmts = load_cwms()
    hume_ap = AssemblyPipeline.from_json_file('hume_redundant.json')
    hume_stmts = hume_ap.run(hume_stmts)
    # Reground where needed
    reground_ap = AssemblyPipeline.from_json_file('reground_stmts.json')
    sofia_stmts = reground_ap.run(sofia_stmts)
    cwms_stmts = reground_ap.run(cwms_stmts)
    # Run shared assembly steps
    stmts = eidos_stmts + hume_stmts + sofia_stmts + cwms_stmts
    ap = AssemblyPipeline.from_json_file('assembly_steps.json')
    stmts = ap.run(stmts)
    funs = {
        'grounding': None,
        'location': location_matches,
def test_compositional_refinement_polarity_bug():
    stmts = stmts_from_json_file(
        os.path.join(HERE, 'data', 'test_missing_refinement.json'))
    pipeline = AssemblyPipeline(comp_assembly_json)
    assembled_stmts = pipeline.run(stmts)
    assert assembled_stmts[0].supported_by == [assembled_stmts[1]]
        for stmt in pp.statements:
            for ev in stmt.evidence:
                if 'provenance' not in ev.annotations:
                    ev.annotations['provenance'] = [
                        {'document': {'@id': doc_id}}]
                else:
                    prov = ev.annotations['provenance'][0]['document']
                    prov['@id'] = doc_id
        stmts += pp.statements
    if grounding == 'compositional':
        validate_grounding_format(stmts)
    ap = AssemblyPipeline.from_json_file('assembly_%s.json' % grounding)
    assembled_stmts = ap.run(stmts)
    if do_upload:
        corpus_id = 'compositional_v4'
        stmts_to_json_file(assembled_stmts, '%s.json' % corpus_id)
        meta_data = {
            'corpus_id': corpus_id,
            'description': ('Assembly of 4 reader outputs with the '
                            'compositional ontology (%s).' % ont_url),
            'display_name': 'Compositional ontology assembly v3',
            'readers': readers,