def test_running_pipeline():
    """Exercise AssemblyPipeline built from JSON and built step by step.

    Both construction routes must yield a runnable pipeline that reduces
    the test statements down to two assembled statements.
    """
    # Route 1: load the pipeline definition from a JSON file
    json_pipeline = AssemblyPipeline.from_json_file(test_json)
    assert json_pipeline
    # AssemblyPipeline supports len() and iteration over its steps
    assert len(json_pipeline) == 5
    for pipeline_step in json_pipeline:
        assert pipeline_step
    json_result = json_pipeline.run(stmts)
    assert json_result
    assert len(json_result) == 2
    # Route 2: build an equivalent pipeline by appending steps manually
    manual_pipeline = AssemblyPipeline()
    manual_pipeline.append(filter_no_hypothesis)
    manual_pipeline.append(map_grounding)
    manual_pipeline.append(filter_grounded_only)
    manual_pipeline.append(map_sequence)
    manual_pipeline.append(run_preassembly, return_toplevel=False)
    manual_result = manual_pipeline.run(stmts)
    assert manual_result
    assert len(manual_result) == 2
corpus_id, grounding_mode='compositional', extract_filter=['influence']) ''' stmts = [] for reader in reader_versions['compositional']: logger.info('Loading %s' % reader) if os.path.exists('compositional_dec2020_%s_raw.pkl' % reader): with open('compositional_dec2020_%s_raw.pkl' % reader, 'rb') as fh: stmts += pickle.load(fh) ''' logger.info('Got a total of %s statements' % len(stmts)) assembly_config_file = os.path.join(HERE, os.pardir, 'indra_wm_service', 'resources', 'assembly_compositional_phase3.json') pipeline = AssemblyPipeline.from_json_file(assembly_config_file) assembled_stmts = pipeline.run(stmts) num_docs = 472 meta_data = { 'corpus_id': corpus_id, 'description': 'Compositional grounding assembly for the intial ' 'Phase 3 documents, Eidos only.', 'display_name': 'Compositional grounding assembly Phase 3 (Eidos)', 'readers': list(reader_versions.keys()), 'assembly': { 'level': 'grounding_location', 'grounding_threshold': 0.6, }, 'num_statements': len(assembled_stmts), 'num_documents': num_docs
ot = get_text(stmt.obj) if text_too_long(st, k) or text_too_long(ot, k): continue new_stmts.append(stmt) logger.info(f'{len(new_stmts)} statements after filter.') return new_stmts if __name__ == '__main__': # Load all raw statements eidos_stmts = load_eidos() hume_stmts = load_hume() sofia_stmts = load_sofia() cwms_stmts = load_cwms() hume_ap = AssemblyPipeline.from_json_file('hume_redundant.json') hume_stmts = hume_ap.run(hume_stmts) # Reground where needed reground_ap = AssemblyPipeline.from_json_file('reground_stmts.json') sofia_stmts = reground_ap.run(sofia_stmts) cwms_stmts = reground_ap.run(cwms_stmts) # Run shared assembly steps stmts = eidos_stmts + hume_stmts + sofia_stmts + cwms_stmts ap = AssemblyPipeline.from_json_file('assembly_steps.json') stmts = ap.run(stmts) funs = { 'grounding': None, 'location': location_matches,
import logging
import datetime
from indra_world.sources.dart import process_reader_output, DartClient
from indra_world.assembly.incremental_assembler import \
    IncrementalAssembler
from indra_world.resources import get_resource_file
from indra.pipeline import AssemblyPipeline
from indra_world.assembly.operations import *
from .db import DbManager

logger = logging.getLogger(__name__)

# Statement-preparation pipeline loaded once at import time from the
# packaged JSON resource file.
preparation_pipeline = AssemblyPipeline.from_json_file(
    get_resource_file('statement_preparation.json'))

# Reader systems whose output this service expects to handle.
expected_readers = {'eidos', 'hume', 'sofia'}


class ServiceController:
    """Coordinate projects, assemblers and DART-backed reader output.

    Parameters
    ----------
    db_url : str
        Database URL handed to DbManager, which backs all persistence
        for this controller.
    dart_client : DartClient, optional
        A pre-configured DART client. If not given, a DartClient in
        'web' storage mode is created.
    """
    def __init__(self, db_url, dart_client=None):
        # Persistence layer wrapping the given database URL
        self.db = DbManager(db_url)
        # NOTE(review): presumably maps project ids to incremental
        # assemblers -- populated elsewhere, confirm against those methods.
        self.assemblers = {}
        self.assembly_triggers = {}
        if dart_client:
            self.dart_client = dart_client
        else:
            # Default to the web-based DART storage backend
            self.dart_client = DartClient(storage_mode='web')

    def new_project(self, project_id, name, corpus_id=None):
        # DbManager.add_project returns None on failure; the handling of
        # that case continues beyond this visible chunk.
        res = self.db.add_project(project_id, name)
        if res is None:
for stmt in pp.statements: for ev in stmt.evidence: if 'provenance' not in ev.annotations: ev.annotations['provenance'] = [{ 'document': { '@id': doc_id } }] else: prov = ev.annotations['provenance'][0]['document'] prov['@id'] = doc_id stmts += pp.statements if grounding == 'compositional': validate_grounding_format(stmts) ap = AssemblyPipeline.from_json_file('assembly_%s.json' % grounding) assembled_stmts = ap.run(stmts) if do_upload: corpus_id = 'compositional_v4' stmts_to_json_file(assembled_stmts, '%s.json' % corpus_id) meta_data = { 'corpus_id': corpus_id, 'description': ('Assembly of 4 reader outputs with the ' 'compositional ontology (%s).' % ont_url), 'display_name': 'Compositional ontology assembly v3', 'readers': readers,