Example #1
def preprocess_statements(
    raw_statements: List[Statement],
    steps: List[Dict[str, Any]],
) -> List[Statement]:
    """Run a preprocessing pipeline on raw statements.

    Parameters
    ----------
    raw_statements :
        A list of INDRA Statements to preprocess.
    steps :
        A list of AssemblyPipeline steps that define the preprocessing
        to run.

    Returns
    -------
    preprocessed_statements :
        A list of preprocessed INDRA Statements.
    """
    logger.info('Running preprocessing on %d statements' % len(raw_statements))
    ap = AssemblyPipeline(steps)
    preprocessed_statements = ap.run(raw_statements)
    logger.info('%d statements after preprocessing' %
                len(preprocessed_statements))
    return preprocessed_statements
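For context, a hedged sketch of what the steps argument might look like; the two filter functions are assumptions drawn from the other examples on this page, not from this snippet:

# Hypothetical preprocessing steps; each dict names a pipeline-registered
# function, optionally with 'args' and 'kwargs' (see Example #2's docstring).
steps = [
    {'function': 'filter_no_hypothesis'},
    {'function': 'filter_grounded_only'},
]
preprocessed = preprocess_statements(raw_statements, steps)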
Example #2
File: api.py Project: steppi/indra
    def post(self):
        """Run an assembly pipeline for a list of Statements.

        Parameters
        ----------
        statements : list[indra.statements.Statement.to_json()]
            A list of INDRA Statements to run the pipeline on.

        pipeline : list[dict]
            A list of dictionaries representing steps in the pipeline. Each
            step should have a 'function' key and, if appropriate, 'args' and
            'kwargs' keys. For more documentation and examples, see
            https://indra.readthedocs.io/en/latest/modules/pipeline.html

        Returns
        -------
        statements : list[indra.statements.Statement.to_json()]
            The list of INDRA Statements resulting from running the pipeline
            on the list of input Statements.
        """
        args = request.json
        stmts = stmts_from_json(args.get('statements'))
        pipeline_steps = args.get('pipeline')
        ap = AssemblyPipeline(pipeline_steps)
        stmts_out = ap.run(stmts)
        return _return_stmts(stmts_out)
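A hedged sketch of the request body this endpoint expects, following the 'function'/'args'/'kwargs' step format described in the docstring; the filter_belief step and its cutoff are illustrative, not taken from the source:

# Hypothetical request payload; stmts is a list of Statement objects.
payload = {
    'statements': [stmt.to_json() for stmt in stmts],
    'pipeline': [
        {'function': 'filter_grounded_only'},
        {'function': 'filter_belief', 'kwargs': {'belief_cutoff': 0.8}},
    ],
}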
Example #3
def test_assembly_cycle():
    stmts = stmts_from_json_file(
        os.path.join(HERE, 'data', 'compositional_refinement_cycle_test.json'))
    # 874 is a refinement of -534
    pipeline = AssemblyPipeline(comp_assembly_json)
    assembled_stmts = pipeline.run(stmts)
    assert assembled_stmts[0].supported_by == [assembled_stmts[1]]
Example #4
    def run_assembly(self):
        """Run INDRA's assembly pipeline on the Statements."""
        self.eliminate_copies()
        stmts = self.get_indra_stmts()
        stnames = {s.name for s in self.search_terms}
        ap = AssemblyPipeline(self.assembly_config)
        self.assembled_stmts = ap.run(stmts, stnames=stnames)
Example #5
    def run_assembly(self):
        """Run INDRA's assembly pipeline on the Statements."""
        from indra_world.belief import get_eidos_scorer
        from indra_world.ontology import load_world_ontology
        self.eliminate_copies()
        stmts = self.get_indra_stmts()
        stnames = {s.name for s in self.search_terms}
        ap = AssemblyPipeline(self.assembly_config['main'])
        self.assembled_stmts = ap.run(stmts, stnames=stnames)
Example #6
    def get_statements(self):
        """Return a flat list of statements with their evidences."""
        stmts = []
        for sh, stmt in deepcopy(self.stmts_by_hash).items():
            stmt.evidence = self.evs_by_stmt_hash.get(sh, [])
            stmt.belief = self.beliefs[sh]
            stmts.append(stmt)
        # TODO: add refinement edges as supports/supported_by?
        # Here we run some post-processing steps on the statements
        ap = AssemblyPipeline(steps=self.post_processing_steps)
        stmts = ap.run(stmts)
        return stmts
Example #7
    def assemble_dynamic_pysb(self, **kwargs):
        """Assemble a version of a PySB model for dynamic simulation."""
        # First we need to run the regular assembly
        if not self.assembled_stmts:
            self.run_assembly()
        if 'dynamic' in self.assembly_config:
            logger.info('Assembling dynamic PySB model')
            ap = AssemblyPipeline(self.assembly_config['dynamic'])
            # Do not overwrite the assembled statements
            stmts = deepcopy(self.assembled_stmts)
            new_stmts = ap.run(stmts)
            pa = PysbAssembler()
            pa.add_statements(new_stmts)
            pysb_model = pa.make_model()
            return pysb_model
        logger.info('Did not find dynamic assembly steps')
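Examples 5, 7, and 8 index assembly_config by key, which suggests it is a dict of named pipelines. A hedged sketch of such a config, with illustrative steps only:

# Hypothetical assembly_config; 'main' is the regular assembly and
# 'dynamic' runs only when building the PySB model for simulation.
assembly_config = {
    'main': [
        {'function': 'filter_grounded_only'},
        {'function': 'run_preassembly', 'kwargs': {'return_toplevel': False}},
    ],
    'dynamic': [
        {'function': 'filter_by_type', 'args': [{'stmt_type': 'Activation'}]},
    ],
}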
Example #8
    def assemble_dynamic_pysb(self, mode='local', bucket=EMMAA_BUCKET_NAME):
        """Assemble a version of a PySB model for dynamic simulation."""
        # First we need to run the regular assembly
        if not self.assembled_stmts:
            self.run_assembly()
        if 'dynamic' in self.assembly_config:
            logger.info('Assembling dynamic PySB model')
            ap = AssemblyPipeline(self.assembly_config['dynamic'])
            # Do not overwrite the assembled statements
            stmts = deepcopy(self.assembled_stmts)
            self.dynamic_assembled_stmts = ap.run(stmts)
            pa = PysbAssembler()
            pa.add_statements(self.dynamic_assembled_stmts)
            pysb_model = pa.make_model()
            if mode == 's3' and 'gromet' in self.export_formats:
                fname = f'gromet_{self.date_str}.json'
                pysb_to_gromet(pysb_model, self.name,
                               self.dynamic_assembled_stmts, fname)
                logger.info(f'Uploading {fname}')
                client = get_s3_client(unsigned=False)
                client.upload_file(fname, bucket,
                                   f'exports/{self.name}/{fname}')
            return pysb_model
        logger.info('Did not find dynamic assembly steps')
Example #9
                                   corpus_id,
                                   grounding_mode='compositional',
                                   extract_filter=['influence'])
    '''
    stmts = []
    for reader in reader_versions['compositional']:
        logger.info('Loading %s' % reader)
        if os.path.exists('compositional_dec2020_%s_raw.pkl' % reader):
            with open('compositional_dec2020_%s_raw.pkl' % reader, 'rb') as fh:
                stmts += pickle.load(fh)
    '''
    logger.info('Got a total of %s statements' % len(stmts))
    assembly_config_file = os.path.join(HERE, os.pardir, 'indra_wm_service',
                                        'resources',
                                        'assembly_compositional_phase3.json')
    pipeline = AssemblyPipeline.from_json_file(assembly_config_file)
    assembled_stmts = pipeline.run(stmts)

    num_docs = 472
    meta_data = {
        'corpus_id': corpus_id,
        'description': 'Compositional grounding assembly for the initial '
        'Phase 3 documents, Eidos only.',
        'display_name': 'Compositional grounding assembly Phase 3 (Eidos)',
        'readers': list(reader_versions.keys()),
        'assembly': {
            'level': 'grounding_location',
            'grounding_threshold': 0.6,
        },
        'num_statements': len(assembled_stmts),
        'num_documents': num_docs
Example #10
def test_pipeline_methods():
    ap = AssemblyPipeline()
    assert len(ap) == 0
    ap.append(filter_grounded_only)
    assert len(ap) == 1
    ap.insert(0, filter_no_hypothesis)
    assert len(ap) == 2
    assert ap.steps[0] == {'function': 'filter_no_hypothesis'}
    # Append functions with arguments and runnable arguments
    ap.append(filter_by_type, Activation)
    assert len(ap) == 3
    assert ap.steps[2] == {
        'function': 'filter_by_type',
        'args': [{
            'stmt_type': 'Activation'
        }]
    }, ap.steps[2]
    ap.append(run_preassembly,
              matches_fun=location_matches,
              refinement_fun=location_refinement,
              normalize_equivalences=True,
              normalize_opposites=True,
              normalize_ns='WM',
              belief_scorer=RunnableArgument(get_eidos_scorer),
              ontology=world_ontology)
    assert len(ap) == 4
    assert isinstance(ap.steps[3], dict)
    assert isinstance(ap.steps[3]['kwargs'], dict)
    assert len(ap.steps[3]['kwargs']) == 7
    # Run argument to get value
    assert isinstance(ap.get_argument_value({'function': 'get_eidos_scorer'}),
                      BeliefScorer)
    # Get a function object as argument
    assert ap.get_argument_value({
        'function': 'location_matches',
        'no_run': True
    }) == location_matches
    # Get statement type as argument
    assert ap.get_argument_value({'stmt_type': 'Activation'}) == Activation
    # Get simple argument values
    assert ap.get_argument_value('test') == 'test'
    assert ap.get_argument_value(4) == 4
    assert ap.get_argument_value(True)
    assert not ap.get_argument_value(False)
    assert ap.get_argument_value([1, 2, 3]) == [1, 2, 3]
Example #11
def test_running_pipeline():
    # From json file
    ap = AssemblyPipeline.from_json_file(test_json)
    assert ap
    # AssemblyPipeline has methods for length and iteration
    assert len(ap) == 5
    for step in ap:
        assert step
    assembled_stmts = ap.run(stmts)
    assert assembled_stmts
    assert len(assembled_stmts) == 2
    # By manually adding steps
    ap2 = AssemblyPipeline()
    ap2.append(filter_no_hypothesis)
    ap2.append(map_grounding)
    ap2.append(filter_grounded_only)
    ap2.append(map_sequence)
    ap2.append(run_preassembly, return_toplevel=False)
    assembled_stmts2 = ap2.run(stmts)
    assert assembled_stmts2
    assert len(assembled_stmts2) == 2
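Since both pipelines produce the same two statements, test_json plausibly encodes the same five steps that ap2 builds by hand. A hedged reconstruction of the file's contents, written from Python (an assumption, not the actual test fixture):

import json

# Mirror of the manually built ap2 above; dumping these steps would
# produce a file in the format AssemblyPipeline.from_json_file expects.
steps = [
    {'function': 'filter_no_hypothesis'},
    {'function': 'map_grounding'},
    {'function': 'filter_grounded_only'},
    {'function': 'map_sequence'},
    {'function': 'run_preassembly', 'kwargs': {'return_toplevel': False}},
]
with open('test_json_sketch.json', 'w') as fh:
    json.dump(steps, fh, indent=1)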
Example #12
import logging
import datetime
from indra_world.sources.dart import process_reader_output, DartClient
from indra_world.assembly.incremental_assembler import \
    IncrementalAssembler
from indra_world.resources import get_resource_file
from indra.pipeline import AssemblyPipeline
from indra_world.assembly.operations import *
from .db import DbManager

logger = logging.getLogger(__name__)

preparation_pipeline = AssemblyPipeline.from_json_file(
    get_resource_file('statement_preparation.json'))

expected_readers = {'eidos', 'hume', 'sofia'}


class ServiceController:
    def __init__(self, db_url, dart_client=None):
        self.db = DbManager(db_url)
        self.assemblers = {}
        self.assembly_triggers = {}
        if dart_client:
            self.dart_client = dart_client
        else:
            self.dart_client = DartClient(storage_mode='web')

    def new_project(self, project_id, name, corpus_id=None):
        res = self.db.add_project(project_id, name)
        if res is None:
Example #13
    def add_statements(self, stmts):
        """Add new statements for incremental assembly.

        Parameters
        ----------
        stmts : list[indra.statements.Statement]
            A list of new prepared statements to be incrementally assembled
            into the set of existing statements.

        Returns
        -------
        AssemblyDelta
            An AssemblyDelta object representing the changes to the assembly
            as a result of the new added statements.
        """
        # We first organize statements by hash
        stmts_by_hash = defaultdict(list)
        for stmt in stmts:
            self.annotate_evidences(stmt)
            stmts_by_hash[stmt.get_hash(
                matches_fun=self.matches_fun)].append(stmt)
        stmts_by_hash = dict(stmts_by_hash)

        # We next create the new statements and new evidences data structures
        new_stmts = {}
        new_evidences = defaultdict(list)
        for sh, stmts_for_hash in stmts_by_hash.items():
            if sh not in self.stmts_by_hash:
                new_stmts[sh] = stmts_for_hash[0]
                self.stmts_by_hash[sh] = stmts_for_hash[0]
                self.evs_by_stmt_hash[sh] = []
            for stmt in stmts_for_hash:
                for ev in stmt.evidence:
                    new_evidences[sh].append(ev)
                    self.evs_by_stmt_hash[sh].append(ev)
        new_evidences = dict(new_evidences)
        # Here we run some post-processing steps on the new statements
        ap = AssemblyPipeline(steps=self.post_processing_steps)
        # NOTE: the assumption here is that the processing steps modify the
        # statement objects directly; this could be changed to return
        # statements that are then set in the hash-keyed dict
        ap.run(list(new_stmts.values()))

        # Next we extend refinements and re-calculate beliefs
        logger.info('Extending refinement filters')
        for filter in self.refinement_filters:
            filter.extend(new_stmts)
        new_refinements = set()
        logger.info('Finding refinements for new statements')
        for sh, stmt in tqdm.tqdm(new_stmts.items()):
            refinements = None
            for filter in self.refinement_filters:
                # Note that this gets the less specific statements
                refinements = filter.get_related(stmt, refinements)
            # Each pair of hashes is ordered as (less specific, more specific)
            new_refinements |= {(ref, sh) for ref in refinements}
            # This expects a list of less specific hashes for the statement
            extend_refinements_graph(self.refinements_graph,
                                     stmt,
                                     list(refinements),
                                     matches_fun=self.matches_fun)
        logger.info('Getting beliefs')
        beliefs = self.get_beliefs()
        logger.info('Returning assembly delta')
        return AssemblyDelta(new_stmts,
                             new_evidences,
                             new_refinements,
                             beliefs,
                             matches_fun=self.matches_fun)
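A minimal usage sketch, assuming an IncrementalAssembler (imported in Example #12) has already been constructed and prepared_stmts is a list of prepared Statements:

# Hypothetical call; the returned AssemblyDelta bundles the new statements,
# new evidences, new refinement edges, and updated beliefs.
delta = assembler.add_statements(prepared_stmts)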
Example #14
        ot = get_text(stmt.obj)
        if text_too_long(st, k) or text_too_long(ot, k):
            continue
        new_stmts.append(stmt)
    logger.info(f'{len(new_stmts)} statements after filter.')
    return new_stmts


if __name__ == '__main__':
    # Load all raw statements
    eidos_stmts = load_eidos()
    hume_stmts = load_hume()
    sofia_stmts = load_sofia()
    cwms_stmts = load_cwms()

    hume_ap = AssemblyPipeline.from_json_file('hume_redundant.json')
    hume_stmts = hume_ap.run(hume_stmts)

    # Reground where needed
    reground_ap = AssemblyPipeline.from_json_file('reground_stmts.json')
    sofia_stmts = reground_ap.run(sofia_stmts)
    cwms_stmts = reground_ap.run(cwms_stmts)

    # Run shared assembly steps
    stmts = eidos_stmts + hume_stmts + sofia_stmts + cwms_stmts
    ap = AssemblyPipeline.from_json_file('assembly_steps.json')
    stmts = ap.run(stmts)

    funs = {
        'grounding': None,
        'location': location_matches,
Example #15
def test_compositional_refinement_polarity_bug():
    stmts = stmts_from_json_file(
        os.path.join(HERE, 'data', 'test_missing_refinement.json'))
    pipeline = AssemblyPipeline(comp_assembly_json)
    assembled_stmts = pipeline.run(stmts)
    assert assembled_stmts[0].supported_by == [assembled_stmts[1]]
Example #16
            for stmt in pp.statements:
                for ev in stmt.evidence:
                    if 'provenance' not in ev.annotations:
                        ev.annotations['provenance'] = [{
                            'document': {
                                '@id': doc_id
                            }
                        }]
                    else:
                        prov = ev.annotations['provenance'][0]['document']
                        prov['@id'] = doc_id
            stmts += pp.statements
        if grounding == 'compositional':
            validate_grounding_format(stmts)

    ap = AssemblyPipeline.from_json_file('assembly_%s.json' % grounding)
    assembled_stmts = ap.run(stmts)

    if do_upload:
        corpus_id = 'compositional_v4'
        stmts_to_json_file(assembled_stmts, '%s.json' % corpus_id)

        meta_data = {
            'corpus_id': corpus_id,
            'description': ('Assembly of 4 reader outputs with the '
                            'compositional ontology (%s).' % ont_url),
            'display_name': 'Compositional ontology assembly v3',
            'readers': readers,