Example No. 1
def test_reground_texts():
    er = EidosReader()
    er.initialize_reader()
    groundings = er.reground_texts(['rainfall', 'hunger'])
    assert groundings[0][0][0] == \
           ('wm/concept/causal_factor/environmental/'
            'meteorologic/precipitation/rainfall'), groundings
    assert groundings[1][0][0] == \
           'wm/concept/causal_factor/condition/famine', groundings
Example No. 2
def reground_stmts(stmts):
    ont_manager = _make_un_ontology()
    eidos_reader = EidosReader()
    # Send the latest ontology and list of concept texts to Eidos
    yaml_str = yaml.dump(ont_manager.yaml_root)
    concepts = []
    for stmt in stmts:
        for concept in stmt.agent_list():
            concept_txt = concept.db_refs.get('TEXT')
            concepts.append(concept_txt)
    groundings = eidos_reader.reground_texts(concepts, yaml_str)
    # Update the corpus with new groundings
    idx = 0
    for stmt in stmts:
        for concept in stmt.agent_list():
            concept.db_refs['UN'] = groundings[idx]
            idx += 1
    return stmts
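
A minimal sketch of how reground_stmts might be called, assuming the raw
Statements were previously serialized to a pickle file (the file name is
illustrative, following the later examples):

import pickle

# Load previously extracted raw Statements (the file name is a placeholder)
with open('eidos_raw.pkl', 'rb') as fh:
    stmts = pickle.load(fh)

# Re-ground the concepts via Eidos; the Statements are updated in place and
# the same list is returned for convenience.
stmts = reground_stmts(stmts)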
Example No. 3
def __init__(self, scorer=None, corpora=None):
    self.corpora = corpora if corpora else {}
    self.scorer = scorer if scorer else get_eidos_bayesian_scorer()
    self.ont_manager = _make_un_ontology()
    self.eidos_reader = EidosReader()
Example No. 4
class LiveCurator(object):
    """Class coordinating the real-time curation of a corpus of Statements.

    Parameters
    ----------
    scorer : indra.belief.BeliefScorer
        A scorer object to use for the curation.
    corpora : dict[str, Corpus]
        A dictionary mapping corpus IDs to Corpus objects.
    """

    def __init__(self, scorer=None, corpora=None):
        self.corpora = corpora if corpora else {}
        self.scorer = scorer if scorer else get_eidos_bayesian_scorer()
        self.ont_manager = _make_un_ontology()
        self.eidos_reader = EidosReader()

    # TODO: generalize this to other kinds of scorers
    def reset_scorer(self):
        """Reset the scorer used for couration."""
        self.scorer = get_eidos_bayesian_scorer()
        for corpus_id, corpus in self.corpora.items():
            corpus.curations = {}

    def get_corpus(self, corpus_id):
        """Return a corpus given an ID.

        If the corpus ID cannot be found, an InvalidCorpusError is raised.

        Parameters
        ----------
        corpus_id : str
            The ID of the corpus to return.

        Returns
        -------
        Corpus
            The corpus with the given ID.
        """
        try:
            corpus = self.corpora[corpus_id]
            return corpus
        except KeyError:
            raise InvalidCorpusError

    def submit_curation(self, corpus_id, curations):
        """Submit correct/incorrect curations fo a given corpus.

        Parameters
        ----------
        corpus_id : str
            The ID of the corpus to which the curations apply.
        curations : dict
            A dict of curations with keys corresponding to Statement UUIDs and
            values corresponding to correct/incorrect feedback.
        """
        corpus = self.get_corpus(corpus_id)
        # Start tabulating the curation counts
        prior_counts = {}
        subtype_counts = {}
        # Take each curation from the input
        for uuid, correct in curations.items():
            # Save the curation in the corpus
            # TODO: handle already existing curation
            stmt = corpus.statements.get(uuid)
            if stmt is None:
                logger.warning('%s is not in the corpus.' % uuid)
                continue
            corpus.curations[uuid] = correct
            # Now take each piece of evidence for the statement, assume that
            # it follows the correctness of the curation, and count it
            # towards its source
            for ev in stmt.evidence:
                # Determine the index into the curation count list
                idx = 0 if correct else 1
                extraction_rule = ev.annotations.get('found_by')
                # If there is no extraction rule then we just score the source
                if not extraction_rule:
                    try:
                        prior_counts[ev.source_api][idx] += 1
                    except KeyError:
                        prior_counts[ev.source_api] = [0, 0]
                        prior_counts[ev.source_api][idx] += 1
                # Otherwise we score the specific extraction rule
                else:
                    try:
                        subtype_counts[ev.source_api][extraction_rule][idx] \
                            += 1
                    except KeyError:
                        if ev.source_api not in subtype_counts:
                            subtype_counts[ev.source_api] = {}
                        subtype_counts[ev.source_api][extraction_rule] = [0, 0]
                        subtype_counts[ev.source_api][extraction_rule][idx] \
                            += 1
        # Finally, we update the scorer with the new curation counts
        self.scorer.update_counts(prior_counts, subtype_counts)

    def update_beliefs(self, corpus_id):
        """Return updated belief scores for a given corpus.

        Parameters
        ----------
        corpus_id : str
            The ID of the corpus for which beliefs are to be updated.

        Returns
        -------
        dict
            A dictionary of belief scores with keys corresponding to Statement
            UUIDs and values to new belief scores.
        """
        corpus = self.get_corpus(corpus_id)
        be = BeliefEngine(self.scorer)
        stmts = list(corpus.statements.values())
        be.set_prior_probs(stmts)
        # Here we set beliefs based on actual curation
        for uuid, correct in corpus.curations.items():
            stmt = corpus.statements.get(uuid)
            if stmt is None:
                logger.warning('%s is not in the corpus.' % uuid)
                continue
            stmt.belief = correct
        belief_dict = {st.uuid: st.belief for st in stmts}
        return belief_dict

    def update_groundings(self, corpus_id):
        """Re-ground and re-assemble the Statements of a given corpus.

        Parameters
        ----------
        corpus_id : str
            The ID of the corpus whose groundings are to be updated.

        Returns
        -------
        list[indra.statements.Statement]
            The re-assembled Statements with updated groundings.
        """
        corpus = self.get_corpus(corpus_id)

        # Send the latest ontology and list of concept texts to Eidos
        yaml_str = yaml.dump(self.ont_manager.yaml_root)
        concepts = []
        for stmt in corpus.raw_statements:
            for concept in stmt.agent_list():
                concept_txt = concept.db_refs.get('TEXT')
                concepts.append(concept_txt)
        groundings = self.eidos_reader.reground_texts(concepts, yaml_str)
        # Update the corpus with new groundings
        idx = 0
        for stmt in corpus.raw_statements:
            for concept in stmt.agent_list():
                concept.db_refs['UN'] = groundings[idx]
                idx += 1
        assembled_statements = default_assembly(corpus.raw_statements)
        corpus.statements = {s.uuid: s for s in assembled_statements}
        return assembled_statements
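
A hedged usage sketch for LiveCurator: the corpus ID, Statement UUIDs, and the
pre-existing Corpus object below are placeholders, and the 1/0 values follow
the correct/incorrect convention described in the submit_curation docstring.

# 'corpus' is assumed to be an existing Corpus object; the corpus ID and
# Statement UUIDs below are placeholders for illustration only.
curator = LiveCurator(corpora={'my_corpus': corpus})

# Mark one Statement as correct (1) and another as incorrect (0)
curator.submit_curation('my_corpus', {'uuid-1': 1, 'uuid-2': 0})

# Recompute belief scores with the curations applied, then re-ground and
# re-assemble the corpus
beliefs = curator.update_beliefs('my_corpus')
updated_stmts = curator.update_groundings('my_corpus')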
Example No. 5
@app.route('/process_text', methods=['POST'])
def process_text():
    text = request.json.get('text')
    if not text:
        return json.dumps({})
    res = er.process_text(text)
    return json.dumps(res)


@app.route('/reground', methods=['POST'])
def reground():
    text = request.json.get('text')
    ont_yml = request.json.get('ont_yml', wm_yml)
    topk = request.json.get('topk', 10)
    is_canonicalized = request.json.get('is_canonicalized', False)
    if not text:
        return json.dumps([])
    if isinstance(text, str):
        text = [text]
    res = er.reground_texts(text, ont_yml, topk=topk,
                            is_canonicalized=is_canonicalized)
    return json.dumps(res)


if __name__ == '__main__':
    port = int(sys.argv[1]) if len(sys.argv) > 1 else 6666
    er = EidosReader()
    er.process_text('hello')  # This is done to initialize the system
    app.run(host='0.0.0.0', port=port)
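
A possible client for the /reground endpoint above, using the requests
library; the port and payload values are assumptions matching this script's
defaults:

import requests

# Assumes the server above is running locally on its default port (6666)
resp = requests.post('http://localhost:6666/reground',
                     json={'text': ['rainfall', 'hunger'], 'topk': 5})
# The endpoint returns a json.dumps string, which requests can parse directly
groundings = resp.json()
print(groundings)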
Example No. 6
from .process import reground_stmts, remove_raw_grounding

onts = {
    'flattened_interventions':
    ('https://raw.githubusercontent.com/WorldModelers/Ontologies/master/'
     'wm_with_flattened_interventions_metadata.yml'),
    'main':
    ('https://raw.githubusercontent.com/WorldModelers/Ontologies/master/'
     'wm_metadata.yml'),
    'no_regrounding':
    ('https://raw.githubusercontent.com/WorldModelers/Ontologies/master/'
     'wm_metadata.yml'),
}

if __name__ == '__main__':
    eidos_reader = EidosReader()

    for key, ont_url in onts.items():
        with open('eidos_raw.pkl', 'rb') as fh:
            stmts = pickle.load(fh)
        #stmts = load_eidos()
        #stmts = ac.filter_by_type(stmts, Influence)
        #remove_namespaces(stmts, ['WHO', 'MITRE12', 'UN', 'PROPS',
        #                          'INTERVENTIONS'])
        ont = load_world_ontology(ont_url)
        if key != 'no_regrounding':
            stmts = reground_stmts(stmts, ont, 'WM', None, True)

        scorer = get_eidos_scorer()

        matches_fun, refinement_fun = None, None
Example No. 7
import json
from indra.sources.eidos.reader import EidosReader
from indra.sources import hume
from indra.statements import stmts_to_json_file


def load_config():
    with open('config.json', 'r') as fh:
        config = json.load(fh)
    return config


er = EidosReader()


def do_regrounding(stmts):
    concepts = []
    for stmt in stmts:
        for concept in stmt.agent_list():
            concept_txt = concept.db_refs.get('TEXT')
            concepts.append(concept_txt)
    groundings = er.reground_texts(concepts)
    # Update the corpus with new groundings
    idx = 0
    for stmt in stmts:
        for concept in stmt.agent_list():
            concept.db_refs['UN'] = groundings[idx]
            idx += 1
    return stmts

Example No. 8
"""This script runs a local Flask server that allows submitted text to be
read with Eidos. To run the server, do

    python -m indra.sources.eidos.server

and then submit POST requests to the `localhost:5000/process_text` endpoint
with JSON content as `{'text': 'text to read'}`. The response will be the
Eidos JSON-LD output.
"""

import json
from flask import Flask, request
from indra.sources.eidos.reader import EidosReader


app = Flask(__name__)


@app.route('/process_text', methods=['POST'])
def process_text():
    text = request.json.get('text')
    if not text:
        return {}
    res = er.process_text(text, 'json_ld')
    return json.dumps(res)


if __name__ == '__main__':
    er = EidosReader()
    er.process_text('hello', 'json_ld')
    app.run(host='0.0.0.0')
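
Following the usage described in the module docstring above, a minimal client
call could look like the sketch below (the requests library and the example
sentence are assumptions; any HTTP client works):

import requests

# POST text to the locally running server (default Flask port 5000)
resp = requests.post('http://localhost:5000/process_text',
                     json={'text': 'Rainfall causes displacement.'})
jsonld = resp.json()  # the Eidos JSON-LD output described above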