def eidos_process_statements(sentence=None, webservice=None, use_webService=False):
    """Process a sentence with Eidos and return the resulting processor.

    Parameters
    ----------
    sentence : Optional[str]
        The text to read. If None, no reading is attempted and None is
        returned (previously a None sentence was still handed to the
        reader when ``use_webService`` was False).
    webservice : Optional[str]
        URL of an Eidos reader web service. Only used when
        ``use_webService`` is True. Default: http://localhost:9000.
    use_webService : bool
        If True, read via the web service; otherwise read with the
        local Eidos JAR.

    Returns
    -------
    ep : Optional[object]
        The Eidos processor carrying extracted statements, or None when
        no sentence was provided.
    """
    # Fixed garbled debug message (was 'Running...rul ').
    print('Running...')
    if sentence is None:
        # Previously the JAR path called eidos.process_text(None).
        return None
    if use_webService:
        if webservice is None:
            webservice = 'http://localhost:9000'
        return eidos.process_text(sentence, webservice=webservice)
    return eidos.process_text(sentence)
def post(self):
    """Process text with EIDOS and return INDRA Statements.

    Parameters
    ----------
    text : str
        The text to be processed.
    webservice : str
        An Eidos reader web service URL to send the request to. This
        endpoint requires it: a missing or empty value is rejected with
        HTTP 400, so reading with a local Eidos JAR is not supported
        here.
    grounding_ns : Optional[list]
        A list of name spaces for which INDRA should represent
        groundings, when given. If not specified or None, all grounding
        name spaces are propagated. If an empty list, no groundings are
        propagated. Example: ['UN', 'WM']. Default: None

    Returns
    -------
    statements : list[indra.statements.Statement.to_json()]
        A list of extracted INDRA Statements.
    """
    args = request.json
    text = args.get('text')
    webservice = args.get('webservice')
    grounding_ns = args.get('grounding_ns')
    # Reject early rather than attempting JAR-based reading.
    if not webservice:
        abort(400, 'No web service address provided.')
    ep = eidos.process_text(text, webservice=webservice,
                            grounding_ns=grounding_ns)
    return _stmts_from_proc(ep)
def process_texts(texts):
    """Run Eidos reading over article texts and collect INDRA Statements.

    Parameters
    ----------
    texts : dict
        A dictionary mapping PIIs to texts to process.

    Returns
    -------
    pii_stmts : dict
        A dictionary mapping PIIs as keys and extracted INDRA statements.
    """
    eidos_url = os.environ.get('EIDOS_URL')
    logger.info('Reading with Eidos URL: %s' % eidos_url)
    results = {}
    for pii, article_text in texts.items():
        logger.info('Reading the article with %s pii.' % pii)
        try:
            processor = eidos.process_text(article_text,
                                           webservice=eidos_url)
            if processor:
                results[pii] = processor.statements
        # Handle Connection and other errors
        except Exception as err:
            logger.info('Could not read the text because of %s' % str(err))
            continue
    return results
def reground_tests(tests, webservice):
    """Reground tests to updated ontology."""
    # Gather the evidence sentence behind each test's statement and read
    # them all in a single Eidos request.
    evidence_texts = [test.stmt.evidence[0].text for test in tests]
    combined_text = ' '.join(evidence_texts)
    regrounded = process_text(combined_text, webservice=webservice).statements
    regrounded = standardize_names_groundings(regrounded)
    return [StatementCheckingTest(stmt) for stmt in regrounded]
def eidos_process_text():
    """Process text with EIDOS and return INDRA Statements."""
    # CORS preflight requests get an empty response.
    if request.method == 'OPTIONS':
        return {}
    # Local renamed from 'response' to avoid shadowing bottle's global.
    payload = json.loads(request.body.read().decode('utf-8'))
    text = payload.get('text')
    webservice = payload.get('webservice')
    ep = eidos.process_text(text, webservice=webservice)
    return _stmts_from_proc(ep)
def text_to_stmts(text):
    """Run Eidos reading on a given text and return INDRA Statements."""
    # Cache JSON-LD output on disk so a sentence that was already read is
    # not re-read.
    cache_file = text.replace(' ', '_').replace(',', '_') + '.jsonld'
    if not os.path.exists(cache_file):
        ep = eidos.process_text(text)
        # The reader dumps its output here; move it under the cache name.
        shutil.move('eidos_output.json', cache_file)
    else:
        ep = eidos.process_json_ld_file(cache_file)
    return ep.statements
def from_text(cls, text: str, webservice=None):
    """Construct an AnalysisGraph object from text, using Eidos to
    perform machine reading.

    Args:
        text: Input text to be processed by Eidos.
        webservice: URL for an Eidos webservice, either the INDRA web
            service or an Eidos instance running locally on your
            computer (e.g. http://localhost:9000).
    """
    ep = process_text(text, webservice=webservice)
    return cls.from_statements(ep.statements)
def test_process_text():
    # Read a single causal sentence via JSON-LD output.
    ep = eidos.process_text('The cost of fuel decreases water trucking.',
                            out_format='json_ld')
    assert ep is not None
    statements = ep.statements
    assert len(statements) == 1
    influence = statements[0]
    assert isinstance(influence, Influence)
    assert influence.subj.name == 'cost fuel'
    assert influence.obj.name == 'water trucking'
    assert influence.obj_delta.get('polarity') == -1
    found_by = influence.evidence[0].annotations['found_by']
    assert found_by == 'ported_syntax_1_verb-Causal'
def test_process_text():
    # Read one causal sentence and check subject/object extraction.
    ep = eidos.process_text('The cost of fuel decreases water trucking.')
    assert ep is not None
    assert len(ep.statements) == 1
    infl = ep.statements[0]
    assert isinstance(infl, Influence)
    subj, obj = infl.subj, infl.obj
    assert subj.concept.name == 'fuel', subj.concept.name
    assert obj.concept.name == 'water trucking', obj.concept.name
    assert obj.delta.polarity == -1
    annotations = infl.evidence[0].annotations
    assert annotations['found_by'] == 'ported_syntax_1_verb-Causal'
    assert 'TEXT' in subj.concept.db_refs
    assert 'TEXT' in obj.concept.db_refs
def test_process_text():
    # Check that a simple causal sentence yields one Influence statement
    # with the expected concepts, polarity, and provenance annotations.
    sentence = 'The cost of fuel decreases water trucking.'
    ep = eidos.process_text(sentence)
    assert ep is not None
    assert len(ep.statements) == 1
    stmt = ep.statements[0]
    assert isinstance(stmt, Influence)
    assert stmt.subj.concept.name == 'fuel', stmt.subj.concept.name
    assert stmt.obj.concept.name == 'water trucking', stmt.obj.concept.name
    assert stmt.obj.delta.polarity == -1
    found_by = stmt.evidence[0].annotations['found_by']
    assert found_by == 'ported_syntax_1_verb-Causal'
    for concept in (stmt.subj.concept, stmt.obj.concept):
        assert 'TEXT' in concept.db_refs
def eidos_process_text():
    """Process text with EIDOS and return INDRA Statements."""
    # CORS preflight requests get an empty response.
    if request.method == 'OPTIONS':
        return {}
    body = json.loads(request.body.read().decode('utf-8'))
    text = body.get('text')
    webservice = body.get('webservice')
    if not webservice:
        # A reader service URL is required by this endpoint.
        response.status = 400
        response.content_type = 'application/json'
        return json.dumps({'error': 'No web service address provided.'})
    ep = eidos.process_text(text, webservice=webservice)
    return _stmts_from_proc(ep)
def on_read(b):
    """Widget callback: read the selected article with Eidos and print
    the extracted Influence statements.

    Parameters
    ----------
    b : object
        The widget/button event object (unused).

    Side effects: rebinds the module-level ``statements`` list and
    prints a markdown summary per statement.
    """
    global articles
    global statements
    raw_txt = elsevier_client.extract_text(articles[int(paper_id.value)])
    if 'Internal Server Error' in raw_txt:
        print('Sorry, that paper was not accessible for reading.')
        # Bug fix: previously execution fell through and the error page
        # text was sent to the reader anyway; stop here instead.
        statements = []
        return
    ep = eidos.process_text(raw_txt, webservice='http://localhost:5000')
    statements = ep.statements
    print('We extracted %d statements:' % len(statements))
    for stmt in statements:
        # Use the top UN grounding's leaf entry as a short label.
        sg = stmt.subj.db_refs['UN'][0][0].split('/')[-1]
        og = stmt.obj.db_refs['UN'][0][0].split('/')[-1]
        printmd('* **%s**(%s) %s **%s**(%s)' %
                (sg, stmt.subj.name,
                 '->' if stmt.overall_polarity() == 1 else '-|',
                 og, stmt.obj.name))
def read_eidos(docnames):
    """Read named documents with Eidos, caching JSON-LD output per doc,
    and return the combined list of extracted statements."""
    all_stmts = []
    for docname in docnames:
        txt_path = os.path.join('docs', '%s.txt' % docname)
        json_path = os.path.join('eidos', '%s.txt.jsonld' % docname)
        if os.path.exists(json_path):
            # Reuse cached reader output.
            ep = eidos.process_json_ld_file(json_path)
        else:
            with open(txt_path, 'r') as fh:
                print('Reading %s' % docname)
                txt = fh.read()
                ep = eidos.process_text(txt, save_json=json_path,
                                        out_format='json_ld')
        print('%d stmts from %s' % (len(ep.statements), docname))
        # Set the PMID on these statements so that we can get the
        # document ID during assembly
        for stmt in ep.statements:
            stmt.evidence[0].pmid = docname
        all_stmts += ep.statements
    return all_stmts
def test_process_text_json_ld():
    # Read one causal sentence with JSON-LD output and verify extraction.
    ep = eidos.process_text('The cost of fuel decreases water trucking.',
                            out_format='json_ld')
    assert ep is not None
    assert len(ep.statements) == 1
    stmt = ep.statements[0]
    assert isinstance(stmt, Influence)
    subj, obj = stmt.subj, stmt.obj
    assert subj.name == 'cost fuel'
    assert obj.name == 'water trucking'
    assert stmt.obj_delta.get('polarity') == -1
    found_by = stmt.evidence[0].annotations['found_by']
    assert found_by == 'ported_syntax_1_verb-Causal'
    assert 'TEXT' in subj.db_refs
    assert 'TEXT' in obj.db_refs
    # assert 'UN' in stmt.subj.db_refs
    # assert 'UN' in stmt.obj.db_refs
    # FIXME: once groundings are propagated well from offline reading
    # this should work
    # assert len(stmt.subj.db_refs['UN']) > 5
    # assert len(stmt.obj.db_refs['UN']) > 5
    # Make sure sanitization works
    assert ep._sanitize('-LRB-something-RRB-') == '(something)'
def run_indra(doc):
    """Read a document's extracted text with Eidos and return the
    extracted statements serialized as JSON dicts."""
    text = fix_periods(doc['extracted_text'])
    ep = eidos.process_text(text, webservice=EIDOS_WS_URL)
    return [stmt.to_json() for stmt in ep.statements]
def from_text(cls, text: str):
    """Construct an AnalysisGraph object from text, using Eidos to
    perform machine reading.
    """
    ep = process_text(text)
    return cls.from_statements(ep.statements)
reads the abstracts corresponding to each PMID with Eidos. It is complementary to the pipeline which starts with the CORD19 document set."""
import os
import time
import pickle
from tqdm import tqdm
from indra.sources import eidos
from indra.literature import pubmed_client

# Repository root, two directory levels above this script.
root = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                    os.pardir, os.pardir)

# Search terms used to collect candidate PMIDs from PubMed.
keywords = ['covid19', 'covid-19', 'sars-cov-2', 'sars-cov2']
ids = []
for kw in keywords:
    ids += pubmed_client.get_ids(kw)

stmts = {}
for pmid in tqdm(ids):
    # Rate-limit requests to the PubMed API.
    time.sleep(3)
    abst = pubmed_client.get_abstract(pmid)
    if not abst:
        continue
    # Read the abstract via a locally running Eidos web service.
    ep = eidos.process_text(abst, webservice='http://localhost:9000/')
    # Tag each statement's evidence with its source PMID so provenance
    # is preserved downstream.
    for stmt in ep.statements:
        stmt.evidence[0].pmid = pmid
    stmts[pmid] = ep.statements

# Persist the PMID -> statements mapping for later assembly.
with open(os.path.join(root, 'stmts', 'eidos_abstract_stmts.pkl'), 'wb') as fh:
    pickle.dump(stmts, fh)