Esempio n. 1
0
    def get_statements(self, reprocess=False):
        """Return the statements for this reading, producing them if needed.

        The result of processing is cached on ``self._statements``. Later
        calls return a shallow copy of that cache unless ``reprocess`` is
        set.

        Parameters
        ----------
        reprocess : bool
            If True, run the processor again even if statements were
            already cached. Default is False.

        Returns
        -------
        list
            The statements for this content; empty when there is no
            content or when no processor could be produced.

        Raises
        ------
        ReadingError
            If ``self.reader`` does not match any of the known readers.
        """
        # Cached result available and no refresh requested: return a copy.
        if self._statements is not None and not reprocess:
            return self._statements[:]

        # Without content there is nothing to process.
        if self.content is None:
            self._statements = []
            return []

        # Dispatch on the reader that produced the content.
        if self.reader == ReachReader.name:
            proc = reach.process_json_str(json.dumps(self.content))
        elif self.reader == SparserReader.name:
            proc = sparser.process_json_dict(self.content)
            if proc is not None:
                proc.set_statements_pmid(None)
        elif self.reader == TripsReader.name:
            proc = trips.process_xml(self.content)
        else:
            raise ReadingError("Unknown reader: %s." % self.reader)

        # A missing processor means processing failed; log it and carry on
        # with an empty result.
        if proc is not None:
            produced = proc.statements
        else:
            logger.error("Production of statements from %s failed for %s."
                         % (self.reader, self.content_id))
            produced = []

        # Cache a copy so the cache is a separate list from the one returned.
        self._statements = produced[:]
        return produced
Esempio n. 2
0
 def get_statements(self):
     """Create statements from the content of this reading.

     The processor is chosen from ``self.reader``; both supported readers
     (Reach and Sparser) require the content to be in JSON format.

     Returns
     -------
     list
         The statements held by the processor, or an empty list if no
         processor could be produced from the content.

     Raises
     ------
     ReadingError
         If the format is not JSON for the given reader, or if
         ``self.reader`` is not a supported reader.
     """
     logger.debug("Making statements from %s." % self.reading_id)
     if self.reader == ReachReader.name:
         if self.format == formats.JSON:
             # Process the reach json into statements.
             json_str = json.dumps(self.content)
             processor = reach.process_json_str(json_str)
         else:
             raise ReadingError("Incorrect format for Reach output: %s." %
                                self.format)
     elif self.reader == SparserReader.name:
         if self.format == formats.JSON:
             # Process the sparser content into statements
             processor = sparser.process_json_dict(self.content)
         else:
             raise ReadingError(
                 "Sparser should only ever be JSON, not %s." % self.format)
     else:
         # Without this branch `processor` would be unbound below and the
         # caller would get an UnboundLocalError instead of a clear error.
         raise ReadingError("Unknown reader: %s." % self.reader)

     if processor is None:
         logger.error("Production of statements from %s failed for %s." %
                      (self.reader, self.tcid))
         stmts = []
     else:
         processor.set_statements_pmid(None)
         stmts = processor.statements
     return stmts
Esempio n. 3
0
    def get_statements(self, reprocess=False):
        """Build, cache, and return the statements for this content.

        Processing happens on the first call, or whenever ``reprocess`` is
        true. Otherwise a shallow copy of the list cached on
        ``self._statements`` is returned.

        Parameters
        ----------
        reprocess : bool
            Force the content to be processed again. Default is False.

        Returns
        -------
        list
            The statements; an empty list if there is no content or the
            processor could not be created.

        Raises
        ------
        ReadingError
            If ``self.reader`` is not a recognized reader name.
        """
        needs_processing = self._statements is None or reprocess
        if not needs_processing:
            # Serve from the cache, copying so callers cannot mutate it.
            return self._statements[:]

        if self.content is None:
            # No content: record and return an empty result.
            self._statements = []
            return []

        # Select the processor for the reader that generated the content.
        if self.reader == ReachReader.name:
            serialized = json.dumps(self.content)
            processor = reach.process_json_str(serialized)
        elif self.reader == SparserReader.name:
            processor = sparser.process_json_dict(self.content)
            if processor is not None:
                processor.set_statements_pmid(None)
        elif self.reader == TripsReader.name:
            processor = trips.process_xml(self.content)
        else:
            raise ReadingError("Unknown reader: %s." % self.reader)

        if processor is None:
            # Processing failed; report it and fall back to no statements.
            logger.error(
                "Production of statements from %s failed for %s." %
                (self.reader, self.content_id))
            result = []
        else:
            result = processor.statements

        self._statements = result[:]
        return result
Esempio n. 4
0
def read_pmids(pmids, date):
    """Return extracted INDRA Statements per PMID after running reading on AWS.

    The PMIDs are written to a local file named after the given date, a
    REACH reading job is submitted for that file, and once the job has
    completed the reader output for each PMID is processed into Statements.

    Parameters
    ----------
    pmids : list[str]
        A list of PMIDs to read.
    date : datetime
        The date and time associated with the reading; it is formatted
        into the name of the PMID list file.

    Returns
    -------
    dict[str, list[indra.statements.Statement]]
        A dict of PMIDs and the list of Statements extracted for the given
        PMID by reading. PMIDs with no reader output, or whose output could
        not be processed, map to an empty list.
    """
    # The timestamp must be built before the file name that embeds it.
    date_str = date.strftime('%Y-%m-%d-%H-%M-%S')
    pmid_fname = 'pmids-%s.txt' % date_str
    with open(pmid_fname, 'wt') as fh:
        fh.write('\n'.join(pmids))
    job_list = submit_reading('emmaa', pmid_fname, ['reach'])
    wait_for_complete('run_reach_queue',
                      job_list,
                      idle_log_timeout=600,
                      kill_on_log_timeout=True)
    pmid_stmts = {}
    for pmid in pmids:
        reach_json_str = get_reader_json_str('reach', pmid)
        # No reader output was found for this PMID; there is nothing to
        # hand to the processor.
        if reach_json_str is None:
            pmid_stmts[pmid] = []
            continue
        rp = reach.process_json_str(reach_json_str)
        pmid_stmts[pmid] = rp.statements if rp else []
    return pmid_stmts
Esempio n. 5
0
def reach_process_json():
    """Process REACH json and return INDRA Statements.

    The request body is decoded as UTF-8 JSON and its 'json' entry is
    passed to the REACH processor. OPTIONS requests are answered with an
    empty dict without reading the body.
    """
    if request.method == 'OPTIONS':
        return {}
    payload = json.loads(request.body.read().decode('utf-8'))
    processor = reach.process_json_str(payload.get('json'))
    return _stmts_from_proc(processor)
Esempio n. 6
0
def reach_process_json():
    """Process REACH json and return INDRA Statements.

    Reads the request body, parses it as JSON, and runs the REACH
    processor on the value stored under the 'json' key. An OPTIONS
    request short-circuits with an empty dict.
    """
    if request.method == 'OPTIONS':
        return {}
    raw_body = request.body.read()
    parsed = json.loads(raw_body.decode('utf-8'))
    reach_proc = reach.process_json_str(parsed.get('json'))
    return _stmts_from_proc(reach_proc)
Esempio n. 7
0
def process_reach_str(reach_json_str, pmid):
    """Run the REACH processor on a JSON string and return its statements.

    Parameters
    ----------
    reach_json_str : str
        The REACH output, as a JSON string.
    pmid : str
        The PMID passed to the processor as the citation.

    Returns
    -------
    list
        The statements held by the processor, or an empty list if
        processing raised an exception or produced no processor.

    Raises
    ------
    ValueError
        If ``reach_json_str`` is None.
    """
    if reach_json_str is None:
        raise ValueError('reach_json_str cannot be None')
    # Run the REACH processor on the JSON
    try:
        reach_proc = reach.process_json_str(reach_json_str, citation=pmid)
    # If there's a problem, skip it
    except Exception as e:
        print("Exception processing %s" % pmid)
        print(e)
        return []
    # The processor may come back as None when processing fails without
    # raising; treat that like any other failure.
    if reach_proc is None:
        return []
    return reach_proc.statements
Esempio n. 8
0
File: api.py Progetto: budakn/INDRA
def reach_process_json():
    """Process REACH json and return INDRA Statements.

    Returns a dict with a single 'statements' key. Its value is the JSON
    form of the extracted statements, or an empty list when no processor
    or no statements were produced. OPTIONS requests get an empty dict.
    """
    if request.method == 'OPTIONS':
        return {}
    payload = json.loads(request.body.read().decode('utf-8'))
    processor = reach.process_json_str(payload.get('json'))
    # Guard clause: nothing was extracted.
    if not (processor and processor.statements):
        return {'statements': []}
    return {'statements': stmts_to_json(processor.statements)}
Esempio n. 9
0
File: api.py Progetto: steppi/indra
    def post(self):
        """Process REACH json and return INDRA Statements.

        Parameters
        ----------
        json : str
            The REACH output to be processed, given as a JSON string
            under the 'json' key of the request payload.

        Returns
        -------
        statements : list[indra.statements.Statement.to_json()]
            A list of extracted INDRA Statements.
        """
        payload = request.json
        processor = reach.process_json_str(payload.get('json'))
        return _stmts_from_proc(processor)
Esempio n. 10
0
def process_paper_aws(pmid, start_time_local):
    """Fetch the REACH output for a PMID and run the REACH processor on it.

    Parameters
    ----------
    pmid : str
        The PMID whose content and reader output are looked up.
    start_time_local : datetime
        The time the calling script started; compared against the
        'LastModified' entry of the content metadata.

    Returns
    -------
    tuple
        ``(processor, content_type)``. Both are None if the content could
        not be fetched, and the processor is None if no reader output was
        found. The content type is replaced by 'existing_json' when the
        metadata was last modified before the script started.
    """
    try:
        metadata, content_type = get_full_text(pmid, metadata=True)
    except Exception as e:
        logger.error('Could not get content from S3: %s' % e)
        return None, None

    logger.info('Downloading %s output from AWS' % pmid)
    json_str = get_reader_json_str('reach', pmid)
    if not json_str:
        logger.info('Could not get output.')
        return None, content_type
    processor = reach.process_json_str(json_str)

    now_local = datetime.datetime.now(tzlocal.get_localzone())
    script_age = now_local - start_time_local
    output_age = now_local - metadata['LastModified']
    # Older than the script itself means it was not modified by this run.
    if output_age > script_age:
        content_type = 'existing_json'
    return processor, content_type
Esempio n. 11
0
def process_paper_aws(pmid, start_time_local):
    """Process the stored REACH output for a paper.

    Looks up the paper's content metadata, then its REACH reader output,
    and runs the REACH processor on that output.

    Parameters
    ----------
    pmid : str
        Identifier of the paper to process.
    start_time_local : datetime
        Start time of the running script, used to decide whether the
        content predates this run.

    Returns
    -------
    tuple
        The REACH processor (or None) and the content type (or None when
        the content lookup failed). The content type is 'existing_json'
        if the metadata 'LastModified' time is earlier than the script
        start.
    """
    try:
        metadata, content_type = get_full_text(pmid, metadata=True)
    except Exception as e:
        logger.error('Could not get content from S3: %s' % e)
        return None, None

    logger.info('Downloading %s output from AWS' % pmid)
    reader_output = get_reader_json_str('reach', pmid)
    if not reader_output:
        logger.info('Could not get output.')
        return None, content_type
    reach_proc = reach.process_json_str(reader_output)

    local_now = datetime.datetime.now(tzlocal.get_localzone())
    since_script_start = local_now - start_time_local
    since_last_modified = local_now - metadata['LastModified']
    # Not modified since the script started: label it as pre-existing.
    if since_last_modified > since_script_start:
        content_type = 'existing_json'
    return reach_proc, content_type
Esempio n. 12
0
def read_pmids(pmids, date):
    """Return extracted INDRA Statements per PMID after running reading on AWS.

    Parameters
    ----------
    pmids : list[str]
        A list of PMIDs to read.
    date : datetime
        The date and time associated with the reading, typically the
        current time. It is used to name the PMID list file.

    Returns
    -------
    dict[str, list[indra.statements.Statement]]
        A dict of PMIDs and the list of Statements extracted for the given
        PMID by reading. A PMID maps to an empty list when there is no
        reader output for it or no processor was produced.
    """
    pmid_fname = 'pmids-%s.txt' % date.strftime('%Y-%m-%d-%H-%M-%S')
    with open(pmid_fname, 'wt') as fh:
        fh.write('\n'.join(pmids))

    # Submit the reading job and block until it has finished.
    job_list = submit_reading('emmaa', pmid_fname, ['reach'])
    monitor = BatchMonitor('run_reach_queue', job_list)
    monitor.watch_and_wait(idle_log_timeout=600, kill_on_log_timeout=True)

    pmid_stmts = {}
    for pmid in pmids:
        # Start from "nothing extracted" and only overwrite on success.
        extracted = []
        json_str = get_reader_json_str('reach', pmid)
        if json_str is not None:
            processor = reach.process_json_str(json_str)
            if processor:
                extracted = processor.statements
        pmid_stmts[pmid] = extracted
    return pmid_stmts
Esempio n. 13
0
    # Command line arguments: the PMID list file, then the start and end
    # indices of the slice of that list to process.
    pmid_list_file = sys.argv[1]
    start_ix = int(sys.argv[2])
    end_ix = int(sys.argv[3])

    # Load the list of PMIDs from the given file
    with open(pmid_list_file) as f:
        pmid_list = [line.strip('\n') for line in f]
    # Clamp the end index so the output file name reflects the real range.
    end_ix = min(end_ix, len(pmid_list))

    stmts = {}
    for ix, pmid in enumerate(pmid_list[start_ix:end_ix]):
        reach_json_str = s3_client.get_reader_json_str('reach', pmid)
        # Logging message will have been produced by get_reach_output
        if reach_json_str is None:
            continue
        # Run the REACH processor on the JSON
        try:
            logger.info('%d: Processing %s' % (ix, pmid))
            reach_proc = reach.process_json_str(reach_json_str, citation=pmid)
        # If there's a problem, skip it
        except Exception as e:
            print("Exception processing %s" % pmid)
            print(e)
            continue

        # The processor may be None if processing failed without raising;
        # skip the PMID rather than crash the whole batch.
        if reach_proc is None:
            print("No processor produced for %s" % pmid)
            continue

        stmts[pmid] = reach_proc.statements

    # Protocol 2 is requested explicitly for the pickled output.
    with open('reach_stmts_%d_%d.pkl' % (start_ix, end_ix), 'wb') as f:
        pickle.dump(stmts, f, protocol=2)
Esempio n. 14
0
 def parse_results(content):
     """Serialize ``content`` to JSON and run the REACH processor on it.

     Returns whatever ``reach.process_json_str`` returns for that string.
     """
     return reach.process_json_str(json.dumps(content))
Esempio n. 15
0
    # Command line arguments: the PMID list file, then the start and end
    # indices of the slice of that list to process.
    pmid_list_file = sys.argv[1]
    start_ix = int(sys.argv[2])
    end_ix = int(sys.argv[3])

    # Load the list of PMIDs from the given file
    with open(pmid_list_file) as f:
        pmid_list = [line.strip('\n') for line in f]
    # Clamp the end index so the output file name reflects the real range.
    end_ix = min(end_ix, len(pmid_list))

    stmts = {}
    for ix, pmid in enumerate(pmid_list[start_ix:end_ix]):
        reach_json_str = s3_client.get_reader_json_str('reach', pmid)
        # Logging message will have been produced by get_reach_output
        if reach_json_str is None:
            continue
        # Run the REACH processor on the JSON
        try:
            logger.info('%d: Processing %s' % (ix, pmid))
            reach_proc = reach.process_json_str(reach_json_str, citation=pmid)
        # If there's a problem, skip it
        except Exception as e:
            print("Exception processing %s" % pmid)
            print(e)
            continue

        # The processor may be None if processing failed without raising;
        # skip the PMID rather than crash the whole batch.
        if reach_proc is None:
            print("No processor produced for %s" % pmid)
            continue

        stmts[pmid] = reach_proc.statements

    with open('reach_stmts_%d_%d.pkl' % (start_ix, end_ix), 'wb') as f:
        pickle.dump(stmts, f)
Esempio n. 16
0
 def get_processor(content):
     """Return the REACH processor for ``content``.

     ``content`` is dumped to a JSON string, which is then handed to
     ``reach.process_json_str``.
     """
     return reach.process_json_str(json.dumps(content))