Beispiel #1
0
def get_reader_output(db,
                      ref_id,
                      ref_type='tcid',
                      reader=None,
                      reader_version=None):
    """Return reader output for a given text content.

    Parameters
    ----------
    db : :py:class:`DatabaseManager`
        Reference to the DB to query
    ref_id : int or str
        The text reference ID whose reader output should be returned
    ref_type : Optional[str]
        The type of ID to look for, options include
        'tcid' for the database's internal unique text content ID,
        or 'pmid', 'pmcid', 'doi', 'pii', 'manuscript_id'
        Default: 'tcid'
    reader : Optional[str]
        The name of the reader whose output is of interest. The name is
        upper-cased before it is compared against the database.
    reader_version : Optional[str]
        The specific version of the reader

    Returns
    -------
    reading_results : dict{dict{list}}
        A dict of unpacked reader outputs that match the query criteria,
        indexed first by text content id, then by reader. The dict is empty
        when no text ref or reading matches. A reading stored without any
        content is represented by None in its list.
    """
    reading_dict = defaultdict(lambda: defaultdict(list))

    if ref_type == 'tcid':
        clauses = [db.Reading.text_content_id == ref_id]
    else:
        trids = _get_trids(db, ref_id, ref_type)
        if not trids:
            # Return the same (empty) mapping type as every other path so
            # callers can always treat the result as a dict.
            return reading_dict
        logger.debug("Found %d text ref ids.", len(trids))
        clauses = [
            db.TextContent.text_ref_id.in_(trids),
            db.Reading.text_content_id == db.TextContent.id
        ]
    if reader:
        clauses.append(db.Reading.reader == reader.upper())
    if reader_version:
        clauses.append(db.Reading.reader_version == reader_version)

    res = db.select_all(
        [db.Reading.text_content_id, db.Reading.reader, db.Reading.bytes],
        *clauses)
    for tcid, reader_name, result in res:
        unpacked_result = None
        # A falsiness check covers both empty content and a missing (None)
        # value, which len() would reject with a TypeError.
        if not result:
            logger.warning("Got reading result with zero content.")
        else:
            unpacked_result = unpack(result)
        reading_dict[tcid][reader_name].append(unpacked_result)
    return reading_dict
Beispiel #2
0
    def add_paper_ids(self, initial_ids, id_type='pmid'):
        """Record paper IDs on this object, converting them when required.

        Parameters
        ----------
        initial_ids : set(str)
            The paper IDs to add.
        id_type : str
            The kind of the given IDs (e.g. pmid, doi, pii). IDs of type
            'pii' or 'TRID' are stored exactly as given; any other type is
            looked up in the primary database and stored as a TextRef ID.
        """
        id_count = len(initial_ids)
        logger.info(f'Adding new paper IDs from {id_count} {id_type}s')

        # These ID types need no lookup and are stored verbatim.
        if id_type in {'pii', 'TRID'}:
            self.paper_ids.update(set(initial_ids))
            return

        primary_db = get_db('primary')
        for original_id in initial_ids:
            matches = _get_trids(primary_db, original_id, id_type)
            # A paper that is not in the database yet yields no match
            if not matches:
                continue
            # Only the first matching TextRef ID is kept
            self.paper_ids.add(matches[0])