def get_reader_output(db, ref_id, ref_type='tcid', reader=None,
                      reader_version=None):
    """Return reader output for a given text content.

    Parameters
    ----------
    db : :py:class:`DatabaseManager`
        Reference to the DB to query
    ref_id : int or str
        The text reference ID whose reader output should be returned
    ref_type : Optional[str]
        The type of ID to look for, options include 'tcid' for the
        database's internal unique text content ID, or 'pmid', 'pmcid',
        'doi', 'pii', 'manuscript_id'
        Default: 'tcid'
    reader : Optional[str]
        The name of the reader whose output is of interest. The name is
        upper-cased before it is compared with the stored reader name.
    reader_version : Optional[str]
        The specific version of the reader

    Returns
    -------
    reading_results : dict{dict{list}}
        A dict of reader outputs that match the query criteria, indexed
        first by text content id, then by reader. Each list item is the
        value returned by `unpack` for one reading, or None when the
        stored reading had no content. An empty dict is returned when no
        text refs match `ref_id`.
    """
    if ref_type == 'tcid':
        clauses = [db.Reading.text_content_id == ref_id]
    else:
        trids = _get_trids(db, ref_id, ref_type)
        if not trids:
            # Keep the return type consistent with the documented dict
            # (the empty result used to be a list).
            return {}
        logger.debug("Found %d text ref ids.", len(trids))
        clauses = [
            db.TextContent.text_ref_id.in_(trids),
            db.Reading.text_content_id == db.TextContent.id,
        ]

    if reader:
        clauses.append(db.Reading.reader == reader.upper())
    if reader_version:
        clauses.append(db.Reading.reader_version == reader_version)

    res = db.select_all(
        [db.Reading.text_content_id, db.Reading.reader, db.Reading.bytes],
        *clauses)

    reading_dict = defaultdict(lambda: defaultdict(list))
    for tcid, reader_name, result in res:
        unpacked_result = None
        # Truthiness covers both an empty payload and a missing (None) one,
        # where len() would have raised a TypeError.
        if not result:
            logger.warning("Got reading result with zero content.")
        else:
            unpacked_result = unpack(result)
        reading_dict[tcid][reader_name].append(unpacked_result)
    return reading_dict
def add_paper_ids(self, initial_ids, id_type='pmid'):
    """Store paper IDs, resolving them to TextRef IDs where required.

    Parameters
    ----------
    initial_ids : set(str)
        A set of paper IDs.
    id_type : str
        What type the given IDs are (e.g. pmid, doi, pii). IDs of type
        'pii' or 'TRID' are stored exactly as given. Every other type is
        looked up with `_get_trids` in the primary database, and the
        first ID returned for each paper is stored.
    """
    logger.info(f'Adding new paper IDs from {len(initial_ids)} {id_type}s')

    # These ID types are kept verbatim, so no database lookup is needed.
    if id_type in {'pii', 'TRID'}:
        verbatim_ids = set(initial_ids)
        self.paper_ids.update(verbatim_ids)
        return

    primary_db = get_db('primary')
    for candidate in initial_ids:
        matches = _get_trids(primary_db, candidate, id_type)
        # Some papers might be not in the database yet
        if not matches:
            continue
        self.paper_ids.add(matches[0])