class RawUniqueLinks(Base, IndraDBTable):
    """Link table between `raw_statements` rows and `pa_statements` rows.

    Each row references one `raw_statements.id` and one
    `pa_statements.mk_hash`. A given (raw_stmt_id, pa_stmt_mk_hash) pair
    may appear at most once, enforced by the `stmt-link-uniqueness`
    constraint.
    """
    __tablename__ = 'raw_unique_links'
    _always_disp = ['raw_stmt_id', 'pa_stmt_mk_hash']
    _indices = [
        BtreeIndex('raw_unique_links_raw_stmt_id_idx', 'raw_stmt_id'),
        BtreeIndex('raw_unique_links_pa_stmt_mk_hash_idx',
                   'pa_stmt_mk_hash'),
    ]

    id = Column(Integer, primary_key=True)
    raw_stmt_id = Column(Integer, ForeignKey('raw_statements.id'),
                         nullable=False)
    pa_stmt_mk_hash = Column(BigInteger,
                             ForeignKey('pa_statements.mk_hash'),
                             nullable=False)

    # The attribute must be spelled `__table_args__` (dunder on both
    # sides). A name with only leading underscores is name-mangled by
    # Python and ignored by SQLAlchemy, which silently drops the
    # constraint.
    __table_args__ = (
        UniqueConstraint('raw_stmt_id', 'pa_stmt_mk_hash',
                         name='stmt-link-uniqueness'),
    )
class MtiRefAnnotationsTest(Base, IndraDBTable):
    """Declarative model for the `mti_ref_annotations_test` table.

    A row holds a numeric pmid, a numeric mesh id and an optional numeric
    qualifier id, together with two boolean flags. The combination
    (pmid_num, mesh_num, qual_num, is_concept) must be unique.
    """
    __tablename__ = 'mti_ref_annotations_test'

    # Columns always shown when a row is displayed.
    _always_disp = ['pmid_num', 'mesh_num', 'qual_num']

    # One b-tree index per numeric lookup column.
    _indices = [
        BtreeIndex('mti_ref_annotations_test_pmid_idx', 'pmid_num'),
        BtreeIndex('mti_ref_annotations_test_mesh_id_idx', 'mesh_num'),
        BtreeIndex('mti_ref_annotations_test_qual_id_idx', 'qual_num'),
    ]

    # Surrogate primary key.
    id = Column(Integer, primary_key=True)

    # Required identifiers.
    pmid_num = Column(Integer, nullable=False)
    mesh_num = Column(Integer, nullable=False)

    # Optional qualifier; nullable because no `nullable=False` is given.
    qual_num = Column(Integer)

    # Boolean flags, both defaulting to False on insert.
    major_topic = Column(Boolean, default=False)
    is_concept = Column(Boolean, default=False)

    __table_args__ = (
        UniqueConstraint(
            'pmid_num', 'mesh_num', 'qual_num', 'is_concept',
            name='mesh-uniqueness',
        ),
    )
class TextRef(Base, IndraDBTable):
    """Declarative model for the `text_ref` table of text identifiers.

    Each row stores the identifiers known for one reference: pmid, pmcid,
    doi, pii, url and manuscript_id. For pmid, pmcid and doi the raw
    string is stored next to parsed components (`pmid_num`,
    `pmcid_num`/`pmcid_version`, `doi_ns`/`doi_id`) that are indexed and
    used by the `*_in` lookup helpers.

    Use `TextRef.new` and `TextRef.update` rather than setting pmid, pmcid
    or doi directly, so the parsed columns stay in sync with the raw
    strings.
    """
    __tablename__ = 'text_ref'

    # Identifier columns reported by `get_ref_dict` and `__repr__`.
    _ref_cols = ['pmid', 'pmcid', 'doi', 'pii', 'url', 'manuscript_id']
    _always_disp = ['id', 'pmid', 'pmcid']
    _indices = [
        StringIndex('text_ref_pmid_idx', 'pmid'),
        StringIndex('text_ref_pmcid_idx', 'pmcid'),
        BtreeIndex('text_ref_pmid_num_idx', 'pmid_num'),
        BtreeIndex('text_ref_pmcid_num_idx', 'pmcid_num'),
        BtreeIndex('text_ref_doi_ns_idx', 'doi_ns'),
        BtreeIndex('text_ref_doi_id_idx', 'doi_id'),
        StringIndex('text_ref_doi_idx', 'doi'),
    ]

    id = Column(Integer, primary_key=True)
    pmid = Column(String(20))
    pmid_num = Column(Integer)
    pmcid = Column(String(20))
    pmcid_num = Column(Integer)
    pmcid_version = Column(Integer)
    doi = Column(String(100))
    doi_ns = Column(Integer)
    doi_id = Column(String)
    pii = Column(String(250))
    url = Column(String, unique=True)
    manuscript_id = Column(String(100), unique=True)
    create_date = Column(DateTime, default=func.now())
    last_updated = Column(DateTime, onupdate=func.now())

    __table_args__ = (
        UniqueConstraint('pmid', 'doi', name='pmid-doi'),
        UniqueConstraint('pmid', 'pmcid', name='pmid-pmcid'),
        UniqueConstraint('pmcid', 'doi', name='pmcid-doi'),
    )

    def __repr__(self):
        """Return a short representation: the id plus up to two ids."""
        terms = [f'id={self.id}']
        for col in self._ref_cols:
            val = getattr(self, col)
            if val is not None:
                terms.append(f'{col}={val}')
            # Stop once the id and two identifiers have been collected.
            if len(terms) > 2:
                break
        return f'{self.__class__.__name__}({", ".join(terms)})'

    @classmethod
    def new(cls, pmid=None, pmcid=None, doi=None, pii=None, url=None,
            manuscript_id=None):
        """Build a TextRef, filling the parsed pmid/pmcid/doi columns.

        Parameters
        ----------
        pmid, pmcid, doi, pii, url, manuscript_id : str or None
            Raw identifier strings. pmid, pmcid and doi are run through
            the matching `process_*` method before being stored.

        Returns
        -------
        TextRef
            A new instance with raw and parsed columns populated.
        """
        pmid, pmid_num = cls.process_pmid(pmid)
        pmcid, pmcid_num, pmcid_version = cls.process_pmcid(pmcid)
        doi, doi_ns, doi_id = cls.process_doi(doi)
        return cls(pmid=pmid, pmid_num=pmid_num, pmcid=pmcid,
                   pmcid_num=pmcid_num, pmcid_version=pmcid_version,
                   doi=doi, doi_ns=doi_ns, doi_id=doi_id, pii=pii,
                   url=url, manuscript_id=manuscript_id)

    def update(self, **updates):
        """Set identifier values, keeping parsed columns consistent.

        Parameters
        ----------
        **updates
            Mapping of attribute name to new value. 'pmid', 'pmcid' and
            'doi' are re-parsed with the matching `process_*` method; any
            other key is set on the instance as-is.
        """
        for id_type, id_val in updates.items():
            if id_type == 'pmid':
                self.pmid, self.pmid_num = self.process_pmid(id_val)
            elif id_type == 'pmcid':
                self.pmcid, self.pmcid_num, self.pmcid_version = \
                    self.process_pmcid(id_val)
            elif id_type == 'doi':
                self.doi, self.doi_ns, self.doi_id = \
                    self.process_doi(id_val)
            else:
                setattr(self, id_type, id_val)

    @staticmethod
    def process_pmid(pmid):
        """Split a pmid into its raw string and integer value.

        Returns
        -------
        tuple
            `(pmid, pmid_num)`. Both are None for a falsy input, and
            `pmid_num` is None when the string is not all digits.
        """
        if not pmid:
            return None, None

        if not pmid.isdigit():
            return pmid, None

        return pmid, int(pmid)

    @staticmethod
    def process_pmcid(pmcid):
        """Split a pmcid into base id, integer id and version number.

        Returns
        -------
        tuple
            `(pmcid, pmcid_num, pmcid_version)`. All are None for a falsy
            input. When the id starts with 'PMC', anything after the
            first '.' is removed from the returned `pmcid` and used as
            the version; the version is None unless that suffix is all
            digits. `pmcid_num` is None unless the part after 'PMC' is
            all digits.
        """
        if not pmcid:
            return None, None, None

        if not pmcid.startswith('PMC'):
            return pmcid, None, None

        # Split on the first '.' only, so an id with several dots yields
        # a non-numeric version (None) instead of an unpacking error.
        pmcid, _, version_number_str = pmcid.partition('.')
        if version_number_str.isdigit():
            version_number = int(version_number_str)
        else:
            version_number = None

        if not pmcid[3:].isdigit():
            return pmcid, None, version_number

        return pmcid, int(pmcid[3:]), version_number

    @staticmethod
    def process_doi(doi):
        """Upper-case a doi and split it into namespace and group id.

        Returns
        -------
        tuple
            `(doi, doi_ns, doi_id)`. All are None for a falsy input. The
            returned `doi` is upper-cased. `doi_ns` and `doi_id` are None
            unless the doi has the form '10.<digits>/<rest>'.
        """
        # Check for invalid DOIs
        if not doi:
            return None, None, None

        # Regularize case.
        doi = doi.upper()

        if not doi.startswith('10.'):
            return doi, None, None

        # Split up the parts of the DOI
        parts = doi[3:].split('/')
        if len(parts) < 2:
            return doi, None, None

        # Check the namespace number, make it an integer.
        namespace_str = parts[0]
        if not namespace_str.isdigit():
            return doi, None, None
        namespace = int(namespace_str)

        # Join the rest of the parts together.
        group_id = '/'.join(parts[1:])

        return doi, namespace, group_id

    @classmethod
    def pmid_in(cls, pmid_list, filter_ids=False):
        """Get sqlalchemy clauses for a list of pmids.

        Parameters
        ----------
        pmid_list : iterable of str
            The pmids to look up.
        filter_ids : bool
            If True, log a warning for each invalid pmid and skip it. If
            False (default), raise on the first invalid pmid.

        Raises
        ------
        ValueError
            If a pmid is not numeric and `filter_ids` is False.
        """
        pmid_num_set = set()
        for pmid in pmid_list:
            _, pmid_num = cls.process_pmid(pmid)
            if pmid_num is None:
                if filter_ids:
                    logger.warning('"%s" is not a valid pmid. Skipping.'
                                   % pmid)
                    continue
                # The error must be raised, not merely constructed;
                # otherwise None would be added to the lookup set.
                raise ValueError('"%s" is not a valid pmid.' % pmid)
            pmid_num_set.add(pmid_num)
        return cls.pmid_num.in_(pmid_num_set)

    @classmethod
    def pmcid_in(cls, pmcid_list, filter_ids=False):
        """Get the sqlalchemy clauses for a list of pmcids.

        Parameters
        ----------
        pmcid_list : iterable of str
            The pmcids to look up.
        filter_ids : bool
            If True, log a warning for each invalid pmcid and skip it. If
            False (default), raise on the first invalid pmcid.

        Raises
        ------
        ValueError
            If a pmcid has no usable numeric part and `filter_ids` is
            False.
        """
        pmcid_num_set = set()
        for pmcid in pmcid_list:
            _, pmcid_num, _ = cls.process_pmcid(pmcid)
            if not pmcid_num:
                if filter_ids:
                    logger.warning('"%s" does not look like a valid '
                                   'pmcid. Skipping.' % pmcid)
                    continue
                else:
                    raise ValueError('"%s" is not a valid pmcid.' % pmcid)
            else:
                pmcid_num_set.add(pmcid_num)
        return cls.pmcid_num.in_(pmcid_num_set)

    @classmethod
    def doi_in(cls, doi_list, filter_ids=False):
        """Get clause for looking up a list of dois.

        Parameters
        ----------
        doi_list : iterable of str
            The dois to look up.
        filter_ids : bool
            If True, log a warning for each doi that cannot be parsed and
            skip it. If False (default), raise on the first such doi.

        Raises
        ------
        ValueError
            If a doi has no usable namespace and `filter_ids` is False.
        """
        doi_tuple_set = set()
        for doi in doi_list:
            doi, doi_ns, doi_id = cls.process_doi(doi)
            if not doi_ns:
                if filter_ids:
                    logger.warning('"%s" does not look like a normal doi. '
                                   'Skipping.' % doi)
                    continue
                else:
                    raise ValueError('"%s" is not a valid doi.' % doi)
            else:
                doi_tuple_set.add((doi_ns, doi_id))
        return tuple_(cls.doi_ns, cls.doi_id).in_(doi_tuple_set)

    def get_ref_dict(self):
        """Return the truthy identifiers keyed by upper-case name.

        The row id is always included under the key 'TRID'.
        """
        ref_dict = {}
        for ref in self._ref_cols:
            val = getattr(self, ref, None)
            if val:
                ref_dict[ref.upper()] = val
        ref_dict['TRID'] = self.id
        return ref_dict