コード例 #1
0
ファイル: principal_schema.py プロジェクト: steppi/indra_db
 class RawUniqueLinks(Base, IndraDBTable):
     """Link table pairing a raw statement id with a pa statement hash.

     Each row references ``raw_statements.id`` and ``pa_statements.mk_hash``;
     both columns are required and individually indexed, and each
     (raw_stmt_id, pa_stmt_mk_hash) pair is constrained to be unique.
     """
     __tablename__ = 'raw_unique_links'
     _always_disp = ['raw_stmt_id', 'pa_stmt_mk_hash']
     _indices = [
         BtreeIndex('raw_unique_links_raw_stmt_id_idx', 'raw_stmt_id'),
         BtreeIndex('raw_unique_links_pa_stmt_mk_hash_idx',
                    'pa_stmt_mk_hash')
     ]
     id = Column(Integer, primary_key=True)
     raw_stmt_id = Column(Integer,
                          ForeignKey('raw_statements.id'),
                          nullable=False)
     pa_stmt_mk_hash = Column(BigInteger,
                              ForeignKey('pa_statements.mk_hash'),
                              nullable=False)
     # The attribute must be spelled ``__table_args__`` (with the trailing
     # double underscore): SQLAlchemy's declarative system only looks for
     # that exact name, and a name without the trailing underscores is
     # additionally name-mangled by Python, so the constraint would be
     # silently dropped from the table definition.
     __table_args__ = (UniqueConstraint('raw_stmt_id',
                                        'pa_stmt_mk_hash',
                                        name='stmt-link-uniqueness'), )
コード例 #2
0
ファイル: principal_schema.py プロジェクト: kolusask/indra_db
 class MtiRefAnnotationsTest(Base, IndraDBTable):
     """Declarative model for the ``mti_ref_annotations_test`` table.

     Rows carry numeric ``pmid_num``/``mesh_num``/``qual_num`` values plus
     the ``major_topic`` and ``is_concept`` flags.  The combination of
     pmid, mesh, qualifier and ``is_concept`` is constrained to be unique.
     """
     # --- Table-level configuration -------------------------------------
     __tablename__ = 'mti_ref_annotations_test'
     __table_args__ = (
         UniqueConstraint(
             'pmid_num', 'mesh_num', 'qual_num', 'is_concept',
             name='mesh-uniqueness',
         ),
     )

     # --- Display and index metadata ------------------------------------
     _always_disp = ['pmid_num', 'mesh_num', 'qual_num']
     _indices = [
         BtreeIndex('mti_ref_annotations_test_pmid_idx', 'pmid_num'),
         BtreeIndex('mti_ref_annotations_test_mesh_id_idx', 'mesh_num'),
         BtreeIndex('mti_ref_annotations_test_qual_id_idx', 'qual_num'),
     ]

     # --- Columns (declaration order is preserved) ----------------------
     id = Column(Integer, primary_key=True)
     # pmid and mesh are mandatory; the qualifier is optional.
     pmid_num = Column(Integer, nullable=False)
     mesh_num = Column(Integer, nullable=False)
     qual_num = Column(Integer)
     # Both flags default to False when not supplied.
     major_topic = Column(Boolean, default=False)
     is_concept = Column(Boolean, default=False)
コード例 #3
0
ファイル: principal_schema.py プロジェクト: kolusask/indra_db
    class TextRef(Base, IndraDBTable):
        """Declarative model for the ``text_ref`` table of text identifiers.

        Alongside the string identifiers (``pmid``, ``pmcid``, ``doi``, ...)
        the table stores parsed, numeric forms (``pmid_num``, ``pmcid_num``,
        ``pmcid_version``, ``doi_ns``, ``doi_id``) which are derived by the
        ``process_*`` static methods and used for the ``*_in`` lookups.
        """
        __tablename__ = 'text_ref'
        _ref_cols = ['pmid', 'pmcid', 'doi', 'pii', 'url', 'manuscript_id']
        _always_disp = ['id', 'pmid', 'pmcid']
        _indices = [
            StringIndex('text_ref_pmid_idx', 'pmid'),
            StringIndex('text_ref_pmcid_idx', 'pmcid'),
            BtreeIndex('text_ref_pmid_num_idx', 'pmid_num'),
            BtreeIndex('text_ref_pmcid_num_idx', 'pmcid_num'),
            BtreeIndex('text_ref_doi_ns_idx', 'doi_ns'),
            BtreeIndex('text_ref_doi_id_idx', 'doi_id'),
            StringIndex('text_ref_doi_idx', 'doi')
        ]

        id = Column(Integer, primary_key=True)
        pmid = Column(String(20))
        pmid_num = Column(Integer)
        pmcid = Column(String(20))
        pmcid_num = Column(Integer)
        pmcid_version = Column(Integer)
        doi = Column(String(100))
        doi_ns = Column(Integer)
        doi_id = Column(String)
        pii = Column(String(250))
        url = Column(String, unique=True)
        manuscript_id = Column(String(100), unique=True)
        create_date = Column(DateTime, default=func.now())
        last_updated = Column(DateTime, onupdate=func.now())

        __table_args__ = (UniqueConstraint('pmid', 'doi', name='pmid-doi'),
                          UniqueConstraint('pmid', 'pmcid', name='pmid-pmcid'),
                          UniqueConstraint('pmcid', 'doi', name='pmcid-doi'))

        def __repr__(self):
            """Return ``ClassName(id=..., ...)`` with at most two identifiers.

            Identifiers are taken in ``_ref_cols`` order, skipping those that
            are None, and the scan stops once two have been collected.
            """
            terms = [f'id={self.id}']
            for col in self._ref_cols:
                value = getattr(self, col)
                if value is not None:
                    terms.append(f'{col}={value}')
                # One slot is the id, so more than two terms means two
                # identifiers have already been added.
                if len(terms) > 2:
                    break
            return f'{self.__class__.__name__}({", ".join(terms)})'

        @classmethod
        def new(cls,
                pmid=None,
                pmcid=None,
                doi=None,
                pii=None,
                url=None,
                manuscript_id=None):
            """Build an instance, filling in the derived id columns.

            ``pmid``, ``pmcid`` and ``doi`` are passed through the matching
            ``process_*`` method so the numeric/split columns are populated;
            the remaining identifiers are stored as given.
            """
            pmid, pmid_num = cls.process_pmid(pmid)
            pmcid, pmcid_num, pmcid_version = cls.process_pmcid(pmcid)
            doi, doi_ns, doi_id = cls.process_doi(doi)
            return cls(pmid=pmid,
                       pmid_num=pmid_num,
                       pmcid=pmcid,
                       pmcid_num=pmcid_num,
                       pmcid_version=pmcid_version,
                       doi=doi,
                       doi_ns=doi_ns,
                       doi_id=doi_id,
                       pii=pii,
                       url=url,
                       manuscript_id=manuscript_id)

        def update(self, **updates):
            """Set attributes from keyword arguments, keeping derived ids in sync.

            ``pmid``, ``pmcid`` and ``doi`` are re-processed so their derived
            columns are updated together; any other key is set directly with
            ``setattr``.
            """
            for id_type, id_val in updates.items():
                if id_type == 'pmid':
                    self.pmid, self.pmid_num = self.process_pmid(id_val)
                elif id_type == 'pmcid':
                    self.pmcid, self.pmcid_num, self.pmcid_version = \
                        self.process_pmcid(id_val)
                elif id_type == 'doi':
                    self.doi, self.doi_ns, self.doi_id = \
                        self.process_doi(id_val)
                else:
                    setattr(self, id_type, id_val)

        @staticmethod
        def process_pmid(pmid):
            """Return ``(pmid, pmid_num)`` for a pmid string.

            A falsy input gives ``(None, None)``; a non-digit string is
            returned unchanged with ``None`` as its number.
            """
            if not pmid:
                return None, None

            if not pmid.isdigit():
                return pmid, None

            return pmid, int(pmid)

        @staticmethod
        def process_pmcid(pmcid):
            """Return ``(pmcid, pmcid_num, pmcid_version)`` for a pmcid string.

            A falsy input gives three Nones, and a string that does not start
            with ``'PMC'`` is returned unchanged with no number or version.
            Otherwise anything after the first ``'.'`` is treated as the
            version (None unless it is all digits) and is stripped from the
            returned pmcid; the number is the digits following ``'PMC'``, or
            None if that part is not all digits.
            """
            if not pmcid:
                return None, None, None

            if not pmcid.startswith('PMC'):
                return pmcid, None, None

            # ``partition`` splits on the first dot only, so an id containing
            # several dots yields a non-numeric version (-> None) instead of
            # failing to unpack, and an id with no dot yields an empty one.
            pmcid, _, version_number_str = pmcid.partition('.')
            if version_number_str.isdigit():
                version_number = int(version_number_str)
            else:
                version_number = None

            if not pmcid[3:].isdigit():
                return pmcid, None, version_number

            return pmcid, int(pmcid[3:]), version_number

        @staticmethod
        def process_doi(doi):
            """Return ``(doi, doi_ns, doi_id)`` for a doi string.

            The doi is upper-cased. For a doi of the form
            ``10.<digits>/<rest>``, ``doi_ns`` is the integer after ``'10.'``
            and ``doi_id`` is everything after the first ``'/'``. A falsy
            input gives three Nones; any other shape gives the upper-cased
            doi with ``None`` for both parts.
            """
            # Check for invalid DOIs
            if not doi:
                return None, None, None

            # Regularize case.
            doi = doi.upper()

            if not doi.startswith('10.'):
                return doi, None, None

            # Split up the parts of the DOI
            parts = doi[3:].split('/')
            if len(parts) < 2:
                return doi, None, None

            # Check the namespace number, make it an integer.
            namespace_str = parts[0]
            if not namespace_str.isdigit():
                return doi, None, None
            namespace = int(namespace_str)

            # Join the rest of the parts together.
            group_id = '/'.join(parts[1:])

            return doi, namespace, group_id

        @classmethod
        def pmid_in(cls, pmid_list, filter_ids=False):
            """Get sqlalchemy clauses for a list of pmids.

            Pmids without a numeric form are skipped with a warning when
            ``filter_ids`` is true; otherwise a ``ValueError`` is raised.
            """
            pmid_num_set = set()
            for pmid in pmid_list:
                _, pmid_num = cls.process_pmid(pmid)
                if pmid_num is None:
                    if not filter_ids:
                        # The exception must actually be raised; otherwise
                        # None would be added to the set used in the IN clause.
                        raise ValueError('"%s" is not a valid pmid.' % pmid)
                    logger.warning('"%s" is not a valid pmid. Skipping.',
                                   pmid)
                    continue
                pmid_num_set.add(pmid_num)
            return cls.pmid_num.in_(pmid_num_set)

        @classmethod
        def pmcid_in(cls, pmcid_list, filter_ids=False):
            """Get the sqlalchemy clauses for a list of pmcids.

            Pmcids without a (non-zero) numeric form are skipped with a
            warning when ``filter_ids`` is true; otherwise a ``ValueError``
            is raised.
            """
            pmcid_num_set = set()
            for pmcid in pmcid_list:
                _, pmcid_num, _ = cls.process_pmcid(pmcid)
                if not pmcid_num:
                    if not filter_ids:
                        raise ValueError('"%s" is not a valid pmcid.' % pmcid)
                    logger.warning('"%s" does not look like a valid '
                                   'pmcid. Skipping.', pmcid)
                    continue
                pmcid_num_set.add(pmcid_num)

            return cls.pmcid_num.in_(pmcid_num_set)

        @classmethod
        def doi_in(cls, doi_list, filter_ids=False):
            """Get clause for looking up a list of dois.

            Dois are matched on their ``(doi_ns, doi_id)`` pair. Those for
            which no namespace could be parsed are skipped with a warning
            when ``filter_ids`` is true; otherwise a ``ValueError`` is raised.
            """
            doi_tuple_set = set()
            for doi in doi_list:
                # ``doi`` is rebound to the normalized (upper-cased) form, which
                # is what appears in the messages below.
                doi, doi_ns, doi_id = cls.process_doi(doi)
                if not doi_ns:
                    if not filter_ids:
                        raise ValueError('"%s" is not a valid doi.' % doi)
                    logger.warning('"%s" does not look like a normal doi. '
                                   'Skipping.', doi)
                    continue
                doi_tuple_set.add((doi_ns, doi_id))

            return tuple_(cls.doi_ns, cls.doi_id).in_(doi_tuple_set)

        def get_ref_dict(self):
            """Return a dict of the truthy identifiers keyed by upper-cased name.

            The row id is always included under the key ``'TRID'``.
            """
            ref_dict = {}
            for ref in self._ref_cols:
                val = getattr(self, ref, None)
                if val:
                    ref_dict[ref.upper()] = val
            ref_dict['TRID'] = self.id
            return ref_dict