コード例 #1
0
    def _get_curie_and_type_from_id(variant_id):
        """
        Make a best guess at the curie and variant type for a variant id.

        The id is first normalised by removing any space that precedes a
        hyphen and trimming surrounding whitespace.  It is then classified:

        * contains ' x ' or ','  -> unsupported variant group; a warning is
          logged and (None, None) is returned
        * contains ';'           -> haplotype; the curie is ':haplotype_'
          followed by Source.hash_id(variant_id) (a deliberate 404)
        * starts with 'rs'       -> snp; curie is 'dbSNP:' plus the part
          before the first hyphen, with embedded spaces removed
        * starts with 'kgp'      -> snp; curie is 'GWAS:' plus the part
          before the first hyphen
        * starts with 'chr'      -> snp; curie comes from Source.make_id
        * empty                  -> (None, None), silently
        * anything else          -> warning logged, (None, None)

        2019-May: three snp ids have ' e' or ' a' appended (note the space),
        e.g. 'rs2440154 e-A' and 'rs2440154 e'.  The allele after the hyphen
        is dropped and the embedded space is removed for such ids.

        :param variant_id: raw variant identifier string
        :return: tuple (curie, variant_type); both are None for ids that
                 cannot be processed
        """
        curie = None
        variant_type = None

        # remove space before hyphens
        variant_id = variant_id.replace(' -', '-').strip()

        if ' x ' in variant_id or ',' in variant_id:
            # TODO deal with rs1234 x rs234... (haplotypes?)
            LOG.warning("Cannot parse variant groups of this format: %s",
                        variant_id)
        elif ';' in variant_id:
            curie = ':haplotype_' + Source.hash_id(
                variant_id)  # deliberate 404
            variant_type = "haplotype"
        elif variant_id.startswith('rs'):
            # drop the alteration after the first hyphen and remove
            # whitespace from errant ids such as 'rs6194 5053-?'
            curie = 'dbSNP:' + variant_id.split('-')[0].replace(' ', '')
            variant_type = "snp"
        elif variant_id.startswith('kgp'):
            # http://www.1000genomes.org/faq/what-are-kgp-identifiers
            curie = 'GWAS:' + variant_id.split('-')[0]
            variant_type = "snp"
        elif variant_id.startswith('chr'):
            # like: chr10:106180121-G
            # An unknown allele is written as a literal '-?'; rewrite it as
            # '-N'.  This must be a literal replacement: the regex r'-?'
            # means "optional hyphen", matches the empty string at every
            # position and used to insert '-N' between all characters.
            # NOTE(review): ids generated for 'chr' variants therefore
            # differ from those produced by the previous behaviour.
            variant_id = variant_id.replace('-?', '-N').replace(' ', '')
            # the result is minted via Source.make_id with '_' rather than
            # being hung off a base node
            curie = Source.make_id('gwas-' + variant_id.replace(':', '-'),
                                   '_')
            variant_type = "snp"
        elif not variant_id:
            # nothing to identify; return (None, None) without warning
            pass
        else:
            LOG.warning("There's a snp id i can't manage: %s", variant_id)

        return curie, variant_type
コード例 #2
0
ファイル: WormBase.py プロジェクト: tegar9000/dipper-1
 def make_reagent_targeted_gene_id(gene_id, reagent_id):
     """
     Build the identifier for a gene targeted by a reagent.

     The gene id and reagent id are joined with a hyphen and the combined
     key is passed to Source.make_id with '_' as its second argument.

     :param gene_id: gene identifier string
     :param reagent_id: reagent identifier string
     :return: the value returned by Source.make_id for the joined key
     """
     id_parts = (gene_id, reagent_id)
     combined_key = '-'.join(id_parts)
     return Source.make_id(combined_key, '_')