예제 #1
0
파일: GeneBot.py 프로젝트: stuppie/bw_old
def run(login, gene_records, chrom_wdid):
    """
    Hand every gene record with a single genomic position to
    wd_item_construction.

    Records that have no 'genomic_pos' entry, or whose 'genomic_pos' value is
    a list, are logged as warnings and skipped.

    :param login: login object forwarded to wd_item_construction
    :param gene_records: iterable of gene record dicts
    :param chrom_wdid: chromosome mapping forwarded to wd_item_construction
    """
    for gene in tqdm(gene_records):
        # Classify the record first, then act once on the outcome.
        if 'genomic_pos' not in gene:
            # see: http://mygene.info/v3/gene/855814
            problem = "no_position"
        elif isinstance(gene['genomic_pos']['@value'], list):
            # see: http://mygene.info/v3/gene/853483
            problem = "multiple_positions"
        else:
            problem = None

        if problem is None:
            wd_item_construction(gene, strain_info, chrom_wdid, login)
        else:
            PBB_Core.WDItemEngine.log(
                "WARNING",
                format_msg(gene['_id']['@value'], problem, '', ENTREZ_PROP))
예제 #2
0
def run(login, records, add_pubmed):
    """
    Call protein_item for every record whose Entrez gene ID is already mapped
    to a Wikidata item; log an error for the others.

    :param login: login object forwarded to protein_item
    :param records: iterable of gene record dicts
    :param add_pubmed: flag forwarded to protein_item
    """
    # Entrez gene ID (P351) -> wdid, limited to items whose "found in taxon"
    # (P703) is this strain.
    entrez_to_qid = WDHelper().id_mapper(
        "P351", (("P703", strain_info['organism_wdid']), ))

    # GO ID (P686) -> wdid.
    go_to_qid = WDHelper().id_mapper("P686")

    for rec in tqdm(records, desc=strain_info['organism_name']):
        entrez_id = str(rec['entrezgene']['@value'])
        if entrez_id in entrez_to_qid:
            protein_item(rec, strain_info, entrez_to_qid[entrez_id],
                         go_to_qid, login, add_pubmed)
            continue

        # No gene item known for this Entrez ID: report it and move on.
        PBB_Core.WDItemEngine.log(
            "ERROR",
            format_msg(rec['_id']['@value'], "gene_not_found", None,
                       ENTREZ_PROP))
예제 #3
0
def run_encodes(login, records):
    """
    Call gene_encodes_statement for every record whose gene and protein are
    both already mapped to Wikidata items.

    A record whose Entrez gene ID or Ensembl protein ID has no mapping is
    logged as an error and skipped, so one unmapped record does not abort the
    rest of the batch.

    :param login: login object forwarded to gene_encodes_statement
    :param records: iterable of gene record dicts
    """
    # get all entrez gene id -> wdid mappings, where found in taxon is this strain
    gene_wdid_mapping = PBB_Helpers.id_mapper(
        "P351", (("P703", strain_info['organism_wdid']), ))

    # get all ensembl protein id -> wdid mappings, where found in taxon is this strain
    protein_wdid_mapping = PBB_Helpers.id_mapper(
        "P705", (("P703", strain_info['organism_wdid']), ))

    for record in tqdm(records, desc=strain_info['organism_name']):
        entrez_gene = str(record['entrezgene']['@value'])
        if entrez_gene not in gene_wdid_mapping:
            PBB_Core.WDItemEngine.log(
                "ERROR",
                format_msg(record['_id']['@value'], "gene_not_found", None,
                           ENTREZ_PROP))
            continue

        # Guard the protein lookup the same way as the gene lookup: a bare
        # index would raise KeyError and stop the whole run.
        ensembl_protein = record['ensembl']['@value']['protein']
        if ensembl_protein not in protein_wdid_mapping:
            PBB_Core.WDItemEngine.log(
                "ERROR",
                format_msg(record['_id']['@value'], "protein_not_found", None,
                           ENTREZ_PROP))
            continue

        gene_qid = gene_wdid_mapping[entrez_gene]
        protein_qid = protein_wdid_mapping[ensembl_protein]
        gene_encodes_statement(gene_qid, protein_qid, 'ncbi_gene', entrez_gene,
                               record['ensembl']['@source'], login)
예제 #4
0
def protein_item(record, strain_info, gene_qid, go_wdid_mapping, login,
                 add_pubmed):
    """
    Assemble the statements for one protein item and hand the item to
    try_write.

    The statements are, in order: the external identifiers (Ensembl protein,
    RefSeq protein, UniProt), one statement per GO annotation, and three fixed
    statements (subclass of protein, found in taxon, link to the gene item).
    If building the item engine or setting its label/description/aliases
    raises, the error is printed and logged and nothing is written.

    :param record: gene record dict; the fields read here are wrapped as
        {'@value': ..., '@source': ...}
    :param strain_info: dict providing 'organism_type', 'organism_name' and
        'organism_wdid'
    :param gene_qid: wdid of the gene item this protein is linked to
    :param go_wdid_mapping: dict from GO ID to wdid
    :param login: login object forwarded to PubmedStub.create and try_write
    :param add_pubmed: when true, every PubMed ID of a GO annotation is added
        to that statement's reference
    :return: None
    """

    item_name = '{} {}'.format(record['name']['@value'],
                               record['ensembl']['@value']['protein'])
    item_description = '{} protein found in {}'.format(
        strain_info['organism_type'], strain_info['organism_name'])

    statements = []

    ############
    # external IDs
    ############
    # will be used for reference statements
    external_ids = {
        'entrez_gene': str(record['entrezgene']['@value']),
        'ensembl_protein': record['ensembl']['@value']['protein'],
        'ensembl_gene': record['ensembl']['@value']['gene'],
        'refseq_protein': record['refseq']['@value']['protein'],
        'uniprot': record['uniprot']['@value']['Swiss-Prot']
    }

    # (record field, key in external_ids, property) for each identifier
    # statement: ensembl protein id, refseq protein id, uniprot id.
    id_statement_specs = (
        ('ensembl', 'ensembl_protein', 'P705'),
        ('refseq', 'refseq_protein', 'P637'),
        ('uniprot', 'uniprot', 'P352'),
    )
    source_refs = {}
    for record_field, id_key, prop_nr in id_statement_specs:
        source_ref = make_ref_source(record[record_field]['@source'], id_key,
                                     external_ids[id_key])
        source_refs[record_field] = source_ref
        statements.append(
            PBB_Core.WDString(external_ids[id_key],
                              prop_nr,
                              references=[source_ref]))
    # The ensembl reference is reused for the hand-made statements below.
    ensembl_ref = source_refs['ensembl']

    ############
    # GO terms
    # TODO: https://www.wikidata.org/wiki/Q3460832
    ############

    # NOTE(review): preprocess_go is defined elsewhere; presumably it
    # normalises record['go'] in place before it is read below - confirm.
    preprocess_go(record)
    go_source = record['go']['@source']
    go_id_prop = source_ref_id[go_source['_id']]
    reference = make_ref_source(go_source, go_id_prop,
                                external_ids[go_id_prop])
    for go_level, go_records in record['go']['@value'].items():
        level_wdid = go_props[go_level]
        for go_record in go_records:
            go_wdid = go_wdid_mapping[go_record['id']]
            evidence_wdid = go_evidence_codes[go_record['evidence']]
            # The evidence code is attached as a P459 qualifier.
            evidence_statement = PBB_Core.WDItemID(value=evidence_wdid,
                                                   prop_nr='P459',
                                                   is_qualifier=True)
            # Copy so the PubMed entries appended for one annotation do not
            # leak into the reference of the next one.
            this_reference = copy.deepcopy(reference)
            if add_pubmed:
                for pubmed in go_record['pubmed']:
                    pmid_wdid = PBB_Helpers.PubmedStub(pubmed).create(login)
                    this_reference.append(
                        PBB_Core.WDItemID(pmid_wdid, 'P248',
                                          is_reference=True))
            statements.append(
                PBB_Core.WDItemID(go_wdid,
                                  level_wdid,
                                  references=[this_reference],
                                  qualifiers=[evidence_statement]))

    ############
    # statements with no referencable sources (make by hand, for now...)
    ############
    # subclass of protein
    statements.append(
        PBB_Core.WDItemID('Q8054', 'P279', references=[ensembl_ref]))

    # found in taxon
    statements.append(
        PBB_Core.WDItemID(strain_info['organism_wdid'],
                          'P703',
                          references=[ensembl_ref]))

    # link to the gene item (P702)
    statements.append(
        PBB_Core.WDItemID(gene_qid, 'P702', references=[ensembl_ref]))

    try:
        wd_item_protein = PBB_Core.WDItemEngine(
            item_name=item_name,
            domain='proteins',
            data=statements,
            append_value=['P279'],
            fast_run=True,
            fast_run_base_filter={
                'P352': '',
                'P703': strain_info['organism_wdid']
            })
        wd_item_protein.set_label(item_name)
        wd_item_protein.set_description(item_description, lang='en')
        wd_item_protein.set_aliases(
            [record['symbol']['@value'], record['locus_tag']['@value']])
    except Exception as e:
        # Report the failure for this record and give up on it; the caller's
        # loop carries on with the next record.
        print(e)
        PBB_Core.WDItemEngine.log(
            "ERROR",
            format_msg(record['entrezgene']['@value'], str(e), None,
                       ENTREZ_PROP))
        return

    try_write(wd_item_protein, record['entrezgene']['@value'], 'P351', login)
예제 #5
0
def make_chroms(strain_info, retrieved, login):
    """
    Create or update one chromosome item per entry of
    strain_info['chrom_genomeid_map'] and return their wdids.

    Every chromosome whose item id is known ends up in the returned mapping:
    both those that needed no write (logged as SKIP) and those that were
    written successfully (logged as CREATE or UPDATE). A chromosome whose
    write raised is logged as an error and left out of the mapping.

    :param strain_info: dict providing 'chrom_genomeid_map', 'organism_name',
        'organism_type' and 'organism_wdid'
    :param retrieved: value forwarded to make_ref together with the genome id
    :param login: login object used for writing
    :return: dict mapping chromosome number to the wdid of its item
    """
    chrom_wdid = {}
    for chrom_num, genome_id in strain_info['chrom_genomeid_map'].items():

        item_name = '{} chromosome {}'.format(strain_info['organism_name'],
                                              chrom_num)
        item_description = '{} chromosome'.format(strain_info['organism_type'])
        print(item_name)
        print(genome_id)

        reference = make_ref(retrieved, genome_id)
        statements = [
            # subclass of chromosome
            PBB_Core.WDItemID(value='Q37748',
                              prop_nr='P279',
                              references=[reference]),
            # found in taxon
            PBB_Core.WDItemID(value=strain_info['organism_wdid'],
                              prop_nr='P703',
                              references=[reference]),
            # genome id
            PBB_Core.WDString(value=genome_id,
                              prop_nr='P2249',
                              references=[reference]),
        ]

        wd_item = PBB_Core.WDItemEngine(item_name=item_name,
                                        domain='chromosome',
                                        data=statements,
                                        append_value=['P279'],
                                        fast_run=True,
                                        fast_run_base_filter={
                                            'P703':
                                            strain_info['organism_wdid'],
                                            'P2249': ''
                                        })

        if not wd_item.require_write:
            # Nothing to change: only record the item id.
            chrom_wdid[chrom_num] = wd_item.wd_item_id
            PBB_Core.WDItemEngine.log(
                "INFO",
                format_msg(genome_id,
                           "SKIP",
                           wd_item.wd_item_id,
                           external_id_prop='P2249'))
            continue

        print("require write")
        wd_item.set_label(item_name)
        wd_item.set_description(item_description, lang='en')
        try:
            msg = "CREATE" if wd_item.create_new_item else "UPDATE"
            wd_item.write(login=login)
            # Record the id of written items too; previously only skipped
            # items were recorded, so chromosomes created or updated in this
            # run were missing from the returned mapping.
            chrom_wdid[chrom_num] = wd_item.wd_item_id
            PBB_Core.WDItemEngine.log(
                "INFO",
                format_msg(genome_id,
                           msg,
                           wd_item.wd_item_id,
                           external_id_prop='P2249'))
        except Exception as e:
            print(e)
            PBB_Core.WDItemEngine.log(
                "ERROR",
                format_msg(genome_id,
                           str(e),
                           wd_item.wd_item_id,
                           external_id_prop='P2249'))

    return chrom_wdid