Exemple #1
0
def yield_uniprot_gene_mappings(gene_ids):
    """Yield ``(gene_ref, protein_ref)`` pairs for the given Ensembl gene ids.

    A biomart query is built against the ``mmusculus_gene_ensembl`` dataset,
    filtered on ``ensembl_gene_id`` by the comma-joined ``gene_ids``, asking
    for the ``ensembl_gene_id`` and ``unified_uniprot_accession`` attributes.
    Result rows whose second column is empty are skipped.

    :param gene_ids: iterable of gene id strings; joined with ``","`` to form
        the filter value.
    :returns: generator of 2-tuples: the first column parsed as an Ensembl
        reference and the second column parsed as a Swissprot reference.
    """
    import biopsy.identifiers.biomart as biomart
    import csv

    query = biomart.new_query()
    dataset = biomart.add_dataset(query, 'mmusculus_gene_ensembl')

    # Restrict the query to the requested genes.
    id_filter = ",".join(gene_ids)
    biomart.add_filter(dataset, 'ensembl_gene_id', id_filter)

    # Column order of the result follows the order attributes are added in.
    for attribute in ('ensembl_gene_id', 'unified_uniprot_accession'):
        biomart.add_attribute(dataset, attribute)

    response = biomart.execute_query(query)
    for row in csv.reader(response, delimiter=','):
        if not row[1]:
            # No accession reported for this gene.
            continue
        gene_ref = biopsy.DbRef.parse_as(row[0], biopsy.db.ensembl)
        protein_ref = biopsy.DbRef.parse_as(row[1], biopsy.db.swissprot)
        yield gene_ref, protein_ref
Exemple #2
0
def proteins_for_species(species):
    """Map the Ensembl genes of a biomart dataset to their known proteins.

    Two biomart queries are run against the dataset ``species``: one asking
    for the ``uniprot_swissprot_accession`` attribute and one asking for the
    ``protein`` attribute. Accessions from the first query are parsed as
    Swissprot references. Accessions from the second query are translated to
    Entrez protein ids through ``entrez.mouse_proteins().acc2id``; accessions
    that are not in that table are skipped.

    NOTE(review): the Entrez lookup always uses the *mouse* protein table,
    whatever ``species`` is passed - confirm this is intended.

    :param species: dataset identifier handed to ``biomart.add_dataset``.
    :returns: a ``cookbook.DictOfSets`` keyed by Ensembl gene reference; each
        entry collects ``(transcript_ref, protein_ref)`` tuples.
    """
    from . import entrez
    result = cookbook.DictOfSets()

    def _rows_with(attribute):
        # Run one gene/transcript/<attribute> query and yield
        # (gene_ref, transcript_ref, raw_value) for rows where the value
        # column is non-empty.
        query = biomart.new_query()
        dataset = biomart.add_dataset(query, species)
        biomart.add_attribute(dataset, 'ensembl_gene_id')
        biomart.add_attribute(dataset, 'ensembl_transcript_id')
        biomart.add_attribute(dataset, attribute)
        for row in csv.reader(biomart.execute_query(query), delimiter=','):
            if not row[2]:
                continue
            gene_ref = biopsy.transfac.DbRef.parse_as(
                row[0], biopsy.transfac.db.ensembl)
            transcript_ref = biopsy.transfac.DbRef.parse_as(
                row[1], biopsy.transfac.db.ensembl)
            yield gene_ref, transcript_ref, row[2]

    # Swissprot proteins: the accession itself is the reference.
    for gene_ref, transcript_ref, accession in _rows_with(
            'uniprot_swissprot_accession'):
        protein_ref = biopsy.transfac.DbRef.parse_as(
            accession, biopsy.transfac.db.swissprot)
        result[gene_ref].add((transcript_ref, protein_ref))

    # Entrez proteins: one accession may map to several protein ids.
    # The table is fetched once instead of twice per matching row; this
    # assumes mouse_proteins() hands back the same table on every call.
    acc2id = entrez.mouse_proteins().acc2id
    for gene_ref, transcript_ref, accession in _rows_with('protein'):
        if accession in acc2id:
            for protein_id in acc2id[accession]:
                ref = T.DbRef(T.db.entrez_protein, "", protein_id)
                result[gene_ref].add((transcript_ref, ref))

    return result
Exemple #3
0
def proteins_for_species(species):
    """Map the Ensembl genes of a biomart dataset to their known proteins.

    Two biomart queries are run against the dataset ``species``: one asking
    for the ``uniprot_swissprot_accession`` attribute and one asking for the
    ``protein`` attribute. Accessions from the first query are parsed as
    Swissprot references. Accessions from the second query are translated to
    Entrez protein ids through ``entrez.mouse_proteins().acc2id``; accessions
    that are not in that table are skipped.

    NOTE(review): the Entrez lookup always uses the *mouse* protein table,
    whatever ``species`` is passed - confirm this is intended.

    :param species: dataset identifier handed to ``biomart.add_dataset``.
    :returns: a ``cookbook.DictOfSets`` keyed by Ensembl gene reference; each
        entry collects ``(transcript_ref, protein_ref)`` tuples.
    """
    from . import entrez
    result = cookbook.DictOfSets()

    def _rows_with(attribute):
        # Run one gene/transcript/<attribute> query and yield
        # (gene_ref, transcript_ref, raw_value) for rows where the value
        # column is non-empty.
        query = biomart.new_query()
        dataset = biomart.add_dataset(query, species)
        biomart.add_attribute(dataset, 'ensembl_gene_id')
        biomart.add_attribute(dataset, 'ensembl_transcript_id')
        biomart.add_attribute(dataset, attribute)
        for row in csv.reader(biomart.execute_query(query), delimiter=','):
            if not row[2]:
                continue
            gene_ref = biopsy.transfac.DbRef.parse_as(
                row[0], biopsy.transfac.db.ensembl)
            transcript_ref = biopsy.transfac.DbRef.parse_as(
                row[1], biopsy.transfac.db.ensembl)
            yield gene_ref, transcript_ref, row[2]

    # Swissprot proteins: the accession itself is the reference.
    for gene_ref, transcript_ref, accession in _rows_with(
            'uniprot_swissprot_accession'):
        protein_ref = biopsy.transfac.DbRef.parse_as(
            accession, biopsy.transfac.db.swissprot)
        result[gene_ref].add((transcript_ref, protein_ref))

    # Entrez proteins: one accession may map to several protein ids.
    # The table is fetched once instead of twice per matching row; this
    # assumes mouse_proteins() hands back the same table on every call.
    acc2id = entrez.mouse_proteins().acc2id
    for gene_ref, transcript_ref, accession in _rows_with('protein'):
        if accession in acc2id:
            for protein_id in acc2id[accession]:
                ref = T.DbRef(T.db.entrez_protein, "", protein_id)
                result[gene_ref].add((transcript_ref, ref))

    return result
Exemple #4
0
def _mgi_ids_from_biomart():
    """Yield ``(gene_ref, mgi_id)`` pairs for mouse genes listed by biomart.

    Queries the ``mmusculus_gene_ensembl`` dataset for the ``ensembl_gene_id``
    and ``external_gene_id`` attributes. A row is skipped when its second
    column is empty or is not a key of ``mgi.acc2id()``.

    :returns: generator of 2-tuples: the first column parsed as an Ensembl
        reference, and the ``mgi.acc2id()`` entry for the second column.
    """
    # NOTE(review): entrez is not referenced in this function; the import is
    # kept only in case loading the module has side effects - confirm and
    # remove.
    from . import entrez

    # build biomart query for MGI accessions and execute
    query = biomart.new_query()
    dataset = biomart.add_dataset(query, 'mmusculus_gene_ensembl')
    biomart.add_attribute(dataset, 'ensembl_gene_id')
    biomart.add_attribute(dataset, 'external_gene_id')

    # Fetch the accession table once rather than twice per row; this assumes
    # mgi.acc2id() hands back the same table on every call.
    acc2id = mgi.acc2id()
    for row in csv.reader(biomart.execute_query(query), delimiter=','):
        mgi_acc = row[1]
        if mgi_acc and mgi_acc in acc2id:
            # Only parse the gene id for rows that are actually yielded.
            gene_ref = biopsy.transfac.DbRef.parse_as(
                row[0], biopsy.transfac.db.ensembl)
            yield gene_ref, acc2id[mgi_acc]
Exemple #5
0
def get_orthologs(species1, species2):
    """Collect, per Ensembl gene of species 1, its orthologs in species 2.

    The query runs against the dataset of ``species2`` and asks for that
    dataset's ``ensembl_gene_id`` plus the ``<short_name>_ensembl_gene``
    attribute built from ``species1``. Rows with an empty second column are
    ignored.

    :param species1: key into the module-level ``species`` table; its
        ``short_name`` forms the ortholog attribute name.
    :param species2: key into the module-level ``species`` table; its
        ``dataset()`` is the dataset queried.
    :returns: a ``cookbook.DictOfSets`` keyed by the reference parsed from
        the second column (species 1), collecting the references parsed from
        the first column (species 2).
    """
    orthologs = cookbook.DictOfSets()

    # Assemble the query: species-2 gene id first, species-1 ortholog second.
    query = biomart.new_query()
    target_dataset = species[species2].dataset()
    dataset = biomart.add_dataset(query, target_dataset)
    biomart.add_attribute(dataset, 'ensembl_gene_id')
    ortholog_attribute = '%s_ensembl_gene' % species[species1].short_name
    biomart.add_attribute(dataset, ortholog_attribute)

    # Hands the query and the name 'query.xml' to biomart.print_query
    # (presumably dumps the query for inspection - confirm).
    biomart.print_query(query, 'query.xml')

    response = biomart.execute_query(query)
    for row in csv.reader(response, delimiter=','):
        if not row[1]:
            # This gene has no ortholog reported.
            continue
        species2_gene = biopsy.transfac.DbRef.parse_as(
            row[0], biopsy.transfac.db.ensembl)
        species1_gene = biopsy.transfac.DbRef.parse_as(
            row[1], biopsy.transfac.db.ensembl)
        orthologs[species1_gene].add(species2_gene)

    return orthologs
Exemple #6
0
def _mgi_ids_from_biomart():
    """Yield ``(gene_ref, mgi_id)`` pairs for mouse genes listed by biomart.

    Queries the ``mmusculus_gene_ensembl`` dataset for the ``ensembl_gene_id``
    and ``external_gene_id`` attributes. A row is skipped when its second
    column is empty or is not a key of ``mgi.acc2id()``.

    :returns: generator of 2-tuples: the first column parsed as an Ensembl
        reference, and the ``mgi.acc2id()`` entry for the second column.
    """
    # NOTE(review): entrez is not referenced in this function; the import is
    # kept only in case loading the module has side effects - confirm and
    # remove.
    from . import entrez

    # build biomart query for MGI accessions and execute
    query = biomart.new_query()
    dataset = biomart.add_dataset(query, 'mmusculus_gene_ensembl')
    biomart.add_attribute(dataset, 'ensembl_gene_id')
    biomart.add_attribute(dataset, 'external_gene_id')

    # Fetch the accession table once rather than twice per row; this assumes
    # mgi.acc2id() hands back the same table on every call.
    acc2id = mgi.acc2id()
    for row in csv.reader(biomart.execute_query(query), delimiter=','):
        mgi_acc = row[1]
        if mgi_acc and mgi_acc in acc2id:
            # Only parse the gene id for rows that are actually yielded.
            gene_ref = biopsy.transfac.DbRef.parse_as(
                row[0], biopsy.transfac.db.ensembl)
            yield gene_ref, acc2id[mgi_acc]
Exemple #7
0
def get_orthologs(species1, species2):
    """Collect, per Ensembl gene of species 1, its orthologs in species 2.

    The query runs against the dataset of ``species2`` and asks for that
    dataset's ``ensembl_gene_id`` plus the ``<short_name>_ensembl_gene``
    attribute built from ``species1``. Rows with an empty second column are
    ignored.

    :param species1: key into the module-level ``species`` table; its
        ``short_name`` forms the ortholog attribute name.
    :param species2: key into the module-level ``species`` table; its
        ``dataset()`` is the dataset queried.
    :returns: a ``cookbook.DictOfSets`` keyed by the reference parsed from
        the second column (species 1), collecting the references parsed from
        the first column (species 2).
    """
    orthologs = cookbook.DictOfSets()

    # Assemble the query: species-2 gene id first, species-1 ortholog second.
    query = biomart.new_query()
    target_dataset = species[species2].dataset()
    dataset = biomart.add_dataset(query, target_dataset)
    biomart.add_attribute(dataset, 'ensembl_gene_id')
    ortholog_attribute = '%s_ensembl_gene' % species[species1].short_name
    biomart.add_attribute(dataset, ortholog_attribute)

    # Hands the query and the name 'query.xml' to biomart.print_query
    # (presumably dumps the query for inspection - confirm).
    biomart.print_query(query, 'query.xml')

    response = biomart.execute_query(query)
    for row in csv.reader(response, delimiter=','):
        if not row[1]:
            # This gene has no ortholog reported.
            continue
        species2_gene = biopsy.transfac.DbRef.parse_as(
            row[0], biopsy.transfac.db.ensembl)
        species1_gene = biopsy.transfac.DbRef.parse_as(
            row[1], biopsy.transfac.db.ensembl)
        orthologs[species1_gene].add(species2_gene)

    return orthologs