Beispiel #1
0
def get_genes_from_Ensembl_pairwise(args, sourcedata, targetdata):
    """Fetch one gene from each of two species and append their data.

    The two genes are selected either by stable ID (``args.inputType ==
    "id"``, using ``args.firstgeneid`` / ``args.secondgeneid``) or by a
    shared gene name (``args.inputType == "name"``, using ``args.gene``).

    All arguments are validated before any Ensembl connection is set up.
    When a required argument is missing, the corresponding message is
    printed and the inputs are returned unchanged.

    Args:
        args: parsed command-line namespace providing ``inputType``,
            ``firstspecies``, ``secondspecies`` and, depending on the input
            type, ``firstgeneid``/``secondgeneid`` or ``gene``.
        sourcedata: accumulator extended (``+=``) with ``get_cds_data`` of
            both genes.
        targetdata: accumulator extended (``+=``) with ``get_gene_data`` of
            both genes.

    Returns:
        The ``(sourcedata, targetdata)`` pair.
    """
    # NOTE: print() is always called with a single pre-formatted string so
    # the output is identical under the Python 2 print statement.
    inputtype = args.inputType
    firstspecies = args.firstspecies
    secondspecies = args.secondspecies

    if firstspecies is None:
        print("Argument -s1 <firstspeciesname> is required")
        return sourcedata, targetdata
    if secondspecies is None:
        print("Argument -s2 <secondspeciesname> is required")
        return sourcedata, targetdata

    if inputtype == "id":
        firstgeneid = args.firstgeneid
        secondgeneid = args.secondgeneid
        # Report every missing ID, not just the first one.
        if firstgeneid is None:
            print("Argument -gid1 <firstgeneid> is required")
        if secondgeneid is None:
            print("Argument -gid2 <secondgeneid> is required")
        if firstgeneid is None or secondgeneid is None:
            return sourcedata, targetdata
    elif inputtype == "name":
        gene = args.gene
        if gene is None:
            print("Argument -g <genename> is required")
            return sourcedata, targetdata
    else:
        # Previously this fell through and crashed on an unbound local.
        print("Unsupported input type: %s" % inputtype)
        return sourcedata, targetdata

    account = HostAccount('ensembldb.ensembl.org', 'anonymous', '')
    firstgenome = Genome(firstspecies, ENSEMBL_VERSION, account)
    secondgenome = Genome(secondspecies, ENSEMBL_VERSION, account)

    if inputtype == "id":
        print("Retreving Genes %s %s" % (firstgeneid, secondgeneid))
        firstgene = firstgenome.getGeneByStableId(StableId=firstgeneid)
        secondgene = secondgenome.getGeneByStableId(StableId=secondgeneid)
    else:
        print("Retreving Genes %s from species %s %s"
              % (gene, firstspecies, secondspecies))
        firstgene = get_gene_from_Ensembl_by_name(gene, firstgenome)
        secondgene = get_gene_from_Ensembl_by_name(gene, secondgenome)

    sourcedata += get_cds_data(firstgene) + get_cds_data(secondgene)
    targetdata += get_gene_data(firstgene) + get_gene_data(secondgene)

    return sourcedata, targetdata
def ensembl_to_hgnc(gene_list):
    """Return the set of ``Symbol`` values for the given Ensembl stable IDs.

    Each ID in *gene_list* is looked up in the human genome (release 73) on
    the ``blackrussian`` host, and the ``Symbol`` attribute of the resulting
    gene is collected. Duplicate symbols collapse because a set is returned.
    Presumably the symbols are HGNC names, per the function name.
    """
    # NOTE(review): credentials are hard-coded here; consider moving them
    # to configuration or the environment.
    db_account = HostAccount('blackrussian', 'bioinfo', 'A29bcd1234#', port=3306)
    human_genome = Genome('human', Release=73, account=db_account)

    return set(
        human_genome.getGeneByStableId(StableId=stable_id).Symbol
        for stable_id in gene_list
    )
Beispiel #3
0
def ensembl_to_hgnc(gene_list):
    """Look up Ensembl stable gene IDs and return their unique symbols.

    A connection to the human genome, release 73, on the ``blackrussian``
    host is used to resolve every entry of *gene_list*; the ``Symbol`` of
    each gene found is gathered into a set, which is returned.
    """
    # NOTE(review): the password is embedded in source; flagging for a move
    # to configuration.
    credentials = HostAccount('blackrussian', 'bioinfo', 'A29bcd1234#', port=3306)
    genome = Genome('human', Release=73, account=credentials)

    symbols = set()
    for ensembl_id in gene_list:
        record = genome.getGeneByStableId(StableId=ensembl_id)
        symbols.add(record.Symbol)
    return symbols
Beispiel #4
0
def get_genes_from_Ensembl_multiple(args, sourcedata, targetdata):

    inputtype = args.inputType

    if (inputtype == "id"):
        geneidlistfile = args.geneIdListFile
        if (geneidlistfile == None):
            print "Argument -gidlf <geneidlistfilename> is required"
        else:
            for line in open(geneidlistfile, "r").readlines():
                parse = line.split("\n")[0].split(" ")
                if (len(parse) > 1):
                    species = parse[0]
                    geneid = parse[1]
                    print "Retreving Gene", geneid
                    account = HostAccount('ensembldb.ensembl.org', 'anonymous',
                                          '')
                    genome = Genome(species, ENSEMBL_VERSION, account)
                    gene = genome.getGeneByStableId(StableId=geneid)
                    sourcedata += get_cds_data(gene)
                    targetdata += get_gene_data(gene)

    if (inputtype == "name"):
        gene = args.gene
        if (gene == None):
            print "Argument -g <genename> is required"
        specieslistfile = args.specieslistfile
        if (specieslistfile == None):
            print "Argument -slf <specieslistfilename> is required"
        if (gene != None and specieslistfile != None):
            for line in open(specieslistfile, "r").readlines():
                parse = line.split("\n")[0].split(" ")
                if (len(parse) > 0):
                    species = parse[0]
                    print "Retreving Gene", gene, "from species", species
                    account = HostAccount('ensembldb.ensembl.org', 'anonymous',
                                          '')
                    genome = Genome(species, ENSEMBL_VERSION, account)
                    gene = get_gene_from_Ensembl_by_name(gene, genome)
                    sourcedata += get_cds_data(gene)
                    targetdata += get_gene_data(gene)
    return sourcedata, targetdata
def main():
    """Print transcript stable IDs and exon symbols for one zebrafish gene.

    If the ``ENSEMBL_ACCOUNT`` environment variable is set, it must contain
    three whitespace-separated fields (host, username, password); these are
    used to build the account passed to ``Genome``. Otherwise ``None`` is
    passed as the account.

    The gene ``ENSDARG00000027279`` is fetched from Ensembl release 81. For
    each of its transcripts the transcript ``StableId`` is printed, followed
    by the ``Symbol`` of each exon of that transcript.
    """
    import os
    from cogent.db.ensembl import Genome, HostAccount

    # Optional credentials for logging into the Ensembl database.
    if 'ENSEMBL_ACCOUNT' in os.environ:
        host, username, password = os.environ['ENSEMBL_ACCOUNT'].split()
        account = HostAccount(host, username, password)
    else:
        account = None

    sp = "zebrafish"
    gn = "ENSDARG00000027279"
    # Use the account built above; it was previously computed but then
    # discarded in favour of a hard-coded None.
    specie = Genome(Species=sp, Release="81", account=account)
    gene = specie.getGeneByStableId(StableId=gn)
    for tr in gene.Transcripts:
        print(tr.StableId)
        for ex in tr.Exons:
            print(ex.Symbol)
Beispiel #6
0
import os
import sqlalchemy as sql
from cogent.db.ensembl import HostAccount, Genome

# One-line alternative, kept for reference; it raises KeyError when
# ENSEMBL_ACCOUNT is not set:
#account = HostAccount(*os.environ['ENSEMBL_ACCOUNT'].split())
# Build the account from ENSEMBL_ACCOUNT ("host username password") when the
# variable is set; otherwise pass account=None to Genome.
if 'ENSEMBL_ACCOUNT' in os.environ:
    host, username, password = os.environ['ENSEMBL_ACCOUNT'].split()
    account = HostAccount(host, username, password)
else:
    account = None

# Human genome, Ensembl release 69.
human = Genome('human', Release=69, account=account)

# BRCA1
# NOTE(review): another snippet in this file fetches brca1 with stable ID
# ENSG00000012048; confirm that the ID below really is BRCA1.
gene = human.getGeneByStableId(StableId="ENSG00000167131")

# get the db tables we need (table objects from the genome's core database)
external_db = human.CoreDb.getTable("external_db")
object_xref = human.CoreDb.getTable("object_xref")
xref = human.CoreDb.getTable("xref")

# get the external db ID for refseq mrna
# fetchone() yields the first result row of the query, so this presumably
# holds a row object (or None when nothing matches) rather than a bare
# integer -- verify at the point of use.
refseq_mrna_id = sql.select([external_db.c.external_db_id],
                            external_db.c.db_name.like('RefSeq_mRNA')).execute().fetchone()


# query for a specific transcript ID
print "Querying for mRNA REFSEQ entries for one transcript"
query = sql.select([object_xref, xref],
        sql.and_(xref.c.xref_id==object_xref.c.xref_id,
        object_xref.c.ensembl_id == 1345831,
Beispiel #7
0
# Open the human genome. `Release` and `account` are not defined in this
# excerpt and must already be bound before this line runs.
human = Genome(Species='human', Release=Release, account=account)
# Show the genome object's string representation.
print human

'''
A Note on Coordinate Systems

The positions employed on Ensembl's website and in their MySQL database differ
from those used internally by cogent.db.ensembl. In all cases where you are querying
cogent.db.ensembl objects directly by inputting nucleotide positions, you can indicate
that you are using Ensembl coordinates by setting ensembl_coord=True.
If you are explicitly passing in a cogent.db.ensembl region, that argument has no effect.
'''

## Selecting Gene
#Via StableID
brca1 = human.getGeneByStableId(StableId='ENSG00000012048')
print brca1.Description

#Or gene symbol
genes = human.getGenesMatching(Symbol='brca2')
for gene in genes:
     if gene.Symbol.lower() == 'brca2':
         break

brca2 = gene # so we keep track of this reference for later on
print "Symbol\t", brca2.Symbol
print "Descr.\t", brca2.Description
print "gene\t", brca2
print "loc.\t", brca2.Location
print "length\t" ,len(brca2)
Beispiel #8
0
# Ensembl release number used for the genome below.
Release=80
from cogent.db.ensembl import Species, Genome
# account=None: no explicit host account is supplied to Genome.
human = Genome(Species='human', Release=Release, account=None)
# Fetch one gene by its Ensembl stable ID and print its symbol.
gene = human.getGeneByStableId(StableId='ENSG00000205274')
print gene.Symbol