def get_genes_from_Ensembl_pairwise(args, sourcedata, targetdata):
    """Fetch one gene from each of two species and append its data.

    The genes are looked up either by stable ID (``args.inputType == "id"``,
    using ``args.firstgeneid`` / ``args.secondgeneid``) or by name
    (``args.inputType == "name"``, using ``args.gene`` for both species).
    The result of ``get_cds_data`` for both genes is appended to
    ``sourcedata`` and the result of ``get_gene_data`` to ``targetdata``.

    When a required argument is missing, a usage message is printed and the
    inputs are returned unchanged.  Any other input type also returns the
    inputs unchanged.

    Parameters:
        args: parsed command-line namespace; reads ``inputType``,
            ``firstspecies``, ``secondspecies`` and, depending on the input
            type, ``firstgeneid``/``secondgeneid`` or ``gene``.
        sourcedata: accumulator extended in place with ``+=``.
        targetdata: accumulator extended in place with ``+=``.

    Returns:
        The tuple ``(sourcedata, targetdata)``.
    """
    inputtype = args.inputType
    firstspecies = args.firstspecies
    secondspecies = args.secondspecies

    if firstspecies is None:
        print("Argument -s1 <firstspeciesname> is required")
        return sourcedata, targetdata
    if secondspecies is None:
        print("Argument -s2 <secondspeciesname> is required")
        return sourcedata, targetdata

    # Validate every argument before opening a database connection, so that
    # a usage error never reaches the lookup code with unbound gene variables.
    if inputtype == "id":
        firstgeneid = args.firstgeneid
        secondgeneid = args.secondgeneid
        if firstgeneid is None:
            print("Argument -gid1 <firstgeneid> is required")
        if secondgeneid is None:
            print("Argument -gid2 <secondgeneid> is required")
        if firstgeneid is None or secondgeneid is None:
            return sourcedata, targetdata
    elif inputtype == "name":
        gene = args.gene
        if gene is None:
            print("Argument -g <genename> is required")
            return sourcedata, targetdata
    else:
        # Unknown input type: nothing to look up.
        return sourcedata, targetdata

    account = HostAccount('ensembldb.ensembl.org', 'anonymous', '')
    firstgenome = Genome(firstspecies, ENSEMBL_VERSION, account)
    secondgenome = Genome(secondspecies, ENSEMBL_VERSION, account)

    if inputtype == "id":
        print("Retreving Genes %s %s" % (firstgeneid, secondgeneid))
        firstgene = firstgenome.getGeneByStableId(StableId=firstgeneid)
        secondgene = secondgenome.getGeneByStableId(StableId=secondgeneid)
    else:
        # The original printed the undefined variable `name` here.
        print("Retreving Genes %s from species %s %s"
              % (gene, firstspecies, secondspecies))
        firstgene = get_gene_from_Ensembl_by_name(gene, firstgenome)
        secondgene = get_gene_from_Ensembl_by_name(gene, secondgenome)

    sourcedata += get_cds_data(firstgene) + get_cds_data(secondgene)
    targetdata += get_gene_data(firstgene) + get_gene_data(secondgene)
    return sourcedata, targetdata
def ensembl_to_hgnc(gene_list):
    """Return the set of gene symbols for the given Ensembl stable IDs.

    Each ID in ``gene_list`` is looked up in the human genome (release 73)
    and the ``Symbol`` attribute of the resulting gene is collected.
    Duplicate symbols collapse because the result is a ``set``.
    """
    # NOTE(review): the database host and credentials are hard-coded here;
    # consider moving them to configuration.
    # NOTE(review): this function is defined twice in the file with the same
    # body; the later definition is the one in effect after import.
    db_account = HostAccount('blackrussian', 'bioinfo', 'A29bcd1234#',
                             port=3306)
    human_genome = Genome('human', Release=73, account=db_account)
    return set(
        human_genome.getGeneByStableId(StableId=stable_id).Symbol
        for stable_id in gene_list
    )
def ensembl_to_hgnc(gene_list):
    """Return the set of gene symbols for the given Ensembl stable IDs.

    Each ID in ``gene_list`` is looked up in the human genome (release 73)
    and the ``Symbol`` attribute of the resulting gene is collected.
    Duplicate symbols collapse because the result is a ``set``.
    """
    # NOTE(review): the database host and credentials are hard-coded here;
    # consider moving them to configuration.
    # NOTE(review): an identical definition of this function appears earlier
    # in the file; this later one replaces it at import time.
    db_account = HostAccount('blackrussian', 'bioinfo', 'A29bcd1234#',
                             port=3306)
    human_genome = Genome('human', Release=73, account=db_account)
    return set(
        human_genome.getGeneByStableId(StableId=stable_id).Symbol
        for stable_id in gene_list
    )
def get_genes_from_Ensembl_multiple(args, sourcedata, targetdata):
    """Fetch genes listed in a file and append their data.

    Two modes are selected by ``args.inputType``:

    * ``"id"``: ``args.geneIdListFile`` names a file whose lines hold
      ``<species> <geneid>``; each gene is fetched by stable ID.
    * ``"name"``: ``args.specieslistfile`` names a file with one species per
      line; the gene called ``args.gene`` is fetched from each species.

    For every gene fetched, ``get_cds_data(gene)`` is appended to
    ``sourcedata`` and ``get_gene_data(gene)`` to ``targetdata``.  When a
    required argument is missing, a usage message is printed and nothing is
    fetched.  Any other input type leaves the inputs untouched.

    Parameters:
        args: parsed command-line namespace (see above for attributes read).
        sourcedata: accumulator extended in place with ``+=``.
        targetdata: accumulator extended in place with ``+=``.

    Returns:
        The tuple ``(sourcedata, targetdata)``.
    """
    inputtype = args.inputType

    if inputtype == "id":
        geneidlistfile = args.geneIdListFile
        if geneidlistfile is None:
            print("Argument -gidlf <geneidlistfilename> is required")
            return sourcedata, targetdata
        # One account object serves every lookup; no need to rebuild per line.
        account = HostAccount('ensembldb.ensembl.org', 'anonymous', '')
        for fields in _iter_list_file_fields(geneidlistfile):
            if len(fields) < 2:
                # A line without both species and gene ID is skipped.
                continue
            species, geneid = fields[0], fields[1]
            print("Retreving Gene %s" % (geneid,))
            genome = Genome(species, ENSEMBL_VERSION, account)
            gene = genome.getGeneByStableId(StableId=geneid)
            sourcedata += get_cds_data(gene)
            targetdata += get_gene_data(gene)

    elif inputtype == "name":
        genename = args.gene
        if genename is None:
            print("Argument -g <genename> is required")
        specieslistfile = args.specieslistfile
        if specieslistfile is None:
            print("Argument -slf <specieslistfilename> is required")
        if genename is None or specieslistfile is None:
            return sourcedata, targetdata
        account = HostAccount('ensembldb.ensembl.org', 'anonymous', '')
        for fields in _iter_list_file_fields(specieslistfile):
            species = fields[0]
            print("Retreving Gene %s from species %s" % (genename, species))
            genome = Genome(species, ENSEMBL_VERSION, account)
            # Keep the fetched object in its own variable: the original
            # overwrote the gene *name*, so every species after the first
            # was queried with a gene object instead of the name.
            gene = get_gene_from_Ensembl_by_name(genename, genome)
            sourcedata += get_cds_data(gene)
            targetdata += get_gene_data(gene)

    return sourcedata, targetdata


def _iter_list_file_fields(path):
    """Yield the whitespace-separated fields of each non-blank line of *path*."""
    # `with` guarantees the file is closed even if a lookup raises mid-loop.
    with open(path, "r") as handle:
        for line in handle:
            fields = line.split()
            if fields:
                yield fields
def main():
    """Print transcript and exon identifiers for one zebrafish gene.

    Connects to Ensembl release 81, fetches the gene with stable ID
    ``ENSDARG00000027279`` and prints, for every transcript, its ``StableId``
    followed by the ``Symbol`` of each of its exons.

    If the ``ENSEMBL_ACCOUNT`` environment variable is set it must hold
    ``"<host> <username> <password>"`` (whitespace separated); those
    credentials are then used for the connection.  Otherwise ``account`` is
    ``None``.
    """
    import os
    from cogent.db.ensembl import Genome, HostAccount

    # Credentials needed to log into the Ensembl database.
    if 'ENSEMBL_ACCOUNT' in os.environ:
        host, username, password = os.environ['ENSEMBL_ACCOUNT'].split()
        account = HostAccount(host, username, password)
    else:
        account = None

    sp = "zebrafish"
    gn = "ENSDARG00000027279"
    # The original built `account` above but then passed account=None,
    # silently ignoring the configured credentials.
    specie = Genome(Species=sp, Release="81", account=account)
    gene = specie.getGeneByStableId(StableId=gn)
    for tr in gene.Transcripts:
        print(tr.StableId)
        for ex in tr.Exons:
            # NOTE(review): confirm that exon objects expose `Symbol`.
            print(ex.Symbol)
import os
import sqlalchemy as sql
from cogent.db.ensembl import HostAccount, Genome

# Build the connection account from the ENSEMBL_ACCOUNT environment variable
# (three whitespace-separated fields: host, username, password) when it is
# set; otherwise use account = None.
#account = HostAccount(*os.environ['ENSEMBL_ACCOUNT'].split())
if 'ENSEMBL_ACCOUNT' in os.environ:
    host, username, password = os.environ['ENSEMBL_ACCOUNT'].split()
    account = HostAccount(host, username, password)
else:
    account = None

# Human genome, Ensembl release 69.
human = Genome('human', Release=69, account=account)

# BRCA1
# NOTE(review): confirm that this stable ID really is BRCA1; the label above
# is kept from the original.
gene = human.getGeneByStableId(StableId="ENSG00000167131")

# get the db tables we need
external_db = human.CoreDb.getTable("external_db")
object_xref = human.CoreDb.getTable("object_xref")
xref = human.CoreDb.getTable("xref")

# get the external db ID for refseq mrna
# (first row of external_db whose db_name is LIKE 'RefSeq_mRNA')
refseq_mrna_id = sql.select([external_db.c.external_db_id], external_db.c.db_name.like('RefSeq_mRNA')).execute().fetchone()

# query for a specific transcript ID
# (joins object_xref to xref on xref_id, filtered to ensembl_id 1345831)
print "Querying for mRNA REFSEQ entries for one transcript"
query = sql.select([object_xref, xref], sql.and_(xref.c.xref_id==object_xref.c.xref_id, object_xref.c.ensembl_id == 1345831,
# Open the human genome for the configured release and show its summary.
human = Genome(Species='human', Release=Release, account=account)
print(human)

# A Note on Coordinate Systems
#
# The positions employed on Ensembl's web site, and in their MySQL database,
# differ from those used internally by cogent.db.ensembl. In all cases where
# you are querying cogent.db.ensembl objects directly, inputting nucleotide
# positions, you can indicate you are using Ensembl coordinates by setting
# ensembl_coord=True. If you are explicitly passing in a cogent.db.ensembl
# region, that argument has no effect.

## Selecting Gene
# Via StableID
brca1 = human.getGeneByStableId(StableId='ENSG00000012048')
print(brca1.Description)

# Or gene symbol
genes = human.getGenesMatching(Symbol='brca2')
brca2 = None
for gene in genes:
    if gene.Symbol.lower() == 'brca2':
        brca2 = gene  # so we keep track of this reference for later on
        break
if brca2 is None:
    # The original silently kept the last non-matching gene (or hit a
    # NameError on an empty result); fail loudly instead.
    raise LookupError("no gene with symbol 'brca2' was found")

# A Python 2 print statement emits no separator after an item ending in a
# tab, so the values follow the tab directly.
print("Symbol\t%s" % (brca2.Symbol,))
print("Descr.\t%s" % (brca2.Description,))
print("gene\t%s" % (brca2,))
print("loc.\t%s" % (brca2.Location,))
print("length\t%s" % (len(brca2),))
from cogent.db.ensembl import Species, Genome

# Ensembl release queried by this snippet.
Release = 80

# Look up a single human gene by stable ID and print its symbol.
human = Genome(Species='human', Release=Release, account=None)
gene = human.getGeneByStableId(StableId='ENSG00000205274')
print(gene.Symbol)