def organism_version(cls, name):
    """Return the datetime stamp of the server-side gene_info file for *name*."""
    tax_name = cls.organism_name_search(name)
    db_file = "gene_info.%s.db" % tax_name
    # FIXME, dirty hack to ensure file id downloaded
    orngServerFiles.localpath_download("NCBI_geneinfo", db_file)
    return orngServerFiles.info("NCBI_geneinfo", db_file)["datetime"]
def __init__(self, organism, genematcher=None):
    """
    A dictionary-like object for accessing NCBI gene info.

    Arguments::
        - *organism*   Organism id

    Example::
        >>> info = NCBIGeneInfo("Homo sapiens")

    """
    self.taxid = self.organism_name_search(organism)
    fname = orngServerFiles.localpath_download(
        "NCBI_geneinfo", "gene_info.%s.db" % self.taxid)
    # Map Entrez gene id (2nd tab-separated field) -> full gene_info line.
    # Fix: the original leaked the file handle and shadowed the builtin `file`.
    with open(fname, "rb") as info_file:
        self.update(
            dict((line.split("\t", 3)[1], line)
                 for line in info_file.read().splitlines()
                 if line.strip() and not line.startswith("#")))

    # NOTE orig init time for gene matcher: 2.5s, new 4s: investigate the slowdown
    # NOTE matches are not the same because aliases are build a bit
    # differently (main name versus old aliases conflict!)
    self.matcher = genematcher
    if self.matcher is None:  # fix: identity test, not == None
        if self.taxid == '352472':
            # Dictyostelium gets an extra dictyBase-aware matcher chain.
            self.matcher = matcher([GMNCBI(self.taxid), GMDicty(),
                                    [GMNCBI(self.taxid), GMDicty()]])
        else:
            self.matcher = matcher([GMNCBI(self.taxid)])

    # if this is done with a gene matcher, pool target names
    self.matcher.set_targets(self.keys())
def history(self): if getattr(self, "_history", None) is None: fname = orngServerFiles.localpath_download("NCBI_geneinfo", "gene_history.%s.db" % self.taxid) try: self._history = dict([(line.split("\t")[2], GeneHistory(line)) for line in open(fname, "rb").read().splitlines()]) except Exception, ex: print >> sys.srderr, "Loading NCBI gene history failed.", ex self._history = {}
def load(self):
    """Load homolog records from the serverfiles database into memory.

    Builds two indexes:
        - self._homologs: (taxonomy_id, gene_symbol) -> homolog record
        - self._homologs_by_group: group_id -> list of member records
    """
    path = orngServerFiles.localpath_download(self.DOMAIN, self.FILENAME)
    # Fix: close the file handle (the original leaked it).
    with open(path, "rb") as f:
        # The final splitline is dropped, matching the original's [:-1];
        # presumably the file ends with a non-record line — confirm.
        lines = f.read().splitlines()[:-1]
    homologs = [_homolog(line) for line in lines]
    # Fix: removed the dead `self._homologs = {}` pre-assignment.
    self._homologs = dict(((h.taxonomy_id, h.gene_symbol), h)
                          for h in homologs)
    # Fix: replaced the reduce()-with-builtin-shadowing with a plain loop.
    self._homologs_by_group = defaultdict(list)
    for h in self._homologs.values():
        self._homologs_by_group[h.group_id].append(h)
def _db(self, taxid=None):
    """
    Return an open sqlite3.Connection object.
    """
    taxid = taxid if taxid else self.taxid
    db_path = orngServerFiles.localpath_download(
        "PPI", self.SERVER_FILE.format(taxid=taxid))
    if not os.path.exists(db_path):
        raise ValueError("Database is missing.")
    return sqlite3.connect(db_path)
def _db(self, taxid=None):
    """
    Return an open sqlite3.Connection object.
    """
    requested = taxid or self.taxid
    server_file = self.SERVER_FILE.format(taxid=requested)
    filename = orngServerFiles.localpath_download("PPI", server_file)
    if not os.path.exists(filename):
        raise ValueError("Database is missing.")
    return sqlite3.connect(filename)
def __loadOntologyFromDisk(self): """ Function loads MeSH ontology and chemical annotation into internal data structures. """ self.toID = dict() # name -> [IDs] Be careful !!! One name can match many IDs! self.toName = dict() # ID -> name self.toDesc = dict() # name -> description self.fromCID = dict() # cid -> term id self.fromPMID = dict() # pmid -> term id d = file(orngServerFiles.localpath_download('MeSH', 'mesh-ontology.dat')) f = file(orngServerFiles.localpath_download('MeSH', 'cid-annotation.dat')) # loading ontology graph t = 0 for i in d: t += 1 parts = i.split("\t") # delimiters are tabs if(len(parts) != 3): print "error reading ontology ", parts[0] parts[2] = parts[2].rstrip("\n\r") ids = parts[1].split(";") self.toID[parts[0]] = ids # append additional ID self.toDesc[parts[0]] = parts[2] for r in ids: self.toName[r] = parts[0] # loading cid -> mesh for i in f: parts = i.split(";") # delimiters are tabs if(len(parts) != 2): print "error reading ontology ", parts[0] parts[1] = parts[1].rstrip("\n\r") cid = int(parts[0]) if self.fromCID.has_key(cid): self.fromCID[cid].append(parts[1]) else: self.fromCID[cid] = [parts[1]] # loading pmid -> mesh, TODO # print "Current MeSH ontology contains ", t, " mesh terms." return True
def history(self): if getattr(self, "_history", None) is None: fname = orngServerFiles.localpath_download( "NCBI_geneinfo", "gene_history.%s.db" % self.taxid) try: self._history = dict([ (line.split("\t")[2], GeneHistory(line)) for line in open(fname, "rb").read().splitlines() ]) except Exception, ex: print >> sys.srderr, "Loading NCBI gene history failed.", ex self._history = {}
def load(self):
    """Load the pickled EnsembleGeneInfo data, building it locally on failure.

    Tries to download and unpickle the server copy; on any error falls
    back to create_info() and caches the result (deliberate best-effort).
    NOTE(review): `info` is not returned or stored on self here — confirm
    against the full source whether a `return info` follows.
    """
    import cPickle
    # Fix: renamed `dir`, which shadowed the builtin.
    cache_dir = orngServerFiles.localpath("EnsembleGeneInfo")
    if not os.path.exists(cache_dir):
        os.makedirs(cache_dir)
    try:
        filename = orngServerFiles.localpath_download(
            "EnsembleGeneInfo", self.filename())
        # Fix: close file handles instead of leaking them.
        with open(filename, "rb") as f:
            info = cPickle.load(f)
    except Exception:
        filename = orngServerFiles.localpath(
            "EnsembleGeneInfo", self.filename())
        info = self.create_info()
        with open(filename, "wb") as f:
            cPickle.dump(info, f)
def load_miRNA_microCosm(org="mus_musculus", max_pvalue=None, min_score=None):
    """
    Load miRNA's from microcosm into the global scope (currently
    only Mus musculus is supported)
    """
    global IDs, LABELS, miRNA_lib, mat_toPre, ACCtoID
    global preIDs, premiRNA_lib, preACCtoID, clusters
    global num_toClusters, clusters_toNum

    data_file = osf.localpath_download("miRNA", "v5.txt.{org}".format(org=org))
    parsed = parse_targets_microcosm_v5(
        data_file, max_pvalue=max_pvalue, min_score=min_score)
    IDs, LABELS, miRNA_lib, mat_toPre, ACCtoID = parsed

    # The pre-miRNA / cluster tables are not provided by microcosm.
    preIDs = []
    premiRNA_lib = {}
    preACCtoID = {}
    clusters = {}
    num_toClusters = {}
    clusters_toNum = {}
def load(self):
    """Fetch the pickled EnsembleGeneInfo data, rebuilding it on failure.

    Downloads and unpickles the server file; any exception triggers a
    local rebuild via create_info() which is then cached (best-effort by
    design). NOTE(review): the loaded `info` is neither returned nor
    assigned to self in the visible code — confirm with the full source.
    """
    import cPickle
    # Fix: do not shadow the builtin `dir`.
    cache_dir = orngServerFiles.localpath("EnsembleGeneInfo")
    if not os.path.exists(cache_dir):
        os.makedirs(cache_dir)
    try:
        filename = orngServerFiles.localpath_download(
            "EnsembleGeneInfo", self.filename())
        # Fix: file handles are now closed deterministically.
        with open(filename, "rb") as stream:
            info = cPickle.load(stream)
    except Exception:
        filename = orngServerFiles.localpath(
            "EnsembleGeneInfo", self.filename())
        info = self.create_info()
        with open(filename, "wb") as stream:
            cPickle.dump(info, stream)
def __init__(self, organism, genematcher=None):
    """
    A dictionary-like object for accessing NCBI gene info.

    Arguments::
        - *organism*   Organism id

    Example::
        >>> info = NCBIGeneInfo("Homo sapiens")

    """
    self.taxid = self.organism_name_search(organism)
    fname = orngServerFiles.localpath_download(
        "NCBI_geneinfo", "gene_info.%s.db" % self.taxid)
    # Key each gene_info line by its Entrez id (2nd tab field).
    # Fix: `file` shadowed the builtin and the handle was never closed.
    with open(fname, "rb") as gene_file:
        entries = dict(
            (line.split("\t", 3)[1], line)
            for line in gene_file.read().splitlines()
            if line.strip() and not line.startswith("#"))
    self.update(entries)

    # NOTE orig init time for gene matcher: 2.5s, new 4s: investigate the slowdown
    # NOTE matches are not the same because aliases are build a bit
    # differently (main name versus old aliases conflict!)
    self.matcher = genematcher
    if self.matcher is None:  # fix: was `== None`
        if self.taxid == '352472':
            # Dictyostelium: add the dictyBase matcher to the chain.
            self.matcher = matcher([
                GMNCBI(self.taxid), GMDicty(),
                [GMNCBI(self.taxid), GMDicty()]
            ])
        else:
            self.matcher = matcher([GMNCBI(self.taxid)])

    # if this is done with a gene matcher, pool target names
    self.matcher.set_targets(self.keys())
def list_serverfiles():
    """Return the server file list parsed from the downloaded index pickle."""
    fname = orngServerFiles.localpath_download(sfdomain, "index.pck")
    # Fix: pickles must be opened in binary mode ('rb', not 'r') —
    # text mode corrupts protocol-2 pickles on Windows — and the
    # handle is now closed instead of leaked.
    with open(fname, 'rb') as f:
        flist = pickle.load(f)
    return list_serverfiles_from_flist(flist)
from __future__ import absolute_import, division from collections import defaultdict import math, os, random, re, urllib from Orange.orng import orngServerFiles as osf import statc from . import gene as ge, go, kegg as kg, utils, taxonomy as obiTaxonomy op = utils.stats mirnafile = osf.localpath_download('miRNA', 'miRNA.txt') premirnafile = osf.localpath_download('miRNA', 'premiRNA.txt') ################################################################################################################ ################################################################################################################ def __build_lib(filename, labels=True, MATtoPRE=True, ACCtoID=True, clust=False): """ build_lib() function takes as input a filename and gives as output some variables there will be used in the module. """ content = [l.rstrip() for l in open(filename).readlines()][1:] to_return = []
def create_aliases_version(self):
    """Return the datetime stamp of this organism's Affy alias pickle."""
    pickle_name = self.organism + ".pickle"
    # Download first so the subsequent info() query finds the file.
    orngServerFiles.localpath_download("Affy", pickle_name)
    return orngServerFiles.info("Affy", pickle_name)["datetime"]
def create_aliases(self):
    """Download and unpickle the Affy alias data for this organism."""
    filename = orngServerFiles.localpath_download(
        "Affy", self.organism + ".pickle")
    import cPickle
    # Fix: close the file handle (the original leaked it).
    with open(filename, "rb") as f:
        return cPickle.load(f)
def create_aliases(self):
    """Load this organism's Affy alias pickle from the server cache."""
    pickle_path = orngServerFiles.localpath_download(
        "Affy", self.organism + ".pickle")
    import cPickle
    # Fix: deterministic close of the handle instead of leaking it.
    with open(pickle_path, "rb") as stream:
        return cPickle.load(stream)
def load_serverfiles(hierarchy, organism):
    """Load (hierarchy, organism) data through the serverfiles backend."""
    def download(h, o):
        # Fetch the matching database file from the server domain.
        return orngServerFiles.localpath_download(sfdomain, filename(h, o))
    return load_fn(hierarchy, organism, list_serverfiles, download)
def load(self):
    """Read the homology database and build the in-memory indexes.

    Builds:
        - self._homologs: (taxonomy_id, gene_symbol) -> record
        - self._homologs_by_group: group_id -> [records]
    """
    path = orngServerFiles.localpath_download(self.DOMAIN, self.FILENAME)
    # Fix: close the handle; drop the last splitline as before
    # (presumably a trailing non-record line — confirm).
    with open(path, "rb") as db_file:
        lines = db_file.read().splitlines()[:-1]
    # Fix: removed the dead `self._homologs = {}` assignment and the
    # reduce() whose lambda shadowed the builtin `dict`.
    records = [_homolog(line) for line in lines]
    self._homologs = dict(
        ((h.taxonomy_id, h.gene_symbol), h) for h in records)
    by_group = defaultdict(list)
    for h in self._homologs.values():
        by_group[h.group_id].append(h)
    self._homologs_by_group = by_group
from __future__ import absolute_import, division from collections import defaultdict import math, os, random, re, urllib from Orange.orng import orngServerFiles as osf import statc from . import gene as ge, go, kegg as kg, utils, taxonomy as obiTaxonomy op = utils.stats mirnafile = osf.localpath_download('miRNA','miRNA.txt') premirnafile = osf.localpath_download('miRNA','premiRNA.txt') ################################################################################################################ ################################################################################################################ def __build_lib(filename, labels=True, MATtoPRE=True, ACCtoID=True, clust=False): """ build_lib() function takes as input a filename and gives as output some variables there will be used in the module. """ content = [l.rstrip() for l in open(filename).readlines()][1:] to_return = [] ids = [l.split('\t')[0] for l in content] to_return.append(ids) if labels:
def __init__(self):
    """Open a connection to the InParanoid homology sqlite database."""
    import sqlite3
    db_path = orngServerFiles.localpath_download(
        "HomoloGene", "InParanoid.sqlite")
    self.con = sqlite3.connect(db_path)
def __init__(self):
    """Connect to the downloaded InParanoid sqlite database."""
    import sqlite3
    local_copy = orngServerFiles.localpath_download("HomoloGene",
                                                    "InParanoid.sqlite")
    self.con = sqlite3.connect(local_copy)