# --- GO release: publish the packed ontology and prepare per-organism maps ---
# Upload the packed OBO file; tags embed the uncompressed size and the parsed
# ontology version so clients can tell whether their cached copy is stale.
sf_server.upload( "GO", "gene_ontology_edit.obo.tar.gz", filename, title="Gene Ontology (GO)", tags=["gene", "ontology", "GO", "essential", "#uncompressed:%i" % uncompressedSize(filename), "#version:%i" % obiGO.Ontology.version] )
# Make the freshly uploaded file publicly accessible.
sf_server.unprotect("GO", "gene_ontology_edit.obo.tar.gz")
# Taxid remapping table; NOTE(review): values of None presumably mean
# "keep the key taxid as-is" — confirm against the code that consumes orgMap.
orgMap = {"352472": "44689", "562": "83333", "3055": None, "7955": None, "11103": None, "2104": None, "4754": None, "31033": None, "8355": None, "4577": None}
# GO annotation organism code -> NCBI taxid, restricted to organisms that
# both the taxonomy considers "common" and GO can resolve a code for.
commonOrgs = dict([(obiGO.from_taxid(id), id) for id in obiTaxonomy.common_taxids() if obiGO.from_taxid(id) != None])
# GO codes of the organisms every installation should have.
essentialOrgs = [obiGO.from_taxid(id) for id in obiTaxonomy.essential_taxids()]
# Upstream annotation sets that are deliberately not mirrored.
exclude = ["goa_uniprot", "goa_pdb", "GeneDB_tsetse", "reactome", "goa_zebrafish", "goa_rat", "goa_mouse"]
# Collects, per organism, taxonomy entries touched during the update below.
updatedTaxonomy = defaultdict(set)
# Walk all annotation sets available upstream, skipping excluded or
# non-common organisms.
# NOTE(review): this chunk is truncated — the loop body continues
# beyond this view; only the guard clause is visible here.
for org in list_available_organisms():
    if org in exclude or org not in commonOrgs:
        continue
############################################################################################################################################################## ############################################################################################################################################################## path = os.path.join(environ.buffer_dir, "tmp_miRNA") print 'path: ', path serverFiles = sf_server try: os.mkdir(path) except OSError: pass org_taxo = [tax.name(id) for id in tax.common_taxids()] ### targets library from TargetScan try: tarscan_url = 'http://www.targetscan.org//vert_50//vert_50_data_download/Conserved_Site_Context_Scores.txt.zip' zf = zipfile.ZipFile(StringIO.StringIO(urllib.urlopen(tarscan_url).read())) arch = zf.read(zf.namelist()[0]).splitlines()[1:] arch.pop() mirnas = [a.split('\t')[3] for a in arch] gene_ids = [a.split('\t')[1] for a in arch] TargetScanLib = {} for m, t in zip(mirnas, gene_ids): if not (m in TargetScanLib):
# --- GEO GDS info refresh: reconcile local cache with current organism list ---
# Restrict access to the info file while it is being rebuilt
# (NOTE(review): "0" presumably is the access level — confirm serverfiles semantics).
sf_server.protect(DOMAIN, GDS_INFO, "0")
# Epoch timestamp acts as a "no usable local copy yet" sentinel for the
# freshness comparison performed later (outside this view).
gds_info_datetime = datetime.fromtimestamp(0)
# read the information from the local file
gds_info, excluded = cPickle.load(file(localfile, "rb"))
# excluded should be a dictionary (GEO_ID, TAX_ID)
# if need to refresh the data base
if force_update:
    gds_info, excluded = ({}, {})
# list of common organisms may have changed, rescan excluded list
excluded = dict([(id, taxid) for id, taxid in excluded.items() if taxid not in obiTaxonomy.common_taxids()])
# Datasets whose organism is no longer "common" move into the excluded map ...
excluded.update([(id, info["taxid"]) for id, info in gds_info.items() if info["taxid"] not in obiTaxonomy.common_taxids()])
# ... and are dropped from the kept info.
gds_info = dict([(id, info) for id, info in gds_info.items() if info["taxid"] in obiTaxonomy.common_taxids()])
# get the list of GDS files from NCBI directory
print "Retrieving ftp directory ..."
ftp = ftplib.FTP(FTP_NCBI)
ftp.login()
ftp.cwd(NCBI_DIR)
# ftp.dir() writes listing lines through the callback; collect them.
dirlist = []
ftp.dir(dirlist.append)
# Mid-script import; timedelta is used by the date handling that follows
# beyond this view.
from datetime import timedelta
# --- GEO GDS info: publish the local file, then reconcile it with the
# --- current organism list before rescanning the NCBI FTP directory.
f.close()
# Upload the (re)written info file to the server under its domain/title/tags.
sf_server.upload(DOMAIN, GDS_INFO, localfile, TITLE, TAGS)
# Restrict access while the rebuild continues (NOTE(review): "0" presumably
# is the access level — confirm serverfiles semantics).
sf_server.protect(DOMAIN, GDS_INFO, "0")
# Epoch timestamp acts as a "no usable local copy yet" sentinel for the
# freshness comparison performed later (outside this view).
gds_info_datetime = datetime.fromtimestamp(0)
# read the information from the local file
gds_info, excluded = cPickle.load(file(localfile, "rb"))
# excluded should be a dictionary (GEO_ID, TAX_ID)
# if need to refresh the data base
if force_update:
    gds_info, excluded = ({}, {})
# list of common organisms may have changed, rescan excluded list
excluded = dict([(id, taxid) for id, taxid in excluded.items() if taxid not in obiTaxonomy.common_taxids()])
# Datasets whose organism is no longer "common" move into the excluded map ...
excluded.update([(id, info["taxid"]) for id, info in gds_info.items() if info["taxid"] not in obiTaxonomy.common_taxids()])
# ... and are dropped from the kept info.
gds_info = dict([(id, info) for id, info in gds_info.items() if info["taxid"] in obiTaxonomy.common_taxids()])
# get the list of GDS files from NCBI directory
print "Retrieving ftp directory ..."
ftp = ftplib.FTP(FTP_NCBI)
ftp.login()
ftp.cwd(NCBI_DIR)
# ftp.dir() writes listing lines through the callback; collect them.
dirlist = []
ftp.dir(dirlist.append)
# Mid-script import; timedelta is used by the date handling that follows
# beyond this view.
from datetime import timedelta
############################################################################################################################################################## ############################################################################################################################################################## path = os.path.join(environ.buffer_dir, "tmp_miRNA") print 'path: ', path serverFiles = sf_server try: os.mkdir(path) except OSError: pass org_taxo = [tax.name(id) for id in tax.common_taxids()] ### targets library from TargetScan try: tarscan_url = 'http://www.targetscan.org//vert_50//vert_50_data_download/Conserved_Site_Context_Scores.txt.zip' zf = zipfile.ZipFile(StringIO(urllib.urlopen(tarscan_url).read())) arch = zf.read(zf.namelist()[0]).splitlines()[1:] arch.pop() mirnas = [a.split('\t')[3] for a in arch] gene_ids = [a.split('\t')[1] for a in arch] TargetScanLib = {} for m,t in zip(mirnas,gene_ids): if not(m in TargetScanLib):