def _createTfAndDisease2RankedGeneListMapping(disease2geneShelfFn, geneAndTf2TfbsCountShelfFn, rankedGeneListShelfFn):
    'disease2geneShelfFn geneAndTf2TfbsCountShelfFn geneListShelfFn'
    # For every (TF, disease) pair, stores a list of
    # (gene, tfbsCount, proportionOfTotalCount) tuples in the output shelf,
    # one tuple per gene of the disease. Output keys are repr((tf, disease)).
    #
    # The TFs are taken from the first element of each (literal-evaluated)
    # key of the count shelf; counts are looked up under
    # repr((tf, gene.lower())) and default to 0 when missing.
    openShelves = []
    try:
        disease2geneShelf = safeshelve.open(disease2geneShelfFn, 'r')
        openShelves.append(disease2geneShelf)
        geneAndTf2TfbsCountShelf = safeshelve.open(geneAndTf2TfbsCountShelfFn, 'r')
        openShelves.append(geneAndTf2TfbsCountShelf)
        rankedGeneListShelf = safeshelve.open(rankedGeneListShelfFn, 'c')
        openShelves.append(rankedGeneListShelf)

        allDiseases = disease2geneShelf.keys()
        allTfs = set()
        for key in geneAndTf2TfbsCountShelf:
            allTfs.add(ast.literal_eval(key)[0])

        for tf in allTfs:
            for disease in allDiseases:
                geneList = disease2geneShelf[disease]
                countList = []
                for gene in geneList:
                    count = geneAndTf2TfbsCountShelf.get(repr((tf, gene.lower())))
                    countList.append(count if count is not None else 0)

                sumCounts = sum(countList)
                if sumCounts != 0:
                    propList = [1.0 * count / sumCounts for count in countList]
                else:
                    # No binding sites at all: every proportion is 0.
                    propList = [0 for count in countList]

                rankedGeneListShelf[repr((tf, disease))] = zip(geneList, countList, propList)
    finally:
        # Close whatever was opened, also when an error interrupts the run,
        # so that no shelf is left open (or unflushed) behind us.
        for shelf in openShelves:
            shelf.close()
def updateShelveItemsAndCopyToNewFile(cls): assert not os.path.exists(cls.SHELVE_COPY_FN) assert not os.path.exists(cls.SHELVE_ERRORS_FN) trackInfoShelveCopy = safeshelve.open(cls.SHELVE_COPY_FN, 'c', protocol=cls.PROTOCOL) trackInfoShelveErrors = safeshelve.open(cls.SHELVE_ERRORS_FN, 'c', protocol=cls.PROTOCOL) trackInfoShelve = safeshelve.open(cls.SHELVE_FN, 'r', protocol=cls.PROTOCOL) keys = trackInfoShelve.keys() trackInfoShelve.close() for i,key in enumerate(keys): try: ti = TrackInfo.createInstanceFromKey(key) except: trackInfoShelve = safeshelve.open(cls.SHELVE_FN, 'r', protocol=cls.PROTOCOL) trackInfoShelveErrors[key] = trackInfoShelve[key] trackInfoShelve.close() trackInfoShelveCopy[key] = ti if i%10000 == 0: print '.', trackInfoShelveCopy.close() trackInfoShelveErrors.close()
def createMeshHierarchyMappings(hierarchyIdFile, mapIdToHeadingFn, mapParentToChildrenFn, mapChildToParentsFn): """hierarchyIdFile mapIdToHeadingFn mapParentToChildrenFn mapChildToParentsFn""" mapParentToChildrenFile = safeshelve.open(mapParentToChildrenFn) mapChildToParentsFile = safeshelve.open(mapChildToParentsFn) mapIdToHeadingFile = safeshelve.open(mapIdToHeadingFn, 'r') for line in open(hierarchyIdFile, 'r'): cols = line.strip().split() try: parent, child = [mapIdToHeadingFile[cols[x]] for x in [0,2]] except KeyError, e: print e continue if parent == child: continue if not parent in mapParentToChildrenFile: mapParentToChildrenFile[parent] = [child] else: temp = mapParentToChildrenFile[parent] temp.append(child) mapParentToChildrenFile[parent] = [x for x in sorted(set(temp))] if not child in mapChildToParentsFile: mapChildToParentsFile[child] = [parent] else: temp = mapChildToParentsFile[child] temp.append(parent) mapChildToParentsFile[child] = [x for x in sorted(set(temp))]
def execute(cls, choices, galaxyFn=None, username=''): '''Is called when execute-button is pushed by web-user. Should print output as HTML to standard out, which will be directed to a results page in Galaxy history. If needed, StaticFile can be used to get a path where additional files can be put (e.g. generated image files). choices is a list of selections made by web-user in each options box. ''' if choices[2]=='Transfac TF ids': mappingFn = 'pwm2TFids.shelf' mapping = safeshelve.open(Tool1.MAPPING_SHELVES_PATH + os.sep + mappingFn ) elif choices[2]== 'Transfac TF readable names': mappingFn = 'pwm2TFnamesNew.shelf' mapping = safeshelve.open(Tool1.MAPPING_SHELVES_PATH + os.sep + mappingFn ) elif choices[2]== 'HGNC gene symbols': mappingFn = 'PWM_to_HGNC.txt' mapping = dict([line.strip().split() for line in open(Tool1.MAPPING_SHELVES_PATH + os.sep + mappingFn).readlines()]) else: raise Exception(choices[2]) if galaxyFn==None: for key in sorted(mapping.keys()): print key + ':' + ','.join(mapping[key]) + os.linesep, else: mappingStaticFile = GalaxyRunSpecificFile(['mapping.txt'], galaxyFn) f = mappingStaticFile.getFile() for key in sorted(mapping.keys()): if type(mapping[key]) in (list,tuple): mapping[key] = ','.join(mapping[key]) f.write( key + ':' + mapping[key] + os.linesep ) f.close() print mappingStaticFile.getLink('View/download mapping')
def execute(choices, galaxyFn=None, username=''): '''Is called when execute-button is pushed by web-user. Should print output as HTML to standard out, which will be directed to a results page in Galaxy history. If needed, StaticFile can be used to get a path where additional files can be put (e.g. generated image files). choices is a list of selections made by web-user in each options box. ''' if choices[2]=='Transfac TF ids': mappingFn = 'pwm2TFids.shelf' mapping = safeshelve.open(Tool1.MAPPING_SHELVES_PATH + os.sep + mappingFn ) elif choices[2]== 'Transfac TF readable names': mappingFn = 'pwm2TFnamesNew.shelf' mapping = safeshelve.open(Tool1.MAPPING_SHELVES_PATH + os.sep + mappingFn ) elif choices[2]== 'HGNC gene symbols': mappingFn = 'PWM_to_HGNC.txt' mapping = dict([line.strip().split() for line in open(Tool1.MAPPING_SHELVES_PATH + os.sep + mappingFn).readlines()]) else: raise Exception(choices[2]) if galaxyFn==None: for key in sorted(mapping.keys()): print key + ':' + ','.join(mapping[key]) + os.linesep, else: mappingStaticFile = GalaxyRunSpecificFile(['mapping.txt'], galaxyFn) f = mappingStaticFile.getFile() for key in sorted(mapping.keys()): if type(mapping[key]) in (list,tuple): mapping[key] = ','.join(mapping[key]) f.write( key + ':' + mapping[key] + os.linesep ) f.close() print mappingStaticFile.getLink('View/download mapping')
def _getStorage(mode='r'):
    """Open the shelf at ProfilingStorage.STORAGE_FN.

    With mode 'c' the shelf is opened with flag 'c' and writeback enabled;
    any other mode value gives a shelf opened with flag 'r'.
    """
    storageFn = ProfilingStorage.STORAGE_FN
    if mode != 'c':
        return safeshelve.open(storageFn, 'r')
    return safeshelve.open(storageFn, 'c', writeback=True)
def _getStorage(mode='r'):
    """Return the opened profiling storage shelf.

    mode == 'c' opens ProfilingStorage.STORAGE_FN with flag 'c' and
    writeback=True. Every other mode value opens it with flag 'r'.
    """
    wantsCreate = (mode == 'c')
    storagePath = ProfilingStorage.STORAGE_FN
    if wantsCreate:
        storage = safeshelve.open(storagePath, 'c', writeback=True)
    else:
        storage = safeshelve.open(storagePath, 'r')
    return storage
def mergeTrackInfoShelves(otherTrackInfoShelveFn):
    """Store every entry of another TrackInfo shelve that the main one lacks.

    Keys present in otherTrackInfoShelveFn but not in TrackInfo.SHELVE_FN are
    collected, and store() is called on each corresponding object from the
    other shelve.
    """
    mainShelve = safeshelve.open(TrackInfo.SHELVE_FN, 'r')
    try:
        otherShelve = safeshelve.open(otherTrackInfoShelveFn, 'r')
    except Exception:
        mainShelve.close()
        raise

    try:
        try:
            mergeKeys = [key for key in otherShelve if key not in mainShelve]
        finally:
            # Release the main shelve before any store() call below.
            # NOTE(review): store() presumably writes to this same file -
            # confirm; previously this read handle was never closed at all.
            mainShelve.close()

        for key in mergeKeys:
            ti = otherShelve[key]
            ti.store()
    finally:
        otherShelve.close()
def findTFsOccurringInRegions(cls, genome, tfSource, regionsBedFn, upFlankSize, downFlankSize, galaxyFn): uniqueWebPath = getUniqueWebPath(extractIdFromGalaxyFn(galaxyFn)) #assert genome == 'hg18' #other genomes not supported. TF id links do not specify genome for pre-selection of analysis tfTrackNameMappings = TfInfo.getTfTrackNameMappings(genome) assert tfTrackNameMappings != {}, 'No TF info for genome: %s' % genome tfTrackName = tfTrackNameMappings[tfSource] if (upFlankSize == downFlankSize == 0): flankedRegionsFn = regionsBedFn else: flankedRegionsFn= uniqueWebPath + os.sep + 'flankedRegs.bed' GalaxyInterface.expandBedSegments(regionsBedFn, flankedRegionsFn, genome, upFlankSize, downFlankSize) regSpec, binSpec = 'bed', flankedRegionsFn res = cls._runCategoryPointCount(genome, regSpec, binSpec, tfTrackName) tfNames = res.getResDictKeys() #print 'RES: ', res.getGlobalResult()[tfNames[0]], type(res.getGlobalResult()[tfNames[0]]) import third_party.safeshelve as safeshelve pwm2tfids = safeshelve.open(os.sep.join([HB_SOURCE_CODE_BASE_DIR,'data','pwm2TFids.shelf']), 'r') tf2class = safeshelve.open(os.sep.join([HB_SOURCE_CODE_BASE_DIR,'data','TfId2Class.shelf']), 'r') pwmName2id= safeshelve.open(os.sep.join([HB_SOURCE_CODE_BASE_DIR,'data','pwmName2id.shelf']), 'r') #print tfNames[0],tfNames[1], ' VS ', pwm2tfids.keys()[0], len(pwm2tfids) #tfs = list(reversed(sorted([(res.getGlobalResult()[tf], tf, '%s (%i hits (class %s))'%(tf, res.getGlobalResult()[tf]), '/'.join([tf2class[x] for x in pwm2tfids[tf]]) ) for tf in tfNames]))) #num hits, tfName, tfTextInclHits tfs = list(reversed(sorted([(res.getGlobalResult()[tf], tf, '%s (%i hits )'%(tf, res.getGlobalResult()[tf]) + \ (' (class: %s)'%'/'.join(set([str(tf2class.get(x)) for x in pwm2tfids[pwmName2id[tf]] if x in tf2class]))\ if (tf in pwmName2id and pwmName2id[tf] in pwm2tfids and any([x in tf2class for x in pwm2tfids[pwmName2id[tf]]]))\ else '') ) \ for tf in tfNames])) ) #num hits, tfName, tfTextInclHits tfsPlural = 
's' if len(tfs)!=1 else '' print '<p>There are %i TF%s targeting your regions of interest, using "%s" as source of TF occurrences.</p>' % (len(tfs), tfsPlural, tfSource) expansionStr = ' flanked' if not (upFlankSize == downFlankSize == 0) else '' idHtmlFileNamer = GalaxyRunSpecificFile(['allTfIds.html'],galaxyFn) idHtmlFileNamer.writeTextToFile('<br>'.join(['<a href=/hbdev/hyper?track1=%s&track2=>%s</a>'%( quote(':'.join(tfTrackName+[tf[1]])), tf[2]) for tf in tfs])) print '<p>', idHtmlFileNamer.getLink('Inspect html file'), ' of all TF IDs occurring 1 or more times within your%s regions of interest, with each TF ID linking to analysis with this TF pre-selected.</p>' % (expansionStr) idFileNamer = GalaxyRunSpecificFile(['allTfIds.txt'],galaxyFn) idFileNamer.writeTextToFile(os.linesep.join([tf[2] for tf in tfs]) + os.linesep) print '<p>', idFileNamer.getLink('Inspect text file'), ' listing all TF IDs occurring 1 or more times within your%s regions of interest.</p>' % (expansionStr) extractedTfbsFileNamer = GalaxyRunSpecificFile(['tfbsInGeneRegions.bed'],galaxyFn) GalaxyInterface.extractTrackManyBins(genome, tfTrackName, regSpec, binSpec, True, 'bed', False, False, extractedTfbsFileNamer.getDiskPath(), True) print '<p>', extractedTfbsFileNamer.getLoadToHistoryLink('Inspect bed-file'), 'of all TF binding sites occurring within your%s regions of interest.</p>' % (expansionStr) for dummy,tf,dummy2 in tfs: extractedTfbsFileNamer = GalaxyRunSpecificFile([tf+'_tfbsInGeneRegions.bed'],galaxyFn) GalaxyInterface.extractTrackManyBins(genome, tfTrackName+[tf], regSpec, binSpec, True, 'bed', False, False, extractedTfbsFileNamer.getDiskPath()) print '<p>', extractedTfbsFileNamer.getLoadToHistoryLink('Binding sites of the TF %s' %tf, 'bed'), 'occurring within your%s regions of interest (bed-file).</p>' % (expansionStr)
def makeShelfKeysLowercase(shelfFn, newShelfFn): 'shelfFn newShelfFn' origShelf = safeshelve.open(shelfFn, 'r') newShelf = safeshelve.open(newShelfFn, 'c') for key in origShelf.keys(): # print key if key.lower() in newShelf: print '%s: Duplicate: %s and %s. Using the longest.' % (key.lower(), origShelf[key], newShelf[key.lower()]) newShelf[key.lower()] = (origShelf[key] if len(origShelf[key]) > len(newShelf[key.lower()]) else newShelf[key.lower()]) else: newShelf[key.lower()] = origShelf[key] newShelf.close() origShelf.close()
def getOptionsBox1():
    """Return the selection list: a '----- select -----' entry followed by
    all keys of the UserToolsCollection shelve."""
    userToolsShelve = safeshelve.open(DATA_FILES_PATH + '/UserToolsCollection.shelve')
    userNames = userToolsShelve.keys()
    userToolsShelve.close()

    placeholder = ['----- select -----']
    return placeholder + userNames
def updateCacheDict(cls, stat):
    """Load the cached choices for a statistic into cls._cacheDict.

    The shelve at cls.SHELVE_FN (opened with flag 'c') is looked up under
    str(stat); cls._cacheDict is replaced only if an entry exists and its
    type is named 'dict'.
    """
    debugShelve = safeshelve.open(cls.SHELVE_FN, 'c')
    statKey = str(stat)
    if statKey in debugShelve:
        cached = debugShelve[statKey]
        # Name comparison on the exact type, so dict subclasses do not qualify.
        if type(cached).__name__ == 'dict':
            cls._cacheDict = cached
    debugShelve.close()
def makeLowercaseName2NameShelfFromTnSubTypes(genome, trackName, shelfFn):
    'genome trackName shelfFn'
    # Runs the "-> ListOfPresentCategoriesStat" analysis on the track (name
    # parts separated by '/' or ':') and stores, for every category found,
    # lowercased category -> category in the shelf.
    trackNameParts = re.split('/|:', trackName)

    from gold.application.GalaxyInterface import GalaxyInterface
    results = GalaxyInterface.runManual([trackNameParts, None],
                                        "-> ListOfPresentCategoriesStat",
                                        '*', '*', genome,
                                        printResults=False,
                                        printHtmlWarningMsgs=False)
    presentCategories = results.getGlobalResult()['Result']

    lowercaseShelf = safeshelve.open(shelfFn)
    for category in presentCategories:
        lowercaseShelf[category.lower()] = category
    lowercaseShelf.close()
def _getFilteredSelections(prevChoices, shelfFn):
    """Return the shelf keys that have at least one PWM (uppercased) among
    the PWMs given by SelectTfTool._getAllPwms(prevChoices)."""
    allPwms = SelectTfTool._getAllPwms(prevChoices)
    shelf = safeshelve.open(shelfFn, 'r')
    try:
        return [x for x in shelf.keys()
                if any(pwm.upper() in allPwms for pwm in shelf[x])]
    finally:
        # The shelf used to be left open after the return.
        shelf.close()
def getCategorySetForSubTracks(genome, baseTrackName, shelveFn): """genome baseTrackName shelveFn""" baseTrackName = re.split('/|:', baseTrackName) mapping = {} for trackName in OrigTrackNameSource(genome, baseTrackName): if trackName == baseTrackName: continue subTrackName = trackName[len(baseTrackName):] basePath = gcf.createOrigPath(genome, trackName) relFns = [x for x in os.listdir(basePath) if x[0] not in [',']] assert len( relFns ) == 1, 'only tracks with single file is supported, thus not: ' + str( relFns) fn = basePath + os.sep + relFns[0] try: categories = list( set([ line.split()[3] for line in open(fn) if line.strip() != '' ])) except: print 'Error, at filename %s and current line: %s' % (fn, line) raise mapping[':'.join(subTrackName)] = categories shelf = safeshelve.open(shelveFn) shelf.update(mapping) shelf.close()
def execute(choices, galaxyFn=None, username=''): ''' Is called when execute-button is pushed by web-user. Should print output as HTML to standard out, which will be directed to a results page in Galaxy history. If getOutputFormat is anything else than HTML, the output should be written to the file with path galaxyFn. If needed, StaticFile can be used to get a path where additional files can be put (e.g. generated image files). choices is a list of selections made by web-user in each options box. ''' key = username + '::' + choices[1] + '::' paramDict = dict( [tuple(v.split(':', 1)) for v in choices[0].split('\n')]) if paramDict.has_key('tool_id'): print 'Added test for: ', paramDict['tool_id'] import hashlib import third_party.safeshelve as safeshelve SHELVE_FN = DATA_FILES_PATH + os.sep + 'tests' + os.sep + '%s.shelve' % paramDict[ 'tool_id'] print SHELVE_FN d = safeshelve.open(SHELVE_FN) key += hashlib.sha224(choices[0]).hexdigest() argList = [] for t in choices[0].split('\n'): k, v = t.split(':', 1) argList.append('='.join([k, quote(v)])) d[key] = '&'.join(argList) d.close()
def parse(self, tool_source, guid=None):
    """Parse a ProTo tool definition.

    Registers (proto_tool_module, proto_tool_class) for the tool id in the
    proto tool cache shelve when both attributes are set, adds default
    hidden inputs and a default html output if the XML lacks them, delegates
    to the parent parse() and finally applies the ProTo-specific command,
    interpreter, action and form settings.
    """
    xmlRoot = tool_source.root
    tool_id = xmlRoot.get('id')
    proto_module = xmlRoot.get('proto_tool_module')
    proto_class = xmlRoot.get('proto_tool_class')

    if proto_module and proto_class:
        toolCache = safeshelve.open('database/proto-tool-cache.shelve')
        toolCache[tool_id] = (proto_module, proto_class)
        toolCache.close()

    if xmlRoot.find('inputs') is None:
        inputs = ElementTree.Element('inputs')
        hiddenParams = (
            ('mako', self.proto_mako),
            ('tool_id', xmlRoot.get('id')),
            ('tool_name', xmlRoot.get('name')),
        )
        for paramName, paramValue in hiddenParams:
            inputs.append(ElementTree.Element('param', name=paramName, type='hidden', value=paramValue))
        xmlRoot.append(inputs)

    if xmlRoot.find('outputs') is None:
        outputs = ElementTree.Element('outputs')
        outputs.append(ElementTree.Element('data', format='html', name='output'))
        xmlRoot.append(outputs)

    super(ProtoGenericTool, self).parse(tool_source, guid)

    # Settings applied after the generic parse.
    self.command = self.proto_command
    self.interpreter = 'python'
    self.options['sanitize'] = False
    self.action = self.proto_action
    self.check_values = False
    self.method = 'post'
def createS2(): s = safeshelve.open('slett', 'c') s2 = {} for i in xrange(100000): s2[str(i)] = i * 3 print len(s2.keys()) s.update(s2) s.close()
def _getDiseaseCategories(prevChoices):
    """Return the categories (keys of the disease parent-to-child shelf)
    that contain at least one of the currently available diseases.

    Disease names from SelectDiseaseTool._getAllDiseases are compared by
    their part before the first '('.
    """
    # A set gives constant-time membership tests in the filter below.
    allDiseases = set(x.split('(')[0] for x in
                      SelectDiseaseTool._getAllDiseases(prevChoices))
    shelf = safeshelve.open(SelectDiseaseTool.DISEASE_PARENT_TO_CHILD_SHELF_FN, 'r')
    try:
        return [cat for cat in shelf.keys()
                if any(disease in allDiseases for disease in shelf[cat])]
    finally:
        # The shelf used to be left open after the return.
        shelf.close()
def loadS2(): s = safeshelve.open('slett', 'r') #s2 = dict(s.items()) s2 = {} s2.update(s) #s.close() print 'mid..' for i in xrange(100000): temp = s[str(i)] + 1
def getInstalledProtoTools():
    """Return the stored class info of every tool in the proto tool shelve
    whose module (first element of the info, dots turned into path
    separators, '.py' appended) exists below SOURCE_CODE_BASE_DIR."""
    tool_shelve = safeshelve.open(PROTO_TOOL_SHELVE_FN, 'r')
    installed_class_info = []
    for tool_id in tool_shelve.keys():
        class_info = tool_shelve.get(tool_id)
        module_rel_path = class_info[0].replace('.', os.path.sep)
        module_fn = os.path.join(SOURCE_CODE_BASE_DIR, module_rel_path) + '.py'
        if os.path.exists(module_fn):
            installed_class_info.append(class_info)
    tool_shelve.close()
    return installed_class_info
def _getSelectedDiseasesDict(prevChoices):
    """Return an OrderedDict disease -> selected flag, or None.

    None is returned unless prevChoices[1] is 'Disease categories'.
    Otherwise every disease from SelectDiseaseTool._getAllDiseases is mapped
    (in sorted order) to whether its name before the first '(' equals a
    selected category of prevChoices[3] or is listed under one in the
    disease parent-to-child shelf.
    """
    if prevChoices[1] != 'Disease categories':
        return None

    shelfFn = SelectDiseaseTool.DISEASE_PARENT_TO_CHILD_SHELF_FN
    selectedDict = prevChoices[3]

    selectedDiseases = set()
    mapping = safeshelve.open(shelfFn, 'r')
    try:
        # Collected directly into the set rather than concatenating lists
        # pairwise, which was quadratic in the number of diseases.
        for category in selectedDict:
            if selectedDict[category]:
                selectedDiseases.add(category)
                selectedDiseases.update(mapping[category])
    finally:
        # The shelf used to be left open.
        mapping.close()

    return OrderedDict(sorted([(x, x.split('(')[0] in selectedDiseases) for x in
                               SelectDiseaseTool._getAllDiseases(prevChoices)]))
def _updateContentsIfNecessary(self, chr):
    """Load the stored entry for chr into self._contents, once per chr.

    Nothing happens if chr is already in self._updatedChrs. Otherwise, when
    the file exists, the value stored under chr (if any) is copied into
    self._contents[chr]; chr is then recorded in self._updatedChrs.
    """
    if chr in self._updatedChrs:
        return

    if self.fileExists():
        brShelve = safeshelve.open(self._fn, 'r')
        try:
            brListForChr = brShelve.get(chr)
        finally:
            # The shelve used to be opened inline and never closed, leaving
            # one open handle behind for every chromosome looked up.
            brShelve.close()

        if brListForChr is not None:
            self._contents[chr] = brListForChr

    self._updatedChrs.add(chr)
def __new__(cls, genome=None):
    """Return the object stored for genome in the shelve at SHELVE_FN, or a
    fresh instance of cls when no genome is given or nothing is stored."""
    if genome is not None:
        genomeInfoShelve = safeshelve.open(SHELVE_FN)
        stored = genomeInfoShelve.get(genome)
        genomeInfoShelve.close()
        if stored is not None:
            return stored
    return object.__new__(cls)
def createAllNodesAndLeaves(mapFn, fullMapFn):
    """mapFn fullMapFn"""
    # For every key of the input shelf (node -> list of direct children),
    # stores the sorted list of unique nodes reachable through the children
    # relation in the output shelf. Nodes already on the current path are
    # not followed again, which stops the walk on cycles.
    directMap = safeshelve.open(mapFn, 'r')
    fullMap = safeshelve.open(fullMapFn)

    def _collectDescendants(childMap, node, ancestors):
        if node not in childMap:
            return []

        descendants = [x for x in childMap[node] if x not in ancestors]
        lineage = ancestors + [node]
        # Iterate over a snapshot, as the list grows while being walked.
        for directChild in list(descendants):
            descendants += _collectDescendants(childMap, directChild, lineage)
        return descendants

    for entry in directMap:
        fullMap[entry] = sorted(set(_collectDescendants(directMap, entry, [])))

    directMap.close()
    fullMap.close()
def parseMatrixTextFileToShelf(txtFn, outShelfFn, rowPos2NameShelfFn=None, colPos2NameShelfFn=None, \ rowPos2ElCountShelfFn=None, colPos2ElCountShelfFn=None, keyType='names', countType='count'): "txtFn outShelfFn rowPos2NameShelfFn=None colPos2NameShelfFn=None rowPos2ElCountShelfFn=None colPos2ElCountShelfFn=None, keyType=names countType=count" assert keyType in ['names', 'pos'] assert countType in ['count','log','binary'] map = {} firstRealLine = True rowNames = [] for line in open(txtFn): print '.', line = line.strip() if len(line)==0 or line[0] == '#': continue if firstRealLine: colNames = line.split('\t') firstRealLine = False continue cols = line.split('\t') rowName = cols[0] rowNames.append(rowName) tableVals = cols[1:] assert len(tableVals) == len(colNames), \ 'len(tableVals) != len(colNames) (%i != %i)' % (len(tableVals), len(colNames)) for i in range(len(tableVals)): try: curVal = tableVals[i].replace(' ','') val = int(curVal) except: try: val = float(curVal) except: val = ast.literal_eval(curVal) if countType=='binary': val = 1 if val>0 else 0 elif countType=='log': val = int( math.ceil( math.log(val+1,2) ) ) if keyType == 'names': map[repr((rowName.lower(), colNames[i].lower()))] = val elif keyType == 'pos': map[repr((len(rowNames), i+1))] = val shelf = safeshelve.open(outShelfFn) shelf.update(map) shelf.close() _createPos2NameShelf(rowPos2NameShelfFn, rowNames) _createPos2NameShelf(colPos2NameShelfFn, colNames) _createPos2ElCountShelf(rowPos2ElCountShelfFn, rowNames) _createPos2ElCountShelf(colPos2ElCountShelfFn, colNames)
def mergeShelvesTransitively(inShelf1Fn, inShelf2Fn, outShelfFn, includeSecondShelf='True'): """inShelf1Fn inShelf2Fn outShelfFn The values of the first shelf can be a list. """ if isinstance(includeSecondShelf, basestring): includeSecondShelf = ast.literal_eval(includeSecondShelf) assert includeSecondShelf in [True, False] inShelf1 = safeshelve.open(inShelf1Fn, 'r') inShelf2 = safeshelve.open(inShelf2Fn, 'r') outShelf = safeshelve.open(outShelfFn, 'c') if includeSecondShelf: for key, val in inShelf2.items(): outShelf[key] = val for key, vals in inShelf1.items(): if type(vals) != list: vals = [vals] transVals = [] for val in vals: try: if val not in inShelf2: val = val.replace('_', ' ') if val not in inShelf2: raise transVals += (inShelf2[val]) except: print 'Unmatched value in %s: %s' % (inShelf2Fn, val) if len(transVals) > 0: outShelf[key] = transVals inShelf1.close() inShelf2.close() outShelf.close()
def __new__(cls, genome, trackName):
    """Return the object stored for (genome, trackName) in the shelve at
    cls.SHELVE_FN, or a fresh instance of cls when nothing is stored."""
    # Temporary hack: 'hg18' is looked up under the name 'NCBI36'.
    if genome in ['hg18', 'NCBI36']:
        genome = 'NCBI36'

    trackInfoShelve = safeshelve.open(cls.SHELVE_FN, 'c', protocol=cls.PROTOCOL)
    stored = trackInfoShelve.get(constructKey(genome, trackName))
    trackInfoShelve.close()

    if stored is None:
        return object.__new__(cls)
    return stored
def _createPos2NameShelf(pos2NameShelfFn, nameList):
    """Store repr(1-based position) -> lowercased name for every list entry.

    Does nothing when no shelf file name is given. Each entry must start
    with its own position (asserted). The stored name is the entry without
    its first space-separated token and without a trailing ' (...)' part.
    """
    if pos2NameShelfFn is None:
        return

    pos2NameShelf = safeshelve.open(pos2NameShelfFn)
    for idx, fullName in enumerate(nameList):
        pos = idx + 1
        assert fullName.startswith(str(pos))

        # Everything after the first space ('' if there is none).
        label = fullName.partition(' ')[2]
        if ' (' in label and label.endswith(')'):
            # Cut off the last ' (...' part.
            label = label.rsplit(' (', 1)[0]

        pos2NameShelf[repr(pos)] = label.lower()
    pos2NameShelf.close()
def reverseMappingHavingListValues(inShelfFn, outShelfFn):
    """inShelfFn, outShelfFn
    reverse a mapping that (in input) goes from a key to a list of values, that is key->val1,val2...valN
    end up with a reversed mapping from key (valK from any value list) to list of values (keys having this in its value list)
    """
    inShelf = safeshelve.open(inShelfFn, 'r')

    keysByValue = {}
    for key in inShelf:
        values = inShelf[key]
        # A value that is not a list counts as a one-element list.
        if type(values) != list:
            values = [values]
        for val in values:
            keysByValue.setdefault(val, []).append(key)

    outShelf = safeshelve.open(outShelfFn)
    outShelf.update(keysByValue)
    outShelf.close()
    inShelf.close()
def _createPos2ElCountShelf(pos2ElCountShelfFn, nameList):
    """Store repr(1-based position) -> element count text for every entry.

    The count is the text between the last ' (' and the final character of
    the entry. Does nothing when no shelf file name is given, or when the
    first entry does not contain ' (' and end with ')'. Each entry must
    start with its own position (asserted).
    """
    if pos2ElCountShelfFn is None:
        return

    if len(nameList) > 0:
        firstName = nameList[0]
        if firstName.find(' (') == -1 or not firstName.endswith(')'):
            return

    pos2ElCountShelf = safeshelve.open(pos2ElCountShelfFn)
    for idx, fullName in enumerate(nameList):
        pos = idx + 1
        assert fullName.startswith(str(pos))
        lastPart = fullName.rsplit(' (', 1)[-1]
        pos2ElCountShelf[repr(pos)] = lastPart[:-1]
    pos2ElCountShelf.close()
def _getSelectedPwmsSet(prevChoices):
    """Return the set of uppercased PWM names of all selected entries.

    prevChoices[1] decides the shelf and the selection dict: 'TF name' uses
    prevChoices[3], 'TF class' uses prevChoices[4]. For any other value None
    is returned.
    """
    if prevChoices[1] == 'TF name':
        shelfFn = SelectTfTool.TF_NAMES_TO_PWM_SHELF_FN
        selectedDict = prevChoices[3]
    elif prevChoices[1] == 'TF class':
        shelfFn = SelectTfTool.TF_CLASSES_TO_PWM_SHELF_FN
        selectedDict = prevChoices[4]
    else:
        return None

    pwms = set()
    mapping = safeshelve.open(shelfFn)
    try:
        # Collected directly into the set rather than concatenating all
        # lists pairwise first.
        for name in selectedDict:
            if selectedDict[name]:
                pwms.update(pwm.upper() for pwm in mapping[name])
    finally:
        # The shelf used to be left open.
        mapping.close()
    return pwms
def getToolPrototype(toolId):
    """Instantiate the prototype class registered for a tool id.

    The (module name, class name) pair stored under str(toolId) in the proto
    tool shelve is imported and the class is called with toolId. Errors from
    the lookup (e.g. KeyError for an unknown id), the import or the
    instantiation propagate to the caller.
    """
    tool_shelve = safeshelve.open(PROTO_TOOL_SHELVE_FN, 'r')
    try:
        module_name, class_name = tool_shelve[str(toolId)]
    finally:
        # Always close. The former 'if tool_shelve:' check was a truthiness
        # test on a mapping, so an empty shelve was never closed.
        tool_shelve.close()

    module = __import__(module_name, fromlist=[class_name])
    return getattr(module, class_name)(toolId)
def createMappingsFromMeshAsciiFile(asciiFn, mapHeadingToIdFn, mapIdToHeadingFn):
    """asciiFn mapHeadingToIdFn mapIdToHeadingFn"""
    # Scans a record-based text file in which '*NEWRECORD' starts a record,
    # 'MH = ...' gives its heading and 'UI = ...' its id, and fills two
    # shelves: heading -> id and id -> heading. Records lacking a heading or
    # an id are not stored.
    mapHeadingToIdFile = safeshelve.open(mapHeadingToIdFn)
    try:
        mapIdToHeadingFile = safeshelve.open(mapIdToHeadingFn)
        try:
            def _storeRecord(heading, recordId):
                # curId starts out as None, so an id check against '' alone
                # would let records without an id through.
                if heading != '' and recordId is not None and recordId != '':
                    mapHeadingToIdFile[heading] = recordId
                    mapIdToHeadingFile[recordId] = heading

            curHeading = ''
            curId = None
            with open(asciiFn, 'r') as asciiFile:
                for line in asciiFile:
                    line = line.strip()
                    if line == '*NEWRECORD':
                        _storeRecord(curHeading, curId)
                        curHeading = ''
                        curId = None
                    if line.startswith('MH = '):
                        curHeading = line[5:]
                    if line.startswith('UI = '):
                        curId = line[5:]

            # The final record has no '*NEWRECORD' after it and used to be
            # dropped silently.
            _storeRecord(curHeading, curId)
        finally:
            mapIdToHeadingFile.close()
    finally:
        mapHeadingToIdFile.close()
def getOptionsBox2(cls, prevChoices): # Alternatively: getOptionsBoxKey()
    """Return the tool links stored for the user selected in prevChoices[0].

    The result is the tuple ('__rawstr__', html) with one link per stored
    (name, url) pair. None is returned when the user has no entry in the
    UserToolsCollection shelve or the entry is empty.
    """
    SHELVE_FN = DATA_FILES_PATH + '/UserToolsCollection.shelve'
    s = safeshelve.open(SHELVE_FN)
    try:
        if s.has_key(prevChoices[0]):
            linkTemplate = '<a href="%s"> %s </a><br/><br/>'
            toolLinks = [linkTemplate % (v, k) for k, v in s[prevChoices[0]].items()]
            if toolLinks:
                return '__rawstr__', '<br/><br/>' + '\n'.join(toolLinks)
    finally:
        # The shelve used to be left open on every path.
        s.close()
def removeUnusedRecords(): trackInfoShelve = safeshelve.open(SHELVE_FN, 'w') iremoved = 0 ifound = 0 for key in trackInfoShelve.keys(): try: ti = TrackInfo.createInstanceFromKey(key) fn = ti.getOrigFn() if not os.path.exists(fn): raise Exception('Should exclude nmer tracks and other tracks without standardized track (e.g. intensity tracks). How? Not sure..') ti.removeEntryFromShelve() iremoved = iremoved + 1 else: ifound= ifound + 1 except Exception, e: print "Something wrong with ", fn , ", ", e
# Ad hoc repair script: reads all keys of the TrackInfo shelve and, for every track whose
# timeOfPreProcessing lies between 2011-11-08 23:00 and 2011-11-25 23:00 and whose original
# file name does not match 'Nmers|Trashcan|external|Restriction', prints the track and counts it.
# The actual repair steps (resetting the id, storing, re-preprocessing) are commented out.
# NOTE(review): the visible text ends inside a `try:` block without a handler; the remainder
# of the script is not shown here, so the code below is left untouched.
print "inne i adhoc repair" #python test/sandbox/div/adhoc_repair.py > /xanadu/home/vegardny/prosjekter/hyperbrowser/div/adhoc_out.txt import datetime import sys import third_party.safeshelve as safeshelve from gold.description.TrackInfo import TrackInfo from gold.description.TrackInfo import SHELVE_FN from gold.origdata.PreProcessTracksJob import PreProcessAllTracksJob import re #print "SHELVE_FN=", SHELVE_FN trackInfoShelve = safeshelve.open(SHELVE_FN, 'w') allkeys=trackInfoShelve.keys() trackInfoShelve.close() count = 0 for key in allkeys: #print key, count try: ti = TrackInfo.createInstanceFromKey(key) if ti.timeOfPreProcessing > datetime.datetime(2011, 11, 8, 23,0,0) and ti.timeOfPreProcessing < datetime.datetime(2011, 11, 25, 23,0,0): if re.search('Nmers|Trashcan|external|Restriction', ti.getOrigFn())==None: # Nmers, external print 'trying to repair track ', ti.genome, ti.trackName, ti.timeOfPreProcessing count = count +1 ### Sette ID til None og preprocesse. #ti.id = None #ti.store() #PreProcessAllTracksJob(ti.genome, ti.trackName).process()
def updateCacheDict(cls, stat):
    """Load the cached choices stored under stat into cls._cacheDict.

    The shelve at cls.SHELVE_FN is opened with flag 'r'; cls._cacheDict is
    replaced only if an entry for stat exists and its type is named 'dict'.
    """
    debugShelve = safeshelve.open(cls.SHELVE_FN, 'r')
    if stat in debugShelve:
        cached = debugShelve[stat]
        # Name comparison on the exact type, so dict subclasses do not qualify.
        if type(cached).__name__ == 'dict':
            cls._cacheDict = cached
    debugShelve.close()
def _getFilteredSelections(prevChoices, shelfFn):
    """Return the shelf keys that have at least one PWM (uppercased) among
    the PWMs given by SelectTfTool._getAllPwms(prevChoices)."""
    allPwms = SelectTfTool._getAllPwms(prevChoices)
    shelf = safeshelve.open(shelfFn, 'r')
    try:
        return [x for x in shelf.keys()
                if any(pwm.upper() in allPwms for pwm in shelf[x])]
    finally:
        # The shelf used to be left open after the return.
        shelf.close()
def execute(cls, choices, galaxyFn=None, username=''): shelveDict = {'track1':choices[3] if choices[3]!=cls.NO_TRACK_SHORTNAME else None} shelveDict['track2'] = choices[5] if choices[5]!=cls.NO_TRACK_SHORTNAME else None print len(choices) print cls._extraParams for i in range(len(cls._extraParams)): index = i*2+cls.FIRST_EXTRA_PARAM_BOX_NUMBER+1 shelveDict[index] = choices[index].strip() DebugInfoShelve = safeshelve.open(cls.SHELVE_FN) DebugInfoShelve[choices[0]] = shelveDict DebugInfoShelve.close() try: cls.setupDebugModeAndLogging(verbose=False) #add box to select this print 'Getting Unsplittable statClass' statClassName = choices[0] #statClass = STAT_CLASS_DICT[statClassName] #try: print 'Preparing arguments to init' unsplittableStatClass = MagicStatFactory._getClass(statClassName, 'Unsplittable') genome = choices[1] from gold.track.Track import PlainTrack prefixTN1 = cls.STD_PREFIX_TN if choices[2] == 'yes' else [] tn1 = prefixTN1 + choices[3].split(':') track1 = PlainTrack(tn1) if choices[3]!=cls.NO_TRACK_SHORTNAME else None prefixTN2 = cls.STD_PREFIX_TN if choices[4] == 'yes' else [] tn2 = prefixTN2 + choices[5].split(':') track2 = PlainTrack(tn2) if choices[5]!=cls.NO_TRACK_SHORTNAME else None from gold.track.GenomeRegion import GenomeRegion #region = GenomeRegion(genome, 'chr1',1000,2000) #region2 = GenomeRegion(genome, 'chr1',5000,6000) kwArgs = {} regVal = choices[cls.FIRST_EXTRA_PARAM_BOX_NUMBER+1] binSpecVal = choices[cls.FIRST_EXTRA_PARAM_BOX_NUMBER+3] ubSource = UserBinSource(regVal, binSpecVal, genome=genome) region = list(ubSource)[0] if len(cls._extraParams)>3: for i in range(len(cls._extraParams)): paramName = choices[i*2+cls.FIRST_EXTRA_PARAM_BOX_NUMBER] param = paramName[:paramName.find('(')].strip() val = choices[i*2+cls.FIRST_EXTRA_PARAM_BOX_NUMBER+1].strip() if val !='': kwArgs[param] = val shelveDict[i*2+cls.FIRST_EXTRA_PARAM_BOX_NUMBER+1] = val print 'Calling __init__' # statObj = unsplittableStatClass(region, track1, track2, **kwArgs) 
print 'Calling createChildren' statObj.createChildren() print 'Calling getResult' statObj.getResult() #except: # raise #print 'Preparing arguments to init' #genome = 'hg18' #prefixTN = ['DNA structure'] if choices[2] == 'yes' else [] #from gold.track.Track import PlainTrack #tn1 = prefixTN + choices[3].split(':') #track1 = PlainTrack(tn1) #tn2 = prefixTN + choices[5].split(':') #track2 = PlainTrack(tn2) #from gold.track.GenomeRegion import GenomeRegion ##region = GenomeRegion(genome, 'chr1',1000,2000) ##region2 = GenomeRegion(genome, 'chr1',5000,6000) # #kwArgs = {} #regVal = choices[cls.FIRST_EXTRA_PARAM_BOX_NUMBER+1] #binSpecVal = choices[cls.FIRST_EXTRA_PARAM_BOX_NUMBER+3] #ubSource = UserBinSource(regVal, binSpecVal, genome=choices[1]) #region = list(UserBinSource)[0] # #if len(cls._extraParams)>2: # for i in range(2,len(cls._extraParams)): # paramName = choices[i*2+cls.FIRST_EXTRA_PARAM_BOX_NUMBER] # param = paramName[:paramName.find('(')].strip() # val = choices[i*2+cls.FIRST_EXTRA_PARAM_BOX_NUMBER+1].strip() # if val !='': # kwArgs[param] = val # shelveDict[i*2+cls.FIRST_EXTRA_PARAM_BOX_NUMBER+1] = val # # ##extraParams += [v.strip() for v in choices.kwArgs.split(',')] if choices.kwArgs.strip() != '' else [] ##args = [region, track1, track2] # #print 'Calling __init__' ## #statObj = unsplittableStatClass(region, track1, track2, **kwArgs) # #print 'Calling createChildren' #statObj.createChildren() # #print 'Calling getResult' #statObj.getResult() print 'Running StatJob' magicStatClass = STAT_CLASS_DICT[statClassName] #res = StatJob([region,region2],track1,track2,magicStatClass,**kwArgs).run() res = StatJob(ubSource,track1,track2,magicStatClass,**kwArgs).run() from quick.application.GalaxyInterface import GalaxyInterface GalaxyInterface._viewResults([res],galaxyFn) except Exception, e: print 'Error: ',e raise
def saveMap(self, args): if args['name'].isdigit(): galaxyId = int(args['name']) outDir = getUniqueWebPath(['%03d' % (galaxyId / 1000), str(galaxyId)]) else: outDir = '/'.join([GoogleMapsInterface.BASE_DIR, args['name']]) try: os.makedirs(outDir + '/cookies') except OSError, e: if e.errno == errno.EEXIST: pass else: raise e cname = args['id'] if 'id' in args else 'common' s = safeshelve.open(outDir + '/cookies/' + cname + '.shelve') s['markers'] = args['markers'] s['clusters'] = args['clusters'] s['idxclusters'] = args['idxclusters'] s.close() return {'debug': outDir} def restoreMap(self, args): map = GoogleMapsInterface.Map(args['name']) return map.getSavedCookies(args['id']) #if args['name'].isdigit(): # galaxyId = int(args['name']) # outDir = getUniqueWebPath(['%03d' % (galaxyId / 1000), str(galaxyId)]) #else: # outDir = '/'.join([GoogleMapsInterface.BASE_DIR, args['name']]) #r = {}
def removeFilteredEntriesFromShelve(cls, genome, trackNameFilter):
    """Delete from the shelve at cls.SHELVE_FN every key returned by
    TrackInfo.getFilteredEntriesFromShelve(genome, trackNameFilter)."""
    keysToRemove = TrackInfo.getFilteredEntriesFromShelve(genome, trackNameFilter)

    infoShelve = safeshelve.open(cls.SHELVE_FN, 'w', protocol=cls.PROTOCOL)
    for shelveKey in keysToRemove:
        del infoShelve[shelveKey]
    infoShelve.close()
def getFilteredEntriesFromShelve(cls, genome, trackNameFilter):
    """Return the keys of the shelve at cls.SHELVE_FN that start with
    constructKey(genome, trackNameFilter)."""
    keyPrefix = constructKey(genome, trackNameFilter)

    infoShelve = safeshelve.open(cls.SHELVE_FN, 'r', protocol=cls.PROTOCOL)
    matchingKeys = []
    for shelveKey in infoShelve.keys():
        if shelveKey.startswith(keyPrefix):
            matchingKeys.append(shelveKey)
    infoShelve.close()

    return matchingKeys
def removeEntryFromShelve(self):
    """Delete the entry for this object's (genome, trackName) key from the
    shelve at self.SHELVE_FN, if there is one."""
    infoShelve = safeshelve.open(self.SHELVE_FN, protocol=self.PROTOCOL)
    ownKey = constructKey(self.genome, self.trackName)
    if ownKey in infoShelve:
        del infoShelve[ownKey]
    infoShelve.close()
def store(self):
    """Write this object to the shelve at self.SHELVE_FN under its
    (genome, trackName) key, replacing any earlier entry."""
    infoShelve = safeshelve.open(self.SHELVE_FN, protocol=self.PROTOCOL)
    ownKey = constructKey(self.genome, self.trackName)
    infoShelve[ownKey] = self
    infoShelve.close()
def storeBoundingRegions(self, boundingRegionTuples, genomeElementChrList, sparse):
    """Validate bounding regions and store them, per chromosome, in the shelve at self._fn.

    boundingRegionTuples: iterable of objects with a `.region` (providing
        chr, start, end, len(), `<` and overlaps()) and an `.elCount`.
        Regions of the same chromosome must be consecutive, sorted,
        non-overlapping and separated by a gap.
    genomeElementChrList: chromosomes to compare against the bounding
        regions; judging by the error message below, these are the
        chromosomes that contain data.
    sparse: True or False. If False, every region's length must equal its
        elCount.

    Raises InvalidFormatError on any violated constraint (see the messages
    below).

    After writing, the method blocks until self.fileExists() is true,
    polling every 0.2 seconds without a timeout.
    """
    assert sparse in [False, True]

    # chr -> OrderedDict(region start -> BoundingRegionInfo), in input order
    tempContents = OrderedDict()

    genomeElementChrs = set(genomeElementChrList)
    lastRegion = None
    # Only filled when sparse: running element count at the first/after the
    # last bounding region of each chromosome.
    chrStartIdxs = OrderedDict()
    chrEndIdxs = OrderedDict()
    totElCount = 0
    totBinCount = 0

    # Pass 1: validate ordering/shape and collect preliminary region info.
    for br in boundingRegionTuples:
        if lastRegion is None or br.region.chr != lastRegion.chr:
            # First region of a chromosome; seeing the chromosome again later
            # means the input is not grouped.
            if br.region.chr in tempContents:
                raise InvalidFormatError("Error: bounding region (%s) is not grouped with previous bounding regions of the same chromosome (sequence)." % br.region)

            lastRegion = None
            tempContents[br.region.chr] = OrderedDict() #sorteddict()
            if sparse:
                chrStartIdxs[br.region.chr] = totElCount
        else:
            # Same chromosome as the previous region: must be sorted,
            # disjoint and not directly adjoining.
            if br.region < lastRegion:
                raise InvalidFormatError("Error: bounding regions in the same chromosome (sequence) are unsorted: %s > %s." % (lastRegion, br.region))
            if lastRegion.overlaps(br.region):
                raise InvalidFormatError("Error: bounding regions '%s' and '%s' overlap." % (lastRegion, br.region))
            if lastRegion.end == br.region.start:
                raise InvalidFormatError("Error: bounding regions '%s' and '%s' are adjoining (there is no gap between them)." % (lastRegion, br.region))

        if len(br.region) < 1:
            raise InvalidFormatError("Error: bounding region '%s' does not have positive length."
                                     % br.region)

        if not sparse and len(br.region) != br.elCount:
            raise InvalidFormatError("Error: track type representation is dense, but the length of bounding region '%s' is not equal to the element count: %s != %s" % (br.region, len(br.region), br.elCount))

        # Dense: each region gets its own running element index range.
        # Sparse: placeholders here; the ranges are filled in per chromosome
        # in pass 2.
        startIdx, endIdx = (totElCount, totElCount + br.elCount) if not sparse else (None, None)
        totElCount += br.elCount
        if sparse:
            # Overwritten for every region, so it ends at the count after the
            # chromosome's last region.
            chrEndIdxs[br.region.chr] = totElCount

        tempContents[br.region.chr][br.region.start] = BoundingRegionInfo(br.region.start, br.region.end, startIdx, endIdx, 0, 0)

        lastRegion = br.region

    # Pass 2 (sparse only): replace the preliminary infos with
    # chromosome-wide element and bin index ranges.
    if sparse:
        totBinCount = 0
        for chr in tempContents:
            chrLen = GenomeInfo.getChrLen(self._genome, chr)
            numBinsInChr = CompBinManager.getNumOfBins(GenomeRegion(start=0, end=chrLen))
            for key in tempContents[chr].keys():
                # totBinCount only advances after this inner loop, so all
                # regions of a chromosome get the same bin range.
                startBinIdx = totBinCount
                endBinIdx = totBinCount + numBinsInChr
                brInfo = tempContents[chr][key]

                if chr in genomeElementChrs:
                    tempContents[chr][key] = BoundingRegionInfo(brInfo.start, brInfo.end, \
                                                                chrStartIdxs[chr], chrEndIdxs[chr], \
                                                                startBinIdx, endBinIdx)
                else:
                    # Chromosome is not in genomeElementChrList, so its
                    # bounding regions must not claim any elements.
                    if chrEndIdxs[chr] - chrStartIdxs[chr] > 0:
                        raise InvalidFormatError("Error: bounding region '%s' has incorrect element count: %s > 0" % (GenomeRegion(chr=chr, start=brInfo.start, end=brInfo.end), chrEndIdxs[chr] - chrStartIdxs[chr]))
                    tempContents[chr][key] = BoundingRegionInfo(brInfo.start, brInfo.end, 0, 0, 0, 0)

            # Bins are only counted for chromosomes in genomeElementChrList.
            if chr in genomeElementChrs:
                totBinCount += numBinsInChr

    # Every chromosome in genomeElementChrList needs at least one bounding region.
    if len(genomeElementChrs - set(tempContents.keys())) > 0:
        raise InvalidFormatError('Error: some chromosomes (sequences) contains data, but has no bounding regions: %s' % ', '.join(genomeElementChrs - set(tempContents.keys())))

    ensurePathExists(self._fn)

    # Convert each per-chromosome dict into a BrInfoHolder built from two
    # parallel tuples (the region starts and the region infos).
    for chr in tempContents:
        brInfoDict = tempContents[chr]
        tempContents[chr] = BrInfoHolder(tuple(brInfoDict.keys()), tuple(brInfoDict.values()))

    brShelve = safeshelve.open(self._fn)
    brShelve.update(tempContents)
    brShelve.close()

    # Wait until the shelve file is reported as existing; logs a message and
    # sleeps 0.2 s per iteration. NOTE(review): there is no timeout.
    while not self.fileExists():
        from gold.application.LogSetup import logMessage
        logMessage("Bounding region shelve file '%s' has yet to be created" % self._fn)
        import time
        time.sleep(0.2)
def _getDiseaseCategories(prevChoices):
    """Return the categories that contain at least one of the available diseases.

    The available diseases come from
    SelectDiseaseTool._getAllDiseases(prevChoices); only the part of each
    entry before the first '(' is used. A category is a key of the shelve at
    SelectDiseaseTool.DISEASE_PARENT_TO_CHILD_SHELF_FN, and it is returned
    when any of the diseases in its value is among the available ones.

    The result is a list in the shelve's key order.
    """
    # A set gives constant-time membership tests in the loop below.
    allDiseases = set(x.split('(')[0]
                      for x in SelectDiseaseTool._getAllDiseases(prevChoices))

    shelf = safeshelve.open(SelectDiseaseTool.DISEASE_PARENT_TO_CHILD_SHELF_FN, 'r')
    try:
        return [cat for cat in shelf.keys()
                if any(disease in allDiseases for disease in shelf[cat])]
    finally:
        # The shelve used to be left open; always close it.
        shelf.close()
# # The Genomic HyperBrowser is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with The Genomic HyperBrowser. If not, see <http://www.gnu.org/licenses/>. #!/usr/bin/env python import os import sys import third_party.safeshelve as safeshelve from config.Config import DATA_FILES_PATH commands = safeshelve.open(DATA_FILES_PATH + os.sep +'CommandCatalog.shelve') if len(sys.argv) == 1: print 'syntax: ' print 'to add: add [name] [command]' print 'to remove: rm [name] [command]' print 'to print: print [name]' print 'to use: [name]' print 'available commands: ' print ','.join(commands.keys() ) sys.exit(0) if sys.argv[1] == 'add': assert(len(sys.argv) >= 4) commands[sys.argv[2]] = ' '.join(sys.argv[3:]) elif sys.argv[1] == 'rm':
import third_party.safeshelve as safeshelve import os from quick.aux.CustomFuncCatalog import reverseMappingHavingListValues from config.Config import HB_SOURCE_CODE_BASE_DIR DATA_PATH = os.sep.join([HB_SOURCE_CODE_BASE_DIR, 'data', 'tfbs']) pwm2TFids = safeshelve.open(DATA_PATH + os.sep + 'pwm2TFids.shelf', 'r') pwm2TFnamesNew = safeshelve.open(DATA_PATH + os.sep + 'pwm2TFnamesNew.shelf', 'r') pwmName2id = safeshelve.open(DATA_PATH + os.sep + 'pwmName2id.shelf', 'r') TfId2Class = safeshelve.open(DATA_PATH + os.sep + 'TfId2Class.shelf', 'r') pwmIdToPretty = safeshelve.open(DATA_PATH + os.sep + 'pwm2pretties.shelf', 'r') pwmName2TfClassesFn = DATA_PATH + os.sep + 'pwmName2TfClasses.shelf' pwmName2TfNamesFn = DATA_PATH + os.sep + 'pwmName2TfNames.shelf' pwmName2PrettyNamesFn = DATA_PATH + os.sep + 'pwmName2PrettyNames.shelf' pwmName2TfClasses = safeshelve.open(pwmName2TfClassesFn, 'c') pwmName2TfNames = safeshelve.open(pwmName2TfNamesFn, 'c') pwmName2PrettyNames = safeshelve.open(pwmName2PrettyNamesFn, 'c') tfNames2pwmNamesFn = DATA_PATH + os.sep + 'tfNames2pwmNames.shelf' tfClasses2pwmNamesFn = DATA_PATH + os.sep + 'tfClasses2pwmNames.shelf' prettyNames2pwmNamesFn = DATA_PATH + os.sep + 'prettyNames2pwmNames.shelf' pwnId2Classes = {} for id in pwm2TFids.keys(): for tfid in pwm2TFids[id]: tfClass = TfId2Class.get(tfid) if tfClass is None: