def isInstalled(self):
    '''
    Return the installation flag, computing it when it is not yet set.

    Caching was added to improve reaction time when displaying the genome
    selection box. The flag is stored (via self.store()) only when it
    evaluates as true, so a falsy flag is recomputed on every call.
    '''
    if self.installed:
        return self.installed

    from quick.application.ProcTrackOptions import ProcTrackOptions

    # Evaluated lazily and in the same order as a chained `and`: time stamp,
    # then the chromosome track, then the assembly gaps track.
    status = self.timeOfInstallation is not None
    if status:
        status = ProcTrackOptions.isValidTrack(
            self.genome, GenomeInfo.getChrTrackName(self.genome),
            fullAccess=True)
    if status:
        status = ProcTrackOptions.isValidTrack(
            self.genome, GenomeInfo.getAssemblyGapsTrackName(self.genome),
            fullAccess=True)

    self.installed = status
    if self.installed:
        self.store()
    return self.installed
def _compute(self):
    '''
    Run the raw statistic for every pair of sub-tracks of the two tracks.

    Returns a dict with a single 'Result' key holding an OrderedDict with
    'Matrix' (nested list, rows x cols), 'Rows' (sub-types of track 1) and
    'Cols' (sub-types of track 2). If 'minimal' is among the keyword
    arguments, only the first sub-type of each track is used.

    Raises IncompatibleTracksError if the directory of any sub-track does
    not exist.
    '''
    childKwArgs = copy(self._kwArgs)
    # 'rawStatistic' is not forwarded to the raw statistic.
    childKwArgs.pop('rawStatistic', None)

    rowSubtypes = ProcTrackOptions.getSubtypes(
        self.getGenome(), self._track.trackName, True)
    colSubtypes = ProcTrackOptions.getSubtypes(
        self.getGenome(), self._track2.trackName, True)
    assert len(rowSubtypes) > 0, str(self._track.trackName)
    assert len(colSubtypes) > 0, str(self._track2.trackName)

    if 'minimal' in self._kwArgs:
        rowSubtypes = rowSubtypes[:1]
        colSubtypes = colSubtypes[:1]

    cellResults = []
    for rowSubtype in rowSubtypes:
        for colSubtype in colSubtypes:
            rowTn = self._track.trackName + [rowSubtype]
            colTn = self._track2.trackName + [colSubtype]

            bothExist = \
                os.path.exists(createDirPath(rowTn, self.getGenome())) and \
                os.path.exists(createDirPath(colTn, self.getGenome()))
            if not bothExist:
                raise IncompatibleTracksError

            rowTrack = Track(rowTn)
            rowTrack.formatConverters = self._track.formatConverters
            colTrack = Track(colTn)
            colTrack.formatConverters = self._track2.formatConverters

            # Results are fetched directly instead of registering the
            # statistic as a child.
            rawStat = self._rawStatistic(
                self._region, rowTrack, colTrack, **childKwArgs)
            cellResults.append(rawStat.getResult())
            ResultsMemoizer.flushStoredResults()

    matrix = array(cellResults).reshape(
        (len(rowSubtypes), len(colSubtypes)))
    resultParts = OrderedDict([('Matrix', matrix.tolist()),
                               ('Rows', rowSubtypes),
                               ('Cols', colSubtypes)])
    return {'Result': resultParts}
def getOptionsBoxTrack2Source(prevChoices):
    '''
    See getOptionsBox1().

    Returns the source selection list when one of the following holds, and
    None otherwise:
      - the previous choice is already 'Track' or 'History';
      - three boxes back 'Track' was chosen and the choice two boxes back is
        a valid track for the genome in prevChoices[0];
      - three boxes back 'History' was chosen and the choice two boxes back
        is not the empty string.
    '''
    sourceOptions = ['--- select ---', 'Track', 'History']

    if prevChoices[-1] in ['Track', 'History']:
        return sourceOptions

    if prevChoices[-3] == 'Track' and ProcTrackOptions.isValidTrack(
            prevChoices[0], prevChoices[-2].split(':'), fullAccess=True):
        return sourceOptions

    if prevChoices[-3] == 'History' and prevChoices[-2] != '':
        return sourceOptions

    return None
def _compute(self):
    '''
    Run the raw statistic for each sub-type of track 1 against the '0' and
    '1' sub-tracks of track 2.

    Returns an OrderedDict with 'Matrix' (nested list with two columns),
    'Rows' (sub-types of track 1) and 'Cols' (['Case', 'Control']).

    Raises IncompatibleTracksError if the directory of any sub-track does
    not exist.
    '''
    childKwArgs = copy(self._kwArgs)
    # 'rawStatistic' is not forwarded to the raw statistic.
    childKwArgs.pop('rawStatistic', None)

    rowSubtypes = ProcTrackOptions.getSubtypes(
        self.getGenome(), self._track.trackName, True)
    assert len(rowSubtypes) > 0

    cellResults = []
    for rowSubtype in rowSubtypes:
        for colSubtype in ['0', '1']:
            rowTn = self._track.trackName + [rowSubtype]
            colTn = self._track2.trackName + [colSubtype]

            bothExist = \
                os.path.exists(createDirPath(rowTn, self.getGenome())) and \
                os.path.exists(createDirPath(colTn, self.getGenome()))
            if not bothExist:
                raise IncompatibleTracksError

            rowTrack = Track(rowTn)
            rowTrack.formatConverters = self._track.formatConverters
            colTrack = Track(colTn)
            colTrack.formatConverters = self._track2.formatConverters

            rawStat = self._rawStatistic(
                self._region, rowTrack, colTrack, **childKwArgs)
            cellResults.append(rawStat.getResult())
            ResultsMemoizer.flushStoredResults()

    # Two columns per row, one for each of the '0'/'1' sub-tracks.
    matrix = array(cellResults).reshape((-1, 2))
    return OrderedDict([('Matrix', matrix.tolist()),
                        ('Rows', rowSubtypes),
                        ('Cols', ['Case', 'Control'])])
def _validateGenome(cls, choices, validateBinaryTracksIfPresent=True):
    '''
    Validate the selected genome against the selected GSuite files.

    No check is made until every option box listed in
    cls.GSUITE_FILE_OPTIONS_BOX_KEYS holds a truthy value.

    Returns the error text of the first failing check. If nothing fails the
    method falls through and implicitly returns None.
    '''
    from quick.multitrack.MultiTrackCommon import getGSuiteFromGalaxyTN
    from quick.application.ProcTrackOptions import ProcTrackOptions

    allGSuiteGalaxyTNs = [
        getattr(choices, key) for key in cls.GSUITE_FILE_OPTIONS_BOX_KEYS
    ]

    # NOTE(review): the nesting below was reconstructed from a
    # whitespace-collapsed source; all checks are assumed to sit inside this
    # `if` - confirm against the original file.
    if all(allGSuiteGalaxyTNs):
        # More than one distinct genome is only accepted when the tool
        # allows the genome to be overridden.
        if (not cls._allowGenomeOverride(choices)
            ) and cls._getNumUniquelySpecifiedGenomes(choices) > 1:
            return cls.ERROR_GENOME_BUILD_MISMATCH + ', '.join(
                cls._getGsuiteGenomes(choices))

        errorStr = GeneralGuiTool._checkGenome(choices.genome)
        if errorStr:
            return errorStr

        if not cls._allowMultipleGenomes(
                choices) and choices.genome == GSuiteConstants.MULTIPLE:
            return cls.ERROR_MULTIPLE_GENOMES_NOT_ALLOWED

        if validateBinaryTracksIfPresent:
            for galaxyTN in allGSuiteGalaxyTNs:
                gSuite = getGSuiteFromGalaxyTN(galaxyTN)
                # Only local, preprocessed GSuites are checked track by track.
                if gSuite.fileFormat == GSuiteConstants.PREPROCESSED and gSuite.location == GSuiteConstants.LOCAL:
                    for gSuiteTrack in gSuite.allTracks():
                        if not ProcTrackOptions.isValidTrack(
                                choices.genome, gSuiteTrack.trackName, True):
                            return cls.ERROR_PREPROCESSED_TRACK_INVALID % gSuiteTrack.title
def execute(cls, choices, galaxyFn=None, username=''):
    '''
    For each selected cell type, build an ExactlySpecifiedTF object per valid
    (track, motif id) pair of each TF, compute its FASTA files and PWM scores,
    and print one HTML results table per cell type.

    The TF-to-track mapping is read either from 'EncodeBasedTfMappings.txt'
    under DATA_FILES_PATH (when choices.datasource == cls.DATA_REPO) or from
    the history element in choices.history. Tracks that are missing or
    invalid are reported on stdout and skipped.
    '''
    from quick.application.ProcTrackOptions import ProcTrackOptions
    # Earlier, disabled approach: look up candidate tracks in the TrackInfo
    # shelve instead of reading a mapping file.
    #from quick.application.ProcTrackOptions import ProcTrackOptions
    #SHELVE_FN = DATA_FILES_PATH + sep + 'TrackInfo.shelve'
    #trackInfoShelve = shelve.open(SHELVE_FN, 'c')
    #
    #cellType = choices.celltype.split('(')[0].strip()
    #hg19Keys = [k for k in trackInfoShelve.keys() if k.startswith('hg19:Gene regulation')]
    #print 'Size of trackInfo shelve(hg19):', len(hg19Keys)
    #
    ##code for finding candidate tracks for hg19 for selected celltype
    #trackCandidateDict = dict()
    #for tnStr in hg19Keys:
    #    value = trackInfoShelve.get(tnStr).description
    #    if re.search('cell='+cellType+'.*dataType=ChipSeq<.*view=Peaks<', value):
    #        tn = tnStr.split(':')
    #        if ProcTrackOptions.isValidTrack(tn[0], tn[1:], True):
    #            trackCandidateDict[tnStr] = re.sub('[\-\_\s]','', value.split('antibody=')[1].split('<')[0].strip().upper())

    genome = choices.genome
    mutationTrack = choices.track.split(':')
    # NOTE(review): `expand` is not used in the visible body.
    expand = choices.expand

    if choices.datasource == cls.DATA_REPO:
        dataFn = DATA_FILES_PATH + 'EncodeBasedTfMappings.txt'
    else:
        dataFn = ExternalTrackManager.extractFnFromGalaxyTN(
            choices.history.split(':'))

    # As iterated below: cell type -> {TF -> list of (track, motif id)}.
    gSuiteDict = cls.convertGTrackSuiteToDict(dataFn)
    # choices.celltype is a dict; keep the keys with a truthy value.
    cellTypeList = [k for k, v in choices.celltype.items() if v]

    motifFn = HB_SOURCE_CODE_BASE_DIR + '/data/all_PWMs.txt'
    # Optional second PWM file, taken from the history.
    motifFn2 = ExternalTrackManager.extractFnFromGalaxyTN(
        choices.pwmhistory.split(':')) if choices.pwmhistory else None
    motifScanObj = MotifScanner(motifFn, fn2=motifFn2)

    resultDict = dict()
    for cellType, tfDict in gSuiteDict.items():
        if not cellType in cellTypeList:
            continue
        multiTfDict = MultiExactlySpecifiedTF()
        for keyTf, trackPwmList in tfDict.items():
            for track, motifId in trackPwmList:
                if not ProcTrackOptions.isValidTrack(
                        genome, track.split(':'), True):
                    print 'missing or invalid track: ', track
                    continue
                tfObj = ExactlySpecifiedTF(
                    keyTf, track, motifId,
                    [track.split(':'), mutationTrack], galaxyFn)
                tfObj.getFastaFiles(genome)
                tfObj.getPwmScores(motifId, motifScanObj)
                # Key combines TF name, ChIP-seq peak track and motif id.
                multiTfDict[tfObj.tf + '_' + tfObj.chipSeqPeaks + '_' +
                            motifId] = tfObj
        resultDict[cellType] = multiTfDict

    for cType, mDict in resultDict.items():
        print mDict.getHtmlResultsTable()
def getOptionsBox15(prevChoices):
    '''
    Options for box 15, depending on the choice two boxes back:
      - 'Select single reference track': the sub-types of the track
        'Private:GK:Psych:DHSs' for the genome in prevChoices[0]
        (prevChoices[4] must then be 'DHS');
      - 'Select a range among all reference tracks': the empty string;
      - anything else: None.
    '''
    selectionMode = prevChoices[-2]

    if selectionMode == 'Select a range among all reference tracks':
        return ''
    if selectionMode != 'Select single reference track':
        return None

    assert prevChoices[4] == 'DHS'
    genome = prevChoices[0]
    referenceTn = 'Private:GK:Psych:DHSs'.split(':')
    return ProcTrackOptions.getSubtypes(genome, referenceTn, True)
def getOptionsBoxTfTracks(cls, prevChoices):
    '''
    Build the TF track selection for the chosen TF source.

    Returns
      - an OrderedDict mapping each sub-type name to 'False' when the source
        is 'Hyperbrowser repository';
      - an OrderedDict mapping each GSuite track name (joined by ':') to
        'False' when the source is a history element of type "gsuite" that
        passes the file, size (1-1000 tracks) and requirement checks;
      - a one-element list with the preprocessed track name (joined by ':')
        for any other history element;
      - None in all remaining cases.
    '''
    if prevChoices.sourceTfsDetails == cls.SELECT:
        return

    genome = prevChoices.genome
    sourceTfs = prevChoices.sourceTfs
    sourceTfsDetails = prevChoices.sourceTfsDetails

    if sourceTfs == cls.SELECT:
        return

    if sourceTfs == 'Hyperbrowser repository':
        tfSourceTN = TfTrackNameMappings.getTfTrackNameMappings(
            prevChoices.genome)[sourceTfsDetails]
        subtypes = ProcTrackOptions.getSubtypes(
            prevChoices.genome, tfSourceTN, True)
        return OrderedDict((subtype, 'False') for subtype in subtypes)

    if sourceTfs != cls.REGIONS_FROM_HISTORY:
        return
    if not isinstance(sourceTfsDetails, basestring):
        return

    galaxyTN = sourceTfsDetails.split(':')
    # The file suffix is read straight from the second element of the
    # galaxy track name.
    if galaxyTN[1] != "gsuite":
        tfTrackName = ExternalTrackManager.getPreProcessedTrackFromGalaxyTN(
            genome, galaxyTN)
        return [':'.join(tfTrackName)]

    if GeneralGuiTool._checkGSuiteFile(sourceTfsDetails):
        return

    gSuite = getGSuiteFromGalaxyTN(sourceTfsDetails)
    if GeneralGuiTool._checkGSuiteTrackListSize(gSuite, 1, 1000):
        return

    if GeneralGuiTool._checkGSuiteRequirements(
            gSuite,
            AllTfsOfRegions.GSUITE_ALLOWED_FILE_FORMATS,
            AllTfsOfRegions.GSUITE_ALLOWED_LOCATIONS,
            AllTfsOfRegions.GSUITE_ALLOWED_TRACK_TYPES,
            AllTfsOfRegions.GSUITE_DISALLOWED_GENOMES):
        return

    # The GSuite is loaded a second time before listing its tracks.
    gSuite = getGSuiteFromGalaxyTN(sourceTfsDetails)
    selectedTrackNames = [':'.join(track.trackName)
                          for track in gSuite.allTracks()]
    return OrderedDict((name, 'False') for name in selectedTrackNames)
def superLine2batch(cls, line, genome):
    '''
    Expand one "super" batch line into a list of plain batch lines.

    Blank lines and lines starting with '#' give an empty list. A line with
    four columns gets a 'dummy' column inserted before the last one. The
    first column among indices 2-4 containing '/' is split at its first '/'
    into two alternatives, and the first column among indices 2-3 containing
    '*' (which must be the last ':'-separated part) is replaced by each
    sub-type of the parent track; each resulting line is expanded
    recursively. A line without anything to expand is returned prefixed with
    its generated batch name.

    Raises Exception if a '*' parent track has no sub-tracks.
    '''
    if line.strip() == '' or line[0] == '#':
        return []

    cols = line.split(BATCH_COL_SEPARATOR)
    if len(cols) == 4:
        cols = cols[:3] + ['dummy', cols[3]]
        line = BATCH_COL_SEPARATOR.join(cols)

    from quick.batch.BatchRunner import BatchRunner

    # Not binSpec, but tn1, tn2, statistic..
    for colIndex, col in enumerate(cols[2:5], 2):
        if '/' not in col:
            continue
        firstAlt, _, remainder = col.partition('/')
        expandedLines = [cols[:colIndex] + [alternative] + cols[colIndex + 1:]
                         for alternative in (firstAlt, remainder)]
        batchLines = []
        for expanded in expandedLines:
            batchLines += cls.superLine2batch(
                BATCH_COL_SEPARATOR.join(expanded), genome)
        return batchLines

    for colIndex, col in enumerate(cols[2:4], 2):
        if '*' not in col:
            continue
        typeParts = col.split(':')
        parentParts = typeParts[:-1]
        assert typeParts[-1] == '*' and not any(
            '*' in part for part in parentParts)

        unquotedTrackName = [unquote(part) for part in parentParts]
        starOptions = ProcTrackOptions.getSubtypes(
            genome, unquotedTrackName, True)
        if len(starOptions) == 0:
            raise Exception('No subtracks for parent track: %s' %
                            str(unquotedTrackName))

        # Sub-type names are quoted before being put back into the line.
        typeOptions = [':'.join(parentParts + [quote(starOpt)])
                       for starOpt in starOptions]
        expandedLines = [cols[:colIndex] + [typeOption] + cols[colIndex + 1:]
                         for typeOption in typeOptions]
        batchLines = []
        for expanded in expandedLines:
            batchLines += cls.superLine2batch(
                BATCH_COL_SEPARATOR.join(expanded), genome)
        return batchLines

    return [cls.generateBatchName(line) + BATCH_COL_SEPARATOR + line]
def yielder(self, curTn):
    '''
    Recursively yield the valid track names below (and including) curTn.

    Sub-tracks are yielded before curTn itself. Sub-types whose name starts
    with '.' or '_' are skipped, and nothing is yielded for the literature
    track when self._avoidLiterature is set.
    '''
    if self._avoidLiterature:
        literatureTn = GenomeInfo.getPropertyTrackName(self._genome,
                                                       'literature')
        if curTn == literatureTn:
            return

    subtypes = ProcTrackOptions.getSubtypes(self._genome, curTn,
                                            self._fullAccess)
    for subtype in subtypes:
        if subtype[0] in ('.', '_'):
            continue
        for descendantTn in self.yielder(curTn + [subtype]):
            yield descendantTn

    if ProcTrackOptions.isValidTrack(self._genome, curTn, self._fullAccess):
        yield curTn
def getOptionsBoxDataset(prevChoices):
    '''
    List the datasets for the chosen interaction type and cell line.

    Only the genomes 'hg19' and 'mm9' are handled; None is returned for any
    other genome. The sub-types are looked up with fullAccess=False.
    '''
    if prevChoices.genome not in ['hg19', 'mm9']:
        return None

    parentTrack = ColocalizationIn3DTool.PARENT_TRACKNAME + \
        [prevChoices.interactions] + [prevChoices.cellLine]
    return ProcTrackOptions.getSubtypes(prevChoices.genome, parentTrack,
                                        fullAccess=False)
def _getAllDiseases(prevChoices):
    '''
    Return all disease names.

    If the third choice is already a dict, its keys are returned; otherwise
    the sub-types of the disease track for SelectDiseaseTool.GENOME are
    listed.
    '''
    previousSelection = prevChoices[2]
    if isinstance(previousSelection, dict):
        return previousSelection.keys()

    diseaseTn = SelectDiseaseTool._getDiseaseTn(prevChoices)
    return list(ProcTrackOptions.getSubtypes(SelectDiseaseTool.GENOME,
                                             diseaseTn))
def _getAllPwms(prevChoices):
    '''
    Return all PWM names.

    If the third choice is already a dict, its keys are returned; otherwise
    the sub-types of the TF track selected by the first choice (via
    SelectTfTool.TRACK_DICT) are listed for SelectTfTool.GENOME.
    '''
    previousSelection = prevChoices[2]
    if isinstance(previousSelection, dict):
        return previousSelection.keys()

    tfTrackName = SelectTfTool.TRACK_DICT[prevChoices[0]]
    return list(ProcTrackOptions.getSubtypes(SelectTfTool.GENOME,
                                             tfTrackName))
def getOptionsBox3(prevChoices):
    '''
    Options for box 3: a history selection of 'bed' elements when the seed
    source is 'TFBS from history', otherwise the sub-types of the TF track
    that the chosen seed source maps to for the genome in prevChoices[0].
    '''
    seedSource = prevChoices[1]
    if seedSource == 'TFBS from history':
        return ('__history__', 'bed')

    genome = prevChoices[0]
    tfSourceTN = TfInfo.getTfTrackNameMappings(genome)[prevChoices[1]]
    return ProcTrackOptions.getSubtypes(genome, tfSourceTN, True)
def getOptionsBox3(prevChoices):
    '''
    Options for box 3: a history selection of 'bed' elements when the seed
    source is 'TFBS from history', otherwise the sub-types of the TF track
    that the chosen seed source maps to for the genome in prevChoices[0].
    '''
    seedSource = prevChoices[1]
    if seedSource == 'TFBS from history':
        return ('__history__', 'bed')

    genome = prevChoices[0]
    tfSourceTN = TfInfo.getTfTrackNameMappings(genome)[prevChoices[1]]
    return ProcTrackOptions.getSubtypes(genome, tfSourceTN, True)
def _isValidTrack(prevChoices, tnChoiceIndex=1):
    '''
    Tell whether the track chosen in box `tnChoiceIndex` is usable for the
    genome in prevChoices[0]: either a valid processed track (checked with
    full access) or an n-mer track name.
    '''
    from quick.application.GalaxyInterface import GalaxyInterface
    from quick.application.ProcTrackOptions import ProcTrackOptions

    genome = prevChoices[0]
    tn = prevChoices[tnChoiceIndex].split(':')

    procTrackValid = ProcTrackOptions.isValidTrack(genome, tn, True)
    if procTrackValid:
        return procTrackValid
    return GalaxyInterface.isNmerTrackName(genome, tn)
def getOptionsBox6(cls, prevChoices):
    '''
    Options for box 6: a history selection of 'bed'/'bedgraph' elements when
    box 5 is cls.REGIONS_FROM_HISTORY, otherwise the sub-types of the TF
    track that the choice in box 5 maps to for the genome in prevChoices[1].
    '''
    tfSource = prevChoices[4]
    if tfSource == cls.REGIONS_FROM_HISTORY:
        return ('__history__', 'bed', 'bedgraph')

    genome = prevChoices[1]
    tfSourceTN = TfInfo.getTfTrackNameMappings(genome)[tfSource]
    return ProcTrackOptions.getSubtypes(genome, tfSourceTN, True)
def getOptionsBoxTrack6Source(prevChoices):
    '''
    See getOptionsBox1().

    Returns the source selection list when one of the following holds, and
    None otherwise:
      - the previous choice is already 'Track' or 'History';
      - three boxes back 'Track' was chosen and the choice two boxes back is
        a valid track for the genome in prevChoices[0];
      - three boxes back 'History' was chosen and the choice two boxes back
        is not the empty string.
    '''
    sourceOptions = ['--- select ---', 'Track', 'History']

    if prevChoices[-1] in ['Track', 'History']:
        return sourceOptions

    if prevChoices[-3] == 'Track' and ProcTrackOptions.isValidTrack(
            prevChoices[0], prevChoices[-2].split(':'), fullAccess=True):
        return sourceOptions

    if prevChoices[-3] == 'History' and prevChoices[-2] != '':
        return sourceOptions

    return None
def getOptionsBox6(cls, prevChoices):
    '''
    Options for box 6: a history selection of 'bed'/'bedgraph' elements when
    box 5 is cls.REGIONS_FROM_HISTORY, otherwise the sub-types of the TF
    track that the choice in box 5 maps to for the genome in prevChoices[1].
    '''
    tfSource = prevChoices[4]
    if tfSource == cls.REGIONS_FROM_HISTORY:
        return ('__history__', 'bed', 'bedgraph')

    genome = prevChoices[1]
    tfSourceTN = TfInfo.getTfTrackNameMappings(genome)[tfSource]
    return ProcTrackOptions.getSubtypes(genome, tfSourceTN, True)
def validateAndReturnErrors(choices):
    '''
    Validate the genome and the gene list.

    Returns an error text if the genome choice is in error, if the genome
    has no valid Ensembl gene track, or if the gene list in choices[1] is
    blank. Returns None when all checks pass.
    '''
    genome, genomeError = CreateSegmentsFromGeneListTool._getGenomeChoice(
        choices, 0)
    if genomeError:
        return genomeError

    ensemblTn = GenomeInfo.getEnsemblTrackName(genome)
    hasEnsemblTrack = ProcTrackOptions.isValidTrack(genome, ensemblTn)
    if not hasEnsemblTrack:
        return 'The selected genome have not been set up with a Ensembl gene track. If you require this functionality for the selected genome, please contact the HyperBrowser team.'

    geneList = choices[1].strip()
    if geneList == '':
        return 'Please enter a list of genes (using Ensembl IDs)'

    return None
def _requiredTracksAreValid(self):
    '''
    Return True when the sequence, chromosome and assembly gaps tracks of
    this genome are all valid (checked with full access), else False.
    Checking stops at the first invalid track.
    '''
    from quick.application.ProcTrackOptions import ProcTrackOptions

    requiredTrackNames = [
        self.getSequenceTrackName(self.genome),
        self.getChrTrackName(self.genome),
        self.getAssemblyGapsTrackName(self.genome),
    ]

    for requiredTn in requiredTrackNames:
        if not ProcTrackOptions.isValidTrack(self.genome, requiredTn,
                                             fullAccess=True):
            return False
    return True
def _calcAndStoreSubTrackCount(self, trackName):
    '''
    Compute and store the sub-track count of `trackName`.

    The count is the sum of the (truthy) subTrackCount values of the direct
    sub-tracks, plus one if the track itself is valid. The result is written
    to the TrackInfo of `trackName` and stored.
    '''
    ti = TrackInfo(self._genome, trackName)

    childCounts = []
    for subTrackName in ProcTrackOptions.getSubtypes(self._genome, trackName,
                                                     True):
        childInfo = TrackInfo(self._genome, trackName + [subTrackName])
        childCounts.append(childInfo.subTrackCount)

    # Children without a stored count (falsy value) contribute nothing.
    trackCount = sum(count for count in childCounts if count)
    if ti.isValid():
        trackCount += 1

    ti.subTrackCount = trackCount
    ti.store()
def getOptionsBoxTrack2(cls, prevChoices):
    '''
    Options for the second track.

    When box 5 is 'yes', returns cls.NO_TRACK_SHORTNAME followed by the
    sub-types of cls.STD_PREFIX_TN for the genome in prevChoices[1], with the
    track cached under 'track2' moved to the front if present. Otherwise a
    general track/history selection is returned.
    '''
    if prevChoices[4] != 'yes':
        return '__track__', 'history'

    genome = prevChoices[1]
    prefixTN = cls.STD_PREFIX_TN
    trackList = ProcTrackOptions.getSubtypes(genome, prefixTN, True)

    cachedTrack = cls._cacheDict.get('track2')
    if cachedTrack in trackList:
        trackList.remove(cachedTrack)
        trackList.insert(0, cachedTrack)

    return [cls.NO_TRACK_SHORTNAME] + trackList
def getOptionsBoxTrack2(cls, prevChoices):
    '''
    Options for the second track.

    When box 5 is 'yes', returns cls.NO_TRACK_SHORTNAME followed by the
    sub-types of cls.STD_PREFIX_TN for the genome in prevChoices[1], with the
    track cached under 'track2' moved to the front if present. Otherwise a
    general track/history selection is returned.
    '''
    if prevChoices[4] != 'yes':
        return '__track__', 'history'

    genome = prevChoices[1]
    prefixTN = cls.STD_PREFIX_TN
    trackList = ProcTrackOptions.getSubtypes(genome, prefixTN, True)

    cachedTrack = cls._cacheDict.get('track2')
    if cachedTrack in trackList:
        trackList.remove(cachedTrack)
        trackList.insert(0, cachedTrack)

    return [cls.NO_TRACK_SHORTNAME] + trackList
def yielder(self, curTn, level=0):
    '''
    Recursively yield the valid track names below curTn.

    Sub-tracks are yielded before their parent. The starting track itself
    (level 0) is only yielded when self._includeParentTrack is set. Sub-types
    whose name starts with '.' or '_' are skipped, and nothing is yielded for
    the literature track when self._avoidLiterature is set.
    '''
    if self._avoidLiterature:
        literatureTn = GenomeInfo.getPropertyTrackName(self._genome,
                                                       'literature')
        if curTn == literatureTn:
            return

    subtypes = ProcTrackOptions.getSubtypes(self._genome, curTn,
                                            self._fullAccess)
    for subtype in subtypes:
        if subtype[0] in ('.', '_'):
            continue
        for descendantTn in self.yielder(curTn + [subtype], level=level + 1):
            yield descendantTn

    if not (self._includeParentTrack or level > 0):
        return
    if ProcTrackOptions.isValidTrack(self._genome, curTn, self._fullAccess):
        yield curTn
def _calcAndStoreSubTrackCount(self, trackName):
    '''
    Compute and store the sub-track count of `trackName`.

    The count is the sum of the (truthy) subTrackCount values of the direct
    sub-tracks, plus one if the track itself is valid. The result is written
    to the TrackInfo of `trackName` and stored.
    '''
    ti = TrackInfo(self._genome, trackName)

    childCounts = []
    for subTrackName in ProcTrackOptions.getSubtypes(self._genome, trackName,
                                                     True):
        childInfo = TrackInfo(self._genome, trackName + [subTrackName])
        childCounts.append(childInfo.subTrackCount)

    # Children without a stored count (falsy value) contribute nothing.
    trackCount = sum(count for count in childCounts if count)
    if ti.isValid():
        trackCount += 1

    ti.subTrackCount = trackCount
    ti.store()
def _inferTrackName(rawTN, genome, fullAccess):
    '''
    Convert a raw track name string to its list form.

    Returns None for the placeholder values 'blank', 'none', 'dummy', '_',
    ' ' and '' (case-insensitive). Raises InvalidRunSpecException if the
    converted name is not a valid track for `genome` at the given access
    level.
    '''
    placeholders = ['blank', 'none', 'dummy', '_', ' ', '']
    if rawTN.lower() in placeholders:
        return None

    trackName = convertTNstrToTNListFormat(rawTN)
    if not ProcTrackOptions.isValidTrack(genome, trackName, fullAccess):
        raise InvalidRunSpecException(
            'Error in trackname specification. \'' + rawTN +
            '\' does not match any tracknames. This may be because of limited user permissions.')
    return trackName
def getOptionsBoxTfTracks(cls, prevChoices):
    '''
    Options for the TF tracks box.

    Returns a history selection ('bed', 'category.bed', 'gtrack') when the TF
    source is cls.REGIONS_FROM_HISTORY, None when no source is set or it is
    cls.SELECT, and otherwise the sub-types of the TF track that the source
    maps to for the chosen genome.
    '''
    sourceTfs = prevChoices.sourceTfs
    if not sourceTfs:
        return None
    if sourceTfs == cls.REGIONS_FROM_HISTORY:
        return ('__history__', 'bed', 'category.bed', 'gtrack')
    if sourceTfs == cls.SELECT:
        return None

    tfSourceTN = TfTrackNameMappings.getTfTrackNameMappings(
        prevChoices.genome)[sourceTfs]
    return ProcTrackOptions.getSubtypes(prevChoices.genome, tfSourceTN, True)
def getOptionsBox6(cls, prevChoices):
    '''
    Options for box 6: the sub-types of cls.STD_PREFIX_TN for the genome in
    prevChoices[3] when box 5 is 'yes', otherwise a general track/history
    selection. (Moving a cached 'track1' to the front is currently disabled.)
    '''
    if prevChoices[4] != 'yes':
        return '__track__', 'history'

    genome = prevChoices[3]
    prefixTN = cls.STD_PREFIX_TN
    return ProcTrackOptions.getSubtypes(genome, prefixTN, True)
def getOptionsBox6(cls, prevChoices):
    '''
    Options for box 6: the sub-types of cls.STD_PREFIX_TN for the genome in
    prevChoices[3] when box 5 is 'yes', otherwise a general track/history
    selection. (Moving a cached 'track1' to the front is currently disabled.)
    '''
    if prevChoices[4] != 'yes':
        return '__track__', 'history'

    genome = prevChoices[3]
    prefixTN = cls.STD_PREFIX_TN
    return ProcTrackOptions.getSubtypes(genome, prefixTN, True)
def getAllChosenTracks(choices):
    '''
    See getOptionsBox1().

    Collect the track names (split on ':') of the six track boxes that have a
    value. History tracks are always included; repository tracks ('Track')
    only when valid for choices.Genome with full access.
    '''
    trackParamNames = [('Track1Source', 'Track1'), ('Track2Source', 'Track2'),
                       ('Track3Source', 'Track3'), ('Track4Source', 'Track4'),
                       ('Track5Source', 'Track5'), ('Track6Source', 'Track6')]

    # Read every filled-in box first, then filter on source and validity.
    filledIn = []
    for sourceKey, trackKey in trackParamNames:
        if getattr(choices, trackKey) in [None, '']:
            continue
        filledIn.append((getattr(choices, sourceKey),
                         getattr(choices, trackKey).split(':')))

    allTracks = []
    for source, trackName in filledIn:
        if source == 'History':
            allTracks.append(trackName)
        elif source == 'Track' and ProcTrackOptions.isValidTrack(
                choices.Genome, trackName, fullAccess=True):
            allTracks.append(trackName)
    return allTracks
def _inferTrackName(trackName, genome, fullAccess):
    '''
    Validate a track name given in list form.

    Returns None for an empty list or for a single-element list holding one
    of the placeholders 'blank', 'none', 'dummy', '_', ' ' and ''
    (case-insensitive). Returns `trackName` unchanged when it is a valid
    track for `genome` at the given access level, and raises
    InvalidRunSpecException otherwise.
    '''
    placeholders = ['blank', 'none', 'dummy', '_', ' ', '']
    if len(trackName) == 0:
        return None
    if len(trackName) == 1 and trackName[0].lower() in placeholders:
        return None

    if not ProcTrackOptions.isValidTrack(genome, trackName, fullAccess):
        raise InvalidRunSpecException(
            'Error in trackname specification. \'' +
            ':'.join(trackName) + '\' does not match any tracknames. ' +
            'This may be because of limited user permissions.')
    return trackName
def _isValidTrack(choices, tnChoiceIndex=1, genomeChoiceIndex=0):
    '''
    Tell whether the chosen track is usable for the chosen genome.

    Returns False when the genome choice is in error or None, or when the
    track choice is in error. Otherwise the track must be either a valid
    processed track (checked with full access) or an n-mer track name.
    '''
    from quick.application.GalaxyInterface import GalaxyInterface
    from quick.application.ProcTrackOptions import ProcTrackOptions

    genome, genomeError = GeneralGuiTool._getGenomeChoice(
        choices, genomeChoiceIndex)
    if genomeError or genome is None:
        return False

    trackName, trackError = GeneralGuiTool._getTrackChoice(
        choices, tnChoiceIndex)
    if trackError:
        return False

    procTrackValid = ProcTrackOptions.isValidTrack(genome, trackName, True)
    if procTrackValid:
        return procTrackValid
    return GalaxyInterface.isNmerTrackName(genome, trackName)
def validateTracks(choices):
    '''
    See getOptionsBox1().

    Validate the six track boxes. Returns an error text if a repository track
    (source 'Track') is not a valid track for choices.Genome, or if no track
    box holds a string value; returns None otherwise.

    The source is compared against 'Track', the value offered by the source
    selection boxes and tested by getAllChosenTracks(); the previous
    comparison against 'Tracks' could never match, so no track was ever
    validated. Validity is checked with fullAccess=True, as in
    getAllChosenTracks().
    '''
    trackParamNames = [('Track1Source', 'Track1'), ('Track2Source', 'Track2'),
                       ('Track3Source', 'Track3'), ('Track4Source', 'Track4'),
                       ('Track5Source', 'Track5'), ('Track6Source', 'Track6')]

    count = 0
    for sourceKey, trackKey in trackParamNames:
        trackChoice = getattr(choices, trackKey)
        # Boxes without a string value (e.g. None) are not counted as chosen.
        if not isinstance(trackChoice, basestring):
            continue
        count += 1

        if getattr(choices, sourceKey) == 'Track':
            if not ProcTrackOptions.isValidTrack(
                    choices.Genome, trackChoice.split(':'), fullAccess=True):
                return 'Invalid track: the path for %s is not correctly specified' % trackKey

    if count == 0:
        return 'No valid tracks chosen'
def getOptionsBoxInteractions(prevChoices):  # Alternatively: getOptionsBoxKey2()
    '''
    See getOptionsBox1().

    prevChoices is a namedtuple of the selections made in the previous input
    boxes; elements can be accessed by index (prevChoices[0]) or by key
    (prevChoices.key).

    Lists the sub-types of ColocalizationIn3DTool.PARENT_TRACKNAME (looked up
    with fullAccess=False) for the genomes 'hg19' and 'mm9'; returns None for
    any other genome.
    '''
    if prevChoices.genome not in ['hg19', 'mm9']:
        return None

    parentTrack = ColocalizationIn3DTool.PARENT_TRACKNAME
    return ProcTrackOptions.getSubtypes(prevChoices.genome, parentTrack,
                                        fullAccess=False)
def superLine2batch(cls, line, genome):
    '''
    Expand one "super" batch line into a list of plain batch lines.

    Blank lines and lines starting with '#' give an empty list. A line with
    four columns gets a 'dummy' column inserted before the last one. The
    first column among indices 2-4 containing '/' is split at its first '/'
    into two alternatives, and the first column among indices 2-3 containing
    '*' (which must be the last ':'-separated part) is replaced by each
    sub-type of the parent track; each resulting line is expanded
    recursively. A line without anything to expand is returned prefixed with
    its generated batch name.

    Raises Exception if a '*' parent track has no sub-tracks.

    The leftover 'TEMP:' debug print has been removed so that expanding a
    wildcard no longer writes to stdout.
    '''
    if line.strip() == '' or line[0] == '#':
        return []

    cols = line.split(BATCH_COL_SEPARATOR)
    if len(cols) == 4:
        cols = cols[:3] + ['dummy', cols[3]]
        line = BATCH_COL_SEPARATOR.join(cols)

    from quick.batch.BatchRunner import BatchRunner

    for colIndex, col in enumerate(cols[2:5], 2):
        if '/' not in col:
            continue
        firstAlt, _, remainder = col.partition('/')
        expandedLines = [cols[:colIndex] + [alternative] + cols[colIndex + 1:]
                         for alternative in (firstAlt, remainder)]
        batchLines = []
        for expanded in expandedLines:
            batchLines += cls.superLine2batch(
                BATCH_COL_SEPARATOR.join(expanded), genome)
        return batchLines

    for colIndex, col in enumerate(cols[2:4], 2):
        if '*' not in col:
            continue
        typeParts = col.split(':')
        parentParts = typeParts[:-1]
        assert typeParts[-1] == '*' and not any(
            '*' in part for part in parentParts)

        unquotedTrackName = [unquote(part) for part in parentParts]
        starOptions = ProcTrackOptions.getSubtypes(
            genome, unquotedTrackName, True)
        if len(starOptions) == 0:
            raise Exception('No subtracks for parent track: %s' %
                            str(unquotedTrackName))

        # NOTE(review): sub-type names are appended as-is, although the
        # parent parts are unquoted before the lookup - confirm whether the
        # names should be quoted here.
        typeOptions = [':'.join(parentParts + [starOpt])
                       for starOpt in starOptions]
        expandedLines = [cols[:colIndex] + [typeOption] + cols[colIndex + 1:]
                         for typeOption in typeOptions]
        batchLines = []
        for expanded in expandedLines:
            batchLines += cls.superLine2batch(
                BATCH_COL_SEPARATOR.join(expanded), genome)
        return batchLines

    return [cls.generateBatchName(line) + BATCH_COL_SEPARATOR + line]
def getTrackNamesFromFormParameters(cls, choices):
    '''
    Return (genome, list of track names) for the tool form.

    With 'From repository' as first choice, the genome is choices[2] and the
    tracks are the unquoted, truthy values of the dict in choices[4] followed
    by those of choices[5:] that are valid tracks (checked with full access);
    all are split on ':'. Otherwise both genome and track names are read from
    the GSuite in choices[1].
    '''
    if choices[0] != 'From repository':
        # 'From GSuite'
        gSuite = getGSuiteFromGalaxyTN(choices[1])
        tracks = [gSuiteTrack.trackName for gSuiteTrack in gSuite.allTracks()]
        genome = gSuite.genome
        return genome, tracks

    genome = choices[2]
    trackNames = []
    for val in choices[4].values():
        if val:
            trackNames.append(unquote(val).split(':'))
    for extraChoice in choices[5:]:
        if extraChoice and ProcTrackOptions.isValidTrack(
                genome, extraChoice.split(':'), True):
            trackNames.append(extraChoice.split(':'))
    return genome, trackNames
def validateAndReturnErrors(choices):
    '''
    Should validate the selected input parameters. If the parameters are not
    valid, an error text explaining the problem should be returned. The GUI
    then shows this text to the user (if not empty) and greys out the execute
    button (also if the text is empty). If all parameters are valid, the
    method should return None, which enables the execute button.

    Checks, in order: that the selected track is valid (empty error text, so
    no message is shown), that its track format ends in 'points' or
    'segments', and that the standard gene regions track of the genome is
    valid.
    '''
    genome, tn, tf = ExtractIntersectingGenesTool._getBasicTrackFormat(choices)
    geneRegsTrackName = GenomeInfo.getStdGeneRegsTn(genome)

    if not ExtractIntersectingGenesTool._isValidTrack(choices):
        # An empty text is returned, so no message is displayed. The former
        # message ("The selected track (%s) is not valid.") sat after this
        # return and was unreachable; it has been removed.
        return ""

    if tf.split()[-1] not in ['points', 'segments']:
        return "The track format of the selected track must be either points or segments. Currently: %s" % tf

    if not ProcTrackOptions.isValidTrack(genome, geneRegsTrackName, True):
        return "The track used for gene ids (%s) is not valid. This is an internal error." % ':'.join(geneRegsTrackName)
def validateTracks(choices):
    '''
    See getOptionsBox1().

    Validate the six track boxes. Returns an error text if a repository track
    (source 'Track') is not a valid track for choices.Genome, or if no track
    box holds a string value; returns None otherwise.

    The source is compared against 'Track', the value offered by the source
    selection boxes and tested by getAllChosenTracks(); the previous
    comparison against 'Tracks' could never match, so no track was ever
    validated. Validity is checked with fullAccess=True, as in
    getAllChosenTracks().
    '''
    trackParamNames = [('Track1Source', 'Track1'), ('Track2Source', 'Track2'),
                       ('Track3Source', 'Track3'), ('Track4Source', 'Track4'),
                       ('Track5Source', 'Track5'), ('Track6Source', 'Track6')]

    # Only boxes holding a string value count as chosen.
    chosen = [(trackKey, getattr(choices, sourceKey),
               getattr(choices, trackKey).split(':'))
              for sourceKey, trackKey in trackParamNames
              if isinstance(getattr(choices, trackKey), basestring)]

    for trackNumber, source, trackName in chosen:
        if source != 'Track':
            continue
        if not ProcTrackOptions.isValidTrack(choices.Genome, trackName,
                                             fullAccess=True):
            return 'Invalid track: the path for %s is not correctly specified' % trackNumber

    if len(chosen) == 0:
        return 'No valid tracks chosen'
def getAllChosenTracks(choices):
    '''
    See getOptionsBox1().

    Collect the track names (split on ':') of the six track boxes that have a
    value. History tracks are always included; repository tracks ('Track')
    only when valid for choices.Genome with full access.
    '''
    trackParamNames = [('Track1Source', 'Track1'), ('Track2Source', 'Track2'),
                       ('Track3Source', 'Track3'), ('Track4Source', 'Track4'),
                       ('Track5Source', 'Track5'), ('Track6Source', 'Track6')]

    # Read every filled-in box first, then filter on source and validity.
    filledIn = []
    for sourceKey, trackKey in trackParamNames:
        if getattr(choices, trackKey) in [None, '']:
            continue
        filledIn.append((getattr(choices, sourceKey),
                         getattr(choices, trackKey).split(':')))

    allTracks = []
    for source, trackName in filledIn:
        if source == 'History':
            allTracks.append(trackName)
        elif source == 'Track' and ProcTrackOptions.isValidTrack(
                choices.Genome, trackName, fullAccess=True):
            allTracks.append(trackName)
    return allTracks
def execute(cls, choices, galaxyFn=None, username=''): from quick.application.ProcTrackOptions import ProcTrackOptions from quick.application.GalaxyInterface import GalaxyInterface import gold.application.StatRunner analysisDef = 'dummy -> PercentageChangeStat' genome = choices[0] binSpec = '*' regSpec = 'Days_1900_2036:36890-41424' tnRoot = 'Company stocks:Historical prices:OSE:' stockList = [k for k, v in choices[1].items() if v] numStocks = 0 totalPercent = 0.0 for stock in stockList: tn = tnRoot + stock if ProcTrackOptions.isValidTrack(genome, tn.split(':'), fullAccess=True): resultDict = GalaxyInterface.runManual( [tn.split(':'), choices[2].split(':')], analysisDef, regSpec, binSpec, 'days', galaxyFn, printResults=False, printProgress=False) for k, v in resultDict.items(): print 'increase (from jan. 2001 - jun. 2013) for ', stock, ': ', v res = v['Result'] if res == 0.0 or res > 10000: continue totalPercent += v['Result'] numStocks += 1 else: print 'this is not a valid track', tn print 'Average increase (from jan. 2001 - jun. 2013): ', totalPercent / numStocks, ' (number of stocks =', numStocks, ')'
def validateAndReturnErrors(choices):
    '''
    Validate the selected input parameters. An error text explaining the
    problem is returned when they are not valid; the GUI shows the text (if
    not empty) and greys out the execute button. None is returned when all
    parameters are valid, which enables the execute button.

    Checks, in order: the track/history choice, that its track format ends in
    'points' or 'segments', and that the standard gene regions track of the
    genome is valid.
    '''
    if choices.trackSource == 'history':
        trackChoice = 'history'
    else:
        trackChoice = 'track'

    trackError = ExtractIntersectingGenesTool._checkTrack(
        choices, trackChoice, 'genome')
    if trackError:
        return trackError

    genome, tn, tf = ExtractIntersectingGenesTool._getBasicTrackFormat(
        choices, trackChoice)
    formatKind = tf.split()[-1]
    if formatKind not in ['points', 'segments']:
        return "The track format of the selected track must be either points or segments. Currently: %s" % tf

    geneRegsTrackName = GenomeInfo.getStdGeneRegsTn(genome)
    if not ProcTrackOptions.isValidTrack(genome, geneRegsTrackName, True):
        return "The track used for gene ids (%s) is not valid. This is an internal error." % ':'.join(
            geneRegsTrackName)

    return None
def _getAllPwms(prevChoices):
    '''
    Return all PWM names.

    If the third choice is already a dict, its keys are returned; otherwise
    the sub-types of the TF track selected by the first choice (via
    SelectTfTool.TRACK_DICT) are listed for SelectTfTool.GENOME.
    '''
    previousSelection = prevChoices[2]
    if isinstance(previousSelection, dict):
        return previousSelection.keys()

    tfTrackName = SelectTfTool.TRACK_DICT[prevChoices[0]]
    return list(ProcTrackOptions.getSubtypes(SelectTfTool.GENOME,
                                             tfTrackName))
print 'FAILED: Error when preprocessing TestGenome. Error:' print ' ' + str(e).strip() sys.exit(1) for allowOverlaps in [False, True]: fromDir = createDirPath(['GESourceTracks'], 'TestGenome', allowOverlaps=allowOverlaps) toDir = createDirPath([], 'ModelsForExternalTracks', allowOverlaps=allowOverlaps) try: if not os.path.exists(toDir): shutil.copytree(fromDir, toDir) print 'OK: Copied from %s to %s.' % (fromDir, toDir) except Exception, e: print 'FAILED: Error occurred copying from %s to %s: ' % (fromDir, toDir) + str(e).strip() sys.exit(1) for track in ProcTrackOptions.getSubtypes('TestGenome', ['GESourceTracks']): ti = TrackInfo('TestGenome', ['GESourceTracks', track]) ti.trackName = [track] ti.genome = 'ModelsForExternalTracks' ti.store() from quick.util.GenomeInfo import GenomeInfo from datetime import datetime gi = GenomeInfo('TestGenome') gi.fullName = 'TestGenome' gi.sourceUrls = ['http://hgdownload.cse.ucsc.edu/goldenPath/hg18/chromosomes/chr21.fa.gz', \ 'http://hgdownload.cse.ucsc.edu/goldenPath/hg18/chromosomes/chrM.fa.gz'] gi.sourceChrNames = ['chr21', 'chrM'] gi.installedBy = 'Setup.py' gi.genomeBuildSource = 'NCBI' gi.genomeBuildName = 'hg18'
def _getAllDiseases(prevChoices):
    '''
    Return all disease names.

    If the third choice is already a dict, its keys are returned; otherwise
    the sub-types of the disease track for SelectDiseaseTool.GENOME are
    listed.
    '''
    previousSelection = prevChoices[2]
    if isinstance(previousSelection, dict):
        return previousSelection.keys()

    diseaseTn = SelectDiseaseTool._getDiseaseTn(prevChoices)
    return list(ProcTrackOptions.getSubtypes(SelectDiseaseTool.GENOME,
                                             diseaseTn))
def execute(choices, galaxyFn=None, username=''): #setupDebugModeAndLogging() from time import time startTime = time() print HtmlCore().begin() print '<pre>' genome = choices[0] #assert genome=='hg19' flankSize = choices[3] if choices[1] == 'Prepared catalogues': if choices[2] == 'GiulioNewGwas': gwasTnBase = 'Private:GK:NewGwasBase'.split(':') elif choices[2] == 'GiulioAllGwas': gwasTnBase = 'Private:GK:AllGiulioGwasSnpsAsOf9feb13'.split( ':') elif choices[2] == 'GiulioMay13Gwas': gwasTnBase = 'Private:GK:Gwas:GiulioMay13'.split(':') elif choices[2] == 'SmallTest': gwasTnBase = 'Private:GK:Gwas'.split(':') else: raise gwasTnBase += [flankSize] elif choices[1] == 'Custom track': gwasTnBase = choices[2].split(':') assert flankSize == 'SNPs' else: assert False, choices[1] referenceTrackSource = choices[4] normalization = choices[5] assert normalization == 'CoverageDepth' analysisType = choices[6] if analysisType == 'Enrichment': ResultClass = EnrichmentGwasResults elif analysisType == 'Testing': ResultClass = HypothesisTestingGwasResults nullmodelMapping = dict( zip([ 'Sample disease regions uniformly', 'Sample disease regions with preserved inter-region spacings', 'Sample disease regions with preserved distance to nearest exon' ], [ 'PermutedSegsAndSampledIntersegsTrack_', 'PermutedSegsAndIntersegsTrack_', 'SegsSampledByDistanceToReferenceTrack_,trackNameIntensity=Genes and gene subsets^Exons^Ensembl exons' ])) nullmodel = nullmodelMapping[choices[9]] assert nullmodel in [ 'PermutedSegsAndSampledIntersegsTrack_', 'PermutedSegsAndIntersegsTrack_', 'SegsSampledByDistanceToReferenceTrack_,trackNameIntensity=Genes and gene subsets^Exons^Ensembl exons' ] else: raise kernelType = choices[7] kernelParam = choices[8] if choices[10] == 'Include links to full underlying results': includeDetailedResults = True elif choices[10] == 'Only produce main result values': includeDetailedResults = False else: raise InvalidRunSpecException('Did not understand option: %s' % choices[12]) 
mcDepth = choices[11] if choices[12] == 'yes': includeLocalResults = True elif choices[12] == 'no': includeLocalResults = False else: raise InvalidRunSpecException('Did not understand option: %s' % choices[12]) if choices[15] == 'yes': useCache = True elif choices[15] == 'no': useCache = False else: raise InvalidRunSpecException('Did not understand option: %s' % choices[15]) if choices[16] == 'yes': printProgress = True elif choices[16] == 'no': printProgress = False else: raise InvalidRunSpecException('Did not understand option: %s' % choices[16]) from quick.application.GalaxyInterface import GalaxyInterface #print GalaxyInterface.getHtmlForToggles() #print GalaxyInterface.getHtmlBeginForRuns() #from quick.webtools.GwasAPI import getEnrichmentValues print 'Progress: ' #print 'base: ',gwasTnBase #print 'leaves: ',GalaxyInterface.getSubTrackNames(genome, gwasTnBase,deep=False, username=username) disRes = MultiGwasResults() from gold.application.HyperBrowserCLI import getSubTrackLeafTerms from quick.application.ProcTrackOptions import ProcTrackOptions #for gwasTrackLeaf in GalaxyInterface.getSubTrackNames(genome, gwasTnBase,deep=False, username=username): allDiseases = getSubTrackLeafTerms(genome, gwasTnBase, username=username) if len(allDiseases) == 0: assert ProcTrackOptions.isValidTrack( genome, gwasTnBase, GalaxyInterface.userHasFullAccess( username)), 'Genome: %s, TN: %s, Access: %s' % ( genome, gwasTnBase, GalaxyInterface.userHasFullAccess(username)) allDiseases = gwasTnBase[-1:] gwasTnBase = gwasTnBase[:-1] for disease in allDiseases: #print 'Leaf:',gwasTrackLeaf[0] #if not gwasTrackLeaf[0] in ['11 - Height.txt']: #if not disease in ['1 - Alzheimer.txt','10 - Graves.txt']:#['Malaria','UC']: # print 'IGNORING: ', gwasTrackLeaf[0] # continue #if gwasTrackLeaf in [[],None] or gwasTrackLeaf[0]=='-- All subtypes --': #continue #gwasTn = ':'.join(gwasTnBase + [gwasTrackLeaf[0]]) gwasTn = ':'.join(gwasTnBase + [disease]) #print 'Running API: ', 
"$getEnrichmentValues(%s, '%s', '%s')" % ([gwasTn], referenceTrackSource, normalization) #enrichmentsDict = getEnrichmentValues([gwasTn], referenceTrackSource, normalization)#, ['114 - Brain_Mid_Frontal_Lobe.txt','134 - Rectal_Smooth_Muscle.txt']) #assert len(enrichmentsDict.values())==1 #enrichments = enrichmentsDict.values()[0] #if gwasTrackLeaf[0] in ['Malaria','UC']: #print 'HERE IS WHAT I GOT: ',enrichmentsDict #print 'ENR: ',enrichments #print 'One: ', (enrichments.values()[0])['enrichment']['13 - CD4'].getGlobalResult() #assert 'enrichment' in (enrichments.values()[0]), (enrichments.values()[0]) #disRes[gwasTrackLeaf[0]] = (enrichments.values()[0])['enrichment'] #disRes[gwasTrackLeaf[0]] = (enrichments.values()[0]) #disease = gwasTrackLeaf[0] #disRes[disease] = [x.getGlobalResult() for x in enrichments] #print 'DISres: ', disRes[gwasTrackLeaf[0]] #from quick.util.CommonFunctions import extractIdFromGalaxyFn res = ResultClass(gwasId=disease, verbose=True, galaxyFn=galaxyFn) #referenceSubTypes = enrichments.keys() #referenceSubTypes = [x[0] for x in GalaxyInterface.getSubTrackNames(genome, 'Private:GK:Psych:DHSs'.split(':'), deep=False, username=username) if not x[0] == '-- All subtypes --'] if referenceTrackSource == 'H3K4me3': refTrackBase = 'Private:GK:Psych:H3K4me3' refTrackCoverageFunction = 'Private^GK^Psych^H3K4me3CoverageTrack' elif referenceTrackSource == 'DHS': refTrackBase = 'Private:GK:Psych:DHSs' refTrackCoverageFunction = 'Private^GK^Psych^DHSCoverageTrack' elif referenceTrackSource == 'Chromatin state 1-AP': refTrackBase = 'Private:Anders:Chromatin State Segmentation:1_Active_Promoter' refTrackCoverageFunction = 'Private^GWAS^Chromatin^CoverageFunctionTracks^1_Active_PromoterV2' elif referenceTrackSource == 'Chromatin state 4-SE': refTrackBase = 'Private:Anders:Chromatin State Segmentation:4_Strong_Enhancer' refTrackCoverageFunction = 'Private^GWAS^Chromatin^CoverageFunctionTracks^4_Strong_Enhancer' elif referenceTrackSource == 'Chromatin state 
5-SE': refTrackBase = 'Private:Anders:Chromatin State Segmentation:5_Strong_Enhancer' refTrackCoverageFunction = 'Private^GWAS^Chromatin^CoverageFunctionTracks^5_Strong_Enhancer' else: raise refTrackSelectType = choices[13] allReferenceTracks = [ x[0] for x in GalaxyInterface.getSubTrackNames( genome, refTrackBase.split(':'), deep=False, username=username) if not x[0] == '-- All subtypes --' ] if refTrackSelectType == 'Use all reference tracks': referenceSubTypes = allReferenceTracks elif refTrackSelectType == 'Select single reference track': referenceSubTypes = [choices[14]] assert referenceSubTypes[0] in allReferenceTracks elif refTrackSelectType == 'Select a range among all reference tracks': try: firstRefTrack, lastRefTrack = choices[14].split('-') referenceSubTypes = allReferenceTracks[ int(firstRefTrack):int(lastRefTrack) + 1] print 'Analyzing %s among a total of %s reference tracks' % ( choices[14], len(allReferenceTracks)) except Exception: print 'Range format should be e.g. "15-18".' 
raise else: raise for referenceSubType in referenceSubTypes: #if not referenceSubType in ['107 - Adult_Kidney.txt','106 - Adipose_Nuclei.txt']: # #print 'IGNORING: ',referenceSubType # continue # if analysisType == 'Enrichment': res[referenceSubType] = directGetEnrichment( gwasTn, referenceSubType, refTrackBase, kernelType, kernelParam, useCache, printProgress) elif analysisType == 'Testing': res[referenceSubType] = directGetTestResults( gwasTn, referenceSubType, refTrackBase, kernelType, kernelParam, refTrackCoverageFunction, nullmodel, mcDepth, useCache, printProgress) else: raise #print disease, referenceSubType, res[referenceSubType] #print "ENR: ",enrichments #res[referenceSubType] = enrichments[referenceSubType] disRes[disease] = res #for disease in disRes: # print 'D FULL %s:' %disease, disRes[disease] # print 'D DICTS %s:'%disease, disRes[disease].getAllGlobalResultDicts() # print 'DISEASE %s:'%disease, disRes[disease].getAllGlobalResults() print 'Total run time (excluding figure generation): %i seconds.' % ( time() - startTime) print '</pre>' #print GalaxyInterface.getHtmlBeginForRuns() print '<h1>Results</h1>' if len(allDiseases) > 1: try: heatMapLink = disRes.getLinkToClusteredHeatmap( 'Heatmap', galaxyFn) print '<h3>Heatmap</h3>', heatMapLink #, '<br>' except: print '<p>Creation of heatmap failed</p>' tableOutput = disRes.getHtmlResultsTable(includeDetailedResults) print '<h3>Results table</h3>', tableOutput if choices[-1]: print '<h3>Prior coloring table</h3>' colorFn = ExternalTrackManager.extractFnFromGalaxyTN( choices[-1].split(':')) print disRes.getColoredSortedReferencesTable(colorFn) if includeLocalResults: print '<h3>Local results</h3>' print disRes.getLinksToAllLocalHtmlResultsTables(galaxyFn)
fromDir = createDirPath(['GESourceTracks'], 'TestGenome', allowOverlaps=allowOverlaps) toDir = createDirPath([], 'ModelsForExternalTracks', allowOverlaps=allowOverlaps) try: if not os.path.exists(toDir): shutil.copytree(fromDir, toDir) print 'OK: Copied from %s to %s.' % (fromDir, toDir) except Exception, e: print 'FAILED: Error occurred copying from %s to %s: ' % ( fromDir, toDir) + str(e).strip() sys.exit(1) for track in ProcTrackOptions.getSubtypes('TestGenome', ['GESourceTracks']): ti = TrackInfo('TestGenome', ['GESourceTracks', track]) ti.trackName = [track] ti.genome = 'ModelsForExternalTracks' ti.store() from quick.util.GenomeInfo import GenomeInfo from datetime import datetime gi = GenomeInfo('TestGenome') gi.fullName = 'TestGenome' gi.sourceUrls = ['http://hgdownload.cse.ucsc.edu/goldenPath/hg18/chromosomes/chr21.fa.gz', \ 'http://hgdownload.cse.ucsc.edu/goldenPath/hg18/chromosomes/chrM.fa.gz'] #gi.sourceChrNames = ['chr21', 'chrM'] gi.installedBy = 'Setup.py' gi.genomeBuildSource = 'NCBI' gi.genomeBuildName = 'hg18'