def _createRandomizedNumpyArrays(self, binLen, starts, ends, vals, strands, ids, edges, weights, extras, region):
    """Randomize the track elements of one region by distance to a reference track.

    The reference track view is fetched for ``region`` from the track named by
    ``self._trackNameIntensity`` and handed, together with all the element
    arrays, to ``_createRandomizedNumpyArraysFromDistanceToReference``, whose
    result is returned unchanged.

    Raises:
        InvalidRunSpecException: if the reference track view has no values in
            ``region``, or if the reference track is dense.
    """
    # self._trackNameIntensity based on naming convenience wrt. inheritance
    referenceTV = PlainTrack(self._trackNameIntensity).getTrackView(region)

    # Explicit length test kept on purpose: the value is presumably a numpy
    # array (per the method name), whose truth value would be ambiguous.
    if len(referenceTV.valsAsNumpyArray()) == 0:
        # The literal ends with a space so that the region does not run
        # into the preceding word in the rendered message.
        raise InvalidRunSpecException(
            'Error: No reference data available for sampling randomized locations in region '
            + str(region)
            + '. Please check that the reference track was created with the same main track that is being randomized in this analysis.')

    if referenceTV.trackFormat.isDense():
        raise InvalidRunSpecException(
            'Error: Cannot sample by distance to reference if reference is a dense track')

    return self._createRandomizedNumpyArraysFromDistanceToReference(
        binLen, starts, ends, vals, strands, ids, edges, weights, extras, referenceTV)
def _createRandomizedNumpyArrays(self, binLen, starts, ends, vals, strands, ids, edges, weights, extras, region):
    """Randomize the track elements of one region within a binary universe track.

    Looks up the track view of ``self._trackNameUniverse`` for ``region`` and
    forwards it, along with the element arrays, to
    ``_createRandomizedNumpyArraysFromBinaryUniverse``.

    Raises:
        InvalidRunSpecException: if the universe track is dense.
    """
    universeTrack = PlainTrack(self._trackNameUniverse)
    universeTV = universeTrack.getTrackView(region)

    if not universeTV.trackFormat.isDense():
        return self._createRandomizedNumpyArraysFromBinaryUniverse(
            binLen, starts, ends, vals, strands, ids, edges, weights, extras, universeTV)

    raise InvalidRunSpecException('Error: Universe needs to be a binary (non-dense) track')
def _constructBins(regSpec, binSpec, genome, trackNames):
    """Construct and check the user bin source for an analysis.

    Returns:
        A two-element list ``[errorResults, userBinSource]``. On success the
        first element is None; on failure the second element is None and the
        first is a ``Results`` object carrying an ``InvalidRunSpecException``.

    Raises:
        Exception: the original error is re-raised when
            ``DebugConfig.PASS_ON_BATCH_EXCEPTIONS`` is set.
    """
    try:
        from quick.application.GalaxyInterface import GalaxyInterface
        userBinSource = GalaxyInterface._getUserBinSource(regSpec, binSpec, genome, trackNames)
        return [None, userBinSource]
    except Exception as e:
        results = Results([], [], '')
        results.addError(InvalidRunSpecException(
            'Error in specification of analysis region or binsize: ' + str(e)))
        # %-formatting instead of '+': a non-string spec must not raise a
        # TypeError here and mask the error that is being reported.
        logMessage('Error in specification of analysis region (%s) or binsize: (%s)'
                   % (regSpec, binSpec))
        if DebugConfig.PASS_ON_BATCH_EXCEPTIONS:
            raise
        return [results, None]
def _createRandomizedNumpyArrays(self, binLen, starts, ends, vals, strands, ids, edges, weights, extras, region):
    """Randomize the track elements of one region using a binary intensity track.

    Looks up the track view of ``self._trackNameIntensity`` for ``region`` and
    forwards it, along with the element arrays, to
    ``_createRandomizedNumpyArraysFromBinaryIntensity``.

    Raises:
        InvalidRunSpecException: if the intensity track is dense.
    """
    # self._trackNameIntensity based on naming convenience wrt. inheritance
    intensityTrack = PlainTrack(self._trackNameIntensity)
    referenceTV = intensityTrack.getTrackView(region)

    if not referenceTV.trackFormat.isDense():
        return self._createRandomizedNumpyArraysFromBinaryIntensity(
            binLen, starts, ends, vals, strands, ids, edges, weights, extras, referenceTV)

    raise InvalidRunSpecException('Error: Intensity needs to be a binary (non-dense) track')
def _inferTrackName(trackName, genome, fullAccess): if len(trackName) == 0 or \ len(trackName) == 1 and trackName[0].lower() in ['blank', 'none', 'dummy', '_', ' ', '']: return None # trackName = rawTN.replace('_',' ').split(':') # trackName = rawTN.split(':') # # trackName = convertTNstrToTNListFormat(rawTN) if ProcTrackOptions.isValidTrack(genome, trackName, fullAccess): return trackName else: raise InvalidRunSpecException('Error in trackname specification. \'' +\ ':'.join(trackName) + '\' does not match any tracknames. ' +\ 'This may be because of limited user permissions.')
def _createRandomizedNumpyArrays(self, binLen, starts, ends, vals, strands, ids, edges, weights, extras, region):
    """Randomize the track elements of one region according to an intensity function.

    The intensity track view is fetched for ``region`` from the track named by
    ``self._trackNameIntensity`` and handed, together with all the element
    arrays, to ``_createRandomizedNumpyArraysFromIntensityFunction``, whose
    result is returned unchanged.

    Raises:
        InvalidRunSpecException: if the intensity track view has no values in
            ``region``.
        NotImplementedError: if the intensity track is not dense.
        AssertionError: if a dense intensity track is not number-valued.
    """
    intensityTV = PlainTrack(self._trackNameIntensity).getTrackView(region)

    # Explicit length test kept on purpose: the value is presumably a numpy
    # array (per the method name), whose truth value would be ambiguous.
    if len(intensityTV.valsAsNumpyArray()) == 0:
        # The literal ends with a space so that the region does not run
        # into the preceding word in the rendered message.
        raise InvalidRunSpecException(
            'Error: No intensity data available for sampling randomized locations in region '
            + str(region)
            + '. Please check that the intensity track was created with the same main track that is being randomized in this analysis.')

    # Only dense intensity tracks are supported by this variant.
    if not intensityTV.trackFormat.isDense():
        raise NotImplementedError

    assert intensityTV.trackFormat.isValued('number')
    return self._createRandomizedNumpyArraysFromIntensityFunction(
        binLen, starts, ends, vals, strands, ids, edges, weights, extras, intensityTV)
def parseBatchLine(batchLine, genome, fullAccess):
    """Parse one line of a batch specification into a ``BatchContents`` object.

    Returns:
        * None for a blank line or a line whose first character is '#'.
        * The tuple ``(results, None, None, None, None)`` when the line does
          not split into exactly 6 columns; ``results`` carries the error.
        * Otherwise a ``BatchContents`` with ``regSpec``, ``binSpec``,
          ``trackName1``/``trackName2`` and their cleaned counterparts set.
          If track name handling fails, ``errorResult`` is set on it instead.

    NOTE(review): the column-count error is returned as a tuple while later
    errors are attached to the returned object; kept as is because callers
    are not visible from here.

    Raises:
        InvalidRunSpecException, IdenticalTrackNamesError: re-raised from
            track name handling when ``DebugConfig.PASS_ON_BATCH_EXCEPTIONS``
            is set.
    """
    # The blank test must come first: indexing batchLine[0] on an empty
    # string raises IndexError instead of skipping the line.
    if batchLine.strip() == '' or batchLine[0] == '#':
        return

    from urllib import unquote

    # Split and check number of columns
    cols = batchLine.strip().split(BATCH_COL_SEPARATOR)
    if len(cols) != 6:
        results = Results(['N/A'], ['N/A'], 'N/A')
        results.addError(InvalidRunSpecException(
            'Error in batch specification. 6 columns are required, while '
            + str(len(cols)) + ' are given: ' + batchLine))
        return results, None, None, None, None

    bc = BatchContents()
    bc.regSpec = cols[1]
    bc.binSpec = unquote(cols[2])

    from quick.application.ExternalTrackManager import ExternalTrackManager
    binSpecParts = bc.binSpec.split(':')
    if ExternalTrackManager.isGalaxyTrack(binSpecParts):
        bc.binSpec = ExternalTrackManager.extractFnFromGalaxyTN(binSpecParts)

    try:
        from quick.application.GalaxyInterface import GalaxyInterface
        bc.trackName1 = [unquote(x) for x in cols[3].split(':')]
        bc.trackName2 = [unquote(x) for x in cols[4].split(':')]
        bc.cleanedTrackName1, bc.cleanedTrackName2 = GalaxyInterface._cleanUpTracks(
            [bc.trackName1, bc.trackName2], genome, realPreProc=True)

        bc.cleanedTrackName1 = BatchRunner._inferTrackName(
            bc.cleanedTrackName1, genome, fullAccess)
        bc.cleanedTrackName2 = BatchRunner._inferTrackName(
            bc.cleanedTrackName2, genome, fullAccess)
    except (InvalidRunSpecException, IdenticalTrackNamesError) as e:
        if DebugConfig.PASS_ON_BATCH_EXCEPTIONS:
            raise
        bc.errorResult = Results(['N/A'], ['N/A'], 'N/A')
        bc.errorResult.addError(e)

    return bc
def parseBatchLine(batchLine, genome, fullAccess):
    """Parse one line of a batch specification into a ``BatchContents`` object.

    Returns:
        * None for a blank line or a line whose first character is '#'.
        * The tuple ``(results, None, None, None, None)`` when the line does
          not split into exactly 6 columns; ``results`` carries the error.
        * Otherwise a ``BatchContents`` with ``regSpec``, ``binSpec``,
          ``statClassName``, ``paramDict``, ``trackNames`` and
          ``cleanedTrackNames`` set. If inferring the track names fails,
          ``errorResult`` is set on it instead of ``cleanedTrackNames``.

    NOTE(review): the column-count error is returned as a tuple while later
    errors are attached to the returned object; kept as is because callers
    are not visible from here.

    Raises:
        InvalidRunSpecException, IdenticalTrackNamesError: re-raised from
            track name inference when ``DebugConfig.PASS_ON_BATCH_EXCEPTIONS``
            is set.
    """
    # The blank test must come first: indexing batchLine[0] on an empty
    # string raises IndexError instead of skipping the line.
    if batchLine.strip() == '' or batchLine[0] == '#':
        return

    from urllib import unquote

    # Split and check number of columns
    cols = batchLine.strip().split(BATCH_COL_SEPARATOR)
    if len(cols) != 6:
        results = Results(['N/A'], ['N/A'], 'N/A')
        results.addError(InvalidRunSpecException(
            'Error in batch specification. 6 columns are required, while '
            + str(len(cols)) + ' are given: ' + batchLine))
        return results, None, None, None, None

    bc = BatchContents()
    bc.regSpec = cols[1]
    bc.binSpec = unquote(cols[2])

    from quick.application.ExternalTrackManager import ExternalTrackManager
    binSpecParts = bc.binSpec.split(':')
    if ExternalTrackManager.isGalaxyTrack(binSpecParts):
        bc.binSpec = ExternalTrackManager.extractFnFromGalaxyTN(binSpecParts)

    bc.statClassName, bc.paramDict = BatchRunner._parseClassAndParams(cols[5])

    # Columns 3 and 4 hold the two track names; an intensity track given as a
    # statistic parameter is appended as a third track name.
    bc.trackNames = [[unquote(x) for x in cols[i].split(':')] for i in [3, 4]]
    if 'trackNameIntensity' in bc.paramDict:
        bc.trackNames.append(
            convertTNstrToTNListFormat(bc.paramDict['trackNameIntensity'], doUnquoting=True))

    from quick.application.GalaxyInterface import GalaxyInterface
    # NOTE(review): this call sits outside the try block below, so an error
    # raised here is not converted into bc.errorResult - confirm intended.
    partlyCleanedTrackNames = GalaxyInterface._cleanUpTracks(
        bc.trackNames, genome, realPreProc=True)

    try:
        bc.cleanedTrackNames = BatchRunner._inferTrackNames(
            partlyCleanedTrackNames, genome, fullAccess)
    except (InvalidRunSpecException, IdenticalTrackNamesError) as e:
        if DebugConfig.PASS_ON_BATCH_EXCEPTIONS:
            raise
        bc.errorResult = Results(['N/A'], ['N/A'], 'N/A')
        bc.errorResult.addError(e)

    return bc
def execute(choices, galaxyFn=None, username=''): #setupDebugModeAndLogging() from time import time startTime = time() print HtmlCore().begin() print '<pre>' genome = choices[0] #assert genome=='hg19' flankSize = choices[3] if choices[1] == 'Prepared catalogues': if choices[2] == 'GiulioNewGwas': gwasTnBase = 'Private:GK:NewGwasBase'.split(':') elif choices[2] == 'GiulioAllGwas': gwasTnBase = 'Private:GK:AllGiulioGwasSnpsAsOf9feb13'.split( ':') elif choices[2] == 'GiulioMay13Gwas': gwasTnBase = 'Private:GK:Gwas:GiulioMay13'.split(':') elif choices[2] == 'SmallTest': gwasTnBase = 'Private:GK:Gwas'.split(':') else: raise gwasTnBase += [flankSize] elif choices[1] == 'Custom track': gwasTnBase = choices[2].split(':') assert flankSize == 'SNPs' else: assert False, choices[1] referenceTrackSource = choices[4] normalization = choices[5] assert normalization == 'CoverageDepth' analysisType = choices[6] if analysisType == 'Enrichment': ResultClass = EnrichmentGwasResults elif analysisType == 'Testing': ResultClass = HypothesisTestingGwasResults nullmodelMapping = dict( zip([ 'Sample disease regions uniformly', 'Sample disease regions with preserved inter-region spacings', 'Sample disease regions with preserved distance to nearest exon' ], [ 'PermutedSegsAndSampledIntersegsTrack_', 'PermutedSegsAndIntersegsTrack_', 'SegsSampledByDistanceToReferenceTrack_,trackNameIntensity=Genes and gene subsets^Exons^Ensembl exons' ])) nullmodel = nullmodelMapping[choices[9]] assert nullmodel in [ 'PermutedSegsAndSampledIntersegsTrack_', 'PermutedSegsAndIntersegsTrack_', 'SegsSampledByDistanceToReferenceTrack_,trackNameIntensity=Genes and gene subsets^Exons^Ensembl exons' ] else: raise kernelType = choices[7] kernelParam = choices[8] if choices[10] == 'Include links to full underlying results': includeDetailedResults = True elif choices[10] == 'Only produce main result values': includeDetailedResults = False else: raise InvalidRunSpecException('Did not understand option: %s' % choices[12]) 
mcDepth = choices[11] if choices[12] == 'yes': includeLocalResults = True elif choices[12] == 'no': includeLocalResults = False else: raise InvalidRunSpecException('Did not understand option: %s' % choices[12]) if choices[15] == 'yes': useCache = True elif choices[15] == 'no': useCache = False else: raise InvalidRunSpecException('Did not understand option: %s' % choices[15]) if choices[16] == 'yes': printProgress = True elif choices[16] == 'no': printProgress = False else: raise InvalidRunSpecException('Did not understand option: %s' % choices[16]) from quick.application.GalaxyInterface import GalaxyInterface #print GalaxyInterface.getHtmlForToggles() #print GalaxyInterface.getHtmlBeginForRuns() #from quick.webtools.GwasAPI import getEnrichmentValues print 'Progress: ' #print 'base: ',gwasTnBase #print 'leaves: ',GalaxyInterface.getSubTrackNames(genome, gwasTnBase,deep=False, username=username) disRes = MultiGwasResults() from gold.application.HyperBrowserCLI import getSubTrackLeafTerms from quick.application.ProcTrackOptions import ProcTrackOptions #for gwasTrackLeaf in GalaxyInterface.getSubTrackNames(genome, gwasTnBase,deep=False, username=username): allDiseases = getSubTrackLeafTerms(genome, gwasTnBase, username=username) if len(allDiseases) == 0: assert ProcTrackOptions.isValidTrack( genome, gwasTnBase, GalaxyInterface.userHasFullAccess( username)), 'Genome: %s, TN: %s, Access: %s' % ( genome, gwasTnBase, GalaxyInterface.userHasFullAccess(username)) allDiseases = gwasTnBase[-1:] gwasTnBase = gwasTnBase[:-1] for disease in allDiseases: #print 'Leaf:',gwasTrackLeaf[0] #if not gwasTrackLeaf[0] in ['11 - Height.txt']: #if not disease in ['1 - Alzheimer.txt','10 - Graves.txt']:#['Malaria','UC']: # print 'IGNORING: ', gwasTrackLeaf[0] # continue #if gwasTrackLeaf in [[],None] or gwasTrackLeaf[0]=='-- All subtypes --': #continue #gwasTn = ':'.join(gwasTnBase + [gwasTrackLeaf[0]]) gwasTn = ':'.join(gwasTnBase + [disease]) #print 'Running API: ', 
"$getEnrichmentValues(%s, '%s', '%s')" % ([gwasTn], referenceTrackSource, normalization) #enrichmentsDict = getEnrichmentValues([gwasTn], referenceTrackSource, normalization)#, ['114 - Brain_Mid_Frontal_Lobe.txt','134 - Rectal_Smooth_Muscle.txt']) #assert len(enrichmentsDict.values())==1 #enrichments = enrichmentsDict.values()[0] #if gwasTrackLeaf[0] in ['Malaria','UC']: #print 'HERE IS WHAT I GOT: ',enrichmentsDict #print 'ENR: ',enrichments #print 'One: ', (enrichments.values()[0])['enrichment']['13 - CD4'].getGlobalResult() #assert 'enrichment' in (enrichments.values()[0]), (enrichments.values()[0]) #disRes[gwasTrackLeaf[0]] = (enrichments.values()[0])['enrichment'] #disRes[gwasTrackLeaf[0]] = (enrichments.values()[0]) #disease = gwasTrackLeaf[0] #disRes[disease] = [x.getGlobalResult() for x in enrichments] #print 'DISres: ', disRes[gwasTrackLeaf[0]] #from quick.util.CommonFunctions import extractIdFromGalaxyFn res = ResultClass(gwasId=disease, verbose=True, galaxyFn=galaxyFn) #referenceSubTypes = enrichments.keys() #referenceSubTypes = [x[0] for x in GalaxyInterface.getSubTrackNames(genome, 'Private:GK:Psych:DHSs'.split(':'), deep=False, username=username) if not x[0] == '-- All subtypes --'] if referenceTrackSource == 'H3K4me3': refTrackBase = 'Private:GK:Psych:H3K4me3' refTrackCoverageFunction = 'Private^GK^Psych^H3K4me3CoverageTrack' elif referenceTrackSource == 'DHS': refTrackBase = 'Private:GK:Psych:DHSs' refTrackCoverageFunction = 'Private^GK^Psych^DHSCoverageTrack' elif referenceTrackSource == 'Chromatin state 1-AP': refTrackBase = 'Private:Anders:Chromatin State Segmentation:1_Active_Promoter' refTrackCoverageFunction = 'Private^GWAS^Chromatin^CoverageFunctionTracks^1_Active_PromoterV2' elif referenceTrackSource == 'Chromatin state 4-SE': refTrackBase = 'Private:Anders:Chromatin State Segmentation:4_Strong_Enhancer' refTrackCoverageFunction = 'Private^GWAS^Chromatin^CoverageFunctionTracks^4_Strong_Enhancer' elif referenceTrackSource == 'Chromatin state 
5-SE': refTrackBase = 'Private:Anders:Chromatin State Segmentation:5_Strong_Enhancer' refTrackCoverageFunction = 'Private^GWAS^Chromatin^CoverageFunctionTracks^5_Strong_Enhancer' else: raise refTrackSelectType = choices[13] allReferenceTracks = [ x[0] for x in GalaxyInterface.getSubTrackNames( genome, refTrackBase.split(':'), deep=False, username=username) if not x[0] == '-- All subtypes --' ] if refTrackSelectType == 'Use all reference tracks': referenceSubTypes = allReferenceTracks elif refTrackSelectType == 'Select single reference track': referenceSubTypes = [choices[14]] assert referenceSubTypes[0] in allReferenceTracks elif refTrackSelectType == 'Select a range among all reference tracks': try: firstRefTrack, lastRefTrack = choices[14].split('-') referenceSubTypes = allReferenceTracks[ int(firstRefTrack):int(lastRefTrack) + 1] print 'Analyzing %s among a total of %s reference tracks' % ( choices[14], len(allReferenceTracks)) except Exception: print 'Range format should be e.g. "15-18".' 
raise else: raise for referenceSubType in referenceSubTypes: #if not referenceSubType in ['107 - Adult_Kidney.txt','106 - Adipose_Nuclei.txt']: # #print 'IGNORING: ',referenceSubType # continue # if analysisType == 'Enrichment': res[referenceSubType] = directGetEnrichment( gwasTn, referenceSubType, refTrackBase, kernelType, kernelParam, useCache, printProgress) elif analysisType == 'Testing': res[referenceSubType] = directGetTestResults( gwasTn, referenceSubType, refTrackBase, kernelType, kernelParam, refTrackCoverageFunction, nullmodel, mcDepth, useCache, printProgress) else: raise #print disease, referenceSubType, res[referenceSubType] #print "ENR: ",enrichments #res[referenceSubType] = enrichments[referenceSubType] disRes[disease] = res #for disease in disRes: # print 'D FULL %s:' %disease, disRes[disease] # print 'D DICTS %s:'%disease, disRes[disease].getAllGlobalResultDicts() # print 'DISEASE %s:'%disease, disRes[disease].getAllGlobalResults() print 'Total run time (excluding figure generation): %i seconds.' % ( time() - startTime) print '</pre>' #print GalaxyInterface.getHtmlBeginForRuns() print '<h1>Results</h1>' if len(allDiseases) > 1: try: heatMapLink = disRes.getLinkToClusteredHeatmap( 'Heatmap', galaxyFn) print '<h3>Heatmap</h3>', heatMapLink #, '<br>' except: print '<p>Creation of heatmap failed</p>' tableOutput = disRes.getHtmlResultsTable(includeDetailedResults) print '<h3>Results table</h3>', tableOutput if choices[-1]: print '<h3>Prior coloring table</h3>' colorFn = ExternalTrackManager.extractFnFromGalaxyTN( choices[-1].split(':')) print disRes.getColoredSortedReferencesTable(colorFn) if includeLocalResults: print '<h3>Local results</h3>' print disRes.getLinksToAllLocalHtmlResultsTables(galaxyFn)