def _compute(self):
    '''
    Run the raw statistic once per (track 1 subtype, track 2 subtype) pair.

    Returns a dict with the single key 'Result', mapping to an OrderedDict
    with:
      'Matrix' - nested list with one row per track 1 subtype and one
                 column per track 2 subtype, filled in row-major order
      'Rows'   - the track 1 subtypes that were used
      'Cols'   - the track 2 subtypes that were used

    Raises IncompatibleTracksError if the directory path of any combined
    track name does not exist, and AssertionError if either track has no
    subtypes.
    '''
    # 'rawStatistic' is not forwarded to the raw statistic itself
    statKwArgs = copy(self._kwArgs)
    statKwArgs.pop('rawStatistic', None)

    rowSubtypes = ProcTrackOptions.getSubtypes(
        self.getGenome(), self._track.trackName, True)
    colSubtypes = ProcTrackOptions.getSubtypes(
        self.getGenome(), self._track2.trackName, True)
    assert len(rowSubtypes) > 0, str(self._track.trackName)
    assert len(colSubtypes) > 0, str(self._track2.trackName)

    # With 'minimal' given, only the first subtype of each track is used
    if 'minimal' in self._kwArgs:
        rowSubtypes, colSubtypes = rowSubtypes[:1], colSubtypes[:1]

    cellResults = []
    for rowSubtype in rowSubtypes:
        for colSubtype in colSubtypes:
            rowTn = self._track.trackName + [rowSubtype]
            colTn = self._track2.trackName + [colSubtype]

            # The second path is only looked up when the first one exists
            bothOnDisk = \
                os.path.exists(createDirPath(rowTn, self.getGenome())) and \
                os.path.exists(createDirPath(colTn, self.getGenome()))
            if not bothOnDisk:
                raise IncompatibleTracksError

            rowTrack = Track(rowTn)
            rowTrack.formatConverters = self._track.formatConverters
            colTrack = Track(colTn)
            colTrack.formatConverters = self._track2.formatConverters

            pairStat = self._rawStatistic(
                self._region, rowTrack, colTrack, **statKwArgs)
            cellResults.append(pairStat.getResult())
            # NOTE(review): flushing after every pair is assumed here; the
            # nesting level of this call should be confirmed against the
            # original file.
            ResultsMemoizer.flushStoredResults()

    matrixShape = (len(rowSubtypes), len(colSubtypes))
    matrix = array(cellResults).reshape(matrixShape).tolist()
    return {
        'Result': OrderedDict([('Matrix', matrix),
                               ('Rows', rowSubtypes),
                               ('Cols', colSubtypes)])
    }
def _compute(self):
    '''
    Run the raw statistic for every track 1 subtype against the '0' and '1'
    subtypes of track 2.

    Returns an OrderedDict with:
      'Matrix' - nested list with one row per track 1 subtype and two
                 columns (results for track 2 subtype '0', then '1')
      'Rows'   - the track 1 subtypes
      'Cols'   - the fixed labels ['Case','Control']

    Raises IncompatibleTracksError if the directory path of any combined
    track name does not exist, and AssertionError if track 1 has no
    subtypes.
    '''
    # 'rawStatistic' is not forwarded to the raw statistic itself
    statKwArgs = copy(self._kwArgs)
    statKwArgs.pop('rawStatistic', None)

    rowSubtypes = ProcTrackOptions.getSubtypes(
        self.getGenome(), self._track.trackName, True)
    assert len(rowSubtypes) > 0

    cellResults = []
    for rowSubtype in rowSubtypes:
        for colSubtype in ['0', '1']:
            rowTn = self._track.trackName + [rowSubtype]
            colTn = self._track2.trackName + [colSubtype]

            # The second path is only looked up when the first one exists
            bothOnDisk = \
                os.path.exists(createDirPath(rowTn, self.getGenome())) and \
                os.path.exists(createDirPath(colTn, self.getGenome()))
            if not bothOnDisk:
                raise IncompatibleTracksError

            rowTrack = Track(rowTn)
            rowTrack.formatConverters = self._track.formatConverters
            colTrack = Track(colTn)
            colTrack.formatConverters = self._track2.formatConverters

            pairStat = self._rawStatistic(
                self._region, rowTrack, colTrack, **statKwArgs)
            cellResults.append(pairStat.getResult())
            # NOTE(review): flushing after every pair is assumed here; the
            # nesting level of this call should be confirmed against the
            # original file.
            ResultsMemoizer.flushStoredResults()

    # Two results per track 1 subtype -> two columns per row
    matrix = array(cellResults).reshape((-1, 2)).tolist()
    return OrderedDict([('Matrix', matrix),
                        ('Rows', rowSubtypes),
                        ('Cols', ['Case', 'Control'])])
def getOptionsBox15(prevChoices):
    '''
    Options for box 15, driven by the choice made two boxes earlier.

    - 'Select single reference track': returns the subtypes found under
      Private:GK:Psych:DHSs for the genome in prevChoices[0] (asserts that
      prevChoices[4] is 'DHS').
    - 'Select a range among all reference tracks': returns '' .
    - anything else: returns None.
    '''
    referenceMode = prevChoices[-2]

    if referenceMode == 'Select a range among all reference tracks':
        return ''

    if referenceMode == 'Select single reference track':
        assert prevChoices[4] == 'DHS'
        dhsParentTn = 'Private:GK:Psych:DHSs'.split(':')
        return ProcTrackOptions.getSubtypes(prevChoices[0], dhsParentTn, True)
def getOptionsBoxTfTracks(cls, prevChoices):
    '''
    Build the TF track options for the chosen TF source.

    Returns:
      - OrderedDict mapping each subtype name to 'False' when the source is
        'Hyperbrowser repository'
      - OrderedDict mapping each ':'-joined GSuite track name to 'False'
        when a history element with 'gsuite' as its second ':'-separated
        field passes all GSuite checks
      - a one-element list with the ':'-joined preprocessed track name for
        any other history element given as a string
      - None in every other case (nothing selected yet, unknown source,
        non-string history selection, or a failed GSuite check)
    '''
    if prevChoices.sourceTfsDetails == cls.SELECT:
        return None

    genome = prevChoices.genome
    sourceTfs = prevChoices.sourceTfs
    sourceTfsDetails = prevChoices.sourceTfsDetails

    if sourceTfs == cls.SELECT:
        return None

    if sourceTfs == 'Hyperbrowser repository':
        tfSourceTN = TfTrackNameMappings.getTfTrackNameMappings(
            prevChoices.genome)[sourceTfsDetails]
        subtypes = ProcTrackOptions.getSubtypes(
            prevChoices.genome, tfSourceTN, True)
        return OrderedDict((subtype, 'False') for subtype in subtypes)

    if sourceTfs != cls.REGIONS_FROM_HISTORY:
        return None

    if not isinstance(sourceTfsDetails, basestring):
        return None

    galaxyTN = sourceTfsDetails.split(':')
    if galaxyTN[1] != "gsuite":
        # Plain history element: a single preprocessed track
        tfTrackName = ExternalTrackManager.getPreProcessedTrackFromGalaxyTN(
            genome, galaxyTN)
        return [':'.join(tfTrackName)]

    # GSuite history element: every check must come back without an error
    # string, otherwise no options are offered.
    if GeneralGuiTool._checkGSuiteFile(sourceTfsDetails):
        return None

    gSuite = getGSuiteFromGalaxyTN(sourceTfsDetails)
    if GeneralGuiTool._checkGSuiteTrackListSize(gSuite, 1, 1000):
        return None

    if GeneralGuiTool._checkGSuiteRequirements(
            gSuite,
            AllTfsOfRegions.GSUITE_ALLOWED_FILE_FORMATS,
            AllTfsOfRegions.GSUITE_ALLOWED_LOCATIONS,
            AllTfsOfRegions.GSUITE_ALLOWED_TRACK_TYPES,
            AllTfsOfRegions.GSUITE_DISALLOWED_GENOMES):
        return None

    # The GSuite is loaded a second time before listing its tracks
    gSuite = getGSuiteFromGalaxyTN(sourceTfsDetails)
    selectedTrackNames = [':'.join(track.trackName)
                          for track in gSuite.allTracks()]
    return OrderedDict((trackName, 'False')
                       for trackName in selectedTrackNames)
def getOptionsBoxDataset(prevChoices):
    '''
    List the subtypes below the selected interactions / cell line.

    Only genomes 'hg19' and 'mm9' are handled; for any other genome the
    function returns None. The lookup is done with fullAccess=False.
    '''
    if prevChoices.genome not in ['hg19', 'mm9']:
        return None

    parentTrack = ColocalizationIn3DTool.PARENT_TRACKNAME + \
        [prevChoices.interactions, prevChoices.cellLine]
    return ProcTrackOptions.getSubtypes(
        prevChoices.genome, parentTrack, fullAccess=False)
def _getAllDiseases(prevChoices): if isinstance(prevChoices[2], dict): return prevChoices[2].keys() else: tn = SelectDiseaseTool._getDiseaseTn(prevChoices) return [ x for x in ProcTrackOptions.getSubtypes( SelectDiseaseTool.GENOME, tn) ]
def _getAllPwms(prevChoices): if isinstance(prevChoices[2], dict): return prevChoices[2].keys() else: tfTrackName = SelectTfTool.TRACK_DICT[prevChoices[0]] return [ pwm for pwm in ProcTrackOptions.getSubtypes( SelectTfTool.GENOME, tfTrackName) ]
def getOptionsBox3(prevChoices):
    '''
    Options for box 3, depending on the seed source chosen in box 2.

    For 'TFBS from history' a history selector for bed elements is
    returned; otherwise the subtypes of the track name mapped to the chosen
    source for the genome in prevChoices[0].
    '''
    if prevChoices[1] == 'TFBS from history':
        return ('__history__', 'bed')

    genome = prevChoices[0]
    sourceToTrackName = TfInfo.getTfTrackNameMappings(genome)
    return ProcTrackOptions.getSubtypes(
        genome, sourceToTrackName[prevChoices[1]], True)
def getOptionsBox3(prevChoices):
    '''
    Options for box 3, depending on the seed source chosen in box 2.

    For 'TFBS from history' a history selector for bed elements is
    returned; otherwise the subtypes of the track name mapped to the chosen
    source for the genome in prevChoices[0].
    '''
    if prevChoices[1] == 'TFBS from history':
        return ('__history__', 'bed')

    genome = prevChoices[0]
    sourceToTrackName = TfInfo.getTfTrackNameMappings(genome)
    return ProcTrackOptions.getSubtypes(
        genome, sourceToTrackName[prevChoices[1]], True)
def superLine2batch(cls, line, genome):
    '''
    Expand one "super" batch line into a list of plain batch lines.

    - Blank lines and lines starting with '#' give [].
    - A line with four columns gets a 'dummy' column inserted before the
      last one.
    - The first of columns 2-4 that contains '/' is split at its first '/',
      and both alternatives are expanded recursively.
    - Otherwise, the first of columns 2-3 that contains '*' must end in a
      ':*' part; the '*' is replaced by each (quoted) subtype of the
      unquoted parent track name and every variant is expanded recursively.
    - A line with nothing left to expand is returned as a single entry,
      prefixed by its generated batch name and the column separator.

    Raises Exception if a '*' parent track has no subtypes, and
    AssertionError if '*' appears anywhere but as the last ':' part.
    '''
    if not line.strip() or line[0] == '#':
        return []

    cols = line.split(BATCH_COL_SEPARATOR)
    if len(cols) == 4:
        cols = cols[:3] + ['dummy', cols[3]]
        line = BATCH_COL_SEPARATOR.join(cols)

    from quick.batch.BatchRunner import BatchRunner

    def expandColumn(colIndex, alternatives):
        # One recursive expansion per alternative value of the column;
        # results are concatenated in the order of the alternatives.
        batchLines = []
        for alternative in alternatives:
            variantCols = \
                cols[0:colIndex] + [alternative] + cols[colIndex + 1:]
            batchLines = batchLines + cls.superLine2batch(
                BATCH_COL_SEPARATOR.join(variantCols), genome)
        return batchLines

    # Columns 2-4 are tn1, tn2 and statistic (not the bin specification)
    for colIndex, col in enumerate(cols[2:5], 2):
        if '/' in col:
            firstPart, _, remainder = col.partition('/')
            return expandColumn(colIndex, (firstPart, remainder))

    for colIndex, col in enumerate(cols[2:4], 2):
        if '*' not in col:
            continue

        typeParts = col.split(':')
        assert typeParts[-1] == '*' and not any(
            '*' in part for part in typeParts[:-1])
        parentParts = typeParts[:-1]
        unquotedTrackName = [unquote(part) for part in parentParts]

        starOptions = ProcTrackOptions.getSubtypes(
            genome, unquotedTrackName, True)
        if len(starOptions) == 0:
            raise Exception('No subtracks for parent track: %s' %
                            str(unquotedTrackName))

        typeOptions = [':'.join(parentParts + [quote(starOpt)])
                       for starOpt in starOptions]
        return expandColumn(colIndex, typeOptions)

    return [cls.generateBatchName(line) + BATCH_COL_SEPARATOR + line]
def getOptionsBox6(cls, prevChoices):
    '''
    Options for box 6, depending on the TF source chosen in box 5.

    For cls.REGIONS_FROM_HISTORY a history selector for bed / bedgraph
    elements is returned; otherwise the subtypes of the track name mapped
    to the chosen source for the genome in prevChoices[1].
    '''
    tfSource = prevChoices[4]
    if tfSource == cls.REGIONS_FROM_HISTORY:
        return ('__history__', 'bed', 'bedgraph')

    genome = prevChoices[1]
    sourceToTrackName = TfInfo.getTfTrackNameMappings(genome)
    return ProcTrackOptions.getSubtypes(
        genome, sourceToTrackName[tfSource], True)
def getOptionsBox6(cls, prevChoices):
    '''
    Options for box 6, depending on the TF source chosen in box 5.

    For cls.REGIONS_FROM_HISTORY a history selector for bed / bedgraph
    elements is returned; otherwise the subtypes of the track name mapped
    to the chosen source for the genome in prevChoices[1].
    '''
    tfSource = prevChoices[4]
    if tfSource == cls.REGIONS_FROM_HISTORY:
        return ('__history__', 'bed', 'bedgraph')

    genome = prevChoices[1]
    sourceToTrackName = TfInfo.getTfTrackNameMappings(genome)
    return ProcTrackOptions.getSubtypes(
        genome, sourceToTrackName[tfSource], True)
def _calcAndStoreSubTrackCount(self, trackName):
    '''
    Compute and store the sub track count of trackName.

    The count is the sum of the (truthy) subTrackCount values already
    recorded for each direct subtype, plus one if trackName itself is a
    valid track. The result is written to the track's TrackInfo and stored.
    '''
    trackInfo = TrackInfo(self._genome, trackName)

    childCounts = (
        TrackInfo(self._genome, trackName + [subTrackName]).subTrackCount
        for subTrackName in ProcTrackOptions.getSubtypes(
            self._genome, trackName, True))
    # Children without a recorded count (falsy value) contribute nothing
    total = sum(count for count in childCounts if count)

    if trackInfo.isValid():
        total += 1

    trackInfo.subTrackCount = total
    trackInfo.store()
def getOptionsBoxTrack2(cls, prevChoices):
    '''
    Options for the second track.

    Unless prevChoices[4] is 'yes', a track/history selector is returned.
    Otherwise the result is cls.NO_TRACK_SHORTNAME followed by the subtypes
    under cls.STD_PREFIX_TN for the genome in prevChoices[1], with the
    cached 'track2' entry (if it is among them) moved to the front.
    '''
    if prevChoices[4] != 'yes':
        return '__track__', 'history'

    trackList = ProcTrackOptions.getSubtypes(
        prevChoices[1], cls.STD_PREFIX_TN, True)

    cachedTrack = cls._cacheDict.get('track2')
    if cachedTrack in trackList:
        trackList.remove(cachedTrack)
        trackList.insert(0, cachedTrack)

    return [cls.NO_TRACK_SHORTNAME] + trackList
def getOptionsBoxTrack2(cls, prevChoices):
    '''
    Options for the second track.

    Unless prevChoices[4] is 'yes', a track/history selector is returned.
    Otherwise the result is cls.NO_TRACK_SHORTNAME followed by the subtypes
    under cls.STD_PREFIX_TN for the genome in prevChoices[1], with the
    cached 'track2' entry (if it is among them) moved to the front.
    '''
    if prevChoices[4] != 'yes':
        return '__track__', 'history'

    trackList = ProcTrackOptions.getSubtypes(
        prevChoices[1], cls.STD_PREFIX_TN, True)

    cachedTrack = cls._cacheDict.get('track2')
    if cachedTrack in trackList:
        trackList.remove(cachedTrack)
        trackList.insert(0, cachedTrack)

    return [cls.NO_TRACK_SHORTNAME] + trackList
def _calcAndStoreSubTrackCount(self, trackName):
    '''
    Compute and store the sub track count of trackName.

    The count is the sum of the (truthy) subTrackCount values already
    recorded for each direct subtype, plus one if trackName itself is a
    valid track. The result is written to the track's TrackInfo and stored.
    '''
    trackInfo = TrackInfo(self._genome, trackName)

    childCounts = (
        TrackInfo(self._genome, trackName + [subTrackName]).subTrackCount
        for subTrackName in ProcTrackOptions.getSubtypes(
            self._genome, trackName, True))
    # Children without a recorded count (falsy value) contribute nothing
    total = sum(count for count in childCounts if count)

    if trackInfo.isValid():
        total += 1

    trackInfo.subTrackCount = total
    trackInfo.store()
def getOptionsBoxTfTracks(cls, prevChoices):
    '''
    Options for the TF tracks box, depending on the chosen TF source.

    Returns None when no source is set or the source is cls.SELECT, a
    history selector when the source is cls.REGIONS_FROM_HISTORY, and
    otherwise the subtypes of the track name mapped to the source for the
    selected genome.
    '''
    sourceTfs = prevChoices.sourceTfs
    if not sourceTfs:
        return None

    if sourceTfs == cls.REGIONS_FROM_HISTORY:
        return ('__history__', 'bed', 'category.bed', 'gtrack')

    if sourceTfs == cls.SELECT:
        return None

    sourceToTrackName = TfTrackNameMappings.getTfTrackNameMappings(
        prevChoices.genome)
    return ProcTrackOptions.getSubtypes(
        prevChoices.genome, sourceToTrackName[sourceTfs], True)
def getOptionsBox6(cls, prevChoices):
    '''
    Options for box 6.

    Unless prevChoices[4] is 'yes', a track/history selector is returned;
    otherwise the subtypes under cls.STD_PREFIX_TN for the genome in
    prevChoices[3].
    '''
    if prevChoices[4] != 'yes':
        return '__track__', 'history'

    return ProcTrackOptions.getSubtypes(
        prevChoices[3], cls.STD_PREFIX_TN, True)
def getOptionsBox6(cls, prevChoices):
    '''
    Options for box 6.

    Unless prevChoices[4] is 'yes', a track/history selector is returned;
    otherwise the subtypes under cls.STD_PREFIX_TN for the genome in
    prevChoices[3].
    '''
    if prevChoices[4] != 'yes':
        return '__track__', 'history'

    return ProcTrackOptions.getSubtypes(
        prevChoices[3], cls.STD_PREFIX_TN, True)
def getOptionsBoxInteractions(prevChoices):
    '''
    List the subtypes directly below ColocalizationIn3DTool.PARENT_TRACKNAME.

    prevChoices holds the selections made in the earlier input boxes; only
    its genome attribute is read here. Genomes other than 'hg19' and 'mm9'
    give None. The lookup is done with fullAccess=False.
    '''
    if prevChoices.genome not in ['hg19', 'mm9']:
        return None

    return ProcTrackOptions.getSubtypes(
        prevChoices.genome,
        ColocalizationIn3DTool.PARENT_TRACKNAME,
        fullAccess=False)
def superLine2batch(cls, line, genome):
    '''
    Expand one "super" batch line into a list of plain batch lines.

    - Blank lines and lines starting with '#' give [].
    - A line with four columns gets a 'dummy' column inserted before the
      last one.
    - The first of columns 2-4 that contains '/' is split at its first '/',
      and both alternatives are expanded recursively.
    - Otherwise, the first of columns 2-3 that contains '*' must end in a
      ':*' part; the '*' is replaced by each subtype of the unquoted parent
      track name and every variant is expanded recursively.
    - A line with nothing left to expand is returned as a single entry,
      prefixed by its generated batch name and the column separator.

    Raises Exception if a '*' parent track has no subtypes, and
    AssertionError if '*' appears anywhere but as the last ':' part.
    '''
    if line.strip() == '' or line[0] == '#':
        return []

    cols = line.split(BATCH_COL_SEPARATOR)
    if len(cols) == 4:
        cols = cols[:3] + ['dummy', cols[3]]
        line = BATCH_COL_SEPARATOR.join(cols)

    # NOTE(review): this import is not used below; it is kept in case other
    # code relies on the module being loaded here - confirm before removing.
    from quick.batch.BatchRunner import BatchRunner

    def expandColumn(colIndex, alternatives):
        # One recursive expansion per alternative value of the column;
        # results are concatenated in the order of the alternatives.
        batchLines = []
        for alternative in alternatives:
            variantCols = \
                cols[0:colIndex] + [alternative] + cols[colIndex + 1:]
            batchLines = batchLines + cls.superLine2batch(
                BATCH_COL_SEPARATOR.join(variantCols), genome)
        return batchLines

    for colIndex, col in zip(range(2, 5), cols[2:5]):
        if '/' in col:
            splitPoint = col.find('/')
            return expandColumn(
                colIndex, (col[0:splitPoint], col[splitPoint + 1:]))

    for colIndex, col in zip(range(2, 4), cols[2:4]):
        if '*' in col:
            typeParts = col.split(':')
            assert typeParts[-1] == '*' and \
                not any('*' in part for part in typeParts[:-1])
            unquotedTrackName = [unquote(x) for x in typeParts[:-1]]

            starOptions = ProcTrackOptions.getSubtypes(
                genome, unquotedTrackName, True)
            if len(starOptions) == 0:
                raise Exception('No subtracks for parent track: %s' %
                                str(unquotedTrackName))

            # Imported locally: only `unquote` is known to be in scope in
            # this module.
            from urllib import quote

            # The parent parts arrive quoted (they are unquoted above), so
            # the sub-track names are quoted as well. Without this, a name
            # containing ':' or '*' would be mis-split when the expanded
            # line is parsed again by the recursive call.
            typeOptions = [':'.join(typeParts[:-1] + [quote(starOpt)])
                           for starOpt in starOptions]
            return expandColumn(colIndex, typeOptions)

    return [cls.generateBatchName(line) + BATCH_COL_SEPARATOR + line]
def yielder(self, curTn):
    '''
    Recursively yield the valid track names at and below curTn.

    Subtypes whose name starts with '.' or '_' are skipped. Everything found
    below a subtype is yielded before curTn itself, which is yielded last
    and only if it is a valid track. Nothing is yielded when
    self._avoidLiterature is set and curTn is the genome's 'literature'
    property track name.
    '''
    if self._avoidLiterature and \
            curTn == GenomeInfo.getPropertyTrackName(self._genome, 'literature'):
        return

    subtypes = ProcTrackOptions.getSubtypes(
        self._genome, curTn, self._fullAccess)
    for subtype in subtypes:
        if subtype[0] not in ['.', '_']:
            for descendantTn in self.yielder(curTn + [subtype]):
                yield descendantTn

    if ProcTrackOptions.isValidTrack(self._genome, curTn, self._fullAccess):
        yield curTn
def yielder(self, curTn, level=0):
    '''
    Recursively yield the valid track names at and below curTn.

    Subtypes whose name starts with '.' or '_' are skipped. Everything found
    below a subtype is yielded before curTn itself. curTn is yielded last,
    only if it is a valid track, and at the top level (level 0) only when
    self._includeParentTrack is set. Nothing is yielded when
    self._avoidLiterature is set and curTn is the genome's 'literature'
    property track name.

    level is the recursion depth; recursive calls pass level + 1.
    '''
    if self._avoidLiterature and \
            curTn == GenomeInfo.getPropertyTrackName(self._genome, 'literature'):
        return

    subtypes = ProcTrackOptions.getSubtypes(
        self._genome, curTn, self._fullAccess)
    for subtype in subtypes:
        if subtype[0] not in ['.', '_']:
            for descendantTn in self.yielder(curTn + [subtype],
                                             level=level + 1):
                yield descendantTn

    # Validity is only checked when curTn may be reported at all
    if (self._includeParentTrack or level > 0) and \
            ProcTrackOptions.isValidTrack(self._genome, curTn,
                                          self._fullAccess):
        yield curTn
print 'FAILED: Error when preprocessing TestGenome. Error:' print ' ' + str(e).strip() sys.exit(1) for allowOverlaps in [False, True]: fromDir = createDirPath(['GESourceTracks'], 'TestGenome', allowOverlaps=allowOverlaps) toDir = createDirPath([], 'ModelsForExternalTracks', allowOverlaps=allowOverlaps) try: if not os.path.exists(toDir): shutil.copytree(fromDir, toDir) print 'OK: Copied from %s to %s.' % (fromDir, toDir) except Exception, e: print 'FAILED: Error occurred copying from %s to %s: ' % (fromDir, toDir) + str(e).strip() sys.exit(1) for track in ProcTrackOptions.getSubtypes('TestGenome', ['GESourceTracks']): ti = TrackInfo('TestGenome', ['GESourceTracks', track]) ti.trackName = [track] ti.genome = 'ModelsForExternalTracks' ti.store() from quick.util.GenomeInfo import GenomeInfo from datetime import datetime gi = GenomeInfo('TestGenome') gi.fullName = 'TestGenome' gi.sourceUrls = ['http://hgdownload.cse.ucsc.edu/goldenPath/hg18/chromosomes/chr21.fa.gz', \ 'http://hgdownload.cse.ucsc.edu/goldenPath/hg18/chromosomes/chrM.fa.gz'] gi.sourceChrNames = ['chr21', 'chrM'] gi.installedBy = 'Setup.py' gi.genomeBuildSource = 'NCBI' gi.genomeBuildName = 'hg18'
fromDir = createDirPath(['GESourceTracks'], 'TestGenome', allowOverlaps=allowOverlaps) toDir = createDirPath([], 'ModelsForExternalTracks', allowOverlaps=allowOverlaps) try: if not os.path.exists(toDir): shutil.copytree(fromDir, toDir) print 'OK: Copied from %s to %s.' % (fromDir, toDir) except Exception, e: print 'FAILED: Error occurred copying from %s to %s: ' % ( fromDir, toDir) + str(e).strip() sys.exit(1) for track in ProcTrackOptions.getSubtypes('TestGenome', ['GESourceTracks']): ti = TrackInfo('TestGenome', ['GESourceTracks', track]) ti.trackName = [track] ti.genome = 'ModelsForExternalTracks' ti.store() from quick.util.GenomeInfo import GenomeInfo from datetime import datetime gi = GenomeInfo('TestGenome') gi.fullName = 'TestGenome' gi.sourceUrls = ['http://hgdownload.cse.ucsc.edu/goldenPath/hg18/chromosomes/chr21.fa.gz', \ 'http://hgdownload.cse.ucsc.edu/goldenPath/hg18/chromosomes/chrM.fa.gz'] #gi.sourceChrNames = ['chr21', 'chrM'] gi.installedBy = 'Setup.py' gi.genomeBuildSource = 'NCBI' gi.genomeBuildName = 'hg18'
def _getAllDiseases(prevChoices): if isinstance(prevChoices[2], dict): return prevChoices[2].keys() else: tn = SelectDiseaseTool._getDiseaseTn(prevChoices) return [x for x in ProcTrackOptions.getSubtypes(SelectDiseaseTool.GENOME, tn)]
def _getAllPwms(prevChoices): if isinstance(prevChoices[2], dict): return prevChoices[2].keys() else: tfTrackName = SelectTfTool.TRACK_DICT[prevChoices[0]] return [pwm for pwm in ProcTrackOptions.getSubtypes(SelectTfTool.GENOME, tfTrackName)]