def testUserBinSource(self):
    """Check genome-wide user bins ('*', '*') for hg18, else for TestGenome.

    If the hg18 bin source yields any bins, 24 bins are expected and the
    bin at index 21 must render as chr22 with centromere info. Otherwise
    the same kind of check is run against 'TestGenome' (2 bins, chr21 first).
    """
    hg18Regions = [region for region in UserBinSource('*', '*', genome='hg18')]
    if hg18Regions:
        self.assertEqual(24, len(hg18Regions))
        self.assertEqual('chr22:1-49691432 (intersects centromere)',
                         hg18Regions[21].strWithCentromerInfo())
        return

    # No hg18 bins were produced: fall back to the test genome.
    testRegions = [
        region for region in UserBinSource('*', '*', genome='TestGenome')
    ]
    self.assertEqual(2, len(testRegions))
    self.assertEqual('chr21:1-46944323 (intersects centromere)',
                     testRegions[0].strWithCentromerInfo())
def getChrArmRegs(cls, genome, categoryFilterList=None):
    """Return a bin source with the chromosome-arm regions of ``genome``.

    Args:
        genome: Genome identifier passed on to ``cls.getChrArmRegsFn`` and
            to ``UserBinSource``.
        categoryFilterList: Optional category filter handed to
            ``UserBinSource``. Filtered sources are built on every call and
            are never cached.

    Returns:
        A ``UserBinSource`` reading the chromosome-arm file, or ``None``
        when no such file is configured or it does not exist on disk.
        Unfiltered results (including ``None``) are cached per genome in
        ``cls._chrArmRegs``.
    """
    fn = cls.getChrArmRegsFn(genome)

    if categoryFilterList is not None:
        # Apply the same file check as the unfiltered branch, so a missing
        # region file yields None instead of a source built on a bad path.
        if fn is None or not os.path.exists(fn):
            return None
        from quick.application.UserBinSource import UserBinSource
        return UserBinSource('file', fn, genome, categoryFilterList)

    if genome not in cls._chrArmRegs:
        regs = None
        if fn is not None and os.path.exists(fn):
            # Imported only when a source is actually constructed.
            from quick.application.UserBinSource import UserBinSource
            regs = UserBinSource('file', fn, genome)
        cls._chrArmRegs[genome] = regs

    return cls._chrArmRegs[genome]
def runIntegrationTest():
    """Run MeanInsideOutsideTwoTailRandStat on two 'melting' tracks and print it.

    The statistic is run over chrM with bin specification '10000'; the
    value returned by StatRunner.run is printed as-is.
    """
    # Track pair tried earlier: Track(['bs','wenjie1']) with Track(['melting'])
    firstTrack = Track(['melting', '2state'])
    secondTrack = Track(['melting'])

    # fixme: do a conversion from binSpecification to binSource..
    # Bin sources tried earlier:
    #   AutoBinner(parseRegSpec('chr1:1-1000000', genome), 100000)
    #   UserBinSource('chr1:1-10000','1000')
    #   UserBinSource('chr1:184916000-184936000','10000')
    #   UserBinSource('chr1:11913000-11916000','1000')
    bins = UserBinSource('chrM', '10000')

    # Earlier experiment with hand-built track views (kept for reference):
    #regionIter = GenomeElementSource('/Volumes/insilico.titan.uio.no/HyperBrowser/new_hb/data/2sSegs.bed','hg18')
    #genomeAnchor = GenomeRegion(genome='hg18', chr='chrM', start=0, end=14)
    #trackView = TrackView([2, 6, 12], [3, 9, 14], None, None, genomeAnchor)
    #trackView2 = TrackView(None, None, [79, 70, 76, 68, 69, 71, 79, 79, 80, 73, 68, 69, 78, 80], None, genomeAnchor)
    #myTrack = MyTrack(trackView)
    #myTrack2 = MyTrack(trackView2)

    outcome = StatRunner.run(bins, firstTrack, secondTrack,
                             MeanInsideOutsideTwoTailRandStat)
    print(outcome)
def test2():
    """Run a CustomRStat over chr1:0-10 and print the results as HTML.

    The 'melting' track is combined with the 'genes'/'refseq' track; the
    result is added to a fresh ResultsCollection whose HTML string is
    printed.
    """
    meltingTrack = Track(['melting'])
    refseqTrack = Track(['genes', 'refseq'])
    bins = UserBinSource('chr1:0-10', '10')

    # NOTE(review): `fn` is not defined in this function, so it is resolved
    # as a global at call time - confirm it is set by the surrounding module.
    rStatClass = wrapClass(CustomRStat, {'scriptFn': fn})
    res = StatRunner.run(bins, meltingTrack, refseqTrack, rStatClass)

    collection = ResultsCollection()
    collection.addResults(None, res)
    print(collection.getHtmlString())
def _init(self, globalSource='', minimal=False):
    """Select the global bin source for this statistic.

    Raises SplittableStatNotAvailableError when self._region is iterable
    (as judged by isIter). Otherwise self._globalSource becomes a
    MinimalBinSource (minimal run), a fixed TestGenome bin source
    (globalSource == 'test'), or a GlobalBinSource for the region's genome.
    """
    if isIter(self._region):
        raise SplittableStatNotAvailableError()

    if minimal:
        chosenSource = MinimalBinSource(self._region.genome)
    elif globalSource == 'test':
        chosenSource = UserBinSource('TestGenome:chr21:10000000-15000000',
                                     '1000000')
    else:
        chosenSource = GlobalBinSource(self._region.genome)

    self._globalSource = chosenSource
def getGeneRegs(cls, genome, fn, categoryFilterList=None, cluster=True):
    """Return a bin source reading gene regions from the file ``fn``.

    Returns None when ``fn`` is None or does not exist. Otherwise a
    UserBinSource is returned when ``cluster`` is true, and an
    UnBoundedUnClusteredUserBinSource when it is false; both are built
    with ('file', fn, genome, categoryFilterList).
    """
    if fn is None or not os.path.exists(fn):
        return None

    if cluster:
        from quick.application.UserBinSource import UserBinSource as binSourceCls
    else:
        from quick.application.UserBinSource import UnBoundedUnClusteredUserBinSource as binSourceCls

    return binSourceCls('file', fn, genome, categoryFilterList)
def getChrBandRegs(cls, genome, categoryFilterList=None):
    """Return a bin source with the chromosome-band regions of ``genome``.

    The file name comes from cls.getChrBandRegsFn(genome). Returns None
    when that name is None or the file does not exist; otherwise a
    UserBinSource built with strictMatch=False.
    """
    from quick.application.UserBinSource import UserBinSource

    bandFn = cls.getChrBandRegsFn(genome)
    if bandFn is None or not os.path.exists(bandFn):
        return None

    return UserBinSource('file', bandFn, genome, categoryFilterList,
                         strictMatch=False)
def doAnalysisFromDefTwo(cls, tracks, analysisDef, regSpec, binSpec, genome,
                         flag):
    """Run an analysis on the first two tracks and return sorted results.

    An AnalysisDefJob is run over the user bins given by regSpec/binSpec.
    Each region key of the result is paired (as a string) with its result,
    and the pairs are passed through cls.sortDictionaries together with
    ``flag``; the outcome is returned as a list.
    """
    bins = UserBinSource(regSpec, binSpec, genome)
    job = AnalysisDefJob(analysisDef.getDefAfterChoices(),
                         tracks[0].trackName,
                         tracks[1].trackName,
                         bins,
                         galaxyFn=None)
    result = job.run()

    keyedResults = []
    for regionKey in result.getAllRegionKeys():
        keyedResults.append((str(regionKey), result[regionKey]))

    return list(cls.sortDictionaries(keyedResults, flag))
def __init__(self, region, track, track2, globalSource='', minimal=False,
             **kwArgs):
    """Pick the global bin source, then delegate to the parent constructor.

    Raises SplittableStatNotAvailableError when ``region`` is iterable (as
    judged by isIter). self._globalSource is set before the parent
    __init__ runs; globalSource and minimal are forwarded to the parent
    together with any extra keyword arguments.
    """
    if isIter(region):
        raise SplittableStatNotAvailableError()

    # Deliberately an equality test against True (not plain truthiness),
    # exactly as callers have relied on so far.
    if minimal == True:
        chosenSource = MinimalBinSource(region.genome)
    elif globalSource == 'test':
        chosenSource = UserBinSource('TestGenome:chr21:10000000-15000000','1000000')
    else:
        chosenSource = GlobalBinSource(region.genome)
    self._globalSource = chosenSource

    # NOTE(review): super(self.__class__, self) recurses endlessly if this
    # class is ever subclassed; naming the class explicitly would avoid
    # that, but the class name is not visible from here.
    parent = super(self.__class__, self)
    parent.__init__(region, track, track2, globalSource=globalSource,
                    minimal=minimal, **kwArgs)
def generateUserBinSource(self, regSpec, binSpec):
    """Build the user bin source for regSpec/binSpec on self._genome.

    When ALLOW_OVERLAPPING_USER_BINS is set, a BoundedUnMergedUserBinSource
    is returned and a warning is logged; otherwise a plain UserBinSource
    is returned.
    """
    if not self.ALLOW_OVERLAPPING_USER_BINS:
        from quick.application.UserBinSource import UserBinSource
        return UserBinSource(regSpec, binSpec, genome=self._genome)

    from quick.application.UserBinSource import BoundedUnMergedUserBinSource
    unmergedSource = BoundedUnMergedUserBinSource(regSpec, binSpec,
                                                  genome=self._genome)

    from gold.application.LogSetup import logMessage
    # An earlier version also reported the bin counts:
    #   altUbSource = UserBinSource(regSpec,binSpec,genome)
    #   '%i non-overlapping vs %i if clustered' % (len(list(ubSource )), len(list(altUbSource )) )
    logMessage(
        'NB! Using unmerged local bins - meaning that global results are not necessarily valid.'
    )
    return unmergedSource
def getGlobalSource(globalSourceStr, genome, minimal):
    """Map a global-source keyword to the corresponding bin source.

    ``minimal == True`` takes precedence and yields a MinimalBinSource.
    Recognised keywords are 'test', 'chrs', 'chrarms', 'ensembl' and
    'userbins'; anything else raises ShouldNotOccurError.
    """
    if minimal == True:
        return MinimalBinSource(genome)

    if globalSourceStr == 'test':
        return UserBinSource('TestGenome:chr21:10000000-15000000', '1000000')

    if globalSourceStr == 'chrs':
        return GenomeInfo.getChrRegs(genome)

    if globalSourceStr == 'chrarms':
        return GenomeInfo.getChrArmRegs(genome)

    if globalSourceStr == 'ensembl':
        return GenomeInfo.getStdGeneRegs(genome)

    if globalSourceStr == 'userbins':
        from gold.application.StatRunner import StatJob
        # The user bins are read from the class-level StatJob.USER_BIN_SOURCE
        # (formerly taken from kwArgs['userBins']).
        assert StatJob.USER_BIN_SOURCE is not None
        return StatJob.USER_BIN_SOURCE

    raise ShouldNotOccurError('globalSource not recognized')
def testAllCombinationsUserBinSource(self):
    """Check the pairs produced by AllCombinationsUserBinSource.

    Three 1m bins on chr21 of TestGenome must give six pairs (every bin
    with itself and with each later bin, in order). The string forms and
    the hash of the first pair are also pinned.
    """
    binSource = UserBinSource('chr21:1-3m', '1m', genome='TestGenome')
    pairedBins = AllCombinationsUserBinSource(binSource)

    self.assertEqual(6, sum(1 for _pair in pairedBins))

    bins = list(binSource)
    PGR = PairedGenomeRegion
    # (0,0), (0,1), (0,2), (1,1), (1,2), (2,2)
    expectedPairs = [
        PGR(bins[first], bins[second])
        for first in range(3)
        for second in range(first, 3)
    ]
    self.assertEqual(expectedPairs, [pair for pair in pairedBins])

    binPair = next(iter(pairedBins))
    firstPairStr = '(chr21:1-1000000, chr21:1-1000000)'
    self.assertEqual(firstPairStr, str(binPair))
    self.assertEqual(firstPairStr, binPair.strWithCentromerInfo())
    self.assertEqual('(chr21:0m-1m, chr21:0m-1m)', binPair.strShort())
    self.assertEqual(-8725196983528202459, hash(binPair))
from quick.extra.TrackExtractor import TrackExtractor
from quick.application.UserBinSource import UserBinSource

# Example: extract track data for a set of genome regions into files.

# Options.
# False: coordinates are relative to each region.
# True: global coordinates (chromosome offsets).
globalCoords = False
# True: output in the original format (overrides the fileFormatName attribute).
asOriginal = False
# False: overlapping segments are merged into "super-segments".
# NOTE(review): this value is defined but not passed to the extraction call
# below - confirm whether that is intended.
allowOverlaps = False

# The track to extract; any other track precomputed on the server also works.
trackName1 = ['Genes and gene subsets', 'Genes', 'Refseq']

# The genome regions to extract. They can be read from a bed-file:
#   regions = UserBinSource('file','myRegions.bed')
# or be declared implicitly - here 500 regions, 1k long, at the beginning of
# chr1 (genome-wide would be UserBinSource('*','*','hg18')):
regions = UserBinSource('chr1:1-500000', '1k', 'hg18')

# Generate the files. Only one track here, but the list may hold many tracks.
# Either combine the track data for all regions in a single directory:
TrackExtractor.extractManyToOneDir(
    [trackName1], regions, 'myOutputFolder', 'bed', globalCoords, asOriginal)
# or create a separate folder with track data for each region:
#TrackExtractor.extractManyToRegionDirs([trackName1], regions, 'myOutputFolder', 'bed', globalCoords, asOriginal)
# the name of the tool.
# """


def create_track(file_name, trackName):
    """Import ``file_name`` as an hg18 track and wrap it in a SingleTrackTS.

    The file is imported through gtrackcore's importFile under the name
    ``trackName``; the returned SingleTrackTS carries that name as its
    "title" metadata. PlainTrack and SingleTrackTS are resolved as module
    globals when the function is called.
    """
    from gtrackcore.core.Api import importFile

    importFile(file_name, genome="hg18", trackName=trackName)
    plainTrack = PlainTrack([trackName])
    return SingleTrackTS(plainTrack, {"title": trackName})


if __name__ == "__main__":
    from gold.track.TrackStructure import SingleTrackTS, FlatTracksTS
    from gold.track.Track import PlainTrack

    track1 = create_track("testfile8.bed", "testfile8")
    track2 = create_track("testfile7.bed", "testfile7")
    #run_analysis("testfile.k")
    #t = PlainTrack(["test"])

    # Collect both single-track structures in one flat track structure.
    ts = FlatTracksTS()
    for tsKey, singleTs in (("test1", track1), ("test2", track2)):
        ts[tsKey] = singleTs
    print(ts)

    analysisBins = UserBinSource("chr1", "*", genome="hg18")
    RandomizationGuiTool.run_on_extracted_variables(
        ts,
        analysisBins,
        1,
        WITHIN_TRACKS_CATEGORY,
        PERMUTED_SEGS_AND_INTERSEGS_STR,
        galaxyFn="./testfile.gsuite",
        genome="hg18")
# Earlier extraction runs, kept for reference:
#
#trackNames = [['sequence']]
#regions = UserBinSource('file','/usit/titan/u1/geirksa/_data/2kbUpstreamsNoIRFNoGenes.bed')
#TrackExtractor.extractManyToOneDir(trackNames, regions, '/usit/titan/u1/geirksa/_allergi/IRFNegData/',False)
#
#trackNames = [['bs','ucsc'], ['bs','cisred'],['bs','bothDBs']]
#regions = UserBinSource('file','/usit/titan/u1/geirksa/_data/IRF_200bpWins.bed')
#TrackExtractor.extractManyToOneDir(trackNames, regions, '/usit/titan/u1/geirksa/_allergi/IRF200bpStaticPredsOneDs/',True,True)
#TrackExtractor.extractManyToRegionDirs(trackNames, regions, '/usit/titan/u1/geirksa/_allergi/IRF200bpStaticPredsManyDs/',True,True)
#
#trackNames = [['genes','refseq']]
#regions = UserBinSource('file','/usit/titan/u1/geirksa/_data/ets1.bed')
#TrackExtractor.extractManyToOneDir(trackNames, regions, '/usit/titan/u1/geirksa/_allergi/ETS1/',True,True)
#
#trackName = ['Phenotype and Disease Associations','HPV specific','1kb up USCS exons']
#regions = UserBinSource('file','/data1/rrresearch/standardizedTracks/hg18/Phenotype and Disease Associations/Virus integration/HPV/.hpv.bed')
#TrackExtractor.extractOneTrackManyRegsToOneFile(trackName, regions, '/usit/titan/u1/geirksa/_output/ExonsCloseToHPV.bed', True, True)

# Current run: extract the CCDS gene track for every region in the MLV
# bed file, one file per region, collected in a single zip file.
trackName = ['Genes and gene subsets', 'Genes', 'CCDS']
regions = UserBinSource(
    'file',
    '/usit/invitro/hyperbrowser/standardizedTracks/hg18/'
    'Phenotype and disease associations/Assorted experiments/'
    'Virus integration, Derse et al. (2007)/MLV/MLV.bed')
TrackExtractor.extractOneTrackManyToRegionFilesInOneZipFile(
    trackName,
    regions,
    '/norstore_osl/hyperbrowser/nosync/nobackup/test/HPV_CCDS.zip',
    globalCoords=True,
    asOriginal=True,
    allowOverlaps=False,
    ignoreEmpty=True)
def execute(cls, choices, galaxyFn=None, username=''):
    """Summarise query-vs-reference interaction per category and print HTML.

    The query track and the GSuite reference tracks (split by the selected
    metadata category) are combined in one track structure.
    SummarizedInteractionPerTsCatV2Stat is run on chr1 with
    ObservedVsExpectedStat as pairwise statistic and 'minLqMedUqMax' as
    summary function, and the per-category results are drawn as a box plot.
    """
    #cls._setDebugModeIfSelected(choices)
    # from config.DebugConfig import DebugConfig
    # from config.DebugConfig import DebugModes
    # DebugConfig.changeMode(DebugModes.RAISE_HIDDEN_EXCEPTIONS_NO_VERBOSE)
    # DebugUtil.insertBreakPoint(5678, suspend=False)

    gsuiteChoice = choices.gsuite
    metadataCategory = choices.cat
    queryChoice = choices.query
    #genome = 'hg19'
    genome = choices.genome

    queryTS = factory.getSingleTrackTS(genome, queryChoice)
    refTS = factory.getFlatTracksTS(genome, gsuiteChoice)
    categoricalTS = refTS.getSplittedByCategoryTS(metadataCategory)

    fullTS = TrackStructureV2()
    fullTS['query'] = queryTS
    fullTS['reference'] = categoricalTS

    spec = AnalysisSpec(SummarizedInteractionPerTsCatV2Stat)
    # The summary function is fixed here, so only the box-plot branch
    # below is ever taken.
    summaryFunc = 'minLqMedUqMax'
    spec.addParameter('pairwiseStatistic', ObservedVsExpectedStat.__name__)
    spec.addParameter('summaryFunc', summaryFunc)

    bins = UserBinSource('chr1', '*', genome=genome)
    analysisRes = doAnalysis(spec, bins, fullTS)
    tsRes = analysisRes.getGlobalResult()['Result']

    htmlCore = HtmlCore()
    htmlCore.begin()

    if summaryFunc == 'minAndMax':
        htmlCore.tableHeader(['Track', 'min-max'], sortable=False,
                             tableId='tab1')
        for category, catRes in tsRes.iteritems():
            minMaxText = '%.2f-%.2f' % (catRes.getResult()[0],
                                        catRes.getResult()[1])
            htmlCore.tableLine([category, minMaxText])
        htmlCore.tableFooter()

    if summaryFunc == 'minLqMedUqMax':
        # One pass over the results; keys and values are separated after.
        perCategory = [(category, list(catRes.getResult()))
                       for category, catRes in tsRes.iteritems()]
        categories = [category for category, _values in perCategory]
        dataList = [values for _category, values in perCategory]

        from quick.webtools.restricted.visualization.visualizationGraphs import \
            visualizationGraphs
        vg = visualizationGraphs()
        boxPlot = vg.drawBoxPlotChart(dataList,
                                      categories=categories,
                                      seriesName=metadataCategory)
        htmlCore.line(boxPlot)

    htmlCore.end()
    print(htmlCore)
def execute(cls, choices, galaxyFn=None, username=''):
    '''
    Is called when execute-button is pushed by web-user. Should print
    output as HTML to standard out, which will be directed to a results page
    in Galaxy history. If getOutputFormat is anything else than HTML, the
    output should be written to the file with path galaxyFn. If needed,
    StaticFile can be used to get a path where additional files can be put
    (e.g. generated image files). choices is a list of selections made by
    web-user in each options box.

    In this tool, a new primary track file is written for every track in
    the selected GSuite: either from the results of the
    'TrackIntersectionStat' analysis against a filter track (when
    choices.withOverlaps equals cls.NO_OVERLAPS), or by
    TrackExtractor.extractOneTrackManyRegsToOneFile over a user bin source.
    Tracks whose output file cannot be parsed are collected in a separate
    GSuite with the error message as comment.

    NOTE(review): the part of the function visible here never writes
    primaryGSuite/emptyGSuite and never uses emptyFn, primaryFn, errorFn,
    preprocessedFn, GSuiteComposer or HbGSuiteTrack - presumably the
    function continues beyond this excerpt; confirm against the full file.
    '''
    import gold.gsuite.GSuiteComposer as GSuiteComposer
    from gold.gsuite.GSuite import GSuite
    from gold.gsuite.GSuiteTrack import GSuiteTrack, HbGSuiteTrack
    from gold.origdata.TrackGenomeElementSource import TrackViewListGenomeElementSource
    from gold.origdata.FileFormatComposer import getComposerClsFromFileSuffix
    from quick.multitrack.MultiTrackCommon import getGSuiteFromGalaxyTN
    from quick.application.ExternalTrackManager import ExternalTrackManager
    from quick.application.GalaxyInterface import GalaxyInterface
    from quick.application.UserBinSource import UserBinSource
    from quick.extra.TrackExtractor import TrackExtractor

    genome = choices.genome
    gSuite = getGSuiteFromGalaxyTN(choices.gSuite)

    # Resolve the filter: a track name for the analysis-based mode, or a
    # user bin source for the extraction-based mode. The filter comes either
    # from a Galaxy history element or from a selected track.
    if choices.withOverlaps == cls.NO_OVERLAPS:
        if choices.trackSource == cls.FROM_HISTORY_TEXT:
            filterTrackName = ExternalTrackManager.getPreProcessedTrackFromGalaxyTN(genome, choices.trackHistory)
        else:
            filterTrackName = choices.track.split(':')
    else:
        if choices.trackSource == cls.FROM_HISTORY_TEXT:
            # For history elements the file suffix acts as region
            # specification and the file name as bin specification.
            regSpec = ExternalTrackManager.extractFileSuffixFromGalaxyTN(choices.trackHistory)
            binSpec = ExternalTrackManager.extractFnFromGalaxyTN(choices.trackHistory)
        else:
            regSpec = 'track'
            binSpec = choices.track

        userBinSource = UserBinSource(regSpec, binSpec, genome)

    # Look up the extra Galaxy output files, keyed by history output name.
    desc = cls.OUTPUT_GSUITE_DESCRIPTION
    emptyFn = cls.extraGalaxyFn \
        [getGSuiteHistoryOutputName('nointersect', description=desc, datasetInfo=choices.gSuite)]
    primaryFn = cls.extraGalaxyFn \
        [getGSuiteHistoryOutputName('primary', description=desc, datasetInfo=choices.gSuite)]
    errorFn = cls.extraGalaxyFn \
        [getGSuiteHistoryOutputName('nopreprocessed', description=desc, datasetInfo=choices.gSuite)]
    preprocessedFn = cls.extraGalaxyFn \
        [getGSuiteHistoryOutputName('preprocessed', description=desc, datasetInfo=choices.gSuite)]
    hiddenStorageFn = cls.extraGalaxyFn \
        [getGSuiteHistoryOutputName('storage', description=desc, datasetInfo=choices.gSuite)]

    analysisDef = '-> TrackIntersectionStat'
    # analysisDef = '-> TrackIntersectionWithValStat'

    # Two progress stages, each initially counting one step per track.
    numTracks = gSuite.numTracks()
    progressViewer = ProgressViewer([(cls.PROGRESS_INTERSECT_MSG, numTracks),
                                     (cls.PROGRESS_PREPROCESS_MSG, numTracks)], galaxyFn)
    emptyGSuite = GSuite()
    primaryGSuite = GSuite()

    for track in gSuite.allTracks():
        newSuffix = cls.OUTPUT_TRACKS_SUFFIX

        # Name the output file after the track name path, with the output
        # suffix substituted for any existing one.
        extraFileName = os.path.sep.join(track.trackName)
        extraFileName = changeSuffixIfPresent(extraFileName, newSuffix=newSuffix)
        title = getTitleWithSuffixReplaced(track.title, newSuffix)

        # The suffix is only passed on when the file name does not already
        # end with it.
        primaryTrackUri = GalaxyGSuiteTrack.generateURI(
            galaxyFn=hiddenStorageFn, extraFileName=extraFileName,
            suffix=newSuffix if not extraFileName.endswith(newSuffix) else '')
        primaryTrack = GSuiteTrack(primaryTrackUri, title=title,
                                   genome=track.genome, attributes=track.attributes)

        if choices.withOverlaps == cls.NO_OVERLAPS:
            # Run the analysis genome-wide ('*', '*') and compose the
            # per-region results, in sorted key order, into the output file.
            res = GalaxyInterface.runManual([track.trackName, filterTrackName], analysisDef, '*', '*',
                                            genome=genome, galaxyFn=galaxyFn, username=username)

            trackViewList = [res[key]['Result'] for key in sorted(res.keys())]

            tvGeSource = TrackViewListGenomeElementSource(genome, trackViewList)

            composerCls = getComposerClsFromFileSuffix(cls.OUTPUT_TRACKS_SUFFIX)
            composerCls(tvGeSource).composeToFile(primaryTrack.path)
        else:
            TrackExtractor.extractOneTrackManyRegsToOneFile( \
                track.trackName, userBinSource, primaryTrack.path, fileFormatName=cls.OUTPUT_TRACKS_SUFFIX, \
                globalCoords=True, asOriginal=False, allowOverlaps=True)

        # Temporary hack until better solution for empty result tracks have been implemented

        from gold.origdata.GenomeElementSource import GenomeElementSource
        geSource = GenomeElementSource(primaryTrack.path, genome=genome, suffix=cls.OUTPUT_TRACKS_SUFFIX)

        try:
            geSource.parseFirstDataLine()
            primaryGSuite.addTrack(primaryTrack)
        except Exception, e:  # Most likely empty file
            # Any parse failure files the track under the "empty" GSuite,
            # with the error message as comment, and removes one step from
            # the second progress stage.
            primaryTrack.comment = e.message
            emptyGSuite.addTrack(primaryTrack)
            numTracks -= 1
            progressViewer.updateProgressObjectElementCount(
                cls.PROGRESS_PREPROCESS_MSG, numTracks)
        # progressViewer.update()
def execute(cls, choices, galaxyFn=None, username=''): shelveDict = { 'track1': choices[3] if choices[3] != cls.NO_TRACK_SHORTNAME else None } shelveDict['track2'] = choices[ 5] if choices[5] != cls.NO_TRACK_SHORTNAME else None print len(choices) print cls._extraParams for i in range(len(cls._extraParams)): index = i * 2 + cls.FIRST_EXTRA_PARAM_BOX_NUMBER + 1 shelveDict[index] = choices[index].strip() DebugInfoShelve = safeshelve.open(cls.SHELVE_FN) DebugInfoShelve[choices[0]] = shelveDict DebugInfoShelve.close() try: from gold.application.LogSetup import setupDebugModeAndLogging setupDebugModeAndLogging() print 'Getting Unsplittable statClass' statClassName = choices[0] #statClass = STAT_CLASS_DICT[statClassName] #try: print 'Preparing arguments to init' unsplittableStatClass = MagicStatFactory._getClass( statClassName, 'Unsplittable') genome = choices[1] from gold.track.Track import PlainTrack prefixTN1 = cls.STD_PREFIX_TN if choices[2] == 'yes' else [] tn1 = prefixTN1 + choices[3].split(':') track1 = PlainTrack( tn1) if choices[3] != cls.NO_TRACK_SHORTNAME else None prefixTN2 = cls.STD_PREFIX_TN if choices[4] == 'yes' else [] tn2 = prefixTN2 + choices[5].split(':') track2 = PlainTrack( tn2) if choices[5] != cls.NO_TRACK_SHORTNAME else None from gold.track.GenomeRegion import GenomeRegion #region = GenomeRegion(genome, 'chr1',1000,2000) #region2 = GenomeRegion(genome, 'chr1',5000,6000) kwArgs = {} regVal = choices[cls.FIRST_EXTRA_PARAM_BOX_NUMBER + 1] binSpecVal = choices[cls.FIRST_EXTRA_PARAM_BOX_NUMBER + 3] ubSource = UserBinSource(regVal, binSpecVal, genome=genome) region = list(ubSource)[0] if len(cls._extraParams) > 3: for i in range(len(cls._extraParams)): paramName = choices[i * 2 + cls.FIRST_EXTRA_PARAM_BOX_NUMBER] param = paramName[:paramName.find('(')].strip() val = choices[i * 2 + cls.FIRST_EXTRA_PARAM_BOX_NUMBER + 1].strip() if val != '': kwArgs[param] = val shelveDict[i * 2 + cls.FIRST_EXTRA_PARAM_BOX_NUMBER + 1] = val print 'Calling __init__' # 
statObj = unsplittableStatClass(region, track1, track2, **kwArgs) print 'Calling createChildren' statObj.createChildren() print 'Calling getResult' statObj.getResult() #except: # raise #print 'Preparing arguments to init' #genome = 'hg18' #prefixTN = ['DNA structure'] if choices[2] == 'yes' else [] #from gold.track.Track import PlainTrack #tn1 = prefixTN + choices[3].split(':') #track1 = PlainTrack(tn1) #tn2 = prefixTN + choices[5].split(':') #track2 = PlainTrack(tn2) #from gold.track.GenomeRegion import GenomeRegion ##region = GenomeRegion(genome, 'chr1',1000,2000) ##region2 = GenomeRegion(genome, 'chr1',5000,6000) # #kwArgs = {} #regVal = choices[cls.FIRST_EXTRA_PARAM_BOX_NUMBER+1] #binSpecVal = choices[cls.FIRST_EXTRA_PARAM_BOX_NUMBER+3] #ubSource = UserBinSource(regVal, binSpecVal, genome=choices[1]) #region = list(UserBinSource)[0] # #if len(cls._extraParams)>2: # for i in range(2,len(cls._extraParams)): # paramName = choices[i*2+cls.FIRST_EXTRA_PARAM_BOX_NUMBER] # param = paramName[:paramName.find('(')].strip() # val = choices[i*2+cls.FIRST_EXTRA_PARAM_BOX_NUMBER+1].strip() # if val !='': # kwArgs[param] = val # shelveDict[i*2+cls.FIRST_EXTRA_PARAM_BOX_NUMBER+1] = val # # ##extraParams += [v.strip() for v in choices.kwArgs.split(',')] if choices.kwArgs.strip() != '' else [] ##args = [region, track1, track2] # #print 'Calling __init__' ## #statObj = unsplittableStatClass(region, track1, track2, **kwArgs) # #print 'Calling createChildren' #statObj.createChildren() # #print 'Calling getResult' #statObj.getResult() print 'Running StatJob' magicStatClass = STAT_CLASS_DICT[statClassName] #res = StatJob([region,region2],track1,track2,magicStatClass,**kwArgs).run() res = StatJob(ubSource, track1, track2, magicStatClass, **kwArgs).run() from quick.application.GalaxyInterface import GalaxyInterface GalaxyInterface._viewResults([res], galaxyFn) except Exception, e: print 'Error: ', e raise