def getBoundingRegionInfo(self, region):
        """Return the bounding region info that encloses ``region``.

        The bounding regions stored for ``region.chr`` are searched for the
        last one starting at or before ``region.start``. That bounding region
        is returned if ``region`` ends within it.

        If the chromosome has stored bounding regions but none of them
        encloses ``region``, an OutsideBoundingRegionError is raised, unless
        ``region`` equals ``self._minimalRegion``. In all remaining cases a
        BoundingRegionInfo spanning ``region`` itself, with the other fields
        set to 0, is returned.
        """
        self._updateContentsIfNecessary(region.chr)

        if region.chr in self._contents:
            brInfoHolder = self._contents[region.chr]

            # Temporary, to support old preprocessed boundingRegion.shelve
            # files, where the holder is a dict instead of an object with
            # parallel brStarts/brInfos sequences.
            isDict = isinstance(brInfoHolder, dict)
            if isDict:
                # bisect requires an indexable sequence in ascending order;
                # the keys of a plain dict guarantee neither.
                brStarts = sorted(brInfoHolder)
            else:
                brStarts = brInfoHolder.brStarts

            # Number of bounding regions starting at or before region.start
            idx = bisect_right(brStarts, region.start)

            if idx > 0:
                if isDict:
                    brInfo = brInfoHolder[brStarts[idx - 1]]
                else:
                    brInfo = brInfoHolder.brInfos[idx - 1]

                if region.start < brInfo.end and region.end <= brInfo.end:
                    return brInfo

            if not self._minimalRegion == region:
                #
                # There are bounding regions in the same chromosome, but not any encompassing the user bin
                # Thus the bounding regions are explicitly defined (not just the complete chromosome)
                #
                from gold.util.CommonFunctions import prettyPrintTrackName
                raise OutsideBoundingRegionError("The analysis region '%s' is outside the bounding regions of track: %s" \
                                                 % (region, prettyPrintTrackName(self._trackName)))

        return BoundingRegionInfo(region.start, region.end, 0, 0, 0, 0)
 def getBoundingRegionInfo(self, region):
     """Look up the bounding region that contains ``region``.

     Finds the last stored bounding region of ``region.chr`` that starts at
     or before ``region.start`` and returns its info object when ``region``
     ends inside it.

     Raises OutsideBoundingRegionError when the chromosome has stored
     bounding regions, none of them contains ``region``, and ``region`` is
     not equal to ``self._minimalRegion``. Otherwise returns a
     BoundingRegionInfo covering ``region`` with the remaining fields 0.
     """
     self._updateContentsIfNecessary(region.chr)

     if region.chr in self._contents:
         brInfoHolder = self._contents[region.chr]

         # Temporary, to support old preprocessed boundingRegion.shelve files
         # where the holder is a dict rather than a brStarts/brInfos object.
         isDict = isinstance(brInfoHolder, dict)
         if isDict:
             # Plain dict keys are neither ordered nor (on Python 3)
             # indexable, both of which bisect_right relies on.
             brStarts = sorted(brInfoHolder)
         else:
             brStarts = brInfoHolder.brStarts

         # Count of bounding regions whose start is <= region.start
         idx = bisect_right(brStarts, region.start)

         if idx > 0:
             if isDict:
                 brInfo = brInfoHolder[brStarts[idx-1]]
             else:
                 brInfo = brInfoHolder.brInfos[idx-1]

             if region.start < brInfo.end and region.end <= brInfo.end:
                 return brInfo

         if not self._minimalRegion == region:
             #
             # There are bounding regions in the same chromosome, but not any encompassing the user bin
             # Thus the bounding regions are explicitly defined (not just the complete chromosome)
             #
             from gold.util.CommonFunctions import prettyPrintTrackName
             raise OutsideBoundingRegionError("The analysis region '%s' is outside the bounding regions of track: %s" \
                                              % (region, prettyPrintTrackName(self._trackName)))

     return BoundingRegionInfo(region.start, region.end, 0, 0, 0, 0)
Exemple #3
0
    def getTrackView(self, region):
        """Return the track view of ``region``, converted to the required format.

        The raw track view is fetched with the overlap and border handling
        settings of ``self._trackFormatReq``. On first use the applicable
        format converters are looked up and stored in
        ``self.formatConverters``; the first converter performs the
        conversion.

        Raises IncompatibleTracksError if no converter is available, or if
        the first converter cannot handle the conversion.
        """
        allowOverlaps = self._trackFormatReq.allowOverlaps()
        borderHandling = self._trackFormatReq.borderHandling()
        assert allowOverlaps is not None
        assert borderHandling is not None

        rawView = self._getRawTrackView(region, borderHandling, allowOverlaps)
        rawFormat = rawView.trackFormat

        # Converters are resolved lazily and then reused for later calls
        if self.formatConverters is None:
            self.formatConverters = getFormatConverters(rawFormat, self._trackFormatReq)

        if self.formatConverters == []:
            messageParts = [
                prettyPrintTrackName(self.trackName),
                ' with format: ',
                str(rawFormat),
                '(' + rawFormat._val + ')' if rawFormat._val else '',
                ' does not satisfy ',
                str(self._trackFormatReq),
            ]
            raise IncompatibleTracksError(''.join(messageParts))

        converter = self.formatConverters[0]
        if converter.canHandle(rawFormat, self._trackFormatReq):
            return converter.convert(rawView)

        raise IncompatibleTracksError(getClassName(converter) +
                                      ' does not support conversion from ' + str(rawFormat) +
                                      ' to ' + str(self._trackFormatReq))
    def getAllBoundingRegions(self):
        """Yield the bounding regions of every chromosome in the extended chromosome list.

        Chromosomes are visited in the order given by
        GenomeInfo.getExtendedChrList for ``self._genome``.

        Raises BoundingRegionsNotAvailableError (on first iteration) if
        ``self.fileExists()`` is false.
        """
        if self.fileExists():
            for chrom in GenomeInfo.getExtendedChrList(self._genome):
                for boundingRegion in self.getAllBoundingRegionsForChr(chrom):
                    yield boundingRegion
            return

        from gold.util.CommonFunctions import prettyPrintTrackName
        raise BoundingRegionsNotAvailableError(
            'Bounding regions not available for track: ' + prettyPrintTrackName(self._trackName))
Exemple #5
0
    def _getRawTrackView(self, region, borderHandling, allowOverlaps):
        """Build a TrackView of placeholder arrays for a region of length 1.

        The set of columns, the value type/dimension and the edge weight
        type/dimension are read from the genome element source of
        ``self.trackName``. Every column named in the prefix list gets a
        small array filled with placeholder content; columns not named stay
        None. Prefixes other than the standard ones end up in the ordered
        ``extraLists`` mapping.

        Raises IncompatibleTracksError if overlaps are requested while the
        track format is dense or the source reports no overlapping elements.
        """
        assert len(region) == 1

        from collections import OrderedDict
        from gold.track.CommonMemmapFunctions import findEmptyVal
        from gold.track.TrackView import TrackView
        import numpy as np

        geSource = ExternalTrackManager.getGESourceFromGalaxyOrVirtualTN(self.trackName, region.genome)
        prefixes = geSource.getPrefixList()
        valType = geSource.getValDataType()
        valDim = geSource.getValDim()
        weightType = geSource.getEdgeWeightDataType()
        weightDim = geSource.getEdgeWeightDim()

        # Standard columns, None until the prefix list names them
        core = dict.fromkeys(('start', 'end', 'val', 'strand', 'id', 'edges', 'weights'))
        extraLists = OrderedDict()

        trackFormat = TrackFormat.createInstanceFromPrefixList(prefixes, valType, valDim,
                                                               weightType, weightDim)
        if allowOverlaps and (trackFormat.isDense() or geSource.hasNoOverlappingElements()):
            raise IncompatibleTracksError(prettyPrintTrackName(self.trackName) + ' with format: '
                                          + str(trackFormat) + ' does not satisfy ' + str(self._trackFormatReq))

        # Dense interval formats get two elements, all others one
        isDenseInterval = trackFormat.isDense() and trackFormat.isInterval()
        if isDenseInterval:
            elCount = 2
        else:
            elCount = 1

        # A bare 'S' dtype is replaced by 'S2' before the arrays are built
        if valType == 'S':
            valType = 'S2'
        if weightType == 'S':
            weightType = 'S2'

        for prefix in prefixes:
            if prefix == 'start':
                core['start'] = np.array([-1], dtype='int32')
            elif prefix == 'end':
                endValues = [0, 1] if isDenseInterval else [0]
                core['end'] = np.array(endValues, dtype='int32')
            elif prefix == 'val':
                flatVals = [findEmptyVal(valType)] * valDim * elCount
                valShape = (elCount, valDim) if valDim > 1 else elCount
                core['val'] = np.array(flatVals, dtype=valType).reshape(valShape)
            elif prefix == 'strand':
                core['strand'] = np.array([1] * elCount, dtype='int8')
            elif prefix == 'id':
                core['id'] = np.array([''] * elCount, dtype='S1')
            elif prefix == 'edges':
                core['edges'] = np.array([['']] * elCount, dtype='S1')
            elif prefix == 'weights':
                nestedWeights = [[[findEmptyVal(weightType)]]] * weightDim * elCount
                weightShape = (elCount, 1, weightDim) if weightDim > 1 else (elCount, 1)
                core['weights'] = np.array(nestedWeights, dtype=weightType).reshape(weightShape)
            else:
                extraLists[prefix] = np.array([''] * elCount, dtype='S1')

        return TrackView(region, core['start'], core['end'], core['val'], core['strand'],
                         core['id'], core['edges'], core['weights'],
                         borderHandling, allowOverlaps, extraLists)
Exemple #6
0
    def _getRawTrackView(self, region, borderHandling, allowOverlaps):
        """Create a placeholder TrackView for the region spanning positions 0 to 1.

        Column layout and data types come from the genome element source of
        ``self.trackName``. Each column listed in the source's prefix list is
        given a tiny array of placeholder values; unlisted standard columns
        are passed on as None and non-standard prefixes are collected in an
        ordered ``extraLists`` mapping.

        Raises IncompatibleTracksError if overlaps are requested while the
        track format is dense or the source reports no overlapping elements.
        """
        assert region.start == 0 and region.end == 1

        from collections import OrderedDict
        from gold.track.CommonMemmapFunctions import findEmptyVal
        from gold.track.TrackView import TrackView
        import numpy as np

        source = ExternalTrackManager.getGESourceFromGalaxyOrVirtualTN(self.trackName, region.genome)
        prefixList = source.getPrefixList()
        valDataType = source.getValDataType()
        valDim = source.getValDim()
        weightDataType = source.getEdgeWeightDataType()
        weightDim = source.getEdgeWeightDim()

        starts = ends = vals = strands = ids = edges = weights = None
        extras = OrderedDict()

        fmt = TrackFormat.createInstanceFromPrefixList(prefixList, valDataType, valDim,
                                                       weightDataType, weightDim)
        if allowOverlaps and (fmt.isDense() or source.hasNoOverlappingElements()):
            raise IncompatibleTracksError(prettyPrintTrackName(self.trackName) + ' with format: '
                                          + str(fmt) + ' does not satisfy ' + str(self._trackFormatReq))

        # Two elements for dense interval formats, otherwise a single one
        twoElements = fmt.isDense() and fmt.isInterval()
        numEls = 2 if twoElements else 1

        # A bare 'S' dtype is replaced by 'S2' before the arrays are built
        valDataType = 'S2' if valDataType == 'S' else valDataType
        weightDataType = 'S2' if weightDataType == 'S' else weightDataType

        for prefix in prefixList:
            if prefix == 'start':
                starts = np.array([-1], dtype='int32')
            elif prefix == 'end':
                if twoElements:
                    ends = np.array([0, 1], dtype='int32')
                else:
                    ends = np.array([0], dtype='int32')
            elif prefix == 'val':
                emptyVals = np.array([findEmptyVal(valDataType)] * valDim * numEls,
                                     dtype=valDataType)
                if valDim > 1:
                    vals = emptyVals.reshape((numEls, valDim))
                else:
                    vals = emptyVals.reshape(numEls)
            elif prefix == 'strand':
                strands = np.array([1] * numEls, dtype='int8')
            elif prefix == 'id':
                ids = np.array([''] * numEls, dtype='S1')
            elif prefix == 'edges':
                edges = np.array([['']] * numEls, dtype='S1')
            elif prefix == 'weights':
                emptyWeights = np.array([[[findEmptyVal(weightDataType)]]] * weightDim * numEls,
                                        dtype=weightDataType)
                if weightDim > 1:
                    weights = emptyWeights.reshape((numEls, 1, weightDim))
                else:
                    weights = emptyWeights.reshape((numEls, 1))
            else:
                extras[prefix] = np.array([''] * numEls, dtype='S1')

        return TrackView(region, starts, ends, vals, strands, ids, edges, weights,
                         borderHandling, allowOverlaps, extras)
 def _createChildren(self):
     rawStat = self.getRawStatisticClass( self._kwArgs['rawStatistic'] )
     self._childDict = {}
     
     t = self._collectTracks()
     for i in range(len(t)):
         for j in range(i+1,len(t)):
             from gold.util.CommonFunctions import prettyPrintTrackName                
             resKey = ' vs '.join([prettyPrintTrackName(track.trackName, shortVersion=True) for track in [t[i],t[j]] ])
             self._childDict[resKey] = self._addChild( rawStat(self._region, t[i], t[j], self._getTrackFormatReq() ) )
 def _getBoundingRegionShelve(self, trackName):
     if trackName in [None, []] or ExternalTrackManager.isVirtualTrack(trackName):
         brShelve = None
     else:
         brShelve = BoundingRegionShelve(self.genome, trackName, allowOverlaps=False)
         if not brShelve.fileExists():
             raise BoundingRegionsNotAvailableError('Bounding regions not available for track: ' + \
                 prettyPrintTrackName(trackName))
     
     return brShelve
    def _determineStatClass(self):
        """Try each candidate statistic class on the track pair.

        For every class in ``self._statClassList`` a StatJob is run with
        ``minimal=True`` on a MinimalBinSource, using the current choices.
        A track that still has no format converters after the run is
        reported as an IncompatibleTracksError.

        Validation errors (incompatible tracks or assumptions, identical
        track names, failed assertions) are re-raised only when
        ``DebugConfig.PASS_ON_VALIDSTAT_EXCEPTIONS`` is set; otherwise they
        are logged in verbose mode and the next class is tried. An OSError
        is swallowed only if its message mentions 'withOverlaps'.

        NOTE(review): nothing visible here records which class validated
        successfully -- confirm whether this method continues elsewhere.
        """
        assert( hasattr(self, '_track') )
        assert( hasattr(self, '_track2') )
        dummyGESource = MinimalBinSource(self._genome)

        if len(self._statClassList)==0:
            logMessage('Stat class list is empty, for analysisDef: ' + self._analysisLine, level = logging.WARNING)
            if DebugConfig.PASS_ON_VALIDSTAT_EXCEPTIONS:
                raise ShouldNotOccurError('Stat class list is empty. Analysisdef: '+self._analysisLine)

        for statClass in self._statClassList:
            if DebugConfig.VERBOSE:
                logMessage(statClass.__name__ + ': Trying (' + self.getDefAfterChoices() + ')')

            trackA, trackB = self._track, self._track2
            if trackA is None:
                continue

            try:
                StatJob(dummyGESource, trackA, trackB, statClass, minimal=True, **self.getChoices()).run(False)
                #In order not to mess up integration tests
                initSeed()
                for track in [trackA, trackB]:
                    if track is not None and track.formatConverters is None:
                        raise IncompatibleTracksError('Track ' + prettyPrintTrackName(track.trackName) +
                                                      ' was created, but not touched by statistic')

            except (IncompatibleTracksError, AssertionError,
                    IncompatibleAssumptionsError, IdenticalTrackNamesError) as e:
                if DebugConfig.PASS_ON_VALIDSTAT_EXCEPTIONS:
                    raise
                if DebugConfig.VERBOSE:
                    logException(e, message='(Warning: error in _determineStatClass for stat: %s)' % statClass.__name__)
            except OSError as e:
                # Only OS errors mentioning 'withOverlaps' are tolerated
                if DebugConfig.PASS_ON_VALIDSTAT_EXCEPTIONS or 'withOverlaps' not in str(e):
                    raise
    def _getBoundingRegionShelve(self, trackName):
        if trackName in [None, []
                         ] or ExternalTrackManager.isVirtualTrack(trackName):
            brShelve = None
        else:
            brShelve = BoundingRegionShelve(self.genome,
                                            trackName,
                                            allowOverlaps=False)
            if not brShelve.fileExists():
                raise BoundingRegionsNotAvailableError('Bounding regions not available for track: ' + \
                    prettyPrintTrackName(trackName))

        return brShelve
Exemple #11
0
 def _appendConverterOptions(self, track, labelKey):
     if track is None:
         return
     
     if self.getChoice(labelKey) is not None:
         assert(self.getChoice(labelKey) == getClassName(track.formatConverters[0]))
         return
     
     labelPair = (labelKey, '_Treat ' + prettyPrintTrackName(track.trackName) + ' as')
     choicePairs = [ ( getClassName(fc), fc.getOutputDescription(TrackInfo(self._genome, track.trackName).trackFormatName) ) \
                     for fc in track.formatConverters ]
     
     text = '[' + ':'.join(labelPair) + '=' + '/'.join( [':'.join(x) for x in choicePairs] ) + ']'
     self._analysisParts.append(AnalysisOption( text ))
Exemple #12
0
    def _createChildren(self):
        rawStat = self.getRawStatisticClass(self._kwArgs['rawStatistic'])
        self._childDict = {}

        t = self._tracks
        for i in range(len(t)):
            for j in range(i + 1, len(t)):
                from gold.util.CommonFunctions import prettyPrintTrackName
                resKey = ' vs '.join([
                    prettyPrintTrackName(track.trackName, shortVersion=True)
                    for track in [t[i], t[j]]
                ])
                self._childDict[resKey] = self._addChild(
                    rawStat(self._region, t[i], t[j],
                            self._getTrackFormatReq()))
Exemple #13
0
    def _appendConverterOptions(self, track, labelKey):
        if track is None:
            return

        if track.formatConverters is None:
            # May happen in the second track object if one analyses a track versus itself
            return
        
        if self.getChoice(labelKey) is not None:
            assert(self.getChoice(labelKey) == getClassName(track.formatConverters[0]))
            return
        
        labelPair = (labelKey, '_Treat ' + prettyPrintTrackName(track.trackName) + ' as')
        choicePairs = [ ( getClassName(fc), fc.getOutputDescription(TrackInfo(self._genome, track.trackName).trackFormatName) ) \
                        for fc in track.formatConverters ]
        
        text = '[' + ':'.join(labelPair) + '=' + '/'.join( [':'.join(x) for x in choicePairs] ) + ']'
        self._analysisParts.append(AnalysisOption( text ))
Exemple #14
0
 def getTrackView(self, region):
     """Fetch the raw track view of ``region`` and convert it to the required format.

     Overlap and border handling settings are taken from
     ``self._trackFormatReq``. The list of format converters is determined
     once and cached in ``self.formatConverters``; the first entry does the
     conversion.

     Raises IncompatibleTracksError when the converter list is empty or when
     the first converter cannot handle the conversion.
     """
     formatReq = self._trackFormatReq
     allowOverlaps = formatReq.allowOverlaps()
     borderHandling = formatReq.borderHandling()
     assert allowOverlaps is not None
     assert borderHandling is not None

     origTrackView = self._getRawTrackView(region, borderHandling, allowOverlaps)

     # Resolve the converters on first use only
     if self.formatConverters is None:
         self.formatConverters = getFormatConverters(origTrackView.trackFormat, self._trackFormatReq)

     if self.formatConverters == []:
         message = prettyPrintTrackName(self.trackName) + ' with format: '
         message += str(origTrackView.trackFormat)
         if origTrackView.trackFormat._val:
             message += '(' + origTrackView.trackFormat._val + ')'
         message += ' does not satisfy ' + str(self._trackFormatReq)
         raise IncompatibleTracksError(message)

     firstConverter = self.formatConverters[0]
     canConvert = firstConverter.canHandle(origTrackView.trackFormat, self._trackFormatReq)
     if not canConvert:
         raise IncompatibleTracksError(getClassName(firstConverter) +
                                       ' does not support conversion from ' + str(origTrackView.trackFormat) +
                                       ' to ' + str(self._trackFormatReq))
     return firstConverter.convert(origTrackView)
def getSubtracksAsGSuite(genome, parentTrack, username=''):
    """Collect the subtracks of ``parentTrack`` into a GSuite.

    Track names are produced by a ProcTrackNameSource that excludes the
    parent track itself and is given the full-access status of
    ``username``. Each track is added with its URI, a title (the
    pretty-printed track name with one pair of surrounding single quotes
    removed) and its cleaned-up, lower-cased track format name as type.
    """
    from gold.description.TrackInfo import TrackInfo
    from quick.application.GalaxyInterface import GalaxyInterface
    from quick.application.ProcTrackNameSource import ProcTrackNameSource

    trackNameSource = ProcTrackNameSource(
        genome,
        fullAccess=GalaxyInterface.userHasFullAccess(username),
        includeParentTrack=False)

    subtrackSuite = GSuite()
    for subtrackName in trackNameSource.yielder(parentTrack):
        formatName = TrackInfo(genome, subtrackName).trackFormatName
        trackType = cleanUpTrackType(formatName.lower())
        uri = HbGSuiteTrack.generateURI(trackName=subtrackName)

        title = prettyPrintTrackName(subtrackName)
        isQuoted = len(title) > 1 and title.startswith("'") and title.endswith("'")
        if isQuoted:
            title = title[1:-1]

        track = GSuiteTrack(uri, title=title, trackType=trackType, genome=genome)
        subtrackSuite.addTrack(track)

    return subtrackSuite
    def execute(choices, galaxyFn=None, username=''):
        '''
        Is called when execute-button is pushed by web-user. Prints the
        screening report as HTML to standard out, which will be directed to
        a results page in Galaxy history.

        The target track is compared against every track of the selected
        reference collection within the user-specified analysis bins. The
        report holds a summary of the top-ranked reference tracks (at most
        three), the coverage of the query track, a sortable table of the
        ranked results and a plot of the normalized results.

        choices is a list of selections made by web-user in each options box.
        '''
        genome = choices.genome
        targetTrack = ExternalTrackManager.getPreProcessedTrackFromGalaxyTN(
            genome,
            choices.targetTrack,
            printErrors=False,
            printProgress=False)

        refGSuite = getGSuiteFromGalaxyTN(choices.refTrackCollection)

        regSpec, binSpec = UserBinMixin.getRegsAndBinsSpec(choices)

        analysisBins = GalaxyInterface._getUserBinSource(regSpec,
                                                         binSpec,
                                                         genome=genome)

        results = TrackReportCommon.getOverlapResultsForTrackVsCollection(
            genome, targetTrack, refGSuite, analysisBins=analysisBins)
        processedResults = TrackReportCommon.processRawResults(results)

        targetTrackTitle = prettyPrintTrackName(targetTrack)
        title = 'Screening of track ' + targetTrackTitle
        # Rank the reference tracks, highest observed-vs-expected factor first
        sortedProcessedResultsTupleList = sorted(
            processedResults.items(),
            key=lambda x: x[1][STAT_LIST_INDEX[STAT_FACTOR_OBSERVED_VS_EXPECTED
                                               ]],
            reverse=True)
        refTrackNames = [
            x[0].replace('\'', '').replace('"', '')
            for x in sortedProcessedResultsTupleList
        ]
        plotData = [x[1] for x in sortedProcessedResultsTupleList]
        plotData = normalizeMatrixData(plotData)

        queryTitleStr = str(targetTrackTitle)
        topTrackNames = [
            str(x[0]) for x in sortedProcessedResultsTupleList[0:3]
        ]

        htmlCore = HtmlCore()
        htmlCore.begin()
        htmlCore.header(title)

        if choices.bmQid and choices.bmQid not in ['None']:
            htmlCore.append(
                str(
                    quick.gsuite.GSuiteHbIntegration.
                    getAnalysisQuestionInfoHtml(choices.bmQid)))

        htmlCore.divBegin('resultsDiv')
        if len(topTrackNames) == 3:
            htmlCore.paragraph('''
            The query track <b>%s</b> overlaps most strongly (is most highly enriched) with the tracks <b>%s</b>, <b>%s</b> and <b>%s</b> 
            from the selected collection. See below for a full (ranked) table of overlap and enrichment.
        ''' % ((queryTitleStr,) + tuple(topTrackNames)))
        elif topTrackNames:
            # Fewer than three ranked tracks: the fixed four-value template
            # above would fail with "not enough arguments for format string"
            htmlCore.paragraph('''
            The query track <b>%s</b> overlaps most strongly (is most highly enriched) with the %s %s
            from the selected collection. See below for a full (ranked) table of overlap and enrichment.
        ''' % (queryTitleStr,
               'track' if len(topTrackNames) == 1 else 'tracks',
               ' and '.join('<b>%s</b>' % name for name in topTrackNames)))

        htmlCore.paragraph('''
        The coverage of the query track is %s bps.
        ''' % strWithNatLangFormatting(
            TrackReportCommon.getQueryTrackCoverageFromRawOverlapResults(
                results)))

        htmlCore.tableHeader(TrackReportCommon.HEADER_ROW,
                             sortable=True,
                             tableId='resultsTable')
        for refTrackName, refTrackResults in sortedProcessedResultsTupleList:
            line = [refTrackName
                    ] + [strWithNatLangFormatting(x) for x in refTrackResults]
            htmlCore.tableLine(line)
        htmlCore.tableFooter()
        htmlCore.divEnd()

        addPlotToHtmlCore(
            htmlCore,
            refTrackNames,
            TrackReportCommon.HEADER_ROW[1:],
            'stat',
            'Results plot (data is normalized for better visual comparison) ',
            plotData,
            xAxisRotation=315)

        htmlCore.hideToggle(styleClass='debug')
        htmlCore.end()

        print(htmlCore)
    def execute(cls, choices, galaxyFn=None, username=''):
        '''Is called when execute-button is pushed by web-user.
        Should print output as HTML to standard out, which will be directed to a results page in Galaxy history.
        If needed, can call _getStaticPath to get a path where additional files can be put (e.g. generated image files)
        choices is a list of selections made by web-user in each options box.
        '''

        #print 'choices: ', choices
        genome = choices.genome
        queryTrackStr = choices.history if choices.trackSource == 'history' else choices.track
        queryTrack = queryTrackStr.split(':')

        geneSource = choices.geneSource
        upFlankSize = downFlankSize = int(choices.flank)

        print GalaxyInterface.getHtmlBeginForRuns(galaxyFn)
        print GalaxyInterface.getHtmlForToggles(withRunDescription=False)

        geneIntersection = GeneIntersection(genome, geneSource, queryTrack,
                                            galaxyFn)
        geneIntersection.expandReferenceTrack(upFlankSize, downFlankSize)

        expansionStr = ', after expansion,' if not (
            upFlankSize == downFlankSize == 0) else ''
        print '<p>There are %i %s-genes that%s intersect elements from your query track (%s).</p>' % \
            (geneIntersection.getNumberOfIntersectedBins(), geneSource, expansionStr, prettyPrintTrackName(queryTrack))

        idFileNamer = geneIntersection.getGeneIdStaticFileWithContent()
        print '<p>', idFileNamer.getLoadToHistoryLink(
            'Load to history the list',
            'txt'), ' of all %s IDs intersecting query track.</p>' % (
                geneSource)

        regFileNamer = geneIntersection.getIntersectedRegionsStaticFileWithContent(
        )
        print '<p>', regFileNamer.getLoadToHistoryLink(
            'Load to history a BED file',
            'bed'), ' of all %s gene regions intersecting query track.</p>' % (
                geneSource)

        print GalaxyInterface.getHtmlEndForRuns()
                if self.PASS_ON_EXCEPTIONS:
                    raise
                else:
                    self._printExceptionMsg(e, trackName, Error=False)
            except Exception, e:
                TrackInfoDataCollector(self._genome, trackName).removeEntryFromShelve()
                if self.PASS_ON_EXCEPTIONS:
                    raise
                else:
                    self._printExceptionMsg(e, trackName, Error=True)                
                
            self._calcAndStoreSubTrackCount(trackName)
            
        if self._raiseIfAnyWarnings and len(self._warningTrackNames) > 0:
            raise Warning('Warnings occurred in the following tracks: ' + \
                          ', '.join(prettyPrintTrackName(tn) for tn in self._warningTrackNames))
        return atLeastOneFinalized

    def _allTrackNames(self):
        """Abstract hook; this implementation only raises AbstractClassError."""
        raise AbstractClassError()
    
    def _allGESources(self, trackName):
        """Abstract hook; this implementation only raises AbstractClassError."""
        raise AbstractClassError()
        
    def _decorateGESource(self, geSource):
        """Return ``geSource`` wrapped in a GEDependentAttributesHolder."""
        decoratedSource = GEDependentAttributesHolder(geSource)
        return decoratedSource
        
    def _getGESourceManager(self, rawGESource):
        """Return a StdGESourceManager created from ``rawGESource``."""
        sourceManager = StdGESourceManager(rawGESource)
        return sourceManager
        
    def _shouldPreProcess(self):
Exemple #19
0
    def _determineStatClass(self):
        """Try each candidate statistic class on the track pair.

        For every class in ``self._statClassList`` a StatJob is created with
        ``minimal=True`` on a MinimalBinSource and the activation-filtered
        choices. The job is run, and any track that still has no format
        converters afterwards is reported as an IncompatibleTracksError,
        unless another track of the statistic has the same unique key.

        NOTE(review): from the ``else:`` branch of the second ``except``
        clause onwards, the code uses ``trackName``, ``collector`` and
        ``atLeastOneFinalized``, none of which is defined in this method.
        That tail looks like it belongs to a different routine -- confirm
        against the original file before relying on it.
        """
        assert( hasattr(self, '_track') )
        assert( hasattr(self, '_track2') )
        dummyGESource = MinimalBinSource(self._genome)

        if len(self._statClassList)==0:
            #logging.getLogger(HB_LOGGER).warning('Stat class list is empty, for analysisDef: ' + self._analysisLine)
            if self._reversed:
                logMessage('Stat class list is empty, for analysisDef: ' + self._analysisLine, level = logging.WARNING)
            if DebugConfig.PASS_ON_VALIDSTAT_EXCEPTIONS:
                raise ShouldNotOccurError('Stat class list is empty. Analysisdef: '+self._analysisLine)
        
        for statClass in self._statClassList:
            if DebugConfig.VERBOSE:
                logMessage(statClass.__name__ + ': Trying (' + self.getDefAfterChoices() + ')')
#                print statClass.__name__ + ': Trying (' + self.getDefAfterChoices() + ')'

            #for reversed, trackA, trackB in [(False, self._track, self._track2), (True, self._track2, self._track) ]:

            trackA, trackB = self._track, self._track2
            if trackA is None:
                continue

            try:
                # The hackiest of all hacks!
                # TODO: reimplement together with TrackStructure
                # A first job is created only to get hold of the statistic
                # object and the tracks it refers to (if it exposes _tracks).
                job = StatJob(dummyGESource, trackA, trackB, statClass, minimal=True,
                              **self.getChoices(filterByActivation=True))
                stat = job._getSingleResult(dummyGESource[0])[-1]
                tracks = stat._tracks if hasattr(stat, '_tracks') else [trackA, trackB]
                trackUniqueKeys = [Track(tr.trackName).getUniqueKey(self._genome) for tr in tracks
                                   if tr is not None]

                # A second, identically configured job is then actually run
                StatJob(dummyGESource, trackA, trackB, statClass, minimal=True,
                        **self.getChoices(filterByActivation=True)).run(False)
                #In order not to mess up integration tests
                initSeed()

                for trackIndex, restTrackIndexes in allElementsVersusRest(xrange(len(tracks))):
                    track = tracks[trackIndex]
                    if track is not None and track.formatConverters is None:
                        uniqueKeyForRestTracks = \
                            set(trackUniqueKeys[i] for i in restTrackIndexes)

                        # If several tracks are the same, memory memoization will only result
                        # in one RawDataStat being created, for one Track object. This is a
                        # wanted optimization. In other cases, something is probably wrong if
                        # a track has not been touched. However, this rule may be revisited
                        # when track structure functionality is implemented.
                        if trackUniqueKeys[trackIndex] not in uniqueKeyForRestTracks:
                            raise IncompatibleTracksError(
                                'Track ' + prettyPrintTrackName(track.trackName) +
                                ' was created, but not touched by statistic')

            except IncompatibleTracksError, e:
                # Logged in verbose mode; re-raised only if validation
                # exceptions are configured to be passed on
                if DebugConfig.VERBOSE:
                    logException(e, level=logging.DEBUG,
                                 message='(Warning: error in _determineStatClass for stat: %s)' % statClass.__name__)
                if DebugConfig.PASS_ON_VALIDSTAT_EXCEPTIONS:
                    raise
            except (AssertionError, IncompatibleAssumptionsError, IdenticalTrackNamesError), e:
                if DebugConfig.VERBOSE:
                    logException(e, level=logging.DEBUG,
                                 message='(Warning: error in _determineStatClass for stat: %s)' % statClass.__name__)
                if DebugConfig.PASS_ON_VALIDSTAT_EXCEPTIONS:
                    raise
                # NOTE(review): trackName is not defined in this method (see docstring)
                else:
                    self._printExceptionMsg(e, trackName, Error=False)
            except Exception, e:
                # NOTE(review): collector is not defined in this method (see docstring)
                collector.removeEntry()
                if DebugConfig.PASS_ON_PREPROCESS_EXCEPTIONS:
                    raise_from(
                        PreprocessError(
                            self._addContextToExceptionMsg(e, trackName)), e)
                else:
                    self._printExceptionMsg(e, trackName, Error=True)

            self._calcAndStoreSubTrackCount(trackName)

        if self._raiseIfAnyWarnings and len(self._warningTrackNames) > 0:
            raise Warning('Warnings occurred in the following tracks: ' + \
                          ', '.join(prettyPrintTrackName(tn) for tn in self._warningTrackNames))
        # NOTE(review): atLeastOneFinalized is not assigned anywhere in this method
        return atLeastOneFinalized

    def _allTrackNames(self):
        """Placeholder that always raises AbstractClassError.

        NOTE(review): presumably meant to be overridden by subclasses that
        supply the track names to process -- confirm against the subclasses.
        """
        raise AbstractClassError()

    def _allGESourceManagers(self, trackName, allowOverlaps):
        """Start setting up GESourceManager creation for one track.

        trackName is a sequence of strings (it is joined with ':' for the
        status messages) and allowOverlaps is the overlap rule being handled.
        Progress is recorded in self._status.

        NOTE(review): the body visible here ends inside the loop below without
        returning or yielding anything; the remainder of the method appears to
        be missing from this excerpt -- confirm against the original file.
        """
        trackNameStr = ':'.join(trackName)
        # Adjacent string literals are joined before .format() is applied, so
        # both placeholders are filled.
        self._status = "Trying to create GESourceManager " \
                       "(trackName: {}, allowOverlaps: {})".format(trackNameStr, allowOverlaps)
        collector = PreProcMetaDataCollector(self._genome, trackName)
        # Only taken for the no-overlaps rule, and only when the collector
        # reports that the rule queried with True has already been finalized.
        if allowOverlaps == False and collector.overlapRuleHasBeenFinalized(
                True):
            for i in range(1):
                self._status = 'Trying to prepare preprocessing for track "%s"' % trackNameStr + \
                                (' (allowOverlaps: %s)' % allowOverlaps)
Exemple #21
0
 def execute(cls, choices, galaxyFn=None, username=''):
     """Render per-chromosome heatmap images and the HTML pages showing them.

     For up to three tracks (drawn with the red, green and blue colour
     tuples) the values returned by ``cls.getValuesFromBedFile`` are merged
     with ``cls.syncResultDict`` and, for every chromosome that has data,
     drawn pixel by pixel following the layout returned by ``cls.getResult``.
     Each chromosome gets a small image, a big image and a zoomable HTML
     page; a tabbed overview page linking to all of them is written to
     ``galaxyFn``.

     Parameters, as read by this method:
       choices  -- sequence of tool selections. ``choices[0]`` is the genome
                   and ``choices[10]`` the bin size specification. Positions
                   1, 4 and 7 select the source of track 1-3: a "no track"
                   value skips the slot, 'history' takes the track name from
                   ``choices[index + 1]``, anything else takes it from
                   ``choices[index + 2]`` (both colon-separated).
       galaxyFn -- path of the overview output file; also handed to
                   GalaxyRunSpecificFile for every generated file.
       username -- not used by this method.

     Nothing is returned; all results are written to disk.
     """
     # Page for one chromosome. It is filled with %-formatting, so literal
     # percent signs inside the JavaScript are written as '%%'.
     htmlTemplate = '''<html><head>\n\n<link href="http://ajax.googleapis.com/ajax/libs/jqueryui/1.8/themes/base/jquery-ui.css" rel="stylesheet" type="text/css"/>\n  <script src="http://ajax.googleapis.com/ajax/libs/jquery/1.5/jquery.min.js"></script>\n  <script src="http://ajax.googleapis.com/ajax/libs/jqueryui/1.8/jquery-ui.min.js"></script>\n
     <script type='text/javascript' src='https://www.google.com/jsapi'></script>
     <script type='text/javascript'> 
       google.load("visualization", "1", {packages:["corechart"]});\n google.setOnLoadCallback(drawLine);
       function drawLine(divId) {\n}
   </script>
     <style type="text/css">\n    #slider { margin: 10px; }\n  </style>\n  <script type="text/javascript">\n  jQuery(document).ready(function() {\n    jQuery("#slider").slider({min: 0, value: 370, max: %i });\n  });\n  </script>\n\n\n  <link rel="stylesheet" type="text/css" href="http://hyperbrowser.uio.no/gsuite/static/hyperbrowser/files/kaitre//image_zoom/styles/stylesheet.css" />
                 \n<script language="javascript" type="text/javascript" src="http://hyperbrowser.uio.no/gsuite/static/hyperbrowser/files/kaitre//image_zoom/scripts/mootools-1.2.1-core.js">\n</script><script language="javascript" type="text/javascript" src="http://hyperbrowser.uio.no/gsuite/static/hyperbrowser/files/kaitre//image_zoom/scripts/mootools-1.2-more.js">\n</script><script language="javascript" type="text/javascript" src="http://hyperbrowser.uio.no/gsuite/static/hyperbrowser/files/kaitre//image_zoom/scripts/ImageZoom.js"></script>\n
     \n\n\n\n<script type="text/javascript" >\nliste =%s;\ncounter = 0;\n\n\nfunction point_it2(event){\n
     document.myform.posAnchor.value = "";
     chrom = %s;\n
     trackNames = %s;
     pos_x = event.offsetX?(event.offsetX):event.pageX-document.getElementById("zoomer_image").offsetLeft;\n\tpos_y = event.offsetY?(event.offsetY):event.pageY-document.getElementById("zoomer_image").offsetTop;\n        factor = %i;\n        pos_x = Math.floor(pos_x/factor);\n\tpos_y = Math.floor(pos_y/factor);\n\tcounter++;\n
     var strVal = liste[pos_y][pos_x];
     var strTab = strVal.split(",");
     
     
     val = strTab[0];
     streng = chrom+":"+strTab[0]+"k | ";
     for(i=0; i<trackNames.length; i++) { 
         streng = streng + trackNames[i]+': '+strTab[i+1]+'%% | ';
        }
         
     document.myform.posAnchor.value = streng;\n
     jQuery( "#slider" ).slider( "option", "value", val );\n
     
             }\n</script>\n\n\n\n\n</head>
     <body>
     <h2 align="center" style="color:#FF7400;">Heatmap for chromosome %s</h2> 
     <div id="slider" ></div><br>
     \n<form name="myform" action="http://www.mydomain.com/myformhandler.cgi" method="POST">\n<div align="center">\n\n<input type="text" name="posAnchor" size="250" value=".">\n<br>\n</div>\n</form>\n<br>
     <div id="container"><!-- Image zoom start --><div id="zoomer_big_container"></div><div id="zoomer_thumb">\n<a href="%s" target="_blank" >\n<img src="%s" /></a></div><!-- Image zoom end --></div>\n\n\n%s
      
      <br/>%s</body></html>'''

     # One tab per chromosome on the overview page.
     tableRowEntryTemplate = """<div class="tabbertab"><h2>%s</h2><a href="%s"><img src="%s" /></a></div>"""
     htmlPageTemplate = """<html><head>\n<script type="text/javascript" src="/gsuite/static/scripts/tabber.js"></script>\n<link href="/gsuite/static/style/tabber.css" rel="stylesheet" type="text/css" />\n
                 </head><body>%s</body></html>"""

     binsize = parseShortenedSizeSpec(choices[10])
     genome = choices[0]
     chrLength = GenomeInfo.getStdChrLengthDict(genome)

     # tnList holds, per track slot, None (no track), a file name (history
     # track) or a (PlainTrack, regionList) pair. trackNameList holds the
     # printable name, with '.' marking an empty slot.
     tnList = []
     trackNameList = []
     for index in [1, 4, 7]:
         if choices[index] in ['-- No track --', '', None]:
             tnList.append(None)
             trackNameList.append('.')
             continue
         elif choices[index] == 'history':
             trackName = choices[index+1].split(':')
             tnList.append(ExternalTrackManager.extractFnFromGalaxyTN(trackName))
             trackNameList.append(prettyPrintTrackName(trackName))
         else:
             trackName = choices[index+2].split(':')
             track = PlainTrack(trackName)
             regionList = [GenomeRegion(genome, chrom, 0, chrLength[chrom]) for chrom in GenomeInfo.getChrList(genome)]
             tnList.append((track, regionList))
             trackNameList.append(prettyPrintTrackName(trackName))

     # JavaScript array literal with the names of the selected tracks.
     trackNames = repr([v for v in trackNameList if v != '.'])
     tr1, tr2, tr3 = tnList
     ResultDicts = []
     maxVals = []  # maximum coverage in a bin for each track slot (None if unused); used for normalization
     chrsWithData = set()  # chromosomes with any data; no point in generating images without data
     numSelectedTracks = len([1 for v in tnList if v])
     for tr, color in [(tr1, (1, 0, 0)), (tr2, (0, 1, 0)), (tr3, (0, 0, 1))]:
         maxVal = None
         if tr:
             # A single track is drawn with all three channels (grey scale).
             if numSelectedTracks == 1:
                 color = (1, 1, 1)

             # The second return value is not needed here.
             res, _unusedMicroDict, maxVal, trackChrs = cls.getValuesFromBedFile(genome, tr, color, binsize)
             chrsWithData = chrsWithData | trackChrs
             ResultDicts.append(res)
         maxVals.append(maxVal)

     htmlTableContent = []
     resultDict = cls.syncResultDict(ResultDicts)
     binfactor = binsize/1000
     # NOTE: the 'cmp' keyword and xrange below are Python 2 only, like the rest of this file.
     for chrom in sorted(list(chrsWithData), cmp=alphanum):
         valList = resultDict[chrom]
         # Scale factors for the two rendered sizes of the recursive-pattern picture.
         bigFactor = int(10*(binsize/10000.0)**(0.5))
         smallFactor = bigFactor/3
         posMatrix = cls.getResult(len(valList), 2, 2)
         rowLen = len(posMatrix[0])
         # Sized for the enlarged image, but only the unscaled cells are
         # filled below; the remaining 0 placeholders are stripped afterwards.
         javaScriptList = [[0] * (rowLen*bigFactor) for t in xrange(len(posMatrix)*bigFactor)]

         im = Image.new("RGB", (rowLen, len(posMatrix)), "white")
         for yIndex, row in enumerate(posMatrix):
             for xIndex, elem in enumerate(row):
                 im.putpixel((xIndex, yIndex), valList[elem])
                 # "<bin start in k>,<percent track 1>,<percent track 2>,..." for the selected tracks.
                 javaScriptList[yIndex][xIndex] = ','.join([str(elem*binfactor)] + [str(round(v*100/255.0, 2)) for indx, v in enumerate(valList[elem]) if trackNameList[indx] != '.'])
         javaScriptList = [[v for v in jsRow if v != 0] for jsRow in javaScriptList]

         imSmall = im.resize((rowLen*smallFactor, len(posMatrix)*smallFactor))
         im2 = im.resize((rowLen*bigFactor, len(posMatrix)*bigFactor))

         fileElements = [GalaxyRunSpecificFile(['Recursive', chrom+'.png'], galaxyFn),
                         GalaxyRunSpecificFile(['Recursive', chrom+'Big.png'], galaxyFn),
                         GalaxyRunSpecificFile(['Recursive', chrom+'Zooming.html'], galaxyFn)]
         imSmall.save(fileElements[0].getDiskPath(ensurePath=True))
         im2.save(fileElements[1].getDiskPath(ensurePath=True))

         # Colour legend: one 256-step gradient per combination of tracks
         # for which all involved track slots have a maximum value.
         trackAndValRangeTab = zip(trackNameList, maxVals)
         colorTab = []
         onlyOneTrack = len([v for v in maxVals if v]) == 1
         for colorName, vals in [('Red_combination', [1, 0, 0]), ('Green_combination', [0, 1, 0]), ('Blue_combination', [0, 0, 1]), ('Red-Green_combination', [1, 1, 0]), ('Red-Blue_combination', [1, 0, 1]), ('Green-Blue_combination', [0, 1, 1]), ('Red-Green-Blue_combination', [1, 1, 1])]:
             if None not in [maxVals[i] for i in range(len(vals)) if vals[i] > 0]:
                 legendIm = Image.new("RGB", (256, 1), "white")
                 # Must be computed before vals is overridden for the single-track case.
                 tracksInvolved = ' & '.join([str(index+1) for index, v in enumerate(vals) if v > 0])
                 if onlyOneTrack:
                     vals = [1, 1, 1]
                 for val in range(256):
                     legendIm.putpixel((val, 0), tuple([val*v for v in vals]))
                 imColFile = GalaxyRunSpecificFile(['Recursive', colorName+'.png'], galaxyFn)
                 imCol = legendIm.resize((256, 10))
                 imCol.save(imColFile.getDiskPath(ensurePath=True))
                 colorTab.append('<tr><td>Track %s</td><td>  <img src="%s" /></td></tr>' % (tracksInvolved, imColFile.getURL()))

         htmlTnRangeVals = '<br/><br/><table align="center"  cellspacing="10"><tr><th>Track number</th><th>Track name</th><th>Value range</th></tr>\n'
         htmlTnRangeVals += '\n'.join(['<tr><td>Track %i </td><td>%s</td><td> 0 - %i</td></tr>' % (index+1, v[0], v[1]) for index, v in enumerate(trackAndValRangeTab) if v[1]])
         htmlTnRangeVals += '</table> <br/><table align="center"  cellspacing="10"><tr><th>Track combination</th><th>Colour range</th></tr>' + '\n'.join(colorTab) + '</table>\n'
         lineTabStr = ''  # slot in the template for an optional line chart; currently empty

         zoomPageHtml = htmlTemplate % (int(GenomeInfo.getChrLen(genome, chrom)/1000.0)+1, repr(javaScriptList), repr(chrom), trackNames, bigFactor, chrom, fileElements[1].getURL(), fileElements[0].getURL(), htmlTnRangeVals, lineTabStr)
         # Context manager so the file is flushed and closed deterministically.
         with open(fileElements[2].getDiskPath(ensurePath=True), 'w') as zoomPageFile:
             zoomPageFile.write(zoomPageHtml)
         htmlTableContent.append(tableRowEntryTemplate % (chrom, fileElements[2].getURL(), fileElements[0].getURL()))

     tabberMal = '<div class="tabber">%s</div>'
     res = [tabberMal % v for v in htmlTableContent]
     with open(galaxyFn, 'w') as overviewFile:
         overviewFile.write(htmlPageTemplate % ('<br/>'.join(res)))