def __init__(self, geSource):
        """Prepare the statistics containers for a genome element source.

        The source is stored after being passed through
        ``self._decorateGESource``. Whether values and edge weights are
        categorical is decided from the track format of the undecorated
        ``geSource`` (value/weight type name equal to 'Category').

        All counters and category sets start out empty, and
        ``_hasCalculatedStats`` starts out as False.
        """
        self._geSource = self._decorateGESource(geSource)
        self._boundingRegionsAndGEsCorrespond = None

        # Build the track format once and reuse it for both category checks
        # instead of constructing two identical instances back to back.
        trackFormat = TrackFormat.createInstanceFromGeSource(geSource)
        self._areValsCategorical = trackFormat.getValTypeName() == 'Category'
        self._areEdgeWeightsCategorical = trackFormat.getWeightTypeName() == 'Category'
        self._valCategories = set()
        self._edgeWeightCategories = set()

        self._numElements = OrderedDefaultDict(int)
        # Missing keys get their default from _initMaxStrLens, called with
        # the key list returned by _getMaxStrLensKeys().
        self._maxStrLens = OrderedDefaultDict(partial(self._initMaxStrLens, self._getMaxStrLensKeys()))
        self._maxNumEdges = OrderedDefaultDict(int)

        self._hasCalculatedStats = False
Exemple #2
0
    def validateAndReturnErrors(cls, choices):
        '''
        Validate the selected input parameters.

        Returns an error text describing the first problem found, or None
        when every check passes. Per the GUI contract, a returned text is
        shown to the user and the execute button is greyed out; returning
        None enables the execute button.

        Checks, in order: the generic track check (cls._checkTrack), that the
        chosen track offers at least one attribute to split on, and that the
        track format is not dense.
        '''
        trackError = cls._checkTrack(choices,
                                     trackChoiceIndex='track',
                                     genomeChoiceIndex='genome')
        if trackError:
            return trackError

        if choices.track and not choices.attr:
            unsupportedAttrs = ', '.join(cls.UNSUPPORTED_ATTRS)
            return ('You have chosen a track with no attributes (columns) supported for splitting. '
                    'Attributes that do not support splitting are: ' + unsupportedAttrs)

        source = etm.getGESourceFromGalaxyOrVirtualTN(choices.track, choices.genome)
        fmt = TrackFormat.createInstanceFromGeSource(source)
        if not fmt.isDense():
            return None

        # Dense formats are rejected with a message naming the actual format.
        return ('The track format of the selected track file is: %s' % fmt.getFormatName() +
                ' This tool only supports track types Points, Segments, or variations of these.')
Exemple #3
0
    def _composeContents(self,
                         out,
                         hbColumns,
                         columns,
                         geSource,
                         onlyNonDefault=True,
                         singleDataLine=False):
        """Write header lines, the column spec line and data lines to ``out``.

        Elements are read from ``geSource`` grouped per bounding region; a
        bounding region line is written whenever the region is not None.
        The track format is taken from ``self._geSource``.

        When ``singleDataLine`` is true, output stops after the first data
        line of the first bounding region group.
        """
        trackFormat = TrackFormat.createInstanceFromGeSource(self._geSource)
        out.write(self._composeHeaderLines(onlyNonDefault))
        out.write(self._composeColSpecLine(columns))

        brGroups = iterateOverBRTuplesWithContainedGEs(
            geSource, onlyYieldTwoGEs=singleDataLine)
        for boundingRegion, elements in brGroups:
            if boundingRegion is not None:
                out.write(self._composeBoundingRegionLine(boundingRegion))

            dataElements = self._removeStartElementIfApplicable(trackFormat, elements)
            for lineNo, element in enumerate(dataElements, 1):
                # The "last line" flag compares against the unfiltered group length.
                isLast = lineNo == len(elements)
                out.write(self._composeDataLine(element, hbColumns, lineNo, isLast))

                if singleDataLine:
                    break

            if singleDataLine:
                break
Exemple #4
0
    def _getBasicTrackFormat(choices, tnChoiceIndex=1, genomeChoiceIndex=0):
        """Return ``(genome, trackName, basicTrackFormat)`` for the chosen track.

        For Galaxy tracks the format name is read from the track's genome
        element source; if a Warning is raised while doing so, the format is
        returned as an empty string. For other tracks the format name is
        'Points' for n-mer tracks and otherwise comes from TrackInfo.
        """
        genome = GeneralGuiTool._getGenomeChoice(choices, genomeChoiceIndex)[0]
        trackName = GeneralGuiTool._getTrackChoice(choices, tnChoiceIndex)[0]

        from quick.application.GalaxyInterface import GalaxyInterface
        from gold.description.TrackInfo import TrackInfo
        from quick.application.ExternalTrackManager import ExternalTrackManager
        from gold.track.TrackFormat import TrackFormat

        if not ExternalTrackManager.isGalaxyTrack(trackName):
            if GalaxyInterface.isNmerTrackName(genome, trackName):
                formatName = 'Points'
            else:
                formatName = TrackInfo(genome, trackName).trackFormatName
            return genome, trackName, GeneralGuiTool._convertToBasicTrackFormat(formatName)

        geSource = ExternalTrackManager.getGESourceFromGalaxyOrVirtualTN(
            trackName, genome)
        try:
            formatName = TrackFormat.createInstanceFromGeSource(geSource).getFormatName()
            basicFormat = GeneralGuiTool._convertToBasicTrackFormat(formatName)
        except Warning:
            return genome, trackName, ''
        return genome, trackName, basicFormat
    def getOptionsBoxOutputFormat(cls, prevChoices):
        """List the file format names that all selected tracks can be converted to.

        Returns None unless the user chose ``cls.OUTPUT_FORMAT_CONVERT``.
        Otherwise returns the names of the file format composers that match
        the track format of every selected track in the GSuite, or an empty
        list if anything goes wrong while determining them.
        """
        if prevChoices.changeFormat != cls.OUTPUT_FORMAT_CONVERT:
            return None

        try:
            from gold.origdata.GenomeElementSource import GenomeElementSource
            from gold.origdata.FileFormatComposer import findMatchingFileFormatComposers
            from gold.track.TrackFormat import TrackFormat

            gSuite = getGSuiteFromGalaxyTN(prevChoices.gsuite)
            selectedTracks = cls._getSelectedTracks(prevChoices, gSuite)

            allGeSources = [
                GenomeElementSource(track.path,
                                    genome=track.genome,
                                    printWarnings=False,
                                    suffix=track.suffix)
                for track in selectedTracks
            ]
            matchingComposersForAllSelectedTracks = [
                findMatchingFileFormatComposers(
                    TrackFormat.createInstanceFromGeSource(geSource))
                for geSource in allGeSources
            ]

            # Only composers available for every selected track are offered.
            commonComposers = reduce(
                set.intersection,
                map(set, matchingComposersForAllSelectedTracks))
            return [composer.fileFormatName for composer in commonComposers]
        except Exception:
            # Best effort: an option box must not crash the GUI, so any
            # ordinary failure yields "no options". Unlike a bare except,
            # this lets KeyboardInterrupt and SystemExit propagate.
            return []
 def __init__(self, geSource):
     """Initialise the base manager and the two-level statistics containers.

     Each statistics attribute is a defaultdict whose values are
     OrderedDefaultDict instances with the given leaf default (int, set, or
     the result of _initMaxStrLens for the max string lengths).
     """
     GESourceManager.__init__(self, geSource)
     self._tf = TrackFormat.createInstanceFromGeSource(geSource)

     def nestedStats(leafFactory):
         # Outer level: defaultdict; inner level: OrderedDefaultDict(leafFactory).
         return defaultdict(partial(OrderedDefaultDict, leafFactory))

     self._numElements = nestedStats(int)
     self._valCategories = nestedStats(set)
     self._edgeWeightCategories = nestedStats(set)
     self._maxStrLens = nestedStats(
         partial(self._initMaxStrLens, self._getMaxStrLensKeys()))
     self._maxNumEdges = nestedStats(int)
 def getOptionsBoxConversion(prevChoices):
     """List the possible file format conversions for the selected history element.

     Returns None when no history element is selected. Otherwise returns one
     '<source format> -> <target format> (track type: <type>)' string per
     matching file format composer whose format differs from the source
     format, or an empty list if the options cannot be determined.
     """
     if not prevChoices.history:
         return None

     try:
         geSource = UniversalConverterTool._getGESource(prevChoices)
         matchingComposers = findMatchingFileFormatComposers(
             TrackFormat.createInstanceFromGeSource(geSource))
         # Looked up once instead of twice per composer.
         sourceFormatName = geSource.getFileFormatName()
         return ['%s -> %s (track type: %s)' %
                 (sourceFormatName, composerInfo.fileFormatName, composerInfo.trackFormatName)
                 for composerInfo in matchingComposers
                 if sourceFormatName != composerInfo.fileFormatName]
     except Exception:
         # Best effort: ordinary failures yield "no options". Unlike a bare
         # except, this lets KeyboardInterrupt and SystemExit propagate.
         return []
 def getOptionsBoxConversion(prevChoices):
     """List the possible file format conversions for the selected history element.

     Returns None when no history element is selected. Otherwise returns one
     '<source format> -> <target format> (track type: <type>)' string per
     matching file format composer whose format differs from the source
     format, or an empty list if the options cannot be determined.
     """
     if not prevChoices.history:
         return None

     try:
         geSource = UniversalConverterTool._getGESource(prevChoices)
         matchingComposers = findMatchingFileFormatComposers(
             TrackFormat.createInstanceFromGeSource(geSource))
         # Looked up once instead of twice per composer.
         sourceFormatName = geSource.getFileFormatName()
         return ['%s -> %s (track type: %s)' %
                 (sourceFormatName, composerInfo.fileFormatName, composerInfo.trackFormatName)
                 for composerInfo in matchingComposers
                 if sourceFormatName != composerInfo.fileFormatName]
     except Exception:
         # Best effort: ordinary failures yield "no options". Unlike a bare
         # except, this lets KeyboardInterrupt and SystemExit propagate.
         return []
 def _getGESourceManagerFromGESource(self, geSource):
     """Pick the GE source manager matching the track format of ``geSource``.

     Sources whose representation is not dense get a plain GESourceManager.
     Dense sources get a SkipExtraPassDenseGESourceManager when their value
     type is 'Number', 'Number (integer)' or 'Case-control'; any other dense
     value type raises NotSupportedError.
     """
     trackFormat = TrackFormat.createInstanceFromGeSource(geSource)
     if not trackFormat.reprIsDense():
         return GESourceManager(geSource)

     supportedDenseValTypes = ['Number', 'Number (integer)', 'Case-control']
     if trackFormat.getValTypeName() not in supportedDenseValTypes:
         raise NotSupportedError

     return SkipExtraPassDenseGESourceManager(geSource)
 def __new__(self, geSource, brRegionList):
     """Create an instance of the concrete manager class for ``geSource``.

     Non-dense representations use SparseOneChrSortedNoOverlapsGESourceManager;
     dense representations with value type 'Number' use
     NumberFunctionOneChrSortedNoOverlapsGESourceManager. Any other dense
     value type raises NotSupportedError.
     """
     trackFormat = TrackFormat.createInstanceFromGeSource(geSource)
     if not trackFormat.reprIsDense():
         concreteCls = SparseOneChrSortedNoOverlapsGESourceManager
     elif trackFormat.getValTypeName() == 'Number':
         concreteCls = NumberFunctionOneChrSortedNoOverlapsGESourceManager
     else:
         raise NotSupportedError

     return concreteCls.__new__(concreteCls, geSource, brRegionList)
Exemple #11
0
 def getOptionsBoxFormat(prevChoices):
     """List the output format options for the selected track.

     Returns None when no track is selected. Otherwise the first option keeps
     the source file format ("no conversion"), followed by one option per
     matching file format composer whose format differs from the source
     format.
     """
     if not prevChoices.track:
         return None

     geSource = etm.getGESourceFromGalaxyOrVirtualTN(
         prevChoices.track, prevChoices.genome)
     trackFormat = TrackFormat.createInstanceFromGeSource(geSource)
     matchingComposers = findMatchingFileFormatComposers(trackFormat)

     options = [geSource.getFileFormatName() +
                ' (no conversion, track type: %s)' % trackFormat.getFormatName()]
     for composerInfo in matchingComposers:
         if geSource.getFileFormatName() != composerInfo.fileFormatName:
             options.append('%s -> %s (track type: %s)' % (
                 geSource.getFileFormatName(),
                 composerInfo.fileFormatName,
                 composerInfo.trackFormatName))
     return options
 def _composeContents(self, out, hbColumns, columns, geSource, onlyNonDefault=True, singleDataLine=False):
     """Write header lines, the column spec line and data lines to ``out``.

     Elements are read from ``geSource`` grouped per bounding region; a
     bounding region line is written whenever the region is not None.
     The track format is taken from ``self._geSource``.

     When ``singleDataLine`` is true, output stops after the first data
     line of the first bounding region group.
     """
     trackFormat = TrackFormat.createInstanceFromGeSource(self._geSource)
     out.write(self._composeHeaderLines(onlyNonDefault))
     out.write(self._composeColSpecLine(columns))

     brGroups = iterateOverBRTuplesWithContainedGEs(geSource, onlyAddTwoGEs=singleDataLine)
     for boundingRegion, elements in brGroups:
         if boundingRegion is not None:
             out.write(self._composeBoundingRegionLine(boundingRegion))

         dataElements = self._removeStartElementIfApplicable(trackFormat, elements)
         for lineNo, element in enumerate(dataElements, 1):
             # The "last line" flag compares against the unfiltered group length.
             isLast = lineNo == len(elements)
             out.write(self._composeDataLine(element, hbColumns, lineNo, isLast))

             if singleDataLine:
                 break

         if singleDataLine:
             break
 def _allGESources(self, trackName):
     """Yield one wrapped genome element source per region.

     All regions in ``self._regionList`` are used when ``self._preProcess``
     is set; otherwise only the first region is used. Each source comes from
     ``self._getGeSourceCallBackFunc``. Note that the ``trackName`` argument
     is not read here; the callback receives ``self._trackName``.

     Dense representations are wrapped in BrTuplesGESourceWrapper with a
     bounding region whose element count is the region length; all others
     are wrapped in GEBoundingRegionElementCounter starting from a count of 0.
     """
     if self._preProcess:
         regions = self._regionList
     else:
         regions = [self._regionList[0]]

     for region in regions:
         self._status = "Trying to create custom track geSource for region: {}".format(region)
         geSource = self._getGeSourceCallBackFunc(
             self._genome, self._trackName, region, **self._callBackArgs)

         if TrackFormat.createInstanceFromGeSource(geSource).reprIsDense():
             boundingRegions = [BoundingRegionTuple(region, len(region))]
             yield BrTuplesGESourceWrapper(geSource, boundingRegions)
         else:
             boundingRegions = [BoundingRegionTuple(region, 0)]
             yield GEBoundingRegionElementCounter(geSource, boundingRegions)
 def _calcTrackStatistics(self, chr, allowOverlaps):
     """Update the track statistics for ``chr`` unless they already exist.

     Nothing is done when ``chr`` is already a key in
     ``self._numElements[allowOverlaps]``. Otherwise every element in the
     chromosome's bucket is passed to ``_updateTrackStatistics``.
     """
     if chr in self._numElements[allowOverlaps]:
         return

     trackFormat = TrackFormat.createInstanceFromGeSource(self._geSource)
     if not (trackFormat.isDense() and trackFormat.isInterval()):
         for element in self._getGEBuckets(allowOverlaps)[chr]:
             self._updateTrackStatistics(element, chr, allowOverlaps)
         return

     # Dense interval tracks (genome partitions and step functions): walk the
     # elements one bounding region at a time so that the first element of
     # each bounding region can be flagged.
     elements = self._getGEBuckets(allowOverlaps)[chr]
     offset = 0
     for boundingRegion in self._getBRBuckets(allowOverlaps)[chr]:
         regionElements = elements[offset:offset + boundingRegion.elCount]
         for index, element in enumerate(regionElements):
             self._updateTrackStatistics(
                 element, chr, allowOverlaps,
                 firstElInPartitionBoundingRegion=(index == 0))
         offset += boundingRegion.elCount
Exemple #15
0
    def _getValueTypeName(choices, tnChoiceIndex=1, genomeChoiceIndex=0):
        """Return the lower-cased value type name of the chosen track.

        Galaxy tracks are inspected through their genome element source,
        n-mer tracks give an empty string, and all other tracks use the
        ``markType`` stored in TrackInfo.
        """
        genome = GeneralGuiTool._getGenomeChoice(choices, genomeChoiceIndex)[0]
        trackName = GeneralGuiTool._getTrackChoice(choices, tnChoiceIndex)[0]

        from quick.application.GalaxyInterface import GalaxyInterface
        from gold.description.TrackInfo import TrackInfo
        from quick.application.ExternalTrackManager import ExternalTrackManager
        from gold.track.TrackFormat import TrackFormat

        if ExternalTrackManager.isGalaxyTrack(trackName):
            geSource = ExternalTrackManager.getGESourceFromGalaxyOrVirtualTN(
                trackName, genome)
            typeName = TrackFormat.createInstanceFromGeSource(geSource).getValTypeName()
        elif GalaxyInterface.isNmerTrackName(genome, trackName):
            typeName = ''
        else:
            typeName = TrackInfo(genome, trackName).markType

        return typeName.lower()
    def testSorting(self):
        geSourceTest = self._commonSetup()
        
        for caseName in geSourceTest.cases:
            if not caseName.startswith('gtrack'):
                continue
                
            if 'no_sort' in caseName:
                print 'Test case skipped: ' + caseName
                continue
                
            print caseName
            print
            
            case = geSourceTest.cases[caseName]
            testFn = self._writeTestFile(case)
            print open(testFn).read()
            print
            
            sortedContents = sortGtrackFileAndReturnContents(testFn, case.genome)
            print sortedContents

            sourceClass = GenomeElementSource if case.sourceClass is None else case.sourceClass
            forPreProcessor = True if case.sourceClass is None else False
            sortedGeSource = GEDependentAttributesHolder(sourceClass('sortedFile.gtrack', case.genome, \
                                                                     forPreProcessor=forPreProcessor, \
                                                                     printWarnings=False, \
                                                                     strToUseInsteadOfFn=sortedContents))
            
            
            reprIsDense = TrackFormat.createInstanceFromGeSource(sortedGeSource).reprIsDense()
            
            if not reprIsDense:
                self.assertEquals(sorted(case.assertElementList), [ge for ge in sortedGeSource])
            else:
                for ge in sortedGeSource:
                    pass
            
            self.assertEquals(sorted(case.boundingRegionsAssertList), [br for br in sortedGeSource.getBoundingRegionTuples()])
    def _compose(self, out):
        """Write the genome element source to ``out`` in wiggle format.

        The first line is the 'track type=wiggle_0' declaration, with a
        ' name=' part (track name joined by ':' and spaces replaced by '_')
        when the source has a track name. Each non-empty bounding region
        group then gets a fixedStep or variableStep declaration followed by
        its data lines; in variable-step mode a new declaration is written
        whenever the chromosome or span of an element changes.
        """
        declaration = 'track type=wiggle_0'
        if self._geSource.getTrackName() is not None:
            trackLabel = ':'.join(self._geSource.getTrackName()).replace(' ', '_')
            declaration += ' name=%s' % trackLabel
        print >>out, declaration

        trackFormat = TrackFormat.createInstanceFromGeSource(self._geSource)
        span = self._geSource.getFixedLength()
        step = self._geSource.getFixedGapSize() + span

        isFixedStep = (trackFormat.reprIsDense() or step > 1 or (step == 1 and span != 1))

        for brt, elements in iterateOverBRTuplesWithContainedGEs(self._geSource):
            if len(elements) == 0:
                continue

            if isFixedStep:
                self._composeFixedStepDeclarationLine(out, brt.region, step, span)
            else:
                curChr, curSpan = self._composeVariableStepDeclarationLine(out, elements[0])

            for index, ge in enumerate(elements):
                # Skip the start element that the source adds to dense intervals.
                isAddedStartElement = (index == 0 and trackFormat.isDense()
                                       and trackFormat.isInterval()
                                       and self._geSource.addsStartElementToDenseIntervals())
                if isAddedStartElement:
                    continue

                val = self._commonFormatNumberVal(ge.val)

                if isFixedStep:
                    cols = [val]
                else:
                    if ge.chr != curChr or self._getVariableSpan(ge) != curSpan:
                        curChr, curSpan = self._composeVariableStepDeclarationLine(out, ge)
                    # Positions are written as start + 1.
                    cols = [str(ge.start + 1), val]

                print >>out, '\t'.join(str(col) for col in cols)
Exemple #18
0
    def validateAndReturnErrors(choices):
        '''
        Validate the selected input parameters.

        Returns an error text describing the first problem found, or None
        when every check passes. Per the GUI contract, a returned text is
        shown to the user and the execute button is greyed out; returning
        None enables the execute button.

        Checks, in order: a genome is selected, a track is selected, and the
        value type of the selected track is 'Category'.
        '''
        if not choices.genome:
            return 'Please select genome'
        if not choices.catTrack:
            return 'Please select categorical track from history'

        geSource = ExternalTrackManager.getGESourceFromGalaxyOrVirtualTN(
            choices.catTrack.split(':'), choices.genome)
        trackFormat = TrackFormat.createInstanceFromGeSource(geSource)

        if trackFormat.getValTypeName() == 'Category':
            return None

        return ('Please select <b>categorical</b> track from history, current is of type '
                + trackFormat.getValTypeName())
    def _allGESourceManagers(self, trackName, allowOverlaps):
        """Yield the GE source managers to preprocess for ``trackName``.

        When ``allowOverlaps`` is False and the collector reports that the
        overlap rule for True has been finalized, a single manager built from
        the track itself is yielded. Otherwise the sources from
        ``_allGESources`` are walked and a manager is yielded for each one
        that PreProcessUtils says should be preprocessed. With
        ``allowOverlaps`` True, iteration stops at the first source that is
        dense or has no overlapping elements.

        ``self._status`` is updated before each step.
        """
        trackNameStr = ':'.join(trackName)
        self._status = "Trying to create GESourceManager (trackName: {}, allowOverlaps: {})".format(
            trackNameStr, allowOverlaps)
        collector = PreProcMetaDataCollector(self._genome, trackName)

        if allowOverlaps == False and collector.overlapRuleHasBeenFinalized(True):
            self._status = 'Trying to prepare preprocessing for track "%s"' % trackNameStr + \
                           (' (allowOverlaps: %s)' % allowOverlaps)
            yield self._getGESourceManagerFromTrack(trackName)
            return

        for geSource in self._allGESources(trackName):
            if allowOverlaps == True:
                trackFormat = TrackFormat.createInstanceFromGeSource(geSource)
                if trackFormat.isDense() or geSource.hasNoOverlappingElements():
                    return

            fileInfo = ' (filename: "%s")' % geSource.getFileName() if geSource.hasOrigFile() else ''
            self._status = 'Trying to prepare preprocessing for track "%s"' % trackNameStr + \
                           fileInfo + \
                           (' (allowOverlaps: %s)' % allowOverlaps)

            if PreProcessUtils.shouldPreProcessGESource(trackName, geSource, allowOverlaps):
                yield self._getGESourceManagerFromGESource(geSource)
Exemple #20
0
 def _init(self):
     """Record whether every value of the source lies in the range 0..1000.

     ``_allValsAreBedVals`` stays False unless the value type of the track
     format is 'Number (integer)', in which case all elements are checked.
     """
     self._allValsAreBedVals = False

     trackFormat = TrackFormat.createInstanceFromGeSource(self._geSource)
     if trackFormat.getValTypeName() != 'Number (integer)':
         return

     self._allValsAreBedVals = all(
         0 <= element.val <= 1000 for element in self._geSource)
            testFn = self._writeTestFile(case)
            print open(testFn).read()
            print

            sortedContents = sortGtrackFileAndReturnContents(
                testFn, case.genome)
            print sortedContents

            sourceClass = GenomeElementSource if case.sourceClass is None else case.sourceClass
            forPreProcessor = True if case.sourceClass is None else False
            sortedGeSource = GEDependentAttributesHolder(sourceClass('sortedFile.gtrack', case.genome, \
                                                                     forPreProcessor=forPreProcessor, \
                                                                     printWarnings=False, \
                                                                     strToUseInsteadOfFn=sortedContents))

            reprIsDense = TrackFormat.createInstanceFromGeSource(
                sortedGeSource).reprIsDense()

            if not reprIsDense:
                self.assertEquals(sorted(case.assertElementList),
                                  [ge for ge in sortedGeSource])
            else:
                for ge in sortedGeSource:
                    pass

            self.assertEquals(
                sorted(case.boundingRegionsAssertList),
                [br for br in sortedGeSource.getBoundingRegionTuples()])

    def runTest(self):
        """Do nothing; this method is intentionally empty."""
 def __new__(self, geSource):
     """Create an instance of the dense or sparse standard manager class.

     DenseStdGESourceManager is used when the track format of ``geSource``
     has a dense representation, SparseStdGESourceManager otherwise.
     """
     trackFormat = TrackFormat.createInstanceFromGeSource(geSource)
     if trackFormat.reprIsDense():
         concreteCls = DenseStdGESourceManager
     else:
         concreteCls = SparseStdGESourceManager

     return concreteCls.__new__(concreteCls, geSource)
 def _init(self):
     """Record whether every value of the source lies in the range 0..1000.

     ``_allValsAreBedVals`` stays False unless the value type of the track
     format is 'Number (integer)', in which case all elements are checked.
     """
     self._allValsAreBedVals = False

     trackFormat = TrackFormat.createInstanceFromGeSource(self._geSource)
     if trackFormat.getValTypeName() != 'Number (integer)':
         return

     self._allValsAreBedVals = all(
         0 <= element.val <= 1000 for element in self._geSource)