Esempio n. 1
0
    def __init__(self, genomeAnchor, startList, endList, valList, strandList, idList, edgesList, \
                 weightsList, borderHandling, allowOverlaps, extraLists=None):
        """
        Initialise the view from a genome anchor and its per-element lists.

        At least one of startList, endList, valList and edgesList must be
        given (not None). The only accepted borderHandling value is 'crop'.
        All lists that are given, including every value of extraLists, must
        have the same number of elements (checked after
        _updateNumListElements() has run).

        extraLists maps extra column names to lists. When omitted, a fresh
        empty OrderedDict is used for each call; previously one default
        OrderedDict instance was shared by all calls and handed on to
        TrackFormat.
        """
        if extraLists is None:
            extraLists = OrderedDict()

        assert (startList is not None) or (endList is not None) or (
            valList is not None) or (edgesList is not None)
        assert borderHandling in ['crop']

        # The anchor is copied so that later changes to the caller's object
        # do not affect this instance.
        self.genomeAnchor = genomeAnchor.getCopy()
        self.trackFormat = TrackFormat(startList, endList, valList, strandList,
                                       idList, edgesList, weightsList,
                                       extraLists)
        self.borderHandling = borderHandling
        self.allowOverlaps = allowOverlaps

        self._trackElement = TrackElement(self)
        #self._bpLevelArray = None

        self._startList = startList
        self._endList = endList
        self._valList = valList
        self._strandList = strandList
        self._idList = idList
        self._edgesList = edgesList
        self._weightsList = weightsList
        # Shallow copy: the mapping is private to this instance, the lists
        # themselves are shared with the caller.
        self._extraLists = copy(extraLists)

        self._handlePointsAndPartitions()

        # For every list that is missing, replace the corresponding accessor
        # on the track element with noneFunc.
        if self._startList is None:
            self._trackElement.start = noneFunc
        if self._endList is None:
            self._trackElement.end = noneFunc
        if self._valList is None:
            self._trackElement.val = noneFunc
        if self._strandList is None:
            self._trackElement.strand = noneFunc
        if self._idList is None:
            self._trackElement.id = noneFunc
        if self._edgesList is None:
            self._trackElement.edges = noneFunc
        if self._weightsList is None:
            self._trackElement.weights = noneFunc

        self._updateNumListElements()

        allLists = [self._startList, self._endList, self._valList, self._strandList,
                    self._idList, self._edgesList, self._weightsList] \
            + list(self._extraLists.values())
        for i, elementList in enumerate(allLists):
            assert elementList is None or len(
                elementList) == self._numListElements, 'List (%s): ' % i + str(
                    elementList) + ' (expected %s elements, found %s)' % (
                        self._numListElements, len(elementList))
Esempio n. 2
0
    def __init__(self, geSource):
        """
        Wrap geSource and reset all statistics containers.

        The source is stored in decorated form (see _decorateGESource). The
        value and edge-weight type names of the undecorated source decide
        whether values / weights are treated as categorical.
        """
        self._geSource = self._decorateGESource(geSource)
        self._boundingRegionsAndGEsCorrespond = None

        valTypeName = TrackFormat.createInstanceFromGeSource(geSource).getValTypeName()
        weightTypeName = TrackFormat.createInstanceFromGeSource(geSource).getWeightTypeName()
        self._areValsCategorical = (valTypeName == 'Category')
        self._areEdgeWeightsCategorical = (weightTypeName == 'Category')
        self._valCategories = set()
        self._edgeWeightCategories = set()

        # Counters default to 0; max string lengths default to whatever
        # _initMaxStrLens builds for the configured keys.
        self._numElements = OrderedDefaultDict(int)
        maxStrLensFactory = partial(self._initMaxStrLens, self._getMaxStrLensKeys())
        self._maxStrLens = OrderedDefaultDict(maxStrLensFactory)
        self._maxNumEdges = OrderedDefaultDict(int)

        self._hasCalculatedStats = False
 def _assertIsCompatibleWith(self, tfReq, reqList):
     """
     Check tfReq.isCompatibleWith() against every generated TrackFormat.

     reqList holds the expected value for each property, in the order of
     propList below; None means "no requirement" for that property. Only
     combinations with at least one of start, end and val present are
     tested.
     """
     for start in [None, []]:
         for end in [None, []]:
             for val in [None, []]:
                 for strand in [None, []]:
                     for id,edges,weights in [(None,None,None), ([],None,None), ([],[],None), ([],[],[])]:
                         for extra in [None, {'a':[],'b':[]}]:
                             if [] not in [start, end, val]:
                                 continue

                             tf = TrackFormat(start, end, val, strand, id, edges, weights, extra)

                             # Empty names / lists are represented as False
                             # in the property list.
                             valTypeName = tf.getValTypeName()
                             weightTypeName = tf.getWeightTypeName()
                             extraNames = tf.getExtraNames()
                             propList = [
                                 tf.isDense(), tf.isValued(), tf.isInterval(), tf.isLinked(),
                                 tf.hasStrand(), tf.hasId(), tf.isWeighted(), tf.hasExtra(),
                                 valTypeName if valTypeName != '' else False,
                                 weightTypeName if weightTypeName != '' else False,
                                 extraNames if extraNames != [] else False,
                             ]

                             expected = all([(req is None or req == prop)
                                             for req, prop in zip(reqList, propList)])
                             self.assertEqual(expected, tfReq.isCompatibleWith(tf))
Esempio n. 4
0
    def validateAndReturnErrors(cls, choices):
        '''
        Should validate the selected input parameters. If the parameters are not
        valid, an error text explaining the problem should be returned. The GUI
        then shows this text to the user (if not empty) and greys out the
        execute button (even if the text is empty). If all parameters are valid,
        the method should return None, which enables the execute button.

        Checks, in order: the track/genome selection (via _checkTrack), that
        the chosen track has at least one selectable attribute, and that the
        track format is not dense.
        '''
        trackError = cls._checkTrack(choices,
                                     trackChoiceIndex='track',
                                     genomeChoiceIndex='genome')
        if trackError:
            return trackError

        if choices.track and not choices.attr:
            unsupportedAttrs = ', '.join(cls.UNSUPPORTED_ATTRS)
            return 'You have chosen a track with no attributes (columns) supported for splitting. ' \
                   'Attributes that do not support splitting are: ' + unsupportedAttrs

        source = etm.getGESourceFromGalaxyOrVirtualTN(choices.track, choices.genome)
        fmt = TrackFormat.createInstanceFromGeSource(source)
        if not fmt.isDense():
            return None

        formatMsg = 'The track format of the selected track file is: %s' % fmt.getFormatName()
        return formatMsg + \
               ' This tool only supports track types Points, Segments, or variations of these.'
Esempio n. 5
0
    def _composeContents(self,
                         out,
                         hbColumns,
                         columns,
                         geSource,
                         onlyNonDefault=True,
                         singleDataLine=False):
        """
        Write header lines, the column specification line and data lines to out.

        Bounding region lines are written for every bounding region that is
        not None. When singleDataLine is true, writing stops after the first
        bounding region, with at most one data line written.
        """
        trackFormat = TrackFormat.createInstanceFromGeSource(self._geSource)
        out.write(self._composeHeaderLines(onlyNonDefault))
        out.write(self._composeColSpecLine(columns))

        brTuples = iterateOverBRTuplesWithContainedGEs(
            geSource, onlyYieldTwoGEs=singleDataLine)
        for boundingRegion, elements in brTuples:
            if boundingRegion is not None:
                out.write(self._composeBoundingRegionLine(boundingRegion))

            dataElements = self._removeStartElementIfApplicable(trackFormat, elements)
            for lineNo, element in enumerate(dataElements, 1):
                # The last-line flag is computed against the unfiltered list.
                isLast = (lineNo == len(elements))
                out.write(self._composeDataLine(element, hbColumns, lineNo, isLast))

                if singleDataLine:
                    break

            if singleDataLine:
                return
Esempio n. 6
0
def updateTrackInfoToVersion15(genome, trackName):
    "genome trackName"
    if isinstance(trackName, str):
        trackName = qcf.convertTNstrToTNListFormat(trackName)

    ti = TrackInfo(genome, trackName)
    ti.preProcOverlapRules = []
    trackFormatList = []
    for allowOverlaps in [True, False]:
        from gold.origdata.PreProcessUtils import PreProcessUtils
        if PreProcessUtils.preProcFilesExist(genome, trackName, allowOverlaps):
            ti.preProcOverlapRules.append(allowOverlaps)
            if PreProcessUtils.mergedPreProcFilesExist(genome, trackName, allowOverlaps):
                mergedPrefixInfoDict = PreProcessUtils.getMergedPrefixInfoDict(genome, trackName, allowOverlaps)
                ti.mergedPrefixInfoDictPerOverlapRule[allowOverlaps] = mergedPrefixInfoDict
                trackFormatList.append(TrackFormat.createInstanceFromPrefixInfoDict(mergedPrefixInfoDict))
    if trackFormatList:
        ti.trackFormatHash = hash(tuple(trackFormatList))
    if not ti.geSourceVersion:
        ti.geSourceVersion = ti.preProcVersion
    ti.preProcVersion = '1.5'
    origPath = qcf.createOrigPath(genome, trackName)
    if os.path.exists(origPath):
        ti.id = ti.constructIdFromPath(genome, qcf.createOrigPath(genome, trackName), ti.geSourceVersion, ti.preProcVersion)
    print ti.id, ti.preProcOverlapRules, ti.mergedPrefixInfoDictPerOverlapRule, ti.trackFormatHash, ti.geSourceVersion, ti.preProcVersion
    ti.store()
    print Track(trackName).getUniqueKey(genome)
Esempio n. 7
0
    def _getBasicTrackFormat(choices, tnChoiceIndex=1, genomeChoiceIndex=0):
        """
        Return (genome, trackName, basicTrackFormat) for the selected track.

        For Galaxy tracks the format is read from the track's GE source; if
        that raises a Warning, the format is returned as ''. For other tracks
        the format name is 'Points' for n-mer tracks and otherwise taken from
        TrackInfo.
        """
        genome = GeneralGuiTool._getGenomeChoice(choices, genomeChoiceIndex)[0]
        tn = GeneralGuiTool._getTrackChoice(choices, tnChoiceIndex)[0]

        from quick.application.GalaxyInterface import GalaxyInterface
        from gold.description.TrackInfo import TrackInfo
        from quick.application.ExternalTrackManager import ExternalTrackManager
        from gold.track.TrackFormat import TrackFormat

        if not ExternalTrackManager.isGalaxyTrack(tn):
            if GalaxyInterface.isNmerTrackName(genome, tn):
                formatName = 'Points'
            else:
                formatName = TrackInfo(genome, tn).trackFormatName
            return genome, tn, GeneralGuiTool._convertToBasicTrackFormat(formatName)

        geSource = ExternalTrackManager.getGESourceFromGalaxyOrVirtualTN(tn, genome)
        try:
            formatName = TrackFormat.createInstanceFromGeSource(geSource).getFormatName()
            basicFormat = GeneralGuiTool._convertToBasicTrackFormat(formatName)
        except Warning:
            return genome, tn, ''
        return genome, tn, basicFormat
    def getOptionsBoxOutputFormat(cls, prevChoices):
        """
        Return the output file formats that all selected tracks can be converted to.

        Returns None when format conversion has not been chosen. Otherwise
        returns the file format names of the composers that match the track
        format of every selected track, or an empty list if anything goes
        wrong while determining them (best effort, as this only fills a GUI
        option box).
        """
        if prevChoices.changeFormat != cls.OUTPUT_FORMAT_CONVERT:
            return None

        try:
            from gold.origdata.GenomeElementSource import GenomeElementSource
            from gold.origdata.FileFormatComposer import findMatchingFileFormatComposers
            from gold.track.TrackFormat import TrackFormat

            gSuite = getGSuiteFromGalaxyTN(prevChoices.gsuite)
            selectedTracks = cls._getSelectedTracks(prevChoices, gSuite)

            allGeSources = [
                GenomeElementSource(track.path,
                                    genome=track.genome,
                                    printWarnings=False,
                                    suffix=track.suffix)
                for track in selectedTracks
            ]
            matchingComposersForAllSelectedTracks = \
                [findMatchingFileFormatComposers(TrackFormat.createInstanceFromGeSource(geSource)) for geSource in allGeSources]

            # reduce() raises TypeError when no tracks are selected; that is
            # handled below like any other failure.
            commonComposers = reduce(
                set.intersection,
                map(set, matchingComposersForAllSelectedTracks))
            return [
                composer.fileFormatName for composer in commonComposers
            ]
        except Exception:
            # Was a bare "except:", which also swallowed KeyboardInterrupt
            # and SystemExit.
            return []
 def loadTrackView(trackData, region, borderHandling, allowOverlaps, trackName=[]):
     """
     Create a TrackView covering region from the arrays in trackData.

     trackData : see TrackSource.getTrackData {'id' : smartmemmap}
     region : see GenomeRegion

     Raises IOError if the track representation is not dense and trackData
     lacks the 'leftIndex' or 'rightIndex' array.
     """
     #brShelve = BoundingRegionShelve(region.genome, trackName, allowOverlaps)
     shelve = trackData.boundingRegionShelve
     if shelve is None:
         brInfo = None
     else:
         brInfo = shelve.getBoundingRegionInfo(region)

     # Every array that is neither a reserved prefix nor an index array is
     # treated as an extra column.
     extraNames = []
     for name in trackData:
         if name not in RESERVED_PREFIXES.keys() + ['leftIndex', 'rightIndex']:
             extraNames.append(name)

     def loadAll(names):
         return [TrackViewLoader._getArray(trackData, name, brInfo) for name in names]

     reservedArrays = loadAll(RESERVED_PREFIXES)
     extraArrays = loadAll(extraNames)
     formatArgs = reservedArrays + [OrderedDict(zip(extraNames, extraArrays))]
     trackFormat = TrackFormat(*formatArgs)

     if trackFormat.reprIsDense():
         # Dense representation: slice by region coordinates, made relative
         # to the bounding region start when one is known.
         if brInfo is not None:
             leftIndex = region.start - brInfo.start
             rightIndex = region.end - brInfo.start
         else:
             leftIndex = region.start
             rightIndex = region.end
     else:
         # Otherwise the slice bounds are looked up in the index arrays,
         # keyed by the bins of the first and last position of the region.
         leftBin = CompBinManager.getBinNumber(region.start)
         rightBin = CompBinManager.getBinNumber(region.end-1)
         #leftBin = region.start/COMP_BIN_SIZE
         #rightBin = (region.end-1)/COMP_BIN_SIZE

         if trackData.get('leftIndex') is None or trackData.get('rightIndex') is None:
             raise IOError('Preprocessed track not found. TrackData: ' + ', '.join(trackData.keys()))

         leftIndex = TrackViewLoader._getArray(trackData, 'leftIndex', brInfo, leftBin)
         rightIndex = TrackViewLoader._getArray(trackData, 'rightIndex', brInfo, rightBin)

     def sliceAll(arrays):
         return [(None if array is None else array[leftIndex:rightIndex]) for array in arrays]

     slicedReserved = sliceAll(reservedArrays)
     slicedExtra = sliceAll(extraArrays)

     viewArgs = [region] + slicedReserved + [borderHandling, allowOverlaps]
     viewArgs.append(OrderedDict(zip(extraNames, slicedExtra)))
     tv = TrackView(*viewArgs)

     if not trackFormat.reprIsDense():
         tv.sliceElementsAccordingToGenomeAnchor()
         #tv._doScatteredSlicing()
     return tv
 def extractToFile(self, fn, outTrackName):
     append = False
     for region in GlobalBinSource(self._genome):
         print 'Creating segmentation for chr: ',region.chr
         trackView = PlainTrack(self._inTrackName).getTrackView(region)
         teSource = FunctionCategorizerWrapper(trackView, self._categorizerMethod, minSegLen=self._minSegLen)
         teSource.trackFormat = TrackFormat.createInstanceFromPrefixList(['start','end','val'])
         TrackExtractor._extract(teSource, outTrackName, region, fn, append=append, globalCoords=True, addSuffix=True)
         append = True
Esempio n. 11
0
 def __init__(self, geSource):
     """
     Initialise the base manager and the two-level statistics containers.

     Each container is a defaultdict whose values are OrderedDefaultDicts
     with the given leaf factory.
     """
     GESourceManager.__init__(self, geSource)
     self._tf = TrackFormat.createInstanceFromGeSource(geSource)

     def nested(leafFactory):
         return defaultdict(partial(OrderedDefaultDict, leafFactory))

     self._numElements = nested(int)
     self._valCategories = nested(set)
     self._edgeWeightCategories = nested(set)
     self._maxStrLens = nested(
         partial(self._initMaxStrLens, self._getMaxStrLensKeys()))
     self._maxNumEdges = nested(int)
Esempio n. 12
0
 def testExtra(self):
     # Prefixes other than 'start' are expected to be reported as extra
     # columns, in the order given.
     prefixes = ['start', 'a', 'b', 'c']
     trackFormat = TrackFormat.createInstanceFromPrefixList(prefixes, 'float64', 1, 'float64', 1)

     self.assertTrue(trackFormat.hasExtra(specificExtra='a'))
     self.assertFalse(trackFormat.hasExtra(specificExtra='d'))
     self.assertEqual(['a','b','c'], trackFormat.getExtraNames())

     # A requirement naming only a subset of the extra columns is expected
     # not to match.
     requirement = TrackFormatReq(interval=False, extra=['a','b'])
     self.assertFalse(requirement.isCompatibleWith(trackFormat))
Esempio n. 13
0
    def _getRawTrackView(self, region, borderHandling, allowOverlaps):
        """
        Build a placeholder TrackView for the one-base region [0, 1).

        One array is created per prefix reported by the track's GE source;
        each holds one element (two if the format is dense and interval) with
        fixed placeholder content. Prefixes that are not reserved names end up
        in the extra lists.

        Raises IncompatibleTracksError if allowOverlaps is set and the track
        is dense or reports having no overlapping elements.
        """
        assert region.start == 0 and region.end == 1

        from collections import OrderedDict
        from gold.track.CommonMemmapFunctions import findEmptyVal
        from gold.track.TrackView import TrackView
        import numpy as np

        source = ExternalTrackManager.getGESourceFromGalaxyOrVirtualTN(self.trackName, region.genome)
        prefixes = source.getPrefixList()
        valType = source.getValDataType()
        valDim = source.getValDim()
        weightType = source.getEdgeWeightDataType()
        weightDim = source.getEdgeWeightDim()

        trackFormat = TrackFormat.createInstanceFromPrefixList(prefixes, valType, valDim,
                                                               weightType, weightDim)
        if allowOverlaps and (trackFormat.isDense() or source.hasNoOverlappingElements()):
            raise IncompatibleTracksError(prettyPrintTrackName(self.trackName) + ' with format: '
                                          + str(trackFormat) + ' does not satisfy ' + str(self._trackFormatReq))

        twoElements = trackFormat.isDense() and trackFormat.isInterval()
        elCount = 2 if twoElements else 1

        # A bare string dtype is given an explicit width of 2.
        if valType == 'S':
            valType = 'S2'
        if weightType == 'S':
            weightType = 'S2'

        reserved = dict.fromkeys(['start', 'end', 'val', 'strand', 'id', 'edges', 'weights'])
        extraLists = OrderedDict()

        for prefix in prefixes:
            if prefix == 'start':
                reserved['start'] = np.array([-1], dtype='int32')
            elif prefix == 'end':
                endValues = [0, 1] if twoElements else [0]
                reserved['end'] = np.array(endValues, dtype='int32')
            elif prefix == 'val':
                flatVals = np.array([findEmptyVal(valType)] * valDim * elCount, dtype=valType)
                reserved['val'] = flatVals.reshape((elCount, valDim) if valDim > 1 else elCount)
            elif prefix == 'strand':
                reserved['strand'] = np.array([1] * elCount, dtype='int8')
            elif prefix == 'id':
                reserved['id'] = np.array([''] * elCount, dtype='S1')
            elif prefix == 'edges':
                reserved['edges'] = np.array([['']] * elCount, dtype='S1')
            elif prefix == 'weights':
                flatWeights = np.array([[[findEmptyVal(weightType)]]] * weightDim * elCount,
                                       dtype=weightType)
                reserved['weights'] = flatWeights.reshape(
                    (elCount, 1, weightDim) if weightDim > 1 else (elCount, 1))
            else:
                extraLists[prefix] = np.array([''] * elCount, dtype='S1')

        return TrackView(region, reserved['start'], reserved['end'], reserved['val'],
                         reserved['strand'], reserved['id'], reserved['edges'],
                         reserved['weights'], borderHandling, allowOverlaps, extraLists)
Esempio n. 14
0
    def _getRawTrackView(self, region, borderHandling, allowOverlaps):
        """
        Build a placeholder TrackView for a region of length 1.

        For every prefix reported by the track's GE source an array with one
        element (two if the format is dense and interval) of fixed placeholder
        content is created; non-reserved prefixes are stored as extra lists.

        Raises IncompatibleTracksError if allowOverlaps is set and the track
        is dense or reports having no overlapping elements.
        """
        assert len(region) == 1

        from collections import OrderedDict
        from gold.track.CommonMemmapFunctions import findEmptyVal
        from gold.track.TrackView import TrackView
        import numpy as np

        geSource = ExternalTrackManager.getGESourceFromGalaxyOrVirtualTN(self.trackName, region.genome)
        prefixList = geSource.getPrefixList()
        valDataType, valDim = geSource.getValDataType(), geSource.getValDim()
        weightDataType, weightDim = geSource.getEdgeWeightDataType(), geSource.getEdgeWeightDim()

        tf = TrackFormat.createInstanceFromPrefixList(prefixList, valDataType, valDim,
                                                      weightDataType, weightDim)
        if allowOverlaps and (tf.isDense() or geSource.hasNoOverlappingElements()):
            raise IncompatibleTracksError(prettyPrintTrackName(self.trackName) + ' with format: '
                                          + str(tf) + ' does not satisfy ' + str(self._trackFormatReq))

        denseAndInterval = tf.isDense() and tf.isInterval()
        if denseAndInterval:
            numEls = 2
        else:
            numEls = 1

        # A bare string dtype is given an explicit width of 2.
        if valDataType == 'S':
            valDataType = 'S2'
        if weightDataType == 'S':
            weightDataType = 'S2'

        lists = {'start': None, 'end': None, 'val': None, 'strand': None,
                 'id': None, 'edges': None, 'weights': None}
        extraLists = OrderedDict()

        for prefix in prefixList:
            if prefix == 'start':
                lists[prefix] = np.array([-1], dtype='int32')
            elif prefix == 'end':
                if denseAndInterval:
                    lists[prefix] = np.array([0, 1], dtype='int32')
                else:
                    lists[prefix] = np.array([0], dtype='int32')
            elif prefix == 'val':
                emptyVals = [findEmptyVal(valDataType)] * valDim * numEls
                valShape = (numEls, valDim) if valDim > 1 else numEls
                lists[prefix] = np.array(emptyVals, dtype=valDataType).reshape(valShape)
            elif prefix == 'strand':
                lists[prefix] = np.array([1] * numEls, dtype='int8')
            elif prefix == 'id':
                lists[prefix] = np.array([''] * numEls, dtype='S1')
            elif prefix == 'edges':
                lists[prefix] = np.array([['']] * numEls, dtype='S1')
            elif prefix == 'weights':
                emptyWeights = [[[findEmptyVal(weightDataType)]]] * weightDim * numEls
                weightShape = (numEls, 1, weightDim) if weightDim > 1 else (numEls, 1)
                lists[prefix] = np.array(emptyWeights, dtype=weightDataType).reshape(weightShape)
            else:
                extraLists[prefix] = np.array([''] * numEls, dtype='S1')

        return TrackView(region, lists['start'], lists['end'], lists['val'], lists['strand'],
                         lists['id'], lists['edges'], lists['weights'],
                         borderHandling, allowOverlaps, extraLists)
 def getOptionsBoxConversion(prevChoices):
     """
     Return the available conversions for the selected history element.

     Each entry has the form '<source format> -> <target format> (track type:
     <type>)'; composers whose file format equals the source format are left
     out. Returns None when no history element is selected and an empty list
     if the conversions cannot be determined (best effort, as this only
     fills a GUI option box).
     """
     if not prevChoices.history:
         return None

     try:
         geSource = UniversalConverterTool._getGESource(prevChoices)
         matchingComposers = findMatchingFileFormatComposers(TrackFormat.createInstanceFromGeSource(geSource))
         # Loop-invariant, so looked up once instead of twice per composer.
         sourceFormatName = geSource.getFileFormatName()
         return ['%s -> %s (track type: %s)' % \
                 (sourceFormatName, composerInfo.fileFormatName, composerInfo.trackFormatName) \
                 for composerInfo in matchingComposers if sourceFormatName != composerInfo.fileFormatName]
     except Exception:
         # Was a bare "except:", which also swallowed KeyboardInterrupt and
         # SystemExit.
         return []
 def _calcTrackFormatHash(mergedPrefixInfoDictPerOverlapRule):
     """
     Return the hash of the tuple of track formats, one per overlap rule.

     The formats are built from the merged prefix info dicts, in the key
     order of the given mapping.
     """
     trackFormats = tuple(
         TrackFormat.createInstanceFromPrefixInfoDict(
             mergedPrefixInfoDictPerOverlapRule[overlapRule])
         for overlapRule in mergedPrefixInfoDictPerOverlapRule.keys())
     return hash(trackFormats)
Esempio n. 17
0
 def testValTypes(self):
     # A two-dimensional float128 value column is expected to be classified
     # as 'mean_sd'.
     trackFormat = TrackFormat.createInstanceFromPrefixList(
         ['start', 'val'], 'float128', 2, 'float64', 1)

     self.assertTrue(trackFormat.isValued(specificValType='mean_sd'))
     self.assertFalse(trackFormat.isValued(specificValType='number'))

     self.assertEqual('Mean and std.dev.', trackFormat.getValTypeName())
     self.assertEqual('Valued points', trackFormat.getFormatName())

     # A requirement for value type 'tc' is expected not to match.
     requirement = TrackFormatReq(interval=False, val='tc')
     self.assertFalse(requirement.isCompatibleWith(trackFormat))
Esempio n. 18
0
    def testWeightTypes(self):
        # Three-dimensional 'S8' edge weights are expected to be classified
        # as 'category_vector'.
        prefixes = ['id', 'edges', 'weights']
        trackFormat = TrackFormat.createInstanceFromPrefixList(prefixes, 'float64', 1, 'S8', 3)

        self.assertTrue(trackFormat.isWeighted(specificWeightType='category_vector'))
        self.assertFalse(trackFormat.isWeighted(specificWeightType='number'))

        self.assertEqual('Vector of categories', trackFormat.getWeightTypeName())
        self.assertEqual('Linked base pairs', trackFormat.getFormatName())

        # A requirement for numeric weights is expected not to match.
        requirement = TrackFormatReq(linked=True, weights='number')
        self.assertFalse(requirement.isCompatibleWith(trackFormat))
Esempio n. 19
0
 def testCompatibilityWithExceptions(self):
     # The second argument of isCompatibleWith lists properties to leave out
     # of the comparison; a mismatching value type must still fail.
     tf = TrackFormat.createInstanceFromPrefixList(['start', 'val'], 'float64', 1, 'float64', 1)

     def makeReq(valType):
         return TrackFormatReq(interval=True, strand=True, val=valType)

     self.assertFalse(makeReq('number').isCompatibleWith(tf))
     self.assertFalse(makeReq('number').isCompatibleWith(tf, ['interval']))
     self.assertTrue(makeReq('number').isCompatibleWith(tf, ['interval', 'hasStrand']))
     self.assertFalse(makeReq('tc').isCompatibleWith(tf, ['interval', 'hasStrand']))
Esempio n. 20
0
 def __new__(self, geSource, brRegionList):
     """
     Create the manager variant that fits the track format of geSource.

     Non-dense representations get the sparse manager; dense
     representations get the number-function manager, but only for value
     type 'Number'. Any other dense format raises NotSupportedError.
     """
     tf = TrackFormat.createInstanceFromGeSource(geSource)

     if not tf.reprIsDense():
         managerCls = SparseOneChrSortedNoOverlapsGESourceManager
         return managerCls.__new__(managerCls, geSource, brRegionList)

     if tf.getValTypeName() != 'Number':
         raise NotSupportedError

     managerCls = NumberFunctionOneChrSortedNoOverlapsGESourceManager
     return managerCls.__new__(managerCls, geSource, brRegionList)
 def _getGESourceManagerFromGESource(self, geSource):
     """
     Return the GE source manager to use for geSource.

     Dense representations are only supported for the value types listed
     below; any other dense format raises NotSupportedError.
     """
     tf = TrackFormat.createInstanceFromGeSource(geSource)

     if not tf.reprIsDense():
         return GESourceManager(geSource)

     supportedValTypes = ['Number', 'Number (integer)', 'Case-control']
     if tf.getValTypeName() not in supportedValTypes:
         raise NotSupportedError
     return SkipExtraPassDenseGESourceManager(geSource)
 def getOptionsBoxConversion(prevChoices):
     """
     List the conversions offered for the selected history element.

     Entries look like '<source format> -> <target format> (track type:
     <type>)'. Composers producing the source's own file format are skipped.
     Returns None when no history element is selected, and an empty list
     when the conversions cannot be determined (best effort for a GUI option
     box).
     """
     if not prevChoices.history:
         return None

     try:
         geSource = UniversalConverterTool._getGESource(prevChoices)
         matchingComposers = findMatchingFileFormatComposers(
             TrackFormat.createInstanceFromGeSource(geSource))
         # Loop-invariant, so looked up once instead of twice per composer.
         sourceFormatName = geSource.getFileFormatName()
         return ['%s -> %s (track type: %s)' % \
                 (sourceFormatName, composerInfo.fileFormatName, composerInfo.trackFormatName) \
                 for composerInfo in matchingComposers if sourceFormatName != composerInfo.fileFormatName]
     except Exception:
         # Was a bare "except:", which also swallowed KeyboardInterrupt and
         # SystemExit.
         return []
Esempio n. 23
0
    def testExtra(self):
        # Every prefix besides 'start' is expected to show up as an extra
        # column, in the given order.
        tf = TrackFormat.createInstanceFromPrefixList(
            ['start', 'a', 'b', 'c'], 'float64', 1, 'float64', 1)

        hasA = tf.hasExtra(specificExtra='a')
        self.assertTrue(hasA)
        hasD = tf.hasExtra(specificExtra='d')
        self.assertFalse(hasD)

        self.assertEqual(['a', 'b', 'c'], tf.getExtraNames())

        # Requiring only ['a', 'b'] is expected not to match ['a', 'b', 'c'].
        tfq = TrackFormatReq(interval=False, extra=['a', 'b'])
        isCompatible = tfq.isCompatibleWith(tf)
        self.assertFalse(isCompatible)
Esempio n. 24
0
    def testWeightTypes(self):
        # 'S8' weights of dimension 3 are expected to count as a vector of
        # categories.
        tf = TrackFormat.createInstanceFromPrefixList(['id', 'edges', 'weights'],
                                                      'float64', 1, 'S8', 3)

        isCategoryVector = tf.isWeighted(specificWeightType='category_vector')
        self.assertTrue(isCategoryVector)
        isNumber = tf.isWeighted(specificWeightType='number')
        self.assertFalse(isNumber)

        self.assertEqual('Vector of categories', tf.getWeightTypeName())
        self.assertEqual('Linked base pairs', tf.getFormatName())

        # Requiring numeric weights is expected not to match.
        tfq = TrackFormatReq(linked=True, weights='number')
        isCompatible = tfq.isCompatibleWith(tf)
        self.assertFalse(isCompatible)
Esempio n. 25
0
    def testValTypes(self):
        # float128 values of dimension 2 are expected to count as mean and
        # standard deviation.
        prefixes = ['start', 'val']
        tf = TrackFormat.createInstanceFromPrefixList(prefixes, 'float128', 2, 'float64', 1)

        isMeanSd = tf.isValued(specificValType='mean_sd')
        self.assertTrue(isMeanSd)
        isNumber = tf.isValued(specificValType='number')
        self.assertFalse(isNumber)

        self.assertEqual('Mean and std.dev.', tf.getValTypeName())
        self.assertEqual('Valued points', tf.getFormatName())

        # Requiring value type 'tc' is expected not to match.
        tfq = TrackFormatReq(interval=False, val='tc')
        isCompatible = tfq.isCompatibleWith(tf)
        self.assertFalse(isCompatible)
Esempio n. 26
0
    def testCompatibilityWithExceptions(self):
        # Properties listed in the second argument of isCompatibleWith are
        # left out of the comparison; the value type is still compared.
        tf = TrackFormat.createInstanceFromPrefixList(['start', 'val'], 'float64', 1, 'float64', 1)

        req = TrackFormatReq(interval=True, strand=True, val='number')
        self.assertFalse(req.isCompatibleWith(tf))

        req = TrackFormatReq(interval=True, strand=True, val='number')
        self.assertFalse(req.isCompatibleWith(tf, ['interval']))

        req = TrackFormatReq(interval=True, strand=True, val='number')
        self.assertTrue(req.isCompatibleWith(tf, ['interval', 'hasStrand']))

        req = TrackFormatReq(interval=True, strand=True, val='tc')
        self.assertFalse(req.isCompatibleWith(tf, ['interval', 'hasStrand']))
Esempio n. 27
0
 def getOptionsBoxFormat(prevChoices):
     """
     Return the format options for the selected track.

     The first entry keeps the track's own file format; one further entry is
     added for each matching composer whose file format differs from it.
     Returns None when no track is selected.
     """
     if not prevChoices.track:
         return None

     geSource = etm.getGESourceFromGalaxyOrVirtualTN(
         prevChoices.track, prevChoices.genome)
     tf = TrackFormat.createInstanceFromGeSource(geSource)
     matchingComposers = findMatchingFileFormatComposers(tf)

     conversions = [geSource.getFileFormatName() + \
                    ' (no conversion, track type: %s)' % tf.getFormatName()]
     for composerInfo in matchingComposers:
         if geSource.getFileFormatName() == composerInfo.fileFormatName:
             continue
         conversions.append('%s -> %s (track type: %s)' % (
             geSource.getFileFormatName(),
             composerInfo.fileFormatName,
             composerInfo.trackFormatName))
     return conversions
Esempio n. 28
0
    def __iter__(self):
        """
        Return a copy of this object, set up to iterate over its GE source.

        Raises NotSupportedError unless the source's prefix list contains
        both 'start' and 'end'.
        """
        iterator = copy(self)
        source = iterator._geSource

        #does not support function, partitions and points:
        hasSegmentPrefixes = [attr in source.getPrefixList() for attr in ['start', 'end']]
        if not all(hasSegmentPrefixes):
            currentFormat = TrackFormat.createInstanceFromPrefixList(
                source.getPrefixList(),
                source.getValDataType(),
                source.getValDim(),
                source.getEdgeWeightDataType(),
                source.getEdgeWeightDim())
            raise NotSupportedError('Binning file must be segments. Current file format: ' + \
                                    currentFormat.getFormatName())

        iterator._geIter = source.__iter__()
        return iterator
Esempio n. 29
0
 def testFormats(self):
     # Runs _assertTrackFormat for every generated combination in which at
     # least one of start, end, val and edges is present.
     for start in [None, []]:
         for end in [None, []]:
             for val in [None, []]:
                 for strand in [None, []]:
                     for id, edges, weights in [(None, None, None),
                                                ([], None, None),
                                                ([], [], None),
                                                ([], [], [])]:
                         for extra in [None, {'a': [], 'b': []}]:
                             if [] not in [start, end, val, edges]:
                                 continue

                             tf = TrackFormat(start, end, val, strand,
                                              id, edges, weights, extra)
                             hasExtra = extra is not None
                             extraNames = extra.keys() if hasExtra else []
                             self._assertTrackFormat(tf, start==[], end==[], val==[],
                                                     strand==[], id==[], edges==[], weights==[],
                                                     hasExtra=hasExtra, extra=extraNames)
Esempio n. 30
0
 def _composeContents(self, out, hbColumns, columns, geSource, onlyNonDefault=True, singleDataLine=False):
     """
     Write header lines, the column specification line and data lines to out.

     A bounding region line is written for every bounding region that is not
     None. With singleDataLine set, writing stops after the first bounding
     region, with at most one data line written.
     """
     trackFormat = TrackFormat.createInstanceFromGeSource(self._geSource)
     out.write( self._composeHeaderLines(onlyNonDefault) )
     out.write( self._composeColSpecLine(columns) )

     for boundingRegion, elements in iterateOverBRTuplesWithContainedGEs(geSource, onlyAddTwoGEs=singleDataLine):
         if boundingRegion is not None:
             out.write( self._composeBoundingRegionLine(boundingRegion) )

         dataElements = self._removeStartElementIfApplicable(trackFormat, elements)
         for lineNo, element in enumerate(dataElements, 1):
             # The last-line flag is computed against the unfiltered list.
             isLast = (lineNo == len(elements))
             out.write( self._composeDataLine(element, hbColumns, lineNo, isLast) )

             if singleDataLine:
                 break

         if singleDataLine:
             return
Esempio n. 31
0
 def _calcTrackStatistics(self, chr, allowOverlaps):
     """
     Update the track statistics for chr, unless it is already registered
     in self._numElements for this overlap rule.
     """
     if chr in self._numElements[allowOverlaps]:
         return

     tf = TrackFormat.createInstanceFromGeSource(self._geSource)
     if not (tf.isDense() and tf.isInterval()):
         for el in self._getGEBuckets(allowOverlaps)[chr]:
             self._updateTrackStatistics(el, chr, allowOverlaps)
         return

     # In order to handle the first element of each bounding region for
     # genome partitions and step functions correctly
     geList = self._getGEBuckets(allowOverlaps)[chr]
     brStart = 0
     for br in self._getBRBuckets(allowOverlaps)[chr]:
         brEnd = brStart + br.elCount
         for i, el in enumerate(geList[brStart:brEnd]):
             self._updateTrackStatistics(el, chr, allowOverlaps, \
                 firstElInPartitionBoundingRegion=(i==0))
         brStart = brEnd
 def _allGESources(self, trackName):
     """
     Yield one wrapped GE source per region.

     All regions are used when preprocessing, otherwise only the first one.
     The GE source is obtained from the callback using self._trackName; the
     trackName argument is not used here.
     """
     if self._preProcess:
         regions = self._regionList
     else:
         regions = [self._regionList[0]]

     for region in regions:
         self._status = "Trying to create custom track geSource for region: {}".format(
             region)
         geSource = self._getGeSourceCallBackFunc(self._genome,
                                                  self._trackName, region,
                                                  **self._callBackArgs)

         # Dense representations get a bounding region whose second tuple
         # value is the region length; others get 0 and an element counter.
         if TrackFormat.createInstanceFromGeSource(geSource).reprIsDense():
             yield BrTuplesGESourceWrapper(
                 geSource, [BoundingRegionTuple(region, len(region))])
         else:
             yield GEBoundingRegionElementCounter(
                 geSource, [BoundingRegionTuple(region, 0)])
Esempio n. 33
0
    def __iter__(self):
        """
        Return a copy of this object, prepared for iteration over self._geSource.

        Raises NotSupportedError if the prefix list of the source lacks 'start' or
        'end'; the message includes the format name of the current source.
        """
        # The iteration state is stored on a copy, not on the original object
        self = copy(self)

        # Functions, partitions and points are not supported
        segmentAttrsPresent = [attr in self._geSource.getPrefixList() for attr in ['start', 'end']]
        if not all(segmentAttrsPresent):
            currentFormat = TrackFormat.createInstanceFromPrefixList(self._geSource.getPrefixList(),
                                                                     self._geSource.getValDataType(),
                                                                     self._geSource.getValDim(),
                                                                     self._geSource.getEdgeWeightDataType(),
                                                                     self._geSource.getEdgeWeightDim())
            raise NotSupportedError('Binning file must be segments. Current file format: ' +
                                    currentFormat.getFormatName())

        self._geIter = self._geSource.__iter__()
        return self
Esempio n. 34
0
 def extractToFile(self, fn, outTrackName):
     append = False
     for region in GlobalBinSource(self._genome):
         print 'Creating segmentation for chr: ', region.chr
         trackView = PlainTrack(self._inTrackName).getTrackView(region)
         teSource = FunctionCategorizerWrapper(trackView,
                                               self._categorizerMethod,
                                               minSegLen=self._minSegLen)
         teSource.trackFormat = TrackFormat.createInstanceFromPrefixList(
             ['start', 'end', 'val'])
         TrackExtractor._extract(teSource,
                                 outTrackName,
                                 region,
                                 fn,
                                 append=append,
                                 globalCoords=True,
                                 addSuffix=True)
         append = True
Esempio n. 35
0
    def _getValueTypeName(choices, tnChoiceIndex=1, genomeChoiceIndex=0):
        """
        Return the lower-cased value type name of the track selected in 'choices'.

        - Galaxy tracks: the value type name of the track format of the genome
          element source.
        - Nmer tracks: the empty string.
        - All other tracks: the markType stored in TrackInfo.
        """
        genome = GeneralGuiTool._getGenomeChoice(choices, genomeChoiceIndex)[0]
        tn = GeneralGuiTool._getTrackChoice(choices, tnChoiceIndex)[0]

        from quick.application.GalaxyInterface import GalaxyInterface
        from gold.description.TrackInfo import TrackInfo
        from quick.application.ExternalTrackManager import ExternalTrackManager
        from gold.track.TrackFormat import TrackFormat

        if ExternalTrackManager.isGalaxyTrack(tn):
            geSource = ExternalTrackManager.getGESourceFromGalaxyOrVirtualTN(tn, genome)
            trackFormat = TrackFormat.createInstanceFromGeSource(geSource)
            return trackFormat.getValTypeName().lower()

        if GalaxyInterface.isNmerTrackName(genome, tn):
            return ''

        return TrackInfo(genome, tn).markType.lower()
    def testSorting(self):
        geSourceTest = self._commonSetup()
        
        for caseName in geSourceTest.cases:
            if not caseName.startswith('gtrack'):
                continue
                
            if 'no_sort' in caseName:
                print 'Test case skipped: ' + caseName
                continue
                
            print caseName
            print
            
            case = geSourceTest.cases[caseName]
            testFn = self._writeTestFile(case)
            print open(testFn).read()
            print
            
            sortedContents = sortGtrackFileAndReturnContents(testFn, case.genome)
            print sortedContents

            sourceClass = GenomeElementSource if case.sourceClass is None else case.sourceClass
            forPreProcessor = True if case.sourceClass is None else False
            sortedGeSource = GEDependentAttributesHolder(sourceClass('sortedFile.gtrack', case.genome, \
                                                                     forPreProcessor=forPreProcessor, \
                                                                     printWarnings=False, \
                                                                     strToUseInsteadOfFn=sortedContents))
            
            
            reprIsDense = TrackFormat.createInstanceFromGeSource(sortedGeSource).reprIsDense()
            
            if not reprIsDense:
                self.assertEquals(sorted(case.assertElementList), [ge for ge in sortedGeSource])
            else:
                for ge in sortedGeSource:
                    pass
            
            self.assertEquals(sorted(case.boundingRegionsAssertList), [br for br in sortedGeSource.getBoundingRegionTuples()])
    def _compose(self, out):
        """
        Write the elements of self._geSource to 'out' as a 'wiggle_0' track.

        A track line is written first, with the track name (joined by ':' and with
        spaces replaced by '_') if the source has one. For each bounding region
        tuple with elements, a fixedStep or variableStep declaration line is
        written, followed by one tab-separated line per element.
        """
        name = None
        if self._geSource.getTrackName() is not None:
            name = ':'.join(self._geSource.getTrackName()).replace(' ','_')

        trackLine = 'track type=wiggle_0'
        if name is not None:
            trackLine += ' name=%s' % name
        print >>out, trackLine

        tf = TrackFormat.createInstanceFromGeSource(self._geSource)
        span = self._geSource.getFixedLength()
        step = self._geSource.getFixedGapSize() + span

        isFixedStep = tf.reprIsDense() or step > 1 or (step == 1 and span != 1)

        for brt, geList in iterateOverBRTuplesWithContainedGEs(self._geSource):
            if len(geList) == 0:
                continue

            if isFixedStep:
                self._composeFixedStepDeclarationLine(out, brt.region, step, span)
            else:
                # curChr and curSpan are only defined and used in the variableStep case
                curChr, curSpan = self._composeVariableStepDeclarationLine(out, geList[0])

            for idx, ge in enumerate(geList):
                # The first element of a bounding region is not written for dense
                # interval formats if the source reports that it adds a start element
                skipElement = idx == 0 and tf.isDense() and tf.isInterval() and \
                    self._geSource.addsStartElementToDenseIntervals()
                if skipElement:
                    continue

                val = self._commonFormatNumberVal(ge.val)

                if isFixedStep:
                    cols = [val]
                else:
                    # A new declaration line is needed whenever chromosome or span changes
                    if ge.chr != curChr or self._getVariableSpan(ge) != curSpan:
                        curChr, curSpan = self._composeVariableStepDeclarationLine(out, ge)
                    cols = [str(ge.start+1), val]

                print >>out, '\t'.join(str(col) for col in cols)
Esempio n. 38
0
    def validateAndReturnErrors(choices):
        '''
        Validate the selected input parameters.

        Returns an error text explaining the problem if the parameters are not
        valid. The GUI then shows this text to the user (if not empty) and greys out
        the execute button (even if the text is empty). Returns None if all
        parameters are valid, which enables the execute button.

        The parameters are valid when a genome and a track from history are
        selected, and the value type name of the track format is 'Category'.
        '''
        if not choices.genome:
            return 'Please select genome'

        if not choices.catTrack:
            return 'Please select categorical track from history'

        catTrackName = choices.catTrack.split(':')
        geSource = ExternalTrackManager.getGESourceFromGalaxyOrVirtualTN(catTrackName, choices.genome)
        trackFormat = TrackFormat.createInstanceFromGeSource(geSource)

        if trackFormat.getValTypeName() == 'Category':
            return None

        return 'Please select <b>categorical</b> track from history, current is of type ' + \
            trackFormat.getValTypeName()
    def _allGESourceManagers(self, trackName, allowOverlaps):
        """
        Generate the GESourceManager objects to preprocess for 'trackName'.

        If allowOverlaps is False and the collector reports that the overlap rule
        for allowOverlaps=True has been finalized, a single manager created from the
        track is generated. Otherwise, the sources of self._allGESources are
        handled in turn, and a manager is generated for each source that
        PreProcessUtils.shouldPreProcessGESource accepts. With allowOverlaps=True,
        the generator ends at the first source that is dense or has no overlapping
        elements.

        self._status is updated along the way.
        """
        trackNameStr = ':'.join(trackName)
        self._status = "Trying to create GESourceManager " \
                       "(trackName: {}, allowOverlaps: {})".format(trackNameStr, allowOverlaps)
        collector = PreProcMetaDataCollector(self._genome, trackName)

        if allowOverlaps == False and collector.overlapRuleHasBeenFinalized(True):
            self._status = 'Trying to prepare preprocessing for track "%s"' % trackNameStr + \
                            (' (allowOverlaps: %s)' % allowOverlaps)
            yield self._getGESourceManagerFromTrack(trackName)
            return

        for geSource in self._allGESources(trackName):
            if allowOverlaps == True:
                trackFormat = TrackFormat.createInstanceFromGeSource(geSource)
                if trackFormat.isDense() or geSource.hasNoOverlappingElements():
                    return

            if geSource.hasOrigFile():
                fileNameStr = ' (filename: "%s")' % geSource.getFileName()
            else:
                fileNameStr = ''
            self._status = 'Trying to prepare preprocessing for track "%s"' % trackNameStr + \
                            fileNameStr + \
                            (' (allowOverlaps: %s)' % allowOverlaps)

            if PreProcessUtils.shouldPreProcessGESource(trackName, geSource, allowOverlaps):
                yield self._getGESourceManagerFromGESource(geSource)
Esempio n. 40
0
 def getTrackFormat(self):
     """
     Return the TrackFormat built from the prefix list, the value data type and
     dimension, and the weight data type and dimension of self._tempTrackInfo.
     """
     info = self._tempTrackInfo
     return TrackFormat.createInstanceFromPrefixList(info.prefixList,
                                                     info.valDataType,
                                                     info.valDim,
                                                     info.weightDataType,
                                                     info.weightDim)
 def getOptionsBoxFormat(prevChoices):
     """
     Return the file format names of all composers matching a track format
     created from the prefix list ['start', 'end'].
     """
     segmentFormat = TrackFormat.createInstanceFromPrefixList(['start', 'end'])
     formatNames = []
     for matchingComposer in findMatchingFileFormatComposers(segmentFormat):
         formatNames.append(matchingComposer.fileFormatName)
     return formatNames
Esempio n. 42
0
 def __new__(self, geSource):
     """
     Create a DenseStdGESourceManager if the track format representation of
     'geSource' is dense, else a SparseStdGESourceManager.
     """
     trackFormat = TrackFormat.createInstanceFromGeSource(geSource)
     managerCls = DenseStdGESourceManager if trackFormat.reprIsDense() else SparseStdGESourceManager
     return managerCls.__new__(managerCls, geSource)
Esempio n. 43
0
class TrackView(object):
    """
    View of the elements of a track in relation to a genome region ('genomeAnchor').

    The elements are stored as parallel lists (starts, ends, values, strands, ids,
    edges, weights and any number of named extra lists). A list is None if the
    track lacks that attribute. Iterating over a TrackView returns the same
    TrackElement object for each step, with its index advanced.
    """

    def _handlePointsAndPartitions(self):
        """
        Adjust the internal lists for partitions, step functions and points.

        For partitions and step functions, the starts are taken from the end list
        (all but the last entry), and the first entry of all other lists is dropped.
        For points, the end list is replaced by a VirtualPointEnd built on the start list.
        """
        #if self.trackFormat.isDense() and not self.trackFormat.reprIsDense():
        if self.trackFormat.isPartitionOrStepFunction():
            self._startList = self._endList[:-1]
            self._endList = self._endList[1:]
            if self._valList is not None:
                self._valList = self._valList[1:]
            if self._strandList is not None:
                self._strandList = self._strandList[1:]
            if self._idList is not None:
                self._idList = self._idList[1:]
            if self._edgesList is not None:
                self._edgesList = self._edgesList[1:]
            if self._weightsList is not None:
                self._weightsList = self._weightsList[1:]
            for key, extraList in self._extraLists.items():
                if extraList is not None:
                    self._extraLists[key] = extraList[1:]
        if self.trackFormat.isPoints():
            self._endList = VirtualPointEnd(self._startList)

    def __init__(self, genomeAnchor,
                 startList=None, endList=None, valList=None, strandList=None,
                 idList=None, edgesList=None, weightsList=None,
                 borderHandling='crop', allowOverlaps=False, extraLists=OrderedDict()):
        """
        Create a track view from parallel element lists.

        At least one of startList, endList, valList and edgesList must be given.
        All lists that are not None must end up with the same number of elements.
        'crop' is the only supported borderHandling. The genome anchor and the
        extraLists container are copied; the element lists themselves are not.
        """
        assert (startList is not None) or (endList is not None) or (valList is not None) or (edgesList is not None)
        assert borderHandling in ['crop']

        self.genomeAnchor = genomeAnchor.getCopy()
        self.trackFormat = TrackFormat(startList, endList, valList, strandList, idList, edgesList, weightsList, extraLists)
        self.borderHandling = borderHandling
        self.allowOverlaps = allowOverlaps

        self._trackElement = TrackElement(self)
        #self._bpLevelArray = None

        self._startList = startList
        self._endList = endList
        self._valList = valList
        self._strandList = strandList
        self._idList = idList
        self._edgesList = edgesList
        self._weightsList = weightsList
        self._extraLists = copy(extraLists)

        self._handlePointsAndPartitions()

        # Attributes without a list are answered by noneFunc on the track element
        if self._startList is None:
            self._trackElement.start = noneFunc
        if self._endList is None:
            self._trackElement.end = noneFunc
        if self._valList is None:
            self._trackElement.val = noneFunc
        if self._strandList is None:
            self._trackElement.strand = noneFunc
        if self._idList is None:
            self._trackElement.id = noneFunc
        if self._edgesList is None:
            self._trackElement.edges = noneFunc
        if self._weightsList is None:
            self._trackElement.weights = noneFunc

        self._updateNumListElements()

        allLists = [self._startList, self._endList, self._valList, self._strandList, \
                    self._idList, self._edgesList, self._weightsList] + list(self._extraLists.values())
        for i, elList in enumerate(allLists):
            assert elList is None or len(elList) == self._numListElements, \
                'List (%s): ' % i + str(elList) + ' (expected %s elements, found %s)' % (self._numListElements, len(elList))

    def __iter__(self):
        """Reset the index of the shared track element and return self as iterator."""
        self._trackElement._index = -1
        return self

    def _updateNumListElements(self):
        """
        Recompute the number of list elements and the number of iterated elements.

        The two only differ if overlaps are allowed and the lists are non-empty, in
        which case elements skipped during iteration are not counted.
        """
        self._numListElements = self._computeNumListElements()

        if self.allowOverlaps and self._numListElements > 0:
            self._numIterElements = self._computeNumIterElements()
        else:
            self._numIterElements = self._numListElements

    def _computeNumListElements(self):
        """Return the length of the first of the start, end, val and edges lists that is not None."""
        for elList in [self._startList, self._endList, self._valList, self._edgesList]:
            if elList is not None:
                return len(elList)
        raise ShouldNotOccurError

    def _computeNumIterElements(self):
        """
        Return the number of elements that an iteration over the track view returns.

        Decided from the first of the start, end, val and edges lists that is not
        None: numpy arrays are filtered for stowaways, other lists are counted by
        iterating over the track view.
        """
        for elList in [self._startList, self._endList, self._valList, self._edgesList]:
            if elList is not None:
                if isinstance(elList, numpy.ndarray):
                    return len(self._removeStowawaysFromNumpyArray(elList))
                else:
                    return sum(1 for _ in self)
        raise ShouldNotOccurError

    def __len__(self):
        """Return the size of the genome anchor (not the number of elements)."""
        return self._bpSize()

    def getNumElements(self):
        """Return the number of elements returned when iterating over the track view."""
        return self._numIterElements

    def _bpSize(self):
        """Return the length of the genome anchor."""
        return len(self.genomeAnchor)

    def next(self):
        """
        Advance the shared track element to the next element and return it.

        Raises StopIteration when the index has passed the last list element.
        """
        self._trackElement._index += 1

        #To remove any blind passengers - segments entirely in front of genomeanchor,
        # but sorted after a larger segment crossing the border
        if self.allowOverlaps and not self.trackFormat.reprIsDense():
            while self._trackElement._index < self._numListElements and self._endList[self._trackElement._index] <= self.genomeAnchor.start: #self._trackElement.end() <= 0:
                self._trackElement._index += 1

        if self._trackElement._index < self._numListElements:
            return self._trackElement
        else:
            raise StopIteration

    def _findLeftIndex(self):
        """Return the index of the first element whose end is after the start of the genome anchor."""
        leftIndex = 0
        #remove track elements entirely to the left of the anchor
        while leftIndex < len(self._endList) and self._endList[leftIndex] <= self.genomeAnchor.start:
            leftIndex += 1
        return leftIndex

    def _findRightIndex(self):
        """Return the index after the last element whose start is before the end of the genome anchor."""
        rightIndex = self._numListElements
        while rightIndex > 0 and self._startList[rightIndex-1] >= self.genomeAnchor.end:
            rightIndex -= 1
        return rightIndex

    def sliceElementsAccordingToGenomeAnchor(self):
        """Remove the elements outside the genome anchor. Only for non-dense representations."""
        assert( not self.trackFormat.reprIsDense() )
        self._doScatteredSlicing()

    def _sliceOptionalLists(self, left, right):
        """Slice the val, strand, id, edges, weights and extra lists that are not None to [left:right]."""
        if self._valList is not None:
            self._valList = self._valList[left:right]
        if self._strandList is not None:
            self._strandList = self._strandList[left:right]
        if self._idList is not None:
            self._idList = self._idList[left:right]
        if self._edgesList is not None:
            self._edgesList = self._edgesList[left:right]
        if self._weightsList is not None:
            self._weightsList = self._weightsList[left:right]
        for key, extraList in self._extraLists.items():
            # Extra lists may be None (as accepted by the constructor), and are then left as is
            if extraList is not None:
                self._extraLists[key] = extraList[left:right]

    def _doScatteredSlicing(self):
        """Slice all lists to the elements that are not entirely outside the genome anchor."""
        leftIndex = self._findLeftIndex()
        rightIndex = self._findRightIndex()

        # A genome anchor of size zero contains no elements
        if self._bpSize() == 0:
            rightIndex = leftIndex

        self._startList = self._startList[leftIndex:rightIndex]
        self._endList = self._endList[leftIndex:rightIndex]
        self._sliceOptionalLists(leftIndex, rightIndex)
        self._updateNumListElements()

    def _doDenseSlicing(self, i, j):
        """Slice all lists except starts and ends to [i:j]."""
        self._sliceOptionalLists(i, j)
        self._updateNumListElements()

    def __getslice__(self, i, j):
        """
        Return a new TrackView for the part [i:j] of the genome anchor.

        The start of the new genome anchor is moved i positions. For j >= 0, the
        end is set j positions after the start of the current anchor, limited by
        the current end. For j < 0, the end is moved j positions.
        """
        slicedTV = TrackView(self.genomeAnchor, self._startList, self._endList, \
                             self._valList, self._strandList, self._idList, \
                             self._edgesList, self._weightsList, \
                             self.borderHandling, self.allowOverlaps, \
                             extraLists=self._extraLists)
        slicedTV.trackFormat = self.trackFormat

        slicedTV.genomeAnchor.start += i
        if j >= 0:
            try:
                slicedTV.genomeAnchor.end = min(self.genomeAnchor.end, self.genomeAnchor.start + j)
            except FloatingPointError: # Caused by trackView[:] with self.genomeAnchor.start > 0
                slicedTV.genomeAnchor.end = self.genomeAnchor.end
        else:
            slicedTV.genomeAnchor.end += j

        if self.trackFormat.reprIsDense():
            slicedTV._doDenseSlicing(i,j)
        else:
            slicedTV._doScatteredSlicing()
        return slicedTV

    def _getBpLevelModificationArray(self, indexes, vals):
        """
        Return an array of size bpSize+1 with the sum of 'vals' for each index in 'indexes'.

        Positions after the largest index are set to 0.
        """
        bpLevelMod = numpy.bincount(indexes, vals)
        origLen = len(bpLevelMod)
        bpLevelMod.resize(self._bpSize()+1)
        bpLevelMod[origLen:] = 0
        return bpLevelMod

    def _commonGetBpLevelArray(self, vals):
        """
        Return the bp-level array corresponding to 'vals' (one value per element).

        Dense representations return 'vals' unchanged (overlaps are then not
        allowed). Otherwise, each value is added at the start index and subtracted
        at the end index of its element, and the cumulative sum gives the sum of
        the values of the elements covering each position.
        """
        if self.trackFormat.reprIsDense():
            if self.allowOverlaps:
                raise ShouldNotOccurError()
            return vals
        else:
            bpLevelArray = numpy.zeros(self._bpSize()+1)
            numElements = self.getNumElements()
            if numElements > 0:
                bpLevelArray += self._getBpLevelModificationArray(self.startsAsNumpyArray(), vals)
                bpLevelArray -= self._getBpLevelModificationArray(self.endsAsNumpyArray(), vals)
                bpLevelArray = bpLevelArray.cumsum(dtype='float64')
            return bpLevelArray[:-1]

    def getBinaryBpLevelArray(self):
        """Return a boolean bp-level array, built by counting each element as 1."""
        vals = numpy.ones(self.getNumElements(), dtype='int32')
        # 'bool' denotes the same dtype as the 'bool8' alias, which newer numpy versions have dropped
        return numpy.array(self._commonGetBpLevelArray(vals), dtype='bool')

    def getCoverageBpLevelArray(self):
        """Return an int32 bp-level array, built by counting each element as 1."""
        vals = numpy.ones(self.getNumElements(), dtype='int32')
        return numpy.array(self._commonGetBpLevelArray(vals), dtype='int32')

    def getValueBpLevelArray(self, voidValue=0):
        '''
        Creates a bp-level function of any valued track. In case of scattered tracks,
        uncovered areas are filled with voidValue (which would typically be set to 0 or numpy.nan).
        In the case of overlapping regions, the values are added.'''

        assert self.trackFormat.isValued('number'), self.trackFormat
        vals = self.valsAsNumpyArray()
        bpLevelArray = numpy.array(self._commonGetBpLevelArray(vals), dtype=vals.dtype)
        if voidValue != 0:
            bpLevelArray[~self.getBinaryBpLevelArray()] = voidValue
        return bpLevelArray

    def _removeStowawaysFromNumpyArray(self, numpyArray):
        '''
        To remove any stowaways - segments entirely in front of genomeanchor,
        but sorted after a larger segment crossing the border.
        '''
        if self.allowOverlaps and len(numpyArray) > 0:
            numpyArray = numpyArray[numpy.where(self._endList > self.genomeAnchor.start)]
        return numpyArray

    def _commonAsNumpyArray(self, numpyArray, numpyArrayModMethod, name):
        """
        Return 'numpyArray' without stowaways, passed through numpyArrayModMethod if given.

        Returns None if 'numpyArray' is None. The 'name' argument is not used.
        """
        assert(self.borderHandling in ['crop'])
        if numpyArray is None:
            return None

        numpyArray = self._removeStowawaysFromNumpyArray(numpyArray)

        if numpyArrayModMethod is not None:
            return numpyArrayModMethod(numpyArray)
        else:
            return numpyArray

    def startsAsNumpyArray(self):
        """Return the starts relative to the start of the genome anchor, cropped at 0."""
        return self._commonAsNumpyArray(self._startList, self._startListModMethod, 'starts')

    def _startListModMethod(self, startList):
        """Shift the starts by the start of the genome anchor, with negative values set to 0."""
        return numpy.maximum(startList - self.genomeAnchor.start, \
                             numpy.zeros(len(startList), dtype='int32'))

    def endsAsNumpyArray(self):
        """Return the ends relative to the start of the genome anchor, cropped at the anchor length."""
        return self._commonAsNumpyArray(self._endList, self._endListModMethod, 'ends')

    def _endListModMethod(self, endList):
        """Shift the ends by the start of the genome anchor, limited by the length of the anchor."""
        return numpy.minimum(endList - self.genomeAnchor.start, \
                             numpy.zeros(len(endList), dtype='int32') + len(self.genomeAnchor))

    def valsAsNumpyArray(self):
        """Return the values without stowaways, or None if there is no value list."""
        return self._commonAsNumpyArray(self._valList, None, 'vals')

    def strandsAsNumpyArray(self):
        """Return the strands without stowaways, or None if there is no strand list."""
        return self._commonAsNumpyArray(self._strandList, None, 'strands')

    def idsAsNumpyArray(self):
        """Return the ids without stowaways, or None if there is no id list."""
        return self._commonAsNumpyArray(self._idList, None, 'ids')

    def edgesAsNumpyArray(self):
        """Return the edges without stowaways, or None if there is no edges list."""
        return self._commonAsNumpyArray(self._edgesList, None, 'edges')

    def weightsAsNumpyArray(self):
        """Return the weights without stowaways, or None if there is no weights list."""
        return self._commonAsNumpyArray(self._weightsList, None, 'weights')

    def extrasAsNumpyArray(self, key):
        """Return the extra list stored under 'key' without stowaways. The key must exist."""
        assert self.hasExtra(key)
        return self._commonAsNumpyArray(self._extraLists[key], None, 'extras')

    def allExtrasAsDictOfNumpyArrays(self):
        """Return an OrderedDict from each extra key to the corresponding list without stowaways."""
        return OrderedDict([(key,self.extrasAsNumpyArray(key)) for key in self._extraLists])

    def hasExtra(self, key):
        """Return True if an extra list is stored under 'key'."""
        return key in self._extraLists
Esempio n. 44
0
 def _init(self):
     """
     Set self._allValsAreBedVals to True if the value type of the source is
     'Number (integer)' and every value is in the range 0 to 1000, else to False.
     """
     self._allValsAreBedVals = False

     trackFormat = TrackFormat.createInstanceFromGeSource(self._geSource)
     if trackFormat.getValTypeName() != 'Number (integer)':
         return

     for ge in self._geSource:
         # Stop at the first value outside the range; the flag then stays False
         if not (0 <= ge.val <= 1000):
             return
     self._allValsAreBedVals = True
Esempio n. 45
0
 def _init(self):
     """
     Decide whether all values of the source are integers from 0 to 1000.

     The result is stored in self._allValsAreBedVals, which is False unless the
     value type name of the track format is 'Number (integer)'.
     """
     self._allValsAreBedVals = False

     valTypeName = TrackFormat.createInstanceFromGeSource(self._geSource).getValTypeName()
     if valTypeName == 'Number (integer)':
         valsInRange = ((0 <= element.val <= 1000) for element in self._geSource)
         self._allValsAreBedVals = all(valsInRange)
            testFn = self._writeTestFile(case)
            print open(testFn).read()
            print

            sortedContents = sortGtrackFileAndReturnContents(
                testFn, case.genome)
            print sortedContents

            sourceClass = GenomeElementSource if case.sourceClass is None else case.sourceClass
            forPreProcessor = True if case.sourceClass is None else False
            sortedGeSource = GEDependentAttributesHolder(sourceClass('sortedFile.gtrack', case.genome, \
                                                                     forPreProcessor=forPreProcessor, \
                                                                     printWarnings=False, \
                                                                     strToUseInsteadOfFn=sortedContents))

            reprIsDense = TrackFormat.createInstanceFromGeSource(
                sortedGeSource).reprIsDense()

            if not reprIsDense:
                self.assertEquals(sorted(case.assertElementList),
                                  [ge for ge in sortedGeSource])
            else:
                for ge in sortedGeSource:
                    pass

            self.assertEquals(
                sorted(case.boundingRegionsAssertList),
                [br for br in sortedGeSource.getBoundingRegionTuples()])

    def runTest(self):
        """Do nothing."""
        return None
 def getTrackFormat(self):
     """
     Return the TrackFormat built from the prefix list, the value data type and
     dimension, and the weight data type and dimension stored on this object.
     """
     formatArgs = (self._prefixList,
                   self._valDataType,
                   self._valDim,
                   self._weightDataType,
                   self._weightDim)
     return TrackFormat.createInstanceFromPrefixList(*formatArgs)
Esempio n. 48
0
 def _assertIsCompatibleWith(self, tfReq, reqList):
     """
     Assert that tfReq.isCompatibleWith gives the expected result for a range of track formats.

     A TrackFormat is created for each combination of None and empty containers
     for the different lists, as long as at least one of start, end and val is
     given. The expected result is True if and only if every entry of 'reqList'
     is either None or equal to the corresponding property of the track format.
     """
     for start in [None, []]:
         for end in [None, []]:
             for val in [None, []]:
                 for strand in [None, []]:
                     # Only these combinations of id, edges and weights are tried
                     for id, edges, weights in [(None, None, None),
                                                ([], None, None),
                                                ([], [], None),
                                                ([], [], [])]:
                         for extra in [None, {'a': [], 'b': []}]:
                             # Skip combinations where start, end and val are all None
                             if [] in [start, end, val]:
                                 tf = TrackFormat(start, end, val, strand,
                                                  id, edges, weights, extra)
                                 # Same order as 'reqList'. Empty type names and extra names are listed as False
                                 propList = [tf.isDense(), tf.isValued(), tf.isInterval(), tf.isLinked(), tf.hasStrand(), tf.hasId(), tf.isWeighted(), tf.hasExtra(), \
                                             tf.getValTypeName() if tf.getValTypeName() != '' else False, \
                                             tf.getWeightTypeName() if tf.getWeightTypeName() != '' else False, \
                                             tf.getExtraNames() if tf.getExtraNames() != [] else False]
                                 # A requirement of None matches any property value
                                 isCompatible = (not False in [
                                     (r == None or r == p)
                                     for r, p in zip(reqList, propList)
                                 ])
                                 self.assertEqual(
                                     isCompatible,
                                     tfReq.isCompatibleWith(tf))