Beispiel #1
0
    def __init__(self, geSource):
        """
        Wrap ``geSource`` and reset all statistics containers.

        The source is stored in decorated form (see ``_decorateGESource``), while the
        categorical flags are derived from the track format of the undecorated source.
        """
        self._geSource = self._decorateGESource(geSource)
        self._boundingRegionsAndGEsCorrespond = None

        # Whether values / edge weights are of the 'Category' type.
        valTypeName = TrackFormat.createInstanceFromGeSource(geSource).getValTypeName()
        self._areValsCategorical = (valTypeName == 'Category')
        weightTypeName = TrackFormat.createInstanceFromGeSource(geSource).getWeightTypeName()
        self._areEdgeWeightsCategorical = (weightTypeName == 'Category')
        self._valCategories = set()
        self._edgeWeightCategories = set()

        # Statistics containers; missing keys are created on first access.
        self._numElements = OrderedDefaultDict(int)
        maxStrLensFactory = partial(self._initMaxStrLens, self._getMaxStrLensKeys())
        self._maxStrLens = OrderedDefaultDict(maxStrLensFactory)
        self._maxNumEdges = OrderedDefaultDict(int)

        self._hasCalculatedStats = False
Beispiel #2
0
 def _assertIsCompatibleWith(self, tfReq, reqList):
     """
     Check ``tfReq.isCompatibleWith`` against every constructible TrackFormat.

     All combinations of present ([] / dict) and absent (None) columns are tried,
     as long as at least one of start, end and val is present. For each format the
     expected answer is derived from ``reqList``: an entry of None means "no
     requirement", any other entry must equal the corresponding format property.
     """
     for start in [None, []]:
         for end in [None, []]:
             for val in [None, []]:
                 # Skip combinations where start, end and val are all absent.
                 if start is None and end is None and val is None:
                     continue
                 for strand in [None, []]:
                     for ids, edges, weights in [(None,None,None), ([],None,None), ([],[],None), ([],[],[])]:
                         for extra in [None, {'a':[],'b':[]}]:
                             tf = TrackFormat(start, end, val, strand, ids, edges, weights, extra)

                             # Empty names / name lists count as "property not present".
                             valTypeName = tf.getValTypeName()
                             weightTypeName = tf.getWeightTypeName()
                             extraNames = tf.getExtraNames()
                             propList = [
                                 tf.isDense(), tf.isValued(), tf.isInterval(), tf.isLinked(),
                                 tf.hasStrand(), tf.hasId(), tf.isWeighted(), tf.hasExtra(),
                                 valTypeName if valTypeName != '' else False,
                                 weightTypeName if weightTypeName != '' else False,
                                 extraNames if extraNames != [] else False,
                             ]

                             expected = all([(req is None or req == prop)
                                             for req, prop in zip(reqList, propList)])
                             self.assertEqual(expected, tfReq.isCompatibleWith(tf))
Beispiel #3
0
 def loadTrackView(trackData, region, borderHandling, allowOverlaps, trackName=[]):
     """
     Build a TrackView over ``region`` from preprocessed track data.

     trackData : see TrackSource.getTrackData {'id' : smartmemmap}; must also expose
                 a ``boundingRegionShelve`` attribute (which may be None).
     region : see GenomeRegion
     borderHandling, allowOverlaps : passed on unchanged to TrackView.
     trackName : not used by this function.

     Raises IOError if the track is not dense in its representation and the
     'leftIndex' / 'rightIndex' arrays are missing from ``trackData``.
     """
     #brShelve = BoundingRegionShelve(region.genome, trackName, allowOverlaps)
     brShelve = trackData.boundingRegionShelve
     brInfo = brShelve.getBoundingRegionInfo(region) if brShelve is not None else None

     # Built once (not per array name) and as a set, so that it also works where
     # dict.keys() returns a view that cannot be concatenated with a list.
     nonExtraNames = set(RESERVED_PREFIXES).union(['leftIndex', 'rightIndex'])
     extraArrayNames = [arrayName for arrayName in trackData if arrayName not in nonExtraNames]

     reservedArrays = [TrackViewLoader._getArray(trackData, arrayName, brInfo) for arrayName in RESERVED_PREFIXES]
     extraArrays = [TrackViewLoader._getArray(trackData, arrayName, brInfo) for arrayName in extraArrayNames]
     trackFormat = TrackFormat( *(reservedArrays + [OrderedDict(zip(extraArrayNames, extraArrays))]) )

     if trackFormat.reprIsDense():
         # Dense representation: indexes are base pair offsets, relative to the
         # start of the bounding region when one is known.
         if brInfo is None:
             leftIndex = region.start
             rightIndex = region.end
         else:
             leftIndex = region.start - brInfo.start
             rightIndex = region.end - brInfo.start
     else:
         # Otherwise the element indexes are looked up in the precomputed
         # 'leftIndex' / 'rightIndex' arrays, using the bin numbers of the region borders.
         leftBin = CompBinManager.getBinNumber(region.start)
         rightBin = CompBinManager.getBinNumber(region.end-1)
         #leftBin = region.start/COMP_BIN_SIZE
         #rightBin = (region.end-1)/COMP_BIN_SIZE

         if trackData.get('leftIndex') is None or trackData.get('rightIndex') is None:
             raise IOError('Preprocessed track not found. TrackData: ' + ', '.join(trackData.keys()))

         leftIndex = TrackViewLoader._getArray(trackData, 'leftIndex', brInfo, leftBin)
         rightIndex = TrackViewLoader._getArray(trackData, 'rightIndex', brInfo, rightBin)

     slicedReservedArrays = [(array[leftIndex:rightIndex] if array is not None else None) for array in reservedArrays]
     slicedExtraArrays = [(array[leftIndex:rightIndex] if array is not None else None) for array in extraArrays]

     argList = [region] + slicedReservedArrays + [borderHandling, allowOverlaps] + [OrderedDict(zip(extraArrayNames, slicedExtraArrays))]
     tv = TrackView( *(argList) )

     if not trackFormat.reprIsDense():
         # The index-based slice is only bin-accurate; trim to the exact region.
         tv.sliceElementsAccordingToGenomeAnchor()
         #tv._doScatteredSlicing()
     return tv
Beispiel #4
0
 def testExtra(self):
     """Extra columns are reported by name, and a request for extras 'a','b' rejects a format with 'a','b','c'."""
     prefixes = ['start', 'a', 'b', 'c']
     trackFormat = TrackFormat.createInstanceFromPrefixList(prefixes, 'float64', 1, 'float64', 1)

     # Lookup of a specific extra column.
     self.assertTrue(trackFormat.hasExtra(specificExtra='a'))
     self.assertFalse(trackFormat.hasExtra(specificExtra='d'))

     self.assertEqual(['a','b','c'], trackFormat.getExtraNames())

     formatReq = TrackFormatReq(interval=False, extra=['a','b'])
     self.assertFalse(formatReq.isCompatibleWith(trackFormat))
Beispiel #5
0
    def testWeightTypes(self):
        """A linked format with 'S8' weights of dimension 3 is a category vector, not a number."""
        prefixes = ['id', 'edges', 'weights']
        trackFormat = TrackFormat.createInstanceFromPrefixList(prefixes, 'float64', 1, 'S8', 3)

        # Weight type queries.
        self.assertTrue(trackFormat.isWeighted(specificWeightType='category_vector'))
        self.assertFalse(trackFormat.isWeighted(specificWeightType='number'))

        # Human-readable names.
        self.assertEqual('Vector of categories', trackFormat.getWeightTypeName())
        self.assertEqual('Linked base pairs', trackFormat.getFormatName())

        formatReq = TrackFormatReq(linked=True, weights='number')
        self.assertFalse(formatReq.isCompatibleWith(trackFormat))
Beispiel #6
0
 def testValTypes(self):
     """A point format with 'float128' values of dimension 2 is 'mean_sd', not a plain number."""
     prefixes = ['start', 'val']
     trackFormat = TrackFormat.createInstanceFromPrefixList(prefixes, 'float128', 2, 'float64', 1)

     # Value type queries.
     self.assertTrue(trackFormat.isValued(specificValType='mean_sd'))
     self.assertFalse(trackFormat.isValued(specificValType='number'))

     # Human-readable names.
     self.assertEqual('Mean and std.dev.', trackFormat.getValTypeName())
     self.assertEqual('Valued points', trackFormat.getFormatName())

     formatReq = TrackFormatReq(interval=False, val='tc')
     self.assertFalse(formatReq.isCompatibleWith(trackFormat))
Beispiel #7
0
 def testCompatibilityWithExceptions(self):
     """Properties listed as exceptions are ignored by isCompatibleWith; all others must still match."""
     trackFormat = TrackFormat.createInstanceFromPrefixList(['start', 'val'], 'float64', 1, 'float64', 1)

     # No exceptions: the format is rejected.
     numberReq = TrackFormatReq(interval=True, strand=True, val='number')
     self.assertFalse(numberReq.isCompatibleWith(trackFormat))

     # Excepting only 'interval' is not enough.
     numberReq = TrackFormatReq(interval=True, strand=True, val='number')
     self.assertFalse(numberReq.isCompatibleWith(trackFormat, ['interval']))

     # Excepting both 'interval' and 'hasStrand' makes the format acceptable.
     numberReq = TrackFormatReq(interval=True, strand=True, val='number')
     self.assertTrue(numberReq.isCompatibleWith(trackFormat, ['interval', 'hasStrand']))

     # The value type is not excepted, so a 'tc' requirement still fails.
     tcReq = TrackFormatReq(interval=True, strand=True, val='tc')
     self.assertFalse(tcReq.isCompatibleWith(trackFormat, ['interval', 'hasStrand']))
 def _getGESourceManagerFromGESource(self, geSource):
     """
     Return a RegionBasedGESourceManager for ``geSource`` over ``self._regionList``.

     Sources that are not dense in representation get statistics calculated in an
     extra pass and elements counted per bounding region. Dense sources skip both,
     and are only supported for the value types 'Number', 'Number (integer)' and
     'Case-control'; any other value type raises NotSupportedError.
     """
     trackFormat = TrackFormat.createInstanceFromGeSource(geSource)

     if not trackFormat.reprIsDense():
         return RegionBasedGESourceManager(geSource, self._regionList,
                                           calcStatsInExtraPass=True, countElsInBoundingRegions=True)

     if trackFormat.getValTypeName() not in ['Number', 'Number (integer)', 'Case-control']:
         raise NotSupportedError

     return RegionBasedGESourceManager(geSource, self._regionList,
                                       calcStatsInExtraPass=False, countElsInBoundingRegions=False)
Beispiel #9
0
    def __iter__(self):
        """
        Return a copy of this object, set up to iterate over the wrapped source.

        Raises NotSupportedError unless the source has both 'start' and 'end'
        among its prefixes.
        """
        self = copy(self)

        # Functions, partitions and points are not supported.
        requiredPresent = [prefix in self._geSource.getPrefixList() for prefix in ['start', 'end']]
        if not all(requiredPresent):
            source = self._geSource
            currentFormat = TrackFormat.createInstanceFromPrefixList(source.getPrefixList(),
                                                                     source.getValDataType(),
                                                                     source.getValDim(),
                                                                     source.getEdgeWeightDataType(),
                                                                     source.getEdgeWeightDim())
            raise NotSupportedError('Binning file must be segments. Current file format: ' +
                                    currentFormat.getFormatName())

        self._geIter = self._geSource.__iter__()
        return self
Beispiel #10
0
    def _composeContents(self, out, hbColumns, columns, geSource, onlyNonDefault=True, singleDataLine=False):
        """
        Write header lines, the column specification line and the data lines to ``out``.

        Elements are taken from ``geSource``, grouped per bounding region; a bounding
        region line is written before each group whose bounding region is not None.
        With ``singleDataLine`` set, writing stops after the first data line (or after
        the first group if that group yields no data line).
        """
        # Note: the track format is read from self._geSource, not from the geSource argument.
        trackFormat = TrackFormat.createInstanceFromGeSource(self._geSource)

        out.write(self._composeHeaderLines(onlyNonDefault))
        out.write(self._composeColSpecLine(columns))

        for boundingRegion, elements in iterateOverBRTuplesWithContainedGEs(geSource, onlyYieldTwoGEs=singleDataLine):
            if boundingRegion is not None:
                out.write(self._composeBoundingRegionLine(boundingRegion))

            for index, element in enumerate(self._removeStartElementIfApplicable(trackFormat, elements)):
                lineNum = index + 1
                isLast = (lineNum == len(elements))
                out.write(self._composeDataLine(element, hbColumns, lineNum, isLast))

                if singleDataLine:
                    return

            if singleDataLine:
                return
Beispiel #11
0
    def __init__(self, genomeAnchor, startList, endList, valList, strandList, idList, edgesList, \
                 weightsList, borderHandling, allowOverlaps, extraLists=None):
        """
        Create a view over the given element lists inside ``genomeAnchor``.

        genomeAnchor   : region covered by the view; a copy is stored.
        startList ... weightsList : element columns, each either a list/array or None.
                         At least one of startList, endList, valList and edgesList
                         must be given.
        borderHandling : only 'crop' is accepted.
        allowOverlaps  : stored as-is; used when counting and iterating elements.
        extraLists     : ordered mapping from extra column name to list; defaults to
                         an empty OrderedDict. A shallow copy is stored.

        After set-up, every list that is not None must hold the same number of elements.
        """
        if extraLists is None:
            # A fresh mapping per call, instead of one default object shared by all calls.
            extraLists = OrderedDict()

        # Identity checks: "!= None" on a numpy array is evaluated elementwise and
        # cannot be used as a single truth value.
        assert any(column is not None for column in (startList, endList, valList, edgesList))
        assert borderHandling in ['crop']

        self.genomeAnchor = copy(genomeAnchor)
        self.trackFormat = TrackFormat(startList, endList, valList, strandList, idList, edgesList, weightsList, extraLists)
        self.borderHandling = borderHandling
        self.allowOverlaps = allowOverlaps

        self._trackElement = TrackElement(self)
        #self._bpLevelArray = None

        self._startList = startList
        self._endList = endList
        self._valList = valList
        self._strandList = strandList
        self._idList = idList
        self._edgesList = edgesList
        self._weightsList = weightsList
        self._extraLists = copy(extraLists)

        self._handlePointsAndPartitions()

        # Accessors for missing columns are replaced by noneFunc.
        if self._startList is None:
            self._trackElement.start = noneFunc
        if self._endList is None:
            self._trackElement.end = noneFunc
        if self._valList is None:
            self._trackElement.val = noneFunc
        if self._strandList is None:
            self._trackElement.strand = noneFunc
        if self._idList is None:
            self._trackElement.id = noneFunc
        if self._edgesList is None:
            self._trackElement.edges = noneFunc
        if self._weightsList is None:
            self._trackElement.weights = noneFunc

        self._updateNumListElements()

        # Consistency check: all present columns must have the same length.
        allColumns = [self._startList, self._endList, self._valList, self._strandList,
                      self._idList, self._edgesList, self._weightsList] \
                     + list(self._extraLists.values())
        for i, column in enumerate(allColumns):
            assert column is None or len(column) == self._numListElements, \
                'List (%s): ' % i + str(column) + ' (expected %s elements, found %s)' % (self._numListElements, len(column))
    def _allGESourceManagers(self, trackName, allowOverlaps):
        """
        Generate the GE source managers to preprocess for ``trackName``.

        If overlaps are not allowed and the collector reports the overlap rule as
        finalized, a single manager built from the track itself is yielded. Otherwise
        one manager is yielded per GE source that should be preprocessed; with
        allowOverlaps set, generation stops at the first source that is dense or has
        no overlapping elements. ``self._status`` is updated before each attempt.
        """
        collector = PreProcMetaDataCollector(self._genome, trackName)

        if allowOverlaps == False and collector.overlapRuleHasBeenFinalized(True):
            statusText = 'Trying to prepare preprocessing for track "%s"' % ':'.join(trackName)
            statusText += ' (allowOverlaps: %s)' % allowOverlaps
            self._status = statusText
            yield self._getGESourceManagerFromTrack(trackName)
            return

        for geSource in self._allGESources(trackName):
            if allowOverlaps == True:
                trackFormat = TrackFormat.createInstanceFromGeSource(geSource)
                if trackFormat.isDense() or geSource.hasNoOverlappingElements():
                    return

            statusText = 'Trying to prepare preprocessing for track "%s"' % ':'.join(trackName)
            if geSource.hasOrigFile():
                statusText += ' (filename: "%s")' % geSource.getFileName()
            statusText += ' (allowOverlaps: %s)' % allowOverlaps
            self._status = statusText

            if PreProcessUtils.shouldPreProcessGESource(trackName, geSource, allowOverlaps):
                yield self._getGESourceManagerFromGESource(geSource)
Beispiel #13
0
    def testSorting(self):
        """
        Sort every 'gtrack' test case file and compare the result with the expected lists.

        Cases whose name contains 'no_sort' are skipped. For sources that are not dense
        in representation the sorted elements are compared with the sorted expected
        elements; dense sources are only iterated through. Bounding regions are
        compared in both situations. Test files and sorted contents are printed along
        the way.
        """
        geSourceTest = self._commonSetup()

        for caseName in geSourceTest.cases:
            if not caseName.startswith('gtrack'):
                continue

            if 'no_sort' in caseName:
                print('Test case skipped: ' + caseName)
                continue

            print(caseName)
            print('')

            case = geSourceTest.cases[caseName]
            testFn = self._writeTestFile(case)
            # Close the file deterministically instead of leaving the handle open.
            with open(testFn) as testFile:
                print(testFile.read())
            print('')

            sortedContents = sortGtrackFileAndReturnContents(testFn, case.genome)
            print(sortedContents)

            # Without a case-specific source class, the generic source is used in preprocessor mode.
            usesDefaultSource = case.sourceClass is None
            sourceClass = GenomeElementSource if usesDefaultSource else case.sourceClass
            sortedGeSource = GEDependentAttributesHolder(sourceClass('sortedFile.gtrack', case.genome,
                                                                     forPreProcessor=usesDefaultSource,
                                                                     printWarnings=False,
                                                                     strToUseInsteadOfFn=sortedContents))

            reprIsDense = TrackFormat.createInstanceFromGeSource(sortedGeSource).reprIsDense()

            if not reprIsDense:
                self.assertEqual(sorted(case.assertElementList), [ge for ge in sortedGeSource])
            else:
                # Iterate through the whole source before the bounding regions are read.
                for ge in sortedGeSource:
                    pass

            self.assertEqual(sorted(case.boundingRegionsAssertList),
                             [br for br in sortedGeSource.getBoundingRegionTuples()])
Beispiel #14
0
    def _compose(self, out):
        """
        Write the contents of ``self._geSource`` to ``out`` in wiggle format.

        A 'track type=wiggle_0' line is written first (with the track name, if any,
        joined by ':' and with spaces replaced by '_'). For each non-empty bounding
        region a fixed step or variable step declaration line is written, followed by
        one line per element. In variable step mode a new declaration line is written
        whenever the chromosome or span changes.

        out : object with a ``write`` method accepting strings.
        """
        trackName = self._geSource.getTrackName()
        if trackName is not None:
            name = ':'.join(trackName).replace(' ','_')
        else:
            name = None

        # out.write is used instead of "print >>out", which only works on Python 2.
        out.write('track type=wiggle_0' + (' name=%s' % name if name is not None else '') + '\n')

        tf = TrackFormat.createInstanceFromGeSource(self._geSource)
        span = self._geSource.getFixedLength()
        step = self._geSource.getFixedGapSize() + span

        isFixedStep = (tf.reprIsDense() or step > 1 or (step == 1 and span != 1))

        for brt, geList in iterateOverBRTuplesWithContainedGEs(self._geSource):
            if len(geList) == 0:
                continue

            if isFixedStep:
                self._composeFixedStepDeclarationLine(out, brt.region, step, span)
            else:
                curChr, curSpan = self._composeVariableStepDeclarationLine(out, geList[0])

            for i,ge in enumerate(geList):
                # Skip the first element if the source has added a start element
                # to dense intervals.
                if i==0 and tf.isDense() and tf.isInterval() and \
                    self._geSource.addsStartElementToDenseIntervals():
                    continue

                val = self._commonFormatNumberVal(ge.val)

                if isFixedStep:
                    cols = [val]
                else:
                    if ge.chr != curChr or self._getVariableSpan(ge) != curSpan:
                        curChr, curSpan = self._composeVariableStepDeclarationLine(out, ge)
                    # Start positions are written as ge.start + 1.
                    cols = [str(ge.start+1), val]

                out.write('\t'.join([str(x) for x in cols]) + '\n')
Beispiel #15
0
class TrackView(object):
    """
    View of the elements of one track inside a genome region (``genomeAnchor``).

    The elements are held as separate columns: starts, ends, values, strands, ids,
    edges, weights and any number of named extra lists. A column is None when it is
    not present; all present columns hold the same number of elements (asserted in
    ``__init__``). The view is its own iterator and hands out one shared
    TrackElement object whose index is advanced on each step.
    """

    def _handlePointsAndPartitions(self):
        """
        Adjust the columns for formats that store starts or ends implicitly.

        If the track format is dense but its representation is not, the starts are
        derived from the ends (starts = ends[:-1], ends = ends[1:]) and the first
        element is dropped from all other columns. If the format is neither dense
        nor interval, the end list is replaced by a VirtualPointEnd over the starts.
        """
        if self.trackFormat.isDense() and not self.trackFormat.reprIsDense():
            self._startList = self._endList[:-1]
            self._endList = self._endList[1:]
            # Identity checks: "!= None" on a numpy array is evaluated elementwise
            # and cannot be used as a condition.
            if self._valList is not None:
                self._valList = self._valList[1:]
            if self._strandList is not None:
                self._strandList = self._strandList[1:]
            if self._idList is not None:
                self._idList = self._idList[1:]
            if self._edgesList is not None:
                self._edgesList = self._edgesList[1:]
            if self._weightsList is not None:
                self._weightsList = self._weightsList[1:]
            for key, extraList in self._extraLists.items():
                if extraList is not None:
                    self._extraLists[key] = extraList[1:]
        if not self.trackFormat.isDense() and not self.trackFormat.isInterval():
            self._endList = VirtualPointEnd(self._startList)

    def __init__(self, genomeAnchor, startList, endList, valList, strandList, idList, edgesList, \
                 weightsList, borderHandling, allowOverlaps, extraLists=None):
        """
        Create a view over the given element lists inside ``genomeAnchor``.

        genomeAnchor   : region covered by the view; a copy is stored.
        startList ... weightsList : element columns, each either a list/array or None.
                         At least one of startList, endList, valList and edgesList
                         must be given.
        borderHandling : only 'crop' is accepted.
        allowOverlaps  : stored as-is; used when counting and iterating elements.
        extraLists     : ordered mapping from extra column name to list; defaults to
                         an empty OrderedDict. A shallow copy is stored.
        """
        if extraLists is None:
            # A fresh mapping per call, instead of one default object shared by all calls.
            extraLists = OrderedDict()

        assert any(column is not None for column in (startList, endList, valList, edgesList))
        assert borderHandling in ['crop']

        self.genomeAnchor = copy(genomeAnchor)
        self.trackFormat = TrackFormat(startList, endList, valList, strandList, idList, edgesList, weightsList, extraLists)
        self.borderHandling = borderHandling
        self.allowOverlaps = allowOverlaps

        self._trackElement = TrackElement(self)
        #self._bpLevelArray = None

        self._startList = startList
        self._endList = endList
        self._valList = valList
        self._strandList = strandList
        self._idList = idList
        self._edgesList = edgesList
        self._weightsList = weightsList
        self._extraLists = copy(extraLists)

        self._handlePointsAndPartitions()

        # Accessors for missing columns are replaced by noneFunc.
        if self._startList is None:
            self._trackElement.start = noneFunc
        if self._endList is None:
            self._trackElement.end = noneFunc
        if self._valList is None:
            self._trackElement.val = noneFunc
        if self._strandList is None:
            self._trackElement.strand = noneFunc
        if self._idList is None:
            self._trackElement.id = noneFunc
        if self._edgesList is None:
            self._trackElement.edges = noneFunc
        if self._weightsList is None:
            self._trackElement.weights = noneFunc

        self._updateNumListElements()

        # Consistency check: all present columns must have the same length.
        allColumns = [self._startList, self._endList, self._valList, self._strandList,
                      self._idList, self._edgesList, self._weightsList] \
                     + list(self._extraLists.values())
        for i, column in enumerate(allColumns):
            assert column is None or len(column) == self._numListElements, \
                'List (%s): ' % i + str(column) + ' (expected %s elements, found %s)' % (self._numListElements, len(column))

    def __iter__(self):
        """Restart iteration: reset the shared track element's index and return self."""
        self._trackElement._index = -1
        return self

    def _updateNumListElements(self):
        """
        Recompute ``_numListElements`` (column length) and ``_numIterElements``
        (number of elements seen when iterating).
        """
        if test_settings['count_elements_using_iterator']:  # Note: Should only be used with allowOverlaps=False
            self._numIterElements = sum(1 for _ in self)
            self._numListElements = self._numIterElements
            return

        self._numListElements = self._computeNumListElements()

        # Only with overlaps can iteration skip elements, so only then is a
        # separate count needed.
        if self.allowOverlaps and self._numListElements > 0:
            self._numIterElements = self._computeNumIterElements()
        else:
            self._numIterElements = self._numListElements

    def _computeNumListElements(self):
        """Return the length of the first present column among starts, ends, vals and edges."""
        for column in [self._startList, self._endList, self._valList, self._edgesList]:
            if column is not None:
                return len(column)
        raise ShouldNotOccurError

    def _computeNumIterElements(self):
        """
        Return the number of elements left after removal of blind passengers.

        The first present column among starts, ends, vals and edges decides how to
        count: numpy arrays are filtered directly, anything else is counted by
        iterating over the view.
        """
        for column in [self._startList, self._endList, self._valList, self._edgesList]:
            if column is not None:
                if isinstance(column, numpy.ndarray):
                    return len(self._removeBlindPassengersFromNumpyArray(column))
                else:
                    return sum(1 for x in self)
        raise ShouldNotOccurError

    def __len__(self):
        """Return the size of the genome anchor (not the number of elements)."""
        return self._bpSize()

    def getNumElements(self):
        """Return the number of elements seen when iterating over the view."""
        return self._numIterElements

    def _bpSize(self):
        """Return ``len(self.genomeAnchor)``."""
        return len(self.genomeAnchor)

    def next(self):
        """
        Advance to the next element and return the shared track element.

        Raises StopIteration when the index has passed the last list element.
        """
        self._trackElement._index += 1

        #To remove any blind passengers - segments entirely in front of genomeanchor,
        # but sorted after a larger segment crossing the border
        if self.allowOverlaps and not self.trackFormat.reprIsDense():
            while self._trackElement._index < self._numListElements and self._endList[self._trackElement._index] <= self.genomeAnchor.start: #self._trackElement.end() <= 0:
                self._trackElement._index += 1

        if self._trackElement._index < self._numListElements:
            return self._trackElement
        else:
            raise StopIteration

    # Python 3 spelling of the iterator protocol method.
    __next__ = next

    def _findLeftIndex(self):
        """Return the index of the first element whose end lies after the anchor start."""
        leftIndex = 0
        #remove track elements entirely to the left of the anchor
        while leftIndex < len(self._endList) and self._endList[leftIndex] <= self.genomeAnchor.start:
            leftIndex += 1
        return leftIndex

    def _findRightIndex(self):
        """Return one past the index of the last element whose start lies before the anchor end."""
        rightIndex = self._numListElements
        while rightIndex > 0 and self._startList[rightIndex-1] >= self.genomeAnchor.end:
            rightIndex -= 1
        return rightIndex

    def sliceElementsAccordingToGenomeAnchor(self):
        """Trim the columns to the genome anchor; only for non-dense representations."""
        assert( not self.trackFormat.reprIsDense() )
        self._doScatteredSlicing()

    def _doScatteredSlicing(self):
        """
        Cut all columns down to the index range found by ``_findLeftIndex`` and
        ``_findRightIndex``, then update the element counts. An anchor of size 0
        gives an empty range.
        """
        leftIndex = self._findLeftIndex()
        rightIndex = self._findRightIndex()

        if self._bpSize() == 0:
            rightIndex = leftIndex

        self._startList = self._startList[leftIndex:rightIndex]
        self._endList = self._endList[leftIndex:rightIndex]

        if self._valList is not None:
            self._valList = self._valList[leftIndex:rightIndex]
        if self._strandList is not None:
            self._strandList = self._strandList[leftIndex:rightIndex]
        if self._idList is not None:
            self._idList = self._idList[leftIndex:rightIndex]
        if self._edgesList is not None:
            self._edgesList = self._edgesList[leftIndex:rightIndex]
        if self._weightsList is not None:
            self._weightsList = self._weightsList[leftIndex:rightIndex]
        for key, extraList in self._extraLists.items():
            self._extraLists[key] = extraList[leftIndex:rightIndex]
        self._updateNumListElements()

    def _doDenseSlicing(self, i, j):
        """
        Slice all columns except starts and ends to ``[i:j]``, then update the
        element counts.
        """
        if self._valList is not None:
            self._valList = self._valList[i:j]
        if self._strandList is not None:
            self._strandList = self._strandList[i:j]
        if self._idList is not None:
            self._idList = self._idList[i:j]
        if self._edgesList is not None:
            self._edgesList = self._edgesList[i:j]
        if self._weightsList is not None:
            self._weightsList = self._weightsList[i:j]
        for key, extraList in self._extraLists.items():
            self._extraLists[key] = extraList[i:j]
        self._updateNumListElements()

    def __getslice__(self, i, j):
        """
        Return a new TrackView for ``self[i:j]``, with i and j relative to the anchor start.

        The new view gets its own genome anchor, moved and cropped according to the
        slice, and its columns are sliced either by position (dense representation)
        or by overlap with the new anchor.
        """
        slicedTV = TrackView(self.genomeAnchor, self._startList, self._endList, \
                             self._valList, self._strandList, self._idList, \
                             self._edgesList, self._weightsList, \
                             self.borderHandling, self.allowOverlaps, \
                             extraLists=self._extraLists)
        # Keep the format of this view rather than the one recomputed by the constructor.
        slicedTV.trackFormat = self.trackFormat

        slicedTV.genomeAnchor.start += i
        if j>=0:
            try:
                slicedTV.genomeAnchor.end = min(self.genomeAnchor.end, self.genomeAnchor.start + j)
            except FloatingPointError: # Caused by trackView[:] with self.genomeAnchor.start > 0
                slicedTV.genomeAnchor.end = self.genomeAnchor.end
        if j<0:
            slicedTV.genomeAnchor.end += j

        if self.trackFormat.reprIsDense():
            slicedTV._doDenseSlicing(i,j)
        else:
            slicedTV._doScatteredSlicing()
        return slicedTV

    def _getBpLevelModificationArray(self, indexes, vals):
        """
        Return an array of length ``_bpSize() + 1`` holding, for each position, the
        sum of the ``vals`` whose entry in ``indexes`` equals that position.
        """
        bpLevelMod = numpy.bincount(indexes, vals)
        origLen = len(bpLevelMod)
        bpLevelMod.resize(self._bpSize()+1)
        bpLevelMod[origLen:] = 0
        return bpLevelMod

    def _commonGetBpLevelArray(self, vals):
        """
        Spread per-element ``vals`` out to one value per base pair of the anchor.

        For dense representations ``vals`` is returned as-is (overlaps are not
        expected there). Otherwise each value is added at the element start and
        subtracted at the element end, and the cumulative sum gives the level at
        each position.
        """
        if self.trackFormat.reprIsDense():
            if self.allowOverlaps:
                raise ShouldNotOccurError()
            return vals
        else:
            bpLevelArray = numpy.zeros(self._bpSize()+1)
            numElements = self.getNumElements()
            if numElements > 0:
                bpLevelArray += self._getBpLevelModificationArray(self.startsAsNumpyArray(), vals)
                bpLevelArray -= self._getBpLevelModificationArray(self.endsAsNumpyArray(), vals)
                bpLevelArray = bpLevelArray.cumsum()
            # The extra last position only receives the ends at the anchor border.
            return bpLevelArray[:-1]

    def getBinaryBpLevelArray(self):
        """Return a boolean array built from a weight of one per element (nonzero level gives True)."""
        vals = numpy.ones(self.getNumElements(), dtype='int32')
        # 'bool' names the same dtype as the 'bool8' alias.
        # NOTE(review): 'bool8' appears to be rejected by newer NumPy releases - confirm.
        return numpy.array(self._commonGetBpLevelArray(vals), dtype='bool')

    def getCoverageBpLevelArray(self):
        """Return an int32 array built from a weight of one per element (the level at each position)."""
        vals = numpy.ones(self.getNumElements(), dtype='int32')
        return numpy.array(self._commonGetBpLevelArray(vals), dtype='int32')

    def getValueBpLevelArray(self, voidValue=0):
        '''
        Creates a bp-level function of any valued track. In case of scattered tracks,
        uncovered areas are filled with voidValue (which would typically be set to 0 or numpy.nan).
        In the case of overlapping regions, the values are added.'''

        assert self.trackFormat.isValued('number'), self.trackFormat
        vals = self.valsAsNumpyArray()
        bpLevelArray = numpy.array(self._commonGetBpLevelArray(vals), dtype=vals.dtype)
        if voidValue != 0:
            bpLevelArray[~self.getBinaryBpLevelArray()] = voidValue
        return bpLevelArray

    def _removeBlindPassengersFromNumpyArray(self, numpyArray):
        '''
        To remove any blind passengers - segments entirely in front of genomeanchor,
        but sorted after a larger segment crossing the border.
        '''
        if self.allowOverlaps and len(numpyArray) > 0:
            numpyArray = numpyArray[numpy.where(self._endList > self.genomeAnchor.start)]
        return numpyArray

    def _commonAsNumpyArray(self, numpyArray, numpyArrayModMethod, name):
        """
        Return ``numpyArray`` without blind passengers, passed through
        ``numpyArrayModMethod`` if one is given. Returns None for a missing column.
        ``name`` is not used.
        """
        assert(self.borderHandling in ['crop'])
        if numpyArray is None:
            return None

        numpyArray = self._removeBlindPassengersFromNumpyArray(numpyArray)

        if numpyArrayModMethod is not None:
            return numpyArrayModMethod(numpyArray)
        else:
            return numpyArray

    def startsAsNumpyArray(self):
        """Return the starts relative to the anchor start, cropped to be at least 0."""
        return self._commonAsNumpyArray(self._startList, self._startListModMethod, 'starts')

    def _startListModMethod(self, startList):
        """Shift starts to be relative to the anchor start; negative results become 0."""
        return numpy.maximum(startList - self.genomeAnchor.start, \
                             numpy.zeros(len(startList), dtype='int32'))

    def endsAsNumpyArray(self):
        """Return the ends relative to the anchor start, cropped to at most the anchor length."""
        return self._commonAsNumpyArray(self._endList, self._endListModMethod, 'ends')

    def _endListModMethod(self, endList):
        """Shift ends to be relative to the anchor start; results above the anchor length are cropped."""
        return numpy.minimum(endList - self.genomeAnchor.start, \
                             numpy.zeros(len(endList), dtype='int32') + len(self.genomeAnchor))

    def valsAsNumpyArray(self):
        """Return the values without blind passengers, or None if there is no value column."""
        return self._commonAsNumpyArray(self._valList, None, 'vals')

    def strandsAsNumpyArray(self):
        """Return the strands without blind passengers, or None if there is no strand column."""
        return self._commonAsNumpyArray(self._strandList, None, 'strands')

    def idsAsNumpyArray(self):
        """Return the ids without blind passengers, or None if there is no id column."""
        return self._commonAsNumpyArray(self._idList, None, 'ids')

    def edgesAsNumpyArray(self):
        """Return the edges without blind passengers, or None if there is no edges column."""
        return self._commonAsNumpyArray(self._edgesList, None, 'edges')

    def weightsAsNumpyArray(self):
        """Return the weights without blind passengers, or None if there is no weights column."""
        return self._commonAsNumpyArray(self._weightsList, None, 'weights')

    def extrasAsNumpyArray(self, key):
        """Return the extra column named ``key`` without blind passengers; the key must exist."""
        assert self.hasExtra(key)
        return self._commonAsNumpyArray(self._extraLists[key], None, 'extras')

    def allExtrasAsDictOfNumpyArrays(self):
        """Return an OrderedDict from extra column name to its array, in stored order."""
        return OrderedDict([(key,self.extrasAsNumpyArray(key)) for key in self._extraLists])

    def hasExtra(self, key):
        """Return True if an extra column named ``key`` is present."""
        return key in self._extraLists
 def getTrackFormat(self):
     """Return a TrackFormat built from the stored prefix list and the value / weight data types and dimensions."""
     formatArgs = (self._prefixList,
                   self._valDataType,
                   self._valDim,
                   self._weightDataType,
                   self._weightDim)
     return TrackFormat.createInstanceFromPrefixList(*formatArgs)
Beispiel #17
0
    def _getRawTrackView(self, region, borderHandling, allowOverlaps):
        """
        Build a placeholder TrackView for a region of length 1.

        The GE source of ``self.trackName`` is only used for its format (prefixes,
        data types and dimensions). Every column named in the prefix list is filled
        with one placeholder element, or two if the format is both dense and
        interval; unknown prefixes become extra columns.

        Raises IncompatibleTracksError when overlaps are requested for a source that
        is dense or has no overlapping elements.
        """
        assert len(region) == 1

        from collections import OrderedDict
        from gtrackcore_memmap.track.memmap.CommonMemmapFunctions import findEmptyVal
        from gtrackcore_memmap.track.core.TrackView import TrackView
        import numpy as np

        geSource = ExternalTrackManager.getGESourceFromGalaxyOrVirtualTN(self.trackName, region.genome)
        prefixList = geSource.getPrefixList()
        valDataType = geSource.getValDataType()
        valDim = geSource.getValDim()
        weightDataType = geSource.getEdgeWeightDataType()
        weightDim = geSource.getEdgeWeightDim()

        tf = TrackFormat.createInstanceFromPrefixList(prefixList, valDataType, valDim, weightDataType, weightDim)
        if allowOverlaps and (tf.isDense() or geSource.hasNoOverlappingElements()):
            message = prettyPrintTrackName(self.trackName) + " with format: " + str(tf)
            message = message + " does not satisfy " + str(self._trackFormatReq)
            raise IncompatibleTracksError(message)

        # Two placeholder elements for dense intervals, otherwise one.
        numElements = 2 if (tf.isDense() and tf.isInterval()) else 1

        # A bare "S" data type is replaced by "S2" before the arrays are created.
        if valDataType == "S":
            valDataType = "S2"
        if weightDataType == "S":
            weightDataType = "S2"

        coreNames = ("start", "end", "val", "strand", "id", "edges", "weights")
        core = dict((name, None) for name in coreNames)
        extraLists = OrderedDict()

        for prefix in prefixList:
            if prefix == "start":
                column = np.array([-1], dtype="int32")
            elif prefix == "end":
                # [0] for a single element, [0, 1] for two.
                column = np.array(list(range(numElements)), dtype="int32")
            elif prefix == "val":
                valShape = (numElements, valDim) if valDim > 1 else numElements
                flatVals = [findEmptyVal(valDataType)] * valDim * numElements
                column = np.array(flatVals, dtype=valDataType).reshape(valShape)
            elif prefix == "strand":
                column = np.array([1] * numElements, dtype="int8")
            elif prefix == "id":
                column = np.array([""] * numElements, dtype="S1")
            elif prefix == "edges":
                column = np.array([[""]] * numElements, dtype="S1")
            elif prefix == "weights":
                weightShape = (numElements, 1, weightDim) if weightDim > 1 else (numElements, 1)
                nestedWeights = [[[findEmptyVal(weightDataType)]]] * weightDim * numElements
                column = np.array(nestedWeights, dtype=weightDataType).reshape(weightShape)
            else:
                extraLists[prefix] = np.array([""] * numElements, dtype="S1")
                continue
            core[prefix] = column

        return TrackView(
            region,
            core["start"],
            core["end"],
            core["val"],
            core["strand"],
            core["id"],
            core["edges"],
            core["weights"],
            borderHandling,
            allowOverlaps,
            extraLists,
        )
Beispiel #18
0
 def _init(self):
     """
     Set ``self._allValsAreBedVals``: True only if the value type of the source is
     'Number (integer)' and every element value lies in the range 0 to 1000 (inclusive).
     """
     self._allValsAreBedVals = False

     valTypeName = TrackFormat.createInstanceFromGeSource(self._geSource).getValTypeName()
     if valTypeName != 'Number (integer)':
         return

     # Iterates over the whole source (stops at the first value out of range).
     self._allValsAreBedVals = all(0 <= ge.val <= 1000 for ge in self._geSource)