def getTrackView(self, region):
        """Return the original track's view at a randomly placed region.

        The containing chromosome arm is reduced by a copy of *region* that
        has been extended with the size returned by
        self._getIndepencyBufferSize() (extend(-size) followed by
        extend(size)). A region of the same length as *region* is then drawn
        at a random offset inside one of the remaining source regions, and
        the original track is read there through RawDataStat.

        Raises:
            CentromerError: if *region* is not contained in exactly one
                chromosome arm.
            TooLargeBinError: if no source region is at least
                MIN_SOURCE_TO_SAMPLE_SIZE_RATIO times as long as *region*.
        """
        assert self._origRegion == region
        allChrArmRegs = GenomeInfo.getContainingChrArms(region)
        if len(allChrArmRegs) != 1:
            raise CentromerError
        chrArm = allChrArmRegs[0]

        regionLen = len(region)
        # Named bufferSize (not buffer) so the Python 2 builtin is not shadowed.
        bufferSize = self._getIndepencyBufferSize(region)
        sourceRegs = chrArm.exclude(copy(region).extend(-bufferSize).extend(bufferSize))
        assert len(sourceRegs) in [1, 2]

        minSourceLen = self.MIN_SOURCE_TO_SAMPLE_SIZE_RATIO * regionLen
        if not any(len(sourceReg) >= minSourceLen for sourceReg in sourceRegs):
            raise TooLargeBinError('Source region lengths of ' + str([len(x) for x in sourceRegs]) +
                                   ' are too small compared to region length of ' + str(regionLen) +
                                   ' according to MIN_SOURCE_TO_SAMPLE_SIZE_RATIO: ' + str(self.MIN_SOURCE_TO_SAMPLE_SIZE_RATIO))

        if len(sourceRegs) == 1:
            sourceReg = sourceRegs[0]
        else:
            # Weight each candidate by the room it leaves for placing the
            # region. A candidate shorter than the region gets weight 0, so a
            # negative slack can neither be selected nor distort the ratio.
            slacks = [max(0, len(sourceReg) - regionLen) for sourceReg in sourceRegs]
            totalSlack = sum(slacks)
            if totalSlack > 0:
                # float() forces true division; plain int / int floors to 0
                # under Python 2 unless the module imports division from
                # __future__, which would make the first source region
                # (almost) never selected.
                firstSourceProportion = float(slacks[0]) / totalSlack
            else:
                # No candidate has spare room: fall back to whichever one can
                # hold the region exactly, instead of dividing by zero.
                firstSourceProportion = 1.0 if len(sourceRegs[0]) >= regionLen else 0.0
            sourceReg = sourceRegs[0] if random.random() < firstSourceProportion else sourceRegs[1]

        randOffset = random.randint(0, len(sourceReg) - regionLen)
        start = sourceReg.start + randOffset
        end = start + regionLen
        randRegion = GenomeRegion(region.genome, region.chr, start, end)

        tv = RawDataStat(randRegion, self._origTrack, self._trackFormatReq).getResult()
        assert region != tv.genomeAnchor
        return tv
예제 #2
0
 def _createChildren(self):
     """Add one RawDataStat child per track.

     The first track is requested with allowOverlaps=False and val='number',
     the second track with allowOverlaps=False and dense=False.
     """
     firstReq = TrackFormatReq(allowOverlaps=False, val='number')
     self._addChild(RawDataStat(self._region, self._track, firstReq))

     secondReq = TrackFormatReq(allowOverlaps=False, dense=False)
     self._addChild(RawDataStat(self._region, self._track2, secondReq))
예제 #3
0
 def _createChildren(self):
     """Add a RawDataStat child for each of the two tracks.

     Both children use a fresh TrackFormatReq(val='number', dense=True).
     """
     for track in (self._track, self._track2):
         formatReq = TrackFormatReq(val='number', dense=True)
         self._addChild(RawDataStat(self._region, track, formatReq))
예제 #4
0
 def _createChildren(self):
     """Add one RawDataStat child per track.

     The first track is requested with dense=True and val='char', the second
     track with val='category'.
     """
     charReq = TrackFormatReq(dense=True, val='char')
     charStat = RawDataStat(self._region, self._track, charReq)
     self._addChild(charStat)

     categoryReq = TrackFormatReq(val='category')
     categoryStat = RawDataStat(self._region, self._track2, categoryReq)
     self._addChild(categoryStat)
 def _createChildren(self):
     """Add one RawDataStat child per track.

     The first track is requested with interval=True and val taken from
     self._markReq; the second track with dense=False and interval=True.
     """
     markedReq = TrackFormatReq(interval=True, val=self._markReq)
     self._addChild(RawDataStat(self._region, self._track, markedReq))

     segmentReq = TrackFormatReq(dense=False, interval=True)
     self._addChild(RawDataStat(self._region, self._track2, segmentReq))
예제 #6
0
 def getTrackView(self, region):
     """Build, cache and return a randomized TrackView for *region*.

     The original track view is read through RawDataStat and checked
     (track format, no overlaps, 'crop' border handling, anchor equal to
     *region*). Its arrays are passed to
     self._createRandomizedNumpyArrays(); the returned starts and ends are
     shifted by the start of the genome anchor before the new TrackView is
     stored in self._cachedTV.
     """
     # region must equal the original region, or be a member of it when the
     # original region is iterable.
     assert (region in self._origRegion) if isIter(self._origRegion) \
         else (self._origRegion == region)

     sourceTV = RawDataStat(region, self._origTrack, self._trackFormatReq).getResult()
     self._checkTrackFormat(sourceTV)
     assert not sourceTV.allowOverlaps
     assert sourceTV.borderHandling == 'crop'
     assert region == sourceTV.genomeAnchor

     randomized = self._createRandomizedNumpyArrays(
         len(sourceTV.genomeAnchor),
         sourceTV.startsAsNumpyArray(),
         sourceTV.endsAsNumpyArray(),
         sourceTV.valsAsNumpyArray(),
         sourceTV.strandsAsNumpyArray(),
         sourceTV.idsAsNumpyArray(),
         sourceTV.edgesAsNumpyArray(),
         sourceTV.weightsAsNumpyArray(),
         sourceTV.allExtrasAsDictOfNumpyArrays(),
         sourceTV.trackFormat,
         region)
     starts, ends, vals, strands, ids, edges, weights, extras = randomized

     # getClassName is not used below; the import is kept as in the original.
     from gold.util.CommonFunctions import getClassName

     # Shift the arrays by the anchor start; absent (None) arrays stay None.
     shiftedStarts = None
     if starts is not None:
         shiftedStarts = starts + sourceTV.genomeAnchor.start
     shiftedEnds = None
     if ends is not None:
         shiftedEnds = ends + sourceTV.genomeAnchor.start

     self._cachedTV = TrackView(sourceTV.genomeAnchor, shiftedStarts, shiftedEnds,
                                vals, strands, ids, edges, weights,
                                sourceTV.borderHandling, sourceTV.allowOverlaps,
                                extraLists=extras)
     assert self._trackFormatReq.isCompatibleWith(self._cachedTV.trackFormat), \
         ('Incompatible track-format: ' + str(self._trackFormatReq) +
          ' VS ' + str(self._cachedTV.trackFormat))
     return self._cachedTV
예제 #7
0
 def _createChildren(self):
     """Add one RawDataStat child per track.

     Both children request dense=False, val='category' and
     allowOverlaps=True; the first track uses interval=False and the second
     track interval=True.
     """
     for track, asInterval in ((self._track, False), (self._track2, True)):
         formatReq = TrackFormatReq(dense=False, interval=asInterval,
                                    val='category', allowOverlaps=True)
         self._addChild(RawDataStat(self._region, track, formatReq))
    def _getTrackView(self, region):
        """Build, cache and return a randomized TrackView for *region*.

        The original track view is read through RawDataStat and checked
        (track format, no overlaps, 'crop' border handling, anchor equal to
        *region*). Its arrays go through self._createRandomizedNumpyArrays()
        and then self._undoTrackViewChanges() before the new TrackView is
        stored in self._cachedTV.
        """
        # To initialize random generators if not done previously
        from gold.util.RandomUtil import random

        sourceTV = RawDataStat(region, self._origTrack, self._trackFormatReq).getResult()

        self._checkTrackFormat(sourceTV)
        assert not sourceTV.allowOverlaps
        assert sourceTV.borderHandling == 'crop'
        assert region == sourceTV.genomeAnchor

        randomized = self._createRandomizedNumpyArrays(
            len(sourceTV.genomeAnchor),
            sourceTV.startsAsNumpyArray(),
            sourceTV.endsAsNumpyArray(),
            sourceTV.valsAsNumpyArray(),
            sourceTV.strandsAsNumpyArray(),
            sourceTV.idsAsNumpyArray(),
            sourceTV.edgesAsNumpyArray(),
            sourceTV.weightsAsNumpyArray(),
            sourceTV.allExtrasAsDictOfNumpyArrays(),
            region)
        starts, ends, vals, strands, ids, edges, weights, extras = randomized

        restored = self._undoTrackViewChanges(starts, ends, vals, strands, ids,
                                              edges, weights, extras, sourceTV)
        starts, ends, vals, strands, ids, edges, weights, extras = restored

        # getClassName is not used below; the import is kept as in the original.
        from gold.util.CommonFunctions import getClassName

        self._cachedTV = TrackView(sourceTV.genomeAnchor, starts, ends, vals,
                                   strands, ids, edges, weights,
                                   sourceTV.borderHandling, sourceTV.allowOverlaps,
                                   extraLists=extras)

        assert self._trackFormatReq.isCompatibleWith(self._cachedTV.trackFormat), \
            ('Incompatible track-format: ' + str(self._trackFormatReq) +
             ' VS ' + str(self._cachedTV.trackFormat))
        return self._cachedTV
 def _createChildren(self):
     """Add a RawDataStat child for each of the two tracks.

     Both children use a fresh TrackFormatReq(dense=False, interval=True).
     """
     # Original author's note on the first track's requirement:
     # interval=False is supported through the faster PointCountPerSegStat.
     for track in (self._track, self._track2):
         segmentReq = TrackFormatReq(dense=False, interval=True)
         self._addChild(RawDataStat(self._region, track, segmentReq))
 def _createChildren(self):
     """Create both RawDataStat children, then add them in order.

     The first track is requested with interval=True and dense=False; the
     second track with dense=True, val='number' and interval=False.
     """
     children = (
         RawDataStat(self._region, self._track,
                     TrackFormatReq(interval=True, dense=False)),
         RawDataStat(self._region, self._track2,
                     TrackFormatReq(dense=True, val='number', interval=False)),
     )
     for child in children:
         self._addChild(child)
예제 #11
0
 def _createChildren(self):
     """Add RawDataStat children for both tracks and a BinSizeStat child.

     Both RawDataStat children use TrackFormatReq(dense=False). The value
     returned by _addChild for the BinSizeStat (built on the second track)
     is stored in self._binSizeStat.
     """
     for track in (self._track, self._track2):
         self._addChild(RawDataStat(self._region, track, TrackFormatReq(dense=False)))

     binSizeChild = BinSizeStat(self._region, self._track2)
     self._binSizeStat = self._addChild(binSizeChild)
 def _createChildren(self):
     """Add a RawDataStat child for each of the two tracks.

     Both children use a fresh
     TrackFormatReq(dense=False, interval=False, val='number').
     """
     for track in (self._track, self._track2):
         pointReq = TrackFormatReq(dense=False, interval=False, val='number')
         self._addChild(RawDataStat(self._region, track, pointReq))
 def _createChildren(self):
     """Add RawDataStat children for both tracks and a BinSizeStat child.

     The first track is requested with dense=False and interval=False, the
     second with dense=False and interval=True. The value returned by
     _addChild for the BinSizeStat (built on the second track) is stored in
     self._binSizeStat.
     """
     self._addChild(
         RawDataStat(self._region, self._track,
                     TrackFormatReq(dense=False, interval=False)))
     self._addChild(
         RawDataStat(self._region, self._track2,
                     TrackFormatReq(dense=False, interval=True)))

     binSizeChild = BinSizeStat(self._region, self._track2)
     self._binSizeStat = self._addChild(binSizeChild)
 def _createChildren(self):
     """Add one RawDataStat child per track and keep both _addChild results.

     self._graphNodeIdStat is built on the first track with interval=True,
     id=True and dense=True. self._segmentsStat is built on the second track
     with interval=False, dense=False and allowOverlaps=None.
     """
     nodeReq = TrackFormatReq(interval=True, id=True, dense=True)
     nodeStat = RawDataStat(self._region, self._track, nodeReq)
     self._graphNodeIdStat = self._addChild(nodeStat)

     # An earlier, commented-out variant omitted allowOverlaps=None here.
     segmentReq = TrackFormatReq(interval=False, dense=False, allowOverlaps=None)
     segmentStat = RawDataStat(self._region, self._track2, segmentReq)
     self._segmentsStat = self._addChild(segmentStat)
예제 #15
0
 def _createChildren(self):
     """Add a RawDataStat child for each of the two tracks.

     Both children use a fresh
     TrackFormatReq(dense=False, interval=True, allowOverlaps=False).
     """
     # Original author's note: the allowOverlaps setting is somewhat
     # arbitrary for now.
     for track in (self._track, self._track2):
         segmentReq = TrackFormatReq(dense=False, interval=True,
                                     allowOverlaps=False)
         self._addChild(RawDataStat(self._region, track, segmentReq))
 def _createChildren(self):
     """Add one RawDataStat child per track.

     allowOverlaps is taken from self._configuredToAllowOverlaps(strict=False),
     queried once for each child. The first track additionally requests
     val='number', the second interval=True.
     """
     overlapsForValues = self._configuredToAllowOverlaps(strict=False)
     valueReq = TrackFormatReq(val='number', allowOverlaps=overlapsForValues)
     self._addChild(RawDataStat(self._region, self._track, valueReq))

     overlapsForSegments = self._configuredToAllowOverlaps(strict=False)
     segmentReq = TrackFormatReq(interval=True, allowOverlaps=overlapsForSegments)
     self._addChild(RawDataStat(self._region, self._track2, segmentReq))
예제 #17
0
 def _createChildren(self):
     """Add a single RawDataStat child for self._track.

     The track is requested with interval=True, val='category' and
     allowOverlaps=True.
     """
     categoryReq = TrackFormatReq(interval=True, val='category', allowOverlaps=True)
     categoryStat = RawDataStat(self._region, self._track, categoryReq)
     self._addChild(categoryStat)
예제 #18
0
 def _createChildren(self):
     """Add a single RawDataStat child for self._track.

     The track is requested with dense=False and interval=False;
     allowOverlaps is True exactly when self._withOverlaps equals 'yes'.
     """
     overlapsWanted = (self._withOverlaps == 'yes')
     pointReq = TrackFormatReq(dense=False, interval=False,
                               allowOverlaps=overlapsWanted)
     self._addChild(RawDataStat(self._region, self._track, pointReq))
예제 #19
0
 def _createChildren(self):
     """Add a RawDataStat child per track in self._tracks, plus a BinSizeStat.

     Each RawDataStat uses TrackFormatReq(dense=False) and receives
     self._kwArgs as keyword arguments. The value returned by _addChild for
     the BinSizeStat (built on the first track) is stored in
     self._binSizeStat.
     """
     for track in self._tracks:
         formatReq = TrackFormatReq(dense=False)
         rawStat = RawDataStat(self._region, track, formatReq, **self._kwArgs)
         self._addChild(rawStat)

     binSizeChild = BinSizeStat(self._region, self._tracks[0])
     self._binSizeStat = self._addChild(binSizeChild)
 def _createChildren(self):
     """Add a single RawDataStat child for self._track.

     allowOverlaps is taken from
     self._configuredToAllowOverlaps(strict=False).
     """
     overlapsAllowed = self._configuredToAllowOverlaps(strict=False)
     formatReq = TrackFormatReq(allowOverlaps=overlapsAllowed)
     self._addChild(RawDataStat(self._region, self._track, formatReq))
 def _createChildren(self):
     """Add a RawDataStat child and a coded-events child.

     The RawDataStat is built on self._track with TrackFormatReq(dense=False).
     The RawOverlapSortedStartEndCodedEventsStat is built on both tracks and
     receives self._kwArgs as keyword arguments.
     """
     rawStat = RawDataStat(self._region, self._track, TrackFormatReq(dense=False))
     self._addChild(rawStat)

     codedEventsStat = RawOverlapSortedStartEndCodedEventsStat(
         self._region, self._track, self._track2, **self._kwArgs)
     self._addChild(codedEventsStat)
 def _createChildren(self):
     """Add raw-data children, the raw statistic, and one child per subtype pair.

     Two RawDataStat children (dense=False, val='tc') are added for the two
     tracks, followed by self._rawStatistic on the two tracks. Then, for
     every combination of the subtypes '0' and '1', a sub-track name is
     formed by appending the subtype to each track name, and
     self._rawStatistic is added on the resulting Track pair. The
     'rawStatistic' entry is removed from the keyword arguments passed on.

     Raises:
         IncompatibleTracksError: if the directory path of either sub-track
             does not exist.
     """
     childKwArgs = copy(self._kwArgs)
     childKwArgs.pop('rawStatistic', None)

     for track in (self._track, self._track2):
         self._addChild(RawDataStat(self._region, track,
                                    TrackFormatReq(dense=False, val='tc')))
     self._addChild(self._rawStatistic(self._region, self._track, self._track2,
                                       **childKwArgs))

     subtypePairs = [(s1, s2) for s1 in ['0', '1'] for s2 in ['0', '1']]
     for subtype1, subtype2 in subtypePairs:
         subName1 = self._track.trackName + [subtype1]
         subName2 = self._track2.trackName + [subtype2]

         # The second path is only checked when the first one exists.
         bothExist = os.path.exists(createDirPath(subName1, self.getGenome())) and \
             os.path.exists(createDirPath(subName2, self.getGenome()))
         if not bothExist:
             raise IncompatibleTracksError

         subTrack1 = Track(subName1)
         subTrack1.formatConverters = self._track.formatConverters
         subTrack2 = Track(subName2)
         subTrack2.formatConverters = self._track2.formatConverters
         self._addChild(self._rawStatistic(self._region, subTrack1, subTrack2,
                                           **childKwArgs))
예제 #23
0
    def getTrackView(self, region):
        """Return the original track view for *region* after support checks.

        The view is read through RawDataStat with a NeutralTrackFormatReq and
        returned unchanged once self._trackRandomizer accepts its track
        format and overlap mode.

        Raises:
            NotSupportedError: if the randomizer does not support the view's
                track format or its allowOverlaps setting.
        """
        # To make sure that the origTrack is only read once across randomizations
        sourceTv = RawDataStat(region, self._origTrack, NeutralTrackFormatReq()).getResult()
        randomizer = self._trackRandomizer

        if not randomizer.supportsTrackFormat(sourceTv.trackFormat):
            formatMsg = ('The original track "{}" has format "{}", '
                         'which is not supported by "{}".')
            raise NotSupportedError(formatMsg.format(
                prettyPrintTrackName(self.trackName),
                str(sourceTv.trackFormat),
                randomizer.getDescription()))

        if not randomizer.supportsOverlapMode(sourceTv.allowOverlaps):
            overlapMsg = ('The original track "{}" has "allowOverlaps={}", '
                          'which is not supported by "{}".')
            raise NotSupportedError(overlapMsg.format(
                prettyPrintTrackName(self.trackName),
                sourceTv.allowOverlaps,
                randomizer.getDescription()))

        assert sourceTv.borderHandling == 'crop'

        return sourceTv
    def _compute(self):
        """Compute a bin-count statistic T over the start positions of two tracks.

        Reads the track views of self._track and self._track2, counts start
        positions per bin, and returns sum((O - E)^2 / E), where O holds the
        per-bin counts and E is the total number of starts divided by the
        number of bins. The formula looks like a chi-square-style statistic.

        NOTE(review): this method contains debug prints (Python 2 print
        statements) and several lines that look broken; see the NOTE(review)
        comments below before relying on it.
        """
        # Read the track view of each of the two tracks directly, not via children.
        tvs = []
        for track in [self._track, self._track2]:
            tvs.append(
                RawDataStat(self._region, track,
                            self._getTrackFormatReq()).getResult())
#        tvs = [x.getResult() for x in self._children]
        # NOTE(review): debug output; prints self._tracks although the loop
        # above uses self._track / self._track2.
        print len(self._tracks)
        from numpy import array
        #         tvStartsOld = [x.startsAsNumpyArray()for x in tvs]
        #         tvEndsOld = [x.endsAsNumpyArray() for x in tvs]
        # NOTE(review): these two lists are overwritten immediately below by
        # the same computation via np.array, so this pair appears redundant.
        tvStarts = [array(x.startsAsNumpyArray(), dtype='int64') for x in tvs]
        tvEnds = [array(x.endsAsNumpyArray(), dtype='int64') for x in tvs]

        # `np` is not imported in this method; presumably a module-level
        # `import numpy as np` exists - confirm.
        tvStarts = [
            np.array(x.startsAsNumpyArray(), dtype='int64') for x in tvs
        ]
        tvEnds = [np.array(x.endsAsNumpyArray(), dtype='int64') for x in tvs]
        # Debug output of the track views and start arrays.
        print "N Starts: " + str(len(tvStarts[0]))
        for x in tvs:
            print x
            print x.__dict__
        print ".........---------------............"
        # NOTE(review): this loop rebinds `tvs` (the list of track views) to
        # the last start array. The elements are NumPy arrays, so the
        # `__dict__` access presumably raises AttributeError - confirm.
        for tvs in tvStarts:
            print tvs
            print tvs.__dict__

        # Bin range is twice the last end position of the first track.
        # NOTE(review): the reason for the factor 2 is not visible here.
        binSize = tvEnds[0][-1] * 2
        # NOTE(review): `localBinSize` is not defined in this method, and
        # NumPy offers `arange` rather than `range`; this line looks like it
        # fails as written - confirm the intended call and bin width.
        bins = np.range(0, binSize, localBinSize)
        print "Bins: " + str(len(bins))
        # Number of start positions per track.
        s = []
        for track in tvStarts:
            s.append(len(track))

        # Expected count per bin.
        # NOTE(review): under Python 2 integer inputs may make this a floor
        # division unless true division is enabled in the module - confirm.
        E = np.sum(s) / len(bins)
        print "Expected" + str(E)
        # NOTE(review): O has shape (1, len(bins)) but is indexed as
        # O[binPos] below, i.e. along the length-1 axis; this presumably
        # fails for any binPos >= 1 - confirm the intended shape.
        O = np.zeros((1, len(bins)))
        # NOTE(review): the bin width is hard-coded to 10000 here, while the
        # bins above use localBinSize.
        binPositions = [
            np.floor_divide(t_starts, 10000) for t_starts in tvStarts
        ]
        # Count the start positions falling into each bin, across both tracks.
        for track in binPositions:
            for binPos in track:
                O[binPos] += 1

        # Sum of squared deviations from the expected count, scaled by E.
        T = np.sum(np.power((O - E), 2) / E)
        return T
예제 #25
0
    def _createChildren(self):
        """Add the value and segment children for the two tracks.

        For self._track: a ValueBpLevelArrayRawDataStat (voidValue=0), kept
        in self._valBpArrayStat, and a FormatSpecStat requiring
        allowOverlaps=False and val='number'. For self._track2: a
        BpLevelArrayRawDataStat (bpDepthType='binary'), kept in
        self._segmentBpArrayStat, and a RawDataStat requiring
        allowOverlaps=False, dense=False and interval=True, kept in
        self._segStat.
        """
        valueArrayStat = ValueBpLevelArrayRawDataStat(self._region, self._track,
                                                      voidValue=0)
        self._valBpArrayStat = self._addChild(valueArrayStat)

        valueReq = TrackFormatReq(allowOverlaps=False, val='number')
        self._addChild(FormatSpecStat(self._region, self._track, valueReq))

        segmentArrayStat = BpLevelArrayRawDataStat(self._region, self._track2,
                                                   bpDepthType='binary')
        self._segmentBpArrayStat = self._addChild(segmentArrayStat)

        segmentReq = TrackFormatReq(allowOverlaps=False, dense=False, interval=True)
        segmentStat = RawDataStat(self._region, self._track2, segmentReq)
        self._segStat = self._addChild(segmentStat)
 def _createChildren(self):
     """Add a single RawDataStat child for self._track with interval=True."""
     intervalReq = TrackFormatReq(interval=True)
     intervalStat = RawDataStat(self._region, self._track, intervalReq)
     self._addChild(intervalStat)
 def _createChildren(self):
     """Add a single RawDataStat child for self._track.

     The track is requested with interval=False and val='category'.
     """
     categoryReq = TrackFormatReq(interval=False, val='category')
     self._addChild(RawDataStat(self._region, self._track, categoryReq))
 def _createChildren(self):
     """Add a single RawDataStat child for self._track.

     The track is requested with dense=False and interval taken from
     self.INTERVALS; allowOverlaps is True exactly when self._withOverlaps
     equals 'yes'.
     """
     formatReq = TrackFormatReq(dense=False,
                                interval=self.INTERVALS,
                                allowOverlaps=(self._withOverlaps == 'yes'))
     rawStat = RawDataStat(self._region, self._track, formatReq)
     self._addChild(rawStat)
예제 #29
0
 def _createChildren(self):
     """Add a RawDataStat child and a BinSizeStat child for self._track.

     The RawDataStat is requested with interval=False and dense=False.
     """
     pointReq = TrackFormatReq(interval=False, dense=False)
     self._addChild(RawDataStat(self._region, self._track, pointReq))

     binSizeChild = BinSizeStat(self._region, self._track)
     self._addChild(binSizeChild)
예제 #30
0
 def _createChildren(self):
     """Add a single RawDataStat child for self._track.

     The track is requested with dense=True, interval=False and
     allowOverlaps=False.
     """
     denseReq = TrackFormatReq(dense=True, interval=False, allowOverlaps=False)
     denseStat = RawDataStat(self._region, self._track, denseReq)
     self._addChild(denseStat)
 def _createChildren(self):
     """Add a CountPointStat child and a RawDataStat child for self._track.

     The RawDataStat is requested with dense=False and val='mean_sd'.
     """
     countChild = CountPointStat(self._region, self._track)
     self._addChild(countChild)

     meanSdReq = TrackFormatReq(dense=False, val='mean_sd')
     self._addChild(RawDataStat(self._region, self._track, meanSdReq))
 def _createChildren(self):
     """Add a single RawDataStat child for self._track.

     The track is requested with interval=True; allowOverlaps is True
     exactly when self._withOverlaps equals 'yes'.
     """
     overlapsWanted = (self._withOverlaps == 'yes')
     segmentReq = TrackFormatReq(interval=True, allowOverlaps=overlapsWanted)
     self._addChild(RawDataStat(self._region, self._track, segmentReq))