def _compute(self):
        """Build one [threshold, random tie-breaker, mark] row per prediction value.

        The first child supplies the per-position prediction values, the
        second child supplies the answer segments.  The mark is 1 when the
        position falls inside the current answer segment and 0 otherwise.

        Returns:
            A list with one ``[threshold, random.random(), mark]`` entry for
            every element of the prediction value array, in position order.
        """
        predictions = self._children[0].getResult()
        answerSegments = self._children[1].getResult()

        answerStart = answerSegments.startsAsNumpyArray()
        answerEnd = answerSegments.endsAsNumpyArray()
        thresholds = predictions.valsAsNumpyArray()

        numSegments = len(answerStart)
        segIdx = 0
        rows = []

        for pos, score in enumerate(thresholds):
            # Step to the next answer segment once the position has moved past
            # the end of the current one (at most one step per position).
            if segIdx < numSegments and pos > answerEnd[segIdx]:
                segIdx += 1

            # NOTE(review): a position equal to the segment start is treated as
            # outside, while a position equal to the segment end is treated as
            # inside -- confirm this matches the coordinate convention in use.
            if segIdx >= numSegments or pos <= answerStart[segIdx]:
                mark = 0
            else:
                mark = 1

            rows.append([score, random.random(), mark])

        return rows
    def _compute(self):
        """Build one [threshold, random tie-breaker, mark] row per answer segment.

        The first child supplies the prediction segments (whose values are the
        thresholds, between 0 and 100, higher is better); the second child
        supplies the answer segments (whose values are binary: 1 when the
        pattern exists within the sequence, 0 when it does not).

        For every answer segment the threshold is the highest value among the
        consecutive prediction segments lying completely inside it, or 0.0 when
        no prediction lies inside it.  Both segment lists are scanned in one
        forward pass, so they are assumed to be ordered by position.

        Returns:
            A list with one ``[threshold, random.random(), mark]`` entry per
            answer segment, in answer-segment order.
        """
        predictionSegments, answerSegments = self._children[0].getResult(), self._children[1].getResult()

        predictionStart = predictionSegments.startsAsNumpyArray()
        predictionEnd = predictionSegments.endsAsNumpyArray()
        answerStart = answerSegments.startsAsNumpyArray()
        answerEnd = answerSegments.endsAsNumpyArray()
        # Thresholds between 0 and 100 (higher is better)
        thresholds = predictionSegments.valsAsNumpyArray()
        # Binary values, 1 = pattern exists within sequence, 0 = it does not
        values = answerSegments.valsAsNumpyArray()

        numPredictions = len(predictionStart)
        predictionIndex = 0
        markList = []

        # With no predictions at all, every answer simply gets threshold 0.0
        # from the loop below, so no separate special case is needed.
        for answerIndex in range(len(answerStart)):
            curStart = answerStart[answerIndex]
            curEnd = answerEnd[answerIndex]

            # A prediction starting before this answer can be inside neither
            # this answer nor any later one.  Skip it; otherwise the scan
            # would stall on it and all remaining answers would get 0.0.
            while predictionIndex < numPredictions and predictionStart[predictionIndex] < curStart:
                predictionIndex += 1

            threshold = 0.0
            if (predictionIndex < numPredictions
                    and predictionStart[predictionIndex] >= curStart
                    and predictionEnd[predictionIndex] <= curEnd):
                threshold = thresholds[predictionIndex]
                predictionIndex += 1

                # Keep the highest threshold among the following predictions
                # that also lie inside this answer segment.
                while (predictionIndex < numPredictions
                       and predictionStart[predictionIndex] >= curStart
                       and predictionEnd[predictionIndex] <= curEnd):
                    if threshold < thresholds[predictionIndex]:
                        threshold = thresholds[predictionIndex]
                    predictionIndex += 1

            # Explicit comparison kept on purpose: only values equal to True
            # (i.e. 1) count as positive marks.
            mark = 1 if values[answerIndex] == True else 0
            markList.append([threshold, random.random(), mark])

        return markList
    def _compute(self):
        """Build one [threshold, random tie-breaker, mark] row per position in the bin.

        The first child supplies the prediction segments (their values are the
        thresholds), the second child supplies the answer segments, and
        ``self._binSizeStat`` supplies the number of positions to walk over.

        Returns:
            A list of ``binSize`` entries ``[threshold, random.random(), mark]``
            where ``threshold`` is the value of the prediction segment covering
            the position (0.00 if none) and ``mark`` is 1 inside an answer
            segment and 0 outside.
        """
        predictionSegments = self._children[0].getResult()
        answerSegments = self._children[1].getResult()

        predictionStart = predictionSegments.startsAsNumpyArray()
        predictionEnd = predictionSegments.endsAsNumpyArray()
        answerStart = answerSegments.startsAsNumpyArray()
        answerEnd = answerSegments.endsAsNumpyArray()
        thresholds = predictionSegments.valsAsNumpyArray()
        binSize = self._binSizeStat.getResult()

        numPredictions = len(predictionStart)
        numAnswers = len(answerStart)
        predIdx = 0
        ansIdx = 0
        rows = []

        for pos in range(binSize):
            # --- threshold for this position ---
            # Step to the next prediction segment once the current one has
            # been passed (at most one step per position).
            if predIdx < numPredictions and pos > predictionEnd[predIdx]:
                predIdx += 1

            if predIdx >= numPredictions or pos < predictionStart[predIdx]:
                score = 0.00
            else:
                score = thresholds[predIdx]

            # --- mark for this position ---
            if ansIdx < numAnswers and pos > answerEnd[ansIdx]:
                ansIdx += 1

            # NOTE(review): answers use ``<=`` against the start while
            # predictions use ``<``, so a position equal to an answer start is
            # treated as outside -- confirm that asymmetry is intended.
            if ansIdx >= numAnswers or pos <= answerStart[ansIdx]:
                mark = 0
            else:
                mark = 1

            rows.append([score, random.random(), mark])

        return rows
    def _compute(self):
        """Rank mark segments by the fraction of their length that is covered.

        The first child supplies the mark segments (each with ``start()``,
        ``end()``, ``val()`` and a length); the second child supplies the
        cover segments.  For every mark segment the number of positions
        overlapped by cover segments is divided by the segment length.

        Both segment lists are walked in a single forward pass, so they are
        assumed to be ordered by position.

        Returns:
            A pair ``(marks, relCovers)`` of equally long lists, ordered by
            descending relative cover; ties are broken by a random number.
        """
        tvMarkSegs = self._children[0].getResult()
        tvCoverSegs = self._children[1].getResult()

        # Each entry is [position, flag]; the flag says whether the range from
        # the PREVIOUS border up to this one is covered (1) or not (0).  The
        # leading dummy lets index 1 refer to "from position 0".
        coverBorderList = [[0, 0]]
        for coverSeg in tvCoverSegs:
            coverBorderList.append([coverSeg.start(), 0])
            coverBorderList.append([coverSeg.end(), 1])
        numBorders = len(coverBorderList)

        relCoverAndMarkList = []
        coverIndex = 1
        for markSeg in tvMarkSegs:
            markEnd = markSeg.end()
            cover = 0
            curPos = markSeg.start()
            # coverIndex - 1 so that the range straddling markEnd is included.
            while coverIndex < numBorders and coverBorderList[coverIndex - 1][0] < markEnd:
                borderPos, isCovered = coverBorderList[coverIndex]
                cover += max(0, min(borderPos, markEnd) - curPos) * isCovered
                # Never move curPos back before the segment start; otherwise
                # covered ranges lying entirely before the mark segment would
                # be counted as part of it.
                curPos = max(curPos, borderPos)
                coverIndex += 1
            # Step back one border because the last range may also overlap the
            # next mark segment; never fall onto the dummy entry, which would
            # make ``coverIndex - 1`` wrap around to the end of the list.
            coverIndex = max(1, coverIndex - 1)
            relCoverAndMarkList.append(
                [1.0 * cover / len(markSeg),
                 random.random(),
                 markSeg.val()])

        # Sort once and reuse the ranking for both output lists.
        ranked = list(reversed(sorted(relCoverAndMarkList)))
        return [x[2] for x in ranked], [x[0] for x in ranked]
def getRandValList(size, dtype='float64'):
    """Return a NumPy array of ``size`` random values of the given dtype.

    Args:
        size: Number of values to generate.
        dtype: ``'float64'`` gives floats drawn from ``random.random() - 0.5``;
            ``'bool8'`` gives random booleans; any other dtype gives an empty
            array of that dtype.

    Returns:
        A NumPy array as described above.
    """
    # ``range`` instead of ``xrange`` so the function runs on Python 2 and 3.
    if dtype == 'float64':
        return array([random.random() - 0.5 for _ in range(size)], dtype=dtype)
    elif dtype == 'bool8':
        # Pass ``bool`` rather than the legacy 'bool8' alias string: it names
        # the same boolean dtype without relying on the alias being available.
        return array([bool(random.getrandbits(1)) for _ in range(size)], dtype=bool)
    else:
        return array([], dtype=dtype)
    def getTrackView(self, region):
        """Return a track view of the original track over a randomly placed region.

        The random region has the same length as ``region`` and is drawn from
        the part of the containing chromosome arm that remains after excluding
        ``region`` adjusted by the independency buffer.

        Args:
            region: The region to replace; must equal ``self._origRegion``.

        Returns:
            The result of ``RawDataStat`` for the randomly placed region.

        Raises:
            CentromerError: If ``region`` is not contained in exactly one
                chromosome arm.
            TooLargeBinError: If no source region is at least
                ``MIN_SOURCE_TO_SAMPLE_SIZE_RATIO`` times as long as ``region``.
        """
        assert self._origRegion == region
        allChrArmRegs = GenomeInfo.getContainingChrArms(region)
        if len(allChrArmRegs) != 1:
            raise CentromerError
        chrArm = allChrArmRegs[0]

        # Named bufferSize to avoid shadowing the ``buffer`` builtin.
        bufferSize = self._getIndepencyBufferSize(region)
        sourceRegs = chrArm.exclude( copy(region).extend(-bufferSize).extend(bufferSize) )
        assert len(sourceRegs) in [1,2]

        if not any(len(sourceReg) >= self.MIN_SOURCE_TO_SAMPLE_SIZE_RATIO * len(region) for sourceReg in sourceRegs):
            raise TooLargeBinError('Source region lengths of ' + str([len(x) for x in sourceRegs]) +
                                   ' are too small compared to region length of ' + str(len(region)) +
                                   ' according to MIN_SOURCE_TO_SAMPLE_SIZE_RATIO: ' + str(self.MIN_SOURCE_TO_SAMPLE_SIZE_RATIO))

        if len(sourceRegs) == 1:
            sourceReg = sourceRegs[0]
        else:
            # Weight each source region by its spare room, i.e. how far the
            # sampled region can slide inside it.  A region that is too short
            # gets weight 0 instead of a negative weight.
            spareLens = [max(0, len(reg) - len(region)) for reg in sourceRegs]
            totalSpare = sum(spareLens)
            if totalSpare > 0:
                # float() forces true division; with integer operands Python 2
                # would truncate the proportion to 0 and the second source
                # region would (almost) always be chosen.
                firstSourceProportion = float(spareLens[0]) / totalSpare
                sourceReg = sourceRegs[0] if random.random() < firstSourceProportion else sourceRegs[1]
            else:
                # No spare room anywhere: avoid dividing by zero and take the
                # longer region, the only one that can possibly fit.
                sourceReg = max(sourceRegs, key=len)

        randOffset = random.randint( 0, len(sourceReg) - len(region) )
        start = sourceReg.start + randOffset
        end = start + len(region)
        randRegion = GenomeRegion(region.genome, region.chr, start, end)

        rawData = RawDataStat(randRegion, self._origTrack, self._trackFormatReq)
        tv = rawData.getResult()
        # Sanity check: the sampled view must not be anchored at the original region.
        assert region != tv.genomeAnchor
        return tv
Beispiel #7
0
def getRandValList(size, dtype='float64'):
    """Return a NumPy array of ``size`` random values of the given dtype.

    Args:
        size: Number of values to generate.
        dtype: ``'float64'`` gives floats drawn from ``random.random() - 0.5``;
            ``'bool8'`` gives random booleans; any other dtype gives an empty
            array of that dtype.

    Returns:
        A NumPy array as described above.
    """
    # ``range`` instead of ``xrange`` so the function runs on Python 2 and 3.
    if dtype == 'float64':
        return array([random.random() - 0.5 for _ in range(size)],
                     dtype=dtype)
    elif dtype == 'bool8':
        # Pass ``bool`` rather than the legacy 'bool8' alias string: it names
        # the same boolean dtype without relying on the alias being available.
        return array([bool(random.getrandbits(1)) for _ in range(size)],
                     dtype=bool)
    else:
        return array([], dtype=dtype)
Beispiel #8
0
    def _getTrackView(self, region):
        """Return a track view of the original track over a randomly placed region.

        The random region has the same length as ``region`` and is drawn from
        the part of the containing chromosome arm that remains after excluding
        ``region`` adjusted by the independency buffer.

        Args:
            region: The region for which a randomly relocated view is wanted.

        Returns:
            ``self._origTrack.getTrackView`` evaluated on the random region.

        Raises:
            CentromerError: If ``region`` is not contained in exactly one
                chromosome arm.
            TooLargeBinError: If no source region is at least
                ``MIN_SOURCE_TO_SAMPLE_SIZE_RATIO`` times as long as ``region``.
        """
        from gold.util.RandomUtil import random

        #if not hasattr(self, '_minimalRegion'):
        #    from quick.application.UserBinSource import MinimalBinSource
        #    minimalBinList = MinimalBinSource(region.genome)
        #    self._minimalRegion = minimalBinList[0] if minimalBinList is not None else None
        #
        #if  self._minimalRegion == region:
        #    return self._origTrack.getTrackView(region)

        allChrArmRegs = GenomeInfo.getContainingChrArms(region)
        if len(allChrArmRegs) != 1:
            raise CentromerError
        chrArm = allChrArmRegs[0]

        # Named bufferSize to avoid shadowing the ``buffer`` builtin.
        bufferSize = self._getIndependencyBufferSize(region)
        sourceRegs = chrArm.exclude(
            region.getCopy().extend(-bufferSize).extend(bufferSize))
        assert len(sourceRegs) in [
            1, 2
        ], "Source region must be smaller than a tenth of a chromosome arm: %s" % region

        if not any(
                len(sourceReg) >= self.MIN_SOURCE_TO_SAMPLE_SIZE_RATIO *
                len(region) for sourceReg in sourceRegs):
            raise TooLargeBinError(
                'Source region lengths of ' + str([len(x)
                                                   for x in sourceRegs]) +
                ' are too small compared to region length of ' +
                str(len(region)) +
                ' according to MIN_SOURCE_TO_SAMPLE_SIZE_RATIO: ' +
                str(self.MIN_SOURCE_TO_SAMPLE_SIZE_RATIO))

        if len(sourceRegs) == 1:
            sourceReg = sourceRegs[0]
        else:
            # Weight each source region by its spare room, i.e. how far the
            # sampled region can slide inside it.  A region that is too short
            # gets weight 0 instead of a negative weight.
            spareLens = [max(0, len(reg) - len(region)) for reg in sourceRegs]
            totalSpare = sum(spareLens)
            if totalSpare > 0:
                # float() forces true division; with integer operands Python 2
                # would truncate the proportion to 0 and the second source
                # region would (almost) always be chosen.
                firstSourceProportion = float(spareLens[0]) / totalSpare
                sourceReg = sourceRegs[0] if random.random(
                ) < firstSourceProportion else sourceRegs[1]
            else:
                # No spare room anywhere: avoid dividing by zero and take the
                # longer region, the only one that can possibly fit.
                sourceReg = max(sourceRegs, key=len)

        randOffset = random.randint(0, len(sourceReg) - len(region))
        start = sourceReg.start + randOffset
        end = start + len(region)
        randRegion = GenomeRegion(region.genome, region.chr, start, end)

        #rawData = RawDataStat(randRegion, self._origTrack, self._trackFormatReq)
        #tv = rawData.getResult()
        tv = self._origTrack.getTrackView(randRegion)
        # Sanity check: the sampled view must not be anchored at the original region.
        assert tv.genomeAnchor != region, (region, tv.genomeAnchor,
                                           getClassName(region),
                                           getClassName(tv.genomeAnchor))
        return tv
    def _compute(self):
        """Rank mark segments by the mean function value inside each segment.

        The first child supplies the mark segments; the second child supplies
        a sliceable object of function values.  For each mark segment the
        values between its start and end are averaged as float64.

        Returns:
            A pair ``(marks, means)`` of equally long lists, ordered by
            descending mean; ties are broken by a random number.
        """
        tvMarkSegs = self._children[0].getResult()
        tvFuncValues = self._children[1].getResult()

        # One [mean, random tie-breaker, mark] row per mark segment.
        scoredRows = [
            [
                tvFuncValues[seg.start():seg.end()].valsAsNumpyArray().mean(
                    dtype='float64'),
                random.random(),
                seg.val(),
            ]
            for seg in tvMarkSegs
        ]

        # Ascending sort followed by a reversal gives descending order.
        ranked = sorted(scoredRows)
        ranked.reverse()

        marks = [row[2] for row in ranked]
        means = [row[0] for row in ranked]
        return marks, means
 def _compute(self):
     """Rank mark segments by the fraction of their length that is covered.

     The first child supplies the mark segments (each with ``start()``,
     ``end()``, ``val()`` and a length); the second child supplies the
     cover segments.  For every mark segment the number of positions
     overlapped by cover segments is divided by the segment length.

     Both segment lists are walked in a single forward pass, so they are
     assumed to be ordered by position.

     Returns:
         A pair ``(marks, relCovers)`` of equally long lists, ordered by
         descending relative cover; ties are broken by a random number.
     """
     tvMarkSegs = self._children[0].getResult()
     tvCoverSegs = self._children[1].getResult()

     # Each entry is [position, flag]; the flag says whether the range from
     # the PREVIOUS border up to this one is covered (1) or not (0).  The
     # leading dummy lets index 1 refer to "from position 0".
     coverBorderList = [[0,0]]
     for coverSeg in tvCoverSegs:
         coverBorderList.append( [coverSeg.start(), 0])
         coverBorderList.append( [coverSeg.end(), 1])
     numBorders = len(coverBorderList)

     relCoverAndMarkList = []
     coverIndex = 1
     for markSeg in tvMarkSegs:
         markEnd = markSeg.end()
         cover = 0
         curPos = markSeg.start()
         # coverIndex-1 so that the range straddling markEnd is included.
         while coverIndex < numBorders and coverBorderList[coverIndex-1][0] < markEnd:
             borderPos, isCovered = coverBorderList[coverIndex]
             cover += max(0, min(borderPos, markEnd) - curPos) * isCovered
             # Never move curPos back before the segment start; otherwise
             # covered ranges lying entirely before the mark segment would
             # be counted as part of it.
             curPos = max(curPos, borderPos)
             coverIndex += 1
         # Step back one border because the last range may also overlap the
         # next mark segment; never fall onto the dummy entry, which would
         # make ``coverIndex-1`` wrap around to the end of the list.
         coverIndex = max(1, coverIndex - 1)
         relCoverAndMarkList.append( [1.0*cover/len(markSeg), random.random(), markSeg.val()] )

     # Sort once and reuse the ranking for both output lists.
     ranked = list(reversed(sorted(relCoverAndMarkList)))
     return [x[2] for x in ranked], [x[0] for x in ranked]