def _assertSplitUserBin(self, compBins, start, end):
    """Assert that splitting the hg18 chr1 region (start, end) gives compBins.

    compBins -- iterable of (start, end) pairs, one per expected piece,
                in the order CompBinManager.splitUserBin should return them.
    """
    userBin = GenomeRegion('hg18', 'chr1', start, end)

    expectedParts = []
    for partStart, partEnd in compBins:
        expectedParts.append(GenomeRegion('hg18', 'chr1', partStart, partEnd))

    actualParts = CompBinManager.splitUserBin(userBin)
    AssertList(expectedParts, actualParts, self.assertEqual)
    def testBoundingRegionsChrInUnsortedOrder(self):
        """Store sparse bounding regions listed in another chromosome order than the chromosome list.

        The tuples start with chrM while the chromosome list is
        ['chr21', 'chrM']. The test makes no explicit assertion; it passes as
        long as storeBoundingRegions does not raise.
        """
        self._setUpShelve()

        regionsWithCounts = [
            (GenomeRegion('TestGenome', 'chrM', 1000, 2000), 5),
            (GenomeRegion('TestGenome', 'chr21', 0, 1000000), 10),
            (GenomeRegion('TestGenome', 'chr21', 2000000, 2500000), 20),
        ]
        brTuples = [BoundingRegionTuple(region, elCount)
                    for region, elCount in regionsWithCounts]

        chrOrder = ['chr21', 'chrM']
        self._brShelve.storeBoundingRegions(brTuples, chrOrder, sparse=True)
    def testBoundingRegionsUnsortedInChr(self):
        """Bounding regions out of start order within one chromosome must be rejected.

        The chr21 region starting at 2000000 is listed before the one starting
        at 0, and storeBoundingRegions is expected to raise InvalidFormatError.
        """
        self._setUpShelve()

        laterRegion = GenomeRegion('TestGenome', 'chr21', 2000000, 2500000)
        earlierRegion = GenomeRegion('TestGenome', 'chr21', 0, 1000000)
        brTuples = [
            BoundingRegionTuple(laterRegion, 20),
            BoundingRegionTuple(earlierRegion, 10),
        ]

        store = self._brShelve.storeBoundingRegions
        self.assertRaises(InvalidFormatError, store, brTuples, ['chr21'],
                          sparse=True)
Exemple #4
0
 def _assertIntersect(self, assertRegs, chr, regs1, regs2):
     """Assert that intersecting regs1 with regs2 on chr gives assertRegs.

     All three region arguments are iterables of (start, end) pairs; they are
     turned into 'TestGenome' GenomeRegion objects before the comparison.
     """
     def toGenomeRegions(borderPairs):
         # Wrap plain (start, end) pairs as regions on the tested chromosome.
         return [GenomeRegion('TestGenome', chr, regStart, regEnd)
                 for regStart, regEnd in borderPairs]

     genomeRegs1 = toGenomeRegions(regs1)
     genomeRegs2 = toGenomeRegions(regs2)
     expectedRegs = toGenomeRegions(assertRegs)

     actualRegs = BoundingRegionUserBinSource.getAllIntersectingRegions(
         'TestGenome', chr, genomeRegs1, genomeRegs2)

     self.assertListsOrDicts(expectedRegs, actualRegs)
    def testIsCompBin(self):
        """Check CompBinManager.isCompBin on accepted and rejected chr21 regions.

        NOTE(review): the expectations look like they assume a compBin size of
        100 and a chr21 length of 46944323 in 'TestGenome' -- confirm against
        the test configuration.
        """
        compBinBorders = [
            (0, 100),
            (200, 300),
            (46944300, 46944323),
        ]
        otherBorders = [
            (0, 40),
            (10, 100),
            (10, 200),
            (100, 300),
            (46944300, 46944322),
            (46944300, 46944324),
        ]

        for binStart, binEnd in compBinBorders:
            region = GenomeRegion('TestGenome', 'chr21', binStart, binEnd)
            self.assertTrue(CompBinManager.isCompBin(region))

        for binStart, binEnd in otherBorders:
            region = GenomeRegion('TestGenome', 'chr21', binStart, binEnd)
            self.assertFalse(CompBinManager.isCompBin(region))
    def testBoundingRegionIncorrectCountDense(self):
        """A dense bounding region with a mismatching element count must be rejected.

        The chrM region spans 1000 to 2000 but is stored with an element count
        of 500; storeBoundingRegions(sparse=False) is expected to raise
        InvalidFormatError.
        """
        self._setUpShelve()

        regionsWithCounts = [
            (GenomeRegion('TestGenome', 'chr21', 0, 1000000), 1000000),
            (GenomeRegion('TestGenome', 'chr21', 2000000, 2500000), 500000),
            (GenomeRegion('TestGenome', 'chrM', 1000, 2000), 500),
        ]
        brTuples = [BoundingRegionTuple(region, elCount)
                    for region, elCount in regionsWithCounts]

        store = self._brShelve.storeBoundingRegions
        self.assertRaises(InvalidFormatError, store, brTuples,
                          ['chr21', 'chrM'], sparse=False)
 def testGetNumOfBins(self):
     """Check CompBinManager.getNumOfBins for a few hg18 chr1 regions."""
     # (expected number of bins, region start, region end)
     cases = [
         (0, 0, 0),
         (1, 0, 100),
         (2, 200, 400),
         (4, 67, 314),
     ]
     for expectedNumBins, regStart, regEnd in cases:
         region = GenomeRegion('hg18', 'chr1', regStart, regEnd)
         self.assertEqual(expectedNumBins, CompBinManager.getNumOfBins(region))
 def splitUserBin(region):
     """Split a region into compBins, based on borders as defined by getCompBinSize.

     The first and last piece are clipped to region.start and region.end, so
     they may be shorter than a full compBin.

     region -- GenomeRegion to split; start and end must be set.

     Returns a list of new GenomeRegion objects on the same genome and chr,
     in increasing order. The list is empty when region.start >= region.end.
     """
     compBinSize = CompBinManager.getCompBinSize()

     # Round down to the compBin border at or before region.start. Floor
     # division (//) is used explicitly so the result stays an integer also
     # when true division is in effect (Python 3 or
     # `from __future__ import division`).
     start = (int(region.start) // compBinSize) * compBinSize
     compBins = []

     while start < region.end:
         part = GenomeRegion(region.genome, region.chr)
         part.start = max(start, region.start)
         part.end = min(start + compBinSize, region.end)
         compBins.append(part)
         start += compBinSize

     return compBins
Exemple #9
0
    def nextBin(self):
        """Yield bins for every region of the user bin source.

        For each region, a missing start is taken as 0 and the end is the
        smaller of region.end and the chromosome length (the latter only looked
        up when region.genome is set). If self._binLen is None the whole region
        is yielded as one bin; otherwise it is cut into consecutive bins of
        self._binLen, the last one clipped to the region end.
        """
        for region in self._userBinSource:
            if region.start is None:
                binStart = 0
            else:
                binStart = region.start

            if region.genome is None:
                chrLen = None
            else:
                chrLen = GenomeInfo.getChrLen(region.genome, region.chr)

            # Ignore whichever of the two upper bounds is unknown.
            regEnd = min(bound for bound in (region.end, chrLen)
                         if bound is not None)

            if self._binLen is None:
                yield GenomeRegion(region.genome, region.chr, binStart, regEnd)
                continue

            while binStart < regEnd:
                binEnd = min(binStart + self._binLen, regEnd)
                yield GenomeRegion(region.genome, region.chr, binStart, binEnd)
                binStart += self._binLen
Exemple #10
0
    def _doRandTest(self, origTV, randTrackClasses):
        """Check invariants of randomized versions of origTV.

        For each class in randTrackClasses, 100 randomized tracks are created
        (one per random seed/index 0..99) and their track view over the anchor
        region is compared to origTV:

        * the sorted segment lengths must be equal;
        * for PermutedSegsAndIntersegsTrack the multiset of interseg lengths
          must be equal, for other classes only their sum;
        * every element must lie inside the bin and have positive length;
        * the sorted contents must be equal.

        origTV           -- the original track view; its genomeAnchor defines
                            the tested region.
        randTrackClasses -- iterable of randomized track classes to test.
        """
        anchor = [origTV.genomeAnchor.start, origTV.genomeAnchor.end]
        intensityTV = SampleTV_Num(vals=range(anchor[1] - anchor[0]),
                                   anchor=anchor)  #fixme: not yet used..
        MyPlainTrack._origTV = intensityTV
        gtrackcore.track.random.SegsSampledByIntensityTrack.PlainTrack = MyPlainTrack
        origTrack = SampleTrack(origTV)
        anchorReg = GenomeRegion('TestGenome', 'chr21', anchor[0], anchor[1])
        binLen = len(anchorReg)

        for randClass in randTrackClasses:
            for i in range(100):
                randTrack = randClass(origTrack,
                                      anchorReg,
                                      i,
                                      trackNameIntensity='dummy_intensity')
                randTV = randTrack.getTrackView(anchorReg)

                self.assertListsOrDicts(sorted(len(el) for el in origTV),
                                        sorted(len(el) for el in randTV))

                origInterSegLens = self._getInterSegLens(origTV, binLen)
                randInterSegLens = self._getInterSegLens(randTV, binLen)

                # The check must be done on the track instance: randClass is a
                # class object, so isinstance(randClass, ...) was always False
                # and the stricter comparison below never ran.
                if isinstance(randTrack, PermutedSegsAndIntersegsTrack):
                    # A permutation keeps the interseg lengths but not their
                    # order, hence the comparison of sorted lists.
                    self.assertEqual(sorted(origInterSegLens),
                                     sorted(randInterSegLens))
                else:
                    self.assertEqual(sum(origInterSegLens),
                                     sum(randInterSegLens))

                for el in randTV:
                    self.assertTrue(0 <= el.start() < el.end() <= binLen)

                self.assertListsOrDicts(self._createSortedContents(origTV),
                                        self._createSortedContents(randTV))
Exemple #11
0
    def _createTrackView(self,
                         starts,
                         ends,
                         vals,
                         strands,
                         ids,
                         edges,
                         weights,
                         extras,
                         sourceRegion,
                         allowOverlaps,
                         sliceFull=False):
        """Build a TrackView over sourceRegion from plain Python sequences.

        Every per-element argument (starts .. weights) is either None or a
        sequence that is converted to a numpy array; vals is converted with
        dtype 'float64'. extras is None or a dict of key -> sequence, turned
        into an OrderedDict of arrays.

        sourceRegion  -- (start, end) pair for the genome anchor on
                         self.genome / self.chr.
        allowOverlaps -- passed on to TrackView.
        sliceFull     -- if true, sliceElementsAccordingToGenomeAnchor() is
                         called on the view before it is returned.
        """
        def toArray(seq, **arrayKwArgs):
            # None means "attribute not present" and must be passed through.
            if seq is None:
                return None
            return array(seq, **arrayKwArgs)

        anchorStart, anchorEnd = sourceRegion[0], sourceRegion[1]
        genomeAnchor = GenomeRegion(genome=self.genome,
                                    chr=self.chr,
                                    start=anchorStart,
                                    end=anchorEnd)

        startArray = toArray(starts)
        endArray = toArray(ends)
        valArray = toArray(vals, dtype='float64')
        strandArray = toArray(strands)
        idArray = toArray(ids)
        edgesArray = toArray(edges)
        weightsArray = toArray(weights)

        extraLists = OrderedDict()
        if extras is not None:
            for key, extra in extras.iteritems():
                extraLists[key] = array(extra)

        tv = TrackView(genomeAnchor, startArray, endArray, valArray,
                       strandArray, idArray, edgesArray, weightsArray,
                       'crop', allowOverlaps, extraLists=extraLists)

        if sliceFull:
            tv.sliceElementsAccordingToGenomeAnchor()
        return tv
Exemple #12
0
 def _appendBoundingRegionTuple(self):
     """Append a BoundingRegionTuple for the bounding region read so far.

     The region runs from self._start to the end computed from the current
     fixed-step element start. The element count is the running count for the
     bounding region, plus one if self._isStepFunction is set.
     """
     boundingRegion = GenomeRegion(
         genome=self._genome,
         chr=self._chr,
         start=self._start,
         end=self._getEnd(self._getFixedStepCurElStart()))

     elCount = self._curElCountInBoundingRegion
     if self._isStepFunction:
         elCount = elCount + 1

     brTuple = BoundingRegionTuple(boundingRegion, elCount)
     self._boundingRegionTuples.append(brTuple)
Exemple #13
0
 def testPreProcessFasta(self):
     """Preprocess the FASTA test source and check the no-overlaps output.

     Expects 2 files and 9804 elements on chr21 (0 on chrM) without overlaps,
     checked within the custom chr21 bin (0, 9804); the with-overlaps
     expectations are passed as None.
     """
     chr21Bin = GenomeRegion(self.GENOME, 'chr21', 0, 9804)
     self._preProcess(
         ['FastaGenomeElementSource'],
         noOverlapsFileCount=2,
         withOverlapsFileCount=None,
         noOverlapsChrElCount={'chr21': 9804, 'chrM': 0},
         withOverlapsChrElCount=None,
         customBins={'chr21': chr21Bin})
 def _appendBoundingRegionTuple(self):
     """Append a bounding region (0, self._elCount) for the current chromosome.

     Nothing is appended while self._chr is still None.
     """
     if self._chr is None:
         return

     brRegion = GenomeRegion(self._genome, self._chr, 0, self._elCount)
     brTuple = BoundingRegionTuple(brRegion, self._elCount)
     self._boundingRegionTuples.append(brTuple)
Exemple #15
0
def _getIter(elList,
             valDataType,
             valDim,
             edgeWeightDataType,
             edgeWeightDim,
             brList=None):
    """Build a MyGeIter filled with genome elements and bounding regions.

    elList -- sequence of element specs: (genome, chr, start, end) optionally
              followed by a fifth item, a dict of extra attribute name ->
              value that is set on the GenomeElement.
    valDataType, valDim, edgeWeightDataType, edgeWeightDim
           -- passed unchanged to the MyGeIter constructor.
    brList -- optional sequence of bounding region specs:
              (genome, chr, start, end, elCount). Defaults to no bounding
              regions.

    Returns the populated MyGeIter.
    """
    # None instead of a mutable [] default, which would be shared by all calls.
    if brList is None:
        brList = ()

    geIter = MyGeIter(valDataType, valDim, edgeWeightDataType, edgeWeightDim)

    for elSpec in elList:
        ge = GenomeElement(genome=elSpec[0],
                           chr=elSpec[1],
                           start=elSpec[2],
                           end=elSpec[3])
        if len(elSpec) == 5:
            extraAttrs = elSpec[4]
            for prefix in extraAttrs:
                setattr(ge, prefix, extraAttrs[prefix])
        geIter.iter.append(ge)

    for brSpec in brList:
        br = GenomeRegion(genome=brSpec[0],
                          chr=brSpec[1],
                          start=brSpec[2],
                          end=brSpec[3])
        geIter.boundingRegionTuples.append(BoundingRegionTuple(br, brSpec[4]))

    return geIter
 def _testGetBoundingInfoOutsideCommon(self, sparse):
     """Check getBoundingRegionInfo for regions not inside a bounding region.

     After storing the common bounding regions, three regions on chr21 and
     chrM must raise OutsideBoundingRegionError, while a region on chr2 must
     give a BoundingRegionInfo with the region's own borders and zeros for
     the remaining fields.

     NOTE(review): presumably chr2 has no stored bounding regions and the
     other three regions only partly overlap stored ones -- confirm against
     _commonStoreBoundingRegions.

     sparse -- passed to _commonStoreBoundingRegions.
     """
     self._setUpShelve()
     self._commonStoreBoundingRegions(sparse=sparse)

     outsideRegions = [
         ('chr21', 50000, 1052000),
         ('chr21', 1000000, 1052000),
         ('chrM', 1500, 3000),
     ]
     for chr, regStart, regEnd in outsideRegions:
         self.assertRaises(OutsideBoundingRegionError,
                           self._brShelve.getBoundingRegionInfo,
                           GenomeRegion('TestGenome', chr, regStart, regEnd))

     # assertEqual instead of the deprecated assertEquals alias.
     self.assertEqual(
         BoundingRegionInfo(100000, 110000, 0, 0, 0, 0),
         self._brShelve.getBoundingRegionInfo(
             GenomeRegion('TestGenome', 'chr2', 100000, 110000)))
 def getAllIntersectingRegions(cls, genome, chr, regList1, regList2):
     """Return the regions on chr covered by both regList1 and regList2.

     Each list is iterated exactly once. All region borders are put in one
     array, where an end counts -1 and a start counts +1. After a stable sort
     (ends are listed first, so an end sorts before a start at the same
     position), the running sum gives the number of open regions; positions
     where it equals 2 start an intersection and the next border ends it.

     NOTE(review): the `== 2` test presumably relies on the regions within
     each list not overlapping each other -- confirm with callers.

     Returns a list of GenomeRegion(genome, chr, start, end); empty if either
     list is empty.
     """
     borders1 = [(reg.start, reg.end) for reg in regList1]
     borders2 = [(reg.start, reg.end) for reg in regList2]

     if not borders1 or not borders2:
         return []

     allBorders = borders1 + borders2
     allEnds = [regEnd for _, regEnd in allBorders]
     allStarts = [regStart for regStart, _ in allBorders]

     borderArray = numpy.array(allEnds + allStarts)
     changeArray = numpy.array([-1] * len(allEnds) + [1] * len(allStarts))

     del borders1, borders2, allBorders, allEnds, allStarts

     # Merge sort is stable, which keeps ends before starts on ties.
     order = borderArray.argsort(kind='merge')
     borderArray = borderArray[order]
     changeArray = changeArray[order]

     insideBoth = numpy.add.accumulate(changeArray) == 2
     intersectStarts = borderArray[insideBoth]
     # The border following each intersection start is its end.
     intersectEnds = borderArray[1:][insideBoth[:-1]]

     assert len(intersectStarts) == len(intersectEnds)
     return [GenomeRegion(genome, chr, regStart, regEnd)
             for regStart, regEnd in zip(intersectStarts, intersectEnds)]
    def testStdGetBoundingInfoDense(self):
        """Check getBoundingRegionInfo for regions inside dense bounding regions.

        The common bounding regions are stored with sparse=False, and three
        query regions (two on chr21, one on chrM) are compared with the
        expected BoundingRegionInfo values.
        """
        self._setUpShelve()
        self._commonStoreBoundingRegions(sparse=False)

        # (expected info, (chr, start, end) of the queried region)
        cases = [
            (BoundingRegionInfo(0, 1000000, 0, 1000000, 0, 0),
             ('chr21', 50000, 52000)),
            (BoundingRegionInfo(2000000, 2500000, 1000000, 1500000, 0, 0),
             ('chr21', 2050000, 2052000)),
            (BoundingRegionInfo(1000, 2000, 1500000, 1501000, 0, 0),
             ('chrM', 1000, 2000)),
        ]
        for expectedInfo, (chr, regStart, regEnd) in cases:
            queryRegion = GenomeRegion('TestGenome', chr, regStart, regEnd)
            # assertEqual instead of the deprecated assertEquals alias.
            self.assertEqual(expectedInfo,
                             self._brShelve.getBoundingRegionInfo(queryRegion))
    def __iter__(self):
        """Yield the regions to analyse for the two tracks, chromosome by chromosome.

        For every chromosome in the extended chromosome list:

        * if track 1 has no bounding region shelve, the whole chromosome;
        * else if track 2 has no shelve, or only track 2 has whole-chromosome
          bounding regions, the bounding regions of track 1;
        * else if only track 1 has whole-chromosome bounding regions, the
          bounding regions of track 2;
        * otherwise the intersection of both tracks' bounding regions.
        """
        brShelve1 = self._getBoundingRegionShelve(self._trackName1)
        brShelve2 = self._getBoundingRegionShelve(self._trackName2)

        allBrsAreWholeChrs1 = False
        if brShelve1 is not None:
            allBrsAreWholeChrs1 = self._commonAllBoundingRegionsAreWholeChr(brShelve1)

        allBrsAreWholeChrs2 = False
        if brShelve2 is not None:
            allBrsAreWholeChrs2 = self._commonAllBoundingRegionsAreWholeChr(brShelve2)

        for chr in GenomeInfo.getExtendedChrList(self.genome):
            if brShelve1 is None:
                yield GenomeRegion(self.genome, chr, 0,
                                   GenomeInfo.getChrLen(self.genome, chr))
                continue

            brList1 = brShelve1.getAllBoundingRegionsForChr(chr)

            if brShelve2 is None or \
                    (allBrsAreWholeChrs2 and not allBrsAreWholeChrs1):
                regions = brList1
            else:
                brList2 = brShelve2.getAllBoundingRegionsForChr(chr)
                if allBrsAreWholeChrs1 and not allBrsAreWholeChrs2:
                    regions = brList2
                else:
                    regions = self.getAllIntersectingRegions(
                        self.genome, chr, brList1, brList2)

            for reg in regions:
                yield reg
 def _assertTrackViewLoading_Segments(self, trackData, indexList, start,
                                      end):
     """Load a track view for (start, end) and compare its segments with trackData.

     indexList gives, in order, the indexes into trackData of the elements
     expected in the view. Starts and ends are expected relative to start
     and cropped to the region; the remaining attributes must match the
     indexed trackData entries. The elements must not expose leftIndex or
     rightIndex, and the view must hold exactly len(indexList) elements.
     """
     region = GenomeRegion(genome='TestGenome', start=start, end=end)
     trackView = self.trackViewLoader.loadTrackView(trackData, region,
                                                    'crop', False)

     numElements = 0
     for i, el in enumerate(trackView):
         if i >= len(indexList):
             # More elements in the view than expected.
             self.fail()
         index = indexList[i]

         expectedStart = max(0, trackData['start'][index] - start)
         expectedEnd = min(end, trackData['end'][index]) - start
         self.assertEqual(expectedStart, el.start())
         self.assertEqual(expectedEnd, el.end())
         self.assertAlmostEqual(trackData['val'][index], el.val())
         self.assertEqual(trackData['strand'][index], el.strand())
         self.assertEqual(trackData['id'][index], el.id())
         self.assertListsOrDicts(trackData['edges'][index], el.edges())
         self.assertListsOrDicts(trackData['weights'][index], el.weights())
         self.assertEqual(trackData['a'][index], el.a())
         self.assertEqual(trackData['b'][index], el.b())
         self.assertRaises(AttributeError, getattr, el, 'leftIndex')
         self.assertRaises(AttributeError, getattr, el, 'rightIndex')

         numElements = i + 1

     self.assertEqual(len(indexList), numElements)
Exemple #21
0
    def splitUserBin(region):
        """Split a region into compBins, based on borders as defined by getCompBinSize.

        The first and last piece are clipped to region.start and region.end,
        so they may be shorter than a full compBin.

        region -- GenomeRegion to split; start and end must be set.

        Returns a list of new GenomeRegion objects on the same genome and chr,
        in increasing order. The list is empty when region.start >= region.end.
        """
        compBinSize = CompBinManager.getCompBinSize()

        # Round down to the compBin border at or before region.start. Floor
        # division (//) is used explicitly so the result stays an integer also
        # when true division is in effect (Python 3 or
        # `from __future__ import division`).
        start = (int(region.start) // compBinSize) * compBinSize
        compBins = []

        while start < region.end:
            part = GenomeRegion(region.genome, region.chr)
            part.start = max(start, region.start)
            part.end = min(start + compBinSize, region.end)
            compBins.append(part)
            start += compBinSize

        return compBins
 def testNoBoundingRegions(self):
     """With no bounding regions stored, the queried region's own borders are returned.

     Checked for both dense and sparse storage: the expected
     BoundingRegionInfo has the query start and end and zeros elsewhere.
     """
     for sparse in (False, True):
         self._setUpShelve()
         self._brShelve.storeBoundingRegions([], [], sparse)

         queryRegion = GenomeRegion('TestGenome', 'chr21', 50000, 52000)
         # assertEqual instead of the deprecated assertEquals alias.
         self.assertEqual(
             BoundingRegionInfo(50000, 52000, 0, 0, 0, 0),
             self._brShelve.getBoundingRegionInfo(queryRegion))
Exemple #23
0
 def __new__(cls, genome):
     """Return a one-element list holding the region (0, 1) of the genome's first chromosome.

     Note that a plain list is returned, not an instance of cls. If the
     genome has no chromosomes, None is returned.
     """
     from gtrackcore.track.core.GenomeRegion import GenomeRegion
     from gtrackcore.metadata.GenomeInfo import GenomeInfo

     chrList = GenomeInfo.getChrList(genome)
     if len(chrList) > 0:
         # Reuse the list fetched above instead of asking GenomeInfo again.
         return [GenomeRegion(genome, chrList[0], 0, 1)]
     return None
Exemple #24
0
 def assertChrElCounts(self, trackName, chrElCountDict, allowOverlaps,
                       customBins):
     """Assert the number of track elements per chromosome.

     trackName      -- track to load the track views from.
     chrElCountDict -- dict of chr -> expected number of elements.
     allowOverlaps  -- passed on to _getTrackView.
     customBins     -- container of chr -> GenomeRegion to count within;
                       chromosomes not in it are counted over their full
                       length.
     """
     for chr in chrElCountDict:
         if chr in customBins:
             region = customBins[chr]
         else:
             region = GenomeRegion(self.GENOME, chr, 0,
                                   GenomeInfo.getChrLen(self.GENOME, chr))
         tv = self._getTrackView(trackName, region, allowOverlaps)

         # Count by iteration without building a throwaway list;
         # assertEqual replaces the deprecated assertEquals alias.
         numElements = sum(1 for _ in tv)
         self.assertEqual(chrElCountDict[chr], numElements)
    def testBoundingRegionsNotPositive(self):
        """A bounding region with equal start and end (0, 0) must be rejected.

        storeBoundingRegions is expected to raise InvalidFormatError.
        """
        self._setUpShelve()

        emptyRegion = GenomeRegion('TestGenome', 'chr21', 0, 0)
        brTuples = [BoundingRegionTuple(emptyRegion, 1)]

        store = self._brShelve.storeBoundingRegions
        self.assertRaises(InvalidFormatError, store, brTuples, ['chr21'],
                          sparse=True)
    def getAllBoundingRegionsForChr(self, chr):
        """Yield a GenomeRegion for every bounding region stored for chr.

        The contents are refreshed for chr first. Nothing is yielded if chr
        has no entry in self._contents.
        """
        self._updateContentsIfNecessary(chr)

        if chr not in self._contents:
            return

        #Temporary
        # The entry is either a dict of bounding region infos or a holder
        # object exposing them as .brInfos.
        brInfoHolder = self._contents[chr]
        brInfosForChr = brInfoHolder.values() \
            if isinstance(brInfoHolder, dict) else brInfoHolder.brInfos

        for brInfo in brInfosForChr:
            yield GenomeRegion(self._genome, chr, brInfo.start, brInfo.end)
Exemple #27
0
 def __cmp__(self, other):
     """Compare with other on all element attributes, in a fixed order.

     The attributes compared are genome, chr, start, end, val, strand, id,
     edges, weights and extra. None is always ordered after self (-1 is
     returned). If the full comparison fails and other is a GenomeRegion,
     the comparison falls back to GenomeRegion.__cmp__.

     Raises the original comparison error when no fallback applies, instead
     of silently returning None (which is not a valid __cmp__ result).
     """
     if other is None:
         return -1

     try:
         return cmp([self.genome, self.chr, self.start, self.end, self.val, self.strand, self.id, self.edges, self.weights, self.extra],
                    [other.genome, other.chr, other.start, other.end, other.val, other.strand, other.id, other.edges, other.weights, other.extra])
     except Exception:
         # E.g. other lacks the element-only attributes. Exception rather
         # than a bare except, so KeyboardInterrupt/SystemExit pass through.
         if isinstance(other, GenomeRegion):
             return GenomeRegion.__cmp__(self, other)
         raise
Exemple #28
0
 def __cmp__(self, other):
     """Compare with other on all element attributes, in a fixed order.

     The attributes compared are genome, chr, start, end, val, strand, id,
     edges, weights and extra. None is always ordered after self (-1 is
     returned). If the full comparison fails and other is a GenomeRegion,
     the comparison falls back to GenomeRegion.__cmp__.

     Raises the original comparison error when no fallback applies, instead
     of silently returning None (which is not a valid __cmp__ result).
     """
     if other is None:
         return -1

     try:
         return cmp([self.genome, self.chr, self.start, self.end, self.val, self.strand, self.id, self.edges, self.weights, self.extra],
                    [other.genome, other.chr, other.start, other.end, other.val, other.strand, other.id, other.edges, other.weights, other.extra])
     except Exception:
         # E.g. other lacks the element-only attributes. Exception rather
         # than a bare except, so KeyboardInterrupt/SystemExit pass through.
         if isinstance(other, GenomeRegion):
             return GenomeRegion.__cmp__(self, other)
         raise
Exemple #29
0
    def _assertLists(self, tv, starts, ends, vals, strands, ids, edges,
                     weights, extras, region):
        """Assert that the track view tv holds exactly the given contents.

        starts .. weights -- expected per-element values, or None when the
                             attribute is expected to be absent (the element
                             accessor and the *AsNumpyArray method are then
                             compared with None).
        extras            -- None or an ordered dict of key -> expected
                             per-element values for the extra attributes.
        region            -- (start, end) of the expected genome anchor on
                             self.genome / self.chr.

        Checks, in order: attribute lengths against the number of elements,
        the genome anchor, every element's accessors, and the whole-array
        accessors of the view.
        """
        if extras is None:
            extras = OrderedDict()

        # Count the elements once instead of once per attribute.
        numElements = sum(1 for _ in tv)

        # NOTE(review): edges is not part of this length check in the
        # original either -- confirm whether that is intentional.
        for attr in [starts, ends, vals, strands, ids, weights
                     ] + list(extras.values()):
            # Identity check: `!= None` is elementwise for numpy arrays.
            if attr is not None:
                self.assertEqual(len(attr), numElements)

        self.assertEqual(GenomeRegion(genome=self.genome, chr=self.chr,
                                      start=region[0], end=region[1]),
                         tv.genomeAnchor)

        for i, el in enumerate(tv):
            self.assertEqual(starts[i] if starts is not None else None,
                             el.start())
            self.assertEqual(ends[i] if ends is not None else None, el.end())
            if vals is None:
                self.assertEqual(None, el.val())
            else:
                self.assertAlmostEqual(vals[i], el.val())
            self.assertEqual(strands[i] if strands is not None else None,
                             el.strand())
            self.assertEqual(ids[i] if ids is not None else None, el.id())
            self.assertListsOrDicts(edges[i] if edges is not None else None,
                                    el.edges())
            self.assertListsOrDicts(
                weights[i] if weights is not None else None, el.weights())
            for key in extras:
                # extras is never None here (see above), so index directly.
                self.assertEqual(extras[key][i], getattr(el, key)())

        self._smartAssertListWithNone(starts, tv.startsAsNumpyArray())
        self._smartAssertListWithNone(ends, tv.endsAsNumpyArray())
        self._smartAssertListWithNone(vals, tv.valsAsNumpyArray())
        self._smartAssertListWithNone(strands, tv.strandsAsNumpyArray())
        self._smartAssertListWithNone(ids, tv.idsAsNumpyArray())
        self._smartAssertListWithNone(edges, tv.edgesAsNumpyArray())
        self._smartAssertListWithNone(weights, tv.weightsAsNumpyArray())
        for key in extras:
            self._smartAssertListWithNone(extras[key],
                                          tv.extrasAsNumpyArray(key))
 def _assertTrackViewLoading_Numbers(self, trackData, start, end):
     """Load a track view for (start, end) and compare it with slices of trackData.

     For each of val, strand, id, edges, weights, a and b, the values read
     from the view's elements must match trackData[key][start:end].
     """
     region = GenomeRegion(genome='TestGenome', start=start, end=end)
     trackView = self.trackViewLoader.loadTrackView(trackData, region,
                                                    'crop', False)

     # Each key is both a trackData entry and an accessor method name on
     # the track elements.
     for key in ('val', 'strand', 'id', 'edges', 'weights', 'a', 'b'):
         expectedValues = trackData[key][start:end]
         loadedValues = [getattr(el, key)() for el in trackView]
         self.assertListsOrDicts(expectedValues, loadedValues)
Exemple #31
0
    def getBoundingRegionTuples(self):
        """Return the bounding region tuples of the source, or whole-chromosome ones.

        Tuples whose region has no chr are dropped. If none remain, one tuple
        per chromosome found in the genome elements is created instead,
        covering the whole chromosome and holding its element count.
        self._boundingRegionsAndGEsCorrespond is set to True when the
        source's own tuples are returned and to False for the generated ones.
        """
        boundingRegionTuples = []
        for brTuple in self._getBoundingRegionTuples():
            if brTuple.region.chr is not None:
                boundingRegionTuples.append(brTuple)

        if len(boundingRegionTuples) != 0:
            self._boundingRegionsAndGEsCorrespond = True
            return boundingRegionTuples

        from gtrackcore.input.core.GenomeElementSource import BoundingRegionTuple
        from gtrackcore.track.core.GenomeRegion import GenomeRegion
        from gtrackcore.metadata.GenomeInfo import GenomeInfo

        for chr in self.getAllChrs():
            chrLen = GenomeInfo.getChrLen(self._geSource.genome, chr)
            wholeChr = GenomeRegion(chr=chr, start=0, end=chrLen)
            boundingRegionTuples.append(
                BoundingRegionTuple(wholeChr, self.getNumElementsForChr(chr)))

        self._boundingRegionsAndGEsCorrespond = False
        return boundingRegionTuples
Exemple #32
0
    def __init__(self,
                 vals=True,
                 strands=True,
                 anchor=None,
                 valDType='float64'):
        """Create a sample track view on TestGenome chr21 with values and strands only.

        vals     -- explicit values, or True (the default) to use a random
                    value list of the right length. NOTE(review): how other
                    values are treated is decided by _createList -- confirm
                    there.
        strands  -- as vals, for strands (dtype 'bool8').
        anchor   -- (start, end) of the genome anchor. If None, the anchor
                    starts at 10 and is as long as vals; an anchor is
                    required when vals is True.
        valDType -- dtype passed to _createList for the values.
        """
        # Identity checks on the True/None sentinels: vals and anchor may be
        # lists or arrays, for which `!= True` / `== None` are elementwise
        # or ambiguous.
        assert vals is not True or anchor is not None

        if anchor is None:
            numElements = len(vals)
            anchor = [10, 10 + numElements]
        else:
            numElements = anchor[1] - anchor[0]

        vals = self._createList(vals, getRandValList(numElements), valDType)
        strands = self._createList(strands, getRandStrandList(numElements),
                                   'bool8')

        TrackView.__init__(
            self, GenomeRegion('TestGenome', 'chr21', anchor[0], anchor[1]),
            None, None, vals, strands, None, None, None, 'crop', False)