def merge(genome, trackName, allowOverlaps):
    """Merge the per-chromosome memmap arrays of a track into one file per array.

    For every array name found in the track data of the first existing
    chromosome, the arrays of all existing chromosomes are combined with
    ChrMemmapFolderMerger.mergeArrays (in the order given by
    _existingChrIter) and written to a new memmap file in the track directory.

    Parameters:
        genome: genome identifier, passed on to the path/track-data helpers.
        trackName: track name, passed on to the path/track-data helpers.
        allowOverlaps: selects which preprocessed variant of the track to merge.

    Raises:
        EmptyGESourceError: if no chromosome with data exists for the track.
    """
    path = createDirPath(trackName, genome, allowOverlaps=allowOverlaps)
    chrList = TrackInfoDataCollector(genome, trackName).getPreProcessedChrs(allowOverlaps)

    existingChrList = list(ChrMemmapFolderMerger._existingChrIter(path, chrList))
    if not existingChrList:
        raise EmptyGESourceError('No data lines has been read from source file (probably because it is empty).')

    firstChrTrackData = TrackSource().getTrackData(trackName, genome, existingChrList[0],
                                                   allowOverlaps, forceChrFolders=True)

    # Snapshot the array names first: entries are deleted from
    # firstChrTrackData inside the loop, which must not happen while
    # iterating over a live keys view.
    arrayNames = list(firstChrTrackData.keys())

    for arrayName in arrayNames:
        mergedArray = firstChrTrackData[arrayName][:]
        # Fields 1 and 2 of the parsed memmap filename are the element and
        # dtype dimensions; the merged file uses the maximum over all chromosomes.
        elementDim, dtypeDim = parseMemmapFileFn(firstChrTrackData[arrayName].filename)[1:3]
        del firstChrTrackData[arrayName]

        for chrName in existingChrList[1:]:
            chrTrackData = TrackSource().getTrackData(trackName, genome, chrName,
                                                      allowOverlaps, forceChrFolders=True)

            mergedArray = ChrMemmapFolderMerger.mergeArrays(mergedArray,
                                                            np.array(chrTrackData[arrayName][:]))
            elementDimNew, dtypeDimNew = parseMemmapFileFn(chrTrackData[arrayName].filename)[1:3]
            elementDim = max(elementDim, elementDimNew)
            dtypeDim = max(dtypeDim, dtypeDimNew)

            del chrTrackData[arrayName]

        mergedFn = createMemmapFileFn(path, arrayName, elementDim, dtypeDim, str(mergedArray.dtype))

        # Write the merged data to a freshly created memmap file and flush it.
        f = np.memmap(mergedFn, dtype=mergedArray.dtype, mode='w+', shape=mergedArray.shape)
        f[:] = mergedArray
        f.flush()
        # Drop the references explicitly before handling the next array.
        del f
        del mergedArray
def merge(genome, trackName, allowOverlaps):
    """Merge the per-chromosome memmap arrays of a track into one file per array.

    For every array name found in the track data of the first existing
    chromosome, the arrays of all existing chromosomes are combined with
    ChrMemmapFolderMerger.mergeArrays and written to a new memmap file in the
    track directory. Chromosomes are processed in sorted order unless the
    track format reports a dense representation, in which case the order
    returned by the metadata collector is kept.

    Parameters:
        genome: genome identifier, passed on to the path/track-data helpers.
        trackName: track name, passed on to the path/track-data helpers.
        allowOverlaps: selects which preprocessed variant of the track to merge.

    Raises:
        EmptyGESourceError: if no chromosome with data exists for the track.
    """
    path = createDirPath(trackName, genome, allowOverlaps=allowOverlaps)

    collector = PreProcMetaDataCollector(genome, trackName)
    chrList = collector.getPreProcessedChrs(allowOverlaps)
    if not collector.getTrackFormat().reprIsDense():
        chrList = sorted(chrList)

    existingChrList = list(ChrMemmapFolderMerger._existingChrIter(path, chrList))
    if not existingChrList:
        raise EmptyGESourceError(
            'No data lines has been read from source file (probably because it is empty).'
        )

    firstChrTrackData = TrackSource().getTrackData(trackName, genome, existingChrList[0],
                                                   allowOverlaps, forceChrFolders=True)

    # Snapshot the array names first: entries are deleted from
    # firstChrTrackData inside the loop, which must not happen while
    # iterating over a live keys view.
    arrayNames = list(firstChrTrackData.keys())

    for arrayName in arrayNames:
        mergedArray = firstChrTrackData[arrayName][:]
        # Fields 1 and 2 of the parsed memmap filename are the element and
        # dtype dimensions; the merged file uses the maximum over all chromosomes.
        elementDim, dtypeDim = parseMemmapFileFn(firstChrTrackData[arrayName].filename)[1:3]
        del firstChrTrackData[arrayName]

        for chrName in existingChrList[1:]:
            chrTrackData = TrackSource().getTrackData(trackName, genome, chrName,
                                                      allowOverlaps, forceChrFolders=True)

            mergedArray = ChrMemmapFolderMerger.mergeArrays(mergedArray,
                                                            np.array(chrTrackData[arrayName][:]))
            elementDimNew, dtypeDimNew = parseMemmapFileFn(chrTrackData[arrayName].filename)[1:3]
            elementDim = max(elementDim, elementDimNew)
            dtypeDim = max(dtypeDim, dtypeDimNew)

            del chrTrackData[arrayName]

        mergedFn = createMemmapFileFn(path, arrayName, elementDim, dtypeDim, str(mergedArray.dtype))

        # Write the merged data to a freshly created memmap file and flush it.
        f = np.memmap(mergedFn, dtype=mergedArray.dtype, mode='w+', shape=mergedArray.shape)
        f[:] = mergedArray
        f.flush()
        # Drop the references explicitly before handling the next array.
        del f
        del mergedArray
def testCreateIntensityTrack(self):
    """Create a function track over three regions and verify the stored result.

    Runs a CreateFunctionTrackStat analysis job restricted to three disjoint
    regions, then checks the bounding-region shelve of the output track and
    the shape of the resulting 'val' array.
    """
    regions = [GenomeRegion(self._genome, self._chr, start, end)
               for start, end in ((1000, 5000), (6000, 7000), (10000, 16000))]

    analysisDef = '[dataStat=SimpleBpIntensityStat] [outTrackName=' + '^'.join(self._trackName) + \
                  '] [numDiscreteVals=10] -> CreateFunctionTrackStat'
    job = AnalysisDefJob(analysisDef, ['nums'], ['points'], regions, genome=self._genome)

    # The job is deliberately run twice.
    # NOTE(review): presumably the second run checks that re-creating an
    # already existing output track works - confirm.
    for _ in range(2):
        job.run()

    brShelve = BoundingRegionShelve(self._genome, self._trackName, allowOverlaps=False)

    # (probe start, expected info) pairs, each probing the 1-bp region
    # [start, start + 1). None means the probe lies outside every bounding
    # region and the lookup must raise OutsideBoundingRegionError. (An older
    # variant of these checks expected a BoundingRegionInfo(start, start + 1,
    # 0, 0, 0, 0) for such probes instead of an exception.)
    probes = [
        (0, None),
        (2000, BoundingRegionInfo(1000, 5000, 0, 4000, 0, 0)),
        (5500, None),
        (6500, BoundingRegionInfo(6000, 7000, 4000, 5000, 0, 0)),
        (8000, None),
        (11000, BoundingRegionInfo(10000, 16000, 5000, 11000, 0, 0)),
        (16500, None),
    ]

    for start, expectedInfo in probes:
        probeRegion = GenomeRegion(self._genome, self._chr, start, start + 1)
        if expectedInfo is None:
            self.assertRaises(OutsideBoundingRegionError,
                              brShelve.getBoundingRegionInfo, probeRegion)
        else:
            self.assertEqual(expectedInfo, brShelve.getBoundingRegionInfo(probeRegion))

    trackData = TrackSource().getTrackData(self._trackName, self._genome, None, False)

    self.assertListsOrDicts(['val'], trackData.keys())
    # 11000 equals the summed length of the three regions (4000 + 1000 + 6000).
    self.assertListsOrDicts((11000, ), trackData['val'].shape)