Beispiel #1
0
    def merge(genome, trackName, allowOverlaps):
        """Merge the per-chromosome memmap arrays of a track into one file per array.

        For every array name found in the track data of the first existing
        chromosome, the arrays of all existing chromosomes are combined with
        ChrMemmapFolderMerger.mergeArrays (in chromosome-list order) and the
        result is written as a new memmap file in the directory that
        createDirPath returns when no chromosome is given.

        Args:
            genome: genome identifier, passed through to the path and
                track-data helpers.
            trackName: track name, passed through to the same helpers.
            allowOverlaps: passed through to the helpers to select which
                preprocessed data to merge.

        Raises:
            EmptyGESourceError: if _existingChrIter yields no chromosome.
        """
        path = createDirPath(trackName, genome, allowOverlaps=allowOverlaps)

        collector = PreProcMetaDataCollector(genome, trackName)
        chrList = collector.getPreProcessedChrs(allowOverlaps)
        if not collector.getTrackFormat().reprIsDense():
            # Non-dense representations are merged in sorted chromosome order.
            chrList = sorted(chrList)

        existingChrList = list(
            ChrMemmapFolderMerger._existingChrIter(path, chrList))
        if not existingChrList:
            raise EmptyGESourceError(
                'No data lines has been read from source file (probably because it is empty).'
            )

        firstChrTrackData = TrackSource().getTrackData(trackName,
                                                       genome,
                                                       existingChrList[0],
                                                       allowOverlaps,
                                                       forceChrFolders=True)
        # Snapshot the array names up front: entries are deleted from
        # firstChrTrackData inside the loop, and iterating a live keys() view
        # of a dict while deleting from it raises RuntimeError on Python 3.
        arrayList = list(firstChrTrackData.keys())
        for arrayName in arrayList:
            mergedArray = firstChrTrackData[arrayName][:]
            # Items 1 and 2 of the parsed file name are the element and dtype
            # dimensions (same positions as unpacked in getTrackData callers).
            elementDim, dtypeDim = parseMemmapFileFn(
                firstChrTrackData[arrayName].filename)[1:3]
            del firstChrTrackData[arrayName]

            for chrName in existingChrList[1:]:
                chrTrackData = TrackSource().getTrackData(trackName,
                                                          genome,
                                                          chrName,
                                                          allowOverlaps,
                                                          forceChrFolders=True)

                mergedArray = ChrMemmapFolderMerger.mergeArrays(
                    mergedArray, np.array(chrTrackData[arrayName][:]))
                elementDimNew, dtypeDimNew = parseMemmapFileFn(
                    chrTrackData[arrayName].filename)[1:3]
                # The merged file is named after the largest dimensions seen.
                # NOTE(review): if parseMemmapFileFn can return None for a
                # dimension, max() fails on Python 3 -- confirm.
                elementDim = max(elementDim, elementDimNew)
                dtypeDim = max(dtypeDim, dtypeDimNew)

                del chrTrackData[arrayName]

            mergedFn = createMemmapFileFn(path, arrayName, elementDim,
                                          dtypeDim, str(mergedArray.dtype))

            # Write the merged data to a freshly created memmap file, then
            # drop the references before handling the next array.
            f = np.memmap(mergedFn,
                          dtype=mergedArray.dtype,
                          mode='w+',
                          shape=mergedArray.shape)
            f[:] = mergedArray
            f.flush()
            del f
            del mergedArray
Beispiel #2
0
    def getTrackData(self, trackName, genome, chr, allowOverlaps,
                     forceChrFolders=False):
        """Collect the files of a track directory into a TrackData object.

        The directory is obtained from createDirPath. Unless forceChrFolders
        is true, chr is replaced by None when the bounding region shelve
        file exists, before the directory is computed.

        Entries whose name starts with '.' and sub-directories are skipped.
        A bounding region file sets trackData.boundingRegionShelve (reusing
        the object stored in self._fileDict for that path, if any). Every
        other file name is parsed with parseMemmapFileFn and loaded through
        self._getFile, stored under its prefix.

        Returns:
            The populated TrackData object.
        """
        result = TrackData()

        brShelve = BoundingRegionShelve(genome, trackName, allowOverlaps)
        if not forceChrFolders:
            if brShelve.fileExists():
                chr = None

        dirPath = createDirPath(trackName, genome, chr, allowOverlaps)

        for fileName in os.listdir(dirPath):
            fullPath = os.sep.join((dirPath, fileName))

            # Skip hidden entries and sub-directories.
            if fileName[0] == '.':
                continue
            if os.path.isdir(fullPath):
                continue

            if isBoundingRegionFileName(fileName):
                # Register the shelve once per path; later calls reuse it.
                if fullPath not in self._fileDict:
                    self._fileDict[fullPath] = brShelve
                result.boundingRegionShelve = self._fileDict[fullPath]
                continue

            prefix, elementDim, dtypeDim, dtype = parseMemmapFileFn(fileName)

            # Each prefix is expected to occur only once per directory.
            assert prefix not in result
            result[prefix] = self._getFile(chr, dirPath, fullPath, elementDim,
                                           dtype, dtypeDim)

        return result
Beispiel #3
0
    def getTrackData(self, trackName, genome, chr, allowOverlaps, forceChrFolders=False):
        """Build a TrackData object from the files in a track's directory.

        If forceChrFolders is false and the bounding region shelve file
        exists, chr is set to None before createDirPath is called. Hidden
        entries (leading '.') and directories are ignored. A bounding region
        file is attached as trackData.boundingRegionShelve, using the entry
        kept in self._fileDict for its path. All remaining files are parsed
        by parseMemmapFileFn and opened via self._getFile under their prefix.
        """
        trackData = TrackData()

        shelve = BoundingRegionShelve(genome, trackName, allowOverlaps)
        useChrFolder = forceChrFolders or not shelve.fileExists()
        if not useChrFolder:
            chr = None

        folder = createDirPath(trackName, genome, chr, allowOverlaps)

        for entry in os.listdir(folder):
            entryPath = folder + os.sep + entry

            isHidden = entry[0] == '.'
            if isHidden or os.path.isdir(entryPath):
                continue

            if isBoundingRegionFileName(entry):
                # Keep a single shelve object per bounding region file path.
                if entryPath not in self._fileDict:
                    self._fileDict[entryPath] = shelve
                trackData.boundingRegionShelve = self._fileDict[entryPath]
            else:
                prefix, elementDim, dtypeDim, dtype = parseMemmapFileFn(entry)

                # A prefix must not be registered twice.
                assert prefix not in trackData
                trackData[prefix] = self._getFile(chr, folder, entryPath, elementDim, dtype, dtypeDim)

        return trackData
    def merge(genome, trackName, allowOverlaps):
        """Combine a track's per-chromosome memmap arrays into single files.

        The array names are taken from the track data of the first existing
        chromosome. For each name, the arrays of all existing chromosomes are
        merged in list order via ChrMemmapFolderMerger.mergeArrays, and the
        result is written to a new memmap file in the directory returned by
        createDirPath when called without a chromosome.

        Args:
            genome: genome identifier forwarded to the helper functions.
            trackName: track name forwarded to the helper functions.
            allowOverlaps: forwarded to the helpers to select the
                preprocessed data to merge.

        Raises:
            EmptyGESourceError: when _existingChrIter yields nothing.
        """
        path = createDirPath(trackName, genome, allowOverlaps=allowOverlaps)

        collector = PreProcMetaDataCollector(genome, trackName)
        chrList = collector.getPreProcessedChrs(allowOverlaps)
        if not collector.getTrackFormat().reprIsDense():
            # Only non-dense representations are merged in sorted order.
            chrList = sorted(chrList)

        existingChrList = list(ChrMemmapFolderMerger._existingChrIter(path, chrList))
        if not existingChrList:
            raise EmptyGESourceError('No data lines has been read from source file (probably because it is empty).')

        firstChrTrackData = TrackSource().getTrackData(trackName, genome, existingChrList[0], allowOverlaps, forceChrFolders=True)
        # Copy the keys into a list before looping: the loop body deletes
        # entries from firstChrTrackData, which would invalidate a live
        # keys() view on Python 3 (RuntimeError during iteration).
        arrayList = list(firstChrTrackData.keys())
        for arrayName in arrayList:
            mergedArray = firstChrTrackData[arrayName][:]
            elementDim, dtypeDim = parseMemmapFileFn(firstChrTrackData[arrayName].filename)[1:3]
            del firstChrTrackData[arrayName]

            for chrName in existingChrList[1:]:
                chrTrackData = TrackSource().getTrackData(trackName, genome, chrName, allowOverlaps, forceChrFolders=True)

                mergedArray = ChrMemmapFolderMerger.mergeArrays(mergedArray, np.array(chrTrackData[arrayName][:]))
                elementDimNew, dtypeDimNew = parseMemmapFileFn(chrTrackData[arrayName].filename)[1:3]
                # Track the largest dimensions seen; they name the merged file.
                elementDim = max(elementDim, elementDimNew)
                dtypeDim = max(dtypeDim, dtypeDimNew)

                del chrTrackData[arrayName]

            mergedFn = createMemmapFileFn(path, arrayName, elementDim, dtypeDim, str(mergedArray.dtype))

            # Create the output memmap, copy the merged data in, and release
            # both references before the next array is processed.
            f = np.memmap(mergedFn, dtype=mergedArray.dtype, mode='w+', shape=mergedArray.shape)
            f[:] = mergedArray
            f.flush()
            del f
            del mergedArray