def getTrackData(self, trackName, genome, chr, allowOverlaps, forceChrFolders=False):
    """
    Build a TrackData object from the files found in a track directory.

    The directory is resolved with createDirPath(trackName, genome, chr,
    allowOverlaps). When forceChrFolders is false and the track's bounding
    region shelve file exists, chr is reset to None before the directory is
    resolved (presumably selecting the track-level directory instead of a
    per-chromosome one -- confirm against createDirPath).

    Entries whose name starts with '.' and sub-directories are skipped. A
    bounding region file attaches the shelve to the result as
    `boundingRegionShelve`; every other file is parsed with
    parseMemmapFileFn and stored under its prefix.

    Returns the populated TrackData. An assertion fails if two files in the
    directory share the same prefix.
    """
    collected = TrackData()
    shelve = BoundingRegionShelve(genome, trackName, allowOverlaps)

    # Only consult the shelve when chromosome folders are not forced.
    if not forceChrFolders:
        if shelve.fileExists():
            chr = None

    trackDir = createDirPath(trackName, genome, chr, allowOverlaps)

    for entry in os.listdir(trackDir):
        entryPath = trackDir + os.sep + entry

        # Hidden entries are rejected before the filesystem is queried.
        if entry[0] == '.':
            continue
        if os.path.isdir(entryPath):
            continue

        if isBoundingRegionFileName(entry):
            # Reuse a previously registered shelve for this path, if any.
            if entryPath not in self._fileDict:
                self._fileDict[entryPath] = shelve
            collected.boundingRegionShelve = self._fileDict[entryPath]
        else:
            prefix, elementDim, dtypeDim, dtype = parseMemmapFileFn(entry)
            assert prefix not in collected
            collected[prefix] = self._getFile(chr, trackDir, entryPath, elementDim, dtype, dtypeDim)

    return collected
def merge(genome, trackName, allowOverlaps):
    """
    Merge the per-chromosome arrays of a track into one memmap file per array.

    The chromosomes reported by PreProcMetaDataCollector.getPreProcessedChrs
    are sorted unless the track format's reprIsDense() is true, then filtered
    through ChrMemmapFolderMerger._existingChrIter. For every array name
    found in the first remaining chromosome, the arrays of all remaining
    chromosomes are combined in that order with
    ChrMemmapFolderMerger.mergeArrays and written to a new memmap file in the
    directory given by createDirPath(trackName, genome,
    allowOverlaps=allowOverlaps).

    The element and dtype dimensions used in the merged file name are the
    maxima of the values parsed from the per-chromosome file names.

    NOTE(review): every chromosome is assumed to provide every array name
    present in the first one; a missing name would fail on lookup -- confirm
    that the preprocessor guarantees this.

    Raises:
        EmptyGESourceError: if _existingChrIter yields no chromosomes.
    """
    path = createDirPath(trackName, genome, allowOverlaps=allowOverlaps)

    collector = PreProcMetaDataCollector(genome, trackName)
    chrList = collector.getPreProcessedChrs(allowOverlaps)
    if not collector.getTrackFormat().reprIsDense():
        chrList = sorted(chrList)

    existingChrList = list(ChrMemmapFolderMerger._existingChrIter(path, chrList))
    if not existingChrList:
        raise EmptyGESourceError('No data lines has been read from source file (probably because it is empty).')

    firstChrTrackData = TrackSource().getTrackData(trackName, genome, existingChrList[0], allowOverlaps, forceChrFolders=True)

    # Snapshot the names up front: entries are deleted from firstChrTrackData
    # inside the loop, and iterating a live keys() view while deleting raises
    # RuntimeError on Python 3.
    arrayList = list(firstChrTrackData.keys())

    for arrayName in arrayList:
        mergedArray = firstChrTrackData[arrayName][:]
        elementDim, dtypeDim = parseMemmapFileFn(firstChrTrackData[arrayName].filename)[1:3]
        # Drop the reference to the source array once it has been read.
        del firstChrTrackData[arrayName]

        for chr in existingChrList[1:]:
            chrTrackData = TrackSource().getTrackData(trackName, genome, chr, allowOverlaps, forceChrFolders=True)

            mergedArray = ChrMemmapFolderMerger.mergeArrays(mergedArray, np.array(chrTrackData[arrayName][:]))

            # The merged file name must accommodate the largest dimensions
            # seen across all chromosomes.
            elementDimNew, dtypeDimNew = parseMemmapFileFn(chrTrackData[arrayName].filename)[1:3]
            elementDim = max(elementDim, elementDimNew)
            dtypeDim = max(dtypeDim, dtypeDimNew)

            del chrTrackData[arrayName]

        mergedFn = createMemmapFileFn(path, arrayName, elementDim, dtypeDim, str(mergedArray.dtype))

        # Write the merged data to a freshly created memmap file and flush it
        # before releasing the references.
        f = np.memmap(mergedFn, dtype=mergedArray.dtype, mode='w+', shape=mergedArray.shape)
        f[:] = mergedArray
        f.flush()
        del f
        del mergedArray