def _getRawTrackView(self, region, borderHandling, allowOverlaps):
    """
    Build a placeholder TrackView for a track served by an external source.

    The genome element source for self.trackName is obtained through
    ExternalTrackManager, and only its metadata (prefix list, value type and
    dimension, edge weight type and dimension) is read here. For every prefix
    the source reports, a tiny array of placeholder values is created: one
    element, or two when the track format is both dense and interval.

    Placeholder contents:
        start   -> [-1]
        end     -> [0] (or [0, 1] for dense interval formats)
        strand  -> 1 per element
        id, edges and any other (extra) prefix -> empty strings
        val, weights -> findEmptyVal() of the respective data type

    Raises IncompatibleTracksError when allowOverlaps is set while the format
    is dense or the source reports that it has no overlapping elements.
    """
    assert len(region) == 1

    from collections import OrderedDict
    from gtrackcore.track.memmap.CommonMemmapFunctions import findEmptyVal
    from gtrackcore.track.core.TrackView import TrackView
    import numpy as np

    source = ExternalTrackManager.getGESourceFromGalaxyOrVirtualTN(self.trackName, region.genome)

    prefixes = source.getPrefixList()
    valDataType = source.getValDataType()
    valDim = source.getValDim()
    weightDataType = source.getEdgeWeightDataType()
    weightDim = source.getEdgeWeightDim()

    trackFormat = TrackFormat.createInstanceFromPrefixList(
        prefixes, valDataType, valDim, weightDataType, weightDim)

    if allowOverlaps:
        if trackFormat.isDense() or source.hasNoOverlappingElements():
            raise IncompatibleTracksError(
                prettyPrintTrackName(self.trackName) + ' with format: '
                + str(trackFormat) + ' does not satisfy ' + str(self._trackFormatReq))

    isDenseInterval = trackFormat.isDense() and trackFormat.isInterval()
    if isDenseInterval:
        elCount = 2
    else:
        elCount = 1

    # The track format above is created from the types as reported by the
    # source; only afterwards is a bare 'S' replaced by 'S2' for the arrays.
    if valDataType == 'S':
        valDataType = 'S2'
    if weightDataType == 'S':
        weightDataType = 'S2'

    def _makeVal():
        flat = np.array([findEmptyVal(valDataType)] * valDim * elCount,
                        dtype=valDataType)
        return flat.reshape((elCount, valDim) if valDim > 1 else elCount)

    def _makeWeights():
        stacked = np.array([[[findEmptyVal(weightDataType)]]] * weightDim * elCount,
                           dtype=weightDataType)
        return stacked.reshape((elCount, 1, weightDim) if weightDim > 1 else (elCount, 1))

    # Builders are callables so that an array (and its findEmptyVal lookup)
    # is only produced for prefixes that actually occur in the prefix list.
    builders = {
        'start': lambda: np.array([-1], dtype='int32'),
        'end': lambda: np.array([0, 1] if isDenseInterval else [0], dtype='int32'),
        'val': _makeVal,
        'strand': lambda: np.array([1] * elCount, dtype='int8'),
        'id': lambda: np.array([''] * elCount, dtype='S1'),
        'edges': lambda: np.array([['']] * elCount, dtype='S1'),
        'weights': _makeWeights,
    }

    built = {}
    extraLists = OrderedDict()
    for prefix in prefixes:
        if prefix in builders:
            built[prefix] = builders[prefix]()
        else:
            # Any prefix that is not a core column is kept as an extra column,
            # in the order given by the source.
            extraLists[prefix] = np.array([''] * elCount, dtype='S1')

    # Columns missing from the prefix list are passed on as None.
    return TrackView(region,
                     built.get('start'),
                     built.get('end'),
                     built.get('val'),
                     built.get('strand'),
                     built.get('id'),
                     built.get('edges'),
                     built.get('weights'),
                     borderHandling,
                     allowOverlaps,
                     extraLists)
def _getRawTrackView(self, region, borderHandling, allowOverlaps):
    """
    Build a placeholder TrackView for a track served by an external source.

    The genome element source for self.trackName is obtained through
    ExternalTrackManager, and only its metadata (prefix list, value type and
    dimension, edge weight type and dimension) is read here. For every prefix
    the source reports, a tiny array of placeholder values is created: one
    element, or two when the track format is both dense and interval.

    Placeholder contents:
        start   -> [-1]
        end     -> [0] (or [0, 1] for dense interval formats)
        strand  -> 1 per element
        id, edges and any other (extra) prefix -> empty strings
        val, weights -> findEmptyVal() of the respective data type

    Raises IncompatibleTracksError when allowOverlaps is set while the format
    is dense or the source reports that it has no overlapping elements.
    """
    assert len(region) == 1

    from collections import OrderedDict
    from gtrackcore.track.memmap.CommonMemmapFunctions import findEmptyVal
    from gtrackcore.track.core.TrackView import TrackView
    import numpy as np

    source = ExternalTrackManager.getGESourceFromGalaxyOrVirtualTN(self.trackName, region.genome)

    prefixes = source.getPrefixList()
    valDataType = source.getValDataType()
    valDim = source.getValDim()
    weightDataType = source.getEdgeWeightDataType()
    weightDim = source.getEdgeWeightDim()

    trackFormat = TrackFormat.createInstanceFromPrefixList(
        prefixes, valDataType, valDim, weightDataType, weightDim)

    if allowOverlaps:
        if trackFormat.isDense() or source.hasNoOverlappingElements():
            raise IncompatibleTracksError(
                prettyPrintTrackName(self.trackName) + ' with format: '
                + str(trackFormat) + ' does not satisfy ' + str(self._trackFormatReq))

    isDenseInterval = trackFormat.isDense() and trackFormat.isInterval()
    if isDenseInterval:
        elCount = 2
    else:
        elCount = 1

    # The track format above is created from the types as reported by the
    # source; only afterwards is a bare 'S' replaced by 'S2' for the arrays.
    if valDataType == 'S':
        valDataType = 'S2'
    if weightDataType == 'S':
        weightDataType = 'S2'

    def _makeVal():
        flat = np.array([findEmptyVal(valDataType)] * valDim * elCount,
                        dtype=valDataType)
        return flat.reshape((elCount, valDim) if valDim > 1 else elCount)

    def _makeWeights():
        stacked = np.array([[[findEmptyVal(weightDataType)]]] * weightDim * elCount,
                           dtype=weightDataType)
        return stacked.reshape((elCount, 1, weightDim) if weightDim > 1 else (elCount, 1))

    # Builders are callables so that an array (and its findEmptyVal lookup)
    # is only produced for prefixes that actually occur in the prefix list.
    builders = {
        'start': lambda: np.array([-1], dtype='int32'),
        'end': lambda: np.array([0, 1] if isDenseInterval else [0], dtype='int32'),
        'val': _makeVal,
        'strand': lambda: np.array([1] * elCount, dtype='int8'),
        'id': lambda: np.array([''] * elCount, dtype='S1'),
        'edges': lambda: np.array([['']] * elCount, dtype='S1'),
        'weights': _makeWeights,
    }

    built = {}
    extraLists = OrderedDict()
    for prefix in prefixes:
        if prefix in builders:
            built[prefix] = builders[prefix]()
        else:
            # Any prefix that is not a core column is kept as an extra column,
            # in the order given by the source.
            extraLists[prefix] = np.array([''] * elCount, dtype='S1')

    # Columns missing from the prefix list are passed on as None.
    return TrackView(region,
                     built.get('start'),
                     built.get('end'),
                     built.get('val'),
                     built.get('strand'),
                     built.get('id'),
                     built.get('edges'),
                     built.get('weights'),
                     borderHandling,
                     allowOverlaps,
                     extraLists)
def _commonReshapeArray(a, delta, appendFunc):
    """
    Delegate resizing of array `a` to `appendFunc`.

    `appendFunc` is called as appendFunc(a, delta, fillValue), where
    fillValue is what findEmptyVal returns for the string form of a.dtype.
    Whatever appendFunc returns is passed straight back to the caller.
    """
    dtypeName = str(a.dtype)
    fillValue = findEmptyVal(dtypeName)
    return appendFunc(a, delta, fillValue)
def __init__(self, path, prefix, size, valDataType='float64', valDim=1,
             weightDataType='float64', weightDim=1, maxNumEdges=0,
             maxStrLens=None, allowAppend=True):
    """
    Prepare the in-memory array that backs one memmap output file.

    Parameters:
        path, prefix   -- passed to createMemmapFileFn to build the file name;
                          prefix also selects which _setup entry applies.
        size           -- number of elements to allocate (first array axis).
        valDataType    -- dtype of the 'val' prefix. A bare 'S' is expanded to
                          'S<n>' with n = max(2, maxStrLens['val']).
        valDim         -- data type dimension of 'val' (must be >= 1).
        weightDataType -- dtype of the 'weights' prefix. A bare 'S' is expanded
                          to 'S<n>' with n = max(2, maxStrLens['weights']).
        weightDim      -- data type dimension of 'weights' (must be >= 1).
        maxNumEdges    -- passed to _setup for 'edges' and 'weights'
                          (presumably their element dimension -- confirm
                          against _setup).
        maxStrLens     -- mapping from prefix to string length, used to build
                          the 'S<n>' dtypes. Defaults to an empty mapping.
        allowAppend    -- when False, an already existing file is an error.

    Raises:
        InvalidFormatError -- the file already exists and allowAppend is False.
        KeyError           -- a data type is 'S' but maxStrLens lacks the
                              corresponding 'val' / 'weights' entry.

    NOTE(review): _setup is not visible from here. The attributes read below
    (_elementDim, _dataTypeDim, _dataType, _setEmptyVal, and _parseFunc in
    the hasattr check) are presumably set by the _setup call whose name
    matches `prefix` -- confirm against _setup.
    """
    assert valDim >= 1 and weightDim >= 1

    # Use a None sentinel instead of a shared mutable default dict.
    if maxStrLens is None:
        maxStrLens = {}

    if valDataType == 'S':
        valDataType = 'S' + str(max(2, maxStrLens['val']))
    if weightDataType == 'S':
        weightDataType = 'S' + str(max(2, maxStrLens['weights']))

    self._setup(prefix, 'start', getStart, writeNoSlice, None, 'int32', 1, False)
    self._setup(prefix, 'end', getEnd, writeNoSlice, None, 'int32', 1, False)
    self._setup(prefix, 'strand', getStrand, writeNoSlice, None, 'int8', 1, False)
    self._setup(prefix, 'val', getVal, writeNoSlice, None, valDataType, valDim, True)
    self._setup(prefix, 'id', getId, writeNoSlice, None, 'S' + str(maxStrLens.get('id')), 1, False)
    self._setup(prefix, 'edges', getEdges, writeSliceFromFront, maxNumEdges, 'S' + str(maxStrLens.get('edges')), 1, False)
    self._setup(prefix, 'weights', getWeights, writeSliceFromFront, maxNumEdges, weightDataType, weightDim, True)
    self._setup(prefix, 'leftIndex', getNone, writeNoSlice, None, 'int32', 1, False)
    self._setup(prefix, 'rightIndex', getNone, writeNoSlice, None, 'int32', 1, False)

    # None of the known prefixes matched: treat the prefix as an extra
    # column that is parsed by GetExtra and stored as a string.
    if not hasattr(self, '_parseFunc'):
        self._geParseClass = GetExtra(prefix)
        self._setup(prefix, prefix, self._geParseClass.parse, writeNoSlice, None, 'S' + str(maxStrLens.get(prefix)), 1, False)

    # If there is one number in the path, it is the data type dimension.
    # Only one value is allowed per element, no extra dimensions are added
    # to the array and the element dimension is None.
    #
    # Example: val.4.float64 contains, per element, a vector of 4 numbers.
    #          The shape is (n,4) for n elements.
    #
    # If there are two numbers in the path, the first is the maximal element
    # dimension and the second is the data type dimension.
    #
    # Example: weights.3.4.float64 contains, per element, at most 3 vectors
    #          of 4 numbers each. The shape is (n,3,4) for n elements.

    self._fn = createMemmapFileFn(path, prefix, self._elementDim, self._dataTypeDim, self._dataType)
    self._index = 0

    # (size,) plus an element axis (at least 1 wide) when an element dimension
    # is defined, plus a data type axis when that dimension is > 1.
    shape = [size]
    if self._elementDim is not None:
        shape.append(max(1, self._elementDim))
    if self._dataTypeDim > 1:
        shape.append(self._dataTypeDim)

    append = os.path.exists(self._fn)
    if append:
        if not allowAppend:
            raise InvalidFormatError('Error: different genome element sources (e.g. different input files) tries to write to index file for the same chromosome (%s). This is probably caused by different files in the same folder containing elements from the same chromosome.' % self._fn)

        try:
            # Opened without a shape, the memmap is flat; dividing its length
            # by the per-element item count gives the number of elements
            # already stored. Floor division keeps the index an integer even
            # when true division is in effect.
            f = np.memmap(self._fn, dtype=self._dataType, mode='r+')
            self._index = len(f) // product(shape[1:])
            del f

            # Copy the existing contents into memory and extend them with
            # room for `size` new elements.
            existingShape = calcShapeFromMemmapFileFn(self._fn)
            self._contents = np.array(np.memmap(self._fn, dtype=self._dataType, mode='r+', shape=tuple(existingShape)))
            self._contents = np.r_[self._contents, np.zeros(dtype=self._dataType, shape=tuple(shape))]
        except Exception:
            # Single-argument form prints the same text as the former
            # two-item print statement, and parses on Python 2 and 3 alike.
            print('Error when opening file:  %s' % (self._fn,))
            raise
    else:
        self._contents = np.zeros(dtype=self._dataType, shape=tuple(shape))

    # Only a freshly created array is pre-filled with the empty value;
    # appended space keeps the zeros allocated above.
    if not append and self._setEmptyVal:
        self._contents[:] = findEmptyVal(self._dataType)