コード例 #1
0
ファイル: Track.py プロジェクト: brynjagr/gtrackcore
    def _getRawTrackView(self, region, borderHandling, allowOverlaps):
        """
        Build a placeholder TrackView for the track named by self.trackName.

        The genome element source is only queried for metadata (prefix list,
        value / edge-weight data types and dimensions). For every prefix in
        that list a tiny dummy numpy array of the matching dtype is created,
        and the arrays are handed to TrackView.

        region         -- must satisfy len(region) == 1; region.genome is
                          used to look up the genome element source
        borderHandling -- passed through to TrackView unchanged
        allowOverlaps  -- passed through to TrackView

        Raises IncompatibleTracksError when allowOverlaps is true and the
        track format is dense or the source reports no overlapping elements.
        """
        assert len(region) == 1

        from collections import OrderedDict
        from gtrackcore.track.memmap.CommonMemmapFunctions import findEmptyVal
        from gtrackcore.track.core.TrackView import TrackView
        import numpy as np

        source = ExternalTrackManager.getGESourceFromGalaxyOrVirtualTN(self.trackName, region.genome)
        prefixes = source.getPrefixList()
        valDataType = source.getValDataType()
        valDim = source.getValDim()
        weightDataType = source.getEdgeWeightDataType()
        weightDim = source.getEdgeWeightDim()

        # The format is derived from the data types as reported by the source,
        # i.e. before a bare 'S' is widened to 'S2' further down.
        trackFormat = TrackFormat.createInstanceFromPrefixList(
            prefixes, valDataType, valDim, weightDataType, weightDim)

        if allowOverlaps:
            if trackFormat.isDense() or source.hasNoOverlappingElements():
                message = prettyPrintTrackName(self.trackName) + ' with format: ' \
                          + str(trackFormat) + ' does not satisfy ' + str(self._trackFormatReq)
                raise IncompatibleTracksError(message)

        isDenseInterval = trackFormat.isDense() and trackFormat.isInterval()
        # Dense interval formats get two dummy elements, all others one.
        if isDenseInterval:
            numEls = 2
        else:
            numEls = 1

        # A bare 'S' dtype has no length; use two-character strings instead.
        if valDataType == 'S':
            valDataType = 'S2'
        if weightDataType == 'S':
            weightDataType = 'S2'

        # Core columns stay None unless their prefix is present.
        columns = {'start': None, 'end': None, 'val': None, 'strand': None,
                   'id': None, 'edges': None, 'weights': None}
        extraLists = OrderedDict()

        for prefix in prefixes:
            if prefix == 'start':
                columns['start'] = np.array([-1], dtype='int32')
            elif prefix == 'end':
                if isDenseInterval:
                    endPositions = [0, 1]
                else:
                    endPositions = [0]
                columns['end'] = np.array(endPositions, dtype='int32')
            elif prefix == 'val':
                if valDim > 1:
                    valShape = (numEls, valDim)
                else:
                    valShape = numEls
                flatVals = np.array([findEmptyVal(valDataType)] * valDim * numEls,
                                    dtype=valDataType)
                columns['val'] = flatVals.reshape(valShape)
            elif prefix == 'strand':
                columns['strand'] = np.array([1] * numEls, dtype='int8')
            elif prefix == 'id':
                columns['id'] = np.array([''] * numEls, dtype='S1')
            elif prefix == 'edges':
                columns['edges'] = np.array([['']] * numEls, dtype='S1')
            elif prefix == 'weights':
                if weightDim > 1:
                    weightShape = (numEls, 1, weightDim)
                else:
                    weightShape = (numEls, 1)
                rawWeights = np.array([[[findEmptyVal(weightDataType)]]] * weightDim * numEls,
                                      dtype=weightDataType)
                columns['weights'] = rawWeights.reshape(weightShape)
            else:
                # Any other prefix becomes an extra column of empty strings.
                extraLists[prefix] = np.array([''] * numEls, dtype='S1')

        return TrackView(region, columns['start'], columns['end'], columns['val'],
                         columns['strand'], columns['id'], columns['edges'],
                         columns['weights'], borderHandling, allowOverlaps, extraLists)
コード例 #2
0
    def _getRawTrackView(self, region, borderHandling, allowOverlaps):
        """
        Return a TrackView holding dummy data shaped like this track.

        Only metadata is read from the genome element source of
        self.trackName: its prefix list plus the data type and dimension of
        values and edge weights. Each listed prefix gets a minimal numpy
        array of the corresponding dtype; prefixes that are not listed
        are passed to TrackView as None.

        Arguments:
          region         -- len(region) must be 1; region.genome selects the
                            genome element source
          borderHandling -- forwarded to TrackView as-is
          allowOverlaps  -- forwarded to TrackView as-is

        Raises:
          IncompatibleTracksError -- if allowOverlaps is true while the
          track format is dense or the source has no overlapping elements.
        """
        assert len(region) == 1

        from collections import OrderedDict
        from gtrackcore.track.memmap.CommonMemmapFunctions import findEmptyVal
        from gtrackcore.track.core.TrackView import TrackView
        import numpy as np

        geSource = ExternalTrackManager.getGESourceFromGalaxyOrVirtualTN(
            self.trackName, region.genome)
        prefixList = geSource.getPrefixList()
        valDataType = geSource.getValDataType()
        valDim = geSource.getValDim()
        weightDataType = geSource.getEdgeWeightDataType()
        weightDim = geSource.getEdgeWeightDim()

        startList = None
        endList = None
        valList = None
        strandList = None
        idList = None
        edgesList = None
        weightsList = None
        extraLists = OrderedDict()

        tf = TrackFormat.createInstanceFromPrefixList(
            prefixList, valDataType, valDim, weightDataType, weightDim)

        overlapsRejected = allowOverlaps and \
            (tf.isDense() or geSource.hasNoOverlappingElements())
        if overlapsRejected:
            raise IncompatibleTracksError(
                prettyPrintTrackName(self.trackName) + ' with format: '
                + str(tf) + ' does not satisfy ' + str(self._trackFormatReq))

        # Two dummy elements for dense interval formats, otherwise one; the
        # dummy end positions follow the same split.
        if tf.isDense() and tf.isInterval():
            numEls, endPositions = 2, [0, 1]
        else:
            numEls, endPositions = 1, [0]

        # Widen length-less string dtypes (done after the format was built).
        if valDataType == 'S':
            valDataType = 'S2'
        if weightDataType == 'S':
            weightDataType = 'S2'

        for prefix in prefixList:
            if prefix == 'start':
                startList = np.array([-1], dtype='int32')
            elif prefix == 'end':
                endList = np.array(endPositions, dtype='int32')
            elif prefix == 'val':
                valFill = [findEmptyVal(valDataType)] * valDim * numEls
                valShape = (numEls, valDim) if valDim > 1 else numEls
                valList = np.array(valFill, dtype=valDataType).reshape(valShape)
            elif prefix == 'strand':
                strandList = np.array([1] * numEls, dtype='int8')
            elif prefix == 'id':
                idList = np.array([''] * numEls, dtype='S1')
            elif prefix == 'edges':
                edgesList = np.array([['']] * numEls, dtype='S1')
            elif prefix == 'weights':
                weightFill = [[[findEmptyVal(weightDataType)]]] * weightDim * numEls
                weightShape = (numEls, 1, weightDim) if weightDim > 1 else (numEls, 1)
                weightsList = np.array(weightFill, dtype=weightDataType).reshape(weightShape)
            else:
                # Unrecognised prefixes are kept as extra empty-string columns.
                extraLists[prefix] = np.array([''] * numEls, dtype='S1')

        return TrackView(region, startList, endList, valList, strandList,
                         idList, edgesList, weightsList, borderHandling,
                         allowOverlaps, extraLists)
コード例 #3
0
 def _commonReshapeArray(a, delta, appendFunc):
     """
     Call appendFunc(a, delta, fill) and return its result.

     fill is findEmptyVal() of the string form of a.dtype, so 'a' must
     expose a dtype attribute. 'a' and 'delta' are passed to appendFunc
     unchanged.
     """
     fillValue = findEmptyVal(str(a.dtype))
     return appendFunc(a, delta, fillValue)
コード例 #4
0
 def _commonReshapeArray(a, delta, appendFunc):
     """
     Delegate to appendFunc with the empty value matching a's dtype.

     The third argument given to appendFunc is findEmptyVal(str(a.dtype));
     the first two are 'a' and 'delta' as received. Whatever appendFunc
     returns is returned to the caller.
     """
     dtypeName = str(a.dtype)
     return appendFunc(a, delta, findEmptyVal(dtypeName))
コード例 #5
0
    def __init__(self, path, prefix, size, valDataType='float64', valDim=1, weightDataType='float64', weightDim=1, maxNumEdges=0, maxStrLens={}, allowAppend=True):
        """
        Set up the file name and in-memory contents for one prefix.

        path, prefix   -- used (with the dimensions and data type set up for
                          the prefix) to build the memmap file name self._fn
        size           -- number of rows allocated for new elements
        valDataType,
        weightDataType -- a bare 'S' is replaced by 'S<n>' where n is
                          max(2, maxStrLens['val']) / max(2, maxStrLens['weights'])
        valDim,
        weightDim      -- must both be >= 1
        maxNumEdges    -- passed to _setup for 'edges' and 'weights'
        maxStrLens     -- mapping from prefix to string length, used to
                          build the 'S<n>' dtypes
        allowAppend    -- if false, an already existing file is an error

        If self._fn does not exist, self._contents is a zero array of the
        computed shape, overwritten with findEmptyVal(self._dataType) when
        self._setEmptyVal is set. If it exists, self._index is derived from
        the existing file length and self._contents is the existing data
        followed by 'size' zero rows.

        Raises InvalidFormatError if the file exists and allowAppend is false.
        """
        assert valDim >= 1 and weightDim >= 1

        if valDataType == 'S':
            valDataType = 'S' + str(max(2, maxStrLens['val']))
        if weightDataType == 'S':
            weightDataType = 'S' + str(max(2, maxStrLens['weights']))

        idDataType = 'S' + str(maxStrLens.get('id'))
        edgesDataType = 'S' + str(maxStrLens.get('edges'))

        # One _setup call per standard prefix name, in this order.
        # NOTE(review): _setup is expected to populate self._parseFunc,
        # self._elementDim, self._dataTypeDim, self._dataType and
        # self._setEmptyVal when 'prefix' matches the given name - confirm
        # against _setup.
        standardFields = (
            ('start', getStart, writeNoSlice, None, 'int32', 1, False),
            ('end', getEnd, writeNoSlice, None, 'int32', 1, False),
            ('strand', getStrand, writeNoSlice, None, 'int8', 1, False),
            ('val', getVal, writeNoSlice, None, valDataType, valDim, True),
            ('id', getId, writeNoSlice, None, idDataType, 1, False),
            ('edges', getEdges, writeSliceFromFront, maxNumEdges, edgesDataType, 1, False),
            ('weights', getWeights, writeSliceFromFront, maxNumEdges, weightDataType, weightDim, True),
            ('leftIndex', getNone, writeNoSlice, None, 'int32', 1, False),
            ('rightIndex', getNone, writeNoSlice, None, 'int32', 1, False),
        )
        for fieldArgs in standardFields:
            self._setup(prefix, *fieldArgs)

        # No _parseFunc after the calls above: set the prefix up as an extra
        # string column parsed by GetExtra.
        if not hasattr(self, '_parseFunc'):
            self._geParseClass = GetExtra(prefix)
            extraDataType = 'S' + str(maxStrLens.get(prefix))
            self._setup(prefix, prefix, self._geParseClass.parse, writeNoSlice, None, extraDataType, 1, False)

        # File name convention:
        #
        # One number in the path: it is the data type dimension. Each element
        # holds a single value, the array gets no extra dimension and the
        # element dimension is None.
        #   Example: val.4.float64 holds a vector of 4 numbers per element;
        #            the shape is (n,4) for n elements.
        #
        # Two numbers in the path: the first is the maximal element dimension,
        # the second the data type dimension.
        #   Example: weights.3.4.float64 holds at most 3 vectors of 4 numbers
        #            per element; the shape is (n,3,4) for n elements.

        self._fn = createMemmapFileFn(path, prefix, self._elementDim, self._dataTypeDim, self._dataType)
        self._index = 0

        shape = [size]
        if self._elementDim is not None:
            shape.append(max(1, self._elementDim))
        if self._dataTypeDim > 1:
            shape.append(self._dataTypeDim)

        if not os.path.exists(self._fn):
            # Fresh file: start from zeros, optionally replaced by the empty value.
            self._contents = np.zeros( dtype=self._dataType, shape=tuple(shape) )
            if self._setEmptyVal:
                self._contents[:] = findEmptyVal(self._dataType)
            return

        if not allowAppend:
            raise InvalidFormatError('Error: different genome element sources (e.g. different input files) tries to write to index file for the same chromosome (%s). This is probably caused by different files in the same folder containing elements from the same chromosome.' % self._fn)

        # NOTE(review): when appending, the new zero rows are not overwritten
        # with the empty value even if self._setEmptyVal is set - confirm
        # that this is intended.
        try:
            # Number of rows already stored = flat length / values per row.
            flatView = np.memmap( self._fn, dtype=self._dataType, mode='r+' )
            self._index = len(flatView) / product(shape[1:])
            del flatView

            existingShape = calcShapeFromMemmapFileFn(self._fn)
            self._contents = np.array( np.memmap(self._fn, dtype=self._dataType, mode='r+', shape=tuple(existingShape)) )
            newRows = np.zeros( dtype=self._dataType, shape=tuple(shape) )
            self._contents = np.r_[self._contents, newRows]
        except Exception:
            print 'Error when opening file: ', self._fn
            raise
コード例 #6
0
ファイル: OutputFile.py プロジェクト: brynjagr/gtrackcore
    def __init__(self, path, prefix, size, valDataType='float64', valDim=1, weightDataType='float64', weightDim=1, maxNumEdges=0, maxStrLens={}, allowAppend=True):
        """
        Initialise the output buffer for a single prefix.

        Arguments:
          path, prefix    -- combined with the element dimension, data type
                             dimension and data type into the file name self._fn
          size            -- number of new rows to allocate
          valDataType     -- value dtype; 'S' becomes 'S' + max(2, maxStrLens['val'])
          valDim          -- value dimension, asserted >= 1
          weightDataType  -- weight dtype; 'S' becomes 'S' + max(2, maxStrLens['weights'])
          weightDim       -- weight dimension, asserted >= 1
          maxNumEdges     -- handed to _setup for the 'edges' and 'weights' prefixes
          maxStrLens      -- mapping prefix -> string length for 'S<n>' dtypes
          allowAppend     -- when false, finding self._fn on disk raises
                             InvalidFormatError

        Afterwards self._contents holds either a new array of 'size' rows
        (zeros, or the empty value if self._setEmptyVal) or, when the file
        already exists, its data with 'size' zero rows appended; in that
        case self._index is computed from the existing file length.
        """
        assert valDim >= 1 and weightDim >= 1

        def strTypeFor(key):
            # 'S<n>' dtype name from the optional maxStrLens entry for key.
            return 'S' + str(maxStrLens.get(key))

        def setupAs(*setupArgs):
            # Every _setup call shares 'prefix' as its first argument.
            self._setup(prefix, *setupArgs)

        if valDataType == 'S':
            valDataType = 'S' + str(max(2, maxStrLens['val']))
        if weightDataType == 'S':
            weightDataType = 'S' + str(max(2, maxStrLens['weights']))

        # NOTE(review): _setup is expected to set the self._* attributes read
        # below when 'prefix' equals the given name - confirm against _setup.
        setupAs('start', getStart, writeNoSlice, None, 'int32', 1, False)
        setupAs('end', getEnd, writeNoSlice, None, 'int32', 1, False)
        setupAs('strand', getStrand, writeNoSlice, None, 'int8', 1, False)
        setupAs('val', getVal, writeNoSlice, None, valDataType, valDim, True)
        setupAs('id', getId, writeNoSlice, None, strTypeFor('id'), 1, False)
        setupAs('edges', getEdges, writeSliceFromFront, maxNumEdges, strTypeFor('edges'), 1, False)
        setupAs('weights', getWeights, writeSliceFromFront, maxNumEdges, weightDataType, weightDim, True)
        setupAs('leftIndex', getNone, writeNoSlice, None, 'int32', 1, False)
        setupAs('rightIndex', getNone, writeNoSlice, None, 'int32', 1, False)

        # Still no _parseFunc: register the prefix as an extra string column.
        if not hasattr(self, '_parseFunc'):
            self._geParseClass = GetExtra(prefix)
            setupAs(prefix, self._geParseClass.parse, writeNoSlice, None, strTypeFor(prefix), 1, False)

        # Meaning of the numbers in the file name:
        #
        # - A single number is the data type dimension. There is one value
        #   per element, no extra array dimension is added and the element
        #   dimension is None.
        #     Example: val.4.float64 stores a vector of 4 numbers per
        #              element, giving shape (n,4) for n elements.
        #
        # - With two numbers, the first is the maximal element dimension and
        #   the second is the data type dimension.
        #     Example: weights.3.4.float64 stores at most 3 vectors of 4
        #              numbers per element, giving shape (n,3,4).

        self._fn = createMemmapFileFn(path, prefix, self._elementDim, self._dataTypeDim, self._dataType)
        self._index = 0

        elementPart = [max(1, self._elementDim)] if self._elementDim is not None else []
        dataTypePart = [self._dataTypeDim] if self._dataTypeDim > 1 else []
        shape = [size] + elementPart + dataTypePart

        fileExists = os.path.exists(self._fn)
        if fileExists and not allowAppend:
            raise InvalidFormatError('Error: different genome element sources (e.g. different input files) tries to write to index file for the same chromosome (%s). This is probably caused by different files in the same folder containing elements from the same chromosome.' % self._fn)

        if fileExists:
            # NOTE(review): the appended zero rows are never filled with the
            # empty value, even when self._setEmptyVal is set - confirm intent.
            try:
                # Rows already on disk = flat length / number of values per row.
                flat = np.memmap( self._fn, dtype=self._dataType, mode='r+' )
                self._index = len(flat) / product(shape[1:])
                del flat

                existingShape = calcShapeFromMemmapFileFn(self._fn)
                self._contents = np.array( np.memmap(self._fn, dtype=self._dataType, mode='r+', shape=tuple(existingShape)) )
                self._contents = np.r_[self._contents, np.zeros( dtype=self._dataType, shape=tuple(shape) )]
            except Exception:
                print 'Error when opening file: ', self._fn
                raise
        else:
            self._contents = np.zeros( dtype=self._dataType, shape=tuple(shape) )
            if self._setEmptyVal:
                self._contents[:] = findEmptyVal(self._dataType)