예제 #1
0
 def checkUndirectedEdges(genome, trackName, allowOverlaps):
     """
     Check that each edge of an undirected, linked track is matched by an opposite edge.

     Nothing is checked (the function just returns) unless the track format
     is linked and the metadata collector reports undirected edges. Otherwise
     the non-empty edges (and weights, when a 'weights' array exists) of every
     element on every preprocessed chromosome are fed to
     PreProcessUtils._adjustComplementaryEdgeWeightDict.

     NOTE(review): presumably that helper removes an entry once the opposite
     edge with equal weight has been seen -- confirm in PreProcessUtils.

     Raises:
         InvalidFormatError: if entries remain in the bookkeeping dict after
             all chromosomes have been processed; the message lists them.
     """
     metaData = PreProcMetaDataCollector(genome, trackName)
     if not metaData.getTrackFormat().isLinked() or not metaData.hasUndirectedEdges():
         return

     # Filled and emptied by the helper; whatever is left at the end is unmatched.
     pendingEdges = {}

     for chrName in metaData.getPreProcessedChrs(allowOverlaps):
         chrData = TrackSource().getTrackData(trackName, genome, chrName, allowOverlaps)

         idColumn = chrData['id']
         edgeColumn = chrData['edges']
         weightColumn = chrData.get('weights')

         for rowIdx, elementId in enumerate(idColumn):
             rowEdges = edgeColumn[rowIdx]
             # Empty strings are padding in the edges row; the same mask is
             # applied to the weights row.
             isRealEdge = rowEdges != ''
             usedEdges = rowEdges[isRealEdge]
             if weightColumn is None:
                 usedWeights = None
             else:
                 usedWeights = weightColumn[rowIdx][isRealEdge]
             PreProcessUtils._adjustComplementaryEdgeWeightDict(pendingEdges, elementId, usedEdges, usedWeights)

     if not pendingEdges:
         return

     edgeDescriptions = []
     for toId in pendingEdges:
         for fromId in pendingEdges[toId]:
             weight = pendingEdges[toId][fromId]
             description = "from '%s' to '%s'" % (fromId, toId)
             if weight != '':
                 description += " with weight '%s'" % weight
             edgeDescriptions.append(description)

     raise InvalidFormatError("Error: All edges are not undirected. The following edges specifications " +
                              "are not matched by an opposite edge with equal weight:" + os.linesep +
                              os.linesep.join(edgeDescriptions))
예제 #2
0
 def __init__(self, trackName):
     """
     Store the track name and create the helper objects used by the track.

     A TrackSource, a TrackViewLoader and a NeutralTrackFormatReq are
     instantiated; the format converters and the track id start out as None.
     """
     self.trackName = trackName

     # Collaborators, created in the same order as before.
     self._trackSource = TrackSource()
     self._trackViewLoader = TrackViewLoader()
     self._trackFormatReq = NeutralTrackFormatReq()

     # Not yet determined.
     self.formatConverters = None
     self._trackId = None
예제 #3
0
    def merge(genome, trackName, allowOverlaps):
        """
        Merge the per-chromosome memmap arrays of a track into one memmap file per array.

        The chromosomes reported by the metadata collector are sorted unless
        the track format has a dense representation, and are then filtered
        through ChrMemmapFolderMerger._existingChrIter. For every array name
        found in the first chromosome's track data, the arrays of all
        chromosomes are combined with ChrMemmapFolderMerger.mergeArrays and
        written to a new memmap file whose name comes from createMemmapFileFn.
        The element/dtype dimensions used in that file name are the maximum of
        the values parsed from the per-chromosome memmap file names.

        NOTE(review): takes no self/cls, so this is presumably a staticmethod;
        the decorator is outside the visible snippet.

        Raises:
            EmptyGESourceError: if no chromosome passes _existingChrIter.
        """
        path = createDirPath(trackName, genome, allowOverlaps=allowOverlaps)

        collector = PreProcMetaDataCollector(genome, trackName)
        chrList = collector.getPreProcessedChrs(allowOverlaps)
        if not collector.getTrackFormat().reprIsDense():
            chrList = sorted(chrList)

        existingChrList = list(
            ChrMemmapFolderMerger._existingChrIter(path, chrList))
        if not existingChrList:
            raise EmptyGESourceError(
                'No data lines has been read from source file (probably because it is empty).'
            )

        firstChrTrackData = TrackSource().getTrackData(trackName,
                                                       genome,
                                                       existingChrList[0],
                                                       allowOverlaps,
                                                       forceChrFolders=True)

        # Snapshot the array names up front: entries are deleted from
        # firstChrTrackData inside the loop, and iterating a live keys() view
        # while deleting raises RuntimeError on Python 3 dicts.
        arrayNames = list(firstChrTrackData.keys())
        for arrayName in arrayNames:
            mergedArray = firstChrTrackData[arrayName][:]
            elementDim, dtypeDim = parseMemmapFileFn(
                firstChrTrackData[arrayName].filename)[1:3]
            # Drop the reference to the first chromosome's array once it has
            # been read.
            del firstChrTrackData[arrayName]

            for chrName in existingChrList[1:]:
                chrTrackData = TrackSource().getTrackData(trackName,
                                                          genome,
                                                          chrName,
                                                          allowOverlaps,
                                                          forceChrFolders=True)

                mergedArray = ChrMemmapFolderMerger.mergeArrays(
                    mergedArray, np.array(chrTrackData[arrayName][:]))
                elementDimNew, dtypeDimNew = parseMemmapFileFn(
                    chrTrackData[arrayName].filename)[1:3]
                # The merged file name must carry the largest dimensions seen.
                elementDim = max(elementDim, elementDimNew)
                dtypeDim = max(dtypeDim, dtypeDimNew)

                del chrTrackData[arrayName]

            mergedFn = createMemmapFileFn(path, arrayName, elementDim,
                                          dtypeDim, str(mergedArray.dtype))

            # Write the merged array to disk through a fresh memmap, then
            # release both the memmap and the in-memory copy before moving on
            # to the next array.
            f = np.memmap(mergedFn,
                          dtype=mergedArray.dtype,
                          mode='w+',
                          shape=mergedArray.shape)
            f[:] = mergedArray
            f.flush()
            del f
            del mergedArray
예제 #4
0
 def __init__(self, trackName):
     """
     Initialise the track with its name and freshly created helpers.

     Creates a TrackSource, a TrackViewLoader and a NeutralTrackFormatReq.
     formatConverters and _trackId are left as None.
     """
     self.trackName = trackName
     self.formatConverters = self._trackId = None

     self._trackSource = TrackSource()
     self._trackViewLoader = TrackViewLoader()
     self._trackFormatReq = NeutralTrackFormatReq()
예제 #5
0
 def checkIfEdgeIdsExist(genome, trackName, allowOverlaps):
     """
     Check that every id referenced in the 'edges' arrays also occurs in the 'id' arrays.

     Returns immediately when the track format is not linked. Otherwise the
     unique ids and the unique edge ids are accumulated over all preprocessed
     chromosomes, empty strings are discarded, and the two sets are compared.

     Raises:
         InvalidFormatError: if some edge id has no matching element id; the
             message lists the offending ids in sorted order.
     """
     metaData = PreProcMetaDataCollector(genome, trackName)
     if not metaData.getTrackFormat().isLinked():
         return

     knownIds = numpy.array([], dtype='S')
     referencedIds = numpy.array([], dtype='S')

     for chrName in metaData.getPreProcessedChrs(allowOverlaps):
         chrData = TrackSource().getTrackData(trackName, genome, chrName, allowOverlaps)

         # De-duplicate after each chromosome so the accumulators hold only
         # unique values.
         idsWithNew = numpy.concatenate((knownIds, chrData['id'][:]))
         knownIds = numpy.unique(idsWithNew)

         edgeIdsWithNew = numpy.concatenate((referencedIds, chrData['edges'][:].flatten()))
         referencedIds = numpy.unique(edgeIdsWithNew)

     # Drop empty-string entries before comparing.
     knownIds = knownIds[knownIds != '']
     referencedIds = referencedIds[referencedIds != '']

     danglingIds = set(referencedIds).difference(set(knownIds))
     if danglingIds:
         raise InvalidFormatError("Error: the following ids specified in the 'edges' column do not exist in the dataset: " + ', '.join(sorted(danglingIds)))
예제 #6
0
    def merge(genome, trackName, allowOverlaps):
        """
        Combine a track's per-chromosome memmap arrays into a single memmap file per array.

        Chromosomes come from the metadata collector (sorted unless the track
        format has a dense representation) and are filtered through
        ChrMemmapFolderMerger._existingChrIter. For each array name in the
        first chromosome's track data, all chromosomes' arrays are merged with
        ChrMemmapFolderMerger.mergeArrays and written to the file named by
        createMemmapFileFn, using the largest element/dtype dimensions parsed
        from the per-chromosome memmap file names.

        NOTE(review): no self/cls parameter -- presumably decorated as a
        staticmethod outside the visible snippet.

        Raises:
            EmptyGESourceError: if _existingChrIter yields no chromosomes.
        """
        path = createDirPath(trackName, genome, allowOverlaps=allowOverlaps)

        collector = PreProcMetaDataCollector(genome, trackName)
        chrList = collector.getPreProcessedChrs(allowOverlaps)
        if not collector.getTrackFormat().reprIsDense():
            chrList = sorted(chrList)

        existingChrList = list(ChrMemmapFolderMerger._existingChrIter(path, chrList))
        if not existingChrList:
            raise EmptyGESourceError('No data lines has been read from source file (probably because it is empty).')

        firstChrTrackData = TrackSource().getTrackData(trackName, genome, existingChrList[0], allowOverlaps, forceChrFolders=True)

        # Copy the key list before looping: the loop deletes entries from
        # firstChrTrackData, which would raise RuntimeError when iterating a
        # live keys() view of a Python 3 dict.
        arrayList = list(firstChrTrackData.keys())
        for arrayName in arrayList:
            mergedArray = firstChrTrackData[arrayName][:]
            elementDim, dtypeDim = parseMemmapFileFn(firstChrTrackData[arrayName].filename)[1:3]
            del firstChrTrackData[arrayName]

            for chrName in existingChrList[1:]:
                chrTrackData = TrackSource().getTrackData(trackName, genome, chrName, allowOverlaps, forceChrFolders=True)

                mergedArray = ChrMemmapFolderMerger.mergeArrays(mergedArray, np.array(chrTrackData[arrayName][:]))
                elementDimNew, dtypeDimNew = parseMemmapFileFn(chrTrackData[arrayName].filename)[1:3]
                # Keep the largest dimensions seen for the merged file name.
                elementDim = max(elementDim, elementDimNew)
                dtypeDim = max(dtypeDim, dtypeDimNew)

                del chrTrackData[arrayName]

            mergedFn = createMemmapFileFn(path, arrayName, elementDim, dtypeDim, str(mergedArray.dtype))

            # Persist the merged array, then release the memmap and the
            # in-memory copy before handling the next array.
            f = np.memmap(mergedFn, dtype=mergedArray.dtype, mode='w+', shape=mergedArray.shape)
            f[:] = mergedArray
            f.flush()
            del f
            del mergedArray
예제 #7
0
class Track(object):
    """
    A named track that can load track views and convert them to a required format.

    Instantiating with an empty list or None gives None rather than an
    instance; names that ExternalTrackManager.isVirtualTrack accepts give a
    VirtualMinimalTrack object instead.
    """
    IS_MEMOIZABLE = True

    def __new__(cls, trackName):
        """Pick what to construct for trackName: None, a VirtualMinimalTrack or a cls instance."""
        if trackName == [] or trackName is None:
            return None
        if ExternalTrackManager.isVirtualTrack(trackName):
            return VirtualMinimalTrack.__new__(VirtualMinimalTrack)
        return object.__new__(cls)

    def __init__(self, trackName):
        """Store the track name and create the source, view loader and neutral format requirement."""
        self.trackName = trackName

        self._trackSource = TrackSource()
        self._trackViewLoader = TrackViewLoader()
        self._trackFormatReq = NeutralTrackFormatReq()

        # Both are filled in lazily by other methods.
        self.formatConverters = None
        self._trackId = None

    def _getRawTrackView(self, region, borderHandling, allowOverlaps):
        """Load the track data for the region's genome and chr and wrap it in a track view."""
        rawData = self._trackSource.getTrackData(
            self.trackName, region.genome, region.chr, allowOverlaps)
        return self._trackViewLoader.loadTrackView(
            rawData, region, borderHandling, allowOverlaps, self.trackName)

    def getTrackView(self, region):
        """
        Return the track view of region, converted by the first format converter.

        The converters are looked up with getFormatConverters on first use,
        unless they were already set.

        Raises:
            IncompatibleTracksError: if the converter list is empty, or if the
                first converter cannot handle the conversion.
        """
        allowOverlaps = self._trackFormatReq.allowOverlaps()
        borderHandling = self._trackFormatReq.borderHandling()
        assert allowOverlaps is not None
        assert borderHandling is not None

        rawView = self._getRawTrackView(region, borderHandling, allowOverlaps)
        sourceFormat = rawView.trackFormat
        requiredFormat = self._trackFormatReq

        if self.formatConverters is None:
            self.formatConverters = getFormatConverters(sourceFormat,
                                                        requiredFormat)

        if self.formatConverters == []:
            message = prettyPrintTrackName(self.trackName) + ' with format: '
            message += str(sourceFormat)
            if sourceFormat._val:
                message += '(' + sourceFormat._val + ')'
            message += ' does not satisfy ' + str(requiredFormat)
            raise IncompatibleTracksError(message)

        converter = self.formatConverters[0]
        if not converter.canHandle(sourceFormat, requiredFormat):
            raise IncompatibleTracksError(
                getClassName(converter) +
                ' does not support conversion from ' + str(sourceFormat) +
                ' to ' + str(requiredFormat))
        return converter.convert(rawView)

    def addFormatReq(self, requestedTrackFormat):
        """
        Merge requestedTrackFormat into the current format requirement.

        Raises:
            IncompatibleTracksError: if TrackFormatReq.merge returns None. The
                stored requirement has already been set to None at that point.
        """
        previousReq = self._trackFormatReq
        mergedReq = TrackFormatReq.merge(previousReq, requestedTrackFormat)
        self._trackFormatReq = mergedReq
        if mergedReq is None:
            raise IncompatibleTracksError(
                str(previousReq) + ' is incompatible with additional ' +
                str(requestedTrackFormat))

    def setFormatConverter(self, converterClassName):
        """Set the single converter by class name; a None name leaves the converters unset."""
        assert self.formatConverters is None
        if converterClassName is None:
            return
        self.formatConverters = [getFormatConverterByName(converterClassName)]

    def getUniqueKey(self, genome):
        """
        Return a hash of track name, track id, converter class and version, and format settings.

        The track id is looked up through TrackInfo the first time it is needed.
        """
        assert self.formatConverters is not None and \
            len(self.formatConverters) == 1, 'FC: ' + str(self.formatConverters)
        assert None not in [self._trackFormatReq.allowOverlaps(),
                            self._trackFormatReq.borderHandling()]

        if not self._trackId:
            self._trackId = TrackInfo(genome, self.trackName).id

        converter = self.formatConverters[0]
        keyParts = (tuple(self.trackName),
                    self._trackId,
                    getClassName(converter),
                    converter.VERSION,
                    self._trackFormatReq.allowOverlaps(),
                    self._trackFormatReq.borderHandling())
        return hash(keyParts)
예제 #8
0
class Track(object):
    """
    Track identified by a track name, able to produce format-converted track views.

    Construction returns None for an empty list or None as track name, and a
    VirtualMinimalTrack object when ExternalTrackManager.isVirtualTrack says
    the name is virtual.
    """
    IS_MEMOIZABLE = True

    def __new__(cls, trackName):
        """Return None, a VirtualMinimalTrack object or a new cls object, depending on trackName."""
        nothingRequested = trackName == [] or trackName is None
        if nothingRequested:
            return None
        if not ExternalTrackManager.isVirtualTrack(trackName):
            return object.__new__(cls)
        return VirtualMinimalTrack.__new__(VirtualMinimalTrack)

    def __init__(self, trackName):
        """Remember the track name and set up source, loader and a neutral format requirement."""
        self.trackName = trackName
        self.formatConverters = self._trackId = None
        self._trackSource = TrackSource()
        self._trackViewLoader = TrackViewLoader()
        self._trackFormatReq = NeutralTrackFormatReq()

    def _getRawTrackView(self, region, borderHandling, allowOverlaps):
        """Fetch track data for region.genome / region.chr and load it as a track view."""
        source, loader = self._trackSource, self._trackViewLoader
        trackData = source.getTrackData(self.trackName, region.genome, region.chr, allowOverlaps)
        return loader.loadTrackView(trackData, region, borderHandling, allowOverlaps, self.trackName)

    def getTrackView(self, region):
        """
        Load the view for region and convert it with the first format converter.

        Converters are obtained from getFormatConverters when none are set yet.

        Raises:
            IncompatibleTracksError: when there are no converters, or the first
                converter reports it cannot handle the conversion.
        """
        allowOverlaps = self._trackFormatReq.allowOverlaps()
        borderHandling = self._trackFormatReq.borderHandling()
        assert allowOverlaps is not None
        assert borderHandling is not None

        origTrackView = self._getRawTrackView(region, borderHandling, allowOverlaps)
        origFormat = origTrackView.trackFormat

        if self.formatConverters is None:
            self.formatConverters = getFormatConverters(origFormat, self._trackFormatReq)

        if self.formatConverters == []:
            parts = [prettyPrintTrackName(self.trackName) + ' with format: ', str(origFormat)]
            if origFormat._val:
                parts.append('(' + origFormat._val + ')')
            parts.append(' does not satisfy ' + str(self._trackFormatReq))
            raise IncompatibleTracksError(''.join(parts))

        firstConverter = self.formatConverters[0]
        if firstConverter.canHandle(origFormat, self._trackFormatReq):
            return firstConverter.convert(origTrackView)

        raise IncompatibleTracksError(getClassName(firstConverter) +
                                      ' does not support conversion from ' + str(origFormat) +
                                      ' to ' + str(self._trackFormatReq))

    def addFormatReq(self, requestedTrackFormat):
        """
        Combine requestedTrackFormat with the stored format requirement.

        Raises:
            IncompatibleTracksError: when TrackFormatReq.merge gives None (the
                stored requirement is None by then).
        """
        before = self._trackFormatReq
        self._trackFormatReq = TrackFormatReq.merge(before, requestedTrackFormat)
        if self._trackFormatReq is not None:
            return
        raise IncompatibleTracksError(str(before) +
                                      ' is incompatible with additional ' + str(requestedTrackFormat))

    def setFormatConverter(self, converterClassName):
        """Install the converter named converterClassName as the only converter; None is a no-op."""
        assert self.formatConverters is None
        if converterClassName is not None:
            converter = getFormatConverterByName(converterClassName)
            self.formatConverters = [converter]

    def getUniqueKey(self, genome):
        """
        Hash the track name, track id, converter class/version and format settings into one key.

        The track id is fetched via TrackInfo on first use.
        """
        assert self.formatConverters is not None and len(self.formatConverters) == 1, 'FC: ' + str(self.formatConverters)
        assert None not in [self._trackFormatReq.allowOverlaps(),
                            self._trackFormatReq.borderHandling()]

        if not self._trackId:
            self._trackId = TrackInfo(genome, self.trackName).id

        onlyConverter = self.formatConverters[0]
        return hash((tuple(self.trackName),
                     self._trackId,
                     getClassName(onlyConverter),
                     onlyConverter.VERSION,
                     self._trackFormatReq.allowOverlaps(),
                     self._trackFormatReq.borderHandling()))
예제 #9
0
    def _getTrackData(self):
        """
        Load the track data for the chromosome of the first bounding region.

        Only the first item yielded by self._boundingRegions is used; its
        chr is passed to TrackSource.getTrackData together with the stored
        track name, genome and allowOverlaps setting.

        Raises:
            ValueError: if self._boundingRegions yields no regions. Previously
                this case failed with an UnboundLocalError because the
                chromosome variable was never assigned.
        """
        # for/else works for any iterable, including ones that cannot be
        # indexed; the else branch runs only when the loop body never ran.
        for firstRegion in self._boundingRegions:
            firstChr = firstRegion.chr
            break
        else:
            raise ValueError('Cannot load track data: no bounding regions are available.')

        return TrackSource().getTrackData(self._trackName, self._genome, firstChr, allowOverlaps=self._allowOverlaps)