Esempio n. 1
0
 def checkUndirectedEdges(genome, trackName, allowOverlaps):
     collector = PreProcMetaDataCollector(genome, trackName)
     if not (collector.getTrackFormat().isLinked() and collector.hasUndirectedEdges()):
         return
     
     complementEdgeWeightDict = {}
     
     for chr in collector.getPreProcessedChrs(allowOverlaps):
         trackSource = TrackSource()
         trackData = trackSource.getTrackData(trackName, genome, chr, allowOverlaps)
         
         ids = trackData['id']
         edges = trackData['edges']
         weights = trackData.get('weights')
         
         for i, id in enumerate(ids):
             edgesAttr = edges[i][edges[i] != '']
             weightsAttr = weights[i][edges[i] != ''] if weights is not None else None
             PreProcessUtils._adjustComplementaryEdgeWeightDict(complementEdgeWeightDict, id, edgesAttr, weightsAttr)
     
     if len(complementEdgeWeightDict) != 0:
             unmatchedPairs = []
             for toId in complementEdgeWeightDict:
                 for fromId in complementEdgeWeightDict[toId]:
                     unmatchedPairs.append((fromId, toId, complementEdgeWeightDict[toId][fromId]))
             raise InvalidFormatError("Error: All edges are not undirected. The following edges specifications " +\
                                      "are not matched by an opposite edge with equal weight:" + os.linesep +\
                                      os.linesep.join(["from '%s' to '%s'" % (fromId, toId) + \
                                                       (" with weight '%s'" % weight  if weight != '' else '') \
                                                       for fromId, toId, weight in unmatchedPairs]))
Esempio n. 2
0
 def __init__(self, trackName):
     self.trackName = trackName
     self._trackSource = TrackSource()
     self._trackViewLoader = TrackViewLoader()
     self._trackFormatReq = NeutralTrackFormatReq()
     self.formatConverters = None
     self._trackId = None
Esempio n. 3
0
 def checkIfEdgeIdsExist(genome, trackName, allowOverlaps):
     collector = PreProcMetaDataCollector(genome, trackName)
     if not collector.getTrackFormat().isLinked():
         return
     
     uniqueIds = numpy.array([], dtype='S')
     uniqueEdgeIds = numpy.array([], dtype='S')
     
     for chr in collector.getPreProcessedChrs(allowOverlaps):
         trackSource = TrackSource()
         trackData = trackSource.getTrackData(trackName, genome, chr, allowOverlaps)
         uniqueIds = numpy.unique(numpy.concatenate((uniqueIds, trackData['id'][:])))
         uniqueEdgeIds = numpy.unique(numpy.concatenate((uniqueEdgeIds, trackData['edges'][:].flatten())))
     
     uniqueIds = uniqueIds[uniqueIds != '']
     uniqueEdgeIds = uniqueEdgeIds[uniqueEdgeIds != '']
     
     unmatchedIds = set(uniqueEdgeIds) - set(uniqueIds)
     if len(unmatchedIds) > 0:
         raise InvalidFormatError("Error: the following ids specified in the 'edges' column do not exist in the dataset: " + ', '.join(sorted(unmatchedIds)))
    def merge(genome, trackName, allowOverlaps):
        path = createDirPath(trackName, genome, allowOverlaps=allowOverlaps)

        collector = PreProcMetaDataCollector(genome, trackName)
        chrList = collector.getPreProcessedChrs(allowOverlaps)
        if not collector.getTrackFormat().reprIsDense():
            chrList = sorted(chrList)
        
        existingChrList = [chr for chr in ChrMemmapFolderMerger._existingChrIter(path, chrList)]
        if len(existingChrList) == 0:
            raise EmptyGESourceError('No data lines has been read from source file (probably because it is empty).')
            
        firstChrTrackData = TrackSource().getTrackData(trackName, genome, existingChrList[0], allowOverlaps, forceChrFolders=True)
        arrayList = firstChrTrackData.keys()
        for arrayName in arrayList:
            mergedArray = firstChrTrackData[arrayName][:]
            elementDim, dtypeDim = parseMemmapFileFn(firstChrTrackData[arrayName].filename)[1:3]
            del firstChrTrackData[arrayName]
            
            for chr in existingChrList[1:]:
                chrTrackData = TrackSource().getTrackData(trackName, genome, chr, allowOverlaps, forceChrFolders=True)
            
                mergedArray = ChrMemmapFolderMerger.mergeArrays(mergedArray, np.array(chrTrackData[arrayName][:]))
                elementDimNew, dtypeDimNew = parseMemmapFileFn(chrTrackData[arrayName].filename)[1:3]
                elementDim = max(elementDim, elementDimNew)
                dtypeDim = max(dtypeDim, dtypeDimNew)
                
                del chrTrackData[arrayName]
            
            mergedFn = createMemmapFileFn(path, arrayName, elementDim, dtypeDim, str(mergedArray.dtype))
            
            f = np.memmap(mergedFn, dtype=mergedArray.dtype, mode='w+', shape=mergedArray.shape)
            f[:] = mergedArray
            f.flush()
            del f
            del mergedArray
Esempio n. 5
0
class Track(object):
    IS_MEMOIZABLE = True

    def __new__(cls, trackName):
        if trackName == [] or trackName is None:
            return None
        else:
            if ExternalTrackManager.isVirtualTrack(trackName):
                return VirtualMinimalTrack.__new__(VirtualMinimalTrack)
            else:
                return object.__new__(cls)

    def __init__(self, trackName):
        self.trackName = trackName
        self._trackSource = TrackSource()
        self._trackViewLoader = TrackViewLoader()
        self._trackFormatReq = NeutralTrackFormatReq()
        self.formatConverters = None
        self._trackId = None

    def _getRawTrackView(self, region, borderHandling, allowOverlaps):
        trackData = self._trackSource.getTrackData(self.trackName, region.genome, region.chr, allowOverlaps)
        return self._trackViewLoader.loadTrackView(trackData, region, borderHandling, allowOverlaps, self.trackName)

    def getTrackView(self, region):
        allowOverlaps = self._trackFormatReq.allowOverlaps()
        borderHandling = self._trackFormatReq.borderHandling()
        assert allowOverlaps is not None
        assert borderHandling is not None

        origTrackView = self._getRawTrackView(region, borderHandling, allowOverlaps)

        if self.formatConverters is None:
            self.formatConverters = getFormatConverters(origTrackView.trackFormat, self._trackFormatReq)

        if self.formatConverters == []:
            raise IncompatibleTracksError(
                prettyPrintTrackName(self.trackName)
                + " with format: "
                + str(origTrackView.trackFormat)
                + ("(" + origTrackView.trackFormat._val + ")" if origTrackView.trackFormat._val else "")
                + " does not satisfy "
                + str(self._trackFormatReq)
            )

        if not self.formatConverters[0].canHandle(origTrackView.trackFormat, self._trackFormatReq):
            raise IncompatibleTracksError(
                getClassName(self.formatConverters[0])
                + " does not support conversion from "
                + str(origTrackView.trackFormat)
                + " to "
                + str(self._trackFormatReq)
            )
        return self.formatConverters[0].convert(origTrackView)

    def addFormatReq(self, requestedTrackFormat):
        prevFormatReq = self._trackFormatReq
        self._trackFormatReq = TrackFormatReq.merge(self._trackFormatReq, requestedTrackFormat)
        if self._trackFormatReq is None:
            raise IncompatibleTracksError(
                str(prevFormatReq) + " is incompatible with additional " + str(requestedTrackFormat)
            )

    def setFormatConverter(self, converterClassName):
        assert self.formatConverters is None
        if converterClassName is not None:
            self.formatConverters = [getFormatConverterByName(converterClassName)]

    def getUniqueKey(self, genome):
        assert self.formatConverters is not None and len(self.formatConverters) == 1, "FC: " + str(
            self.formatConverters
        )
        assert not None in [self._trackFormatReq.allowOverlaps(), self._trackFormatReq.borderHandling()]

        if not self._trackId:
            self._trackId = TrackInfo(genome, self.trackName).id

        return hash(
            (
                tuple(self.trackName),
                self._trackId,
                getClassName(self.formatConverters[0]),
                self.formatConverters[0].VERSION,
                self._trackFormatReq.allowOverlaps(),
                self._trackFormatReq.borderHandling(),
            )
        )