def _writeTestFile(self, case):
     """Write the header and data lines of a test case to a file and return its path.

     The path is built by createOrigPath from self.GENOME, the track name
     (self.TRACK_NAME_PREFIX + case.trackName) and the file name
     'testfile' + case.suffix. The file content is case.headerLines followed
     by case.lines, joined by newlines (no trailing newline is added).
     """
     fn = createOrigPath(self.GENOME, self.TRACK_NAME_PREFIX + case.trackName, 'testfile' + case.suffix)
     # Presumably creates any missing parent directories -- confirm against
     # ensurePathExists.
     ensurePathExists(fn)
     # The context manager closes the file even if the write raises.
     with open(fn, 'w') as testfile:
         testfile.write('\n'.join(case.headerLines + case.lines))
     return fn
 def _writeTestFile(self, case):
     """Write a test case to disk and return the path of the written file.

     The target path comes from createOrigPath, called with self.GENOME, the
     prefixed track name and the file name 'testfile' + case.suffix. The
     written text is case.headerLines + case.lines joined by '\\n'.
     """
     fn = createOrigPath(self.GENOME,
                         self.TRACK_NAME_PREFIX + case.trackName,
                         'testfile' + case.suffix)
     # Presumably makes sure the containing directory exists -- confirm
     # against ensurePathExists.
     ensurePathExists(fn)
     # 'with' guarantees the handle is closed, also when write() fails.
     with open(fn, 'w') as testfile:
         testfile.write('\n'.join(case.headerLines + case.lines))
     return fn
Beispiel #3
0
    def __new__(cls, genome, trackName):
        """Return the object stored for (genome, trackName), or a new instance.

        The shelve at cls.SHELVE_FN is opened with flag "c" and protocol
        cls.PROTOCOL, queried with the key constructKey(genome, trackName)
        and closed again. A stored (non-None) entry is returned as-is;
        otherwise a new instance is created with object.__new__(cls).
        """
        # Temporary hack: "hg18" is looked up under the name "NCBI36".
        if genome in ["hg18", "NCBI36"]:
            genome = "NCBI36"

        ensurePathExists(cls.SHELVE_FN)
        trackInfoShelve = safeshelve.open(cls.SHELVE_FN, "c", protocol=cls.PROTOCOL)
        try:
            stored = trackInfoShelve.get(constructKey(genome, trackName))
        finally:
            # Close the shelve even if the lookup raises.
            trackInfoShelve.close()

        if stored is not None:
            return stored
        return object.__new__(cls)
Beispiel #4
0
def renameStdTrack(genome, oldTn, newTn):
    """Move the path of track oldTn to the path of track newTn.

    Both paths are built with createOrigPath(genome, <track name>). When the
    module-level flag ONLY_SIMULATION is true, nothing is moved and the
    intended move is only printed (the target-exists check is skipped too).

    Raises AssertionError if the old path does not exist or, when not
    simulating, if the new path already exists.
    """
    oldPath = createOrigPath(genome, oldTn)
    # Raised explicitly instead of via 'assert' so the check is not stripped
    # when Python runs with -O.
    if not os.path.exists(oldPath):
        raise AssertionError('ERROR: TN did not exist in stdTracks: ' + oldPath)

    # print(...) with one argument behaves identically as a Python 2 print
    # statement and as the Python 3 function.
    print('(renaming track in stdTracks..)')
    newPath = createOrigPath(genome, newTn)
    if ONLY_SIMULATION:
        print('Would move %s to %s' % (oldPath, newPath))
        return

    if os.path.exists(newPath):
        raise AssertionError('ERROR: Target path already exists: ' + newPath)
    ensurePathExists(newPath)
    print('Moving from %s to %s' % (oldPath, newPath))
    shutil.move(oldPath, newPath)
Beispiel #5
0
def renameProcTrack(genome, oldTn, newTn):
    """Move the directories of track oldTn to those of track newTn.

    The move is attempted twice, once for each value of allowOverlaps
    (False, then True); paths are built with createDirPath. A missing old
    path only produces a printed warning. When the module-level flag
    ONLY_SIMULATION is true, nothing is moved and the intended move is only
    printed.

    Raises AssertionError if, when not simulating, a target path already
    exists.
    """
    for allowOverlaps in [False, True]:
        # Text is kept exactly as before, including the leading space in the
        # second alternative.
        overlapText = 'with overlaps' if allowOverlaps else ' without overlaps'

        oldPath = createDirPath(oldTn, genome, allowOverlaps=allowOverlaps)
        if not os.path.exists(oldPath):
            print('Warning: TN did not exist as preproc ' + overlapText)
            continue

        print('(renaming TN in preproc ' + overlapText + ')')
        newPath = createDirPath(newTn, genome, allowOverlaps=allowOverlaps)
        if ONLY_SIMULATION:
            print('Would move %s to %s' % (oldPath, newPath))
            continue

        # Raised explicitly instead of via 'assert' so the check is not
        # stripped when Python runs with -O.
        if os.path.exists(newPath):
            raise AssertionError('ERROR: Target path already exists: ' + newPath)
        ensurePathExists(newPath)
        shutil.move(oldPath, newPath)
Beispiel #6
0
 def extractOneTrackManyToRegionFilesInOneZipFile(cls, trackName, regionList, zipFn, fileFormatName=DEFAULT_FILE_FORMAT_NAME, \
                                                  globalCoords=False, asOriginal=False, allowOverlaps=False, \
                                                  ignoreEmpty=True):
     """Extract a track once per region and collect the resulting files in one zip file.

     For every region in regionList, cls.extract is called with a file name
     derived from the region (str(region) with ':' replaced by '_') located
     in the directory of zipFn. If cls.extract returns a truthy file name,
     that file is added to the zip archive under its base name and then
     removed from disk.

     The remaining keyword arguments are passed on to cls.extract unchanged;
     addSuffix is always True.
     """
     ensurePathExists(zipFn)
     # os.path.join keeps the path relative when zipFn has no directory part;
     # plain concatenation with os.sep would point at the filesystem root.
     outDir = os.path.dirname(zipFn)
     zipFile = ZipFile(zipFn, 'w')
     try:
         for region in regionList:
             fn = os.path.join(outDir, str(region).replace(':', '_'))
             okFn = cls.extract(trackName, [region], fn, fileFormatName=fileFormatName,
                                globalCoords=globalCoords, addSuffix=True, asOriginal=asOriginal,
                                allowOverlaps=allowOverlaps, ignoreEmpty=ignoreEmpty)
             if okFn:
                 zipFile.write(okFn, os.path.basename(okFn))
                 os.remove(okFn)
     finally:
         # Always close the archive, also if an extraction fails.
         zipFile.close()
Beispiel #7
0
def importFile(fileName, genome, trackName):
    """fileName genome trackName"""
    # NOTE(review): the docstring above looks like a usage synopsis and may be
    # read at runtime, so it is deliberately left unchanged -- confirm.
    #
    # Copies fileName to the path given by createOrigPath for the track and
    # then runs PreProcessAllTracksJob(genome, trackName).process().

    trackName = _convertTrackName(trackName)

    from gtrackcore.util.CommonFunctions import createOrigPath, ensurePathExists
    origFn = createOrigPath(genome, trackName, os.path.basename(fileName))
    # If a file of the same name is already present, its whole directory is
    # removed before the new copy is made.
    if os.path.exists(origFn):
        shutil.rmtree(os.path.dirname(origFn))
    ensurePathExists(origFn)
    shutil.copy(fileName, origFn)
    # 0o664 is the same value as the Python-2-only literal 0664, but is
    # valid syntax on both Python 2.6+ and Python 3.
    os.chmod(origFn, 0o664)

    from gtrackcore.preprocess.PreProcessTracksJob import PreProcessAllTracksJob
    PreProcessAllTracksJob(genome, trackName).process()
Beispiel #8
0
 def extractOneTrackManyToRegionFilesInOneZipFile(cls, trackName, regionList, zipFn, fileFormatName=DEFAULT_FILE_FORMAT_NAME, \
                                                  globalCoords=False, asOriginal=False, allowOverlaps=False, \
                                                  ignoreEmpty=True):
     """Write one extracted file per region into a single zip archive at zipFn.

     Each region in regionList is extracted separately through cls.extract
     into a file placed next to zipFn and named after the region
     (str(region) with ':' replaced by '_'). A truthy return value from
     cls.extract is taken as the name of the produced file: it is stored in
     the archive under its base name and deleted afterwards.

     fileFormatName, globalCoords, asOriginal, allowOverlaps and ignoreEmpty
     are forwarded to cls.extract; addSuffix is always True.
     """
     ensurePathExists(zipFn)
     # Joining with os.path.join avoids producing '/<name>' (filesystem root)
     # when zipFn carries no directory component.
     targetDir = os.path.dirname(zipFn)
     zipFile = ZipFile(zipFn, 'w')
     try:
         for region in regionList:
             regionFn = os.path.join(targetDir, str(region).replace(':', '_'))
             okFn = cls.extract(trackName, [region], regionFn, fileFormatName=fileFormatName,
                                globalCoords=globalCoords, addSuffix=True, asOriginal=asOriginal,
                                allowOverlaps=allowOverlaps, ignoreEmpty=ignoreEmpty)
             if okFn:
                 zipFile.write(okFn, os.path.basename(okFn))
                 os.remove(okFn)
     finally:
         # Close the archive regardless of whether every extraction succeeded.
         zipFile.close()
Beispiel #9
0
def renameStdTrack(genome, oldTn, newTn):
    """Rename a track by moving its path from oldTn to newTn.

    The source and target paths are obtained from createOrigPath. If the
    module-level flag ONLY_SIMULATION is true, the function only prints what
    it would move; otherwise it checks that the target does not exist,
    calls ensurePathExists on it and moves the source there.

    Raises AssertionError if the source path is missing or, when not
    simulating, if the target path already exists.
    """
    oldPath = createOrigPath(genome, oldTn)
    # Explicit raise (same exception type and message as the former assert)
    # so the check also runs under 'python -O'.
    if not os.path.exists(oldPath):
        raise AssertionError(
            'ERROR: TN did not exist in stdTracks: ' + oldPath)

    # Single-argument print(...) prints the same text on Python 2 and 3.
    print('(renaming track in stdTracks..)')
    newPath = createOrigPath(genome, newTn)
    if ONLY_SIMULATION:
        print('Would move %s to %s' % (oldPath, newPath))
        return

    if os.path.exists(newPath):
        raise AssertionError(
            'ERROR: Target path already exists: ' + newPath)
    ensurePathExists(newPath)
    print('Moving from %s to %s' % (oldPath, newPath))
    shutil.move(oldPath, newPath)
Beispiel #10
0
def importFile(fileName, genome, trackName):
    """fileName genome trackName"""
    # NOTE(review): the docstring is kept byte-for-byte because it reads like
    # a usage string that other code may display -- confirm before editing.
    #
    # Steps: convert the track name, copy fileName into the path returned by
    # createOrigPath, make the copy mode 0o664, then preprocess the track.

    trackName = _convertTrackName(trackName)

    from gtrackcore.util.CommonFunctions import createOrigPath, ensurePathExists
    origFn = createOrigPath(genome, trackName, os.path.basename(fileName))
    # An already existing target file causes its entire directory to be
    # deleted before the copy.
    if os.path.exists(origFn):
        shutil.rmtree(os.path.dirname(origFn))
    ensurePathExists(origFn)
    shutil.copy(fileName, origFn)
    # '0o' octal prefix: same value as 0664, accepted by Python 2.6+ and 3.
    os.chmod(origFn, 0o664)

    from gtrackcore.preprocess.PreProcessTracksJob import PreProcessAllTracksJob
    PreProcessAllTracksJob(genome, trackName).process()
Beispiel #11
0
    def __new__(cls, genome, trackName):
        """Look up (genome, trackName) in the shelve; create a new instance if absent.

        Opens the shelve at cls.SHELVE_FN (flag 'c', protocol cls.PROTOCOL),
        fetches the entry for constructKey(genome, trackName) and closes the
        shelve. Returns the stored entry when there is one, otherwise the
        result of object.__new__(cls).
        """
        #Temporary hack: 'hg18' is mapped to the genome name 'NCBI36'.
        if genome in ['hg18', 'NCBI36']:
            genome = 'NCBI36'

        ensurePathExists(cls.SHELVE_FN)
        trackInfoShelve = safeshelve.open(cls.SHELVE_FN,
                                          'c',
                                          protocol=cls.PROTOCOL)
        try:
            stored = trackInfoShelve.get(constructKey(genome, trackName))
        finally:
            # Do not leave the shelve open if the lookup fails.
            trackInfoShelve.close()

        if stored is None:
            return object.__new__(cls)
        return stored
Beispiel #12
0
def renameProcTrack(genome, oldTn, newTn):
    """Rename a track's directories, handling allowOverlaps=False and =True in turn.

    For each allowOverlaps value the old and new paths are built with
    createDirPath. If the old path does not exist, a warning is printed and
    that variant is skipped. If the module-level flag ONLY_SIMULATION is
    true, the intended move is printed instead of being carried out.

    Raises AssertionError if, when not simulating, the new path already
    exists.
    """
    for allowOverlaps in [False, True]:
        # The exact texts (including the leading space of the second one)
        # are preserved from the original messages.
        if allowOverlaps:
            overlapText = 'with overlaps'
        else:
            overlapText = ' without overlaps'

        oldPath = createDirPath(oldTn, genome, allowOverlaps=allowOverlaps)
        if not os.path.exists(oldPath):
            print('Warning: TN did not exist as preproc ' + overlapText)
            continue

        print('(renaming TN in preproc ' + overlapText + ')')
        newPath = createDirPath(newTn, genome, allowOverlaps=allowOverlaps)
        if ONLY_SIMULATION:
            print('Would move %s to %s' % (oldPath, newPath))
            continue

        # Explicit raise keeps the AssertionError but is not removed by -O.
        if os.path.exists(newPath):
            raise AssertionError(
                'ERROR: Target path already exists: ' + newPath)
        ensurePathExists(newPath)
        shutil.move(oldPath, newPath)
    def storeBoundingRegions(self, boundingRegionTuples, genomeElementChrList,
                             sparse):
        """Validate the given bounding regions and store them in the shelve at self._fn.

        Arguments:
          boundingRegionTuples -- iterable of objects with a 'region'
              attribute (providing chr, start, end, len(), '<' and
              overlaps()) and an 'elCount' attribute.
          genomeElementChrList -- the chromosomes (sequences) that contain
              elements.
          sparse -- must be True or False. When False (dense), the length of
              every bounding region must equal its element count, and each
              region gets its own element index range. When True, all
              regions of a chromosome share that chromosome's element index
              range and bin index range.

        Raises InvalidFormatError if the regions of a chromosome are not
        grouped together, are unsorted, overlap or adjoin; if a region has
        no positive length; if a dense region's length differs from its
        element count; if (sparse only) a chromosome absent from
        genomeElementChrList has a positive element count; or if a
        chromosome in genomeElementChrList has no bounding region.

        After writing, the method loops until self.fileExists() is true,
        logging a message and sleeping 0.2 seconds per iteration.
        """
        assert sparse in [False, True]

        tempContents = OrderedDict()

        genomeElementChrs = set(genomeElementChrList)
        lastRegion = None
        chrStartIdxs = OrderedDict()
        chrEndIdxs = OrderedDict()
        totElCount = 0

        for br in boundingRegionTuples:
            region = br.region
            elCount = br.elCount
            chrom = region.chr

            if lastRegion is None or chrom != lastRegion.chr:
                # First region of a run: the chromosome must not have
                # appeared earlier in the input.
                if chrom in tempContents:
                    raise InvalidFormatError(
                        "Error: bounding region (%s) is not grouped with previous bounding regions of the same chromosome (sequence)."
                        % region)

                lastRegion = None
                tempContents[chrom] = OrderedDict()
                if sparse:
                    chrStartIdxs[chrom] = totElCount
            else:
                if region < lastRegion:
                    raise InvalidFormatError(
                        "Error: bounding regions in the same chromosome (sequence) are unsorted: %s > %s."
                        % (lastRegion, region))
                if lastRegion.overlaps(region):
                    raise InvalidFormatError(
                        "Error: bounding regions '%s' and '%s' overlap." %
                        (lastRegion, region))
                if lastRegion.end == region.start:
                    raise InvalidFormatError(
                        "Error: bounding regions '%s' and '%s' are adjoining (there is no gap between them)."
                        % (lastRegion, region))

            regionLen = len(region)
            if regionLen < 1:
                raise InvalidFormatError(
                    "Error: bounding region '%s' does not have positive length."
                    % region)

            if not sparse and regionLen != elCount:
                raise InvalidFormatError(
                    "Error: track type representation is dense, but the length of bounding region '%s' is not equal to the element count: %s != %s"
                    % (region, regionLen, elCount))

            # Dense regions get their own element index range right away;
            # sparse regions are filled in per chromosome further down.
            if sparse:
                startIdx, endIdx = None, None
            else:
                startIdx, endIdx = totElCount, totElCount + elCount
            totElCount += elCount
            if sparse:
                chrEndIdxs[chrom] = totElCount

            tempContents[chrom][region.start] = BoundingRegionInfo(
                region.start, region.end, startIdx, endIdx, 0, 0)

            lastRegion = region

        if sparse:
            totBinCount = 0
            for chrom in tempContents:
                chrLen = GenomeInfo.getChrLen(self._genome, chrom)
                numBinsInChr = CompBinManager.getNumOfBins(
                    GenomeRegion(start=0, end=chrLen))
                hasElements = chrom in genomeElementChrs
                brInfoDict = tempContents[chrom]

                # Iterate over a copy of the keys, as entries are replaced
                # inside the loop.
                for key in list(brInfoDict.keys()):
                    brInfo = brInfoDict[key]

                    if hasElements:
                        brInfoDict[key] = BoundingRegionInfo(
                            brInfo.start, brInfo.end,
                            chrStartIdxs[chrom], chrEndIdxs[chrom],
                            totBinCount, totBinCount + numBinsInChr)
                    else:
                        chrElCount = chrEndIdxs[chrom] - chrStartIdxs[chrom]
                        if chrElCount > 0:
                            raise InvalidFormatError(
                                "Error: bounding region '%s' has incorrect element count: %s > 0"
                                % (GenomeRegion(chr=chrom,
                                                start=brInfo.start,
                                                end=brInfo.end),
                                   chrElCount))
                        brInfoDict[key] = BoundingRegionInfo(
                            brInfo.start, brInfo.end, 0, 0, 0, 0)

                # Only chromosomes with elements advance the bin counter.
                if hasElements:
                    totBinCount += numBinsInChr

        missingChrs = genomeElementChrs - set(tempContents.keys())
        if len(missingChrs) > 0:
            raise InvalidFormatError(
                'Error: some chromosomes (sequences) contains data, but has no bounding regions: %s'
                % ', '.join(missingChrs))

        ensurePathExists(self._fn)

        for chrom in tempContents:
            brInfoDict = tempContents[chrom]
            tempContents[chrom] = BrInfoHolder(tuple(brInfoDict.keys()),
                                               tuple(brInfoDict.values()))

        brShelve = safeshelve.open(self._fn)
        try:
            brShelve.update(tempContents)
        finally:
            # Close the shelve even if storing the contents fails.
            brShelve.close()

        while not self.fileExists():
            from gtrackcore.application.LogSetup import logMessage
            logMessage(
                "Bounding region shelve file '%s' has yet to be created" %
                self._fn)
            import time
            time.sleep(0.2)
Beispiel #14
0
def _getDirPath(genome=''):
    """Return the path given by createDirPath([], ''), after calling ensurePathExists on it.

    NOTE(review): the 'genome' argument is accepted but never used; an empty
    string is always passed to createDirPath -- confirm this is intended.
    """
    from gtrackcore.util.CommonFunctions import ensurePathExists
    from gtrackcore.util.CommonFunctions import createDirPath

    rootDir = createDirPath([], '')
    ensurePathExists(rootDir)
    return rootDir
Beispiel #15
0
def _getDirPath(genome=''):
    """Build a directory path with createDirPath([], ''), pass it to ensurePathExists and return it.

    NOTE(review): 'genome' is not referenced in the body; createDirPath
    always receives '' as its second argument -- verify against callers.
    """
    from gtrackcore.util.CommonFunctions import ensurePathExists
    from gtrackcore.util.CommonFunctions import createDirPath

    basePath = createDirPath([], '')
    ensurePathExists(basePath)
    return basePath
 def composeToFile(self, fn, ignoreEmpty=False, **kwArgs):
     """Compose to the file at path fn and return the result of self._composeCommon.

     ensurePathExists(fn) is called first, then fn is opened for writing
     (truncating an existing file) and handed to self._composeCommon
     together with ignoreEmpty and any extra keyword arguments.
     """
     ensurePathExists(fn)
     # The context manager closes the file even if composing raises.
     with open(fn, 'w') as f:
         return self._composeCommon(f, ignoreEmpty, **kwArgs)
    def storeBoundingRegions(self, boundingRegionTuples, genomeElementChrList, sparse):
        """Check the bounding regions for consistency and write them to the shelve at self._fn.

        Arguments:
          boundingRegionTuples -- iterable of objects with a 'region'
              attribute (providing chr, start, end, len(), '<' and
              overlaps()) and an 'elCount' attribute.
          genomeElementChrList -- the chromosomes (sequences) that contain
              elements.
          sparse -- must be True or False. When False (dense), every
              bounding region must be as long as its element count and is
              given its own element index range. When True, the regions of
              a chromosome all carry the chromosome-wide element index range
              and bin index range.

        Raises InvalidFormatError if regions of one chromosome are not
        grouped, are unsorted, overlap or adjoin; if a region lacks positive
        length; if a dense region's length and element count differ; if
        (sparse only) a chromosome not in genomeElementChrList has a
        positive element count; or if a chromosome in genomeElementChrList
        has no bounding region.

        After the shelve is written, the method waits (logging and sleeping
        0.2 seconds per round) until self.fileExists() returns true.
        """
        assert sparse in [False, True]

        tempContents = OrderedDict()

        genomeElementChrs = set(genomeElementChrList)
        lastRegion = None
        chrStartIdxs = OrderedDict()
        chrEndIdxs = OrderedDict()
        totElCount = 0

        for br in boundingRegionTuples:
            region = br.region
            elCount = br.elCount
            chrom = region.chr

            if lastRegion is None or chrom != lastRegion.chr:
                # A new chromosome starts here; it must not have been seen
                # earlier in the input.
                if chrom in tempContents:
                    raise InvalidFormatError("Error: bounding region (%s) is not grouped with previous bounding regions of the same chromosome (sequence)." % region)

                lastRegion = None
                tempContents[chrom] = OrderedDict()
                if sparse:
                    chrStartIdxs[chrom] = totElCount
            else:
                if region < lastRegion:
                    raise InvalidFormatError("Error: bounding regions in the same chromosome (sequence) are unsorted: %s > %s." % (lastRegion, region))
                if lastRegion.overlaps(region):
                    raise InvalidFormatError("Error: bounding regions '%s' and '%s' overlap." % (lastRegion, region))
                if lastRegion.end == region.start:
                    raise InvalidFormatError("Error: bounding regions '%s' and '%s' are adjoining (there is no gap between them)." % (lastRegion, region))

            regionLen = len(region)
            if regionLen < 1:
                raise InvalidFormatError("Error: bounding region '%s' does not have positive length." % region)

            if not sparse and regionLen != elCount:
                raise InvalidFormatError("Error: track type representation is dense, but the length of bounding region '%s' is not equal to the element count: %s != %s" % (region, regionLen, elCount))

            # Dense: per-region element index range. Sparse: placeholders,
            # replaced by the per-chromosome range in the pass below.
            if sparse:
                startIdx, endIdx = None, None
            else:
                startIdx, endIdx = totElCount, totElCount + elCount
            totElCount += elCount
            if sparse:
                chrEndIdxs[chrom] = totElCount

            tempContents[chrom][region.start] = BoundingRegionInfo(region.start, region.end, startIdx, endIdx, 0, 0)

            lastRegion = region

        if sparse:
            totBinCount = 0
            for chrom in tempContents:
                chrLen = GenomeInfo.getChrLen(self._genome, chrom)
                numBinsInChr = CompBinManager.getNumOfBins(GenomeRegion(start=0, end=chrLen))
                hasElements = chrom in genomeElementChrs
                brInfoDict = tempContents[chrom]

                # Work on a copy of the keys because the entries are
                # replaced while looping.
                for key in list(brInfoDict.keys()):
                    brInfo = brInfoDict[key]

                    if hasElements:
                        brInfoDict[key] = BoundingRegionInfo(brInfo.start, brInfo.end,
                                                             chrStartIdxs[chrom], chrEndIdxs[chrom],
                                                             totBinCount, totBinCount + numBinsInChr)
                    else:
                        chrElCount = chrEndIdxs[chrom] - chrStartIdxs[chrom]
                        if chrElCount > 0:
                            raise InvalidFormatError("Error: bounding region '%s' has incorrect element count: %s > 0" % (GenomeRegion(chr=chrom, start=brInfo.start, end=brInfo.end), chrElCount))
                        brInfoDict[key] = BoundingRegionInfo(brInfo.start, brInfo.end, 0, 0, 0, 0)

                # The bin counter only advances for chromosomes with elements.
                if hasElements:
                    totBinCount += numBinsInChr

        missingChrs = genomeElementChrs - set(tempContents.keys())
        if len(missingChrs) > 0:
            raise InvalidFormatError('Error: some chromosomes (sequences) contains data, but has no bounding regions: %s' % ', '.join(missingChrs))

        ensurePathExists(self._fn)

        for chrom in tempContents:
            brInfoDict = tempContents[chrom]
            tempContents[chrom] = BrInfoHolder(tuple(brInfoDict.keys()), tuple(brInfoDict.values()))

        brShelve = safeshelve.open(self._fn)
        try:
            brShelve.update(tempContents)
        finally:
            # Make sure the shelve is closed even if the update raises.
            brShelve.close()

        while not self.fileExists():
            from gtrackcore.application.LogSetup import logMessage
            logMessage("Bounding region shelve file '%s' has yet to be created" % self._fn)
            import time
            time.sleep(0.2)