def __new__(cls, regSpec, binSpec, genome=None, categoryFilterList=None, strictMatch=True, includeExtraChrs=False):
    """
    Build the bin source selected by the (regSpec, binSpec) pair.

    If regSpec is 'file', 'track' or one of the values returned by
    getSupportedFileSuffixesForBinning(), a genome element source is built
    from binSpec, optionally wrapped in a GECategoryFilter (when
    categoryFilterList is not None), and returned via cls._applyEnvelope().
    A genome is required in this case.

    For any other regSpec an AutoBinner is returned over
    parseRegSpec(regSpec, genome, includeExtraChrs=...). binSpec is then
    parsed with parseShortenedSizeSpec(), except that '*' gives a bin size
    of None.
    """
    sourceBackedSpecs = ['file', 'track'] + getSupportedFileSuffixesForBinning()

    if regSpec not in sourceBackedSpecs:
        # Region-spec path: binSpec is a size spec, '*' maps to None.
        binSize = None if binSpec == '*' else parseShortenedSizeSpec(binSpec)

        from gtrackcore.input.userbins.AutoBinner import AutoBinner
        regions = parseRegSpec(regSpec, genome, includeExtraChrs=includeExtraChrs)
        return AutoBinner(regions, binSize)

    # Source-backed path: every branch below passes the genome on.
    assert genome is not None

    from gtrackcore.input.core.GenomeElementSource import GenomeElementSource
    if regSpec == 'track':
        from gtrackcore.input.adapters.TrackGenomeElementSource import FullTrackGenomeElementSource
        source = FullTrackGenomeElementSource(genome, convertTNstrToTNListFormat(binSpec), allowOverlaps=False)
    elif regSpec == 'file':
        source = GenomeElementSource(binSpec, genome=genome)
    else:
        # regSpec is one of the supported suffixes; pass it on as the suffix.
        source = GenomeElementSource(binSpec, genome=genome, suffix=regSpec)

    if categoryFilterList is not None:
        from gtrackcore.input.wrappers.GECategoryFilter import GECategoryFilter
        source = GECategoryFilter(source, categoryFilterList, strict=strictMatch)

    return cls._applyEnvelope(source)
def __new__(cls, regSpec, binSpec, genome=None, categoryFilterList=None, strictMatch=True, includeExtraChrs=False):
    """
    Build the bin source selected by the (regSpec, binSpec) pair.

    If regSpec is 'file', 'track' or one of the values returned by
    getSupportedFileSuffixesForBinning(), a genome element source is built
    from binSpec, optionally wrapped in a GECategoryFilter (when
    categoryFilterList is not None), and returned via cls._applyEnvelope().
    A genome is required in this case.

    For any other regSpec an AutoBinner is returned over
    parseRegSpec(regSpec, genome, includeExtraChrs=...). binSpec is then
    parsed with parseShortenedSizeSpec(), except that '*' gives a bin size
    of None.
    """
    sourceBackedSpecs = ['file', 'track'] + getSupportedFileSuffixesForBinning()

    if regSpec not in sourceBackedSpecs:
        # Region-spec path: binSpec is a size spec, '*' maps to None.
        binSize = None if binSpec == '*' else parseShortenedSizeSpec(binSpec)

        from gtrackcore.input.userbins.AutoBinner import AutoBinner
        regions = parseRegSpec(regSpec, genome, includeExtraChrs=includeExtraChrs)
        return AutoBinner(regions, binSize)

    # Source-backed path: every branch below passes the genome on.
    assert genome is not None

    from gtrackcore.input.core.GenomeElementSource import GenomeElementSource
    if regSpec == 'track':
        from gtrackcore.input.adapters.TrackGenomeElementSource import FullTrackGenomeElementSource
        source = FullTrackGenomeElementSource(genome, convertTNstrToTNListFormat(binSpec), allowOverlaps=False)
    elif regSpec == 'file':
        source = GenomeElementSource(binSpec, genome=genome)
    else:
        # regSpec is one of the supported suffixes; pass it on as the suffix.
        source = GenomeElementSource(binSpec, genome=genome, suffix=regSpec)

    if categoryFilterList is not None:
        from gtrackcore.input.wrappers.GECategoryFilter import GECategoryFilter
        source = GECategoryFilter(source, categoryFilterList, strict=strictMatch)

    return cls._applyEnvelope(source)
# NOTE(review): the two classmethods below presumably belong to the
# TrackExtractor class (the __main__ block calls TrackExtractor.extract); the
# class header is outside this chunk, so re-indent on merge - confirm.
@classmethod
def extractOneTrackManyRegsToOneFile(cls, trackName, regionList, fn, fileFormatName=DEFAULT_FILE_FORMAT_NAME,
                                     globalCoords=False, asOriginal=False, allowOverlaps=False):
    """
    Extract one track over regionList into the single file fn.

    Delegates to cls.extract() with addSuffix=False; fileFormatName,
    globalCoords, asOriginal and allowOverlaps are passed through unchanged.
    """
    extractOptions = dict(
        fileFormatName=fileFormatName,
        globalCoords=globalCoords,
        addSuffix=False,
        asOriginal=asOriginal,
        allowOverlaps=allowOverlaps,
    )
    cls.extract(trackName, regionList, fn, **extractOptions)


@classmethod
def extractManyToOneDir(cls, trackNameList, regionList, baseDir, fileFormatName=DEFAULT_FILE_FORMAT_NAME,
                        globalCoords=False, asOriginal=False, allowOverlaps=False):
    """
    Extract every track in trackNameList into its own file under baseDir.

    The file name for a track is baseDir, os.sep and the track name parts
    joined with '_'. Each track is written with
    extractOneTrackManyRegsToOneFile(), using the same options for all.
    """
    sharedOptions = dict(
        fileFormatName=fileFormatName,
        globalCoords=globalCoords,
        asOriginal=asOriginal,
        allowOverlaps=allowOverlaps,
    )
    for trackName in trackNameList:
        outFn = os.sep.join((baseDir, '_'.join(trackName)))
        cls.extractOneTrackManyRegsToOneFile(trackName, regionList, outFn, **sharedOptions)


if __name__ == "__main__":
    # Command line: trackName:subtype genome:chr:start-end asOriginal [filename]
    if not 4 <= len(sys.argv) <= 5:
        # Parenthesised single argument prints the same text as the print statement.
        print('Syntax: python TrackExtractor.py trackName:subtype genome:chr:start-end asOriginal [filename]')
        sys.exit(0)

    trackName = parseTrackNameSpec(sys.argv[1])
    regions = parseRegSpec(sys.argv[2])
    assert len(regions) == 1

    # NOTE(review): eval() runs the third argument as Python code; only safe
    # for trusted command lines - confirm this is intended.
    asOriginal = eval(sys.argv[3])
    fn = None if len(sys.argv) != 5 else sys.argv[4]

    TrackExtractor.extract(trackName, regions[0], fn, asOriginal=asOriginal)
# This collapsed line holds, in order:
#   1. `zipFile.close()` - the closing statement of a definition that starts
#      before this chunk (its enclosing scope is not visible here).
#   2. extractOneTrackManyRegsToOneFile (classmethod): forwards to cls.extract()
#      with addSuffix=False, passing fileFormatName, globalCoords, asOriginal
#      and allowOverlaps through unchanged.
#   3. extractManyToOneDir (classmethod): for each trackName in trackNameList,
#      builds fn = baseDir + os.sep + '_'.join(trackName) and calls
#      extractOneTrackManyRegsToOneFile with the same options.
#   4. The __main__ entry point: requires 3 or 4 command-line arguments
#      (otherwise prints the syntax line and exits with status 0), parses the
#      track name and region spec, asserts that exactly one region was parsed,
#      and calls TrackExtractor.extract(); fn is None when no filename is given.
# NOTE(review): asOriginal comes from eval(sys.argv[3]), which runs the argument
# as Python code - only safe for trusted command lines; confirm this is intended.
zipFile.close() @classmethod def extractOneTrackManyRegsToOneFile(cls, trackName, regionList, fn, fileFormatName=DEFAULT_FILE_FORMAT_NAME, \ globalCoords=False, asOriginal=False, allowOverlaps=False): cls.extract(trackName, regionList, fn, fileFormatName=fileFormatName, globalCoords=globalCoords, \ addSuffix=False, asOriginal=asOriginal, allowOverlaps=allowOverlaps) @classmethod def extractManyToOneDir(cls, trackNameList, regionList, baseDir, fileFormatName=DEFAULT_FILE_FORMAT_NAME, \ globalCoords=False, asOriginal=False, allowOverlaps=False): for trackName in trackNameList: fn = baseDir + os.sep + '_'.join(trackName) cls.extractOneTrackManyRegsToOneFile(trackName, regionList, fn, fileFormatName=fileFormatName, globalCoords=globalCoords, asOriginal=asOriginal, \ allowOverlaps=allowOverlaps) if __name__ == "__main__": if len(sys.argv) not in [4, 5]: print 'Syntax: python TrackExtractor.py trackName:subtype genome:chr:start-end asOriginal [filename]' sys.exit(0) trackName = parseTrackNameSpec(sys.argv[1]) regions = parseRegSpec(sys.argv[2]) assert len(regions) == 1 asOriginal = eval(sys.argv[3]) fn = sys.argv[4] if len(sys.argv) == 5 else None TrackExtractor.extract(trackName, regions[0], fn, asOriginal=asOriginal)