Exemple #1
0
 def __new__(cls, regSpec, binSpec, genome=None, categoryFilterList=None,
             strictMatch=True, includeExtraChrs=False):
     """
     Create the bin source described by ``regSpec`` and ``binSpec``.

     If ``regSpec`` is 'file', 'track' or one of the suffixes returned by
     getSupportedFileSuffixesForBinning(), ``binSpec`` is passed on as the
     file or track specification of a genome element source. That source
     is wrapped in a GECategoryFilter when ``categoryFilterList`` is given
     (``strictMatch`` is forwarded as ``strict``), and the value of
     ``cls._applyEnvelope`` on it is returned. ``genome`` must not be None
     in this mode.

     For any other ``regSpec``, an AutoBinner is returned over the result
     of parseRegSpec(regSpec, genome, includeExtraChrs=...). The bin size
     is None when ``binSpec`` is '*', otherwise the value of
     parseShortenedSizeSpec(binSpec).
     """
     sourceSpecs = ['file', 'track'] + getSupportedFileSuffixesForBinning()

     if regSpec not in sourceSpecs:
         # Region-based binning; '*' stands for "no explicit bin size".
         binSize = None if binSpec == '*' else parseShortenedSizeSpec(binSpec)

         from gtrackcore.input.userbins.AutoBinner import AutoBinner
         regions = parseRegSpec(regSpec, genome, includeExtraChrs=includeExtraChrs)
         return AutoBinner(regions, binSize)

     # Bins read from a file or a track require a genome.
     assert genome is not None

     from gtrackcore.input.core.GenomeElementSource import GenomeElementSource
     if regSpec == 'track':
         from gtrackcore.input.adapters.TrackGenomeElementSource import FullTrackGenomeElementSource
         trackNameAsList = convertTNstrToTNListFormat(binSpec)
         binSource = FullTrackGenomeElementSource(genome, trackNameAsList, allowOverlaps=False)
     elif regSpec == 'file':
         binSource = GenomeElementSource(binSpec, genome=genome)
     else:
         # regSpec is itself a supported file suffix.
         binSource = GenomeElementSource(binSpec, genome=genome, suffix=regSpec)

     if categoryFilterList is not None:
         from gtrackcore.input.wrappers.GECategoryFilter import GECategoryFilter
         binSource = GECategoryFilter(binSource, categoryFilterList, strict=strictMatch)

     return cls._applyEnvelope(binSource)
Exemple #2
0
    def __new__(cls, regSpec, binSpec, genome=None, categoryFilterList=None,
                strictMatch=True, includeExtraChrs=False):
        """
        Create the bin source described by ``regSpec`` and ``binSpec``.

        Source mode (``regSpec`` is 'file', 'track' or a suffix listed by
        getSupportedFileSuffixesForBinning()): ``binSpec`` is handed to a
        genome element source as the file or track specification,
        ``genome`` must not be None, and the source is optionally wrapped in
        a GECategoryFilter built from ``categoryFilterList`` and
        ``strictMatch``. The value of ``cls._applyEnvelope`` on the source
        is returned.

        Region mode (any other ``regSpec``): returns an AutoBinner over
        parseRegSpec(regSpec, genome, includeExtraChrs=...), with a bin size
        of None for ``binSpec == '*'`` and parseShortenedSizeSpec(binSpec)
        otherwise.
        """
        if regSpec in ['file', 'track'] + getSupportedFileSuffixesForBinning():
            assert genome is not None

            from gtrackcore.input.core.GenomeElementSource import GenomeElementSource
            if regSpec == 'track':
                from gtrackcore.input.adapters.TrackGenomeElementSource import FullTrackGenomeElementSource
                trackNameAsList = convertTNstrToTNListFormat(binSpec)
                elementSource = FullTrackGenomeElementSource(
                    genome, trackNameAsList, allowOverlaps=False)
            else:
                # 'file' uses the source's default suffix handling; any
                # other value here is passed on as an explicit suffix.
                sourceKwArgs = {'genome': genome}
                if regSpec != 'file':
                    sourceKwArgs['suffix'] = regSpec
                elementSource = GenomeElementSource(binSpec, **sourceKwArgs)

            if categoryFilterList is None:
                return cls._applyEnvelope(elementSource)

            from gtrackcore.input.wrappers.GECategoryFilter import GECategoryFilter
            filteredSource = GECategoryFilter(
                elementSource, categoryFilterList, strict=strictMatch)
            return cls._applyEnvelope(filteredSource)

        # Region mode: '*' stands for "no explicit bin size".
        binSize = None if binSpec == '*' else parseShortenedSizeSpec(binSpec)

        from gtrackcore.input.userbins.AutoBinner import AutoBinner
        regions = parseRegSpec(regSpec, genome, includeExtraChrs=includeExtraChrs)
        return AutoBinner(regions, binSize)
Exemple #3
0
    @classmethod
    def extractOneTrackManyRegsToOneFile(cls, trackName, regionList, fn,
                                         fileFormatName=DEFAULT_FILE_FORMAT_NAME,
                                         globalCoords=False, asOriginal=False,
                                         allowOverlaps=False):
        """
        Forward one track and a list of regions to ``cls.extract``.

        ``cls.extract`` is called with ``addSuffix=False``; ``trackName``,
        ``regionList``, ``fn`` and the remaining options are passed through
        unchanged. The result of ``cls.extract`` is not returned.
        """
        extractOptions = dict(
            fileFormatName=fileFormatName,
            globalCoords=globalCoords,
            addSuffix=False,
            asOriginal=asOriginal,
            allowOverlaps=allowOverlaps,
        )
        cls.extract(trackName, regionList, fn, **extractOptions)

    @classmethod
    def extractManyToOneDir(cls, trackNameList, regionList, baseDir,
                            fileFormatName=DEFAULT_FILE_FORMAT_NAME,
                            globalCoords=False, asOriginal=False,
                            allowOverlaps=False):
        """
        Call extractOneTrackManyRegsToOneFile once per track in ``trackNameList``.

        The file name given for each track is ``baseDir`` followed by
        ``os.sep`` and the elements of the track name joined with '_'. All
        other options are passed through unchanged.
        """
        for nameParts in trackNameList:
            targetFn = baseDir + os.sep + '_'.join(nameParts)
            cls.extractOneTrackManyRegsToOneFile(
                nameParts, regionList, targetFn,
                fileFormatName=fileFormatName,
                globalCoords=globalCoords,
                asOriginal=asOriginal,
                allowOverlaps=allowOverlaps)


if __name__ == "__main__":
    # Command-line entry point. Expected arguments (see the usage text):
    #   trackName:subtype  genome:chr:start-end  asOriginal  [filename]
    if len(sys.argv) not in [4, 5]:
        # The parenthesized single-argument form prints the same text on
        # Python 2 and also parses on Python 3, where the bare print
        # statement is a SyntaxError.
        print('Syntax: python TrackExtractor.py trackName:subtype genome:chr:start-end asOriginal [filename]')
        sys.exit(0)

    trackName = parseTrackNameSpec(sys.argv[1])
    regions = parseRegSpec(sys.argv[2])
    # Only a single region is supported by this entry point.
    assert len(regions) == 1

    # NOTE(review): eval() executes arbitrary code supplied on the command
    # line. Left as is to keep accepting any expression callers pass today;
    # consider restricting this to a literal parse if only True/False is
    # expected -- confirm against actual usage.
    asOriginal = eval(sys.argv[3])

    # The output file name is optional; None is passed when it is omitted.
    fn = sys.argv[4] if len(sys.argv) == 5 else None

    TrackExtractor.extract(trackName, regions[0], fn, asOriginal=asOriginal)
        zipFile.close()

    @classmethod
    def extractOneTrackManyRegsToOneFile(cls, trackName, regionList, fn,
                                         fileFormatName=DEFAULT_FILE_FORMAT_NAME,
                                         globalCoords=False, asOriginal=False,
                                         allowOverlaps=False):
        """
        Delegate to ``cls.extract`` for one track and a list of regions.

        The call always passes ``addSuffix=False``. ``trackName``,
        ``regionList``, ``fn`` and every other option are forwarded as
        given. Nothing is returned.
        """
        forwardedOptions = {
            'fileFormatName': fileFormatName,
            'globalCoords': globalCoords,
            'addSuffix': False,
            'asOriginal': asOriginal,
            'allowOverlaps': allowOverlaps,
        }
        cls.extract(trackName, regionList, fn, **forwardedOptions)
        
    @classmethod
    def extractManyToOneDir(cls, trackNameList, regionList, baseDir,
                            fileFormatName=DEFAULT_FILE_FORMAT_NAME,
                            globalCoords=False, asOriginal=False,
                            allowOverlaps=False):
        """
        Invoke extractOneTrackManyRegsToOneFile for each entry of ``trackNameList``.

        Each track gets the file name ``baseDir + os.sep + '_'.join(trackName)``.
        The region list and the remaining options are forwarded unchanged.
        """
        for currentTrack in trackNameList:
            flatTrackName = '_'.join(currentTrack)
            outputFn = baseDir + os.sep + flatTrackName
            cls.extractOneTrackManyRegsToOneFile(
                currentTrack, regionList, outputFn,
                fileFormatName=fileFormatName,
                globalCoords=globalCoords,
                asOriginal=asOriginal,
                allowOverlaps=allowOverlaps)
                
if __name__ == "__main__":
    # Script entry point. Arguments, as listed in the usage text below:
    #   trackName:subtype  genome:chr:start-end  asOriginal  [filename]
    if len(sys.argv) not in [4, 5]:
        # Written as a call with one argument so the line is valid on both
        # Python 2 and Python 3; the bare print statement only parses on
        # Python 2. The printed text is unchanged.
        print('Syntax: python TrackExtractor.py trackName:subtype genome:chr:start-end asOriginal [filename]')
        sys.exit(0)

    trackName = parseTrackNameSpec(sys.argv[1])
    regions = parseRegSpec(sys.argv[2])
    # Exactly one region must result from the region specification.
    assert len(regions) == 1

    # NOTE(review): eval() runs whatever expression is given as the third
    # argument. Kept for compatibility with existing invocations; a literal
    # parser would be safer if only True/False is ever passed -- verify.
    asOriginal = eval(sys.argv[3])

    # Without a fourth argument, None is passed as the file name.
    fn = sys.argv[4] if len(sys.argv) == 5 else None

    TrackExtractor.extract(trackName, regions[0], fn, asOriginal=asOriginal)