예제 #1
0
 def __new__(cls, regSpec, binSpec, genome=None, categoryFilterList=None, strictMatch=True, includeExtraChrs=False):
     """
     Build the object that supplies the user bins described by regSpec/binSpec.

     Two modes are selected by regSpec:

     * regSpec is 'file', 'track' or one of the suffixes returned by
       getSupportedFileSuffixesForBinning(): the bins come from a genome
       element source. For 'file' and for a suffix, binSpec is handed to
       GenomeElementSource (with suffix=regSpec in the latter case); for
       'track', binSpec is converted with convertTNstrToTNListFormat and
       used as the track name of a FullTrackGenomeElementSource created
       with allowOverlaps=False. If categoryFilterList is given, the source
       is wrapped in a GECategoryFilter (strict=strictMatch). The result of
       cls._applyEnvelope(source) is returned. genome must not be None in
       this mode (enforced with an assert).

     * any other regSpec: regSpec is parsed with parseRegSpec (together with
       genome and includeExtraChrs) and an AutoBinner over the parsed
       regions is returned. binSpec '*' gives a bin size of None; any other
       value is parsed with parseShortenedSizeSpec.
     """
     elementSourceSpecs = ['file', 'track'] + getSupportedFileSuffixesForBinning()

     if regSpec not in elementSourceSpecs:
         # Region-string mode: split the parsed regions with an AutoBinner.
         binSize = None if binSpec == '*' else parseShortenedSizeSpec(binSpec)

         from gtrackcore_memmap.input.userbins.AutoBinner import AutoBinner
         parsedRegions = parseRegSpec(regSpec, genome, includeExtraChrs=includeExtraChrs)
         return AutoBinner(parsedRegions, binSize)

     # Element-source mode: a genome is required from here on.
     assert genome is not None

     from gtrackcore_memmap.input.core.GenomeElementSource import GenomeElementSource
     if regSpec == 'file':
         source = GenomeElementSource(binSpec, genome=genome)
     elif regSpec == 'track':
         from gtrackcore_memmap.input.adapters.TrackGenomeElementSource import FullTrackGenomeElementSource
         source = FullTrackGenomeElementSource(genome, convertTNstrToTNListFormat(binSpec),
                                               allowOverlaps=False)
     else:
         # regSpec is itself a supported file suffix; pass it on explicitly.
         source = GenomeElementSource(binSpec, genome=genome, suffix=regSpec)

     if categoryFilterList is not None:
         from gtrackcore_memmap.input.wrappers.GECategoryFilter import GECategoryFilter
         source = GECategoryFilter(source, categoryFilterList, strict=strictMatch)

     return cls._applyEnvelope(source)
예제 #2
0
        zipFile.close()

    @classmethod
    def extractOneTrackManyRegsToOneFile(cls, trackName, regionList, fn, fileFormatName=DEFAULT_FILE_FORMAT_NAME,
                                         globalCoords=False, asOriginal=False, allowOverlaps=False):
        """
        Forward a single track and a list of regions to cls.extract.

        trackName, regionList and fn are passed positionally; fileFormatName,
        globalCoords, asOriginal and allowOverlaps are passed through
        unchanged as keyword arguments. addSuffix is always False.
        Whatever cls.extract returns is discarded (this method returns None).
        """
        extractOptions = dict(
            fileFormatName=fileFormatName,
            globalCoords=globalCoords,
            addSuffix=False,
            asOriginal=asOriginal,
            allowOverlaps=allowOverlaps,
        )
        cls.extract(trackName, regionList, fn, **extractOptions)
        
    @classmethod
    def extractManyToOneDir(cls, trackNameList, regionList, baseDir, fileFormatName=DEFAULT_FILE_FORMAT_NAME,
                            globalCoords=False, asOriginal=False, allowOverlaps=False):
        """
        Call extractOneTrackManyRegsToOneFile once per track in trackNameList.

        The file name for each track is baseDir, followed by os.sep, followed
        by the parts of the track name joined with '_'. regionList and the
        remaining keyword options are passed on unchanged for every track.
        Returns None.
        """
        # These options are the same for every track, so collect them once.
        sharedOptions = dict(
            fileFormatName=fileFormatName,
            globalCoords=globalCoords,
            asOriginal=asOriginal,
            allowOverlaps=allowOverlaps,
        )
        for curTrackName in trackNameList:
            flatName = '_'.join(curTrackName)
            outFn = baseDir + os.sep + flatName
            cls.extractOneTrackManyRegsToOneFile(curTrackName, regionList, outFn, **sharedOptions)
                
if __name__ == "__main__":
    # Command-line entry point: expects three or four arguments after the
    # script name (track name spec, region spec, asOriginal, optional file name).
    if not (4 <= len(sys.argv) <= 5):
        # Parenthesized single string prints identically under Python 2.
        print('Syntax: python TrackExtractor.py trackName:subtype genome:chr:start-end asOriginal [filename]')
        sys.exit(0)

    trackName = parseTrackNameSpec(sys.argv[1])

    # Exactly one region is supported from the command line.
    regions = parseRegSpec(sys.argv[2])
    assert len(regions) == 1

    # NOTE(review): eval() executes whatever expression is given on the
    # command line; presumably only 'True'/'False' is intended - confirm and
    # consider a stricter parser.
    asOriginal = eval(sys.argv[3])

    # The output file name is optional; None is passed on when it is absent.
    if len(sys.argv) == 5:
        fn = sys.argv[4]
    else:
        fn = None

    TrackExtractor.extract(trackName, regions[0], fn, asOriginal=asOriginal)