def __new__(cls, genome):
    """Create a minimal one-element region list for *genome*.

    Looks up the chromosome list of *genome* through
    ``GenomeInfo.getChrList`` and, when it is non-empty, returns a plain
    ``list`` holding a single ``GenomeRegion`` on the first chromosome with
    start 0 and end 1.

    Note that the returned object is a list, not an instance of ``cls``.

    Returns:
        A one-element list with the ``GenomeRegion``, or ``None`` when the
        genome has no chromosomes listed.
    """
    from gtrackcore.track.core.GenomeRegion import GenomeRegion
    from gtrackcore.metadata.GenomeInfo import GenomeInfo

    chrList = GenomeInfo.getChrList(genome)
    if len(chrList) > 0:
        # Reuse the list fetched above rather than querying GenomeInfo again.
        return [GenomeRegion(genome, chrList[0], 0, 1)]

    # Empty chromosome list: nothing to build a region from.
    return None
def parseRegSpec(regSpec, genome = None, includeExtraChrs = False):
    """Parse a textual region specification into ``GenomeRegion`` objects.

    *regSpec* is a comma-separated list of items. Each item consists of
    colon-separated parts::

        [genome:]*                  every chromosome, full length
        [genome:]chr                one chromosome, full length
        [genome:]chr:start-end      part of a chromosome
        [genome:]chr:start-         from start to the chromosome end

    Positions in the specification are 1-indexed and end-inclusive; they are
    converted to 0-indexed, end-exclusive coordinates. The letters ``k`` and
    ``m`` are expanded textually: in the start position to ``001`` and
    ``000001``, in the end position to ``000`` and ``000000`` (so
    ``10k-20k`` means 10001..20000).

    Args:
        regSpec: The region specification string.
        genome: Genome name. When ``None``, the first colon-separated part
            of the first item is used as the genome name.
        includeExtraChrs: For the ``*`` form, use
            ``GenomeInfo.getExtendedChrList`` instead of
            ``GenomeInfo.getChrList`` to enumerate chromosomes.

    Raises:
        AssertionError: If an item is malformed (wrong prefix, ``*`` mixed
            with other items, missing ``-``, positions out of range).
        InvalidFormatError: If the chromosome length lookup fails.
    """
    from gtrackcore.track.core.GenomeRegion import GenomeRegion
    from gtrackcore.metadata.GenomeInfo import GenomeInfo

    # NOTE(review): this class and the collected `regions` list are not used
    # or returned by any statement visible in this block -- confirm whether
    # the tail of the function (wrapping and returning `regions`) is missing.
    class SimpleUserBinSource(list):
        pass

    regions = []
    allRegSpecs = regSpec.strip().split(',')
    for curRegSpec in allRegSpecs:
        regParts = curRegSpec.strip().split(':')
        if genome is None:
            genome = regParts[0]

        # A leading part that is neither '*' nor a known chromosome must be
        # the genome name; verify it and strip it off.
        if not (regParts[0]=='*' or regParts[0] in GenomeInfo.getExtendedChrList(genome)):
            assert regParts[0] == genome, \
                "Region specification does not start with one of '*' or correct chromosome or genome name. Region specification: %s. Genome: %s" % (curRegSpec, genome)
            regParts = regParts[1:]

        if regParts[0] == '*':
            assert len(regParts) == 1, \
                "Region specification starts with '*' but continues with ':'. Region specification: %s" % curRegSpec
            assert len(allRegSpecs) == 1, \
                "Region specification is '*', but is in a list with other region specifications: %s" % regSpec

            chrList = GenomeInfo.getExtendedChrList(genome) if includeExtraChrs else GenomeInfo.getChrList(genome)
            for chrName in chrList:
                regions.append(GenomeRegion(genome, chrName, 0, GenomeInfo.getChrLen(genome, chrName)))
        else:
            assert regParts[0] in GenomeInfo.getExtendedChrList(genome), \
                "Region specification does not start with chromosome specification. Region specification: %s " % curRegSpec

            chrName = regParts[0]
            try:
                chrLen = GenomeInfo.getChrLen(genome, chrName)
            except Exception:
                # The caught exception object was never used; this form is
                # valid syntax on both Python 2 and Python 3.
                raise InvalidFormatError("Chromosome '%s' does not exist for genome '%s'" % (chrName, genome))

            if len(regParts)>1:
                posParts = regParts[1]
                assert '-' in posParts, \
                    "Position specification does not include character '-'. Region specification: %s " % curRegSpec
                rawStart, rawEnd = posParts.split('-')

                start = int(rawStart.replace('k','001').replace('m','000001'))
                end = int(rawEnd.replace('k','000').replace('m','000000')) if rawEnd != '' else chrLen
                assert start >= 1, \
                    "Start position is not positive. Region specification: %s " % curRegSpec
                assert end >= start, \
                    "End position is not larger than start position. Region specification: %s " % curRegSpec
                assert end <= chrLen, \
                    "End position is larger than chromosome size. Genome: %s. Chromosome size: %s. Region specification: %s" % (genome, chrLen, curRegSpec)
                #-1 for conversion from 1-indexing to 0-indexing end-exclusive
                start-=1
            else:
                start,end = 0, chrLen

            regions.append( GenomeRegion(genome, chrName, start, end) )
def parseRegSpec(regSpec, genome=None, includeExtraChrs=False):
    """Parse a textual region specification into ``GenomeRegion`` objects.

    *regSpec* is a comma-separated list of items. Each item consists of
    colon-separated parts::

        [genome:]*                  every chromosome, full length
        [genome:]chr                one chromosome, full length
        [genome:]chr:start-end      part of a chromosome
        [genome:]chr:start-         from start to the chromosome end

    Positions in the specification are 1-indexed and end-inclusive; they are
    converted to 0-indexed, end-exclusive coordinates. The letters ``k`` and
    ``m`` are expanded textually: in the start position to ``001`` and
    ``000001``, in the end position to ``000`` and ``000000`` (so
    ``10k-20k`` means 10001..20000).

    Args:
        regSpec: The region specification string.
        genome: Genome name. When ``None``, the first colon-separated part
            of the first item is used as the genome name.
        includeExtraChrs: For the ``*`` form, use
            ``GenomeInfo.getExtendedChrList`` instead of
            ``GenomeInfo.getChrList`` to enumerate chromosomes.

    Raises:
        AssertionError: If an item is malformed (wrong prefix, ``*`` mixed
            with other items, missing ``-``, positions out of range).
        InvalidFormatError: If the chromosome length lookup fails.
    """
    from gtrackcore.track.core.GenomeRegion import GenomeRegion
    from gtrackcore.metadata.GenomeInfo import GenomeInfo

    # NOTE(review): this class and the collected `regions` list are not used
    # or returned by any statement visible in this block -- confirm whether
    # the tail of the function (wrapping and returning `regions`) is missing.
    class SimpleUserBinSource(list):
        pass

    regions = []
    allRegSpecs = regSpec.strip().split(',')
    for curRegSpec in allRegSpecs:
        regParts = curRegSpec.strip().split(':')
        if genome is None:
            genome = regParts[0]

        # A leading part that is neither '*' nor a known chromosome must be
        # the genome name; verify it and strip it off.
        if not (regParts[0] == '*' or
                regParts[0] in GenomeInfo.getExtendedChrList(genome)):
            assert regParts[0] == genome, \
                "Region specification does not start with one of '*' or correct chromosome or genome name. Region specification: %s. Genome: %s" % (curRegSpec, genome)
            regParts = regParts[1:]

        if regParts[0] == '*':
            assert len(regParts) == 1, \
                "Region specification starts with '*' but continues with ':'. Region specification: %s" % curRegSpec
            assert len(allRegSpecs) == 1, \
                "Region specification is '*', but is in a list with other region specifications: %s" % regSpec

            chrList = GenomeInfo.getExtendedChrList(
                genome) if includeExtraChrs else GenomeInfo.getChrList(genome)
            for chrName in chrList:
                regions.append(
                    GenomeRegion(genome, chrName, 0,
                                 GenomeInfo.getChrLen(genome, chrName)))
        else:
            assert regParts[0] in GenomeInfo.getExtendedChrList(genome), \
                "Region specification does not start with chromosome specification. Region specification: %s " % curRegSpec

            chrName = regParts[0]
            try:
                chrLen = GenomeInfo.getChrLen(genome, chrName)
            except Exception:
                # The caught exception object was never used; this form is
                # valid syntax on both Python 2 and Python 3.
                raise InvalidFormatError(
                    "Chromosome '%s' does not exist for genome '%s'" %
                    (chrName, genome))

            if len(regParts) > 1:
                posParts = regParts[1]
                assert '-' in posParts, \
                    "Position specification does not include character '-'. Region specification: %s " % curRegSpec
                rawStart, rawEnd = posParts.split('-')

                start = int(
                    rawStart.replace('k', '001').replace('m', '000001'))
                end = int(rawEnd.replace('k', '000').replace(
                    'm', '000000')) if rawEnd != '' else chrLen
                assert start >= 1, \
                    "Start position is not positive. Region specification: %s " % curRegSpec
                assert end >= start, \
                    "End position is not larger than start position. Region specification: %s " % curRegSpec
                assert end <= chrLen, \
                    "End position is larger than chromosome size. Genome: %s. Chromosome size: %s. Region specification: %s" % (genome, chrLen, curRegSpec)
                #-1 for conversion from 1-indexing to 0-indexing end-exclusive
                start -= 1
            else:
                start, end = 0, chrLen

            regions.append(GenomeRegion(genome, chrName, start, end))
def __new__(cls, genome):
    """Create a minimal one-element region list for *genome*.

    Looks up the chromosome list of *genome* through
    ``GenomeInfo.getChrList`` and, when it is non-empty, returns a plain
    ``list`` holding a single ``GenomeRegion`` on the first chromosome with
    start 0 and end 1.

    Note that the returned object is a list, not an instance of ``cls``.

    Returns:
        A one-element list with the ``GenomeRegion``, or ``None`` when the
        genome has no chromosomes listed.
    """
    from gtrackcore.track.core.GenomeRegion import GenomeRegion
    from gtrackcore.metadata.GenomeInfo import GenomeInfo

    chrList = GenomeInfo.getChrList(genome)
    if len(chrList) > 0:
        # Reuse the list fetched above rather than querying GenomeInfo again.
        return [GenomeRegion(genome, chrList[0], 0, 1)]

    # Empty chromosome list: nothing to build a region from.
    return None