def run(self):
    """Lift self.bed from self.genome onto self.othergenome and build a bigBed.

    Steps, in order:
      1. Run halLiftover, writing <genomeoutdir>/<othergenome>.bed.
      2. Pass the result through filterLongIntrons (threshold 100000) into
         <genomeoutdir>/<othergenome>-filtered.bed.
      3. bedSort the filtered file back over the lifted bed.
      4. Convert the sorted bed to <genomeoutdir>/<othergenome>.bb with
         bedToBigBed, using <outdir>/<othergenome>/chrom.sizes.
      5. Remove both intermediate bed files.
    """
    prefix = os.path.join(self.genomeoutdir, self.othergenome)
    lifted = "%s.bed" % prefix

    # --keepExtra and --outBedVersion are mutually exclusive here: extra
    # fields are carried through when present, otherwise the bed version
    # is pinned to the number of fields.
    liftover_cmd = "halLiftover %s %s %s %s %s" % (
        self.halfile, self.genome, self.bed, self.othergenome, lifted)
    if len(self.extrafields) > 0:
        liftover_cmd += " --keepExtra"
    else:
        liftover_cmd += " --outBedVersion %d" % self.numfield
    if self.tab:
        liftover_cmd += " --tab"
    system(liftover_cmd)

    filtered = "%s-filtered.bed" % prefix
    filterLongIntrons(lifted, filtered, 100000, self.tab,
                      self.options.ucscNames)

    # bedSort expects tab-separated beds, so a non-tab bed is converted
    # before sorting and converted back afterwards.
    if not self.tab:
        tabifyBed(filtered)
    system("bedSort %s %s" % (filtered, lifted))
    if not self.tab:
        untabifyBed(lifted)

    bigbed = os.path.join(self.genomeoutdir, "%s.bb" % self.othergenome)
    chromsizes = os.path.join(self.outdir, self.othergenome, "chrom.sizes")

    if self.asfile:
        # With an autoSql (.as) file, the extra fields are declared in the
        # bed type and indexed alongside 'name'.
        extracount = len(self.extrafields)
        if extracount > 0:
            bedtype = "bed%d+%d" % (self.numfield - extracount, extracount)
            indexed = "name,%s" % ",".join(self.extrafields)
        else:
            bedtype = "bed%d" % self.numfield
            indexed = "name"
        bigbed_cmd = "bedToBigBed -as=%s -type=%s -extraIndex=%s %s %s %s" % (
            self.asfile, bedtype, indexed, lifted, chromsizes, bigbed)
    else:
        bigbed_cmd = "bedToBigBed -type=bed%d %s %s %s" % (
            self.numfield, lifted, chromsizes, bigbed)
        # Only index on 'name' when the bed has at least four fields.
        if self.numfield >= 4:
            bigbed_cmd += " -extraIndex=name"
    if self.tab:
        bigbed_cmd += " -tab"
    system(bigbed_cmd)

    # Cleanup of the intermediate bed files.
    system("rm %s" % lifted)
    system("rm -f %s" % filtered)
def run(self):
    """Lift self.bed from self.genome onto self.othergenome and build a bigBed.

    Steps, in order:
      1. Run halLiftover, writing <genomeoutdir>/<othergenome>.bed.
      2. Pass the result through filterLongIntrons (threshold 100000) into
         <genomeoutdir>/<othergenome>-filtered.bed.
      3. bedSort the filtered file back over the lifted bed.
      4. Convert the sorted bed to <genomeoutdir>/<othergenome>.bb with
         bedToBigBed, using <outdir>/<othergenome>/chrom.sizes.
      5. Remove both intermediate bed files.
    """
    prefix = os.path.join(self.genomeoutdir, self.othergenome)
    lifted = "%s.bed" % prefix

    # --keepExtra and --outBedVersion are mutually exclusive here: extra
    # fields are carried through when present, otherwise the bed version
    # is pinned to the number of fields.
    liftover_cmd = "halLiftover %s %s %s %s %s" % (
        self.halfile, self.genome, self.bed, self.othergenome, lifted)
    if len(self.extrafields) > 0:
        liftover_cmd += " --keepExtra"
    else:
        liftover_cmd += " --outBedVersion %d" % self.numfield
    if self.tab:
        liftover_cmd += " --tab"
    system(liftover_cmd)

    filtered = "%s-filtered.bed" % prefix
    filterLongIntrons(lifted, filtered, 100000, self.tab,
                      self.options.ucscNames)

    # bedSort expects tab-separated beds, so a non-tab bed is converted
    # before sorting and converted back afterwards.
    if not self.tab:
        tabifyBed(filtered)
    system("bedSort %s %s" % (filtered, lifted))
    if not self.tab:
        untabifyBed(lifted)

    bigbed = os.path.join(self.genomeoutdir, "%s.bb" % self.othergenome)
    chromsizes = os.path.join(self.outdir, self.othergenome, "chrom.sizes")

    if self.asfile:
        # With an autoSql (.as) file, the extra fields are declared in the
        # bed type and indexed alongside 'name'.
        extracount = len(self.extrafields)
        if extracount > 0:
            bedtype = "bed%d+%d" % (self.numfield - extracount, extracount)
            indexed = "name,%s" % ",".join(self.extrafields)
        else:
            bedtype = "bed%d" % self.numfield
            indexed = "name"
        bigbed_cmd = "bedToBigBed -as=%s -type=%s -extraIndex=%s %s %s %s" % (
            self.asfile, bedtype, indexed, lifted, chromsizes, bigbed)
    else:
        bigbed_cmd = "bedToBigBed -type=bed%d %s %s %s" % (
            self.numfield, lifted, chromsizes, bigbed)
        # Only index on 'name' when the bed has at least four fields.
        if self.numfield >= 4:
            bigbed_cmd += " -extraIndex=name"
    if self.tab:
        bigbed_cmd += " -tab"
    system(bigbed_cmd)

    # Cleanup of the intermediate bed files.
    system("rm %s" % lifted)
    system("rm -f %s" % filtered)
def run(self):
    """Build one bigBed per genome with input beds and schedule liftovers.

    The input directory has the hierarchy indir/genome/chr1.bed,
    chr2.bed, ...  For every entry of self.indir that names a genome in
    self.genome2seq2len, the bed files are concatenated, filtered, sorted
    and converted to <bigbeddir>/<genome>/<genome>.bb.  Unless
    self.noLiftover is set, a LiftoverBed child target is added for every
    other genome so the records are lifted over to its coordinates.  The
    temporary bed files are handed to a CleanupFiles follow-on target.
    """
    genomes = self.genome2seq2len.keys()
    tempbeds = []
    for genome in os.listdir(self.indir):
        if genome not in genomes:
            continue
        genomeindir = os.path.join(self.indir, genome)
        assert os.path.isdir(genomeindir)

        # Output bed directory for the current genome.
        genomeoutdir = os.path.join(self.bigbeddir, genome)
        system("mkdir -p %s" % genomeoutdir)

        # Collect the bed files (".bed") and the optional autoSql file
        # (".as") of this genome.
        _bedfiles, asfile, extrafields, numfield = readBedDir(genomeindir,
                                                              self.tab)
        if numfield < 3:
            # Empty (probably from an automated process) or otherwise
            # malformed bed: warn, then carry on as far as possible.
            self.logToMaster("WARNING: input bed files in %s have less "
                             "than 3 fields, or are completely empty. "
                             "Proceeding anyway." % genomeindir)
            numfield = 3

        if not asfile:
            # Without an .as file, anything beyond 12 fields is ignored.
            numfield = min(numfield, 12)
        else:
            # Keep a copy of the .as file next to the bigBed.
            system("cp %s %s" % (asfile,
                                 os.path.join(genomeoutdir, "%s.as" % genome)))

        # Concatenate all input beds, keeping the first numfield columns.
        prefix = os.path.join(genomeoutdir, genome)
        tempbed = "%s-temp.bed" % prefix
        system("cat %s/*bed | cut -f-%d > %s" % (genomeindir, numfield,
                                                 tempbed))
        filterbed = "%s-temp-filtered.bed" % prefix
        filterLongIntrons(tempbed, filterbed, 100000, self.tab,
                          self.options.ucscNames)

        # bedSort expects tab-separated beds, so a non-tab bed is converted
        # before sorting and converted back afterwards.
        if not self.tab:
            tabifyBed(filterbed)
        system("bedSort %s %s" % (filterbed, tempbed))
        if not self.tab:
            untabifyBed(tempbed)

        outbigbed = os.path.join(genomeoutdir, "%s.bb" % genome)
        chrsizefile = os.path.join(self.outdir, genome, "chrom.sizes")
        if asfile:
            assert numfield >= 4  # -extraIndex=name will fail otherwise.
            numextra = len(extrafields)
            if numextra > 0:
                bedtype = "bed%d+%d" % (numfield - numextra, numextra)
                extraIndex = "name,%s" % ",".join(extrafields)
            else:
                bedtype = "bed%d" % numfield
                extraIndex = "name"
            fields = (asfile, bedtype, extraIndex, tempbed, chrsizefile,
                      outbigbed)
            if self.tab:
                cmd = ("bedToBigBed -tab -as=%s -type=%s -extraIndex=%s "
                       "%s %s %s" % fields)
            else:
                cmd = ("bedToBigBed -as=%s -type=%s -extraIndex=%s "
                       "%s %s %s" % fields)
        else:
            # Index on the 'name' field only if the bed has one.
            indexParameter = "-extraIndex=name" if numfield >= 4 else ""
            fields = (numfield, indexParameter, tempbed, chrsizefile,
                      outbigbed)
            if self.tab:
                cmd = "bedToBigBed -tab -type=bed%d %s %s %s %s" % fields
            else:
                cmd = "bedToBigBed -type=bed%d %s %s %s %s" % fields
        system(cmd)

        # Liftover to all other genomes.
        if not self.noLiftover:
            for othergenome in genomes:
                if othergenome != genome:
                    self.addChildTarget(
                        LiftoverBed(genomeoutdir, tempbed, self.tab, asfile,
                                    extrafields, numfield, genome,
                                    othergenome, self.halfile, self.outdir,
                                    self.options))
        tempbeds.extend([tempbed, filterbed])
    self.setFollowOnTarget(CleanupFiles(tempbeds))
def run(self):
    """Build one bigBed per genome with input beds and schedule liftovers.

    The input directory has the hierarchy indir/genome/chr1.bed,
    chr2.bed, ...  For every entry of self.indir that names a genome in
    self.genome2seq2len, the bed files are concatenated, filtered, sorted
    and converted to <bigbeddir>/<genome>/<genome>.bb.  Unless
    self.noLiftover is set, a LiftoverBed child target is added for every
    other genome so the records are lifted over to its coordinates.  The
    temporary bed files are handed to a CleanupFiles follow-on target.
    """
    genomes = self.genome2seq2len.keys()
    tempbeds = []
    for genome in os.listdir(self.indir):
        if genome not in genomes:
            continue
        genomeindir = os.path.join(self.indir, genome)
        assert os.path.isdir(genomeindir)

        # Output bed directory for the current genome.
        genomeoutdir = os.path.join(self.bigbeddir, genome)
        system("mkdir -p %s" % genomeoutdir)

        # Collect the bed files (".bed") and the optional autoSql file
        # (".as") of this genome.
        _bedfiles, asfile, extrafields, numfield = readBedDir(genomeindir,
                                                              self.tab)
        if numfield < 3:
            # Empty (probably from an automated process) or otherwise
            # malformed bed: warn, then carry on as far as possible.
            self.logToMaster("WARNING: input bed files in %s have less "
                             "than 3 fields, or are completely empty. "
                             "Proceeding anyway." % genomeindir)
            numfield = 3

        if not asfile:
            # Without an .as file, anything beyond 12 fields is ignored.
            numfield = min(numfield, 12)
        else:
            # Keep a copy of the .as file next to the bigBed.
            system("cp %s %s" % (asfile,
                                 os.path.join(genomeoutdir, "%s.as" % genome)))

        # Concatenate all input beds, keeping the first numfield columns.
        prefix = os.path.join(genomeoutdir, genome)
        tempbed = "%s-temp.bed" % prefix
        system("cat %s/*bed | cut -f-%d > %s" % (genomeindir, numfield,
                                                 tempbed))
        filterbed = "%s-temp-filtered.bed" % prefix
        filterLongIntrons(tempbed, filterbed, 100000, self.tab,
                          self.options.ucscNames)

        # bedSort expects tab-separated beds, so a non-tab bed is converted
        # before sorting and converted back afterwards.
        if not self.tab:
            tabifyBed(filterbed)
        system("bedSort %s %s" % (filterbed, tempbed))
        if not self.tab:
            untabifyBed(tempbed)

        outbigbed = os.path.join(genomeoutdir, "%s.bb" % genome)
        chrsizefile = os.path.join(self.outdir, genome, "chrom.sizes")
        if asfile:
            assert numfield >= 4  # -extraIndex=name will fail otherwise.
            numextra = len(extrafields)
            if numextra > 0:
                bedtype = "bed%d+%d" % (numfield - numextra, numextra)
                extraIndex = "name,%s" % ",".join(extrafields)
            else:
                bedtype = "bed%d" % numfield
                extraIndex = "name"
            fields = (asfile, bedtype, extraIndex, tempbed, chrsizefile,
                      outbigbed)
            if self.tab:
                cmd = ("bedToBigBed -tab -as=%s -type=%s -extraIndex=%s "
                       "%s %s %s" % fields)
            else:
                cmd = ("bedToBigBed -as=%s -type=%s -extraIndex=%s "
                       "%s %s %s" % fields)
        else:
            # Index on the 'name' field only if the bed has one.
            indexParameter = "-extraIndex=name" if numfield >= 4 else ""
            fields = (numfield, indexParameter, tempbed, chrsizefile,
                      outbigbed)
            if self.tab:
                cmd = "bedToBigBed -tab -type=bed%d %s %s %s %s" % fields
            else:
                cmd = "bedToBigBed -type=bed%d %s %s %s %s" % fields
        system(cmd)

        # Liftover to all other genomes.
        if not self.noLiftover:
            for othergenome in genomes:
                if othergenome != genome:
                    self.addChildTarget(
                        LiftoverBed(genomeoutdir, tempbed, self.tab, asfile,
                                    extrafields, numfield, genome,
                                    othergenome, self.halfile, self.outdir,
                                    self.options))
        tempbeds.extend([tempbed, filterbed])
    self.setFollowOnTarget(CleanupFiles(tempbeds))