Esempio n. 1
0
    def run(self):
        """Lift self.bed over to self.othergenome and publish it as a bigBed.

        Steps, in order: run halLiftover from self.genome onto
        self.othergenome, pass the result through filterLongIntrons, sort it
        with bedSort, convert it with bedToBigBed into
        <genomeoutdir>/<othergenome>.bb, and finally delete the two
        intermediate bed files.
        """
        out_prefix = os.path.join(self.genomeoutdir, self.othergenome)
        lifted_bed = "%s.bed" % out_prefix
        filtered_bed = "%s-filtered.bed" % out_prefix
        num_extra = len(self.extrafields)

        # --- halLiftover ---------------------------------------------------
        liftover_cmd = "halLiftover %s %s %s %s %s" % (
            self.halfile, self.genome, self.bed, self.othergenome, lifted_bed)
        if num_extra > 0:
            # Extra fields are present: ask halLiftover to keep them.
            liftover_cmd += " --keepExtra"
        else:
            liftover_cmd += " --outBedVersion %d" % self.numfield
        if self.tab:
            liftover_cmd += " --tab"
        system(liftover_cmd)

        # --- filter + sort -------------------------------------------------
        # NOTE(review): 100000 is presumably the maximum intron length that
        # filterLongIntrons lets through -- confirm against its definition.
        filterLongIntrons(lifted_bed, filtered_bed, 100000, self.tab,
                          self.options.ucscNames)

        # bedSort expects tab-separated beds, so a space-separated bed is
        # converted to tabs before sorting and converted back afterwards.
        # The sorted output overwrites the original liftover bed.
        needs_tab_roundtrip = not self.tab
        if needs_tab_roundtrip:
            tabifyBed(filtered_bed)
        system("bedSort %s %s" % (filtered_bed, lifted_bed))
        if needs_tab_roundtrip:
            untabifyBed(lifted_bed)

        # --- bedToBigBed ---------------------------------------------------
        bigbed_path = os.path.join(self.genomeoutdir,
                                   "%s.bb" % self.othergenome)
        chrom_sizes = os.path.join(self.outdir, self.othergenome,
                                   "chrom.sizes")
        if self.asfile:
            # An autoSql (.as) file is available: declare the extra columns
            # in the type and index on them in addition to 'name'.
            if num_extra > 0:
                bed_type = "bed%d+%d" % (self.numfield - num_extra, num_extra)
                index_fields = "name,%s" % ",".join(self.extrafields)
            else:
                bed_type = "bed%d" % self.numfield
                index_fields = "name"
            bigbed_cmd = (
                "bedToBigBed -as=%s -type=%s -extraIndex=%s %s %s %s" % (
                    self.asfile, bed_type, index_fields, lifted_bed,
                    chrom_sizes, bigbed_path))
        else:
            bigbed_cmd = "bedToBigBed -type=bed%d %s %s %s" % (
                self.numfield, lifted_bed, chrom_sizes, bigbed_path)
            # A 'name' column only exists from bed4 upwards.
            if self.numfield >= 4:
                bigbed_cmd += " -extraIndex=name"
        if self.tab:
            bigbed_cmd += " -tab"
        system(bigbed_cmd)

        # --- cleanup -------------------------------------------------------
        for rm_cmd in ("rm %s" % lifted_bed, "rm -f %s" % filtered_bed):
            system(rm_cmd)
Esempio n. 2
0
    def run(self):
        """Produce <genomeoutdir>/<othergenome>.bb from a lifted-over bed.

        The bed in self.bed (coordinates of self.genome) is mapped onto
        self.othergenome with halLiftover, run through filterLongIntrons,
        sorted with bedSort and converted with bedToBigBed. The intermediate
        bed files are removed at the end.
        """
        has_extra_fields = len(self.extrafields) > 0
        base_path = os.path.join(self.genomeoutdir, self.othergenome)
        sorted_bed = "%s.bed" % base_path

        # Assemble the halLiftover invocation piece by piece.
        liftover_pieces = [
            "halLiftover %s %s %s %s %s" % (
                self.halfile, self.genome, self.bed, self.othergenome,
                sorted_bed),
            " --keepExtra" if has_extra_fields
            else " --outBedVersion %d" % self.numfield,
        ]
        if self.tab:
            liftover_pieces.append(" --tab")
        system("".join(liftover_pieces))

        # NOTE(review): the 100000 argument looks like an intron-length
        # cutoff -- verify against filterLongIntrons.
        intron_filtered_bed = "%s-filtered.bed" % base_path
        filterLongIntrons(sorted_bed, intron_filtered_bed, 100000, self.tab,
                          self.options.ucscNames)

        # bedSort expects tab-separated beds: tabify first when the beds are
        # not tab-separated, and undo that on the sorted output. bedSort
        # writes its result back over the liftover bed.
        if not self.tab:
            tabifyBed(intron_filtered_bed)
        system("bedSort %s %s" % (intron_filtered_bed, sorted_bed))
        if not self.tab:
            untabifyBed(sorted_bed)

        destination = os.path.join(self.genomeoutdir,
                                   "%s.bb" % self.othergenome)
        sizes_file = os.path.join(self.outdir, self.othergenome,
                                  "chrom.sizes")
        io_args = (sorted_bed, sizes_file, destination)

        if self.asfile:
            # With a .as file the extra columns are described in -type and
            # indexed alongside 'name'.
            if has_extra_fields:
                extra_count = len(self.extrafields)
                bed_format = "bed%d+%d" % (self.numfield - extra_count,
                                           extra_count)
                indexed = "name,%s" % ",".join(self.extrafields)
            else:
                bed_format = "bed%d" % self.numfield
                indexed = "name"
            bigbed_pieces = [
                "bedToBigBed -as=%s -type=%s -extraIndex=%s %s %s %s" % (
                    (self.asfile, bed_format, indexed) + io_args)]
        else:
            bigbed_pieces = [
                "bedToBigBed -type=bed%d %s %s %s" % (
                    (self.numfield,) + io_args)]
            # Only index on 'name' when the bed has at least four fields.
            if self.numfield >= 4:
                bigbed_pieces.append(" -extraIndex=name")
        if self.tab:
            bigbed_pieces.append(" -tab")
        system("".join(bigbed_pieces))

        # Cleanup of the intermediate files.
        system("rm %s" % sorted_bed)
        system("rm -f %s" % intron_filtered_bed)
Esempio n. 3
0
    def run(self):
        """Build one bigBed per input genome and schedule its liftovers.

        The input directory has the hierarchy indir/<genome>/chr1.bed,
        chr2.bed, ... (plus an optional .as file). Every entry of indir
        whose name is a key of self.genome2seq2len is processed: its beds
        are concatenated, filtered, sorted and converted into
        <bigbeddir>/<genome>/<genome>.bb. Unless self.noLiftover is set, a
        LiftoverBed child target is added for every other genome. The
        temporary beds are handed to a CleanupFiles follow-on target.
        """
        genomes = self.genome2seq2len.keys()
        scratch_beds = []

        for genome in os.listdir(self.indir):
            if genome not in genomes:
                continue
            genomeindir = os.path.join(self.indir, genome)
            assert os.path.isdir(genomeindir)

            # Output directory for the current genome.
            genomeoutdir = os.path.join(self.bigbeddir, genome)
            system("mkdir -p %s" % genomeoutdir)

            # Collect the bed files (".bed") and, if present, the ".as" file.
            _bedfiles, asfile, extrafields, numfield = readBedDir(
                genomeindir, self.tab)
            if numfield < 3:
                # Empty (probably produced by an automated process) or
                # otherwise malformed bed: warn, then carry on as far as
                # possible by pretending it is a bed3.
                self.logToMaster("WARNING: input bed files in %s have less "
                                 "than 3 fields, or are completely empty. "
                                 "Proceeding anyway." % genomeindir)
                numfield = 3

            if asfile:
                # Keep a copy of the .as file next to the bigBed.
                as_copy = os.path.join(genomeoutdir, "%s.as" % genome)
                system("cp %s %s" % (asfile, as_copy))
            elif numfield > 12:
                # No .as file but more than 12 fields: treat as bed12.
                numfield = 12

            # Concatenate all input beds, keeping the first numfield columns.
            out_prefix = os.path.join(genomeoutdir, genome)
            tempbed = "%s-temp.bed" % out_prefix
            system("cat %s/*bed | cut -f-%d > %s" %
                   (genomeindir, numfield, tempbed))

            # NOTE(review): 100000 is presumably an intron-length cutoff --
            # confirm against filterLongIntrons.
            filterbed = "%s-temp-filtered.bed" % out_prefix
            filterLongIntrons(tempbed, filterbed, 100000, self.tab,
                              self.options.ucscNames)

            # bedSort expects tab-separated beds, so space-separated input
            # is tabified before the sort and untabified afterwards. The
            # sorted result overwrites tempbed.
            space_separated = not self.tab
            if space_separated:
                tabifyBed(filterbed)
            system("bedSort %s %s" % (filterbed, tempbed))
            if space_separated:
                untabifyBed(tempbed)

            # Convert to <genomeoutdir>/<genome>.bb.
            outbigbed = os.path.join(genomeoutdir, "%s.bb" % genome)
            chrsizefile = os.path.join(self.outdir, genome, "chrom.sizes")
            bigbed_exe = "bedToBigBed -tab" if self.tab else "bedToBigBed"
            if asfile:
                # -extraIndex=name will fail if this is not true.
                assert numfield >= 4
                numextra = len(extrafields)
                if numextra > 0:
                    bed_type = "bed%d+%d" % (numfield - numextra, numextra)
                    index_fields = "name,%s" % ",".join(extrafields)
                else:
                    bed_type = "bed%d" % numfield
                    index_fields = "name"
                bigbed_cmd = "%s -as=%s -type=%s -extraIndex=%s %s %s %s" % (
                    bigbed_exe, asfile, bed_type, index_fields, tempbed,
                    chrsizefile, outbigbed)
            else:
                # Index on the 'name' field only if the bed has one.
                name_index = "-extraIndex=name" if numfield >= 4 else ""
                bigbed_cmd = "%s -type=bed%d %s %s %s %s" % (
                    bigbed_exe, numfield, name_index, tempbed, chrsizefile,
                    outbigbed)
            system(bigbed_cmd)

            # Liftover to all other genomes.
            if not self.noLiftover:
                for othergenome in genomes:
                    if othergenome != genome:
                        self.addChildTarget(
                            LiftoverBed(genomeoutdir, tempbed, self.tab,
                                        asfile, extrafields, numfield, genome,
                                        othergenome, self.halfile,
                                        self.outdir, self.options))

            scratch_beds.extend([tempbed, filterbed])

        self.setFollowOnTarget(CleanupFiles(scratch_beds))
Esempio n. 4
0
    def run(self):
        """Convert each genome's beds to a bigBed and queue the liftovers.

        Expected layout: indir/<genome>/chr1.bed, chr2.bed, ... For each
        entry of indir that is also a key of self.genome2seq2len, the bed
        records are concatenated, filtered with filterLongIntrons, sorted
        with bedSort and written to <bigbeddir>/<genome>/<genome>.bb. When
        self.noLiftover is false, one LiftoverBed child target per other
        genome is added. A CleanupFiles follow-on target receives the list
        of temporary beds.
        """
        genomes = self.genome2seq2len.keys()
        leftovers = []

        for genome in os.listdir(self.indir):
            if genome not in genomes:
                continue
            genomeindir = os.path.join(self.indir, genome)
            assert os.path.isdir(genomeindir)

            # Create the bed directory for the current genome.
            genomeoutdir = os.path.join(self.bigbeddir, genome)
            system("mkdir -p %s" % genomeoutdir)

            # Bed files (".bed" ext) plus the ".as" file when available.
            bed_info = readBedDir(genomeindir, self.tab)
            _unused_bedfiles, asfile, extrafields, numfield = bed_info
            if numfield < 3:
                # Empty or malformed input (probably from an automated
                # process). Tell the user, then try to get as far as
                # possible anyway with the minimal bed3 layout.
                self.logToMaster("WARNING: input bed files in %s have less "
                                 "than 3 fields, or are completely empty. "
                                 "Proceeding anyway." % genomeindir)
                numfield = 3

            if not asfile:
                # Without a .as file anything beyond 12 fields is ignored.
                numfield = min(numfield, 12)
            else:
                # Copy the .as file into the bigbed directory.
                system("cp %s %s" % (
                    asfile, os.path.join(genomeoutdir, "%s.as" % genome)))

            # Concatenate the input beds, cut to numfield columns.
            tempbed = "%s-temp.bed" % os.path.join(genomeoutdir, genome)
            concat_cmd = "cat %s/*bed | cut -f-%d > %s" % (
                genomeindir, numfield, tempbed)
            system(concat_cmd)

            # NOTE(review): 100000 looks like a maximum intron length --
            # verify against filterLongIntrons.
            filterbed = "%s-temp-filtered.bed" % os.path.join(
                genomeoutdir, genome)
            filterLongIntrons(tempbed, filterbed, 100000, self.tab,
                              self.options.ucscNames)

            # bedSort expects tab-separated beds, so non-tab input needs a
            # tabify / untabify round trip around the sort. The sorted
            # records replace the contents of tempbed.
            if not self.tab:
                tabifyBed(filterbed)
            sort_cmd = "bedSort %s %s" % (filterbed, tempbed)
            system(sort_cmd)
            if not self.tab:
                untabifyBed(tempbed)

            outbigbed = os.path.join(genomeoutdir, "%s.bb" % genome)
            chrsizefile = os.path.join(self.outdir, genome, "chrom.sizes")
            # Optional flag fragment spliced in right after the tool name.
            tab_flag = "-tab " if self.tab else ""
            if not asfile:
                # Index on the 'name' field if the bed has one.
                if numfield >= 4:
                    index_option = "-extraIndex=name"
                else:
                    index_option = ""
                convert_cmd = "bedToBigBed %s-type=bed%d %s %s %s %s" % (
                    tab_flag, numfield, index_option, tempbed, chrsizefile,
                    outbigbed)
            else:
                # -extraIndex=name will fail if this is not true.
                assert numfield >= 4
                extra_count = len(extrafields)
                if extra_count > 0:
                    type_spec = "bed%d+%d" % (numfield - extra_count,
                                              extra_count)
                    index_spec = "name,%s" % ",".join(extrafields)
                else:
                    type_spec = "bed%d" % numfield
                    index_spec = "name"
                convert_cmd = (
                    "bedToBigBed %s-as=%s -type=%s -extraIndex=%s %s %s %s"
                    % (tab_flag, asfile, type_spec, index_spec, tempbed,
                       chrsizefile, outbigbed))
            system(convert_cmd)

            # Liftover to all other genomes.
            if not self.noLiftover:
                liftover_targets = [g for g in genomes if not g == genome]
                for othergenome in liftover_targets:
                    child = LiftoverBed(genomeoutdir, tempbed, self.tab,
                                        asfile, extrafields, numfield, genome,
                                        othergenome, self.halfile,
                                        self.outdir, self.options)
                    self.addChildTarget(child)

            leftovers.append(tempbed)
            leftovers.append(filterbed)

        self.setFollowOnTarget(CleanupFiles(leftovers))