Example #1
0
def union_exons(refbed):
	'''
	Collect every exon from the refbed file, merge them with
	BED.unionBed3 and return what build_bitsets makes of the merged set.
	'''
	from qcmodule import BED
	merged = BED.unionBed3(BED.ParseBED(refbed).getExon())
	return build_bitsets(merged)
Example #2
0
def union_exons(refbed):
    '''
    Return the bitsets built from the union of all exons in refbed.

    The BED file is parsed, its exons are extracted and merged, and the
    merged list is passed to build_bitsets.
    '''
    from qcmodule import BED

    parser = BED.ParseBED(refbed)
    exons = parser.getExon()
    return build_bitsets(BED.unionBed3(exons))
def process_gene_model(gene_model):
	'''
	Partition a BED gene model into mutually exclusive feature classes.

	CDS exons, UTRs and introns are merged per class; CDS is removed from
	the UTRs, CDS and UTRs are removed from the introns, and all four are
	removed from the 1kb/5kb/10kb up- and downstream windows.

	Returns a 20-item tuple: the bitsets for CDS exon, intron, 5'UTR,
	3'UTR, upstream 1/5/10kb and downstream 1/5/10kb, followed by the
	cal_size results for the same ten classes in the same order.
	'''
	print >>sys.stderr, "processing " + gene_model + ' ...',
	model = BED.ParseBED(gene_model)
	three_prime = model.getUTR(utr=3)
	five_prime = model.getUTR(utr=5)
	coding = model.getCDSExon()
	introns = model.getIntron()

	introns = BED.unionBed3(introns)
	coding = BED.unionBed3(coding)
	five_prime = BED.unionBed3(five_prime)
	three_prime = BED.unionBed3(three_prime)

	# CDS takes precedence over UTR; CDS and UTR take precedence over intron
	five_prime = BED.subtractBed3(five_prime, coding)
	three_prime = BED.subtractBed3(three_prime, coding)
	for genic in (coding, five_prime, three_prime):
		introns = BED.subtractBed3(introns, genic)

	sizes = (1000, 5000, 10000)
	directions = ("up", "down")

	# flanking windows keyed by (direction, size)
	flank = {}
	for size in sizes:
		for direction in directions:
			flank[(direction, size)] = model.getIntergenic(direction=direction, size=size)

	# merge intergenic regions (all upstream windows first, then downstream)
	windows = [(direction, size) for direction in directions for size in sizes]
	for key in windows:
		flank[key] = BED.unionBed3(flank[key])

	# purify intergenic regions: drop anything that is CDS, UTR or intron
	for size in sizes:
		for direction in directions:
			key = (direction, size)
			for genic in (coding, five_prime, three_prime, introns):
				flank[key] = BED.subtractBed3(flank[key], genic)

	# build bitsets
	coding_bits = build_bitsets(coding)
	five_bits = build_bitsets(five_prime)
	three_bits = build_bitsets(three_prime)
	intron_bits = build_bitsets(introns)
	flank_bits = [build_bitsets(flank[key]) for key in windows]

	coding_len = cal_size(coding)
	intron_len = cal_size(introns)
	three_len = cal_size(three_prime)
	five_len = cal_size(five_prime)
	flank_len = [cal_size(flank[key]) for key in windows]

	print >>sys.stderr, "Done"
	return tuple([coding_bits, intron_bits, five_bits, three_bits] + flank_bits +
			[coding_len, intron_len, five_len, three_len] + flank_len)
Example #4
0
def process_gene_model(gene_model):
	'''
	Read a gene model in BED format and derive non-overlapping regions.

	Output is a tuple of 20 items. Items 1-10 are build_bitsets results for
	CDS exon, intron, 5'UTR, 3'UTR, upstream 1kb/5kb/10kb and downstream
	1kb/5kb/10kb; items 11-20 are the cal_size results in that same order.
	'''
	print >>sys.stderr, "processing " + gene_model + ' ...',
	bed = BED.ParseBED(gene_model)

	def strip(region, masks):
		# subtract each mask from region, in the order given
		for mask in masks:
			region = BED.subtractBed3(region, mask)
		return region

	utr3 = bed.getUTR(utr=3)
	utr5 = bed.getUTR(utr=5)
	cds = bed.getCDSExon()
	intr = bed.getIntron()

	intr, cds, utr5, utr3 = [BED.unionBed3(r) for r in (intr, cds, utr5, utr3)]

	# UTRs lose whatever is coding; introns lose coding and UTR bases
	utr5 = strip(utr5, [cds])
	utr3 = strip(utr3, [cds])
	intr = strip(intr, [cds, utr5, utr3])

	up1, down1, up5, down5, up10, down10 = [
		bed.getIntergenic(direction=d, size=s)
		for s in (1000, 5000, 10000)
		for d in ("up", "down")]

	# merge intergenic regions
	up1, up5, up10, down1, down5, down10 = [
		BED.unionBed3(r) for r in (up1, up5, up10, down1, down5, down10)]

	# purify intergenic regions
	genic = [cds, utr5, utr3, intr]
	up1 = strip(up1, genic)
	down1 = strip(down1, genic)
	up5 = strip(up5, genic)
	down5 = strip(down5, genic)
	up10 = strip(up10, genic)
	down10 = strip(down10, genic)

	# build bitsets, then measure every class
	ranges = [build_bitsets(r) for r in
			(cds, utr5, utr3, intr, up1, up5, up10, down1, down5, down10)]
	sizes = [cal_size(r) for r in
			(cds, intr, utr3, utr5, up1, up5, up10, down1, down5, down10)]
	cds_r, utr5_r, utr3_r, intr_r = ranges[:4]
	cds_s, intr_s, utr3_s, utr5_s = sizes[:4]

	print >>sys.stderr, "Done"
	return tuple([cds_r, intr_r, utr5_r, utr3_r] + ranges[4:] +
			[cds_s, intr_s, utr5_s, utr3_s] + sizes[4:])
def main():
    '''
    Rescale a BigWig signal to a fixed total ("wigsum") and write it as text.

    The total signal (WIG_SUM) is summed over the merged exons of the -r gene
    model when given, otherwise over every chromosome of the -s size file,
    read in chunks of -c bases. Each value is then multiplied by
    total_wigsum / WIG_SUM and written to the -o file, either as
    variableStep wig lines ("wig") or as chrom/start/end/value lines ("bgr").

    Exits with status 0 after printing the help when a required option is
    missing, and with status 1 for an unknown output format or when the
    total signal is 0.
    '''
    usage = "%prog [options]"
    parser = OptionParser(usage, version="%prog " + __version__)

    parser.add_option("-i",
                      "--bwfile",
                      action="store",
                      type="string",
                      dest="BigWig_File",
                      help="Input BigWig file. [required]")
    parser.add_option("-o",
                      "--output",
                      action="store",
                      type="string",
                      dest="output_wig",
                      help="Output wig file. [required]")
    parser.add_option(
        "-s",
        "--chromSize",
        action="store",
        type="string",
        dest="chromSize",
        help=
        "Chromosome size file. Tab or space separated text file with 2 columns: first column is chromosome name, second column is size of the chromosome. [required]"
    )
    parser.add_option(
        "-t",
        "--wigsum",
        action="store",
        type="int",
        dest="total_wigsum",
        default=100000000,
        help=
        "Specified wigsum. 100000000 equals to coverage of 1 million 100nt reads. default=%default  [optional]"
    )
    parser.add_option("-r",
                      "--refgene",
                      action="store",
                      type="string",
                      dest="refgene_bed",
                      help="Reference gene model in bed format. [optional]")
    parser.add_option(
        "-c",
        "--chunk",
        action="store",
        type="int",
        dest="chunk_size",
        default=500000,
        help=
        "Chromosome chunk size. Each chomosome will be cut into samll chunks of this size. Decrease chunk size will save more RAM. default=%default (bp) [optional]"
    )
    parser.add_option(
        "-f",
        "--format",
        action="store",
        type="string",
        dest="out_format",
        default="bgr",
        help=
        "Output format. either \"wig\" or \"bgr\". \"bgr\" save disk space but make program slower. default=%default"
    )
    (options, args) = parser.parse_args()

    if not (options.BigWig_File and options.output_wig and options.chromSize):
        parser.print_help()
        sys.exit(0)

    def log(message):
        # progress and diagnostics go to stderr, one line per call
        sys.stderr.write(message + "\n")

    # Reject a bad -f value before the expensive passes over the data.
    out_format = options.out_format.upper()
    if out_format not in ("WIG", "BGR"):
        log("unknown output format")
        sys.exit(1)

    OUT = open(options.output_wig, 'w')
    bw_handle = None
    try:
        # BigWig is a binary format, so read it in binary mode.
        bw_handle = open(options.BigWig_File, 'rb')
        bw = BigWigFile(file=bw_handle)
        chrom_sizes = load_chromsize(options.chromSize)

        known_chroms = {}

        def has_chrom(name):
            # Probe a single base once per chromosome and remember the answer.
            # NOTE(review): get_as_array presumably returns None (or raises)
            # for names missing from the BigWig -- confirm against bx-python.
            if name not in known_chroms:
                try:
                    known_chroms[name] = bw.get_as_array(name, 0, 1) is not None
                except Exception:
                    known_chroms[name] = False
            return known_chroms[name]

        WIG_SUM = 0.0
        if options.refgene_bed:
            log("Extract exons from " + options.refgene_bed)
            obj = BED.ParseBED(options.refgene_bed)
            exons = obj.getExon()
            log("Merge overlapping exons ...")
            exons = BED.unionBed3(exons)
            log("Calculate wigsum covered by " + options.refgene_bed + ' only')
            for chrom, st, end in exons:
                if not has_chrom(chrom):
                    continue
                tmp = numpy.nansum(bw.get_as_array(chrom, st, end))
                # nansum may itself be nan when every item is nan
                if numpy.isnan(tmp):
                    continue
                WIG_SUM += tmp
            log("Total wigsum is %.2f\n" % WIG_SUM)
        else:
            log("Calculate wigsum from " + options.BigWig_File)
            for chr_name, chr_size in chrom_sizes.items():
                if not has_chrom(chr_name):
                    log("Skip " + chr_name + "!")
                    continue

                log("Processing " + chr_name + " ...")
                for interval in BED.tillingBed(chrName=chr_name,
                                               chrSize=chr_size,
                                               stepSize=options.chunk_size):
                    tmp = numpy.nansum(
                        bw.get_as_array(interval[0], interval[1], interval[2]))
                    if numpy.isnan(tmp):
                        continue
                    WIG_SUM += tmp
            log("\nTotal wigsum is %.2f\n" % WIG_SUM)

        # Test explicitly: once numpy scalars have been added, dividing by a
        # zero WIG_SUM yields inf instead of raising ZeroDivisionError.
        if WIG_SUM == 0:
            log("Error, WIG_SUM cannot be 0")
            sys.exit(1)
        weight = options.total_wigsum / WIG_SUM

        log("Normalizing bigwig file ...")
        for chr_name, chr_size in chrom_sizes.items():
            if not has_chrom(chr_name):
                log("Skip " + chr_name + "!")
                continue

            log("Writing " + chr_name + " ...")
            if out_format == "WIG":
                OUT.write('variableStep chrom=' + chr_name + '\n')
            for interval in BED.tillingBed(chrName=chr_name,
                                           chrSize=chr_size,
                                           stepSize=options.chunk_size):
                bw_signal = bw.get_as_array(chr_name, interval[1], interval[2])
                if numpy.isnan(numpy.nansum(bw_signal)):
                    continue
                bw_signal = numpy.nan_to_num(bw_signal) * weight

                if out_format == "WIG":
                    coord = interval[1]
                    for v in bw_signal:
                        coord += 1
                        if v != 0:
                            OUT.write("%d\t%.2f\n" % (coord, v))
                    continue

                # "BGR": collect positions per value, then collapse runs of
                # consecutive positions into start -> [length, value].
                v2p = collections.defaultdict(list)
                coord = interval[1]
                for v in bw_signal:
                    coord += 1
                    if v != 0:
                        v2p[v].append(coord)
                range2p = {}
                for v in v2p:
                    # index - position is constant within a consecutive run
                    for _key, group in groupby(enumerate(v2p[v]),
                                               lambda pair: pair[0] - pair[1]):
                        run = [pos for _idx, pos in group]
                        range2p[run[0] - 1] = [len(run), v]
                for start in sorted(range2p):
                    OUT.write(chr_name + '\t' + str(start) + '\t' +
                              str(start + range2p[start][0]) + '\t' +
                              str(range2p[start][1]) + '\n')
    finally:
        # Flush and release both files, also when leaving through sys.exit.
        if bw_handle is not None:
            bw_handle.close()
        OUT.close()
Example #6
0
def main():
    '''
    Rescale a BigWig signal to a fixed total ("wigsum") and write a wig file.

    The total signal (WIG_SUM) is summed over the merged exons of the -r gene
    model when given, otherwise over every chromosome of the -s size file,
    read in chunks of -c bases. Each non-zero value is then multiplied by
    total_wigsum / WIG_SUM and written to the -o file as variableStep lines.

    Exits with status 0 after printing the help when a required option is
    missing, and with status 1 when the total signal is 0.
    '''
    usage = "%prog [options]"
    parser = OptionParser(usage, version="%prog " + __version__)

    parser.add_option("-i",
                      "--bwfile",
                      action="store",
                      type="string",
                      dest="BigWig_File",
                      help="Input BigWig file. [required]")
    parser.add_option("-o",
                      "--output",
                      action="store",
                      type="string",
                      dest="output_wig",
                      help="Output wig file. [required]")
    parser.add_option(
        "-s",
        "--chromSize",
        action="store",
        type="string",
        dest="chromSize",
        help=
        "Chromosome size file. Tab or space separated text file with 2 columns: first column is chromosome name, second column is size of the chromosome. [required]"
    )
    parser.add_option(
        "-t",
        "--wigsum",
        action="store",
        type="int",
        dest="total_wigsum",
        default=100000000,
        help=
        "Specified wigsum. 100000000 equals to coverage of 1 million 100nt reads. default=%default  [optional]"
    )
    parser.add_option("-r",
                      "--refgene",
                      action="store",
                      type="string",
                      dest="refgene_bed",
                      help="Reference gene model in bed format. [optional]")
    parser.add_option(
        "-c",
        "--chunk",
        action="store",
        type="int",
        dest="chunk_size",
        default=100000,
        help=
        "Chromosome chunk size. Each chomosome will be cut into samll chunks of this size. Decrease chunk size will save more RAM. default=%default (bp) [optional]"
    )
    (options, args) = parser.parse_args()

    if not (options.BigWig_File and options.output_wig and options.chromSize):
        parser.print_help()
        sys.exit(0)

    def log(message):
        # progress and diagnostics go to stderr, one line per call
        sys.stderr.write(message + "\n")

    OUT = open(options.output_wig, 'w')
    bw_handle = None
    try:
        # BigWig is a binary format, so read it in binary mode.
        bw_handle = open(options.BigWig_File, 'rb')
        bw = BigWigFile(file=bw_handle)
        chrom_sizes = load_chromsize(options.chromSize)

        known_chroms = {}

        def has_chrom(name):
            # Probe a single base once per chromosome and remember the answer.
            # NOTE(review): get_as_array presumably returns None (or raises)
            # for names missing from the BigWig -- confirm against bx-python.
            if name not in known_chroms:
                try:
                    known_chroms[name] = bw.get_as_array(name, 0, 1) is not None
                except Exception:
                    known_chroms[name] = False
            return known_chroms[name]

        WIG_SUM = 0.0
        if options.refgene_bed:
            log("Extract exons from " + options.refgene_bed)
            obj = BED.ParseBED(options.refgene_bed)
            exons = obj.getExon()
            log("Merge overlapping exons ...")
            exons = BED.unionBed3(exons)
            log("Calculate wigsum covered by " + options.refgene_bed + ' only')
            for chrom, st, end in exons:
                if not has_chrom(chrom):
                    continue
                tmp = numpy.nansum(bw.get_as_array(chrom, st, end))
                # nansum may itself be nan when every item is nan
                if numpy.isnan(tmp):
                    continue
                WIG_SUM += tmp
            log("Total wigsum is %.2f\n" % WIG_SUM)
        else:
            log("Calculate wigsum from " + options.BigWig_File)
            for chr_name, chr_size in chrom_sizes.items():
                if not has_chrom(chr_name):
                    log("Skip " + chr_name + "!")
                    continue

                log("Processing " + chr_name + " ...")
                for interval in BED.tillingBed(chrName=chr_name,
                                               chrSize=chr_size,
                                               stepSize=options.chunk_size):
                    tmp = numpy.nansum(
                        bw.get_as_array(interval[0], interval[1], interval[2]))
                    if numpy.isnan(tmp):
                        continue
                    WIG_SUM += tmp
            log("\nTotal wigsum is %.2f\n" % WIG_SUM)

        # Test explicitly: once numpy scalars have been added, dividing by a
        # zero WIG_SUM yields inf instead of raising ZeroDivisionError.
        if WIG_SUM == 0:
            log("Error, WIG_SUM cannot be 0")
            sys.exit(1)
        weight = options.total_wigsum / WIG_SUM

        log("Normalizing bigwig file, output wiggle file")
        for chr_name, chr_size in chrom_sizes.items():
            if not has_chrom(chr_name):
                log("Skip " + chr_name + "!")
                continue

            log("Writing " + chr_name + " ...")
            OUT.write('variableStep chrom=' + chr_name + '\n')
            for interval in BED.tillingBed(chrName=chr_name,
                                           chrSize=chr_size,
                                           stepSize=options.chunk_size):
                bw_signal = bw.get_as_array(chr_name, interval[1], interval[2])
                if numpy.isnan(numpy.nansum(bw_signal)):
                    continue
                bw_signal = numpy.nan_to_num(bw_signal)
                # wig positions are written starting at interval start + 1
                coord = interval[1]
                for v in bw_signal:
                    coord += 1
                    if v != 0:
                        OUT.write("%d\t%.4f\n" % (coord, v * weight))
    finally:
        # Flush and release both files, also when leaving through sys.exit.
        if bw_handle is not None:
            bw_handle.close()
        OUT.close()
def main():
	'''
	Rescale a BigWig signal to a fixed total ("wigsum") and write it as text.

	The total signal (WIG_SUM) is summed over the merged exons of the -r gene
	model when given, otherwise over every chromosome of the -s size file,
	read in chunks of -c bases. Each value is then multiplied by
	total_wigsum / WIG_SUM and written to the -o file, either as
	variableStep wig lines ("wig") or as chrom/start/end/value lines ("bgr").

	Exits with status 0 after printing the help when a required option is
	missing, and with status 1 for an unknown output format or when the
	total signal is 0.
	'''
	usage="%prog [options]"
	parser = OptionParser(usage,version="%prog " + __version__)

	parser.add_option("-i","--bwfile",action="store",type="string",dest="BigWig_File",help="Input BigWig file. [required]")
	parser.add_option("-o","--output",action="store",type="string",dest="output_wig",help="Output wig file. [required]")
	parser.add_option("-s","--chromSize",action="store",type="string",dest="chromSize",help="Chromosome size file. Tab or space separated text file with 2 columns: first column is chromosome name, second column is size of the chromosome. [required]")
	parser.add_option("-t","--wigsum",action="store",type="int",dest="total_wigsum",default=100000000,help="Specified wigsum. 100000000 equals to coverage of 1 million 100nt reads. default=%default  [optional]")
	parser.add_option("-r","--refgene",action="store",type="string",dest="refgene_bed",help="Reference gene model in bed format. [optional]")
	parser.add_option("-c","--chunk",action="store",type="int",dest="chunk_size",default=500000,help="Chromosome chunk size. Each chomosome will be cut into samll chunks of this size. Decrease chunk size will save more RAM. default=%default (bp) [optional]")
	parser.add_option("-f","--format",action="store",type="string",dest="out_format",default="bgr",help="Output format. either \"wig\" or \"bgr\". \"bgr\" save disk space but make program slower. default=%default")
	(options,args)=parser.parse_args()

	if not (options.BigWig_File and options.output_wig and options.chromSize):
		parser.print_help()
		sys.exit(0)

	def log(message):
		# progress and diagnostics go to stderr, one line per call
		sys.stderr.write(message + "\n")

	# Reject a bad -f value before the expensive passes over the data.
	out_format = options.out_format.upper()
	if out_format not in ("WIG", "BGR"):
		log("unknown output format")
		sys.exit(1)

	OUT = open(options.output_wig,'w')
	bw_handle = None
	try:
		# BigWig is a binary format, so read it in binary mode.
		bw_handle = open(options.BigWig_File, 'rb')
		bw = BigWigFile(file=bw_handle)
		chrom_sizes = load_chromsize(options.chromSize)

		known_chroms = {}

		def has_chrom(name):
			# Probe a single base once per chromosome and remember the answer.
			# NOTE(review): get_as_array presumably returns None (or raises)
			# for names missing from the BigWig -- confirm against bx-python.
			if name not in known_chroms:
				try:
					known_chroms[name] = bw.get_as_array(name,0,1) is not None
				except Exception:
					known_chroms[name] = False
			return known_chroms[name]

		WIG_SUM = 0.0
		if options.refgene_bed:
			log("Extract exons from " + options.refgene_bed)
			obj = BED.ParseBED(options.refgene_bed)
			exons = obj.getExon()
			log("Merge overlapping exons ...")
			exons = BED.unionBed3(exons)
			log("Calculate wigsum covered by " + options.refgene_bed + ' only')
			for chrom,st,end in exons:
				if not has_chrom(chrom):
					continue
				tmp = numpy.nansum(bw.get_as_array(chrom,st,end))
				# nansum may itself be nan when every item is nan
				if numpy.isnan(tmp):
					continue
				WIG_SUM += tmp
			log("Total wigsum is %.2f\n" % WIG_SUM)
		else:
			log("Calculate wigsum from " + options.BigWig_File)
			for chr_name, chr_size in chrom_sizes.items():
				if not has_chrom(chr_name):
					log("Skip " + chr_name + "!")
					continue

				log("Processing " + chr_name + " ...")
				for interval in BED.tillingBed(chrName = chr_name,chrSize = chr_size,stepSize = options.chunk_size):
					tmp = numpy.nansum(bw.get_as_array(interval[0],interval[1],interval[2]))
					if numpy.isnan(tmp):
						continue
					WIG_SUM += tmp
			log("\nTotal wigsum is %.2f\n" % WIG_SUM)

		# Test explicitly: once numpy scalars have been added, dividing by a
		# zero WIG_SUM yields inf instead of raising ZeroDivisionError.
		if WIG_SUM == 0:
			log("Error, WIG_SUM cannot be 0")
			sys.exit(1)
		weight = options.total_wigsum/WIG_SUM

		log("Normalizing bigwig file ...")
		for chr_name, chr_size in chrom_sizes.items():
			if not has_chrom(chr_name):
				log("Skip " + chr_name + "!")
				continue

			log("Writing " + chr_name + " ...")
			if out_format == "WIG":
				OUT.write('variableStep chrom='+chr_name+'\n')
			for interval in BED.tillingBed(chrName = chr_name,chrSize = chr_size,stepSize = options.chunk_size):
				bw_signal = bw.get_as_array(chr_name,interval[1],interval[2])
				if numpy.isnan(numpy.nansum(bw_signal)):
					continue
				bw_signal = numpy.nan_to_num(bw_signal) * weight

				if out_format == "WIG":
					coord = interval[1]
					for v in bw_signal:
						coord += 1
						if v != 0:
							OUT.write("%d\t%.2f\n" % (coord,v))
					continue

				# "BGR": collect positions per value, then collapse runs of
				# consecutive positions into start -> [length, value].
				v2p = collections.defaultdict(list)
				coord = interval[1]
				for v in bw_signal:
					coord += 1
					if v != 0:
						v2p[v].append(coord)
				range2p = {}
				for v in v2p:
					# index - position is constant within a consecutive run
					for _key, group in groupby(enumerate(v2p[v]), lambda pair: pair[0] - pair[1]):
						run = [pos for _idx, pos in group]
						range2p[run[0]-1] = [len(run),v]
				for start in sorted(range2p):
					OUT.write(chr_name + '\t' + str(start) +'\t' + str(start + range2p[start][0]) + '\t' + str(range2p[start][1]) + '\n')
	finally:
		# Flush and release both files, also when leaving through sys.exit.
		if bw_handle is not None:
			bw_handle.close()
		OUT.close()
Example #8
0
def main():
    '''
    Rescale a BigWig signal to a fixed total ("wigsum") and write it as text.

    Chromosome sizes are read from the BigWig header. The total signal
    (WIG_SUM) is summed over the merged exons of the -r gene model when
    given, otherwise over every chromosome in chunks of -c bases. Each value
    is then multiplied by total_wigsum / WIG_SUM and written to the -o file,
    either as variableStep wig lines ("wig") or as chrom/start/end/value
    lines ("bgr").

    Exits with status 0 after printing the help when a required option is
    missing or when the input is not a BigWig file, and with status 1 for an
    unknown output format or when the total signal is 0.
    '''
    usage = "%prog [options]"
    parser = OptionParser(usage, version="%prog " + __version__)

    parser.add_option("-i",
                      "--bwfile",
                      action="store",
                      type="string",
                      dest="BigWig_File",
                      help="Input BigWig file. [required]")
    parser.add_option("-o",
                      "--output",
                      action="store",
                      type="string",
                      dest="output_wig",
                      help="Output wig file. [required]")
    parser.add_option(
        "-t",
        "--wigsum",
        action="store",
        type="int",
        dest="total_wigsum",
        default=100000000,
        help=
        "Specified wigsum. 100000000 equals to coverage of 1 million 100nt reads. default=%default  [optional]"
    )
    parser.add_option("-r",
                      "--refgene",
                      action="store",
                      type="string",
                      dest="refgene_bed",
                      help="Reference gene model in bed format. [optional]")
    parser.add_option(
        "-c",
        "--chunk",
        action="store",
        type="int",
        dest="chunk_size",
        default=500000,
        help=
        "Chromosome chunk size. Each chomosome will be cut into samll chunks of this size. Decrease chunk size will save more RAM. default=%default (bp) [optional]"
    )
    parser.add_option(
        "-f",
        "--format",
        action="store",
        type="string",
        dest="out_format",
        default="bgr",
        help=
        "Output format. either \"wig\" or \"bgr\". \"bgr\" save disk space but make program slower. default=%default"
    )
    (options, args) = parser.parse_args()

    if not (options.BigWig_File and options.output_wig):
        parser.print_help()
        sys.exit(0)

    # Reject a bad -f value before the expensive passes over the data.
    out_format = options.out_format.upper()
    if out_format not in ("WIG", "BGR"):
        print("unknown output format", file=sys.stderr)
        sys.exit(1)

    OUT = open(options.output_wig, 'w')
    bw = None
    try:
        bw = pyBigWig.open(options.BigWig_File)

        if not bw.isBigWig():
            print("%s is not a bigwig file!" % options.BigWig_File,
                  file=sys.stderr)
            sys.exit(0)

        print("Get chromosome sizes from BigWig header ...", file=sys.stderr)
        chrom_sizes = dict(bw.chroms())

        WIG_SUM = 0.0
        if options.refgene_bed:
            print("Extract exons from " + options.refgene_bed, file=sys.stderr)
            obj = BED.ParseBED(options.refgene_bed)
            exons = obj.getExon()
            print("Merge overlapping exons ...", file=sys.stderr)
            exons = BED.unionBed3(exons)
            print("Calculate wigsum covered by " + options.refgene_bed +
                  ' only',
                  file=sys.stderr)
            for chrom, st, end in exons:
                # The gene model may name chromosomes the BigWig header does
                # not list; skip them rather than query them.
                # NOTE(review): pyBigWig is expected to raise RuntimeError
                # for unknown chromosomes -- confirm against its docs.
                if chrom not in chrom_sizes:
                    continue
                if bw.stats(chrom, st, end)[0] is None:
                    continue
                tmp = numpy.nansum(bw.values(chrom, st, end))
                # nansum may itself be nan when every item is nan
                if numpy.isnan(tmp):
                    continue
                WIG_SUM += tmp
            print("Total wigsum is %.2f\n" % WIG_SUM, file=sys.stderr)
        else:
            print("Calculate wigsum from " + options.BigWig_File,
                  file=sys.stderr)
            for chr_name, chr_size in chrom_sizes.items():
                if bw.stats(chr_name, 0, chr_size)[0] is None:
                    print("Skip " + chr_name + "!", file=sys.stderr)
                    continue

                print("Processing " + chr_name + " ...", file=sys.stderr)
                for interval in BED.tillingBed(chrName=chr_name,
                                               chrSize=chr_size,
                                               stepSize=options.chunk_size):
                    if bw.stats(interval[0], interval[1],
                                interval[2])[0] is None:
                        continue
                    tmp = numpy.nansum(
                        bw.values(interval[0], interval[1], interval[2]))
                    if numpy.isnan(tmp):
                        continue
                    WIG_SUM += tmp
            print("\nTotal wigsum is %.2f\n" % WIG_SUM, file=sys.stderr)

        # Test explicitly: once numpy scalars have been added, dividing by a
        # zero WIG_SUM yields inf instead of raising ZeroDivisionError.
        if WIG_SUM == 0:
            print("Error, WIG_SUM cannot be 0", file=sys.stderr)
            sys.exit(1)
        weight = options.total_wigsum / WIG_SUM

        print("Normalizing bigwig file ...", file=sys.stderr)
        for chr_name, chr_size in chrom_sizes.items():
            if bw.stats(chr_name, 0, chr_size)[0] is None:
                print("Skip " + chr_name + "!", file=sys.stderr)
                continue

            print("Writing " + chr_name + " ...", file=sys.stderr)
            if out_format == "WIG":
                OUT.write('variableStep chrom=' + chr_name + '\n')
            for interval in BED.tillingBed(chrName=chr_name,
                                           chrSize=chr_size,
                                           stepSize=options.chunk_size):
                bw_signal = bw.values(chr_name, interval[1], interval[2])
                if numpy.isnan(numpy.nansum(bw_signal)):
                    continue
                bw_signal = numpy.nan_to_num(bw_signal) * weight

                if out_format == "WIG":
                    coord = interval[1]
                    for v in bw_signal:
                        coord += 1
                        if v != 0:
                            print("%d\t%.2f" % (coord, v), file=OUT)
                    continue

                # "BGR": collect positions per value, then collapse runs of
                # consecutive positions into start -> [length, value].
                v2p = collections.defaultdict(list)
                coord = interval[1]
                for v in bw_signal:
                    coord += 1
                    if v != 0:
                        v2p[v].append(coord)
                range2p = {}
                for v in v2p:
                    # index - position is constant within a consecutive run
                    for _key, group in groupby(enumerate(v2p[v]),
                                               lambda i_x: i_x[0] - i_x[1]):
                        run = [pos for _idx, pos in group]
                        range2p[run[0] - 1] = [len(run), v]
                for start in sorted(range2p):
                    print(chr_name + '\t' + str(start) + '\t' +
                          str(start + range2p[start][0]) + '\t' +
                          str(range2p[start][1]),
                          file=OUT)
    finally:
        # Flush and release both files, also when leaving through sys.exit.
        if bw is not None:
            bw.close()
        OUT.close()
Example #9
0
def _subtract_all(region, *masks):
	'''
	Remove every interval list in `masks` from `region`, one after another,
	using BED.subtractBed3, and return what is left.
	'''
	for mask in masks:
		region = BED.subtractBed3(region, mask)
	return region


def _density_row(label, bases, reads):
	'''
	Format one report row: label, total bases, read count and reads per Kb.
	An empty region (0 bases) is reported with a density of 0.00 rather than
	raising ZeroDivisionError.
	'''
	if bases:
		density = reads * 1000.0 / bases
	else:
		density = 0.0
	return "%s\t%d\t%d\t%5.2f" % (label, bases, reads, density)


def main():
	'''
	Command-line entry point.

	Parses -i/--input-file (SAM file, or "-" for standard input) and
	-r/--refgene (gene model in BED format), derives CDS exon, 5'UTR, 3'UTR,
	intron and 1/5/10 kb up/downstream regions from the gene model, then
	counts the midpoints of uniquely mapped alignment blocks falling into each
	region and writes a summary table to stderr.

	Exits through sys.exit(0) after printing help or a message when a required
	option is missing or a given path does not exist.
	'''
	usage="%prog [options]" + '\n' + __doc__ + "\n"
	parser = OptionParser(usage,version="%prog " + __version__)
	parser.add_option("-i","--input-file",action="store",type="string",dest="input_file",help="Input file in SAM format. Use \"-\" represents standard input [required]")
	parser.add_option("-r","--refgene",action="store",type="string",dest="ref_gene_model",help="Reference gene model in bed format. [required]")
	(options,args)=parser.parse_args()

	if not (options.input_file and options.ref_gene_model):
		parser.print_help()
		sys.exit(0)
	if not os.path.exists(options.ref_gene_model):
		sys.stderr.write('\n\n' + options.ref_gene_model + " does NOT exists" + '\n\n')
		sys.exit(0)
	# An existing path always wins; "-" means stdin only when no such file exists.
	read_from_stdin = not os.path.exists(options.input_file)
	if read_from_stdin and options.input_file != '-':
		sys.stderr.write('\n\n' + options.input_file + " does NOT exists" + '\n\n')
		sys.exit(0)

	sys.stderr.write("processing " + options.ref_gene_model + ' ... ')
	obj = BED.ParseBED(options.ref_gene_model)
	utr_3 = BED.unionBed3(obj.getUTR(utr=3))
	utr_5 = BED.unionBed3(obj.getUTR(utr=5))
	cds_exon = BED.unionBed3(obj.getCDSExon())
	intron = BED.unionBed3(obj.getIntron())

	# Make the feature classes mutually exclusive: CDS takes priority over UTRs,
	# and both take priority over introns.
	utr_5 = BED.subtractBed3(utr_5,cds_exon)
	utr_3 = BED.subtractBed3(utr_3,cds_exon)
	intron = _subtract_all(intron, cds_exon, utr_5, utr_3)

	# Merged flanking regions with every genic feature removed,
	# keyed by (direction, size).
	intergenic = {}
	for size in (1000, 5000, 10000):
		for direction in ("up", "down"):
			flank = BED.unionBed3(obj.getIntergenic(direction=direction,size=size))
			intergenic[(direction, size)] = _subtract_all(flank, cds_exon, utr_5, utr_3, intron)
	sys.stderr.write("Done\n")

	ranges={}
	totalReads=0
	spliceReads=0
	cUR=0
	multiMapReads=0

	sys.stderr.write("reading SAM file ")
	if read_from_stdin:
		file_obj = sys.stdin
	else:
		file_obj = open(options.input_file)
	try:
		for line in file_obj:
			if line.startswith("@"):continue
			fields=line.rstrip('\n ').split()
			if len(fields) < 6: continue		#skip blank or truncated records
			flagCode=int(fields[1])
			if (flagCode & 0x0004) != 0: continue		#skip unmap reads
			totalReads +=1
			if not SAM.ParseSAM._uniqueHit_pat.search(line):		#skip multiple mapped reads
				multiMapReads +=1
				continue

			chrom = fields[2].upper()
			offset = int(fields[3])-1
			comb=[int(i) for i in SAM.ParseSAM._splicedHit_pat.findall(fields[5])]	#"9M4721N63M3157N8M" return ['9', '4721', '63', '3157', '8']
			cUR += (len(comb) +1)//2
			if len(comb)>1:
				spliceReads += 1

			# Even positions of comb are aligned blocks, odd positions are the
			# gaps between them; every length advances the running offset.
			for idx, length in enumerate(comb):
				if idx % 2 == 0:
					mid = offset + length//2
					if chrom not in ranges:
						ranges[chrom] = Intersecter()
					# Always add the midpoint: the first block seen on a
					# chromosome must be recorded as well.
					ranges[chrom].add_interval( Interval( mid, mid ) )
				offset += length
	finally:
		if file_obj is not sys.stdin:
			file_obj.close()
	sys.stderr.write("Done\n")
	sys.stderr.write("Total Reads: " + str(totalReads) + "\n")
	sys.stderr.write("Multiple Hits: " + str(multiMapReads) + "\n")
	sys.stderr.write("Unique Hits: " + str(totalReads-multiMapReads) + "\n")
	sys.stderr.write("Spliced Hits: " + str(spliceReads) + "\n")
	sys.stderr.write("Total fragments: " + str(cUR) + "\n")

	sys.stderr.write("\nAssignning reads ... ")
	report = [
		("CDS Exons:", cds_exon),
		("5'UTR Exons:", utr_5),
		("3'UTR Exons:", utr_3),
		("Intronic region:", intron),
		("TSS up 1kb:", intergenic[("up", 1000)]),
		("TSS up 5kb:", intergenic[("up", 5000)]),
		("TSS up 10kb:", intergenic[("up", 10000)]),
		("TES down 1kb:", intergenic[("down", 1000)]),
		("TES down 5kb:", intergenic[("down", 5000)]),
		("TES down 10kb:", intergenic[("down", 10000)]),
	]
	rows = []
	for label, region in report:
		(bases, reads) = base_read(region,ranges)
		rows.append(_density_row(label, bases, reads))
	sys.stderr.write("Done\n")

	separator = "========================================================="
	sys.stderr.write(separator + "\n")
	sys.stderr.write("Group\tTotal_bases\tReads_count\tReads/Kb\n")
	for row in rows:
		sys.stderr.write(row + "\n")
	sys.stderr.write(separator + "\n")
Example #10
0
def _has_chrom(bw, chrom):
    '''
    Return True when a 1-bp probe query for `chrom` on the bigwig reader `bw`
    succeeds, False when the probe raises.
    '''
    try:
        bw.get_as_array(chrom, 0, 1).size
    except Exception:
        return False
    return True


def _window_sum(bw_signal):
    '''
    Return numpy.nansum of `bw_signal`, or None when that sum is NaN
    (the original code treats such a window as "no data" and skips it).
    '''
    total = numpy.nansum(bw_signal)
    if numpy.isnan(total):
        return None
    return total


def main():
    '''
    Command-line entry point.

    Reads a BigWig file, computes its total signal ("wigsum") either over the
    merged exons of an optional reference gene model or over every chromosome
    listed in the chromosome-size file, and writes a variableStep wiggle file
    whose values are scaled by total_wigsum / wigsum.

    Exits with status 0 after printing help when a required option is missing,
    and with status 1 when the computed wigsum is 0 (nothing to normalize).
    '''
    usage = "%prog [options]"
    parser = OptionParser(usage, version="%prog " + __version__)

    parser.add_option(
        "-i", "--bwfile", action="store", type="string", dest="BigWig_File", help="Input BigWig file. [required]"
    )
    parser.add_option(
        "-o", "--output", action="store", type="string", dest="output_wig", help="Output wig file. [required]"
    )
    parser.add_option(
        "-s",
        "--chromSize",
        action="store",
        type="string",
        dest="chromSize",
        help="Chromosome size file. Tab or space separated text file with 2 columns: first column is chromosome name, second column is size of the chromosome. [required]",
    )
    parser.add_option(
        "-t",
        "--wigsum",
        action="store",
        type="int",
        dest="total_wigsum",
        default=100000000,
        help="Specified wigsum. 100000000 equals to coverage of 1 million 100nt reads. default=%default  [optional]",
    )
    parser.add_option(
        "-r",
        "--refgene",
        action="store",
        type="string",
        dest="refgene_bed",
        help="Reference gene model in bed format. [optional]",
    )
    parser.add_option(
        "-c",
        "--chunk",
        action="store",
        type="int",
        dest="chunk_size",
        default=100000,
        help="Chromosome chunk size. Each chomosome will be cut into samll chunks of this size. Decrease chunk size will save more RAM. default=%default (bp) [optional]",
    )
    (options, args) = parser.parse_args()

    if not (options.BigWig_File and options.output_wig and options.chromSize):
        parser.print_help()
        sys.exit(0)

    log = sys.stderr.write
    # Both handles are closed on every exit path, including sys.exit().
    with open(options.output_wig, "w") as OUT:
        # BigWig is a binary format, so open it in binary mode.
        with open(options.BigWig_File, "rb") as bw_handle:
            bw = BigWigFile(file=bw_handle)
            chrom_sizes = load_chromsize(options.chromSize)
            WIG_SUM = 0.0
            if options.refgene_bed:
                log("Extract exons from " + options.refgene_bed + "\n")
                obj = BED.ParseBED(options.refgene_bed)
                exons = obj.getExon()
                log("Merge overlapping exons ...\n")
                exons = BED.unionBed3(exons)
                log("Calculate wigsum covered by " + options.refgene_bed + " only\n")
                for chrom, st, end in exons:
                    if not _has_chrom(bw, chrom):
                        continue
                    window = _window_sum(bw.get_as_array(chrom, st, end))
                    if window is not None:
                        WIG_SUM += window
                log("Total wigsum is %.2f\n\n" % WIG_SUM)
            else:
                log("Calculate wigsum from " + options.BigWig_File + "\n")
                for chr_name, chr_size in chrom_sizes.items():  # iterate each chrom
                    if not _has_chrom(bw, chr_name):
                        log("Skip " + chr_name + "!\n")
                        continue

                    log("Processing " + chr_name + " ...\n")
                    for interval in BED.tillingBed(chrName=chr_name, chrSize=chr_size, stepSize=options.chunk_size):
                        window = _window_sum(bw.get_as_array(interval[0], interval[1], interval[2]))
                        if window is not None:
                            WIG_SUM += window
                log("\nTotal wigsum is %.2f\n\n" % WIG_SUM)

            # Explicit check: dividing by a numpy float zero yields inf instead
            # of raising, so an exception handler cannot be relied on here.
            if WIG_SUM == 0:
                log("Error, WIG_SUM cannot be 0\n")
                sys.exit(1)
            weight = options.total_wigsum / WIG_SUM

            log("Normalizing bigwig file, output wiggle file\n")
            for chr_name, chr_size in chrom_sizes.items():  # iterate each chrom
                if not _has_chrom(bw, chr_name):
                    log("Skip " + chr_name + "!\n")
                    continue

                log("Writing " + chr_name + " ...\n")
                OUT.write("variableStep chrom=" + chr_name + "\n")
                for interval in BED.tillingBed(chrName=chr_name, chrSize=chr_size, stepSize=options.chunk_size):
                    bw_signal = bw.get_as_array(chr_name, interval[1], interval[2])
                    if _window_sum(bw_signal) is None:
                        continue
                    bw_signal = numpy.nan_to_num(bw_signal)
                    # Output coordinates are 1-based: the first value of the
                    # chunk sits at interval start + 1. Zero values are omitted.
                    for coord, v in enumerate(bw_signal, interval[1] + 1):
                        if v != 0:
                            OUT.write("%d\t%.4f\n" % (coord, v * weight))