def main(argv): parser = OptionParser() parser.add_option("-s", "--species", action="store", type="string", dest="species", help="species, mm8, hg18, etc", metavar="<str>") parser.add_option("-a", "--rawreadfileA", action="store", type="string", dest="readfileA", metavar="<file>", help="raw read file A in bed format") parser.add_option("-b", "--rawreadfileB", action="store", type="string", dest="readfileB", metavar="<file>", help="raw read file B in bed format") parser.add_option("-f", "--fragment_size", action="store", type="int", dest="fragment_size", metavar="<int>", help="average size of a fragment after A experiment") parser.add_option("-d", "--islandfile", action="store", type="string", dest="islandfile", metavar="<file>", help="island file in BED format") parser.add_option("-o", "--outfile", action="store", type="string", dest="out_file", metavar="<file>", help="island read count summary file") (opt, args) = parser.parse_args(argv) if len(argv) < 12: parser.print_help() sys.exit(1) if opt.species in GenomeData.species_chroms.keys(): chroms = GenomeData.species_chroms[opt.species]; else: print "This species is not recognized, exiting"; sys.exit(1); if not Utility.fileExists(opt.readfileA): print opt.readfileA, " not found"; sys.exit(1) if not Utility.fileExists(opt.readfileB): print opt.readfileB, " not found"; sys.exit(1) A_library_size = get_total_tag_counts.get_total_tag_counts(opt.readfileA); B_library_size = get_total_tag_counts.get_total_tag_counts(opt.readfileB); print "Library size of ", opt.readfileA, ": ", A_library_size print "Library size of ", opt.readfileB, ": ", B_library_size totalA = 0; totalB = 0; islands = BED.BED(opt.species, opt.islandfile, "BED3", 0); # separate by chrom the A library SeparateByChrom.separateByChrom(chroms, opt.readfileA, '.bed1'); # separate by chrom the B library SeparateByChrom.separateByChrom(chroms, opt.readfileB, '.bed2'); island_A_readcount = {}; island_B_readcount = {}; #Find read counts on the islands for chrom in chroms: if chrom in islands.keys(): if len(islands[chrom]) != 0: island_list = islands[chrom]; if Utility.is_bed_sorted(island_list) == 0: island_list.sort(key=operator.attrgetter('start')); island_start_list = [] island_end_list = [] for item in island_list: island_start_list.append(item.start) island_end_list.append(item.end) island_A_readcount_list=[0]*len(island_list); read_file = chrom + ".bed1"; f = open(read_file,'r') for line in f: if not re.match("#", line): line = line.strip() sline = line.split() position = associate_tags_with_regions.tag_position(sline, opt.fragment_size) index =associate_tags_with_regions.find_readcount_on_islands(island_start_list, island_end_list, position); if index >= 0: island_A_readcount_list[index] += 1; totalA += 1; f.close(); island_A_readcount[chrom] = island_A_readcount_list; island_B_readcount_list=[0]*len(island_list); read_file = chrom + ".bed2"; f = open(read_file,'r') for line in f: if not re.match("#", line): line = line.strip() sline = line.split() position = associate_tags_with_regions.tag_position(sline, opt.fragment_size) index = associate_tags_with_regions.find_readcount_on_islands(island_start_list, island_end_list, position); if index >= 0: island_B_readcount_list[index] += 1; totalB += 1; f.close(); island_B_readcount[chrom] = island_B_readcount_list; #A_background_read = A_library_size - totalA; #B_background_read = B_library_size - totalB; print "Total number of A reads on islands is: ", totalA; print "Total number of B reads on islands is: ", totalB; # Calculate the p value. library_scaling_factor = A_library_size*1.0/B_library_size; #A vs B pseudo_count = 1; pvalue_A_vs_B_list = []; pvalue_B_vs_A_list = []; for chrom in chroms: if chrom in islands.keys(): if len(islands[chrom]) != 0: island_list = islands[chrom]; for index in xrange(len(island_list)): item = island_list[index]; Acount = (island_A_readcount[chrom])[index]; Bcount = (island_B_readcount[chrom])[index]; pvalue_A_vs_B = pvaule (Acount, Bcount, library_scaling_factor, pseudo_count); pvalue_A_vs_B_list.append(pvalue_A_vs_B); pvalue_B_vs_A = pvaule (Bcount, Acount, 1/library_scaling_factor, pseudo_count); pvalue_B_vs_A_list.append(pvalue_B_vs_A); #Calculate the FDR fdr_A_vs_B_list = fdr(pvalue_A_vs_B_list); fdr_B_vs_A_list = fdr(pvalue_B_vs_A_list); #Output the islands read counts, normalized read counts, fc, pvalue both ways scaling_factor = 1000000; out = open(opt.out_file, 'w'); outline = '#chrom' + "\t" + 'start' + "\t" + 'end' + "\t" + "Readcount_A" + "\t" + 'Normalized_Readcount_A' + "\t" + 'ReadcountB' + "\t" + 'Normalized_Readcount_B' + "\t" + "Fc_A_vs_B" + "\t" + "pvalue_A_vs_B" + "\t" + "FDR_A_vs_B" + "\t" + "Fc_B_vs_A" + "\t" + "pvalue_B_vs_A" + "\t" + "FDR_B_vs_A" + "\n"; out.write(outline); ii=0; for chrom in chroms: if chrom in islands.keys(): if len(islands[chrom]) != 0: island_list = islands[chrom]; for index in xrange(len(island_list)): item = island_list[index]; Acount = (island_A_readcount[chrom])[index]; Bcount = (island_B_readcount[chrom])[index]; normalized_A = Acount/ float(A_library_size) * scaling_factor; normalized_B = Bcount/ float(B_library_size) * scaling_factor; fc_A_vs_B = ((Acount + pseudo_count)*1.0/(Bcount + pseudo_count))/library_scaling_factor; fc_B_vs_A = ((Bcount + pseudo_count)*1.0/(Acount + pseudo_count)) * library_scaling_factor; outline = item.chrom + "\t" + str(item.start) + "\t" + str(item.end) + "\t" + str(Acount) + "\t" + str(normalized_A) + "\t" + str(Bcount) + "\t" + str(normalized_B) + "\t" + str(fc_A_vs_B) + "\t" + str(pvalue_A_vs_B_list[ii]) + "\t" + str(fdr_A_vs_B_list[ii]) + "\t" + str(fc_B_vs_A) + "\t" + str(pvalue_B_vs_A_list[ii]) + "\t" + str(fdr_B_vs_A_list[ii]) + "\n"; out.write(outline); ii += 1; out.close(); SeparateByChrom.cleanup(chroms, '.bed1'); SeparateByChrom.cleanup(chroms, '.bed2'); # Calculate the correlations using normalized read counts A_array=(); B_array=(); for chrom in chroms: if chrom in islands.keys(): if len(islands[chrom]) != 0: temp_array= scipy.array(island_A_readcount[chrom]); A_array=scipy.concatenate((temp_array, A_array)); temp_array= scipy.array(island_B_readcount[chrom]); B_array=scipy.concatenate((temp_array, B_array)); #Normalization to reads per million A_array = A_array/float(A_library_size) * scaling_factor; B_array = B_array/float(B_library_size) * scaling_factor; pearson=scipy.stats.pearsonr(A_array, B_array); print "Pearson's correlation is: ", pearson[0], " with p-value ", pearson[1]; spearman = scipy.stats.spearmanr(A_array, B_array); print "Spearman's correlation is: ", spearman[0], " with p-value ", spearman[1];
def main(argv): parser = OptionParser() parser.add_option("-s", "--species", action="store", type="string", dest="species", help="species, mm8, hg18, etc", metavar="<str>") parser.add_option("-a", "--rawreadfileA", action="store", type="string", dest="readfileA", metavar="<file>", help="raw read file A in bed format") parser.add_option("-b", "--rawreadfileB", action="store", type="string", dest="readfileB", metavar="<file>", help="raw read file B in bed format") parser.add_option("-f", "--fragment_size", action="store", type="int", dest="fragment_size", metavar="<int>", help="average size of a fragment after A experiment") parser.add_option("-d", "--islandfile", action="store", type="string", dest="islandfile", metavar="<file>", help="island file in BED format") parser.add_option("-o", "--outfile", action="store", type="string", dest="out_file", metavar="<file>", help="island read count summary file") (opt, args) = parser.parse_args(argv) if len(argv) < 12: parser.print_help() sys.exit(1) if opt.species in GenomeData.species_chroms.keys(): chroms = GenomeData.species_chroms[opt.species] else: print "This species is not recognized, exiting" sys.exit(1) if not Utility.fileExists(opt.readfileA): print opt.readfileA, " not found" sys.exit(1) if not Utility.fileExists(opt.readfileB): print opt.readfileB, " not found" sys.exit(1) A_library_size = get_total_tag_counts.get_total_tag_counts(opt.readfileA) B_library_size = get_total_tag_counts.get_total_tag_counts(opt.readfileB) print "Library size of ", opt.readfileA, ": ", A_library_size print "Library size of ", opt.readfileB, ": ", B_library_size totalA = 0 totalB = 0 islands = BED.BED(opt.species, opt.islandfile, "BED3", 0) # separate by chrom the A library SeparateByChrom.separateByChrom(chroms, opt.readfileA, '.bed1') # separate by chrom the B library SeparateByChrom.separateByChrom(chroms, opt.readfileB, '.bed2') island_A_readcount = {} island_B_readcount = {} #Find read counts on the islands for chrom in chroms: if chrom in islands.keys(): if len(islands[chrom]) != 0: island_list = islands[chrom] if Utility.is_bed_sorted(island_list) == 0: island_list.sort(key=operator.attrgetter('start')) island_start_list = [] island_end_list = [] for item in island_list: island_start_list.append(item.start) island_end_list.append(item.end) island_A_readcount_list = [0] * len(island_list) read_file = chrom + ".bed1" f = open(read_file, 'r') for line in f: if not re.match("#", line): line = line.strip() sline = line.split() position = associate_tags_with_regions.tag_position( sline, opt.fragment_size) index = associate_tags_with_regions.find_readcount_on_islands( island_start_list, island_end_list, position) if index >= 0: island_A_readcount_list[index] += 1 totalA += 1 f.close() island_A_readcount[chrom] = island_A_readcount_list island_B_readcount_list = [0] * len(island_list) read_file = chrom + ".bed2" f = open(read_file, 'r') for line in f: if not re.match("#", line): line = line.strip() sline = line.split() position = associate_tags_with_regions.tag_position( sline, opt.fragment_size) index = associate_tags_with_regions.find_readcount_on_islands( island_start_list, island_end_list, position) if index >= 0: island_B_readcount_list[index] += 1 totalB += 1 f.close() island_B_readcount[chrom] = island_B_readcount_list #A_background_read = A_library_size - totalA; #B_background_read = B_library_size - totalB; print "Total number of A reads on islands is: ", totalA print "Total number of B reads on islands is: ", totalB # Calculate the p value. library_scaling_factor = A_library_size * 1.0 / B_library_size #A vs B pseudo_count = 1 pvalue_A_vs_B_list = [] pvalue_B_vs_A_list = [] for chrom in chroms: if chrom in islands.keys(): if len(islands[chrom]) != 0: island_list = islands[chrom] for index in xrange(len(island_list)): item = island_list[index] Acount = (island_A_readcount[chrom])[index] Bcount = (island_B_readcount[chrom])[index] pvalue_A_vs_B = pvaule(Acount, Bcount, library_scaling_factor, pseudo_count) pvalue_A_vs_B_list.append(pvalue_A_vs_B) pvalue_B_vs_A = pvaule(Bcount, Acount, 1 / library_scaling_factor, pseudo_count) pvalue_B_vs_A_list.append(pvalue_B_vs_A) #Calculate the FDR fdr_A_vs_B_list = fdr(pvalue_A_vs_B_list) fdr_B_vs_A_list = fdr(pvalue_B_vs_A_list) #Output the islands read counts, normalized read counts, fc, pvalue both ways scaling_factor = 1000000 out = open(opt.out_file, 'w') outline = '#chrom' + "\t" + 'start' + "\t" + 'end' + "\t" + "Readcount_A" + "\t" + 'Normalized_Readcount_A' + "\t" + 'ReadcountB' + "\t" + 'Normalized_Readcount_B' + "\t" + "Fc_A_vs_B" + "\t" + "pvalue_A_vs_B" + "\t" + "FDR_A_vs_B" + "\t" + "Fc_B_vs_A" + "\t" + "pvalue_B_vs_A" + "\t" + "FDR_B_vs_A" + "\n" out.write(outline) ii = 0 for chrom in chroms: if chrom in islands.keys(): if len(islands[chrom]) != 0: island_list = islands[chrom] for index in xrange(len(island_list)): item = island_list[index] Acount = (island_A_readcount[chrom])[index] Bcount = (island_B_readcount[chrom])[index] normalized_A = Acount / float( A_library_size) * scaling_factor normalized_B = Bcount / float( B_library_size) * scaling_factor fc_A_vs_B = ( (Acount + pseudo_count) * 1.0 / (Bcount + pseudo_count)) / library_scaling_factor fc_B_vs_A = ( (Bcount + pseudo_count) * 1.0 / (Acount + pseudo_count)) * library_scaling_factor print("Acount", Acount, "Bcount", Bcount, "pseudo_count", pseudo_count, "library_scaling_factor", library_scaling_factor, "fc_A_vs_B", fc_A_vs_B, "fc_B_vs_A", fc_B_vs_A) outline = item.chrom + "\t" + str(item.start) + "\t" + str( item.end) + "\t" + str(Acount) + "\t" + str( normalized_A) + "\t" + str(Bcount) + "\t" + str( normalized_B ) + "\t" + str(fc_A_vs_B) + "\t" + str( pvalue_A_vs_B_list[ii]) + "\t" + str( fdr_A_vs_B_list[ii] ) + "\t" + str(fc_B_vs_A) + "\t" + str( pvalue_B_vs_A_list[ii]) + "\t" + str( fdr_B_vs_A_list[ii]) + "\n" out.write(outline) ii += 1 out.close() SeparateByChrom.cleanup(chroms, '.bed1') SeparateByChrom.cleanup(chroms, '.bed2') # Calculate the correlations using normalized read counts A_array = () B_array = () for chrom in chroms: if chrom in islands.keys(): if len(islands[chrom]) != 0: temp_array = scipy.array(island_A_readcount[chrom]) A_array = scipy.concatenate((temp_array, A_array)) temp_array = scipy.array(island_B_readcount[chrom]) B_array = scipy.concatenate((temp_array, B_array)) #Normalization to reads per million A_array = A_array / float(A_library_size) * scaling_factor B_array = B_array / float(B_library_size) * scaling_factor pearson = scipy.stats.pearsonr(A_array, B_array) print "Pearson's correlation is: ", pearson[0], " with p-value ", pearson[ 1] spearman = scipy.stats.spearmanr(A_array, B_array) print "Spearman's correlation is: ", spearman[ 0], " with p-value ", spearman[1]
def main(argv): parser = OptionParser() parser.add_option("-s", "--species", action="store", type="string", dest="species", help="species, mm8, hg18", metavar="<str>") parser.add_option("-a", "--rawchipreadfile", action="store", type="string", dest="chipreadfile", metavar="<file>", help="raw read file from chip in bed format") parser.add_option("-b", "--rawcontrolreadfile", action="store", type="string", dest="controlreadfile", metavar="<file>", help="raw read file from control in bed format") parser.add_option("-f", "--fragment_size", action="store", type="int", dest="fragment_size", metavar="<int>", help="average size of a fragment after CHIP experiment") parser.add_option("-d", "--islandfile", action="store", type="string", dest="islandfile", metavar="<file>", help="island file in BED format") parser.add_option("-o", "--outfile", action="store", type="string", dest="out_file", metavar="<file>", help="island read count summary file") parser.add_option("-t", "--mappable_fraction_of_genome_size ", action="store", type="float", dest="fraction", help="mapable fraction of genome size", metavar="<float>") (opt, args) = parser.parse_args(argv) if len(argv) < 14: parser.print_help() sys.exit(1) if opt.species in GenomeData.species_chroms.keys(): chroms = GenomeData.species_chroms[opt.species] genomesize = sum( GenomeData.species_chrom_lengths[opt.species].values()) genomesize = opt.fraction * genomesize else: print "This species is not recognized, exiting" sys.exit(1) chip_library_size = get_total_tag_counts.get_total_tag_counts( opt.chipreadfile) control_library_size = get_total_tag_counts.get_total_tag_counts( opt.controlreadfile) print "chip library size ", chip_library_size print "control library size ", control_library_size totalchip = 0 totalcontrol = 0 islands = BED.BED(opt.species, opt.islandfile, "BED3", 0) # separate by chrom the chip library if Utility.fileExists(opt.chipreadfile): SeparateByChrom.separateByChrom(chroms, opt.chipreadfile, '.bed1') else: print opt.chipreadfile, " not found" sys.exit(1) # separate by chrom the control library if Utility.fileExists(opt.controlreadfile): SeparateByChrom.separateByChrom(chroms, opt.controlreadfile, '.bed2') else: print opt.controlreadfile, " not found" sys.exit(1) island_chip_readcount = {} island_control_readcount = {} for chrom in chroms: if chrom in islands.keys(): if len(islands[chrom]) != 0: island_list = islands[chrom] if Utility.is_bed_sorted(island_list) == 0: island_list.sort(key=operator.attrgetter('start')) island_start_list = [] island_end_list = [] for item in island_list: island_start_list.append(item.start) island_end_list.append(item.end) island_chip_readcount_list = [0] * len(island_list) read_file = chrom + ".bed1" f = open(read_file, 'r') for line in f: if not re.match("#", line): line = line.strip() sline = line.split() position = associate_tags_with_regions.tag_position( sline, opt.fragment_size) index = associate_tags_with_regions.find_readcount_on_islands( island_start_list, island_end_list, position) if index >= 0: island_chip_readcount_list[index] += 1 totalchip += 1 f.close() island_chip_readcount[chrom] = island_chip_readcount_list island_control_readcount_list = [0] * len(island_list) read_file = chrom + ".bed2" f = open(read_file, 'r') for line in f: if not re.match("#", line): line = line.strip() sline = line.split() position = associate_tags_with_regions.tag_position( sline, opt.fragment_size) index = associate_tags_with_regions.find_readcount_on_islands( island_start_list, island_end_list, position) if index >= 0: island_control_readcount_list[index] += 1 totalcontrol += 1 f.close() island_control_readcount[chrom] = island_control_readcount_list chip_background_read = chip_library_size - totalchip control_background_read = control_library_size - totalcontrol #scaling_factor = chip_background_read*1.0/control_background_read; scaling_factor = chip_library_size * 1.0 / control_library_size print "Total number of chip reads on islands is: ", totalchip print "Total number of control reads on islands is: ", totalcontrol #print "chip_background_read ", chip_background_read #print "control_background_read ", control_background_read out = open(opt.out_file, 'w') pvalue_list = [] result_list = [] for chrom in chroms: if chrom in islands.keys(): if len(islands[chrom]) != 0: island_list = islands[chrom] for index in xrange(len(island_list)): item = island_list[index] observation = (island_chip_readcount[chrom])[index] control_tag = (island_control_readcount[chrom])[index] if (island_control_readcount[chrom])[index] > 0: #average = (island_control_readcount[chrom])[index] * scaling_factor; average = control_tag * scaling_factor fc = float(observation) / float(average) else: length = item.end - item.start + 1 average = length * control_library_size * 1.0 / genomesize average = min(0.25, average) * scaling_factor fc = float(observation) / float(average) if observation > average: pvalue = scipy.stats.poisson.sf( (island_chip_readcount[chrom])[index], average)[()] else: pvalue = 1 pvalue_list.append(pvalue) item_dic = {} item_dic['chrom'] = item.chrom item_dic['start'] = item.start item_dic['end'] = item.end item_dic['chip'] = observation item_dic['control'] = control_tag item_dic['pvalue'] = pvalue item_dic['fc'] = fc result_list.append(item_dic) pvaluearray = scipy.array(pvalue_list) pvaluerankarray = scipy.stats.rankdata(pvaluearray) totalnumber = len(result_list) for i in range(totalnumber): item = result_list[i] alpha = pvalue_list[i] * totalnumber / pvaluerankarray[i] if alpha > 1: alpha = 1 outline = item['chrom'] + "\t" + str(item['start']) + "\t" + str( item['end']) + "\t" + str(item['chip']) + "\t" + str( item['control']) + "\t" + str(item['pvalue']) + "\t" + str( item['fc']) + "\t" + str(alpha) + "\n" out.write(outline) #pvalue_list.sort() #for item in result_list: #pvalue = float(item['pvalue']) #alpha = pvalue * len(result_list) / (pvalue_list.index(pvalue) + 1) #if alpha > 1: #alpha = 1; #outline = item['chrom'] + "\t" + str(item['start']) + "\t" + str(item['end']) + "\t" + str(item['chip']) + "\t" + str(item['control']) + "\t" + str(item['pvalue']) + "\t" + str(item['fc']) + "\t" + str(alpha) + "\n"; #out.write(outline); out.close() SeparateByChrom.cleanup(chroms, '.bed1') SeparateByChrom.cleanup(chroms, '.bed2')
def main(argv): parser = OptionParser() parser.add_option("-s", "--species", action="store", type="string", dest="species", help="species, mm8, hg18", metavar="<str>") parser.add_option("-a", "--rawchipreadfile", action="store", type="string", dest="chipreadfile", metavar="<file>", help="raw read file from chip in bed format") parser.add_option("-b", "--rawcontrolreadfile", action="store", type="string", dest="controlreadfile", metavar="<file>", help="raw read file from control in BAM format") parser.add_option("-f", "--fragment_size", action="store", type="int", dest="fragment_size", metavar="<int>", help="average size of a fragment after CHIP experiment") parser.add_option("-d", "--islandfile", action="store", type="string", dest="islandfile", metavar="<file>", help="island file in BED format") parser.add_option("-o", "--outfile", action="store", type="string", dest="out_file", metavar="<file>", help="island read count summary file") parser.add_option("-t", "--mappable_fraction_of_genome_size ", action="store", type="float", dest="fraction", help="mapable fraction of genome size", metavar="<float>") (opt, args) = parser.parse_args(argv) if len(argv) < 14: parser.print_help() sys.exit(1) if opt.species in GenomeData.species_chroms.keys(): chroms = GenomeData.species_chroms[opt.species]; genomesize = sum (GenomeData.species_chrom_lengths[opt.species].values()); genomesize = opt.fraction * genomesize; else: print "This species is not recognized, exiting"; sys.exit(1); chip_library_size=get_total_tag_counts.get_total_tag_counts_bam(opt.chipreadfile); control_library_size=get_total_tag_counts.get_total_tag_counts_bam(opt.controlreadfile); print "chip library size ", chip_library_size print "control library size ", control_library_size totalchip = 0; totalcontrol = 0; islands = BED.BED(opt.species, opt.islandfile, "BED3", 0); # separate by chrom the chip library if Utility.fileExists(opt.chipreadfile): SeparateByChrom.separateByChromBamToBed(chroms, opt.chipreadfile, '.bed1'); else: print opt.chipreadfile, " not found"; sys.exit(1) # separate by chrom the control library if Utility.fileExists(opt.controlreadfile): SeparateByChrom.separateByChromBamToBed(chroms, opt.controlreadfile, '.bed2'); else: print opt.controlreadfile, " not found"; sys.exit(1) island_chip_readcount = {}; island_control_readcount = {}; for chrom in chroms: if chrom in islands.keys(): if len(islands[chrom]) != 0: island_list = islands[chrom]; if Utility.is_bed_sorted(island_list) == 0: island_list.sort(key=operator.attrgetter('start')); island_start_list = [] island_end_list = [] for item in island_list: island_start_list.append(item.start) island_end_list.append(item.end) island_chip_readcount_list=[0]*len(island_list); read_file = chrom + ".bed1"; f = open(read_file,'r') for line in f: if not re.match("#", line): line = line.strip() sline = line.split() position = associate_tags_with_regions.tag_position(sline, opt.fragment_size) index =associate_tags_with_regions.find_readcount_on_islands(island_start_list, island_end_list, position); if index >= 0: island_chip_readcount_list[index] += 1; totalchip += 1; f.close(); island_chip_readcount[chrom] = island_chip_readcount_list; island_control_readcount_list=[0]*len(island_list); read_file = chrom + ".bed2"; f = open(read_file,'r') for line in f: if not re.match("#", line): line = line.strip() sline = line.split() position = associate_tags_with_regions.tag_position(sline, opt.fragment_size) index = associate_tags_with_regions.find_readcount_on_islands(island_start_list, island_end_list, position); if index >= 0: island_control_readcount_list[index] += 1; totalcontrol += 1; f.close(); island_control_readcount[chrom] = island_control_readcount_list; chip_background_read = chip_library_size - totalchip; control_background_read = control_library_size - totalcontrol; #scaling_factor = chip_background_read*1.0/control_background_read; scaling_factor = chip_library_size*1.0/control_library_size; print "Total number of chip reads on islands is: ", totalchip; print "Total number of control reads on islands is: ", totalcontrol; #print "chip_background_read ", chip_background_read #print "control_background_read ", control_background_read out = open(opt.out_file, 'w'); pvalue_list = []; result_list = []; for chrom in chroms: if chrom in islands.keys(): if len(islands[chrom]) != 0: island_list = islands[chrom]; for index in xrange(len(island_list)): item = island_list[index]; observation = (island_chip_readcount[chrom])[index]; control_tag = (island_control_readcount[chrom])[index]; if (island_control_readcount[chrom])[index] > 0: #average = (island_control_readcount[chrom])[index] * scaling_factor; average = control_tag * scaling_factor fc = float(observation)/float(average); else: length = item.end - item.start + 1; average = length * control_library_size *1.0/genomesize; average = min(0.25, average)* scaling_factor; fc = float(observation)/float(average); if observation > average: pvalue = scipy.stats.poisson.sf((island_chip_readcount[chrom])[index], average)[()]; else: pvalue = 1; pvalue_list.append(pvalue); item_dic = {} item_dic['chrom'] = item.chrom item_dic['start'] = item.start item_dic['end'] = item.end item_dic['chip'] = observation item_dic['control'] = control_tag item_dic['pvalue'] = pvalue item_dic['fc'] = fc result_list.append(item_dic) pvaluearray=scipy.array(pvalue_list); pvaluerankarray=scipy.stats.rankdata(pvaluearray); totalnumber = len(result_list); for i in range(totalnumber): item = result_list[i]; alpha = pvalue_list[i] * totalnumber/pvaluerankarray[i]; if alpha > 1: alpha = 1; outline = item['chrom'] + "\t" + str(item['start']) + "\t" + str(item['end']) + "\t" + str(item['chip']) + "\t" + str(item['control']) + "\t" + str(item['pvalue']) + "\t" + str(item['fc']) + "\t" + str(alpha) + "\n"; out.write(outline); #pvalue_list.sort() #for item in result_list: #pvalue = float(item['pvalue']) #alpha = pvalue * len(result_list) / (pvalue_list.index(pvalue) + 1) #if alpha > 1: #alpha = 1; #outline = item['chrom'] + "\t" + str(item['start']) + "\t" + str(item['end']) + "\t" + str(item['chip']) + "\t" + str(item['control']) + "\t" + str(item['pvalue']) + "\t" + str(item['fc']) + "\t" + str(alpha) + "\n"; #out.write(outline); out.close(); SeparateByChrom.cleanup(chroms, '.bed1'); SeparateByChrom.cleanup(chroms, '.bed2');