def broadpeaks_wo_control(bam_path, WINDOW_SIZE, GAP, EFFECTIVE_PROPORTION, ISLAND_SCORE_THRESHOLD, p0):
    """Build the island list for one alignment file, with no control sample.

    The work is delegated to the ``pre_counting`` and ``islands`` modules:
    chromosome info and the unique read count are taken from ``bam_path``,
    a Poisson lambda is derived from the read count, the window size and
    the effective genome length, the per-window read threshold is the
    Poisson quantile at ``1 - p0``, and that threshold drives window and
    island construction.

    Args:
        bam_path: Forwarded to the ``pre_counting`` / ``islands`` helpers
            (presumably the path of a BAM file -- confirm against them).
        WINDOW_SIZE: Forwarded to ``count_lambda``, ``make_windows_list``
            and ``make_islands_list``.
        GAP: Forwarded to ``make_windows_list``.
        EFFECTIVE_PROPORTION: Forwarded to ``count_effective_length``.
        ISLAND_SCORE_THRESHOLD: Forwarded to ``make_islands_list``.
        p0: Tail probability; the window threshold is
            ``poisson.ppf(1 - p0, lambda)``.

    Returns:
        Whatever ``islands.make_islands_list`` returns for this file.
    """
    chrom_info = pre_counting.get_chromosomes_info(bam_path)

    logging.info("\nStep 1 of 4\nCOUNTING UNIQUE READS\n")
    unique_reads = pre_counting.count_unique_reads(bam_path, chrom_info)

    # Effective genome length (L), then the Poisson lambda derived from it.
    genome_length = pre_counting.count_effective_length(EFFECTIVE_PROPORTION, chrom_info)
    poisson_lambda = pre_counting.count_lambda(unique_reads, WINDOW_SIZE, genome_length)

    # Formula (1): minimum number of reads for a window to be eligible.
    read_threshold = scipy.stats.poisson.ppf(1 - p0, poisson_lambda)

    threshold_message = (
        "\nWindow read threshold is {} reads, \ni.e. {} is minimum number of reads in window "
        "to consider this window `eligible` with Poisson distribution p-value {}"
    )
    logging.info(threshold_message.format(read_threshold, read_threshold, p0))

    logging.info("\nStep 2 of 4\nMAKING WINDOW LIST\n")
    # A single library is processed, so a constant of 1 is passed through.
    no_scaling = 1
    # make_windows_list returns a pair; only its first element is used here.
    windows, _ = islands.make_windows_list(
        bam_path,
        chrom_info,
        read_threshold,
        WINDOW_SIZE,
        GAP,
        unique_reads,
        no_scaling,
    )

    logging.info("\nStep 3 of 4\nMAKING ISLAND LIST\n")
    return islands.make_islands_list(
        windows,
        poisson_lambda,
        WINDOW_SIZE,
        read_threshold,
        chrom_info,
        ISLAND_SCORE_THRESHOLD,
    )
# NOTE(review): free-standing statements built around hard-coded numbers.
# They read bam_path, control_path and ISLAND_SCORE_THRESHOLD, none of which
# is assigned in this span -- presumably leftover scratch/debug code; confirm
# where those names come from before relying on it.
p0 = 0.1
chromosomes_info = pre_counting.get_chromosomes_info(bam_path)
control_chromosomes_info = pre_counting.get_chromosomes_info(control_path)
window_size = 200
gap = 1
# Same value as the first assignment of p0 above.
p0 = 0.1
# Assigned but not read by any later statement in this span.
effective_length = 2383684366.91
control_unique_reads_count = 3758349
input_lambda = 0.53
control_lambda = 0.32
# Poisson quantile at 1 - p0 for each of the fixed lambdas.
input_l0 = scipy.stats.poisson.ppf(1 - p0, input_lambda)
control_l0 = scipy.stats.poisson.ppf(1 - p0, control_lambda)
input_unique_reads_count = 6300518
# Both window lists below are built with a constant of 1.
NORMALIZATION_CONSTANT = 1
(window_list_input, window_list_input_dict) = islands.make_windows_list(bam_path, chromosomes_info, input_l0, window_size, gap, input_unique_reads_count, NORMALIZATION_CONSTANT)
# NOTE(review): the control window list is built with chromosomes_info (from
# bam_path) while the control island list further down uses
# control_chromosomes_info -- confirm this is intended.
(window_list_control, window_list_control_dict) = islands.make_windows_list(control_path, chromosomes_info, control_l0, window_size, gap, control_unique_reads_count, NORMALIZATION_CONSTANT)
island_list_input = islands.make_islands_list(window_list_input, input_lambda, window_size, input_l0, chromosomes_info, ISLAND_SCORE_THRESHOLD)
island_list_control = islands.make_islands_list(window_list_control, control_lambda, window_size, control_l0, control_chromosomes_info, ISLAND_SCORE_THRESHOLD)
# Rebinds the constant to the control/input read-count ratio; no later
# statement in this span reads it.
NORMALIZATION_CONSTANT = float(control_unique_reads_count) / input_unique_reads_count
def broadpeaks_with_control(bam_path, control_path, window_size, gap, EFFECTIVE_PROPORTION, ISLAND_SCORE_THRESHOLD, p0):
    """Build the input island list and run the FDR helpers against a control.

    Input and control files go through the same steps (unique read count,
    Poisson lambda, window threshold at the ``1 - p0`` quantile, window
    list, island list). Each island list is then scored against the other
    file's window dictionary, with the larger library scaled down by the
    ratio of the two read counts, and ``calculate_and_append_fdr`` is
    called twice on the pair of island lists.

    Args:
        bam_path: Forwarded to the ``pre_counting`` / ``islands`` helpers
            for the input sample (presumably a BAM path -- confirm).
        control_path: Same, for the control sample.
        window_size: Forwarded to ``count_lambda``, ``make_windows_list``,
            ``make_islands_list`` and the FDR scoring helper.
        gap: Forwarded to ``make_windows_list``.
        EFFECTIVE_PROPORTION: Forwarded to ``count_effective_length``.
        ISLAND_SCORE_THRESHOLD: Forwarded to ``make_islands_list``.
        p0: Tail probability; each window threshold is
            ``poisson.ppf(1 - p0, lambda)``.

    Returns:
        The input island list. The return values of the
        ``FDR_calculation`` calls are not used, so any scores they add are
        presumably written into the lists in place -- confirm against that
        module.

    Raises:
        ZeroDivisionError: If both unique read counts are zero.
    """
    chromosomes_info = pre_counting.get_chromosomes_info(bam_path)
    control_chromosomes_info = pre_counting.get_chromosomes_info(control_path)

    logging.info("\nStep 1 of 4\nCOUNTING UNIQUE READS\n")
    logging.info("\nFor input file\n")
    input_reads = pre_counting.count_unique_reads(bam_path, chromosomes_info)
    logging.info("\nFor control file\n")
    control_reads = pre_counting.count_unique_reads(control_path, control_chromosomes_info)

    # Effective genome length (L); computed once, from the input's
    # chromosome info, and shared by both lambdas.
    effective_length = pre_counting.count_effective_length(EFFECTIVE_PROPORTION, chromosomes_info)

    # Lambda of the Poisson distribution for each library.
    input_lambda = pre_counting.count_lambda(input_reads, window_size, effective_length)
    control_lambda = pre_counting.count_lambda(control_reads, window_size, effective_length)

    # Formula (1): minimum number of reads for a window to be eligible.
    input_l0 = scipy.stats.poisson.ppf(1 - p0, input_lambda)
    control_l0 = scipy.stats.poisson.ppf(1 - p0, control_lambda)

    # Only the input threshold is reported in the log.
    threshold_message = (
        "\nWindow read threshold is {} reads, \ni.e. {} is minimum number of reads in window "
        "to consider this window `eligible` with Poisson distribution p-value {}"
    )
    logging.info(threshold_message.format(input_l0, input_l0, p0))

    logging.info("\nStep 2 of 4\nMAKING WINDOW LIST\n")
    # The two libraries are independent, so neither is scaled at this stage.
    unscaled = 1
    logging.info("\nFor input file\n")
    window_list_input, window_list_input_dict = islands.make_windows_list(
        bam_path,
        chromosomes_info,
        input_l0,
        window_size,
        gap,
        input_reads,
        unscaled,
    )
    logging.info("\nFor control file\n")
    # NOTE(review): the control windows are built with the input's
    # chromosomes_info, whereas the control islands below use
    # control_chromosomes_info -- confirm this asymmetry is intended.
    window_list_control, window_list_control_dict = islands.make_windows_list(
        control_path,
        chromosomes_info,
        control_l0,
        window_size,
        gap,
        control_reads,
        unscaled,
    )

    logging.info("\nStep 3 of 4\nMAKING ISLAND LIST\n")
    logging.info("\nFor input file\n")
    island_list_input = islands.make_islands_list(
        window_list_input,
        input_lambda,
        window_size,
        input_l0,
        chromosomes_info,
        ISLAND_SCORE_THRESHOLD,
    )
    logging.info("\nFor control file\n")
    island_list_control = islands.make_islands_list(
        window_list_control,
        control_lambda,
        window_size,
        control_l0,
        control_chromosomes_info,
        ISLAND_SCORE_THRESHOLD,
    )

    # Normalize to the smaller data set: the larger library gets the ratio
    # (<= 1) of the two read counts, the smaller one keeps a factor of 1.
    if input_reads >= control_reads:
        input_scale = float(control_reads) / input_reads
        control_scale = 1
    else:
        input_scale = 1
        control_scale = float(input_reads) / control_reads

    # Score each island list against the other file's window dictionary;
    # the two scale factors swap places between the calls.
    FDR_calculation.calculate_and_append_score_for_fdr(
        island_list_input,
        window_list_control_dict,
        input_lambda,
        window_size,
        input_scale,
        control_scale,
    )
    FDR_calculation.calculate_and_append_score_for_fdr(
        island_list_control,
        window_list_input_dict,
        control_lambda,
        window_size,
        control_scale,
        input_scale,
    )

    # NOTE(review): 7 and 8 are passed through unchanged as the third
    # argument; presumably positions inside an island record -- confirm
    # against FDR_calculation.
    for fdr_argument in (7, 8):
        FDR_calculation.calculate_and_append_fdr(island_list_input, island_list_control, fdr_argument)

    return island_list_input