def slice_pooled_peaks(self, threshold, pooled_threshold, rep_files,
                       pseudorep_files, pooled_files, pooled_peaks,
                       output_dir, ranking_measure='tag-count'):
    """Slice the pooled peak file down to the replicate-derived peak count.

    The number of peaks to keep is whatever
    ``IdrUtilities.get_peaks_within_threshold`` reports for ``rep_files``
    at ``threshold``. A warning is printed when that count and the count
    reported for ``pooled_files`` at ``pooled_threshold`` differ by more
    than two-fold. The pooled peaks are then sliced via
    ``IdrUtilities.slice_peaks`` and the resulting path is printed.

    Args:
        threshold: Threshold passed along with ``rep_files`` and
            ``pseudorep_files``.
        pooled_threshold: Threshold passed along with ``pooled_files``.
        rep_files: Replicate inputs used to derive the keep count.
        pseudorep_files: Pseudo-replicate inputs (evaluated, result unused).
        pooled_files: Pooled inputs used for the consistency check.
        pooled_peaks: Pooled peak file to slice.
        output_dir: Destination handed to ``slice_peaks``.
        ranking_measure: Ranking key handed to ``slice_peaks``.

    Returns:
        None. Results are reported on stdout.
    """
    idrutil = IdrUtilities()

    # Determine how many peaks we want to keep.
    keep_count = idrutil.get_peaks_within_threshold(threshold, rep_files)
    # NOTE(review): the pseudo-replicate count is computed but discarded;
    # the call is kept in case it has side effects -- confirm intent.
    idrutil.get_peaks_within_threshold(threshold, pseudorep_files)
    pooled_count = idrutil.get_peaks_within_threshold(
        pooled_threshold, pooled_files)

    # Pooled count should be within 2-fold of keep_count.
    # Equivalent to abs(log2(keep_count / pooled_count)) > 1 for positive
    # counts, but written without division so a zero count cannot raise
    # ZeroDivisionError / a math domain error, and integer division cannot
    # truncate the ratio.
    outside_two_fold = (keep_count > 2 * pooled_count
                        or pooled_count > 2 * keep_count)
    if outside_two_fold:
        print('!! Warning: The number of peaks within the replicate '
              + 'threshold is not within two-fold of the number of '
              + 'peaks within the pooled threshold. This could indicate '
              + 'inconsistencies in the datasets.\n'
              + 'Replicate count: {}, Pooled count: {}'.format(
                  keep_count, pooled_count))

    # Slice our pooled peak file accordingly.
    output_file = idrutil.slice_peaks(pooled_peaks, keep_count,
                                      ranking_measure, output_dir)
    print('{} peaks output to {}'.format(keep_count, output_file))
def slice_pooled_peaks(
    self,
    threshold,
    pooled_threshold,
    rep_files,
    pseudorep_files,
    pooled_files,
    pooled_peaks,
    output_dir,
    ranking_measure="tag-count",
):
    """Trim the pooled peak file to the number of peaks the replicates support.

    The keep count comes from ``IdrUtilities.get_peaks_within_threshold``
    applied to ``rep_files`` at ``threshold``. The same helper is applied to
    ``pooled_files`` at ``pooled_threshold``; if the two counts are more than
    two-fold apart a warning is printed. Finally ``IdrUtilities.slice_peaks``
    is called on ``pooled_peaks`` and the output path is printed.

    Args:
        threshold: Threshold used for ``rep_files`` and ``pseudorep_files``.
        pooled_threshold: Threshold used for ``pooled_files``.
        rep_files: Replicate inputs that determine the keep count.
        pseudorep_files: Pseudo-replicate inputs (evaluated, result unused).
        pooled_files: Pooled inputs used only for the two-fold check.
        pooled_peaks: Pooled peak file that gets sliced.
        output_dir: Destination passed through to ``slice_peaks``.
        ranking_measure: Ranking key passed through to ``slice_peaks``.

    Returns:
        None. The outcome is reported on stdout.
    """
    idrutil = IdrUtilities()

    # Determine how many peaks we want to keep.
    keep_count = idrutil.get_peaks_within_threshold(threshold, rep_files)
    # NOTE(review): this result is never used; the call is preserved in case
    # the helper has side effects -- confirm whether it is still needed.
    idrutil.get_peaks_within_threshold(threshold, pseudorep_files)
    pooled_count = idrutil.get_peaks_within_threshold(pooled_threshold, pooled_files)

    # Pooled count should be within 2-fold of keep_count.
    # A multiplicative comparison replaces abs(log2(keep / pooled)) > 1: it
    # gives the same answer for positive counts but cannot divide by zero or
    # take the log of zero when either count is 0, and it is immune to
    # integer-division truncation.
    too_many_kept = keep_count > 2 * pooled_count
    too_few_kept = pooled_count > 2 * keep_count
    if too_many_kept or too_few_kept:
        print(
            "!! Warning: The number of peaks within the replicate "
            + "threshold is not within two-fold of the number of "
            + "peaks within the pooled threshold. This could indicate "
            + "inconsistencies in the datasets.\n"
            + "Replicate count: {}, Pooled count: {}".format(keep_count, pooled_count)
        )

    # Slice our pooled peak file accordingly.
    output_file = idrutil.slice_peaks(
        pooled_peaks, keep_count, ranking_measure, output_dir
    )
    print("{} peaks output to {}".format(keep_count, output_file))