def make_window_regions(configuration):
    """Create one ``Region`` per configured (start, end) pair and window it.

    If ``configuration['processing']['type']`` equals ``'multi'`` the call is
    delegated to ``parallel.par_make_window_regions`` and its result is
    returned as is. Otherwise each pair taken from
    ``configuration["regions"]["start"]`` / ``["end"]`` becomes a ``Region``
    for which WGA and non-WGA windows are requested.

    Keys read from ``configuration``: ``processing.type``, ``window_size``,
    ``chromosome``, ``regions.start``, ``regions.end``, ``sam_read_config``,
    ``reference_file.filename``, ``wga_file.filename``,
    ``no_wga_file.filename`` and, optionally, ``debug``.

    :param configuration: mapping holding the keys listed above
    :return: list of the regions created, in the order they were configured
    :raises Error: when a region reports zero WGA or zero non-WGA windows
    """
    print("{0} Creating window regions...".format(INFO))

    processing_type = configuration['processing']['type']
    print("{0} Processing type is: {1}".format(INFO, processing_type))

    if processing_type == 'multi':
        # Imported lazily so the parallel module is only needed on this path.
        from parallel import par_make_window_regions
        return par_make_window_regions(configuration=configuration)

    window_size = configuration["window_size"]
    chrom = configuration["chromosome"]
    print("{0} Window size: {1}".format(INFO, window_size))
    print("{0} Chromosome: {1}".format(INFO, chrom))

    region_spec = configuration["regions"]
    print("{0} Regions used {1}".format(INFO, region_spec))

    # Pair the start and end coordinates up front, before any region is built.
    bounds = list(zip(region_spec["start"], region_spec["end"]))

    built = []
    for region_idx, (start_idx, end_idx) in enumerate(bounds):
        print("{0} Start index: {1}".format(INFO, start_idx))
        sys.stdout.flush()
        print("{0} End index: {1}".format(INFO, end_idx))
        sys.stdout.flush()

        region = Region(idx=region_idx, start=start_idx,
                        end=end_idx, window_size=window_size)

        # Extra keyword arguments shared by both window-building calls;
        # "debug" is forwarded only when the configuration provides it.
        window_kwargs = {"sam_read_config": configuration["sam_read_config"]}
        if "debug" in configuration:
            window_kwargs["debug"] = configuration["debug"]

        print("{0} Creating WGA Windows...".format(INFO))
        sys.stdout.flush()

        ref_file = configuration["reference_file"]["filename"]
        wga_bam = configuration["wga_file"]["filename"]
        region.make_wga_windows(chromosome=chrom,
                                ref_filename=ref_file,
                                bam_filename=wga_bam,
                                **window_kwargs)

        if region.get_n_windows(type_=WindowType.WGA) == 0:
            raise Error("WGA windows have not been created")

        print("{0} Number of WGA windows: {1}".format(
            INFO, region.get_n_windows(type_=WindowType.WGA)))
        sys.stdout.flush()

        print("{0} Creating No WGA Windows...".format(INFO))
        sys.stdout.flush()

        ref_file = configuration["reference_file"]["filename"]
        no_wga_bam = configuration["no_wga_file"]["filename"]
        region.make_no_wga_windows(chromosome=chrom,
                                   ref_filename=ref_file,
                                   bam_filename=no_wga_bam,
                                   **window_kwargs)

        if region.get_n_windows(type_=WindowType.NO_WGA) == 0:
            raise Error("Non-WGA windows have not been created")

        print("{0} Number of Non WGA windows: {1}".format(
            INFO, region.get_n_windows(type_=WindowType.NO_WGA)))
        sys.stdout.flush()

        built.append(region)

    return built
def make_window_regions(configuration):
    """Create, window and post-process one ``Region`` per configured pair.

    For each (start, end) pair taken from ``configuration["regions"]`` a
    ``Region`` is built and asked for its WGA and non-WGA windows. Depending
    on optional flags in ``configuration`` the region is then sanity checked,
    has its N windows removed or marked, and has outliers removed. The
    ``"mean"`` statistic of the mixed windows is always saved.

    Keys read from ``configuration``: ``window_size``, ``chromosome``,
    ``regions.start``, ``regions.end``, ``reference_file.filename``,
    ``test_file.filename``, ``no_wga_file.filename``. Optional keys:
    ``quality_theshold``, ``debug``, ``check_windowing_sanity``,
    ``remove_windows_with_N``, ``mark_N_windows`` (which also requires
    ``mark_for_N_windows``) and ``outlier_remove``.

    :param configuration: mapping holding the keys listed above; it is also
        passed whole to ``Region.remove_outliers``
    :return: list of the regions created, in the order they were configured
    :raises Error: when a region reports zero WGA or zero non-WGA windows
    """
    print("{0} Creating window regions...".format(INFO))

    window_size = configuration["window_size"]
    chrom = configuration["chromosome"]
    print("{0} Window size: {1}".format(INFO, window_size))
    print("{0} Chromosome: {1}".format(INFO, chrom))

    region_spec = configuration["regions"]
    print("{0} Regions used {1}".format(INFO, region_spec))

    # Pair the start and end coordinates up front, before any region is built.
    bounds = list(zip(region_spec["start"], region_spec["end"]))

    built = []
    for region_idx, (start_idx, end_idx) in enumerate(bounds):
        print("{0} Start index: {1}".format(INFO, start_idx))
        print("{0} End index: {1}".format(INFO, end_idx))

        region = Region(idx=region_idx, start=start_idx,
                        end=end_idx, window_size=window_size)

        # Optional settings are forwarded only when the configuration has
        # them. NOTE: "quality_theshold" is spelled exactly as the key is
        # read and as the keyword is passed on; do not "correct" it here.
        window_kwargs = {
            key: configuration[key]
            for key in ("quality_theshold", "debug")
            if key in configuration
        }

        print("{0} Creating WGA Windows...".format(INFO))
        ref_file = configuration["reference_file"]["filename"]
        wga_file = configuration["test_file"]["filename"]
        region.make_wga_windows(chromosome=chrom,
                                ref_filename=ref_file,
                                test_filename=wga_file,
                                **window_kwargs)

        if region.get_n_windows(type_=WindowType.WGA) == 0:
            raise Error("WGA windows have not been created")

        print("{0} Number of WGA windows: {1}".format(
            INFO, region.get_n_windows(type_=WindowType.WGA)))

        print("{0} Creating No WGA Windows...".format(INFO))
        ref_file = configuration["reference_file"]["filename"]
        no_wga_file = configuration["no_wga_file"]["filename"]
        region.make_no_wga_windows(chromosome=chrom,
                                   ref_filename=ref_file,
                                   test_filename=no_wga_file,
                                   **window_kwargs)

        if region.get_n_windows(type_=WindowType.NO_WGA) == 0:
            raise Error("Non-WGA windows have not been created")

        print("{0} Number of non-wga windows: {1}".format(
            INFO, region.get_n_windows(type_=WindowType.NO_WGA)))

        if configuration.get("check_windowing_sanity"):
            region.check_windows_sanity()

        # compute the mixed windows for the region
        region.get_mixed_windows()

        # filter the windows for N's: removal takes precedence over marking
        if configuration.get("remove_windows_with_N"):
            print("{0} Filtering windows for Ns...".format(INFO))
            region.remove_windows_with_ns()
            print("{0} Number of wga windows after filtering: {1}".format(
                INFO, region.get_n_windows(type_=WindowType.WGA)))
            print("{0} Number of non-wga windows after filtering: {1}".format(
                INFO, region.get_n_windows(type_=WindowType.NO_WGA)))
            print("{0} Done...".format(INFO))
        elif configuration.get("mark_N_windows"):
            # The two adjacent literals (and the resulting double space)
            # are kept exactly as emitted by the original message.
            print("{0} Marking N "
                  " windows with: {1}".format(
                      INFO, configuration["mark_for_N_windows"]))
            n_marked = region.mark_windows_with_ns(
                n_mark=configuration["mark_for_N_windows"])
            print("{0} Marked as N {1} Windows".format(INFO, n_marked))
        else:
            print("{0} No filtering windows for Ns requested...".format(INFO))

        print("{0} Number of mixed windows: {1}".format(
            INFO, region.get_n_mixed_windows()))
        print("{0} Number of N windows: {1}".format(
            INFO, region.count_n_windows()))

        if configuration.get("outlier_remove"):
            region.remove_outliers(configuration=configuration)
            print("{0} Number of windows after outlier removal: {1}".format(
                INFO, region.get_n_mixed_windows()))
            print("{0} Number of N windows after outlier removal {1}".format(
                INFO, region.count_n_windows()))
        else:
            print("{0} No outlier removal performed".format(INFO))

        # save the region statistics
        region.save_mixed_windows_statistic(statistic="mean")

        built.append(region)

    return built