def make_window_regions(configuration):
    """Build a ``Region`` for every (start, end) pair in the configuration.

    For each pair taken from ``configuration["regions"]`` a ``Region`` is
    constructed, its WGA and non-WGA windows are created, the mixed windows
    are computed, windows containing Ns are optionally removed or marked,
    outliers are optionally removed, and ``save_mixed_windows_statistic``
    is called with ``statistic="mean"``.

    Parameters
    ----------
    configuration
        Mapping with the run settings. Keys always read here:
        ``"window_size"``, ``"chromosome"``, ``"regions"`` (holding
        ``"start"`` and ``"end"`` sequences), ``"reference_file"``,
        ``"test_file"`` and ``"no_wga_file"`` (each with a ``"filename"``
        entry). Optional keys: ``"quality_theshold"`` and ``"debug"``
        (forwarded to the window builders when present),
        ``"check_windowing_sanity"``, ``"remove_windows_with_N"``,
        ``"mark_N_windows"`` (which also reads ``"mark_for_N_windows"``)
        and ``"outlier_remove"``.

    Returns
    -------
    list
        The created regions, in the order of the configured pairs.

    Raises
    ------
    Error
        If a region has zero WGA windows or zero non-WGA windows after
        the corresponding windows were created.
    """

    def _enabled(option):
        # An option is on only when the key is present and its value truthy.
        return option in configuration and configuration[option]

    print("{0} Creating window regions...".format(INFO))

    windowsize = configuration["window_size"]
    chromosome = configuration["chromosome"]
    print("{0} Window size: {1}".format(INFO, windowsize))
    print("{0} Chromosome: {1}".format(INFO, chromosome))

    regions = configuration["regions"]
    print("{0} Regions used {1}".format(INFO, regions))
    boundaries = list(zip(regions["start"], regions["end"]))

    regions_created = []
    for region_idx, (start_idx, end_idx) in enumerate(boundaries):
        print("{0} Start index: {1}".format(INFO, start_idx))
        print("{0} End index: {1}".format(INFO, end_idx))

        region = Region(idx=region_idx,
                        start=start_idx,
                        end=end_idx,
                        window_size=windowsize)

        # Forward only the optional settings that were actually supplied.
        kwargs = {key: configuration[key]
                  for key in ("quality_theshold", "debug")
                  if key in configuration}

        print("{0} Creating WGA Windows...".format(INFO))
        region.make_wga_windows(
            chromosome=chromosome,
            ref_filename=configuration["reference_file"]["filename"],
            test_filename=configuration["test_file"]["filename"],
            **kwargs)

        if region.get_n_windows(type_=WindowType.WGA) == 0:
            raise Error("WGA windows have not been created")
        print("{0} Number of WGA windows: {1}".format(
            INFO, region.get_n_windows(type_=WindowType.WGA)))

        print("{0} Creating No WGA Windows...".format(INFO))
        region.make_no_wga_windows(
            chromosome=chromosome,
            ref_filename=configuration["reference_file"]["filename"],
            test_filename=configuration["no_wga_file"]["filename"],
            **kwargs)

        if region.get_n_windows(type_=WindowType.NO_WGA) == 0:
            raise Error("Non-WGA windows have not been created")
        print("{0} Number of non-wga windows: {1}".format(
            INFO, region.get_n_windows(type_=WindowType.NO_WGA)))

        if _enabled("check_windowing_sanity"):
            region.check_windows_sanity()

        # compute the mixed windows for the region
        region.get_mixed_windows()

        # filter the windows for N's: removing takes precedence over marking
        if _enabled("remove_windows_with_N"):
            print("{0} Filtering windows for Ns...".format(INFO))
            region.remove_windows_with_ns()
            print("{0} Number of wga windows after filtering: {1}".format(
                INFO, region.get_n_windows(type_=WindowType.WGA)))
            print("{0} Number of non-wga windows after filtering: {1}".format(
                INFO, region.get_n_windows(type_=WindowType.NO_WGA)))
            print("{0} Done...".format(INFO))
        elif _enabled("mark_N_windows"):
            n_mark = configuration["mark_for_N_windows"]
            # The doubled space in this message is what the program has
            # always emitted; keep it.
            print("{0} Marking N  windows with: {1}".format(INFO, n_mark))
            counter_ns = region.mark_windows_with_ns(n_mark=n_mark)
            print("{0} Marked as N {1} Windows".format(INFO, counter_ns))
        else:
            print("{0} No filtering windows for Ns requested...".format(INFO))

        print("{0} Number of mixed windows: {1}".format(
            INFO, region.get_n_mixed_windows()))
        print("{0} Number of N windows: {1}".format(
            INFO, region.count_n_windows()))

        if _enabled("outlier_remove"):
            region.remove_outliers(configuration=configuration)
            print("{0} Number of windows after outlier removal: {1}".format(
                INFO, region.get_n_mixed_windows()))
            print("{0} Number of N windows after outlier removal {1}".format(
                INFO, region.count_n_windows()))
        else:
            print("{0} No outlier removal performed".format(INFO))

        # save the region statistics
        region.save_mixed_windows_statistic(statistic="mean")
        regions_created.append(region)

    return regions_created
def make_window_regions(configuration):
    """Build a ``Region`` with WGA and non-WGA windows for each region.

    When ``configuration['processing']['type']`` equals ``'multi'`` the
    work is handed to ``parallel.par_make_window_regions`` and its result
    is returned. Otherwise every (start, end) pair from
    ``configuration["regions"]`` is processed in this process.

    Parameters
    ----------
    configuration
        Mapping with the run settings. Keys read here: ``"processing"``
        (with ``"type"``), ``"window_size"``, ``"chromosome"``,
        ``"regions"`` (holding ``"start"`` and ``"end"`` sequences),
        ``"sam_read_config"``, ``"reference_file"``, ``"wga_file"`` and
        ``"no_wga_file"`` (each with a ``"filename"`` entry), and the
        optional ``"debug"`` which is forwarded to the window builders.

    Returns
    -------
    list
        The created regions, in the order of the configured pairs (or
        whatever ``par_make_window_regions`` returns on the multi path).

    Raises
    ------
    Error
        If a region has zero WGA windows or zero non-WGA windows after
        the corresponding windows were created.
    """

    def _announce(message):
        # Print and flush right away so the progress line is not left
        # sitting in the stdout buffer.
        print(message)
        sys.stdout.flush()

    print("{0} Creating window regions...".format(INFO))

    processing_type = configuration['processing']['type']
    print("{0} Processing type is: {1}".format(INFO, processing_type))

    if processing_type == 'multi':
        # Imported only when the parallel path is actually taken.
        from parallel import par_make_window_regions
        return par_make_window_regions(configuration=configuration)

    windowsize = configuration["window_size"]
    chromosome = configuration["chromosome"]
    print("{0} Window size: {1}".format(INFO, windowsize))
    print("{0} Chromosome: {1}".format(INFO, chromosome))

    regions = configuration["regions"]
    print("{0} Regions used {1}".format(INFO, regions))
    boundaries = list(zip(regions["start"], regions["end"]))

    regions_created = []
    for region_idx, (start_idx, end_idx) in enumerate(boundaries):
        _announce("{0} Start index: {1}".format(INFO, start_idx))
        _announce("{0} End index: {1}".format(INFO, end_idx))

        region = Region(idx=region_idx,
                        start=start_idx,
                        end=end_idx,
                        window_size=windowsize)

        # Extra keyword arguments shared by both window builders.
        kwargs = {"sam_read_config": configuration["sam_read_config"]}
        if "debug" in configuration:
            kwargs["debug"] = configuration["debug"]

        _announce("{0} Creating WGA Windows...".format(INFO))
        region.make_wga_windows(
            chromosome=chromosome,
            ref_filename=configuration["reference_file"]["filename"],
            bam_filename=configuration["wga_file"]["filename"],
            **kwargs)

        if region.get_n_windows(type_=WindowType.WGA) == 0:
            raise Error("WGA windows have not been created")
        _announce("{0} Number of WGA windows: {1}".format(
            INFO, region.get_n_windows(type_=WindowType.WGA)))

        _announce("{0} Creating No WGA Windows...".format(INFO))
        region.make_no_wga_windows(
            chromosome=chromosome,
            ref_filename=configuration["reference_file"]["filename"],
            bam_filename=configuration["no_wga_file"]["filename"],
            **kwargs)

        if region.get_n_windows(type_=WindowType.NO_WGA) == 0:
            raise Error("Non-WGA windows have not been created")
        _announce("{0} Number of Non WGA windows: {1}".format(
            INFO, region.get_n_windows(type_=WindowType.NO_WGA)))

        regions_created.append(region)

    return regions_created
def par_make_window_regions(configuration):
    """Create the window regions using several processes.

    Every (start, end) pair in ``configuration["regions"]`` is split with
    ``partition_range`` into ``n_procs`` pieces. ``n_procs - 1`` child
    processes are started and the calling process acts as the last rank
    (``n_procs - 1``); each rank runs ``regions_worker`` with the shared
    manager dictionaries. Afterwards the per-rank ``"wga_windows"`` and
    ``"no_wga_windows"`` lists found in the shared dictionary are
    concatenated in rank order and set on a fresh ``Region``.

    Every started child is joined before this function returns or raises,
    so no worker is left running against the manager once an error
    propagates to the caller.

    Parameters
    ----------
    configuration
        Mapping with the run settings. Keys read here: ``"processing"``
        (with ``"n_procs"``), ``"regions"`` (holding ``"start"`` and
        ``"end"`` sequences) and ``"window_size"``. The whole mapping is
        also passed on to ``regions_worker``.

    Returns
    -------
    list
        One ``Region`` per configured (start, end) pair, in order.

    Raises
    ------
    Error
        If a rank recorded an error in the shared error dictionary, if a
        child process exited with a non-zero exit code, or if a region
        ends up with zero WGA or zero non-WGA windows.
    """
    n_procs = configuration["processing"]["n_procs"]
    print("{0} Creating regions with {1} processes".format(INFO, n_procs))
    sys.stdout.flush()

    regions = configuration["regions"]
    regions_list = list(zip(regions["start"], regions["end"]))

    # get the chunks that each process will work on
    chunks_dict = dict()
    for i, (start, end) in enumerate(regions_list):
        chunks_dict[i] = partition_range(start=start, end=end,
                                         npieces=n_procs)
        print("{0} chuncks for region {1}: {2}".format(
            INFO, i, chunks_dict[i]))
        sys.stdout.flush()

    manager = Manager()
    windows_dict = manager.dict()
    errors_dict = manager.dict()
    msg_dict = manager.dict()

    # Pre-populate one slot per (region, rank) and per rank so that reading
    # a slot is valid even for a rank that never wrote to it.
    for i in range(len(regions_list)):
        windows_dict[i] = manager.dict()
        for p in range(n_procs):
            windows_dict[i][p] = {"wga_windows": [], "no_wga_windows": []}

    for p in range(n_procs):
        errors_dict[p] = "No error"
        msg_dict[p] = "No msg"

    def _check_rank(rank, exitcode=0):
        # Raise what the rank reported (or its abnormal exit); otherwise
        # echo the message it left behind.
        error = errors_dict[rank]
        if error != "No error":
            raise Error(error)
        if exitcode != 0:
            # The worker died without recording an error; its windows
            # cannot be trusted to be complete.
            raise Error("Process {0} exited with code {1}".format(
                rank, exitcode))
        print("{0} Process {1} msg: {2}".format(INFO, rank, msg_dict[rank]))
        sys.stdout.flush()

    worker_args = (configuration, chunks_dict, windows_dict,
                   msg_dict, errors_dict)
    master = n_procs - 1
    procs = []

    try:
        for p in range(master):
            proc = Process(target=regions_worker, args=(p,) + worker_args)
            proc.start()
            # Track only processes that really started: join() on a
            # process that was never started raises.
            procs.append(proc)

        print("{0} Created: {1} processes".format(INFO, n_procs))
        sys.stdout.flush()

        print("{0} Master process is: {1} ".format(INFO, master))
        sys.stdout.flush()

        print("{0} Master process doing its share".format(INFO))
        sys.stdout.flush()

        regions_worker(master, *worker_args)
        _check_rank(master)

        # wait here and join the processes
        for p, proc in enumerate(procs):
            proc.join()
            _check_rank(p, exitcode=proc.exitcode)
    finally:
        # On any failure above, still wait for the remaining children so
        # they are not left running while the manager goes away. Joining
        # an already finished process returns immediately.
        for proc in procs:
            proc.join()

    assembled_regions = []

    # now bring together the pieces of the regions
    for i, (start, end) in enumerate(regions_list):
        region = Region(idx=i, start=start, end=end,
                        window_size=configuration["window_size"])

        wga_windows = []
        no_wga_windows = []
        for p in range(n_procs):
            piece = windows_dict[i][p]
            wga_windows.extend(piece["wga_windows"])
            no_wga_windows.extend(piece["no_wga_windows"])

        region.set_windows(wtype=WindowType.WGA, windows=wga_windows)
        if region.get_n_windows(type_=WindowType.WGA) == 0:
            raise Error("WGA windows have not been created")
        print("{0} Number of WGA windows: {1}".format(
            INFO, region.get_n_windows(type_=WindowType.WGA)))
        sys.stdout.flush()

        region.set_windows(wtype=WindowType.NO_WGA, windows=no_wga_windows)
        if region.get_n_windows(type_=WindowType.NO_WGA) == 0:
            raise Error("Non-WGA windows have not been created")
        print("{0} Number of Non WGA windows: {1}".format(
            INFO, region.get_n_windows(type_=WindowType.NO_WGA)))
        sys.stdout.flush()

        assembled_regions.append(region)

    return assembled_regions