def run(self, manager):
    """Load the assembler input data and store it in ``manager.data["gos-asm"]``.

    Reads, in order:

    * the block-order files listed under ``block_orders_file_paths`` into a
      single breakpoint graph (stored as ``"bg"``);
    * the phylogenetic tree given as a Newick string (stored as
      ``"phylogenetic_tree"``);
    * the target organisms, from which the full target multicolor
      (``"target_multicolor"``) and the list of all derived target
      multicolors (``"target_multicolors"``) are built;
    * the repeats-bridges guidance (stored as ``"repeats_guidance"``).

    :param manager: object exposing ``configuration``, ``data`` and ``logger``.
    """

    def multicolor_size(multicolor):
        # Sort key: number of entries in the hashable representation.
        return len(multicolor.hashable_representation)

    manager.logger.info("Reading blocks orders data")
    input_config = manager.configuration["gos-asm"]["input"]
    graph = BreakpointGraph()
    for block_file_path in input_config["block_orders_file_paths"]:
        with open(block_file_path, "rt") as source:
            partial_graph = GRIMMReader.get_breakpoint_graph(stream=source, merge_edges=False)
            graph.update(breakpoint_graph=partial_graph, merge_edges=False)
    asm_data = manager.data["gos-asm"]
    asm_data["bg"] = graph

    manager.logger.info("Reading phylogenetic tree information")
    tree = NewickReader.from_string(data_string=input_config["phylogenetic_tree"])
    asm_data["phylogenetic_tree"] = tree

    full_target = Multicolor(*(BGGenome(name) for name in input_config["target_organisms"]))
    asm_data["target_multicolor"] = full_target
    consistent = Multicolor.split_colors(full_target,
                                         guidance=tree.consistent_multicolors,
                                         account_for_color_multiplicity_in_guidance=False)

    # Extend the split result with every non-empty tree-consistent multicolor
    # contained in one of the originally split pieces (iterate over a snapshot
    # because the list grows while we scan it).
    for piece in list(consistent):
        for candidate in deepcopy(tree.consistent_multicolors):
            if not candidate <= piece:
                continue
            if candidate in consistent:
                continue
            if not len(candidate.colors) > 0:
                continue
            consistent.append(candidate)

    consistent = sorted(consistent, key=multicolor_size, reverse=True)

    # Add the sum of every group (size >= 2) of pairwise non-intersecting
    # multicolors.
    combined = list(consistent)
    for group_size in range(2, len(consistent) + 1):
        for group in itertools.combinations(list(consistent), group_size):
            group = list(group)
            overlapping = any(len(first.intersect(second).colors) > 0
                              for first, second in itertools.combinations(group, 2))
            if overlapping:
                continue
            merged = Multicolor()
            for member in group:
                merged += member
            combined.append(merged)

    # De-duplicate through the hashable representation, then rebuild and order
    # the multicolors from largest to smallest.
    unique_representations = {multicolor.hashable_representation for multicolor in combined}
    rebuilt = [Multicolor(*representation) for representation in unique_representations]
    asm_data["target_multicolors"] = sorted(rebuilt, key=multicolor_size, reverse=True)
    # log_bg_stats(bg=bg, logger=manager.logger)

    manager.logger.info("Reading repeats-bridges information")
    asm_data["repeats_guidance"] = get_repeats_bridges_guidance(
        file_name=input_config["repeats_bridges_file"],
        data=manager.data)
def validate_datasets(root_directory):
    """Check every block file under *root_directory* against its expected distances.

    The directory two levels above each file must be named with three
    ``_``-separated integers; the second is the expected distance between
    the ``'Left'`` and ``'Right'`` genomes and the third is the expected
    distance for every genome pair in ``PAIRS_TO_CHECK``.  A line is printed
    for each mismatch; nothing is returned.

    :param root_directory: directory tree to walk (made absolute first).
    """
    leaf_report = 'Leaf - inner node distance differs in file {0}, expected={1}, real={2}'
    inner_report = 'Inner node distance differs in file {0}, expected={1}, real={2}'

    for current_dir, _subdirs, file_names in walk(path.abspath(root_directory)):
        for file_name in file_names:
            block_file_path = path.join(current_dir, file_name)
            # Expected values are encoded in the name of the file's
            # grandparent directory.
            dataset_dir_name = path.basename(path.dirname(path.dirname(block_file_path)))
            _, expected_inner, expected_leaf = map(int, dataset_dir_name.split('_'))

            with open(block_file_path) as block_file:
                graph = GRIMMReader.get_breakpoint_graph(block_file)

                for genome_pair in PAIRS_TO_CHECK:
                    observed_leaf = get_dcj_distance_two_genomes(graph, genome_pair)
                    if observed_leaf != expected_leaf:
                        print(leaf_report.format(block_file_path, expected_leaf, observed_leaf))

                observed_inner = get_dcj_distance_two_genomes(graph, ('Left', 'Right'))
                if observed_inner != expected_inner:
                    print(inner_report.format(block_file_path, expected_inner, observed_inner))
def run(self, manager):
    """Run MGRA on the current breakpoint graph and load its guidance graph.

    If no MGRA executable path is configured (``configuration["mgra"]
    ["executable_path"]``), the task is skipped.  Otherwise the method:

    1. creates ``<output dir>/tmp_mgra`` and writes ``blocks.txt`` (GRIMM
       block orders) and ``config.cfg`` (JSON configuration) into it;
    2. runs the MGRA executable with ``-c <config> -o <output dir>``;
    3. reads every ``*.gen`` file from ``<tmp_mgra>/output/genomes`` into one
       breakpoint graph stored at ``manager.data["mgra"]["guidance_graph"]``.

    :param manager: object exposing ``configuration``, ``data`` and ``logger``.
    """
    # Imported locally so the block does not depend on a file-level import.
    import subprocess

    mgra_ex_path = get_from_dict_with_path(manager.configuration, key="executable_path", path=["mgra"])
    manager.logger.info("=" * 80)
    if mgra_ex_path is None:
        manager.logger.info("MGRA executable path is not supplied, skipping the MGRA based tasks")
        return

    manager.logger.info("Preparing data to communicate with MGRA and obtain guidance graph")
    temp_dir = os.path.join(manager.configuration["gos-asm"]["output"]["dir"], "tmp_mgra")
    # exist_ok avoids the check-then-create race and also creates a missing
    # parent output directory.
    os.makedirs(temp_dir, exist_ok=True)
    blocks_file_name = os.path.join(temp_dir, "blocks.txt")
    config_file_name = os.path.join(temp_dir, "config.cfg")
    mgra_output_dir_name = os.path.join(temp_dir, "output/")

    manager.logger.debug("Writing blocks orders in GRIMM format to {file_name}".format(file_name=blocks_file_name))
    GRIMMWriter.print_genomes_as_grimm_blocks_orders(bg=manager.data["gos-asm"]["bg"],
                                                     file_name=blocks_file_name)

    manager.logger.debug("Writing configuration file for MGRA run to {file_name}".format(file_name=config_file_name))
    config = self.create_mgra_config(blocks_file_name=blocks_file_name, manager=manager)
    with open(config_file_name, "wt") as destination:
        json.dump(obj=config, fp=destination)

    manager.logger.info("Running MGRA on prepared configuration")
    # An argument list (no shell) keeps paths with spaces or shell
    # metacharacters intact; "~" is expanded explicitly because no shell
    # will do it for us anymore.
    command = [os.path.expanduser(str(mgra_ex_path)),
               "-c", config_file_name,
               "-o", mgra_output_dir_name]
    try:
        return_code = subprocess.call(command)
    except OSError as error:
        # The executable could not be launched at all (missing file, no
        # permission, ...).  Report it instead of claiming success.
        return_code = None
        manager.logger.error("Failed to launch MGRA executable {mgra_ex_path}: {error}"
                             "".format(mgra_ex_path=mgra_ex_path, error=error))
    if return_code == 0:
        manager.logger.debug("MGRA has successfully finished")
    elif return_code is not None:
        manager.logger.error("MGRA exited with non-zero status {return_code}"
                             "".format(return_code=return_code))

    manager.logger.info("Reading MGRA produced guidance graph")
    genomes_dir = os.path.join(mgra_output_dir_name, "genomes")
    full_genomes_paths = [os.path.join(genomes_dir, name)
                          for name in os.listdir(genomes_dir)
                          if name.endswith(".gen")]
    guidance_bg = BreakpointGraph()
    for file_name in full_genomes_paths:
        with open(file_name, "rt") as source:
            guidance_bg.update(breakpoint_graph=GRIMMReader.get_breakpoint_graph(stream=source, merge_edges=False),
                               merge_edges=False)

    if "mgra" not in manager.data:
        manager.data["mgra"] = {}
    manager.data["mgra"]["guidance_graph"] = guidance_bg
    manager.logger.info("Obtained MGRA produced guidance graph")
def run_metrics_on_block_file(block_path, full_correct_tree_file_name):
    """Compare metric results for one block file against the correct tree.

    :param block_path: path of the block file to read as a breakpoint graph.
    :param full_correct_tree_file_name: path passed to ``read_correct_tree``.
    :return: whatever ``compare_metric_results`` returns for the graph/tree pair.
    """
    # The reference tree is loaded first, before the block file is opened.
    reference_tree = read_correct_tree(full_correct_tree_file_name)
    with open(block_path) as handle:
        graph = GRIMMReader.get_breakpoint_graph(handle)
        outcome = compare_metric_results(graph, reference_tree)
        return outcome