def run(self, manager):
    """Run MGRA on the current breakpoint graph and store its guidance graph.

    The task is skipped (with an info log entry) when no MGRA executable
    path is present in ``manager.configuration`` under ``mgra ->
    executable_path``. Otherwise the method:

    1. creates a ``tmp_mgra`` working directory inside the configured
       ``gos-asm`` output directory,
    2. writes the block orders of ``manager.data["gos-asm"]["bg"]`` in GRIMM
       format and the MGRA configuration (as JSON) into that directory,
    3. invokes the MGRA executable with ``-c <config>`` and ``-o <output>``,
    4. reads every ``*.gen`` file from ``<output>/genomes`` and merges them
       (with ``merge_edges=False``) into a single ``BreakpointGraph``,
    5. stores the result in ``manager.data["mgra"]["guidance_graph"]``.

    :param manager: object exposing ``configuration``, ``data`` and
        ``logger`` attributes used as described above.
    :return: ``None``
    """
    # Imported locally so the block does not rely on a file-level import
    # that may not exist.
    import subprocess

    mgra_ex_path = get_from_dict_with_path(manager.configuration,
                                           key="executable_path",
                                           path=["mgra"])
    manager.logger.info("=" * 80)
    if mgra_ex_path is None:
        manager.logger.info(
            "MGRA executable path is not supplied, skipping the MGRA based tasks"
        )
        return
    manager.logger.info(
        "Preparing data to communicate with MGRA and ontain guidance graph"
    )

    temp_dir = os.path.join(
        manager.configuration["gos-asm"]["output"]["dir"], "tmp_mgra")
    if not os.path.exists(temp_dir):
        # makedirs (rather than mkdir) also creates the parent output
        # directory when it does not exist yet.
        os.makedirs(temp_dir)
    blocks_file_name = os.path.join(temp_dir, "blocks.txt")
    config_file_name = os.path.join(temp_dir, "config.cfg")
    mgra_output_dir_name = os.path.join(temp_dir, "output/")

    manager.logger.debug(
        "Writing blocks orders in GRIMM format to {file_name}".format(
            file_name=blocks_file_name))
    GRIMMWriter.print_genomes_as_grimm_blocks_orders(
        bg=manager.data["gos-asm"]["bg"], file_name=blocks_file_name)

    manager.logger.debug(
        "Writing configuration file for MGRA run to {file_name}".format(
            file_name=config_file_name))
    config = self.create_mgra_config(blocks_file_name=blocks_file_name,
                                     manager=manager)
    with open(config_file_name, "wt") as destination:
        json.dump(obj=config, fp=destination)

    manager.logger.info("Running MGRA on prepared configuration")
    # An argument list (no shell) keeps paths containing spaces or shell
    # metacharacters intact and avoids command injection through the
    # configuration values.
    return_code = subprocess.call([
        mgra_ex_path, "-c", config_file_name, "-o", mgra_output_dir_name
    ])
    if return_code == 0:
        manager.logger.debug("MGRA has successfully finished")
    else:
        # Do not claim success when MGRA reported a failure; reading of the
        # output is still attempted, as before.
        manager.logger.warning(
            "MGRA finished with non-zero exit code {return_code}".format(
                return_code=return_code))

    manager.logger.info("Reading MGRA produced guidance graph")
    genomes_dir = os.path.join(mgra_output_dir_name, "genomes")
    full_genomes_paths = [
        os.path.join(genomes_dir, name) for name in os.listdir(genomes_dir)
        if name.endswith(".gen")
    ]
    guidance_bg = BreakpointGraph()
    for file_name in full_genomes_paths:
        with open(file_name, "rt") as source:
            guidance_bg.update(
                breakpoint_graph=GRIMMReader.get_breakpoint_graph(
                    stream=source, merge_edges=False),
                merge_edges=False)

    if "mgra" not in manager.data:
        manager.data["mgra"] = {}
    manager.data["mgra"]["guidance_graph"] = guidance_bg
    manager.logger.info("Obtained MGRA produced guidance graph")
def test_get_fragments_grimm_from_breakpoint_graph_single_genome_with_repeat_based_merges(
        self):
    """Fragments joined by a k-break are reported as one merged fragment.

    Three single-block scaffolds of the ``Mouse`` genome are loaded, each
    flanked by an ``ALC__repeat`` entry. A k-break then replaces the edges
    ``(1h, iv1)`` and ``(2t, iv2)`` with ``(1h, 2t)`` and ``(iv1, iv2)``.
    The fragment output must contain ``scaffold1 scaffold2`` (or its
    reversed, negated form) and ``scaffold3`` on its own.
    """
    grimm_input = [
        ">Mouse",
        "# data :: fragment : name = scaffold1",
        "1 ALC__repeat $",
        "# data :: fragment : name = scaffold2",
        "ALC__repeat 2 $",
        "# data :: fragment : name = scaffold3",
        "ALC__repeat 3 $",
    ]
    bg = self._populate_bg(data=grimm_input)

    # Vertices looked up by name; the "__infinity" ones are presumably the
    # repeat-tagged irregular vertices at the scaffold ends.
    repeat_end_1 = bg.get_vertex_by_name("1h__repeat:ALCt__infinity")
    repeat_end_2 = bg.get_vertex_by_name("2t__repeat:ALCh__infinity")
    block_1_head = bg.get_vertex_by_name("1h")
    block_2_tail = bg.get_vertex_by_name("2t")

    merge = KBreak(
        start_edges=[(block_1_head, repeat_end_1),
                     (block_2_tail, repeat_end_2)],
        result_edges=[(block_1_head, block_2_tail),
                      (repeat_end_1, repeat_end_2)],
        multicolor=Multicolor(BGGenome("Mouse")))
    bg.apply_kbreak(kbreak=merge)

    grimm_strings = GRIMMWriter.get_fragments_in_grimm_from_breakpoint_graph(
        bg=bg)

    def has_any(candidates):
        # True when at least one accepted rendering is in the output.
        return any(candidate in grimm_strings for candidate in candidates)

    self.assertTrue(
        has_any(["scaffold1 scaffold2 $", "-scaffold2 -scaffold1 $"]))
    self.assertTrue(has_any(["scaffold3 $", "-scaffold3 $"]))
def test_get_grimm_from_breakpoint_graph_single_chromosome(self):
    """A one-chromosome genome yields a genome header and one blocks line.

    The output must have exactly two entries: the ``>Mouse`` header and the
    chromosome, which is accepted in either of the two listed forms.
    """
    bg = self._populate_bg(data=[">Mouse", "1 2 3 4 5 $"])
    grimm_strings = GRIMMWriter.get_blocks_in_grimm_from_breakpoint_graph(
        bg=bg)

    self.assertEqual(len(grimm_strings), 2)
    self.assertIn(">Mouse", grimm_strings)

    accepted = ["1 2 3 4 5 $", "-5 -4 -3 -2 -1 $"]
    self.assertTrue(any(entry in grimm_strings for entry in accepted))
def test_get_grimm_strings_from_breakpoint_graph_single_genome(self):
    """A single two-chromosome genome yields a header plus two block lines.

    Uses the fixture built by ``_populate_single_genome_bg``. Each
    chromosome is accepted in either of the two listed forms.
    """
    self._populate_single_genome_bg()
    grimm_strings = GRIMMWriter.get_blocks_in_grimm_from_breakpoint_graph(
        bg=self.single_genome_bg)

    self.assertEqual(len(grimm_strings), 3)
    self.assertIn(">Mouse", grimm_strings)

    accepted_per_chromosome = (
        ["1 2 3 4 $", "-4 -3 -2 -1 $"],
        ["5 6 7 8 $", "-8 -7 -6 -5 $"],
    )
    for accepted in accepted_per_chromosome:
        self.assertTrue(any(entry in grimm_strings for entry in accepted))
def test_output_genomes_as_grimm(self):
    """Genomes written to a GRIMM file can be read back into the same graph.

    The four-genome fixture (built with ``merge_edges=True``) is written to
    ``file_name.txt`` and parsed again with ``GRIMMReader``. The parsed
    graph must match the fixture in node count, edge count, node set and
    overall set of colors.

    The write happens inside the ``try`` block so that the output file is
    removed even when the writer fails after having created it.
    """
    self._populate_four_genomes_bg(merge_edges=True)
    file_name = "file_name.txt"
    try:
        GRIMMWriter.print_genomes_as_grimm_blocks_orders(
            bg=self.four_genome_bg, file_name=file_name)
        with open(file_name, "rt") as source:
            new_bg = GRIMMReader.get_breakpoint_graph(stream=source,
                                                      merge_edges=True)
            self.assertEqual(len(list(new_bg.nodes())),
                             len(list(self.four_genome_bg.nodes())))
            self.assertEqual(len(list(new_bg.edges())),
                             len(list(self.four_genome_bg.edges())))
            self.assertSetEqual(set(new_bg.nodes()),
                                set(self.four_genome_bg.nodes()))
            self.assertSetEqual(
                new_bg.get_overall_set_of_colors(),
                self.four_genome_bg.get_overall_set_of_colors())
    finally:
        # Always clean up the file, whether writing, reading or an
        # assertion failed.
        if os.path.exists(file_name):
            os.remove(file_name)
def test_get_fragments_grimm_from_breakpoint_graph_single_genome(self):
    """Unmerged fragments are each reported on their own line.

    Three named scaffolds of the ``Mouse`` genome are loaded, two of them
    carrying ``repeat__`` entries, and no k-break is applied. Every
    scaffold must appear alone in the fragment output, in either the plain
    or the negated form.
    """
    grimm_input = [
        ">Mouse",
        "# data :: fragment : name = scaffold1",
        "1 repeat__LC-1 $",
        "# data :: fragment : name = scaffold2",
        "2 $",
        "# data :: fragment : name = scaffold3",
        "repeat__ALC 3 $",
    ]
    bg = self._populate_bg(data=grimm_input)
    grimm_strings = GRIMMWriter.get_fragments_in_grimm_from_breakpoint_graph(
        bg=bg)

    accepted_per_fragment = (
        ["scaffold1 $", "-scaffold1 $"],
        ["scaffold2 $", "-scaffold2 $"],
        ["scaffold3 $", "-scaffold3 $"],
    )
    for accepted in accepted_per_fragment:
        self.assertTrue(any(entry in grimm_strings for entry in accepted))
def test_get_grimm_strings_from_breakpoints_graph_two_genomes(self):
    """Two genomes yield two headers and four chromosome lines.

    Uses the fixture built by ``_populate_two_genomes_bg``. The output must
    have six entries: the ``>Mouse`` and ``>Human`` headers plus four
    chromosomes, each accepted in either of the two listed forms.
    """
    self._populate_two_genomes_bg()
    grimm_strings = GRIMMWriter.get_blocks_in_grimm_from_breakpoint_graph(
        bg=self.two_genome_bg)

    self.assertEqual(len(grimm_strings), 6)
    for header in (">Mouse", ">Human"):
        self.assertIn(header, grimm_strings)

    accepted_per_chromosome = (
        ["1 2 3 4 $", "-4 -3 -2 -1 $"],
        ["5 6 7 8 $", "-8 -7 -6 -5 $"],
        ["1 4 3 2 $", "-2 -3 -4 -1 $"],
        ["5 -7 -6 8 $", "-8 6 7 -5 $"],
    )
    for accepted in accepted_per_chromosome:
        self.assertTrue(any(entry in grimm_strings for entry in accepted))
def test_get_grimm_from_breakpoint_graph_four_genomes(self):
    """Four genomes yield four headers and the expected chromosome lines.

    Uses the fixture built by ``_populate_four_genomes_bg``. The output
    must have twelve entries, including the ``>Mouse``, ``>Human``,
    ``>Rat`` and ``>Chimpanzee`` headers. Each expected chromosome is
    accepted in either of the two listed forms.
    """
    self._populate_four_genomes_bg()
    grimm_strings = GRIMMWriter.get_blocks_in_grimm_from_breakpoint_graph(
        bg=self.four_genome_bg)

    self.assertEqual(len(grimm_strings), 12)
    for header in (">Mouse", ">Human", ">Rat", ">Chimpanzee"):
        self.assertIn(header, grimm_strings)

    # Checked in this order; the seventh pair repeats the second one, as in
    # the original list of expectations.
    accepted_per_chromosome = (
        ["1 2 3 4 $", "-4 -3 -2 -1 $"],
        ["5 6 7 8 $", "-8 -7 -6 -5 $"],
        ["1 4 3 2 $", "-2 -3 -4 -1 $"],
        ["5 -7 -6 8 $", "-8 6 7 -5 $"],
        ["0 1 4 5 $", "-5 -4 -1 -0 $"],
        ["10 12 8 7 $", "-7 -8 -12 -10 $"],
        ["5 6 7 8 $", "-8 -7 -6 -5 $"],
        ["1 -4 -3 -2 $", "2 3 4 -1 $"],
    )
    for accepted in accepted_per_chromosome:
        self.assertTrue(any(entry in grimm_strings for entry in accepted))