Exemple #1
0
    def run(self, manager):
        """Run MGRA on the current breakpoint graph and store its guidance graph.

        The MGRA executable path is read from ``manager.configuration``
        (key ``executable_path`` under ``mgra``). When it is ``None`` the task
        logs a message and returns without doing anything else.

        Otherwise the genomes' block orders and an MGRA configuration are
        written into a ``tmp_mgra`` directory below the gos-asm output
        directory, MGRA is executed, and every ``*.gen`` file found in the
        ``genomes`` sub-directory of the MGRA output is read into a single
        ``BreakpointGraph`` that is stored under
        ``manager.data["mgra"]["guidance_graph"]``.

        :param manager: object exposing ``configuration``, ``data`` and
            ``logger`` attributes, as used throughout this method.
        """
        # Imported locally so the method does not depend on a module-level
        # ``import subprocess`` being present in this file.
        import subprocess

        mgra_ex_path = get_from_dict_with_path(manager.configuration,
                                               key="executable_path",
                                               path=["mgra"])
        manager.logger.info("=" * 80)
        if mgra_ex_path is None:
            manager.logger.info(
                "MGRA executable path is not supplied, skipping the MGRA based tasks"
            )
            return
        manager.logger.info(
            "Preparing data to communicate with MGRA and obtain guidance graph"
        )
        temp_dir = os.path.join(
            manager.configuration["gos-asm"]["output"]["dir"], "tmp_mgra")
        if not os.path.exists(temp_dir):
            # makedirs (unlike mkdir) also creates missing parent directories.
            os.makedirs(temp_dir)
        blocks_file_name = os.path.join(temp_dir, "blocks.txt")
        config_file_name = os.path.join(temp_dir, "config.cfg")
        mgra_output_dir_name = os.path.join(temp_dir, "output/")

        manager.logger.debug(
            "Writing blocks orders in GRIMM format to {file_name}".format(
                file_name=blocks_file_name))
        GRIMMWriter.print_genomes_as_grimm_blocks_orders(
            bg=manager.data["gos-asm"]["bg"], file_name=blocks_file_name)

        manager.logger.debug(
            "Writing configuration file for MGRA run to {file_name}".format(
                file_name=config_file_name))
        config = self.create_mgra_config(blocks_file_name=blocks_file_name,
                                         manager=manager)
        with open(config_file_name, "wt") as destination:
            json.dump(obj=config, fp=destination)
        manager.logger.info("Running MGRA on prepared configuration")
        # An argument list (no shell) keeps paths containing spaces or shell
        # metacharacters intact and avoids command injection through the
        # configuration. "~" is expanded explicitly because no shell does it.
        command = [
            os.path.expanduser(mgra_ex_path),
            "-c", config_file_name,
            "-o", mgra_output_dir_name,
        ]
        try:
            return_code = subprocess.call(command)
        except OSError as error:
            # E.g. the executable does not exist or is not executable. Logged
            # rather than raised so the control flow matches the previous
            # os.system based behaviour.
            manager.logger.error(
                "Failed to launch MGRA: {error}".format(error=error))
            return_code = None
        if return_code == 0:
            manager.logger.debug("MGRA has successfully finished")
        elif return_code is not None:
            manager.logger.error(
                "MGRA finished with a non-zero exit code {code}".format(
                    code=return_code))
        manager.logger.info("Reading MGRA produced guidance graph")

        genomes_dir = os.path.join(mgra_output_dir_name, "genomes")
        full_genomes_paths = [
            os.path.join(genomes_dir, name)
            for name in os.listdir(genomes_dir)
            if name.endswith(".gen")
        ]
        guidance_bg = BreakpointGraph()
        for file_name in full_genomes_paths:
            with open(file_name, "rt") as source:
                guidance_bg.update(
                    breakpoint_graph=GRIMMReader.get_breakpoint_graph(
                        stream=source, merge_edges=False),
                    merge_edges=False)
        if "mgra" not in manager.data:
            manager.data["mgra"] = {}
        manager.data["mgra"]["guidance_graph"] = guidance_bg
        manager.logger.info("Obtained MGRA produced guidance graph")
Exemple #2
0
 def test_get_fragments_grimm_from_breakpoint_graph_single_genome_with_repeat_based_merges(
         self):
     """Fragments joined by a k-break on repeat vertices are reported together.

     After the k-break replaces edges (1h, repeat) and (2t, repeat) with
     (1h, 2t) and (repeat, repeat), the output must contain scaffold1 and
     scaffold2 as one entry (in either orientation) and scaffold3 on its own.
     """
     grimm_lines = [
         ">Mouse",
         "# data :: fragment : name = scaffold1",
         "1 ALC__repeat $",
         "# data :: fragment : name = scaffold2",
         "ALC__repeat 2 $",
         "# data :: fragment : name = scaffold3",
         "ALC__repeat 3 $",
     ]
     graph = self._populate_bg(data=grimm_lines)
     repeat_vertex_1 = graph.get_vertex_by_name("1h__repeat:ALCt__infinity")
     repeat_vertex_2 = graph.get_vertex_by_name("2t__repeat:ALCh__infinity")
     block_vertex_1 = graph.get_vertex_by_name("1h")
     block_vertex_2 = graph.get_vertex_by_name("2t")
     merge = KBreak(
         start_edges=[(block_vertex_1, repeat_vertex_1),
                      (block_vertex_2, repeat_vertex_2)],
         result_edges=[(block_vertex_1, block_vertex_2),
                       (repeat_vertex_1, repeat_vertex_2)],
         multicolor=Multicolor(BGGenome("Mouse")))
     graph.apply_kbreak(kbreak=merge)
     output = GRIMMWriter.get_fragments_in_grimm_from_breakpoint_graph(
         bg=graph)
     # Each pair lists the two acceptable orientations of one entry.
     acceptable_orientations = (
         ["scaffold1 scaffold2 $", "-scaffold2 -scaffold1 $"],
         ["scaffold3 $", "-scaffold3 $"],
     )
     for orientations in acceptable_orientations:
         self.assertTrue(any(option in output for option in orientations))
Exemple #3
0
 def test_get_grimm_from_breakpoint_graph_single_chromosome(self):
     """A one-chromosome genome yields its header plus one block-order line.

     The chromosome may be reported in either orientation.
     """
     graph = self._populate_bg(data=[">Mouse", "1 2 3 4 5 $"])
     output = GRIMMWriter.get_blocks_in_grimm_from_breakpoint_graph(
         bg=graph)
     self.assertEqual(len(output), 2)
     self.assertIn(">Mouse", output)
     orientations = ["1 2 3 4 5 $", "-5 -4 -3 -2 -1 $"]
     self.assertTrue(any(option in output for option in orientations))
Exemple #4
0
 def test_get_grimm_strings_from_breakpoint_graph_single_genome(self):
     """The single-genome fixture yields a header and two chromosome lines.

     Each chromosome is accepted in either orientation.
     """
     self._populate_single_genome_bg()
     output = GRIMMWriter.get_blocks_in_grimm_from_breakpoint_graph(
         bg=self.single_genome_bg)
     self.assertEqual(len(output), 3)
     self.assertIn(">Mouse", output)
     # Each pair lists the two acceptable orientations of one chromosome.
     acceptable_orientations = (
         ["1 2 3 4 $", "-4 -3 -2 -1 $"],
         ["5 6 7 8 $", "-8 -7 -6 -5 $"],
     )
     for orientations in acceptable_orientations:
         self.assertTrue(any(option in output for option in orientations))
Exemple #5
0
    def test_output_genomes_as_grimm(self):
        """Round-trip the four-genome graph through a GRIMM file on disk.

        The graph is written with
        ``GRIMMWriter.print_genomes_as_grimm_blocks_orders``, read back with
        ``GRIMMReader.get_breakpoint_graph`` and compared to the original by
        node count, edge count, node set and overall set of colors. The file
        is removed afterwards whether or not the test passes.
        """
        self._populate_four_genomes_bg(merge_edges=True)
        file_name = "file_name.txt"
        try:
            # The write happens inside the ``try`` so that a file left behind
            # by a failing writer is still cleaned up by the ``finally``.
            GRIMMWriter.print_genomes_as_grimm_blocks_orders(
                bg=self.four_genome_bg, file_name=file_name)
            with open(file_name, "rt") as source:
                new_bg = GRIMMReader.get_breakpoint_graph(stream=source,
                                                          merge_edges=True)
                self.assertEqual(len(list(new_bg.nodes())),
                                 len(list(self.four_genome_bg.nodes())))
                self.assertEqual(len(list(new_bg.edges())),
                                 len(list(self.four_genome_bg.edges())))

                self.assertSetEqual(set(new_bg.nodes()),
                                    set(self.four_genome_bg.nodes()))
                self.assertSetEqual(
                    new_bg.get_overall_set_of_colors(),
                    self.four_genome_bg.get_overall_set_of_colors())

        finally:
            if os.path.exists(file_name):
                os.remove(file_name)
Exemple #6
0
 def test_get_fragments_grimm_from_breakpoint_graph_single_genome(self):
     """Each of the three named fragments is reported as its own entry.

     Every fragment is accepted in either orientation.
     """
     grimm_lines = [
         ">Mouse",
         "# data :: fragment : name = scaffold1",
         "1 repeat__LC-1 $",
         "# data :: fragment : name = scaffold2",
         "2 $",
         "# data :: fragment : name = scaffold3",
         "repeat__ALC 3 $",
     ]
     graph = self._populate_bg(data=grimm_lines)
     output = GRIMMWriter.get_fragments_in_grimm_from_breakpoint_graph(
         bg=graph)
     # Each pair lists the two acceptable orientations of one fragment.
     acceptable_orientations = (
         ["scaffold1 $", "-scaffold1 $"],
         ["scaffold2 $", "-scaffold2 $"],
         ["scaffold3 $", "-scaffold3 $"],
     )
     for orientations in acceptable_orientations:
         self.assertTrue(any(option in output for option in orientations))
Exemple #7
0
    def test_get_grimm_strings_from_breakpoints_graph_two_genomes(self):
        """The two-genome fixture yields six lines covering both genomes.

        Both genome headers must be present, and each expected chromosome is
        accepted in either orientation.
        """
        self._populate_two_genomes_bg()
        output = GRIMMWriter.get_blocks_in_grimm_from_breakpoint_graph(
            bg=self.two_genome_bg)
        self.assertEqual(len(output), 6)
        for header in (">Mouse", ">Human"):
            self.assertIn(header, output)

        # Each pair lists the two acceptable orientations of one chromosome.
        acceptable_orientations = (
            ["1 2 3 4 $", "-4 -3 -2 -1 $"],
            ["5 6 7 8 $", "-8 -7 -6 -5 $"],
            ["1 4 3 2 $", "-2 -3 -4 -1 $"],
            ["5 -7 -6 8 $", "-8 6 7 -5 $"],
        )
        for orientations in acceptable_orientations:
            self.assertTrue(
                any(option in output for option in orientations))
Exemple #8
0
    def test_get_grimm_from_breakpoint_graph_four_genomes(self):
        """The four-genome fixture yields twelve lines covering all genomes.

        All four genome headers must be present, and each expected chromosome
        is accepted in either orientation.
        """
        self._populate_four_genomes_bg()
        output = GRIMMWriter.get_blocks_in_grimm_from_breakpoint_graph(
            bg=self.four_genome_bg)

        self.assertEqual(len(output), 12)
        for header in (">Mouse", ">Human", ">Rat", ">Chimpanzee"):
            self.assertIn(header, output)

        # Each pair lists the two acceptable orientations of one chromosome;
        # the order of the pairs mirrors the order of the original checks.
        acceptable_orientations = (
            ["1 2 3 4 $", "-4 -3 -2 -1 $"],
            ["5 6 7 8 $", "-8 -7 -6 -5 $"],
            ["1 4 3 2 $", "-2 -3 -4 -1 $"],
            ["5 -7 -6 8 $", "-8 6 7 -5 $"],
            ["0 1 4 5 $", "-5 -4 -1 -0 $"],
            ["10 12 8 7 $", "-7 -8 -12 -10 $"],
            ["5 6 7 8 $", "-8 -7 -6 -5 $"],
            ["1 -4 -3 -2 $", "2 3 4 -1 $"],
        )
        for orientations in acceptable_orientations:
            self.assertTrue(
                any(option in output for option in orientations))