Example #1
0
 def setUp(self):
     # genome fixtures shared by the test cases: self.genome1 ... self.genome5,
     # each named after a distinct color
     color_names = ("red", "green", "blue", "black", "yellow")
     for position, color_name in enumerate(color_names, start=1):
         setattr(self, "genome{}".format(position), BGGenome(color_name))
Example #2
0
 def test_json_serialization_no_subclassing(self):
     # json serialization of a genome keeps its name and its json id;
     # the schema name entry is present unless schema_info is switched off
     genome = BGGenome("name1")
     expected = {"name": "name1", "g_id": genome.json_id}
     without_schema = genome.to_json(schema_info=False)
     self.assertDictEqual(without_schema, expected)
     expected[BGGenome_JSON_SCHEMA_JSON_KEY] = genome.json_schema_name
     with_schema = genome.to_json()
     self.assertDictEqual(with_schema, expected)
Example #3
0
 def setUp(self):
     # genome fixtures
     self.genome1, self.genome2, self.genome3 = (
         BGGenome("red"), BGGenome("green"), BGGenome("blue"))
     # one fresh breakpoint graph per scenario name
     for graph_attribute in ("single_genome_bg", "two_genome_bg",
                             "four_genome_bg"):
         setattr(self, graph_attribute, BreakpointGraph())
Example #4
0
 def test__eq__(self):
     # two genomes are called equal if they are both of the same class and their hash values are equal
     first = BGGenome("name1")
     second = BGGenome("name2")
     self.assertNotEqual(first, second)
     # once the names match, the genomes compare equal
     second.name = "name1"
     self.assertEqual(first, second)
     # objects of any other type are never equal to a genome
     for non_genome in (5, "name1", [first]):
         self.assertNotEqual(first, non_genome)
Example #5
0
 def test_json_id(self):
     # json id of a genome is used when the genome is serialized to json format;
     # it is an integer equal to the hash of the genome name and follows name changes
     g = BGGenome("name")
     json_id = g.json_id
     self.assertEqual(json_id, hash(g.name))
     self.assertIsInstance(json_id, int)
     g.name = "name1"
     new_json_id = g.json_id
     self.assertEqual(new_json_id, hash(g.name))
     # the type check must target the freshly obtained id, not the old one
     self.assertIsInstance(new_json_id, int)
     self.assertNotEqual(json_id, new_json_id)
Example #6
0
 def test__le__(self):
     # a genome is less than or equal to another BGGenome if it is either less than it ("<" implementation)
     # or equal to it (__eq__ implementation)
     g1 = BGGenome("genome1")
     g2 = BGGenome("genome1")
     self.assertLessEqual(g1, g2)
     self.assertLessEqual(g2, g1)
     self.assertTrue(g1 <= g2 <= g1)
     # a genome with a strictly smaller name is "<=" to both of the equally named genomes
     g3 = BGGenome("genome")
     self.assertLessEqual(g3, g1)
     self.assertLessEqual(g3, g2)
Example #7
0
 def setUp(self):
     # commonly used values during the test cases:
     # self.v1 ... self.v5 hold plain names, self.bg_v1 ... self.bg_v5 hold genomes built from those names
     names = ("v1", "v2", "v3", "v4", "v5")
     for name in names:
         setattr(self, name, name)
     for name in names:
         setattr(self, "bg_" + name, BGGenome(getattr(self, name)))
Example #8
0
 def test_json_serialization(self):
     source_vertex, target_vertex = BlockVertex("v1"), BlockVertex("v2")
     genome_a = BGGenome("genome1")
     # simplest scenario: a single color with multiplicity one, schema info switched off
     bgedge = BGEdge(vertex1=source_vertex,
                     vertex2=target_vertex,
                     multicolor=Multicolor(genome_a))
     expected = {
         "vertex1_id": source_vertex.json_id,
         "vertex2_id": target_vertex.json_id,
         "multicolor": [genome_a.json_id]
     }
     self.assertDictEqual(bgedge.to_json(schema_info=False), expected)

     # several colors, each of them with multiplicity one; schema info is part of the output
     genome_b = BGGenome("genome2")
     bgedge = BGEdge(vertex1=source_vertex,
                     vertex2=target_vertex,
                     multicolor=Multicolor(genome_a, genome_b))
     serialized = bgedge.to_json()
     self.assertTrue(isinstance(serialized, dict))
     self.assertEqual(serialized["vertex1_id"], source_vertex.json_id)
     self.assertEqual(serialized["vertex2_id"], target_vertex.json_id)
     self.assertSetEqual(set(serialized["multicolor"]),
                         {genome_a.json_id, genome_b.json_id})
     self.assertEqual(serialized[BGEdge_JSON_SCHEMA_JSON_KEY],
                      bgedge.json_schema_name)

     # several colors, multiplicities both equal to and greater than one
     genome_c = BGGenome("genome3")
     mixed_multicolor = Multicolor(genome_a, genome_a, genome_a,
                                   genome_b, genome_b, genome_c)
     bgedge = BGEdge(vertex1=source_vertex,
                     vertex2=target_vertex,
                     multicolor=mixed_multicolor)
     serialized = bgedge.to_json(schema_info=False)
     self.assertTrue(isinstance(serialized, dict))
     self.assertEqual(serialized["vertex1_id"], source_vertex.json_id)
     self.assertEqual(serialized["vertex2_id"], target_vertex.json_id)
     self.assertSetEqual(
         set(serialized["multicolor"]),
         {genome_a.json_id, genome_b.json_id, genome_c.json_id})
     # every color id has to be listed as many times as its multiplicity states
     serialized_counts = Counter(serialized["multicolor"])
     expected_counts = Counter(
         color.json_id
         for color in mixed_multicolor.multicolors.elements())
     self.assertDictEqual(serialized_counts, expected_counts)

     # odd scenario: a vertex attribute of the edge is not a BGVertex instance
     # and has no "json_id" attribute either; its hash is expected in the output
     bgedge = BGEdge(vertex1=source_vertex,
                     vertex2=1,
                     multicolor=Multicolor(genome_a))
     serialized = bgedge.to_json()
     self.assertTrue(isinstance(serialized, dict))
     self.assertEqual(serialized["vertex1_id"], source_vertex.json_id)
     self.assertEqual(serialized["vertex2_id"], hash(1))
     self.assertListEqual(serialized["multicolor"], [genome_a.json_id])
     self.assertEqual(serialized[BGEdge_JSON_SCHEMA_JSON_KEY],
                      bgedge.json_schema_name)
Example #9
0
    def test_is_multicolor_vtree_consistent(self):
        # an empty multicolor is vtree consistent even with an empty tree
        mc = Multicolor()
        self.assertTrue(BGTree().multicolor_is_vtree_consistent(mc))

        # every remaining case must exercise the vtree check as well
        # (rather than the plain tree consistency check)
        tree = BGTree("(((v1, v2), v3), (v4, v5));")
        vtree_consistent = [
            (self.bg_v1, ),
            (self.bg_v1, self.bg_v2),
            (self.bg_v4, self.bg_v5),
            (self.bg_v1, self.bg_v2, self.bg_v3),
            (self.bg_v1, self.bg_v2, self.bg_v3, self.bg_v4, self.bg_v5),
        ]
        for genomes in vtree_consistent:
            self.assertTrue(
                tree.multicolor_is_vtree_consistent(Multicolor(*genomes)))

        # the last but one entry contains a genome ("v6") that is absent from the tree
        vtree_inconsistent = [
            (self.bg_v2, self.bg_v3),
            (self.bg_v1, BGGenome("v6")),
            (self.bg_v3, self.bg_v5),
        ]
        for genomes in vtree_inconsistent:
            self.assertFalse(
                tree.multicolor_is_vtree_consistent(Multicolor(*genomes)))
Example #10
0
 def test__lt__(self):
     # BGGenome instances are ordered among themselves by their "name" attributes,
     # while against any non-BGGenome object a genome is always the smaller one
     smaller, bigger = BGGenome("genome1"), BGGenome("genome2")
     self.assertLess(smaller, bigger)
     self.assertGreater(bigger, smaller)
     # a name that is a proper prefix of another name sorts first
     full_name = BGGenome("genome1")
     prefix_name = BGGenome("genome")
     self.assertGreater(full_name, prefix_name)
     self.assertLess(prefix_name, full_name)
     for foreign_object in (1, (1, ), [1], "a"):
         for genome in (full_name, prefix_name):
             self.assertLess(genome, foreign_object)
Example #11
0
 def test_get_fragments_grimm_from_breakpoint_graph_single_genome_with_repeat_based_merges(
         self):
     # three scaffolds of a single genome, each flanked by the same repeat
     grimm_lines = [
         ">Mouse",
         "# data :: fragment : name = scaffold1",
         "1 ALC__repeat $",
         "# data :: fragment : name = scaffold2",
         "ALC__repeat 2 $",
         "# data :: fragment : name = scaffold3",
         "ALC__repeat 3 $"
     ]
     bg = self._populate_bg(data=grimm_lines)
     repeat_vertex_1 = bg.get_vertex_by_name("1h__repeat:ALCt__infinity")
     repeat_vertex_2 = bg.get_vertex_by_name("2t__repeat:ALCh__infinity")
     block_vertex_1 = bg.get_vertex_by_name("1h")
     block_vertex_2 = bg.get_vertex_by_name("2t")
     # join scaffold1 and scaffold2 by rewiring their repeat edges
     start_edges = [(block_vertex_1, repeat_vertex_1),
                    (block_vertex_2, repeat_vertex_2)]
     result_edges = [(block_vertex_1, block_vertex_2),
                     (repeat_vertex_1, repeat_vertex_2)]
     kbreak = KBreak(start_edges=start_edges,
                     result_edges=result_edges,
                     multicolor=Multicolor(BGGenome("Mouse")))
     bg.apply_kbreak(kbreak=kbreak)
     grimm_strings = GRIMMWriter.get_fragments_in_grimm_from_breakpoint_graph(
         bg=bg)
     # either orientation of a fragment chain is an acceptable output
     merged_variants = ["scaffold1 scaffold2 $", "-scaffold2 -scaffold1 $"]
     untouched_variants = ["scaffold3 $", "-scaffold3 $"]
     self.assertTrue(
         any(variant in grimm_strings for variant in merged_variants))
     self.assertTrue(
         any(variant in grimm_strings for variant in untouched_variants))
Example #12
0
    def from_assembly_points_file(cls, separated_values):
        """ Builds an assembly point out of a single record of an assembly points file

        :param separated_values: mapping of column name to the raw string value of the record; columns
            ``genome``, ``repeat1 - repeat2``, ``s_edge``, ``repeat_guidance``, ``MC``, ``id``, ``v1 - v2``
            and ``fragment1 - fragment2`` are read unconditionally, ``cc_id`` is optional
        :return: new ``cls`` instance with ``id``, ``cc_id`` (``None`` when the column is absent), ``vertex1``,
            ``vertex2``, ``fragment1``, ``fragment2``, ``fragment1_sign``, ``fragment2_sign`` and ``info`` set
        :raises KeyError: if a column that is read unconditionally is missing (for a ``dict`` argument)
        :raises ValueError: if a paired column does not split into exactly two parts on ``" - "``
        """
        info = AssemblyPointInfo()
        info.target_color = Multicolor(
            BGGenome(separated_values["genome"].strip()))
        repeat1, repeat2 = separated_values["repeat1 - repeat2"].strip().split(" - ")
        # the last three characters of a repeat entry are cut off the name and the middle one of them
        # is taken as direction (presumably entries look like "name(h)" -- TODO confirm against the writer)
        repeat_info = {
            "repeat_name_1": repeat1[:-3],
            "repeat_dir_1": repeat1[-2],
            "repeat_name_2": repeat2[:-3],
            "repeat_dir_2": repeat2[-2]
        }
        # stored as is, without stripping
        info.support_edge = separated_values["s_edge"]
        repeat_info["repeat_guidance"] = separated_values["repeat_guidance"].strip().split(", ")
        info.repeat_info = repeat_info
        info.target_multicolor = Multicolor(
            *[BGGenome(genome_name)
              for genome_name in separated_values["MC"].strip().split(", ")])

        result = cls()
        result.id = separated_values["id"].strip()
        # "cc_id" is optional: calling strip() on a missing value used to raise AttributeError
        cc_id = separated_values.get("cc_id")
        result.cc_id = cc_id.strip() if cc_id is not None else None
        vertex1, vertex2 = separated_values["v1 - v2"].split(" - ")
        result.vertex1 = vertex1.strip()
        result.vertex2 = vertex2.strip()
        fragment1, fragment2 = separated_values["fragment1 - fragment2"].strip().split(" - ")
        fragment1, fragment2 = fragment1.strip(), fragment2.strip()
        # a leading "-" encodes the sign of a fragment and does not belong to its name
        fragment1_negative = fragment1.startswith("-")
        fragment2_negative = fragment2.startswith("-")
        result.fragment1 = fragment1[1:] if fragment1_negative else fragment1
        result.fragment2 = fragment2[1:] if fragment2_negative else fragment2
        result.fragment1_sign = "-" if fragment1_negative else "+"
        result.fragment2_sign = "-" if fragment2_negative else "+"
        result.info = info
        return result
Example #13
0
 def test_parse_genome_declaration_string(self):
     # a genome declaration string is stripped of surrounding whitespace
     # and everything after its leading ">" character becomes the genome name
     cases = [
         (">genome", "genome"),
         ("  >genome  ", "genome"),
         (">genome__genome", "genome__genome"),
         (">genome>genome", "genome>genome"),
         (">genome.!/.#4", "genome.!/.#4"),
     ]
     for declaration, expected_name in cases:
         self.assertEqual(
             GRIMMReader.parse_genome_declaration_string(declaration),
             BGGenome(expected_name))
Example #14
0
    def parse_genome_declaration_string(data_string):
        """ Builds a :class:`bg.genome.BGGenome` out of a string marked as ``genome declaration``

        Surrounding whitespace is removed from the supplied string, then its first remaining character is dropped
        and the rest is used as the genome name.

        :param data_string: a genome declaration string to take the genome name from
        :type data_string: ``str``
        :return: genome named after the contents of the supplied declaration string
        :rtype: :class:`bg.genome.BGGenome`
        """
        genome_name = data_string.strip()[1:]
        return BGGenome(genome_name)
Example #15
0
 def test_json_deserialization_no_subclassing(self):
     # straightforward case: both name and id are supplied
     genome = BGGenome.from_json(data={"name": "name1", "g_id": 1})
     self.assertEqual(genome.name, "name1")
     # g_id is optional for the genome deserialization itself (the supervising class is the one requiring it)
     name_only = {"name": "name1"}
     self.assertEqual(BGGenome.from_json(data=name_only).name, "name1")
     # schema info is ignored at this level, since the supervising class is the one supplying it
     with_schema_info = {
         "name": "name1",
         BGGenome_JSON_SCHEMA_JSON_KEY: "lalal"
     }
     self.assertEqual(
         BGGenome.from_json(data=with_schema_info).name, "name1")
     # a missing "name" entry is an error
     with self.assertRaises(ValueError):
         BGGenome.from_json(data={})
Example #16
0
    def run(self, manager):
        """ Reads the "gos-asm" input data referenced by the manager configuration into ``manager.data["gos-asm"]``

        The following entries are written: ``bg`` (breakpoint graph), ``phylogenetic_tree``, ``target_multicolor``,
        ``target_multicolors`` and ``repeats_guidance``.

        :param manager: object providing ``logger``, ``configuration`` and ``data``; ``manager.data["gos-asm"]``
            is assumed to exist already, as it is only indexed here, never created
        """
        manager.logger.info("Reading blocks orders data")
        file_paths = manager.configuration["gos-asm"]["input"]["block_orders_file_paths"]
        bg = BreakpointGraph()
        # every input file is parsed separately and folded into a single graph, edges are not merged at either step
        for file_path in file_paths:
            with open(file_path, "rt") as source:
                bg.update(breakpoint_graph=GRIMMReader.get_breakpoint_graph(stream=source, merge_edges=False), merge_edges=False)
        manager.data["gos-asm"]["bg"] = bg

        manager.logger.info("Reading phylogenetic tree information")
        tree = BGTree(newick=manager.configuration["gos-asm"]["input"]["phylogenetic_tree"])
        manager.data["gos-asm"]["phylogenetic_tree"] = tree

        # multicolor made of all the target organisms
        full_tmc = Multicolor(*[BGGenome(genome_name) for genome_name in manager.configuration["gos-asm"]["input"]["target_organisms"]])
        manager.data["gos-asm"]["target_multicolor"] = full_tmc
        # split the full target multicolor using the vtree consistent multicolors of the tree as guidance
        vtree_consistent_target_multicolors = Multicolor.split_colors(full_tmc,
                                                                      guidance=tree.vtree_consistent_multicolors,
                                                                      account_for_color_multiplicity_in_guidance=False)

        # iterate over a snapshot ([:]) as the list itself is appended to inside of the loop;
        # every non-empty vtree consistent multicolor that is "<=" to some target multicolor
        # and is not in the list yet gets added
        for target_multicolor in vtree_consistent_target_multicolors[:]:
            for vtree_c_multicolor in deepcopy(tree.vtree_consistent_multicolors):
                if vtree_c_multicolor <= target_multicolor \
                        and vtree_c_multicolor not in vtree_consistent_target_multicolors \
                        and len(vtree_c_multicolor.colors) > 0:
                    vtree_consistent_target_multicolors.append(vtree_c_multicolor)

        # longest hashable representation first
        vtree_consistent_target_multicolors = sorted(vtree_consistent_target_multicolors,
                                                     key=lambda mc: len(mc.hashable_representation),
                                                     reverse=True)

        all_target_multicolors = vtree_consistent_target_multicolors[:]
        # for i in range(2, len(vtree_consistent_target_multicolors) + 1):
        #     for comb in itertools.combinations(vtree_consistent_target_multicolors[:], i):
        #         comb = list(comb)
        #         for mc1, mc2 in itertools.combinations(comb, 2):
        #             if len(mc1.intersect(mc2).colors) > 0:
        #                 break
        #         else:
        #             new_mc = Multicolor()
        #             for mc in comb:
        #                 new_mc += mc
        #             all_target_multicolors.append(new_mc)
        # duplicates are dropped by going through a set of hashable representations, out of which
        # Multicolor objects are rebuilt; a set has no order, hence the sorting is performed once again
        hashed_vertex_tree_consistent_multicolors = {mc.hashable_representation for mc in all_target_multicolors}
        all_target_multicolors = [Multicolor(*hashed_multicolor) for hashed_multicolor in
                                  hashed_vertex_tree_consistent_multicolors]
        all_target_multicolors = sorted(all_target_multicolors,
                                        key=lambda mc: len(mc.hashable_representation),
                                        reverse=True)
        manager.data["gos-asm"]["target_multicolors"] = all_target_multicolors
        # log_bg_stats(bg=bg, logger=manager.logger)

        manager.logger.info("Reading repeats-bridges information")
        # the whole manager.data is handed over, with the "bg" entry stored above already in place
        manager.data["gos-asm"]["repeats_guidance"] = get_repeats_bridges_guidance(
            file_name=manager.configuration["gos-asm"]["input"]["repeats_bridges_file"], data=manager.data)
Example #17
0
    def test_json_deserialization_subclassing(self):
        # when an explicit JSONSchema class is supplied, json deserialization has to go through it
        class BGGenomeJSONSchemaNameOptional(BGGenome.BGGenomeJSONSchema):
            @post_load
            def make_object(self, data):
                # a missing name is substituted with a default one before the object is built
                data.setdefault("name", "default_name")
                return super(BGGenomeJSONSchemaNameOptional,
                             self).make_object(data=data)

        genome = BGGenome.from_json(
            data={}, json_schema_class=BGGenomeJSONSchemaNameOptional)
        self.assertEqual(genome.name, "default_name")
Example #18
0
    def get_genome_character_state_by_edge(genome):
        # Character state of ``genome`` with respect to the edge of the enclosing scope:
        #   0      -- the counter entry of the genome equals 1
        #   1      -- no edge is found between the neighbours of the two edge vertices
        #   2 + i  -- such an edge is found; i is the position of the neighbour pair in ``possible_edges``
        if cnt[BGGenome(genome)] == 1:
            return 0

        name_a, name_b = edge.vertex1.name, edge.vertex2.name
        # vertex names are processed in ascending order
        low, high = (name_b, name_a) if name_a > name_b else (name_a, name_b)

        low_neighbour = get_neighbour_with_genome(low, genome)
        high_neighbour = get_neighbour_with_genome(high, genome)
        if not bg.get_edge_by_two_vertices(low_neighbour, high_neighbour):
            return 1

        neighbour_pair = (low_neighbour, high_neighbour)
        # a pair seen for the first time is registered, so that its index is stable afterwards
        if neighbour_pair not in possible_edges:
            possible_edges.append(neighbour_pair)
        return 2 + possible_edges.index(neighbour_pair)
Example #19
0
def get_repeats_bridges_guidance(file_name, data):
    """ Builds a directed repeats guidance graph for every genome (color) of the breakpoint graph

    Every graph starts with a ``"Noneou" -> "Noneoi"`` edge plus two edges per repeat reported by
    ``iter_over_all_repeats``. If ``file_name`` points to an existing file, bridges listed in it are added:
    a line starting with ``>`` declares the genome that the following lines refer to, any other line made
    of exactly two whitespace separated entries describes a bridge between two repeats (only the part of an
    entry before the first ``"__"`` is used, a leading ``"-"`` denotes a reversed repeat). Empty lines, lines
    starting with ``#`` and lines with any other number of entries are skipped, as are bridges listed before
    the first genome declaration.

    :param file_name: path to the repeats bridges file; when it is not an existing file, only the
        breakpoint graph based part is returned
    :param data: mapping with the breakpoint graph stored at ``data["gos-asm"]["bg"]``
    :return: dictionary of genome -> ``DiGraph``
    :raises KeyError: if the file declares a genome that is not among the keys built from the breakpoint graph colors
    """
    result = {}
    opposite_dirs = {
        "h": "t",
        "t": "h"
    }
    bg = data["gos-asm"]["bg"]
    for genome in bg.get_overall_set_of_colors():
        guidance = DiGraph()
        result[genome] = guidance
        guidance.add_edge(str(None) + "ou", str(None) + "oi")
        for repeat_name, r_dir in iter_over_all_repeats(bg=bg, multicolor=Multicolor(genome)):
            opposite_dir = opposite_dirs[r_dir]
            guidance.add_edge(repeat_name + r_dir + "ou", repeat_name + opposite_dir + "oi")
            guidance.add_edge(repeat_name + opposite_dir + "ou", repeat_name + r_dir + "oi")
    # isfile() is False for a non existing path as well, so a separate existence check is not needed
    if not os.path.isfile(file_name):
        return result
    current_genome = None
    with open(file_name, "rt") as bridges_file:
        for raw_line in bridges_file:
            line = raw_line.strip()
            # empty lines and comments carry no data; this used to be an "and" condition, that could never
            # hold, which let two-entry comment lines through to be parsed as bridges
            if not line or line.startswith("#"):
                continue
            if line.startswith(">"):
                current_genome = BGGenome(line[1:])
                continue
            entries = line.split()
            if len(entries) != 2:
                continue
            r1_base, r2_base = (entry.split("__")[0] for entry in entries)
            r1_reversed = r1_base.startswith("-")
            r2_reversed = r2_base.startswith("-")
            r1_name = r1_base[1:] if r1_reversed else r1_base
            r2_name = r2_base[1:] if r2_reversed else r2_base
            # a bridge leaves the first repeat through its "h" extremity (or "t", if reversed)
            # and enters the second one through its "t" extremity (or "h", if reversed)
            r1_suffix = "t" if r1_reversed else "h"
            r2_suffix = "h" if r2_reversed else "t"
            if current_genome is not None:
                result[current_genome].add_edge(r1_name + r1_suffix + "oi", r2_name + r2_suffix + "ou")
    return result
Example #20
0
 def test_get_breakpoint_from_file_with_comment_data_string(self):
     # GRIMM input with "# data :: fragment : name = ..." comment strings mixed in between plain comments:
     # fragment information has to be present in the data of every edge that is read
     grimm_lines = [
         "", "\t", "#comment1", ">genome_name_1", "      #comment1",
         "# data :: fragment : name = chromosome_X", "a b $",
         "   #comment1   ", "\t>genome_name_2",
         "#data::fragment:name=scaffold111", "a $", "", "\n\t"
     ]
     stream = io.StringIO("\n".join(grimm_lines))
     bg = GRIMMReader.get_breakpoint_graph(stream, merge_edges=False)
     self.assertTrue(isinstance(bg, BreakpointGraph))
     # overall shape of the graph
     component_count = len(list(bg.connected_components_subgraphs()))
     self.assertEqual(component_count, 3)
     self.assertEqual(len(list(bg.edges())), 5)
     self.assertEqual(len(list(bg.nodes())), 7)
     # edges are not merged, so each of them is single-colored ...
     single_colored = [
         Multicolor(BGGenome(genome_name))
         for genome_name in ("genome_name_1", "genome_name_2")
     ]
     # ... while condensed edges may carry both colors at once
     condensed_allowed = [
         Multicolor(BGGenome("genome_name_1")),
         Multicolor(BGGenome("genome_name_2")),
         Multicolor(BGGenome("genome_name_1"), BGGenome("genome_name_2"))
     ]
     for current_edge in bg.edges():
         self.assertTrue(current_edge.multicolor in single_colored)
     for current_edge in bg.edges():
         condensed = bg.get_condensed_edge(vertex1=current_edge.vertex1,
                                           vertex2=current_edge.vertex2)
         self.assertTrue(condensed.multicolor in condensed_allowed)
     infinity_edge_count = len(
         [current_edge for current_edge in bg.edges()
          if current_edge.is_infinity_edge])
     self.assertEqual(infinity_edge_count, 4)
     # fragment data parsed from the comment strings
     for current_edge in bg.edges():
         edge_data = current_edge.data
         self.assertIn("fragment", edge_data)
         fragment_data = edge_data["fragment"]
         self.assertIsInstance(fragment_data, dict)
         self.assertIn("name", fragment_data)
         self.assertIn(fragment_data["name"],
                       {"chromosome_X", "scaffold111"})
     # forward orientation of a regular edge and of an infinity edge
     vertex_ah = bg.get_vertex_by_name("ah")
     vertex_bt = bg.get_vertex_by_name("bt")
     vertex_ah_infinity = bg.get_vertex_by_name("ah__infinity")
     regular_edge = bg.get_edge_by_two_vertices(vertex1=vertex_ah,
                                                vertex2=vertex_bt)
     self.assertTupleEqual(
         regular_edge.data["fragment"]["forward_orientation"],
         (vertex_ah, vertex_bt))
     infinity_edge = bg.get_edge_by_two_vertices(
         vertex1=vertex_ah, vertex2=vertex_ah_infinity)
     self.assertTupleEqual(
         infinity_edge.data["fragment"]["forward_orientation"],
         (vertex_ah, vertex_ah_infinity))
Example #21
0
 def test_equality(self):
     # multicolors are called equal
     # if they contain information about the same colors with the same multiplicity for each color
     self.assertEqual(Multicolor(self.genome1), Multicolor(self.genome1))
     # the order in which colors are supplied is irrelevant
     self.assertEqual(Multicolor(self.genome1, self.genome2),
                      Multicolor(self.genome2, self.genome1))
     # different colors
     self.assertNotEqual(Multicolor(self.genome1),
                         Multicolor(BGGenome("ret")))
     # same colors, different multiplicity
     self.assertNotEqual(
         Multicolor(self.genome1, self.genome1, self.genome3),
         Multicolor(self.genome1, self.genome3))
     # comparison of a Multicolor object with a non-Multicolor object never reports equality
     non_multicolor_objects = [1, (1, ), [1], "1", Mock()]
     for candidate in non_multicolor_objects:
         self.assertNotEqual(Multicolor(), candidate)
Example #22
0
 def test_is_multicolor_tree_consistent(self):
     # checks whether a supplied multicolor complies with the tree topology
     #
     # an empty multicolor complies with any tree
     self.assertTrue(BGTree().multicolor_is_tree_consistent(Multicolor()))

     tree = BGTree("(((v1, v2), v3),(v4, v5));")
     # (expected outcome, genomes the multicolor is built of), checked in the listed order
     cases = [
         # simple case
         (True, (self.bg_v1, )),
         # a small v1, v2 subtree, still consistent
         (True, (self.bg_v1, self.bg_v2)),
         # bigger v1, v2, v3 subtree, still consistent
         (True, (self.bg_v1, self.bg_v2, self.bg_v3)),
         # v2, v3 is not a valid subtree (its complement is two subtrees, instead of one)
         (False, (self.bg_v2, self.bg_v3)),
         # a genome that is not present in the tree makes the multicolor inconsistent with the tree
         (False, (self.bg_v1, BGGenome("v6"))),
         # other cases for a non wgd tree
         (True, (self.bg_v1, self.bg_v2, self.bg_v3, self.bg_v4)),
         (True, (self.bg_v1, self.bg_v2, self.bg_v3, self.bg_v5)),
         (True, (self.bg_v1, self.bg_v2, self.bg_v4, self.bg_v5)),
         (True, (self.bg_v1, self.bg_v3, self.bg_v4, self.bg_v5)),
         (True, (self.bg_v2, self.bg_v3, self.bg_v4, self.bg_v5)),
         (True, (self.bg_v1, self.bg_v2, self.bg_v3, self.bg_v4,
                 self.bg_v5)),
         (True, (self.bg_v5, self.bg_v4)),
         (True, (self.bg_v3, self.bg_v4, self.bg_v5)),
         (False, (self.bg_v3, self.bg_v5)),
     ]
     for expected_consistent, genomes in cases:
         check = self.assertTrue if expected_consistent else self.assertFalse
         check(tree.multicolor_is_tree_consistent(Multicolor(*genomes)))
Example #23
0
 def test_initialization_incorrect(self):
     # a genome cannot be created without a name: calling the constructor with no arguments is an error
     self.assertRaises(TypeError, BGGenome)
Example #24
0
    def test_get_breakpoint_from_file(self):
        # whole workflow on dummy data: the input is valid,
        # but comes with all kinds of odd indentation and irrelevant lines mixed in
        grimm_lines = [
            "", "\t", "#comment1", ">genome_name_1", "      #comment1",
            "a b $", "\tc -a @\t", "   #comment1   ", "\t>genome_name_2",
            "a $", "", "\n\t"
        ]
        bg = GRIMMReader.get_breakpoint_graph(
            io.StringIO("\n".join(grimm_lines)))
        self.assertTrue(isinstance(bg, BreakpointGraph))
        component_count = len(list(bg.connected_components_subgraphs()))
        self.assertEqual(component_count, 3)
        self.assertEqual(len(list(bg.edges())), 6)
        self.assertEqual(len(list(bg.nodes())), 9)
        allowed_multicolors = [
            Multicolor(BGGenome("genome_name_1"), BGGenome("genome_name_2")),
            Multicolor(BGGenome("genome_name_1")),
            Multicolor(BGGenome("genome_name_2"))
        ]
        for current_edge in bg.edges():
            self.assertTrue(current_edge.multicolor in allowed_multicolors)
        infinity_edge_count = len(
            [current_edge for current_edge in bg.edges()
             if current_edge.is_infinity_edge])
        self.assertEqual(infinity_edge_count, 3)

        # five genomes with overlapping block content
        grimm_lines = [
            ">genome_1", "a $", ">genome_2", "a b $", "# this is a bad genome",
            ">genome_3", "a b c $", ">genome_4", "   # chromosome 1", "b c $",
            ">genome_5", "c $"
        ]
        bg = GRIMMReader.get_breakpoint_graph(
            io.StringIO("\n".join(grimm_lines)))
        self.assertTrue(isinstance(bg, BreakpointGraph))
        component_count = len(list(bg.connected_components_subgraphs()))
        self.assertEqual(component_count, 4)
        self.assertEqual(len(list(bg.edges())), 8)
        self.assertEqual(len(list(bg.nodes())), 12)
        genome1 = BGGenome("genome_1")
        genome2 = BGGenome("genome_2")
        genome3 = BGGenome("genome_3")
        genome4 = BGGenome("genome_4")
        genome5 = BGGenome("genome_5")
        allowed_multicolors = [
            Multicolor(genome1, genome2, genome3),
            Multicolor(genome1),
            Multicolor(genome2, genome3),
            Multicolor(genome2),
            Multicolor(genome3, genome4),
            Multicolor(genome3, genome4, genome5),
            Multicolor(genome4),
            Multicolor(genome5)
        ]
        for current_edge in bg.edges():
            self.assertTrue(current_edge.multicolor in allowed_multicolors)
        infinity_edges = [
            current_edge for current_edge in bg.edges()
            if current_edge.is_infinity_edge
        ]
        self.assertEqual(len(infinity_edges), 6)
        # infinity edges may only carry those of the allowed multicolors
        # whose "multicolors" attribute is not of length two
        allowed_for_infinity = [
            candidate for candidate in allowed_multicolors
            if len(candidate.multicolors) != 2
        ]
        for infinity_edge in infinity_edges:
            self.assertTrue(infinity_edge.multicolor in allowed_for_infinity)
Example #25
0
 def test_initialization(self):
     # the name passed to the constructor is available as the "name" attribute
     genome = BGGenome("name")
     self.assertEqual(genome.name, "name")
Example #26
0
 def test_is_bgedge_vtree_consistent(self):
     bgedge = BGEdge(vertex1="v1", vertex2="v2", multicolor=Multicolor())
     # bgedge with an empty multicolor complies with any tree
     bgedge.multicolor = Multicolor()
     self.assertTrue(BGTree("(v1, v2);").bgedge_is_vtree_consistent(bgedge))

     tree = BGTree("(((v1, v2), v3),(v4, v5));")
     # (expected outcome, genomes of the multicolor assigned to the edge), checked in the listed order
     cases = [
         # simple case
         (True, (self.bg_v1, )),
         # a small v1, v2 subtree, still consistent
         (True, (self.bg_v1, self.bg_v2)),
         # bigger v1, v2, v3 subtree, still consistent
         (True, (self.bg_v1, self.bg_v2, self.bg_v3)),
         # v2, v3 is not a valid subtree (its complement is two subtrees, instead of one)
         (False, (self.bg_v2, self.bg_v3)),
         # a genome that is not present in the tree makes the multicolor inconsistent with the tree
         (False, (self.bg_v1, BGGenome("v6"))),
         # other cases for a non wgd tree
         (False, (self.bg_v1, self.bg_v2, self.bg_v3, self.bg_v4)),
         (False, (self.bg_v1, self.bg_v2, self.bg_v3, self.bg_v5)),
         (False, (self.bg_v1, self.bg_v2, self.bg_v4, self.bg_v5)),
         (False, (self.bg_v1, self.bg_v3, self.bg_v4, self.bg_v5)),
         (False, (self.bg_v2, self.bg_v3, self.bg_v4, self.bg_v5)),
         (True, (self.bg_v1, self.bg_v2, self.bg_v3, self.bg_v4,
                 self.bg_v5)),
         (True, (self.bg_v5, self.bg_v4)),
         (False, (self.bg_v3, self.bg_v4, self.bg_v5)),
         (False, (self.bg_v3, self.bg_v5)),
     ]
     for expected_consistent, genomes in cases:
         bgedge.multicolor = Multicolor(*genomes)
         check = self.assertTrue if expected_consistent else self.assertFalse
         check(tree.bgedge_is_vtree_consistent(bgedge))
Example #27
0
 def test_hash(self):
     # hash of a genome instance is proxied to the hash value of its name
     genome_name = "name"
     self.assertEqual(hash(BGGenome(genome_name)), hash("name"))