Example #1
0
 def test_merging_incorrect(self):
     # merging requires both edges to connect the same pair of vertices;
     # every edge below differs from the (v1, v2) edge in at least one endpoint
     # and therefore has to be rejected with a ValueError
     v1, v2, v3, v4 = (BlockVertex(name) for name in ("v1", "v2", "v3", "v4"))
     base_color = Multicolor(self.genome3)
     other_color = Multicolor(self.genome2)
     base_edge = BGEdge(vertex1=v1, vertex2=v2, multicolor=base_color)
     inconsistent_endpoints = [
         (v1, v3),  # only the first vertex is shared
         (v3, v2),  # only the second vertex is shared
         (v3, v4),  # nothing is shared
         (v1, v1),  # loop on the first vertex
         (v2, v2),  # loop on the second vertex
         (v3, v1),  # shared vertex sits on the opposite end
     ]
     for first, second in inconsistent_endpoints:
         candidate = BGEdge(vertex1=first, vertex2=second, multicolor=other_color)
         with self.assertRaises(ValueError):
             BGEdge.merge(base_edge, candidate)
Example #2
0
 def test_get_vtree_consistent_multicolors(self):
     # the getter has to bring the tree's multicolors up to date and return
     # an independent copy of the VT-consistent multicolors stored in the tree
     tree = BGTree("(((v1, v2), v3),(v4, v5));")
     self.assertFalse(tree.multicolors_are_up_to_date)
     vtree_consistent_multicolors = tree.get_vtree_consistent_multicolors()
     self.assertTrue(tree.multicolors_are_up_to_date)
     self.assertIsInstance(vtree_consistent_multicolors, list)
     # content must match the stored list; assertTrue(a, b) would only check
     # truthiness of `a` and use `b` as the failure message
     self.assertEqual(vtree_consistent_multicolors,
                      tree.vtree_consistent_multicolors)
     # ... but neither the list nor any of its elements may be shared with the tree
     self.assertIsNot(vtree_consistent_multicolors,
                      tree.vtree_consistent_multicolors)
     for obtained_mc, stored_mc in zip(vtree_consistent_multicolors,
                                       tree.vtree_consistent_multicolors):
         self.assertIsNot(obtained_mc, stored_mc)
     self.assertSetEqual(
         {
             mc.hashable_representation
             for mc in vtree_consistent_multicolors
         }, tree.vtree_consistent_multicolors_set)
     self.assertEqual(len(vtree_consistent_multicolors), 10)
     # empty multicolor, full multicolor, every leaf and every inner subtree
     ref_vtree_consistent_multicolors = [
         Multicolor(),
         Multicolor(self.bg_v1, self.bg_v2, self.bg_v3, self.bg_v4,
                    self.bg_v5),
         Multicolor(self.bg_v1),
         Multicolor(self.bg_v2),
         Multicolor(self.bg_v3),
         Multicolor(self.bg_v4),
         Multicolor(self.bg_v5),
         Multicolor(self.bg_v1, self.bg_v2),
         Multicolor(self.bg_v4, self.bg_v5),
         Multicolor(self.bg_v1, self.bg_v2, self.bg_v3),
     ]
     for multicolor in ref_vtree_consistent_multicolors:
         self.assertIn(multicolor, vtree_consistent_multicolors)
Example #3
0
 def test__gt__and__ge__(self):
     # > and >= are checked against three right-hand operands:
     # one with a smaller multiplicity, an equal one, and one with a larger multiplicity
     left = Multicolor(self.genome1, self.genome2, self.genome1)
     smaller = Multicolor(self.genome1, self.genome2)
     self.assertTrue(left > smaller)
     self.assertTrue(left >= smaller)
     same = Multicolor(self.genome1, self.genome1, self.genome2)
     self.assertFalse(left > same)
     self.assertTrue(left >= same)
     larger = Multicolor(self.genome1, self.genome1, self.genome2,
                         self.genome2)
     self.assertFalse(left > larger)
     self.assertFalse(left >= larger)
     # a Multicolor object is never greater than or equal to a non-Multicolor object
     for foreign_object in (1, (1, ), [1], "1", Mock()):
         self.assertFalse(left >= foreign_object)
         self.assertFalse(left > foreign_object)
Example #4
0
 def test_hashable_representation(self):
     # a multicolor exposes a hashable representation usable as a set member / dict key;
     # it is expected to be the sorted tuple of all colors, repeated according to multiplicity
     genomes = [
         self.genome1, self.genome2, self.genome3, self.genome4,
         self.genome1, self.genome2, self.genome1
     ]
     mc = Multicolor(*genomes)
     expected = tuple(sorted(genomes))
     representation = mc.hashable_representation
     self.assertIsInstance(representation, tuple)
     self.assertTupleEqual(representation, expected)
     # the representation carries enough information to rebuild an equal multicolor
     self.assertEqual(mc, Multicolor(*representation))
     # different multicolors must not collapse into the same representation
     shorter = Multicolor(*genomes[:-2])
     longer = Multicolor(*genomes[:-1])
     self.assertNotEqual(shorter, longer)
     self.assertNotEqual(shorter.hashable_representation,
                         longer.hashable_representation)
     # hashing the representation must work and agree with the reference tuple
     self.assertEqual(hash(mc.hashable_representation), hash(expected))
Example #5
0
    def test_equality(self):
        # edges compare equal when they join the same (unordered) pair of vertices,
        # carry the same multicolor and, as the last section shows, hold equal data
        v1, v2, v3, v4 = (BlockVertex(name) for name in ("v1", "v2", "v3", "v4"))
        color_a = Multicolor(self.genome3)
        color_b = Multicolor(self.genome2)
        edge1 = BGEdge(vertex1=v1, vertex2=v2, multicolor=color_a)
        edge2 = BGEdge(vertex1=v1, vertex2=v2, multicolor=color_b)
        edge3 = BGEdge(vertex1=v1, vertex2=v3, multicolor=color_b)
        edge4 = BGEdge(vertex1=v3, vertex2=v4, multicolor=color_a)
        # differing multicolor and/or differing vertices
        for left, right in ((edge1, edge2), (edge1, edge3), (edge2, edge3),
                            (edge1, edge4)):
            self.assertNotEqual(left, right)
        # vertex order is irrelevant
        flipped = BGEdge(vertex1=v2, vertex2=v1, multicolor=color_a)
        twin = BGEdge(vertex1=v1, vertex2=v2, multicolor=color_a)
        for left, right in ((edge1, flipped), (edge1, twin), (flipped, twin)):
            self.assertEqual(left, right)
        # comparison with a non-edge object
        self.assertNotEqual(edge1, 5)
        one_shared_vertex = BGEdge(vertex1=v3, vertex2=v1, multicolor=color_a)
        self.assertNotEqual(edge1, one_shared_vertex)

        # data attached to the edges takes part in the comparison as well
        self.assertEqual(edge1, flipped)
        flipped.data = {"fragment": {"name": 1}}
        edge1.data = {"fragment": {"name": 2}}
        self.assertNotEqual(edge1, flipped)
        edge1.data = {"fragment": {"name": 1}}
        self.assertEqual(edge1, flipped)
Example #6
0
    def __update_consistent_multicolors(self):
        """ Recompute and store T-consistent and VT-consistent multicolors for the current tree topology.

        Both collections are stored twice: as a set of hashable representations and as a list of
        Multicolor objects rebuilt from those representations (so duplicates are dropped).
        Marks the multicolors as up to date once finished.
        """
        vt_multicolors = self.__get_v_tree_consistent_leaf_based_hashable_multicolors()

        vt_hashes = {multicolor.hashable_representation for multicolor in vt_multicolors}
        self.vtree_consistent_multicolors_set = vt_hashes
        self.vtree_consistent_multicolors = [Multicolor(*hashed) for hashed in vt_hashes]

        # T-consistent multicolors are the VT-consistent ones together with their complements;
        # complements are taken with respect to the first multicolor reported by the helper,
        # which is treated as the full multicolor
        full_multicolor = vt_multicolors[0]
        t_multicolors = [
            member
            for multicolor in vt_multicolors
            for member in (multicolor, full_multicolor - multicolor)
        ]

        t_hashes = {multicolor.hashable_representation for multicolor in t_multicolors}
        self.tree_consistent_multicolors_set = t_hashes
        self.tree_consistent_multicolors = [Multicolor(*hashed) for hashed in t_hashes]
        self.multicolors_are_up_to_date = True
Example #7
0
    def test_is_multicolor_vtree_consistent(self):
        # an empty multicolor is VT-consistent even for an empty tree
        mc = Multicolor()
        self.assertTrue(BGTree().multicolor_is_vtree_consistent(mc))

        tree = BGTree("(((v1, v2), v3), (v4, v5));")
        # this test targets the VT-consistency predicate, so it must call
        # multicolor_is_vtree_consistent (not multicolor_is_tree_consistent)
        # a single leaf, every inner subtree and the whole tree are VT-consistent
        vtree_consistent = [
            Multicolor(self.bg_v1),
            Multicolor(self.bg_v1, self.bg_v2),
            Multicolor(self.bg_v4, self.bg_v5),
            Multicolor(self.bg_v1, self.bg_v2, self.bg_v3),
            Multicolor(self.bg_v1, self.bg_v2, self.bg_v3, self.bg_v4,
                       self.bg_v5),
        ]
        for multicolor in vtree_consistent:
            self.assertTrue(tree.multicolor_is_vtree_consistent(multicolor))
        # leaf sets that do not form a subtree, or that mention a genome absent from the tree
        vtree_inconsistent = [
            Multicolor(self.bg_v2, self.bg_v3),
            Multicolor(self.bg_v1, BGGenome("v6")),
            Multicolor(self.bg_v3, self.bg_v5),
        ]
        for multicolor in vtree_inconsistent:
            self.assertFalse(tree.multicolor_is_vtree_consistent(multicolor))
Example #8
0
    def run(self, manager):
        """ Load all input data required by the "gos-asm" pipeline into ``manager.data["gos-asm"]``.

        Reads the block-order files into a single breakpoint graph (key ``"bg"``), the phylogenetic
        tree (``"phylogenetic_tree"``), the full target multicolor (``"target_multicolor"``), the list
        of target multicolors sorted from largest to smallest (``"target_multicolors"``) and the
        repeats-bridges guidance (``"repeats_guidance"``).

        :param manager: object exposing ``logger``, ``configuration`` and ``data``
        """

        def multicolor_size(mc):
            # number of colors, multiplicity included
            return len(mc.hashable_representation)

        manager.logger.info("Reading blocks orders data")
        input_config = manager.configuration["gos-asm"]["input"]
        block_orders_paths = input_config["block_orders_file_paths"]
        bg = BreakpointGraph()
        for file_path in block_orders_paths:
            with open(file_path, "rt") as source:
                partial_bg = GRIMMReader.get_breakpoint_graph(stream=source, merge_edges=False)
                bg.update(breakpoint_graph=partial_bg, merge_edges=False)
        manager.data["gos-asm"]["bg"] = bg

        manager.logger.info("Reading phylogenetic tree information")
        tree = BGTree(newick=input_config["phylogenetic_tree"])
        manager.data["gos-asm"]["phylogenetic_tree"] = tree

        target_genomes = [BGGenome(genome_name) for genome_name in input_config["target_organisms"]]
        full_tmc = Multicolor(*target_genomes)
        manager.data["gos-asm"]["target_multicolor"] = full_tmc
        target_multicolors = Multicolor.split_colors(full_tmc,
                                                     guidance=tree.vtree_consistent_multicolors,
                                                     account_for_color_multiplicity_in_guidance=False)

        # extend the split with every non-empty VT-consistent multicolor contained in one of its parts;
        # the outer loop walks a snapshot, since the list itself grows along the way
        for target_multicolor in target_multicolors[:]:
            for candidate in deepcopy(tree.vtree_consistent_multicolors):
                if not candidate <= target_multicolor:
                    continue
                if candidate in target_multicolors:
                    continue
                if len(candidate.colors) > 0:
                    target_multicolors.append(candidate)

        target_multicolors = sorted(target_multicolors, key=multicolor_size, reverse=True)

        all_target_multicolors = target_multicolors[:]
        # for i in range(2, len(vtree_consistent_target_multicolors) + 1):
        #     for comb in itertools.combinations(vtree_consistent_target_multicolors[:], i):
        #         comb = list(comb)
        #         for mc1, mc2 in itertools.combinations(comb, 2):
        #             if len(mc1.intersect(mc2).colors) > 0:
        #                 break
        #         else:
        #             new_mc = Multicolor()
        #             for mc in comb:
        #                 new_mc += mc
        #             all_target_multicolors.append(new_mc)
        # drop duplicates by going through the hashable representations and back
        unique_hashes = {mc.hashable_representation for mc in all_target_multicolors}
        all_target_multicolors = sorted((Multicolor(*hashed) for hashed in unique_hashes),
                                        key=multicolor_size,
                                        reverse=True)
        manager.data["gos-asm"]["target_multicolors"] = all_target_multicolors
        # log_bg_stats(bg=bg, logger=manager.logger)

        manager.logger.info("Reading repeats-bridges information")
        repeats_guidance = get_repeats_bridges_guidance(file_name=input_config["repeats_bridges_file"],
                                                        data=manager.data)
        manager.data["gos-asm"]["repeats_guidance"] = repeats_guidance
Example #9
0
 def test_single_initialization(self):
     # simplest initialization: a single genome with multiplicity one;
     # the construction is exercised twice with identical expectations
     for _ in range(2):
         mc = Multicolor(self.genome1)
         self.assertEqual(len(mc.colors), 1)
         self.assertEqual(len(mc.multicolors), 1)
         self.assertSetEqual({self.genome1}, mc.colors)
         self.assertEqual(mc.multicolors[self.genome1], 1)
Example #10
0
 def test_get_tree_consistent_multicolors_with_non_default_leaf_wrapper(
         self):
     # with an identity leaf wrapper the colors are the leaf names themselves
     tree = BGTree("(v1, v2)root;", leaf_wrapper=lambda name: name)
     obtained = tree.get_tree_consistent_multicolors()
     expected = [
         Multicolor(self.v1),
         Multicolor(self.v2),
         Multicolor(),
         Multicolor(self.v1, self.v2)
     ]
     self.assertEqual(len(obtained), 4)
     for multicolor in obtained:
         self.assertIn(multicolor, expected)
Example #11
0
 def test_iter_over_colors_json_ids(self):
     # when an edge is serialized to json, its colors are referenced by their ids
     # and the multiplicity of every color has to be preserved

     def json_ids_of_edge_with(colors):
         # builds a fresh v1-v2 edge carrying the supplied colors
         edge = BGEdge(vertex1=BlockVertex("v1"),
                       vertex2=BlockVertex("v2"),
                       multicolor=Multicolor(*colors))
         return edge.colors_json_ids

     # all colors are BGGenome objects
     genomes = [
         self.genome1, self.genome1, self.genome2, self.genome3,
         self.genome2
     ]
     obtained_ids = json_ids_of_edge_with(genomes)
     self.assertIsInstance(obtained_ids, list)
     self.assertEqual(len(obtained_ids), 5)
     self.assertDictEqual(Counter(genome.json_id for genome in genomes),
                          Counter(obtained_ids))
     # colors are plain hashable objects without a json_id attribute: their hash is expected
     colors = ["red", "red", "green", "black", "yellow", "green"]
     obtained_ids = json_ids_of_edge_with(colors)
     self.assertIsInstance(obtained_ids, list)
     self.assertEqual(len(obtained_ids), 6)
     self.assertDictEqual(Counter(hash(color) for color in colors),
                          Counter(obtained_ids))
     # mixture: BGGenome objects, a plain hashable, and non-BGGenome objects that do have json_id
     mock1, mock2 = Mock(), Mock()
     mock1.json_id = 5
     mock2.json_id = 6
     colors = [
         self.genome1, mock1, self.genome2, "black", mock2, self.genome2
     ]
     obtained_ids = json_ids_of_edge_with(colors)
     self.assertIsInstance(obtained_ids, list)
     obtained_ids = list(obtained_ids)
     self.assertEqual(len(obtained_ids), 6)
     expected_counts = Counter(
         color.json_id if hasattr(color, "json_id") else hash(color)
         for color in colors)
     self.assertDictEqual(expected_counts, Counter(obtained_ids))
Example #12
0
 def test_get_breakpoint_from_file_with_comment_data_string(self):
     # GRIMM input where ordinary comments are mixed with "data" comments
     # that assign a fragment name to the chromosome that follows them
     grimm_lines = [
         "", "\t", "#comment1", ">genome_name_1", "      #comment1",
         "# data :: fragment : name = chromosome_X", "a b $",
         "   #comment1   ", "\t>genome_name_2",
         "#data::fragment:name=scaffold111", "a $", "", "\n\t"
     ]
     stream = io.StringIO("\n".join(grimm_lines))
     result_bg = GRIMMReader.get_breakpoint_graph(stream,
                                                  merge_edges=False)
     self.assertIsInstance(result_bg, BreakpointGraph)
     components = list(result_bg.connected_components_subgraphs())
     self.assertEqual(len(components), 3)
     self.assertEqual(len(list(result_bg.edges())), 5)
     self.assertEqual(len(list(result_bg.nodes())), 7)
     # non-merged edges are single-colored, condensed ones may carry both genomes
     single_genome_multicolors = [
         Multicolor(BGGenome("genome_name_1")),
         Multicolor(BGGenome("genome_name_2"))
     ]
     condensed_multicolors = [
         Multicolor(BGGenome("genome_name_1")),
         Multicolor(BGGenome("genome_name_2")),
         Multicolor(BGGenome("genome_name_1"), BGGenome("genome_name_2"))
     ]
     for bgedge in result_bg.edges():
         self.assertIn(bgedge.multicolor, single_genome_multicolors)
     for bgedge in result_bg.edges():
         condensed = result_bg.get_condensed_edge(vertex1=bgedge.vertex1,
                                                  vertex2=bgedge.vertex2)
         self.assertIn(condensed.multicolor, condensed_multicolors)
     infinity_edge_count = len(
         [bgedge for bgedge in result_bg.edges() if bgedge.is_infinity_edge])
     self.assertEqual(infinity_edge_count, 4)
     # every edge carries the fragment name taken from its "data" comment
     for bgedge in result_bg.edges():
         edge_data = bgedge.data
         self.assertIn("fragment", edge_data)
         self.assertIsInstance(edge_data["fragment"], dict)
         self.assertIn("name", edge_data["fragment"])
         self.assertIn(edge_data["fragment"]["name"],
                       {"chromosome_X", "scaffold111"})
     # forward orientation is recorded for a regular and for an infinity edge
     ah = result_bg.get_vertex_by_name("ah")
     bt = result_bg.get_vertex_by_name("bt")
     ahi = result_bg.get_vertex_by_name("ah__infinity")
     regular_edge = result_bg.get_edge_by_two_vertices(vertex1=ah,
                                                       vertex2=bt)
     self.assertTupleEqual(
         regular_edge.data["fragment"]["forward_orientation"], (ah, bt))
     infinity_edge = result_bg.get_edge_by_two_vertices(vertex1=ah,
                                                        vertex2=ahi)
     self.assertTupleEqual(
         infinity_edge.data["fragment"]["forward_orientation"], (ah, ahi))
Example #13
0
 def test_data_update_non_dict_source(self):
     # update_data has to reject any source that is not a dict
     initial_data = {"fragment": {"name": "scaffold2", "origin": "test"}}
     edge = BGEdge(vertex1=TaggedBlockVertex("v1"),
                   vertex2=TaggedBlockVertex("v2"),
                   multicolor=Multicolor(self.genome1, self.genome2),
                   data=initial_data)
     non_dict_sources = [1, "2", Multicolor(), (1, ), [2]]
     for source in non_dict_sources:
         with self.assertRaises(ValueError):
             edge.update_data(source=source)
Example #14
0
 def test_update(self):
     # update accepts any number of colors and works in place:
     # unseen colors are added, already present ones get their multiplicity increased
     all_three = {self.genome1, self.genome2, self.genome3}
     mc = Multicolor()
     self.assertSetEqual(set(), mc.colors)
     # first color
     mc.update(self.genome1)
     self.assertEqual(len(mc.colors), 1)
     self.assertEqual(len(mc.multicolors), 1)
     self.assertSetEqual({self.genome1}, mc.colors)
     # two more colors at once
     mc.update(self.genome2, self.genome3)
     self.assertEqual(len(mc.colors), 3)
     self.assertEqual(len(mc.multicolors), 3)
     self.assertSetEqual(all_three, mc.colors)
     for color in mc.multicolors:
         self.assertEqual(mc.multicolors[color], 1)
     # repeated color: only its multiplicity changes
     mc.update(self.genome1)
     self.assertEqual(len(mc.colors), 3)
     self.assertEqual(len(mc.multicolors), 3)
     for color in mc.multicolors:
         expected_multiplicity = 2 if color == self.genome1 else 1
         self.assertEqual(mc.multicolors[color], expected_multiplicity)
     self.assertSetEqual(all_three, mc.colors)
Example #15
0
 def test_merging_correct(self):
     # merging two BGEdges yields a third, separate BGEdge that combines
     # the colors and multiplicities of both supplied edges;
     # it is allowed only when both edges connect the same pair of vertices
     # (vertex order is not a concern, since breakpoint graph edges are undirected)
     v1, v2 = BlockVertex("v1"), BlockVertex("v2")
     first_color = Multicolor(self.genome3)
     second_color = Multicolor(self.genome2)
     first_edge = BGEdge(vertex1=v1, vertex2=v2, multicolor=first_color)
     second_edge = BGEdge(vertex1=v1, vertex2=v2, multicolor=second_color)
     merged = BGEdge.merge(first_edge, second_edge)
     self.assertEqual(merged.vertex1, v1)
     self.assertEqual(merged.vertex2, v2)
     self.assertEqual(merged.multicolor, first_color + second_color)
Example #16
0
 def test_json_serialization(self):
     v1, v2 = BlockVertex("v1"), BlockVertex("v2")
     color1 = BGGenome("genome1")
     # simplest case: one color, multiplicity one, schema info suppressed
     edge = BGEdge(vertex1=v1, vertex2=v2, multicolor=Multicolor(color1))
     expected = {
         "vertex1_id": v1.json_id,
         "vertex2_id": v2.json_id,
         "multicolor": [color1.json_id]
     }
     self.assertDictEqual(edge.to_json(schema_info=False), expected)
     # several colors, each with multiplicity one; schema info is included by default
     color2 = BGGenome("genome2")
     edge = BGEdge(vertex1=v1,
                   vertex2=v2,
                   multicolor=Multicolor(color1, color2))
     serialized = edge.to_json()
     self.assertIsInstance(serialized, dict)
     self.assertEqual(serialized["vertex1_id"], v1.json_id)
     self.assertEqual(serialized["vertex2_id"], v2.json_id)
     self.assertSetEqual(set(serialized["multicolor"]),
                         {color1.json_id, color2.json_id})
     self.assertEqual(serialized[BGEdge_JSON_SCHEMA_JSON_KEY],
                      edge.json_schema_name)
     # several colors with multiplicities of one and more than one
     color3 = BGGenome("genome3")
     multicolor = Multicolor(color1, color1, color1, color2, color2, color3)
     edge = BGEdge(vertex1=v1, vertex2=v2, multicolor=multicolor)
     serialized = edge.to_json(schema_info=False)
     self.assertIsInstance(serialized, dict)
     self.assertEqual(serialized["vertex1_id"], v1.json_id)
     self.assertEqual(serialized["vertex2_id"], v2.json_id)
     self.assertSetEqual(set(serialized["multicolor"]),
                         {color1.json_id, color2.json_id, color3.json_id})
     expected_counts = Counter(
         color.json_id for color in multicolor.multicolors.elements())
     self.assertDictEqual(Counter(serialized["multicolor"]),
                          expected_counts)
     # odd case: a vertex that is not a BGVertex and has no "json_id" attribute
     # is expected to be referenced by its hash
     edge = BGEdge(vertex1=v1, vertex2=1, multicolor=Multicolor(color1))
     serialized = edge.to_json()
     self.assertIsInstance(serialized, dict)
     self.assertEqual(serialized["vertex1_id"], v1.json_id)
     self.assertEqual(serialized["vertex2_id"], hash(1))
     self.assertListEqual(serialized["multicolor"], [color1.json_id])
     self.assertEqual(serialized[BGEdge_JSON_SCHEMA_JSON_KEY],
                      edge.json_schema_name)
Example #17
0
 def test__sub__(self):
     # the - operator is overloaded to behave like "delete", but produces a new Multicolor instance;
     # only a Multicolor is accepted as the right operand, anything else raises a TypeError
     minuend = Multicolor(self.genome1, self.genome3, self.genome1,
                          self.genome2)
     subtrahend = Multicolor(self.genome3, self.genome2, self.genome5)
     difference = minuend - subtrahend
     self.assertEqual(len(difference.colors), 1)
     self.assertEqual(len(difference.multicolors), 1)
     self.assertEqual(difference.multicolors[self.genome1], 2)
     self.assertSetEqual({self.genome1}, difference.colors)
     # subtracting from an empty multicolor leaves it empty
     from_empty = Multicolor() - subtrahend
     self.assertEqual(from_empty, Multicolor())
     with self.assertRaises(TypeError):
         minuend - 5
Example #18
0
 def test__isub__(self):
     # the -= operator is overloaded and behaves just like the "delete" method;
     # only a Multicolor is accepted as the right operand, anything else raises a TypeError
     target = Multicolor(self.genome1, self.genome3, self.genome1,
                         self.genome2)
     removed = Multicolor(self.genome3, self.genome2, self.genome5)
     id_before = id(target)
     target -= removed
     self.assertEqual(len(target.colors), 1)
     self.assertEqual(len(target.multicolors), 1)
     self.assertEqual(target.multicolors[self.genome1], 2)
     self.assertSetEqual({self.genome1}, target.colors)
     # the operation is in place: the very same object is kept
     self.assertEqual(id(target), id_before)
     with self.assertRaises(TypeError):
         target -= 5
Example #19
0
 def test_initialization_non_empty_data_attribute(self):
     # data supplied to the constructor has to be available through the edge's data attribute
     fragment_data = {"fragment": {"name": "scaffold1"}}
     edge = BGEdge(vertex1=TaggedBlockVertex("v1"),
                   vertex2=TaggedBlockVertex("v2"),
                   multicolor=Multicolor(self.genome1),
                   data=fragment_data)
     self.assertDictEqual(edge.data, fragment_data)
Example #20
0
 def test_get_fragments_grimm_from_breakpoint_graph_single_genome_with_repeat_based_merges(
         self):
     # three scaffolds flanked by the same repeat; a k-break glues scaffold1 to scaffold2,
     # so the output must contain the merged fragment (in either orientation) and a lone scaffold3
     grimm_lines = [
         ">Mouse", "# data :: fragment : name = scaffold1",
         "1 ALC__repeat $", "# data :: fragment : name = scaffold2",
         "ALC__repeat 2 $", "# data :: fragment : name = scaffold3",
         "ALC__repeat 3 $"
     ]
     bg = self._populate_bg(data=grimm_lines)
     iv1 = bg.get_vertex_by_name("1h__repeat:ALCt__infinity")
     iv2 = bg.get_vertex_by_name("2t__repeat:ALCh__infinity")
     v1 = bg.get_vertex_by_name("1h")
     v2 = bg.get_vertex_by_name("2t")
     merging_kbreak = KBreak(start_edges=[(v1, iv1), (v2, iv2)],
                             result_edges=[(v1, v2), (iv1, iv2)],
                             multicolor=Multicolor(BGGenome("Mouse")))
     bg.apply_kbreak(kbreak=merging_kbreak)
     grimm_strings = GRIMMWriter.get_fragments_in_grimm_from_breakpoint_graph(
         bg=bg)
     merged_variants = ["scaffold1 scaffold2 $", "-scaffold2 -scaffold1 $"]
     untouched_variants = ["scaffold3 $", "-scaffold3 $"]
     self.assertTrue(
         any(variant in grimm_strings for variant in merged_variants))
     self.assertTrue(
         any(variant in grimm_strings for variant in untouched_variants))
Example #21
0
    def from_assembly_points_file(cls, separated_values):
        """ Build an assembly point from a single parsed record of an assembly points file.

        :param separated_values: mapping from column name to its raw string value; the columns
            "genome", "repeat1 - repeat2", "s_edge", "repeat_guidance", "MC", "id", "v1 - v2" and
            "fragment1 - fragment2" are required, "cc_id" is optional
        :return: new instance of ``cls`` with ``id``, ``cc_id``, vertices, fragments (names and signs)
            and ``info`` attributes populated
        :raises KeyError: if a required column is missing
        :raises ValueError: if a paired column can not be split into exactly two parts by " - "
        """
        info = AssemblyPointInfo()
        info.target_color = Multicolor(
            BGGenome(separated_values["genome"].strip()))
        repeats = separated_values["repeat1 - repeat2"].strip()
        repeat1, repeat2 = repeats.split(" - ")
        # the last three characters hold the direction, the middle one being the direction itself
        # (presumably entries look like "name(+)" -- TODO confirm against the writer of this file)
        repeat_info = {
            "repeat_name_1": repeat1[:-3],
            "repeat_dir_1": repeat1[-2],
            "repeat_name_2": repeat2[:-3],
            "repeat_dir_2": repeat2[-2]
        }
        info.support_edge = separated_values["s_edge"]
        repeat_info["repeat_guidance"] = separated_values["repeat_guidance"].strip().split(", ")
        info.repeat_info = repeat_info
        info.target_multicolor = Multicolor(
            *[BGGenome(entry) for entry in separated_values["MC"].strip().split(", ")])

        result = cls()
        result.id = separated_values["id"].strip()
        # "cc_id" is optional: calling .strip() on the missing value used to raise an AttributeError
        cc_id = separated_values.get("cc_id", None)
        result.cc_id = cc_id.strip() if cc_id is not None else None
        vertex1, vertex2 = separated_values["v1 - v2"].split(" - ")
        result.vertex1 = vertex1.strip()
        result.vertex2 = vertex2.strip()
        fragments = separated_values["fragment1 - fragment2"].strip()
        fragment1, fragment2 = (fragment.strip() for fragment in fragments.split(" - "))
        # a leading "-" encodes the fragment's sign and is not a part of its name
        result.fragment1_sign = "-" if fragment1.startswith("-") else "+"
        result.fragment2_sign = "-" if fragment2.startswith("-") else "+"
        result.fragment1 = fragment1[1:] if fragment1.startswith("-") else fragment1
        result.fragment2 = fragment2[1:] if fragment2.startswith("-") else fragment2
        result.info = info
        return result
Example #22
0
 def __init__(self,
              newick=None,
              newick_format=1,
              dist=DEFAULT_EDGE_LENGTH,
              leaf_wrapper=BGGenome):
     """ Create a tree from its newick description.

     :param newick: newick data handed over to the underlying Tree object
     :param newick_format: passed to the underlying Tree object as its ``format`` argument
     :param dist: passed to the underlying Tree object as its ``dist`` argument
     :param leaf_wrapper: callable invoked with a leaf name; its result is what gets
         passed to the Multicolor class when multicolors are built
     """
     self.__leaf_wrapper = leaf_wrapper
     self.tree = Tree(newick=newick, format=newick_format, dist=dist)
     self.__root = self.tree
     # until the first recalculation only the empty multicolor is known to be consistent
     self.__vtree_consistent_multicolors = [Multicolor()]
     self.__vtree_consistent_multicolors_set = {
         Multicolor().hashable_representation
     }
     self.__tree_consistent_multicolors = [Multicolor()]
     self.__tree_consistent_multicolors_set = {
         Multicolor().hashable_representation
     }
     self.multicolors_are_up_to_date = False
Example #23
0
 def test__lt__and__le__(self):
     # < and <= are checked for three left-hand operands compared against the same reference:
     # one with a smaller multiplicity, an equal one, and one with a larger multiplicity
     reference = Multicolor(self.genome1, self.genome2, self.genome1)
     smaller = Multicolor(self.genome1, self.genome2)
     self.assertTrue(smaller < reference)
     self.assertTrue(smaller <= reference)
     # comparison with a non-Multicolor object is never satisfied
     self.assertFalse(smaller <= 5)
     self.assertFalse(smaller < 5)
     same = Multicolor(self.genome1, self.genome1, self.genome2)
     self.assertFalse(same < reference)
     self.assertTrue(same <= reference)
     larger = Multicolor(self.genome1, self.genome1, self.genome2,
                         self.genome2)
     self.assertFalse(larger < reference)
     self.assertFalse(larger <= reference)
Example #24
0
 def test_initialization(self):
     # a BGEdge has to expose exactly the vertices and the multicolor it was created with
     first, second = BlockVertex("v1"), BlockVertex("v2")
     color = Multicolor(self.genome3)
     edge = BGEdge(vertex1=first, vertex2=second, multicolor=color)
     self.assertEqual(edge.vertex1, first)
     self.assertEqual(edge.vertex2, second)
     self.assertEqual(edge.multicolor, color)
Example #25
0
 def test__add__(self):
     # the + operator is overloaded and works like the "merge" method;
     # only a Multicolor is accepted as the right operand, anything else raises a TypeError
     left = Multicolor(self.genome1, self.genome2)
     right = Multicolor(self.genome3, self.genome5, self.genome1)
     total = left + right
     self.assertEqual(len(total.colors), 4)
     self.assertEqual(len(total.multicolors), 4)
     self.assertSetEqual(
         {self.genome1, self.genome2, self.genome3, self.genome5},
         total.colors)
     # genome1 is present in both operands, every other color only in one of them
     for color in total.multicolors:
         expected_multiplicity = 2 if color == self.genome1 else 1
         self.assertEqual(total.multicolors[color], expected_multiplicity)
     with self.assertRaises(TypeError):
         left + 5
Example #26
0
 def __get_v_tree_consistent_leaf_based_hashable_multicolors(self):
     """ Collect one multicolor per tree node, plus the empty multicolor.

     Nodes are visited breadth-first starting from the root, so the root's multicolor comes first.
     A leaf contributes a multicolor of its own wrapped name, any other node a multicolor of the
     wrapped names of all leaves among its descendants. The empty multicolor is appended last.
     """
     wrap = self.__leaf_wrapper
     collected = []
     queue = deque([self.__root])
     while queue:
         node = queue.popleft()
         queue.extend(node.children)
         if node.is_leaf():
             collected.append(Multicolor(wrap(node.name)))
             continue
         leaf_colors = [
             wrap(descendant.name)
             for descendant in node.get_descendants()
             if descendant.is_leaf()
         ]
         collected.append(Multicolor(*leaf_colors))
     collected.append(Multicolor())
     return collected
Example #27
0
def get_full_irregular_multicolor(vertex, data, graph=None):
    """ Sum up the multicolors of all irregular edges incident to the supplied vertex.

    :param vertex: vertex whose incident edges are inspected
    :param data: mapping used to look up the breakpoint graph (``data["gos-asm"]["bg"]``)
        when no graph is supplied explicitly
    :param graph: breakpoint graph to use instead of the one stored in ``data``
    :return: Multicolor accumulated over the irregular edges (empty if there are none)
    """
    bg = data["gos-asm"]["bg"] if graph is None else graph
    accumulated = Multicolor()
    for edge in bg.get_edges_by_vertex(vertex):
        if not edge.is_irregular_edge:
            continue
        accumulated += edge.multicolor
    return accumulated
Example #28
0
def get_full_support_edge(regular_vertex1, regular_vertex2, data):
    """ Build a single edge that combines the colors of all edges between two vertices.

    Colors are taken from the ``colors`` attribute of every edge's multicolor and are
    gathered edge by edge.

    :param regular_vertex1: first endpoint
    :param regular_vertex2: second endpoint
    :param data: mapping holding the breakpoint graph under ``data["gos-asm"]["bg"]``
    :return: new BGEdge between the two vertices carrying the gathered colors
    """
    bg = data["gos-asm"]["bg"]
    gathered_colors = []
    for bgedge in bg.edges_between_two_vertices(vertex1=regular_vertex1,
                                                vertex2=regular_vertex2):
        gathered_colors.extend(bgedge.multicolor.colors)
    return BGEdge(vertex1=regular_vertex1,
                  vertex2=regular_vertex2,
                  multicolor=Multicolor(*gathered_colors))
Example #29
0
 def test__mull__(self):
     # an empty multicolor stays empty regardless of the multiplier
     empty = Multicolor()
     for multiplier in range(10):
         self.assertEqual(empty * multiplier, Multicolor())
     # multiplying by 0 turns any multicolor into an empty one
     for colors in ((self.genome1, ),
                    (self.genome1, self.genome2, self.genome3),
                    (self.genome1, self.genome2, self.genome1)):
         self.assertEqual(Multicolor(*colors) * 0, Multicolor())
     # multiplying by a positive integer equals adding the multicolor to itself that many times
     mc = Multicolor(self.genome1, self.genome2, self.genome3, self.genome1,
                     self.genome2, self.genome1)
     for multiplier in range(1, 50):
         expected = Multicolor()
         for _ in range(multiplier):
             expected += mc
         self.assertEqual(mc * multiplier, expected)
Example #30
0
 def test_multiple_initialization(self):
     # several genomes supplied at once, with multiplicities of one and more than one
     all_three = {self.genome1, self.genome2, self.genome3}
     # distinct genomes, passed positionally and through an unpacked list
     for colors in ((self.genome1, self.genome2, self.genome3),
                    [self.genome1, self.genome2, self.genome3]):
         mc = Multicolor(*colors)
         self.assertEqual(len(mc.colors), 3)
         self.assertEqual(len(mc.multicolors), 3)
         self.assertSetEqual(all_three, mc.colors)
         for color in mc.multicolors:
             self.assertEqual(mc.multicolors[color], 1)
     # a repeated genome is counted by multiplicity, not as a new color
     repeated = Multicolor(self.genome1, self.genome2, self.genome1)
     self.assertEqual(len(repeated.colors), 2)
     self.assertEqual(repeated.multicolors[self.genome2], 1)
     self.assertEqual(repeated.multicolors[self.genome1], 2)