def setUp(self):
    """Create the five genomes shared by the test cases as ``self.genome1`` .. ``self.genome5``."""
    color_names = ("red", "green", "blue", "black", "yellow")
    for position, color_name in enumerate(color_names, start=1):
        setattr(self, "genome{}".format(position), BGGenome(color_name))
def test_json_serialization_no_subclassing(self):
    """A genome serializes to a dict with its name and json id, plus the schema name on request."""
    genome = BGGenome("name1")
    expected = {"name": "name1", "g_id": genome.json_id}

    # schema information explicitly switched off
    without_schema = genome.to_json(schema_info=False)
    self.assertDictEqual(without_schema, expected)

    # default call additionally carries the schema name
    expected_with_schema = dict(expected)
    expected_with_schema[BGGenome_JSON_SCHEMA_JSON_KEY] = genome.json_schema_name
    self.assertDictEqual(genome.to_json(), expected_with_schema)
def setUp(self):
    """Prepare three genomes and three empty breakpoint graphs used across the test cases."""
    self.genome1, self.genome2, self.genome3 = (
        BGGenome(color_name) for color_name in ("red", "green", "blue")
    )
    for graph_attribute in ("single_genome_bg", "two_genome_bg", "four_genome_bg"):
        setattr(self, graph_attribute, BreakpointGraph())
def test__eq__(self):
    """Two genomes are equal if they are of the same class and their hash values are equal."""
    first = BGGenome("name1")
    second = BGGenome("name2")
    self.assertNotEqual(first, second)

    # once the names match, the genomes compare equal
    second.name = "name1"
    self.assertEqual(first, second)

    # a genome never equals an object of another type
    for foreign_object in (5, "name1", [first]):
        self.assertNotEqual(first, foreign_object)
def test_json_id(self):
    """The json id of a genome equals the hash of its name and tracks name changes.

    The json id is what identifies a genome once it is serialized to json.
    """
    g = BGGenome("name")
    json_id = g.json_id
    self.assertEqual(json_id, hash(g.name))
    self.assertIsInstance(json_id, int)

    g.name = "name1"
    new_json_id = g.json_id
    self.assertEqual(new_json_id, hash(g.name))
    # the freshly computed id is the one whose type must be checked here
    # (previously the old ``json_id`` was checked a second time)
    self.assertIsInstance(new_json_id, int)
    self.assertNotEqual(json_id, new_json_id)
def test__le__(self):
    """A genome is less-or-equal to another genome if it is either less ("<") or equal ("==") to it."""
    g1 = BGGenome("genome1")
    g2 = BGGenome("genome1")
    # equal genomes are less-or-equal in both directions
    self.assertLessEqual(g1, g2)
    self.assertLessEqual(g2, g1)
    self.assertTrue(g1 <= g2 <= g1)

    g3 = BGGenome("genome")
    self.assertLessEqual(g3, g1)
    # compare against the second "genome1" instance as well
    # (previously the g1 assertion was simply repeated)
    self.assertLessEqual(g3, g2)
def setUp(self):
    """Expose the names ``v1`` .. ``v5`` and matching genomes ``bg_v1`` .. ``bg_v5`` on the test case."""
    for label in ("v1", "v2", "v3", "v4", "v5"):
        # plain name, e.g. self.v1 == "v1"
        setattr(self, label, label)
        # genome built from that name, e.g. self.bg_v1 == BGGenome("v1")
        setattr(self, "bg_" + label, BGGenome(label))
def test_json_serialization(self):
    """An edge serializes to its two vertex ids and the list of json ids of its colors."""
    vertex_a, vertex_b = BlockVertex("v1"), BlockVertex("v2")
    genome_a = BGGenome("genome1")

    # simple case: single color, no multiplicity
    bgedge = BGEdge(vertex1=vertex_a, vertex2=vertex_b, multicolor=Multicolor(genome_a))
    expected = {
        "vertex1_id": vertex_a.json_id,
        "vertex2_id": vertex_b.json_id,
        "multicolor": [genome_a.json_id],
    }
    self.assertDictEqual(bgedge.to_json(schema_info=False), expected)

    # several colors, each present exactly once
    genome_b = BGGenome("genome2")
    bgedge = BGEdge(vertex1=vertex_a, vertex2=vertex_b, multicolor=Multicolor(genome_a, genome_b))
    serialized = bgedge.to_json()
    self.assertIsInstance(serialized, dict)
    self.assertEqual(serialized["vertex1_id"], vertex_a.json_id)
    self.assertEqual(serialized["vertex2_id"], vertex_b.json_id)
    self.assertSetEqual(set(serialized["multicolor"]), {genome_a.json_id, genome_b.json_id})
    self.assertEqual(serialized[BGEdge_JSON_SCHEMA_JSON_KEY], bgedge.json_schema_name)

    # several colors, multiplicities of 1 as well as greater than 1
    genome_c = BGGenome("genome3")
    multicolor = Multicolor(genome_a, genome_a, genome_a, genome_b, genome_b, genome_c)
    bgedge = BGEdge(vertex1=vertex_a, vertex2=vertex_b, multicolor=multicolor)
    serialized = bgedge.to_json(schema_info=False)
    self.assertIsInstance(serialized, dict)
    self.assertEqual(serialized["vertex1_id"], vertex_a.json_id)
    self.assertEqual(serialized["vertex2_id"], vertex_b.json_id)
    self.assertSetEqual(
        set(serialized["multicolor"]),
        {genome_a.json_id, genome_b.json_id, genome_c.json_id})
    expected_multiplicities = Counter(
        color.json_id for color in multicolor.multicolors.elements())
    self.assertDictEqual(Counter(serialized["multicolor"]), expected_multiplicities)

    # weird case: a vertex attribute of the edge is not a BGVertex instance
    # and does not even have a "json_id" attribute
    bgedge = BGEdge(vertex1=vertex_a, vertex2=1, multicolor=Multicolor(genome_a))
    serialized = bgedge.to_json()
    self.assertIsInstance(serialized, dict)
    self.assertEqual(serialized["vertex1_id"], vertex_a.json_id)
    self.assertEqual(serialized["vertex2_id"], hash(1))
    self.assertListEqual(serialized["multicolor"], [genome_a.json_id])
    self.assertEqual(serialized[BGEdge_JSON_SCHEMA_JSON_KEY], bgedge.json_schema_name)
def test_is_multicolor_vtree_consistent(self):
    """Check ``BGTree.multicolor_is_vtree_consistent`` on a five-leaf tree.

    All assertions call the vtree variant of the check. Previously only the first
    one did, and the rest exercised ``multicolor_is_tree_consistent`` instead, so
    the method this test is named after was left almost untested.
    """
    # an empty multicolor is consistent even with an empty tree
    self.assertTrue(BGTree().multicolor_is_vtree_consistent(Multicolor()))

    tree = BGTree("(((v1, v2), v3), (v4, v5));")

    consistent_multicolors = [
        Multicolor(self.bg_v1),
        Multicolor(self.bg_v1, self.bg_v2),
        Multicolor(self.bg_v4, self.bg_v5),
        Multicolor(self.bg_v1, self.bg_v2, self.bg_v3),
        Multicolor(self.bg_v1, self.bg_v2, self.bg_v3, self.bg_v4, self.bg_v5),
    ]
    for multicolor in consistent_multicolors:
        self.assertTrue(tree.multicolor_is_vtree_consistent(multicolor))

    inconsistent_multicolors = [
        Multicolor(self.bg_v2, self.bg_v3),
        # "v6" is not a genome of the tree at all
        Multicolor(self.bg_v1, BGGenome("v6")),
        Multicolor(self.bg_v3, self.bg_v5),
    ]
    for multicolor in inconsistent_multicolors:
        self.assertFalse(tree.multicolor_is_vtree_consistent(multicolor))
def test__lt__(self):
    """Genomes are ordered by their ``name``; a genome is less than any non-BGGenome object."""
    smaller, larger = BGGenome("genome1"), BGGenome("genome2")
    self.assertLess(smaller, larger)
    self.assertGreater(larger, smaller)

    # a name that is a proper prefix of another one sorts first
    g1 = BGGenome("genome1")
    g2 = BGGenome("genome")
    self.assertGreater(g1, g2)
    self.assertLess(g2, g1)

    # BGGenome is always smaller than non-BGGenome objects
    for foreign_object in (1, (1, ), [1], "a"):
        for genome in (g1, g2):
            self.assertLess(genome, foreign_object)
def test_get_fragments_grimm_from_breakpoint_graph_single_genome_with_repeat_based_merges(
        self):
    """After a k-break joins scaffold1 and scaffold2, the GRIMM output lists them as one fragment.

    scaffold3 is not touched by the k-break and must still be reported on its own.
    Either orientation of a fragment is accepted.
    """
    grimm_lines = [
        ">Mouse",
        "# data :: fragment : name = scaffold1",
        "1 ALC__repeat $",
        "# data :: fragment : name = scaffold2",
        "ALC__repeat 2 $",
        "# data :: fragment : name = scaffold3",
        "ALC__repeat 3 $",
    ]
    bg = self._populate_bg(data=grimm_lines)

    repeat_end_1 = bg.get_vertex_by_name("1h__repeat:ALCt__infinity")
    repeat_end_2 = bg.get_vertex_by_name("2t__repeat:ALCh__infinity")
    head_1 = bg.get_vertex_by_name("1h")
    tail_2 = bg.get_vertex_by_name("2t")

    merge = KBreak(
        start_edges=[(head_1, repeat_end_1), (tail_2, repeat_end_2)],
        result_edges=[(head_1, tail_2), (repeat_end_1, repeat_end_2)],
        multicolor=Multicolor(BGGenome("Mouse")))
    bg.apply_kbreak(kbreak=merge)

    grimm_strings = GRIMMWriter.get_fragments_in_grimm_from_breakpoint_graph(bg=bg)

    acceptable_outputs = (
        ["scaffold1 scaffold2 $", "-scaffold2 -scaffold1 $"],
        ["scaffold3 $", "-scaffold3 $"],
    )
    for alternatives in acceptable_outputs:
        self.assertTrue(any(entry in grimm_strings for entry in alternatives))
def from_assembly_points_file(cls, separated_values):
    """Build an instance of ``cls`` from one parsed row of an assembly points file.

    :param separated_values: mapping from column name to raw string value; the columns
        ``genome``, ``repeat1 - repeat2``, ``s_edge``, ``repeat_guidance``, ``MC``, ``id``,
        ``v1 - v2`` and ``fragment1 - fragment2`` are required, ``cc_id`` is optional
    :return: a new ``cls`` instance with ``id``, ``cc_id``, vertices, fragments (with their
        signs split off) and an ``AssemblyPointInfo`` stored in ``info``
    :raises KeyError: if a required column is missing
    :raises ValueError: if a paired column does not split into exactly two parts on ``" - "``
    """
    info = AssemblyPointInfo()
    info.target_color = Multicolor(BGGenome(separated_values["genome"].strip()))

    # each repeat token carries its name followed by a three character tail,
    # whose middle character is taken as the repeat direction
    repeat1, repeat2 = separated_values["repeat1 - repeat2"].strip().split(" - ")
    repeat_info = {
        "repeat_name_1": repeat1[:-3],
        "repeat_dir_1": repeat1[-2],
        "repeat_name_2": repeat2[:-3],
        "repeat_dir_2": repeat2[-2],
    }
    info.support_edge = separated_values["s_edge"]
    repeat_info["repeat_guidance"] = separated_values["repeat_guidance"].strip().split(", ")
    info.repeat_info = repeat_info

    genome_names = separated_values["MC"].strip().split(", ")
    info.target_multicolor = Multicolor(*[BGGenome(name) for name in genome_names])

    result = cls()
    result.id = separated_values["id"].strip()
    # "cc_id" is looked up with .get, i.e. it is optional; a missing column now
    # yields None instead of failing with AttributeError on None.strip()
    cc_id = separated_values.get("cc_id", None)
    result.cc_id = cc_id.strip() if cc_id is not None else None

    vertex1, vertex2 = separated_values["v1 - v2"].split(" - ")
    result.vertex1 = vertex1.strip()
    result.vertex2 = vertex2.strip()

    fragment1, fragment2 = separated_values["fragment1 - fragment2"].strip().split(" - ")
    fragment1, fragment2 = fragment1.strip(), fragment2.strip()
    # a leading "-" encodes the orientation; it is moved into the *_sign attribute
    result.fragment1_sign = "-" if fragment1.startswith("-") else "+"
    result.fragment2_sign = "-" if fragment2.startswith("-") else "+"
    result.fragment1 = fragment1[1:] if fragment1.startswith("-") else fragment1
    result.fragment2 = fragment2[1:] if fragment2.startswith("-") else fragment2

    result.info = info
    return result
def test_parse_genome_declaration_string(self):
    """The genome name is whatever follows the leading ">" once surrounding whitespace is removed."""
    declaration_to_name = (
        (">genome", "genome"),
        (" >genome ", "genome"),
        (">genome__genome", "genome__genome"),
        (">genome>genome", "genome>genome"),
        (">genome.!/.#4", "genome.!/.#4"),
    )
    for declaration, expected_name in declaration_to_name:
        self.assertEqual(
            GRIMMReader.parse_genome_declaration_string(declaration),
            BGGenome(expected_name))
def parse_genome_declaration_string(data_string):
    """ Builds a :class:`bg.genome.BGGenome` out of a ``genome declaration`` string

    Surrounding whitespace is removed first, then the very first character (the declaration marker) is dropped; the remainder is used as a genome name.

    :param data_string: a genome declaration string to take the genome name from
    :type data_string: ``str``
    :return: genome named after the supplied genome declaration string
    :rtype: :class:`bg.genome.BGGenome`
    """
    genome_name = data_string.strip()[1:]
    return BGGenome(genome_name)
def test_json_deserialization_no_subclassing(self):
    """A genome is restored from json by its "name"; other keys are irrelevant at this level."""
    # simple case
    genome = BGGenome.from_json(data={"name": "name1", "g_id": 1})
    self.assertEqual(genome.name, "name1")

    # g_id is not mandatory for genome deserialization itself,
    # but is required by the supervising class
    name_only = {"name": "name1"}
    self.assertEqual(BGGenome.from_json(data=name_only).name, "name1")

    # BGGenome scheme info shall be ignored at this level,
    # as it is supplied by the supervising class
    with_schema_key = {"name": "name1", BGGenome_JSON_SCHEMA_JSON_KEY: "lalal"}
    self.assertEqual(BGGenome.from_json(data=with_schema_key).name, "name1")

    # error case when "name" is not present
    self.assertRaises(ValueError, BGGenome.from_json, data={})
def run(self, manager):
    """Load all "gos-asm" input data and store it under ``manager.data["gos-asm"]``.

    Reads ``manager.configuration["gos-asm"]["input"]`` and fills the keys ``bg``,
    ``phylogenetic_tree``, ``target_multicolor``, ``target_multicolors`` and
    ``repeats_guidance`` of ``manager.data["gos-asm"]``.

    :param manager: object exposing ``logger``, ``configuration`` and ``data``
    """
    manager.logger.info("Reading blocks orders data")
    file_paths = manager.configuration["gos-asm"]["input"]["block_orders_file_paths"]
    # a single breakpoint graph accumulates the content of every block orders file
    bg = BreakpointGraph()
    for file_path in file_paths:
        with open(file_path, "rt") as source:
            bg.update(breakpoint_graph=GRIMMReader.get_breakpoint_graph(stream=source, merge_edges=False), merge_edges=False)
    manager.data["gos-asm"]["bg"] = bg
    manager.logger.info("Reading phylogenetic tree information")
    tree = BGTree(newick=manager.configuration["gos-asm"]["input"]["phylogenetic_tree"])
    manager.data["gos-asm"]["phylogenetic_tree"] = tree
    # one multicolor holding every configured target organism
    full_tmc = Multicolor(*[BGGenome(genome_name) for genome_name in manager.configuration["gos-asm"]["input"]["target_organisms"]])
    manager.data["gos-asm"]["target_multicolor"] = full_tmc
    # NOTE(review): presumably splits the full target multicolor into pieces that follow
    # the tree's vtree consistent multicolors -- confirm against Multicolor.split_colors
    vtree_consistent_target_multicolors = Multicolor.split_colors(full_tmc, guidance=tree.vtree_consistent_multicolors, account_for_color_multiplicity_in_guidance=False)
    # iterate over a copy ([:]) because the list itself is appended to inside the loop;
    # every non-empty vtree consistent multicolor that is <= some target piece
    # and is not already present in the list gets added
    for target_multicolor in vtree_consistent_target_multicolors[:]:
        for vtree_c_multicolor in deepcopy(tree.vtree_consistent_multicolors):
            if vtree_c_multicolor <= target_multicolor \
                    and vtree_c_multicolor not in vtree_consistent_target_multicolors \
                    and len(vtree_c_multicolor.colors) > 0:
                vtree_consistent_target_multicolors.append(vtree_c_multicolor)
    # longest hashable representation first
    vtree_consistent_target_multicolors = sorted(vtree_consistent_target_multicolors, key=lambda mc: len(mc.hashable_representation), reverse=True)
    all_target_multicolors = vtree_consistent_target_multicolors[:]
    # Disabled: extension of the targets with unions of pairwise disjoint multicolors.
    # for i in range(2, len(vtree_consistent_target_multicolors) + 1):
    #     for comb in itertools.combinations(vtree_consistent_target_multicolors[:], i):
    #         comb = list(comb)
    #         for mc1, mc2 in itertools.combinations(comb, 2):
    #             if len(mc1.intersect(mc2).colors) > 0:
    #                 break
    #         else:
    #             new_mc = Multicolor()
    #             for mc in comb:
    #                 new_mc += mc
    #             all_target_multicolors.append(new_mc)
    # a round trip through a set of hashable representations removes duplicates;
    # set order is arbitrary, hence the list is sorted once more afterwards
    hashed_vertex_tree_consistent_multicolors = {mc.hashable_representation for mc in all_target_multicolors}
    all_target_multicolors = [Multicolor(*hashed_multicolor) for hashed_multicolor in hashed_vertex_tree_consistent_multicolors]
    all_target_multicolors = sorted(all_target_multicolors, key=lambda mc: len(mc.hashable_representation), reverse=True)
    manager.data["gos-asm"]["target_multicolors"] = all_target_multicolors
    # log_bg_stats(bg=bg, logger=manager.logger)
    manager.logger.info("Reading repeats-bridges information")
    # the helper receives the whole data storage, including the "bg" entry stored above
    manager.data["gos-asm"]["repeats_guidance"] = get_repeats_bridges_guidance(
        file_name=manager.configuration["gos-asm"]["input"]["repeats_bridges_file"], data=manager.data)
def test_json_deserialization_subclassing(self):
    """An explicitly supplied JSON schema class is the one used for deserialization."""

    class BGGenomeJSONSchemaNameOptional(BGGenome.BGGenomeJSONSchema):
        """Schema that fills in a default name when the json data has none."""

        @post_load
        def make_object(self, data):
            data.setdefault("name", "default_name")
            return super(BGGenomeJSONSchemaNameOptional, self).make_object(data=data)

    # empty data is accepted only thanks to the custom schema
    genome = BGGenome.from_json(data={}, json_schema_class=BGGenomeJSONSchemaNameOptional)
    self.assertEqual(genome.name, "default_name")
def get_genome_character_state_by_edge(genome):
    """Return the integer character state of ``genome`` for the enclosing ``edge``.

    * ``0`` -- ``cnt`` holds exactly 1 for the genome;
    * ``1`` -- the genome's neighbours of the two edge vertices are not joined by an edge in ``bg``;
    * ``2 + i`` -- they are joined, where ``i`` is the position of that neighbour pair
      in ``possible_edges`` (the pair is appended on first sight).
    """
    if cnt[BGGenome(genome)] == 1:
        return 0

    name_a, name_b = edge.vertex1.name, edge.vertex2.name
    # process the vertex names in ascending order
    low_name, high_name = (name_b, name_a) if name_a > name_b else (name_a, name_b)
    low_neighbour = get_neighbour_with_genome(low_name, genome)
    high_neighbour = get_neighbour_with_genome(high_name, genome)

    if not bg.get_edge_by_two_vertices(low_neighbour, high_neighbour):
        return 1

    neighbour_pair = (low_neighbour, high_neighbour)
    if neighbour_pair not in possible_edges:
        possible_edges.append(neighbour_pair)
    return 2 + possible_edges.index(neighbour_pair)
def get_repeats_bridges_guidance(file_name, data):
    """Build, for every genome of the breakpoint graph, a directed graph of repeat guidance.

    Each graph starts with a ``"Noneou" -> "Noneoi"`` edge plus two edges per repeat found
    by ``iter_over_all_repeats`` (from each "ou" extremity to the opposite-direction "oi"
    one). If ``file_name`` is a regular file, bridges listed in it are added as
    ``<repeat1><h|t>oi -> <repeat2><t|h>ou`` edges to the graph of the genome declared by
    the most recent ``>genome`` line.

    File format, as read by this function: blank lines and lines starting with ``#`` are
    ignored; ``>name`` switches the current genome; every other line must consist of exactly
    two whitespace separated tokens (anything else is skipped), of which only the part before
    the first ``__`` is used and a leading ``-`` marks reverse orientation.

    :param file_name: path to the repeats bridges file; a missing path is not an error
    :param data: storage with the breakpoint graph available as ``data["gos-asm"]["bg"]``
    :return: dict mapping each genome of the breakpoint graph to its ``DiGraph``
    :raises KeyError: if the file declares a genome that is absent from the breakpoint graph
        and lists a bridge for it
    """
    opposite_dirs = {"h": "t", "t": "h"}
    bg = data["gos-asm"]["bg"]
    result = {}
    for genome in bg.get_overall_set_of_colors():
        guidance = DiGraph()
        guidance.add_edge(str(None) + "ou", str(None) + "oi")
        for repeat_name, r_dir in iter_over_all_repeats(bg=bg, multicolor=Multicolor(genome)):
            opposite_dir = opposite_dirs[r_dir]
            guidance.add_edge(repeat_name + r_dir + "ou", repeat_name + opposite_dir + "oi")
            guidance.add_edge(repeat_name + opposite_dir + "ou", repeat_name + r_dir + "oi")
        result[genome] = guidance

    # isfile() is already False for a non-existent path
    if not os.path.isfile(file_name):
        return result

    current_genome = None
    with open(file_name, "rt") as bridges_file:
        for raw_line in bridges_file:
            line = raw_line.strip()
            # skip blank lines and comments; the previous "empty AND starts with #" test
            # could never be true, so two-token comments were parsed as repeat bridges
            if not line or line.startswith("#"):
                continue
            if line.startswith(">"):
                current_genome = BGGenome(line[1:])
                continue
            tokens = line.split()
            if len(tokens) != 2 or current_genome is None:
                # malformed entry, or a bridge listed before any genome declaration
                continue
            r1_base, r2_base = (token.split("__")[0] for token in tokens)
            r1_reversed = r1_base.startswith("-")
            r2_reversed = r2_base.startswith("-")
            r1_name = r1_base[1:] if r1_reversed else r1_base
            r2_name = r2_base[1:] if r2_reversed else r2_base
            # the bridge leaves the first repeat and enters the second one;
            # reverse orientation swaps which extremity is used on each side
            bridge_start = r1_name + ("t" if r1_reversed else "h") + "oi"
            bridge_end = r2_name + ("h" if r2_reversed else "t") + "ou"
            result[current_genome].add_edge(bridge_start, bridge_end)
    return result
def test_get_breakpoint_from_file_with_comment_data_string(self):
    """Fragment names given in "# data ::" comments end up in the ``data`` of the edges read after them."""
    grimm_lines = [
        "", "\t", "#comment1", ">genome_name_1", " #comment1",
        "# data :: fragment : name = chromosome_X", "a b $", " #comment1 ",
        "\t>genome_name_2", "#data::fragment:name=scaffold111", "a $", "",
        "\n\t"
    ]
    stream = io.StringIO("\n".join(grimm_lines))
    result_bg = GRIMMReader.get_breakpoint_graph(stream, merge_edges=False)

    # overall shape of the graph
    self.assertIsInstance(result_bg, BreakpointGraph)
    self.assertEqual(len(list(result_bg.connected_components_subgraphs())), 3)
    self.assertEqual(len(list(result_bg.edges())), 5)
    self.assertEqual(len(list(result_bg.nodes())), 7)

    genome_1, genome_2 = BGGenome("genome_name_1"), BGGenome("genome_name_2")
    multicolors = [Multicolor(genome_1), Multicolor(genome_2)]
    condensed_multicolors = [
        Multicolor(BGGenome("genome_name_1")),
        Multicolor(BGGenome("genome_name_2")),
        Multicolor(BGGenome("genome_name_1"), BGGenome("genome_name_2")),
    ]

    # edges were not merged, so every edge has a single color ...
    for bgedge in result_bg.edges():
        self.assertIn(bgedge.multicolor, multicolors)
    # ... while condensed edges may carry both colors
    for bgedge in result_bg.edges():
        condensed_edge = result_bg.get_condensed_edge(
            vertex1=bgedge.vertex1, vertex2=bgedge.vertex2)
        self.assertIn(condensed_edge.multicolor, condensed_multicolors)

    infinity_edges = [
        bgedge for bgedge in result_bg.edges() if bgedge.is_infinity_edge
    ]
    self.assertEqual(len(infinity_edges), 4)

    # every edge knows the name of the fragment it came from
    for bgedge in result_bg.edges():
        edge_data = bgedge.data
        self.assertIn("fragment", edge_data)
        self.assertIsInstance(edge_data["fragment"], dict)
        self.assertIn("name", edge_data["fragment"])
        self.assertIn(edge_data["fragment"]["name"], {"chromosome_X", "scaffold111"})

    # forward orientation is recorded for regular and for infinity edges
    ah = result_bg.get_vertex_by_name("ah")
    bt = result_bg.get_vertex_by_name("bt")
    ahi = result_bg.get_vertex_by_name("ah__infinity")
    regular_edge = result_bg.get_edge_by_two_vertices(vertex1=ah, vertex2=bt)
    self.assertTupleEqual(regular_edge.data["fragment"]["forward_orientation"], (ah, bt))
    infinity_edge = result_bg.get_edge_by_two_vertices(vertex1=ah, vertex2=ahi)
    self.assertTupleEqual(infinity_edge.data["fragment"]["forward_orientation"], (ah, ahi))
def test_equality(self):
    """Multicolors are equal if they hold the same colors with the same multiplicity for each color."""
    # same single color
    self.assertEqual(Multicolor(self.genome1), Multicolor(self.genome1))
    # order of the colors does not matter
    self.assertEqual(
        Multicolor(self.genome1, self.genome2),
        Multicolor(self.genome2, self.genome1))
    # different colors
    self.assertNotEqual(Multicolor(self.genome1), Multicolor(BGGenome("ret")))
    # same colors, different multiplicity
    self.assertNotEqual(
        Multicolor(self.genome1, self.genome1, self.genome3),
        Multicolor(self.genome1, self.genome3))

    # comparison of a Multicolor object with a non-Multicolor object is always False
    for foreign_object in (1, (1, ), [1], "1", Mock()):
        self.assertNotEqual(Multicolor(), foreign_object)
def test_is_multicolor_tree_consistent(self):
    """Tests if a supplied multicolor complies with the tree topology."""
    # empty multicolor complies with any tree
    self.assertTrue(BGTree().multicolor_is_tree_consistent(Multicolor()))

    tree = BGTree("(((v1, v2), v3),(v4, v5));")
    v1, v2, v3, v4, v5 = self.bg_v1, self.bg_v2, self.bg_v3, self.bg_v4, self.bg_v5

    # (colors of the multicolor, expected consistency), for a non wgd tree
    cases = (
        ((v1, ), True),                     # simple case
        ((v1, v2), True),                   # a small v1, v2 subtree
        ((v1, v2, v3), True),               # bigger v1, v2, v3 subtree
        ((v2, v3), False),                  # complement is two subtrees instead of one
        ((v1, BGGenome("v6")), False),      # genome that is not present in the tree
        ((v1, v2, v3, v4), True),
        ((v1, v2, v3, v5), True),
        ((v1, v2, v4, v5), True),
        ((v1, v3, v4, v5), True),
        ((v2, v3, v4, v5), True),
        ((v1, v2, v3, v4, v5), True),
        ((v5, v4), True),
        ((v3, v4, v5), True),
        ((v3, v5), False),
    )
    for colors, expected_consistent in cases:
        check = self.assertTrue if expected_consistent else self.assertFalse
        check(tree.multicolor_is_tree_consistent(Multicolor(*colors)))
def test_initialization_incorrect(self):
    """A genome cannot be created without a name: the constructor call fails with TypeError."""
    self.assertRaises(TypeError, BGGenome)
def test_get_breakpoint_from_file(self):
    """Full workflow on dummy data: correct input with odd indentation and rubbish lines mixed in."""
    messy_lines = [
        "", "\t", "#comment1", ">genome_name_1", " #comment1", "a b $",
        "\tc -a @\t", " #comment1 ", "\t>genome_name_2", "a $", "", "\n\t"
    ]
    result_bg = GRIMMReader.get_breakpoint_graph(io.StringIO("\n".join(messy_lines)))
    self.assertIsInstance(result_bg, BreakpointGraph)
    self.assertEqual(len(list(result_bg.connected_components_subgraphs())), 3)
    self.assertEqual(len(list(result_bg.edges())), 6)
    self.assertEqual(len(list(result_bg.nodes())), 9)
    multicolors = [
        Multicolor(BGGenome("genome_name_1"), BGGenome("genome_name_2")),
        Multicolor(BGGenome("genome_name_1")),
        Multicolor(BGGenome("genome_name_2")),
    ]
    for bgedge in result_bg.edges():
        self.assertIn(bgedge.multicolor, multicolors)
    infinity_edges = [bgedge for bgedge in result_bg.edges() if bgedge.is_infinity_edge]
    self.assertEqual(len(infinity_edges), 3)

    # five genomes with overlapping gene content
    five_genome_lines = [
        ">genome_1", "a $", ">genome_2", "a b $", "# this is a bad genome",
        ">genome_3", "a b c $", ">genome_4", " # chromosome 1", "b c $",
        ">genome_5", "c $"
    ]
    result_bg = GRIMMReader.get_breakpoint_graph(io.StringIO("\n".join(five_genome_lines)))
    self.assertIsInstance(result_bg, BreakpointGraph)
    self.assertEqual(len(list(result_bg.connected_components_subgraphs())), 4)
    self.assertEqual(len(list(result_bg.edges())), 8)
    self.assertEqual(len(list(result_bg.nodes())), 12)
    genome1 = BGGenome("genome_1")
    genome2 = BGGenome("genome_2")
    genome3 = BGGenome("genome_3")
    genome4 = BGGenome("genome_4")
    genome5 = BGGenome("genome_5")
    multicolors = [
        Multicolor(genome1, genome2, genome3),
        Multicolor(genome1),
        Multicolor(genome2, genome3),
        Multicolor(genome2),
        Multicolor(genome3, genome4),
        Multicolor(genome3, genome4, genome5),
        Multicolor(genome4),
        Multicolor(genome5),
    ]
    for bgedge in result_bg.edges():
        self.assertIn(bgedge.multicolor, multicolors)
    infinity_edges = [bgedge for bgedge in result_bg.edges() if bgedge.is_infinity_edge]
    self.assertEqual(len(infinity_edges), 6)
    # infinity edges may carry any expected multicolor except the ones
    # whose ``multicolors`` attribute has length 2
    infinity_multicolors = [
        multicolor for multicolor in multicolors if len(multicolor.multicolors) != 2
    ]
    for bgedge in infinity_edges:
        self.assertIn(bgedge.multicolor, infinity_multicolors)
def test_initialization(self):
    """A genome created with a name exposes it through the ``name`` attribute."""
    self.assertEqual(BGGenome("name").name, "name")
def test_is_bgedge_vtree_consistent(self):
    """Tests if an edge, through its multicolor, is vtree consistent with a tree."""
    bgedge = BGEdge(vertex1="v1", vertex2="v2", multicolor=Multicolor())

    # bgedge with an empty multicolor complies with any tree
    bgedge.multicolor = Multicolor()
    self.assertTrue(BGTree("(v1, v2);").bgedge_is_vtree_consistent(bgedge))

    tree = BGTree("(((v1, v2), v3),(v4, v5));")
    v1, v2, v3, v4, v5 = self.bg_v1, self.bg_v2, self.bg_v3, self.bg_v4, self.bg_v5

    # (colors of the edge multicolor, expected consistency), for a non wgd tree
    cases = (
        ((v1, ), True),                     # simple case
        ((v1, v2), True),                   # a small v1, v2 subtree
        ((v1, v2, v3), True),               # bigger v1, v2, v3 subtree
        ((v2, v3), False),                  # v2, v3 is not a valid subtree
        ((v1, BGGenome("v6")), False),      # genome that is not present in the tree
        ((v1, v2, v3, v4), False),
        ((v1, v2, v3, v5), False),
        ((v1, v2, v4, v5), False),
        ((v1, v3, v4, v5), False),
        ((v2, v3, v4, v5), False),
        ((v1, v2, v3, v4, v5), True),
        ((v5, v4), True),
        ((v3, v4, v5), False),
        ((v3, v5), False),
    )
    for colors, expected_consistent in cases:
        # the same edge object is reused, only its multicolor is swapped
        bgedge.multicolor = Multicolor(*colors)
        check = self.assertTrue if expected_consistent else self.assertFalse
        check(tree.bgedge_is_vtree_consistent(bgedge))
def test_hash(self):
    """The hash of a genome instance is proxied to the hash value of its name."""
    genome_name = "name"
    self.assertEqual(hash(BGGenome(genome_name)), hash(genome_name))