def test_color_splits_no_guidance_sorting(self):
    """Check ``Multicolor.split_colors`` when called with ``sorted_guidance=True``.

    The order of the multicolors in the guidance affects the split: when two
    guidance multicolors are both present in the multicolor being split, the
    one listed first is retrieved, while the later one might not be, because
    not enough colors are left in the multicolor being split.

    NOTE(review): presumably ``sorted_guidance=True`` makes ``split_colors``
    use the guidance in the order given here -- confirm against its definition.
    """
    g1, g2, g3 = self.genome1, self.genome2, self.genome3

    # Case 1: two guidance multicolors, the second containing the first.
    # Expected pieces: the first guidance entry and the remaining single color.
    target = Multicolor(g1, g2, g3)
    guidance = [Multicolor(g1, g2), Multicolor(g1, g2, g3)]
    pieces = Multicolor.split_colors(target, guidance=guidance, sorted_guidance=True)
    self.assertEqual(len(pieces), 2)
    expected = [Multicolor(g1, g2), Multicolor(g3)]
    for piece in pieces:
        self.assertIn(piece, expected)

    # Case 2: colors with multiplicity above one and three guidance entries.
    # Three pieces are expected, each equal to one of the two references.
    target = Multicolor(g1, g1, g2, g3, g3, g3)
    guidance = [
        Multicolor(g1),
        Multicolor(g1, g2),
        Multicolor(g3, g3, g3, g2),
    ]
    pieces = Multicolor.split_colors(target, guidance=guidance, sorted_guidance=True)
    expected = [Multicolor(g1), Multicolor(g2, g3, g3, g3)]
    self.assertEqual(len(pieces), 3)
    for piece in pieces:
        self.assertIn(piece, expected)
def run(self, manager):
    """Read the gos-asm input and store the results in ``manager.data["gos-asm"]``.

    Everything is read from ``manager.configuration["gos-asm"]["input"]``.
    The following keys of ``manager.data["gos-asm"]`` are written:

    * ``"bg"`` -- a ``BreakpointGraph`` updated with the graph parsed from
      every file listed in ``block_orders_file_paths`` (``merge_edges=False``);
    * ``"phylogenetic_tree"`` -- a ``BGTree`` built with the configured
      ``phylogenetic_tree`` value passed as ``newick``;
    * ``"target_multicolor"`` -- one ``Multicolor`` made of a ``BGGenome``
      per entry of ``target_organisms``;
    * ``"target_multicolors"`` -- the deduplicated target multicolors, sorted
      by decreasing length of their hashable representation;
    * ``"repeats_guidance"`` -- whatever ``get_repeats_bridges_guidance``
      returns for the configured ``repeats_bridges_file``.

    :param manager: object exposing ``logger``, ``configuration`` and ``data``.
    """
    manager.logger.info("Reading blocks orders data")
    input_config = manager.configuration["gos-asm"]["input"]
    bg = BreakpointGraph()
    for file_path in input_config["block_orders_file_paths"]:
        with open(file_path, "rt") as source:
            bg.update(
                breakpoint_graph=GRIMMReader.get_breakpoint_graph(stream=source, merge_edges=False),
                merge_edges=False)
    manager.data["gos-asm"]["bg"] = bg

    manager.logger.info("Reading phylogenetic tree information")
    tree = BGTree(newick=input_config["phylogenetic_tree"])
    manager.data["gos-asm"]["phylogenetic_tree"] = tree

    full_tmc = Multicolor(*[BGGenome(genome_name)
                            for genome_name in input_config["target_organisms"]])
    manager.data["gos-asm"]["target_multicolor"] = full_tmc

    vtree_consistent_target_multicolors = Multicolor.split_colors(
        full_tmc,
        guidance=tree.vtree_consistent_multicolors,
        account_for_color_multiplicity_in_guidance=False)

    # Copy the tree's multicolors once: nothing in the loop below modifies the
    # tree, so re-copying them for every target multicolor was redundant work.
    # The `not in` guard ensures every copy is appended at most once.
    vtree_multicolors = deepcopy(tree.vtree_consistent_multicolors)
    # Iterate over a snapshot, because the list itself grows inside the loop.
    for target_multicolor in vtree_consistent_target_multicolors[:]:
        for vtree_c_multicolor in vtree_multicolors:
            if vtree_c_multicolor <= target_multicolor \
                    and vtree_c_multicolor not in vtree_consistent_target_multicolors \
                    and len(vtree_c_multicolor.colors) > 0:
                vtree_consistent_target_multicolors.append(vtree_c_multicolor)

    # Longest multicolors first.
    vtree_consistent_target_multicolors = sorted(
        vtree_consistent_target_multicolors,
        key=lambda mc: len(mc.hashable_representation),
        reverse=True)

    # NOTE: a disabled step used to additionally add, for every combination of
    # pairwise non-intersecting target multicolors, their sum to the targets.
    # It is intentionally not performed.

    # Deduplicate through the hashable representations, then rebuild fresh
    # Multicolor objects from them.
    hashed_vertex_tree_consistent_multicolors = {
        mc.hashable_representation for mc in vtree_consistent_target_multicolors}
    all_target_multicolors = sorted(
        [Multicolor(*hashed_multicolor)
         for hashed_multicolor in hashed_vertex_tree_consistent_multicolors],
        key=lambda mc: len(mc.hashable_representation),
        reverse=True)
    manager.data["gos-asm"]["target_multicolors"] = all_target_multicolors

    manager.logger.info("Reading repeats-bridges information")
    manager.data["gos-asm"]["repeats_guidance"] = get_repeats_bridges_guidance(
        file_name=input_config["repeats_bridges_file"],
        data=manager.data)
def test_split_colors_with_empty_multicolor_in_guidance(self):
    """An empty multicolor in the guidance shall have no effect on the split."""
    g1, g2 = self.genome1, self.genome2
    target = Multicolor(g1, g2)
    # The second guidance entry is the empty multicolor under test.
    guidance = [Multicolor(g1), Multicolor()]
    pieces = Multicolor.split_colors(target, guidance=guidance)
    self.assertEqual(len(pieces), 2)
    expected = [Multicolor(g1), Multicolor(g2)]
    for piece in pieces:
        self.assertIn(piece, expected)
def compute_evolutionary_score(multicolor, scenario, data):
    """Return the number of pieces a multicolor splits into along the tree.

    For ``EvolutionaryScenario.existed`` the supplied ``multicolor`` itself is
    split; for any other scenario its complement with respect to the full
    multicolor (``full_multicolor - multicolor``) is split instead. The split
    is guided by ``vtree_consistent_multicolors`` of the phylogenetic tree
    stored in ``data["gos-asm"]["phylogenetic_tree"]``, with color multiplicity
    in the guidance not accounted for.

    The full multicolor is obtained from ``get_full_multicolor`` on first use
    and kept in ``data["gos-asm"]["cache"]["full_multicolor"]`` afterwards.

    :param multicolor: the multicolor to score.
    :param scenario: an ``EvolutionaryScenario`` value.
    :param data: the shared data dictionary (read, and updated with the cache).
    :return: the length of the list returned by ``Multicolor.split_colors``.
    """
    phylo_tree = data["gos-asm"]["phylogenetic_tree"]
    if scenario == EvolutionaryScenario.existed:
        split_target = multicolor
    else:
        cache = data["gos-asm"]["cache"]
        if "full_multicolor" not in cache:
            # Computed lazily, once; later calls reuse the cached value.
            cache["full_multicolor"] = get_full_multicolor(data=data)
        split_target = cache["full_multicolor"] - multicolor
    pieces = Multicolor.split_colors(
        multicolor=split_target,
        guidance=phylo_tree.vtree_consistent_multicolors,
        account_for_color_multiplicity_in_guidance=False)
    return len(pieces)
def test_split_colors_do_not_account_for_multiplicity_in_guidance(self):
    """Check ``split_colors`` with ``account_for_color_multiplicity_in_guidance=False``.

    In every case below the expected pieces carry the color multiplicities of
    the multicolor being split, regardless of the multiplicities that appear
    in the guidance.
    """
    g1, g2, g3 = self.genome1, self.genome2, self.genome3

    def split(target, **kwargs):
        # All cases share the same flag; only the guidance differs.
        return Multicolor.split_colors(
            target, account_for_color_multiplicity_in_guidance=False, **kwargs)

    # Case 1: no guidance. The multicolor is split into its separate colors,
    # each piece keeping the multiplicity of its color.
    target = Multicolor(g1, g1, g2, g3, g3, g3)
    pieces = split(target)
    expected = [Multicolor(g1, g1), Multicolor(g2), Multicolor(g3, g3, g3)]
    self.assertEqual(len(pieces), 3)
    for piece in pieces:
        self.assertIn(piece, expected)

    # Case 2: guidance multicolors already have multiplicity one. The split
    # follows them, but multiplicities come from the multicolor being split.
    target = Multicolor(g1, g1, g2, g3, g3, g3)
    guidance = [Multicolor(g1, g2), Multicolor(g3)]
    pieces = split(target, guidance=guidance)
    expected = [Multicolor(g1, g1, g2), Multicolor(g3, g3, g3)]
    self.assertEqual(len(pieces), 2)
    for piece in pieces:
        self.assertIn(piece, expected)

    # Case 3: guidance multicolors contain colors with multiplicity above one.
    # The expected pieces are the same as in case 2, i.e. the guidance
    # multiplicities make no difference to the result.
    target = Multicolor(g1, g1, g2, g3, g3, g3)
    guidance = [Multicolor(g1, g2, g1), Multicolor(g3, g3)]
    pieces = split(target, guidance=guidance)
    expected = [Multicolor(g1, g1, g2), Multicolor(g3, g3, g3)]
    self.assertEqual(len(pieces), 2)
    for piece in pieces:
        self.assertIn(piece, expected)

    # Case 4: several guidance multicolors differ only in the multiplicity of
    # their colors, so they look the same once multiplicity is disregarded.
    g4 = self.genome4
    target = Multicolor(g1, g2, g3, g3, g4, g4)
    guidance = [
        Multicolor(g1),
        Multicolor(g1, g1),
        Multicolor(g1, g1),
        Multicolor(g2, g3),
        Multicolor(g4),
    ]
    pieces = split(target, guidance=guidance)
    expected = [Multicolor(g1), Multicolor(g2, g3, g3), Multicolor(g4, g4)]
    self.assertEqual(len(pieces), 3)
    for piece in pieces:
        self.assertIn(piece, expected)
def test_split_colors_account_for_multiplicity_in_guidance(self):
    """Check ``split_colors`` with ``account_for_color_multiplicity_in_guidance=True``.

    Covers: no guidance at all; a single color that appears in the guidance
    exactly, only as part of a bigger multicolor, both ways, or not at all;
    and guidance multicolors that fit the split multicolor more than once or
    only partially.
    """
    g1, g2, g3 = self.genome1, self.genome2, self.genome3

    def split(target, **kwargs):
        # All cases share the same flag; only the guidance differs.
        return Multicolor.split_colors(
            target, account_for_color_multiplicity_in_guidance=True, **kwargs)

    def assert_lone_genome1(pieces):
        # The split must yield exactly one piece: genome1 with multiplicity one.
        self.assertEqual(len(pieces), 1)
        only_piece = pieces[0]
        self.assertEqual(len(only_piece.colors), 1)
        self.assertEqual(len(only_piece.multicolors), 1)
        self.assertEqual(only_piece.multicolors[g1], 1)

    # No guidance: the multicolor is split by its own colors, every piece
    # keeping the multiplicity its color had in the multicolor being split.
    target = Multicolor(g1, g1, g2, g3, g3, g3)
    expected = [Multicolor(g1, g1), Multicolor(g2), Multicolor(g3, g3, g3)]
    pieces = split(target)
    self.assertEqual(len(pieces), 3)
    for piece in pieces:
        self.assertIn(piece, expected)

    # Guidance holds exactly the single color with multiplicity one.
    assert_lone_genome1(split(Multicolor(g1), guidance=[Multicolor(g1)]))

    # The color appears in the guidance only as part of a bigger multicolor;
    # it shall still be retrieved fully on its own.
    g4 = self.genome4
    assert_lone_genome1(split(Multicolor(g1), guidance=[Multicolor(g1, g4)]))

    # The color appears in the guidance both on its own and as part of a
    # bigger multicolor.
    assert_lone_genome1(split(
        Multicolor(g1),
        guidance=[Multicolor(g1), Multicolor(g1, g4), Multicolor(g4)]))

    # The color does not appear in the guidance at all; it shall still be
    # retrieved fully.
    assert_lone_genome1(split(Multicolor(g1), guidance=[Multicolor(g2, g4)]))

    # The guidance multicolor is present twice in the multicolor being split:
    # three pieces, each either the guidance multicolor or the lone genome1.
    target = Multicolor(g1, g1, g1, g2, g2)
    guidance = [Multicolor(g1, g2)]
    allowed = [guidance[0], Multicolor(g1)]
    pieces = split(target, guidance=guidance)
    self.assertEqual(len(pieces), 3)
    for piece in pieces:
        self.assertIn(piece, allowed)

    # The guidance multicolor only partially intersects the multicolor being
    # split (genome3 is absent from it); the intersection is retrieved twice.
    target = Multicolor(g1, g1, g2, g2)
    guidance = [Multicolor(g1, g2, g3)]
    expected_piece = Multicolor(g1, g2)
    pieces = split(target, guidance=guidance)
    self.assertEqual(len(pieces), 2)
    for piece in pieces:
        self.assertEqual(piece, expected_piece)

    # One guidance multicolor is fully present (twice) and another only
    # partially intersects; the result is still two copies of the fully
    # present one.
    target = Multicolor(g1, g1, g2, g2)
    guidance = [Multicolor(g1, g2), Multicolor(g1, g2, g3)]
    expected_piece = Multicolor(g1, g2)
    pieces = split(target, guidance=guidance)
    self.assertEqual(len(pieces), 2)
    for piece in pieces:
        self.assertEqual(piece, expected_piece)

    # Most complex case: the guidance mixes multicolors with different color
    # multiplicities, all of whose colors occur in the multicolor being split.
    target = Multicolor(g1, g1, g2, g2, g3)
    guidance = [Multicolor(g1, g2, g3), Multicolor(g1, g1, g2, g2)]
    allowed = [Multicolor(g1, g1, g2, g2), Multicolor(g3)]
    pieces = split(target, guidance=guidance)
    self.assertEqual(len(pieces), 2)
    for piece in pieces:
        self.assertIn(piece, allowed)