def test_color_splits_no_guidance_sorting(self):
    # The order of multicolors in the guidance affects the splitting: if two
    # guidance multicolors are both present in the multicolor being split, the
    # first one is retrieved, but the second one might not be, because not
    # enough of the split multicolor may be left for it.
    #
    # Every call below passes `sorted_guidance=True`
    # (NOTE(review): presumably this makes split_colors use the guidance in
    # the order given, without re-sorting it -- confirm against split_colors).
    g1, g2, g3 = self.genome1, self.genome2, self.genome3

    # Scenario 1: two guidance multicolors; the first one is a part of the
    # second one, and the second one equals the multicolor being split.
    target = Multicolor(g1, g2, g3)
    hints = [Multicolor(g1, g2), Multicolor(g1, g2, g3)]
    pieces = Multicolor.split_colors(target, guidance=hints, sorted_guidance=True)
    self.assertEqual(len(pieces), 2)
    expected = [Multicolor(g1, g2), Multicolor(g3)]
    for piece in pieces:
        self.assertIn(piece, expected)

    # Scenario 2: three guidance multicolors and a split multicolor in which
    # some colors have multiplicity greater than one. Three pieces are
    # expected, each equal to one of the two reference multicolors.
    target = Multicolor(g1, g1, g2, g3, g3, g3)
    hints = [
        Multicolor(g1),
        Multicolor(g1, g2),
        Multicolor(g3, g3, g3, g2),
    ]
    pieces = Multicolor.split_colors(target, guidance=hints, sorted_guidance=True)
    expected = [Multicolor(g1), Multicolor(g2, g3, g3, g3)]
    self.assertEqual(len(pieces), 3)
    for piece in pieces:
        self.assertIn(piece, expected)
Beispiel #2
0
    def run(self, manager):
        """Read the "gos-asm" inputs and store them in ``manager.data["gos-asm"]``.

        The following keys are written, in this order:

        * ``"bg"`` -- a ``BreakpointGraph`` updated with the graph read from
          every file listed under ``block_orders_file_paths``;
        * ``"phylogenetic_tree"`` -- a ``BGTree`` built from the configured
          ``phylogenetic_tree`` value (passed as ``newick=``);
        * ``"target_multicolor"`` -- one ``Multicolor`` made of a ``BGGenome``
          per configured target organism;
        * ``"target_multicolors"`` -- the pieces of the target multicolor split
          by the tree's ``vtree_consistent_multicolors``, extended with every
          non-empty tree multicolor that is ``<=`` one of those pieces,
          de-duplicated and ordered by decreasing size;
        * ``"repeats_guidance"`` -- the result of
          ``get_repeats_bridges_guidance`` for the configured
          ``repeats_bridges_file``.

        :param manager: object exposing ``logger``, ``configuration`` and
            ``data``; ``manager.data["gos-asm"]`` must already exist.
        """
        manager.logger.info("Reading blocks orders data")
        file_paths = manager.configuration["gos-asm"]["input"]["block_orders_file_paths"]
        bg = BreakpointGraph()
        for file_path in file_paths:
            with open(file_path, "rt") as source:
                bg.update(breakpoint_graph=GRIMMReader.get_breakpoint_graph(stream=source, merge_edges=False), merge_edges=False)
        manager.data["gos-asm"]["bg"] = bg

        manager.logger.info("Reading phylogenetic tree information")
        tree = BGTree(newick=manager.configuration["gos-asm"]["input"]["phylogenetic_tree"])
        manager.data["gos-asm"]["phylogenetic_tree"] = tree

        full_tmc = Multicolor(*[BGGenome(genome_name) for genome_name in manager.configuration["gos-asm"]["input"]["target_organisms"]])
        manager.data["gos-asm"]["target_multicolor"] = full_tmc
        vtree_consistent_target_multicolors = Multicolor.split_colors(full_tmc,
                                                                      guidance=tree.vtree_consistent_multicolors,
                                                                      account_for_color_multiplicity_in_guidance=False)

        # The tree multicolors do not change while we scan them, so a single
        # private copy is enough; it keeps the objects appended below detached
        # from the ones owned by the tree.
        tree_multicolors = deepcopy(tree.vtree_consistent_multicolors)
        # Iterate over a snapshot of the split pieces, because the list itself
        # grows inside the loop.
        for target_multicolor in vtree_consistent_target_multicolors[:]:
            for vtree_c_multicolor in tree_multicolors:
                if vtree_c_multicolor <= target_multicolor \
                        and vtree_c_multicolor not in vtree_consistent_target_multicolors \
                        and len(vtree_c_multicolor.colors) > 0:
                    vtree_consistent_target_multicolors.append(vtree_c_multicolor)

        vtree_consistent_target_multicolors = sorted(vtree_consistent_target_multicolors,
                                                     key=lambda mc: len(mc.hashable_representation),
                                                     reverse=True)

        # De-duplicate through the hashable representation, rebuild fresh
        # Multicolor objects, and order them from the largest to the smallest.
        hashed_vertex_tree_consistent_multicolors = {mc.hashable_representation
                                                     for mc in vtree_consistent_target_multicolors}
        all_target_multicolors = sorted(
            (Multicolor(*hashed_multicolor) for hashed_multicolor in hashed_vertex_tree_consistent_multicolors),
            key=lambda mc: len(mc.hashable_representation),
            reverse=True)
        manager.data["gos-asm"]["target_multicolors"] = all_target_multicolors

        manager.logger.info("Reading repeats-bridges information")
        manager.data["gos-asm"]["repeats_guidance"] = get_repeats_bridges_guidance(
            file_name=manager.configuration["gos-asm"]["input"]["repeats_bridges_file"], data=manager.data)
 def test_split_colors_with_empty_multicolor_in_guidance(self):
     # An empty multicolor in the splitting guidance shall not have any effect
     # on the splitting procedure: the expected pieces are the same single
     # color multicolors one would list without it.
     target = Multicolor(self.genome1, self.genome2)
     hints = [Multicolor(self.genome1), Multicolor()]
     pieces = Multicolor.split_colors(target, guidance=hints)
     self.assertEqual(len(pieces), 2)
     expected = [Multicolor(self.genome1), Multicolor(self.genome2)]
     for piece in pieces:
         self.assertIn(piece, expected)
Beispiel #4
0
def compute_evolutionary_score(multicolor, scenario, data):
    """Return the number of pieces a multicolor splits into along the tree.

    For ``EvolutionaryScenario.existed`` the given ``multicolor`` itself is
    split; for any other scenario the split is applied to the full multicolor
    minus ``multicolor``. The full multicolor is obtained through
    ``get_full_multicolor`` on first use and then kept under
    ``data["gos-asm"]["cache"]["full_multicolor"]``.

    :param multicolor: multicolor to score.
    :param scenario: value compared against ``EvolutionaryScenario.existed``.
    :param data: shared data mapping; ``data["gos-asm"]`` must provide
        ``"phylogenetic_tree"`` and, for non-``existed`` scenarios, ``"cache"``.
    :return: ``len`` of the list returned by ``Multicolor.split_colors`` when
        guided by ``tree.vtree_consistent_multicolors`` with
        ``account_for_color_multiplicity_in_guidance=False``.
    """
    tree = data["gos-asm"]["phylogenetic_tree"]
    if scenario == EvolutionaryScenario.existed:
        color_to_split = multicolor
    else:
        cache = data["gos-asm"]["cache"]
        # Compute the full multicolor lazily and remember it for later calls.
        if "full_multicolor" not in cache:
            cache["full_multicolor"] = get_full_multicolor(data=data)
        color_to_split = cache["full_multicolor"] - multicolor
    pieces = Multicolor.split_colors(
        multicolor=color_to_split,
        guidance=tree.vtree_consistent_multicolors,
        account_for_color_multiplicity_in_guidance=False)
    return len(pieces)
 def test_split_colors_do_not_account_for_multiplicity_in_guidance(self):
     # All scenarios below call split_colors with
     # `account_for_color_multiplicity_in_guidance=False`.
     g1, g2, g3, g4 = self.genome1, self.genome2, self.genome3, self.genome4

     def check_pieces(pieces, expected, count):
         # `count` pieces are expected, each equal to one of `expected`.
         self.assertEqual(len(pieces), count)
         for piece in pieces:
             self.assertIn(piece, expected)

     # Scenario 1: no guidance. The targeted multicolor shall be split into
     # separate colors, keeping the multiplicity of each color intact in the
     # respective piece.
     target = Multicolor(g1, g1, g2, g3, g3, g3)
     pieces = Multicolor.split_colors(
         target, account_for_color_multiplicity_in_guidance=False)
     expected = [
         Multicolor(g1, g1),
         Multicolor(g2),
         Multicolor(g3, g3, g3),
     ]
     check_pieces(pieces, expected, 3)

     # Scenario 2: the guidance multicolors already have multiplicity 1 for
     # every color. The target shall be split along them, while the
     # multiplicity of each color in the result stays as it was in the target.
     target = Multicolor(g1, g1, g2, g3, g3, g3)
     hints = [Multicolor(g1, g2), Multicolor(g3)]
     pieces = Multicolor.split_colors(
         target,
         guidance=hints,
         account_for_color_multiplicity_in_guidance=False)
     expected = [
         Multicolor(g1, g1, g2),
         Multicolor(g3, g3, g3),
     ]
     check_pieces(pieces, expected, 2)

     # Scenario 3: the guidance multicolors contain colors with multiplicity
     # greater than 1. They are expected to act like the same multicolors with
     # every multiplicity reduced to 1, and the resulting pieces shall carry
     # the multiplicities of the original target.
     target = Multicolor(g1, g1, g2, g3, g3, g3)
     hints = [Multicolor(g1, g2, g1), Multicolor(g3, g3)]
     pieces = Multicolor.split_colors(
         target,
         guidance=hints,
         account_for_color_multiplicity_in_guidance=False)
     expected = [
         Multicolor(g1, g1, g2),
         Multicolor(g3, g3, g3),
     ]
     check_pieces(pieces, expected, 2)

     # Scenario 4: several guidance multicolors differ only in the
     # multiplicity of their colors, i.e. they would look the same once
     # simplified.
     target = Multicolor(g1, g2, g3, g3, g4, g4)
     hints = [
         Multicolor(g1),
         Multicolor(g1, g1),
         Multicolor(g1, g1),
         Multicolor(g2, g3),
         Multicolor(g4),
     ]
     pieces = Multicolor.split_colors(
         target,
         guidance=hints,
         account_for_color_multiplicity_in_guidance=False)
     expected = [
         Multicolor(g1),
         Multicolor(g2, g3, g3),
         Multicolor(g4, g4),
     ]
     check_pieces(pieces, expected, 3)
 def test_split_colors_account_for_multiplicity_in_guidance(self):
     # All scenarios below call split_colors with
     # `account_for_color_multiplicity_in_guidance=True`.
     g1, g2, g3, g4 = self.genome1, self.genome2, self.genome3, self.genome4

     def check_single_genome1_piece(pieces):
         # Exactly one piece, holding only genome1 with multiplicity 1.
         self.assertEqual(len(pieces), 1)
         only_piece = pieces[0]
         self.assertEqual(len(only_piece.colors), 1)
         self.assertEqual(len(only_piece.multicolors), 1)
         self.assertEqual(only_piece.multicolors[self.genome1], 1)

     def check_pieces_among(pieces, expected, count):
         # `count` pieces are expected, each equal to one of `expected`.
         self.assertEqual(len(pieces), count)
         for piece in pieces:
             self.assertIn(piece, expected)

     def check_pieces_all_equal(pieces, expected_piece, count):
         # `count` pieces are expected, every one equal to `expected_piece`.
         self.assertEqual(len(pieces), count)
         for piece in pieces:
             self.assertEqual(piece, expected_piece)

     # Scenario 1: no guidance. The multicolor shall be split according to its
     # own colors, and each color in the result shall keep the multiplicity it
     # had in the targeted multicolor.
     target = Multicolor(g1, g1, g2, g3, g3, g3)
     expected = [
         Multicolor(g1, g1),
         Multicolor(g2),
         Multicolor(g3, g3, g3),
     ]
     pieces = Multicolor.split_colors(
         target, account_for_color_multiplicity_in_guidance=True)
     check_pieces_among(pieces, expected, 3)

     # Scenario 2: guidance with a single multicolor that holds a single color
     # with multiplicity one.
     target = Multicolor(g1)
     hints = [Multicolor(g1)]
     pieces = Multicolor.split_colors(
         target,
         guidance=hints,
         account_for_color_multiplicity_in_guidance=True)
     check_single_genome1_piece(pieces)

     # Scenario 3: the color is present in the guidance only as a part of a
     # bigger multicolor; it shall still be retrieved fully on its own.
     target = Multicolor(g1)
     hints = [Multicolor(g1, g4)]
     pieces = Multicolor.split_colors(
         target,
         guidance=hints,
         account_for_color_multiplicity_in_guidance=True)
     check_single_genome1_piece(pieces)

     # Scenario 4: the color is present in the guidance both on its own and as
     # a part of a bigger multicolor; it shall be retrieved fully.
     target = Multicolor(g1)
     hints = [
         Multicolor(g1),
         Multicolor(g1, g4),
         Multicolor(g4),
     ]
     pieces = Multicolor.split_colors(
         target,
         guidance=hints,
         account_for_color_multiplicity_in_guidance=True)
     check_single_genome1_piece(pieces)

     # Scenario 5: the color is absent from the guidance; it shall be
     # retrieved fully, as an appendix.
     target = Multicolor(g1)
     hints = [Multicolor(g2, g4)]
     pieces = Multicolor.split_colors(
         target,
         guidance=hints,
         account_for_color_multiplicity_in_guidance=True)
     check_single_genome1_piece(pieces)

     # Scenario 6: a guidance multicolor is present twice in the multicolor
     # being split, and thus shall be retrieved fully twice; the leftover
     # color makes the third piece.
     target = Multicolor(g1, g1, g1, g2, g2)
     hints = [Multicolor(g1, g2)]
     expected_pair = hints[0]
     expected_leftover = Multicolor(g1)
     pieces = Multicolor.split_colors(
         target,
         guidance=hints,
         account_for_color_multiplicity_in_guidance=True)
     check_pieces_among(pieces, [expected_pair, expected_leftover], 3)

     # Scenario 7: a guidance multicolor has a non-empty intersection (with
     # multiplicity > 1) with the multicolor being split.
     target = Multicolor(g1, g1, g2, g2)
     hints = [Multicolor(g1, g2, g3)]
     expected_piece = Multicolor(g1, g2)
     pieces = Multicolor.split_colors(
         target,
         guidance=hints,
         account_for_color_multiplicity_in_guidance=True)
     check_pieces_all_equal(pieces, expected_piece, 2)

     # Scenario 8: one guidance multicolor is fully present twice in the
     # multicolor being split, while another one only intersects with it and
     # interferes with the fully present one. Full presence must win.
     target = Multicolor(g1, g1, g2, g2)
     hints = [
         Multicolor(g1, g2),
         Multicolor(g1, g2, g3),
     ]
     expected_piece = Multicolor(g1, g2)
     pieces = Multicolor.split_colors(
         target,
         guidance=hints,
         account_for_color_multiplicity_in_guidance=True)
     check_pieces_all_equal(pieces, expected_piece, 2)

     # Scenario 9 (the most complex one): the guidance holds both a fully
     # present multicolor and one that only intersects with the multicolor
     # being split.
     target = Multicolor(g1, g1, g2, g2, g3)
     hints = [
         Multicolor(g1, g2, g3),
         Multicolor(g1, g1, g2, g2),
     ]
     expected_big = Multicolor(g1, g1, g2, g2)
     expected_small = Multicolor(g3)
     pieces = Multicolor.split_colors(
         target,
         guidance=hints,
         account_for_color_multiplicity_in_guidance=True)
     check_pieces_among(pieces, [expected_big, expected_small], 2)