Beispiel #1
0
    def test_two_universes_partial_weighted1(self):
        """Check the chosen cover for two universes with set costs, under
        three different universe_p settings.
        """
        sets = {
            0: {0: {1, 2}},
            1: {0: {1, 2}, 1: {3, 4, 5}},
            2: {1: {4}},
            3: {1: {5}},
            4: {1: {3}},
        }

        # Each case pairs a universe_p input with the expected set ids
        cases = [
            ({0: 0.1, 1: 0.1}, {0, 3}),
            ({0: 0.0, 1: 0.1}, {3}),
            ({0: 0.5, 1: 0.5}, {0, 2, 3}),
        ]
        for universe_p, expected in cases:
            # Build a fresh costs dict for every call so that the cases
            # stay independent of one another
            costs = {0: 2, 1: 1000, 2: 3, 3: 1, 4: 10}
            chosen = sc.approx_multiuniverse(sets, costs, universe_p)
            self.assertEqual(chosen, expected)
Beispiel #2
0
 def test_same_value_different_universe3(self):
     """Check the chosen cover when the same element value (2) shows up
     in two different universes, each with a universe_p of 1.0.
     """
     universe_p = {0: 1.0, 1: 1.0}
     sets = {
         0: {0: {1, 2}, 1: {2}},
         1: {0: {1, 2, 3}},
     }
     chosen = sc.approx_multiuniverse(sets, universe_p=universe_p)
     self.assertEqual(chosen, {0, 1})
Beispiel #3
0
 def test_one_universe_complete_unweighted(self):
     """Check the chosen cover for a single universe when no costs,
     universe_p, or ranks are passed.
     """
     # Every set only has elements in universe 0; the position in this
     # list is the set id
     elements_by_set = [{1, 2}, {1, 2, 4}, {2, 4}, {4, 5}, {3}]
     sets = {set_id: {0: els} for set_id, els in enumerate(elements_by_set)}
     chosen = sc.approx_multiuniverse(sets)
     self.assertEqual(chosen, {1, 3, 4})
Beispiel #4
0
 def test_one_universe_rank(self):
     """Check the chosen cover for a single universe when only ranks
     are passed alongside the sets.
     """
     # Every set only has elements in universe 0; the position in this
     # list is the set id
     elements_by_set = [{1, 2, 3}, {1, 2, 3, 4}, {1, 2, 3}, {1, 2, 3}]
     sets = {set_id: {0: els} for set_id, els in enumerate(elements_by_set)}
     ranks = {0: 5, 1: 10, 2: 1, 3: 10}
     chosen = sc.approx_multiuniverse(sets, ranks=ranks)
     self.assertEqual(chosen, {1, 2})
Beispiel #5
0
 def test_tuple_universe_id(self):
     """Check the chosen cover when universe ids are tuples rather than
     integers.
     """
     first_universe = (0, 0)
     second_universe = (1, 0)
     sets = {
         0: {first_universe: {1, 2}, second_universe: {2}},
         1: {first_universe: {1, 2, 3}},
     }
     universe_p = {first_universe: 1.0, second_universe: 1.0}
     chosen = sc.approx_multiuniverse(sets, universe_p=universe_p)
     self.assertEqual(chosen, {0, 1})
Beispiel #6
0
    def test_partial_coverage_with_ranks(self):
        """Check the chosen cover for one universe of four disjoint sets,
        under two combinations of universe_p and ranks.
        """
        sets = {
            0: {0: {1, 2, 3}},
            1: {0: {4, 5, 6}},
            2: {0: {7, 8, 9}},
            3: {0: {10, 11, 12}},
        }

        # Each case is (universe_p, ranks, expected set ids)
        cases = [
            ({0: 0.25}, {0: 2, 1: 1, 2: 2, 3: 2}, {1}),
            ({0: 0.5}, {0: 3, 1: 1, 2: 3, 3: 2}, {1, 3}),
        ]
        for universe_p, ranks, expected in cases:
            chosen = sc.approx_multiuniverse(sets,
                                             universe_p=universe_p,
                                             ranks=ranks)
            self.assertEqual(chosen, expected)
Beispiel #7
0
 def test_cost_and_ranks2(self):
     """Check the chosen cover for a single universe when both costs
     and ranks are passed.
     """
     # Every set only has elements in universe 0; the position in this
     # list is the set id
     elements_by_set = [{1, 2, 3, 4}, {1, 2, 3}, {3, 4}, {1, 2, 3, 4}]
     sets = {set_id: {0: els} for set_id, els in enumerate(elements_by_set)}
     costs = {0: 1, 1: 1, 2: 1, 3: 10}
     ranks = {0: 2, 1: 1, 2: 1, 3: 1}
     chosen = sc.approx_multiuniverse(sets, costs=costs, ranks=ranks)
     self.assertEqual(chosen, {1, 2})
Beispiel #8
0
 def test_two_universes_ranks(self):
     """Check the chosen cover for two universes when only ranks are
     passed alongside the sets.
     """
     ranks = {0: 100, 1: 3, 2: 2, 3: 1}
     sets = {
         0: {0: {1, 2, 3, 4}, 1: {1}},
         1: {0: {1, 2, 3}},
         2: {0: {4}, 1: {1}},
         3: {0: {2}},
     }
     chosen = sc.approx_multiuniverse(sets, ranks=ranks)
     self.assertEqual(chosen, {1, 2, 3})
Beispiel #9
0
 def test_one_universe_partial_unweighted(self):
     """Check the chosen cover for a single universe with a universe_p
     of 0.6 and no costs or ranks.
     """
     # Every set only has elements in universe 0; the position in this
     # list is the set id
     elements_by_set = [{1, 2}, {1, 2, 4}, {2, 4}, {4, 5}, {3}]
     sets = {set_id: {0: els} for set_id, els in enumerate(elements_by_set)}
     universe_p = {0: 0.6}
     chosen = sc.approx_multiuniverse(sets, universe_p=universe_p)
     self.assertEqual(chosen, {1})
Beispiel #10
0
 def test_two_universes_partial_unweighted1(self):
     """Check the chosen cover for two universes with different
     universe_p values and no costs or ranks.
     """
     universe_p = {0: 1.0, 1: 0.3}
     sets = {
         0: {1: {1, 2}},
         1: {1: {1, 2, 4}},
         2: {1: {2, 4}},
         3: {0: {5}, 1: {4}},
         4: {0: {3}},
     }
     chosen = sc.approx_multiuniverse(sets, universe_p=universe_p)
     self.assertEqual(chosen, {3, 4})
Beispiel #11
0
    def test_two_universes_partial_weighted3(self):
        """Check the chosen cover for two universes with the same
        universe_p but two different cost assignments.
        """
        sets = {
            0: {1: {1, 2}},
            1: {0: {3, 4, 5}, 1: {2}},
            2: {0: {3}},
            3: {0: {4}},
            4: {0: {5}},
        }

        # Each case is (costs, expected set ids)
        cases = [
            # The optimal solution is [1] but the approximation fails to
            # find it
            ({0: 1000, 1: 4, 2: 1, 3: 1, 4: 2}, {1, 2, 3}),
            ({0: 1000, 1: 4, 2: 1.5, 3: 1.5, 4: 2}, {1}),
        ]
        for costs, expected in cases:
            # Build a fresh universe_p dict for every call so that the
            # cases stay independent of one another
            universe_p = {0: 0.6, 1: 0.5}
            chosen = sc.approx_multiuniverse(sets, costs, universe_p)
            self.assertEqual(chosen, expected)
Beispiel #12
0
 def test_two_universes_partial_weighted2(self):
     """Check the chosen cover for two universes with set costs and
     different universe_p values per universe.
     """
     universe_p = {0: 1.0, 1: 0.5}
     costs = {0: 3, 1: 4, 2: 1, 3: 1, 4: 2}
     sets = {
         0: {0: {1, 2}},
         1: {0: {2, 3}, 1: {4, 5}},
         2: {0: {3}},
         3: {1: {4}},
         4: {1: {5}},
     }
     chosen = sc.approx_multiuniverse(sets, costs, universe_p)
     self.assertEqual(chosen, {0, 2, 3})
Beispiel #13
0
    def test_with_intervalsets(self):
        """Check the chosen cover when every set is given as an
        IntervalSet and use_intervalsets is True.
        """
        make_intervals = interval.IntervalSet
        sets = {
            0: {0: make_intervals([(1, 100)]),
                1: make_intervals([(1, 5)])},
            1: {0: make_intervals([(20, 30)])},
            2: {0: make_intervals([(40, 50)]),
                1: make_intervals([(20, 50)])},
        }
        universe_p = {0: 1.0, 1: 0.1}

        chosen = sc.approx_multiuniverse(sets,
                                         universe_p=universe_p,
                                         use_intervalsets=True)
        self.assertEqual(chosen, {0})
Beispiel #14
0
    def test_two_universe_partial_coverage_with_ranks(self):
        """Check the chosen cover for two universes with fixed ranks,
        under four different universe_p settings.
        """
        sets = {
            0: {0: {1, 2, 3}, 1: {1, 2, 3}},
            1: {0: {4, 5, 6}},
            2: {0: {7, 8, 9}, 1: {1}},
        }

        # Each case pairs a universe_p input with the expected set ids
        cases = [
            ({0: 0.1, 1: 0.1}, {2}),
            ({0: 0.1, 1: 0.5}, {0, 2}),
            ({0: 0.5, 1: 0.1}, {1, 2}),
            ({0: 0.5, 1: 0.5}, {0, 1, 2}),
        ]
        for universe_p, expected in cases:
            # Build a fresh ranks dict for every call so that the cases
            # stay independent of one another
            ranks = {0: 10, 1: 5, 2: 1}
            chosen = sc.approx_multiuniverse(sets,
                                             universe_p=universe_p,
                                             ranks=ranks)
            self.assertEqual(chosen, expected)
Beispiel #15
0
 def test_three_universes_partial_weighted(self):
     """Check the chosen cover for three universes with set costs and
     a universe_p value per universe.
     """
     universe_p = {0: 0.5, 1: 0.5, 2: 1.0}
     costs = {0: 3, 1: 4, 2: 1, 3: 1, 4: 1000}
     sets = {
         0: {0: {1, 2}},
         1: {0: {2}, 1: {3, 4}},
         2: {1: {3}},
         3: {1: {4}, 2: {6}},
         4: {2: {5}},
     }
     chosen = sc.approx_multiuniverse(sets, costs, universe_p)
     self.assertEqual(chosen, {0, 3, 4})
Beispiel #16
0
    def test_with_intervalsets_single_interval(self):
        """Check the chosen cover when some sets are given as a bare
        (start, end) tuple instead of an IntervalSet instance.
        """
        make_intervals = interval.IntervalSet
        # Only two entries are wrapped in IntervalSet; the rest are
        # passed directly as single-interval tuples
        sets = {
            0: {0: make_intervals([(1, 100)]), 1: (1, 5)},
            1: {0: (20, 30)},
            2: {0: make_intervals([(40, 50)]), 1: (20, 50)},
        }
        universe_p = {0: 1.0, 1: 0.1}

        chosen = sc.approx_multiuniverse(sets,
                                         universe_p=universe_p,
                                         use_intervalsets=True)
        self.assertEqual(chosen, {0})
Beispiel #17
0
    def _compute_set_cover(self, sets, costs, universe_p, ranks,
                           target_genomes):
        """Approximate set cover as one instance or one per grouping.

        If self.cover_groupings_separately is False, a single instance of
        set cover is solved over the full input (all target genomes across
        all groupings).

        If self.cover_groupings_separately is True, one instance is built
        and solved for each grouping of target genomes, and the union of
        the set ids selected across these instances is returned. This may
        select more probes than a single instance would, but should run
        more quickly since each instance has a smaller input.

        Args:
            sets: sets input to set_cover.approx_multiuniverse for the full
                instance (i.e., covering target genomes across all
                groupings); for a universe_id, universe_id[0] gives the
                grouping of that universe
            costs: costs input to set_cover.approx_multiuniverse for the
                full instance (i.e., costs for probes from all target
                genomes across all groupings)
            universe_p: universe_p input to set_cover.approx_multiuniverse
                for the full instance (i.e., a coverage value for every
                universe across all groupings)
            ranks: ranks input to set_cover.approx_multiuniverse for the
                full instance (i.e., ranks for probes from all target
                genomes across all groupings)
            target_genomes: list of groups of target genomes

        Returns:
            set ids (corresponding to indices in the sets input) that give
            the probes selected to be in the set cover
        """
        if not self.cover_groupings_separately:
            logger.info(("Approximating the solution to a single set cover "
                         "instance across all groupings"))
            return set_cover.approx_multiuniverse(
                sets,
                costs=costs,
                universe_p=universe_p,
                ranks=ranks,
                use_intervalsets=True)

        # Solve one instance per grouping and accumulate the selections
        num_groupings = len(target_genomes)
        selected_set_ids = set()
        for grouping in range(num_groupings):
            # costs, universe_p, and ranks are passed through unchanged;
            # they may hold extra entries for this instance but still
            # contain everything needed to solve it. Only sets is reduced,
            # by keeping coverage just for the universes of this grouping.
            sets_for_grouping = {}
            for set_id, coverage in sets.items():
                coverage_in_grouping = {
                    universe_id: covered
                    for universe_id, covered in coverage.items()
                    if universe_id[0] == grouping
                }
                # Leave out sets that cover nothing in this grouping
                if coverage_in_grouping:
                    sets_for_grouping[set_id] = coverage_in_grouping
            logger.info(
                ("Approximating the solution to an instance of "
                 "set cover, corresponding to grouping %d (of %d)"),
                grouping + 1, num_groupings)
            selected_for_grouping = set_cover.approx_multiuniverse(
                sets_for_grouping,
                costs=costs,
                universe_p=universe_p,
                ranks=ranks,
                use_intervalsets=True)
            selected_set_ids.update(selected_for_grouping)
        return selected_set_ids
Beispiel #18
0
    def run_random(self, use_arrays, use_intervalsets, make_contiguous):
        """Run tests with randomly generated instances of set cover.

        This generates 20 random instances of set cover (with a fixed
        random seed), computes the solution to each, and passes each
        solution to self.verify_partial_cover to check that it achieves
        the desired coverage. It also asserts that the median, across
        instances, of self.weight_frac(costs, output) is less than 0.01,
        as a check that the solution achieves a reasonable reduction in
        the sum of weights of chosen sets versus choosing all sets.

        Args:
            use_arrays: when True, solve set cover where the input
                sets are actually stored as arrays (for space
                efficiency reasons); only consulted when
                use_intervalsets is False
            use_intervalsets: when True, solve set cover where the
                input sets are actually an instance of IntervalSet
                (or a single interval given as a tuple); takes
                precedence over use_arrays
            make_contiguous: when True, the elements (integers) put
                into the sets form contiguous stretches (when False,
                they tend to be spaced apart)

        Returns:
            list with the output of sc.approx_multiuniverse for each of
            the generated instances, in the order they were generated
        """
        # Fix the seed so that the same instances are generated on every
        # run; the instances depend on the exact order of the random
        # draws below
        np.random.seed(1)
        weight_fracs = []
        outputs = []
        for n in range(20):
            if make_contiguous:
                # Generate the sets and universes together
                num_universes = np.random.randint(1, 10)
                num_sets = np.random.randint(250, 350)
                sets = {}
                universes = defaultdict(set)
                for set_id in range(num_sets):
                    sets[set_id] = defaultdict(set)
                    for universe_id in range(num_universes):
                        # A set may have no stretches in a universe, in
                        # which case it gets no entry for that universe
                        num_stretches = np.random.randint(0, 10)
                        for stretch in range(num_stretches):
                            stretch_length = np.random.randint(50, 150)
                            stretch_start = np.random.randint(0, 5000)
                            # Add each consecutive integer in the stretch
                            # to both the set and its universe
                            for i in range(stretch_length):
                                val = stretch_start + i
                                sets[set_id][universe_id].add(val)
                                universes[universe_id].add(val)
            else:
                # Generate the universes
                num_universes = np.random.randint(1, 10)
                universes = {}
                for universe_id in range(num_universes):
                    universe_size = np.random.randint(100, 500)
                    # Duplicate draws collapse in the set, so a universe
                    # may end up with fewer than universe_size elements
                    els = set(np.random.randint(0, 5000, size=universe_size))
                    universes[universe_id] = els
                # Generate the sets
                num_sets = np.random.randint(500, 1000)
                sets = defaultdict(dict)
                # Track, per universe, the union of elements that appear
                # in at least one set
                sets_union = defaultdict(set)
                for set_id in range(num_sets):
                    for universe_id in range(num_universes):
                        set_size_from_universe = np.random.randint(0, 25)
                        if set_size_from_universe > 0:
                            # Sample distinct elements of the universe
                            els = set(
                                np.random.choice(list(universes[universe_id]),
                                                 size=set_size_from_universe,
                                                 replace=False))
                            sets[set_id][universe_id] = els
                            sets_union[universe_id].update(els)
                # Remove from all universes any elements that don't show
                # up in a set in order to ensure that we correctly verify
                # partial coverage
                for universe_id, universe in universes.items():
                    universe.intersection_update(sets_union[universe_id])
            # Generate random set costs and random coverage fractions
            costs = {
                set_id: 1.0 + 10.0 * np.random.random()
                for set_id in range(num_sets)
            }
            universe_p = {
                universe_id: np.random.random()
                for universe_id in range(num_universes)
            }
            # Compute the set cover
            if use_intervalsets:
                # Convert each set of integers into intervals, where each
                # element el becomes the interval (el, el + 1)
                sets_as_intervalsets = {}
                for set_id in sets.keys():
                    sets_as_intervalsets[set_id] = {}
                    for universe_id in sets[set_id].keys():
                        els_as_intervals = []
                        for el in sets[set_id][universe_id]:
                            els_as_intervals += [(el, el + 1)]
                        els_as_intervals_merged = \
                            interval.merge_overlapping(els_as_intervals)
                        if len(els_as_intervals_merged) == 1:
                            # There is just one contiguous interval ("stretch")
                            # so test the space-efficient option of giving
                            # this interval directly as a tuple rather than
                            # as an IntervalSet object
                            sets_as_intervalsets[set_id][universe_id] = \
                                els_as_intervals_merged[0]
                        else:
                            sets_as_intervalsets[set_id][universe_id] = \
                                interval.IntervalSet(els_as_intervals)
                output = sc.approx_multiuniverse(sets_as_intervalsets, costs,
                                                 universe_p,
                                                 use_arrays=False,
                                                 use_intervalsets=True)
            elif use_arrays:
                # Convert each set of integers into an array with
                # typecode 'I' holding the same elements
                sets_as_arrays = {}
                for set_id in sets.keys():
                    sets_as_arrays[set_id] = {}
                    for universe_id in sets[set_id].keys():
                        sets_as_arrays[set_id][universe_id] = array('I')
                        for el in sets[set_id][universe_id]:
                            sets_as_arrays[set_id][universe_id].append(el)
                output = sc.approx_multiuniverse(sets_as_arrays, costs,
                                                 universe_p,
                                                 use_arrays=True,
                                                 use_intervalsets=False)
            else:
                output = sc.approx_multiuniverse(sets, costs, universe_p,
                                                 use_arrays=False,
                                                 use_intervalsets=False)
            # Verification always uses the original sets of integers,
            # regardless of the representation given to the solver
            self.verify_partial_cover(sets, universe_p, output)
            weight_fracs += [self.weight_frac(costs, output)]
            outputs += [output]
        # There's no guarantee that the average weight_frac should be
        # small, but in the average case it should be so test it anyway
        # (e.g., test that it's less than 0.01)
        self.assertLess(np.median(weight_fracs), 0.01)
        return outputs