def test_two_universes_partial_weighted1(self):
    """Check weighted partial covers over two universes.

    The same sets and costs are solved under three different
    per-universe coverage fractions, each paired with the set ids
    expected in the resulting cover.
    """
    sets = {
        0: {0: {1, 2}},
        1: {0: {1, 2}, 1: {3, 4, 5}},
        2: {1: {4}},
        3: {1: {5}},
        4: {1: {3}},
    }

    # (universe_p, expected set ids), checked in this order
    scenarios = [
        ({0: 0.1, 1: 0.1}, {0, 3}),
        ({0: 0.0, 1: 0.1}, {3}),
        ({0: 0.5, 1: 0.5}, {0, 2, 3}),
    ]
    for universe_p, desired_output in scenarios:
        # Hand the solver a freshly built costs dict on every call
        costs = {0: 2, 1: 1000, 2: 3, 3: 1, 4: 10}
        self.assertEqual(
            sc.approx_multiuniverse(sets, costs, universe_p),
            desired_output)
def test_same_value_different_universe3(self):
    """Check a cover where element 2 appears in two different universes.

    Set 0 holds element 2 in both universe 0 and universe 1, while
    set 1 only touches universe 0; both set ids are expected in the
    output when each universe has a coverage fraction of 1.0.
    """
    coverage_by_set = {
        0: {0: {1, 2}, 1: {2}},
        1: {0: {1, 2, 3}},
    }
    required_fraction = {0: 1.0, 1: 1.0}
    expected_ids = {0, 1}

    chosen_ids = sc.approx_multiuniverse(coverage_by_set,
                                         universe_p=required_fraction)
    self.assertEqual(chosen_ids, expected_ids)
def test_one_universe_complete_unweighted(self):
    """Check a single-universe instance solved with only the sets given.

    No costs, ranks, or coverage fractions are passed, so the solver
    runs with whatever defaults it defines for them.
    """
    coverage_by_set = {
        0: {0: {1, 2}},
        1: {0: {1, 2, 4}},
        2: {0: {2, 4}},
        3: {0: {4, 5}},
        4: {0: {3}},
    }
    expected_ids = {1, 3, 4}

    chosen_ids = sc.approx_multiuniverse(coverage_by_set)
    self.assertEqual(chosen_ids, expected_ids)
def test_one_universe_rank(self):
    """Check a single-universe instance in which every set has a rank.

    Only the sets and their ranks are passed to the solver; the
    expected cover is made of set ids 1 and 2.
    """
    coverage_by_set = {
        0: {0: {1, 2, 3}},
        1: {0: {1, 2, 3, 4}},
        2: {0: {1, 2, 3}},
        3: {0: {1, 2, 3}},
    }
    rank_by_set = {0: 5, 1: 10, 2: 1, 3: 10}
    expected_ids = {1, 2}

    chosen_ids = sc.approx_multiuniverse(coverage_by_set,
                                         ranks=rank_by_set)
    self.assertEqual(chosen_ids, expected_ids)
def test_tuple_universe_id(self):
    """Check that universes may be identified by tuples.

    The universes are keyed by (0, 0) and (1, 0) both in the sets and
    in the coverage fractions.
    """
    first_universe = (0, 0)
    second_universe = (1, 0)
    coverage_by_set = {
        0: {first_universe: {1, 2}, second_universe: {2}},
        1: {first_universe: {1, 2, 3}},
    }
    required_fraction = {first_universe: 1.0, second_universe: 1.0}
    expected_ids = {0, 1}

    chosen_ids = sc.approx_multiuniverse(coverage_by_set,
                                         universe_p=required_fraction)
    self.assertEqual(chosen_ids, expected_ids)
def test_partial_coverage_with_ranks(self):
    """Check partial coverage of one universe when sets carry ranks.

    Four disjoint three-element sets are solved twice, each time with
    a different coverage fraction and a different rank assignment.
    """
    sets = {
        0: {0: {1, 2, 3}},
        1: {0: {4, 5, 6}},
        2: {0: {7, 8, 9}},
        3: {0: {10, 11, 12}},
    }

    # (universe_p, ranks, expected set ids), checked in this order
    scenarios = [
        ({0: 0.25}, {0: 2, 1: 1, 2: 2, 3: 2}, {1}),
        ({0: 0.5}, {0: 3, 1: 1, 2: 3, 3: 2}, {1, 3}),
    ]
    for universe_p, ranks, desired_output in scenarios:
        self.assertEqual(
            sc.approx_multiuniverse(sets, universe_p=universe_p,
                                    ranks=ranks),
            desired_output)
def test_cost_and_ranks2(self):
    """Check an instance that passes both costs and ranks to the solver.

    Sets 0 and 3 each hold all four elements, but the expected cover
    is made of set ids 1 and 2.
    """
    coverage_by_set = {
        0: {0: {1, 2, 3, 4}},
        1: {0: {1, 2, 3}},
        2: {0: {3, 4}},
        3: {0: {1, 2, 3, 4}},
    }
    rank_by_set = {0: 2, 1: 1, 2: 1, 3: 1}
    cost_by_set = {0: 1, 1: 1, 2: 1, 3: 10}
    expected_ids = {1, 2}

    chosen_ids = sc.approx_multiuniverse(coverage_by_set,
                                         costs=cost_by_set,
                                         ranks=rank_by_set)
    self.assertEqual(chosen_ids, expected_ids)
def test_two_universes_ranks(self):
    """Check a two-universe instance in which every set has a rank.

    Only the sets and their ranks are passed to the solver; the
    expected cover is made of set ids 1, 2 and 3.
    """
    coverage_by_set = {
        0: {0: {1, 2, 3, 4}, 1: {1}},
        1: {0: {1, 2, 3}},
        2: {0: {4}, 1: {1}},
        3: {0: {2}},
    }
    rank_by_set = {0: 100, 1: 3, 2: 2, 3: 1}
    expected_ids = {1, 2, 3}

    chosen_ids = sc.approx_multiuniverse(coverage_by_set,
                                         ranks=rank_by_set)
    self.assertEqual(chosen_ids, expected_ids)
def test_one_universe_partial_unweighted(self):
    """Check partial coverage of one universe without costs or ranks.

    The single universe gets a coverage fraction of 0.6, and set 1
    alone is expected in the output.
    """
    coverage_by_set = {
        0: {0: {1, 2}},
        1: {0: {1, 2, 4}},
        2: {0: {2, 4}},
        3: {0: {4, 5}},
        4: {0: {3}},
    }
    required_fraction = {0: 0.6}
    expected_ids = {1}

    chosen_ids = sc.approx_multiuniverse(coverage_by_set,
                                         universe_p=required_fraction)
    self.assertEqual(chosen_ids, expected_ids)
def test_two_universes_partial_unweighted1(self):
    """Check two universes with different coverage fractions, no costs.

    Universe 0 gets a fraction of 1.0 and universe 1 a fraction of
    0.3; set ids 3 and 4 are expected in the output.
    """
    coverage_by_set = {
        0: {1: {1, 2}},
        1: {1: {1, 2, 4}},
        2: {1: {2, 4}},
        3: {0: {5}, 1: {4}},
        4: {0: {3}},
    }
    required_fraction = {0: 1.0, 1: 0.3}
    expected_ids = {3, 4}

    chosen_ids = sc.approx_multiuniverse(coverage_by_set,
                                         universe_p=required_fraction)
    self.assertEqual(chosen_ids, expected_ids)
def test_two_universes_partial_weighted3(self):
    """Check weighted partial covers where small cost changes matter.

    The same sets and coverage fractions are solved under two cost
    assignments that differ only in the costs of sets 2 and 3.
    """
    sets = {
        0: {1: {1, 2}},
        1: {0: {3, 4, 5}, 1: {2}},
        2: {0: {3}},
        3: {0: {4}},
        4: {0: {5}},
    }

    # (costs, expected set ids), checked in this order
    scenarios = [
        # The optimal solution is [1] but the approximation fails to
        # find it
        ({0: 1000, 1: 4, 2: 1, 3: 1, 4: 2}, {1, 2, 3}),
        ({0: 1000, 1: 4, 2: 1.5, 3: 1.5, 4: 2}, {1}),
    ]
    for costs, desired_output in scenarios:
        # Hand the solver a freshly built universe_p dict on every call
        universe_p = {0: 0.6, 1: 0.5}
        self.assertEqual(
            sc.approx_multiuniverse(sets, costs, universe_p),
            desired_output)
def test_two_universes_partial_weighted2(self):
    """Check a weighted cover with fractions 1.0 and 0.5 on two universes.

    Set ids 0, 2 and 3 are expected in the output.
    """
    coverage_by_set = {
        0: {0: {1, 2}},
        1: {0: {2, 3}, 1: {4, 5}},
        2: {0: {3}},
        3: {1: {4}},
        4: {1: {5}},
    }
    cost_by_set = {0: 3, 1: 4, 2: 1, 3: 1, 4: 2}
    required_fraction = {0: 1.0, 1: 0.5}
    expected_ids = {0, 2, 3}

    chosen_ids = sc.approx_multiuniverse(coverage_by_set, cost_by_set,
                                         required_fraction)
    self.assertEqual(chosen_ids, expected_ids)
def test_with_intervalsets(self):
    """Check the solver when coverage is given as IntervalSet objects.

    Every covered region is an interval.IntervalSet and the solver is
    called with use_intervalsets=True; set 0 alone is expected in the
    output.
    """
    make_intervals = interval.IntervalSet
    coverage_by_set = {
        0: {
            0: make_intervals([(1, 100)]),
            1: make_intervals([(1, 5)]),
        },
        1: {
            0: make_intervals([(20, 30)]),
        },
        2: {
            0: make_intervals([(40, 50)]),
            1: make_intervals([(20, 50)]),
        },
    }
    required_fraction = {0: 1.0, 1: 0.1}
    expected_ids = {0}

    chosen_ids = sc.approx_multiuniverse(coverage_by_set,
                                         universe_p=required_fraction,
                                         use_intervalsets=True)
    self.assertEqual(chosen_ids, expected_ids)
def test_two_universe_partial_coverage_with_ranks(self):
    """Check ranked partial covers over two universes.

    The same sets and ranks are solved under four combinations of
    coverage fractions (0.1 or 0.5 on each universe), each paired with
    the set ids expected in the resulting cover.
    """
    sets = {
        0: {0: {1, 2, 3}, 1: {1, 2, 3}},
        1: {0: {4, 5, 6}},
        2: {0: {7, 8, 9}, 1: {1}},
    }

    # (universe_p, expected set ids), checked in this order
    scenarios = [
        ({0: 0.1, 1: 0.1}, {2}),
        ({0: 0.1, 1: 0.5}, {0, 2}),
        ({0: 0.5, 1: 0.1}, {1, 2}),
        ({0: 0.5, 1: 0.5}, {0, 1, 2}),
    ]
    for universe_p, desired_output in scenarios:
        # Hand the solver a freshly built ranks dict on every call
        ranks = {0: 10, 1: 5, 2: 1}
        self.assertEqual(
            sc.approx_multiuniverse(sets, universe_p=universe_p,
                                    ranks=ranks),
            desired_output)
def test_three_universes_partial_weighted(self):
    """Check a weighted cover over three universes.

    Universes 0 and 1 get a coverage fraction of 0.5 and universe 2 a
    fraction of 1.0; set ids 0, 3 and 4 are expected in the output.
    """
    coverage_by_set = {
        0: {0: {1, 2}},
        1: {0: {2}, 1: {3, 4}},
        2: {1: {3}},
        3: {1: {4}, 2: {6}},
        4: {2: {5}},
    }
    cost_by_set = {0: 3, 1: 4, 2: 1, 3: 1, 4: 1000}
    required_fraction = {0: 0.5, 1: 0.5, 2: 1.0}
    expected_ids = {0, 3, 4}

    chosen_ids = sc.approx_multiuniverse(coverage_by_set, cost_by_set,
                                         required_fraction)
    self.assertEqual(chosen_ids, expected_ids)
def test_with_intervalsets_single_interval(self):
    """Check the solver when some coverage is a bare (start, end) tuple.

    Some regions are given as an interval.IntervalSet and others as a
    single tuple in its place; the solver is called with
    use_intervalsets=True and set 0 alone is expected in the output.
    """
    make_intervals = interval.IntervalSet
    coverage_by_set = {
        0: {
            0: make_intervals([(1, 100)]),
            1: (1, 5),
        },
        1: {
            0: (20, 30),
        },
        2: {
            0: make_intervals([(40, 50)]),
            1: (20, 50),
        },
    }
    required_fraction = {0: 1.0, 1: 0.1}
    expected_ids = {0}

    chosen_ids = sc.approx_multiuniverse(coverage_by_set,
                                         universe_p=required_fraction,
                                         use_intervalsets=True)
    self.assertEqual(chosen_ids, expected_ids)
def _compute_set_cover(self, sets, costs, universe_p, ranks,
                       target_genomes):
    """Approximate set cover as one instance or one per grouping.

    When self.cover_groupings_separately is False, a single instance
    of set cover is solved over all of the input (i.e., for all target
    genomes across all groupings).

    When self.cover_groupings_separately is True, a separate instance
    is built and solved for each grouping of target genomes, and the
    union of the set ids selected by every instance is returned. This
    may yield more probes than solving one instance across all
    groupings, but should run more quickly because the input to each
    instance is smaller.

    Args:
        sets: sets input to set_cover.approx_multiuniverse for a full
            instance of set cover (i.e., covering target genomes
            across all groupings); when solving per grouping, each
            universe id must be indexable, with its element 0 giving
            the index of the grouping that the universe belongs to
        costs: costs input to set_cover.approx_multiuniverse for a
            full instance of set cover (i.e., contains costs for
            probes that come from all target genomes across all
            groupings)
        universe_p: universe_p input to
            set_cover.approx_multiuniverse for a full instance of set
            cover (i.e., gives a universe_p coverage value for every
            universe corresponding to each target genome across all
            groupings)
        ranks: ranks input to set_cover.approx_multiuniverse for a
            full instance of set cover (i.e., contains ranks for
            probes that come from all target genomes across all
            groupings)
        target_genomes: list of groups of target genomes

    Returns:
        set ids (corresponding to indices in the sets input) that give
        the probes selected to be in the set cover
    """
    if not self.cover_groupings_separately:
        logger.info(("Approximating the solution to a single set cover "
                     "instance across all groupings"))
        return set_cover.approx_multiuniverse(
            sets,
            costs=costs,
            universe_p=universe_p,
            ranks=ranks,
            use_intervalsets=True)

    # Solve one instance per grouping and pool the selected set ids
    num_groupings = len(target_genomes)
    selected_set_ids = set()
    for grouping in range(num_groupings):
        # costs, universe_p, and ranks are passed through unchanged;
        # they may describe more than this instance needs, but still
        # hold everything required to solve it. Only sets is reduced,
        # by keeping coverage solely for universes of this grouping.
        instance_sets = {}
        for set_id, coverage in sets.items():
            # universe_id[0] is the grouping of that universe
            coverage_in_grouping = {
                universe_id: covered
                for universe_id, covered in coverage.items()
                if universe_id[0] == grouping
            }
            # Leave out sets that cover nothing in this grouping
            if coverage_in_grouping:
                instance_sets[set_id] = coverage_in_grouping

        logger.info(("Approximating the solution to an instance of "
                     "set cover, corresponding to grouping %d (of %d)"),
                    grouping + 1, num_groupings)
        instance_set_ids = set_cover.approx_multiuniverse(
            instance_sets,
            costs=costs,
            universe_p=universe_p,
            ranks=ranks,
            use_intervalsets=True)
        selected_set_ids.update(instance_set_ids)

    return selected_set_ids
def run_random(self, use_arrays, use_intervalsets, make_contiguous):
    """Solve randomly generated set cover instances and check them.

    After seeding numpy's random generator with 1, this builds 20
    random instances, solves each one, and verifies (through
    self.verify_partial_cover) that the solution achieves the desired
    coverage. It then checks that the median, over all instances, of
    self.weight_frac for the chosen sets is below 0.01.

    Args:
        use_arrays: when True, solve set cover where the input sets
            are stored as arrays (for space efficiency reasons); only
            consulted when use_intervalsets is False
        use_intervalsets: when True, solve set cover where the input
            sets are given as intervals (an IntervalSet, or a single
            tuple when the elements merge into one interval)
        make_contiguous: when True, the elements (integers) put into
            the sets form contiguous stretches (when False, they tend
            to be spaced apart)

    Returns:
        list holding the solver's output for each instance, in the
        order in which the instances were generated
    """
    np.random.seed(1)

    observed_weight_fracs = []
    collected_outputs = []
    for _ in range(20):
        # NOTE: the order of the np.random calls below determines the
        # generated instances, so it must not be rearranged
        if make_contiguous:
            # Grow the sets and the universes together
            num_universes = np.random.randint(1, 10)
            num_sets = np.random.randint(250, 350)
            sets = {}
            universes = defaultdict(set)
            for set_id in range(num_sets):
                coverage = defaultdict(set)
                sets[set_id] = coverage
                for universe_id in range(num_universes):
                    num_stretches = np.random.randint(0, 10)
                    for _stretch in range(num_stretches):
                        stretch_length = np.random.randint(50, 150)
                        stretch_start = np.random.randint(0, 5000)
                        # Only touch the defaultdicts once there is a
                        # stretch to add, so that a universe without
                        # stretches gets no (empty) entry
                        set_els = coverage[universe_id]
                        universe_els = universes[universe_id]
                        for offset in range(stretch_length):
                            val = stretch_start + offset
                            set_els.add(val)
                            universe_els.add(val)
        else:
            # First draw the universes ...
            num_universes = np.random.randint(1, 10)
            universes = {}
            for universe_id in range(num_universes):
                universe_size = np.random.randint(100, 500)
                universes[universe_id] = set(
                    np.random.randint(0, 5000, size=universe_size))

            # ... then draw every set as a sample from each universe
            num_sets = np.random.randint(500, 1000)
            sets = defaultdict(dict)
            sets_union = defaultdict(set)
            for set_id in range(num_sets):
                for universe_id in range(num_universes):
                    sample_size = np.random.randint(0, 25)
                    if sample_size <= 0:
                        continue
                    sampled = set(
                        np.random.choice(list(universes[universe_id]),
                                         size=sample_size,
                                         replace=False))
                    sets[set_id][universe_id] = sampled
                    sets_union[universe_id].update(sampled)

            # Remove from all universes any elements that don't show
            # up in a set in order to ensure that we correctly verify
            # partial coverage
            for universe_id, universe in universes.items():
                universe.intersection_update(sets_union[universe_id])

        # Draw a random cost per set, then a random coverage fraction
        # per universe
        costs = {}
        for set_id in range(num_sets):
            costs[set_id] = 1.0 + 10.0 * np.random.random()
        universe_p = {}
        for universe_id in range(num_universes):
            universe_p[universe_id] = np.random.random()

        # Solve the instance using the requested representation
        if use_intervalsets:
            sets_as_intervalsets = {}
            for set_id, coverage in sets.items():
                converted = {}
                for universe_id, els in coverage.items():
                    unit_intervals = [(el, el + 1) for el in els]
                    merged = interval.merge_overlapping(unit_intervals)
                    if len(merged) == 1:
                        # There is just one contiguous interval
                        # ("stretch") so test the space-efficient
                        # option of giving this interval directly as
                        # a tuple rather than as an IntervalSet object
                        converted[universe_id] = merged[0]
                    else:
                        converted[universe_id] = \
                            interval.IntervalSet(unit_intervals)
                sets_as_intervalsets[set_id] = converted
            output = sc.approx_multiuniverse(sets_as_intervalsets,
                                             costs,
                                             universe_p,
                                             use_arrays=False,
                                             use_intervalsets=True)
        elif use_arrays:
            sets_as_arrays = {
                set_id: {
                    universe_id: array('I', els)
                    for universe_id, els in coverage.items()
                }
                for set_id, coverage in sets.items()
            }
            output = sc.approx_multiuniverse(sets_as_arrays,
                                             costs,
                                             universe_p,
                                             use_arrays=True,
                                             use_intervalsets=False)
        else:
            output = sc.approx_multiuniverse(sets,
                                             costs,
                                             universe_p,
                                             use_arrays=False,
                                             use_intervalsets=False)

        self.verify_partial_cover(sets, universe_p, output)
        observed_weight_fracs.append(self.weight_frac(costs, output))
        collected_outputs.append(output)

    # There's no guarantee that the average weight_frac should be
    # small, but in the average case it should be so test it anyway
    # (e.g., test that it's less than 0.01)
    self.assertLess(np.median(observed_weight_fracs), 0.01)

    return collected_outputs