Exemple #1
0
def find_set_covering(
        subsets: List[CommonAncestor],
        ontology: Ontology = None,
        value: List[float] = None,
        max_num_subsets: int = None
) -> Union[None, List[Tuple[str, Set[str]]]]:
    """Greedy algorithm to solve the set covering problem on trimming candidates.

    In every round the not-yet-processed subset with the highest score is
    selected, where the score is the number of its elements that are not
    covered yet (multiplied by the subset's weight when ``value`` is given).
    Ties are broken by ascending ``node_label`` and then by input order.
    The loop ends when every subset was processed, when the whole universe
    (union of all ``covered_starting_nodes``) is covered, or when
    ``max_num_subsets`` subsets have been selected.

    Args:
        subsets: candidate subsets; each item must expose ``node_id``,
            ``node_label`` and ``covered_starting_nodes`` (a set).
        ontology: optional ontology used to avoid parent-child relationships
            in the final result. When a newly selected subset is listed in
            ``ontology.ancestors(<id>)`` of an already selected subset, the
            already selected one is dropped (its elements still count as
            covered).
        value: optional list of weights, aligned with ``subsets``.
        max_num_subsets: maximum number of subsets in the final list; a
            falsy value (``None`` or ``0``) means no limit.

    Returns:
        ``None`` if ``value`` is non-empty and its length differs from the
        number of distinct subset IDs; otherwise the list of
        ``(node_id, covered_starting_nodes)`` tuples of the selected subsets,
        in selection order.
    """
    logger.debug("starting set covering optimization")
    elem_to_process = {subset.node_id for subset in subsets}
    if value and len(value) != len(elem_to_process):
        return None
    universe = {e for subset in subsets for e in subset.covered_starting_nodes}
    # A neutral weight of 1 keeps the unweighted score equal to the plain
    # count of newly covered elements.
    weights = value if value else [1] * len(subsets)
    included_elmts = set()
    included_sets = []
    while elem_to_process and included_elmts != universe and (
            not max_num_subsets or len(included_sets) < max_num_subsets):
        candidates = [
            (weight * len(subset.covered_starting_nodes - included_elmts),
             subset)
            for subset, weight in zip(subsets, weights)
            if subset.node_id in elem_to_process
        ]
        # min() returns the first best candidate, i.e. the same item a
        # stable sort on this key would put in front.
        _, best = min(candidates, key=lambda c: (-c[0], c[1].node_label))
        elem_to_process.remove(best.node_id)
        if ontology:
            # Build a new list rather than removing while iterating: in-place
            # removal skips the entry following each removed one, so some
            # descendants of the new pick would be left in the result.
            included_sets = [
                elem for elem in included_sets
                if best.node_id not in ontology.ancestors(elem[0])
            ]
        included_elmts |= best.covered_starting_nodes
        included_sets.append((best.node_id, best.covered_starting_nodes))
    logger.debug("finished set covering optimization")
    return included_sets
 def split_assocs(self, root_target : TargetClass, ontology : Ontology = None):
     """Split the current associations into feature and target associations.

     For every subject of ``self.assocs`` each annotated class is put into
     the target group when ``root_target`` is among
     ``ontology.ancestors(cls, reflexive=True)``, and into the feature
     group otherwise. Afterwards ``self.assocs`` is replaced by an
     ``AssociationSet`` holding only the features and
     ``self.target_assocs`` is set to one holding only the targets.

     Args:
         root_target: class whose descendants are treated as targets.
         ontology: ontology used for the ancestor lookup; defaults to the
             ontology of the current ``self.assocs``.
     """
     logging.info('Splitting assocs on: {} // {}'.format(root_target, ontology))
     source = self.assocs
     if ontology is None:
         ontology = source.ontology

     feature_map = {}
     target_map = {}
     for subject in source.subjects:
         under_root = set()
         elsewhere = set()
         for cls in source.annotations(subject):
             is_target = root_target in ontology.ancestors(cls, reflexive=True)
             bucket = under_root if is_target else elsewhere
             bucket.add(cls)
         feature_map[subject] = elsewhere
         target_map[subject] = under_root

     self.assocs = AssociationSet(ontology=ontology, association_map=feature_map)
     self.target_assocs = AssociationSet(ontology=ontology, association_map=target_map)
     logging.info('Split; f={} t={}'.format(self.assocs, self.target_assocs))