Example #1
0
 def gen_hierarchy(self, data):
     """
     Build the feature hierarchy and store it in ``self.feature_hierarchy``.

     Two cases are handled:
     (i) no hierarchy supplied: a flat hierarchy with one leaf per column of ``data`` is
         created (feature names default to "0", "1", ... if ``self.feature_names`` is unset);
     (ii) hierarchy supplied as an anytree node or as a path to a JSON file: it is validated
          and mirrored as a tree of ``Feature`` nodes.

     In both cases the resulting tree hangs off a non-perturbable dummy root.

     Raises:
         ValueError: if the hierarchy is given as a path whose contents are not valid JSON.
         AssertionError: if the supplied hierarchy fails validation (wrong type, malformed or
             overlapping leaf indices, indices on internal nodes, indices out of range).
     """
     num_features = data.shape[1]
     if self.feature_hierarchy is None:
         # No hierarchy supplied - build a flat one directly over the base features
         if self.feature_names is None:
             # Generate feature names if not available
             self.feature_names = [str(position) for position in range(num_features)]
         # Dummy root node, shouldn't be perturbed
         flat_root = Feature(constants.DUMMY_ROOT, description=constants.DUMMY_ROOT, perturbable=False)
         for position, name in enumerate(self.feature_names):
             Feature(name, parent=flat_root, idx=[position])
         self.feature_hierarchy = flat_root
         return
     # TODO: Document real hierarchy with examples
     # Input hierarchy needs a list of indices assigned to all base features
     if isinstance(self.feature_hierarchy, str):
         # Path to a JSON hierarchy - import to anytree
         try:
             json_importer = JsonImporter()
             with open(self.feature_hierarchy, encoding="utf-8") as hierarchy_file:
                 self.feature_hierarchy = json_importer.read(hierarchy_file)
         except JSONDecodeError as error:
             raise ValueError(f"Feature hierarchy {self.feature_hierarchy} does not appear to be a valid JSON file:") from error
     assert isinstance(self.feature_hierarchy, anytree.node.nodemixin.NodeMixin), "Feature hierarchy does not appear to be a valid JSON file or an anytree node"
     input_root = self.feature_hierarchy
     mirrored = {}  # input node name -> corresponding Feature node
     seen_indices = set()
     # First pass: validate every input node and create its Feature counterpart
     for node in anytree.PostOrderIter(input_root):
         if node.is_leaf:
             well_formed = (hasattr(node, "idx")
                            and isinstance(node.idx, list)
                            and len(node.idx) >= 1
                            and all(isinstance(entry, int) for entry in node.idx))
             assert well_formed, f"Leaf node {node.name} must contain a non-empty list of integer indices under attribute 'idx'"
             assert seen_indices.isdisjoint(node.idx), f"Leaf node {node.name} has index overlap with other leaf nodes"
             node_idx = node.idx
             seen_indices.update(node_idx)
         else:
             # Internal nodes must start without indices; they are filled in from their children below
             carries_indices = hasattr(node, "idx") and node.idx
             assert not carries_indices, f"Internal node {node.name} must have empty initial indices under attribute 'idx'"
             node_idx = []
         mirrored[node.name] = Feature(node.name, description=getattr(node, "description", ""), idx=node_idx)
     assert min(seen_indices) >= 0 and max(seen_indices) < num_features, "Feature indices in hierarchy must be in range [0, num_features - 1]"
     # Second pass: wire up parents and accumulate child indices into feature groups.
     # Post-order guarantees children are complete before their parent is visited.
     last_feature = None
     for node in anytree.PostOrderIter(input_root):
         last_feature = mirrored[node.name]
         if node.parent:
             last_feature.parent = mirrored[node.parent.name]
         for child in node.children:
             last_feature.idx += mirrored[child.name].idx
     # Dummy root node for consistency with flat hierarchy; the last node visited in post-order is the original root
     self.feature_hierarchy = Feature(constants.DUMMY_ROOT, children=[last_feature], perturbable=False)
 def simplify_tree(self):
     """
     Collapse every node that has exactly one child.

     Each such node is spliced out of the tree: its only child is attached to
     the node's former parent (and becomes the new root when the node had no
     parent), and the node's name is removed from ``node_dictionary`` and
     ``category_dictionary``. A name missing from either dictionary is printed
     instead of raising. Afterwards distances are recomputed for all leaves.

     See Algorithm 1 (line 11)
     """
     # Snapshot the traversal first, because the loop re-parents nodes as it goes
     for node in list(at.PostOrderIter(self.root)):
         if len(node.children) != 1:
             continue
         only_child = node.children[0]
         grandparent = node.parent
         only_child.parent = grandparent
         if grandparent is None:
             # The current root has only one child - promote that child to root
             self.root = only_child
         node.parent = None
         for registry in (self.node_dictionary, self.category_dictionary):
             try:
                 registry.pop(node.name)
             except KeyError:
                 print(node.name)
     # recompute distances for the entities (leaves) of the simplified tree
     for node in list(at.PostOrderIter(self.root)):
         if not node.is_leaf:
             continue
         node._compute_distances()
Example #3
0
def update_hierarchy_relevance(hierarchy_root, relevant_feature_map, probs):
    """
    Annotate every node of the hierarchy with feature relevance information.

    Leaves get ``poly_coeff`` (the coefficient stored in ``relevant_feature_map``
    under the singleton key ``frozenset([idx])``, else 0.0) and ``bin_prob``
    (``probs[idx]``), where ``idx`` is ``int(node.static_indices)``. Every
    node's ``description`` is set to mark it relevant or irrelevant; an
    internal node is relevant as soon as one of its children is.
    """
    # All feature indices that occur in any key of the map (including interaction terms)
    features_in_any_term = set().union(*relevant_feature_map)
    for node in anytree.PostOrderIter(hierarchy_root):
        node.description = constants.IRRELEVANT
        if not node.is_leaf:
            # Post-order traversal: the children have already been labelled
            if any(child.description != constants.IRRELEVANT for child in node.children):
                node.description = constants.RELEVANT
            continue
        feature_idx = int(node.static_indices)
        node.poly_coeff = 0.0
        node.bin_prob = probs[feature_idx]
        coeff = relevant_feature_map.get(frozenset([feature_idx]))
        if coeff:
            node.poly_coeff = coeff
            node.description = (
                "%s feature:\nPolynomial coefficient: %f\nBinomial probability: %f"
                % (constants.RELEVANT, coeff, probs[feature_idx]))
        elif feature_idx in features_in_any_term:
            # Feature only appears together with other features
            node.description = ("%s feature\n(Interaction-only)" %
                                constants.RELEVANT)
Example #4
0
def merkle_branch(hash_, tree):
    """Return the first node (in post-order) of ``tree`` whose name equals ``hash_``, or None."""
    match = None
    for candidate in anytree.PostOrderIter(tree):
        if candidate.name == hash_:
            match = candidate
            break

    # TODO come up with a sane branch representation
    return match if match else None
Example #5
0
 def get_input_wires(self):
     """
     Return the input wires of the tree, computing and caching them on first use.

     The wires are the ``left_wire`` and ``right_wire`` of the object stored in
     ``name`` on every leaf of ``self.tree``, in post-order. A non-empty
     ``self.input_wires`` is returned as is.
     """
     if self.input_wires:
         return self.input_wires
     collected = []
     for node in anytree.PostOrderIter(self.tree):
         if node.is_leaf:
             leaf = node.name  # presumably a gate-like object exposing both wires - confirm
             collected.extend((leaf.left_wire, leaf.right_wire))
     self.input_wires = collected
     return self.input_wires
Example #6
0
def prune_empty_groups(tree: LayerGroupNode) -> LayerGroupNode:
    """Return a deep copy of ``tree`` with every leaf that is not a LayerNode removed.

    The input tree is left untouched. Each removed node is logged as a warning
    and deleted through ``_delete_node``. Only nodes whose type is exactly
    ``LayerNode`` are kept as leaves.
    """
    lt = copy.deepcopy(tree)

    # NOTE: We MUST iterate using PostOrderIter(lt) instead of lt.leaves
    # because removing a leaf group may leave its parent group newly enleafened.
    for node in anytree.PostOrderIter(lt):
        if node.is_leaf and type(node) is not LayerNode:
            # `Logger.warn` is a deprecated alias; `warning` is the supported spelling.
            logger.warning(f'{node.group_name_path=}, {node.name=}')
            _delete_node(node, msg='Removing empty group')

    return lt
Example #7
0
def evaluate_hierarchical(args, sfeatures, afeatures):
    """
    Evaluate hierarchical analysis results - obtain power/FDR measures for all nodes/base features

    Ground truth is taken from ``sfeatures`` (matched to leaves via ``str(node.idx[0])``),
    inferred values from ``afeatures``. Returns a dict keyed by the FDR/power/correlation
    constants; score correlations are only computed when ``args.model_type == REGRESSOR``
    and default to 1 otherwise. Values are rounded to 10 decimals.
    """
    # pylint: disable = too-many-locals
    def as_float_array(values):
        """Copy values element by element into a freshly zeroed numpy array."""
        filled = np.zeros(len(values))
        for position, value in enumerate(values):
            filled[position] = value
        return filled

    # Map features in hierarchy to original features and identify ground-truth importances/scores
    simulated_by_name = {sfeature.name: sfeature for sfeature in sfeatures}
    importance_map = {}
    score_map = {}
    for node in anytree.PostOrderIter(afeatures[0].root):
        if not node.is_leaf:
            # Post-order: children are already in the map; a group is important if any child is
            importance_map[node.name] = any(importance_map[child.name] for child in node.children)
            continue
        simulated = simulated_by_name[str(node.idx[0])]
        importance_map[node.name] = simulated.important
        score_map[node.name] = simulated.effect_size
    # Overall FDR/power
    important = as_float_array([importance_map[afeature.name] for afeature in afeatures])
    inferred_important = as_float_array([afeature.important for afeature in afeatures])
    imp_precision, imp_recall = get_precision_recall(important, inferred_important)
    # Base features FDR/power
    base_features = [afeature for afeature in afeatures if afeature.is_leaf]
    base_important = as_float_array([importance_map[feature.name] for feature in base_features])
    inferred_base_important = as_float_array([feature.important for feature in base_features])
    base_imp_precision, base_imp_recall = get_precision_recall(base_important, inferred_base_important)
    # Importance scores for base features
    overall_scores_corr, overall_relevant_scores_corr = (1.0, 1.0)
    if args.model_type == REGRESSOR:
        scores = as_float_array([score_map[feature.name] for feature in base_features])
        inferred_scores = as_float_array([feature.importance_score for feature in base_features])
        relevant_base_features = [feature for feature in base_features if importance_map[feature.name]]
        relevant_scores = as_float_array([score_map[feature.name] for feature in relevant_base_features])
        relevant_inferred_scores = as_float_array([feature.importance_score for feature in relevant_base_features])
        # Correlation needs at least two points; fall back to 1 otherwise
        overall_scores_corr = pearsonr(scores, inferred_scores)[0] if len(scores) >= 2 else 1
        overall_relevant_scores_corr = pearsonr(relevant_scores, relevant_inferred_scores)[0] if len(relevant_scores) >= 2 else 1

    vals = {
        FDR: 1 - imp_precision,
        POWER: imp_recall,
        BASE_FEATURES_FDR: 1 - base_imp_precision,
        BASE_FEATURES_POWER: base_imp_recall,
        OVERALL_SCORES_CORR: overall_scores_corr,
        OVERALL_RELEVANT_SCORES_CORR: overall_relevant_scores_corr,
    }
    # Round values to avoid FP discrepancies
    return {key: value if isinstance(value, dict) else round(value, 10) for key, value in vals.items()}
def delete_nodes(root_node, dry_run, delete_root, stop_node="root"):
    """
    Cascade deletes OSDF nodes in a depth-first (post-order) manner.

    Every node in the tree whose name is not "root" is reported and, unless
    this is a dry run, deleted through its ``osdf`` object. Nodes whose
    deletion returns a falsy result are collected and listed in a final
    warning.

    Args:
        root_node (anytree.Node): The root node of the tree to delete from.
        dry_run (boolean): If True, only print the nodes that would be
            deleted; if False, actually delete them.
        delete_root (boolean): If the stop_node parameter is set to 'root'
            this parameter can be passed to indicate we want to delete the
            root node as well.
        stop_node (string): Only compared against "root" to decide whether the
            root node may be deleted; the traversal itself does not stop at
            any other node.

    Requires:
        None

    Returns:
        None
    """
    failed_delete = []

    # Single-argument print(...) calls produce the same output under both
    # Python 2 (print statement) and Python 3 (print function).
    for node in anytree.PostOrderIter(root_node):
        if node.name == "root":
            # The root is handled separately after the traversal.
            continue

        print("DELETING NODE: %s" % (node,))
        if dry_run:
            continue

        osdf_obj = node.osdf
        if not osdf_obj.delete():
            print("FAILED TO DELETE NODE: %s" % (node,))
            failed_delete.append(osdf_obj)

    if failed_delete:
        # failed_delete holds OSDF objects, not strings, so convert before joining.
        print("WARNING: The following OSDF nodes were not deleted:\n" +
              "\n".join(str(osdf_obj) for osdf_obj in failed_delete))

    if delete_root and stop_node == "root":
        print("DELETING ROOT NODE: %s" % (root_node,))

        if not dry_run:
            root_node.osdf.delete()
Example #9
0
def gen_hierarchy(args, clustering_data):
    """
    Generate hierarchy over features

    Args:
        args: Command-line arguments
        clustering_data: Data potentially used to cluster features
                         (depending on hierarchy generation method)

    Returns:
        (hierarchy_root, feature_id_map): root of the resulting hierarchy over
        features (None for a flat hierarchy), and a mapping from visual
        feature ids to original ids (empty unless node names are re-indexed)

    Raises:
        NotImplementedError: if ``args.hierarchy_type`` is not a supported type
    """
    # TODO: Get rid of possibly redundant hierarchy attributes e.g. vidx
    # Generate hierarchy
    hierarchy_root = None
    if args.hierarchy_type == constants.FLAT:
        # Flat hierarchy should be automatically created; do not re-index hierarchy
        args.contiguous_node_names = False
    elif args.hierarchy_type == constants.CLUSTER_FROM_DATA:
        hierarchy_root = gen_hierarchy_from_clusters(args, cluster_data(clustering_data))
    elif args.hierarchy_type == constants.RANDOM:
        hierarchy_root = gen_random_hierarchy(args)
    else:
        raise NotImplementedError("Need valid hierarchy type")

    feature_id_map = {}  # mapping from visual feature ids to original ids
    if not args.contiguous_node_names:
        return hierarchy_root, feature_id_map

    # Improve visualization - rename nodes by their post-order position
    for vidx, node in enumerate(anytree.PostOrderIter(hierarchy_root)):
        node.vidx = vidx
        if node.is_leaf:
            node.min_child_vidx = node.max_child_vidx = vidx
            node.num_base_features = 1
            node.name = str(vidx)
            feature_id_map[vidx] = node.idx[0]
            continue
        # Post-order: all children already carry their visual attributes
        children = node.children
        node.min_child_vidx = min(child.min_child_vidx for child in children)
        node.max_child_vidx = max(child.vidx for child in children)
        node.num_base_features = sum(child.num_base_features for child in children)
        node.name = "[%d-%d] (size: %d)" % (node.min_child_vidx,
                                            node.max_child_vidx,
                                            node.num_base_features)
    return hierarchy_root, feature_id_map
Example #10
0
def compare_with_ground_truth(args, hierarchy_root):
    """
    Compare results from mihifepe with ground truth results

    Writes heuristic ground-truth p-values for every node of the hierarchy to
    ``<output_dir>/ground_truth_pvalues.csv`` (also storing them on the nodes
    as ``pvalue``), runs hierarchical FDR control on that file, and logs the
    locations of the ground-truth and mihifepe output images.
    """
    def heuristic_pvalue(node):
        """Rough relevance-based p-value; children must already carry ``pvalue``."""
        if node.description == constants.IRRELEVANT:
            return 1.0
        if not node.is_leaf:
            return 0.999 * min(child.pvalue for child in node.children)
        if node.poly_coeff:
            return min(0.001, 1e-10 / (node.poly_coeff * node.bin_prob)**3)
        return 0.001

    # Generate ground truth results
    # Write hierarchical FDR input file for ground truth values
    args.logger.info("Compare mihifepe results to ground truth")
    input_filename = "%s/ground_truth_pvalues.csv" % args.output_dir
    with open(input_filename, "w", newline="") as input_file:
        writer = csv.writer(input_file)
        header = [constants.NODE_NAME, constants.PARENT_NAME,
                  constants.PVALUE_LOSSES, constants.DESCRIPTION]
        writer.writerow(header)
        # Post-order so that every child's p-value exists before its parent's is derived
        for node in anytree.PostOrderIter(hierarchy_root):
            node.pvalue = heuristic_pvalue(node)
            parent_name = node.parent.name if node.parent else ""
            writer.writerow([node.name, parent_name, node.pvalue, node.description])

    # Generate hierarchical FDR results for ground truth values
    ground_truth_dir = "%s/ground_truth_fdr" % args.output_dir
    cmd = (
        "python -m mihifepe.fdr.hierarchical_fdr_control -output_dir %s -procedure yekutieli "
        "-rectangle_leaves %s" % (ground_truth_dir, input_filename))
    args.logger.info("Running cmd: %s" % cmd)
    # Drop "python -m" so the module name takes the place of the program name in argv
    pass_args = cmd.split()[2:]
    with patch.object(sys, 'argv', pass_args):
        hierarchical_fdr_control.main()

    # Compare results
    ground_truth_outputs_filename = "%s/%s.png" % (ground_truth_dir,
                                                   constants.TREE)
    args.logger.info("Ground truth results: %s" %
                     ground_truth_outputs_filename)
    mihifepe_outputs_filename = "%s/%s/%s.png" % (
        args.output_dir, constants.HIERARCHICAL_FDR_DIR, constants.TREE)
    args.logger.info("mihifepe results: %s" % mihifepe_outputs_filename)
Example #11
0
File: ID3.py Project: shahamran/iml
    def prune(self, error_bound):
        """
        Return a pruned copy of this classifier; ``self`` is not modified.

        Nodes of the copy's tree are visited bottom-up. For each node, the
        candidates are a fresh ``Node`` per value in ``label_values``, each
        of the node's children, and the node itself. The candidate picked by
        ``argmin`` over ``error_bound`` replaces the node.

        :param error_bound: callable taking a classifier and returning the
            value to minimize.
        :return: the pruned copy.
        """
        clf = self.copy()

        def compute_error(clf, original, alternative):
            """Evaluate error_bound with `alternative` swapped in for `original`, then undo the swap."""
            # just compute the error if no switch is needed
            if original == alternative:
                return error_bound(clf)
            # switch the original node with the alternative in clf
            # (parents are remembered first so the swap can be undone)
            old_parents = (original.parent, alternative.parent)
            Node.replace(original, alternative)

            # the original was the root, so the alternative becomes the tree
            if old_parents[0] is None:
                clf.tree = alternative
            # compute the error with the resulting tree

            output = error_bound(clf)
            # return to the original tree
            # NOTE(review): the third argument is the alternative's former parent -
            # presumably Node.replace re-attaches the alternative there; confirm.
            Node.replace(alternative, original, old_parents[-1])
            if old_parents[0] is None:
                clf.tree = original

            return output

        # go over all nodes in a bottom-up manner
        for node in anytree.PostOrderIter(clf.tree):
            # add all alternatives to a list
            alternatives = []
            for label in clf.label_values:
                alternatives.append(Node(label))
            for child in node.children:
                alternatives.append(child)
            alternatives.append(node)

            # find the alternative which minimizes the bound on the error
            # (presumably argmin returns the element with the smallest key value - confirm)
            best_alternative = argmin(lambda x: compute_error(clf, node, x),
                                      alternatives)
            if best_alternative != node:
                Node.replace(node, best_alternative)
                node = best_alternative
                # the replaced node was the root, so update the tree reference
                if best_alternative.is_root:
                    clf.tree = best_alternative
        return clf
Example #12
0
def update_hierarchy_descriptions(hierarchy_root, relevant_feature_map,
                                  features):
    """
    Set the ``description`` of every node of the hierarchy to reflect feature relevance.

    A leaf (identified by ``node.idx[0]``) is described as relevant when the map holds a
    coefficient under the singleton key ``frozenset([idx])``, or when its index occurs in
    any key of the map (interaction-only); the description then includes the summary of
    the matching entry of ``features``. An internal node is relevant as soon as one of
    its children is. All other nodes are marked irrelevant.
    """
    # All feature indices that occur in any key of the map (including interaction terms)
    features_in_any_term = set().union(*relevant_feature_map)
    for node in anytree.PostOrderIter(hierarchy_root):
        node.description = constants.IRRELEVANT
        if not node.is_leaf:
            # Post-order traversal: the children have already been labelled
            if any(child.description != constants.IRRELEVANT for child in node.children):
                node.description = constants.RELEVANT
            continue
        feature_idx = node.idx[0]
        coeff = relevant_feature_map.get(frozenset([feature_idx]))
        if coeff:
            node.description = f"{constants.RELEVANT} feature:\nPolynomial coefficient: {coeff}\nSummary: {features[feature_idx].summary()}"
        elif feature_idx in features_in_any_term:
            node.description = f"{constants.RELEVANT} feature\n(Interaction-only)\nSummary: {features[feature_idx].summary()}"
# Descend one level: from here on `root` is the first child of the previous root.
# NOTE(review): presumably this skips a wrapper node of the sway tree - confirm
# against the code that builds `root`.
root = root.children[0]

# Identify the workspaces: every child of each display's first child is flagged.
for display in root.children:
    for wk in display.children[0].children:
        wk.workspace = True

# Get the current workspace by querying sway for its workspace list
proc_out = subprocess.run(['swaymsg', '-t', 'get_workspaces'], stdout=subprocess.PIPE)
wkList = json.loads(proc_out.stdout.decode('utf-8'))
focWkName = nf.getFocusedWK(wkList)

# Change the tree such that the workspaces are children to the root
# while ignoring the current workspace
# (the filter assumes every node already has a `workspace` attribute - TODO confirm)
root.children = [node
                 for node in at.PostOrderIter(root, filter_=lambda x: x.workspace)
                 if node.id != focWkName]

# If workspace contains only one container, then remove that container
# by adopting that container's children directly
for node in at.PostOrderIter(root, filter_=lambda x: x.workspace):
    if len(node.children) == 1:
        node.children = node.children[0].children

# If containers have only one element, then remove such containers:
# the single child is attached to the container's parent and the container is detached
# (the filter assumes every node has a `container` attribute - TODO confirm)
for node in at.PreOrderIter(root, filter_=lambda x: x.container):
    if len(node.children) == 1:
        node.children[0].parent = node.parent
        node.parent = None

# Create names for containers
for node in at.PreOrderIter(root, filter_=lambda x: x.container):