Code Example #1
def union_enum(left_level, right_level, x_size, alpha, top_k, w, loss, cur_lvl):
    # candidate buckets for the next level, keyed by slice name so duplicate attribute unions collapse
    buckets = {}
    for node_i in range(len(left_level)):
        for node_j in range(len(right_level)):
            flag = approved_union_slice(left_level[node_i], right_level[node_j])
            if flag:
                node = SparkNode(None, None)
                node.attributes = list(set(left_level[node_i].attributes) | set(right_level[node_j].attributes))
                bucket = Bucket(node, cur_lvl, w, x_size, loss)
                bucket.parents.append(left_level[node_i])
                bucket.parents.append(right_level[node_j])
                bucket.calc_bounds(w, x_size, loss)
                if bucket.check_bounds(x_size, alpha, top_k):
                    buckets[bucket.name] = bucket
    return buckets
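
The dictionary keyed by bucket.name is what de-duplicates candidates here: different parent pairs can produce the same attribute union, and only one bucket per union survives. A minimal, self-contained sketch of that pattern (canonical_name and enumerate_candidates are illustrative stand-ins, not the project's classes):

from itertools import combinations

def canonical_name(attributes):
    # stable slice name: sorted attributes joined with "&&", so {a, b} and {b, a} collide
    return "&&".join(sorted(map(str, attributes)))

def enumerate_candidates(level):
    # level: list of attribute sets; pair them up and keep one candidate per distinct union
    candidates = {}
    for left, right in combinations(level, 2):
        union_attrs = left | right
        candidates.setdefault(canonical_name(union_attrs), union_attrs)
    return candidates

# all three pairs below yield the union {a, b, c}, but it is stored only once
print(enumerate_candidates([{"a", "b"}, {"b", "c"}, {"a", "c"}]))
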
Code Example #2
def join_enum(cur_lvl_nodes, cur_lvl, x_size, alpha, top_k, w, loss):
    buckets = {}
    for node_i in range(len(cur_lvl_nodes)):
        for node_j in range(node_i + 1, len(cur_lvl_nodes)):
            flag = approved_join_slice(cur_lvl_nodes[node_i], cur_lvl_nodes[node_j], cur_lvl)
            if flag:
                node = SparkNode(None, None)
                node.attributes = list(set(cur_lvl_nodes[node_i].attributes) | set(cur_lvl_nodes[node_j].attributes))
                bucket = Bucket(node, cur_lvl, w, x_size, loss)
                bucket.parents.append(cur_lvl_nodes[node_i])
                bucket.parents.append(cur_lvl_nodes[node_j])
                bucket.calc_bounds(w, x_size, loss)
                if bucket.check_bounds(x_size, alpha, top_k):
                    buckets[bucket.name] = bucket
    return buckets
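
join_enum is the per-level step of a level-wise (apriori-style) enumeration: buckets that survive the bound check become the inputs of the next level, and the search stops once a level yields no candidates. A rough, self-contained sketch of that outer loop (levelwise_search, combine, and passes_bounds are hypothetical stand-ins, not the project's API):

def levelwise_search(first_level, combine, passes_bounds, max_level):
    # combine(a, b) returns a candidate dict with a "name" key, or None if the pair is not allowed;
    # passes_bounds(candidate) is the pruning test applied before a candidate is kept
    level = list(first_level)
    survivors = []
    for _ in range(max_level):
        next_level = {}
        for i in range(len(level)):
            for j in range(i + 1, len(level)):
                cand = combine(level[i], level[j])
                if cand is not None and passes_bounds(cand):
                    next_level.setdefault(cand["name"], cand)
        if not next_level:
            break                      # no surviving candidates: the enumeration is finished
        level = list(next_level.values())
        survivors.extend(level)
    return survivors
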
Code Example #3
File: spark_utils.py Project: deutschmn/systemml
def make_first_level(features, predictions, loss, top_k, w, loss_type):
    first_level = []
    # First-level slices are enumerated the "classic" way (materializing the data without analyzing bounds)
    for feature in features:
        new_node = SparkNode(loss, predictions)
        new_node.parents = [feature]
        new_node.attributes.append(feature)
        new_node.name = new_node.make_name()
        new_node.key = new_node.make_key()
        new_node.process_slice(loss_type)
        new_node.score = opt_fun(new_node.loss, new_node.size, loss,
                                 len(predictions), w)
        new_node.c_upper = new_node.score
        first_level.append(new_node)
        new_node.print_debug(top_k, 0)
    return first_level
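
opt_fun folds a slice's loss and size into a single score, and at the first level that score also serves as the node's upper bound (c_upper). The actual formula lives elsewhere in the project; the weighted form below is only an assumption, sketched to make the trade-off concrete:

def opt_fun_sketch(slice_loss, slice_size, total_loss, x_size, w):
    # hypothetical scoring: w weighs relative error against relative size
    rel_error = (slice_loss / slice_size) / (total_loss / x_size)  # slice error vs. average error
    rel_size = slice_size / x_size                                 # fraction of the data covered
    return w * rel_error + (1 - w) * rel_size

# a slice with twice the average error that covers 10% of the data, w = 0.5 -> 1.05
print(opt_fun_sketch(slice_loss=20.0, slice_size=10, total_loss=100.0, x_size=100, w=0.5))
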
Code Example #4
File: spark_utils.py Project: deutschmn/systemml
def process_node(node_i, level, loss, predictions, cur_lvl, top_k, alpha,
                 loss_type, w, debug, enumerator):
    cur_enum_nodes = []
    for node_j in level:
        if enumerator == "join":
            flag = approved_join_slice(node_i, node_j, cur_lvl)
        else:
            flag = approved_union_slice(node_i, node_j)
        if flag and int(node_i.name.split("&&")[0]) < int(
                node_j.name.split("&&")[0]):
            new_node = SparkNode(loss, predictions)
            parents_set = set(new_node.parents)
            parents_set.add(node_i)
            parents_set.add(node_j)
            new_node.parents = list(parents_set)
            parent1_attr = node_i.attributes
            parent2_attr = node_j.attributes
            new_node_attr = union(parent1_attr, parent2_attr)
            new_node.attributes = new_node_attr
            new_node.name = new_node.make_name()
            new_node.key = new_node.make_key()
            new_node.calc_bounds(cur_lvl, w)
            to_slice = new_node.check_bounds(top_k, len(predictions), alpha)
            if to_slice:
                new_node.process_slice(loss_type)
                new_node.score = opt_fun(new_node.loss, new_node.size, loss,
                                         len(predictions), w)
                if new_node.check_constraint(top_k, len(predictions), alpha):
                    cur_enum_nodes.append(new_node)
            if debug:
                new_node.print_debug(top_k, cur_lvl)
    return cur_enum_nodes
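
Because process_node expands a single node against the whole level, every node can be processed independently, which makes the level easy to distribute. A sketch of how such a step might be driven with PySpark, assuming spark_utils.py (Code Example #4) is importable and the variables from the examples above are in scope; the driver itself is illustrative, not the project's actual job:

from pyspark import SparkContext
from spark_utils import process_node    # assumes the module shown above is on the path

sc = SparkContext(appName="slice-enum-sketch")
level_b = sc.broadcast(cur_lvl_nodes)    # share the current level with every executor

# expand every node against the broadcast level and collect the surviving candidates
next_level = (
    sc.parallelize(cur_lvl_nodes)
      .flatMap(lambda n: process_node(n, level_b.value, loss, predictions, cur_lvl,
                                      top_k, alpha, loss_type, w, debug, enumerator))
      .collect()
)
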
Code Example #5
File: spark_slicer.py Project: deutschmn/systemml
def join_enum_fun(node_a, list_b, predictions, f_l2, debug, alpha, w,
                  loss_type, cur_lvl, top_k):
    x_size = len(predictions)
    nodes = []
    for node_i in range(len(list_b)):
        flag = spark_utils.approved_join_slice(list_b[node_i], node_a, cur_lvl)
        if flag:
            new_node = SparkNode(f_l2, predictions)
            parents_set = set(new_node.parents)
            parents_set.add(list_b[node_i])
            parents_set.add(node_a)
            new_node.parents = list(parents_set)
            parent1_attr = node_a.attributes
            parent2_attr = list_b[node_i].attributes
            new_node_attr = union(parent1_attr, parent2_attr)
            new_node.attributes = new_node_attr
            new_node.name = new_node.make_name()
            new_node.key = new_node.make_key()
            new_node.calc_bounds(cur_lvl, w)
            # check whether concrete data should be extracted for this candidate
            # (only when its score upper bound and subset size are promising enough)
            to_slice = new_node.check_bounds(top_k, x_size, alpha)
            if to_slice:
                new_node.process_slice(loss_type)
                new_node.score = opt_fun(new_node.loss, new_node.size, f_l2,
                                         x_size, w)
                # based on its score, decide whether the node joins the current level
                # (i.e. whether it may form new combinations on the next level)
                if new_node.check_constraint(
                        top_k, x_size,
                        alpha) and new_node.key not in top_k.keys:
                    top_k.add_new_top_slice(new_node)
                nodes.append(new_node)
            if debug:
                new_node.print_debug(top_k, cur_lvl)
    return nodes
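
join_enum_fun relies on only two things from the top_k object: a keys collection for membership tests and an add_new_top_slice method. A minimal stand-in that satisfies that assumed interface (the project's real class may differ):

class TopKSketch:
    # keeps the k best slices seen so far, ordered by score (illustrative stand-in)
    def __init__(self, k):
        self.k = k
        self.slices = []
        self.keys = []                       # membership is checked via `key not in top_k.keys`

    def add_new_top_slice(self, node):
        self.slices.append(node)
        self.keys.append(node.key)
        self.slices.sort(key=lambda n: n.score, reverse=True)
        if len(self.slices) > self.k:        # evict the weakest slice once over capacity
            dropped = self.slices.pop()
            self.keys.remove(dropped.key)
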