Example #1

# Standard imports used by these examples; the project-level helpers referenced
# below (BlockSumEMS, print_log, evaluate, head_proj, tail_proj, relabel_nodes,
# relabel_edges, get_boundary_xs, normalized_gradient) are assumed to be
# importable from the surrounding project, and output_lock is assumed to be a
# module-level multiprocessing.Lock (see the sketch after this example).
import time

import networkx as nx
import numpy as np

def worker(para):
    (data, data_type, case_id, func_name, max_iter, trade_off, sparsity,
     log_file, result_file) = para

    G = data["graph"]
    features = np.array(data["features"])
    true_subgraph = data["true_subgraph"]
    boundary_edges_dict = data["block_boundary_edges_dict"]
    nodes_set = data["nodes_set"]
    num_blocks = len(boundary_edges_dict)
    num_nodes = G.number_of_nodes()
    num_edges = G.number_of_edges()

    print_log(
        log_file,
        "---------------------------------Ground-Truth---------------------------------------\n"
    )
    print_log(log_file, 'number of nodes: {}\n'.format(num_nodes))
    print_log(log_file, 'number of edges: {}\n'.format(num_edges))
    print_log(log_file, 'number of blocks: {}\n'.format(num_blocks))
    print_log(log_file, 'all true subgraph: {}\n'.format(true_subgraph))
    print_log(log_file, 'true subgraph size: {}\n'.format(len(true_subgraph)))
    true_X = np.zeros(num_nodes)
    true_X[true_subgraph] = 1.0
    func = BlockSumEMS.BlockSumEMS(features=features,
                                   num_blocks=num_blocks,
                                   trade_off=trade_off,
                                   nodes_set=nodes_set)
    obj_val, ems_score, smooth_penalty, binarized_penalty = func.get_obj_value(
        true_X, boundary_edges_dict)
    print_log(log_file, '\ntrade-off: {}\n'.format(trade_off))
    print_log(log_file,
              'objective value of ground-truth: {:.5f}\n'.format(obj_val))
    print_log(log_file,
              'global ems score of ground-truth: {:.5f}\n'.format(ems_score))
    print_log(log_file,
              'penalty of ground-truth: {:.5f}\n'.format(obj_val - ems_score))

    print_log(
        log_file,
        "\n-----------------------------Block Graph-MP--------------------------------------"
    )
    X, total_time = block_graph_mp(data, sparsity, max_iter, trade_off,
                                   log_file, func_name)

    print_log(
        log_file,
        "\n-------------------------Evaluation of Raw Prediction----------------------------"
    )
    raw_pred_subgraph = sorted(np.nonzero(X)[0])
    raw_global_prec, raw_global_rec, raw_global_fm, raw_global_iou = evaluate(
        true_subgraph, raw_pred_subgraph, log_file)
    obj_val, ems_score, smooth_penalty, binarized_penalty = func.get_obj_value(
        X, boundary_edges_dict)
    print_log(log_file, '\ntrade-off: {}\n'.format(trade_off))
    print_log(log_file,
              'objective value of prediction: {:.5f}\n'.format(obj_val))
    print_log(log_file,
              'global ems score of prediction: {:.5f}\n'.format(ems_score))
    print_log(
        log_file,
        'penalty of prediction: {:.5f}\n'.format(obj_val - ems_score))
    print('\nRaw Prediction    : {}, {}, {}, {:.5f}, {:.5f}, {:.5f}, {:.5f}'.
          format(trade_off, sparsity, case_id, raw_global_prec, raw_global_rec,
                 raw_global_fm, raw_global_iou))

    print_log(
        log_file,
        "\n-----------------------Evaluation of Refined Prediction--------------------------"
    )
    S = G.subgraph(raw_pred_subgraph)
    largest_cc = max(nx.connected_components(S), key=len)
    refined_pred_subgraph = sorted(largest_cc)
    refined_X = np.zeros_like(X)
    refined_X[refined_pred_subgraph] = X[refined_pred_subgraph]
    refined_global_prec, refined_global_rec, refined_global_fm, refined_global_iou = evaluate(
        true_subgraph, refined_pred_subgraph, log_file)
    obj_val, ems_score, smooth_penalty, binarized_penalty = func.get_obj_value(
        refined_X, boundary_edges_dict)
    print_log(log_file, '\nsmooth trade-off: {}\n'.format(trade_off))
    print_log(log_file,
              'objective value of prediction: {:.5f}\n'.format(obj_val))
    print_log(log_file,
              'global ems score of prediction: {:.5f}\n'.format(ems_score))
    print_log(log_file,
              'smooth penalty of prediction: {:.5f}\n'.format(smooth_penalty))
    print('Refined Prediction: {}, {}, {}, {:.5f}, {:.5f}, {:.5f}, {:.5f}\n'.
          format(trade_off, sparsity, case_id, refined_global_prec,
                 refined_global_rec, refined_global_fm, refined_global_iou))

    # serialize appends to the shared result file across worker processes
    with output_lock:
        with open(result_file, "a") as f:
            f.write(
                '{}, {}, {}, {:.5f}, {:.5f}, {:.5f}, {:.5f}, {:.5f}, {:.5f}, {:.5f}, {:.5f}, {:.5f}\n'
                .format(trade_off, sparsity, case_id, total_time,
                        raw_global_prec, raw_global_rec, raw_global_fm,
                        raw_global_iou, refined_global_prec, refined_global_rec,
                        refined_global_fm, refined_global_iou))
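
A minimal sketch of how worker might be driven in parallel, assuming the per-case data dicts are loaded elsewhere (load_cases below is a hypothetical loader), a fork-based start method so the module-level output_lock used above is inherited by the child processes, and purely illustrative parameter values:

from multiprocessing import Lock, Pool

output_lock = Lock()  # assumed module-level lock; worker uses it to serialize result_file writes

def run_experiments(log_file, result_file):
    cases = load_cases()  # hypothetical loader returning a list of data dicts
    # one task per (case, trade_off, sparsity) combination
    paras = [(data, "synthetic", case_id, "BlockSumEMS", 10, trade_off, sparsity,
              log_file, result_file)
             for case_id, data in enumerate(cases)
             for trade_off in (0.001, 0.01)
             for sparsity in (50, 100)]
    with Pool(processes=4) as pool:
        pool.map(worker, paras)
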
Example #2
def block_graph_mp(data, k, max_iter, trade_off, log_file, func_name="EMS"):
    """
    :param func_name: score function name
    :param k: sparsity
    :param max_iter: max number of iterations
    :param G: networkx graph
    :param true_subgraph: a list of nodes that represents the ground truth subgraph
    :return: prediction xt, which denotes by the predicted abnormal nodes
    """
    G = data["graph"]
    features = np.array(data["features"])
    nodes_set = data["nodes_set"]
    boundary_edges_dict = data["block_boundary_edges_dict"]
    node_block_dict = data["node_block_dict"]
    true_subgraph = sorted(data["true_subgraph"])
    num_blocks = len(boundary_edges_dict)
    nodes_id_dict = relabel_nodes(
        nodes_set)  # key is global node id, value is local node id
    if func_name == "BlockSumEMS":
        func = BlockSumEMS.BlockSumEMS(features=features,
                                       num_blocks=num_blocks,
                                       nodes_set=nodes_set,
                                       boundary_edges_dict=boundary_edges_dict,
                                       nodes_id_dict=nodes_id_dict,
                                       trade_off=trade_off)
    else:
        raise ValueError("unsupported score function: {}".format(func_name))

    num_nodes = G.number_of_nodes()
    num_edges = G.number_of_edges()
    relabeled_edges_set = relabel_edges(G, nodes_set, nodes_id_dict)

    print_log(log_file, "\n----------------initialization---------------\n")
    X = func.get_init_point_random()
    XT = np.copy(X)
    print_log(log_file, "\n------------------searching------------------\n")
    start_time = time.time()
    learning_rate = 1.
    for iter in range(max_iter):
        Omega_X = []
        X_prev = np.copy(XT)
        print("iter: {}, time: {}".format(
            iter, time.asctime(time.localtime(time.time()))))
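        # Head projection, block by block: keep only the entries of the
        # (normalized) gradient whose support forms a connected subgraph
        # with at most k nodes (g=1 connected component, sparsity s=k).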
        for t in range(num_blocks):
            xt = XT[sorted(nodes_set[t])]
            boundary_xs_dict = get_boundary_xs(
                XT, boundary_edges_dict[t], nodes_id_dict
            )  # key is boundary edge, value is adjacent x in other blocks
            fea = features[sorted(nodes_set[t])]
            grad = func.get_loss_grad(xt, fea, boundary_xs_dict)

            if iter == 0:  # x is initialized to 0.000001 to avoid division by zero in the gradient, so treat it as all-zero here
                xt_zero = np.zeros_like(xt)
                normalized_grad = normalized_gradient(xt_zero, grad)
            else:
                normalized_grad = normalized_gradient(xt, grad)

            # g: number of connected components
            edges = np.array(relabeled_edges_set[t])
            costs = np.ones(len(edges))
            re_head = head_proj(edges=edges,
                                weights=costs,
                                x=normalized_grad,
                                g=1,
                                s=k,
                                budget=k - 1,
                                delta=1. / 169.,
                                max_iter=100,
                                err_tol=1e-8,
                                root=-1,
                                pruning='strong',
                                epsilon=1e-10,
                                verbose=0)
            re_nodes, re_edges, p_x = re_head
            gamma_xt = set(re_nodes)
            supp_xt = set([ind for ind, _ in enumerate(xt) if _ != 0.])

            indicator_x = np.zeros(len(xt))
            indicator_x[list(gamma_xt)] = 1.
            if iter == 0:
                # note: do not update the current variable here; only use the intermediate result
                tmp_x = np.zeros_like(xt) + learning_rate * grad * indicator_x
            else:
                tmp_x = xt + learning_rate * grad * indicator_x
            omega_x = set([ind for ind, _ in enumerate(tmp_x) if not 0. == _])

            # if 0 == iter:  # because we initialize the x as 0.000001 to avoid the divided by zero error when calculating the gradient
            #     omega_x = gamma_xt
            Omega_X.append(omega_x)

        print("---Head Projection Finished: time: {}".format(
            time.asctime(time.localtime(time.time()))))
        # BX = func.get_argmax_fx_with_proj_accelerated(XT, Omega_X, iter)  # TODO: how to solve this argmax correctly
        BX = func.get_argmax_fx_with_proj_accelerated_2(
            XT, Omega_X)  # TODO: how to solve this argmax correctly

        print("---ArgMax Finished: time: {}".format(
            time.asctime(time.localtime(time.time()))))
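        # Tail projection, block by block: restrict the argmax solution BX to a
        # connected support with at most k nodes and write it back into XT.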
        for t in range(num_blocks):
            edges = np.array(relabeled_edges_set[t])
            costs = np.ones(len(edges))
            bx = BX[nodes_set[t]]
            re_tail = tail_proj(edges=edges,
                                weights=costs,
                                x=bx,
                                g=1,
                                s=k,
                                budget=k - 1,
                                nu=2.5,
                                max_iter=100,
                                err_tol=1e-8,
                                root=-1,
                                pruning='strong',
                                verbose=0)
            re_nodes, re_edges, p_x = re_tail
            psi_x = re_nodes
            xt = np.zeros_like(XT[nodes_set[t]])
            xt[list(psi_x)] = bx[list(
                psi_x
            )]  # TODO: note the non-zero entries of xt[list(psi_x)] may not be connected
            XT[nodes_set[t]] = xt

        print("---Tail Projection Finished: time: {}".format(
            time.asctime(time.localtime(time.time()))))
        gap_x = np.linalg.norm(XT - X_prev)**2
        if gap_x < 1e-6:
            break

        print_log(log_file,
                  '\ncurrent performance iteration: {}\n'.format(iter))
        obj_val, ems_score, smooth_penalty, binarized_penalty = func.get_obj_value(
            XT, boundary_edges_dict)
        print_log(log_file, 'trade-off: {}\n'.format(trade_off))
        print_log(log_file,
                  'objective value of prediction: {:.5f}\n'.format(obj_val))
        print_log(log_file,
                  'global ems score of prediction: {:.5f}\n'.format(ems_score))
        print_log(log_file,
                  'penalty of prediction: {:.5f}\n'.format(obj_val - ems_score))

        pred_subgraph = sorted(np.nonzero(XT)[0])
        print_log(
            log_file,
            "----------------- current predicted subgraph vs true subgraph:\n")
        print_log(log_file, "{}, size: {}\n".format(pred_subgraph,
                                                    len(pred_subgraph)))
        print_log(log_file, "{}, size: {}\n".format(true_subgraph,
                                                    len(true_subgraph)))

        print_log(log_file,
                  "----------------- info of current predicted subgraph:\n")
        fea = np.round(features[pred_subgraph], 5)
        print_log(
            log_file, "{}\n".format(
                list(zip(pred_subgraph, np.round(XT[pred_subgraph], 5), fea))))

        print_log(log_file,
                  "----------------- info of current true subgraph:\n")
        fea = np.round(features[true_subgraph], 5)
        print_log(
            log_file, "{}\n".format(
                list(zip(true_subgraph, np.round(XT[true_subgraph], 5), fea))))

        global_prec, global_rec, global_fm, global_iou = evaluate(
            true_subgraph, pred_subgraph)
        print_log(
            log_file,
            'global_prec={:.4f},\nglobal_rec={:.4f},\nglobal_fm={:.4f},\nglobal_iou={:.4f}\n'
            .format(global_prec, global_rec, global_fm, global_iou))

    end_time = time.time()
    total_time = end_time - start_time
    print("time {}".format(total_time))

    return XT, total_time
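
A minimal sketch of a direct call, assuming data carries the keys read above ("graph", "features", "nodes_set", "block_boundary_edges_dict", "node_block_dict", "true_subgraph"), that print_log accepts a file path, and that the values of k, max_iter and trade_off are illustrative:

X, total_time = block_graph_mp(data, k=50, max_iter=10, trade_off=0.01,
                               log_file="block_graph_mp.log",
                               func_name="BlockSumEMS")
pred_subgraph = sorted(np.nonzero(X)[0])  # non-zero entries mark the predicted abnormal nodes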