Example No. 1
def optimize(instance, sparsity, trade_off, max_iter, epsilon=1e-3):

    graph = instance['graph']
    true_subgraph = instance['subgraph']
    features = instance['features']
    block_node_sets = instance['block_node_sets']  # block id -> set of global node ids
    block_boundary_edges_dict = instance['block_boundary_edges_dict']  # block id -> set of boundary edges

    num_nodes = graph.number_of_nodes()
    num_edges = graph.number_of_edges()
    num_blocks = len(block_boundary_edges_dict)

    logger.debug('-' * 5 + ' related info ' + '-' * 5)
    logger.debug('algorithm: serial graph block-structured matching pursuit')
    logger.debug('sparsity: {:d}'.format(sparsity))
    logger.debug('max iteration: {:d}'.format(max_iter))
    logger.debug('number of nodes: {:d}'.format(num_nodes))
    logger.debug('number of edges: {:d}'.format(num_edges))
    logger.debug('number of blocks: {:d}'.format(num_blocks))

    node_id_dict = relabel_nodes(block_node_sets)  # relabel the nodes of each block with a block-local index: global node id -> block-local node id
    relabeled_edge_sets = relabel_edges(graph, block_node_sets, node_id_dict)  # relabel the edges of each block with block-local node ids

    logger.debug('-' * 5 + ' start iterating ' + '-' * 5)
    start_time = time.time()
    acc_proj_time = 0.

    func = SerialSumEMS(features=features, block_node_sets=block_node_sets, node_id_dict=node_id_dict, block_boundary_edges_dict=block_boundary_edges_dict, trade_off=trade_off)
    true_x = np.zeros(num_nodes)
    true_x[true_subgraph] = 1.
    true_obj_val, true_sum_ems_val, true_penalty = func.get_obj_val(true_x, block_boundary_edges_dict)
    logger.debug('ground truth, obj value: {:.5f}, sum ems value: {:.5f}, penalty: {:.5f}'.format(true_obj_val, true_sum_ems_val, true_penalty))

    current_x = func.get_init_x_zeros() + 1e-6
    for iter in range(max_iter):
        logger.debug('iteration: {:d}'.format(iter))
        prev_x = np.copy(current_x)
        iter_time = time.time()
        iter_proj_time = 0.

        omega_x_list = []
        for b in range(num_blocks):
            block_x = current_x[sorted(block_node_sets[b])]
            block_boundary_edge_x_dict = get_boundary_edge_x_dict(current_x, block_boundary_edges_dict[b], node_id_dict)  # key: (block-local node 1, global node 2) -> value of node 2
            block_features = features[sorted(block_node_sets[b])]
            block_grad = func.get_gradient(block_x, block_features, block_boundary_edge_x_dict)

            block_x = block_x if iter > 0 else np.zeros_like(block_x, dtype=np.float64)

            normalized_block_grad = normalize_gradient(block_x, block_grad)
            edges = np.array(relabeled_edge_sets[b])
            edge_weights = np.ones(len(edges))

            start_proj_time = time.time()
            re_head = head_proj(edges=edges, weights=edge_weights, x=normalized_block_grad, g=1, s=sparsity, budget=sparsity - 1., delta=1. / 169., max_iter=100, err_tol=1e-8, root=-1, pruning='strong', epsilon=1e-10, verbose=0)
            re_nodes, _, _ = re_head
            iter_proj_time += time.time() - start_proj_time
            block_gamma_x = set(re_nodes)
            block_supp_x = {ind for ind, val in enumerate(block_x) if val != 0}
            block_omega_x = block_gamma_x | block_supp_x
            omega_x_list.append(block_omega_x)

        bx = func.argmax_obj_with_proj_serial(current_x, omega_x_list)

        for b in range(num_blocks):
            edges = np.array(relabeled_edge_sets[b])
            edge_weights = np.ones(len(edges))
            block_bx = bx[sorted(block_node_sets[b])]
            start_proj_time = time.time()
            re_tail = tail_proj(edges=edges, weights=edge_weights, x=block_bx, g=1, s=sparsity, budget=sparsity - 1., nu=2.5, max_iter=100, err_tol=1e-8, root=-1, pruning='strong', verbose=0)
            re_nodes, _, _ = re_tail
            iter_proj_time += time.time() - start_proj_time
            psi_x = set(re_nodes)
            block_x = np.zeros_like(current_x[sorted(block_node_sets[b])])
            block_x[list(psi_x)] = block_bx[list(psi_x)]
            current_x[sorted(block_node_sets[b])] = block_x

        acc_proj_time += iter_proj_time

        # post process

        obj_val, sum_ems_val, penalty = func.get_obj_val(current_x, block_boundary_edges_dict)
        logger.debug('objective value: {:.5f}, sum ems value: {:.5f}, penalty: {:.5f}'.format(obj_val, sum_ems_val, penalty))
        logger.debug('iteration time: {:.5f}'.format(time.time() - iter_time))
        logger.debug('iter projection time: {:.5f}'.format(iter_proj_time))
        logger.debug('acc projection time: {:.5f}'.format(acc_proj_time))  # accumulative projection time
        logger.debug('-' * 10)

        diff_norm_x = np.linalg.norm(current_x - prev_x)
        if diff_norm_x < epsilon:  # TODO: refine the stopping rule, e.g. also stop when the objective value decreases
            break

    run_time = time.time() - start_time
    obj_val, sum_ems_val, penalty = func.get_obj_val(current_x, block_boundary_edges_dict)
    logger.debug('objective value: {:.5f}, sum ems value: {:.5f}, penalty: {:.5f}'.format(obj_val, sum_ems_val, penalty))
    logger.debug('run time of whole algorithm: {:.5f}'.format(run_time))
    logger.debug('accumulative projection time: {:.5f}'.format(acc_proj_time))

    return current_x
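
Example No. 1 (and the later examples) relies on relabel_nodes and relabel_edges to translate between global node ids and block-local indices. The helpers below are only a minimal sketch of what those functions might look like, inferred from the inline comments ("global node id -> block-local node id", "relabel edges of each block with block-local node ids"); the project's actual implementations may differ.

def relabel_nodes(block_node_sets):
    # Sketch (assumption): map each global node id to its position in the
    # sorted order of its block, matching how the examples slice arrays with
    # current_x[sorted(block_node_sets[b])].
    node_id_dict = {}
    for nodes in block_node_sets:
        for local_id, global_id in enumerate(sorted(nodes)):
            node_id_dict[global_id] = local_id
    return node_id_dict


def relabel_edges(graph, block_node_sets, node_id_dict):
    # Sketch (assumption): for each block, keep only the edges whose two
    # endpoints both lie inside the block and express them with block-local ids.
    relabeled_edge_sets = []
    for nodes in block_node_sets:
        block = set(nodes)
        block_edges = [(node_id_dict[u], node_id_dict[v])
                       for u, v in graph.edges()
                       if u in block and v in block]
        relabeled_edge_sets.append(block_edges)
    return relabeled_edge_sets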
Example No. 2
def optimize(instance,
             sparsity,
             trade_off,
             learning_rate,
             max_iter,
             epsilon=1e-3,
             logger=None,
             tao=None):

    graph = instance['graph']  # get graph structure
    true_subgraph = instance['true_subgraph']  # get ground truth
    features = instance['features']
    # edges = np.array(graph.edges())

    num_blocks = len(instance['nodes_set'])
    num_nodes = graph.number_of_nodes()
    num_edges = graph.number_of_edges()
    nodes_set = instance['nodes_set']
    boundary_edges_dict = instance['block_boundary_edges_dict']

    nodes_id_dict = relabel_nodes(nodes_set)
    relabeled_edges_set = relabel_edges(graph, nodes_set, nodes_id_dict)

    if logger:
        logger.debug('-' * 5 + ' related info ' + '-' * 5)
        logger.debug('algorithm: graph block-structured GHTP')
        logger.debug('sparsity: {:d}'.format(sparsity))
        logger.debug('max iteration: {:d}'.format(max_iter))
        logger.debug('number of nodes: {:d}'.format(num_nodes))
        logger.debug('number of edges: {:d}'.format(num_edges))
        logger.debug('number of blocks: {:d}'.format(num_blocks))
        logger.debug('-' * 5 + ' start iterating ' + '-' * 5)

    start_time = time.time()
    acc_proj_time = 0.
    func = ParallelSumEMS(features=instance['features'],
                          trade_off=trade_off,
                          nodes_set=nodes_set,
                          boundary_edges_dict=boundary_edges_dict,
                          nodes_id_dict=nodes_id_dict,
                          tao=tao)
    if logger:
        true_x = np.zeros(num_nodes)
        true_x[true_subgraph] = 1.
        true_obj_val, true_ems_val, true_penalty, _ = func.get_obj_val(
            true_x, boundary_edges_dict)
        logger.debug(
            'ground truth, obj value: {:.5f}, sum ems value: {:.5f}, penalty: {:.5f}'
            .format(true_obj_val, true_ems_val, true_penalty))

    current_x = func.get_init_x_zeros() + 1e-6
    for iter in range(max_iter):
        if logger:
            logger.debug('iteration: {:d}'.format(iter))
        prev_x = np.copy(current_x)
        iter_time = time.time()
        iter_proj_time = 0.

        omega_x_list = []
        for b in range(num_blocks):
            block_x = current_x[sorted(nodes_set[b])]
            boundary_xs_dict = get_boundary_xs(
                current_x, boundary_edges_dict[b], nodes_id_dict
            )  # key is boundary edge, value is adjacent x in other blocks
            feat = features[sorted(nodes_set[b])]
            grad_x = func.get_gradient(block_x, feat, boundary_xs_dict)

            block_x = block_x if iter > 0 else np.zeros_like(block_x,
                                                             dtype=np.float64)

            normalized_grad = normalize_gradient(block_x, grad_x)

            edges = np.array(relabeled_edges_set[b])
            costs = np.ones(len(edges))
            start_proj_time = time.time()
            # g: number of connected components
            re_head = head_proj(edges=edges,
                                weights=costs,
                                x=normalized_grad,
                                g=1,
                                s=sparsity,
                                budget=sparsity - 1.,
                                delta=1. / 169.,
                                max_iter=100,
                                err_tol=1e-8,
                                root=-1,
                                pruning='strong',
                                epsilon=1e-10,
                                verbose=0)
            re_nodes, re_edges, p_x = re_head
            iter_proj_time += (time.time() - start_proj_time)
            gamma_x = set(re_nodes)
            indicator_x = np.zeros(len(block_x))
            indicator_x[list(gamma_x)] = 1.
            tmp_x = block_x + learning_rate * grad_x * indicator_x  # note: do not update the current variables here; only use this intermediate result
            omega_x = {ind for ind, val in enumerate(tmp_x) if val != 0.}

            omega_x_list.append(omega_x)

        bx_array = func.argmax_obj_with_proj_parallel(
            current_x,
            omega_x_list)  # solve argmax problem with block coordinate ascent

        for b in range(num_blocks):
            bx = bx_array[nodes_set[b]]
            # get edges and edge weights of current block
            edges = np.array(relabeled_edges_set[b])
            costs = np.ones(len(edges))
            start_proj_time = time.time()
            re_tail = tail_proj(edges=edges,
                                weights=costs,
                                x=bx,
                                g=1,
                                s=sparsity,
                                budget=sparsity - 1.,
                                nu=2.5,
                                max_iter=100,
                                err_tol=1e-8,
                                root=-1,
                                pruning='strong',
                                verbose=0)  # tail projection
            re_nodes, re_edges, p_x = re_tail
            iter_proj_time += (time.time() - start_proj_time)
            psi_x = set(re_nodes)

            block_x = np.zeros_like(current_x[nodes_set[b]])
            block_x[list(psi_x)] = bx[list(psi_x)]
            current_x[nodes_set[b]] = block_x

        acc_proj_time += iter_proj_time

        if logger:
            obj_val, sum_ems_val, penalty, _ = func.get_obj_val(
                current_x, boundary_edges_dict)
            logger.debug(
                'objective value: {:.5f}, sum ems value: {:.5f}, penalty: {:.5f}'
                .format(obj_val, sum_ems_val, penalty))
            logger.debug('iteration time: {:.5f}'.format(time.time() -
                                                         iter_time))
            logger.debug('iter projection time: {:.5f}'.format(iter_proj_time))
            logger.debug('acc projection time: {:.5f}'.format(
                acc_proj_time))  # accumulative projection time
            logger.debug('-' * 10)

        diff_norm_x = np.linalg.norm(current_x - prev_x)
        if logger:
            logger.debug('difference norm x: {:.5f}'.format(diff_norm_x))
        if diff_norm_x < epsilon:
            break

    run_time = time.time() - start_time
    if logger:
        obj_val, sum_ems_val, penalty, _ = func.get_obj_val(
            current_x, boundary_edges_dict)
        logger.debug(
            'objective value: {:.5f}, sum ems value: {:.5f}, penalty: {:.5f}'.
            format(obj_val, sum_ems_val, penalty))
        logger.debug('run time of whole algorithm: {:.5f}'.format(run_time))
        logger.debug(
            'accumulative projection time: {:.5f}'.format(acc_proj_time))

    return current_x, run_time
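
Every example calls normalize_gradient before the head projection. A plausible reading, consistent with keeping the ascent step current_x + learning_rate * grad_x inside the box [0, 1], is that it zeroes the gradient components that would push an entry outside that interval. The sketch below encodes only that assumption; it is not taken from the actual module.

import numpy as np


def normalize_gradient(x, grad):
    # Sketch (assumption): for an ascent step, an entry already at 0 must not
    # move further down and an entry already at 1 must not move further up,
    # so the corresponding gradient components are zeroed out.
    normalized_grad = np.copy(grad)
    normalized_grad[(x <= 0.) & (grad < 0.)] = 0.
    normalized_grad[(x >= 1.) & (grad > 0.)] = 0.
    return normalized_grad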
Example No. 3
def optimize(instance,
             sparsity,
             trade_off,
             learning_rate,
             max_iter,
             epsilon=1e-3,
             logger=None):

    graph = instance['graph']
    true_subgraph = instance['true_subgraph']
    features = instance['features']

    num_blocks = len(instance['nodes_set'])
    num_nodes = graph.number_of_nodes()
    num_edges = graph.number_of_edges()
    nodes_set = instance['nodes_set']
    boundary_edges_dict = instance['block_boundary_edges_dict']

    nodes_id_dict = relabel_nodes(nodes_set)
    relabeled_edges_set = relabel_edges(graph, nodes_set, nodes_id_dict)

    if logger:
        logger.debug('-' * 5 + ' related info ' + '-' * 5)
        logger.debug('algorithm: graph block-structured IHT')
        logger.debug('sparsity: {:d}'.format(sparsity))
        logger.debug('max iteration: {:d}'.format(max_iter))
        logger.debug('number of nodes: {:d}'.format(num_nodes))
        logger.debug('number of edges: {:d}'.format(num_edges))
        logger.debug('number of blocks: {:d}'.format(num_blocks))
        logger.debug('-' * 5 + ' start iterating ' + '-' * 5)

    start_time = time.time()
    acc_proj_time = 0.
    func = BlockSumEMS(features=instance['features'],
                       trade_off=trade_off,
                       nodes_set=nodes_set,
                       boundary_edges_dict=boundary_edges_dict,
                       nodes_id_dict=nodes_id_dict)
    if logger:
        true_x = np.zeros(num_nodes)
        true_x[true_subgraph] = 1.
        true_obj_val, true_ems_val, true_penalty, _ = func.get_obj_val(
            true_x, boundary_edges_dict)
        logger.debug(
            'ground truth, obj value: {:.5f}, sum ems value: {:.5f}, penalty: {:.5f}'
            .format(true_obj_val, true_ems_val, true_penalty))

    current_x = func.get_init_x_zeros() + 1e-6
    for iter in range(max_iter):
        if logger:
            logger.debug('iteration: {:d}'.format(iter))
        prev_x = np.copy(current_x)
        iter_time = time.time()
        iter_proj_time = 0.

        omega_x_list = []
        for b in range(num_blocks):
            block_x = current_x[sorted(nodes_set[b])]
            boundary_xs_dict = get_boundary_xs(current_x,
                                               boundary_edges_dict[b],
                                               nodes_id_dict)
            feat = features[sorted(nodes_set[b])]
            grad_x = func.get_gradient(block_x, feat, boundary_xs_dict)

            block_x = block_x if iter > 0 else np.zeros_like(block_x,
                                                             dtype=np.float64)

            normalized_grad = normalize_gradient(block_x, grad_x)

            edges = np.array(relabeled_edges_set[b])
            costs = np.ones(len(edges))
            start_proj_time = time.time()
            # g: number of connected components
            re_head = head_proj(edges=edges,
                                weights=costs,
                                x=normalized_grad,
                                g=1,
                                s=sparsity,
                                budget=sparsity - 1.,
                                delta=1. / 169.,
                                max_iter=100,
                                err_tol=1e-8,
                                root=-1,
                                pruning='strong',
                                epsilon=1e-10,
                                verbose=0)
            re_nodes, re_edges, p_x = re_head
            iter_proj_time += (time.time() - start_proj_time)
            gamma_x = set(
                re_nodes
            )  # note: elements in gamma_x are local (block) node ids
            omega_x_list.append(gamma_x)

        # update: gradient ascent step restricted to the head-selected support of each block
        bx_array = np.zeros_like(current_x)
        for b in range(num_blocks):
            block_x = current_x[nodes_set[b]]
            feat = features[sorted(nodes_set[b])]
            indicator_x = np.zeros(len(block_x))  # note: start from zeros; only the selected entries are set to 1
            indicator_x[list(omega_x_list[b])] = 1.
            boundary_xs_dict = get_boundary_xs(current_x,
                                               boundary_edges_dict[b],
                                               nodes_id_dict)
            grad_x = func.get_gradient(block_x, feat, boundary_xs_dict)
            bx_array[nodes_set[b]] = current_x[
                nodes_set[b]] + learning_rate * grad_x * indicator_x

        for b in range(num_blocks):
            bx = bx_array[nodes_set[b]]
            # get edges and edge weights of current block
            edges = np.array(relabeled_edges_set[b])
            costs = np.ones(len(edges))
            start_proj_time = time.time()
            re_tail = tail_proj(edges=edges,
                                weights=costs,
                                x=bx,
                                g=1,
                                s=sparsity,
                                budget=sparsity - 1.,
                                nu=2.5,
                                max_iter=100,
                                err_tol=1e-8,
                                root=-1,
                                pruning='strong',
                                verbose=0)
            re_nodes, re_edges, p_x = re_tail
            iter_proj_time += (time.time() - start_proj_time)
            psi_x = set(re_nodes)

            block_x = np.zeros_like(current_x[nodes_set[b]])
            block_x[list(psi_x)] = bx[list(psi_x)]
            normalized_block_x = normalize(block_x)
            current_x[nodes_set[
                b]] = normalized_block_x  # constrain current block x in [0, 1]

        acc_proj_time += iter_proj_time

        if logger:
            obj_val, sum_ems_val, penalty, _ = func.get_obj_val(
                current_x, boundary_edges_dict)
            logger.debug(
                'objective value: {:.5f}, sum ems value: {:.5f}, penalty: {:.5f}'
                .format(obj_val, sum_ems_val, penalty))
            logger.debug('iteration time: {:.5f}'.format(time.time() -
                                                         iter_time))
            logger.debug('iter projection time: {:.5f}'.format(iter_proj_time))
            logger.debug('acc projection time: {:.5f}'.format(
                acc_proj_time))  # accumulative projection time
            logger.debug('-' * 10)

        diff_norm_x = np.linalg.norm(current_x - prev_x)
        if logger:
            logger.debug('difference norm x: {:.5f}'.format(diff_norm_x))
        if diff_norm_x < epsilon:
            break

    run_time = time.time() - start_time
    if logger:
        obj_val, sum_ems_val, penalty, _ = func.get_obj_val(
            current_x, boundary_edges_dict)
        logger.debug(
            'objective value: {:.5f}, sum ems value: {:.5f}, penalty: {:.5f}'.
            format(obj_val, sum_ems_val, penalty))
        logger.debug('run time of whole algorithm: {:.5f}'.format(run_time))
        logger.debug(
            'accumulative projection time: {:.5f}'.format(acc_proj_time))

    return current_x, run_time


# def run_instance(instance, sparsity, trade_off, learning_rate, max_iter=10000, epsilon=1e-3):
#
#     opt_x = optimize(instance, sparsity, trade_off, learning_rate, max_iter, epsilon)
#
#     pred_subgraph = np.nonzero(opt_x)[0]
#
#     prec, rec, fm, iou = evaluate_block(instance['subgraph'], pred_subgraph)
#
#     logger.debug('-' * 5 + ' raw performance ' + '-' * 5)
#     logger.debug('precision: {:.5f}'.format(prec))
#     logger.debug('recall   : {:.5f}'.format(rec))
#     logger.debug('f-measure: {:.5f}'.format(fm))
#     logger.debug('iou      : {:.5f}'.format(iou))
#
#     refined_pred_subgraph = post_process_block(instance['graph'], pred_subgraph)
#     prec, rec, fm, iou = evaluate_block(instance['subgraph'], refined_pred_subgraph)
#     logger.debug('-' * 5 + ' refined performance ' + '-' * 5)
#     logger.debug('precision: {:.5f}'.format(prec))
#     logger.debug('recall   : {:.5f}'.format(rec))
#     logger.debug('f-measure: {:.5f}'.format(fm))
#     logger.debug('iou      : {:.5f}'.format(iou))
#
# if __name__ == '__main__':
#
#     path = '/network/rit/lab/ceashpc/share_data/GraphOpt/ijcai/app2/CondMat'
#     fn = 'test_9.pkl'
#
#     rfn = os.path.join(path, fn)
#     with open(rfn, 'rb') as rfile:
#         dataset = pickle.load(rfile)
#
#     instance = dataset[0]
#     sparsity = 534
#     trade_off = 0.0001
#     learning_rate = 1.
#     run_instance(instance, sparsity, trade_off, learning_rate)
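
get_boundary_xs is another helper that is not defined in these listings. From the comments ("key is boundary edge, value is adjacent x in other blocks" and, in Example No. 1, "(block-local node 1, global node 2) -> value of node 2"), a minimal sketch could look like the following; the parameter name boundary_edges and the exact key layout are assumptions.

def get_boundary_xs(current_x, boundary_edges, nodes_id_dict):
    # Sketch (assumption): for each boundary edge (u, v) with u inside the
    # current block and v in some other block, store the current value of the
    # outside endpoint, keyed by (block-local id of u, global id of v).
    boundary_xs_dict = {}
    for u, v in boundary_edges:
        boundary_xs_dict[(nodes_id_dict[u], v)] = current_x[v]
    return boundary_xs_dict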
Example No. 4
def optimize(instance,
             sparsity,
             trade_off,
             learning_rate,
             max_iter,
             epsilon=1e-3,
             logger=None):

    graph = instance['graph']
    true_subgraph = instance['true_subgraph']
    features = instance['features']

    num_blocks = len(instance['nodes_set'])
    num_nodes = graph.number_of_nodes()
    num_edges = graph.number_of_edges()
    nodes_set = instance['nodes_set']
    boundary_edges_dict = instance['block_boundary_edges_dict']

    nodes_id_dict = relabel_nodes(nodes_set)
    relabeled_edges_set = relabel_edges(graph, nodes_set, nodes_id_dict)

    if logger:
        logger.debug('-' * 5 + ' related info ' + '-' * 5)
        logger.debug('algorithm: graph block-structured IHT')
        logger.debug('sparsity: {:d}'.format(sparsity))
        logger.debug('max iteration: {:d}'.format(max_iter))
        logger.debug('number of nodes: {:d}'.format(num_nodes))
        logger.debug('number of edges: {:d}'.format(num_edges))
        logger.debug('number of blocks: {:d}'.format(num_blocks))
        logger.debug('-' * 5 + ' start iterating ' + '-' * 5)

    start_time = time.time()
    acc_proj_time = 0.
    func = BlockSumEMS(features=instance['features'],
                       trade_off=trade_off,
                       nodes_set=nodes_set,
                       boundary_edges_dict=boundary_edges_dict,
                       nodes_id_dict=nodes_id_dict)
    if logger:
        true_x = np.zeros(num_nodes)
        true_x[true_subgraph] = 1.
        true_obj_val, true_ems_val, true_penalty, _ = func.get_obj_val(
            true_x, boundary_edges_dict)
        logger.debug(
            'ground truth, obj value: {:.5f}, sum ems value: {:.5f}, penalty: {:.5f}'
            .format(true_obj_val, true_ems_val, true_penalty))

    current_x = func.get_init_x_zeros() + 1e-6
    for iter in range(max_iter):
        if logger:
            logger.debug('iteration: {:d}'.format(iter))
        prev_x = np.copy(current_x)
        iter_time = time.time()
        iter_proj_time = 0.

        bx_array = np.zeros_like(current_x)
        for b in range(num_blocks):
            block_x = current_x[sorted(nodes_set[b])]
            boundary_xs_dict = get_boundary_xs(current_x,
                                               boundary_edges_dict[b],
                                               nodes_id_dict)
            feat = features[sorted(nodes_set[b])]
            grad_x = func.get_gradient(block_x, feat, boundary_xs_dict)

            block_x = block_x if iter > 0 else np.zeros_like(block_x,
                                                             dtype=np.float64)

            normalized_grad = normalize_gradient(block_x, grad_x)

            edges = np.array(relabeled_edges_set[b])
            costs = np.ones(len(edges))
            start_proj_time = time.time()
            # g: number of connected components
            re_head = head_proj(edges=edges,
                                weights=costs,
                                x=normalized_grad,
                                g=1,
                                s=sparsity,
                                budget=sparsity - 1.,
                                delta=1. / 169.,
                                max_iter=100,
                                err_tol=1e-8,
                                root=-1,
                                pruning='strong',
                                epsilon=1e-10,
                                verbose=0)
            re_nodes, re_edges, p_x = re_head
            iter_proj_time += (time.time() - start_proj_time)
            gamma_x = set(
                re_nodes
            )  # note: elements in gamma_x are local (block) node ids

            indicator_x = np.zeros(len(block_x))  # note: start from zeros; only the head-selected entries are set to 1
            indicator_x[list(gamma_x)] = 1.
            bx_array[nodes_set[b]] = current_x[
                nodes_set[b]] + learning_rate * grad_x * indicator_x

        for b in range(num_blocks):
            bx = bx_array[nodes_set[b]]
            # get edges and edge weights of current block
            edges = np.array(relabeled_edges_set[b])
            costs = np.ones(len(edges))
            start_proj_time = time.time()
            re_tail = tail_proj(edges=edges,
                                weights=costs,
                                x=bx,
                                g=1,
                                s=sparsity,
                                budget=sparsity - 1.,
                                nu=2.5,
                                max_iter=100,
                                err_tol=1e-8,
                                root=-1,
                                pruning='strong',
                                verbose=0)
            re_nodes, re_edges, p_x = re_tail
            iter_proj_time += (time.time() - start_proj_time)
            psi_x = set(re_nodes)

            block_x = np.zeros_like(current_x[nodes_set[b]])
            block_x[list(psi_x)] = bx[list(psi_x)]
            normalized_block_x = normalize(block_x)
            current_x[nodes_set[
                b]] = normalized_block_x  # constrain current block x in [0, 1]

        acc_proj_time += iter_proj_time

        if logger:
            obj_val, sum_ems_val, penalty, _ = func.get_obj_val(
                current_x, boundary_edges_dict)
            logger.debug(
                'objective value: {:.5f}, sum ems value: {:.5f}, penalty: {:.5f}'
                .format(obj_val, sum_ems_val, penalty))
            logger.debug('iteration time: {:.5f}'.format(time.time() -
                                                         iter_time))
            logger.debug('iter projection time: {:.5f}'.format(iter_proj_time))
            logger.debug('acc projection time: {:.5f}'.format(
                acc_proj_time))  # accumulative projection time
            logger.debug('-' * 10)

        diff_norm_x = np.linalg.norm(current_x - prev_x)
        if logger:
            logger.debug('difference norm x: {:.5f}'.format(diff_norm_x))
        if diff_norm_x < epsilon:
            break

    run_time = time.time() - start_time
    if logger:
        obj_val, sum_ems_val, penalty, _ = func.get_obj_val(
            current_x, boundary_edges_dict)
        logger.debug(
            'objective value: {:.5f}, sum ems value: {:.5f}, penalty: {:.5f}'.
            format(obj_val, sum_ems_val, penalty))
        logger.debug('run time of whole algorithm: {:.5f}'.format(run_time))
        logger.debug(
            'accumulative projection time: {:.5f}'.format(acc_proj_time))

    return current_x, run_time
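
Examples No. 3-5 pass the tail-projected block vector through normalize before writing it back, with the comment "constrain current block x in [0, 1]". Simple clipping is one minimal sketch consistent with that comment; the real implementation could instead rescale the values, so treat this as an assumption.

import numpy as np


def normalize(x):
    # Sketch (assumption): clamp every entry into [0, 1], per the inline
    # comment "constrain current block x in [0, 1]".
    return np.clip(x, 0., 1.)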
Example No. 5
def optimize(instance, sparsity, trade_off, learning_rate, max_iter, epsilon):

    graph = instance['graph']
    true_subgraph = instance['subgraph']
    features = instance['features']
    block_node_sets = instance['block_node_sets']
    block_boundary_edges_dict = instance['block_boundary_edges_dict']

    num_nodes = graph.number_of_nodes()
    num_edges = graph.number_of_edges()
    num_blocks = len(block_boundary_edges_dict)

    logger.debug('-' * 5 + ' related info ' + '-' * 5)
    logger.debug('algorithm: graph block-structured IHT')
    logger.debug('sparsity: {:d}'.format(sparsity))
    logger.debug('max iteration: {:d}'.format(max_iter))
    logger.debug('number of nodes: {:d}'.format(num_nodes))
    logger.debug('number of edges: {:d}'.format(num_edges))
    logger.debug('number of blocks: {:d}'.format(num_blocks))

    node_id_dict = relabel_nodes(block_node_sets)
    relabeled_edge_sets = relabel_edges(graph, block_node_sets, node_id_dict)

    logger.debug('-' * 5 + ' start iterating ' + '-' * 5)
    start_time = time.time()
    acc_proj_time = 0.
    func = BlockSumEMS(features=features,
                       nodes_set=block_node_sets,
                       boundary_edges_dict=block_boundary_edges_dict,
                       nodes_id_dict=node_id_dict,
                       trade_off=trade_off)

    true_x = np.zeros(num_nodes)
    true_x[true_subgraph] = 1.
    true_obj_val, true_sum_ems_val, true_penalty = func.get_obj_val(true_x)
    logger.debug(
        'ground truth, obj value: {:.5f}, sum ems value: {:.5f}, penalty: {:.5f}'
        .format(true_obj_val, true_sum_ems_val, true_penalty))

    current_x = func.get_init_x_zeros() + 1e-6
    for iter in range(max_iter):  # cyclic block coordinate version
        for b in range(num_blocks):
            logger.debug('iteration: {:d}, block: {:d}'.format(iter, b))
            prev_x = np.copy(current_x)
            iter_time = time.time()
            iter_proj_time = 0.

            sorted_node_set = sorted(block_node_sets[b])
            block_x = current_x[sorted_node_set]
            block_boundary_edge_x_dict = get_boundary_xs(
                current_x, block_boundary_edges_dict[b], node_id_dict)
            block_features = features[sorted_node_set]
            block_grad = func.get_gradient(block_x, block_features,
                                           block_boundary_edge_x_dict)
            block_x = block_x if iter > 0 else np.zeros_like(block_x,
                                                             dtype=np.float64)

            normalized_block_grad = normalize_gradient(block_x, block_grad)
            edges = np.array(relabeled_edge_sets[b])
            edge_weights = np.ones(len(edges))

            start_proj_time = time.time()
            re_head = head_proj(edges=edges,
                                weights=edge_weights,
                                x=normalized_block_grad,
                                g=1,
                                s=sparsity,
                                budget=sparsity - 1.,
                                delta=1. / 169.,
                                max_iter=100,
                                err_tol=1e-8,
                                root=-1,
                                pruning='strong',
                                epsilon=1e-10,
                                verbose=0)
            re_nodes, _, _ = re_head
            iter_proj_time += time.time() - start_proj_time
            block_omega_x = set(
                re_nodes
            )  # note: elements in block_omega_x are local (block) node ids

            # update one block
            block_x = current_x[sorted_node_set]
            block_features = features[sorted_node_set]
            block_indicator_x = np.zeros_like(current_x[sorted_node_set])
            block_indicator_x[list(block_omega_x)] = 1.
            block_boundary_edge_x_dict = get_boundary_xs(
                current_x, block_boundary_edges_dict[b], node_id_dict)

            current_x[sorted_node_set] = current_x[
                sorted_node_set] + learning_rate * func.get_gradient(
                    block_x, block_features,
                    block_boundary_edge_x_dict) * block_indicator_x

            block_bx = current_x[sorted_node_set]
            start_proj_time = time.time()
            re_tail = tail_proj(edges=edges,
                                weights=edge_weights,
                                x=block_bx,
                                g=1,
                                s=sparsity,
                                budget=sparsity - 1.,
                                nu=2.5,
                                max_iter=100,
                                err_tol=1e-8,
                                root=-1,
                                pruning='strong',
                                verbose=0)
            re_nodes, _, _ = re_tail
            iter_proj_time += time.time() - start_proj_time
            psi_x = set(re_nodes)
            block_x = np.zeros_like(current_x[sorted_node_set])
            block_x[list(psi_x)] = block_bx[list(psi_x)]
            normalized_block_x = normalize(block_x)
            current_x[
                sorted_node_set] = normalized_block_x  # constrain current block x in [0, 1]

            acc_proj_time += iter_proj_time

            obj_val, sum_ems_val, penalty = func.get_obj_val(current_x)
            logger.debug(
                'objective value: {:.5f}, sum ems value: {:.5f}, penalty: {:.5f}'
                .format(obj_val, sum_ems_val, penalty))
            logger.debug('iteration time: {:.5f}'.format(time.time() -
                                                         iter_time))
            logger.debug('iter projection time: {:.5f}'.format(iter_proj_time))
            logger.debug('acc projection time: {:.5f}'.format(
                acc_proj_time))  # accumulative projection time
            pred_subgraph = np.nonzero(current_x)[0]
            prec, rec, fm, iou = evaluate_block(instance['subgraph'],
                                                pred_subgraph)
            logger.debug(
                'precision: {:.5f}, recall: {:.5f}, f-measure: {:.5f}'.format(
                    prec, rec, fm))
            logger.debug('-' * 10)

            diff_norm_x = np.linalg.norm(current_x - prev_x)
            if diff_norm_x < epsilon:
                logger.debug('difference norm x: {:.5f}'.format(diff_norm_x))
                break

    run_time = time.time() - start_time
    obj_val, sum_ems_val, penalty = func.get_obj_val(current_x)
    logger.debug(
        'objective value: {:.5f}, sum ems value: {:.5f}, penalty: {:.5f}'.
        format(obj_val, sum_ems_val, penalty))
    logger.debug('run time of whole algorithm: {:.5f}'.format(run_time))
    logger.debug('accumulative projection time: {:.5f}'.format(acc_proj_time))

    return current_x
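
A driver for Example No. 5 might look like the sketch below. It mirrors the commented-out run_instance block after Example No. 3 and assumes numpy as np, a module-level logger, and the evaluate_block helper already used above; the dataset path, file name, and hyperparameter values are placeholders, not values from the original project.

if __name__ == '__main__':
    import os
    import pickle

    # hypothetical location of a pickled list of instances
    path = '/path/to/dataset'
    fn = 'instance.pkl'
    with open(os.path.join(path, fn), 'rb') as rfile:
        dataset = pickle.load(rfile)

    instance = dataset[0]
    # hypothetical hyperparameters, chosen only for illustration
    opt_x = optimize(instance, sparsity=100, trade_off=0.0001,
                     learning_rate=1., max_iter=20, epsilon=1e-3)

    pred_subgraph = np.nonzero(opt_x)[0]
    prec, rec, fm, iou = evaluate_block(instance['subgraph'], pred_subgraph)
    logger.debug('precision: {:.5f}, recall: {:.5f}, f-measure: {:.5f}, iou: {:.5f}'.format(prec, rec, fm, iou))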