def optimize(instance, sparsity, threshold, trade_off, learning_rate, max_iter, epsilon=1e-3,
             logger=None):
    """Run the head-projection phase of the dual-network GHTP solver (density projection variant).

    Each outer iteration takes the gradient of the ``DualEMS`` objective with respect to ``x``
    and ``y``, sends the normalized ``x`` gradient through ``head_proj`` on the first graph and
    the absolute normalized ``y`` gradient through ``density_projection`` on the second graph.

    NOTE(review): the second half of every iteration (joint argmax over the projected supports,
    tail projection on both graphs, the update of ``current_x`` / ``current_y`` and the
    ``epsilon`` convergence test) is disabled in this version of the file. As a consequence
    ``current_x`` and ``current_y`` are never modified inside the loop: the function returns the
    initial point (``get_init_x_zeros()`` plus 1e-6), every iteration repeats the same
    projections, and ``epsilon`` is unused. Restore that phase from version control before
    relying on the returned vectors.

    :param instance: dict with keys 'first_graph', 'second_graph', 'true_subgraph' and 'weight';
        the graphs must expose ``edges``, ``number_of_nodes()`` and ``number_of_edges()`` and
        are passed to ``nx.density`` when a logger is given
    :param sparsity: sparsity ``s`` handed to ``head_proj`` (its budget is ``sparsity - 1``)
    :param threshold: forwarded unchanged to ``density_projection``
    :param trade_off: forwarded unchanged to ``DualEMS``
    :param learning_rate: step size of the intermediate gradient step on ``x``
    :param max_iter: number of outer iterations
    :param epsilon: convergence tolerance; currently unused (see note above)
    :param logger: optional logger; when given, run information and ground-truth objective
        values are reported
    :return: tuple ``(current_x, current_y, run_time)``
    :raises ValueError: if the two graphs do not have the same number of nodes
    """
    first_graph = instance['first_graph']
    second_graph = instance['second_graph']
    true_subgraph = instance['true_subgraph']
    features = instance['weight']

    num_nodes = first_graph.number_of_nodes()
    print('number of nodes in first graph', num_nodes)
    print('number of nodes in second graph', second_graph.number_of_nodes())
    # Validate before any expensive work. The original raised a plain string here, which
    # Python 3 rejects with "TypeError: exceptions must derive from BaseException".
    if num_nodes != second_graph.number_of_nodes():
        raise ValueError('error, wrong dual network input !!!')

    adjacency = adj_matrix(second_graph)  # adjacency matrix of second graph, used by density projection
    first_graph_edges = np.array(first_graph.edges)
    first_graph_edge_weights = np.ones(first_graph.number_of_edges())  # edge weight, default 1

    num_edges_first_graph = first_graph.number_of_edges()
    num_edges_second_graph = second_graph.number_of_edges()

    if logger:
        # basic information about the run
        logger.debug('-' * 5 + ' related info ' + '-' * 5)
        logger.debug('algorithm: graph block-structured GHTP')
        logger.debug('sparsity: {:d}'.format(sparsity))
        logger.debug('max iteration: {:d}'.format(max_iter))
        logger.debug('number of nodes: {:d}'.format(num_nodes))
        logger.debug('number of edges in first graph: {:d}'.format(num_edges_first_graph))
        logger.debug('number of edges in second graph: {:d}'.format(num_edges_second_graph))
        logger.debug('density of first graph: {:.5f}'.format(nx.density(first_graph)))
        logger.debug('density of second graph: {:.5f}'.format(nx.density(second_graph)))
        logger.debug('density of true subgraph in second graph: {:.5f}'.format(
            nx.density(nx.subgraph(second_graph, true_subgraph))))
        logger.debug('-' * 5 + ' start iterating ' + '-' * 5)

    start_time = time.time()
    func = DualEMS(features, trade_off)

    if logger:
        # objective value of the ground truth, for comparison with the iterates
        print(sorted(true_subgraph))
        true_x = np.zeros(num_nodes)
        true_x[list(true_subgraph)] = 1.
        true_obj_val, x_ems_val, y_ems_val, penalty = func.get_obj_val(true_x, true_x)
        print('ground truth values: {}, {}, {}, {}'.format(
            true_obj_val, x_ems_val, y_ems_val, penalty))

    current_x, current_y = func.get_init_x_zeros()
    current_x += 1e-6  # start next to zero; the offset avoids a divide-by-zero error
    current_y += 1e-6
    print('iteration start funval', func.get_obj_val(current_x, current_y))

    # (lambda, sparsity) candidates tried by the density projection
    lmbd_list = [0.23]
    sparsity_list = [275]

    for iteration in range(max_iter):
        if logger:
            logger.debug('iteration: {:d}'.format(iteration))

        # ---- first graph: head projection under the connectivity constraint ----
        grad_x = func.get_gradient(current_x, current_y)
        if iteration == 0:  # treat the start point as the all-zero vector
            norm_grad_x = normalize_gradient(np.zeros_like(current_x), grad_x)
        else:
            norm_grad_x = normalize_gradient(current_x, grad_x)

        start_proj_time = time.time()
        re_nodes, _, _ = head_proj(
            edges=first_graph_edges, weights=first_graph_edge_weights, x=norm_grad_x, g=1,
            s=sparsity, budget=sparsity - 1., delta=1. / 169., max_iter=100, err_tol=1e-8,
            root=-1, pruning='strong', epsilon=1e-10, verbose=0)
        print('head projection time for x: {:.5f}'.format(time.time() - start_proj_time))

        gamma_x = set(re_nodes)
        indicator_x = np.zeros(num_nodes)
        indicator_x[list(gamma_x)] = 1.
        # Using grad_x or norm_grad_x makes no difference for the support, because the
        # indicator itself was obtained from norm_grad_x.
        if iteration == 0:
            tmp_x = np.zeros_like(current_x) + learning_rate * norm_grad_x * indicator_x
        else:
            tmp_x = current_x + learning_rate * norm_grad_x * indicator_x
        # support of the intermediate step; consumed only by the disabled second phase
        omega_x = {ind for ind, val in enumerate(tmp_x) if val != 0.}

        # ---- second graph: density projection (note the swapped argument order) ----
        grad_y = func.get_gradient(current_y, current_x)
        if iteration == 0:
            norm_grad_y = normalize_gradient(np.zeros_like(current_y), grad_y)
        else:
            norm_grad_y = normalize_gradient(current_y, grad_y)
        # The density projection expects non-negative input; a larger magnitude means a larger
        # effect on the objective, so the absolute value is used.
        abs_norm_grad_y = np.absolute(norm_grad_y)
        np.set_printoptions(linewidth=3000)  # process-wide numpy print setting

        # Materialised as a list so the helper may iterate it more than once (a bare zip
        # object is exhausted after a single pass in Python 3).
        lmbd_sparsity_list = list(zip(lmbd_list, sparsity_list))

        print('start head projection for y')
        start_proj_time = time.time()
        # result is consumed only by the disabled second phase
        gamma_y = density_projection(
            second_graph, abs_norm_grad_y, adjacency, threshold, lmbd_sparsity_list,
            normalize=True, true_subgraph=true_subgraph)
        print('head projection time for y: {:.5f}'.format(time.time() - start_proj_time))

        # Second phase (argmax, tail projections, update, early termination) is disabled;
        # see the NOTE(review) in the docstring.

    run_time = time.time() - start_time
    return current_x, current_y, run_time
def proj_mp(graph, weight, A, sparsity, lmbd, max_iter=10, epsilon=1e-3):
    """Matching-pursuit style projection that returns the support of the final iterate.

    Every round computes the gradient at the current point, head-projects its normalized form
    onto the graph, merges the resulting nodes with the current support (the support is ignored
    in the first round), maximises over that candidate set with ``proj_argmax``, tail-projects
    the maximiser and keeps only the entries on the tail-projected nodes. The loop ends after
    ``max_iter`` rounds or as soon as the L2 change between iterates is at most ``epsilon``.

    :param graph: graph object exposing ``edges``, ``number_of_nodes()``, ``number_of_edges()``
    :param weight: forwarded to the ``proj_*`` helpers
    :param A: forwarded to the ``proj_*`` helpers
    :param sparsity: sparsity ``s`` for both projections (budget is ``sparsity - 1``)
    :param lmbd: forwarded to the ``proj_*`` helpers
    :param max_iter: maximum number of rounds
    :param epsilon: stop threshold on the L2 change of the iterate
    :return: set with the indices of the non-zero entries of the final iterate
    """
    current_x = proj_init_point(graph.number_of_nodes()) + 1e-6
    edges = np.array(graph.edges)
    edge_weights = np.ones(graph.number_of_edges())
    budget = sparsity - 1.

    start_time = time.time()
    for round_idx in range(max_iter):
        print('iter {}'.format(round_idx))
        round_start = time.time()

        grad = proj_get_gradient(current_x, weight, A, lmbd)
        scaled_grad = normalize_gradient(current_x, grad)
        head_nodes, _, _ = head_proj(
            edges=edges, weights=edge_weights, x=scaled_grad, g=1, s=sparsity, budget=budget,
            delta=1. / 169., err_tol=1e-8, max_iter=100, root=-1, pruning='strong',
            epsilon=1e-10, verbose=0)
        head_support = set(head_nodes)
        print('gamma_x', len(head_support))

        # The start vector is dense (all 1e-6), so its support is not merged in round 0.
        if round_idx:
            prior_support = {idx for idx, val in enumerate(current_x) if val != 0.}
        else:
            prior_support = set()
        candidate_support = head_support | prior_support
        print('omega_x', len(candidate_support))

        bx = proj_argmax(current_x, candidate_support, weight, A, lmbd, max_iter=2000,
                         learning_rate=0.01)
        tail_nodes, _, _ = tail_proj(
            edges=edges, weights=edge_weights, x=bx, g=1, s=sparsity, budget=budget, nu=2.5,
            max_iter=100, err_tol=1e-8, root=-1, pruning='strong', verbose=0)
        kept = list(set(tail_nodes))

        # keep only the tail-projected coordinates of the maximiser
        prev_x, current_x = current_x, np.zeros_like(current_x)
        current_x[kept] = bx[kept]
        print('psi_x', len(np.nonzero(current_x)[0]))

        diff_norm_x = np.linalg.norm(current_x - prev_x)
        func_val, xtw, dense_term = proj_get_func_val(current_x, weight, A, lmbd)
        print(
            'iter {}, func val: {:.5f}, iter_time: {:.5f}, diff_norm: {:.5f}'.
            format(round_idx, func_val, time.time() - round_start, diff_norm_x))

        subgraph = set(np.nonzero(current_x)[0])
        print('subgraph density', nx.density(nx.subgraph(graph, subgraph)))

        if diff_norm_x <= epsilon:
            break

    run_time = time.time() - start_time
    func_val, xtw, dense_term = proj_get_func_val(current_x, weight, A, lmbd)
    print('final function value: {:.5f}'.format(func_val))
    print('run time of whole algorithm: {:.5f}'.format(run_time))

    return set(np.nonzero(current_x)[0])
def optimize(instance, sparsity, trade_off, max_iter, epsilon=1e-3):
    """Serial graph block-structured matching pursuit on the ``SerialSumEMS`` objective.

    Per outer iteration: for every block the block gradient is normalized and head-projected on
    the block's relabeled edges, and the result is merged with the block's current support;
    ``argmax_obj_with_proj_serial`` is then solved once over all block supports; finally each
    block of the maximiser is tail-projected and written back into ``current_x``. Iteration
    stops after ``max_iter`` rounds or when the L2 change of ``current_x`` drops below
    ``epsilon``. Progress is reported through the module-level ``logger``.

    :param instance: dict with keys 'graph', 'subgraph', 'features', 'block_node_sets'
        (block id -> global node ids) and 'block_boundary_edges_dict' (block id -> edge set)
    :param sparsity: sparsity ``s`` for head and tail projection (budget is ``sparsity - 1``)
    :param trade_off: forwarded unchanged to ``SerialSumEMS``
    :param max_iter: maximum number of outer iterations
    :param epsilon: stop threshold on the L2 change of the iterate
    :return: the final vector ``current_x``
    """
    graph = instance['graph']
    true_subgraph = instance['subgraph']
    features = instance['features']
    block_node_sets = instance['block_node_sets']  # block id - global node id
    block_boundary_edges_dict = instance['block_boundary_edges_dict']  # block id - edge set

    num_nodes = graph.number_of_nodes()
    num_edges = graph.number_of_edges()
    num_blocks = len(block_boundary_edges_dict)

    logger.debug('-' * 5 + ' related info ' + '-' * 5)
    logger.debug('algorithm: serial graph block-structured matching pursuit')
    logger.debug('sparsity: {:d}'.format(sparsity))
    logger.debug('max iteration: {:d}'.format(max_iter))
    logger.debug('number of nodes: {:d}'.format(num_nodes))
    logger.debug('number of edges: {:d}'.format(num_edges))
    logger.debug('number of blocks: {:d}'.format(num_blocks))

    # global node id -> block-local node id
    node_id_dict = relabel_nodes(block_node_sets)
    # edges of each block expressed in block-local node ids
    relabeled_edge_sets = relabel_edges(graph, block_node_sets, node_id_dict)

    # Loop-invariant per-block data, computed once. Every access to a block of current_x goes
    # through the same sorted index list; the original tail step indexed with the raw
    # block_node_sets[b], which fails for set-typed blocks and disagreed with the other uses.
    block_indices = [sorted(block_node_sets[b]) for b in range(num_blocks)]
    block_edges = [np.array(relabeled_edge_sets[b]) for b in range(num_blocks)]
    block_edge_weights = [np.ones(len(block_edges[b])) for b in range(num_blocks)]

    logger.debug('-' * 5 + ' start iterating ' + '-' * 5)

    start_time = time.time()
    acc_proj_time = 0.

    func = SerialSumEMS(features=features, block_node_sets=block_node_sets,
                        node_id_dict=node_id_dict,
                        block_boundary_edges_dict=block_boundary_edges_dict,
                        trade_off=trade_off)

    # objective value of the ground truth, for comparison with the iterates
    true_x = np.zeros(num_nodes)
    true_x[true_subgraph] = 1.
    true_obj_val, true_sum_ems_val, true_penalty = func.get_obj_val(
        true_x, block_boundary_edges_dict)
    logger.debug('ground truth, obj value: {:.5f}, sum ems value: {:.5f}, penalty: {:.5f}'.format(
        true_obj_val, true_sum_ems_val, true_penalty))

    current_x = func.get_init_x_zeros() + 1e-6

    for iteration in range(max_iter):
        logger.debug('iteration: {:d}'.format(iteration))
        prev_x = np.copy(current_x)
        iter_time = time.time()
        iter_proj_time = 0.

        # ---- head projection, block by block ----
        omega_x_list = []
        for b in range(num_blocks):
            indices = block_indices[b]
            block_x = current_x[indices]
            # (block node 1, global node 2) - value of node 2
            block_boundary_edge_x_dict = get_boundary_edge_x_dict(
                current_x, block_boundary_edges_dict[b], node_id_dict)
            block_features = features[indices]
            block_grad = func.get_gradient(block_x, block_features, block_boundary_edge_x_dict)

            # the first iteration treats the start point as the all-zero vector
            if iteration == 0:
                block_x = np.zeros_like(block_x, dtype=np.float64)
            normalized_block_grad = normalize_gradient(block_x, block_grad)

            start_proj_time = time.time()
            re_nodes, _, _ = head_proj(
                edges=block_edges[b], weights=block_edge_weights[b], x=normalized_block_grad,
                g=1, s=sparsity, budget=sparsity - 1., delta=1. / 169., max_iter=100,
                err_tol=1e-8, root=-1, pruning='strong', epsilon=1e-10, verbose=0)
            iter_proj_time += time.time() - start_proj_time

            block_gamma_x = set(re_nodes)
            block_supp_x = {ind for ind, val in enumerate(block_x) if val != 0}
            omega_x_list.append(block_gamma_x | block_supp_x)

        bx = func.argmax_obj_with_proj_serial(current_x, omega_x_list)

        # ---- tail projection, block by block ----
        for b in range(num_blocks):
            indices = block_indices[b]
            block_bx = bx[indices]

            start_proj_time = time.time()
            re_nodes, _, _ = tail_proj(
                edges=block_edges[b], weights=block_edge_weights[b], x=block_bx, g=1,
                s=sparsity, budget=sparsity - 1., nu=2.5, max_iter=100, err_tol=1e-8, root=-1,
                pruning='strong', verbose=0)
            iter_proj_time += time.time() - start_proj_time

            kept = list(set(re_nodes))
            block_x = np.zeros_like(current_x[indices])
            block_x[kept] = block_bx[kept]
            current_x[indices] = block_x

        acc_proj_time += iter_proj_time

        obj_val, sum_ems_val, penalty = func.get_obj_val(current_x, block_boundary_edges_dict)
        logger.debug('objective value: {:.5f}, global ems value: {:.5f}, penalty: {:.5f}'.format(
            obj_val, sum_ems_val, penalty))
        logger.debug('iteration time: {:.5f}'.format(time.time() - iter_time))
        logger.debug('iter projection time: {:.5f}'.format(iter_proj_time))
        logger.debug('acc projection time: {:.5f}'.format(acc_proj_time))
        logger.debug('-' * 10)

        diff_norm_x = np.linalg.norm(current_x - prev_x)
        # TODO: also stop when the objective value decreases
        if diff_norm_x < epsilon:
            break

    run_time = time.time() - start_time

    obj_val, global_ems_val, penalty = func.get_obj_val(current_x, block_boundary_edges_dict)
    logger.debug('objective value: {:.5f}, sum ems value: {:.5f}, penalty: {:.5f}'.format(
        obj_val, global_ems_val, penalty))
    logger.debug('run time of whole algorithm: {:.5f}'.format(run_time))
    logger.debug('accumulative projection time: {:.5f}'.format(acc_proj_time))

    return current_x
def optimize(instance, sparsity, learning_rate=0.01, max_iter=10, epsilon=1e-3):
    """
    Graph-structured gradient hard thresholding pursuit on the ``EMS`` objective.

    Each iteration head-projects the normalized gradient, takes a masked gradient step to
    obtain a candidate support, maximises the objective over that support, and tail-projects
    the maximiser to form the next iterate. Progress goes to the module-level ``logger``.

    :param instance: dict with keys 'graph', 'subgraph' and 'features'
    :param sparsity: sparsity ``s`` for head and tail projection (budget is ``sparsity - 1``)
    :param learning_rate: step size of the masked gradient step
    :param max_iter: maximum number of iterations
    :param epsilon: stop once the L2 change between iterates is below this value
    :return: the final iterate
    """
    graph = instance['graph']  # network topology
    true_subgraph = instance['subgraph']  # only its length is reported

    edges = np.array(graph.edges)
    num_nodes = graph.number_of_nodes()
    num_edges = graph.number_of_edges()
    edge_weights = np.ones(num_edges)

    logger.debug('-' * 5 + ' related info ' + '-' * 5)
    logger.debug('sparsity: {:d}'.format(sparsity))
    print(max_iter)
    logger.debug('max iteration: {:d}'.format(max_iter))
    logger.debug('number of nodes: {:d}'.format(num_nodes))
    logger.debug('number of edges: {:d}'.format(num_edges))
    logger.debug('number of nodes in true_subgraph: {:d}'.format(len(true_subgraph)))
    logger.debug('-' * 5 + ' start iterating ' + '-' * 5)

    start_time = time.time()
    acc_proj_time = 0.

    func = EMS(features=instance['features'], graph=graph)
    current_x = func.get_init_x_zeros() + 0.00001

    for step in range(max_iter):
        logger.debug('iteration: {:d}'.format(step))
        step_started = time.time()

        gradient = func.get_gradient(current_x)
        normalized_grad = normalize_gradient(current_x, gradient)

        # head projection
        tick = time.time()
        head_nodes, _, _ = head_proj(
            edges=edges, weights=edge_weights, x=normalized_grad, g=1, s=sparsity,
            budget=sparsity - 1., delta=1. / 169., max_iter=100, err_tol=1e-8, root=-1,
            pruning='strong', epsilon=1e-10, verbose=0)
        iter_proj_time = 0. + (time.time() - tick)

        # gradient step restricted to the head-projected nodes
        mask = np.zeros(num_nodes)
        mask[list(set(head_nodes))] = 1.
        stepped = current_x + learning_rate * normalized_grad * mask
        omega_x = {idx for idx, val in enumerate(stepped) if val != 0.}

        bx = func.argmax_obj_with_proj(current_x, omega_x)

        # tail projection
        tick = time.time()
        tail_nodes, _, _ = tail_proj(
            edges=edges, weights=edge_weights, x=bx, g=1, s=sparsity, budget=sparsity - 1.,
            nu=2.5, max_iter=100, err_tol=1e-8, root=-1, pruning='strong', verbose=0)
        iter_proj_time += time.time() - tick
        acc_proj_time += iter_proj_time

        # next iterate: maximiser values on the tail-projected nodes, zero elsewhere
        kept = list(set(tail_nodes))
        prev_x, current_x = current_x, np.zeros_like(current_x)
        current_x[kept] = bx[kept]

        logger.debug('function value: {:.5f}'.format(func.get_obj_val(current_x)))
        logger.debug('iteration time: {:.5f}'.format(time.time() - step_started))
        logger.debug('iter projection time: {:.5f}'.format(iter_proj_time))
        logger.debug('acc projection time: {:.5f}'.format(acc_proj_time))
        logger.debug('-' * 10)

        if np.linalg.norm(current_x - prev_x) < epsilon:
            break

    run_time = time.time() - start_time

    logger.debug('final function value: {:.5f}'.format(func.get_obj_val(current_x)))
    logger.debug('run time of whole algorithm: {:.5f}'.format(run_time))
    logger.debug('accumulative projection time: {:.5f}'.format(acc_proj_time))

    return current_x
def optimize(instance, sparsity, trade_off, learning_rate, max_iter, epsilon=1e-3, logger=None):
    """Graph block-structured IHT over a sequence of time stamps (``GlobalEMS`` objective).

    Per outer iteration: for every time stamp the normalized gradient is head-projected to get
    a support; all time stamps are then updated simultaneously with a gradient step restricted
    to their support; finally every updated block is tail-projected, passed through
    ``normalize`` and stored back. Iteration stops after ``max_iter`` rounds or when the norm
    of the change of the whole array drops below ``epsilon``.

    :param instance: dict with keys 'graph', 'true_subgraphs' (one node collection per time
        stamp) and 'features'
    :param sparsity: sparsity ``s`` for head and tail projection (budget is ``sparsity - 1``)
    :param trade_off: forwarded unchanged to ``GlobalEMS``
    :param learning_rate: step size of the restricted gradient step
    :param max_iter: maximum number of outer iterations
    :param epsilon: stop threshold on the norm of the change of the iterate
    :param logger: optional logger for run information and per-iteration statistics
    :return: tuple ``(current_x_array, run_time)``
    """
    graph = instance['graph']
    true_subgraphs = instance['true_subgraphs']
    edges = np.array(graph.edges)

    num_time_stamps = len(true_subgraphs)
    num_nodes = graph.number_of_nodes()
    num_edges = graph.number_of_edges()
    edge_weights = np.ones(num_edges)

    # Locate the signal interval (reported in the log only). An explicit length test is used
    # because the truth value of a multi-element numpy array is ambiguous. With several
    # separate bursts the last one is reported, as before.
    start = end = 0
    in_signal = False
    for t, subgraph in enumerate(true_subgraphs):
        has_signal = subgraph is not None and len(subgraph) > 0
        if has_signal and not in_signal:
            start = t
            in_signal = True
        elif not has_signal and in_signal:
            end = t - 1
            in_signal = False
    if in_signal:
        # the signal lasts through the final time stamp; previously `end` was left stale
        end = num_time_stamps - 1

    if logger:
        logger.debug('-' * 5 + ' related info ' + '-' * 5)
        logger.debug('algorithm: graph block-structured IHT')
        logger.debug('sparsity: {:d}'.format(sparsity))
        logger.debug('max iteration: {:d}'.format(max_iter))
        logger.debug('number of nodes: {:d}'.format(num_nodes))
        logger.debug('number of edges: {:d}'.format(num_edges))
        logger.debug('number of time stamps: {:d}'.format(num_time_stamps))
        logger.debug('signal interval: [{:d}, {:d}]'.format(start, end))
        logger.debug('-' * 5 + ' start iterating ' + '-' * 5)

    start_time = time.time()
    acc_proj_time = 0.

    func = GlobalEMS(features=instance['features'], trade_off=trade_off)

    # objective value of the ground truth, for comparison with the iterates
    true_x_array = []
    for true_subgraph in true_subgraphs:
        true_x = np.zeros(num_nodes)
        true_x[true_subgraph] = 1.
        true_x_array.append(true_x)
    true_x_array = np.array(true_x_array)
    true_obj_val, true_global_ems_val, true_penalty = func.get_obj_val(true_x_array)
    if logger:
        logger.debug(
            'ground truth, obj value: {:.5f}, global ems value: {:.5f}, penalty: {:.5f}'
            .format(true_obj_val, true_global_ems_val, true_penalty))

    current_x_array = func.get_init_x_zeros() + 1e-6

    for iteration in range(max_iter):
        if logger:
            logger.debug('iteration: {:d}'.format(iteration))

        prev_x_array = np.copy(current_x_array)
        iter_time = time.time()
        iter_proj_time = 0.

        # ---- head projection per time stamp ----
        omega_x_list = []
        for t in range(num_time_stamps):
            grad_x = func.get_gradient(current_x_array, t)
            print(grad_x)
            # the first iteration treats the start point as the all-zero vector
            if iteration > 0:
                current_x = current_x_array[t]
            else:
                current_x = np.zeros_like(current_x_array[t], dtype=np.float64)
            normalized_grad = normalize_gradient(current_x, grad_x)

            start_proj_time = time.time()
            re_nodes, _, _ = head_proj(
                edges=edges, weights=edge_weights, x=normalized_grad, g=1, s=sparsity,
                budget=sparsity - 1., delta=1. / 169., max_iter=100, err_tol=1e-8, root=-1,
                pruning='strong', epsilon=1e-10, verbose=0)
            iter_proj_time += time.time() - start_proj_time

            omega_x = set(re_nodes)
            omega_x_list.append(omega_x)
            print(sorted(omega_x))

        # ---- simultaneous update of all blocks (analogous to gradient descent) ----
        # bx_array is an intermediate buffer, so every block sees the same current_x_array.
        # The gradient is recomputed rather than reused from the loop above because
        # normalize_gradient is not visible here and might modify its argument.
        bx_array = np.zeros_like(current_x_array)
        for t in range(num_time_stamps):
            indicator_x = np.zeros(num_nodes)
            indicator_x[list(omega_x_list[t])] = 1.
            bx_array[t] = (current_x_array[t]
                           + learning_rate * func.get_gradient(current_x_array, t) * indicator_x)

        # ---- tail projection per time stamp ----
        for t in range(num_time_stamps):
            bx = bx_array[t]

            start_proj_time = time.time()
            re_nodes, _, _ = tail_proj(
                edges=edges, weights=edge_weights, x=bx, g=1, s=sparsity, budget=sparsity - 1.,
                nu=2.5, max_iter=100, err_tol=1e-8, root=-1, pruning='strong', verbose=0)
            iter_proj_time += time.time() - start_proj_time

            kept = list(set(re_nodes))
            current_x = np.zeros_like(current_x_array[t])
            current_x[kept] = bx[kept]
            current_x = normalize(current_x)  # restrict current_x to [0, 1]
            current_x_array[t] = current_x

        acc_proj_time += iter_proj_time

        if logger:
            obj_val, global_ems_val, penalty = func.get_obj_val(current_x_array)
            logger.debug(
                'objective value: {:.5f}, global ems value: {:.5f}, penalty: {:.5f}'
                .format(obj_val, global_ems_val, penalty))
            logger.debug('iteration time: {:.5f}'.format(time.time() - iter_time))
            logger.debug('iter projection time: {:.5f}'.format(iter_proj_time))
            logger.debug('acc projection time: {:.5f}'.format(acc_proj_time))
            logger.debug('-' * 10)

        diff_norm_x = np.linalg.norm(current_x_array - prev_x_array)
        if logger:
            logger.debug('difference norm x: {:.5f}'.format(diff_norm_x))
        if diff_norm_x < epsilon:
            break

    run_time = time.time() - start_time

    if logger:
        obj_val, global_ems_val, penalty = func.get_obj_val(current_x_array)
        logger.debug(
            'objective value: {:.5f}, global ems value: {:.5f}, penalty: {:.5f}'
            .format(obj_val, global_ems_val, penalty))
        logger.debug('run time of whole algorithm: {:.5f}'.format(run_time))
        logger.debug('accumulative projection time: {:.5f}'.format(acc_proj_time))

    return current_x_array, run_time
def optimize(instance, sparsity, threshold, trade_off, learning_rate, max_iter, epsilon=1e-3,
             logger=None):
    """Dual-network block-structured GHTP using ``dense_projection`` on the second graph.

    Per outer iteration: the ``x`` gradient is head-projected on the first graph and the
    absolute normalized ``y`` gradient is projected with ``dense_projection`` on the second
    graph; a gradient step restricted to each projection yields the supports ``omega_x`` and
    ``omega_y``; ``argmax_obj_with_proj`` is solved over those supports; the maximisers are
    tail-projected (``tail_proj`` for ``x``, ``dense_projection`` for ``y``) and passed through
    ``normalize``. Iteration stops after ``max_iter`` rounds or when the combined L2 change of
    ``x`` and ``y`` drops below ``epsilon``.

    :param instance: dict with keys 'first_graph', 'second_graph' and 'weight'; the graphs must
        expose ``edges``, ``nodes()``, ``number_of_nodes()`` and ``number_of_edges()``
    :param sparsity: sparsity ``s`` for head and tail projection (budget is ``sparsity - 1``)
    :param threshold: forwarded unchanged to ``dense_projection``
    :param trade_off: forwarded unchanged to ``DualEMS``
    :param learning_rate: step size of the restricted gradient steps
    :param max_iter: maximum number of outer iterations
    :param epsilon: stop threshold on the combined L2 change of ``x`` and ``y``
    :param logger: optional logger; when given, run information and ground-truth objective
        values are reported
    :return: tuple ``(current_x, current_y, run_time)``; ``run_time`` excludes the initial
        shortest-path computation
    :raises ValueError: if the two graphs do not have the same number of nodes
    """
    first_graph = instance['first_graph']
    second_graph = instance['second_graph']
    # NOTE(review): the ground-truth node set is hard-coded here instead of being read from
    # instance['true_subgraph']; it only feeds the diagnostic output below. Confirm intended.
    true_subgraph = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]
    features = instance['weight']

    first_graph_edges = np.array(first_graph.edges)
    first_graph_edge_weights = np.ones(first_graph.number_of_edges())

    num_nodes = first_graph.number_of_nodes()
    print(num_nodes)
    print(second_graph.number_of_nodes())
    # The original raised a plain string here, which Python 3 rejects with
    # "TypeError: exceptions must derive from BaseException".
    if num_nodes != second_graph.number_of_nodes():
        raise ValueError('error, wrong dual network input !!!')

    num_edges_first_graph = first_graph.number_of_edges()
    num_edges_second_graph = second_graph.number_of_edges()

    if logger:
        logger.debug('-' * 5 + ' related info ' + '-' * 5)
        logger.debug('algorithm: graph block-structured GHTP')
        logger.debug('sparsity: {:d}'.format(sparsity))
        logger.debug('max iteration: {:d}'.format(max_iter))
        logger.debug('number of nodes: {:d}'.format(num_nodes))
        logger.debug('number of edges in first graph: {:d}'.format(num_edges_first_graph))
        logger.debug('number of edges in second graph: {:d}'.format(num_edges_second_graph))
        logger.debug('density of first graph: {:.5f}'.format(nx.density(first_graph)))
        logger.debug('density of second graph: {:.5f}'.format(nx.density(second_graph)))
        logger.debug('-' * 5 + ' start iterating ' + '-' * 5)

    # shortest path lengths from every node of the second graph, used by the dense projection
    start_time = time.time()
    shortest_path_lengths = {}
    for node in second_graph.nodes():
        shortest_path_lengths[node] = nx.shortest_path_length(second_graph, source=node)
    print('shortest_path time: {:.5f}'.format(time.time() - start_time))

    start_time = time.time()  # restart the clock: run_time covers the optimization only
    acc_proj_time = 0.

    func = DualEMS(features, trade_off)

    if logger:
        # objective value of the ground truth, for comparison with the iterates
        print(sorted(true_subgraph))
        true_x = np.zeros(num_nodes)
        true_x[list(true_subgraph)] = 1.
        true_obj_val, x_ems_val, y_ems_val, penalty = func.get_obj_val(true_x, true_x)
        print('ground truth values: {}, {}, {}, {}'.format(
            true_obj_val, x_ems_val, y_ems_val, penalty))

    # initialize node coefficients close to, but not at, zero to avoid division by zero
    current_x, current_y = func.get_init_x_zeros()
    current_x += 1e-6
    current_y += 1e-6
    print('iteration start funval', func.get_obj_val(current_x, current_y))

    # geometric sequence of lambda values tried by the dense projection
    lmbd_start = 5
    lmbd_ratio = 5
    lmbd_steps = 8

    for iteration in range(max_iter):
        if logger:
            logger.debug('iteration: {:d}'.format(iteration))

        prev_x, prev_y = np.copy(current_x), np.copy(current_y)
        iter_proj_time = 0.

        # ---- first graph: head projection ----
        grad_x = func.get_gradient(current_x, current_y)
        if iteration == 0:  # treat the start point as the all-zero vector
            norm_grad_x = normalize_gradient(np.zeros_like(current_x), grad_x)
        else:
            norm_grad_x = normalize_gradient(current_x, grad_x)

        start_proj_time = time.time()
        re_nodes, _, _ = head_proj(
            edges=first_graph_edges, weights=first_graph_edge_weights, x=norm_grad_x, g=1,
            s=sparsity, budget=sparsity - 1., delta=1. / 169., max_iter=100, err_tol=1e-8,
            root=-1, pruning='strong', epsilon=1e-10, verbose=0)
        iter_proj_time += (time.time() - start_proj_time)
        print('head projection time for x: {:.5f}'.format(time.time() - start_proj_time))

        gamma_x = set(re_nodes)
        indicator_x = np.zeros(num_nodes)
        indicator_x[list(gamma_x)] = 1.
        # intermediate result only; current_x itself is not updated here
        if iteration == 0:
            tmp_x = np.zeros_like(current_x) + learning_rate * grad_x * indicator_x
        else:
            tmp_x = current_x + learning_rate * grad_x * indicator_x
        omega_x = {ind for ind, val in enumerate(tmp_x) if val != 0.}

        # ---- second graph: dense projection (note the swapped argument order) ----
        grad_y = func.get_gradient(current_y, current_x)
        if iteration == 0:
            norm_grad_y = normalize_gradient(np.zeros_like(current_y), grad_y)
        else:
            norm_grad_y = normalize_gradient(current_y, grad_y)
        # the dense projection requires non-negative node weights
        norm_grad_y = np.absolute(norm_grad_y)

        progression = [lmbd_start * lmbd_ratio ** i for i in range(lmbd_steps)]
        print('lmbd progression', progression)

        start_proj_time = time.time()
        print(norm_grad_y)
        gamma_y = dense_projection(second_graph, norm_grad_y, threshold, progression,
                                   shortest_path_lengths, normalize=True, sort=False)
        iter_proj_time += (time.time() - start_proj_time)
        print('head projection time for y: {:.5f}'.format(time.time() - start_proj_time))

        indicator_y = np.zeros(num_nodes)
        indicator_y[list(gamma_y)] = 1.
        # intermediate result only; current_y itself is not updated here
        if iteration == 0:
            tmp_y = np.zeros_like(current_y) + learning_rate * grad_y * indicator_y
        else:
            tmp_y = current_y + learning_rate * grad_y * indicator_y
        omega_y = {ind for ind, val in enumerate(tmp_y) if val != 0.}

        # ---- maximise the objective over the projected supports ----
        print('solve argmax')
        start_max_time = time.time()
        bx, by = func.argmax_obj_with_proj(current_x, current_y, omega_x, omega_y)
        print('solve argmax time: {:.5f}'.format(time.time() - start_max_time))

        # ---- tail projection for the first graph ----
        start_proj_time = time.time()
        re_nodes, _, _ = tail_proj(
            edges=first_graph_edges, weights=first_graph_edge_weights, x=bx, g=1, s=sparsity,
            budget=sparsity - 1., nu=2.5, max_iter=100, err_tol=1e-8, root=-1,
            pruning='strong', verbose=0)
        iter_proj_time += time.time() - start_proj_time
        print('tail projection time for x: {:.5f}'.format(time.time() - start_proj_time))

        kept_x = list(set(re_nodes))
        current_x = np.zeros_like(current_x)
        current_x[kept_x] = bx[kept_x]
        current_x = normalize(current_x)  # constrain current_x to [0, 1]

        # ---- tail projection for the second graph ----
        # A fresh list is built in case the helper modifies the one passed for the head step.
        # NOTE(review): `by` lies in [0, 1], so a different lambda range may be appropriate.
        progression = [lmbd_start * lmbd_ratio ** i for i in range(lmbd_steps)]
        start_proj_time = time.time()
        psi_y = dense_projection(second_graph, by, threshold, progression,
                                 shortest_path_lengths, normalize=False, sort=False)
        iter_proj_time += (time.time() - start_proj_time)
        print('tail projection time for y: {:.5f}'.format(time.time() - start_proj_time))

        kept_y = list(psi_y)
        current_y = np.zeros_like(current_y)
        current_y[kept_y] = by[kept_y]
        current_y = normalize(current_y)  # constrain current_y to [0, 1]

        print('{} iteration funval'.format(iteration), func.get_obj_val(current_x, current_y))

        acc_proj_time += iter_proj_time
        if logger:
            print('iter proj time: {:.5f}'.format(iter_proj_time))

        diff_norm = np.sqrt(np.linalg.norm(current_x - prev_x) ** 2
                            + np.linalg.norm(current_y - prev_y) ** 2)
        if logger:
            logger.debug('difference norm: {}'.format(diff_norm))

        if diff_norm < epsilon:
            break

    run_time = time.time() - start_time
    return current_x, current_y, run_time
def optimize(instance, sparsity, trade_off, learning_rate, max_iter, epsilon=1e-3, logger=None):
    """Run the block-structured IHT loop (fused head-projection / gradient step).

    Every outer iteration does two passes over the blocks:

    1. For each block, compute the block gradient, head-project its
       normalized version, and write a masked gradient step into a scratch
       vector ``bx_array`` (``current_x`` itself is not modified here).
    2. For each block, tail-project the scratch values, keep only the
       projected support, pass the result through ``normalize`` and write it
       back into ``current_x``.

    The loop stops after ``max_iter`` iterations or as soon as the L2 norm of
    the change of ``current_x`` over one iteration drops below ``epsilon``.

    Args:
        instance: mapping providing ``'graph'``, ``'true_subgraph'``,
            ``'features'``, ``'nodes_set'`` and ``'block_boundary_edges_dict'``.
        sparsity: passed as ``s`` (and ``budget = sparsity - 1``) to both
            projections.
        trade_off: forwarded to ``BlockSumEMS``.
        learning_rate: step size of the masked gradient step.
        max_iter: maximum number of outer iterations.
        epsilon: early-termination threshold on the iterate change.
        logger: optional logger; when falsy no diagnostics are computed.

    Returns:
        Tuple ``(current_x, run_time)``: the final iterate and the wall-clock
        seconds elapsed since just before the objective was constructed.
    """
    graph = instance['graph']
    true_subgraph = instance['true_subgraph']
    features = instance['features']
    nodes_set = instance['nodes_set']
    boundary_edges_dict = instance['block_boundary_edges_dict']

    num_blocks = len(nodes_set)
    num_nodes = graph.number_of_nodes()
    num_edges = graph.number_of_edges()

    nodes_id_dict = relabel_nodes(nodes_set)
    relabeled_edges_set = relabel_edges(graph, nodes_set, nodes_id_dict)

    if logger:
        logger.debug('-' * 5 + ' related info ' + '-' * 5)
        logger.debug('algorithm: graph block-structured IHT')
        logger.debug('sparsity: {:d}'.format(sparsity))
        logger.debug('max iteration: {:d}'.format(max_iter))
        logger.debug('number of nodes: {:d}'.format(num_nodes))
        logger.debug('number of edges: {:d}'.format(num_edges))
        logger.debug('number of blocks: {:d}'.format(num_blocks))
        logger.debug('-' * 5 + ' start iterating ' + '-' * 5)

    start_time = time.time()
    acc_proj_time = 0.

    func = BlockSumEMS(features=features,
                       trade_off=trade_off,
                       nodes_set=nodes_set,
                       boundary_edges_dict=boundary_edges_dict,
                       nodes_id_dict=nodes_id_dict)

    if logger:
        # Objective value of the ground-truth indicator vector, for reference.
        true_x = np.zeros(num_nodes)
        true_x[true_subgraph] = 1.
        true_obj_val, true_ems_val, true_penalty, _ = func.get_obj_val(
            true_x, boundary_edges_dict)
        logger.debug(
            'ground truth, obj value: {:.5f}, sum ems value: {:.5f}, penalty: {:.5f}'
            .format(true_obj_val, true_ems_val, true_penalty))

    current_x = func.get_init_x_zeros() + 1e-6

    for iteration in range(max_iter):
        if logger:
            logger.debug('iteration: {:d}'.format(iteration))

        prev_x = np.copy(current_x)
        iter_time = time.time()
        iter_proj_time = 0.

        # Pass 1: head projection and masked gradient step, block by block.
        bx_array = np.zeros_like(current_x)
        for b in range(num_blocks):
            # NOTE(review): reads use sorted(nodes_set[b]) while the write
            # below uses nodes_set[b] as-is; these agree only if every block
            # list is already sorted -- confirm against the data loader.
            sorted_nodes = sorted(nodes_set[b])
            block_x = current_x[sorted_nodes]
            boundary_xs_dict = get_boundary_xs(current_x,
                                               boundary_edges_dict[b],
                                               nodes_id_dict)
            feat = features[sorted_nodes]
            grad_x = func.get_gradient(block_x, feat, boundary_xs_dict)

            # The very first iteration normalizes against an all-zero block.
            if iteration == 0:
                block_x = np.zeros_like(block_x, dtype=np.float64)
            normalized_grad = normalize_gradient(block_x, grad_x)

            edges = np.array(relabeled_edges_set[b])
            costs = np.ones(len(edges))

            start_proj_time = time.time()
            # g: number of connected component
            re_nodes, _, _ = head_proj(edges=edges,
                                       weights=costs,
                                       x=normalized_grad,
                                       g=1,
                                       s=sparsity,
                                       budget=sparsity - 1.,
                                       delta=1. / 169.,
                                       max_iter=100,
                                       err_tol=1e-8,
                                       root=-1,
                                       pruning='strong',
                                       epsilon=1e-10,
                                       verbose=0)
            iter_proj_time += time.time() - start_proj_time

            # Indices returned by the projection are local (block) node ids.
            gamma_x = set(re_nodes)
            indicator_x = np.zeros(len(block_x))
            indicator_x[list(gamma_x)] = 1.

            bx_array[nodes_set[b]] = (current_x[nodes_set[b]] +
                                      learning_rate * grad_x * indicator_x)

        # Pass 2: tail projection of every block, then write back.
        for b in range(num_blocks):
            bx = bx_array[nodes_set[b]]
            edges = np.array(relabeled_edges_set[b])
            costs = np.ones(len(edges))

            start_proj_time = time.time()
            re_nodes, _, _ = tail_proj(edges=edges,
                                       weights=costs,
                                       x=bx,
                                       g=1,
                                       s=sparsity,
                                       budget=sparsity - 1.,
                                       nu=2.5,
                                       max_iter=100,
                                       err_tol=1e-8,
                                       root=-1,
                                       pruning='strong',
                                       verbose=0)
            iter_proj_time += time.time() - start_proj_time

            psi_x = list(set(re_nodes))
            block_x = np.zeros_like(current_x[nodes_set[b]])
            block_x[psi_x] = bx[psi_x]
            # constrain current block x in [0, 1]
            current_x[nodes_set[b]] = normalize(block_x)

        acc_proj_time += iter_proj_time

        if logger:
            obj_val, sum_ems_val, penalty, _ = func.get_obj_val(
                current_x, boundary_edges_dict)
            logger.debug(
                'objective value: {:.5f}, sum ems value: {:.5f}, penalty: {:.5f}'
                .format(obj_val, sum_ems_val, penalty))
            logger.debug('iteration time: {:.5f}'.format(time.time() - iter_time))
            logger.debug('iter projection time: {:.5f}'.format(iter_proj_time))
            # accumulative projection time
            logger.debug('acc projection time: {:.5f}'.format(acc_proj_time))
            logger.debug('-' * 10)

        diff_norm_x = np.linalg.norm(current_x - prev_x)
        if logger:
            logger.debug('difference {:.5f}'.format(diff_norm_x))
        if diff_norm_x < epsilon:
            break

    run_time = time.time() - start_time

    if logger:
        obj_val, sum_ems_val, penalty, _ = func.get_obj_val(
            current_x, boundary_edges_dict)
        logger.debug(
            'objective value: {:.5f}, sum ems value: {:.5f}, penalty: {:.5f}'.
            format(obj_val, sum_ems_val, penalty))
        logger.debug('run time of whole algorithm: {:.5f}'.format(run_time))
        logger.debug(
            'accumulative projection time: {:.5f}'.format(acc_proj_time))

    return current_x, run_time
def optimize(instance, sparsity, trade_off, learning_rate, max_iter, epsilon=1e-3, logger=None, tao=None):
    """Run the block-structured GHTP loop backed by ``ParallelSumEMS``.

    Every outer iteration:

    1. For each block, computes the block gradient, head-projects its
       normalized version and records the support (non-zero positions) of a
       trial gradient step restricted to the projected nodes.
    2. Calls ``func.argmax_obj_with_proj_parallel`` with the current iterate
       and the per-block supports.
    3. Tail-projects each block of the returned vector and writes the
       projected values back into ``current_x`` (no normalization here).

    The loop stops after ``max_iter`` iterations or when the L2 norm of the
    change of ``current_x`` over one iteration is below ``epsilon``.

    Args:
        instance: mapping providing ``'graph'``, ``'true_subgraph'``,
            ``'features'``, ``'nodes_set'`` and ``'block_boundary_edges_dict'``.
        sparsity: passed as ``s`` (and ``budget = sparsity - 1``) to both
            projections.
        trade_off: forwarded to ``ParallelSumEMS``.
        learning_rate: step size of the trial gradient step.
        max_iter: maximum number of outer iterations.
        epsilon: early-termination threshold on the iterate change.
        logger: optional logger; when falsy no diagnostics are computed.
        tao: forwarded unchanged to ``ParallelSumEMS``.

    Returns:
        Tuple ``(current_x, run_time)``.
    """
    graph = instance['graph']  # graph structure
    true_subgraph = instance['true_subgraph']  # ground truth
    features = instance['features']
    nodes_set = instance['nodes_set']
    boundary_edges_dict = instance['block_boundary_edges_dict']

    num_blocks = len(nodes_set)
    num_nodes = graph.number_of_nodes()
    num_edges = graph.number_of_edges()

    nodes_id_dict = relabel_nodes(nodes_set)
    relabeled_edges_set = relabel_edges(graph, nodes_set, nodes_id_dict)

    if logger:
        logger.debug('-' * 5 + ' related info ' + '-' * 5)
        logger.debug('algorithm: graph block-structured GHTP')
        logger.debug('sparsity: {:d}'.format(sparsity))
        logger.debug('max iteration: {:d}'.format(max_iter))
        logger.debug('number of nodes: {:d}'.format(num_nodes))
        logger.debug('number of edges: {:d}'.format(num_edges))
        logger.debug('number of blocks: {:d}'.format(num_blocks))
        logger.debug('-' * 5 + ' start iterating ' + '-' * 5)

    start_time = time.time()
    acc_proj_time = 0.

    func = ParallelSumEMS(features=features,
                          trade_off=trade_off,
                          nodes_set=nodes_set,
                          boundary_edges_dict=boundary_edges_dict,
                          nodes_id_dict=nodes_id_dict,
                          tao=tao)

    if logger:
        # Objective value of the ground-truth indicator vector, for reference.
        true_x = np.zeros(num_nodes)
        true_x[true_subgraph] = 1.
        true_obj_val, true_ems_val, true_penalty, _ = func.get_obj_val(
            true_x, boundary_edges_dict)
        logger.debug(
            'ground truth, obj value: {:.5f}, sum ems value: {:.5f}, penalty: {:.5f}'
            .format(true_obj_val, true_ems_val, true_penalty))

    current_x = func.get_init_x_zeros() + 1e-6

    for iteration in range(max_iter):
        if logger:
            logger.debug('iteration: {:d}'.format(iteration))

        prev_x = np.copy(current_x)
        iter_time = time.time()
        iter_proj_time = 0.

        # Step 1: per-block head projection and support of the trial step.
        omega_x_list = []
        for b in range(num_blocks):
            sorted_nodes = sorted(nodes_set[b])
            block_x = current_x[sorted_nodes]
            # key is boundary edge, value is adjacent x in other blocks
            boundary_xs_dict = get_boundary_xs(current_x,
                                               boundary_edges_dict[b],
                                               nodes_id_dict)
            feat = features[sorted_nodes]
            grad_x = func.get_gradient(block_x, feat, boundary_xs_dict)

            # The very first iteration starts from an all-zero block.
            if iteration == 0:
                block_x = np.zeros_like(block_x, dtype=np.float64)
            normalized_grad = normalize_gradient(block_x, grad_x)

            edges = np.array(relabeled_edges_set[b])
            costs = np.ones(len(edges))

            start_proj_time = time.time()
            # g: number of connected component
            re_nodes, _, _ = head_proj(edges=edges,
                                       weights=costs,
                                       x=normalized_grad,
                                       g=1,
                                       s=sparsity,
                                       budget=sparsity - 1.,
                                       delta=1. / 169.,
                                       max_iter=100,
                                       err_tol=1e-8,
                                       root=-1,
                                       pruning='strong',
                                       epsilon=1e-10,
                                       verbose=0)
            iter_proj_time += time.time() - start_proj_time

            gamma_x = set(re_nodes)
            indicator_x = np.zeros(len(block_x))
            indicator_x[list(gamma_x)] = 1.

            # Intermediate result only: current_x is not updated here.
            tmp_x = block_x + learning_rate * grad_x * indicator_x
            omega_x_list.append(
                {ind for ind, val in enumerate(tmp_x) if val != 0.})

        # Step 2: solve argmax problem with block coordinate ascent
        bx_array = func.argmax_obj_with_proj_parallel(current_x, omega_x_list)

        # Step 3: per-block tail projection and write-back.
        for b in range(num_blocks):
            bx = bx_array[nodes_set[b]]
            edges = np.array(relabeled_edges_set[b])
            costs = np.ones(len(edges))

            start_proj_time = time.time()
            re_nodes, _, _ = tail_proj(edges=edges,
                                       weights=costs,
                                       x=bx,
                                       g=1,
                                       s=sparsity,
                                       budget=sparsity - 1.,
                                       nu=2.5,
                                       max_iter=100,
                                       err_tol=1e-8,
                                       root=-1,
                                       pruning='strong',
                                       verbose=0)
            iter_proj_time += time.time() - start_proj_time

            psi_x = list(set(re_nodes))
            block_x = np.zeros_like(current_x[nodes_set[b]])
            block_x[psi_x] = bx[psi_x]
            current_x[nodes_set[b]] = block_x

        acc_proj_time += iter_proj_time

        if logger:
            obj_val, sum_ems_val, penalty, _ = func.get_obj_val(
                current_x, boundary_edges_dict)
            logger.debug(
                'objective value: {:.5f}, sum ems value: {:.5f}, penalty: {:.5f}'
                .format(obj_val, sum_ems_val, penalty))
            logger.debug('iteration time: {:.5f}'.format(time.time() - iter_time))
            logger.debug('iter projection time: {:.5f}'.format(iter_proj_time))
            # accumulative projection time
            logger.debug('acc projection time: {:.5f}'.format(acc_proj_time))
            logger.debug('-' * 10)

        diff_norm_x = np.linalg.norm(current_x - prev_x)
        if logger:
            logger.debug('difference norm x: {:.5f}'.format(diff_norm_x))
        if diff_norm_x < epsilon:
            break

    run_time = time.time() - start_time

    if logger:
        obj_val, sum_ems_val, penalty, _ = func.get_obj_val(
            current_x, boundary_edges_dict)
        logger.debug(
            'objective value: {:.5f}, sum ems value: {:.5f}, penalty: {:.5f}'.
            format(obj_val, sum_ems_val, penalty))
        logger.debug('run time of whole algorithm: {:.5f}'.format(run_time))
        logger.debug(
            'accumulative projection time: {:.5f}'.format(acc_proj_time))

    return current_x, run_time
def optimize(instance, sparsity, trade_off, learning_rate, max_iter, epsilon):
    """Run the cyclic block-coordinate variant of block-structured IHT.

    Blocks are updated one after another; each block update (head projection,
    masked gradient step, tail projection, ``normalize``) is written into
    ``current_x`` immediately, so later blocks see the updated values.
    Diagnostics go to the module-level ``logger``.

    Args:
        instance: mapping providing ``'graph'``, ``'subgraph'``,
            ``'features'``, ``'block_node_sets'`` and
            ``'block_boundary_edges_dict'``.
        sparsity: passed as ``s`` (and ``budget = sparsity - 1``) to both
            projections.
        trade_off: forwarded to ``BlockSumEMS``.
        learning_rate: step size of the masked gradient step.
        max_iter: number of sweeps over the blocks.
        epsilon: threshold on the change caused by a single block update.

    Returns:
        The final iterate ``current_x``.
    """
    graph = instance['graph']
    true_subgraph = instance['subgraph']
    features = instance['features']
    block_node_sets = instance['block_node_sets']
    block_boundary_edges_dict = instance['block_boundary_edges_dict']

    num_nodes = graph.number_of_nodes()
    num_edges = graph.number_of_edges()
    num_blocks = len(block_boundary_edges_dict)

    logger.debug('-' * 5 + ' related info ' + '-' * 5)
    logger.debug('algorithm: graph block-structured IHT')
    logger.debug('sparsity: {:d}'.format(sparsity))
    logger.debug('max iteration: {:d}'.format(max_iter))
    logger.debug('number of nodes: {:d}'.format(num_nodes))
    logger.debug('number of edges: {:d}'.format(num_edges))
    logger.debug('number of blocks: {:d}'.format(num_blocks))

    node_id_dict = relabel_nodes(block_node_sets)
    relabeled_edge_sets = relabel_edges(graph, block_node_sets, node_id_dict)

    logger.debug('-' * 5 + ' start iterating ' + '-' * 5)

    start_time = time.time()
    acc_proj_time = 0.

    func = BlockSumEMS(features=features,
                       nodes_set=block_node_sets,
                       boundary_edges_dict=block_boundary_edges_dict,
                       nodes_id_dict=node_id_dict,
                       trade_off=trade_off)

    # Objective value of the ground-truth indicator vector, for reference.
    true_x = np.zeros(num_nodes)
    true_x[true_subgraph] = 1.
    true_obj_val, true_sum_ems_val, true_penalty = func.get_obj_val(true_x)
    logger.debug(
        'ground truth, obj value: {:.5f}, sum ems value: {:.5f}, penalty: {:.5f}'
        .format(true_obj_val, true_sum_ems_val, true_penalty))

    current_x = func.get_init_x_zeros() + 1e-6

    for iteration in range(max_iter):
        # cyclic block coordinate version
        for b in range(num_blocks):
            logger.debug('iteration: {:d}'.format(iteration))

            prev_x = np.copy(current_x)
            iter_time = time.time()
            iter_proj_time = 0.

            sorted_node_set = sorted(block_node_sets[b])
            block_features = features[sorted_node_set]

            # --- head projection on the normalized block gradient ---
            block_x = current_x[sorted_node_set]
            block_boundary_edge_x_dict = get_boundary_xs(
                current_x, block_boundary_edges_dict[b], node_id_dict)
            block_grad = func.get_gradient(block_x, block_features,
                                           block_boundary_edge_x_dict)
            # The first sweep normalizes against an all-zero block.
            head_x = block_x if iteration > 0 else np.zeros_like(
                block_x, dtype=np.float64)
            normalized_block_grad = normalize_gradient(head_x, block_grad)

            edges = np.array(relabeled_edge_sets[b])
            edge_weights = np.ones(len(edges))

            start_proj_time = time.time()
            re_nodes, _, _ = head_proj(edges=edges,
                                       weights=edge_weights,
                                       x=normalized_block_grad,
                                       g=1,
                                       s=sparsity,
                                       budget=sparsity - 1.,
                                       delta=1. / 169.,
                                       max_iter=100,
                                       err_tol=1e-8,
                                       root=-1,
                                       pruning='strong',
                                       epsilon=1e-10,
                                       verbose=0)
            iter_proj_time += time.time() - start_proj_time

            # note, elements in block_omega_x are all local (block) node id
            block_omega_x = set(re_nodes)

            # --- update one block ---
            block_indicator_x = np.zeros_like(current_x[sorted_node_set])
            block_indicator_x[list(block_omega_x)] = 1.

            # NOTE(review): the gradient is recomputed with the same inputs as
            # block_grad above (current_x has not changed in between); kept as
            # in the original because the helpers' purity is not visible here.
            block_boundary_edge_x_dict = get_boundary_xs(
                current_x, block_boundary_edges_dict[b], node_id_dict)
            step_grad = func.get_gradient(current_x[sorted_node_set],
                                          block_features,
                                          block_boundary_edge_x_dict)
            current_x[sorted_node_set] = (current_x[sorted_node_set] +
                                          learning_rate * step_grad *
                                          block_indicator_x)
            block_bx = current_x[sorted_node_set]

            # --- tail projection and write-back ---
            start_proj_time = time.time()
            re_nodes, _, _ = tail_proj(edges=edges,
                                       weights=edge_weights,
                                       x=block_bx,
                                       g=1,
                                       s=sparsity,
                                       budget=sparsity - 1.,
                                       nu=2.5,
                                       max_iter=100,
                                       err_tol=1e-8,
                                       root=-1,
                                       pruning='strong',
                                       verbose=0)
            iter_proj_time += time.time() - start_proj_time

            psi_x = list(set(re_nodes))
            projected_x = np.zeros_like(current_x[sorted_node_set])
            projected_x[psi_x] = block_bx[psi_x]
            # constrain current block x in [0, 1]
            current_x[sorted_node_set] = normalize(projected_x)

            acc_proj_time += iter_proj_time

            obj_val, sum_ems_val, penalty = func.get_obj_val(current_x)
            logger.debug(
                'objective value: {:.5f}, sum ems value: {:.5f}, penalty: {:.5f}'
                .format(obj_val, sum_ems_val, penalty))
            logger.debug('iteration time: {:.5f}'.format(time.time() - iter_time))
            logger.debug('iter projection time: {:.5f}'.format(iter_proj_time))
            # accumulative projection time
            logger.debug('acc projection time: {:.5f}'.format(acc_proj_time))

            pred_subgraph = np.nonzero(current_x)[0]
            prec, rec, fm, _ = evaluate_block(instance['subgraph'],
                                              pred_subgraph)
            logger.debug(
                'precision: {:.5f}, recall: {:.5f}, f-measure: {:.5f}'.format(
                    prec, rec, fm))
            logger.debug('-' * 10)

            diff_norm_x = np.linalg.norm(current_x - prev_x)
            if diff_norm_x < epsilon:
                # Reported through the logger like every other diagnostic of
                # this function (the original used a bare print here).
                logger.debug('difference {:.5f}'.format(diff_norm_x))
                # NOTE(review): this only leaves the inner block loop; the
                # next sweep restarts at block 0 and the remaining blocks of
                # this sweep are skipped. Confirm whether the whole algorithm
                # was meant to stop here.
                break

    run_time = time.time() - start_time

    obj_val, sum_ems_val, penalty = func.get_obj_val(current_x)
    logger.debug(
        'objective value: {:.5f}, sum ems value: {:.5f}, penalty: {:.5f}'.
        format(obj_val, sum_ems_val, penalty))
    logger.debug('run time of whole algorithm: {:.5f}'.format(run_time))
    logger.debug('accumulative projection time: {:.5f}'.format(acc_proj_time))

    return current_x
def optimize(instance, sparsity, trade_off, learning_rate, max_iter, epsilon=1e-3, logger=None):
    """Run block-structured IHT with separate projection and update passes.

    One outer iteration consists of three passes over the blocks:
    head projection (collecting one node set per block), a masked gradient
    step into a scratch vector, and tail projection followed by
    ``normalize`` and write-back into ``current_x``.

    Args:
        instance: mapping providing ``'graph'``, ``'true_subgraph'``,
            ``'features'``, ``'nodes_set'`` and ``'block_boundary_edges_dict'``.
        sparsity: passed as ``s`` (and ``budget = sparsity - 1``) to both
            projections.
        trade_off: forwarded to ``BlockSumEMS``.
        learning_rate: step size of the masked gradient step.
        max_iter: maximum number of outer iterations.
        epsilon: early-termination threshold on the L2 change of the iterate.
        logger: optional logger; when falsy no diagnostics are computed.

    Returns:
        Tuple ``(current_x, run_time)``.
    """
    graph = instance['graph']
    true_subgraph = instance['true_subgraph']
    features = instance['features']
    nodes_set = instance['nodes_set']
    boundary_edges_dict = instance['block_boundary_edges_dict']

    num_blocks = len(nodes_set)
    num_nodes = graph.number_of_nodes()
    num_edges = graph.number_of_edges()

    nodes_id_dict = relabel_nodes(nodes_set)
    relabeled_edges_set = relabel_edges(graph, nodes_set, nodes_id_dict)

    if logger:
        logger.debug('-' * 5 + ' related info ' + '-' * 5)
        logger.debug('algorithm: graph block-structured IHT')
        logger.debug('sparsity: {:d}'.format(sparsity))
        logger.debug('max iteration: {:d}'.format(max_iter))
        logger.debug('number of nodes: {:d}'.format(num_nodes))
        logger.debug('number of edges: {:d}'.format(num_edges))
        logger.debug('number of blocks: {:d}'.format(num_blocks))
        logger.debug('-' * 5 + ' start iterating ' + '-' * 5)

    start_time = time.time()
    acc_proj_time = 0.

    func = BlockSumEMS(features=features,
                       trade_off=trade_off,
                       nodes_set=nodes_set,
                       boundary_edges_dict=boundary_edges_dict,
                       nodes_id_dict=nodes_id_dict)

    if logger:
        # Objective value of the ground-truth indicator vector, for reference.
        true_x = np.zeros(num_nodes)
        true_x[true_subgraph] = 1.
        true_obj_val, true_ems_val, true_penalty, _ = func.get_obj_val(
            true_x, boundary_edges_dict)
        logger.debug(
            'ground truth, obj value: {:.5f}, sum ems value: {:.5f}, penalty: {:.5f}'
            .format(true_obj_val, true_ems_val, true_penalty))

    current_x = func.get_init_x_zeros() + 1e-6

    for step in range(max_iter):
        if logger:
            logger.debug('iteration: {:d}'.format(step))

        prev_x = np.copy(current_x)
        iter_time = time.time()
        iter_proj_time = 0.

        # Pass 1: head projection per block.
        omega_x_list = []
        for b in range(num_blocks):
            ordered_nodes = sorted(nodes_set[b])
            block_x = current_x[ordered_nodes]
            boundary_xs_dict = get_boundary_xs(current_x,
                                               boundary_edges_dict[b],
                                               nodes_id_dict)
            feat = features[ordered_nodes]
            grad_x = func.get_gradient(block_x, feat, boundary_xs_dict)

            # The first iteration normalizes against an all-zero block.
            if step > 0:
                reference_x = block_x
            else:
                reference_x = np.zeros_like(block_x, dtype=np.float64)
            normalized_grad = normalize_gradient(reference_x, grad_x)

            edges = np.array(relabeled_edges_set[b])
            costs = np.ones(len(edges))

            proj_started = time.time()
            # g: number of connected component
            head_nodes, _, _ = head_proj(edges=edges,
                                         weights=costs,
                                         x=normalized_grad,
                                         g=1,
                                         s=sparsity,
                                         budget=sparsity - 1.,
                                         delta=1. / 169.,
                                         max_iter=100,
                                         err_tol=1e-8,
                                         root=-1,
                                         pruning='strong',
                                         epsilon=1e-10,
                                         verbose=0)
            iter_proj_time += (time.time() - proj_started)

            # note, elements collected here are all local (block) node id
            omega_x_list.append(set(head_nodes))

        # Pass 2: masked gradient step into a scratch vector.
        bx_array = np.zeros_like(current_x)
        for b in range(num_blocks):
            block_nodes = nodes_set[b]
            block_x = current_x[block_nodes]
            feat = features[sorted(block_nodes)]

            indicator_x = np.zeros(len(block_x))  # zeros, then mark support
            indicator_x[list(omega_x_list[b])] = 1.

            boundary_xs_dict = get_boundary_xs(current_x,
                                               boundary_edges_dict[b],
                                               nodes_id_dict)
            grad_x = func.get_gradient(block_x, feat, boundary_xs_dict)
            bx_array[block_nodes] = (current_x[block_nodes] +
                                     learning_rate * grad_x * indicator_x)

        # Pass 3: tail projection per block, then write back.
        for b in range(num_blocks):
            block_nodes = nodes_set[b]
            bx = bx_array[block_nodes]

            # edges and edge weights of the current block
            edges = np.array(relabeled_edges_set[b])
            costs = np.ones(len(edges))

            proj_started = time.time()
            tail_nodes, _, _ = tail_proj(edges=edges,
                                         weights=costs,
                                         x=bx,
                                         g=1,
                                         s=sparsity,
                                         budget=sparsity - 1.,
                                         nu=2.5,
                                         max_iter=100,
                                         err_tol=1e-8,
                                         root=-1,
                                         pruning='strong',
                                         verbose=0)
            iter_proj_time += (time.time() - proj_started)

            keep = list(set(tail_nodes))
            projected = np.zeros_like(current_x[block_nodes])
            projected[keep] = bx[keep]
            # constrain current block x in [0, 1]
            current_x[block_nodes] = normalize(projected)

        acc_proj_time += iter_proj_time

        if logger:
            obj_val, sum_ems_val, penalty, _ = func.get_obj_val(
                current_x, boundary_edges_dict)
            logger.debug(
                'objective value: {:.5f}, sum ems value: {:.5f}, penalty: {:.5f}'
                .format(obj_val, sum_ems_val, penalty))
            logger.debug('iteration time: {:.5f}'.format(time.time() - iter_time))
            logger.debug('iter projection time: {:.5f}'.format(iter_proj_time))
            # accumulative projection time
            logger.debug('acc projection time: {:.5f}'.format(acc_proj_time))
            logger.debug('-' * 10)

        diff_norm_x = np.linalg.norm(current_x - prev_x)
        if logger:
            logger.debug('difference {:.5f}'.format(diff_norm_x))
        if diff_norm_x < epsilon:
            break

    run_time = time.time() - start_time

    if logger:
        obj_val, sum_ems_val, penalty, _ = func.get_obj_val(
            current_x, boundary_edges_dict)
        logger.debug(
            'objective value: {:.5f}, sum ems value: {:.5f}, penalty: {:.5f}'.
            format(obj_val, sum_ems_val, penalty))
        logger.debug('run time of whole algorithm: {:.5f}'.format(run_time))
        logger.debug(
            'accumulative projection time: {:.5f}'.format(acc_proj_time))

    return current_x, run_time


# def run_instance(instance, sparsity, trade_off, learning_rate, max_iter=10000, epsilon=1e-3):
#
#     opt_x = optimize(instance, sparsity, trade_off, learning_rate, max_iter, epsilon)
#
#     pred_subgraph = np.nonzero(opt_x)[0]
#
#     prec, rec, fm, iou = evaluate_block(instance['subgraph'], pred_subgraph)
#
#     logger.debug('-' * 5 + ' raw performance ' + '-' * 5)
#     logger.debug('precision: {:.5f}'.format(prec))
#     logger.debug('recall : {:.5f}'.format(rec))
#     logger.debug('f-measure: {:.5f}'.format(fm))
#     logger.debug('iou : {:.5f}'.format(iou))
#
#     refined_pred_subgraph = post_process_block(instance['graph'], pred_subgraph)
#     prec, rec, fm, iou = evaluate_block(instance['subgraph'], refined_pred_subgraph)
#     logger.debug('-' * 5 + ' refined performance ' + '-' * 5)
#     logger.debug('precision: {:.5f}'.format(prec))
#     logger.debug('recall : {:.5f}'.format(rec))
#     logger.debug('f-measure: {:.5f}'.format(fm))
#     logger.debug('iou : {:.5f}'.format(iou))
#
# if __name__ == '__main__':
#
#     path = '/network/rit/lab/ceashpc/share_data/GraphOpt/ijcai/app2/CondMat'
#     fn = 'test_9.pkl'
#
#     rfn = os.path.join(path, fn)
#     with open(rfn, 'rb') as rfile:
#         dataset = pickle.load(rfile)
#
#     instance = dataset[0]
#     sparsity = 534
#     trade_off = 0.0001
#     learning_rate = 1.
#     run_instance(instance, sparsity, trade_off, learning_rate)
def optimize(instance, sparsity, trade_off, learning_rate, max_iter, epsilon=1e-3, logger=None):
    """Run the GHTP loop over time-stamp blocks backed by ``GlobalEMS``.

    Every outer iteration:

    1. For each time stamp, computes the partial gradient, head-projects its
       normalized version and records the support (non-zero positions) of a
       trial gradient step restricted to the projected nodes.
    2. Calls ``func.argmax_obj_with_proj`` with the current iterate and the
       per-time-stamp supports.
    3. Tail-projects each row of the returned array, passes it through
       ``normalize`` and writes it back into ``current_x_array``.

    The loop stops after ``max_iter`` iterations or when the norm of the
    change of ``current_x_array`` over one iteration is below ``epsilon``.

    Args:
        instance: mapping providing ``'graph'``, ``'true_subgraphs'`` (one
            node collection per time stamp) and ``'features'``.
        sparsity: passed as ``s`` (and ``budget = sparsity - 1``) to both
            projections.
        trade_off: forwarded to ``GlobalEMS``.
        learning_rate: step size of the trial gradient step.
        max_iter: maximum number of outer iterations.
        epsilon: early-termination threshold on the iterate change.
        logger: optional logger; when falsy no diagnostics are emitted.

    Returns:
        Tuple ``(current_x_array, run_time)``.
    """
    graph = instance['graph']  # graph structure
    true_subgraphs = instance['true_subgraphs']  # ground truth
    edges = np.array(graph.edges)

    # Signal interval for the log header: first and last time stamp with a
    # non-empty ground-truth subgraph ((0, 0) when there is none).
    signal_stamps = [t for t, subgraph in enumerate(true_subgraphs) if subgraph]
    start, end = (signal_stamps[0], signal_stamps[-1]) if signal_stamps else (0, 0)

    num_time_stamps = len(true_subgraphs)
    num_nodes = graph.number_of_nodes()
    num_edges = graph.number_of_edges()
    edge_weights = np.ones(num_edges)

    if logger:
        logger.debug('-' * 5 + ' related info ' + '-' * 5)
        logger.debug('algorithm: graph block-structured GHTP')
        logger.debug('sparsity: {:d}'.format(sparsity))
        logger.debug('max iteration: {:d}'.format(max_iter))
        logger.debug('number of nodes: {:d}'.format(num_nodes))
        logger.debug('number of edges: {:d}'.format(num_edges))
        logger.debug('number of time stamps: {:d}'.format(num_time_stamps))
        logger.debug('signal interval: [{:d}, {:d}]'.format(start, end))
        logger.debug('-' * 5 + ' start iterating ' + '-' * 5)

    start_time = time.time()
    acc_proj_time = 0.

    func = GlobalEMS(features=instance['features'],
                     trade_off=trade_off,
                     learning_rate=1.,
                     max_iter=1)

    if logger:
        # Objective value of the ground-truth indicator array, for reference.
        true_x_array = np.zeros((num_time_stamps, num_nodes))
        for t, true_subgraph in enumerate(true_subgraphs):
            true_x_array[t][true_subgraph] = 1.
        true_obj_val, true_gloabl_ems_val, true_penalty = func.get_obj_val(
            true_x_array)
        logger.debug(
            'ground truth, obj value: {:.5f}, global ems value: {:.5f}, penalty: {:.5f}'
            .format(true_obj_val, true_gloabl_ems_val, true_penalty))

    # initialization
    current_x_array = func.get_init_x_zeros() + 1e-6

    for iteration in range(max_iter):
        if logger:
            logger.debug('iteration: {:d}'.format(iteration))

        prev_x_array = np.copy(current_x_array)
        iter_time = time.time()
        iter_proj_time = 0.

        # Step 1: head projection set on each block (time stamp).
        omega_x_list = []
        for t in range(num_time_stamps):
            # partial gradient of the current block
            grad_x = func.get_gradient(current_x_array, t)
            if logger:
                # Previously an unconditional print() on every time stamp.
                logger.debug('partial gradient at time stamp %d: %s', t, grad_x)

            # start from all zeros when iteration == 0
            if iteration > 0:
                current_x = current_x_array[t]
            else:
                current_x = np.zeros_like(current_x_array[t], dtype=np.float64)

            # Per the original note: invalidates gradient entries that would
            # push the update outside [0, 1].
            normalized_grad = normalize_gradient(current_x, grad_x)

            start_proj_time = time.time()
            re_nodes, _, _ = head_proj(edges=edges,
                                       weights=edge_weights,
                                       x=normalized_grad,
                                       g=1,
                                       s=sparsity,
                                       budget=sparsity - 1.,
                                       delta=1. / 169.,
                                       max_iter=100,
                                       err_tol=1e-8,
                                       root=-1,
                                       pruning='strong',
                                       epsilon=1e-10,
                                       verbose=0)  # head projection
            iter_proj_time += time.time() - start_proj_time

            gamma_x = set(re_nodes)
            indicator_x = np.zeros(num_nodes)
            indicator_x[list(gamma_x)] = 1.

            # Intermediate result only: the iterate is not updated here.
            # (The original comment mentions normalized_grad as an alternative
            # to grad_x for this step.)
            tmp_x = current_x + learning_rate * grad_x * indicator_x
            omega_x = {ind for ind, val in enumerate(tmp_x) if val != 0.}
            omega_x_list.append(omega_x)
            if logger:
                logger.debug('support at time stamp %d: %s', t, sorted(omega_x))

        # Step 2: solve argmax problem with block coordinate ascent
        bx_array = func.argmax_obj_with_proj(current_x_array, omega_x_list)

        # Step 3: tail projection per time stamp and write-back.
        for t in range(num_time_stamps):
            bx = bx_array[t]

            start_proj_time = time.time()
            re_nodes, _, _ = tail_proj(edges=edges,
                                       weights=edge_weights,
                                       x=bx,
                                       g=1,
                                       s=sparsity,
                                       budget=sparsity - 1.,
                                       nu=2.5,
                                       max_iter=100,
                                       err_tol=1e-8,
                                       root=-1,
                                       pruning='strong',
                                       verbose=0)  # tail projection
            iter_proj_time += time.time() - start_proj_time

            psi_x = list(set(re_nodes))
            current_x = np.zeros_like(current_x_array[t])
            current_x[psi_x] = bx[psi_x]
            # constrain current_x in [0, 1]
            current_x_array[t] = normalize(current_x)

        acc_proj_time += iter_proj_time

        if logger:
            obj_val, global_ems_val, penalty = func.get_obj_val(
                current_x_array)
            logger.debug(
                'objective value: {:.5f}, global ems value: {:.5f}, penalty: {:.5f}'
                .format(obj_val, global_ems_val, penalty))
            logger.debug('iteration time: {:.5f}'.format(time.time() - iter_time))
            logger.debug('iter projection time: {:.5f}'.format(iter_proj_time))
            # accumulative projection time
            logger.debug('acc projection time: {:.5f}'.format(acc_proj_time))
            logger.debug('-' * 10)

        diff_norm_x = np.linalg.norm(current_x_array - prev_x_array)
        if logger:
            logger.debug('difference norm x: {:.5f}'.format(diff_norm_x))
        if diff_norm_x < epsilon:
            break

    run_time = time.time() - start_time

    if logger:
        obj_val, global_ems_val, penalty = func.get_obj_val(current_x_array)
        logger.debug(
            'objective value: {:.5f}, global ems value: {:.5f}, penalty: {:.5f}'
            .format(obj_val, global_ems_val, penalty))
        logger.debug('run time of whole algorithm: {:.5f}'.format(run_time))
        logger.debug(
            'accumulative projection time: {:.5f}'.format(acc_proj_time))

    return current_x_array, run_time
def optimize(instance, sparsity, trade_off, learning_rate, max_iter, epsilon=1e-3):
    """Run the cyclic IHT variant over time-stamp blocks backed by ``GlobalEMS``.

    Time stamps are updated one after another; each update (head projection,
    masked gradient step, tail projection, ``normalize``) is written into
    ``current_x_array`` immediately, so it affects the gradients of the
    blocks updated afterwards. Diagnostics go to the module-level ``logger``.

    Args:
        instance: mapping providing ``'graph'``, ``'subgraphs'`` (one node
            collection per time stamp) and ``'features'``.
        sparsity: passed as ``s`` (and ``budget = sparsity - 1``) to both
            projections.
        trade_off: forwarded to ``GlobalEMS``.
        learning_rate: step size of the masked gradient step.
        max_iter: number of sweeps over the time stamps.
        epsilon: threshold on the change caused by a single block update.

    Returns:
        The final iterate ``current_x_array``.
    """
    graph = instance['graph']
    true_subgraphs = instance['subgraphs']
    edges = np.array(graph.edges)

    # Signal interval for the log header: first and last time stamp with a
    # non-empty ground-truth subgraph ((0, 0) when there is none).
    signal_stamps = [t for t, subgraph in enumerate(true_subgraphs) if subgraph]
    start, end = (signal_stamps[0], signal_stamps[-1]) if signal_stamps else (0, 0)

    num_time_stamps = len(true_subgraphs)
    num_nodes = graph.number_of_nodes()
    num_edges = graph.number_of_edges()
    edge_weights = np.ones(num_edges)

    logger.debug('-' * 5 + ' related info ' + '-' * 5)
    logger.debug('algorithm: graph block-structured IHT')
    logger.debug('sparsity: {:d}'.format(sparsity))
    logger.debug('max iteration: {:d}'.format(max_iter))
    logger.debug('number of nodes: {:d}'.format(num_nodes))
    logger.debug('number of edges: {:d}'.format(num_edges))
    logger.debug('number of time stamps: {:d}'.format(num_time_stamps))
    logger.debug('signal interval: [{:d}, {:d}]'.format(start, end))
    logger.debug('-' * 5 + ' start iterating ' + '-' * 5)

    start_time = time.time()
    acc_proj_time = 0.

    func = GlobalEMS(features=instance['features'], trade_off=trade_off)

    # Objective value of the ground-truth indicator array, for reference.
    true_x_array = []
    for true_subgraph in true_subgraphs:
        true_x = np.zeros(num_nodes)
        true_x[true_subgraph] = 1.
        true_x_array.append(true_x)
    true_x_array = np.array(true_x_array)
    true_obj_val, true_gloabl_ems_val, true_penalty = func.get_obj_val(true_x_array)
    logger.debug('ground truth, obj value: {:.5f}, global ems value: {:.5f}, penalty: {:.5f}'.format(true_obj_val, true_gloabl_ems_val, true_penalty))

    # Variables start from zero plus a small amount; per the original note
    # this avoids a "divided by zero" error.
    current_x_array = func.get_init_x_zeros() + 1e-6

    for iteration in range(max_iter):
        # update each block cyclically
        for t in range(num_time_stamps):
            logger.debug('iteration: {:d}, time stamps: {:d}'.format(iteration, t))

            prev_x_array = np.copy(current_x_array)
            iter_time = time.time()
            iter_proj_time = 0.

            # --- head projection on the normalized partial gradient ---
            grad_x = func.get_gradient(current_x_array, t)
            # The first sweep normalizes against an all-zero block.
            if iteration > 0:
                current_x = current_x_array[t]
            else:
                current_x = np.zeros_like(current_x_array[t], dtype=np.float64)
            normalized_grad = normalize_gradient(current_x, grad_x)

            start_proj_time = time.time()
            re_nodes, _, _ = head_proj(edges=edges,
                                       weights=edge_weights,
                                       x=normalized_grad,
                                       g=1,
                                       s=sparsity,
                                       budget=sparsity - 1.,
                                       delta=1. / 169.,
                                       max_iter=100,
                                       err_tol=1e-8,
                                       root=-1,
                                       pruning='strong',
                                       epsilon=1e-10,
                                       verbose=0)
            iter_proj_time += time.time() - start_proj_time

            omega_x = set(re_nodes)
            indicator_x = np.zeros(num_nodes)
            indicator_x[list(omega_x)] = 1.

            # bx as intermediate variables stores updated block variables.
            # NOTE(review): the gradient is recomputed with the same inputs as
            # grad_x above; kept as in the original because the helpers'
            # purity is not visible here.
            bx = current_x_array[t] + learning_rate * func.get_gradient(
                current_x_array, t) * indicator_x

            # --- tail projection ---
            start_proj_time = time.time()
            re_nodes, _, _ = tail_proj(edges=edges,
                                       weights=edge_weights,
                                       x=bx,
                                       g=1,
                                       s=sparsity,
                                       budget=sparsity - 1.,
                                       nu=2.5,
                                       max_iter=100,
                                       err_tol=1e-8,
                                       root=-1,
                                       pruning='strong',
                                       verbose=0)
            iter_proj_time += time.time() - start_proj_time

            psi_x = list(set(re_nodes))
            current_x = np.zeros_like(current_x_array[t])
            current_x[psi_x] = bx[psi_x]
            # note, restrict current_x in [0, 1]
            current_x = normalize(current_x)
            # update current variables, will affect other gradient of blocks
            current_x_array[t] = current_x

            acc_proj_time += iter_proj_time

            obj_val, global_ems_val, penalty = func.get_obj_val(current_x_array)
            logger.debug('objective value: {:.5f}, global ems value: {:.5f}, penalty: {:.5f}'.format(obj_val, global_ems_val, penalty))
            logger.debug('iteration time: {:.5f}'.format(time.time() - iter_time))
            logger.debug('iter projection time: {:.5f}'.format(iter_proj_time))
            # accumulative projection time
            logger.debug('acc projection time: {:.5f}'.format(acc_proj_time))
            logger.debug('-' * 10)

            diff_norm_x = np.linalg.norm(current_x_array - prev_x_array)
            logger.debug('difference norm x: {:.5f}'.format(diff_norm_x))
            if diff_norm_x < epsilon:
                # NOTE(review): this only leaves the inner time-stamp loop;
                # the next sweep restarts at t = 0. The original carried a
                # commented-out flag that would also have left the outer
                # loop -- confirm which behavior is intended.
                break

    run_time = time.time() - start_time

    obj_val, global_ems_val, penalty = func.get_obj_val(current_x_array)
    logger.debug('objective value: {:.5f}, global ems value: {:.5f}, penalty: {:.5f}'.format(obj_val, global_ems_val, penalty))
    logger.debug('run time of whole algorithm: {:.5f}'.format(run_time))
    logger.debug('accumulative projection time: {:.5f}'.format(acc_proj_time))

    return current_x_array
def optimize(instance, sparsity, trade_off, max_iter=10, epsilon=1e-3):
    """Run the matching-pursuit variant over time-stamp blocks (``GlobalEMS``).

    Every outer iteration:

    1. For each time stamp, head-projects the normalized partial gradient and
       merges the resulting node set with the support (non-zero positions) of
       the current block.
    2. Calls ``func.argmax_obj_with_proj`` with the current iterate and the
       merged per-time-stamp supports.
    3. Tail-projects each row of the returned array and writes the projected
       values back into ``current_x_array`` (no normalization here).

    The loop stops after ``max_iter`` iterations or when the norm of the
    change of ``current_x_array`` over one iteration is below ``epsilon``.
    Diagnostics go to the module-level ``logger``.

    Args:
        instance: mapping providing ``'graph'``, ``'subgraphs'`` (one node
            collection per time stamp) and ``'features'``.
        sparsity: passed as ``s`` (and ``budget = sparsity - 1``) to both
            projections.
        trade_off: forwarded to ``GlobalEMS``.
        max_iter: maximum number of outer iterations.
        epsilon: early-termination threshold on the iterate change.

    Returns:
        The final iterate ``current_x_array``.
    """
    graph = instance['graph']
    true_subgraphs = instance['subgraphs']
    edges = np.array(graph.edges)

    num_time_stamps = len(true_subgraphs)
    num_nodes = graph.number_of_nodes()
    num_edges = graph.number_of_edges()
    edge_weights = np.ones(num_edges)

    # Signal interval for the log header: a run of consecutive time stamps
    # with a non-empty ground-truth subgraph. With several runs the values of
    # the last one win, as before.
    in_signal = False
    start = end = 0
    for t, subgraph in enumerate(true_subgraphs):
        if subgraph and not in_signal:
            start = t
            in_signal = True
        if not subgraph and in_signal:
            end = t - 1
            in_signal = False
    if in_signal:
        # The signal runs through the final time stamp; previously `end` was
        # never assigned in this case and the logged interval was wrong.
        end = num_time_stamps - 1

    logger.debug('-' * 5 + ' related info ' + '-' * 5)
    logger.debug('algorithm: graph block-structured matching pursuit')
    logger.debug('sparsity: {:d}'.format(sparsity))
    logger.debug('max iteration: {:d}'.format(max_iter))
    logger.debug('number of nodes: {:d}'.format(num_nodes))
    logger.debug('number of edges: {:d}'.format(num_edges))
    logger.debug('number of time stamps: {:d}'.format(num_time_stamps))
    logger.debug('signal interval: [{:d}, {:d}]'.format(start, end))
    logger.debug('-' * 5 + ' start iterating ' + '-' * 5)

    start_time = time.time()
    acc_proj_time = 0.

    func = GlobalEMS(features=instance['features'], trade_off=trade_off)

    # Objective value of the ground-truth indicator array, for reference.
    true_x_array = []
    for true_subgraph in true_subgraphs:
        true_x = np.zeros(num_nodes)
        true_x[true_subgraph] = 1.
        true_x_array.append(true_x)
    true_x_array = np.array(true_x_array)
    true_obj_val, true_global_ems_val, true_penalty = func.get_obj_val(
        true_x_array)
    logger.debug(
        'ground truth, obj value: {:.5f}, global ems value: {:.5f}, penalty: {:.5f}'
        .format(true_obj_val, true_global_ems_val, true_penalty))

    # current_x_array = func.get_init_x_random()
    current_x_array = func.get_init_x_zeros() + 1e-6

    for iteration in range(max_iter):
        logger.debug('iteration: {:d}'.format(iteration))

        prev_x_array = np.copy(current_x_array)
        iter_time = time.time()
        iter_proj_time = 0.

        # Step 1: head projection merged with the current support.
        omega_x_list = []
        for t in range(num_time_stamps):
            grad_x = func.get_gradient(current_x_array, t)
            # The first iteration starts from an all-zero block.
            if iteration > 0:
                current_x = current_x_array[t]
            else:
                current_x = np.zeros_like(current_x_array[t], dtype=np.float64)
            normalized_grad = normalize_gradient(current_x, grad_x)

            start_proj_time = time.time()
            re_nodes, _, _ = head_proj(edges=edges,
                                       weights=edge_weights,
                                       x=normalized_grad,
                                       g=1,
                                       s=sparsity,
                                       budget=sparsity - 1.,
                                       delta=1. / 169.,
                                       max_iter=100,
                                       err_tol=1e-8,
                                       root=-1,
                                       pruning='strong',
                                       epsilon=1e-10,
                                       verbose=0)
            iter_proj_time += time.time() - start_proj_time

            gamma_x = set(re_nodes)
            supp_x = {ind for ind, val in enumerate(current_x) if val != 0.}
            omega_x_list.append(gamma_x | supp_x)

        # Step 2: argmax restricted to the merged supports.
        bx_array = func.argmax_obj_with_proj(current_x_array, omega_x_list)

        # Step 3: tail projection per time stamp and write-back.
        for t in range(num_time_stamps):
            bx = bx_array[t]

            start_proj_time = time.time()
            re_nodes, _, _ = tail_proj(edges=edges,
                                       weights=edge_weights,
                                       x=bx,
                                       g=1,
                                       s=sparsity,
                                       budget=sparsity - 1.,
                                       nu=2.5,
                                       max_iter=100,
                                       err_tol=1e-8,
                                       root=-1,
                                       pruning='strong',
                                       verbose=0)
            iter_proj_time += time.time() - start_proj_time

            psi_x = list(set(re_nodes))
            current_x = np.zeros_like(current_x_array[t])
            current_x[psi_x] = bx[psi_x]
            current_x_array[t] = current_x
            # Previously a bare print() of sorted(np.nonzero(...)), which
            # sorted the 1-tuple returned by np.nonzero, not the indices.
            logger.debug('time stamp %d support: %s', t,
                         np.nonzero(current_x)[0])

        acc_proj_time += iter_proj_time

        # post process
        obj_val, global_ems_val, penalty = func.get_obj_val(current_x_array)
        logger.debug(
            'objective value: {:.5f}, global ems value: {:.5f}, penalty: {:.5f}'
            .format(obj_val, global_ems_val, penalty))
        logger.debug('iteration time: {:.5f}'.format(time.time() - iter_time))
        logger.debug('iter projection time: {:.5f}'.format(iter_proj_time))
        # accumulative projection time
        logger.debug('acc projection time: {:.5f}'.format(acc_proj_time))
        logger.debug('-' * 10)

        diff_norm_x = np.linalg.norm(current_x_array - prev_x_array)
        if diff_norm_x < epsilon:
            break

    run_time = time.time() - start_time

    obj_val, global_ems_val, penalty = func.get_obj_val(current_x_array)
    logger.debug(
        'objective value: {:.5f}, global ems value: {:.5f}, penalty: {:.5f}'.
        format(obj_val, global_ems_val, penalty))
    logger.debug('run time of whole algorithm: {:.5f}'.format(run_time))
    logger.debug('accumulative projection time: {:.5f}'.format(acc_proj_time))

    return current_x_array
def optimize(instance, sparsity, threshold, trade_off, learning_rate, max_iter, epsilon=1e-3, logger=None):
    """Alternately optimize two node vectors over a pair of graphs.

    Each outer iteration takes a gradient of a ``DualEMS`` objective for x
    and for y, restricts each gradient with a projection (``head_proj`` on
    the first graph for x, ``dense_projection`` on the second graph for y),
    solves ``func.argmax_obj_with_proj`` on the resulting supports, and then
    projects the maximizers again (``tail_proj`` for x, ``dense_projection``
    for y). Iteration stops after ``max_iter`` rounds or once the combined
    change in (x, y) falls below ``epsilon``.

    Args:
        instance: mapping with keys ``'first_graph'``, ``'second_graph'``,
            ``'true_subgraph'`` and ``'weight'``.
        sparsity: passed as ``s`` to the head/tail projections; the
            projection budget is ``sparsity - 1``.
        threshold: forwarded unchanged to ``dense_projection``.
        trade_off: forwarded unchanged to ``DualEMS``.
        learning_rate: step size applied to the masked gradients.
        max_iter: maximum number of outer iterations.
        epsilon: early-termination tolerance on the norm of the update.
        logger: optional logger; when falsy, debug logging is skipped.

    Returns:
        Tuple ``(current_x, current_y, run_time)`` where ``run_time`` is the
        wall-clock time in seconds measured from just before the objective
        is constructed.

    Raises:
        ValueError: if the two graphs have different numbers of nodes.
    """
    first_graph = instance['first_graph']
    second_graph = instance['second_graph']
    true_subgraph = instance['true_subgraph']
    features = instance['weight']

    print(first_graph.number_of_nodes())
    print(second_graph.number_of_nodes())
    if first_graph.number_of_nodes() != second_graph.number_of_nodes():
        # Raising a bare string is itself a TypeError in Python 3; raise a
        # real exception so callers see the intended message.
        raise ValueError('error, wrong dual network input !!!')

    first_graph_edges = np.array(first_graph.edges)
    first_graph_edge_weights = np.ones(first_graph.number_of_edges())  # unit edge weights

    num_nodes = first_graph.number_of_nodes()
    num_edges_first_graph = first_graph.number_of_edges()
    num_edges_second_graph = second_graph.number_of_edges()

    if logger:
        logger.debug('-' * 5 + ' related info ' + '-' * 5)
        logger.debug('algorithm: graph block-structured GHTP')
        logger.debug('sparsity: {:d}'.format(sparsity))
        logger.debug('max iteration: {:d}'.format(max_iter))
        logger.debug('number of nodes: {:d}'.format(num_nodes))
        logger.debug('number of edges in first graph: {:d}'.format(
            num_edges_first_graph))
        logger.debug('number of edges in second graph: {:d}'.format(
            num_edges_second_graph))
        logger.debug('density of first graph: {:.5f}'.format(
            nx.density(first_graph)))
        logger.debug('density of second graph: {:.5f}'.format(
            nx.density(second_graph)))
        logger.debug('-' * 5 + ' start iterating ' + '-' * 5)

    # Largest connected component of the second graph.
    # nx.connected_component_subgraphs was removed in networkx 2.4;
    # connected_components + subgraph is available in old and new releases.
    lcc_nodes = max(nx.connected_components(second_graph), key=len)
    lcc = second_graph.subgraph(lcc_nodes)

    # Start node for dense_projection: the component node with the largest
    # strictly positive weight (falls back to node 0 when none is positive).
    best_weight = 0
    start_node = 0
    for node in lcc:
        if features[node] > best_weight:
            best_weight = features[node]
            start_node = node

    # Hard-coded by the original author; dense_projection receives twice
    # this value.  NOTE(review): presumably an estimate of the component
    # diameter for the data set in use -- confirm before reusing elsewhere.
    lcc_diameter = 9

    start_time = time.time()
    acc_proj_time = 0.
    func = DualEMS(features, trade_off)

    if logger:
        print(sorted(true_subgraph))

    # Objective value at the ground-truth indicator vector, for reference.
    true_x = np.zeros(num_nodes)
    true_x[list(true_subgraph)] = 1.
    true_obj_val, x_ems_val, y_ems_val, penalty = func.get_obj_val(
        true_x, true_x)
    print('ground truth values: {}, {}, {}, {}'.format(
        true_obj_val, x_ems_val, y_ems_val, penalty))

    current_x, current_y = func.get_init_x_zeros()
    current_x += 1e-6  # start close to, but not exactly at, zero
    current_y += 1e-6
    print('iteration start funval', func.get_obj_val(current_x, current_y))

    for iteration in range(max_iter):
        if logger:
            logger.debug('iteration: {:d}'.format(iteration))
        prev_x, prev_y = np.copy(current_x), np.copy(current_y)
        iter_proj_time = 0.

        # On the first iteration the update starts from the all-zero vector
        # rather than from the slightly perturbed initial point.
        first_round = iteration == 0
        base_x = np.zeros_like(current_x) if first_round else current_x
        base_y = np.zeros_like(current_y) if first_round else current_y

        # ---- first graph: head projection of the gradient w.r.t. x ----
        grad_x = func.get_gradient(current_x, current_y)
        norm_grad_x = normalize_gradient(base_x, grad_x)

        start_proj_time = time.time()
        re_head = head_proj(edges=first_graph_edges,
                            weights=first_graph_edge_weights,
                            x=norm_grad_x,
                            g=1,
                            s=sparsity,
                            budget=sparsity - 1.,
                            delta=1. / 169.,
                            max_iter=100,
                            err_tol=1e-8,
                            root=-1,
                            pruning='strong',
                            epsilon=1e-10,
                            verbose=0)
        re_nodes, _, _ = re_head
        iter_proj_time += (time.time() - start_proj_time)
        print('head projection time for x: {:.5f}'.format(time.time() -
                                                          start_proj_time))
        gamma_x = set(re_nodes)
        indicator_x = np.zeros(num_nodes)
        indicator_x[list(gamma_x)] = 1.

        # Intermediate step only: current_x itself is not updated here, the
        # result is used solely to obtain the support omega_x.
        tmp_x = base_x + learning_rate * grad_x * indicator_x
        omega_x = {ind for ind, val in enumerate(tmp_x) if val != 0.}

        # ---- second graph: dense projection of the gradient w.r.t. y ----
        grad_y = func.get_gradient(current_y, current_x)  # note the argument order
        if first_round:
            norm_grad_y = normalize_gradient(base_y, grad_y)
        else:
            norm_grad_y = grad_y  # deliberately left un-normalized by the author
        norm_grad_y = np.absolute(norm_grad_y)  # the projection input should be positive

        # Search range handed positionally to dense_projection.
        range_min, range_max, range_step = 100, 5002, 500
        start_proj_time = time.time()
        gamma_y = dense_projection(second_graph,
                                   norm_grad_y,
                                   threshold,
                                   range_min,
                                   range_max,
                                   range_step,
                                   start_node,
                                   lcc_diameter=2 * lcc_diameter,
                                   normalize=True,
                                   sort=False)
        iter_proj_time += (time.time() - start_proj_time)
        print('head projection time for y: {:.5f}'.format(time.time() -
                                                          start_proj_time))
        indicator_y = np.zeros(num_nodes)
        indicator_y[list(gamma_y)] = 1.

        tmp_y = base_y + learning_rate * grad_y * indicator_y
        omega_y = {ind for ind, val in enumerate(tmp_y) if val != 0.}

        # ---- maximize the objective restricted to the two supports ----
        print('solve argmax')
        start_max_time = time.time()
        bx, by = func.argmax_obj_with_proj(current_x, current_y, omega_x,
                                           omega_y)
        print('solve argmax time: {:.5f}'.format(time.time() -
                                                 start_max_time))

        # ---- tail projection for the first graph ----
        start_proj_time = time.time()
        re_tail = tail_proj(edges=first_graph_edges,
                            weights=first_graph_edge_weights,
                            x=bx,
                            g=1,
                            s=sparsity,
                            budget=sparsity - 1.,
                            nu=2.5,
                            max_iter=100,
                            err_tol=1e-8,
                            root=-1,
                            pruning='strong',
                            verbose=0)
        re_nodes, _, _ = re_tail
        iter_proj_time += time.time() - start_proj_time
        print('tail projection time for x: {:.5f}'.format(time.time() -
                                                          start_proj_time))
        psi_x = list(set(re_nodes))
        current_x = np.zeros_like(current_x)
        current_x[psi_x] = bx[psi_x]
        current_x = normalize(current_x)  # constrain current_x to [0, 1]

        # ---- tail (dense) projection for the second graph ----
        # by lies in [0, 1], so a different search range is used here.
        range_min, range_max, range_step = 10000, 20001, 1000
        start_proj_time = time.time()
        psi_y = dense_projection(second_graph,
                                 by,
                                 threshold,
                                 range_min,
                                 range_max,
                                 range_step,
                                 start_node,
                                 lcc_diameter=2 * lcc_diameter,
                                 normalize=False,
                                 sort=False)
        iter_proj_time += (time.time() - start_proj_time)
        print('tail projection time for y: {:.5f}'.format(time.time() -
                                                          start_proj_time))
        psi_y = list(psi_y)
        current_y = np.zeros_like(current_y)
        current_y[psi_y] = by[psi_y]
        current_y = normalize(current_y)  # constrain current_y to [0, 1]

        print('{} iteration funval'.format(iteration),
              func.get_obj_val(current_x, current_y))

        acc_proj_time += iter_proj_time
        if logger:
            print('iter proj time: {:.5f}'.format(iter_proj_time))

        # Early termination on the joint Euclidean change of (x, y).
        diff_norm = np.sqrt(
            np.linalg.norm(current_x - prev_x)**2 +
            np.linalg.norm(current_y - prev_y)**2)
        if logger:
            logger.debug('difference norm: {}'.format(diff_norm))
        if diff_norm < epsilon:
            break

    run_time = time.time() - start_time
    if logger:
        logger.debug('run time of whole algorithm: {:.5f}'.format(run_time))
        logger.debug('accumulative projection time: {:.5f}'.format(
            acc_proj_time))

    return current_x, current_y, run_time
def optimize(instance, sparsity, learning_rate=0.01, max_iter=10, epsilon=1e-3):
    """Iteratively optimize a node vector over a single graph.

    Each iteration computes the gradient of an ``EMS`` objective, restricts
    it to the node set returned by ``head_proj``, takes a step of size
    ``learning_rate``, keeps only the entries selected by ``tail_proj``, and
    rescales the result with ``normalize``. Iteration stops after
    ``max_iter`` rounds or once the change in x falls below ``epsilon``.

    Progress is reported through the module-level ``logger``.

    Args:
        instance: mapping with keys ``'graph'``, ``'subgraph'`` and
            ``'features'``.
        sparsity: passed as ``s`` to both projections; the projection budget
            is ``sparsity - 1``.
        learning_rate: step size applied to the masked, normalized gradient.
        max_iter: maximum number of iterations.
        epsilon: early-termination tolerance on the norm of the update.

    Returns:
        The final vector ``current_x``.
    """
    graph = instance['graph']
    true_subgraph = instance['subgraph']

    edges = np.array(graph.edges)
    num_nodes = graph.number_of_nodes()
    num_edges = graph.number_of_edges()
    edge_weights = np.ones(num_edges)  # unit edge weights

    logger.debug('-' * 5 + ' related info ' + '-' * 5)
    logger.debug('sparsity: {:d}'.format(sparsity))
    logger.debug('max iteration: {:d}'.format(max_iter))
    logger.debug('number of nodes: {:d}'.format(num_nodes))
    logger.debug('number of edges: {:d}'.format(num_edges))
    logger.debug('number of nodes in true_subgraph: {:d}'.format(
        len(true_subgraph)))
    logger.debug('-' * 5 + ' start iterating ' + '-' * 5)

    start_time = time.time()
    acc_proj_time = 0.
    func = EMS(features=instance['features'], graph=graph)

    # Random initialization was tried by the author and found unstable, so
    # start from zeros shifted slightly away from the origin.
    current_x = func.get_init_x_zeros() + 0.00001

    for iteration in range(max_iter):
        logger.debug('iteration: {:d}'.format(iteration))
        iter_time = time.time()
        iter_proj_time = 0.

        grad_x = func.get_gradient(current_x)
        # TODO: flagged "fixme" by the original author; the concern is not
        # recorded in the source.
        normalized_grad = normalize_gradient(current_x, grad_x)

        # Head projection: pick the nodes on which the gradient step is taken.
        start_proj_time = time.time()
        re_head = head_proj(edges=edges,
                            weights=edge_weights,
                            x=normalized_grad,
                            g=1,
                            s=sparsity,
                            budget=sparsity - 1.,
                            delta=1. / 169.,
                            max_iter=100,
                            err_tol=1e-8,
                            root=-1,
                            pruning='strong',
                            epsilon=1e-10,
                            verbose=0)
        re_nodes, _, _ = re_head
        iter_proj_time += time.time() - start_proj_time

        omega_x = set(re_nodes)
        indicator_x = np.zeros(num_nodes)
        indicator_x[list(omega_x)] = 1.
        bx = current_x + learning_rate * normalized_grad * indicator_x

        # Tail projection: pick the entries of the stepped vector to keep.
        start_proj_time = time.time()
        re_tail = tail_proj(edges=edges,
                            weights=edge_weights,
                            x=bx,
                            g=1,
                            s=sparsity,
                            budget=sparsity - 1.,
                            nu=2.5,
                            max_iter=100,
                            err_tol=1e-8,
                            root=-1,
                            pruning='strong',
                            verbose=0)
        re_nodes, _, _ = re_tail
        iter_proj_time += time.time() - start_proj_time

        # Accumulate the whole iteration's projection time (head + tail);
        # adding only the time since the last start_proj_time would silently
        # drop every head projection from the reported total.
        acc_proj_time += iter_proj_time

        psi_x = list(set(re_nodes))
        prev_x = current_x
        current_x = np.zeros_like(current_x)
        current_x[psi_x] = bx[psi_x]
        current_x = normalize(current_x)  # constrain current_x to [0, 1]

        logger.debug('function value: {:.5f}'.format(
            func.get_obj_val(current_x)))
        logger.debug('iteration time: {:.5f}'.format(time.time() - iter_time))
        logger.debug('iter projection time: {:.5f}'.format(iter_proj_time))
        logger.debug('acc projection time: {:.5f}'.format(
            acc_proj_time))  # accumulative projection time
        logger.debug('-' * 10)

        diff_norm_x = np.linalg.norm(current_x - prev_x)
        if diff_norm_x < epsilon:
            break

    run_time = time.time() - start_time
    logger.debug('final function value: {:.5f}'.format(
        func.get_obj_val(current_x)))
    logger.debug('run time of whole algorithm: {:.5f}'.format(run_time))
    logger.debug('accumulative projection time: {:.5f}'.format(acc_proj_time))

    return current_x