def optimize_and_find_optimal_decomposition(init_graphs, domain_graphs,
                                            target_graphs, oracle_func,
                                            n_iter=10,
                                            max_decomposition_order=3,
                                            make_monitor=None):
    eights = do_decompose(decompose_cycles,
                          compose_function=decompose_edge_join)
    neighbs = do_decompose(decompose_neighborhood(radius=1),
                           decompose_neighborhood(radius=2),
                           decompose_neighborhood(radius=3))
    lollipops = do_decompose(decompose_cycles_and_non_cycles,
                             compose_function=decompose_node_join)
    decomposition = do_decompose(decompose_cycles, eights, neighbs, lollipops)
    graphs = init_graphs
    for it in range(n_iter):
        logger.info('-' * 100 + '\n iteration:%d/%d' % (it + 1, n_iter))
        # optimize starting from the current pool of graphs
        n, ei, m = optimizer_setup(decomposition, domain_graphs,
                                   target_graphs, oracle_func,
                                   grammar_conservativeness=2,
                                   n_neighbors=3,
                                   exploitation_vs_exploration=1,
                                   make_monitor=make_monitor)
        graphs = optimize(graphs, oracle_func, n_iter=250, sample_size=5,
                          max_pool_size=10, k_steps=1,
                          neighborhood_estimator=n,
                          graph_expected_improvement_estimator=ei,
                          monitor=m)
        # sample graphs generated during optimization
        # and learn an optimal decomposition from them
        sample_graphs = sample_with_expected_improvement(graphs, 50, ei)
        sample_scores = [oracle_func(g) for g in sample_graphs]
        if termination_condition(sample_scores):
            break
        decomposition = find_optimal_decomposition(
            sample_graphs, sample_scores, order=max_decomposition_order)
    return decomposition, graphs
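
# Usage sketch for the driver above. The oracle and graphs are hypothetical
# toy choices: any networkx graphs and any graph -> float callable should do.
def _demo_optimize_and_find_optimal_decomposition():
    import networkx as nx

    def toy_oracle(g):
        # hypothetical score: prefer graphs containing many triangles
        return sum(nx.triangles(g).values()) / 3.0

    init_graphs = [nx.path_graph(6), nx.cycle_graph(6)]
    domain_graphs = [nx.cycle_graph(n) for n in range(3, 9)]
    target_graphs = [nx.complete_graph(4)]
    decomposition, graphs = optimize_and_find_optimal_decomposition(
        init_graphs, domain_graphs, target_graphs, toy_oracle, n_iter=2)
    return decomposition, graphs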

def remove_duplicates_in_set(graphs_to_filter, graph_archive):
    """Remove graphs whose neighborhood hash already appears in the archive."""
    df = decompose_neighborhood(radius=2)
    val_set = {hash_graph(g, decomposition_funcs=df) for g in graph_archive}
    selected_graphs = [g for g in graphs_to_filter
                       if hash_graph(g, decomposition_funcs=df) not in val_set]
    return selected_graphs

def decomp(egostuff):
    # Note: the curried variant below is broken by late binding: every
    # lambda closes over the same y and sees its final value (2).
    # Binding the radius as a default argument (y=y) fixes that:
    # return [lambda x, y=y: decompose_neighborhood(x, radius=y)
    #         for y in [0, 1, 2]]
    egostuff = list(egostuff)
    return [e
            for radius in [1, 2]
            for e in decompose_neighborhood(egostuff, radius=radius)]
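
# The late-binding pitfall mentioned in decomp, shown in isolation: without
# the default-argument trick every closure reads the loop variable's final
# value instead of the value at creation time.
def _demo_late_binding():
    buggy = [lambda x: x + y for y in [0, 1, 2]]
    fixed = [lambda x, y=y: x + y for y in [0, 1, 2]]
    assert [f(0) for f in buggy] == [2, 2, 2]
    assert [f(0) for f in fixed] == [0, 1, 2]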

def remove_duplicates(graphs):
    """Keep one representative per neighborhood-hash equivalence class."""
    df = decompose_neighborhood(radius=2)
    selected_graphs_dict = {hash_graph(g, decomposition_funcs=df): g
                            for g in graphs}
    return list(selected_graphs_dict.values())
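
# Sketch exercising both duplicate filters above with toy graphs; it assumes
# hash_graph assigns equal hashes to isomorphic graphs under the radius-2
# neighborhood decomposition.
def _demo_duplicate_filters():
    import networkx as nx
    batch = [nx.cycle_graph(5), nx.cycle_graph(5), nx.path_graph(5)]
    archive = [nx.path_graph(5)]
    # the two identical cycles should collapse to one representative
    print(len(remove_duplicates(batch)))  # expected: 2
    # the path should be dropped because its hash is already archived
    print(len(remove_duplicates_in_set(batch, archive)))  # expected: 2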

def hyper_optimize_setup(pos_graphs, neg_graphs, timeout=60, test_frac=0.5,
                         n_iter=3, add_data_driven=False):
    train_graphs, train_target, test_graphs, test_target = make_data_split(
        pos_graphs, neg_graphs, test_frac=test_frac)
    evaluate_func = evaluate(train_graphs=train_graphs,
                             train_target=train_target,
                             test_graphs=test_graphs,
                             test_target=test_target,
                             n_iter=n_iter,
                             timeout=timeout)
    evaluate_complexity_func = evaluate_complexity(graphs=train_graphs,
                                                   timeout=timeout)
    evaluate_identity_func = make_evaluate_identity(graphs=train_graphs)
    decomposition_dict = make_decomposition_dict()
    if add_data_driven:
        # data-driven decompositions: keep the ktop most important parts
        # under a neighborhood-based part importance estimator
        for radius in (1, 2):
            for ktop in (11, 7):
                key = 'adpts%dnb%d' % (ktop, radius)
                decomposition_dict[0][key] = decompose_positive(
                    ktop=ktop,
                    part_importance_estimator=PartImportanceEstimator(
                        decompose_neighborhood(radius=radius)).fit(
                            train_graphs, train_target))
    return ((train_graphs, train_target, test_graphs, test_target),
            evaluate_func, evaluate_complexity_func, evaluate_identity_func,
            decomposition_dict)

def hyperopt(pos_graphs, neg_graphs, data_size, memory, history,
             auc_threshold, cmpx_threshold, n_max, n_max_sel, n_max_sample,
             order, timeout, max_n_hours, max_runtime, test_frac, n_iter,
             display=True, add_data_driven=False):
    hyper_params = hyper_optimize_setup(pos_graphs, neg_graphs, timeout,
                                        test_frac=test_frac, n_iter=n_iter,
                                        add_data_driven=add_data_driven)
    (data_partition, evaluate_func, evaluate_complexity_func,
     evaluate_identity_func, decomposition_dict) = hyper_params
    train_graphs, train_target, test_graphs, test_target = data_partition
    opt_decompose_func, opt_decompose_func_str = \
        hyper_optimize_decomposition_function(
            order, decomposition_dict, evaluate_func,
            evaluate_complexity_func, evaluate_identity_func,
            n_max, n_max_sel, n_max_sample,
            auc_threshold, cmpx_threshold, memory, history,
            max_runtime=max_runtime, display=display,
            return_decomposition_function_string=True)
    # fall back to the radius-2 neighborhood decomposition if the search
    # did not return a valid decomposition function
    if opt_decompose_func is None:
        return decompose_neighborhood(radius=2), 'nb2'
    return opt_decompose_func, opt_decompose_func_str
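
# Usage sketch with toy data. Every numeric setting below is a hypothetical
# choice, and memory/history are assumed to be a reusable cache dict and a
# log list that are passed through to the decomposition search.
def _demo_hyperopt():
    import networkx as nx
    pos_graphs = [nx.cycle_graph(n) for n in range(4, 14)]
    neg_graphs = [nx.path_graph(n) for n in range(4, 14)]
    decompose_func, decompose_str = hyperopt(
        pos_graphs, neg_graphs, data_size=len(pos_graphs) + len(neg_graphs),
        memory=dict(), history=list(), auc_threshold=0.6, cmpx_threshold=10,
        n_max=7, n_max_sel=5, n_max_sample=3, order=2, timeout=60,
        max_n_hours=1, max_runtime=3600, test_frac=0.5, n_iter=3,
        display=False)
    return decompose_func, decompose_str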

def decompose(x):
    # relies on max_decompose_radius being defined in an enclosing scope
    return decompose_neighborhood(x, max_radius=max_decompose_radius)
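
# Assumption: decompose above is meant to live inside a scope that defines
# max_decompose_radius; a closure factory like this hypothetical one makes
# that dependency explicit.
def make_decompose(max_decompose_radius):
    def decompose(x):
        return decompose_neighborhood(x, max_radius=max_decompose_radius)
    return decompose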

def make_decomposition_dict():
    """Build the order-indexed registry of named decomposition functions.

    Returns a list of dicts: fs[k] maps a short name to a decomposition
    function usable at composition order k. Every order carries the
    concatenation operator '+' except orders 0 and 1, which enumerate
    explicit primitives and combinators instead.
    """
    fs = []
    for _ in range(10):
        decomposition_functions = dict()
        decomposition_functions['+'] = decompose_concatenate
        fs.append(decomposition_functions)
    fs[0].pop('+')
    fs[1].pop('+')
    # order 0: structural primitives
    fs[0]['edg'] = decompose_edges
    fs[0]['cyc'] = decompose_cycles
    fs[0]['cyc&n'] = decompose_cycles_and_non_cycles
    fs[0]['ncyc'] = decompose_non_cycles
    fs[0]['nb'] = decompose_neighborhood(radius=1)
    fs[0]['nb2'] = decompose_neighborhood(radius=2)
    fs[0]['nb3'] = decompose_neighborhood(radius=3)
    # fs[0]['grfl3'] = decompose_graphlet(size=3)
    # fs[0]['grfl4'] = decompose_graphlet(size=4)
    # fs[0]['grfl5'] = decompose_graphlet(size=5)
    fs[0]['centr5'] = decompose_central(k_top=5)
    fs[0]['centr7'] = decompose_central(k_top=7)
    fs[0]['centr9'] = decompose_central(k_top=9)
    fs[0]['centr11'] = decompose_central(k_top=11)
    fs[0]['centr13'] = decompose_central(k_top=13)
    fs[0]['ncentr5'] = decompose_non_central(k_top=5)
    fs[0]['ncentr7'] = decompose_non_central(k_top=7)
    fs[0]['ncentr9'] = decompose_non_central(k_top=9)
    fs[0]['ncentr11'] = decompose_non_central(k_top=11)
    fs[0]['ncentr13'] = decompose_non_central(k_top=13)
    # order 1: unary combinators; the primed neighborhood names are
    # deliberate duplicates, presumably so the same decomposition can be
    # drawn more than once when composing
    fs[1]["nb'"] = decompose_neighborhood(radius=1)
    fs[1]["nb2'"] = decompose_neighborhood(radius=2)
    fs[1]["nb''"] = decompose_neighborhood(radius=1)
    fs[1]["nb2''"] = decompose_neighborhood(radius=2)
    fs[1]['dlt'] = decompose_dilatate(radius=1)
    fs[1]['dlt2'] = decompose_dilatate(radius=2)
    fs[1]['dlt3'] = decompose_dilatate(radius=3)
    fs[1]['cntx'] = decompose_context(radius=1)
    fs[1]['cntx2'] = decompose_context(radius=2)
    fs[1]['cntx3'] = decompose_context(radius=3)
    fs[1]['njn'] = decompose_node_join
    fs[1]['ejn'] = decompose_edge_join
    fs[1]['pr1'] = decompose_pair(distance=1)
    fs[1]['pr2'] = decompose_pair(distance=2)
    fs[1]['pr3'] = decompose_pair(distance=3)
    fs[1]['sz<5'] = decompose_node_size(max_size=5)
    fs[1]['sz<7'] = decompose_node_size(max_size=7)
    fs[1]['sz<9'] = decompose_node_size(max_size=9)
    fs[1]['sz<11'] = decompose_node_size(max_size=11)
    fs[1]['sz<13'] = decompose_node_size(max_size=13)
    fs[1]['sz>5'] = decompose_node_size(min_size=5)
    fs[1]['sz>7'] = decompose_node_size(min_size=7)
    fs[1]['sz>9'] = decompose_node_size(min_size=9)
    fs[1]['sz>11'] = decompose_node_size(min_size=11)
    fs[1]['sz>13'] = decompose_node_size(min_size=13)
    # order 2: binary set operators and relations
    fs[2]['unn'] = decompose_union
    fs[2]['int'] = decompose_intersection
    fs[2]['dff'] = decompose_difference
    fs[2]['sdff'] = decompose_symmetric_difference
    fs[2]['rel'] = decompose_relation
    fs[2]['pair1'] = decompose_pair_binary(distance=1)
    fs[2]['pair2'] = decompose_pair_binary(distance=2)
    fs[2]['pair3'] = decompose_pair_binary(distance=3)
    # order 3: binary relation variants
    fs[3]['relbi'] = decompose_relation_binary(keep_second_component=False)
    fs[3]['relbi2nd'] = decompose_relation_binary(keep_second_component=True)
    return fs
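
# Sketch of the structure returned above: a list indexed by composition
# order, mapping short names to decomposition functions.
def _demo_decomposition_dict():
    fs = make_decomposition_dict()
    assert '+' not in fs[0] and '+' not in fs[1]  # popped above
    assert 'nb2' in fs[0]       # order-0 primitives
    assert 'ejn' in fs[1]       # order-1 combinators
    assert 'unn' in fs[2]       # order-2 binary operators
    assert set(fs[4]) == {'+'}  # orders >= 4 only concatenate
    return fs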