Beispiel #1
0
def optimize_and_find_optimal_decomposition(init_graphs,
                                            domain_graphs,
                                            target_graphs,
                                            oracle_func,
                                            n_iter=10,
                                            max_decomposition_order=3,
                                            make_monitor=None):
    """Alternate graph optimization with re-learning of the decomposition.

    An initial decomposition is assembled from cycles, edge-joined cycles,
    neighborhoods of radius 1-3 and node-joined cycles/non-cycles. Then, for
    at most ``n_iter`` rounds:

    1. ``optimizer_setup`` builds the estimators and monitor for the
       current decomposition;
    2. ``optimize`` is run starting from the graphs of the previous round
       (``init_graphs`` in the first round);
    3. 50 graphs are drawn with ``sample_with_expected_improvement`` and
       scored with ``oracle_func``;
    4. the loop stops if ``termination_condition`` accepts the scores,
       otherwise ``find_optimal_decomposition`` learns a new decomposition
       from the sampled graphs and their scores.

    Args:
        init_graphs: graphs the first optimization round starts from.
        domain_graphs: forwarded to ``optimizer_setup``.
        target_graphs: forwarded to ``optimizer_setup``.
        oracle_func: callable scoring a single graph; also forwarded to
            ``optimizer_setup`` and ``optimize``.
        n_iter: maximum number of optimize/re-learn rounds.
        max_decomposition_order: passed as ``order`` to
            ``find_optimal_decomposition``.
        make_monitor: forwarded to ``optimizer_setup``.

    Returns:
        A ``(decomposition, graphs)`` pair: the last decomposition learned
        and the graphs returned by the last ``optimize`` call (or
        ``init_graphs`` if ``n_iter`` is 0).
    """
    # The join is passed as `compose_function`, the same keyword used for
    # `lollipops` below. The earlier spelling `composition_function` did not
    # match it, so the edge join was presumably never applied.
    eights = do_decompose(decompose_cycles,
                          compose_function=decompose_edge_join)
    neighbs = do_decompose(decompose_neighborhood(radius=1),
                           decompose_neighborhood(radius=2),
                           decompose_neighborhood(radius=3))
    lollipops = do_decompose(decompose_cycles_and_non_cycles,
                             compose_function=decompose_node_join)
    decomposition = do_decompose(decompose_cycles, eights, neighbs, lollipops)

    graphs = init_graphs
    for it in range(n_iter):
        logger.info('-' * 100 + '\n iteration:%d/%d' % (it + 1, n_iter))
        # Rebuild the estimators for the decomposition currently in use.
        n, ei, m = optimizer_setup(decomposition,
                                   domain_graphs,
                                   target_graphs,
                                   oracle_func,
                                   grammar_conservativeness=2,
                                   n_neighbors=3,
                                   exploitation_vs_exploration=1,
                                   make_monitor=make_monitor)
        # Each round continues from the graphs produced by the previous one.
        graphs = optimize(graphs,
                          oracle_func,
                          n_iter=250,
                          sample_size=5,
                          max_pool_size=10,
                          k_steps=1,
                          neighborhood_estimator=n,
                          graph_expected_improvement_estimator=ei,
                          monitor=m)

        # Sample graphs generated during optimization and learn the
        # optimal decomposition from them.
        sample_graphs = sample_with_expected_improvement(graphs, 50, ei)
        sample_scores = [oracle_func(g) for g in sample_graphs]
        if termination_condition(sample_scores):
            break
        decomposition = find_optimal_decomposition(
            sample_graphs, sample_scores, order=max_decomposition_order)
    return decomposition, graphs
Beispiel #2
0
def remove_duplicates_in_set(graphs_to_filter, graph_archive):
    """Drop from graphs_to_filter every graph whose hash occurs in graph_archive.

    Graphs are compared through ``hash_graph`` computed with a radius-2
    neighborhood decomposition. Duplicates inside ``graphs_to_filter``
    itself are not removed; input order is preserved.
    """
    neighborhood_r2 = decompose_neighborhood(radius=2)

    def fingerprint(graph):
        return hash_graph(graph, decomposition_funcs=neighborhood_r2)

    archived_hashes = {fingerprint(archived) for archived in graph_archive}
    kept = []
    for candidate in graphs_to_filter:
        if fingerprint(candidate) not in archived_hashes:
            kept.append(candidate)
    return kept
Beispiel #3
0
def decomp(egostuff):
    """Concatenate the radius-1 and radius-2 neighborhood decompositions.

    ``egostuff`` is materialized into a list once and that list is handed
    to ``decompose_neighborhood`` for each radius; the results are returned
    as one flat list, radius 1 first.
    """
    # NOTE(review): the original author expected a list of per-radius
    # callables (radii 0, 1, 2) to work here but suspected a bug in ego,
    # hence this eager form - confirm before changing.
    items = list(egostuff)
    parts = []
    for radius in (1, 2):
        parts.extend(decompose_neighborhood(items, radius=radius))
    return parts
Beispiel #4
0
def remove_duplicates(graphs):
    """Return one graph per distinct hash found in graphs.

    Hashes come from ``hash_graph`` with a radius-2 neighborhood
    decomposition. When several graphs share a hash the last one seen is
    kept, placed at the position where that hash first appeared.
    """
    neighborhood_r2 = decompose_neighborhood(radius=2)
    graph_by_hash = {}
    for graph in graphs:
        key = hash_graph(graph, decomposition_funcs=neighborhood_r2)
        graph_by_hash[key] = graph
    return list(graph_by_hash.values())
Beispiel #5
0
def hyper_optimize_setup(pos_graphs,
                         neg_graphs,
                         timeout=60,
                         test_frac=.50,
                         n_iter=3,
                         add_data_driven=False):
    """Prepare the data split, evaluators and decomposition dictionary.

    Args:
        pos_graphs: graphs passed first to ``make_data_split``.
        neg_graphs: graphs passed second to ``make_data_split``.
        timeout: forwarded to ``evaluate`` and ``evaluate_complexity``.
        test_frac: forwarded to ``make_data_split``.
        n_iter: forwarded to ``evaluate``.
        add_data_driven: when true, four ``decompose_positive`` entries
            (ktop 11 and 7, neighborhood radius 1 and 2) fitted on the
            training split are added to level 0 of the dictionary.

    Returns:
        A 5-tuple ``(data_partition, evaluate_func,
        evaluate_complexity_func, evaluate_identity_func,
        decomposition_dict)`` where ``data_partition`` is the tuple
        ``(train_graphs, train_target, test_graphs, test_target)``.
    """
    data_split = make_data_split(pos_graphs, neg_graphs, test_frac=test_frac)
    train_graphs, train_target, test_graphs, test_target = data_split

    evaluate_func = evaluate(
        train_graphs=train_graphs,
        train_target=train_target,
        test_graphs=test_graphs,
        test_target=test_target,
        n_iter=n_iter,
        timeout=timeout)
    evaluate_complexity_func = evaluate_complexity(
        graphs=train_graphs, timeout=timeout)
    evaluate_identity_func = make_evaluate_identity(graphs=train_graphs)
    decomposition_dict = make_decomposition_dict()

    if add_data_driven:
        # (dictionary key, ktop, neighborhood radius); a fresh estimator is
        # fitted for every entry.
        data_driven_specs = (
            ('adpts11nb1', 11, 1),
            ('adpts7nb1', 7, 1),
            ('adpts11nb2', 11, 2),
            ('adpts7nb2', 7, 2),
        )
        for key, ktop, radius in data_driven_specs:
            estimator = PartImportanceEstimator(
                decompose_neighborhood(radius=radius))
            fitted = estimator.fit(train_graphs, train_target)
            decomposition_dict[0][key] = decompose_positive(
                ktop=ktop, part_importance_estimator=fitted)

    data_partition = (train_graphs, train_target, test_graphs, test_target)
    return (data_partition,
            evaluate_func,
            evaluate_complexity_func,
            evaluate_identity_func,
            decomposition_dict)
Beispiel #6
0
def hyperopt(pos_graphs,
             neg_graphs,
             data_size,
             memory,
             history,
             auc_threshold,
             cmpx_threshold,
             n_max,
             n_max_sel,
             n_max_sample,
             order,
             timeout,
             max_n_hours,
             max_runtime,
             test_frac,
             n_iter,
             display=True,
             add_data_driven=False):
    """Search for a decomposition function, falling back to radius-2 neighborhoods.

    The setup produced by ``hyper_optimize_setup`` is handed to
    ``hyper_optimize_decomposition_function``. ``data_size`` and
    ``max_n_hours`` are accepted but not used in this function.

    Returns:
        ``(decompose_func, decompose_func_str)``. If the search yields
        ``None`` as function, ``(decompose_neighborhood(radius=2), 'nb2')``
        is returned instead.
    """
    setup = hyper_optimize_setup(pos_graphs,
                                 neg_graphs,
                                 timeout,
                                 test_frac=test_frac,
                                 n_iter=n_iter,
                                 add_data_driven=add_data_driven)
    # The data partition is unpacked for shape parity with the setup tuple;
    # its members are not needed below.
    ((_train_graphs, _train_target, _test_graphs, _test_target),
     evaluate_func,
     evaluate_complexity_func,
     evaluate_identity_func,
     decomposition_dict) = setup

    search_result = hyper_optimize_decomposition_function(
        order,
        decomposition_dict,
        evaluate_func,
        evaluate_complexity_func,
        evaluate_identity_func,
        n_max,
        n_max_sel,
        n_max_sample,
        auc_threshold,
        cmpx_threshold,
        memory,
        history,
        max_runtime=max_runtime,
        display=display,
        return_decomposition_function_string=True)
    best_func, best_func_str = search_result

    if best_func is not None:
        return best_func, best_func_str
    # Nothing found: fall back to the radius-2 neighborhood decomposition.
    return decompose_neighborhood(radius=2), 'nb2'
Beispiel #7
0
def decompose(x):
    """Apply ``decompose_neighborhood`` to x with the configured max radius.

    ``max_decompose_radius`` is read from the surrounding scope; it is not
    defined inside this function.
    """
    radius_limit = max_decompose_radius
    return decompose_neighborhood(x, max_radius=radius_limit)
Beispiel #8
0
def make_decomposition_dict():
    """Build the per-level dictionaries of named decomposition functions.

    Returns:
        A list of 10 dicts mapping a short name to a decomposition
        function. Levels 0 and 1 hold only the entries listed below; every
        level from 2 upwards starts with ``'+'`` mapped to
        ``decompose_concatenate``, and levels 2 and 3 add further entries.
    """
    n_levels = 10
    # Levels 0 and 1 do not offer the '+' (concatenate) entry.
    fs = [
        {} if level < 2 else {'+': decompose_concatenate}
        for level in range(n_levels)
    ]

    # Graphlet decompositions (sizes 3, 4, 5) are intentionally left out of
    # level 0 for now.
    fs[0].update({
        'edg': decompose_edges,
        'cyc': decompose_cycles,
        'cyc&n': decompose_cycles_and_non_cycles,
        'ncyc': decompose_non_cycles,
        'nb': decompose_neighborhood(radius=1),
        'nb2': decompose_neighborhood(radius=2),
        'nb3': decompose_neighborhood(radius=3),
        'centr5': decompose_central(k_top=5),
        'centr7': decompose_central(k_top=7),
        'centr9': decompose_central(k_top=9),
        'centr11': decompose_central(k_top=11),
        'centr13': decompose_central(k_top=13),
        'ncentr5': decompose_non_central(k_top=5),
        'ncentr7': decompose_non_central(k_top=7),
        'ncentr9': decompose_non_central(k_top=9),
        'ncentr11': decompose_non_central(k_top=11),
        'ncentr13': decompose_non_central(k_top=13),
    })

    fs[1].update({
        "nb'": decompose_neighborhood(radius=1),
        "nb2'": decompose_neighborhood(radius=2),
        "nb''": decompose_neighborhood(radius=1),
        "nb2''": decompose_neighborhood(radius=2),
        'dlt': decompose_dilatate(radius=1),
        'dlt2': decompose_dilatate(radius=2),
        'dlt3': decompose_dilatate(radius=3),
        'cntx': decompose_context(radius=1),
        'cntx2': decompose_context(radius=2),
        'cntx3': decompose_context(radius=3),
        'njn': decompose_node_join,
        'ejn': decompose_edge_join,
        'pr1': decompose_pair(distance=1),
        'pr2': decompose_pair(distance=2),
        'pr3': decompose_pair(distance=3),
        'sz<5': decompose_node_size(max_size=5),
        'sz<7': decompose_node_size(max_size=7),
        'sz<9': decompose_node_size(max_size=9),
        'sz<11': decompose_node_size(max_size=11),
        'sz<13': decompose_node_size(max_size=13),
        'sz>5': decompose_node_size(min_size=5),
        'sz>7': decompose_node_size(min_size=7),
        'sz>9': decompose_node_size(min_size=9),
        'sz>11': decompose_node_size(min_size=11),
        'sz>13': decompose_node_size(min_size=13),
    })

    fs[2].update({
        'unn': decompose_union,
        'int': decompose_intersection,
        'dff': decompose_difference,
        'sdff': decompose_symmetric_difference,
        'rel': decompose_relation,
        'pair1': decompose_pair_binary(distance=1),
        'pair2': decompose_pair_binary(distance=2),
        'pair3': decompose_pair_binary(distance=3),
    })

    fs[3].update({
        'relbi': decompose_relation_binary(keep_second_component=False),
        'relbi2nd': decompose_relation_binary(keep_second_component=True),
    })

    return fs