Esempio n. 1
0
def crossval_small_dataset(shingles_type):
    """Cross-validate the configured dataset once per window size.

    Loads the first file registered for the module-level ``dataset`` in
    ``helpers.datasets`` and, for every value in the module-level
    ``window_size_range``, runs ``crossval.loo_crossval_pnn`` and prints the
    model it returns. ``wl_iter_range``, ``p_range`` and ``output_dir`` are
    likewise read from module scope.

    Args:
        shingles_type: Forwarded unchanged to ``crossval.loo_crossval_pnn``.

    Returns:
        None. The result for each window size is only printed.
    """
    in_file = helpers.datasets[dataset]["files"][0]
    # Read the compounds into a list once; the same list is passed to every
    # cross-validation run below.
    graph_database = list(dataset_manager.read_chemical_compounts(in_file))

    # Alternative strategies that were previously swapped in here by hand:
    # crossval.loo_crossval_sketch (k_L_range, cols_count=188) and
    # crossval.loo_crossval_threshold (infl_point_range); both took the same
    # remaining keyword arguments as the call below.
    for window_size in window_size_range:
        best_model = crossval.loo_crossval_pnn(
            graph_database,
            wl_iter_range,
            p_range,
            output_dir,
            base_model={"window_size": window_size},
            shingles_type=shingles_type,
            window_size=window_size,
            accumulate_wl_shingles=False,
        )
        # Single pre-formatted argument: prints the same text whether `print`
        # is the Python 2 statement or the Python 3 function.
        print("Best model: %s" % (best_model,))
Esempio n. 2
0
def loo_crossval(hypergraph, wl_iter_range, r_in_range, r_out_range, r_all_range, output_dir, k_L_range=None, infl_point_range=None, p_range=None):
    """Search r-ball radii for the best cross-validated model.

    For every (r_in, r_out, r_all) combination an r-ball database is
    extracted from ``hypergraph`` and handed to one ``sgm_crossval``
    routine. The model with the highest ``"quality"`` is appended to the
    ``models`` file and returned.

    Args:
        hypergraph: Passed to
            ``similar_nodes_mining.extract_rballs_database``.
        wl_iter_range: Forwarded to the selected ``sgm_crossval`` routine.
        r_in_range: Values tried for ``r_in``.
        r_out_range: Values tried for ``r_out``.
        r_all_range: Values tried for ``r_all``.
        output_dir: Forwarded to the cross-validation routine and used as
            the prefix of the ``models`` file path. It is concatenated
            as-is, so it needs a trailing path separator for the file to
            end up inside the directory.
        k_L_range: If truthy, ``sgm_crossval.loo_crossval_sketch`` is used
            with this range.
        infl_point_range: Otherwise, if truthy,
            ``sgm_crossval.loo_crossval_threshold`` is used with this range.
        p_range: Used with ``sgm_crossval.loo_crossval_pnn`` when neither
            of the two ranges above is truthy.

    Returns:
        The best model found. Ties keep the earlier model because the
        comparison is strict. If no candidate beats the initial placeholder
        model, the placeholder itself is returned.
    """
    # Placeholder to compare against; a candidate replaces it only when its
    # "quality" is strictly greater.
    best_model = sgm_crossval.model(-1, -1, base_model=model(-1, -1, -1))

    # The strategy depends only on the arguments, so pick it once instead of
    # re-deciding inside the innermost loop.
    if k_L_range:
        run_crossval, param_range = sgm_crossval.loo_crossval_sketch, k_L_range
    elif infl_point_range:
        run_crossval, param_range = sgm_crossval.loo_crossval_threshold, infl_point_range
    else:
        run_crossval, param_range = sgm_crossval.loo_crossval_pnn, p_range

    for r_in in r_in_range:
        for r_out in r_out_range:
            for r_all in r_all_range:
                base_model = model(r_in, r_out, r_all)
                rballs_database, _ = similar_nodes_mining.extract_rballs_database(hypergraph, r_in=r_in, r_out=r_out, r_all=r_all, center_default_color=True)
                # Turn each record's graphs into a list up front (the
                # original comment marks them as a generator).
                rballs_database = [(r_id, list(graphs), t) for r_id, graphs, t in rballs_database]
                current_model = run_crossval(rballs_database, wl_iter_range, param_range, output_dir, base_model=base_model)
                if current_model["quality"] > best_model["quality"]:
                    best_model = current_model

    # Append mode keeps results from earlier runs; the context manager closes
    # the file even if the write raises.
    with open(output_dir + "models", "a") as models_file:
        models_file.write(str(best_model) + ",\n")

    return best_model