Esempio n. 1
0
def test_sync_gosta(nb_iter = 3000, trace_period = 10,
                    data_file = 'wine.csv', graph_type = 'watts',
                    pairwise_f = pf.within_clust_scatter, 
                    averaging_function = gosta.pair_avg,
                    graph = None, data = None, **kwargs):
    """Run ``gosta.gosta_sync`` once and compare it with the exact value.

    After the run, the ``'estimate'`` attribute of every node of ``graph``
    is collected and summarised, and the reference value is obtained from
    ``compute_truth_all_pairs(data, pairwise_f)``.

    Parameters
    ----------
    nb_iter : int
        Passed to ``gosta.gosta_sync`` as its third positional argument
        (presumably the number of iterations -- confirm against gosta).
    trace_period : int or None
        Forwarded to ``gosta.gosta_sync``. When not None, the returned trace
        is plotted on figure 1 together with a constant line at the
        reference value; it must then be non-zero.
    data_file : str
        Given to ``gc.parse`` when the graph has to be built; also used in
        the plot title.
    graph_type : str
        Given to ``gc.build_graph`` when the graph has to be built; also
        used in the plot title.
    pairwise_f : callable
        Passed to both ``gosta.gosta_sync`` and ``compute_truth_all_pairs``.
    averaging_function : callable
        Forwarded to ``gosta.gosta_sync``.
    graph, data :
        Pre-built graph and its data. Both must be supplied to be used: if
        either one is None, both are rebuilt from ``data_file``.
    **kwargs :
        Forwarded to ``gc.build_graph`` (only when the graph is built here).

    Returns
    -------
    tuple
        ``(summary, trace)`` where ``summary`` is a dict with the keys
        ``'gosta_mean'``, ``'gosta_variance'``, ``'data_mean'`` and
        ``'graph'``, and ``trace`` is the value returned by
        ``gosta.gosta_sync``.
    """
    # Identity tests: ``data == None`` on a NumPy array is evaluated
    # element-wise and cannot be used as a boolean.
    if graph is None or data is None:
        data = gc.parse(data_file)
        graph = gc.build_graph(data, graph_type, **kwargs)

    trace = gosta.gosta_sync(graph, pairwise_f, nb_iter,
                             trace_period = trace_period,
                             averaging_function = averaging_function)

    results = [graph.node[n]['estimate'] for n in graph]

    true_mean = compute_truth_all_pairs(data, pairwise_f)

    if trace_period is not None:
        # Floor division keeps the length an integer under true division
        # as well; np.ones() rejects a float size.
        reference_length = int(nb_iter // trace_period) - 1
        plt.figure(1)
        plt.plot(trace)
        plt.plot(true_mean * np.ones(reference_length))
        plt.title(data_file + ', ' + graph_type)

    return ({'gosta_mean' : np.mean(results),
             'gosta_variance' : np.var(results),
             'data_mean' : true_mean,
             'graph' : graph}, trace)
Esempio n. 2
0
def gosta_experiment(nb_repetitions = 1, n_iter = 10000, trace_period = 10,
                     data_files = ['wine.csv'],
                     graph_type_list = ['watts', 'complete', 'grid'],
                     f_to_avg = pf.within_clust_scatter,
                     averaging_function = gosta.neighbourhood_avg,
                     root_results_folder = path_to_results_folder()):
    """Run ``gosta.gosta_sync`` over data sets and graph types, saving traces.

    For every data file and every graph type, a graph is built once and
    ``gosta.gosta_sync`` is run ``nb_repetitions`` times on it. Two ``.npy``
    files are then written into a directory obtained from
    ``utils.make_new_dir``: the stacked traces
    (``<data>_<graph>_traces.npy``) and the relative error
    ``|trace - truth| / truth`` averaged over repetitions
    (``<data>_<graph>_mean.npy``). ``gosta.gosta_sync`` is also given a
    ``log.npy`` path inside that directory.

    Parameters
    ----------
    nb_repetitions : int
        Number of runs per (data file, graph type) pair.
    n_iter : int
        Forwarded to ``gosta.gosta_sync`` as ``n_iter``.
    trace_period : int
        Forwarded to ``gosta.gosta_sync`` as ``trace_period``.
    data_files : list of str
        Files given to ``gc.parse``. The part of each name before the first
        ``.`` is used to name the outputs.
    graph_type_list : list of str
        Graph types given to ``gc.build_graph``.
    f_to_avg : callable
        Passed to ``gosta.gosta_sync`` and ``compute_truth_all_pairs``.
    averaging_function : callable
        Forwarded to ``gosta.gosta_sync``; also used as a key into
        ``function_names`` to name the results directory.
    root_results_folder : str
        Parent directory handed to ``utils.make_new_dir``.

    Raises
    ------
    AttributeError
        If a data file name does not match ``[^.]+\\..*`` (for instance when
        it contains no ``.``), because ``re.match`` then returns None.
    """
    # Single-argument print() calls give the same output with the Python 2
    # print statement and with the Python 3 print function.
    for data_src in data_files:
        data_name = re.match(r'([^\.]+)\..*', data_src).group(1)
        print('\n' + data_name)

        data = gc.parse(data_src)
        print(data.shape)
        true_mean = compute_truth_all_pairs(
            data, f_to_avg)

        for graph_type in graph_type_list:
            print('\n%s \n' % graph_type)

            results_folder_name = '%s_%s_%s' % (
                function_names[averaging_function], data_name, graph_type)

            results_folder = utils.make_new_dir(
                root_results_folder, results_folder_name)

            graph = gc.build_graph(data, graph_type)
            traces = []
            error_traces = []

            for rep in range(nb_repetitions):
                print('%s, %s, repetition %d' % (data_name, graph_type, rep))

                trace = gosta.gosta_sync(
                    graph, f_to_avg, n_iter = n_iter,
                    trace_period = trace_period,
                    averaging_function = averaging_function,
                    log_filename = path.join(results_folder, 'log.npy'),
                    log_period = 1000)

                traces.append(trace)
                # Relative error of each trace point w.r.t. the exact value.
                error_traces.append(np.abs((np.array(trace)
                                            - true_mean)
                                           / true_mean))

            traces = np.array(traces)
            error_traces = np.atleast_2d(np.array(error_traces))
            err_mean = np.mean(error_traces, axis = 0)

            traces_file_name = path.join(
                results_folder,
                '%s_%s_traces.npy' % (data_name, graph_type))

            mean_file_name = path.join(
                results_folder,
                '%s_%s_mean.npy' % (data_name, graph_type))

            # np.save emits binary data, so the files must be opened in
            # binary mode; text mode corrupts the output on Windows and
            # fails outright on Python 3.
            with open(traces_file_name, 'wb') as traces_file:
                np.save(traces_file, traces)

            with open(mean_file_name, 'wb') as mean_file:
                np.save(mean_file, err_mean)