def test_sync_gosta(nb_iter=3000, trace_period=10, data_file='wine.csv',
                    graph_type='watts',
                    pairwise_f=pf.within_clust_scatter,
                    averaging_function=gosta.pair_avg,
                    graph=None, data=None, **kwargs):
    """Run one synchronous gossip pass and compare it with the exact value.

    Calls ``gosta.gosta_sync`` on a graph, collects the per-node
    ``'estimate'`` attributes it leaves on the graph, and computes the
    reference value with ``compute_truth_all_pairs``.  When a trace period is
    given, the trace and a constant line at the reference value are plotted
    on matplotlib figure 1.

    Args:
        nb_iter: Iteration count, passed positionally to ``gosta_sync``.
        trace_period: Forwarded to ``gosta_sync``; ``None`` disables plotting.
        data_file: File handed to ``gc.parse`` when the data has to be loaded;
            also used in the plot title.
        graph_type: Graph kind handed to ``gc.build_graph``; also used in the
            plot title.
        pairwise_f: Pairwise function given to both ``gosta_sync`` and
            ``compute_truth_all_pairs``.
        averaging_function: Forwarded to ``gosta_sync``.
        graph: Pre-built graph.  Used only if ``data`` is supplied as well.
        data: Pre-loaded data.  Used only if ``graph`` is supplied as well.
        **kwargs: Extra arguments for ``gc.build_graph``; ignored when both
            ``graph`` and ``data`` are supplied.

    Returns:
        A ``(summary, trace)`` tuple.  ``summary`` is a dict with the keys
        ``'gosta_mean'``, ``'gosta_variance'`` (mean and variance of the node
        estimates), ``'data_mean'`` (the reference value) and ``'graph'``.
        ``trace`` is whatever ``gosta_sync`` returned.
    """
    # Identity tests: ``data == None`` on a NumPy array is an element-wise
    # comparison, whose truth value is ambiguous inside ``or``.
    if graph is None or data is None:
        # Either one missing means both are rebuilt from ``data_file``.
        data = gc.parse(data_file)
        graph = gc.build_graph(data, graph_type, **kwargs)

    trace = gosta.gosta_sync(graph, pairwise_f, nb_iter,
                             trace_period=trace_period,
                             averaging_function=averaging_function)
    results = [graph.node[n]['estimate'] for n in graph]
    true_mean = compute_truth_all_pairs(data, pairwise_f)

    if trace_period is not None:
        plt.figure(1)
        plt.plot(trace)
        # Floor division keeps the length an integer regardless of whether
        # ``/`` means integer or true division in the running interpreter.
        # NOTE(review): the length presumably mirrors len(trace) - confirm
        # against gosta_sync.
        plt.plot(true_mean * np.ones(nb_iter // trace_period - 1))
        plt.title(data_file + ', ' + graph_type)

    summary = {'gosta_mean': np.mean(results),
               'gosta_variance': np.var(results),
               'data_mean': true_mean,
               'graph': graph}
    return (summary, trace)
def gosta_experiment(nb_repetitions = 1, n_iter = 10000, trace_period = 10, data_files = ['wine.csv'], graph_type_list = ['watts', 'complete', 'grid'], f_to_avg = pf.within_clust_scatter, averaging_function = gosta.neighbourhood_avg, root_results_folder = path_to_results_folder()): for data_src in data_files: data_name = re.match(r'([^\.]+)\..*', data_src).group(1) print '\n', data_name data = gc.parse(data_src) print data.shape true_mean = compute_truth_all_pairs( data, f_to_avg) for graph_type in graph_type_list: print '\n', graph_type, '\n' results_folder_name = '%s_%s_%s' % ( function_names[averaging_function], data_name, graph_type) results_folder = utils.make_new_dir( root_results_folder, results_folder_name) graph = gc.build_graph(data, graph_type) traces = [] error_traces = [] for rep in range(nb_repetitions): print '%s, %s, repetition %d' % (data_name, graph_type, rep) trace = gosta.gosta_sync( graph, f_to_avg, n_iter = n_iter, trace_period = trace_period, averaging_function = averaging_function, log_filename = path.join(results_folder, 'log.npy'), log_period = 1000) traces.append(trace) error_traces.append(np.abs((np.array(trace) - true_mean) / true_mean)) traces = np.array(traces) error_traces = np.atleast_2d(np.array(error_traces)) err_mean = np.mean(error_traces, axis = 0) traces_file_name = path.join( results_folder, '%s_%s_traces.npy' % (data_name, graph_type)) mean_file_name = path.join( results_folder, '%s_%s_mean.npy' % (data_name, graph_type)) with open(traces_file_name, 'w') as traces_file: np.save(traces_file, traces) with open(mean_file_name, 'w') as mean_file: np.save(mean_file, err_mean)