parameters = parser.parse_args() min_frac, max_frac, interval, nSamples, edges_csv, delim_option, output, dataset, seq_samples_scale = parameters.min, \ parameters.max, parameters.interval, parameters.samples, parameters.csv, parameters.delim, parameters.output, parameters.dataset, parameters.seq_samples_scale#,\ #[float(x) for x in parameters.eps.split(',')], parameters.title delim_dict = {0 : '\t', 1 : ' ', 2 : ','} delimeter = delim_dict[delim_option] results_dir = 'experiments/results/' bUndirected = True if parameters.undirected == 1 else False bfs_method = 'seq' start_time = time() print "creating link-server object" if delimeter == "\t": print "delimeter is tab" L=LinkServerCP(dataset, edges_csv, create_new=True, prob_method=parameters.prob_method, prob=[0.1,0.01], delim=delimeter, undirected = bUndirected) n = L.getNumNodes() #n = len(V) print "Number of nodes: ", n nBFS_samples = 1000 running_times_file = results_dir + output + '-running_times-ratios_k_min-%.3f-k_max-%.3f-samples-%d-bfs_samples-%d-large'%(min_frac,max_frac,nSamples,nBFS_samples) running_times_file_raw = running_times_file + "-raw" removeFile(running_times_file) f = open(running_times_file, 'w') f_raw = open(running_times_file_raw,'w') nBFS_samples = 1000 nBFS_samples_theoretic = n * log(n,2) for nSeeds in xrange(int(min_frac * n), int(max_frac * n), int(interval * n)): print "k = ", nSeeds seeds_fname = output + "-seeds-" + str(nSeeds) + '.cp' runtimes_approx, runtimes_seq = [], [] for i in xrange(nSamples):
min_frac, max_frac, interval, nSamples, edges_csv, delim_option, output, dataset = parameters.min, \ parameters.max, parameters.interval, parameters.samples, parameters.csv, parameters.delim, parameters.output, parameters.dataset print "Input file: ", edges_csv print "Output file prefix: ", output delimiter = delim_dict[delim_option] start_time = time() bfs_method = 'seq' print "creating link-server object" if parameters.prob_method == 0: edge_prob = 0.2 else: edge_prob = [0.1,0.01] L=LinkServerCP(dataset, edges_csv, create_new=True, prob_method=parameters.prob_method, prob=edge_prob, delim=delimiter, undirected=parameters.undirected) print "n = ", L.getNumNodes() V = LoadNodesFromFile(edges_csv, delimiter) n = len(V) print 'min_frac', min_frac k_min = int(n * min_frac) k_max = int(n * max_frac) + 1 k_step = int(n * interval) generateSeedFiles(k_min, k_max, k_step, V, nSamples, 'experiments/' + output + "-seeds-") mean_errors, std_errors = [], [] for k in xrange(k_min, k_max,k_step): approx_fname = 'experiments/results/' + output + "-approx_errors-k_min-%d-k_max-%d-k-%d-samples-%d"%(k_min,k_max,k,nSamples)
parameters.min_samples, parameters.max_samples, parameters.samples_step, parameters.k_mode, parameters.k,\ parameters.csv, parameters.delim, parameters.output, parameters.dataset, parameters.prob_method, parameters.cores print "Dataset: ", dataset delim_dict = {0 : '\t', 1 : ' ', 2 : ','} delimeter = delim_dict[delim_option] results_dir = 'experiments/results/influence_values/' bUndirected = True if parameters.undirected == 1 else False bfs_method = 'seq' start_time = time() print "creating link-server object" if delimeter == "\t": print "delimeter is tab" L=LinkServerCP(dataset, edges_csv, create_new=True, prob_method=parameters.prob_method, prob=[0.1,0.01], delim=delimeter, undirected = bUndirected) n = L.getNumNodes() if k_mode == 0: nSeeds = int(n * k) else: nSeeds = int(k) perf_csv_fname = "perf_out" + str(sample(range(1000),1)[0]) + ".csv" print "Number of nodes: ", n print "nSeeds = ", nSeeds results_file = results_dir + output + '-influence_values_samples_min-%d-samples_max-%d-k-%.3f-prob_method-%d'%(min_samples,max_samples, k, prob_method) removeFile(results_file) seeds_fname = "%s-seeds-%d.cp"%(dataset,nSeeds) generateSeedFiles(nSeeds, nSeeds+1, 1, range(n), 1, dataset + "-seeds-")
delimiter = delim_dict[delim_option] start_time = time() bfs_method = 'seq' print "creating link-server object" if parameters.prob_method == 0: edge_prob = 0.2 else: edge_prob = [0.1, 0.01] L = LinkServerCP(dataset, edges_csv, create_new=True, prob_method=parameters.prob_method, prob=edge_prob, delim=delimiter, undirected=parameters.undirected) print "n = ", L.getNumNodes() V = LoadNodesFromFile(edges_csv, delimiter) n = len(V) k_min = int(n * min_frac) k_max = int(n * max_frac) + 1 k_step = int(n * interval) print "max_k = ", max_frac print "Minimum k value: %d, maximum k value: %d" % (k_min, k_max) removeFile(dataset) generateSeedFiles(k_min, k_max, k_step, V, nSamples, 'experiments/' + output + "-seeds-") results_fname = "experiments/results/" + output + '-approximations-nSamples-%d-k_frac-%.3f-%.3f' % ( nSamples, min_frac, max_frac) removeFile(results_fname) mean_errors, std_errors = [], []