gc.enable()
    parameters = parser.parse_args()
    min_frac, max_frac, interval, nSamples, edges_csv, delim_option, output, dataset, seq_samples_scale = parameters.min, \
      parameters.max, parameters.interval, parameters.samples, parameters.csv, parameters.delim, parameters.output, parameters.dataset, parameters.seq_samples_scale#,\
      #[float(x) for x in parameters.eps.split(',')], parameters.title

    delim_dict = {0 : '\t', 1 : ' ', 2 : ','}
    delimeter = delim_dict[delim_option]
    results_dir = 'experiments/results/'
    bUndirected = True if parameters.undirected == 1 else False
    bfs_method = 'seq'
    start_time = time()
    print "creating link-server object"
    if delimeter == "\t":
        print "delimeter is tab"
    L=LinkServerCP(dataset, edges_csv, create_new=True, prob_method=parameters.prob_method, prob=[0.1,0.01], delim=delimeter, undirected = bUndirected)
    n = L.getNumNodes()
    #n = len(V)
    print "Number of nodes: ", n
    nBFS_samples = 1000
    running_times_file = results_dir + output + '-running_times-ratios_k_min-%.3f-k_max-%.3f-samples-%d-bfs_samples-%d-large'%(min_frac,max_frac,nSamples,nBFS_samples)
    running_times_file_raw = running_times_file + "-raw"
    removeFile(running_times_file)
    f = open(running_times_file, 'w')
    f_raw = open(running_times_file_raw,'w')
    nBFS_samples = 1000
    nBFS_samples_theoretic = n * log(n,2)
    for nSeeds in xrange(int(min_frac * n), int(max_frac * n), int(interval * n)):
        print "k = ", nSeeds
        seeds_fname = output + "-seeds-" + str(nSeeds) + '.cp'
        runtimes_approx, runtimes_seq = [], []
Example #2
0
    parameters = parser.parse_args()
    min_frac, max_frac, interval, nSamples, edges_csv, delim_option, output, dataset = parameters.min, \
      parameters.max, parameters.interval, parameters.samples, parameters.csv, parameters.delim, parameters.output, parameters.dataset
    
    print "Input file: ", edges_csv
    print "Output file prefix: ", output

    delimiter = delim_dict[delim_option]
    start_time = time()
    bfs_method = 'seq'
    print "creating link-server object"
    if parameters.prob_method == 0:
        edge_prob = 0.2
    else:
        edge_prob = [0.1,0.01]
    L=LinkServerCP(dataset, edges_csv, create_new=True, prob_method=parameters.prob_method, prob=edge_prob, delim=delimiter, undirected=parameters.undirected)
    print "n = ", L.getNumNodes()
    V = LoadNodesFromFile(edges_csv, delimiter)
    n = len(V)
    print 'min_frac', min_frac
    k_min = int(n * min_frac)
    k_max = int(n * max_frac) + 1
    k_step = int(n * interval)
    

    generateSeedFiles(k_min, k_max, k_step, V, nSamples, 'experiments/' + output + "-seeds-")
    
    
    mean_errors, std_errors = [], []

    for k in xrange(k_min, k_max,k_step):
    parameters = parser.parse_args()
    min_samples, max_samples, samples_step, k_mode, k, edges_csv, delim_option, output, dataset, prob_method, cores= \
      parameters.min_samples, parameters.max_samples, parameters.samples_step, parameters.k_mode, parameters.k,\
      parameters.csv, parameters.delim, parameters.output, parameters.dataset, parameters.prob_method, parameters.cores

    print "Dataset: ", dataset
    delim_dict = {0 : '\t', 1 : ' ', 2 : ','}
    delimeter = delim_dict[delim_option]
    results_dir = 'experiments/results/influence_values/'
    bUndirected = True if parameters.undirected == 1 else False
    bfs_method = 'seq'
    start_time = time()
    print "creating link-server object"
    if delimeter == "\t":
        print "delimeter is tab"
    L=LinkServerCP(dataset, edges_csv, create_new=True, prob_method=parameters.prob_method, prob=[0.1,0.01], delim=delimeter, undirected = bUndirected)
    
    n = L.getNumNodes()
    if k_mode == 0:
        nSeeds = int(n * k)
    else:
        nSeeds = int(k)

    perf_csv_fname = "perf_out" + str(sample(range(1000),1)[0]) + ".csv"
    print "Number of nodes: ", n
    print "nSeeds = ", nSeeds

    results_file = results_dir + output + '-influence_values_samples_min-%d-samples_max-%d-k-%.3f-prob_method-%d'%(min_samples,max_samples, k, prob_method)
    removeFile(results_file)

    seeds_fname = "%s-seeds-%d.cp"%(dataset,nSeeds)
Example #4
0
#!/usr/bin/python
from link_server import LinkServerCP
import sys
import argparse

# Command-line interface: a single string option, -cp, whose value is handed
# unchanged to LinkServerCP below.
parser = argparse.ArgumentParser()
parser.add_argument('-cp', type=str)

if __name__ == "__main__":
    parameters = parser.parse_args()
    # A single-argument print(...) parses on both Python 2 and Python 3. The
    # two spaces after the colon reproduce the separator that the former
    # `print a, b` statement inserted between its items.
    print("name of dataset file:  %s" % (parameters.cp,))
    # Constructing the link server is the whole purpose of this script; the
    # object is kept in L but not used further here.
    L = LinkServerCP(parameters.cp)
Example #5
0
    new = True
    total_time_start = time()
    iters = 0
    if pr_method == 2:
        prob = [0.01, 0.1]
    print "min_frac = %.5f, max_frac = %.5f, interval = %.5f" % (
        min_frac, max_frac, interval)
    print "Number of cores to use: ", parameters.cores
    print "CSV file to be used: ", edges_csv
    print "creating link-server object"
    if delimeter == "\t":
        print "delimeter is tab"
    L = LinkServerCP(dataset,
                     edges_csv,
                     new,
                     pr_method,
                     prob,
                     delim=delimeter,
                     undirected=parameters.undirected)
    V = LoadNodesFromFile(edges_csv, delimeter)
    n = len(V)
    print "Number of nodes: ", n
    running_times_file = output + '-running_times-large-samples-%d-k_frac-%.5f-%.5f' % (
        nSamples, min_frac, max_frac)

    removeFile(running_times_file)
    dRuntimes = defaultdict(list)

    for nSeeds in xrange(int(min_frac * n), int(max_frac * n),
                         int(interval * n)):
        start_time_k = time()
            n, parameters.graph_method)
        running_times_files = [
            output +
            '-running_times-n-%d-graph_method-%d-samples-prob_method-%d-%d-eps-%.5f'
            %
            (n, parameters.graph_method, nSamples, parameters.prob_method, eps)
            for eps in eps_list
        ]
        for fname in running_times_files:
            removeFile(fname)
        createRandomGraph(csv_fname, parameters.graph_method, n,
                          parameters.er_p, parameters.ws_beta, parameters.ws_k)
        L = LinkServerCP('input/datasets/' + dataset,
                         csv_fname,
                         create_new=True,
                         prob_method=parameters.prob_method,
                         prob=[0.1, 0.01],
                         delim='\t',
                         undirected=1)
        # record loading time of link-server -- for interpolation
        removeFile(perf_output_fname)
        subprocess.Popen("perf stat -x, -o %s python load_link_server.py -cp %s"%\
                         (perf_output_fname, "input/datasets/" + dataset), shell = True, stdout = subprocess.PIPE).stdout.read()

        nCycles_link_server = getNumCycles(perf_output_fname)
        removeFile(perf_output_fname)

        #removeFile(csv_fname)
        V = xrange(n)

        for nSeeds in xrange(int(min_frac * n), int(max_frac * n),
Example #7
0
def plot_k_vs_approx():
    """Plot the approximation ratio against the seed fraction k/n.

    For each dataset the node count n is read from a LinkServerCP built on the
    dataset's CSV file. k then walks from int(n * 0.01) up to int(n * 0.6) in
    steps of int(n * 0.02). For every grid point the function looks in
    'experiments/results/' for an approximation-error file whose k lies
    within -30..29 of the grid value, pairs it with the sequential-error file
    for the same k, and hands both to calculateMeanErrorAndSEM.

    The collected series are passed to plot2d once for a combined figure and
    once per dataset, with output paths under 'experiments/figures/'.

    Raises:
        AssertionError: if no approximation-error file is found for any of
            the candidate k values around a grid point.
    """
    csv_files = [
        # 'input/datasets/wiki-Vote-small.txt',
        # 'input/datasets/email-Enron.txt',
        'input/datasets/Epinions1.txt',
        'input/datasets/Wiki-Vote_stripped.txt',
    ]
    work_dir = 'experiments/results/'
    delims = ['\t', '\t']
    # Each link server is built only to read its node count; it is not kept.
    n_values = [
        LinkServerCP('tmp', csv_fname, create_new=True,
                     delim=delims[i]).getNumNodes()
        for i, csv_fname in enumerate(csv_files)
    ]
    datasets = ['epinions', 'wiki-vote']

    # Loop-invariant settings, hoisted out of the dataset and k loops.
    min_frac, max_frac, step_frac = 0.01, 0.6, 0.02
    offsets = range(-30, 30)

    x_vals_lists, y_vals_lists, sem_lists = [], [], []
    for i, dataset in enumerate(datasets):
        print("dataset:  %s" % dataset)
        n = n_values[i]
        k_min = int(n * min_frac)
        k_max = int(n * max_frac) + 1
        step = int(n * step_frac)

        # Scan the results directory once and keep the matching names in a
        # set, so each candidate lookup below is a constant-time test.
        approx_prefix = dataset + ("-approx_errors-k_min-%d-k_max-%d" %
                                   (k_min, k_max))
        approx_files = set(
            fname for fname in os.listdir(work_dir)
            if fname.startswith(approx_prefix))

        x_vals, y_data, errs = [], [], []
        for k in range(k_min, k_max, step):
            print("k =  %s" % k)
            candidate_fnames = [
                dataset + "-approx_errors-k_min-%d-k_max-%d-k-%d-samples-10" %
                (k_min, k_max, k + offset) for offset in offsets
            ]
            files_exist = [
                candidate in approx_files for candidate in candidate_fnames
            ]
            assert any(
                files_exist), "candidate fnames: " + str(candidate_fnames)
            # The first hit wins, i.e. the smallest offset with a file.
            offset_idx = files_exist.index(True)
            approx_fname = candidate_fnames[offset_idx]
            seq_fname = dataset + "-seq-approx-errors-k-" + str(
                k + offsets[offset_idx]) + "-samples-10"
            print("approx_fname:  %s" % approx_fname)
            print("seq_fname:  %s" % seq_fname)
            # The k reported by the helper (not the grid value) is plotted.
            k_used, mean_err, err = calculateMeanErrorAndSEM(
                work_dir + approx_fname, work_dir + seq_fname)
            x_vals.append(1.0 * k_used / n)
            y_data.append(mean_err)
            errs.append(err)
        x_vals_lists.append(x_vals)
        y_vals_lists.append(y_data)
        sem_lists.append(errs)
        assert len(x_vals) == len(y_data)
        assert len(x_vals) == len(errs)
    plot2d(x_vals_lists, y_vals_lists, sem_lists, datasets,
           [r'$k/n$', 'Approximation ratio'], '',
           'experiments/figures/k_vs_approx_combined.pdf')

    print("Printing separate plots: ")
    for i, dataset in enumerate(datasets):
        x_data = x_vals_lists[i]
        y_data = y_vals_lists[i]
        sem_list = sem_lists[i]
        # NOTE(review): x_data is passed as a flat list here, while the
        # combined call above passes a list of lists and y/sem are wrapped --
        # confirm that plot2d accepts both shapes for its first argument.
        plot2d(x_data, [y_data], [sem_list], [dataset],
               [r'$k/n$', 'Approximation ratio'],
               '', 'experiments/figures/' + dataset + '-k_vs_approx.pdf')
Example #8
0
    print "Input file: ", edges_csv
    print "Output file prefix: ", output

    delimiter = delim_dict[delim_option]
    start_time = time()
    bfs_method = 'seq'
    print "creating link-server object"
    if parameters.prob_method == 0:
        edge_prob = 0.2
    else:
        edge_prob = [0.1, 0.01]
    L = LinkServerCP(dataset,
                     edges_csv,
                     create_new=True,
                     prob_method=parameters.prob_method,
                     prob=edge_prob,
                     delim=delimiter,
                     undirected=parameters.undirected)
    print "n = ", L.getNumNodes()
    V = LoadNodesFromFile(edges_csv, delimiter)
    n = len(V)

    k_min = int(n * min_frac)
    k_max = int(n * max_frac) + 1
    k_step = int(n * interval)
    print "max_k = ", max_frac
    print "Minimum k value: %d, maximum k value: %d" % (k_min, k_max)
    removeFile(dataset)
    generateSeedFiles(k_min, k_max, k_step, V, nSamples,
                      'experiments/' + output + "-seeds-")
Example #9
0
      parameters.max, parameters.interval, parameters.samples, parameters.csv, parameters.delim, parameters.output, parameters.dataset#,\
    #[float(x) for x in parameters.eps.split(',')], parameters.title
    seed()
    perf_fname = "runtimes" + str(random())
    delim_dict = {0: '\t', 1: ' ', 2: ','}
    delimeter = delim_dict[delim_option]
    results_dir = 'experiments/results/'
    start_time = time()
    bfs_method = 'seq'

    print "creating link-server object"
    if delimeter == "\t":
        print "delimeter is tab"
    L = LinkServerCP(dataset,
                     edges_csv,
                     create_new=True,
                     prob_method=2,
                     prob=[0.1, 0.01],
                     delim=delimeter)
    V = LoadNodesFromFile(edges_csv, delimeter)
    n = len(V)
    print "Number of nodes: ", n
    running_times_file = results_dir + output + '-running_times-ratios_k_min-%.3f-k_max-%.3f-samples-%d' % (
        min_frac, max_frac, nSamples)
    running_times_file_raw = running_times_file + "-raw"
    removeFile(running_times_file)
    f = open(running_times_file, 'w')
    f_raw = open(running_times_file_raw, 'w')
    for nSeeds in xrange(int(min_frac * n), int(max_frac * n),
                         int(interval * n)):
        print "k = ", nSeeds
        seeds_fname = output + "-seeds-" + str(nSeeds) + '.cp'
Example #10
0
def print_out(text):
    """Emit text to stdout, to the output file, or to both.

    The destination is chosen by the module-level ``output_mode``:
    0 prints to stdout, 1 appends to ``output_file``, 2 does both. Any other
    value makes the call a no-op.

    Args:
        text: the string to emit; a newline is added when writing to the file.
    """
    if output_mode in [0, 2]:
        # Single-argument form is valid on both Python 2 and Python 3.
        print(text)
    if output_mode in [1, 2]:
        # The context manager closes the handle even if write() raises.
        with open(output_file, 'a') as f_output:
            f_output.write(text + '\n')


if __name__ == "__main__":
    parameters = parser.parse_args()
    dataset, seeds_file, results_file, output_mode, nSamples = parameters.dataset, parameters.seeds_file, parameters.results_file,\
      parameters.output_mode, parameters.nSamples

    L = LinkServerCP(dataset)
    f = open(seeds_file, 'r')

    assert (nSamples <= 0 or parameters.min_relative_standard_error <= 0)
    seeds_list = cp.load(f)
    f.close()
    removeFile(parameters.reached_nodes_file)
    if output_mode in [1, 2]:
        f = open(results_file, 'w')
    for i, seed_set in enumerate(seeds_list):
        if output_mode in [0, 2]:
            print "Sample ", i
        if parameters.get_n_reached == 1:
            try:
                avg, total_samples,l_n_reached = sequential_estimation(L, seed_set, max_samples_cap=nSamples, \
                                                                       nCores=parameters.cores,bReturnValues = True,\