Example #1
def run(dataset, model, simulations):
    graph, guaranteed = research_data.import_graph_data(dataset, model)
    # print("\nGuaranteed: {}".format(guaranteed))
    # size = find_opt_seed_size(graph, simulations, 100, dataset, model)
    size = optimal_size_mp.run(graph, dataset, model, 100, guaranteed,
                               simulations)
    print("Size: {}".format(size))
    # load saved seeds, compute occurrences
    file_path = "data/{0}/sim/{0}_{1}_sets.csv".format(dataset, model)
    seed_sets = []
    with open(file_path, 'r') as f:
        for line in f:
            data = line.strip("\n").strip(" ").split(" ")
            seed_sets.append(data)

    seeds = {}
    for seed_set in seed_sets:
        for seed in seed_set:
            if seed not in seeds:
                seeds[seed] = 1
            else:
                seeds[seed] += 1

    sorted_seeds = sorted(seeds.items(),
                          key=operator.itemgetter(1),
                          reverse=True)
    # print("\nSeeds: {}\n".format(sorted_seeds))
    opt_seed = []
    i = 0
    while len(opt_seed) < size:
        seed = int(sorted_seeds[i][0])
        if seed not in guaranteed:
            opt_seed.append(seed)
        i += 1
    # select top k users and compute inf_score
    score = inf_score_est_mp(graph, opt_seed)
    file_path = "data/{}/sim/opt_seed_{}.csv".format(dataset, model)
    with open(file_path, 'w') as f:
        for seed in opt_seed:
            f.write(str(seed))
            f.write("\n")
    print("Optimal seed set saved to {}".format(file_path))

    msg = "Best seed set found score is: {} "
    print(msg.format(score))
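
A minimal sketch of the same occurrence count done with collections.Counter from the standard library; the seed_sets literal below is made-up example data, not output of the project code.

# Counting seed occurrences across saved seed sets with collections.Counter.
# The seed_sets list is hypothetical example data.
from collections import Counter

seed_sets = [["12", "7", "3"], ["7", "3", "44"], ["7", "12"]]
counts = Counter(seed for seed_set in seed_sets for seed in seed_set)

# most_common() already returns (seed, count) pairs sorted by count, descending
sorted_seeds = counts.most_common()
print(sorted_seeds)   # [('7', 3), ('12', 2), ('3', 2), ('44', 1)]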
Example #2
def run(dataset):
    # step 1 load graph
    graph = import_graph_data(dataset)[0]

    # step 2 get keys and order them
    sorted_keys = sorted(graph.keys())

    # step 3 in new equivalency dict, assign index value to keys
    equivalency = {}
    for i in range(len(sorted_keys)):
        equivalency[sorted_keys[i]] = i

    # step 4 save graph back to file
    file_name = "data/{}/indexed_{}_wc.inf".format(dataset)
    with open(file_name, 'w') as f:
        for key in graph.keys():
            for neighbor in graph[key].keys():
                line = str(equivalency[key]) + ' '
                line += str(equivalency[neighbor]) + ' '
                line += str(graph[key][neighbor]) + "\n"
                f.write(line)
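
A self-contained illustration of the re-indexing step on a toy in-memory graph; the graph literal is hypothetical but follows the same graph[node][neighbor] = weight layout used above.

# Map arbitrary node ids to contiguous indices, then emit re-indexed edges.
graph = {10: {42: 0.5, 99: 0.25}, 42: {99: 1.0}, 99: {}}

sorted_keys = sorted(graph.keys())                           # [10, 42, 99]
equivalency = {key: i for i, key in enumerate(sorted_keys)}  # {10: 0, 42: 1, 99: 2}

for key, neighbors in graph.items():
    for neighbor, weight in neighbors.items():
        print(equivalency[key], equivalency[neighbor], weight)
# 0 1 0.5
# 0 2 0.25
# 1 2 1.0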
Example #3
    print("Models \t\t {}".format(args.models))
    print("Reach \t\t {}".format(args.reach))
    print("Algorithm \t [{}]".format(args.algorithm))
    print("Simulations \t {}".format(args.simulations))
    print("Series \t\t {}".format(args.series))
    print("Pre-processing \t [{}]".format(args.pre))
    print("Live \t\t [{}]".format(args.live))
    print("----------------------------")

    # args = args.getArguments("Main")

    if (args.pre and args.algorithm != 'random_im'
            and args.algorithm != 'opt_size'):
        print("Launched {} pre-processing!".format(args.algorithm))
        for model in args.models:
            graph, _ = research_data.import_graph_data(args.dataset, model)
            if args.algorithm == 'rtim':
                rtim.run_pre_processing(graph, args.dataset, model, args.depth)

    if args.live and args.algorithm != 'opt_size':
        print("Launched {} live!".format(args.algorithm))
        for model in args.models:
            graph, _ = research_data.import_graph_data(args.dataset, model)
            for serie in args.series:
                if args.algorithm == 'rtim':
                    rtim.run_live(graph, args.dataset, model, serie)
                elif args.algorithm == 'rand_repeat':
                    random_im.run_repeat(graph, args.dataset, model, serie)
                elif args.algorithm == 'rand_no_repeat':
                    random_im.run_no_repeat(graph, args.dataset, model, serie)
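
A hedged sketch of an argparse setup that would produce an args object with the attributes read above; the flag names, types, and defaults are assumptions for illustration, not the project's actual CLI.

import argparse

# Hypothetical parser: only the attribute names (dataset, models, reach,
# algorithm, simulations, series, depth, pre, live) come from the code above;
# everything else is assumed.
parser = argparse.ArgumentParser(description="IM experiment runner (sketch)")
parser.add_argument('--dataset', default='small_graph')
parser.add_argument('--models', nargs='+', default=['wc'])
parser.add_argument('--reach', type=float, default=1.0)
parser.add_argument('--algorithm', default='rtim')
parser.add_argument('--simulations', type=int, default=10000)
parser.add_argument('--series', nargs='+', type=int, default=[0])
parser.add_argument('--depth', type=int, default=3)
parser.add_argument('--pre', action='store_true')
parser.add_argument('--live', action='store_true')
args = parser.parse_args()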
Example #4
def run(dataset, graph, lim):
    '''
        Greedily grows a seed set of up to `lim` seeds, records the influence
        spread after each addition, and plots spread against seed set size.
    '''
    seeds = []
    spread = []

    best_seed = []
    for k in range(1, lim + 1):
        best = getBestSeed(graph, best_seed)
        best_seed = best[1]
        seeds.append(len(best_seed))
        spread.append(best[0])
        if best[0] >= float(len(graph.keys())):
            break

    plt.plot(seeds, spread, color='blue', label='inf score of seeds')
    plt.xlabel('seeds')
    plt.ylabel('spread')
    plt.title('Influence score vs seeds found')
    plt.savefig('data/{}/greedy.png'.format(dataset))
    plt.close()
    return best_seed


if __name__ == "__main__":
    graph = {}
    graph, _ = research_data.import_graph_data("small_graph", "wc")
    best = run("small_graph", graph, 11)
    print("Best: {}".format(best))
Example #5
        raise Exception(msg.format(args.model))

    print(
        "-------------------------------------------------------------------")
    print("Importing [{}]".format(args.dataset))
    print("Model [{}]".format(args.model))
    print("Theta_ap [{}]".format(args.ap))
    print("Top [{}]".format(args.inf))

    theta_ap = args.ap
    # print("RTIM Pre-Process [{}]".format(args.preProc))
    # print("RTIM Live [{}]".format(args.live))

    print("---")
    graph = {}
    graph, _ = research_data.import_graph_data(args.dataset, args.model)

    graph_values = {}
    for node in graph.keys():
        graph_values[node] = {'inf': 0, 'ap': 0}

    import_inf_scores(args.dataset, args.model, graph_values)
    print(graph_values)
    # inf_scores = inf_score_array(graph_values)
    #
    # theta_inf_index = int(inf_threshold_index(inf_scores, args.inf))
    # theta_inf = inf_scores[theta_inf_index]
    # # msg = "Influence threshold index {}, value {}"
    # # print(msg.format(theta_inf_index, theta_inf))
    #
    # seed = set()
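
A hedged guess at what the commented-out threshold step might compute, assuming (this is an assumption, not the project's definition) that the `top` argument is a percentage and theta_inf is the smallest score among the top `top` percent of nodes when influence scores are sorted in descending order.

def inf_threshold_index_sketch(inf_scores_desc, top):
    # index of the last node kept when taking the top `top` percent
    cutoff = max(1, int(len(inf_scores_desc) * top / 100.0))
    return cutoff - 1

scores = sorted([0.2, 3.1, 1.7, 0.9, 2.4], reverse=True)   # [3.1, 2.4, 1.7, 0.9, 0.2]
idx = inf_threshold_index_sketch(scores, 40)               # top 40% of 5 nodes -> 2 nodes
theta_inf = scores[idx]
print(idx, theta_inf)                                      # 1 2.4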
Example #6
def run_no_repeat(graph, dataset, model, serie, max_size=float('inf')):
    '''
        Runs Random IM with the no-repeat property: a user that has already
        been targeted is never retargeted.
        max_size = maximum number of users that can be targeted
    '''
    print("> Running Random IM on {}/{}/s{}".format(dataset, model, serie))
    seed = set()
    # read from random_models
    file_name = 'data/{0}/random_model/{0}_s{1}.csv'.format(dataset, serie)
    with open(file_name, 'r') as f:
        reader = csv.reader(f)
        for line in reader:
            user = int(line[0])
            if target() and user not in seed and len(seed) < max_size:
                seed.add(user)

    print(": Finished targeting!")
    inf_spread = inf_score_est_mp(graph, seed)
    print("Influence spread is {}".format(inf_spread))
    save_seed('rand_no_repeat', seed, inf_spread, dataset, model, serie)
    save_data('rand_no_repeat', dataset, model, serie, len(seed), inf_spread)
    print(": Finished running Random IM.")


if __name__ == "__main__":
    graph = {}
    graph, _ = research_data.import_graph_data('small_graph', 'wc')

    run_no_repeat(graph, 'small_graph', 'wc', 0)
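
A self-contained sketch of the no-repeat targeting loop; target() here is a simple coin flip standing in for the project's targeting decision (which is not shown in this snippet), and the user stream is hypothetical example data.

import random

def target(prob=0.5):
    # stand-in targeting decision: a Bernoulli coin flip
    return random.random() < prob

user_stream = [4, 8, 4, 15, 16, 8, 23, 42]   # contains repeated users on purpose
max_size = 3
seed = set()

for user in user_stream:
    if len(seed) >= max_size:
        break
    if target() and user not in seed:
        seed.add(user)

print("targeted users:", seed)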
Example #7
if __name__ == "__main__":
    '''
        Pass arguments to test RTIM with a Python dict or a Neo4j database;
        the second argument is the file or database name that selects which
        data to use.
    '''
    parser = argparse.ArgumentParser(description="RTIM Queue")
    parser.add_argument('-f',
                        '--file',
                        default="hep",
                        help="File name to choose graph from")
    parser.add_argument("--model", default="wc", help="Model to use")
    args = parser.parse_args()

    print(
        "-------------------------------------------------------------------")
    if args.model not in research_data.valid_models():
        msg = "Invalid arguments [model] -> Received: {}"
        raise Exception(msg.format(args.model))

    msg = "Pre-processing graph using RTIM\n"
    msg += "Use model: {}".format(args.model)
    print(msg)

    print("---")
    graph = {}
    graph, _ = research_data.import_graph_data(args.file, args.model)

    rtim_inf_scores(graph, args.file, args.model)
    print("---")