def greedy_search(graph_rdd, graph, select_count, trials, iterations=float("inf")):
    """Greedily grow a seed set, one element per round, by best mean cascade result.

    Each round, every element of ``graph_rdd`` that is not yet in the seed set
    is tentatively added to it, ``cascade_trials`` is run on the enlarged set,
    and the enlarged set whose result has the largest ``'mean'`` is kept.

    Args:
        graph_rdd: RDD whose elements are the candidate seeds. Elements must be
            hashable because they are stored in a ``set``.
            (Presumably these are the graph's node ids -- confirm with caller.)
        graph: Passed through unchanged to ``cascade_trials``.
        select_count: Maximum number of elements to select.
        trials: Passed through to ``cascade_trials`` as its first argument.
        iterations: Passed through to ``cascade_trials`` as its last argument.

    Returns:
        A ``(seed_set, result)`` tuple, where ``result`` is whatever
        ``cascade_trials`` returned for ``seed_set``. If no round runs
        (``select_count < 1`` or ``graph_rdd`` has no elements) the initial
        placeholder ``(set(), 0)`` is returned. If the candidates run out
        before ``select_count`` elements were chosen, the best set found so
        far is returned instead of raising ``IndexError``.
    """
    max_influence = (set(), 0)

    for _ in range(select_count):
        # Bind the current seed set to a local so the closures shipped to the
        # executors carry only the set, not the whole (set, result) tuple.
        selected = max_influence[0]

        def score(node, seeds=selected):
            # Build the enlarged seed set once and reuse it for both the
            # returned key and the simulation input.
            grown = seeds | {node}
            return grown, cascade_trials(trials, grown, graph, iterations)

        # Drop already-selected elements *before* simulating: this picks the
        # same candidates as filtering on the enlarged set's size afterwards,
        # but avoids running cascade_trials on sets that cannot grow.
        scored = (
            graph_rdd
            .filter(lambda node, seeds=selected: node not in seeds)
            .map(score)
        )

        # takeOrdered sorts ascending, so negate the mean to get the maximum.
        best = scored.takeOrdered(1, key=lambda pair: -pair[1]['mean'])
        if not best:
            # Every element is already selected; nothing more to add.
            break
        max_influence = best[0]

    return max_influence
def spark_trials(rdd, N, nodes, graph, max_iterations=float("inf")):
    """Run ``cascade_trials`` once per element of ``rdd`` and combine the results.

    The elements of ``rdd`` are ignored; only their number matters, since each
    one triggers one call ``cascade_trials(N, nodes, graph, max_iterations)``.

    Args:
        rdd: RDD used purely to fan the work out; one simulation per element.
        N: Passed through to ``cascade_trials`` as its first argument.
        nodes: Passed through to ``cascade_trials``.
        graph: Passed through to ``cascade_trials``.
        max_iterations: Passed through to ``cascade_trials``.

    Returns:
        A dict with keys ``'time'`` (sum of the per-call ``'time'`` values),
        ``'mean'`` (arithmetic mean of the per-call ``'mean'`` values) and
        ``'std'`` (arithmetic mean of the per-call ``'std'`` values).

    Raises:
        ValueError: Propagated from ``RDD.reduce`` when ``rdd`` is empty.
    """
    def run_once(_):
        result = cascade_trials(N, nodes, graph, max_iterations)
        # Carry a count alongside the sums so the average can be taken once,
        # after the reduce.
        return (result['time'], result['mean'], result['std'], 1)

    # RDD.reduce needs an associative and commutative operator. Pairwise
    # "0.5*x + 0.5*y" averaging is not associative, so with more than two
    # elements the result depended on how Spark grouped the partial results.
    # Element-wise sums are safe to combine in any order.
    total_time, mean_sum, std_sum, count = rdd.map(run_once).reduce(
        lambda a, b: (a[0] + b[0], a[1] + b[1], a[2] + b[2], a[3] + b[3])
    )

    return {
        'time': total_time,
        'mean': mean_sum / count,
        # NOTE(review): this is the average of the per-call std values, which
        # keeps the original intent; it is not a pooled standard deviation.
        'std': std_sum / count,
    }