Example #1
import pyspark
import pickle
from time import time

# Project-specific names (algfpgrowth, get_dataset_rdd, INPUT_DATASET_PATH,
# OUTPUT_PATH_RAND) are defined elsewhere in the source project.

def exp2():
    # Run Spark locally with 4 worker threads.
    conf = pyspark.SparkConf()
    conf.setMaster('local[4]')
    sc = pyspark.SparkContext(conf=conf)

    # Load and cache the transaction dataset, then derive the relative
    # support threshold from an absolute count of 50,000 transactions.
    data = get_dataset_rdd(sc, INPUT_DATASET_PATH)
    data.cache()
    data_set_size = data.count()
    threshold = 50000 / float(data_set_size)

    print('Starting alg-fpgrowth test')
    start = time()
    res = algfpgrowth.alg_fp_growth(data, threshold, 4)
    end = time()
    print('alg-fp-growth test ended and took %d seconds' % int(end - start))

    # Pickle needs a binary file handle for the results.
    with open(OUTPUT_PATH_RAND, 'wb') as out_file:
        pickle.dump(res, out_file)
    sc.stop()
Example #2
def run_spark(data_set_rdd, threshold, num_of_partitions):
    # Thin wrapper around the parallel FP-growth implementation.
    return algfpgrowth.alg_fp_growth(data_set_rdd, threshold,
                                     num_of_partitions)
Example #3
    def test_alg(self):
        # Relative minimum-support threshold of 0.4; the explicit float
        # avoids Python 2 integer division truncating this to 0.
        threshold = 2 / 5.0
        res = algfpgrowth.alg_fp_growth(self.rdd, threshold, 2)
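For orientation, here is a minimal end-to-end sketch combining the pieces shown in the examples above, assuming PySpark and the project's algfpgrowth module are importable; the tiny in-memory transaction list, the item-list element format, and the run_local name are illustrative assumptions, not part of the original examples.

import pyspark
import algfpgrowth

def run_local():
    # Illustrative only: mine a tiny in-memory transaction RDD with
    # alg_fp_growth(rdd, threshold, num_of_partitions) as used above.
    conf = pyspark.SparkConf().setMaster('local[2]')
    sc = pyspark.SparkContext(conf=conf)
    transactions = sc.parallelize([
        ['bread', 'milk'],
        ['bread', 'butter'],
        ['bread', 'milk', 'butter'],
        ['milk'],
    ], 2)
    # Relative threshold: itemsets must appear in at least half the transactions.
    res = algfpgrowth.alg_fp_growth(transactions, 0.5, 2)
    sc.stop()
    return res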