コード例 #1
0
def run_alg(sc, data_set_rdd, data_set_size, threshold, epsilon):
    """Run the frequent-itemsets algorithm in randomized mode.

    Thin wrapper around ``alg.alg`` that fixes ``randomized=True``;
    all other arguments are forwarded unchanged.
    """
    return alg.alg(
        sc, data_set_rdd, data_set_size, threshold, epsilon, randomized=True)
コード例 #2
0
def run_base(sc, data_set_rdd, data_set_size, threshold, epsilon):
    """Run the frequent-itemsets algorithm in its baseline (non-randomized) mode.

    Thin wrapper around ``alg.alg`` that fixes ``randomized=False``;
    all other arguments are forwarded unchanged.
    """
    return alg.alg(
        sc, data_set_rdd, data_set_size, threshold, epsilon, randomized=False)
コード例 #3
0
def exp1():
    """Experiment 1: load the data set, run the algorithm once, save the lattice.

    Creates a local[4] SparkContext, loads the input RDD, runs ``alg.alg``
    with the module-level ``threshold``/``epsilon`` configuration, persists
    the resulting lattice under LOGS_DIR/LATTICE_NAME, and stops the context.

    Relies on module-level names not visible here: INPUT_DATASET_PATH,
    LOGS_DIR, LATTICE_NAME, threshold, epsilon, log, get_dataset_rdd,
    _configure_log -- assumed defined at module scope (TODO confirm).
    """
    _configure_log()
    conf = pyspark.SparkConf()
    conf.setMaster('local[4]')
    sc = pyspark.SparkContext(conf=conf)

    # Ensure the SparkContext is always released, even when any step below
    # raises -- the original leaked the context on failure.
    try:
        dataset_rdd = get_dataset_rdd(sc, INPUT_DATASET_PATH)
        log.info('Done loading data set from %s', INPUT_DATASET_PATH)

        log.info(
            'Configuration for randomized test: Threshold=%(threshold)d, epsilon=%(epsilon)s',
            dict(threshold=threshold, epsilon=epsilon))
        # Cache before count() so the data is not re-read when alg.alg scans it.
        dataset_rdd.cache()
        data_set_size = dataset_rdd.count()
        # Lazy %-style logging args (consistent with the other log calls),
        # instead of eager string interpolation.
        log.info('dataset has %s records', data_set_size)

        log.info('Starting test')
        start = time.time()
        res = alg.alg(sc, dataset_rdd, data_set_size, threshold, epsilon)
        end = time.time()
        log.info('Test ended and took %d seconds', int(end - start))

        output_path = os.path.join(LOGS_DIR, LATTICE_NAME)
        log.info('Saving lattice into path %s', output_path)
        frequents.Frequents.save(res, output_path)
        log.info('Lattice saved successfully')
    finally:
        log.info('Freeing Spark context object')
        sc.stop()
        log.info('Experiment done')  # typo fixed: was 'Experiement'
コード例 #4
0
ファイル: test_alpha.py プロジェクト: eyalaltshuler/cis
def run_alg(sc, data_set_rdd, data_set_size, threshold, epsilon, alpha=0.1):
    """Run the randomized frequent-itemsets algorithm.

    Thin wrapper around ``alg.alg`` with ``randomized=True`` and a
    configurable ``alpha`` (default 0.1); remaining arguments are
    forwarded unchanged.
    """
    options = dict(randomized=True, alpha=alpha)
    return alg.alg(sc, data_set_rdd, data_set_size, threshold, epsilon, **options)
コード例 #5
0
 def test_alg(self):
     """Invoke alg.alg on the fixture RDD with fixed parameters.

     Uses threshold=2 and epsilon=0.1; the result is not asserted here.
     """
     res = alg.alg(self.rdd, 2, 0.1)