def run_alg(sc, data_set_rdd, data_set_size, threshold, epsilon):
    return alg.alg(sc, data_set_rdd, data_set_size, threshold, epsilon, randomized=True)


def run_base(sc, data_set_rdd, data_set_size, threshold, epsilon):
    return alg.alg(sc, data_set_rdd, data_set_size, threshold, epsilon, randomized=False)
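# Sketch (not part of the original module): timing the randomized variant against the
# baseline on the same inputs. The helper name `compare_runtimes` is hypothetical; it
# only reuses run_alg/run_base defined above.
def compare_runtimes(sc, data_set_rdd, data_set_size, threshold, epsilon):
    import time

    start = time.time()
    run_alg(sc, data_set_rdd, data_set_size, threshold, epsilon)
    randomized_seconds = time.time() - start

    start = time.time()
    run_base(sc, data_set_rdd, data_set_size, threshold, epsilon)
    baseline_seconds = time.time() - start

    return randomized_seconds, baseline_seconds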
def exp1():
    # Relies on module-level imports and constants used below: pyspark, time, os, alg,
    # frequents, log, get_dataset_rdd, INPUT_DATASET_PATH, LOGS_DIR, LATTICE_NAME, and
    # the experiment parameters threshold and epsilon.
    _configure_log()

    # Set up a local Spark context and load the data set.
    conf = pyspark.SparkConf()
    conf.setMaster('local[4]')
    sc = pyspark.SparkContext(conf=conf)
    dataset_rdd = get_dataset_rdd(sc, INPUT_DATASET_PATH)
    log.info('Done loading data set from %s', INPUT_DATASET_PATH)
    log.info(
        'Configuration for randomized test: Threshold=%(threshold)d, epsilon=%(epsilon)s',
        dict(threshold=threshold, epsilon=epsilon))
    dataset_rdd.cache()
    data_set_size = dataset_rdd.count()
    log.info('Data set has %s records', data_set_size)

    # Run the algorithm and measure wall-clock time.
    log.info('Starting test')
    start = time.time()
    res = alg.alg(sc, dataset_rdd, data_set_size, threshold, epsilon)
    end = time.time()
    log.info('Test ended and took %d seconds', int(end - start))

    # Persist the resulting lattice and release the Spark context.
    output_path = os.path.join(LOGS_DIR, LATTICE_NAME)
    log.info('Saving lattice into path %s', output_path)
    frequents.Frequents.save(res, output_path)
    log.info('Lattice saved successfully')
    log.info('Freeing Spark context object')
    sc.stop()
    log.info('Experiment done')
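# Sketch (assumption, not in the original file): a standard entry-point guard so the
# experiment can be launched directly with `python <module>.py`.
if __name__ == '__main__':
    exp1()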
def run_alg(sc, data_set_rdd, data_set_size, threshold, epsilon, alpha=0.1):
    return alg.alg(sc, data_set_rdd, data_set_size, threshold, epsilon, randomized=True, alpha=alpha)
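# Sketch (hypothetical helper, not in the original module): running the randomized
# variant for several values of `alpha` (whose meaning is defined by alg.alg) and
# collecting the results keyed by alpha. The listed values are illustrative only.
def sweep_alpha(sc, data_set_rdd, data_set_size, threshold, epsilon, alphas=(0.05, 0.1, 0.2)):
    return {alpha: run_alg(sc, data_set_rdd, data_set_size, threshold, epsilon, alpha=alpha)
            for alpha in alphas}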
def test_alg(self):
    threshold = 2
    epsilon = 0.1
    # alg.alg expects (sc, data_set_rdd, data_set_size, threshold, epsilon); the original
    # call passed only the RDD. The call below assumes the test fixture exposes self.sc.
    res = alg.alg(self.sc, self.rdd, self.rdd.count(), threshold, epsilon)
    # Minimal sanity check (assumes a unittest.TestCase-style fixture).
    self.assertIsNotNone(res)