def setUpClass(cls):
    """Set up the database once for the test run."""
    cls.sc = pyspark.SparkContext(master="local[*]")
    pings = list(generate_pings())
    # Aggregate the generated pings and push the result to the database.
    submit_aggregates(_aggregate_metrics(cls.sc.parallelize(pings), num_reducers=10))
def setup_module():
    """Aggregate generated pings once for the whole test module."""
    global aggregates, sc
    sc = pyspark.SparkContext(master="local[*]")
    pings = list(generate_pings())
    # Build the aggregates and submit them for the tests to query.
    aggregates = _aggregate_metrics(sc.parallelize(pings))
    submit_aggregates(aggregates)
def setup_module():
    """Aggregate generated pings once per module and submit them."""
    global aggregates, sc
    sc = pyspark.SparkContext(master="local[*]")
    ping_data = list(generate_pings())
    rdd = sc.parallelize(ping_data)
    aggregates = _aggregate_metrics(rdd, num_reducers=10)
    submit_aggregates(aggregates)
def setUpClass(cls):
    """Set up the database once for the test run."""
    # Start from a clean database before submitting fresh aggregates.
    clear_db()
    cls.sc = pyspark.SparkContext(master="local[*]")
    pings = list(generate_pings())
    result = _aggregate_metrics(cls.sc.parallelize(pings), num_reducers=10)
    submit_aggregates(result)
def setup_module():
    """Build the shared aggregates for the module's tests."""
    global aggregates, sc
    # Silence py4j's chatty INFO/WARN output during the test run.
    logging.getLogger("py4j").setLevel(logging.ERROR)
    sc = pyspark.SparkContext(master="local[*]")
    aggregates = _aggregate_metrics(sc.parallelize(list(generate_pings())))
def setup_module():
    """Aggregate generated pings once and cache the collected results."""
    global build_id_aggregates, submission_date_aggregates
    # Silence py4j's chatty INFO/WARN output during the test run.
    logging.getLogger("py4j").setLevel(logging.ERROR)
    sc = pyspark.SparkContext(master="local[*]")
    pings = list(d.generate_pings())
    by_build_id, by_submission_date = _aggregate_metrics(
        sc.parallelize(pings), num_reducers=10)
    # Note: most tests are based on the build-id aggregates as the aggregation
    # code is the same for both scenarios.
    build_id_aggregates = by_build_id.collect()
    submission_date_aggregates = by_submission_date.collect()
    sc.stop()
def setup_module():
    """One-time module setup: aggregate pings and materialize both views."""
    global build_id_aggregates, submission_date_aggregates
    # Keep the py4j bridge quiet while Spark runs.
    logging.getLogger("py4j").setLevel(logging.ERROR)
    sc = pyspark.SparkContext(master="local[*]")
    raw = sc.parallelize(list(d.generate_pings()))
    build_id_aggregates, submission_date_aggregates = _aggregate_metrics(
        raw, num_reducers=10)
    # Note: most tests are based on the build-id aggregates as the aggregation
    # code is the same for both scenarios.
    build_id_aggregates = build_id_aggregates.collect()
    submission_date_aggregates = submission_date_aggregates.collect()
    sc.stop()
def aggregates(sc):
    """Return the metric aggregates computed from freshly generated pings."""
    # Keep the py4j bridge quiet while Spark runs.
    logging.getLogger("py4j").setLevel(logging.ERROR)
    pings = list(d.generate_pings())
    return _aggregate_metrics(sc.parallelize(pings), num_reducers=10)
def aggregates(sc):
    """Aggregate generated pings, submit them, and return the result."""
    # `result` avoids shadowing this function's own name.
    result = _aggregate_metrics(
        sc.parallelize(list(generate_pings())), num_reducers=10)
    submit_aggregates(result)
    return result