Example 1
def process_recipe(path,
                   tdir,
                   dataframe_source,
                   cov_report_file,
                   test_case,
                   spark_session):
    """Prepare the recipe and trigger the recipe execution.
    """
    with open(path) as f:
        code = f.read()
    code_w_reporting = prepare(code)
    cov_dict = {
        "cov_report_file": cov_report_file,
        "test_case": test_case,
    }

    # Globals consumed by the instrumented recipe code:
    globals_dict = {
        "BIRGITTA_SPARK_SESSION_TYPE": "LOCAL",  # use a local spark session
        "BIRGITTA_TEST_COVERAGE": cov_dict,  # coverage report file and test case
        "BIRGITTA_DBG_COUNTS": dbg_counts()  # whether to print dataframe counts
    }
    full_code = script_prepend.code(tdir) + code_w_reporting
    dump_test_recipe(test_case, tdir, full_code)
    timing.time("execute_recipe before exec")
    runner.exec_code(full_code, globals_dict)
    timing.time("execute_recipe after exec")
Example 2
def default_server_session(*, conf):
    """Don't override app_name, since context might have given it
    a useful name."""
    session = (SparkSession.builder
               .config(conf=conf)
               .getOrCreate())
    timing.time("spark.default_server_session created/gotten")
    return session
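
A minimal usage sketch, assuming the caller builds the SparkConf; the setting shown is a placeholder, not a value from this codebase:

from pyspark import SparkConf

conf = SparkConf().set("spark.executor.memory", "2g")  # placeholder value
session = default_server_session(conf=conf)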
Example 3
def write_fixtures(fixtures, variant_name, spark_session, dataframe_source):
    """Write fixtures to storage.

    Args:
        fixtures (dict): Dict of fixtures
        variant_name (str): Name of fixture variant
        spark_session (SparkSession): Spark session used to create fixtures
        dataframe_source (DataframeSource): The source to write to, e.g. FS
    """
    timing.time("write_fixtures start")
    dfs = dataframes(fixtures, variant_name, spark_session)
    for ds_name, df in dfs.items():
        dataframe_source.write(df, ds_name)
    timing.time("write_fixtures end")
Example 4
def log_entry(test_case, line_no, line_str, report_file, metrics):
    """Log a report entry for line of the recipe.
    """
    timing.time(line_str)
    if dbg_counts() and (metrics['var_type'] == 'DataFrame'):
        print("l:", line_no, repr(line_str), "count:", metrics['count'])
    with open(report_file, 'a') as f:
        json_dict = {
            "test_case": test_case,
            "line_no": line_no,
            "line_str": line_str,
            "metrics": metrics,
        }
        f.write(json.dumps(json_dict))
        f.write("\n")
Example 5
def local_session(*, app_name='birgitta_spark_test'):
    """Get a local spark session. Used for recipe tests,
    both running them, and creating fixtures."""
    conf = local_conf_spark()
    # Sets the Spark master URL to connect to, e.g.:
    #
    #   "local"     to run locally,
    #   "local[4]"  to run locally with 4 cores,
    #   "local[*]"  to run locally with as many worker threads as
    #               there are logical cores on your machine,
    #   "spark://master:7077" (or an IP, e.g. "spark://89.9.250.25:7077")
    #   to run on a Spark standalone cluster.
    master_spark_url = 'local[*]'
    session = (SparkSession.builder
               .config(conf=conf)
               .master(master_spark_url)
               .appName(app_name)
               .getOrCreate())
    timing.time("spark.local_session created/gotten")
    return session
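
A quick usage sketch; the app name and data are illustrative:

session = local_session(app_name="my_recipe_tests")
df = session.createDataFrame([(1, "a"), (2, "b")], ["id", "label"])
assert df.count() == 2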
Example 6
def run_case(tmpdir,
             cov_report_file,
             cov_results,  # noqa F811
             test_case,
             spark_session,
             in_fixtures,
             out_fixtures,
             recipe_path,
             fixture_name):
    """Run a test case.

    Does the following:

    * Reporting and timing.
    * Setup input and result fixtures.
    * Run the spark script in recipe_path.
    * Asserts the outputs.
    * Collects and presents report.

    Returns:
        None
    """
    timing.time("run_case_fn start: %s" % (fixture_name))
    tdir = tmpdir.strpath
    dataframe_source = LocalSource(dataset_dir=tdir)
    fixture_name = test_case[5:]  # Drop 'test_' prefix; overrides the param
    fixturing.write_fixtures(in_fixtures,
                             fixture_name,
                             spark_session,
                             dataframe_source)
    expected_dfs = fixturing.dataframes(out_fixtures,
                                        fixture_name,
                                        spark_session)
    localtest.process_recipe(recipe_path,
                             tdir,
                             dataframe_source,
                             cov_report_file,
                             test_case,
                             spark_session)
    timing.time("runcase_fn run_script done: %s" % (fixture_name))
    assertion.assert_outputs(expected_dfs,
                             dataframe_source,
                             spark_session)
    report.collect(cov_report_file, test_case, cov_results)
    timing.time("run_case_fn end: %s" % (fixture_name))
    timing.print_results(test_case)
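
A hypothetical pytest wiring for run_case; the test, fixture, and recipe names are illustrative, and in_fixtures/out_fixtures are assumed to be defined in the test module:

def test_filter_contracts(tmpdir, cov_report_file, cov_results,
                          spark_session):
    run_case(tmpdir,
             cov_report_file,
             cov_results,
             "test_filter_contracts",  # test_case
             spark_session,
             in_fixtures,   # assumed defined in the test module
             out_fixtures,  # assumed defined in the test module
             "recipes/filter_contracts.py",
             "filter_contracts")  # overridden inside run_case anyway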