def test_wide_deep(notebooks, tmp):
    notebook_path = notebooks["wide_deep"]

    params = {
        "MOVIELENS_DATA_SIZE": "100k",
        "EPOCHS": 1,
        "EVALUATE_WHILE_TRAINING": False,
        "MODEL_DIR": tmp,
        "EXPORT_DIR_BASE": tmp,
        "RATING_METRICS": ["rmse", "mae"],
        "RANKING_METRICS": ["ndcg_at_k", "precision_at_k"],
    }
    pm.execute_notebook(
        notebook_path, OUTPUT_NOTEBOOK, kernel_name=KERNEL_NAME, parameters=params
    )
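    # papermill <1.0 surfaces values recorded inside the notebook through
    # read_notebook().dataframe; indexing by "name" yields a metric Series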
    results = pm.read_notebook(OUTPUT_NOTEBOOK).dataframe.set_index("name")["value"]

    # Model performance is highly dependent on the initial random weights
    # when the number of epochs and the dataset are small.
    # In the smoke-test context we therefore only check that training runs,
    # using loose minimum bounds on the metrics:
    assert results["rmse"] < 2.0
    assert results["mae"] < 2.0
    assert results["ndcg_at_k"] > 0.0
    assert results["precision_at_k"] > 0.0

def test_notebook_dkn(notebooks):
    notebook_path = notebooks["dkn_quickstart"]
    pm.execute_notebook(
        notebook_path,
        OUTPUT_NOTEBOOK,
        kernel_name=KERNEL_NAME,
        parameters=dict(epoch=1),
    )
    results = pm.read_notebook(OUTPUT_NOTEBOOK).dataframe.set_index("name")["value"]

    assert results["res"]["auc"] == pytest.approx(0.4707, rel=TOL, abs=ABS_TOL)
    assert results["res"]["acc"] == pytest.approx(0.5725, rel=TOL, abs=ABS_TOL)

def test_template_runs(notebooks):
    notebook_path = notebooks["template"]
    pm.execute_notebook(
        notebook_path,
        OUTPUT_NOTEBOOK,
        parameters=dict(PM_VERSION=pm.__version__),
        kernel_name=KERNEL_NAME,
    )
    nb = pm.read_notebook(OUTPUT_NOTEBOOK)
    df = nb.dataframe
    assert df.shape[0] == 2
    check_version = df.loc[df["name"] == "checked_version", "value"].values[0]
    assert check_version is True

def test_fastai_integration(notebooks, size, epochs, expected_values):
    notebook_path = notebooks["fastai"]
    pm.execute_notebook(
        notebook_path,
        OUTPUT_NOTEBOOK,
        kernel_name=KERNEL_NAME,
        parameters=dict(TOP_K=10, MOVIELENS_DATA_SIZE=size, EPOCHS=epochs),
    )
    results = pm.read_notebook(OUTPUT_NOTEBOOK).dataframe.set_index("name")["value"]

    for key, value in expected_values.items():
        assert results[key] == pytest.approx(value, rel=TOL, abs=ABS_TOL)

def test_mmlspark_lightgbm_criteo_smoke(notebooks):
    notebook_path = notebooks["mmlspark_lightgbm_criteo"]
    pm.execute_notebook(
        notebook_path,
        OUTPUT_NOTEBOOK,
        kernel_name=KERNEL_NAME,
        parameters=dict(
            DATA_SIZE="sample",
            NUM_ITERATIONS=50,
            EARLY_STOPPING_ROUND=10
        )
    )
    nb = pm.read_notebook(OUTPUT_NOTEBOOK)
    results = nb.dataframe.set_index("name")["value"]
    assert results["auc"] == pytest.approx(0.68895, rel=TOL, abs=ABS_TOL)

def test_ncf_smoke(notebooks):
    notebook_path = notebooks["ncf"]
    pm.execute_notebook(
        notebook_path,
        OUTPUT_NOTEBOOK,
        kernel_name=KERNEL_NAME,
        parameters=dict(TOP_K=10, MOVIELENS_DATA_SIZE="100k", EPOCHS=1, BATCH_SIZE=256),
    )
    results = pm.read_notebook(OUTPUT_NOTEBOOK).dataframe.set_index("name")["value"]

    # There is too much variability for an approximate-equality check,
    # so we only assert upper bounds on the metrics
    assert results["map"] < 0.05
    assert results["ndcg"] < 0.20
    assert results["precision"] < 0.17
    assert results["recall"] < 0.10

def test_is_jupyter():
    # Test on the terminal
    assert is_jupyter() is False
    assert is_databricks() is False

    # Test on Jupyter notebook
    path = os.path.join("tests", "unit", "test_notebook_utils.ipynb")
    pm.execute_notebook(
        path,
        OUTPUT_NOTEBOOK,
        kernel_name=KERNEL_NAME,
    )
    nb = pm.read_notebook(OUTPUT_NOTEBOOK)
    df = nb.dataframe
    result_is_jupyter = df.loc[df["name"] == "is_jupyter", "value"].values[0]
    assert result_is_jupyter is True
    result_is_databricks = df.loc[df["name"] == "is_databricks", "value"].values[0]
    assert result_is_databricks is False

def test_fastai(notebooks):
    notebook_path = notebooks["fastai"]
    pm.execute_notebook(
        notebook_path,
        OUTPUT_NOTEBOOK,
        kernel_name=KERNEL_NAME,
        parameters=dict(TOP_K=10, MOVIELENS_DATA_SIZE="100k", EPOCHS=1),
    )
    results = pm.read_notebook(OUTPUT_NOTEBOOK).dataframe.set_index("name")["value"]

    assert results["rmse"] == pytest.approx(0.959352, rel=TOL, abs=ABS_TOL)
    assert results["mae"] == pytest.approx(0.766504, rel=TOL, abs=ABS_TOL)
    assert results["rsquared"] == pytest.approx(0.287902, rel=TOL, abs=ABS_TOL)
    assert results["exp_var"] == pytest.approx(0.289008, rel=TOL, abs=ABS_TOL)
    assert results["map"] == pytest.approx(0.024379, rel=TOL, abs=ABS_TOL)
    assert results["ndcg"] == pytest.approx(0.148380, rel=TOL, abs=ABS_TOL)
    assert results["precision"] == pytest.approx(0.138494, rel=TOL, abs=ABS_TOL)
    assert results["recall"] == pytest.approx(0.058747, rel=TOL, abs=ABS_TOL)

def test_wide_deep(notebooks, size, epochs, expected_values, tmp):
    notebook_path = notebooks["wide_deep"]

    params = {
        "MOVIELENS_DATA_SIZE": size,
        "EPOCHS": epochs,
        "EVALUATE_WHILE_TRAINING": False,
        "MODEL_DIR": tmp,
        "EXPORT_DIR_BASE": tmp,
        "RATING_METRICS": ["rmse", "mae", "rsquared", "exp_var"],
        "RANKING_METRICS": ["ndcg_at_k", "map_at_k", "precision_at_k", "recall_at_k"],
    }
    pm.execute_notebook(
        notebook_path, OUTPUT_NOTEBOOK, kernel_name=KERNEL_NAME, parameters=params
    )
    results = pm.read_notebook(OUTPUT_NOTEBOOK).dataframe.set_index("name")["value"]

    for key, value in expected_values.items():
        assert results[key] == pytest.approx(value, rel=TOL, abs=ABS_TOL)

def test_notebook_xdeepfm(notebooks):
    notebook_path = notebooks["xdeepfm_quickstart"]
    pm.execute_notebook(
        notebook_path,
        OUTPUT_NOTEBOOK,
        kernel_name=KERNEL_NAME,
        parameters=dict(
            EPOCHS_FOR_SYNTHETIC_RUN=20,
            EPOCHS_FOR_CRITEO_RUN=1,
            BATCH_SIZE_SYNTHETIC=128,
            BATCH_SIZE_CRITEO=512,
        ),
    )
    results = pm.read_notebook(OUTPUT_NOTEBOOK).dataframe.set_index("name")["value"]

    assert results["res_syn"]["auc"] == pytest.approx(0.982, rel=TOL, abs=ABS_TOL)
    assert results["res_syn"]["logloss"] == pytest.approx(0.2306, rel=TOL, abs=ABS_TOL)
    assert results["res_real"]["auc"] == pytest.approx(0.628, rel=TOL, abs=ABS_TOL)
    assert results["res_real"]["logloss"] == pytest.approx(0.5589, rel=TOL, abs=ABS_TOL)

def test_als_pyspark_smoke(notebooks):
    notebook_path = notebooks["als_pyspark"]
    pm.execute_notebook(
        notebook_path,
        OUTPUT_NOTEBOOK,
        kernel_name=KERNEL_NAME,
        parameters=dict(TOP_K=10, MOVIELENS_DATA_SIZE="100k"),
    )
    nb = pm.read_notebook(OUTPUT_NOTEBOOK)
    results = nb.dataframe.set_index("name")["value"]
    start_or_get_spark("ALS PySpark").stop()

    assert results["map"] == pytest.approx(0.0052, rel=TOL, abs=ABS_TOL)
    assert results["ndcg"] == pytest.approx(0.0463, rel=TOL, abs=ABS_TOL)
    assert results["precision"] == pytest.approx(0.0487, rel=TOL, abs=ABS_TOL)
    assert results["recall"] == pytest.approx(0.0177, rel=TOL, abs=ABS_TOL)
    assert results["rmse"] == pytest.approx(0.9636, rel=TOL, abs=ABS_TOL)
    assert results["mae"] == pytest.approx(0.7508, rel=TOL, abs=ABS_TOL)
    assert results["exp_var"] == pytest.approx(0.2672, rel=TOL, abs=ABS_TOL)
    assert results["rsquared"] == pytest.approx(0.2611, rel=TOL, abs=ABS_TOL)

def test_als_pyspark_integration(notebooks):
    notebook_path = notebooks["als_pyspark"]
    pm.execute_notebook(
        notebook_path,
        OUTPUT_NOTEBOOK,
        kernel_name=KERNEL_NAME,
        parameters=dict(TOP_K=10, MOVIELENS_DATA_SIZE="1m"),
    )
    nb = pm.read_notebook(OUTPUT_NOTEBOOK)
    results = nb.dataframe.set_index("name")["value"]
    start_or_get_spark("ALS PySpark").stop()

    assert results["map"] == pytest.approx(0.00201, rel=TOL, abs=ABS_TOL)
    assert results["ndcg"] == pytest.approx(0.02516, rel=TOL, abs=ABS_TOL)
    assert results["precision"] == pytest.approx(0.03172, rel=TOL, abs=ABS_TOL)
    assert results["recall"] == pytest.approx(0.009302, rel=TOL, abs=ABS_TOL)
    assert results["rmse"] == pytest.approx(0.8621, rel=TOL, abs=ABS_TOL)
    assert results["mae"] == pytest.approx(0.68023, rel=TOL, abs=ABS_TOL)
    assert results["exp_var"] == pytest.approx(0.4094, rel=TOL, abs=ABS_TOL)
    assert results["rsquared"] == pytest.approx(0.4038, rel=TOL, abs=ABS_TOL)

def test_lightgcn_deep_dive_integration(notebooks, yaml_file, data_path, size,
                                        epochs, batch_size, expected_values,
                                        seed):
    notebook_path = notebooks["lightgcn_deep_dive"]
    pm.execute_notebook(
        notebook_path,
        OUTPUT_NOTEBOOK,
        kernel_name=KERNEL_NAME,
        parameters=dict(
            TOP_K=10,
            MOVIELENS_DATA_SIZE=size,
            EPOCHS=epochs,
            BATCH_SIZE=batch_size,
            SEED=seed,
            yaml_file=yaml_file,
            user_file=os.path.join(data_path, "user_embeddings"),
            item_file=os.path.join(data_path, "item_embeddings"),
        ),
    )
    results = pm.read_notebook(OUTPUT_NOTEBOOK).dataframe.set_index(
        "name")["value"]

    for key, value in expected_values.items():
        assert results[key] == pytest.approx(value, rel=TOL, abs=ABS_TOL)

def test_npa_quickstart_integration(notebooks, epochs, seed, MIND_type,
                                    expected_values):
    notebook_path = notebooks["npa_quickstart"]

    params = {"epochs": epochs, "seed": seed, "MIND_type": MIND_type}
    pm.execute_notebook(notebook_path,
                        OUTPUT_NOTEBOOK,
                        kernel_name=KERNEL_NAME,
                        parameters=params)
    results = pm.read_notebook(OUTPUT_NOTEBOOK).dataframe.set_index(
        "name")["value"]
    for key, value in expected_values.items():
        assert results[key]["group_auc"] == pytest.approx(value["group_auc"],
                                                          rel=TOL,
                                                          abs=ABS_TOL)
        assert results[key]["mean_mrr"] == pytest.approx(value["mean_mrr"],
                                                         rel=TOL,
                                                         abs=ABS_TOL)
        assert results[key]["ndcg@5"] == pytest.approx(value["ndcg@5"],
                                                       rel=TOL,
                                                       abs=ABS_TOL)
        assert results[key]["ndcg@10"] == pytest.approx(value["ndcg@10"],
                                                        rel=TOL,
                                                        abs=ABS_TOL)

def test_slirec_quickstart_integration(notebooks, yaml_file, data_path, epochs,
                                       batch_size, expected_values, seed):
    notebook_path = notebooks["slirec_quickstart"]

    params = {
        "yaml_file": yaml_file,
        "data_path": data_path,
        "EPOCHS": epochs,
        "BATCH_SIZE": batch_size,
        "RANDOM_SEED": seed,
    }
    pm.execute_notebook(notebook_path,
                        OUTPUT_NOTEBOOK,
                        kernel_name=KERNEL_NAME,
                        parameters=params)
    results = pm.read_notebook(OUTPUT_NOTEBOOK).dataframe.set_index(
        "name")["value"]
    for key, value in expected_values.items():
        assert results[key]["auc"] == pytest.approx(value["auc"],
                                                    rel=TOL,
                                                    abs=ABS_TOL)
        assert results[key]["logloss"] == pytest.approx(value["logloss"],
                                                        rel=TOL,
                                                        abs=ABS_TOL)
Example #16
    def _t_fn(info, inputs):
        base_dir = '/tmp/dagstermill/{run_id}/'.format(
            run_id=info.context.run_id)
        output_notebook_dir = os.path.join(base_dir, 'output_notebooks/')

        if not os.path.exists(output_notebook_dir):
            os.makedirs(output_notebook_dir)

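        # unique per-run output path so concurrent executions don't collide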
        temp_path = os.path.join(
            output_notebook_dir,
            '{prefix}-out.ipynb'.format(prefix=str(uuid.uuid4())))

        try:
            _source_nb = pm.execute_notebook(
                notebook_path,
                temp_path,
                parameters=dict(dm_context=serialize_dm_context(info, inputs)),
            )

            output_nb = pm.read_notebook(temp_path)

            info.context.debug(
                'Notebook execution complete for {name}. Data is {data}'.
                format(name=name, data=output_nb.data))

            for output_def in info.solid_def.output_defs:
                if output_def.name in output_nb.data:

                    value = unmarshal_value(output_def.runtime_type,
                                            output_nb.data[output_def.name])

                    yield Result(value, output_def.name)

        finally:
            if do_cleanup and os.path.exists(temp_path):
                os.remove(temp_path)
Example #17
    def _t_fn(transform_context, inputs):
        check.inst_param(transform_context, 'transform_context',
                         TransformExecutionContext)
        check.param_invariant(
            isinstance(transform_context.environment_dict, dict),
            'context',
            'SystemTransformExecutionContext must have valid environment_dict',
        )

        system_transform_context = transform_context.get_system_context()

        base_dir = '/tmp/dagstermill/{run_id}/'.format(
            run_id=transform_context.run_id)
        output_notebook_dir = os.path.join(base_dir, 'output_notebooks/')

        if not os.path.exists(output_notebook_dir):
            os.makedirs(output_notebook_dir)

        temp_path = os.path.join(
            output_notebook_dir,
            '{prefix}-out.ipynb'.format(prefix=str(uuid.uuid4())))

        output_log_path = os.path.join(base_dir, 'run.log')

        try:
            nb = load_notebook_node(notebook_path)
            nb_no_parameters = replace_parameters(
                system_transform_context,
                nb,
                get_papermill_parameters(system_transform_context, inputs,
                                         output_log_path),
            )
            intermediate_path = os.path.join(
                output_notebook_dir,
                '{prefix}-inter.ipynb'.format(prefix=str(uuid.uuid4())))
            write_ipynb(nb_no_parameters, intermediate_path)

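            # touch the log file so the tailing loop below can open and stat it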
            with open(output_log_path, 'a') as f:
                f.close()

            process = subprocess.Popen(
                [
                    'papermill', '--log-output', '--log-level', 'ERROR',
                    intermediate_path, temp_path
                ],
                stderr=subprocess.PIPE,
            )
            while process.poll() is None:  # while subprocess alive
                if system_transform_context.event_callback:
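                    # tail the notebook's event log, forwarding new records
                    # to the event callback as they appear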
                    with open(output_log_path, 'r') as ff:
                        current_time = os.path.getmtime(output_log_path)
                        while process.poll() is None:
                            new_time = os.path.getmtime(output_log_path)
                            if new_time != current_time:
                                line = ff.readline()
                                if not line:
                                    break
                                event_record_dict = json.loads(line)

                                system_transform_context.event_callback(
                                    EventRecord(**event_record_dict))
                                current_time = new_time

            # communicate() only after the polling loop: calling it first
            # would block until exit and make the loop above dead code
            _stdout, stderr = process.communicate()
            if process.returncode != 0:
                raise DagstermillError(
                    'There was an error when Papermill tried to execute the notebook. '
                    'The process stderr is \'{stderr}\''.format(stderr=stderr))

            output_nb = pm.read_notebook(temp_path)

            system_transform_context.log.debug(
                'Notebook execution complete for {name}. Data is {data}'.
                format(name=name, data=output_nb.data))

            yield Materialization(
                '{name} output notebook'.format(
                    name=transform_context.solid.name), temp_path)

            for output_def in system_transform_context.solid_def.output_defs:
                if output_def.name in output_nb.data:

                    value = read_value(output_def.runtime_type,
                                       output_nb.data[output_def.name])

                    yield Result(value, output_def.name)

        finally:
            if do_cleanup and os.path.exists(temp_path):
                os.remove(temp_path)
Example #18
# BEGIN exp_summary.py --------------------------------------------------
for cname in GB_ATTRS:
    # convert to str for grouping.
    sum_df[cname] = sum_df[cname].astype(str)

sum_df['exp_n'] = sum_df.groupby(GB_ATTRS)['RES_PATH'].transform('count')


# XXX read the result from res_path
# read the pm data
for i, row in sum_df.iterrows():
    display(sum_df[i:i + 1])
    res_nb_path = os.path.join(row['RES_PATH'], 'script.ipynb')

    # READ  the config
    res_nb = pm.read_notebook(res_nb_path)
    res_nb.display_output('XXXX')

g_sum_df = sum_df.groupby(GB_ATTRS).mean()

# Check the effect of each parameter
for attr in GB_ATTRS:
    print(attr, '=' * 20)
    display(sum_df.groupby(attr).mean().loc[:, 'pretrain_group5_model_ar':'ft_group5_wr'])
    display(sum_df.groupby(attr).count().loc[:, 'pretrain_group5_model_ar':'ft_group5_wr'])

# END   exp_summary.py --------------------------------------------------


# BEGIN jupyter.ipynb -----------------------------------------------
# https://github.com/nteract/papermill
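# The jupyter.ipynb body is missing here; below is a minimal sketch, assuming
# the older papermill (<1.0) API used throughout this page, of the kind of
# cell such a notebook would contain. Names and values are hypothetical:
# pm.record() is what surfaces values to pm.read_notebook(...).dataframe
# in the driver scripts above.
import papermill as pm

x, y = 2.0, 0.5          # hypothetical parameters injected via execute_notebook
pm.record("sum", x + y)  # each record becomes a name/value row
pm.record("product", x * y)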
Example #19
    def test_bad_file_ext(self):

        with self.assertRaises(PapermillException):
            read_notebook('result_notebook.py')
Example #20
parser = argparse.ArgumentParser(prog='SAT')
parser.add_argument("heuristic", type=int,
                    help="[1] DP basic, [2] MOMs, [3] JeroSloWang, [4] Logistic regression",
                    default=1)
parser.add_argument("infile", type=str,
                    help="DIMACS file including all constraints to satisfy",
                    default="constraint_problem.txt")

args = parser.parse_args()

argMapping = {1:"random", 2:"moms", 3:"jerow", 4:"logreg"}

print("Finding satisfying interpretation of", args.infile, "using", argMapping[args.heuristic], "splits...")
pm.execute_notebook(
    'SAT_book.ipynb',
    'output.ipynb',
    parameters=dict(heuristic=argMapping[args.heuristic], infile=args.infile),
)

nb = pm.read_notebook('output.ipynb')
df = nb.dataframe
solution = df.loc[df['name'] == 'solution', 'value'].values[0]

if solution:
    with open("solution.txt", "w") as outfile:
        for s in solution:
            outfile.write(str(s) + ' 0\n')
    print("Saved solution to file: solution.txt")
else:
    print("No solution found")
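# Hypothetical invocation of this driver (the script's file name is assumed):
#   python sat.py 2 constraint_problem.txt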
Example #21
    parameters if os.environ["ACCEPT_PARAMETERS"] else None,
    progress_bar=False,
    log_output=False,
    report_mode=False)

## Get the return type for the notebook from the 'Http_Accept' header;
## if absent, default to the empty string (stdout)
notebook_return_type = os.environ.get('Http_Accept', "").split(",")[0]

## Get all the cell outputs from the notebook,
## iterate backwards, and return the first output that
## matches the 'Http_Accept' header; if none match,
## return the last output
##
## If 'Http_Accept' is */* or "" default to stdout
output_notebook = papermill.read_notebook(os.environ["OUTPUT_NOTEBOOK"])
outputs = sum(
    [list(c["outputs"]) if "outputs" in c.keys() else []
     for c in output_notebook.node.cells],
    [])

for cell in outputs[::-1]:
    if (cell["output_type"] == "error"):
        print(cell["traceback"])
        break

    elif (cell["output_type"] == "stream") and \
         (notebook_return_type in ["*/*", ""]):
        print(cell["text"])
        break

    elif (cell["output_type"] in ["display_data", "execute_result"]):
Example #22
    def _t_fn(info, inputs):
        check.param_invariant(
            isinstance(info.context.environment_config, dict),
            'info',
            'TransformExecutionInfo must have valid environment_config',
        )

        base_dir = '/tmp/dagstermill/{run_id}/'.format(
            run_id=info.context.run_id)
        output_notebook_dir = os.path.join(base_dir, 'output_notebooks/')

        if not os.path.exists(output_notebook_dir):
            os.makedirs(output_notebook_dir)

        temp_path = os.path.join(
            output_notebook_dir,
            '{prefix}-out.ipynb'.format(prefix=str(uuid.uuid4())))

        output_log_path = os.path.join(base_dir, 'run.log')

        try:
            nb = load_notebook_node(notebook_path)
            nb_no_parameters = replace_parameters(
                info, nb,
                get_papermill_parameters(info, inputs, output_log_path))
            intermediate_path = os.path.join(
                output_notebook_dir,
                '{prefix}-inter.ipynb'.format(prefix=str(uuid.uuid4())))
            write_ipynb(nb_no_parameters, intermediate_path)

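            # touch the log file so the tailing loop below can open and stat it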
            with open(output_log_path, 'a') as f:
                f.close()

            # info.log.info("Output log path is {}".format(output_log_path))
            # info.log.info("info.context.event_callback {}".format(info.context.event_callback))

            process = subprocess.Popen(
                ["papermill", intermediate_path, temp_path])
            # _source_nb = pm.execute_notebook(intermediate_path, temp_path)

            while process.poll() is None:  # while subprocess alive
                if info.context.event_callback:
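                    # tail the notebook's event log, forwarding new records
                    # to the event callback as they appear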
                    with open(output_log_path, 'r') as ff:
                        current_time = os.path.getmtime(output_log_path)
                        while process.poll() is None:
                            new_time = os.path.getmtime(output_log_path)
                            if new_time != current_time:
                                line = ff.readline()
                                if not line:
                                    break
                                event_record_dict = json.loads(line)

                                event_record_dict['event_type'] = EventType(
                                    event_record_dict['event_type'])
                                info.context.event_callback(
                                    EventRecord(**event_record_dict))
                                current_time = new_time

            if process.returncode != 0:
                # Throw event that is an execution error!
                info.log.debug("There was an error in Papermill!")
                info.log.debug('stderr was None'
                               if process.stderr is None else process.stderr)
                exit()

            output_nb = pm.read_notebook(temp_path)

            info.log.debug(
                'Notebook execution complete for {name}. Data is {data}'.
                format(name=name, data=output_nb.data))

            info.log.info(
                "Output notebook path is {}".format(output_notebook_dir))

            for output_def in info.solid_def.output_defs:
                if output_def.name in output_nb.data:

                    value = read_value(output_def.runtime_type,
                                       output_nb.data[output_def.name])

                    yield Result(value, output_def.name)

        finally:
            if do_cleanup and os.path.exists(temp_path):
                os.remove(temp_path)
Example #23
def get_outputs_df(nbname):
    df = pm.read_notebook(nbname).dataframe
    df = df.loc[df['type'] == 'record'].set_index('name')
    df.drop(['type', 'filename'], axis=1, inplace=True)
    return df
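# Hypothetical usage of the helper above, assuming 'output.ipynb' was run with
# papermill and recorded an 'rmse' metric:
metrics = get_outputs_df('output.ipynb')
print(metrics.loc['rmse', 'value'])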
Example #24
def get_output_files(dirname):
    return [pm.read_notebook(x) for x in glob('%s/*.ipynb' % dirname)]
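# Hypothetical usage: read back every executed notebook in an (assumed)
# 'results' directory and inspect what each one recorded:
for nb in get_output_files('results'):
    print(nb.data)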
Example #25
if outdir is "":
    outdir = os.getcwd()

if not os.path.isdir(outdir):
    print("output directory", outdir, "does not exist. Creating.")
    os.makedirs(outdir)

notebooks = {"hello_world": args.input}

notebook_path = notebooks["hello_world"]
pm.execute_notebook(notebook_path,
                    OUTPUT_NOTEBOOK,
                    kernel_name=KERNEL_NAME,
                    parameters=dict(x=args.x, y=args.y))

nb = pm.read_notebook(OUTPUT_NOTEBOOK)

## Now log things via AML
try:
    from azureml.core import Run
    run = Run.get_context()
except ImportError:
    run = None

print('*** run value is:', run)


def _log(metric, value):
    if run is not None:
        print('logging variables with AML logging functions.')
        # assumption: the snippet is truncated here; numeric lists go to
        # run.log_list, everything else to run.log (azureml.core.Run API)
        if type(value) == list and len(value) > 0 and type(value[0]) in (int, float):
            run.log_list(metric, value)
        else:
            run.log(metric, value)
Example #26
if params['TOP_K'] <= 0:
    raise ValueError("Top K should be larger than 0")

if params['MODEL_TYPE'] not in {'wide', 'deep', 'wide_deep'}:
    raise ValueError("Model type should be either 'wide', 'deep', or 'wide_deep'")

if params['DATA_DIR'] is None:
    raise ValueError("Datastore path should be given")

print("Args:")
for k, v in params.items():
    _log(k, v)


print("Run", NOTEBOOK_NAME)

pm.execute_notebook(
    NOTEBOOK_NAME,
    OUTPUT_NOTEBOOK,
    parameters=params,
    kernel_name='python3'
)
nb = pm.read_notebook(OUTPUT_NOTEBOOK)

for m, v in nb.data.items():
    _log(m, v)

# clean-up
os.remove(OUTPUT_NOTEBOOK)
shutil.rmtree(params['MODEL_DIR'], ignore_errors=True)