def test_report_invalid(mock_pubsub, reporter, value):
  with tempfile.TemporaryDirectory() as tmpdir:
    mlf.set_tracking_uri(f'file:{tmpdir}/foo')

    _reset_experiment()

    mlflow_cfg = {
        'experiment_name': 'foo',
        'run_name': 'bar',
        'artifact_location': '/foo/bar',
    }

    with uv.start_run(**mlflow_cfg) as active_run, uv.active_reporter(
        reporter()) as r:
      assert r is not None

      steps = [{
          'step': 1,
          'm': {
              'a': value,
          }
      }]

      for p in steps:
        for k, v in p['m'].items():
          with pytest.raises(ValueError):
            r.report(step=p['step'], k=k, v=v)
def test_report_param(mock_pubsub, reporter):
  with tempfile.TemporaryDirectory() as tmpdir:
    mlf.set_tracking_uri(f'file:{tmpdir}/foo')

    _reset_experiment()

    mlflow_cfg = {
        'experiment_name': 'foo',
        'run_name': 'bar',
        'artifact_location': '/foo/bar',
    }

    with uv.start_run(**mlflow_cfg) as active_run, uv.active_reporter(
        reporter()) as r:
      assert r is not None

      param = {'a': 3.14159}
      r.report_param(k='a', v=param['a'])

      assert mlf.active_run() == active_run

      # we need to access the run via a client to inspect most params/tags/metrics
      client = mlf.tracking.MlflowClient()
      run = client.get_run(active_run.info.run_id)
      assert run is not None

      for k, v in param.items():
        p = run.data.params
        assert k in p
        assert p[k] == str(v)
def main():
  experiment_name = f'{getpass.getuser()}_query_tutorial'

  with uv.active_reporter(MLFlowReporter()):
    _run_experiments(experiment_name)

  client = mlflow.tracking.MlflowClient()
  experiment = client.get_experiment_by_name(experiment_name)

  # get all of our runs where we set mean=0
  mean_zero_runs = client.search_runs(
      experiment_ids=[experiment.experiment_id],
      filter_string='params.mean = "0"',
  )

  # extract metric data
  metrics = {}
  for r in mean_zero_runs:
    run_id = r.info.run_id
    run_name = r.data.tags['mlflow.runName']
    metrics[run_name] = {
        k: _get_metric_array(client, run_id, k) for k in ('x', 'y')
    }

  # generate a simple plot and save it to file
  for k, v in metrics.items():
    plt.plot(v['x'], v['y'], label=k)
  plt.legend()
  plt.grid(True)
  plt.title('UV/MLFlow Tutorial')

  outdir = '../../docs/_static/img/'
  if not os.path.exists(outdir):
    outdir = '.'
  outfile = os.path.join(outdir, 'mlflow_query_tutorial.png')
  plt.savefig(outfile)
  print(f'plot saved to {outfile}')

  # we can also log this plot as an artifact
  artifact_run = mean_zero_runs[0]
  run_id = artifact_run.info.run_id
  client.log_artifact(run_id=run_id, local_path=outfile)
  print(f'artifact saved to run {artifact_run.data.tags["mlflow.runName"]}')

  # you can list a job's artifacts using list_artifacts()
  print(f'artifacts for run_id {run_id}:')
  artifacts = client.list_artifacts(run_id=run_id)
  for a in artifacts:
    print(f'{a}')

  # to retrieve an artifact, use client.download_artifacts()
  download_path = '.'
  client.download_artifacts(run_id=run_id,
                            path=artifacts[0].path,
                            dst_path=download_path)
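# _get_metric_array() is a small helper defined elsewhere in this tutorial.
# A minimal sketch of what it plausibly does, assuming it returns the values of
# one metric ordered by step (the repo's actual helper may differ in detail):


def _get_metric_array(client, run_id, key):
  """Returns the values of metric `key` for `run_id`, ordered by step."""
  history = client.get_metric_history(run_id, key)
  return [m.value for m in sorted(history, key=lambda m: m.step)]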
def test_report(mock_pubsub, reporter):
  with tempfile.TemporaryDirectory() as tmpdir:
    mlf.set_tracking_uri(f'file:{tmpdir}/foo')

    _reset_experiment()

    mlflow_cfg = {
        'experiment_name': 'foo',
        'run_name': 'bar',
        'artifact_location': '/foo/bar',
    }

    with uv.start_run(**mlflow_cfg) as active_run, uv.active_reporter(
        reporter()) as r:
      assert r is not None

      steps = [{
          'step': 1,
          'm': {
              'a': 3,
              'b': 3.141
          }
      }, {
          'step': 2,
          'm': {
              'a': 6,
              'b': 6.282
          }
      }]

      for p in steps:
        for k, v in p['m'].items():
          r.report(step=p['step'], k=k, v=v)

      assert mlf.active_run() == active_run

      # we need to access the run via a client to inspect most params/tags/metrics
      client = mlf.tracking.MlflowClient()
      run = client.get_run(active_run.info.run_id)
      assert run is not None

      metrics = run.data.metrics
      metric_data = {}

      # check that the metrics are in the run data
      for k, v in steps[0]['m'].items():
        assert k in metrics
        metric_data[k] = {
            x.step: x.value
            for x in client.get_metric_history(active_run.info.run_id, k)
        }

      for s in steps:
        cur_step = s['step']
        for k, v in s['m'].items():
          assert metric_data[k][cur_step] == v
def test_report_params(mock_pubsub, reporter):
  with tempfile.TemporaryDirectory() as tmpdir:
    mlf.set_tracking_uri(f'file:{tmpdir}/foo')

    _reset_experiment()

    mlflow_cfg = {
        'experiment_name': 'foo',
        'run_name': 'bar',
        'artifact_location': '/foo/bar',
    }

    with uv.start_run(**mlflow_cfg) as active_run, uv.active_reporter(
        reporter()) as r:
      assert r is not None

      params = {
          'a': 3,
          'b': 'string_param',
          INVALID_KEY: INVALID_PARAM_VALUE,
      }
      r.report_params(params)

      assert mlf.active_run() == active_run

      # we need to access the run via a client to inspect most params/tags/metrics
      client = mlf.tracking.MlflowClient()
      run = client.get_run(active_run.info.run_id)
      assert run is not None

      for k, v in params.items():
        if k == INVALID_KEY:
          k = SANITIZED_KEY
        if v == INVALID_PARAM_VALUE:
          v = SANITIZED_PARAM_VALUE
        p = run.data.params
        assert k in p
        assert p[k] == str(v)
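# Note: INVALID_KEY / INVALID_PARAM_VALUE and their SANITIZED_* counterparts used
# above are module-level constants defined elsewhere in this test file. They
# exercise the reporter's sanitization path: a key or value that MLflow would
# reject is rewritten to a safe equivalent before logging, which is why the
# assertions look the params up under the sanitized names.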
def main(**kwargs):
  with uv.start_run(), uv.active_reporter(MLFlowReporter()):
    uv.report_params(kwargs)
    _run_training(**kwargs)
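# _run_training() is defined elsewhere in this example. A minimal sketch of the
# reporting pattern it presumably follows, assuming uv exposes a module-level
# report() that forwards to the active reporter (mirroring r.report() in the
# tests); the "training loop" below is a placeholder for illustration only:


def _run_training(steps=100, lr=0.1, **unused_kwargs):
  loss = 1.0
  for step in range(steps):
    loss *= (1.0 - lr)  # stand-in for a real optimization step
    uv.report(step=step, k='loss', v=loss)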
def test_log_metrics(mock_pubsub):
  '''tests the log_metrics() function in mlflow_subscriber'''

  # test exception handling by passing invalid message and mlflow client
  log_metrics(None, None, True)

  with tempfile.TemporaryDirectory() as tmpdir:
    mlf.set_tracking_uri(f'file:{tmpdir}/foo')

    _reset_experiment()

    mlflow_cfg = {
        'experiment_name': 'foo',
        'run_name': 'bar',
        'artifact_location': '/foo/bar',
    }

    with uv.start_run(**mlflow_cfg), uv.active_reporter(
        MLFlowPubsubReporter('p', 't')) as r:
      active_run = mlf.active_run()

      steps = [{
          'step': 1,
          'm': {
              'a': 3,
              'b': 3.141
          }
      }, {
          'step': 2,
          'm': {
              'a': 6,
              'b': 6.282
          }
      }]

      client = mlf.tracking.MlflowClient()

      assert len(_MSG_QUEUE) == 0

      for s in steps:
        r.report_all(**s)

      assert len(_MSG_QUEUE) == len(steps)

      for m in _MSG_QUEUE:
        log_metrics(client, m, True)

      run = client.get_run(active_run.info.run_id)
      assert run is not None

      metrics = run.data.metrics
      metric_data = {}

      # check that the metrics are in the run data
      for k, v in steps[0]['m'].items():
        assert k in metrics
        metric_data[k] = {
            x.step: x.value
            for x in client.get_metric_history(active_run.info.run_id, k)
        }

      for s in steps:
        cur_step = s['step']
        for k, v in s['m'].items():
          assert metric_data[k][cur_step] == v
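# For context: the mock_pubsub fixture and _MSG_QUEUE above stand in for Google
# Cloud Pub/Sub. MLFlowPubsubReporter publishes each report as a message (captured
# here in _MSG_QUEUE) rather than writing to MLflow directly, and log_metrics() is
# the subscriber-side function that replays those messages into the MLflow run;
# that is why the metrics only appear in run.data.metrics after the explicit
# log_metrics() loop.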
def main(_):
  """Builds and trains a sentiment classification RNN."""

  # prevent tf from accessing GPU
  tf.config.experimental.set_visible_devices([], "GPU")

  # Get and save config
  config = argparser.parse_args('main')
  logging.info(json.dumps(config, indent=2))

  with uv.start_run(
      experiment_name=config['save']['mlflow_expname'],
      run_name=config['save']['mlflow_runname']), uv.active_reporter(
          MLFlowReporter()):

    reporters.save_config(config)
    uv.report_params(reporters.flatten(config))

    prng_key = random.PRNGKey(config['run']['seed'])

    # Load data.
    vocab_size, train_dset, test_dset = data.get_dataset(config['data'])

    # Build network.
    cell = model_utils.get_cell(config['model']['cell_type'],
                                num_units=config['model']['num_units'])

    init_fun, apply_fun, _, _ = network.build_rnn(
        vocab_size, config['model']['emb_size'], cell,
        config['model']['num_outputs'])

    loss_fun, acc_fun = optim_utils.loss_and_accuracy(apply_fun,
                                                      config['model'],
                                                      config['optim'])

    _, initial_params = init_fun(
        prng_key, (config['data']['batch_size'], config['data']['max_pad']))

    initial_params = model_utils.initialize(initial_params, config['model'])

    # get optimizer
    opt, get_params, opt_state, step_fun = optim_utils.optimization_suite(
        initial_params, loss_fun, config['optim'])

    ## Scope setup
    # Reporter setup
    data_store = {}
    reporter = reporters.build_reporters(config['save'], data_store)

    # Static state for scope
    static_state = {
        'acc_fun': acc_fun,
        'loss_fun': loss_fun,
        'param_extractor': get_params,
        'test_set': test_dset
    }

    oscilloscope = m.MetricCallback(static_state)

    def interval_trigger(interval):

      def function_to_return(x):
        return x % interval == 0

      return function_to_return

    oscilloscope.add_measurement({
        'name': 'test_acc',
        'trigger': interval_trigger(config['save']['measure_test']),
        'function': measurements.measure_test_acc
    })

    oscilloscope.add_measurement({
        'name': 'shuffled_test_acc',
        'trigger': interval_trigger(config['save']['measure_test']),
        'function': measurements.measure_shuffled_acc
    })

    oscilloscope.add_measurement({
        'name': 'train_acc',
        'trigger': interval_trigger(config['save']['measure_train']),
        'function': measurements.measure_batch_acc
    })

    oscilloscope.add_measurement({
        'name': 'train_loss',
        'trigger': interval_trigger(config['save']['measure_train']),
        'function': measurements.measure_batch_loss
    })

    oscilloscope.add_measurement({
        'name': 'l2_norm',
        'trigger': interval_trigger(config['save']['measure_test']),
        'function': measurements.measure_l2_norm
    })

    # Train
    global_step = 0
    loss = np.nan
    for epoch in range(config['optim']['num_epochs']):
      for batch_num, batch in enumerate(tfds.as_numpy(train_dset)):

        dynamic_state = {
            'opt_state': opt_state,
            'batch_train_loss': loss,
            'batch': batch
        }

        step_measurements = oscilloscope.measure(int(global_step),
                                                 dynamic_state)
        if step_measurements is not None:
          reporter.report_all(int(global_step), step_measurements)

        global_step, opt_state, loss = step_fun(global_step, opt_state, batch)

        if global_step % config['save']['checkpoint_interval'] == 0:
          params = get_params(opt_state)
          np_params = np.asarray(params, dtype=object)
          reporters.save_dict(config, np_params, f'checkpoint_{global_step}')

    final_measurements = oscilloscope.measure(
        int(global_step),
        dynamic_state,
        measurement_list=['test_acc', 'shuffled_test_acc'])
    reporter.report_all(int(global_step), final_measurements)

    final_params = {'params': np.asarray(get_params(opt_state), dtype=object)}
    reporters.save_dict(config, final_params, 'final_params')