def pull_pipeline(info, pipeline_id, output_path, no_docs):
    """Copy the configuration of a registered pipeline"""
    p_api = ce_api.PipelinesApi(utils.api_client(info))
    ws_api = ce_api.WorkspacesApi(utils.api_client(info))

    active_user = info[constants.ACTIVE_USER]
    ws_id = info[active_user][constants.ACTIVE_WORKSPACE]

    all_ps = utils.api_call(
        ws_api.get_workspaces_pipelines_api_v1_workspaces_workspace_id_pipelines_get,
        ws_id)
    p_uuid = utils.find_closest_uuid(pipeline_id, all_ps)

    utils.declare('Pulling pipeline: {}'.format(utils.format_uuid(p_uuid)))

    pp = utils.api_call(p_api.get_pipeline_api_v1_pipelines_pipeline_id_get,
                        pipeline_id=p_uuid)

    # Short term fix for these getting into the exp_config
    c = pp.pipeline_config
    if 'bq_args' in c:
        c.pop('bq_args')
    if 'ai_platform_training_args' in c:
        c.pop('ai_platform_training_args')

    utils.save_config(c, output_path, no_docs)
def get_eval_dir(p_uuid, r_uuid, info, d_path=None):
    ws_id = info[info[constants.ACTIVE_USER]][constants.ACTIVE_WORKSPACE]

    if d_path is None:
        d_path = os.path.join(click.get_app_dir(constants.APP_NAME),
                              'eval_evaluator',
                              str(ws_id),
                              str(p_uuid),
                              str(r_uuid))

    if os.path.exists(os.path.join(d_path, 'eval_config.json')):
        return d_path

    api = ce_api.PipelinesApi(api_client(info))
    artifact = api_call(
        api.get_pipeline_artifacts_api_v1_pipelines_pipeline_id_runs_pipeline_run_id_artifacts_component_type_get,
        pipeline_id=p_uuid,
        pipeline_run_id=r_uuid,
        component_type=GDPComponent.Evaluator.name)

    # TODO: [LOW] artifact[1] hard-coded because of upgrade to 0.21.4
    download_artifact(artifact[0].to_dict(), path=d_path)

    # Override the remote (Google) model path with the local path
    with open(os.path.join(d_path, 'eval_config.json'), 'r') as f:
        eval_config = json.load(f)
    eval_config['modelLocations'][''] = d_path
    with open(os.path.join(d_path, 'eval_config.json'), 'w') as f:
        json.dump(eval_config, f)

    return d_path
def list_pipelines(info, pipeline_id, ignore_empty):
    """List the registered pipelines"""
    utils.notice('Fetching pipeline(s). This might take a few seconds...\n')

    active_user = info[constants.ACTIVE_USER]
    ws = info[active_user][constants.ACTIVE_WORKSPACE]

    ws_api = ce_api.WorkspacesApi(utils.api_client(info))
    p_api = ce_api.PipelinesApi(utils.api_client(info))
    d_api = ce_api.DatasourcesApi(utils.api_client(info))

    pipelines = utils.api_call(
        ws_api.get_workspaces_pipelines_api_v1_workspaces_workspace_id_pipelines_get,
        ws)

    if pipeline_id is not None:
        pipeline_id = utils.find_closest_uuid(pipeline_id, pipelines)

    pipelines.sort(key=lambda x: x.created_at)
    for p in pipelines:
        write_check = (len(p.pipeline_runs) > 0 or not ignore_empty) and \
                      (pipeline_id is None or pipeline_id == p.id)

        if write_check:
            title = 'PIPELINE NAME: {} PIPELINE ID: {}'.format(
                p.name, utils.format_uuid(p.id))
            utils.declare(title)
            utils.declare('-' * len(title))

            if len(p.pipeline_runs) == 0:
                click.echo('No runs for this pipeline yet!')
            else:
                table = []
                for r in p.pipeline_runs:
                    author = utils.api_call(
                        p_api.get_pipeline_run_user_api_v1_pipelines_pipeline_id_runs_pipeline_run_id_user_get,
                        p.id,
                        r.id)

                    # Resolve the datasource of the run
                    ds_commit = utils.api_call(
                        d_api.get_single_commit_api_v1_datasources_commits_commit_id_get,
                        r.datasource_commit_id)
                    ds = utils.api_call(
                        d_api.get_datasource_api_v1_datasources_ds_id_get,
                        ds_commit.datasource_id)

                    table.append({
                        'RUN ID': utils.format_uuid(r.id),
                        'TYPE': r.pipeline_run_type,
                        'CPUs PER WORKER': r.cpus_per_worker,
                        'WORKERS': r.workers,
                        'DATASOURCE': '{}_{}'.format(
                            ds.name,
                            utils.format_uuid(r.datasource_commit_id)),
                        'AUTHOR': author.email,
                        'CREATED AT': utils.format_date(r.start_time),
                    })

                click.echo(tabulate(table, headers='keys', tablefmt='plain'))
                click.echo('\n')
def get_pipeline_run(self, pipeline_id, pipeline_run_id) -> PipelineRun:
    api = ce_api.PipelinesApi(self.client)
    pr = api_utils.api_call(
        api.get_pipeline_run_api_v1_pipelines_pipeline_id_runs_pipeline_run_id_get,
        pipeline_id,
        pipeline_run_id)
    return PipelineRun(**pr.to_dict())
def get_pipeline_run_logs(self, pipeline_id, pipeline_run_id) -> Text:
    # Note: this returns the URL to the logs, not a PipelineRun
    api = ce_api.PipelinesApi(self.client)
    logs_url = api_utils.api_call(
        api.get_pipeline_logs_api_v1_pipelines_pipeline_id_runs_pipeline_run_id_logs_get,
        pipeline_id,
        pipeline_run_id)
    return logs_url
def get_pipeline_runs(self, pipeline_id: Text, **kwargs) -> List[PipelineRun]:
    api = ce_api.PipelinesApi(self.client)
    pr_list = api_utils.api_call(
        api.get_pipeline_runs_api_v1_pipelines_pipeline_id_runs_get,
        pipeline_id)
    runs = [PipelineRun(**pr.to_dict()) for pr in pr_list]
    if kwargs:
        runs = client_utils.filter_objects(runs, **kwargs)
    return runs
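# Usage sketch for the run getters above (illustrative, not from the
# original source): `Client` is assumed to be the public entry point that
# carries these methods, and the IDs are placeholders. Keyword arguments
# are forwarded to client_utils.filter_objects, so the run list can be
# narrowed client-side by any PipelineRun attribute.
from cengine import Client  # assumed import path

client = Client()  # assumed to be authenticated already
runs = client.get_pipeline_runs('<PIPELINE_ID>')
training_runs = client.get_pipeline_runs(
    '<PIPELINE_ID>', pipeline_run_type=PipelineRunTypes.training.name)
if training_runs:
    latest = client.get_pipeline_run('<PIPELINE_ID>', training_runs[-1].id)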
def test_pipeline(self,
                  pipeline_id: Text,
                  datasource_id: Text = None,
                  datasource_commit_id: Text = None,
                  orchestration_backend: Text = None,
                  orchestration_args: Dict = None,
                  processing_backend: Text = None,
                  processing_args: Dict = None,
                  training_backend: Text = None,
                  training_args: Dict = None,
                  serving_backend: Text = None,
                  serving_args: Dict = None) -> PipelineRun:
    if datasource_id is None and datasource_commit_id is None:
        raise ValueError('Please either define a datasource_id '
                         '(to pick the latest commit) or a '
                         'datasource_commit_id to define a source.')

    ds_api = ce_api.DatasourcesApi(self.client)
    if datasource_id is not None:
        commits = api_utils.api_call(
            ds_api.get_commits_api_v1_datasources_ds_id_commits_get,
            datasource_id)
        commits.sort(key=lambda x: x.created_at)
        c_id = commits[-1].id
    else:
        c_id = datasource_commit_id

    run_create = PipelineRun.creator(
        pipeline_run_type=PipelineRunTypes.test.name,
        datasource_commit_id=c_id,
        orchestration_backend=orchestration_backend,
        orchestration_args=orchestration_args,
        processing_backend=processing_backend,
        processing_args=processing_args,
        additional_args={
            'training_backend': training_backend,
            'training_args': training_args,
            'serving_backend': serving_backend,
            'serving_args': serving_args
        })

    p_api = ce_api.PipelinesApi(self.client)
    return api_utils.api_call(
        p_api.create_pipeline_run_api_v1_pipelines_pipeline_id_runs_post,
        run_create,
        pipeline_id)
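# Hedged example for test_pipeline (illustrative only): with just a
# datasource_id, the latest commit is picked automatically. The backend
# argument shown is a hypothetical placeholder, not a documented value.
test_run = client.test_pipeline(
    pipeline_id='<PIPELINE_ID>',
    datasource_id='<DATASOURCE_ID>',
    training_args={'epochs': 1},  # hypothetical backend argument
)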
def pull_pipeline(self, pipeline_id: Text) -> PipelineConfig:
    api = ce_api.PipelinesApi(self.client)
    pp = api_utils.api_call(
        api.get_pipeline_api_v1_pipelines_pipeline_id_get,
        pipeline_id=pipeline_id)

    c = pp.pipeline_config
    if GlobalKeys.BQ_ARGS_ in c:
        c.pop(GlobalKeys.BQ_ARGS_)
    if GlobalKeys.CUSTOM_CODE_ in c:
        c.pop(GlobalKeys.CUSTOM_CODE_)
    if 'ai_platform_training_args' in c:
        c.pop('ai_platform_training_args')

    return PipelineConfig(**c)
def logs_pipeline(info, source_id):
    """Get link to the logs of a pipeline"""
    p_uuid, r_uuid = utils.resolve_pipeline_runs(info, source_id)

    utils.notice(
        'Generating logs url for the pipeline run ID {}. Please visit the '
        'url for all your logs.'.format(utils.format_uuid(r_uuid)))

    api = ce_api.PipelinesApi(utils.api_client(info))
    logs_url = utils.api_call(
        api.get_pipeline_logs_api_v1_pipelines_pipeline_id_runs_pipeline_run_id_logs_get,
        pipeline_id=p_uuid,
        pipeline_run_id=r_uuid)

    click.echo(logs_url)
def statistics_pipeline(info, pipeline_):
    """Serve the statistics of a pipeline run"""
    p_uuid, r_uuid = utils.resolve_pipeline_runs(
        info, pipeline_, run_type=PipelineRunTypes.training.name)

    utils.notice('Generating statistics for the pipeline run ID {}. If your '
                 'browser opens up to a blank window, please refresh '
                 'the page once.'.format(utils.format_uuid(r_uuid)))

    api = ce_api.PipelinesApi(utils.api_client(info))
    stat_artifact = utils.api_call(
        api.get_pipeline_artifacts_api_v1_pipelines_pipeline_id_runs_pipeline_run_id_artifacts_component_type_get,
        pipeline_id=p_uuid,
        pipeline_run_id=r_uuid,
        component_type=GDPComponent.SplitStatistics.name)

    ws_id = info[info[constants.ACTIVE_USER]][constants.ACTIVE_WORKSPACE]
    path = Path(click.get_app_dir(constants.APP_NAME),
                'statistics',
                str(ws_id),
                p_uuid,
                r_uuid)
    utils.download_artifact(artifact_json=stat_artifact[0].to_dict(),
                            path=path)

    import tensorflow as tf
    from tensorflow_metadata.proto.v0 import statistics_pb2
    import panel as pn

    result = {}
    for split in os.listdir(path):
        stats_path = os.path.join(path, split, 'stats_tfrecord')
        serialized_stats = next(
            tf.compat.v1.io.tf_record_iterator(stats_path))
        stats = statistics_pb2.DatasetFeatureStatisticsList()
        stats.ParseFromString(serialized_stats)
        dataset_list = statistics_pb2.DatasetFeatureStatisticsList()
        for d in stats.datasets:
            d.name = split
            dataset_list.datasets.append(d)
        result[split] = dataset_list

    h = utils.get_statistics_html(result)
    pn.serve(panels=pn.pane.HTML(h, width=1200), show=True)
def resolve_pipeline_runs(info, source_id, run_type=None):
    ws_id = info[info[constants.ACTIVE_USER]][constants.ACTIVE_WORKSPACE]

    ws_api = ce_api.WorkspacesApi(api_client(info))
    p_api = ce_api.PipelinesApi(api_client(info))

    parts = source_id.split(':')
    if len(parts) > 2:
        raise ValueError('Unresolvable pipeline ID')

    pipelines = api_call(
        ws_api.get_workspaces_pipelines_api_v1_workspaces_workspace_id_pipelines_get,
        ws_id)
    p_id = find_closest_uuid(parts[0], pipelines)

    runs = api_call(
        p_api.get_pipeline_runs_api_v1_pipelines_pipeline_id_runs_get,
        p_id)
    if run_type:
        runs = [r for r in runs if r.pipeline_run_type == run_type]

    if len(parts) == 2:
        # Explicit run given as PIPELINE_ID:RUN_ID
        r_id = find_closest_uuid(parts[1], runs)
    else:
        # No run given: fall back to the latest matching run
        runs.sort(key=lambda x: x.start_time)
        r_id = runs[-1].id if runs else None

    return p_id, r_id
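# resolve_pipeline_runs accepts either a bare pipeline reference or a
# colon-separated PIPELINE:RUN pair. A sketch of both forms, assuming an
# `info` dict from the CLI context (the ID prefixes are placeholders):
p_id, r_id = resolve_pipeline_runs(info, '3f2a:9c1d')  # explicit run
p_id, r_id = resolve_pipeline_runs(
    info, '3f2a', run_type=PipelineRunTypes.training.name)  # latest run
if r_id is None:
    # A pipeline without (matching) runs resolves to r_id = None
    print('No runs found for pipeline {}'.format(p_id))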
def get_log_dir(p_uuid, r_uuid, info):
    # TODO: How do I know that the pipeline is in this workspace? The
    #   active workspace may have changed since the run.
    ws_id = info[info[constants.ACTIVE_USER]][constants.ACTIVE_WORKSPACE]
    d_path = os.path.join(click.get_app_dir(constants.APP_NAME),
                          'eval_trainer',
                          str(ws_id),
                          str(p_uuid),
                          str(r_uuid))

    if os.path.exists(os.path.join(d_path, 'eval_model_dir')):
        return d_path

    api = ce_api.PipelinesApi(api_client(info))
    artifact = api_call(
        api.get_pipeline_artifacts_api_v1_pipelines_pipeline_id_runs_pipeline_run_id_artifacts_component_type_get,
        pipeline_id=p_uuid,
        pipeline_run_id=r_uuid,
        component_type=GDPComponent.Trainer.name)
    download_artifact(artifact[0].to_dict(), path=d_path)
    return d_path
def resolve_pipeline_creation(info,
                              pipeline_type,
                              pipeline_,
                              datasource,
                              orchestration_backend,
                              orchestration_args,
                              processing_backend,
                              processing_args,
                              force,
                              additional_args):
    active_user = info[constants.ACTIVE_USER]

    # Initiate all required APIs
    p_api = ce_api.PipelinesApi(api_client(info))

    # Resolve the datasource commit
    if datasource is not None:
        ds_id, c_id = resolve_datasource_commits(info, datasource)
    elif constants.ACTIVE_DATASOURCE_COMMIT in info[active_user]:
        ds_id, c_id = info[active_user][
            constants.ACTIVE_DATASOURCE_COMMIT].split(':')
    else:
        raise AssertionError('Please either select an active datasource '
                             'commit to work with or explicitly define it.')

    declare('Using Datasource Commit: {}'.format(format_uuid(c_id)))

    # Resolve the pipeline uuid
    pipeline_id, _ = resolve_pipeline_runs(info, pipeline_)

    run_create = PipelineRunCreate(
        pipeline_run_type=pipeline_type,
        datasource_commit_id=c_id,
        orchestration_backend=orchestration_backend,
        orchestration_args=orchestration_args,
        processing_backend=processing_backend,
        processing_args=processing_args,
        additional_args=additional_args)

    notice('Provisioning required resources. This might take a few '
           'minutes...')

    r = api_call(
        p_api.create_pipeline_run_api_v1_pipelines_pipeline_id_runs_post,
        run_create,
        pipeline_id)

    declare('Run created with ID: {id}!\n'.format(id=format_uuid(r.id)))
    declare("Use 'cengine pipeline status -p {}' to check on its "
            "status.".format(format_uuid(pipeline_id)))
def push_pipeline(self,
                  name: Text,
                  workspace_id: Text,
                  config: Union[Dict, PipelineConfig]) -> Pipeline:
    if isinstance(config, PipelineConfig):
        # config.check_completion()
        pass
    elif isinstance(config, dict):
        config = PipelineConfig(**config)
        # config.check_completion()
    else:
        raise ValueError('Please provide either a dict value or an '
                         'instance of cengine.PipelineConfig for '
                         'the config')

    api = ce_api.PipelinesApi(self.client)
    p = api_utils.api_call(
        func=api.create_pipeline_api_v1_pipelines_post,
        body=Pipeline.creator(name=name,
                              pipeline_config=config.to_serial(),
                              workspace_id=workspace_id))
    return Pipeline(**p.to_dict())
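# A common round trip, sketched under the same assumed `client` as above:
# pull a registered config, adjust it, and push it back under a new name
# (the name and IDs are placeholders). A plain dict is accepted too; it is
# validated by being passed through PipelineConfig(**config).
config = client.pull_pipeline('<PIPELINE_ID>')
# ...edit fields on `config` as needed...
new_pipeline = client.push_pipeline(name='my-pipeline-v2',
                                    workspace_id='<WORKSPACE_ID>',
                                    config=config)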
def model_pipeline(info, pipeline_, output_path):
    """Download the trained model to a specified location"""
    if os.path.exists(output_path):
        if not os.path.isdir(output_path):
            utils.error("Output path must be an empty directory!")
        if [f for f in os.listdir(output_path) if not f.startswith('.')]:
            utils.error("Output path must be an empty directory!")
    else:
        utils.declare("Creating directory {}..".format(output_path))

    p_uuid, r_uuid = utils.resolve_pipeline_runs(info, pipeline_)
    utils.notice('Downloading the trained model from pipeline run '
                 'ID {}. This might take some time if the model '
                 'resources are significantly large in size.\nYour patience '
                 'is much appreciated!'.format(utils.format_uuid(r_uuid)))

    api = ce_api.PipelinesApi(utils.api_client(info))
    artifact = utils.api_call(
        api.get_pipeline_artifacts_api_v1_pipelines_pipeline_id_runs_pipeline_run_id_artifacts_component_type_get,
        pipeline_id=p_uuid,
        pipeline_run_id=r_uuid,
        component_type=GDPComponent.Deployer.name)

    spin = utils.Spinner()
    spin.start()
    if len(artifact) == 1:
        utils.download_artifact(artifact_json=artifact[0].to_dict(),
                                path=output_path)
        spin.stop()
    else:
        # Stop the spinner before surfacing the error
        spin.stop()
        utils.error('Something unexpected happened! Please contact '
                    '[email protected] to get further information.')

    utils.declare('Model downloaded to: {}'.format(output_path))
    # TODO: [LOW] Make the Tensorflow version more dynamic
    utils.declare('Please note that the model is saved as a SavedModel '
                  'Tensorflow artifact, trained on Tensorflow 2.1.0.')
def push_pipeline(info, config_path, pipeline_name):
    """Register a pipeline with the selected configuration"""
    active_user = info[constants.ACTIVE_USER]
    ws_id = info[active_user][constants.ACTIVE_WORKSPACE]

    try:
        with open(config_path, 'rt', encoding='utf8') as f:
            config = yaml.safe_load(f)
    except yaml.YAMLError:
        utils.error('Badly formatted YAML!')

    api = ce_api.PipelinesApi(utils.api_client(info))
    p = utils.api_call(api.create_pipeline_api_v1_pipelines_post,
                       PipelineCreate(name=pipeline_name,
                                      pipeline_config=config,
                                      workspace_id=ws_id))

    utils.declare('Pipeline pushed successfully with ID: {id}!'.format(
        id=utils.format_uuid(p.id)))
    utils.declare(
        "Use `cengine pipeline train {} --datasource DS_COMMIT` "
        "to launch a training pipeline!".format(utils.format_uuid(p.id)))
def download_model(self, pipeline_id, pipeline_run_id, output_path):
    if os.path.exists(output_path):
        if not os.path.isdir(output_path):
            raise NotADirectoryError("Output path must be an empty "
                                     "directory!")
        if [f for f in os.listdir(output_path) if not f.startswith('.')]:
            raise NotADirectoryError("Output path must be an empty "
                                     "directory!")
    else:
        logging.info("Creating directory {}..".format(output_path))

    # Resolve the pipeline run
    if pipeline_id is None or pipeline_run_id is None:
        raise ValueError('Please provide both a pipeline_id and a '
                         'pipeline_run_id to choose a trained model.')

    p_api = ce_api.PipelinesApi(self.client)
    artifact = api_utils.api_call(
        p_api.get_pipeline_artifacts_api_v1_pipelines_pipeline_id_runs_pipeline_run_id_artifacts_component_type_get,
        pipeline_id=pipeline_id,
        pipeline_run_id=pipeline_run_id,
        component_type=GDPComponent.Deployer.name)

    spin = Spinner()
    spin.start()
    if len(artifact) == 1:
        download_artifact(artifact_json=artifact[0].to_dict(),
                          path=output_path)
        spin.stop()
    else:
        # Stop the spinner before surfacing the error
        spin.stop()
        raise Exception('Something unexpected happened! Please contact '
                        '[email protected] to get further information.')

    logging.info('Model downloaded to: {}'.format(output_path))
def infer_pipeline(self,
                   pipeline_id: Text = None,
                   pipeline_run_id: Text = None,
                   datasource_id: Text = None,
                   datasource_commit_id: Text = None,
                   orchestration_backend: Text = None,
                   orchestration_args: Dict = None,
                   processing_backend: Text = None,
                   processing_args: Dict = None) -> PipelineRun:
    # Resolve the pipeline run to take the trained model from
    if pipeline_id is None and pipeline_run_id is None:
        raise ValueError('Please either define a pipeline_id '
                         '(to pick the latest training run) or a '
                         'pipeline_run_id to choose a trained model.')

    p_api = ce_api.PipelinesApi(self.client)
    if pipeline_id is not None:
        runs = api_utils.api_call(
            p_api.get_pipeline_runs_api_v1_pipelines_pipeline_id_runs_get,
            pipeline_id)
        runs.sort(key=lambda x: x.start_time)
        training_runs = [
            r for r in runs
            if r.pipeline_run_type == PipelineRunTypes.training.name
        ]
        if len(training_runs) == 0:
            raise ValueError("You don't have any training runs with the "
                             'pipeline {}'.format(pipeline_id))
        r_id = training_runs[-1].id
    else:
        # TODO: If you just have the pipeline_run_id, how do you get the
        #   run without the pipeline_id?
        # TODO: We need to check whether we have a training run here
        r_id = pipeline_run_id

    # Resolve the datasource commit to run the inference on
    if datasource_id is None and datasource_commit_id is None:
        raise ValueError('Please either define a datasource_id '
                         '(to pick the latest commit) or a '
                         'datasource_commit_id to define a source.')

    ds_api = ce_api.DatasourcesApi(self.client)
    if datasource_id is not None:
        commits = api_utils.api_call(
            ds_api.get_commits_api_v1_datasources_ds_id_commits_get,
            datasource_id)
        commits.sort(key=lambda x: x.created_at)
        c_id = commits[-1].id
    else:
        c_id = datasource_commit_id

    run_create = PipelineRun.creator(
        pipeline_run_type=PipelineRunTypes.infer.name,
        datasource_commit_id=c_id,
        orchestration_backend=orchestration_backend,
        orchestration_args=orchestration_args,
        processing_backend=processing_backend,
        processing_args=processing_args,
        additional_args={'run_id': r_id})

    return api_utils.api_call(
        p_api.create_pipeline_run_api_v1_pipelines_pipeline_id_runs_post,
        run_create,
        pipeline_id)
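# Inference usage sketch (assumed `client`, placeholder IDs): pick the
# latest trained model of a pipeline and the latest commit of a datasource.
infer_run = client.infer_pipeline(pipeline_id='<PIPELINE_ID>',
                                  datasource_id='<DATASOURCE_ID>')
# Pinning the data snapshot instead; per the TODOs above, the final POST
# still needs a pipeline_id, so pass it alongside an explicit commit.
infer_run = client.infer_pipeline(pipeline_id='<PIPELINE_ID>',
                                  datasource_commit_id='<COMMIT_ID>')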
def get_pipeline_status(self,
                        workspace_id: Text,
                        pipeline_id: Text = None) -> Dict:
    ws_api = ce_api.WorkspacesApi(self.client)
    p_api = ce_api.PipelinesApi(self.client)
    d_api = ce_api.DatasourcesApi(self.client)

    status_dict = {}
    pipelines = api_utils.api_call(
        ws_api.get_workspaces_pipelines_api_v1_workspaces_workspace_id_pipelines_get,
        workspace_id)
    pipelines.sort(key=lambda x: x.created_at)
    for p in pipelines:
        write_check = (len(p.pipeline_runs) > 0) and \
                      (pipeline_id is None or pipeline_id == p.id)

        if write_check:
            status_dict[p.id] = []
            for r in p.pipeline_runs:
                run = api_utils.api_call(
                    p_api.get_pipeline_run_api_v1_pipelines_pipeline_id_runs_pipeline_run_id_get,
                    p.id,
                    r.id)

                # Resolve the datasource of the run
                ds_commit = api_utils.api_call(
                    d_api.get_single_commit_api_v1_datasources_commits_commit_id_get,
                    r.datasource_commit_id)
                ds = api_utils.api_call(
                    d_api.get_datasource_api_v1_datasources_ds_id_get,
                    ds_commit.datasource_id)

                if run.end_time:
                    td = run.end_time - run.start_time
                else:
                    td = datetime.now(timezone.utc) - run.start_time

                status_dict[p.id].append({
                    'RUN ID': run.id,
                    'TYPE': run.pipeline_run_type,
                    'STATUS': run.status,
                    'DATASOURCE': '{}_{}'.format(ds.name,
                                                 run.datasource_commit_id),
                    'DATAPOINTS': '{}'.format(ds_commit.n_datapoints),
                    'START TIME': print_utils.format_date(run.start_time),
                    'DURATION': print_utils.format_timedelta(td),
                })

    return status_dict
def get_statistics(self,
                   pipeline_id: Text,
                   pipeline_run_id: Text,
                   magic: bool = False):
    api = ce_api.PipelinesApi(self.client)
    pipeline = api_utils.api_call(
        api.get_pipeline_api_v1_pipelines_pipeline_id_get,
        pipeline_id=pipeline_id)
    run = api_utils.api_call(
        api.get_pipeline_run_api_v1_pipelines_pipeline_id_runs_pipeline_run_id_get,
        pipeline_id=pipeline_id,
        pipeline_run_id=pipeline_run_id)
    stat_artifact = api_utils.api_call(
        api.get_pipeline_artifacts_api_v1_pipelines_pipeline_id_runs_pipeline_run_id_artifacts_component_type_get,
        pipeline_id=pipeline_id,
        pipeline_run_id=pipeline_run_id,
        component_type=GDPComponent.SplitStatistics.name)

    if run.pipeline_run_type != PipelineRunTypes.training.name:
        raise TypeError('The selected pipeline run should be a training '
                        'run')

    workspace_id = pipeline.workspace_id
    path = Path(click.get_app_dir(constants.APP_NAME),
                'statistics',
                workspace_id,
                pipeline_id,
                pipeline_run_id)
    download_artifact(artifact_json=stat_artifact[0].to_dict(), path=path)

    import tensorflow as tf
    from tensorflow_metadata.proto.v0 import statistics_pb2
    import panel as pn

    result = {}
    for split in os.listdir(path):
        stats_path = os.path.join(path, split, 'stats_tfrecord')
        serialized_stats = next(
            tf.compat.v1.io.tf_record_iterator(stats_path))
        stats = statistics_pb2.DatasetFeatureStatisticsList()
        stats.ParseFromString(serialized_stats)
        dataset_list = statistics_pb2.DatasetFeatureStatisticsList()
        for d in stats.datasets:
            d.name = split
            dataset_list.datasets.append(d)
        result[split] = dataset_list

    h = get_statistics_html(result)

    if magic:
        import sys
        if 'ipykernel' not in sys.modules:
            raise EnvironmentError('The magic functions are only usable '
                                   'in a Jupyter notebook.')
        from IPython.core.display import display, HTML
        display(HTML(h))
    else:
        pn.serve(panels=pn.pane.HTML(h, width=1200), show=True)
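# Viewing statistics, sketched with the assumed `client` and placeholder
# IDs: inside a Jupyter notebook, magic=True renders the HTML inline;
# otherwise a local Panel server is launched and blocks until interrupted.
client.get_statistics('<PIPELINE_ID>', '<TRAINING_RUN_ID>', magic=False)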
def get_pipeline_status(info, pipeline_id):
    """Get status of started pipelines"""
    utils.notice('Fetching pipeline(s). This might take a few seconds...\n')

    active_user = info[constants.ACTIVE_USER]
    ws = info[active_user][constants.ACTIVE_WORKSPACE]

    ws_api = ce_api.WorkspacesApi(utils.api_client(info))
    p_api = ce_api.PipelinesApi(utils.api_client(info))
    d_api = ce_api.DatasourcesApi(utils.api_client(info))

    pipelines = utils.api_call(
        ws_api.get_workspaces_pipelines_api_v1_workspaces_workspace_id_pipelines_get,
        ws)

    if pipeline_id is not None:
        pipeline_id = utils.find_closest_uuid(pipeline_id, pipelines)

    pipelines.sort(key=lambda x: x.created_at)
    for p in pipelines:
        write_check = (len(p.pipeline_runs) > 0) and \
                      (pipeline_id is None or pipeline_id == p.id)

        if write_check:
            title = 'PIPELINE NAME: {} PIPELINE ID: {}'.format(
                p.name, utils.format_uuid(p.id))
            utils.declare(title)
            utils.declare('-' * len(title))

            table = []
            for r in p.pipeline_runs:
                run = utils.api_call(
                    p_api.get_pipeline_run_api_v1_pipelines_pipeline_id_runs_pipeline_run_id_get,
                    p.id,
                    r.id)

                # Resolve the datasource of the run
                ds_commit = utils.api_call(
                    d_api.get_single_commit_api_v1_datasources_commits_commit_id_get,
                    r.datasource_commit_id)
                ds = utils.api_call(
                    d_api.get_datasource_api_v1_datasources_ds_id_get,
                    ds_commit.datasource_id)

                if run.end_time:
                    td = run.end_time - run.start_time
                else:
                    td = datetime.now(timezone.utc) - run.start_time

                # # Resolve component status
                # stage = utils.get_run_stage(run.pipeline_components)

                table.append({
                    'RUN ID': utils.format_uuid(run.id),
                    'TYPE': run.pipeline_run_type,
                    'STATUS': run.status,
                    # 'STAGE': stage,
                    'DATASOURCE': '{}_{}'.format(
                        ds.name,
                        utils.format_uuid(run.datasource_commit_id)),
                    'DATAPOINTS': '{}'.format(ds_commit.n_datapoints),
                    # 'RUNNING STAGE': stage,
                    'START TIME': utils.format_date(run.start_time),
                    'DURATION': utils.format_timedelta(td),
                })

            click.echo(tabulate(table, headers='keys', tablefmt='plain'))
            click.echo('\n')
def evaluate_single_pipeline(self,
                             pipeline_id: Text,
                             pipeline_run_id: Text,
                             magic: bool = False):
    # Resolve the pipeline run
    if pipeline_id is None or pipeline_run_id is None:
        raise ValueError('Please provide both a pipeline_id and a '
                         'pipeline_run_id to choose a trained model.')

    p_api = ce_api.PipelinesApi(self.client)
    pipeline = api_utils.api_call(
        p_api.get_pipeline_api_v1_pipelines_pipeline_id_get,
        pipeline_id=pipeline_id)
    workspace_id = pipeline.workspace_id

    trainer_path = os.path.join(click.get_app_dir(constants.APP_NAME),
                                'eval_trainer',
                                workspace_id,
                                pipeline_id,
                                pipeline_run_id)
    eval_path = os.path.join(click.get_app_dir(constants.APP_NAME),
                             'eval_evaluator',
                             workspace_id,
                             pipeline_id,
                             pipeline_run_id)

    artifact = api_utils.api_call(
        p_api.get_pipeline_artifacts_api_v1_pipelines_pipeline_id_runs_pipeline_run_id_artifacts_component_type_get,
        pipeline_id=pipeline_id,
        pipeline_run_id=pipeline_run_id,
        component_type=GDPComponent.Trainer.name)
    download_artifact(artifact[0].to_dict(), path=trainer_path)

    artifact = api_utils.api_call(
        p_api.get_pipeline_artifacts_api_v1_pipelines_pipeline_id_runs_pipeline_run_id_artifacts_component_type_get,
        pipeline_id=pipeline_id,
        pipeline_run_id=pipeline_run_id,
        component_type=GDPComponent.Evaluator.name)
    download_artifact(artifact[0].to_dict(), path=eval_path)

    # Patch to make it work locally: point the model location at the
    # downloaded eval path
    import json
    with open(os.path.join(eval_path, 'eval_config.json'), 'r') as f:
        eval_config = json.load(f)
    eval_config['modelLocations'][''] = eval_path
    with open(os.path.join(eval_path, 'eval_config.json'), 'w') as f:
        json.dump(eval_config, f)

    if magic:
        from cengine.utils.shell_utils import create_new_cell
        model_block = evaluation.get_model_block(trainer_path)
        eval_block = evaluation.get_eval_block(eval_path)

        create_new_cell(eval_block)
        create_new_cell(model_block)
    else:
        nb = nbf.v4.new_notebook()
        nb['cells'] = [
            nbf.v4.new_code_cell(evaluation.get_model_block(trainer_path)),
            nbf.v4.new_code_cell(evaluation.get_eval_block(eval_path))
        ]

        config_folder = click.get_app_dir(constants.APP_NAME)
        if not (os.path.exists(config_folder) and
                os.path.isdir(config_folder)):
            os.makedirs(config_folder)

        final_out_path = os.path.join(config_folder,
                                      constants.EVALUATION_NOTEBOOK)
        s = nbf.writes(nb)
        if isinstance(s, bytes):
            s = s.decode('utf8')
        with open(final_out_path, 'w') as f:
            f.write(s)

        os.system('jupyter notebook "{}"'.format(final_out_path))