# NOTE: assumed shared imports for the excerpted functions below. Project-local
# helpers (echo, LOGGER, Clients, Config, Testsuite, DataProgress, JobProgress,
# SubmitJobResponse/QueryJobResponse/UploadDataResponse, _config,
# _load_testsuites, _add_replace_hook, convert, do_generated, download_mnist,
# get_config, remove_file, comparison_quality, save_quality, the _generate_*
# data writers, _run_benchmark_pairs, _run_pipeline_jobs, and the module-level
# knobs used by data_save) come from the surrounding fate_test package and are
# not shown in this excerpt. The functions were collected from several CLI
# modules, which is why some names (_upload_data, _submit_job) appear in more
# than one variant.
import glob
import os
import re
import sys
import threading
import time
import uuid
from datetime import timedelta
from pathlib import Path

import click
import numpy as np


def to_dsl(ctx, include, output_path, **kwargs):
    """
    convert pipeline scripts to dsl/conf files; make sure data is uploaded beforehand
    """
    ctx.obj.update(**kwargs)
    ctx.obj.post_process()
    namespace = ctx.obj["namespace"]
    config_inst = ctx.obj["config"]
    yes = ctx.obj["yes"]
    echo.welcome()
    echo.echo(f"converting namespace: {namespace}", fg='red')
    for path in include:
        echo.echo(f"pipeline path: {os.path.abspath(path)}")
    if not yes and not click.confirm("running?"):
        return
    config_yaml_file = './examples/config.yaml'
    temp_file_path = f'./logs/{namespace}/temp_pipeline.py'
    for i in include:
        try:
            convert(i, temp_file_path, config_yaml_file, output_path, config_inst)
        except Exception:
            exception_id = uuid.uuid1()
            echo.echo(f"exception_id={exception_id}")
            LOGGER.exception(f"exception id: {exception_id}")
        finally:
            echo.stdout_newline()
    echo.farewell()
    echo.echo(f"converting namespace: {namespace}", fg='red')
def download_mnists(ctx, output_path, **kwargs):
    """
    download mnist data for flow
    """
    ctx.obj.update(**kwargs)
    ctx.obj.post_process()
    namespace = ctx.obj["namespace"]
    config_inst = ctx.obj["config"]
    yes = ctx.obj["yes"]
    echo.welcome()
    echo.echo(f"testsuite namespace: {namespace}", fg='red')
    if output_path is None:
        config = get_config(config_inst)
        output_path = str(config.data_base_dir) + "/examples/data/"
    if not yes and not click.confirm("running?"):
        return
    try:
        download_mnist(Path(output_path), "mnist_train")
        download_mnist(Path(output_path), "mnist_eval", is_train=False)
    except Exception:
        exception_id = uuid.uuid1()
        echo.echo(f"exception_id={exception_id}")
        LOGGER.exception(f"exception id: {exception_id}")
    finally:
        echo.stdout_newline()
    echo.farewell()
    echo.echo(f"testsuite namespace: {namespace}", fg='red')
def run_benchmark(ctx, include, exclude, glob, skip_data, tol, clean_data, storage_tag,
                  history_tag, match_details, **kwargs):
    """
    process benchmark suite, alias: bq
    """
    ctx.obj.update(**kwargs)
    ctx.obj.post_process()
    namespace = ctx.obj["namespace"]
    config_inst = ctx.obj["config"]
    config_inst.extend_sid = ctx.obj["extend_sid"]
    config_inst.auto_increasing_sid = ctx.obj["auto_increasing_sid"]
    if clean_data is None:
        clean_data = config_inst.clean_data
    data_namespace_mangling = ctx.obj["namespace_mangling"]
    yes = ctx.obj["yes"]
    echo.welcome("benchmark")
    echo.echo(f"testsuite namespace: {namespace}", fg='red')
    echo.echo("loading testsuites:")
    suites = _load_testsuites(includes=include, excludes=exclude, glob=glob,
                              suffix="benchmark.json", suite_type="benchmark")
    for suite in suites:
        echo.echo(f"\tdataset({len(suite.dataset)}) benchmark groups({len(suite.pairs)}) {suite.path}")
    if not yes and not click.confirm("running?"):
        return
    with Clients(config_inst) as client:
        fate_version = client["guest_0"].get_version()
        for i, suite in enumerate(suites):
            # noinspection PyBroadException
            try:
                start = time.time()
                echo.echo(f"[{i + 1}/{len(suites)}]start at {time.strftime('%Y-%m-%d %X')} {suite.path}", fg='red')
                if not skip_data:
                    try:
                        _upload_data(client, suite, config_inst)
                    except Exception as e:
                        raise RuntimeError(f"exception occurred while uploading data for {suite.path}") from e
                try:
                    _run_benchmark_pairs(config_inst, suite, tol, namespace, data_namespace_mangling,
                                         storage_tag, history_tag, fate_version, match_details)
                except Exception as e:
                    raise RuntimeError(f"exception occurred while running benchmark jobs for {suite.path}") from e
                if not skip_data and clean_data:
                    _delete_data(client, suite)
                echo.echo(f"[{i + 1}/{len(suites)}]elapse {timedelta(seconds=int(time.time() - start))}", fg='red')
            except Exception:
                exception_id = uuid.uuid1()
                echo.echo(f"exception in {suite.path}, exception_id={exception_id}", err=True, fg='red')
                LOGGER.exception(f"exception id: {exception_id}")
            finally:
                echo.stdout_newline()
    echo.farewell()
    echo.echo(f"testsuite namespace: {namespace}", fg='red')
def data_save(data_info, table_names, namespaces, partition_list):
    data_count = 0
    for idx, data_name in enumerate(data_info.keys()):
        label_flag = 'guest' in data_info[data_name]
        data_type = 'dense' if 'guest' in data_info[data_name] else host_data_type
        if split_host and ('host' in data_info[data_name]):
            # split the host rows evenly across host datasets, clamping the
            # last chunk to host_data_size
            chunk = int(np.ceil(host_data_size / len(data_info)))
            host_end_num = min(chunk * (data_count + 1), host_data_size)
            host_start_num = chunk * data_count
            data_count += 1
        else:
            host_end_num = host_data_size
            host_start_num = 0
        out_path = os.path.join(str(big_data_dir), data_name)
        if os.path.exists(out_path) and os.path.isfile(out_path) and not parallelize:
            if force:
                remove_file(out_path)
            else:
                echo.echo('{} already exists'.format(out_path))
                continue
        data_i = (idx + 1) / len(data_info)
        download = f'dataget [{"#" * int(24 * data_i)}{"-" * (24 - int(24 * data_i))}] {idx + 1}/{len(data_info)}'
        start = time.time()
        progress = data_progress(download, start)
        thread = threading.Thread(target=run, args=[progress])
        thread.start()
        try:
            if 'guest' in data_info[data_name]:
                if not parallelize:
                    _generate_dens_data(out_path, guest_start_num, guest_end_num,
                                        guest_feature_num, label_flag, progress)
                else:
                    _generate_parallelize_data(guest_start_num, guest_end_num, guest_feature_num,
                                               table_names[idx], namespaces[idx], label_flag,
                                               data_type, partition_list[idx], progress)
            else:
                if data_type == 'tag' and not parallelize:
                    _generate_tag_data(out_path, host_start_num, host_end_num,
                                       host_feature_num, sparsity, progress)
                elif data_type == 'tag_value' and not parallelize:
                    _generate_tag_value_data(out_path, host_start_num, host_end_num,
                                             host_feature_num, progress)
                elif data_type == 'dense' and not parallelize:
                    _generate_dens_data(out_path, host_start_num, host_end_num,
                                        host_feature_num, label_flag, progress)
                elif parallelize:
                    _generate_parallelize_data(host_start_num, host_end_num, host_feature_num,
                                               table_names[idx], namespaces[idx], label_flag,
                                               data_type, partition_list[idx], progress)
            progress.set_switch(False)
            time.sleep(1)
        except Exception:
            exception_id = uuid.uuid1()
            echo.echo(f"exception_id={exception_id}")
            LOGGER.exception(f"exception id: {exception_id}")
        finally:
            progress.set_switch(False)
            echo.stdout_newline()
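# --- hypothetical sketch (not from the source) --------------------------------
# data_save drives a console spinner on a background thread through the helpers
# data_progress, run and set_switch, none of which appear in this excerpt. A
# minimal sketch of what such helpers could look like, assuming a simple
# flag-guarded repaint loop; the names mirror the call sites above, but the
# real implementations live elsewhere in the package.
class data_progress:
    """Hypothetical stand-in: a progress line plus a switch flag."""

    def __init__(self, down_load, start):
        self.down_load = down_load   # pre-rendered progress-bar text
        self.start = start           # start timestamp
        self.switch = True           # loop guard, cleared via set_switch(False)

    def set_switch(self, switch):
        self.switch = switch


def run(progress):
    """Hypothetical stand-in: repaint the progress line until the switch clears."""
    while progress.switch:
        elapsed = int(time.time() - progress.start)
        sys.stdout.write(f"\r{progress.down_load} {elapsed}s")
        sys.stdout.flush()
        time.sleep(0.5)
    sys.stdout.write("\n")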
def _upload_data(clients: Clients, suite, config: Config, output_path=None):
    with click.progressbar(length=len(suite.dataset),
                           label="dataset",
                           show_eta=False,
                           show_pos=True,
                           width=24) as bar:
        for i, data in enumerate(suite.dataset):
            data.update(config)
            table_name = data.config.get('table_name')
            if table_name is None:
                table_name = data.config.get('name')
            data_progress = DataProgress(f"{data.role_str}<-{data.config['namespace']}.{table_name}")

            def update_bar(n_step):
                bar.item_show_func = lambda x: data_progress.show()
                time.sleep(0.1)
                bar.update(n_step)

            def _call_back(resp):
                if isinstance(resp, UploadDataResponse):
                    data_progress.submitted(resp.job_id)
                    echo.file(f"[dataset]{resp.job_id}")
                if isinstance(resp, QueryJobResponse):
                    data_progress.update()
                update_bar(0)

            try:
                echo.stdout_newline()
                status, data_path = clients[data.role_str].upload_data(data, _call_back, output_path)
                time.sleep(1)
                data_progress.update()
                if status != 'success':
                    raise RuntimeError(f"uploading {i + 1}th data for {suite.path} {status}")
                bar.update(1)
                if _config.data_switch:
                    from fate_test.scripts import generate_mock_data
                    generate_mock_data.remove_file(data_path)
            except Exception:
                exception_id = str(uuid.uuid1())
                echo.file(f"exception({exception_id})")
                LOGGER.exception(f"exception id: {exception_id}")
                echo.echo(f"upload {i + 1}th data {data.config} to {data.role_str} fail, exception_id: {exception_id}")
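# --- example (not from the source) ---------------------------------------------
# The uploaders here repaint the click progress bar by swapping item_show_func
# at runtime; that is plain click behavior. A self-contained demo of the trick
# (the dataset names are made up):
def _demo_progressbar():
    items = ["breast_guest", "breast_host", "default_credit"]
    with click.progressbar(length=len(items), label="dataset",
                           show_eta=False, show_pos=True, width=24) as bar:
        for name in items:
            # the text shown after the bar is whatever item_show_func returns
            bar.item_show_func = lambda _, name=name: f"uploading {name}"
            time.sleep(0.2)  # stand-in for the actual upload round-trip
            bar.update(1)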
def _delete_data(clients: Clients, suite: Testsuite):
    with click.progressbar(length=len(suite.dataset),
                           label="delete ",
                           show_eta=False,
                           show_pos=True,
                           width=24) as bar:
        for data in suite.dataset:
            # noinspection PyBroadException
            try:
                bar.item_show_func = \
                    lambda x: f"delete table: name={data.config['table_name']}, namespace={data.config['namespace']}"
                clients[data.role_str].delete_data(data)
            except Exception:
                LOGGER.exception(f"delete failed: name={data.config['table_name']}, "
                                 f"namespace={data.config['namespace']}")
            time.sleep(0.5)
            bar.update(1)
        echo.stdout_newline()
def _upload_data(clients: Clients, suite, config: Config):
    with click.progressbar(length=len(suite.dataset),
                           label="dataset",
                           show_eta=False,
                           show_pos=True,
                           width=24) as bar:
        for i, data in enumerate(suite.dataset):
            data.update(config)
            data_progress = DataProgress(f"{data.role_str}<-{data.config['namespace']}.{data.config['table_name']}")

            def update_bar(n_step):
                bar.item_show_func = lambda x: data_progress.show()
                time.sleep(0.1)
                bar.update(n_step)

            def _call_back(resp):
                if isinstance(resp, UploadDataResponse):
                    data_progress.submitted(resp.job_id)
                    echo.file(f"[dataset]{resp.job_id}")
                if isinstance(resp, QueryJobResponse):
                    data_progress.update()
                update_bar(0)

            try:
                echo.stdout_newline()
                response = clients[data.role_str].upload_data(data, _call_back)
                data_progress.update()
                if not response.status.is_success():
                    raise RuntimeError(f"uploading {i + 1}th data for {suite.path} {response.status}")
                bar.update(1)
            except Exception:
                exception_id = str(uuid.uuid1())
                echo.file(f"exception({exception_id})")
                LOGGER.exception(f"exception id: {exception_id}")
                echo.echo(f"upload {i + 1}th data {data.config} to {data.role_str} fail, exception_id: {exception_id}")
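# --- hypothetical sketch (not from the source) ---------------------------------
# Both _upload_data variants rely on upload_data invoking the caller's callback
# over a job's life cycle: once with an UploadDataResponse when the job is
# accepted, then repeatedly with QueryJobResponse objects while polling. The
# classes below are fakes that only mirror the attributes the callbacks read;
# the real ones come from the package's flow-client module.
class FakeUploadDataResponse:
    def __init__(self, job_id):
        self.job_id = job_id


class FakeQueryJobResponse:
    def __init__(self, status, progress):
        self.status = status
        self.progress = progress


def _demo_upload_callbacks(callback):
    """Drive a callback the way clients[...].upload_data is observed to."""
    callback(FakeUploadDataResponse(job_id="202401010000"))  # submit notification
    for pct in (25, 50, 100):                                # poll notifications
        callback(FakeQueryJobResponse(status="running", progress=pct))


# usage: _demo_upload_callbacks(lambda resp: print(type(resp).__name__, vars(resp)))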
def query_component_output_data(clients: Clients, config: Config, component_name, job_id, role, party_id):
    roles = config.role
    clients_role = None
    for k, v in roles.items():
        if int(party_id) in v and k == role:
            clients_role = role + "_" + str(v.index(int(party_id)))
    try:
        if clients_role is None:
            raise ValueError(f"party id {party_id} does not exist")
        try:
            table_info = clients[clients_role].output_data_table(job_id=job_id,
                                                                 role=role,
                                                                 party_id=party_id,
                                                                 component_name=component_name)
            table_info = clients[clients_role].table_info(table_name=table_info['name'],
                                                          namespace=table_info['namespace'])
        except Exception as e:
            raise RuntimeError(f"An exception occurred while getting data {clients_role}<-{component_name}") from e
        echo.echo("query_component_output_data result: {}".format(table_info))
        try:
            header = table_info['data']['schema']['header']
        except (KeyError, TypeError) as e:
            # a missing key raises KeyError/TypeError here, not ValueError
            raise ValueError(f"failed to obtain header from table, error msg: {e}") from e
        result = []
        for idx, header_name in enumerate(header[1:]):
            result.append((idx, header_name))
        echo.echo("Queried header is {}".format(result))
    except Exception:
        exception_id = uuid.uuid1()
        echo.echo(f"exception_id={exception_id}")
        LOGGER.exception(f"exception id: {exception_id}")
    finally:
        echo.stdout_newline()
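# --- worked example (derived from the lookup above) -----------------------------
# The role lookup maps a (role, party_id) pair onto a client key of the form
# f"{role}_{index}", where index is the party's position in config.role[role].
# config.role is assumed to be shaped like the mapping below; the party ids are
# made up.
def _demo_client_key():
    roles = {"guest": [9999], "host": [10000, 10001]}

    def client_key(role, party_id):
        # mirrors the loop in query_component_output_data
        for k, v in roles.items():
            if int(party_id) in v and k == role:
                return f"{role}_{v.index(int(party_id))}"
        raise ValueError(f"party id {party_id} does not exist")

    assert client_key("guest", 9999) == "guest_0"
    assert client_key("host", 10001) == "host_1"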
def upload(ctx, include, exclude, glob, suite_type, role, **kwargs):
    """
    upload data defined in suite config files
    """
    ctx.obj.update(**kwargs)
    ctx.obj.post_process()
    namespace = ctx.obj["namespace"]
    config_inst = ctx.obj["config"]
    yes = ctx.obj["yes"]
    echo.echo(f"testsuite namespace: {namespace}", fg='red')
    echo.echo("loading testsuites:")
    suffix = "benchmark.json" if suite_type == "benchmark" else "testsuite.json"
    suites = _load_testsuites(includes=include, excludes=exclude, glob=glob,
                              suffix=suffix, suite_type=suite_type)
    for suite in suites:
        if role != "all":
            suite.dataset = [d for d in suite.dataset if re.match(d.role_str, role)]
        echo.echo(f"\tdataset({len(suite.dataset)}) {suite.path}")
    if not yes and not click.confirm("running?"):
        return
    with Clients(config_inst) as client:
        for i, suite in enumerate(suites):
            # noinspection PyBroadException
            try:
                echo.echo(f"[{i + 1}/{len(suites)}]start at {time.strftime('%Y-%m-%d %X')} {suite.path}", fg='red')
                try:
                    _upload_data(client, suite, config_inst)
                except Exception as e:
                    raise RuntimeError(f"exception occurred while uploading data for {suite.path}") from e
            except Exception:
                exception_id = uuid.uuid1()
                echo.echo(f"exception in {suite.path}, exception_id={exception_id}")
                LOGGER.exception(f"exception id: {exception_id}")
            finally:
                echo.stdout_newline()
    echo.farewell()
    echo.echo(f"testsuite namespace: {namespace}", fg='red')
def to_testsuite(ctx, include, template_path, **kwargs):
    """
    convert pipeline testsuite to dsl testsuite
    """
    ctx.obj.update(**kwargs)
    ctx.obj.post_process()
    namespace = ctx.obj["namespace"]
    config_inst = ctx.obj["config"]
    yes = ctx.obj["yes"]
    echo.welcome()
    if not os.path.isdir(include):
        raise Exception("please provide a folder path")
    echo.echo(f"testsuite namespace: {namespace}", fg='red')
    echo.echo(f"pipeline path: {os.path.abspath(include)}")
    if not yes and not click.confirm("running?"):
        return
    input_path = os.path.abspath(include)
    input_list = [input_path]
    i = 0
    # breadth-first walk collecting every sub-directory under input_path;
    # entries must be joined onto their parent, otherwise isdir() would be
    # checked against the current working directory
    while i < len(input_list):
        for d in os.listdir(input_list[i]):
            sub_path = os.path.join(input_list[i], d)
            if os.path.isdir(sub_path):
                input_list.append(sub_path)
        i += 1
    for file_path in input_list:
        try:
            module_name = os.path.basename(file_path)
            do_generated(file_path, module_name, template_path, config_inst)
        except Exception:
            exception_id = uuid.uuid1()
            echo.echo(f"exception_id={exception_id}")
            LOGGER.exception(f"exception id: {exception_id}")
        finally:
            echo.stdout_newline()
    echo.farewell()
    echo.echo(f"converting namespace: {namespace}", fg='red')
def client_upload(suites, config_inst, namespace, output_path=None):
    with Clients(config_inst) as client:
        for i, suite in enumerate(suites):
            # noinspection PyBroadException
            try:
                echo.echo(f"[{i + 1}/{len(suites)}]start at {time.strftime('%Y-%m-%d %X')} {suite.path}", fg='red')
                try:
                    _upload_data(client, suite, config_inst, output_path)
                except Exception as e:
                    raise RuntimeError(f"exception occurred while uploading data for {suite.path}") from e
            except Exception:
                exception_id = uuid.uuid1()
                echo.echo(f"exception in {suite.path}, exception_id={exception_id}")
                LOGGER.exception(f"exception id: {exception_id}")
            finally:
                echo.stdout_newline()
    echo.farewell()
    echo.echo(f"testsuite namespace: {namespace}", fg='red')
def run_task(ctx, job_type, include, replace, timeout, update_job_parameters, update_component_parameters,
             max_iter, max_depth, num_trees, task_cores, storage_tag, history_tag, skip_data, clean_data,
             provider, **kwargs):
    """
    Test the performance of big data tasks, alias: bp
    """
    ctx.obj.update(**kwargs)
    ctx.obj.post_process()
    config_inst = ctx.obj["config"]
    config_inst.extend_sid = ctx.obj["extend_sid"]
    config_inst.auto_increasing_sid = ctx.obj["auto_increasing_sid"]
    namespace = ctx.obj["namespace"]
    yes = ctx.obj["yes"]
    data_namespace_mangling = ctx.obj["namespace_mangling"]
    if clean_data is None:
        clean_data = config_inst.clean_data

    def get_perf_template(conf: Config, job_type):
        perf_dir = os.path.join(os.path.abspath(conf.perf_template_dir), job_type, "*testsuite.json")
        return glob.glob(perf_dir)

    if not include:
        include = get_perf_template(config_inst, job_type)
    # prepare output dir and json hooks
    _add_replace_hook(replace)
    echo.welcome()
    echo.echo(f"testsuite namespace: {namespace}", fg='red')
    echo.echo("loading testsuites:")
    suites = _load_testsuites(includes=include, excludes=tuple(), glob=None, provider=provider)
    for i, suite in enumerate(suites):
        echo.echo(f"\tdataset({len(suite.dataset)}) dsl jobs({len(suite.jobs)}) {suite.path}")
    if not yes and not click.confirm("running?"):
        return
    echo.stdout_newline()
    with Clients(config_inst) as client:
        for i, suite in enumerate(suites):
            # noinspection PyBroadException
            try:
                start = time.time()
                echo.echo(f"[{i + 1}/{len(suites)}]start at {time.strftime('%Y-%m-%d %X')} {suite.path}", fg='red')
                if not skip_data:
                    try:
                        _upload_data(client, suite, config_inst)
                    except Exception as e:
                        raise RuntimeError(f"exception occurred while uploading data for {suite.path}") from e
                echo.stdout_newline()
                try:
                    time_consuming = _submit_job(client, suite, namespace, config_inst, timeout,
                                                 update_job_parameters, storage_tag, history_tag,
                                                 update_component_parameters, max_iter, max_depth,
                                                 num_trees, task_cores)
                except Exception as e:
                    raise RuntimeError(f"exception occurred while submitting job for {suite.path}") from e
                try:
                    _run_pipeline_jobs(config_inst, suite, namespace, data_namespace_mangling)
                except Exception as e:
                    raise RuntimeError(f"exception occurred while running pipeline jobs for {suite.path}") from e
                echo.echo(f"[{i + 1}/{len(suites)}]elapse {timedelta(seconds=int(time.time() - start))}", fg='red')
                if not skip_data and clean_data:
                    _delete_data(client, suite)
                echo.echo(suite.pretty_final_summary(time_consuming), fg='red')
            except Exception:
                exception_id = uuid.uuid1()
                echo.echo(f"exception in {suite.path}, exception_id={exception_id}")
                LOGGER.exception(f"exception id: {exception_id}")
            finally:
                echo.stdout_newline()
    echo.farewell()
    echo.echo(f"testsuite namespace: {namespace}", fg='red')
def run_suite(ctx, replace, include, exclude, glob, skip_dsl_jobs, skip_pipeline_jobs, skip_data,
              data_only, clean_data, **kwargs):
    """
    process testsuite
    """
    ctx.obj.update(**kwargs)
    ctx.obj.post_process()
    config_inst = ctx.obj["config"]
    if clean_data is None:
        clean_data = config_inst.clean_data
    namespace = ctx.obj["namespace"]
    yes = ctx.obj["yes"]
    data_namespace_mangling = ctx.obj["namespace_mangling"]
    # prepare output dir and json hooks
    _add_replace_hook(replace)
    echo.welcome()
    echo.echo(f"testsuite namespace: {namespace}", fg='red')
    echo.echo("loading testsuites:")
    suites = _load_testsuites(includes=include, excludes=exclude, glob=glob)
    for suite in suites:
        echo.echo(f"\tdataset({len(suite.dataset)}) dsl jobs({len(suite.jobs)}) "
                  f"pipeline jobs ({len(suite.pipeline_jobs)}) {suite.path}")
    if not yes and not click.confirm("running?"):
        return
    echo.stdout_newline()
    with Clients(config_inst) as client:
        for i, suite in enumerate(suites):
            # noinspection PyBroadException
            try:
                start = time.time()
                echo.echo(f"[{i + 1}/{len(suites)}]start at {time.strftime('%Y-%m-%d %X')} {suite.path}", fg='red')
                if not skip_data:
                    try:
                        _upload_data(client, suite, config_inst)
                    except Exception as e:
                        raise RuntimeError(f"exception occurred while uploading data for {suite.path}") from e
                if data_only:
                    continue
                if not skip_dsl_jobs:
                    echo.stdout_newline()
                    try:
                        _submit_job(client, suite, namespace, config_inst)
                    except Exception as e:
                        raise RuntimeError(f"exception occurred while submitting job for {suite.path}") from e
                if not skip_pipeline_jobs:
                    try:
                        _run_pipeline_jobs(config_inst, suite, namespace, data_namespace_mangling)
                    except Exception as e:
                        raise RuntimeError(f"exception occurred while running pipeline jobs for {suite.path}") from e
                if not skip_data and clean_data:
                    _delete_data(client, suite)
                echo.echo(f"[{i + 1}/{len(suites)}]elapse {timedelta(seconds=int(time.time() - start))}", fg='red')
                if not skip_dsl_jobs or not skip_pipeline_jobs:
                    echo.echo(suite.pretty_final_summary(), fg='red')
            except Exception:
                exception_id = uuid.uuid1()
                echo.echo(f"exception in {suite.path}, exception_id={exception_id}")
                LOGGER.exception(f"exception id: {exception_id}")
            finally:
                echo.stdout_newline()
    echo.farewell()
    echo.echo(f"testsuite namespace: {namespace}", fg='red')
def _submit_job(clients: Clients, suite: Testsuite, namespace: str, config: Config):
    # submit jobs
    with click.progressbar(length=len(suite.jobs),
                           label="jobs ",
                           show_eta=False,
                           show_pos=True,
                           width=24) as bar:
        for job in suite.jobs_iter():
            job_progress = JobProgress(job.job_name)

            def _raise():
                exception_id = str(uuid.uuid1())
                job_progress.exception(exception_id)
                suite.update_status(job_name=job.job_name, exception_id=exception_id)
                echo.file(f"exception({exception_id})")
                LOGGER.exception(f"exception id: {exception_id}")

            # noinspection PyBroadException
            try:
                job.job_conf.update(config.parties, config.work_mode, config.backend)
            except Exception:
                _raise()
                continue

            def update_bar(n_step):
                bar.item_show_func = lambda x: job_progress.show()
                time.sleep(0.1)
                bar.update(n_step)

            update_bar(1)

            def _call_back(resp: SubmitJobResponse):
                if isinstance(resp, SubmitJobResponse):
                    job_progress.submitted(resp.job_id)
                    echo.file(f"[jobs] {resp.job_id} ", nl=False)
                    suite.update_status(job_name=job.job_name, job_id=resp.job_id)
                if isinstance(resp, QueryJobResponse):
                    job_progress.running(resp.status, resp.progress)
                update_bar(0)

            # noinspection PyBroadException
            try:
                response = clients["guest_0"].submit_job(job=job, callback=_call_back)
                # noinspection PyBroadException
                try:
                    # add notes
                    notes = f"{job.job_name}@{suite.path}@{namespace}"
                    for role, party_id_list in job.job_conf.role.items():
                        for i, party_id in enumerate(party_id_list):
                            clients[f"{role}_{i}"].add_notes(job_id=response.job_id, role=role,
                                                             party_id=party_id, notes=notes)
                except Exception:
                    pass
            except Exception:
                _raise()
            else:
                job_progress.final(response.status)
                suite.update_status(job_name=job.job_name, status=response.status.status)
                if response.status.is_success():
                    if suite.model_in_dep(job.job_name):
                        dependent_jobs = suite.get_dependent_jobs(job.job_name)
                        for predict_job in dependent_jobs:
                            if predict_job.job_conf.dsl_version == 2:
                                # noinspection PyBroadException
                                try:
                                    model_info = clients["guest_0"].deploy_model(
                                        model_id=response.model_info["model_id"],
                                        model_version=response.model_info["model_version"],
                                        dsl=predict_job.job_dsl.as_dict())
                                except Exception:
                                    _raise()
                            else:
                                model_info = response.model_info
                            suite.feed_dep_model_info(predict_job, job.job_name, model_info)
                        suite.remove_dependency(job.job_name)
            update_bar(0)
            echo.stdout_newline()
def _submit_job(clients: Clients, suite: Testsuite, namespace: str, config: Config, timeout,
                update_job_parameters, storage_tag, history_tag, update_component_parameters,
                max_iter, max_depth, num_trees, task_cores):
    # submit jobs
    with click.progressbar(length=len(suite.jobs),
                           label="jobs",
                           show_eta=False,
                           show_pos=True,
                           width=24) as bar:
        time_list = []
        for job in suite.jobs_iter():
            start = time.time()
            job_progress = JobProgress(job.job_name)

            def _raise():
                exception_id = str(uuid.uuid1())
                job_progress.exception(exception_id)
                suite.update_status(job_name=job.job_name, exception_id=exception_id)
                echo.file(f"exception({exception_id})")
                LOGGER.exception(f"exception id: {exception_id}")

            # noinspection PyBroadException
            try:
                if max_iter is not None:
                    job.job_conf.update_component_parameters('max_iter', max_iter)
                if max_depth is not None:
                    job.job_conf.update_component_parameters('max_depth', max_depth)
                if num_trees is not None:
                    job.job_conf.update_component_parameters('num_trees', num_trees)
                if task_cores is not None:
                    job.job_conf.update_job_common_parameters(task_cores=task_cores)
                job.job_conf.update(config.parties, timeout, update_job_parameters,
                                    update_component_parameters)
            except Exception:
                _raise()
                continue

            def update_bar(n_step):
                bar.item_show_func = lambda x: job_progress.show()
                time.sleep(0.1)
                bar.update(n_step)

            update_bar(1)

            def _call_back(resp: SubmitJobResponse):
                if isinstance(resp, SubmitJobResponse):
                    job_progress.submitted(resp.job_id)
                    echo.file(f"[jobs] {resp.job_id} ", nl=False)
                    suite.update_status(job_name=job.job_name, job_id=resp.job_id)
                if isinstance(resp, QueryJobResponse):
                    job_progress.running(resp.status, resp.progress)
                update_bar(0)

            # noinspection PyBroadException
            try:
                response = clients["guest_0"].submit_job(job=job, callback=_call_back)
                # noinspection PyBroadException
                try:
                    # add notes
                    notes = f"{job.job_name}@{suite.path}@{namespace}"
                    for role, party_id_list in job.job_conf.role.items():
                        for i, party_id in enumerate(party_id_list):
                            clients[f"{role}_{i}"].add_notes(job_id=response.job_id, role=role,
                                                             party_id=party_id, notes=notes)
                except Exception:
                    pass
            except Exception:
                _raise()
            else:
                job_progress.final(response.status)
                suite.update_status(job_name=job.job_name, status=response.status.status)
                if response.status.is_success():
                    if suite.model_in_dep(job.job_name):
                        dependent_jobs = suite.get_dependent_jobs(job.job_name)
                        for predict_job in dependent_jobs:
                            model_info, table_info, cache_info, model_loader_info = None, None, None, None
                            for i in _config.deps_alter[predict_job.job_name]:
                                if isinstance(i, dict):
                                    name = i.get('name')
                                    data_pre = i.get('data')
                            if 'data_deps' in _config.deps_alter[predict_job.job_name]:
                                roles = list(data_pre.keys())
                                table_info, hierarchy = [], []
                                for role_ in roles:
                                    role, index = role_.split("_")
                                    input_ = data_pre[role_]
                                    for data_input, cpn in input_.items():
                                        try:
                                            table_name = clients["guest_0"].output_data_table(
                                                job_id=response.job_id,
                                                role=role,
                                                party_id=config.role[role][int(index)],
                                                component_name=cpn)
                                        except Exception:
                                            _raise()
                                        if predict_job.job_conf.dsl_version == 2:
                                            hierarchy.append([role, index, data_input])
                                            table_info.append({'table': table_name})
                                        else:
                                            hierarchy.append([role, 'args', 'data'])
                                            table_info.append({data_input: [table_name]})
                                table_info = {'hierarchy': hierarchy, 'table_info': table_info}
                            if 'model_deps' in _config.deps_alter[predict_job.job_name]:
                                if predict_job.job_conf.dsl_version == 2:
                                    # noinspection PyBroadException
                                    try:
                                        model_info = clients["guest_0"].deploy_model(
                                            model_id=response.model_info["model_id"],
                                            model_version=response.model_info["model_version"],
                                            dsl=predict_job.job_dsl.as_dict())
                                    except Exception:
                                        _raise()
                                else:
                                    model_info = response.model_info
                            if 'cache_deps' in _config.deps_alter[predict_job.job_name]:
                                cache_dsl = predict_job.job_dsl.as_dict()
                                cache_info = []
                                for cpn in cache_dsl.get("components").keys():
                                    if "CacheLoader" in cache_dsl.get("components").get(cpn).get("module"):
                                        cache_info.append({cpn: {'job_id': response.job_id}})
                                cache_info = {'hierarchy': [""], 'cache_info': cache_info}
                            if 'model_loader_deps' in _config.deps_alter[predict_job.job_name]:
                                model_loader_dsl = predict_job.job_dsl.as_dict()
                                model_loader_info = []
                                for cpn in model_loader_dsl.get("components").keys():
                                    if "ModelLoader" in model_loader_dsl.get("components").get(cpn).get("module"):
                                        model_loader_info.append({cpn: response.model_info})
                                model_loader_info = {'hierarchy': [""], 'model_loader_info': model_loader_info}
                            suite.feed_dep_info(predict_job, name,
                                                model_info=model_info,
                                                table_info=table_info,
                                                cache_info=cache_info,
                                                model_loader_info=model_loader_info)
                        suite.remove_dependency(job.job_name)
            update_bar(0)
            time_consuming = time.time() - start
            performance_dir = os.path.join(os.path.abspath(config.cache_directory),
                                           'benchmark_history', "performance.json")
            fate_version = clients["guest_0"].get_version()
            if history_tag:
                history_tag = ["_".join([i, job.job_name]) for i in history_tag]
                comparison_quality(job.job_name, history_tag, performance_dir, time_consuming)
            if storage_tag:
                storage_tag = "_".join(['FATE', fate_version, storage_tag, job.job_name])
                save_quality(storage_tag, performance_dir, time_consuming)
            echo.stdout_newline()
            time_list.append(time_consuming)
        return [str(int(i)) + "s" for i in time_list]
def data_upload(clients: Clients, conf: Config, upload_config):
    def _await_finish(job_id, task_name=None):
        deadline = time.time() + sys.maxsize
        start = time.time()
        param = dict(job_id=job_id, role=None)
        while True:
            stdout = clients["guest_0"].flow_client("job/query", param)
            status = stdout["data"][0]["f_status"]
            elapse_seconds = int(time.time() - start)
            date = time.strftime('%Y-%m-%d %X')
            if task_name:
                log_msg = f"[{date}][{task_name}]{status}, elapse: {timedelta(seconds=elapse_seconds)}"
            else:
                log_msg = f"[{date}]{job_id} {status}, elapse: {timedelta(seconds=elapse_seconds)}"
            if (status == "running" or status == "waiting") and time.time() < deadline:
                print(log_msg, end="\r")
                time.sleep(1)
                continue
            else:
                print(" " * 60, end="\r")  # clean line
                echo.echo(log_msg)
                return status

    task_data = upload_config["data"]
    for i, data in enumerate(task_data):
        format_msg = f"@{data['file']} >> {data['namespace']}.{data['table_name']}"
        echo.echo(f"[{time.strftime('%Y-%m-%d %X')}]uploading {format_msg}")
        try:
            data["file"] = str(os.path.join(conf.data_base_dir, data["file"]))
            param = dict(file=data["file"],
                         head=data["head"],
                         partition=data["partition"],
                         table_name=data["table_name"],
                         namespace=data["namespace"])
            stdout = clients["guest_0"].flow_client("data/upload", param, drop=1)
            job_id = stdout.get('jobId', None)
            echo.echo(f"[{time.strftime('%Y-%m-%d %X')}]upload done {format_msg}, job_id={job_id}\n")
            if job_id is None:
                echo.echo("table already exists. To upload again, please add '-f 1' to the start cmd")
                continue
            _await_finish(job_id)
            param = dict(table_name=data["table_name"], namespace=data["namespace"])
            stdout = clients["guest_0"].flow_client("table/info", param)
            count = stdout["data"]["count"]
            if count != data["count"]:
                raise AssertionError("count of uploaded file is not as expected, count is: {},"
                                     " expect is: {}".format(count, data["count"]))
            echo.echo(f"[{time.strftime('%Y-%m-%d %X')}] check_data_out {stdout} \n")
        except Exception as e:
            exception_id = uuid.uuid1()
            echo.echo(f"exception in {data['file']}, exception_id={exception_id}")
            LOGGER.exception(f"exception id: {exception_id}")
            echo.echo(f"upload {i + 1}th data {data['table_name']} fail, exception_id: {exception_id}")
            # raise RuntimeError(f"exception occurred while uploading data for {data['file']}") from e
        finally:
            echo.stdout_newline()
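# --- hypothetical example (field names are exactly those data_upload reads) -----
# An upload_config covering every key the function touches might look like the
# dict below; the values are illustrative, not from the source.
example_upload_config = {
    "data": [
        {
            "file": "examples/data/breast_hetero_guest.csv",  # joined onto conf.data_base_dir
            "head": 1,                        # file carries a header row
            "partition": 8,                   # storage partitions for the table
            "table_name": "breast_hetero_guest",
            "namespace": "experiment",
            "count": 569,                     # expected row count, checked via table/info
        }
    ]
}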
def _submit_job(clients: Clients, suite: Testsuite, namespace: str, config: Config, timeout,
                update_job_parameters, update_component_parameters, max_iter, max_depth,
                num_trees, task_cores):
    # submit jobs
    with click.progressbar(length=len(suite.jobs),
                           label="jobs",
                           show_eta=False,
                           show_pos=True,
                           width=24) as bar:
        for job in suite.jobs_iter():
            job_progress = JobProgress(job.job_name)

            def _raise():
                exception_id = str(uuid.uuid1())
                job_progress.exception(exception_id)
                suite.update_status(job_name=job.job_name, exception_id=exception_id)
                echo.file(f"exception({exception_id})")
                LOGGER.exception(f"exception id: {exception_id}")

            # noinspection PyBroadException
            try:
                if max_iter is not None:
                    job.job_conf.update_component_parameters('max_iter', max_iter)
                if max_depth is not None:
                    job.job_conf.update_component_parameters('max_depth', max_depth)
                if num_trees is not None:
                    job.job_conf.update_component_parameters('num_trees', num_trees)
                if task_cores is not None:
                    job.job_conf.update_job_common_parameters(task_cores=task_cores)
                job.job_conf.update(config.parties, config.work_mode, config.backend, timeout,
                                    update_job_parameters, update_component_parameters)
            except Exception:
                _raise()
                continue

            def update_bar(n_step):
                bar.item_show_func = lambda x: job_progress.show()
                time.sleep(0.1)
                bar.update(n_step)

            update_bar(1)

            def _call_back(resp: SubmitJobResponse):
                if isinstance(resp, SubmitJobResponse):
                    job_progress.submitted(resp.job_id)
                    echo.file(f"[jobs] {resp.job_id} ", nl=False)
                    suite.update_status(job_name=job.job_name, job_id=resp.job_id)
                if isinstance(resp, QueryJobResponse):
                    job_progress.running(resp.status, resp.progress)
                update_bar(0)

            # noinspection PyBroadException
            try:
                response = clients["guest_0"].submit_job(job=job, callback=_call_back)
                # noinspection PyBroadException
                try:
                    # add notes
                    notes = f"{job.job_name}@{suite.path}@{namespace}"
                    for role, party_id_list in job.job_conf.role.items():
                        for i, party_id in enumerate(party_id_list):
                            clients[f"{role}_{i}"].add_notes(job_id=response.job_id, role=role,
                                                             party_id=party_id, notes=notes)
                except Exception:
                    pass
            except Exception:
                _raise()
            else:
                job_progress.final(response.status)
                suite.update_status(job_name=job.job_name, status=response.status.status)
                if response.status.is_success():
                    if suite.model_in_dep(job.job_name):
                        dependent_jobs = suite.get_dependent_jobs(job.job_name)
                        for predict_job in dependent_jobs:
                            model_info, table_info = None, None
                            for i in _config.deps_alter[predict_job.job_name]:
                                if isinstance(i, dict):
                                    name = i.get('name')
                                    data_pre = i.get('data')
                            if 'data_deps' in _config.deps_alter[predict_job.job_name]:
                                roles = list(data_pre.keys())
                                table_info, hierarchy = [], []
                                for role_ in roles:
                                    role, index = role_.split("_")
                                    input_ = data_pre[role_]
                                    for data_input, cpn in input_.items():
                                        try:
                                            table_name = clients["guest_0"].output_data_table(
                                                job_id=response.job_id,
                                                role=role,
                                                party_id=config.role[role][int(index)],
                                                component_name=cpn)
                                        except Exception:
                                            _raise()
                                        if predict_job.job_conf.dsl_version == 2:
                                            hierarchy.append([role, index, data_input])
                                            table_info.append({'table': table_name})
                                        else:
                                            hierarchy.append([role, 'args', 'data'])
                                            table_info.append({data_input: [table_name]})
                                table_info = {'hierarchy': hierarchy, 'table_info': table_info}
                            if 'model_deps' in _config.deps_alter[predict_job.job_name]:
                                if predict_job.job_conf.dsl_version == 2:
                                    # noinspection PyBroadException
                                    try:
                                        model_info = clients["guest_0"].deploy_model(
                                            model_id=response.model_info["model_id"],
                                            model_version=response.model_info["model_version"],
                                            dsl=predict_job.job_dsl.as_dict())
                                    except Exception:
                                        _raise()
                                else:
                                    model_info = response.model_info
                            suite.feed_dep_info(predict_job, name,
                                                model_info=model_info,
                                                table_info=table_info)
            update_bar(0)
            echo.stdout_newline()