Example #1
def _set_namespace(data_namespace_mangling, namespace):
    Path(f"logs/{namespace}").mkdir(exist_ok=True, parents=True)
    set_logger(f"logs/{namespace}/exception.log")
    echo.set_file(click.open_file(f'logs/{namespace}/stdout', "a"))

    if data_namespace_mangling:
        echo.echo(f"add data_namespace_mangling: _{namespace}")
        DATA_JSON_HOOK.add_extend_namespace_hook(namespace)
        CONF_JSON_HOOK.add_extend_namespace_hook(namespace)
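A minimal sketch of a call site (hypothetical wiring; `echo`, `set_logger`, and the JSON hooks come from fate_test internals):

import time

# Hypothetical run identifier; fate_test typically derives one per invocation.
namespace = time.strftime("%Y%m%d%H%M%S")
_set_namespace(data_namespace_mangling=True, namespace=namespace)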
Example #2
def run_test(includes, conf: Config, error_log_file):
    def error_log(stdout):
        if stdout is None:
            return os.path.abspath(error_log_file)
        with open(error_log_file, "a") as f:
            f.write(stdout)

    def run_single_test(file):
        global failed_count
        echo.echo("start to run test {}".format(file))
        try:
            subp = subprocess.Popen(["python", file],
                                    stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
            stdout, _ = subp.communicate()  # stderr is merged into stdout above
            stdout = stdout.decode("utf-8")
            echo.echo(stdout)
            if "FAILED" in stdout:
                failed_count += 1
                error_log(stdout=f"error sequence {failed_count}: {file}")
                error_log(stdout=stdout)
        except Exception:
            # a test that fails to even launch is skipped rather than counted
            return

    def traverse_folder(file_fullname):
        if os.path.isfile(file_fullname):
            if "_test.py" in file_fullname and "ftl" not in file_fullname:
                run_single_test(file_fullname)
        else:
            for file in os.listdir(file_fullname):
                file_fullname_new = os.path.join(file_fullname, file)
                if os.path.isdir(file_fullname_new):
                    traverse_folder(file_fullname_new)
                if "_test.py" in file and ("/test" in file_fullname or "tests" in file_fullname):
                    if "ftl" in file_fullname_new:
                        continue
                    else:
                        run_single_test(file_fullname_new)

    global failed_count
    failed_count = 0
    fate_base = conf.fate_base
    ml_dir = os.path.join(fate_base, "python/federatedml")
    PYTHONPATH = os.environ.get('PYTHONPATH', '') + ":" + os.path.join(fate_base, "python")
    os.environ['PYTHONPATH'] = PYTHONPATH
    if len(includes) == 0:
        traverse_folder(ml_dir)
    else:
        for v in includes:
            traverse_folder(os.path.abspath(v))

    echo.echo(f"there are {failed_count} failed test")
    if failed_count > 0:
        print('Please check the error content: {}'.format(error_log(None)))
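The recursive traversal above can be sketched more compactly with pathlib; a simplified variant (an assumption, keeping only the `_test.py` and `ftl` filters and reusing the inner run_single_test helper):

from pathlib import Path

def traverse_folder_sketch(root):
    # Recursively find *_test.py files, skipping anything under an "ftl" path.
    for path in Path(root).rglob("*_test.py"):
        if "ftl" not in str(path):
            run_single_test(str(path))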
Example #3
def show_data(data):
    data_table = PrettyTable()
    data_table.set_style(ORGMODE)
    data_table.field_names = ["Data", "Information"]
    for name, table_name in data.items():
        row = [name, f"{TxtStyle.DATA_FIELD_VAL}{table_name}{TxtStyle.END}"]
        data_table.add_row(row)
    echo.echo(
        data_table.get_string(
            title=f"{TxtStyle.TITLE}Data Summary{TxtStyle.END}"))
    echo.echo("\n")
Example #4
    def data_save(data_info, table_names, namespaces, partition_list):
        data_count = 0
        for idx, data_name in enumerate(data_info.keys()):
            label_flag = 'guest' in data_info[data_name]
            data_type = 'dense' if 'guest' in data_info[data_name] else host_data_type
            if split_host and ('host' in data_info[data_name]):
                # split host rows evenly across the host datasets; clip the last chunk
                step = int(np.ceil(host_data_size / len(data_info)))
                host_start_num = step * data_count
                host_end_num = min(step * (data_count + 1), host_data_size)
                data_count += 1
            else:
                host_start_num = 0
                host_end_num = host_data_size
            out_path = os.path.join(str(big_data_dir), data_name)
            if os.path.exists(out_path) and os.path.isfile(out_path) and not parallelize:
                if force:
                    remove_file(out_path)
                else:
                    echo.echo('{} already exists'.format(out_path))
                    continue
            data_i = (idx + 1) / len(data_info)
            downLoad = f'dataget  [{"#" * int(24 * data_i)}{"-" * (24 - int(24 * data_i))}]  {idx + 1}/{len(data_info)}'
            start = time.time()
            progress = data_progress(downLoad, start)
            thread = threading.Thread(target=run, args=[progress])
            thread.start()

            try:
                if 'guest' in data_info[data_name]:
                    if not parallelize:
                        _generate_dens_data(out_path, guest_start_num, guest_end_num, guest_feature_num, label_flag, progress)
                    else:
                        _generate_parallelize_data(guest_start_num, guest_end_num, guest_feature_num, table_names[idx],
                                                   namespaces[idx], label_flag, data_type, partition_list[idx], progress)
                else:
                    if data_type == 'tag' and not parallelize:
                        _generate_tag_data(out_path, host_start_num, host_end_num, host_feature_num, sparsity, progress)
                    elif data_type == 'tag_value' and not parallelize:
                        _generate_tag_value_data(out_path, host_start_num, host_end_num, host_feature_num, progress)
                    elif data_type == 'dense' and not parallelize:
                        _generate_dens_data(out_path, host_start_num, host_end_num, host_feature_num, label_flag, progress)
                    elif parallelize:
                        _generate_parallelize_data(host_start_num, host_end_num, host_feature_num, table_names[idx],
                                                   namespaces[idx], label_flag, data_type, partition_list[idx], progress)
                progress.set_switch(False)
                time.sleep(1)
            except Exception:
                exception_id = uuid.uuid1()
                echo.echo(f"exception_id={exception_id}")
                LOGGER.exception(f"exception id: {exception_id}")
            finally:
                progress.set_switch(False)
                echo.stdout_newline()
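A quick standalone check of the chunking arithmetic used above (the helper name is illustrative):

import numpy as np

def host_chunk(total, n, idx):
    # same even-split-with-clipped-tail rule as in data_save
    step = int(np.ceil(total / n))
    return step * idx, min(step * (idx + 1), total)

assert [host_chunk(10, 3, i) for i in range(3)] == [(0, 4), (4, 8), (8, 10)]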
Example #5
def upload(ctx, include, exclude, glob, suite_type, role, config_type,
           **kwargs):
    """
    upload data defined in suite config files
    """
    ctx.obj.update(**kwargs)
    ctx.obj.post_process()
    namespace = ctx.obj["namespace"]
    config_inst = ctx.obj["config"]
    config_inst.extend_sid = ctx.obj["extend_sid"]
    config_inst.auto_increasing_sid = ctx.obj["auto_increasing_sid"]
    yes = ctx.obj["yes"]
    echo.welcome()
    echo.echo(f"testsuite namespace: {namespace}", fg='red')
    if len(include) != 0:
        echo.echo("loading testsuites:")
        suffix = "benchmark.json" if suite_type == "benchmark" else "testsuite.json"
        suites = _load_testsuites(includes=include,
                                  excludes=exclude,
                                  glob=glob,
                                  suffix=suffix,
                                  suite_type=suite_type)
        for suite in suites:
            if role != "all":
                suite.dataset = [
                    d for d in suite.dataset if re.match(d.role_str, role)
                ]
            echo.echo(f"\tdataset({len(suite.dataset)}) {suite.path}")
        if not yes and not click.confirm("running?"):
            return
        client_upload(suites=suites,
                      config_inst=config_inst,
                      namespace=namespace)
    else:
        config = get_config(config_inst)
        if config_type == 'min_test':
            config_file = config.min_test_data_config
        else:
            config_file = config.all_examples_data_config

        with open(config_file, 'r', encoding='utf-8') as f:
            upload_data = json.loads(f.read())

        echo.echo(f"\tdataset({len(upload_data['data'])}) {config_file}")
        if not yes and not click.confirm("running?"):
            return
        with Clients(config_inst) as client:
            data_upload(client, config_inst, upload_data)
        echo.farewell()
        echo.echo(f"testsuite namespace: {namespace}", fg='red')
Example #6
def api(ctx, **kwargs):
    """
    flow sdk api test
    """
    ctx.obj.update(**kwargs)
    ctx.obj.post_process()
    namespace = ctx.obj["namespace"]
    config_inst = ctx.obj["config"]
    yes = ctx.obj["yes"]

    echo.welcome()
    echo.echo(f"testsuite namespace: {namespace}", fg='red')

    if not yes and not click.confirm("running?"):
        return
    try:
        start = time.time()
        flow_sdk_api.run_test_api(get_role(conf=config_inst), namespace)
        echo.echo(f"elapse {timedelta(seconds=int(time.time() - start))}",
                  fg='red')
    except Exception:
        exception_id = uuid.uuid1()
        echo.echo(f"exception_id={exception_id}")
        LOGGER.exception(f"exception id: {exception_id}")
    echo.farewell()
    echo.echo(f"testsuite namespace: {namespace}", fg='red')
Example #7
def delete(ctx, include, exclude, glob, yes, suite_type, **kwargs):
    """
    delete data defined in suite config files
    """
    ctx.obj.update(**kwargs)
    ctx.obj.post_process()
    namespace = ctx.obj["namespace"]
    config_inst = ctx.obj["config"]

    echo.echo(f"testsuite namespace: {namespace}", fg='red')
    echo.echo("loading testsuites:")
    suffix = "benchmark.json" if suite_type == "benchmark" else "testsuite.json"

    suites = _load_testsuites(includes=include, excludes=exclude, glob=glob,
                              suffix=suffix, suite_type=suite_type)

    for suite in suites:
        echo.echo(f"\tdataset({len(suite.dataset)}) {suite.path}")
    if not yes and not click.confirm("running?"):
        return
    with Clients(config_inst) as client:
        for suite in suites:
            _delete_data(client, suite)
    echo.farewell()
    echo.echo(f"testsuite namespace: {namespace}", fg='red')
Example #8
def to_dsl(ctx, include, output_path, **kwargs):
    """
    This command will run the pipeline; make sure data is uploaded first
    """
    ctx.obj.update(**kwargs)
    ctx.obj.post_process()
    namespace = ctx.obj["namespace"]
    config_inst = ctx.obj["config"]
    yes = ctx.obj["yes"]
    echo.welcome()
    echo.echo(f"converting namespace: {namespace}", fg='red')
    for path in include:
        echo.echo(f"pipeline path: {os.path.abspath(path)}")
    if not yes and not click.confirm("running?"):
        return
    config_yaml_file = './examples/config.yaml'
    temp_file_path = f'./logs/{namespace}/temp_pipeline.py'

    for i in include:
        try:
            convert(i, temp_file_path, config_yaml_file, output_path,
                    config_inst)
        except Exception:
            exception_id = uuid.uuid1()
            echo.echo(f"exception_id={exception_id}")
            LOGGER.exception(f"exception id: {exception_id}")
        finally:
            echo.stdout_newline()
    echo.farewell()
    echo.echo(f"converting namespace: {namespace}", fg='red')
Example #9
def _upload_data(clients: Clients, suite, config: Config, output_path=None):
    with click.progressbar(length=len(suite.dataset),
                           label="dataset",
                           show_eta=False,
                           show_pos=True,
                           width=24) as bar:
        for i, data in enumerate(suite.dataset):
            data.update(config)
            table_name = data.config.get('table_name')
            if table_name is None:
                table_name = data.config.get('name')
            data_progress = DataProgress(
                f"{data.role_str}<-{data.config['namespace']}.{table_name}")

            def update_bar(n_step):
                bar.item_show_func = lambda x: data_progress.show()
                time.sleep(0.1)
                bar.update(n_step)

            def _call_back(resp):
                if isinstance(resp, UploadDataResponse):
                    data_progress.submitted(resp.job_id)
                    echo.file(f"[dataset]{resp.job_id}")
                if isinstance(resp, QueryJobResponse):
                    data_progress.update()
                update_bar(0)

            try:
                echo.stdout_newline()
                status, data_path = clients[data.role_str].upload_data(
                    data, _call_back, output_path)
                time.sleep(1)
                data_progress.update()
                if status != 'success':
                    raise RuntimeError(
                        f"uploading {i + 1}th data for {suite.path} {status}")
                bar.update(1)
                if _config.data_switch:
                    from fate_test.scripts import generate_mock_data

                    generate_mock_data.remove_file(data_path)
            except Exception:
                exception_id = str(uuid.uuid1())
                echo.file(f"exception({exception_id})")
                LOGGER.exception(f"exception id: {exception_id}")
                echo.echo(
                    f"upload {i + 1}th data {data.config} to {data.role_str} fail, exception_id: {exception_id}"
                )
Example #10
def unit_test(ctx, include, **kwargs):
    """
    federatedml unit test
    """
    ctx.obj.update(**kwargs)
    ctx.obj.post_process()
    namespace = ctx.obj["namespace"]
    config_inst = ctx.obj["config"]
    yes = ctx.obj["yes"]
    echo.echo(f"testsuite namespace: {namespace}", fg='red')

    if not yes and not click.confirm("running?"):
        return

    error_log_file = f"./logs/{namespace}/error_test.log"
    os.makedirs(os.path.dirname(error_log_file), exist_ok=True)
    run_test(includes=include, conf=config_inst, error_log_file=error_log_file)
Example #11
def query_schema(ctx, component_name, job_id, role, party_id, **kwargs):
    """
    query the meta of the output data of a component
    """
    ctx.obj.update(**kwargs)
    ctx.obj.post_process()
    namespace = ctx.obj["namespace"]
    yes = ctx.obj["yes"]
    config_inst = ctx.obj["config"]
    echo.welcome()
    echo.echo(f"testsuite namespace: {namespace}", fg='red')

    if not yes and not click.confirm("running?"):
        return
    with Clients(config_inst) as client:
        query_component_output_data(client, config_inst, component_name,
                                    job_id, role, party_id)
    echo.farewell()
    echo.echo(f"testsuite namespace: {namespace}", fg='red')
Example #12
def _run_benchmark_pairs(config: Config, suite: BenchmarkSuite, tol: float,
                         namespace: str, data_namespace_mangling: bool):
    # pipeline demo goes here
    pair_n = len(suite.pairs)
    for i, pair in enumerate(suite.pairs):
        echo.echo(f"Running [{i + 1}/{pair_n}] group: {pair.pair_name}")
        results = {}
        data_summary = None
        job_n = len(pair.jobs)
        for j, job in enumerate(pair.jobs):
            try:
                echo.echo(f"Running [{j + 1}/{job_n}] job: {job.job_name}")
                job_name, script_path, conf_path = job.job_name, job.script_path, job.conf_path
                param = Config.load_from_file(conf_path)
                mod = _load_module_from_script(script_path)
                input_params = signature(mod.main).parameters
                # local script
                if len(input_params) == 1:
                    data, metric = mod.main(param=param)
                elif len(input_params) == 2:
                    data, metric = mod.main(config=config, param=param)
                # pipeline script
                elif len(input_params) == 3:
                    if data_namespace_mangling:
                        data, metric = mod.main(config=config,
                                                param=param,
                                                namespace=f"_{namespace}")
                    else:
                        data, metric = mod.main(config=config, param=param)
                else:
                    data, metric = mod.main()
                results[job_name] = metric
                echo.echo(f"[{j + 1}/{job_n}] job: {job.job_name} Success!")
                if job_name == "FATE":
                    data_summary = data
                if data_summary is None:
                    data_summary = data
            except Exception as e:
                exception_id = uuid.uuid1()
                echo.echo(
                    f"exception while running [{j + 1}/{job_n}] job, exception_id={exception_id}",
                    err=True,
                    fg='red')
                LOGGER.exception(
                    f"exception id: {exception_id}, error message: \n{e}")
                continue
        rel_tol = pair.compare_setting.get("relative_tol")
        show_data(data_summary)
        match_metrics(evaluate=True,
                      group_name=pair.pair_name,
                      abs_tol=tol,
                      rel_tol=rel_tol,
                      **results)
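The arity-based dispatch on signature(mod.main).parameters is the core trick here; a minimal self-contained sketch of that pattern (names are illustrative):

from inspect import signature

def call_main(main, config=None, param=None, namespace=None):
    # Dispatch on how many parameters the loaded script's main() declares.
    n = len(signature(main).parameters)
    if n == 1:
        return main(param=param)
    if n == 2:
        return main(config=config, param=param)
    if n == 3:
        return main(config=config, param=param, namespace=namespace)
    return main()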
Example #13
def non_success_summary():
    status = {}
    for job in _config.non_success_jobs:
        if job.status not in status.keys():
            status[job.status] = prettytable.PrettyTable([
                "testsuite_name", "job_name", "job_id", "status",
                "exception_id", "rest_dependency"
            ])

        status[job.status].add_row([
            job.suite_file,
            job.name,
            job.job_id,
            job.status,
            job.exception_id,
            ",".join(job.rest_dependency),
        ])
    for k, v in status.items():
        echo.echo("\n" + "#" * 60)
        echo.echo(v.get_string(title=f"{k} job record"), fg='red')
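The status-keyed table creation is a group-by-key idiom; an equivalent sketch with defaultdict (field list copied from above):

from collections import defaultdict
import prettytable

FIELDS = ["testsuite_name", "job_name", "job_id", "status", "exception_id", "rest_dependency"]
tables = defaultdict(lambda: prettytable.PrettyTable(FIELDS))
# tables[job.status].add_row([...]) for each job, then iterate tables.items() as above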
Example #14
def _upload_data(clients: Clients, suite, config: Config):
    with click.progressbar(length=len(suite.dataset),
                           label="dataset",
                           show_eta=False,
                           show_pos=True,
                           width=24) as bar:
        for i, data in enumerate(suite.dataset):
            data.update(config)
            data_progress = DataProgress(
                f"{data.role_str}<-{data.config['namespace']}.{data.config['table_name']}"
            )

            def update_bar(n_step):
                bar.item_show_func = lambda x: data_progress.show()
                time.sleep(0.1)
                bar.update(n_step)

            def _call_back(resp):
                if isinstance(resp, UploadDataResponse):
                    data_progress.submitted(resp.job_id)
                    echo.file(f"[dataset]{resp.job_id}")
                if isinstance(resp, QueryJobResponse):
                    data_progress.update()
                update_bar(0)

            try:
                echo.stdout_newline()
                response = clients[data.role_str].upload_data(data, _call_back)
                data_progress.update()
                if not response.status.is_success():
                    raise RuntimeError(
                        f"uploading {i + 1}th data for {suite.path} {response.status}"
                    )
                bar.update(1)
            except Exception:
                exception_id = str(uuid.uuid1())
                echo.file(f"exception({exception_id})")
                LOGGER.exception(f"exception id: {exception_id}")
                echo.echo(
                    f"upload {i + 1}th data {data.config} to {data.role_str} fail, exception_id: {exception_id}"
                )
Example #15
def download_mnists(ctx, output_path, **kwargs):
    """
    download mnist data for flow
    """
    ctx.obj.update(**kwargs)
    ctx.obj.post_process()
    namespace = ctx.obj["namespace"]
    config_inst = ctx.obj["config"]
    yes = ctx.obj["yes"]
    echo.welcome()
    echo.echo(f"testsuite namespace: {namespace}", fg='red')

    if output_path is None:
        config = get_config(config_inst)
        output_path = str(config.data_base_dir) + "/examples/data/"
    if not yes and not click.confirm("running?"):
        return
    try:
        download_mnist(Path(output_path), "mnist_train")
        download_mnist(Path(output_path), "mnist_eval", is_train=False)
    except Exception:
        exception_id = uuid.uuid1()
        echo.echo(f"exception_id={exception_id}")
        LOGGER.exception(f"exception id: {exception_id}")
    finally:
        echo.stdout_newline()
    echo.farewell()
    echo.echo(f"testsuite namespace: {namespace}", fg='red')
Example #16
def convert(pipeline_file, temp_file_path, config_yaml_file, output_path,
            config: Config):
    folder_name, file_name = os.path.split(pipeline_file)
    if output_path is not None:
        folder_name = output_path
    echo.echo(
        f"folder_name: {os.path.abspath(folder_name)}, file_name: {file_name}")
    conf_name = file_name.replace('.py', '_conf.json')
    dsl_name = file_name.replace('.py', '_dsl.json')
    conf_name = os.path.join(folder_name, conf_name)
    dsl_name = os.path.join(folder_name, dsl_name)

    make_temp_pipeline(pipeline_file, temp_file_path, folder_name)
    additional_path = os.path.realpath(
        os.path.join(os.path.curdir, pipeline_file, os.pardir, os.pardir))
    if additional_path not in sys.path:
        sys.path.append(additional_path)
    loader = importlib.machinery.SourceFileLoader("main", str(temp_file_path))
    spec = importlib.util.spec_from_loader(loader.name, loader)
    mod = importlib.util.module_from_spec(spec)
    loader.exec_module(mod)
    my_pipeline = mod.main(os.path.join(config.data_base_dir,
                                        config_yaml_file))
    conf = my_pipeline.get_train_conf()
    dsl = my_pipeline.get_train_dsl()
    os.remove(temp_file_path)

    with open(conf_name, 'w') as f:
        json.dump(conf, f, indent=4)
        echo.echo('conf name is {}'.format(os.path.abspath(conf_name)))
    with open(dsl_name, 'w') as f:
        json.dump(dsl, f, indent=4)
        echo.echo('dsl name is {}'.format(os.path.abspath(dsl_name)))
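The SourceFileLoader/spec sequence above is the standard recipe for importing a module from an arbitrary file path; a minimal standalone sketch:

import importlib.machinery
import importlib.util

def load_module(name, path):
    # Import a Python source file that need not be on sys.path.
    loader = importlib.machinery.SourceFileLoader(name, path)
    spec = importlib.util.spec_from_loader(loader.name, loader)
    mod = importlib.util.module_from_spec(spec)
    loader.exec_module(mod)
    return mod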
Example #17
def _run_benchmark_pairs(config: Config, suite: BenchmarkSuite, tol: float, namespace: str,
                         data_namespace_mangling: bool, storage_tag, history_tag, fate_version, match_details):
    # pipeline demo goes here
    pair_n = len(suite.pairs)
    fate_base = config.fate_base
    PYTHONPATH = os.environ.get('PYTHONPATH', '') + ":" + os.path.join(fate_base, "python")
    os.environ['PYTHONPATH'] = PYTHONPATH
    for i, pair in enumerate(suite.pairs):
        echo.echo(f"Running [{i + 1}/{pair_n}] group: {pair.pair_name}")
        results = {}
        # data_summary = None
        job_n = len(pair.jobs)
        for j, job in enumerate(pair.jobs):
            try:
                echo.echo(f"Running [{j + 1}/{job_n}] job: {job.job_name}")
                job_name, script_path, conf_path = job.job_name, job.script_path, job.conf_path
                param = Config.load_from_file(conf_path)
                mod = _load_module_from_script(script_path)
                input_params = signature(mod.main).parameters
                # local script
                if len(input_params) == 1:
                    data, metric = mod.main(param=param)
                elif len(input_params) == 2:
                    data, metric = mod.main(config=config, param=param)
                # pipeline script
                elif len(input_params) == 3:
                    if data_namespace_mangling:
                        data, metric = mod.main(config=config, param=param, namespace=f"_{namespace}")
                    else:
                        data, metric = mod.main(config=config, param=param)
                else:
                    data, metric = mod.main()
                results[job_name] = metric
                echo.echo(f"[{j + 1}/{job_n}] job: {job.job_name} Success!\n")
                if data and DATA_DISPLAY_PATTERN.match(job_name):
                    # data_summary = data
                    show_data(data)
                # if data_summary is None:
                #    data_summary = data
            except Exception as e:
                exception_id = uuid.uuid1()
                echo.echo(f"exception while running [{j + 1}/{job_n}] job, exception_id={exception_id}", err=True,
                          fg='red')
                LOGGER.exception(f"exception id: {exception_id}, error message: \n{e}")
                continue
        rel_tol = pair.compare_setting.get("relative_tol")
        # show_data(data_summary)
        match_metrics(evaluate=True, group_name=pair.pair_name, abs_tol=tol, rel_tol=rel_tol,
                      storage_tag=storage_tag, history_tag=history_tag, fate_version=fate_version,
                      cache_directory=config.cache_directory, match_details=match_details, **results)
Example #18
def upload(ctx, include, exclude, glob, suite_type, role, **kwargs):
    """
    upload data defined in suite config files
    """
    ctx.obj.update(**kwargs)
    ctx.obj.post_process()
    namespace = ctx.obj["namespace"]
    config_inst = ctx.obj["config"]
    config_inst.extend_sid = ctx.obj["extend_sid"]
    config_inst.auto_increasing_sid = ctx.obj["auto_increasing_sid"]
    yes = ctx.obj["yes"]
    echo.echo(f"testsuite namespace: {namespace}", fg='red')
    echo.echo("loading testsuites:")
    suffix = "benchmark.json" if suite_type == "benchmark" else "testsuite.json"
    suites = _load_testsuites(includes=include,
                              excludes=exclude,
                              glob=glob,
                              suffix=suffix,
                              suite_type=suite_type)
    for suite in suites:
        if role != "all":
            suite.dataset = [
                d for d in suite.dataset if re.match(d.role_str, role)
            ]
        echo.echo(f"\tdataset({len(suite.dataset)}) {suite.path}")
    if not yes and not click.confirm("running?"):
        return
    client_upload(suites=suites, config_inst=config_inst, namespace=namespace)
Example #19
def generate(ctx, include, host_data_type, encryption_type, match_rate,
             sparsity, guest_data_size, host_data_size, guest_feature_num,
             host_feature_num, output_path, force, split_host, upload_data,
             remove_data, use_local_data, parallelize, **kwargs):
    """
    create data defined in suite config files
    """
    ctx.obj.update(**kwargs)
    ctx.obj.post_process()
    namespace = ctx.obj["namespace"]
    config_inst = ctx.obj["config"]
    config_inst.extend_sid = ctx.obj["extend_sid"]
    config_inst.auto_increasing_sid = ctx.obj["auto_increasing_sid"]
    if parallelize and upload_data:
        upload_data = False
    yes = ctx.obj["yes"]
    echo.welcome()
    echo.echo(f"testsuite namespace: {namespace}", fg='red')
    echo.echo("loading testsuites:")
    if host_data_size is None:
        host_data_size = guest_data_size
    suites = _load_testsuites(includes=include, excludes=tuple(), glob=None)
    suites += _load_testsuites(includes=include,
                               excludes=tuple(),
                               glob=None,
                               suffix="benchmark.json",
                               suite_type="benchmark")
    for suite in suites:
        if upload_data:
            echo.echo(
                f"\tdataget({len(suite.dataset)}) dataset({len(suite.dataset)}) {suite.path}"
            )
        else:
            echo.echo(f"\tdataget({len(suite.dataset)}) {suite.path}")
    if not yes and not click.confirm("running?"):
        return

    _big_data_task(include, guest_data_size, host_data_size, guest_feature_num,
                   host_feature_num, host_data_type, config_inst,
                   encryption_type, match_rate, sparsity, force, split_host,
                   output_path, parallelize)
    if upload_data:
        if use_local_data:
            _config.use_local_data = 0
        _config.data_switch = remove_data
        client_upload(suites=suites,
                      config_inst=config_inst,
                      namespace=namespace,
                      output_path=output_path)
Example #20
def _await_finish(job_id, task_name=None):
    deadline = time.time() + sys.maxsize  # effectively no timeout
    start = time.time()
    param = dict(job_id=job_id, role=None)
    while True:
        stdout = clients["guest_0"].flow_client("job/query", param)
        status = stdout["data"][0]["f_status"]
        elapse_seconds = int(time.time() - start)
        date = time.strftime('%Y-%m-%d %X')
        if task_name:
            log_msg = f"[{date}][{task_name}]{status}, elapse: {timedelta(seconds=elapse_seconds)}"
        else:
            log_msg = f"[{date}]{job_id} {status}, elapse: {timedelta(seconds=elapse_seconds)}"
        if status in ("running", "waiting") and time.time() < deadline:
            print(log_msg, end="\r")
            time.sleep(1)
            continue
        print(" " * 60, end="\r")  # clean the line
        echo.echo(log_msg)
        return status
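A hypothetical call site, polling a submitted job until it leaves the running/waiting states (the job id is illustrative):

status = _await_finish(job_id="202401011200001234", task_name="upload")
if status != "success":
    raise RuntimeError(f"job ended with status {status}")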
Example #21
def _distribution_metrics(**results):
    filtered_metric_group = _filter_results([DISTRIBUTION_METRICS], **results)
    for script, model_results_pair in filtered_metric_group.items():
        metric_results = model_results_pair[0]
        common_metrics = _get_common_metrics(**metric_results)
        filtered_results = _filter_results(common_metrics, **metric_results)
        table = PrettyTable()
        table.set_style(ORGMODE)
        script_model_names = list(filtered_results.keys())
        table.field_names = ["Script Model Name"] + common_metrics
        for script_model_name in script_model_names:
            row = [f"{script}-{script_model_name}"] + [
                f"{TxtStyle.FIELD_VAL}{v}{TxtStyle.END}"
                for v in filtered_results[script_model_name]
            ]
            table.add_row(row)
        echo.echo(
            table.get_string(
                title=
                f"{TxtStyle.TITLE}{script} distribution metrics{TxtStyle.END}")
        )
        echo.echo("\n" + "#" * 60)
Example #22
def to_testsuite(ctx, include, template_path, **kwargs):
    """
    convert pipeline testsuite to dsl testsuite
    """
    ctx.obj.update(**kwargs)
    ctx.obj.post_process()
    namespace = ctx.obj["namespace"]
    config_inst = ctx.obj["config"]
    yes = ctx.obj["yes"]
    echo.welcome()
    if not os.path.isdir(include):
        raise Exception("Please provide a folder path.")
    echo.echo(f"testsuite namespace: {namespace}", fg='red')
    echo.echo(f"pipeline path: {os.path.abspath(include)}")
    if not yes and not click.confirm("running?"):
        return
    input_path = os.path.abspath(include)
    input_list = [input_path]
    i = 0
    while i < len(input_list):
        dirs = os.listdir(input_list[i])
        for d in dirs:
            d_path = os.path.join(input_list[i], d)
            if os.path.isdir(d_path):
                input_list.append(d_path)
        i += 1

    for file_path in input_list:
        try:
            module_name = os.path.basename(file_path)
            do_generated(file_path, module_name, template_path, config_inst)
        except Exception:
            exception_id = uuid.uuid1()
            echo.echo(f"exception_id={exception_id}")
            LOGGER.exception(f"exception id: {exception_id}")
        finally:
            echo.stdout_newline()
    echo.farewell()
    echo.echo(f"converting namespace: {namespace}", fg='red')
Example #23
def process(ctx, **kwargs):
    """
    flow process test
    """
    ctx.obj.update(**kwargs)
    ctx.obj.post_process()
    namespace = ctx.obj["namespace"]
    config_inst = ctx.obj["config"]
    yes = ctx.obj["yes"]

    echo.welcome("benchmark")
    echo.echo(f"testsuite namespace: {namespace}", fg='red')
    echo.echo("loading testsuites:")
    if not yes and not click.confirm("running?"):
        return
    try:
        start = time.time()
        flow_process.run_fate_flow_test(get_role(conf=config_inst))
        echo.echo(f"elapse {timedelta(seconds=int(time.time() - start))}",
                  fg='red')
    except Exception:
        exception_id = uuid.uuid1()
        echo.echo(f"exception_id={exception_id}")
        LOGGER.exception(f"exception id: {exception_id}")
Example #24
def _all_match(common_metrics,
               filtered_results,
               abs_tol,
               rel_tol,
               script=None,
               match_details=None):
    eval_summary, all_match = evaluate_almost_equal(common_metrics,
                                                    filtered_results, abs_tol,
                                                    rel_tol)
    eval_table = PrettyTable()
    eval_table.set_style(ORGMODE)
    field_names = ["Metric", "All Match"]
    relative_error_list, absolute_error_list = _match_error(
        common_metrics, filtered_results)
    for i, metric in enumerate(eval_summary.keys()):
        row = [metric, eval_summary.get(metric)]
        if match_details == ALL:
            field_names = [
                "Metric", "All Match", "max_relative_error",
                "max_absolute_error"
            ]
            row += [relative_error_list[i], absolute_error_list[i]]
        elif match_details == RELATIVE:
            field_names = ["Metric", "All Match", "max_relative_error"]
            row += [relative_error_list[i]]
        elif match_details == ABSOLUTE:
            field_names = ["Metric", "All Match", "max_absolute_error"]
            row += [absolute_error_list[i]]
        eval_table.add_row(row)
    eval_table.field_names = field_names

    echo.echo(
        style_table(
            eval_table.get_string(
                title=f"{TxtStyle.TITLE}Match Results{TxtStyle.END}")))
    script = "" if script is None else f"{script} "
    if all_match:
        echo.echo(
            f"All {script}Metrics Match: {TxtStyle.TRUE_VAL}{all_match}{TxtStyle.END}"
        )
    else:
        echo.echo(
            f"All {script}Metrics Match: {TxtStyle.FALSE_VAL}{all_match}{TxtStyle.END}"
        )
Example #25
def paillier_test(ctx, data_num, test_round, **kwargs):
    """
    paillier
    """
    ctx.obj.update(**kwargs)
    ctx.obj.post_process()
    namespace = ctx.obj["namespace"]
    yes = ctx.obj["yes"]
    echo.welcome()
    echo.echo(f"testsuite namespace: {namespace}", fg='red')

    if not yes and not click.confirm("running?"):
        return

    for method in ["Paillier"]:
        assess_table = PaillierAssess(method=method, data_num=data_num, test_round=test_round)
        table = assess_table.output_table()
        echo.echo(table)
    echo.farewell()
    echo.echo(f"testsuite namespace: {namespace}", fg='red')
Example #26
def query_component_output_data(clients: Clients, config: Config,
                                component_name, job_id, role, party_id):
    roles = config.role
    clients_role = None
    for k, v in roles.items():
        if int(party_id) in v and k == role:
            clients_role = role + "_" + str(v.index(int(party_id)))
    try:
        if clients_role is None:
            raise ValueError(f"party id {party_id} does not exist")

        try:
            table_info = clients[clients_role].output_data_table(
                job_id=job_id,
                role=role,
                party_id=party_id,
                component_name=component_name)
            table_info = clients[clients_role].table_info(
                table_name=table_info['name'],
                namespace=table_info['namespace'])
        except Exception as e:
            raise RuntimeError(
                f"An exception occurred while getting data {clients_role}<-{component_name}"
            ) from e

        echo.echo("query_component_output_data result: {}".format(table_info))
        try:
            header = table_info['data']['schema']['header']
        except (KeyError, TypeError) as e:
            raise ValueError(f"failed to obtain header from table info, error msg: {e}")

        result = []
        for idx, header_name in enumerate(header[1:]):
            result.append((idx, header_name))
        echo.echo("Queried header is {}".format(result))
    except Exception:
        exception_id = uuid.uuid1()
        echo.echo(f"exception_id={exception_id}")
        LOGGER.exception(f"exception id: {exception_id}")
    finally:
        echo.stdout_newline()
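The (role, party_id) to client-key mapping at the top generalizes; a minimal sketch, assuming config.role is a dict such as {'guest': [9999], 'host': [10000, 10001]}:

def client_key(roles, role, party_id):
    # e.g. ('host', 10001) -> 'host_1'
    parties = roles.get(role, [])
    if int(party_id) in parties:
        return f"{role}_{parties.index(int(party_id))}"
    return None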
Example #27
def spdz_test(ctx, data_num, seed, data_partition, test_round,
              data_lower_bound, data_upper_bound, **kwargs):
    """
    spdz_test
    """
    ctx.obj.update(**kwargs)
    ctx.obj.post_process()
    namespace = ctx.obj["namespace"]
    yes = ctx.obj["yes"]
    echo.welcome()
    echo.echo(f"testsuite namespace: {namespace}", fg='red')

    if not yes and not click.confirm("running?"):
        return

    conf = ctx.obj["config"]
    runtime_config_path_prefix = \
        os.path.abspath(conf.fate_base) + "/python/fate_test/fate_test/scripts/op_test/spdz_conf/"

    params = dict(data_num=data_num, seed=seed, data_partition=data_partition,
                  test_round=test_round, data_lower_bound=data_lower_bound,
                  data_upper_bound=data_upper_bound)

    flow_address = None
    for idx, address in enumerate(conf.serving_setting["flow_services"]):
        if conf.role["guest"][0] in address["parties"]:
            flow_address = address["address"]

    spdz_test = SPDZTest(params=params,
                         conf_path=runtime_config_path_prefix + "job_conf.json",
                         dsl_path=runtime_config_path_prefix + "job_dsl.json",
                         flow_address=flow_address,
                         guest_party_id=[conf.role["guest"][0]],
                         host_party_id=[conf.role["host"][0]])

    tables = spdz_test.run()
    for table in tables:
        echo.echo(table)
    echo.farewell()
    echo.echo(f"testsuite namespace: {namespace}", fg='red')
Example #28
def client_upload(suites, config_inst, namespace, output_path=None):
    with Clients(config_inst) as client:
        for i, suite in enumerate(suites):
            # noinspection PyBroadException
            try:
                echo.echo(
                    f"[{i + 1}/{len(suites)}]start at {time.strftime('%Y-%m-%d %X')} {suite.path}",
                    fg='red')
                try:
                    _upload_data(client, suite, config_inst, output_path)
                except Exception as e:
                    raise RuntimeError(
                        f"exception occur while uploading data for {suite.path}"
                    ) from e
            except Exception:
                exception_id = uuid.uuid1()
                echo.echo(
                    f"exception in {suite.path}, exception_id={exception_id}")
                LOGGER.exception(f"exception id: {exception_id}")
            finally:
                echo.stdout_newline()
    echo.farewell()
    echo.echo(f"testsuite namespace: {namespace}", fg='red')
Example #29
def do_generated(file_path, fold_name, template_path, config: Config):
    yaml_file = os.path.join(config.data_base_dir, "./examples/config.yaml")
    PYTHONPATH = os.environ.get('PYTHONPATH', '') + ":" + str(config.data_base_dir)
    os.environ['PYTHONPATH'] = PYTHONPATH
    if not os.path.isdir(file_path):
        return
    files = os.listdir(file_path)
    if template_path is None:
        for f in files:
            if "testsuite" in f and "generated_testsuite" not in f:
                template_path = os.path.join(file_path, f)
                break
    if template_path is None:
        return

    suite_json = get_testsuite_file(template_path)
    pipeline_suite = copy.deepcopy(suite_json)
    suite_json["tasks"] = {}
    pipeline_suite["pipeline_tasks"] = {}
    replaced_path = os.path.join(file_path, 'replaced_code')
    generated_path = os.path.join(file_path, 'dsl_testsuite')

    os.makedirs(replaced_path, exist_ok=True)
    os.makedirs(generated_path, exist_ok=True)

    for f in files:
        if not f.startswith("pipeline"):
            continue
        echo.echo(f)
        task_name = f.replace(".py", "")
        task_name = "-".join(task_name.split('-')[1:])
        pipeline_suite["pipeline_tasks"][task_name] = {"script": f}
        f_path = os.path.join(file_path, f)
        code_str = insert_extract_code(f_path)
        pipeline_file_path = os.path.join(replaced_path, f)
        with open(pipeline_file_path, 'w') as fw:
            fw.writelines(code_str)

    exe_files = os.listdir(replaced_path)
    fail_job_count = 0
    task_type_list = []
    exe_conf_file = None
    exe_dsl_file = None
    for i, f in enumerate(exe_files):
        abs_file = os.path.join(replaced_path, f)
        echo.echo(
            '\n' +
            '[{}/{}]  executing {}'.format(i + 1, len(exe_files), abs_file),
            fg='red')
        result = os.system(f"python {abs_file} -config {yaml_file}")
        if not result:
            time.sleep(3)
            conf_files = os.listdir(generated_path)
            f_dsl = {
                "_".join(f.split('_')[:-1]): f
                for f in conf_files if 'dsl.json' in f
            }
            f_conf = {
                "_".join(f.split('_')[:-1]): f
                for f in conf_files if 'conf.json' in f
            }
            for task_type, dsl_file in f_dsl.items():
                if task_type not in task_type_list:
                    exe_dsl_file = dsl_file
                    task_type_list.append(task_type)
                    exe_conf_file = f_conf[task_type]
                    suite_json['tasks'][task_type] = {
                        "conf": exe_conf_file,
                        "dsl": exe_dsl_file
                    }
            echo.echo('conf name is {}'.format(
                os.path.join(file_path, "dsl_testsuite", exe_conf_file)))
            echo.echo('dsl name is {}'.format(
                os.path.join(file_path, "dsl_testsuite", exe_dsl_file)))
        else:
            echo.echo('profile generation failed')
            fail_job_count += 1

    suite_path = os.path.join(generated_path, f"{fold_name}_testsuite.json")
    with open(suite_path, 'w', encoding='utf-8') as json_file:
        json.dump(suite_json, json_file, ensure_ascii=False, indent=4)

    suite_path = os.path.join(file_path,
                              f"{fold_name}_pipeline_testsuite.json")
    with open(suite_path, 'w', encoding='utf-8') as json_file:
        json.dump(pipeline_suite, json_file, ensure_ascii=False, indent=4)

    shutil.rmtree(replaced_path)
    if not fail_job_count:
        echo.echo("Generating testsuite and dsl & conf finished!")
    else:
        echo.echo("Generating testsuite and dsl & conf finished with {} failures".format(fail_job_count))
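The dsl/conf pairing above keys files on their shared stem; a compact check of just that pairing (hypothetical filenames):

conf_files = ["hetero_lr_dsl.json", "hetero_lr_conf.json"]
f_dsl = {"_".join(f.split('_')[:-1]): f for f in conf_files if 'dsl.json' in f}
f_conf = {"_".join(f.split('_')[:-1]): f for f in conf_files if 'conf.json' in f}
assert f_dsl["hetero_lr"] == "hetero_lr_dsl.json"
assert f_conf["hetero_lr"] == "hetero_lr_conf.json"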