Esempio n. 1
0
    def test_sum(self):
        """Check that a weighted sum of two normalized models matches the original model.

        The model is normalized on two different pools, giving (scale, bias)
        pairs (s1, b1) and (s2, b2). Weights are then chosen so that
        w1 * s1 + w2 * s2 == 1 and w1 * b1 + w2 * b2 == 0, and the summed
        model's predictions are compared with the original model's
        predictions using a 1e-8 precision diff tool.
        """
        dataset = Dataset('adult')
        model = self.fit('Logloss', dataset)

        # Normalize on the test pool first, then on the train pool.
        normalized_models = [
            self.normalize_model(model, dataset, pool_name)
            for pool_name in ('test_file', 'train_file')
        ]
        model1, model2 = normalized_models
        s1, b1 = self.get_scale_bias(model1)
        s2, b2 = self.get_scale_bias(model2)

        # Pick weights w1, w2 so that model_sum = w1 * model1 + w2 * model2
        det = s1 * b2 - s2 * b1
        if det:
            w1 = b2 / det
            w2 = -b1 / det
        else:
            # Degenerate system: fall back to equal weights.
            w1 = 0.5
            w2 = 0.5

        model_sum = get_test_output_path('model_sum{}.bin')
        sum_command = [CATBOOST_PATH, 'model-sum']
        sum_command += ['--model-with-weight', '{}={}'.format(model1, w1)]
        sum_command += ['--model-with-weight', '{}={}'.format(model2, w2)]
        sum_command += ['--output-path', model_sum]
        yc.execute(sum_command)

        eval_orig = self.eval_model(model, dataset, 'test_file')
        eval_sum = self.eval_model(model_sum, dataset, 'test_file')
        diff_command = get_limited_precision_dsv_diff_tool(1e-8) + [eval_orig, eval_sum]
        yc.execute(diff_command)
Esempio n. 2
0
 def test_deduplicate(self):
     """Archive the test directory with and without --deduplicate and check the archive sizes."""
     assert os.path.basename(self.archiver_path) == 'archiver'
     assert os.path.exists(self.archiver_path)
     contents = ytc.source_path("tools/archiver/tests/directory")

     # (output file, extra flags placed between --recursive and --plain)
     runs = (
         ("result_dedup", ["--deduplicate"]),
         ("result_no_dedup", []),
     )
     for output_name, extra_flags in runs:
         archiver_cmd = [self.archiver_path, "--output", output_name, "--recursive"]
         archiver_cmd += extra_flags
         archiver_cmd += ["--plain", contents]
         ytc.execute(command=archiver_cmd)

     with open('result_dedup', 'rb') as f_dedup:
         with open('result_no_dedup', 'rb') as f_no_dedup:
             archive_dedup = f_dedup.read()
             archive_no_dedup = f_no_dedup.read()

     # Expected archive sizes in bytes for the fixture directory.
     assert len(archive_dedup) == 58
     assert len(archive_no_dedup) == 75
Esempio n. 3
0
 def test_recursive(self):
     """Archive the test directory recursively and check the listing of the archive.

     The archive is built with --recursive, then listed with --list into the
     local file 'result'; the sorted listing must be exactly file1, file2, file3.
     """
     assert 'archiver' == os.path.basename(self.archiver_path)
     assert os.path.exists(self.archiver_path)
     contents = ytc.source_path("tools/archiver/tests/directory")
     ytc.execute(command=[
         self.archiver_path,
         "--output",
         "archive",
         "--recursive",
         contents,
     ])
     with open('result', 'w') as listing_out:
         ytc.execute(
             command=[
                 self.archiver_path,
                 "--list",
                 "archive",
             ],
             stdout=listing_out,
             stderr=None,
         )
     # Read the listing back through a context manager so the handle is
     # closed deterministically instead of being left to the garbage collector.
     with open('result') as listing_in:
         archive_list = sorted(listing_in.read().strip().split('\n'))
     assert archive_list == ['file1', 'file2', 'file3']
Esempio n. 4
0
 def test_normalize_idempotent(self):
     """Check that normalizing an already normalized model does not change its predictions.

     The model is normalized on the test and train pools, then normalized
     again on the same pools; predictions of both results on the test pool
     are compared with a zero-tolerance diff tool.
     """
     dataset = Dataset('adult')
     model = self.fit('Logloss', dataset)
     pools = ('test_file', 'train_file')

     normalized_once = self.normalize_model(model, dataset, *pools)
     normalized_twice = self.normalize_model(normalized_once, dataset, *pools)

     eval_files = [
         self.eval_model(candidate, dataset, 'test_file')
         for candidate in (normalized_once, normalized_twice)
     ]
     yc.execute(get_limited_precision_dsv_diff_tool(0) + eval_files)
Esempio n. 5
0
 def normalize_model(self, model, dataset, *pools):
     """Run `normalize-model` on *model* and return the path of the normalized model.

     :param model: path to the input model
     :param dataset: object providing `cd_file` and one attribute per pool name
     :param pools: names of dataset attributes; each is passed as a `-i` input
     :return: path of the written normalized model
     """
     output_model = get_test_output_path('normalized_model{}.bin')
     command = [
         CATBOOST_PATH, 'normalize-model',
         '-m', model,
         '--output-model', output_model,
         '--cd', dataset.cd_file,
         '-T', '4',
     ]
     # Append one "-i <pool path>" pair per requested pool, in the given order.
     for pool_name in pools:
         command.extend(['-i', getattr(dataset, pool_name)])
     yc.execute(command)
     return output_model
Esempio n. 6
0
 def get_scale_bias(self, model):
     """Return the (scale, bias) pair printed by `normalize-model --print-scale-and-bias`.

     :param model: path to the model to inspect
     :return: tuple (scale, bias) of floats parsed from the tool output
     :raises ValueError: if no line of the output matches the expected format
     """
     scale_bias_txt = get_test_output_path('scale_bias{}.txt')
     with open(scale_bias_txt, 'wt') as to_scale_bias_txt:
         yc.execute([
             CATBOOST_PATH,
             'normalize-model',
             '-m',
             model,
             '--print-scale-and-bias',
         ],
                    stdout=to_scale_bias_txt)
     # Read the captured output inside a context manager so the handle is
     # closed even when we return from the middle of the loop.
     with open(scale_bias_txt) as from_scale_bias_txt:
         for line in from_scale_bias_txt:
             m = re.match(r'Input model scale (\S+) bias (\S+)', line)
             if m:
                 return float(m.group(1)), float(m.group(2))
     raise ValueError('No scale/bias in model {}'.format(model))
Esempio n. 7
0
def test_traceback(mode, entry_point):
    """Run the traceback tool and canonize its cleaned stdout and stderr.

    :param mode: passed to the tool as its only command-line argument
    :param entry_point: when equal to 'custom', Y_PYTHON_ENTRY_POINT is set
                        to the crash module's `main` in the tool's environment
    :return: dict with canonical files for 'stdout' and 'stderr'
    """
    tb_tool = yc.build_path('library/python/runtime/test/traceback/traceback')
    raw_stdout = yc.test_output_path('stdout_raw.txt')
    raw_stderr = yc.test_output_path('stderr_raw.txt')
    clean_stdout = yc.test_output_path('stdout.txt')
    clean_stderr = yc.test_output_path('stderr.txt')

    tool_env = os.environ.copy()
    if entry_point == 'custom':
        tool_env['Y_PYTHON_ENTRY_POINT'] = 'library.python.runtime.test.traceback.crash:main'

    # The tool's exit code is deliberately not checked.
    proc = yc.execute(
        command=[tb_tool, mode],
        env=tool_env,
        stdout=raw_stdout,
        stderr=raw_stderr,
        check_exit_code=False,
    )

    # Write the cleaned streams first (stdout, then stderr) ...
    for stream_attr, destination in (('std_out', clean_stdout), ('std_err', clean_stderr)):
        with open(destination, 'w') as cleaned:
            cleaned.write(clean_traceback(getattr(proc, stream_attr)))

    # ... and only then register both files as canonical output.
    canonized_stdout = yc.canonical_file(clean_stdout, local=True)
    canonized_stderr = yc.canonical_file(clean_stderr, local=True)
    return {'stdout': canonized_stdout, 'stderr': canonized_stderr}
Esempio n. 8
0
 def set_scale_bias(self, model, scale, bias, output_model_format):
     """Run `normalize-model` with explicit scale and bias and return the output model path.

     :param model: path to the input model
     :param scale: value passed to `--set-scale`
     :param bias: value passed to `--set-bias`
     :param output_model_format: passed to `--output-model-format` and used
                                 as the output file extension
     :return: path of the written model
     """
     file_template = 'model_with_set_scale_bias{}.' + output_model_format
     result_model = get_test_output_path(file_template)

     command = [CATBOOST_PATH, 'normalize-model']
     command += ['-m', model]
     command += ['--output-model', result_model]
     command += ['--output-model-format', output_model_format]
     command += ['--set-scale', scale]
     command += ['--set-bias', bias]
     yc.execute(command)
     return result_model
Esempio n. 9
0
 def eval_model(self, model, dataset, pool):
     """Run `calc` for *model* on one pool and return the path of the result file.

     :param model: path to the model
     :param dataset: object providing `cd_file` and the pool attribute
     :param pool: name of the dataset attribute holding the input pool path
     :return: path of the file with the `RawFormulaVal` output column
     """
     output_path = get_test_output_path('eval_result{}.txt')
     options = [
         ('-m', model),
         ('--input-path', getattr(dataset, pool)),
         ('--cd', dataset.cd_file),
         ('--output-path', output_path),
         ('-T', '4'),
         ('--output-columns', 'RawFormulaVal'),
     ]
     command = [CATBOOST_PATH, 'calc']
     for flag, value in options:
         command.extend([flag, value])
     yc.execute(command)
     return output_path
Esempio n. 10
0
 def fit(self, loss_function, dataset):
     """Run `fit` on the dataset's train file and return the path of the model.

     :param loss_function: value passed to `--loss-function`
     :param dataset: object providing `train_file` and `cd_file`
     :return: path of the written model file
     """
     trained_model = get_test_output_path('model{}.bin')
     options = [
         ('--loss-function', loss_function),
         ('-f', dataset.train_file),
         ('--cd', dataset.cd_file),
         ('-i', '10'),
         ('-T', '4'),
         ('-m', trained_model),
     ]
     command = [CATBOOST_PATH, 'fit']
     for flag, value in options:
         command.extend([flag, value])
     yc.execute(command)
     return trained_model
Esempio n. 11
0
def execute(
    command, check_exit_code=True,
    shell=False, timeout=None,
    cwd=None, env=None,
    stdin=None, stdout=None, stderr=None,
    creationflags=0, wait=True,
    process_progress_listener=None, close_fds=False,
    collect_cores=True, check_sanitizer=True, preexec_fn=None, on_timeout=None,
    # YT specific
    input_data=None, output_data=None,
    data_mine_strategy=None,
    env_mine_strategy=None,
    operation_spec=None, task_spec=None,
    yt_proxy=None, output_result_path=None,
    init_func=None, fini_func=None,
    # Service args
    spec_filename=None,
    test_tool_bin=None,
    executor=_YtExecutor,
    runner_log_path=None,
    runner_log2stderr=False,
    runner_meta_path=None,
    target_stdout_path=None,
    target_stderr_path=None,
    operation_log_path=None,
    operation_description=None,
):
    """
    Executes a command on the YT. Listed below are options whose behavior is different from yatest.common.execute
    :param command: can be a list of arguments or a string (all paths matching the yatest.common.*_path prefixes will be fixed)
    :param timeout: timeout for the command executed on the YT (doesn't take into account the time spent on execution preparation - uploading/downloading data, etc)
    :param cwd: ignored
    :param env: all paths matching the yatest.common.*_path prefixes will be fixed
    :param stdin: path to a file, or a file-like object which will be fully read before execution; the content is uploaded to the YT
    :param stdout: stdout will be available after the execution of the command on the YT. Set to False to skip downloading
    :param stderr: same as stdout
    :param wait: only True is supported, NotImplementedError is raised otherwise
    :param process_progress_listener: ignored
    :param preexec_fn: ignored
    :param on_timeout: ignored
    :param input_data: map of input files/dirs required for command run which will be uploaded to YT (local path -> YT sandbox path)
    :param output_data: map of output files/dirs which will be downloaded from YT after command execution (YT sandbox path -> local path)
                        Take into account that runner will call os.path.dirname(YT sandbox path) to create intermediate directories for every entry
    :param data_mine_strategy: allows to provide own function to mine input data and fix cmd. For more info take a look at *_mine_strategy()
    :param env_mine_strategy: allows to provide own function to mine input data and fix env. For more info take a look at *_mine_strategy()
    :param operation_spec: YT operation spec
    :param task_spec: YT task spec
    :param yt_proxy: passed to the executor as --yt-proxy when set
    :param output_result_path: specify path to output archive. Used for test purposes
    :param init_func: Function which will be executed before target program. See note below
    :param fini_func: Function which will be executed after target program. See note below
    :param spec_filename: passed to _dump_spec() together with the execution spec
    :param test_tool_bin: test tool binary used to launch the operation (default: _get_test_tool_bin())
    :param executor: passed through to yatest.common.execute as its executor
    :param runner_log_path: local path for the downloaded runner log (default: unique file in the test output dir)
    :param runner_log2stderr: adds --log2stderr to the executor command and sends the executor's stderr to sys.stderr
    :param runner_meta_path: local path for the downloaded meta file (default: unique file in the work dir)
    :param target_stdout_path: local path for the downloaded stdout (default: unique file in the work dir)
    :param target_stderr_path: local path for the downloaded stderr (default: unique file in the work dir)
    :param operation_log_path: value of the executor's --log-path (default: unique file in the test output dir)
    :param operation_description: stored in the execution spec under the 'description' key
    :return: Execution object
    .. note::
        init_func and fini_func have some limitations:
        * every used module should be imported inside these functions, because the functions will be called in a different environment and required modules may be not imported or available
        * you can only use built-in python modules (because test_tool uploads itself and runs init/fini func inside itself)
    """
    test_tool_bin = test_tool_bin or _get_test_tool_bin()
    data_mine_strategy = data_mine_strategy or default_mine_strategy
    env_mine_strategy = env_mine_strategy or default_env_mine_strategy

    if not wait:
        raise NotImplementedError()

    orig_command = command
    command, env, to_upload, to_download = _fix_user_data(command, shell, env, input_data, output_data, data_mine_strategy, env_mine_strategy)
    command_name = ytc.process.get_command_name(command)

    exec_spec = {
        'env': env,
        'command': command,
        'timeout': timeout,
        'input_data': to_upload,
        'output_data': to_download,
        'description': operation_description,
    }

    if stdin:
        # NOTE(review): `basestring` exists only on Python 2 - confirm the target interpreter.
        if isinstance(stdin, basestring):
            stdin_path = stdin
        else:
            # A file-like object: spill its content into a temporary file that can be uploaded.
            logger.debug('Reading stdin')
            with tempfile.NamedTemporaryFile(delete=False) as afile:
                afile.write(stdin.read())
                stdin_path = afile.name
        to_upload[stdin_path] = get_yt_sandbox_path('env/stdin')
        exec_spec['stdin'] = get_yt_sandbox_path('env/stdin')

    # The meta file is always downloaded; stdout/stderr are skipped only when explicitly set to False.
    for stream, name, filename in [
        (True, 'meta', runner_meta_path),
        (stdout, 'stdout', target_stdout_path),
        (stderr, 'stderr', target_stderr_path),
    ]:
        if stream is not False:
            path = get_yt_sandbox_path("env/{}".format(name))
            exec_spec[name] = path
            to_download[path] = filename or ytc.get_unique_file_path(ytc.work_path(), 'yt_vanilla_{}_{}'.format(command_name, name))

    runner_log_dst = get_yt_sandbox_path('env/runner_log')
    exec_spec['runner_log'] = runner_log_dst
    to_download[runner_log_dst] = runner_log_path or ytc.path.get_unique_file_path(ytc.test_output_path(), 'yt_vanilla_wrapper_{}.log'.format(command_name))

    exec_spec['op_spec'] = _get_spec(
        default={
            'max_failed_job_count': 2,
            # Preventing dangling operations in case when test is get killed - see https://st.yandex-team.ru/DEVTOOLS-4753#1539181402000
            'time_limit': int(1000 * 60 * 60 * 1.5)  # 1.5h (milliseconds)
        },
        user=operation_spec,
    )
    exec_spec['task_spec'] = _get_spec(
        default={'memory_limit': 3 * (1024 ** 3)},
        user=task_spec,
        mandatory={'job_count': 1},
    )
    if init_func:
        exec_spec['init_func'] = _dump_func(init_func)
    if fini_func:
        exec_spec['fini_func'] = _dump_func(fini_func)

    exec_spec_path = _dump_spec(spec_filename, exec_spec)

    executor_cmd = [
        test_tool_bin, 'yt_vanilla_execute',
        '--spec-file', exec_spec_path,
        '--log-path', operation_log_path or ytc.path.get_unique_file_path(ytc.test_output_path(), 'yt_vanilla_op_{}.log'.format(command_name)),
    ]
    if yt_proxy:
        executor_cmd += ['--yt-proxy', yt_proxy]
    if output_result_path:
        executor_cmd += ['--output-path', output_result_path]
    if runner_log2stderr:
        executor_cmd += ['--log2stderr']
        executor_stderr = sys.stderr
    else:
        executor_stderr = None

    # The executor is started without waiting; the wait below goes through the result object.
    res = ytc.execute(
        executor_cmd,
        stderr=executor_stderr,
        collect_cores=collect_cores,
        wait=False,
        check_sanitizer=check_sanitizer,
        executor=executor,
    )
    # `wait` is always true here (see the NotImplementedError guard above).
    if wait:
        res.wait(exec_spec, orig_command, stdout, stderr, check_exit_code, timeout)
    return res
Esempio n. 12
0
def test_run():
    """Execute the pytz doctest runner binary located via yc.build_path()."""
    yc.execute(yc.build_path('contrib/python/pytz/tests/runner/doctest.pytz'))
Esempio n. 13
0
def execute(
    command,
    check_exit_code=True,
    shell=False,
    timeout=None,
    cwd=None,
    env=None,
    stdin=None,
    stdout=None,
    stderr=None,
    creationflags=0,
    wait=True,
    process_progress_listener=None,
    close_fds=False,
    collect_cores=True,
    check_sanitizer=True,
    preexec_fn=None,
    on_timeout=None,
    # YT specific
    input_data=None,
    output_data=None,
    data_mine_strategy=None,
    operation_spec=None,
    task_spec=None,
    yt_proxy=None,
    output_result_path=None,
):
    """
    Executes a command on the YT. Listed below are options whose behavior is different from yatest.common.execute
    :param command: can be a list of arguments or a string (all paths matching the yatest.common.*_path prefixes will be fixed)
    :param timeout: timeout for the command executed on the YT (doesn't take into account the time spent on execution preparation - uploading/downloading data, etc)
    :param cwd: ignored
    :param env: all paths matching the yatest.common.*_path prefixes will be fixed
    :param stdin: path to a file, or a file-like object which will be fully read before execution; the content is uploaded to the YT
    :param stdout: stdout will be available after the execution of the command on the YT. Set to False to skip downloading
    :param stderr: same as stdout
    :param wait: only True is supported, NotImplementedError is raised otherwise
    :param process_progress_listener: ignored
    :param preexec_fn: ignored
    :param on_timeout: ignored
    :param input_data: map of input files/dirs required for command run which will be uploaded to YT (local path -> YT sandbox path)
    :param output_data: map of output files/dirs which will be downloaded from YT after command execution (YT sandbox path -> local path)
                        Take into account that runner will call os.path.dirname(YT sandbox path) to create intermediate directories for every entry
    :param data_mine_strategy: allows to provide own function to mine input data and fix cmd. For more info take a look at *_mine_strategy()
    :param operation_spec: YT operation spec
    :param task_spec: YT task spec
    :param yt_proxy: passed to the executor as --yt-proxy when set
    :param output_result_path: specify path to output archive. Used for test purposes
    :return: Execution object
    """
    test_tool_bin = _get_test_tool_bin()
    data_mine_strategy = data_mine_strategy or default_mine_strategy

    if not wait:
        raise NotImplementedError()

    env = _get_fixed_env(env, data_mine_strategy)

    orig_command = command
    command, to_upload, to_download = _fix_user_data(command, shell,
                                                     input_data, output_data,
                                                     data_mine_strategy)
    command_name = ytc.process.get_command_name(command)

    exec_spec = {
        'env': env,
        'command': command,
        'timeout': timeout,
        'input_data': to_upload,
        'output_data': to_download,
    }

    if stdin:
        # NOTE(review): `basestring` exists only on Python 2 - confirm the target interpreter.
        if isinstance(stdin, basestring):
            stdin_path = stdin
        else:
            # A file-like object: spill its content into a temporary file that can be uploaded.
            logger.debug('Reading stdin')
            with tempfile.NamedTemporaryFile(delete=False) as afile:
                afile.write(stdin.read())
                stdin_path = afile.name
        to_upload[stdin_path] = 'env/stdin'
        exec_spec['stdin'] = 'env/stdin'

    # The meta file is always downloaded; stdout/stderr are skipped only when explicitly set to False.
    for stream, name in [
        (True, 'meta'),
        (stdout, 'stdout'),
        (stderr, 'stderr'),
    ]:
        if stream is not False:
            path = 'env/{}'.format(name)
            exec_spec[name] = path
            to_download[path] = ytc.get_unique_file_path(
                ytc.work_path(), 'yt_vanilla_{}_{}'.format(command_name, name))

    runner_log_dst = 'env/runner_log'
    exec_spec['runner_log'] = runner_log_dst
    to_download[runner_log_dst] = ytc.path.get_unique_file_path(
        ytc.test_output_path(),
        'yt_vanilla_wrapper_{}.log'.format(command_name))

    exec_spec['op_spec'] = _get_spec(
        default={'max_failed_job_count': 2},
        user=operation_spec,
    )
    exec_spec['task_spec'] = _get_spec(
        default={'memory_limit': 3 * (1024**3)},
        user=task_spec,
        mandatory={'job_count': 1},
    )

    exec_spec_path = _dump_spec(exec_spec)

    executor_cmd = [
        test_tool_bin,
        'yt_vanilla_execute',
        '--spec-file',
        exec_spec_path,
        '--log-path',
        ytc.path.get_unique_file_path(
            ytc.test_output_path(),
            'yt_vanilla_op_{}.log'.format(command_name)),
    ]
    if yt_proxy:
        executor_cmd += ['--yt-proxy', yt_proxy]
    if output_result_path:
        executor_cmd += ['--output-path', output_result_path]

    res = ytc.execute(executor_cmd,
                      collect_cores=collect_cores,
                      wait=False,
                      check_sanitizer=check_sanitizer)
    # `wait` is always true here (see the NotImplementedError guard above).
    # This wait always checks the executor's own exit code; the caller's
    # `check_exit_code` is forwarded to _patch_result() below.
    if wait:
        res.wait(check_exit_code=True)

    _patch_result(res, exec_spec, orig_command, stdout, stderr,
                  check_exit_code, timeout)
    return res