Example #1
def test_get_export_input_override():
    project = rand_str()
    name = rand_str()
    output_dir = os.path.join(rand_str(), rand_str())
    model_version = str(random.randint(1, 5))
    exporter_type = rand_str()
    return_labels = random.choice([True, False])
    is_remote = random.choice([True, False])
    config = {
        'project': rand_str(),
        'name': rand_str(),
        'output_dir': os.path.join(rand_str(), rand_str()),
        'model_version': str(random.randint(1, 5)),
        'exporter_type': rand_str(),
        'return_labels': not return_labels,
        'is_remote': not is_remote,
    }
    o, p, n, v, e, l, r = get_export_params(config, output_dir, project, name, model_version, exporter_type, return_labels, is_remote)
    assert o == output_dir
    assert p == project
    assert n == name
    assert v == model_version
    assert e == exporter_type
    assert l == return_labels
    assert r == is_remote
Example #2
def test_get_export_str2bool_called():
    return_labels = random.choice(['true', 'false'])
    is_remote = random.choice(['true', 'false'])
    with patch('mead.utils.str2bool') as b_patch:
        _ = get_export_params({},
                              return_labels=return_labels,
                              is_remote=is_remote)
        assert b_patch.call_args_list == [call(return_labels), call(is_remote)]
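Example #2 only verifies that `str2bool` is applied to the `return_labels` and `is_remote` arguments, in that order. For readers without `mead.utils` at hand, a stand-in with the behaviour these tests depend on might look like the sketch below; this is an assumption for illustration, and the real helper may accept more spellings.

# Illustrative stand-in for mead.utils.str2bool; only an assumption based on
# how the tests above use it, not the actual mead implementation.
def str2bool(value):
    if isinstance(value, bool):
        return value
    value = str(value).lower()
    if value in ('true', '1', 'yes', 'y'):
        return True
    if value in ('false', '0', 'no', 'n'):
        return False
    raise ValueError('Cannot interpret {!r} as a boolean'.format(value))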
Example #3
def test_get_export_defaults():
    o, p, n, v, e, l, r = get_export_params({})
    assert o == './models'
    assert p is None
    assert n is None
    assert v is None
    assert e == 'default'
    assert l is False
    assert r is True
Example #4
def test_get_export_config():
    config = {
        'project': rand_str(),
        'name': rand_str(),
        'output_dir': os.path.join(rand_str(), rand_str()),
        'model_version': str(random.randint(1, 5)),
        'exporter_type': rand_str(),
        'return_labels': random.choice(['true', 'false']),
        'is_remote': random.choice(['true', 'false']),
    }
    o, p, n, v, e, l, r = get_export_params(config)
    assert o == config['output_dir']
    assert p == config['project']
    assert n == config['name']
    assert v == config['model_version']
    assert e == config['exporter_type']
    assert l == str2bool(config['return_labels'])
    assert r == str2bool(config['is_remote'])
Example #5
def test():
    in_ = make_data()
    c = make_data()
    config = {
        'output_dir': c.dir,
        'project': c.proj,
        'name': c.name,
        'model_version': c.version,
        'exporter_type': rand_str(),
        'return_labels': random.choice(['true', 'false']),
        'is_remote': random.choice(['true', 'false']),
    }
    in_output, gold_output = choice(in_.dir, config, 'output_dir')
    gold_output = './models' if gold_output is None else gold_output
    in_project, gold_project = choice(in_.proj, config, 'project')
    in_name, gold_name = choice(in_.name, config, 'name')
    in_version, gold_version = choice(in_.version, config, 'model_version')
    in_export, gold_export = choice(rand_str(), config, 'exporter_type')
    gold_export = gold_export if gold_export is not None else 'default'
    in_labels, gold_labels = choice(random.choice(['true', 'false']), config, 'return_labels')
    gold_labels = str2bool(gold_labels) if gold_labels is not None else False
    in_remote, gold_remote = choice(random.choice(['true', 'false']), config, 'is_remote')
    gold_remote = str2bool(gold_remote) if gold_remote is not None else True
    o, p, n, v, e, l, r = get_export_params(
        config,
        in_output,
        in_project,
        in_name,
        in_version,
        in_export,
        in_labels,
        in_remote,
    )
    assert o == gold_output
    assert p == gold_project
    assert n == gold_name
    assert v == gold_version
    assert e == gold_export
    assert l == gold_labels
    assert r == gold_remote
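Example #5 relies on two test helpers that are not shown, `make_data` and `choice`. Judging from the gold-value fallbacks it applies, they could plausibly be written as below; this is a reconstruction for readability, not the project's actual fixtures. `make_data` bundles random path pieces, and `choice` randomly decides whether a value is passed explicitly, read from the config, or left to the default.

# Hypothetical reconstructions of the helpers assumed by Example #5.
# rand_str() is the same unshown random-string helper used throughout these tests.
import os
import random
from collections import namedtuple

Data = namedtuple('Data', 'dir proj name version')

def make_data():
    # Random directory, project, name and version to use as test inputs.
    return Data(
        dir=os.path.join(rand_str(), rand_str()),
        proj=rand_str(),
        name=rand_str(),
        version=str(random.randint(1, 5)),
    )

def choice(value, config, key):
    # Exercise one of the three precedence cases at random:
    # the explicit argument wins, the config entry is used,
    # or neither is given so the default applies (gold of None).
    case = random.choice(['override', 'config', 'default'])
    if case == 'override':
        return value, value
    if case == 'config':
        return None, config[key]
    del config[key]
    return None, None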
Example #6
def test_get_export_output_expanded():
    output_dir = "~/example"
    gold_output_dir = os.path.expanduser(output_dir)
    o, _, _, _, _, _, _ = get_export_params({}, output_dir)
    assert o == gold_output_dir
Example #7
def test_get_export_type_in_config():
    config = {'type': rand_str()}
    _, _, _, _, e, _, _ = get_export_params(config)
    assert e == config['type']
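Taken together, Examples #1 through #7 pin down the contract of `get_export_params`: explicit arguments override config entries, config entries override the defaults, `output_dir` defaults to './models' and is user-expanded, `exporter_type` falls back to a 'type' key and then to 'default', and the two boolean flags go through `str2bool` with defaults of False (`return_labels`) and True (`is_remote`). The sketch below condenses that behaviour; it is inferred from these tests, not copied from mead.utils.

# Sketch of get_export_params inferred from the tests above; the argument
# order follows the call sites, but this is not the actual mead.utils source.
import os

def get_export_params(config, output_dir=None, project=None, name=None,
                      model_version=None, exporter_type=None,
                      return_labels=None, is_remote=None):
    output_dir = output_dir if output_dir is not None else config.get('output_dir', './models')
    output_dir = os.path.expanduser(output_dir)
    project = project if project is not None else config.get('project')
    name = name if name is not None else config.get('name')
    model_version = model_version if model_version is not None else config.get('model_version')
    if exporter_type is None:
        exporter_type = config.get('exporter_type', config.get('type', 'default'))
    if return_labels is None:
        return_labels = config.get('return_labels', False)
    return_labels = str2bool(return_labels)  # str2bool as in mead.utils
    if is_remote is None:
        is_remote = config.get('is_remote', True)
    is_remote = str2bool(is_remote)
    return (output_dir, project, name, model_version,
            exporter_type, return_labels, is_remote)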
Example #8
def main():
    parser = argparse.ArgumentParser(description='Export a model')
    parser.add_argument('--config', help='configuration for an experiment', required=True, type=convert_path)
    parser.add_argument('--settings', help='configuration for mead', required=False, default=DEFAULT_SETTINGS_LOC, type=convert_path)
    parser.add_argument('--modules', help='modules to load', default=[], nargs='+', required=False)
    parser.add_argument('--datasets', help='json library of dataset labels')
    parser.add_argument('--vecs', help='index of vectorizers: local file, remote URL or hub mead-ml/ref', default='config/vecs.json', type=convert_path)
    parser.add_argument('--logging', help='json file for logging', default='config/logging.json', type=convert_path)
    parser.add_argument('--task', help='task to run', choices=['classify', 'tagger', 'seq2seq', 'lm'])
    parser.add_argument('--exporter_type', help="exporter type (default 'default')", default=None)
    parser.add_argument('--return_labels', help='if true, the exported model returns actual labels else '
                                                'the indices for labels vocab (default False)', default=None)
    parser.add_argument('--model', help='model name', required=True, type=unzip_files)
    parser.add_argument('--model_version', help='model_version', default=None)
    parser.add_argument('--output_dir', help="output dir (default './models')", default=None)
    parser.add_argument('--project', help='Name of project, used in path first', default=None)
    parser.add_argument('--name', help='Name of the model, used second in the path', default=None)
    parser.add_argument('--beam', help='beam_width', default=30, type=int)
    parser.add_argument('--nbest_input', help='Is the input to this model N-best', default=False, type=str2bool)
    parser.add_argument('--is_remote', help='if True, separate items for remote server and client. If False bundle everything together (default True)', default=None)
    parser.add_argument('--backend', help='The deep learning backend to use')
    parser.add_argument('--reporting', help='reporting hooks', nargs='+')
    parser.add_argument('--use_version', help='Should we use the version?', type=str2bool, default=True)
    parser.add_argument('--use_all_features', help='If a feature is found via vectorizer and not in embeddings, should we include it?', type=str2bool, default=False)
    parser.add_argument('--zip', help='Should we zip the results?', type=str2bool, default=False)

    args, overrides = parser.parse_known_args()
    configure_logger(args.logging)

    config_params = read_config_stream(args.config)
    config_params = parse_and_merge_overrides(config_params, overrides, pre='x')

    try:
        args.settings = read_config_stream(args.settings)
    except Exception:
        logger.warning('Warning: no mead-settings file was found at [{}]'.format(args.settings))
        args.settings = {}

    task_name = config_params.get('task', 'classify') if args.task is None else args.task

    # Remove multigpu references
    os.environ['CUDA_VISIBLE_DEVICES'] = ""
    os.environ['NV_GPU'] = ""
    if 'gpus' in config_params.get('train', {}):
        del config_params['train']['gpus']

    if task_name == 'seq2seq' and 'beam' not in config_params:
        config_params['beam'] = args.beam

    config_params['modules'] = config_params.get('modules', []) + args.modules
    if args.backend is not None:
        config_params['backend'] = normalize_backend(args.backend)

    cmd_hooks = args.reporting if args.reporting is not None else []
    config_hooks = config_params.get('reporting') if config_params.get('reporting') is not None else []
    reporting = parse_extra_args(set(chain(cmd_hooks, config_hooks)), overrides)
    config_params['reporting'] = reporting

    args.vecs = read_config_stream(args.vecs)

    task = mead.Task.get_task_specific(task_name, args.settings)

    output_dir, project, name, model_version, exporter_type, return_labels, is_remote = get_export_params(
        config_params.get('export', {}),
        args.output_dir,
        args.project, args.name,
        args.model_version,
        args.exporter_type,
        args.return_labels,
        args.is_remote,
    )
    # Here we reuse `.read_config`, which needs a dataset index when driven by mead-train.
    # mead-export does not need a real one, so a dummy index built from the config's
    # dataset label is enough and the caller does not have to pass anything in.
    datasets = [{'label': config_params['dataset']}]
    task.read_config(config_params, datasets, args.vecs, exporter_type=exporter_type)
    feature_exporter_field_map = create_feature_exporter_field_map(config_params['features'])
    exporter = create_exporter(task, exporter_type, return_labels=return_labels,
                               feature_exporter_field_map=feature_exporter_field_map,
                               nbest_input=args.nbest_input)
    exporter.run(args.model, output_dir, project, name, model_version,
                 remote=is_remote, use_version=args.use_version, zip_results=args.zip, use_all_features=args.use_all_features)
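The main() above takes its fallback export settings from `config_params.get('export', {})`, so anything not passed on the command line can live in the mead config. Purely as an illustration (the key names come from the examples above, the values are invented), such a section might look like:

# Hypothetical 'export' section of a mead config; keys mirror what
# get_export_params reads, values are made up for illustration.
export_section = {
    'output_dir': './models',
    'project': 'demo_project',
    'name': 'demo_model',
    'model_version': '1',
    'exporter_type': 'default',   # a bare 'type' key also works (Example #7)
    'return_labels': 'true',
    'is_remote': 'false',
}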
Example #9
def main():
    parser = argparse.ArgumentParser(
        description='Create an Embeddings Service')
    parser.add_argument('--config',
                        help='JSON Configuration for an experiment',
                        type=convert_path,
                        default="$MEAD_CONFIG")
    parser.add_argument('--settings',
                        help='JSON Configuration for mead',
                        default='config/mead-settings.json',
                        type=convert_path)
    parser.add_argument('--datasets',
                        help='json library of dataset labels',
                        default='config/datasets.json',
                        type=convert_path)
    parser.add_argument('--embeddings',
                        help='json library of embeddings',
                        default='config/embeddings.json',
                        type=convert_path)
    parser.add_argument('--backend', help='The deep learning backend to use')
    parser.add_argument('--export',
                        help='Should this create a export bundle?',
                        default=True,
                        type=str2bool)
    parser.add_argument('--exporter_type',
                        help="exporter type (default 'default')",
                        default=None)
    parser.add_argument('--model_version', help='model_version', default=None)
    parser.add_argument('--output_dir',
                        help="output dir (default './models')",
                        default=None)
    parser.add_argument('--project',
                        help='Name of project, used in path first',
                        default=None)
    parser.add_argument('--name',
                        help='Name of the model, used second in the path',
                        default=None)
    parser.add_argument(
        '--is_remote',
        help=
        'if True, separate items for remote server and client. If False bundle everything together (default True)',
        default=None)
    args, reporting_args = parser.parse_known_args()

    config_params = read_config_stream(args.config)
    try:
        args.settings = read_config_stream(args.settings)
    except Exception:
        logger.warning(
            'Warning: no mead-settings file was found at [{}]'.format(
                args.settings))
        args.settings = {}
    args.datasets = read_config_stream(args.datasets)
    args.embeddings = read_config_stream(args.embeddings)

    if args.backend is not None:
        config_params['backend'] = normalize_backend(args.backend)

    os.environ['CUDA_VISIBLE_DEVICES'] = ""
    os.environ['NV_GPU'] = ""
    if 'gpus' in config_params.get('train', {}):
        del config_params['train']['gpus']

    config_params['task'] = 'servable_embeddings'
    task = mead.Task.get_task_specific(config_params['task'], args.settings)
    task.read_config(config_params,
                     args.datasets,
                     reporting_args=[],
                     config_file=deepcopy(config_params))
    task.initialize(args.embeddings)

    to_zip = not args.export
    task.train(None, zip_model=to_zip)

    if args.export:
        model = os.path.abspath(task.get_basedir())
        output_dir, project, name, model_version, exporter_type, return_labels, is_remote = get_export_params(
            config_params.get('export', {}),
            args.output_dir,
            args.project,
            args.name,
            args.model_version,
            args.exporter_type,
            False,
            args.is_remote,
        )
        feature_exporter_field_map = create_feature_exporter_field_map(
            config_params['features'])
        exporter = create_exporter(
            task,
            exporter_type,
            return_labels=return_labels,
            feature_exporter_field_map=feature_exporter_field_map)
        exporter.run(model,
                     output_dir,
                     project,
                     name,
                     model_version,
                     remote=is_remote)
Example #10
File: export.py (project: tanthml/baseline)
def main():
    parser = argparse.ArgumentParser(description='Export a model')
    parser.add_argument('--config', help='JSON Configuration for an experiment', required=True, type=convert_path)
    parser.add_argument('--settings', help='JSON Configuration for mead', required=False, default='config/mead-settings.json', type=convert_path)
    parser.add_argument('--modules', help='modules to load', default=[], nargs='+', required=False)
    parser.add_argument('--datasets', help='json library of dataset labels', default='config/datasets.json', type=convert_path)
    parser.add_argument('--logging', help='json file for logging', default='config/logging.json', type=convert_path)
    parser.add_argument('--task', help='task to run', choices=['classify', 'tagger', 'seq2seq', 'lm'])
    parser.add_argument('--exporter_type', help="exporter type (default 'default')", default=None)
    parser.add_argument('--return_labels', help='if true, the exported model returns actual labels else '
                                                'the indices for labels vocab (default False)', default=None)
    parser.add_argument('--model', help='model name', required=True, type=unzip_files)
    parser.add_argument('--model_version', help='model_version', default=None)
    parser.add_argument('--output_dir', help="output dir (default './models')", default=None)
    parser.add_argument('--project', help='Name of project, used in path first', default=None)
    parser.add_argument('--name', help='Name of the model, used second in the path', default=None)
    parser.add_argument('--beam', help='beam_width', default=30, type=int)
    parser.add_argument('--is_remote', help='if True, separate items for remote server and client. If False bundle everything together (default True)', default=None)

    args = parser.parse_args()
    configure_logger(args.logging)

    config_params = read_config_file(args.config)

    try:
        args.settings = read_config_stream(args.settings)
    except Exception:
        logger.warning('Warning: no mead-settings file was found at [{}]'.format(args.settings))
        args.settings = {}

    task_name = config_params.get('task', 'classify') if args.task is None else args.task

    # Remove multigpu references
    os.environ['CUDA_VISIBLE_DEVICES'] = ""
    os.environ['NV_GPU'] = ""
    if 'gpus' in config_params.get('train', {}):
        del config_params['train']['gpus']

    if task_name == 'seq2seq' and 'beam' not in config_params:
        config_params['beam'] = args.beam

    config_params['modules'] = config_params.get('modules', []) + args.modules

    task = mead.Task.get_task_specific(task_name, args.settings)

    output_dir, project, name, model_version, exporter_type, return_labels, is_remote = get_export_params(
        config_params.get('export', {}),
        args.output_dir,
        args.project, args.name,
        args.model_version,
        args.exporter_type,
        args.return_labels,
        args.is_remote,
    )
    task.read_config(config_params, args.datasets, exporter_type=exporter_type)
    feature_exporter_field_map = create_feature_exporter_field_map(config_params['features'])
    exporter = create_exporter(task, exporter_type, return_labels=return_labels,
                               feature_exporter_field_map=feature_exporter_field_map)
    exporter.run(args.model, output_dir, project, name, model_version, remote=is_remote)