Example #1
0
def main():
    """Driver program to execute chores."""
    parser = argparse.ArgumentParser(description='Run chores')
    parser.add_argument('file', help='A chore YAML file.')
    parser.add_argument('--cred', help='cred file', default="/etc/odind/odin-cred.yml")
    parser.add_argument('--label', required=True)
    parser.add_argument('--modules', nargs='+', default=[], help='Addon modules to load')
    args = parser.parse_args()

    # Pull in any user-supplied addon modules before running chores.
    for module_name in args.modules:
        import_user_module(module_name)

    creds = read_config_stream(args.cred)
    job_store = create_store_backend(**creds['jobs_db'])
    chore_config = read_config_stream(args.file)

    # Seed the chore context with prior job results keyed by job name,
    # plus the parent job's details under the 'parent' key.
    context = {}
    for job in job_store.get_previous(args.label):
        context[job['name']] = job
    context['parent'] = job_store.get_parent(args.label)

    chore_results = {'chore_context': run_chores(chore_config, context)}
    LOGGER.info(chore_results)

    # Merge the chore results back into this job's stored record.
    record = job_store.get(args.label)
    record.update(chore_results)
    job_store.set(record)
Example #2
0
def main():
    """Train a servable-embeddings task and optionally export the resulting model bundle."""
    parser = argparse.ArgumentParser(description='Create an Embeddings Service')
    parser.add_argument('--config', help='JSON Configuration for an experiment', type=convert_path, default="$MEAD_CONFIG")
    parser.add_argument('--settings', help='JSON Configuration for mead', default='config/mead-settings.json', type=convert_path)
    parser.add_argument('--datasets', help='json library of dataset labels', default='config/datasets.json', type=convert_path)
    parser.add_argument('--embeddings', help='json library of embeddings', default='config/embeddings.json', type=convert_path)
    parser.add_argument('--backend', help='The deep learning backend to use')
    parser.add_argument('--export', help='Should this create a export bundle?', default=True, type=str2bool)
    parser.add_argument('--exporter_type', help="exporter type (default 'default')", default=None)
    parser.add_argument('--model_version', help='model_version', default=None)
    parser.add_argument('--output_dir', help="output dir (default './models')", default=None)
    parser.add_argument('--project', help='Name of project, used in path first', default=None)
    parser.add_argument('--name', help='Name of the model, used second in the path', default=None)
    parser.add_argument('--is_remote', help='if True, separate items for remote server and client. If False bundle everything together (default True)', default=None)
    args, reporting_args = parser.parse_known_args()

    config_params = read_config_stream(args.config)
    try:
        args.settings = read_config_stream(args.settings)
    except Exception:  # narrowed from a bare except; a missing settings file is non-fatal
        logger.warning('Warning: no mead-settings file was found at [{}]'.format(args.settings))
        args.settings = {}
    args.datasets = read_config_stream(args.datasets)
    args.embeddings = read_config_stream(args.embeddings)

    if args.backend is not None:
        config_params['backend'] = normalize_backend(args.backend)

    # Force CPU execution and strip any multi-gpu settings from the train config.
    os.environ['CUDA_VISIBLE_DEVICES'] = ""
    os.environ['NV_GPU'] = ""
    if 'gpus' in config_params.get('train', {}):
        del config_params['train']['gpus']

    config_params['task'] = 'servable-embeddings'
    task = mead.Task.get_task_specific(config_params['task'], args.settings)
    task.read_config(config_params, args.datasets, reporting_args=[], config_file=deepcopy(config_params))
    task.initialize(args.embeddings)

    # Only zip the model when we are not producing an export bundle.
    to_zip = not args.export
    task.train(None, zip_model=to_zip)

    if args.export:
        model = os.path.abspath(task.get_basedir())
        output_dir, project, name, model_version, exporter_type, return_labels, is_remote = get_export_params(
            config_params.get('export', {}),
            args.output_dir,
            args.project, args.name,
            args.model_version,
            args.exporter_type,
            False,
            args.is_remote,
        )
        feature_exporter_field_map = create_feature_exporter_field_map(config_params['features'])
        exporter = create_exporter(task, exporter_type, return_labels=return_labels,
                                   feature_exporter_field_map=feature_exporter_field_map)
        exporter.run(model, output_dir, project, name, model_version, remote=is_remote)
Example #3
0
def main():
    """Train a mead task from a config file, with optional CLI overrides."""
    parser = argparse.ArgumentParser(description='Train a text classifier')
    parser.add_argument('--config', help='JSON Configuration for an experiment', type=convert_path, default="$MEAD_CONFIG")
    parser.add_argument('--settings', help='JSON Configuration for mead', default='config/mead-settings.json', type=convert_path)
    parser.add_argument('--datasets', help='json library of dataset labels', default='config/datasets.json', type=convert_path)
    parser.add_argument('--embeddings', help='json library of embeddings', default='config/embeddings.json', type=convert_path)
    parser.add_argument('--logging', help='json file for logging', default='config/logging.json', type=convert_path)
    parser.add_argument('--task', help='task to run', choices=['classify', 'tagger', 'seq2seq', 'lm'])
    parser.add_argument('--gpus', help='Number of GPUs (defaults to 1)', type=int)
    parser.add_argument('--reporting', help='reporting hooks', nargs='+')
    args, reporting_args = parser.parse_known_args()

    config_params = read_config_stream(args.config)
    # Resolve each config-like argument into its parsed contents.
    for attr in ('settings', 'datasets', 'embeddings', 'logging'):
        setattr(args, attr, read_config_stream(getattr(args, attr)))

    # CLI flags take precedence over the experiment config.
    if args.gpus is not None:
        config_params['model']['gpus'] = args.gpus
    if args.reporting is not None:
        config_params['reporting'] = parse_extra_args(args.reporting, reporting_args)

    task_name = args.task if args.task is not None else config_params.get('task', 'classify')
    print('Task: [{}]'.format(task_name))
    task = mead.Task.get_task_specific(task_name, args.logging, args.settings)
    task.read_config(config_params, args.datasets, reporting_args=reporting_args, config_file=args.config)
    task.initialize(args.embeddings)
    task.train()
Example #4
0
def main():
    """Print the mead hash of a configuration."""
    parser = argparse.ArgumentParser(description="Get the mead hash of a config.")
    parser.add_argument('config', help='JSON/YML Configuration for an experiment: local file or remote URL', type=convert_path, default="$MEAD_CONFIG")
    cli = parser.parse_args()
    print(hash_config(read_config_stream(cli.config)))
Example #5
0
def main():
    """Clean up a job
    """
    parser = argparse.ArgumentParser(description='Clean up a job')
    parser.add_argument('work', help='Job')
    parser.add_argument('--cred', help='cred file', type=convert_path, required=True)
    parser.add_argument('--db', action='store_true', help="Also remove from the jobs db")
    parser.add_argument('--fs', action='store_true', help="Also remove from the filesystem")
    parser.add_argument('--data_dir', help="The root of where data is saved.")
    args = parser.parse_args()

    # The cred file tells us how to reach the jobs database.
    store = create_store_backend(**read_config_stream(args.cred)['jobs_db'])

    removed = cleanup(args.work, store, purge_db=args.db, purge_fs=args.fs, data_dir=args.data_dir)
    print("Results of this request:")
    print_table(removed)
Example #6
0
def main():
    """Take in a job and get back its status

    TODO: support passing in specific Job IDs and regex
    """
    parser = argparse.ArgumentParser(description='Get job status')
    parser.add_argument('work', help='Pipeline or Job')
    parser.add_argument('--cred', help='cred file', type=convert_path, required=True)
    parser.add_argument('--format', help='Format the output', default="human")
    parser.add_argument('--columns', nargs="+", default=[], help="Columns of the status to show.")
    parser.add_argument('--all', action='store_true', help="Show all columns of the status message.")
    args = parser.parse_args()

    store = create_store_backend(**read_config_stream(args.cred)['jobs_db'])
    matches = store.parents_like(args.work)
    if not matches:
        print('No job found')
    wanted_columns = set(args.columns)
    for parent in matches:
        try:
            show_status(*get_status(parent, store), columns=wanted_columns, all_cols=args.all)
        except Exception:
            # Best effort: report the failure and move on to the next match.
            print('ERROR: Skipping {}'.format(parent))
Example #7
0
def main():
    """Train a mead task, with CLI overrides for gpus, basedir, backend and checkpoint."""
    parser = argparse.ArgumentParser(description='Train a text classifier')
    parser.add_argument('--config', help='JSON Configuration for an experiment', type=convert_path, default="$MEAD_CONFIG")
    parser.add_argument('--settings', help='JSON Configuration for mead', default='config/mead-settings.json', type=convert_path)
    parser.add_argument('--datasets', help='json library of dataset labels', default='config/datasets.json', type=convert_path)
    parser.add_argument('--embeddings', help='json library of embeddings', default='config/embeddings.json', type=convert_path)
    parser.add_argument('--logging', help='json file for logging', default='config/logging.json', type=convert_path)
    parser.add_argument('--task', help='task to run', choices=['classify', 'tagger', 'seq2seq', 'lm'])
    parser.add_argument('--gpus', help='Number of GPUs (defaults to number available)', type=int, default=-1)
    parser.add_argument('--basedir', help='Override the base directory where models are stored', type=str)
    parser.add_argument('--reporting', help='reporting hooks', nargs='+')
    parser.add_argument('--backend', help='The deep learning backend to use')
    parser.add_argument('--checkpoint', help='Restart training from this checkpoint')
    # Unknown args are passed through to the reporting hooks.
    args, reporting_args = parser.parse_known_args()

    args.logging = read_config_stream(args.logging)
    configure_logger(args.logging)

    config_params = read_config_stream(args.config)
    try:
        args.settings = read_config_stream(args.settings)
    except Exception:  # narrowed from a bare except; a missing settings file is non-fatal
        logger.warning('Warning: no mead-settings file was found at [{}]'.format(args.settings))
        args.settings = {}
    args.datasets = read_config_stream(args.datasets)
    args.embeddings = read_config_stream(args.embeddings)

    # CLI flags take precedence over the experiment config.
    if args.gpus is not None:
        config_params['model']['gpus'] = args.gpus

    if args.basedir is not None:
        config_params['basedir'] = args.basedir

    if args.backend is not None:
        config_params['backend'] = normalize_backend(args.backend)

    # Merge reporting hooks requested on the CLI with those from the config.
    cmd_hooks = args.reporting if args.reporting is not None else []
    config_hooks = config_params.get('reporting') if config_params.get('reporting') is not None else []
    reporting = parse_extra_args(set(chain(cmd_hooks, config_hooks)), reporting_args)
    config_params['reporting'] = reporting

    task_name = config_params.get('task', 'classify') if args.task is None else args.task
    logger.info('Task: [{}]'.format(task_name))
    task = mead.Task.get_task_specific(task_name, args.settings)
    task.read_config(config_params, args.datasets, reporting_args=reporting_args, config_file=deepcopy(config_params))
    task.initialize(args.embeddings)
    task.train(args.checkpoint)
Example #8
0
def main():
    """Run an lr-find sweep over a mead task to help choose a learning rate."""
    parser = argparse.ArgumentParser(description='Train a text classifier')
    parser.add_argument('--config', help='JSON Configuration for an experiment', type=convert_path, default="$MEAD_CONFIG")
    parser.add_argument('--settings', help='JSON Configuration for mead', default='config/mead-settings.json', type=convert_path)
    parser.add_argument('--datasets', help='json library of dataset labels', default='config/datasets.json', type=convert_path)
    parser.add_argument('--embeddings', help='json library of embeddings', default='config/embeddings.json', type=convert_path)
    parser.add_argument('--logging', help='json file for logging', default='config/logging.json', type=convert_path)
    parser.add_argument('--task', help='task to run', choices=['classify', 'tagger', 'seq2seq', 'lm'])
    parser.add_argument('--backend', help='The deep learning backend to use')

    # lr-find specific knobs
    parser.add_argument('--num_iters', type=int, default=5)
    parser.add_argument('--max_lr', type=float, default=10)
    parser.add_argument('--smooth', type=float, default=0.05)
    parser.add_argument('--use_val', type=str2bool, default=False)
    parser.add_argument('--log', type=str2bool, default=True)
    parser.add_argument('--diverge_threshold', type=int, default=5)

    args, reporting_args = parser.parse_known_args()

    config_params = read_config_stream(args.config)
    try:
        args.settings = read_config_stream(args.settings)
    except Exception:  # narrowed from a bare except; a missing settings file is non-fatal
        # Bug fix: report the settings path that was missing, not the config path.
        print('Warning: no mead-settings file was found at [{}]'.format(args.settings))
        args.settings = {}
    args.datasets = read_config_stream(args.datasets)
    args.embeddings = read_config_stream(args.embeddings)
    args.logging = read_config_stream(args.logging)

    if args.backend is not None:
        config_params['backend'] = normalize_backend(args.backend)

    # Rewire the training section to run the lr-find fit function.
    config_params['reporting'] = {}
    config_params['train']['fit_func'] = "lr-find"
    config_params['train']['lr_scheduler_type'] = 'warmup_linear'
    config_params['train']['smooth_beta'] = args.smooth
    config_params['train']['use_val'] = args.use_val
    config_params['train']['log_scale'] = args.log
    config_params['train']['diverge_threshold'] = args.diverge_threshold
    config_params['train']['be'] = config_params['backend']

    task_name = config_params.get('task', 'classify') if args.task is None else args.task
    print('Task: [{}]'.format(task_name))
    task = mead.Task.get_task_specific(task_name, args.logging, args.settings)
    task.read_config(config_params, args.datasets, reporting_args=reporting_args, config_file=deepcopy(config_params))
    task.initialize(args.embeddings)
    task.train()
Example #9
0
def main():
    """Run an lr-find sweep over a mead task to help choose a learning rate."""
    parser = argparse.ArgumentParser(description='Train a text classifier')
    parser.add_argument('--config', help='JSON Configuration for an experiment', type=convert_path, default="$MEAD_CONFIG")
    parser.add_argument('--settings', help='JSON Configuration for mead', default='config/mead-settings.json', type=convert_path)
    parser.add_argument('--datasets', help='json library of dataset labels', default='config/datasets.json', type=convert_path)
    parser.add_argument('--embeddings', help='json library of embeddings', default='config/embeddings.json', type=convert_path)
    parser.add_argument('--logging', help='json file for logging', default='config/logging.json', type=convert_path)
    parser.add_argument('--task', help='task to run', choices=['classify', 'tagger', 'seq2seq', 'lm'])
    parser.add_argument('--backend', help='The deep learning backend to use')

    # lr-find specific knobs
    parser.add_argument('--num_iters', type=int, default=5)
    parser.add_argument('--max_lr', type=float, default=10)
    parser.add_argument('--smooth', type=float, default=0.05)
    parser.add_argument('--use_val', type=str2bool, default=False)
    parser.add_argument('--log', type=str2bool, default=True)
    parser.add_argument('--diverge_threshold', type=int, default=5)

    args, reporting_args = parser.parse_known_args()

    config_params = read_config_stream(args.config)
    try:
        args.settings = read_config_stream(args.settings)
    except Exception:  # narrowed from a bare except; a missing settings file is non-fatal
        # Bug fix: report the settings path that was missing, not the config path.
        print('Warning: no mead-settings file was found at [{}]'.format(args.settings))
        args.settings = {}
    args.datasets = read_config_stream(args.datasets)
    args.embeddings = read_config_stream(args.embeddings)
    args.logging = read_config_stream(args.logging)

    if args.backend is not None:
        config_params['backend'] = normalize_backend(args.backend)

    # Rewire the training section to run the lr-find fit function.
    config_params['reporting'] = {}
    config_params['train']['fit_func'] = "lr-find"
    config_params['train']['lr_scheduler_type'] = 'warmup_linear'
    config_params['train']['smooth_beta'] = args.smooth
    config_params['train']['use_val'] = args.use_val
    config_params['train']['log_scale'] = args.log
    config_params['train']['diverge_threshold'] = args.diverge_threshold
    config_params['train']['be'] = config_params['backend']

    task_name = config_params.get('task', 'classify') if args.task is None else args.task
    print('Task: [{}]'.format(task_name))
    task = mead.Task.get_task_specific(task_name, args.logging, args.settings)
    task.read_config(config_params, args.datasets, reporting_args=reporting_args, config_file=deepcopy(config_params))
    task.initialize(args.embeddings)
    task.train()
Example #10
0
def test_read_config_stream_env(env, gold_data):
    """Reading a config from an env-var reference yields the gold data."""
    assert read_config_stream(env) == gold_data
Example #11
0
def test_read_config_stream_file():
    """A file-path argument is routed to read_config_file exactly once."""
    path = os.path.join(data_loc, 'test_json.json')
    with mock.patch('baseline.utils.read_config_file') as patched:
        read_config_stream(path)
    patched.assert_called_once_with(path)
Example #12
0
def main():
    """Train a mead task from CLI-supplied config, dataset and embedding indices."""
    parser = argparse.ArgumentParser(description='Train a text classifier')
    parser.add_argument('--config', help='JSON Configuration for an experiment', type=convert_path, default="$MEAD_CONFIG")
    parser.add_argument('--settings', help='JSON Configuration for mead', default='config/mead-settings.json', type=convert_path)
    parser.add_argument('--datasets', help='json library of dataset labels', default='config/datasets.json', type=convert_path)
    parser.add_argument('--embeddings', help='json library of embeddings', default='config/embeddings.json', type=convert_path)
    parser.add_argument('--logging', help='json file for logging', default='config/logging.json', type=convert_path)
    parser.add_argument('--task', help='task to run', choices=['classify', 'tagger', 'seq2seq', 'lm'])
    parser.add_argument('--gpus', help='Number of GPUs (defaults to number available)', type=int, default=-1)
    parser.add_argument('--reporting', help='reporting hooks', nargs='+')
    parser.add_argument('--backend', help='The deep learning backend to use')
    # Unknown args are passed through to the reporting hooks.
    args, reporting_args = parser.parse_known_args()

    config_params = read_config_stream(args.config)
    try:
        args.settings = read_config_stream(args.settings)
    except Exception:  # narrowed from a bare except; a missing settings file is non-fatal
        # Bug fix: report the settings path that was missing, not the config path.
        print('Warning: no mead-settings file was found at [{}]'.format(args.settings))
        args.settings = {}
    args.datasets = read_config_stream(args.datasets)
    args.embeddings = read_config_stream(args.embeddings)
    args.logging = read_config_stream(args.logging)

    # CLI flags take precedence over the experiment config.
    if args.gpus is not None:
        config_params['model']['gpus'] = args.gpus

    if args.backend is not None:
        config_params['backend'] = normalize_backend(args.backend)

    # Merge reporting hooks requested on the CLI with those from the config.
    cmd_hooks = args.reporting if args.reporting is not None else []
    config_hooks = config_params.get('reporting') if config_params.get('reporting') is not None else []
    reporting = parse_extra_args(set(chain(cmd_hooks, config_hooks)), reporting_args)
    config_params['reporting'] = reporting

    task_name = config_params.get('task', 'classify') if args.task is None else args.task
    print('Task: [{}]'.format(task_name))
    task = mead.Task.get_task_specific(task_name, args.logging, args.settings)
    task.read_config(config_params, args.datasets, reporting_args=reporting_args, config_file=deepcopy(config_params))
    task.initialize(args.embeddings)
    task.train()
Example #13
0
def main():
    """Train a mead task, resolving dataset/embedding indices from settings when not given."""
    parser = argparse.ArgumentParser(description='Train a text classifier')
    parser.add_argument('--config', help='configuration for an experiment', type=convert_path, default="$MEAD_CONFIG")
    parser.add_argument('--settings', help='configuration for mead', default=DEFAULT_SETTINGS_LOC, type=convert_path)
    parser.add_argument('--datasets', help='index of dataset labels', type=convert_path)
    parser.add_argument('--modules', help='modules to load', default=[], nargs='+', required=False)
    parser.add_argument('--mod_train_file', help='override the training set')
    parser.add_argument('--mod_valid_file', help='override the validation set')
    parser.add_argument('--mod_test_file', help='override the test set')
    parser.add_argument('--embeddings', help='index of embeddings', type=convert_path)
    parser.add_argument('--logging', help='config file for logging', default=DEFAULT_LOGGING_LOC, type=convert_path)
    parser.add_argument('--task', help='task to run', choices=['classify', 'tagger', 'seq2seq', 'lm'])
    parser.add_argument('--gpus', help='Number of GPUs (defaults to number available)', type=int, default=-1)
    parser.add_argument('--basedir', help='Override the base directory where models are stored', type=str)
    parser.add_argument('--reporting', help='reporting hooks', nargs='+')
    parser.add_argument('--backend', help='The deep learning backend to use')
    parser.add_argument('--checkpoint', help='Restart training from this checkpoint')
    # Unknown args are passed through to the reporting hooks.
    args, reporting_args = parser.parse_known_args()

    config_params = read_config_stream(args.config)

    if args.basedir is not None:
        config_params['basedir'] = args.basedir

    task_name = config_params.get('task', 'classify') if args.task is None else args.task

    args.logging = read_config_stream(args.logging)
    configure_logger(args.logging, config_params.get('basedir', './{}'.format(task_name)))

    try:
        args.settings = read_config_stream(args.settings)
    except Exception:  # narrowed from a bare except; a missing settings file is non-fatal
        logger.warning('Warning: no mead-settings file was found at [{}]'.format(args.settings))
        args.settings = {}

    # Fall back to the settings (then the packaged default) for the dataset index.
    args.datasets = args.datasets if args.datasets else args.settings.get('datasets', convert_path(DEFAULT_DATASETS_LOC))
    args.datasets = read_config_stream(args.datasets)
    if args.mod_train_file or args.mod_valid_file or args.mod_test_file:
        # Consistency fix: use the module logger instead of the root `logging` module.
        logger.warning('Warning: overriding the training/valid/test data with user-specified files'
                       ' different from what was specified in the dataset index.  Creating a new key for this entry')
        update_datasets(args.datasets, config_params, args.mod_train_file, args.mod_valid_file, args.mod_test_file)

    # Same fall-back scheme for the embeddings index.
    args.embeddings = args.embeddings if args.embeddings else args.settings.get('embeddings', convert_path(DEFAULT_EMBEDDINGS_LOC))
    args.embeddings = read_config_stream(args.embeddings)

    if args.gpus is not None:
        config_params['model']['gpus'] = args.gpus

    # Backend precedence: CLI, then settings, then whatever the config held.
    if args.backend is None and 'backend' in args.settings:
        args.backend = args.settings['backend']
    if args.backend is not None:
        config_params['backend'] = normalize_backend(args.backend)

    config_params['modules'] = list(set(chain(config_params.get('modules', []), args.modules)))

    # Merge reporting hooks requested on the CLI with those from the config.
    cmd_hooks = args.reporting if args.reporting is not None else []
    config_hooks = config_params.get('reporting') if config_params.get('reporting') is not None else []
    reporting = parse_extra_args(set(chain(cmd_hooks, config_hooks)), reporting_args)
    config_params['reporting'] = reporting

    logger.info('Task: [{}]'.format(task_name))
    task = mead.Task.get_task_specific(task_name, args.settings)
    task.read_config(config_params, args.datasets, reporting_args=reporting_args)
    task.initialize(args.embeddings)
    task.train(args.checkpoint)
Example #14
0
def main():
    """Export a trained model bundle using the configured exporter."""
    parser = argparse.ArgumentParser(description='Export a model')
    parser.add_argument('--config', help='configuration for an experiment', required=True, type=convert_path)
    parser.add_argument('--settings', help='configuration for mead', required=False, default=DEFAULT_SETTINGS_LOC, type=convert_path)
    parser.add_argument('--modules', help='modules to load', default=[], nargs='+', required=False)
    parser.add_argument('--logging', help='json file for logging', default=DEFAULT_LOGGING_LOC, type=convert_path)
    parser.add_argument('--task', help='task to run', choices=['classify', 'tagger', 'seq2seq', 'lm'])
    parser.add_argument('--exporter_type', help="exporter type (default 'default')", default=None)
    parser.add_argument('--return_labels', help='if true, the exported model returns actual labels else the indices for labels vocab (default False)', default=None)
    parser.add_argument('--model', help='model name', required=True, type=unzip_files)
    parser.add_argument('--model_version', help='model_version', default=None)
    parser.add_argument('--output_dir', help="output dir (default './models')", default=None)
    parser.add_argument('--project', help='Name of project, used in path first', default=None)
    parser.add_argument('--name', help='Name of the model, used second in the path', default=None)
    parser.add_argument('--beam', help='beam_width', default=30, type=int)
    parser.add_argument('--is_remote', help='if True, separate items for remote server and client. If False bundle everything together (default True)', default=None)

    args = parser.parse_args()
    configure_logger(args.logging)

    config_params = read_config_stream(args.config)

    try:
        args.settings = read_config_stream(args.settings)
    except Exception:
        logger.warning('Warning: no mead-settings file was found at [{}]'.format(args.settings))
        args.settings = {}

    task_name = args.task if args.task is not None else config_params.get('task', 'classify')

    # Remove multigpu references
    os.environ['CUDA_VISIBLE_DEVICES'] = ""
    os.environ['NV_GPU'] = ""
    if 'gpus' in config_params.get('train', {}):
        del config_params['train']['gpus']

    if task_name == 'seq2seq' and 'beam' not in config_params:
        config_params['beam'] = args.beam

    config_params['modules'] = config_params.get('modules', []) + args.modules

    task = mead.Task.get_task_specific(task_name, args.settings)

    (output_dir, project, name, model_version,
     exporter_type, return_labels, is_remote) = get_export_params(
        config_params.get('export', {}),
        args.output_dir,
        args.project,
        args.name,
        args.model_version,
        args.exporter_type,
        args.return_labels,
        args.is_remote,
    )
    # Here we reuse code in `.read_config` which needs a dataset index (when used with mead-train)
    # but when used with mead-export it is not needed. This is a dummy dataset index that will work
    # It means we don't need to pass it in
    dummy_datasets = [{'label': config_params['dataset']}]
    task.read_config(config_params, dummy_datasets, exporter_type=exporter_type)
    field_map = create_feature_exporter_field_map(config_params['features'])
    exporter = create_exporter(task, exporter_type, return_labels=return_labels,
                               feature_exporter_field_map=field_map)
    exporter.run(args.model, output_dir, project, name, model_version, remote=is_remote)
Example #15
0
def main():  # pylint: disable=too-many-statements
    """Select a model for export if one meets the criteria
    """
    parser = argparse.ArgumentParser(
        description='Select a model for export if one meets the criteria')
    parser.add_argument('--cred',
                        help='cred file',
                        default="/etc/odind/odin-cred.yml")
    parser.add_argument('--type', help='Policy type', required=True)
    parser.add_argument(
        '--label',
        required=True,
        help=
        "The odin task label for this selecting task, used to access the store"
    )
    parser.add_argument('--models', required=True, nargs='+')
    parser.add_argument(
        '--dataset',
        help="(deprecated) The name of the dataset to evaluate",
        required=False)
    parser.add_argument('--task', required=False)
    parser.add_argument('--metric', default='acc')
    parser.add_argument('--user_cmp', default=None)
    parser.add_argument(
        '--config',
        help='(deprecated) JSON Configuration for an experiment',
        type=convert_path)
    parser.add_argument(
        '--settings',
        help='JSON Configuration for mead',
        required=False,
        default='config/mead-settings.json',
        type=convert_path,
    )
    parser.add_argument('--datasets',
                        help='(deprecated) json library of dataset labels',
                        type=convert_path)
    parser.add_argument('--logging',
                        help='json file for logging',
                        default='config/logging.json',
                        type=convert_path)
    parser.add_argument('--data_root', help='Data directory', default='/data')
    parser.add_argument('--xpctl_api_url', help='XPCTL api', type=str)

    args = parser.parse_args()

    # Deprecated flags are still accepted but ignored; warn so callers can clean up.
    if args.datasets is not None:
        LOGGER.warning(
            "--datasets is unused and unneeded for calls to `odin-select`")
    if args.config is not None:
        LOGGER.warning(
            "--config is unused and unneeded for calls to `odin-select`")
    if args.dataset is not None:
        LOGGER.warning(
            "--dataset is unused and unneeded for calls to `odin-select`")

    # The cred file describes both the jobs db and the reporting db.
    cred_params = read_config_stream(args.cred)

    store = create_store_backend(**cred_params['jobs_db'])
    args.store = store

    # Prefer an explicit XPCTL URL; otherwise fall back to the reporting db host.
    xpctl_url = args.xpctl_api_url if args.xpctl_api_url is not None else cred_params[
        'reporting_db']['host']
    args.api = xpctl_client(host=xpctl_url)

    # NOTE: vars(args) returns the namespace's own __dict__, so deleting 'cred'
    # here also removes the attribute from args itself. The remaining entries
    # (including the store and api objects attached above) become the policy params.
    params = vars(args)
    del params['cred']
    policy = create_export_policy(args.type, params)
    results = policy.select(args.models)
    if results:
        print(results)

        # Merge the selection results into this job's stored 'outputs' record.
        job_details = store.get(args.label)
        outputs = job_details.get("outputs", {})
        if outputs is None:
            outputs = {}
            job_details['outputs'] = outputs
        outputs.update(results)
        store.set(job_details)
Example #16
0
def test_read_config_stream_str(gold_data):
    """A raw JSON string should be parsed back into the original data."""
    serialized = json.dumps(gold_data)
    result = read_config_stream(serialized)
    assert result == gold_data
Example #17
0
def test_read_config_stream_env(env, gold_data):
    """Reading config via an environment-variable reference yields the fixture data."""
    assert read_config_stream(env) == gold_data
Example #18
0
def test_read_config_stream_str(gold_data):
    """Round-trip: serializing gold_data to JSON and reading it back recovers it."""
    assert read_config_stream(json.dumps(gold_data)) == gold_data
Example #19
0
def main():
    """Export a trained model as a deployment bundle.

    Reads an experiment configuration, merges command-line overrides into
    it, resolves export parameters (output location, versioning, exporter
    type), and runs the task-appropriate exporter. Optionally zips the
    result and/or splits it into remote-server and client pieces.
    """
    parser = argparse.ArgumentParser(description='Export a model')
    parser.add_argument('--config', help='configuration for an experiment', required=True, type=convert_path)
    parser.add_argument('--settings', help='configuration for mead', required=False, default=DEFAULT_SETTINGS_LOC, type=convert_path)
    parser.add_argument('--modules', help='modules to load', default=[], nargs='+', required=False)
    parser.add_argument('--datasets', help='json library of dataset labels')
    parser.add_argument('--vecs', help='index of vectorizers: local file, remote URL or hub mead-ml/ref', default='config/vecs.json', type=convert_path)
    parser.add_argument('--logging', help='json file for logging', default='config/logging.json', type=convert_path)
    parser.add_argument('--task', help='task to run', choices=['classify', 'tagger', 'seq2seq', 'lm'])
    parser.add_argument('--exporter_type', help="exporter type (default 'default')", default=None)
    parser.add_argument('--return_labels', help='if true, the exported model returns actual labels else '
                                                'the indices for labels vocab (default False)', default=None)
    parser.add_argument('--model', help='model name', required=True, type=unzip_files)
    parser.add_argument('--model_version', help='model_version', default=None)
    parser.add_argument('--output_dir', help="output dir (default './models')", default=None)
    parser.add_argument('--project', help='Name of project, used in path first', default=None)
    parser.add_argument('--name', help='Name of the model, used second in the path', default=None)
    parser.add_argument('--beam', help='beam_width', default=30, type=int)
    parser.add_argument('--nbest_input', help='Is the input to this model N-best', default=False, type=str2bool)
    parser.add_argument('--is_remote', help='if True, separate items for remote server and client. If False bundle everything together (default True)', default=None)
    parser.add_argument('--backend', help='The deep learning backend to use')
    parser.add_argument('--reporting', help='reporting hooks', nargs='+')
    parser.add_argument('--use_version', help='Should we use the version?', type=str2bool, default=True)
    parser.add_argument('--use_all_features', help='If a feature is found via vectorizer and not in embeddings, should we include it?', type=str2bool, default=False)
    parser.add_argument('--zip', help='Should we zip the results?', type=str2bool, default=False)

    # Unknown args are not errors: they are treated as config overrides below
    args, overrides = parser.parse_known_args()
    configure_logger(args.logging)

    config_params = read_config_stream(args.config)
    # Fold the leftover CLI args into the config (keys prefixed with 'x')
    config_params = parse_and_merge_overrides(config_params, overrides, pre='x')

    # mead-settings are optional; fall back to an empty dict with a warning
    try:
        args.settings = read_config_stream(args.settings)
    except Exception:
        logger.warning('Warning: no mead-settings file was found at [{}]'.format(args.settings))
        args.settings = {}

    # An explicit --task wins over whatever the config declares
    task_name = config_params.get('task', 'classify') if args.task is None else args.task

    # Remove multigpu references: exporting never needs GPU devices
    os.environ['CUDA_VISIBLE_DEVICES'] = ""
    os.environ['NV_GPU'] = ""
    if 'gpus' in config_params.get('train', {}):
        del config_params['train']['gpus']

    # seq2seq decoding needs a beam width; inject the CLI value if absent
    if task_name == 'seq2seq' and 'beam' not in config_params:
         config_params['beam'] = args.beam

    config_params['modules'] = config_params.get('modules', []) + args.modules
    if args.backend is not None:
        config_params['backend'] = normalize_backend(args.backend)

    # Merge reporting hooks named on the CLI with those in the config,
    # then pull each hook's options out of the override args
    cmd_hooks = args.reporting if args.reporting is not None else []
    config_hooks = config_params.get('reporting') if config_params.get('reporting') is not None else []
    reporting = parse_extra_args(set(chain(cmd_hooks, config_hooks)), overrides)
    config_params['reporting'] = reporting

    args.vecs = read_config_stream(args.vecs)

    task = mead.Task.get_task_specific(task_name, args.settings)

    # Config-file export settings are merged with (and overridden by) CLI flags
    output_dir, project, name, model_version, exporter_type, return_labels, is_remote = get_export_params(
        config_params.get('export', {}),
        args.output_dir,
        args.project, args.name,
        args.model_version,
        args.exporter_type,
        args.return_labels,
        args.is_remote,
    )
    # Here we reuse code in `.read_config` which needs a dataset index (when used with mead-train)
    # but when used with mead-export it is not needed. This is a dummy dataset index that will work
    # It means we don't need to pass it in
    datasets = [{'label': config_params['dataset']}]
    task.read_config(config_params, datasets, args.vecs, exporter_type=exporter_type)
    feature_exporter_field_map = create_feature_exporter_field_map(config_params['features'])
    exporter = create_exporter(task, exporter_type, return_labels=return_labels,
                               feature_exporter_field_map=feature_exporter_field_map,
                               nbest_input=args.nbest_input)
    exporter.run(args.model, output_dir, project, name, model_version,
                 remote=is_remote, use_version=args.use_version, zip_results=args.zip, use_all_features=args.use_all_features)
Example #20
0
def main():
    """Evaluate a saved model on a dataset.

    Loads a model through the task-appropriate service, constructs a
    reader for the dataset, and runs the trainer's test phase, emitting
    metrics through any configured reporting hooks.
    """
    parser = argparse.ArgumentParser(description='Evaluate on a dataset')
    parser.add_argument('--model', required=True)
    parser.add_argument('--dataset', required=True)
    parser.add_argument('--settings',
                        default=DEFAULT_SETTINGS_LOC,
                        type=convert_path)
    parser.add_argument('--modules', nargs="+", default=[])
    parser.add_argument('--reporting', nargs="+")
    parser.add_argument('--logging',
                        default=DEFAULT_LOGGING_LOC,
                        type=convert_path)
    parser.add_argument('--task',
                        default='classify',
                        choices={'classify', 'tagger', 'seq2seq', 'lm'})
    parser.add_argument('--backend', default='tf')
    parser.add_argument('--reader', default='default')
    parser.add_argument('--trim', default=True, type=str2bool)
    # type=int so a CLI-supplied value doesn't reach load_data as a string
    parser.add_argument('--batchsz', default=50, type=int)
    parser.add_argument('--trainer', default='default')
    parser.add_argument('--output', default=None)
    parser.add_argument('--remote')
    parser.add_argument(
        '--features',
        help=
        '(optional) features in the format feature_name:index (column # in conll) or '
        'just feature names (assumed sequential)',
        default=[],
        nargs='+',
    )
    parser.add_argument('--device', default='cpu')
    # our parse_extra_args doesn't handle lists :/
    parser.add_argument('--pair_suffix', nargs='+', default=[])
    # Unknown args carry per-component options (reader:..., trainer:..., etc.)
    args, extra_args = parser.parse_known_args()

    # Language-model evaluation is forced to batch size 1
    args.batchsz = args.batchsz if args.task != 'lm' else 1

    # Map conll column indices (as strings) back to feature names for the reader
    named_fields = {
        str(v): k
        for k, v in feature_index_mapping(args.features).items()
    }

    # Split the leftover CLI args into per-component option groups
    reader_options = parse_extra_args(['reader'], extra_args)['reader']
    reader_options = process_reader_options(reader_options)
    verbose_options = parse_extra_args(['verbose'], extra_args)['verbose']
    trainer_options = parse_extra_args(['trainer'], extra_args)['trainer']
    if 'span_type' not in trainer_options:
        trainer_options['span_type'] = 'iobes'
    model_options = parse_extra_args(['model'], extra_args)['model']

    args.logging = read_config_stream(args.logging)
    configure_logger(args.logging)

    # mead-settings are optional; fall back to an empty dict with a warning.
    # except Exception (not a bare except) so SystemExit/KeyboardInterrupt propagate,
    # consistent with the export driver in this package.
    try:
        args.settings = read_config_stream(args.settings)
    except Exception:
        logger.warning(
            'Warning: no mead-settings file was found at [{}]'.format(
                args.settings))
        args.settings = {}

    backend = Backend(args.backend)
    backend.load(args.task)
    for module in args.modules:
        import_user_module(module)

    # Merge CLI-requested reporting hooks with those in mead-settings
    reporting = parse_extra_args(
        args.reporting if args.reporting is not None else [], extra_args)
    reporting_hooks, reporting = merge_reporting_with_settings(
        reporting, args.settings)
    reporting_fns = [
        x.step for x in create_reporting(reporting_hooks, reporting,
                                         {'task': args.task})
    ]

    service = get_service(args.task)
    model = service.load(args.model,
                         backend=args.backend,
                         remote=args.remote,
                         device=args.device,
                         **model_options)

    vectorizers = get_vectorizers(args.task, model)

    reader = create_reader(args.task,
                           vectorizers,
                           args.trim,
                           type=args.reader,
                           named_fields=named_fields,
                           pair_suffix=args.pair_suffix,
                           **reader_options)
    reader = patch_reader(args.task, model, reader)

    data, txts = load_data(args.task, reader, model, args.dataset,
                           args.batchsz)

    if args.task == 'seq2seq':
        # seq2seq scoring needs a reverse lookup table for target tokens
        trainer_options['tgt_rlut'] = {
            v: k
            for k, v in model.tgt_vocab.items()
        }

    trainer = get_trainer(model,
                          args.trainer,
                          verbose_options,
                          backend.name,
                          gpu=args.device != 'cpu',
                          nogpu=args.device == 'cpu',
                          **trainer_options)
    # Each task's trainer expects slightly different keyword args for test()
    if args.task == 'classify':
        _ = trainer.test(data,
                         reporting_fns=reporting_fns,
                         phase='Test',
                         verbose=verbose_options,
                         output=args.output,
                         txts=txts,
                         **model_options)
    elif args.task == 'tagger':
        _ = trainer.test(data,
                         reporting_fns=reporting_fns,
                         phase='Test',
                         verbose=verbose_options,
                         conll_output=args.output,
                         txts=txts,
                         **model_options)
    else:
        _ = trainer.test(data,
                         reporting_fns=reporting_fns,
                         phase='Test',
                         verbose=verbose_options,
                         **model_options)
Example #21
0
def test_read_config_stream_file():
    """A filesystem path argument should be delegated to read_config_file exactly once."""
    path = os.path.join(data_loc, 'test_json.json')
    with mock.patch('baseline.utils.read_config_file') as patched_read:
        read_config_stream(path)
    patched_read.assert_called_once_with(path)