Example #1
def test_experiment_model_resume(csv_filename):
    # Single sequence input, single category output
    # Tests saving a model file, loading it to rerun training and predict
    input_features = [sequence_feature(encoder='rnn', reduce_output='sum')]
    output_features = [categorical_feature(vocab_size=2, reduce_input='sum')]
    # Generate test data
    rel_path = generate_data(input_features, output_features, csv_filename)

    model_definition = {
        'input_features': input_features,
        'output_features': output_features,
        'combiner': {
            'type': 'concat',
            'fc_size': 14
        },
        'training': {
            'epochs': 2
        }
    }

    exp_dir_name = experiment(model_definition, data_csv=rel_path)
    logging.info('Experiment Directory: {0}'.format(exp_dir_name))

    experiment(model_definition,
               data_csv=rel_path,
               model_resume_path=exp_dir_name)

    full_predict(os.path.join(exp_dir_name, 'model'), data_csv=rel_path)
    shutil.rmtree(exp_dir_name, ignore_errors=True)
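This snippet assumes the usual imports from Ludwig and its test utilities. A minimal, hypothetical module header consistent with the names used above (exact module paths vary across Ludwig versions) might look like:

import logging
import os
import shutil

from ludwig.experiment import experiment
from ludwig.predict import full_predict
from tests.integration_tests.utils import categorical_feature
from tests.integration_tests.utils import generate_data
from tests.integration_tests.utils import sequence_feature

With those in place, the test can be run directly, e.g. with pytest -k test_experiment_model_resume.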
Example #2
def test_experiment_model_resume(csv_filename):
    # Single sequence input, single category output
    # Tests saving a model file, loading it to rerun training and predict
    input_features = '[{name: utterance, type: sequence, vocab_size: 10,' \
                     ' max_len: 10, encoder: rnn, reduce_output: sum}]'
    output_features = "[{name: intent, type: category, vocab_size: 2," \
                      " reduce_input: sum}] "

    # Generate test data
    rel_path = generate_data(input_features, output_features, csv_filename)

    model_definition = model_definition_template.substitute(
        input_name=input_features, output_name=output_features)

    exp_dir_name = experiment(yaml.safe_load(model_definition),
                              data_csv=rel_path)
    logging.info('Experiment Directory: {0}'.format(exp_dir_name))

    experiment(yaml.safe_load(model_definition),
               data_csv=rel_path,
               model_resume_path=exp_dir_name)

    full_predict(os.path.join(exp_dir_name, 'model'), data_csv=rel_path)
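model_definition_template is not shown in this example. Given the substitute(input_name=..., output_name=...) call above, it is presumably a string.Template whose result parses as a YAML model definition; a plausible sketch (the training section is an assumption) is:

from string import Template

# Hypothetical template: it must expose $input_name and $output_name
# placeholders for the substitute() call above, and it must render to
# valid YAML for yaml.safe_load().
model_definition_template = Template(
    '{input_features: ${input_name}, output_features: ${output_name}, '
    'training: {epochs: 2}}')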
Example #3
def cli(sys_argv):
    parser = argparse.ArgumentParser(
        description='This script loads a pretrained model '
        'and tests its performance by comparing '
        'its predictions with ground truth.',
        prog='ludwig test',
        usage='%(prog)s [options]')

    # ---------------
    # Data parameters
    # ---------------
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument(
        '--data_csv',
        help='input data CSV file. '
        'If it has a split column, it will be used for splitting '
        '(0: train, 1: validation, 2: test), '
        'otherwise the dataset will be randomly split')
    group.add_argument(
        '--data_hdf5',
        help='input data HDF5 file. It is an intermediate, preprocessed '
        'version of the input CSV, created in the same directory with the '
        'same name and an .hdf5 extension the first time the CSV file is used')
    parser.add_argument(
        '--train_set_metadata_json',
        help='input metadata JSON file. It is an intermediate, preprocessed '
        'file containing the mappings of the input CSV, created in the same '
        'directory with the same name and a .json extension the first time '
        'the CSV file is used')

    parser.add_argument('-s',
                        '--split',
                        default=TEST,
                        choices=[TRAINING, VALIDATION, TEST, FULL],
                        help='the split to test the model on')

    # ----------------
    # Model parameters
    # ----------------
    parser.add_argument('-m',
                        '--model_path',
                        help='model to load',
                        required=True)

    # -------------------------
    # Output results parameters
    # -------------------------
    parser.add_argument('-od',
                        '--output_directory',
                        type=str,
                        default='results',
                        help='directory that contains the results')
    parser.add_argument('-ssuo',
                        '--skip_save_unprocessed_output',
                        help='skips saving intermediate NPY output files',
                        action='store_true',
                        default=False)

    # ------------------
    # Generic parameters
    # ------------------
    parser.add_argument('-bs',
                        '--batch_size',
                        type=int,
                        default=128,
                        help='size of batches')

    # ------------------
    # Runtime parameters
    # ------------------
    parser.add_argument('-g',
                        '--gpus',
                        type=int,
                        default=0,
                        help='list of gpus to use')
    parser.add_argument(
        '-gf',
        '--gpu_fraction',
        type=float,
        default=1.0,
        help='fraction of gpu memory to initialize the process with')
    parser.add_argument('-uh',
                        '--use_horovod',
                        action='store_true',
                        default=False,
                        help='uses horovod for distributed training')
    parser.add_argument('-dbg',
                        '--debug',
                        action='store_true',
                        default=False,
                        help='enables debugging mode')
    parser.add_argument(
        '-l',
        '--logging_level',
        default='info',
        help='the level of logging to use',
        choices=['critical', 'error', 'warning', 'info', 'debug', 'notset'])

    args = parser.parse_args(sys_argv)
    args.evaluate_performance = True

    logging.getLogger('ludwig').setLevel(
        logging_level_registry[args.logging_level])
    set_on_master(args.use_horovod)

    if is_on_master():
        print_ludwig('Test', LUDWIG_VERSION)

    full_predict(**vars(args))
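The parsed namespace is forwarded unchanged to full_predict, with evaluate_performance forced to True so that predictions are compared against ground truth. A minimal sketch of driving this entry point programmatically (the dataset and model paths are hypothetical):

# Equivalent to `ludwig test --data_csv ... -m ...` on the command line.
cli([
    '--data_csv', 'reuters-allcats.csv',             # hypothetical dataset
    '--model_path', 'results/experiment_run/model',  # hypothetical model dir
    '--batch_size', '64',
])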