예제 #1
0
def test_experiment_sequence_combiner(csv_filename):
    """Run short experiments that use the ``sequence_concat`` combiner.

    Two sequence inputs (``english`` and ``spanish``) plus one category input
    feed a single category output. Data is generated once and one 2-epoch
    experiment is launched per entry of the module-level ``encoders``.

    :param csv_filename: passed to ``generate_data`` as the CSV destination
    :return: None
    """
    # Sequence combiner
    input_features_template = Template(
        '[{name: english, type: sequence, vocab_size: 10,'
        ' max_len: 10, min_len: 10, encoder: rnn, cell_type: lstm,'
        ' reduce_output: null}, {name: spanish, type: sequence, vocab_size: 10,'
        ' max_len: 10, min_len: 10, encoder: rnn, cell_type: lstm,'
        ' reduce_output: null}, {name: category,'
        ' type: category, vocab_size: 10} ]')

    output_features_string = "[{type: category, name: intent, reduce_input:" \
                             " sum, vocab_size: 10}]"

    model_definition_template2 = Template(
        '{input_features: ${input_name}, output_features: ${output_name}, '
        'training: {epochs: 2}, combiner: {type: sequence_concat, encoder: rnn,'
        'main_sequence_feature: random_sequence}}')

    # Generate test data
    rel_path = generate_data(
        input_features_template.substitute(encoder1='rnn', encoder2='rnn'),
        output_features_string, csv_filename)

    # NOTE(review): input_features_template contains no ${encoder1} or
    # ${encoder2} placeholder, so substitute() returns the same text on every
    # call and each iteration below runs with `encoder: rnn`. Confirm whether
    # the template was meant to be parameterized before changing it.
    for encoder1, encoder2 in zip(encoders, encoders):
        input_features = input_features_template.substitute(encoder1=encoder1,
                                                            encoder2=encoder2)

        model_definition = model_definition_template2.substitute(
            input_name=input_features, output_name=output_features_string)

        # safe_load: calling yaml.load() without an explicit Loader is
        # deprecated since PyYAML 5.1 and raises TypeError on PyYAML 6.
        experiment(yaml.safe_load(model_definition),
                   skip_save_processed_input=True,
                   skip_save_progress=True,
                   skip_save_unprocessed_output=True,
                   data_csv=rel_path)
예제 #2
0
def test_experiment_model_resume(csv_filename):
    """Train a model, resume training from its directory, then predict.

    Uses a single sequence input and a single category output. The first
    ``experiment`` call returns the experiment directory; that directory is
    passed back as ``model_resume_path`` for a second run, and its ``model``
    sub-directory is given to ``full_predict``.

    :param csv_filename: passed to ``generate_data`` as the CSV destination
    :return: None
    """
    input_features = [sequence_feature(encoder='rnn', reduce_output='sum')]
    output_features = [categorical_feature(vocab_size=2, reduce_input='sum')]
    # Generate test data
    rel_path = generate_data(input_features, output_features, csv_filename)

    model_definition = {
        'input_features': input_features,
        'output_features': output_features,
        'combiner': {
            'type': 'concat',
            'fc_size': 14
        },
        'training': {
            'epochs': 2
        }
    }

    exp_dir_name = experiment(model_definition, data_csv=rel_path)
    try:
        logging.info('Experiment Directory: {0}'.format(exp_dir_name))

        experiment(model_definition,
                   data_csv=rel_path,
                   model_resume_path=exp_dir_name)

        full_predict(os.path.join(exp_dir_name, 'model'), data_csv=rel_path)
    finally:
        # Remove the experiment directory even when the resume run or the
        # prediction step raises, so a failing test leaves nothing behind.
        shutil.rmtree(exp_dir_name, ignore_errors=True)
예제 #3
0
def run_experiment(input_features, output_features, **kwargs):
    """
    Run one experiment with the shared test settings, then delete the
    directory that ``experiment`` returns.

    :param input_features: list of input feature dictionaries, or None
    :param output_features: list of output feature dictionaries, or None
    **kwargs extra keyword arguments forwarded to ``experiment``; they
    override the defaults assembled here
    :return: None
    """
    # A model definition is only built when both feature lists are given;
    # otherwise None is passed so the caller can supply
    # model_definition_file through kwargs instead.
    features_given = input_features is not None and output_features is not None
    if features_given:
        model_definition = {
            'input_features': input_features,
            'output_features': output_features,
            'combiner': {
                'type': 'concat',
                'fc_size': 14
            },
            'training': {'epochs': 2}
        }
    else:
        model_definition = None

    experiment_kwargs = dict(
        model_definition=model_definition,
        skip_save_processed_input=True,
        skip_save_progress=True,
        skip_save_unprocessed_output=True,
    )
    # Caller-supplied values take precedence over the defaults above.
    experiment_kwargs.update(kwargs)

    output_dir = experiment(**experiment_kwargs)
    shutil.rmtree(output_dir, ignore_errors=True)
예제 #4
0
def run_experiment(input_features, output_features, data_csv):
    """
    Helper method to avoid code repetition in running an experiment.

    The two schemas are substituted into the module-level
    ``model_definition_template`` and the resulting YAML text is parsed into
    the model definition passed to ``experiment``.

    :param input_features: input schema
    :param output_features: output schema
    :param data_csv: path to data
    :return: None
    """
    model_definition = model_definition_template.substitute(
        input_name=input_features, output_name=output_features)

    # safe_load: calling yaml.load() without an explicit Loader is deprecated
    # since PyYAML 5.1 and raises TypeError on PyYAML 6.
    experiment(yaml.safe_load(model_definition),
               skip_save_processed_input=True,
               skip_save_progress=True,
               skip_save_unprocessed_output=True,
               data_csv=data_csv)
예제 #5
0
def test_experiment_sequence_combiner(csv_filename):
    """Exercise the ``sequence_concat`` combiner with several encoders.

    Two sequence inputs (``english``, ``spanish``) and one category input
    feed one category output. Data is generated once; afterwards both
    sequence inputs are switched to each encoder in ``ENCODERS[:-2]`` and a
    2-epoch experiment is run, whose output directory is removed.

    :param csv_filename: passed to ``generate_data`` as the CSV destination
    :return: None
    """
    # Settings shared by both sequence inputs.
    sequence_settings = dict(
        min_len=5,
        max_len=5,
        encoder='rnn',
        cell_type='lstm',
        reduce_output=None
    )
    input_features = [
        sequence_feature(name='english', **sequence_settings),
        sequence_feature(name='spanish', **sequence_settings),
        category_feature(vocab_size=5)
    ]
    output_features = [category_feature(reduce_input='sum', vocab_size=5)]

    model_definition = {
        'input_features': input_features,
        'output_features': output_features,
        'training': {'epochs': 2},
        'combiner': {
            'type': 'sequence_concat',
            'encoder': 'rnn',
            'main_sequence_feature': 'random_sequence'
        }
    }

    # Generate test data
    rel_path = generate_data(input_features, output_features, csv_filename)

    experiment_options = dict(
        skip_save_processed_input=False,
        skip_save_progress=True,
        skip_save_unprocessed_output=True,
        data_csv=rel_path
    )

    for encoder in ENCODERS[:-2]:
        message = 'sequence combiner. encoders: {0}, {1}'.format(
            encoder, encoder)
        logger.error(message)

        # Both sequence inputs always use the same encoder.
        for sequence_input in input_features[:2]:
            sequence_input['encoder'] = encoder

        model_definition['input_features'] = input_features

        exp_dir_name = experiment(model_definition, **experiment_options)
        shutil.rmtree(exp_dir_name, ignore_errors=True)
예제 #6
0
def test_experiment_model_resume(csv_filename):
    """Train a model, resume training from its directory, then predict.

    Uses a single sequence input and a single category output. The first
    ``experiment`` call returns the experiment directory; that directory is
    passed back as ``model_resume_path`` for a second run, and its ``model``
    sub-directory is given to ``full_predict``.

    :param csv_filename: passed to ``generate_data`` as the CSV destination
    :return: None
    """
    input_features = '[{name: utterance, type: sequence, vocab_size: 10,' \
                     ' max_len: 10, encoder: rnn, reduce_output: sum}]'
    output_features = "[{name: intent, type: category, vocab_size: 2," \
                      " reduce_input: sum}] "

    # Generate test data
    rel_path = generate_data(input_features, output_features, csv_filename)

    model_definition = model_definition_template.substitute(
        input_name=input_features, output_name=output_features)

    # safe_load: calling yaml.load() without an explicit Loader is deprecated
    # since PyYAML 5.1 and raises TypeError on PyYAML 6. The text is parsed
    # separately for each run so each call receives its own dictionary.
    exp_dir_name = experiment(yaml.safe_load(model_definition),
                              data_csv=rel_path)
    logging.info('Experiment Directory: {0}'.format(exp_dir_name))

    experiment(yaml.safe_load(model_definition),
               data_csv=rel_path,
               model_resume_path=exp_dir_name)

    full_predict(os.path.join(exp_dir_name, 'model'), data_csv=rel_path)
예제 #7
0
def test_experiment_seq_seq1_model_def_file(csv_filename, yaml_filename):
    """Run a seq-to-seq experiment configured through a YAML file.

    The model definition is built from the module-level
    ``model_definition_template``, dumped to ``yaml_filename``, and handed to
    ``experiment`` through ``model_definition_file`` with
    ``model_definition=None``.

    :param csv_filename: passed to ``generate_data`` as the CSV destination
    :param yaml_filename: path the model definition is written to
    :return: None
    """
    input_features = ('[{name: utt, type: text, reduce_output: null, '
                      'vocab_size: 10, min_len: 10, max_len: 10,'
                      ' encoder: embed}]')

    output_features = ('[{name: iob, type: text, reduce_input: null, '
                       'vocab_size: 3, min_len: 10, max_len: 10,'
                       ' decoder: tagger}]')

    # Save the model definition to a yaml file.
    # safe_load: calling yaml.load() without an explicit Loader is deprecated
    # since PyYAML 5.1 and raises TypeError on PyYAML 6.
    model_definition = yaml.safe_load(
        model_definition_template.substitute(input_name=input_features,
                                             output_name=output_features))
    with open(yaml_filename, 'w') as yaml_out:
        yaml.dump(model_definition, yaml_out)

    rel_path = generate_data(input_features, output_features, csv_filename)
    experiment(model_definition=None,
               model_definition_file=yaml_filename,
               skip_save_processed_input=True,
               skip_save_progress=True,
               skip_save_unprocessed_output=True,
               data_csv=rel_path)