def test_experiment_sequence_combiner(csv_filename):
    """Machine-translation-style test of the sequence_concat combiner.

    Runs one experiment per encoder, using the same encoder for both
    sequence input features.

    :param csv_filename: path where the generated test CSV is written
    :return: None
    """
    # The encoder of each sequence input is a template placeholder so the
    # loop below actually varies it (previously it was hard-coded to rnn,
    # which made the per-encoder loop train the same model every time).
    input_features_template = Template(
        '[{name: english, type: sequence, vocab_size: 10,'
        ' max_len: 10, min_len: 10, encoder: ${encoder1}, cell_type: lstm,'
        ' reduce_output: null}, {name: spanish, type: sequence, vocab_size: 10,'
        ' max_len: 10, min_len: 10, encoder: ${encoder2}, cell_type: lstm,'
        ' reduce_output: null}, {name: category,'
        ' type: category, vocab_size: 10} ]')
    output_features_string = "[{type: category, name: intent, reduce_input:" \
                             " sum, vocab_size: 10}]"
    model_definition_template2 = Template(
        '{input_features: ${input_name}, output_features: ${output_name}, '
        'training: {epochs: 2}, combiner: {type: sequence_concat, encoder: rnn,'
        'main_sequence_feature: random_sequence}}')

    # Generate test data once; the CSV schema does not depend on the encoder.
    rel_path = generate_data(
        input_features_template.substitute(encoder1='rnn', encoder2='rnn'),
        output_features_string, csv_filename)

    # zip(encoders, encoders) only ever paired each encoder with itself;
    # iterate the list once and use the same encoder for both inputs.
    for encoder in encoders:
        input_features = input_features_template.substitute(
            encoder1=encoder, encoder2=encoder)
        model_definition = model_definition_template2.substitute(
            input_name=input_features, output_name=output_features_string)
        # safe_load: yaml.load without an explicit Loader is deprecated
        # (and a TypeError with PyYAML >= 6); the definition is plain data.
        experiment(yaml.safe_load(model_definition),
                   skip_save_processed_input=True,
                   skip_save_progress=True,
                   skip_save_unprocessed_output=True,
                   data_csv=rel_path)
def test_experiment_model_resume(csv_filename):
    """Train, resume training from the saved directory, then predict.

    Single sequence input, single category output; verifies that a saved
    model can be resumed for further training and used for prediction.

    :param csv_filename: path where the generated test CSV is written
    :return: None
    """
    input_features = [sequence_feature(encoder='rnn', reduce_output='sum')]
    output_features = [categorical_feature(vocab_size=2, reduce_input='sum')]

    # Synthesize a CSV matching the feature schema.
    rel_path = generate_data(input_features, output_features, csv_filename)

    model_definition = {
        'input_features': input_features,
        'output_features': output_features,
        'combiner': {'type': 'concat', 'fc_size': 14},
        'training': {'epochs': 2},
    }

    # Initial training run produces the directory we resume from.
    exp_dir_name = experiment(model_definition, data_csv=rel_path)
    logging.info('Experiment Directory: {0}'.format(exp_dir_name))

    # Second run resumes from the saved experiment directory.
    experiment(
        model_definition,
        data_csv=rel_path,
        model_resume_path=exp_dir_name,
    )

    # Predict with the saved model, then clean up the directory.
    full_predict(os.path.join(exp_dir_name, 'model'), data_csv=rel_path)
    shutil.rmtree(exp_dir_name, ignore_errors=True)
def run_experiment(input_features, output_features, **kwargs):
    """Run a single experiment and delete its output directory afterwards.

    Helper to avoid code repetition when running experiments in tests.

    :param input_features: list of input feature dictionaries, or None when
        the caller supplies model_definition_file through kwargs instead
    :param output_features: list of output feature dictionaries, or None
        (same condition as input_features)
    **kwargs are forwarded to experiment() and override the defaults below
    :return: None
    """
    if input_features is None or output_features is None:
        # Caller is expected to pass model_definition_file via kwargs,
        # so no inline model definition is built.
        model_definition = None
    else:
        model_definition = {
            'input_features': input_features,
            'output_features': output_features,
            'combiner': {'type': 'concat', 'fc_size': 14},
            'training': {'epochs': 2},
        }

    defaults = {
        'model_definition': model_definition,
        'skip_save_processed_input': True,
        'skip_save_progress': True,
        'skip_save_unprocessed_output': True,
    }
    # kwargs take precedence over the defaults, exactly as dict.update did.
    exp_dir_name = experiment(**{**defaults, **kwargs})
    shutil.rmtree(exp_dir_name, ignore_errors=True)
def run_experiment(input_features, output_features, data_csv):
    """Run one experiment built from the module-level model definition template.

    Helper method to avoid code repetition in running an experiment.

    :param input_features: input schema (YAML fragment string)
    :param output_features: output schema (YAML fragment string)
    :param data_csv: path to the data CSV
    :return: None
    """
    model_definition = model_definition_template.substitute(
        input_name=input_features,
        output_name=output_features)
    # safe_load: yaml.load without an explicit Loader is deprecated and a
    # TypeError with PyYAML >= 6; the definition contains plain data only.
    experiment(yaml.safe_load(model_definition),
               skip_save_processed_input=True,
               skip_save_progress=True,
               skip_save_unprocessed_output=True,
               data_csv=data_csv)
def test_experiment_sequence_combiner(csv_filename):
    """Exercise the sequence_concat combiner across several encoders.

    Two sequence inputs plus a category input feed a sequence_concat
    combiner; the test loops over encoders, applying the same encoder to
    both sequence features.

    :param csv_filename: path where the generated test CSV is written
    :return: None
    """
    input_features = [
        sequence_feature(
            name='english',
            min_len=5,
            max_len=5,
            encoder='rnn',
            cell_type='lstm',
            reduce_output=None
        ),
        sequence_feature(
            name='spanish',
            min_len=5,
            max_len=5,
            encoder='rnn',
            cell_type='lstm',
            reduce_output=None
        ),
        category_feature(vocab_size=5)
    ]
    output_features = [
        category_feature(reduce_input='sum', vocab_size=5)
    ]
    model_definition = {
        'input_features': input_features,
        'output_features': output_features,
        'training': {'epochs': 2},
        'combiner': {
            'type': 'sequence_concat',
            'encoder': 'rnn',
            'main_sequence_feature': 'random_sequence'
        }
    }

    # Generate test data
    rel_path = generate_data(input_features, output_features, csv_filename)

    for encoder in ENCODERS[:-2]:
        # info, not error: this is progress logging, not a failure report.
        logger.info('sequence combiner. encoders: {0}, {1}'.format(
            encoder, encoder
        ))
        # Mutate the shared feature dicts in place; model_definition already
        # references this same list, so no reassignment is needed.
        input_features[0]['encoder'] = encoder
        input_features[1]['encoder'] = encoder
        exp_dir_name = experiment(
            model_definition,
            skip_save_processed_input=False,
            skip_save_progress=True,
            skip_save_unprocessed_output=True,
            data_csv=rel_path
        )
        shutil.rmtree(exp_dir_name, ignore_errors=True)
def test_experiment_model_resume(csv_filename):
    """Train, resume training from the saved directory, then predict.

    Single sequence input, single category output. Tests saving a model
    file, loading it to rerun training, and predicting from it.

    :param csv_filename: path where the generated test CSV is written
    :return: None
    """
    input_features = '[{name: utterance, type: sequence, vocab_size: 10,' \
                     ' max_len: 10, encoder: rnn, reduce_output: sum}]'
    output_features = "[{name: intent, type: category, vocab_size: 2," \
                      " reduce_input: sum}] "

    # Generate test data
    rel_path = generate_data(input_features, output_features, csv_filename)

    model_definition = model_definition_template.substitute(
        input_name=input_features,
        output_name=output_features)

    # safe_load: yaml.load without an explicit Loader is deprecated and a
    # TypeError with PyYAML >= 6; the definition contains plain data only.
    exp_dir_name = experiment(yaml.safe_load(model_definition),
                              data_csv=rel_path)
    logging.info('Experiment Directory: {0}'.format(exp_dir_name))

    # Resume training from the directory produced above.
    experiment(yaml.safe_load(model_definition),
               data_csv=rel_path,
               model_resume_path=exp_dir_name)

    full_predict(os.path.join(exp_dir_name, 'model'), data_csv=rel_path)
    # Clean up the experiment directory (previously leaked to disk),
    # matching the cleanup done by the sibling resume test.
    shutil.rmtree(exp_dir_name, ignore_errors=True)
def test_experiment_seq_seq1_model_def_file(csv_filename, yaml_filename):
    """Seq-to-seq test driven by a model definition file, not a dict.

    Writes the model definition to a YAML file and runs the experiment
    with model_definition_file instead of model_definition.

    :param csv_filename: path where the generated test CSV is written
    :param yaml_filename: path where the model definition YAML is written
    :return: None
    """
    input_features = ('[{name: utt, type: text, reduce_output: null, '
                      'vocab_size: 10, min_len: 10, max_len: 10,'
                      ' encoder: embed}]')
    output_features = ('[{name: iob, type: text, reduce_input: null, '
                       'vocab_size: 3, min_len: 10, max_len: 10,'
                       ' decoder: tagger}]')

    # Save the model definition to a yaml file.
    # safe_load: yaml.load without an explicit Loader is deprecated and a
    # TypeError with PyYAML >= 6; the template contains plain data only.
    model_definition = yaml.safe_load(
        model_definition_template.substitute(input_name=input_features,
                                             output_name=output_features))
    with open(yaml_filename, 'w') as yaml_out:
        yaml.dump(model_definition, yaml_out)

    rel_path = generate_data(input_features, output_features, csv_filename)
    experiment(model_definition=None,
               model_definition_file=yaml_filename,
               skip_save_processed_input=True,
               skip_save_progress=True,
               skip_save_unprocessed_output=True,
               data_csv=rel_path)