def test_experiment_sequence_combiner(sequence_encoder, csv_filename):
    config = {
        "input_features": [
            sequence_feature(
                name="seq1",
                min_len=5,
                max_len=5,
                encoder=sequence_encoder,
                cell_type="lstm",
                reduce_output=None,
            ),
            sequence_feature(
                name="seq2",
                min_len=5,
                max_len=5,
                encoder=sequence_encoder,
                cell_type="lstm",
                reduce_output=None,
            ),
            category_feature(vocab_size=5),
        ],
        "output_features": [category_feature(reduce_input="sum", vocab_size=5)],
        "training": {"epochs": 2},
        "combiner": {
            "type": "sequence",
            "encoder": "rnn",
            "main_sequence_feature": "seq1",
            "reduce_output": None,
        },
    }

    # Generate test data
    rel_path = generate_data(config["input_features"], config["output_features"], csv_filename)

    exp_dir_name = experiment_cli(
        config,
        skip_save_processed_input=False,
        skip_save_progress=True,
        skip_save_unprocessed_output=True,
        dataset=rel_path,
    )
    shutil.rmtree(exp_dir_name, ignore_errors=True)
def test_experiment_dataset_formats(data_format, csv_filename):
    # The primary focus of this test is to determine whether exceptions are
    # raised for different dataset formats and in_memory settings.
    input_features = [number_feature(), category_feature()]
    output_features = [category_feature(), number_feature()]

    config = {
        "input_features": input_features,
        "output_features": output_features,
        "combiner": {"type": "concat", "output_size": 14},
        "preprocessing": {},
        TRAINER: {"epochs": 2},
    }

    # setup training data format to test
    raw_data = generate_data(input_features, output_features, csv_filename)

    training_set_metadata = None
    if data_format == "hdf5":
        # hdf5 format
        training_set, _, _, training_set_metadata = preprocess_for_training(config, dataset=raw_data)
        dataset_to_use = training_set.data_hdf5_fp
    else:
        dataset_to_use = create_data_set_to_use(data_format, raw_data)

    # define Ludwig model
    model = LudwigModel(config=config)
    model.train(dataset=dataset_to_use, training_set_metadata=training_set_metadata, random_seed=default_random_seed)

    # run functions with the specified data format
    model.evaluate(dataset=dataset_to_use)
    model.predict(dataset=dataset_to_use)
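# Hypothetical parametrization sketch (the format list is illustrative, not taken from the
# source): "hdf5" is routed through preprocess_for_training above, while the other formats
# are produced from the raw CSV by create_data_set_to_use.
import pytest


@pytest.mark.parametrize("data_format", ["csv", "parquet", "hdf5"])
def test_experiment_dataset_formats_sketch(data_format, csv_filename):
    test_experiment_dataset_formats(data_format, csv_filename)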
def test_basic_image_feature(num_channels, image_source, in_memory, skip_save_processed_input, tmpdir):
    # Image Inputs
    image_dest_folder = os.path.join(tmpdir, "generated_images")

    input_features = [
        image_feature(
            folder=image_dest_folder,
            encoder="stacked_cnn",
            preprocessing={
                "in_memory": in_memory,
                "height": 12,
                "width": 12,
                "num_channels": num_channels,
                "num_processes": 5,
            },
            output_size=16,
            num_filters=8,
        )
    ]
    output_features = [category_feature(vocab_size=2, reduce_input="sum")]

    rel_path = generate_data(input_features, output_features, os.path.join(tmpdir, "dataset.csv"))

    if image_source == "file":
        # use images from file
        run_experiment(
            input_features, output_features, dataset=rel_path, skip_save_processed_input=skip_save_processed_input
        )
    else:
        # import images from file and store them in the dataframe as tensors
        df = pd.read_csv(rel_path)
        image_feature_name = input_features[0]["name"]
        df[image_feature_name] = df[image_feature_name].apply(lambda x: torchvision.io.read_image(x))

        run_experiment(
            input_features, output_features, dataset=df, skip_save_processed_input=skip_save_processed_input
        )
def run_hyperopt_executor(
    sampler,
    executor,
    csv_filename,
    validate_output_feature=False,
    validation_metric=None,
):
    config = _get_config(sampler, executor)
    rel_path = generate_data(config["input_features"], config["output_features"], csv_filename)

    config = merge_with_defaults(config)

    hyperopt_config = config["hyperopt"]

    if validate_output_feature:
        hyperopt_config["output_feature"] = config["output_features"][0]["name"]
    if validation_metric:
        hyperopt_config["validation_metric"] = validation_metric

    update_hyperopt_params_with_defaults(hyperopt_config)

    parameters = hyperopt_config["parameters"]
    if sampler.get("search_alg", {}).get("type", "") == "bohb":
        # bohb does not support grid_search search space
        del parameters["utterance.cell_type"]

    split = hyperopt_config["split"]
    output_feature = hyperopt_config["output_feature"]
    metric = hyperopt_config["metric"]
    goal = hyperopt_config["goal"]

    hyperopt_sampler = get_build_hyperopt_sampler(sampler["type"])(goal, parameters, **sampler)

    hyperopt_executor = get_build_hyperopt_executor(executor["type"])(
        hyperopt_sampler, output_feature, metric, split, **executor
    )

    hyperopt_executor.execute(
        config,
        dataset=rel_path,
        backend="local",
    )
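# Hypothetical invocation sketch (the sampler/executor values are illustrative, not taken
# from the source): the helper above expects dict configs whose "type" keys select the
# hyperopt sampler and executor implementations to build.
def test_hyperopt_executor_sketch(csv_filename):
    sampler = {"type": "ray", "num_samples": 2}
    executor = {"type": "ray"}
    run_hyperopt_executor(sampler, executor, csv_filename)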
def test_experiment_sequence_combiner_with_reduction_fails(csv_filename):
    config = {
        "input_features": [
            sequence_feature(
                name="seq1",
                min_len=5,
                max_len=5,
                encoder="embed",
                cell_type="lstm",
                reduce_output="sum",
            ),
            sequence_feature(
                name="seq2",
                min_len=5,
                max_len=5,
                encoder="embed",
                cell_type="lstm",
                reduce_output="sum",
            ),
            category_feature(vocab_size=5),
        ],
        "output_features": [category_feature(reduce_input="sum", vocab_size=5)],
        TRAINER: {"epochs": 2},
        "combiner": {
            "type": "sequence",
            "encoder": "rnn",
            "main_sequence_feature": "seq1",
            "reduce_output": None,
        },
    }

    # Generate test data
    rel_path = generate_data(config["input_features"], config["output_features"], csv_filename)

    # Encoding sequence features with 'embed' should fail with SequenceConcatCombiner, since at least one sequence
    # feature should be rank 3.
    with pytest.raises(ValueError):
        run_experiment(config=config, dataset=rel_path)
def test_api_callbacks(tmpdir, csv_filename, epochs, batch_size, num_examples, steps_per_checkpoint):
    mock_callback = mock.Mock(wraps=Callback())

    steps_per_epoch = num_examples / batch_size
    total_checkpoints = (steps_per_epoch / steps_per_checkpoint) * epochs
    total_batches = epochs * (num_examples / batch_size)

    input_features = [sequence_feature(reduce_output="sum")]
    output_features = [category_feature(vocab_size=5, reduce_input="sum")]

    config = {
        "input_features": input_features,
        "output_features": output_features,
        "combiner": {"type": "concat", "output_size": 14},
        TRAINER: {"epochs": epochs, "batch_size": batch_size, "steps_per_checkpoint": steps_per_checkpoint},
    }
    model = LudwigModel(config, callbacks=[mock_callback])

    data_csv = generate_data(
        input_features, output_features, os.path.join(tmpdir, csv_filename), num_examples=num_examples
    )
    val_csv = shutil.copyfile(data_csv, os.path.join(tmpdir, "validation.csv"))
    test_csv = shutil.copyfile(data_csv, os.path.join(tmpdir, "test.csv"))

    model.train(training_set=data_csv, validation_set=val_csv, test_set=test_csv)

    assert mock_callback.on_epoch_start.call_count == epochs
    assert mock_callback.on_epoch_end.call_count == epochs
    assert mock_callback.should_early_stop.call_count == total_checkpoints
    assert mock_callback.on_validation_start.call_count == total_checkpoints
    assert mock_callback.on_validation_end.call_count == total_checkpoints
    assert mock_callback.on_test_start.call_count == total_checkpoints
    assert mock_callback.on_test_end.call_count == total_checkpoints
    assert mock_callback.on_batch_start.call_count == total_batches
    assert mock_callback.on_batch_end.call_count == total_batches
    assert mock_callback.on_eval_end.call_count == total_checkpoints
    assert mock_callback.on_eval_start.call_count == total_checkpoints
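# Hedged note on the arithmetic above: the exact call-count assertions only hold when
# num_examples is divisible by batch_size and steps_per_epoch is divisible by
# steps_per_checkpoint. A hypothetical parametrization (values not from the source) that
# satisfies both constraints:
import pytest


@pytest.mark.parametrize(
    "epochs, batch_size, num_examples, steps_per_checkpoint",
    [(2, 4, 16, 4), (2, 4, 16, 2)],
)
def test_api_callbacks_sketch(tmpdir, csv_filename, epochs, batch_size, num_examples, steps_per_checkpoint):
    test_api_callbacks(tmpdir, csv_filename, epochs, batch_size, num_examples, steps_per_checkpoint)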
def test_basic_image_feature(num_channels, image_source, in_memory, skip_save_processed_input, csv_filename):
    # Image Inputs
    image_dest_folder = os.path.join(os.getcwd(), 'generated_images')

    input_features = [
        image_feature(
            folder=image_dest_folder,
            encoder='stacked_cnn',
            preprocessing={
                'in_memory': in_memory,
                'height': 12,
                'width': 12,
                'num_channels': num_channels,
                'num_processes': 5
            },
            fc_size=16,
            num_filters=8
        )
    ]
    output_features = [category_feature(vocab_size=2, reduce_input='sum')]

    rel_path = generate_data(input_features, output_features, csv_filename)

    if image_source == 'file':
        # use images from file
        run_experiment(
            input_features,
            output_features,
            dataset=rel_path,
            skip_save_processed_input=skip_save_processed_input
        )
    else:
        # import images from file and store them in the dataframe as ndarrays
        df = pd.read_csv(rel_path)
        image_feature_name = input_features[0]['name']
        df[image_feature_name] = df[image_feature_name].apply(lambda x: imread(x))

        run_experiment(
            input_features,
            output_features,
            dataset=df,
            skip_save_processed_input=skip_save_processed_input
        )

    # Delete the temporary data created
    shutil.rmtree(image_dest_folder, ignore_errors=True)
def run(csv_filename):
    # Image Inputs
    image_dest_folder = os.path.join(os.getcwd(), "generated_images")

    # Inputs & Outputs
    input_features = [image_feature(folder=image_dest_folder)]
    output_features = [category_feature()]
    data_csv = generate_data(input_features, output_features, csv_filename)

    config = {
        "input_features": input_features,
        "output_features": output_features,
        "combiner": {"type": "concat", "output_size": 14},
        TRAINER: {"epochs": 2},
    }

    callback = CometCallback()
    model = LudwigModel(config, callbacks=[callback])
    output_dir = None

    # Wrap these methods so we can check that they were called
    callback.on_train_init = Mock(side_effect=callback.on_train_init)
    callback.on_train_start = Mock(side_effect=callback.on_train_start)

    with patch("comet_ml.Experiment.log_asset_data") as mock_log_asset_data:
        try:
            # Training with csv
            _, _, output_dir = model.train(dataset=data_csv)
            model.predict(dataset=data_csv)
        finally:
            if output_dir:
                shutil.rmtree(output_dir, ignore_errors=True)

    # Verify that the experiment was created successfully
    assert callback.cometml_experiment is not None

    # Check that these methods were called at least once
    callback.on_train_init.assert_called()
    callback.on_train_start.assert_called()

    # Check that we ran `train_model`, which calls into `log_asset_data`, successfully
    mock_log_asset_data.assert_called()
def test_experiment_seq_seq1(csv_filename):
    # Single sequence input, single sequence output
    # Only the following encoders are working
    input_features_template = Template(
        '[{name: utterance, type: text, reduce_output: null,'
        ' vocab_size: 10, min_len: 10, max_len: 10, encoder: ${encoder}}]'
    )

    output_features = (
        '[{name: iob, type: text, reduce_input: null,'
        ' vocab_size: 3, min_len: 10, max_len: 10,'
        ' decoder: tagger}]'
    )

    # Generate test data
    rel_path = generate_data(
        input_features_template.substitute(encoder='rnn'),
        output_features,
        csv_filename
    )

    encoders2 = ['embed', 'rnn', 'cnnrnn']
    for encoder in encoders2:
        logging.info('Test 2, Encoder: {0}'.format(encoder))

        input_features = input_features_template.substitute(encoder=encoder)

        run_experiment(input_features, output_features, data_csv=rel_path)
def test_api_skip_parameters_predict(
    tmpdir,
    csv_filename,
    skip_save_unprocessed_output,
    skip_save_predictions,
):
    # Single category input, single category output
    input_features = [category_feature(vocab_size=5)]
    output_features = [category_feature(vocab_size=5)]

    # Generate test data
    rel_path = generate_data(input_features, output_features, os.path.join(tmpdir, csv_filename))

    run_api_commands(
        input_features,
        output_features,
        data_csv=rel_path,
        output_dir=tmpdir,
        skip_save_unprocessed_output=skip_save_unprocessed_output,
        skip_save_predictions=skip_save_predictions,
    )
def test_sequence_tagger(enc_cell_type, attention, csv_filename):
    # Define input and output features
    input_features = [
        sequence_feature(max_len=10, encoder="rnn", cell_type=enc_cell_type, reduce_output=None)
    ]
    output_features = [
        sequence_feature(
            max_len=10,
            decoder="tagger",
            attention=attention,
            reduce_input=None,
        )
    ]

    # Generate test data
    rel_path = generate_data(input_features, output_features, csv_filename)

    # setup sampled softmax loss
    output_features[0].update({"loss": {"type": "sampled_softmax_cross_entropy", "negative_samples": 7}})

    # run the experiment
    run_experiment(input_features, output_features, dataset=rel_path)
def test_sequence_tagger(enc_cell_type, attention, csv_filename):
    # Define input and output features
    input_features = [
        sequence_feature(max_len=10, encoder="rnn", cell_type=enc_cell_type, reduce_output=None)
    ]
    output_features = [
        sequence_feature(max_len=10, decoder="tagger", attention=attention, reduce_input=None)
    ]

    # Generate test data
    rel_path = generate_data(input_features, output_features, csv_filename)

    # run the experiment
    run_experiment(input_features, output_features, dataset=rel_path)
def test_sequence_tagger(enc_cell_type, csv_filename):
    # Define input and output features
    input_features = [
        sequence_feature(max_len=10, encoder='rnn', cell_type='lstm', reduce_output=None)
    ]
    output_features = [
        sequence_feature(max_len=10, decoder='tagger', reduce_input=None)
    ]

    # Generate test data
    rel_path = generate_data(input_features, output_features, csv_filename)

    # setup encoder specification
    input_features[0]['cell_type'] = enc_cell_type

    # run the experiment
    run_experiment(input_features, output_features, data_csv=rel_path)
def test_experiment_attention(csv_filename):
    # Machine translation with attention
    input_features = [
        sequence_feature(encoder='rnn', cell_type='lstm', max_len=10)
    ]
    output_features = [
        sequence_feature(
            max_len=10,
            cell_type='lstm',
            decoder='generator',
            attention='bahdanau'
        )
    ]

    # Generate test data
    rel_path = generate_data(input_features, output_features, csv_filename)

    for attention in ['bahdanau', 'luong']:
        output_features[0]['attention'] = attention
        run_experiment(input_features, output_features, data_csv=rel_path)
def run_test_gbm_category(tmpdir, backend_config):
    """Test that the GBM model can train and predict a categorical output (multiclass classification)."""
    input_features = [number_feature(), category_feature(reduce_output="sum")]
    vocab_size = 3
    output_feature = category_feature(vocab_size=vocab_size)
    output_features = [output_feature]

    csv_filename = os.path.join(tmpdir, "training.csv")
    dataset_filename = generate_data(input_features, output_features, csv_filename, num_examples=100)

    config = {
        MODEL_TYPE: "gbm",
        "input_features": input_features,
        "output_features": output_features,
        TRAINER: {"num_boost_round": 2},
    }

    model = LudwigModel(config, backend=backend_config)
    _, _, output_directory = model.train(
        dataset=dataset_filename,
        output_directory=tmpdir,
        skip_save_processed_input=True,
        skip_save_progress=True,
        skip_save_unprocessed_output=True,
        skip_save_log=True,
    )
    model.load(os.path.join(tmpdir, "api_experiment_run", "model"))
    preds, _ = model.predict(dataset=dataset_filename, output_directory=output_directory)

    prob_col = preds[output_feature["name"] + "_probabilities"]
    if backend_config["type"] == "ray":
        prob_col = prob_col.compute()
    assert len(prob_col.iloc[0]) == (vocab_size + 1)
    assert prob_col.apply(sum).mean() == pytest.approx(1.0)
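# Hypothetical driver (not in the source): the helper above is typically invoked with a
# concrete backend config; {"type": "local"} mirrors the local backend configuration used
# elsewhere in these tests.
def test_local_gbm_category_sketch(tmpdir):
    backend_config = {"type": "local"}
    run_test_gbm_category(tmpdir, backend_config)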
def __init__(self, csv_filename):
    self.csv_file = csv_filename
    self.model = None

    self.input_features = [
        text_feature(vocab_size=10, min_len=1, representation='sparse'),
        category_feature(vocab_size=10)
    ]
    self.output_features = [
        category_feature(vocab_size=2, reduce_input='sum')
    ]

    encoder = 'parallel_cnn'
    data_csv = generate_data(
        self.input_features,
        self.output_features,
        self.csv_file
    )
    self.input_features[0]['encoder'] = encoder
    self.setup_model()

    test_df, train_df, val_df = obtain_df_splits(data_csv)
    self.train_stats = self.model.train(
        data_train_df=train_df,
        data_validation_df=val_df
    )
    self.test_stats_full = self.model.test(
        data_df=test_df
    )
    self.output_feature_name = self.output_features[0]['name']

    # probabilities need to be list of lists containing each row data
    # from the probability columns
    # ref: https://uber.github.io/ludwig/api/#test - Return
    num_probs = self.output_features[0]['vocab_size']
    self.probability = self.test_stats_full[0].iloc[:, 1:(num_probs + 2)].values
    self.ground_truth_metadata = self.model.train_set_metadata

    target_predictions = test_df[self.output_feature_name]
    self.ground_truth = np.asarray([
        self.ground_truth_metadata[self.output_feature_name]['str2idx'][test_row]
        for test_row in target_predictions
    ])

    self.prediction_raw = self.test_stats_full[0].iloc[:, 0].tolist()
    self.prediction = np.asarray([
        self.ground_truth_metadata[self.output_feature_name]['str2idx'][pred_row]
        for pred_row in self.prediction_raw
    ])
def test_add_feature_data(feature_type, tmpdir):
    preprocessing_params = {
        "audio_file_length_limit_in_s": 3.0,
        "missing_value_strategy": BACKFILL,
        "in_memory": True,
        "padding_value": 0,
        "norm": "per_file",
        "type": feature_type,
        "window_length_in_s": 0.04,
        "window_shift_in_s": 0.02,
        "num_fft_points": None,
        "window_type": "hamming",
        "num_filter_bands": 80,
    }
    audio_dest_folder = os.path.join(tmpdir, "generated_audio")
    audio_feature_config = audio_feature(audio_dest_folder, preprocessing=preprocessing_params)
    data_df_path = generate_data(
        [audio_feature_config],
        [category_feature(vocab_size=5, reduce_input="sum")],
        os.path.join(tmpdir, "data.csv"),
        num_examples=10,
    )
    data_df = pd.read_csv(data_df_path)
    metadata = {
        audio_feature_config["name"]: AudioFeatureMixin.get_feature_meta(
            data_df[audio_feature_config["name"]], preprocessing_params, LOCAL_BACKEND
        )
    }

    proc_df = {}
    AudioFeatureMixin.add_feature_data(
        feature_config=audio_feature_config,
        input_df=data_df,
        proc_df=proc_df,
        metadata=metadata,
        preprocessing_parameters=preprocessing_params,
        backend=LOCAL_BACKEND,
        skip_save_processed_input=False,
    )

    assert len(proc_df[audio_feature_config[PROC_COLUMN]]) == 10
def test_server_integration(csv_filename):
    # Image Inputs
    image_dest_folder = os.path.join(os.getcwd(), 'generated_images')

    # Resnet encoder
    input_features = [
        image_feature(
            folder=image_dest_folder,
            encoder='resnet',
            preprocessing={
                'in_memory': True,
                'height': 8,
                'width': 8,
                'num_channels': 3
            },
            fc_size=16,
            num_filters=8
        ),
        text_feature(encoder='embed', min_len=1),
        numerical_feature(normalization='zscore')
    ]
    output_features = [
        categorical_feature(vocab_size=2, reduce_input='sum'),
        numerical_feature()
    ]

    rel_path = generate_data(input_features, output_features, csv_filename)
    model = train_model(input_features, output_features, data_csv=rel_path)

    app = server(model)
    client = TestClient(app)

    response = client.post('/predict')
    assert response.json() == ALL_FEATURES_PRESENT_ERROR

    data_df = read_csv(rel_path)
    data, files = convert_to_form(data_df.T.to_dict()[0])
    response = client.post('/predict', data=data, files=files)

    response_keys = sorted(list(response.json().keys()))
    assert response_keys == sorted(output_keys_for(output_features))

    shutil.rmtree(model.exp_dir_name, ignore_errors=True)
    shutil.rmtree(image_dest_folder)
def test_remote_training_set(tmpdir, fs_protocol):
    output_directory = f"{fs_protocol}://{tmpdir}"

    input_features = [sequence_feature(reduce_output="sum")]
    output_features = [category_feature(vocab_size=2, reduce_input="sum")]

    csv_filename = os.path.join(tmpdir, "training.csv")
    data_csv = generate_data(input_features, output_features, csv_filename)
    val_csv = shutil.copyfile(data_csv, os.path.join(tmpdir, "validation.csv"))
    test_csv = shutil.copyfile(data_csv, os.path.join(tmpdir, "test.csv"))

    data_csv = f"{fs_protocol}://{os.path.abspath(data_csv)}"
    val_csv = f"{fs_protocol}://{os.path.abspath(val_csv)}"
    test_csv = f"{fs_protocol}://{os.path.abspath(test_csv)}"

    config = {
        "input_features": input_features,
        "output_features": output_features,
        "combiner": {"type": "concat", "output_size": 14},
        TRAINER: {"epochs": 2},
    }

    config_path = os.path.join(tmpdir, "config.yaml")
    with open(config_path, "w") as f:
        yaml.dump(config, f)
    config_path = f"{fs_protocol}://{config_path}"

    backend_config = {
        "type": "local",
    }
    backend = initialize_backend(backend_config)

    model = LudwigModel(config_path, backend=backend)
    _, _, output_directory = model.train(
        training_set=data_csv,
        validation_set=val_csv,
        test_set=test_csv,
        output_directory=output_directory
    )
    model.predict(dataset=test_csv, output_directory=output_directory)

    # Train again, this time the cache will be used
    # Resume from the remote output directory
    model.train(
        training_set=data_csv,
        validation_set=val_csv,
        test_set=test_csv,
        model_resume_path=output_directory
    )
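# Hypothetical parametrization sketch (not from the source): "file" is the fsspec protocol
# for the local filesystem, so the remote-path handling above can be exercised without an
# external object store.
import pytest


@pytest.mark.parametrize("fs_protocol", ["file"])
def test_remote_training_set_local_fs_sketch(tmpdir, fs_protocol):
    test_remote_training_set(tmpdir, fs_protocol)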
def test_missing_values_drop_rows(csv_filename, tmpdir):
    data_csv_path = os.path.join(tmpdir, csv_filename)

    kwargs = {PREPROCESSING: {"missing_value_strategy": DROP_ROW}}
    input_features = [
        number_feature(),
        binary_feature(),
        category_feature(vocab_size=3),
    ]
    output_features = [
        binary_feature(**kwargs),
        number_feature(**kwargs),
        category_feature(vocab_size=3, **kwargs),
        sequence_feature(vocab_size=3, **kwargs),
        text_feature(vocab_size=3, **kwargs),
        set_feature(vocab_size=3, **kwargs),
        vector_feature(),
    ]
    backend = LocalTestBackend()
    config = {"input_features": input_features, "output_features": output_features, TRAINER: {"epochs": 2}}

    training_data_csv_path = generate_data(input_features, output_features, data_csv_path)
    df = pd.read_csv(training_data_csv_path)

    # set 10% of values to NaN
    nan_percent = 0.1
    ix = [(row, col) for row in range(df.shape[0]) for col in range(df.shape[1])]
    for row, col in random.sample(ix, int(round(nan_percent * len(ix)))):
        df.iat[row, col] = np.nan

    # run preprocessing
    ludwig_model = LudwigModel(config, backend=backend)
    ludwig_model.preprocess(dataset=df)
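# The 10% NaN injection above is self-contained; as a hypothetical refactor (not in the
# source) it could be pulled into a small helper reusable by other missing-value tests.
import random

import numpy as np
import pandas as pd


def inject_nans(df: pd.DataFrame, nan_percent: float = 0.1) -> pd.DataFrame:
    """Set roughly `nan_percent` of the cells of `df` to NaN, in place, and return it."""
    ix = [(row, col) for row in range(df.shape[0]) for col in range(df.shape[1])]
    for row, col in random.sample(ix, int(round(nan_percent * len(ix)))):
        df.iat[row, col] = np.nan
    return df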
def test_experiment_tied_weights(csv_filename):
    # Two tied text inputs, single category output
    input_features = [
        text_feature(name="text_feature1", min_len=1, encoder="cnnrnn", reduce_output="sum"),
        text_feature(name="text_feature2", min_len=1, encoder="cnnrnn", reduce_output="sum", tied="text_feature1"),
    ]
    output_features = [category_feature(vocab_size=2, reduce_input="sum")]

    # Generate test data
    rel_path = generate_data(input_features, output_features, csv_filename)
    for encoder in ENCODERS:
        input_features[0]["encoder"] = encoder
        input_features[1]["encoder"] = encoder
        run_experiment(input_features, output_features, dataset=rel_path)
def test_experiment_infer_image_metadata(csv_filename: str):
    # Image Inputs
    image_dest_folder = os.path.join(os.getcwd(), "generated_images")

    # Stacked CNN encoder
    input_features = [
        image_feature(folder=image_dest_folder, encoder="stacked_cnn", fc_size=16, num_filters=8),
        text_feature(encoder="embed", min_len=1),
        numerical_feature(normalization="zscore"),
    ]
    output_features = [category_feature(vocab_size=2, reduce_input="sum"), numerical_feature()]

    rel_path = generate_data(input_features, output_features, csv_filename)

    # remove the image preprocessing section to force inferring image metadata
    input_features[0].pop("preprocessing")

    run_experiment(input_features, output_features, dataset=rel_path)

    # Delete the temporary data created
    shutil.rmtree(image_dest_folder)
def test_experiment_tied_weights(csv_filename):
    # Two tied text inputs, single category output
    input_features = [
        text_feature(name='text_feature1', min_len=1, encoder='cnnrnn', reduce_output='sum'),
        text_feature(name='text_feature2', min_len=1, encoder='cnnrnn', reduce_output='sum',
                     tied_weights='text_feature1')
    ]
    output_features = [categorical_feature(vocab_size=2, reduce_input='sum')]

    # Generate test data
    rel_path = generate_data(input_features, output_features, csv_filename)
    for encoder in ENCODERS:
        input_features[0]['encoder'] = encoder
        input_features[1]['encoder'] = encoder
        run_experiment(input_features, output_features, data_csv=rel_path)
def test_tune_batch_size_lr(tmpdir):
    with ray_start(num_cpus=2, num_gpus=None):
        config = {
            "input_features": [
                number_feature(normalization="zscore"),
                set_feature(),
                binary_feature(),
            ],
            "output_features": [category_feature(vocab_size=2, reduce_input="sum")],
            "combiner": {"type": "concat", "output_size": 14},
            TRAINER: {"epochs": 2, "batch_size": "auto", "learning_rate": "auto"},
        }

        backend_config = {**RAY_BACKEND_CONFIG}

        csv_filename = os.path.join(tmpdir, "dataset.csv")
        dataset_csv = generate_data(
            config["input_features"], config["output_features"], csv_filename, num_examples=100
        )
        dataset_parquet = create_data_set_to_use("parquet", dataset_csv)
        model = run_api_experiment(config, dataset=dataset_parquet, backend_config=backend_config)

        assert model.config[TRAINER]["batch_size"] != "auto"
        assert model.config[TRAINER]["learning_rate"] != "auto"
def test_api_skip_parameters_predict(
    csv_filename,
    skip_save_unprocessed_output,
    skip_save_predictions,
):
    # Single category input, single category output
    input_features = [category_feature(vocab_size=2)]
    output_features = [category_feature(vocab_size=2)]

    with tempfile.TemporaryDirectory() as output_dir:
        # Generate test data
        rel_path = generate_data(input_features, output_features, os.path.join(output_dir, csv_filename))
        run_api_commands(
            input_features,
            output_features,
            data_csv=rel_path,
            output_dir=output_dir,
            skip_save_unprocessed_output=skip_save_unprocessed_output,
            skip_save_predictions=skip_save_predictions,
        )
def _prepare_data(csv_filename, config_filename):
    # Single sequence input, single category output
    input_features = [sequence_feature(reduce_output='sum')]
    output_features = [category_feature(vocab_size=2, reduce_input='sum')]

    # Generate test data
    dataset_filename = generate_data(input_features, output_features, csv_filename)

    # generate config file
    config = {
        'input_features': input_features,
        'output_features': output_features,
        'combiner': {'type': 'concat', 'fc_size': 14},
        'training': {'epochs': 2}
    }

    with open(config_filename, 'w') as f:
        yaml.dump(config, f)

    return dataset_filename
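# Hypothetical usage sketch (not from the source): a caller would typically point a
# LudwigModel at the config file written by _prepare_data and train on the generated
# dataset; LudwigModel accepts a path to a YAML config as well as a dict.
def test_prepare_data_sketch(csv_filename, yaml_filename):
    dataset_filename = _prepare_data(csv_filename, yaml_filename)
    model = LudwigModel(config=yaml_filename)
    model.train(dataset=dataset_filename)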
def test_experiment_seq_seq_model_def_file(csv_filename, yaml_filename):
    # seq-to-seq test using a model definition file instead of a dictionary
    input_features = [text_feature(reduce_output=None, encoder='embed')]
    output_features = [
        text_feature(reduce_input=None, vocab_size=3, decoder='tagger')
    ]

    # Save the model definition to a yaml file
    model_definition = {
        'input_features': input_features,
        'output_features': output_features,
        'combiner': {'type': 'concat', 'fc_size': 14},
        'training': {'epochs': 2}
    }
    with open(yaml_filename, 'w') as yaml_out:
        yaml.safe_dump(model_definition, yaml_out)

    rel_path = generate_data(input_features, output_features, csv_filename)
    run_experiment(
        None,
        None,
        data_csv=rel_path,
        model_definition_file=yaml_filename
    )
def test_sequence_generator(enc_encoder, enc_cell_type, dec_cell_type, csv_filename):
    # Define input and output features
    input_features = [
        sequence_feature(min_len=5, max_len=10, encoder=enc_encoder, cell_type=enc_cell_type)
    ]
    output_features = [
        sequence_feature(min_len=5, max_len=10, decoder="generator", cell_type=dec_cell_type)
    ]

    # Generate test data
    rel_path = generate_data(input_features, output_features, csv_filename)

    # run the experiment
    run_experiment(input_features, output_features, dataset=rel_path)
def test_visual_question_answering(csv_filename):
    image_dest_folder = os.path.join(os.getcwd(), 'generated_images')
    input_features = [
        image_feature(
            folder=image_dest_folder,
            encoder='resnet',
            preprocessing={
                'in_memory': True,
                'height': 8,
                'width': 8,
                'num_channels': 3
            },
            fc_size=8,
            num_filters=8
        ),
        text_feature(encoder='embed', min_len=1, level='word'),
    ]
    output_features = [sequence_feature(decoder='generator', cell_type='lstm')]
    rel_path = generate_data(input_features, output_features, csv_filename)
    run_experiment(input_features, output_features, data_csv=rel_path)

    # Delete the temporary data created
    shutil.rmtree(image_dest_folder)
def test_torchscript_e2e_date(tmpdir, csv_filename):
    data_csv_path = os.path.join(tmpdir, csv_filename)
    input_features = [
        date_feature(),
    ]
    output_features = [
        binary_feature(),
    ]
    backend = LocalTestBackend()
    config = {"input_features": input_features, "output_features": output_features, TRAINER: {"epochs": 2}}
    training_data_csv_path = generate_data(input_features, output_features, data_csv_path)

    validate_torchscript_outputs(tmpdir, config, backend, training_data_csv_path)