def test_read_image_from_numpy_array(tmpdir, csv_filename):
    """Preprocessing should accept images supplied in-memory as numpy arrays."""
    input_features = [image_feature(os.path.join(tmpdir, "generated_output"))]
    output_features = [category_feature(vocab_size=5, reduce_input="sum")]
    config = {
        "input_features": input_features,
        "output_features": output_features,
        TRAINER: {"epochs": 2},
    }

    data_csv = generate_data(
        input_features, output_features, os.path.join(tmpdir, csv_filename), num_examples=NUM_EXAMPLES
    )
    df = pd.read_csv(data_csv)

    # Replace each image path in the generated CSV with the decoded image
    # loaded as an ndarray, keeping the target column as-is.
    image_col = input_features[0][NAME]
    target_col = output_features[0][NAME]
    df_with_images_as_numpy_arrays = pd.DataFrame(
        [
            {
                image_col: np.array(Image.open(row[image_col])),
                target_col: row[target_col],
            }
            for _, row in df.iterrows()
        ]
    )

    model = LudwigModel(config)
    model.preprocess(
        df_with_images_as_numpy_arrays,
        skip_save_processed_input=False,
    )
def test_validate_with_preprocessing_defaults():
    """A config that spells out every feature type's preprocessing defaults must
    validate both as written and after merging in the global defaults."""
    config = {
        "input_features": [
            audio_feature("/tmp/destination_folder", preprocessing=AudioFeatureMixin.preprocessing_defaults),
            bag_feature(preprocessing=BagFeatureMixin.preprocessing_defaults),
            binary_feature(preprocessing=BinaryFeatureMixin.preprocessing_defaults),
            category_feature(preprocessing=CategoryFeatureMixin.preprocessing_defaults),
            date_feature(preprocessing=DateFeatureMixin.preprocessing_defaults),
            h3_feature(preprocessing=H3FeatureMixin.preprocessing_defaults),
            image_feature("/tmp/destination_folder", preprocessing=ImageFeatureMixin.preprocessing_defaults),
            numerical_feature(preprocessing=NumericalFeatureMixin.preprocessing_defaults),
            sequence_feature(preprocessing=SequenceFeatureMixin.preprocessing_defaults),
            set_feature(preprocessing=SetFeatureMixin.preprocessing_defaults),
            text_feature(preprocessing=TextFeatureMixin.preprocessing_defaults),
            timeseries_feature(preprocessing=TimeseriesFeatureMixin.preprocessing_defaults),
            vector_feature(preprocessing=VectorFeatureMixin.preprocessing_defaults),
        ],
        "output_features": [{"name": "target", "type": "category"}],
        "training": {
            "decay": True,
            "learning_rate": 0.001,
            "validation_field": "target",
            "validation_metric": "accuracy",
        },
    }

    # The raw config must validate on its own.
    validate_config(config)

    # Merging in defaults must keep the config valid.
    config = merge_with_defaults(config)
    validate_config(config)
def test_experiment_infer_image_metadata(csv_filename: str):
    """Run an experiment where the image preprocessing section is removed, forcing
    image metadata (size, channels) to be inferred from the data itself.

    Fix: the generated-images folder was only removed on success, leaking temp
    data into the CWD whenever generate_data/run_experiment raised. Cleanup is
    now in a finally block with ignore_errors (matching the wandb run() helper).
    """
    # Image inputs are generated under the current working directory.
    image_dest_folder = os.path.join(os.getcwd(), "generated_images")

    # Resnet encoder
    input_features = [
        image_feature(folder=image_dest_folder, encoder="stacked_cnn", fc_size=16, num_filters=8),
        text_feature(encoder="embed", min_len=1),
        numerical_feature(normalization="zscore"),
    ]
    output_features = [category_feature(vocab_size=2, reduce_input="sum"), numerical_feature()]

    try:
        rel_path = generate_data(input_features, output_features, csv_filename)

        # remove image preprocessing section to force inferring image meta data
        input_features[0].pop("preprocessing")

        run_experiment(input_features, output_features, dataset=rel_path)
    finally:
        # Delete the temporary data even if the experiment failed.
        shutil.rmtree(image_dest_folder, ignore_errors=True)
def test_wandb_experiment(csv_filename):
    """Test W&B integration: running an experiment with the --wandb flag should
    create a wandb run.

    Fix: the original performed no cleanup on failure — the generated-images
    folder, the contrib registry entry, and the appended '--wandb' argv flag
    all leaked into subsequent tests if any step or assertion raised. Cleanup
    now runs in a finally block.
    """
    # add wandb arg and detect flag
    sys.argv.append('--wandb')
    ludwig.contrib.contrib_import()

    # disable sync to cloud
    os.environ['WANDB_MODE'] = 'dryrun'

    # Image Inputs
    image_dest_folder = os.path.join(os.getcwd(), 'generated_images')

    try:
        # Inputs & Outputs
        input_features = [image_feature(folder=image_dest_folder)]
        output_features = [category_feature()]
        rel_path = generate_data(input_features, output_features, csv_filename)

        # Run experiment
        run_experiment(input_features, output_features, data_csv=rel_path)

        # Check a W&B run was created
        assert wandb.run is not None

        # End session
        wandb.join()
    finally:
        # Undo global state mutations so later tests are unaffected.
        ludwig.contrib.contrib_registry['instances'].pop()
        if '--wandb' in sys.argv:
            sys.argv.remove('--wandb')
        # Delete the temporary data created
        shutil.rmtree(image_dest_folder, ignore_errors=True)
def test_config_bad_preprocessing_param():
    """An unknown encoder name in an input feature must fail schema validation."""
    image_preprocessing = {
        "in_memory": True,
        "height": 12,
        "width": 12,
        "num_channels": 3,
        "tokenizer": "space",
    }
    config = {
        "input_features": [
            sequence_feature(reduce_output="sum", encoder="fake"),
            image_feature("/tmp/destination_folder", preprocessing=image_preprocessing),
        ],
        "output_features": [category_feature(vocab_size=2, reduce_input="sum")],
        "combiner": {"type": "concat", "output_size": 14},
    }

    # The bogus "fake" encoder should be rejected with a ValidationError.
    with pytest.raises(ValidationError, match=r"^'fake' is not one of .*"):
        validate_config(config)
def test_saved_weights_in_checkpoint(tmpdir):
    """After training, each input feature recorded in the saved hyperparameters
    file must be flagged as having its weights stored in the checkpoint."""
    image_dest_folder = os.path.join(tmpdir, "generated_images")
    input_features = [text_feature(), image_feature(image_dest_folder)]
    output_features = [category_feature(name="class")]

    data_csv = generate_data(input_features, output_features, os.path.join(tmpdir, "dataset.csv"))
    # Reuse the same generated data for the validation and test splits.
    val_csv = shutil.copyfile(data_csv, os.path.join(tmpdir, "validation.csv"))
    test_csv = shutil.copyfile(data_csv, os.path.join(tmpdir, "test.csv"))

    config = {
        "input_features": input_features,
        "output_features": output_features,
    }
    model = LudwigModel(config)
    _, _, output_dir = model.train(
        training_set=data_csv, validation_set=val_csv, test_set=test_csv, output_directory=tmpdir
    )

    # Load the hyperparameters the training run wrote to disk.
    config_save_path = os.path.join(output_dir, "model", MODEL_HYPERPARAMETERS_FILE_NAME)
    with open(config_save_path) as f:
        saved_config = json.load(f)

    for saved_input_feature in saved_config["input_features"]:
        assert "saved_weights_in_checkpoint" in saved_input_feature
        assert saved_input_feature["saved_weights_in_checkpoint"]
def run(csv_filename):
    """Run an image experiment with a WandbCallback attached and verify the
    training lifecycle hooks were invoked."""
    callback = WandbCallback()
    # Wrap the hooks so their invocation can be asserted afterwards.
    callback.on_train_init = Mock(side_effect=callback.on_train_init)
    callback.on_train_start = Mock(side_effect=callback.on_train_start)

    # disable sync to cloud
    os.environ["WANDB_MODE"] = "dryrun"

    # Image Inputs
    image_dest_folder = os.path.join(os.getcwd(), "generated_images")

    try:
        # Inputs & Outputs
        input_features = [image_feature(folder=image_dest_folder)]
        output_features = [category_feature()]
        rel_path = generate_data(input_features, output_features, csv_filename)

        # Run experiment
        run_experiment(input_features, output_features, dataset=rel_path, callbacks=[callback])
    finally:
        # Always remove the generated image data.
        shutil.rmtree(image_dest_folder, ignore_errors=True)

    # Both hooks must have fired at least once.
    callback.on_train_init.assert_called()
    callback.on_train_start.assert_called()
def test_experiment_infer_image_metadata(tmpdir):
    """Image metadata should be inferred when the preprocessing section is absent."""
    image_dest_folder = os.path.join(tmpdir, "generated_images")

    # Resnet encoder
    input_features = [
        image_feature(folder=image_dest_folder, encoder="stacked_cnn", output_size=16, num_filters=8),
        text_feature(encoder="embed", min_len=1),
        number_feature(normalization="zscore"),
    ]
    output_features = [
        category_feature(vocab_size=2, reduce_input="sum"),
        number_feature(),
    ]

    rel_path = generate_data(input_features, output_features, os.path.join(tmpdir, "dataset.csv"))

    # Drop the preprocessing section so image metadata must be inferred from data.
    input_features[0].pop("preprocessing")

    run_experiment(input_features, output_features, dataset=rel_path)
def test_experiment_image_inputs(image_params: ImageParams, csv_filename: str):
    """Run an image experiment parameterized over encoder, in-memory flag, and
    skip_save_processed_input.

    Fix: the generated-images folder was only deleted on success, leaking temp
    data into the CWD whenever the experiment raised. Cleanup now runs in a
    finally block with ignore_errors.
    """
    # Image Inputs
    image_dest_folder = os.path.join(os.getcwd(), "generated_images")

    # Resnet encoder
    input_features = [
        image_feature(
            folder=image_dest_folder,
            encoder="resnet",
            preprocessing={"in_memory": True, "height": 12, "width": 12, "num_channels": 3, "num_processes": 5},
            fc_size=16,
            num_filters=8,
        ),
        text_feature(encoder="embed", min_len=1),
        numerical_feature(normalization="zscore"),
    ]
    output_features = [category_feature(vocab_size=2, reduce_input="sum"), numerical_feature()]

    # Apply the parameterized encoder and in-memory flag.
    input_features[0]["encoder"] = image_params.image_encoder
    input_features[0]["preprocessing"]["in_memory"] = image_params.in_memory_flag

    try:
        rel_path = generate_data(input_features, output_features, csv_filename)
        run_experiment(
            input_features,
            output_features,
            dataset=rel_path,
            skip_save_processed_input=image_params.skip_save_processed_input,
        )
    finally:
        # Delete the temporary data even if the experiment failed.
        shutil.rmtree(image_dest_folder, ignore_errors=True)
def test_config_bad_preprocessing_param():
    """An unknown encoder name in an input feature must fail schema validation."""
    image_preprocessing = {
        'in_memory': True,
        'height': 12,
        'width': 12,
        'num_channels': 3,
        'tokenizer': 'space',
    }
    config = {
        'input_features': [
            sequence_feature(reduce_output='sum', encoder='fake'),
            image_feature('/tmp/destination_folder', preprocessing=image_preprocessing),
        ],
        'output_features': [category_feature(vocab_size=2, reduce_input='sum')],
        'combiner': {'type': 'concat', 'fc_size': 14},
    }

    # The bogus 'fake' encoder should be rejected with a ValidationError.
    with pytest.raises(ValidationError, match=r"^'fake' is not one of .*"):
        validate_config(config)
def test_image_resizing_num_channel_handling(csv_filename):
    """
    This test creates two image datasets with 3 channels and 1 channel. The
    combination of this data is used to train a model. This checks the cases
    where the user may or may not specify a number of channels in the config.

    Fixes over the original: cleanup moved into a finally block so the
    generated-images folder no longer leaks when an experiment fails, and the
    "sepcifiies" comment typo corrected.

    :param csv_filename: temporary CSV path provided by the fixture
    :return: None
    """
    # Image Inputs
    image_dest_folder = os.path.join(os.getcwd(), 'generated_images')

    # Resnet encoder
    input_features = [
        image_feature(folder=image_dest_folder,
                      encoder='resnet',
                      preprocessing={
                          'in_memory': True,
                          'height': 8,
                          'width': 8,
                          'num_channels': 3,
                          'num_processes': 5
                      },
                      fc_size=8,
                      num_filters=8),
        text_feature(encoder='embed', min_len=1),
        numerical_feature(normalization='minmax')
    ]
    output_features = [binary_feature(), numerical_feature()]

    try:
        # First dataset: 3-channel images.
        rel_path = generate_data(input_features, output_features, csv_filename, num_examples=50)
        df1 = read_csv(rel_path)

        # Second dataset: 1-channel images.
        input_features[0]['preprocessing']['num_channels'] = 1
        rel_path = generate_data(input_features, output_features, csv_filename, num_examples=50)
        df2 = read_csv(rel_path)

        # Combine both datasets into one mixed-channel dataset.
        df = concatenate_df(df1, df2, None, LOCAL_BACKEND)
        df.to_csv(rel_path, index=False)

        # Here the user specifies number of channels. Exception shouldn't be thrown
        run_experiment(input_features, output_features, dataset=rel_path)

        del input_features[0]['preprocessing']['num_channels']

        # User now doesn't specify num channels. Should throw exception
        with pytest.raises(ValueError):
            run_experiment(input_features, output_features, dataset=rel_path)
    finally:
        # Delete the temporary data even if an experiment failed.
        shutil.rmtree(image_dest_folder, ignore_errors=True)
def test_image_resizing_num_channel_handling(csv_filename):
    """This test creates two image datasets with 3 channels and 1 channel. The combination of this data is used to
    train a model.

    This checks the cases where the user may or may not specify a number of channels in the config.

    :param csv_filename:
    :return:
    """
    # Image Inputs
    image_dest_folder = os.path.join(os.getcwd(), "generated_images")

    # Resnet encoder
    input_features = [
        image_feature(
            folder=image_dest_folder,
            encoder="resnet",
            preprocessing={"in_memory": True, "height": 8, "width": 8, "num_channels": 3, "num_processes": 5},
            fc_size=8,
            num_filters=8,
        ),
        text_feature(encoder="embed", min_len=1),
        numerical_feature(normalization="minmax"),
    ]
    output_features = [binary_feature(), numerical_feature()]

    # First dataset: 3-channel images.
    rel_path = generate_data(input_features, output_features, csv_filename, num_examples=50)
    df1 = read_csv(rel_path)

    # Second dataset: 1-channel images, written over the same CSV path.
    input_features[0]["preprocessing"]["num_channels"] = 1
    rel_path = generate_data(input_features, output_features, csv_filename, num_examples=50)
    df2 = read_csv(rel_path)

    # Merge both datasets into a single mixed-channel dataset.
    df = concatenate_df(df1, df2, None, LOCAL_BACKEND)
    df.to_csv(rel_path, index=False)

    # Here the user specifies number of channels. Exception shouldn't be thrown
    run_experiment(input_features, output_features, dataset=rel_path)

    del input_features[0]["preprocessing"]["num_channels"]

    # User doesn't specify num channels, but num channels is inferred. Exception shouldn't be thrown
    run_experiment(input_features, output_features, dataset=rel_path)

    # Delete the temporary data created
    shutil.rmtree(image_dest_folder)
def test_config_encoders():
    """Config validation should accept every registered sequence encoder."""
    for encoder_name in ENCODERS:
        input_features = [
            sequence_feature(reduce_output='sum', encoder=encoder_name),
            image_feature('/tmp/destination_folder'),
        ]
        config = {
            'input_features': input_features,
            'output_features': [category_feature(vocab_size=2, reduce_input='sum')],
            'combiner': {'type': 'concat', 'fc_size': 14},
        }
        validate_config(config)
def test_config_encoders():
    """Config validation should accept every registered sequence encoder."""
    for encoder_name in ENCODERS:
        input_features = [
            sequence_feature(reduce_output="sum", encoder=encoder_name),
            image_feature("/tmp/destination_folder"),
        ]
        config = {
            "input_features": input_features,
            "output_features": [category_feature(vocab_size=2, reduce_input="sum")],
            "combiner": {"type": "concat", "fc_size": 14},
        }
        validate_config(config)
def run(csv_filename):
    """Train and predict on a small image dataset and verify the Comet contrib
    integration created an experiment and exercised its hooks."""
    # Check that comet has been imported successfully as a contrib package
    contrib_instances = ludwig.contrib.contrib_registry["instances"]
    assert len(contrib_instances) == 1
    comet_instance = contrib_instances[0]
    assert isinstance(comet_instance, Comet)

    # Image Inputs
    image_dest_folder = os.path.join(os.getcwd(), 'generated_images')

    # Inputs & Outputs
    input_features = [image_feature(folder=image_dest_folder)]
    output_features = [category_feature()]
    data_csv = generate_data(input_features, output_features, csv_filename)

    config = {
        'input_features': input_features,
        'output_features': output_features,
        'combiner': {'type': 'concat', 'fc_size': 14},
        'training': {'epochs': 2}
    }

    model = LudwigModel(config)
    output_dir = None

    # Wrap these methods so we can check that they were called
    comet_instance.train_init = Mock(side_effect=comet_instance.train_init)
    comet_instance.train_model = Mock(side_effect=comet_instance.train_model)

    with patch('comet_ml.Experiment.log_asset_data') as mock_log_asset_data:
        try:
            # Training with csv
            _, _, output_dir = model.train(dataset=data_csv)
            model.predict(dataset=data_csv)
        finally:
            # Remove the training output regardless of success.
            if output_dir:
                shutil.rmtree(output_dir, ignore_errors=True)

    # Verify that the experiment was created successfully
    assert comet_instance.cometml_experiment is not None

    # Check that these methods were called at least once
    comet_instance.train_init.assert_called()
    comet_instance.train_model.assert_called()

    # Check that we ran `train_model`, which calls into `log_assert_data`, successfully
    mock_log_asset_data.assert_called()
def test_config_features():
    """Validate a config built from every feature type, then verify that types
    without an output implementation are rejected when used as outputs."""
    all_input_features = [
        audio_feature("/tmp/destination_folder"),
        bag_feature(),
        binary_feature(),
        category_feature(),
        date_feature(),
        h3_feature(),
        image_feature("/tmp/destination_folder"),
        number_feature(),
        sequence_feature(),
        set_feature(),
        text_feature(),
        timeseries_feature(),
        vector_feature(),
    ]
    all_output_features = [
        binary_feature(),
        category_feature(),
        number_feature(),
        sequence_feature(),
        set_feature(),
        text_feature(),
        vector_feature(),
    ]

    # validate config with all features
    config = {
        "input_features": all_input_features,
        "output_features": all_output_features,
    }
    validate_config(config)

    # make sure all defaults provided also registers as valid
    config = merge_with_defaults(config)
    validate_config(config)

    # Feature types that exist only as inputs cannot appear as outputs.
    input_only_features = [
        feature for feature in all_input_features if feature["type"] not in output_type_registry.keys()
    ]
    for input_feature in input_only_features:
        bad_config = {
            "input_features": all_input_features,
            "output_features": all_output_features + [input_feature],
        }
        dtype = input_feature["type"]
        with pytest.raises(ValidationError, match=rf"^'{dtype}' is not one of .*"):
            validate_config(bad_config)
def test_config_features():
    """Validate a config built from every feature type, then verify that types
    without an output implementation are rejected when used as outputs."""
    all_input_features = [
        audio_feature('/tmp/destination_folder'),
        bag_feature(),
        binary_feature(),
        category_feature(),
        date_feature(),
        h3_feature(),
        image_feature('/tmp/destination_folder'),
        numerical_feature(),
        sequence_feature(),
        set_feature(),
        text_feature(),
        timeseries_feature(),
        vector_feature(),
    ]
    all_output_features = [
        binary_feature(),
        category_feature(),
        numerical_feature(),
        sequence_feature(),
        set_feature(),
        text_feature(),
        vector_feature(),
    ]

    # validate config with all features
    config = {
        'input_features': all_input_features,
        'output_features': all_output_features,
    }
    validate_config(config)

    # make sure all defaults provided also registers as valid
    config = merge_with_defaults(config)
    validate_config(config)

    # Feature types that exist only as inputs cannot appear as outputs.
    input_only_features = [
        feature for feature in all_input_features if feature['type'] not in OUTPUT_FEATURE_TYPES
    ]
    for input_feature in input_only_features:
        bad_config = {
            'input_features': all_input_features,
            'output_features': all_output_features + [input_feature],
        }
        dtype = input_feature['type']
        with pytest.raises(ValidationError, match=rf"^'{dtype}' is not one of .*"):
            validate_config(bad_config)
def test_ray_lazy_load_image_error(tmpdir):
    """Lazy-loaded images (in_memory=False) should produce an error on the Ray backend."""
    image_dest_folder = os.path.join(tmpdir, "generated_images")
    lazy_preprocessing = {
        "in_memory": False,
        "height": 12,
        "width": 12,
        "num_channels": 3,
        "num_processes": 5,
    }
    input_features = [
        image_feature(
            folder=image_dest_folder,
            encoder="resnet",
            preprocessing=lazy_preprocessing,
            output_size=16,
            num_filters=8,
        ),
    ]
    output_features = [binary_feature()]
    run_test_with_features(input_features, output_features, expect_error=True)
def test_ray_image(tmpdir, dataset_type):
    """Image features should train on the Ray backend for each dataset type,
    including data with missing values."""
    image_dest_folder = os.path.join(tmpdir, "generated_images")
    in_memory_preprocessing = {
        "in_memory": True,
        "height": 12,
        "width": 12,
        "num_channels": 3,
        "num_processes": 5,
    }
    input_features = [
        image_feature(
            folder=image_dest_folder,
            encoder="resnet",
            preprocessing=in_memory_preprocessing,
            output_size=16,
            num_filters=8,
        ),
    ]
    output_features = [binary_feature()]
    run_test_with_features(input_features, output_features, dataset_type=dataset_type, nan_percent=0.1)
def test_basic_image_feature(num_channels, image_source, in_memory, skip_save_processed_input, csv_filename):
    """Train on images supplied either as file paths or as in-dataframe ndarrays,
    across channel-count / in-memory / save-processed-input combinations."""
    # Image Inputs
    image_dest_folder = os.path.join(os.getcwd(), 'generated_images')

    input_features = [
        image_feature(
            folder=image_dest_folder,
            encoder='stacked_cnn',
            preprocessing={
                'in_memory': in_memory,
                'height': 12,
                'width': 12,
                'num_channels': num_channels,
                'num_processes': 5
            },
            fc_size=16,
            num_filters=8
        )
    ]
    output_features = [
        category_feature(vocab_size=2, reduce_input='sum')
    ]

    rel_path = generate_data(input_features, output_features, csv_filename)

    if image_source == 'file':
        # use images from file
        run_experiment(
            input_features,
            output_features,
            dataset=rel_path,
            skip_save_processed_input=skip_save_processed_input
        )
    else:
        # import image from file and store in dataframe as ndarrays
        df = pd.read_csv(rel_path)
        image_feature_name = input_features[0]['name']
        df[image_feature_name] = df[image_feature_name].apply(lambda x: imread(x))

        run_experiment(
            input_features,
            output_features,
            dataset=df,
            skip_save_processed_input=skip_save_processed_input
        )

    # Delete the temporary data created
    shutil.rmtree(image_dest_folder, ignore_errors=True)
def test_ray_image():
    """Image features should train to parquet on the Ray backend."""
    with tempfile.TemporaryDirectory() as tmpdir:
        image_dest_folder = os.path.join(tmpdir, "generated_images")
        in_memory_preprocessing = {
            "in_memory": True,
            "height": 12,
            "width": 12,
            "num_channels": 3,
            "num_processes": 5,
        }
        input_features = [
            image_feature(
                folder=image_dest_folder,
                encoder="resnet",
                preprocessing=in_memory_preprocessing,
                output_size=16,
                num_filters=8,
            ),
        ]
        output_features = [binary_feature()]
        run_test_parquet(input_features, output_features)
def test_server_integration(csv_filename):
    """End-to-end test of the REST serving app: health check, missing-features
    error, and agreement between server predictions and in-process predictions.

    Fix: the training output directory and generated-images folder were only
    removed on success, leaking artifacts whenever an assertion failed. Cleanup
    now runs in a finally block.
    """
    # Image Inputs
    image_dest_folder = os.path.join(os.getcwd(), 'generated_images')

    # Resnet encoder
    input_features = [
        image_feature(folder=image_dest_folder,
                      preprocessing={
                          'in_memory': True,
                          'height': 8,
                          'width': 8,
                          'num_channels': 3
                      },
                      fc_size=16,
                      num_filters=8),
        text_feature(encoder='embed', min_len=1),
        numerical_feature(normalization='zscore')
    ]
    output_features = [category_feature(vocab_size=2), numerical_feature()]

    output_dir = None
    try:
        rel_path = generate_data(input_features, output_features, csv_filename)
        model, output_dir = train_model(input_features, output_features, data_csv=rel_path)

        app = server(model)
        client = TestClient(app)

        # Health check.
        response = client.get('/')
        assert response.status_code == 200

        # Predicting with no payload must report the missing-features error.
        response = client.post('/predict')
        assert response.json() == ALL_FEATURES_PRESENT_ERROR

        data_df = read_csv(rel_path)
        first_entry = data_df.T.to_dict()[0]
        data, files = convert_to_form(first_entry)
        server_response = client.post('/predict', data=data, files=files)
        server_response = server_response.json()

        server_response_keys = sorted(list(server_response.keys()))
        assert server_response_keys == sorted(output_keys_for(output_features))

        # Server predictions must match direct in-process predictions.
        model_output, _ = model.predict(dataset=[first_entry], data_format=dict)
        model_output = model_output.to_dict('records')[0]
        assert model_output == server_response
    finally:
        # Remove artifacts even when an assertion fails.
        if output_dir:
            shutil.rmtree(output_dir, ignore_errors=True)
        shutil.rmtree(image_dest_folder, ignore_errors=True)
def test_experiment_image_inputs(csv_filename):
    """Run image experiments with the resnet encoder, the stacked_cnn encoder,
    and the stacked_cnn encoder with lazy (on-disk) image loading."""
    # Image Inputs
    image_dest_folder = os.path.join(os.getcwd(), 'generated_images')

    # Resnet encoder
    input_features = [
        image_feature(
            folder=image_dest_folder,
            encoder='resnet',
            preprocessing={
                'in_memory': True,
                'height': 12,
                'width': 12,
                'num_channels': 3,
                'num_processes': 5
            },
            fc_size=16,
            num_filters=8
        ),
        text_feature(encoder='embed', min_len=1),
        numerical_feature(normalization='zscore')
    ]
    output_features = [
        category_feature(vocab_size=2, reduce_input='sum'),
        numerical_feature()
    ]

    rel_path = generate_data(input_features, output_features, csv_filename)
    run_experiment(input_features, output_features, data_csv=rel_path)

    # Stacked CNN encoder
    input_features[0]['encoder'] = 'stacked_cnn'
    rel_path = generate_data(input_features, output_features, csv_filename)
    run_experiment(input_features, output_features, data_csv=rel_path)

    # Stacked CNN encoder, in_memory = False
    input_features[0]['preprocessing']['in_memory'] = False
    rel_path = generate_data(input_features, output_features, csv_filename)
    run_experiment(
        input_features,
        output_features,
        data_csv=rel_path,
        skip_save_processed_input=False,
    )

    # Delete the temporary data created
    shutil.rmtree(image_dest_folder)
def run(csv_filename):
    """Train and predict on a small image dataset with a CometCallback and verify
    the experiment was created and its lifecycle hooks fired."""
    # Image Inputs
    image_dest_folder = os.path.join(os.getcwd(), "generated_images")

    # Inputs & Outputs
    input_features = [image_feature(folder=image_dest_folder)]
    output_features = [category_feature()]
    data_csv = generate_data(input_features, output_features, csv_filename)

    config = {
        "input_features": input_features,
        "output_features": output_features,
        "combiner": {"type": "concat", "fc_size": 14},
        "training": {"epochs": 2},
    }

    callback = CometCallback()
    model = LudwigModel(config, callbacks=[callback])
    output_dir = None

    # Wrap these methods so we can check that they were called
    callback.on_train_init = Mock(side_effect=callback.on_train_init)
    callback.on_train_start = Mock(side_effect=callback.on_train_start)

    with patch("comet_ml.Experiment.log_asset_data") as mock_log_asset_data:
        try:
            # Training with csv
            _, _, output_dir = model.train(dataset=data_csv)
            model.predict(dataset=data_csv)
        finally:
            # Remove the training output regardless of success.
            if output_dir:
                shutil.rmtree(output_dir, ignore_errors=True)

    # Verify that the experiment was created successfully
    assert callback.cometml_experiment is not None

    # Check that these methods were called at least once
    callback.on_train_init.assert_called()
    callback.on_train_start.assert_called()

    # Check that we ran `train_model`, which calls into `log_assert_data`, successfully
    mock_log_asset_data.assert_called()
def test_visual_question_answering(csv_filename):
    """Image + text inputs with a generator sequence output (VQA-style setup)."""
    image_dest_folder = os.path.join(os.getcwd(), "generated_images")

    input_features = [
        image_feature(
            folder=image_dest_folder,
            encoder="resnet",
            preprocessing={"in_memory": True, "height": 8, "width": 8, "num_channels": 3, "num_processes": 5},
            fc_size=8,
            num_filters=8,
        ),
        text_feature(encoder="embed", min_len=1, level="word"),
    ]
    output_features = [sequence_feature(decoder="generator", cell_type="lstm")]

    rel_path = generate_data(input_features, output_features, csv_filename)
    run_experiment(input_features, output_features, dataset=rel_path)

    # Delete the temporary data created
    shutil.rmtree(image_dest_folder)
def test_dask_lazy_load_image_error():
    """Lazy-loaded images (in_memory=False) should produce an error on the Dask backend."""
    with tempfile.TemporaryDirectory() as tmpdir:
        image_dest_folder = os.path.join(tmpdir, 'generated_images')
        lazy_preprocessing = {
            'in_memory': False,
            'height': 12,
            'width': 12,
            'num_channels': 3,
            'num_processes': 5
        }
        input_features = [
            image_feature(folder=image_dest_folder,
                          encoder='resnet',
                          preprocessing=lazy_preprocessing,
                          fc_size=16,
                          num_filters=8),
        ]
        output_features = [binary_feature()]
        run_test_parquet(input_features, output_features, expect_error=True)
def test_basic_image_feature(num_channels, image_source, in_memory, skip_save_processed_input, csv_filename):
    """Train on images supplied either as file paths or as in-dataframe tensors,
    across channel-count / in-memory / save-processed-input combinations."""
    # Image Inputs
    image_dest_folder = os.path.join(os.getcwd(), "generated_images")

    input_features = [
        image_feature(
            folder=image_dest_folder,
            encoder="stacked_cnn",
            preprocessing={
                "in_memory": in_memory,
                "height": 12,
                "width": 12,
                "num_channels": num_channels,
                "num_processes": 5,
            },
            fc_size=16,
            num_filters=8,
        )
    ]
    output_features = [category_feature(vocab_size=2, reduce_input="sum")]

    rel_path = generate_data(input_features, output_features, csv_filename)

    if image_source == "file":
        # use images from file
        run_experiment(
            input_features,
            output_features,
            dataset=rel_path,
            skip_save_processed_input=skip_save_processed_input,
        )
    else:
        # import image from file and store in dataframe as tensors.
        df = pd.read_csv(rel_path)
        image_feature_name = input_features[0]["name"]
        df[image_feature_name] = df[image_feature_name].apply(lambda x: torchvision.io.read_image(x))

        run_experiment(
            input_features,
            output_features,
            dataset=df,
            skip_save_processed_input=skip_save_processed_input,
        )

    # Delete the temporary data created
    shutil.rmtree(image_dest_folder, ignore_errors=True)
def test_server_integration(csv_filename):
    """REST serving test: missing-features error plus a round-trip prediction
    whose response keys must match the configured output features."""
    # Image Inputs
    image_dest_folder = os.path.join(os.getcwd(), 'generated_images')

    # Resnet encoder
    input_features = [
        image_feature(
            folder=image_dest_folder,
            encoder='resnet',
            preprocessing={
                'in_memory': True,
                'height': 8,
                'width': 8,
                'num_channels': 3
            },
            fc_size=16,
            num_filters=8
        ),
        text_feature(encoder='embed', min_len=1),
        numerical_feature(normalization='zscore')
    ]
    output_features = [
        category_feature(vocab_size=2, reduce_input='sum'),
        numerical_feature()
    ]

    rel_path = generate_data(input_features, output_features, csv_filename)
    model = train_model(input_features, output_features, data_csv=rel_path)

    app = server(model)
    client = TestClient(app)

    # Predicting with no payload must report the missing-features error.
    response = client.post('/predict')
    assert response.json() == ALL_FEATURES_PRESENT_ERROR

    data_df = read_csv(rel_path)
    data, files = convert_to_form(data_df.T.to_dict()[0])
    response = client.post('/predict', data=data, files=files)

    response_keys = sorted(list(response.json().keys()))
    assert response_keys == sorted(output_keys_for(output_features))

    shutil.rmtree(model.exp_dir_name, ignore_errors=True)
    shutil.rmtree(image_dest_folder)
def test_visual_question_answering(csv_filename):
    """Image + text inputs with a generator sequence output (VQA-style setup)."""
    image_dest_folder = os.path.join(os.getcwd(), 'generated_images')

    input_features = [
        image_feature(folder=image_dest_folder,
                      encoder='resnet',
                      preprocessing={
                          'in_memory': True,
                          'height': 8,
                          'width': 8,
                          'num_channels': 3
                      },
                      fc_size=8,
                      num_filters=8),
        text_feature(encoder='embed', min_len=1, level='word'),
    ]
    output_features = [sequence_feature(decoder='generator', cell_type='lstm')]

    rel_path = generate_data(input_features, output_features, csv_filename)
    run_experiment(input_features, output_features, data_csv=rel_path)

    # Delete the temporary data created
    shutil.rmtree(image_dest_folder)
def test_experiment_image_inputs(image_params: ImageParams, tmpdir):
    """Run an image experiment parameterized over encoder, in-memory flag, and
    skip_save_processed_input; tmpdir handles data cleanup."""
    image_dest_folder = os.path.join(tmpdir, "generated_images")

    # Resnet encoder
    input_features = [
        image_feature(
            folder=image_dest_folder,
            encoder="resnet",
            preprocessing={
                "in_memory": True,
                "height": 12,
                "width": 12,
                "num_channels": 3,
                "num_processes": 5,
            },
            output_size=16,
            num_filters=8,
        ),
        text_feature(encoder="embed", min_len=1),
        number_feature(normalization="zscore"),
    ]
    output_features = [
        category_feature(vocab_size=2, reduce_input="sum"),
        number_feature(),
    ]

    # Apply the parameterized encoder and in-memory flag.
    input_features[0]["encoder"] = image_params.image_encoder
    input_features[0]["preprocessing"]["in_memory"] = image_params.in_memory_flag

    rel_path = generate_data(input_features, output_features, os.path.join(tmpdir, "dataset.csv"))
    run_experiment(
        input_features,
        output_features,
        dataset=rel_path,
        skip_save_processed_input=image_params.skip_save_processed_input,
    )