Example No. 1
def test_read_image_from_numpy_array(tmpdir, csv_filename):
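    """Checks that preprocessing works when images are passed as in-memory numpy arrays
    instead of file paths."""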
    input_features = [image_feature(os.path.join(tmpdir, "generated_output"))]
    output_features = [category_feature(vocab_size=5, reduce_input="sum")]

    config = {
        "input_features": input_features,
        "output_features": output_features,
        TRAINER: {
            "epochs": 2
        },
    }

    data_csv = generate_data(input_features,
                             output_features,
                             os.path.join(tmpdir, csv_filename),
                             num_examples=NUM_EXAMPLES)

    df = pd.read_csv(data_csv)
    processed_df_rows = []

    for _, row in df.iterrows():
        processed_df_rows.append({
            input_features[0][NAME]: np.array(Image.open(row[input_features[0][NAME]])),
            output_features[0][NAME]: row[output_features[0][NAME]],
        })

    df_with_images_as_numpy_arrays = pd.DataFrame(processed_df_rows)

    model = LudwigModel(config)
    model.preprocess(
        df_with_images_as_numpy_arrays,
        skip_save_processed_input=False,
    )
Example No. 2
def test_validate_with_preprocessing_defaults():
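    """Builds a config from every feature type's preprocessing defaults and checks that it
    validates, both as written and after merging with the global defaults."""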
    config = {
        "input_features": [
            audio_feature("/tmp/destination_folder", preprocessing=AudioFeatureMixin.preprocessing_defaults),
            bag_feature(preprocessing=BagFeatureMixin.preprocessing_defaults),
            binary_feature(preprocessing=BinaryFeatureMixin.preprocessing_defaults),
            category_feature(preprocessing=CategoryFeatureMixin.preprocessing_defaults),
            date_feature(preprocessing=DateFeatureMixin.preprocessing_defaults),
            h3_feature(preprocessing=H3FeatureMixin.preprocessing_defaults),
            image_feature("/tmp/destination_folder", preprocessing=ImageFeatureMixin.preprocessing_defaults),
            numerical_feature(preprocessing=NumericalFeatureMixin.preprocessing_defaults),
            sequence_feature(preprocessing=SequenceFeatureMixin.preprocessing_defaults),
            set_feature(preprocessing=SetFeatureMixin.preprocessing_defaults),
            text_feature(preprocessing=TextFeatureMixin.preprocessing_defaults),
            timeseries_feature(preprocessing=TimeseriesFeatureMixin.preprocessing_defaults),
            vector_feature(preprocessing=VectorFeatureMixin.preprocessing_defaults),
        ],
        "output_features": [{"name": "target", "type": "category"}],
        "training": {
            "decay": True,
            "learning_rate": 0.001,
            "validation_field": "target",
            "validation_metric": "accuracy",
        },
    }

    validate_config(config)
    config = merge_with_defaults(config)
    validate_config(config)
Example No. 3
def test_experiment_infer_image_metadata(csv_filename: str):
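    """Removes the image preprocessing section so that image metadata must be inferred
    from the data."""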
    # Image Inputs
    image_dest_folder = os.path.join(os.getcwd(), "generated_images")

    # Stacked CNN encoder
    input_features = [
        image_feature(folder=image_dest_folder,
                      encoder="stacked_cnn",
                      fc_size=16,
                      num_filters=8),
        text_feature(encoder="embed", min_len=1),
        numerical_feature(normalization="zscore"),
    ]
    output_features = [
        category_feature(vocab_size=2, reduce_input="sum"),
        numerical_feature()
    ]

    rel_path = generate_data(input_features, output_features, csv_filename)

    # remove image preprocessing section to force inferring image metadata
    input_features[0].pop("preprocessing")

    run_experiment(input_features, output_features, dataset=rel_path)

    # Delete the temporary data created
    shutil.rmtree(image_dest_folder)
Example No. 4
def test_wandb_experiment(csv_filename):
    # Test W&B integration

    # add wandb arg and detect flag
    sys.argv.append('--wandb')
    ludwig.contrib.contrib_import()

    # disable sync to cloud
    os.environ['WANDB_MODE'] = 'dryrun'

    # Image Inputs
    image_dest_folder = os.path.join(os.getcwd(), 'generated_images')

    # Inputs & Outputs
    input_features = [image_feature(folder=image_dest_folder)]
    output_features = [category_feature()]
    rel_path = generate_data(input_features, output_features, csv_filename)

    # Run experiment
    run_experiment(input_features, output_features, data_csv=rel_path)

    # Check a W&B run was created
    assert wandb.run is not None

    # End session
    wandb.join()

    # Remove instance from contrib_registry
    ludwig.contrib.contrib_registry['instances'].pop()

    # Delete the temporary data created
    shutil.rmtree(image_dest_folder)
Example No. 5
def test_config_bad_preprocessing_param():
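    """Checks that validation rejects a config with an unregistered encoder and a bogus image
    preprocessing parameter."""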
    config = {
        "input_features": [
            sequence_feature(reduce_output="sum", encoder="fake"),
            image_feature(
                "/tmp/destination_folder",
                preprocessing={
                    "in_memory": True,
                    "height": 12,
                    "width": 12,
                    "num_channels": 3,
                    "tokenizer": "space",
                },
            ),
        ],
        "output_features":
        [category_feature(vocab_size=2, reduce_input="sum")],
        "combiner": {
            "type": "concat",
            "output_size": 14
        },
    }

    with pytest.raises(ValidationError, match=r"^'fake' is not one of .*"):
        validate_config(config)
Example No. 6
def test_saved_weights_in_checkpoint(tmpdir):
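    """Trains a model and checks that the saved config flags every input feature's weights as
    stored in the checkpoint."""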
    image_dest_folder = os.path.join(tmpdir, "generated_images")
    input_features = [text_feature(), image_feature(image_dest_folder)]
    output_features = [category_feature(name="class")]

    data_csv = generate_data(input_features, output_features, os.path.join(tmpdir, "dataset.csv"))
    val_csv = shutil.copyfile(data_csv, os.path.join(tmpdir, "validation.csv"))
    test_csv = shutil.copyfile(data_csv, os.path.join(tmpdir, "test.csv"))

    config = {
        "input_features": input_features,
        "output_features": output_features,
    }
    model = LudwigModel(config)
    _, _, output_dir = model.train(
        training_set=data_csv, validation_set=val_csv, test_set=test_csv, output_directory=tmpdir
    )

    config_save_path = os.path.join(output_dir, "model", MODEL_HYPERPARAMETERS_FILE_NAME)
    with open(config_save_path) as f:
        saved_config = json.load(f)
    saved_input_features = saved_config["input_features"]
    for saved_input_feature in saved_input_features:
        assert "saved_weights_in_checkpoint" in saved_input_feature
        assert saved_input_feature["saved_weights_in_checkpoint"]
Example No. 7
def run(csv_filename):
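    """Trains a small image model with a WandbCallback attached and verifies that its training
    hooks were called."""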
    callback = WandbCallback()

    # Wrap these methods so we can check that they were called
    callback.on_train_init = Mock(side_effect=callback.on_train_init)
    callback.on_train_start = Mock(side_effect=callback.on_train_start)

    # disable sync to cloud
    os.environ["WANDB_MODE"] = "dryrun"

    # Image Inputs
    image_dest_folder = os.path.join(os.getcwd(), "generated_images")

    try:
        # Inputs & Outputs
        input_features = [image_feature(folder=image_dest_folder)]
        output_features = [category_feature()]
        rel_path = generate_data(input_features, output_features, csv_filename)

        # Run experiment
        run_experiment(input_features, output_features, dataset=rel_path, callbacks=[callback])
    finally:
        # Delete the temporary data created
        shutil.rmtree(image_dest_folder, ignore_errors=True)

    # Check that these methods were called at least once
    callback.on_train_init.assert_called()
    callback.on_train_start.assert_called()
Example No. 8
def test_experiment_infer_image_metadata(tmpdir):
    # Image Inputs
    image_dest_folder = os.path.join(tmpdir, "generated_images")

    # Stacked CNN encoder
    input_features = [
        image_feature(folder=image_dest_folder,
                      encoder="stacked_cnn",
                      output_size=16,
                      num_filters=8),
        text_feature(encoder="embed", min_len=1),
        number_feature(normalization="zscore"),
    ]
    output_features = [
        category_feature(vocab_size=2, reduce_input="sum"),
        number_feature()
    ]

    rel_path = generate_data(input_features, output_features,
                             os.path.join(tmpdir, "dataset.csv"))

    # remove image preprocessing section to force inferring image metadata
    input_features[0].pop("preprocessing")

    run_experiment(input_features, output_features, dataset=rel_path)
Example No. 9
def test_experiment_image_inputs(image_params: ImageParams, csv_filename: str):
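    """Parametrized via ImageParams over the image encoder, the in-memory preprocessing flag, and
    whether processed input is saved."""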
    # Image Inputs
    image_dest_folder = os.path.join(os.getcwd(), "generated_images")

    # Resnet encoder
    input_features = [
        image_feature(
            folder=image_dest_folder,
            encoder="resnet",
            preprocessing={"in_memory": True, "height": 12, "width": 12, "num_channels": 3, "num_processes": 5},
            fc_size=16,
            num_filters=8,
        ),
        text_feature(encoder="embed", min_len=1),
        numerical_feature(normalization="zscore"),
    ]
    output_features = [category_feature(vocab_size=2, reduce_input="sum"), numerical_feature()]

    input_features[0]["encoder"] = image_params.image_encoder
    input_features[0]["preprocessing"]["in_memory"] = image_params.in_memory_flag
    rel_path = generate_data(input_features, output_features, csv_filename)
    run_experiment(
        input_features,
        output_features,
        dataset=rel_path,
        skip_save_processed_input=image_params.skip_save_processed_input,
    )

    # Delete the temporary data created
    shutil.rmtree(image_dest_folder)
Example No. 10
def test_config_bad_preprocessing_param():
    config = {
        'input_features': [
            sequence_feature(reduce_output='sum', encoder='fake'),
            image_feature(
                '/tmp/destination_folder',
                preprocessing={
                    'in_memory': True,
                    'height': 12,
                    'width': 12,
                    'num_channels': 3,
                    'tokenizer': 'space',
                },
            ),
        ],
        'output_features':
        [category_feature(vocab_size=2, reduce_input='sum')],
        'combiner': {
            'type': 'concat',
            'fc_size': 14
        },
    }

    with pytest.raises(ValidationError, match=r"^'fake' is not one of .*"):
        validate_config(config)
Example No. 11
def test_image_resizing_num_channel_handling(csv_filename):
    """
    This test creates two image datasets with 3 channels and 1 channel. The
    combination of this data is used to train a model. This checks the cases
    where the user may or may not specify a number of channels in the
    config
    :param csv_filename:
    :return:
    """
    # Image Inputs
    image_dest_folder = os.path.join(os.getcwd(), 'generated_images')

    # Resnet encoder
    input_features = [
        image_feature(folder=image_dest_folder,
                      encoder='resnet',
                      preprocessing={
                          'in_memory': True,
                          'height': 8,
                          'width': 8,
                          'num_channels': 3,
                          'num_processes': 5
                      },
                      fc_size=8,
                      num_filters=8),
        text_feature(encoder='embed', min_len=1),
        numerical_feature(normalization='minmax')
    ]
    output_features = [binary_feature(), numerical_feature()]
    rel_path = generate_data(input_features,
                             output_features,
                             csv_filename,
                             num_examples=50)

    df1 = read_csv(rel_path)

    input_features[0]['preprocessing']['num_channels'] = 1
    rel_path = generate_data(input_features,
                             output_features,
                             csv_filename,
                             num_examples=50)
    df2 = read_csv(rel_path)

    df = concatenate_df(df1, df2, None, LOCAL_BACKEND)
    df.to_csv(rel_path, index=False)

    # Here the user specifies the number of channels. Exception shouldn't be thrown
    run_experiment(input_features, output_features, dataset=rel_path)

    del input_features[0]['preprocessing']['num_channels']

    # User now doesn't specify num channels. Should throw exception
    with pytest.raises(ValueError):
        run_experiment(input_features, output_features, dataset=rel_path)

    # Delete the temporary data created
    shutil.rmtree(image_dest_folder)
Example No. 12
def test_image_resizing_num_channel_handling(csv_filename):
    """This test creates two image datasets with 3 channels and 1 channel. The combination of this data is used to
    train a model. This checks the cases where the user may or may not specify a number of channels in the config.

    :param csv_filename:
    :return:
    """
    # Image Inputs
    image_dest_folder = os.path.join(os.getcwd(), "generated_images")

    # Resnet encoder
    input_features = [
        image_feature(
            folder=image_dest_folder,
            encoder="resnet",
            preprocessing={
                "in_memory": True,
                "height": 8,
                "width": 8,
                "num_channels": 3,
                "num_processes": 5
            },
            fc_size=8,
            num_filters=8,
        ),
        text_feature(encoder="embed", min_len=1),
        numerical_feature(normalization="minmax"),
    ]
    output_features = [binary_feature(), numerical_feature()]
    rel_path = generate_data(input_features,
                             output_features,
                             csv_filename,
                             num_examples=50)

    df1 = read_csv(rel_path)

    input_features[0]["preprocessing"]["num_channels"] = 1
    rel_path = generate_data(input_features,
                             output_features,
                             csv_filename,
                             num_examples=50)
    df2 = read_csv(rel_path)

    df = concatenate_df(df1, df2, None, LOCAL_BACKEND)
    df.to_csv(rel_path, index=False)

    # Here the user specifies number of channels. Exception shouldn't be thrown
    run_experiment(input_features, output_features, dataset=rel_path)

    del input_features[0]["preprocessing"]["num_channels"]

    # User doesn't specify num channels, but num channels is inferred. Exception shouldn't be thrown
    run_experiment(input_features, output_features, dataset=rel_path)

    # Delete the temporary data created
    shutil.rmtree(image_dest_folder)
Example No. 13
def test_config_encoders():
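    """Checks that a config built around each encoder in ENCODERS passes validation."""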
    for encoder in ENCODERS:
        config = {
            'input_features': [
                sequence_feature(reduce_output='sum', encoder=encoder),
                image_feature('/tmp/destination_folder'),
            ],
            'output_features': [category_feature(vocab_size=2, reduce_input='sum')],
            'combiner': {'type': 'concat', 'fc_size': 14},
        }
        validate_config(config)
Example No. 14
def test_config_encoders():
    for encoder in ENCODERS:
        config = {
            "input_features": [
                sequence_feature(reduce_output="sum", encoder=encoder),
                image_feature("/tmp/destination_folder"),
            ],
            "output_features": [category_feature(vocab_size=2, reduce_input="sum")],
            "combiner": {"type": "concat", "fc_size": 14},
        }
        validate_config(config)
Example No. 15
def run(csv_filename):
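    """Trains a small image model with the Comet contrib integration enabled and verifies that
    its hooks and asset logging were called."""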
    # Check that comet has been imported successfully as a contrib package
    contrib_instances = ludwig.contrib.contrib_registry["instances"]
    assert len(contrib_instances) == 1

    comet_instance = contrib_instances[0]
    assert isinstance(comet_instance, Comet)

    # Image Inputs
    image_dest_folder = os.path.join(os.getcwd(), 'generated_images')

    # Inputs & Outputs
    input_features = [image_feature(folder=image_dest_folder)]
    output_features = [category_feature()]
    data_csv = generate_data(input_features, output_features, csv_filename)

    config = {
        'input_features': input_features,
        'output_features': output_features,
        'combiner': {
            'type': 'concat',
            'fc_size': 14
        },
        'training': {
            'epochs': 2
        }
    }

    model = LudwigModel(config)
    output_dir = None

    # Wrap these methods so we can check that they were called
    comet_instance.train_init = Mock(side_effect=comet_instance.train_init)
    comet_instance.train_model = Mock(side_effect=comet_instance.train_model)

    with patch('comet_ml.Experiment.log_asset_data') as mock_log_asset_data:
        try:
            # Training with csv
            _, _, output_dir = model.train(dataset=data_csv)
            model.predict(dataset=data_csv)
        finally:
            if output_dir:
                shutil.rmtree(output_dir, ignore_errors=True)

    # Verify that the experiment was created successfully
    assert comet_instance.cometml_experiment is not None

    # Check that these methods were called at least once
    comet_instance.train_init.assert_called()
    comet_instance.train_model.assert_called()

    # Check that we ran `train_model`, which calls into `log_asset_data`, successfully
    mock_log_asset_data.assert_called()
Example No. 16
def test_config_features():
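    """Validates a config using every input and output feature type, then checks that input-only
    feature types are rejected when used as output features."""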
    all_input_features = [
        audio_feature("/tmp/destination_folder"),
        bag_feature(),
        binary_feature(),
        category_feature(),
        date_feature(),
        h3_feature(),
        image_feature("/tmp/destination_folder"),
        number_feature(),
        sequence_feature(),
        set_feature(),
        text_feature(),
        timeseries_feature(),
        vector_feature(),
    ]
    all_output_features = [
        binary_feature(),
        category_feature(),
        number_feature(),
        sequence_feature(),
        set_feature(),
        text_feature(),
        vector_feature(),
    ]

    # validate config with all features
    config = {
        "input_features": all_input_features,
        "output_features": all_output_features,
    }
    validate_config(config)

    # make sure all defaults provided also registers as valid
    config = merge_with_defaults(config)
    validate_config(config)

    # test various invalid output features
    input_only_features = [
        feature for feature in all_input_features
        if feature["type"] not in output_type_registry.keys()
    ]
    for input_feature in input_only_features:
        config = {
            "input_features": all_input_features,
            "output_features": all_output_features + [input_feature],
        }

        dtype = input_feature["type"]
        with pytest.raises(ValidationError,
                           match=rf"^'{dtype}' is not one of .*"):
            validate_config(config)
Example No. 17
def test_config_features():
    all_input_features = [
        audio_feature('/tmp/destination_folder'),
        bag_feature(),
        binary_feature(),
        category_feature(),
        date_feature(),
        h3_feature(),
        image_feature('/tmp/destination_folder'),
        numerical_feature(),
        sequence_feature(),
        set_feature(),
        text_feature(),
        timeseries_feature(),
        vector_feature(),
    ]
    all_output_features = [
        binary_feature(),
        category_feature(),
        numerical_feature(),
        sequence_feature(),
        set_feature(),
        text_feature(),
        vector_feature(),
    ]

    # validate config with all features
    config = {
        'input_features': all_input_features,
        'output_features': all_output_features,
    }
    validate_config(config)

    # make sure all defaults provided also registers as valid
    config = merge_with_defaults(config)
    validate_config(config)

    # test various invalid output features
    input_only_features = [
        feature for feature in all_input_features
        if feature['type'] not in OUTPUT_FEATURE_TYPES
    ]
    for input_feature in input_only_features:
        config = {
            'input_features': all_input_features,
            'output_features': all_output_features + [input_feature],
        }

        dtype = input_feature['type']
        with pytest.raises(ValidationError,
                           match=rf"^'{dtype}' is not one of .*"):
            validate_config(config)
Example No. 18
def test_ray_lazy_load_image_error(tmpdir):
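    """Checks that lazy image loading (in_memory=False) raises an error on the Ray backend."""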
    image_dest_folder = os.path.join(tmpdir, "generated_images")
    input_features = [
        image_feature(
            folder=image_dest_folder,
            encoder="resnet",
            preprocessing={"in_memory": False, "height": 12, "width": 12, "num_channels": 3, "num_processes": 5},
            output_size=16,
            num_filters=8,
        ),
    ]
    output_features = [binary_feature()]
    run_test_with_features(input_features, output_features, expect_error=True)
Example No. 19
def test_ray_image(tmpdir, dataset_type):
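    """Runs a small image model on the Ray backend against the given dataset type, with 10% NaN
    values injected (nan_percent=0.1)."""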
    image_dest_folder = os.path.join(tmpdir, "generated_images")
    input_features = [
        image_feature(
            folder=image_dest_folder,
            encoder="resnet",
            preprocessing={"in_memory": True, "height": 12, "width": 12, "num_channels": 3, "num_processes": 5},
            output_size=16,
            num_filters=8,
        ),
    ]
    output_features = [binary_feature()]
    run_test_with_features(input_features, output_features, dataset_type=dataset_type, nan_percent=0.1)
Example No. 20
def test_basic_image_feature(num_channels, image_source, in_memory,
                             skip_save_processed_input, csv_filename):
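    """Parametrized over channel count, image source (file path vs. in-memory ndarray), the
    in-memory preprocessing flag, and whether processed input is saved."""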
    # Image Inputs
    image_dest_folder = os.path.join(os.getcwd(), 'generated_images')

    input_features = [
        image_feature(
            folder=image_dest_folder,
            encoder='stacked_cnn',
            preprocessing={
                'in_memory': in_memory,
                'height': 12,
                'width': 12,
                'num_channels': num_channels,
                'num_processes': 5
            },
            fc_size=16,
            num_filters=8
        )
    ]
    output_features = [
        category_feature(vocab_size=2, reduce_input='sum')
    ]

    rel_path = generate_data(input_features, output_features, csv_filename)

    if image_source == 'file':
        # use images from file
        run_experiment(
            input_features,
            output_features,
            dataset=rel_path,
            skip_save_processed_input=skip_save_processed_input
        )
    else:
        # import image from file and store in dataframe as ndarrays
        df = pd.read_csv(rel_path)
        image_feature_name = input_features[0]['name']
        df[image_feature_name] = df[image_feature_name].apply(
            lambda x: imread(x))

        run_experiment(
            input_features,
            output_features,
            dataset=df,
            skip_save_processed_input=skip_save_processed_input
        )

    # Delete the temporary data created
    shutil.rmtree(image_dest_folder, ignore_errors=True)
Example No. 21
def test_ray_image():
    with tempfile.TemporaryDirectory() as tmpdir:
        image_dest_folder = os.path.join(tmpdir, "generated_images")
        input_features = [
            image_feature(
                folder=image_dest_folder,
                encoder="resnet",
                preprocessing={"in_memory": True, "height": 12, "width": 12, "num_channels": 3, "num_processes": 5},
                output_size=16,
                num_filters=8,
            ),
        ]
        output_features = [binary_feature()]
        run_test_parquet(input_features, output_features)
Example No. 22
def test_server_integration(csv_filename):
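    """Trains a model, serves it with the Ludwig server, and checks that /predict responses match
    the model's direct predictions."""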
    # Image Inputs
    image_dest_folder = os.path.join(os.getcwd(), 'generated_images')

    # Image encoder with default settings
    input_features = [
        image_feature(folder=image_dest_folder,
                      preprocessing={
                          'in_memory': True,
                          'height': 8,
                          'width': 8,
                          'num_channels': 3
                      },
                      fc_size=16,
                      num_filters=8),
        text_feature(encoder='embed', min_len=1),
        numerical_feature(normalization='zscore')
    ]
    output_features = [category_feature(vocab_size=2), numerical_feature()]

    rel_path = generate_data(input_features, output_features, csv_filename)
    model, output_dir = train_model(input_features,
                                    output_features,
                                    data_csv=rel_path)

    app = server(model)
    client = TestClient(app)
    response = client.get('/')
    assert response.status_code == 200

    response = client.post('/predict')
    assert response.json() == ALL_FEATURES_PRESENT_ERROR

    data_df = read_csv(rel_path)
    first_entry = data_df.T.to_dict()[0]
    data, files = convert_to_form(first_entry)
    server_response = client.post('/predict', data=data, files=files)
    server_response = server_response.json()

    server_response_keys = sorted(list(server_response.keys()))
    assert server_response_keys == sorted(output_keys_for(output_features))

    model_output, _ = model.predict(dataset=[first_entry], data_format=dict)
    model_output = model_output.to_dict('records')[0]
    assert model_output == server_response

    shutil.rmtree(output_dir, ignore_errors=True)
    shutil.rmtree(image_dest_folder)
Example No. 23
def test_experiment_image_inputs(csv_filename):
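    """Runs the image experiment with the resnet encoder, then stacked_cnn, then stacked_cnn
    reading images from disk (in_memory=False)."""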
    # Image Inputs
    image_dest_folder = os.path.join(os.getcwd(), 'generated_images')

    # Resnet encoder
    input_features = [
        image_feature(
            folder=image_dest_folder,
            encoder='resnet',
            preprocessing={
                'in_memory': True,
                'height': 12,
                'width': 12,
                'num_channels': 3,
                'num_processes': 5
            },
            fc_size=16,
            num_filters=8
        ),
        text_feature(encoder='embed', min_len=1),
        numerical_feature(normalization='zscore')
    ]
    output_features = [
        category_feature(vocab_size=2, reduce_input='sum'),
        numerical_feature()
    ]

    rel_path = generate_data(input_features, output_features, csv_filename)
    run_experiment(input_features, output_features, data_csv=rel_path)

    # Stacked CNN encoder
    input_features[0]['encoder'] = 'stacked_cnn'
    rel_path = generate_data(input_features, output_features, csv_filename)
    run_experiment(input_features, output_features, data_csv=rel_path)

    # Stacked CNN encoder, in_memory = False
    input_features[0]['preprocessing']['in_memory'] = False
    rel_path = generate_data(input_features, output_features, csv_filename)
    run_experiment(
        input_features,
        output_features,
        data_csv=rel_path,
        skip_save_processed_input=False,
    )

    # Delete the temporary data created
    shutil.rmtree(image_dest_folder)
Example No. 24
def run(csv_filename):
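    """Trains a small image model with a CometCallback attached and verifies that its hooks and
    asset logging were called."""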
    # Image Inputs
    image_dest_folder = os.path.join(os.getcwd(), "generated_images")

    # Inputs & Outputs
    input_features = [image_feature(folder=image_dest_folder)]
    output_features = [category_feature()]
    data_csv = generate_data(input_features, output_features, csv_filename)

    config = {
        "input_features": input_features,
        "output_features": output_features,
        "combiner": {
            "type": "concat",
            "fc_size": 14
        },
        "training": {
            "epochs": 2
        },
    }

    callback = CometCallback()
    model = LudwigModel(config, callbacks=[callback])
    output_dir = None

    # Wrap these methods so we can check that they were called
    callback.on_train_init = Mock(side_effect=callback.on_train_init)
    callback.on_train_start = Mock(side_effect=callback.on_train_start)

    with patch("comet_ml.Experiment.log_asset_data") as mock_log_asset_data:
        try:
            # Training with csv
            _, _, output_dir = model.train(dataset=data_csv)
            model.predict(dataset=data_csv)
        finally:
            if output_dir:
                shutil.rmtree(output_dir, ignore_errors=True)

    # Verify that the experiment was created successfully
    assert callback.cometml_experiment is not None

    # Check that these methods were called at least once
    callback.on_train_init.assert_called()
    callback.on_train_start.assert_called()

    # Check that training, which calls into `log_asset_data`, ran successfully
    mock_log_asset_data.assert_called()
Example No. 25
def test_visual_question_answering(csv_filename):
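    """Visual-question-answering style experiment: an image and a text question in, a generated
    answer sequence out."""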
    image_dest_folder = os.path.join(os.getcwd(), "generated_images")
    input_features = [
        image_feature(
            folder=image_dest_folder,
            encoder="resnet",
            preprocessing={"in_memory": True, "height": 8, "width": 8, "num_channels": 3, "num_processes": 5},
            fc_size=8,
            num_filters=8,
        ),
        text_feature(encoder="embed", min_len=1, level="word"),
    ]
    output_features = [sequence_feature(decoder="generator", cell_type="lstm")]
    rel_path = generate_data(input_features, output_features, csv_filename)
    run_experiment(input_features, output_features, dataset=rel_path)

    # Delete the temporary data created
    shutil.rmtree(image_dest_folder)
Example No. 26
def test_dask_lazy_load_image_error():
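    """Checks that lazy image loading (in_memory=False) raises an error on the Dask backend
    (parquet data)."""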
    with tempfile.TemporaryDirectory() as tmpdir:
        image_dest_folder = os.path.join(tmpdir, 'generated_images')
        input_features = [
            image_feature(folder=image_dest_folder,
                          encoder='resnet',
                          preprocessing={
                              'in_memory': False,
                              'height': 12,
                              'width': 12,
                              'num_channels': 3,
                              'num_processes': 5
                          },
                          fc_size=16,
                          num_filters=8),
        ]
        output_features = [binary_feature()]
        run_test_parquet(input_features, output_features, expect_error=True)
Example No. 27
def test_basic_image_feature(num_channels, image_source, in_memory,
                             skip_save_processed_input, csv_filename):
    # Image Inputs
    image_dest_folder = os.path.join(os.getcwd(), "generated_images")

    input_features = [
        image_feature(
            folder=image_dest_folder,
            encoder="stacked_cnn",
            preprocessing={
                "in_memory": in_memory,
                "height": 12,
                "width": 12,
                "num_channels": num_channels,
                "num_processes": 5,
            },
            fc_size=16,
            num_filters=8,
        )
    ]
    output_features = [category_feature(vocab_size=2, reduce_input="sum")]

    rel_path = generate_data(input_features, output_features, csv_filename)

    if image_source == "file":
        # use images from file
        run_experiment(input_features,
                       output_features,
                       dataset=rel_path,
                       skip_save_processed_input=skip_save_processed_input)
    else:
        # import image from file and store in dataframe as tensors.
        df = pd.read_csv(rel_path)
        image_feature_name = input_features[0]["name"]
        df[image_feature_name] = df[image_feature_name].apply(
            lambda x: torchvision.io.read_image(x))

        run_experiment(input_features,
                       output_features,
                       dataset=df,
                       skip_save_processed_input=skip_save_processed_input)

    # Delete the temporary data created
    shutil.rmtree(image_dest_folder, ignore_errors=True)
Example No. 28
def test_server_integration(csv_filename):
    # Image Inputs
    image_dest_folder = os.path.join(os.getcwd(), 'generated_images')

    # Resnet encoder
    input_features = [
        image_feature(
            folder=image_dest_folder,
            encoder='resnet',
            preprocessing={
                'in_memory': True,
                'height': 8,
                'width': 8,
                'num_channels': 3
            },
            fc_size=16,
            num_filters=8
        ),
        text_feature(encoder='embed', min_len=1),
        numerical_feature(normalization='zscore')
    ]
    output_features = [
        category_feature(vocab_size=2, reduce_input='sum'),
        numerical_feature()
    ]

    rel_path = generate_data(input_features, output_features, csv_filename)
    model = train_model(input_features, output_features, data_csv=rel_path)

    app = server(model)
    client = TestClient(app)
    response = client.post('/predict')
    assert response.json() == ALL_FEATURES_PRESENT_ERROR

    data_df = read_csv(rel_path)
    data, files = convert_to_form(data_df.T.to_dict()[0])
    response = client.post('/predict', data=data, files=files)

    response_keys = sorted(list(response.json().keys()))
    assert response_keys == sorted(output_keys_for(output_features))

    shutil.rmtree(model.exp_dir_name, ignore_errors=True)
    shutil.rmtree(image_dest_folder)
Example No. 29
def test_visual_question_answering(csv_filename):
    image_dest_folder = os.path.join(os.getcwd(), 'generated_images')
    input_features = [
        image_feature(folder=image_dest_folder,
                      encoder='resnet',
                      preprocessing={
                          'in_memory': True,
                          'height': 8,
                          'width': 8,
                          'num_channels': 3
                      },
                      fc_size=8,
                      num_filters=8),
        text_feature(encoder='embed', min_len=1, level='word'),
    ]
    output_features = [sequence_feature(decoder='generator', cell_type='lstm')]
    rel_path = generate_data(input_features, output_features, csv_filename)
    run_experiment(input_features, output_features, data_csv=rel_path)

    # Delete the temporary data created
    shutil.rmtree(image_dest_folder)
Example No. 30
def test_experiment_image_inputs(image_params: ImageParams, tmpdir):
    # Image Inputs
    image_dest_folder = os.path.join(tmpdir, "generated_images")

    # Resnet encoder
    input_features = [
        image_feature(
            folder=image_dest_folder,
            encoder="resnet",
            preprocessing={
                "in_memory": True,
                "height": 12,
                "width": 12,
                "num_channels": 3,
                "num_processes": 5
            },
            output_size=16,
            num_filters=8,
        ),
        text_feature(encoder="embed", min_len=1),
        number_feature(normalization="zscore"),
    ]
    output_features = [
        category_feature(vocab_size=2, reduce_input="sum"),
        number_feature()
    ]

    input_features[0]["encoder"] = image_params.image_encoder
    input_features[0]["preprocessing"][
        "in_memory"] = image_params.in_memory_flag
    rel_path = generate_data(input_features, output_features,
                             os.path.join(tmpdir, "dataset.csv"))

    run_experiment(
        input_features,
        output_features,
        dataset=rel_path,
        skip_save_processed_input=image_params.skip_save_processed_input,
    )