def test_image_input_feature(image_config: Dict, encoder: str, height: int,
                             width: int, num_channels: int) -> None:
    # setup image input feature definition
    image_def = deepcopy(image_config)
    image_def["encoder"] = encoder
    image_def["height"] = height
    image_def["width"] = width
    image_def["num_channels"] = num_channels

    # pick up any other missing parameters
    ImageInputFeature.populate_defaults(image_def)

    # ensure no exceptions raised during build
    input_feature_obj = build_single_input(image_def, None)

    # check one forward pass through input feature
    input_tensor = torch.randint(0,
                                 256,
                                 size=(BATCH_SIZE, num_channels, height,
                                       width),
                                 dtype=torch.uint8)

    encoder_output = input_feature_obj(input_tensor)
    assert encoder_output["encoder_output"].shape == (
        BATCH_SIZE, *input_feature_obj.output_shape)
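
These snippets rely on module-level names defined in the surrounding test file: BATCH_SIZE, DEVICE, the *InputFeature classes, and build_single_input. A minimal sketch of that shared setup, assuming the feature classes and the builder are imported from the library under test (exact import paths vary by version, so they are left out):

from copy import deepcopy
from typing import Dict

import torch

# Assumed fixture constants; the real test module may use different values.
BATCH_SIZE = 2
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"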
Example 2
def test_number_input_feature(number_config: Dict) -> None:
    # setup number input feature definition
    number_def = deepcopy(number_config)

    # pick up any other missing parameters
    NumberInputFeature.populate_defaults(number_def)

    # ensure no exceptions raised during build
    input_feature_obj = build_single_input(number_def, None).to(DEVICE)

    # check one forward pass through input feature
    input_tensor = torch.rand(BATCH_SIZE, dtype=torch.float32).to(DEVICE)

    encoder_output = input_feature_obj(input_tensor)
    assert encoder_output["encoder_output"].shape == (
        BATCH_SIZE, *input_feature_obj.output_shape)
Example 3
def test_set_input_feature(set_config: Dict) -> None:
    # setup set input feature definition
    set_def = deepcopy(set_config)

    # pick up any other missing parameters
    SetInputFeature.populate_defaults(set_def)

    # ensure no exceptions raised during build
    input_feature_obj = build_single_input(set_def, None).to(DEVICE)

    # check one forward pass through input feature
    input_tensor = torch.randint(0,
                                 2,
                                 size=(BATCH_SIZE, len(set_def["vocab"])),
                                 dtype=torch.int64).to(DEVICE)

    encoder_output = input_feature_obj(input_tensor)
    assert encoder_output["encoder_output"].shape == (
        BATCH_SIZE, *input_feature_obj.output_shape)
Example 4
def test_category_input_feature(
    category_config: Dict,
    encoder: str,
) -> None:
    # setup category input feature definition
    category_def = deepcopy(category_config)
    category_def["encoder"] = encoder

    # pick up any other missing parameters
    CategoryInputFeature.populate_defaults(category_def)

    # ensure no exceptions raised during build
    input_feature_obj = build_single_input(category_def, None).to(DEVICE)

    # check one forward pass through input feature
    input_tensor = torch.randint(0, 3, size=(BATCH_SIZE, ),
                                 dtype=torch.int32).to(DEVICE)

    encoder_output = input_feature_obj(input_tensor)
    assert encoder_output["encoder_output"].shape == (
        BATCH_SIZE, *input_feature_obj.output_shape)
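
Shape-check tests like the four above are typically driven by pytest parametrization. A hedged sketch of how test_image_input_feature might be wired up (the encoder names and image sizes are illustrative assumptions, not values from the original suite):

import pytest

# Illustrative parametrization; the encoder names and sizes are assumptions.
@pytest.mark.parametrize("encoder", ["stacked_cnn", "resnet"])
@pytest.mark.parametrize("height,width,num_channels",
                         [(28, 28, 1), (32, 32, 3)])
def test_image_input_feature_parametrized(image_config, encoder, height,
                                          width, num_channels):
    ...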
Example 5
def test_encoder(test_case):
    # set up required directories for images if needed
    shutil.rmtree(IMAGE_DIR, ignore_errors=True)
    os.mkdir(IMAGE_DIR)

    # reproducible synthetic data set
    np.random.seed(RANDOM_SEED)
    tf.random.set_seed(RANDOM_SEED)

    # create synthetic data for the test
    features = [
        test_case.syn_data.feature_generator(
            *test_case.syn_data.feature_generator_args,
            **test_case.syn_data.feature_generator_kwargs
        )
    ]
    feature_name = features[0]['name']
    data_generator = build_synthetic_dataset(BATCH_SIZE, features)
    data_list = list(data_generator)
    raw_data = [x[0] for x in data_list[1:]]
    df = pd.DataFrame({data_list[0][0]: raw_data})

    # minimal config sufficient to create the input feature
    config = {'input_features': features, 'output_features': []}
    training_set, _, _, training_set_metadata = preprocess_for_training(
        config,
        training_set=df,
        skip_save_processed_input=True,
        random_seed=RANDOM_SEED
    )

    # run through each type of regularizer for the encoder
    regularizer_losses = []
    for regularizer in [None, 'l1', 'l2', 'l1_l2']:
        # start with a clean slate and make the run reproducible
        tf.keras.backend.clear_session()
        np.random.seed(RANDOM_SEED)
        tf.random.set_seed(RANDOM_SEED)

        # set up kwargs for the regularizer params
        x_coder_kwargs = dict(
            zip(test_case.regularizer_parm_names,
                len(test_case.regularizer_parm_names) * [regularizer])
        )

        # combine with the other keyword parameters
        x_coder_kwargs.update(test_case.XCoder_other_parms)
        features[0].update(x_coder_kwargs)

        # shim code to support sequence and sequence-like features
        if features[0]['type'] in SEQUENCE_TYPES.union({'category', 'set'}):
            features[0]['vocab'] = training_set_metadata[feature_name][
                'idx2str']
            training_set.dataset[feature_name] = \
                training_set.dataset[feature_name].astype(np.int32)

        input_def_obj = build_single_input(features[0], None)

        inputs = training_set.dataset[feature_name]
        # make sure the input is at least a rank-2 tensor
        if len(inputs.shape) == 1:
            inputs = inputs.reshape(-1, 1)

        # special handling for image feature
        if features[0]['type'] == 'image':
            inputs = tf.cast(inputs, tf.float32) / 255

        input_def_obj.encoder_obj(inputs)
        regularizer_loss = tf.reduce_sum(input_def_obj.encoder_obj.losses)
        regularizer_losses.append(regularizer_loss)

    # check regularization loss values
    # None should be zero
    assert regularizer_losses[0] == 0

    # l1, l2 and l1_l2 should be greater than zero
    assert np.all([t > 0.0 for t in regularizer_losses[1:]])

    # with default settings, l1 + l2 losses should equal the l1_l2 loss
    assert np.isclose(
        regularizer_losses[1].numpy() + regularizer_losses[2].numpy(),
        regularizer_losses[3].numpy())

    # cleanup
    shutil.rmtree(IMAGE_DIR, ignore_errors=True)
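
The test_case argument bundles the per-encoder parametrization consumed by test_encoder. Inferred purely from the attribute accesses above, its shape could look like this sketch (the container names themselves are assumptions):

from collections import namedtuple

# Field names mirror the attribute accesses in test_encoder;
# the namedtuple names themselves are assumed.
SyntheticData = namedtuple(
    "SyntheticData",
    ["feature_generator", "feature_generator_args",
     "feature_generator_kwargs"],
)
TestCase = namedtuple(
    "TestCase",
    ["syn_data", "regularizer_parm_names", "XCoder_other_parms"],
)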