Example 1
# assumed imports: module paths follow recent Ludwig releases and may vary by version
from ludwig.api import LudwigModel
from ludwig.data.preprocessing import preprocess_for_training
from ludwig.utils.trainer_utils import update_config_with_metadata


def setup_model_scaffolding(
        raw_df,
        input_features,
        output_features
):
    # set up the feature config for testing
    config = {'input_features': input_features,
              'output_features': output_features}

    # set up model scaffolding for testing
    model = LudwigModel(config)
    training_set, _, _, training_set_metadata = preprocess_for_training(
        config,
        training_set=raw_df,
        skip_save_processed_input=True
    )
    model.training_set_metadata = training_set_metadata
    update_config_with_metadata(
        model.config,
        training_set_metadata
    )
    model.model = model.create_model(model.config)

    # set up a batcher to iterate over the synthetic data
    with training_set.initialize_batcher() as batcher:
        yield model, batcher
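
Because setup_model_scaffolding yields rather than returns, it is written to be wrapped as a pytest generator fixture. Below is a minimal sketch of that wiring; the fixture names (scaffolding, raw_df, input_features, output_features) and the sample test are illustrative assumptions, not part of the original snippet:

import pytest


@pytest.fixture
def scaffolding(raw_df, input_features, output_features):
    # delegate to the generator so any code after its yield still runs as teardown
    yield from setup_model_scaffolding(raw_df, input_features, output_features)


def test_batcher_produces_rows(scaffolding):
    model, batcher = scaffolding
    batch = batcher.next_batch()
    assert batch is not None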
Example 2
# assumed imports and module-level constants; the constant values below are
# illustrative stand-ins for whatever the original test module defines
import random

import numpy as np
import torch

from ludwig.api import LudwigModel
from ludwig.constants import TRAINER
from ludwig.data.preprocessing import preprocess_for_training
from ludwig.utils.data_utils import read_csv
from tests.integration_tests.utils import LocalTestBackend, generate_data

BATCH_SIZE = 128
RANDOM_SEED = 42
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"


def test_regularizers(
    input_features,
    output_features,
):
    np.random.seed(RANDOM_SEED)
    torch.manual_seed(RANDOM_SEED)
    random.seed(0)

    data_file = generate_data(input_features, output_features, num_examples=BATCH_SIZE)
    data_df = read_csv(data_file)

    regularizer_losses = []
    for regularization_type in [None, "l1", "l2", "l1_l2"]:

        config = {
            "input_features": input_features,
            "output_features": output_features,
            "combiner": {"type": "concat", "output_size": 14},
            TRAINER: {"epochs": 2, "regularization_type": regularization_type, "regularization_lambda": 0.1},
        }

        backend = LocalTestBackend()
        model = LudwigModel(config, backend=backend)
        # preprocessing returns a Ludwig dataset object (not a DataFrame);
        # it exposes initialize_batcher for iterating over processed rows
        processed_dataset, _, _, _ = preprocess_for_training(config, data_df, backend=backend)
        with processed_dataset.initialize_batcher(batch_size=BATCH_SIZE) as batcher:
            batch = batcher.next_batch()

        # train briefly so the model holds fitted parameters
        _, _, _ = model.train(
            training_set=data_df,
            skip_save_processed_input=True,
            skip_save_progress=True,
            skip_save_unprocessed_output=True,
        )

        # build name-keyed tensors for a manual forward pass on the batch
        inputs = {
            i_feat.feature_name: torch.from_numpy(batch[i_feat.proc_column]).to(DEVICE)
            for i_feat in model.model.input_features.values()
        }
        targets = {
            o_feat.feature_name: torch.from_numpy(batch[o_feat.proc_column]).to(DEVICE)
            for o_feat in model.model.output_features.values()
        }
        predictions = model.model((inputs, targets))

        loss, _ = model.model.train_loss(targets, predictions, regularization_type, 0.1)
        regularizer_losses.append(loss)

    # regularization_type=None should produce the lowest loss
    assert min(regularizer_losses) == regularizer_losses[0]

    # the l1, l2 and l1_l2 penalties (loss above the unregularized baseline) should each be positive
    assert torch.all(torch.tensor([t - regularizer_losses[0] > 0.0 for t in regularizer_losses[1:]]))

    # with default settings the penalties are additive: l1 loss + l2 loss - baseline ≈ l1_l2 loss
    assert torch.isclose(
        regularizer_losses[1] + regularizer_losses[2] - regularizer_losses[0], regularizer_losses[3], rtol=0.1
    )
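
Why the final assertion holds: an l1_l2 penalty is the sum of the l1 and l2 penalties over the same parameters, so adding the l1 and l2 losses and subtracting the shared unregularized baseline recovers the l1_l2 loss. The sketch below illustrates how such a penalty is typically computed over a module's parameters; it is a self-contained illustration, not Ludwig's internal train_loss implementation:

import torch


def regularization_penalty(module: torch.nn.Module, reg_type: str, lam: float) -> torch.Tensor:
    # accumulate elementwise |w| and/or w^2 over all trainable parameters
    penalty = torch.tensor(0.0)
    for p in module.parameters():
        if reg_type in ("l1", "l1_l2"):
            penalty = penalty + p.abs().sum()
        if reg_type in ("l2", "l1_l2"):
            penalty = penalty + p.pow(2).sum()
    return lam * penalty


linear = torch.nn.Linear(4, 2)
l1 = regularization_penalty(linear, "l1", 0.1)
l2 = regularization_penalty(linear, "l2", 0.1)
l1_l2 = regularization_penalty(linear, "l1_l2", 0.1)
# the additivity checked by the test: l1 + l2 == l1_l2 for the same weights
assert torch.isclose(l1 + l2, l1_l2)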