def setup_model_scaffolding(raw_df, input_features, output_features):
    """Build a minimally-initialized LudwigModel and a batcher over *raw_df*.

    Generator (intended as a pytest fixture): yields ``(model, batcher)`` and
    keeps the batcher's context open for the duration of the consuming test.

    Args:
        raw_df: raw (unprocessed) training data frame to preprocess.
        input_features: list of input feature config dicts.
        output_features: list of output feature config dicts.

    Yields:
        Tuple of (LudwigModel with an instantiated ``.model``, initialized batcher).
    """
    # Minimal config: just the features under test.
    config = {'input_features': input_features, 'output_features': output_features}

    # Set up model scaffolding for testing.
    model = LudwigModel(config)
    # Preprocess raw data; only the training split and its metadata are needed.
    training_set, _, _, training_set_metadata = preprocess_for_training(
        config, training_set=raw_df, skip_save_processed_input=True
    )
    # Metadata must be attached and folded into the config BEFORE the torch
    # model is created, since feature sizes come from the metadata.
    model.training_set_metadata = training_set_metadata
    update_config_with_metadata(model.config, training_set_metadata)
    model.model = model.create_model(model.config)

    # Set up batcher to iterate through the synthetic data; yielding inside the
    # `with` keeps the batcher open while the consuming test runs.
    with training_set.initialize_batcher() as batcher:
        yield model, batcher
def test_regularizers(
    input_features,
    output_features,
):
    """Check that l1/l2/l1_l2 regularization each raise the training loss
    above the unregularized baseline, and that l1 + l2 ≈ l1_l2."""
    # Pin every RNG source so the four training runs are comparable.
    np.random.seed(RANDOM_SEED)
    torch.manual_seed(RANDOM_SEED)
    random.seed(0)

    data_file = generate_data(input_features, output_features, num_examples=BATCH_SIZE)
    data_df = read_csv(data_file)

    losses = []
    for reg_type in [None, "l1", "l2", "l1_l2"]:
        config = {
            "input_features": input_features,
            "output_features": output_features,
            "combiner": {"type": "concat", "output_size": 14},
            TRAINER: {
                "epochs": 2,
                "regularization_type": reg_type,
                "regularization_lambda": 0.1,
            },
        }

        backend = LocalTestBackend()
        model = LudwigModel(config, backend=backend)

        # Grab one full-size batch of the processed data for the loss probe below.
        processed_dataset, _, _, _ = preprocess_for_training(config, data_df, backend=backend)
        with processed_dataset.initialize_batcher(batch_size=BATCH_SIZE) as batcher:
            batch = batcher.next_batch()

        _, _, _ = model.train(
            training_set=data_df,
            skip_save_processed_input=True,
            skip_save_progress=True,
            skip_save_unprocessed_output=True,
        )

        # Assemble input/target tensors from the cached batch, keyed by feature name.
        inputs = {}
        for in_feat in model.model.input_features.values():
            inputs[in_feat.feature_name] = torch.from_numpy(batch[in_feat.proc_column]).to(DEVICE)
        targets = {}
        for out_feat in model.model.output_features.values():
            targets[out_feat.feature_name] = torch.from_numpy(batch[out_feat.proc_column]).to(DEVICE)

        predictions = model.model((inputs, targets))
        loss, _ = model.model.train_loss(targets, predictions, reg_type, 0.1)
        losses.append(loss)

    # The unregularized run (index 0) must have the lowest loss.
    assert min(losses) == losses[0]
    # Every regularized loss strictly exceeds the unregularized baseline.
    assert torch.all(torch.tensor([t - losses[0] > 0.0 for t in losses[1:]]))
    # With default settings the l1 and l2 penalties should add up to the l1_l2 penalty.
    assert torch.isclose(losses[1] + losses[2] - losses[0], losses[3], rtol=0.1)