def test_early_stop():
    """EarlyStopping with patience=3 should stop training after 4 epochs."""
    wide = Wide(100, 1)
    deepdense = DeepDense(
        hidden_layers=[32, 16],
        dropout=[0.5, 0.5],
        deep_column_idx=deep_column_idx,
        embed_input=embed_input,
        continuous_cols=colnames[-5:],
    )
    model = WideDeep(wide=wide, deepdense=deepdense)
    early_stop = EarlyStopping(
        min_delta=0.1, patience=3, restore_best_weights=True, verbose=1
    )
    model.compile(method="binary", callbacks=[early_stop], verbose=1)
    model.fit(
        X_wide=X_wide, X_deep=X_deep, target=target, val_split=0.2, n_epochs=5
    )
    # length of history = patience + 1
    assert len(model.history._history["train_loss"]) == 3 + 1
def test_initializers_with_pattern():
    """Initializers with a pattern must leave the word embeddings untouched."""
    wide = Wide(100, 1)
    deepdense = DeepDense(
        hidden_layers=[32, 16],
        dropout=[0.5, 0.5],
        deep_column_idx=deep_column_idx,
        embed_input=embed_input,
        continuous_cols=colnames[-5:],
    )
    deeptext = DeepText(vocab_size=vocab_size, embed_dim=32, padding_idx=0)
    model = WideDeep(wide=wide, deepdense=deepdense, deeptext=deeptext, pred_dim=1)
    cmodel = c(model)
    # snapshot the word embeddings before compiling
    org_word_embed = [
        p for n, p in cmodel.named_parameters() if "word_embed" in n
    ]
    model.compile(method="binary", verbose=0, initializers=initializers_2)
    init_word_embed = [
        p for n, p in model.named_parameters() if "word_embed" in n
    ]
    # embeddings should be identical before and after compile
    assert torch.all(org_word_embed[0] == init_word_embed[0].cpu())
def test_widedeep_inputs(
    X_wide,
    X_deep,
    X_text,
    X_img,
    X_train,
    X_val,
    target,
    val_split,
    transforms,
    nepoch,
    null,
):
    """Fit with the parametrized input combination and check the run completed."""
    model = WideDeep(
        wide=wide, deepdense=deepdense, deeptext=deeptext, deepimage=deepimage
    )
    model.compile(method="binary", transforms=transforms, verbose=0)
    model.fit(
        X_wide=X_wide,
        X_deep=X_deep,
        X_text=X_text,
        X_img=X_img,
        X_train=X_train,
        X_val=X_val,
        target=target,
        val_split=val_split,
        batch_size=16,
    )
    # the recorded epoch and a non-null loss history mean the run completed
    assert model.history.epoch[0] == nepoch
    assert model.history._history["train_loss"] is not null
def test_model_checkpoint(save_best_only, max_save, n_files):
    """ModelCheckpoint must never keep more than the expected number of files."""
    wide = Wide(100, 1)
    deepdense = DeepDense(
        hidden_layers=[32, 16],
        dropout=[0.5, 0.5],
        deep_column_idx=deep_column_idx,
        embed_input=embed_input,
        continuous_cols=colnames[-5:],
    )
    model = WideDeep(wide=wide, deepdense=deepdense)
    checkpoint = ModelCheckpoint(
        "weights/test_weights", save_best_only=save_best_only, max_save=max_save
    )
    model.compile(method="binary", callbacks=[checkpoint], verbose=0)
    model.fit(X_wide=X_wide, X_deep=X_deep, target=target, n_epochs=5, val_split=0.2)
    # count what was written, then clean up before asserting
    saved = os.listdir("weights/")
    n_saved = len(saved)
    for fname in saved:
        os.remove("weights/" + fname)
    assert n_saved <= n_files
def test_xtrain_xval_assertion(
    X_wide,
    X_deep,
    X_text,
    X_img,
    X_train,
    X_val,
    target,
):
    """Invalid combinations of raw inputs and X_train/X_val must be rejected."""
    model = WideDeep(
        wide=wide, deepdense=deepdense, deeptext=deeptext, deepimage=deepimage
    )
    model.compile(method="binary", verbose=0)
    with pytest.raises(AssertionError):
        model.fit(
            X_wide=X_wide,
            X_deep=X_deep,
            X_text=X_text,
            X_img=X_img,
            X_train=X_train,
            X_val=X_val,
            target=target,
            batch_size=16,
        )
def test_fit_methods(
    X_wide,
    X_deep,
    target,
    method,
    X_wide_test,
    X_deep_test,
    X_test,
    pred_dim,
    probs_dim,
):
    """Fit with each objective and check prediction (and probability) shapes."""
    wide = Wide(np.unique(X_wide).shape[0], pred_dim)
    deepdense = DeepDense(
        hidden_layers=[32, 16],
        dropout=[0.5, 0.5],
        deep_column_idx=deep_column_idx,
        embed_input=embed_input,
        continuous_cols=colnames[-5:],
    )
    model = WideDeep(wide=wide, deepdense=deepdense, pred_dim=pred_dim)
    model.compile(method=method, verbose=0)
    model.fit(X_wide=X_wide, X_deep=X_deep, target=target)
    preds = model.predict(X_wide=X_wide, X_deep=X_deep, X_test=X_test)
    # BUG FIX: the original `assert a == 100, b == probs_dim` used the comma
    # form, which makes the second comparison the assertion *message* — it was
    # never actually checked.  `probs` was also referenced without being
    # assigned when method == "binary".  Both conditions are now real asserts.
    assert preds.shape[0] == 100
    if method != "binary":
        probs = model.predict_proba(X_wide=X_wide, X_deep=X_deep, X_test=X_test)
        assert probs.shape[1] == probs_dim
def test_focal_loss(X_wide, X_deep, target, method, output_dim, probs_dim):
    """Train with focal loss and check the predicted-probability width."""
    wide = Wide(100, output_dim)
    # FIX: dropped the stray, unused `model3 =` alias the original chained here
    deepdense = DeepDense(
        hidden_layers=[32, 16],
        dropout=[0.5, 0.5],
        deep_column_idx=deep_column_idx,
        embed_input=embed_input,
        continuous_cols=colnames[-5:],
    )
    model = WideDeep(wide=wide, deepdense=deepdense, output_dim=output_dim)
    model.compile(method=method, verbose=0, with_focal_loss=True)
    model.fit(X_wide=X_wide, X_deep=X_deep, target=target)
    probs = model.predict_proba(X_wide=X_wide, X_deep=X_deep)
    assert probs.shape[1] == probs_dim
def test_basic_run_with_metrics_binary():
    """A binary run with a metric must log both loss and accuracy."""
    model = WideDeep(wide=wide, deepdense=deepdense)
    model.compile(method="binary", metrics=[Accuracy], verbose=False)
    model.fit(
        X_wide=X_wide,
        X_deep=X_deep,
        target=target,
        n_epochs=1,
        batch_size=16,
        val_split=0.2,
    )
    history = model.history._history
    assert "train_loss" in history and "train_acc" in history
def test_optimizer_scheduler_format():
    """A bare scheduler is invalid when optimizers are given per component."""
    model = WideDeep(deepdense=deepdense)
    adam = torch.optim.Adam(model.deepdense.parameters(), lr=0.01)
    optimizers = {"deepdense": adam}
    scheduler = torch.optim.lr_scheduler.StepLR(adam, step_size=3)
    with pytest.raises(ValueError):
        model.compile(
            method="binary", optimizers=optimizers, lr_schedulers=scheduler
        )
def test_filepath_error():
    """An invalid ModelCheckpoint filepath must raise at compile time."""
    wide = Wide(np.unique(X_wide).shape[0], 1)
    deepdense = DeepDense(
        hidden_layers=[16, 4],
        deep_column_idx=deep_column_idx,
        embed_input=embed_input,
        continuous_cols=colnames[-5:],
    )
    model = WideDeep(wide=wide, deepdense=deepdense)
    with pytest.raises(ValueError):
        model.compile(
            method="binary",
            callbacks=[ModelCheckpoint(filepath="wrong_file_path")],
            verbose=0,
        )
def test_head_layers_individual_components(
    deepdense, deeptext, deepimage, X_deep, X_text, X_img, target
):
    """Fit a model built with head_layers and the parametrized components."""
    model = WideDeep(
        deepdense=deepdense,
        deeptext=deeptext,
        deepimage=deepimage,
        head_layers=[8, 4],
    )
    model.compile(method="binary", verbose=0)
    model.fit(
        X_wide=X_wide,
        X_deep=X_deep,
        X_text=X_text,
        X_img=X_img,
        target=target,
        batch_size=16,
    )
    # check it has run successfully
    assert len(model.history._history) == 1
def test_individual_inputs(
    wide, deepdense, deeptext, deepimage, X_wide, X_deep, X_text, X_img, target
):
    """Fit with the parametrized individual components and inputs."""
    model = WideDeep(
        wide=wide, deepdense=deepdense, deeptext=deeptext, deepimage=deepimage
    )
    model.compile(method="binary", verbose=0)
    model.fit(
        X_wide=X_wide,
        X_deep=X_deep,
        X_text=X_text,
        X_img=X_img,
        target=target,
        batch_size=16,
    )
    # check it has run successfully
    assert len(model.history._history) == 1
def test_initializers_1():
    """Compiling with initializers_1 must change every targeted layer's weights."""
    wide = Wide(100, 1)
    deepdense = DeepDense(
        hidden_layers=[32, 16],
        dropout=[0.5, 0.5],
        deep_column_idx=deep_column_idx,
        embed_input=embed_input,
        continuous_cols=colnames[-5:],
    )
    deeptext = DeepText(vocab_size=vocab_size, embed_dim=32, padding_idx=0)
    deepimage = DeepImage(pretrained=True)
    model = WideDeep(
        wide=wide,
        deepdense=deepdense,
        deeptext=deeptext,
        deepimage=deepimage,
        output_dim=1,
    )
    cmodel = c(model)
    # snapshot the targeted layers before compiling
    org_weights = [p for n, p in cmodel.named_parameters() if n in test_layers_1]
    model.compile(method="binary", verbose=0, initializers=initializers_1)
    init_weights = [p for n, p in model.named_parameters() if n in test_layers_1]
    # every element of every targeted layer must differ from its pre-compile copy
    assert all(
        bool(torch.all(before != after))
        for before, after in zip(org_weights, init_weights)
    )
def test_fit_with_deephead():
    """Fit a model with a custom deephead and check prediction shapes."""
    wide = Wide(np.unique(X_wide).shape[0], 1)
    deepdense = DeepDense(
        hidden_layers=[32, 16],
        deep_column_idx=deep_column_idx,
        embed_input=embed_input,
        continuous_cols=colnames[-5:],
    )
    deephead = nn.Sequential(nn.Linear(16, 8), nn.Linear(8, 4))
    model = WideDeep(wide=wide, deepdense=deepdense, pred_dim=1, deephead=deephead)
    model.compile(method="binary", verbose=0)
    model.fit(X_wide=X_wide, X_deep=X_deep, target=target_binary, batch_size=16)
    preds = model.predict(X_wide=X_wide, X_deep=X_deep, X_test=X_test)
    probs = model.predict_proba(X_wide=X_wide, X_deep=X_deep, X_test=X_test)
    # BUG FIX: the original `assert a == 32, b == 2` used the comma form, which
    # makes the second comparison the assertion *message* — the probs-shape
    # check never ran.  Both conditions are now real asserts.
    assert preds.shape[0] == 32
    assert probs.shape[1] == 2
def test_basic_run_with_metrics_multiclass():
    """A multiclass run with a metric must log both loss and accuracy."""
    wide = Wide(np.unique(X_wide).shape[0], 3)
    deepdense = DeepDense(
        hidden_layers=[32, 16],
        dropout=[0.5, 0.5],
        deep_column_idx={k: v for v, k in enumerate(colnames)},
        embed_input=embed_input,
        continuous_cols=colnames[-5:],
    )
    model = WideDeep(wide=wide, deepdense=deepdense, pred_dim=3)
    model.compile(method="multiclass", metrics=[Accuracy], verbose=False)
    model.fit(
        X_wide=X_wide,
        X_deep=X_deep,
        target=target_multi,
        n_epochs=1,
        batch_size=16,
        val_split=0.2,
    )
    history = model.history._history
    assert "train_loss" in history and "train_acc" in history
def test_predict_with_individual_component(
    wide, deepdense, deeptext, deepimage, X_wide, X_deep, X_text, X_img, target
):
    """Fit and then predict with the parametrized components."""
    model = WideDeep(
        wide=wide, deepdense=deepdense, deeptext=deeptext, deepimage=deepimage
    )
    model.compile(method="binary", verbose=0)
    model.fit(
        X_wide=X_wide,
        X_deep=X_deep,
        X_text=X_text,
        X_img=X_img,
        target=target,
        batch_size=16,
    )
    # simply checking that it runs and produces outputs
    preds = model.predict(X_wide=X_wide, X_deep=X_deep, X_text=X_text, X_img=X_img)
    assert preds.shape[0] == 32 and "train_loss" in model.history._history
continuous_cols=continuous_cols) model = WideDeep(wide=wide, deepdense=deepdense) wide_opt = torch.optim.Adam(model.wide.parameters()) deep_opt = RAdam(model.deepdense.parameters()) wide_sch = torch.optim.lr_scheduler.StepLR(wide_opt, step_size=3) deep_sch = torch.optim.lr_scheduler.StepLR(deep_opt, step_size=5) optimizers = {'wide': wide_opt, 'deepdense': deep_opt} schedulers = {'wide': wide_sch, 'deepdense': deep_sch} initializers = {'wide': KaimingNormal, 'deepdense': XavierNormal} callbacks = [ LRHistory(n_epochs=10), EarlyStopping, ModelCheckpoint(filepath='model_weights/wd_out') ] metrics = [BinaryAccuracy] model.compile(method='binary', optimizers=optimizers, lr_schedulers=schedulers, initializers=initializers, callbacks=callbacks, metrics=metrics) model.fit(X_wide=X_wide, X_deep=X_deep, target=target, n_epochs=10, batch_size=256, val_split=0.2)
prepare_wide = WidePreprocessor(wide_cols=wide_cols, crossed_cols=crossed_cols)
X_wide = prepare_wide.fit_transform(df)

prepare_deep = DensePreprocessor(
    embed_cols=cat_embed_cols, continuous_cols=continuous_cols
)
X_deep = prepare_deep.fit_transform(df)

wide = Wide(wide_dim=np.unique(X_wide).shape[0], pred_dim=3)
deepdense = DeepDense(
    hidden_layers=[64, 32],
    dropout=[0.2, 0.2],
    deep_column_idx=prepare_deep.deep_column_idx,
    embed_input=prepare_deep.embeddings_input,
    continuous_cols=continuous_cols,
)
model = WideDeep(wide=wide, deepdense=deepdense, pred_dim=3)

# a single optimizer over all of the model's parameters
optimizer = torch.optim.Adam(model.parameters(), lr=0.03)
model.compile(method="multiclass", metrics=[Accuracy, F1Score], optimizers=optimizer)
model.fit(
    X_wide=X_wide,
    X_deep=X_deep,
    target=target,
    n_epochs=1,
    batch_size=32,
    val_split=0.2,
)
def test_non_instantiated_callbacks():
    """A callback class (not instance) passed to compile must be instantiated."""
    model = WideDeep(wide=wide, deepdense=deepdense)
    model.compile(method="binary", callbacks=[EarlyStopping])
    # the instantiated callback lands at index 1 — another callback is
    # prepended first by compile (presumably History — verify in the library)
    assert model.callbacks[1].__class__.__name__ == "EarlyStopping"
"deepdense": KaimingNormal, "deeptext": KaimingNormal, "deepimage": KaimingNormal, } mean = [0.406, 0.456, 0.485] # BGR std = [0.225, 0.224, 0.229] # BGR transforms = [ToTensor, Normalize(mean=mean, std=std)] callbacks = [ EarlyStopping, ModelCheckpoint(filepath="model_weights/wd_out.pt") ] model.compile( method="regression", initializers=initializers, optimizers=optimizers, lr_schedulers=schedulers, callbacks=callbacks, transforms=transforms, ) model.fit( X_wide=X_wide, X_deep=X_deep, X_text=X_text, X_img=X_images, target=target, n_epochs=1, batch_size=32, val_split=0.2, )
df.drop("yield", axis=1, inplace=True) target = "yield_cat" target = np.array(df[target].values) prepare_wide = WidePreprocessor(wide_cols=wide_cols, crossed_cols=crossed_cols) X_wide = prepare_wide.fit_transform(df) prepare_deep = DeepPreprocessor(embed_cols=cat_embed_cols, continuous_cols=continuous_cols) X_deep = prepare_deep.fit_transform(df) wide = Wide(wide_dim=X_wide.shape[1], output_dim=3) deepdense = DeepDense( hidden_layers=[64, 32], dropout=[0.2, 0.2], deep_column_idx=prepare_deep.deep_column_idx, embed_input=prepare_deep.embeddings_input, continuous_cols=continuous_cols, ) model = WideDeep(wide=wide, deepdense=deepdense, output_dim=3) model.compile(method="multiclass", metrics=[CategoricalAccuracy]) model.fit( X_wide=X_wide, X_deep=X_deep, target=target, n_epochs=1, batch_size=32, val_split=0.2, )
df.drop("yield", axis=1, inplace=True) target = "yield_cat" target = np.array(df[target].values) prepare_wide = WidePreprocessor(wide_cols=wide_cols, crossed_cols=crossed_cols) X_wide = prepare_wide.fit_transform(df) prepare_deep = DensePreprocessor(embed_cols=cat_embed_cols, continuous_cols=continuous_cols) X_deep = prepare_deep.fit_transform(df) wide = Wide(wide_dim=X_wide.shape[1], pred_dim=3) deepdense = DeepDense( hidden_layers=[64, 32], dropout=[0.2, 0.2], deep_column_idx=prepare_deep.deep_column_idx, embed_input=prepare_deep.embeddings_input, continuous_cols=continuous_cols, ) model = WideDeep(wide=wide, deepdense=deepdense, pred_dim=3) model.compile(method="multiclass", metrics=[Accuracy, F1Score]) model.fit( X_wide=X_wide, X_deep=X_deep, target=target, n_epochs=1, batch_size=32, val_split=0.2, )
def test_multiple_metrics():
    """compile must wrap multiple metric classes in the order they were given."""
    model = WideDeep(wide=wide, deepdense=deepdense)
    model.compile(method="binary", metrics=[Accuracy, Precision])
    names = [m.__class__.__name__ for m in model.metric._metrics]
    assert names[0] == "Accuracy" and names[1] == "Precision"