def test_initializers_with_pattern():
    wide = Wide(100, 1)
    deeptabular = TabMlp(
        mlp_hidden_dims=[32, 16],
        mlp_dropout=[0.5, 0.5],
        column_idx=column_idx,
        embed_input=embed_input,
        continuous_cols=colnames[-5:],
    )
    deeptext = DeepText(vocab_size=vocab_size, embed_dim=32, padding_idx=0)
    model = WideDeep(wide=wide, deeptabular=deeptabular, deeptext=deeptext, pred_dim=1)
    cmodel = c(model)
    org_word_embed = []
    for n, p in cmodel.named_parameters():
        if "word_embed" in n:
            org_word_embed.append(p)
    trainer = Trainer(model, objective="binary", verbose=0, initializers=initializers_2)
    init_word_embed = []
    for n, p in trainer.model.named_parameters():
        if "word_embed" in n:
            init_word_embed.append(p)
    assert torch.all(org_word_embed[0] == init_word_embed[0].cpu())
def test_initializers_with_pattern():
    wide = Wide(100, 1)
    deepdense = DeepDense(
        hidden_layers=[32, 16],
        dropout=[0.5, 0.5],
        deep_column_idx=deep_column_idx,
        embed_input=embed_input,
        continuous_cols=colnames[-5:],
    )
    deeptext = DeepText(vocab_size=vocab_size, embed_dim=32, padding_idx=0)
    model = WideDeep(wide=wide, deepdense=deepdense, deeptext=deeptext, pred_dim=1)
    cmodel = c(model)
    org_word_embed = []
    for n, p in cmodel.named_parameters():
        if "word_embed" in n:
            org_word_embed.append(p)
    model.compile(method="binary", verbose=0, initializers=initializers_2)
    init_word_embed = []
    for n, p in model.named_parameters():
        if "word_embed" in n:
            init_word_embed.append(p)
    assert torch.all(org_word_embed[0] == init_word_embed[0].cpu())
def test_catch_warning():
    with pytest.warns(UserWarning):
        model3 = DeepText(
            vocab_size=vocab_size,
            embed_dim=32,
            embedding_matrix=pretrained_embeddings,
            padding_idx=0,
        )
def test_catch_warning():
    with pytest.warns(UserWarning):
        model3 = DeepText(
            vocab_size=vocab_size,
            embed_dim=32,
            embed_matrix=pretrained_embeddings,
            padding_idx=0,
        )
    out = model3(torch.from_numpy(padded_sequences))
    assert out.size(0) == 100 and out.size(1) == 64
def test_initializers_1():
    wide = Wide(100, 1)
    deepdense = DeepDense(
        hidden_layers=[32, 16],
        dropout=[0.5, 0.5],
        deep_column_idx=deep_column_idx,
        embed_input=embed_input,
        continuous_cols=colnames[-5:],
    )
    deeptext = DeepText(vocab_size=vocab_size, embed_dim=32, padding_idx=0)
    deepimage = DeepImage(pretrained=True)
    model = WideDeep(
        wide=wide,
        deepdense=deepdense,
        deeptext=deeptext,
        deepimage=deepimage,
        output_dim=1,
    )
    cmodel = c(model)
    org_weights = []
    for n, p in cmodel.named_parameters():
        if n in test_layers_1:
            org_weights.append(p)
    model.compile(method="binary", verbose=0, initializers=initializers_1)
    init_weights = []
    for n, p in model.named_parameters():
        if n in test_layers_1:
            init_weights.append(p)
    res = all(
        [
            torch.all((1 - (a == b).int()).bool())
            for a, b in zip(org_weights, init_weights)
        ]
    )
    assert res
def test_warning_when_missing_initializer():
    wide = Wide(100, 1)
    deeptabular = TabMlp(
        mlp_hidden_dims=[32, 16],
        mlp_dropout=[0.5, 0.5],
        column_idx=column_idx,
        embed_input=embed_input,
        continuous_cols=colnames[-5:],
    )
    deeptext = DeepText(vocab_size=vocab_size, embed_dim=32, padding_idx=0)
    model = WideDeep(wide=wide, deeptabular=deeptabular, deeptext=deeptext, pred_dim=1)
    with pytest.warns(UserWarning):
        trainer = Trainer(  # noqa: F841
            model, objective="binary", verbose=True, initializers=initializers_3
        )
def test_initializers_1(initializers, test_layers):
    wide = Wide(np.unique(X_wide).shape[0], 1)
    deeptabular = TabMlp(
        mlp_hidden_dims=[32, 16],
        mlp_dropout=[0.5, 0.5],
        column_idx=column_idx,
        embed_input=embed_input,
        continuous_cols=colnames[-5:],
    )
    deeptext = DeepText(vocab_size=vocab_size, embed_dim=32, padding_idx=0)
    deepimage = DeepImage(pretrained=True)
    model = WideDeep(
        wide=wide,
        deeptabular=deeptabular,
        deeptext=deeptext,
        deepimage=deepimage,
        pred_dim=1,
    )
    cmodel = c(model)
    org_weights = []
    for n, p in cmodel.named_parameters():
        if n in test_layers:
            org_weights.append(p)
    trainer = Trainer(model, objective="binary", verbose=0, initializers=initializers)
    init_weights = []
    for n, p in trainer.model.named_parameters():
        if n in test_layers:
            init_weights.append(p)
    res = all(
        [
            torch.all((1 - (a == b).int()).bool())
            for a, b in zip(org_weights, init_weights)
        ]
    )
    assert res
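# The `initializers_*` dictionaries and `test_layers` lists used by the tests above
# are fixtures defined elsewhere in the test module. A minimal sketch of what such a
# dictionary might look like is shown below; the specific classes and keys chosen here
# are illustrative assumptions, not the suite's actual fixtures. Initializers are
# passed to the Trainer as a dict keyed by model component name.
from pytorch_widedeep.initializers import KaimingNormal, XavierNormal

example_initializers = {  # hypothetical, for illustration only
    "wide": KaimingNormal,
    "deeptabular": XavierNormal,
    "deeptext": KaimingNormal,
    "deepimage": KaimingNormal,
}
# trainer = Trainer(model, objective="binary", initializers=example_initializers)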
import numpy as np
import torch
import pytest

from pytorch_widedeep.models import DeepText

padded_sequences = np.random.choice(np.arange(1, 100), (100, 48))
padded_sequences = np.hstack(
    (np.repeat(np.array([[0, 0]]), 100, axis=0), padded_sequences)
)
pretrained_embeddings = np.random.rand(1000, 64).astype("float32")
vocab_size = 1000


###############################################################################
# Without Pretrained Embeddings
###############################################################################
model1 = DeepText(vocab_size=vocab_size, embed_dim=32, padding_idx=0)


def test_deep_text():
    out = model1(torch.from_numpy(padded_sequences))
    assert out.size(0) == 100 and out.size(1) == 64


###############################################################################
# With Pretrained Embeddings
###############################################################################
model2 = DeepText(
    vocab_size=vocab_size, embed_matrix=pretrained_embeddings, padding_idx=0
)
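# A hedged sketch of how model2 could be exercised, mirroring test_deep_text above.
# The test name and assertion here are illustrative assumptions rather than the
# original file's contents: with DeepText's default hidden dimension of 64, the
# forward pass should again return a (100, 64) tensor.
def test_deep_text_pretrained_sketch():
    out = model2(torch.from_numpy(padded_sequences))
    assert out.size(0) == 100 and out.size(1) == 64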
    column_idx=tab_preprocessor.column_idx,
    embed_input=tab_preprocessor.embeddings_input,
    continuous_cols=continuous_cols,
)
# # To use TabResnet as the deepdense component simply:
# deepdense = TabResnet(
#     blocks_dims=[64, 32],
#     dropout=0.2,
#     column_idx=tab_preprocessor.column_idx,
#     embed_input=tab_preprocessor.embeddings_input,
#     continuous_cols=continuous_cols,
# )
deeptext = DeepText(
    vocab_size=len(text_processor.vocab.itos),
    hidden_dim=64,
    n_layers=3,
    rnn_dropout=0.5,
    padding_idx=1,
    embed_matrix=text_processor.embedding_matrix,
)
deepimage = DeepImage(pretrained=True, head_hidden_dims=None)
model = WideDeep(
    wide=wide, deeptabular=deepdense, deeptext=deeptext, deepimage=deepimage
)

wide_opt = torch.optim.Adam(model.wide.parameters(), lr=0.01)
deep_opt = torch.optim.Adam(model.deeptabular.parameters())
text_opt = RAdam(model.deeptext.parameters())
img_opt = RAdam(model.deepimage.parameters())

wide_sch = torch.optim.lr_scheduler.StepLR(wide_opt, step_size=5)
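# A hedged sketch of how per-component optimizers and schedulers like the ones above
# are typically wired into the Trainer. The remaining schedulers and the Trainer call
# below are illustrative assumptions, not necessarily the original script's
# continuation: pytorch-widedeep accepts dicts keyed by model component name.
deep_sch = torch.optim.lr_scheduler.StepLR(deep_opt, step_size=5)
text_sch = torch.optim.lr_scheduler.StepLR(text_opt, step_size=5)
img_sch = torch.optim.lr_scheduler.StepLR(img_opt, step_size=5)

optimizers = {
    "wide": wide_opt,
    "deeptabular": deep_opt,
    "deeptext": text_opt,
    "deepimage": img_opt,
}
lr_schedulers = {
    "wide": wide_sch,
    "deeptabular": deep_sch,
    "deeptext": text_sch,
    "deepimage": img_sch,
}

trainer = Trainer(
    model, objective="binary", optimizers=optimizers, lr_schedulers=lr_schedulers
)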
import numpy as np
import torch
import pytest

from pytorch_widedeep.models import DeepText

padded_sequences = np.random.choice(np.arange(1, 100), (100, 48))
padded_sequences = np.hstack(
    (np.repeat(np.array([[0, 0]]), 100, axis=0), padded_sequences)
)
pretrained_embeddings = np.random.rand(1000, 64)
vocab_size = 1000


###############################################################################
# Without Pretrained Embeddings
###############################################################################
model1 = DeepText(vocab_size=vocab_size, embed_dim=32, padding_idx=0)


def test_deep_test():
    out = model1(torch.from_numpy(padded_sequences))
    assert out.size(0) == 100 and out.size(1) == 64


###############################################################################
# With Pretrained Embeddings
###############################################################################
model2 = DeepText(
    vocab_size=vocab_size, embedding_matrix=pretrained_embeddings, padding_idx=0
)


def test_deep_test_pretrained():
    X_img_val,
    y_train,
    y_val,
) = train_test_split(X_wide, X_deep, X_text, X_img, target)

# build model components
wide = Wide(np.unique(X_wide).shape[0], 1)
deepdense = DeepDense(
    hidden_layers=[32, 16],
    dropout=[0.5, 0.5],
    deep_column_idx={k: v for v, k in enumerate(colnames)},
    embed_input=embed_input,
    continuous_cols=colnames[-5:],
)
deeptext = DeepText(vocab_size=vocab_size, embed_dim=32, padding_idx=0)
deepimage = DeepImage(pretrained=True)

# transforms
mean = [0.406, 0.456, 0.485]  # BGR
std = [0.225, 0.224, 0.229]  # BGR
transforms1 = [ToTensor, Normalize(mean=mean, std=std)]
transforms2 = [Normalize(mean=mean, std=std)]


##############################################################################
# Test many possible scenarios of data inputs I can think of. Surely users
# will input something unexpected
##############################################################################
@pytest.mark.parametrize(
    "X_wide, X_deep, X_text, X_img, X_train, X_val, target, val_split, transforms, nepoch, null",
import pytest

from pytorch_widedeep.models import (
    Wide,
    DeepText,
    WideDeep,
    DeepDense,
    DeepImage,
)

embed_input = [(u, i, j) for u, i, j in zip(["a", "b", "c"][:4], [4] * 3, [8] * 3)]
deep_column_idx = {k: v for v, k in enumerate(["a", "b", "c"])}

wide = Wide(10, 1)
deepdense = DeepDense(
    hidden_layers=[16, 8], deep_column_idx=deep_column_idx, embed_input=embed_input
)
deeptext = DeepText(vocab_size=100, embed_dim=8)
deepimage = DeepImage(pretrained=False)


###############################################################################
# test raising 'output dim errors'
###############################################################################
@pytest.mark.parametrize(
    "deepcomponent, component_name",
    [
        (None, "dense"),
        (deeptext, "text"),
        (deepimage, "image"),
    ],
)
import numpy as np
import torch
import pytest

from pytorch_widedeep.models import DeepText

padded_sequences = np.random.choice(np.arange(1, 100), (100, 48))
padded_sequences = np.hstack(
    (np.repeat(np.array([[0, 0]]), 100, axis=0), padded_sequences)
)
pretrained_embeddings = np.random.rand(1000, 64).astype("float32")
vocab_size = 1000


###############################################################################
# Without Pretrained Embeddings
###############################################################################
model1 = DeepText(vocab_size=vocab_size, embed_dim=32, padding_idx=0)


def test_deep_text():
    out = model1(torch.from_numpy(padded_sequences))
    assert out.size(0) == 100 and out.size(1) == 64


###############################################################################
# With Pretrained Embeddings
###############################################################################
model2 = DeepText(
    vocab_size=vocab_size, embedding_matrix=pretrained_embeddings, padding_idx=0
)