import numpy as np

from pytorch_widedeep import Trainer
from pytorch_widedeep.models import Wide, TabTransformer, WideDeep


def test_fit_objectives_tab_transformer(
    X_wide,
    X_tab,
    target,
    objective,
    X_wide_test,
    X_tab_test,
    X_test,
    pred_dim,
    probs_dim,
):
    wide = Wide(np.unique(X_wide).shape[0], pred_dim)
    tab_transformer = TabTransformer(
        column_idx={k: v for v, k in enumerate(colnames)},
        embed_input=embed_input_tt,
        continuous_cols=colnames[5:],
    )
    model = WideDeep(wide=wide, deeptabular=tab_transformer, pred_dim=pred_dim)
    trainer = Trainer(model, objective=objective, verbose=0)
    trainer.fit(X_wide=X_wide, X_tab=X_tab, target=target, batch_size=16)
    preds = trainer.predict(X_wide=X_wide, X_tab=X_tab, X_test=X_test)
    assert preds.shape[0] == 32
    if objective != "binary":
        # probs has one column per class, so its width should equal probs_dim
        probs = trainer.predict_proba(X_wide=X_wide, X_tab=X_tab, X_test=X_test)
        assert probs.shape[1] == probs_dim
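# The test references module-level `colnames` and `embed_input_tt` fixtures
# that are not part of this excerpt. Below is a self-contained sketch of that
# setup plus a single "binary" run of the same fit/predict cycle; all shapes,
# category counts, and the objective are illustrative assumptions, not the
# repository's exact fixture matrix.
import string

colnames = list(string.ascii_lowercase)[:10]
embed_input_tt = [(c, 5) for c in colnames[:5]]  # (column name, n categories)

X_wide = np.random.choice(10, (32, 4))
X_tab = np.vstack(
    [np.random.choice(5, 32) for _ in range(5)]
    + [np.random.rand(32) for _ in range(5)]
).transpose()
target = np.random.choice(2, 32)

wide = Wide(np.unique(X_wide).shape[0], 1)
deeptabular = TabTransformer(
    column_idx={k: v for v, k in enumerate(colnames)},
    embed_input=embed_input_tt,
    continuous_cols=colnames[5:],
)
model = WideDeep(wide=wide, deeptabular=deeptabular)
trainer = Trainer(model, objective="binary", verbose=0)
trainer.fit(X_wide=X_wide, X_tab=X_tab, target=target, n_epochs=1, batch_size=16)
preds = trainer.predict(X_wide=X_wide, X_tab=X_tab)
assert preds.shape[0] == 32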
def set_model(args, prepare_tab, mlp_hidden_dims_same):
    if args.mlp_hidden_dims == "same":
        mlp_hidden_dims = [
            mlp_hidden_dims_same * args.input_dim,
            mlp_hidden_dims_same * args.input_dim,
            (mlp_hidden_dims_same * args.input_dim) // 2,
        ]
    elif args.mlp_hidden_dims == "None":
        mlp_hidden_dims = None
    else:
        mlp_hidden_dims = eval(args.mlp_hidden_dims)

    deeptabular = TabTransformer(
        column_idx=prepare_tab.column_idx,
        embed_input=prepare_tab.embeddings_input,
        embed_dropout=args.embed_dropout,
        continuous_cols=prepare_tab.continuous_cols,
        full_embed_dropout=args.full_embed_dropout,
        shared_embed=args.shared_embed,
        add_shared_embed=args.add_shared_embed,
        frac_shared_embed=args.frac_shared_embed,
        input_dim=args.input_dim,
        n_heads=args.n_heads,
        n_blocks=args.n_blocks,
        dropout=args.dropout,
        ff_hidden_dim=4 * args.input_dim
        if not args.ff_hidden_dim
        else args.ff_hidden_dim,
        transformer_activation=args.transformer_activation,
        mlp_hidden_dims=mlp_hidden_dims,
        mlp_activation=args.mlp_activation,
        mlp_batchnorm=args.mlp_batchnorm,
        mlp_batchnorm_last=args.mlp_batchnorm_last,
        mlp_linear_first=args.mlp_linear_first,
    )
    model = WideDeep(deeptabular=deeptabular)
    return model
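# A hedged sketch of the `args` object `set_model` consumes. The real script
# presumably builds it with argparse; SimpleNamespace stands in here, and
# every value below is an illustrative assumption (including the `optimizer`,
# `lr`, and `batch_size` fields used by the training-loop snippet further
# down).
from types import SimpleNamespace

args = SimpleNamespace(
    mlp_hidden_dims="same",
    embed_dropout=0.1,
    full_embed_dropout=False,
    shared_embed=False,
    add_shared_embed=False,
    frac_shared_embed=0.25,  # unused while shared_embed=False
    input_dim=32,
    n_heads=8,
    n_blocks=4,
    dropout=0.1,
    ff_hidden_dim=None,  # set_model falls back to 4 * input_dim
    transformer_activation="gelu",
    mlp_activation="relu",
    mlp_batchnorm=False,
    mlp_batchnorm_last=False,
    mlp_linear_first=True,
    optimizer="adamw",  # hypothetical; consumed by the set_optimizer sketch below
    lr=0.01,  # hypothetical
    batch_size=200,  # used for steps_per_epoch below
)
# model = set_model(args, prepare_tab, mlp_hidden_dims_same=4)  # needs a fitted TabPreprocessor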
model = WideDeep(deeptabular=deeptabular)
optimizers = set_optimizer(model, args)
steps_per_epoch = (X_train.shape[0] // args.batch_size) + 1
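# `set_optimizer` is not defined in this excerpt; below is a minimal sketch of
# what such a helper could look like, assuming the hypothetical `args.optimizer`
# and `args.lr` fields introduced above. `steps_per_epoch` would typically feed
# a cyclic / one-cycle scheduler's step sizing.
import torch


def set_optimizer(model, args):
    # Map a name to an optimizer class and bind it to all model parameters
    opt_cls = {"adam": torch.optim.Adam, "adamw": torch.optim.AdamW}[
        args.optimizer.lower()
    ]
    return opt_cls(model.parameters(), lr=args.lr)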
import string

import torch
from torch.optim.lr_scheduler import CyclicLR, StepLR

# RAdam here is the implementation bundled with pytorch-widedeep
from pytorch_widedeep.optim import RAdam

# Tab Array
colnames = list(string.ascii_lowercase)[:10]
embed_cols = [np.random.choice(np.arange(5), 32) for _ in range(5)]
embeds_input = [(i, j) for i, j in zip(colnames[:5], [5] * 5)]  # type: ignore[misc]
cont_cols = [np.random.rand(32) for _ in range(5)]
column_idx = {k: v for v, k in enumerate(colnames)}
X_tab = np.vstack(embed_cols + cont_cols).transpose()

# target
target = np.random.choice(2, 32)

# X_wide is assumed to be defined alongside the tab array (see the sketch above)
wide = Wide(np.unique(X_wide).shape[0], 1)
tab_transformer = TabTransformer(
    column_idx=column_idx,
    embed_input=embeds_input,
    continuous_cols=colnames[5:],
)
model_tt = WideDeep(wide=wide, deeptabular=tab_transformer)

# 1. Single optimizer, single scheduler, not cyclic, both passed directly
optimizers_1 = RAdam(model_tt.parameters())
lr_schedulers_1 = StepLR(optimizers_1, step_size=4)

# 2. Multiple optimizers, single scheduler, cyclic, passed via a 1-item
# dictionary
wide_opt_2 = torch.optim.Adam(model_tt.wide.parameters())
deep_opt_2 = RAdam(model_tt.deeptabular.parameters())
deep_sch_2 = CyclicLR(
    deep_opt_2, base_lr=0.001, max_lr=0.01, step_size_up=5, cycle_momentum=False
)
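# A hedged sketch of how these two set-ups reach the Trainer: pytorch-widedeep
# accepts either a single optimizer/scheduler or per-component dictionaries
# keyed by the WideDeep attribute names ("wide", "deeptabular"). The "binary"
# objective, epoch count, and batch size are illustrative.
trainer_1 = Trainer(
    model_tt,
    objective="binary",
    optimizers=optimizers_1,
    lr_schedulers=lr_schedulers_1,
    verbose=0,
)
trainer_2 = Trainer(
    model_tt,
    objective="binary",
    optimizers={"wide": wide_opt_2, "deeptabular": deep_opt_2},
    lr_schedulers={"deeptabular": deep_sch_2},
    verbose=0,
)
trainer_2.fit(X_wide=X_wide, X_tab=X_tab, target=target, n_epochs=1, batch_size=16)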
from pytorch_widedeep.preprocessing import WidePreprocessor, TabPreprocessor
from pytorch_widedeep.initializers import KaimingNormal, XavierNormal
from pytorch_widedeep.callbacks import LRHistory

# up to here `target` held the target column name; it now becomes the array
target = df[target].values

prepare_wide = WidePreprocessor(wide_cols=wide_cols, crossed_cols=crossed_cols)
X_wide = prepare_wide.fit_transform(df)

prepare_deep = TabPreprocessor(
    embed_cols=cat_embed_cols,
    continuous_cols=continuous_cols,
    for_tabtransformer=True,  # type: ignore[arg-type]
)
X_tab = prepare_deep.fit_transform(df)

wide = Wide(wide_dim=np.unique(X_wide).shape[0], pred_dim=1)
deeptabular = TabTransformer(
    column_idx=prepare_deep.column_idx,
    embed_input=prepare_deep.embeddings_input,
    continuous_cols=continuous_cols,
)
model = WideDeep(wide=wide, deeptabular=deeptabular)

wide_opt = torch.optim.Adam(model.wide.parameters(), lr=0.01)
deep_opt = RAdam(model.deeptabular.parameters())
wide_sch = torch.optim.lr_scheduler.StepLR(wide_opt, step_size=3)
deep_sch = torch.optim.lr_scheduler.StepLR(deep_opt, step_size=5)

optimizers = {"wide": wide_opt, "deeptabular": deep_opt}
schedulers = {"wide": wide_sch, "deeptabular": deep_sch}
initializers = {"wide": KaimingNormal, "deeptabular": XavierNormal}
callbacks = [
    LRHistory(n_epochs=10),
]
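# Putting it together: a hedged sketch of the Trainer call these dictionaries
# feed into. The dict keys must match the WideDeep component names ("wide",
# "deeptabular"); the "binary" objective and batch size are illustrative
# assumptions, and n_epochs mirrors LRHistory above.
trainer = Trainer(
    model,
    objective="binary",
    optimizers=optimizers,
    lr_schedulers=schedulers,
    initializers=initializers,
    callbacks=callbacks,
    verbose=1,
)
trainer.fit(X_wide=X_wide, X_tab=X_tab, target=target, n_epochs=10, batch_size=64)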