def model_train_pred(fold):
     """Train a TabNet regressor on one CV fold; return (oof, test predictions).

     NOTE(review): this is a plain function but reads ``self.LEARNING_RATE``,
     ``self.EPOCHS`` and ``self.BATCH_SIZE`` — it was presumably extracted from
     a class method. Confirm ``self`` is in scope at the call site, or these
     lookups will raise NameError.

     Relies on module-level names defined elsewhere in the file: ``model_dir``,
     ``model_file_name``, ``preprocess``, ``variance_threshold``, ``df_train``,
     ``df_train_x``, ``df_train_y``, ``df_test_x``, ``no_of_components``,
     ``pos_weight``, ``SmoothBCEwLogits``, ``TabNetRegressor``.

     Parameters
     ----------
     fold : int
         Cross-validation fold index; selects the validation split and is
         embedded in the checkpoint filename.

     Returns
     -------
     oof : np.ndarray
         Array shaped like ``df_train_y``; rows at ``val_idx`` hold sigmoid
         probabilities for this fold's validation set, all other rows are 0.
     predictions : np.ndarray
         Sigmoid probabilities for the test set.
     """
     
     # Per-fold checkpoint path (TabNet's save_model appends its own extension).
     model_path = os.path.join(model_dir, model_file_name + f"_FOLD{fold}.pth")
     tabnet_params = dict(n_d = 64, n_a = 128, n_steps = 1,
                          gamma = 1.3,lambda_sparse = 0,
                          n_independent = 2,n_shared = 1,optimizer_fn = optim.Adam,
                          optimizer_params = dict(lr = self.LEARNING_RATE, weight_decay = 1e-5),
                          mask_type = "entmax",
                          # ReduceLROnPlateau: shrink LR by factor 0.9 after 10
                          # stalled epochs, floor at 1e-5.
                          scheduler_params = dict(mode = "min", patience = 10, min_lr = 1e-5, factor = 0.9),
                          scheduler_fn = ReduceLROnPlateau,verbose = 10)
     
     # Split this fold into train/val and get the (already copied) test
     # features plus the validation row indices used for OOF placement.
     x_fold_train, y_fold_train, x_fold_val, y_fold_val, df_test_x_copy, val_idx = \
     preprocess(fold, df_train, df_train_x, df_train_y, df_test_x, no_of_components)
     # Drop low-variance features consistently across all three frames.
     x_fold_train, x_fold_val, df_test_x_copy = variance_threshold(x_fold_train, x_fold_val, df_test_x_copy)
     
     ### Fit ###
     model = TabNetRegressor(**tabnet_params)
     # Multi-label targets trained with a label-smoothed BCE-with-logits loss;
     # early stopping after 40 epochs without "val" metric improvement.
     model.fit(X_train = x_fold_train.values, y_train = y_fold_train.values,
               eval_set = [(x_fold_val.values, y_fold_val.values)], eval_name = ["val"],
               eval_metric = ["logits_ll"],max_epochs = self.EPOCHS,
               patience = 40,batch_size = self.BATCH_SIZE,
               virtual_batch_size = 32,num_workers = 1,drop_last = False,
               loss_fn = SmoothBCEwLogits(smoothing = 1e-4, pos_weight=pos_weight))
     
     ###---- Prediction ---
     oof = np.zeros(df_train_y.shape)
     # Model outputs logits; apply the sigmoid manually to get probabilities.
     valid_preds = 1 / (1 + np.exp(-model.predict(x_fold_val.values)))
     oof[val_idx] = valid_preds
     predictions = 1 / (1 + np.exp(-model.predict(df_test_x_copy.values)))
     # save_model returns the actual saved path (it appends ".zip").
     model_path = model.save_model(model_path)
     return oof, predictions
# ----------------------------------------------------------------------
# Code example #2: alternative PyTorch (CNN) implementation of the same
# fold-training routine.
# ----------------------------------------------------------------------
        def model_train_pred(fold,
                             Model=CNN_Model,
                             df_train_y=df_train_y,
                             df_test_y=df_test_y,
                             features=features,
                             file_name=model_file_name):
            """Train `Model` on one CV fold, checkpointing the best-val-loss
            epoch, and return (oof, test predictions).

            Relies on names from the enclosing scope: ``preprocess``,
            ``TrainDataset``/``TestDataset``, ``train_fn``/``valid_fn``/
            ``inference_fn``, ``num_features``, ``num_targets``,
            ``hidden_size``, ``DEVICE``, ``WEIGHT_DECAY``, ``pos_weight``,
            ``EARLY_STOP``, ``EARLY_STOPPING_STEPS``, ``model_dir``, plus
            ``self.BATCH_SIZE`` / ``self.LEARNING_RATE`` / ``self.EPOCHS``
            from the enclosing method.

            Parameters
            ----------
            fold : int
                CV fold index; selects the split and names the checkpoint.
            Model : callable
                Model class constructed as
                ``Model(num_features=..., num_targets=..., hidden_size=...)``.
            df_train_y, df_test_y : DataFrame-like
                Used only for their ``.shape`` to size the output arrays.
            features, file_name : unused here except for the checkpoint name.

            Returns
            -------
            oof : np.ndarray
                Shaped like ``df_train_y``; rows at this fold's ``val_idx``
                hold the best epoch's validation predictions, others are 0.
            predictions : np.ndarray
                Predictions over the test set from the best checkpoint.
            """
            model_path = os.path.join(model_dir,
                                      file_name + f"_FOLD{fold}.pth")
            x_fold_train, y_fold_train, x_fold_val, y_fold_val, df_test_x_copy, val_idx = \
            preprocess(fold, df_train, df_train_x, df_train_y, df_test_x, no_of_components)
            train_dataset = TrainDataset(x_fold_train.values,
                                         y_fold_train.values)
            valid_dataset = TrainDataset(x_fold_val.values, y_fold_val.values)

            trainloader = torch.utils.data.DataLoader(
                train_dataset, batch_size=self.BATCH_SIZE, shuffle=True)
            validloader = torch.utils.data.DataLoader(
                valid_dataset, batch_size=self.BATCH_SIZE, shuffle=False)

            model = Model(num_features=num_features,
                          num_targets=num_targets,
                          hidden_size=hidden_size)
            model.to(DEVICE)
            optimizer = torch.optim.Adam(model.parameters(),
                                         lr=self.LEARNING_RATE,
                                         weight_decay=WEIGHT_DECAY,
                                         eps=1e-9)
            # OneCycleLR steps per batch; max_lr overrides the optimizer lr.
            scheduler = optim.lr_scheduler.OneCycleLR(
                optimizer=optimizer,
                pct_start=0.1,
                div_factor=1e3,
                max_lr=1e-2,
                epochs=self.EPOCHS,
                steps_per_epoch=len(trainloader))
            # Train with label smoothing; validate with plain BCE so the
            # early-stopping criterion is an unsmoothed loss.
            loss_train = SmoothBCEwLogits(smoothing=0.001,
                                          pos_weight=pos_weight)
            loss_val = nn.BCEWithLogitsLoss()
            early_stopping_steps = EARLY_STOPPING_STEPS
            early_step = 0
            oof = np.zeros(df_train_y.shape)
            best_loss = np.inf
            best_loss_epoch = -1

            for epoch in range(self.EPOCHS):
                train_loss = train_fn(model, optimizer, scheduler, loss_train,
                                      trainloader, DEVICE)
                valid_loss, valid_preds = valid_fn(model, loss_val,
                                                   validloader, DEVICE)
                if valid_loss < best_loss:
                    best_loss = valid_loss
                    best_loss_epoch = epoch
                    # Bug fix: reset the patience counter on improvement.
                    # Previously `early_step` was never reset, so training
                    # stopped after EARLY_STOPPING_STEPS *total* (not
                    # consecutive) non-improving epochs.
                    early_step = 0
                    oof[val_idx] = valid_preds
                    torch.save(model.state_dict(), model_path)
                elif (EARLY_STOP == True):
                    early_step += 1
                    if (early_step >= early_stopping_steps):
                        break
                print(
                    f"FOLD: {fold}, EPOCH: {epoch},train_loss: {train_loss:.6f},\
                valid_loss: {valid_loss:.6f} best_loss: {best_loss:.6f}, best_loss_epoch: {best_loss_epoch}"
                )

            #--------------------- PREDICTION---------------------
            testdataset = TestDataset(df_test_x_copy.values)
            testloader = torch.utils.data.DataLoader(
                testdataset, batch_size=self.BATCH_SIZE, shuffle=False)
            # Rebuild a fresh model and reload the best checkpoint, so the
            # test predictions come from the best epoch, not the last one.
            model = Model(num_features=num_features,
                          num_targets=num_targets,
                          hidden_size=hidden_size)
            model.load_state_dict(torch.load(model_path))
            model.to(DEVICE)

            predictions = inference_fn(model, testloader, DEVICE)
            return oof, predictions