def test_no_parameter_updates_when_norm_0(self, classifier_module, classifier_data):
    from copy import deepcopy

    import numpy as np
    from skorch import NeuralNetClassifier
    from skorch.callbacks import GradientNormClipping
    from skorch.utils import to_numpy

    net = NeuralNetClassifier(
        classifier_module,
        callbacks=[('grad_norm', GradientNormClipping(0))],
        train_split=None,
        warm_start=True,
        max_epochs=1,
    )
    net.initialize()

    # Snapshot the parameters, fit one epoch, and verify nothing changed.
    params_before = deepcopy(list(net.module_.parameters()))
    net.fit(*classifier_data)
    params_after = net.module_.parameters()
    for p0, p1 in zip(params_before, params_after):
        p0, p1 = to_numpy(p0), to_numpy(p1)
        assert np.allclose(p0, p1)
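# A minimal sketch (assumption: only torch is needed) of why the test above
# holds: skorch's GradientNormClipping delegates to
# torch.nn.utils.clip_grad_norm_, and a max norm of 0 rescales every gradient
# to (effectively) zero, so the optimizer step cannot move the parameters.
import torch

lin = torch.nn.Linear(3, 1)
loss = lin(torch.randn(8, 3)).sum()
loss.backward()

torch.nn.utils.clip_grad_norm_(lin.parameters(), max_norm=0)
print(lin.weight.grad)  # ~0 everywhere after clipping, hence no update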
def get_pipeline(self):
    regressor = None
    if self.learning_method == "linear":
        regressor = MultiOutputRegressor(
            LinearRegression(fit_intercept=self.fit_intercept), n_jobs=6)
    elif self.learning_method == "booster":
        regressor = MultiOutputRegressor(
            XGBRegressor(n_jobs=12, n_estimators=self.no_estimators))
    elif self.learning_method == "deep":
        regressor = NeuralNetRegressor(
            module=TemporalConvNet,
            module__num_inputs=1,
            module__num_channels=[2] * self.no_channels,
            module__output_sz=self.horizon,
            module__kernel_size=5,
            module__dropout=0.0,
            max_epochs=60,
            batch_size=256,
            lr=2e-3,
            optimizer=torch.optim.Adam,
            device='cpu',
            iterator_train__shuffle=True,
            callbacks=[GradientNormClipping(gradient_clip_value=1,
                                            gradient_clip_norm_type=2)],
            train_split=None,
        )

    return ForecasterPipeline([
        # Convert the `y` target into a horizon
        ('pre_horizon', HorizonTransformer(horizon=self.horizon)),
        ('pre_reversible_imputer', ReversibleImputer(y_only=True)),
        ('features', FeatureUnion([
            # Generate a week's worth of autoregressive features
            ('ar_features', AutoregressiveTransformer(
                num_lags=int(self.horizon * self.num_lags),
                pred_stride=self.pred_stride)),
        ])),
        ('post_feature_imputer', ReversibleImputer()),
        ('regressor', regressor),
    ])
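# A self-contained sketch of the "deep" branch above (assumptions: the tiny
# MLP below stands in for TemporalConvNet, and the data is synthetic; only
# the GradientNormClipping configuration is taken from the source):
import numpy as np
import torch
from skorch import NeuralNetRegressor
from skorch.callbacks import GradientNormClipping

class TinyNet(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.net = torch.nn.Sequential(
            torch.nn.Linear(4, 8), torch.nn.ReLU(), torch.nn.Linear(8, 1))

    def forward(self, X):
        return self.net(X)

net = NeuralNetRegressor(
    TinyNet,
    max_epochs=5,
    batch_size=32,
    lr=2e-3,
    optimizer=torch.optim.Adam,
    iterator_train__shuffle=True,
    train_split=None,
    # same clipping settings as the "deep" branch: L2 gradient norm capped at 1
    callbacks=[GradientNormClipping(gradient_clip_value=1,
                                    gradient_clip_norm_type=2)],
)

X = np.random.randn(256, 4).astype(np.float32)
y = X.sum(axis=1, keepdims=True).astype(np.float32)
net.fit(X, y)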
def main():
    parser = argparse.ArgumentParser(
        description='PyTorch RNN with variable-length numeric sequences wrapper'
    )
    parser.add_argument('--outcome_col_name', type=str, required=True)
    parser.add_argument('--train_csv_files', type=str, required=True)
    parser.add_argument('--test_csv_files', type=str, required=True)
    parser.add_argument('--data_dict_files', type=str, required=True)
    parser.add_argument('--batch_size', type=int, default=1024,
                        help='Number of sequences per minibatch')
    parser.add_argument('--epochs', type=int, default=50,
                        help='Number of epochs')
    parser.add_argument('--hidden_units', type=int, default=32,
                        help='Number of hidden units')
    parser.add_argument('--hidden_layers', type=int, default=1,
                        help='Number of hidden layers')
    parser.add_argument('--lr', type=float, default=0.0005,
                        help='Learning rate for the optimizer')
    parser.add_argument('--dropout', type=float, default=0,
                        help='Dropout probability for the RNN')
    parser.add_argument('--weight_decay', type=float, default=0.0001,
                        help='Weight decay for the optimizer')
    parser.add_argument('--seed', type=int, default=1111,
                        help='Random seed')
    parser.add_argument('--validation_size', type=float, default=0.15,
                        help='Validation split size')
    # NOTE: argparse's type=bool treats any non-empty string as True
    parser.add_argument('--is_data_simulated', type=bool, default=False,
                        help='Whether the data is simulated or from MIMIC')
    parser.add_argument('--simulated_data_dir', type=str,
                        default='simulated_data/2-state/',
                        help='Directory in which simulated data is saved. '
                             'Must be provided if is_data_simulated=True')
    parser.add_argument('--output_dir', type=str, default=None,
                        help='Directory where the trained model and loss '
                             'curves over epochs are saved')
    parser.add_argument('--output_filename_prefix', type=str, default=None,
                        help='Prefix for the training history JSONs and '
                             'trained classifier')
    args = parser.parse_args()

    torch.manual_seed(args.seed)
    device = 'cpu'

    x_train_csv_filename, y_train_csv_filename = args.train_csv_files.split(',')
    x_test_csv_filename, y_test_csv_filename = args.test_csv_files.split(',')
    x_dict, y_dict = args.data_dict_files.split(',')
    x_data_dict = load_data_dict_json(x_dict)

    # get the id and feature columns
    id_cols = parse_id_cols(x_data_dict)
    feature_cols = parse_feature_cols(x_data_dict)

    # extract data
    train_vitals = TidySequentialDataCSVLoader(
        x_csv_path=x_train_csv_filename,
        y_csv_path=y_train_csv_filename,
        x_col_names=feature_cols,
        idx_col_names=id_cols,
        y_col_name=args.outcome_col_name,
        y_label_type='per_sequence')

    test_vitals = TidySequentialDataCSVLoader(
        x_csv_path=x_test_csv_filename,
        y_csv_path=y_test_csv_filename,
        x_col_names=feature_cols,
        idx_col_names=id_cols,
        y_col_name=args.outcome_col_name,
        y_label_type='per_sequence')

    X_train, y_train = train_vitals.get_batch_data(batch_id=0)
    X_test, y_test = test_vitals.get_batch_data(batch_id=0)
    _, T, F = X_train.shape

    print('number of time points : %s\nnumber of features : %s\n' % (T, F))

    # set class weights as 1/(number of samples in class) for each class
    # to handle class imbalance
    class_weights = torch.tensor(
        [1 / (y_train == 0).sum(),
         1 / (y_train == 1).sum()]).double()

    # scale features
    # X_train = standard_scaler_3d(X_train)
    # X_test = standard_scaler_3d(X_test)

    # callback to compute gradient norm
    compute_grad_norm = ComputeGradientNorm(norm_type=2)

    # LSTM
    if args.output_filename_prefix is None:
        output_filename_prefix = (
            'hiddens=%s-layers=%s-lr=%s-dropout=%s-weight_decay=%s' %
            (args.hidden_units, args.hidden_layers, args.lr, args.dropout,
             args.weight_decay))
    else:
        output_filename_prefix = args.output_filename_prefix

    print('RNN parameters : ' + output_filename_prefix)
    # from IPython import embed; embed()
    rnn = RNNBinaryClassifier(
        max_epochs=args.epochs,  # was hard-coded to 50, which ignored --epochs
        batch_size=args.batch_size,
        device=device,
        lr=args.lr,
        callbacks=[
            EpochScoring('roc_auc', lower_is_better=False, on_train=True,
                         name='aucroc_score_train'),
            EpochScoring('roc_auc', lower_is_better=False, on_train=False,
                         name='aucroc_score_valid'),
            EarlyStopping(monitor='aucroc_score_valid', patience=20,
                          threshold=0.002, threshold_mode='rel',
                          lower_is_better=False),
            LRScheduler(policy=ReduceLROnPlateau, mode='max',
                        monitor='aucroc_score_valid', patience=10),
            compute_grad_norm,
            GradientNormClipping(gradient_clip_value=0.3,
                                 gradient_clip_norm_type=2),
            Checkpoint(monitor='aucroc_score_valid',
                       f_history=os.path.join(
                           args.output_dir,
                           output_filename_prefix + '.json')),
            TrainEndCheckpoint(dirname=args.output_dir,
                               fn_prefix=output_filename_prefix),
        ],
        criterion=torch.nn.CrossEntropyLoss,
        criterion__weight=class_weights,
        train_split=skorch.dataset.CVSplit(args.validation_size),
        module__rnn_type='LSTM',
        module__n_layers=args.hidden_layers,
        module__n_hiddens=args.hidden_units,
        module__n_inputs=X_train.shape[-1],
        module__dropout_proba=args.dropout,
        optimizer=torch.optim.Adam,
        optimizer__weight_decay=args.weight_decay)

    clf = rnn.fit(X_train, y_train)

    y_pred_proba = clf.predict_proba(X_train)
    y_pred_proba_neg, y_pred_proba_pos = zip(*y_pred_proba)
    auroc_train_final = roc_auc_score(y_train, y_pred_proba_pos)
    print('AUROC with LSTM (Train) : %.2f' % auroc_train_final)

    y_pred_proba = clf.predict_proba(X_test)
    y_pred_proba_neg, y_pred_proba_pos = zip(*y_pred_proba)
    auroc_test_final = roc_auc_score(y_test, y_pred_proba_pos)
    print('AUROC with LSTM (Test) : %.2f' % auroc_test_final)
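# Note (an aside, not from the source): predict_proba returns an
# (n_samples, 2) array, so the positive-class column can be sliced directly
# instead of transposing with zip:
#     y_pred_proba_pos = clf.predict_proba(X_test)[:, 1]
#     auroc_test_final = roc_auc_score(y_test, y_pred_proba_pos)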
def fit(self, X, y, *args, w=None, **kwargs):
    # Determine optional parameters
    if self.claim_count_names is None:
        self.claim_count_names = [
            "claim_count_{}".format(x) for x in range(0, int(y.shape[1] / 2))
        ]
    if self.claim_paid_names is None:
        self.claim_paid_names = [
            "claim_paid_{}".format(x) for x in range(0, int(y.shape[1] / 2))
        ]
    if self.feature_dimension is None:
        self.feature_dimension = X.shape[1]
    if self.output_dimension is None:
        self.output_dimension = len(self.claim_paid_names)
    if self.categorical_dimensions is None:
        self.categorical_dimensions = []
        # TODO: This is a bit slow and unstable; is there a better way?
        for i in range(X.shape[1]):
            X_int = X[:, i].astype(int)
            if np.all((X_int - X[:, i]) == 0):
                self.categorical_dimensions += [(i, np.max(X_int))]
        print(
            "Auto-detected categorical dimensions: {}".format(
                self.categorical_dimensions
            )
        )

    # Standardize outputs
    # self.X_mean = np.mean(X, axis=0)
    # self.X_std = np.std(X, axis=0)
    # Except categoricals
    # for i, j in self.categorical_dimensions:
    #     self.X_mean[i] = 0
    #     self.X_std[i] = 1
    # X = (X - self.X_mean) / self.X_std

    # Shuffle X, y
    X, y = shuffle(X, y, random_state=0)

    # Zero out the weight wherever the outcome is missing
    if w is None:
        w = np.where(np.isnan(y), 0.0, 1.0)
    else:
        w = w * np.where(np.isnan(y), 0.0, 1.0)

    # Append the weight mask as additional input columns
    X = np.hstack([X, w]).astype(np.float32)

    y_mean = np.nanmean(y, axis=0)
    y = np.hstack([y, y, y])
    y = np.where(np.isnan(y), 0, np.maximum(EPSILON, y)).astype(np.float32)

    earlystop = EarlyStopping(patience=self.patience, threshold=0.0)
    gradclip = GradientNormClipping(gradient_clip_value=self.clipnorm)

    if X.shape[0] < self.batch_size:
        print("NOTE: Data size is small, outcomes may be odd.")
        batch_size = X.shape[0]
    else:
        batch_size = self.batch_size

    # One cycle policy (with Adam)
    # Step 1: LR range finder -- test which learning rates fit.
    # Use earlystop to get an idea of the epoch count for the 1-cycle policy
    # as well.
    for lr in self.lr_range:
        super(PUNPPCIClaimRegressor, self).__init__(
            PUNPPCIClaimModule(
                feature_dim=self.feature_dimension,
                output_dim=self.output_dimension,
                cat_dim=self.categorical_dimensions,
                y_mean=y_mean,
                layer_size=self.layer_size,
                device=self.device,
            ),
            *args,
            **kwargs,
            max_epochs=self.max_epochs,
            lr=lr,
            device=self.device,
            optimizer=self.optimizer,
            # optimizer__momentum=self.momentum,
            optimizer__param_groups=[
                ("embeddings_linear*", {"weight_decay": self.l1_l2_linear}),
                ("embeddings_residual*", {"weight_decay": self.l2_weights_residual}),
                ("dense_pricing*", {"weight_decay": self.l2_weights_residual}),
                ("count_linear_0.weight", {"weight_decay": self.l1_l2_linear}),
                ("paid_linear_0.weight", {"weight_decay": self.l1_l2_linear}),
                ("count_residual_spread.bias", {"weight_decay": self.l2_bias_residual}),
                ("paid_residual_spread.bias", {"weight_decay": self.l2_bias_residual}),
                ("count_residual_0.bias", {"weight_decay": self.l2_bias_residual}),
                ("paid_residual_0.bias", {"weight_decay": self.l2_bias_residual}),
            ],
            batch_size=batch_size,
            criterion=nn.MSELoss,
            callbacks=[gradclip, earlystop],
            verbose=0,
        )

        self.initialize_module()
        super(PUNPPCIClaimRegressor, self).fit(X, y)

        if not np.isnan(self.history[-1]["valid_loss"]):
            self.lr_min = self.lr_range[-1]
            self.lr_max = lr
            break

    # Still broken?
    if np.isnan(self.history[-1]["valid_loss"]):
        warn(
            "This model may fail to converge on the data. "
            "Please review data and parameters."
        )
        self.lr_min = self.lr_range[-1]
        self.lr_max = 0.001
        print("Setting maximum learning rate to {}.".format(self.lr_max))

    # Step 2: Cyclic LR with the expected epoch count...
    valid_losses = [x["valid_loss"] for x in self.history]
    expected_epoch_count = valid_losses.index(min(valid_losses)) + 1
    expected_epoch_count = int(np.ceil(expected_epoch_count / 2) * 2)
    expected_epoch_count = 4 if expected_epoch_count < 4 else expected_epoch_count

    print("Setting epochs for training model to {}".format(expected_epoch_count))

    cyclic_lr = LRScheduler(
        policy=CyclicLR,
        base_lr=self.lr_min,
        max_lr=self.lr_max,
        step_size_up=expected_epoch_count / 2,
        step_size_down=expected_epoch_count / 2,
    )

    # ... but still keep training for as many epochs as required.
    super(PUNPPCIClaimRegressor, self).__init__(
        PUNPPCIClaimModule(
            feature_dim=self.feature_dimension,
            output_dim=self.output_dimension,
            cat_dim=self.categorical_dimensions,
            y_mean=y_mean,
            layer_size=self.layer_size,
            device=self.device,
        ),
        max_epochs=expected_epoch_count,
        lr=self.lr_min,
        device=self.device,
        optimizer=self.optimizer,
        # optimizer__momentum=self.momentum,
        optimizer__param_groups=[
            ("embeddings_linear*", {"weight_decay": self.l1_l2_linear}),
            ("embeddings_residual*", {"weight_decay": self.l2_weights_residual}),
            ("dense_pricing*", {"weight_decay": self.l2_weights_residual}),
            ("count_linear_0.weight", {"weight_decay": self.l1_l2_linear}),
            ("paid_linear_0.weight", {"weight_decay": self.l1_l2_linear}),
            ("count_residual_spread.bias", {"weight_decay": self.l2_bias_residual}),
            ("paid_residual_spread.bias", {"weight_decay": self.l2_bias_residual}),
            ("count_residual_0.bias", {"weight_decay": self.l2_bias_residual}),
            ("paid_residual_0.bias", {"weight_decay": self.l2_bias_residual}),
        ],
        batch_size=batch_size,
        criterion=nn.MSELoss,
        callbacks=[
            CheckNaN(),
            # CheckMean(X, self.output_dimension, 1),  # expected_epoch_count
            cyclic_lr,
            gradclip,
            # earlystop,
        ],
    )

    self.initialize_module()
    super(PUNPPCIClaimRegressor, self).fit(X, y)

    # Finished fitting!
    self.is_fitted_ = True
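# A minimal sketch of the two-step schedule implemented above (assumptions:
# a linear toy module and synthetic data; SGD with momentum is used because
# torch's CyclicLR cycles momentum by default; step sizes follow the same
# epoch-denominated convention as the code above):
import numpy as np
import torch
from torch.optim.lr_scheduler import CyclicLR
from skorch import NeuralNetRegressor
from skorch.callbacks import LRScheduler, GradientNormClipping

X = np.random.randn(128, 4).astype(np.float32)
y = X.sum(axis=1, keepdims=True).astype(np.float32)

# Step 1 would pick lr_min/lr_max by trying a range; here they are fixed.
lr_min, lr_max, epochs = 1e-4, 1e-2, 4
net = NeuralNetRegressor(
    torch.nn.Linear,
    module__in_features=4,
    module__out_features=1,
    max_epochs=epochs,
    lr=lr_min,
    optimizer=torch.optim.SGD,
    optimizer__momentum=0.9,
    batch_size=32,
    train_split=None,
    callbacks=[
        GradientNormClipping(gradient_clip_value=1.0),
        # ramp up for half the run, back down for the other half
        LRScheduler(policy=CyclicLR, base_lr=lr_min, max_lr=lr_max,
                    step_size_up=epochs / 2, step_size_down=epochs / 2),
    ],
)
net.fit(X, y)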