def test_no_parameter_updates_when_norm_0(self, classifier_module, classifier_data):
    from copy import deepcopy

    import numpy as np
    from skorch import NeuralNetClassifier
    from skorch.callbacks import GradientNormClipping
    from skorch.utils import to_numpy

    net = NeuralNetClassifier(
        classifier_module,
        callbacks=[('grad_norm', GradientNormClipping(0))],
        train_split=None,
        warm_start=True,
        max_epochs=1,
    )
    net.initialize()

    # Snapshot the parameters, fit one epoch, and verify nothing changed.
    params_before = deepcopy(list(net.module_.parameters()))
    net.fit(*classifier_data)
    params_after = net.module_.parameters()
    for p0, p1 in zip(params_before, params_after):
        p0, p1 = to_numpy(p0), to_numpy(p1)
        assert np.allclose(p0, p1)
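# A minimal sketch (assumption: only torch is needed) of why the test above
# holds: skorch's GradientNormClipping delegates to
# torch.nn.utils.clip_grad_norm_, and a max norm of 0 rescales every gradient
# to (effectively) zero, so the optimizer step cannot move the parameters.
import torch

lin = torch.nn.Linear(3, 1)
loss = lin(torch.randn(8, 3)).sum()
loss.backward()

torch.nn.utils.clip_grad_norm_(lin.parameters(), max_norm=0)
print(lin.weight.grad)  # ~0 everywhere after clipping, hence no update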
def get_pipeline(self):
    regressor = None
    if self.learning_method == "linear":
        regressor = MultiOutputRegressor(
            LinearRegression(fit_intercept=self.fit_intercept), n_jobs=6)
    elif self.learning_method == "booster":
        regressor = MultiOutputRegressor(
            XGBRegressor(n_jobs=12, n_estimators=self.no_estimators))
    elif self.learning_method == "deep":
        regressor = NeuralNetRegressor(
            module=TemporalConvNet,
            module__num_inputs=1,
            module__num_channels=[2] * self.no_channels,
            module__output_sz=self.horizon,
            module__kernel_size=5,
            module__dropout=0.0,
            max_epochs=60,
            batch_size=256,
            lr=2e-3,
            optimizer=torch.optim.Adam,
            device='cpu',
            iterator_train__shuffle=True,
            callbacks=[GradientNormClipping(gradient_clip_value=1,
                                            gradient_clip_norm_type=2)],
            train_split=None,
        )

    return ForecasterPipeline([
        # Convert the `y` target into a horizon
        ('pre_horizon', HorizonTransformer(horizon=self.horizon)),
        ('pre_reversible_imputer', ReversibleImputer(y_only=True)),
        ('features', FeatureUnion([
            # Generate a week's worth of autoregressive features
            ('ar_features', AutoregressiveTransformer(
                num_lags=int(self.horizon * self.num_lags),
                pred_stride=self.pred_stride)),
        ])),
        ('post_feature_imputer', ReversibleImputer()),
        ('regressor', regressor),
    ])
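# A self-contained sketch of the "deep" branch above (assumptions: the tiny
# MLP below stands in for TemporalConvNet, and the data is synthetic; only
# the GradientNormClipping configuration is taken from the source):
import numpy as np
import torch
from skorch import NeuralNetRegressor
from skorch.callbacks import GradientNormClipping

class TinyNet(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.net = torch.nn.Sequential(
            torch.nn.Linear(4, 8), torch.nn.ReLU(), torch.nn.Linear(8, 1))

    def forward(self, X):
        return self.net(X)

net = NeuralNetRegressor(
    TinyNet,
    max_epochs=5,
    batch_size=32,
    lr=2e-3,
    optimizer=torch.optim.Adam,
    iterator_train__shuffle=True,
    train_split=None,
    # same clipping settings as the "deep" branch: L2 gradient norm capped at 1
    callbacks=[GradientNormClipping(gradient_clip_value=1,
                                    gradient_clip_norm_type=2)],
)

X = np.random.randn(256, 4).astype(np.float32)
y = X.sum(axis=1, keepdims=True).astype(np.float32)
net.fit(X, y)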
def main():
    parser = argparse.ArgumentParser(
        description='PyTorch RNN with variable-length numeric sequences wrapper'
    )
    parser.add_argument('--outcome_col_name', type=str, required=True)
    parser.add_argument('--train_csv_files', type=str, required=True)
    parser.add_argument('--test_csv_files', type=str, required=True)
    parser.add_argument('--data_dict_files', type=str, required=True)
    parser.add_argument('--batch_size', type=int, default=1024,
                        help='Number of sequences per minibatch')
    parser.add_argument('--epochs', type=int, default=50,
                        help='Number of epochs')
    parser.add_argument('--hidden_units', type=int, default=32,
                        help='Number of hidden units')
    parser.add_argument('--hidden_layers', type=int, default=1,
                        help='Number of hidden layers')
    parser.add_argument('--lr', type=float, default=0.0005,
                        help='Learning rate for the optimizer')
    parser.add_argument('--dropout', type=float, default=0,
                        help='Dropout probability for the RNN')
    parser.add_argument('--weight_decay', type=float, default=0.0001,
                        help='Weight decay for the optimizer')
    parser.add_argument('--seed', type=int, default=1111,
                        help='Random seed')
    parser.add_argument('--validation_size', type=float, default=0.15,
                        help='Validation split size')
    # NOTE: argparse's type=bool treats any non-empty string as True
    parser.add_argument('--is_data_simulated', type=bool, default=False,
                        help='Whether the data is simulated or from MIMIC')
    parser.add_argument('--simulated_data_dir', type=str,
                        default='simulated_data/2-state/',
                        help='Directory in which simulated data is saved. '
                             'Must be provided if is_data_simulated=True')
    parser.add_argument('--output_dir', type=str, default=None,
                        help='Directory where the trained model and loss '
                             'curves over epochs are saved')
    parser.add_argument('--output_filename_prefix', type=str, default=None,
                        help='Prefix for the training history JSONs and '
                             'trained classifier')
    args = parser.parse_args()

    torch.manual_seed(args.seed)
    device = 'cpu'

    x_train_csv_filename, y_train_csv_filename = args.train_csv_files.split(',')
    x_test_csv_filename, y_test_csv_filename = args.test_csv_files.split(',')
    x_dict, y_dict = args.data_dict_files.split(',')
    x_data_dict = load_data_dict_json(x_dict)

    # get the id and feature columns
    id_cols = parse_id_cols(x_data_dict)
    feature_cols = parse_feature_cols(x_data_dict)

    # extract data
    train_vitals = TidySequentialDataCSVLoader(
        x_csv_path=x_train_csv_filename,
        y_csv_path=y_train_csv_filename,
        x_col_names=feature_cols,
        idx_col_names=id_cols,
        y_col_name=args.outcome_col_name,
        y_label_type='per_sequence')

    test_vitals = TidySequentialDataCSVLoader(
        x_csv_path=x_test_csv_filename,
        y_csv_path=y_test_csv_filename,
        x_col_names=feature_cols,
        idx_col_names=id_cols,
        y_col_name=args.outcome_col_name,
        y_label_type='per_sequence')

    X_train, y_train = train_vitals.get_batch_data(batch_id=0)
    X_test, y_test = test_vitals.get_batch_data(batch_id=0)
    _, T, F = X_train.shape

    print('number of time points : %s\nnumber of features : %s\n' % (T, F))

    # set class weights as 1/(number of samples in class) for each class
    # to handle class imbalance
    class_weights = torch.tensor(
        [1 / (y_train == 0).sum(),
         1 / (y_train == 1).sum()]).double()

    # scale features
    # X_train = standard_scaler_3d(X_train)
    # X_test = standard_scaler_3d(X_test)

    # callback to compute gradient norm
    compute_grad_norm = ComputeGradientNorm(norm_type=2)

    # LSTM
    if args.output_filename_prefix is None:
        output_filename_prefix = (
            'hiddens=%s-layers=%s-lr=%s-dropout=%s-weight_decay=%s' %
            (args.hidden_units, args.hidden_layers, args.lr, args.dropout,
             args.weight_decay))
    else:
        output_filename_prefix = args.output_filename_prefix

    print('RNN parameters : ' + output_filename_prefix)
    # from IPython import embed; embed()
    rnn = RNNBinaryClassifier(
        max_epochs=args.epochs,  # was hard-coded to 50, which ignored --epochs
        batch_size=args.batch_size,
        device=device,
        lr=args.lr,
        callbacks=[
            EpochScoring('roc_auc', lower_is_better=False, on_train=True,
                         name='aucroc_score_train'),
            EpochScoring('roc_auc', lower_is_better=False, on_train=False,
                         name='aucroc_score_valid'),
            EarlyStopping(monitor='aucroc_score_valid', patience=20,
                          threshold=0.002, threshold_mode='rel',
                          lower_is_better=False),
            LRScheduler(policy=ReduceLROnPlateau, mode='max',
                        monitor='aucroc_score_valid', patience=10),
            compute_grad_norm,
            GradientNormClipping(gradient_clip_value=0.3,
                                 gradient_clip_norm_type=2),
            Checkpoint(monitor='aucroc_score_valid',
                       f_history=os.path.join(
                           args.output_dir,
                           output_filename_prefix + '.json')),
            TrainEndCheckpoint(dirname=args.output_dir,
                               fn_prefix=output_filename_prefix),
        ],
        criterion=torch.nn.CrossEntropyLoss,
        criterion__weight=class_weights,
        train_split=skorch.dataset.CVSplit(args.validation_size),
        module__rnn_type='LSTM',
        module__n_layers=args.hidden_layers,
        module__n_hiddens=args.hidden_units,
        module__n_inputs=X_train.shape[-1],
        module__dropout_proba=args.dropout,
        optimizer=torch.optim.Adam,
        optimizer__weight_decay=args.weight_decay)

    clf = rnn.fit(X_train, y_train)

    y_pred_proba = clf.predict_proba(X_train)
    y_pred_proba_neg, y_pred_proba_pos = zip(*y_pred_proba)
    auroc_train_final = roc_auc_score(y_train, y_pred_proba_pos)
    print('AUROC with LSTM (Train) : %.2f' % auroc_train_final)

    y_pred_proba = clf.predict_proba(X_test)
    y_pred_proba_neg, y_pred_proba_pos = zip(*y_pred_proba)
    auroc_test_final = roc_auc_score(y_test, y_pred_proba_pos)
    print('AUROC with LSTM (Test) : %.2f' % auroc_test_final)
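# Note (an aside, not from the source): predict_proba returns an
# (n_samples, 2) array, so the positive-class column can be sliced directly
# instead of transposing with zip:
#     y_pred_proba_pos = clf.predict_proba(X_test)[:, 1]
#     auroc_test_final = roc_auc_score(y_test, y_pred_proba_pos)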
def fit(self, X, y, *args, w=None, **kwargs):
    # Determine optional parameters
    if self.claim_count_names is None:
        self.claim_count_names = [
            "claim_count_{}".format(x) for x in range(0, int(y.shape[1] / 2))
        ]
    if self.claim_paid_names is None:
        self.claim_paid_names = [
            "claim_paid_{}".format(x) for x in range(0, int(y.shape[1] / 2))
        ]
    if self.feature_dimension is None:
        self.feature_dimension = X.shape[1]
    if self.output_dimension is None:
        self.output_dimension = len(self.claim_paid_names)
    if self.categorical_dimensions is None:
        self.categorical_dimensions = []
        # TODO: This is a bit slow and unstable; is there a better way?
        for i in range(X.shape[1]):
            X_int = X[:, i].astype(int)
            if np.all((X_int - X[:, i]) == 0):
                self.categorical_dimensions += [(i, np.max(X_int))]
        print(
            "Auto-detected categorical dimensions: {}".format(
                self.categorical_dimensions
            )
        )

    # Standardize outputs
    # self.X_mean = np.mean(X, axis=0)
    # self.X_std = np.std(X, axis=0)
    # Except categoricals
    # for i, j in self.categorical_dimensions:
    #     self.X_mean[i] = 0
    #     self.X_std[i] = 1
    # X = (X - self.X_mean) / self.X_std

    # Shuffle X, y
    X, y = shuffle(X, y, random_state=0)

    # Zero out the weight wherever the outcome is missing
    if w is None:
        w = np.where(np.isnan(y), 0.0, 1.0)
    else:
        w = w * np.where(np.isnan(y), 0.0, 1.0)

    # Append the weight mask as additional input columns
    X = np.hstack([X, w]).astype(np.float32)

    y_mean = np.nanmean(y, axis=0)
    y = np.hstack([y, y, y])
    y = np.where(np.isnan(y), 0, np.maximum(EPSILON, y)).astype(np.float32)

    earlystop = EarlyStopping(patience=self.patience, threshold=0.0)
    gradclip = GradientNormClipping(gradient_clip_value=self.clipnorm)

    if X.shape[0] < self.batch_size:
        print("NOTE: Data size is small, outcomes may be odd.")
        batch_size = X.shape[0]
    else:
        batch_size = self.batch_size

    # One cycle policy (with Adam)
    # Step 1: LR range finder -- test which learning rates fit.
    # Use earlystop to get an idea of the epoch count for the 1-cycle policy
    # as well.
    for lr in self.lr_range:
        super(PUNPPCIClaimRegressor, self).__init__(
            PUNPPCIClaimModule(
                feature_dim=self.feature_dimension,
                output_dim=self.output_dimension,
                cat_dim=self.categorical_dimensions,
                y_mean=y_mean,
                layer_size=self.layer_size,
                device=self.device,
            ),
            *args,
            **kwargs,
            max_epochs=self.max_epochs,
            lr=lr,
            device=self.device,
            optimizer=self.optimizer,
            # optimizer__momentum=self.momentum,
            optimizer__param_groups=[
                ("embeddings_linear*", {"weight_decay": self.l1_l2_linear}),
                ("embeddings_residual*", {"weight_decay": self.l2_weights_residual}),
                ("dense_pricing*", {"weight_decay": self.l2_weights_residual}),
                ("count_linear_0.weight", {"weight_decay": self.l1_l2_linear}),
                ("paid_linear_0.weight", {"weight_decay": self.l1_l2_linear}),
                ("count_residual_spread.bias", {"weight_decay": self.l2_bias_residual}),
                ("paid_residual_spread.bias", {"weight_decay": self.l2_bias_residual}),
                ("count_residual_0.bias", {"weight_decay": self.l2_bias_residual}),
                ("paid_residual_0.bias", {"weight_decay": self.l2_bias_residual}),
            ],
            batch_size=batch_size,
            criterion=nn.MSELoss,
            callbacks=[gradclip, earlystop],
            verbose=0,
        )

        self.initialize_module()
        super(PUNPPCIClaimRegressor, self).fit(X, y)

        if not np.isnan(self.history[-1]["valid_loss"]):
            self.lr_min = self.lr_range[-1]
            self.lr_max = lr
            break

    # Still broken?
    if np.isnan(self.history[-1]["valid_loss"]):
        warn(
            "This model may fail to converge on the data. "
            "Please review data and parameters."
        )
        self.lr_min = self.lr_range[-1]
        self.lr_max = 0.001
        print("Setting maximum learning rate to {}.".format(self.lr_max))

    # Step 2: Cyclic LR with the expected epoch count...
    valid_losses = [x["valid_loss"] for x in self.history]
    expected_epoch_count = valid_losses.index(min(valid_losses)) + 1
    expected_epoch_count = int(np.ceil(expected_epoch_count / 2) * 2)
    expected_epoch_count = 4 if expected_epoch_count < 4 else expected_epoch_count

    print("Setting epochs for training model to {}".format(expected_epoch_count))

    cyclic_lr = LRScheduler(
        policy=CyclicLR,
        base_lr=self.lr_min,
        max_lr=self.lr_max,
        step_size_up=expected_epoch_count / 2,
        step_size_down=expected_epoch_count / 2,
    )

    # ... but still keep training for as many epochs as required.
    super(PUNPPCIClaimRegressor, self).__init__(
        PUNPPCIClaimModule(
            feature_dim=self.feature_dimension,
            output_dim=self.output_dimension,
            cat_dim=self.categorical_dimensions,
            y_mean=y_mean,
            layer_size=self.layer_size,
            device=self.device,
        ),
        max_epochs=expected_epoch_count,
        lr=self.lr_min,
        device=self.device,
        optimizer=self.optimizer,
        # optimizer__momentum=self.momentum,
        optimizer__param_groups=[
            ("embeddings_linear*", {"weight_decay": self.l1_l2_linear}),
            ("embeddings_residual*", {"weight_decay": self.l2_weights_residual}),
            ("dense_pricing*", {"weight_decay": self.l2_weights_residual}),
            ("count_linear_0.weight", {"weight_decay": self.l1_l2_linear}),
            ("paid_linear_0.weight", {"weight_decay": self.l1_l2_linear}),
            ("count_residual_spread.bias", {"weight_decay": self.l2_bias_residual}),
            ("paid_residual_spread.bias", {"weight_decay": self.l2_bias_residual}),
            ("count_residual_0.bias", {"weight_decay": self.l2_bias_residual}),
            ("paid_residual_0.bias", {"weight_decay": self.l2_bias_residual}),
        ],
        batch_size=batch_size,
        criterion=nn.MSELoss,
        callbacks=[
            CheckNaN(),
            # CheckMean(X, self.output_dimension, 1),  # expected_epoch_count
            cyclic_lr,
            gradclip,
            # earlystop,
        ],
    )

    self.initialize_module()
    super(PUNPPCIClaimRegressor, self).fit(X, y)

    # Finished fitting!
    self.is_fitted_ = True
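# A minimal sketch of the two-step schedule implemented above (assumptions:
# a linear toy module and synthetic data; SGD with momentum is used because
# torch's CyclicLR cycles momentum by default; step sizes follow the same
# epoch-denominated convention as the code above):
import numpy as np
import torch
from torch.optim.lr_scheduler import CyclicLR
from skorch import NeuralNetRegressor
from skorch.callbacks import LRScheduler, GradientNormClipping

X = np.random.randn(128, 4).astype(np.float32)
y = X.sum(axis=1, keepdims=True).astype(np.float32)

# Step 1 would pick lr_min/lr_max by trying a range; here they are fixed.
lr_min, lr_max, epochs = 1e-4, 1e-2, 4
net = NeuralNetRegressor(
    torch.nn.Linear,
    module__in_features=4,
    module__out_features=1,
    max_epochs=epochs,
    lr=lr_min,
    optimizer=torch.optim.SGD,
    optimizer__momentum=0.9,
    batch_size=32,
    train_split=None,
    callbacks=[
        GradientNormClipping(gradient_clip_value=1.0),
        # ramp up for half the run, back down for the other half
        LRScheduler(policy=CyclicLR, base_lr=lr_min, max_lr=lr_max,
                    step_size_up=epochs / 2, step_size_down=epochs / 2),
    ],
)
net.fit(X, y)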