# Example 1
def _train_and_predict_r_stage1(X,
                                y,
                                w,
                                fit_mask,
                                pred_mask,
                                n_layers_out: int = DEFAULT_LAYERS_OUT,
                                n_units_out: int = DEFAULT_UNITS_OUT,
                                n_layers_r: int = DEFAULT_LAYERS_R,
                                n_units_r: int = DEFAULT_UNITS_R,
                                penalty_l2: float = DEFAULT_PENALTY_L2,
                                step_size: float = DEFAULT_STEP_SIZE,
                                n_iter: int = DEFAULT_N_ITER,
                                batch_size: int = DEFAULT_BATCH_SIZE,
                                val_split_prop: float = DEFAULT_VAL_SPLIT,
                                early_stopping: bool = True,
                                patience: int = DEFAULT_PATIENCE,
                                n_iter_min: int = DEFAULT_N_ITER_MIN,
                                verbose: int = 1,
                                n_iter_print: int = DEFAULT_N_ITER_PRINT,
                                seed: int = DEFAULT_SEED,
                                nonlin: str = DEFAULT_NONLIN):
    """Fit the R-learner first-stage nuisance models and predict on held-out rows.

    Trains two networks on the `fit_mask` rows — an outcome net on (X, y) and a
    propensity net on (X, w) with `binary_y=True` — and evaluates both on the
    `pred_mask` rows.

    Returns:
        Tuple `(mu_hat, pi_hat)`: outcome-mean and propensity predictions for
        the `pred_mask` rows.
    """
    # ensure w is 1-D so boolean indexing yields a flat vector
    if len(w.shape) > 1:
        w = w.reshape((len(w), ))

    # partition data into the fitting fold and the prediction fold
    X_fit, y_fit, w_fit = X[fit_mask, :], y[fit_mask], w[fit_mask]
    X_pred = X[pred_mask, :]

    # hyperparameters shared by both nuisance networks
    net_kwargs = dict(n_layers_out=n_layers_out,
                      n_units_out=n_units_out,
                      n_layers_r=n_layers_r,
                      n_units_r=n_units_r,
                      penalty_l2=penalty_l2,
                      step_size=step_size,
                      n_iter=n_iter,
                      batch_size=batch_size,
                      val_split_prop=val_split_prop,
                      early_stopping=early_stopping,
                      patience=patience,
                      n_iter_min=n_iter_min,
                      n_iter_print=n_iter_print,
                      verbose=verbose,
                      seed=seed,
                      nonlin=nonlin)

    if verbose > 0:
        print('Training output Net')
    params_out, predict_fun_out = train_output_net_only(
        X_fit, y_fit, **net_kwargs)
    mu_hat = predict_fun_out(params_out, X_pred)

    if verbose > 0:
        print('Training propensity net')
    params_prop, predict_fun_prop = train_output_net_only(
        X_fit, w_fit, binary_y=True, **net_kwargs)
    pi_hat = predict_fun_prop(params_prop, X_pred)

    return mu_hat, pi_hat
# Example 2
def train_tnet(X,
               y,
               w,
               binary_y: bool = False,
               n_layers_out: int = DEFAULT_LAYERS_OUT,
               n_units_out: int = DEFAULT_UNITS_OUT,
               n_layers_r: int = DEFAULT_LAYERS_R,
               n_units_r: int = DEFAULT_UNITS_R,
               penalty_l2: float = DEFAULT_PENALTY_L2,
               step_size: float = DEFAULT_STEP_SIZE,
               n_iter: int = DEFAULT_N_ITER,
               batch_size: int = DEFAULT_BATCH_SIZE,
               val_split_prop: float = DEFAULT_VAL_SPLIT,
               early_stopping: bool = True,
               patience: int = DEFAULT_PATIENCE,
               n_iter_min: int = DEFAULT_N_ITER_MIN,
               verbose: int = 1,
               n_iter_print: int = DEFAULT_N_ITER_PRINT,
               seed: int = DEFAULT_SEED,
               return_val_loss: bool = False,
               train_separate: bool = True,
               penalty_diff: float = DEFAULT_PENALTY_L2,
               nonlin: str = DEFAULT_NONLIN,
               avg_objective: bool = DEFAULT_AVG_OBJECTIVE):
    """Train a T-learner: one potential-outcome net per treatment arm.

    With `train_separate=True` the two heads are fit independently on the
    control (w == 0) and treated (w == 1) subsets; otherwise they are fit
    jointly with a similarity penalty (`penalty_diff`) via
    `_train_tnet_jointly`.

    Returns:
        `((params_0, params_1), (predict_fun_0, predict_fun_1))`, plus the
        summed validation losses when `return_val_loss=True` in the separate
        path.
    """
    # flatten w so boolean masks like w == 0 index rows correctly
    if len(w.shape) > 1:
        w = w.reshape((len(w), ))

    # settings common to both training paths
    common = dict(binary_y=binary_y,
                  n_layers_out=n_layers_out,
                  n_units_out=n_units_out,
                  n_layers_r=n_layers_r,
                  n_units_r=n_units_r,
                  penalty_l2=penalty_l2,
                  step_size=step_size,
                  n_iter=n_iter,
                  batch_size=batch_size,
                  val_split_prop=val_split_prop,
                  early_stopping=early_stopping,
                  patience=patience,
                  n_iter_min=n_iter_min,
                  n_iter_print=n_iter_print,
                  verbose=verbose,
                  seed=seed,
                  return_val_loss=return_val_loss,
                  nonlin=nonlin)

    if not train_separate:
        # joint training, regularizing the heads towards each other
        # NOTE(review): this path forwards return_val_loss but only unpacks
        # (params, predict_fun) — confirm _train_tnet_jointly's return shape
        params, predict_fun = _train_tnet_jointly(X,
                                                  y,
                                                  w,
                                                  penalty_diff=penalty_diff,
                                                  **common)
        return (params[0], params[1]), (predict_fun, predict_fun)

    # fully independent heads, one per treatment arm
    if verbose > 0:
        print('Training PO_0 Net')
    out_0 = train_output_net_only(X[w == 0],
                                  y[w == 0],
                                  avg_objective=avg_objective,
                                  **common)
    if verbose > 0:
        print('Training PO_1 Net')
    out_1 = train_output_net_only(X[w == 1],
                                  y[w == 1],
                                  avg_objective=avg_objective,
                                  **common)

    if return_val_loss:
        params_0, predict_fun_0, loss_0 = out_0
        params_1, predict_fun_1, loss_1 = out_1
        return (params_0, params_1), (predict_fun_0,
                                      predict_fun_1), loss_1 + loss_0

    params_0, predict_fun_0 = out_0
    params_1, predict_fun_1 = out_1
    return (params_0, params_1), (predict_fun_0, predict_fun_1)
# Example 3
def train_r_net(X,
                y,
                w,
                p=None,
                second_stage_strategy: str = R_STRATEGY_NAME,
                data_split: bool = False,
                cross_fit: bool = False,
                n_cf_folds: int = DEFAULT_CF_FOLDS,
                n_layers_out: int = DEFAULT_LAYERS_OUT,
                n_layers_r: int = DEFAULT_LAYERS_R,
                n_layers_r_t: int = DEFAULT_LAYERS_R_T,
                n_layers_out_t: int = DEFAULT_LAYERS_OUT_T,
                n_units_out: int = DEFAULT_UNITS_OUT,
                n_units_r: int = DEFAULT_UNITS_R,
                n_units_out_t: int = DEFAULT_UNITS_OUT_T,
                n_units_r_t: int = DEFAULT_UNITS_R_T,
                penalty_l2: float = DEFAULT_PENALTY_L2,
                penalty_l2_t: float = DEFAULT_PENALTY_L2,
                step_size: float = DEFAULT_STEP_SIZE,
                step_size_t: float = DEFAULT_STEP_SIZE_T,
                n_iter: int = DEFAULT_N_ITER,
                batch_size: int = DEFAULT_BATCH_SIZE,
                val_split_prop: float = DEFAULT_VAL_SPLIT,
                early_stopping: bool = True,
                patience: int = DEFAULT_PATIENCE,
                n_iter_min: int = DEFAULT_N_ITER_MIN,
                verbose: int = 1,
                n_iter_print: int = DEFAULT_N_ITER_PRINT,
                seed: int = DEFAULT_SEED,
                return_val_loss: bool = False,
                nonlin: str = DEFAULT_NONLIN):
    """Train an R-learner.

    Stage 1 estimates the nuisance functions mu(x) (outcome mean) and pi(x)
    (propensity) — on all data, on a random half (`data_split=True`), or
    out-of-fold via cross-fitting (`cross_fit=True`). Stage 2 fits a CATE
    model on the orthogonalized residuals y - mu_hat and w - pi_hat.

    Args:
        X: covariate matrix of shape (n, d).
        y: outcomes, (n,) or (n, 1).
        w: binary treatment indicators, (n,) or (n, 1).
        p: optional known propensity scores; if given they replace the
            estimated pi_hat in stage 2.
        second_stage_strategy: R_STRATEGY_NAME (residual-on-residual R-loss)
            or U_STRATEGY_NAME (regress the pseudo-outcome y_ortho / w_ortho).
        data_split / cross_fit / n_cf_folds: control how stage-1 predictions
            are obtained (cross_fit takes precedence over data_split).
        remaining arguments: architecture/regularization/optimization
            settings forwarded to the network trainers; the `*_t` variants
            configure the second-stage network.

    Returns:
        Whatever the second-stage trainer returns (params and predict
        function, plus validation loss if `return_val_loss=True`).

    Raises:
        ValueError: if `second_stage_strategy` is neither R nor U.
    """
    n = X.shape[0]

    if p is not None:
        # normalize known propensity scores to an (n, 1) column once, up front
        p = check_shape_1d_data(p)

    # first-stage settings, shared by the split and cross-fit paths
    stage1_kwargs = dict(n_layers_out=n_layers_out,
                         n_layers_r=n_layers_r,
                         n_units_out=n_units_out,
                         n_units_r=n_units_r,
                         penalty_l2=penalty_l2,
                         step_size=step_size,
                         n_iter=n_iter,
                         batch_size=batch_size,
                         val_split_prop=val_split_prop,
                         early_stopping=early_stopping,
                         patience=patience,
                         n_iter_min=n_iter_min,
                         verbose=verbose,
                         n_iter_print=n_iter_print,
                         seed=seed,
                         nonlin=nonlin)

    if not cross_fit:
        if not data_split:
            if verbose > 0:
                print('Training first stage with all data (no data splitting)')
            # fit and predict on all rows
            fit_mask = onp.ones(n, dtype=bool)
            pred_mask = onp.ones(n, dtype=bool)
        else:
            if verbose > 0:
                print(
                    'Training first stage with half of the data (data splitting)'
                )
            # sample WITHOUT replacement: the previous replace=True default
            # drew duplicate indices, so the "fit half" was smaller than n/2
            # and overlapped unevenly with the prediction half
            fit_idx = onp.random.choice(n, int(onp.round(n / 2)),
                                        replace=False)
            fit_mask = onp.zeros(n, dtype=bool)

            fit_mask[fit_idx] = 1
            pred_mask = ~fit_mask

        mu_hat, pi_hat = _train_and_predict_r_stage1(X, y, w, fit_mask,
                                                     pred_mask,
                                                     **stage1_kwargs)
        if data_split:
            # keep only the held-out prediction data; index y/w WITHOUT a
            # trailing ':' so both 1-D and 2-D inputs work (they are only
            # guaranteed 2-D after check_shape_1d_data below)
            X, y, w = X[pred_mask, :], y[pred_mask], w[pred_mask]

            if p is not None:
                p = p[pred_mask]

    else:
        if verbose > 0:
            print('Training first stage in {} folds (cross-fitting)'.format(
                n_cf_folds))
        # out-of-fold nuisance predictions via K-fold stratified on treatment
        mu_hat, pi_hat = onp.zeros((n, 1)), onp.zeros((n, 1))
        splitter = StratifiedKFold(n_splits=n_cf_folds,
                                   shuffle=True,
                                   random_state=seed)

        for fold_count, (train_idx, test_idx) in enumerate(
                splitter.split(X, w), start=1):

            if verbose > 0:
                print('Training fold {}.'.format(fold_count))

            pred_mask = onp.zeros(n, dtype=bool)
            pred_mask[test_idx] = 1
            fit_mask = ~pred_mask

            mu_hat[pred_mask], pi_hat[pred_mask] = \
                _train_and_predict_r_stage1(X, y, w, fit_mask, pred_mask,
                                            **stage1_kwargs)

    if verbose > 0:
        print('Training second stage.')

    if p is not None:
        # use the known propensity score (already shape-checked above)
        pi_hat = p

    y, w = check_shape_1d_data(y), check_shape_1d_data(w)
    # orthogonalized residuals for the R-/U-learner second stage
    w_ortho = w - pi_hat
    y_ortho = y - mu_hat

    if second_stage_strategy == R_STRATEGY_NAME:
        return train_r_stage2(X,
                              y_ortho,
                              w_ortho,
                              n_layers_out=n_layers_out_t,
                              n_units_out=n_units_out_t,
                              n_layers_r=n_layers_r_t,
                              n_units_r=n_units_r_t,
                              penalty_l2=penalty_l2_t,
                              step_size=step_size_t,
                              n_iter=n_iter,
                              batch_size=batch_size,
                              val_split_prop=val_split_prop,
                              early_stopping=early_stopping,
                              patience=patience,
                              n_iter_min=n_iter_min,
                              verbose=verbose,
                              n_iter_print=n_iter_print,
                              seed=seed,
                              return_val_loss=return_val_loss,
                              nonlin=nonlin)
    elif second_stage_strategy == U_STRATEGY_NAME:
        return train_output_net_only(X,
                                     y_ortho / w_ortho,
                                     n_layers_out=n_layers_out_t,
                                     n_units_out=n_units_out_t,
                                     n_layers_r=n_layers_r_t,
                                     n_units_r=n_units_r_t,
                                     penalty_l2=penalty_l2_t,
                                     step_size=step_size_t,
                                     n_iter=n_iter,
                                     batch_size=batch_size,
                                     val_split_prop=val_split_prop,
                                     early_stopping=early_stopping,
                                     patience=patience,
                                     n_iter_min=n_iter_min,
                                     verbose=verbose,
                                     n_iter_print=n_iter_print,
                                     seed=seed,
                                     return_val_loss=return_val_loss,
                                     nonlin=nonlin)
    else:
        raise ValueError('R-learner only supports strategies R and U.')
# Example 4
def _train_and_predict_first_stage_s1(X, y, w, fit_mask, pred_mask, binary_y: bool = False,
                                      n_layers_out: int = DEFAULT_LAYERS_OUT,
                                      n_layers_r: int = DEFAULT_LAYERS_R,
                                      n_units_out: int = DEFAULT_UNITS_OUT,
                                      n_units_r: int = DEFAULT_UNITS_R,
                                      penalty_l2: float = DEFAULT_PENALTY_L2,
                                      step_size: float = DEFAULT_STEP_SIZE,
                                      n_iter: int = DEFAULT_N_ITER,
                                      batch_size: int = DEFAULT_BATCH_SIZE,
                                      val_split_prop: float = DEFAULT_VAL_SPLIT,
                                      early_stopping: bool = True,
                                      patience: int = DEFAULT_PATIENCE,
                                      n_iter_min: int = DEFAULT_N_ITER_MIN,
                                      verbose: int = 1, n_iter_print: int = DEFAULT_N_ITER_PRINT,
                                      seed: int = DEFAULT_SEED, nonlin: str = DEFAULT_NONLIN,
                                      avg_objective: bool = False,
                                      transformation: str = AIPW_TRANSFORMATION):
    """Train and predict first-stage estimators using SNet1/TARNet.

    Fits SNet1 (with discrepancy penalty disabled) on the `fit_mask` rows to
    obtain both potential-outcome heads, and — unless the transformation is
    plain regression adjustment — a separate propensity net.

    Returns:
        Tuple `(mu_0_hat, mu_1_hat, pi_hat)` evaluated on the `pred_mask`
        rows; `pi_hat` is `onp.nan` for the RA transformation.
    """
    # split the data into the fitting fold and the prediction fold
    X_fit, y_fit, w_fit = X[fit_mask, :], y[fit_mask], w[fit_mask]
    X_pred = X[pred_mask, :]

    if verbose > 0:
        print('Training SNet1')
    params_cfr, predict_funs_cfr = train_snet1(X_fit, y_fit, w_fit, binary_y=binary_y,
                                               n_layers_r=n_layers_r,
                                               n_units_r=n_units_r, n_layers_out=n_layers_out,
                                               n_units_out=n_units_out, penalty_l2=penalty_l2,
                                               penalty_disc=0, step_size=step_size,
                                               n_iter=n_iter, batch_size=batch_size,
                                               val_split_prop=val_split_prop,
                                               early_stopping=early_stopping,
                                               patience=patience, n_iter_min=n_iter_min,
                                               verbose=verbose, n_iter_print=n_iter_print,
                                               seed=seed, nonlin=nonlin,
                                               avg_objective=avg_objective)
    _, mu_0_hat, mu_1_hat = predict_snet1(X_pred, params_cfr, predict_funs_cfr, return_po=True)

    # compare by value, not identity: `is not` only worked by accident of
    # string interning and silently mis-branches for equal strings that are
    # distinct objects
    if transformation != RA_TRANSFORMATION:
        # every transformation except regression adjustment needs pi_hat
        if verbose > 0:
            print('Training propensity net')
        params_prop, predict_fun_prop = train_output_net_only(X_fit, w_fit,
                                                              binary_y=True,
                                                              n_layers_out=n_layers_out,
                                                              n_units_out=n_units_out,
                                                              n_layers_r=n_layers_r,
                                                              n_units_r=n_units_r,
                                                              penalty_l2=penalty_l2,
                                                              step_size=step_size,
                                                              n_iter=n_iter,
                                                              batch_size=batch_size,
                                                              val_split_prop=val_split_prop,
                                                              early_stopping=early_stopping,
                                                              patience=patience,
                                                              n_iter_min=n_iter_min,
                                                              n_iter_print=n_iter_print,
                                                              verbose=verbose,
                                                              seed=seed, nonlin=nonlin,
                                                              avg_objective=avg_objective)
        pi_hat = predict_fun_prop(params_prop, X_pred)
    else:
        pi_hat = onp.nan

    return mu_0_hat, mu_1_hat, pi_hat
# Example 5
def _train_and_predict_first_stage_t(X, y, w, fit_mask, pred_mask,
                                     binary_y: bool = False,
                                     n_layers_out: int = DEFAULT_LAYERS_OUT,
                                     n_units_out: int = DEFAULT_UNITS_OUT,
                                     n_layers_r: int = DEFAULT_LAYERS_R,
                                     n_units_r: int = DEFAULT_UNITS_R,
                                     penalty_l2: float = DEFAULT_PENALTY_L2,
                                     step_size: float = DEFAULT_STEP_SIZE,
                                     n_iter: int = DEFAULT_N_ITER,
                                     batch_size: int = DEFAULT_BATCH_SIZE,
                                     val_split_prop: float = DEFAULT_VAL_SPLIT,
                                     early_stopping: bool = True,
                                     patience: int = DEFAULT_PATIENCE,
                                     n_iter_min: int = DEFAULT_N_ITER_MIN,
                                     verbose: int = 1, n_iter_print: int = DEFAULT_N_ITER_PRINT,
                                     seed: int = DEFAULT_SEED, nonlin: str = DEFAULT_NONLIN,
                                     avg_objective: bool = False,
                                     transformation: str = AIPW_TRANSFORMATION):
    """Train and predict first-stage estimators using TNet.

    Fits up to three plain output nets on the `fit_mask` rows: one
    potential-outcome head per treatment arm (skipped for the
    Horvitz-Thompson transformation) and a propensity net (skipped for the
    regression-adjustment transformation).

    Returns:
        Tuple `(mu_0, mu_1, pi_hat)` evaluated on the `pred_mask` rows;
        skipped estimators are returned as `onp.nan`.
    """
    # ensure w is 1-D so masks like w_fit == 0 index rows correctly
    if len(w.shape) > 1:
        w = w.reshape((len(w),))

    # split the data into the fitting fold and the prediction fold
    X_fit, y_fit, w_fit = X[fit_mask, :], y[fit_mask], w[fit_mask]
    X_pred = X[pred_mask, :]

    # settings shared by all three nuisance networks
    net_kwargs = dict(n_layers_out=n_layers_out,
                      n_units_out=n_units_out,
                      n_layers_r=n_layers_r,
                      n_units_r=n_units_r,
                      penalty_l2=penalty_l2,
                      step_size=step_size,
                      n_iter=n_iter,
                      batch_size=batch_size,
                      val_split_prop=val_split_prop,
                      early_stopping=early_stopping,
                      patience=patience,
                      n_iter_min=n_iter_min,
                      n_iter_print=n_iter_print,
                      verbose=verbose,
                      seed=seed,
                      nonlin=nonlin,
                      avg_objective=avg_objective)

    # compare by value, not identity: `is not` only worked by accident of
    # string interning and silently mis-branches for equal strings that are
    # distinct objects
    if transformation != HT_TRANSFORMATION:
        # potential-outcome heads are needed for everything but
        # Horvitz-Thompson
        if verbose > 0:
            print('Training PO_0 Net')
        params_0, predict_fun_0 = train_output_net_only(
            X_fit[w_fit == 0], y_fit[w_fit == 0], binary_y=binary_y,
            **net_kwargs)
        mu_0 = predict_fun_0(params_0, X_pred)

        if verbose > 0:
            print('Training PO_1 Net')
        params_1, predict_fun_1 = train_output_net_only(
            X_fit[w_fit == 1], y_fit[w_fit == 1], binary_y=binary_y,
            **net_kwargs)
        mu_1 = predict_fun_1(params_1, X_pred)
    else:
        mu_0, mu_1 = onp.nan, onp.nan

    if transformation != RA_TRANSFORMATION:
        # every transformation except regression adjustment needs pi_hat
        if verbose > 0:
            print('Training propensity net')
        params_prop, predict_fun_prop = train_output_net_only(
            X_fit, w_fit, binary_y=True, **net_kwargs)
        pi_hat = predict_fun_prop(params_prop, X_pred)
    else:
        pi_hat = onp.nan

    return mu_0, mu_1, pi_hat
# Example 6
def train_twostep_net(X, y, w, p=None, first_stage_strategy: str = T_STRATEGY,
                      data_split: bool = False,
                      cross_fit: bool = False, n_cf_folds: int = DEFAULT_CF_FOLDS,
                      transformation: str = AIPW_TRANSFORMATION,
                      binary_y: bool = False,
                      n_layers_out: int = DEFAULT_LAYERS_OUT,
                      n_layers_r: int = DEFAULT_LAYERS_R,
                      n_layers_r_t: int = DEFAULT_LAYERS_R_T,
                      n_layers_out_t: int = DEFAULT_LAYERS_OUT_T,
                      n_units_out: int = DEFAULT_UNITS_OUT,
                      n_units_r: int = DEFAULT_UNITS_R,
                      n_units_out_t: int = DEFAULT_UNITS_OUT_T,
                      n_units_r_t: int = DEFAULT_UNITS_R_T,
                      penalty_l2: float = DEFAULT_PENALTY_L2,
                      penalty_l2_t: float = DEFAULT_PENALTY_L2,
                      step_size: float = DEFAULT_STEP_SIZE,
                      step_size_t: float = DEFAULT_STEP_SIZE_T,
                      n_iter: int = DEFAULT_N_ITER,
                      batch_size: int = DEFAULT_BATCH_SIZE,
                      val_split_prop: float = DEFAULT_VAL_SPLIT,
                      early_stopping: bool = True,
                      patience: int = DEFAULT_PATIENCE,
                      n_iter_min: int = DEFAULT_N_ITER_MIN,
                      verbose: int = 1, n_iter_print: int = DEFAULT_N_ITER_PRINT,
                      seed: int = DEFAULT_SEED, rescale_transformation: bool = False,
                      return_val_loss: bool = False,
                      penalty_orthogonal: float = DEFAULT_PENALTY_ORTHOGONAL,
                      n_units_r_small: int = DEFAULT_UNITS_R_SMALL_S,
                      nonlin: str = DEFAULT_NONLIN, avg_objective: bool = DEFAULT_AVG_OBJECTIVE):
    """Train a two-step (pseudo-outcome) treatment-effect estimator.

    Stage 1 fits nuisance models -- potential-outcome regressions and/or a
    propensity net, depending on ``transformation`` -- either on all data,
    on a held-out half (``data_split=True``) or via cross-fitting
    (``cross_fit=True`` with ``n_cf_folds`` folds). Stage 2 computes the
    pseudo-outcome implied by ``transformation`` and regresses it on X with
    a plain output net.

    Args:
        X: covariate matrix of shape (n, d).
        y: outcome vector/column.
        w: binary treatment indicator vector/column.
        p: known propensity scores; if given they replace the estimated
            ones (and, for the HT transformation, skip stage 1 entirely).
        first_stage_strategy: one of ALL_STRATEGIES; picks the stage-1
            architecture.
        transformation: pseudo-outcome transformation (e.g. AIPW, HT, RA).
        *_t parameters: architecture/optimisation settings for the
            second-stage net; the un-suffixed ones configure stage 1.
        rescale_transformation: if True, shrink the pseudo-outcome by
            std(y)/std(pseudo_outcome) when that ratio is < 1 and also
            return the scale factor used.
        return_val_loss: forwarded to the second-stage trainer.

    Returns:
        ``(params, predict_funs)`` of the second-stage net, plus the
        rescaling factor when ``rescale_transformation`` is True.

    Raises:
        ValueError: if ``first_stage_strategy`` is not in ALL_STRATEGIES.
    """
    # get shape of data
    n, d = X.shape

    if p is not None:
        p = check_shape_1d_data(p)

    # get transformation function
    transformation_function = _get_transformation_function(transformation)

    # get strategy name; use equality (not identity) for string constants
    if first_stage_strategy not in ALL_STRATEGIES:
        raise ValueError('Parameter first stage should be in '
                         'catenets.models.twostep_nets.ALL_STRATEGIES. '
                         'You passed {}'.format(first_stage_strategy))

    # split data as wanted; stage 1 is skipped only when the propensity is
    # known AND the HT transformation needs no outcome regressions
    if p is None or transformation != HT_TRANSFORMATION:
        if not cross_fit:
            if not data_split:
                if verbose > 0:
                    print('Training first stage with all data (no data splitting)')
                # use all data for both
                fit_mask = onp.ones(n, dtype=bool)
                pred_mask = onp.ones(n, dtype=bool)
            else:
                if verbose > 0:
                    print('Training first stage with half of the data (data splitting)')
                # split data in half; sample WITHOUT replacement so the fit
                # half really contains n/2 distinct rows and the two halves
                # are disjoint and exhaustive (with replacement, the fit set
                # would hold only ~39% of rows on average)
                fit_idx = onp.random.choice(n, int(onp.round(n / 2)),
                                            replace=False)
                fit_mask = onp.zeros(n, dtype=bool)

                fit_mask[fit_idx] = 1
                pred_mask = ~ fit_mask

            mu_0, mu_1, pi_hat = _train_and_predict_first_stage(X, y, w, fit_mask, pred_mask,
                                                                first_stage_strategy=first_stage_strategy,
                                                                binary_y=binary_y,
                                                                n_layers_out=n_layers_out,
                                                                n_layers_r=n_layers_r,
                                                                n_units_out=n_units_out,
                                                                n_units_r=n_units_r,
                                                                penalty_l2=penalty_l2,
                                                                step_size=step_size,
                                                                n_iter=n_iter,
                                                                batch_size=batch_size,
                                                                val_split_prop=val_split_prop,
                                                                early_stopping=early_stopping,
                                                                patience=patience,
                                                                n_iter_min=n_iter_min,
                                                                verbose=verbose,
                                                                n_iter_print=n_iter_print,
                                                                seed=seed,
                                                                penalty_orthogonal=penalty_orthogonal,
                                                                n_units_r_small=n_units_r_small,
                                                                nonlin=nonlin,
                                                                avg_objective=avg_objective,
                                                                transformation=transformation)
            if data_split:
                # keep only prediction data for the second stage; reshape
                # y/w to 2d columns first so the [mask, :] indexing below is
                # valid even when they were passed as 1d vectors
                y, w = check_shape_1d_data(y), check_shape_1d_data(w)
                X, y, w = X[pred_mask, :], y[pred_mask, :], w[pred_mask, :]

                if p is not None:
                    p = p[pred_mask, :]

        else:
            if verbose > 0:
                print('Training first stage in {} folds (cross-fitting)'.format(n_cf_folds))
            # do cross fitting: each fold's nuisance predictions come from a
            # model fit on the other folds
            mu_0, mu_1, pi_hat = onp.zeros((n, 1)), onp.zeros((n, 1)), onp.zeros((n, 1))
            splitter = StratifiedKFold(n_splits=n_cf_folds, shuffle=True,
                                       random_state=seed)

            fold_count = 1
            for train_idx, test_idx in splitter.split(X, w):

                if verbose > 0:
                    print('Training fold {}.'.format(fold_count))
                fold_count = fold_count + 1

                pred_mask = onp.zeros(n, dtype=bool)
                pred_mask[test_idx] = 1
                fit_mask = ~ pred_mask

                mu_0[pred_mask], mu_1[pred_mask], pi_hat[pred_mask] = \
                    _train_and_predict_first_stage(X, y, w, fit_mask, pred_mask,
                                                   first_stage_strategy=first_stage_strategy,
                                                   binary_y=binary_y,
                                                   n_layers_out=n_layers_out,
                                                   n_layers_r=n_layers_r,
                                                   n_units_out=n_units_out,
                                                   n_units_r=n_units_r,
                                                   penalty_l2=penalty_l2,
                                                   step_size=step_size,
                                                   n_iter=n_iter,
                                                   batch_size=batch_size,
                                                   val_split_prop=val_split_prop,
                                                   early_stopping=early_stopping,
                                                   patience=patience,
                                                   n_iter_min=n_iter_min,
                                                   verbose=verbose,
                                                   n_iter_print=n_iter_print,
                                                   seed=seed,
                                                   penalty_orthogonal=penalty_orthogonal,
                                                   n_units_r_small=n_units_r_small,
                                                   nonlin=nonlin, avg_objective=avg_objective,
                                                   transformation=transformation)

    if verbose > 0:
        print('Training second stage.')

    if p is not None:
        # use known propensity score instead of the estimated one
        p = check_shape_1d_data(p)
        pi_hat = p

    # second stage
    y, w = check_shape_1d_data(y), check_shape_1d_data(w)
    # transform data and fit on transformed data; the HT transformation
    # needs only the propensity, not the outcome regressions
    if transformation == HT_TRANSFORMATION:
        mu_0 = None
        mu_1 = None

    pseudo_outcome = transformation_function(y=y, w=w, p=pi_hat, mu_0=mu_0, mu_1=mu_1)
    if rescale_transformation:
        # shrink the pseudo-outcome only when it is MORE variable than y
        scale_factor = onp.std(y) / onp.std(pseudo_outcome)
        if scale_factor > 1:
            scale_factor = 1
        else:
            pseudo_outcome = scale_factor * pseudo_outcome
        params, predict_funs = train_output_net_only(X, pseudo_outcome, binary_y=False,
                                                     n_layers_out=n_layers_out_t,
                                                     n_units_out=n_units_out_t,
                                                     n_layers_r=n_layers_r_t,
                                                     n_units_r=n_units_r_t,
                                                     penalty_l2=penalty_l2_t,
                                                     step_size=step_size_t,
                                                     n_iter=n_iter,
                                                     batch_size=batch_size,
                                                     val_split_prop=val_split_prop,
                                                     early_stopping=early_stopping,
                                                     patience=patience,
                                                     n_iter_min=n_iter_min,
                                                     n_iter_print=n_iter_print,
                                                     verbose=verbose,
                                                     seed=seed,
                                                     return_val_loss=return_val_loss,
                                                     nonlin=nonlin,
                                                     avg_objective=avg_objective)
        return params, predict_funs, scale_factor
    else:
        return train_output_net_only(X, pseudo_outcome, binary_y=False,
                                     n_layers_out=n_layers_out_t,
                                     n_units_out=n_units_out_t,
                                     n_layers_r=n_layers_r_t,
                                     n_units_r=n_units_r_t,
                                     penalty_l2=penalty_l2_t,
                                     step_size=step_size_t,
                                     n_iter=n_iter,
                                     batch_size=batch_size,
                                     val_split_prop=val_split_prop,
                                     early_stopping=early_stopping,
                                     patience=patience,
                                     n_iter_min=n_iter_min,
                                     n_iter_print=n_iter_print,
                                     verbose=verbose,
                                     seed=seed,
                                     return_val_loss=return_val_loss, nonlin=nonlin,
                                     avg_objective=avg_objective)
예제 #7
0
def train_x_net(X,
                y,
                w,
                weight_strategy: int = None,
                binary_y: bool = False,
                n_layers_out: int = DEFAULT_LAYERS_OUT,
                n_layers_r: int = DEFAULT_LAYERS_R,
                n_layers_out_t: int = DEFAULT_LAYERS_OUT_T,
                n_layers_r_t: int = DEFAULT_LAYERS_R_T,
                n_units_out: int = DEFAULT_UNITS_OUT,
                n_units_r: int = DEFAULT_UNITS_R,
                n_units_out_t: int = DEFAULT_UNITS_OUT_T,
                n_units_r_t: int = DEFAULT_UNITS_R_T,
                penalty_l2: float = DEFAULT_PENALTY_L2,
                penalty_l2_t: float = DEFAULT_PENALTY_L2,
                step_size: float = DEFAULT_STEP_SIZE,
                step_size_t: float = DEFAULT_STEP_SIZE_T,
                n_iter: int = DEFAULT_N_ITER,
                batch_size: int = DEFAULT_BATCH_SIZE,
                n_iter_min: int = DEFAULT_N_ITER_MIN,
                val_split_prop: float = DEFAULT_VAL_SPLIT,
                early_stopping: bool = True,
                patience: int = DEFAULT_PATIENCE,
                verbose: int = 1,
                n_iter_print: int = DEFAULT_N_ITER_PRINT,
                seed: int = DEFAULT_SEED,
                nonlin: str = DEFAULT_NONLIN,
                return_val_loss: bool = False,
                avg_objective: bool = DEFAULT_AVG_OBJECTIVE):
    """Train an X-learner with neural-net base learners.

    Stage 1 fits potential-outcome regressions mu_0/mu_1 (and, when the
    weighting function depends on the propensity score, a propensity net).
    Stage 2 fits tau_0 on the control group's imputed effects
    (mu_1(X) - y) and tau_1 on the treated group's (y - mu_0(X)).

    ``weight_strategy`` encodes g(x) in
    tau(x) = g(x) * tau_0(x) + (1 - g(x)) * tau_1(x)
    [eq 9, Kuenzel et al (2019)]:
    0 -> g(x) = 0 (tau_1 only), 1 -> g(x) = 1 (tau_0 only),
    None -> g(x) = pi(x) [propensity score], -1 -> g(x) = 1 - pi(x).
    Nets that the chosen strategy never uses are skipped and returned as
    None.

    Returns:
        ``(params, predict_funs)`` where each is the triple
        (tau_0, tau_1, propensity); unused entries are None.

    Raises:
        ValueError: for a ``weight_strategy`` outside {0, 1, -1, None}.
    """
    y = check_shape_1d_data(y)
    if len(w.shape) > 1:
        w = w.reshape((len(w), ))

    if weight_strategy not in [0, 1, -1, None]:
        raise ValueError(
            'XNet only implements weight_strategy in [0, 1, -1, None]')

    # boolean masks for the control and treated groups
    control, treated = w == 0, w == 1

    # shared keyword arguments for the three first-stage (nuisance) nets
    stage1_kwargs = dict(
        n_layers_out=n_layers_out,
        n_units_out=n_units_out,
        n_layers_r=n_layers_r,
        n_units_r=n_units_r,
        penalty_l2=penalty_l2,
        step_size=step_size,
        n_iter=n_iter,
        batch_size=batch_size,
        val_split_prop=val_split_prop,
        early_stopping=early_stopping,
        patience=patience,
        n_iter_min=n_iter_min,
        n_iter_print=n_iter_print,
        verbose=verbose,
        seed=seed,
        nonlin=nonlin,
        avg_objective=avg_objective)

    # shared keyword arguments for the two second-stage (tau) nets
    stage2_kwargs = dict(
        n_layers_out=n_layers_out_t,
        n_units_out=n_units_out_t,
        n_layers_r=n_layers_r_t,
        n_units_r=n_units_r_t,
        penalty_l2=penalty_l2_t,
        step_size=step_size_t,
        n_iter=n_iter,
        batch_size=batch_size,
        val_split_prop=val_split_prop,
        early_stopping=early_stopping,
        patience=patience,
        n_iter_min=n_iter_min,
        n_iter_print=n_iter_print,
        verbose=verbose,
        seed=seed,
        return_val_loss=return_val_loss,
        nonlin=nonlin,
        avg_objective=avg_objective)

    # first stage: get estimates of PO regression
    if verbose > 0:
        print("Training first stage")

    # mu_0 fit on controls, evaluated on the treated (needed unless g == 1)
    mu_hat_0 = None
    if weight_strategy != 1:
        if verbose > 0:
            print('Training PO_0 Net')
        params_0, predict_fun_0 = train_output_net_only(
            X[control], y[control], binary_y=binary_y, **stage1_kwargs)
        mu_hat_0 = predict_fun_0(params_0, X[treated])

    # mu_1 fit on treated, evaluated on the controls (needed unless g == 0)
    mu_hat_1 = None
    if weight_strategy != 0:
        if verbose > 0:
            print('Training PO_1 Net')
        params_1, predict_fun_1 = train_output_net_only(
            X[treated], y[treated], binary_y=binary_y, **stage1_kwargs)
        mu_hat_1 = predict_fun_1(params_1, X[control])

    # propensity net only when g(x) depends on pi(x)
    params_prop, predict_fun_prop = None, None
    if weight_strategy is None or weight_strategy == -1:
        if verbose > 0:
            print('Training propensity net')
        params_prop, predict_fun_prop = train_output_net_only(
            X, w, binary_y=True, **stage1_kwargs)

    # second stage
    if verbose > 0:
        print("Training second stage")

    # tau_0: regress imputed effects mu_1(X) - y on the control group
    params_tau0, predict_fun_tau0 = None, None
    if weight_strategy != 0:
        if verbose > 0:
            print("Fitting tau_0")
        pseudo_outcome0 = mu_hat_1 - y[control]
        params_tau0, predict_fun_tau0 = train_output_net_only(
            X[control], pseudo_outcome0, binary_y=False, **stage2_kwargs)

    # tau_1: regress imputed effects y - mu_0(X) on the treated group
    params_tau1, predict_fun_tau1 = None, None
    if weight_strategy != 1:
        if verbose > 0:
            print("Fitting tau_1")
        pseudo_outcome1 = y[treated] - mu_hat_0
        params_tau1, predict_fun_tau1 = train_output_net_only(
            X[treated], pseudo_outcome1, binary_y=False, **stage2_kwargs)

    params = params_tau0, params_tau1, params_prop
    predict_funs = predict_fun_tau0, predict_fun_tau1, predict_fun_prop

    return params, predict_funs