Example 1
import numpy as np
import statsmodels.api as sm


def clad_ilpa(yvar, xvars, maxiter_ilpa=20, quiet=False, b=None):
    x = np.array(xvars)
    N, K, xvars = labels(x)
    y = np.array(yvar).reshape(N, 1)

    if np.mean(1 * (y > 0)) < 0.5:
        print(
            'Error: More than half of observations are censored. Beta is unidentified'
        )
        return np.nan * np.ones((K, 1))

    if b is None:  # use LAD estimates as starting values
        lad = sm.QuantReg(y, x).fit(q=0.5, p_tol=1e-05)
        b = np.array(lad.params).reshape(-1, 1)

    for i in range(maxiter_ilpa):
        b0 = b
        yhat = x @ b
        lad = sm.QuantReg(y[yhat > 0], x[yhat[:, 0] > 0, :]).fit(q=0.5,
                                                                 p_tol=1e-05)
        b = np.array(lad.params).reshape(-1, 1)

        if (b == b0).all():  # convergence achieved
            if not quiet:
                print('CLAD finished in %d iterations using ILPA' % i)
                print('Fraction of observations that are censored: ',
                      np.mean(1 * (y == 0)))
                print(lad.summary())
            return np.array(b).reshape(-1, 1)

    return np.nan * np.ones((K, 1))
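A minimal usage sketch, assuming a labels() helper of the following form (the real helper is not shown in this snippet; the stand-in and the synthetic censored data are illustrative only):

def labels(x):
    # hypothetical stand-in for the helper used above: returns the sample
    # size, the number of regressors, and generic column names
    n, k = x.shape
    return n, k, ['x%d' % j for j in range(k)]


rng = np.random.default_rng(0)
X_demo = np.column_stack([np.ones(500), rng.normal(size=500)])
y_star = X_demo @ np.array([[1.0], [2.0]]) + rng.normal(size=(500, 1))
y_demo = np.maximum(y_star, 0)        # left-censoring at zero (Tobit-type)
beta_hat = clad_ilpa(y_demo, X_demo)  # should land near (1, 2)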
Example 2
def qrTraining(horizon, inpEndo, inpExo, tar):
    '''
    Arguments:
    - horizon: The forecast horizon for which a model should be learned.
    - inpEndo: A pandas DataFrame containing the endogenous inputs.
    - inpExo: A pandas DataFrame containing the exogenous inputs.
    - tar: A pandas DataFrame containing the targets.
    Returns:
    - Stored .pickle files containing the QR parameters, one for each horizon
      and nominal probability.
    '''
    taus = np.arange(0.1, 0.91, 0.1)
    # Training months. Months 1, 7 and 8 were removed to speed things up,
    # and because they are less relevant.
    tr_m = [2, 3, 5, 6]
    # Test month April
    te_m = 4

    cols = ["{}_{}".format("t", horizon)]

    train = inpEndo[inpEndo.index.month.isin(tr_m)]
    train = train.join(inpExo[inpExo.index.month.isin(tr_m)], how="inner")
    train = train.join(tar[tar.index.month.isin(tr_m)], how="inner")

    test = inpEndo[inpEndo.index.month == te_m]
    test = test.join(inpExo[inpExo.index.month == te_m], how="inner")
    test = test.join(tar[tar.index.month == te_m], how="inner")

    feature_cols = inpEndo.filter(regex='y').columns.tolist()
    feature_cols.extend([
        "Temperature_{}".format(horizon), "TotalCloudCover_{}".format(horizon)
    ])  # "WindUMS_{}".format(horizon) and "WindVMS_{}".format(horizon) could be added as well

    train = train[cols + feature_cols].dropna(how="any")
    test = test[cols + feature_cols].dropna(how="any")

    train_X = train[feature_cols].values
    test_X = test[feature_cols].values

    #scaler = preprocessing.StandardScaler().fit(train_X)
    #train_X = scaler.transform(train_X)

    train_y = train[cols].values
    test_y = test[cols].values

    # Perhaps add some jitter:
    #Xtra_jitter = np.random.normal(1*Xtra,0.01) # Add some random noise to avoid singular matrix

    quantreg = sm.QuantReg(train_y, train_X)
    tau = 1
    for q in taus:
        res = quantreg.fit(q=q, max_iter=10000)
        #res.save("{}_{}_{}_{}.{}".format("ForecastModels\qr",horizon,"tau",tau,"pickle"))
        res.save(
            os.path.join(
                FORECASTMODELS,
                "{}_{}_{}_{}.{}".format("qr", horizon, "tau", tau, "pickle")))
        tau += 1
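One of the stored models could later be reloaded with sm.load (the filename follows the pattern written above; horizon 1 and tau index 1 are illustrative, and some_test_X stands for a feature matrix built the same way as in training):

res = sm.load(os.path.join(FORECASTMODELS, "qr_1_tau_1.pickle"))
# tau index 1 corresponds to taus[0] = 0.1, i.e. the 10% quantile
y_q10 = res.predict(some_test_X)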
Example 3
def QR_fit(y, q=0.5):
    # QR fit for the detrended series: intercept-only model, i.e. the
    # fitted "slope" is a constant level (dtrend model: y = const)
    X = np.ones(len(y))
    res = sm.QuantReg(y, X).fit(q=q)
    slope = res.params[0]
    resid = res.resid
    std = np.std(resid)
    mean = np.mean(resid)
    print('std: ' + str(np.round(std, 4)) + ' mean: ' + str(np.round(mean, 4)))
    return slope
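With an intercept-only design and q=0.5 the fitted parameter is (approximately) the sample median, which gives a quick sanity check on synthetic data:

rng = np.random.default_rng(0)
demo = rng.standard_normal(200)
print(QR_fit(demo))      # prints resid stats, returns ~np.median(demo)
print(np.median(demo))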
Example 4
def fit_model(tr_df, controls, ivs, dv, model_class):
    X = tr_df[['const'] + list(controls) + list(ivs)]
    y = tr_df[dv]

    try:
        if model_class == 'logreg':
            model = sm.Logit(y, X, missing='drop', hasconst=True)
            res = model.fit()
        elif model_class == 'ols':
            model = sm.OLS(y, X, missing='drop', hasconst=True)
            res = model.fit()
        elif model_class == 'qr':  # median regression
            model = sm.QuantReg(y, X, missing='drop', hasconst=True)
            res = model.fit(q=0.5)
        else:
            raise ValueError('Unrecognized model class "{}"'.format(model_class))
    except Exception as ex:
        print(ex)
        return None, None

    return res, model
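A minimal usage sketch (synthetic DataFrame; note the explicit 'const' column that fit_model expects):

import numpy as np
import pandas as pd
rng = np.random.default_rng(1)
df = pd.DataFrame({'age': rng.normal(40, 10, 200),
                   'treated': rng.integers(0, 2, 200)})
df['outcome'] = 2 + 0.05 * df['age'] + 0.8 * df['treated'] \
    + rng.normal(0, 0.5, 200)
df['const'] = 1.0
res, model = fit_model(df, controls=['age'], ivs=['treated'],
                       dv='outcome', model_class='qr')
if res is not None:
    print(res.params)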
Example 5
def stats(predictor, response, model):
    ## Applies the statistical model you select to the input variables; the
    ## codes for each model appear in the chain of if statements below.
    ## Note: statsmodels expects (endog, exog), i.e. (response, predictor).
    predictor = np.asarray(predictor)
    response = np.asarray(response)
    if model == 'logit':
        model = sm.Logit(response, predictor)
    elif model == 'lsr':
        model = sm.OLS(response, predictor)
    elif model == "probit":
        model = sm.Probit(response, predictor)
    elif model == "gls":
        model = sm.GLS(response, predictor)
    elif model == "glsar":
        model = sm.GLSAR(response, predictor)
    elif model == "quantreg":
        model = sm.QuantReg(response, predictor)
    else:
        raise ValueError('Unknown model code "{}"'.format(model))
    model = model.fit()
    print(model.summary())
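An example call on synthetic data: median regression of y on X.

import numpy as np
import statsmodels.api as sm
rng = np.random.default_rng(2)
X = sm.add_constant(rng.normal(size=(100, 2)))
y = X @ np.array([1.0, 2.0, -1.0]) + rng.standard_normal(100)
stats(X, y, 'quantreg')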
Example 6
import numpy as np
import pandas as pd
import statsmodels.api as sm
from sklearn.metrics import f1_score, log_loss, mean_squared_error, r2_score


def cross_validation(features,
                     data,
                     y,
                     model=None,
                     quantile=None,
                     max_iter=1000,
                     p_tol=1e-6,
                     logistic=False):

    is_statsmodel = (model is None)

    yrs = data["Draft Year"].unique()
    result_df = pd.DataFrame()

    # Initialize empty values for the Y^ model, Y^ baseline, and actual Y.

    model_predicted = np.array([])
    actual_values = np.array([])
    if logistic:
        model_proba = np.array([])

    # Iterate through each year
    for yr in yrs:
        train = data[data["Draft Year"] != yr]
        test = data[data["Draft Year"] == yr]

        train_columns = train[features]

        X_train = np.array(train_columns.values.tolist())

        test_columns = test[features]

        X_test = np.array(test_columns.values.tolist())

        Y_train = np.array(train[[y]].values.tolist())
        Y_test = np.array(test[[y]].values.tolist())

        res = None
        if is_statsmodel:
            model = sm.QuantReg(Y_train, train_columns)
            res = model.fit(q=quantile, max_iter=max_iter, p_tol=p_tol)
            Y_pred = model.predict(res.params, exog=test_columns)
        else:
            model.fit(X_train, Y_train.ravel())
            Y_pred = model.predict(X_test)
            if logistic:
                Y_proba = model.predict_proba(X_test)
                Y_proba = [sample[1] for sample in Y_proba]

        # Append predictions
        model_predicted = np.append(model_predicted, Y_pred)
        if logistic:
            model_proba = np.append(model_proba, Y_proba)
        actual_values = np.append(actual_values, Y_test)

        # Append to our result dataframe to export later
        test = test[['Name', "Draft Year"] + features].copy()
        # test["Model Projected Average PPR Points Per First 48 Games"] = inv_boxcox(Y_pred, value)
        if logistic:
            test["Model"] = Y_proba
        test["Actual"] = Y_test
        if not logistic:
            test["Model"] = Y_pred
            test["Residual"] = np.abs(
                np.subtract(Y_pred.flatten(), Y_test.flatten()))
        result_df = pd.concat([result_df, test])

    # Overall metrics on the pooled predictions: adjusted r^2 and RMSE, or,
    # for logistic models, log-loss and F1 (reusing the same return slots)
    if logistic:
        model_r_2 = log_loss(actual_values, model_predicted)
        model_rmse = f1_score(actual_values, model_predicted)
    else:
        model_r_2 = 1 - (1 - r2_score(actual_values, model_predicted)) * (
            (len(model_predicted) - 1) /
            (len(model_predicted) - X_train.shape[1] - 1))
        model_rmse = mean_squared_error(actual_values,
                                        model_predicted,
                                        squared=False)

    return model_rmse, model_r_2, result_df, res
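A minimal usage sketch (synthetic draft data with the columns the function expects: "Name", "Draft Year", the features, and the target):

rng = np.random.default_rng(3)
n = 150
data = pd.DataFrame({
    "Name": ["player_%d" % i for i in range(n)],
    "Draft Year": rng.choice([2018, 2019, 2020], n),
    "forty_time": rng.normal(4.6, 0.1, n),
})
data["ppr_points"] = 200 - 300 * (data["forty_time"] - 4.6) \
    + rng.normal(0, 5, n)
rmse, adj_r2, result_df, res = cross_validation(
    ["forty_time"], data, "ppr_points", quantile=0.5)
print(rmse, adj_r2)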
Example 7
def qr_train_forecast(horizon):
    '''
    Reads the stored GBRT forecast and observation files, fits one quantile
    regression per nominal level for "horizon", and writes the test-period
    predictions and observations to disk.

    Arguments:
    - horizon: the forecast horizon.
    Returns:
    - Nothing; a tab-separated forecast file (test period x len(taus)) is
      written to FORECASTS and the matching observations to OBSERVATIONS.
    '''
    taus = np.arange(0.05, 0.96, 0.05)
    #taus = np.linspace(0.001,0.999,num=21) # quantreg does not accept 0
    # Training month: May
    tr_m = [5]  # Train the QR on the uncalibrated GBRT forecasts
    # Test month: April
    te_m = [4]  # Test the QR on the uncalibrated GBRT forecasts

    preds = []
    gbrt_fc_str = os.path.join(FORECASTS,
                               "{}_{}.{}".format("gbrt", horizon, "txt"))
    gbrt_fc = pd.read_csv(gbrt_fc_str, sep="\t")
    gbrt_fc['DateTime'] = pd.to_datetime(gbrt_fc['DateTime'])
    gbrt_fc = gbrt_fc.set_index(pd.DatetimeIndex(gbrt_fc['DateTime']))
    gbrt_fc = gbrt_fc.drop(['DateTime'], axis=1)

    gbrt_ob_str = os.path.join(OBSERVATIONS,
                               "{}_{}.{}".format("obs", horizon, "txt"))
    gbrt_ob = pd.read_csv(gbrt_ob_str, sep="\t")
    gbrt_ob['DateTime'] = pd.to_datetime(gbrt_ob['DateTime'])
    gbrt_ob = gbrt_ob.set_index(pd.DatetimeIndex(gbrt_ob['DateTime']))
    gbrt_ob = gbrt_ob.drop(['DateTime'], axis=1)

    train = gbrt_fc[gbrt_fc.index.month.isin(tr_m)]
    train = train.join(gbrt_ob[gbrt_ob.index.month.isin(tr_m)], how="inner")
    test = gbrt_fc[gbrt_fc.index.month.isin(te_m)]
    test = test.join(gbrt_ob[gbrt_ob.index.month.isin(te_m)], how="inner")

    cols = ["{}_{}".format("t", horizon)]
    feature_cols = gbrt_fc.columns.tolist()  # fc columns as feature names

    train = train[cols + feature_cols].dropna(how="any")
    test = test[cols + feature_cols].dropna(how="any")

    train_X = train[feature_cols].values
    test_X = test[feature_cols].values
    train_y = train[cols].values
    test_y = test[cols]  # To store as pandas series

    test_pred = []
    quantreg = sm.QuantReg(train_y, train_X)
    for q in taus:
        model = quantreg.fit(q=q, max_iter=10000)
        test_pred.append(model.predict(test_X))

    tmp = np.vstack(test_pred).T  # List to NumPy array
    fc_df = pd.DataFrame(data=tmp,
                         index=test.index)  # To store as pandas DataFrame
    fc_df.to_csv(os.path.join(FORECASTS,
                              "{}_{}.{}".format("qr", horizon, "txt")),
                 sep="\t")
    test_y.to_csv(os.path.join(OBSERVATIONS,
                               "{}_{}.{}".format("qr_obs", horizon, "txt")),
                  sep="\t")
Example 8
    def _FittingFunctions(input_ax, input_X, input_Y, Pred_type, Clean_type,
                          Visualization, test_frac_size, color, random_Seed):

        pred_summary = {}
        pred_summary["y_pred"] = 0
        pred_summary["groundtruth"] = 0
        pred_summary["y_pred_95confi"] = None

        if Pred_type is None:
            if Visualization:
                input_X, input_Y, X_train, y_train, X_test, y_test = PredictOppoBehavior._utilityCleaning(
                    input_X, input_Y, Clean_type, random_Seed, test_frac_size)

                input_ax.plot(input_X,
                              input_Y,
                              'ok',
                              ms=4,
                              color=color,
                              alpha=0.5)
            else:
                pass

        ## Linear Regression
        elif Pred_type == "LR":

            ## train test sets split
            input_X, input_Y, X_train, y_train, X_test, y_test = PredictOppoBehavior._utilityCleaning(
                input_X, input_Y, Clean_type, random_Seed, test_frac_size)

            ## PolynomialFeatures with order 1
            features = PolynomialFeatures(degree=1, include_bias=True)
            X_feature = features.fit_transform(input_X[:-1])
            regress = LinearRegression()
            regress.fit(X_feature, input_Y[:-1])
            y_model = regress.predict(X_feature)

            ## predict next utility
            next_X = np.atleast_2d(np.array(input_X[-1])).T
            next_X_feature = features.fit_transform(next_X)
            y_pred = regress.predict(next_X_feature)

            if Visualization:
                input_ax.plot(input_X[:-1],
                              input_Y[:-1],
                              'ok',
                              ms=4,
                              color=color,
                              alpha=0.5)
                input_ax.plot(input_X[-1],
                              input_Y[-1],
                              'v',
                              ms=8,
                              color=color,
                              alpha=0.5)
                input_ax.plot(input_X[:-1], y_model, c='k')
                input_ax.scatter(next_X, y_pred, color="black", s=50)

            pred_summary["y_pred"] = list(y_pred)[0]
            pred_summary["groundtruth"] = input_Y[-1]

            ## save prediction and 95% confi prediction in pred_summary
            pred_summary = PredictOppoBehavior._save_pred_summary(
                pred_summary, input_Y[-1],
                list(y_pred)[0], None)
            ## return slope
            init_X = np.atleast_2d(np.array(input_X[0])).T
            init_X_feature = features.fit_transform(init_X)
            y_init = regress.predict(init_X_feature)

            slope = (y_pred - y_init) / (next_X - init_X)
            pred_summary["slope"] = slope

        ## non-linear regression
        elif Pred_type == "NLR":

            input_X, input_Y, X_train, y_train, X_test, y_test = PredictOppoBehavior._utilityCleaning(
                input_X, input_Y, Clean_type, random_Seed, test_frac_size)

            ## PolynomialFeatures with order 3 (default, can be tuned for other projects)
            features = PolynomialFeatures(degree=3, include_bias=True)
            X_feature = features.fit_transform(input_X[:-1])
            regress = LinearRegression()
            regress.fit(X_feature, input_Y[:-1])
            y_model = regress.predict(X_feature)

            ## predict next utility
            next_X = np.atleast_2d(np.array(input_X[-1])).T
            next_X_feature = features.fit_transform(next_X)
            y_pred = regress.predict(next_X_feature)

            if Visualization:
                input_ax.plot(input_X[:-1],
                              input_Y[:-1],
                              'ok',
                              ms=4,
                              color=color,
                              alpha=0.5)
                input_ax.plot(input_X[-1],
                              input_Y[-1],
                              'v',
                              ms=8,
                              color=color,
                              alpha=0.5)
                input_ax.plot(input_X[:-1], y_model, c='k')
                input_ax.scatter(next_X, y_pred, color="black", s=50)

            pred_summary["y_pred"] = list(y_pred)[0]
            pred_summary["groundtruth"] = input_Y[-1]

            ## save prediction and 95% confi prediction in pred_summary
            pred_summary = PredictOppoBehavior._save_pred_summary(
                pred_summary, input_Y[-1],
                list(y_pred)[0], None)

        ## Gaussian process regression
        elif Pred_type == "GPR":

            input_X, input_Y, X_train, y_train, X_test, y_test = PredictOppoBehavior._utilityCleaning(
                input_X, input_Y, Clean_type, random_Seed, test_frac_size)

            ## the default kernel is RationalQuadratic (can be tuned for other projects)
            kernels = [
                1.0 * RBF(length_scale=1.0, length_scale_bounds=(1e-1, 10.0)),
                1.0 * RationalQuadratic(length_scale=1.0, alpha=0.1),
                1.0 * ExpSineSquared(length_scale=1.0,
                                     periodicity=3.0,
                                     length_scale_bounds=(0.1, 10.0),
                                     periodicity_bounds=(1.0, 10.0)),
                1.0 * Matern(
                    length_scale=1.0, length_scale_bounds=(1e-1, 10.0), nu=1.5)
            ]
            ## Instantiate a Gaussian Process model
            gp = GaussianProcessRegressor(kernel=kernels[1])
            ## Fit to data
            gp.fit(X_train, y_train)
            ## Make the prediction
            y_pred_fit, sigma = gp.predict(input_X, return_std=True)

            ## predict next utility
            next_X = np.atleast_2d(np.array(input_X[-1])).T
            y_pred = gp.predict(next_X)

            if Visualization:

                input_ax.plot(input_X[-1],
                              input_Y[-1],
                              'ok',
                              ms=4,
                              color=color,
                              alpha=0.5)

                input_ax.plot(X_train,
                              y_train,
                              'ok',
                              ms=4,
                              color="r",
                              alpha=0.5,
                              label="Observation")
                input_ax.plot(X_test,
                              y_test,
                              'ok',
                              ms=4,
                              color=color,
                              alpha=0.5)
                input_ax.plot(input_X,
                              y_pred_fit,
                              'k--',
                              alpha=0.5,
                              label='Prediction')
                input_ax.fill(np.concatenate([input_X, input_X[::-1]]),
                              np.concatenate([
                                  y_pred_fit - 1.9600 * sigma,
                                  (y_pred_fit + 1.9600 * sigma)[::-1]
                              ]),
                              alpha=.3,
                              fc=color,
                              ec='None',
                              label='95% confidence interval')

                input_ax.plot(X_train,
                              y_train,
                              'ok',
                              ms=4,
                              color="r",
                              alpha=0.5,
                              label="Observation")
                input_ax.scatter(next_X, y_pred, color="black", s=50)

            ## save prediction and 95% confi prediction in pred_summary
            pred_summary = PredictOppoBehavior._save_pred_summary(
                pred_summary, input_Y[-1],
                list(y_pred)[0], (y_pred + 1.9600 * sigma)[-1])

        ## Gaussian process regression with random noise
        elif Pred_type == "GPRN":

            input_X, input_Y, X_train, y_train, X_test, y_test = PredictOppoBehavior._utilityCleaning(
                input_X, input_Y, Clean_type, random_Seed, test_frac_size)

            ## the default kernel is RationalQuadratic (can be tuned for other projects)
            kernels = [
                1.0 * RBF(length_scale=1.0, length_scale_bounds=(1e-1, 10.0)),
                1.0 * RationalQuadratic(length_scale=1.0, alpha=0.1),
                1.0 * ExpSineSquared(length_scale=1.0,
                                     periodicity=3.0,
                                     length_scale_bounds=(0.1, 10.0),
                                     periodicity_bounds=(1.0, 10.0)),
                1.0 * Matern(
                    length_scale=1.0, length_scale_bounds=(1e-1, 10.0), nu=1.5)
            ]
            ## the random noise in the range of (0, 0.05) (default, can be tuned for other projects)
            dy = 0.0 + 0.05 * np.random.random(y_train.shape)
            noise = np.random.normal(0, dy)
            y_train += noise
            gp = GaussianProcessRegressor(kernel=kernels[1], alpha=dy**2)
            ## Fit to data
            gp.fit(X_train, y_train)
            ## Make the prediction
            y_pred_fit, sigma = gp.predict(input_X, return_std=True)

            ## predict next utility
            next_X = np.atleast_2d(np.array(input_X[-1])).T
            y_pred = gp.predict(next_X)

            if Visualization:
                input_ax.plot(input_X[-1],
                              input_Y[-1],
                              'ok',
                              ms=4,
                              color=color,
                              alpha=0.5)

                input_ax.plot(X_train,
                              y_train,
                              'ok',
                              ms=4,
                              color="r",
                              alpha=0.5,
                              label="Observation")
                input_ax.errorbar(X_train,
                                  y_train,
                                  dy,
                                  fmt='r.',
                                  markersize=4,
                                  label='Observations')
                input_ax.plot(X_test,
                              y_test,
                              'ok',
                              ms=4,
                              color=color,
                              alpha=0.5)
                input_ax.plot(input_X,
                              y_pred_fit,
                              'k--',
                              alpha=0.5,
                              label='Prediction')
                input_ax.fill(np.concatenate([input_X, input_X[::-1]]),
                              np.concatenate([
                                  y_pred_fit - 1.9600 * sigma,
                                  (y_pred_fit + 1.9600 * sigma)[::-1]
                              ]),
                              alpha=.3,
                              fc=color,
                              ec='None',
                              label='95% confidence interval')

                input_ax.plot(X_train,
                              y_train,
                              'ok',
                              ms=4,
                              color="r",
                              alpha=0.5,
                              label="Observation")
                input_ax.scatter(next_X, y_pred, color="black", s=50)

            pred_summary = PredictOppoBehavior._save_pred_summary(
                pred_summary, input_Y[-1],
                list(y_pred)[0], (y_pred + 1.9600 * sigma)[-1])

        #elif Pred_type == "OLS":
        #
        #    input_X, input_Y, X_train, y_train, X_test, y_test = PredictOppoBehavior._utilityCleaning(input_X, input_Y, Clean_type, random_Seed, test_frac_size)
        #
        #    import statsmodels.api as sm
        #    from statsmodels.regression.quantile_regression import QuantReg
        #    from statsmodels.sandbox.regression.predstd import wls_prediction_std
        #
        #    input_X = np.array(input_X)
        #    input_Y = np.array(input_Y)
        #
        #    input_X2 = sm.add_constant(input_X)
        #    est = sm.OLS(input_Y[:-1], input_X2[:-1])
        #    results = est.fit()
        #
        #    if Visualization == True:
        #        print(results.summary())
        #
        #        input_ax.plot(input_X[:-1], input_Y[:-1], 'ok', ms = 4, color = color, alpha=0.5)
        #        input_ax.plot(input_X[-1], input_Y[-1], 'v', ms = 8, color = color, alpha=0.5)
        #
        #        input_ax.plot(input_X[:-1], results.predict(), label='predicted', color = 'k')
        #
        #        from statsmodels.sandbox.regression.predstd import wls_prediction_std
        #
        #        sdev, lower, upper = wls_prediction_std(results, exog=input_X2[:-1], alpha=0.05)
        #
        #        input_ax.plot(input_X[:-1], upper, color='#888888', alpha=1)
        #        input_ax.plot(input_X[:-1], lower, color='#888888', alpha=1)

        ## predictability check via quantile regression
        elif Pred_type == "QuantReg":

            input_X, input_Y, X_train, y_train, X_test, y_test = PredictOppoBehavior._utilityCleaning(
                input_X, input_Y, Clean_type, random_Seed, test_frac_size)

            import statsmodels.api as sm
            from statsmodels.regression.quantile_regression import QuantReg
            from statsmodels.sandbox.regression.predstd import wls_prediction_std

            input_X = np.array(input_X)
            input_Y = np.array(input_Y)

            input_X2 = sm.add_constant(input_X)
            est = sm.QuantReg(input_Y[:-1], input_X2[:-1])
            results = est.fit()

            ## Build the model for other quantiles
            quantiles = np.array((0.1, 0.5, 0.9))
            models = []
            for qt in quantiles:
                res = est.fit(q=qt)
                models.append(res)

            y_pred1 = models[0].params[0] + models[0].params[1] * input_X[:-1]
            y_pred2 = models[1].params[0] + models[1].params[1] * input_X[:-1]
            y_pred3 = models[2].params[0] + models[2].params[1] * input_X[:-1]

            if Visualization:
                print(results.summary())

                input_ax.plot(input_X[:-1],
                              input_Y[:-1],
                              'ok',
                              ms=4,
                              color=color,
                              alpha=0.5)
                input_ax.plot(input_X[-1],
                              input_Y[-1],
                              'v',
                              ms=8,
                              color=color,
                              alpha=0.5)

                input_ax.scatter(input_X[-1],
                                 results.predict(input_X2[-1]),
                                 color="black",
                                 s=50)
                input_ax.plot(input_X[:-1],
                              y_pred1,
                              color='#888888',
                              alpha=1,
                              label='Q Reg : 0.1')
                input_ax.plot(input_X[:-1],
                              y_pred2,
                              color='k',
                              alpha=1,
                              label='Q Reg : 0.5')
                input_ax.plot(input_X[:-1],
                              y_pred3,
                              color='#888888',
                              alpha=1,
                              label='Q Reg : 0.9')

                input_ax.set_title(
                    "Predictability check via Q Reg: 0.1, 0.5 and 0.9")

            ## record slopes for predictable check
            pred_summary["slope"] = []

            upper_y_pred09 = models[2].params[0] + models[2].params[1] * input_X[-1]
            upper_y_init09 = models[2].params[0] + models[2].params[1] * input_X[0]
            slope09 = (upper_y_pred09 - upper_y_init09) / (input_X[-1] - input_X[0])
            pred_summary["slope"].append(slope09)

            upper_y_pred05 = models[1].params[0] + models[1].params[1] * input_X[-1]
            upper_y_init05 = models[1].params[0] + models[1].params[1] * input_X[0]
            slope05 = (upper_y_pred05 - upper_y_init05) / (input_X[-1] - input_X[0])
            pred_summary["slope"].append(slope05)

            pred_summary["y_pred"] = list(upper_y_pred05)[0]
            pred_summary["groundtruth"] = input_Y[-1]

        #### Exploratory Data Analysis (EDA)
        ## Moving average
        elif Pred_type == "MA" and Visualization == True:

            input_ax.plot(input_X[:-1],
                          input_Y[:-1],
                          'ok',
                          ms=1,
                          color=color,
                          alpha=0.1)
            input_ax.plot(input_X[-1],
                          input_Y[-1],
                          'v',
                          ms=8,
                          color=color,
                          alpha=0.5)
            input_Y_pd = pd.DataFrame(input_Y)

            plot_moving_average(input_ax,
                                input_Y_pd,
                                90,
                                color,
                                plot_intervals=True,
                                scale=1.96)

        ## Exponential smoothing
        elif Pred_type == "ES" and Visualization == True:

            input_ax.plot(input_X[:-1],
                          input_Y[:-1],
                          'ok',
                          ms=1,
                          color=color,
                          alpha=0.3)
            input_ax.plot(input_X[-1],
                          input_Y[-1],
                          'v',
                          ms=8,
                          color=color,
                          alpha=0.5)
            ## (default, can be tuned for other projects)
            plot_exponential_smoothing(input_ax, input_Y, [0.05])

        ## Double exponential smoothing
        elif Pred_type == "DES" and Visualization == True:

            input_ax.plot(input_X[:-1],
                          input_Y[:-1],
                          'ok',
                          ms=1,
                          color=color,
                          alpha=0.3)
            input_ax.plot(input_X[-1],
                          input_Y[-1],
                          'v',
                          ms=8,
                          color=color,
                          alpha=0.5)
            ## (default, can be tuned for other projects)
            plot_double_exponential_smoothing(input_ax,
                                              input_Y,
                                              alphas=[0.02],
                                              betas=[0.02])

        elif Pred_type == "PASS" and Visualization == True:
            pass

        return pred_summary
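The "QuantReg" branch above amounts to fitting quantile lines at 0.1, 0.5 and 0.9 and comparing their slopes; a compact standalone sketch of the same idea on synthetic heteroskedastic data:

import numpy as np
import statsmodels.api as sm
rng = np.random.default_rng(4)
x = np.linspace(0, 10, 200)
y = 0.5 * x + (0.2 + 0.1 * x) * rng.standard_normal(200)
est = sm.QuantReg(y, sm.add_constant(x))
for qt in (0.1, 0.5, 0.9):
    print(qt, est.fit(q=qt).params[1])  # slope grows with the quantile here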
Example 9
# lsmod: a previously fitted least-squares results object from earlier in the script
X = lsmod.model.wexog
y = lsmod.model.wendog
rlmod = sm.RLM(y,X).fit()
rlmod.summary()


#	

wts = rlmod.weights
wts[wts < 1]


#	

l1mod = sm.QuantReg(y,X).fit()  # default q=0.5 gives L1 (LAD) regression
l1mod.summary()


# ### High Breakdown Estimators
#	

import faraway.datasets.star
star = faraway.datasets.star.load()
gs1 = smf.ols('light ~ temp', star).fit()
X = gs1.model.wexog
gs2 = sm.RLM(star.light, X, data=star).fit()
gs3 = smf.ols('light ~ temp', star.loc[star.temp > 3.6,:]).fit()
plt.scatter(star.temp, star.light, label = None)
xr = np.array([min(star.temp), max(star.temp)])
plt.plot(xr, gs1.params[0] + gs1.params[1]*xr,'k-',label="OLS")
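gs2 and gs3 are fitted above but not drawn in this excerpt; presumably the figure continues by overlaying them, along these lines:

plt.plot(xr, gs2.params[0] + gs2.params[1]*xr,'k--',label="RLM")
plt.plot(xr, gs3.params[0] + gs3.params[1]*xr,'k:',label="OLS, temp > 3.6")
plt.legend()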
Example 10
def covar(banks, year_from, quarter_from, year_to, quarter_to):

    # Compute the portfolio system return
    psr = portfolio_system_return(banks, year_start=2000, year_end=2015)

    # Estimate the B coefficients of Xsys = a + B * X

    # Build the y vector by filtering on the quarters
    mask = (psr['Year'] == year_from) & (psr['Quarter'] == quarter_from)
    start_index = psr[mask].index[0]
    mask = (psr['Year'] == year_to) & (psr['Quarter'] == quarter_to)
    end_index = psr[mask].index[0]
    y = psr['PSR'].iloc[start_index:end_index+1]
    y.reset_index(drop=True, inplace=True)
    y.name = 'PSR'

    # Build the X matrix
    X = pd.DataFrame()

    for b in banks:
        mask = (b.mva['Year'] == year_from) & (b.mva['Quarter'] == quarter_from)
        if mask.any():
            start_index = b.mva[mask].index[0]
            mask = (b.mva['Year'] == year_to) & (b.mva['Quarter'] == quarter_to)
            if mask.any():
                end_index = b.mva[mask].index[0]
                s = b.mva['DELTA_MVA'].iloc[start_index:end_index+1]
                s.reset_index(drop=True, inplace=True)
                s.name = b.ticker
                X.reset_index(drop=True, inplace=True)
                X = pd.concat([X, s], axis=1)

    # Run the quantile regression for each bank

    covar_unc_matrix = pd.DataFrame(columns=['Ticker', 'Beta', 'COVAR', 'VAR_0.01', 'VAR_0.5'])

    for ticker in X.columns.values:

        x = X[ticker]

        if not x.isnull().values.sum():
            x = sm.add_constant(x)

            model = sm.QuantReg(y, x)

            res = model.fit(q=0.01)

            x_pred = [1, X[ticker].quantile(q=0.01)]

            # Compute the CoVaR
            covar = float(res.predict(x_pred))

            covar_unc_matrix = pd.concat([covar_unc_matrix, pd.DataFrame([
                {'Ticker': ticker, 'Beta': res.params[ticker],
                 'COVAR': covar,
                 'VAR_0.01': X[ticker].quantile(q=0.01),
                 'VAR_0.5': X[ticker].quantile(q=0.5)}])], ignore_index=True)


    # Compute the unconditional delta CoVaR
    covar_unc_matrix['DELTA_COVAR_UNC'] = covar_unc_matrix['Beta'] * (
        covar_unc_matrix['VAR_0.01'] - covar_unc_matrix['VAR_0.5'])

    # Load the state variables
    states_variables = get_states_variable()

    # Build the system-variable part of the X matrix and call it X2
    mask = (states_variables['Year'] == year_from) & (states_variables['Quarter'] == quarter_from)
    start_index = states_variables[mask].index[0]
    mask = (states_variables['Year'] == year_to) & (states_variables['Quarter'] == quarter_to)
    end_index = states_variables[mask].index[0]
    start_index -= 1
    X2 = states_variables.iloc[start_index:end_index]
    X2 = X2[['V2X Index', 'SX7P Index', 'Spr_Liq_St', 'Incl_curv_rend', 'var_t-bill_3M','credit_spread']]
    X2.reset_index(drop=True, inplace=True)

    # Initialise the CoVaR matrix
    covar_matrix = pd.DataFrame(columns=['Ticker', 'Beta', 'COVAR', 'VAR_0.01', 'VAR_0.5', 'DELTA_COVAR'])

    # Run the OLS regression for each bank
    for ticker in X.columns.values:

        X1 = X[ticker]

        if not X1.isnull().values.sum():
            # Prepare the inputs for the OLS regression; y does not change

            # Prepare the X matrix

            X1_X2 = pd.concat([X1, X2], axis=1)
            X1_X2 = sm.add_constant(X1_X2)

            # Run the regression
            model = sm.OLS(y, X1_X2)
            results = model.fit()

            # Prepare X1 and X2 for prediction
            X1_pred = pd.Series(X[ticker].quantile(q=0.01), name=ticker)

            X2_pred = X2.iloc[-1]

            X2_pred = pd.DataFrame(X2_pred).transpose()

            X2_pred.reset_index(drop=True, inplace=True)

            X1_X2_pred = pd.concat([X1_pred, X2_pred], axis=1, ignore_index=True)

            X1_X2_pred = sm.add_constant(X1_X2_pred)

            # Compute the CoVaR
            covar = results.predict(X1_X2_pred)

            # Store the CoVaR
            covar_matrix = pd.concat([covar_matrix, pd.DataFrame([
                {'Ticker': ticker, 'COVAR': covar[0],
                 'Beta': results.params[ticker],
                 'VAR_0.01': X[ticker].quantile(q=0.01),
                 'VAR_0.5': X[ticker].quantile(q=0.5)}])], ignore_index=True)

    covar_matrix['DELTA_COVAR'] = covar_matrix['Beta'] * (covar_matrix['VAR_0.01'] - covar_matrix['VAR_0.5'])

    return covar_unc_matrix, covar_matrix
Example 11
    def __init__(self, x, y, args):
        super(LinearQR, self).__init__()
        self.model = sm.QuantReg(y, x)
        self.alpha = args.alpha
        self.model_name = "LinearQR"
Example 12
    def construct_quantile_regression_models(self,
                                             years,
                                             week,
                                             lags=6,
                                             future_intervals=3):
        """Construct regression models for each"""

        # Construct dataset
        lagged, future = self.construct_dataset(years, week)

        # DUIDs
        duids = list(set([i.split('_future')[0] for i in future.columns]))
        duids.sort()

        # Container for quantile regression results
        results = {}

        # Run model for each quantile
        for duid in duids:
            results[duid] = {}

            # Lagged values
            x = pd.concat(
                [lagged.loc[:, f'{duid}_lag_{i}'] for i in range(0, lags + 1)],
                axis=1)
            x = x.dropna()

            # For each future interval range
            for f in range(1, future_intervals + 1):
                results[duid][f] = {}

                # Split independent and dependent variables
                y = future[f'{duid}_future_{f}']
                y = y.dropna()

                # Ensure index is the same
                new_index = y.index.intersection(x.index).sort_values()
                x = x.reindex(new_index)
                y = y.reindex(new_index)

                # Run model for each quantile
                for q in [0.1, 0.2, 0.3, 0.4, 0.6, 0.7, 0.8, 0.9]:
                    # print(f'Fitting model: duid={duid}, future_interval={f}, quantile={q}')

                    try:
                        # Construct and fit model
                        m = sm.QuantReg(y, x)
                        res = m.fit(q=q)

                        # Make prediction for last time point
                        last_observation = lagged.loc[:, [
                            f'{duid}_lag_{i}' for i in range(0, lags + 1)
                        ]].iloc[-1].values
                        pred = res.predict(last_observation)[0]
                        results[duid][f][q] = pred

                    except ValueError:
                        results[duid][f][q] = None
                        # print(f'Failed for: duid={duid}, quantile={q}')

        return results
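A standalone sketch of the inner fit-and-predict step on synthetic lag data (column names are illustrative):

import numpy as np
import pandas as pd
import statsmodels.api as sm
rng = np.random.default_rng(5)
x_lags = pd.DataFrame(rng.normal(size=(300, 3)),
                      columns=['lag_0', 'lag_1', 'lag_2'])
y_fut = x_lags.sum(axis=1) + rng.standard_normal(300)
last_obs = x_lags.iloc[-1].values
for q in (0.1, 0.5, 0.9):
    res = sm.QuantReg(y_fut, x_lags).fit(q=q)
    print(q, res.predict(last_obs)[0])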