Exemple #1
0
def main(tickers=['AAPL'], start=None, end=None, n_steps=21):
    """Fit a neural-network classifier per ticker and forecast session outcomes.

    For every ticker the price history is loaded through ``fc``, labelled
    with a binary ``outcome`` column, extended with the columns produced by
    ``fc.get_sma_classifier_features`` and split chronologically at a random
    60-80 % cut.  An ``MLPClassifier`` is fitted on the leading part, scored
    on the trailing part, and finally handed to ``fc.forecast_classifier``.

    Args:
        tickers: iterable of ticker symbols.
        start: forwarded to ``fc.get_time_series``.
        end: forwarded to ``fc.get_time_series``.
        n_steps: number of steps requested from ``fc.forecast_classifier``.

    Returns:
        OrderedDict mapping each ticker to whatever
        ``fc.forecast_classifier`` returned for it.
    """

    def session_outcome(row):
        # 1 if the trading session was positive (close > open), 0 otherwise
        if row['adj_close'] > row['adj_open']:
            return 1
        return 0

    data = OrderedDict()
    pred_data = OrderedDict()
    forecast_data = OrderedDict()

    for ticker in tickers:
        frame = fc.get_time_series(ticker, start, end)
        frame['outcome'] = frame.apply(session_outcome, axis=1)
        frame = fc.get_sma_classifier_features(frame).dropna()
        data[ticker] = frame

        # chronological hold-out: a random 60-80 % of the rows is used to fit
        split = rand.uniform(0.60, 0.80)
        cut = int(len(frame) * split)
        train = frame[0:cut]
        test = frame[cut:len(frame)]

        features = ['sma_2', 'sma_3', 'sma_4', 'sma_5', 'sma_6']

        # feature rows and target labels of the training part
        X = list(train[features].values)
        Y = list(train['outcome'])

        # fit a multi-layer perceptron with three hidden layers of 100 units
        network = MLPClassifier(hidden_layer_sizes=(100, 100, 100))
        mdl = network.fit(X, Y)
        print(mdl)

        # predict the held-out part
        pred = mdl.predict(test[features].values)

        # summarize the fit of the model
        report, matrix = fc.get_classifier_metrics(test['outcome'].values,
                                                   pred)
        summary = ("{} Neural Network\n"
                   "-------------\n"
                   "Classification report: {}\n\n"
                   "Confusion matrix: {}\n\n")
        print(summary.format(ticker, report, matrix))

        pred_data[ticker] = pd.DataFrame(
            data={'original': test['outcome'], 'prediction': pred},
            index=test.index)

        # out-of-sample test
        forecast_data[ticker] = fc.forecast_classifier(model=mdl,
                                                       sample=test,
                                                       features=features,
                                                       steps=n_steps)

    return forecast_data
def main(tickers=['AAPL'], start=None, end=None):
    """Plot the log-return histogram and time series of each ticker.

    Args:
        tickers: iterable of ticker symbols.
        start: forwarded to ``fc.get_time_series``.
        end: forwarded to ``fc.get_time_series``.

    Returns:
        None. The function only produces plots through ``fc``.
    """
    data = OrderedDict()

    for ticker in tickers:
        frame = fc.get_time_series(ticker, start, end)

        # log_returns
        frame['log_returns'] = np.log(
            frame['adj_close'] / frame['adj_close'].shift(1))

        # The first row has no previous close, so its log return is NaN.
        # Drop it on the frame itself: dropna(inplace=True) on a column
        # selection does not reliably change the frame (and never does
        # under pandas Copy-on-Write), which left the NaN in the plots.
        frame = frame.dropna(subset=['log_returns'])
        data[ticker] = frame

        # plotting the histogram of returns
        fc.plot_histogram(y=frame['log_returns'], ticker=ticker)

        fc.plot_time_series(y=frame['log_returns'],
                            lags=30,
                            ticker=ticker)
def main(tickers='AAPL', start=None, end=None, n_steps=21):
    """Fit an ARMA model to each ticker's log returns and forecast them.

    Per ticker the function prints descriptive and stationarity statistics,
    fits a model through ``fc.get_best_arma_model``, re-checks the
    residuals, plots an in-sample prediction over a random 20-40 % tail of
    the data and an ``n_steps`` out-of-sample forecast, and saves the charts
    under ``charts/`` (the directory must already exist).

    Args:
        tickers: a single ticker symbol or an iterable of symbols.
        start: forwarded to ``fc.get_time_series``.
        end: forwarded to ``fc.get_time_series``.
        n_steps: number of steps passed to the fitted model's ``forecast``.

    Returns:
        OrderedDict mapping each ticker to the value returned by the fitted
        model's ``forecast(steps=n_steps)``.
    """
    # A bare string is one ticker symbol; iterating it directly would walk
    # its characters ('A', 'A', 'P', 'L') instead of the symbol itself.
    if isinstance(tickers, str):
        tickers = [tickers]

    data = OrderedDict()
    pred_data = OrderedDict()
    forecast_data = OrderedDict()

    for ticker in tickers:
        frame = fc.get_time_series(ticker, start, end)

        # log_returns
        frame['log_returns'] = np.log(
            frame['adj_close'] / frame['adj_close'].shift(1))

        # The first row has no previous close, so its log return is NaN.
        # Drop it on the frame itself: dropna(inplace=True) on a column
        # selection does not reliably change the frame (and never does
        # under pandas Copy-on-Write), so the NaN reached the tests and
        # the model fit below.
        frame = frame.dropna(subset=['log_returns'])
        data[ticker] = frame

        # plotting the histogram of returns
        fc.plot_histogram(frame['log_returns'])

        fc.plot_time_series(frame['log_returns'], lags=30)

        adj_close = frame['adj_close']
        print("{} Series\n"
              "-------------\n"
              "mean: {:.3f}\n"
              "median: {:.3f}\n"
              "maximum: {:.3f}\n"
              "minimum: {:.3f}\n"
              "variance: {:.3f}\n"
              "standard deviation: {:.3f}\n"
              "skewness: {:.3f}\n"
              "kurtosis: {:.3f}".format(ticker,
                                        adj_close.mean(),
                                        adj_close.median(),
                                        adj_close.max(),
                                        adj_close.min(),
                                        adj_close.var(),
                                        adj_close.std(),
                                        adj_close.skew(),
                                        adj_close.kurtosis()))

        # resstore is returned by the helper but not reported
        adfstat, pvalue, critvalues, resstore, dagostino_results, shapiro_results, ks_results, anderson_results, kpss_results = fc.get_stationarity_statistics(
            frame['log_returns'].values)

        print(
            "{} Stationarity Statistics\n"
            "-------------\n"
            "Augmented Dickey-Fuller unit root test: {}\n"
            "MacKinnon’s approximate p-value: {}\n"
            "Critical values for the test statistic at the 1 %, 5 %, and 10 % levels: {}\n"
            "D’Agostino and Pearson’s normality test: {}\n"
            "Shapiro-Wilk normality test: {}\n"
            "Kolmogorov-Smirnov goodness of fit test: {}\n"
            "Anderson-Darling test: {}\n"
            "Kwiatkowski, Phillips, Schmidt, and Shin (KPSS) stationarity test: {}"
            .format(ticker, adfstat, pvalue, critvalues, dagostino_results,
                    shapiro_results, ks_results, anderson_results,
                    kpss_results))

        # Fit ARMA model to the ticker's returns. Element 1 of the result is
        # used as the order label in chart titles, element 2 as the fitted
        # model (it provides summary/resid/predict/forecast below).
        res_tup = fc.get_best_arma_model(frame['log_returns'])
        fitted = res_tup[2]

        # The summary was previously computed and thrown away; show it.
        print(fitted.summary())

        # verify stationarity of the residuals
        adfstat, pvalue, critvalues, resstore, dagostino_results, shapiro_results, ks_results, anderson_results, kpss_results = fc.get_stationarity_statistics(
            fitted.resid.values)

        print(
            "Stationarity Statistics\n"
            "-------------\n"
            "Augmented Dickey-Fuller unit root test: {}\n"
            "MacKinnon’s approximate p-value: {}\n"
            "Critical values for the test statistic at the 1 %, 5 %, and 10 % levels: {}\n"
            "D’Agostino and Pearson’s normality test: {}\n"
            "Shapiro-Wilk normality test: {}\n"
            "Kolmogorov-Smirnov goodness of fit test: {}\n"
            "Anderson-Darling test: {}\n"
            "Kwiatkowski, Phillips, Schmidt, and Shin (KPSS) stationarity test: {}"
            .format(adfstat, pvalue, critvalues, dagostino_results,
                    shapiro_results, ks_results, anderson_results,
                    kpss_results))

        fc.plot_histogram(y=fitted.resid, ticker=ticker, title='ARMA')

        fc.plot_time_series(y=fitted.resid,
                            lags=30,
                            ticker=ticker,
                            title='ARMA')

        # cross-validation testing: random chronological 60-80 % cut
        split = rand.uniform(0.60, 0.80)

        train_size = int(len(frame) * split)

        train, test = frame[0:train_size], frame[train_size:len(frame)]

        # in-sample prediction over the positions covered by `test`
        pred_data[ticker] = fitted.predict(start=len(train),
                                           end=len(train) + len(test) - 1)

        pred_results = pd.DataFrame(data=dict(
            original=test['log_returns'], prediction=pred_data[ticker].values),
                                    index=test.index)

        print('{} Original Sharpe Ratio:'.format(ticker),
              fc.get_sharpe_ratio(returns=pred_results['original']))
        print('{} Prediction Sharpe Ratio:'.format(ticker),
              fc.get_sharpe_ratio(returns=pred_results['prediction']))

        # prediction plot
        fig = plt.figure()
        ax = fig.add_subplot(111)
        ax.plot(pred_results['original'])
        ax.plot(pred_results['prediction'])
        ax.set(title='{} ARMA{} In-Sample Return Prediction'.format(
            ticker, res_tup[1]),
               xlabel='time',
               ylabel='$')
        ax.legend(['Original', 'Prediction'])
        fig.tight_layout()
        fig.savefig(
            'charts/{}-ARMA-In-Sample-Return-Prediction'.format(ticker))

        # out-of-sample forecast
        forecast_data[ticker] = fitted.forecast(steps=n_steps)

        # forecast plot; element 0 of the forecast result is what gets drawn
        fig = plt.figure()
        ax = fig.add_subplot(111)
        ax.plot(forecast_data[ticker][0])
        # the title is display text, so it carries no file extension
        ax.set(
            title='{} Day {} ARMA{} Out-of-Sample Return Forecast'.format(
                n_steps, ticker, res_tup[1]),
            xlabel='time',
            ylabel='$')
        ax.legend(['Forecast'])
        fig.tight_layout()
        fig.savefig(
            'charts/{}-Day-{}-ARMA-Out-of-Sample-Return-Forecast.png'.format(
                n_steps, ticker))

    # end of day plot of all tickers
    fig = plt.figure()
    ax = fig.add_subplot(111)
    for ticker in tickers:
        ax.plot(data[ticker]['adj_close'])
    ax.set(title='Time series plot', xlabel='time', ylabel='$')
    ax.legend(tickers)
    fig.tight_layout()
    fig.savefig('charts/stocks.png')

    return forecast_data
def main(tickers=['AAPL'], start=None, end=None, n_steps=21):
    """
    Fit a neural-network regressor per ticker and chart its predictions.

    For every ticker the series is loaded through ``fc``, extended with the
    columns from ``fc.get_sma_regression_features`` and split
    chronologically at a random 60-80 % cut.  An ``MLPRegressor`` is fitted
    on ``sma_15``/``sma_50`` against ``adj_close``, scored on the held-out
    rows, and passed to ``fc.forecast_regression``.  Charts are written
    under ``charts/``.

    Args:
        tickers: iterable of ticker symbols.
        start: forwarded to ``fc.get_time_series``.
        end: forwarded to ``fc.get_time_series``.
        n_steps: number of steps requested from ``fc.forecast_regression``.

    Returns:
        OrderedDict mapping each ticker to the value returned by
        ``fc.forecast_regression``.
    """
    data = OrderedDict()
    pred_data = OrderedDict()
    forecast_data = OrderedDict()

    metrics_template = ("{} Neural Network\n"
                        "-------------\n"
                        "Explained variance score: {:.3f}\n"
                        "Mean absolute error: {:.3f}\n"
                        "Mean squared error: {:.3f}\n"
                        "Median absolute error: {:.3f}\n"
                        "Coefficient of determination: {:.3f}")

    for ticker in tickers:
        raw = fc.get_time_series(ticker, start, end)
        frame = fc.get_sma_regression_features(raw).dropna()
        data[ticker] = frame

        # chronological hold-out: a random 60-80 % of the rows is used to fit
        split = rand.uniform(0.60, 0.80)
        cut = int(len(frame) * split)
        train = frame[0:cut]
        test = frame[cut:len(frame)]

        features = ['sma_15', 'sma_50']

        # feature matrix and target values of the training part
        X = np.array(train[features].values)
        Y = list(train['adj_close'])

        # fit a multi-layer perceptron with three hidden layers of 100 units
        regressor = MLPRegressor(hidden_layer_sizes=(100, 100, 100))
        mdl = regressor.fit(X, Y)

        # predict the held-out part
        pred = mdl.predict(test[features].values)

        # summarize the fit of the model
        evs, mae, mse, medae, r2 = fc.get_regression_metrics(
            test['adj_close'].values, pred)
        print(metrics_template.format(ticker, evs, mae, mse, medae, r2))

        pred_data[ticker] = pd.DataFrame(
            data={'original': test['adj_close'], 'prediction': pred},
            index=test.index)

        # out-of-sample test
        forecast_data[ticker] = fc.forecast_regression(model=mdl,
                                                       sample=test,
                                                       features=features,
                                                       steps=n_steps)

        # held-out actual values against the model's predictions
        fig = plt.figure()
        ax = fig.add_subplot(111)
        for column in ('original', 'prediction'):
            ax.plot(pred_data[ticker][column])
        ax.set(title='{} Neural Network In-Sample Prediction'.format(ticker),
               xlabel='time',
               ylabel='$')
        ax.legend(['Original $', 'Prediction $'])
        fig.tight_layout()
        fig.savefig(
            'charts/{}-Neural-Network-In-Sample-Prediction.png'.format(ticker))

        # last n_steps values of the forecast's adj_close column
        fig = plt.figure()
        ax = fig.add_subplot(111)
        ax.plot(forecast_data[ticker]['adj_close'][-n_steps:])
        ax.set(title='{} Day {} Neural Network Out-of-Sample Forecast'.format(
            n_steps, ticker),
               xlabel='time',
               ylabel='$')
        ax.legend(['Forecast $'])
        fig.tight_layout()
        fig.savefig(
            'charts/{}-Day-{}-Neural-Network-Out-of-Sample-Forecast.png'.format(
                n_steps, ticker))

    # adjusted close of every ticker on one chart
    fig = plt.figure()
    ax = fig.add_subplot(111)
    for ticker in tickers:
        ax.plot(data[ticker]['adj_close'])
    ax.set(title='Time series plot', xlabel='time', ylabel='$')
    ax.legend(tickers)
    fig.tight_layout()
    fig.savefig('charts/stocks.png')

    return forecast_data
Exemple #5
0
def main(tickers=['AAPL'], start=None, end=None, n_steps=21):
    """Fit an OLS regression of adj_close on sma_15 per ticker and chart it.

    For every ticker the series is loaded through ``fc``, extended with the
    columns from ``fc.get_sma_regression_features`` and split
    chronologically at a random 60-80 % cut.  ``sm.OLS`` is fitted on the
    single ``sma_15`` regressor (no constant term is added), scored on the
    held-out rows, and passed to ``fc.forecast_regression``.  Charts are
    written under ``charts/`` (the directory must already exist).

    Args:
        tickers: iterable of ticker symbols.
        start: forwarded to ``fc.get_time_series``.
        end: forwarded to ``fc.get_time_series``.
        n_steps: number of steps requested from ``fc.forecast_regression``.

    Returns:
        OrderedDict mapping each ticker to the value returned by
        ``fc.forecast_regression``.
    """
    data = OrderedDict()
    pred_data = OrderedDict()
    forecast_data = OrderedDict()

    for ticker in tickers:
        data[ticker] = fc.get_time_series(ticker, start, end)

        data[ticker] = fc.get_sma_regression_features(data[ticker]).dropna()

        # cross-validation testing: random chronological 60-80 % cut
        split = rand.uniform(0.60, 0.80)

        train_size = int(len(data[ticker]) * split)

        train, test = data[ticker][0:train_size], data[ticker][
            train_size:len(data[ticker])]

        # values of features
        X = list(train['sma_15'].values)

        # target values
        Y = list(train['adj_close'].values)

        mdl = sm.OLS(Y, X).fit()
        print(mdl.summary())

        print(mdl.params)

        print(mdl.bse)

        # in sample prediction
        pred = mdl.predict(test['sma_15'].values)

        # summarize the fit of the model
        explained_variance_score, mean_absolute_error, mean_squared_error, median_absolute_error, r2_score = fc.get_regression_metrics(
            test['adj_close'].values, pred)

        print("{} Ordinary Least Squares\n"
              "-------------\n"
              "Explained variance score: {:.3f}\n"
              "Mean absolute error: {:.3f}\n"
              "Mean squared error: {:.3f}\n"
              "Median absolute error: {:.3f}\n"
              "Coefficient of determination: {:.3f}".format(
                  ticker, explained_variance_score, mean_absolute_error,
                  mean_squared_error, median_absolute_error, r2_score))

        pred_results = pd.DataFrame(data=dict(original=test['adj_close'],
                                              prediction=pred),
                                    index=test.index)

        pred_data[ticker] = pred_results

        # out-of-sample test
        forecast_data[ticker] = fc.forecast_regression(model=mdl,
                                                       sample=test,
                                                       features='sma_15',
                                                       steps=n_steps)

        # held-out actual values against the model's predictions
        fig = plt.figure()
        ax = fig.add_subplot(111)
        ax.plot(pred_data[ticker]['original'], color='red')
        ax.plot(pred_data[ticker]['prediction'], color='blue')
        ax.set(title='{} OLS In-Sample Prediction'.format(ticker),
               xlabel='time',
               ylabel='$')
        ax.legend(['Original $', 'Prediction $'])
        fig.tight_layout()
        fig.savefig('charts/{}-OLS-In-Sample-Prediction'.format(ticker))

        # last n_steps values of the forecast's adj_close column
        fig = plt.figure()
        ax = fig.add_subplot(111)
        ax.plot(forecast_data[ticker]['adj_close'][-n_steps:])
        ax.set(title='{} Day {} OLS Out-of-Sample Forecast'.format(
            n_steps, ticker),
               xlabel='time',
               ylabel='$')
        # This chart holds a single forecast line, so label that line.
        # Passing the whole ticker list tagged it with the first ticker's
        # name regardless of which ticker was being plotted.
        ax.legend(['Forecast $'])
        fig.tight_layout()
        fig.savefig('charts/{}-Day-{}-OLS-Out-of-Sample-Forecast'.format(
            n_steps, ticker))

    # adjusted close of every ticker on one chart
    fig = plt.figure()
    ax = fig.add_subplot(111)
    for ticker in tickers:
        ax.plot(data[ticker]['adj_close'])
    ax.set(title='Time series plot', xlabel='time', ylabel='$')
    ax.legend(tickers)
    fig.tight_layout()
    fig.savefig('charts/stocks.png')

    return forecast_data
Exemple #6
0
def main(tickers=['AAPL'], start=None, end=None, n_steps=21):
    """Fit a decision-tree classifier per ticker and forecast session outcomes.

    For every ticker the series is loaded through ``fc``, labelled with a
    binary ``outcome`` column, extended with the columns from
    ``fc.get_sma_classifier_features`` and split chronologically with the
    first 80 % used for fitting.  The fitted ``DecisionTreeClassifier`` is
    scored on the remaining rows and passed to ``fc.forecast_classifier``.

    Args:
        tickers: iterable of ticker symbols.
        start: forwarded to ``fc.get_time_series``.
        end: forwarded to ``fc.get_time_series``.
        n_steps: number of steps requested from ``fc.forecast_classifier``.

    Returns:
        OrderedDict mapping each ticker to the value returned by
        ``fc.forecast_classifier``.
    """

    def session_outcome(row):
        # 1 if the trading session was positive (close > open), 0 otherwise
        if row['adj_close'] > row['adj_open']:
            return 1
        return 0

    data = OrderedDict()
    pred_data = OrderedDict()
    forecast_data = OrderedDict()

    for ticker in tickers:
        frame = fc.get_time_series(ticker, start, end)
        frame['outcome'] = frame.apply(session_outcome, axis=1)
        frame = fc.get_sma_classifier_features(frame)
        data[ticker] = frame

        # fixed chronological split: first 80 % of the rows is used to fit
        cut = int(len(frame) * 0.80)
        train = frame[0:cut]
        test = frame[cut:len(frame)]

        features = ['sma_2', 'sma_3', 'sma_4', 'sma_5', 'sma_6']

        # feature rows and target labels of the training part
        X = list(train[features].values)
        Y = list(train['outcome'])

        tree = DecisionTreeClassifier()
        mdl = tree.fit(X, Y)
        print(mdl)
        # the string below is a disabled graphviz export kept for reference
        '''
        dot_data = export_graphviz(mdl,
                                   out_file=None,
                                   feature_names=list(train[['feat1', 'feat2', 'feat3', 'feat4', 'feat5']]),
                                   class_names='outcome',
                                   filled=True,
                                   rounded=True,
                                   special_characters=True)
        
        graph = pydot.graph_from_dot_data(dot_data)
        graph.write_png("charts/decision-tree-classifier2.png")
        '''

        # class predictions and class probabilities for the held-out rows
        held_out = test[features]
        pred = mdl.predict(held_out.values)
        pred_prob = mdl.predict_proba(test[features].values)

        # summarize the fit of the model
        report, matrix = fc.get_classifier_metrics(test['outcome'].values,
                                                   pred)
        summary = ("{} Decision Tree\n"
                   "-------------\n"
                   "Classification report: {}\n"
                   "Confusion matrix: {}\n"
                   "Prediction probability: {}\n")
        print(summary.format(ticker, report, matrix, pred_prob))

        pred_data[ticker] = pd.DataFrame(
            data={'original': test['outcome'], 'prediction': pred},
            index=test.index)

        # out-of-sample test
        forecast_data[ticker] = fc.forecast_classifier(model=mdl,
                                                       sample=test,
                                                       features=features,
                                                       steps=n_steps)

    return forecast_data
Exemple #7
0
def main(tickers=['AAPL'], n_steps=21):
    """
    Fit a pymc3 volatility model to each ticker's log returns and project them.

    Per ticker the last 500 rows of the series are kept, descriptive and
    stationarity statistics are printed, a Student-T model whose scale
    follows a Gaussian random walk is sampled with NUTS on the first 450
    positions, and ``fc.generate_proj_returns`` is used to produce both an
    in-sample prediction and an out-of-sample forecast.  Charts are written
    under ``charts/``.

    Args:
        tickers: iterable of ticker symbols.
        n_steps: number of extra steps requested beyond the test window for
            the out-of-sample forecast.

    Returns:
        OrderedDict mapping each ticker to the first element returned by
        ``fc.generate_proj_returns`` for the out-of-sample call.
    """
    data = OrderedDict()
    pred_data = OrderedDict()
    forecast_data = OrderedDict()

    for ticker in tickers:
        # keep only the most recent 500 rows
        data[ticker] = fc.get_time_series(ticker)[-500:]

        print("{} Series\n"
              "-------------\n"
              "mean: {:.3f}\n"
              "median: {:.3f}\n"
              "maximum: {:.3f}\n"
              "minimum: {:.3f}\n"
              "variance: {:.3f}\n"
              "standard deviation: {:.3f}\n"
              "skewness: {:.3f}\n"
              "kurtosis: {:.3f}".format(ticker,
                                        data[ticker]['adj_close'].mean(),
                                        data[ticker]['adj_close'].median(),
                                        data[ticker]['adj_close'].max(),
                                        data[ticker]['adj_close'].min(),
                                        data[ticker]['adj_close'].var(),
                                        data[ticker]['adj_close'].std(),
                                        data[ticker]['adj_close'].skew(),
                                        data[ticker]['adj_close'].kurtosis()))

        # log return of each row relative to the previous row's adj_close;
        # the first row has no predecessor and therefore evaluates to NaN
        data[ticker]['log_returns'] = np.log(
            data[ticker]['adj_close'] / data[ticker]['adj_close'].shift(1))

        # NOTE(review): this drops NaN on a column selection, not on the
        # frame; whether the frame's column actually loses the NaN row
        # depends on the pandas version (it does not under Copy-on-Write)
        # -- confirm, since the values below feed the tests and the model.
        data[ticker]['log_returns'].dropna(inplace=True)

        # resstore is unpacked but not used afterwards
        adfstat, pvalue, critvalues, resstore, dagostino_results, shapiro_results, ks_results, anderson_results, kpss_results = fc.get_stationarity_statistics(
            data[ticker]['log_returns'].values)

        print(
            "{} Stationarity Statistics\n"
            "-------------\n"
            "Augmented Dickey-Fuller unit root test: {}\n"
            "MacKinnon’s approximate p-value: {}\n"
            "Critical values for the test statistic at the 1 %, 5 %, and 10 % levels: {}\n"
            "D’Agostino and Pearson’s normality test: {}\n"
            "Shapiro-Wilk normality test: {}\n"
            "Kolmogorov-Smirnov goodness of fit test: {}\n"
            "Anderson-Darling test: {}\n"
            "Kwiatkowski, Phillips, Schmidt, and Shin (KPSS) stationarity test: {}"
            .format(ticker, adfstat, pvalue, critvalues, dagostino_results,
                    shapiro_results, ks_results, anderson_results,
                    kpss_results))

        # train and test are arrays of integer positions, not data slices:
        # 0..449 for fitting and 451..end for evaluation.
        # NOTE(review): position 450 belongs to neither set -- confirm the
        # gap is intended (the plot offset further down matches it).
        train, test = np.arange(0, 450), np.arange(
            451, len(data[ticker]['log_returns']))
        n = len(train)

        # model definition: priors sigma, mu, nu, a latent random walk
        # `logs` with one value per training observation, and a Student-T
        # likelihood over the training returns
        with pm.Model() as model:
            sigma = pm.Exponential('sigma', 1. / .02, testval=.1)
            mu = pm.Normal('mu', 0, sd=5, testval=.1)

            nu = pm.Exponential('nu', 1. / 10)
            logs = pm.GaussianRandomWalk('logs', tau=sigma**-2, shape=n)

            # lam uses variance in pymc3, not sd like in scipy
            # NOTE(review): 1 / exp(-2 * logs) equals exp(2 * logs); check
            # against the pymc3 StudentT documentation that this is the
            # intended value for `lam`.
            r = pm.StudentT('r',
                            nu,
                            mu=mu,
                            lam=1 / np.exp(-2 * logs),
                            observed=data[ticker]['log_returns'].values[train])

        # starting point: optimise over `logs` only
        with model:
            start = pm.find_MAP(vars=[logs], fmin=sp.optimize.fmin_l_bfgs_b)

        # two-stage sampling: a short 100-draw run seeded with `start`,
        # then the main 2000-draw run seeded with that run's last draw
        with model:
            step = pm.NUTS(vars=[logs, mu, nu, sigma],
                           scaling=start,
                           gamma=.25)
            start2 = pm.sample(100, step, start=start)[-1]

            # Start next run at the last sampled position.
            step = pm.NUTS(vars=[logs, mu, nu, sigma],
                           scaling=start2,
                           gamma=.55)
            trace = pm.sample(2000, step, start=start2)

        # projection over the test window; the second returned value (vol)
        # is not used
        pred_data[ticker], vol = fc.generate_proj_returns(
            1000, trace, len(test))

        # row 1 of the projection is compared against the actual returns
        # NOTE(review): presumably each row is one simulated path -- confirm
        # why row 1 rather than an aggregate is used.
        pred_results = pd.DataFrame(
            data=dict(original=data[ticker]['log_returns'][test],
                      prediction=pred_data[ticker][1, :]),
            index=data[ticker]['log_returns'][test].index)

        print('{} Original Sharpe Ratio:'.format(ticker),
              fc.get_sharpe_ratio(returns=pred_results['original']))
        print('{} Prediction Sharpe Ratio:'.format(ticker),
              fc.get_sharpe_ratio(returns=pred_results['prediction']))

        # full return series in blue, projection in red starting at
        # position len(train) + 1 (the first test position)
        fig = plt.figure()
        ax = fig.add_subplot(111)
        ax.plot(data[ticker]['log_returns'].values, color='blue')
        ax.plot(1 + len(train) + np.arange(0, len(test)),
                pred_data[ticker][1, :],
                color='red')
        ax.set(title='{} NUTS In-Sample Returns Prediction'.format(ticker),
               xlabel='time',
               ylabel='%')
        ax.legend(['Original', 'Prediction'])
        fig.tight_layout()
        fig.savefig(
            'charts/{}-NUTS-In-Sample-Returns-Prediction.png'.format(ticker))

        # out-of-sample test: project n_steps beyond the test window
        forecast_data[ticker], vol = fc.generate_proj_returns(
            1000, trace,
            len(test) + n_steps)

        # only the last n_steps values of row 1 are charted
        fig = plt.figure()
        ax = fig.add_subplot(111)
        ax.plot(forecast_data[ticker][1, :][-n_steps:])
        ax.set(title='{} Day {} NUTS Out-of-Sample Returns Forecast'.format(
            n_steps, ticker),
               xlabel='time',
               ylabel='%')
        ax.legend(['Forecast'])
        fig.tight_layout()
        fig.savefig(
            'charts/{}-Day-{}-NUTS-Out-of-Sample-Returns-Forecast.png'.format(
                n_steps, ticker))

    # adjusted close of every ticker on one chart
    fig = plt.figure()
    ax = fig.add_subplot(111)
    for ticker in tickers:
        ax.plot(data[ticker]['adj_close'])
    ax.set(title='Time series plot', xlabel='time', ylabel='$')
    ax.legend(tickers)
    fig.tight_layout()
    fig.savefig('charts/stocks-close-price.png')

    # log returns of every ticker on one chart
    fig = plt.figure()
    ax = fig.add_subplot(111)
    for ticker in tickers:
        ax.plot(data[ticker]['log_returns'])
    ax.set(title='Time series plot', xlabel='time', ylabel='%')
    ax.legend(tickers)
    fig.tight_layout()
    fig.savefig('charts/stocks-close-returns.png')

    return forecast_data
Exemple #8
0
from tensorflow.keras.layers import LSTM
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Bidirectional
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import TimeDistributed
from tensorflow.keras.layers.convolutional import Conv1D
from tensorflow.keras.layers.convolutional import MaxPooling1D
from tensorflow.keras.layers import ConvLSTM2D

from simple_models import train_test_split_serie, split_to_sequence, train_lstm_model, predict_lstm

from functions import get_time_series, get_residuals

# Sequence window length shared by the train and test sequence builders.
n_steps = 4

# LSTM variants handled by the loop that follows this setup.
lstm_types = ['vanilla', 'stacked', 'bidirectional']

# Series requested from get_time_series: stock 'aapl', value 'Open',
# 2017-01-01 to 2021-01-01, without a date index.
serie = get_time_series(
    stock='aapl',
    start='2017-01-01',
    end='2021-01-01',
    value='Open',
    index_as_date=False,
)

# 80 % of the series is used for training, the rest for testing.
train, test = train_test_split_serie(serie, train_size=0.8)

# Turn each part into (inputs, targets) using windows of n_steps values.
X_train, y_train = split_to_sequence(train, n_steps=n_steps)
X_test, y_test = split_to_sequence(test, n_steps=n_steps)

# Result containers; preds starts with the test targets under 'aapl'.
preds = {'aapl': y_test}
train_preds = {}
resids = {}
mse_report_test = {}
mse_report_train = {}

for lstm_type in lstm_types:
Exemple #9
0
    case_doubling='Doubling Time for Confirmed Cases',
    death_doubling='Doubling Time of Deaths')

# Invert label_dict: map each display label back to its variable name.
# If two variables share a label, the later one wins.
variable_dict = {}
for variable, label in label_dict.items():
    variable_dict[label] = variable

# Parse the unique 'm/d/yy' date strings of data_df into datetime objects.
dates = np.array([
    datetime.datetime.strptime(date, '%m/%d/%y')
    for date in data_df.date.unique()
])
# Re-format the dates without zero padding.
# NOTE(review): the '%-m' / '%-d' directives are platform-specific strftime
# extensions -- confirm the target platforms support them.
date_strings = [date.strftime('%-m/%-d/%y') for date in dates]

country_labels = make_country_labels(data=confirmed)

old_confirmed, old_deaths, old_recovered, time_series_dates = get_time_series(
    local=config['LOCAL'])

print("TIME SERIES DATE LIST ORIGINAL")
# NOTE(review): this prints time_series_date_list, while the call above
# assigns time_series_dates -- confirm time_series_date_list is defined
# elsewhere and is the value meant to be shown here.
print(time_series_date_list)


def confinement_by_area(country='Sweden', col='country', df=None):
    """Return the per-date mean of ``df`` for the rows matching ``country``.

    Rows are selected where ``df[col] == country``; for every entry of the
    module-level ``confined_dates`` the mean of that column over the
    selected rows is taken.  The result is printed and returned.

    Args:
        country: value to match in column ``col``.
        col: name of the column that identifies the area.
        df: DataFrame with column ``col`` and one column per confined date.

    Returns:
        pandas Series with one mean per entry of ``confined_dates``, in the
        same order.
    """
    means = []
    for date in confined_dates:
        area_rows = df.loc[df[col] == country]
        means.append(area_rows[date].mean())

    data = pd.Series(means)
    print("DATA BY AREA")
    print(data)
    return data


# Creating our dataframe with mean,max,std values of our detected number of people
def make_data_confinement(city='Stockholm'):
def main(tickers=['AAPL'], start=None, end=None, n_steps=21):
    """Fit a decision-tree regressor per ticker and chart its predictions.

    For every ticker the series is loaded through ``fc``, extended with the
    columns from ``fc.get_sma_regression_features`` and split
    chronologically at a random 60-80 % cut.  A ``DecisionTreeRegressor``
    is fitted on ``sma_15``/``sma_50`` against ``adj_close``, scored on the
    held-out rows, and passed to ``fc.forecast_regression`` together with a
    copy of the held-out rows.  Charts are written under ``charts/``.

    Args:
        tickers: iterable of ticker symbols.
        start: forwarded to ``fc.get_time_series``.
        end: forwarded to ``fc.get_time_series``.
        n_steps: number of steps requested from ``fc.forecast_regression``.

    Returns:
        OrderedDict mapping each ticker to the value returned by
        ``fc.forecast_regression``.
    """
    data = OrderedDict()
    pred_data = OrderedDict()
    forecast_data = OrderedDict()

    metrics_template = ("{} Decision Trees\n"
                        "-------------\n"
                        "Explained variance score: {:.3f}\n"
                        "Mean absolute error: {:.3f}\n"
                        "Mean squared error: {:.3f}\n"
                        "Median absolute error: {:.3f}\n"
                        "Coefficient of determination: {:.3f}")

    for ticker in tickers:
        raw = fc.get_time_series(ticker, start, end)
        frame = fc.get_sma_regression_features(raw).dropna()
        data[ticker] = frame

        # chronological hold-out: a random 60-80 % of the rows is used to fit
        split = rand.uniform(0.60, 0.80)
        cut = int(len(frame) * split)
        train = frame[0:cut]
        test = frame[cut:len(frame)]

        features = ['sma_15', 'sma_50']

        # feature matrix and target values of the training part
        X = np.array(train[features].values)
        Y = np.array(train['adj_close'])

        tree = DecisionTreeRegressor()
        mdl = tree.fit(X, Y)
        print(mdl)
        # the string below is a disabled graphviz export kept for reference
        '''
        dot_data = export_graphviz(mdl,
                                   out_file=None,
                                   feature_names=list(train[features]),
                                   class_names='outcome',
                                   filled=True,
                                   rounded=True,
                                   special_characters=True)

        graph = pydot.graph_from_dot_data(dot_data)
        graph.write_png("charts/decision-tree-regression.png")
        '''

        # predict the held-out part
        pred = mdl.predict(test[features].values)

        # summarize the fit of the model
        evs, mae, mse, medae, r2 = fc.get_regression_metrics(
            test['adj_close'].values, pred)
        print(metrics_template.format(ticker, evs, mae, mse, medae, r2))

        pred_data[ticker] = pd.DataFrame(
            data={'original': test['adj_close'], 'prediction': pred},
            index=test.index)

        # out-of-sample test; the helper receives a copy of the test rows
        forecast_data[ticker] = fc.forecast_regression(model=mdl,
                                                       sample=test.copy(),
                                                       features=features,
                                                       steps=n_steps)

        # held-out actual values against the model's predictions
        fig = plt.figure()
        ax = fig.add_subplot(111)
        for column in ('original', 'prediction'):
            ax.plot(pred_data[ticker][column])
        ax.set(title='{} Decision Trees In-Sample Prediction'.format(ticker),
               xlabel='time',
               ylabel='$')
        ax.legend(['Original $', 'Prediction $'])
        fig.tight_layout()
        fig.savefig(
            'charts/{}-Decision-Trees-In-Sample-Prediction.png'.format(ticker))

        # last n_steps values of the forecast's adj_close column
        fig = plt.figure()
        ax = fig.add_subplot(111)
        ax.plot(forecast_data[ticker]['adj_close'][-n_steps:])
        ax.set(title='{} Day {} Decision Trees Out-of-Sample Forecast'.format(
            n_steps, ticker),
               xlabel='time',
               ylabel='$')
        ax.legend(['Forecast $'])
        fig.tight_layout()
        fig.savefig(
            'charts/{}-Day-{}-Decision-Trees-Out-of-Sample-Forecast'.format(
                n_steps, ticker))

    # adjusted close of every ticker on one chart
    fig = plt.figure()
    ax = fig.add_subplot(111)
    for ticker in tickers:
        ax.plot(data[ticker]['adj_close'])
    ax.set(title='Time series plot', xlabel='time', ylabel='$')
    ax.legend(tickers)
    fig.tight_layout()
    fig.savefig('charts/stocks.png')

    return forecast_data
Exemple #11
0
    start = '2010-1-1'

    end = '2017-1-1'

    tickers = ['MSFT', 'CDE', 'NAVB', 'HRG', 'HL']

    # index to benchmark the algorithm
    benchmark = 'GSPC'

    # initialising an ordered dictionary to store all our stocks
    data = OrderedDict()

    # tidying the data for the backtester
    for ticker in tickers:
        data[ticker] = fc.get_time_series(ticker=ticker,
                                          start_date=start,
                                          end_date=end)

        data[ticker].drop(
            ['open', 'high', 'low', 'close', 'ex-dividend', 'split_ratio'],
            axis=1,
            inplace=True)

        data[ticker].rename(columns={
            'ticker': 'sid',
            'adj_open': 'open',
            'adj_high': 'high',
            'adj_low': 'low',
            'adj_close': 'close'
        },
                            inplace=True)
Exemple #12
0
def main(tickers=['AAPL'], start=None, end=None, n_steps=21):
    """Fit a hard-voting ensemble per ticker and forecast session outcomes.

    For every ticker the series is loaded through ``fc``, labelled with an
    ``outcome`` column of +1/-1, extended with the columns from
    ``fc.get_sma_classifier_features`` and split chronologically at a
    random 60-80 % cut.  A ``VotingClassifier`` over ten default-configured
    estimators is fitted, scored on the held-out rows, and passed to
    ``fc.forecast_classifier``.

    Args:
        tickers: iterable of ticker symbols.
        start: forwarded to ``fc.get_time_series``.
        end: forwarded to ``fc.get_time_series``.
        n_steps: number of steps requested from ``fc.forecast_classifier``.

    Returns:
        OrderedDict mapping each ticker to the ``'outcome'`` entry of the
        value returned by ``fc.forecast_classifier``.
    """

    def session_direction(row):
        # 1 if the trading session was positive (close > open), -1 otherwise
        if row['adj_close'] > row['adj_open']:
            return 1
        return -1

    data = OrderedDict()
    pred_data = OrderedDict()
    forecast_data = OrderedDict()

    for ticker in tickers:
        frame = fc.get_time_series(ticker, start, end)
        frame['outcome'] = frame.apply(session_direction, axis=1)
        frame = fc.get_sma_classifier_features(frame)
        data[ticker] = frame

        # chronological hold-out: a random 60-80 % of the rows is used to fit
        split = rand.uniform(0.60, 0.80)
        cut = int(len(frame) * split)
        train = frame[0:cut]
        test = frame[cut:len(frame)]

        features = ['sma_2', 'sma_3', 'sma_4', 'sma_5', 'sma_6']

        # feature rows and target labels of the training part
        X = list(train[features].values)
        Y = list(train['outcome'])

        # ensemble members, each with its default configuration
        estimators = [
            ('bt', AdaBoostClassifier()),
            ('rf', RandomForestClassifier()),
            ('dt', DecisionTreeClassifier()),
            ('knn', KNeighborsClassifier()),
            ('lgt', LogisticRegression()),
            ('sgd', SGDClassifier()),
            ('mlp', MLPClassifier()),
            ('gnb', GaussianNB()),
            ('bnb', BernoulliNB()),
            ('svm', SVC()),
        ]
        ensemble = VotingClassifier(estimators=estimators, voting='hard')
        mdl = ensemble.fit(X, Y)

        print(mdl)

        # value of the ensemble's score() on the held-out rows
        held_out_X = test[features].values
        confidence = mdl.score(held_out_X, test['outcome'].values)

        summary = ("{} Voting Classifier\n"
                   "-------------\n"
                   "Confidence: {}\n")
        print(summary.format(ticker, confidence))

        pred_data[ticker] = mdl.predict(test[features].values)

        # out-of-sample test; only the 'outcome' part of the result is kept
        forecast = fc.forecast_classifier(model=mdl,
                                          sample=test,
                                          features=features,
                                          steps=n_steps)
        forecast_data[ticker] = forecast['outcome']

    return forecast_data
Exemple #13
0
from copulas.visualization import hist_1d, side_by_side
from copulas.visualization import compare_2d
from copulas.bivariate import gumbel
from sklearn.preprocessing import MinMaxScaler
import numpy as np
import seaborn as sns
from scipy import stats

from functions import get_time_series , arima_model
from copulas.multivariate import GaussianMultivariate





# pd.DataFrame is used below, but pandas is not among this script's imports;
# bring it into scope here so the script does not fail with a NameError.
import pandas as pd

# Series requested from get_time_series: stock "aapl", value 'Open',
# 2018-01-01 to 2020-02-01, indexed by date.
aapl = get_time_series(stock="aapl",
                       start="2018-01-01",
                       end="2020-02-01",
                       value='Open',
                       index_as_date=True)

# First residual series: second value returned by arima_model for (3, 1, 0).
arima, res = arima_model(aapl, (3, 1, 0), model_report=False)

# NOTE(review): the res2 returned here is overwritten two lines below, so
# only the rebinding of `arima` to the (2, 1, 0) result survives. The call
# is kept as-is in case later code relies on `arima`; confirm which res2
# was intended.
arima, res2 = arima_model(aapl, (2, 1, 0), model_report=False)

# Second series actually used: the demeaned price series.
res2 = aapl - aapl.mean()

# Scale both columns with MinMaxScaler.
df0 = pd.DataFrame({'res1': res, 'res2': res2})
df = pd.DataFrame(MinMaxScaler().fit_transform(df0.values),
                  columns=df0.columns)

# Nudge exact 0 and 1 values just inside the interval (presumably because
# the copula fitting that follows needs values strictly between 0 and 1).
df = pd.DataFrame(np.where(df == 0, 0.00000001,
                           np.where(df == 1, 0.99999999, df)),
                  columns=df0.columns)