def prepare_fly(Fmonth, Dmonth1, Dmonth2, year, period, instType='spread'):
    """Load three series, align them, and return differences of their Close.

    Each of ``Fmonth``, ``Dmonth1`` and ``Dmonth2`` is passed to
    ``load_data(<id>, year, interval='1min')``; the first element of the
    returned triple is resampled with ``resamplePeriod(..., period=period)``.
    The three resampled frames are then restricted to shared index labels and
    two differences of the ``Close`` column are formed:

    * ``s1`` = Close(first) - Close(second)
    * ``s2`` = Close(second) - Close(third)

    Args:
        Fmonth: Identifier of the first series (presumably a front-month
            contract code -- confirm against ``load_data``).
        Dmonth1: Identifier of the second series.
        Dmonth2: Identifier of the third series.
        year: Forwarded unchanged to ``load_data``.
        period: Forwarded unchanged to ``resamplePeriod``.
        instType: ``'spread'`` to get ``(s1, s2)``; ``'fly'`` to get
            ``(s1 - s2).dropna()``.

    Returns:
        A ``(s1, s2)`` tuple for ``'spread'``, or a single series for
        ``'fly'``.

    Raises:
        ValueError: If ``instType`` is neither ``'spread'`` nor ``'fly'``.
            Previously such a call silently returned ``None`` after doing all
            of the loading work.
    """
    # Fail fast: reject an unknown mode before any data is loaded.
    if instType not in ('spread', 'fly'):
        raise ValueError(
            "instType must be 'spread' or 'fly', got {!r}".format(instType)
        )

    def _load_resampled(identifier):
        # load_data returns a triple; only its first element is used here.
        frame, _, _ = load_data(identifier, year, interval='1min')
        return resamplePeriod(frame, period=period)

    first = _load_resampled(Fmonth)
    second = _load_resampled(Dmonth1)
    third = _load_resampled(Dmonth2)

    # Step-wise alignment on common index labels, dropping incomplete rows.
    x = first.loc[first.index.intersection(second.index.values), :].dropna()
    y = second.loc[second.index.intersection(x.index.values), :]
    z = third.loc[third.index.intersection(x.index.values), :].dropna()
    x = x.loc[x.index.intersection(z.index.values), :].dropna()
    # NOTE(review): y is the only frame whose rows are dropped after the last
    # intersection, so it can end up with fewer labels than x and z; the
    # subtractions below would then yield NaN at those labels -- confirm
    # whether 'spread' callers expect that.
    y = y.loc[y.index.intersection(z.index.values), :].dropna()

    s1 = (x - y).Close
    s2 = (y - z).Close

    if instType == 'spread':
        return s1, s2
    return (s1 - s2).dropna()
from DataProcessing import load_data
from DataProcessing import split_data
from DataProcessing import encode_class_labels
from DataProcessing import report_results
from DataProcessing import extract_feats_from_text
from DataProcessing import extract_feats_from_text_and_desc

from sklearn.model_selection import GridSearchCV
from sklearn.naive_bayes import MultinomialNB

# Number of parallel jobs handed to the grid search (n_jobs).
JOBS = 4
# Candidate values for MultinomialNB's ``alpha`` parameter.
PARAMS = [
    {'alpha': [8, 4, 2, 1, 0.5, 0.25, 0.1, 0.07, 0.05, 0.03, 0.01, 0.001]},
]

df = load_data()
x_train, x_test, index_train1, index_test1 = split_data()

# Labels are encoded separately for the train and the test split.
y_train, class_names = encode_class_labels(x_train)
y_test, class_names1 = encode_class_labels(x_test)

print("Features only from Text")

X_train, X_test = extract_feats_from_text()

# 4-fold cross-validated search over PARAMS, scored with "f1".
grid_search = GridSearchCV(
    estimator=MultinomialNB(),
    param_grid=PARAMS,
    scoring="f1",
    cv=4,
    n_jobs=JOBS,
    verbose=5,
)
grid_search.fit(X_train, y_train)

report_results(grid_search, y_train, X_train, y_test, X_test, class_names)
Exemple #3
0
from sklearn.model_selection import GridSearchCV

import matplotlib.pyplot as plt
import seaborn as sns

# Global plotting configuration. The two seaborn calls run back to back, so
# the "whitegrid" call is the one applied last.
plt.rc("font", size=14)
sns.set(style="white")
sns.set(style="whitegrid", color_codes=True)

# Presumably the n_jobs value for a GridSearchCV built later -- the consumer
# is not visible in this snippet.
JOBS = 4
# Hyper-parameter grid: every 'penalty' is combined with every 'C'.
PARAMS = [
    {
        'penalty': ["l1", "l2"],
        'C': [4, 2, 1.5, 1, 0.5, 0.1, 0.05, 0.01, 0.001, 0.0001],
    },
]

data = load_data()
# print(list(data.columns))

x_train, x_test, index_train1, index_test1 = split_data()

# Disabled debugging aids:
# print(data.shape)
# print(x_test.shape)
# print(x_train.shape)
# array = ['male', 'female']
# data1 = x_test.loc[:, 'gender'].values
# data2 = x_train.loc[:, 'gender'].values

# Labels are encoded separately for the train and the test split.
y_train, class_names_train = encode_class_labels(x_train)
y_test, class_names_test = encode_class_labels(x_test)

# print(len(data1))
Exemple #4
0
        cross_validator = CrossVal(X, y, 10, model)
        cross_validator.fit()

        test_mse = np.mean(cross_validator.test_mses)
        train_mse = np.mean(cross_validator.train_mses)

        results.append([p, train_mse, test_mse])

    return pd.DataFrame(results,
                        columns=['polynomials', 'train_mse', 'test_mse'])


if __name__ == "__main__":
    print("\n###   OLS  ### \n")

    X, y = load_data()

    # Created before the polynomial sweep; keep this ordering.
    model = OLS()

    # Evaluate each polynomial setting and save the resulting table as CSV.
    results = test_diffent_polynomials(X, y)
    results_path = cfg.RESULT_FOLDER.joinpath('OLS_polynomial_results.csv')
    results.to_csv(results_path)

    # Pick the first row whose test MSE equals the minimum test MSE.
    lowest_test_mse = results.test_mse.min()
    best_rows = results[results.test_mse == lowest_test_mse]
    best_poly = best_rows.polynomials.tolist()[0]

    model_summary(model, add_polynomials(X, best_poly), y)
    bias_variance_plot(
        results.polynomials,
        results.train_mse,
        results.test_mse,
        save=True,
    )