def get_fetched_data(cache_data=False,
                     use_cached_data=False,
                     use_all_data=False):
    # Get windowed data for S&P 500 stocks, together with the
    # window_size
    if use_cached_data:
        cached_data_filename = "data/cached_downloaded_data.p"
        try:
            # Load the previously pickled download if it exists.
            with open(cached_data_filename, "rb") as cached_data:
                print("Using cached data found in " + cached_data_filename
                      + "...")
                fetched_data = pickle.load(cached_data)
        except FileNotFoundError:
            print("Cached data not found in " + cached_data_filename + "...")
            print("Downloading data instead...")
            fetched_data = scraper.fetch_data(cache_data=cache_data)
    elif use_all_data:
        plot = False
        all_data_filename = "data/cached_snp_data.p"
        try:
            # Load the previously pickled full dataset if it exists.
            with open(all_data_filename, "rb") as all_data:
                print("Using cached data found in " + all_data_filename
                      + "...")
                fetched_data = pickle.load(all_data)
        except FileNotFoundError:
            print("Cached data not found in " + all_data_filename + "...")
            print("Downloading data instead...")
            fetched_data = scraper.fetch_data(cache_data=cache_data)
    else:
        fetched_data = scraper.fetch_data(cache_data=cache_data)

    return fetched_data
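For context, a minimal usage sketch of the helper above. The module name `pipeline` is an assumption; only the function itself appears in the example.

# Hypothetical usage of get_fetched_data(); the import path is assumed.
from pipeline import get_fetched_data

# Prefer the pickled cache on disk, falling back to a fresh download
# (and caching it) when the file is missing.
fetched_data = get_fetched_data(cache_data=True, use_cached_data=True)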
Example #2
def timer():  # use threading.Timer to keep the data updated every 90 seconds
    threading.Timer(90, timer).start()
    global data
    start = datetime.now()
    print("Fetching data")
    data = sc.fetch_data()
    end = datetime.now()
    runtime = end - start
    print("Data fetched")
    print("Runtime: " + str(runtime))
Example #3
def main():

    print("Fetching data")

    price_data = scraper.fetch_data(
        os.path.dirname(os.getcwd()) + "/data/price_data.csv")
    blockchain_data = scraper.fetch_data(
        os.path.dirname(os.getcwd()) + "/data/blockchain_data.csv")
    coindesk_headlines = pd.read_csv(os.path.dirname(os.getcwd()) +
                                     "/data/scored_headlines_sentiment.csv",
                                     usecols=["Headline", "Sentiment"],
                                     sep=",")

    # Preprocessing #

    ####
    ## START Sentiment Analysis Block
    ####

    print("Sentiment Analysis")
    coindesk_headlines, stemmed = pp.sentiment_preprocessing(
        coindesk_headlines)

    # Create bag of words model.
    coindesk_headlines = pp.make_bag_of_words(coindesk_headlines, stemmed)

    x_train, x_test, y_train, y_test = pp.headlines_balanced_split(
        coindesk_headlines, test_size=.2)

    print("\nFitting sentiment models...\n")

    rand_forest = SentimentModel(estimator="RandomForest",
                                 train_set=(x_train, y_train),
                                 test_set=(x_test, y_test))

    grad_boost = SentimentModel(estimator="GradientBoosting",
                                train_set=(x_train, y_train),
                                test_set=(x_test, y_test))

    support_vec = SentimentModel(estimator="SupportVectorClassifier",
                                 train_set=(x_train, y_train),
                                 test_set=(x_test, y_test))

    # Evaluation #

    print("\nEvaluating sentiment models...\n")

    conf_matrix_counter = 0

    # Random Forest Classifier
    print("\tRandom Forest Classifier")
    analysis.plot_cnf_matrix(rand_forest.y_pred, rand_forest.y_test)
    rand_forest.cross_validate(method="Holdout")

    # Gradient Boosting Classifier
    print("\tGradient Boosting Classifier")
    analysis.plot_cnf_matrix(grad_boost.y_pred, grad_boost.y_test)
    grad_boost.cross_validate(method="Holdout")

    # Support Vector Classifier
    print("\tSupport Vector Classifier")
    analysis.plot_cnf_matrix(support_vec.y_pred, support_vec.y_test)
    support_vec.cross_validate(method="Holdout")

    ####
    ## END Sentiment Analysis Block
    ####

    print("Preprocessing")

    data = (
        price_data
        .pipe(pp.calculate_indicators)
        .pipe(pp.merge_datasets,
              other_sets=[blockchain_data])  # [blockchain_data, coindesk_headlines]
        .pipe(pp.binarize_labels)
        .pipe(pp.fix_null_vals)
        .pipe(pp.add_lag_variables, lag=3)
        .pipe(pp.power_transform))
    x_train, x_test, y_train, y_test = pp.split(data,
                                                test_size=.2,
                                                balanced=True)

    # Exploratory Analysis #

    print("Analyzing features")

    #print(data.describe())
    analysis.plot_corr_matrix(data)

    # Fitting Models #

    print("Fitting models")

    log_reg = Model(estimator="LogisticRegression",
                    train_set=(x_train, y_train),
                    test_set=(x_test, y_test),
                    select_features="RecursiveFE",
                    optimize=OPTIMIZE)
    rand_forest = Model(estimator="RandomForest",
                        train_set=(x_train, y_train),
                        test_set=(x_test, y_test),
                        select_features="RecursiveFE",
                        optimize=OPTIMIZE)
    grad_boost = Model(estimator="GradientBoosting",
                       train_set=(x_train, y_train),
                       test_set=(x_test, y_test),
                       select_features="RecursiveFE",
                       optimize=OPTIMIZE)

    # Evaluation #

    print("Evaluating")

    # Logistic Regression
    print("\tLogistic Regression Estimator")
    log_reg.plot_cnf_matrix()
    log_reg.cross_validate(method="Holdout")
    log_reg.cross_validate(method="RollingWindow",
                           data=data,
                           window_size=.9,
                           test_size=.1)

    # Random Forest
    print("\tRandom Forest Classifier")
    rand_forest.plot_cnf_matrix()
    rand_forest.cross_validate(method="holdout")
    rand_forest.cross_validate(method="RollingWindow",
                               data=data,
                               window_size=.9,
                               test_size=.1)

    # Gradient Boosting
    print("\tGradient Boosting Classifier")
    grad_boost.plot_cnf_matrix()
    grad_boost.cross_validate(method="holdout")
    grad_boost.cross_validate(method="RollingWindow",
                              data=data,
                              window_size=.9,
                              test_size=.1)
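Two details fall outside this excerpt: the OPTIMIZE flag passed to each Model and the call into main(). A minimal sketch of how the surrounding module presumably supplies them; both lines below are assumptions about code not shown.

# Assumed module-level flag controlling hyperparameter optimization for Model;
# its real definition is not part of the excerpt.
OPTIMIZE = False

# Conventional script entry point.
if __name__ == "__main__":
    main()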
Example #4
def home():
    data = fetch_data()
    return jsonify(data)
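This route only makes sense inside a web application. Below is a minimal self-contained sketch of the surrounding app; that it is Flask is inferred from the jsonify call, and the fetch_data import path is an assumption.

# Minimal Flask app sketch around the route above; import paths are assumed.
from flask import Flask, jsonify
from scraper import fetch_data

app = Flask(__name__)

@app.route("/")
def home():
    data = fetch_data()
    return jsonify(data)

if __name__ == "__main__":
    app.run()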
Example #5
import os.path
import json
import csv
import time
import scraper
import analysis
import training
import pandas as pd
import preprocessing as ppc

# Data Bus #

print("Fetching data")

price_data = scraper.fetch_data(
    os.path.dirname(os.getcwd()) + "/data/price_data.csv")
blockchain_data = scraper.fetch_data(
    os.path.dirname(os.getcwd()) + "/data/blockchain_data.csv")
#coindesk_headlines = pd.read_csv(os.path.dirname(os.getcwd()) + "/data/test_scores.csv", sep=",")

# Preprocessing #

print("Preprocessing")

data = (price_data
        .pipe(ppc.calculate_indicators)
        .pipe(ppc.merge_datasets, set_b=blockchain_data)
        .pipe(ppc.binarize_labels)
        .pipe(ppc.fix_null_vals)
        .pipe(ppc.power_transform))
"""
x_train, x_test, y_train, y_test = (price_data.pipe(ppc.calculate_indicators)
	.pipe(ppc.merge_datasets, set_b=blockchain_data, set_c=(coindesk_headlines.pipe(scraper.get_popularity)
		.pipe(ppc.calculate_sentiment)))