Esempio n. 1
0
    def train_classifier(self, X_train, X_test, y_train, y_test):
        """Fit several candidate regressors and persist the best-scoring one.

        Every candidate is fitted on the training split and evaluated on the
        test split through its own ``score`` method. The candidate with the
        highest score that is strictly greater than
        ``self.lowest_accuracy_wanted`` is reported on stdout and dumped with
        joblib to ``models/files/predict_<self.prediction_column_name>.pkl``.
        If no candidate beats that threshold, nothing is printed or saved.

        Args:
            X_train: Feature data used to fit each candidate.
            X_test: Feature data used to score each candidate.
            y_train: Target values matching ``X_train``.
            y_test: Target values matching ``X_test``.

        Returns:
            None.
        """
        # NOTE(review): `svm` / `linear_model` are presumably scikit-learn
        # modules, in which case `score` is the R^2 of the prediction.
        classifiers = [
            svm.SVR(),
            linear_model.SGDRegressor(),
            linear_model.BayesianRidge(),
            linear_model.LassoLars(),
            linear_model.ARDRegression(),
            linear_model.PassiveAggressiveRegressor(),
            linear_model.TheilSenRegressor(),
            linear_model.LinearRegression(),
        ]

        # Starting from the minimum acceptable score means a candidate is only
        # selected when it clears that bar.
        best_accuracy = self.lowest_accuracy_wanted
        best_classifier = None

        for classifier in classifiers:
            classifier.fit(X_train, y_train)

            prediction_score = classifier.score(X_test, y_test)
            if prediction_score > best_accuracy:
                best_accuracy = prediction_score
                best_classifier = classifier

        # Explicit None check: do not rely on the truthiness of a model object.
        if best_classifier is not None:
            # Report and persist the winner, not the loop variable (which is
            # simply the last candidate that happened to be iterated).
            print(
                f"{best_classifier} performed the best with an accuracy of: {best_accuracy}"
            )
            joblib.dump(
                best_classifier,
                relative_path(
                    f'models/files/predict_{self.prediction_column_name}.pkl'))
Esempio n. 2
0
def __get_reddit_df(path):
    """Flatten the analyzed Reddit JSON file into a DataFrame.

    The file is indexed as ``data['companies'][company][date]`` and each such
    entry is a dict-like record. Every (company, date) entry becomes one row
    holding a copy of that record plus ``'company'`` and ``'date'`` columns.

    Args:
        path: Location of the JSON file; it is resolved through
            ``relative_path`` before being opened.

    Returns:
        A ``pandas.DataFrame`` with one row per (company, date) entry. It is
        empty when the file contains no entries.
    """
    with open(relative_path(path)) as f:
        data = json.load(f)

    dict_collection = []
    for company, entries_by_date in data['companies'].items():
        for date, entry in entries_by_date.items():
            # Copy so the loaded JSON structure is not mutated.
            row = entry.copy()
            row['company'] = company
            row['date'] = date
            # Append per date: appending after the inner loop kept only the
            # last date of each company and failed (or duplicated the previous
            # company's row) for a company without any dates.
            dict_collection.append(row)

    return pd.DataFrame(dict_collection)
Esempio n. 3
0
def __get_companies_related_reddit_submissions(companies):
    """Fetch Reddit submissions for each company and write them to a CSV path.

    For every item in ``companies`` the submissions returned by
    ``RedditInterface.get_all_submissions_for`` are handed to
    ``DataFileInterface.write_to_file`` together with the resolved
    ``data/files/stock_submissions.csv`` path.
    """
    for current_company in companies:
        fetched_submissions = RedditInterface.get_all_submissions_for(current_company)
        output_file = relative_path('data/files/stock_submissions.csv')
        DataFileInterface.write_to_file(output_file, fetched_submissions)
Esempio n. 4
0
def __get_historical_data_for_all_tech_companies(companies):
    """Fetch '1m' historical data for each company and write it to a CSV path.

    A progress line is printed per company, then the result of
    ``IEXCloudInterface.get_historical_data(company, '1m')`` is passed to
    ``DataFileInterface.write_to_file`` with the resolved
    ``data/files/stock_data.csv`` path.
    """
    for current_company in companies:
        print(f"getting historical data for company: {current_company}")
        history = IEXCloudInterface.get_historical_data(current_company, '1m')
        output_file = relative_path('data/files/stock_data.csv')
        DataFileInterface.write_to_file(output_file, history)
Esempio n. 5
0
def __get_stats_for_all_tech_companies(companies):
    """Fetch stats for each company and write them to a CSV path.

    A progress line is printed per company, then the result of
    ``IEXCloudInterface.get_stats(company)`` is passed to
    ``DataFileInterface.write_to_file`` with the resolved
    ``data/files/stock_stats.csv`` path.
    """
    for current_company in companies:
        print(f"getting stat data for company: {current_company}")
        stats = IEXCloudInterface.get_stats(current_company)
        output_file = relative_path('data/files/stock_stats.csv')
        DataFileInterface.write_to_file(output_file, stats)
Esempio n. 6
0
def __get_news_data_for_all_tech_companies(companies):
    """Fetch 'last-week' news for each company and write it to a CSV path.

    For every item in ``companies`` the result of
    ``IEXCloudInterface.get_news(company, 'last-week')`` is passed to
    ``DataFileInterface.write_to_file`` with the resolved
    ``data/files/news.csv`` path.
    """
    for current_company in companies:
        news_items = IEXCloudInterface.get_news(current_company, 'last-week')
        output_file = relative_path('data/files/news.csv')
        DataFileInterface.write_to_file(output_file, news_items)
Esempio n. 7
0
                row['company'] = company
                row['date'] = date
            
            dict_collection.append(row)



        return pd.DataFrame(dict_collection)

# Script entry point. Each stage is enabled independently by a command-line
# flag; the flags are checked in order, so several can be combined in one run.
if __name__ == '__main__':
    # --get-data: obtain the company list and hand it to the retrieval step.
    if '--get-data' in  sys.argv:
        companies = __get_companies()
        __retrieve_data(companies)

    # --analyze-text: load the stored Reddit submissions CSV and run the
    # sentiment model on it, passing the analyzed-JSON path to `analyze`
    # (presumably the output location -- confirm in SubmissionSentimentModel).
    if '--analyze-text' in sys.argv:
        reddit_submissions = pd.read_csv(relative_path('data/files/stock_submissions.csv'))
        ssm = SubmissionSentimentModel(reddit_submissions)
        ssm.analyze(relative_path('data/files/reddit_submissions_analyzed.json'))

    # --train: prepare the inputs once per target price column.
    if '--train' in sys.argv:
        # Target columns handled one at a time by the loop below.
        prediction_column_names = ['high', 'low', 'close']

        stock_stats_df = pd.read_csv(relative_path('data/files/stock_stats.csv'))

        spm = StockPriceModel()

        for prediction_column_name in prediction_column_names:
            # The price CSV is re-read on every iteration and narrowed to the
            # 'company', 'date' and 'open' columns plus the current target.
            stock_price_movement_df = pd.read_csv(relative_path('data/files/stock_data.csv'))
            stock_price_movement_df = stock_price_movement_df[['company', 'date', 'open', prediction_column_name]].copy()
            # Analyzed Reddit data flattened into a DataFrame.
            reddit_df = __get_reddit_df('data/files/reddit_submissions_analyzed.json')