def test_time_series_fillna_for_specific_filled_values(self):
     """Check the values ``time_series_fillna`` puts into the NaN slots.

     The fixture column has NaN at positions 1 and 3. The test expects 3.5
     and 5 there (numerically the midpoints of the neighbouring values).
     """
     frame = pd.DataFrame(data=[5.0, np.nan, 2.0, np.nan, 8.0],
                          columns=["test_attribute"])
     filled = data_preparation.Preparation(frame).time_series_fillna()
     column = filled["test_attribute"]
     # (position of the former NaN, value expected after filling)
     for position, wanted in ((1, 3.5), (3, 5)):
         self.assertEqual(column.iloc[position], wanted)
 def test_sort_by_timestamp_for_given_index(self):
     """Check that ``sort_by_timestamp`` brings the smallest label first.

     The frame is built with the index out of order (1010, 1127, 986);
     after sorting, the first index label must be 986.
     """
     unsorted_labels = [1010, 1127, 986]
     frame = pd.DataFrame(data=np.zeros([3]),
                          columns=["test_attribute"],
                          index=unsorted_labels)
     ordered = data_preparation.Preparation(data=frame).sort_by_timestamp()
     self.assertEqual(ordered.index[0], 986)
 def test_drop_unused_for_dataframe_that_includes_attributes_out_of_scope(
         self):
     """Check which columns survive ``drop_unused``.

     Starting from columns attr1, Close, attr2, Open, the result must
     contain exactly ``["Close", "Open"]`` in that order.
     """
     frame = pd.DataFrame(data=np.random.rand(10, 4),
                          columns=["attr1", "Close", "attr2", "Open"])
     trimmed = data_preparation.Preparation(data=frame).drop_unused()
     remaining = list(trimmed.columns.values)
     self.assertListEqual(remaining, ["Close", "Open"])
 def test_transform_to_float_for_df_that_includes_string_with_comas_and_normal_float_values(
         self):
     """Check ``transform_to_float`` on a mix of comma strings and floats.

     Expected for the first column: float dtype, "3,0" converted to 30.0
     and the plain 3.7 left as 3.7. Finally every column of the result
     must have a float dtype.
     """
     raw_rows = [["3,0", 2.0], [3.7, "10,0.00"]]
     frame = pd.DataFrame(data=raw_rows,
                          columns=["test_attribute1", "test_attribute2"])
     converted = data_preparation.Preparation(
         data=frame).transform_to_float()
     first_column = converted["test_attribute1"]
     self.assertEqual(first_column.dtypes, "float")
     self.assertEqual(first_column.iloc[0], 30.0)
     self.assertEqual(first_column.iloc[1], 3.7)
     self.assertTrue(all(converted.dtypes == float))
 def test_detect_projection_point_for_short_long_data(self):
     """Check ``detect_projection_point`` on a long and a short series.

     For a 200-element series the first two items of the result must be
     194 and 195; a 5-element series must raise ``ValueError``.
     """
     preparation = data_preparation.Preparation(data=pd.DataFrame())
     too_short = pd.Series(data=np.random.rand(5))
     long_enough = pd.Series(data=np.random.rand(200))
     # The method is queried once per checked slot, as separate calls.
     for slot, wanted in enumerate((194, 195)):
         detected = preparation.detect_projection_point(
             given_prices=long_enough)
         self.assertEqual(detected[slot], wanted)
     with self.assertRaises(ValueError):
         preparation.detect_projection_point(given_prices=too_short)
 def test_check_if_weekend_day_for_week_day_and_weekend_day(self):
     """Check ``check_if_weekend_day`` for a Friday and a Saturday.

     5 June 2020 (Friday) must return ``None``; 6 June 2020 (Saturday)
     must raise ``ValueError``.
     """
     preparation = data_preparation.Preparation(data=pd.DataFrame())
     friday = {"day": 5, "month": 6, "year": 2020}
     saturday = {"day": 6, "month": 6, "year": 2020}

     verdict = preparation.check_if_weekend_day(giv_day=friday["day"],
                                                giv_month=friday["month"],
                                                giv_year=friday["year"])
     self.assertIsNone(verdict)

     with self.assertRaises(ValueError):
         preparation.check_if_weekend_day(giv_day=saturday["day"],
                                          giv_month=saturday["month"],
                                          giv_year=saturday["year"])
 def test_check_attribute_existence_for_including_and_excluding_attributes(
         self):
     """Check ``check_attribute_existence`` for absent and present columns.

     Asking for columns that are not in the frame must raise
     ``ValueError``; asking for columns that are in it must return ``None``.
     """
     frame = pd.DataFrame(data=np.random.rand(10, 4),
                          columns=["attr1", "attr2", "attr3", "attr4"])
     preparation = data_preparation.Preparation(data=frame)

     with self.assertRaises(ValueError):
         preparation.check_attribute_existence(
             attributes=["attr5", "attr6"])

     for present in (["attr1", "attr2"], ["attr3"]):
         self.assertIsNone(
             preparation.check_attribute_existence(attributes=present))
 def test_plot_daily_prices_for_valid_input(self):
     """Check that ``plot_daily_prices`` returns ``None`` for a valid day.

     The series is hourly from 1 to 15 July 2020, and the requested day
     (9 July 2020) lies inside that range.
     """
     preparation = data_preparation.Preparation(data=pd.DataFrame())
     hourly_index = pd.date_range(start="01-Jul-2020",
                                  end="15-Jul-2020",
                                  freq="H")
     prices = pd.Series(data=np.random.rand(len(hourly_index)),
                        index=hourly_index)
     result = preparation.plot_daily_prices(prices,
                                            day=9,
                                            month=7,
                                            year=2020)
     self.assertIsNone(result)
 def test_create_timestamps_for_given_date_and_time_in_string_format(self):
     """Check the first timestamp built from string date/time columns.

     "Local Date" and "Local Time" hold the date and time parts, cast to
     ``str``, of ten evenly spaced moments. The first element returned by
     ``create_timestamps`` must equal 1 January 2021, 00:00:00.
     """
     moments = pd.date_range(start="01-01-2021", end="20-01-2021",
                             periods=10).to_series()
     frame = pd.DataFrame(data={
         "Local Date": moments.dt.date.astype(str),
         "Local Time": moments.dt.time.astype(str),
     })
     first_moment = datetime.strptime("01-01-2021 00:00:00",
                                      "%d-%m-%Y %H:%M:%S")
     stamps = data_preparation.Preparation(data=frame).create_timestamps()
     self.assertEqual(stamps.iloc[0], first_moment)
 def test_set_timestamps_as_index_for_valid_dataframe_with_Date_and_Time_attributes(
         self):
     """Check the first index label produced by ``set_timestamps_as_index``.

     "Local Date" and "Local Time" hold the date and time parts (as
     ``date`` / ``time`` objects) of ten evenly spaced moments. The first
     index label of the result must equal 1 January 2021, 00:00:00.
     """
     moments = pd.date_range(start="01-01-2021", end="20-01-2021",
                             periods=10).to_series()
     frame = pd.DataFrame(data={
         "Local Date": moments.dt.date,
         "Local Time": moments.dt.time,
     })
     first_moment = datetime.strptime("01-01-2021 00:00:00",
                                      "%d-%m-%Y %H:%M:%S")
     indexed = data_preparation.Preparation(
         data=frame).set_timestamps_as_index()
     self.assertEqual(indexed.index[0], first_moment)
 def test_check_if_date_in_range_for_valid_and_invalid_date(self):
     """Check ``check_if_date_in_range`` for a covered and an uncovered date.

     The price series is hourly from 1 to 15 July 2020. A date inside that
     span (13 July 2020) must return ``None``; a date outside it
     (23 August 2019) must raise ``ValueError``.
     """
     date_index = pd.date_range(start="01-Jul-2020",
                                end="15-Jul-2020",
                                freq="H")
     mock_df = pd.DataFrame(data=np.random.rand(len(date_index)),
                            columns=["attr1"],
                            index=date_index)
     mock_object = data_preparation.Preparation(data=mock_df)
     self.assertIsNone(
         mock_object.check_if_date_in_range(given_prices=mock_df["attr1"],
                                            giv_year=2020,
                                            giv_month=7,
                                            giv_day=13))
     # The call is made bare inside assertRaises: wrapping it in
     # assertIsNone could never contribute on the passing path (the
     # exception fires first) and would only obscure the failure message
     # if no ValueError were raised.
     with self.assertRaises(ValueError):
         mock_object.check_if_date_in_range(given_prices=mock_df["attr1"],
                                            giv_year=2019,
                                            giv_month=8,
                                            giv_day=23)
# Code example #12
# 0
import pandas as pd
import os
os.environ[
    'TF_CPP_MIN_LOG_LEVEL'] = "3"  # info and warning messages are not printed

import tensorflow as tf
from tensorflow.keras.models import load_model
from tensorflow.keras.losses import MeanAbsoluteError, MeanSquaredError

from core import data_preparation, data_exploration, learning_rate_strategy, model_developement

# Load the raw data from "<current working directory>/data/GSK per min.csv".
df = pd.read_csv(os.path.join(os.getcwd(), "data", "GSK per min.csv"))
""" Data Preparation """
# Run the preparation steps in order on one Preparation object. The return
# values are discarded, so the steps presumably update `prepare` in place
# (its `.data` attribute is read further down) - confirm in data_preparation.
prepare = data_preparation.Preparation(data=df)
prepare.set_timestamps_as_index()
prepare.sort_by_timestamp()
prepare.drop_unused()
prepare.transform_to_float()
prepare.time_series_fillna()
# Plotting calls; show=True presumably displays the figure - TODO confirm.
prepare.plot_daily_prices(show=True)
prepare.plot_prices_and_projection(show=True)
""" Data Exploration """
# Exploration works on the data held by the preparation object.
explore = data_exploration.Exploration(data=prepare.data)
train_set, test_set = explore.split_train_test()
# Both comparisons below use only the "Close" column of each split.
explore.distribution_comparison(train_set=train_set["Close"],
                                test_set=test_set["Close"],
                                show=True)
explore.plot_series_but_ignore_date(train_set=train_set["Close"],
                                    test_set=test_set["Close"],
                                    show=True)
# Report for the whole training split, labelled "Train".
train_report = explore.custom_stat_report(data=train_set, name="Train")