def test_create_relevant_data_row(self):
    """Compare create_relevant_data_row with a hand-built expected row.

    Uses rows 1998-2017 of the fixture CSV and the timestamp
    2014-07-18 09:04:00, with intervals [5, 5, 5] and a 5-minute target
    interval configured beforehand.
    """
    set_intervals([5, 5, 5])
    set_target_interval(timedelta(minutes=5))

    fixture = pd.read_csv('tests/resources/dataframe_data.csv')
    window = fixture.iloc[1998:2018, :]

    input_values = [
        0.79227, 0.79231, 0.79216, 0.79222,
        0.79223, 0.79312, 0.79219, 0.79312,
        0.79315, 0.79325, 0.79279, 0.79284,
    ]
    target_values = [0.79283, 0.79258]
    expected_row = create_expected_row(input_values, target_values)

    target_time = datetime.strptime('2014-07-18 09:04:00', '%Y-%m-%d %H:%M:%S')
    actual_row = create_relevant_data_row(window, target_time)

    self.assertTrue(np.array_equal(expected_row, actual_row))
def test_get_relevant_data(self, mock_method):
    """Check the arguments get_relevant_data passes to the mocked callable.

    With intervals [15, 15, 15, 15] and a 60-minute target interval, a
    target date of 2014-07-17 00:00:00 must lead to a call with
    (2014-07-16 23:00:00, 2014-07-17 01:00:00, df).

    NOTE(review): mock_method is presumably injected by a patch decorator
    that is not visible here - confirm which function it replaces.
    """
    timestamp_format = '%Y-%m-%d %H:%M:%S'

    set_intervals([15, 15, 15, 15])
    set_target_interval(timedelta(minutes=60))
    df = pd.read_csv('tests/resources/dataframe_data.csv')

    target_date = datetime.strptime('2014-07-17 00:00:00', timestamp_format)
    expected_start = datetime.strptime('2014-07-16 23:00:00', timestamp_format)
    expected_end = datetime.strptime('2014-07-17 01:00:00', timestamp_format)

    get_relevant_data(df, target_date)

    mock_method.assert_called_with(expected_start, expected_end, df)
def test_process_input_data_error(self):
    """process_input_data must raise with the 'insufficient data' message.

    Intervals [5, 5, 5, 60] are configured and only rows 1998-2012 of the
    fixture CSV are supplied; the call is expected to raise an exception
    whose string form equals the expected message.
    """
    set_intervals([5, 5, 5, 60])
    df = pd.read_csv('tests/resources/dataframe_data.csv').iloc[1998:2013, :]
    expected_error_message = 'Insufficient data to process for this number of intervals'

    # assertRaises fails the test when nothing is raised, so a regression
    # that stops raising cannot pass unnoticed. The concrete exception
    # class is not visible from this test, hence the broad Exception.
    with self.assertRaises(Exception) as raised:
        process_input_data(df)

    self.assertEqual(expected_error_message, str(raised.exception))
def test_process_input_data(self):
    """process_input_data on rows 1998-2012 must equal the expected frame.

    Intervals [5, 5, 5] are configured first; the expected result is a
    three-row DataFrame with datetime/open/high/low/close columns.
    """
    set_intervals([5, 5, 5])

    fixture = pd.read_csv('tests/resources/dataframe_data.csv')
    df = fixture.iloc[1998:2013, :]

    expected_input_data = pd.DataFrame(data={
        'datetime': [
            '2014-07-18 08:49:00',
            '2014-07-18 08:54:00',
            '2014-07-18 08:59:00',
        ],
        'open': [0.79227, 0.79223, 0.79315],
        'high': [0.79231, 0.79312, 0.79325],
        'low': [0.79216, 0.79219, 0.79279],
        'close': [0.79222, 0.79312, 0.79284],
    })

    actual_input_data = process_input_data(df)

    frames_match = expected_input_data.equals(actual_input_data)
    self.assertTrue(frames_match)
def test_create_row(self):
    """create_row(input_values, [1, 2]) must equal the hand-built expected row.

    The input is a three-row DataFrame with datetime/open/high/low/close
    columns; intervals [5, 5, 5] are configured beforehand.
    """
    set_intervals([5, 5, 5])

    input_values = pd.DataFrame(data={
        'datetime': [
            '2014-07-18 08:49:00',
            '2014-07-18 08:54:00',
            '2014-07-18 08:59:00',
        ],
        'open': [0.79227, 0.79223, 0.79315],
        'high': [0.79231, 0.79312, 0.79325],
        'low': [0.79216, 0.79219, 0.79279],
        'close': [0.79222, 0.79312, 0.79284],
    })

    flattened_inputs = [
        0.79227, 0.79231, 0.79216, 0.79222,
        0.79223, 0.79312, 0.79219, 0.79312,
        0.79315, 0.79325, 0.79279, 0.79284,
    ]
    expected_row = create_expected_row(flattened_inputs, [1, 2])
    actual_row = create_row(input_values, [1, 2])

    self.assertTrue(np.array_equal(expected_row, actual_row))
def test_get_dates(self):
    """get_dates must return the expected training/validation/test date lists.

    Intervals [5, 5, 5] are configured; the four boundaries are hourly from
    2020-01-01 00:00:00 to 03:00:00, and each expected list holds four
    timestamps spaced 15 minutes apart.
    """
    timestamp_format = '%Y-%m-%d %H:%M:%S'

    intervals = [5, 5, 5]
    set_intervals(intervals)

    training_start = datetime.strptime('2020-01-01 00:00:00', timestamp_format)
    validation_start = datetime.strptime('2020-01-01 01:00:00', timestamp_format)
    test_start = datetime.strptime('2020-01-01 02:00:00', timestamp_format)
    test_end = datetime.strptime('2020-01-01 03:00:00', timestamp_format)

    actual_training_dates, actual_validation_dates, actual_test_dates = get_dates(
        training_start, validation_start, test_start, test_end)

    expected_training_dates = convert_datestring_array_to_datetime([
        '2020-01-01 00:00:00',
        '2020-01-01 00:15:00',
        '2020-01-01 00:30:00',
        '2020-01-01 00:45:00',
    ])
    expected_validation_dates = convert_datestring_array_to_datetime([
        '2020-01-01 01:00:00',
        '2020-01-01 01:15:00',
        '2020-01-01 01:30:00',
        '2020-01-01 01:45:00',
    ])
    expected_test_dates = convert_datestring_array_to_datetime([
        '2020-01-01 02:00:00',
        '2020-01-01 02:15:00',
        '2020-01-01 02:30:00',
        '2020-01-01 02:45:00',
    ])

    self.assertEqual(expected_training_dates, actual_training_dates)
    self.assertEqual(expected_validation_dates, actual_validation_dates)
    self.assertEqual(expected_test_dates, actual_test_dates)
def process_raw_data_file(name, intervals, target_interval_minutes, market,
                          input_minutes_missing_allowance, training_start_str,
                          validation_start_str, test_start_str, test_end_str,
                          dataframe_batch_size):
    """Configure the pipeline, create the data sets and write a meta-data file.

    The configuration setters are called first, then the four boundary
    strings are parsed and handed to create_data as a list. A meta-data
    dictionary describing the run is finally passed to
    write_meta_data_file with the path 'models/<name>/data'.

    Args:
        name: Run name; passed to set_name and used in the meta-data path.
        intervals: Passed to set_intervals and recorded in the meta data.
        target_interval_minutes: Minutes used to build the timedelta given
            to set_target_interval.
        market: Passed to set_market and recorded in the meta data.
        input_minutes_missing_allowance: Passed to
            set_max_input_minutes_missing.
        training_start_str: Training start, formatted '%Y-%m-%d %H:%M:%S'.
        validation_start_str: Validation start, same format.
        test_start_str: Test start, same format.
        test_end_str: Test end, same format.
        dataframe_batch_size: Passed to set_df_width and recorded in the
            meta data.

    Raises:
        ValueError: If a date string does not match '%Y-%m-%d %H:%M:%S'.
    """
    set_name(name)
    set_intervals(intervals)
    set_target_interval(timedelta(minutes=target_interval_minutes))
    set_market(market)
    set_max_input_minutes_missing(input_minutes_missing_allowance)
    set_df_width(dataframe_batch_size)

    # Parsed in order: training start, validation start, test start, test end.
    date_format = '%Y-%m-%d %H:%M:%S'
    dates = [
        datetime.strptime(date_str, date_format)
        for date_str in (
            training_start_str,
            validation_start_str,
            test_start_str,
            test_end_str,
        )
    ]

    # The first three entries of the result are recorded below as the
    # training / validation / test write times.
    timings = create_data(dates)

    meta_data = {
        'name': name,
        'intervals': intervals,
        'target_interval_minutes': target_interval_minutes,
        'market': market,
        'training_start': training_start_str,
        'validation_start': validation_start_str,
        'test_start': test_start_str,
        'test_end': test_end_str,
        'dataframe_batch_size': dataframe_batch_size,
        'time_to_write_training': timings[0],
        'time_to_write_validation': timings[1],
        'time_to_write_test': timings[2],
    }
    write_meta_data_file(f'models/{name}/data', meta_data)
def test_set_intervals(self):
    """get_intervals must return the value previously given to set_intervals."""
    configured = [5, 5, 5]
    set_intervals(configured)
    self.assertEqual(configured, get_intervals())