예제 #1
0
 def test_create_relevant_data_row(self):
     """Check create_relevant_data_row against a hand-built expected row.

     Uses rows 1998-2017 of the CSV fixture with three 5-unit intervals and
     a 5-minute target interval, and asks for the row at
     2014-07-18 09:04:00.
     """
     set_intervals([5,5,5])
     set_target_interval(timedelta(minutes=5))

     fixture = pd.read_csv('tests/resources/dataframe_data.csv')
     window = fixture.iloc[1998:2018, :]

     # The two lists handed to create_expected_row, kept exactly as literals.
     leading_values = [
         0.79227, 0.79231, 0.79216, 0.79222,
         0.79223, 0.79312, 0.79219, 0.79312,
         0.79315, 0.79325, 0.79279, 0.79284,
     ]
     trailing_values = [0.79283, 0.79258]
     expected_row = create_expected_row(leading_values, trailing_values)

     moment = datetime.strptime('2014-07-18 09:04:00', '%Y-%m-%d %H:%M:%S')
     actual_row = create_relevant_data_row(window, moment)

     rows_match = np.array_equal(expected_row, actual_row)
     self.assertTrue(rows_match)
예제 #2
0
 def test_get_relevant_data(self, mock_method):
     """Check the arguments get_relevant_data forwards to the mocked callee.

     With four 15-unit intervals and a 60-minute target interval, a target
     date of 2014-07-17 00:00:00 is expected to yield a call with
     (2014-07-16 23:00:00, 2014-07-17 01:00:00, df).

     mock_method is injected from outside this method (presumably by a
     patch decorator that is not visible here).
     """
     stamp_format = '%Y-%m-%d %H:%M:%S'

     set_intervals([15,15,15,15])
     set_target_interval(timedelta(minutes=60))
     frame = pd.read_csv('tests/resources/dataframe_data.csv')

     target_date = datetime.strptime('2014-07-17 00:00:00', stamp_format)
     window_start = datetime.strptime('2014-07-16 23:00:00', stamp_format)
     window_end = datetime.strptime('2014-07-17 01:00:00', stamp_format)

     get_relevant_data(frame, target_date)

     mock_method.assert_called_with(window_start, window_end, frame)
예제 #3
0
 def test_process_input_data_error(self):
     """Check that process_input_data rejects a frame that is too short.

     Intervals of [5, 5, 5, 60] are configured, but only rows 1998-2012 of
     the CSV fixture are supplied, so process_input_data is expected to
     raise with the 'Insufficient data' message.

     assertRaises is used so that a call which does NOT raise produces a
     clean test failure, rather than a NameError on an unbound variable,
     and so that KeyboardInterrupt/SystemExit are not swallowed.
     """
     set_intervals([5, 5, 5, 60])
     df = pd.read_csv('tests/resources/dataframe_data.csv').iloc[1998:2013, :]
     expected_error_message = 'Insufficient data to process for this number of intervals'

     # The concrete exception class is not visible from this test, so the
     # check is on the message rather than on a specific type.
     with self.assertRaises(Exception) as raised:
         process_input_data(df)

     self.assertEqual(expected_error_message, str(raised.exception))
예제 #4
0
 def test_process_input_data(self):
     """Check process_input_data output for three 5-unit intervals.

     Rows 1998-2012 of the CSV fixture are processed and compared, via
     DataFrame.equals, with a three-row frame of datetime/open/high/low/
     close values.
     """
     set_intervals([5, 5, 5])

     fixture = pd.read_csv('tests/resources/dataframe_data.csv')
     window = fixture.iloc[1998:2013, :]

     stamps = ['2014-07-18 08:49:00', '2014-07-18 08:54:00', '2014-07-18 08:59:00']
     opens = [0.79227, 0.79223, 0.79315]
     highs = [0.79231, 0.79312, 0.79325]
     lows = [0.79216, 0.79219, 0.79279]
     closes = [0.79222, 0.79312, 0.79284]

     # Key order fixes the column order of the expected frame.
     expected_frame = pd.DataFrame(data={
         'datetime': stamps,
         'open': opens,
         'high': highs,
         'low': lows,
         'close': closes,
     })

     actual_frame = process_input_data(window)

     frames_match = expected_frame.equals(actual_frame)
     self.assertTrue(frames_match)
예제 #5
0
 def test_create_row(self):
     """Check create_row against a row built by create_expected_row.

     The input frame has three rows of datetime/open/high/low/close values;
     the expected row is built from those rows' open, high, low and close
     values listed row by row, together with the list [1, 2].
     """
     set_intervals([5,5,5])

     stamps = ['2014-07-18 08:49:00', '2014-07-18 08:54:00', '2014-07-18 08:59:00']
     opens = [0.79227, 0.79223, 0.79315]
     highs = [0.79231, 0.79312, 0.79325]
     lows = [0.79216, 0.79219, 0.79279]
     closes = [0.79222, 0.79312, 0.79284]

     # Key order fixes the column order of the input frame.
     input_values = pd.DataFrame(data={
         'datetime': stamps,
         'open': opens,
         'high': highs,
         'low': lows,
         'close': closes,
     })

     # open, high, low, close of each input row in turn.
     ohlc_values = [
         0.79227, 0.79231, 0.79216, 0.79222,
         0.79223, 0.79312, 0.79219, 0.79312,
         0.79315, 0.79325, 0.79279, 0.79284,
     ]
     expected_row = create_expected_row(ohlc_values, [1, 2])

     actual_row = create_row(input_values, [1,2])

     rows_match = np.array_equal(expected_row, actual_row)
     self.assertTrue(rows_match)
예제 #6
0
 def test_get_dates(self):
     """Check the training/validation/test date lists from get_dates.

     With intervals [5, 5, 5] and hourly boundaries on 2020-01-01 from
     00:00 to 03:00, each of the three returned lists is expected to hold
     four datetimes spaced 15 minutes apart, starting at its own boundary.
     """
     intervals = [5, 5, 5]
     set_intervals(intervals)

     stamp_format = '%Y-%m-%d %H:%M:%S'
     boundaries = [
         datetime.strptime(stamp, stamp_format)
         for stamp in (
             '2020-01-01 00:00:00',  # training start
             '2020-01-01 01:00:00',  # validation start
             '2020-01-01 02:00:00',  # test start
             '2020-01-01 03:00:00',  # test end
         )
     ]
     training_dates, validation_dates, test_dates = get_dates(*boundaries)

     expected_training = convert_datestring_array_to_datetime(
         ['2020-01-01 00:00:00', '2020-01-01 00:15:00', '2020-01-01 00:30:00', '2020-01-01 00:45:00'])
     expected_validation = convert_datestring_array_to_datetime(
         ['2020-01-01 01:00:00', '2020-01-01 01:15:00', '2020-01-01 01:30:00', '2020-01-01 01:45:00'])
     expected_test = convert_datestring_array_to_datetime(
         ['2020-01-01 02:00:00', '2020-01-01 02:15:00', '2020-01-01 02:30:00', '2020-01-01 02:45:00'])

     self.assertEqual(expected_training, training_dates)
     self.assertEqual(expected_validation, validation_dates)
     self.assertEqual(expected_test, test_dates)
예제 #7
0
def process_raw_data_file(name, intervals, target_interval_minutes, market,
                          input_minutes_missing_allowance, training_start_str,
                          validation_start_str, test_start_str, test_end_str,
                          dataframe_batch_size):
    """Configure a data-processing run, create its data and write metadata.

    The run settings are stored through the module's setters, the four
    boundary timestamps are parsed, create_data is invoked with them, and a
    metadata dict describing the run is written under ``models/<name>/data``.

    Args:
        name: Run name; passed to set_name and used in the metadata path.
        intervals: Passed to set_intervals and recorded in the metadata.
        target_interval_minutes: Number of minutes, wrapped in a timedelta
            for set_target_interval.
        market: Passed to set_market and recorded in the metadata.
        input_minutes_missing_allowance: Passed to
            set_max_input_minutes_missing (not recorded in the metadata).
        training_start_str: Training start, '%Y-%m-%d %H:%M:%S' format.
        validation_start_str: Validation start, same format.
        test_start_str: Test start, same format.
        test_end_str: Test end, same format.
        dataframe_batch_size: Passed to set_df_width and recorded in the
            metadata.

    Raises:
        ValueError: If a timestamp string does not match the expected format
            (raised by datetime.strptime).
    """
    set_name(name)
    set_intervals(intervals)
    set_target_interval(timedelta(minutes=target_interval_minutes))
    set_market(market)
    set_max_input_minutes_missing(input_minutes_missing_allowance)
    set_df_width(dataframe_batch_size)

    # Parse the boundaries once, in the order create_data receives them:
    # training start, validation start, test start, test end.
    timestamp_format = '%Y-%m-%d %H:%M:%S'
    dates = [
        datetime.strptime(stamp, timestamp_format)
        for stamp in (training_start_str, validation_start_str,
                      test_start_str, test_end_str)
    ]

    timings = create_data(dates)

    # The first three results of create_data are recorded as the write times
    # for the training, validation and test sets respectively.
    meta_data = {
        'name': name,
        'intervals': intervals,
        'target_interval_minutes': target_interval_minutes,
        'market': market,
        'training_start': training_start_str,
        'validation_start': validation_start_str,
        'test_start': test_start_str,
        'test_end': test_end_str,
        'dataframe_batch_size': dataframe_batch_size,
        'time_to_write_training': timings[0],
        'time_to_write_validation': timings[1],
        'time_to_write_test': timings[2]
    }

    write_meta_data_file(f'models/{name}/data', meta_data)
예제 #8
0
 def test_set_intervals(self):
     """Check that get_intervals returns what set_intervals was given."""
     configured = [5, 5, 5]
     set_intervals(configured)
     reported = get_intervals()
     self.assertEqual(configured, reported)