def test__apply__on_other_time_series_with_different_length(self):
    """apply() must raise AssertionError when the other series has a different length."""
    # Prepare two filled series spanning one vs. three months.
    val_1 = 21
    val_2 = 3
    ts_1 = TimeSeries.create("01-2020", "02-2020", "H").fill(val_1)
    ts_2 = TimeSeries.create("01-2020", "04-2020", "H").fill(val_2)
    # Method at test: only the raise matters, so the return value is
    # deliberately discarded (the unused `ts = ...` binding was removed).
    with self.assertRaises(AssertionError):
        ts_1.apply(lambda x, y: x * y, ts_2)
def test__create__with_freq_as_time_series(self):
    """create() accepts another TimeSeries as the frequency argument."""
    reference = TimeSeries.create("01-01-2020", "02-01-2020", "H")
    created = TimeSeries.create("01-01-2020", "02-01-2020", reference)
    self.assertIsInstance(created, TimeSeries)
    # A freshly created series carries no data: every entry must be NaN.
    self.assertTrue(created.series.isna().all().values[0])
    # The frequency must have been taken over from the reference series.
    self.assertEqual(created.series.index.inferred_freq,
                     reference.series.index.inferred_freq)
def test__apply__on_other_time_series(self):
    """apply() with a second series combines the two element-wise."""
    factor_a = 21
    factor_b = 3
    first = TimeSeries.create("01-2020", "02-2020", "H").fill(factor_a)
    second = TimeSeries.create("01-2020", "02-2020", "H").fill(factor_b)
    # Method at test
    product = first.apply(lambda x, y: x * y, second)
    # Every element must equal the product of the two fill values.
    expected = factor_a * factor_b
    for value in product:
        self.assertEqual(value, expected)
def test__append(self):
    """append() grows the dataset by exactly one series per call."""
    tsd = TimeSeriesDataset()
    ts_1 = TimeSeries.create("01-2020", "02-2020", "H")
    ts_2 = TimeSeries.create("01-2020", "03-2020", "H")
    # assertEqual (instead of assertTrue on a comparison) reports both
    # values on failure, which makes broken runs easier to diagnose.
    self.assertEqual(len(tsd), 0)
    tsd.append(ts_1)
    self.assertEqual(len(tsd), 1)
    tsd.append(ts_2)
    self.assertEqual(len(tsd), 2)
def test__del(self):
    """del tsd[i] removes one member at a time."""
    ts_1 = TimeSeries.create("01-2020", "02-2020", "H")
    ts_2 = TimeSeries.create("01-2020", "03-2020", "H")
    my_arr = [ts_1, ts_2]
    tsd = TimeSeriesDataset(my_arr)
    # assertEqual (instead of assertTrue on a comparison) yields
    # informative failure messages showing both compared values.
    self.assertEqual(len(tsd), 2)
    del tsd[-1]
    self.assertEqual(len(tsd), 1)
    del tsd[-1]
    self.assertEqual(len(tsd), 0)
def test__copy__shallow(self):
    """copy(deep=False) returns a new container that shares its members."""
    # params
    ts_1 = TimeSeries.create("01-2020", "03-2020", "H")
    ts_2 = TimeSeries.create("02-2020", "04-2020", "min")
    tsd = TimeSeriesDataset([ts_1, ts_2])
    # Method at test. Local renamed from `copy` to avoid shadowing the
    # builtin `copy` module/name.
    tsd_copy = tsd.copy(deep=False)
    # assertIsNot/assertIs express the identity checks directly instead
    # of comparing id() values with assert(Not)Equal.
    self.assertIsNot(tsd, tsd_copy)
    for index in range(len(tsd_copy)):
        self.assertIs(tsd[index], tsd_copy[index])
def test__merge(self):
    """merge() produces a sorted index and keeps every row of both inputs."""
    ts1 = TimeSeries.create("01-2020", "03-2020", "H")
    ts2 = TimeSeries.create("02-2020", "04-2020", "H")
    # Call function
    merged = ts1.merge(ts2)
    # The merged index must be sorted ascending.
    self.assertTrue(merged.series.index.is_monotonic_increasing)
    # No row may be dropped: merged length equals the sum of both inputs
    # (assertEqual instead of assertTrue(a == b) for clearer failures).
    self.assertEqual(len(merged), len(ts1) + len(ts2))
def test__pad_left(self):
    """pad(limit=...) extends every member up to the given timestamp.

    NOTE(review): despite the test name ("pad_left"), `boundaries()[1]`
    is the RIGHT boundary and `goal_right` lies after both series, so
    this verifies right-side padding. The original in-line comment
    claiming "left boundary" was wrong and has been corrected; consider
    renaming the test itself.
    """
    goal_right = "2020-01-10 00:00:00"
    # Create series ending before the goal timestamp.
    ts1 = TimeSeries.create("01-02-2020", "01-08-2020", "1D")
    ts2 = TimeSeries.create("01-04-2020", "01-09-2020", "1D")
    tsd = TimeSeriesDataset([ts1, ts2])
    tsd_padded = tsd.pad(limit=goal_right)
    # check that all members now share the same RIGHT boundary
    self.assertTrue(all([str(ts.boundaries()[1]) == goal_right for ts in tsd_padded]))
def test__merge(self):
    """TimeSeriesDataset.merge() merges members pairwise by position."""
    # Create the time series (filled so the merge has values to combine)
    ts1 = TimeSeries.create("01-01-2020", "01-02-2020", "H").fill(0)
    ts2 = TimeSeries.create("01-01-2020", "01-03-2020", "H").fill(0)
    ts3 = TimeSeries.create("01-01-2020", "01-04-2020", "H").fill(0)
    ts4 = TimeSeries.create("01-02-2020", "01-05-2020", "H").fill(0)
    tsd1 = TimeSeriesDataset([ts1, ts2])
    tsd2 = TimeSeriesDataset([ts3, ts4])
    # Call the function
    tsd = tsd1.merge(tsd2)
    # assertEqual instead of assertTrue(a == b) for informative failures.
    # Member 0 spans from ts1's start to ts3's end ...
    self.assertEqual(ts1.start(), tsd[0].start())
    self.assertEqual(ts3.end(), tsd[0].end())
    # ... and member 1 spans from ts2's start to ts4's end.
    self.assertEqual(ts2.start(), tsd[1].start())
    self.assertEqual(ts4.end(), tsd[1].end())
def test__to_darts__type_check(self):
    """to_darts() returns an instance of the darts TimeSeries class."""
    source = TimeSeries.create("01-2020", "02-2020", "H")
    source = source.fill(np.random.randint(0, 1000, len(source)))
    self.assertIsInstance(source, TimeSeries)
    converted = source.to_darts()
    # Imported locally so darts is only required when this test runs.
    from darts import TimeSeries as DartsTimeSeries
    self.assertIsInstance(converted, DartsTimeSeries)
def test__empty(self):
    """empty() replaces every value of the series with NaN."""
    series = TimeSeries.create("01-01-2020", "03-01-2020", "H")
    # method at test
    series = series.empty()
    # every element must now be NaN
    for value in series:
        self.assertTrue(np.isnan(value))
def test__pad(self):
    """pad() extends a series before/after without breaking regularity.

    Three local predicates inspect a small window around the junction
    between the original data and the padded region.
    """

    def is_regular(ts):
        # No duplicated timestamps: the second difference of the index
        # must be zero everywhere (i.e. a constant step).
        # NOTE(review): this accesses `ts.index` while is_monotonic below
        # uses `ts.series.index` — presumably the sliced object exposes
        # both; confirm against the TimeSeries slicing implementation.
        no_duration_diff = ts.index.to_series().diff().diff()[2:] == \
            Timedelta(0)
        return no_duration_diff.eq(True).all()

    def is_monotonic(ts):
        # `Index.is_monotonic` was deprecated and removed in pandas 2.0;
        # `is_monotonic_increasing` is the equivalent supported spelling
        # (and matches its use elsewhere in this test suite).
        return ts.series.index.is_monotonic_increasing

    def is_freq_similar(ts_before, ts_after):
        # Padding must not change the series' frequency.
        return ts_before.frequency() == ts_after.frequency()

    # Create TimeSeries
    ts_1 = TimeSeries.create("04-2020", "05-2020", "D")
    # Pad before: inspect a window around the original start.
    ts_1_padded_before = ts_1.pad("03-2020")["2020-03-31":"2020-04-02"]
    self.assertTrue(is_regular(ts_1_padded_before))
    self.assertTrue(is_monotonic(ts_1_padded_before))
    self.assertTrue(is_freq_similar(ts_1, ts_1_padded_before))
    # Pad after: inspect a window around the original end.
    ts_1_padded_after = ts_1.pad("06-2020")["2020-05-29":"2020-06-02"]
    self.assertTrue(is_regular(ts_1_padded_after))
    self.assertTrue(is_monotonic(ts_1_padded_after))
    self.assertTrue(is_freq_similar(ts_1, ts_1_padded_after))
    # Padding to a timestamp inside the series is invalid.
    with self.assertRaises(ValueError):
        ts_1.pad("2020-04-15")
def test__to_text(self):
    """to_text() writes one numbered folder per member, each holding data.csv and meta.json."""
    ts = TimeSeries.create("01-01-1990", "01-03-1990", "1D")
    # prepare data
    ts.series[TIME_SERIES_VALUES] = [0, 1, 2]
    ts.series["label_test"] = [0, None, 2]
    ts.class_label = "Test"
    tsd = TimeSeriesDataset([ts, ts, ts])
    tsd.to_text(self.outdir)
    # prepare test variables (fixed comment typo: "preparte")
    goal_length = len(tsd)
    # check that one numbered folder was created per member
    folders = glob(f"{self.outdir}/[0-9]")
    self.assertEqual(len(folders), goal_length)
    # check that all folders have a data.csv and a meta.json
    # (loop variable renamed from `dir` to avoid shadowing the builtin)
    check = True
    for folder in folders:
        check &= os.path.isfile(f'{folder}/data.csv')
        check &= os.path.isfile(f'{folder}/meta.json')
    self.assertTrue(check)
    # clean up
    shutil.rmtree(self.outdir)
    # check if cleaned
    self.assertFalse(os.path.isdir(self.outdir))
def test__trim(self):
    """TimeSeriesDataset.trim() strips leading/trailing NaNs from every member."""
    first = TimeSeries.create("02-01-2020", "06-01-2020", "H").fill(0)
    second = TimeSeries.create("01-01-2020", "04-01-2020", "H").fill(0)
    # Introduce NaN runs at both ends of each series.
    first.series[:21] = None
    first.series[-4:] = None
    second.series[:2] = None
    second.series[-14:] = None
    dataset = TimeSeriesDataset([first, second])
    # Call the function to test
    dataset = dataset.trim()
    # After trimming, no NaN may remain anywhere.
    for member in dataset:
        for value in member.series[TIME_SERIES_VALUES]:
            self.assertFalse(np.isnan(value))
def test__fill(self):
    """fill() sets every element of the series to the given value."""
    series = TimeSeries.create("01-01-2020", "03-01-2020", "H")
    fill_value = 42
    # method at test
    series = series.fill(fill_value)
    # all elements must equal the fill value
    for element in series:
        self.assertEqual(fill_value, element)
def test__apply__on_self(self):
    """apply() with a unary function maps every element of the series."""
    base = 21
    series = TimeSeries.create("01-2020", "02-2020", "H").fill(base)
    # Method at test
    series = series.apply(lambda x: x * 2)
    # Every element must have been doubled.
    for element in series:
        self.assertEqual(element, base * 2)
def test__split_in_chunks(self):
    """split_in_chunks() yields equal-size chunks; the last may be shorter."""
    series = TimeSeries.create("01-01-2020", "03-01-2020", "H")
    size = 5
    # method at test
    chunks = series.split_in_chunks(size)
    # every chunk except the last has exactly `size` elements
    for chunk in chunks[:-1]:
        self.assertEqual(len(chunk), size)
    # the last chunk holds the remainder and may be shorter
    self.assertLessEqual(len(chunks[-1]), size)
def test__create_with_freq_as_time_series(self):
    """TimeSeriesDataset.create() accepts a TimeSeries as the frequency."""
    # params
    n_series = 3
    start = "01-01-2020"
    end = "02-01-2020"
    freq_source = TimeSeries.create("01-01-2020", "01-02-2020", "H")
    # object creation
    dataset = TimeSeriesDataset.create(n_series, start, end, freq_source)
    # every member must have inherited the hourly frequency
    for member in dataset:
        self.assertEqual(member.frequency(), freq_source.frequency())
def test__regularize_right(self):
    """regularize(side="]]") aligns every member's right boundary."""
    # The goal of the right boundary
    goal_right = "2019-01-10 00:00:00"
    # Build four daily series with different spans and values.
    specs = [
        ('2019-01-03', '2019-01-05', [3, 4, 5]),
        ('2019-01-01', '2019-01-05', [1, 2, 3, 4, 5]),
        ('2019-01-05', '2019-01-10', [5, 6, 7, 8, 9, 10]),
        ('2019-01-03', '2019-01-07', [3, 4, 5, 6, 7]),
    ]
    members = []
    for start, end, values in specs:
        ts = TimeSeries.create(start, end, "1D")
        ts.series[TIME_SERIES_VALUES] = values
        members.append(ts)
    tsd = TimeSeriesDataset(members)
    # regularize to the right
    tsd = tsd.regularize(side="]]")
    # assert that every member now ends at goal_right
    self.assertTrue(all(str(ts.boundaries()[1]) == goal_right for ts in tsd))
def test__merge_by_label(self):
    """merge_by_label() joins series that share the same class label."""

    def make_ts(start, end, values, label):
        # small local factory to cut repetition
        ts = TimeSeries.create(start, end, "1D")
        ts.series[TIME_SERIES_VALUES] = values
        ts.class_label = label
        return ts

    # Two "Sensor1" fragments plus one complete "Sensor2" series.
    tsd1 = TimeSeriesDataset([
        make_ts('2019-01-01', '2019-01-02', [0, 1], "Sensor1"),
        make_ts('2019-01-03', '2019-01-03', [2], "Sensor1"),
    ])
    tsd2 = TimeSeriesDataset([
        make_ts('2019-01-01', '2019-01-03', [0, 1, 2], "Sensor2"),
    ])
    tsd_merged = tsd1.merge_by_label(tsd2)
    self.assertIsInstance(tsd_merged, TimeSeriesDataset)
    # Expected outcome: one complete series per label.
    tsd_goal = TimeSeriesDataset([
        make_ts('2019-01-01', '2019-01-03', [0, 1, 2], "Sensor1"),
        make_ts('2019-01-01', '2019-01-03', [0, 1, 2], "Sensor2"),
    ])
    check = True
    for i, expected in enumerate(tsd_goal):
        check &= expected.series.equals(tsd_merged[i].series)
        check &= (expected.class_label == tsd_merged[i].class_label)
    self.assertTrue(check)
def test__resample(self):
    """resample() accepts "lowest", "highest", an offset string, or a TimeSeries."""
    # Create two members with hourly and minutely frequency.
    ts_1 = TimeSeries.create("01-2020", "02-2020", "H")
    ts_2 = TimeSeries.create("01-2020", "03-2020", "min")
    tsd = TimeSeriesDataset([ts_1, ts_2])
    # "lowest": all members adopt the coarsest frequency (largest offset).
    lowest_freq = max(to_offset(f) for f in tsd.frequency())
    for ts in tsd.resample(freq="lowest"):
        self.assertEqual(to_offset(ts.frequency()), lowest_freq)
    # "highest": all members adopt the finest frequency (smallest offset).
    highest_freq = min(to_offset(f) for f in tsd.frequency())
    for ts in tsd.resample(freq="highest"):
        self.assertEqual(to_offset(ts.frequency()), highest_freq)
    # Explicit DateOffset string (fixed local typo: offest_str).
    offset_str = "15min"
    for ts in tsd.resample(freq=offset_str):
        self.assertEqual(to_offset(ts.frequency()), offset_str)
    # A TimeSeries argument supplies its own frequency.
    offset_str_arg = "30min"
    ts_arg = TimeSeries.create("01-2020", "03-2020", offset_str_arg)
    for ts in tsd.resample(freq=ts_arg):
        self.assertEqual(to_offset(ts.frequency()), offset_str_arg)
def test__split_at(self):
    """split_at() cuts a series in two halves sharing the split timestamp."""
    # Create TimeSeries and split it
    series = TimeSeries.create("01-01-2020", "03-01-2020", "H")
    left, right = series.split_at("02-01-2020 00:00")

    def bounds(ts):
        # first and last timestamp of the values column
        index = ts.series[TIME_SERIES_VALUES].index
        return index[0], index[-1]

    full_start, full_end = bounds(series)
    left_start, left_end = bounds(left)
    right_start, right_end = bounds(right)
    # Outer boundaries are preserved ...
    self.assertEqual(full_start, left_start)
    self.assertEqual(full_end, right_end)
    # ... and the split point is shared by both halves.
    self.assertEqual(left_end, right_start)
def test__trim__both_side_by_default(self):
    """trim() with no arguments removes NaN padding on both sides."""
    start = Timestamp("01-01-2020")
    end = Timestamp("03-01-2020")
    filled = TimeSeries.create(start, end, "H").fill(42)
    # Surround the data with NaN padding on both sides.
    padded = filled.pad("01-12-2019").pad("05-01-2020")
    # Method at test
    trimmed = padded.trim()
    # No NaN must survive trimming ...
    self.assertFalse(trimmed.series.isna().values.any())
    # ... and the original boundaries must be restored.
    new_start, new_end = trimmed.boundaries()
    self.assertEqual(start, new_start)
    self.assertEqual(end, new_end)
def test__trim__only_end(self):
    """trim(side="end") removes NaNs only at the end of the series."""
    start = Timestamp("01-01-2020")
    end = Timestamp("03-01-2020")
    filled = TimeSeries.create(start, end, "H").fill(42)
    # Pad both sides with NaNs.
    pad_start = Timestamp("01-12-2019")
    pad_end = Timestamp("05-01-2020")
    padded = filled.pad(pad_start).pad(pad_end)
    # Method at test
    trimmed = padded.trim(side="end")
    # The leading NaN padding must still be present ...
    self.assertTrue(trimmed.series.isna().values.any())
    # ... so the start stays padded while the end is trimmed back.
    new_start, new_end = trimmed.boundaries()
    self.assertEqual(pad_start, new_start)
    self.assertEqual(end, new_end)
def test__to_df(self):
    """to_df() flattens the dataset into one DataFrame with index-prefixed columns."""
    # Expected frame: per-member columns prefixed with the member index.
    df_goal = DataFrame(
        [[0, 0, 0, 0], [1, None, 1, None]],
        columns=['0_values', '0_label_test', '1_values', '1_label_test'],
        index=["01-01-1990", "01-02-1990"])
    # prepare data
    ts = TimeSeries.create("01-01-1990", "01-02-1990", "1D")
    ts.series[TIME_SERIES_VALUES] = [0, 1]
    ts.series["label_test"] = [0, None]
    ts.class_label = "Test"
    dataset = TimeSeriesDataset([ts, ts])
    # method at test
    result = dataset.to_df()
    self.assertTrue(result.equals(df_goal))
def test__to_pickle(self):
    """to_pickle() writes the dataset to the given file path."""
    ts = TimeSeries.create("01-01-1990", "01-03-1990", "1D")
    # prepare data
    ts.series[TIME_SERIES_VALUES] = [0, 1, 2]
    ts.series["label_test"] = [0, None, 2]
    ts.class_label = "Test"
    dataset = TimeSeriesDataset([ts, ts, ts])
    target = f"{self.outdir}/tsd.pkl"
    # method at test
    dataset.to_pickle(target)
    self.assertTrue(os.path.isfile(target))
    # clean up and verify the directory is gone
    shutil.rmtree(self.outdir)
    self.assertFalse(os.path.isdir(self.outdir))
def test__shuffle(self):
    """shuffle(inplace=False) reorders the members of the dataset."""
    # Build 100 one-point series, each carrying a unique label.
    template = TimeSeries.create('2019-01-03', '2019-01-03', "1D")
    template.series[TIME_SERIES_VALUES] = [2]
    members = []
    for idx in range(100):
        member = deepcopy(template)
        member.class_label = f'Sensor{idx}'
        members.append(member)
    tsd = TimeSeriesDataset(members)
    tsd_shuffled = tsd.shuffle(inplace=False)
    # If every label still lines up, nothing moved. With 100 members the
    # probability of an identity shuffle is negligible (but non-zero),
    # so this can fail with a very low probability.
    unchanged = True
    for idx, member in enumerate(tsd):
        unchanged &= (member.class_label == tsd_shuffled[idx].class_label)
    self.assertFalse(unchanged)
def test__time_deltas(self):
    """time_detlas() yields per-step gaps in seconds as plain floats."""
    series = TimeSeries.create("01-2020", "03-2020", "H")
    # NOTE(review): the method name `time_detlas` looks like a typo of
    # `time_deltas`; it is kept as-is to match the API under test.
    deltas = series.time_detlas()
    # Skip the first entry (no predecessor); hourly steps are 3600 s.
    for delta in deltas[1:]:
        self.assertEqual(delta, 3600.0)
        self.assertIs(type(delta), float)
def test__to_darts__series_equality(self):
    """to_darts() preserves the underlying values exactly."""
    source = TimeSeries.create("01-2020", "02-2020", "H")
    source = source.fill(np.random.randint(0, 1000, len(source)))
    converted = source.to_darts()
    # The darts series must hold exactly the same data.
    values_match = source.series[TIME_SERIES_VALUES].equals(
        converted.pd_series())
    self.assertTrue(values_match)
def test__copy__deep(self):
    """copy(deep=True) returns a distinct, independent object."""
    # object creation
    ts = TimeSeries.create("01-2020", "03-2020", "H")
    # Local renamed from `copy` to avoid shadowing the builtin.
    ts_copy = ts.copy(deep=True)
    # assertIsNot expresses the identity check directly (instead of
    # comparing id() values with assertNotEqual).
    self.assertIsNot(ts, ts_copy)
    # A deep copy must not share the underlying series object either.
    self.assertIsNot(ts.series, ts_copy.series)