def test_unique_timeseries(data_source_cls):
    """Verify that a data source yields no duplicate rows for the same INDEX_FIELDS."""
    data_source = data_source_cls.local()
    timeseries = TimeseriesDataset.build_from_data_source(data_source)
    timeseries = combined_datasets.US_STATES_FILTER.apply(timeseries)
    # Check for duplicate rows with the same INDEX_FIELDS. Sort by index so duplicates are
    # next to each other in the message if the assert fails.
    timeseries_data = timeseries.data.set_index(timeseries.INDEX_FIELDS).sort_index()
    # keep=False marks *every* member of a duplicate group (not just the 2nd+ occurrence),
    # so the failure message shows all conflicting rows, not half of them.
    duplicates = timeseries_data.index.duplicated(keep=False)
    assert not sum(duplicates), str(timeseries_data.loc[duplicates])
def test_unique_timeseries(data_source_cls):
    """Assert that no two rows of a data source share the same INDEX_FIELDS values."""
    source = data_source_cls.local()
    ts = TimeseriesDataset.build_from_data_source(source)
    ts = combined_datasets.US_STATES_FILTER.apply(ts)
    # Index by INDEX_FIELDS and sort so that, if the assertion fails, the duplicate
    # rows appear adjacent to each other in the failure message.
    indexed = ts.data.set_index(ts.INDEX_FIELDS).sort_index()
    # keep=False flags every row of each duplicate group, not only repeats.
    dup_mask = indexed.index.duplicated(keep=False)
    assert not sum(dup_mask), str(indexed.loc[dup_mask])
def build_from_data_source(cls, source):
    """Build an instance of `cls` from `source`.

    If the source's index fields match a timeseries layout, build a
    TimeseriesDataset first and collapse it to its latest values.
    Otherwise the source's index fields must match this class's exactly.

    Raises:
        ValueError: if the source's index fields match neither layout.
    """
    # Imported locally to avoid a circular import between dataset modules.
    from libs.datasets.timeseries import TimeseriesDataset

    if set(source.INDEX_FIELD_MAP.keys()) == set(TimeseriesDataset.INDEX_FIELDS):
        timeseries = TimeseriesDataset.build_from_data_source(source)
        return timeseries.to_latest_values_dataset()

    if set(source.INDEX_FIELD_MAP.keys()) != set(cls.INDEX_FIELDS):
        raise ValueError("Index fields must match")

    # Pass fill_missing_state for consistency with the parallel implementation of
    # this method elsewhere in the codebase, which forwards the source's
    # FILL_MISSING_STATE_LEVEL_DATA flag.
    return cls.from_source(source, fill_missing_state=source.FILL_MISSING_STATE_LEVEL_DATA)
def build_from_data_source(cls, source):
    """Construct this dataset class from `source`.

    Timeseries-shaped sources are first built as a TimeseriesDataset and then
    reduced to their latest values; any other source must have index fields
    identical to this class's, or a ValueError is raised.
    """
    # Local import avoids a circular dependency between dataset modules.
    from libs.datasets.timeseries import TimeseriesDataset

    source_fields = set(source.INDEX_FIELD_MAP.keys())
    if source_fields == set(TimeseriesDataset.INDEX_FIELDS):
        ts = TimeseriesDataset.build_from_data_source(source)
        return ts.to_latest_values_dataset()
    if source_fields != set(cls.INDEX_FIELDS):
        raise ValueError("Index fields must match")
    return cls.from_source(source, fill_missing_state=source.FILL_MISSING_STATE_LEVEL_DATA)
def timeseries(self) -> TimeseriesDataset:
    """Return a TimeseriesDataset built from this data source.

    NOTE(review): the original docstring said "Builds generic beds dataset",
    which does not match what the code does (it builds a timeseries from
    `self`) — likely a copy-paste from a sibling class.
    """
    return TimeseriesDataset.build_from_data_source(self)