def _get_range(forecast: pd.DataFrame, start: pd.Timestamp | dt.datetime, end: pd.Timestamp | dt.datetime) -> pd.DataFrame: if start is not None: start = start.astimezone(forecast.index.tz) if start is None or start < forecast.index[0]: start = forecast.index[0] if end is not None: end = end.astimezone(forecast.index.tz) if end is None or end > forecast.index[-1]: end = forecast.index[-1] return forecast.loc[start:end, :]
def get(self,
        start: pd.Timestamp | dt.datetime | None = None,
        end: pd.Timestamp | dt.datetime = None,
        **kwargs) -> pd.DataFrame:
    """Return the forecast for a time window, using the database as cache.

    The requested start is floored to the beginning of its forecast
    schedule slot. If the database already holds a forecast for that slot
    it is read from there; otherwise it is retrieved through ``_get`` and,
    when a database is configured, stored for later calls.

    Args:
        start: Beginning of the requested window. Defaults to the current
            UTC time, evaluated on every call. Naive values are
            interpreted as UTC.
        end: End of the requested window, or None for everything up to
            the end of the forecast.
        **kwargs: Forwarded to ``_get`` when a forecast has to be
            retrieved.

    Returns:
        The forecast restricted to the requested window by ``_get_range``.
    """
    # Resolve the default here rather than in the signature: a default of
    # ``dt.datetime.now(...)`` is evaluated once, when the function is
    # defined, and would freeze "now" at import time.
    if start is None:
        start = dt.datetime.now(tz.utc)

    # Calculate the available forecast start and end times.
    # self.interval is divided by 3600 and then used as a number of hours.
    interval = self.interval/3600
    timezone = tz.timezone(self._system.location.tz)

    if start.tzinfo is None or start.tzinfo.utcoffset(start) is None:
        start = tz.utc.localize(start)

    # Floor to the full hour in the system's timezone, then step back to
    # the last hour that is a multiple of the interval.
    start_schedule = start.astimezone(timezone).replace(minute=0, second=0, microsecond=0)
    hours_past = start_schedule.hour % interval
    if hours_past != 0:
        start_schedule = start_schedule - dt.timedelta(hours=hours_past)

    if self._database is not None and self._database.exists(start_schedule, subdir=self._id):
        forecast = self._database.read(start_schedule, subdir=self._id)
    else:
        forecast = self._get(start, **kwargs)
        if self._database is not None:
            # Store the retrieved forecast
            self._database.write(forecast, start=start_schedule, subdir=self._id)

    return self._get_range(forecast, start, end)
def _get(self, start: pd.Timestamp | dt.datetime = None, end: pd.Timestamp | dt.datetime = None, format: str = '%d.%m.%Y', **kwargs) -> pd.DataFrame: if start is None: start = tz.utc.localize(dt.datetime.utcnow()) start.replace(year=start.year-1, month=1, day=1, hour=0, minute=0, second=0) elif isinstance(start, str): start = tz.utc.localize(dt.datetime.strptime(start, format)) if end is None: end = start + dt.timedelta(days=364) elif isinstance(end, str): end = tz.utc.localize(dt.datetime.strptime(end, format)) return self._database.read(start=start, end=end, **kwargs)
def read(self, start: pd.Timestamp | dt.datetime = None, end: pd.Timestamp | dt.datetime = None, resolution: int = None, file: str = None, subdir: str = '', **kwargs) -> pd.DataFrame: if file is None: file = self.file if file is not None: data = self._read_file(os.path.join(self.dir, subdir, file), **kwargs) else: data = self._read_file(os.path.join(self.dir, subdir, start.strftime(self.format) + '.csv'), **kwargs) if end is not None: date = start while date <= end: if self.exists(date, subdir=subdir): date_str = date.strftime(self.format) data_file = date_str + '.csv' data = data.combine_first(self._read_file(os.path.join(self.dir, subdir, data_file), **kwargs)) date += dt.timedelta(hours=self.interval) if resolution is not None and resolution > 900: offset = (start - start.replace(hour=0, minute=0, second=0, microsecond=0)).total_seconds() % resolution data = data.resample(str(int(resolution))+'s', base=offset).sum() if end is not None: end += dt.timedelta(seconds=resolution) if end is not None: if start > end: return data.truncate(before=start).head(1) return data[(data.index >= start) & (data.index <= end)] return data
def read(self,
         start: pd.Timestamp | dt.datetime = None,
         end: pd.Timestamp | dt.datetime = None,
         resolution: int = None,
         **kwargs):
    """Read all configured tables into one frame, one column per table.

    For every ``column, table`` pair in ``self.tables`` the ``time`` and
    ``data`` columns are selected from ``start`` on (or between ``start``
    and ``end``) and joined along the column axis.

    Args:
        start: Beginning of the requested range; required, despite the
            None default, because it is always converted to a timestamp.
        end: End of the requested range, or None for no upper bound.
        resolution: Target resolution in seconds. Values above 900
            trigger a resample that sums each bin.
        **kwargs: Accepted but not used here.

    Returns:
        A frame indexed by UTC timestamps with one column per table.
    """
    epoch = dt.datetime(1970, 1, 1, tzinfo=tz.UTC)

    # The bounds are the same for every table, so convert them once.
    # The time column is queried with, and decoded from, seconds since
    # the epoch.
    params = [(start.astimezone(tz.UTC) - epoch).total_seconds()]
    if end is None:
        condition = "time >= %s ORDER BY time ASC"
    else:
        condition = "time BETWEEN %s AND %s ORDER BY time ASC"
        params.append((end.astimezone(tz.UTC) - epoch).total_seconds())

    data = pd.DataFrame()
    for column, table in self.tables.items():
        select = "SELECT time, data FROM {0} WHERE ".format(table) + condition

        cursor = self.connector.cursor()
        try:
            cursor.execute(select, tuple(params))
            rows = cursor.fetchall()
        finally:
            # Release the cursor even if the query fails.
            cursor.close()

        # Decode directly in UTC. fromtimestamp() without a timezone
        # yields local wall-clock time, which must not be labelled UTC.
        times = [dt.datetime.fromtimestamp(timestamp, tz.utc) for timestamp, _ in rows]
        values = [value for _, value in rows]

        result = pd.DataFrame(data=values, index=times, columns=[column])
        data = pd.concat([data, result], axis=1)

    # An empty result has no datetime index that could be resampled.
    if resolution is not None and resolution > 900 and not data.empty:
        # Align the bins with the time of day of ``start``.
        midnight = start.replace(hour=0, minute=0, second=0, microsecond=0)
        offset = (start - midnight).total_seconds() % resolution
        # ``base=`` was removed from resample() in pandas 2.0; for a
        # frequency given in seconds, ``offset=`` as a timedelta of the
        # same number of seconds is the documented replacement.
        data = data.resample(str(int(resolution))+'s',
                             offset=pd.Timedelta(seconds=offset)).sum()
    return data
def write(self, data: pd.DataFrame, start: pd.Timestamp | dt.datetime = None, end: pd.Timestamp | dt.datetime = None, file: str = None, subdir: str = '', **kwargs) -> None: if data is not None and self.enabled: if start is None: start = data.index[0] if end is None: end = data.index[-1] if file is None: file = self.file if file is None: file = start.strftime(self.format) + '.csv' path = os.path.join(self.dir, subdir) if not os.path.exists(path): os.makedirs(path) self._write_file(os.path.join(path, file), data.loc[start:end], **kwargs)