def fetch_forecast(self, date):
    """
    Fetch the MISO day-ahead forecast spreadsheet for *date*.

    Returns a DataFrame indexed by UTC timestamp, or an empty DataFrame
    when the request fails or no data is published for that date.
    """
    # construct url
    datestr = date.strftime('%Y%m%d')
    url = ''.join([self.base_url, '/Library/Repository/Market%20Reports/',
                   datestr, '_da_ex.xls'])

    # make request with self.request for easier debugging, mocking
    response = self.request(url)
    if not response:
        return pd.DataFrame()
    if response.status_code == 404:
        LOGGER.debug('No MISO forecast data available at %s' % datestr)
        return pd.DataFrame()

    xls = pd.read_excel(BytesIO(response.content))

    # clean header: the first 5 rows are header junk, and the last of
    # those rows carries the real column labels
    header_rows = xls.iloc[:5]
    df = xls.iloc[5:]
    df.columns = ['hour_str'] + list(header_rows.iloc[-1][1:])

    # labels look like 'Hour 01'..'Hour 24'; build a UTC timestamp index
    df.index = [self.utcify(datetime(date.year, date.month, date.day,
                                     int(label[5:]) - 1))
                for label in df['hour_str']]
    df.index.set_names(['timestamp'], inplace=True)

    return df
def fetch_forecast(self, date):
    """
    Fetch the MISO day-ahead forecast spreadsheet for *date* by letting
    pandas read the URL directly.

    Returns a DataFrame indexed by UTC timestamp, or an empty DataFrame
    when no data is published for that date.
    """
    # construct url
    datestr = date.strftime("%Y%m%d")
    url = self.base_url + "/Library/Repository/Market%20Reports/" + datestr + "_da_ex.xls"

    # a missing report surfaces as an HTTPError from the url fetch
    try:
        xls = pd.read_excel(url)
    except HTTPError:
        LOGGER.debug("No MISO forecast data available at %s" % datestr)
        return pd.DataFrame()

    # clean header: first 5 rows are header junk; the last of those rows
    # holds the real column labels
    header_rows = xls.iloc[:5]
    df = xls.iloc[5:]
    df.columns = ["hour_str"] + list(header_rows.iloc[-1][1:])

    # labels look like 'Hour 01'..'Hour 24'; build a UTC timestamp index
    stamps = []
    for label in df["hour_str"]:
        local_hour = int(label[5:]) - 1
        stamps.append(self.utcify(datetime(date.year, date.month, date.day, local_hour)))
    df.index = stamps
    df.index.set_names(["timestamp"], inplace=True)

    return df
def utcify_index(self, local_index, tz_name=None):
    """
    Convert a DatetimeIndex to UTC.

    :param DatetimeIndex local_index: The local DatetimeIndex to be converted.
    :param string tz_name: If local_index is naive, it is assumed to be in
        timezone tz_name. If tz_name is not provided, the client's default
        timezone is used.
    :return: DatetimeIndex in UTC.
    :rtype: DatetimeIndex
    """
    # fall back to the client's default timezone
    if tz_name is None:
        tz_name = self.TZ_NAME

    # localize
    try:
        aware_local_index = local_index.tz_localize(tz_name)
    except AmbiguousTimeError as e:
        # DST fall-back duplicates wall-clock times; let pandas infer
        # which occurrences are DST from the ordering of the index
        LOGGER.debug(e)
        aware_local_index = local_index.tz_localize(tz_name, ambiguous='infer')
    except TypeError as e:
        # index is already tz-aware; tz_localize refuses to relocalize,
        # so use the index as-is (previously this case crashed)
        LOGGER.debug(e)
        aware_local_index = local_index

    # convert to utc
    aware_utc_index = aware_local_index.tz_convert('UTC')

    # return
    return aware_utc_index
def unzip(self, content):
    """
    Unzip encoded data.

    Returns the unzipped content as an array of strings, each representing
    one file's content, or returns None if an error was encountered.
    ***Previous behavior: Only returned the content from the first file***
    """
    # wrap raw content in a file-like object: bytes first, str fallback
    try:
        buf = BytesIO(content)
    except TypeError:
        buf = StringIO(content)

    # open the archive, logging and bailing out on malformed input
    try:
        archive = zipfile.ZipFile(buf)
    except zipfile.BadZipfile:
        LOGGER.error('%s: unzip failure for content beginning:\n%s' % (self.NAME, str(content)[0:100]))
        LOGGER.debug('%s: Faulty unzip content:\n%s' % (self.NAME, content))
        return None

    # read every member file in archive order
    unzipped = []
    for member in archive.namelist():
        unzipped.append(archive.read(member))
    archive.close()

    # return
    return unzipped
def utcify_index(self, local_index, tz_name=None, tz_col=None):
    """
    Convert a DatetimeIndex to UTC.

    :param DatetimeIndex local_index: The local DatetimeIndex to be converted.
    :param string tz_name: If local_index is naive, it is assumed to be in
        timezone tz_name. If tz_name is not provided, the client's default
        timezone is used.
    :param tz_col: Optional per-row timezone names overriding tz_name.
    :return: DatetimeIndex in UTC.
    :rtype: DatetimeIndex
    """
    # fall back to the client's default timezone
    if tz_name is None:
        tz_name = self.TZ_NAME

    if tz_col is not None:
        # per-row timezones: localize each timestamp individually
        # (it seems like we shouldn't have to iterate, but all the smart ways aren't working)
        utc_stamps = []
        for pos in range(len(local_index)):
            try:
                zone = pytz.timezone(tz_col[pos])
            except pytz.UnknownTimeZoneError:
                # fall back to the default timezone for this row
                zone = pytz.timezone(tz_name)
            utc_stamps.append(self.utcify(zone.localize(local_index[pos])))
        aware_utc_index = pd.DatetimeIndex(utc_stamps)
    else:
        # one timezone for the whole index
        try:
            aware_local_index = local_index.tz_localize(tz_name)
        except AmbiguousTimeError as e:
            LOGGER.debug(e)
            try:
                # DST fall-back: let pandas infer DST from index ordering
                aware_local_index = local_index.tz_localize(tz_name, ambiguous='infer')
            except AmbiguousTimeError:
                # inference failed: explicitly compute per-hour DST flags
                LOGGER.warn('Second DatetimeIndex localization fallback, assuming DST transition day.')
                dst_flags = self._dst_active_hours_for_transition_day(local_dt_index=local_index)
                aware_local_index = local_index.tz_localize(tz_name, ambiguous=dst_flags)
        except TypeError as e:
            # index is already tz-aware
            LOGGER.debug(e)
            aware_local_index = local_index
        aware_utc_index = aware_local_index.tz_convert('UTC')

    # return
    return aware_utc_index
def request(self, url, mode='get', retry_sec=5, retries_remaining=5, **kwargs):
    """
    Get or post to a URL with the provided kwargs.
    Returns the response, or None if an error was encountered.
    If the mode is not 'get' or 'post', raises ValueError.

    :param string url: The URL to fetch.
    :param string mode: 'get' or 'post'.
    :param int retry_sec: Seconds to wait before retrying a throttled request
        (doubled on each successive retry).
    :param int retries_remaining: Cap on 429 retries; previously retries were
        unbounded, so a persistently throttling server caused infinite recursion.
    """
    # check args
    allowed_modes = ['get', 'post']
    if mode not in allowed_modes:
        raise ValueError('Invalid request mode %s' % mode)

    # check for session, creating one lazily on first use
    try:
        session = getattr(self, 'session')
    except AttributeError:
        self.session = requests.Session()
        session = self.session

    # carry out request
    try:
        response = getattr(session, mode)(url, verify=False,
                                          timeout=self.TIMEOUT_SECONDS,
                                          **kwargs)
    except (requests.exceptions.ConnectionError, requests.exceptions.Timeout) as e:
        # eg max retries exceeded
        msg = '%s: connection error for %s, %s:\n%s' % (self.NAME, url, kwargs, e)
        LOGGER.error(msg)
        return None

    if response.status_code == 200:
        # success
        LOGGER.debug('%s: request success for %s, %s with cache hit %s' % (self.NAME, url, kwargs,
                                                                           getattr(response, 'from_cache', None)))
    elif response.status_code == 429:
        if retries_remaining > 0:
            # retry on throttle, backing off exponentially
            LOGGER.warn('%s: retrying in %d seconds, throttled for %s, %s' % (self.NAME, retry_sec, url, kwargs))
            sleep(retry_sec)
            return self.request(url, mode=mode, retry_sec=retry_sec * 2,
                                retries_remaining=retries_remaining - 1, **kwargs)
        else:
            # exhausted retries
            LOGGER.warn('%s: exhausted retries for %s, %s' % (self.NAME, url, kwargs))
            return None
    else:
        # non-throttle error
        LOGGER.error('%s: request failure with code %s for %s, %s' % (self.NAME, response.status_code, url, kwargs))

    return response
def utcify_index(self, local_index, tz_name=None, tz_col=None):
    """
    Convert a DatetimeIndex to UTC.

    :param DatetimeIndex local_index: The local DatetimeIndex to be converted.
    :param string tz_name: If local_index is naive, it is assumed to be in
        timezone tz_name. If tz_name is not provided, the client's default
        timezone is used.
    :param tz_col: Optional per-row timezone names overriding tz_name.
    :return: DatetimeIndex in UTC.
    :rtype: DatetimeIndex
    """
    # fall back to the client's default timezone
    if tz_name is None:
        tz_name = self.TZ_NAME

    if tz_col is not None:
        # per-row timezones: localize each timestamp one at a time
        # (it seems like we shouldn't have to iterate, but all the smart ways aren't working)
        utc_stamps = []
        for pos in range(len(local_index)):
            try:
                zone = pytz.timezone(tz_col[pos])
            except pytz.UnknownTimeZoneError:
                # unrecognized zone name: fall back to the default
                zone = pytz.timezone(tz_name)
            utc_stamps.append(self.utcify(zone.localize(local_index[pos])))
        return pd.DatetimeIndex(utc_stamps)

    # one timezone for the whole index
    try:
        aware_local_index = local_index.tz_localize(tz_name)
    except AmbiguousTimeError as e:
        # DST fall-back: let pandas infer DST from index ordering
        LOGGER.debug(e)
        aware_local_index = local_index.tz_localize(tz_name, ambiguous='infer')
    except TypeError as e:
        # index is already tz-aware
        LOGGER.debug(e)
        aware_local_index = local_index

    # convert to utc and return
    return aware_local_index.tz_convert('UTC')
def request(self, url, mode='get', retry_sec=5, retries_remaining=5, **kwargs):
    """
    Get or post to a URL with the provided kwargs.
    Returns the response, or None if an error was encountered.
    If the mode is not 'get' or 'post', raises ValueError.

    :param string url: The URL to fetch.
    :param string mode: 'get' or 'post'.
    :param int retry_sec: Seconds to wait before retrying a throttled
        request (doubled on each successive retry).
    :param int retries_remaining: Cap on 429 retries.
    """
    # validate the request mode up front
    if mode not in ['get', 'post']:
        raise ValueError('Invalid request mode %s' % mode)

    # lazily create a shared session on first use
    if not hasattr(self, 'session'):
        self.session = requests.Session()
    session = self.session

    # carry out request
    try:
        response = getattr(session, mode)(url, verify=False,
                                          timeout=self.timeout_seconds,
                                          **kwargs)
    except (requests.exceptions.ConnectionError, requests.exceptions.Timeout) as e:
        # eg max retries exceeded
        LOGGER.error('%s: connection error for %s, %s:\n%s' % (self.NAME, url, kwargs, e))
        return None

    status = response.status_code
    if status == 200:
        # success
        LOGGER.debug('%s: request success for %s, %s with cache hit %s' % (self.NAME, url, kwargs,
                                                                           getattr(response, 'from_cache', None)))
        return response

    if status == 429:
        if retries_remaining > 0:
            # throttled: back off exponentially and recurse with one fewer retry
            LOGGER.warn(
                '%s: retrying in %d seconds (%d retries remaining), throttled for %s, %s'
                % (self.NAME, retry_sec, retries_remaining, url, kwargs))
            sleep(retry_sec)
            retries_remaining -= 1
            return self.request(url, mode=mode, retry_sec=retry_sec * 2,
                                retries_remaining=retries_remaining, **kwargs)
        # exhausted retries
        LOGGER.warn('%s: exhausted retries for %s, %s' % (self.NAME, url, kwargs))
        return None

    # non-throttle error
    LOGGER.error('%s: request failure with code %s for %s, %s' % (self.NAME, status, url, kwargs))
    return response