def fetch_csvs(self, date, label):
    """
    Fetch the CSV content for ``label`` on ``date``.

    The daily file is requested first. If that request does not come back
    with a 200, the zipped file stamped with the first day of ``date``'s
    month is requested and unzipped instead.

    :param date: object with ``strftime``, used to build the file names.
    :param label: dataset label; used both as URL path segment and
        as part of the file name.
    :return: a one-element list holding the daily response text, or whatever
        ``self.unzip`` produced for the monthly archive, or ``[]`` when
        neither source yielded data.
    """
    # pick the URL templates once; LMP files carry a "_zone" marker
    if self.options['data'] == 'lmp':
        daily_template = '%s/%s/%s%s_zone.csv'
        monthly_template = '%s/%s/%s%s_zone_csv.zip'
    else:
        daily_template = '%s/%s/%s%s.csv'
        monthly_template = '%s/%s/%s%s_csv.zip'

    # first choice: the daily file
    daily_url = daily_template % (
        self.base_url, label, date.strftime('%Y%m%d'), label)
    daily = self.request(daily_url)
    if daily and daily.status_code == 200:
        return [daily.text]

    # fallback: the zipped monthly file, named after the 1st of the month
    monthly_url = monthly_template % (
        self.base_url, label, date.strftime('%Y%m01'), label)
    archive = self.request(monthly_url)
    if not archive:
        return []

    extracted = self.unzip(archive.content)
    if not extracted:
        return []

    LOGGER.info('Failed to find daily %s data for %s but found monthly data, using that' %
                (self.options['data'], date))
    return extracted
def parse_genmix(self, content):
    """
    Parse fuel mix CSV content into a frame of generation by fuel.

    :param content: raw CSV content, handed to ``self.parse_to_df``.
    :return: DataFrame indexed by ``timestamp`` with columns ``gen_MW``
        and ``fuel_name``.
    :raises KeyError: if a 'Fuel Category' value is not a key of
        ``self.fuel_names``, or if an expected column is missing.
    """
    # parse csv to df
    df = self.parse_to_df(content, header=0, index_col=0, parse_dates=True)

    # set index (utcify_index presumably localizes using the per-row
    # 'Time Zone' column and converts to UTC -- confirm against the helper)
    df.index = self.utcify_index(df.index, tz_col=df['Time Zone'])
    df.index.name = 'timestamp'

    # convert fuel names element-wise on the one column that is needed.
    # A row-wise df.apply(axis=1) builds a Series per row and returns a
    # DataFrame (not a Series) for an empty frame, which breaks the column
    # assignment. The explicit lookup keeps the KeyError on unknown categories.
    fuel_names = self.fuel_names
    df['fuel_name'] = df['Fuel Category'].map(
        lambda category: fuel_names[category])

    # pick the generation column; 'Gen MWh' is the legacy rtfuelmix header
    gen_column = 'Gen MW'
    if gen_column not in df.columns:
        LOGGER.info('Falling back to legacy rtfuelmix column header names.')
        gen_column = 'Gen MWh'

    # assemble final
    return pd.DataFrame({
        'gen_MW': df[gen_column],
        'fuel_name': df['fuel_name'],
    })
def parse_genmix(self, content):
    """
    Parse fuel mix CSV content into a frame of generation by fuel.

    :param content: raw CSV content, handed to ``self.parse_to_df``.
    :return: DataFrame indexed by ``timestamp`` with columns ``gen_MW``
        and ``fuel_name``.
    :raises KeyError: if a 'Fuel Category' value is not a key of
        ``self.fuel_names``, or if an expected column is missing.
    """
    # parse csv to df
    df = self.parse_to_df(content, header=0, index_col=0, parse_dates=True)

    # set index (utcify_index presumably localizes using the per-row
    # 'Time Zone' column and converts to UTC -- confirm against the helper)
    df.index = self.utcify_index(df.index, tz_col=df['Time Zone'])
    df.index.name = 'timestamp'

    # convert fuel names element-wise on the one column that is needed.
    # A row-wise df.apply(axis=1) builds a Series per row and returns a
    # DataFrame (not a Series) for an empty frame, which breaks the column
    # assignment. The explicit lookup keeps the KeyError on unknown categories.
    fuel_names = self.fuel_names
    df['fuel_name'] = df['Fuel Category'].map(
        lambda category: fuel_names[category])

    # pick the generation column; 'Gen MWh' is the legacy rtfuelmix header
    gen_column = 'Gen MW'
    if gen_column not in df.columns:
        LOGGER.info('Falling back to legacy rtfuelmix column header names.')
        gen_column = 'Gen MWh'

    # assemble final
    return pd.DataFrame({
        'gen_MW': df[gen_column],
        'fuel_name': df['fuel_name'],
    })
def request(self, url, mode='get', retry_sec=5, retries_remaining=5, **kwargs):
    """
    Get or post to a URL with the provided kwargs.

    The session stored on ``self.session`` is used; one is created and
    stored there if it is missing. Responses with status 429 are retried
    after sleeping, doubling the delay on each attempt.

    :param url: URL to request.
    :param mode: 'get' or 'post'.
    :param retry_sec: seconds to sleep before the next retry on a 429.
    :param retries_remaining: how many more 429 retries are allowed.
    :param kwargs: passed through to the session's get/post call.
    :return: the response object (including non-200, non-429 responses,
        which are logged as errors but still returned), or None on a
        connection error, a timeout, or exhausted 429 retries.
    :raises ValueError: if the mode is not 'get' or 'post'.
    """
    # check args
    if mode not in ('get', 'post'):
        raise ValueError('Invalid request mode %s' % mode)

    # check for session; also covers a session attribute that is set to None
    session = getattr(self, 'session', None)
    if session is None:
        session = self.session = requests.Session()

    # carry out request
    try:
        response = getattr(session, mode)(
            url, verify=True, timeout=self.timeout_seconds, **kwargs)
    except (requests.exceptions.ConnectionError,
            requests.exceptions.Timeout) as e:
        # eg max retries exceeded
        LOGGER.error('%s: connection error for %s, %s:\n%s',
                     self.NAME, url, kwargs, e)
        return None

    if response.status_code == 200:
        # success
        LOGGER.debug('%s: request success for %s, %s with cache hit %s',
                     self.NAME, url, kwargs,
                     getattr(response, 'from_cache', None))
    elif response.status_code == 429:
        if retries_remaining <= 0:
            # exhausted retries
            LOGGER.warning('%s: exhausted retries for %s, %s',
                           self.NAME, url, kwargs)
            return None

        # retry on throttle, backing off exponentially
        LOGGER.warning(
            '%s: retrying in %d seconds (%d retries remaining), throttled for %s, %s',
            self.NAME, retry_sec, retries_remaining, url, kwargs)
        sleep(retry_sec)
        return self.request(url, mode=mode, retry_sec=retry_sec * 2,
                            retries_remaining=retries_remaining - 1,
                            **kwargs)
    else:
        # non-throttle error; the response is still handed back to the caller
        LOGGER.error('%s: request failure with code %s for %s, %s',
                     self.NAME, response.status_code, url, kwargs)

    # opt-in debugging dump, enabled through the environment
    if os.environ.get('VERBOSE_REQUESTS') == 'verbose':
        LOGGER.info(mode)
        LOGGER.info(url)
        LOGGER.info(kwargs)
        LOGGER.info(response.status_code)
        print(response.text)

    return response
def request(self, url, mode='get', retry_sec=5, retries_remaining=5, **kwargs):
    """
    Get or post to a URL with the provided kwargs.

    The session stored on ``self.session`` is used; one is created and
    stored there if it is missing. Responses with status 429 are retried
    after sleeping, doubling the delay on each attempt.

    :param url: URL to request.
    :param mode: 'get' or 'post'.
    :param retry_sec: seconds to sleep before the next retry on a 429.
    :param retries_remaining: how many more 429 retries are allowed.
    :param kwargs: passed through to the session's get/post call.
    :return: the response object (including non-200, non-429 responses,
        which are logged as errors but still returned), or None on a
        connection error, a timeout, or exhausted 429 retries.
    :raises ValueError: if the mode is not 'get' or 'post'.
    """
    # check args
    if mode not in ('get', 'post'):
        raise ValueError('Invalid request mode %s' % mode)

    # check for session; also covers a session attribute that is set to None
    session = getattr(self, 'session', None)
    if session is None:
        session = self.session = requests.Session()

    # carry out request
    try:
        response = getattr(session, mode)(
            url, verify=True, timeout=self.timeout_seconds, **kwargs)
    except (requests.exceptions.ConnectionError,
            requests.exceptions.Timeout) as e:
        # eg max retries exceeded
        LOGGER.error('%s: connection error for %s, %s:\n%s',
                     self.NAME, url, kwargs, e)
        return None

    if response.status_code == 200:
        # success
        LOGGER.debug('%s: request success for %s, %s with cache hit %s',
                     self.NAME, url, kwargs,
                     getattr(response, 'from_cache', None))
    elif response.status_code == 429:
        if retries_remaining <= 0:
            # exhausted retries
            LOGGER.warning('%s: exhausted retries for %s, %s',
                           self.NAME, url, kwargs)
            return None

        # retry on throttle, backing off exponentially
        LOGGER.warning(
            '%s: retrying in %d seconds (%d retries remaining), throttled for %s, %s',
            self.NAME, retry_sec, retries_remaining, url, kwargs)
        sleep(retry_sec)
        return self.request(url, mode=mode, retry_sec=retry_sec * 2,
                            retries_remaining=retries_remaining - 1,
                            **kwargs)
    else:
        # non-throttle error; the response is still handed back to the caller
        LOGGER.error('%s: request failure with code %s for %s, %s',
                     self.NAME, response.status_code, url, kwargs)

    # opt-in debugging dump, enabled through the environment
    if os.environ.get('VERBOSE_REQUESTS') == 'verbose':
        LOGGER.info(mode)
        LOGGER.info(url)
        LOGGER.info(kwargs)
        LOGGER.info(response.status_code)
        print(response.text)

    return response