Example #1
 def _format_start_end(self, data):
     formatted_sliced = []
     if 'gen' not in self.options['data']:
         formatted_sliced = [
             i for i in data if i['timestamp'] >= self.options['start_at']
             and i['timestamp'] <= self.options['end_at']
         ]
     else:
         try:
             yesterday = (self.local_now() - timedelta(days=2)).replace(
                 hour=0, minute=0, second=0, microsecond=0)
             tomorrow = (self.local_now() + timedelta(days=1)).replace(
                 hour=23, minute=0, second=0, microsecond=0)
             assert ((self.options['start_at'] >= yesterday)
                     and (self.options['end_at'] <= tomorrow))
             formatted_sliced = [
                 i for i in data
                 if i['timestamp'] >= self.options['start_at']
                 and i['timestamp'] <= self.options['end_at']
             ]
         except AssertionError:
             LOGGER.error('Generation data error for %s' % self.BA)
             raise ValueError(
                 'Generation data is only available for the previous '
                 'and current day.', self.options)
     return formatted_sliced
Example #2
    def latest_fuel_mix(self):
        # set up request
        url = self.base_url + '/ria/FuelMix.aspx?CSV=True'

        # carry out request
        response = self.request(url)
        if not response:
            return pd.DataFrame()

        # test for valid content
        if 'The page cannot be displayed' in response.text:
            LOGGER.error('MISO: Error in source data for generation')
            return pd.DataFrame()

        # preliminary parsing
        df = pd.read_csv(BytesIO(response.content), header=0, index_col=0, parse_dates=True)

        # set index
        df.index = self.utcify_index(df.index)
        df.index.set_names(['timestamp'], inplace=True)

        # set names and labels
        df['fuel_name'] = df.apply(lambda x: self.fuels[x['CATEGORY']], axis=1)
        df['gen_MW'] = df['ACT']

        # return
        return df[['fuel_name', 'gen_MW']]
Example #3
    def latest_fuel_mix(self):
        # set up request
        url = self.base_url + '/ria/FuelMix.aspx?CSV=True'

        # carry out request
        response = self.request(url)
        if not response:
            return pd.DataFrame()

        # test for valid content
        if 'The page cannot be displayed' in response.text:
            LOGGER.error('MISO: Error in source data for generation')
            return pd.DataFrame()

        # preliminary parsing
        df = pd.read_csv(BytesIO(response.content),
                         header=0,
                         index_col=0,
                         parse_dates=True)

        # set index
        df.index = self.utcify_index(df.index)
        df.index.set_names(['timestamp'], inplace=True)

        # set names and labels
        df['fuel_name'] = df.apply(lambda x: self.fuels[x['CATEGORY']], axis=1)
        df['gen_MW'] = df['ACT']

        # return
        return df[['fuel_name', 'gen_MW']]
Example #4
    def handle_options(self, **kwargs):
        """
        Process and store keyword argument options.
        """
        super(EIAClient, self).handle_options(**kwargs)

        if not hasattr(self, 'BA'):
            LOGGER.error('Balancing authority not set.')
            raise ValueError('Balancing authority not set.')

        if 'market' not in self.options:
            if self.options['forecast']:
                self.options['market'] = self.MARKET_CHOICES.dam
            elif self.options['sliceable'] and self.options['data'] == 'gen':
                self.options['market'] = self.MARKET_CHOICES.dam
            else:
                self.options['market'] = self.MARKET_CHOICES.hourly
        if 'freq' not in self.options:
            # hourly is currently the only supported frequency in every case
            self.options['freq'] = self.FREQUENCY_CHOICES.hourly
        if 'yesterday' not in self.options:
            self.options['yesterday'] = False
Example #6
    def unzip(self, content):
        """
        Unzip encoded data.
        Returns the unzipped content as a list, one element per file in the archive,
        or None if an error was encountered.
        ***Previous behavior: Only returned the content from the first file***
        """
        # create zip file
        try:
            filecontent = BytesIO(content)
        except TypeError:
            filecontent = StringIO(content)

        try:
            # have zipfile
            z = zipfile.ZipFile(filecontent)
        except zipfile.BadZipfile:
            LOGGER.error('%s: unzip failure for content:\n%s' % (self.NAME, content))
            return None

        # have unzipped content
        unzipped = [z.read(thisfile) for thisfile in z.namelist()]
        z.close()

        # return
        return unzipped
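
A minimal usage sketch for the unzip helper above, assuming `client` is an instance of the surrounding class; the file names and CSV contents are invented:

# Build an in-memory zip archive, then unzip it with the method above.
import io
import zipfile

buf = io.BytesIO()
with zipfile.ZipFile(buf, 'w') as zf:
    zf.writestr('gen.csv', 'timestamp,gen_MW\n')
    zf.writestr('load.csv', 'timestamp,load_MW\n')

files = client.unzip(buf.getvalue())
# -> [b'timestamp,gen_MW\n', b'timestamp,load_MW\n'], one element per file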
Example #7
    def get_generation(self,
                       latest=False,
                       yesterday=False,
                       start_at=False,
                       end_at=False,
                       **kwargs):
        """
        Scrape and parse generation fuel mix data.
        Note: Generation may be quite low for HST and NSB BAs.
        """

        self.handle_options(data='gen',
                            latest=latest,
                            yesterday=yesterday,
                            start_at=start_at,
                            end_at=end_at,
                            **kwargs)
        self.handle_ba_limitations()
        self.format_url()
        result = self.request(self.url)
        if result is not None:
            result_json = json.loads(result.text)
            result_formatted = self.format_result(result_json)
            return result_formatted
        else:
            LOGGER.error('No results for %s' % self.BA)
            return []
Example #8
    def get_ancillary_market_mcp(self, latest=False, **kwargs):
        # set up request
        url = self.base_url + '?messageType=getAncillaryMarketMCP&returnType=json'

        # carry out request
        response = self.request(url)
        if not response:
            return None

        # test for valid content
        if 'The page cannot be displayed' in response.text:
            LOGGER.error('MISO: Error in source data for Ancillary Market MCP')
            return None
        ammcp = json.loads(response.content)

        # set args
        self.handle_options(latest=latest, **kwargs)

        # get data
        if self.options['latest']:
            data = self.parse_latest_ammcp_data(ammcp)
        elif self.options['forecast']:
            data = self.parse_forecast_ammcp_data(ammcp)
        else:
            raise ValueError('Either latest or forecast must be True')
        # return good
        return self.serialize_faster(data)
Example #9
    def parse_latest_ammcp_data(self, content):
        if not content:
            return pd.DataFrame()

        # stamp each zone with the market day and time
        if self.options['latest']:
            mkt_day = content['MCPData']['MktDay']
            hour_min = content['MCPData']['RealTimeMCP']['HourAndMin']
            for zone in content['MCPData']['RealTimeMCP']['Zone']:
                zone['time'] = mkt_day + ' ' + hour_min + ':00'
        df = pd.DataFrame(content['MCPData']['RealTimeMCP']['Zone'],
                          columns=[
                              'time', 'number', 'GenRegMCP', 'GenSpinMCP',
                              'GenSuppMCP', 'DemSuppMCP', 'RegMileageMCP'
                          ])
        df['time'] = pd.to_datetime(df['time'])
        df.set_index('time', inplace=True)
        # set index
        try:
            df.index = self.utcify_index(df.index)
        except AttributeError:
            LOGGER.error(
                'MISO: Error in source data for Ancillary Market MCP data %s' %
                content)
            return pd.DataFrame()
        df.index.set_names(['timestamp'], inplace=True)

        return df
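
A sketch of the MISO payload shape this parser expects, inferred from the key accesses in the code above; the values are invented and `client` is a hypothetical instance on which handle_options has already set latest=True:

sample = {
    'MCPData': {
        'MktDay': '12-31-2017',
        'RealTimeMCP': {
            'HourAndMin': '14:35',
            'Zone': [
                {'number': '1', 'GenRegMCP': '10.50', 'GenSpinMCP': '2.10',
                 'GenSuppMCP': '0.00', 'DemSuppMCP': '0.00',
                 'RegMileageMCP': '1.20'},
            ],
        },
    },
}
df = client.parse_latest_ammcp_data(sample)  # index becomes a UTC 'timestamp'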
Example #10
    def parse_latest_fuel_mix(self, content):
        # handle bad input
        if not content:
            return pd.DataFrame()

        # preliminary parsing
        df = pd.read_csv(BytesIO(content),
                         header=0,
                         index_col=0,
                         parse_dates=True)

        # set index
        try:
            df.index = self.utcify_index(df.index)
        except AttributeError:
            LOGGER.error('MISO: Error in source data for generation %s' %
                         content)
            return pd.DataFrame()
        df.index.set_names(['timestamp'], inplace=True)

        # set names and labels
        df['fuel_name'] = df.apply(lambda x: self.fuels[x['CATEGORY']], axis=1)
        df['gen_MW'] = df['ACT']

        # return
        return df[['fuel_name', 'gen_MW']]
Example #11
    def get_load(self,
                 latest=False,
                 yesterday=False,
                 start_at=False,
                 end_at=False,
                 forecast=False,
                 **kwargs):
        """
        Scrape and parse load data.
        """

        self.handle_options(data='load',
                            latest=latest,
                            yesterday=yesterday,
                            start_at=start_at,
                            end_at=end_at,
                            forecast=forecast,
                            **kwargs)
        self.handle_ba_limitations()
        self.format_url()
        result = self.request(self.url)
        if result is not None:
            result_json = json.loads(result.text)
            result_formatted = self.format_result(result_json)
            return result_formatted
        else:
            LOGGER.error('No results for %s' % self.BA)
            return []
Example #12
    def unzip(self, content):
        """
        Unzip encoded data.
        Returns the unzipped content as a list, one element per file in the archive,
        or None if an error was encountered.
        ***Previous behavior: Only returned the content from the first file***
        """
        # create zip file
        try:
            filecontent = BytesIO(content)
        except TypeError:
            filecontent = StringIO(content)

        try:
            # have zipfile
            z = zipfile.ZipFile(filecontent)
        except zipfile.BadZipfile:
            LOGGER.error('%s: unzip failure for content:\n%s' %
                         (self.NAME, content))
            return None

        # have unzipped content
        unzipped = [z.read(thisfile) for thisfile in z.namelist()]
        z.close()

        # return
        return unzipped
Example #13
    def latest_fuel_mix(self):
        # set up request
        url = self.base_url + "/ria/FuelMix.aspx?CSV=True"

        # carry out request
        response = self.request(url)
        if not response:
            return pd.DataFrame()

        # test for valid content
        if "The page cannot be displayed" in response.text:
            LOGGER.error("MISO: Error in source data for generation")
            return pd.DataFrame()

        # preliminary parsing
        df = pd.read_csv(StringIO(response.text), header=0, index_col=0, parse_dates=True)

        # set index
        df.index = self.utcify_index(df.index)
        df.index.set_names(["timestamp"], inplace=True)

        # set names and labels
        df["fuel_name"] = df.apply(lambda x: self.fuels[x["CATEGORY"]], axis=1)
        df["gen_MW"] = df["ACT"]

        # return
        return df[["fuel_name", "gen_MW"]]
Example #14
    def fetch_oasis(self, payload=None, return_all_files=False):
        """
        Returns a list of report data elements, or an empty value (an empty string,
        or an empty list if return_all_files=True) if an error was encountered.

        If return_all_files=False, returns only the content from the first file in the .zip -
        this is the default behavior and was used in earlier versions of this function.

        If return_all_files=True, will return an array representing the content from each file.
        This is useful for processing LMP data or other fields where multiple price components are returned in a zip.
        """
        # avoid the mutable default argument pitfall
        payload = payload or {}

        # set up storage
        raw_data = []

        if return_all_files is True:
            default_return_val = []
        else:
            default_return_val = ''

        # try get
        response = self.request(self.base_url_oasis, params=payload)
        if not response:
            return default_return_val

        # read data from zip
        # This will be an array of content if successful, and None if unsuccessful
        content = self.unzip(response.content)
        if not content:
            return default_return_val

        # check xml content for errors
        soup = BeautifulSoup(content[0], 'lxml')
        error = soup.find('m:error')
        if error:
            code = error.find('m:err_code')
            desc = error.find('m:err_desc')
            msg = 'XML error for CAISO OASIS with payload %s: %s %s' % (
                payload, code, desc)
            LOGGER.error(msg)
            return default_return_val

        # return xml or csv data
        if payload.get('resultformat', False) == 6:
            # If we requested CSV files
            if return_all_files:
                return content
            else:
                return content[0]
        else:
            # Return XML content
            if return_all_files:
                raw_data = [
                    BeautifulSoup(thisfile, 'lxml').find_all('report_data')
                    for thisfile in content
                ]
                return raw_data
            else:
                raw_data = soup.find_all('report_data')
                return raw_data
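
A hedged sketch of calling fetch_oasis for CSV output; the query parameter names follow CAISO OASIS conventions, but the exact values here are illustrative assumptions, and `caiso` is a hypothetical client instance:

# Request day-ahead renewables as CSV (resultformat 6) and keep every file
# from the returned zip.
payload = {
    'queryname': 'SLD_REN_FCST',
    'market_run_id': 'DAM',
    'startdatetime': '20180101T08:00-0000',
    'enddatetime': '20180102T08:00-0000',
    'version': 1,
    'resultformat': 6,
}
csv_files = caiso.fetch_oasis(payload=payload, return_all_files=True)
for raw_csv in csv_files:
    pass  # each element is one file's content from the zip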
Example #15
 def time_from_soup(self, soup):
     """
     Returns a UTC timestamp if one is found in the soup,
     or None if an error was encountered.
     """
     ts_elt = soup.find(class_='ts')
     if not ts_elt:
         LOGGER.error('PJM: Timestamp not found in soup:\n%s' % soup)
         return None
     return self.utcify(ts_elt.string)
Example #16
    def fetch_oasis(self, payload=None, return_all_files=False):
        """
        Returns a list of report data elements, or an empty value (an empty string,
        or an empty list if return_all_files=True) if an error was encountered.

        If return_all_files=False, returns only the content from the first file in the .zip -
        this is the default behavior and was used in earlier versions of this function.

        If return_all_files=True, will return an array representing the content from each file.
        This is useful for processing LMP data or other fields where multiple price components are returned in a zip.
        """
        # avoid the mutable default argument pitfall
        payload = payload or {}

        # set up storage
        raw_data = []

        if return_all_files is True:
            default_return_val = []
        else:
            default_return_val = ''

        # try get
        response = self.request(self.base_url_oasis, params=payload)
        if not response:
            return default_return_val

        # read data from zip
        # This will be an array of content if successful, and None if unsuccessful
        content = self.unzip(response.content)
        if not content:
            return default_return_val

        # check xml content for errors
        soup = BeautifulSoup(content[0], 'lxml')
        error = soup.find('m:error')
        if error:
            code = error.find('m:err_code')
            desc = error.find('m:err_desc')
            msg = 'XML error for CAISO OASIS with payload %s: %s %s' % (payload, code, desc)
            LOGGER.error(msg)
            return default_return_val

        # return xml or csv data
        if payload.get('resultformat', False) == 6:
            # If we requested CSV files
            if return_all_files:
                return content
            else:
                return content[0]
        else:
            # Return XML content
            if return_all_files:
                raw_data = [BeautifulSoup(thisfile, 'lxml').find_all('report_data') for thisfile in content]
                return raw_data
            else:
                raw_data = soup.find_all('report_data')
                return raw_data
Example #17
    def request(self, url, mode='get', retry_sec=5, **kwargs):
        """
        Get or post to a URL with the provided kwargs.
        Returns the response, or None if an error was encountered.
        If the mode is not 'get' or 'post', raises ValueError.
        """
        # check args
        allowed_modes = ['get', 'post']
        if mode not in allowed_modes:
            raise ValueError('Invalid request mode %s' % mode)

        # check for session
        try:
            session = getattr(self, 'session')
        except AttributeError:
            self.session = requests.Session()
            session = self.session

        # carry out request
        try:
            response = getattr(session, mode)(url, verify=False,
                                              timeout=self.TIMEOUT_SECONDS,
                                              **kwargs)
        # except requests.exceptions.ChunkedEncodingError as e:
        #     # JSON incomplete or not found
        #     msg = '%s: chunked encoding error for %s, %s:\n%s' % (self.NAME, url, kwargs, e)
        #     LOGGER.error(msg)
        #     return None
        except (requests.exceptions.ConnectionError, requests.exceptions.Timeout) as e:
            # eg max retries exceeded
            msg = '%s: connection error for %s, %s:\n%s' % (self.NAME, url, kwargs, e)
            LOGGER.error(msg)
            return None
        # except requests.exceptions.RequestException:
        #     msg = '%s: request exception for %s, %s:\n%s' % (self.NAME, url, kwargs, e)
        #     LOGGER.error(msg)
        #     return None

        if response.status_code == 200:
            # success
            LOGGER.debug('%s: request success for %s, %s with cache hit %s' % (self.NAME, url, kwargs, getattr(response, 'from_cache', None)))

        elif response.status_code == 429:
            # retry on throttle
            LOGGER.warning('%s: retrying in %d seconds, throttled for %s, %s' % (self.NAME, retry_sec, url, kwargs))
            sleep(retry_sec)
            return self.request(url, mode=mode, retry_sec=retry_sec, **kwargs)

        else:
            # non-throttle error
            LOGGER.error('%s: request failure with code %s for %s, %s' % (self.NAME, response.status_code, url, kwargs))

        return response
Example #18
    def parse_oasis_renewable(self, raw_data):
        """Parse raw data output of fetch_oasis for renewables."""
        # set up storage
        preparsed_data = {}
        parsed_data = []

        # extract values from xml

        for raw_soup_dp in raw_data:
            # set up storage for timestamp
            ts = self.utcify(
                raw_soup_dp.find(['INTERVAL_START_GMT',
                                  'interval_start_gmt']).string)
            if ts not in preparsed_data:
                preparsed_data[ts] = {'wind': 0, 'solar': 0}

            # store generation value
            try:
                fuel_name = raw_soup_dp.find(
                    ['RENEWABLE_TYPE', 'renewable_type']).string.lower()
                gen_MW = float(raw_soup_dp.find(['VALUE', 'value']).string)
                preparsed_data[ts][fuel_name] += gen_MW
            except TypeError:
                LOGGER.error('Error in schema for CAISO OASIS result %s' %
                             raw_soup_dp.prettify())
                continue

        # collect values into dps
        freq = self.options.get('freq', self.FREQUENCY_CHOICES.hourly)
        market = self.options.get('market', self.MARKET_CHOICES.hourly)

        for ts, preparsed_dp in preparsed_data.items():
            # set up base
            base_parsed_dp = {
                'timestamp': ts,
                'freq': freq,
                'market': market,
                'gen_MW': 0,
                'ba_name': self.NAME
            }

            # collect data
            for fuel_name in ['wind', 'solar']:
                parsed_dp = copy.deepcopy(base_parsed_dp)
                parsed_dp['fuel_name'] = fuel_name
                parsed_dp['gen_MW'] += preparsed_dp[fuel_name]
                parsed_data.append(parsed_dp)

        # return
        return parsed_data
Example #19
 def _assert_entries_1hr_apart(self, result_ts):
     prev_entry = None
     for entry in result_ts:
         if prev_entry:
             seconds_delta = (entry['timestamp'] - prev_entry['timestamp']).total_seconds()
             if seconds_delta > 3600:
                 LOGGER.error('prev_entry timestamp: ' + str(
                     prev_entry['timestamp'].astimezone(pytz.timezone(self.nbpower_client.TZ_NAME))
                 ))
                 LOGGER.error('entry timestamp: ' + str(
                     entry['timestamp'].astimezone(pytz.timezone(self.nbpower_client.TZ_NAME))
                 ))
             self.assertEqual(3600, seconds_delta)
         prev_entry = entry
Example #20
    def handle_ba_limitations(self):
        """Handle BA limitations"""
        today = pytz.utc.localize(datetime.utcnow()).astimezone(pytz.timezone(self.TZ_NAME))
        two_days_ago = today - timedelta(days=2)
        load_not_supported_bas = ['DEAA', 'EEI', 'GRIF', 'GRMA', 'GWA',
                                  'HGMA', 'SEPA', 'WWA', 'YAD']
        delay_bas = ['AEC', 'DOPD', 'GVL', 'HST', 'NSB', 'PGE', 'SCL',
                     'TAL', 'TIDC', 'TPWR']
        canada_mexico = ['IESO', 'BCTC', 'MHEB', 'AESO', 'HQT', 'NBSO',
                         'CFE', 'SPC']
        if self.BA in delay_bas:
            if self.options['end_at'] and self.options['end_at'] > two_days_ago:
                LOGGER.error('No data for %s due to 2 day delay' % self.BA)
                raise ValueError('No data: 2 day delay for this BA.')
            elif self.options['yesterday']:
                LOGGER.error('No data for %s due to 2 day delay' % self.BA)
                raise ValueError('No data: 2 day delay for this BA.')
            elif self.options['forecast']:
                raise ValueError('No data: 2 day delay for this BA.')

        if self.BA in load_not_supported_bas:
            if self.options['data'] == 'load':
                LOGGER.error('Load data not supported for %s' % self.BA)
                raise ValueError('Load data not supported for this BA.')
        if self.BA in canada_mexico:
            LOGGER.error('Data not supported for %s' % self.BA)
            raise ValueError('Data not currently supported for Canada and Mexico')
Example #21
    def handle_ba_limitations(self):
        """Handle BA limitations"""
        today = pytz.utc.localize(datetime.utcnow()).astimezone(
            pytz.timezone(self.TZ_NAME))
        two_days_ago = today - timedelta(days=2)
        load_not_supported_bas = [
            'DEAA', 'EEI', 'GRIF', 'GRMA', 'GWA', 'HGMA', 'SEPA', 'WWA', 'YAD'
        ]
        delay_bas = [
            'AEC', 'DOPD', 'GVL', 'HST', 'NSB', 'PGE', 'SCL', 'TAL', 'TIDC',
            'TPWR'
        ]
        canada_mexico = [
            'IESO', 'BCTC', 'MHEB', 'AESO', 'HQT', 'NBSO', 'CFE', 'SPC'
        ]
        if self.BA in delay_bas:
            if self.options['end_at'] and self.options['end_at'] > two_days_ago:
                LOGGER.error('No data for %s due to 2 day delay' % self.BA)
                raise ValueError('No data: 2 day delay for this BA.')
            elif self.options['yesterday']:
                LOGGER.error('No data for %s due to 2 day delay' % self.BA)
                raise ValueError('No data: 2 day delay for this BA.')
            elif self.options['forecast']:
                raise ValueError('No data: 2 day delay for this BA.')

        if self.BA in load_not_supported_bas:
            if self.options['data'] == 'load':
                LOGGER.error('Load data not supported for %s' % self.BA)
                raise ValueError('Load data not supported for this BA.')
        if self.BA in canada_mexico:
            LOGGER.error('Data not supported for %s' % self.BA)
            raise ValueError(
                'Data not currently supported for Canada and Mexico')
Example #22
 def _assert_entries_5min_apart(self, result_ts):
     prev_entry = None
     for entry in result_ts:
         if prev_entry:
             seconds_delta = (entry['timestamp'] -
                              prev_entry['timestamp']).total_seconds()
             if seconds_delta > 300:
                 LOGGER.error('prev_entry timestamp: ' +
                              str(prev_entry['timestamp'].astimezone(
                                  pytz.timezone(self.ieso_client.TZ_NAME))))
                 LOGGER.error('entry timestamp: ' +
                              str(entry['timestamp'].astimezone(
                                  pytz.timezone(self.ieso_client.TZ_NAME))))
             self.assertEqual(300, seconds_delta)
         prev_entry = entry
Example #23
    def get_latest_fuel_mix(self):
        # set up request
        url = self.base_url + '/ria/FuelMix.aspx?CSV=True'

        # carry out request
        response = self.request(url)
        if not response:
            return None

        # test for valid content
        if 'The page cannot be displayed' in response.text:
            LOGGER.error('MISO: Error in source data for generation')
            return None

        # return good
        return response.content
Example #24
    def get_latest_fuel_mix(self):
        # set up request
        url = self.base_url + '?messageType=getfuelmix&returnType=csv'

        # carry out request
        response = self.request(url)
        if not response:
            return None

        # test for valid content
        if 'The page cannot be displayed' in response.text:
            LOGGER.error('MISO: Error in source data for generation')
            return None

        # return good
        return response.content
Example #25
    def parse_ace_data(self, content):
        if not content:
            return pd.DataFrame()

        # preliminary parsing
        df = pd.DataFrame(content, columns=['instantEST', 'value'])
        df['instantEST'] = pd.to_datetime(df['instantEST'])
        df.set_index('instantEST', inplace=True)
        # set index
        try:
            df.index = self.utcify_index(df.index)
        except AttributeError:
            LOGGER.error('MISO: Error in source data for ACE %s' % content)
            return pd.DataFrame()
        df.index.set_names(['timestamp'], inplace=True)
        return df
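
A sketch of the ACE content this parser expects, based on the column names above; the values are invented and `client` is a hypothetical instance:

sample = [
    {'instantEST': '2017-06-30 10:02:30', 'value': '-23.4'},
    {'instantEST': '2017-06-30 10:02:34', 'value': '-20.1'},
]
df = client.parse_ace_data(sample)  # UTC-indexed DataFrame with a 'value' column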
Example #27
 def _format_start_end(self, data):
     formatted_sliced = []
     if 'gen' not in self.options['data']:
         formatted_sliced = [i for i in data if i['timestamp'] >= self.options['start_at'] and i['timestamp'] <= self.options['end_at']]
     else:
         try:
             yesterday = (self.local_now() - timedelta(days=2)).replace(hour=0, minute=0,
                                                                        second=0, microsecond=0)
             tomorrow = (self.local_now() + timedelta(days=1)).replace(hour=23, minute=0,
                                                                       second=0, microsecond=0)
             assert ((self.options['start_at'] >= yesterday) and (self.options['end_at'] <= tomorrow))
             formatted_sliced = [i for i in data if i['timestamp'] >= self.options['start_at'] and i['timestamp'] <= self.options['end_at']]
         except AssertionError:
             LOGGER.error('Generation data error for %s' % self.BA)
             raise ValueError('Generation data is only available for the '
                              'previous and current day.', self.options)
     return formatted_sliced
Example #28
    def val_from_soup(self, soup, key):
        """
        Returns a float value if one is found in the soup for the provided key,
        or None if an error was encountered.
        """
        for elt in soup.find_all('td'):
            try:
                if elt.find('a').string == key:
                    # numbers may have commas in the thousands
                    val_str = elt.next_sibling.string.replace(',', '')
                    return float(val_str)
            except AttributeError:  # no 'a' child
                continue

        # no value found
        LOGGER.error('PJM: Value for %s not found in soup:\n%s' % (key, soup))
        return None
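
A hypothetical usage sketch; the markup mirrors what val_from_soup expects, a td cell whose anchor text is the key followed by a sibling cell holding the number, and `client` is an assumed instance:

from bs4 import BeautifulSoup

html = '<table><tr><td><a>PJM RTO Total</a></td><td>91,419</td></tr></table>'
soup = BeautifulSoup(html, 'lxml')
client.val_from_soup(soup, 'PJM RTO Total')  # -> 91419.0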
Example #29
    def get_load(self, latest=False, yesterday=False, start_at=False,
                 end_at=False, forecast=False, **kwargs):
        """
        Scrape and parse load data.
        """

        self.handle_options(data='load', latest=latest, yesterday=yesterday,
                            start_at=start_at, end_at=end_at, forecast=forecast,
                            **kwargs)
        self.handle_ba_limitations()
        self.format_url()
        result = self.request(self.url)
        if result is not None:
            result_json = json.loads(result.text)
            result_formatted = self.format_result(result_json)
            return result_formatted
        else:
            LOGGER.error('No results for %s' % self.BA)
            return []
Example #30
    def get_latest_ace(self):
        # set up request
        url = self.base_url + '?messageType=getACE&returnType=json'

        # carry out request
        response = self.request(url)
        if not response:
            return None

        # test for valid content
        if 'The page cannot be displayed' in response.text:
            LOGGER.error('MISO: Error in source data for ACE')
            return None
        ace = json.loads(response.content)

        # return good
        data = self.parse_ace_data(ace['ACE'])
        return self.serialize_faster(data)
Example #31
    def get_generation(self, latest=False, yesterday=False,
                       start_at=False, end_at=False, **kwargs):
        """
        Scrape and parse generation fuel mix data.
        Note: Generation may be quite low for HST and NSB BAs.
        """

        self.handle_options(data='gen', latest=latest, yesterday=yesterday,
                            start_at=start_at, end_at=end_at, **kwargs)
        self.handle_ba_limitations()
        self.format_url()
        result = self.request(self.url)
        if result is not None:
            result_json = json.loads(result.text)
            result_formatted = self.format_result(result_json)
            return result_formatted
        else:
            LOGGER.error('No results for %s' % self.BA)
            return []
Example #32
    def get_generation(self, latest=False, **kwargs):
        # set args
        self.handle_options(data='gen', latest=latest, **kwargs)

        # get data
        load_ts, load_val = self.fetch_edata_point('instLoad', 'PJM RTO Total')
        imports_ts, imports_val = self.fetch_edata_point('tieFlow', 'PJM RTO')
        wind_ts, wind_gen = self.fetch_edata_point('wind', 'RTO Wind Power')

        # compute nonwind gen
        try:
            total_gen = load_val - imports_val
            nonwind_gen = total_gen - wind_gen
        except TypeError:  # value was None
            LOGGER.error('PJM: Missing values for options %s' % str(self.options))
            return []

        # choose best time to use
        if load_ts:
            ts = load_ts
        elif imports_ts:
            ts = imports_ts
        elif wind_ts:
            ts = wind_ts
        else:
            LOGGER.error('PJM: No timestamps found for options %s' % str(self.options))
            return []

        # set up storage
        parsed_data = []
        base_dp = {'timestamp': ts,
                   'freq': self.FREQUENCY_CHOICES.fivemin, 'market': self.MARKET_CHOICES.fivemin,
                   'gen_MW': 0, 'ba_name': self.NAME}

        # collect data
        for gen_MW, fuel_name in [(wind_gen, 'wind'), (nonwind_gen, 'nonwind')]:
            parsed_dp = copy.deepcopy(base_dp)
            parsed_dp['fuel_name'] = fuel_name
            parsed_dp['gen_MW'] = gen_MW
            parsed_data.append(parsed_dp)

        # return
        return parsed_data
Example #33
    def parse_oasis_renewable(self, raw_data):
        """Parse raw data output of fetch_oasis for renewables."""
        # set up storage
        preparsed_data = {}
        parsed_data = []

        # extract values from xml

        for raw_soup_dp in raw_data:
            # set up storage for timestamp
            ts = self.utcify(raw_soup_dp.find('interval_start_gmt').string)
            if ts not in preparsed_data:
                preparsed_data[ts] = {'wind': 0, 'solar': 0}

            # store generation value
            try:
                fuel_name = raw_soup_dp.find('renewable_type').string.lower()
                gen_MW = float(raw_soup_dp.find('value').string)
                preparsed_data[ts][fuel_name] += gen_MW
            except TypeError:
                LOGGER.error('Error in schema for CAISO OASIS result %s' % raw_soup_dp.prettify())
                continue

        # collect values into dps
        freq = self.options.get('freq', self.FREQUENCY_CHOICES.hourly)
        market = self.options.get('market', self.MARKET_CHOICES.hourly)

        for ts, preparsed_dp in preparsed_data.items():
            # set up base
            base_parsed_dp = {'timestamp': ts,
                              'freq': freq,
                              'market': market,
                              'gen_MW': 0, 'ba_name': self.NAME}

            # collect data
            for fuel_name in ['wind', 'solar']:
                parsed_dp = copy.deepcopy(base_parsed_dp)
                parsed_dp['fuel_name'] = fuel_name
                parsed_dp['gen_MW'] += preparsed_dp[fuel_name]
                parsed_data.append(parsed_dp)

        # return
        return parsed_data
Example #34
 def format_url(self):
     """Set EIA API URL based on options"""
     if self.options['data'] == 'gen':
         if self.options['forecast']:
             LOGGER.error('Forecast not supported for generation.')
             raise ValueError('Forecast not supported for generation.')
         else:
             self.set_url('series', '-ALL.NG.H')
     elif self.options['data'] == 'load':
         if self.options['forecast']:
             self.set_url('series', '-ALL.DF.H')
         else:
             self.set_url('series', '-ALL.D.H')
     elif self.options['data'] == 'trade':
         if self.options['forecast']:
             LOGGER.error('Forecast not supported for trade.')
             raise ValueError('Forecast not supported for trade.')
         elif self.options['end_at']:
             if self.options['end_at'] > pytz.utc.localize(
                     datetime.utcnow()):
                 LOGGER.error('Forecast not supported for trade.')
                 raise ValueError('Forecast not supported for trade.')
             else:
                 self.set_url('series', '-ALL.TI.H')
         else:
             self.set_url('series', '-ALL.TI.H')
Example #35
 def format_url(self):
     """Set EIA API URL based on options"""
     if self.options['data'] == 'gen':
         if self.options['forecast']:
             LOGGER.error('Forecast not supported for generation.')
             raise ValueError('Forecast not supported for generation.')
         else:
             self.set_url('series', '-ALL.NG.H')
     elif self.options['data'] == 'load':
         if self.options['forecast']:
              self.set_url('series', '-ALL.DF.H')
         else:
             self.set_url('series', '-ALL.D.H')
     elif self.options['data'] == 'trade':
         if self.options['forecast']:
              LOGGER.error('Forecast not supported for trade.')
             raise ValueError('Forecast not supported for trade.')
         elif self.options['end_at']:
             if self.options['end_at'] > pytz.utc.localize(datetime.utcnow()):
                  LOGGER.error('Forecast not supported for trade.')
                 raise ValueError('Forecast not supported for trade.')
             else:
                 self.set_url('series', '-ALL.TI.H')
         else:
             self.set_url('series', '-ALL.TI.H')
Example #36
    def format_result(self, data):
        """Output EIA API results in pyiso format"""
        if 'series' not in data:
            LOGGER.error('Unable to format result for %s' % data.get('request'))
            raise ValueError('Query error for %s' % data.get('request'))
        market = self._set_market()
        data_type = self._set_data_type()
        data_formatted = []
        if self.options['latest']:
            data_formatted = self._format_latest(data, data_type, market)
        elif self.options['yesterday']:
            data_formatted = self._format_yesterday(data, data_type, market)
        else:
            data_formatted = self._format_general(data, data_type, market)

        if self.options['start_at'] and self.options['end_at']:
            data_formatted = self._format_start_end(data_formatted)
        if self.options['data'] == 'gen':
            data_formatted = self.add_gen_data(data_formatted)
        return data_formatted
Example #37
    def format_result(self, data):
        """Output EIA API results in pyiso format"""
        if 'series' not in data:
            LOGGER.error('Unable to format result for %s' % data.get('request'))
            raise ValueError('Query error for %s' % data.get('request'))
        market = self._set_market()
        data_type = self._set_data_type()
        data_formatted = []
        if self.options['latest']:
            data_formatted = self._format_latest(data, data_type, market)
        elif self.options['yesterday']:
            data_formatted = self._format_yesterday(data, data_type, market)
        else:
            data_formatted = self._format_general(data, data_type, market)

        if self.options['start_at'] and self.options['end_at']:
            data_formatted = self._format_start_end(data_formatted)
        if self.options['data'] == 'gen':
            data_formatted = self.add_gen_data(data_formatted)
        return data_formatted
Example #38
    def parse_oasis_renewable(self, raw_data):
        """Parse raw data output of fetch_oasis for renewables."""
        # set up storage
        preparsed_data = {}
        parsed_data = []

        # extract values from xml
        for raw_soup_dp in raw_data:
            # set up storage for timestamp
            ts = self.utcify(raw_soup_dp.find("interval_start_gmt").string)
            if ts not in preparsed_data:
                preparsed_data[ts] = {"wind": 0, "solar": 0}

            # store generation value
            try:
                fuel_name = raw_soup_dp.find("renewable_type").string.lower()
                gen_MW = float(raw_soup_dp.find("value").string)
                preparsed_data[ts][fuel_name] += gen_MW
            except TypeError:
                LOGGER.error("Error in schema for CAISO OASIS result %s" % raw_soup_dp.prettify())
                continue

        # collect values into dps
        freq = self.options.get("freq", self.FREQUENCY_CHOICES.hourly)
        market = self.options.get("market", self.MARKET_CHOICES.hourly)

        for ts, preparsed_dp in preparsed_data.items():
            # set up base
            base_parsed_dp = {"timestamp": ts, "freq": freq, "market": market, "gen_MW": 0, "ba_name": self.NAME}

            # collect data
            for fuel_name in ["wind", "solar"]:
                parsed_dp = copy.deepcopy(base_parsed_dp)
                parsed_dp["fuel_name"] = fuel_name
                parsed_dp["gen_MW"] += preparsed_dp[fuel_name]
                parsed_data.append(parsed_dp)

        # return
        return parsed_data
Example #39
    def parse_latest_fuel_mix(self, content):
        # handle bad input
        if not content:
            return pd.DataFrame()

        # preliminary parsing
        df = pd.read_csv(BytesIO(content), header=0, index_col=0, skiprows=2, parse_dates=True)

        # set index
        try:
            df.index = self.utcify_index(df.index)
        except AttributeError:
            LOGGER.error('MISO: Error in source data for generation %s' % content)
            return pd.DataFrame()
        df.index.set_names(['timestamp'], inplace=True)

        # set names and labels
        df['fuel_name'] = df.apply(lambda x: self.fuels[x['CATEGORY']], axis=1)
        df['gen_MW'] = df['ACT']

        # return
        return df[['fuel_name', 'gen_MW']]
Example #40
    def time_as_of(self, content):
        """
        Returns a UTC timestamp if one is found in the html content,
        or None if an error was encountered.
        """
        # soup it up
        soup = BeautifulSoup(content, 'lxml')

        # like 12.11.2015 17:15
        ts_elt = soup.find(id='ctl00_ContentPlaceHolder1_DateAndTime')
        if not ts_elt:
            LOGGER.error('PJM: Timestamp not found in soup:\n%s' % soup)
            return None
        ts_str = ts_elt.string

        # EDT or EST
        tz_elt = ts_elt.next_sibling
        tz_str = tz_elt.string.strip()
        is_dst = tz_str == 'EDT'

        # utcify and return
        return self.utcify(ts_str, is_dst=is_dst)
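
A hypothetical sketch of the markup time_as_of parses; the element id comes from the code above, while the surrounding layout and `client` instance are assumptions:

html = ('<span id="ctl00_ContentPlaceHolder1_DateAndTime">12.11.2015 17:15</span>'
        '<span>EST</span>')
client.time_as_of(html)  # -> UTC datetime; the EDT/EST suffix drives is_dst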
Example #42
 def set_ba(self, bal_auth):
     if bal_auth in self.EIA_BAs:
         self.BA = bal_auth
     else:
         LOGGER.error('Unknown BA: %s' % bal_auth)
         raise ValueError('Unknown BA: %s' % bal_auth)
Example #43
    def request(self,
                url,
                mode='get',
                retry_sec=5,
                retries_remaining=5,
                **kwargs):
        """
        Get or post to a URL with the provided kwargs.
        Returns the response, or None if an error was encountered.
        If the mode is not 'get' or 'post', raises ValueError.
        """
        # check args
        allowed_modes = ['get', 'post']
        if mode not in allowed_modes:
            raise ValueError('Invalid request mode %s' % mode)

        # check for session
        try:
            session = getattr(self, 'session')
        except AttributeError:
            self.session = requests.Session()
            session = self.session

        # carry out request
        try:
            response = getattr(session, mode)(url,
                                              verify=False,
                                              timeout=self.timeout_seconds,
                                              **kwargs)
        # except requests.exceptions.ChunkedEncodingError as e:
        #     # JSON incomplete or not found
        #     msg = '%s: chunked encoding error for %s, %s:\n%s' % (self.NAME, url, kwargs, e)
        #     LOGGER.error(msg)
        #     return None
        except (requests.exceptions.ConnectionError,
                requests.exceptions.Timeout) as e:
            # eg max retries exceeded
            msg = '%s: connection error for %s, %s:\n%s' % (self.NAME, url,
                                                            kwargs, e)
            LOGGER.error(msg)
            return None
        # except requests.exceptions.RequestException:
        #     msg = '%s: request exception for %s, %s:\n%s' % (self.NAME, url, kwargs, e)
        #     LOGGER.error(msg)
        #     return None

        if response.status_code == 200:
            # success
            LOGGER.debug('%s: request success for %s, %s with cache hit %s' %
                         (self.NAME, url, kwargs,
                          getattr(response, 'from_cache', None)))

        elif response.status_code == 429:
            if retries_remaining > 0:
                # retry on throttle
                LOGGER.warning(
                    '%s: retrying in %d seconds (%d retries remaining), throttled for %s, %s'
                    % (self.NAME, retry_sec, retries_remaining, url, kwargs))
                sleep(retry_sec)
                retries_remaining -= 1
                return self.request(url,
                                    mode=mode,
                                    retry_sec=retry_sec * 2,
                                    retries_remaining=retries_remaining,
                                    **kwargs)
            else:
                # exhausted retries
                LOGGER.warning('%s: exhausted retries for %s, %s' %
                               (self.NAME, url, kwargs))
                return None

        else:
            # non-throttle error
            LOGGER.error('%s: request failure with code %s for %s, %s' %
                         (self.NAME, response.status_code, url, kwargs))

        return response
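
A usage sketch of the retry behavior above, assuming `client` is an instance of the class; with these arguments, a persistently throttled URL is retried after 5, 10, 20, 40, and 80 seconds (retry_sec doubles on each attempt) before returning None:

response = client.request('http://example.com/api', retry_sec=5,
                          retries_remaining=5)
if response is None:
    # connection error, timeout, or exhausted 429 retries
    pass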
Example #44
    def _generation_historical(self):
        # set up storage
        parsed_data = []

        # collect data
        request_date = self.options['start_at'].astimezone(self.ca_tz).date()
        local_end_at = self.options['end_at'].astimezone(self.ca_tz).date()
        while request_date <= local_end_at:
            # set up request
            url_file = request_date.strftime('%Y%m%d_DailyRenewablesWatch.txt')
            url = self.base_url_gen + url_file

            # carry out request
            response = self.request(url)
            if not response:
                request_date += timedelta(days=1)
                continue

            dst_error_text = 'The supplied DateTime represents an invalid time.  For example, when the clock is ' \
                             'adjusted forward, any time in the period that is skipped is invalid.'
            header_idx = 1
            for part in [1, 2]:  # process both halves of page (i.e. two parts)
                num_data_rows = 24

                # The day transitioning to daylight saving time adds extra erroneous lines of text.
                if part == 1 and dst_error_text in response.text:
                    num_data_rows = 29

                df = self.parse_to_df(response.text, nrows=num_data_rows, header=header_idx, delimiter='\t+')

                # The day transitioning to daylight saving time has errors in part two of the file that need removal.
                if part == 2:
                    df = df[df.THERMAL.map(str) != '#VALUE!']

                # combine date with hours to index
                try:
                    indexed = self.set_dt_index(df, request_date, df['Hour'])
                except Exception as e:
                    LOGGER.error(e)
                    continue

                # original header is fuel names
                indexed.rename(columns=self.fuels, inplace=True)

                # remove non-fuel cols
                fuel_cols = list(set(self.fuels.values()) & set(indexed.columns))
                subsetted = indexed[fuel_cols]

                # pivot
                pivoted = self.unpivot(subsetted)
                pivoted.rename(columns={'level_1': 'fuel_name', 0: 'gen_MW'}, inplace=True)

                # slice times
                sliced = self.slice_times(pivoted)

                # store
                parsed_data += self.serialize(
                    sliced,
                    header=['timestamp', 'fuel_name', 'gen_MW'],
                    extras={'ba_name': self.NAME,
                            'market': self.MARKET_CHOICES.hourly,
                            'freq': self.FREQUENCY_CHOICES.hourly})

                # If processing the first part, set the header index for second part.
                if part == 1:
                    header_idx = num_data_rows + 3

            # finish day
            request_date += timedelta(days=1)

        # return
        return parsed_data
Example #46
    def _generation_historical(self):
        # set up storage
        parsed_data = []

        # collect data
        request_date = self.options['start_at'].astimezone(self.ca_tz).date()
        local_end_at = self.options['end_at'].astimezone(self.ca_tz).date()
        while request_date <= local_end_at:
            # set up request
            url_file = request_date.strftime('%Y%m%d_DailyRenewablesWatch.txt')
            url = self.base_url_gen + url_file

            # carry out request
            response = self.request(url)
            if not response:
                request_date += timedelta(days=1)
                continue

            dst_error_text = 'The supplied DateTime represents an invalid time.  For example, when the clock is ' \
                             'adjusted forward, any time in the period that is skipped is invalid.'
            header_idx = 1
            for part in [1, 2]:  # process both halves of page (i.e. two parts)
                num_data_rows = 24

                # The day transitioning to daylight saving time adds extra erroneous lines of text.
                if part == 1 and dst_error_text in response.text:
                    num_data_rows = 29

                df = self.parse_to_df(response.text,
                                      nrows=num_data_rows,
                                      header=header_idx,
                                      delimiter='\t+')

                # The day transitioning to daylight saving time has errors in part two of the file that need removal.
                if part == 2:
                    df = df[df.THERMAL.map(str) != '#VALUE!']

                # combine date with hours to index
                try:
                    indexed = self.set_dt_index(df, request_date, df['Hour'])
                except Exception as e:
                    LOGGER.error(e)
                    continue

                # original header is fuel names
                indexed.rename(columns=self.fuels, inplace=True)

                # remove non-fuel cols
                fuel_cols = list(
                    set(self.fuels.values()) & set(indexed.columns))
                subsetted = indexed[fuel_cols]

                # pivot
                pivoted = self.unpivot(subsetted)
                pivoted.rename(columns={
                    'level_1': 'fuel_name',
                    0: 'gen_MW'
                },
                               inplace=True)

                # slice times
                sliced = self.slice_times(pivoted)

                # store
                parsed_data += self.serialize(
                    sliced,
                    header=['timestamp', 'fuel_name', 'gen_MW'],
                    extras={
                        'ba_name': self.NAME,
                        'market': self.MARKET_CHOICES.hourly,
                        'freq': self.FREQUENCY_CHOICES.hourly
                    })

                # If processing the first part, set the header index for second part.
                if part == 1:
                    header_idx = num_data_rows + 3

            # finish day
            request_date += timedelta(days=1)

        # return
        return parsed_data