Beispiel #1
0
 def process_raw(self, df, page, **partition):
     """Drop reserved columns, rename the rest, and fix generator IDs.

     The caller's dataframe is left untouched; a new dataframe is returned.

     Args:
         df: Raw dataframe for a single page/partition.
         page: Page name, forwarded to ``self._metadata.get_column_map``.
         **partition: Partition keywords, forwarded to
             ``self._metadata.get_column_map``.

     Returns:
         The dataframe without columns whose label starts with
         ``'reserved'``, with columns renamed per the metadata column map,
         after being passed through ``fix_leading_zero_gen_ids``.

     Side effects:
         Resets ``self.cols_added`` to an empty list (this method adds no
         columns).
     """
     # Column labels are not guaranteed to be strings (e.g. numeric header
     # cells), so guard the prefix test instead of slicing blindly.
     reserved = [
         col for col in df.columns
         if isinstance(col, str) and col.startswith('reserved')
     ]
     # Non-in-place drop: do not mutate the dataframe owned by the caller.
     df = df.drop(columns=reserved)
     df = df.rename(
         columns=self._metadata.get_column_map(page, **partition))
     self.cols_added = []
     return fix_leading_zero_gen_ids(df)
Beispiel #2
0
 def process_raw(self, df, page, **partition):
     """Rename columns and ensure report_year and data_source are present.

     Columns are renamed with the map returned by
     ``self._metadata.get_column_map(page, **partition)``. When the renamed
     frame has no ``report_year`` column, the year is parsed from the first
     partition value, which must be a ``"%Y-%m"`` formatted string. Every
     record is tagged with ``data_source='eia860m'``, and
     ``self.cols_added`` is set to ``['data_source', 'report_year']``.
     The result is passed through ``fix_leading_zero_gen_ids``.
     """
     column_map = self._metadata.get_column_map(page, **partition)
     renamed = df.rename(columns=column_map)
     if 'report_year' not in renamed.columns:
         # Only the first partition value is consulted for the date.
         first_partition_value = list(partition.values())[0]
         report_date = datetime.strptime(first_partition_value, "%Y-%m")
         renamed = renamed.assign(report_year=report_date.year)
     renamed = renamed.assign(data_source='eia860m')
     self.cols_added = ['data_source', 'report_year']
     return fix_leading_zero_gen_ids(renamed)
Beispiel #3
0
 def process_raw(self, df, page, **partition):
     """Rename columns selected by position rather than by label.

     Emits a warning that the EIA 861 integration is experimental. The
     column map from ``self._metadata.get_column_map(page, **partition)``
     is treated as ``{column position: new name}``: its keys index into
     ``df.columns`` and its values become the new labels. Resets
     ``self.cols_added`` to an empty list and passes the result through
     ``fix_leading_zero_gen_ids``.
     """
     warnings.warn(
         "Integration of EIA 861 into PUDL is still experimental and incomplete.\n"
         "The data has not yet been validated, and the structure may change."
     )
     column_map_numeric = self._metadata.get_column_map(page, **partition)
     positions = list(column_map_numeric.keys())
     new_labels = list(column_map_numeric.values())
     # Translate positions into the labels currently found at those slots.
     current_labels = df.columns[positions]
     label_map = dict(zip(current_labels, new_labels))
     renamed = df.rename(columns=label_map)
     self.cols_added = []
     return fix_leading_zero_gen_ids(renamed)
Beispiel #4
0
    def process_raw(self, df, page, **partition):
        """
        Pre-process a raw dataframe.

        * Rename columns with the column map from the spreadsheet metadata
        * Fill in report_year from the first partition value when absent
        * Tag pages that are also listed in the EIA 860M metadata with
          data_source='eia860', so the origin of a record stays identifiable
        * Pass the frame through fix_leading_zero_gen_ids

        ``self.cols_added`` records the columns this method may have added.

        """
        column_map = self._metadata.get_column_map(page, **partition)
        df = df.rename(columns=column_map)
        if 'report_year' not in df.columns:
            # Only the first partition value is used as the report year.
            first_partition_value = list(partition.values())[0]
            df['report_year'] = first_partition_value
        self.cols_added = ['report_year']
        # Pages shared with EIA860M additionally get a data_source column.
        if page in excel.Metadata('eia860m').get_all_pages():
            df = df.assign(data_source='eia860')
            self.cols_added.append('data_source')
        return fix_leading_zero_gen_ids(df)
Beispiel #5
0
def test_fix_leading_zero_gen_ids():
    """Check that leading zeroes are stripped from EIA generator IDs."""
    # Each pair is (raw generator_id, expected generator_id).
    cases = [
        ("0001", "1"),  # All-numeric string with leading zeroes.
        ("26", "26"),  # Numeric string that needs no change.
        (100, "100"),  # Integer input is expected back as a string.
        (100.0, "100.0"),  # Float input keeps its decimal part as a string.
        ("01-A", "01-A"),  # Alphanumeric with leading zero: unchanged.
        ("HRSG-01", "HRSG-01"),  # Alphanumeric: unchanged.
    ]
    raw_ids = [raw for raw, _ in cases]
    wanted_ids = [wanted for _, wanted in cases]
    in_df = pd.DataFrame({"generator_id": raw_ids})
    expected_df = pd.DataFrame({"generator_id": wanted_ids})
    assert_frame_equal(fix_leading_zero_gen_ids(in_df), expected_df)