예제 #1
0
 def transform(self, **kwargs):
     """
     Wrapper function to perform all transformation steps for meters.

     Runs the parent class's ``transform`` first, then renames the columns
     and expands the dates.

     Args:
         **kwargs: accepted but not used in this method; they are not
             forwarded to ``super().transform()``.
     """
     # The parent transformation is called without any arguments.
     super().transform()
     logger.info("Transforming the data to create workable pd DataFrame")
     # Columns are renamed before the dates are expanded; presumably
     # _expand_dates relies on the renamed columns - TODO confirm.
     self._rename_columns()
     self._expand_dates()
예제 #2
0
 def load(self):
     """
     Load the data only when a firestore emulator is configured.

     If the ``FIRESTORE_EMULATOR_HOST`` environment variable is not set, a
     warning is logged and nothing is loaded. Otherwise the load is delegated
     to the parent class.
     """
     # Guard clause: without an emulator configured, refuse to load.
     if "FIRESTORE_EMULATOR_HOST" not in os.environ:
         logger.warning(
             "Attempting to load with a local ETL process but no emulator is configured. Loading aborted."
         )
         return

     logger.info("Loading into emulated firestore")
     super().load()
 def load(self, **kwargs):
     """
     Write the updated project info record to the firestore.

     The transformed record is stored under the "record" key of the extracted
     project info record (which is modified in place). That record is then
     set on the ``<client_name>_project_dates`` document of the "ProjectInfo"
     collection.

     Args:
         **kwargs: accepted but not used in this method.
     """
     logger.info("Loading the updated project info table to the firestore")

     # Same object as on extracted_data: the assignment below mutates it.
     project_info = self.extracted_data.record_project_info
     project_info["record"] = self.transformed_data.record

     collection = firestore.Client().collection("ProjectInfo")
     document = collection.document(f"{self.client_name}_project_dates")
     document.set(project_info)
예제 #4
0
 def transform(self):
     """
     Transform the extracted data following the Capacity protocol.

     Starts from a deep copy of ``self.extracted_data`` and then runs the
     transformation steps below in a fixed order.
     """
     logger.info("Transforming the data following the Capacity protocol")
     # NOTE(review): "tranformed" in the message below is a typo for
     # "transformed"; left as is because the log text is runtime output.
     logger.info(
         "Transforming by using the extracted data directly. There was no previous tranformed data"
     )
     # Deep copy, so transformed_data shares no objects with extracted_data;
     # presumably the steps below mutate transformed_data - TODO confirm.
     self.transformed_data = copy.deepcopy(self.extracted_data)
     self._fix_dates()
     self.fill_projectspecific_phase_config()
     # The BIS ETL is transformed first and only then added to the
     # transformed data, before the final combine/reformat step.
     self.transform_bis_etl()
     self.add_bis_etl_to_transformed_data()
     self._combine_and_reformat_data_for_capacity_analysis()
 def extract(self):
     """
     Extract every input and store it on ``self.extracted_data``.

     After the SQL extraction, the following attributes are set:
     ``map_bnumber_vs_projectname_sql``, ``map_bnumber_vs_projectname_fc``,
     ``record_project_info`` (fetched for ``self.client_name``) and
     ``excel_project_info``.
     """
     logger.info("Extracting the aansluitingen")
     self._extract_from_sql()

     logger.info("Extracting mappings of bnumber vs projectname")
     sql_mapping = get_map_bnumber_vs_project_from_sql()
     self.extracted_data.map_bnumber_vs_projectname_sql = sql_mapping
     fc_mapping = self._get_map_bnumber_vs_project_from_fc()
     self.extracted_data.map_bnumber_vs_projectname_fc = fc_mapping

     logger.info("Extracting Project info from firestore as record")
     firestore_record = self.get_project_info_record(self.client_name)
     self.extracted_data.record_project_info = firestore_record

     logger.info("Extracting Project info from excel")
     excel_info = self._get_project_info_excel()
     self.extracted_data.excel_project_info = excel_info
 def transform(self, **kwargs):
     """
     Wrapper function to perform all transformation steps for the project info.

     Builds the complete bnumber-vs-projectname mapping, the project info
     table from the firestore record and the project info table from excel
     (with the "doorlooptijd" and "project" columns dropped). These are
     combined through ``update_project_info``, and the result of
     ``to_dict(orient="dict")`` is stored on ``self.transformed_data.record``.

     Args:
         **kwargs: accepted but not used in this method.
     """
     logger.info("Making complete mapping of bnumber vs projectname")
     bnumber_to_project = self._make_map_bnumber_vs_projectname()

     logger.info("Making project info table based on firestore record")
     record_table = self._make_table_from_record_project_info()

     logger.info("Making project info table based on excel")
     excel_table = self._make_table_from_excel_project_info()
     excel_table = excel_table.drop(columns=["doorlooptijd", "project"])

     logger.info(
         "Update project info table from record based information in excel")
     updated_table = self.update_project_info(
         record_table, excel_table, bnumber_to_project)
     self.transformed_data.record = updated_table.to_dict(orient="dict")
예제 #7
0
    def _expand_dates(self):
        """
        Converts the "date" column to datetimes, indexes the dataframe on ("project", "date"),
        drops duplicated rows and reindexes the date level on a daily date range.

        The daily range runs from the earliest date in the index up to the latest date plus 6 days.

        NOTE(review): the original docstring said missing dates are filled with zeroes, but the
        reindex call passes fill_value=None - confirm which behavior is intended.
        """
        logger.info("Expanding dates to create date-based index")

        def transform_weeknumbers(x):
            """
            Transforms an input week label into a datetime object
            Args:
                x: input string; "1" is appended as the weekday digit (Monday for %w) and the
                   result is parsed with the format "%Y_%W%w", i.e. "<year>_<week number>".

            Returns: the parsed datetime; for labels that do not start with "2021_" the result
                     is shifted back by 7 days.

            """
            # NOTE(review): the "2021_" prefix is hard-coded; presumably the 7-day shift
            # compensates for a week-numbering offset in other years - confirm.
            if x.startswith("2021_"):
                return pd.to_datetime(x + "1", format="%Y_%W%w")
            else:
                return (pd.to_datetime(
                    x + "1", format="%Y_%W%w")) - pd.to_timedelta(7, unit="d")

        self.transformed_data.df["date"] = self.transformed_data.df[
            "date"].apply(transform_weeknumbers)
        self.transformed_data.df = self.transformed_data.df.set_index(
            ["project", "date"])
        # NOTE(review): DataFrame.duplicated() compares column values only, not the
        # ("project", "date") index that was just set, so rows of different projects/dates
        # with identical values are dropped as well - confirm this is intended.
        self.transformed_data.df = self.transformed_data.df[
            ~self.transformed_data.df.duplicated()]

        # Daily range over index level 1 ("date"); the end is extended by 6 days, presumably
        # to cover the full last week - TODO confirm.
        df_date = pd.date_range(
            start=self.transformed_data.df.index.get_level_values(1).min(),
            end=(self.transformed_data.df.index.get_level_values(1).max() +
                 pd.to_timedelta(6, unit="d")),
            freq="D",
        )
        # Reindex only the date level (level=1) against the daily range.
        self.transformed_data.df = self.transformed_data.df.reindex(
            df_date, fill_value=None, level=1)