def transform(self, **kwargs):
    """
    Wrapper function to perform all transformation steps for meters.
    """
    super().transform()
    logger.info("Transforming the data to create workable pd DataFrame")
    self._rename_columns()
    self._expand_dates()
def load(self):
    """
    Load the data into the firestore, but only when a local emulator is configured.
    """
    if "FIRESTORE_EMULATOR_HOST" in os.environ:
        logger.info("Loading into emulated firestore")
        super().load()
    else:
        logger.warning(
            "Attempting to load with a local ETL process but no emulator is configured. Loading aborted."
        )
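# A minimal sketch of running this load() locally against the Firestore
# emulator. The emulator address and the ETL class/constructor arguments are
# assumptions for illustration only.
import os

os.environ["FIRESTORE_EMULATOR_HOST"] = "localhost:8080"  # assumed emulator address

etl = LocalETL(client_name="example_client")  # hypothetical class and arguments
etl.extract()
etl.transform()
etl.load()  # passes the emulator check above instead of logging a warning and aborting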
def load(self, **kwargs):
    """
    Wrapper function to perform all loading steps for updated project info.
    """
    logger.info("Loading the updated project info table to the firestore")
    record = self.extracted_data.record_project_info
    record["record"] = self.transformed_data.record
    firestore.Client().collection("ProjectInfo").document(
        f"{self.client_name}_project_dates").set(record)
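# A minimal sketch of reading the document written above back from the
# firestore, assuming google-cloud-firestore is configured the same way as in
# load(); the client name used here is an assumption.
from google.cloud import firestore

snapshot = (
    firestore.Client()
    .collection("ProjectInfo")
    .document("example_client_project_dates")  # assumed client_name
    .get()
)
data = snapshot.to_dict()
# data holds the original project-info fields plus the "record" key added by
# load(), containing the updated table produced by transform().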
def transform(self):
    logger.info("Transforming the data following the Capacity protocol")
    logger.info(
        "Transforming by using the extracted data directly. There was no previously transformed data"
    )
    self.transformed_data = copy.deepcopy(self.extracted_data)
    self._fix_dates()
    self.fill_projectspecific_phase_config()
    self.transform_bis_etl()
    self.add_bis_etl_to_transformed_data()
    self._combine_and_reformat_data_for_capacity_analysis()
def extract(self):
    """
    Wrapper function to perform all extraction steps.
    """
    logger.info("Extracting the aansluitingen")
    self._extract_from_sql()
    logger.info("Extracting mappings of bnumber vs projectname")
    self.extracted_data.map_bnumber_vs_projectname_sql = (
        get_map_bnumber_vs_project_from_sql())
    self.extracted_data.map_bnumber_vs_projectname_fc = (
        self._get_map_bnumber_vs_project_from_fc())
    logger.info("Extracting Project info from firestore as record")
    self.extracted_data.record_project_info = self.get_project_info_record(
        self.client_name)
    logger.info("Extracting Project info from excel")
    self.extracted_data.excel_project_info = self._get_project_info_excel()
def transform(self, **kwargs):
    """
    Wrapper function to perform all transformation steps for updated project info.
    """
    logger.info("Making complete mapping of bnumber vs projectname")
    map_bnumber_vs_projectname = self._make_map_bnumber_vs_projectname()
    logger.info("Making project info table based on firestore record")
    df = self._make_table_from_record_project_info()
    logger.info("Making project info table based on excel")
    df_newinfo = self._make_table_from_excel_project_info().drop(
        columns=["doorlooptijd", "project"])
    logger.info(
        "Updating project info table from record based on information in excel")
    df_updated = self.update_project_info(df, df_newinfo,
                                          map_bnumber_vs_projectname)
    self.transformed_data.record = df_updated.to_dict(orient="dict")
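# A small illustration of the column-oriented dict that to_dict(orient="dict")
# produces and that load() pushes to the firestore; the column names, index
# value and dates below are hypothetical.
import pandas as pd

df_example = pd.DataFrame(
    {"first_date": ["2021-02-01"], "last_date": ["2021-03-01"]},
    index=["B123"],
)
df_example.to_dict(orient="dict")
# {'first_date': {'B123': '2021-02-01'}, 'last_date': {'B123': '2021-03-01'}}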
def _expand_dates(self):
    """
    Expands the dataframe into a timeseries spanning the complete date range
    between the first and last date, filling missing dates with zeroes.
    """
    logger.info("Expanding dates to create date-based index")

    def transform_weeknumbers(x):
        """
        Transforms an input week string into a datetime object.

        Args:
            x: input week string in "%Y_%W" format.

        Returns:
            datetime object with the first date (Monday) of the input week;
            weeks outside 2021 are shifted back by one week.
        """
        if x.startswith("2021_"):
            return pd.to_datetime(x + "1", format="%Y_%W%w")
        else:
            return (pd.to_datetime(
                x + "1", format="%Y_%W%w")) - pd.to_timedelta(7, unit="d")

    # Parse the week strings into proper dates and index by project and date.
    self.transformed_data.df["date"] = self.transformed_data.df[
        "date"].apply(transform_weeknumbers)
    self.transformed_data.df = self.transformed_data.df.set_index(
        ["project", "date"])
    self.transformed_data.df = self.transformed_data.df[
        ~self.transformed_data.df.duplicated()]
    # Build a daily range from the first date through the end of the last week.
    df_date = pd.date_range(
        start=self.transformed_data.df.index.get_level_values(1).min(),
        end=(self.transformed_data.df.index.get_level_values(1).max() +
             pd.to_timedelta(6, unit="d")),
        freq="D",
    )
    self.transformed_data.df = self.transformed_data.df.reindex(
        df_date, fill_value=None, level=1)
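# A quick illustration of the week-string parsing in transform_weeknumbers,
# assuming inputs of the form "%Y_%W" with "1" appended to select Monday.
import pandas as pd

# Week 05 of 2021 resolves to the Monday of that week:
pd.to_datetime("2021_05" + "1", format="%Y_%W%w")
# Timestamp('2021-02-01 00:00:00')

# Any other year is shifted back by one week, mirroring the else-branch:
pd.to_datetime("2020_05" + "1", format="%Y_%W%w") - pd.to_timedelta(7, unit="d")
# Timestamp('2020-01-27 00:00:00')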