def label_holidays2(frame: DataFrame, colname: str = "date") -> DataFrame:
    """Add a column indicating whether or not the column `colname` is a holiday.

    Args:
        frame: Spark DataFrame holding the column named by `colname`.
        colname: Name of the column whose values are compared against the
            Belgian holiday dates.

    Returns:
        `frame` with an extra boolean column `is_belgian_holiday` that is
        true where the `colname` value is one of the Belgian holiday dates
        generated for the years 2015 through 2019.
    """
    # A more efficient implementation of `label_holidays`. Major downside is
    # that the range of years needs to be known a priori. Put them in a config
    # file or extract the range from the data beforehand.
    holidays_be = holidays.BE(years=list(range(2015, 2020)))

    # The membership test has to be built on the Column expression; calling
    # `.isin` on the DataFrame returned by `withColumn` fails because a
    # DataFrame has no such method.
    is_holiday = col(colname).isin(list(holidays_be.keys()))
    return frame.withColumn("is_belgian_holiday", is_holiday)
def label_holidays3(frame: DataFrame, colname: str = "date") -> DataFrame:
    """Flag Belgian holidays in `frame` by joining against a holiday lookup frame.

    Args:
        frame: Spark DataFrame holding the column named by `colname`.
        colname: Name of the join column; the lookup frame declares it as a
            non-nullable `DateType` column of the same name.

    Returns:
        `frame` left-joined with the lookup frame, so that it carries a
        boolean column `is_belgian_holiday`: True for rows that matched a
        holiday date, False (filled in after the join) for all others.
    """
    # Shares the limitation of `label_holidays2`: the years (2015 through
    # 2019) are fixed up front. The lookup is done with a join against a
    # small holidays frame rather than with an `isin` list.
    belgian_calendar = holidays.BE(years=list(range(2015, 2020)))

    session = SparkSession.builder.getOrCreate()
    lookup_rows = [(day, True) for day in belgian_calendar.keys()]
    lookup_schema = StructType([
        StructField(colname, DateType(), False),
        StructField("is_belgian_holiday", BooleanType(), False),
    ])
    lookup = session.createDataFrame(data=lookup_rows, schema=lookup_schema)

    # A left join leaves nulls for non-holiday rows; turn those into False.
    joined = frame.join(lookup, on=colname, how="left")
    return joined.na.fill(False, ["is_belgian_holiday"])
def is_belgian_holiday(date: datetime.date):
    """Return whether `date` is contained in a fresh `holidays.BE()` calendar."""
    return date in holidays.BE()
def judge_local_holiday(self, df):
    """Flag rows whose visit date is a public holiday in the visitor's country.

    Args:
        df: DataFrame with a `geoNetwork_country` column (country names) and
            a `visitId` column whose values expose a `.date()` method.

    Returns:
        A NumPy integer array with one entry per row of `df`: 1 when the
        visit date is a holiday in the row's country, 0 otherwise. Rows
        whose country has no calendar in the table below also get 0.
    """
    # Country name as it appears in `geoNetwork_country` -> calendar factory.
    # Every country that had a holiday check is listed exactly once, which
    # includes Denmark, Austria, Hungary, Portugal and Norway.
    calendar_factories = {
        'United States': holidays.US,
        'India': holidays.India,
        'Canada': holidays.CA,
        'Germany': holidays.DE,
        'Japan': holidays.JP,
        'France': holidays.FRA,
        'Mexico': holidays.MX,
        'Australia': holidays.AU,
        'Spain': holidays.ES,
        'Netherlands': holidays.NL,
        'Italy': holidays.IT,
        'Ireland': holidays.IE,
        'Sweden': holidays.SE,
        'Argentina': holidays.AR,
        'Colombia': holidays.CO,
        'Belgium': holidays.BE,
        'Switzerland': holidays.CH,
        'Czechia': holidays.CZ,
        'Denmark': holidays.DK,
        'Austria': holidays.AT,
        'Hungary': holidays.HU,
        'Portugal': holidays.PT,
        'Norway': holidays.NO,
        'New Zealand': holidays.NZ,
        'South Africa': holidays.ZA,
    }

    country = df['geoNetwork_country']
    date = df['visitId'].apply(lambda x: x.date())

    # Start from "not a holiday" so the result is a well-defined int array
    # (NaN cannot be represented once the values are cast to int).
    judge_holiday = np.zeros(len(df), dtype=int)

    for name, make_calendar in calendar_factories.items():
        mask = (country == name).values
        if not mask.any():
            continue

        calendar = make_calendar()
        days = date[mask]
        # A calendar created without `years` starts empty and only fills in
        # a year when a date is looked up, so each distinct date is tested
        # with `in` instead of handing the calendar itself to `Series.isin`.
        holiday_days = {day for day in days.unique() if day in calendar}
        if holiday_days:
            judge_holiday[mask] = days.isin(holiday_days).values.astype(int)

    return judge_holiday
def setUp(self):
    """Store a newly created `holidays.BE()` calendar as `self.holidays`."""
    belgian_calendar = holidays.BE()
    self.holidays = belgian_calendar
"DE": holidays.DE(), "AT": holidays.AT(), "DK": holidays.DK(), "UK": holidays.UK(), "IE": holidays.IE(), "ES": holidays.ES(), "CZ": holidays.CZ(), "SK": holidays.SK(), "PL": holidays.PL(), "PT": holidays.PT(), "NL": holidays.NL(), "NO": holidays.NO(), "IT": holidays.IT(), "SE": holidays.SE(), "JP": holidays.JP(), "BE": holidays.BE(), "ZA": holidays.ZA(), "SI": holidays.SI(), "FI": holidays.FI(), "CH": holidays.CH() } def get_holiday(): country = get_country_code() if not country in country_holidays.keys(): return None return country_holidays[country].get(datetime.now().date()) def topic_date(config):
return meta[app]['genre'] else: return 'unknown' tqdm.pandas(desc="Adding category", position=0, leave=True) df['category'] = df.application.progress_apply(adding_category_row) return df

##################################################
# Weekends, holidays, working hours, time of day #
##################################################

# Holidays --> complete with non-standard days
# Belgian calendar from the `holidays` package, created without explicit
# years. The note above reads as a TODO: days that the package does not
# cover still have to be added by hand.
be_holidays = holidays.BE()

# Schedule
# Each period is a (start, end) pair of `dt.time` values:
# morning runs 08:30-12:00, afternoon runs 13:30-16:00.
morning = (dt.time(8, 30), dt.time(12))
afternoon = (dt.time(13, 30), dt.time(16))

# Periods per day, keyed by an integer 0-4. Key 2 has a morning period only.
# NOTE(review): the keys look like `weekday()` indices (0 = Monday, so 2
# would be Wednesday) -- confirm against the code that reads `schedule`.
schedule = {
    0: [morning, afternoon],
    1: [morning, afternoon],
    2: [morning],
    3: [morning, afternoon],
    4: [morning, afternoon]
}

def add_date_annotation(df: pd.DataFrame,