Esempio n. 1
0
def label_holidays2(frame: DataFrame, colname: str = "date") -> DataFrame:
    """Add a boolean column telling whether `colname` holds a Belgian holiday.

    Args:
        frame: The Spark DataFrame to extend.
        colname: Name of the column in `frame` that holds the dates to check.

    Returns:
        `frame` with an extra column ``is_belgian_holiday`` that is true
        where the value of `colname` is one of the Belgian holidays of the
        years 2015 up to and including 2019.
    """

    # A more efficient implementation of `label_holidays`. Major downside is
    # that the range of years needs to be known a priori. Put them in a config
    # file or extract the range from the data beforehand.
    holidays_be = holidays.BE(years=list(range(2015, 2020)))

    # `isin` has to be applied to the Column expression, not to the DataFrame
    # returned by `withColumn`; otherwise the new column would merely be a
    # copy of `colname` and the call would fail on the DataFrame.
    is_holiday = col(colname).isin(list(holidays_be.keys()))
    return frame.withColumn("is_belgian_holiday", is_holiday)
Esempio n. 2
0
def label_holidays3(frame: DataFrame, colname: str = "date") -> DataFrame:
    """Flag Belgian holidays in `frame` by joining against a lookup table.

    A small DataFrame holding one row per Belgian holiday (years 2015 up to
    and including 2019) is left-joined onto `frame` using `colname` as the
    key. Rows without a match get ``False`` in the new
    ``is_belgian_holiday`` column.
    """

    # Another more efficient implementation of `label_holidays`. Same downsides
    # as label_holidays2, but scales better.
    flag_column = "is_belgian_holiday"
    belgian_calendar = holidays.BE(years=list(range(2015, 2020)))

    # Two non-nullable fields: the join key and the (always true) flag.
    lookup_schema = StructType([
        StructField(colname, DateType(), False),
        StructField(flag_column, BooleanType(), False),
    ])
    lookup_rows = [(holiday, True) for holiday in belgian_calendar.keys()]

    session = SparkSession.builder.getOrCreate()
    holidays_lookup = session.createDataFrame(data=lookup_rows,
                                              schema=lookup_schema)

    # The left join leaves nulls for non-holidays; turn those into False.
    labelled = frame.join(holidays_lookup, on=colname, how="left")
    return labelled.na.fill(False, [flag_column])
Esempio n. 3
0
def is_belgian_holiday(date: datetime.date):
    """Tell whether `date` is contained in a fresh ``holidays.BE()`` calendar."""
    return date in holidays.BE()
Esempio n. 4
0
 def judge_local_holiday(self, df):
     """Flag rows whose visit date is a public holiday in the visitor's country.

     Args:
         df: DataFrame with a ``geoNetwork_country`` column (country names)
             and a ``visitId`` column whose values provide a ``.date()``
             method.

     Returns:
         A numpy integer array with one entry per row of `df`: 1 when the
         visit date is a holiday in the row's country, 0 otherwise. Rows
         whose country has no calendar in the table below are reported as
         0, because a missing value cannot be stored in an integer array.
     """
     # Country name -> name of the calendar class in the `holidays` package.
     calendar_names = {
         'United States': 'US', 'India': 'India', 'Canada': 'CA',
         'Germany': 'DE', 'Japan': 'JP', 'France': 'FRA',
         'Mexico': 'MX', 'Australia': 'AU', 'Spain': 'ES',
         'Netherlands': 'NL', 'Italy': 'IT', 'Ireland': 'IE',
         'Sweden': 'SE', 'Argentina': 'AR', 'Colombia': 'CO',
         'Belgium': 'BE', 'Switzerland': 'CH', 'Czechia': 'CZ',
         'Denmark': 'DK', 'Austria': 'AT', 'Hungary': 'HU',
         'Portugal': 'PT', 'Norway': 'NO', 'New Zealand': 'NZ',
         'South Africa': 'ZA',
     }

     country = df['geoNetwork_country']
     date = df['visitId'].apply(lambda x: x.date())
     judge_holiday = np.zeros(len(df), dtype=int)

     # Only build calendars for countries that actually occur in the data.
     for name in country.dropna().unique():
         attr = calendar_names.get(name)
         if attr is None:
             continue
         calendar = getattr(holidays, attr)()
         rows = (country == name).values
         # A calendar created without `years` holds no dates until it is
         # queried, so handing it to `Series.isin` never matches anything.
         # Testing each date with `in` lets the calendar fill in the
         # required year on demand.
         judge_holiday[rows] = [int(day in calendar) for day in date[rows]]
     return judge_holiday
Esempio n. 5
0
 def setUp(self):
     """Store a fresh ``holidays.BE()`` calendar on ``self.holidays``."""
     belgian_calendar = holidays.BE()
     self.holidays = belgian_calendar
Esempio n. 6
0
    "DE": holidays.DE(),
    "AT": holidays.AT(),
    "DK": holidays.DK(),
    "UK": holidays.UK(),
    "IE": holidays.IE(),
    "ES": holidays.ES(),
    "CZ": holidays.CZ(),
    "SK": holidays.SK(),
    "PL": holidays.PL(),
    "PT": holidays.PT(),
    "NL": holidays.NL(),
    "NO": holidays.NO(),
    "IT": holidays.IT(),
    "SE": holidays.SE(),
    "JP": holidays.JP(),
    "BE": holidays.BE(),
    "ZA": holidays.ZA(),
    "SI": holidays.SI(),
    "FI": holidays.FI(),
    "CH": holidays.CH()
}


def get_holiday():
    """Look up today's date in the holiday calendar of the current country.

    Returns:
        ``None`` when the code returned by ``get_country_code()`` has no
        entry in ``country_holidays``; otherwise whatever that calendar's
        ``get`` yields for today's date.
    """
    country_code = get_country_code()
    if country_code not in country_holidays:
        return None

    today = datetime.now().date()
    return country_holidays[country_code].get(today)


def topic_date(config):
Esempio n. 7
0
                return meta[app]['genre']
        else:
            return 'unknown'

    tqdm.pandas(desc="Adding category", position=0, leave=True)
    df['category'] = df.application.progress_apply(adding_category_row)

    return df


##################################################
# Weekends, holidays, working hours, time of day #
##################################################

# Holidays --> complete with non-standard days
be_holidays = holidays.BE()

# Schedule: each period is a (start, end) pair of times.
morning = (dt.time(hour=8, minute=30), dt.time(hour=12))
afternoon = (dt.time(hour=13, minute=30), dt.time(hour=16))

# Keys 0-4 presumably follow `datetime.weekday()` (0 = Monday) -- confirm.
# Every day gets its own list holding both periods, except day 2, which
# only has the morning period.
schedule = {day: [morning, afternoon] for day in range(5)}
schedule[2] = [morning]


def add_date_annotation(df: pd.DataFrame,