def parseHolidays(holidaysStr, holidayMap=None):
    """
    Build one combined holidays object from a specification string such as
    NZ[WTL,Nelson],AU[*],Northern Ireland

    holidaysStr -- the specification; it is split into (country,
                   subdivisions) pairs by HolsRe
    holidayMap  -- mapping of country name to holidays class; defaults to
                   _PYTHON_HOLIDAYS_MAP when None

    A country of "*" replaces whatever has been collected so far with the
    holidays of every class in holidayMap and ends parsing immediately.
    Countries missing from holidayMap are skipped.
    """
    if holidayMap is None:
        holidayMap = _PYTHON_HOLIDAYS_MAP

    def _blank():
        # An empty accumulator that is not tied to any country.
        empty = python_holidays.HolidayBase()
        empty.country = None
        return empty

    def _instantiate(holsCls, subdivs):
        # Restrict to the named subdivisions when some were given.
        if subdivs:
            return _parseSubdivisions(subdivs, holsCls)
        return holsCls()

    combined = _blank()
    for country, subdivisions in HolsRe.findall(holidaysStr.strip()):
        if country == "*":
            # Wildcard: start over and take every known country.
            combined = _blank()
            for holsCls in holidayMap.values():
                combined += _instantiate(holsCls, subdivisions)
            return combined

        holsCls = holidayMap.get(country)
        if holsCls is None:
            continue
        combined += _instantiate(holsCls, subdivisions)
    return combined
def get_holidays(custom_days, year):
    """Return US holidays for ``year`` plus ``custom_days`` as one DataFrame.

    Args:
        custom_days: Anything ``holidays.HolidayBase.append`` accepts
            (the value is passed to it unchanged).
        year: Year (or years) forwarded to ``holidays.US(years=...)``.

    Returns:
        A DataFrame with columns ``ds`` (holiday date) and ``holiday``
        (holiday name), sorted by ``ds`` and carrying a fresh 0..n-1 index.
    """
    import holidays

    def _as_frame(calendar):
        # One row per (date, name) entry of a holidays mapping.
        return pd.DataFrame({
            'ds': list(calendar.keys()),
            'holiday': list(calendar.values()),
        })

    us_calendar = holidays.US(years=year)
    custom_calendar = holidays.HolidayBase()
    custom_calendar.append(custom_days)

    holiday_df = pd.concat([_as_frame(us_calendar), _as_frame(custom_calendar)])
    # The original called reset_index() but discarded its result, so the
    # returned frame kept the duplicated per-source index labels. Reset after
    # sorting so the index matches the row order.
    return holiday_df.sort_values(by='ds').reset_index(drop=True)
def _easter_sunday(year):
    """Return Easter Sunday of ``year`` (Gregorian calendar) as a ``date``."""
    # Anonymous Gregorian algorithm (Meeus/Jones/Butcher).
    a = year % 19
    b, c = divmod(year, 100)
    d, e = divmod(b, 4)
    f = (b + 8) // 25
    g = (b - f + 1) // 3
    h = (19 * a + b - d - g + 15) % 30
    i, k = divmod(c, 4)
    l = (32 + 2 * e + 2 * i - h - k) % 7
    m = (a + 11 * h + 22 * l) // 451
    month, day = divmod(h + l - 7 * m + 114, 31)
    return date(year, month, day + 1)


def _non_working_days(year):
    """Return the set of dates in ``year`` that count as non-working days."""
    easter = _easter_sunday(year)
    return {
        date(year, 1, 1),              # Neujahr (New Year's Day)
        easter - timedelta(days=2),    # Karfreitag (Good Friday)
        easter + timedelta(days=1),    # Ostermontag (Easter Monday)
        date(year, 5, 1),              # Tag der Arbeit (Labour Day)
        easter + timedelta(days=39),   # Christi Himmelfahrt (Ascension Day)
        easter + timedelta(days=50),   # Pfingstmontag (Whit Monday)
        date(year, 10, 3),             # Tag der dt. Einheit (German Unity Day)
        date(year, 12, 24),            # Heiligabend (Christmas Eve)
        date(year, 12, 25),            # 1. Weihnachtsfeiertag (Christmas Day)
        date(year, 12, 26),            # 2. Weihnachtsfeiertag (Boxing Day)
        date(year, 12, 31),            # Silvester (New Year's Eve)
    }


async def previous_working_day(day):
    """Return ``day`` itself if it is a working day, else the closest earlier one.

    A working day is a Monday-Friday that is not one of the dates produced by
    ``_non_working_days``.

    Args:
        day: A ``date`` or ``datetime``. For a ``datetime`` the time of day is
            carried through unchanged.

    Returns:
        An object of the same type as ``day``.
    """
    # Stepping backwards can cross into the previous year (e.g. from 1 Jan),
    # so that year's holidays are needed too. The original only knew the
    # years around *today* and used the 2020 dates of the Easter-based
    # holidays for every year.
    first_year = max(day.year - 1, date.min.year)
    blocked = set()
    for year in range(first_year, day.year + 1):
        blocked |= _non_working_days(year)

    candidate = day
    while True:
        weekday = candidate.weekday()
        if weekday >= 5:
            # Saturday (5) / Sunday (6) -> the preceding Friday.
            candidate = candidate - timedelta(days=weekday - 4)
        # ``blocked`` holds plain dates; compare a datetime by its date part.
        as_date = candidate.date() if isinstance(candidate, datetime) else candidate
        if as_date not in blocked:
            return candidate
        candidate = candidate - timedelta(days=1)
def is_US_holiday(est_date):
    """Report whether ``est_date`` is a US holiday or a listed Good Friday.

    The date is first looked up in ``holidays.UnitedStates()``. Good Friday is
    not found there, so a hand-maintained table covering 2017-2026 is
    consulted as a fallback.

    Args:
        est_date: Any key accepted by the ``get`` method of the holidays
            objects (the original usage notes show ISO strings such as
            "2018-03-30").

    Returns:
        The string ``'not holiday'`` when no holiday matches, otherwise the
        tuple ``('is holiday', <holiday name>)``. Note the two shapes differ:
        the first is a bare string, not a 1-tuple.
    """
    # ``get`` yields None for a non-holiday, else the holiday's name
    # (e.g. 'Independence Day').
    holiday = holidays.UnitedStates().get(est_date)

    if holiday is None:
        # Good Friday needs to be added as a custom holiday.
        good_fridays = holidays.HolidayBase()
        good_fridays.append({
            '2017-04-14': 'Good Friday',
            '2018-03-30': 'Good Friday',
            '2019-04-19': 'Good Friday',
            '2020-04-10': 'Good Friday',
            '2021-04-02': 'Good Friday',
            '2022-04-15': 'Good Friday',
            '2023-04-07': 'Good Friday',
            '2024-03-29': 'Good Friday',
            '2025-04-18': 'Good Friday',
            '2026-04-03': 'Good Friday',
        })
        holiday = good_fridays.get(est_date)

    if holiday is None:
        return 'not holiday'
    return ('is holiday', holiday)
def _load_holidays(year: int, config: dict) -> holidays.HolidayBase:
    """Collect the configured holidays for ``year`` into one HolidayBase.

    Called afresh on every update so the data follows the calendar as the
    years change.

    Each entry under ``CONF_SOURCES`` names a country class of the
    ``holidays`` module plus its constructor options. Holidays of that
    country are selected by the ``CONF_FILTER`` queries, dropped when their
    lower-cased name is listed in ``CONF_EXCLUDE``, and - unless
    ``CONF_MULTIDAY`` is truthy - added only when no holiday of the same name
    has been collected yet.
    """
    collected = holidays.HolidayBase()
    for source in config.get(CONF_SOURCES):
        country_cls = getattr(holidays, source.get(CONF_COUNTRY))
        available = country_cls(
            state=source.get(CONF_STATE),
            prov=source.get(CONF_PROVINCE),
            observed=source.get(CONF_OBSERVED),
            years=year,
            **source.get(CONF_KWARGS),
        )
        excluded_names = [name.lower() for name in source.get(CONF_EXCLUDE)]

        # allow text filter (default to add all)
        for pattern in source.get(CONF_FILTER):
            for day in sorted(available.get_named(pattern)):
                name = available[day]
                if name.lower() in excluded_names:
                    continue
                # Without multiday, a name already collected is not repeated.
                if not source.get(CONF_MULTIDAY) and name in collected.values():
                    continue
                collected[day] = name
    return collected
def test_update(self):
    """update() entries can be found via date objects and date strings."""
    calendar = holidays.HolidayBase()
    entries = {
        date(2015, 1, 1): "New Year's Day",
        "2015-12-25": "Christmas Day",
    }
    calendar.update(entries)

    # Each entry is queried with the key type it was NOT stored under.
    for key in ("2015-01-01", date(2015, 12, 25)):
        self.assertIn(key, calendar)
def test_append(self):
    """append() accepts a list, a single date and a single date string."""
    calendar = holidays.HolidayBase()
    calendar.update({
        date(2015, 1, 1): "New Year's Day",
        "2015-12-25": "Christmas Day",
    })
    for addition in ([date(2015, 4, 1), "2015-04-03"],
                     date(2015, 4, 6),
                     "2015-04-07"):
        calendar.append(addition)

    # (key, expected membership), checked in this order.
    expectations = (
        ("2015-01-01", True),
        (date(2015, 12, 25), True),
        ("2015-04-01", True),
        ("2015-04-02", False),
        ("2015-04-03", True),
        ("2015-04-04", False),
        ("2015-04-05", False),
        ("2015-04-06", True),
        ("2015-04-07", True),
    )
    for key, present in expectations:
        check = self.assertIn if present else self.assertNotIn
        check(key, calendar)
def listCustomHolidays():
    """Return a HolidayBase holding one custom entry: 2015-01-01, New Year's Day."""
    entries = {"2015-01-01": "New Year's Day"}
    calendar = holidays.HolidayBase()
    calendar.append(entries)
    return calendar
# The date of interest is yesterday, not today.
today = datetime.date.today()
mydate = today - datetime.timedelta(days=1)
mm = mydate.strftime("%d %B %Y")
# Strips any of the characters 'U', 'S', '$', ':' and ' ' from both ends of
# the formatted date. Not used in the visible part of the script.
datetochecheck = mm.strip('US$: ')

# Delete the previous SQL export before a new one is created.
if os.path.exists('C:/tools/zinc/data/export_zinc_prices.sql'):
    os.remove('C:/tools/zinc/data/export_zinc_prices.sql')
    print('File deleted')
else:
    print('File does not exist')
time.sleep(5)

# Check whether yesterday was a public holiday for zinc prices.
in_holidays = holidays.HolidayBase()
# Append custom dates to the holiday calendar.
# NOTE(review): these strings are written day-first (DD-MM-YYYY); confirm how
# HolidayBase parses ambiguous ones such as '10-04-2020'. The lookup below
# uses the same format, so both sides go through the same parsing.
in_holidays.append([
    '10-04-2020', '13-04-2020', '08-05-2020', '25-05-2020', '31-08-2020',
    '25-12-2020', '28-12-2020'
])

# Stop the script on a holiday, otherwise carry on.
if mydate.strftime('%d-%m-%Y') in in_holidays:
    print('it is holidays date :)')
    quit()
else:
    # Set cookie
    session = requests.Session()
    jar = requests.cookies.RequestsCookieJar()
def format_features(df):
    """Add engineered feature columns to the song-metadata frame.

    The frame is modified in place (columns are assigned on ``df``) and
    progress statistics are printed along the way. The columns read here are:
    ``copyright``, ``album``, ``title``, ``genre``, ``album_artist``,
    ``artist_name``, ``composers_name``, ``track``, ``lyric``,
    ``release_time``, ``artist_id``, ``composers_id``, ``dataset`` and
    ``label``.

    Args:
        df: pandas DataFrame holding the columns listed above.

    Returns:
        The result of ``remove_duplicate_songs_with_low_ranks(df)`` applied
        to the augmented frame.
    """
    import calendar
    import re
    from functools import partial

    from pandas import DataFrame

    # Fill missing copyright across train and test with fixed values
    df["copyright"] = df["copyright"].fillna("UnavailableInformation")

    # ---- album ---------------------------------------------------------
    print("There is {} ratio is nan album".format(
        len(df[df["album"].isnull()]) / len(df)))
    df["album_raw_from_mp3_metadata"] = df["album"]
    df["album"] = df["album"].fillna("")
    df["len_album_name"] = df["album"].apply(lambda x: len(x.split(" ")))
    df["isRemixAlbum"] = [1 if "Remix" in t else 0 for t in df["album"]]
    df["isOSTAlbum"] = [1 if "OST" in t else 0 for t in df["album"]]
    df["isSingleAlbum"] = [1 if "Single" in t else 0 for t in df["album"]]
    df["isBeatAlbum"] = [1 if "Beat" in t else 0 for t in df["album"]]
    df["isTopHitAlbum"] = [1 if "Top Hits" in t else 0 for t in df["album"]]
    df["isCoverAlbum"] = [1 if "Cover" in t else 0 for t in df["album"]]
    df["isEPAlbum"] = [1 if "EP" in t else 0 for t in df["album"]]
    df["isLienKhucAlbum"] = [1 if "Liên Khúc" in t else 0 for t in df["album"]]
    df["album_name_is_title_name"] = [
        1 if r.title in r.album else 0 for i, r in df.iterrows()
    ]

    # ---- genre ---------------------------------------------------------
    print("There is {} ratio is nan genre".format(
        len(df[df["genre"].isnull()]) / len(df)))
    df["genre"] = df["genre"].fillna("No genre")

    # ---- album_artist --------------------------------------------------
    print("There is {} ratio is nan album_artist".format(
        len(df[df["album_artist"].isnull()]) / len(df)))
    df["album_artist"] = df["album_artist"].fillna("No album_artist")
    df["album_artist_contain_artistname"] = [
        1 if r.album_artist in r.artist_name else 0 for i, r in df.iterrows()
    ]

    # ---- track ---------------------------------------------------------
    print("There is {} ratio is nan track".format(
        len(df[df["track"].isnull()]) / len(df)))
    df["track"] = df["track"].fillna("(1, 1)")
    df["istrack11"] = df["track"] == "(1, 1)"

    def tracknum_to_value(track_num):
        """Turn a "(number, total)" string into number / total; 1.0 on failure."""
        try:
            track_num = make_tuple(track_num)
            if track_num[0] is not None:
                return float(track_num[0]) / float(track_num[1])
            return 1.0
        except Exception:
            # Best effort: any unparsable or incomplete value counts as 1.0.
            # (Was a bare ``except:``, which also swallowed KeyboardInterrupt.)
            return 1.0

    df["track"] = df["track"].apply(tracknum_to_value)

    # ---- lyric ---------------------------------------------------------
    print("There is {} ratio is nan lyric".format(
        len(df[df["lyric"].isnull()]) / len(df)))
    df["lyric"] = df["lyric"].fillna("")
    df["islyric"] = df["lyric"].apply(lambda x: True if len(x) else False)
    df["num_line_lyric"] = df["lyric"].apply(lambda x: len(x.split("\r")))

    # ---- artists / release time ----------------------------------------
    df['no_artist'] = df.artist_name.apply(lambda x: len(x.split(",")))
    df['no_composer'] = df.composers_name.apply(lambda x: len(x.split(",")))
    df["datetime"] = pd.to_datetime(df.release_time)
    df["year"] = df["datetime"].dt.year
    df["month"] = df["datetime"].dt.month
    df["hour"] = df["datetime"].dt.hour
    df["day"] = df["datetime"].dt.day
    df["dayofyear"] = df["datetime"].dt.dayofyear
    df["weekday"] = df["datetime"].dt.weekday

    # ---- holidays ------------------------------------------------------
    # The dates used to be written day-first ('01-02-2017' for 1 Feb 2017).
    # String keys are parsed month-first by default, so every ambiguous date
    # was stored as a different day (2 Jan 2017 in that example). ISO
    # year-month-day strings are unambiguous.
    # NOTE(review): the lookup below passes raw ``release_time`` values;
    # this assumes they are not day-first strings themselves - confirm.
    holiday_dates = [f'2017-01-{d:02d}' for d in range(26, 32)]
    holiday_dates.append('2017-02-01')
    holiday_dates += [f'2018-02-{d:02d}' for d in range(14, 21)]
    holiday_dates += [
        '2017-04-30', '2018-04-30',
        '2017-01-01', '2018-01-01',
        '2017-02-14', '2018-02-14',
        '2017-03-08', '2018-03-08',
        '2017-05-01', '2018-05-01',
        '2017-04-06', '2018-04-25',
        '2017-06-01', '2018-06-01',
        '2017-10-04', '2018-09-24',
        '2017-10-20', '2018-10-20',
        '2017-11-20', '2018-11-20',
        '2017-12-24', '2018-12-24',
    ]
    in_holidays = holidays.HolidayBase()
    in_holidays.append(holiday_dates)
    df['isHoliday'] = df.release_time.apply(lambda x: x in in_holidays)

    # ---- title keywords ------------------------------------------------
    df["len_of_songname"] = df["title"].apply(lambda x: len(x.split(" ")))
    df["isRemix"] = [1 if "Remix" in t else 0 for t in df["title"]]
    df["isOST"] = [1 if "OST" in t else 0 for t in df["title"]]
    df["isBeat"] = [1 if "Beat" in t else 0 for t in df["title"]]
    df["isVersion"] = [1 if "Version" in t else 0 for t in df["title"]]
    df["isCover"] = [1 if "Cover" in t else 0 for t in df["title"]]
    df["isLienKhuc"] = [1 if "Liên Khúc" in t else 0 for t in df["title"]]
    df["day_release"] = df.groupby(
        ["year", "dayofyear"]).ngroup().astype("category").cat.codes

    # ---- date helpers (from fast ai) -----------------------------------
    def ifnone(a, b):
        "`a` if `a` is not None, otherwise `b`."
        return b if a is None else a

    def make_date(df: DataFrame, date_field: str):
        "Make sure `df[field_name]` is of the right date type."
        field_dtype = df[date_field].dtype
        if isinstance(field_dtype, pd.core.dtypes.dtypes.DatetimeTZDtype):
            field_dtype = np.datetime64
        if not np.issubdtype(field_dtype, np.datetime64):
            df[date_field] = pd.to_datetime(df[date_field],
                                            infer_datetime_format=True)

    def cyclic_dt_feat_names(time: bool = True, add_linear: bool = False):
        "Return feature names of date/time cycles as produced by `cyclic_dt_features`."
        fs = ['cos', 'sin']
        attr = [
            f'{r}_{f}'
            for r in 'weekday day_month month_year day_year'.split()
            for f in fs
        ]
        if time:
            attr += [
                f'{r}_{f}' for r in 'hour clock min sec'.split() for f in fs
            ]
        if add_linear:
            attr.append('year_lin')
        return attr

    def cyclic_dt_features(d, time: bool = True, add_linear: bool = False):
        "Calculate the cos and sin of date/time cycles."
        tt, fs = d.timetuple(), [np.cos, np.sin]
        day_year, days_month = tt.tm_yday, calendar.monthrange(
            d.year, d.month)[1]
        days_year = 366 if calendar.isleap(d.year) else 365
        rs = d.weekday() / 7, (d.day - 1) / days_month, (d.month - 1) / 12, (
            day_year - 1) / days_year
        feats = [f(r * 2 * np.pi) for r in rs for f in fs]
        if time and isinstance(d, datetime) and type(d) != date:
            rs = tt.tm_hour / 24, tt.tm_hour % 12 / 12, tt.tm_min / 60, tt.tm_sec / 60
            feats += [f(r * 2 * np.pi) for r in rs for f in fs]
        if add_linear:
            if type(d) == date:
                feats.append(d.year + rs[-1])
            else:
                secs_in_year = (datetime(d.year + 1, 1, 1) -
                                datetime(d.year, 1, 1)).total_seconds()
                feats.append(d.year +
                             ((d - datetime(d.year, 1, 1)).total_seconds() /
                              secs_in_year))
        return feats

    def add_cyclic_datepart(df: DataFrame,
                            field_name: str,
                            prefix: str = None,
                            drop: bool = True,
                            time: bool = False,
                            add_linear: bool = False):
        "Helper function that adds trigonometric date/time features to a date in the column `field_name` of `df`."
        make_date(df, field_name)
        field = df[field_name]
        prefix = ifnone(prefix, re.sub('[Dd]ate$', '', field_name))
        series = field.apply(
            partial(cyclic_dt_features, time=time, add_linear=add_linear))
        columns = [prefix + c for c in cyclic_dt_feat_names(time, add_linear)]
        df_feats = pd.DataFrame([item for item in series],
                                columns=columns,
                                index=series.index)
        for column in columns:
            df[column] = df_feats[column]
        if drop:
            df.drop(field_name, axis=1, inplace=True)
        return df

    def add_datepart(df: DataFrame,
                     field_name: str,
                     prefix: str = None,
                     drop: bool = True,
                     time: bool = False):
        """Helper function that adds columns relevant to a date in the column
        `field_name` of `df`: one `prefix + name` column per calendar
        attribute below, plus `prefix + 'Elapsed'`.
        """
        make_date(df, field_name)
        field = df[field_name]
        prefix = ifnone(prefix, re.sub('[Dd]ate$', '', field_name))
        attr = [
            'Year', 'Month', 'Week', 'Day', 'Dayofweek', 'Dayofyear',
            'Is_month_end', 'Is_month_start', 'Is_quarter_end',
            'Is_quarter_start', 'Is_year_end', 'Is_year_start'
        ]
        if time:
            attr = attr + ['Hour', 'Minute', 'Second']
        for n in attr:
            if n == 'Week' and not hasattr(field.dt, 'week'):
                # pandas 2.0 removed ``Series.dt.week``; the ISO calendar week
                # is its replacement. Cast to the dtype of the other parts.
                df[prefix + n] = field.dt.isocalendar().week.astype(
                    field.dt.day.dtype)
            else:
                df[prefix + n] = getattr(field.dt, n.lower())
        df[prefix + 'Elapsed'] = field.astype(np.int64) // 10**9
        if drop:
            df.drop(field_name, axis=1, inplace=True)
        return df

    add_datepart(df, 'datetime', drop=False)  # inplace
    add_cyclic_datepart(df, 'datetime', drop=False)  # inplace

    # ---- cleaned titles ------------------------------------------------
    # Keep the part before the first "(" and trim trailing blanks, "!" and "?".
    df['title_truncated'] = df['title'].str.split(
        '(', expand=True).loc[:, 0].str.rstrip().str.rstrip('!').str.rstrip('?')
    print(
        f"{len(df['title']) - df['title'].nunique()} raw titles are identical between songs: {df['title'].nunique()} unique titles"
    )
    print(
        f"After cleaning brackets etc. only {df['title_truncated'].nunique()} unique titles remain, i.e. {df['title'].nunique() - df['title_truncated'].nunique()} are highly similar titles "
    )

    # It seems like all songs on albums release at the same time, so groupby by release_time will create album
    df["album_right"] = df.release_time.astype("category").cat.codes

    # Matches when the string holds a character outside [a-zA-Z0-9_].
    special_char_re = re.compile('^.*[^a-zA-Z0-9_]')

    def isContainsSpecialChar(string):
        return special_char_re.search(string) is not None

    is_special_char_mask = df['title_truncated'].apply(isContainsSpecialChar)
    _df_train = df[df.dataset == "train"]
    english_like_names = _df_train.loc[_df_train['title_truncated']
                                       [~is_special_char_mask].index]['label']
    test = ttest_ind(_df_train['label'], english_like_names)
    if test.pvalue < 0.05:
        print(
            "There is a statistically signficiant relationship between English-like title and rank. So adding feature: isEnglishLikeTitle"
        )
        df['isEnglishLikeTitle'] = ~is_special_char_mask

    # ---- artist / composer ids -----------------------------------------
    def get_min_artist_id(s):
        ps = re.split(r',|\.', s)
        ps = [int(p) for p in ps]
        return np.min(ps)

    def get_max_artist_id(s):
        ps = re.split(r',|\.', s)
        ps = [int(p) for p in ps]
        return np.max(ps)

    df["artist_id_min"] = df["artist_id"].apply(get_min_artist_id)
    df["artist_id_min_cat"] = df["artist_id_min"].astype('category')
    df["artist_id_min_cat"] = df["artist_id_min_cat"].cat.codes
    df["composers_id_min"] = df["composers_id"].apply(get_min_artist_id)
    df["composers_id_min_cat"] = df["composers_id_min"].astype('category')
    df["composers_id_min_cat"] = df["composers_id_min_cat"].cat.codes
    df["artist_id_max"] = df["artist_id"].apply(get_max_artist_id)
    df["artist_id_max_cat"] = df["artist_id_max"].astype('category')
    df["artist_id_max_cat"] = df["artist_id_max_cat"].cat.codes
    df["composers_id_max"] = df["composers_id"].apply(get_max_artist_id)
    df["composers_id_max_cat"] = df["composers_id_max"].astype('category')
    df["composers_id_max_cat"] = df["composers_id_max_cat"].cat.codes
    df["num_same_title"] = df.groupby("title")["title"].transform("count")
    df["title_cat"] = df["title"].astype('category')

    ##############
    # These use knowledge of entire dataset X values
    ##############
    df["numsongInAlbum"] = df.groupby("album_right")["album_right"].transform(
        "count")
    df["isSingleAlbum_onesong"] = df["isSingleAlbum"] & (df["numsongInAlbum"]
                                                         == 1)

    # Find the number of songs which were released between 5-6 months from the datetime field == the release date
    def find_num_song_release_in_final_month(df, day):
        month5th = day + relativedelta.relativedelta(months=5)
        month6th = day + relativedelta.relativedelta(months=6)
        return len(df.datetime[(df.datetime >= month5th)
                               & (df.datetime <= month6th)])

    df["num_song_release_in_final_month"] = df.datetime.apply(
        lambda d: find_num_song_release_in_final_month(df, d))
    df["freq_artist"] = df.groupby('artist_id')['artist_id'].transform(
        'count').astype('float')
    df["freq_composer"] = df.groupby('composers_id')['composers_id'].transform(
        'count').astype('float')
    df["_artist_id_min_cat"] = df["artist_id_min"].astype('category')
    df["_artist_id_min_cat"] = df["_artist_id_min_cat"].cat.codes
    df["_composers_id_min_cat"] = df["composers_id_min"].astype('category')
    df["_composers_id_min_cat"] = df["_composers_id_min_cat"].cat.codes
    df["freq_artist_min"] = df.groupby('_artist_id_min_cat')[
        '_artist_id_min_cat'].transform('count').astype('float')
    df["freq_composer_min"] = df.groupby('_composers_id_min_cat')[
        '_composers_id_min_cat'].transform('count').astype('float')
    df["num_album_per_min_artist"] = df.groupby(
        ['_artist_id_min_cat',
         'album_right'])['album_right'].transform('count').astype('float')
    df["num_album_per_min_composer"] = df.groupby(
        ['composers_id_min',
         'album_right'])['album_right'].transform('count').astype('float')

    df = remove_duplicate_songs_with_low_ranks(df)  # recommended by zalo
    return df
from datetime import date
import holidays

# Custom-made holiday calendar.
custom_holidays = holidays.HolidayBase()
year = 2020

# (month, day, name), appended in exactly this order. Several dates occur
# twice, each time with a different name.
_HOLIDAY_TABLE = (
    (1, 1, "New Year's Day"),
    (1, 2, "Guru Govind Singh Jayanti"),
    (1, 14, "Lohri"),
    (1, 15, "Pongal"),
    (1, 15, "Makar Sankranti"),
    (1, 25, "Chinese New Year"),
    (1, 26, "Republic Day"),
    (1, 30, "Vasant Panchami"),
    (2, 9, "Guru Ravidas Jayanti"),
    (2, 14, "Valentine's Day"),
    (2, 18, "Maharishi Dayanand Saraswati Jayanti"),
    (2, 19, "Shivaji Jayanti"),
    (2, 21, "Maha Shivaratri/Shivaratri"),
    (3, 9, "Holika Dahana"),
    (3, 9, "Hazarat Ali's Birthday"),
    (3, 10, "Holi"),
    (3, 20, "March Equinox"),
    (3, 25, "Chaitra Sukhladi"),
    (4, 2, "Rama Navami"),
    (4, 6, "Mahavir Jayanti"),
    (4, 9, "First day of Passover"),
    (4, 9, "Maundy Thursday"),
    (4, 10, "Good Friday"),
)
for _month, _day, _name in _HOLIDAY_TABLE:
    custom_holidays.append({"{0}-{1}-{2}".format(year, _month, _day): _name})
# Calendar source:
# https://www.kippmetroatlanta.org/wp-content/uploads/KIPP-Metro-Atlanta-Schools-Calendar-SY19-20.pdf
from datetime import date
import holidays

empty_calendar = holidays.HolidayBase()
school_calendar = holidays.US(state='GA')

# Maps each special date to its label; entries are added in calendar-source
# order.
days_off = {}

# ---- 2019 ----
# End of summer break: 1-7 August.
for day in range(1, 8):
    days_off.update({date(2019, 8, day): 'Summer Break'})

# First day of school.
days_off.update({date(2019, 8, 8): 'First Day'})

# Half days, given as (month, day).
for month, day in ((8, 9), (8, 30), (9, 27), (11, 8), (12, 13), (12, 20)):
    days_off.update({date(2019, month, day): 'Half Day'})

# Labor Day, fall break and Indigenous People's Day.
days_off.update({
    date(2019, 9, 2): 'Labor Day',
    date(2019, 10, 11): 'Fall Break',
    date(2019, 10, 14): "Indigenous People's Day",
})