def fit(self, X: dt.Frame, y: np.array = None):
    """Memorise German public holidays for the years around the training data.

    Only ``self.time_column`` of ``X`` is read. The holiday calendars are
    generated from the smallest year found in that column up to
    ``2 * max_year - min_year + 1`` (e.g. data spanning 2016-2018 yields
    holidays for 2016-2021), so that dates after the training period are
    covered as well.

    The result is stored in ``self.memos``: a dict mapping ``'country'``
    (nationwide holidays) and each of the 16 province codes to a DataFrame
    with one row per holiday and the integer columns ``year`` and ``doy``
    (day of year), in chronological order.

    Parameters
    ----------
    X : dt.Frame
        Frame containing at least ``self.time_column``.
    y : np.array, optional
        Unused.
    """
    # For holidays we only need the date column, as pandas datetimes
    X = X[:, self.time_column].to_pandas()
    X[self.time_column] = pd.to_datetime(X[self.time_column])

    # Compute min and max year to decide how many years in advance we keep
    mn_year = X[self.time_column].dt.year.min()
    mx_year = X[self.time_column].dt.year.max()
    if np.isnan(mn_year) or np.isnan(mx_year):
        # No usable date at all: memorise empty calendars
        years = []
    else:
        # Plain Python ints, end bound exclusive:
        # min 2016 / max 2018 -> 2016, ..., 2021
        years = list(range(int(mn_year), int(mx_year + mx_year - mn_year + 2)))

    def _holiday_frame(**region):
        """Return the year / day-of-year table for one German calendar."""
        calendar = holidays.DE(years=years, observed=False, **region)
        # Sorting the dates up front keeps the table in (year, doy) order
        frame = pd.DataFrame(sorted(calendar.keys()),
                             columns=[self.time_column],
                             dtype='datetime64[ns]')
        frame['year'] = frame[self.time_column].dt.year
        frame['doy'] = frame[self.time_column].dt.dayofyear
        return frame.drop(self.time_column, axis=1)

    # Germany general and province holidays
    self.memos = {'country': _holiday_frame()}
    for prov in ('BW', 'BY', 'BE', 'BB', 'HB', 'HH', 'HE', 'MV',
                 'NI', 'NW', 'RP', 'SL', 'SN', 'ST', 'SH', 'TH'):
        self.memos[prov] = _holiday_frame(prov=prov)
def transform(self, X: dt.Frame):
    """Flag, for every row, whether its date is a German public holiday.

    Only ``self.time_column`` of ``X`` is read. The returned pandas
    DataFrame no longer contains that column; it holds one boolean column
    ``is_ge_holiday`` for the nationwide calendar plus one column
    ``is_ge_holiday_<PROV>`` for each of the 16 province calendars.

    Parameters
    ----------
    X : dt.Frame
        Frame containing at least ``self.time_column``.

    Returns
    -------
    pandas.DataFrame
        The holiday indicator columns, one row per input row.
    """
    X = X[:, self.time_column]
    X = X.to_pandas()

    ge_holidays = holidays.DE()
    X["is_ge_holiday"] = X[self.time_column].apply(
        lambda x: x in ge_holidays)

    for prov in [
            'BW', 'BY', 'BE', 'BB', 'HB', 'HH', 'HE', 'MV', 'NI', 'NW',
            'RP', 'SL', 'SN', 'ST', 'SH', 'TH'
    ]:
        # Select the province with the same `prov` keyword that `fit` uses,
        # so both methods address the regional calendars the same way.
        ge_prov_holidays = holidays.DE(prov=prov)
        X["is_ge_holiday_%s" % prov] = X[self.time_column].apply(
            lambda x: x in ge_prov_holidays)

    X.drop(self.time_column, axis=1, inplace=True)
    return X
def ist_jetzt_handelszeit():
    """Return True if the current local time lies within trading hours.

    Trading hours are Monday to Friday from 09:00 (inclusive) to 17:00
    (exclusive), except on German nationwide public holidays.

    The clock is read exactly once so that the date, the hour and the
    weekday always describe the same instant, even when the call happens
    right at an hour or day boundary.
    """
    jetzt = datetime.now()
    feiertag = jetzt.strftime('%Y-%m-%d') in holidays.DE()
    richtige_zeit = 9 <= jetzt.hour < 17
    # isoweekday(): Monday is 1 ... Sunday is 7
    richtiger_tag = 1 <= jetzt.isoweekday() <= 5
    return not feiertag and richtiger_tag and richtige_zeit
def get_holidays():
    """Collect the public holidays of every German state for 2013-2018.

    Iterates over the module-level ``states`` mapping (province code ->
    value passed on to ``Holiday``) and returns a list with one ``Holiday``
    object per state and holiday, ordered by date within each state.
    """
    collected = []
    for state_code, state_label in states.items():
        calendar = holidays.DE(prov=state_code, years=range(2013, 2019))
        for day, title in sorted(calendar.items()):
            # A throw-away instance is rendered first (result unused);
            # the instance that is stored is a separate, fresh object.
            Holiday(day, state_label, title).to_string()
            collected.append(Holiday(day, state_label, title))
    return collected
def main(outdir):
    """Generate a seeded synthetic order data set and write it as TSV files.

    Three files are written into ``outdir``: ``train.tsv`` and
    ``test_truth.tsv`` (all columns) and ``test_publish.tsv`` (the test
    rows without the ``Mitarbeiter ID`` column).

    The random generator is seeded from ``config.seed``; the order of the
    draws below therefore determines the generated data.
    """
    rng = RandomState(MT19937(SeedSequence(config.seed)))
    # NOTE(review): the variable says Berlin, but "BW" is the code for
    # Baden-Wuerttemberg (Berlin is "BE") - confirm which one is intended.
    berlin_holidays = holidays.DE(prov="BW")

    num_employees = 20000
    num_jobsites = 200
    num_areas = 20
    num_qualifications = 40
    num_shifts = 3
    # NOTE(review): 356, not 365 - possibly a typo; confirm before changing,
    # since it alters the generated data.
    num_days = 356
    num_orders = 1000

    # One random draw per column, in this exact order.
    columns = {}
    columns["Einsatzort"] = rng.randint(0, num_jobsites, num_orders)
    columns["Qualifikation"] = rng.randint(0, num_qualifications, num_orders)
    columns["Schicht"] = rng.randint(0, num_shifts, num_orders)
    columns["Tag"] = rng.randint(0, num_days, num_orders)
    df = pd.DataFrame.from_dict(columns)

    # Turn the day offsets into calendar dates and derive date features.
    first_day = datetime(2019, 1, 1)
    df["Tag"] = df["Tag"].apply(lambda offset: first_day + timedelta(offset))
    df["Wochentag"] = df["Tag"].apply(lambda when: when.strftime("%a"))
    df["Feiertag"] = df["Tag"].apply(lambda when: when in berlin_holidays)

    # grouping of jobsites into areas: random cumulative boundaries, scaled
    # so that the last boundary equals num_jobsites
    boundaries = np.cumsum(rng.randint(1, 10, num_areas))
    boundaries = (boundaries / boundaries.max() * num_jobsites).astype(int)
    df["Ort"] = df["Einsatzort"].apply(
        lambda jobsite_id: np.argmax(boundaries > jobsite_id))

    # Every order gets 1 to 5 distinct employee ids.
    df["Mitarbeiter ID"] = [
        rng.choice(range(num_employees),
                   replace=False,
                   size=rng.randint(1, 6)).tolist()
        for _ in range(len(df))
    ]

    train, test = train_test_split(df)

    def target(filename):
        return os.path.join(outdir, filename)

    published = [
        "Einsatzort", "Qualifikation", "Schicht", "Tag", "Wochentag",
        "Feiertag", "Ort"
    ]
    train.to_csv(target("train.tsv"), index=False, sep="\t")
    test.to_csv(target("test_truth.tsv"), index=False, sep="\t")
    test[published].to_csv(target("test_publish.tsv"), index=False, sep="\t")
def get_working_days(start_date, end_date):
    """Return the working days between two dates, both ends included.

    A working day is a Monday-to-Friday date that is not a public holiday
    in Berlin. The result is a chronologically ordered list; it is empty
    when ``end_date`` lies before ``start_date``.
    """
    # Build the calendar once; creating it anew for every single day
    # recomputes the same holidays over and over.
    berlin_holidays = holidays.DE(prov='BE')
    day_count = (end_date - start_date).days + 1
    return [
        day
        for day in (start_date + timedelta(days=offset)
                    for offset in range(day_count))
        # weekday(): Monday is 0 ... Saturday is 5, Sunday is 6
        if day.weekday() < 5 and day not in berlin_holidays
    ]
def pre_reboot_state(con, consul_lock, hostname, flags):
    """Run all checks and tasks that precede a reboot of ``hostname``.

    The process exits (``sys.exit``) as soon as one precondition fails:
    holiday (if ``check_holidays``), global stop flag, disabled node, no
    reboot required (if ``check_triggers``), or a lost consul lock. The
    stop flag and the reboot trigger are checked a second time after the
    pre-boot tasks and the waiting period.

    On success the host name is written to the consul key
    ``service/rebootmgr/reboot_in_progress`` (only logged when ``dryrun``
    is set) and the consul lock is released.
    """

    def _exit_on_global_stop_flag():
        if check_stop_flag(con) and not flags.get("ignore_global_stop_flag"):
            LOG.info("Global stop flag is set: exit")
            sys.exit(EXIT_GLOBAL_STOP_FLAG_SET)

    def _exit_unless_reboot_required():
        if flags.get("check_triggers") and not is_reboot_required(con, hostname):
            sys.exit(0)

    def _check_services():
        check_consul_services(con, hostname,
                              flags.get("ignore_failed_checks"),
                              ["rebootmgr", "rebootmgr_preboot"])

    today = datetime.date.today()
    if flags.get("check_holidays") and today in holidays.DE():
        LOG.info("Refuse to run on holiday")
        sys.exit(EXIT_HOLIDAY)

    _exit_on_global_stop_flag()

    if is_node_disabled(con, hostname) and not flags.get("ignore_node_disabled"):
        LOG.info("Rebootmgr is disabled in consul config for this node. Exit")
        sys.exit(EXIT_NODE_DISABLED)

    _exit_unless_reboot_required()

    LOG.info("Entering pre reboot state")
    _check_services()

    LOG.info("Executing pre reboot tasks")
    run_tasks("pre_boot", con, hostname, flags.get("dryrun"))

    if not flags.get("lazy_consul_checks"):
        LOG.info("Sleep for 2 minutes. Waiting for consul checks.")
        wait_seconds = (60 * 2) + 10
        time.sleep(wait_seconds)

    check_consul_cluster(con, flags.get("ignore_failed_checks"))
    _check_services()

    if not consul_lock.acquired:
        LOG.error("Lost consul lock. Exit")
        sys.exit(EXIT_CONSUL_LOST_LOCK)

    _exit_on_global_stop_flag()

    # check again if reboot is still required
    _exit_unless_reboot_required()

    if flags.get("dryrun"):
        LOG.debug("Would write %s in key service/rebootmgr/reboot_in_progress" % hostname)
    else:
        LOG.debug("Write %s in key service/rebootmgr/reboot_in_progress" % hostname)
        con.kv.put("service/rebootmgr/reboot_in_progress", hostname)

    consul_lock.release()
def broadcast(self, message, bot, tastatur=None, author=None):
    """Send ``message`` to every subscriber except ``author``.

    Nothing is sent on a public holiday in North Rhine-Westphalia.
    ``tastatur`` is passed on as the reply markup of each message.
    Telegram errors are logged as warnings and do not stop the
    remaining deliveries.
    """
    nrw_calendar = holidays.DE(years=[2019, 2020, 2021], prov='NW')
    if date.today() in nrw_calendar:
        return

    recipients = (sub for sub in Subscribers.select()
                  if not sub.chat_id == author)
    for recipient in recipients:
        try:
            bot.send_message(recipient.chat_id, message, reply_markup=tastatur)
        except telegram.TelegramError as ex:
            log.warning(ex)
def add_independent_features(df: pd.DataFrame) -> pd.DataFrame:
    """Add returnQuantity independent features to DataFrame.

    Calls methods that each add a feature in form of a column to the data.
    The columns are added to ``df`` in place; the same object is returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Cleaned table training data

    Returns
    -------
    pd.DataFrame
        Feature-enriched table

    """
    # Price based features
    df['productPrice'] = df.price / df.quantity
    df['totalSavings'] = df.rrp - df.productPrice
    df['relativeSavings'] = (1 - df.productPrice / df.rrp).fillna(1.)

    # Calendar features of the order date
    df['orderYear'] = df.orderDate.apply(lambda x: x.year)
    df['orderMonth'] = df.orderDate.apply(lambda x: x.month)
    df['orderDay'] = df.orderDate.apply(lambda x: x.day)
    df['orderWeekDay'] = df.orderDate.apply(lambda x: x.dayofweek)
    df['orderDayOfYear'] = df.orderDate.apply(lambda x: x.dayofyear)
    df['orderWeek'] = df.orderDate.apply(lambda x: x.week)
    df['orderWeekOfYear'] = df.orderDate.apply(lambda x: x.weekofyear)
    df['orderQuarter'] = df.orderDate.apply(lambda x: x.quarter)
    df['orderTotalDay'] = df.orderDate.apply(total_day)
    df['orderSeason'] = df.orderDate.apply(date_to_season)

    # One shared calendar for all rows: building a new one per row would
    # recompute the same holidays for every single order.
    german_holidays = holidays.DE()
    df['orderIsOnGermanHoliday'] = df.orderDate.apply(
        lambda x: 1 if x in german_holidays else 0)

    # Order composition features
    df['surplusArticleQuantity'] = same_article_surplus(df)
    df['surplusArticleSizeQuantity'] = same_article_same_size_surplus(df)
    df['surplusArticleColorQuantity'] = same_article_same_color_surplus(df)
    df['totalOrderShare'] = total_order_share(df)
    df['voucherSavings'] = voucher_saving(df)
    # df['voucherFirstUsedDate'] = pd.to_datetime(df.t_voucher_firstUsedDate_A).apply(total_day)
    # df['voucherLastUsedDate'] = pd.to_datetime(df.t_voucher_lastUsedDate_A).apply(total_day)

    # Order history features
    df['products3DayNeighborhood'] = orders_in_neighborhood(df, 3)
    df['products7DayNeighborhood'] = orders_in_neighborhood(df, 7)
    df['products14DayNeighborhood'] = orders_in_neighborhood(df, 14)
    df['products30DayNeighborhood'] = orders_in_neighborhood(df, 30)
    df['previousOrders'] = previous_orders(df)

    # Position of each row within its order and within its order date
    df['t_posInOrder'] = df.groupby(
        'orderID',
        as_index=False).apply(pos_in_grouping).reset_index(level=0, drop=True)
    df['t_posInDay'] = df.groupby(
        'orderDate',
        as_index=False).apply(pos_in_grouping).reset_index(level=0, drop=True)
    return df
def load(country, region, observed, expand, years): # Erases existing holiday cache and makes a new one... global dates if country == "US": dates = holidays.US(state=region, observed=observed, expand=expand, years=years) elif country == "CA": dates = holidays.CA(prov=region, observed=observed, expand=expand, years=years) elif country == "MX": dates = holidays.MX(observed=observed, expand=expand, years=years) elif country == "NZ": dates = holidays.NZ(prov=region, observed=observed, expand=expand, years=years) elif country == "AU": dates = holidays.AU(prov=region, observed=observed, expand=expand, years=years) elif country == "AT": dates = holidays.AT(prov=region, observed=observed, expand=expand, years=years) elif country == "DE": dates = holidays.DE(prov=region, observed=observed, expand=expand, years=years) else: print "UNKNOWN COUNTRY ", country
def cal_day(context, year, week, day):
    """Fill ``context`` with the data needed to render one calendar day.

    The day is given as ISO year / week / weekday. The following keys are
    set: ``day``, ``currdate``, ``is_past``, ``is_holiday`` (public holiday
    in North Rhine-Westphalia), ``unavailable`` (hours between 8 and 17
    whose start time is contained in ``context['bookings']``) and
    ``hours`` (8 to 17). The same ``context`` object is returned.
    """
    context['day'] = day
    currdate = iso_to_gregorian(year, week, day)

    def at_hour(hour):
        # The given hour on the requested day
        return datetime.datetime(currdate.year, currdate.month, currdate.day,
                                 hour=hour)

    context['currdate'] = currdate
    context['is_past'] = currdate < datetime.date.today()
    context['is_holiday'] = currdate in holidays.DE(prov='NW')

    # Every full hour from 08:00 to 17:00, both included
    slots = rrule(HOURLY, dtstart=at_hour(8), until=at_hour(17))
    context['unavailable'] = [
        slot.hour for slot in slots if slot in context['bookings']
    ]
    context['hours'] = range(8, 18)
    return context
def parse_prov(in_):
    """Return the German holiday calendar described by ``in_``.

    When ``in_`` is a string containing a dash, the text between the first
    and the second dash is used as province code. Any other input yields
    the nationwide calendar.
    """
    names_province = isinstance(in_, str) and "-" in in_
    if not names_province:
        return holidays.DE()
    province = in_.split("-")[1]
    return holidays.DE(prov=province)
def ts_fit(self, suppress=False):
    """Fit Prophet to the time series data.

    When ``self.hyper_params`` is set, a grid search is run and the state
    of the best forecaster is copied into this object. Otherwise a single
    Prophet model is configured from this object's settings, fitted on the
    training split, and fitted values, residuals and confidence bounds are
    stored on the object.

    Parameters:
    ----------
    suppress: bool
        Suppress or not some of the output messages

    Returns:
    ----------
    The object itself, or -1 if configuring or fitting the model raised
    an exception. The process is terminated via ``sys.exit("STOP")`` when
    diagnosing is requested without 'step'/'horizon', or when holidays are
    requested for an unsupported country.
    """
    # NOTE(review): statement nesting was reconstructed from a flattened
    # listing - verify the indentation against the original file.
    if self.hyper_params is not None:
        self._gs.set_forecaster(self)
        self._gs.set_hyper_params(self.hyper_params)
        # a very important command here to avoid endless loop
        self.hyper_params = None
        self._prophet_logger.info("***** Starting grid search *****")
        self._gs = self._gs.grid_search(suppress=suppress, show_plot=False)
        #
        self.best_model = self._gs.best_model
        # Take over the complete state of the best forecaster found
        self.__dict__.update(self.best_model['forecaster'].__dict__)
        self._prophet_logger.info("***** Finished grid search *****")
    else:
        self._prepare_fit()
        self._model = None
        self.ts_split()
        ts_df = self._train_dt.copy()
        ts_test_df = self._test_dt
        # sanity check
        if 'on_weekend' in ts_df.columns:
            ts_df.drop(['on_weekend', 'off_weekend'], inplace=True, axis=1)
            # NOTE(review): assumes the test split exists and carries the
            # same helper columns whenever the training split does - confirm
            ts_test_df.drop(['on_weekend', 'off_weekend'], inplace=True, axis=1)
        # Fit
        self._prophet_logger.info("Trying to fit the Prophet model....")
        try:
            if not suppress:
                self._prophet_logger.info("...via using parameters\n")
                print_attributes(self)
            # diagnose on?
            if self._diagnose:
                try:
                    assert self._step is not None and self._horizon is not None
                except (KeyError, AssertionError):
                    self._prophet_logger.warning("You want to diagnose the Prophet model. Please provide parameters "
                                                 "'step' and 'horizon' within object initialization!")
                    sys.exit("STOP")
            # Move the index into a column and apply the configured column names
            ts_df = ts_df.reset_index()
            ts_df.columns = self._ts_df_cols
            if ts_test_df is not None and not ts_test_df.empty:
                ts_test_df = ts_test_df.reset_index()
                ts_test_df.columns = self._ts_df_cols
            #
            weekly_s = self._weekly_seasonality
            if self._weekend_seasonality:
                # force to False
                weekly_s = False
            #
            if not self._consider_holidays:
                self._model = Prophet(interval_width=self._prophet_interval_width,
                                      yearly_seasonality=self._yearly_seasonality,
                                      weekly_seasonality=weekly_s,
                                      daily_seasonality=self._daily_seasonality,
                                      changepoint_range=self._changepoint_range,
                                      changepoint_prior_scale=self._changepoint_prior_scale)
            else:
                try:
                    assert self._country in ['AT', 'DE', 'US']
                except AssertionError:
                    self._prophet_logger.exception("Assrtion exception occurred. Right now, Austria (AT), "
                                                   "Germany(DE) and USA (US) supported.")
                    sys.exit("STOP")
                else:
                    # Country holidays for every year present in the index of self.ts_df
                    holi = None
                    if self._country == 'AT':
                        holi = holidays.AT(state=None, years=list(np.unique(np.asarray(self.ts_df.index.year))))
                    elif self._country == 'DE':
                        holi = holidays.DE(state=None, years=list(np.unique(np.asarray(self.ts_df.index.year))))
                    elif self._country == 'US':
                        holi = holidays.US(state=None, years=list(np.unique(np.asarray(self.ts_df.index.year))))
                    #
                    holi_dict = dict()
                    for date, name in sorted(holi.items()):
                        holi_dict[date] = name
                    # Holiday table with the columns 'ds', 'holiday' and zero-width windows
                    df_holi = pd.DataFrame.from_dict(data=holi_dict, orient='index').reset_index()
                    df_holi.columns = ['ds', 'holiday']
                    df_holi['lower_window'] = 0
                    df_holi['upper_window'] = 0
                    self._model = Prophet(interval_width=self._prophet_interval_width,
                                          yearly_seasonality=self._yearly_seasonality,
                                          weekly_seasonality=weekly_s,
                                          daily_seasonality=self._daily_seasonality,
                                          changepoint_range=self._changepoint_range,
                                          changepoint_prior_scale=self._changepoint_prior_scale,
                                          holidays=df_holi)
            if self._monthly_seasonality:
                self._model.add_seasonality(name='monthly', period=30.5, fourier_order=20)
                if not suppress:
                    self._prophet_logger.info("Added monthly seasonality.")
            if self._quarterly_seasonality:
                self._model.add_seasonality(name='quarterly', period=91.5, fourier_order=20)
                if not suppress:
                    self._prophet_logger.info("Added quarterly seasonality.")
            if self._weekend_seasonality:
                # Complementary boolean columns used as seasonality conditions below
                ts_df['on_weekend'] = ts_df['ds'].apply(self.we_season)
                ts_df['off_weekend'] = ~ts_df['ds'].apply(self.we_season)
                self._train_dt = ts_df.copy()
                self._train_dt.set_index('ds', inplace=True)
                #
                if ts_test_df is not None and not ts_test_df.empty:
                    ts_test_df['on_weekend'] = ts_test_df['ds'].apply(self.we_season)
                    ts_test_df['off_weekend'] = ~ts_test_df['ds'].apply(self.we_season)
                    self._test_dt = ts_test_df.copy()
                    self._test_dt.set_index('ds', inplace=True)
                # and add
                self._model.add_seasonality(name='weekend_on_season', period=7, fourier_order=5,
                                            condition_name='on_weekend')
                self._model.add_seasonality(name='weekend_off_season', period=7, fourier_order=5,
                                            condition_name='off_weekend')
                if not suppress:
                    self._prophet_logger.info("Added week-end seasonality.")
            # tic
            start = time()
            self.model_fit = self._model.fit(ts_df)
            # toc
            if not suppress:
                self._prophet_logger.info("Time elapsed: {} sec.".format(time() - start))
        except (Exception, ValueError):
            # Any failure while configuring or fitting is logged and reported as -1
            self._prophet_logger.exception("Prophet error...")
            return -1
        else:
            self._prophet_logger.info("Model successfully fitted to the data!")
            # Fitted values
            self._prophet_logger.info("Computing fitted values and residuals...")
            # in-sample predict
            try:
                self.fittedvalues = self._model.predict(ts_df.drop('y', axis=1))
            except (Exception, ValueError):
                self._prophet_logger.exception("Prophet predict error...")
            # Residuals
            try:
                # use fittedvalues to fill in the model dictionary
                self.residuals = pd.Series(np.asarray(ts_df.y) - np.asarray(self.fittedvalues['yhat']),
                                           index=self._train_dt.index)
            except (KeyError, AttributeError):
                self._prophet_logger.exception("Model was not fitted or ts has other structure...")
            #
            self.lower_conf_int = pd.Series(np.asarray(self.fittedvalues['yhat_lower']), index=self._train_dt.index)
            self.upper_conf_int = pd.Series(np.asarray(self.fittedvalues['yhat_upper']), index=self._train_dt.index)
        self._prophet_logger.info("Done.")
    return self
def isHoliday(myYear, myMonth, myDay):
    """Tell whether the given date is a public holiday in Baden-Wuerttemberg.

    In addition to the regular calendar, 2017-10-31 ("Reformationstag")
    is treated as a holiday.
    """
    calendar = holidays.DE(prov='BW')
    extra_days = {"2017-10-31": "Reformationstag"}
    calendar.append(extra_days)
    queried_day = datetime.date(myYear, myMonth, myDay)
    return queried_day in calendar
pass settings["country_last_updated"] = now settings["country_last"] = country_last settings.flush() return country_last country_holidays = { "CA": holidays.CA(), "CO": holidays.CO(), "MX": holidays.MX(), "US": holidays.US(), "NZ": holidays.NZ(), "AU": holidays.AU(), "DE": holidays.DE(), "AT": holidays.AT(), "DK": holidays.DK(), "UK": holidays.UK(), "IE": holidays.IE(), "ES": holidays.ES(), "CZ": holidays.CZ(), "SK": holidays.SK(), "PL": holidays.PL(), "PT": holidays.PT(), "NL": holidays.NL(), "NO": holidays.NO(), "IT": holidays.IT(), "SE": holidays.SE(), "JP": holidays.JP(), "BE": holidays.BE(),
def cleanData(df, settype):
    """Clean a store sales table and return the rows usable for modelling.

    Steps, in order: drop rows without store id, derive Year / Month / Day /
    DayOfWeek from Date, repair Sales and Open where they can be derived
    from each other, fill SchoolHoliday, StateHoliday, Customers, Sales and
    CompetitionDistance, then drop closed-store rows, zero-sales rows, the
    Date column, the helper index columns and every row that still
    contains a missing value.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw table. The columns Store, Date, Sales, Customers, Open,
        SchoolHoliday, StateHoliday and CompetitionDistance are read.
    settype : str
        'train' computes the fill-in means from ``df`` itself; 'test' uses
        fixed mean values. Any other value leaves the means undefined, which
        fails as soon as one of them is needed.

    Returns
    -------
    pandas.DataFrame
        The cleaned table.
    """
    # eliminate rows where store is empty
    df = df[~df.loc[:, 'Store'].isnull()]
    df.reset_index(inplace=True)
    print("Dropped rows without store-ids")

    # Join Function Needed here!

    # extract year, month and day from Date
    date = pd.DatetimeIndex(df.loc[:, 'Date'])
    df.loc[:, 'Year'] = date.year
    df.loc[:, 'Month'] = date.month
    df.loc[:, 'Day'] = date.day
    print("Extracted year, month and day from Date")

    # extract day of week (1 = Monday ... 7 = Sunday)
    df.loc[:, 'DayOfWeek'] = date.dayofweek + 1
    print("Extracted and reset day of week")

    # Cells are written with df.loc[row, column]: the chained form
    # df[column][row] = value may write to a temporary copy and get lost.
    for i in range(len(df)):
        if np.isnan(df['Sales'][i]) and df['Customers'][i] == 0:
            df.loc[i, 'Sales'] = 0
    print('Set Sales to 0 if customers are 0')

    # deleting 0 sales rows
    df = df[df['Sales'] != 0]
    # the previous index is kept as a helper column; it is dropped at the end
    df.reset_index(inplace=True)
    print('Dropped 0-sales rows in df')

    # Sets Open to 1 if Sales happened while Open is missing
    for i in range(len(df)):
        if df['Sales'][i] > 0 and np.isnan(df['Open'][i]):
            df.loc[i, 'Open'] = 1
    print('Set Open = 1 if Sales > 0')

    # missing school holiday flags count as "no school holiday"
    def helper_schoolholiday(row):
        if pd.isnull(row['SchoolHoliday']):
            return 0.0
        else:
            return row['SchoolHoliday']

    df['SchoolHoliday'] = df.apply(helper_schoolholiday, axis=1)
    print("Filled school holidays based on state holidays")

    # NOTE: stores in train stations get no special treatment here.

    # Sets all Shops with isna('Open') to 0 on a German public holiday.
    # The calendar is built once instead of once per row.
    de_holidays = holidays.DE()
    for i in range(len(df)):
        if np.isnan(df['Open'][i]) and df['Date'][i] in de_holidays:
            df.loc[i, 'Open'] = 0
    print('Public Holidays updated')

    # take care of regional stateholiday: Jan 6 and Jun 1 take the value the
    # same store has on that day in the other year (2013 <-> 2014)
    def copy_stateholiday_from(i, reference_date):
        same_store = df[df.Store == df['Store'][i]]
        reference = same_store[same_store.Date == reference_date]
        try:
            df.loc[i, 'StateHoliday'] = reference['StateHoliday'].values[0]
        except IndexError:
            # the store has no row on the reference date; the gap is
            # filled by the fallback rule further down
            pass

    for i in range(len(df)):
        if not pd.isnull(df['StateHoliday'][i]):
            continue
        other_year = '2014' if df['Year'][i] == 2013 else '2013'
        if df['Month'][i] == 1 and df['Day'][i] == 6:
            copy_stateholiday_from(i, other_year + '-01-06')
        elif df['Month'][i] == 6 and df['Day'][i] == 1:
            copy_stateholiday_from(i, other_year + '-06-01')
    print('Finished regional stateholidays')

    # take care of remained stateholiday: derive it from Open, or from
    # Sales when Open is missing as well
    def remained_stateholiday(row):
        if (pd.isnull(row['StateHoliday'])):
            if pd.isnull(row['Open']):
                if row['Sales'] > 0:
                    return '0'
                else:
                    return 'a'
            else:
                if row['Open'] == 0.0:
                    return 'a'
                else:
                    return '0'
        else:
            return row['StateHoliday']

    df['StateHoliday'] = df.apply(remained_stateholiday, axis=1)
    print('Finished cleaning remaining stateholidays')

    # Sets all Shops with isna('Open') to 0 based on stateholiday state
    def open_stateholiday(row):
        # closed on any kind of state holiday ('a', 'b' or 'c'), open otherwise
        if pd.isnull(row['Open']) and row['StateHoliday'] in ('a', 'b', 'c'):
            return 0.0
        elif pd.isnull(row['Open']) and row['StateHoliday'] == '0':
            return 1.0
        else:
            return row['Open']

    df['Open'] = df.apply(open_stateholiday, axis=1)
    print('Adjusted open status of shops according to state holidays')

    # fill empty 'Customers' with average customer number when open=1.0, when open=0.0 customer=0.0
    if settype == 'train':
        df_mean_customers = df['Customers'].mean()
        print('Mean customers of test cleaning:' + str(df_mean_customers))
    elif settype == 'test':
        df_mean_customers = 758.7492748450405

    def helper_customers(row):
        if pd.isnull(row['Customers']) and row['Open'] == 1.0:
            return df_mean_customers
        elif pd.isnull(row['Customers']) and row['Open'] == 0.0:
            return 0.0
        else:
            return row['Customers']

    df.loc[:, 'Customers'] = df.apply(helper_customers, axis=1)
    print('Finished filling in empty customers cells')

    # Fills empty 'Sales'-Cells in train with average if there have been non 0 customers in the shop
    if settype == 'train':
        mean_sales = df.loc[:, 'Sales'].mean()
        print('Mean Sales of training set = ' + str(mean_sales))
    elif settype == 'test':
        mean_sales = 6836.722219708965

    def helper_sales(row):
        if pd.isnull(row['Sales']) and float(row['Customers']) > 0:
            return mean_sales
        else:
            return row['Sales']

    df['Sales'] = df.apply(helper_sales, axis=1)
    print("Finished cleaning sales")

    if settype == 'train':
        competitionDistanceMean = df.loc[:, 'CompetitionDistance'].mean()
        print('Mean Competition Distance of training set = ' + str(competitionDistanceMean))
    elif settype == 'test':
        competitionDistanceMean = 5446.105182647453

    def fillEmptyDistances(row):
        """Filling empty distances with mean"""
        if pd.isnull(row['CompetitionDistance']):
            return competitionDistanceMean
        else:
            return row['CompetitionDistance']

    df['CompetitionDistance'] = df.apply(fillEmptyDistances, axis=1)

    # NOTE: the categorical columns (PromoInterval, StoreType, StateHoliday,
    # Assortment) are neither one-hot encoded nor dropped here.

    df = df[df['Open'] != 0]
    df = df[df['Sales'] != 0]
    df.drop(['Date'], axis=1, inplace=True)

    # remove the helper columns created by the two reset_index calls
    if 'level_0' in df.columns:
        df.drop(['level_0'], axis=1, inplace=True)
    if 'index' in df.columns:
        df.drop(['index'], axis=1, inplace=True)

    df = df.dropna(axis=0, how='any')
    print('Dropped last leftovers')
    print('Clean all done!')
    return df
from datetime import date import holidays from django.contrib.auth.models import User from django.db import models from django.utils.translation import gettext_lazy as _ from ls.joyous.models import CalendarPage, CalendarPageForm from wagtail.admin.edit_handlers import FieldPanel, MultiFieldPanel, FieldRowPanel from wagtail.core.models import Page, PageQuerySet from wagtail.images.edit_handlers import ImageChooserPanel from wagtail.snippets.models import register_snippet from home.models import MenuMixin CalendarPage.holidays.register(holidays.DE(prov=holidays.BY)) CalendarPage.holidays.add(date=date(2019, 10, 25), value='HAPPY') CalendarPage.is_creatable = False class GirotondoCalendar(CalendarPage, MenuMixin): class Meta: proxy = True def get_context(self, request, *args, **kwargs): print('get context') siblings = self.get_root().get_siblings(inclusive=True).filter( live=True, show_in_menus=True) root_kids = self.get_root().get_children().filter(live=True, show_in_menus=True) menus = self.get_children().filter(
#!/usr/bin/env python3
"""Print the dates of the Bavarian public holidays of this and next year."""
import datetime

import holidays

now = datetime.datetime.now()

# `expand` has to be the boolean False: the string 'False' is truthy and
# would switch the automatic expansion to further years on, not off.
for Holiday in holidays.DE(prov='BY',
                           years=[now.year, now.year + 1],
                           expand=False):
    print(Holiday)
def find_good_epics():
    """Return the epics from ``main_epic_ids`` that are worth trading now.

    For every epic the market details are requested from the REST API. An
    epic is picked when
      * its id does not contain MXN, SEK, NOK or CNH,
      * one of the five markets (Frankfurt, London, New York, Sydney,
        Tokyo) passes the ``is_between`` check for 08:00-16:00 local time,
        the epic id contains that market's currency (EUR, GBP, USD, AUD,
        JPY) and today is no public holiday in that country, and
      * bid minus offer is at least -1.

    Errors while evaluating an epic are printed and the epic is skipped.
    The function sleeps one second at several points.

    NOTE(review): statement nesting was reconstructed from a flattened
    listing - verify the indentation against the original file.
    """
    spreads_and_epics = []
    i_count = 0
    pick_from_epics = []
    full_hol_list = []

    # Today's date in each market's time zone, prefixed with a country tag
    # so that it can be compared with the entries of full_hol_list below.
    ###################################################################
    tz = pytz.timezone('Europe/Berlin')
    todays_date = str(datetime.datetime.now(tz=tz).strftime('%Y-%m-%d'))
    ger_today = str(str("GER_" + str(todays_date)))
    print("Europe/Berlin :- Today's Date is ..." + str(todays_date))
    ###################################################################
    tz = pytz.timezone('Europe/London')
    todays_date = str(datetime.datetime.now(tz=tz).strftime('%Y-%m-%d'))
    gb_today = str(str("GB_" + str(todays_date)))
    print("Europe/London :- Today's Date is ..." + str(todays_date))
    ###################################################################
    tz = pytz.timezone('America/New_York')
    todays_date = str(datetime.datetime.now(tz=tz).strftime('%Y-%m-%d'))
    us_today = str(str("US_" + str(todays_date)))
    print("America/New_York :- Today's Date is ..." + str(todays_date))
    ###################################################################
    tz = pytz.timezone('Australia/Sydney')
    todays_date = str(datetime.datetime.now(tz=tz).strftime('%Y-%m-%d'))
    aus_today = str(str("AUS_" + str(todays_date)))
    print("Australia/Sydney :- Today's Date is ..." + str(todays_date))
    ###################################################################
    tz = pytz.timezone('Asia/Tokyo')
    todays_date = str(datetime.datetime.now(tz=tz).strftime('%Y-%m-%d'))
    jp_today = str(str("JP_" + str(todays_date)))
    print("Asia/Tokyo :- Today's Date is ..." + str(todays_date))
    ###################################################################
    b_ger_hol = False
    b_uk_hol = False
    b_us_hol = False
    b_aus_hol = False
    b_jp_hol = False

    # All holidays of the year(s) in YEAR_var, tagged like the dates above
    for date, name in sorted(holidays.DE(years=YEAR_var).items()):
        full_hol_list.append(str("GER_" + str(date)))
    for date, name in sorted(holidays.UK(years=YEAR_var).items()):
        full_hol_list.append(str("GB_" + str(date)))
    for date, name in sorted(holidays.US(years=YEAR_var).items()):
        full_hol_list.append(str("US_" + str(date)))
    for date, name in sorted(holidays.AU(years=YEAR_var).items()):
        full_hol_list.append(str("AUS_" + str(date)))
    for date, name in sorted(holidays.JP(years=YEAR_var).items()):
        full_hol_list.append(str("JP_" + str(date)))

    full_hol_list = sorted(full_hol_list)

    # A flag is raised when today's tagged date appears in the holiday list
    for d in full_hol_list:
        #print (d)
        if str(d) == ger_today:
            b_ger_hol = True
        if str(d) == gb_today:
            b_uk_hol = True
        if str(d) == us_today:
            b_us_hol = True
        if str(d) == aus_today:
            b_aus_hol = True
        if str(d) == jp_today:
            b_jp_hol = True

    for epic_id in main_epic_ids:
        tmp_lst = []
        # The market details are fetched for every epic, also for the
        # ones that are skipped right below
        base_url = REAL_OR_NO_REAL + '/markets/' + epic_id
        auth_r = requests.get(base_url, headers=authenticated_headers)
        d = json.loads(auth_r.text)

        try:
            i_count = i_count + 1
            if epic_id.find('MXN') != -1:
                #print("!!DEBUG!!...skipping, FOUND MXN in..." + str(epic_id))
                time.sleep(1)
            elif epic_id.find('SEK') != -1:
                #print("!!DEBUG!!...skipping, FOUND SEK in..." + str(epic_id))
                time.sleep(1)
            elif epic_id.find('NOK') != -1:
                #print("!!DEBUG!!...skipping, FOUND NOK in..." + str(epic_id))
                time.sleep(1)
            elif epic_id.find('CNH') != -1:
                #print("!!DEBUG!!...skipping, FOUND CNH in..." + str(epic_id))
                time.sleep(1)
            else:
                b_TRADE_OK = False
                # The loop body runs at most once: it is left with `break`
                # either at the first matching market or at its end
                while True:
                    ###################EUROPE############################
                    ###################EUROPE############################
                    ###################EUROPE############################
                    tz = pytz.timezone('Europe/Berlin')
                    now_time = datetime.datetime.now(tz=tz).strftime('%H:%M')
                    #print ("!!DEBUG!! Europe/Berlin:" + str(now_time))
                    if is_between(str(now_time), ("08:00", "16:00")):
                        #print("!!DEBUG!!...FRANKFURT MARKET OPEN!!")
                        time.sleep(1)
                        STR_CHECK = "EUR"
                        if STR_CHECK in epic_id and b_ger_hol == False:
                            b_TRADE_OK = True
                            break
                    ###################LONDON############################
                    ###################LONDON############################
                    ###################LONDON############################
                    tz = pytz.timezone('Europe/London')
                    now_time = datetime.datetime.now(tz=tz).strftime('%H:%M')
                    # Wait here for as long as the London time passes the
                    # 22:00-22:59 check
                    while True:
                        if is_between(str(now_time), ("22:00", "22:59")):
                            time.sleep(1)
                            # Sleeping for the tally up hour
                            print("!!DEBUG!! Tally Up hour:" + str(now_time))
                            now_time = datetime.datetime.now(
                                tz=tz).strftime('%H:%M')
                        else:
                            break
                    #print ("!!DEBUG!! Europe/London:" + str(now_time))
                    if is_between(str(now_time), ("08:00", "16:00")):
                        #print("!!DEBUG!!...LONDON MARKET OPEN!!")
                        time.sleep(1)
                        STR_CHECK = "GBP"
                        if STR_CHECK in epic_id and b_uk_hol == False:
                            b_TRADE_OK = True
                            break
                    ###################NY############################
                    ###################NY############################
                    ###################NY############################
                    tz = pytz.timezone('America/New_York')
                    now_time = datetime.datetime.now(tz=tz).strftime('%H:%M')
                    #print ("!!DEBUG!! America/New_York:" + str(now_time))
                    if is_between(str(now_time), ("08:00", "16:00")):
                        #print("!!DEBUG!!...NEW YORK MARKET OPEN!!")
                        time.sleep(1)
                        STR_CHECK = "USD"
                        if STR_CHECK in epic_id and b_us_hol == False:
                            b_TRADE_OK = True
                            break
                    ###################AUS############################
                    ###################AUS############################
                    ###################AUS############################
                    tz = pytz.timezone('Australia/Sydney')
                    now_time = datetime.datetime.now(tz=tz).strftime('%H:%M')
                    #print ("!!DEBUG!! Australia/Sydney:" + str(now_time))
                    if is_between(str(now_time), ("08:00", "16:00")):
                        #print("!!DEBUG!!...SYDNEY MARKET OPEN!!")
                        time.sleep(1)
                        STR_CHECK = "AUD"
                        if STR_CHECK in epic_id and b_aus_hol == False:
                            b_TRADE_OK = True
                            break
                    ###################TOKYO############################
                    ###################TOKYO############################
                    ###################TOKYO############################
                    tz = pytz.timezone('Asia/Tokyo')
                    now_time = datetime.datetime.now(tz=tz).strftime('%H:%M')
                    #print ("!!DEBUG!! Asia/Tokyo:" + str(now_time))
                    if is_between(str(now_time), ("08:00", "16:00")):
                        #print("!!DEBUG!!...TOKYO MARKET OPEN!!")
                        time.sleep(1)
                        STR_CHECK = "JPY"
                        if STR_CHECK in epic_id and b_jp_hol == False:
                            b_TRADE_OK = True
                            break
                    break

                if b_TRADE_OK:
                    current_bid = d['snapshot']['bid']
                    ask_price = d['snapshot']['offer']
                    # bid minus offer; accepted when it is not below -1
                    spread = float(current_bid) - float(ask_price)
                    if float(spread) >= -1:
                        # tmp_lst.append(epic_id)
                        # spreads_and_epics.append(tmp_lst)
                        pick_from_epics.append(epic_id)
                        # print ("bid : " + str(current_bid))
                        # print ("ask : " + str(ask_price))
                        # print ("-------------------------")
                        # print ("spread : " + str(spread))
                        # print ("-------------------------")
                        print("!!DEBUG!!...FOUND GOOD EPIC..." + str(i_count) +
                              "/" + str(len(main_epic_ids)))
                        time.sleep(1)
                    else:
                        print(
                            "!!DEBUG!!...skipping, NO GOOD EPIC....Checking next epic spreads..."
                            + str(i_count) + "/" + str(len(main_epic_ids)))
                        time.sleep(1)
                        continue

        except Exception as e:
            # Best effort: report the problem and go on with the next epic
            print(e)
            pass

    return (pick_from_epics)
#!/usr/bin/python3.6
"""Send the daily Telegram notifications: holiday greeting and backlog reminders."""
from datetime import date, datetime

import holidays

import config
import tele_util
import lst

swagbot = tele_util.startBot(config.swagbot)

# Read the date once and build one calendar for its year, so that the
# holiday check and the lookup of the holiday name use the same data.
today = date.today()
de_holidays = holidays.DE(years=today.year)

if today in de_holidays:
    sql = "select chat_id from props where name='holidays'"
    # NOTE(review): `readSQLL` here versus `readSQL` below - confirm that
    # both functions exist in tele_util and that this is not a typo.
    rows = tele_util.readSQLL(sql)
    for r in rows:
        swagbot.sendMessage(r[0],
                            'Heute haben wir frei => *' + u'\U0001F389' +
                            de_holidays[today] + '*' + u'\U0001F389',
                            parse_mode='Markdown')

# Backlog reminders: every chat is reminded each `value` days of the year
sql = "select chat_id, value from props where name='backlog/reminder'"
rows = tele_util.readSQL(sql)
doy = datetime.now().timetuple().tm_yday
for r in rows:
    interval = int(r[1])
    # An interval of 0 can never be due; skipping it also keeps the
    # modulo from aborting the reminders of all remaining chats.
    if interval != 0 and doy % interval == 0:
        l = lst.getList(r[0], 'backlog')
        if len(l) > 0:
            swagbot.sendMessage(r[0], ('Es steht folgendes aus: %s' % l))
poolclass=NullPool, # dont maintain a pool of connections pool_recycle=3600 # handles timeouts better, I think... ) q = """ SELECT distinct(state_id), state FROM locations """ # get all locations with missing data df_todoliste = pd.read_sql(q, aws_engine) # run this script at end of calendar year to get public holidays for next year relevant_year = datetime.today().year + 1 germany_public_holidays = [] for index, row in df_todoliste.iterrows(): # start get public holiday data for state # ------------------------------------------------- for date in holidays.DE(years=np.arange(relevant_year, relevant_year + 5), prov=row['state_id']): germany_public_holidays.append([str(date), row['state_id']]) # end get public holiday data for state # ------------------------------------------------- # upload data to db with aws_engine.connect() as cnx: q = """ REPLACE INTO holidays (dt, state_id) VALUES(%s,%s) """ cnx.execute(q, germany_public_holidays, multi=True)
for date, name in sorted(holidays.ES(years=years).items()): st.write(date,name) if selected_country == 'United States': for date, name in sorted(holidays.US(years=years).items()): st.write(date,name) if selected_country == 'France': for date, name in sorted(holidays.FR(years=years).items()): st.write(date,name) if selected_country == 'Germany': for date, name in sorted(holidays.DE(years=years).items()): st.write(date,name) if selected_country == 'Ukraine': for date, name in sorted(holidays.UKR(years=years).items()): st.write(date,name) else: holidays = False holidays = st.checkbox('Add country holidays to the model') with st.beta_expander('Hyperparameters'): st.write('In this section it is possible to tune the scaling coefficients.')
def create_request():
    """Collect yesterday's bike count, weather and prediction for every location.

    Iterates over the module-level ``locations_df``.  For each row it

    1. posts to the eco-counter endpoint for the count between
       ``yesterday_date`` and ``today_date``,
    2. walks the nearby Meteostat stations until one has a daily record dated
       yesterday,
    3. reverse-geocodes the coordinates to a German state and checks whether
       yesterday was a public holiday there,
    4. asks ``BikePrediction.predict_single`` for the expected count.

    Locations without a bike count, or without any station reporting weather
    for yesterday, are skipped.

    Returns:
        list[dict]: one record per location with the keys ``date``,
        ``bike_count``, ``name``, ``lon``, ``lat``, ``temperature``,
        ``precipitation``, ``snowdepth``, ``windspeed``, ``sunshine``,
        ``is_holiday`` and ``prediction`` (clamped to be non-negative).
    """
    data_sets = []
    BP = BikePrediction()
    WEATHER_API_KEY = os.getenv('WEATHER_API_KEY')

    # Everything below is identical for all locations, so build it once.
    url = 'http://data.eco-counter.com/ParcPublic/CounterData'
    headers = {
        "Accept": "text/plain, */*; q=0.01",
        "Accept-Encoding": "gzip, deflate",
        "Accept-Language": "en-US,en;q=0.9",
        "Connection": "keep-alive",
        "Content-Length": "115",
        "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
        "Cookie": "i18next=en_US; _ga=GA1.2.1682226698.1584790632; _gid=GA1.2.220973166.1584790632",
        "Host": "data.eco-counter.com",
        "Origin": "http://data.eco-counter.com",
        "Referer": "http://data.eco-counter.com/ParcPublic/?id=4586",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.80 Safari/537.36",
        "X-Requested-With": "XMLHttpRequest"
    }
    yesterday_str = str(yesterday_date).split()[0]
    geolocator = Nominatim(user_agent="everyonecounts")

    for _, row in locations_df.iterrows():
        # start get bike count data
        # ------------------------------------------------
        # The template below already carries the "&pratiques=" key, so only
        # the value is inserted here (the key used to be sent twice).
        pratiques = ""
        if hasattr(row, 'pratiques') and isinstance(row.pratiques, str):
            pratiques = row.pratiques
        body = "idOrganisme=4586&idPdc={}&fin={}%2F{}%2F{}&debut={}%2F{}%2F{}&interval=4&pratiques={}".format(
            row.idPdc, today_date.day, today_date.month, today_date.year,
            yesterday_date.day, yesterday_date.month, yesterday_date.year,
            pratiques)
        bike_count_data = requests.post(url, body, headers=headers)
        # The last element of the response is dropped, as before.
        bike_counts = bike_count_data.json()[:-1]
        # no data available for location on current day
        if not bike_counts:
            continue
        bike_count_data_entry = bike_counts[0]
        # -------------------------------------------------
        # end get bike count data

        # start get weather data
        # -------------------------------------------------
        weather_stations = requests.get(
            'https://api.meteostat.net/v1/stations/nearby?lat={}&lon={}&limit=20&key={}'
            .format(row.lat, row.lon, WEATHER_API_KEY))
        # Try the stations in the order returned until one has a record for
        # yesterday.
        weather_data_entry = None
        for station in weather_stations.json()['data']:
            weather_data = requests.get(
                'https://api.meteostat.net/v1/history/daily?station={}&start={}&end={}&key={}'
                .format(station['id'], yesterday_str, yesterday_str,
                        WEATHER_API_KEY))
            records = weather_data.json()['data']
            if records and records[-1]['date'] == yesterday_str:
                weather_data_entry = records[0]
                break
        # No station had data: skip the location, mirroring the handling of a
        # missing bike count, instead of failing on an empty/undefined result.
        if weather_data_entry is None:
            continue
        # --------------------------------------------------
        # end get weather data

        # start get public holiday data
        # -------------------------------------------------
        location = geolocator.reverse(str(row['lat']) + "," + str(row['lon']))
        address = location.raw['address']
        # when city=province, state is not returned
        province = address['state'] if 'state' in address else address['city']
        province_abb = province_abbs[province]
        province_public_holidays = {
            str(holiday)
            for holiday in holidays.DE(years=[yesterday_date.year],
                                       prov=province_abb)
        }
        # end get public holiday data
        # -------------------------------------------------

        data_set = {
            'date': yesterday_str,
            'bike_count': str(bike_count_data_entry[1]),
            'name': row['nom'],
            'lon': row['lon'],
            'lat': row['lat'],
            'temperature': weather_data_entry['temperature'],
            'precipitation': weather_data_entry['precipitation'],
            'snowdepth': weather_data_entry['snowdepth'],
            'windspeed': weather_data_entry['windspeed'],
            'sunshine': weather_data_entry['sunshine'],
            'is_holiday': 1 if yesterday_str in province_public_holidays else 0,
        }

        # start get prediction for normal bike count
        # -------------------------------------------------
        # Falsy weather values (e.g. None) are passed to the model as 0.
        prediction = BP.predict_single(
            station_string=row['nom'],
            day=yesterday_date,
            temperature=data_set['temperature'] or 0,
            precipitation=data_set['precipitation'] or 0,
            snowdepth=data_set['snowdepth'] or 0,
            windspeed=data_set['windspeed'] or 0,
            sunshine=data_set['sunshine'] or 0,
            is_holiday=data_set['is_holiday'] or 0)
        # end get prediction for normal bike count
        # -------------------------------------------------

        # predict 0 if prediction -ve
        data_set['prediction'] = max(prediction, 0)
        data_sets.append(data_set)
    return data_sets
def main(outdir):
    """Generate a synthetic order/offer data set and write it as TSV files.

    All random draws come from one ``RandomState`` seeded with
    ``config.seed``, including the fallback offer size and the train/test
    split, so a given seed reproduces the same files.

    Steps:
      * build a boolean (employee x day) activity matrix with a sliding window,
      * draw Dirichlet base distributions for areas, job sites, qualification
        groups, qualifications and shifts,
      * sample the orders,
      * offer each order to up to six active employees matching qualification
        and job site (or to 1-6 random active employees if nobody matches),
      * add date, weekday and Berlin public-holiday columns,
      * write ``train.tsv``, ``test_truth.tsv`` and ``test_publish.tsv``
        (the latter without the ``Mitarbeiter ID`` column).

    Args:
        outdir: directory the three TSV files are written to.
    """
    rng = RandomState(MT19937(SeedSequence(config.seed)))
    num_employees = 50000
    num_orders = 1000000
    num_jobsites = 2800
    num_areas = 180
    num_qualifications = 214
    num_qualigroups = 13
    num_shifts = 4
    num_days = 2708
    start_day = datetime(2013, 8, 1)

    print("create sliding window of active employees")
    active_employees = np.zeros((num_employees, num_days)).astype(bool)
    left = 0
    right = 100
    upkeep = 400
    change = (.95, 1 - .95)
    for irow in range(num_employees):
        active_employees[irow, left:right] = 1
        # The window start advances by one day with probability 0.05.
        left = left + rng.choice([0, 1], p=change)
        right = left + upkeep + rng.choice([0, 1], p=change)

    print("create base distributions for areas, qualis and shifts")
    areas = rng.dirichlet(np.ones(num_areas) * .1)
    jobsites = rng.dirichlet(np.ones(num_jobsites) * .1)
    area_of_jobsite = np.empty(num_jobsites)
    for ijobsite in range(num_jobsites):
        area_of_jobsite[ijobsite] = rng.choice(np.arange(num_areas), p=areas)
    qualigroups = rng.dirichlet(np.ones(num_qualigroups) * .1)
    qualis = rng.dirichlet(np.ones(num_qualifications) * .1)
    qualigroup_of_quali = np.empty(num_qualifications)
    for iquali in range(num_qualifications):
        qualigroup_of_quali[iquali] = rng.choice(np.arange(num_qualigroups),
                                                 p=qualigroups)
    shifts = rng.dirichlet(np.ones(num_shifts))

    orders = []
    for _ in tqdm(range(num_orders), desc="create orders"):
        shift = rng.choice(range(num_shifts), p=shifts)
        jobsite = rng.choice(range(num_jobsites), p=jobsites)
        area = area_of_jobsite[jobsite]
        quali = rng.choice(range(num_qualifications), p=qualis)
        qualigroup = qualigroup_of_quali[quali]
        day = rng.randint(0, num_days)
        orders.append({
            "Schicht": shift,
            "Einsatzort": jobsite,
            "PLZ": area,
            "Qualifikation": quali,
            "Qualifikationgruppe": qualigroup,
            "Tag": day,
        })

    # One-hot (employee x category) assignment matrices.
    employee_qualifications = rng.multinomial(
        1, qualis, size=(num_employees)).astype(bool)
    employee_jobsites = rng.multinomial(
        1, jobsites, size=(num_employees)).astype(bool)
    orders = pd.DataFrame(orders)

    offers = []
    # Fallback offer sizes 1..6 are drawn with probability proportional to 1/k.
    ps = np.ones(6) / np.arange(1, 7)
    ps /= ps.sum()
    for _, order in tqdm(orders.iterrows(),
                         desc="create offers",
                         total=len(orders)):
        match_active = active_employees[:, int(order.Tag)]
        match_quali = employee_qualifications[:, int(order.Qualifikation)]
        match_jobsite = employee_jobsites[:, int(order.Einsatzort)]
        match, = (match_active & match_quali & match_jobsite).nonzero()
        offers.append(match[:6].tolist())
        if len(offers[-1]) == 0:
            # Use the seeded generator here as well; drawing the size from the
            # global np.random made the output differ between runs.
            num_offers = rng.choice(range(1, 7), p=ps)
            offers[-1] = rng.choice(match_active.nonzero()[0],
                                    num_offers).tolist()

    berlin_holidays = holidays.DE(prov="BE")
    orders["Mitarbeiter ID"] = offers

    print("add day meta data")
    orders["Tag"] = orders["Tag"].apply(lambda day: start_day + timedelta(day))
    orders["Wochentag"] = orders["Tag"].apply(lambda day: day.strftime("%a"))
    orders["Feiertag"] = orders["Tag"].apply(
        lambda day: day in berlin_holidays)

    published_columns = [
        "Einsatzort", "PLZ", "Qualifikation", "Qualifikationgruppe",
        "Schicht", "Tag", "Wochentag", "Feiertag"
    ]
    orders = orders[published_columns + ["Mitarbeiter ID"]]
    orders = orders.sort_values("Tag")

    # Seed the split too, otherwise it falls back to the global NumPy state.
    train, test = train_test_split(orders, random_state=rng)
    train.to_csv(os.path.join(outdir, "train.tsv"), index=False, sep="\t")
    test.to_csv(os.path.join(outdir, "test_truth.tsv"), index=False, sep="\t")
    test[published_columns].to_csv(os.path.join(outdir, "test_publish.tsv"),
                                   index=False,
                                   sep="\t")
def judge_local_holiday(self, df):
    """Flag visits that fall on a public holiday in the visitor's country.

    Args:
        df: frame with a ``geoNetwork_country`` column (country names) and a
            ``visitId`` column whose values expose ``.date()``.

    Returns:
        numpy.ndarray of int, one entry per row of ``df``: 1 when the visit
        date is a public holiday in the row's country, 0 otherwise.  Rows
        whose country has no calendar in the table below are 0.

    Notes:
        * The holiday calendars are populated for the years that occur in the
          data.  A freshly constructed ``holidays`` calendar holds no dates,
          so testing a Series against it with ``isin`` never matched.
        * Unknown countries previously became NaN and were then cast to int,
          which is not a defined conversion; they are now 0.
        * Denmark, Austria, Hungary, Portugal and Norway had lookup branches
          that a separate country whitelist made unreachable; the single table
          below now drives both the whitelist and the lookup.
    """
    calendar_by_country = {
        'United States': holidays.US,
        'India': holidays.India,
        'Canada': holidays.CA,
        'Germany': holidays.DE,
        'Japan': holidays.JP,
        'France': holidays.FRA,
        'Mexico': holidays.MX,
        'Australia': holidays.AU,
        'Spain': holidays.ES,
        'Netherlands': holidays.NL,
        'Italy': holidays.IT,
        'Ireland': holidays.IE,
        'Sweden': holidays.SE,
        'Argentina': holidays.AR,
        'Colombia': holidays.CO,
        'Belgium': holidays.BE,
        'Switzerland': holidays.CH,
        'Czechia': holidays.CZ,
        'Denmark': holidays.DK,
        'Austria': holidays.AT,
        'Hungary': holidays.HU,
        'Portugal': holidays.PT,
        'Norway': holidays.NO,
        'New Zealand': holidays.NZ,
        'South Africa': holidays.ZA,
    }

    country = df['geoNetwork_country']
    visit_date = df['visitId'].apply(lambda x: x.date())

    judge_holiday = np.zeros(len(df), dtype=int)
    for country_name, calendar_cls in calendar_by_country.items():
        rows = (country == country_name).values
        if not rows.any():
            continue
        dates = visit_date[rows]
        # Populate the calendar explicitly for every year present in the data.
        years = sorted({day.year for day in dates})
        holiday_dates = set(calendar_cls(years=years))
        judge_holiday[rows] = [day in holiday_dates for day in dates]
    return judge_holiday
def create_season_pickle(pickle_dir=Path('pickles')):
    """Compute windowed seasonal means per station and pickle them.

    For every path returned by ``get_file_paths(pickle_dir)`` the pickle
    ``<path>aggregation`` is loaded and split into rest days (Saturday, Sunday
    or a public holiday in North Rhine-Westphalia) and work days.  Within each
    group, every timestamp gets the mean of the station column over all rows
    at the same time of day within three weeks before and after it.  Both
    groups are concatenated and written to ``<path>season_aggregation``.

    Holidays are looked up for every year present in the index and matched on
    the calendar day, so all times of day on a holiday count as rest day.
    (Previously only the 2017 holidays were used and, because dates were
    compared against full timestamps, only midnight rows could match.)

    Args:
        pickle_dir: directory holding the input pickles and receiving the
            output pickles.
    """
    file_paths = get_file_paths(pickle_dir)
    print(file_paths)
    column_name = 'windowed_means'
    three_w_timedelta = pd.Timedelta('3w')

    for path in file_paths:
        print(path)
        station_name = path
        # Explicit dtype: an empty Series without one is deprecated.
        df_mean_season = pd.Series(dtype=float)
        df_mean_pickle = pd.read_pickle(pickle_dir /
                                        (str(path) + 'aggregation'))
        print('len mean_pickle: ' + str(len(df_mean_pickle)))
        print(df_mean_pickle)

        years = sorted({int(year) for year in df_mean_pickle.index.year})
        holidays_nrw = list(holidays.DE(years=years, state='NW').keys())
        print(holidays_nrw)

        # Compare calendar days, not timestamps, against the holiday dates.
        is_holiday = df_mean_pickle.index.normalize().isin(
            pd.to_datetime(holidays_nrw))
        is_restday = (df_mean_pickle.index.dayofweek >= 5) | is_holiday
        df_mean_pickle_restday = df_mean_pickle[is_restday]
        df_mean_pickle_workday = df_mean_pickle[~is_restday]
        print('Split_dataframe')

        for df_mean_pickle_typeday in (df_mean_pickle_restday,
                                       df_mean_pickle_workday):
            df_mean_pickle_typeday = df_mean_pickle_typeday[[station_name
                                                             ]].dropna()
            v1s = []
            min_date = df_mean_pickle_typeday.index.min()
            max_date = df_mean_pickle_typeday.index.max()
            old_window_min_date = min_date.date()
            old_window_max_date = max_date.date()
            print(min_date)
            for index in df_mean_pickle_typeday.index:
                # +/- three weeks, clipped to the available date range.
                window_min_date = max(min_date, index - three_w_timedelta)
                window_max_date = min(max_date, index + three_w_timedelta)
                window_slice = df_mean_pickle_typeday.loc[
                    window_min_date:window_max_date]
                # Keep only rows at the same time of day as the current row.
                window_slice = window_slice.loc[window_slice.index.time ==
                                                index.time()]
                v1 = window_slice[station_name].mean()
                # Progress output whenever the window moves to another day.
                if (old_window_min_date != window_min_date.date()
                        or old_window_max_date != window_max_date.date()):
                    print(str(window_min_date) + ' -> ' + str(window_max_date))
                    old_window_min_date = window_min_date.date()
                    old_window_max_date = window_max_date.date()
                    print(window_slice)
                    print(v1)
                v1s.append(v1)
            df_mean_pickle_typeday[column_name] = v1s
            print('len v1s: ' + str(len(v1s)))
            print(df_mean_pickle_typeday[[column_name]])
            print(df_mean_season)
            df_mean_season = pd.concat(
                [df_mean_season, df_mean_pickle_typeday[column_name]],
                sort=True)
            print('len mean_season: ' + str(df_mean_season.size))
            print(df_mean_season)

        df_mean_season.to_pickle(pickle_dir /
                                 (str(path) + 'season_aggregation'))
def find_good_epics():
    """Return the epics from ``main_epic_ids`` whose spread is tight enough.

    Prints today's date in each market time zone, then queries
    ``REAL_OR_NO_REAL + '/markets/<epic>'`` for every epic.  An epic is
    accepted when ``bid - offer >= -1.51``.  Epics whose id contains MXN, SEK,
    NOK or CNH are skipped without being queried.  The function sleeps one
    second after each evaluated epic.

    The skip is now an explicit ``continue``.  It used to rely on a flag that
    was only ever set to True, so excluded currencies were not reliably
    skipped.  Per-country holiday flags that were computed here but never read
    have been removed.

    Returns:
        list[str]: the accepted epic ids, in ``main_epic_ids`` order.
    """
    for zone in ('Europe/Berlin', 'Europe/London', 'America/New_York',
                 'Australia/Sydney', 'Asia/Tokyo'):
        todays_date = datetime.datetime.now(
            tz=pytz.timezone(zone)).strftime('%Y-%m-%d')
        print(zone + " :- Today's Date is ..." + todays_date)

    excluded_currencies = ('MXN', 'SEK', 'NOK', 'CNH')
    pick_from_epics = []
    total = len(main_epic_ids)

    for i_count, epic_id in enumerate(main_epic_ids, start=1):
        if any(currency in epic_id for currency in excluded_currencies):
            continue

        base_url = REAL_OR_NO_REAL + '/markets/' + epic_id
        auth_r = requests.get(base_url, headers=authenticated_headers)
        d = json.loads(auth_r.text)

        try:
            current_bid = d['snapshot']['bid']
            ask_price = d['snapshot']['offer']
            # Negative by construction: bid minus offer.
            spread = float(current_bid) - float(ask_price)
        except (KeyError, TypeError, ValueError) as e:
            # Response without usable prices: report it and try the next epic.
            print(e)
            continue

        if spread >= -1.51:
            pick_from_epics.append(epic_id)
            print(
                "!!DEBUG!!...FOUND GOOD EPIC {} spread {}...{}/{}".format(
                    epic_id, spread, i_count, total))
        else:
            print(
                "!!DEBUG!!...skipping, NO GOOD EPIC {} spread {} ....Checking next epic spreads...{}/{}"
                .format(epic_id, spread, i_count, total))
        time.sleep(1)

    return pick_from_epics
def generate_timesheet_data(year, month, fdom, ldom, hours, *, state='NI',
                            days_of_week=(0, 1, 2, 3, 4), start_hour=8,
                            end_hour=18, max_hours=6):
    """Randomly distribute working hours over the valid days of a month.

    By Patrick Faion <https://github.com/pfaion/timesheet_generator>

    Hours are handed out in half-hour chunks.  A day is valid when it lies
    between ``fdom`` and ``ldom``, its weekday is in ``days_of_week`` and it is
    not a public holiday in the given German state.  Days are shuffled and
    weighted with 1/rank so that a few days collect most of the hours.

    Args:
        year, month: month to generate the sheet for.
        fdom, ldom: first and last day of the month that may be used (clamped
            to the real month bounds).
        hours: total number of hours to distribute.
        state: German state code passed to ``holidays.DE``.
        days_of_week: weekdays (``date.weekday()`` numbering) that may be used.
        start_hour, end_hour: daily time window for work.
        max_hours: maximum number of hours placed on a single day.

    Returns:
        tuple: ``(data, header_date, total_hours_formatted)`` where ``data``
        has one dict per day of the month with the keys ``day``, ``start``,
        ``end``, ``duration`` and ``date`` (empty strings for days without
        work).

    Raises:
        TimesheetCreationError: if the hours do not fit into the valid days.
    """
    # get public holidays and length of the month
    public_holidays = holidays.DE(state=state, years=year)
    days_in_month = calendar.monthrange(year, month)[1]

    # check which days are valid, i.e. are specified workdays and not holidays
    valid_days = []
    for day in range(max(1, fdom), min(days_in_month, ldom) + 1):
        date = datetime.date(year, month, day)
        if date not in public_holidays and date.weekday() in days_of_week:
            valid_days.append(day)

    # Distribute hours over valid days. Weights fall off as 1/rank after a
    # random shuffle, so some days are used often and some are used rarely.
    possible_days = valid_days
    random.shuffle(possible_days)
    weights = list(1 / np.arange(1, len(possible_days) + 1))

    def drop_day(day):
        # Remove the day together with the weight at the same position so the
        # two lists stay aligned.
        position = possible_days.index(day)
        del possible_days[position]
        del weights[position]

    # collector for sampled distribution
    # day => (start, end)
    collector = dict()

    # possible chunks over the day are from start to end in steps of half-hours
    chunk_starts = np.arange(start_hour, end_hour, 0.5)

    # distribute all hours
    h = hours
    while h > 0:
        if len(possible_days) == 0:
            raise TimesheetCreationError(
                "Too many hours for specified range of month")

        # select day
        day, weight = weighted_choice(zip(possible_days, weights))

        # if day is already listed, extend working hours there either before or after
        if day in collector:
            start, end = collector[day]
            possible_extensions = []
            if start > start_hour:
                possible_extensions.append('before')
            if end < (end_hour - 0.5):
                possible_extensions.append('after')
            if not possible_extensions:
                # The block already fills the usable window (only possible
                # with non-default limits): retire the day, hand out nothing.
                drop_day(day)
                continue
            extension = random.choice(possible_extensions)
            if extension == 'before':
                start -= 0.5
            if extension == 'after':
                end += 0.5
            collector[day] = (start, end)
            if end - start >= max_hours:
                drop_day(day)

        # if day not yet listed, select random starting chunk
        else:
            start = random.choice(chunk_starts)
            end = start + 0.5
            collector[day] = (start, end)

        # half an hour was distributed
        h -= 0.5

    data = []
    for day in range(1, days_in_month + 1):
        if day in collector:
            date = datetime.date(year, month, day)
            s, e = collector[day]
            # Split the fractional hour values into hours and minutes.
            start = datetime.datetime.combine(
                date, datetime.time(int(s), int((s % 1) * 60)))
            end = datetime.datetime.combine(
                date, datetime.time(int(e), int((e % 1) * 60)))
            duration = end - start
            data.append({
                'day': "{}.".format(day),
                'start': start.strftime("%H:%M"),
                'end': end.strftime("%H:%M"),
                'duration': format_timedelta(duration),
                'date': date.strftime("%d.%m.")
            })
        else:
            data.append({
                'day': "{}.".format(day),
                'start': "",
                'end': "",
                'duration': "",
                'date': ""
            })

    # additional format strings
    header_date = "{:0>2d}/{}".format(month, year)
    total_hours_formatted = format_timedelta(datetime.timedelta(hours=hours))
    return data, header_date, total_hours_formatted