Ejemplo n.º 1
0
    def fit(self, X: dt.Frame, y: np.array = None):
        """Fit is used to keep the memory of Holidays.

        Reads only ``self.time_column`` from ``X`` and stores, in
        ``self.memos``, one pandas DataFrame per key: ``'country'`` for the
        nationwide German calendar and one entry per province code. Each
        frame has a ``'year'`` and a ``'doy'`` (day of year) column with one
        row per holiday. ``y`` is accepted but not used.
        """
        provinces = [
            'BW', 'BY', 'BE', 'BB', 'HB', 'HH', 'HE', 'MV', 'NI', 'NW',
            'RP', 'SL', 'SN', 'ST', 'SH', 'TH'
        ]

        # For holidays we only need the date
        X = X[:, self.time_column].to_pandas()
        # Transform to pandas date time
        X[self.time_column] = pd.to_datetime(X[self.time_column])
        # Compute min and max year to decide the number of years in advance we keep
        mn_year = X[self.time_column].dt.year.min()
        mx_year = X[self.time_column].dt.year.max()
        if np.isnan(mn_year) or np.isnan(mx_year):
            # No usable dates: nothing to populate
            years = []
        else:
            # Start at min year and end at 2*max_year - min_year + 1
            # If min year is 2016, max year 2018
            # then we keep dates until 2021
            # As a reminder np.arange(1, 3) returns [1, 2]
            years = np.arange(int(mn_year),
                              int(mx_year + mx_year - mn_year + 2))

        def holiday_table(calendar):
            """Return the 'year'/'doy' frame for every holiday in ``calendar``."""
            for year in years:
                calendar._populate(year)
            calendar.observed = False
            hdays = [date for date, name in sorted(calendar.items())]
            table = pd.DataFrame(hdays,
                                 columns=[self.time_column],
                                 dtype='datetime64[ns]')
            table['year'] = table[self.time_column].dt.year
            table['doy'] = table[self.time_column].dt.dayofyear
            # Every holiday of every year is kept; only the raw date column
            # is dropped once year/doy have been derived from it.
            table.drop(self.time_column, axis=1, inplace=True)
            return table

        # Germany general and province holidays
        self.memos = {}

        # General first
        self.memos['country'] = holiday_table(holidays.DE())

        # Now do province in the same manner
        for prov in provinces:
            self.memos[prov] = holiday_table(holidays.DE(prov=prov))
Ejemplo n.º 2
0
 def transform(self, X: dt.Frame):
     """Return boolean German-holiday indicator columns for ``self.time_column``.

     The result has one ``is_ge_holiday`` column for the nationwide calendar
     and one ``is_ge_holiday_<code>`` column per province; the time column
     itself is removed before returning.
     """
     province_codes = (
         "BW", 'BY', 'BE', 'BB', 'HB', 'HH', 'HE', 'MV', 'NI', 'NW',
         'RP', 'SL', 'SN', 'ST', 'SH', 'TH',
     )

     frame = X[:, self.time_column].to_pandas()

     # Nationwide calendar first
     national = holidays.DE()
     frame["is_ge_holiday"] = frame[self.time_column].apply(
         lambda stamp: stamp in national)

     # Then one indicator per province
     for code in province_codes:
         regional = holidays.DE(state=code)
         column = "is_ge_holiday_%s" % code
         frame[column] = frame[self.time_column].apply(
             lambda stamp: stamp in regional)

     frame.drop(self.time_column, axis=1, inplace=True)
     return frame
Ejemplo n.º 3
0
def ist_jetzt_handelszeit():
    """Return True when the current local time falls inside trading hours.

    Trading hours are Monday to Friday, from 09:00 up to (not including)
    17:00, on days that are not a German public holiday.
    """
    # Take a single snapshot of the clock so that date, weekday and hour
    # always describe the same instant (separate reads could straddle a
    # day or hour boundary).
    jetzt = datetime.now()
    feiertag = jetzt.date() in holidays.DE()
    richtige_zeit = 9 <= jetzt.hour < 17
    # weekday(): Monday is 0 ... Sunday is 6
    richtiger_tag = jetzt.weekday() < 5
    return not feiertag and richtiger_tag and richtige_zeit
def get_holidays():
    """Build a list of ``Holiday`` records for every entry of ``states``.

    For each province key in ``states`` the German holidays of 2013-2018
    are visited in sorted order and wrapped in a ``Holiday`` together with
    the mapped state value.
    """
    collected = []
    for code, label in states.items():
        calendar = holidays.DE(prov=code, years=range(2013, 2019))
        for day, title in sorted(calendar.items()):
            # A throwaway instance is rendered first; its result is unused.
            # NOTE(review): kept in case to_string has side effects - confirm.
            Holiday(day, label, title).to_string()
            collected.append(Holiday(day, label, title))

    return collected
Ejemplo n.º 5
0
def main(outdir):
    """Generate a synthetic order table and write train/test TSV files.

    Draws ``num_orders`` random orders (job site, qualification, shift, day),
    derives weekday, holiday flag and area columns, attaches a random list of
    employee ids to each order, splits the table with ``train_test_split``
    and writes ``train.tsv``, ``test_truth.tsv`` and ``test_publish.tsv``
    into ``outdir``.
    """
    # Generator seeded from config.seed; the order of the draws below
    # determines the generated data.
    rng = RandomState(MT19937(SeedSequence(config.seed)))

    # NOTE(review): the name says "berlin" but the province code passed is
    # "BW" - confirm which calendar is intended.
    berlin_holidays = holidays.DE(prov="BW")

    num_employees = 20000
    num_jobsites = 200
    num_areas = 20
    num_qualifications = 40
    num_shifts = 3
    # NOTE(review): 356, not 365 - confirm whether this is intentional.
    num_days = 356

    num_orders = 1000
    df = pd.DataFrame.from_dict({
        "Einsatzort":
        rng.randint(0, num_jobsites, num_orders),
        "Qualifikation":
        rng.randint(0, num_qualifications, num_orders),
        "Schicht":
        rng.randint(0, num_shifts, num_orders),
        "Tag":
        rng.randint(0, num_days, num_orders),
    })

    # Turn the day offset into a calendar date counted from 2019-01-01.
    df["Tag"] = df["Tag"].apply(
        lambda day: datetime(2019, 1, 1) + timedelta(day))
    df["Wochentag"] = df["Tag"].apply(lambda day: day.strftime("%a"))
    df["Feiertag"] = df["Tag"].apply(lambda day: day in berlin_holidays)

    # grouping of jobsites into areas
    # Cumulative random group sizes, rescaled so the last boundary equals
    # num_jobsites.
    area_splits = np.cumsum(rng.randint(1, 10, num_areas))
    area_splits = (area_splits.T / area_splits.max() *
                   num_jobsites).astype(int)
    # The area of a job site is the index of the first boundary above its id.
    df["Ort"] = df["Einsatzort"].apply(
        lambda jobsite_id: np.argmax(area_splits > jobsite_id))

    # For every order draw between 1 and 5 distinct employee ids.
    offers = []
    for _ in range(len(df)):
        offers.append(
            rng.choice(range(num_employees),
                       replace=False,
                       size=rng.randint(1, 6)).tolist())

    df["Mitarbeiter ID"] = offers

    train, test = train_test_split(df)

    train.to_csv(os.path.join(outdir, "train.tsv"), index=False, sep="\t")
    test.to_csv(os.path.join(outdir, "test_truth.tsv"), index=False, sep="\t")
    # The published test file leaves out the "Mitarbeiter ID" column.
    test[[
        "Einsatzort", "Qualifikation", "Schicht", "Tag", "Wochentag",
        "Feiertag", "Ort"
    ]].to_csv(os.path.join(outdir, "test_publish.tsv"), index=False, sep="\t")
Ejemplo n.º 6
0
def get_working_days(start_date, end_date):
    """Return the working days between two dates, both ends included.

    A working day is a day that is neither a Saturday/Sunday nor a public
    holiday of the German province 'BE'.

    Parameters
    ----------
    start_date, end_date :
        Date-like objects supporting subtraction and ``weekday()``.

    Returns
    -------
    list
        The working days in chronological order; empty when ``end_date``
        lies before ``start_date``.
    """
    # Build the calendar once instead of once per inspected day.
    be_holidays = holidays.DE(prov='BE')
    day_count = (end_date - start_date).days + 1
    candidates = (start_date + timedelta(days=offset)
                  for offset in range(day_count))
    return [
        day for day in candidates
        # weekday() 5 and 6 are Saturday and Sunday
        if day.weekday() not in (5, 6) and day not in be_holidays
    ]
Ejemplo n.º 7
0
def pre_reboot_state(con, consul_lock, hostname, flags):
    """Run the checks and tasks that precede a reboot, exiting on any veto.

    The function terminates the process via ``sys.exit`` when a guard fails
    (holiday, global stop flag, node disabled, no reboot required, lost
    lock). When all guards pass it runs the "pre_boot" tasks, re-checks the
    cluster, records ``hostname`` under ``service/rebootmgr/reboot_in_progress``
    (unless ``dryrun`` is set) and releases ``consul_lock``.

    ``flags`` is read with ``.get`` for: check_holidays,
    ignore_global_stop_flag, ignore_node_disabled, check_triggers,
    ignore_failed_checks, dryrun, lazy_consul_checks.
    """
    today = datetime.date.today()
    # Optional guard: do nothing on a German public holiday.
    if flags.get("check_holidays") and today in holidays.DE():
        LOG.info("Refuse to run on holiday")
        sys.exit(EXIT_HOLIDAY)

    if check_stop_flag(con) and not flags.get("ignore_global_stop_flag"):
        LOG.info("Global stop flag is set: exit")
        sys.exit(EXIT_GLOBAL_STOP_FLAG_SET)

    if is_node_disabled(con, hostname) and not flags.get("ignore_node_disabled"):
        LOG.info("Rebootmgr is disabled in consul config for this node. Exit")
        sys.exit(EXIT_NODE_DISABLED)

    # Nothing to do is a successful outcome, hence exit code 0.
    if flags.get("check_triggers") and not is_reboot_required(con, hostname):
        sys.exit(0)

    LOG.info("Entering pre reboot state")

    check_consul_services(con, hostname, flags.get("ignore_failed_checks"), ["rebootmgr", "rebootmgr_preboot"])

    LOG.info("Executing pre reboot tasks")
    run_tasks("pre_boot", con, hostname, flags.get("dryrun"))

    if not flags.get("lazy_consul_checks"):
        LOG.info("Sleep for 2 minutes. Waiting for consul checks.")
        # Actual wait is 130 seconds: two minutes plus a 10 second margin.
        time.sleep((60 * 2) + 10)

    # Re-validate everything after the pre-boot tasks have run.
    check_consul_cluster(con, flags.get("ignore_failed_checks"))
    check_consul_services(con, hostname, flags.get("ignore_failed_checks"), ["rebootmgr", "rebootmgr_preboot"])

    if not consul_lock.acquired:
        LOG.error("Lost consul lock. Exit")
        sys.exit(EXIT_CONSUL_LOST_LOCK)

    if check_stop_flag(con) and not flags.get("ignore_global_stop_flag"):
        LOG.info("Global stop flag is set: exit")
        sys.exit(EXIT_GLOBAL_STOP_FLAG_SET)

    # check again if reboot is still required
    if flags.get("check_triggers") and not is_reboot_required(con, hostname):
        sys.exit(0)

    # Record which host is rebooting; a dry run only logs the intent.
    if not flags.get("dryrun"):
        LOG.debug("Write %s in key service/rebootmgr/reboot_in_progress" % hostname)
        con.kv.put("service/rebootmgr/reboot_in_progress", hostname)
    else:
        LOG.debug("Would write %s in key service/rebootmgr/reboot_in_progress" % hostname)

    consul_lock.release()
Ejemplo n.º 8
0
    def broadcast(self, message, bot, tastatur=None, author=None):
        """Send ``message`` to every subscriber except ``author``.

        Nothing is sent when today is a public holiday in the province 'NW'.
        A ``telegram.TelegramError`` for one subscriber is logged as a
        warning and does not stop delivery to the remaining ones.
        """
        current_day = date.today()
        nrw_calendar = holidays.DE(years=[2019, 2020, 2021], prov='NW')

        # Stay silent on holidays
        if current_day in nrw_calendar:
            return

        for subscriber in Subscribers.select():
            if subscriber.chat_id != author:
                try:
                    bot.send_message(subscriber.chat_id,
                                     message,
                                     reply_markup=tastatur)
                except telegram.TelegramError as error:
                    log.warning(error)
Ejemplo n.º 9
0
def add_independent_features(df: pd.DataFrame) -> pd.DataFrame:
    """Add returnQuantity independent features to DataFrame.
    Calls methods that each add a feature in form of a column to the data.
    The columns are added to ``df`` in place and the same object is returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Cleaned table training data

    Returns
    -------
    pd.DataFrame
        Feature-enriched table
    """
    df['productPrice'] = df.price / df.quantity
    df['totalSavings'] = df.rrp - df.productPrice
    df['relativeSavings'] = (1 - df.productPrice / df.rrp).fillna(1.)
    df['orderYear'] = df.orderDate.apply(lambda x: x.year)
    df['orderMonth'] = df.orderDate.apply(lambda x: x.month)
    df['orderDay'] = df.orderDate.apply(lambda x: x.day)
    df['orderWeekDay'] = df.orderDate.apply(lambda x: x.dayofweek)
    df['orderDayOfYear'] = df.orderDate.apply(lambda x: x.dayofyear)
    df['orderWeek'] = df.orderDate.apply(lambda x: x.week)
    df['orderWeekOfYear'] = df.orderDate.apply(lambda x: x.weekofyear)
    df['orderQuarter'] = df.orderDate.apply(lambda x: x.quarter)
    df['orderTotalDay'] = df.orderDate.apply(total_day)
    df['orderSeason'] = df.orderDate.apply(date_to_season)
    # Build the holiday calendar once and reuse it for every row instead of
    # constructing a fresh calendar per order date.
    german_holidays = holidays.DE()
    df['orderIsOnGermanHoliday'] = df.orderDate.apply(
        lambda x: 1 if x in german_holidays else 0)
    df['surplusArticleQuantity'] = same_article_surplus(df)
    df['surplusArticleSizeQuantity'] = same_article_same_size_surplus(df)
    df['surplusArticleColorQuantity'] = same_article_same_color_surplus(df)
    df['totalOrderShare'] = total_order_share(df)
    df['voucherSavings'] = voucher_saving(df)
    # df['voucherFirstUsedDate'] = pd.to_datetime(df.t_voucher_firstUsedDate_A).apply(total_day)
    # df['voucherLastUsedDate'] = pd.to_datetime(df.t_voucher_lastUsedDate_A).apply(total_day)
    df['products3DayNeighborhood'] = orders_in_neighborhood(df, 3)
    df['products7DayNeighborhood'] = orders_in_neighborhood(df, 7)
    df['products14DayNeighborhood'] = orders_in_neighborhood(df, 14)
    df['products30DayNeighborhood'] = orders_in_neighborhood(df, 30)
    df['previousOrders'] = previous_orders(df)
    # Position of each row inside its order / its order date
    df['t_posInOrder'] = df.groupby(
        'orderID',
        as_index=False).apply(pos_in_grouping).reset_index(level=0, drop=True)
    df['t_posInDay'] = df.groupby(
        'orderDate',
        as_index=False).apply(pos_in_grouping).reset_index(level=0, drop=True)
    return df
Ejemplo n.º 10
0
def load(country, region, observed, expand, years):
    """Replace the module-level holiday cache ``dates`` for ``country``.

    Parameters
    ----------
    country : str
        One of "US", "CA", "MX", "NZ", "AU", "AT", "DE".
    region :
        State (US) or province (CA, NZ, AU, AT, DE); ignored for MX.
    observed, expand, years :
        Passed through unchanged to the ``holidays`` constructor.

    For an unknown country a message is printed and ``dates`` is left
    untouched.
    """
    # Erases existing holiday cache and makes a new one...
    global dates

    common = dict(observed=observed, expand=expand, years=years)
    # Countries whose constructor takes the region as ``prov``
    prov_countries = ("CA", "NZ", "AU", "AT", "DE")

    if country == "US":
        dates = holidays.US(state=region, **common)
    elif country in prov_countries:
        dates = getattr(holidays, country)(prov=region, **common)
    elif country == "MX":
        dates = holidays.MX(**common)
    else:
        # Single pre-formatted argument: prints the same text under
        # Python 2 and Python 3 (the bare print statement is a syntax
        # error on Python 3).
        print("UNKNOWN COUNTRY  %s" % country)
Ejemplo n.º 11
0
def cal_day(context, year, week, day):
    """Fill ``context`` with the calendar data for one ISO year/week/day.

    Sets the keys 'day', 'currdate', 'is_past', 'is_holiday' (province
    'NW'), 'unavailable' (hours from 8 to 17 whose slot is found in
    ``context['bookings']``) and 'hours' (``range(8, 18)``), then returns
    the same ``context`` object.
    """
    currdate = iso_to_gregorian(year, week, day)

    context['day'] = day
    context['currdate'] = currdate
    context['is_past'] = currdate < datetime.date.today()
    context['is_holiday'] = currdate in holidays.DE(prov='NW')

    def slot_at(hour):
        # Start of the given hour on the requested day
        return datetime.datetime(
            currdate.year, currdate.month, currdate.day, hour=hour)

    hourly_slots = rrule(HOURLY, dtstart=slot_at(8), until=slot_at(17))
    context['unavailable'] = [
        slot.hour for slot in hourly_slots if slot in context['bookings']
    ]
    context['hours'] = range(8, 18)
    return context
Ejemplo n.º 12
0
def parse_prov(in_):
    """Return a German holiday calendar for a value such as ``"DE-BY"``.

    When ``in_`` is a string containing a dash, the part after the first
    dash (up to the next one) is used as the province; otherwise the
    nationwide calendar is returned.
    """
    has_province = isinstance(in_, str) and "-" in in_
    if not has_province:
        return holidays.DE()
    pieces = in_.split("-")
    return holidays.DE(prov=pieces[1])
    def ts_fit(self, suppress=False):
        """Fit Prophet to the time series data.

         When ``self.hyper_params`` is set, a grid search is run through
         ``self._gs`` and this object's attributes are replaced with those of
         the best forecaster found. Otherwise a single Prophet model is built
         (optionally with AT/DE/US holidays and extra seasonalities), fitted on
         the training split, and in-sample fitted values, residuals and
         confidence bounds are stored on the instance.

         Parameters:
         ----------
         suppress: bool
            Suppress or not some of the output messages

         Returns:
         -------
         ``self`` after a direct fit, ``-1`` when building or fitting the model
         raised. The grid-search branch has no explicit return value.
         The process is terminated via ``sys.exit("STOP")`` when diagnosing is
         requested without step/horizon or when the country is unsupported.
         """

        if self.hyper_params is not None:
            self._gs.set_forecaster(self)
            self._gs.set_hyper_params(self.hyper_params)
            # a very important command here to avoid endless loop
            self.hyper_params = None
            self._prophet_logger.info("***** Starting grid search *****")
            self._gs = self._gs.grid_search(suppress=suppress, show_plot=False)
            # Adopt the state of the best forecaster found by the search
            self.best_model = self._gs.best_model
            self.__dict__.update(self.best_model['forecaster'].__dict__)
            self._prophet_logger.info("***** Finished grid search *****")
        else:
            self._prepare_fit()
            self._model = None
            self.ts_split()

            ts_df = self._train_dt.copy()
            ts_test_df = self._test_dt
            # sanity check: drop weekend indicator columns left from an earlier fit
            if 'on_weekend' in ts_df.columns:
                ts_df.drop(['on_weekend', 'off_weekend'], inplace=True, axis=1)
                # ts_test_df.drop(['on_weekend', 'off_weekend'], inplace=True, axis=1)
            # Fit
            self._prophet_logger.info("Trying to fit the Prophet model....")
            try:
                if not suppress:
                    self._prophet_logger.info("...via using parameters\n")
                    print_attributes(self)
                # diagnose on? then step and horizon are mandatory
                if self._diagnose:
                    try:
                        assert self._step is not None and self._horizon is not None
                    except (KeyError, AssertionError):
                        self._prophet_logger.warning("You want to diagnose the Prophet model. Please provide parameters "
                                                     "'step' and 'horizon' within object initialization!")
                        sys.exit("STOP")

                # Move the index into a column and apply the configured column names
                ts_df = ts_df.reset_index()
                ts_df.columns = self._ts_df_cols
                if ts_test_df is not None and not ts_test_df.empty:
                    ts_test_df = ts_test_df.reset_index()
                    ts_test_df.columns = self._ts_df_cols
                # Weekend seasonality replaces the built-in weekly seasonality
                weekly_s = self._weekly_seasonality
                if self._weekend_seasonality:
                    # force to False
                    weekly_s = False
                # Build the model, with or without a holiday table
                if not self._consider_holidays:
                    self._model = Prophet(interval_width=self._prophet_interval_width,
                                          yearly_seasonality=self._yearly_seasonality,
                                          weekly_seasonality=weekly_s,
                                          daily_seasonality=self._daily_seasonality,
                                          changepoint_range=self._changepoint_range,
                                          changepoint_prior_scale=self._changepoint_prior_scale)
                else:
                    try:
                        assert self._country in ['AT', 'DE', 'US']
                    except AssertionError:
                        self._prophet_logger.exception("Assrtion exception occurred. Right now, Austria (AT), "
                                                       "Germany(DE) and USA (US) supported.")
                        sys.exit("STOP")
                    else:
                        # Holidays for every distinct year present in the series index
                        holi = None
                        if self._country == 'AT':
                            holi = holidays.AT(state=None, years=list(np.unique(np.asarray(self.ts_df.index.year))))
                        elif self._country == 'DE':
                            holi = holidays.DE(state=None, years=list(np.unique(np.asarray(self.ts_df.index.year))))
                        elif self._country == 'US':
                            holi = holidays.US(state=None, years=list(np.unique(np.asarray(self.ts_df.index.year))))
                        # Convert the calendar into a frame with 'ds'/'holiday' columns
                        holi_dict = dict()
                        for date, name in sorted(holi.items()):
                            holi_dict[date] = name

                        df_holi = pd.DataFrame.from_dict(data=holi_dict, orient='index').reset_index()
                        df_holi.columns = ['ds', 'holiday']
                        # Zero-width windows: the effect covers the holiday itself only
                        df_holi['lower_window'] = 0
                        df_holi['upper_window'] = 0
                        self._model = Prophet(interval_width=self._prophet_interval_width,
                                              yearly_seasonality=self._yearly_seasonality,
                                              weekly_seasonality=weekly_s,
                                              daily_seasonality=self._daily_seasonality,
                                              changepoint_range=self._changepoint_range,
                                              changepoint_prior_scale=self._changepoint_prior_scale,
                                              holidays=df_holi)

                if self._monthly_seasonality:
                    self._model.add_seasonality(name='monthly', period=30.5, fourier_order=20)
                    if not suppress:
                        self._prophet_logger.info("Added monthly seasonality.")

                if self._quarterly_seasonality:
                    self._model.add_seasonality(name='quarterly', period=91.5, fourier_order=20)
                    if not suppress:
                        self._prophet_logger.info("Added quarterly seasonality.")

                if self._weekend_seasonality:
                    # Two complementary condition columns, one per conditional seasonality
                    ts_df['on_weekend'] = ts_df['ds'].apply(self.we_season)
                    ts_df['off_weekend'] = ~ts_df['ds'].apply(self.we_season)
                    self._train_dt = ts_df.copy()
                    self._train_dt.set_index('ds', inplace=True)
                    # Same columns on the test split, when there is one
                    if ts_test_df is not None and not ts_test_df.empty:
                        ts_test_df['on_weekend'] = ts_test_df['ds'].apply(self.we_season)
                        ts_test_df['off_weekend'] = ~ts_test_df['ds'].apply(self.we_season)
                        self._test_dt = ts_test_df.copy()
                        self._test_dt.set_index('ds', inplace=True)
                    # and add
                    self._model.add_seasonality(name='weekend_on_season', period=7,
                                                fourier_order=5, condition_name='on_weekend')
                    self._model.add_seasonality(name='weekend_off_season', period=7,
                                                fourier_order=5, condition_name='off_weekend')

                    if not suppress:
                        self._prophet_logger.info("Added week-end seasonality.")

                # tic
                start = time()
                self.model_fit = self._model.fit(ts_df)
                # toc
                if not suppress:
                    self._prophet_logger.info("Time elapsed: {} sec.".format(time() - start))
            except (Exception, ValueError):
                # Any failure while building or fitting is logged and reported as -1
                self._prophet_logger.exception("Prophet error...")
                return -1
            else:
                self._prophet_logger.info("Model successfully fitted to the data!")

                # Fitted values
                self._prophet_logger.info("Computing fitted values and residuals...")
                # in-sample predict
                try:
                    self.fittedvalues = self._model.predict(ts_df.drop('y', axis=1))
                except (Exception, ValueError):
                    self._prophet_logger.exception("Prophet predict error...")
                # Residuals
                try:
                    # use fittedvalues to fill in the model dictionary
                    self.residuals = pd.Series(np.asarray(ts_df.y) - np.asarray(self.fittedvalues['yhat']),
                                               index=self._train_dt.index)
                except (KeyError, AttributeError):
                    self._prophet_logger.exception("Model was not fitted or ts has other structure...")
                # NOTE(review): these two lines are not guarded; they appear to rely on
                # self.fittedvalues having been set by the predict step above - confirm.
                self.lower_conf_int = pd.Series(np.asarray(self.fittedvalues['yhat_lower']), index=self._train_dt.index)
                self.upper_conf_int = pd.Series(np.asarray(self.fittedvalues['yhat_upper']), index=self._train_dt.index)

                self._prophet_logger.info("Done.")
            return self
Ejemplo n.º 14
0
def isHoliday(myYear, myMonth, myDay):
    """Tell whether the given date is a holiday in the province 'BW'.

    The calendar is extended with 2017-10-31 ("Reformationstag") before
    the lookup.
    """
    calendar = holidays.DE(prov='BW')
    extra_days = {"2017-10-31": "Reformationstag"}
    calendar.append(extra_days)

    queried_day = datetime.date(myYear, myMonth, myDay)
    return queried_day in calendar
Ejemplo n.º 15
0
        pass

    settings["country_last_updated"] = now
    settings["country_last"] = country_last
    settings.flush()
    return country_last


country_holidays = {
    "CA": holidays.CA(),
    "CO": holidays.CO(),
    "MX": holidays.MX(),
    "US": holidays.US(),
    "NZ": holidays.NZ(),
    "AU": holidays.AU(),
    "DE": holidays.DE(),
    "AT": holidays.AT(),
    "DK": holidays.DK(),
    "UK": holidays.UK(),
    "IE": holidays.IE(),
    "ES": holidays.ES(),
    "CZ": holidays.CZ(),
    "SK": holidays.SK(),
    "PL": holidays.PL(),
    "PT": holidays.PT(),
    "NL": holidays.NL(),
    "NO": holidays.NO(),
    "IT": holidays.IT(),
    "SE": holidays.SE(),
    "JP": holidays.JP(),
    "BE": holidays.BE(),
Ejemplo n.º 16
0
def cleanData(df, settype):
    """Cleans data. Set settype to 'train' for training set and 'test' for test set.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw table. The columns read are 'Store', 'Date', 'Sales',
        'Customers', 'Open', 'SchoolHoliday', 'StateHoliday' and
        'CompetitionDistance'. The caller's frame is not modified.
    settype : str
        'train' computes the fill-in means from ``df``; 'test' uses fixed
        constants instead.

    Returns
    -------
    pandas.DataFrame
        Rows with a store id, non-zero sales, a non-closed shop and no
        remaining missing value; 'Year', 'Month', 'Day' and 'DayOfWeek'
        are added and 'Date' plus the helper index columns are removed.
    """
    # eliminate rows where store is empty
    df = df[df['Store'].notnull()].reset_index()
    print("Dropped rows without store-ids")

    # extract year, month and day from Date
    date = pd.DatetimeIndex(df['Date'])
    df['Year'] = date.year
    df['Month'] = date.month
    df['Day'] = date.day
    print("Extracted year, month and day from Date")

    # extract day of week, shifted so that Monday is 1
    df['DayOfWeek'] = date.dayofweek + 1
    print("Extracted and reset day of week")

    # Direct .loc writes are used throughout: chained df[col][i] = value
    # assignments are not guaranteed to reach the frame.
    df.loc[df['Sales'].isnull() & (df['Customers'] == 0), 'Sales'] = 0
    print('Set Sales to 0 if customers are 0')

    # deleting 0 sales rows; the second reset_index adds a 'level_0' column
    df = df[df['Sales'] != 0].reset_index()
    print('Dropped 0-sales rows in df')

    # Sets Open to 1 if Sales happened while Open is unknown
    df.loc[(df['Sales'] > 0) & df['Open'].isnull(), 'Open'] = 1
    print('Set Open = 1 if Sales > 0')

    # missing school holiday flags count as "no school holiday"
    df['SchoolHoliday'] = df['SchoolHoliday'].fillna(0.0)
    print("Filled school holidays based on state holidays")

    # Sets all shops with unknown 'Open' to 0 on a German public holiday.
    # The calendar is built once, and only when there is something to fill.
    open_unknown = df['Open'].isnull()
    if open_unknown.any():
        de_holidays = holidays.DE()
        on_holiday = df.loc[open_unknown, 'Date'].apply(
            lambda day: day in de_holidays).astype(bool)
        df.loc[on_holiday[on_holiday].index, 'Open'] = 0
    print('Public Holidays updated')

    # take care of regional stateholiday: on Jan 6 and Jun 1 a missing value
    # is copied from the same store's entry for that day in the other year
    # (2014 for rows of 2013, 2013 for all other years).
    regional_days = {(1, 6): '01-06', (6, 1): '06-01'}
    for i in range(len(df)):
        if not pd.isnull(df.loc[i, 'StateHoliday']):
            continue
        month_day = regional_days.get(
            (int(df.loc[i, 'Month']), int(df.loc[i, 'Day'])))
        if month_day is None:
            continue
        other_year = '2014' if df.loc[i, 'Year'] == 2013 else '2013'
        same_store = df[df['Store'] == df.loc[i, 'Store']]
        reference = same_store[same_store['Date'] == other_year + '-' + month_day]
        # No matching row for the other year: leave the value missing
        if len(reference) > 0:
            df.loc[i, 'StateHoliday'] = reference['StateHoliday'].values[0]

    print('Finished regional stateholidays')

    # take care of remained stateholiday
    def remained_stateholiday(row):
        if not pd.isnull(row['StateHoliday']):
            return row['StateHoliday']
        if pd.isnull(row['Open']):
            return '0' if row['Sales'] > 0 else 'a'
        return 'a' if row['Open'] == 0.0 else '0'

    df['StateHoliday'] = df.apply(remained_stateholiday, axis=1)
    print('Finished cleaning remaining stateholidays')

    # Sets all shops with unknown 'Open' based on the stateholiday state:
    # closed on any kind of holiday ('a', 'b' or 'c'), open otherwise.
    def open_stateholiday(row):
        if pd.isnull(row['Open']) and row['StateHoliday'] in ('a', 'b', 'c'):
            return 0.0
        elif pd.isnull(row['Open']) and row['StateHoliday'] == '0':
            return 1.0
        else:
            return row['Open']

    df['Open'] = df.apply(open_stateholiday, axis=1)

    print('Adjusted open status of shops according to state holidays')

    # fill empty 'Customers' with average customer number when open=1.0, when open=0.0 customer=0.0
    if settype == 'train':
        df_mean_customers = df['Customers'].mean()
        print('Mean customers of test cleaning:' + str(df_mean_customers))
    elif settype == 'test':
        df_mean_customers = 758.7492748450405

    def helper_customers(row):
        if pd.isnull(row['Customers']) and row['Open'] == 1.0:
            return df_mean_customers
        elif pd.isnull(row['Customers']) and row['Open'] == 0.0:
            return 0.0
        else:
            return row['Customers']

    df.loc[:, 'Customers'] = df.apply(helper_customers, axis=1)

    print('Finished filling in empty customers cells')

    # Fills empty 'Sales'-Cells in train with average if there have been non 0 customers in the shop
    if settype == 'train':
        mean_sales = df.loc[:, 'Sales'].mean()
        print('Mean Sales of training set = ' + str(mean_sales))
    elif settype == 'test':
        mean_sales = 6836.722219708965

    def helper_sales(row):
        if pd.isnull(row['Sales']) and float(row['Customers']) > 0:
            return mean_sales
        else:
            return row['Sales']

    df['Sales'] = df.apply(helper_sales, axis=1)
    print("Finished cleaning sales")

    if settype == 'train':
        competitionDistanceMean = df.loc[:, 'CompetitionDistance'].mean()
        print('Mean Competition Distance of training set = ' + str(competitionDistanceMean))
    elif settype == 'test':
        competitionDistanceMean = 5446.105182647453

    def fillEmptyDistances(row):
        """Filling empty distances with mean"""
        if pd.isnull(row['CompetitionDistance']):
            return competitionDistanceMean
        else:
            return row['CompetitionDistance']

    df['CompetitionDistance'] = df.apply(fillEmptyDistances, axis=1)

    # keep only open shops with sales, then remove helper columns
    df = df[df['Open'] != 0]
    df = df[df['Sales'] != 0]
    df = df.drop(columns=['Date'])
    # 'index' and 'level_0' were introduced by the two reset_index calls
    for helper_column in ('level_0', 'index'):
        if helper_column in df.columns:
            df = df.drop(columns=[helper_column])
    df = df.dropna(axis=0, how='any')

    print('Dropped last leftovers')
    print('Clean all done!')
    return df
Ejemplo n.º 17
0
from datetime import date

import holidays
from django.contrib.auth.models import User
from django.db import models
from django.utils.translation import gettext_lazy as _
from ls.joyous.models import CalendarPage, CalendarPageForm
from wagtail.admin.edit_handlers import FieldPanel, MultiFieldPanel, FieldRowPanel
from wagtail.core.models import Page, PageQuerySet
from wagtail.images.edit_handlers import ImageChooserPanel
from wagtail.snippets.models import register_snippet

from home.models import MenuMixin

# Register the German public holidays for Bavaria with the shared calendar.
# The province is selected by its string code: ``holidays.BY`` is the Belarus
# country class, not the Bavaria code, so it must not be passed as ``prov``.
CalendarPage.holidays.register(holidays.DE(prov='BY'))
# One-off custom entry shown next to the official holidays.
CalendarPage.holidays.add(date=date(2019, 10, 25), value='HAPPY')

# Only the proxy subclass below is meant to be created, not CalendarPage.
CalendarPage.is_creatable = False


class GirotondoCalendar(CalendarPage, MenuMixin):
    class Meta:
        proxy = True

    def get_context(self, request, *args, **kwargs):
        print('get context')
        siblings = self.get_root().get_siblings(inclusive=True).filter(
            live=True, show_in_menus=True)
        root_kids = self.get_root().get_children().filter(live=True,
                                                          show_in_menus=True)
        menus = self.get_children().filter(
Ejemplo n.º 18
0
#!/usr/bin/env python3

import datetime
import holidays

# Print every Bavarian public holiday date of the current and the next year.
now = datetime.datetime.now()
# ``expand`` is a boolean flag; the string 'False' is truthy and would leave
# automatic year expansion switched on.
for holiday_date in holidays.DE(prov='BY',
                                years=[now.year, now.year + 1],
                                expand=False):
    print(holiday_date)
Ejemplo n.º 19
0
def find_good_epics():
    """Return the epic ids that are tradeable right now with a tight spread.

    For every id in ``main_epic_ids`` the market details are fetched from
    ``REAL_OR_NO_REAL + '/markets/' + epic_id``.  An epic is selected when

    * its id does not contain one of the skipped currency codes
      (MXN, SEK, NOK, CNH),
    * at least one market whose currency code appears in the id is inside
      its 08:00-16:00 local window (as decided by ``is_between``) and the
      local date is not a public holiday for ``YEAR_var`` there, and
    * ``bid - offer`` taken from the response snapshot is at least -1.

    Fetching and parsing happen inside the per-epic error handler, so one
    failing request or malformed response is printed and skipped instead of
    aborting the whole scan.

    Returns:
        list: the selected epic ids, in ``main_epic_ids`` order.
    """
    # (time zone, currency code looked for in the epic id, holiday calendar),
    # checked in this order; the first matching open market wins.
    markets = (
        ('Europe/Berlin', 'EUR', holidays.DE),
        ('Europe/London', 'GBP', holidays.UK),
        ('America/New_York', 'USD', holidays.US),
        ('Australia/Sydney', 'AUD', holidays.AU),
        ('Asia/Tokyo', 'JPY', holidays.JP),
    )
    skipped_currencies = ('MXN', 'SEK', 'NOK', 'CNH')

    def local_now(zone, fmt):
        """Format the current time in ``zone`` with ``fmt``."""
        return datetime.datetime.now(tz=pytz.timezone(zone)).strftime(fmt)

    # Local calendar date of every market, taken once up front.
    local_today = {}
    for zone, _, _ in markets:
        local_today[zone] = local_now(zone, '%Y-%m-%d')
        print(zone + " :- Today's Date is ..." + local_today[zone])

    # A market is on holiday when its local date is one of the calendar's
    # dates for YEAR_var.  Comparing ISO strings keeps the lookup limited to
    # the years that were explicitly requested.
    on_holiday = {
        zone: local_today[zone] in {str(day) for day in calendar(years=YEAR_var)}
        for zone, _, calendar in markets
    }

    pick_from_epics = []
    i_count = 0
    for epic_id in main_epic_ids:
        i_count += 1
        try:
            base_url = REAL_OR_NO_REAL + '/markets/' + epic_id
            auth_r = requests.get(base_url, headers=authenticated_headers)
            d = json.loads(auth_r.text)

            if any(code in epic_id for code in skipped_currencies):
                # Skipped currency; the pause presumably paces the API calls.
                time.sleep(1)
                continue

            trade_ok = False
            for zone, currency, _ in markets:
                now_time = local_now(zone, '%H:%M')
                if zone == 'Europe/London':
                    # Wait out the tally-up hour before evaluating London.
                    while is_between(str(now_time), ("22:00", "22:59")):
                        time.sleep(1)
                        print("!!DEBUG!! Tally Up hour:" + str(now_time))
                        now_time = local_now(zone, '%H:%M')
                if is_between(str(now_time), ("08:00", "16:00")):
                    time.sleep(1)
                    if currency in epic_id and not on_holiday[zone]:
                        trade_ok = True
                        break

            if not trade_ok:
                continue

            current_bid = d['snapshot']['bid']
            ask_price = d['snapshot']['offer']
            spread = float(current_bid) - float(ask_price)
            if spread >= -1:
                pick_from_epics.append(epic_id)
                print("!!DEBUG!!...FOUND GOOD EPIC..." + str(i_count) +
                      "/" + str(len(main_epic_ids)))
            else:
                print(
                    "!!DEBUG!!...skipping, NO GOOD EPIC....Checking next epic spreads..."
                    + str(i_count) + "/" + str(len(main_epic_ids)))
            time.sleep(1)

        except Exception as e:
            # Deliberately best-effort: report the problem and move on to
            # the next epic.
            print(e)

    return pick_from_epics
Ejemplo n.º 20
0
#!/usr/bin/python3.6
from datetime import date, datetime
import holidays
import config
import tele_util
import lst

swagbot = tele_util.startBot(config.swagbot)

# Read the date once so the holiday check and the holiday name lookup refer
# to the same day, and build the calendar for the current year instead of a
# hard-coded one.
today = date.today()
de_holidays = holidays.DE(years=today.year)

# Tell every chat subscribed to 'holidays' which holiday it is today.
if today in de_holidays:
    sql = "select chat_id from props where name='holidays'"
    # NOTE(review): the query below uses tele_util.readSQL; confirm that
    # readSQLL is a real helper and not a typo.
    rows = tele_util.readSQLL(sql)
    for r in rows:
        swagbot.sendMessage(r[0],
                            'Heute haben wir frei => *' + u'\U0001F389' +
                            de_holidays[today] + '*' +
                            u'\U0001F389',
                            parse_mode='Markdown')

# Remind chats about their backlog every ``value`` days, counted by the day
# of the year.
sql = "select chat_id, value from props where name='backlog/reminder'"
rows = tele_util.readSQL(sql)
doy = today.timetuple().tm_yday
for r in rows:
    if doy % int(r[1]) == 0:
        backlog = lst.getList(r[0], 'backlog')
        if len(backlog) > 0:
            swagbot.sendMessage(r[0],
                                ('Es steht folgendes aus: %s' % backlog))
Ejemplo n.º 21
0
    poolclass=NullPool,  # dont maintain a pool of connections
    pool_recycle=3600  # handles timeouts better, I think...
)
# Every distinct state known to the locations table.
q = """
  SELECT distinct(state_id), state
  FROM locations
"""
df_todoliste = pd.read_sql(q, aws_engine)

# Meant to run at the end of a calendar year: the holiday window starts with
# next year and spans five years.
relevant_year = datetime.today().year + 1
holiday_years = np.arange(relevant_year, relevant_year + 5)

# One [date string, state_id] pair per public holiday of each state.
germany_public_holidays = [
    [str(holiday), state_id]
    for state_id in df_todoliste['state_id']
    for holiday in holidays.DE(years=holiday_years, prov=state_id)
]

# Upload the collected pairs to the database.
with aws_engine.connect() as cnx:
    q = """
        REPLACE INTO holidays (dt, state_id)
        VALUES(%s,%s)
    """
    cnx.execute(q, germany_public_holidays, multi=True)
Ejemplo n.º 22
0
                    for date, name in sorted(holidays.ES(years=years).items()):
                            st.write(date,name)                      

                if selected_country == 'United States':
                    
                    for date, name in sorted(holidays.US(years=years).items()):
                            st.write(date,name)
                            
                if selected_country == 'France':
                    
                    for date, name in sorted(holidays.FR(years=years).items()):
                            st.write(date,name)
                            
                if selected_country == 'Germany':
                    
                    for date, name in sorted(holidays.DE(years=years).items()):
                            st.write(date,name)
                            
                if selected_country == 'Ukraine':
                    
                    for date, name in sorted(holidays.UKR(years=years).items()):
                            st.write(date,name)

                else:
                    holidays = False
                            
                holidays = st.checkbox('Add country holidays to the model')

        with st.beta_expander('Hyperparameters'):
            st.write('In this section it is possible to tune the scaling coefficients.')
            
Ejemplo n.º 23
0
def create_request():
    """Collect yesterday's bike count, weather, holiday flag and prediction.

    For every row of ``locations_df`` this

    1. posts to the eco-counter endpoint for the bike count between
       ``yesterday_date`` and ``today_date``,
    2. walks the nearby weather stations until one has a daily record for
       yesterday,
    3. reverse-geocodes the coordinates to a German state and checks whether
       yesterday was a public holiday there, and
    4. asks ``BikePrediction.predict_single`` for the expected count.

    Locations without bike-count data or without any weather record are
    skipped.

    Returns:
        list[dict]: one data set per location that had data.
    """
    data_sets = []
    BP = BikePrediction()
    WEATHER_API_KEY = os.getenv('WEATHER_API_KEY')
    # Row-independent values are prepared once instead of once per location.
    geolocator = Nominatim(user_agent="everyonecounts")
    url = 'http://data.eco-counter.com/ParcPublic/CounterData'
    headers = {
        "Accept": "text/plain, */*; q=0.01",
        "Accept-Encoding": "gzip, deflate",
        "Accept-Language": "en-US,en;q=0.9",
        "Connection": "keep-alive",
        "Content-Length": "115",
        "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
        "Cookie":
        "i18next=en_US; _ga=GA1.2.1682226698.1584790632; _gid=GA1.2.220973166.1584790632",
        "Host": "data.eco-counter.com",
        "Origin": "http://data.eco-counter.com",
        "Referer": "http://data.eco-counter.com/ParcPublic/?id=4586",
        "User-Agent":
        "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.80 Safari/537.36",
        "X-Requested-With": "XMLHttpRequest"
    }
    # Date part of yesterday_date, used by the weather API and holiday check.
    yesterday_str = str(yesterday_date).split()[0]

    for index, row in locations_df.iterrows():
        # --- bike count ---------------------------------------------------
        # NOTE(review): when the row has 'pratiques' the body ends with
        # "&pratiques=&pratiques=<value>" because the prefix is added here
        # and in the template; kept as is, confirm against the endpoint.
        pratiques = ""
        if hasattr(row, 'pratiques'):
            pratiques = "&pratiques=" + row.pratiques
        body = "idOrganisme=4586&idPdc={}&fin={}%2F{}%2F{}&debut={}%2F{}%2F{}&interval=4&pratiques={}".format(
            row.idPdc, today_date.day, today_date.month, today_date.year,
            yesterday_date.day, yesterday_date.month, yesterday_date.year,
            pratiques)
        bike_count_data = requests.post(url, body, headers=headers)
        # The last element of the response is dropped; the first remaining
        # entry is used as yesterday's count.
        bike_counts = bike_count_data.json()[:-1]
        if not bike_counts:
            # no data available for this location
            continue
        bike_count_data_entry = bike_counts[0]

        # --- weather --------------------------------------------------------
        weather_stations = requests.get(
            'https://api.meteostat.net/v1/stations/nearby?lat={}&lon={}&limit=20&key={}'
            .format(row.lat, row.lon, WEATHER_API_KEY))
        weather_entries = []
        # Try the nearby stations in turn until one has a record for
        # yesterday.
        for station in weather_stations.json()['data']:
            weather_data = requests.get(
                'https://api.meteostat.net/v1/history/daily?station={}&start={}&end={}&key={}'
                .format(station['id'], yesterday_str, yesterday_str,
                        WEATHER_API_KEY))
            weather_entries = weather_data.json()['data']
            if weather_entries and (
                    weather_entries[-1]['date'] == yesterday_str):
                break
        if not weather_entries:
            # No station delivered any record: skip the location instead of
            # failing on an empty result.
            continue
        weather_data_entry = weather_entries[0]

        # --- public holidays ----------------------------------------------
        location = geolocator.reverse(str(row['lat']) + "," + str(row['lon']))
        address = location.raw['address']
        # when city == province (city states) no 'state' is returned
        province = address['state'] if 'state' in address else address['city']
        province_abb = province_abbs[province]
        province_public_holidays = {
            str(day)
            for day in holidays.DE(years=[yesterday_date.year],
                                   prov=province_abb)
        }

        data_set = {}
        data_set['date'] = yesterday_str
        data_set['bike_count'] = str(bike_count_data_entry[1])
        data_set['name'] = row['nom']
        data_set['lon'] = row['lon']
        data_set['lat'] = row['lat']
        data_set['temperature'] = weather_data_entry['temperature']
        data_set['precipitation'] = weather_data_entry['precipitation']
        data_set['snowdepth'] = weather_data_entry['snowdepth']
        data_set['windspeed'] = weather_data_entry['windspeed']
        data_set['sunshine'] = weather_data_entry['sunshine']
        data_set['is_holiday'] = (
            1 if yesterday_str in province_public_holidays else 0)

        # --- prediction -----------------------------------------------------
        # Missing (falsy) weather readings are passed to the model as 0.
        prediction = BP.predict_single(station_string=row['nom'],
                                       day=yesterday_date,
                                       temperature=data_set['temperature']
                                       or 0,
                                       precipitation=data_set['precipitation']
                                       or 0,
                                       snowdepth=data_set['snowdepth'] or 0,
                                       windspeed=data_set['windspeed'] or 0,
                                       sunshine=data_set['sunshine'] or 0,
                                       is_holiday=data_set['is_holiday'] or 0)
        # a negative prediction is reported as 0
        data_set['prediction'] = max(prediction, 0)
        data_sets.append(data_set)
    return data_sets
Ejemplo n.º 24
0
def main(outdir):
    """Generate a synthetic order/offer data set and write it to ``outdir``.

    Random orders (shift, job site, area, qualification, day) are drawn from
    Dirichlet-distributed base probabilities, matched with up to six active
    employees each, annotated with weekday and Berlin public-holiday flags
    and split into ``train.tsv``, ``test_truth.tsv`` and ``test_publish.tsv``.

    All sampling done in this function goes through the generator seeded
    with ``config.seed``; that includes the size of the fallback offer, which
    must not be drawn from the unseeded global ``np.random``.

    Args:
        outdir: directory the three TSV files are written to.
    """
    rng = RandomState(MT19937(SeedSequence(config.seed)))

    num_employees = 50000

    num_orders = 1000000

    num_jobsites = 2800
    num_areas = 180
    num_qualifications = 214
    num_qualigroups = 13
    num_shifts = 4
    num_days = 2708

    start_day = datetime(2013, 8, 1)

    print("create sliding window of active employees")
    active_employees = np.zeros((num_employees, num_days)).astype(bool)

    # Each employee is active in a window of days; from one employee to the
    # next the window start moves on by one day with probability 0.05.
    left = 0
    right = 100
    upkeep = 400
    change = (.95, 1 - .95)
    for irow in range(num_employees):
        active_employees[irow, left:right] = 1
        left = left + rng.choice([0, 1], p=change)
        right = left + upkeep + rng.choice([0, 1], p=change)

    print("create base distributions for areas, qualis and shifts")
    areas = rng.dirichlet(np.ones(num_areas) * .1)

    jobsites = rng.dirichlet(np.ones(num_jobsites) * .1)

    # Assign every job site to one area.
    area_of_jobsite = np.empty(num_jobsites)
    for ijobsite in range(num_jobsites):
        area_of_jobsite[ijobsite] = rng.choice(np.arange(num_areas), p=areas)

    qualigroups = rng.dirichlet(np.ones(num_qualigroups) * .1)

    qualis = rng.dirichlet(np.ones(num_qualifications) * .1)

    # Assign every qualification to one qualification group.
    qualigroup_of_quali = np.empty(num_qualifications)
    for iquali in range(num_qualifications):
        qualigroup_of_quali[iquali] = rng.choice(np.arange(num_qualigroups),
                                                 p=qualigroups)

    shifts = rng.dirichlet(np.ones(num_shifts))

    orders = []
    for _ in tqdm(range(num_orders), desc="create orders"):
        shift = rng.choice(range(num_shifts), p=shifts)

        jobsite = rng.choice(range(num_jobsites), p=jobsites)
        area = area_of_jobsite[jobsite]

        quali = rng.choice(range(num_qualifications), p=qualis)
        qualigroup = qualigroup_of_quali[quali]

        day = rng.randint(0, num_days)

        orders.append({
            "Schicht": shift,
            "Einsatzort": jobsite,
            "PLZ": area,
            "Qualifikation": quali,
            "Qualifikationgruppe": qualigroup,
            "Tag": day,
        })

    # One-hot rows: each employee gets exactly one qualification and one
    # job site.
    employee_qualifications = rng.multinomial(
        1, qualis, size=(num_employees)).astype(bool)
    employee_jobsites = rng.multinomial(1, jobsites,
                                        size=(num_employees)).astype(bool)

    orders = pd.DataFrame(orders)
    offers = []

    # Probabilities for the fallback offer size 1..6, proportional to 1/size.
    ps = np.ones(6) / np.arange(1, 7)
    ps /= ps.sum()

    for _, order in tqdm(orders.iterrows(),
                         desc="create offers",
                         total=len(orders)):

        match_active = active_employees[:, int(order.Tag)]
        match_quali = employee_qualifications[:, int(order.Qualifikation)]
        match_jobsite = employee_jobsites[:, int(order.Einsatzort)]

        match, = (match_active & match_quali & match_jobsite).nonzero()

        offers.append(match[:6].tolist())
        if len(offers[-1]) == 0:
            # Nobody matches: fall back to a random group of active
            # employees.  Both draws use the seeded generator so that the
            # output is reproducible for a given config.seed.
            offers[-1] = rng.choice(match_active.nonzero()[0],
                                    rng.choice(range(1, 7), p=ps)).tolist()

    berlin_holidays = holidays.DE(prov="BE")

    orders["Mitarbeiter ID"] = offers
    print("add day meta data")
    orders["Tag"] = orders["Tag"].apply(lambda day: start_day + timedelta(day))
    orders["Wochentag"] = orders["Tag"].apply(lambda day: day.strftime("%a"))
    orders["Feiertag"] = orders["Tag"].apply(
        lambda day: day in berlin_holidays)

    orders = orders[[
        "Einsatzort", "PLZ", "Qualifikation", "Qualifikationgruppe", "Schicht",
        "Tag", "Wochentag", "Feiertag", "Mitarbeiter ID"
    ]]
    orders = orders.sort_values("Tag")

    # NOTE(review): no random state is passed here, so the split itself is
    # presumably not tied to config.seed - confirm against the imported
    # train_test_split.
    train, test = train_test_split(orders)

    train.to_csv(os.path.join(outdir, "train.tsv"), index=False, sep="\t")
    test.to_csv(os.path.join(outdir, "test_truth.tsv"), index=False, sep="\t")
    # The published test set omits the offered employees.
    test[[
        "Einsatzort", "PLZ", "Qualifikation", "Qualifikationgruppe", "Schicht",
        "Tag", "Wochentag", "Feiertag"
    ]].to_csv(os.path.join(outdir, "test_publish.tsv"), index=False, sep="\t")
Ejemplo n.º 25
0
 def judge_local_holiday(self, df):
     """Flag visits that fall on a public holiday of the visitor's country.

     The previous implementation tested the dates with ``isin`` against
     calendars created without any years, which hold no dates, and cast
     ``np.nan`` to ``int`` for countries without a calendar.  Here each
     calendar is built for the years that occur in the data and every date
     is checked for membership directly.

     Args:
         df: frame with a ``geoNetwork_country`` column of country names and
             a ``visitId`` column whose values provide ``.date()``.

     Returns:
         numpy.ndarray: one int per row of ``df``; 1 when the visit date is
         a public holiday in the row's country, otherwise 0.  Rows whose
         country has no calendar below are 0.
     """
     # Country name as it appears in the data -> holiday calendar class.
     # Denmark, Austria, Hungary, Portugal and Norway had branches before
     # but could never be reached; they are active here.
     calendars = {
         'United States': holidays.US,
         'India': holidays.India,
         'Canada': holidays.CA,
         'Germany': holidays.DE,
         'Japan': holidays.JP,
         'France': holidays.FRA,
         'Mexico': holidays.MX,
         'Australia': holidays.AU,
         'Spain': holidays.ES,
         'Netherlands': holidays.NL,
         'Italy': holidays.IT,
         'Ireland': holidays.IE,
         'Sweden': holidays.SE,
         'Argentina': holidays.AR,
         'Colombia': holidays.CO,
         'Belgium': holidays.BE,
         'Switzerland': holidays.CH,
         'Czechia': holidays.CZ,
         'Denmark': holidays.DK,
         'Austria': holidays.AT,
         'Hungary': holidays.HU,
         'Portugal': holidays.PT,
         'Norway': holidays.NO,
         'New Zealand': holidays.NZ,
         'South Africa': holidays.ZA,
     }
     country = df['geoNetwork_country']
     visit_dates = df['visitId'].apply(lambda x: x.date())

     judge_holiday = np.zeros(len(df), dtype=int)
     for name, calendar_cls in calendars.items():
         rows = (country == name).values
         if not rows.any():
             continue
         dates = visit_dates[rows]
         # Populate the calendar only for the years that actually occur.
         calendar = calendar_cls(years=sorted({day.year for day in dates}))
         judge_holiday[rows] = [int(day in calendar) for day in dates]
     return judge_holiday
Ejemplo n.º 26
0
def create_season_pickle(pickle_dir=Path('pickles')):
    """Write a windowed seasonal mean series for every station pickle.

    For each path returned by ``get_file_paths(pickle_dir)`` the pickle
    ``<path>aggregation`` is loaded and split into rest days (weekend or NRW
    public holiday 2017) and work days.  Within each group every timestamp
    gets the mean of the station column over all rows within three weeks
    before or after it that share the same time of day.  The two resulting
    series are concatenated and pickled as ``<path>season_aggregation``.

    Args:
        pickle_dir: directory holding the input pickles; the output is
            written next to them.
    """
    file_paths = get_file_paths(pickle_dir)
    print(file_paths)
    for path in file_paths:
        print(path)
        station_name = path
        season_means = pd.Series()
        aggregated = pd.read_pickle(pickle_dir / (str(path) + 'aggregation'))
        print('len mean_pickle: ' + str(len(aggregated)))
        print(aggregated)
        column_name = 'windowed_means'

        holidays_nrw = list(holidays.DE(years=2017, state='NW').keys())
        print(holidays_nrw)

        # Rest day = Saturday/Sunday or an index entry listed as a holiday.
        # NOTE(review): the holidays are plain dates while the index appears
        # to carry times of day - confirm that isin matches as intended.
        is_restday = ((aggregated.index.dayofweek >= 5)
                      | aggregated.index.isin(holidays_nrw))
        restdays = aggregated[is_restday]
        workdays = aggregated[~is_restday]
        print('Split_dataframe')

        for day_group in (restdays, workdays):
            day_group = day_group[[station_name]].dropna()
            first_ts = day_group.index.min()
            last_ts = day_group.index.max()
            half_window = pd.Timedelta('3w')
            # Window bounds (as dates) that were printed last.
            shown_from = first_ts.date()
            shown_to = last_ts.date()
            print(first_ts)

            window_means = []
            for ts in day_group.index:
                # Clamp the +/- three week window to the available range.
                window_from = max(first_ts, ts - half_window)
                window_to = min(last_ts, ts + half_window)
                in_window = day_group.loc[window_from:window_to]
                same_time = in_window.loc[in_window.index.time == ts.time()]
                mean_value = same_time[station_name].mean()
                # Print details only when the window moved to another day.
                if (shown_from != window_from.date()
                        or shown_to != window_to.date()):
                    print(str(window_from) + ' -> ' + str(window_to))
                    shown_from = window_from.date()
                    shown_to = window_to.date()
                    print(same_time)
                    print(mean_value)
                window_means.append(mean_value)

            day_group[column_name] = window_means
            print('len v1s: ' + str(len(window_means)))
            print(day_group[[column_name]])
            print(season_means)
            season_means = pd.concat([season_means, day_group[column_name]],
                                     sort=True)

        print('len mean_season: ' + str(season_means.size))
        print(season_means)
        season_means.to_pickle(pickle_dir /
                               (str(path) + 'season_aggregation'))
Ejemplo n.º 27
0
def find_good_epics():
    """Return the epics from ``main_epic_ids`` whose spread is acceptable.

    First prints today's date in each of five market time zones. Then, for
    every epic in the module-level ``main_epic_ids`` that does not contain
    an excluded currency code, fetches ``<REAL_OR_NO_REAL>/markets/<epic>``
    and keeps the epic when ``bid - offer`` from its snapshot is at least
    ``-1.51``.

    A failure on one epic (request error, response that is not valid JSON,
    missing or non-numeric snapshot fields) is printed and that epic is
    skipped; the scan continues with the next one.

    Holiday calendars are not consulted here: acceptance depends only on
    the excluded currency codes and the spread.

    Returns:
        list: accepted epic ids, in the order they appear in
        ``main_epic_ids``.
    """
    market_zones = (
        'Europe/Berlin',
        'Europe/London',
        'America/New_York',
        'Australia/Sydney',
        'Asia/Tokyo',
    )
    for zone in market_zones:
        todays_date = datetime.datetime.now(
            tz=pytz.timezone(zone)).strftime('%Y-%m-%d')
        print(zone + " :- Today's Date is ..." + todays_date)

    # Epics quoting any of these currencies are never considered.
    excluded_currencies = ('MXN', 'SEK', 'NOK', 'CNH')
    # spread is bid - offer, so it is normally negative; closer to zero is
    # tighter.
    min_spread = -1.51

    pick_from_epics = []
    total = len(main_epic_ids)

    for i_count, epic_id in enumerate(main_epic_ids, start=1):
        # Decide on the name alone before spending an API call on the epic.
        if any(code in epic_id for code in excluded_currencies):
            continue

        try:
            auth_r = requests.get(REAL_OR_NO_REAL + '/markets/' + epic_id,
                                  headers=authenticated_headers)
            snapshot = json.loads(auth_r.text)['snapshot']
            spread = float(snapshot['bid']) - float(snapshot['offer'])
        except (requests.RequestException, ValueError, KeyError,
                TypeError) as e:
            # ValueError also covers json.JSONDecodeError; TypeError covers
            # a null bid/offer or an unexpected payload shape.
            print(e)
        else:
            if spread >= min_spread:
                pick_from_epics.append(epic_id)
                print(
                    "!!DEBUG!!...FOUND GOOD EPIC {} spread {}...{}/{}".format(
                        epic_id, spread, i_count, total))
            else:
                print(
                    "!!DEBUG!!...skipping, NO GOOD EPIC {} spread {} ....Checking next epic spreads...{}/{}"
                    .format(epic_id, spread, i_count, total))

        # Pause after every API call, successful or not (presumably to stay
        # within the API's rate limit -- confirm against the provider docs).
        time.sleep(1)

    return pick_from_epics
Ejemplo n.º 28
0
def generate_timesheet_data(year, month, fdom, ldom, hours):
    """
    Randomly spread ``hours`` of work over the eligible days of one month.

    By Patrick Faion <https://github.com/pfaion/timesheet_generator>

    A day is eligible when it lies between ``fdom`` and ``ldom`` (the upper
    bound is clamped to the length of the month), falls on a weekday with
    ``date.weekday()`` in 0..4, and is not contained in the ``holidays.DE``
    calendar for state 'NI'. Work is handed out in half-hour steps inside
    an 08:00-18:00 window; once a day's block spans 6 hours it receives no
    further time.

    Returns a tuple ``(data, header_date, total_hours_formatted)`` where
    ``data`` holds one dict per calendar day of the month with the keys
    'day', 'start', 'end', 'duration' and 'date' (empty strings for days
    without work).

    Raises TimesheetCreationError when hours remain but no day can take
    any more.
    """
    workdays = (0, 1, 2, 3, 4)
    window_open = 8
    window_close = 18
    daily_cap = 6
    step = 0.5

    # Holiday calendar first, then the month length (same order as always).
    public_holidays = holidays.DE(state='NI', years=year)
    days_in_month = calendar.monthrange(year, month)[1]

    # Keep only the dates that are neither holidays nor outside the
    # configured weekdays.
    final_day = min(days_in_month, ldom)
    candidates = (datetime.date(year, month, number)
                  for number in range(fdom, final_day + 1))
    possible_days = [
        candidate.day for candidate in candidates
        if candidate not in public_holidays
        and candidate.weekday() in workdays
    ]

    # After shuffling, the k-th day gets weight 1/k, so a few days are
    # picked often and the rest only rarely.
    random.shuffle(possible_days)
    weights = list(1.0 / np.arange(1, len(possible_days) + 1))

    # day => (start, end), both expressed as fractional hours
    collector = {}

    # Every half-hour mark at which a first chunk may begin.
    chunk_starts = np.arange(window_open, window_close, step)

    remaining = hours
    while remaining > 0:
        if not possible_days:
            raise TimesheetCreationError(
                "Too many hours for specified range of month")

        day, weight = weighted_choice(zip(possible_days, weights))

        if day not in collector:
            # First chunk for this day: drop it at a random position.
            begin = random.choice(chunk_starts)
            collector[day] = (begin, begin + step)
        else:
            # Day already has a block: grow it by one step on a side that
            # still has room.
            begin, finish = collector[day]
            room = (
                ('before', begin > window_open),
                ('after', finish < (window_close - step)),
            )
            directions = [side for side, available in room if available]
            side = random.choice(directions)
            if side == 'before':
                begin -= step
            elif side == 'after':
                finish += step
            collector[day] = (begin, finish)

            # A block that reached the cap is withdrawn from the pool,
            # together with its weight.
            if finish - begin == daily_cap:
                possible_days.remove(day)
                weights.remove(weight)

        # One half-hour step has been handed out.
        remaining -= step

    def as_clock(fractional_hour):
        # 9.5 -> 09:30
        return datetime.time(int(fractional_hour),
                             int((fractional_hour % 1) * 60))

    data = []
    for day in range(1, days_in_month + 1):
        label = "{}.".format(day)
        if day not in collector:
            data.append({
                'day': label,
                'start': "",
                'end': "",
                'duration': "",
                'date': ""
            })
            continue

        date = datetime.date(year, month, day)
        begin, finish = collector[day]
        starts_at = datetime.datetime.combine(date, as_clock(begin))
        ends_at = datetime.datetime.combine(date, as_clock(finish))
        data.append({
            'day': label,
            'start': starts_at.strftime("%H:%M"),
            'end': ends_at.strftime("%H:%M"),
            'duration': format_timedelta(ends_at - starts_at),
            'date': date.strftime("%d.%m.")
        })

    # Extra strings for the sheet header.
    header_date = "{:0>2d}/{}".format(month, year)
    total_hours_formatted = format_timedelta(datetime.timedelta(hours=hours))

    return data, header_date, total_hours_formatted