def update_balances():
    """Credit every user with 1% of the total size of their funds.

    Nothing happens on Russian public holidays or on Saturday/Sunday.
    On any other day, for each row of ``users`` the sum of ``funds.size``
    owned by that user is multiplied by 0.01 and added to both the user's
    ``balance`` and ``gain`` (each rounded to 4 decimal places).

    All updates are committed together at the end, so a failure part-way
    through leaves the table untouched instead of crediting only some of
    the users. The connection is always closed.

    Relies on module-level ``datetime``, ``sqlite3`` and ``db_name``.
    """
    import holidays
    from contextlib import closing

    ru_holidays = holidays.Russia()
    today = datetime.datetime.now()
    # No accrual on public holidays or weekends (weekday(): 5 = Sat, 6 = Sun).
    if today.date() in ru_holidays or today.weekday() >= 5:
        return

    with closing(sqlite3.connect(db_name)) as sqlite_connection:
        cursor = sqlite_connection.cursor()
        records = cursor.execute("SELECT * from users").fetchall()
        for row in records:
            # Positional layout of the users table as used by the original
            # code: column 1 = user id, column 5 = balance, column 6 = gain.
            user_id, balance, gain = row[1], row[5], row[6]
            cursor.execute(
                "SELECT size FROM funds WHERE owner_id = ?", (user_id,))
            interest = sum(size for (size,) in cursor.fetchall()) * 0.01
            cursor.execute(
                "UPDATE users SET balance = ?, gain = ? WHERE user_id = ?",
                (round(balance + interest, 4),
                 round(gain + interest, 4),
                 user_id),
            )
        sqlite_connection.commit()
import holidays FIRST_DAY_OF_TRAIN_PRD = (2013, 1, 1) LAST_DAY_OF_TRAIN_PRD = (2015, 10, 31) FIRST_DAY_OF_TEST_PRD = (2015, 11, 1) # Russian public holidays in 2012, 2013, 2014, 2015, and 2016 PUBLIC_HOLIDAYS = holidays.Russia(years=[2012, 2013, 2014, 2015, 2016]) PUBLIC_HOLIDAY_DTS = list(PUBLIC_HOLIDAYS.keys()) OLYMPICS2014 = ("2/7/2014", "2/23/2014") WORLDCUP2014 = ("6/12/2014", "7/13/2014") # city populations as of 1/1/2020 # (source: https://rosstat.gov.ru/storage/mediabank/CcG8qBhP/mun_obr2020.rar, accessed 11/17/2020): CITY_POP = [ ("РостовНаДону", 1137904.0, "47°14′26″ с. ш. 39°42′38″ в. д.", "UTC+3"), ("Н.Новгород", 1252236.0, "56°19′37″ с. ш. 44°00′27″ в. д.", "UTC+3"), ("Казань", 1257391.0, "55°47′27″ с. ш. 49°06′52″ в. д.", "UTC+3"), ("Новосибирск", 1625631.0, "55°01′ с. ш. 82°55′ в. д.", "UTC+7"), ("Воронеж", 1058261.0, "51°40′18″ с. ш. 39°12′38″ в. д.", "UTC+3"), ("Красноярск", 1093771.0, "56°00′43″ с. ш. 92°52′17″ в. д.", "UTC+7"), ("Ярославль", 608353.0, "57°37′ с. ш. 39°51′ в. д.", "UTC+3"), ("Тюмень", 807271.0, "57°09′ с. ш. 65°32′ в. д.", "UTC+5"), ("Сургут", 380632.0, "61°15′00″ с. ш. 73°26′00″ в. д.", "UTC+5"), ("Омск", 1154507.0, "54°58′ с. ш. 73°23′ в. д.", "UTC+6"), ("Волжский", 323906.0, "48°47′ с. ш. 44°46′ в. д.", "UTC+4"), ("Уфа", 1128787.0, "54°44′ с. ш. 55°58′ в. д.", "UTC+5"),
def app_data_preparation(file_list, lock_period, impute):
    '''
    Load, merge and enrich the raw input files into one time-indexed dataset.

    file_list - data file names/paths in this exact order:
        1) icp das
        2) metering devices
        3) SVO
        4) VDNH
        5) COVID
        6) self-isolation index
    lock_period - None, or a tuple (start date, end date) describing an
        additional lockdown; those hours are flagged in the 'lockdown'
        column together with the built-in 2020-03-30 .. 2020-06-08 period
    impute - if truthy, the merged data is passed through
        app_imputing_data (KNN imputation according to the original notes);
        otherwise remaining NaN values are returned as they are

    Returns the merged DataFrame indexed by 'time' with the extra columns
    weekday, month, yearday, monthday, holiday and lockdown.
    '''
    # data processing and analysis
    import os
    import pandas as pd
    # module with information about holidays
    import holidays
    from app_processing import app_icp_preprocess, app_meter_preprocess
    from app_processing import app_svo_preprocess, app_vdnh_preprocess
    from app_processing import app_isolation_preprocessing, app_covid_preprocessing, app_imputing_data

    # -----------------------------DATA-LOAD-----------------------------
    # icp das
    icp_features_url = os.path.join(os.getcwd(), 'data', 'building_features.pickle')
    # metering device
    metering_features_url = os.path.join(os.getcwd(), 'data', 'meter_features.pickle')

    # -------------------------FEATURE-SELECTION-------------------------
    # relevant icp_das features
    icp_das = app_icp_preprocess(file_list[0], icp_features_url)
    # relevant metering devices features
    meter_dev = app_meter_preprocess(file_list[1], metering_features_url)
    # weather columns 'T', 'U', 'c' (described in the original notes as
    # temperature, atmospheric pressure, cloudiness)
    svo = app_svo_preprocess(file_list[2], ['T', 'U', 'c'])
    # precipitation
    vdnh = app_vdnh_preprocess(file_list[3])
    # covid cases
    cov = app_covid_preprocessing(file_list[4])
    # isolation index
    iso = app_isolation_preprocessing(file_list[5])

    # --------------------------MERGING-DATASETS-------------------------
    def merge_data(*args):
        '''
        Left-join every following frame onto the first one on 'time'.
        '''
        merged = args[0]
        for frame in args[1:]:
            merged = merged.merge(frame, how='left', on='time')
        return merged

    data = merge_data(icp_das, meter_dev, svo, vdnh, cov, iso)
    data = data.set_index('time')

    # --------------------------ADD-COVID-CASES--------------------------
    for column in ('covid_cases', 'isolation_idx'):
        # spread the value known for a day over the rest of that day
        data[column] = data[column].groupby(pd.Grouper(freq='D')).ffill()
        # values still missing up to the end of March 2020 are set to 0
        data.loc[:'2020-03', column] = data.loc[:'2020-03', column].fillna(0)

    # --------------------SPECIFY-WEEKDAYS-AND-MONTHS--------------------
    data['weekday'] = data.index.weekday
    data['month'] = data.index.month
    data['yearday'] = data.index.dayofyear
    data['monthday'] = data.index.day

    # ----------------------------ADD-HOLIDAYS---------------------------
    rus_holidays = holidays.Russia()

    def holidays_selector(df, holidays_list):
        '''
        Return a frame with 'time' and a 0/1 'holiday' flag per index entry.
        '''
        flags = [1 if t in holidays_list else 0 for t in df.index]
        return pd.DataFrame({'time': df.index, 'holiday': flags})

    all_holidays = holidays_selector(data, rus_holidays)

    # ----------------------------ADD-LOCKDOWN---------------------------
    # set time of lockdown in Moscow
    lockdown = pd.DataFrame(pd.date_range(start='2020-03-30 00:00',
                                          end='2020-06-08 23:00',
                                          freq='H'),
                            columns=['time'])
    # in case of new lockdown
    if lock_period is not None:
        new_lockdown = pd.DataFrame(pd.date_range(start=lock_period[0],
                                                  end=lock_period[1],
                                                  freq='H'),
                                    columns=['time'])
        # The concatenated frame must be kept (the previous
        # lockdown.append(...) call threw its result away). Overlapping
        # hours are dropped so the left merge below cannot duplicate rows.
        lockdown = pd.concat([lockdown, new_lockdown], ignore_index=True)
        lockdown = lockdown.drop_duplicates(subset='time')
    # flag every listed hour, including those of the new period
    lockdown['lockdown'] = 1

    # add holiday and lockdown flags
    data = merge_data(data, all_holidays, lockdown).set_index('time')

    # ------------------------------FILL-NAs-----------------------------
    # hours outside any lockdown / without precipitation records become 0
    data['lockdown'] = data['lockdown'].fillna(0)
    data['precipitation'] = data['precipitation'].fillna(0)

    if impute:
        # TODO: let the user decide which columns to impute
        data = app_imputing_data(data)

    return data
import pandas as pd import datetime as dt import holidays start_date = dt.datetime.strptime("2013-01-01", "%Y-%m-%d") end_date = dt.datetime.strptime("2015-11-30", "%Y-%m-%d") dates = [ start_date + dt.timedelta(days=x) for x in range(0, (end_date - start_date + dt.timedelta(days=1)).days) ] ru_holidays = holidays.Russia() calendar = pd.Series(dates).rename("date").to_frame() calendar["bank_holiday"] = calendar["date"].apply(lambda x: ru_holidays.get(x)) calendar["weekday"] = calendar["date"].apply(lambda x: dt.date.isoweekday(x)) # In Russia, if the date of bank holiday observance falls on a weekend, the following Monday will be a day off in lieu # of the holiday. I think the exception is New Year Holiday as it lasts from 1st to 8th January and additional day is # not given. days_in_lieu = calendar.loc[calendar["bank_holiday"].notnull() & calendar["weekday"].isin([6, 7]) & (calendar["bank_holiday"] != "Новый год")].copy() days_in_lieu["date"] = days_in_lieu.apply( lambda x: x["date"] + dt.timedelta(days=7 - x["weekday"] + 1), axis=1) calendar = calendar.merge(days_in_lieu, how="left",
import pandas as pd
from dateutil.relativedelta import relativedelta
import holidays
from math import sqrt

# Shared Russian holiday calendar used by get_holidays_count.
RU_HOLIDAYS = holidays.Russia()


def get_holidays_count(month_start):
    """Return how many Russian holidays fall in the month starting at *month_start*.

    *month_start* is anything ``pd.to_datetime`` accepts. The window runs
    from that timestamp to the same moment one calendar month later and is
    looked up by slicing ``RU_HOLIDAYS``.
    """
    window_begin = pd.to_datetime(month_start)
    window_end = window_begin + relativedelta(months=1)
    holidays_in_window = RU_HOLIDAYS[window_begin:window_end]
    return len(holidays_in_window)


def get_rmse(y_actual, y_predicted):
    """Return the root of ``mean_squared_error(y_actual, y_predicted)``."""
    # NOTE(review): mean_squared_error is not imported in the visible part
    # of the file - presumably sklearn.metrics.mean_squared_error; confirm.
    mse = mean_squared_error(y_actual, y_predicted)
    return sqrt(mse)
# Rebuild a plain positional index, discarding the old one.
df_sample = df_sample.iloc[0:].reset_index().drop('index', axis=1)

# Add time, week and holiday columns
# Two-hour bins over the planned hour, labelled '0-2', '2-4', ..., '22-24'.
labels = [
    str(i) + '-' + str(j)
    for (i, j) in zip(np.arange(0, 26, 2),
                      np.arange(0, 26, 2)[1:])
]
df_sample['time_bin'] = pd.cut(df_sample.time_plan_ts.dt.hour,
                               bins=np.arange(0, 26, 2),
                               include_lowest=True,
                               labels=labels).astype(str)
# Weekday is stored as a string ('0' = Monday ... '6' = Sunday).
df_sample['weekday'] = df_sample.time_plan_ts.apply(
    lambda x: x.weekday()).astype(str)
holiday_dates = [str(h) for h in holidays.Russia(years=2020)]
df_sample['is_holiday'] = df_sample.date.apply(
    lambda x: x in holiday_dates).astype(int)
# The weekday column holds strings, so it has to be compared with string
# labels; comparing with the integers 5 and 6 never matched and left
# is_weekend at 0 for every row.
df_sample['is_weekend'] = df_sample.weekday.isin({'5', '6'}).astype(int)

## To timeseries format
# Encode stops: every distinct 'latitude - longitude' pair gets an integer id.
stops = df_sample.apply(lambda x: str(x.latitude) + ' - ' + str(x.longitude),
                        axis=1)
ind2stop = dict(enumerate(stops.unique()))
stop2ind = {s: i for i, s in ind2stop.items()}
df_sample['stop_number'] = stops.map(stop2ind)
def __generate(self):
    '''
    Build the day-type calendar for self.year and store it in self.calendar.

    The result is a dict with 365 (366) entries. The key is the day in
    DD.MM form (numbers are rendered by self.__nts) and the value is the
    day type:
        0. working day
        1. day off (Saturday or Sunday)
        2. pre-holiday working day (the working day is shortened by 1 hour)
        3. public holiday

    Saturday/Sunday takes priority over the holiday check, as before.
    Every date is now tested against the holiday list with its own month
    (previously February..December were checked against January dates),
    the weekday comes from the date itself (the manual offset from
    1 January 2020 was wrong for earlier years), and type 2 is given to
    the working day that precedes a holiday rather than to the holiday.
    '''
    rus_holidays = holidays.Russia()
    first_ordinal = date(self.year, 1, 1).toordinal()
    last_ordinal = date(self.year, 12, 31).toordinal()

    day_types = {}
    for ordinal in range(first_ordinal, last_ordinal + 1):
        current = date.fromordinal(ordinal)
        # The January and February suffixes were literal '.01' / '.02'
        # before; keep them literal so keys stay identical whatever
        # padding self.__nts applies.
        if current.month > 2:
            month_part = self.__nts(current.month)
        else:
            month_part = '0' + str(current.month)
        key = self.__nts(current.day) + '.' + month_part

        if current.weekday() >= 5:
            # date.weekday(): 5 = Saturday, 6 = Sunday
            day_type = 1
        elif current in rus_holidays:
            day_type = 3
        elif date.fromordinal(ordinal + 1) in rus_holidays:
            # working day right before a holiday (31 December looks at
            # 1 January of the next year)
            day_type = 2
        else:
            day_type = 0
        day_types[key] = day_type

    self.calendar = day_types