from pandas import Series
from workalendar.asia import SouthKorea

# extract_basic_feats_n_heads and the `feat` constants module are project-local helpers (not shown here).


def extract_bday_feats_n_heads(series, modality, field, stat_type, tr_type):
    """
    "business day or not" conditioning feature extraction
    :return: feature name list, feature value list
    """
    if series is None or len(series) == 0:
        b_day_heads, b_day_feats = extract_basic_feats_n_heads(None, modality, field, stat_type, tr_type)
        nb_day_heads, nb_day_feats = extract_basic_feats_n_heads(None, modality, field, stat_type, tr_type)
    else:
        cal = SouthKorea()
        time_stamp_series = Series(series.index.tolist())
        unique_dates = time_stamp_series.map(lambda x: x.date()).unique()
        nb_day_series = None
        b_day_series = None
        for date in unique_dates:
            if cal.is_holiday(date) is False and date.weekday() < 5:
                # Business day: not a public holiday and Monday-Friday
                if b_day_series is None:
                    b_day_series = series[series.index.date == date]
                else:
                    # NOTE: Series.append was removed in pandas 2.0; this code targets older pandas.
                    b_day_series = b_day_series.append(series[series.index.date == date])
            else:
                if nb_day_series is None:
                    nb_day_series = series[series.index.date == date]
                else:
                    nb_day_series = nb_day_series.append(series[series.index.date == date])
        b_day_heads, b_day_feats = extract_basic_feats_n_heads(b_day_series, modality, field, stat_type, tr_type)
        nb_day_heads, nb_day_feats = extract_basic_feats_n_heads(nb_day_series, modality, field, stat_type, tr_type)

    heads = list(map(lambda x: '%s_%s' % (feat.BSS_DAY, x), b_day_heads)) + \
        list(map(lambda x: '%s_%s' % (feat.NON_BSS_DAY, x), nb_day_heads))
    values = b_day_feats + nb_day_feats
    return heads, values
def workday_feature_genaration(self, data, country):
    data = data.reset_index()

    # 1. dayofweek_grade
    data['dayofweek_grade'] = data['datetime'].dt.day_name()
    data['dayofweek_grade'] = data['dayofweek_grade'].astype('category')

    if country == 'SouthKorea':
        from workalendar.asia import SouthKorea
        calendar = SouthKorea()
    else:
        from workalendar.europe import Italy
        calendar = Italy()

    holiday_list = (calendar.holidays(2017) + calendar.holidays(2018)
                    + calendar.holidays(2019) + calendar.holidays(2020))
    holiday_list = list(zip(*holiday_list))[0]

    data['dayoff_grade'] = 'dayon'
    data.loc[data['dayofweek_grade'].isin(['Sunday', 'Saturday']), 'dayoff_grade'] = 'dayoff'
    data.loc[data['datetime'].dt.date.isin(holiday_list), 'dayoff_grade'] = 'dayoff'
    data['dayoff_grade'] = pd.Categorical(data['dayoff_grade'],
                                          categories=['dayon', 'dayoff'], ordered=True)

    # 2. worktime_grade
    data['worktime_grade'] = 'work'
    data.loc[data['datetime'].dt.hour < 9, 'worktime_grade'] = 'notwork'
    data.loc[data['datetime'].dt.hour > 18, 'worktime_grade'] = 'notwork'
    data.loc[data['dayoff_grade'] == 'dayoff', 'worktime_grade'] = 'notwork'
    data['worktime_grade'] = pd.Categorical(data['worktime_grade'],
                                            categories=['work', 'notwork'], ordered=True)

    data = data.drop('dayofweek_grade', axis=1)
    data = data.set_index('datetime')
    return data
def get_weekday_index(date):
    from workalendar.asia import SouthKorea

    ko_calendar = SouthKorea()
    if ko_calendar.is_working_day(date.date()):
        # Regular working day: keep the weekday number (0 = Monday ... 4 = Friday)
        return date.date().weekday()
    else:
        if date.date().weekday() in [5, 6]:
            # Saturday / Sunday keep their own index (5 / 6)
            return date.date().weekday()
        else:
            # Weekday public holiday gets its own bucket
            return 7
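# Minimal usage sketch for get_weekday_index (assumption: callers pass pandas
# timestamps; the dates and expected values below are illustrative and depend
# on the holiday set shipped with the installed workalendar version).
import pandas as pd

print(get_weekday_index(pd.Timestamp("2024-01-01")))  # Monday but a public holiday -> expected 7
print(get_weekday_index(pd.Timestamp("2024-01-03")))  # ordinary Wednesday -> expected 2
print(get_weekday_index(pd.Timestamp("2024-01-06")))  # Saturday keeps its weekday -> expected 5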
def initUI(self):
    cal = QCalendarWidget(self)
    cal.setGridVisible(True)
    cal.setVerticalHeaderFormat(
        QCalendarWidget.VerticalHeaderFormat(QCalendarWidget.NoVerticalHeader))
    cal.clicked[QDate].connect(self.showDate)

    # Create a Korean holiday calendar and mark three years of holidays
    # (previous, current, and next year) on the QCalendarWidget.
    wcal = SouthKorea()
    date = QDate.currentDate()
    # Returns the given year's public holidays as a list
    # print(wcal.holidays(date.year()))

    # Text format used to highlight holidays
    fm = QTextCharFormat()
    fm.setForeground(Qt.red)
    # fm.setBackground(Qt.yellow)

    # Mark holidays for the previous, current, and next year
    for one in wcal.holidays(date.year() - 1):
        print(one[0])
        cal.setDateTextFormat(one[0], fm)
    for one in wcal.holidays(date.year()):
        print(one[0])
        cal.setDateTextFormat(one[0], fm)
    for one in wcal.holidays(date.year() + 1):
        print(one[0])
        cal.setDateTextFormat(one[0], fm)

    self.lbl = QLabel(self)
    date = cal.selectedDate()
    self.lbl.setText(date.toString())

    self.lblmsg = QLabel(self)
    self.lblmsg.setText("강의 일정 계산")  # "Course schedule calculation"

    self.showDate(date)

    vbox = QVBoxLayout()
    vbox.addWidget(cal)
    vbox.addWidget(self.lbl)
    vbox.addWidget(self.lblmsg)
    self.setLayout(vbox)

    self.setWindowTitle('종강일 계산기')  # "Course end-date calculator"
    self.setGeometry(300, 300, 300, 300)
    self.show()
import numpy as np
import pandas as pd
from workalendar.asia import SouthKorea


def check_holidays(start_yr, end_yr=-1):
    '''
    Return South Korean public holidays for start_yr, or for start_yr~end_yr (inclusive).
    '''
    if end_yr == -1:
        holidays = pd.Series(np.array(SouthKorea().holidays(start_yr))[:, 0])
    elif end_yr < start_yr:
        print('input : start~end')
        return None
    else:
        holidays = pd.Series(dtype=object)
        for year in range(start_yr, end_yr + 1):
            holidays = pd.concat([
                holidays,
                pd.Series(np.array(SouthKorea().holidays(year))[:, 0])
            ])
    return holidays
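# Usage sketch for the corrected check_holidays; the exact holiday counts
# depend on the installed workalendar version, so nothing is asserted here.
hd_2019 = check_holidays(2019)          # single year -> Series of datetime.date
print(len(hd_2019), hd_2019.iloc[0])

hd_range = check_holidays(2018, 2020)   # inclusive year range
print(hd_range.min(), hd_range.max())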
def add_weekday_column(data):
    '''
    detect holidays and add a categorical weekday_index column to the dataframe
    :param data: original data (indexed by a DatetimeIndex)
    :return: appended data
    '''
    from workalendar.asia import SouthKorea

    index = data.index
    ko_calendar = SouthKorea()
    is_holiday = []
    for time_ in index:
        if ko_calendar.is_working_day(time_.date()):
            is_holiday.append(time_.date().weekday())
        else:
            if time_.date().weekday() in [5, 6]:
                # Saturday / Sunday keep their weekday number
                is_holiday.append(time_.date().weekday())
            else:
                # Weekday public holiday
                is_holiday.append(7)
    is_holiday = pd.DataFrame(is_holiday, columns=['weekday_index'], index=index)
    appended_data = pd.concat([data, is_holiday], axis=1)
    return appended_data
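# Usage sketch for add_weekday_column on a toy frame spanning the 2020 Seollal
# period; the expected mapping assumes workalendar lists those dates as
# holidays, which matches the extended-holiday dates hard-coded elsewhere in
# this collection.
import numpy as np
import pandas as pd

idx = pd.date_range("2020-01-23", "2020-01-28", freq="D")
toy = pd.DataFrame({"load": np.arange(len(idx))}, index=idx)
print(add_weekday_column(toy)["weekday_index"].tolist())
# Thu 01-23 -> 3, Fri 01-24 (holiday) -> 7, Sat -> 5, Sun -> 6,
# Mon 01-27 -> 0 or 7 depending on whether the substitute holiday is listed, Tue 01-28 -> 1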
def calculate_date(self, start, selectDate):
    cnt = 0
    day_cnt = 0
    # workalendar country setting
    kcal = SouthKorea()

    # Whether the course starts on Monday or Tuesday
    if start == 'mon':
        sd = 0
    elif start == 'tue':
        sd = 1

    # Regular class: find the end date (checkday) after 8 class days
    while cnt != 8:
        checkday = selectDate + timedelta(days=day_cnt)
        if checkday.weekday() == sd or checkday.weekday() == sd + 2:
            # print(cnt, checkday, checkday.weekday())
            if kcal.is_working_day(checkday):
                cnt += 1
        day_cnt += 1

    # Intensive class: find the end date (fastcheckday) after 16 class days
    cnt = 0
    day_cnt = 0
    while cnt != 16:
        fastcheckday = selectDate + timedelta(days=day_cnt)
        # Count Monday through Thursday (weekday 0-3)
        if fastcheckday.weekday() >= 0 and fastcheckday.weekday() < 4:
            if kcal.is_working_day(fastcheckday):
                cnt += 1
        day_cnt += 1

    msgEnd = "정규반 %s수강 시 종강일은 %s입니다." % (selectDate.strftime("%m-%d"),
                                                     checkday.strftime("%m-%d"))
    msgEnd += "\n속성반 %s수강 시 종강일은 %s입니다." % (selectDate.strftime("%m-%d"),
                                                       fastcheckday.strftime("%m-%d"))
    return msgEnd
def get_holidays_list():
    calendar_ko = SouthKorea()
    d = {}
    for y in range(2015, 2020):
        holidays = get_holidays(calendar_ko, y)
        d[y] = holidays

    # Temporary / substitute public holidays
    temp_holidays = ['2015, 4, 28', '2015, 5, 1', '2015, 8, 14', '2015, 9, 29',
                     '2016, 2, 10', '2016, 4, 13', '2016, 5, 1', '2016, 5, 6',
                     '2017, 1, 30', '2017, 5, 1', '2017, 5, 9', '2017, 10, 2', '2017, 10, 6',
                     '2018, 5, 1', '2018, 5, 7', '2018, 6, 13', '2018, 9, 26',
                     '2019, 5, 6']
    for h in temp_holidays:
        s = h.split(',')
        t = datetime.strptime(h, "%Y, %m, %d").date()
        d[int(s[0])].append(t)

    holiday_list = [v for lst in d.values() for v in lst]
    return holiday_list
# NOTE: written against an older pandas API (DatetimeIndex.weekday_name and the
# closed= argument were later removed in favour of day_name() and inclusive=).
import numpy as np
import pandas as pd
from workalendar.asia import SouthKorea

dates = pd.date_range(start='2014/01/01 00:00:00',
                      end='2018/11/01 00:00:00',
                      closed='left',
                      freq='1H')
weekdays = np.array(dates.weekday_name.tolist())
# print weekdays
# print dates.weekday_name.tolist()

cal = SouthKorea()
holidays = (cal.holidays(2014) + cal.holidays(2015) + cal.holidays(2016)
            + cal.holidays(2017) + cal.holidays(2018))

result = []
for i in range(len(holidays)):
    result.append(holidays[i][0])

date_idx = np.zeros(len(dates), dtype=float)
date_idx[np.where(weekdays == 'Saturday')[0]] += 1
date_idx[np.where(weekdays == 'Sunday')[0]] += 1
date_idx[np.where(dates.isin(result))[0]] += 100

print(len(
    pd.date_range(start='2014/01/01 00:00:00',
                  end='2018/01/01 00:00:00',
                  closed='left',
                  freq='1H')))  # 35064
print(len(
    pd.date_range(start='2014/01/01 00:00:00',
from datetime import datetime, date
from workalendar.asia import SouthKorea
from threading import Timer

cal = SouthKorea()
print(cal.holidays(2019))
print(cal.is_holiday(datetime.today()))
print(cal.is_working_day(datetime.today()))

# x = datetime.today()
# y = x.replace(day=x.day+1, hour=0, minute=0, second=0, microsecond=0)
# delta_t = y-x
# print(x)
# print(y)
# print(delta_t)
# secs = delta_t.seconds+1
# print(secs)
#
# def hello_world():
#     print("hello world")
#
# t = Timer(secs, hello_world)
# t.start()
import os
import json
from datetime import datetime

from workalendar.asia import SouthKorea

from common.config import korea_timezone

cal = SouthKorea()


def is_semester(date_to_know=None):
    if not date_to_know:
        date_to_know = datetime.now(tz=korea_timezone)
    # Determine whether the date falls in a regular semester, a seasonal
    # (summer/winter) session, or vacation.
    # Load the timetable JSON file
    current_dir = os.path.dirname(os.path.abspath(__file__))
    date_url = f'{current_dir}/timetable/date.json'
    with open(date_url, 'r') as raw_json:
        result = json.load(raw_json)
    term_result = -1
    for key in [x for x in list(result.keys()) if x not in ['holiday', 'halt']]:
        for term in result[key]:
            start_time = datetime.strptime(
                term['start'], "%m/%d/%Y").replace(tzinfo=korea_timezone)
            end_time = datetime.strptime(
                term['end'], "%m/%d/%Y").replace(tzinfo=korea_timezone)
            start_time = start_time.replace(year=date_to_know.year)
            end_time = end_time.replace(year=date_to_know.year)
def __init__(self,
             input_width=7,
             label_width=7,
             shift=14,
             label_columns=["Maximum_Power_This_Year"],
             features=None):
    ##############################################################
    # Raw data
    ##############################################################
    kpx_load = pd.read_csv("./data/preprocess/KPX_load.csv")
    self.data = kpx_load[[
        "Date", "Installed_Capacity", "Supply_Capacity",
        "Maximum_Power_Last_Year", "Maximum_Power_This_Year", "Supply_Reserve"
    ]]

    if isinstance(features, list):
        if "meteo" in features:
            meteorology = pd.read_csv("./data/preprocess/Meteorology.csv")
            meteorology = meteorology[[
                "location", "Date",
                "avg_temp", "min_temp", "max_temp",  # temperature
                "max_rain_1h",             # rainfall
                "avg_dew_point",           # dew point
                "avg_relative_humidity",   # relative humidity
                "sunshine_hr",             # sunshine hours
                "avg_land_temp",           # ground temperature
            ]]
            # meteorology = meteorology.fillna(0).groupby("Date").agg('mean')
            self.data = pd.merge(self.data, meteorology, on="Date")

        if "covid" in features:
            covid = pd.read_csv("./data/COVID/covid19.csv")
            covid["Date"] = covid["Date"].str.replace(" ", "")
            covid["Sum_diff"] = np.gradient(covid.Sum, 1)
            covid["Sum_diff2"] = np.gradient(covid.Sum, 2)
            covid = covid[[
                "Date",
                "Sum_diff2",  # second difference of cumulative cases
                "Sum_diff"    # day-over-day increase in cumulative cases
            ]]
            self.data = pd.merge(self.data, covid, on="Date")

        if "gas" in features:
            gas = pd.read_csv("./data/preprocess/shell_price.csv")
            gas["gasoline_diff"] = np.gradient(gas.gasoline2, 1)
            gas["diesel_diff"] = np.gradient(gas.diesel, 1)
            gas = gas[[
                "Date",
                "gasoline2",      # regular gasoline price
                "diesel",         # diesel price
                "gasoline_diff",  # day-over-day change in gasoline price
                "diesel_diff"     # day-over-day change in diesel price
            ]]
            self.data = pd.merge(self.data, gas, on="Date")

        if "exchange" in features:
            exchange = pd.read_csv("./data/preprocess/exchange.csv")
            exchange["Last_diff"] = np.gradient(exchange.Last, 1)
            exchange = exchange[[
                "Date",
                "Last",      # closing price
                "Last_diff"  # day-over-day change in closing price
            ]]
            self.data = pd.merge(self.data, exchange, on="Date")

    # Others: holiday, weekday information
    date = pd.date_range('2020.01.01', end='2020.11.24', freq='d')
    date = pd.DataFrame(columns=["Date"], data=date.astype(str).values)
    date["Date"] = date["Date"].str.replace("-", ".")
    date["weekday"] = pd.to_datetime(date["Date"]).dt.weekday
    week_dict = {0: 1, 1: 0, 2: 0, 3: 0, 4: 0, 5: 1, 6: 1}
    date["weekday"] = date["weekday"].map(week_dict)
    date["holiday"] = 0
    date.loc[date.Date.isin(
        pd.Series(np.array(SouthKorea().holidays(2020))[:, 0]).map(str)
        .str.replace("-", ".")), "holiday"] = 1

    self.data = pd.merge(self.data, date, on="Date", how='left')
    self.data.fillna(0, inplace=True)

    self.train_df = self.data[self.data.Date < "2020.11.01"]
    self.val_df = self.data[self.data.Date >= "2020.11.01"]
    self.label_columns = label_columns

    ##############################################################
    # Work out the label column indices.
    ##############################################################
    if label_columns is not None:
        self.label_columns_indices = {
            name: i for i, name in enumerate(self.label_columns)
        }
    self.column_indices = {
        name: i for i, name in enumerate(self.train_df.columns)
    }

    ##############################################################
    # Work out the window parameters
    ##############################################################
    self.input_width = input_width
    self.label_width = label_width
    self.shift = shift

    self.total_window_size = self.input_width + self.shift

    self.input_slice = slice(0, input_width)
    self.input_indices = np.arange(self.total_window_size)[self.input_slice]

    self.label_start = self.total_window_size - self.label_width
    self.labels_slice = slice(self.label_start, None)
    self.label_indices = np.arange(self.total_window_size)[self.labels_slice]
from datetime import timedelta

from workalendar.asia import SouthKorea


def cal_working_day(start_date, end_date):
    cal = SouthKorea()
    # get_working_days_delta() excludes the start date by default, so shift it
    # back one day to make the count inclusive of start_date.
    return cal.get_working_days_delta(start_date - timedelta(1), end_date)
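# Hedged usage example for cal_working_day: assumes workalendar's default of
# excluding the start date in get_working_days_delta (which the one-day shift
# above compensates for) and that Hangul Day (2020-10-09) is in the holiday set.
from datetime import date

# Mon 2020-10-05 through Fri 2020-10-09: Friday is Hangul Day, so 4 working days.
print(cal_working_day(date(2020, 10, 5), date(2020, 10, 9)))  # expected: 4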
def prophet_kospi(self, model_kospi):
    # 1) KOSPI
    self.model_kospi = model_kospi
    self.df = copy.deepcopy(self.model_kospi)
    self.df['date'] = pd.to_datetime(self.df.index)
    self.data = self.df[['date', 'Close']].reset_index(drop=True)
    self.data = self.data.rename(columns={'date': 'ds', 'Close': 'y'})

    # Inspect the trend of the data
    # self.data.plot(x='ds', y='y', figsize=(16, 8))

    # Hyperparameters (Prophet defaults kept for reference)
    # self.prop_model = Prophet(
    #     growth='linear',
    #     # changepoints=cp_1,
    #     # n_changepoints=25,
    #     changepoint_range=0.95,
    #     yearly_seasonality='auto',
    #     weekly_seasonality='auto',
    #     daily_seasonality='auto',
    #     holidays=None,
    #     seasonality_mode='additive',
    #     seasonality_prior_scale=10.0,
    #     holidays_prior_scale=10.0,
    #     changepoint_prior_scale=0.05,
    #     mcmc_samples=0,
    #     interval_width=0.8,
    #     uncertainty_samples=1000,
    #     stan_backend=None,
    # )
    self.prop_model = Prophet(yearly_seasonality='auto',
                              weekly_seasonality='auto',
                              daily_seasonality='auto',
                              changepoint_prior_scale=0.15,
                              changepoint_range=0.9)
    self.model = self.prop_model
    self.model.add_country_holidays(country_name='KR')
    self.model.fit(self.data)

    self.kor_holidays = pd.concat([
        pd.Series(np.array(SouthKorea().holidays(2020))[:, 0]),
        pd.Series(np.array(SouthKorea().holidays(2021))[:, 0])
    ]).reset_index(drop=True)

    self.future = self.model.make_future_dataframe(periods=self.pred_days)
    # Drop weekends and Korean public holidays from the forecast frame
    self.future = self.future[self.future.ds.dt.weekday != 5]
    self.future = self.future[self.future.ds.dt.weekday != 6]
    for self.kor_holiday in self.kor_holidays:
        self.future = self.future[self.future.ds != self.kor_holiday]

    self.forecast = self.model.predict(self.future)
    self.forecast[['ds', 'yhat', 'yhat_upper', 'yhat_lower']]

    # model.plot(forecast)
    # fig2 = model.plot_components(forecast)
    # figure = model.plot(forecast)
    # for changepoint in model.changepoints:
    #     plt.axvline(changepoint, ls='--', lw=1)
    # figure.legend(loc=2)
    # df.shape

    # Cross Validation
    # - For measuring forecast error by comparing the predicted values with the actual values
    # - initial: the size of the initial training period
    # - period: the spacing between cutoff dates
    # - horizon: the forecast horizon (ds minus cutoff)
    # - By default, the initial training period is set to three times the horizon,
    #   and cutoffs are made every half a horizon
    # self.cv = cross_validation(self.model, initial='534 days', period='20 days', horizon='134 days')
    # self.df_pm = performance_metrics(self.cv)

    # Visualizing Performance Metrics
    # - cutoff: how far into the future the prediction was
    # plot_cross_validation_metric(self.cv, metric='rmse')

    # Actual value
    self.actual_value = float(self.data[self.data['ds'] == self.data.iloc[-1].ds]['y'])
    # Predicted value
    self.predict_value = float(self.forecast[self.forecast['ds'] == self.date]['yhat'])

    if self.actual_value < self.predict_value:
        return '1'
    else:
        return '0'
def __init__(self,
             input_width,
             label_width,
             shift,
             batch_size,
             label_columns=["Maximum_Power_This_Year"],
             features=None,
             aux1=False,
             aux2=False):
    self.input_width = input_width
    self.label_width = label_width
    self.shift = shift
    self.batch_size = batch_size
    self.label_columns = label_columns
    self.total_window_size = self.input_width + self.shift
    self.aux1 = aux1
    self.aux2 = aux2

    ##############################################################
    # Raw data
    ##############################################################
    kpx_load = pd.read_csv("./data/preprocess/KPX_load.csv")
    self.kpx_load = kpx_load[[
        "Date",
        # "Installed_Capacity",
        "Supply_Capacity",
        "Maximum_Power_Last_Year",
        "Maximum_Power_This_Year",
        "Supply_Reserve"
    ]][-329:].reset_index(drop=True)
    self.kpx_load_size = self.kpx_load.shape[1] - 1

    self.data = self.kpx_load
    self.internal_size = self.kpx_load_size
    self.external_size = 0

    if isinstance(features, list):
        if "meteo" in features:
            meteorology = pd.read_csv("./data/preprocess/Meteorology.csv")
            meteorology = meteorology[[
                "location", "Date",
                "avg_temp", "min_temp", "max_temp",  # temperature
                "max_rain_1h",             # rainfall
                "avg_dew_point",           # dew point
                "avg_relative_humidity",   # relative humidity
                "sunshine_hr",             # sunshine hours
                "avg_land_temp",           # ground temperature
            ]]
            meteorology = meteorology.fillna(0).groupby("Date").agg('mean')
            self.meteorology = meteorology.drop(["location"], axis=1)
            self.meteorology_size = self.meteorology.shape[1]
            self.internal_size += self.meteorology_size
            self.data = pd.merge(self.data, self.meteorology, on="Date")
            # print(self.data.head(5))

        if "covid" in features:
            covid = pd.read_csv("./data/preprocess/covid19.csv")
            covid["Date"] = covid["Date"].str.replace(" ", "")
            covid["Sum_diff"] = np.gradient(covid.Sum, 1)
            covid["Sum_diff2"] = np.gradient(covid.Sum, 2)
            self.covid = covid[[
                "Date",
                "Sum_diff2",  # second difference of cumulative cases
                "Sum_diff",   # day-over-day increase in cumulative cases
                # "Sum"
            ]]
            self.covid_size = self.covid.shape[1] - 1  # except Date
            self.external_size += self.covid_size
            self.data = pd.merge(self.data, self.covid, on="Date")
            # print(self.data.head(5))

        if "gas" in features:
            gas = pd.read_csv("./data/preprocess/shell_price.csv")
            gas["gasoline_diff"] = np.gradient(gas.gasoline2, 1)
            gas["diesel_diff"] = np.gradient(gas.diesel, 1)
            self.gas = gas[[
                "Date",
                "gasoline2",      # regular gasoline price
                "diesel",         # diesel price
                "gasoline_diff",  # day-over-day change in gasoline price
                "diesel_diff"     # day-over-day change in diesel price
            ]]
            self.gas_size = self.gas.shape[1] - 1
            self.external_size += self.gas_size
            self.data = pd.merge(self.data, self.gas, on="Date")
            # print(self.data.head(5))

        if "exchange" in features:
            exchange = pd.read_csv("./data/preprocess/exchange.csv")
            exchange["Last_diff"] = np.gradient(exchange.Last, 1)
            self.exchange = exchange[[
                "Date",
                "Last",      # closing price
                "Last_diff"  # day-over-day change in closing price
            ]]
            self.exchange_size = self.exchange.shape[1] - 1
            self.external_size += self.exchange_size
            self.data = pd.merge(self.data, self.exchange, on="Date")
            # print(self.data.head(5))

    # Others: holiday, weekday information
    date = pd.date_range('2020.01.01', end='2020.11.24', freq='d')
    date = pd.DataFrame(columns=["Date"], data=date.astype(str).values)
    date["Date"] = date["Date"].str.replace("-", ".")
    date["weekday"] = pd.to_datetime(date["Date"]).dt.weekday
    # week_dict = {0: 0, 1: 1, 2: 1, 3: 1, 4: 1, 5: 2, 6: 2}
    # date["weekday"] = date["weekday"].map(week_dict)
    date["holiday"] = 0
    date.loc[date.Date.isin(
        pd.Series(np.array(SouthKorea().holidays(2020))[:, 0]).map(str)
        .str.replace("-", ".")), "holiday"] = 1
    # Extended holiday blocks: 2020 Lunar New Year and Chuseok periods
    date.loc[date.Date.isin(
        ["2020.01.24", "2020.01.25", "2020.01.26", "2020.01.27"]), "holiday"] = 2
    date.loc[date.Date.isin(
        ["2020.09.30", "2020.10.01", "2020.10.02", "2020.10.03"]), "holiday"] = 2

    ##############################################################
    # dummy
    ##############################################################
    weekday_dum = pd.get_dummies(date.weekday, prefix="week")
    date = pd.concat([date, weekday_dum], axis=1)
    holiday_dum = pd.get_dummies(date.holiday, prefix="holiday")
    date = pd.concat([date, holiday_dum], axis=1)
    date.drop(["weekday", "holiday"], axis=1, inplace=True)
    self.date = date
    self.date_size = date.shape[1] - 1

    ################################################################
    # Post-process
    ################################################################
    self.data = pd.merge(self.data, self.date, on="Date", how='left')
    self.data.fillna(0, inplace=True)

    idx_val = self.data[self.data.Date == "2020.11.01"].index.values[0]
    idx_test = self.data[self.data.Date == "2020.11.01"].index.values[0]

    # Divide it into train, val, and test
    # (only train and test are effectively distinct here)
    self.train_df = self.data[:idx_val]
    self.val_df = self.data[(idx_val - self.total_window_size):idx_test]
    self.test_df = self.data[(idx_val - self.total_window_size):]

    # get date info
    self.date_train_df = self.train_df.Date
    self.date_val_df = self.val_df.Date[self.total_window_size:]
    self.date_test_df = self.test_df.Date[self.total_window_size:]

    self.train_df.drop(["Date"], axis=1, inplace=True)
    self.val_df.drop(["Date"], axis=1, inplace=True)
    self.test_df.drop(["Date"], axis=1, inplace=True)

    ##############################################################
    # Scaler
    ##############################################################
    # self.data_mean = self.data.drop(["Date"], axis=1).mean()
    # self.data_std = self.data.drop(["Date"], axis=1).std()
    # self.train_df = (self.train_df - self.data_mean) / self.data_std  # Normalize
    # self.val_df = (self.val_df - self.data_mean) / self.data_std
    # self.test_df = (self.test_df - self.data_mean) / self.data_std
    self.data_min = self.data.drop(["Date"], axis=1).min()
    self.data_max = self.data.drop(["Date"], axis=1).max()
    # Min-max normalization
    self.train_df = (self.train_df - self.data_min) / (self.data_max - self.data_min)
    self.val_df = (self.val_df - self.data_min) / (self.data_max - self.data_min)
    self.test_df = (self.test_df - self.data_min) / (self.data_max - self.data_min)

    ##############################################################
    # Work out the label column indices.
    ##############################################################
    if label_columns is not None:
        self.label_columns_indices = {
            name: i for i, name in enumerate(self.label_columns)
        }
    self.column_indices = {
        name: i for i, name in enumerate(self.train_df.columns)
    }

    ##############################################################
    # Work out the window parameters
    ##############################################################
    self.input_slice = slice(0, input_width)
    self.input_indices = np.arange(self.total_window_size)[self.input_slice]

    self.label_start = self.total_window_size - self.label_width
    self.labels_slice = slice(self.label_start, None)
    self.label_indices = np.arange(self.total_window_size)[self.labels_slice]
country_hols['Italy'] = Italy()
from workalendar.europe import Portugal
country_hols['Portugal'] = Portugal()
from workalendar.europe import UnitedKingdom
country_hols['UnitedKingdom'] = UnitedKingdom()
from workalendar.europe import Ireland
country_hols['Ireland'] = Ireland()
from workalendar.europe import Netherlands
country_hols['Netherlands'] = Netherlands()
from workalendar.asia import China
country_hols['China'] = China()
from workalendar.asia import Japan
country_hols['Japan'] = Japan()
from workalendar.asia import SouthKorea
country_hols['Korea'] = SouthKorea()
# from workalendar.asia import India
# country_hols['India'] = India()
# from workalendar.asia import Thailand
# country_hols['Thailand'] = Thailand()
# from workalendar.asia import Vietnam
# country_hols['Vietnam'] = Vietnam()
# from workalendar.asia import Indonesia
# country_hols['Indonesia'] = Indonesia()
from workalendar.oceania import Australia
country_hols['Australia'] = Australia()
from workalendar.america import Brazil
country_hols['Brazil'] = Brazil()
from workalendar.america import Canada
from selenium import webdriver
import time
from selenium.webdriver.common.keys import Keys
import calendar
from datetime import date, datetime
from workalendar.asia import SouthKorea

cal = SouthKorea()   # Korean holiday calendar
holiday = []         # list of public holidays as 'MM-DD' strings
holiday_date = []    # day-of-month list of this month's holidays

# Collect every public holiday in 2020
for i in range(len(cal.holidays(2020))):
    holiday.append(str(cal.holidays(2020)[i][0])[5:])

month = datetime.today().month  # current month

safety_id = input("하영드리미 아이디 : ")
safety_pw = input("하영드리미 비밀번호 : ")

# If the current month contains holidays, record which days they fall on
for j in range(len(holiday)):
    if month == int(holiday[j][:2]):
        holiday_date.append(int(holiday[j][3:]))

# Chrome driver
driver = webdriver.Chrome('chromedriver')
driver.get("http://safety.jejunu.ac.kr/")
time.sleep(1)

# Log in to the safety site
search = driver.find_element_by_xpath('//*[@id="userId"]')
]
hd3 = [
    # General election days
    '20160413',
    '20120411',
    '20080409',
    '20040415',
    '20000413',
]
hd4 = [
    # Local election days
    '20140604',
    '20100602',
    '20060531',
    '20020613',
]

cal = SouthKorea()
years = range(2000, 2019)
hds = []
for y in years:
    hds.extend([h[0] for h in cal.holidays(y)])
thds = [
    dt.strptime(h, '%Y%m%d').date()
    for h in (*hd0, *hd1, *hd2, *hd3, *hd4)
]
hds.extend(thds)
# print(hds)

# 2014.09.10: Extended Holiday
'''
Extended Holidays:
[20140910, 20150929, 20160210, 20170130, 20171006, 20180507, 20180926]
https://namu.wiki/w/%EB%8C%80%EC%B2%B4%20%ED%9C%B4%EC%9D%BC%20%EC%A0%9C%EB%8F%84#s-4
def run():
    from PIL import Image
    image = Image.open('logo.jpg')
    image_stock = Image.open('stock.jpg')

    st.image(image, use_column_width=False)
    add_selectbox = st.sidebar.selectbox("예측 방법 결정", ("Online", "Batch"))
    st.sidebar.info('프로젝트명 :' + '\n' + '자연어 처리 기반의 투자분석 및 예측시스템 개발')
    st.sidebar.success('★멘토님★ : 정좌연 PE')
    st.sidebar.info('팀명 : 턴어라운드')
    st.sidebar.success('팀원 : 이지훈, 이문형, 강민재, 구병진, 김서정')
    st.sidebar.image(image_stock)
    st.title("KOSPI 지수 및 YG 종목 주가 예측 모델")

    # User settings
    if add_selectbox == 'Online':
        date = str(
            st.number_input('Date',
                            min_value=20200101,
                            max_value=20201231,
                            value=20201027))
        rev_date = date[0:4] + '-' + date[4:6] + '-' + date[6:]
        target = st.selectbox('Target', ['KOSPI', 'YG'])
        method = st.selectbox(
            'Method',
            ['AutoML_CLA', 'AutoML_REG', 'ARIMA', 'Prophet', 'RL', 'NLP'])
        output = ""
        input_dict = {'Date': date, 'Target': target, 'Method': method}
        input_ = DataCollectionModel.DataCollection(date)
        prophet_input_ = ProphetModel.Prophet_(date)

        # KOSPI model: collect data and prepare the training set
        if target == 'KOSPI':
            input_df = input_.kospi_collection()
            if method == 'AutoML_CLA':
                # Prediction model
                model = load_model('deployment_kospi_20201029')
                # Model used for evaluation on the held-out split
                model_train = load_model('deployment_kospi_train_20201029')
                load_test_model = predict_model(model_train,
                                                data=input_df[0].iloc[382:])
                test_model = load_test_model[['Labeling', 'Label']]
                acc_ = accuracy_score(test_model['Labeling'], test_model['Label'])
                auc_ = roc_auc_score(test_model['Labeling'], test_model['Label'])
                recall_ = recall_score(test_model['Labeling'], test_model['Label'])
                prec_ = precision_score(test_model['Labeling'], test_model['Label'])
                f1_ = f1_score(test_model['Labeling'], test_model['Label'])
                data = {
                    'ACC': [acc_],
                    'AUC': [auc_],
                    'RECALL': [recall_],
                    'PREC': [prec_],
                    'F1': [f1_]
                }
                score_model = pd.DataFrame(
                    data=data, columns=['ACC', 'AUC', 'RECALL', 'PREC', 'F1'])
                score_model.index.name = "Metrics Score"
                st.write("Test Data Metrics Score")
                st.table(score_model)
            elif method == 'AutoML_REG':
                # Prediction model
                model = load_model('deployment_kospi_reg_20201029')
                # Model used for evaluation on the held-out split
                model_train = load_model('deployment_kospi_reg_train_20201029')
                reg_data = copy.deepcopy(input_df[0].iloc[382:])
                del reg_data['Labeling']
                load_test_model = predict_model(model_train, data=reg_data)
                test_model = load_test_model[['Close', 'Label']]
                mae_ = mean_absolute_error(test_model['Close'], test_model['Label'])
                mse_ = mean_squared_error(test_model['Close'], test_model['Label'])
                rmse_ = mean_squared_error(test_model['Close'],
                                           test_model['Label'],
                                           squared=False)
                r2_ = r2_score(test_model['Close'], test_model['Label'])
                data = {
                    'MAE': [mae_],
                    'MSE': [mse_],
                    'RMSE': [rmse_],
                    'R2': [r2_]
                }
                score_model = pd.DataFrame(data=data,
                                           columns=['MAE', 'MSE', 'RMSE', 'R2'])
                score_model.index.name = "Metrics Score"
                st.write("Test Data Metrics Score")
                st.table(score_model)
                st.write("Forecast Data (Test Data)")
                st.line_chart(test_model)
            elif method == 'ARIMA':
                # Add model loading here if needed
                print("ARIMA")
            elif method == 'Prophet':
                # Add model loading here if needed
                print("Prophet")
            elif method == 'RL':
                import main
                # Add model loading here if needed
                print("RL")
            elif method == 'NLP':
                # Add model loading here if needed
                print("NLP")

        # YG model: collect data and prepare the training set
        else:
            input_df = input_.yg_collection()
            if method == 'AutoML_CLA':
                # Prediction model
                model = load_model('deployment_yg_20201029')
                # Model used for evaluation on the held-out split
                model_train = load_model('deployment_yg_train_20201029')
                load_test_model = predict_model(model_train,
                                                data=input_df[0][341:])
                test_model = load_test_model[['Labeling', 'Label']]
                acc_ = accuracy_score(test_model['Labeling'], test_model['Label'])
                auc_ = roc_auc_score(test_model['Labeling'], test_model['Label'])
                recall_ = recall_score(test_model['Labeling'], test_model['Label'])
                prec_ = precision_score(test_model['Labeling'], test_model['Label'])
                f1_ = f1_score(test_model['Labeling'], test_model['Label'])
                data = {
                    'ACC': [acc_],
                    'AUC': [auc_],
                    'RECALL': [recall_],
                    'PREC': [prec_],
                    'F1': [f1_]
                }
                score_model = pd.DataFrame(
                    data=data, columns=['ACC', 'AUC', 'RECALL', 'PREC', 'F1'])
                score_model.index.name = "Metrics Score"
                st.write("Test Data Metrics Score")
                st.table(score_model)
            elif method == 'AutoML_REG':
                # Prediction model
                model = load_model('deployment_yg_reg_20201029')
                # Model used for evaluation on the held-out split
                model_train = load_model('deployment_yg_reg_train_20201029')
                reg_data = copy.deepcopy(input_df[0].iloc[341:])
                del reg_data['Labeling']
                load_test_model = predict_model(model_train, data=reg_data)
                test_model = load_test_model[['Close', 'Label']]
                mae_ = mean_absolute_error(test_model['Close'], test_model['Label'])
                mse_ = mean_squared_error(test_model['Close'], test_model['Label'])
                rmse_ = mean_squared_error(test_model['Close'],
                                           test_model['Label'],
                                           squared=False)
                r2_ = r2_score(test_model['Close'], test_model['Label'])
                data = {
                    'MAE': [mae_],
                    'MSE': [mse_],
                    'RMSE': [rmse_],
                    'R2': [r2_]
                }
                score_model = pd.DataFrame(data=data,
                                           columns=['MAE', 'MSE', 'RMSE', 'R2'])
                score_model.index.name = "Metrics Score"
                st.write("Test Data Metrics Score")
                st.table(score_model)
                st.write("Forecast Data (Test Data)")
                st.line_chart(test_model)
            elif method == 'ARIMA':
                # Add model loading here if needed
                print("ARIMA")
            elif method == 'Prophet':
                # Add model loading here if needed
                print("prophet")
            elif method == 'RL':
                # Add model loading here if needed
                print("RL")
            elif method == 'NLP':
                print("NLP")

        # Run the prediction model
        buy_message = "주가 상승 예상 -> 매매 어드바이스 : 매수"
        sell_message = "주가 하락 예상 -> 매매 어드바이스 : 매도"
        if st.button("주가 예측"):
            if method == 'AutoML_CLA':
                output = predict(model=model, input_df=input_df[0])
                if output == '1':
                    output = date + buy_message
                else:
                    output = date + sell_message
            elif method == 'AutoML_REG':
                output = predict_reg(model=model, input_df=input_df)
                if output == '1':
                    output = date + buy_message
                else:
                    output = date + sell_message
            elif method == 'ARIMA':
                print("ARIMA")
            elif method == 'Prophet':
                if target == 'KOSPI':
                    df_prophet = copy.deepcopy(input_df[0])
                    df_prophet['date'] = pd.to_datetime(df_prophet.index)
                    df_data = df_prophet[['date', 'Close']].reset_index(drop=True)
                    df_data = df_data.rename(columns={'date': 'ds', 'Close': 'y'})
                    prop_model = Prophet(yearly_seasonality='auto',
                                         weekly_seasonality='auto',
                                         daily_seasonality='auto',
                                         changepoint_prior_scale=0.15,
                                         changepoint_range=0.9)
                    prop_model.add_country_holidays(country_name='KR')
                    prop_model.fit(df_data)
                    kor_holidays = pd.concat([
                        pd.Series(np.array(SouthKorea().holidays(2020))[:, 0]),
                        pd.Series(np.array(SouthKorea().holidays(2021))[:, 0])
                    ]).reset_index(drop=True)
                    prop_future = prop_model.make_future_dataframe(periods=10)
                    prop_future = prop_future[prop_future.ds.dt.weekday != 5]
                    prop_future = prop_future[prop_future.ds.dt.weekday != 6]
                    for kor_holiday in kor_holidays:
                        prop_future = prop_future[prop_future.ds != kor_holiday]
                    prop_forecast = prop_model.predict(prop_future)
                    prop_forecast[['ds', 'yhat', 'yhat_upper', 'yhat_lower']]
                    fig1 = prop_model.plot(prop_forecast)
                    fig2 = prop_model.plot_components(prop_forecast)
                    # cv = cross_validation(prop_model, initial='10 days', period='20 days', horizon='5 days')
                    # df_pm = performance_metrics(cv)
                    # fig3 = plot_cross_validation_metric(cv, metric='rmse')
                    st.write("Forecast Data")
                    st.write(fig1)
                    st.write("Component Wise Forecast")
                    st.write(fig2)
                    # st.write("Cross Validation Metric")
                    # st.table(df_pm)
                    # st.write(fig3)
                    output = prophet_input_.prophet_kospi(input_df[0])
                    if output == '1':
                        output = date + buy_message
                    else:
                        output = date + sell_message
                else:
                    df_prophet = copy.deepcopy(input_df[0])
                    df_prophet['date'] = pd.to_datetime(df_prophet.index)
                    df_data = df_prophet[['date', 'Close']].reset_index(drop=True)
                    df_data = df_data.rename(columns={'date': 'ds', 'Close': 'y'})
                    # cp = ['2019-10-23', '2019-11-04', '2019-11-13', '2019-11-22', '2019-12-04', '2019-12-13', '2019-12-26', '2020-01-08', '2020-01-17', '2020-01-31', '2020-02-11', '2020-02-20', '2020-03-03', '2020-03-12', '2020-03-23', '2020-04-02', '2020-04-13', '2020-04-23', '2020-05-08', '2020-05-19', '2020-05-29', '2020-06-09', '2020-06-18', '2020-06-30', '2020-07-09']
                    cp_spc = [
                        '2020-08-11', '2020-08-12', '2020-08-13', '2020-08-18',
                        '2020-08-19', '2020-08-20', '2020-08-26', '2020-08-28',
                        '2020-08-31', '2020-09-02', '2020-09-03', '2020-09-07',
                        '2020-09-08'
                    ]
                    cp_default = [
                        '2018-10-29', '2018-11-19', '2018-12-11', '2019-01-04',
                        '2019-01-29', '2019-02-22', '2019-03-19', '2019-04-10',
                        '2019-05-03', '2019-05-27', '2019-06-19', '2019-07-10',
                        '2019-08-01', '2019-08-26', '2019-09-20', '2019-10-15',
                        '2019-11-07', '2019-11-29', '2019-12-26', '2020-01-20',
                        '2020-02-13', '2020-03-05', '2020-03-30', '2020-04-21',
                        '2020-05-18'
                    ]
                    cp = cp_default + cp_spc
                    prop_model = Prophet(yearly_seasonality='auto',
                                         weekly_seasonality='auto',
                                         daily_seasonality='auto',
                                         changepoints=cp,
                                         changepoint_range=0.85,
                                         changepoint_prior_scale=0.2)
                    prop_model.fit(df_data)
                    kor_holidays = pd.concat([
                        pd.Series(np.array(SouthKorea().holidays(2019))[:, 0]),
                        pd.Series(np.array(SouthKorea().holidays(2020))[:, 0])
                    ]).reset_index(drop=True)
                    prop_future = prop_model.make_future_dataframe(periods=10)
                    prop_future = prop_future[prop_future.ds.dt.weekday != 5]
                    prop_future = prop_future[prop_future.ds.dt.weekday != 6]
                    for kor_holiday in kor_holidays:
                        prop_future = prop_future[prop_future.ds != kor_holiday]
                    prop_forecast = prop_model.predict(prop_future)
                    prop_forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail(10)
                    fig1 = prop_model.plot(prop_forecast)
                    fig2 = prop_model.plot_components(prop_forecast)
                    # cv = cross_validation(prop_model, initial='10 days', period='20 days', horizon='5 days')
                    # df_pm = performance_metrics(cv)
                    # fig3 = plot_cross_validation_metric(cv, metric='rmse')
                    st.write("Forecast Data")
                    st.write(fig1)
                    st.write("Component Wise Forecast")
                    st.write(fig2)
                    # st.write("Cross Validation Metric")
                    # st.table(df_pm)
                    # st.write(fig3)
                    output = prophet_input_.prophet_yg(input_df[0])
                    if output == '1':
                        output = date + buy_message
                    else:
                        output = date + sell_message
        st.success(output)

    if add_selectbox == 'Batch':
        file_upload = st.file_uploader("Upload csv file for predictions", type=["csv"])
        if file_upload is not None:
            data = pd.read_csv(file_upload)
            predictions = predict_model(estimator=model, data=data)
            st.write(predictions)
def prophet_yg(self, model_yg):
    # 2) YG
    self.model_yg = model_yg
    self.df = copy.deepcopy(self.model_yg)
    self.df['date'] = pd.to_datetime(self.df.index)
    self.data = self.df[['date', 'Close']].reset_index(drop=True)
    self.data = self.data.rename(columns={'date': 'ds', 'Close': 'y'})

    # Inspect the trend of the data
    # self.data.plot(x='ds', y='y', figsize=(16, 8))

    # cp = ['2019-10-23', '2019-11-04', '2019-11-13', '2019-11-22', '2019-12-04', '2019-12-13', '2019-12-26', '2020-01-08', '2020-01-17', '2020-01-31', '2020-02-11', '2020-02-20', '2020-03-03', '2020-03-12', '2020-03-23', '2020-04-02', '2020-04-13', '2020-04-23', '2020-05-08', '2020-05-19', '2020-05-29', '2020-06-09', '2020-06-18', '2020-06-30', '2020-07-09']
    self.cp_spc = ['2020-08-11', '2020-08-12', '2020-08-13', '2020-08-18',
                   '2020-08-19', '2020-08-20', '2020-08-26', '2020-08-28',
                   '2020-08-31', '2020-09-02', '2020-09-03', '2020-09-07',
                   '2020-09-08']
    self.cp_default = ['2018-10-29', '2018-11-19', '2018-12-11', '2019-01-04',
                       '2019-01-29', '2019-02-22', '2019-03-19', '2019-04-10',
                       '2019-05-03', '2019-05-27', '2019-06-19', '2019-07-10',
                       '2019-08-01', '2019-08-26', '2019-09-20', '2019-10-15',
                       '2019-11-07', '2019-11-29', '2019-12-26', '2020-01-20',
                       '2020-02-13', '2020-03-05', '2020-03-30', '2020-04-21',
                       '2020-05-18']
    self.cp = self.cp_default + self.cp_spc

    # Hyperparameters (Prophet defaults kept for reference)
    # growth='linear',
    # # changepoints=cp_1,
    # # n_changepoints=25,
    # changepoint_range=0.95,
    # yearly_seasonality='auto',
    # weekly_seasonality='auto',
    # daily_seasonality='auto',
    # holidays=None,
    # seasonality_mode='additive',
    # seasonality_prior_scale=10.0,
    # holidays_prior_scale=10.0,
    # changepoint_prior_scale=0.05,
    # mcmc_samples=0,
    # interval_width=0.8,
    # uncertainty_samples=1000,
    # stan_backend=None,
    self.m = Prophet(yearly_seasonality='auto',
                     weekly_seasonality='auto',
                     daily_seasonality='auto',
                     changepoints=self.cp,
                     changepoint_range=0.8,
                     changepoint_prior_scale=0.1)
    self.m.fit(self.data)

    self.kor_holidays = pd.concat([
        pd.Series(np.array(SouthKorea().holidays(2019))[:, 0]),
        pd.Series(np.array(SouthKorea().holidays(2020))[:, 0])
    ]).reset_index(drop=True)

    self.future = self.m.make_future_dataframe(periods=self.pred_days)
    # Drop weekends and Korean public holidays from the forecast frame
    self.future = self.future[self.future.ds.dt.weekday != 5]
    self.future = self.future[self.future.ds.dt.weekday != 6]
    for self.kor_holiday in self.kor_holidays:
        self.future = self.future[self.future.ds != self.kor_holiday]

    self.forecast = self.m.predict(self.future)
    self.forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail(self.pred_days)

    # self.m.plot(self.forecast)
    # self.m.plot_components(self.forecast)
    # self.figure = self.m.plot(self.forecast)
    # for changepoint in self.m.changepoints:
    #     plt.axvline(changepoint, ls='--', lw=1)
    # self.figure.legend(loc=2)
    # print(self.m.changepoints)

    # View only the predicted rows as a table
    # self.pred = self.forecast.tail(self.pred_days)
    # self.pred

    # plt.rc('font', family='NanumBarunGothic')
    # fig = plt.figure(figsize=(15, 12))
    # ax1 = fig.add_subplot(211)
    # ax1.plot(y['종가'], label='Y')
    # ax1.plot(pred['yhat'], color='red', label='Yhat')
    # ax1.plot(pred['yhat_lower'], color='green', label='Yhat_Lower')
    # ax1.plot(pred['yhat_upper'], color='green', label='Yhat_Upper')
    # ax1.set_xlabel('Date')
    # ax1.set_ylabel('Y')
    # ax1.legend(loc='best')
    # plt.show

    # Actual value
    self.actual_value = float(self.data[self.data['ds'] == self.data.iloc[-1].ds]['y'])
    # Predicted value
    self.predict_value = float(self.forecast[self.forecast['ds'] == self.date]['yhat'])

    if self.actual_value < self.predict_value:
        return '1'
    else:
        return '0'