def update_marketdata_from_crawler():
    """Load every crawled CSV file into the market-data store and archive it.

    Files found in ``CRAWLING_TARGET_PATH`` are handled in sorted name order.
    Each one is read with ``COLUMN_NAMES`` as header (the first row is
    skipped), gets a ``date`` column parsed from the first 8-digit run in its
    file name, has ``m_type`` translated through the type dictionary, loses
    its ``m_dept`` column, is inserted through ``smdw`` and is finally moved
    to ``CRAWLING_BACKUP_PATH``.

    Any exception is logged and the process is terminated with ``sys.exit()``.

    :return: None
    """
    m_type_dict = scdw.get_type_dict('A', is_name_index=True)
    # Raw string: '\d' in a plain literal is an invalid escape sequence.
    date_pattern = re.compile(r'\d{8}')

    def file_processing(csv_file_name):
        """Insert one crawled CSV file, then move it to the backup folder."""
        source_path = os.path.join(CRAWLING_TARGET_PATH, csv_file_name)
        trading_df = pd.read_csv(source_path,
                                 delimiter=',',
                                 encoding='CP949',
                                 names=COLUMN_NAMES,
                                 skiprows=[0])
        trading_df = trading_df.fillna(0)
        # An IndexError here (no 8-digit run in the name) is handled by the
        # caller's generic exception handler, as before.
        trading_df['date'] = parse(
            str(date_pattern.findall(csv_file_name)[0])).date()
        trading_df['m_type'] = trading_df['m_type'].apply(
            lambda m_type: m_type_dict[str(m_type)])
        trading_df = trading_df.drop(['m_dept'], axis=1)
        smdw.insert(trading_df)
        # Archive only after a successful insert so failed files stay queued.
        shutil.move(source_path,
                    os.path.join(CRAWLING_BACKUP_PATH, csv_file_name))

    se = StartEndLogging()
    try:
        for file_name in tqdm(sorted(os.listdir(CRAWLING_TARGET_PATH))):
            file_processing(file_name)
            se.mid(file_name)
    except Exception as e:
        log.error(e)
        sys.exit()
    se.end()
def lstm_test3():
    """Run ``modeling3`` for the first ten modeling-target companies.

    :return: None
    """
    timer = StartEndLogging()
    first_targets = scw.gets_modeling_target()[:10]
    for target in first_targets:
        LstmTraining(target.com_code, LSTM_KWARGS).modeling3()
    timer.end()
def code_init():
    """Replace the stored codes with the contents of ``CODE_FILE_PATH``.

    The CSV file is read (UTF-8, comma separated) and its missing values are
    replaced by empty strings *before* the existing codes are deleted, so an
    unreadable or missing file no longer leaves the store emptied.

    Any exception is logged and the process is terminated with ``sys.exit()``.

    :return: None
    """
    se = StartEndLogging()
    try:
        # Load first: deleting before a failed read would wipe the codes.
        code_df = pd.read_csv(CODE_FILE_PATH, delimiter=',', encoding='utf-8')
        code_df = code_df.fillna('')
        cdw.delete()
        cdw.insert(code_df)
    except Exception as e:
        log.error(e)
        sys.exit()
    se.end(f'{len(code_df)} codes insert!')
def start_crawler(self, is_disp_processing_step=False):
    """Crawl trading days backwards, one calendar month per iteration.

    The number of iterations is bounded by the requested period
    (``_s_date`` .. ``_e_date``) expressed in 30-day months plus two. Within
    each month the days are fetched in reverse index order; ``_we_date`` is
    set from the first day fetched and ``_ws_date`` tracks the most recent
    one. Crawling stops when a month has no working days, when the start date
    is reached or passed, or when the processed month is the start month.

    Exceptions raised while crawling are logged and swallowed. The web driver
    is always shut down, even on ``KeyboardInterrupt``/``SystemExit``.

    :param is_disp_processing_step: wrap the month loop in ``tqdm`` when True
    :return: None
    """
    se_check = StartEndLogging()
    about_period_months = int((self._e_date - self._s_date).days / 30) + 2
    log.info(f'Input days: {self._s_date} ~ {self._e_date} '
             f'({(self._e_date - self._s_date).days + 1} days)')

    if is_disp_processing_step:
        lists = tqdm(range(about_period_months))
    else:
        lists = range(about_period_months)

    try:
        try:
            for _ in lists:
                if self._is_start:
                    cnt_work_days, _cnt_skip_days = \
                        self._go_end_trading_day(not self._is_start)
                else:
                    cnt_work_days, _cnt_skip_days = \
                        self._change_calendar_month()

                if cnt_work_days == 0:
                    break

                prev_work_date = None
                for day_idx in reversed(range(cnt_work_days)):
                    curr_work_day = self._get_day_data(day_idx)
                    self._cnt_work_days += 1

                    if self._we_date is None:
                        self._we_date = parse(curr_work_day).date()
                    self._ws_date = parse(curr_work_day).date()

                    if self._ws_date == self._s_date:
                        self._is_stop = True
                        break

                    if self._ws_date < self._s_date:
                        # Overshot the requested start: discard the file that
                        # was just fetched and roll the bookkeeping back.
                        self._is_stop = True
                        os.remove(
                            os.path.join(CRAWLING_TARGET_PATH,
                                         f'{curr_work_day}.csv'))
                        self._ws_date = prev_work_date
                        self._cnt_work_days -= 1
                        break

                    prev_work_date = self._ws_date

                # Same year-month prefix as the start date: nothing earlier
                # is left to crawl.
                if str(self._ws_date)[:7] == str(self._s_date)[:7]:
                    self._is_stop = True

                if self._is_stop:
                    break
        except Exception as e:
            log.error(e)

        log.info(
            f'Working days: '
            f'{self._ws_date} ~ {self._we_date} ({self._cnt_work_days} days)')
    finally:
        # Release the browser session on every exit path.
        self._web_driver.quit()
    se_check.end()
def insert_company_from_market():
    """Rebuild the company store from the market data of one date.

    Existing companies are deleted, the market rows for the date returned by
    ``smdw.get_date(is_add_one=False)`` are fetched, and every row for which
    ``scw.make_object`` yields an object is inserted in one call.

    :return: None
    """
    timer = StartEndLogging()
    scw.delete()

    target_date = smdw.get_date(is_add_one=False)
    market_qs = smdw.gets(date=target_date)
    log.debug(f'market data size: {len(market_qs)}')

    # Rows for which make_object returns None are left out.
    candidates = (scw.make_object(market_row) for market_row in market_qs)
    scw.insert([obj for obj in candidates if obj is not None])
    timer.end()
def update_modelingdata_from_market():
    """
    Copy market rows whose date is greater than ``smlw.get_date()`` into the
    modeling store, keeping ``date``, ``com_code`` and ``MODELING_COLUMNS``.

    In the daily job, the count of added company data rows is ignored and
    the market data that has not been carried over yet is added in one batch.
    - This work is performed only during system initialization or in the
      weekly job.

    Any exception is logged and the process is terminated with ``sys.exit()``.

    :return: None
    """
    timer = StartEndLogging()
    try:
        last_modeling_date = smlw.get_date()
        pending_qs = smdw.gets('date', date__gt=last_modeling_date)
        pending_df = read_frame(pending_qs)
        wanted_columns = ['date', 'com_code'] + MODELING_COLUMNS
        smlw.insert(pending_df[wanted_columns])
    except Exception as err:
        log.error(err)
        sys.exit()
    timer.end()
def insert_modelingdata_from_market():
    """Rebuild the modeling store from each company's market data.

    All modeling rows are deleted first. Then, for every company, the rows
    returned by ``get_normal_marketdata`` are converted to a DataFrame, the
    company's ``data_size`` is updated and saved, and the ``date``,
    ``com_code`` and ``MODELING_COLUMNS`` columns are inserted.

    :return: None
    """
    se = StartEndLogging()
    company_qs = scw.gets('id')
    company_size = len(company_qs)
    log.debug(f'company size: {company_size}')
    smlw.delete()

    def get_normal_marketdata(com_code):
        """Return the company's market rows from its last flagged row onward.

        The ratio of the last to the first ``t_volume`` is checked against the
        ``TOTAL_CHECK_*`` bounds. Only when it is out of bounds are the rows
        scanned one by one; the most recent row that either breaks the
        ``DAY_CHECK_*`` bounds relative to the previous row or has a zero
        ``volume`` becomes the starting row. Otherwise all rows are kept.

        NOTE(review): ``first_data`` is dereferenced without a None check —
        presumably every company has at least one market row; confirm.

        :param com_code: company code used to select the market rows
        :return: the filtered queryset
        """
        company_market_qs = smdw.gets(com_code=com_code)
        first_data = company_market_qs.first()
        yesterday_data, first_normal_data = first_data, first_data
        if first_data.t_volume != 0:
            diff_ratio = company_market_qs.last(
            ).t_volume / first_data.t_volume
        else:
            # The first t_volume is 0, so the overall change cannot be checked.
            # Set diff_ratio so that the detailed per-day check is performed.
            diff_ratio = TOTAL_CHECK_MAX_RATIO + 1.
        if diff_ratio > TOTAL_CHECK_MAX_RATIO or diff_ratio < TOTAL_CHECK_MIN_RATIO:
            for market_data in company_market_qs[
                               1:]:  # the first row was already consumed above via first()
                if yesterday_data.t_volume != 0 and market_data.t_volume != 0:
                    diff_ratio = market_data.t_volume / yesterday_data.t_volume
                    if diff_ratio > DAY_CHECK_MAX_RATIO or diff_ratio < DAY_CHECK_MIN_RATIO:
                        first_normal_data = market_data
                # A stock with no trading volume on the day is regarded as
                # subject to a capital reduction/increase.
                if market_data.volume == 0:
                    first_normal_data = market_data
                yesterday_data = market_data
        normal_qs = company_market_qs.filter(date__gte=first_normal_data.date)
        log.debug(f'com_code: {com_code}, modeling data size: '
                  f'total({len(company_market_qs)}), normal({len(normal_qs)})')
        return normal_qs

    for company in tqdm(company_qs):
        market_df = read_frame(get_normal_marketdata(company.com_code))
        # Record the row count before the columns are narrowed.
        company.data_size = len(market_df)
        company.save()
        market_df = market_df[['date', 'com_code'] + MODELING_COLUMNS]
        smlw.insert(market_df)
    se.end()
def insert_marketdata_from_mergefile():
    """Clear the market-data store; the merge-file import is disabled.

    As written, the only live store operation is ``smdw.delete()``. The
    chunked CSV import that would use ``col_names`` is commented out.

    NOTE(review): the function name suggests an insert, but nothing is
    inserted while the loop below stays commented out — confirm intent.

    :return: None
    """
    insert_se = StartEndLogging('insert marketdata')
    # Column names for the merge file; referenced only by the disabled import.
    col_names = ['com_code', 'com_name', 'm_type', 'm_dept', 'close', 'diff',
                 'ratio', 'open', 'high', 'low', 'volume', 'value', 't_value',
                 't_volume', 'date']
    # merge_reader = pd.read_csv(MERGE_FILE_PATH, encoding='CP949', low_memory=False,
    #                            names=col_names, chunksize=3000, skiprows=[0])
    #
    # loop_cnt = 1
    # for merge_df in merge_reader:
    #     merge_df = merge_df.fillna(0)
    #     smdw.insert(merge_df)
    #     insert_se.mid(f'{loop_cnt * 3000}')
    #     loop_cnt += 1
    smdw.delete()
    insert_se.end()
def update_company_from_market():
    """Create or refresh company objects from the market data of one date.

    Market rows are fetched for the date returned by
    ``smdw.get_date(is_min=False, is_add_one=False)``. A row whose
    ``com_code`` has no match among the stored companies is passed to
    ``scw.make_object`` on its own; otherwise the matching company rows are
    passed along with it. All non-None results are inserted in one call.

    :return: None
    """
    timer = StartEndLogging()
    target_date = smdw.get_date(is_min=False, is_add_one=False)
    market_qs = smdw.gets(date=target_date)
    company_df = read_frame(scw.gets('id'))
    log.debug(f'market data size: {len(market_qs)}, company data size: {len(company_df)}')

    built_objects = []
    for market_row in market_qs:
        matched = company_df[company_df['com_code'] == market_row.com_code]
        # Unknown company: build from the market row only.
        make_args = (market_row,) if matched.empty else (matched, market_row)
        built = scw.make_object(*make_args)
        if built is None:
            continue
        built_objects.append(built)

    scw.insert(built_objects)
    timer.end()
def lstm_test():
    """Run ``modeling`` for the first fifteen modeling targets and log skips.

    The number of targets is logged up front. After each company a mid-point
    is logged, and the ``trend`` / ``accuracy`` flags of the returned mapping
    are tallied. The summary reports the size of the full target set, not
    just the fifteen that were processed.

    :return: None
    """
    se = StartEndLogging()
    modeling_target_qs = scw.gets_modeling_target()
    log.info(len(modeling_target_qs))

    cnt_skip_trend = 0
    cnt_skip_accuracy = 0
    for modeling_company in modeling_target_qs[:15]:
        # NOTE(review): `kwargs` is not defined in this function; sibling
        # functions pass LSTM_KWARGS — confirm a module-level `kwargs` exists.
        is_skip = LstmTraining(modeling_company.com_code, kwargs).modeling()
        se.mid(f'{modeling_company.com_code}')
        cnt_skip_trend += 1 if is_skip['trend'] else 0
        cnt_skip_accuracy += 1 if is_skip['accuracy'] else 0

    log.info(
        f'modeling total count: {len(modeling_target_qs)}, '
        f'trend skip: {cnt_skip_trend}, accuracy skip: {cnt_skip_accuracy}')
    se.end()
def today_modeling():
    """Run ``modeling2`` for every modeling target and log the skip counts.

    After each company a mid-point with its code and the running
    ``processed/total`` position is logged, and the ``trend`` / ``accuracy``
    flags of the returned mapping are tallied for the final summary.

    :return: None
    """
    se = StartEndLogging()
    modeling_target_qs = scw.gets_modeling_target()
    modeling_size = len(modeling_target_qs)

    cnt_skip_trend = 0
    cnt_skip_accuracy = 0
    for cnt_processing, modeling_company in enumerate(modeling_target_qs,
                                                      start=1):
        is_skip = LstmTraining(modeling_company.com_code,
                               LSTM_KWARGS).modeling2()
        se.mid(f'{modeling_company.com_code}, {cnt_processing}/{modeling_size}')
        cnt_skip_trend += 1 if is_skip['trend'] else 0
        cnt_skip_accuracy += 1 if is_skip['accuracy'] else 0

    log.info(f'modeling total count: {len(modeling_target_qs)}, '
             f'trend skip: {cnt_skip_trend}, accuracy skip: {cnt_skip_accuracy}')
    se.end()
diff_money = base_money_for_cnt - buy_money sell_money = y_trading.sell_price * y_trading.volume result_money = diff_money + sell_money total_money += result_money y_trading.save() account.balance = total_money - TRADING_BASE_MONEY account.ratio = total_money / TRADING_BASE_MONEY account.base_money = total_money account.save() except Exception as e: raise Exception(e) if __name__ == '__main__': tse = StartEndLogging('daily processing') try: start_krx_crawling() update_marketdata_from_crawler() update_company_from_market() update_modelingdata_from_market() yesterday_trading_result() # today_modeling() # today_trading() except Exception as err: log.error(err) tse.end('daily processing')