def test(self, code, day=252, year=1, test_year=1, full_year=5,
         test_month=1, full_month=5, category='d', eps=0.05):
    now = datetime.now()
    now = now.strftime('%Y%m%d')
    path = '.\\DB\\CSV\\daily\\'
    if category == 'm':
        # Minute charts: 20 trading days of 390 one-minute bars each
        path = '.\\DB\\CSV\\min\\'
        day = 20
        day = day * 390
        test_year = test_month
        full_year = full_month
        now = now + '_m'
    filename = code + '_ch.csv'
    chartfile = code + '.csv'
    if not os.path.exists(path + filename):
        self.logging.info('Creating file')
        self.pre.change_csv(code, category=category)
    if not os.path.exists('.\\Test\\re_' + now):
        os.makedirs('.\\Test\\re_' + now)
    self.logging.info('Splitting test data')
    df_chart = pd.read_csv(path + chartfile)
    if category == 'm':
        df_chart = self.pre.min_preprocessing(df_chart)
    # Raw chart rows covering the test window, saved for later inspection
    df_chart_split = df_chart.iloc[-1 * day * year * test_year:]
    df_chart_split.to_csv('.\\Test\\re_' + now + '\\chart.csv', index=False)
    df_chart = self.pre.change_feature(df_chart)
    # Test window plus the 120 rows before it as a feature set-up window
    df_test = df_chart.iloc[-1 * day * year * test_year:]
    df_prev = df_chart.iloc[-1 * day * year * test_year - 120:-1 * day * year * test_year]
    step = len(df_test)
    env = RLEnvTrain.RLEnv(df_test)
    agent = RLAgent.Agent(gamma=0.98, eps_start=eps, eps_end=0.01, eps_decay_steps=800,
                          eps_exponential_decay=0.99, replay_capacity=int(1e6),
                          batch_size=step - 1, tau=10, code=code,
                          V_nn='DNN', P_nn='CNN', method='A2C')  # value / policy networks, A2C
    reward_list = []
    action_List = []
    quant_list = []
    stock_cnt_list = []
    obs_list = []
    obs = env.reset()
    agent.reset()
    if not os.path.isdir('.\\Test\\re_' + now + '\\'):
        os.makedirs('.\\Test\\re_' + now + '\\')
    # Append the first observation to the set-up window and recompute features
    data = obs.reshape(1, -1)
    data = pd.DataFrame(data, columns=df_chart.columns)
    df_prev = pd.concat([df_prev, data], ignore_index=True)
    obs = self.pre.add_feature(df_prev)
    step = len(df_test)
    self.logging.info('Starting test')
    for i in tqdm(range(step)):
        # Value-network and policy-network predictions for the current observation
        value_per, policy_per = agent.predict_action_per(obs)
        action = agent.policy(value_per, policy_per)
        # Current cash balance and number of shares held
        init_cash, stock_cnt = env.init_cash, env.total_stock
        # Current stock price
        cu_price = obs[1]  # close
        quant = agent.decide_quant(action, value_per, policy_per, init_cash, cu_price)
        # Check whether the buy/sell order is actually executable and adjust it
        action, quant = env.validation_(action, quant, cu_price, stock_cnt)
        next_obs, reward, done, info = env.next_step(action, quant)
        if next_obs is not None:
            data = next_obs.reshape(1, -1)
            data = pd.DataFrame(data, columns=df_prev.columns)
            df_prev = pd.concat([df_prev, data], ignore_index=True)
            next_obs = self.pre.add_feature(df_prev)
        reward_list.append(reward)
        action_List.append(action)
        quant_list.append(quant)
        stock_cnt_list.append(stock_cnt)
        obs_list.append(obs)
        obs = next_obs
    # Persist the per-step results of the test run
    df_reward = pd.DataFrame(reward_list)
    df_action = pd.DataFrame(action_List)
    df_quant = pd.DataFrame(quant_list)
    df_stock_cnt = pd.DataFrame(stock_cnt_list)
    df_obs = pd.DataFrame(obs_list)
    df_reward.to_csv('.\\Test\\re_' + now + '\\reward.csv', index=False)
    df_action.to_csv('.\\Test\\re_' + now + '\\action.csv', index=False)
    df_quant.to_csv('.\\Test\\re_' + now + '\\quant.csv', index=False)
    df_stock_cnt.to_csv('.\\Test\\re_' + now + '\\stock_cnt.csv', index=False)
    df_obs.to_csv('.\\Test\\re_' + now + '\\obs.csv', index=False)
    self.logging.info(f'reward : {reward} | Test finished')
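# The agent above is configured with an epsilon-greedy exploration schedule
# (eps_start, eps_end, eps_decay_steps, eps_exponential_decay). RLAgent's
# internals are not shown here, so the sketch below only illustrates the
# schedule such hyperparameters conventionally describe: linear decay from
# eps_start to eps_end over eps_decay_steps steps, then multiplicative decay
# by eps_exponential_decay per extra step. The helper name is made up for the
# example and is not part of the repository.
def epsilon_schedule(step, eps_start=0.05, eps_end=0.01,
                     eps_decay_steps=800, eps_exponential_decay=0.99):
    if step < eps_decay_steps:
        # Linear interpolation from eps_start down to eps_end
        return eps_start + (eps_end - eps_start) * step / eps_decay_steps
    # Afterwards, shrink epsilon multiplicatively
    return eps_end * eps_exponential_decay ** (step - eps_decay_steps)

# Example: epsilon after 0, 400, 800 and 1600 steps
print([round(epsilon_schedule(s), 4) for s in (0, 400, 800, 1600)])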
import os

import numpy as np
import pandas as pd
import RLEnvTrain, RLAgent
from tqdm import tqdm
from datetime import datetime

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
import tensorflow as tf
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)

# Load the feature CSV, add a profit column, and keep the first 60% of the
# chart (after a warm-up window) as training observations
df = pd.read_csv('.\\DB\\CSV\\daily\\DA000020_ch.csv')
df['profit'] = 0
df_obs = df.iloc[119:int(len(df) * 0.6), :].copy()
df_obs = df_obs.reset_index()
df_obs = df_obs.drop(['index'], axis=1)

env = RLEnvTrain.RLEnv(df_obs)
agent = RLAgent.Agent(batch_size=400)

reward_list = []
action_List = []
quant_list = []
re_list = []

for k in range(100):
    # Timestamped directory for this training run
    now = datetime.now()
    now = now.strftime('%Y%m%d_%H%M%S')
    if not os.path.isdir('.\\reward\\re_' + now + '\\'):
        os.mkdir('.\\reward\\re_' + now + '\\')
    obs = env.reset()
    sub_action_list = []
    sub_quant_list = []
    sub_re_list = []
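# The slice df.iloc[119:int(len(df) * 0.6)] above drops the first 119 rows
# (an indicator warm-up window, comparable to the 120-row set-up window used
# by df_prev in the test method) and trains on the first 60% of what remains.
# A minimal sketch of the complementary split; holding out the last 40% for
# evaluation is an assumption, not something the script above does itself,
# and the helper name is made up for the example.
import pandas as pd

def split_with_warmup(df, warmup=119, train_frac=0.6):
    # Drop the warm-up rows, then split the rest chronologically
    cut = int(len(df) * train_frac)
    train_df = df.iloc[warmup:cut].reset_index(drop=True)
    eval_df = df.iloc[cut:].reset_index(drop=True)
    return train_df, eval_df

train_df, eval_df = split_with_warmup(pd.read_csv('.\\DB\\CSV\\daily\\DA000020_ch.csv'))
print(len(train_df), len(eval_df))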
import pandas as pd
import numpy as np
import RLEnvTrain, RLAgent
import time
from tqdm import tqdm

df = pd.read_csv('.\\DB\\CSV\\daily\\DA000020_ch.csv')
env = RLEnvTrain.RLEnv(df)
agent = RLAgent.Agent()

for k in range(100):
    obs = env.reset()
    for i in tqdm(range(1000)):
        quant = 1  # order quantity (buy/sell)
        action = agent.policy(obs)
        price = obs[1]
        # Zero out the order if it is not executable
        if not env.validation_(action, quant, price):
            action = 0
            quant = 0
        next_obs, reward, done, info = env.next_step(action, quant)
        agent.memorize_transition(obs, action, reward, next_obs,
                                  0.0 if done else 1.0)
        if agent.train:
            agent.experience_replay()
        if done:
            break
        obs = next_obs  # advance to the next observation
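# The last argument to agent.memorize_transition above is a continuation flag:
# 0.0 when the episode is done, 1.0 otherwise. RLAgent's replay/training code
# is not shown here; purely as a generic illustration, this is how such a flag
# is commonly folded into a one-step TD target so that terminal transitions
# carry no bootstrapped value. The function and array names are made up for
# the example.
import numpy as np

def td_target(rewards, next_q_values, not_done, gamma=0.98):
    # r + gamma * max_a Q(s', a), with the bootstrap term masked at episode end
    return rewards + gamma * not_done * next_q_values.max(axis=1)

rewards = np.array([1.0, 0.5])
next_q_values = np.array([[0.2, 0.7],   # Q(s', a) for the first transition
                          [0.9, 0.1]])  # ignored: the episode ended here
not_done = np.array([1.0, 0.0])         # the flag stored by memorize_transition
print(td_target(rewards, next_q_values, not_done))  # -> [1.686 0.5]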
logging.info(
    f'Dataset split Train : {len(df_train)} | Test : {len(df_test)} | Set-up : {len(df_prev)}. Starting training.'
)
step = len(df_train)
logging.info(f'Steps per training run : {step}')
# step = 10
logging.info('Building the training environment')
env = RLEnvTrain.RLEnv(df_train)
logging.info('Building the training agent')
agent = RLAgent.Agent(gamma=0.98, eps_start=0.8, eps_end=0.01, eps_decay_steps=800,
                      eps_exponential_decay=0.99, replay_capacity=int(1e6),
                      batch_size=step - 1, tau=10, code=code,
                      V_nn='DNN', P_nn='CNN', method='A2C', tick='m')  # value / policy networks, A2C

reward_list = []
action_List = []
quant_list = []
re_list = []
stock_cnt_list = []

logging.info('Starting 5 training runs')
for k in range(5):