Example #1
    def test(self,
             code,
             day=252,
             year=1,
             test_year=1,
             full_year=5,
             test_month=1,
             full_month=5,
             category='d',
             eps=0.05):
        now = datetime.now()
        now = now.strftime('%Y%m%d')

        path = '.\\DB\\CSV\\daily\\'
        if category == 'm':
            path = '.\\DB\\CSV\\min\\'
            day = 20 * 390  # ~20 trading days x 390 one-minute bars per day
            test_year = test_month
            full_year = full_month
            now = now + '_m'
        filename = code + '_ch.csv'
        chartfile = code + '.csv'
        if not os.path.exists(path + filename):
            self.logging.info('Creating file')
            self.pre.change_csv(code, category=category)
        if not os.path.exists('.\\Test\\re_' + now):
            os.makedirs('.\\Test\\re_' + now)

        self.logging.info('Splitting off test data')
        df_chart = pd.read_csv(path + chartfile)
        if category == 'm':
            df_chart = self.pre.min_preprocessing(df_chart)
        df_chart_split = df_chart.iloc[-1 * day * year * test_year:]
        df_chart_split.to_csv('.\\Test\\re_' + now + '\\chart.csv',
                              index=False)
        df_chart = self.pre.change_feature(df_chart)
        df_test = df_chart.iloc[-1 * day * year * test_year:]
        df_prev = df_chart.iloc[-1 * day * year * test_year - 120:-1 * day *
                                year * test_year]
        step = len(df_test)
        env = RLEnvTrain.RLEnv(df_test)
        agent = RLAgent.Agent(gamma=0.98,
                              eps_start=eps,
                              eps_end=0.01,
                              eps_decay_steps=800,
                              eps_exponential_decay=0.99,
                              replay_capacity=int(1e6),
                              batch_size=step - 1,
                              tau=10,
                              code=code,
                              V_nn='DNN',
                              P_nn='CNN',
                              method='A2C')  # policy value A2C

        reward_list = []
        action_list = []
        quant_list = []
        stock_cnt_list = []
        obs_list = []

        obs = env.reset()
        agent.reset()

        if not os.path.isdir('.\\Test\\re_' + now + '\\'):
            os.makedirs('.\\Test\\re_' + now + '\\')

        data = obs.reshape(1, -1)
        data = pd.DataFrame(data, columns=df_chart.columns)
        df_prev = pd.concat([df_prev, data], ignore_index=True)
        obs = self.pre.add_feature(df_prev)
        self.logging.info('Starting test run')

        for i in tqdm(range(step)):
            # value-network and policy-network predictions for the current observation
            value_per, policy_per = agent.predict_action_per(obs)

            action = agent.policy(value_per, policy_per)

            # current cash balance and number of shares held
            init_cash, stock_cnt = env.init_cash, env.total_stock

            # current stock price
            cu_price = obs[1]  # close
            quant = agent.decide_quant(action, value_per, policy_per,
                                       init_cash, cu_price)

            # check that the buy/sell order is actually executable
            action, quant = env.validation_(action, quant, cu_price, stock_cnt)

            next_obs, reward, done, info = env.next_step(action, quant)
            if next_obs is not None:
                data = next_obs.reshape(1, -1)
                data = pd.DataFrame(data, columns=df_prev.columns)
                df_prev = pd.concat([df_prev, data], ignore_index=True)
                next_obs = self.pre.add_feature(df_prev)

            reward_list.append(reward)
            action_list.append(action)
            quant_list.append(quant)
            stock_cnt_list.append(stock_cnt)
            obs_list.append(obs)

            obs = next_obs

        df_reward = pd.DataFrame(reward_list)
        df_action = pd.DataFrame(action_list)
        df_quant = pd.DataFrame(quant_list)
        df_stock_cnt = pd.DataFrame(stock_cnt_list)
        df_obs = pd.DataFrame(obs_list)

        df_reward.to_csv('.\\Test\\re_' + now + '\\reward.csv', index=False)
        df_action.to_csv('.\\Test\\re_' + now + '\\action.csv', index=False)
        df_quant.to_csv('.\\Test\\re_' + now + '\\quant.csv', index=False)
        df_stock_cnt.to_csv('.\\Test\\re_' + now + '\\stock_cnt.csv',
                            index=False)
        df_obs.to_csv('.\\Test\\re_' + now + '\\obs.csv', index=False)
        self.logging.info(f'reward : {reward} | test finished')
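
A minimal usage sketch for the method above, assuming it is defined on a test-runner class (hypothetically named Tester here) whose constructor wires up self.pre and self.logging; the stock code DA000020 is borrowed from the later examples:

tester = Tester()  # hypothetical wrapper class; not shown in the snippet above
# back-test on daily bars with the default 1-year test window
tester.test('DA000020', category='d', eps=0.05)
# back-test on 1-minute bars (roughly a 1-month test window)
tester.test('DA000020', category='m')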
Example #2
import os
import numpy as np
import pandas as pd
import RLEnvTrain, RLAgent
from tqdm import tqdm
from datetime import datetime
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
import tensorflow as tf
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)

df = pd.read_csv('.\\DB\\CSV\\daily\\DA000020_ch.csv')
df['profit'] = 0
df_obs = df.iloc[119:int(len(df) * 0.6), :].copy()
df_obs = df_obs.reset_index(drop=True)

env = RLEnvTrain.RLEnv(df_obs)
agent = RLAgent.Agent(batch_size=400)
reward_list = []
action_list = []
quant_list = []
re_list = []
for k in range(100):
    # per-run output directory named with the training date and time
    now = datetime.now()
    now = now.strftime('%Y%m%d_%H%M%S')
    if not os.path.isdir('.\\reward\\re_' + now + '\\'):
        os.mkdir('.\\reward\\re_' + now + '\\')

    obs = env.reset()
    sub_action_list = []
    sub_quant_list = []
    sub_re_list = []
Example #3
import pandas as pd
import numpy as np
import RLEnvTrain, RLAgent
import time
from tqdm import tqdm

df = pd.read_csv('.\\DB\\CSV\\daily\\DA000020_ch.csv')

env = RLEnvTrain.RLEnv(df)
agent = RLAgent.Agent()

for k in range(100):
    obs = env.reset()

    for i in tqdm(range(1000)):

        quant = 1  # order quantity (number of shares to buy or sell)

        action = agent.policy(obs)

        price = obs[1]
        if not env.validation_(action, quant, price):
            action = 0
            quant = 0
        next_obs, reward, done, info = env.next_step(action, quant)
        agent.memorize_transition(obs, action, reward, next_obs,
                                  0.0 if done else 1.0)
        if agent.train:
            agent.experience_replay()
        if done:
            break
Example #4
logging.info(
    f'Dataset split Train : {len(df_train)} | Test : {len(df_test)} | Set-up : {len(df_prev)}. Starting training.'
)

step = len(df_train)
logging.info(f'Steps per training run : {step}')
# step = 10
logging.info('Setting up training environment')
env = RLEnvTrain.RLEnv(df_train)
logging.info('Setting up training agent')
agent = RLAgent.Agent(gamma=0.98,
                      eps_start=0.8,
                      eps_end=0.01,
                      eps_decay_steps=800,
                      eps_exponential_decay=0.99,
                      replay_capacity=int(1e6),
                      batch_size=step - 1,
                      tau=10,
                      code=code,
                      V_nn='DNN',
                      P_nn='CNN',
                      method='A2C',
                      tick='m')  #policy value A2C

reward_list = []
action_list = []
quant_list = []
re_list = []
stock_cnt_list = []
logging.info('Starting 5 training runs')
for k in range(5):
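
The snippet above is truncated right after the outer loop header. Purely as a sketch (an assumption, not the original loop body), the per-episode loop could continue with the env/agent calls already shown in Example #1 and Example #3, using the env, agent, step and list variables defined above:

for k in range(5):
    obs = env.reset()
    agent.reset()
    for i in range(step):
        # value/policy predictions and action selection (as in Example #1)
        value_per, policy_per = agent.predict_action_per(obs)
        action = agent.policy(value_per, policy_per)
        cash, stock_cnt = env.init_cash, env.total_stock
        cu_price = obs[1]  # close
        quant = agent.decide_quant(action, value_per, policy_per, cash, cu_price)
        action, quant = env.validation_(action, quant, cu_price, stock_cnt)
        next_obs, reward, done, info = env.next_step(action, quant)
        # store the transition and train on replayed batches (as in Example #3)
        agent.memorize_transition(obs, action, reward, next_obs,
                                  0.0 if done else 1.0)
        if agent.train:
            agent.experience_replay()
        reward_list.append(reward)
        action_list.append(action)
        quant_list.append(quant)
        stock_cnt_list.append(stock_cnt)
        if done:
            break
        obs = next_obs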