def test(self, code, day=252, year=1, test_year=1, full_year=5,
         test_month=1, full_month=5, category='d', eps=0.05):
    now = datetime.now()
    now = now.strftime('%Y%m%d')
    path = '.\\DB\\CSV\\daily\\'
    if category == 'm':
        # Minute charts: 20 trading days of 390 one-minute bars each
        path = '.\\DB\\CSV\\min\\'
        day = 20
        day = day * 390
        test_year = test_month
        full_year = full_month
        now = now + '_m'
    filename = code + '_ch.csv'
    chartfile = code + '.csv'
    if not os.path.exists(path + filename):
        self.logging.info('Creating file')
        self.pre.change_csv(code, category=category)
    if not os.path.exists('.\\Test\\re_' + now):
        os.makedirs('.\\Test\\re_' + now)
    self.logging.info('Splitting test data')
    df_chart = pd.read_csv(path + chartfile)
    if category == 'm':
        df_chart = self.pre.min_preprocessing(df_chart)
    # Raw chart rows covering the test window, saved for later inspection
    df_chart_split = df_chart.iloc[-1 * day * year * test_year:]
    df_chart_split.to_csv('.\\Test\\re_' + now + '\\chart.csv', index=False)
    df_chart = self.pre.change_feature(df_chart)
    # Test window plus the 120 rows before it as a feature set-up window
    df_test = df_chart.iloc[-1 * day * year * test_year:]
    df_prev = df_chart.iloc[-1 * day * year * test_year - 120:-1 * day * year * test_year]
    step = len(df_test)
    env = RLEnvTrain.RLEnv(df_test)
    agent = RLAgent.Agent(gamma=0.98, eps_start=eps, eps_end=0.01, eps_decay_steps=800,
                          eps_exponential_decay=0.99, replay_capacity=int(1e6),
                          batch_size=step - 1, tau=10, code=code,
                          V_nn='DNN', P_nn='CNN', method='A2C')  # value / policy networks, A2C
    reward_list = []
    action_List = []
    quant_list = []
    stock_cnt_list = []
    obs_list = []
    obs = env.reset()
    agent.reset()
    if not os.path.isdir('.\\Test\\re_' + now + '\\'):
        os.makedirs('.\\Test\\re_' + now + '\\')
    # Append the first observation to the set-up window and recompute features
    data = obs.reshape(1, -1)
    data = pd.DataFrame(data, columns=df_chart.columns)
    df_prev = pd.concat([df_prev, data], ignore_index=True)
    obs = self.pre.add_feature(df_prev)
    step = len(df_test)
    self.logging.info('Starting test')
    for i in tqdm(range(step)):
        # Value-network and policy-network predictions for the current observation
        value_per, policy_per = agent.predict_action_per(obs)
        action = agent.policy(value_per, policy_per)
        # Current cash balance and number of shares held
        init_cash, stock_cnt = env.init_cash, env.total_stock
        # Current stock price
        cu_price = obs[1]  # close
        quant = agent.decide_quant(action, value_per, policy_per, init_cash, cu_price)
        # Check whether the buy/sell order is actually executable and adjust it
        action, quant = env.validation_(action, quant, cu_price, stock_cnt)
        next_obs, reward, done, info = env.next_step(action, quant)
        if next_obs is not None:
            data = next_obs.reshape(1, -1)
            data = pd.DataFrame(data, columns=df_prev.columns)
            df_prev = pd.concat([df_prev, data], ignore_index=True)
            next_obs = self.pre.add_feature(df_prev)
        reward_list.append(reward)
        action_List.append(action)
        quant_list.append(quant)
        stock_cnt_list.append(stock_cnt)
        obs_list.append(obs)
        obs = next_obs
    # Persist the per-step results of the test run
    df_reward = pd.DataFrame(reward_list)
    df_action = pd.DataFrame(action_List)
    df_quant = pd.DataFrame(quant_list)
    df_stock_cnt = pd.DataFrame(stock_cnt_list)
    df_obs = pd.DataFrame(obs_list)
    df_reward.to_csv('.\\Test\\re_' + now + '\\reward.csv', index=False)
    df_action.to_csv('.\\Test\\re_' + now + '\\action.csv', index=False)
    df_quant.to_csv('.\\Test\\re_' + now + '\\quant.csv', index=False)
    df_stock_cnt.to_csv('.\\Test\\re_' + now + '\\stock_cnt.csv', index=False)
    df_obs.to_csv('.\\Test\\re_' + now + '\\obs.csv', index=False)
    self.logging.info(f'reward : {reward} | Test finished')
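# The agent above is configured with an epsilon-greedy exploration schedule
# (eps_start, eps_end, eps_decay_steps, eps_exponential_decay). RLAgent's
# internals are not shown here, so the sketch below only illustrates the
# schedule such hyperparameters conventionally describe: linear decay from
# eps_start to eps_end over eps_decay_steps steps, then multiplicative decay
# by eps_exponential_decay per extra step. The helper name is made up for the
# example and is not part of the repository.
def epsilon_schedule(step, eps_start=0.05, eps_end=0.01,
                     eps_decay_steps=800, eps_exponential_decay=0.99):
    if step < eps_decay_steps:
        # Linear interpolation from eps_start down to eps_end
        return eps_start + (eps_end - eps_start) * step / eps_decay_steps
    # Afterwards, shrink epsilon multiplicatively
    return eps_end * eps_exponential_decay ** (step - eps_decay_steps)

# Example: epsilon after 0, 400, 800 and 1600 steps
print([round(epsilon_schedule(s), 4) for s in (0, 400, 800, 1600)])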
import os

import numpy as np
import pandas as pd
import RLEnvTrain, RLAgent
from tqdm import tqdm
from datetime import datetime

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
import tensorflow as tf
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)

# Load the feature CSV, add a profit column, and keep the first 60% of the
# chart (after a warm-up window) as training observations
df = pd.read_csv('.\\DB\\CSV\\daily\\DA000020_ch.csv')
df['profit'] = 0
df_obs = df.iloc[119:int(len(df) * 0.6), :].copy()
df_obs = df_obs.reset_index()
df_obs = df_obs.drop(['index'], axis=1)

env = RLEnvTrain.RLEnv(df_obs)
agent = RLAgent.Agent(batch_size=400)

reward_list = []
action_List = []
quant_list = []
re_list = []

for k in range(100):
    # Timestamped directory for this training run
    now = datetime.now()
    now = now.strftime('%Y%m%d_%H%M%S')
    if not os.path.isdir('.\\reward\\re_' + now + '\\'):
        os.mkdir('.\\reward\\re_' + now + '\\')
    obs = env.reset()
    sub_action_list = []
    sub_quant_list = []
    sub_re_list = []
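# The slice df.iloc[119:int(len(df) * 0.6)] above drops the first 119 rows
# (an indicator warm-up window, comparable to the 120-row set-up window used
# by df_prev in the test method) and trains on the first 60% of what remains.
# A minimal sketch of the complementary split; holding out the last 40% for
# evaluation is an assumption, not something the script above does itself,
# and the helper name is made up for the example.
import pandas as pd

def split_with_warmup(df, warmup=119, train_frac=0.6):
    # Drop the warm-up rows, then split the rest chronologically
    cut = int(len(df) * train_frac)
    train_df = df.iloc[warmup:cut].reset_index(drop=True)
    eval_df = df.iloc[cut:].reset_index(drop=True)
    return train_df, eval_df

train_df, eval_df = split_with_warmup(pd.read_csv('.\\DB\\CSV\\daily\\DA000020_ch.csv'))
print(len(train_df), len(eval_df))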
import pandas as pd
import numpy as np
import RLEnvTrain, RLAgent
import time
from tqdm import tqdm

df = pd.read_csv('.\\DB\\CSV\\daily\\DA000020_ch.csv')
env = RLEnvTrain.RLEnv(df)
agent = RLAgent.Agent()

for k in range(100):
    obs = env.reset()
    for i in tqdm(range(1000)):
        quant = 1  # order quantity (buy/sell)
        action = agent.policy(obs)
        price = obs[1]
        # Zero out the order if it is not executable
        if not env.validation_(action, quant, price):
            action = 0
            quant = 0
        next_obs, reward, done, info = env.next_step(action, quant)
        agent.memorize_transition(obs, action, reward, next_obs,
                                  0.0 if done else 1.0)
        if agent.train:
            agent.experience_replay()
        if done:
            break
        obs = next_obs  # advance to the next observation
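# The last argument to agent.memorize_transition above is a continuation flag:
# 0.0 when the episode is done, 1.0 otherwise. RLAgent's replay/training code
# is not shown here; purely as a generic illustration, this is how such a flag
# is commonly folded into a one-step TD target so that terminal transitions
# carry no bootstrapped value. The function and array names are made up for
# the example.
import numpy as np

def td_target(rewards, next_q_values, not_done, gamma=0.98):
    # r + gamma * max_a Q(s', a), with the bootstrap term masked at episode end
    return rewards + gamma * not_done * next_q_values.max(axis=1)

rewards = np.array([1.0, 0.5])
next_q_values = np.array([[0.2, 0.7],   # Q(s', a) for the first transition
                          [0.9, 0.1]])  # ignored: the episode ended here
not_done = np.array([1.0, 0.0])         # the flag stored by memorize_transition
print(td_target(rewards, next_q_values, not_done))  # -> [1.686 0.5]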
logging.info(
    f'Dataset split Train : {len(df_train)} | Test : {len(df_test)} | Set-up : {len(df_prev)}. Starting training.'
)
step = len(df_train)
logging.info(f'Steps per training run : {step}')
# step = 10
logging.info('Building the training environment')
env = RLEnvTrain.RLEnv(df_train)
logging.info('Building the training agent')
agent = RLAgent.Agent(gamma=0.98, eps_start=0.8, eps_end=0.01, eps_decay_steps=800,
                      eps_exponential_decay=0.99, replay_capacity=int(1e6),
                      batch_size=step - 1, tau=10, code=code,
                      V_nn='DNN', P_nn='CNN', method='A2C', tick='m')  # value / policy networks, A2C

reward_list = []
action_List = []
quant_list = []
re_list = []
stock_cnt_list = []

logging.info('Starting 5 training runs')
for k in range(5):