Code example #1
def main():
    agent = RLAgent('agent',
                    decisionFrequency=10.0,
                    defaultSpeed=4,
                    defaultAltitude=6,
                    yawRate=60)

    agent.defineState(orientation=getOrientation,
                      position=getPosition,
                      angularVelocity=getAngularVelocity,
                      linearVelocity=getVelocity,
                      linearAcceleration=getLinearAcceleration,
                      angularAcceleration=getAngularAcceleration)

    agent.setRl(partial(flightLogger, dataset='datasets/' + 'replay.csv'))
    agent.start()
    agent.join()
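
The flightLogger callback bound here via partial is not defined in these examples. Below is a minimal, hypothetical sketch of what such a state-logging callback could look like, assuming the generator protocol that monteCarlo follows in code example #3 (yield an action, receive (reward, nextState, isTerminal) from a bare yield, then yield once more); the logged fields and the placeholder action choice are assumptions, not the project's actual implementation.

import csv

def flightLogger(agent, dataset='datasets/replay.csv'):
    # Hypothetical sketch only: logs each observed state to a CSV file while
    # replaying a placeholder action. Field names mirror those registered via
    # defineState() above; missing attributes are written as None.
    fields = ('position', 'orientation', 'linearVelocity', 'angularVelocity')
    with open(dataset, 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(fields)
        while True:
            state = agent.getState()
            writer.writerow([getattr(state, name, None) for name in fields])
            yield agent.getActions()[0]              # placeholder action choice
            reward, nextState, isTerminal = (yield)  # feedback from the environment
            yield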
Code example #2
def main():
    model = VelocityModel(
        regressionModel=joblib.load('models/gradient-m.model'), frequency=10.0)
    agent = RLAgent('agent',
                    decisionFrequency=20.0,
                    defaultSpeed=4,
                    defaultAltitude=6,
                    yawRate=60,
                    alternativeModel=model,
                    maxDepth=math.inf,
                    initialState=None)

    agent.setRl(verifyModel)
    agent.start()
    agent.join()
Code example #3
def monteCarlo(agent, maxDepth=3, trials=12, frequency=10):
    PROCESSES = 4
    model = VelocityModel(
        regressionModel=joblib.load('models/gradient-m.model'),
        frequency=frequency)
    actions = np.array(agent.getActions())
    initialState, isTerminal = agent.getState(), 0

    jobs = [None] * len(actions) * trials
    while not isTerminal:
        initialState = agent.getState()
        qs = {i: [] for i in actions}

        for index, a in enumerate(np.repeat(actions, trials)):
            virtualAgent, isTerminal = RLAgent(
                'virtual',
                alternativeModel=model,
                decisionFrequency=math.inf,
                maxDepth=maxDepth,
                initialState=initialState), False
            virtualAgent.setReward(reward)
            virtualAgent.goal = agent.getGoal()
            virtualAgent.goalMargins = agent.getGoalMargins()

            virtualAgent.setRl(
                partial(monteCarloSearch,
                        actions=getRandomActions(a, actions, maxDepth)))
            jobs[index] = virtualAgent

        pool = Pool(PROCESSES)  # pool size taken from the PROCESSES constant above
        results = [pool.apply_async(job.run) for job in jobs]
        for result in results:
            action, score = result.get()
            qs[action].append(score)

        pool.close()
        pool.join()

        yield actions[np.argmax([np.average(qs[a]) for a in actions])]
        r, nextState, isTerminal = (yield)

        f = 1 / (nextState.lastUpdate - initialState.lastUpdate)
        # correct for deviations from desired freq.
        model.frequency = f

        agent.logger.info(f)

        yield
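
getRandomActions is called above but not defined in these examples. A plausible reading of the Monte Carlo rollout structure is that it fixes the action being evaluated as the first step and fills the rest of the rollout with uniformly random actions; the sketch below encodes that assumption and is not the project's actual helper.

import numpy as np

def getRandomActions(firstAction, actions, maxDepth):
    # Hypothetical helper (assumed, not the project's code): evaluate
    # `firstAction` on the first step, then pad the rollout with uniformly
    # random actions so it is maxDepth steps long (assumes a finite maxDepth).
    rollout = [firstAction]
    rollout.extend(np.random.choice(actions, size=maxDepth - 1))
    return rollout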
Code example #4
File: RLEnvironment.py  Project: tomicchie/RLSample
    def __init__(self):
        # Create an instance of the Util class
        self.util = RLUtil.RLUtil()
        # Set the task (environment) to run
        self.env = gym.make(self.util.getENV())

        # Set the number of states and actions for the task
        self.num_states = self.env.observation_space.shape[0]
        self.num_actions = self.env.action_space.n
        # For CartPole this yields 2 actions (left/right)

        # Create an instance of the Agent class
        self.agent = RLAgent.RLAgent(self.num_states, self.num_actions)

        # Configure the device
        d = self.util.getDEVICE()
        self.device = torch.device(d)
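
For context, a driver method like the following typically sits alongside this constructor: a standard (pre-0.26) Gym episode loop feeding observations to the agent on the configured device. The get_action and memorize method names are assumptions for illustration; RLAgent's real interface is not shown in this snippet.

    def run_episode(self, max_steps=200):
        # Illustrative sketch only: a conventional Gym episode loop.
        # The agent methods get_action() and memorize() are assumed names,
        # not part of the RLAgent API shown above.
        observation = self.env.reset()
        state = torch.from_numpy(observation).float().to(self.device)
        total_reward = 0.0
        for _ in range(max_steps):
            action = self.agent.get_action(state)                 # assumed API
            observation, reward, done, _ = self.env.step(action)  # classic 4-tuple Gym API
            next_state = torch.from_numpy(observation).float().to(self.device)
            self.agent.memorize(state, action, reward, next_state, done)  # assumed API
            total_reward += reward
            state = next_state
            if done:
                break
        return total_reward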
Code example #5
import numpy as np
import tensorflow as tf
import math
import cPickle
import graph
import RLAgent
import httpConec as hC
import matplotlib.pyplot as plt
import geneTopo

topo, hosts, nodes, links = geneTopo.getDCtopo()
ag = RLAgent.PGNAgent(links, nodes * 2 + 1)
atest = graph.graf(nodes, links, initopo=topo, inihost=hosts)
atest.initial()
#atest.printTopo()
batch_size = 16
batch_number = 0
total_episodes = 640
episode_number = 0
valid_action = 0
invalid_action = 0
xs, ys, rs = [], [], []

epslon = 0.9
plt_x = range(total_episodes // batch_size)
plt_y = []
r_batch = []

valid_action_combo = 0
gradBuffer = ag.sess.run(ag.tvars)
Code example #6
import game
import RLAgent
import Multigamer

#Multi Game Simulation
agent = RLAgent.RandomSnakeAgent()

# Multigamer attributes:
# - numberOfParallelGames (no default)
# - RLAgent (no default)
# - numberOfTrials (default = 1)
# - numberOfIterations (default=1)
# - reward (default=100)
# - cost (default=1)
# - dim (default=(250,250))
gamer = Multigamer.multiGamer(10, agent, 5, 25)

gamer.runSimulation()
results = gamer.getResults()

## Single Game Simulation (with GUI)
# snake = game.Snake()
# foodSpawner = game.FoodSpawner()
# snakeGame = game.SnakeGame(snake, foodSpawner, 100, 1, True, (150,150))
# results = snakeGame.simulate(agent,5,3)

## Single Game Manual Play (with GUI)
# snake = game.Snake()
# foodSpawner = game.FoodSpawner()
# #snakeGame.play()
# #score, movements, food = snakeGame.getResults()
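
RandomSnakeAgent comes from the RLAgent module and is not shown here. As a rough illustration of what a random baseline usually looks like, the sketch below picks a uniformly random move each step; the class name, method name, and action encoding are assumptions, not the module's actual interface.

import random

class RandomSnakeAgentSketch:
    # Illustrative stand-in for a random baseline agent; the real
    # RLAgent.RandomSnakeAgent interface is not shown, so the action
    # encoding and method name below are assumptions.
    ACTIONS = ('UP', 'DOWN', 'LEFT', 'RIGHT')

    def getAction(self, state):
        # Ignore the state entirely and pick a uniformly random direction.
        return random.choice(self.ACTIONS)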
Code example #7
    def test(self,
             code,
             day=252,
             year=1,
             test_year=1,
             full_year=5,
             test_month=1,
             full_month=5,
             category='d',
             eps=0.05):
        now = datetime.now()
        now = now.strftime('%Y%m%d')

        path = '.\\DB\\CSV\\daily\\'
        if category == 'm':
            path = '.\\DB\\CSV\\min\\'
            day = 20 * 390  # 20 trading days x 390 minute bars per day
            test_year = test_month
            full_year = full_month
            now = now + '_m'
        filename = code + '_ch.csv'
        chartfile = code + '.csv'
        if not os.path.exists(path + filename):
            self.logging.info('creating file')
            self.pre.change_csv(code, category=category)
        if not os.path.exists('.\\Test\\re_' + now):
            os.makedirs('.\\Test\\re_' + now)

        self.logging.info('splitting off test data')
        df_chart = pd.read_csv(path + chartfile)
        if category == 'm':
            df_chart = self.pre.min_preprocessing(df_chart)
        df_chart_split = df_chart.iloc[-1 * day * year * test_year:]
        df_chart_split.to_csv('.\\Test\\re_' + now + '\\chart.csv',
                              index=False)
        df_chart = self.pre.change_feature(df_chart)
        df_test = df_chart.iloc[-1 * day * year * test_year:]
        df_prev = df_chart.iloc[-1 * day * year * test_year - 120:-1 * day *
                                year * test_year]
        step = len(df_test)
        env = RLEnvTrain.RLEnv(df_test)
        agent = RLAgent.Agent(gamma=0.98,
                              eps_start=eps,
                              eps_end=0.01,
                              eps_decay_steps=800,
                              eps_exponential_decay=0.99,
                              replay_capacity=int(1e6),
                              batch_size=step - 1,
                              tau=10,
                              code=code,
                              V_nn='DNN',
                              P_nn='CNN',
                              method='A2C')  # policy value A2C

        reward_list = []
        action_List = []
        quant_list = []
        stock_cnt_list = []
        obs_list = []

        obs = env.reset()
        agent.reset()

        if not os.path.isdir('.\\Test\\re_' + now + '\\'):
            os.makedirs('.\\Test\\re_' + now + '\\')

        data = obs.reshape(1, -1)
        data = pd.DataFrame(data, columns=df_chart.columns)
        df_prev = pd.concat([df_prev, data], ignore_index=True)
        obs = self.pre.add_feature(df_prev)
        step = len(df_test)
        self.logging.info('starting test')

        for i in tqdm(range(step)):
            # value-network and policy-network predictions from the observation
            value_per, policy_per = agent.predict_action_per(obs)

            action = agent.policy(value_per, policy_per)

            # current cash balance and number of shares held
            init_cash, stock_cnt = env.init_cash, env.total_stock

            # current stock price
            cu_price = obs[1]  # close
            quant = agent.decide_quant(action, value_per, policy_per,
                                       init_cash, cu_price)

            # check whether the buy/sell order is actually feasible

            action, quant = env.validation_(action, quant, cu_price, stock_cnt)

            next_obs, reward, done, info = env.next_step(action, quant)
            if next_obs is not None:
                data = next_obs.reshape(1, -1)
                data = pd.DataFrame(data, columns=df_prev.columns)
                df_prev = pd.concat([df_prev, data], ignore_index=True)
                next_obs = self.pre.add_feature(df_prev)

            reward_list.append(reward)
            action_List.append(action)
            quant_list.append(quant)
            stock_cnt_list.append(stock_cnt)
            obs_list.append(obs)

            obs = next_obs

        df_reward = pd.DataFrame(reward_list)
        df_action = pd.DataFrame(action_List)
        df_quant = pd.DataFrame(quant_list)
        df_stock_cnt = pd.DataFrame(stock_cnt_list)
        df_obs = pd.DataFrame(obs_list)

        df_reward.to_csv('.\\Test\\re_' + now + '\\reward.csv', index=False)
        df_action.to_csv('.\\Test\\re_' + now + '\\action.csv', index=False)
        df_quant.to_csv('.\\Test\\re_' + now + '\\quant.csv', index=False)
        df_stock_cnt.to_csv('.\\Test\\re_' + now + '\\stock_cnt.csv',
                            index=False)
        df_obs.to_csv('.\\Test\\re_' + now + '\\obs.csv', index=False)
        self.logging.info(f'reward : {reward} | test finished')
Code example #8
File: RLMain.py  Project: hongsamhc2/devhiiostudy
import pandas as pd
import numpy as np
import RLEnvTrain, RLAgent
import time
from tqdm import tqdm

df = pd.read_csv('.\\DB\\CSV\\daily\\DA000020_ch.csv')

env = RLEnvTrain.RLEnv(df)
agent = RLAgent.Agent()

for k in range(100):
    obs = env.reset()

    for i in tqdm(range(1000)):

        quant = 1  # buy/sell quantity

        action = agent.policy(obs)

        price = obs[1]
        if not env.validation_(action, quant, price):
            action = 0
            quant = 0
        next_obs, reward, done, info = env.next_step(action, quant)
        agent.memorize_transition(obs, action, reward, next_obs,
                                  0.0 if done else 1.0)
        if agent.train:
            agent.experience_replay()
        if done:
            break
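
The memorize_transition / experience_replay pair in this loop is the standard experience-replay pattern: store each (obs, action, reward, next_obs, not_done) tuple in a bounded buffer and later train on random minibatches drawn from it. A generic sketch of that pattern follows; it illustrates the idea only and is not the Agent class's actual implementation.

import random
from collections import deque

class ReplayBufferSketch:
    # Generic illustration of the experience-replay idea behind
    # memorize_transition() / experience_replay(); not the project's Agent.

    def __init__(self, capacity=int(1e6)):
        self.buffer = deque(maxlen=capacity)  # oldest transitions are dropped first

    def memorize_transition(self, obs, action, reward, next_obs, not_done):
        self.buffer.append((obs, action, reward, next_obs, not_done))

    def sample(self, batch_size=32):
        # Uniformly sample a minibatch for a Q-learning / actor-critic update.
        return random.sample(list(self.buffer), min(batch_size, len(self.buffer)))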
Code example #9
import os

import numpy as np
import pandas as pd
import RLEnvTrain, RLAgent
from tqdm import tqdm
from datetime import datetime
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
import tensorflow as tf
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)

df = pd.read_csv('.\\DB\\CSV\\daily\\DA000020_ch.csv')
df['profit'] = 0
df_obs = df.iloc[119:int(len(df) * 0.6), :].copy()
df_obs = df_obs.reset_index()
df_obs = df_obs.drop(['index'], axis=1)

env = RLEnvTrain.RLEnv(df_obs)
agent = RLAgent.Agent(batch_size=400)
reward_list = []
action_List = []
quant_list = []
re_list = []
for k in range(100):
    # directory named after the training date and time
    now = datetime.now()
    now = now.strftime('%Y%m%d_%H%M%S')
    if not os.path.isdir('.\\reward\\re_' + now + '\\'):
        os.mkdir('.\\reward\\re_' + now + '\\')

    obs = env.reset()
    sub_action_list = []
    sub_quant_list = []
    sub_re_list = []
Code example #10
File: P4_AI_new.py  Project: pinyaras/greente_ai
import numpy as np
import tensorflow as tf
import config
import csv
import graph
import RLAgent
import httpConec2 as hC
import matplotlib.pyplot as plt
from matplotlib.pyplot import draw

topo,hosts,nodes,hostnum,links=config.topo
print "host num:",hostnum
print "nodes num:",nodes
ag=RLAgent.PGNAgent(links,(nodes-hostnum)*2+1)
atest=graph.graf(nodes,links,initopo=topo,inihost=hosts)
atest.initial()
atest.printTopo() 
stepnum=2000
batch_size=config.batchsize
batch_sum=0   
episode_number=0
valid_action=0
total_episodes=config.episodes
xs,ys,rs=[],[],[]
epslon=config.explore_rate
plt_ANPB=[]
plt_ENPB=[]
plt_RNPB=[]
plt_rb=[]
overlink_record=[]
rsp=hC.sendTopo(atest.E,atest.host)
Code example #11
logging.info(
    f'Dataset split Train : {len(df_train)} | Test : {len(df_test)} | Set-up : {len(df_prev)}. Starting training.'
)

step = len(df_train)
logging.info(f'steps per training run : {step}')
#step = 10
logging.info('setting up the training environment')
env = RLEnvTrain.RLEnv(df_train)
logging.info('setting up the training agent')
agent = RLAgent.Agent(gamma=0.98,
                      eps_start=0.8,
                      eps_end=0.01,
                      eps_decay_steps=800,
                      eps_exponential_decay=0.99,
                      replay_capacity=int(1e6),
                      batch_size=step - 1,
                      tau=10,
                      code=code,
                      V_nn='DNN',
                      P_nn='CNN',
                      method='A2C',
                      tick='m')  #policy value A2C

reward_list = []
action_List = []
quant_list = []
re_list = []
stock_cnt_list = []
logging.info('starting 10 training runs')
for k in range(5):
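
The eps_start / eps_end / eps_decay_steps / eps_exponential_decay arguments describe an exploration schedule, but how the Agent class combines them is not shown here. One common scheme, sketched below purely as an assumption, decays epsilon linearly to eps_end over eps_decay_steps steps and then keeps shrinking it exponentially.

def epsilon_schedule(step,
                     eps_start=0.8,
                     eps_end=0.01,
                     eps_decay_steps=800,
                     eps_exponential_decay=0.99):
    # Assumed schedule, for illustration only: linear decay from eps_start
    # down to eps_end over the first eps_decay_steps steps, then continued
    # exponential shrinkage by eps_exponential_decay per step.
    if step < eps_decay_steps:
        return eps_start - (eps_start - eps_end) * step / eps_decay_steps
    return eps_end * eps_exponential_decay ** (step - eps_decay_steps)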
Code example #12
File: RLMain.py  Project: hongsamhc2/devhiiostudy
        env.reset()
        now_obs = env.observe()  # observation right after the reset
        print('reset_state', now_obs)
        for step in range(1, episode + 1):
            action = agent.eps_policy(now_obs)  # choose an action
            done, reward = agent.do_action(action, now_obs)  # execute the action
            next_obs = env.observe()
            agent.memorize(now_obs, action, reward, next_obs,
                           0.0 if done == 0 else 1.0)
            if agent.train:
                agent.experience_replay()
            if done == 0:
                break
            now_obs = next_obs

    end = time.time()
    print(end - start)
    return


os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

df = pd.read_csv('.\\DB\\CSV\\daily\\DA000020.csv')
df.sort_values(by='date', inplace=True)

env = RLTrainEnv.RLtradingTestEnv(df)
agent = RLAgent.RLAgent(env)
agent.set_cash(1000000 * len(df))

train_model(env, episode=4096)
Code example #13
def main():
    agent = RLAgent('agent',
                    decisionFrequency=10.0,
                    defaultSpeed=4,
                    defaultAltitude=20,
                    yawRate=70)

    # Callbacks are invoked in the order they were specified; beware of the
    # execution order if any state parameter depends on another.
    # State is lazily updated by the environment as the agent needs it, so the
    # agent always gets the freshest estimate; updates happen at a rate that
    # matches the agent's decision-making frequency.

    agent.defineState(orientation=getOrientation,
                      angularVelocity=getAngularVelocity,
                      linearVelocity=getVelocity,
                      position=getPosition)

    agent.setRl(monteCarlo)
    agent.setReward(reward)
    agent.setGoal(position=np.array([-40, -50, 0]))
    agent.setGoalMargins(position=np.array([0.5, 0.5, math.inf]))
    agent.start()
    agent.join()
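
The reward callback registered with setReward is not defined in these examples. Given that the goal is a target position with per-axis margins (the z margin is infinite here), one natural shape, sketched below purely as an assumption, is the negative distance to the goal on the finite-margin axes plus a bonus inside the margins; the callback's real signature and the attribute access on the goal objects are not shown and are assumed.

import numpy as np

def reward(agent, state):
    # Hypothetical reward shaped around the goal set above: negative distance
    # to the goal position on the axes with finite margins (x and y here),
    # plus a bonus once the agent is inside the margins. Both the signature
    # and the .position attribute access are assumptions.
    goal = np.asarray(agent.getGoal().position, dtype=float)
    margins = np.asarray(agent.getGoalMargins().position, dtype=float)
    finite = np.isfinite(margins)
    error = np.abs(np.asarray(state.position, dtype=float)[finite] - goal[finite])
    if np.all(error <= margins[finite]):
        return 100.0  # reached the goal region
    return -float(np.linalg.norm(error))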