Code example #1
def main():
    agent = RLAgent('agent',
                    decisionFrequency=10.0,
                    defaultSpeed=4,
                    defaultAltitude=6,
                    yawRate=60)

    agent.defineState(orientation=getOrientation,
                      position=getPosition,
                      angularVelocity=getAngularVelocity,
                      linearVelocity=getVelocity,
                      linearAcceleration=getLinearAcceleration,
                      angularAcceleration=getAngularAcceleration)

    agent.setRl(partial(flightLogger, dataset='datasets/' + 'replay.csv'))
    agent.start()
    agent.join()
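
The flightLogger callback bound here via partial is not defined in these examples. Below is a minimal, hypothetical sketch of what such a state-logging callback could look like, assuming the generator protocol that monteCarlo follows in code example #3 (yield an action, receive (reward, nextState, isTerminal) from a bare yield, then yield once more); the logged fields and the placeholder action choice are assumptions, not the project's actual implementation.

import csv

def flightLogger(agent, dataset='datasets/replay.csv'):
    # Hypothetical sketch only: logs each observed state to a CSV file while
    # replaying a placeholder action. Field names mirror those registered via
    # defineState() above; missing attributes are written as None.
    fields = ('position', 'orientation', 'linearVelocity', 'angularVelocity')
    with open(dataset, 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(fields)
        while True:
            state = agent.getState()
            writer.writerow([getattr(state, name, None) for name in fields])
            yield agent.getActions()[0]              # placeholder action choice
            reward, nextState, isTerminal = (yield)  # feedback from the environment
            yield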
Code example #2
def main():
    model = VelocityModel(
        regressionModel=joblib.load('models/gradient-m.model'), frequency=10.0)
    agent = RLAgent('agent',
                    decisionFrequency=20.0,
                    defaultSpeed=4,
                    defaultAltitude=6,
                    yawRate=60,
                    alternativeModel=model,
                    maxDepth=math.inf,
                    initialState=None)

    agent.setRl(verifyModel)
    agent.start()
    agent.join()
Code example #3
def monteCarlo(agent, maxDepth=3, trials=12, frequency=10):
    PROCESSES = 4
    model = VelocityModel(
        regressionModel=joblib.load('models/gradient-m.model'),
        frequency=frequency)
    actions = np.array(agent.getActions())
    initialState, isTerminal = agent.getState(), 0

    jobs = [None] * len(actions) * trials
    while not isTerminal:
        initialState = agent.getState()
        qs = {i: [] for i in actions}

        for index, a in enumerate(np.repeat(actions, trials)):
            virtualAgent, isTerminal = RLAgent(
                'virtual',
                alternativeModel=model,
                decisionFrequency=math.inf,
                maxDepth=maxDepth,
                initialState=initialState), False
            virtualAgent.setReward(reward)
            virtualAgent.goal = agent.getGoal()
            virtualAgent.goalMargins = agent.getGoalMargins()

            virtualAgent.setRl(
                partial(monteCarloSearch,
                        actions=getRandomActions(a, actions, maxDepth)))
            jobs[index] = virtualAgent

        pool = Pool(PROCESSES)  # pool size taken from the PROCESSES constant above
        results = [pool.apply_async(job.run) for job in jobs]
        for result in results:
            action, score = result.get()
            qs[action].append(score)

        pool.close()
        pool.join()

        yield actions[np.argmax([np.average(qs[a]) for a in actions])]
        r, nextState, isTerminal = (yield)

        f = 1 / (nextState.lastUpdate - initialState.lastUpdate)
        # correct for deviations from desired freq.
        model.frequency = f

        agent.logger.info(f)

        yield
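
getRandomActions is called above but not defined in these examples. A plausible reading of the Monte Carlo rollout structure is that it fixes the action being evaluated as the first step and fills the rest of the rollout with uniformly random actions; the sketch below encodes that assumption and is not the project's actual helper.

import numpy as np

def getRandomActions(firstAction, actions, maxDepth):
    # Hypothetical helper (assumed, not the project's code): evaluate
    # `firstAction` on the first step, then pad the rollout with uniformly
    # random actions so it is maxDepth steps long (assumes a finite maxDepth).
    rollout = [firstAction]
    rollout.extend(np.random.choice(actions, size=maxDepth - 1))
    return rollout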
Code example #4
File: RLEnvironment.py  Project: tomicchie/RLSample
    def __init__(self):
        # Create an instance of the Util class
        self.util = RLUtil.RLUtil()
        # Set the task (environment) to run
        self.env = gym.make(self.util.getENV())

        # Set the number of states and actions for the task
        self.num_states = self.env.observation_space.shape[0]
        self.num_actions = self.env.action_space.n
        # For CartPole this yields 2 actions (left/right)

        # Create an instance of the Agent class
        self.agent = RLAgent.RLAgent(self.num_states, self.num_actions)

        # Configure the device
        d = self.util.getDEVICE()
        self.device = torch.device(d)
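
For context, a driver method like the following typically sits alongside this constructor: a standard (pre-0.26) Gym episode loop feeding observations to the agent on the configured device. The get_action and memorize method names are assumptions for illustration; RLAgent's real interface is not shown in this snippet.

    def run_episode(self, max_steps=200):
        # Illustrative sketch only: a conventional Gym episode loop.
        # The agent methods get_action() and memorize() are assumed names,
        # not part of the RLAgent API shown above.
        observation = self.env.reset()
        state = torch.from_numpy(observation).float().to(self.device)
        total_reward = 0.0
        for _ in range(max_steps):
            action = self.agent.get_action(state)                 # assumed API
            observation, reward, done, _ = self.env.step(action)  # classic 4-tuple Gym API
            next_state = torch.from_numpy(observation).float().to(self.device)
            self.agent.memorize(state, action, reward, next_state, done)  # assumed API
            total_reward += reward
            state = next_state
            if done:
                break
        return total_reward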
Code example #5
import numpy as np
import tensorflow as tf
import math
import cPickle
import graph
import RLAgent
import httpConec as hC
import matplotlib.pyplot as plt
import geneTopo

topo, hosts, nodes, links = geneTopo.getDCtopo()
ag = RLAgent.PGNAgent(links, nodes * 2 + 1)
atest = graph.graf(nodes, links, initopo=topo, inihost=hosts)
atest.initial()
#atest.printTopo()
batch_size = 16
batch_number = 0
total_episodes = 640
episode_number = 0
valid_action = 0
invalid_action = 0
xs, ys, rs = [], [], []

epslon = 0.9
plt_x = range(total_episodes // batch_size)
plt_y = []
r_batch = []

valid_action_combo = 0
gradBuffer = ag.sess.run(ag.tvars)
Code example #6
import game
import RLAgent
import Multigamer

#Multi Game Simulation
agent = RLAgent.RandomSnakeAgent()

# Multigamer attributes:
# - numberOfParallelGames (no default)
# - RLAgent (no default)
# - numberOfTrials (default = 1)
# - numberOfIterations (default=1)
# - reward (default=100)
# - cost (default=1)
# - dim (default=(250,250))
gamer = Multigamer.multiGamer(10, agent, 5, 25)

gamer.runSimulation()
results = gamer.getResults()

## Single Game Simulation (with GUI)
# snake = game.Snake()
# foodSpawner = game.FoodSpawner()
# snakeGame = game.SnakeGame(snake, foodSpawner, 100, 1, True, (150,150))
# results = snakeGame.simulate(agent,5,3)

## Single Game Manual Play (with GUI)
# snake = game.Snake()
# foodSpawner = game.FoodSpawner()
# #snakeGame.play()
# #score, movements, food = snakeGame.getResults()
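
RandomSnakeAgent comes from the RLAgent module and is not shown here. As a rough illustration of what a random baseline usually looks like, the sketch below picks a uniformly random move each step; the class name, method name, and action encoding are assumptions, not the module's actual interface.

import random

class RandomSnakeAgentSketch:
    # Illustrative stand-in for a random baseline agent; the real
    # RLAgent.RandomSnakeAgent interface is not shown, so the action
    # encoding and method name below are assumptions.
    ACTIONS = ('UP', 'DOWN', 'LEFT', 'RIGHT')

    def getAction(self, state):
        # Ignore the state entirely and pick a uniformly random direction.
        return random.choice(self.ACTIONS)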
Code example #7
    def test(self,
             code,
             day=252,
             year=1,
             test_year=1,
             full_year=5,
             test_month=1,
             full_month=5,
             category='d',
             eps=0.05):
        now = datetime.now()
        now = now.strftime('%Y%m%d')

        path = '.\\DB\\CSV\\daily\\'
        if category == 'm':
            path = '.\\DB\\CSV\\min\\'
            day = 20 * 390  # 20 trading days x 390 minute bars per day
            test_year = test_month
            full_year = full_month
            now = now + '_m'
        filename = code + '_ch.csv'
        chartfile = code + '.csv'
        if not os.path.exists(path + filename):
            self.logging.info('creating file')
            self.pre.change_csv(code, category=category)
        if not os.path.exists('.\\Test\\re_' + now):
            os.makedirs('.\\Test\\re_' + now)

        self.logging.info('splitting off test data')
        df_chart = pd.read_csv(path + chartfile)
        if category == 'm':
            df_chart = self.pre.min_preprocessing(df_chart)
        df_chart_split = df_chart.iloc[-1 * day * year * test_year:]
        df_chart_split.to_csv('.\\Test\\re_' + now + '\\chart.csv',
                              index=False)
        df_chart = self.pre.change_feature(df_chart)
        df_test = df_chart.iloc[-1 * day * year * test_year:]
        df_prev = df_chart.iloc[-1 * day * year * test_year - 120:-1 * day *
                                year * test_year]
        step = len(df_test)
        env = RLEnvTrain.RLEnv(df_test)
        agent = RLAgent.Agent(gamma=0.98,
                              eps_start=eps,
                              eps_end=0.01,
                              eps_decay_steps=800,
                              eps_exponential_decay=0.99,
                              replay_capacity=int(1e6),
                              batch_size=step - 1,
                              tau=10,
                              code=code,
                              V_nn='DNN',
                              P_nn='CNN',
                              method='A2C')  # policy value A2C

        reward_list = []
        action_List = []
        quant_list = []
        stock_cnt_list = []
        obs_list = []

        obs = env.reset()
        agent.reset()

        if not os.path.isdir('.\\Test\\re_' + now + '\\'):
            os.makedirs('.\\Test\\re_' + now + '\\')

        data = obs.reshape(1, -1)
        data = pd.DataFrame(data, columns=df_chart.columns)
        df_prev = pd.concat([df_prev, data], ignore_index=True)
        obs = self.pre.add_feature(df_prev)
        step = len(df_test)
        self.logging.info('starting test')

        for i in tqdm(range(step)):
            # value-network and policy-network predictions from the observation
            value_per, policy_per = agent.predict_action_per(obs)

            action = agent.policy(value_per, policy_per)

            # current cash balance and number of shares held
            init_cash, stock_cnt = env.init_cash, env.total_stock

            # current stock price
            cu_price = obs[1]  # close
            quant = agent.decide_quant(action, value_per, policy_per,
                                       init_cash, cu_price)

            # check whether the buy/sell order is actually feasible

            action, quant = env.validation_(action, quant, cu_price, stock_cnt)

            next_obs, reward, done, info = env.next_step(action, quant)
            if next_obs is not None:
                data = next_obs.reshape(1, -1)
                data = pd.DataFrame(data, columns=df_prev.columns)
                df_prev = pd.concat([df_prev, data], ignore_index=True)
                next_obs = self.pre.add_feature(df_prev)

            reward_list.append(reward)
            action_List.append(action)
            quant_list.append(quant)
            stock_cnt_list.append(stock_cnt)
            obs_list.append(obs)

            obs = next_obs

        df_reward = pd.DataFrame(reward_list)
        df_action = pd.DataFrame(action_List)
        df_quant = pd.DataFrame(quant_list)
        df_stock_cnt = pd.DataFrame(stock_cnt_list)
        df_obs = pd.DataFrame(obs_list)

        df_reward.to_csv('.\\Test\\re_' + now + '\\reward.csv', index=False)
        df_action.to_csv('.\\Test\\re_' + now + '\\action.csv', index=False)
        df_quant.to_csv('.\\Test\\re_' + now + '\\quant.csv', index=False)
        df_stock_cnt.to_csv('.\\Test\\re_' + now + '\\stock_cnt.csv',
                            index=False)
        df_obs.to_csv('.\\Test\\re_' + now + '\\obs.csv', index=False)
        self.logging.info(f'reward : {reward} | test finished')
Code example #8
File: RLMain.py  Project: hongsamhc2/devhiiostudy
import pandas as pd
import numpy as np
import RLEnvTrain, RLAgent
import time
from tqdm import tqdm

df = pd.read_csv('.\\DB\\CSV\\daily\\DA000020_ch.csv')

env = RLEnvTrain.RLEnv(df)
agent = RLAgent.Agent()

for k in range(100):
    obs = env.reset()

    for i in tqdm(range(1000)):

        quant = 1  # buy/sell quantity

        action = agent.policy(obs)

        price = obs[1]
        if not env.validation_(action, quant, price):
            action = 0
            quant = 0
        next_obs, reward, done, info = env.next_step(action, quant)
        agent.memorize_transition(obs, action, reward, next_obs,
                                  0.0 if done else 1.0)
        if agent.train:
            agent.experience_replay()
        if done:
            break
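
The memorize_transition / experience_replay pair in this loop is the standard experience-replay pattern: store each (obs, action, reward, next_obs, not_done) tuple in a bounded buffer and later train on random minibatches drawn from it. A generic sketch of that pattern follows; it illustrates the idea only and is not the Agent class's actual implementation.

import random
from collections import deque

class ReplayBufferSketch:
    # Generic illustration of the experience-replay idea behind
    # memorize_transition() / experience_replay(); not the project's Agent.

    def __init__(self, capacity=int(1e6)):
        self.buffer = deque(maxlen=capacity)  # oldest transitions are dropped first

    def memorize_transition(self, obs, action, reward, next_obs, not_done):
        self.buffer.append((obs, action, reward, next_obs, not_done))

    def sample(self, batch_size=32):
        # Uniformly sample a minibatch for a Q-learning / actor-critic update.
        return random.sample(list(self.buffer), min(batch_size, len(self.buffer)))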
Code example #9
import os

import numpy as np
import pandas as pd
import RLEnvTrain, RLAgent
from tqdm import tqdm
from datetime import datetime
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
import tensorflow as tf
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)

df = pd.read_csv('.\\DB\\CSV\\daily\\DA000020_ch.csv')
df['profit'] = 0
df_obs = df.iloc[119:int(len(df) * 0.6), :].copy()
df_obs = df_obs.reset_index()
df_obs = df_obs.drop(['index'], axis=1)

env = RLEnvTrain.RLEnv(df_obs)
agent = RLAgent.Agent(batch_size=400)
reward_list = []
action_List = []
quant_list = []
re_list = []
for k in range(100):
    # directory named after the training date and time
    now = datetime.now()
    now = now.strftime('%Y%m%d_%H%M%S')
    if not os.path.isdir('.\\reward\\re_' + now + '\\'):
        os.mkdir('.\\reward\\re_' + now + '\\')

    obs = env.reset()
    sub_action_list = []
    sub_quant_list = []
    sub_re_list = []
Code example #10
File: P4_AI_new.py  Project: pinyaras/greente_ai
import numpy as np
import tensorflow as tf
import config
import csv
import graph
import RLAgent
import httpConec2 as hC
import matplotlib.pyplot as plt
from matplotlib.pyplot import draw

topo,hosts,nodes,hostnum,links=config.topo
print "host num:",hostnum
print "nodes num:",nodes
ag=RLAgent.PGNAgent(links,(nodes-hostnum)*2+1)
atest=graph.graf(nodes,links,initopo=topo,inihost=hosts)
atest.initial()
atest.printTopo() 
stepnum=2000
batch_size=config.batchsize
batch_sum=0   
episode_number=0
valid_action=0
total_episodes=config.episodes
xs,ys,rs=[],[],[]
epslon=config.explore_rate
plt_ANPB=[]
plt_ENPB=[]
plt_RNPB=[]
plt_rb=[]
overlink_record=[]
rsp=hC.sendTopo(atest.E,atest.host)
Code example #11
logging.info(
    f'Dataset split Train : {len(df_train)} | Test : {len(df_test)} | Set-up : {len(df_prev)}. Starting training.'
)

step = len(df_train)
logging.info(f'steps per training run : {step}')
#step = 10
logging.info('setting up the training environment')
env = RLEnvTrain.RLEnv(df_train)
logging.info('setting up the training agent')
agent = RLAgent.Agent(gamma=0.98,
                      eps_start=0.8,
                      eps_end=0.01,
                      eps_decay_steps=800,
                      eps_exponential_decay=0.99,
                      replay_capacity=int(1e6),
                      batch_size=step - 1,
                      tau=10,
                      code=code,
                      V_nn='DNN',
                      P_nn='CNN',
                      method='A2C',
                      tick='m')  #policy value A2C

reward_list = []
action_List = []
quant_list = []
re_list = []
stock_cnt_list = []
logging.info('starting 10 training runs')
for k in range(5):
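
The eps_start / eps_end / eps_decay_steps / eps_exponential_decay arguments describe an exploration schedule, but how the Agent class combines them is not shown here. One common scheme, sketched below purely as an assumption, decays epsilon linearly to eps_end over eps_decay_steps steps and then keeps shrinking it exponentially.

def epsilon_schedule(step,
                     eps_start=0.8,
                     eps_end=0.01,
                     eps_decay_steps=800,
                     eps_exponential_decay=0.99):
    # Assumed schedule, for illustration only: linear decay from eps_start
    # down to eps_end over the first eps_decay_steps steps, then continued
    # exponential shrinkage by eps_exponential_decay per step.
    if step < eps_decay_steps:
        return eps_start - (eps_start - eps_end) * step / eps_decay_steps
    return eps_end * eps_exponential_decay ** (step - eps_decay_steps)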
Code example #12
File: RLMain.py  Project: hongsamhc2/devhiiostudy
        env.reset()
        now_obs = env.observe()  # observation right after the reset
        print('reset_state', now_obs)
        for step in range(1, episode + 1):
            action = agent.eps_policy(now_obs)  # choose an action
            done, reward = agent.do_action(action, now_obs)  # execute the action
            next_obs = env.observe()
            agent.memorize(now_obs, action, reward, next_obs,
                           0.0 if done == 0 else 1.0)
            if agent.train:
                agent.experience_replay()
            if done == 0:
                break
            now_obs = next_obs

    end = time.time()
    print(end - start)
    return


os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

df = pd.read_csv('.\\DB\\CSV\\daily\\DA000020.csv')
df.sort_values(by='date', inplace=True)

env = RLTrainEnv.RLtradingTestEnv(df)
agent = RLAgent.RLAgent(env)
agent.set_cash(1000000 * len(df))

train_model(env, episode=4096)
Code example #13
def main():
    agent = RLAgent('agent',
                    decisionFrequency=10.0,
                    defaultSpeed=4,
                    defaultAltitude=20,
                    yawRate=70)

    # Callbacks are invoked in the order they were specified; beware of the
    # execution order if any state parameter depends on another.
    # State is lazily updated by the environment as the agent needs it, so the
    # agent always gets the freshest estimate; updates happen at a rate that
    # matches the agent's decision-making frequency.

    agent.defineState(orientation=getOrientation,
                      angularVelocity=getAngularVelocity,
                      linearVelocity=getVelocity,
                      position=getPosition)

    agent.setRl(monteCarlo)
    agent.setReward(reward)
    agent.setGoal(position=np.array([-40, -50, 0]))
    agent.setGoalMargins(position=np.array([0.5, 0.5, math.inf]))
    agent.start()
    agent.join()
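
The reward callback registered with setReward is not defined in these examples. Given that the goal is a target position with per-axis margins (the z margin is infinite here), one natural shape, sketched below purely as an assumption, is the negative distance to the goal on the finite-margin axes plus a bonus inside the margins; the callback's real signature and the attribute access on the goal objects are not shown and are assumed.

import numpy as np

def reward(agent, state):
    # Hypothetical reward shaped around the goal set above: negative distance
    # to the goal position on the axes with finite margins (x and y here),
    # plus a bonus once the agent is inside the margins. Both the signature
    # and the .position attribute access are assumptions.
    goal = np.asarray(agent.getGoal().position, dtype=float)
    margins = np.asarray(agent.getGoalMargins().position, dtype=float)
    finite = np.isfinite(margins)
    error = np.abs(np.asarray(state.position, dtype=float)[finite] - goal[finite])
    if np.all(error <= margins[finite]):
        return 100.0  # reached the goal region
    return -float(np.linalg.norm(error))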