Example #1
import tensorflow as tf

from agent.framework import Framework


class Agent(object):
    """Session-owning wrapper around the DDQN Framework graph."""

    def __init__(self):
        self.agent = Framework()

        # Let the GPU allocator grow on demand instead of
        # reserving all device memory up front.
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        self.sess = tf.Session(config=config)
        self.saver = tf.train.Saver()
        self.sess.run(tf.global_variables_initializer())
        # Finalize the graph so no ops can be added after construction.
        self.sess.graph.finalize()

    def get_deterministic_policy(self, inputs):
        # Greedy action from the value network.
        return self.agent.get_deterministic_policy(self.sess, inputs)

    def get_stochastic_policy(self, inputs, epsilon=0.9):
        # Epsilon-greedy action for exploration.
        return self.agent.get_stochastic_policy(self.sess, inputs, epsilon)

    def update_cache(self, state, action, reward, next_state, done):
        # Store one transition in the replay cache.
        self.agent.update_cache(state, action, reward, next_state, done)

    def update_eval(self):
        # One training update of the value (online) network.
        self.agent.update_value_net(self.sess)

    def update_target(self):
        # Sync the target network from the value network.
        self.agent.update_target_net(self.sess)

    def save_model(self, path="model/ddqn.ckpt"):
        self.saver.save(self.sess, path)

    def restore_model(self, path="model/ddqn.ckpt"):
        self.saver.restore(self.sess, path)

    def close(self):
        self.sess.close()
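
A minimal driver loop for this wrapper could look as follows. This is a sketch, not part of the original example: the environment stand-in with its reset()/step() interface is hypothetical, and the episode budget, epsilon, and target-sync interval are assumed values.

import numpy as np


class DummyEnv(object):
    """Hypothetical stand-in environment, used only to show the call pattern."""

    def reset(self):
        return np.zeros(4)

    def step(self, action):
        # Returns (next_state, reward, done); one-step episodes for brevity.
        return np.zeros(4), 0.0, True


env = DummyEnv()
agent = Agent()

for episode in range(100):                    # assumed episode budget
    state = env.reset()
    done = False
    step = 0
    while not done:
        action = agent.get_stochastic_policy(state, epsilon=0.9)
        next_state, reward, done = env.step(action)
        agent.update_cache(state, action, reward, next_state, done)
        agent.update_eval()                   # train the value network
        if step % 100 == 0:                   # assumed sync interval
            agent.update_target()             # refresh the target network
        state = next_state
        step += 1

agent.save_model()
agent.close()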
Example #2
import tensorflow as tf

from agent.framework import Framework
from emulator_v0.main import Account

A = Account()
F = Framework()
# print(len(tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)))
state, universe = A.reset()

sess = tf.Session()
sess.run(tf.global_variables_initializer())

# Greedy order for the initial state, applied to the account emulator.
order = F.get_deterministic_policy(sess, state)
next_state, next_universe, reward, done, value, portfolio = \
    A.step(order, universe)

# Fill the replay cache by repeating this single transition 2048 times.
for _ in range(2048):
    F.update_cache(state, order, reward, next_state, done)

# One training update of the value network on the cached data.
F.update_value_net(sess)
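
The single cached transition above can be extended into a full rollout using only the interface already shown. A sketch, where the rollout length and the single training update per rollout are assumptions:

# Sketch: roll the emulator forward and cache every transition.
# The rollout length (256) is an assumed value.
state, universe = A.reset()
for _ in range(256):
    order = F.get_deterministic_policy(sess, state)
    next_state, next_universe, reward, done, value, portfolio = \
        A.step(order, universe)
    F.update_cache(state, order, reward, next_state, done)
    if done:
        break
    state, universe = next_state, next_universe

F.update_value_net(sess)   # one training update on the cached rollout
sess.close()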