def __init__(self, state_size, window_size, trend, skip):
    # Assumes class-level constants LAYER_SIZE, OUTPUT_SIZE and LEARNING_RATE
    # are defined on the class, and that NumPy / TensorFlow 1.x are imported
    # as np / tf.
    self.state_size = state_size
    self.window_size = window_size
    self.half_window = window_size // 2
    self.trend = trend
    self.skip = skip
    tf.reset_default_graph()
    self.INITIAL_FEATURES = np.zeros((4, self.state_size))
    self.X = tf.placeholder(tf.float32, (None, None, self.state_size))
    self.Y = tf.placeholder(tf.float32, (None, self.OUTPUT_SIZE))
    cell = tf.nn.rnn_cell.LSTMCell(self.LAYER_SIZE, state_is_tuple=False)
    self.hidden_layer = tf.placeholder(tf.float32,
                                       (None, 2 * self.LAYER_SIZE))
    self.rnn, self.last_state = tf.nn.dynamic_rnn(
        inputs=self.X,
        cell=cell,
        dtype=tf.float32,
        initial_state=self.hidden_layer)
    # Dueling head: split the last RNN output into an advantage stream and a
    # value stream, then recombine as Q = V + (A - mean(A)).
    tensor_action, tensor_validation = tf.split(self.rnn[:, -1], 2, 1)
    feed_action = tf.layers.dense(tensor_action, self.OUTPUT_SIZE)
    feed_validation = tf.layers.dense(tensor_validation, 1)
    self.logits = feed_validation + tf.subtract(
        feed_action, tf.reduce_mean(feed_action, axis=1, keep_dims=True))
    self.cost = tf.reduce_sum(tf.square(self.Y - self.logits))
    self.optimizer = tf.train.AdamOptimizer(
        learning_rate=self.LEARNING_RATE).minimize(self.cost)
    self.sess = tf.InteractiveSession()
    self.sess.run(tf.global_variables_initializer())
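A minimal usage sketch for the recurrent agent above (an assumption, not part of the original snippet): seed the window with INITIAL_FEATURES and a zero LSTM state, then run logits and last_state together so the hidden state can be carried across steps.

# Hypothetical forward pass; `agent` and the use of the class constant
# LAYER_SIZE are assumptions, not part of the original code.
init_values = np.zeros((1, 2 * agent.LAYER_SIZE))        # zero LSTM state
action_values, last_state = agent.sess.run(
    [agent.logits, agent.last_state],
    feed_dict={
        agent.X: [agent.INITIAL_FEATURES],                # shape (1, 4, state_size)
        agent.hidden_layer: init_values,
    })
action = np.argmax(action_values[0])                      # greedy action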
def __init__(self, state_size, window_size, trend, skip, batch_size):
    # Assumes `from collections import deque`, NumPy as np and
    # TensorFlow 1.x as tf are imported at module level.
    self.state_size = state_size
    self.window_size = window_size
    self.half_window = window_size // 2
    self.trend = trend
    self.skip = skip
    self.action_size = 3
    self.batch_size = batch_size
    self.memory = deque(maxlen=1000)   # replay buffer
    self.inventory = []

    # Q-learning hyperparameters: discount factor and epsilon-greedy schedule.
    self.gamma = 0.95
    self.epsilon = 0.5
    self.epsilon_min = 0.01
    self.epsilon_decay = 0.999

    tf.reset_default_graph()
    self.sess = tf.InteractiveSession()
    self.X = tf.placeholder(tf.float32, [None, self.state_size])
    self.Y = tf.placeholder(tf.float32, [None, self.action_size])
    feed = tf.layers.dense(self.X, 512, activation=tf.nn.relu)
    # Dueling head: advantage and value streams recombined as
    # Q = V + (A - mean(A)).
    tensor_action, tensor_validation = tf.split(feed, 2, 1)
    feed_action = tf.layers.dense(tensor_action, self.action_size)
    feed_validation = tf.layers.dense(tensor_validation, 1)
    self.logits = feed_validation + tf.subtract(
        feed_action, tf.reduce_mean(feed_action, axis=1, keep_dims=True))
    self.cost = tf.reduce_mean(tf.square(self.Y - self.logits))
    self.optimizer = tf.train.GradientDescentOptimizer(1e-5).minimize(
        self.cost)
    self.sess.run(tf.global_variables_initializer())
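A hedged sketch of how an agent like this typically selects actions given the epsilon fields above (the method name and `state` argument are assumptions): explore with probability epsilon, otherwise take the argmax of the dueling Q-values.

# Hypothetical epsilon-greedy action selection for the agent above
# (assumes `import numpy as np` at module level).
def act(self, state):
    if np.random.rand() <= self.epsilon:
        return np.random.randint(self.action_size)            # explore
    q_values = self.sess.run(self.logits, feed_dict={self.X: [state]})
    return int(np.argmax(q_values[0]))                         # exploit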
Example #3
def __init__(self, name, input_size, output_size, size_layer):
    # Actor network with a dueling-style output head, built under its own
    # variable scope so separate copies can be selected by name.
    with tf.variable_scope(name):
        self.X = tf.placeholder(tf.float32, (None, input_size))
        feed_actor = tf.layers.dense(self.X, size_layer, activation=tf.nn.relu)
        tensor_action, tensor_validation = tf.split(feed_actor, 2, 1)
        feed_action = tf.layers.dense(tensor_action, output_size)
        feed_validation = tf.layers.dense(tensor_validation, 1)
        self.logits = feed_validation + tf.subtract(
            feed_action, tf.reduce_mean(feed_action, axis=1, keep_dims=True))
Example #4
def __init__(self, input_size, output_size, layer_size, learning_rate):
    # Feed-forward Q-network with its own squared-error loss and optimizer.
    self.X = tf.placeholder(tf.float32, (None, input_size))
    self.Y = tf.placeholder(tf.float32, (None, output_size))
    feed = tf.layers.dense(self.X, layer_size, activation=tf.nn.relu)
    # Dueling head: Q = V + (A - mean(A)).
    tensor_action, tensor_validation = tf.split(feed, 2, 1)
    feed_action = tf.layers.dense(tensor_action, output_size)
    feed_validation = tf.layers.dense(tensor_validation, 1)
    self.logits = feed_validation + tf.subtract(
        feed_action, tf.reduce_mean(feed_action, axis=1, keep_dims=True))
    self.cost = tf.reduce_sum(tf.square(self.Y - self.logits))
    self.optimizer = tf.train.AdamOptimizer(
        learning_rate=learning_rate).minimize(self.cost)
def __init__(self, name, input_size, output_size, size_layer):
    # Recurrent variant: an LSTM reads the state sequence and the dueling
    # head is applied to the output at the last time step.
    with tf.variable_scope(name):
        self.X = tf.placeholder(tf.float32, (None, None, input_size))
        self.hidden_layer = tf.placeholder(tf.float32, (None, 2 * size_layer))
        cell = tf.nn.rnn_cell.LSTMCell(size_layer, state_is_tuple=False)
        self.rnn, self.last_state = tf.nn.dynamic_rnn(
            inputs=self.X, cell=cell, dtype=tf.float32,
            initial_state=self.hidden_layer)
        tensor_action, tensor_validation = tf.split(self.rnn[:, -1], 2, 1)
        feed_action = tf.layers.dense(tensor_action, output_size)
        feed_validation = tf.layers.dense(tensor_validation, 1)
        self.logits = feed_validation + tf.subtract(
            feed_action, tf.reduce_mean(feed_action, axis=1, keep_dims=True))
Example #6
def __init__(self, name, input_size, output_size, size_layer, learning_rate):
    # Critic network: combines a dueling head over the state with the values
    # fed in via self.Y (presumably the actor's output) and regresses the
    # result onto the observed reward.
    with tf.variable_scope(name):
        self.X = tf.placeholder(tf.float32, (None, input_size))
        self.Y = tf.placeholder(tf.float32, (None, output_size))
        self.REWARD = tf.placeholder(tf.float32, (None, 1))
        feed_critic = tf.layers.dense(self.X, size_layer, activation=tf.nn.relu)
        tensor_action, tensor_validation = tf.split(feed_critic, 2, 1)
        feed_action = tf.layers.dense(tensor_action, output_size)
        feed_validation = tf.layers.dense(tensor_validation, 1)
        feed_critic = feed_validation + tf.subtract(
            feed_action, tf.reduce_mean(feed_action, axis=1, keep_dims=True))
        feed_critic = tf.nn.relu(feed_critic) + self.Y
        feed_critic = tf.layers.dense(feed_critic, size_layer // 2,
                                      activation=tf.nn.relu)
        self.logits = tf.layers.dense(feed_critic, 1)
        self.cost = tf.reduce_mean(tf.square(self.REWARD - self.logits))
        self.optimizer = tf.train.AdamOptimizer(learning_rate).minimize(self.cost)
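One plausible way the actor (Example #3) and critic (Example #6) classes fit together, shown purely as an assumption about the surrounding training code: the critic scores (state, actor output) pairs against observed rewards. Class names, sizes and dummy data below are all hypothetical.

# Hypothetical wiring of the actor/critic snippets above; the class names
# Actor and Critic and all sizes are assumptions, not part of the source.
import numpy as np
import tensorflow as tf

tf.reset_default_graph()
state_size, action_size, layer_size = 10, 3, 256
actor = Actor('actor', state_size, action_size, layer_size)
critic = Critic('critic', state_size, action_size, layer_size, 1e-3)

sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())

states = np.random.randn(4, state_size)      # dummy batch of states
rewards = np.random.randn(4, 1)               # dummy observed rewards
actor_out = sess.run(actor.logits, feed_dict={actor.X: states})
# One critic update: regress (state, actor output) onto the rewards.
_, critic_loss = sess.run([critic.optimizer, critic.cost],
                          feed_dict={critic.X: states,
                                     critic.Y: actor_out,
                                     critic.REWARD: rewards})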
def __init__(self, name, input_size, output_size, size_layer, learning_rate):
    # Recurrent critic: an LSTM reads the state sequence, a dueling head over
    # the last time step is combined with self.Y (presumably the actor's
    # output) and regressed onto the observed reward.
    with tf.variable_scope(name):
        self.X = tf.placeholder(tf.float32, (None, None, input_size))
        self.Y = tf.placeholder(tf.float32, (None, output_size))
        self.hidden_layer = tf.placeholder(tf.float32, (None, 2 * size_layer))
        self.REWARD = tf.placeholder(tf.float32, (None, 1))
        cell = tf.nn.rnn_cell.LSTMCell(size_layer, state_is_tuple=False)
        self.rnn, self.last_state = tf.nn.dynamic_rnn(
            inputs=self.X, cell=cell, dtype=tf.float32,
            initial_state=self.hidden_layer)
        tensor_action, tensor_validation = tf.split(self.rnn[:, -1], 2, 1)
        feed_action = tf.layers.dense(tensor_action, output_size)
        feed_validation = tf.layers.dense(tensor_validation, 1)
        feed_critic = feed_validation + tf.subtract(
            feed_action, tf.reduce_mean(feed_action, axis=1, keep_dims=True))
        feed_critic = tf.nn.relu(feed_critic) + self.Y
        feed_critic = tf.layers.dense(feed_critic, size_layer // 2,
                                      activation=tf.nn.relu)
        self.logits = tf.layers.dense(feed_critic, 1)
        self.cost = tf.reduce_mean(tf.square(self.REWARD - self.logits))
        self.optimizer = tf.train.AdamOptimizer(learning_rate).minimize(self.cost)
def __init__(self, state_size, window_size, trend, skip):
    # Curiosity-driven DQN agent. Assumes class-level constants OUTPUT_SIZE,
    # GAMMA and LEARNING_RATE are defined on the class, and TensorFlow 1.x is
    # imported as tf.
    self.state_size = state_size
    self.window_size = window_size
    self.half_window = window_size // 2
    self.trend = trend
    self.skip = skip
    tf.reset_default_graph()
    self.X = tf.placeholder(tf.float32, (None, self.state_size))
    self.Y = tf.placeholder(tf.float32, (None, self.state_size))
    self.ACTION = tf.placeholder(tf.float32, (None,))
    self.REWARD = tf.placeholder(tf.float32, (None,))
    self.batch_size = tf.shape(self.ACTION)[0]

    # Curiosity model: predicts the next state from (state, action); its
    # per-sample prediction error becomes an intrinsic reward.
    with tf.variable_scope('curiosity_model'):
        action = tf.reshape(self.ACTION, (-1, 1))
        state_action = tf.concat([self.X, action], axis=1)
        save_state = tf.identity(self.Y)

        feed = tf.layers.dense(state_action, 32, activation=tf.nn.relu)
        self.curiosity_logits = tf.layers.dense(feed, self.state_size)
        self.curiosity_cost = tf.reduce_sum(
            tf.square(save_state - self.curiosity_logits), axis=1)

        self.curiosity_optimizer = tf.train.RMSPropOptimizer(
            self.LEARNING_RATE).minimize(
                tf.reduce_mean(self.curiosity_cost))

    # Intrinsic (curiosity) reward added to the extrinsic reward.
    total_reward = tf.add(self.curiosity_cost, self.REWARD)

    with tf.variable_scope("q_model"):
        # Online (eval) network: dueling head over the current state.
        with tf.variable_scope("eval_net"):
            x_action = tf.layers.dense(self.X, 128, tf.nn.relu)
            tensor_action, tensor_validation = tf.split(x_action, 2, 1)
            feed_action = tf.layers.dense(tensor_action, self.OUTPUT_SIZE)
            feed_validation = tf.layers.dense(tensor_validation, 1)
            self.logits = feed_validation + tf.subtract(
                feed_action,
                tf.reduce_mean(feed_action, axis=1, keep_dims=True))

        # Target network: same architecture over the next state.
        with tf.variable_scope("target_net"):
            y_action = tf.layers.dense(self.Y, 128, tf.nn.relu)
            tensor_action, tensor_validation = tf.split(y_action, 2, 1)
            feed_action = tf.layers.dense(tensor_action, self.OUTPUT_SIZE)
            feed_validation = tf.layers.dense(tensor_validation, 1)
            y_q = feed_validation + tf.subtract(
                feed_action,
                tf.reduce_mean(feed_action, axis=1, keep_dims=True))

        # Q-learning target; only the eval net is trained, the target net is
        # refreshed by copying weights (see target_replace_op below).
        q_target = total_reward + self.GAMMA * tf.reduce_max(y_q, axis=1)
        action = tf.cast(self.ACTION, tf.int32)
        action_indices = tf.stack(
            [tf.range(self.batch_size, dtype=tf.int32), action], axis=1)
        q = tf.gather_nd(params=self.logits, indices=action_indices)
        self.cost = tf.losses.mean_squared_error(labels=q_target,
                                                 predictions=q)
        self.optimizer = tf.train.RMSPropOptimizer(
            self.LEARNING_RATE).minimize(
                self.cost,
                var_list=tf.get_collection(
                    tf.GraphKeys.TRAINABLE_VARIABLES, "q_model/eval_net"))

    # Op that copies eval-net weights into the target net.
    t_params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                 scope='q_model/target_net')
    e_params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                 scope='q_model/eval_net')
    self.target_replace_op = [
        tf.assign(t, e) for t, e in zip(t_params, e_params)
    ]

    self.sess = tf.InteractiveSession()
    self.sess.run(tf.global_variables_initializer())
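A minimal sketch of one training step for the curiosity-driven agent above (the variable names, batch layout and dummy data are assumptions, not part of the original code): periodically run target_replace_op to sync the target net, then minimize the Q cost and the curiosity cost on a replay batch.

# Hypothetical single training step; `agent` and the dummy arrays are
# assumptions, not part of the original code.
states = np.random.randn(32, agent.state_size)        # dummy replay batch
next_states = np.random.randn(32, agent.state_size)
actions = np.random.randint(0, agent.OUTPUT_SIZE, size=(32,)).astype(np.float32)
rewards = np.random.randn(32).astype(np.float32)

agent.sess.run(agent.target_replace_op)               # periodic target sync
cost, _, _ = agent.sess.run(
    [agent.cost, agent.optimizer, agent.curiosity_optimizer],
    feed_dict={agent.X: states,
               agent.Y: next_states,
               agent.ACTION: actions,
               agent.REWARD: rewards})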