def __init__(self, state_size, window_size, trend, skip):
     self.state_size = state_size
     self.window_size = window_size
     self.half_window = window_size // 2
     self.trend = trend
     self.INITIAL_FEATURES = np.zeros((4, self.state_size))
     self.skip = skip
     tf.reset_default_graph()
     self.actor = Actor('actor-original', self.state_size, self.OUTPUT_SIZE,
                        self.LAYER_SIZE)
     self.actor_target = Actor('actor-target', self.state_size,
                               self.OUTPUT_SIZE, self.LAYER_SIZE)
     self.critic = Critic('critic-original', self.state_size,
                          self.OUTPUT_SIZE, self.LAYER_SIZE,
                          self.LEARNING_RATE)
     self.critic_target = Critic('critic-target', self.state_size,
                                 self.OUTPUT_SIZE, self.LAYER_SIZE,
                                 self.LEARNING_RATE)
     self.grad_critic = tf.gradients(self.critic.logits, self.critic.Y)
     self.actor_critic_grad = tf.placeholder(tf.float32,
                                             [None, self.OUTPUT_SIZE])
     weights_actor = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                       scope='actor')
     self.grad_actor = tf.gradients(self.actor.logits, weights_actor,
                                    -self.actor_critic_grad)
     grads = zip(self.grad_actor, weights_actor)
     self.optimizer = tf.train.AdamOptimizer(
         self.LEARNING_RATE).apply_gradients(grads)
     self.sess = tf.InteractiveSession()
     self.sess.run(tf.global_variables_initializer())
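
# A minimal sketch of how the graph pieces above are usually driven during training
# (deterministic policy-gradient style). It assumes the Actor and Critic classes,
# which are not shown in this excerpt, expose an `X` state placeholder, a `Y` action
# placeholder on the critic, and a `logits` output; treat those names as assumptions.
def train_actor(agent, states):
    # actions proposed by the online actor for this batch of states
    actions = agent.sess.run(agent.actor.logits, feed_dict={agent.actor.X: states})
    # dQ/da from the critic, evaluated at (state, proposed action)
    action_grads = agent.sess.run(agent.grad_critic,
                                  feed_dict={agent.critic.X: states,
                                             agent.critic.Y: actions})[0]
    # push dQ/da back through the actor weights; grad_actor already carries the minus
    # sign, so apply_gradients ascends the critic's value estimate
    agent.sess.run(agent.optimizer,
                   feed_dict={agent.actor.X: states,
                              agent.actor_critic_grad: action_grads})
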
 def __init__(self, state_size, window_size, trend, skip):
     self.state_size = state_size
     self.window_size = window_size
     self.half_window = window_size // 2
     self.trend = trend
     self.skip = skip
     tf.reset_default_graph()
     self.INITIAL_FEATURES = np.zeros((4, self.state_size))
     self.X = tf.placeholder(tf.float32, (None, None, self.state_size))
     self.Y = tf.placeholder(tf.float32, (None, self.OUTPUT_SIZE))
     cell = tf.nn.rnn_cell.LSTMCell(self.LAYER_SIZE, state_is_tuple=False)
     self.hidden_layer = tf.placeholder(tf.float32,
                                        (None, 2 * self.LAYER_SIZE))
     self.rnn, self.last_state = tf.nn.dynamic_rnn(
         inputs=self.X,
         cell=cell,
         dtype=tf.float32,
         initial_state=self.hidden_layer)
     tensor_action, tensor_validation = tf.split(self.rnn[:, -1], 2, 1)
     feed_action = tf.layers.dense(tensor_action, self.OUTPUT_SIZE)
     feed_validation = tf.layers.dense(tensor_validation, 1)
     self.logits = feed_validation + tf.subtract(
         feed_action, tf.reduce_mean(feed_action, axis=1, keep_dims=True))
     self.cost = tf.reduce_sum(tf.square(self.Y - self.logits))
     self.optimizer = tf.train.AdamOptimizer(
         learning_rate=self.LEARNING_RATE).minimize(self.cost)
     self.sess = tf.InteractiveSession()
     self.sess.run(tf.global_variables_initializer())
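
# The two dense heads above form a dueling decomposition:
# Q(s, a) = V(s) + (A(s, a) - mean_a A(s, a)). A tiny NumPy illustration with made-up
# numbers shows what the logits line computes:
import numpy as np

advantage = np.array([[1.0, 2.0, 3.0]])   # feed_action, one row per state
value = np.array([[0.5]])                 # feed_validation
q = value + (advantage - advantage.mean(axis=1, keepdims=True))
print(q)                                  # [[-0.5  0.5  1.5]]
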
    def __init__(self, state_size, window_size, trend, skip, batch_size):
        self.state_size = state_size
        self.window_size = window_size
        self.half_window = window_size // 2
        self.trend = trend
        self.skip = skip
        self.action_size = 3
        self.batch_size = batch_size
        self.memory = deque(maxlen=1000)
        self.inventory = []

        self.gamma = 0.95
        self.epsilon = 0.5
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.999

        tf.reset_default_graph()
        self.sess = tf.InteractiveSession()
        self.X = tf.placeholder(tf.float32, [None, self.state_size])
        self.Y = tf.placeholder(tf.float32, [None, self.action_size])
        feed = tf.layers.dense(self.X, 512, activation=tf.nn.relu)
        tensor_action, tensor_validation = tf.split(feed, 2, 1)
        feed_action = tf.layers.dense(tensor_action, self.action_size)
        feed_validation = tf.layers.dense(tensor_validation, 1)
        self.logits = feed_validation + tf.subtract(
            feed_action, tf.reduce_mean(feed_action, axis=1, keep_dims=True))
        self.cost = tf.reduce_mean(tf.square(self.Y - self.logits))
        self.optimizer = tf.train.GradientDescentOptimizer(1e-5).minimize(
            self.cost)
        self.sess.run(tf.global_variables_initializer())
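
# A minimal sketch of the epsilon-greedy policy that the epsilon/epsilon_min/
# epsilon_decay constants above are set up for; the free-standing function form is
# illustrative, and `agent` is an instance of the class above.
import numpy as np

def act(agent, state):
    # explore with probability epsilon, otherwise act greedily on predicted Q-values
    if np.random.rand() <= agent.epsilon:
        return np.random.randint(agent.action_size)
    q_values = agent.sess.run(agent.logits, feed_dict={agent.X: [state]})
    return int(np.argmax(q_values[0]))
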
 def __init__(self, state_size, window_size, trend, skip):
     self.state_size = state_size
     self.window_size = window_size
     self.half_window = window_size // 2
     self.trend = trend
     self.skip = skip
     self.X = tf.placeholder(tf.float32, (None, self.state_size))
     self.REWARDS = tf.placeholder(tf.float32, (None))
     self.ACTIONS = tf.placeholder(tf.int32, (None))
     feed_forward = tf.layers.dense(self.X,
                                    self.LAYER_SIZE,
                                    activation=tf.nn.relu)
     self.logits = tf.layers.dense(feed_forward,
                                   self.OUTPUT_SIZE,
                                   activation=tf.nn.softmax)
     input_y = tf.one_hot(self.ACTIONS, self.OUTPUT_SIZE)
     loglike = tf.log((input_y * (input_y - self.logits) + (1 - input_y) *
                       (input_y + self.logits)) + 1)
     rewards = tf.tile(tf.reshape(self.REWARDS, (-1, 1)),
                       [1, self.OUTPUT_SIZE])
     self.cost = -tf.reduce_mean(loglike * (rewards + 1))
     self.optimizer = tf.train.AdamOptimizer(
         learning_rate=self.LEARNING_RATE).minimize(self.cost)
     self.sess = tf.InteractiveSession()
     self.sess.run(tf.global_variables_initializer())
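
# The REWARDS placeholder above is normally fed discounted returns rather than raw
# per-step rewards. A standard computation; gamma is chosen here for illustration,
# the excerpt does not show the value actually used.
import numpy as np

def discount_rewards(rewards, gamma=0.99):
    discounted = np.zeros(len(rewards), dtype=np.float32)
    running = 0.0
    for t in reversed(range(len(rewards))):
        running = running * gamma + rewards[t]   # G_t = r_t + gamma * G_{t+1}
        discounted[t] = running
    return discounted
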
    def __init__(self,
                 input_,
                 dimension=2,
                 learning_rate=0.01,
                 hidden_layer=256,
                 epoch=20):
        input_size = input_.shape[1]
        self.X = tf.placeholder("float", [None, input_.shape[1]])

        weights = {
            'encoder_h1':
            tf.Variable(tf.random_normal([input_size, hidden_layer])),
            'encoder_h2':
            tf.Variable(tf.random_normal([hidden_layer, dimension])),
            'decoder_h1':
            tf.Variable(tf.random_normal([dimension, hidden_layer])),
            'decoder_h2':
            tf.Variable(tf.random_normal([hidden_layer, input_size])),
        }

        biases = {
            'encoder_b1': tf.Variable(tf.random_normal([hidden_layer])),
            'encoder_b2': tf.Variable(tf.random_normal([dimension])),
            'decoder_b1': tf.Variable(tf.random_normal([hidden_layer])),
            'decoder_b2': tf.Variable(tf.random_normal([input_size])),
        }

        first_layer_encoder = tf.nn.sigmoid(
            tf.add(tf.matmul(self.X, weights['encoder_h1']),
                   biases['encoder_b1']))
        self.second_layer_encoder = tf.nn.sigmoid(
            tf.add(tf.matmul(first_layer_encoder, weights['encoder_h2']),
                   biases['encoder_b2']))
        first_layer_decoder = tf.nn.sigmoid(
            tf.add(tf.matmul(self.second_layer_encoder, weights['decoder_h1']),
                   biases['decoder_b1']))
        second_layer_decoder = tf.nn.sigmoid(
            tf.add(tf.matmul(first_layer_decoder, weights['decoder_h2']),
                   biases['decoder_b2']))
        self.cost = tf.reduce_mean(tf.pow(self.X - second_layer_decoder, 2))
        self.optimizer = tf.train.RMSPropOptimizer(learning_rate).minimize(
            self.cost)
        self.sess = tf.InteractiveSession()
        self.sess.run(tf.global_variables_initializer())

        for i in range(epoch):
            last_time = time.time()
            _, loss = self.sess.run([self.optimizer, self.cost],
                                    feed_dict={self.X: input_})
            if (i + 1) % 10 == 0:
                print('epoch:', i + 1, 'loss:', loss, 'time:',
                      time.time() - last_time)
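
# A usage sketch: the constructor above already trains the autoencoder, so the 2-D
# bottleneck can then be read back out through second_layer_encoder. The class name
# `Encoder` and the `prices` array (shape [n_samples, n_features]) are assumptions
# for illustration.
tf.reset_default_graph()
encoder = Encoder(prices, dimension=2, learning_rate=0.01, hidden_layer=256, epoch=20)
embedding = encoder.sess.run(encoder.second_layer_encoder,
                             feed_dict={encoder.X: prices})
print(embedding.shape)   # (n_samples, 2)
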
 def __init__(self, state_size, window_size, trend, skip):
     self.state_size = state_size
     self.window_size = window_size
     self.half_window = window_size // 2
     self.trend = trend
     self.skip = skip
     tf.reset_default_graph()
     self.model = Model(self.state_size, self.OUTPUT_SIZE, self.LAYER_SIZE,
                        self.LEARNING_RATE)
     self.model_negative = Model(self.state_size, self.OUTPUT_SIZE,
                                 self.LAYER_SIZE, self.LEARNING_RATE)
     self.sess = tf.InteractiveSession()
     self.sess.run(tf.global_variables_initializer())
     self.trainable = tf.trainable_variables()
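
# A sketch of how the 'negative' (target) network can be synced with the online model:
# self.model is built before self.model_negative, so the first half of self.trainable
# belongs to the online network and the second half to the target. The function name is
# illustrative; `agent` is an instance of the class above.
def sync_target(agent):
    half = len(agent.trainable) // 2
    for online_var, target_var in zip(agent.trainable[:half], agent.trainable[half:]):
        agent.sess.run(target_var.assign(online_var))
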
 def __init__(self, state_size, window_size, trend, skip):
     self.state_size = state_size
     self.window_size = window_size
     self.half_window = window_size // 2
     self.trend = trend
     self.skip = skip
     tf.reset_default_graph()
     self.X = tf.placeholder(tf.float32, (None, self.state_size))
     self.Y = tf.placeholder(tf.float32, (None, self.state_size))
     self.ACTION = tf.placeholder(tf.float32, (None))
     self.REWARD = tf.placeholder(tf.float32, (None))
     self.batch_size = tf.shape(self.ACTION)[0]
     
     with tf.variable_scope('curiosity_model'):
         action = tf.reshape(self.ACTION, (-1,1))
         state_action = tf.concat([self.X, action], axis=1)
         save_state = tf.identity(self.Y)
         
         feed = tf.layers.dense(state_action, 32, activation=tf.nn.relu)
         self.curiosity_logits = tf.layers.dense(feed, self.state_size)
         self.curiosity_cost = tf.reduce_sum(tf.square(save_state - self.curiosity_logits), axis=1)
         
          self.curiosity_optimizer = tf.train.RMSPropOptimizer(
              self.LEARNING_RATE).minimize(tf.reduce_mean(self.curiosity_cost))
     
     total_reward = tf.add(self.curiosity_cost, self.REWARD)
     
     with tf.variable_scope("q_model"):
         with tf.variable_scope("eval_net"):
             x_action = tf.layers.dense(self.X, 128, tf.nn.relu)
             self.logits = tf.layers.dense(x_action, self.OUTPUT_SIZE)
         
         with tf.variable_scope("target_net"):
             y_action = tf.layers.dense(self.Y, 128, tf.nn.relu)
             y_q = tf.layers.dense(y_action, self.OUTPUT_SIZE)
         
         q_target = total_reward + self.GAMMA * tf.reduce_max(y_q, axis=1)
         action = tf.cast(self.ACTION, tf.int32)
         action_indices = tf.stack([tf.range(self.batch_size, dtype=tf.int32), action], axis=1)
         q = tf.gather_nd(params=self.logits, indices=action_indices)
         self.cost = tf.losses.mean_squared_error(labels=q_target, predictions=q)
          self.optimizer = tf.train.RMSPropOptimizer(self.LEARNING_RATE).minimize(
              self.cost,
              var_list=tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                         "q_model/eval_net"))
         
     t_params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='q_model/target_net')
     e_params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='q_model/eval_net')
     self.target_replace_op = [tf.assign(t, e) for t, e in zip(t_params, e_params)]
     
     self.sess = tf.InteractiveSession()
     self.sess.run(tf.global_variables_initializer())
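
# A usage sketch of the curiosity term above: curiosity_cost is the per-sample squared
# error of predicting the next state from (state, action), and the graph adds it to the
# extrinsic reward as an intrinsic exploration bonus. `states`, `next_states`, `actions`
# and `rewards` are illustrative NumPy batches; `agent` is an instance of the class above.
intrinsic = agent.sess.run(agent.curiosity_cost,
                           feed_dict={agent.X: states,
                                      agent.Y: next_states,
                                      agent.ACTION: actions})
total = rewards + intrinsic   # same quantity the graph computes as total_reward
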
def forecast():
    tf.reset_default_graph()
    modelnn = Model(
        learning_rate, num_layers, df_log.shape[1], size_layer, df_log.shape[1], dropout_rate
    )
    sess = tf.InteractiveSession()
    sess.run(tf.global_variables_initializer())
    date_ori = pd.to_datetime(df.iloc[:, 0]).tolist()

    pbar = tqdm(range(epoch), desc = 'train loop')
    for i in pbar:
        init_value = np.zeros((1, num_layers * size_layer))
        total_loss, total_acc = [], []
        for k in range(0, df_train.shape[0] - 1, timestamp):
            index = min(k + timestamp, df_train.shape[0] - 1)
            batch_x = np.expand_dims(
                df_train.iloc[k : index, :].values, axis = 0
            )
            batch_y = df_train.iloc[k + 1 : index + 1, :].values
            logits, last_state, _, loss = sess.run(
                [modelnn.logits, modelnn.last_state, modelnn.optimizer, modelnn.cost],
                feed_dict = {
                    modelnn.X: batch_x,
                    modelnn.Y: batch_y,
                    modelnn.hidden_layer: init_value,
                },
            )        
            init_value = last_state
            total_loss.append(loss)
            total_acc.append(calculate_accuracy(batch_y[:, 0], logits[:, 0]))
        pbar.set_postfix(cost = np.mean(total_loss), acc = np.mean(total_acc))
    
    future_day = test_size

    output_predict = np.zeros((df_train.shape[0] + future_day, df_train.shape[1]))
    output_predict[0] = df_train.iloc[0]
    upper_b = (df_train.shape[0] // timestamp) * timestamp
    init_value = np.zeros((1, num_layers * size_layer))

    for k in range(0, (df_train.shape[0] // timestamp) * timestamp, timestamp):
        out_logits, last_state = sess.run(
            [modelnn.logits, modelnn.last_state],
            feed_dict = {
                modelnn.X: np.expand_dims(
                    df_train.iloc[k : k + timestamp], axis = 0
                ),
                modelnn.hidden_layer: init_value,
            },
        )
        init_value = last_state
        output_predict[k + 1 : k + timestamp + 1] = out_logits

    if upper_b != df_train.shape[0]:
        out_logits, last_state = sess.run(
            [modelnn.logits, modelnn.last_state],
            feed_dict = {
                modelnn.X: np.expand_dims(df_train.iloc[upper_b:], axis = 0),
                modelnn.hidden_layer: init_value,
            },
        )
        output_predict[upper_b + 1 : df_train.shape[0] + 1] = out_logits
        future_day -= 1
        date_ori.append(date_ori[-1] + timedelta(days = 1))

    init_value = last_state
    
    for i in range(future_day):
        o = output_predict[-future_day - timestamp + i:-future_day + i]
        out_logits, last_state = sess.run(
            [modelnn.logits, modelnn.last_state],
            feed_dict = {
                modelnn.X: np.expand_dims(o, axis = 0),
                modelnn.hidden_layer: init_value,
            },
        )
        init_value = last_state
        output_predict[-future_day + i] = out_logits[-1]
        date_ori.append(date_ori[-1] + timedelta(days = 1))
    
    output_predict = minmax.inverse_transform(output_predict)
    deep_future = anchor(output_predict[:, 0], 0.3)
    
    return deep_future[-test_size:]
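
# `anchor` is not defined in this excerpt; an exponential-smoothing implementation
# consistent with how it is called above (a signal plus a 0.3 smoothing weight) would be:
def anchor(signal, weight):
    buffer, last = [], signal[0]
    for value in signal:
        last = last * weight + (1 - weight) * value   # exponentially weighted average
        buffer.append(last)
    return buffer
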
thought_vector.shape

# In[8]:

num_layers = 1
size_layer = 128
timestamp = 5
epoch = 500
dropout_rate = 0.1

# In[9]:

tf.reset_default_graph()
modelnn = model.Model(0.01, num_layers, thought_vector.shape[1], size_layer, 1,
                      dropout_rate)
sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())
for i in range(epoch):
    init_value = np.zeros((1, num_layers * 2 * size_layer))
    total_loss = 0
    for k in range(0, (thought_vector.shape[0] // timestamp) * timestamp,
                   timestamp):
        batch_x = np.expand_dims(thought_vector[k:k + timestamp, :], axis=0)
        batch_y = df_log.values[k + 1:k + timestamp + 1, 3].reshape([-1, 1])
        last_state, _, loss = sess.run(
            [modelnn.last_state, modelnn.optimizer, modelnn.cost],
            feed_dict={
                modelnn.X: batch_x,
                modelnn.Y: batch_y,
                modelnn.hidden_layer: init_value
            })
        init_value = last_state   # carry the LSTM state across windows
        total_loss += loss        # accumulate loss over the epoch
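
# A prediction sketch for the model trained above: feed the last `timestamp` thought
# vectors together with the carried-over hidden state to get the next smoothed value.
# This assumes modelnn exposes `logits` the same way the forecast() example does.
out_logits, last_state = sess.run(
    [modelnn.logits, modelnn.last_state],
    feed_dict={modelnn.X: np.expand_dims(thought_vector[-timestamp:, :], axis=0),
               modelnn.hidden_layer: init_value})
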
    def __init__(self, state_size, window_size, trend, skip):
        self.state_size = state_size
        self.window_size = window_size
        self.half_window = window_size // 2
        self.trend = trend
        self.skip = skip
        tf.reset_default_graph()
        self.INITIAL_FEATURES = np.zeros((4, self.state_size))
        self.X = tf.placeholder(tf.float32, (None, None, self.state_size))
        self.Y = tf.placeholder(tf.float32, (None, None, self.state_size))
        self.hidden_layer = tf.placeholder(tf.float32,
                                           (None, 2 * self.LAYER_SIZE))
        self.ACTION = tf.placeholder(tf.float32, (None))
        self.REWARD = tf.placeholder(tf.float32, (None))
        self.batch_size = tf.shape(self.ACTION)[0]
        self.seq_len = tf.shape(self.X)[1]

        with tf.variable_scope('curiosity_model'):
            action = tf.reshape(self.ACTION, (-1, 1, 1))
            repeat_action = tf.tile(action, [1, self.seq_len, 1])
            state_action = tf.concat([self.X, repeat_action], axis=-1)
            save_state = tf.identity(self.Y)
            cell = tf.nn.rnn_cell.LSTMCell(self.LAYER_SIZE,
                                           state_is_tuple=False)
            self.rnn, last_state = tf.nn.dynamic_rnn(
                inputs=state_action,
                cell=cell,
                dtype=tf.float32,
                initial_state=self.hidden_layer)
            self.curiosity_logits = tf.layers.dense(self.rnn[:, -1],
                                                    self.state_size)
            self.curiosity_cost = tf.reduce_sum(
                tf.square(save_state[:, -1] - self.curiosity_logits), axis=1)

            self.curiosity_optimizer = tf.train.RMSPropOptimizer(
                self.LEARNING_RATE).minimize(
                    tf.reduce_mean(self.curiosity_cost))

        total_reward = tf.add(self.curiosity_cost, self.REWARD)

        with tf.variable_scope("q_model"):
            with tf.variable_scope("eval_net"):
                cell = tf.nn.rnn_cell.LSTMCell(self.LAYER_SIZE,
                                               state_is_tuple=False)
                rnn, self.last_state = tf.nn.dynamic_rnn(
                    inputs=self.X,
                    cell=cell,
                    dtype=tf.float32,
                    initial_state=self.hidden_layer)
                self.logits = tf.layers.dense(rnn[:, -1], self.OUTPUT_SIZE)

            with tf.variable_scope("target_net"):
                cell = tf.nn.rnn_cell.LSTMCell(self.LAYER_SIZE,
                                               state_is_tuple=False)
                rnn, last_state = tf.nn.dynamic_rnn(
                    inputs=self.Y,
                    cell=cell,
                    dtype=tf.float32,
                    initial_state=self.hidden_layer)
                y_q = tf.layers.dense(rnn[:, -1], self.OUTPUT_SIZE)

            q_target = total_reward + self.GAMMA * tf.reduce_max(y_q, axis=1)
            action = tf.cast(self.ACTION, tf.int32)
            action_indices = tf.stack(
                [tf.range(self.batch_size, dtype=tf.int32), action], axis=1)
            q = tf.gather_nd(params=self.logits, indices=action_indices)
            self.cost = tf.losses.mean_squared_error(labels=q_target,
                                                     predictions=q)
            self.optimizer = tf.train.RMSPropOptimizer(
                self.LEARNING_RATE).minimize(
                    self.cost,
                    var_list=tf.get_collection(
                        tf.GraphKeys.TRAINABLE_VARIABLES, "q_model/eval_net"))

        t_params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                     scope='q_model/target_net')
        e_params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                     scope='q_model/eval_net')
        self.target_replace_op = [
            tf.assign(t, e) for t, e in zip(t_params, e_params)
        ]

        self.sess = tf.InteractiveSession()
        self.sess.run(tf.global_variables_initializer())
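
# A usage sketch for the recurrent branches above: hidden_layer is the flattened LSTM
# state (state_is_tuple=False), so the first call feeds zeros of shape
# (batch, 2 * LAYER_SIZE) and later calls carry last_state forward. `agent` and
# `state_window` (shape [timesteps, state_size]) are illustrative; LAYER_SIZE is the
# class constant assumed by the constructor but not shown in this excerpt.
import numpy as np

init_state = np.zeros((1, 2 * agent.LAYER_SIZE))
q_values, next_hidden = agent.sess.run(
    [agent.logits, agent.last_state],
    feed_dict={agent.X: np.expand_dims(state_window, axis=0),
               agent.hidden_layer: init_state})
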