Example #1
    def _build_qnet(self):
        """
    Build q-network
    """
        with tf.variable_scope(self.scope):
            self.state_input = tf.placeholder(tf.float32,
                                              [None, self.state_size])
            self.action = tf.placeholder(tf.int32, [None])
            self.target_q = tf.placeholder(tf.float32, [None])

            fc1 = tf_utils.fc(self.state_input,
                              n_output=self.n_hidden_1,
                              activation_fn=tf.nn.relu)
            fc2 = tf_utils.fc(fc1,
                              n_output=self.n_hidden_2,
                              activation_fn=tf.nn.relu)
            self.q_values = tf_utils.fc(fc2,
                                        self.action_size,
                                        activation_fn=None)

            action_mask = tf.one_hot(self.action, self.action_size, 1.0, 0.0)
            q_value_pred = tf.reduce_sum(self.q_values * action_mask, 1)

            self.loss = tf.reduce_mean(
                tf.square(tf.subtract(self.target_q, q_value_pred)))
            self.optimizer = tf.train.AdamOptimizer(self.lr)
            self.train_op = self.optimizer.minimize(
                self.loss, global_step=tf.contrib.framework.get_global_step())
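All of these snippets lean on a small `tf_utils.fc` helper that the excerpts do not include. Judging only from how it is called here (output size, optional `scope`, `activation_fn`, and `initializer`), a minimal TF1.x stand-in could look like the sketch below; the actual helper in the source repository may differ.

import tensorflow as tf

def fc(inputs, n_output, scope=None, activation_fn=None, initializer=None):
    """Minimal dense layer: activation(inputs @ W + b) -- assumed stand-in."""
    if initializer is None:
        initializer = tf.glorot_uniform_initializer()
    # default_name is auto-uniquified (fc, fc_1, ...) when no scope is given
    with tf.variable_scope(scope, default_name="fc"):
        n_input = inputs.get_shape().as_list()[-1]
        W = tf.get_variable("W", [n_input, n_output], initializer=initializer)
        b = tf.get_variable("b", [n_output], initializer=tf.zeros_initializer())
        out = tf.matmul(inputs, W) + b
        return activation_fn(out) if activation_fn is not None else out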
Example #2
 def _build_network(self, name, conv):
     if conv:
         input_s = tf.placeholder(tf.float32,
                                  [None, self.width, self.height, 1])
         with tf.variable_scope(name):
             conv1 = tf_utils.conv2d(input_s, 64, (3, 3), 1)
             conv2 = tf_utils.conv2d(conv1, 32, (1, 1), 1)
             conv3 = tf_utils.conv2d(conv2, 32, (1, 1), 1)
             reward = tf_utils.conv2d(conv3, 1, (1, 1), 1)
         theta = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                   scope=name)
         return input_s, tf.squeeze(tf.reshape(reward,
                                               (-1, self.n_input))), theta
     else:
         input_s = tf.placeholder(tf.float32, [None, self.n_input])
         with tf.variable_scope(name):
             fc1 = tf_utils.fc(
                 input_s,
                 self.n_h1,
                 scope="fc1",
                 activation_fn=tf.nn.elu,
                 initializer=tf.contrib.layers.variance_scaling_initializer(
                     mode="FAN_IN"))
             fc2 = tf_utils.fc(
                 fc1,
                 self.n_h2,
                 scope="fc2",
                 activation_fn=tf.nn.elu,
                 initializer=tf.contrib.layers.variance_scaling_initializer(
                     mode="FAN_IN"))
             reward = tf_utils.fc(fc2, self.n_input, scope="reward")
         theta = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                   scope=name)
         return input_s, tf.squeeze(reward), theta
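The function returns `theta` so a caller can push an externally supplied gradient (for instance, the state-visitation difference used in deep MaxEnt IRL) back through the predicted reward. A minimal sketch of that pattern, with every name outside the excerpt assumed:

# Assumed usage: input_s, reward, theta come from _build_network(name, conv=False).
grad_r = tf.placeholder(tf.float32, reward.get_shape())   # d(loss)/d(reward), fed externally
theta_grads = tf.gradients(reward, theta, grad_ys=grad_r)  # chain rule into the parameters
train_op = tf.train.AdamOptimizer(1e-3).apply_gradients(list(zip(theta_grads, theta)))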
Example #3
    def qnetwork(self):
        """
        创建Q network
        :return:
        """
        with tf.variable_scope(self.name_scope):
            self.state_input = tf.placeholder(tf.float32,
                                              [None, self.state_size])  # state input
            self.action = tf.placeholder(tf.int32, [None])  # action input
            self.target_q = tf.placeholder(tf.float32, [None])  # target Q value

            fc1 = tf_utils.fc(self.state_input,
                              n_output=16,
                              activation_fn=tf.nn.relu)
            fc2 = tf_utils.fc(fc1, n_output=32, activation_fn=tf.nn.relu)
            fc3 = tf_utils.fc(fc2, n_output=16, activation_fn=tf.nn.relu)
            self.q_values = tf_utils.fc(fc3,
                                        self.action_size,
                                        activation_fn=None)
            # one-hot encode the selected action
            action_mask = tf.one_hot(self.action, self.action_size, 1.0, 0.0)
            # predicted Q value for the selected action
            q_value_pred = tf.reduce_sum(self.q_values * action_mask, 1)
            # Q-network loss
            self.loss = tf.reduce_mean(
                tf.square(tf.subtract(self.target_q, q_value_pred)))
            self.optimizer = tf.train.AdamOptimizer(self.lr)
            self.train_op = self.optimizer.minimize(self.loss)
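The `target_q` placeholder is filled outside the graph. A minimal sketch of one DQN-style update using the tensors above; `sess`, `qnet`, `batch`, and `gamma` are illustrative names, not part of the excerpt:

next_q = sess.run(qnet.q_values, {qnet.state_input: batch["next_states"]})
targets = batch["rewards"] + gamma * (1.0 - batch["dones"]) * next_q.max(axis=1)
_, loss = sess.run(
    [qnet.train_op, qnet.loss],
    {qnet.state_input: batch["states"],
     qnet.action: batch["actions"],
     qnet.target_q: targets})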
Example #4
 def _build_network(self, name):
     input_s = tf.placeholder(tf.float32, [None, self.state_size])
     action = tf.placeholder(tf.float32, [None, self.action_size])
     with tf.variable_scope(name):
         layer_1 = tf_utils.fc(
             input_s,
             self.n_h1,
             scope="fc1",
             activation_fn=tf.nn.relu,
             initializer=tf.contrib.layers.variance_scaling_initializer(
                 mode="FAN_IN"))
          # concatenate the action into the second hidden layer of the critic
         layer_2 = tf_utils.fc(
             tf.concat((layer_1, action), 1),
             self.n_h2,
             scope="fc2",
             activation_fn=tf.nn.relu,
             initializer=tf.contrib.layers.variance_scaling_initializer(
                 mode="FAN_IN"))
         q_value = tf_utils.fc(layer_2,
                               1,
                               scope="out",
                               initializer=tf.random_uniform_initializer(
                                   -3e-3, 3e-3))
     critic_variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                          scope=name)
     return input_s, action, critic_variables, tf.squeeze(q_value)
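Concatenating the action into the second hidden layer and initializing the output layer with a small uniform range is the usual shape of a DDPG-style critic. A sketch of how this builder is typically used inside the same class (the scope names and the 0.001 soft-update rate are assumptions):

# Build online and target critics, then define a Polyak/soft target update.
state_in, action_in, critic_vars, q_value = self._build_network("critic")
t_state_in, t_action_in, target_vars, target_q = self._build_network("target_critic")
soft_update_op = [t_var.assign(0.001 * var + 0.999 * t_var)
                  for var, t_var in zip(critic_vars, target_vars)]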
Example #5
 def _build_network(self, name):
   input_s = tf.placeholder(tf.float32, [None, self.n_input])
   with tf.variable_scope(name):
     fc1 = tf_utils.fc(input_s, self.n_h1, scope="fc1", activation_fn=tf.nn.elu,
       initializer=tf.contrib.layers.variance_scaling_initializer(mode="FAN_IN"))
     fc2 = tf_utils.fc(fc1, self.n_h2, scope="fc2", activation_fn=tf.nn.elu,
       initializer=tf.contrib.layers.variance_scaling_initializer(mode="FAN_IN"))
     reward = tf_utils.fc(fc2, 1, scope="reward")
   theta = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=name)
   return input_s, reward, theta
Example #6
def load_detection_model(ws):
    # ws holds four (weight, bias) pairs, one per fully connected layer below
    assert len(ws) == 8
    data_blob = tf.placeholder(tf.float32)
    rois_blob = tf.placeholder(tf.float32, shape=[None, 5])
    # 7x7 RoI pooling at a 1/16 spatial scale (0.0625)
    pool5, _ = roi_pooling_op.roi_pool(data_blob, rois_blob, 7, 7, 0.0625)
    flat_pool5 = tf.reshape(pool5, [-1, 25088])  # 512 * 7 * 7
    fc6 = fc(flat_pool5, ws[0], ws[1])
    fc7 = fc(fc6, ws[2], ws[3])
    cls_prob = fc(fc7, ws[4], ws[5], 'softmax')
    bbox_pred = fc(fc7, ws[6], ws[7], 'linear')
    return data_blob, rois_blob, cls_prob, bbox_pred
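Given the reshape to 25088 = 512 * 7 * 7, `data_blob` appears to be a 512-channel convolutional feature map rather than a raw image, and the 0.0625 scale maps image-space RoIs onto it (1/16, as in Fast R-CNN, where each RoI row is (batch_index, x1, y1, x2, y2)). A hedged usage sketch with illustrative array names:

data_blob, rois_blob, cls_prob, bbox_pred = load_detection_model(ws)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    probs, boxes = sess.run(
        [cls_prob, bbox_pred],
        feed_dict={data_blob: conv_features,  # NHWC feature map for one image
                   rois_blob: rois})          # rows: (batch_idx, x1, y1, x2, y2)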
Example #7
 def _build_network(self, name):
   input_s = tf.placeholder(tf.float32, [None, self.n_input])
   img_in = tf.reshape(input_s, shape=[-1, 1, 4, 1])
   with tf.variable_scope(name):
     cnv1 = tf_utils.conv2d(img_in, 2, (2,2))
     fltn_conv = tf_utils.flatten(cnv1)
     # fc1 = tf_utils.fc(input_s, self.n_h1, scope="fc1", activation_fn=tf.nn.elu,
     #   initializer=tf.contrib.layers.variance_scaling_initializer(mode="FAN_IN"))
     fc2 = tf_utils.fc(fltn_conv, self.n_h2, scope="fc2", activation_fn=tf.nn.elu,
       initializer=tf.contrib.layers.variance_scaling_initializer(mode="FAN_IN"))
     reward = tf_utils.fc(fc2, 1, scope="reward")
   theta = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=name)
   return input_s, reward, theta
Example #8
    def _build_network(self):
        with tf.variable_scope(self.scope):
            self.state_input = tf.placeholder(tf.float32,
                                              [None, self.state_size],
                                              name="state_input")
            self.action = tf.placeholder(tf.int32, [None], name="action")
            self.fc1 = tf_utils.fc(
                self.state_input,
                self.n_h1,
                scope="fc1",
                activation_fn=tf.nn.relu,
                initializer=tf.contrib.layers.variance_scaling_initializer(
                    mode="FAN_IN"))
            self.fc1_softmax = tf.nn.softmax(self.fc1, name="fc1_softmax")
            self.fc2 = tf_utils.fc(
                self.fc1,
                self.n_h2,
                scope="fc2",
                activation_fn=tf.nn.relu,
                initializer=tf.contrib.layers.variance_scaling_initializer(
                    mode="FAN_IN"))
            self.fc2_softmax = tf.nn.softmax(self.fc2, name="fc2_softmax")
            self.q_value = tf_utils.fc(self.fc2,
                                       self.action_size,
                                       scope="q_value",
                                       activation_fn=None)

            self.action_pred = tf.nn.softmax(self.q_value,
                                             name="action_prediction")
            self.action_target = tf.one_hot(self.action,
                                            self.action_size,
                                            on_value=1.0,
                                            off_value=0.0,
                                            name="action_target")
            # cross-entropy expects unnormalized logits, so use q_value here
            # (action_pred is already softmaxed) and reduce to a scalar loss
            self.loss = tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits_v2(
                    labels=self.action_target, logits=self.q_value),
                name="loss")
            #self.loss = tf.reduce_mean(tf.square(tf.subtract(self.action_pred, self.action_target)))
            self.optimizer = tf.train.AdamOptimizer(self.learning_rate,
                                                    name="optimizer")
            self.train_op = self.optimizer.minimize(
                self.loss,
                global_step=tf.train.get_global_step(),
                name="train_op")
            new_variables = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                              scope=self.scope)
        return new_variables
Example #9
    def _build_network(self, name):
        input_s = tf.placeholder(tf.float32, [None, self.n_input])
        input_inv = tf.placeholder(tf.float32, [None, self.n_input])
        img_in = tf.reshape(input_s, shape=[-1, 1, 4, 1])
        img_inv = tf.reshape(input_inv, shape=[-1, 1, 4, 1])
        with tf.variable_scope(name):
            cnv1 = tf_utils.conv2d(img_in, 2, (2, 2))
            # max_conv_p = tf_utils.max_pool(cnv1_p)
            fltn_conv = tf_utils.flatten(cnv1)
            fc1 = tf_utils.fc(
                fltn_conv,
                self.n_h2,
                scope="fc1",
                activation_fn=tf.nn.elu,
                initializer=tf.contrib.layers.variance_scaling_initializer(
                    mode="FAN_IN"))

            cnv1_inv = tf_utils.conv2d(img_inv, 2, (2, 2))
            # max_conv_p = tf_utils.max_pool(cnv1_p)
            fltn_conv_inv = tf_utils.flatten(cnv1_inv)
            fc1_inv = tf_utils.fc(
                fltn_conv_inv,
                self.n_h2,
                scope="fc1_inv",
                activation_fn=tf.nn.elu,
                initializer=tf.contrib.layers.variance_scaling_initializer(
                    mode="FAN_IN"))

            subt = tf.subtract(fc1, fc1_inv)
            # blah = tf.multiply(tf.divide(fc2, fc1_p), 0.35)
            # comb = tf.concat([fc1, fc1_inv], 1)
            fc_p1 = tf_utils.fc(
                subt,
                2 * self.n_h1,
                scope="fc_p1",
                activation_fn=tf.nn.elu,
                initializer=tf.contrib.layers.variance_scaling_initializer(
                    mode="FAN_IN"))
            # fc_p2 = tf_utils.fc(fc_p1, self.n_h2, scope="fc_p2", activation_fn=tf.nn.elu,
            #   initializer=tf.contrib.layers.variance_scaling_initializer(mode="FAN_IN"))
            reward = tf_utils.fc(fc_p1, 1, scope="reward")
        theta = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=name)
        return input_s, input_inv, reward, theta
Example #10
    def _build_network(self, name):
        input_s = tf.placeholder(tf.float32, [None, self.state_size])
        input_a = tf.placeholder(tf.int32, [None])
        advantage = tf.placeholder(tf.float32, [None])
        target_v = tf.placeholder(tf.float32, [None])

        with tf.variable_scope(name):
            layer_1 = tf_utils.fc(
                input_s,
                self.n_h1,
                scope="fc1",
                activation_fn=tf.nn.relu,
                initializer=tf.contrib.layers.variance_scaling_initializer(
                    mode="FAN_IN"))
            layer_2 = tf_utils.fc(
                layer_1,
                self.n_h2,
                scope="fc2",
                activation_fn=tf.nn.relu,
                initializer=tf.contrib.layers.variance_scaling_initializer(
                    mode="FAN_IN"))
            policy = tf_utils.fc(
                layer_2,
                self.action_size,
                activation_fn=tf.nn.softmax,
                scope="policy",
                initializer=tf_utils.normalized_columns_initializer(0.01))
            value = tf_utils.fc(
                layer_2,
                1,
                activation_fn=None,
                scope="value",
                initializer=tf_utils.normalized_columns_initializer(1.0))

            action_mask = tf.one_hot(input_a, self.action_size, 1.0, 0.0)
            action_est = tf.reduce_sum(policy * action_mask, 1)

        model_variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                            scope=name)
        return input_s, input_a, advantage, target_v, policy, value, action_est, model_variables
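The builder returns the raw pieces but not an objective. One common way to assemble them into an actor-critic loss (a sketch, not taken from this code; the 0.5, 0.01, and 1e-4 coefficients are illustrative):

value_loss = tf.reduce_sum(tf.square(target_v - tf.reshape(value, [-1])))
entropy = -tf.reduce_sum(policy * tf.log(policy + 1e-8))
policy_loss = -tf.reduce_sum(tf.log(action_est + 1e-8) * advantage)
total_loss = 0.5 * value_loss + policy_loss - 0.01 * entropy
train_op = tf.train.AdamOptimizer(1e-4).minimize(total_loss)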
Example #11
  def _build_policy_net(self):
    """Build policy network"""
    with tf.variable_scope(self.scope):
      self.state_input = tf.placeholder(tf.float32, [None, self.state_size])
      self.action = tf.placeholder(tf.int32, [None])
      self.target = tf.placeholder(tf.float32, [None])

      layer_1 = tf_utils.fc(self.state_input, self.n_hidden_1, tf.nn.relu)
      layer_2 = tf_utils.fc(layer_1, self.n_hidden_2, tf.nn.relu)

      self.action_values = tf_utils.fc(layer_2, self.action_size)
      action_mask = tf.one_hot(self.action, self.action_size, 1.0, 0.0)
      self.action_prob = tf.nn.softmax(self.action_values)
      self.action_value_pred = tf.reduce_sum(self.action_prob * action_mask, 1)

      # L2 regularization over all trainable variables
      self.l2_loss = tf.add_n(
          [tf.nn.l2_loss(v) for v in tf.trainable_variables()])
      # policy gradient loss: -log pi(a|s) weighted by the target return
      self.pg_loss = tf.reduce_mean(
          -tf.log(self.action_value_pred) * self.target)

      self.loss = self.pg_loss + 0.002 * self.l2_loss
      self.optimizer = tf.train.AdamOptimizer(learning_rate=self.lr)
      self.train_op = self.optimizer.minimize(
          self.loss, global_step=tf.contrib.framework.get_global_step())
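`self.target` in the policy-gradient loss above is usually fed with the discounted return of each visited state. A small self-contained helper for computing it (an assumed utility, not part of the excerpt):

import numpy as np

def discounted_returns(rewards, gamma=0.99):
    """Compute G_t = r_t + gamma * G_{t+1} over one episode."""
    returns = np.zeros(len(rewards), dtype=np.float64)
    running = 0.0
    for t in reversed(range(len(rewards))):
        running = rewards[t] + gamma * running
        returns[t] = running
    return returns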
Example #12
    def _build_model(self):
        self._observation = tf.placeholder(tf.float32, [None, self.state_size])

        self._action_target = tf.placeholder(tf.int32, [None],
                                             name='action_target')
        self._q_target = tf.placeholder(tf.float32, [None],
                                        name='q_value_target')

        with tf.variable_scope('deepq_model'):
            _hidden = fc(self._observation,
                         h_size=24,
                         name='fc_input',
                         act=tf.nn.relu)
            for idx in range(2):
                _hidden = fc(_hidden,
                             h_size=24,
                             name='fc' + str(idx),
                             act=tf.nn.relu)
            self._q_hat = fc(_hidden,
                             h_size=self.action_size,
                             name='fc',
                             act=None)

        # turn (0..2) into 1-hot encoding
        _action_one_hot = tf.one_hot(self._action_target,
                                     self.action_size,
                                     1.0,
                                     0.0,
                                     name='action_target_one_hot')

        # values collected following action_target
        _q_acted = tf.reduce_sum(self._q_hat * _action_one_hot,
                                 reduction_indices=1,
                                 name='q_hat')
        _delta = self._q_target - _q_acted

        self._loss = tf.reduce_mean(tf.square(_delta))
        self._train_op = tf.train.AdamOptimizer(
            learning_rate=self.learning_rate).minimize(self._loss)
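The usual companion to this model is an epsilon-greedy action picker over `self._q_hat`. A hedged sketch of such a method (it assumes numpy imported as np and a session stored on self._sess; neither appears in the excerpt):

    def act(self, state, epsilon=0.1):
        # with probability epsilon explore, otherwise take the greedy action
        if np.random.rand() < epsilon:
            return np.random.randint(self.action_size)
        q = self._sess.run(self._q_hat,
                           {self._observation: state[np.newaxis, :]})
        return int(np.argmax(q[0]))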
Example #13
def RelationNetwork(encoder, hidden_size, trainable=True):
    x = encoder
    with tf.variable_scope('RelationNetwork') as scope:
        with tf.variable_scope('layer1'):
            x = utils.conv2d(x,
                             name='conv1',
                             shape=[3, 3, 128, 64],
                             padding='SAME',
                             activation_func=tf.nn.relu,
                             trainable=trainable,
                             use_bn=True)
            x = tf.nn.max_pool(x,
                               ksize=[1, 2, 2, 1],
                               strides=[1, 2, 2, 1],
                               padding='SAME')

        with tf.variable_scope('layer2'):
            x = utils.conv2d(x,
                             name='conv1',
                             shape=[3, 3, 64, 64],
                             padding='SAME',
                             activation_func=tf.nn.relu,
                             trainable=trainable,
                             use_bn=True)
            x = tf.nn.max_pool(x,
                               ksize=[1, 2, 2, 1],
                               strides=[1, 2, 2, 1],
                               padding='SAME')

        with tf.variable_scope('fc1'):
            x = utils.fc(x,
                         num_out=hidden_size,
                         name='fc1',
                         activation_func=tf.nn.relu)

        with tf.variable_scope('fc2'):
            x = utils.fc(x, num_out=1, name='fc2', activation_func=None)

    return x
Example #14
def build_model(net):
  # Construct the model.
  X = tf.placeholder("float", [BATCH, T_in, IMG_H, IMG_W, IMG_CH])
  Y = tf.placeholder("float", [BATCH, T_pred, IMG_H, IMG_W, IMG_CH])

  # Flatten the incoming images to (BATCH * T_in, IMG_H, IMG_W, IMG_CH).
  X_flat = tf.reshape(X, [BATCH * T_in, IMG_H, IMG_W, IMG_CH])
  conv1 = conv2d(X_flat, net.weights['wc1'], net.biases['bc1'], 2)
  conv2 = conv2d(conv1, net.weights['wc2'], net.biases['bc2'], 2)
  conv3 = conv2d(conv2, net.weights['wc3'], net.biases['bc3'], 2)

  # Flatten everything for the LSTM: (BATCH, T_in, features).
  res = tf.reshape(conv3, [BATCH, T_in, -1])
  prediction = net.EncoderDecoder(res)

  # Apply the fully connected output layer over (BATCH * T_pred, features).
  fc_out = fc(prediction, net.weights['wfc1'], net.biases['bfc1'])
  # Reshape to (BATCH, T_pred, IMG_H * IMG_W * IMG_CH).
  fc_out = tf.reshape(fc_out, [BATCH, T_pred, IMG_H * IMG_W * IMG_CH])
  sig_out = tf.sigmoid(fc_out)

  # Difference between the prediction and the target frames.
  Y_flat = tf.reshape(Y, [BATCH, T_pred, IMG_H * IMG_W * IMG_CH])
  diff = fc_out - Y_flat

  # Compute the loss: squared error plus L2 weight regularization.
  vs = tf.trainable_variables()
  lossL2 = tf.add_n([tf.nn.l2_loss(v) for v in vs
                     if 'bias' not in v.name]) * 0.001

  loss_op = tf.reduce_sum(tf.reduce_sum(diff * diff, axis=2), axis=1) + lossL2
  loss_op = tf.reduce_mean(loss_op)

  train_op = tf.train.AdamOptimizer(learning_rate=LR).minimize(loss_op)

  return fc_out, sig_out, X, Y, loss_op, train_op
Example #15
    def _build_network(self, name, agent_class):
        # input_s = tf.placeholder(tf.float32, [None, self.state_size])
        input_a = tf.placeholder(tf.int32, [None])
        advantage = tf.placeholder(tf.float32, [None])
        target_v = tf.placeholder(tf.float32, [None])

        with tf.variable_scope(name):
            # layer_1 = tf_utils.fc(
            #     input_s,
            #     self.n_h1,
            #     scope="fc1",
            #     activation_fn=tf.nn.relu,
            #     initializer=tf.contrib.layers.variance_scaling_initializer(
            #         mode="FAN_IN"))
            # layer_2 = tf_utils.fc(
            #     layer_1,
            #     self.n_h2,
            #     scope="fc2",
            #     activation_fn=tf.nn.relu,
            #     initializer=tf.contrib.layers.variance_scaling_initializer(
            #         mode="FAN_IN"))
            # policy = tf_utils.fc(
            #     layer_2,
            #     self.action_size,
            #     activation_fn=tf.nn.softmax,
            #     scope="policy",
            #     initializer=tf_utils.normalized_columns_initializer(0.01)) + 1e-8
            # value = tf_utils.fc(layer_2, 1, activation_fn=None,
            #                     scope="value", initializer=tf_utils.normalized_columns_initializer(1.0))

            self.agent = agent_class(
                simulate_steps=self.SIM_STEPS,
                max_bp_steps=self.BP_STEPS,
                mult_fac=self.MULT_FAC,
                discount_factor=1,
                scope=self.net_scope_name,
                # goal_position=self.env_args['goal_position'],
                # disappearance_probability=self.env_args['disappearance_probability'],
                # sequential=self.sequential
            )

            # Extract Policy and value nodes

            input_s = self.agent.init_state_pl
            final_action_belief = self.agent.final_action_belief * Temperature
            self.final_state = self.agent.final_state

            if LAYER_OVER_POLICY:
                policy = tf_utils.fc(
                    final_action_belief,
                    self.action_size,
                    activation_fn=tf.nn.softmax,
                    scope="policy",
                    initializer=tf_utils.normalized_columns_initializer(0.01)
                ) + 1e-8
            else:
                policy = tf.nn.softmax(final_action_belief)[0] + 1e-8
                # input_s1, input_s2 = tf.split(input_s, [30, 2], axis=1)
                # input_s_new = tf.expand_dims(input_s1, 2) + tf.expand_dims(input_s2, 1)
                input_s_new = input_s
                input_s_new = tf.exp(input_s_new)
                input_s_new = tcl.flatten(input_s_new)

            # layer1 = tf_utils.fc(
            #     input_s_new,
            #     300,
            #     scope="fc1",
            #     activation_fn=tf.nn.relu,
            #     initializer=tf.contrib.layers.variance_scaling_initializer(mode="FAN_IN")
            # )

            # value = tf_utils.fc(layer1, 1, activation_fn=None, scope="value", initializer=tf_utils.normalized_columns_initializer(1.0))

            value = self._create_value_network(input_s_new)

            action_mask = tf.one_hot(input_a, self.action_size, 1.0, 0.0)
            action_est = tf.reduce_sum(policy * action_mask, 1)

        model_variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=name)
        return input_s, input_a, advantage, target_v, policy, value, action_est, model_variables