Example #1
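A decoder layer from what appears to be a ladder-network implementation: it pairs clean and corrupted encoder paths, denoises, and applies a second normalization.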
    def __init__(self,
                 clean_encoder_layer,
                 corrupted_encoder_layer,
                 previous_decoder_layer=None,
                 is_training_phase=True):
        with tf.name_scope("decoder_layer") as scope:
            is_first_decoder_layer = previous_decoder_layer is None
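            # The first decoder layer reads the corrupted encoder's output
            # directly; deeper layers project the previous layer's denoised
            # output through a learned matrix V.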
            if is_first_decoder_layer:
                pre_1st_normalization = corrupted_encoder_layer.post_activation
            else:
                input_size = _layer_size(previous_decoder_layer.post_denoising)
                output_size = _layer_size(clean_encoder_layer.post_activation)
                weights = _weight_variable([input_size, output_size], name='V')
                pre_1st_normalization = tf.matmul(
                    previous_decoder_layer.post_denoising, weights)

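            # Normalize, denoise against the corrupted pre-activation, then
            # rescale with the clean encoder's batch statistics.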
            pre_denoising, _, _ = batch_norm(
                pre_1st_normalization, is_training_phase=is_training_phase)
            post_denoising = self._denoise(
                corrupted_encoder_layer.pre_activation, pre_denoising)
            post_2nd_normalization = \
              (post_denoising - clean_encoder_layer.batch_mean) / clean_encoder_layer.batch_std

            self.post_denoising = post_denoising
            self.post_2nd_normalization = post_2nd_normalization
Example #2
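The matching encoder layer: a linear projection, batch normalization, additive noise, and a learned scale-and-shift before the nonlinearity.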
  def __init__(self, inputs, output_size, non_linearity,
      noise_level, is_training_phase, reuse_variables=None):
    with tf.name_scope("encoder_layer") as scope:
      self._create_or_reuse_variables(reuse_variables, _layer_size(inputs), output_size)

      self.pre_normalization = tf.matmul(inputs, self.weights)
      pre_noise, self.batch_mean, self.batch_std = batch_norm(
          self.pre_normalization, is_training_phase=is_training_phase)
      self.pre_activation = self._add_noise(pre_noise, noise_level)
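      # Shift-then-scale, gamma * (z + beta), as in ladder-network
      # implementations (rather than the usual gamma * z + beta).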
      beta_gamma = self.gamma * (self.pre_activation + self.beta)
      self.post_activation = non_linearity(beta_gamma)
Example #3
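The critic of a DDPG agent: a batch-normalized Q-network plus a target copy that tracks it softly via TAU. N_HIDDEN_1, N_HIDDEN_2, LEARNING_RATE, TAU, and the custom batch_norm class are presumably defined at module level in the original file.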
    def __init__(self, num_states, num_actions):

        self.sess = tf.InteractiveSession()

        # Critic Q Network:
        self.critic_state_in = tf.placeholder("float", [None, num_states])
        self.critic_action_in = tf.placeholder("float", [None, num_actions])
        self.W1_c = tf.Variable(tf.random_uniform([num_states, N_HIDDEN_1],
                                                  -1 / math.sqrt(num_states),
                                                  1 / math.sqrt(num_states)),
                                name="W1_c")
        self.B1_c = tf.Variable(tf.random_uniform([N_HIDDEN_1],
                                                  -1 / math.sqrt(num_states),
                                                  1 / math.sqrt(num_states)),
                                name="B1_c")
        self.W2_c = tf.Variable(tf.random_uniform(
            [N_HIDDEN_1, N_HIDDEN_2], -1 / math.sqrt(N_HIDDEN_1 + num_actions),
            1 / math.sqrt(N_HIDDEN_1 + num_actions)),
                                name="W2_c")
        self.B2_c = tf.Variable(tf.random_uniform(
            [N_HIDDEN_2], -1 / math.sqrt(N_HIDDEN_1 + num_actions),
            1 / math.sqrt(N_HIDDEN_1 + num_actions)),
                                name="B2_c")
        self.W2_action_c = tf.Variable(
            tf.random_uniform([num_actions, N_HIDDEN_2],
                              -1 / math.sqrt(N_HIDDEN_1 + num_actions),
                              1 / math.sqrt(N_HIDDEN_1 + num_actions)),
            name="W2_action_c")
        self.W3_c = tf.Variable(tf.random_uniform([N_HIDDEN_2, 1], -0.003,
                                                  0.003),
                                name="W3_c")
        self.B3_c = tf.Variable(tf.random_uniform([1], -0.003, 0.003),
                                name="B3_c")

        self.is_training = tf.placeholder(tf.bool, [])
        self.H1_t = tf.matmul(self.critic_state_in, self.W1_c)
        self.H1_c_bn = batch_norm(self.H1_t, N_HIDDEN_1, self.is_training,
                                  self.sess)

        self.H1_c = tf.nn.relu(self.H1_c_bn.bnorm) + self.B1_c

        self.H2_t = tf.matmul(self.H1_c, self.W2_c) + tf.matmul(
            self.critic_action_in, self.W2_action_c)
        self.H2_c_bn = batch_norm(self.H2_t, N_HIDDEN_2, self.is_training,
                                  self.sess)
        self.H2_c = tf.nn.relu(self.H2_c_bn.bnorm) + self.B2_c

        self.critic_q_model = tf.matmul(self.H2_c, self.W3_c) + self.B3_c

        # Target Critic Q Network:
        self.t_critic_state_in = tf.placeholder("float", [None, num_states])
        self.t_critic_action_in = tf.placeholder("float", [None, num_actions])
        self.t_W1_c = tf.Variable(tf.random_uniform([num_states, N_HIDDEN_1],
                                                    -1 / math.sqrt(num_states),
                                                    1 / math.sqrt(num_states)),
                                  name="t_W1_c")
        self.t_B1_c = tf.Variable(tf.random_uniform([N_HIDDEN_1],
                                                    -1 / math.sqrt(num_states),
                                                    1 / math.sqrt(num_states)),
                                  name="t_B1_c")
        self.t_W2_c = tf.Variable(tf.random_uniform(
            [N_HIDDEN_1, N_HIDDEN_2], -1 / math.sqrt(N_HIDDEN_1 + num_actions),
            1 / math.sqrt(N_HIDDEN_1 + num_actions)),
                                  name="t_W2_c")
        self.t_W2_action_c = tf.Variable(
            tf.random_uniform([num_actions, N_HIDDEN_2],
                              -1 / math.sqrt(N_HIDDEN_1 + num_actions),
                              1 / math.sqrt(N_HIDDEN_1 + num_actions)),
            name="t_W2_action_c")
        self.t_B2_c = tf.Variable(tf.random_uniform(
            [N_HIDDEN_2], -1 / math.sqrt(N_HIDDEN_1 + num_actions),
            1 / math.sqrt(N_HIDDEN_1 + num_actions)),
                                  name="t_B2_c")
        self.t_W3_c = tf.Variable(tf.random_uniform([N_HIDDEN_2, 1], -0.003,
                                                    0.003),
                                  name="t_W3_c")
        self.t_B3_c = tf.Variable(tf.random_uniform([1], -0.003, 0.003),
                                  name="t_B3_c")

        self.t_H1_t = tf.matmul(self.t_critic_state_in, self.t_W1_c)
        self.t_H1_c_bn = batch_norm(self.t_H1_t, N_HIDDEN_1, self.is_training,
                                    self.sess, self.H1_c_bn)
        self.t_H1_c = tf.nn.relu(self.t_H1_c_bn.bnorm) + self.t_B1_c

        self.t_H2_t = tf.matmul(self.t_H1_c, self.t_W2_c) + tf.matmul(
            self.t_critic_action_in, self.t_W2_action_c)
        self.t_H2_c_bn = batch_norm(self.t_H2_t, N_HIDDEN_2, self.is_training,
                                    self.sess, self.H2_c_bn)
        self.t_H2_c = tf.nn.relu(self.t_H2_c_bn.bnorm) + self.t_B2_c

        self.t_critic_q_model = tf.matmul(self.t_H2_c,
                                          self.t_W3_c) + self.t_B3_c

        self.q_value_in = tf.placeholder("float", [None, 1])  # supervisor
        # self.l2_regularizer_loss = tf.nn.l2_loss(self.W1_c)+tf.nn.l2_loss(self.W2_c)+ tf.nn.l2_loss(self.W2_action_c) + tf.nn.l2_loss(self.W3_c)+tf.nn.l2_loss(self.B1_c)+tf.nn.l2_loss(self.B2_c)+tf.nn.l2_loss(self.B3_c)
        # self.l2_regularizer_loss = 0.01 * tf.reduce_sum(tf.pow(self.W2_c, 2)) + \
        #                            0.01 * tf.reduce_sum(tf.pow(self.W1_c, 2)) + \
        #                            0.01 * tf.reduce_sum(tf.pow(self.W3_c, 2)) + \
        #                            0.01 * tf.reduce_sum(tf.pow(self.B1_c, 2)) + \
        #                            0.01 * tf.reduce_sum(tf.pow(self.B2_c, 2)) + \
        #                            0.01 * tf.reduce_sum(tf.pow(self.B3_c, 2)) + \
        #                            0.01 * tf.reduce_sum(tf.pow(self.W2_action_c, 2))
        self.cost = tf.reduce_mean(
            tf.square(self.critic_q_model - self.q_value_in))
        # + self.l2_regularizer_loss

        self.optimizer = tf.train.GradientDescentOptimizer(
            learning_rate=LEARNING_RATE).minimize(self.cost)
        self.act_grad_v = tf.gradients(self.critic_q_model,
                                       self.critic_action_in)
        self.action_gradients = [self.act_grad_v[0]]
        # / tf.to_float(tf.shape(self.act_grad_v[0])[0])]
        self.check_fl = self.action_gradients

        self.sess.run(tf.global_variables_initializer())
        # self.saver = tf.train.Saver()
        # self.saver.save(self.sess, 'DDPG_MIMO', global_step=1000)

        # To initialize critic and target with the same values:

        self.sess.run([
            self.t_W1_c.assign(self.W1_c),
            self.t_B1_c.assign(self.B1_c),
            self.t_W2_c.assign(self.W2_c),
            self.t_W2_action_c.assign(self.W2_action_c),
            self.t_B2_c.assign(self.B2_c),
            self.t_W3_c.assign(self.W3_c),
            self.t_B3_c.assign(self.B3_c)
        ])
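
        # Soft target update: move each target parameter a fraction TAU toward
        # the live critic (Polyak averaging), including the BN target updates.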

        self.update_target_critic_op = [
            self.t_W1_c.assign(TAU * self.W1_c + (1 - TAU) * self.t_W1_c),
            self.t_B1_c.assign(TAU * self.B1_c + (1 - TAU) * self.t_B1_c),
            self.t_W2_c.assign(TAU * self.W2_c + (1 - TAU) * self.t_W2_c),
            self.t_W2_action_c.assign(TAU * self.W2_action_c +
                                      (1 - TAU) * self.t_W2_action_c),
            self.t_B2_c.assign(TAU * self.B2_c + (1 - TAU) * self.t_B2_c),
            self.t_W3_c.assign(TAU * self.W3_c + (1 - TAU) * self.t_W3_c),
            self.t_B3_c.assign(TAU * self.B3_c + (1 - TAU) * self.t_B3_c),
            self.t_H1_c_bn.updateTarget, self.t_H2_c_bn.updateTarget
        ]
Example #4
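The tail of a 1-D convolutional classifier: max-pooling, a batch-normalized fully connected layer with dropout, and a softmax cross-entropy cost. keep_prob, nb_transmitter, and the label placeholder (assumed here to be Y) come from earlier parts of the original script.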
L3 = tf.nn.pool(L3,
                pooling_type='MAX',
                window_shape=[2],
                strides=[2],
                padding='SAME')
print(L3.shape)
'''
    (?, 15, 32)
'''
L3_flat = tf.reshape(L3, [-1, 15 * 32])

W4 = tf.get_variable("W4",
                     shape=[15 * 32, 500],
                     initializer=tf.contrib.layers.xavier_initializer())
b4 = tf.Variable(tf.random_normal([500]))
Bn4 = batch_norm(tf.matmul(L3_flat, W4) + b4)
L4 = tf.nn.relu(Bn4)
L4 = tf.nn.dropout(L4, keep_prob=keep_prob)

W5 = tf.get_variable("W5",
                     shape=[500, nb_transmitter],
                     initializer=tf.contrib.layers.xavier_initializer())
b5 = tf.Variable(tf.random_normal([nb_transmitter]))
hypothesis = tf.matmul(L4, W5) + b5

# logits = tf.matmul(L2, W3) + b3
# hypothesis = tf.nn.softmax(logits)

cost = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits_v2(
        logits=hypothesis,
        # Y: one-hot label placeholder (assumed; the source was truncated here)
        labels=tf.stop_gradient(Y)))
Example #5
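The corresponding DDPG actor: a batch-normalized policy network with a target copy, trained by feeding the critic's action gradients into tf.gradients.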
    def __init__(self, num_states, num_actions):

        self.sess = tf.InteractiveSession()

        # actor network model parameters:
        self.actor_state_in = tf.placeholder("float", [None, num_states])
        self.W1_a = tf.Variable(tf.random_uniform([num_states, N_HIDDEN_1],
                                                  -1 / math.sqrt(num_states),
                                                  1 / math.sqrt(num_states)),
                                name="W1_a")
        self.B1_a = tf.Variable(tf.random_uniform([N_HIDDEN_1],
                                                  -1 / math.sqrt(num_states),
                                                  1 / math.sqrt(num_states)),
                                name="B1_a")
        self.W2_a = tf.Variable(tf.random_uniform([N_HIDDEN_1, N_HIDDEN_2],
                                                  -1 / math.sqrt(N_HIDDEN_1),
                                                  1 / math.sqrt(N_HIDDEN_1)),
                                name="W2_a")
        self.B2_a = tf.Variable(tf.random_uniform([N_HIDDEN_2],
                                                  -1 / math.sqrt(N_HIDDEN_1),
                                                  1 / math.sqrt(N_HIDDEN_1)),
                                name="B2_a")
        self.W3_a = tf.Variable(tf.random_uniform([N_HIDDEN_2, num_actions],
                                                  -0.003, 0.003),
                                name="W3_a")
        self.B3_a = tf.Variable(tf.random_uniform([num_actions], -0.003,
                                                  0.003),
                                name="B3_a")

        self.is_training = tf.placeholder(tf.bool, [])
        self.H1_t = tf.matmul(self.actor_state_in, self.W1_a)
        self.H1_a_bn = batch_norm(self.H1_t, N_HIDDEN_1, self.is_training,
                                  self.sess)
        self.H1_a = tf.nn.relu(self.H1_a_bn.bnorm) + self.B1_a

        self.H2_t = tf.matmul(self.H1_a, self.W2_a)
        self.H2_a_bn = batch_norm(self.H2_t, N_HIDDEN_2, self.is_training,
                                  self.sess)
        self.H2_a = tf.nn.relu(self.H2_a_bn.bnorm) + self.B2_a
        self.actor_model = tf.matmul(self.H2_a, self.W3_a) + self.B3_a

        # target actor network model parameters:
        self.t_actor_state_in = tf.placeholder("float", [None, num_states])
        self.t_W1_a = tf.Variable(tf.random_uniform([num_states, N_HIDDEN_1],
                                                    -1 / math.sqrt(num_states),
                                                    1 / math.sqrt(num_states)),
                                  name="t_W1_a")
        self.t_B1_a = tf.Variable(tf.random_uniform([N_HIDDEN_1],
                                                    -1 / math.sqrt(num_states),
                                                    1 / math.sqrt(num_states)),
                                  name="t_B1_a")
        self.t_W2_a = tf.Variable(tf.random_uniform([N_HIDDEN_1, N_HIDDEN_2],
                                                    -1 / math.sqrt(N_HIDDEN_1),
                                                    1 / math.sqrt(N_HIDDEN_1)),
                                  name="t_W2_a")
        self.t_B2_a = tf.Variable(tf.random_uniform([N_HIDDEN_2],
                                                    -1 / math.sqrt(N_HIDDEN_1),
                                                    1 / math.sqrt(N_HIDDEN_1)),
                                  name="t_B2_a")
        self.t_W3_a = tf.Variable(tf.random_uniform([N_HIDDEN_2, num_actions],
                                                    -0.003, 0.003),
                                  name="t_W3_a")
        self.t_B3_a = tf.Variable(tf.random_uniform([num_actions], -0.003,
                                                    0.003),
                                  name="t_B3_a")

        self.t_is_training = tf.placeholder(tf.bool, [])
        self.t_H1_t = tf.matmul(self.t_actor_state_in, self.t_W1_a)
        self.t_H1_a_bn = batch_norm(self.t_H1_t, N_HIDDEN_1,
                                    self.t_is_training, self.sess,
                                    self.H1_a_bn)
        self.t_H1_a = tf.nn.relu(self.t_H1_a_bn.bnorm) + self.t_B1_a

        self.t_H2_t = tf.matmul(self.t_H1_a, self.t_W2_a)
        self.t_H2_a_bn = batch_norm(self.t_H2_t, N_HIDDEN_2,
                                    self.t_is_training, self.sess,
                                    self.H2_a_bn)
        self.t_H2_a = tf.nn.relu(self.t_H2_a_bn.bnorm) + self.t_B2_a
        self.t_actor_model = tf.matmul(self.t_H2_a, self.t_W3_a) + self.t_B3_a

        # cost of actor network:
        self.q_gradient_input = tf.placeholder("float", [None, num_actions])
        self.actor_parameters = [
            self.W1_a, self.B1_a, self.W2_a, self.B2_a, self.W3_a, self.B3_a,
            self.H1_a_bn.scale, self.H1_a_bn.beta, self.H2_a_bn.scale,
            self.H2_a_bn.beta
        ]
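        # Deterministic policy gradient: push the actor's parameters along the
        # critic's action gradient (q_gradient_input), averaged over the batch.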
        self.parameters_gradients = tf.gradients(
            self.actor_model, self.actor_parameters,
            -self.q_gradient_input / BATCH_SIZE)

        self.optimizer = tf.train.GradientDescentOptimizer(
            learning_rate=LEARNING_RATE).apply_gradients(
                zip(self.parameters_gradients, self.actor_parameters))
        # initialize all tensor variable parameters:
        self.sess.run(tf.global_variables_initializer())
        # self.saver = tf.train.Saver()
        # self.saver.save(self.sess, 'DDPG_MIMO', global_step=1000)

        self.sess.run([
            self.t_W1_a.assign(self.W1_a),
            self.t_B1_a.assign(self.B1_a),
            self.t_W2_a.assign(self.W2_a),
            self.t_B2_a.assign(self.B2_a),
            self.t_W3_a.assign(self.W3_a),
            self.t_B3_a.assign(self.B3_a)
        ])

        self.update_target_actor_op = [
            self.t_W1_a.assign(TAU * self.W1_a + (1 - TAU) * self.t_W1_a),
            self.t_B1_a.assign(TAU * self.B1_a + (1 - TAU) * self.t_B1_a),
            self.t_W2_a.assign(TAU * self.W2_a + (1 - TAU) * self.t_W2_a),
            self.t_B2_a.assign(TAU * self.B2_a + (1 - TAU) * self.t_B2_a),
            self.t_W3_a.assign(TAU * self.W3_a + (1 - TAU) * self.t_W3_a),
            self.t_B3_a.assign(TAU * self.B3_a + (1 - TAU) * self.t_B3_a),
            self.t_H1_a_bn.updateTarget,
            self.t_H2_a_bn.updateTarget,
        ]