def __init__(self, clean_encoder_layer, corrupted_encoder_layer,
             previous_decoder_layer=None, is_training_phase=True):
    """Build one decoder layer on top of the matching encoder layers.

    Args:
        clean_encoder_layer: Encoder layer of the clean pass; its
            ``post_activation``, ``batch_mean`` and ``batch_std`` are read.
        corrupted_encoder_layer: Encoder layer of the corrupted pass; its
            ``post_activation`` and ``pre_activation`` are read.
        previous_decoder_layer: Decoder layer feeding this one, or ``None``
            when this is the first decoder layer.
        is_training_phase: Forwarded unchanged to ``batch_norm``.

    Sets ``self.post_denoising`` and ``self.post_2nd_normalization``.
    """
    with tf.name_scope("decoder_layer"):
        if previous_decoder_layer is None:
            # First decoder layer: there is nothing above to project, so the
            # corrupted encoder output is used as-is.
            projected = corrupted_encoder_layer.post_activation
        else:
            # Project the signal of the previous decoder layer to the width of
            # the encoder layer this decoder layer is paired with.
            n_in = _layer_size(previous_decoder_layer.post_denoising)
            n_out = _layer_size(clean_encoder_layer.post_activation)
            projection = _weight_variable([n_in, n_out], name='V')
            projected = tf.matmul(
                previous_decoder_layer.post_denoising, projection)

        # Only the normalised tensor is needed; the two remaining return
        # values of batch_norm are discarded.
        normalized, _, _ = batch_norm(
            projected, is_training_phase=is_training_phase)

        denoised = self._denoise(
            corrupted_encoder_layer.pre_activation, normalized)

        # Second normalisation reuses the batch statistics stored on the
        # clean encoder layer.
        centered = denoised - clean_encoder_layer.batch_mean
        renormalized = centered / clean_encoder_layer.batch_std

        self.post_denoising = denoised
        self.post_2nd_normalization = renormalized
def __init__(self, inputs, output_size, non_linearity, noise_level,
             is_training_phase, reuse_variables=None):
    """Build one encoder layer: linear map, batch norm, noise, activation.

    Args:
        inputs: Tensor fed into the layer; its size is obtained through
            ``_layer_size``.
        output_size: Width of the layer output.
        non_linearity: Callable applied to the scaled and shifted signal.
        noise_level: Forwarded to ``self._add_noise``.
        is_training_phase: Forwarded unchanged to ``batch_norm``.
        reuse_variables: Forwarded to ``self._create_or_reuse_variables``.

    Sets ``pre_normalization``, ``batch_mean``, ``batch_std``,
    ``pre_activation`` and ``post_activation`` on the instance.
    """
    with tf.name_scope("encoder_layer"):
        input_size = _layer_size(inputs)
        # Expected to provide self.weights, self.gamma and self.beta, which
        # are read below.
        self._create_or_reuse_variables(
            reuse_variables, input_size, output_size)

        self.pre_normalization = tf.matmul(inputs, self.weights)

        normalized, batch_mean, batch_std = batch_norm(
            self.pre_normalization, is_training_phase=is_training_phase)
        self.batch_mean = batch_mean
        self.batch_std = batch_std

        self.pre_activation = self._add_noise(normalized, noise_level)

        # Shift by beta first, then scale by gamma.
        shifted = self.pre_activation + self.beta
        scaled = self.gamma * shifted
        self.post_activation = non_linearity(scaled)
def __init__(self, inputs, output_size, non_linearity, noise_level,
             is_training_phase, reuse_variables=None):
    """Construct the graph for a single encoder layer.

    The layer multiplies ``inputs`` by ``self.weights``, batch-normalises the
    result, passes it through ``self._add_noise`` and finally applies
    ``non_linearity`` to ``gamma * (noisy + beta)``.

    Args:
        inputs: Input tensor of the layer.
        output_size: Number of output units.
        non_linearity: Activation callable.
        noise_level: Noise parameter handed to ``self._add_noise``.
        is_training_phase: Passed through to ``batch_norm``.
        reuse_variables: Passed through to
            ``self._create_or_reuse_variables``.
    """
    with tf.name_scope("encoder_layer"):
        self._create_or_reuse_variables(
            reuse_variables,
            _layer_size(inputs),
            output_size,
        )

        linear_output = tf.matmul(inputs, self.weights)
        self.pre_normalization = linear_output

        # batch_norm returns the normalised tensor together with the batch
        # mean and standard deviation, which are kept on the instance.
        bn_result = batch_norm(
            linear_output, is_training_phase=is_training_phase)
        clean_signal, self.batch_mean, self.batch_std = bn_result

        noisy_signal = self._add_noise(clean_signal, noise_level)
        self.pre_activation = noisy_signal

        self.post_activation = non_linearity(
            self.gamma * (noisy_signal + self.beta))
def __init__(self, clean_encoder_layer, corrupted_encoder_layer,
             previous_decoder_layer=None, is_training_phase=True):
    """Construct the graph for a single decoder layer.

    Args:
        clean_encoder_layer: Paired encoder layer from the clean pass
            (provides ``post_activation``, ``batch_mean``, ``batch_std``).
        corrupted_encoder_layer: Paired encoder layer from the corrupted
            pass (provides ``post_activation`` and ``pre_activation``).
        previous_decoder_layer: The decoder layer whose ``post_denoising``
            feeds this layer; ``None`` marks the first decoder layer.
        is_training_phase: Passed through to ``batch_norm``.

    The results are stored as ``self.post_denoising`` and
    ``self.post_2nd_normalization``.
    """
    with tf.name_scope("decoder_layer"):
        has_layer_above = previous_decoder_layer is not None

        if has_layer_above:
            # Map the previous decoder output onto this layer's width with a
            # fresh weight matrix named 'V'.
            source_width = _layer_size(previous_decoder_layer.post_denoising)
            target_width = _layer_size(clean_encoder_layer.post_activation)
            v_matrix = _weight_variable(
                [source_width, target_width], name='V')
            top_down_signal = tf.matmul(
                previous_decoder_layer.post_denoising, v_matrix)
        else:
            # No layer above: start from the corrupted encoder output.
            top_down_signal = corrupted_encoder_layer.post_activation

        # Element 0 of the batch_norm result is the normalised tensor; the
        # other two elements are not used here.
        pre_denoising = batch_norm(
            top_down_signal, is_training_phase=is_training_phase)[0]

        self.post_denoising = self._denoise(
            corrupted_encoder_layer.pre_activation, pre_denoising)

        # Re-normalise with the statistics recorded by the clean encoder.
        self.post_2nd_normalization = (
            (self.post_denoising - clean_encoder_layer.batch_mean)
            / clean_encoder_layer.batch_std
        )
def __init__(self, num_states, num_actions):
    """Build the critic Q-network, its target copy and the training ops.

    Both networks share the same layout: the state goes through a first
    hidden layer (``N_HIDDEN_1`` units); the action is injected at the
    second hidden layer (``N_HIDDEN_2`` units) through a separate weight
    matrix; the output is a single Q value per input row. Each hidden layer
    is ``relu(batch_norm(linear)) + bias``.

    Args:
        num_states: Width of the state input placeholder.
        num_actions: Width of the action input placeholder.

    Attributes created (among others):
        critic_q_model / t_critic_q_model: Q outputs of the online and the
            target network.
        cost: Mean squared difference between ``critic_q_model`` and the
            fed ``q_value_in``.
        optimizer: Gradient-descent op minimising ``cost``.
        action_gradients: One-element list holding the gradient of
            ``critic_q_model`` with respect to ``critic_action_in``.
        update_target_critic_op: List of ops that blend the online
            parameters into the target parameters with factor ``TAU``.
    """

    def uniform_variable(shape, bound, name):
        """Return a variable initialised uniformly in [-bound, bound]."""
        return tf.Variable(tf.random_uniform(shape, -bound, bound), name=name)

    self.sess = tf.InteractiveSession()

    # Initialisation ranges: the first layer is scaled by its fan-in, the
    # second by the combined width of hidden-1 and the action input, and the
    # output layer uses a small fixed range.
    bound_h1 = 1 / math.sqrt(num_states)
    bound_h2 = 1 / math.sqrt(N_HIDDEN_1 + num_actions)
    bound_out = 0.003

    # ---- Critic Q network -------------------------------------------------
    self.critic_state_in = tf.placeholder("float", [None, num_states])
    self.critic_action_in = tf.placeholder("float", [None, num_actions])

    self.W1_c = uniform_variable([num_states, N_HIDDEN_1], bound_h1, "W1_c")
    self.B1_c = uniform_variable([N_HIDDEN_1], bound_h1, "B1_c")
    self.W2_c = uniform_variable([N_HIDDEN_1, N_HIDDEN_2], bound_h2, "W2_c")
    self.B2_c = uniform_variable([N_HIDDEN_2], bound_h2, "B2_c")
    self.W2_action_c = uniform_variable(
        [num_actions, N_HIDDEN_2], bound_h2, "W2_action_c")
    self.W3_c = uniform_variable([N_HIDDEN_2, 1], bound_out, "W3_c")
    self.B3_c = uniform_variable([1], bound_out, "B3_c")

    # One boolean placeholder drives the batch-norm mode of BOTH the online
    # and the target critic.
    self.is_training = tf.placeholder(tf.bool, [])

    # NOTE(review): batch_norm is a project helper whose result exposes
    # `.bnorm` and `.updateTarget`; its semantics are not visible here.
    self.H1_t = tf.matmul(self.critic_state_in, self.W1_c)
    self.H1_c_bn = batch_norm(
        self.H1_t, N_HIDDEN_1, self.is_training, self.sess)
    # The bias is added after the ReLU, not before it.
    self.H1_c = tf.nn.relu(self.H1_c_bn.bnorm) + self.B1_c

    # The action joins the network at the second hidden layer.
    self.H2_t = (tf.matmul(self.H1_c, self.W2_c)
                 + tf.matmul(self.critic_action_in, self.W2_action_c))
    self.H2_c_bn = batch_norm(
        self.H2_t, N_HIDDEN_2, self.is_training, self.sess)
    self.H2_c = tf.nn.relu(self.H2_c_bn.bnorm) + self.B2_c

    self.critic_q_model = tf.matmul(self.H2_c, self.W3_c) + self.B3_c

    # ---- Target critic Q network -------------------------------------------
    self.t_critic_state_in = tf.placeholder("float", [None, num_states])
    self.t_critic_action_in = tf.placeholder("float", [None, num_actions])

    self.t_W1_c = uniform_variable(
        [num_states, N_HIDDEN_1], bound_h1, "t_W1_c")
    self.t_B1_c = uniform_variable([N_HIDDEN_1], bound_h1, "t_B1_c")
    self.t_W2_c = uniform_variable(
        [N_HIDDEN_1, N_HIDDEN_2], bound_h2, "t_W2_c")
    self.t_W2_action_c = uniform_variable(
        [num_actions, N_HIDDEN_2], bound_h2, "t_W2_action_c")
    self.t_B2_c = uniform_variable([N_HIDDEN_2], bound_h2, "t_B2_c")
    self.t_W3_c = uniform_variable([N_HIDDEN_2, 1], bound_out, "t_W3_c")
    self.t_B3_c = uniform_variable([1], bound_out, "t_B3_c")

    # The online batch-norm object is handed to the target one as an extra
    # argument (presumably so the target can track it; confirm in the
    # batch_norm helper).
    self.t_H1_t = tf.matmul(self.t_critic_state_in, self.t_W1_c)
    self.t_H1_c_bn = batch_norm(
        self.t_H1_t, N_HIDDEN_1, self.is_training, self.sess, self.H1_c_bn)
    self.t_H1_c = tf.nn.relu(self.t_H1_c_bn.bnorm) + self.t_B1_c

    self.t_H2_t = (tf.matmul(self.t_H1_c, self.t_W2_c)
                   + tf.matmul(self.t_critic_action_in, self.t_W2_action_c))
    self.t_H2_c_bn = batch_norm(
        self.t_H2_t, N_HIDDEN_2, self.is_training, self.sess, self.H2_c_bn)
    self.t_H2_c = tf.nn.relu(self.t_H2_c_bn.bnorm) + self.t_B2_c

    # Built once; the previous version constructed this tensor twice, which
    # left a redundant MatMul/Add pair in the graph.
    self.t_critic_q_model = tf.matmul(self.t_H2_c, self.t_W3_c) + self.t_B3_c

    # ---- Loss and training ops ----------------------------------------------
    # Supervision target for the Q value, fed by the caller.
    self.q_value_in = tf.placeholder("float", [None, 1])

    # Plain mean-squared error; no weight regularisation term is added.
    self.cost = tf.reduce_mean(pow(self.critic_q_model - self.q_value_in, 2))
    self.optimizer = tf.train.GradientDescentOptimizer(
        learning_rate=LEARNING_RATE).minimize(self.cost)

    # Gradient of Q with respect to the action input (not divided by the
    # batch size).
    self.act_grad_v = tf.gradients(self.critic_q_model, self.critic_action_in)
    self.action_gradients = [self.act_grad_v[0]]

    self.sess.run(tf.initialize_all_variables())

    # ---- Target synchronisation ----------------------------------------------
    # (target, online) pairs, listed in the order the ops are created.
    param_pairs = [
        (self.t_W1_c, self.W1_c),
        (self.t_B1_c, self.B1_c),
        (self.t_W2_c, self.W2_c),
        (self.t_W2_action_c, self.W2_action_c),
        (self.t_B2_c, self.B2_c),
        (self.t_W3_c, self.W3_c),
        (self.t_B3_c, self.B3_c),
    ]

    # Start the target network as an exact copy of the online network.
    self.sess.run([target.assign(online) for target, online in param_pairs])

    # Soft update: target <- TAU * online + (1 - TAU) * target, followed by
    # the update ops exposed by the target batch-norm objects.
    self.update_target_critic_op = [
        target.assign(TAU * online + (1 - TAU) * target)
        for target, online in param_pairs
    ] + [
        self.t_H1_c_bn.updateTarget,
        self.t_H2_c_bn.updateTarget,
    ]
L3 = tf.nn.pool(L3, pooling_type='MAX', window_shape=[2], strides=[2], padding='SAME') print(L3.shape) ''' (?, 15, 32) ''' L3_flat = tf.reshape(L3, [-1, 15 * 32]) W4 = tf.get_variable("W4", shape=[15 * 32, 500], initializer=tf.contrib.layers.xavier_initializer()) b4 = tf.Variable(tf.random_normal([500])) Bn4 = batch_norm(tf.matmul(L3_flat, W4) + b4) L4 = tf.nn.relu(Bn4) L4 = tf.nn.dropout(L4, keep_prob=keep_prob) W5 = tf.get_variable("W5", shape=[500, nb_transmitter], initializer=tf.contrib.layers.xavier_initializer()) b5 = tf.Variable(tf.random_normal([nb_transmitter])) hypothesis = tf.matmul(L4, W5) + b5 # logits = tf.matmul(L2, W3) + b3 # hypothesis = tf.nn.softmax(logits) cost = tf.reduce_mean( tf.nn.softmax_cross_entropy_with_logits_v2(logits=hypothesis, labels=tf.stop_gradient(
def __init__(self, num_states, num_actions):
    """Build the actor network, its target copy and the training ops.

    Both networks map a state through two hidden layers (``N_HIDDEN_1`` and
    ``N_HIDDEN_2`` units, each ``relu(batch_norm(linear)) + bias``) to an
    output of width ``num_actions``.

    Args:
        num_states: Width of the state input placeholder.
        num_actions: Width of the network output.

    Attributes created (among others):
        actor_model / t_actor_model: Outputs of the online and the target
            network.
        q_gradient_input: Placeholder for the per-action gradient fed by the
            caller (presumably dQ/da from the critic; confirm at call site).
        optimizer: Op applying ``parameters_gradients`` to
            ``actor_parameters`` with plain gradient descent.
        update_target_actor_op: List of ops that blend the online
            parameters into the target parameters with factor ``TAU``.
    """

    def _uniform_var(shape, limit, name):
        """Return a variable drawn uniformly from [-limit, limit]."""
        return tf.Variable(tf.random_uniform(shape, -limit, limit), name=name)

    self.sess = tf.InteractiveSession()

    # Initialisation ranges per layer: fan-in scaled for the two hidden
    # layers, small fixed range for the output layer.
    limit_1 = 1 / math.sqrt(num_states)
    limit_2 = 1 / math.sqrt(N_HIDDEN_1)
    limit_out = 0.003

    # ---- Online actor -------------------------------------------------------
    self.actor_state_in = tf.placeholder("float", [None, num_states])

    self.W1_a = _uniform_var([num_states, N_HIDDEN_1], limit_1, "W1_a")
    self.B1_a = _uniform_var([N_HIDDEN_1], limit_1, "B1_a")
    self.W2_a = _uniform_var([N_HIDDEN_1, N_HIDDEN_2], limit_2, "W2_a")
    self.B2_a = _uniform_var([N_HIDDEN_2], limit_2, "B2_a")
    self.W3_a = _uniform_var([N_HIDDEN_2, num_actions], limit_out, "W3_a")
    self.B3_a = _uniform_var([num_actions], limit_out, "B3_a")

    self.is_training = tf.placeholder(tf.bool, [])

    # NOTE(review): batch_norm is a project helper whose result exposes
    # `.bnorm`, `.scale`, `.beta` and `.updateTarget`.
    self.H1_t = tf.matmul(self.actor_state_in, self.W1_a)
    self.H1_a_bn = batch_norm(
        self.H1_t, N_HIDDEN_1, self.is_training, self.sess)
    # The bias is added after the ReLU.
    self.H1_a = tf.nn.relu(self.H1_a_bn.bnorm) + self.B1_a

    self.H2_t = tf.matmul(self.H1_a, self.W2_a)
    self.H2_a_bn = batch_norm(
        self.H2_t, N_HIDDEN_2, self.is_training, self.sess)
    self.H2_a = tf.nn.relu(self.H2_a_bn.bnorm) + self.B2_a

    self.actor_model = tf.matmul(self.H2_a, self.W3_a) + self.B3_a

    # ---- Target actor -------------------------------------------------------
    self.t_actor_state_in = tf.placeholder("float", [None, num_states])

    self.t_W1_a = _uniform_var([num_states, N_HIDDEN_1], limit_1, "t_W1_a")
    self.t_B1_a = _uniform_var([N_HIDDEN_1], limit_1, "t_B1_a")
    self.t_W2_a = _uniform_var([N_HIDDEN_1, N_HIDDEN_2], limit_2, "t_W2_a")
    self.t_B2_a = _uniform_var([N_HIDDEN_2], limit_2, "t_B2_a")
    self.t_W3_a = _uniform_var(
        [N_HIDDEN_2, num_actions], limit_out, "t_W3_a")
    self.t_B3_a = _uniform_var([num_actions], limit_out, "t_B3_a")

    # The target network has its own training-mode placeholder.
    self.t_is_training = tf.placeholder(tf.bool, [])

    self.t_H1_t = tf.matmul(self.t_actor_state_in, self.t_W1_a)
    self.t_H1_a_bn = batch_norm(
        self.t_H1_t, N_HIDDEN_1, self.t_is_training, self.sess,
        self.H1_a_bn)
    self.t_H1_a = tf.nn.relu(self.t_H1_a_bn.bnorm) + self.t_B1_a

    self.t_H2_t = tf.matmul(self.t_H1_a, self.t_W2_a)
    self.t_H2_a_bn = batch_norm(
        self.t_H2_t, N_HIDDEN_2, self.t_is_training, self.sess,
        self.H2_a_bn)
    self.t_H2_a = tf.nn.relu(self.t_H2_a_bn.bnorm) + self.t_B2_a

    self.t_actor_model = tf.matmul(self.t_H2_a, self.t_W3_a) + self.t_B3_a

    # ---- Training ops -------------------------------------------------------
    self.q_gradient_input = tf.placeholder("float", [None, num_actions])

    # Trainable parameters of the online actor, including the batch-norm
    # scale and beta of both hidden layers.
    self.actor_parameters = [
        self.W1_a, self.B1_a,
        self.W2_a, self.B2_a,
        self.W3_a, self.B3_a,
        self.H1_a_bn.scale, self.H1_a_bn.beta,
        self.H2_a_bn.scale, self.H2_a_bn.beta,
    ]

    # The fed gradient is negated and divided by BATCH_SIZE, then used as
    # the upstream gradient (grad_ys) of the actor output.
    upstream_gradient = -self.q_gradient_input / BATCH_SIZE
    self.parameters_gradients = tf.gradients(
        self.actor_model, self.actor_parameters, upstream_gradient)

    descent = tf.train.GradientDescentOptimizer(learning_rate=LEARNING_RATE)
    self.optimizer = descent.apply_gradients(
        zip(self.parameters_gradients, self.actor_parameters))

    # Initialise every variable created so far.
    self.sess.run(tf.initialize_all_variables())

    # ---- Target synchronisation ----------------------------------------------
    # (target, online) pairs, listed in the order the ops are created.
    weight_pairs = [
        (self.t_W1_a, self.W1_a),
        (self.t_B1_a, self.B1_a),
        (self.t_W2_a, self.W2_a),
        (self.t_B2_a, self.B2_a),
        (self.t_W3_a, self.W3_a),
        (self.t_B3_a, self.B3_a),
    ]

    # Hard copy so that the target starts identical to the online actor.
    self.sess.run([tgt.assign(src) for tgt, src in weight_pairs])

    # Soft update: target <- TAU * online + (1 - TAU) * target, followed by
    # the update ops exposed by the target batch-norm objects.
    soft_updates = [
        tgt.assign(TAU * src + (1 - TAU) * tgt) for tgt, src in weight_pairs
    ]
    self.update_target_actor_op = soft_updates + [
        self.t_H1_a_bn.updateTarget,
        self.t_H2_a_bn.updateTarget,
    ]