def build(self, img, q, scope='Classifier'):
    # Normalize input images into 0 ~ 1
    img = img / 255.

    def _positional_encoding(features):
        # Append two features of positional encoding to the given feature maps
        d = features.get_shape().as_list()[1]
        indices = tf.range(d)
        x = tf.tile(tf.reshape(indices, [d, 1]), [1, d])
        y = tf.tile(tf.reshape(indices, [1, d]), [d, 1])
        pos = tf.cast(tf.stack([x, y], axis=2)[None] / d, tf.float32)
        pos = tf.tile(pos, [tf.shape(img)[0], 1, 1, 1])
        return tf.concat([features, pos], axis=3)

    def f_phi(g, scope='f_phi'):
        with tf.variable_scope(scope):
            fc_1 = fc(g, 256, activation_fn=tf.nn.relu, name='fc_1')
            fc_1 = slim.dropout(fc_1, keep_prob=0.5,
                                is_training=self.is_training, scope='fc_3/')
            logits = fc(fc_1, self.a_dim, activation_fn=None, name='fc_3')
            return logits

    with tf.variable_scope(scope):
        conv_1 = conv2d(img, 24, self.is_training, activation_fn=tf.nn.relu, name='conv_1')
        conv_2 = conv2d(conv_1, 24, self.is_training, activation_fn=tf.nn.relu, name='conv_2')
        conv_3 = conv2d(conv_2, 24, self.is_training, activation_fn=tf.nn.relu, name='conv_3')
        conv_pos = _positional_encoding(conv_3)

        ###################### MODIFY HERE ######################
        def g_theta(o_i, o_j, q, scope='g_theta', reuse=True):
            with tf.variable_scope(scope, reuse=reuse):
                g_1 = fc(tf.concat([o_i, o_j, q], axis=1), 256, name='g_1')
                g_2 = fc(g_1, 256, name='g_2')
                return g_2

        d = conv_pos.get_shape().as_list()[1]
        all_g = []
        # Enumerate every ordered pair of the d*d grid cells; reuse the
        # g_theta variables after the first pair so all pairs share weights.
        for i in range(d * d):
            o_i = conv_pos[:, i // d, i % d, :]
            for j in range(d * d):
                o_j = conv_pos[:, j // d, j % d, :]
                if i == 0 and j == 0:
                    g_i_j = g_theta(o_i, o_j, q, reuse=False)
                else:
                    g_i_j = g_theta(o_i, o_j, q, reuse=True)
                all_g.append(g_i_j)
        all_g = tf.stack(all_g, axis=0)
        all_g = tf.reduce_sum(all_g, axis=0)
        #########################################################

        logits = f_phi(all_g)
        return logits
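For intuition, a small pure-Python sketch (illustration only, not part of the model) of what the double loop above enumerates: every ordered pair of the d*d grid cells, with the flat index i decoded back to a (row, col) cell.

d = 2
cells = [(i // d, i % d) for i in range(d * d)]
pairs = [(a, b) for a in cells for b in cells]
assert len(pairs) == (d * d) ** 2  # 16 ordered pairs when d = 2
print(pairs[:3])  # [((0, 0), (0, 0)), ((0, 0), (0, 1)), ((0, 0), (1, 0))]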
def build(self, img, q, scope='Classifier'):
    # Normalize input images into 0 ~ 1
    img = img / 255.

    def _positional_encoding(features):
        # Append two features of positional encoding to the given feature maps
        d = features.get_shape().as_list()[1]
        indices = tf.range(d)
        x = tf.tile(tf.reshape(indices, [d, 1]), [1, d])
        y = tf.tile(tf.reshape(indices, [1, d]), [d, 1])
        pos = tf.cast(tf.stack([x, y], axis=2)[None] / d, tf.float32)
        pos = tf.tile(pos, [tf.shape(img)[0], 1, 1, 1])
        return tf.concat([features, pos], axis=3)

    def f_phi(g, scope='f_phi'):
        with tf.variable_scope(scope):
            fc_1 = fc(g, 256, activation_fn=tf.nn.relu, name='fc_1')
            fc_1 = slim.dropout(fc_1, keep_prob=0.5,
                                is_training=self.is_training, scope='fc_3/')
            logits = fc(fc_1, self.a_dim, activation_fn=None, name='fc_3')
            return logits

    with tf.variable_scope(scope):
        conv_1 = conv2d(img, 24, self.is_training, activation_fn=tf.nn.relu, name='conv_1')
        conv_2 = conv2d(conv_1, 24, self.is_training, activation_fn=tf.nn.relu, name='conv_2')
        conv_3 = conv2d(conv_2, 24, self.is_training, activation_fn=tf.nn.relu, name='conv_3')
        conv_pos = _positional_encoding(conv_3)

        ###################### MODIFY HERE ######################
        def g_theta(o_pair, q, d, scope='g_theta'):
            with tf.variable_scope(scope):
                # Broadcast the question over the (d*d, d*d) pair grid, then
                # apply g_theta to all pairs at once as two 1x1 convolutions.
                q = q[:, None, None, :]
                q = tf.tile(q, [1, d * d, d * d, 1])
                o = tf.concat([o_pair, q], axis=3)
                g_1 = conv2d(o, 256, self.is_training, k_h=1, k_w=1,
                             s_h=1, s_w=1, activation_fn=tf.nn.relu, name='g_1')
                g_2 = conv2d(g_1, 256, self.is_training, k_h=1, k_w=1,
                             s_h=1, s_w=1, activation_fn=tf.nn.relu, name='g_2')
                return g_2

        d = conv_3.get_shape().as_list()[1]
        # Each object is a 26-d vector (24 conv channels + 2 positional
        # channels). Tile along the pair axes, not the channel axis, to get
        # a (b, d*d, d*d, 52) tensor of all ordered object pairs.
        o_1 = tf.tile(tf.reshape(conv_pos, [-1, d * d, 1, 26]), [1, 1, d * d, 1])
        o_2 = tf.tile(tf.reshape(conv_pos, [-1, 1, d * d, 26]), [1, d * d, 1, 1])
        o_pair = tf.concat([o_1, o_2], axis=3)
        g = g_theta(o_pair, q, d)
        all_g = tf.reduce_sum(g, [1, 2])
        #########################################################

        logits = f_phi(all_g)
        return logits
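The tile/reshape trick above replaces the explicit double loop with a single tensor operation. A standalone NumPy sketch (toy sizes, illustration only) verifying that entry (i, j) of the pair tensor holds the concatenation [object_i, object_j]:

import numpy as np

b, d, c = 1, 2, 3                                  # toy sizes
conv_pos = np.arange(b * d * d * c).reshape(b, d, d, c).astype(float)
flat = conv_pos.reshape(b, d * d, c)               # objects in row-major order
o_1 = np.tile(flat.reshape(b, d * d, 1, c), (1, 1, d * d, 1))
o_2 = np.tile(flat.reshape(b, 1, d * d, c), (1, d * d, 1, 1))
o_pair = np.concatenate([o_1, o_2], axis=3)        # (b, d*d, d*d, 2c)
i, j = 1, 3
assert (o_pair[0, i, j, :c] == flat[0, i]).all()   # first half: object i
assert (o_pair[0, i, j, c:] == flat[0, j]).all()   # second half: object j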
def State_Encoder(s, batch_size, scope='State_Encoder', reuse=False):
    with tf.variable_scope(scope, reuse=reuse) as scope:
        if not reuse:
            log.warning(scope.name)
        _ = conv2d(s, 16, is_train, k_h=3, k_w=3, info=not reuse,
                   batch_norm=True, name='conv1')
        _ = conv2d(_, 32, is_train, k_h=3, k_w=3, info=not reuse,
                   batch_norm=True, name='conv2')
        _ = conv2d(_, 48, is_train, k_h=3, k_w=3, info=not reuse,
                   batch_norm=True, name='conv3')
        if self.dataset_type == 'vizdoom':
            _ = conv2d(_, 48, is_train, k_h=3, k_w=3, info=not reuse,
                       batch_norm=True, name='conv4')
            _ = conv2d(_, 48, is_train, k_h=3, k_w=3, info=not reuse,
                       batch_norm=True, name='conv5')
        state_feature = tf.reshape(_, [batch_size, -1])
        return state_feature
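A hypothetical usage sketch (the `frames` tensor and feature names are assumptions, not from the source): encoding two frames with shared weights by reusing the variable scope after the first call.

# frames: assumed tensor of shape [batch, time, H, W, C]
feat_0 = State_Encoder(frames[:, 0], batch_size)              # creates variables
feat_1 = State_Encoder(frames[:, 1], batch_size, reuse=True)  # shares them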
def build(self, img, q, scope='Classifier'):
    # Normalize input images into 0 ~ 1
    img = img / 255.

    with tf.variable_scope(scope):
        conv_1 = conv2d(img, 24, self.is_training, activation_fn=tf.nn.relu, name='conv_1')
        conv_2 = conv2d(conv_1, 24, self.is_training, activation_fn=tf.nn.relu, name='conv_2')
        conv_3 = conv2d(conv_2, 24, self.is_training, activation_fn=tf.nn.relu, name='conv_3')

        # Append the question into image features
        conv_q = tf.concat([tf.reshape(conv_3, [tf.shape(conv_3)[0], -1]), q], axis=1)
        fc_1 = fc(conv_q, 256, activation_fn=tf.nn.relu, name='fc_1')
        fc_2 = fc(fc_1, 256, activation_fn=tf.nn.relu, name='fc_2')
        fc_2 = slim.dropout(fc_2, keep_prob=0.5, is_training=self.is_training, scope='fc_3/')
        logits = fc(fc_2, self.a_dim, activation_fn=None, name='fc_3')
        return logits
def build(self, img, q, scope='Classifier'):
    # Normalize input images into 0 ~ 1
    img = img / 255.

    def _positional_encoding(features):
        # Append two features of positional encoding to the given feature maps
        d = features.get_shape().as_list()[1]
        indices = tf.range(d)
        x = tf.tile(tf.reshape(indices, [d, 1]), [1, d])
        y = tf.tile(tf.reshape(indices, [1, d]), [d, 1])
        pos = tf.cast(tf.stack([x, y], axis=2)[None] / d, tf.float32)
        pos = tf.tile(pos, [tf.shape(img)[0], 1, 1, 1])
        return tf.concat([features, pos], axis=3)

    def f_phi(g, scope='f_phi'):
        with tf.variable_scope(scope):
            fc_1 = fc(g, 256, activation_fn=tf.nn.relu, name='fc_1')
            fc_1 = slim.dropout(fc_1, keep_prob=0.5,
                                is_training=self.is_training, scope='fc_3/')
            logits = fc(fc_1, self.a_dim, activation_fn=None, name='fc_3')
            return logits

    with tf.variable_scope(scope):
        conv_1 = conv2d(img, 24, self.is_training, activation_fn=tf.nn.relu, name='conv_1')
        conv_2 = conv2d(conv_1, 24, self.is_training, activation_fn=tf.nn.relu, name='conv_2')
        conv_3 = conv2d(conv_2, 24, self.is_training, activation_fn=tf.nn.relu, name='conv_3')  # (b,d,d,c)
        # Adding positional information (x,y) into features: size (b,d,d,c+2)
        conv_pos = _positional_encoding(conv_3)

        ###################### MODIFY HERE ######################
        conv_q = tf.concat([tf.reshape(conv_pos, [tf.shape(conv_pos)[0], -1]), q], axis=1)
        fc_1 = fc(conv_q, 256, activation_fn=tf.nn.relu, name='fc_1')
        all_g = fc_1
        #########################################################

        logits = f_phi(all_g)
        return logits
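To see what `_positional_encoding` appends, a small NumPy sketch (illustration only): each cell of the d x d grid receives its normalized (row, column) coordinates as two extra channels.

import numpy as np

d = 4
idx = np.arange(d)
x = np.tile(idx.reshape(d, 1), (1, d))  # x[i, j] == i (row coordinate)
y = np.tile(idx.reshape(1, d), (d, 1))  # y[i, j] == j (column coordinate)
pos = np.stack([x, y], axis=2) / d      # two channels of normalized coords
print(pos[1, 3])                        # [0.25 0.75]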
def build(self, img, q, scope='Classifier'):
    # Normalize input images into 0 ~ 1
    img = img / 255.

    with tf.variable_scope(scope):
        ###################### MODIFY HERE ######################
        ## Compute film(q) for gamma, beta
        conv_1 = conv2d(img, 24, self.is_training, activation_fn=tf.nn.relu, name='conv_1')
        ## Affine transform of conv_1
        conv_2 = conv2d(conv_1, 24, self.is_training, activation_fn=tf.nn.relu, name='conv_2')
        ## Affine transform of conv_2
        conv_3 = conv2d(conv_2, 24, self.is_training, activation_fn=tf.nn.relu, name='conv_3')
        ## Affine transform of conv_3
        #########################################################

        features = tf.reshape(conv_3, [tf.shape(conv_3)[0], -1])
        fc_1 = fc(features, 256, activation_fn=tf.nn.relu, name='fc_1')
        fc_2 = fc(fc_1, 256, activation_fn=tf.nn.relu, name='fc_2')
        fc_2 = slim.dropout(fc_2, keep_prob=0.5, is_training=self.is_training, scope='fc_3/')
        logits = fc(fc_2, self.a_dim, activation_fn=None, name='fc_3')
        return logits
def build(self, img, q, scope='Classifier'):
    # Normalize input images into 0 ~ 1
    img = img / 255.

    with tf.variable_scope(scope):
        ###################### MODIFY HERE ######################
        def film(q, scope='film'):
            # Predict a (gamma, beta) pair for each of the 3 conv layers
            # (24 channels each) from the question embedding.
            with tf.variable_scope(scope):
                cond = fc(q, 3 * 2 * 24, activation_fn=None, name='cond')
                cond = tf.reshape(cond, [-1, 3, 2, 24])
                return cond

        def modulate(conv, gamma, beta):
            # Per-channel affine transform, broadcast over the spatial dims.
            gamma = tf.reshape(gamma, [-1, 1, 1, 24])
            beta = tf.reshape(beta, [-1, 1, 1, 24])
            return (1 + gamma) * conv + beta

        q_embed = fc(q, 256, name='fc_q')
        cond = film(q_embed)

        conv_1 = conv2d(img, 24, self.is_training, activation_fn=tf.nn.relu, name='conv_1')
        conv_1 = modulate(conv_1, cond[:, 0, 0, :], cond[:, 0, 1, :])
        conv_2 = conv2d(conv_1, 24, self.is_training, activation_fn=tf.nn.relu, name='conv_2')
        conv_2 = modulate(conv_2, cond[:, 1, 0, :], cond[:, 1, 1, :])
        conv_3 = conv2d(conv_2, 24, self.is_training, activation_fn=tf.nn.relu, name='conv_3')
        conv_3 = modulate(conv_3, cond[:, 2, 0, :], cond[:, 2, 1, :])
        #########################################################

        features = tf.reshape(conv_3, [tf.shape(conv_3)[0], -1])
        fc_1 = fc(features, 256, activation_fn=tf.nn.relu, name='fc_1')
        fc_2 = fc(fc_1, 256, activation_fn=tf.nn.relu, name='fc_2')
        fc_2 = slim.dropout(fc_2, keep_prob=0.5, is_training=self.is_training, scope='fc_3/')
        logits = fc(fc_2, self.a_dim, activation_fn=None, name='fc_3')
        return logits
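The modulation itself is a per-channel affine transform, (1 + gamma) * h + beta, broadcast over the spatial dimensions. A minimal NumPy check (toy values, illustration only):

import numpy as np

conv = np.ones((1, 2, 2, 24))                      # feature maps, all ones
gamma = np.full((1, 24), 0.5)
beta = np.full((1, 24), -1.0)
out = (1 + gamma.reshape(-1, 1, 1, 24)) * conv + beta.reshape(-1, 1, 1, 24)
print(out[0, 0, 0, 0])                             # 1.5 * 1 - 1.0 = 0.5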
def State_Encoder(s, per, batch_size, scope='State_Encoder', reuse=False):
    with tf.variable_scope(scope, reuse=reuse) as scope:
        if not reuse:
            log.warning(scope.name)
        _ = conv2d(s, 16, is_train, k_h=3, k_w=3, info=not reuse,
                   batch_norm=True, name='conv1')
        _ = conv2d(_, 32, is_train, k_h=3, k_w=3, info=not reuse,
                   batch_norm=True, name='conv2')
        _ = conv2d(_, 48, is_train, k_h=3, k_w=3, info=not reuse,
                   batch_norm=True, name='conv3')
        if self.pixel_input:
            _ = conv2d(_, 48, is_train, k_h=3, k_w=3, info=not reuse,
                       batch_norm=True, name='conv4')
            _ = conv2d(_, 48, is_train, k_h=3, k_w=3, info=not reuse,
                       batch_norm=True, name='conv5')
        state_feature = tf.reshape(_, [batch_size, -1])
        if self.state_encoder_fc:
            state_feature = fc(state_feature, 512, is_train, info=not reuse, name='fc1')
            state_feature = fc(state_feature, 512, is_train, info=not reuse, name='fc2')
        # Append the perception feature to the encoded state.
        state_feature = tf.concat([state_feature, per], axis=-1)
        if not reuse:
            log.info('concat feature {}'.format(state_feature))
        return state_feature
def construct(self, config):
    self.w = {}
    self.t_w = {}

    activation_fn = tf.nn.relu
    initializer = tf.truncated_normal_initializer(0, 0.02)

    # All heads use the same state representation.
    with tf.variable_scope("target_ori_q"):
        if self.cnn_format == 'NHWC':
            self.residual_state_input_n = tf.placeholder(
                "float32",
                [None, self.screen_height, self.screen_width, self.history_length],
                name="residual_state_input")
        else:
            self.residual_state_input_n = tf.placeholder(
                "float32",
                [None, self.history_length, self.screen_height, self.screen_width],
                name="residual_state_input")

        if self.cnn_format == 'NHWC':
            self.state_input_n = tf.placeholder(
                "float32",
                [None, self.screen_height, self.screen_width, self.history_length],
                name="state_input")
        else:
            self.state_input_n = tf.placeholder(
                "float32",
                [None, self.history_length, self.screen_height, self.screen_width],
                name="state_input")

        # NOTE: concatenating on axis 3 assumes NHWC; the NCHW branch would
        # need axis=1.
        self.l1_n, self.t_w['l1_s_w'], self.t_w['l1_s_b'] = conv2d(
            tf.concat([self.state_input_n, self.residual_state_input_n], axis=3),
            32, [8, 8], [4, 4], initializer, activation_fn,
            self.cnn_format, name='l1_s')
        self.l2_n, self.t_w['l2_s_w'], self.t_w['l2_s_b'] = conv2d(
            self.l1_n, 64, [4, 4], [2, 2], initializer, activation_fn,
            self.cnn_format, name="l2_s")
        self.l3_n, self.t_w['l3_s_w'], self.t_w['l3_s_b'] = conv2d(
            self.l2_n, 64, [3, 3], [1, 1], initializer, activation_fn,
            self.cnn_format, name="l3_s")

        shape = self.l3_n.get_shape().as_list()
        # `reduce` is functools.reduce under Python 3.
        self.l3_n_flat = tf.reshape(
            self.l3_n, [-1, reduce(lambda x, y: x * y, shape[1:])])
        self.l1_n_q, self.t_w['l1_q_w'], self.t_w['l1_q_b'] = linear(
            self.l3_n_flat, 512, activation_fn=activation_fn, name="l1_q")
        self.ori_q_n, self.t_w['l2_q_w'], self.t_w['l2_q_b'] = linear(
            self.l1_n_q, self.config.option_num + self.config.action_num,
            name='ori_q')

    with tf.variable_scope("ori_q"):
        if self.cnn_format == 'NHWC':
            self.residual_state_input_s = tf.placeholder(
                "float32",
                [None, self.screen_height, self.screen_width, self.history_length],
                name="residual_state_input")
        else:
            self.residual_state_input_s = tf.placeholder(
                "float32",
                [None, self.history_length, self.screen_height, self.screen_width],
                name="residual_state_input")

        if self.cnn_format == 'NHWC':
            self.state_input = tf.placeholder(
                "float32",
                [None, self.screen_height, self.screen_width, self.history_length],
                name="state_input")
        else:
            self.state_input = tf.placeholder(
                "float32",
                [None, self.history_length, self.screen_height, self.screen_width],
                name="state_input")

        self.l1_s, self.w['l1_s_w'], self.w['l1_s_b'] = conv2d(
            tf.concat([self.state_input, self.residual_state_input_s], axis=3),
            32, [8, 8], [4, 4], initializer, activation_fn,
            self.cnn_format, name='l1_s')
        self.l2_s, self.w['l2_s_w'], self.w['l2_s_b'] = conv2d(
            self.l1_s, 64, [4, 4], [2, 2], initializer, activation_fn,
            self.cnn_format, name="l2_s")
        self.l3_s, self.w['l3_s_w'], self.w['l3_s_b'] = conv2d(
            self.l2_s, 64, [3, 3], [1, 1], initializer, activation_fn,
            self.cnn_format, name="l3_s")

        shape = self.l3_s.get_shape().as_list()
        self.l3_s_flat = tf.reshape(
            self.l3_s, [-1, reduce(lambda x, y: x * y, shape[1:])])
        self.l1_q, self.w['l1_q_w'], self.w['l1_q_b'] = linear(
            self.l3_s_flat, 512, activation_fn=activation_fn, name="l1_q")
        self.ori_q, self.w['l2_q_w'], self.w['l2_q_b'] = linear(
            self.l1_q, self.config.option_num + self.config.action_num,
            name='ori_q')

    with tf.variable_scope("qq"):
        self.l1_qq, self.w['l1_qq_w'], self.w['l1_qq_b'] = linear(
            self.l3_s_flat, 512, activation_fn=activation_fn, name="l1_qq")
        self.q, self.w['l2_qq_w'], self.w['l2_qq_b'] = linear(
            self.l1_qq,
            (self.config.action_num + self.config.option_num) * self.config.option_num,
            name='q')

    with tf.variable_scope("target_qq"):
        self.l1_qq_n, self.t_w['l1_qq_w'], self.t_w['l1_qq_b'] = linear(
            self.l3_n_flat, 512, activation_fn=activation_fn, name="l1_qq")
        self.q_n, self.t_w['l2_qq_w'], self.t_w['l2_qq_b'] = linear(
            self.l1_qq_n,
            (self.config.action_num + self.config.option_num) * self.config.option_num,
            name='q')

    with tf.variable_scope("parameter"):
        self.k = tf.placeholder("float32", [None], name="k")
        self.terminals = tf.placeholder('float32', [None], name="terminals")
        self.ep = tf.placeholder("float32", None, name="ep")

    with tf.variable_scope("input"):
        self.o = tf.placeholder("int64", [None], name="o")
        self.b = tf.placeholder("int64", [None], name="b")
        self.g = tf.placeholder('int64', [None], name="g")

    with tf.variable_scope("reward"):
        self.reward_st = tf.placeholder("float32", [None], name="reward_st")

    with tf.variable_scope("beta"):
        shape = self.residual_state_input_n.get_shape().as_list()
        self.residual_input_n_flat = tf.reshape(
            self.residual_state_input_n,
            [-1, reduce(lambda x, y: x * y, shape[1:])])
        shape = self.state_input_n.get_shape().as_list()
        self.state_input_n_flat = tf.reshape(
            self.state_input_n,
            [-1, reduce(lambda x, y: x * y, shape[1:])])
        self.state_input_n_flat_ = tf.concat(
            [self.residual_input_n_flat, 0.1 * self.state_input_n_flat], axis=1)
        self.beta_na_, self.l1_b_w, self.l1_b_b = linear(
            self.state_input_n_flat_, self.config.option_num, stddev=0.1,
            activation_fn=tf.nn.sigmoid, name="beta")
        # self.beta_na = self.beta_na_
        # Hard-threshold the termination probabilities.
        self.beta_na = tf.where(
            tf.greater(self.beta_na_, self.config.clip_prob),
            tf.ones_like(self.beta_na_, tf.float32),
            tf.zeros_like(self.beta_na_, tf.float32))
        self.beta_ng = tf.reduce_sum(
            tf.multiply(self.beta_na,
                        tf.one_hot(self.g, self.config.option_num, 1., 0., -1)), 1)

    with tf.variable_scope('pred_to_target'):
        self.t_w_input = {}
        self.t_w_assign_op = {}
        for name in self.w.keys():
            self.t_w_input[name] = tf.placeholder(
                'float32', self.t_w[name].get_shape().as_list(), name=name)
            self.t_w_assign_op[name] = self.t_w[name].assign(
                self.t_w_input[name])

    with tf.variable_scope("q"):
        # ori - q
        self.q_na = self.ori_q_n
        self.q_sa = self.ori_q
        self.max_q_n = tf.reduce_max(self.ori_q_n, 1)
        self.q_so = tf.reduce_sum(
            tf.multiply(
                self.q_sa,
                tf.one_hot(self.o,
                           self.config.option_num + self.config.action_num,
                           1., 0., -1)), 1)
        # n-step Q-learning target: r + (1 - terminal) * gamma^k * max_a Q'(s', a)
        self.target_q_so = tf.stop_gradient(
            self.reward_st +
            (1 - self.terminals) * self.config.discount ** self.k * self.max_q_n)

        # g - q
        action_num = self.config.action_num + self.config.option_num
        fn = self.config.option_num * (self.config.action_num + self.config.option_num)
        self.q_naa = tf.reshape(
            self.q_n,
            [-1, self.config.action_num + self.config.option_num, self.config.option_num])
        self.q_nga = tf.reduce_sum(
            tf.multiply(
                self.q_naa,
                tf.expand_dims(tf.one_hot(self.g, self.config.option_num, 1., 0., -1), 1)), 2)
        self.max_q_ng = tf.reduce_max(self.q_nga, 1)
        self.q_saa = tf.reshape(
            self.q,
            [-1, self.config.action_num + self.config.option_num, self.config.option_num])
        self.q_sga = tf.reduce_sum(
            tf.multiply(
                self.q_saa,
                tf.expand_dims(tf.one_hot(self.g, self.config.option_num, 1., 0., -1), 1)), 2)
        self.q_sgo = tf.reduce_sum(
            tf.multiply(
                self.q,
                tf.one_hot(self.o * self.config.option_num + self.g, fn, 1., 0., -1)), 1)
        # Option-conditioned target: if option g terminates (beta_ng = 1),
        # bootstrap from the goal bonus plus the top-level value; otherwise
        # bootstrap from the same option's Q.
        self.target_q_sgo = tf.stop_gradient(
            self.reward_st +
            (1 - self.terminals) * self.config.discount ** self.k *
            (self.beta_ng * (self.config.goal_pho + self.max_q_n) +
             (1 - self.beta_ng) * self.max_q_ng))

    with tf.variable_scope("optimizer"):
        self.q_delta = self.target_q_so - self.q_so
        self.qq_delta = self.target_q_sgo - self.q_sgo
        self.q_loss = tf.reduce_mean(tf.square(self.q_delta), name="q_loss")
        self.qq_loss = tf.reduce_mean(tf.square(self.qq_delta), name="qq_loss")
        self.learning_rate_op = tf.maximum(
            self.learning_rate_minimum,
            tf.train.exponential_decay(
                self.learning_rate, self.learn_count,
                self.learning_rate_decay_step, self.learning_rate_decay,
                staircase=True))
        q_optim = tf.train.GradientDescentOptimizer(self.learning_rate_op)
        qq_optim = tf.train.GradientDescentOptimizer(self.learning_rate_op * 2)
        self.gvs = q_optim.compute_gradients(self.q_loss)
        self.gvs2 = qq_optim.compute_gradients(self.qq_loss)
        capped_gvs = [(tf.clip_by_value(grad, self.min_delta, self.max_delta), var)
                      for grad, var in self.gvs if grad is not None]
        capped_gvs2 = [(tf.clip_by_value(grad, self.min_delta, self.max_delta), var)
                       for grad, var in self.gvs2 if grad is not None]
        self.q_optim = q_optim.apply_gradients(capped_gvs)
        self.qq_optim = qq_optim.apply_gradients(capped_gvs2)
        # self.beta_optim = tf.train.GradientDescentOptimizer(self.learning_rate_op).minimize(self.beta_loss)

    '''
    with tf.variable_scope("summary"):
        tags = np.empty((self.config.option_num,
                         self.config.action_num + self.config.option_num,
                         self.config.state_num), dtype="<40U")
        for i in range(tags.shape[0]):
            for j in range(tags.shape[1]):
                for z in range(tags.shape[2]):
                    tags[i, j, z] = "%s=%s/%s:g:%d-o:%d-s:%d" % (
                        self.env_name, self.env_type, "q", i, j, z)
        self.w_summary = tf.scalar_summary(tags, self.qs)
        self.all_summary = tf.merge_summary([self.learn_count_summary, self.w_summary])
    '''

    with tf.variable_scope("summary"):
        self.all_summary = tf.summary.merge([self.learn_count_summary])
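The `pred_to_target` scope above only builds the assign ops; one way they might be driven at sync time is sketched below (a minimal sketch: `sess` and the method name are assumptions, not from the source).

def update_target_network(self, sess):
    # Copy each online-network weight into its target-network counterpart
    # through the placeholder/assign pairs created under 'pred_to_target'.
    for name in self.w.keys():
        sess.run(self.t_w_assign_op[name],
                 feed_dict={self.t_w_input[name]: sess.run(self.w[name])})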