def model(self, reuse=False):
    with tf.variable_scope("G", reuse=reuse):
        z = tf.random_uniform([self.batch_size, 1024], minval=-1.0, maxval=1.0)
        fc1 = fc(z, 1024, 7*7*256, bn=True, activation_fn=tf.nn.relu, scope="fc1")
        fc1 = tf.reshape(fc1, [-1, 7, 7, 256])
        convt1 = convt(fc1, kernel=[3, 3, 256, 256], stride=[1, 2, 2, 1],
                       output=[self.batch_size, 14, 14, 256], bn=True,
                       activation_fn=tf.nn.relu, scope="convt1")
        # Disabled: extra stride-1 block at 14x14.
        # convt2 = convt(convt1, kernel=[3, 3, 256, 256], stride=[1, 1, 1, 1],
        #                output=[self.batch_size, 14, 14, 256], bn=True,
        #                activation_fn=tf.nn.relu, scope="convt2")
        convt3 = convt(convt1, kernel=[3, 3, 256, 256], stride=[1, 2, 2, 1],
                       output=[self.batch_size, 28, 28, 256], bn=True,
                       activation_fn=tf.nn.relu, scope="convt3")
        # Disabled: extra stride-1 block at 28x28.
        # convt4 = convt(convt3, kernel=[3, 3, 256, 256], stride=[1, 1, 1, 1],
        #                output=[self.batch_size, 28, 28, 256], bn=True,
        #                activation_fn=tf.nn.relu, scope="convt4")
        convt5 = convt(convt3, kernel=[3, 3, 128, 256], stride=[1, 2, 2, 1],
                       output=[self.batch_size, 56, 56, 128], bn=True,
                       activation_fn=tf.nn.relu, scope="convt5")
        # Disabled: 56x56x3 output head.
        # convt6 = convt(convt5, kernel=[3, 3, 64, 128], stride=[1, 1, 1, 1],
        #                output=[self.batch_size, 56, 56, 64], bn=True,
        #                activation_fn=tf.nn.relu, scope="convt6")
        # convt7 = convt(convt6, kernel=[3, 3, 3, 64], stride=[1, 1, 1, 1],
        #                output=[self.batch_size, 56, 56, 3],
        #                activation_fn=tf.nn.tanh, scope="convt7")
        # Active output head: upsample to 112x112 and map to 3 channels.
        convt6 = convt(convt5, kernel=[3, 3, 64, 128], stride=[1, 2, 2, 1],
                       output=[self.batch_size, 112, 112, 64], bn=True,
                       activation_fn=tf.nn.relu, scope="convt6")
        convt7 = convt(convt6, kernel=[3, 3, 3, 64], stride=[1, 1, 1, 1],
                       output=[self.batch_size, 112, 112, 3],
                       activation_fn=tf.nn.tanh, scope="convt7")
        return convt7
def build_graph(self, ph_ob):
    ob = ph_ob[-1]
    assert len(ob.shape.as_list()) == 4  # B, H, W, C
    with tf.variable_scope("obfilter"):
        self.ob_rms = RunningMeanStd(shape=ob.shape.as_list()[1:3] + [1])
    ob_norm = ob[:, :, :, -1:]
    ob_norm = tf.cast(ob_norm, tf.float32)
    ob_norm = tf.clip_by_value((ob_norm - self.ob_rms.mean) / self.ob_rms.std, -5.0, 5.0)

    # Random target network
    xr = tf.nn.leaky_relu(conv(ob_norm, 'c1r', nf=self.convfeat * 1, rf=8, stride=4, init_scale=np.sqrt(2)))
    xr = tf.nn.leaky_relu(conv(xr, 'c2r', nf=self.convfeat * 2 * 1, rf=4, stride=2, init_scale=np.sqrt(2)))
    xr = tf.nn.leaky_relu(conv(xr, 'c3r', nf=self.convfeat * 2 * 1, rf=3, stride=1, init_scale=np.sqrt(2)))
    rgbr = [to2d(xr)]
    X_r = fc(rgbr[0], 'fc1r', nh=self.rep_size, init_scale=np.sqrt(2))

    # Predictor network
    xrp = tf.nn.leaky_relu(conv(ob_norm, 'c1rp_pred', nf=self.convfeat, rf=8, stride=4, init_scale=np.sqrt(2)))
    xrp = tf.nn.leaky_relu(conv(xrp, 'c2rp_pred', nf=self.convfeat * 2, rf=4, stride=2, init_scale=np.sqrt(2)))
    xrp = tf.nn.leaky_relu(conv(xrp, 'c3rp_pred', nf=self.convfeat * 2, rf=3, stride=1, init_scale=np.sqrt(2)))
    rgbrp = to2d(xrp)
    X_r_hat = tf.nn.relu(fc(rgbrp, 'fc1r_hat1_pred', nh=256 * self.enlargement, init_scale=np.sqrt(2)))
    X_r_hat = tf.nn.relu(fc(X_r_hat, 'fc1r_hat2_pred', nh=256 * self.enlargement, init_scale=np.sqrt(2)))
    X_r_hat = fc(X_r_hat, 'fc1r_hat3_pred', nh=self.rep_size, init_scale=np.sqrt(2))

    self.feat_var = tf.reduce_mean(tf.nn.moments(X_r, axes=[0])[1])
    self.max_feat = tf.reduce_max(tf.abs(X_r))
    self.int_rew = tf.reduce_mean(tf.square(tf.stop_gradient(X_r) - X_r_hat), axis=-1, keep_dims=True)

    targets = tf.stop_gradient(X_r)
    # self.aux_loss = tf.reduce_mean(tf.square(noisy_targets-X_r_hat))
    self.aux_loss = tf.reduce_mean(tf.square(targets - X_r_hat), -1)
    mask = tf.random_uniform(shape=tf.shape(self.aux_loss), minval=0., maxval=1., dtype=tf.float32)
    mask = tf.cast(mask < self.proportion_of_exp_used_for_predictor_update, tf.float32)
    self.aux_loss = tf.reduce_sum(mask * self.aux_loss) / tf.maximum(tf.reduce_sum(mask), 1.)

    self._predictor = U.function([ob], [self.int_rew])
def __init__(self, sess, ob_space, ac_space, nenvs, nsteps, units_per_hlayer, reuse=False, activ_fcn='relu6'):  # pylint: disable=W0613
    # This method is called with nbatch = nenvs * nsteps.
    # Input and output dimensions
    nd, = ob_space.shape
    nbatch = nenvs * nsteps
    ob_shape = (nbatch, nd)
    nact = ac_space.n
    X = tf.placeholder(tf.float32, ob_shape, name='Ob')  # obs
    with tf.variable_scope("model", reuse=reuse):
        if activ_fcn == 'relu6':
            h1 = tf.nn.relu6(fc(X, 'pi_vf_fc1', nh=units_per_hlayer[0]))
            h2 = tf.nn.relu6(fc(h1, 'pi_vf_fc2', nh=units_per_hlayer[1]))
        elif activ_fcn == 'elu':
            h1 = tf.nn.elu(fc(X, 'pi_vf_fc1', nh=units_per_hlayer[0]))
            h2 = tf.nn.elu(fc(h1, 'pi_vf_fc2', nh=units_per_hlayer[1]))
        elif activ_fcn == 'mixed':
            h1 = tf.nn.relu6(fc(X, 'pi_vf_fc1', nh=units_per_hlayer[0]))
            h2 = tf.nn.relu6(fc(h1, 'pi_vf_fc2', nh=units_per_hlayer[1]))

        # The output matrix [nenvs * nsteps, h_units] of layer 2 is reshaped to
        # [nenvs, nsteps, h_units] for RNN processing.
        rnn_cell = tf.contrib.rnn.GRUCell(num_units=units_per_hlayer[1])
        rnn_input = tf.reshape(h2, shape=[nenvs, nsteps, units_per_hlayer[1]])
        rnn_state_in = rnn_cell.zero_state(batch_size=nenvs, dtype=tf.float32)  # reset the state in every training iteration
        rnn_output, rnn_state_out = tf.nn.dynamic_rnn(inputs=rnn_input, cell=rnn_cell,
                                                      initial_state=rnn_state_in, dtype=tf.float32,
                                                      scope="model" + '_rnn')
        # The output of the recurrent cell is reshaped back to the original matrix shape.
        rnn_output = tf.reshape(rnn_output, shape=[-1, units_per_hlayer[1]])

        if activ_fcn == 'relu6':
            activ = tf.nn.relu6
        elif activ_fcn == 'elu':
            activ = tf.nn.elu
        elif activ_fcn == 'mixed':
            activ = tf.nn.tanh
        h3 = activ(fc(rnn_output, 'pi_fc1', nh=units_per_hlayer[2]))
        pi_logit = fc(h3, 'pi', nact, init_scale=0.01)
        pi = tf.nn.softmax(pi_logit)
        vf = fc(rnn_output, 'vf', 1)[:, 0]  # predicted value of input state

    self.pd = CategoricalPd(pi_logit)  # pdparam
    a0 = self.pd.sample()  # returns action index: 0,1
    # a0 = tf.argmax(pi_logit, axis=1)
    neglogp0 = self.pd.neglogp(a0)

    # The GRU keeps a single hidden state vector per environment (no separate cell state).
    self.initial_state = np.zeros([nenvs, units_per_hlayer[1]])

    def step(ob, r_state, *_args, **_kwargs):
        a, pi_l, v, r_state_out, neglogp = sess.run([a0, pi_logit, vf, rnn_state_out, neglogp0],
                                                    {X: ob, rnn_state_in: r_state})
        return a, pi_l, v, r_state_out, neglogp

    def value(ob, r_state, *_args, **_kwargs):
        return sess.run(vf, {X: ob, rnn_state_in: r_state})

    self.X = X
    self.pi = pi
    self.pi_logit = pi_logit
    self.vf = vf
    self.ac = a0
    self.rnn_state_in = rnn_state_in
    self.rnn_state_out = rnn_state_out
    self.step = step
    self.value = value
def define_action_balance_rew(self, units, rep_size):
    logger.info("Using Action Balance BONUS ****************************************************")
    # (s, a) seen frequency as bonus
    with tf.variable_scope('action_balance', reuse=tf.AUTO_REUSE):
        ac_one_hot = tf.one_hot(self.ph_ac, self.ac_space.n, axis=2)
        assert ac_one_hot.get_shape().ndims == 3
        assert ac_one_hot.get_shape().as_list() == [None, None, self.ac_space.n], ac_one_hot.get_shape().as_list()
        ac_one_hot = tf.reshape(ac_one_hot, (-1, self.ac_space.n))

        def cond(x):
            return tf.concat([x, ac_one_hot], 1)

        # Random target network.
        for ph in self.ph_ob.values():
            if len(ph.shape.as_list()) == 3:  # B,T,S
                logger.info("Mlp Target: using '%s' shape %s as input" % (ph.name, str(ph.shape)))
                xr = ph[:, :-1]
                xr = tf.cast(xr, tf.float32)
                xr = tf.reshape(xr, (-1, *ph.shape.as_list()[-1:]))
                xr = tf.clip_by_value((xr - self.ph_mean) / self.ph_std, -5.0, 5.0)
                xr = tf.nn.relu(fc(cond(xr), 'fc_sa0_r', nh=units, init_scale=np.sqrt(2)))
                xr = tf.nn.relu(fc(cond(xr), 'fc_sa1_r', nh=units, init_scale=np.sqrt(2)))
                X_r = fc(cond(xr), 'fc_sa2_r', nh=rep_size, init_scale=np.sqrt(2))

        # Predictor network. The layers need their own scope names; reusing the target's
        # names under AUTO_REUSE would collide (the layer widths also differ).
        for ph in self.ph_ob.values():
            if len(ph.shape.as_list()) == 3:  # B,T,S
                logger.info("Mlp Predictor: using '%s' shape %s as input" % (ph.name, str(ph.shape)))
                xrp = ph[:, :-1]
                xrp = tf.cast(xrp, tf.float32)
                xrp = tf.reshape(xrp, (-1, *ph.shape.as_list()[-1:]))
                xrp = tf.clip_by_value((xrp - self.ph_mean) / self.ph_std, -5.0, 5.0)
                xrp = tf.nn.relu(fc(cond(xrp), 'fc_sa0_pred', nh=units * 2, init_scale=np.sqrt(2)))
                xrp = tf.nn.relu(fc(cond(xrp), 'fc_sa1_pred', nh=units * 2, init_scale=np.sqrt(2)))
                X_r_hat = fc(cond(xrp), 'fc_sa2_pred', nh=rep_size, init_scale=np.sqrt(2))

        self.feat_var_ab = tf.reduce_mean(tf.nn.moments(X_r, axes=[0])[1])
        self.max_feat_ab = tf.reduce_max(tf.abs(X_r))
        self.int_rew_ab = tf.reduce_mean(tf.square(tf.stop_gradient(X_r) - X_r_hat), axis=-1, keep_dims=True)
        self.int_rew_ab = tf.reshape(self.int_rew_ab, (self.sy_nenvs, self.sy_nsteps - 1))

        noisy_targets = tf.stop_gradient(X_r)
        # self.aux_loss = tf.reduce_mean(tf.square(noisy_targets-X_r_hat))
        self.aux_loss_ab = tf.reduce_mean(tf.square(noisy_targets - X_r_hat), -1)
        mask = tf.random_uniform(shape=tf.shape(self.aux_loss_ab), minval=0., maxval=1., dtype=tf.float32)
        mask = tf.cast(mask < self.proportion_of_exp_used_for_predictor_update, tf.float32)
        self.aux_loss_ab = tf.reduce_sum(mask * self.aux_loss_ab) / tf.maximum(tf.reduce_sum(mask), 1.)
def define_dynamics_prediction_rew(self, convfeat, rep_size, enlargement):
    # Dynamics loss with random features.
    activ = tf.nn.relu
    # Random target network: encodes the next state (index 1:).
    for ph in self.ph_ob.values():
        if len(ph.shape.as_list()) == 3:  # B,T,S
            logger.info("Mlp Target: using '%s' shape %s as input" % (ph.name, str(ph.shape)))
            xr = ph[:, 1:]  # next states
            xr = tf.cast(xr, tf.float32)
            xr = tf.reshape(xr, (-1, *ph.shape.as_list()[-1:]))
            xr = tf.clip_by_value((xr - self.ph_mean) / self.ph_std, -5.0, 5.0)
            xr = activ(fc(xr, 'fc_0_r', nh=32, init_scale=np.sqrt(2)))
            xr = activ(fc(xr, 'fc_1_r', nh=32, init_scale=np.sqrt(2)))
            X_r = fc(xr, 'fc_2_r', nh=rep_size, init_scale=np.sqrt(2))

    # Predictor network: current state conditioned on the taken action
    # predicts the random features of the next state.
    ac_one_hot = tf.one_hot(self.ph_ac, self.ac_space.n, axis=2)
    assert ac_one_hot.get_shape().ndims == 3
    assert ac_one_hot.get_shape().as_list() == [None, None, self.ac_space.n], ac_one_hot.get_shape().as_list()
    ac_one_hot = tf.reshape(ac_one_hot, (-1, self.ac_space.n))

    def cond(x):
        return tf.concat([x, ac_one_hot], 1)

    for ph in self.ph_ob.values():
        if len(ph.shape.as_list()) == 3:  # B,T,S
            logger.info("Mlp Predictor: using '%s' shape %s as input" % (ph.name, str(ph.shape)))
            xrp = ph[:, :-1]  # current states
            xrp = tf.cast(xrp, tf.float32)
            xrp = tf.reshape(xrp, (-1, *ph.shape.as_list()[-1:]))
            xrp = tf.clip_by_value((xrp - self.ph_mean) / self.ph_std, -5.0, 5.0)
            xrp = activ(fc(cond(xrp), 'fc_0_pred', nh=32, init_scale=np.sqrt(2)))
            xrp = activ(fc(cond(xrp), 'fc_1_pred', nh=32, init_scale=np.sqrt(2)))
            X_r_hat = fc(cond(xrp), 'fc_2r_pred', nh=rep_size, init_scale=np.sqrt(2))

    self.feat_var = tf.reduce_mean(tf.nn.moments(X_r, axes=[0])[1])
    self.max_feat = tf.reduce_max(tf.abs(X_r))
    self.int_rew = tf.reduce_mean(tf.square(tf.stop_gradient(X_r) - X_r_hat), axis=-1, keep_dims=True)
    self.int_rew = tf.reshape(self.int_rew, (self.sy_nenvs, self.sy_nsteps - 1))

    noisy_targets = tf.stop_gradient(X_r)
    # self.aux_loss = tf.reduce_mean(tf.square(noisy_targets-X_r_hat))
    self.aux_loss = tf.reduce_mean(tf.square(noisy_targets - X_r_hat), -1)
    mask = tf.random_uniform(shape=tf.shape(self.aux_loss), minval=0., maxval=1., dtype=tf.float32)
    mask = tf.cast(mask < self.proportion_of_exp_used_for_predictor_update, tf.float32)
    self.aux_loss = tf.reduce_sum(mask * self.aux_loss) / tf.maximum(tf.reduce_sum(mask), 1.)
def apply_policy(ph_ob, ph_new, ph_istate, reuse, scope, hidsize, memsize, extrahid,
                 sy_nenvs, sy_nsteps, pdparamsize, rec_gate_init):
    data_format = 'NHWC'
    ph = ph_ob
    assert len(ph.shape.as_list()) == 5  # B,T,H,W,C
    logger.info("CnnGruPolicy: using '%s' shape %s as image input" % (ph.name, str(ph.shape)))
    X = tf.cast(ph, tf.float32) / 255.
    X = tf.reshape(X, (-1, *ph.shape.as_list()[-3:]))

    activ = tf.nn.relu
    yes_gpu = any(get_available_gpus())
    with tf.variable_scope(scope, reuse=reuse), tf.device('/gpu:0' if yes_gpu else '/cpu:0'):
        X = activ(conv(X, 'c1', nf=32, rf=8, stride=4, init_scale=np.sqrt(2), data_format=data_format))
        X = activ(conv(X, 'c2', nf=64, rf=4, stride=2, init_scale=np.sqrt(2), data_format=data_format))
        X = activ(conv(X, 'c3', nf=64, rf=4, stride=1, init_scale=np.sqrt(2), data_format=data_format))
        X = to2d(X)
        X = activ(fc(X, 'fc1', nh=hidsize, init_scale=np.sqrt(2)))
        X = tf.reshape(X, [sy_nenvs, sy_nsteps, hidsize])
        X, snext = tf.nn.dynamic_rnn(GRUCell(memsize, rec_gate_init=rec_gate_init),
                                     (X, ph_new[:, :, None]),
                                     dtype=tf.float32, time_major=False, initial_state=ph_istate)
        X = tf.reshape(X, (-1, memsize))
        Xtout = X
        if extrahid:
            Xtout = X + activ(fc(Xtout, 'fc2val', nh=memsize, init_scale=0.1))
            X = X + activ(fc(X, 'fc2act', nh=memsize, init_scale=0.1))
        pdparam = fc(X, 'pd', nh=pdparamsize, init_scale=0.01)
        vpred_int = fc(Xtout, 'vf_int', nh=1, init_scale=0.01)
        vpred_ext = fc(Xtout, 'vf_ext', nh=1, init_scale=0.01)
        pdparam = tf.reshape(pdparam, (sy_nenvs, sy_nsteps, pdparamsize))
        vpred_int = tf.reshape(vpred_int, (sy_nenvs, sy_nsteps))
        vpred_ext = tf.reshape(vpred_ext, (sy_nenvs, sy_nsteps))
    return pdparam, vpred_int, vpred_ext, snext
def define_dynamics_prediction_rew(self, convfeat, rep_size, enlargement):
    # Dynamics based bonus.
    # Random target network: encodes the next frame.
    for ph in self.ph_ob.values():
        if len(ph.shape.as_list()) == 5:  # B,T,H,W,C
            logger.info("CnnTarget: using '%s' shape %s as image input" % (ph.name, str(ph.shape)))
            xr = ph[:, 1:]
            xr = tf.cast(xr, tf.float32)
            xr = tf.reshape(xr, (-1, *ph.shape.as_list()[-3:]))[:, :, :, -1:]
            xr = tf.clip_by_value((xr - self.ph_mean) / self.ph_std, -5.0, 5.0)
            xr = tf.nn.leaky_relu(conv(xr, 'c1r', nf=convfeat * 1, rf=8, stride=4, init_scale=np.sqrt(2)))
            xr = tf.nn.leaky_relu(conv(xr, 'c2r', nf=convfeat * 2 * 1, rf=4, stride=2, init_scale=np.sqrt(2)))
            xr = tf.nn.leaky_relu(conv(xr, 'c3r', nf=convfeat * 2 * 1, rf=3, stride=1, init_scale=np.sqrt(2)))
            rgbr = [to2d(xr)]
            X_r = fc(rgbr[0], 'fc1r', nh=rep_size, init_scale=np.sqrt(2))

    # Predictor network: current frame conditioned on the taken action.
    ac_one_hot = tf.one_hot(self.ph_ac, self.ac_space.n, axis=2)
    assert ac_one_hot.get_shape().ndims == 3
    assert ac_one_hot.get_shape().as_list() == [None, None, self.ac_space.n], ac_one_hot.get_shape().as_list()
    ac_one_hot = tf.reshape(ac_one_hot, (-1, self.ac_space.n))

    def cond(x):
        return tf.concat([x, ac_one_hot], 1)

    for ph in self.ph_ob.values():
        if len(ph.shape.as_list()) == 5:  # B,T,H,W,C
            logger.info("CnnTarget: using '%s' shape %s as image input" % (ph.name, str(ph.shape)))
            xrp = ph[:, :-1]
            xrp = tf.cast(xrp, tf.float32)
            xrp = tf.reshape(xrp, (-1, *ph.shape.as_list()[-3:]))
            # ph_mean and ph_std are 84x84x1, so the average of the last channel is
            # subtracted from all channels. Is this ok?
            xrp = tf.clip_by_value((xrp - self.ph_mean) / self.ph_std, -5.0, 5.0)
            xrp = tf.nn.leaky_relu(conv(xrp, 'c1rp_pred', nf=convfeat, rf=8, stride=4, init_scale=np.sqrt(2)))
            xrp = tf.nn.leaky_relu(conv(xrp, 'c2rp_pred', nf=convfeat * 2, rf=4, stride=2, init_scale=np.sqrt(2)))
            xrp = tf.nn.leaky_relu(conv(xrp, 'c3rp_pred', nf=convfeat * 2, rf=3, stride=1, init_scale=np.sqrt(2)))
            rgbrp = to2d(xrp)
            # X_r_hat = tf.nn.relu(fc(rgb[0], 'fc1r_hat1', nh=256 * enlargement, init_scale=np.sqrt(2)))
            X_r_hat = tf.nn.relu(fc(cond(rgbrp), 'fc1r_hat1_pred', nh=256 * enlargement, init_scale=np.sqrt(2)))
            X_r_hat = tf.nn.relu(fc(cond(X_r_hat), 'fc1r_hat2_pred', nh=256 * enlargement, init_scale=np.sqrt(2)))
            X_r_hat = fc(cond(X_r_hat), 'fc1r_hat3_pred', nh=rep_size, init_scale=np.sqrt(2))

    self.feat_var = tf.reduce_mean(tf.nn.moments(X_r, axes=[0])[1])
    self.max_feat = tf.reduce_max(tf.abs(X_r))
    self.int_rew = tf.reduce_mean(tf.square(tf.stop_gradient(X_r) - X_r_hat), axis=-1, keep_dims=True)
    self.int_rew = tf.reshape(self.int_rew, (self.sy_nenvs, self.sy_nsteps - 1))

    noisy_targets = tf.stop_gradient(X_r)
    self.aux_loss = tf.reduce_mean(tf.square(noisy_targets - X_r_hat), -1)
    mask = tf.random_uniform(shape=tf.shape(self.aux_loss), minval=0., maxval=1., dtype=tf.float32)
    mask = tf.cast(mask < self.proportion_of_exp_used_for_predictor_update, tf.float32)
    self.aux_loss = tf.reduce_sum(mask * self.aux_loss) / tf.maximum(tf.reduce_sum(mask), 1.)
def get_pdparam(self, x):
    pdparam = fc(x, name='pd', units=self.pdparamsize, activation=None)
    vpred = fc(x, name='value_function_output', units=1, activation=None)
    return pdparam, vpred
def __init__(self, config, input_, is_training=True):
    batch_size = input_.batch_size
    num_steps = input_.num_steps  # for truncated backprop
    lstm_width = config.hidden_size  # size of hidden units
    in_shape = input_.input_data.get_shape()

    initializer = tf.random_uniform_initializer(-0.08, 0.08, dtype=tf.float32)
    enc_cell = utils.LSTM(size=lstm_width, init=initializer)
    dec_cell = utils.LSTM(size=lstm_width, init=initializer)

    # Encoder: ( fc > elu > lstm )
    enc_state = enc_cell.zero_state(batch_size, tf.float32)
    enc_states = []
    with tf.variable_scope("enc"):
        for i in range(num_steps):
            if i > 0:
                tf.get_variable_scope().reuse_variables()
            enc_inputs = input_.input_data[:, i, :]
            # 2d -> lstm width
            enc_cell_in = utils.fc(enc_inputs, enc_inputs.get_shape()[-1], lstm_width,
                                   init_w=initializer, a_fn=tf.nn.elu)
            (enc_cell_out, enc_state) = enc_cell(enc_cell_in, enc_state)
            enc_states.append(enc_state)
    # for test
    # self.enc_final_state = enc_state

    # Decoder: ( fc > elu > lstm > v^T tanh(W1 e + W2 d) > softmax > argmax )
    dec_state = enc_states[-1]
    dec_inputs = tf.constant(0.0, shape=[batch_size, 2], dtype=tf.float32)  # start symbol
    self.C_prob = []
    self.C_idx = []
    with tf.variable_scope("dec"):
        for i in range(num_steps):
            if i > 0:
                tf.get_variable_scope().reuse_variables()
            dec_cell_in = utils.fc(dec_inputs, dec_inputs.get_shape()[-1], lstm_width,
                                   init_w=initializer, a_fn=tf.nn.elu)
            (dec_cell_out, dec_state) = dec_cell(dec_cell_in, dec_state)
            # W1, W2 are square matrices (SxS), where S is the size of the hidden state.
            W1 = tf.get_variable("W1", [lstm_width, lstm_width], dtype=tf.float32, initializer=initializer)
            W2 = tf.get_variable("W2", [lstm_width, lstm_width], dtype=tf.float32, initializer=initializer)
            # v is a vector (S)
            v = tf.get_variable("v", [lstm_width], dtype=tf.float32, initializer=initializer)
            # W2 (SxS) d_i (S) = W2d (S)
            W2d = tf.matmul(dec_state.h, W2)
            # u_i (n)
            u_i = []
            for j in range(num_steps):
                # W1 (SxS) e_j (S) = W1e (S); t = tanh(W1e + W2d) (S)
                t = tf.tanh(tf.matmul(enc_states[j].h, W1) + W2d)
                # v^T (S) t (S) = u_ij (1); t is actually BxS, so reduce over the feature axis
                u_ij = tf.reduce_sum(v * t, axis=1)
                u_i.append(u_ij)
            u_i = tf.stack(u_i, axis=1)
            probs = tf.nn.softmax(u_i)
            C_i = tf.reshape(tf.cast(tf.argmax(probs, axis=1), tf.int32), shape=[batch_size, 1])
            self.C_idx.append(C_i)
            # Feed the pointed-to input element as the next decoder input.
            first = tf.expand_dims(tf.range(batch_size), axis=1)
            dec_inputs = tf.gather_nd(input_.input_data, tf.concat(values=[first, C_i], axis=1))
            self.C_prob.append(probs)

    self.C_prob = tf.squeeze(tf.stack(self.C_prob, axis=1))
    self.C_idx = tf.squeeze(tf.stack(self.C_idx, axis=1))

    targets = tf.one_hot(input_.targets, depth=51)
    self.loss = tf.nn.l2_loss(targets - self.C_prob)
    opt = tf.train.AdadeltaOptimizer(learning_rate=0.001, rho=0.95, epsilon=1e-6)
    self.train_op = opt.minimize(self.loss)
def define_rew_discriminator_v2(self, convfeat, rep_size, use_rew=False):
    output_shape = [self.sy_nenvs * (self.sy_nsteps - 1)]
    sample_prob = tf.reshape(self.sample_agent_prob, tf.stack(output_shape))
    game_score = tf.reshape(self.game_score, tf.stack([self.sy_nenvs * (self.sy_nsteps - 1), 1]))
    rew_agent_label = tf.reshape(self.rew_agent_label, tf.stack([self.sy_nenvs * (self.sy_nsteps - 1), 1]))
    # rew_agent_label = tf.one_hot(self.rew_agent_label, self.num_agents, axis=-1)
    # rew_agent_label = tf.reshape(rew_agent_label, (-1, self.num_agents))

    for ph in self.ph_ob.values():
        if len(ph.shape.as_list()) == 5:  # B,T,H,W,C
            phi = ph[:, 1:]
            phi = tf.cast(phi, tf.float32)
            phi = tf.reshape(phi, (-1, *ph.shape.as_list()[-3:]))[:, :, :, -1:]
            phi = phi / 255.

            last_rew_ob = self.last_rew_ob
            last_rew_ob = tf.cast(last_rew_ob, tf.float32)
            last_rew_ob = tf.reshape(last_rew_ob, (-1, *last_rew_ob.shape.as_list()[-3:]))[:, :, :, -1:]
            last_rew_ob = last_rew_ob / 255.

            if use_rew:
                phi = tf.concat([phi, last_rew_ob], axis=-1)

            phi = tf.nn.leaky_relu(conv(phi, 'c1r', nf=convfeat * 1, rf=8, stride=4, init_scale=np.sqrt(2)))  # [20,20] [8,8]
            phi = tf.nn.leaky_relu(conv(phi, 'c2r', nf=convfeat * 2 * 1, rf=4, stride=2, init_scale=np.sqrt(2)))  # [9,9] [7,7]
            phi = tf.nn.leaky_relu(conv(phi, 'c3r', nf=convfeat * 2 * 1, rf=3, stride=1, init_scale=np.sqrt(2)))
            phi = to2d(phi)

            phi = tf.nn.relu(fc(phi, 'fc1r', nh=rep_size, init_scale=np.sqrt(2)))
            phi = tf.nn.relu(fc(phi, 'fc2r', nh=rep_size, init_scale=np.sqrt(2)))
            disc_logits = fc(phi, 'fc3r', nh=self.num_agents, init_scale=np.sqrt(2))

    one_hot_gidx = tf.one_hot(self.ph_agent_idx, self.num_agents, axis=-1)
    one_hot_gidx = tf.reshape(one_hot_gidx, (-1, self.num_agents))

    flatten_all_div_prob = tf.nn.softmax(disc_logits, axis=-1)
    all_div_prob = tf.reshape(flatten_all_div_prob, (self.sy_nenvs, self.sy_nsteps - 1, self.num_agents))

    sp_prob = tf.reduce_sum(one_hot_gidx * flatten_all_div_prob, axis=1)
    sp_prob = tf.reshape(sp_prob, (self.sy_nenvs, self.sy_nsteps - 1))

    div_rew = -1 * tf.nn.softmax_cross_entropy_with_logits_v2(logits=disc_logits, labels=one_hot_gidx)
    base_rew = tf.log(0.01)
    div_rew = div_rew - tf.log(sample_prob)
    div_rew = tf.reshape(div_rew, (self.sy_nenvs, self.sy_nsteps - 1))

    disc_pdtype = CategoricalPdType(self.num_agents)
    disc_pd = disc_pdtype.pdfromflat(disc_logits)
    disc_nlp = disc_pd.neglogp(rew_agent_label)

    return disc_logits, all_div_prob, sp_prob, div_rew, disc_pd, disc_nlp
def __init__(self, ob_space, ac_space, hidsize, ob_mean, ob_std, feat_dim, layernormalize,
             nl, n_env, n_steps, reuse, n_lstm=256, scope="policy"):
    super(ErrorPredRnnPolicy, self).__init__(ob_space, ac_space, hidsize, ob_mean, ob_std,
                                             feat_dim, layernormalize, nl, n_env, n_steps,
                                             reuse, n_lstm, scope)
    with tf.variable_scope(scope):
        self.flat_masks_ph = tf.reshape(self.masks_ph, [self.n_env * self.n_steps])
        self.pred_error = tf.placeholder(dtype=tf.float32, shape=(self.n_env, self.n_steps, self.hidsize),
                                         name='pred_error')  # prediction error
        self.flat_pred_error = flatten_two_dims(self.pred_error)
        self.obs_pred = tf.placeholder(dtype=tf.float32, shape=(self.n_env, self.n_steps, self.hidsize),
                                       name='obs_pred')
        self.flat_obs_pred = flatten_two_dims(self.obs_pred)

        with tf.variable_scope(scope, reuse=self.reuse):
            x = tf.concat([self.flat_features, self.flat_obs_pred, self.flat_pred_error], axis=1)
            input_sequence = batch_to_seq(x, self.n_env, self.n_steps)
            masks = batch_to_seq(self.masks_ph, self.n_env, self.n_steps)
            rnn_output, self.snew = lstm(input_sequence, masks, self.states_ph, 'lstm1',
                                         n_hidden=n_lstm, layer_norm=False)
            rnn_output = seq_to_batch(rnn_output)
            rnn_output = layernorm(rnn_output)

            ## Concat
            q = self.flat_features
            q = tf.concat([q, rnn_output], axis=1)
            q = fc(q, units=hidsize, activation=activ, name="fc1")
            q = fc(q, units=hidsize, activation=activ, name="fc2")
            pdparam, vpred = self.get_pdparam(q)

        self.pdparam = pdparam = unflatten_first_dim(pdparam, self.sh)
        self.vpred = unflatten_first_dim(vpred, self.sh)[:, :, 0]
        self.pd = pd = self.ac_pdtype.proba_distribution_from_flat(pdparam)
        self.a_samp = pd.sample()
        self.entropy = pd.entropy()
        self.nlp_samp = pd.neglogp(self.a_samp)
def __init__(self, input_states, taken_actions, num_actions, scope_name,
             shared_network=True, layer_norm=True):
    """
    input_states [batch_size, obs_size]: input state vectors to predict actions for
    taken_actions [batch_size, 1]: actions taken by the old policy (used for training)
    num_actions (int): number of discrete actions
    scope_name (str): scope name (i.e. policy or policy_old)
    shared_network (bool): whether actor and critic share part of the network
    layer_norm (bool): whether to apply layer normalization
    """
    with tf.variable_scope(scope_name):
        # Construct MLP latent networks.
        self.policy_latent = mlp(num_layers=2, num_hidden=128, activation=tf.nn.relu,
                                 layer_norm=layer_norm)(input_states)
        # Equivalent manual implementation kept for reference:
        # layer = tf.layers.flatten(input_states)
        # for i in range(2):
        #     layer = tf.layers.dense(layer, 128, activation=None,
        #                             kernel_initializer=ortho_init(np.sqrt(2.0)),
        #                             bias_initializer=tf.constant_initializer(0.0),
        #                             name="mlp_fc{}".format(i))
        #     if layer_norm:
        #         layer = tf.contrib.layers.layer_norm(layer, center=True, scale=True)
        #     layer = tf.nn.relu(layer)
        # self.policy_latent = layer
        if shared_network:
            self.value_latent = self.policy_latent
        else:
            # (The same manual pattern applies to self.value_latent.)
            self.value_latent = mlp(num_layers=2, num_hidden=128, activation=tf.nn.relu,
                                    layer_norm=layer_norm)(input_states)

        # Additional flatten layers (may be unnecessary).
        self.value_latent = tf.layers.flatten(self.value_latent)
        self.policy_latent = tf.layers.flatten(self.policy_latent)

        # ============================ Policy branch pi(a_t | s_t; theta)
        # latent (batch, 128) -> fc -> pdparams (batch, ncat) -> softmax -> action probabilities
        self.pdtype = CategoricalPdType(num_actions)
        self.pd, self.pi = self.pdtype.pdfromlatent(self.policy_latent, init_scale=0.01)
        # Take an action from the policy's distribution.
        self.action = self.pd.sample()

        # ============================ Value branch V(s_t; theta)
        # Note: fc has no activation. Shape: [batch, 1] -> [batch]
        self.value = fc(self.value_latent, 'v', 1)
        self.value = self.value[:, 0]

        # Check numericals.
        self.pi = tf.check_numerics(self.pi, "Invalid value for self.pi")
        self.value = tf.check_numerics(self.value, "Invalid value for self.value")
def define_bottleneck_rew(self, convfeat, rep_size, enlargement, beta=1e-2, rew_counter=None):
    logger.info("Using Curiosity Bottleneck ****************************************************")
    v_target = tf.reshape(self.ph_ret_ext, (-1, 1))

    if rew_counter is None:
        sched_coef = 1.
    else:
        sched_coef = tf.minimum(rew_counter / 1000, 1.)

    # Random target network.
    for ph in self.ph_ob.values():
        if len(ph.shape.as_list()) == 5:  # B,T,H,W,C
            logger.info("CnnTarget: using '%s' shape %s as image input" % (ph.name, str(ph.shape)))
            xr = ph[:, 1:]
            xr = tf.cast(xr, tf.float32)
            xr = tf.reshape(xr, (-1, *ph.shape.as_list()[-3:]))[:, :, :, -1:]
            xr = tf.clip_by_value((xr - self.ph_mean) / self.ph_std, -5.0, 5.0)
            xr = tf.nn.leaky_relu(conv(xr, 'c1r', nf=convfeat * 1, rf=8, stride=4, init_scale=np.sqrt(2)))
            xr = tf.nn.leaky_relu(conv(xr, 'c2r', nf=convfeat * 2 * 1, rf=4, stride=2, init_scale=np.sqrt(2)))
            xr = tf.nn.leaky_relu(conv(xr, 'c3r', nf=convfeat * 2 * 1, rf=3, stride=1, init_scale=np.sqrt(2)))
            rgbr = [to2d(xr)]
            mu = fc(rgbr[0], 'fc_mu', nh=rep_size, init_scale=np.sqrt(2))
            sigma = tf.nn.softplus(fc(rgbr[0], 'fc_sigma', nh=rep_size, init_scale=np.sqrt(2)))
            z = mu + sigma * tf.random_normal(tf.shape(mu), 0, 1, dtype=tf.float32)
            v = fc(z, 'value', nh=1, init_scale=np.sqrt(2))

    self.feat_var = tf.reduce_mean(sigma)
    self.max_feat = tf.reduce_max(tf.abs(z))

    # KL divergence between N(mu, sigma) and the standard normal prior.
    self.kl = 0.5 * tf.reduce_sum(
        tf.square(mu) + tf.square(sigma) - tf.log(1e-8 + tf.square(sigma)) - 1,
        axis=-1, keep_dims=True)
    self.int_rew = tf.stop_gradient(self.kl)
    self.int_rew = tf.reshape(self.int_rew, (self.sy_nenvs, self.sy_nsteps - 1))

    self.aux_loss = sched_coef * tf.square(v_target - v) + beta * self.kl
    mask = tf.random_uniform(shape=tf.shape(self.aux_loss), minval=0., maxval=1., dtype=tf.float32)
    mask = tf.cast(mask < self.proportion_of_exp_used_for_predictor_update, tf.float32)
    self.aux_loss = tf.reduce_sum(mask * self.aux_loss) / tf.maximum(tf.reduce_sum(mask), 1.)
def __init__(self, sess, ob_space, loc_space, ac_space, nbatch, nsteps, max_timesteps,
             reuse=False, seed=0):
    nenv = nbatch // nsteps
    self.pdtype = make_pdtype(ac_space)
    with tf.variable_scope("model", reuse=reuse):
        G = tf.placeholder(tf.float32, [nbatch, max_timesteps, loc_space])
        X = tf.placeholder(tf.float32, (nbatch,) + ob_space.shape)
        Y = tf.placeholder(tf.float32, [nbatch, loc_space])
        M = tf.placeholder(tf.float32, [nbatch])
        S = tf.placeholder(tf.float32, [nenv, 128])
        ys = batch_to_seq(Y, nenv, nsteps)
        ms = batch_to_seq(M, nenv, nsteps)

        tf.set_random_seed(seed)
        self.embed_W = tf.get_variable("embed_w", [loc_space, 64], initializer=ortho_init(1.0, seed))
        self.embed_b = tf.get_variable("embed_b", [64])
        self.wa = tf.get_variable("wa", [128, 128], initializer=ortho_init(1.0, seed))
        self.wb = tf.get_variable("wb", [128])
        self.ua = tf.get_variable("ua", [128, 128], initializer=ortho_init(1.0, seed))
        self.ub = tf.get_variable("ub", [128])
        self.va = tf.get_variable("va", [128])
        self.rnn = tf.nn.rnn_cell.GRUCell(128, kernel_initializer=ortho_init(1.0, seed))

        # Encoder over the goal sequence.
        enc_hidden = tf.zeros((nbatch, 128))
        embed_G = tf.matmul(tf.reshape(G, (-1, loc_space)), self.embed_W) + self.embed_b
        embed_G = tf.reshape(embed_G, (nbatch, max_timesteps, -1))
        enc_output, _ = tf.nn.dynamic_rnn(cell=self.rnn, inputs=embed_G, dtype=tf.float32)
        gs = batch_to_seq(enc_output, nenv, nsteps)

        # Decoder with additive attention over the encoder outputs.
        dec_hidden = S
        h = []
        for idx, (y, m, g) in enumerate(zip(ys, ms, gs)):
            dec_hidden = dec_hidden * (1 - m)
            embed_y = tf.matmul(y, self.embed_W) + self.embed_b
            dec_output, dec_hidden = tf.nn.dynamic_rnn(cell=self.rnn,
                                                       inputs=tf.expand_dims(embed_y, axis=1),
                                                       initial_state=dec_hidden)
            tmp = tf.reshape(tf.matmul(tf.reshape(g, (-1, 128)), self.ua) + self.ub,
                             (nenv, max_timesteps, 128))
            tmp = tf.tanh(tf.expand_dims(tf.matmul(dec_hidden, self.wa) + self.wb, axis=1) + tmp)
            score = tf.reduce_sum(tmp * tf.expand_dims(tf.expand_dims(self.va, axis=0), axis=1),
                                  axis=2, keepdims=True)
            attention_weights = tf.nn.softmax(score, axis=1)
            context_vector = attention_weights * g
            context_vector = tf.reduce_sum(context_vector, axis=1)
            x = tf.concat([context_vector, dec_hidden], axis=-1)
            h.append(x)
        h = seq_to_batch(h)

        vf = fc(h, 'v', 1, seed=seed)[:, 0]
        self.pd, self.pi = self.pdtype.pdfromlatent(h, seed=seed, init_scale=0.01)

    a0 = self.pd.sample()
    neglogp0 = self.pd.neglogp(a0)
    self.initial_state = np.zeros((nenv, 128))

    def step(ob, loc, goal, state, mask):
        a, v, state, neglogp = sess.run([a0, vf, dec_hidden, neglogp0],
                                        {X: ob, Y: loc, G: goal, M: mask, S: state})
        return a, v, state, neglogp

    def value(ob, loc, goal, state, mask):
        return sess.run(vf, {X: ob, Y: loc, G: goal, M: mask, S: state})

    self.G = G
    self.X = X
    self.Y = Y
    self.S = S
    self.M = M
    self.vf = vf
    self.step = step
    self.value = value
def define_self_prediction_rew(self, width, rep_size, enlargement):
    # RND.
    # Random target network.
    for ph in self.ph_ob.values():
        if len(ph.shape.as_list()) == 3:  # B, T, Features
            logger.info(f"FFNNTarget: using '{ph.name}' shape {ph.shape} as input")
            xr = ph[:, 1:]
            xr = tf.cast(xr, tf.float32)
            xr = tf.reshape(xr, (-1, *ph.shape.as_list()[-1:]))  # flatten batch and time dims
            xr = tf.clip_by_value((xr - self.ph_mean) / self.ph_std, -5.0, 5.0)
            xr = tf.nn.leaky_relu(fc(xr, "fc1r", nh=width * 1, init_scale=np.sqrt(2)))
            xr = tf.nn.leaky_relu(fc(xr, "fc2r", nh=width * 2 * 1, init_scale=np.sqrt(2)))
            xr = tf.nn.leaky_relu(fc(xr, "fc3r", nh=width * 2 * 1, init_scale=np.sqrt(2)))
            rgbr = [to2d(xr)]
            X_r = fc(rgbr[0], "fc4r", nh=rep_size, init_scale=np.sqrt(2))

    # Predictor network.
    for ph in self.ph_ob.values():
        if len(ph.shape.as_list()) == 3:  # B, T, Features
            logger.info(f"FFNNTarget: using '{ph.name}' shape {ph.shape} as input")
            xrp = ph[:, 1:]
            xrp = tf.cast(xrp, tf.float32)
            xrp = tf.reshape(xrp, (-1, *ph.shape.as_list()[-1:]))
            xrp = tf.clip_by_value((xrp - self.ph_mean) / self.ph_std, -5.0, 5.0)
            xrp = tf.nn.leaky_relu(fc(xrp, "fc1rp_pred", nh=width, init_scale=np.sqrt(2)))
            xrp = tf.nn.leaky_relu(fc(xrp, "fc2rp_pred", nh=width * 2, init_scale=np.sqrt(2)))
            xrp = tf.nn.leaky_relu(fc(xrp, "fc3rp_pred", nh=width * 2, init_scale=np.sqrt(2)))
            rgbrp = to2d(xrp)
            X_r_hat = tf.nn.relu(fc(rgbrp, "fc1r_hat1_pred", nh=256 * enlargement, init_scale=np.sqrt(2)))
            X_r_hat = tf.nn.relu(fc(X_r_hat, "fc1r_hat2_pred", nh=256 * enlargement, init_scale=np.sqrt(2)))
            X_r_hat = fc(X_r_hat, "fc1r_hat3_pred", nh=rep_size, init_scale=np.sqrt(2))

    self.feat_var = tf.reduce_mean(tf.nn.moments(X_r, axes=[0])[1])
    self.max_feat = tf.reduce_max(tf.abs(X_r))
    self.int_rew = tf.reduce_mean(tf.square(tf.stop_gradient(X_r) - X_r_hat), axis=-1, keep_dims=True)
    self.int_rew = tf.reshape(self.int_rew, (self.sy_nenvs, self.sy_nsteps - 1))

    noisy_targets = tf.stop_gradient(X_r)
    self.aux_loss = tf.reduce_mean(tf.square(noisy_targets - X_r_hat), -1)
    mask = tf.random_uniform(shape=tf.shape(self.aux_loss), minval=0.0, maxval=1.0, dtype=tf.float32)
    mask = tf.cast(mask < self.proportion_of_exp_used_for_predictor_update, tf.float32)
    self.aux_loss = tf.reduce_sum(mask * self.aux_loss) / tf.maximum(tf.reduce_sum(mask), 1.0)
def get_pdparam(self, features, reuse):
    with tf.variable_scope(self.scope, reuse=False):
        x = fc(features, units=self.hidsize, activation=activ, reuse=True)
        x = fc(x, units=self.hidsize, activation=activ, reuse=True)
        pdparam = fc(x, name='pd', units=self.pdparamsize, activation=None, reuse=reuse)
    return pdparam
Acc = []
max_epoch = 20
mini_batch = 100
for epoch_num in range(max_epoch):
    idxs = np.random.permutation(train_size)
    for k in range(math.ceil(train_size / mini_batch)):
        start_idx = k * mini_batch
        end_idx = min((k + 1) * mini_batch, train_size)
        a, z, delta = {}, {}, {}
        batch_indices = idxs[start_idx:end_idx]
        a[1] = X_train[:, batch_indices]
        y = trainLabels[:, batch_indices]
        # Forward pass
        for l in range(1, L):
            a[l + 1], z[l + 1] = fc(w[l], a[l])
        # Backward pass (output delta uses the sigmoid derivative a * (1 - a))
        delta[L] = (a[L] - y) * (a[L] * (1 - a[L]))
        # print(delta[L])  # debug
        for l in range(L - 1, 1, -1):
            delta[l] = bc(w[l], z[l], delta[l + 1])
        # Gradient descent update
        for l in range(1, L):
            grad_w = np.dot(delta[l + 1], a[l].T)
            w[l] = w[l] - alpha * grad_w
        J.append(cost(a[L], y) / mini_batch)
        Acc.append(accuracy(a[L], y))
# Prepare for evaluation on the test set.
a[1] = X_test
def __init__(self, sess, ob_space, ac_space, nbatch, nsteps, reuse=True):  # pylint: disable=W0613
    ob_shape = (nbatch,) + ob_space.shape
    actdim = ac_space.shape[0]
    window_length = ob_space.shape[1] - 1
    X = tf.placeholder(tf.float32, ob_shape, name='Ob')  # obs

    # Earlier variant (kept for reference): fw=4 / fw=window_length-3 convolutions,
    # zero cash bias, clipped flattened features (eps=10e20), ReLU value head,
    # zero-initialized logstd clipped to +/-80, plus assorted tf.Print debugging.

    with tf.variable_scope("model", reuse=reuse) as scope:
        w0 = tf.slice(X, [0, 0, 0, 0], [-1, -1, 1, 1])
        x = tf.slice(X, [0, 0, 1, 0], [-1, -1, -1, -1])
        # reuse when testing
        x = conv(tf.cast(x, tf.float32), 'c1', fh=1, fw=3, nf=3, stride=1, init_scale=np.sqrt(2))
        x = conv(x, 'c2', fh=1, fw=window_length - 2, nf=20, stride=window_length - 2, init_scale=np.sqrt(2))
        x = tf.concat([x, w0], 3)
        x = conv(x, 'c3', fh=1, fw=1, nf=1, stride=1, init_scale=np.sqrt(2))
        cash_bias = tf.ones([x.shape[0], 1, 1, 1], tf.float32)
        c = tf.concat([cash_bias, x], 1)
        v = conv_to_fc(x)
        vf = fc(v, 'v', 1)[:, 0]
        f = tf.contrib.layers.flatten(c)
        pi = tf.nn.softmax(f)
        logstd = tf.get_variable(name="logstd", shape=[1, actdim],
                                 initializer=tf.truncated_normal_initializer())
        # logstd = tf.Print(logstd, [logstd], 'logstd ')
        eps = 50
        # logstd = tf.clip_by_value(logstd, -eps, eps, 'clip_logstd')

    pdparam = tf.concat([pi, pi * 0.0 + logstd], axis=1)
    self.pdtype = make_pdtype(ac_space)
    self.pd = self.pdtype.pdfromflat(pdparam)
    a0 = self.pd.sample()
    # a0 = tf.clip_by_value(a0, -eps, eps, 'clip2')
    a0 = tf.nn.softmax(a0)
    neglogp0 = self.pd.neglogp(a0)
    self.initial_state = None

    def step(ob, *_args, **_kwargs):
        a, v, neglogp, lst, p = sess.run([a0, vf, neglogp0, logstd, pi], {X: ob})
        return a, v, self.initial_state, neglogp, lst[0], p

    def value(ob, *_args, **_kwargs):
        return sess.run(vf, {X: ob})

    self.X = X
    self.pi = pi
    self.vf = vf
    self.step = step
    self.value = value
def define_self_prediction_rew(self, convfeat, rep_size, enlargement):
    # RND.
    # Random target network.
    for ph in self.ph_ob.values():
        if len(ph.shape.as_list()) == 5:  # B,T,H,W,C
            logger.info("CnnTarget: using '%s' shape %s as image input" % (ph.name, str(ph.shape)))
            xr = ph[:, 1:]
            xr = tf.cast(xr, tf.float32)
            xr = tf.reshape(xr, (-1, *ph.shape.as_list()[-3:]))[:, :, :, -1:]
            xr = tf.clip_by_value((xr - self.ph_mean) / self.ph_std, -5.0, 5.0)
            xr = tf.nn.leaky_relu(conv(xr, 'c1r', nf=convfeat * 1, rf=8, stride=4, init_scale=np.sqrt(2)))
            xr = tf.nn.leaky_relu(conv(xr, 'c2r', nf=convfeat * 2 * 1, rf=4, stride=2, init_scale=np.sqrt(2)))
            xr = tf.nn.leaky_relu(conv(xr, 'c3r', nf=convfeat * 2 * 1, rf=3, stride=1, init_scale=np.sqrt(2)))
            rgbr = [to2d(xr)]
            X_r = fc(rgbr[0], 'fc1r', nh=rep_size, init_scale=np.sqrt(2))

    # Predictor network.
    for ph in self.ph_ob.values():
        if len(ph.shape.as_list()) == 5:  # B,T,H,W,C
            logger.info("CnnTarget: using '%s' shape %s as image input" % (ph.name, str(ph.shape)))
            xrp = ph[:, 1:]
            xrp = tf.cast(xrp, tf.float32)
            xrp = tf.reshape(xrp, (-1, *ph.shape.as_list()[-3:]))[:, :, :, -1:]
            xrp = tf.clip_by_value((xrp - self.ph_mean) / self.ph_std, -5.0, 5.0)
            xrp = tf.nn.leaky_relu(conv(xrp, 'c1rp_pred', nf=convfeat, rf=8, stride=4, init_scale=np.sqrt(2)))
            xrp = tf.nn.leaky_relu(conv(xrp, 'c2rp_pred', nf=convfeat * 2, rf=4, stride=2, init_scale=np.sqrt(2)))
            xrp = tf.nn.leaky_relu(conv(xrp, 'c3rp_pred', nf=convfeat * 2, rf=3, stride=1, init_scale=np.sqrt(2)))
            rgbrp = to2d(xrp)
            X_r_hat = tf.nn.relu(fc(rgbrp, 'fc1r_hat1_pred', nh=256 * enlargement, init_scale=np.sqrt(2)))
            X_r_hat = tf.nn.relu(fc(X_r_hat, 'fc1r_hat2_pred', nh=256 * enlargement, init_scale=np.sqrt(2)))
            X_r_hat = fc(X_r_hat, 'fc1r_hat3_pred', nh=rep_size, init_scale=np.sqrt(2))

    self.feat_var = tf.reduce_mean(tf.nn.moments(X_r, axes=[0])[1])
    self.max_feat = tf.reduce_max(tf.abs(X_r))
    self.int_rew = tf.reduce_mean(tf.square(tf.stop_gradient(X_r) - X_r_hat), axis=-1, keep_dims=True)
    self.int_rew = tf.reshape(self.int_rew, (self.sy_nenvs, self.sy_nsteps - 1))

    noisy_targets = tf.stop_gradient(X_r)
    self.aux_loss = tf.reduce_mean(tf.square(noisy_targets - X_r_hat), -1)
    mask = tf.random_uniform(shape=tf.shape(self.aux_loss), minval=0., maxval=1., dtype=tf.float32)
    mask = tf.cast(mask < self.proportion_of_exp_used_for_predictor_update, tf.float32)
    self.aux_loss = tf.reduce_sum(mask * self.aux_loss) / tf.maximum(tf.reduce_sum(mask), 1.)
def __init__(self, ob_space, ac_space, hidsize, ob_mean, ob_std, feat_dim, layernormalize, nl, scope="policy"): if layernormalize: print( "Warning: policy is operating on top of layer-normed features. It might slow down the training." ) self.layernormalize = layernormalize self.nl = nl self.ob_mean = ob_mean self.ob_std = ob_std with tf.variable_scope(scope): self.ob_space = ob_space self.ac_space = ac_space self.ac_pdtype = make_pdtype(ac_space) self.ph_ob = tf.placeholder(dtype=tf.int32, shape=(None, None) + ob_space.shape, name='ob') self.ph_ac = self.ac_pdtype.sample_placeholder([None, None], name='ac') self.pd = self.vpred = None self.hidsize = hidsize self.feat_dim = feat_dim self.scope = scope pdparamsize = self.ac_pdtype.param_shape()[0] print('ob_mean shape: ', ob_mean.shape) sh = tf.shape(self.ph_ob) x = flatten_two_dims(self.ph_ob) x = tf.cast(x, dtype=tf.float32) l = [] for i in range(4): r = tf.multiply(x[:, :, :, i * 3], 0.299) g = tf.multiply(x[:, :, :, i * 3 + 1], 0.587) b = tf.multiply(x[:, :, :, i * 3 + 2], 0.114) gray = r + g + b l.append(gray) x = tf.stack(l, axis=-1) x = tf.cast(x, dtype=tf.int32) l = [] for i in range(4): r = ob_mean[:, :, i * 3] * 0.299 g = ob_mean[:, :, i * 3 + 1] * 0.587 b = ob_mean[:, :, i * 3 + 2] * 0.114 gray = r + g + b l.append(gray) print('before obmean: ', self.ob_mean.shape) self.ob_mean = np.stack(l, axis=-1) self.ob_rgb_mean = ob_mean print('after obmean: ', self.ob_mean.shape) self.flat_features = self.get_features(x, reuse=False) self.features = unflatten_first_dim(self.flat_features, sh) with tf.variable_scope(scope, reuse=False): x = fc(self.flat_features, units=hidsize, activation=activ) x = fc(x, units=hidsize, activation=activ) pdparam = fc(x, name='pd', units=pdparamsize, activation=None) vpred = fc(x, name='value_function_output', units=1, activation=None) pdparam = unflatten_first_dim(pdparam, sh) self.vpred = unflatten_first_dim(vpred, sh)[:, :, 0] self.pd = pd = self.ac_pdtype.pdfromflat(pdparam) self.a_samp = pd.sample() self.entropy = pd.entropy() self.nlp_samp = pd.neglogp(self.a_samp)
def __init__(self, ob_space, ac_space, hidsize, ob_mean, ob_std, feat_dim, layernormalize, nl, scope="policy"): if layernormalize: print( "Warning: policy is operating on top of layer-normed features. It might slow down the training." ) self.layernormalize = layernormalize self.bool_actionclip = True #TODO Need to make this flexible self.nl = nl self.ob_mean = ob_mean self.ob_std = ob_std #self.ac_range = ac_range with tf.variable_scope(scope): self.ob_space = ob_space self.ac_space = ac_space self.ac_pdtype = make_pdtype( ac_space ) #RS: Should give a continuous action space, given a continuous action env self.ph_ob = tf.placeholder(dtype=tf.int32, shape=(None, None) + ob_space.shape, name='ob') self.ph_ac = self.ac_pdtype.sample_placeholder([None, None], name='ac') self.pd = self.vpred = None self.hidsize = hidsize self.feat_dim = feat_dim self.scope = scope pdparamsize = self.ac_pdtype.param_shape()[0] sh = tf.shape(self.ph_ob) x = flatten_two_dims(self.ph_ob) self.flat_features = self.get_features(x, reuse=False) self.features = unflatten_first_dim(self.flat_features, sh) with tf.variable_scope(scope, reuse=False): x = fc(self.flat_features, units=hidsize, activation=activ) x = fc(x, units=hidsize, activation=activ) pdparam = fc(x, name='pd', units=pdparamsize, activation=tf.nn.tanh) vpred = fc(x, name='value_function_output', units=1, activation=None) pdparam = unflatten_first_dim(pdparam, sh) self.vpred = unflatten_first_dim(vpred, sh)[:, :, 0] self.pd = pd = self.ac_pdtype.pdfromflat(pdparam) self.a_samp = pd.sample() self.a_samp = self.clip_action( self.a_samp) if self.bool_actionclip else self.a_samp self.entropy = pd.entropy() self.nlp_samp = pd.neglogp(self.a_samp) self.pd_logstd = pd.logstd self.pd_std = pd.std self.pd_mean = pd.mean
def _encoder(input, code_size):
    out_1_encoder = fc('out_1_encoder', input, H_SIZE)
    out_2_encoder = fc('out_2_encoder', out_1_encoder, code_size)
    out_encoder = fc('out_encoder', out_2_encoder, code_size)
    return out_encoder
def define_self_prediction_rew(self, convfeat, rep_size, enlargement):
    logger.info("Using RND BONUS ****************************************************")
    hidden_size = convfeat * 2
    # RND bonus.
    activ = tf.nn.relu
    # Random target network.
    for ph in self.ph_ob.values():
        if len(ph.shape.as_list()) == 3:  # B,T,S
            logger.info("Mlp Target: using '%s' shape %s as image input" % (ph.name, str(ph.shape)))
            xr = ph[:, 1:]  # get next status, index is 1:
            xr = tf.cast(xr, tf.float32)
            xr = tf.reshape(xr, (-1, *ph.shape.as_list()[-1:]))
            xr = tf.clip_by_value((xr - self.ph_mean) / self.ph_std, -5.0, 5.0)
            xr = activ(fc(xr, 'fc_0_r', nh=hidden_size, init_scale=np.sqrt(2)))
            xr = activ(fc(xr, 'fc_1_r', nh=hidden_size, init_scale=np.sqrt(2)))
            X_r = fc(xr, 'fc_2_r', nh=rep_size, init_scale=np.sqrt(2))

    # Predictor network.
    for ph in self.ph_ob.values():
        if len(ph.shape.as_list()) == 3:  # B,T,S
            logger.info("Mlp Target: using '%s' shape %s as image input" % (ph.name, str(ph.shape)))
            xrp = ph[:, 1:]
            xrp = tf.cast(xrp, tf.float32)
            xrp = tf.reshape(xrp, (-1, *ph.shape.as_list()[-1:]))
            xrp = tf.clip_by_value((xrp - self.ph_mean) / self.ph_std, -5.0, 5.0)
            xrp = activ(fc(xrp, 'fc_0_pred', nh=hidden_size, init_scale=np.sqrt(2)))
            xrp = activ(fc(xrp, 'fc_1_pred', nh=hidden_size, init_scale=np.sqrt(2)))
            X_r_hat = fc(xrp, 'fc_2_pred', nh=rep_size, init_scale=np.sqrt(2))

    self.feat_var = tf.reduce_mean(tf.nn.moments(X_r, axes=[0])[1])
    self.max_feat = tf.reduce_max(tf.abs(X_r))
    self.int_rew = tf.reduce_mean(tf.square(tf.stop_gradient(X_r) - X_r_hat), axis=-1, keep_dims=True)
    self.int_rew = tf.reshape(self.int_rew, (self.sy_nenvs, self.sy_nsteps - 1))

    targets = tf.stop_gradient(X_r)
    # self.aux_loss = tf.reduce_mean(tf.square(noisy_targets-X_r_hat))
    self.aux_loss = tf.reduce_mean(tf.square(targets - X_r_hat), -1)
    mask = tf.random_uniform(shape=tf.shape(self.aux_loss), minval=0., maxval=1., dtype=tf.float32)
    mask = tf.cast(mask < self.proportion_of_exp_used_for_predictor_update, tf.float32)
    self.aux_loss = tf.reduce_sum(mask * self.aux_loss) / tf.maximum(tf.reduce_sum(mask), 1.)
def _decoder(code, code_size, out_size):
    out_1_decoder = fc('out_1_decoder', code, code_size)
    out_2_decoder = fc('out_2_decoder', out_1_decoder, H_SIZE)
    out_decoder = fc('out_decoder', out_2_decoder, out_size, act=tf.nn.sigmoid)
    return out_decoder
def __init__(self, sess, ob_space, ac_space, nbatch, nsteps, reuse=False, deterministic=False):  # pylint: disable=W0613
    # The action is modeled with a Gaussian distribution.
    self.pdtype = make_pdtype(ac_space)
    self.num_obs = 13
    # print("action_space: {}".format(ac_space))
    with tf.variable_scope("model", reuse=reuse):
        phero_values = tf.placeholder(shape=(None, self.num_obs), dtype=tf.float32, name="phero_values")
        # velocities = tf.placeholder(shape=(None, 2), dtype=tf.float32, name="velocities")

        # Actor neural net
        pi_net = self.net(phero_values)
        # Critic neural net
        vf_h2 = self.net(phero_values)
        vf = fc(vf_h2, 'vf', 1)[:, 0]

        self.pd, self.pi = self.pdtype.pdfromlatent(pi_net, init_scale=0.01)

    if deterministic:
        a0 = self.pd.mode()
    else:
        a0 = self.pd.sample()
    neglogp0 = self.pd.neglogp(a0)
    self.initial_state = None
    self.phero = phero_values
    # self.velocities = velocities
    self.vf = vf

    def step(ob, *_args, **_kwargs):
        '''
        Generate action, value and log probability by feeding one observation
        into the policy neural net.
        '''
        phero = [o for o in ob]
        # lb = [o["laser"] for o in ob]
        # rb = [o["rel_goal"] for o in ob]
        # vb = [o["velocities"] for o in ob]
        a, v, neglogp = sess.run([a0, vf, neglogp0], {self.phero: phero})
        # Action clipping (normalising the action to (-1, 1)) could be done here for
        # better training; the network learns the effect as training proceeds.
        # for i in range(a.shape[1]):
        #     a[0][i] = min(1.0, max(-1.0, a[0][i]))
        return a, v, self.initial_state, neglogp

    def value(ob, *_args, **_kwargs):
        phero = [o for o in ob]
        return sess.run(vf, {self.phero: phero})

    self.step = step
    self.value = value
def forward_alexnet(self, inp, weights, reuse=False):
    # reuse is for the normalization parameters.
    # 1st Layer: Conv (w ReLU) -> Lrn -> Pool
    conv1 = conv_block(inp, weights['conv1_weights'], weights['conv1_biases'],
                       stride_y=4, stride_x=4, groups=1, reuse=reuse, scope='conv1')
    norm1 = lrn(conv1, 2, 1e-05, 0.75)
    pool1 = max_pool(norm1, 3, 3, 2, 2, padding='VALID')

    # 2nd Layer: Conv (w ReLU) -> Lrn -> Pool with 2 groups
    conv2 = conv_block(pool1, weights['conv2_weights'], weights['conv2_biases'],
                       stride_y=1, stride_x=1, groups=2, reuse=reuse, scope='conv2')
    norm2 = lrn(conv2, 2, 1e-05, 0.75)
    pool2 = max_pool(norm2, 3, 3, 2, 2, padding='VALID')

    # 3rd Layer: Conv (w ReLU)
    conv3 = conv_block(pool2, weights['conv3_weights'], weights['conv3_biases'],
                       stride_y=1, stride_x=1, groups=1, reuse=reuse, scope='conv3')

    # 4th Layer: Conv (w ReLU) split into two groups
    conv4 = conv_block(conv3, weights['conv4_weights'], weights['conv4_biases'],
                       stride_y=1, stride_x=1, groups=2, reuse=reuse, scope='conv4')

    # 5th Layer: Conv (w ReLU) -> Pool split into two groups
    conv5 = conv_block(conv4, weights['conv5_weights'], weights['conv5_biases'],
                       stride_y=1, stride_x=1, groups=2, reuse=reuse, scope='conv5')
    pool5 = max_pool(conv5, 3, 3, 2, 2, padding='VALID')

    # 6th Layer: Flatten -> FC (w ReLU) -> Dropout
    flattened = tf.reshape(pool5, [-1, 6 * 6 * 256])
    fc6 = fc(flattened, weights['fc6_weights'], weights['fc6_biases'], activation='relu')
    dropout6 = dropout(fc6, self.KEEP_PROB)

    # 7th Layer: FC (w ReLU) -> Dropout
    fc7 = fc(dropout6, weights['fc7_weights'], weights['fc7_biases'], activation='relu')
    dropout7 = dropout(fc7, self.KEEP_PROB)

    # 8th Layer: FC and return unscaled activations
    fc8 = fc(dropout7, weights['fc8_weights'], weights['fc8_biases'])

    return fc7, fc8
def build(self, input, is_dropout=False):  # is_dropout: whether to apply dropout
    # Conv layer 1
    conv1 = convM_N(input, 96, "conv1", self.data_dict_AlexNet, [11, 11], 4, finetune=self.finetune)
    lrn1 = tf.nn.lrn(conv1, bias=1.0, alpha=0.001 / 9, beta=0.75, name='lrn1')
    pool1 = tf.nn.max_pool(lrn1, [1, 3, 3, 1], [1, 2, 2, 1], padding='VALID', name='pool1')
    # Conv layer 2
    conv2 = convM_N(pool1, 256, "conv2", self.data_dict_AlexNet, [5, 5], 1, finetune=self.finetune)
    lrn2 = tf.nn.lrn(conv2, bias=1.0, alpha=0.001 / 9, beta=0.75, name='lrn2')
    pool2 = tf.nn.max_pool(lrn2, [1, 3, 3, 1], [1, 2, 2, 1], padding='VALID', name='pool2')
    # Conv layer 3
    conv3 = conv3_3(pool2, 384, 'conv3', self.data_dict_AlexNet, finetune=self.finetune)
    # Conv layer 4
    conv4 = conv3_3(conv3, 384, 'conv4', self.data_dict_AlexNet, finetune=self.finetune)
    # Conv layer 5
    conv5 = conv3_3(conv4, 256, 'conv5', self.data_dict_AlexNet, finetune=self.finetune)
    pool3 = tf.nn.max_pool(conv5, [1, 3, 3, 1], [1, 2, 2, 1], padding='VALID', name='pool3')

    # Fully connected layers
    flatten = tf.reshape(pool3, [self.batchsize, -1])
    fc1 = fc(flatten, 4096, 'fc1', finetune=False)
    fc1 = tf.nn.relu(fc1)
    if is_dropout:
        fc1 = tf.nn.dropout(fc1, 0.5)
    fc2 = fc(fc1, 4096, 'fc2', finetune=False)
    fc2 = tf.nn.relu(fc2)
    if is_dropout:
        fc2 = tf.nn.dropout(fc2, 0.5)
    fc3 = fc(fc2, self.n_classes, 'fc3', finetune=False)
    return fc3
def apply_policy(ph_ob, ph_new, ph_istate, reuse, scope, hidsize, memsize, extrahid,
                 sy_nenvs, sy_nsteps, pdparamsize, rec_gate_init):
    ph = ph_ob
    logger.info(f"CnnGruPolicy: using '{ph.name}' shape {ph.shape} as input")
    assert len(ph.shape.as_list()) == 3  # B, T, Features
    X = tf.cast(ph, tf.float32) / 255.0
    X = tf.reshape(X, (-1, *ph.shape.as_list()[-1:]))  # flatten batch and time dims
    activ = tf.nn.relu
    yes_gpu = any(get_available_gpus())
    with tf.variable_scope(scope, reuse=reuse), tf.device("/gpu:0" if yes_gpu else "/cpu:0"):
        # Fully connected trunk replacing the convolutional stack.
        X = activ(fc(X, "fc1", nh=32, init_scale=np.sqrt(2)))
        X = activ(fc(X, "fc2", nh=64, init_scale=np.sqrt(2)))
        X = activ(fc(X, "fc3", nh=64, init_scale=np.sqrt(2)))
        X = to2d(X)
        # This layer needs its own scope name; reusing "fc1" would collide.
        X = activ(fc(X, "fc4", nh=hidsize, init_scale=np.sqrt(2)))
        X = tf.reshape(X, [sy_nenvs, sy_nsteps, hidsize])
        X, snext = tf.nn.dynamic_rnn(GRUCell(memsize, rec_gate_init=rec_gate_init),
                                     (X, ph_new[:, :, None]),
                                     dtype=tf.float32, time_major=False, initial_state=ph_istate)
        X = tf.reshape(X, (-1, memsize))
        Xtout = X
        if extrahid:
            Xtout = X + activ(fc(Xtout, "fc2val", nh=memsize, init_scale=0.1))
            X = X + activ(fc(X, "fc2act", nh=memsize, init_scale=0.1))
        pdparam = fc(X, "pd", nh=pdparamsize, init_scale=0.01)
        vpred_int = fc(Xtout, "vf_int", nh=1, init_scale=0.01)
        vpred_ext = fc(Xtout, "vf_ext", nh=1, init_scale=0.01)
        pdparam = tf.reshape(pdparam, (sy_nenvs, sy_nsteps, pdparamsize))
        vpred_int = tf.reshape(vpred_int, (sy_nenvs, sy_nsteps))
        vpred_ext = tf.reshape(vpred_ext, (sy_nenvs, sy_nsteps))
    return pdparam, vpred_int, vpred_ext, snext
def __init__(self, sess, ob_space, ac_space, nenvs, nsteps, units_per_hlayer, reuse=False, activ_fcn='relu6'): # pylint: disable=W0613 # this method is called with nbatch = nenvs*nsteps # nh, nw, nc = ob_space.shape # ob_shape = (nbatch, nh, nw, nc) # actdim = ac_space.shape[0] # Todo check initialization # Input and Output dimensions nd, = ob_space.shape nbatch = nenvs * nsteps ob_shape = (nbatch, nd) nact = ac_space.n X = tf.placeholder(tf.float32, ob_shape, name='Ob') # obs with tf.variable_scope("model", reuse=reuse): if activ_fcn == 'relu6': h1 = tf.nn.relu6(fc(X, 'pi_vf_fc1', nh=units_per_hlayer[0])) # , init_scale=np.sqrt(2))) h2 = tf.nn.relu6(fc(h1, 'pi_vf_fc2', nh=units_per_hlayer[1])) # , init_scale=np.sqrt(2))) h3 = tf.nn.relu6(fc(h2, 'pi_fc1', nh=units_per_hlayer[2])) # , init_scale=np.sqrt(2))) elif activ_fcn == 'elu': h1 = tf.nn.elu(fc(X, 'pi_vf_fc1', nh=units_per_hlayer[0])) # , init_scale=np.sqrt(2))) h2 = tf.nn.elu(fc(h1, 'pi_vf_fc2', nh=units_per_hlayer[1])) # , init_scale=np.sqrt(2))) h3 = tf.nn.elu(fc(h2, 'pi_fc1', nh=units_per_hlayer[2])) # , init_scale=np.sqrt(2))) elif activ_fcn == 'mixed': h1 = tf.nn.relu6(fc(X, 'pi_vf_fc1', nh=units_per_hlayer[0])) #, init_scale=np.sqrt(2))) h2 = tf.nn.relu6(fc(h1, 'pi_vf_fc2', nh=units_per_hlayer[1])) #, init_scale=np.sqrt(2))) h3 = tf.nn.tanh(fc(h2, 'pi_fc1', nh=units_per_hlayer[2])) #, init_scale=np.sqrt(2))) pi_logit = fc(h3, 'pi', nact, init_scale=0.01) pi = tf.nn.softmax(pi_logit) vf = fc(h2, 'vf', 1)[:, 0] # predicted value of input state self.pd = CategoricalPd(pi_logit) # pdparam a0 = self.pd.sample() # returns action index: 0,1 # a0 = tf.argmax(pi, axis=1) neglogp0 = self.pd.neglogp(a0) self.initial_state = None def step(ob, *_args, **_kwargs): a, pi, v, neglogp = sess.run([a0, pi_logit, vf, neglogp0], {X: ob}) return a, pi, v, self.initial_state, neglogp def value(ob, *_args, **_kwargs): return sess.run(vf, {X: ob}) self.X = X self.pi = pi self.pi_logit = pi_logit self.vf = vf self.ac = a0 self.step = step self.value = value
# Set data placeholders
x = tf.placeholder(tf.float32, shape=[None, 784], name="x")
x_image = tf.reshape(x, [-1, 28, 28, 1])
tf.summary.image('input', x_image, 3)
y = tf.placeholder(tf.float32, shape=[None, 10], name="labels")

if use_two_conv:
    conv1 = conv(x_image, 1, 32, "conv1")
    conv_out = conv(conv1, 32, 64, "conv2")
else:
    conv_out = conv(x_image, 1, 16, "conv")

flattened = tf.reshape(conv_out, [-1, 7 * 7 * 64])

if use_two_fc:
    fc1 = fc(flattened, 7 * 7 * 64, 1024, "fc1")
    relu = tf.nn.relu(fc1)
    embedding_input = relu
    tf.summary.histogram("fc1/relu", relu)
    embedding_size = 1024
    logits = fc(relu, 1024, 10, "fc2")
else:
    embedding_input = flattened
    embedding_size = 7 * 7 * 64
    logits = fc(flattened, 7 * 7 * 64, 10, "fc")

with tf.name_scope("xent"):
    xent = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=y),
        name="xent")
    tf.summary.scalar("xent", xent)