def _init(self, obs_space, ac_space, embedding_shape, hid_size, num_hid_layers, gaussian_fixed_var=True):
    """Build the action-decoder policy graph.

    Maps a concatenated (observation, latent embedding) input to an action
    distribution. Exposes ``self._act`` for (stochastic or mode) action
    selection and, when the Gaussian branch is taken, ``self._get_pol_mean``
    for the deterministic policy mean.

    Args:
        obs_space: observation space; only ``shape[0]`` is used.
        ac_space: action space; only ``shape[0]`` is used.
        embedding_shape: int, width of the latent embedding vector.
        hid_size: sequence of hidden-layer widths (len >= num_hid_layers).
        num_hid_layers: number of ReLU hidden layers.
        gaussian_fixed_var: if True, use a diagonal Gaussian whose log-std is
            a trainable, state-independent variable; otherwise all distribution
            parameters come from the final dense layer.
    """
    self.pdtype = pdtype = make_pdtype(ac_space.shape[0])
    batch_size = None
    ob = U.get_placeholder(name="ac_de_ob", dtype=tf.float32,
                           shape=[batch_size, obs_space.shape[0]])
    # NOTE(review): the embedding is assumed to already be expanded to one row
    # per timestep by the caller -- confirm against the training loop.
    embedding = U.get_placeholder(name="ac_de_embedding", dtype=tf.float32,
                                  shape=[batch_size, embedding_shape])

    # Normalize the concatenated input with a running mean/std, clipped to
    # [-5, 5].
    last_out = U.concatenate([ob, embedding], axis=1)
    with tf.variable_scope("ac_de_filter"):
        self.ac_rms = RunningMeanStd(shape=obs_space.shape[0] + embedding_shape)
    last_out = tf.clip_by_value((last_out - self.ac_rms.mean) / self.ac_rms.std,
                                -5.0, 5.0)
    for i in range(num_hid_layers):
        last_out = tf.nn.relu(
            U.dense(last_out, hid_size[i], "ac_de%i" % (i + 1),
                    weight_init=U.normc_initializer(1.0)))

    # Accept numpy integer shape entries as well as plain ints (gym spaces
    # frequently yield np.int64 shapes).
    if gaussian_fixed_var and isinstance(ac_space.shape[0], (int, np.integer)):
        self.mean = U.dense(last_out, pdtype.param_shape()[0] // 2,
                            "ac_de_final", U.normc_initializer(1.0))
        logstd = tf.get_variable(name="logstd",
                                 shape=[1, pdtype.param_shape()[0] // 2],
                                 initializer=tf.zeros_initializer())
        pdparam = U.concatenate([self.mean, self.mean * 0.0 + logstd], axis=1)
    else:
        self.mean = None  # no separate mean head in this branch
        pdparam = U.dense(last_out, pdtype.param_shape()[0], "ac_de_final",
                          U.normc_initializer(0.01))
    self.pd = pdtype.pdfromflat(pdparam)
    self.state_in = []
    self.state_out = []

    stochastic = U.get_placeholder(name="stochastic", dtype=tf.bool, shape=())
    ac = U.switch(stochastic, self.pd.sample(), self.pd.mode())
    self.ac = ac
    self._act = U.function([stochastic, ob, embedding], ac)
    # BUG FIX: this accessor was previously built unconditionally and raised
    # AttributeError when ``self.mean`` was never created (non-Gaussian branch).
    if self.mean is not None:
        self._get_pol_mean = U.function([ob, embedding], self.mean)
    else:
        self._get_pol_mean = None
def _init(self, obs_space, embedding_shape, hid_size, num_hid_layers, gaussian_fixed_var=True):
    """Build the state-decoder graph: (observation, embedding) -> state distribution.

    Policy-style network only; there is no value head.

    Args:
        obs_space: observation space; ``shape[0]`` is both the input width and
            the size of the predicted state (the pdtype is built from it).
        embedding_shape: int, width of the latent embedding.
        hid_size: sequence of hidden-layer widths (len >= num_hid_layers).
        num_hid_layers: number of tanh hidden layers.
        gaussian_fixed_var: if True, use a diagonal Gaussian with a trainable,
            state-independent log-std; otherwise emit all distribution
            parameters from the final dense layer.
    """
    self.pdtype = pdtype = make_pdtype(obs_space.shape[0])
    batch_size = None
    ob_input = U.get_placeholder(name="ob", dtype=tf.float32,
                                 shape=[batch_size, obs_space.shape[0]])
    # NOTE(review): the embedding is assumed pre-expanded to one row per
    # timestep by the caller -- confirm.
    embedding = U.get_placeholder(name="embedding", dtype=tf.float32,
                                  shape=[batch_size, embedding_shape])
    last_out = U.concatenate([ob_input, embedding], axis=1)
    # Normalize with a running mean/std, clipped to [-5, 5].
    with tf.variable_scope("state_de_filter"):
        self.state_rms = RunningMeanStd(shape=obs_space.shape[0] + embedding_shape)
    input_z = tf.clip_by_value(
        (last_out - self.state_rms.mean) / self.state_rms.std, -5.0, 5.0)
    for i in range(num_hid_layers):
        input_z = tf.nn.tanh(
            U.dense(input_z, hid_size[i], "state_de%i" % (i + 1),
                    weight_init=U.normc_initializer(1.0)))

    if gaussian_fixed_var and isinstance(obs_space.shape[0], (int, np.integer)):
        self.mean = U.dense(input_z, pdtype.param_shape()[0] // 2,
                            "state_de_final", U.normc_initializer(0.01))
        self.logstd = tf.get_variable(name="logstd",
                                      shape=[1, pdtype.param_shape()[0] // 2],
                                      initializer=tf.zeros_initializer())
        pdparam = U.concatenate([self.mean, self.mean * 0.0 + self.logstd],
                                axis=1)
    else:
        self.mean = None
        # BUG FIX: this branch previously fed ``last_out`` (the raw,
        # unnormalized concat input) into the final dense, silently bypassing
        # the normalization and every hidden layer; feed the network output
        # (``input_z``) instead, matching the Gaussian branch.
        pdparam = U.dense(input_z, pdtype.param_shape()[0], "state_de_final",
                          U.normc_initializer(0.01))
    self.pd = pdtype.pdfromflat(pdparam)
    self.state_in = []
    self.state_out = []
    self._act = U.function([ob_input, embedding], self.pd.sample())
    # BUG FIX: previously built unconditionally, raising AttributeError when
    # ``self.mean`` was never created (non-Gaussian branch).
    if self.mean is not None:
        self.get_mean = U.function([ob_input, embedding], self.mean)
    else:
        self.get_mean = None
def _init(self, obs_space, batch_size, time_steps, LSTM_size, laten_size, gaussian_fixed_var=True):
    """Build the bidirectional-LSTM encoder: a state sequence -> latent distribution.

    Args:
        obs_space: observation space of a single timestep.
        batch_size: static batch dimension of the input placeholder.
        time_steps: sequence length.
        LSTM_size: hidden units per LSTM direction.
        laten_size: dimensionality of the latent code.
        gaussian_fixed_var: if True, predict both mean and log-std from the
            input with two dense heads (so the posterior variance is
            input-dependent); otherwise emit all distribution parameters from
            a single dense layer.
    """
    # Original note: check whether the variance actually gets updated.
    self.pdtype = pdtype = make_pdtype(laten_size)
    obs = U.get_placeholder("en_ob", dtype=tf.float32,
                            shape=[batch_size, time_steps, obs_space.shape[0]])
    # Normalize observations with a running mean/std, clipped to [-5, 5].
    # (Original note: verify this normalization actually helps.)
    with tf.variable_scope("obfilter"):
        self.obs_rms = RunningMeanStd(shape=obs_space.shape)
    obz = tf.clip_by_value((obs - self.obs_rms.mean) / self.obs_rms.std,
                           -5.0, 5.0)
    lstm_fw_cell = rnn.BasicLSTMCell(LSTM_size, forget_bias=1.0)
    lstm_bw_cell = rnn.BasicLSTMCell(LSTM_size, forget_bias=1.0)
    outputs, output_state = tf.nn.bidirectional_dynamic_rnn(
        lstm_fw_cell, lstm_bw_cell, obz, dtype=tf.float32)
    # NOTE(review): only outputs[0] (the forward direction) is averaged over
    # time; the backward outputs are discarded -- confirm this is intended.
    outputs_average = tf.reduce_mean(outputs[0], axis=1)
    if gaussian_fixed_var and isinstance(laten_size, (int, np.integer)):
        self.mean = U.dense(outputs_average, pdtype.param_shape()[0] // 2,
                            "dblstmfin", U.normc_initializer(1.0))
        self.logstd = U.dense(outputs_average, pdtype.param_shape()[0] // 2,
                              "dblstm_logstd", U.normc_initializer(1.0))
        # Earlier variant kept for reference (state-independent log-std):
        # self.logstd = tf.get_variable(name="logstd", shape=[1, pdtype.param_shape()[0] // 2],
        #                               initializer=tf.constant_initializer(0.1))
        pdparam = U.concatenate([self.mean, self.mean * 0.0 + self.logstd],
                                axis=1)
    else:
        self.mean = None
        pdparam = U.dense(outputs_average, pdtype.param_shape()[0],
                          "dblstmfin", U.normc_initializer(0.1))
    self.pd = pdtype.pdfromflat(pdparam)
    self._encode = U.function([obs], self.pd.sample())
    # BUG FIX: previously built unconditionally and raised AttributeError when
    # ``self.mean`` was never created (non-Gaussian branch).
    if self.mean is not None:
        self._get_mean = U.function([obs], self.mean)
    else:
        self._get_mean = None
def make_model(name, obs_space, action_space, reg=1e-7, noisy_head=False):
    """Build a dueling Q-network over the 'cube' and 'arm' observation parts.

    Args:
        name: name given to the resulting Keras model.
        obs_space: dict-like space with 'cube' and 'arm' entries (shapes used).
        action_space: discrete action space; ``action_space.n`` outputs.
        reg: L2-regularization coefficient passed to the sub-builders.
        noisy_head: whether the dueling head uses noisy layers.

    Returns:
        A ``tf.keras.Model`` taking ``{'cube': ..., 'arm': ...}`` inputs.
    """
    cube_in = tf.keras.Input(shape=obs_space['cube'].shape)
    arm_in = tf.keras.Input(shape=obs_space['arm'].shape)
    # Arm features come first in the concat, matching the original wiring.
    merged = concatenate([arm_in, cube_in])
    trunk = make_mlp([400, 300], 'tanh', reg)(merged)
    q_head = DuelingModel([512], action_space.n, reg, noisy=noisy_head)(trunk)
    return tf.keras.Model(inputs={'cube': cube_in, 'arm': arm_in},
                          outputs=q_head, name=name)
def make_uni_base(img, feat, reg):
    """Fuse flat features and image streams into a single flat base tensor.

    Flat features (if any) go through a small MLP; each image (if any) goes
    through an IMPALA-style CNN. When both are present, the MLP output is
    reshaped/upsampled to the CNN spatial size and merged into the conv stack.

    Args:
        img: dict of image tensors (raw pixel values, divided by 255 here).
        feat: dict of flat feature tensors.
        reg: regularization coefficient passed to the sub-builders.

    Returns:
        The fused flat tensor, or None when both inputs are empty.
    """
    fused = None
    if feat:
        stacked = concatenate(list(feat.values()))
        fused = make_mlp([64, 64], 'relu', reg)(stacked)
    if img:
        streams = [
            make_impala_cnn((16, 32), reg, flat=False, use_bn=False)(frame / 255)
            for frame in img.values()
        ]
        if fused is not None:
            # All CNN streams must share a spatial size so the MLP output can
            # be tiled to match and concatenated along channels.
            spatial = streams[0].shape[1:-1]
            assert all([s.shape[1:-1] == spatial for s in streams
                        ]), [s.shape[1:-1] for s in streams]
            tiled = tf.keras.layers.Reshape((1, 1, 64))(fused)
            tiled = tf.keras.layers.UpSampling2D(spatial)(tiled)
            streams.append(tiled)
        fused = make_impala_cnn((64, ), reg, flat=True,
                                use_bn=False)(concatenate(streams))
    return fused
def make_model(name, obs_space, action_space, reg=1e-6):
    """Build a dueling Q-network from a 'pov' image and an 'arm' feature vector.

    Args:
        name: name given to the resulting Keras model.
        obs_space: dict-like space with 'pov' (image) and 'arm' entries.
        action_space: discrete action space; ``action_space.n`` outputs.
        reg: L2-regularization coefficient passed to the sub-builders.

    Returns:
        A ``tf.keras.Model`` taking ``{'pov': ..., 'arm': ...}`` inputs.
    """
    pov_in = tf.keras.Input(shape=obs_space['pov'].shape)
    arm_in = tf.keras.Input(shape=obs_space['arm'].shape)
    # Scale raw pixel values into [0, 1] before the conv stack.
    vision = make_cnn([32, 32, 32, 32], [3, 3, 3, 3], [2, 2, 2, 2], 'tanh',
                      reg)(pov_in / 255)
    proprio = make_mlp([512, 256], 'tanh', reg)(arm_in)
    q_values = DuelingModel([1024], action_space.n,
                            reg)(concatenate([vision, proprio]))
    return tf.keras.Model(inputs={'pov': pov_in, 'arm': arm_in},
                          outputs=q_values, name=name)
def _init(self, ob_space, ac_space, hid_size, num_hid_layers, vae_pol_mean, gaussian_fixed_var=True):
    """Build an MLP actor-critic whose policy mean is offset by ``vae_pol_mean``.

    Shares one normalized observation input between a value head and a policy
    head. Exposes ``self._act`` returning (action, value prediction).

    Args:
        ob_space: gym Box observation space.
        ac_space: action space (Box triggers the fixed-variance Gaussian head).
        hid_size: sequence of hidden-layer widths (len >= num_hid_layers).
        num_hid_layers: number of tanh hidden layers per head.
        vae_pol_mean: tensor added to the policy-mean output (VAE prior mean).
        gaussian_fixed_var: if True (and Box actions), use a trainable,
            state-independent log-std initialized to 0.1.
    """
    assert isinstance(ob_space, gym.spaces.Box)
    self.pdtype = pdtype = make_pdtype(ac_space)
    sequence_length = None

    ob = U.get_placeholder(name="ob", dtype=tf.float32,
                           shape=[sequence_length] + list(ob_space.shape))
    with tf.variable_scope("obfilter"):
        self.ob_rms = RunningMeanStd(shape=ob_space.shape)
    # Running-stat normalization, clipped to [-5, 5]; shared by both heads.
    obz = tf.clip_by_value((ob - self.ob_rms.mean) / self.ob_rms.std, -5.0, 5.0)

    # --- value head ---
    h = obz
    for layer in range(num_hid_layers):
        h = tf.nn.tanh(
            U.dense(h, hid_size[layer], "vffc%i" % (layer + 1),
                    weight_init=U.normc_initializer(1.0)))
    self.vpred = U.dense(h, 1, "vffinal",
                         weight_init=U.normc_initializer(1.0))[:, 0]

    # --- policy head ---
    h = obz
    for layer in range(num_hid_layers):
        h = tf.nn.tanh(
            U.dense(h, hid_size[layer], "polfc%i" % (layer + 1),
                    weight_init=U.normc_initializer(1.0)))
    if gaussian_fixed_var and isinstance(ac_space, gym.spaces.Box):
        mean = U.dense(h, pdtype.param_shape()[0] // 2, "polfinal",
                       U.normc_initializer(0.01)) + vae_pol_mean
        logstd = tf.get_variable(name="logstd",
                                 shape=[1, pdtype.param_shape()[0] // 2],
                                 initializer=tf.constant_initializer(0.1))
        pdparam = U.concatenate([mean, mean * 0.0 + logstd], axis=1)
    else:
        pdparam = U.dense(h, pdtype.param_shape()[0], "polfinal",
                          U.normc_initializer(0.01))
    self.pd = pdtype.pdfromflat(pdparam)
    self.state_in = []
    self.state_out = []

    # Changed for behavior cloning: a cached named placeholder (instead of a
    # raw tf.placeholder) so other graph consumers can retrieve the same node.
    stochastic = U.get_placeholder(name="stochastic", dtype=tf.bool, shape=())
    ac = U.switch(stochastic, self.pd.sample(), self.pd.mode())
    self.ac = ac
    self._act = U.function([stochastic, ob], [ac, self.vpred])
def learn(encoder, action_decorder, state_decorder, embedding_shape, *, dataset, logdir, batch_size, time_steps, epsilon=0.001, lr_rate=1e-3):
    """Train an encoder + decoders as a sequence VAE with MpiAdam (runs forever).

    Reconstruction is the summed log-probability of actions and decoded states;
    the KL term is taken against a standard-normal prior over the latent. Only
    the encoder and state-decoder variables are optimized (the action decoder
    is deliberately excluded -- see the commented lines).

    Args:
        encoder / action_decorder / state_decorder: factory callables returning
            the corresponding graph-building objects.
        embedding_shape: int, width of the latent.
        dataset: yields observation batches via ``get_next_batch``.
        logdir: summary/checkpoint directory.
        batch_size, time_steps: batching parameters.
        epsilon: MpiAdam epsilon.
        lr_rate: learning rate.
    """
    lstm_encoder = encoder("lstm_encoder")
    ac_decoder = action_decorder("ac_decoder")
    state_decoder = state_decorder("state_decoder")  # switched to an MLP
    obs = U.get_placeholder_cached(name="obs")  # for the encoder
    ob = U.get_placeholder_cached(name="ob")
    embedding = U.get_placeholder_cached(name="embedding")
    # obss = U.get_placeholder_cached(name="obss")  # for the action decoder; the state decoder could maybe reuse it -- should it be obs instead?
    # embeddingss = U.get_placeholder_cached(name="embeddingss")  # for the action decoder; likely reusable by the state decoder too
    ac = ac_decoder.pdtype.sample_placeholder([None])
    obs_out = state_decoder.pdtype.sample_placeholder([None])
    # p(z): standard-normal prior over the latent. (Original note: should this
    # instead match the demonstrations' distribution? Worth considering.)
    from common.distributions import make_pdtype
    p_z_pdtype = make_pdtype(embedding_shape)
    p_z_params = U.concatenate([
        tf.zeros(shape=[embedding_shape], name="mean"),
        tf.zeros(shape=[embedding_shape], name="logstd")
    ], axis=-1)
    p_z = p_z_pdtype.pdfromflat(p_z_params)
    # Negative log-likelihood of actions + decoded states.
    # (Original note: this still needs revising.)
    recon_loss = -tf.reduce_mean(
        tf.reduce_sum(ac_decoder.pd.logp(ac) + state_decoder.pd.logp(obs_out),
                      axis=0))
    # KL to the standard-normal prior. (Original note: this also looks possibly wrong.)
    kl_loss = lstm_encoder.pd.kl(p_z)
    vae_loss = recon_loss + kl_loss  # per-batch VAE objective
    ep_stats = stats(["recon_loss", "kl_loss", "vae_loss"])
    losses = [recon_loss, kl_loss, vae_loss]
    ## var_list: encoder + state decoder (action decoder excluded).
    var_list = []
    en_var_list = lstm_encoder.get_trainable_variables()
    var_list.extend(en_var_list)
    # ac_de_var_list = ac_decoder.get_trainable_variables()
    # var_list.extend(ac_de_var_list)
    state_de_var_list = state_decoder.get_trainable_variables()
    var_list.extend(state_de_var_list)
    # compute_recon_loss = U.function([ob, obs, embedding, obss, embeddingss, ac, obs_out], recon_loss)
    compute_losses = U.function([obs, ob, embedding, ac, obs_out], losses)
    # (Original note: not fully thought through; may be wrong.)
    compute_grad = U.function([obs, ob, embedding, ac, obs_out],
                              U.flatgrad(vae_loss, var_list))
    adam = MpiAdam(var_list, epsilon=epsilon)
    U.initialize()
    adam.sync()
    writer = U.FileWriter(logdir)
    writer.add_graph(tf.get_default_graph())
    # =========================== TRAINING ===================== #
    iters_so_far = 0
    saver = tf.train.Saver(var_list=tf.trainable_variables(), max_to_keep=100)
    saver_encoder = tf.train.Saver(var_list=en_var_list, max_to_keep=100)
    # saver_pol = tf.train.Saver(var_list=ac_de_var_list, max_to_keep=100)  # would keep the policy params, but appears unused
    while True:
        logger.log("********** Iteration %i ************" % iters_so_far)
        recon_loss_buffer = deque(maxlen=100)
        kl_loss_buffer = deque(maxlen=100)
        vae_loss_buffer = deque(maxlen=100)
        for observations in dataset.get_next_batch(batch_size=time_steps):
            observations = observations.transpose((1, 0))
            embedding_now = lstm_encoder.get_laten_vector(observations)
            # Repeat the single latent vector once per timestep.
            embeddings = np.array([embedding_now for _ in range(time_steps)])
            embeddings_reshape = embeddings.reshape((time_steps, -1))
            actions = ac_decoder.act(stochastic=True,
                                     ob=observations,
                                     embedding=embeddings_reshape)
            # (Original note: mixture-of-Gaussians support was bolted on here.)
            state_outputs = state_decoder.get_outputs(
                observations.reshape(time_steps, -1, 1), embeddings)
            # NOTE(review): compute_losses/compute_grad were declared with 5
            # inputs ([obs, ob, embedding, ac, obs_out]) but are called with 7
            # arguments (matching the commented-out 7-input
            # compute_recon_loss) -- this looks stale and would fail at run
            # time; confirm which signature is intended.
            recon_loss, kl_loss, vae_loss = compute_losses(
                observations, observations.reshape(batch_size, time_steps,
                                                   -1), embeddings_reshape,
                observations.reshape(time_steps, -1, 1), embeddings, actions,
                state_outputs)
            g = compute_grad(observations,
                             observations.reshape(batch_size, time_steps, -1),
                             embeddings_reshape,
                             observations.reshape(time_steps, -1, 1),
                             embeddings, actions, state_outputs)
            adam.update(g, lr_rate)
            recon_loss_buffer.append(recon_loss)
            kl_loss_buffer.append(kl_loss)
            vae_loss_buffer.append(vae_loss)
        ep_stats.add_all_summary(writer, [
            np.mean(recon_loss_buffer),
            np.mean(kl_loss_buffer),
            np.mean(vae_loss_buffer)
        ], iters_so_far)
        logger.record_tabular("recon_loss", recon_loss)
        logger.record_tabular("kl_loss", kl_loss)
        logger.record_tabular("vae_loss", vae_loss)
        logger.dump_tabular()
        if (iters_so_far % 10 == 0 and iters_so_far != 0):
            save(saver=saver,
                 sess=tf.get_default_session(),
                 logdir=logdir,
                 step=iters_so_far)
            save(saver=saver_encoder,
                 sess=tf.get_default_session(),
                 logdir="./vae_saver",
                 step=iters_so_far)
            # save(saver=saver_pol, sess=tf.get_default_session(), logdir="pol_saver", step=iters_so_far)
        iters_so_far += 1
def learn(env, encoder, action_decorder, state_decorder, embedding_shape, *, dataset, optimizer, logdir, batch_size, time_steps, adam_epsilon=0.001, lr_rate=1e-4, vae_beta=8):
    """Train the encoder + state decoder as a beta-VAE over state sequences.

    The action decoder is built (and used to step the env for a next-state
    estimate) but its variables are not optimized here; the reconstruction term
    covers only the state decoder's next-state likelihood.

    Args:
        env: environment, used only via ``get_ob_next_ac`` inside the loop.
        encoder / action_decorder / state_decorder: factory callables returning
            the corresponding graph-building objects.
        embedding_shape: int, width of the latent.
        dataset: yields (obs, ...) batches via ``get_next_batch``.
        optimizer: NOTE(review): accepted but never used -- confirm intent.
        logdir: summary/checkpoint directory.
        batch_size, time_steps: batching parameters.
        adam_epsilon: MpiAdam epsilon.
        lr_rate: learning rate; halved on each of the first few iterations.
        vae_beta: weight on the KL term (beta-VAE).
    """
    lstm_encoder = encoder("lstm_encoder")
    ac_decoder = action_decorder("ac_decoder")
    state_decoder = state_decorder("state_decoder")  # original note: something is wrong here
    ac_de_ob = U.get_placeholder_cached(name="ac_de_ob")
    en_ob = U.get_placeholder_cached(name="en_ob")  # for the encoder
    state_de_ob = U.get_placeholder_cached(name="state_de_ob")  # for the state decoder; original note wondered if it should be obs
    ac_de_embedding = U.get_placeholder_cached(name="ac_de_embedding")  # for the action decoder
    state_de_embedding = U.get_placeholder_cached(name="state_de_embedding")
    # ac = ac_decoder.pdtype.sample_placeholder([None])
    # NOTE(review): ``ob_shape`` is not defined in this function -- presumably
    # a module-level global; confirm.
    ob_next = tf.placeholder(name="ob_next", shape=[None, ob_shape], dtype=tf.float32)
    # ob_next_ac = tf.placeholder(name="ob_next_ac", shape=[ob_shape], dtype=tf.float32)
    # obs_out = state_decoder.pdtype.sample_placeholder([None])
    # p(z): standard-normal prior over the latent.
    from common.distributions import make_pdtype
    p_z_pdtype = make_pdtype(embedding_shape)
    p_z_params = U.concatenate([
        tf.zeros(shape=[embedding_shape], name="mean"),
        tf.zeros(shape=[embedding_shape], name="logstd")
    ], axis=-1)
    p_z = p_z_pdtype.pdfromflat(p_z_params)
    # Reconstruction: negative log-likelihood of the next states under the
    # state decoder. (Original note: consider adding an action term too.)
    recon_loss = -tf.reduce_sum(state_decoder.pd.logp(ob_next))
    # kl_loss = lstm_encoder.pd.kl(p_z)[0]  # original note: looks possibly wrong
    # kl_loss = tf.maximum(lstm_encoder.pd.kl(p_z)[0], tf.constant(5.00))  # tried clamping the KL
    kl_loss = lstm_encoder.pd.kl(p_z)[0]
    vae_loss = tf.reduce_mean(recon_loss + vae_beta * kl_loss)  # per-batch beta-VAE objective
    ep_stats = stats(["recon_loss", "kl_loss", "vae_loss"])
    losses = [recon_loss, kl_loss, vae_loss]
    # Original note: could instead train actions with an MSE (or cross-entropy)
    # loss by stepping the env to obtain x(t+1).
    ## var_list: encoder + state decoder (action decoder excluded).
    var_list = []
    en_var_list = lstm_encoder.get_trainable_variables()
    var_list.extend(en_var_list)
    # ac_de_var_list = ac_decoder.get_trainable_variables()
    # var_list.extend(ac_de_var_list)
    state_de_var_list = state_decoder.get_trainable_variables()
    var_list.extend(state_de_var_list)
    # compute_recon_loss = U.function([ob, obs, embedding, obss, embeddingss, ac, obs_out], recon_loss)
    compute_losses = U.function([en_ob, ac_de_ob, state_de_ob, ac_de_embedding, state_de_embedding, ob_next], losses)
    # (Original note: not fully thought through; may be wrong.)
    compute_grad = U.function([en_ob, ac_de_ob, state_de_ob, ac_de_embedding, state_de_embedding, ob_next], U.flatgrad(vae_loss, var_list))
    adam = MpiAdam(var_list, epsilon=adam_epsilon)
    U.initialize()
    adam.sync()
    writer = U.FileWriter(logdir)
    writer.add_graph(tf.get_default_graph())
    # =========================== TRAINING ===================== #
    iters_so_far = 0
    saver = tf.train.Saver(var_list=var_list, max_to_keep=100)
    saver_encoder = tf.train.Saver(var_list=en_var_list, max_to_keep=100)
    # saver_pol = tf.train.Saver(var_list=ac_de_var_list, max_to_keep=100)  # would keep policy params, but appears unused
    while iters_so_far < 50:  # fixed number of epochs
        logger.log("********** Iteration %i ************" % iters_so_far)
        # Original note: maybe adjust batch_size each epoch.
        recon_loss_buffer = deque(maxlen=100)
        # recon_loss2_buffer = deque(maxlen=100)
        kl_loss_buffer = deque(maxlen=100)
        vae_loss_buffer = deque(maxlen=100)
        # i = 0
        for obs_and_next in dataset.get_next_batch(batch_size=time_steps):
            # print(i)
            # i += 1
            observations = obs_and_next[0].transpose((1, 0))[:-1]
            # NOTE(review): this rebinding shadows the ``ob_next`` placeholder
            # defined above; the feeds below still work because U.function
            # feeds positionally. Confirm this shadowing is intended.
            ob_next = obs_and_next[0].transpose(1, 0)[state_decoder.receptive_field:, :]
            embedding_now = lstm_encoder.get_laten_vector(obs_and_next[0].transpose((1, 0)))
            # Repeat the single latent once per decoded timestep.
            embeddings = np.array([embedding_now for _ in range(time_steps - 1)])
            embeddings_reshape = embeddings.reshape((time_steps - 1, -1))
            actions = ac_decoder.act(stochastic=True, ob=observations, embedding=embeddings_reshape)
            # NOTE(review): ``ob_next_ac`` is computed but never used below.
            # (Original note: still needs revising.)
            ob_next_ac = get_ob_next_ac(env, observations[-1], actions[0])
            #########################################3
            # state_outputs = state_decoder.get_outputs(observations.reshape(1, time_steps, -1), embedding_now.reshape((1, 1, -1)))  # mixture-of-Gaussians support was bolted on
            # recon_loss = state_decoder.recon_loss(observations.reshape(1, time_steps, -1), embedding_now.reshape((1, 1, -1)))
            recon_loss, kl_loss, vae_loss = compute_losses(
                obs_and_next[0].transpose((1, 0)).reshape(1, time_steps, -1),
                observations.reshape(time_steps - 1, -1),
                observations.reshape(1, time_steps - 1, -1),
                embeddings_reshape,
                embedding_now.reshape((1, 1, -1)),
                ob_next)
            g = compute_grad(
                obs_and_next[0].transpose((1, 0)).reshape(1, time_steps, -1),
                observations.reshape(time_steps - 1, -1),
                observations.reshape(1, time_steps - 1, -1),
                embeddings_reshape,
                embedding_now.reshape((1, 1, -1)),
                ob_next)
            # logger.record_tabular("recon_loss", recon_loss)
            # logger.record_tabular("recon_loss2", recon_loss2)
            # logger.record_tabular("kl_loss", kl_loss)
            # logger.record_tabular("vae_loss", vae_loss)
            # logger.dump_tabular()
            adam.update(g, lr_rate)
            recon_loss_buffer.append(recon_loss)
            # recon_loss2_buffer.append(recon_loss2)
            kl_loss_buffer.append(kl_loss)
            vae_loss_buffer.append(vae_loss)
        ep_stats.add_all_summary(writer, [np.mean(recon_loss_buffer),
                                          np.mean(kl_loss_buffer),
                                          np.mean(vae_loss_buffer)], iters_so_far)
        logger.record_tabular("recon_loss", recon_loss)
        # logger.record_tabular("recon_loss2", recon_loss2)
        logger.record_tabular("kl_loss", kl_loss)
        logger.record_tabular("vae_loss", vae_loss)
        logger.dump_tabular()
        if (iters_so_far % 10 == 0 and iters_so_far != 0):
            save(saver=saver, sess=tf.get_default_session(), logdir=logdir, step=iters_so_far)
            save(saver=saver_encoder, sess=tf.get_default_session(), logdir="./vae_saver",
                 step=iters_so_far)
            # save(saver=saver_pol, sess=tf.get_default_session(), logdir="pol_saver", step=iters_so_far)
        iters_so_far += 1
        # Learning-rate warm-down: halve the rate on the first few iterations.
        if iters_so_far < 6:
            lr_rate /= 2
def _create_network(self, obs_shape, embedding_shape):  ## , input_batch, global_condition_batch
    '''Construct the WaveNet network.

    Builds a causal convolution followed by a dilated-conv stack conditioned
    on a per-sequence embedding, sums the skip connections, post-processes with
    ReLU -> 1x1 conv -> ReLU -> 1x1 conv, and ends in a diagonal-Gaussian
    distribution (``self.pd``) over the outputs.

    NOTE(review): ``batch_size`` is not defined in this method or its
    arguments -- presumably a module-level global; confirm.
    '''
    import common.tf_util as U
    outputs = []
    sequence_length = 1  # NOTE(review): assigned but never used below
    # input_batch is 3-D: [batch, time, obs_dim].
    input_batch = U.get_placeholder(
        name="state_de_ob",
        dtype=tf.float32,
        shape=[batch_size, self.time_steps - 1, obs_shape.shape[0]])
    global_condition_batch = U.get_placeholder(
        name="state_de_embedding",
        dtype=tf.float32,
        shape=[batch_size, 1, embedding_shape])
    current_layer = input_batch
    # Pre-process the input with a regular convolution.
    # (Original note: this doesn't work here.)
    current_layer = self._create_causal_layer(current_layer)
    # output_width = tf.shape(input_batch)[1] - self.receptive_field + 1
    # Static-shape variant of the line above.
    output_width = input_batch.shape[1] - self.receptive_field + 1
    # Add all defined dilation layers.
    with tf.name_scope('dilated_stack'):
        for layer_index, dilation in enumerate(self.dilations):
            with tf.name_scope('layer{}'.format(layer_index)):
                output, current_layer = self._create_dilation_layer(
                    current_layer, layer_index, dilation,
                    global_condition_batch, output_width)
                outputs.append(output)
    with tf.name_scope('postprocessing'):
        # Perform (+) -> ReLU -> 1x1 conv -> ReLU -> 1x1 conv to
        # postprocess the output.
        w1 = self.variables['postprocessing']['postprocess1']
        w2 = self.variables['postprocessing']['postprocess2']
        if self.use_biases:
            b1 = self.variables['postprocessing']['postprocess1_bias']
            b2 = self.variables['postprocessing']['postprocess2_bias']
        if self.histograms:
            # NOTE(review): tf.histogram_summary is the pre-1.0 summary API
            # (tf.summary.histogram in TF >= 1.0) -- confirm the TF version.
            tf.histogram_summary('postprocess1_weights', w1)
            tf.histogram_summary('postprocess2_weights', w2)
            if self.use_biases:
                tf.histogram_summary('postprocess1_biases', b1)
                tf.histogram_summary('postprocess2_biases', b2)
        # We skip connections from the outputs of each layer, adding them
        # all up here.
        total = sum(outputs)
        transformed1 = tf.nn.relu(total)
        conv1 = tf.nn.conv1d(transformed1, w1, stride=1, padding="SAME")
        if self.use_biases:
            conv1 = tf.add(conv1, b1)
        transformed2 = tf.nn.relu(conv1)
        conv2 = tf.nn.conv1d(transformed2, w2, stride=1, padding="SAME")
        if self.use_biases:
            conv2 = tf.add(conv2, b2)
    # print(conv2)
    # ========= add by myself =============== #
    # Earlier distribution head kept for reference:
    # self.mean = tf.reduce_mean(conv2, axis=1)  # would use the time-average per dimension
    # self.logstd = tf.get_variable(name="wave_logstd", shape=[1, self.pdtype.param_shape()[0]//2], initializer=tf.zeros_initializer())
    # pdparam = U.concatenate([self.mean, self.mean * 0.0 + self.logstd], axis=1)
    # self.pd = self.pdtype.pdfromflat(pdparam)
    #
    # self._act = U.function([input_batch, global_condition_batch], [self.pd.sample()])
    #
    # # for debug
    # self.get_mean = U.function([input_batch, global_condition_batch], self.mean)
    conv2 = tf.reshape(conv2, [-1, self.quantization_channels])
    # NOTE(review): the 63-unit heads are hard-coded (original note: 48 * 63);
    # confirm this matches the intended output dimensionality.
    self.mean = U.dense(conv2, 63, "wave_mean", U.normc_initializer(1.0))  ## 48 * 63
    self.logstd = U.dense(
        conv2, 63, "wave_logstd",
        weight_init=U.normc_initializer(1.0))  ## 48 * 63
    # self.logstd = tf.get_variable(name="wave_logstd", shape=[1, self.pdtype.param_shape()[0] // 2],
    #                               initializer=tf.zeros_initializer())  # original note: the size here is debatable
    pdparm = U.concatenate([self.mean, self.mean * 0.0 + self.logstd], axis=1)
    self.pd = self.pdtype.pdfromflat(pdparm)
    # target_output = tf.slice(input_batch, [0, self.receptive_field, 0], [-1, -1, -1])
    self._act = U.function([input_batch, global_condition_batch],
                           [self.pd.sample()])