def build_simple_fc(states):
    """Build separate two-layer ReLU trunks for the actor and the critic.

    Both trunks read ``states`` and stack a 128-unit dense layer followed by a
    256-unit dense layer.  Every layer uses ``tf.nn.relu`` and a kernel
    initialiser obtained from ``ortho_init(np.sqrt(2))``, and is created in
    its own variable scope (``actor_l1``, ``actor_l2``, ``critic_l1``,
    ``critic_l2``).

    Args:
        states: Tensor passed as ``inputs`` to the first layer of each trunk.

    Returns:
        Tuple ``(actor_features, critic_features)`` holding the output of the
        second actor layer and of the second critic layer.
    """

    def relu_layer(scope_name, layer_input, width):
        # One dense + ReLU layer created under its own variable scope.
        with tf.variable_scope(scope_name):
            return tf.layers.dense(
                inputs=layer_input,
                units=width,
                activation=tf.nn.relu,
                kernel_initializer=ortho_init(np.sqrt(2)),
            )

    # Layers are created actor-first, then critic, so the order in which
    # variables are added to the graph stays the same.
    actor_hidden = relu_layer('actor_l1', states, 128)
    actor_features = relu_layer('actor_l2', actor_hidden, 256)
    critic_hidden = relu_layer('critic_l1', states, 128)
    critic_features = relu_layer('critic_l2', critic_hidden, 256)
    return actor_features, critic_features
def create_network(states, ac_space, network_type, v_space):
    """Build the policy and value heads on top of the selected trunk.

    The trunk is chosen by ``network_type``:

    * ``"cnn"``  -- ``build_cnn(states)``; both heads share its output and use
      the default kernel initialiser of ``tf.layers.dense``.
    * ``"ff"``   -- ``build_simple_fc(states)``; the policy head reads the
      actor features and the value head reads the critic features.
    * ``"mnih"`` -- ``build_mnih_A3C(states)``; both heads share its output.

    For ``"ff"`` and ``"mnih"`` the policy head is initialised with
    ``ortho_init(0.01)`` and the value head with ``ortho_init()``.

    Args:
        states: Input tensor handed to the trunk builder.
        ac_space: Number of units of the policy (logits) head.
        network_type: One of ``"cnn"``, ``"ff"`` or ``"mnih"``.
        v_space: Number of units of the value head.

    Returns:
        Tuple ``(pi, quality)``: the softmax of the policy logits and the
        raw output of the value head.

    Raises:
        ValueError: If ``network_type`` is not one of the supported names.
            (Previously the function fell through and returned ``None``,
            which only surfaced later as an unpacking error in the caller.)
    """
    # Select the trunk first; nothing is added to the graph for a bad type.
    if network_type == "cnn":
        l_actor = l_critic = build_cnn(states)
    elif network_type == "ff":
        l_actor, l_critic = build_simple_fc(states)
    elif network_type == "mnih":
        l_actor = l_critic = build_mnih_A3C(states)
    else:
        raise ValueError(
            "unknown network_type {!r}; expected one of 'cnn', 'ff', 'mnih'".format(
                network_type
            )
        )

    # The "cnn" variant never passed a kernel initialiser to its heads; keep
    # that by only supplying the keyword for the other variants.
    use_ortho = network_type != "cnn"

    with tf.variable_scope("Logits"):
        logits_kwargs = {"kernel_initializer": ortho_init(0.01)} if use_ortho else {}
        scores = tf.layers.dense(inputs=l_actor, units=ac_space, **logits_kwargs)

    with tf.variable_scope("V_fc"):
        value_kwargs = {"kernel_initializer": ortho_init()} if use_ortho else {}
        quality = tf.layers.dense(inputs=l_critic, units=v_space, **value_kwargs)

    pi = tf.nn.softmax(scores)
    return pi, quality
def lnlstmbase(xs, s, scope, nh, init_scale=1.0):
    """Unroll an LSTM whose pre-activations and cell state pass through ``_ln``.

    ``_ln`` is defined elsewhere (presumably layer normalisation -- confirm
    against its definition).  No mask is applied between steps.

    Args:
        xs: Non-empty list of rank-2 input tensors, one per step.  The list is
            overwritten in place: entry ``k`` is replaced by the hidden state
            produced at step ``k``.
        s: State tensor that is split in half along axis 1 into the cell
            state and the hidden state.
        scope: Variable scope name under which the parameters are created.
        nh: Number of hidden units; the gate matrices have ``nh * 4`` columns.
        init_scale: Scale forwarded to ``utils.ortho_init`` for ``wx``/``wh``.

    Returns:
        Tuple ``(xs, s)``: the (mutated) input list now holding the per-step
        hidden states, and the final state ``concat([c, h], axis=1)``.
    """
    # Unpacking into exactly two names requires the first input to be rank 2.
    _, nin = [dim.value for dim in xs[0].get_shape()]
    gate_width = nh * 4

    def constant_var(name, shape, value):
        # Variable filled with a constant initial value.
        return tf.get_variable(name, shape, initializer=tf.constant_initializer(value))

    with tf.variable_scope(scope):
        # Creation order is kept: wx, gx, bx, wh, gh, bh, b, gc, bc.
        wx = tf.get_variable("wx", [nin, gate_width], initializer=utils.ortho_init(init_scale))
        gx = constant_var("gx", [gate_width], 1.0)
        bx = constant_var("bx", [gate_width], 0.0)

        wh = tf.get_variable("wh", [nh, gate_width], initializer=utils.ortho_init(init_scale))
        gh = constant_var("gh", [gate_width], 1.0)
        bh = constant_var("bh", [gate_width], 0.0)

        b = constant_var("b", [gate_width], 0.0)

        gc = constant_var("gc", [nh], 1.0)
        bc = constant_var("bc", [nh], 0.0)

    cell, hidden = tf.split(axis=1, num_or_size_splits=2, value=s)
    for step, x_t in enumerate(xs):
        input_part = _ln(tf.matmul(x_t, wx), gx, bx)
        recurrent_part = _ln(tf.matmul(hidden, wh), gh, bh)
        pre_activation = input_part + recurrent_part + b

        in_gate, forget_gate, out_gate, candidate = tf.split(
            axis=1, num_or_size_splits=4, value=pre_activation
        )
        in_gate = tf.nn.sigmoid(in_gate)
        forget_gate = tf.nn.sigmoid(forget_gate)
        out_gate = tf.nn.sigmoid(out_gate)
        candidate = tf.tanh(candidate)

        cell = forget_gate * cell + in_gate * candidate
        hidden = out_gate * tf.tanh(_ln(cell, gc, bc))
        # The caller's list is reused as the output container.
        xs[step] = hidden

    s = tf.concat(axis=1, values=[cell, hidden])
    return xs, s
def __init__(self, Vo, Eo, Ho, Ha, Hi, Hl, attn_cls=AttentionModule, init_orth=False,
             cell_type=rnn_cells.LSTMCell, is_multitarget=False):
    """Assemble the decoder's sub-links and its learned BOS embedding.

    Args:
        Vo: First argument of the ``L.EmbedID`` embedding and output size of
            the final linear layer ``lin_o``.
        Eo: Embedding size; also the width of the ``bos_embeding`` parameter.
        Ho: Size passed as second argument to the recurrent cell.
        Ha: Second argument forwarded to ``attn_cls``.
        Hi: Size added to ``Eo`` to form the recurrent cell's input size.
        Hl: Output size of the maxout layer and input size of ``lin_o``.
        attn_cls: Callable building the attention module; invoked as
            ``attn_cls(Hi, Ha, Ho, init_orth=init_orth)``.
        init_orth: When true, ``ortho_init`` is applied to the recurrent
            cell, ``lin_o`` and ``maxo`` after construction.
        cell_type: Either a plain Python function called as
            ``cell_type(Eo + Hi, Ho)``, or any other value, which is resolved
            through ``rnn_cells.create_cell_model_from_string``.
        is_multitarget: Stored unchanged on the instance.
    """
    cell_input_size = Eo + Hi

    # Plain functions are used directly as cell factories; anything else is
    # resolved by rnn_cells first.
    if isinstance(cell_type, types.FunctionType):
        recurrent_cell = cell_type(cell_input_size, Ho)
    else:
        cell_factory = rnn_cells.create_cell_model_from_string(cell_type)
        recurrent_cell = cell_factory(cell_input_size, Ho)

    log.info("constructing decoder [%r]" % (cell_type, ))

    super(Decoder, self).__init__(
        emb=L.EmbedID(Vo, Eo),
        gru=recurrent_cell,
        maxo=L.Maxout(cell_input_size + Ho, Hl, 2),
        lin_o=L.Linear(Hl, Vo, nobias=False),
        attn_module=attn_cls(Hi, Ha, Ho, init_orth=init_orth),
    )
    self.add_param("bos_embeding", (1, Eo))

    self.Hi = Hi
    self.Ho = Ho
    self.Eo = Eo
    self.is_multitarget = is_multitarget

    # The beginning-of-sequence embedding starts from standard-normal noise.
    self.bos_embeding.data[...] = np.random.randn(Eo)

    if init_orth:
        # Same order as before: recurrent cell, output projection, maxout.
        for link in (self.gru, self.lin_o, self.maxo):
            ortho_init(link)
def _fcnobias(x, scope, nh, *, init_scale=1.0):
    """Multiply ``x`` by a learned weight matrix, without adding a bias.

    Args:
        x: Input tensor; the static size of its axis 1 gives the number of
            rows of the weight matrix.
        scope: Variable scope name under which the weight ``"w"`` is created.
        nh: Number of output columns.
        init_scale: Keyword-only scale forwarded to ``ortho_init``.

    Returns:
        The tensor ``tf.matmul(x, w)``.
    """
    with tf.variable_scope(scope):
        input_width = x.get_shape()[1].value
        weight = tf.get_variable(
            "w",
            [input_width, nh],
            initializer=ortho_init(init_scale),
        )
        projected = tf.matmul(x, weight)
    return projected
def __init__(self, sess, ob_space, loc_space, ac_space, nbatch, nsteps, max_timesteps, reuse=False, seed=0):
    """Build the graph of a recurrent policy with a GRU encoder and attention.

    The goal sequence ``G`` is embedded and encoded by a GRU.  For every step
    the current location ``Y`` is embedded and run through the same GRU cell
    (one step, starting from the carried state), an attention score over the
    encoder outputs is computed, and the attention context concatenated with
    the new state becomes the latent fed to the value and policy heads.

    Args:
        sess: Session used by ``step`` and ``value`` to run the graph.
        ob_space: Object whose ``.shape`` sizes the ``X`` placeholder.
        loc_space: Last dimension of ``G`` and ``Y``; row count of ``embed_w``.
        ac_space: Passed to ``make_pdtype``.
        nbatch: Leading dimension of the ``G``, ``X``, ``Y`` and ``M`` inputs.
        nsteps: Steps per environment; ``nenv = nbatch // nsteps``.
        max_timesteps: Second dimension of ``G``.
        reuse: Forwarded to ``tf.variable_scope("model", ...)``.
        seed: Passed to ``tf.set_random_seed``, every ``ortho_init`` call,
            ``fc`` and ``pdfromlatent``.

    NOTE(review): the source arrived flattened; which statements sit inside
    the ``"model"`` variable scope was reconstructed -- confirm the boundary.
    """
    nenv = nbatch // nsteps
    self.pdtype = make_pdtype(ac_space)
    with tf.variable_scope("model", reuse=reuse):
        # Graph inputs: goal sequence, observation, location, mask, GRU state.
        G = tf.placeholder(tf.float32, [nbatch, max_timesteps, loc_space])
        # NOTE(review): X is fed by step()/value() but no op built here reads it.
        X = tf.placeholder(tf.float32, (nbatch, )+ob_space.shape)
        Y = tf.placeholder(tf.float32, [nbatch, loc_space])
        M = tf.placeholder(tf.float32, [nbatch])
        S = tf.placeholder(tf.float32, [nenv, 128])
        # Split the flat batch into per-step sequences.
        ys = batch_to_seq(Y, nenv, nsteps)
        ms = batch_to_seq(M, nenv, nsteps)
        tf.set_random_seed(seed)
        # Embedding shared by the goal sequence and the per-step location.
        self.embed_W = tf.get_variable("embed_w", [loc_space, 64], initializer=ortho_init(1.0, seed))
        self.embed_b = tf.get_variable("embed_b", [64,])
        # Attention parameters: wa/wb act on the decoder state, ua/ub on the
        # encoder outputs, va reduces the tanh activations to a scalar score.
        # Only the matrices get an explicit initializer.
        self.wa = tf.get_variable("wa", [128, 128], initializer=ortho_init(1.0, seed))
        self.wb = tf.get_variable("wb", [128,])
        self.ua = tf.get_variable("ua", [128, 128], initializer=ortho_init(1.0, seed))
        self.ub = tf.get_variable("ub", [128,])
        self.va = tf.get_variable("va", [128])
        # The same cell object is passed to the encoder and to every decoder step.
        self.rnn = tf.nn.rnn_cell.GRUCell(128, kernel_initializer=ortho_init(1.0, seed))
        # NOTE(review): enc_hidden is never referenced after this line.
        enc_hidden = tf.zeros((nbatch, 128))
        # Embed every goal entry, then restore the (nbatch, max_timesteps, -1) layout.
        embed_G = tf.matmul(tf.reshape(G, (-1, loc_space)),self.embed_W)+self.embed_b
        embed_G = tf.reshape(embed_G, (nbatch, max_timesteps, -1))
        enc_output, _ = tf.nn.dynamic_rnn(cell=self.rnn, inputs=embed_G, dtype=tf.float32)
        gs = batch_to_seq(enc_output, nenv, nsteps)
        dec_hidden = S
        h = []
        # NOTE(review): idx is unused inside the loop.
        for idx, (y, m, g) in enumerate(zip(ys, ms, gs)):
            # Multiplying by (1-m) zeroes the carried state wherever m is 1.
            dec_hidden = dec_hidden*(1-m)
            embed_y = tf.matmul(y,self.embed_W)+self.embed_b
            # Single-step run of the GRU; dec_output is not used afterwards.
            dec_output, dec_hidden = tf.nn.dynamic_rnn(cell=self.rnn, inputs=tf.expand_dims(embed_y,axis=1), initial_state=dec_hidden)
            # Additive score: va . tanh(wa*dec_hidden + wb + ua*g + ub).
            tmp = tf.reshape(tf.matmul(tf.reshape(g, (-1, 128)), self.ua)+self.ub,(nenv, max_timesteps, 128))
            tmp = tf.tanh(tf.expand_dims(tf.matmul(dec_hidden, self.wa)+self.wb,axis=1) + tmp)
            score = tf.reduce_sum(tmp*tf.expand_dims(tf.expand_dims(self.va, axis=0), axis=1), axis=2, keepdims=True)
            # Normalise the scores over axis 1 and take the weighted sum of g.
            attention_weights = tf.nn.softmax(score, axis=1)
            context_vector = attention_weights * g
            context_vector = tf.reduce_sum(context_vector, axis=1)
            # Latent for this step: attention context followed by the new state.
            x = tf.concat([context_vector, dec_hidden], axis=-1)
            h.append(x)
        h = seq_to_batch(h)
        # Value estimate: column 0 of the 'v' head output.
        vf = fc(h, 'v', 1, seed=seed)[:,0]
        self.pd, self.pi = self.pdtype.pdfromlatent(h, seed=seed, init_scale=0.01)
    a0 = self.pd.sample()
    neglogp0 = self.pd.neglogp(a0)
    self.initial_state = np.zeros((nenv,128))
    def step(ob, loc, goal, state, mask):
        # Returns sampled action, value, the state after the last unrolled
        # step, and the negative log-probability of the sampled action.
        a, v, state, neglogp = sess.run([a0, vf, dec_hidden, neglogp0], {X:ob, Y:loc, G:goal, M:mask, S:state})
        return a, v, state, neglogp
    def value(ob, loc, goal, state, mask):
        # Evaluates only the value head for the given inputs.
        return sess.run(vf, {X:ob, Y:loc, G:goal, M:mask, S:state})
    # Expose placeholders, the value tensor and the two runners on the instance.
    self.G = G
    self.X = X
    self.Y = Y
    self.S = S
    self.M = M
    self.vf = vf
    self.step = step
    self.value = value