def build_model(self):
    """Build the action head: three ReLU dense layers followed by a tanh output.

    Reads ``self.state`` as the network input, takes the hidden widths from
    ``self.net_param['d1']``, ``['d2']`` and ``['d3']`` and the output width
    from ``self.action_size``. The result is stored in ``self.action``.
    All variables are created under the ``'dense'`` variable scope.
    """
    with tf.variable_scope('dense'):
        hidden = self.state
        # The layer name doubles as the key into self.net_param.
        for layer_name in ('d1', 'd2', 'd3'):
            pre_activation = tf_util.dense(
                name=layer_name,
                x=hidden,
                weight_init=tf_util.normc_initializer(),
                size=self.net_param[layer_name])
            hidden = tf.nn.relu(pre_activation)

        out = tf_util.dense(
            name='out',
            x=hidden,
            weight_init=tf_util.normc_initializer(),
            size=self.action_size)
        self.action = tf.tanh(out)
def _init(self, ob_space, ac_space, hid_size, num_hid_layers, gaussian_fixed_var=True):
    """Build separate tanh MLPs for the value function and the policy.

    Args:
        ob_space: observation space; must be a ``gym.spaces.Box``.
        ac_space: action space, used to pick the distribution type.
        hid_size: width of every hidden layer.
        num_hid_layers: number of hidden layers in each of the two MLPs.
        gaussian_fixed_var: when true and ``ac_space`` is a ``Box``, the
            log-std is a free variable that does not depend on the
            observation; otherwise all distribution parameters come from
            the final dense layer.

    Sets ``self.pdtype``, ``self.vpred``, ``self.pd``, ``self.state_in``,
    ``self.state_out`` and ``self._act``.
    """
    assert isinstance(ob_space, gym.spaces.Box)

    pdtype = make_pdtype(ac_space)
    self.pdtype = pdtype
    sequence_length = None

    ob = U.get_placeholder(
        name="ob",
        dtype=tf.float32,
        shape=[sequence_length] + list(ob_space.shape))

    def tanh_stack(prefix):
        """Stack ``num_hid_layers`` tanh layers on the raw observation."""
        hidden = ob
        for layer_idx in range(1, num_hid_layers + 1):
            hidden = tf.nn.tanh(
                U.dense(hidden, hid_size, "%s%i" % (prefix, layer_idx),
                        weight_init=U.normc_initializer(1.0)))
        return hidden

    # Value head: scalar prediction per observation (column 0 of a [N, 1] output).
    value_features = tanh_stack("vffc")
    value_out = U.dense(value_features, 1, "vffinal",
                        weight_init=U.normc_initializer(1.0))
    self.vpred = value_out[:, 0]

    # Policy head.
    policy_features = tanh_stack("polfc")
    use_free_logstd = gaussian_fixed_var and isinstance(ac_space, gym.spaces.Box)
    if use_free_logstd:
        mean = U.dense(policy_features, pdtype.param_shape()[0] // 2,
                       "polfinal", U.normc_initializer(0.01))
        logstd = tf.get_variable(
            name="logstd",
            shape=[1, pdtype.param_shape()[0] // 2],
            initializer=tf.zeros_initializer)
        # ``mean * 0.0 + logstd`` broadcasts the [1, k] log-std to the batch shape.
        pdparam = U.concatenate([mean, mean * 0.0 + logstd], axis=1)
    else:
        pdparam = U.dense(policy_features, pdtype.param_shape()[0],
                          "polfinal", U.normc_initializer(0.01))

    self.pd = pdtype.pdfromflat(pdparam)

    self.state_in = []
    self.state_out = []

    # Boolean switch: sample from the distribution, or take its mode.
    stochastic = tf.placeholder(dtype=tf.bool, shape=())
    ac = U.switch(stochastic, self.pd.sample(), self.pd.mode())
    self._act = U.function([stochastic, ob], [ac, self.vpred])
def encoder_net(self, img, latent_dim):
    """Encode ``img`` with two 1200-unit ReLU layers into ``(mu, logvar)``.

    Both outputs are linear dense layers of width ``latent_dim`` that
    share the same hidden features.
    """
    hidden = img
    for layer_name in ('l1', 'l2'):
        hidden = tf.nn.relu(
            U.dense(hidden, 1200, layer_name, U.normc_initializer(1.0)))

    mu = U.dense(hidden, latent_dim, 'l3_1', U.normc_initializer(1.0))
    logvar = U.dense(hidden, latent_dim, 'l3_2', U.normc_initializer(1.0))
    return mu, logvar
def deconv_net(scope, latent_variable):
    """Decode a latent vector into a ``[batch, 160, 210, 1]`` tensor.

    Two dense layers expand the latent to ``128 * 8 * 11`` features, which
    are reshaped to ``[batch, 8, 11, 128]`` and upsampled by four stride-2
    transposed convolutions. The last one has no non-linearity.
    All variables live under ``scope``.
    """
    with tf.variable_scope(scope):
        hidden = U.dense(latent_variable, 2048, 'l3', U.normc_initializer(1.0))
        hidden = tf.nn.relu(
            U.dense(hidden, 128 * 8 * 11, 'l4', U.normc_initializer(1.0)))
        # Unflatten to a spatial feature map.
        hidden = tf.reshape(hidden, [tf.shape(hidden)[0], 8, 11, 128])

        # (name, filter shape, output H/W/C, kernel argument, padding)
        relu_stages = (
            ("uc1", [4, 4, 128, 128], [19, 25, 128], [4, 4], "VALID"),
            ("uc2", [6, 6, 128, 128], [38, 50, 128], [6, 6], "SAME"),
            ("uc3", [6, 6, 128, 128], [80, 105, 128], [6, 6], "VALID"),
        )
        for name, filter_shape, out_hwc, kernel, padding in relu_stages:
            out_shape = [tf.shape(hidden)[0]] + out_hwc
            hidden = tf.nn.relu(
                U.conv2d_transpose(hidden, filter_shape, out_shape, name,
                                   kernel, [2, 2], pad=padding))

        final_shape = [tf.shape(hidden)[0], 160, 210, 1]
        return U.conv2d_transpose(hidden, [8, 8, 1, 128], final_shape, "uc4",
                                  [8, 8], [2, 2], pad="SAME")
def decoder_net(self, latent_variable):
    """Decode a latent vector with three 1200-unit tanh layers.

    Returns:
        ``(x_logit, x_mean)``: the 4096-wide linear output and its sigmoid.
    """
    hidden = latent_variable
    for layer_name in ('l4', 'l5', 'l6'):
        hidden = tf.nn.tanh(
            U.dense(hidden, 1200, layer_name, U.normc_initializer(1.0)))

    x_logit = U.dense(hidden, 4096, 'l7', U.normc_initializer(1.0))
    x_mean = tf.nn.sigmoid(x_logit)
    return x_logit, x_mean
def _make_net(self, o):
    """Build the policy network that maps observations ``o`` to actions.

    The observation goes through one dense layer per entry of
    ``self.hidden_dims`` (only ``self.connection_type == 'ff'`` is
    supported). The action head is selected by ``self.ac_bins``, a string
    of the form ``'<mode>:<arg>'``:

    * ``'uniform:<k>'``: ``k`` evenly spaced values per action dimension
      between ``ac_space.low`` and ``ac_space.high``.
    * ``'custom:<v0,v1,...>'``: explicit values running from -1 to 1,
      rescaled to ``[ac_space.low, ac_space.high]``.
    * ``'continuous:<ignored>'``: a plain linear output layer.

    Returns:
        The action tensor.

    Raises:
        NotImplementedError: for an unknown connection type or bin mode.
    """
    # Process observation.
    if self.connection_type != 'ff':
        raise NotImplementedError(self.connection_type)
    x = o
    for ilayer, hd in enumerate(self.hidden_dims):
        x = self.nonlin(
            U.dense(x, hd, 'l{}'.format(ilayer), U.normc_initializer(1.0)))

    # Map to action.
    adim = self.ac_space.shape[0]
    ahigh = self.ac_space.high
    alow = self.ac_space.low
    assert isinstance(self.ac_bins, str)
    ac_bin_mode, ac_bin_arg = self.ac_bins.split(':')

    if ac_bin_mode == 'uniform':
        # Uniformly spaced bins, from ac_space.low to ac_space.high.
        num_ac_bins = int(ac_bin_arg)
        aidx_na = bins(x, adim, num_ac_bins, 'out')
        ac_range_1a = (ahigh - alow)[None, :]
        a = (1. / (num_ac_bins - 1.) * tf.to_float(aidx_na) * ac_range_1a +
             alow[None, :])
    elif ac_bin_mode == 'custom':
        # Custom bins specified as a list of values from -1 to 1.
        # The bins are rescaled to ac_space.low to ac_space.high.
        acvals_k = np.array(list(map(float, ac_bin_arg.split(','))),
                            dtype=np.float32)
        logger.info('Custom action values: ' +
                    ' '.join('{:.3f}'.format(v) for v in acvals_k))
        assert (acvals_k.ndim == 1 and acvals_k[0] == -1 and
                acvals_k[-1] == 1)
        acvals_ak = ((ahigh - alow)[:, None] /
                     (acvals_k[-1] - acvals_k[0]) *
                     (acvals_k - acvals_k[0])[None, :] + alow[:, None])

        aidx_na = bins(x, adim, len(acvals_k), 'out')  # Values in [0, k-1].
        # Pair every chosen bin index with its action-dimension index so
        # gather_nd can look up acvals_ak[dim, bin]. The axis must be passed
        # to tf.concat separately from the list of tensors to join.
        dim_index = tf.tile(
            np.arange(adim)[None, :, None], [tf.shape(aidx_na)[0], 1, 1])
        gather_index = tf.concat(
            [dim_index, tf.expand_dims(aidx_na, -1)], axis=2)  # (n, a, 2)
        a = tf.gather_nd(acvals_ak, gather_index)  # (n, a)
    elif ac_bin_mode == 'continuous':
        a = U.dense(x, adim, 'out', U.normc_initializer(0.01))
    else:
        raise NotImplementedError(ac_bin_mode)

    return a
def decoder_net(self, latent_variable):
    """Decode a latent vector into a ``[batch, 64, 64, 3]`` tensor.

    Two dense layers (256 linear, 1024 ReLU) feed a ``[batch, 4, 4, 64]``
    feature map that four stride-``[2, 2]`` SAME transposed convolutions
    upsample: 4 -> 8 -> 16 -> 32 -> 64. The last one has no non-linearity.
    """
    hidden = U.dense(latent_variable, 256, 'l2', U.normc_initializer(1.0))
    hidden = tf.nn.relu(U.dense(hidden, 1024, 'l3', U.normc_initializer(1.0)))
    # Unflatten: 1024 = 4 * 4 * 64.
    hidden = tf.reshape(hidden, [tf.shape(hidden)[0], 4, 4, 64])

    # (name, filter shape, output H/W/C)
    relu_stages = (
        ("uc1", [4, 4, 64, 64], [8, 8, 64]),
        ("uc2", [4, 4, 32, 64], [16, 16, 32]),
        ("uc3", [4, 4, 32, 32], [32, 32, 32]),
    )
    for name, filter_shape, out_hwc in relu_stages:
        out_shape = [tf.shape(hidden)[0]] + out_hwc
        hidden = tf.nn.relu(
            U.conv2d_transpose(hidden, filter_shape, out_shape, name,
                               [2, 2], pad="SAME"))

    final_shape = [tf.shape(hidden)[0], 64, 64, 3]
    return U.conv2d_transpose(hidden, [4, 4, 3, 32], final_shape, "uc4",
                              [2, 2], pad="SAME")
def encoder_net(self, img, latent_dim):
    """Encode ``img`` with four stride-2 convolutions into ``(mu, logvar)``.

    Each convolution uses a 4x4 kernel, stride ``[2, 2]``, SAME padding and
    ReLU. The flattened features pass through a 256-unit ReLU layer and
    then two linear heads of width ``latent_dim``.
    NOTE(review): the original shape comments assume a 64x64 input
    (64 -> 32 -> 16 -> 8 -> 4); confirm against the caller.
    """
    hidden = img
    # (variable name, number of output channels)
    for name, channels in (("c1", 32), ("c2", 32), ("c3", 64), ("c4", 64)):
        hidden = tf.nn.relu(
            U.conv2d(hidden, channels, name, [4, 4], [2, 2], pad="SAME"))

    hidden = U.flattenallbut0(hidden)
    hidden = tf.nn.relu(U.dense(hidden, 256, 'l1', U.normc_initializer(1.0)))

    mu = U.dense(hidden, latent_dim, 'l1_1', U.normc_initializer(1.0))
    logvar = U.dense(hidden, latent_dim, 'l1_2', U.normc_initializer(1.0))
    return mu, logvar
def proj_net(scope, img, latent_dim):
    """Project ``img`` to a ``latent_dim``-wide vector.

    Four stride-2 SAME ReLU convolutions are followed by a 2048-unit ReLU
    dense layer and a linear output layer. Variables live under ``scope``.
    """
    # (variable name, output channels, kernel size)
    conv_specs = (
        ("c1", 64, [8, 8]),
        ("c2", 128, [6, 6]),
        ("c3", 128, [6, 6]),
        ("c4", 128, [4, 4]),
    )
    with tf.variable_scope(scope):
        hidden = img
        for name, channels, kernel in conv_specs:
            hidden = tf.nn.relu(
                U.conv2d(hidden, channels, name, kernel, [2, 2], pad="SAME"))

        hidden = U.flattenallbut0(hidden)
        hidden = tf.nn.relu(
            U.dense(hidden, 2048, 'l1', U.normc_initializer(1.0)))
        return U.dense(hidden, latent_dim, 'l2', U.normc_initializer(1.0))
def capsule(inputs, units, dim):
    """Capsule layer with iterative routing.

    Args:
        inputs: tensor of shape ``[batch, units_in, dim_in]``.
        units: number of output capsules.
        dim: size of each output capsule vector.

    Returns:
        Tensor of shape ``[batch, units, dim]``.

    The routing logits ``b`` are updated ``ROUTE_ITER`` times, then one
    final softmax / squash pass produces the output.
    """
    in_shape = inputs.get_shape()  # [batch, units_in, dim_in]
    units_in, dim_in = in_shape[1], in_shape[2]

    b = tf.get_variable('b', shape=[1, units_in, units, 1, 1],
                        dtype=tf.float32, initializer=tf.zeros_initializer)
    w = tf.get_variable('w', shape=[1, units_in, units, dim_in, dim],
                        dtype=tf.float32, initializer=U.normc_initializer(0.1))

    # [batch, units_in, dim_in] -> [batch, units_in, units, 1, dim_in]
    expanded = tf.expand_dims(tf.expand_dims(inputs, axis=2), axis=2)
    tiled = tf.tile(expanded, [1, 1, units, 1, 1])

    # Per-pair prediction vectors: [batch, units_in, units, 1, dim]
    batch_w = tf.tile(w, [tf.shape(tiled)[0], 1, 1, 1, 1])
    votes = tf.matmul(tiled, batch_w)

    logits = tf.tile(b, [tf.shape(votes)[0], 1, 1, 1, 1])

    def route(current_logits):
        """Weight the votes by softmax over output capsules, then squash."""
        coupling = tf.nn.softmax(current_logits, dim=2)
        return squash(U.sum(coupling * votes, axis=1, keepdims=True))

    for _ in range(ROUTE_ITER):
        outputs = route(logits)
        # Agreement between votes and the current outputs raises the logits.
        logits = logits + U.sum(votes * outputs, axis=-1, keepdims=True)

    outputs = route(logits)
    return tf.reshape(outputs, [-1, units, dim])
def reconstruct_fc(caps):
    """Map ``caps`` to a 784-wide sigmoid output through two ReLU layers.

    Layer widths are 512 -> 1024 -> 784. All variables live under the
    ``"reconstruction"`` scope and share one ``normc_initializer(0.1)``.
    """
    with tf.variable_scope("reconstruction"):
        init = U.normc_initializer(0.1)

        hidden = caps
        for layer_name, width in (("fc1", 512), ("fc2", 1024)):
            hidden = tf.nn.relu(
                U.dense(hidden, width, name=layer_name, weight_init=init))

        logits = U.dense(hidden, 784, name="fc3", weight_init=init)
        return tf.nn.sigmoid(logits)
def classifier_net(self, z1, z2, feat_size, latent_dim, cls_L, cls_batch_per_gpu):
    """Produce ``feat_size`` outputs from the averaged latent difference.

    ``z1`` and ``z2`` are reshaped to ``(cls_batch_per_gpu, -1, latent_dim)``.
    Their difference is summed over axis 1, divided by ``cls_L`` and fed
    to a single linear layer. Variables live under ``"classifier"``.
    """
    grouped_shape = (cls_batch_per_gpu, -1, latent_dim)
    with tf.variable_scope("classifier"):
        z1 = tf.reshape(z1, grouped_shape)
        z2 = tf.reshape(z2, grouped_shape)
        warn("z1: {}".format(np.shape(z1)))

        z_diff = U.sum(z1 - z2, axis=1) / cls_L
        warn("z_diff: {}".format(np.shape(z_diff)))

        return U.dense(z_diff, feat_size, 'cls_fc1', U.normc_initializer(1.0))
def reconstruct_fc(caps):
    """Fully connected reconstruction head: 512 ReLU, 1024 ReLU, 784 sigmoid.

    Variables are created under the ``"reconstruction"`` scope, and every
    layer uses the same ``normc_initializer(0.1)`` instance.
    """
    with tf.variable_scope("reconstruction"):
        weight_init = U.normc_initializer(0.1)

        def dense(features, width, layer_name):
            return U.dense(features, width, name=layer_name,
                           weight_init=weight_init)

        first = tf.nn.relu(dense(caps, 512, "fc1"))
        second = tf.nn.relu(dense(first, 1024, "fc2"))
        return tf.nn.sigmoid(dense(second, 784, "fc3"))
def _init(self, ob_space, ac_space):
    """Build separate convolutional policy and value networks.

    Observations are divided by 255 and fed to two identical but unshared
    trunks (two VALID convolutions plus a 128-unit dense layer), one under
    scope ``"pol"`` and one under ``"vf"``.

    Sets ``self.pdtype``, ``self.pd``, ``self.vpred``, ``self.vpredz``,
    ``self.state_in``, ``self.state_out`` and ``self._act``.
    """
    assert isinstance(ob_space, gym.spaces.Box)

    pdtype = make_pdtype(ac_space)
    self.pdtype = pdtype
    sequence_length = None

    sy_ob = U.get_placeholder(
        name="sy_ob",
        dtype=tf.float32,
        shape=[sequence_length] + list(ob_space.shape))
    obscaled = sy_ob / 255.0

    def conv_trunk():
        """Conv/conv/dense feature extractor, created in the current scope."""
        feats = tf.nn.relu(
            U.conv2d(obscaled, 8, "l1", [8, 8], [4, 4], pad="VALID"))
        feats = tf.nn.relu(
            U.conv2d(feats, 16, "l2", [4, 4], [2, 2], pad="VALID"))
        feats = U.flattenallbut0(feats)
        return tf.nn.relu(U.dense(feats, 128, 'lin', U.normc_initializer(1.0)))

    with tf.variable_scope("pol"):
        logits = U.dense(conv_trunk(), pdtype.param_shape()[0], "logits",
                         U.normc_initializer(0.01))
        self.pd = pdtype.pdfromflat(logits)

    with tf.variable_scope("vf"):
        self.vpred = U.dense(conv_trunk(), 1, "value",
                             U.normc_initializer(1.0))
        self.vpredz = self.vpred

    self.state_in = []
    self.state_out = []

    stochastic = tf.placeholder(dtype=tf.bool, shape=())
    # XXX: the action is always sampled; ``stochastic`` is accepted as an
    # input of the act function but does not influence the result.
    sy_ac = self.pd.sample()
    self._act = U.function([stochastic, sy_ob], [sy_ac, self.vpred])
def capsule(inputs, units, dim):
    """Route ``[batch, units_in, dim_in]`` capsules into ``units`` capsules.

    Every input capsule casts a ``dim``-wide vote for every output capsule
    through the weight tensor ``w``. The routing logits ``b`` are refined
    ``ROUTE_ITER`` times by vote/output agreement, and a final pass yields
    the result.

    Returns:
        Tensor of shape ``[batch, units, dim]``.
    """
    static_shape = inputs.get_shape()  # [batch, units_in, dim_in]
    units_in = static_shape[1]
    dim_in = static_shape[2]

    b = tf.get_variable(
        'b', shape=[1, units_in, units, 1, 1],
        dtype=tf.float32, initializer=tf.zeros_initializer)
    w = tf.get_variable(
        'w', shape=[1, units_in, units, dim_in, dim],
        dtype=tf.float32, initializer=U.normc_initializer(0.1))

    # Insert two singleton axes and repeat along the output-capsule axis.
    inputs = tf.expand_dims(inputs, axis=2)
    inputs = tf.expand_dims(inputs, axis=2)
    inputs = tf.tile(inputs, [1, 1, units, 1, 1])

    # Repeat the shared weights per batch element and compute the votes.
    w = tf.tile(w, [tf.shape(inputs)[0], 1, 1, 1, 1])
    votes = tf.matmul(inputs, w)

    b = tf.tile(b, [tf.shape(votes)[0], 1, 1, 1, 1])

    step = 0
    while step < ROUTE_ITER:
        coupling = tf.nn.softmax(b, dim=2)
        routed = squash(U.sum(coupling * votes, axis=1, keepdims=True))
        b = b + U.sum(votes * routed, axis=-1, keepdims=True)
        step += 1

    coupling = tf.nn.softmax(b, dim=2)
    routed = squash(U.sum(coupling * votes, axis=1, keepdims=True))
    return tf.reshape(routed, [-1, units, dim])
def _make_net(self, o):
    """Build the policy network for a discrete action space.

    The last 3 columns of ``o`` are passed through unchanged; the rest is
    reshaped to ``[-1, 9, 9, 9]`` and processed by three 3x3 convolutions
    with 32 filters each, then a 256-unit dense layer. Both parts are
    concatenated and fed through another 256-unit dense layer.

    The action head is selected by ``self.ac_bins`` (``'<mode>:<arg>'``):

    * ``'uniform:<k>'``: softmax over the output of ``bins``.
    * ``'custom:<v0,v1,...>'``: explicit values running from -1 to 1,
      rescaled to ``[0, ac_space.n - 1]`` and selected with ``gather_nd``.
    * ``'continuous:<ignored>'``: a plain linear output layer.

    Returns:
        The action tensor.

    Raises:
        NotImplementedError: for an unknown bin mode.
    """
    # Split the flat observation into the grid part and the 3 extra features.
    o_cnn = o[:, :-3]
    o_self = o[:, -3:]

    x_cnn = tf.reshape(o_cnn, [-1, 9, 9, 9])
    for layer_name in ('cnn1', 'cnn2', 'cnn3'):
        x_cnn = tf.layers.conv2d(x_cnn, 32, 3, data_format='channels_last',
                                 name=layer_name, activation=self.nonlin)
    # With the default 'valid' padding each 3x3 conv shrinks the grid by 2
    # (9 -> 7 -> 5 -> 3), leaving 3 * 3 * 32 = 288 features.
    x_cnn = tf.reshape(x_cnn, [-1, 288])
    x_cnn = self.nonlin(U.dense(x_cnn, 256, 'ff1', U.normc_initializer(1.0)))

    x = tf.concat([x_cnn, o_self], 1)
    x = self.nonlin(U.dense(x, 256, 'ff2', U.normc_initializer(1.0)))

    # Map to action.
    adim = 1
    # Stored as length-``adim`` arrays (not Python scalars) because the
    # 'custom' branch indexes them with ``[:, None]``.
    ahigh = np.array([self.ac_space.n - 1], dtype=np.float32)
    alow = np.zeros(adim, dtype=np.float32)
    assert isinstance(self.ac_bins, str)
    ac_bin_mode, ac_bin_arg = self.ac_bins.split(':')

    if ac_bin_mode == 'uniform':
        num_ac_bins = int(ac_bin_arg)
        aidx_na = bins(x, adim, num_ac_bins, 'out')
        a = tf.nn.softmax(aidx_na)
    elif ac_bin_mode == 'custom':
        # Custom bins specified as a list of values from -1 to 1.
        # The bins are rescaled to the range [alow, ahigh].
        acvals_k = np.array(list(map(float, ac_bin_arg.split(','))),
                            dtype=np.float32)
        logger.info('Custom action values: ' +
                    ' '.join('{:.3f}'.format(v) for v in acvals_k))
        assert (acvals_k.ndim == 1 and acvals_k[0] == -1 and
                acvals_k[-1] == 1)
        acvals_ak = (
            (ahigh - alow)[:, None] / (acvals_k[-1] - acvals_k[0]) *
            (acvals_k - acvals_k[0])[None, :] + alow[:, None]
        )

        # NOTE(review): gather_nd needs integer bin indices here, so this
        # branch assumes ``bins`` returns an argmax, while the 'uniform'
        # branch applies softmax to the same helper's output. Confirm which
        # ``bins`` variant this module uses.
        aidx_na = bins(x, adim, len(acvals_k), 'out')  # values in [0, k-1]
        dim_index = tf.tile(
            np.arange(adim)[None, :, None], [tf.shape(aidx_na)[0], 1, 1])
        gather_index = tf.concat(
            [dim_index, tf.expand_dims(aidx_na, -1)], 2)  # (n, a, 2)
        a = tf.gather_nd(acvals_ak, gather_index)  # (n, a)
    elif ac_bin_mode == 'continuous':
        a = U.dense(x, adim, 'out', U.normc_initializer(0.01))
    else:
        raise NotImplementedError(ac_bin_mode)

    return a
def bins(x, dim, num_bins, name):
    """Return per-dimension bin scores of shape ``[-1, dim, num_bins]``.

    A single dense layer named ``name`` produces ``dim * num_bins`` values
    per row of ``x``, which are then reshaped. The raw scores are returned;
    no argmax is applied.
    """
    flat_scores = U.dense(x, dim * num_bins, name, U.normc_initializer(0.01))
    return tf.reshape(flat_scores, [-1, dim, num_bins])
def bins(x, dim, num_bins, name):
    """Return the index of the highest-scoring bin for each dimension.

    A single dense layer named ``name`` produces ``dim * num_bins`` scores
    per row of ``x``. They are reshaped to ``[-1, dim, num_bins]`` and
    reduced with argmax over the last axis, giving values in
    ``[0, num_bins - 1]``.
    """
    flat_scores = U.dense(x, dim * num_bins, name, U.normc_initializer(0.01))
    per_dim_scores = tf.reshape(flat_scores, [-1, dim, num_bins])
    best_bin = tf.argmax(per_dim_scores, 2)
    return best_bin