コード例 #1
0
ファイル: model.py プロジェクト: KangMingHsi/DeepRL_cs294-112
    def build_model(self):
        """Build the network that maps ``self.state`` to ``self.action``.

        Three dense layers named ``d1``-``d3`` (widths read from
        ``self.net_param``) are each followed by a ReLU.  A final dense layer
        named ``out`` with ``self.action_size`` units is passed through
        ``tanh`` and stored in ``self.action``.  All variables are created
        under the ``dense`` variable scope.
        """
        with tf.variable_scope('dense'):
            hidden = self.state
            for layer_name in ('d1', 'd2', 'd3'):
                pre_activation = tf_util.dense(
                    name=layer_name,
                    x=hidden,
                    weight_init=tf_util.normc_initializer(),
                    size=self.net_param[layer_name])
                hidden = tf.nn.relu(pre_activation)

            out_linear = tf_util.dense(
                name='out',
                x=hidden,
                weight_init=tf_util.normc_initializer(),
                size=self.action_size)
            self.action = tf.tanh(out_linear)
コード例 #2
0
    def _init(self,
              ob_space,
              ac_space,
              hid_size,
              num_hid_layers,
              gaussian_fixed_var=True):
        """Build separate tanh MLPs for the value function and the policy.

        Args:
            ob_space: observation space; must be a ``gym.spaces.Box``.
            ac_space: action space, handed to ``make_pdtype``.
            hid_size: width of every hidden layer.
            num_hid_layers: number of hidden layers in each of the two MLPs.
            gaussian_fixed_var: when true and ``ac_space`` is a ``Box``, the
                network only outputs a mean; the log-std is a standalone
                variable that does not depend on the observation.

        Sets ``self.pdtype``, ``self.vpred``, ``self.pd``, ``self.state_in``,
        ``self.state_out`` and ``self._act``.
        """
        assert isinstance(ob_space, gym.spaces.Box)

        pdtype = make_pdtype(ac_space)
        self.pdtype = pdtype

        # The leading dimension of the placeholder is left as None.
        ob_shape = [None] + list(ob_space.shape)
        ob = U.get_placeholder(name="ob", dtype=tf.float32, shape=ob_shape)

        def tanh_stack(name_format):
            # Hidden layers are numbered from 1.  The raw observation is the
            # input: the "obfilter" running mean/std normalisation that used
            # to sit here is disabled.
            hidden = ob
            for layer_no in range(1, num_hid_layers + 1):
                linear = U.dense(hidden,
                                 hid_size,
                                 name_format % layer_no,
                                 weight_init=U.normc_initializer(1.0))
                hidden = tf.nn.tanh(linear)
            return hidden

        # Value function head: one scalar per observation.
        value_features = tanh_stack("vffc%i")
        value_out = U.dense(value_features,
                            1,
                            "vffinal",
                            weight_init=U.normc_initializer(1.0))
        self.vpred = value_out[:, 0]

        # Policy head: flat parameters of the action distribution.
        policy_features = tanh_stack("polfc%i")
        fixed_var_box = gaussian_fixed_var and isinstance(
            ac_space, gym.spaces.Box)
        if not fixed_var_box:
            pdparam = U.dense(policy_features,
                              pdtype.param_shape()[0], "polfinal",
                              U.normc_initializer(0.01))
        else:
            # Half of the flat parameter vector is the mean, the other half
            # the log-std.
            half = pdtype.param_shape()[0] // 2
            mean = U.dense(policy_features, half, "polfinal",
                           U.normc_initializer(0.01))
            logstd = tf.get_variable(name="logstd",
                                     shape=[1, half],
                                     initializer=tf.zeros_initializer)
            # `mean * 0.0 + logstd` broadcasts the single logstd row to the
            # same leading dimension as `mean`.
            pdparam = U.concatenate([mean, mean * 0.0 + logstd], axis=1)

        self.pd = pdtype.pdfromflat(pdparam)

        self.state_in = []
        self.state_out = []

        # `stochastic` selects between sampling and taking the mode.
        stochastic = tf.placeholder(dtype=tf.bool, shape=())
        ac = U.switch(stochastic, self.pd.sample(), self.pd.mode())
        self._act = U.function([stochastic, ob], [ac, self.vpred])
コード例 #3
0
	def encoder_net(self, img, latent_dim):
		"""Encode ``img`` into the two heads of a latent distribution.

		Two 1200-unit ReLU dense layers (``l1``, ``l2``) feed two linear
		heads of width ``latent_dim``: ``l3_1`` (returned as ``mu``) and
		``l3_2`` (returned as ``logvar``).
		"""
		hidden = img
		for layer_name in ('l1', 'l2'):
			linear = U.dense(hidden, 1200, layer_name, U.normc_initializer(1.0))
			hidden = tf.nn.relu(linear)
		mu = U.dense(hidden, latent_dim, 'l3_1', U.normc_initializer(1.0))
		logvar = U.dense(hidden, latent_dim, 'l3_2', U.normc_initializer(1.0))
		return mu, logvar
コード例 #4
0
def deconv_net(scope, latent_variable):
    """Decode ``latent_variable`` through dense and transposed-conv layers.

    Under variable scope ``scope``: a linear layer (``l3``, 2048 units), a
    ReLU dense layer (``l4``) whose output is reshaped to
    ``[batch, 8, 11, 128]``, three ReLU transposed convolutions
    (``uc1``-``uc3``) and a final linear transposed convolution (``uc4``)
    whose requested output shape is ``[batch, 160, 210, 1]``.
    """
    # (name, filter shape, output H/W/C, kernel argument, padding)
    relu_stages = (
        ("uc1", [4, 4, 128, 128], [19, 25, 128], [4, 4], "VALID"),
        ("uc2", [6, 6, 128, 128], [38, 50, 128], [6, 6], "SAME"),
        ("uc3", [6, 6, 128, 128], [80, 105, 128], [6, 6], "VALID"),
    )

    with tf.variable_scope(scope):
        h = U.dense(latent_variable, 2048, 'l3', U.normc_initializer(1.0))
        h = tf.nn.relu(
            U.dense(h, 128 * 8 * 11, 'l4', U.normc_initializer(1.0)))

        # Unflatten; the batch size is only known at run time.
        batch = tf.shape(h)[0]
        h = tf.reshape(h, [batch, 8, 11, 128])

        for layer_name, filter_shape, out_hwc, kernel, padding in relu_stages:
            out_shape = [tf.shape(h)[0]] + out_hwc
            h = tf.nn.relu(
                U.conv2d_transpose(h, filter_shape, out_shape, layer_name,
                                   kernel, [2, 2], pad=padding))

        final_shape = [tf.shape(h)[0], 160, 210, 1]
        return U.conv2d_transpose(h, [8, 8, 1, 128], final_shape, "uc4",
                                  [8, 8], [2, 2], pad="SAME")
コード例 #5
0
	def decoder_net(self, latent_variable):
		"""Decode a latent batch into 4096 logits and their sigmoid.

		Three 1200-unit tanh dense layers (``l4``-``l6``) are followed by a
		linear layer ``l7`` with 4096 outputs.

		Returns:
			``(x_logit, x_mean)`` where ``x_mean = sigmoid(x_logit)``.
		"""
		hidden = latent_variable
		for layer_name in ('l4', 'l5', 'l6'):
			linear = U.dense(hidden, 1200, layer_name, U.normc_initializer(1.0))
			hidden = tf.nn.tanh(linear)
		x_logit = U.dense(hidden, 4096, 'l7', U.normc_initializer(1.0))
		return x_logit, tf.nn.sigmoid(x_logit)
コード例 #6
0
    def _make_net(self, o):
        """Build the network mapping observations ``o`` to actions.

        The observation is passed through one dense layer per entry of
        ``self.hidden_dims`` (each followed by ``self.nonlin``); only the
        ``'ff'`` connection type is implemented.  The output head is selected
        by ``self.ac_bins``, a string of the form ``"<mode>:<arg>"``:

        * ``uniform:<k>`` - ``k`` evenly spaced values per action dimension
          between ``ac_space.low`` and ``ac_space.high``.
        * ``custom:<v0,v1,...>`` - explicit values running from -1 to 1,
          rescaled to ``[ac_space.low, ac_space.high]``.
        * ``continuous:<anything>`` - a plain linear output layer.

        Returns:
            The action tensor ``a``.

        Raises:
            NotImplementedError: for an unknown connection type or bin mode.
        """
        # Process observation.
        if self.connection_type != 'ff':
            raise NotImplementedError(self.connection_type)
        x = o
        for ilayer, hd in enumerate(self.hidden_dims):
            x = self.nonlin(
                U.dense(x, hd, 'l{}'.format(ilayer),
                        U.normc_initializer(1.0)))

        # Map to action.
        adim = self.ac_space.shape[0]
        ahigh = self.ac_space.high
        alow = self.ac_space.low
        assert isinstance(self.ac_bins, str)
        ac_bin_mode, ac_bin_arg = self.ac_bins.split(':')

        if ac_bin_mode == 'uniform':
            # Uniformly spaced bins, from ac_space.low to ac_space.high.
            num_ac_bins = int(ac_bin_arg)
            aidx_na = bins(x, adim, num_ac_bins, 'out')
            ac_range_1a = (ahigh - alow)[None, :]
            a = (1. / (num_ac_bins - 1.) * tf.to_float(aidx_na) * ac_range_1a +
                 alow[None, :])

        elif ac_bin_mode == 'custom':
            # Custom bins specified as a list of values from -1 to 1.
            # The bins are rescaled to ac_space.low to ac_space.high.
            acvals_k = np.array(list(map(float, ac_bin_arg.split(','))),
                                dtype=np.float32)
            logger.info('Custom action values: ' + ' '.join('{:.3f}'.format(v)
                                                            for v in acvals_k))
            assert acvals_k.ndim == 1 and acvals_k[0] == -1 and acvals_k[
                -1] == 1
            acvals_ak = ((ahigh - alow)[:, None] /
                         (acvals_k[-1] - acvals_k[0]) *
                         (acvals_k - acvals_k[0])[None, :] + alow[:, None])

            aidx_na = bins(x, adim, len(acvals_k),
                           'out')  # Values in [0, k-1].

            # Pair every chosen bin index with its action-dimension index so
            # gather_nd can look up acvals_ak[dim, bin].
            dim_index_na1 = tf.tile(
                np.arange(adim)[None, :, None],
                [tf.shape(aidx_na)[0], 1, 1])
            # The concat axis must be passed as the `axis` argument.  It was
            # previously placed inside the list of values, so no axis was
            # given and a bare scalar was concatenated as a value.
            lookup_na2 = tf.concat(
                [dim_index_na1, tf.expand_dims(aidx_na, -1)],
                axis=2)  # (n, a, 2)
            a = tf.gather_nd(acvals_ak, lookup_na2)  # (n, a)
        elif ac_bin_mode == 'continuous':
            a = U.dense(x, adim, 'out', U.normc_initializer(0.01))
        else:
            raise NotImplementedError(ac_bin_mode)

        return a
コード例 #7
0
	def decoder_net(self, latent_variable):
		"""Decode a latent batch through dense and transposed-conv layers.

		A linear layer (``l2``, 256 units) and a ReLU dense layer (``l3``,
		1024 units) are reshaped to ``[batch, 4, 4, 64]``.  Three ReLU
		transposed convolutions (``uc1``-``uc3``) and a final linear one
		(``uc4``) follow; the requested output shape of the last layer is
		``[batch, 64, 64, 3]``.
		"""
		# (name, filter shape, requested output H/W/C)
		relu_stages = (
			("uc1", [4, 4, 64, 64], [8, 8, 64]),
			("uc2", [4, 4, 32, 64], [16, 16, 32]),
			("uc3", [4, 4, 32, 32], [32, 32, 32]),
		)

		h = U.dense(latent_variable, 256, 'l2', U.normc_initializer(1.0))
		h = tf.nn.relu(U.dense(h, 1024, 'l3', U.normc_initializer(1.0)))

		# Unflatten; the batch size is only known at run time.
		batch = tf.shape(h)[0]
		h = tf.reshape(h, [batch, 4, 4, 64])

		for layer_name, filter_shape, out_hwc in relu_stages:
			out_shape = [tf.shape(h)[0]] + out_hwc
			h = tf.nn.relu(U.conv2d_transpose(h, filter_shape, out_shape, layer_name, [2, 2], pad="SAME"))

		final_shape = [tf.shape(h)[0], 64, 64, 3]
		return U.conv2d_transpose(h, [4, 4, 3, 32], final_shape, "uc4", [2, 2], pad="SAME")
コード例 #8
0
	def encoder_net(self, img, latent_dim):
		"""Encode ``img`` with a conv stack into ``(mu, logvar)``.

		Four ReLU convolutions (``c1``-``c4``; 32, 32, 64, 64 filters, all
		called with ``[4, 4]``, ``[2, 2]`` and SAME padding) are flattened
		and passed through a 256-unit ReLU dense layer ``l1``.  Two linear
		heads of width ``latent_dim`` (``l1_1`` and ``l1_2``) give ``mu``
		and ``logvar``.
		"""
		# NOTE(review): the original shape comments ([32, 32, 32] after c1
		# down to [4, 4, 64] after c4) presumably assume a 64x64 input -
		# confirm against the caller.
		conv_stack = (("c1", 32), ("c2", 32), ("c3", 64), ("c4", 64))

		features = img
		for conv_name, n_filters in conv_stack:
			features = tf.nn.relu(U.conv2d(features, n_filters, conv_name, [4, 4], [2, 2], pad = "SAME"))

		features = U.flattenallbut0(features)
		features = tf.nn.relu(U.dense(features, 256, 'l1', U.normc_initializer(1.0)))

		mu = U.dense(features, latent_dim, 'l1_1', U.normc_initializer(1.0))
		logvar = U.dense(features, latent_dim, 'l1_2', U.normc_initializer(1.0))
		return mu, logvar
コード例 #9
0
def proj_net(scope, img, latent_dim):
    """Project ``img`` to a ``latent_dim``-wide vector under ``scope``.

    Four ReLU convolutions (``c1``-``c4``) are flattened, passed through a
    2048-unit ReLU dense layer ``l1`` and a final linear layer ``l2``.
    """
    # (name, number of filters, kernel argument); every layer is called with
    # [2, 2] as the following argument and SAME padding.
    conv_stack = (
        ("c1", 64, [8, 8]),
        ("c2", 128, [6, 6]),
        ("c3", 128, [6, 6]),
        ("c4", 128, [4, 4]),
    )

    with tf.variable_scope(scope):
        features = img
        for conv_name, n_filters, kernel in conv_stack:
            features = tf.nn.relu(
                U.conv2d(features, n_filters, conv_name, kernel, [2, 2],
                         pad="SAME"))

        flat = U.flattenallbut0(features)
        hidden = tf.nn.relu(U.dense(flat, 2048, 'l1',
                                    U.normc_initializer(1.0)))
        return U.dense(hidden, latent_dim, 'l2', U.normc_initializer(1.0))
コード例 #10
0
ファイル: capsule_net.py プロジェクト: Squadrick/caps-net
def capsule(inputs, units, dim):
    """Capsule layer with iterative routing between input and output units.

    Args:
        inputs: rank-3 tensor; its static dimensions 1 and 2 are read as the
            number of input units and the input capsule width.
        units: number of output capsules.
        dim: width of each output capsule.

    Returns:
        A tensor reshaped to ``[-1, units, dim]``.
    """
    static_shape = inputs.get_shape()
    units_in, dim_in = static_shape[1], static_shape[2]

    routing_init = tf.get_variable('b',
                                   shape=[1, units_in, units, 1, 1],
                                   dtype=tf.float32,
                                   initializer=tf.zeros_initializer)
    weights = tf.get_variable('w',
                              shape=[1, units_in, units, dim_in, dim],
                              dtype=tf.float32,
                              initializer=U.normc_initializer(0.1))

    # Insert two axes after the input-unit axis and repeat the input once
    # per output unit, then map every copy through its own weight matrix.
    expanded = tf.expand_dims(tf.expand_dims(inputs, axis=2), axis=2)
    expanded = tf.tile(expanded, [1, 1, units, 1, 1])
    batch_weights = tf.tile(weights, [tf.shape(expanded)[0], 1, 1, 1, 1])
    predictions = tf.matmul(expanded, batch_weights)

    logits = tf.tile(routing_init, [tf.shape(predictions)[0], 1, 1, 1, 1])

    def routed_output(current_logits):
        # Softmax over the output-unit axis, then a weighted sum over the
        # input-unit axis.
        coupling = tf.nn.softmax(current_logits, dim=2)
        return squash(U.sum(coupling * predictions, axis=1, keepdims=True))

    for _ in range(ROUTE_ITER):
        agreement = U.sum(predictions * routed_output(logits),
                          axis=-1,
                          keepdims=True)
        logits = logits + agreement

    # One more pass with the final logits produces the layer output.
    return tf.reshape(routed_output(logits), [-1, units, dim])
コード例 #11
0
ファイル: capsule_net.py プロジェクト: Squadrick/caps-net
def reconstruct_fc(caps):
    """Map ``caps`` to 784 sigmoid outputs with a three-layer dense net.

    Layers ``fc1`` (512, ReLU), ``fc2`` (1024, ReLU) and ``fc3``
    (784, sigmoid) live under the ``reconstruction`` variable scope and share
    one initializer object.
    """
    with tf.variable_scope("reconstruction"):
        init = U.normc_initializer(0.1)
        hidden = caps
        for layer_name, width in (("fc1", 512), ("fc2", 1024)):
            hidden = tf.nn.relu(
                U.dense(hidden, width, name=layer_name, weight_init=init))
        logits = U.dense(hidden, 784, name="fc3", weight_init=init)
        return tf.nn.sigmoid(logits)
コード例 #12
0
 def classifier_net(self, z1, z2, feat_size, latent_dim, cls_L,
                    cls_batch_per_gpu):
     """Build classifier features from the difference of two latent batches.

     Both inputs are reshaped to ``(cls_batch_per_gpu, -1, latent_dim)``;
     their difference is summed over axis 1, divided by ``cls_L`` and fed to
     a linear layer ``cls_fc1`` with ``feat_size`` outputs.  The shapes of
     ``z1`` and of the reduced difference are reported through ``warn``.
     """
     grouped_shape = (cls_batch_per_gpu, -1, latent_dim)
     with tf.variable_scope("classifier"):
         z1 = tf.reshape(z1, grouped_shape)
         z2 = tf.reshape(z2, grouped_shape)
         warn("z1: {}".format(np.shape(z1)))

         summed_diff = U.sum(z1 - z2, axis=1)
         z_diff = summed_diff / cls_L
         warn("z_diff: {}".format(np.shape(z_diff)))

         return U.dense(z_diff, feat_size, 'cls_fc1',
                        U.normc_initializer(1.0))
コード例 #13
0
ファイル: capsule_net.py プロジェクト: Squadrick/caps-net
def reconstruct_fc(caps):
    """Three dense layers turning ``caps`` into 784 values in (0, 1).

    ``fc1`` (512 units) and ``fc2`` (1024 units) use ReLU; ``fc3``
    (784 units) uses a sigmoid.  All three are created under the
    ``reconstruction`` variable scope with the same initializer object.
    """
    with tf.variable_scope("reconstruction"):
        init = U.normc_initializer(0.1)
        hidden_512 = tf.nn.relu(
            U.dense(caps, 512, name="fc1", weight_init=init))
        hidden_1024 = tf.nn.relu(
            U.dense(hidden_512, 1024, name="fc2", weight_init=init))
        return tf.nn.sigmoid(
            U.dense(hidden_1024, 784, name="fc3", weight_init=init))
コード例 #14
0
    def _init(self, ob_space, ac_space):
        """Build convolutional policy and value networks.

        Args:
            ob_space: observation space; must be a ``gym.spaces.Box``.
            ac_space: action space, handed to ``make_pdtype``.

        The policy (scope ``pol``) and value function (scope ``vf``) use the
        same layer layout but separate variables.  Sets ``self.pdtype``,
        ``self.pd``, ``self.vpred``, ``self.vpredz``, ``self.state_in``,
        ``self.state_out`` and ``self._act``.
        """
        assert isinstance(ob_space, gym.spaces.Box)

        pdtype = make_pdtype(ac_space)
        self.pdtype = pdtype

        # The leading dimension of the placeholder is left as None.
        ob_shape = [None] + list(ob_space.shape)
        sy_ob = U.get_placeholder(name="sy_ob",
                                  dtype=tf.float32,
                                  shape=ob_shape)

        # NOTE(review): dividing by 255 presumably rescales 8-bit pixel
        # observations - confirm against the environment.
        obscaled = sy_ob / 255.0

        def conv_trunk():
            # Two ReLU convolutions, flatten, then a 128-unit ReLU dense
            # layer.  Variables land in whichever scope is active when this
            # is called.
            h = tf.nn.relu(
                U.conv2d(obscaled, 8, "l1", [8, 8], [4, 4], pad="VALID"))
            h = tf.nn.relu(U.conv2d(h, 16, "l2", [4, 4], [2, 2], pad="VALID"))
            h = U.flattenallbut0(h)
            return tf.nn.relu(U.dense(h, 128, 'lin', U.normc_initializer(1.0)))

        with tf.variable_scope("pol"):
            logits = U.dense(conv_trunk(),
                             pdtype.param_shape()[0], "logits",
                             U.normc_initializer(0.01))
            self.pd = pdtype.pdfromflat(logits)

        with tf.variable_scope("vf"):
            self.vpred = U.dense(conv_trunk(), 1, "value",
                                 U.normc_initializer(1.0))
            self.vpredz = self.vpred

        self.state_in = []
        self.state_out = []

        # `stochastic` is accepted as an input of the act function, but the
        # action is always sampled (marked XXX in the original).
        stochastic = tf.placeholder(dtype=tf.bool, shape=())
        sy_ac = self.pd.sample()
        self._act = U.function([stochastic, sy_ob], [sy_ac, self.vpred])
コード例 #15
0
ファイル: capsule_net.py プロジェクト: Squadrick/caps-net
def capsule(inputs, units, dim):
    """Route a batch of input capsules to ``units`` output capsules.

    ``inputs`` is a rank-3 tensor whose static dimensions 1 and 2 give the
    number of input units and the input capsule width.  The result is
    reshaped to ``[-1, units, dim]``.
    """
    in_shape = inputs.get_shape()
    n_in = in_shape[1]
    width_in = in_shape[2]

    prior = tf.get_variable('b', shape=[1, n_in, units, 1, 1],
            dtype=tf.float32, initializer=tf.zeros_initializer)
    transform = tf.get_variable('w', shape=[1, n_in, units, width_in, dim],
            dtype=tf.float32, initializer=U.normc_initializer(0.1))

    # Add two axes after the input-unit axis and repeat once per output unit.
    stacked = tf.expand_dims(inputs, axis=2)
    stacked = tf.expand_dims(stacked, axis=2)
    stacked = tf.tile(stacked, [1, 1, units, 1, 1])

    # Per-example copies of the weights, then one vote per (input, output).
    transform = tf.tile(transform, [tf.shape(stacked)[0], 1, 1, 1, 1])
    votes = tf.matmul(stacked, transform)

    route_logits = tf.tile(prior, [tf.shape(votes)[0], 1, 1, 1, 1])

    remaining = ROUTE_ITER
    while remaining > 0:
        weights = tf.nn.softmax(route_logits, dim=2)
        candidate = squash(U.sum(weights * votes, axis=1, keepdims=True))
        # Raise the logit of every vote that agrees with the current output.
        route_logits = route_logits + U.sum(votes * candidate, axis=-1, keepdims=True)
        remaining -= 1

    # Final pass with the updated logits.
    weights = tf.nn.softmax(route_logits, dim=2)
    result = squash(U.sum(weights * votes, axis=1, keepdims=True))
    return tf.reshape(result, [-1, units, dim])
コード例 #16
0
    def _make_net(self, o):
        """Build a CNN-plus-dense network mapping observations to actions.

        Every row of ``o`` is split in two: all but the last three features
        are reshaped to ``[-1, 9, 9, 9]`` and passed through three
        convolutions and a dense layer; the last three features are
        concatenated with that result before a second dense layer.  The
        output head is selected by ``self.ac_bins``, a string of the form
        ``"<mode>:<arg>"`` with mode ``uniform``, ``custom`` or
        ``continuous``.

        Returns:
            The action tensor ``a``.

        Raises:
            NotImplementedError: for an unknown bin mode.
        """
        o_cnn = o[:, :-3]
        o_self = o[:, -3:]

        x_cnn = tf.reshape(o_cnn, [-1, 9, 9, 9])
        for conv_name in ('cnn1', 'cnn2', 'cnn3'):
            x_cnn = tf.layers.conv2d(x_cnn, 32, 3, data_format='channels_last',
                                     name=conv_name, activation=self.nonlin)
        # With tf.layers.conv2d's default 'valid' padding, three 3x3
        # convolutions shrink 9x9 to 3x3, so the flattened size is
        # 3 * 3 * 32 = 288.
        x_cnn = tf.reshape(x_cnn, [-1, 288])
        x_cnn = self.nonlin(U.dense(x_cnn, 256, 'ff1', U.normc_initializer(1.0)))

        x_self = o_self
        x = tf.concat([x_cnn, x_self], 1)
        x = self.nonlin(U.dense(x, 256, 'ff2', U.normc_initializer(1.0)))

        # The plain feed-forward path driven by self.connection_type and
        # self.hidden_dims is disabled in this variant.

        # Map to action
        adim, ahigh, alow = 1, self.ac_space.n - 1, 0
        assert isinstance(self.ac_bins, str)
        ac_bin_mode, ac_bin_arg = self.ac_bins.split(':')

        if ac_bin_mode == 'uniform':
            num_ac_bins = int(ac_bin_arg)
            # NOTE(review): despite the name, this is fed straight into a
            # softmax, so `bins` is presumably expected to return raw scores
            # here rather than argmax indices - confirm which `bins` is used.
            aidx_na = bins(x, adim, num_ac_bins, 'out')
            a = tf.nn.softmax(aidx_na)

        elif ac_bin_mode == 'custom':
            # Custom bins specified as a list of values from -1 to 1
            # The bins are rescaled to [alow, ahigh]
            acvals_k = np.array(list(map(float, ac_bin_arg.split(','))), dtype=np.float32)
            logger.info('Custom action values: ' + ' '.join('{:.3f}'.format(v) for v in acvals_k))
            assert acvals_k.ndim == 1 and acvals_k[0] == -1 and acvals_k[-1] == 1

            # ahigh / alow are plain scalars here; subscripting them with
            # [:, None] raises.  Wrap them in length-`adim` arrays so the
            # broadcasting below is well defined.
            ahigh_a = np.full(adim, ahigh, dtype=np.float32)
            alow_a = np.full(adim, alow, dtype=np.float32)
            acvals_ak = (
                (ahigh_a - alow_a)[:, None] / (acvals_k[-1] - acvals_k[0]) * (acvals_k - acvals_k[0])[None, :]
                + alow_a[:, None]
            )

            # NOTE(review): gather_nd needs integer indices of shape (n, a),
            # so this branch requires a `bins` that returns argmax indices -
            # confirm which `bins` is used.
            aidx_na = bins(x, adim, len(acvals_k), 'out')  # values in [0, k-1]
            a = tf.gather_nd(
                acvals_ak,
                tf.concat([
                    tf.tile(np.arange(adim)[None, :, None], [tf.shape(aidx_na)[0], 1, 1]),
                    tf.expand_dims(aidx_na, -1)
                ], 2)  # (n,a,2)
            )  # (n,a)
        elif ac_bin_mode == 'continuous':
            a = U.dense(x, adim, 'out', U.normc_initializer(0.01))
        else:
            raise NotImplementedError(ac_bin_mode)

        return a
コード例 #17
0
def bins(x, dim, num_bins, name):
    """Project ``x`` to per-dimension bin scores.

    A dense layer called ``name`` produces ``dim * num_bins`` outputs, which
    are reshaped to ``[-1, dim, num_bins]``.  The raw scores are returned; no
    argmax is applied in this variant.
    """
    flat_scores = U.dense(x, dim * num_bins, name, U.normc_initializer(0.01))
    return tf.reshape(flat_scores, [-1, dim, num_bins])
コード例 #18
0
def bins(x, dim, num_bins, name):
    """Pick the highest-scoring bin for each of ``dim`` dimensions.

    A dense layer called ``name`` produces ``dim * num_bins`` scores, which
    are reshaped to ``[-1, dim, num_bins]``; the index of the maximum along
    the last axis is returned.
    """
    n_outputs = dim * num_bins
    logits = U.dense(x, n_outputs, name, U.normc_initializer(0.01))
    per_dim_scores = tf.reshape(logits, [-1, dim, num_bins])
    return tf.argmax(per_dim_scores, 2)