Example #1
    def build_graph(self):
        self.observation = tf.placeholder(
            tf.float32, [None] + list(self.env.observation_space.shape),
            name='inputs')
        # out = self.observation

        out = U.dense(self.observation,
                      10,
                      'layer1',
                      weight_init=tf.contrib.layers.xavier_initializer(),
                      bias=True,
                      activation=tf.nn.tanh,
                      summary=self.summary)
        out = U.dense(out,
                      10,
                      'layer2',
                      weight_init=tf.contrib.layers.xavier_initializer(),
                      bias=True,
                      activation=tf.nn.tanh,
                      summary=self.summary)
        activation = tf.nn.tanh if self.env.continuous else None

        self.actions = U.dense(
            out,
            self.num_actions,
            'output',
            weight_init=tf.contrib.layers.xavier_initializer(),
            bias=True,
            activation=activation,
            summary=self.summary)
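For context, a minimal sketch of a dense helper consistent with these call sites. The matmul-plus-bias core follows the baselines-style tf_util; the activation keyword is modeled on this example's calls and the summary argument is omitted, so treat the signature as an assumption rather than the real API.

import tensorflow as tf

# Hedged sketch (TF 1.x): weight matrix + optional bias + optional
# activation, with variables scoped under the given layer name.
def dense(x, size, name, weight_init=None, bias=True, activation=None):
    w = tf.get_variable(name + "/w", [int(x.get_shape()[1]), size],
                        initializer=weight_init)
    out = tf.matmul(x, w)
    if bias:
        b = tf.get_variable(name + "/b", [size],
                            initializer=tf.zeros_initializer())
        out = out + b
    return activation(out) if activation is not None else out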
Example #2
def deconv_net(scope, latent_variable):
    with tf.variable_scope(scope):
        x = latent_variable
        x = U.dense(x, 2048, 'l3', U.normc_initializer(1.0))
        x = tf.nn.relu(U.dense(x, 128 * 8 * 11, 'l4',
                               U.normc_initializer(1.0)))
        x = tf.reshape(x, [tf.shape(x)[0], 8, 11, 128])  # Unflatten
        x = tf.nn.relu(
            U.conv2d_transpose(x, [4, 4, 128, 128],
                               [tf.shape(x)[0], 19, 25, 128],
                               "uc1", [4, 4], [2, 2],
                               pad="VALID"))
        x = tf.nn.relu(
            U.conv2d_transpose(x, [6, 6, 128, 128],
                               [tf.shape(x)[0], 38, 50, 128],
                               "uc2", [6, 6], [2, 2],
                               pad="SAME"))
        x = tf.nn.relu(
            U.conv2d_transpose(x, [6, 6, 128, 128],
                               [tf.shape(x)[0], 80, 105, 128],
                               "uc3", [6, 6], [2, 2],
                               pad="VALID"))
        x = U.conv2d_transpose(x, [8, 8, 1, 128],
                               [tf.shape(x)[0], 160, 210, 1],
                               "uc4", [8, 8], [2, 2],
                               pad="SAME")
        return x
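A note on the hand-specified output shapes: tf.nn.conv2d_transpose accepts any output_shape that the forward convolution would map back to the input size, which is why 19 is legal for an input of 8 under VALID padding even though (8 - 1) * 2 + 4 = 18 would also invert it. A quick arithmetic check, assuming U.conv2d_transpose forwards these filter sizes and strides to tf.nn.conv2d_transpose:

# Each requested output size must satisfy conv_out(output) == input size.
def conv_out(size, filt, stride, pad):
    if pad == "VALID":
        return (size - filt) // stride + 1
    return -(-size // stride)  # SAME: ceil(size / stride)

assert conv_out(19, 4, 2, "VALID") == 8 and conv_out(25, 4, 2, "VALID") == 11    # uc1
assert conv_out(38, 6, 2, "SAME") == 19 and conv_out(50, 6, 2, "SAME") == 25     # uc2
assert conv_out(80, 6, 2, "VALID") == 38 and conv_out(105, 6, 2, "VALID") == 50  # uc3
assert conv_out(160, 8, 2, "SAME") == 80 and conv_out(210, 8, 2, "SAME") == 105  # uc4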
Example #3
	def build(self, image, mos_score):
		net = tf.reshape(image, [-1, 32, 32, 3])
		net = self.block(net, 32)
		net = self.block(net, 64)
		net = self.block(net, 128)
		net = self.block(net, 256)
		net = self.block(net, 512)

		net1 = tf.reshape(net, (-1, 512))
		net1 = U.dense(net1, 512, 'fc1')
		net1 = U.swish(net1)
		net1 = tf.nn.dropout(net1, keep_prob=self.prob)
		net1 = U.dense(net1, 1, 'fc2')

		net2 = tf.reshape(net, (-1, 512))
		net2 = U.dense(net2, 512, 'fc1_weight')
		net2 = U.swish(net2)
		net2 = tf.nn.dropout(net2, keep_prob=self.prob)
		net2 = U.dense(net2, 1, 'fc2_weight')
		net2 = tf.nn.relu(net2) + 1e-6

		self.loss_op = self.weighted_loss(net1, net2, mos_score)

		optimizer = tf.train.AdamOptimizer(self.lr)
		self.train_op = optimizer.minimize(self.loss_op)
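self.weighted_loss is defined elsewhere in the class; one plausible reading of this graph is that net1 predicts the MOS score while net2 (kept strictly positive by relu + 1e-6) weights each sample's error. A purely illustrative sketch under that assumption, not the author's confirmed loss:

import tensorflow as tf

# Hypothetical weighted regression loss: weight scales the squared error.
def weighted_loss(self, pred, weight, target):
    return tf.reduce_mean(weight * tf.square(pred - target))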
Example #4
    def build_model(self):

        with tf.variable_scope('dense') as scope:

            d1 = tf.nn.relu(
                tf_util.dense(name='d1',
                              x=self.state,
                              weight_init=tf_util.normc_initializer(),
                              size=self.net_param['d1']))
            d2 = tf.nn.relu(
                tf_util.dense(name='d2',
                              x=d1,
                              weight_init=tf_util.normc_initializer(),
                              size=self.net_param['d2']))
            d3 = tf.nn.relu(
                tf_util.dense(name='d3',
                              x=d2,
                              weight_init=tf_util.normc_initializer(),
                              size=self.net_param['d3']))

            self.action = tf.tanh(
                tf_util.dense(name='out',
                              x=d3,
                              weight_init=tf_util.normc_initializer(),
                              size=self.action_size))
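normc_initializer recurs throughout these examples; in the baselines-style tf_util it draws Gaussian weights and rescales each column to a fixed L2 norm:

import numpy as np
import tensorflow as tf

# Column-normalized Gaussian initializer (baselines-style tf_util).
def normc_initializer(std=1.0):
    def _initializer(shape, dtype=None, partition_info=None):
        out = np.random.randn(*shape).astype(np.float32)
        out *= std / np.sqrt(np.square(out).sum(axis=0, keepdims=True))
        return tf.constant(out)
    return _initializer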
Example #5
    def _init(self,
              ob_space,
              ac_space,
              hid_size,
              num_hid_layers,
              gaussian_fixed_var=True):
        assert isinstance(ob_space, gym.spaces.Box)

        self.pdtype = pdtype = make_pdtype(ac_space)
        sequence_length = None

        ob = U.get_placeholder(name="ob",
                               dtype=tf.float32,
                               shape=[sequence_length] + list(ob_space.shape))
        #obz = ob

        #with tf.variable_scope("obfilter"):
        #    self.ob_rms = RunningMeanStd(shape=ob_space.shape)

        #obz = tf.clip_by_value((ob - self.ob_rms.mean) / self.ob_rms.std, -5.0, 5.0)
        last_out = ob
        for i in range(num_hid_layers):
            last_out = tf.nn.tanh(
                U.dense(last_out,
                        hid_size,
                        "vffc%i" % (i + 1),
                        weight_init=U.normc_initializer(1.0)))
        self.vpred = U.dense(last_out,
                             1,
                             "vffinal",
                             weight_init=U.normc_initializer(1.0))[:, 0]

        last_out = ob
        for i in range(num_hid_layers):
            last_out = tf.nn.tanh(
                U.dense(last_out,
                        hid_size,
                        "polfc%i" % (i + 1),
                        weight_init=U.normc_initializer(1.0)))
        if gaussian_fixed_var and isinstance(ac_space, gym.spaces.Box):
            mean = U.dense(last_out,
                           pdtype.param_shape()[0] // 2, "polfinal",
                           U.normc_initializer(0.01))
            logstd = tf.get_variable(name="logstd",
                                     shape=[1, pdtype.param_shape()[0] // 2],
                                     initializer=tf.zeros_initializer())
            pdparam = U.concatenate([mean, mean * 0.0 + logstd], axis=1)
        else:
            pdparam = U.dense(last_out,
                              pdtype.param_shape()[0], "polfinal",
                              U.normc_initializer(0.01))

        self.pd = pdtype.pdfromflat(pdparam)

        self.state_in = []
        self.state_out = []

        stochastic = tf.placeholder(dtype=tf.bool, shape=())
        ac = U.switch(stochastic, self.pd.sample(), self.pd.mode())
        self._act = U.function([stochastic, ob], [ac, self.vpred])
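The expression mean * 0.0 + logstd is a broadcast trick: it tiles the [1, d] logstd variable across the batch dimension of mean so that both halves of pdparam line up for concatenation. A NumPy illustration:

import numpy as np

mean = np.zeros((5, 3), dtype=np.float32)         # batch of 5 action means
logstd = np.full((1, 3), -0.7, dtype=np.float32)  # one shared logstd row
pdparam = np.concatenate([mean, mean * 0.0 + logstd], axis=1)
assert pdparam.shape == (5, 6)  # [mean | broadcast logstd] per batch row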
Example #6
	def encoder_net(self, img, latent_dim):
		x = img
		x = tf.nn.relu(U.dense(x, 1200, 'l1', U.normc_initializer(1.0)))
		x = tf.nn.relu(U.dense(x, 1200, 'l2', U.normc_initializer(1.0)))
		mu = U.dense(x, latent_dim, 'l3_1', U.normc_initializer(1.0)) # 32
		logvar = U.dense(x, latent_dim, 'l3_2', U.normc_initializer(1.0)) # 32
		return mu, logvar
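Downstream code would typically turn mu and logvar into a latent sample with the reparameterization trick; a sketch of the usual pattern (assumed, not shown in this example):

# Reparameterization trick: z = mu + sigma * eps, eps ~ N(0, I).
# Assumes mu and logvar come from encoder_net above (TF 1.x).
eps = tf.random_normal(tf.shape(mu))
z = mu + tf.exp(0.5 * logvar) * eps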
Example #7
	def decoder_net(self, latent_variable):
		x = latent_variable
		x = tf.nn.tanh(U.dense(x, 1200, 'l4', U.normc_initializer(1.0)))
		x = tf.nn.tanh(U.dense(x, 1200, 'l5', U.normc_initializer(1.0)))
		x = tf.nn.tanh(U.dense(x, 1200, 'l6', U.normc_initializer(1.0)))
		x_logit = U.dense(x, 4096, 'l7', U.normc_initializer(1.0))
		x_mean = tf.nn.sigmoid(x_logit)

		return x_logit, x_mean
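Returning both the logits and the sigmoid mean suggests the logits feed a Bernoulli reconstruction term over the 4096 = 64 * 64 pixels while x_mean serves for sampling or visualization. An assumed usage, where x_target is a hypothetical flattened ground-truth image in [0, 1]:

# Hypothetical reconstruction loss; x_target is not part of this example.
recon_loss = tf.reduce_sum(
    tf.nn.sigmoid_cross_entropy_with_logits(logits=x_logit, labels=x_target),
    axis=1)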
Example #8
    def _make_net(self, o):
        # Process observation.
        if self.connection_type == 'ff':
            x = o
            for ilayer, hd in enumerate(self.hidden_dims):
                x = self.nonlin(
                    U.dense(x, hd, 'l{}'.format(ilayer),
                            U.normc_initializer(1.0)))
        else:
            raise NotImplementedError(self.connection_type)

        # Map to action.
        adim = self.ac_space.shape[0]
        ahigh = self.ac_space.high
        alow = self.ac_space.low
        assert isinstance(self.ac_bins, str)
        ac_bin_mode, ac_bin_arg = self.ac_bins.split(':')

        if ac_bin_mode == 'uniform':
            # Uniformly spaced bins, from ac_space.low to ac_space.high.
            num_ac_bins = int(ac_bin_arg)
            aidx_na = bins(x, adim, num_ac_bins, 'out')
            ac_range_1a = (ahigh - alow)[None, :]
            a = (1. / (num_ac_bins - 1.) * tf.to_float(aidx_na) * ac_range_1a +
                 alow[None, :])

        elif ac_bin_mode == 'custom':
            # Custom bins specified as a list of values from -1 to 1.
            # The bins are rescaled to ac_space.low to ac_space.high.
            acvals_k = np.array(list(map(float, ac_bin_arg.split(','))),
                                dtype=np.float32)
            logger.info('Custom action values: ' + ' '.join('{:.3f}'.format(x)
                                                            for x in acvals_k))
            assert acvals_k.ndim == 1 and acvals_k[0] == -1 and acvals_k[
                -1] == 1
            acvals_ak = ((ahigh - alow)[:, None] /
                         (acvals_k[-1] - acvals_k[0]) *
                         (acvals_k - acvals_k[0])[None, :] + alow[:, None])

            aidx_na = bins(x, adim, len(acvals_k),
                           'out')  # Values in [0, k-1].
            a = tf.gather_nd(
                acvals_ak,
                tf.concat([
                    tf.tile(
                        np.arange(adim)[None, :, None],
                        [tf.shape(aidx_na)[0], 1, 1]),
                    tf.expand_dims(aidx_na, -1)
                ], 2)  # (n, a, 2)
            )  # (n, a)
        elif ac_bin_mode == 'continuous':
            a = U.dense(x, adim, 'out', U.normc_initializer(0.01))
        else:
            raise NotImplementedError(ac_bin_mode)

        return a
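The gather_nd in the 'custom' branch selects, for each batch row n and action dimension i, the bin value acvals_ak[i, aidx_na[n, i]]. The same lookup in NumPy:

import numpy as np

acvals_ak = np.array([[-1., 0., 1.],    # action dim 0: k = 3 bin values
                      [-2., 0., 2.]])   # action dim 1
aidx_na = np.array([[0, 2],             # batch row 0 picks bins 0 and 2
                    [1, 1]])            # batch row 1 picks the middle bins
a = acvals_ak[np.arange(2)[None, :], aidx_na]  # (n, adim)
assert (a == np.array([[-1., 2.], [0., 0.]])).all()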
Example #9
	def decoder_net(self, latent_variable):
		x = latent_variable
		x = U.dense(x, 256, 'l2', U.normc_initializer(1.0))
		x = tf.nn.relu(U.dense(x, 1024, 'l3', U.normc_initializer(1.0)))
		x = tf.reshape(x, [tf.shape(x)[0], 4,4,64]) # Unflatten [4, 4, 64]
		x = tf.nn.relu(U.conv2d_transpose(x, [4,4,64,64], [tf.shape(x)[0], 8,8,64], "uc1", [2, 2], pad="SAME")) # [8, 8, 64]
		x = tf.nn.relu(U.conv2d_transpose(x, [4,4,32,64], [tf.shape(x)[0], 16,16,32], "uc2", [2, 2], pad="SAME")) # [16, 16, 32]
		x = tf.nn.relu(U.conv2d_transpose(x, [4,4,32,32], [tf.shape(x)[0], 32,32,32], "uc3", [2, 2], pad="SAME")) # [32, 32, 32]
		x = U.conv2d_transpose(x, [4,4,3,32], [tf.shape(x)[0], 64,64,3], "uc4", [2, 2], pad="SAME") # [64, 64, 3]
		return x
Example #10
def reconstruct_fc(caps):
    with tf.variable_scope("reconstruction"):
        init = U.normc_initializer(0.1)
        fc1 = U.dense(caps, 512, name="fc1", weight_init=init)
        fc1_act = tf.nn.relu(fc1)
        fc2 = U.dense(fc1_act, 1024, name="fc2", weight_init=init)
        fc2_act = tf.nn.relu(fc2)
        fc3 = U.dense(fc2_act, 784, name="fc3", weight_init=init)  # 784 = 28*28 image
        fc3_act = tf.nn.sigmoid(fc3)
        return fc3_act
Example #12
def proj_net(scope, img, latent_dim):
    with tf.variable_scope(scope):
        x = img
        x = tf.nn.relu(U.conv2d(x, 64, "c1", [8, 8], [2, 2], pad="SAME"))
        x = tf.nn.relu(U.conv2d(x, 128, "c2", [6, 6], [2, 2], pad="SAME"))
        x = tf.nn.relu(U.conv2d(x, 128, "c3", [6, 6], [2, 2], pad="SAME"))
        x = tf.nn.relu(U.conv2d(x, 128, "c4", [4, 4], [2, 2], pad="SAME"))
        x = U.flattenallbut0(x)
        x = tf.nn.relu(U.dense(x, 2048, 'l1', U.normc_initializer(1.0)))
        x = U.dense(x, latent_dim, 'l2', U.normc_initializer(1.0))
        return x
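U.flattenallbut0 collapses every axis except the batch axis; the baselines-style implementation is essentially:

import numpy as np
import tensorflow as tf

# Flatten all axes but axis 0, using the tensor's static shape.
def flattenallbut0(x):
    return tf.reshape(x, [-1, int(np.prod(x.get_shape().as_list()[1:]))])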
Example #13
	def encoder_net(self, img, latent_dim):
		x = img
		x = tf.nn.relu(U.conv2d(x, 32, "c1", [4, 4], [2, 2], pad="SAME")) # [32, 32, 32]
		x = tf.nn.relu(U.conv2d(x, 32, "c2", [4, 4], [2, 2], pad="SAME")) # [16, 16, 32]
		x = tf.nn.relu(U.conv2d(x, 64, "c3", [4, 4], [2, 2], pad="SAME")) # [8, 8, 64]
		x = tf.nn.relu(U.conv2d(x, 64, "c4", [4, 4], [2, 2], pad="SAME")) # [4, 4, 64]
		x = U.flattenallbut0(x) # [1024]
		x = tf.nn.relu(U.dense(x, 256, 'l1', U.normc_initializer(1.0))) # 256
		mu = U.dense(x, latent_dim, 'l1_1', U.normc_initializer(1.0)) # 32
		logvar = U.dense(x, latent_dim, 'l1_2', U.normc_initializer(1.0)) # 32
		return mu, logvar
Example #14
    def _build_graph(self):
        self.output = self.input
        if self.train_config.hidden_sizes:
            for i, hidden_size in enumerate(self.train_config.hidden_sizes):
                self.output = tf_util.dense(self.output, hidden_size,
                                            "hidden_{}".format(i))
                self.output = tf.nn.relu(self.output)
                if self.train_config.dropout_rate > 0:
                    self.output = tf_util.dropout(
                        self.output, 1.0 - self.train_config.dropout_rate,
                        self.is_training_phase)
        self.output = tf_util.dense(self.output, self.output_size,
                                    "last_layer")
Example #15
    def classifier_net(self, z1, z2, feat_size, latent_dim, cls_L,
                       cls_batch_per_gpu):
        with tf.variable_scope("classifier") as scope:
            z1 = tf.reshape(z1, (cls_batch_per_gpu, -1, latent_dim))
            z2 = tf.reshape(z2, (cls_batch_per_gpu, -1, latent_dim))
            warn("z1: {}".format(np.shape(z1)))
            z_diff = U.sum(z1 - z2, axis=1) / cls_L
            warn("z_diff: {}".format(np.shape(z_diff)))
            x = U.dense(z_diff, feat_size, 'cls_fc1', U.normc_initializer(1.0))
        return x
Example #16
    def _init(self, ob_space, ac_space):
        assert isinstance(ob_space, gym.spaces.Box)

        self.pdtype = pdtype = make_pdtype(ac_space)
        sequence_length = None

        sy_ob = U.get_placeholder(name="sy_ob",
                                  dtype=tf.float32,
                                  shape=[sequence_length] +
                                  list(ob_space.shape))

        obscaled = sy_ob / 255.0

        with tf.variable_scope("pol"):
            x = obscaled
            x = tf.nn.relu(U.conv2d(x, 8, "l1", [8, 8], [4, 4], pad="VALID"))
            x = tf.nn.relu(U.conv2d(x, 16, "l2", [4, 4], [2, 2], pad="VALID"))
            x = U.flattenallbut0(x)
            x = tf.nn.relu(U.dense(x, 128, 'lin', U.normc_initializer(1.0)))
            logits = U.dense(x,
                             pdtype.param_shape()[0], "logits",
                             U.normc_initializer(0.01))
            self.pd = pdtype.pdfromflat(logits)
        with tf.variable_scope("vf"):
            x = obscaled
            x = tf.nn.relu(U.conv2d(x, 8, "l1", [8, 8], [4, 4], pad="VALID"))
            x = tf.nn.relu(U.conv2d(x, 16, "l2", [4, 4], [2, 2], pad="VALID"))
            x = U.flattenallbut0(x)
            x = tf.nn.relu(U.dense(x, 128, 'lin', U.normc_initializer(1.0)))
            self.vpred = U.dense(x, 1, "value", U.normc_initializer(1.0))
            self.vpredz = self.vpred

        self.state_in = []
        self.state_out = []

        stochastic = tf.placeholder(dtype=tf.bool, shape=())
        sy_ac = self.pd.sample()  # NOTE: the stochastic placeholder is never used; this always samples
        self._act = U.function([stochastic, sy_ob], [sy_ac, self.vpred])
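U.function wraps placeholders and output tensors into a plain Python callable; a minimal sketch of the pattern (the real baselines helper also supports givens and updates):

import tensorflow as tf

# Feed the inputs, fetch the outputs in the default session.
def function(inputs, outputs):
    def _call(*args):
        return tf.get_default_session().run(
            outputs, feed_dict=dict(zip(inputs, args)))
    return _call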
Example #17
def main():
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('expert_policy_file', type=str)
    parser.add_argument('envname', type=str)
    parser.add_argument('--render', action='store_true')
    parser.add_argument("--max_timesteps", type=int)
    parser.add_argument('--num_rollouts',
                        type=int,
                        default=20,
                        help='Number of expert roll outs')
    args = parser.parse_args()

    print('loading and building expert policy')
    policy_fn = load_policy.load_policy(args.expert_policy_file)
    print('loaded and built')

    with tf.Session():
        tf_util.initialize()

        import gym
        env = gym.make(args.envname)
        max_steps = args.max_timesteps or env.spec.timestep_limit

        def run_exp(func, returns, observations, actions, bc=False):
            for i in range(args.num_rollouts):
                # print('iter', i)
                obs = env.reset()
                done = False
                totalr = 0.
                steps = 0
                while not done:
                    observations.append(obs)
                    action = func(obs[None, :])
                    #if steps % 1000 == 0:
                    #    print(type(action))
                    obs, r, done, _ = env.step(action)
                    if bc:
                        action = policy_fn(obs[None, :])
                        # obs, r, done, _ = env.step(action)
                    actions.append(action)
                    totalr += r
                    steps += 1
                    if args.render:
                        env.render()
                    #if steps % 100 == 0:
                    #    print("%i/%i" % (steps, max_steps))
                    if steps >= max_steps:
                        break
                returns.append(totalr)

            # print('returns', returns)
            print('mean return', np.mean(returns))
            print('std of return', np.std(returns))

        returns0 = []
        observations0 = []
        actions0 = []
        print("running expert steps")
        run_exp(policy_fn, returns0, observations0, actions0)

        expert_data = {
            'observations': np.array(observations0),
            'actions': np.array(actions0)
        }
        actions1 = expert_data['actions']
        actions_size = actions1.shape[0]
        actions_dims = actions1.shape[1] * actions1.shape[2]
        expert_data['actions'] = np.reshape(expert_data['actions'],
                                            (actions_size, actions_dims))

        # setting up models
        print(expert_data['observations'].shape, expert_data['actions'].shape)
        inputs = tf_util.get_placeholder(
            'inputs', tf.float32, [None, expert_data['observations'].shape[1]])
        labels = tf_util.get_placeholder('labels', tf.float32,
                                         [None, actions_dims])

        # models
        name = args.envname
        d1 = tf_util.dense(inputs, 128, 'd1')
        d2 = tf_util.dense(inputs, 128, 'd2')  # NOTE: unused below; d3 builds on d1
        # d2 = tf_util.dropout(d1, 0.95)
        d3 = tf_util.wndense(d1, 128, 'd3')
        pred = tf_util.densenobias(d3, actions_dims, 'output')

        #print(type(expert_data['actions']), type(pred))
        loss_func = tf.losses.mean_squared_error(labels, pred)
        loss = tf.reduce_mean(loss_func)
        optimizer = tf.train.AdamOptimizer().minimize(loss)

        # evaluations
        tf_util.initialize()

        # grid search parameters
        def train_model(x, y):
            for i in range(args.num_rollouts):
                ls = 0
                batch_size = int(actions_size / 4)
                batch_num = int(actions_size / batch_size)
                for j in range(batch_num):
                    start = batch_size * j
                    end = start + batch_size
                    op_eval, ls_current = tf_util.eval([optimizer, loss], {
                        inputs: x[start:end],
                        labels: y[start:end]
                    })
                    # print('batch ', j, ls_current)
                    ls += ls_current
                #print('iter ', i, ls.shape)

        def model_eval(obs):
            p = tf_util.eval([pred], {inputs: obs})
            return np.array(p)

        print("running behaviour cloning")
        train_model(expert_data['observations'], expert_data['actions'])
        run_exp(model_eval, [], [], [])

        print("running DAgger")
        for i in range(args.num_rollouts):
            # for i in range(10):
            print(len(observations0), len(actions0))
            run_exp(model_eval, [], observations0, actions0, True)
            expert_data = {
                'observations': np.array(observations0),
                'actions': np.array(actions0)
            }
            expert_data['actions'] = np.reshape(
                expert_data['actions'],
                (expert_data['actions'].shape[0], actions_dims))
            train_model(expert_data['observations'], expert_data['actions'])
Example #18
    def _make_net(self, o):
        o_cnn = o[:,:-3]
        o_self = o[:,-3:]
        x_cnn = tf.reshape(o_cnn, [-1, 9, 9, 9])
        x_cnn = tf.layers.conv2d(x_cnn, 32, 3, data_format='channels_last', name='cnn1', activation=self.nonlin)
        #x_cnn = tf.layers.batch_normalization(x_cnn, axis=3)
        x_cnn = tf.layers.conv2d(x_cnn, 32, 3, data_format='channels_last', name='cnn2', activation=self.nonlin)
        #x_cnn = tf.layers.batch_normalization(x_cnn, axis=3)
        x_cnn = tf.layers.conv2d(x_cnn, 32, 3, data_format='channels_last', name='cnn3', activation=self.nonlin)
        #x_cnn = tf.layers.batch_normalization(x_cnn, axis=3)
        x_cnn = tf.reshape(x_cnn, [-1, 288])
        #x_cnn = tf.Print(x_cnn, [tf.shape(x_cnn)], message='x_cnn shape is:')
        x_cnn = self.nonlin(U.dense(x_cnn, 256, 'ff1', U.normc_initializer(1.0)))

        x_self = o_self
        x = tf.concat([x_cnn, x_self], 1)
        x = self.nonlin(U.dense(x, 256, 'ff2', U.normc_initializer(1.0)))

        '''
        # Process observation
        if self.connection_type == 'ff':
            x = o
            for ilayer, hd in enumerate(self.hidden_dims):
                x = self.nonlin(U.dense(x, hd, 'l{}'.format(ilayer), U.normc_initializer(1.0)))
        else:
            raise NotImplementedError(self.connection_type)
        '''
        # Map to action
        adim, ahigh, alow = 1, self.ac_space.n - 1, 0
        assert isinstance(self.ac_bins, str)
        ac_bin_mode, ac_bin_arg = self.ac_bins.split(':')

        if ac_bin_mode == 'uniform':
            # Uniformly spaced bins, from ac_space.low to ac_space.high
            num_ac_bins = int(ac_bin_arg)
            aidx_na = bins(x, adim, num_ac_bins, 'out')  # raw scores here; this bins() variant skips the argmax
            #aidx_na = tf.Print(aidx_na, [aidx_na], message='aidx_na: ')
            a = tf.nn.softmax(aidx_na) #tf.sigmoid(aidx_na)
            #ac_range_1a = (ahigh - alow)[None, :]
            #a = 1. / (num_ac_bins - 1.) * tf.to_float(aidx_na) * ac_range_1a + alow[None, :]

        elif ac_bin_mode == 'custom':
            # Custom bins specified as a list of values from -1 to 1
            # The bins are rescaled to ac_space.low to ac_space.high
            acvals_k = np.array(list(map(float, ac_bin_arg.split(','))), dtype=np.float32)
            logger.info('Custom action values: ' + ' '.join('{:.3f}'.format(x) for x in acvals_k))
            assert acvals_k.ndim == 1 and acvals_k[0] == -1 and acvals_k[-1] == 1
            acvals_ak = (
                (ahigh - alow)[:, None] / (acvals_k[-1] - acvals_k[0]) * (acvals_k - acvals_k[0])[None, :]
                + alow[:, None]
            )

            aidx_na = bins(x, adim, len(acvals_k), 'out')  # values in [0, k-1]
            a = tf.gather_nd(
                acvals_ak,
                tf.concat([
                    tf.tile(np.arange(adim)[None, :, None], [tf.shape(aidx_na)[0], 1, 1]),
                    tf.expand_dims(aidx_na, -1)
                ],2)  # (n,a,2)
            )  # (n,a)
        elif ac_bin_mode == 'continuous':
            a = U.dense(x, adim, 'out', U.normc_initializer(0.01))
        else:
            raise NotImplementedError(ac_bin_mode)

        return a
def bins(x, dim, num_bins, name):
    scores = U.dense(x, dim * num_bins, name, U.normc_initializer(0.01))
    #scores = tf.Print(scores, [scores], message='scores: ')
    scores_nab = tf.reshape(scores, [-1, dim, num_bins])
    #scores_nab = tf.Print(scores_nab, [scores_nab], message='scores_nab: ')
    return scores_nab #tf.argmax(scores_nab, 2)  # 0 ... num_bins-1
Example #20
def bins(x, dim, num_bins, name):
    scores = U.dense(x, dim * num_bins, name, U.normc_initializer(0.01))
    scores_nab = tf.reshape(scores, [-1, dim, num_bins])
    return tf.argmax(scores_nab, 2)
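This bins variant returns hard indices via argmax, unlike the scores-returning variant in the preceding example. The same reshape-and-argmax in NumPy:

import numpy as np

scores = np.array([[0.1, 0.9, 0.2, 0.7, 0.3, 0.5]])  # n=1, dim=2, bins=3
idx = scores.reshape(-1, 2, 3).argmax(axis=2)         # one bin per action dim
assert (idx == np.array([[1, 0]])).all()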