Exemple #1
0
    def _get_DQN_prediction(self, image):
        """Build the Q-value output from a four-convolution trunk.

        Args:
            image: input tensor with values in [0, 255]; it is passed to the
                first convolution as-is (no rescaling is applied here).

        Returns:
            A tensor named 'Qvalue' holding NUM_ACTIONS outputs. When DUELING
            is set, it is computed as ``A + (V - mean(A))`` from separate
            value and advantage layers; otherwise it is a single linear
            layer.
        """

        def leaky_relu(x, name):
            # LeakyReLU with its coefficient fixed at 0.01.
            return LeakyReLU.f(x, 0.01, name)

        # (out_channel, kernel_shape) of each convolution followed by a pool.
        pooled_stages = ((32, 5), (32, 5), (64, 4))

        with argscope(Conv2D, nl=PReLU.f, use_bias=True):
            features = image
            for idx, (channels, kernel) in enumerate(pooled_stages):
                features = Conv2D('conv{}'.format(idx),
                                  features,
                                  out_channel=channels,
                                  kernel_shape=kernel)
                features = MaxPooling('pool{}'.format(idx), features, 2)
            features = Conv2D('conv3',
                              features,
                              out_channel=64,
                              kernel_shape=3)

            features = FullyConnected('fc0', features, 512, nl=leaky_relu)
            # For reference, the original architecture was:
            #   conv0: 32 channels, kernel 8, stride 4
            #   conv1: 64 channels, kernel 4, stride 2
            #   conv2: 64 channels, kernel 3

        if DUELING:
            state_value = FullyConnected('fctV', features, 1, nl=tf.identity)
            advantages = FullyConnected('fctA',
                                        features,
                                        NUM_ACTIONS,
                                        nl=tf.identity)
            mean_advantage = tf.reduce_mean(advantages, 1, keep_dims=True)
            q_values = tf.add(advantages, state_value - mean_advantage)
        else:
            q_values = FullyConnected('fct',
                                      features,
                                      NUM_ACTIONS,
                                      nl=tf.identity)
        return tf.identity(q_values, name='Qvalue')
    def _get_NN_prediction(self, image):
        """Build the policy and value outputs from a convolutional trunk.

        The slice ``image[:, 0, :, 0]`` is first handed to
        ``_create_unnary_variables_with_summary`` together with the sizes and
        names below; the image is then divided by 255 before the
        convolutions.

        Args:
            image: input tensor (indexed with four axes here).

        Returns:
            A ``(policy, value)`` tuple: the 'fc-pi' layer with
            ``self.number_of_actions`` outputs and the 'fc-v' layer with a
            single output, both linear.
        """
        self._create_unnary_variables_with_summary(
            image[:, 0, :, 0], (10, 10, 6, 6, 6),
            ("rewards", "levels", "lives0", "lives1", "lives2"))

        features = image / 255.0
        with argscope(Conv2D, nl=tf.nn.relu):
            # (out_channel, kernel_shape) of each convolution + pooling pair.
            for idx, (channels, kernel) in enumerate(
                    ((32, 5), (32, 5), (64, 4))):
                features = Conv2D('conv{}'.format(idx),
                                  features,
                                  out_channel=channels,
                                  kernel_shape=kernel)
                features = MaxPooling('pool{}'.format(idx), features, 2)
            features = Conv2D('conv3',
                              features,
                              out_channel=64,
                              kernel_shape=3)

        # Linear layer followed by a separate PReLU activation.
        hidden = PReLU('prelu',
                       FullyConnected('fc0', features, 512, nl=tf.identity))

        policy = FullyConnected('fc-pi',
                                hidden,
                                out_dim=self.number_of_actions,
                                nl=tf.identity)
        value = FullyConnected('fc-v', hidden, 1, nl=tf.identity)
        return policy, value
Exemple #3
0
 def _get_NN_prediction(self, image):
     """Build the policy and value outputs for the configured architecture.

     The convolutional trunk is selected by the module-level NETWORK_ARCH:
     '1' uses four convolutions with max pooling, 'nature' uses three
     strided convolutions.

     Args:
         image: input tensor; it is divided by 255 before the convolutions.

     Returns:
         A ``(policy, value)`` tuple: the 'fc-pi' layer with NUM_ACTIONS
         outputs and the 'fc-v' layer with a single output, both linear.

     Raises:
         ValueError: if NETWORK_ARCH is neither '1' nor 'nature'.
     """
     image = image / 255.0
     with argscope(Conv2D, nl=tf.nn.relu):
         if NETWORK_ARCH == '1':
             l = Conv2D('conv0', image, out_channel=32, kernel_shape=5)
             l = MaxPooling('pool0', l, 2)
             l = Conv2D('conv1', l, out_channel=32, kernel_shape=5)
             l = MaxPooling('pool1', l, 2)
             l = Conv2D('conv2', l, out_channel=64, kernel_shape=4)
             l = MaxPooling('pool2', l, 2)
             l = Conv2D('conv3', l, out_channel=64, kernel_shape=3)
             # conv3 output: [None, 10, 10, 64]
         elif NETWORK_ARCH == 'nature':
             l = Conv2D('conv0',
                        image,
                        out_channel=32,
                        kernel_shape=8,
                        stride=4)
             l = Conv2D('conv1',
                        l,
                        out_channel=64,
                        kernel_shape=4,
                        stride=2)
             l = Conv2D('conv2', l, out_channel=64, kernel_shape=3)
             # conv2 output: [None, 11, 11, 64]
         else:
             # Without this branch an unknown value left `l` unbound and the
             # failure surfaced later as an unrelated-looking NameError.
             raise ValueError(
                 "Unsupported NETWORK_ARCH {!r}; expected '1' or "
                 "'nature'".format(NETWORK_ARCH))

     # Named pass-through ops; the Python handles are not needed here, only
     # the graph nodes (presumably looked up by name elsewhere -- verify).
     tf.identity(l, name='convolutional-2')
     l = FullyConnected('fc0', l, 512, nl=tf.identity)
     l = PReLU('prelu', l)
     tf.identity(l, name='fully-connected')
     policy = FullyConnected('fc-pi',
                             l,
                             out_dim=NUM_ACTIONS,
                             nl=tf.identity)
     value = FullyConnected('fc-v', l, 1, nl=tf.identity)
     return policy, value
Exemple #4
0
    def _get_NN_prediction(self, image):
        """Build the policy and value outputs of a fully-connected network.

        The input is flattened to 24 features per sample and extended with
        one hand-crafted feature: a linear extrapolation of the ball's y
        position, clipped to [-1, 1].

        Args:
            image: input tensor (indexed with four axes here).

        Returns:
            A ``(policy, value)`` tuple: the 'fc-pi' layer with
            ``self.number_of_actions`` outputs and the 'fc-v' layer with a
            single output, both linear.
        """
        flat_input = tf.reshape(image, [-1, 24])

        # Estimate where the ball will be when it reaches the agent's plane,
        # extrapolating linearly from its two most recent positions.
        x_new = image[:, 0, 1, 3]
        y_new = image[:, 0, 0, 3]
        x_old = image[:, 0, 1, 2]
        y_old = image[:, 0, 0, 2]

        y_step = y_new - y_old
        # 0.125 is presumably the x position of the agent's plane -- verify.
        x_remaining = 0.125 - x_new
        y_travel = y_step * x_remaining
        # The 0.005 offset presumably guards against a zero x step.
        x_step = x_new - x_old + 0.005
        y_at_plane = y_new + y_travel / x_step

        # Clip to [-1, 1], then make it a column so it can be concatenated.
        y_capped = tf.minimum(y_at_plane, 1)
        y_clipped = tf.maximum(y_capped, -1)
        y_feature = tf.expand_dims(y_clipped, 1)
        summary.add_activation_summary(y_feature, "yPredicted")

        hidden = tf.concat(1, [flat_input, y_feature])

        for depth in xrange(self.number_of_layers):
            dense = FullyConnected('fc{}'.format(depth),
                                   hidden,
                                   self.number_of_neurons,
                                   nl=tf.identity)
            hidden = PReLU('prelu{}'.format(depth), dense)

        policy = FullyConnected('fc-pi',
                                hidden,
                                out_dim=self.number_of_actions,
                                nl=tf.identity)
        value = FullyConnected('fc-v', hidden, 1, nl=tf.identity)
        return policy, value
    def _get_NN_prediction(self, image):
        """Build gated policy and value outputs from ten parallel heads.

        A shared convolutional trunk feeds ten independent heads, each with
        its own hidden layer, policy layer and value layer. Every head's
        outputs are multiplied by one entry of ``image[:, 0, k, 0]`` (read
        before the image is divided by 255) and the products are summed.

        Args:
            image: input tensor (indexed with four axes here).

        Returns:
            A ``(policy, value)`` tuple holding the gated sums over all
            heads.
        """
        self._create_unnary_variables_with_summary(
            image[:, 0, :, 0], (10, 10, 6, 6, 6),
            ("rewards", "levels", "lives0", "lives1", "lives2"))
        NUMBER_OF_REWARD_EVENTS = 10

        # One column-shaped gate per head, taken from the unscaled image.
        rewards_events = [
            tf.reshape(image[:, 0, event, 0], (-1, 1))
            for event in xrange(NUMBER_OF_REWARD_EVENTS)
        ]

        features = image / 255.0
        with argscope(Conv2D, nl=tf.nn.relu):
            # (out_channel, kernel_shape) of each convolution + pooling pair.
            for idx, (channels, kernel) in enumerate(
                    ((32, 5), (32, 5), (64, 4))):
                features = Conv2D('conv{}'.format(idx),
                                  features,
                                  out_channel=channels,
                                  kernel_shape=kernel)
                features = MaxPooling('pool{}'.format(idx), features, 2)
            features = Conv2D('conv3',
                              features,
                              out_channel=64,
                              kernel_shape=3)

        # Each head owns its hidden layer and its two output layers.
        heads = []
        for head in xrange(NUMBER_OF_REWARD_EVENTS):
            hidden = FullyConnected('fc0{}'.format(head),
                                    features,
                                    512,
                                    nl=tf.identity)
            hidden = PReLU('prelu{}'.format(head), hidden)
            head_policy = FullyConnected('fc-pi{}'.format(head),
                                         hidden,
                                         out_dim=self.number_of_actions,
                                         nl=tf.identity)
            head_value = FullyConnected('fc-v{}'.format(head),
                                        hidden,
                                        1,
                                        nl=tf.identity)
            heads.append((head_policy, head_value))

        # Gate every head by its event entry, then sum across heads.
        weighted_policies = []
        weighted_values = []
        for gate, (head_policy, head_value) in zip(rewards_events, heads):
            weighted_policies.append(tf.multiply(gate, head_policy))
            weighted_values.append(tf.multiply(gate, head_value))

        policy = tf.add_n(weighted_policies)
        value = tf.add_n(weighted_values)
        return policy, value
 def _get_NN_prediction(self, image):
     """Build the policy and value outputs of a plain fully-connected net.

     ``self.numberOfLayers`` hidden layers of ``self.numberOfNeurons`` units
     are stacked, each a linear layer followed by a PReLU.

     Args:
         image: input tensor fed directly to the first hidden layer.

     Returns:
         A ``(policy, value)`` tuple: the 'fc-pi' layer with
         ``self.number_of_actions`` outputs and the 'fc-v' layer with a
         single output, both linear.
     """
     hidden = image
     for depth in xrange(self.numberOfLayers):
         dense = FullyConnected('fc{}'.format(depth),
                                hidden,
                                self.numberOfNeurons,
                                nl=tf.identity)
         hidden = PReLU('prelu{}'.format(depth), dense)

     policy = FullyConnected('fc-pi',
                             hidden,
                             out_dim=self.number_of_actions,
                             nl=tf.identity)
     value = FullyConnected('fc-v', hidden, 1, nl=tf.identity)
     return policy, value
 def _get_NN_prediction(self, image):
     """Build the policy and value outputs of a plain fully-connected net.

     ``self.number_of_layers`` hidden layers of ``self.number_of_neurons``
     units are stacked, each a linear layer followed by a PReLU.

     Args:
         image: input tensor fed directly to the first hidden layer.

     Returns:
         A ``(policy, value)`` tuple: the 'fc-pi' layer with
         ``self.number_of_actions`` outputs and the 'fc-v' layer with a
         single output, both linear.
     """
     activations = image
     for layer_idx in xrange(self.number_of_layers):
         pre_activation = FullyConnected('fc{}'.format(layer_idx),
                                         activations,
                                         self.number_of_neurons,
                                         nl=tf.identity)
         activations = PReLU('prelu{}'.format(layer_idx), pre_activation)

     policy = FullyConnected('fc-pi',
                             activations,
                             out_dim=self.number_of_actions,
                             nl=tf.identity)
     value = FullyConnected('fc-v', activations, 1, nl=tf.identity)
     return policy, value
Exemple #8
0
    def _get_NN_prediction(self, image):
        """Build the policy logits and value output from a conv trunk.

        Args:
            image: input tensor; it is cast to float32 and divided by 255
                before the convolutions.

        Returns:
            A ``(logits, value)`` tuple: the unnormalized policy from the
            'fc-pi' layer (``self.num_actions`` outputs) and the single
            output of the 'fc-v' layer.
        """
        features = tf.cast(image, tf.float32) / 255.0
        with argscope(Conv2D, activation=tf.nn.relu):
            # (filters, kernel size) of each convolution + pooling pair.
            for idx, (filters, kernel) in enumerate(
                    ((32, 5), (32, 5), (64, 4))):
                features = Conv2D('conv{}'.format(idx), features, filters,
                                  kernel)
                features = MaxPooling('pool{}'.format(idx), features, 2)
            features = Conv2D('conv3', features, 64, 3)

        hidden = PReLU('prelu', FullyConnected('fc0', features, 512))
        # Unnormalized policy; no softmax is applied here.
        logits = FullyConnected('fc-pi', hidden, self.num_actions)
        value = FullyConnected('fc-v', hidden, 1)
        return logits, value
Exemple #9
0
    def _get_NN_prediction(self, image):
        """Build the policy output from a convolutional trunk.

        Args:
            image: input tensor; it is divided by 255 before the
                convolutions.

        Returns:
            The linear 'fc-pi' layer with NUM_ACTIONS outputs. No value
            output is produced by this variant.
        """
        features = image / 255.0
        with argscope(Conv2D, nl=tf.nn.relu):
            # (out_channel, kernel_shape) of each convolution + pooling pair.
            for idx, (channels, kernel) in enumerate(
                    ((32, 5), (32, 5), (64, 4))):
                features = Conv2D('conv{}'.format(idx),
                                  features,
                                  out_channel=channels,
                                  kernel_shape=kernel)
                features = MaxPooling('pool{}'.format(idx), features, 2)
            features = Conv2D('conv3',
                              features,
                              out_channel=64,
                              kernel_shape=3)

        hidden = FullyConnected('fc0', features, 512, nl=tf.identity)
        hidden = PReLU('prelu', hidden)
        return FullyConnected('fc-pi',
                              hidden,
                              out_dim=NUM_ACTIONS,
                              nl=tf.identity)
Exemple #10
0
 def _get_DQN_prediction(self, image):
     """Build the Q-value output from a four-convolution trunk.

     Args:
         image: input tensor; it is divided by 255 before the convolutions.

     Returns:
         A tensor named 'Qvalue' holding NUM_ACTIONS linear outputs.
     """
     # TODO: Do we need to add other pre-processing? e.g., subtract mean
     image = image / 255.0
     # TODO: The network structure can be improved?
     with argscope(Conv2D, nl=tf.nn.relu,
                   use_bias=True):  # Activation for each layer
         l = Conv2D('conv0', image, out_channel=32, kernel_shape=5)
         l = MaxPooling('pool0', l, 2)
         l = Conv2D('conv1', l, out_channel=32, kernel_shape=5)
         l = MaxPooling('pool1', l, 2)
         l = Conv2D('conv2', l, out_channel=64, kernel_shape=4)
         l = MaxPooling('pool2', l, 2)
         # This layer used to reuse the name 'conv2', colliding with the
         # layer above; every layer needs its own name.
         l = Conv2D('conv3', l, out_channel=64, kernel_shape=3)
         # For reference, the original architecture was:
         #   conv0: 32 channels, kernel 8, stride 4
         #   conv1: 64 channels, kernel 4, stride 2
         #   conv2: 64 channels, kernel 3
         l = FullyConnected('fc0',
                            l,
                            512,
                            nl=lambda x, name: LeakyReLU.f(x, 0.01, name))
         # Pass the function itself as the activation; calling
         # tf.identity() with no arguments raises a TypeError.
         l = FullyConnected('fct', l, NUM_ACTIONS, nl=tf.identity)
     # The method previously fell off the end and returned None. The output
     # name follows the other DQN head in this file -- NOTE(review): confirm
     # the caller expects a tensor named 'Qvalue'.
     return tf.identity(l, name='Qvalue')
    def add_column(self, previous_column_layers, column_num, trainable=True):
        """Create one column of fully-connected layers plus its output heads.

        The column starts from ``previous_column_layers[0]`` (the input).
        Each new hidden layer is a linear layer followed by a PReLU; when the
        previous column has a layer at the same depth, that layer is
        concatenated in front of the new one along axis 1.

        Args:
            previous_column_layers: layers of the preceding column; index 0
                is used as the input of this column.
            column_num: identifier embedded in every layer name.
            trainable: forwarded to each FullyConnected layer.

        Returns:
            A ``(new_column, policy, value)`` tuple. ``new_column`` lists the
            input, every (possibly concatenated) hidden layer and finally the
            concatenation of ``policy`` and ``value``.
        """
        print("Creating column:{}".format(column_num))
        # Shared tail of every layer name created for this column.
        name_suffix = "{}{}".format("-column-", column_num)
        stage = self.stage

        # Index 0 of a column is always the network input.
        new_column = [previous_column_layers[0]]
        for depth in xrange(1, self.number_of_layers[stage] + 1):
            dense = FullyConnected('fc-{}{}'.format(depth, name_suffix),
                                   new_column[-1],
                                   self.number_of_neurons[stage],
                                   nl=tf.identity,
                                   trainable=trainable)
            activated = PReLU('prelu-{}{}'.format(depth, name_suffix), dense)

            if depth < len(previous_column_layers):
                # Lateral connection: prepend the previous column's layer.
                activated = tf.concat(
                    1, [previous_column_layers[depth], activated])
            new_column.append(activated)

        top_hidden = new_column[-1]
        policy = FullyConnected('fc-pi{}'.format(name_suffix),
                                top_hidden,
                                out_dim=self.number_of_actions,
                                nl=tf.identity,
                                trainable=trainable)
        value = FullyConnected('fc-v{}'.format(name_suffix),
                               top_hidden,
                               1,
                               nl=tf.identity,
                               trainable=trainable)

        new_column.append(tf.concat(1, [policy, value]))
        return new_column, policy, value
Exemple #12
0
    def build_graph(self, pc, pc_feature):
        """Assemble the encoder/decoder graph and return the total cost.

        The graph consists of a SmoothNet encoder, an adversarial term on the
        per-point gate ``z`` (training only), a hierarchical decoder that
        reconstructs a point cloud, and a symmetry term on the embedding.

        Args:
            pc: point-cloud tensor whose last axis holds at least three
                coordinates (index 0 is x). Presumably shaped
                (batch, num_points, 3) -- TODO confirm against the caller.
            pc_feature: input passed unchanged to ``SmoothNet``.

        Returns:
            The tensor named 'total_cost': the sum of the reconstruction,
            weight-decay, GAN and symmetry terms.
        """
        # Mirror the cloud by negating the x coordinate, then, for every
        # point, find the index of the closest mirrored point using squared
        # pairwise distances.
        pc_symmetry = tf.stack([-pc[..., 0], pc[..., 1], pc[..., 2]], -1)  # -x
        dist2sym = tf.reduce_sum((pc[:, :, None] - pc_symmetry[:, None])**2,
                                 -1)
        nearest_idx = tf.argmin(dist2sym, -1, output_type=tf.int32)

        # smoothnet encoder, only local features are used
        embedding = SmoothNet(pc_feature, self.cfg)
        with tf.variable_scope('encoder'):
            # The last embedding channel becomes a gate in (0, 1); the other
            # channels are L2-normalized into the feature vector.
            z = tf.sigmoid(embedding[:, :, -1], name='z')
            output_x = tf.nn.l2_normalize(embedding[:, :, :-1],
                                          axis=-1,
                                          name='feature')

        # The adversarial terms stay plain zeros outside of training.
        gp_loss = 0.
        loss_d = 0.
        loss_g = 0.
        if get_current_tower_context().is_training:
            # "Real" samples for the discriminator are drawn from a Beta
            # distribution configured through self.cfg.beta.
            beta_dist = tf.distributions.Beta(
                concentration1=self.cfg.beta.concentration1,
                concentration0=self.cfg.beta.concentration0)

            with tf.variable_scope('GAN'):
                real_z = beta_dist.sample(tf.shape(z))
                # stop_gradient keeps the discriminator loss from flowing
                # back into the encoder through z.
                fake_val = self.discriminator(tf.stop_gradient(z))
                real_val = self.discriminator(real_z)
                loss_d = tf.reduce_mean(fake_val - real_val, name='loss_d')
                # Generator term; presumably the frozen variables keep this
                # loss from updating the discriminator -- verify against
                # varreplace.freeze_variables.
                with varreplace.freeze_variables(stop_gradient=True):
                    loss_g = tf.reduce_mean(-self.discriminator(z),
                                            name='loss_g')

                # Penalty on discriminator gradients at random points between
                # z and real_z (one random factor per batch row): only
                # gradient norms above 1 contribute.
                z_interp = z + tf.random_uniform(
                    (tf.shape(fake_val)[0], 1)) * (real_z - z)
                gradient_f = tf.gradients(self.discriminator(z_interp),
                                          [z_interp])[0]
                gp_loss = tf.reduce_mean(tf.maximum(
                    tf.norm(gradient_f, axis=-1) - 1, 0)**2,
                                         name='gp_loss')
        # Global code: gate the positive and negative parts of the features
        # by z, max-reduce each over axis 1, and concatenate the two halves.
        code = tf.concat([
            tf.reduce_max(tf.nn.relu(output_x) * z[..., None], 1),
            tf.reduce_max(tf.nn.relu(-output_x) * z[..., None], 1)
        ],
                         axis=-1,
                         name='code')
        code = FullyConnected('fc_global',
                              code,
                              self.cfg.topnet.code_nfts,
                              activation=None)

        # topnet decoder
        tarch = get_arch(self.cfg.topnet.nlevels, self.cfg.num_points)

        def create_level(level, input_channels, output_channels, inputs, bn):
            """Run one decoder level and reshape to output_channels wide."""
            with tf.variable_scope('level_%d' % level, reuse=tf.AUTO_REUSE):
                # The final layer emits tarch[level] groups of
                # output_channels values for every input node.
                features = mlp_conv(inputs, [
                    input_channels,
                    int(input_channels / 2),
                    int(input_channels / 4),
                    int(input_channels / 8),
                    output_channels * int(tarch[level])
                ],
                                    get_current_tower_context().is_training,
                                    bn)
                features = tf.reshape(
                    features, [tf.shape(features)[0], -1, output_channels])
            return features

        # Every level after the first sees node features concatenated with
        # the global code.
        Nin = self.cfg.topnet.nfeat + self.cfg.topnet.code_nfts
        Nout = self.cfg.topnet.nfeat
        bn = True
        N0 = int(tarch[0])
        nlevels = len(tarch)
        with tf.variable_scope('decoder', reuse=tf.AUTO_REUSE):
            # Level 0 expands the code into N0 nodes of nfeat features each.
            level0 = mlp(code, [256, 64, self.cfg.topnet.nfeat * N0],
                         get_current_tower_context().is_training,
                         bn=True)
            level0 = tf.tanh(level0, name='tanh_0')
            level0 = tf.reshape(level0, [-1, N0, self.cfg.topnet.nfeat])
            outs = [
                level0,
            ]
            for i in range(1, nlevels):
                # The last level outputs 3 channels and is called with
                # bn=False.
                if i == nlevels - 1:
                    Nout = 3
                    bn = False
                inp = outs[-1]
                # Tile the code across the nodes of the previous level and
                # append it to their features.
                y = tf.expand_dims(code, 1)
                y = tf.tile(y, [1, tf.shape(inp)[1], 1])
                y = tf.concat([inp, y], 2)
                outs.append(
                    tf.tanh(create_level(i, Nin, Nout, y, bn),
                            name='tanh_%d' % (i)))

        reconstruction = tf.reshape(outs[-1], [-1, self.cfg.num_points, 3],
                                    name='recon_pc')
        loss_recon = chamfer(reconstruction, pc)

        loss_recon = tf.identity(self.cfg.recon_factor *
                                 tf.reduce_mean(loss_recon),
                                 name='recon_loss')

        # Gather, for every point, the embedding of its nearest mirrored
        # point by pairing batch indices with nearest_idx.
        batch_size = tf.shape(output_x)[0]
        batch_idx = tf.tile(
            tf.range(batch_size)[:, None], [1, tf.shape(nearest_idx)[1]])
        feature_sym = tf.gather_nd(embedding,
                                   tf.stack([batch_idx, nearest_idx], -1))

        # Scaled mean L1 distance between each point's embedding and that of
        # its mirrored neighbour.
        loss_sym = tf.identity(
            self.cfg.symmetry_factor *
            tf.reduce_mean(tf.reduce_sum(tf.abs(feature_sym - embedding), -1)),
            'symmetry_loss')

        # L2 weight decay over variables whose names match the regex.
        wd_cost = tf.multiply(1e-4,
                              regularize_cost('.*(_W|kernel)', tf.nn.l2_loss),
                              name='regularize_loss')
        # The three adversarial terms are summed without extra weighting.
        loss_gan = loss_d + loss_g + gp_loss
        total_cost = tf.add_n([loss_recon, wd_cost, loss_gan, loss_sym],
                              name='total_cost')
        summary.add_moving_summary(loss_recon, loss_sym)
        summary.add_param_summary(['.*(_W|kernel)', ['histogram', 'rms']])
        return total_cost