Ejemplo n.º 1
0
    def build_predictions(self, net, rois):
        """Build the per-ROI class-score and box-regression outputs.

        The features cropped from ``net`` at ``rois`` are flattened, passed
        through the ``fc6`` and ``fc7`` layers under the ``vgg_16`` variable
        scope, and then fed to two sibling layers under the ``predictions``
        scope.

        Args:
            net: Tensor forwarded to ``self._crop_pool_layer``.
            rois: Regions of interest forwarded to ``self._crop_pool_layer``.

        Returns:
            Tuple ``(cls_score, cls_prob, bbox_prediction)`` where
            ``cls_score`` has ``self.class_num`` outputs, ``cls_prob`` is
            ``_softmax_layer`` applied to it, and ``bbox_prediction`` has
            ``self.class_num * 4`` outputs.
        """
        # Crop image ROIs
        roi_crop = self._crop_pool_layer(net, rois, "pool5")
        roi_flat = tf.layers.flatten(roi_crop, name='flatten')

        # AUTO_REUSE: create the variables on first use, reuse them afterwards.
        with tf.variable_scope("vgg_16", reuse=tf.AUTO_REUSE):
            hidden6 = tf_tools.layer(
                roi_flat,
                weights_shape=[7, 7, 512, 4096],
                activation='relu',
                name='fc6/',
            )
            hidden7 = tf_tools.layer(
                hidden6,
                weights_shape=[1, 1, 4096, 4096],
                activation='relu',
                name='fc7/',
            )

        # Scores and predictions
        with tf.variable_scope("predictions", reuse=tf.AUTO_REUSE):
            cls_score = tf_tools.layer(
                hidden7, out_size=self.class_num, name='cls_score')
            # NOTE(review): invoked as a free function, unlike
            # self._crop_pool_layer above -- confirm it is defined at module
            # level and not meant to be a method.
            cls_prob = _softmax_layer(cls_score, "cls_prob")
            bbox_prediction = tf_tools.layer(
                hidden7, out_size=self.class_num * 4, name='bbox_pred')

        return cls_score, cls_prob, bbox_prediction
Ejemplo n.º 2
0
    def build_network(self):
        """Build the conv -> dense -> LSTM graph ending in action outputs.

        Reads ``self.input``, ``self.batch_size``, ``self.max_time``,
        ``self.states_shape`` and ``self.n_actions``.

        Sets:
            self.state_init: ``keydefaultdict`` built from a factory that
                returns zero-filled ``[c, h]`` NumPy arrays for a batch size.
            self.init_state: ``[c_in, h_in]`` placeholders for the LSTM state.
            self.lstm_c, self.lstm_h: final LSTM state from ``dynamic_rnn``.
            self.acts_prob: softmax of the action logits.
            self.acts_log_prob: log-softmax of the action logits.
        """
        def conv32(tensor, name):
            # Every convolution in this network shares these settings.
            return tf_tool.conv2d(tensor,
                                  out_channel=32,
                                  activation='leaky_relu',
                                  init='xavier',
                                  name=name)

        features = conv32(self.input, "conv{}".format(0))

        # Five blocks of two convolutions; each block adds its input back.
        for block in range(5):
            shortcut = features
            features = conv32(features, "add_res_conv1_{}".format(block))
            features = conv32(features,
                              "add_res_conv2_{}".format(block)) + shortcut

        # Collapse everything but (batch, time) before the dense layer.
        flat_width = 32 * np.prod(self.states_shape)
        sequence = tf.reshape(
            features, [self.batch_size, self.max_time, flat_width])
        sequence = tf_tool.layer(sequence,
                                 out_size=256,
                                 activation='leaky_relu',
                                 init='xavier',
                                 name="layer{}".format(1))

        cell = tf.nn.rnn_cell.BasicLSTMCell(256, state_is_tuple=True)

        def make_init(batch_size):
            # Zero [c, h] arrays suitable for feeding the placeholders below.
            return [
                np.zeros((batch_size, width), np.float32)
                for width in (cell.state_size.c, cell.state_size.h)
            ]

        self.state_init = keydefaultdict(make_init)

        c_in = tf.placeholder(
            tf.float32, [None, cell.state_size.c], name="lstm_c_in")
        h_in = tf.placeholder(
            tf.float32, [None, cell.state_size.h], name="lstm_h_in")
        self.init_state = [c_in, h_in]

        outputs, final_state = tf.nn.dynamic_rnn(
            cell,
            sequence,
            initial_state=tf.nn.rnn_cell.LSTMStateTuple(c_in, h_in),
            time_major=False)
        self.lstm_c, self.lstm_h = final_state

        logits = tf_tool.layer(outputs,
                               out_size=self.n_actions,
                               init='xavier',
                               name='acts_prob')
        self.acts_prob = tf.nn.softmax(logits)
        self.acts_log_prob = tf.nn.log_softmax(logits)
Ejemplo n.º 3
0
    def buind_network(self):
        """Build one prediction branch per image in ``self.input_img_list``.

        Every branch is created under the ``predict`` variable scope with
        ``tf.AUTO_REUSE``. A branch is three ``cnn_block`` + max-pool stages,
        a fourth ``cnn_block``, a spatial pyramid pool, a 4096-unit layer and
        a ``self.class_num``-way logit layer.

        Fixes relative to the previous revision:
          * ``cnn2`` and ``cnn3`` were fed ``input_img`` instead of the
            preceding pool output, so ``cnn1``/``pool1``/``cnn2``/``pool2``
            never reached the output. The stages are now chained.
          * The logit layer read the raw flattened SPP output, leaving the
            4096-unit layer unused. It now consumes that layer's output.

        The (misspelt) method name is kept so existing callers keep working.

        Returns:
            Tuple ``(logit_list, act_list)`` with one entry per input image.
            Each ``act`` is the argmax of the matching logit over its last
            axis, reshaped to ``[-1, 1]`` and cast to ``tf.float32``.
        """
        logit_list = []
        act_list = []
        for index, input_img in enumerate(self.input_img_list):
            with tf.variable_scope("predict", reuse=tf.AUTO_REUSE):
                features = input_img
                out_channel = self.channel_min

                # Stages 1-3: conv block then max-pool, each feeding the
                # next; the channel count is scaled after every stage.
                for stage in (1, 2, 3):
                    features = cnn_block(features,
                                         out_channel=out_channel,
                                         name='cnn{}'.format(stage))
                    features = tf_tools.max_pool(
                        features, name='pool{}'.format(stage))
                    out_channel = out_channel * self.channel_rate

                cnn4 = cnn_block(features, out_channel=out_channel, name='cnn4')
                shape = cnn4.get_shape().as_list()
                spp = tf_tools.spatial_pyramid_pool(cnn4, [shape[1], shape[2]],
                                                    [1, 2, 4],
                                                    'spp_{}'.format(index))

                flatten = tf.layers.flatten(spp)
                # The layer keeps its historical name 'flatten' so variable
                # names under the 'predict' scope do not change.
                layer1 = tf_tools.layer(flatten,
                                        out_size=4096,
                                        activation='relu',
                                        normal=True,
                                        name='flatten')
                logit = tf_tools.layer(layer1,
                                       out_size=self.class_num,
                                       name='logit')

                act = tf.argmax(logit, -1)
                act = tf.expand_dims(act, -1)
                act = tf.reshape(act, shape=[-1, 1])
                act = tf.cast(act, tf.float32)

                logit_list.append(logit)
                act_list.append(act)
        return logit_list, act_list
Ejemplo n.º 4
0
    def build(self, input, times=3):
        """Map ``input`` to an image-shaped tensor via a dense layer and convs.

        Inside the ``self.name`` variable scope the input goes through a
        dense layer of ``out_size[0] * out_size[1]`` units, is reshaped to
        ``[-1, out_size[0], out_size[1], 1]``, passes through ``times``
        convolutions whose channel count starts at ``self.channel_min`` and
        is multiplied by ``self.channel_rate`` after each one, and finally
        through a convolution with ``self.out_size[-1]`` output channels.

        Args:
            input: Tensor fed to the first dense layer.
            times: Number of intermediate convolutions (default 3).

        Returns:
            Output tensor of the final ``'output'`` convolution.

        Side effects:
            Sets ``self.var_list`` to the trainable variables found under the
            current variable scope.
        """
        with tf.variable_scope(self.name):
            rows = self.out_size[0]
            cols = self.out_size[1]

            net = tf_tools.layer(
                input,
                out_size=rows * cols,
                activation='relu',
                normal=True,
                name='layer1',
            )
            net = tf.reshape(net, shape=[-1, rows, cols, 1])

            channels = self.channel_min
            for step in range(times):
                net = tf_tools.conv2d(
                    net,
                    out_channel=channels,
                    activation='relu',
                    normal=True,
                    name='conv_{}'.format(step),
                )
                channels = channels * self.channel_rate

            net = tf_tools.conv2d(
                net, out_channel=self.out_size[-1], name='output')

            scope_name = tf.get_variable_scope().name
            self.var_list = tf.trainable_variables(scope=scope_name)
        return net
Ejemplo n.º 5
0
    def build_network(self):
        """Build the conv -> dense -> LSTM graph ending in a scalar value.

        Reads ``self.input``, ``self.batch_size``, ``self.max_time`` and
        ``self.states_shape``.

        Sets:
            self.value: output of a 1-unit layer applied to the LSTM outputs.
        """
        def res_conv(tensor, name):
            # Shared settings for the convolutions inside the skip blocks.
            return tf_tool.conv2d(tensor,
                                  out_channel=32,
                                  activation='leaky_relu',
                                  init='xavier',
                                  name=name)

        # The stem convolution does not pass ``init``, unlike the ones below.
        features = tf_tool.conv2d(self.input,
                                  activation='leaky_relu',
                                  out_channel=32,
                                  name="conv{}".format(0))

        # Five blocks of two convolutions; each block adds its input back.
        for block in range(5):
            shortcut = features
            features = res_conv(features, "add_res_conv1_{}".format(block))
            features = res_conv(features,
                                "add_res_conv2_{}".format(block)) + shortcut

        flat_width = 32 * np.prod(self.states_shape)
        sequence = tf.reshape(
            features, [self.batch_size, self.max_time, flat_width])
        sequence = tf_tool.layer(sequence,
                                 out_size=256,
                                 activation='leaky_relu',
                                 init='xavier',
                                 name="layer{}".format(1))

        cell = tf.nn.rnn_cell.BasicLSTMCell(
            256, name='lstm', state_is_tuple=True)
        # The recurrent state starts from zeros for ``self.batch_size`` rows.
        zero_state = cell.zero_state(self.batch_size, dtype=tf.float32)
        outputs, _ = tf.nn.dynamic_rnn(cell,
                                       sequence,
                                       initial_state=zero_state,
                                       time_major=False)

        self.value = tf_tool.layer(outputs, out_size=1, name='value')
Ejemplo n.º 6
0
    def build(self, input, times=3, reuse=False, normal=True):
        """Reduce ``input`` through conv/max-pool stages to a dense output.

        Inside the ``self.name`` variable scope, ``times`` convolution +
        max-pool stages are applied. The channel count starts at
        ``self.channel_min`` and is multiplied by ``self.channel_rate`` after
        every stage. The result is flattened and fed to a dense layer with
        ``self.out_size`` units.

        Args:
            input: Tensor fed to the first convolution.
            times: Number of conv + pool stages (default 3).
            reuse: Forwarded to ``tf.variable_scope``.
            normal: Forwarded to each ``tf_tools.conv2d`` call.

        Returns:
            Output tensor of the final ``'output'`` layer.

        Side effects:
            Sets ``self.var_list`` to the trainable variables found under the
            current variable scope.
        """
        net = input
        with tf.variable_scope(self.name, reuse=reuse):
            channels = self.channel_min
            for stage in range(times):
                net = tf_tools.conv2d(
                    net,
                    out_channel=channels,
                    activation='relu',
                    normal=normal,
                    name='conv_{}'.format(stage),
                )
                net = tf_tools.max_pool(net, name='pool_{}'.format(stage))
                channels = channels * self.channel_rate

            net = tf.layers.flatten(net)
            net = tf_tools.layer(net, out_size=self.out_size, name='output')

            scope_name = tf.get_variable_scope().name
            self.var_list = tf.trainable_variables(scope=scope_name)
        return net
Ejemplo n.º 7
0
    def bind_network(self):
        """Build the conv/LSTM trunk and the ``Actor`` / ``Critic`` outputs.

        Reads ``self.input``, ``self.last_action``, ``self.batch_size``,
        ``self.max_time``, ``self.state_shape`` and ``self.n_actions``.

        Sets:
            self.state_init: ``keydefaultdict`` built from a factory that
                returns zero-filled ``[c, h]`` NumPy arrays for a batch size.
            self.init_state: ``[c_in, h_in]`` placeholders for the LSTM state.
            self.lstm_c, self.lstm_h: final LSTM state from ``dynamic_rnn``.
            self.acts: action logits (``Actor`` scope).
            self.acts_prob, self.acts_log_prob: softmax / log-softmax of
                ``self.acts``.
            self.value: 1-unit output (``Critic`` scope).
        """
        def conv32(tensor, name):
            # Every convolution in this network shares these settings.
            return tf_tool.conv2d(tensor,
                                  out_channel=32,
                                  activation='leaky_relu',
                                  init='xavier',
                                  name=name)

        state_enc = conv32(self.input, "conv{}".format(0))

        # Embed the previous action into 32 values and reshape them to
        # [-1, 1, 1, 32] so they can be added to the state encoding.
        action_enc = tf_tool.layer(tf.expand_dims(self.last_action, -1),
                                   activation='leaky_relu',
                                   init='xavier',
                                   out_size=32)
        action_enc = tf.reshape(action_enc, [-1, 1, 1, 32])
        merged = state_enc + action_enc

        for layer_idx in range(3):
            merged = conv32(merged, "add_enc_conv1_{}".format(layer_idx))

        # Eight blocks of two convolutions; each block adds its input back.
        for block in range(8):
            shortcut = merged
            merged = conv32(merged, "add_res_conv1_{}".format(block))
            merged = conv32(merged,
                            "add_res_conv2_{}".format(block)) + shortcut

        flat_width = 32 * np.prod(self.state_shape)
        sequence = tf.reshape(
            merged, [self.batch_size, self.max_time, flat_width])
        sequence = tf_tool.layer(sequence,
                                 out_size=128,
                                 activation='leaky_relu',
                                 init='xavier',
                                 name="layer{}".format(1))

        cell = tf.nn.rnn_cell.BasicLSTMCell(512)

        def make_init(batch_size):
            # Zero [c, h] arrays suitable for feeding the placeholders below.
            return [
                np.zeros((batch_size, width), np.float32)
                for width in (cell.state_size.c, cell.state_size.h)
            ]

        self.state_init = keydefaultdict(make_init)

        c_in = tf.placeholder(
            tf.float32, [None, cell.state_size.c], name="lstm_c_in")
        h_in = tf.placeholder(
            tf.float32, [None, cell.state_size.h], name="lstm_h_in")
        self.init_state = [c_in, h_in]

        outputs, final_state = tf.nn.dynamic_rnn(
            cell,
            sequence,
            initial_state=tf.nn.rnn_cell.LSTMStateTuple(c_in, h_in),
            time_major=False)
        self.lstm_c, self.lstm_h = final_state

        with tf.variable_scope('Actor'):
            # The actor sees the LSTM outputs through a ReLU; the critic
            # below reads them directly.
            self.acts = tf_tool.layer(tf.nn.relu(outputs),
                                      out_size=self.n_actions,
                                      init='xavier',
                                      name='acts_prob')
            self.acts_prob = tf.nn.softmax(self.acts)
            self.acts_log_prob = tf.nn.log_softmax(self.acts)

        with tf.variable_scope('Critic'):
            self.value = tf_tool.layer(outputs, out_size=1, name='value')
Ejemplo n.º 8
0
    def bind_model(self):
        """Build the ``Policy`` graph and return its actor and critic outputs.

        ``self.state`` and ``self.condition`` are concatenated on the last
        axis and reshaped to ``[-1] + state_shape`` (with the last entry
        replaced by the concatenated static channel count). The result goes
        through a convolutional trunk, a dense layer and an LSTM whose state
        is fed through placeholders.

        Cleanup relative to the previous revision: the unused
        ``tf.shape(self.state)`` / ``batch_size`` / ``max_time`` locals and a
        commented-out line were removed. No tensor that reaches the outputs
        is affected.

        Sets:
            self.state_init: ``keydefaultdict`` built from a factory that
                returns zero-filled ``[c, h]`` NumPy arrays for a batch size.
            self.init_state: ``[c_in, h_in]`` placeholders for the LSTM state.
            self.states: final LSTM state returned by ``dynamic_rnn``.
            self.lstm_c, self.lstm_h: the two components of ``self.states``.

        Returns:
            Tuple ``(acts, value)``: the ``Actor`` layer output with
            ``self.n_actions`` units and the ``Critic`` layer output with a
            single unit.
        """
        state = tf.concat([self.state, self.condition], axis=-1)
        # Per-step shape with the channel entry updated to account for the
        # concatenated condition.
        s_shape = list(self.state_shape)
        s_shape[-1] = int(state.get_shape()[-1])
        state = tf.reshape(state, [-1] + s_shape)

        with tf.variable_scope('Policy'):
            state_enc = tf_tool.conv2d(state,
                                       out_channel=32,
                                       activation='leaky_relu',
                                       init='xavier',
                                       name="conv{}".format(0))
            # Embed the previous action into 32 values and reshape them to
            # [-1, 1, 1, 32] so they can be added to the state encoding.
            last_action_enc = tf_tool.layer(
                tf.expand_dims(self.last_action, -1),
                activation='leaky_relu',
                init='xavier',
                out_size=32)
            last_action_enc = tf.reshape(last_action_enc, [-1, 1, 1, 32])
            add_enc = state_enc + last_action_enc

            for idx in range(3):
                add_enc = tf_tool.conv2d(add_enc,
                                         out_channel=32,
                                         activation='leaky_relu',
                                         init='xavier',
                                         name="add_enc_conv1_{}".format(idx))

            # Eight blocks of two convolutions; each block adds its input
            # back.
            for idx in range(8):
                originel_add = add_enc
                add_enc = tf_tool.conv2d(add_enc,
                                         out_channel=32,
                                         activation='leaky_relu',
                                         init='xavier',
                                         name="add_res_conv1_{}".format(idx))
                add_enc = tf_tool.conv2d(
                    add_enc,
                    out_channel=32,
                    activation='leaky_relu',
                    init='xavier',
                    name="add_res_conv2_{}".format(idx)) + originel_add

            # NOTE(review): this reshape relies on the instance attributes
            # self.batch_size / self.max_time, not on sizes derived from the
            # input tensor -- confirm that is intended.
            add_enc = tf.reshape(add_enc, [
                self.batch_size, self.max_time, 32 * np.prod(self.state_shape)
            ])
            add_enc = tf_tool.layer(add_enc,
                                    out_size=256,
                                    activation='leaky_relu',
                                    init='xavier',
                                    name="layer{}".format(1))

            lstm_in = add_enc

            lstm = tf.nn.rnn_cell.BasicLSTMCell(512, name='lstm')

            def make_init(batch_size):
                # Zero [c, h] arrays suitable for feeding the placeholders.
                c_init = np.zeros((batch_size, lstm.state_size.c), np.float32)
                h_init = np.zeros((batch_size, lstm.state_size.h), np.float32)
                return [c_init, h_init]

            self.state_init = keydefaultdict(make_init)
            c_in = tf.placeholder(tf.float32, [None, lstm.state_size.c],
                                  name="lstm_c_in")
            h_in = tf.placeholder(tf.float32, [None, lstm.state_size.h],
                                  name="lstm_h_in")
            self.init_state = [c_in, h_in]
            state_in = tc.LSTMStateTuple(c_in, h_in)
            outputs, self.states = tf.nn.dynamic_rnn(lstm,
                                                     lstm_in,
                                                     initial_state=state_in,
                                                     time_major=False)
            self.lstm_c, self.lstm_h = self.states

            with tf.variable_scope('Actor'):
                # The actor sees the LSTM outputs through a ReLU; the critic
                # below reads them directly.
                acts = tf_tool.layer(tf.nn.relu(outputs),
                                     out_size=self.n_actions,
                                     init='xavier',
                                     name='acts_prob')
            with tf.variable_scope('Critic'):
                value = tf_tool.layer(outputs, out_size=1, name='value')

        return acts, value