Example #1
    def modelOpt(self):

        bn_params = {'decay': 0.999, 'center': True, 'scale': True, 'epsilon': 0.001, 'updates_collections': None,
                     'is_training': self.is_training}

        self.global_step = tf.Variable(0, trainable=False)
        self.learning_rate = layers.decayLearningRate(LEARNING_RATE, self.global_step, DECAY_STEPS, DECAY_RATE)

        self.towerLogits = tf.placeholder(dtype=tf.float32, shape=[None, self.dataset.frames, 64])
        self.Y = tf.placeholder(dtype=tf.int32, shape=[None])
        self.Yoh = layers.toOneHot(self.Y, self.dataset.num_classes)

        # Flatten the per-frame tower features into one vector per clip.
        net_shape = self.towerLogits.get_shape()
        net = tf.reshape(self.towerLogits, [BATCH_SIZE, int(net_shape[1]) * int(net_shape[2])])

        layer_num = 1
        for fully_connected_num in [64]:
            net = layers.fc(net, fully_connected_num, name='temporal_FC{}'.format(layer_num),
                            weights_regularizer=layers.l2_regularizer(REGULARIZER_SCALE),
                            normalizer_fn=layers.batchNormalization, normalizer_params=bn_params)
            layer_num += 1

        self.logits = layers.fc(net, self.dataset.num_classes, activation_fn=None, name='logits')

        self.preds = layers.softmax(self.logits)

        cross_entropy_loss = layers.reduce_mean(layers.softmax_cross_entropy(logits=self.logits, labels=self.Yoh))
        regularization_loss = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
        self.loss = cross_entropy_loss + REGULARIZER_SCALE * tf.reduce_sum(regularization_loss)
        self.opt = layers.adam(self.learning_rate)
        self.train_op = self.opt.minimize(self.loss, global_step=self.global_step)

        self.accuracy, self.precision, self.recall = self.createSummaries(self.Yoh, self.preds, self.loss,
                                                                          self.learning_rate)
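All of the examples in this listing are TensorFlow 1.x graph-building methods from a lip-reading model class; they rely on a project-local `layers` wrapper and module-level constants that the listing does not show. A minimal sketch of that assumed context (the constant values and wrapper bodies are guesses, not the project's actual code):

import tensorflow as tf  # TensorFlow 1.x API assumed throughout

# Assumed module-level hyperparameters; the real values are not shown in the listing.
LEARNING_RATE = 1e-4
DECAY_STEPS = 10000
DECAY_RATE = 0.96
REGULARIZER_SCALE = 1e-4
BATCH_SIZE = 10

# Plausible stand-ins for two of the wrapper helpers used in the examples.
def decayLearningRate(learning_rate, global_step, decay_steps, decay_rate):
    # Standard exponential-decay schedule.
    return tf.train.exponential_decay(learning_rate, global_step,
                                      decay_steps, decay_rate)

def toOneHot(labels, num_classes):
    return tf.one_hot(labels, num_classes)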
Example #2
    def createModel(self):

        print("CREATING MODEL")

        self.global_step = tf.Variable(0, trainable=False)
        self.is_training = tf.placeholder_with_default(True, [], name='is_training')
        self.learning_rate = layers.decayLearningRate(LEARNING_RATE, self.global_step, DECAY_STEPS, DECAY_RATE)

        self.X = tf.placeholder(dtype=tf.float32, shape=[None, self.dataset.frames, self.dataset.h, self.dataset.w, self.dataset.c])
        self.Y = tf.placeholder(dtype=tf.int32, shape=[None])

        self.Yoh = layers.toOneHot(self.Y, self.dataset.num_classes)

        bn_params = {'decay': 0.999, 'center': True, 'scale': True, 'epsilon': 0.001, 'updates_collections': None, 'is_training': self.is_training}

        # Run the shared VGG tower over every frame, reusing its weights after the first pass.
        concated = None
        reuse = None
        for sequence_image in range(self.dataset.frames):
            net = self.vgg(self.X[:, sequence_image], reuse)

            net_shape = net.get_shape()
            net = tf.reshape(net, [BATCH_SIZE, int(net_shape[1]) * int(net_shape[2]) * int(net_shape[3])])

            net = layers.fc(net, 64, name='spatial_FC', reuse=reuse,
                            weights_regularizer=layers.l2_regularizer(REGULARIZER_SCALE),
                            normalizer_fn=layers.batchNormalization, normalizer_params=bn_params)

            if concated is None:
                concated = tf.expand_dims(net, axis=1)
            else:
                concated = tf.concat([concated, tf.expand_dims(net, axis=1)], axis=1)

            reuse = True

        net = concated
        net_shape = net.get_shape()
        net = tf.reshape(net, [BATCH_SIZE, int(net_shape[1]) * int(net_shape[2])])

        layer_num = 1
        for fully_connected_num in [64]:
            net = layers.fc(net, fully_connected_num, name='temporal_FC{}'.format(layer_num),
                            weights_regularizer=layers.l2_regularizer(REGULARIZER_SCALE),
                            normalizer_fn=layers.batchNormalization, normalizer_params=bn_params)
            layer_num += 1

        self.logits = layers.fc(net, self.dataset.num_classes, activation_fn=None, name='logits')

        self.preds = layers.softmax(self.logits)

        cross_entropy_loss = layers.reduce_mean(layers.softmax_cross_entropy(logits=self.logits, labels=self.Yoh))
        regularization_loss = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
        self.loss = cross_entropy_loss + REGULARIZER_SCALE * tf.reduce_sum(regularization_loss)
        self.opt = layers.adam(self.learning_rate)
        self.train_op = self.opt.minimize(self.loss, global_step=self.global_step)

        self.accuracy, self.precision, self.recall = self.createSummaries(self.Yoh, self.preds, self.loss,
                                                                          self.learning_rate)
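A graph like the one above is driven by a standard TF1 session loop that feeds the `X`/`Y` placeholders per batch and switches `is_training` off for evaluation. A minimal sketch (the `Model` class and batch iterator are assumptions):

model = Model()   # hypothetical wrapper class that defines createModel()
model.createModel()

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for batch_x, batch_y in train_batches():   # assumed batch iterator
        _, loss_val = sess.run([model.train_op, model.loss],
                               feed_dict={model.X: batch_x, model.Y: batch_y})
    # Batch norm must run in inference mode when evaluating.
    val_preds = sess.run(model.preds,
                         feed_dict={model.X: val_x,
                                    model.is_training: False})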
Example #3
    def modelOpt(self):

        bn_params = {
            'decay': 0.999,
            'center': True,
            'scale': True,
            'epsilon': 0.001,
            'updates_collections': None,
            'is_training': self.is_training
        }

        self.global_step = tf.Variable(0, trainable=False)
        self.learning_rate = layers.decayLearningRate(LEARNING_RATE,
                                                      self.global_step,
                                                      DECAY_STEPS, DECAY_RATE)

        self.towerLogits = tf.placeholder(
            dtype=tf.float32, shape=[None, self.dataset.frames, 64])
        self.Y = tf.placeholder(dtype=tf.int32, shape=[None])
        self.Yoh = layers.toOneHot(self.Y, self.dataset.num_classes)

        net_shape = self.towerLogits.get_shape()
        net = tf.reshape(
            self.towerLogits,
            [BATCH_SIZE, int(net_shape[1]) * int(net_shape[2])])

        layer_num = 1
        for fully_connected_num in [64]:
            net = layers.fc(
                net,
                fully_connected_num,
                name='temporal_FC{}'.format(layer_num),
                weights_regularizer=layers.l2_regularizer(REGULARIZER_SCALE),
                normalizer_fn=layers.batchNormalization,
                normalizer_params=bn_params)
            layer_num += 1

        self.logits = layers.fc(net,
                                self.dataset.num_classes,
                                activation_fn=None,
                                name='logits')

        self.preds = layers.softmax(self.logits)

        cross_entropy_loss = layers.reduce_mean(
            layers.softmax_cross_entropy(logits=self.logits, labels=self.Yoh))
        regularization_loss = tf.get_collection(
            tf.GraphKeys.REGULARIZATION_LOSSES)
        self.loss = cross_entropy_loss + REGULARIZER_SCALE * tf.reduce_sum(
            regularization_loss)
        self.opt = layers.adam(self.learning_rate)
        self.train_op = self.opt.minimize(self.loss,
                                          global_step=self.global_step)

        self.accuracy, self.precision, self.recall = self.createSummaries(
            self.Yoh, self.preds, self.loss, self.learning_rate)
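Note that `modelOpt` builds only the temporal head: `towerLogits` is a placeholder, so the per-frame tower outputs are computed elsewhere and fed in. A minimal sketch of that two-stage flow (the `tower_output` tensor and the `model`/`sess` names are assumptions):

# Stage 1: evaluate the per-frame tower once and cache its features
# (`tower_output` is hypothetical; the real tensor comes from the tower code).
frame_features = sess.run(tower_output, feed_dict={model.X: batch_x})

# Stage 2: train the temporal head defined in modelOpt() on the cached features.
_, loss_val = sess.run([model.train_op, model.loss],
                       feed_dict={model.towerLogits: frame_features,
                                  model.Y: batch_y})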
Example #4
    def createModelLowMemory(self):

        print("CREATING MODEL")

        self.global_step = tf.Variable(0, trainable=False)
        self.is_training = tf.placeholder_with_default(True, [], name='is_training')
        self.learning_rate = layers.decayLearningRate(LEARNING_RATE, self.global_step, DECAY_STEPS, DECAY_RATE)

        self.X = tf.placeholder(dtype=tf.float32, shape=[None, self.dataset.frames, self.dataset.h, self.dataset.w, self.dataset.c])
        self.Y = tf.placeholder(dtype=tf.int32, shape=[None])

        self.Yoh = layers.toOneHot(self.Y, self.dataset.num_classes)

        concated = None

        reuse = None
        for sequence_image in range(self.dataset.frames):

            net = self.vggLowMemory(self.X[:, sequence_image], reuse)

            net_shape = net.get_shape()
            net = tf.reshape(net, [BATCH_SIZE, int(net_shape[1]) * int(net_shape[2]) * int(net_shape[3])])

            net = layers.fc(net, 64, name='spatial_FC', reuse=reuse)

            if concated is None:
                concated = tf.expand_dims(net, axis=1)
            else:
                concated = tf.concat([concated, tf.expand_dims(net, axis=1)], axis=1)

            reuse = True

        net = concated
        net_shape = net.get_shape()
        net = tf.reshape(net, [BATCH_SIZE, int(net_shape[1]) * int(net_shape[2])])

        layer_num = 1
        for fully_connected_num in [64]:
            net = layers.fc(net, fully_connected_num, name='temporal_FC{}'.format(layer_num))
            layer_num += 1

        self.logits = layers.fc(net, self.dataset.num_classes, activation_fn=None, name='logits')

        self.preds = layers.softmax(self.logits)

        self.loss = layers.reduce_mean(layers.softmax_cross_entropy(logits=self.logits, labels=self.Yoh))
        self.opt = layers.adam(self.learning_rate)
        self.train_op = self.opt.minimize(self.loss, global_step=self.global_step)

        self.accuracy, self.precision, self.recall = self.createSummaries(self.Yoh, self.preds, self.loss,
                                                                          self.learning_rate)
Example #5
    def createModel(self):

        print("CREATING MODEL")

        self.global_step = tf.Variable(0, trainable=False)
        self.is_training = tf.placeholder_with_default(True, [], name='is_training')
        self.learning_rate = layers.decayLearningRate(LEARNING_RATE, self.global_step, DECAY_STEPS, DECAY_RATE)

        self.X = tf.placeholder(dtype=tf.float32, shape=[None, self.dataset.frames, self.dataset.h, self.dataset.w, self.dataset.c])
        self.Y = tf.placeholder(dtype=tf.int32, shape=[None])

        self.Yoh = layers.toOneHot(self.Y, self.dataset.num_classes)

        reuse = None
        towersLogits = []
        for sequence_image in range(self.dataset.frames):
            net = self.vgg16(self.X[:, sequence_image], reuse)
            towersLogits.append(net)
            reuse = True

        # Stack per-frame feature maps ([frames, batch, h, w, c]), then move the
        # frame axis next to channels and fold it in so 2-D layers see the clip.
        net = layers.stack(towersLogits)
        del towersLogits[:]

        net = layers.transpose(net, [1, 2, 3, 0, 4])
        net = layers.reshape(net, [-1, net.shape[1], net.shape[2], net.shape[3] * net.shape[4]])

        net = layers.fc(net, 512, name='fc5', weights_regularizer=layers.l2_regularizer(REGULARIZER_SCALE))
        net = layers.squeeze_and_excite2d(net, indexHeight=1, indexWidth=2, name='se5', filters=512)

        net = layers.flatten(net, name='flatten')

        net = layers.fc(net, 4096, name='fc6', weights_regularizer=layers.l2_regularizer(REGULARIZER_SCALE))
        net = layers.fc(net, 4096, name='fc7', weights_regularizer=layers.l2_regularizer(REGULARIZER_SCALE))

        self.logits = layers.fc(net, self.dataset.num_classes, activation_fn=None, name='fc8',
                                weights_regularizer=layers.l2_regularizer(REGULARIZER_SCALE))

        self.preds = layers.softmax(self.logits)

        cross_entropy_loss = layers.reduce_mean(layers.softmax_cross_entropy(logits=self.logits, labels=self.Yoh))
        regularization_loss = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
        self.loss = cross_entropy_loss + REGULARIZER_SCALE * tf.reduce_sum(regularization_loss)

        self.opt = layers.adam(self.learning_rate)
        self.train_op = self.opt.minimize(self.loss, global_step=self.global_step)

        self.accuracy, self.precision, self.recall = self.createSummaries(self.Yoh, self.preds, self.loss,
                                                                          self.learning_rate)
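`layers.squeeze_and_excite2d` above comes from the project's wrapper and is not reproduced in this listing; the following is a minimal sketch of the standard squeeze-and-excitation block it presumably implements (the `ratio` bottleneck and exact wiring are assumptions):

def squeeze_and_excite2d(net, indexHeight, indexWidth, name, filters, ratio=16):
    with tf.variable_scope(name):
        # Squeeze: global average pool over the spatial axes.
        squeezed = tf.reduce_mean(net, axis=[indexHeight, indexWidth])      # [B, C]
        # Excite: bottleneck MLP producing per-channel gates in (0, 1).
        hidden = tf.layers.dense(squeezed, filters // ratio, activation=tf.nn.relu)
        gates = tf.layers.dense(hidden, filters, activation=tf.nn.sigmoid)  # [B, C]
        # Rescale the feature map channel-wise.
        return net * tf.reshape(gates, [-1, 1, 1, filters])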
Example #6
    def createModelLowMemory(self):

        print("CREATING MODEL")

        self.global_step = tf.Variable(0, trainable=False)
        self.is_training = tf.placeholder_with_default(True, [], name='is_training')
        self.learning_rate = layers.decayLearningRate(LEARNING_RATE, self.global_step, DECAY_STEPS, DECAY_RATE)

        self.X = tf.placeholder(dtype=tf.float32, shape=[None, self.dataset.frames, self.dataset.h, self.dataset.w, self.dataset.c])
        self.Y = tf.placeholder(dtype=tf.int32, shape=[None])

        self.Yoh = layers.toOneHot(self.Y, self.dataset.num_classes)

        reuse = None
        towersLogits = []
        for sequence_image in range(self.dataset.frames):
            net = self.mt_loop_low_memory(self.X[:, sequence_image], reuse)
            towersLogits.append(net)
            reuse = True

        net = layers.stack(towersLogits)
        del towersLogits[:]

        net = layers.transpose(net, [1, 2, 3, 0, 4])
        net = layers.reshape(net, [-1, net.shape[1], net.shape[2], net.shape[3] * net.shape[4]])

        net = layers.fc(net, 512, name='fc5')

        net = layers.flatten(net, name='flatten')

        net = layers.fc(net, 4096, name='fc6')
        net = layers.fc(net, 4096, name='fc7')

        self.logits = layers.fc(net, self.dataset.num_classes, activation_fn=None, name='fc8')

        self.preds = layers.softmax(self.logits)

        self.loss = layers.reduce_mean(layers.softmax_cross_entropy(logits=self.logits, labels=self.Yoh))
        self.opt = layers.sgd(self.learning_rate)
        self.train_op = self.opt.minimize(self.loss, global_step=self.global_step)

        self.accuracy, self.precision, self.recall = self.createSummaries(self.Yoh, self.preds, self.loss,
                                                                          self.learning_rate)
Example #7
    def modelOpt(self):

        self.global_step = tf.Variable(0, trainable=False)
        self.learning_rate = layers.decayLearningRate(LEARNING_RATE, self.global_step, DECAY_STEPS, DECAY_RATE)

        self.optLogits = tf.placeholder(dtype=tf.float32, shape=[None, self.dataset.num_classes])
        self.Y = tf.placeholder(dtype=tf.int32, shape=[None])
        self.Yoh = layers.toOneHot(self.Y, self.dataset.num_classes)

        self.preds = layers.softmax(self.optLogits)

        cross_entropy_loss = layers.reduce_mean(layers.softmax_cross_entropy(logits=self.optLogits, labels=self.Yoh))
        regularization_loss = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
        self.loss = cross_entropy_loss + REGULARIZER_SCALE * tf.reduce_sum(regularization_loss)

        self.opt = layers.adam(self.learning_rate)
        self.train_op = self.opt.minimize(self.loss, global_step=self.global_step)

        self.accuracy, self.precision, self.recall = self.createSummaries(self.Yoh, self.preds, self.loss,
                                                                          self.learning_rate)
Example #8
File: ef.py Project: Tiyanak/lip-reading
    def createModel(self):

        print("CREATING MODEL")

        self.global_step = tf.Variable(0, trainable=False)
        self.is_training = tf.placeholder_with_default(True, [], name='is_training')
        self.learning_rate = layers.decayLearningRate(LEARNING_RATE, self.global_step, DECAY_STEPS, DECAY_RATE)

        self.X = tf.placeholder(dtype=tf.float32, shape=[None, self.dataset.frames, self.dataset.h, self.dataset.w, self.dataset.c])
        self.Y = tf.placeholder(dtype=tf.int32, shape=[None])

        self.Yoh = layers.toOneHot(self.Y, self.dataset.num_classes)

        net = self.X

        if net.shape[-1] > 1:
            net = layers.rgb_to_grayscale(net)

        if len(net.shape) > 4:
            net = tf.transpose(net, [0, 2, 3, 1, 4])
            net = tf.reshape(net, [-1, net.shape[1], net.shape[2], net.shape[3] * net.shape[4]])

        bn_params = {'decay': 0.999, 'center': True, 'scale': True, 'epsilon': 0.001,
                     'updates_collections': None, 'is_training': self.is_training}

        net = layers.conv2d(net, filters=96, kernel_size=3, padding='VALID', stride=2, name='conv1',
                            normalizer_fn=layers.batchNormalization, normalizer_params=bn_params,
                            weights_regularizer=layers.l2_regularizer(REGULARIZER_SCALE))
        net = layers.max_pool2d(net, 3, 2, name='max_pool1')
        net = layers.squeeze_and_excite2d(net, indexHeight=1, indexWidth=2, name='se1', filters=96)

        net = layers.conv2d(net, filters=256, kernel_size=3, padding='VALID', stride=2, name='conv2',
                            normalizer_fn=layers.batchNormalization, normalizer_params=bn_params,
                            weights_regularizer=layers.l2_regularizer(REGULARIZER_SCALE))
        net = layers.max_pool2d(net, 3, 2, name='max_pool2')
        net = layers.squeeze_and_excite2d(net, indexHeight=1, indexWidth=2, name='se2', filters=256)

        net = layers.conv2d(net, filters=512, kernel_size=3, padding='SAME', stride=1, name='conv3',
                            normalizer_fn=layers.batchNormalization, normalizer_params=bn_params,
                            weights_regularizer=layers.l2_regularizer(REGULARIZER_SCALE))
        net = layers.squeeze_and_excite2d(net, indexHeight=1, indexWidth=2, name='se3', filters=512)

        net = layers.conv2d(net, filters=512, kernel_size=3, padding='SAME', stride=1, name='conv4',
                            weights_regularizer=layers.l2_regularizer(REGULARIZER_SCALE))
        net = layers.squeeze_and_excite2d(net, indexHeight=1, indexWidth=2, name='se4', filters=512)

        net = layers.conv2d(net, filters=512, kernel_size=3, padding='SAME', stride=1, name='conv5',
                            weights_regularizer=layers.l2_regularizer(REGULARIZER_SCALE))
        net = layers.max_pool2d(net, 3, 2, name='max_pool5')  # was a duplicate of 'max_pool2'
        net = layers.squeeze_and_excite2d(net, indexHeight=1, indexWidth=2, name='se5', filters=512)

        net = layers.flatten(net, name='flatten')

        net = layers.fc(net, 4096, name='fc6', weights_regularizer=layers.l2_regularizer(REGULARIZER_SCALE))
        net = layers.fc(net, 4096, name='fc7', weights_regularizer=layers.l2_regularizer(REGULARIZER_SCALE))

        self.logits = layers.fc(net, self.dataset.num_classes, activation_fn=None, name='fc8', weights_regularizer=layers.l2_regularizer(REGULARIZER_SCALE))

        self.preds = layers.softmax(self.logits)

        cross_entropy_loss = layers.reduce_mean(layers.softmax_cross_entropy(logits=self.logits, labels=self.Yoh))
        regularization_loss = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
        self.loss = cross_entropy_loss + REGULARIZER_SCALE * tf.reduce_sum(regularization_loss)
        self.opt = layers.adam(self.learning_rate)
        self.train_op = self.opt.minimize(self.loss, global_step=self.global_step)

        self.accuracy, self.precision, self.recall = self.createSummaries(self.Yoh, self.preds, self.loss,
                                                                          self.learning_rate)
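The grayscale conversion and transpose/reshape at the top of this example fold the frame axis into the channel axis so one 2-D network can see the whole clip. A runnable shape trace with illustrative sizes (the real dims come from `self.dataset`):

import tensorflow as tf  # TensorFlow 1.x

x = tf.zeros([8, 16, 112, 112, 3])         # [batch, frames, h, w, c]; sizes illustrative
g = tf.image.rgb_to_grayscale(x)           # [8, 16, 112, 112, 1]
t = tf.transpose(g, [0, 2, 3, 1, 4])       # [8, 112, 112, 16, 1]: frames next to channels
p = tf.reshape(t, [-1, 112, 112, 16 * 1])  # [8, 112, 112, 16]: frames * c as channels
print(p.shape)                             # (8, 112, 112, 16)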
Example #9
    def createModel(self):

        print("CREATING MODEL")

        self.global_step = tf.Variable(0, trainable=False)
        self.is_training = tf.placeholder_with_default(True, [],
                                                       name='is_training')
        self.learning_rate = layers.decayLearningRate(LEARNING_RATE,
                                                      self.global_step,
                                                      DECAY_STEPS, DECAY_RATE)

        self.X = tf.placeholder(dtype=tf.float32,
                                shape=[
                                    None, self.dataset.frames, self.dataset.h,
                                    self.dataset.w, self.dataset.c
                                ])
        self.Y = tf.placeholder(dtype=tf.int32, shape=[None])

        self.Yoh = layers.toOneHot(self.Y, self.dataset.num_classes)

        net = self.X

        conv3d_kernel = [3, 3, 3]
        max3d_pool_kernel = [3, 3, 2]

        bn_params = {
            'decay': 0.999,
            'center': True,
            'scale': True,
            'epsilon': 0.001,
            'updates_collections': None,
            'is_training': self.is_training
        }

        net = layers.transpose(net, [0, 2, 3, 1, 4])

        net = layers.conv3d(
            net,
            filters=48,
            kernel_size=conv3d_kernel,
            padding='VALID',
            stride=2,
            name='conv1',
            normalizer_fn=layers.batchNormalization,
            normalizer_params=bn_params,
            weights_regularizer=layers.l2_regularizer(REGULARIZER_SCALE))
        net = layers.max_pool3d(net,
                                max3d_pool_kernel,
                                2,
                                padding='VALID',
                                name='max_pool1')
        net = layers.squeeze_and_excite3d(net,
                                          indexHeight=1,
                                          indexWidth=2,
                                          indexSeq=3,
                                          name='se1',
                                          filters=48)

        net = layers.conv3d(
            net,
            filters=256,
            kernel_size=conv3d_kernel,
            padding='VALID',
            stride=2,
            name='conv2',
            normalizer_fn=layers.batchNormalization,
            normalizer_params=bn_params,
            weights_regularizer=layers.l2_regularizer(REGULARIZER_SCALE))
        net = layers.max_pool3d(net,
                                max3d_pool_kernel,
                                2,
                                padding='VALID',
                                name='max_pool2')
        net = layers.squeeze_and_excite3d(net,
                                          indexHeight=1,
                                          indexWidth=2,
                                          indexSeq=3,
                                          name='se2',
                                          filters=256)

        net = layers.reshape(
            net, [-1, net.shape[1], net.shape[2], net.shape[3] * net.shape[4]])

        net = layers.conv2d(
            net,
            filters=512,
            kernel_size=3,
            padding='SAME',
            stride=1,
            name='conv3',
            normalizer_fn=layers.batchNormalization,
            normalizer_params=bn_params,
            weights_regularizer=layers.l2_regularizer(REGULARIZER_SCALE))
        net = layers.squeeze_and_excite2d(net,
                                          indexHeight=1,
                                          indexWidth=2,
                                          name='se3',
                                          filters=512)

        net = layers.conv2d(
            net,
            filters=512,
            kernel_size=3,
            padding='SAME',
            stride=1,
            name='conv4',
            weights_regularizer=layers.l2_regularizer(REGULARIZER_SCALE))
        net = layers.squeeze_and_excite2d(net,
                                          indexHeight=1,
                                          indexWidth=2,
                                          name='se4',
                                          filters=512)

        net = layers.conv2d(
            net,
            filters=512,
            kernel_size=3,
            padding='SAME',
            stride=1,
            name='conv5',
            weights_regularizer=layers.l2_regularizer(REGULARIZER_SCALE))
        net = layers.max_pool2d(net, 3, 1, padding='VALID', name='max_pool5')
        net = layers.squeeze_and_excite2d(net,
                                          indexHeight=1,
                                          indexWidth=2,
                                          name='se5',
                                          filters=512)

        net = layers.flatten(net, name='flatten')

        net = layers.fc(
            net,
            4096,
            name='fc6',
            weights_regularizer=layers.l2_regularizer(REGULARIZER_SCALE))
        net = layers.fc(
            net,
            4096,
            name='fc7',
            weights_regularizer=layers.l2_regularizer(REGULARIZER_SCALE))
        self.logits = layers.fc(
            net,
            self.dataset.num_classes,
            activation_fn=None,
            name='fc8',
            weights_regularizer=layers.l2_regularizer(REGULARIZER_SCALE))

        self.preds = layers.softmax(self.logits)

        cross_entropy_loss = layers.reduce_mean(
            layers.softmax_cross_entropy(logits=self.logits, labels=self.Yoh))
        regularization_loss = tf.get_collection(
            tf.GraphKeys.REGULARIZATION_LOSSES)
        self.loss = cross_entropy_loss + REGULARIZER_SCALE * tf.reduce_sum(
            regularization_loss)
        self.opt = layers.adam(self.learning_rate)
        self.train_op = self.opt.minimize(self.loss,
                                          global_step=self.global_step)

        self.accuracy, self.precision, self.recall = self.createSummaries(
            self.Yoh, self.preds, self.loss, self.learning_rate)
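The mid-network `reshape` above hands off from 3-D to 2-D convolutions by folding the remaining temporal axis into channels. A small trace with illustrative sizes (the real spatial/temporal dims depend on the input and the VALID-padded strides):

import tensorflow as tf  # TensorFlow 1.x

# After the second 3-D conv/pool stage the tensor is [batch, h', w', frames', c'];
# the sizes here are illustrative, not derived from the real input dims.
net = tf.zeros([8, 12, 12, 3, 256])
net = tf.reshape(net, [-1, 12, 12, 3 * 256])  # temporal axis folded into channels
print(net.shape)                              # (8, 12, 12, 768)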
Example #10
    def createModel(self):

        print("CREATING MODEL")

        self.global_step = tf.Variable(0, trainable=False)
        self.is_training = tf.placeholder_with_default(True, [],
                                                       name='is_training')
        self.learning_rate = layers.decayLearningRate(LEARNING_RATE,
                                                      self.global_step,
                                                      DECAY_STEPS, DECAY_RATE)

        self.X = tf.placeholder(dtype=tf.float32,
                                shape=[
                                    None, self.dataset.frames, self.dataset.h,
                                    self.dataset.w, self.dataset.c
                                ])
        self.Y = tf.placeholder(dtype=tf.int32, shape=[None])

        self.Yoh = layers.toOneHot(self.Y, self.dataset.num_classes)

        net = self.X

        if net.shape[-1] > 1:
            net = layers.rgb_to_grayscale(net)

        if len(net.shape) > 4:
            net = tf.transpose(net, [0, 2, 3, 1, 4])
            net = tf.reshape(
                net,
                [-1, net.shape[1], net.shape[2], net.shape[3] * net.shape[4]])

        net = self.build_vgg16(net)

        if len(net.shape) > 4:
            net = tf.transpose(net, [0, 2, 3, 1, 4])
            net = tf.reshape(
                net,
                [-1, net.shape[1], net.shape[2], net.shape[3] * net.shape[4]])

        net = layers.flatten(net, name='flatten')

        net = layers.fc(
            net,
            4096,
            name='fc6',
            weights_regularizer=layers.l2_regularizer(REGULARIZER_SCALE))
        net = layers.fc(
            net,
            4096,
            name='fc7',
            weights_regularizer=layers.l2_regularizer(REGULARIZER_SCALE))

        self.logits = layers.fc(
            net,
            self.dataset.num_classes,
            activation_fn=None,
            name='fc8',
            weights_regularizer=layers.l2_regularizer(REGULARIZER_SCALE))

        self.preds = layers.softmax(self.logits)

        cross_entropy_loss = layers.reduce_mean(
            layers.softmax_cross_entropy(logits=self.logits, labels=self.Yoh))
        regularization_loss = tf.get_collection(
            tf.GraphKeys.REGULARIZATION_LOSSES)
        self.loss = cross_entropy_loss + REGULARIZER_SCALE * tf.reduce_sum(
            regularization_loss)
        self.opt = layers.adam(self.learning_rate)
        self.train_op = self.opt.minimize(self.loss,
                                          global_step=self.global_step)

        self.accuracy, self.precision, self.recall = self.createSummaries(
            self.Yoh, self.preds, self.loss, self.learning_rate)
Example #11
    def createModel(self):

        print("CREATING MODEL")

        self.global_step = tf.Variable(0, trainable=False)
        self.is_training = tf.placeholder_with_default(True, [],
                                                       name='is_training')
        self.learning_rate = layers.decayLearningRate(LEARNING_RATE,
                                                      self.global_step,
                                                      DECAY_STEPS, DECAY_RATE)

        self.X = tf.placeholder(dtype=tf.float32,
                                shape=[
                                    None, self.dataset.frames, self.dataset.h,
                                    self.dataset.w, self.dataset.c
                                ])
        self.Y = tf.placeholder(dtype=tf.int32, shape=[None])

        self.Yoh = layers.toOneHot(self.Y, self.dataset.num_classes)

        bn_params = {
            'decay': 0.999,
            'center': True,
            'scale': True,
            'epsilon': 0.001,
            'updates_collections': None,
            'is_training': self.is_training
        }

        concated = None
        reuse = None
        for sequence_image in range(self.dataset.frames):
            net = self.vgg(self.X[:, sequence_image], reuse)

            net_shape = net.get_shape()
            net = tf.reshape(net, [
                BATCH_SIZE,
                int(net_shape[1]) * int(net_shape[2]) * int(net_shape[3])
            ])

            net = layers.fc(
                net,
                64,
                name='spatial_FC',
                reuse=reuse,
                weights_regularizer=layers.l2_regularizer(REGULARIZER_SCALE),
                normalizer_fn=layers.batchNormalization,
                normalizer_params=bn_params)

            if concated is None:
                concated = tf.expand_dims(net, axis=1)
            else:
                concated = tf.concat(
                    [concated, tf.expand_dims(net, axis=1)], axis=1)

            reuse = True

        net = concated
        net_shape = net.get_shape()
        net = tf.reshape(
            net,
            [BATCH_SIZE, int(net_shape[1]) * int(net_shape[2])])

        layer_num = 1
        for fully_connected_num in [64]:
            net = layers.fc(
                net,
                fully_connected_num,
                name='temporal_FC{}'.format(layer_num),
                weights_regularizer=layers.l2_regularizer(REGULARIZER_SCALE),
                normalizer_fn=layers.batchNormalization,
                normalizer_params=bn_params)
            layer_num += 1

        self.logits = layers.fc(net,
                                self.dataset.num_classes,
                                activation_fn=None,
                                name='logits')

        self.preds = layers.softmax(self.logits)

        cross_entropy_loss = layers.reduce_mean(
            layers.softmax_cross_entropy(logits=self.logits, labels=self.Yoh))
        regularization_loss = tf.get_collection(
            tf.GraphKeys.REGULARIZATION_LOSSES)
        self.loss = cross_entropy_loss + REGULARIZER_SCALE * tf.reduce_sum(
            regularization_loss)
        self.opt = layers.adam(self.learning_rate)
        self.train_op = self.opt.minimize(self.loss,
                                          global_step=self.global_step)

        self.accuracy, self.precision, self.recall = self.createSummaries(
            self.Yoh, self.preds, self.loss, self.learning_rate)
Example #12
    def createModelLowMemory(self):

        print("CREATING MODEL")

        self.global_step = tf.Variable(0, trainable=False)
        self.is_training = tf.placeholder_with_default(True, [],
                                                       name='is_training')
        self.learning_rate = layers.decayLearningRate(LEARNING_RATE,
                                                      self.global_step,
                                                      DECAY_STEPS, DECAY_RATE)

        self.X = tf.placeholder(dtype=tf.float32,
                                shape=[
                                    None, self.dataset.frames, self.dataset.h,
                                    self.dataset.w, self.dataset.c
                                ])
        self.Y = tf.placeholder(dtype=tf.int32, shape=[None])

        self.Yoh = layers.toOneHot(self.Y, self.dataset.num_classes)

        concated = None

        reuse = None
        for sequence_image in range(self.dataset.frames):

            net = self.vggLowMemory(self.X[:, sequence_image], reuse)

            net_shape = net.get_shape()
            net = tf.reshape(net, [
                BATCH_SIZE,
                int(net_shape[1]) * int(net_shape[2]) * int(net_shape[3])
            ])

            net = layers.fc(net, 64, name='spatial_FC', reuse=reuse)

            if concated is None:
                concated = tf.expand_dims(net, axis=1)
            else:
                concated = tf.concat(
                    [concated, tf.expand_dims(net, axis=1)], axis=1)

            reuse = True

        net = concated
        net_shape = net.get_shape()
        net = tf.reshape(
            net,
            [BATCH_SIZE, int(net_shape[1]) * int(net_shape[2])])

        layer_num = 1
        for fully_connected_num in [64]:
            net = layers.fc(net,
                            fully_connected_num,
                            name='temporal_FC{}'.format(layer_num))
            layer_num += 1

        self.logits = layers.fc(net,
                                self.dataset.num_classes,
                                activation_fn=None,
                                name='logits')

        self.preds = layers.softmax(self.logits)

        self.loss = layers.reduce_mean(
            layers.softmax_cross_entropy(logits=self.logits, labels=self.Yoh))
        self.opt = layers.adam(self.learning_rate)
        self.train_op = self.opt.minimize(self.loss,
                                          global_step=self.global_step)

        self.accuracy, self.precision, self.recall = self.createSummaries(
            self.Yoh, self.preds, self.loss, self.learning_rate)