예제 #1
0
    def testInitErrors(self):
        """Check that bad constructor or call arguments raise `ValueError`."""
        # Invalid `final_endpoint` string.
        with self.assertRaises(ValueError):
            i3d.InceptionI3d(num_classes=_NUM_CLASSES,
                             final_endpoint='Conv3d_1a_8x8')

        # Each entry is (placeholder shape, dropout_keep_prob) for a call
        # that is expected to be rejected.
        rejected_calls = (
            # Dropout keep probability must be in (0, 1].
            ([None, 64, _IMAGE_SIZE, _IMAGE_SIZE, 3], 0),
            # Height and width dimensions of the input should be _IMAGE_SIZE.
            ([None, 64, 10, 10, 3], 0.5),
        )
        for input_shape, keep_prob in rejected_calls:
            # A fresh model and placeholder are built for every case.
            model = i3d.InceptionI3d(num_classes=_NUM_CLASSES)
            video = tf.placeholder(tf.float32, input_shape)
            with self.assertRaises(ValueError):
                _, _ = model(video,
                             is_training=False,
                             dropout_keep_prob=keep_prob)
예제 #2
0
def make_model(is_training, inputs, params):
    """Build an I3D RGB backbone plus a 3-channel 1x1x1 head; return its logits.

    Args:
        is_training: Forwarded only to the final `Conv3d_0c_1x1` unit. The
            I3D backbone itself is always called with `is_training=False`.
        inputs: Mapping that provides the input tensor under the key
            `'images'`. Its static shape must be
            `[None, 25, params.img_size, params.img_size, 3]`.
        params: Object exposing an `img_size` attribute.

    Returns:
        The head's output after squeezing axes 2 and 3 and averaging over
        axis 1.

    Raises:
        AssertionError: If the static shape of `inputs['images']` does not
            match the expected shape (only when assertions are enabled).
    """
    images = inputs['images']
    print(images.get_shape())
    # Static-shape check; the clip length is fixed at 25 here.
    assert images.get_shape().as_list() == [
        None, 25, params.img_size, params.img_size, 3
    ], "images of shape {}".format(images.get_shape())

    rgb_input = images
    with tf.variable_scope('RGB'):
        rgb_model = i3d.InceptionI3d(IMAGENET_NUM_CLASSES,
                                     spatial_squeeze=True,
                                     final_endpoint='Mixed_5c')
        # NOTE(review): neither `m5c` nor `c` is used again in this function.
        m5c, c = rgb_model(rgb_input, is_training=False, dropout_keep_prob=0.5)

    # Map checkpoint names (variable name without the ':0' suffix) to the
    # variables whose top-level scope is 'RGB'.
    rgb_variable_map = {}

    for variable in tf.global_variables():
        if variable.name.split('/')[0] == 'RGB':
            rgb_variable_map[variable.name.replace(':0', '')] = variable

    # Debug listing of every variable that will be restored.
    for key, value in rgb_variable_map.items():
        print(key, value)
    rgb_saver = tf.train.Saver(var_list=rgb_variable_map, reshape=True)

    # NOTE(review): the restore happens in a session that is closed when this
    # `with` block exits; restored values presumably do not carry over to a
    # session the caller opens later -- confirm how weights are meant to be
    # loaded for fine-tuning.
    with tf.Session() as sess:
        rgb_saver.restore(sess, _CHECKPOINT_PATHS['rgb_imagenet'])
        graph = tf.get_default_graph()
        # NOTE(review): judging by its name, this tensor is a batch-norm
        # `beta` parameter rather than the Mixed_5c activation (`m5c` above),
        # so the head below would not depend on `images` -- confirm intent.
        net = graph.get_tensor_by_name(
            "RGB/inception_i3d/Mixed_5c/Branch_3/Conv3d_0b_1x1/batch_norm/beta:0"
        )
        # Fine-tuning layers are added below, outside the session block.
    # Pooling with a 1x1x1 window and unit strides.
    # NOTE(review): this looks like a no-op -- confirm the intended kernel.
    net = tf.nn.avg_pool3d(net,
                           ksize=[1, 1, 1, 1, 1],
                           strides=[1, 1, 1, 1, 1],
                           padding=snt.VALID)

    with tf.variable_scope("RGB"):
        with tf.variable_scope("Dense"):
            # NOTE(review): dropout is applied unconditionally; it is not
            # gated on `is_training`. The 0.5 is presumably the keep
            # probability (TF1 positional argument) -- confirm.
            net = tf.nn.dropout(net, 0.5, name="dropout")
            # 1x1x1 convolution with 3 output channels, a bias, and neither
            # batch norm nor an activation function.
            logits = i3d.Unit3D(output_channels=3,
                                kernel_shape=[1, 1, 1],
                                activation_fn=None,
                                use_batch_norm=False,
                                use_bias=True,
                                name='Conv3d_0c_1x1')(net,
                                                      is_training=is_training)
            # Drop axes 2 and 3, then average over axis 1.
            # assumes axes 2/3 are spatial and axis 1 is time -- TODO confirm
            logits = tf.squeeze(logits, [2, 3], name='SpatialSqueeze')
            averaged_logits = tf.reduce_mean(logits, axis=1)
    return averaged_logits
예제 #3
0
    def testModelShapesWithSqueeze(self):
        """Feed an all-zero batch through the model and check output shapes."""
        batch_size = 5
        model = i3d.InceptionI3d(num_classes=_NUM_CLASSES,
                                 final_endpoint='Predictions')
        video = tf.placeholder(tf.float32,
                               [None, 64, _IMAGE_SIZE, _IMAGE_SIZE, 3])
        predictions, end_points = model(video,
                                        is_training=True,
                                        dropout_keep_prob=0.5)
        fetches = [predictions, end_points['Logits']]

        initializer = tf.global_variables_initializer()
        zero_batch = np.zeros((batch_size, 64, _IMAGE_SIZE, _IMAGE_SIZE, 3))
        with tf.Session() as sess:
            sess.run(initializer)
            prediction_values, logit_values = sess.run(
                fetches, feed_dict={video: zero_batch})

        # With the default spatial squeeze, both outputs are
        # (batch, num_classes).
        expected_shape = (batch_size, _NUM_CLASSES)
        self.assertEqual(prediction_values.shape, expected_shape)
        self.assertEqual(logit_values.shape, expected_shape)
예제 #4
0
    def testModelShapesWithoutSqueeze(self):
        """Check that disabling `spatial_squeeze` changes the output shape.

        Unlike the squeeze test, this one builds the model with
        `is_training=False` and `dropout_keep_prob=1.0`.
        """
        batch_size = 5
        model = i3d.InceptionI3d(num_classes=_NUM_CLASSES,
                                 spatial_squeeze=False,
                                 final_endpoint='Predictions')
        video = tf.placeholder(tf.float32,
                               [None, 64, _IMAGE_SIZE, _IMAGE_SIZE, 3])
        predictions, end_points = model(video,
                                        is_training=False,
                                        dropout_keep_prob=1.0)
        fetches = [predictions, end_points['Logits']]

        initializer = tf.global_variables_initializer()
        zero_batch = np.zeros((batch_size, 64, _IMAGE_SIZE, _IMAGE_SIZE, 3))
        with tf.Session() as sess:
            sess.run(initializer)
            prediction_values, logit_values = sess.run(
                fetches, feed_dict={video: zero_batch})

        # Without the squeeze, the two singleton axes are kept.
        expected_shape = (batch_size, 1, 1, _NUM_CLASSES)
        self.assertEqual(prediction_values.shape, expected_shape)
        self.assertEqual(logit_values.shape, expected_shape)
예제 #5
0
def main(unused_argv):
    """Run I3D on the sample clip(s) and print the 20 most likely classes.

    `FLAGS.eval_type` selects which stream(s) are built:
    `rgb` / `rgb600` (RGB only), `flow` (flow only) or `joint` (the RGB and
    flow logits are summed). `FLAGS.imagenet_pretrained` selects the
    `*_imagenet` checkpoints instead of the default ones.

    Args:
        unused_argv: Ignored.

    Raises:
        ValueError: If `FLAGS.eval_type` is not one of `rgb`, `rgb600`,
            `flow`, `joint`.
    """
    tf.logging.set_verbosity(tf.logging.INFO)
    eval_type = FLAGS.eval_type
    imagenet_pretrained = FLAGS.imagenet_pretrained

    if eval_type not in ['rgb', 'rgb600', 'flow', 'joint']:
        raise ValueError('Bad `eval_type`, must be one of rgb, rgb600, flow, joint')

    use_rgb = eval_type in ['rgb', 'rgb600', 'joint']
    use_flow = eval_type in ['flow', 'joint']
    num_classes = 600 if eval_type == 'rgb600' else 400

    # One class label per line; the context manager closes the file handle.
    label_map_path = (
        _LABEL_MAP_PATH_600 if eval_type == 'rgb600' else _LABEL_MAP_PATH)
    with open(label_map_path) as label_file:
        kinetics_classes = [line.strip() for line in label_file]

    if use_rgb:
        # RGB input has 3 channels.
        rgb_input = tf.placeholder(
            tf.float32,
            shape=(1, _SAMPLE_VIDEO_FRAMES, _IMAGE_SIZE, _IMAGE_SIZE, 3))

        with tf.variable_scope('RGB'):
            rgb_model = i3d.InceptionI3d(
                num_classes, spatial_squeeze=True, final_endpoint='Logits')
            rgb_logits, _ = rgb_model(
                rgb_input, is_training=False, dropout_keep_prob=1.0)

        rgb_variable_map = {}
        for variable in tf.global_variables():
            if variable.name.split('/')[0] != 'RGB':
                continue
            checkpoint_name = variable.name.replace(':0', '')
            if eval_type == 'rgb600':
                # Drop the leading 'RGB/inception_i3d/' from the name;
                # presumably the rgb600 checkpoint stores its variables
                # without that scope prefix -- verify against the checkpoint.
                checkpoint_name = checkpoint_name[len('RGB/inception_i3d/'):]
            rgb_variable_map[checkpoint_name] = variable

        rgb_saver = tf.train.Saver(var_list=rgb_variable_map, reshape=True)

    if use_flow:
        # Flow input has only 2 channels.
        flow_input = tf.placeholder(
            tf.float32,
            shape=(1, _SAMPLE_VIDEO_FRAMES, _IMAGE_SIZE, _IMAGE_SIZE, 2))
        with tf.variable_scope('Flow'):
            flow_model = i3d.InceptionI3d(
                num_classes, spatial_squeeze=True, final_endpoint='Logits')
            flow_logits, _ = flow_model(
                flow_input, is_training=False, dropout_keep_prob=1.0)
        flow_variable_map = {
            variable.name.replace(':0', ''): variable
            for variable in tf.global_variables()
            if variable.name.split('/')[0] == 'Flow'
        }
        flow_saver = tf.train.Saver(var_list=flow_variable_map, reshape=True)

    # `eval_type` was validated above, so at least one stream exists.
    if use_rgb and use_flow:
        model_logits = rgb_logits + flow_logits
    elif use_rgb:
        model_logits = rgb_logits
    else:
        model_logits = flow_logits
    model_predictions = tf.nn.softmax(model_logits)

    with tf.Session() as sess:
        feed_dict = {}
        if use_rgb:
            if imagenet_pretrained:
                rgb_checkpoint_key = 'rgb_imagenet'
            elif eval_type == 'joint' and 'joint' not in _CHECKPOINT_PATHS:
                # In joint mode the flow stream below always loads the 'flow'
                # checkpoint; mirror that for RGB instead of failing on a
                # missing 'joint' entry.
                rgb_checkpoint_key = 'rgb'
            else:
                rgb_checkpoint_key = eval_type
            rgb_saver.restore(sess, _CHECKPOINT_PATHS[rgb_checkpoint_key])
            print(model_logits)
            tf.logging.info('RGB checkpoint restored')
            rgb_sample = np.load(_SAMPLE_PATHS['rgb'])
            tf.logging.info('RGB data loaded, shape=%s', str(rgb_sample.shape))
            feed_dict[rgb_input] = rgb_sample

        if use_flow:
            flow_checkpoint_key = 'flow_imagenet' if imagenet_pretrained else 'flow'
            flow_saver.restore(sess, _CHECKPOINT_PATHS[flow_checkpoint_key])
            tf.logging.info('Flow checkpoint restored')
            flow_sample = np.load(_SAMPLE_PATHS['flow'])
            tf.logging.info('Flow data loaded, shape=%s', str(flow_sample.shape))
            feed_dict[flow_input] = flow_sample

        out_logits, out_predictions = sess.run(
            [model_logits, model_predictions],
            feed_dict=feed_dict)

        # The placeholders have batch size 1; keep the single example.
        out_logits = out_logits[0]
        out_predictions = out_predictions[0]
        # Class indices ordered from most to least probable.
        sorted_indices = np.argsort(out_predictions)[::-1]

        print('Norm of logits: %f' % np.linalg.norm(out_logits))
        print('\nTop classes and probabilities')
        for index in sorted_indices[:20]:
            print(out_predictions[index], out_logits[index], kinetics_classes[index])