def testInitErrors(self):
    """Check that invalid constructor and call arguments raise ValueError."""
    # An unsupported `final_endpoint` name must be rejected at construction.
    with self.assertRaises(ValueError):
        _ = i3d.InceptionI3d(
            num_classes=_NUM_CLASSES, final_endpoint='Conv3d_1a_8x8')

    # Dropout keep probability must be in (0, 1], so 0 has to be rejected.
    model = i3d.InceptionI3d(num_classes=_NUM_CLASSES)
    full_size_shape = [None, 64, _IMAGE_SIZE, _IMAGE_SIZE, 3]
    video = tf.placeholder(tf.float32, full_size_shape)
    with self.assertRaises(ValueError):
        _, _ = model(video, is_training=False, dropout_keep_prob=0)

    # Height and width of the input should be _IMAGE_SIZE; 10x10 is rejected.
    model = i3d.InceptionI3d(num_classes=_NUM_CLASSES)
    tiny_shape = [None, 64, 10, 10, 3]
    video = tf.placeholder(tf.float32, tiny_shape)
    with self.assertRaises(ValueError):
        _, _ = model(video, is_training=False, dropout_keep_prob=0.5)
def make_model(is_training, inputs, params):
    """Build an I3D RGB trunk with a 3-output fine-tuning head.

    The trunk is an `i3d.InceptionI3d` truncated at the 'Mixed_5c' endpoint and
    built under the 'RGB' variable scope with `is_training=False`. A 1x1x1
    `i3d.Unit3D` layer with 3 output channels is stacked on top of the
    spatially pooled 'Mixed_5c' output.

    Args:
      is_training: Forwarded as `is_training` to the final `Unit3D` layer only;
        the trunk is always built with `is_training=False`.
      inputs: Mapping holding an 'images' tensor whose static shape must be
        [None, 25, params.img_size, params.img_size, 3].
      params: Object exposing an `img_size` attribute.

    Returns:
      The head logits averaged over axis 1 (presumably the time axis, giving
      one 3-vector per example -- TODO confirm against the caller).

    Raises:
      AssertionError: If `inputs['images']` does not have the expected static
        shape.
      ValueError: If the spatial size of the 'Mixed_5c' output is not
        statically known.
    """
    images = inputs['images']
    print(images.get_shape())
    assert images.get_shape().as_list() == [
        None, 25, params.img_size, params.img_size, 3
    ], "images of shape {}".format(images.get_shape())

    rgb_input = images
    with tf.variable_scope('RGB'):
        rgb_model = i3d.InceptionI3d(IMAGENET_NUM_CLASSES,
                                     spatial_squeeze=True,
                                     final_endpoint='Mixed_5c')
        m5c, _ = rgb_model(rgb_input, is_training=False, dropout_keep_prob=0.5)

    # Collect only the trunk variables; the head below is created afterwards
    # and is therefore deliberately absent from the saver's variable list.
    rgb_variable_map = {}
    for variable in tf.global_variables():
        if variable.name.split('/')[0] == 'RGB':
            rgb_variable_map[variable.name.replace(':0', '')] = variable
    for key, value in rgb_variable_map.items():
        print(key, value)

    rgb_saver = tf.train.Saver(var_list=rgb_variable_map, reshape=True)
    # NOTE(review): the checkpoint is restored into a session that is closed
    # right away. Presumably the restored values are not visible to a session
    # the caller opens later -- confirm whether the caller restores again.
    with tf.Session() as sess:
        rgb_saver.restore(sess, _CHECKPOINT_PATHS['rgb_imagenet'])

    # add fine-tuning layers
    # The head must consume the trunk activation `m5c`. Previously it was fed
    # the batch-norm `beta` variable of one Mixed_5c branch, which made the
    # returned logits independent of the input images.
    _, _, feature_height, feature_width, _ = m5c.get_shape().as_list()
    if feature_height is None or feature_width is None:
        raise ValueError(
            'Mixed_5c output must have a static spatial size, got shape '
            '{}'.format(m5c.get_shape()))
    # Pool over the whole spatial extent so axes 2 and 3 become size 1 and the
    # squeeze below is valid; the time axis is left untouched and is reduced by
    # the final mean instead.
    net = tf.nn.avg_pool3d(m5c,
                           ksize=[1, 1, feature_height, feature_width, 1],
                           strides=[1, 1, 1, 1, 1],
                           padding=snt.VALID)

    with tf.variable_scope("RGB"):
        with tf.variable_scope("Dense"):
            # NOTE(review): dropout is applied regardless of `is_training` --
            # confirm whether evaluation should use a keep probability of 1.0.
            net = tf.nn.dropout(net, 0.5, name="dropout")
            logits = i3d.Unit3D(output_channels=3,
                                kernel_shape=[1, 1, 1],
                                activation_fn=None,
                                use_batch_norm=False,
                                use_bias=True,
                                name='Conv3d_0c_1x1')(net, is_training=is_training)
            logits = tf.squeeze(logits, [2, 3], name='SpatialSqueeze')

    averaged_logits = tf.reduce_mean(logits, axis=1)
    return averaged_logits
def testModelShapesWithSqueeze(self):
    """Feed zero-valued fake data through the model and check output shapes."""
    batch_size = 5
    clip_shape = [64, _IMAGE_SIZE, _IMAGE_SIZE, 3]

    model = i3d.InceptionI3d(num_classes=_NUM_CLASSES,
                             final_endpoint='Predictions')
    video = tf.placeholder(tf.float32, [None] + clip_shape)
    predictions, end_points = model(video,
                                    is_training=True,
                                    dropout_keep_prob=0.5)
    init_op = tf.global_variables_initializer()

    with tf.Session() as sess:
        sess.run(init_op)
        fake_batch = np.zeros(tuple([batch_size] + clip_shape))
        fetches = [predictions, end_points['Logits']]
        out_predictions, out_logits = sess.run(fetches, {video: fake_batch})

        # Both the softmax predictions and the raw logits are squeezed down to
        # one row of class scores per example.
        expected_shape = (batch_size, _NUM_CLASSES)
        self.assertEqual(out_predictions.shape, expected_shape)
        self.assertEqual(out_logits.shape, expected_shape)
def testModelShapesWithoutSqueeze(self):
    """Check that disabling `spatial_squeeze` changes the output shape.

    Also exercises different values for `dropout_keep_prob` and the
    snt.BatchNorm `is_training` setting than the squeeze test uses.
    """
    batch_size = 5
    clip_shape = [64, _IMAGE_SIZE, _IMAGE_SIZE, 3]

    model = i3d.InceptionI3d(num_classes=_NUM_CLASSES,
                             spatial_squeeze=False,
                             final_endpoint='Predictions')
    video = tf.placeholder(tf.float32, [None] + clip_shape)
    predictions, end_points = model(video,
                                    is_training=False,
                                    dropout_keep_prob=1.0)
    init_op = tf.global_variables_initializer()

    with tf.Session() as sess:
        sess.run(init_op)
        fake_batch = np.zeros(tuple([batch_size] + clip_shape))
        fetches = [predictions, end_points['Logits']]
        out_predictions, out_logits = sess.run(fetches, {video: fake_batch})

        # Without the squeeze, the two singleton axes are kept in the output.
        expected_shape = (batch_size, 1, 1, _NUM_CLASSES)
        self.assertEqual(out_predictions.shape, expected_shape)
        self.assertEqual(out_logits.shape, expected_shape)
def main(unused_argv):
    """Run I3D on the sample inputs and print the top-20 classes.

    Reads `FLAGS.eval_type` and `FLAGS.imagenet_pretrained`, builds the RGB
    and/or Flow `i3d.InceptionI3d` graphs, restores the matching checkpoints
    from `_CHECKPOINT_PATHS`, feeds the arrays loaded from `_SAMPLE_PATHS`, and
    prints the norm of the logits followed by the 20 highest-scoring classes.

    Args:
      unused_argv: Ignored.

    Raises:
      ValueError: If `FLAGS.eval_type` is not one of rgb, rgb600, flow, joint.
    """
    tf.logging.set_verbosity(tf.logging.INFO)
    eval_type = FLAGS.eval_type
    imagenet_pretrained = FLAGS.imagenet_pretrained

    # Validate first so nothing below is derived from a bad value.
    if eval_type not in ['rgb', 'rgb600', 'flow', 'joint']:
        raise ValueError('Bad `eval_type`, must be one of rgb, rgb600, flow, joint')

    if eval_type == 'rgb600':
        num_classes = 600
        label_map_path = _LABEL_MAP_PATH_600
    else:
        num_classes = 400
        label_map_path = _LABEL_MAP_PATH

    # Use a context manager so the label file is closed deterministically.
    with open(label_map_path) as label_file:
        kinetics_classes = [line.strip() for line in label_file]

    uses_rgb = eval_type in ['rgb', 'rgb600', 'joint']
    uses_flow = eval_type in ['flow', 'joint']

    if uses_rgb:
        # RGB input has 3 channels.
        rgb_input = tf.placeholder(
            tf.float32,
            shape=(1, _SAMPLE_VIDEO_FRAMES, _IMAGE_SIZE, _IMAGE_SIZE, 3))

        with tf.variable_scope('RGB'):
            rgb_model = i3d.InceptionI3d(
                num_classes, spatial_squeeze=True, final_endpoint='Logits')
            rgb_logits, _ = rgb_model(
                rgb_input, is_training=False, dropout_keep_prob=1.0)

        rgb_variable_map = {}
        for variable in tf.global_variables():
            if variable.name.split('/')[0] != 'RGB':
                continue
            checkpoint_name = variable.name.replace(':0', '')
            if eval_type == 'rgb600':
                # For rgb600 the saver keys drop the leading
                # 'RGB/inception_i3d/' prefix (presumably that checkpoint
                # stores its variables unprefixed -- TODO confirm).
                checkpoint_name = checkpoint_name[len('RGB/inception_i3d/'):]
            rgb_variable_map[checkpoint_name] = variable

        rgb_saver = tf.train.Saver(var_list=rgb_variable_map, reshape=True)

    if uses_flow:
        # Flow input has only 2 channels.
        flow_input = tf.placeholder(
            tf.float32,
            shape=(1, _SAMPLE_VIDEO_FRAMES, _IMAGE_SIZE, _IMAGE_SIZE, 2))

        with tf.variable_scope('Flow'):
            flow_model = i3d.InceptionI3d(
                num_classes, spatial_squeeze=True, final_endpoint='Logits')
            flow_logits, _ = flow_model(
                flow_input, is_training=False, dropout_keep_prob=1.0)

        flow_variable_map = {}
        for variable in tf.global_variables():
            if variable.name.split('/')[0] == 'Flow':
                flow_variable_map[variable.name.replace(':0', '')] = variable

        flow_saver = tf.train.Saver(var_list=flow_variable_map, reshape=True)

    if eval_type in ['rgb', 'rgb600']:
        model_logits = rgb_logits
    elif eval_type == 'flow':
        model_logits = flow_logits
    else:
        # 'joint': the two streams are combined by summing their logits.
        model_logits = rgb_logits + flow_logits
    model_predictions = tf.nn.softmax(model_logits)

    with tf.Session() as sess:
        feed_dict = {}

        if uses_rgb:
            if imagenet_pretrained:
                rgb_saver.restore(sess, _CHECKPOINT_PATHS['rgb_imagenet'])
            else:
                rgb_saver.restore(sess, _CHECKPOINT_PATHS[eval_type])
            print(model_logits)
            tf.logging.info('RGB checkpoint restored')
            rgb_sample = np.load(_SAMPLE_PATHS['rgb'])
            tf.logging.info('RGB data loaded, shape=%s', str(rgb_sample.shape))
            feed_dict[rgb_input] = rgb_sample

        if uses_flow:
            if imagenet_pretrained:
                flow_saver.restore(sess, _CHECKPOINT_PATHS['flow_imagenet'])
            else:
                flow_saver.restore(sess, _CHECKPOINT_PATHS['flow'])
            tf.logging.info('Flow checkpoint restored')
            flow_sample = np.load(_SAMPLE_PATHS['flow'])
            tf.logging.info('Flow data loaded, shape=%s', str(flow_sample.shape))
            feed_dict[flow_input] = flow_sample

        out_logits, out_predictions = sess.run(
            [model_logits, model_predictions], feed_dict=feed_dict)

        # The placeholders have a batch size of 1, so keep only that example.
        out_logits = out_logits[0]
        out_predictions = out_predictions[0]
        # Class indices ordered from highest to lowest predicted probability.
        sorted_indices = np.argsort(out_predictions)[::-1]

        print('Norm of logits: %f' % np.linalg.norm(out_logits))
        print('\nTop classes and probabilities')
        for index in sorted_indices[:20]:
            print(out_predictions[index], out_logits[index], kinetics_classes[index])