def load_i3d_model(num_classes, eval_type='rgb', scope='RGB',
                   spatial_squeeze=True, final_endpoint='Logits'):
    with tf.variable_scope('RGB'):
        rgb_model = i3d.InceptionI3d(
            400, spatial_squeeze=spatial_squeeze, final_endpoint=final_endpoint)

        # `rgb_input`, `adv_flag`, `mask_rgb` and `eps_rgb` are the
        # adversarial-perturbation tensors defined elsewhere in this module.
        adversarial_inputs_rgb = tf.nn.tanh(
            rgb_input + adv_flag * (mask_rgb * eps_rgb))
        # adversarial_inputs_rgb = tf.nn.tanh(rgb_input + adv_flag * eps_rgb)

        rgb_logits, _ = rgb_model(
            adversarial_inputs_rgb, is_training=False, dropout_keep_prob=1.0)

        # keep_prob=1 makes this dropout a no-op; kept for graph parity.
        rgb_logits_dropout = tf.nn.dropout(rgb_logits, 1)
        rgb_logits = tf.layers.dense(
            rgb_logits_dropout, NUM_CLASSES, use_bias=True)

    with tf.variable_scope(scope):
        i3d_model = i3d.InceptionI3d(
            num_classes, spatial_squeeze=spatial_squeeze,
            final_endpoint=final_endpoint)

    # Run a dummy forward pass so the model's variables are created.
    dummy_input = tf.placeholder(
        tf.float32,
        shape=(None, _SAMPLE_VIDEO_FRAMES, _IMAGE_SIZE, _IMAGE_SIZE, 3))
    i3d_model(dummy_input, is_training=False, dropout_keep_prob=1.0)

    return i3d_model
Example #2
    def init_model(self):
        _IMAGE_SIZE = 224
        _SAMPLE_VIDEO_FRAMES = 79
        NUM_CLASSES = 101
        classes = open('classes.txt').read().splitlines()

        #RGB SETUP
        self.rgb_input = tf.placeholder(tf.float32,
                                        shape=(1, _SAMPLE_VIDEO_FRAMES,
                                               _IMAGE_SIZE, _IMAGE_SIZE, 3))
        with tf.variable_scope('RGB'):
            rgb_model = i3d.InceptionI3d(NUM_CLASSES,
                                         spatial_squeeze=True,
                                         final_endpoint='Logits')
            self.rgb_logits, _ = rgb_model(self.rgb_input,
                                           is_training=False,
                                           dropout_keep_prob=1.0)

        rgb_variable_map = {}
        for variable in tf.global_variables():
            if variable.name.split('/')[0] == 'RGB':
                rgb_variable_map[variable.name.replace(':0', '').replace(
                    'Conv3d',
                    'Conv2d').replace('conv_3d/w', 'weights').replace(
                        'conv_3d/b',
                        'biases').replace('RGB/inception_i3d',
                                          'InceptionV1').replace(
                                              'batch_norm',
                                              'BatchNorm')] = variable
        self.rgb_saver = tf.train.Saver(var_list=rgb_variable_map,
                                        reshape=True)

        #FLOW SETUP
        self.flow_input = tf.placeholder(tf.float32,
                                         shape=(1, _SAMPLE_VIDEO_FRAMES,
                                                _IMAGE_SIZE, _IMAGE_SIZE, 2))
        with tf.variable_scope('Flow'):
            flow_model = i3d.InceptionI3d(NUM_CLASSES,
                                          spatial_squeeze=True,
                                          final_endpoint='Logits')
            self.flow_logits, _ = flow_model(self.flow_input,
                                             is_training=False,
                                             dropout_keep_prob=1.0)
        flow_variable_map = {}
        for variable in tf.global_variables():
            if variable.name.split('/')[0] == 'Flow':
                flow_variable_map[variable.name.replace(':0', '').replace(
                    'Conv3d',
                    'Conv2d').replace('conv_3d/w', 'weights').replace(
                        'conv_3d/b',
                        'biases').replace('Flow/inception_i3d',
                                          'InceptionV1').replace(
                                              'batch_norm',
                                              'BatchNorm')] = variable
        self.flow_saver = tf.train.Saver(var_list=flow_variable_map,
                                         reshape=True)

        self.rgb_predictions = tf.nn.softmax(self.rgb_logits)
        self.flow_predictions = tf.nn.softmax(self.flow_logits)
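A short sketch of how these savers and prediction tensors might be driven; the wrapper class name, checkpoint paths, and fusion weighting below are assumptions, not part of the example:

model = Model()  # hypothetical wrapper class that owns init_model()
model.init_model()
rgb_sample = np.zeros((1, 79, 224, 224, 3), dtype=np.float32)
flow_sample = np.zeros((1, 79, 224, 224, 2), dtype=np.float32)
with tf.Session() as sess:
    model.rgb_saver.restore(sess, 'checkpoints/rgb_imagenet/model.ckpt')
    model.flow_saver.restore(sess, 'checkpoints/flow_imagenet/model.ckpt')
    rgb_p, flow_p = sess.run(
        [model.rgb_predictions, model.flow_predictions],
        feed_dict={model.rgb_input: rgb_sample,
                   model.flow_input: flow_sample})
    joint = (rgb_p + flow_p) / 2.0  # naive average of the two streams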
Example #3
def train():
    file_names, labels = read_csv(csv_path)

    print(f"read video {train_path + file_names[0] }")
    file_names = list(map(lambda filename: train_path + filename, file_names))
    print(
        f"============================= fileNames: {file_names[0]} =========")

    dataset = build_dataset(file_names, labels)
    iterator = dataset.make_one_shot_iterator()
    X, Y = iterator.get_next()
    print(f"X: {X}, Y:{Y}")
    model = i3d.InceptionI3d(num_classes=num_classes, final_endpoint='Logits')
    logits, _ = model(X, is_training=True)

    learning_rate = 0.01

    cost = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=Y))
    optimizer = tf.train.AdamOptimizer(
        learning_rate=learning_rate).minimize(cost)
    init = tf.global_variables_initializer()

    with tf.Session() as sess:
        sess.run(init)
        for epoch in range(n_epochs):
            for i in range(len(file_names)):
                _, loss = sess.run([optimizer, cost])

    print("optimization finished")
Example #4
def get_preds_tensor(input_mode='rgb', n_frames=16, batch_size=10):
    """Function to get the predictions tensor, input placeholder and saver object
        :param input_mode: One of 'rgb','flow','two_stream'"""
    if input_mode == 'rgb':
        rgb_variable_map = {}
        input_fr_rgb = tf.placeholder(
            tf.float32,
            shape=[batch_size, n_frames, _IMAGE_SIZE, _IMAGE_SIZE, 3],
            name="Input_Video_Placeholder")
        with tf.variable_scope('RGB'):
            #Building I3D for RGB-only input
            rgb_model = i3d.InceptionI3d(_NUM_CLASSES,
                                         spatial_squeeze=True,
                                         final_endpoint='Logits')

            rgb_logits, _ = rgb_model(input_fr_rgb,
                                      is_training=False,
                                      dropout_keep_prob=1.0)

        print(len(tf.global_variables()))
        for variable in tf.global_variables():
            if variable.name.split('/')[0] == 'RGB':
                rgb_variable_map[variable.name.replace(':0', '')] = variable
        print(len(rgb_variable_map))
        rgb_saver = tf.train.Saver(var_list=rgb_variable_map, reshape=True)
        model_predictions = tf.nn.softmax(rgb_logits)
        top_classes = tf.argmax(model_predictions, axis=1)
        return top_classes,model_predictions, \
                input_fr_rgb, rgb_saver
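A possible call pattern for this helper; the checkpoint path is an illustrative assumption, and `_IMAGE_SIZE` is the module constant used above:

top_classes, preds, input_ph, saver = get_preds_tensor(
    input_mode='rgb', n_frames=16, batch_size=10)
with tf.Session() as sess:
    saver.restore(sess, 'checkpoints/rgb_imagenet/model.ckpt')
    clips = np.zeros((10, 16, _IMAGE_SIZE, _IMAGE_SIZE, 3), dtype=np.float32)
    top = sess.run(top_classes, feed_dict={input_ph: clips})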
Example #5
def model_visual_features(rgb_array):
    i3d_model = i3d.InceptionI3d(num_classes=_NUM_CLASSES,
                                 final_endpoint='Predictions')

    inp = tf.placeholder(tf.float32,
                         [None, _FRAMES, _IMAGE_SIZE[0], _IMAGE_SIZE[1], 3])

    predictions, end_points = i3d_model(inp,
                                        is_training=True,
                                        dropout_keep_prob=0.5)

    init_op = tf.global_variables_initializer()
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    # sample_input = np.zeros((5, 64, _IMAGE_SIZE[0], _IMAGE_SIZE[1], 3))
    sample_input = rgb_array

    with tf.Session(config=config) as sess:
        sess.run(init_op)
        out_predictions, out_logits = sess.run(
            [predictions, end_points['Logits']], {inp: sample_input})

    tf.reset_default_graph()

    return out_logits
Example #6
def inference(rgb_inputs, flow_inputs):
    with tf.variable_scope('RGB'):
        rgb_model = i3d.InceptionI3d(NUM_CLASSES,
                                     spatial_squeeze=True,
                                     final_endpoint='Logits')
        rgb_logits, _ = rgb_model(rgb_inputs,
                                  is_training=True,
                                  dropout_keep_prob=DROPOUT_KEEP_PROB)
    with tf.variable_scope('Flow'):
        flow_model = i3d.InceptionI3d(NUM_CLASSES,
                                      spatial_squeeze=True,
                                      final_endpoint='Logits')
        flow_logits, _ = flow_model(flow_inputs,
                                    is_training=True,
                                    dropout_keep_prob=DROPOUT_KEEP_PROB)
    return rgb_logits, flow_logits
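The two logits returned here are typically fused by summation before the softmax, as in joint two-stream evaluation. A hedged training sketch, assuming `rgb_inputs` and `flow_inputs` placeholders already exist and the optimizer choice is illustrative:

rgb_logits, flow_logits = inference(rgb_inputs, flow_inputs)
joint_logits = rgb_logits + flow_logits  # late fusion of the two streams
labels = tf.placeholder(tf.int64, [None])
loss = tf.reduce_mean(
    tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels,
                                                   logits=joint_logits))
train_op = tf.train.AdamOptimizer(1e-4).minimize(loss)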
Example #7
    def testInitErrors(self):
        # Invalid `final_endpoint` string.
        with self.assertRaises(ValueError):
            _ = i3d.InceptionI3d(num_classes=_NUM_CLASSES,
                                 final_endpoint='Conv3d_1a_8x8')

        # Dropout keep probability must be in (0, 1].
        i3d_model = i3d.InceptionI3d(num_classes=_NUM_CLASSES)
        inp = tf.placeholder(tf.float32,
                             [None, 64, _IMAGE_SIZE, _IMAGE_SIZE, 3])
        with self.assertRaises(ValueError):
            _, _ = i3d_model(inp, is_training=False, dropout_keep_prob=0)

        # Height and width dimensions of the input should be _IMAGE_SIZE.
        i3d_model = i3d.InceptionI3d(num_classes=_NUM_CLASSES)
        inp = tf.placeholder(tf.float32, [None, 64, 10, 10, 3])
        with self.assertRaises(ValueError):
            _, _ = i3d_model(inp, is_training=False, dropout_keep_prob=0.5)
Example #8
def get_model(streamType, numSeg):
    if streamType == 'rgb':
        # RGB input has 3 channels.
        rgb_input = tf.placeholder(
           tf.float32,
           shape=(numSeg, SAMPLE_VIDEO_FRAMES, IMAGE_SIZE, IMAGE_SIZE, 3))
        with tf.variable_scope('RGB'):
            rgb_model = i3d.InceptionI3d(
               NUM_CLASSES, spatial_squeeze=True, final_endpoint='Logits')
            rgb_logits, _ = rgb_model(
               rgb_input, is_training=False, dropout_keep_prob=1.0)
        rgb_variable_map = {}
        for variable in tf.global_variables():
            if variable.name.split('/')[0] == 'RGB':
                rgb_variable_map[variable.name.replace(':0', '')] = variable
        saver = tf.train.Saver(var_list=rgb_variable_map, reshape=True)

    elif streamType == 'flow':
        # Flow input has only 2 channels.
        flow_input = tf.placeholder(
           tf.float32,
           shape=(numSeg, SAMPLE_VIDEO_FRAMES, IMAGE_SIZE, IMAGE_SIZE, 2))
        with tf.variable_scope('Flow'):
            flow_model = i3d.InceptionI3d(
               NUM_CLASSES, spatial_squeeze=True, final_endpoint='Logits')
            flow_logits, _ = flow_model(
               flow_input, is_training=False, dropout_keep_prob=1.0)
        flow_variable_map = {}
        for variable in tf.global_variables():
            if variable.name.split('/')[0] == 'Flow':
                flow_variable_map[variable.name.replace(':0', '')] = variable
        saver = tf.train.Saver(var_list=flow_variable_map, reshape=True)

    if streamType == 'rgb':
        model_logits = rgb_logits
        inputs = rgb_input
    elif streamType == 'flow':
        model_logits = flow_logits
        inputs = flow_input
    
    return saver, inputs, model_logits
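An example call for `get_model`; the checkpoint path is an illustrative assumption, and `SAMPLE_VIDEO_FRAMES`/`IMAGE_SIZE` are the module constants used above:

saver, inputs, model_logits = get_model('rgb', numSeg=10)
predictions = tf.nn.softmax(model_logits)
with tf.Session() as sess:
    saver.restore(sess, 'checkpoints/rgb_imagenet/model.ckpt')
    batch = np.zeros((10, SAMPLE_VIDEO_FRAMES, IMAGE_SIZE, IMAGE_SIZE, 3),
                     dtype=np.float32)
    probs = sess.run(predictions, feed_dict={inputs: batch})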
Example #9
def load_i3d_model(num_classes, eval_type='rgb', scope='RGB',
                   spatial_squeeze=True, final_endpoint='Logits'):
    with tf.variable_scope(scope):
        i3d_model = i3d.InceptionI3d(
          num_classes, spatial_squeeze=spatial_squeeze, final_endpoint=final_endpoint)

    dummy_input = tf.placeholder(
        tf.float32,
        shape=(None, _SAMPLE_VIDEO_FRAMES, _IMAGE_SIZE, _IMAGE_SIZE, 3))
    i3d_model(dummy_input, is_training=False, dropout_keep_prob=1.0)


    return i3d_model
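A minimal usage sketch for this helper; the constants and checkpoint path below are assumptions, not part of the example:

import tensorflow as tf
import i3d

_SAMPLE_VIDEO_FRAMES = 64  # assumed module-level constants
_IMAGE_SIZE = 224

i3d_model = load_i3d_model(num_classes=400)
# The dummy forward pass above created the variables, so they can be restored:
saver = tf.train.Saver(reshape=True)
with tf.Session() as sess:
    saver.restore(sess, 'checkpoints/rgb_imagenet/model.ckpt')  # hypothetical path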
Example #10
def short_video_extraction(vname, n_frames, frame_path, feat_path):

    batch_frames = 64
    # loading net
    rgb_input = tf.placeholder(tf.float32,
                               shape=(1, batch_frames, 224, 224, 3))
    with tf.variable_scope('RGB'):
        net = i3d.InceptionI3d(600,
                               spatial_squeeze=True,
                               final_endpoint='Logits')
        _, end_points = net(rgb_input,
                            is_training=False,
                            dropout_keep_prob=1.0)
    end_feature = end_points['avg_pool3d']
    sess = tf.Session()

    rgb_variable_map = {}
    for variable in tf.global_variables():
        if variable.name.split('/')[0] == 'RGB':
            rgb_variable_map[variable.name.replace(
                ':0', '')[len('RGB/inception_i3d/'):]] = variable

    saver = tf.train.Saver(var_list=rgb_variable_map, reshape=True)
    saver.restore(sess, I3D_CKPT_PATH)

    features = []
    for batch_i in range(math.ceil(n_frames / batch_frames)):
        input_blob = []
        for idx in range(batch_frames):
            idx = (batch_i * batch_frames + idx) % n_frames + 1
            image = Image.open(os.path.join(frame_path, '%06d.jpg' % idx))
            image = image.resize((224, 224))
            image = np.array(image, dtype='float32')

            image[:, :, :] -= 127.5
            image[:, :, :] /= 127.5
            input_blob.append(image)

        input_blob = np.array([input_blob], dtype='float32')

        clip_feature = sess.run(end_feature, feed_dict={rgb_input: input_blob})
        clip_feature = np.reshape(clip_feature, (-1, clip_feature.shape[-1]))
        print(batch_i, clip_feature.shape)
        features.append(clip_feature)

    if len(features) > 1:
        features = np.concatenate(features, axis=0)
    else:
        features = features[0]
    features = features[:n_frames // 8]
    print('Saving features for video: %s ...' % vname)
    np.save(feat_path, features)
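A hypothetical invocation, assuming frames are stored as 000001.jpg, 000002.jpg, ... under `frame_path` (paths and counts are illustrative):

short_video_extraction(vname='video_0001',
                       n_frames=256,
                       frame_path='frames/video_0001',
                       feat_path='features/video_0001.npy')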
Example #11
def i3d_loss(rgb, label, gpu_idx):
    """
    Builds an I3D model and computes the loss
    """
    with tf.variable_scope('RGB'):
        rgb_model = i3d.InceptionI3d(
            config.num_classes,
            spatial_squeeze=True,
            final_endpoint='Logits',
            freeze_before_logits=FLAGS.freeze_up_to_logits)
        rgb_logits = rgb_model(rgb, is_training=True, dropout_keep_prob=1.0)[0]
        rgb_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=label, logits=rgb_logits, name='cross_entropy_rgb')
        loss_rgb = tf.reduce_mean(rgb_loss, name='rgb_ce')
        summary_loss = tf.summary.scalar('rgb_loss_%d' % gpu_idx, loss_rgb)
    return loss_rgb, summary_loss
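The `gpu_idx` argument suggests one loss tower per GPU. A sketch of how those towers might be assembled; `num_gpus`, `rgb_batches`, and `label_batches` are assumptions:

tower_losses, tower_summaries = [], []
for gpu_idx in range(num_gpus):
    # Reuse variables across towers after the first one is built.
    with tf.device('/gpu:%d' % gpu_idx), \
         tf.variable_scope(tf.get_variable_scope(), reuse=gpu_idx > 0):
        loss, summ = i3d_loss(rgb_batches[gpu_idx], label_batches[gpu_idx],
                              gpu_idx)
        tower_losses.append(loss)
        tower_summaries.append(summ)
total_loss = tf.reduce_mean(tf.stack(tower_losses))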
Example #12
def get_model_loss(input_x,
                   input_y,
                   train_flag,
                   dropout_flag,
                   scope,
                   reuse=None,
                   num_class=NUM_CLASS,
                   top_k=TOP_K):
    with tf.variable_scope('RGB', reuse=reuse):
        model = i3d.InceptionI3d(num_classes=400,
                                 spatial_squeeze=True,
                                 final_endpoint='Logits')
        logits, _ = model(inputs=input_x,
                          is_training=train_flag,
                          dropout_keep_prob=dropout_flag)
        logits_dropout = tf.nn.dropout(logits, keep_prob=dropout_flag)
        out = tf.layers.dense(logits_dropout,
                              num_class,
                              activation=None,
                              use_bias=True)

    is_in_top_K = tf.cast(
        tf.nn.in_top_k(predictions=out, targets=input_y, k=top_k), tf.float32)

    # prepare l2 loss
    regularization_loss = 0.
    for var in tf.global_variables():
        var_name_type = var.name.split('/')[-1][:-2]
        if var_name_type == 'w' or var_name_type == 'kernel':
            regularization_loss += tf.nn.l2_loss(var)
    regularization_loss = tf.identity(regularization_loss,
                                      name='regularization_loss')

    loss_cross_entropy = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(labels=input_y,
                                                       logits=out,
                                                       name='cross_entropy'))
    total_loss = tf.add(loss_cross_entropy,
                        L2_PARAM * regularization_loss,
                        name='total_loss')

    tf.summary.scalar('{}/loss_ratio'.format(scope),
                      regularization_loss / loss_cross_entropy)

    return total_loss, loss_cross_entropy, is_in_top_K
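A hedged usage sketch for `get_model_loss`; the placeholder shapes and optimizer are assumptions, and `NUM_CLASS`, `TOP_K`, and `L2_PARAM` are module constants assumed to exist:

input_x = tf.placeholder(tf.float32, [None, 64, 224, 224, 3])
input_y = tf.placeholder(tf.int32, [None])
train_flag = tf.placeholder(tf.bool)
dropout_flag = tf.placeholder(tf.float32)
total_loss, ce_loss, in_top_k = get_model_loss(
    input_x, input_y, train_flag, dropout_flag, scope='tower_0')
train_op = tf.train.AdamOptimizer(1e-4).minimize(total_loss)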
Example #13
def get_preds_loss(ground_truth,
                   input_mode='rgb',
                   n_frames=16,
                   num_classes=_NUM_CLASSES,
                   batch_size=10,
                   dropout_keep_prob=0.6):
    """Function to get the predictions tensor,
        loss, input placeholder and saver object
        :param ground_truth: Tensor to hold ground truth
        :param input_mode: One of 'rgb','flow','two_stream'"""
    rgb_variable_map = {}
    input_fr_rgb = tf.placeholder(
        tf.float32,
        shape=[batch_size, n_frames, _IMAGE_SIZE, _IMAGE_SIZE, 3],
        name='Input_Video_Placeholder')
    with tf.variable_scope('RGB'):
        #Building I3D for RGB-only input
        rgb_model = i3d.InceptionI3d(spatial_squeeze=True,
                                     final_endpoint='Mixed_5c')
        rgb_mixed_5c, _ = rgb_model(input_fr_rgb,
                                    is_training=False,
                                    dropout_keep_prob=1.0)

        with tf.variable_scope('Logits_Mice'):
            net = tf.nn.avg_pool3d(rgb_mixed_5c,
                                   ksize=[1, 2, 7, 7, 1],
                                   strides=[1, 1, 1, 1, 1],
                                   padding='VALID')

            net = tf.nn.dropout(net, dropout_keep_prob)
            logits = conv3d(name='Logits',
                            input=net,
                            shape=[1, 1, 1, 1024, num_classes])
            logits = tf.squeeze(logits, [2, 3], name='SpatialSqueeze')
            averaged_logits = tf.reduce_mean(logits, axis=1)

    for variable in tf.global_variables():
        if variable.name.split(
                '/')[0] == 'RGB' and 'Logits' not in variable.name:
            rgb_variable_map[variable.name.replace(':0', '')] = variable
    rgb_saver = tf.train.Saver(var_list=rgb_variable_map, reshape=True)
    model_predictions = tf.nn.softmax(averaged_logits)
    top_classes = tf.argmax(model_predictions, axis=1)
    loss = get_loss(model_predictions, ground_truth)
    return model_predictions, loss, top_classes, input_fr_rgb, rgb_saver
Example #14
    def testModelShapesWithSqueeze(self):
        """Test shapes after running some fake data through the model."""
        i3d_model = i3d.InceptionI3d(num_classes=_NUM_CLASSES,
                                     final_endpoint='Predictions')
        inp = tf.placeholder(tf.float32,
                             [None, 64, _IMAGE_SIZE, _IMAGE_SIZE, 3])
        predictions, end_points = i3d_model(inp,
                                            is_training=True,
                                            dropout_keep_prob=0.5)

        init_op = tf.global_variables_initializer()
        with tf.Session() as sess:
            sess.run(init_op)
            sample_input = np.zeros((5, 64, _IMAGE_SIZE, _IMAGE_SIZE, 3))
            out_predictions, out_logits = sess.run(
                [predictions, end_points['Logits']], {inp: sample_input})
            self.assertEqual(out_predictions.shape, (5, _NUM_CLASSES))
            self.assertEqual(out_logits.shape, (5, _NUM_CLASSES))
Example #15
def _build_stream(stream_name, is_training):
    dims = 3 if stream_name == 'RGB' else 2
    input_shape = (_BATCH_SIZE, NUM_FRAMES, IMAGE_SIZE, IMAGE_SIZE, dims)
    inp = tf.placeholder(tf.float32, shape=input_shape)

    with tf.variable_scope(stream_name):
        model = i3d.InceptionI3d(spatial_squeeze=True,
                                 final_endpoint='Mixed_5c')
        # No training here. Don't backpropagate the main model, and no dropout.
        mixed_5c, _ = model(inp, is_training=False, dropout_keep_prob=1.0)

    var_map = {}
    for var in tf.global_variables():
        if var.name.split('/')[0] == stream_name:
            var_map[var.name.replace(':0', '')] = var

    saver = tf.train.Saver(var_list=var_map, reshape=True)

    with tf.variable_scope(stream_name):
        net = tf.nn.avg_pool3d(mixed_5c,
                               ksize=[1, 2, 7, 7, 1],
                               strides=[1, 1, 1, 1, 1],
                               padding=snt.VALID)
        logit_fn = i3d.Unit3D(output_channels=NUM_CLASSES,
                              kernel_shape=[1, 1, 1],
                              activation_fn=None,
                              use_batch_norm=False,
                              use_bias=True,
                              regularizers={'w': tf.nn.l2_loss},
                              name='Conv3d_0c_1x1')
        logits = logit_fn(net, is_training=is_training)
        logits = tf.squeeze(logits, [2, 3], name='SpatialSqueeze')
    logits = tf.reduce_mean(logits, axis=1)

    custom_vars = {}
    for var in tf.global_variables():
        name = var.name.replace(':0', '')
        if var.name.split(
                '/')[0] == stream_name and name not in var_map.keys():
            custom_vars[name] = var
            with tf.name_scope(name):
                _variable_summaries(var)

    return (inp, logits, saver, custom_vars)
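A plausible two-stream assembly on top of `_build_stream`; the equal fusion weighting is an assumption:

rgb_input, rgb_logits, rgb_saver, rgb_vars = _build_stream('RGB', True)
flow_input, flow_logits, flow_saver, flow_vars = _build_stream('Flow', True)
joint_logits = 0.5 * rgb_logits + 0.5 * flow_logits  # assumed equal weighting
predictions = tf.nn.softmax(joint_logits)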
Example #16
def load_model_and_test(train_generator, validation_generator, msasl_classes,
                        rgb_input):
    '''
    Builds the RGB I3D model, restores its checkpoint, and evaluates it on
    both the train and validation generators.
    '''
    with tf.compat.v1.variable_scope('RGB'):
        rgb_model = i3d.InceptionI3d(NUM_CLASSES,
                                     spatial_squeeze=True,
                                     final_endpoint='Logits')
        rgb_logits, _ = rgb_model(rgb_input,
                                  is_training=False,
                                  dropout_keep_prob=DROPOUT_KEEP_PROB)

    # The variable map is used to tell the saver which layers weights to restore.
    # (the weights of the layers are all stored in tf variables)
    rgb_variable_map = {}
    for variable in tf.compat.v1.global_variables():
        if variable.name.split('/')[0] == 'RGB':
            rgb_variable_map[variable.name.replace(':0',
                                                   '')[len(''):]] = variable
            # rgb_variable_map[variable.name.replace(':0', '')[len('RGB/inception_i3d'):]] = variable

    # We remove the logits layers from the variable map. We don't want to include these weights as we have a
    # different number of  classes.
    # layers = rgb_variable_map.keys()
    # layers_to_not_load = [layer for layer in layers if 'Logits' in layer]
    # unloaded_layers_to_init = {}
    # for layer in layers_to_not_load:
    #     unloaded_layers_to_init[layer] = rgb_variable_map.pop(layer)
    rgb_saver = tf.compat.v1.train.Saver(var_list=rgb_variable_map,
                                         reshape=True)

    with tf.compat.v1.Session() as sess:
        rgb_saver.restore(sess, WEIGHTS_PATH)
        tf.compat.v1.logging.info('RGB checkpoint restored')

        validate(sess, train_generator, rgb_model, rgb_input, 'Train',
                 msasl_classes)
        validate(sess, validation_generator, rgb_model, rgb_input,
                 'Validation', msasl_classes)
Example #17
    def testModelShapesWithoutSqueeze(self):
        """Test that turning off `spatial_squeeze` changes the output shape.

    Also try setting different values for `dropout_keep_prob` and snt.BatchNorm
    `is_training`.
    """
        i3d_model = i3d.InceptionI3d(num_classes=_NUM_CLASSES,
                                     spatial_squeeze=False,
                                     final_endpoint='Predictions')
        inp = tf.placeholder(tf.float32,
                             [None, 64, _IMAGE_SIZE, _IMAGE_SIZE, 3])
        predictions, end_points = i3d_model(inp,
                                            is_training=False,
                                            dropout_keep_prob=1.0)

        init_op = tf.global_variables_initializer()
        with tf.Session() as sess:
            sess.run(init_op)
            sample_input = np.zeros((5, 64, _IMAGE_SIZE, _IMAGE_SIZE, 3))
            out_predictions, out_logits = sess.run(
                [predictions, end_points['Logits']], {inp: sample_input})
            self.assertEqual(out_predictions.shape, (5, 1, 1, _NUM_CLASSES))
            self.assertEqual(out_logits.shape, (5, 1, 1, _NUM_CLASSES))
Example #18
def main(dataset, mode, split):
    assert mode in ['rgb', 'flow', 'mixed']
    log_dir = os.path.join(_LOG_ROOT,
                           'ensemble-%s-%s-%d' % (dataset, mode, split))
    if not os.path.exists(log_dir):
        os.mkdir(log_dir)

    logging.basicConfig(level=logging.INFO,
                        filename=os.path.join(
                            log_dir, 'log-%s-%d' % (mode, split) + '.txt'),
                        filemode='w',
                        format='%(message)s')

    label_map = get_label_map(os.path.join('./data', dataset, 'label_map.txt'))

    _, test_info_rgb, class_num, _ = load_info(dataset,
                                               root=_DATA_ROOT[dataset],
                                               mode='rgb',
                                               split=split)
    _, test_info_flow, _, _ = load_info(dataset,
                                        root=_DATA_ROOT[dataset],
                                        mode='flow',
                                        split=split)

    label_holder = tf.placeholder(tf.int32, [None])
    if mode in ['rgb', 'mixed']:
        rgb_data = ActionDataset(dataset,
                                 class_num,
                                 test_info_rgb,
                                 'frame{:06d}{:s}.jpg',
                                 mode='rgb')
        rgb_holder = tf.placeholder(
            tf.float32,
            [None, None, _FRAME_SIZE, _FRAME_SIZE, _CHANNEL['rgb']])
        info_rgb, _ = rgb_data.gen_test_list()
    if mode in ['flow', 'mixed']:
        flow_data = ActionDataset(dataset,
                                  class_num,
                                  test_info_flow,
                                  'frame{:06d}{:s}.jpg',
                                  mode='flow')
        flow_holder = tf.placeholder(
            tf.float32,
            [None, None, _FRAME_SIZE, _FRAME_SIZE, _CHANNEL['flow']])
        info_flow, _ = flow_data.gen_test_list(mode='flow')
    #print(info_rgb)
    # insert the model
    if mode in ['rgb', 'mixed']:
        with tf.variable_scope(_SCOPE['rgb']):
            rgb_model = i3d.InceptionI3d(400,
                                         spatial_squeeze=True,
                                         final_endpoint='Logits')
            rgb_logits, _ = rgb_model(rgb_holder,
                                      is_training=False,
                                      dropout_keep_prob=1)
            rgb_logits_dropout = tf.nn.dropout(rgb_logits, 1)
            rgb_fc_out = tf.layers.dense(rgb_logits_dropout,
                                         _CLASS_NUM[dataset],
                                         tf.nn.relu,
                                         use_bias=True)
            rgb_top_1_op = tf.nn.in_top_k(rgb_fc_out, label_holder, 1)
    if mode in ['flow', 'mixed']:
        with tf.variable_scope(_SCOPE['flow']):
            flow_model = i3d.InceptionI3d(400,
                                          spatial_squeeze=True,
                                          final_endpoint='Logits')
            flow_logits, _ = flow_model(flow_holder,
                                        is_training=False,
                                        dropout_keep_prob=1)
            flow_logits_dropout = tf.nn.dropout(flow_logits, 1)
            flow_fc_out = tf.layers.dense(flow_logits_dropout,
                                          _CLASS_NUM[dataset],
                                          use_bias=True)
            flow_top_1_op = tf.nn.in_top_k(flow_fc_out, label_holder, 1)

    # construct two separate feature map and saver(rgb_saver,flow_saver)
    variable_map = {}
    if mode in ['rgb', 'mixed']:
        for variable in tf.global_variables():
            tmp = variable.name.split('/')
            if tmp[0] == _SCOPE['rgb']:
                variable_map[variable.name.replace(':0', '')] = variable
        rgb_saver = tf.train.Saver(var_list=variable_map)
    variable_map = {}
    if mode in ['flow', 'mixed']:
        for variable in tf.global_variables():
            tmp = variable.name.split('/')
            if tmp[0] == _SCOPE['flow']:
                variable_map[variable.name.replace(':0', '')] = variable
        flow_saver = tf.train.Saver(var_list=variable_map, reshape=True)

    # Edited Version by AlexHu
    if mode == 'rgb':
        fc_out = rgb_fc_out
        softmax = tf.nn.softmax(fc_out)
    if mode == 'flow':
        fc_out = flow_fc_out
        softmax = tf.nn.softmax(fc_out)
    if mode == 'mixed':
        fc_out = _MIX_WEIGHT_OF_RGB * rgb_fc_out + _MIX_WEIGHT_OF_FLOW * flow_fc_out
        softmax = tf.nn.softmax(fc_out)
    top_k_op = tf.nn.in_top_k(softmax, label_holder, 1)

    # GPU config
    # config = tf.ConfigProto()
    # config.gpu_options.per_process_gpu_memory_fraction = 0.6
    # sess = tf.Session(config=config)

    # start a new session and restore the fine-tuned model

    sess = tf.Session()
    sess.run(tf.global_variables_initializer())
    if mode in ['rgb', 'mixed']:
        rgb_saver.restore(sess, _CHECKPOINT_PATHS_RGB[int(split) - 1])
    if mode in ['flow', 'mixed']:
        flow_saver.restore(sess, _CHECKPOINT_PATHS_FLOW[int(split) - 1])

    if mode in ['rgb', 'mixed']:
        # Start Queue
        rgb_queue = FeedQueue(queue_size=_QUEUE_SIZE)
        rgb_queue.start_queue(rgb_data.get_video,
                              args=info_rgb,
                              process_num=_QUEUE_PROCESS_NUM)
    if mode in ['flow', 'mixed']:
        flow_queue = FeedQueue(queue_size=_QUEUE_SIZE)
        flow_queue.start_queue(flow_data.get_video,
                               args=info_flow,
                               process_num=_QUEUE_PROCESS_NUM)

    # Here we start the test procedure
    print('----Here we start!----')
    print('Output writes to ' + log_dir)
    true_count = 0

    if mode in ['rgb', 'mixed']:
        video_size = len(info_rgb)
    if mode in ['flow', 'mixed']:
        video_size = len(info_flow)

    error_record = open(os.path.join(log_dir, 'error_record_' + mode + '.txt'),
                        'w')
    rgb_fc_data = np.zeros((video_size, _CLASS_NUM[dataset]))
    flow_fc_data = np.zeros((video_size, _CLASS_NUM[dataset]))
    label_data = np.zeros((video_size, 1))

    # just load 1 video per step for test; this could be improved
    y_pred = []
    y_true = []
    for i in range(video_size):
        #print(i)
        if mode in ['rgb', 'mixed']:
            rgb_clip, label, info = rgb_queue.feed_me()
            rgb_clip = rgb_clip / 255
            input_rgb = rgb_clip[np.newaxis, :, :, :, :]
            video_name = rgb_data.videos[info[0]].name
        if mode in ['flow', 'mixed']:
            flow_clip, label, info = flow_queue.feed_me()
            flow_clip = 2 * (flow_clip / 255) - 1
            input_flow = flow_clip[np.newaxis, :, :, :, :]
            video_name = flow_data.videos[info[0]].name
        input_label = np.array([label]).reshape(-1)
        #print(type(input_label[0]))
        #        print('input_rgb.shape:', input_rgb.shape)
        #        print('input_flow.shape:', input_flow.shape)
        #        print('input_label.shape:', input_label.shape)

        # Extract features from rgb and flow
        if mode in ['rgb']:
            top_1, predictions, curr_rgb_fc_data = sess.run(
                [top_k_op, fc_out, rgb_fc_out],
                feed_dict={
                    rgb_holder: input_rgb,
                    label_holder: input_label
                })
        if mode in ['flow']:
            top_1, predictions, curr_flow_fc_data = sess.run(
                [top_k_op, fc_out, flow_fc_out],
                feed_dict={
                    flow_holder: input_flow,
                    label_holder: input_label
                })
        if mode in ['mixed']:
            top_1, predictions, curr_rgb_fc_data, curr_flow_fc_data = sess.run(
                [top_k_op, fc_out, rgb_fc_out, flow_fc_out],
                feed_dict={
                    rgb_holder: input_rgb,
                    flow_holder: input_flow,
                    label_holder: input_label
                })
        if mode in ['rgb', 'mixed']:
            rgb_fc_data[i, :] = curr_rgb_fc_data
        if mode in ['flow', 'mixed']:
            flow_fc_data[i, :] = curr_flow_fc_data
        label_data[i, :] = label

        tmp = np.sum(top_1)
        true_count += tmp
        print('Video %d - frame %d-%d: %d, accuracy: %.4f (%d/%d) , name: %s' %
              (info[0], info[2], info[2] + info[1], tmp,
               true_count / video_size, true_count, video_size, video_name))
        logging.info(
            'Video%d-frame%d-%d: %d, accuracy: %.4f (%d/%d) , name:%s' %
            (info[0], info[2], info[2] + info[1], tmp, true_count / video_size,
             true_count, video_size, video_name))

        # self_added
        #        print(predictions[0, np.argmax(predictions, axis=1)[0]])
        #        print(trans_label(np.argmax(predictions, axis=1)[0], label_map))
        # print(np.argmax(label))
        #print(trans_label(np.argmax(label), label_map))
        y_true.append(trans_label(np.int64(input_label[0]), label_map))
        answer = np.argmax(predictions, axis=1)[0]
        y_pred.append(trans_label(answer, label_map))
        if tmp == 0:
            wrong_answer = np.argmax(predictions, axis=1)[0]
            #print(label_map[wrong_answer])
            # Attention: the graph output are converted into the type of numpy.array
            print('---->answer: %s, probability: %.2f' % (trans_label(
                wrong_answer, label_map), predictions[0, wrong_answer]))
            #print(predictions)
            logging.info('---->answer: %s, probability: %.2f' % (trans_label(
                wrong_answer, label_map), predictions[0, wrong_answer]))
            error_record.write(
                'video: %s, frame: %d-%d, answer: %s, true: %s, probability: answer-%.2f   true-%.2f\n'
                % (video_name, info[2], info[2] + info[1],
                   trans_label(wrong_answer, label_map),
                   trans_label(np.int64(input_label[0]),
                               label_map), predictions[0, wrong_answer],
                   predictions[0, np.int64(input_label[0])]))
    error_record.close()
    accuracy = true_count / video_size
    print('test accuracy: %.4f' % (accuracy))
    logging.info('test accuracy: %.4f' % (accuracy))
    if mode in ['rgb', 'mixed']:
        np.save(
            os.path.join(log_dir,
                         'obj_{}_rgb_fc_{}.npy').format(dataset, accuracy),
            rgb_fc_data)
    if mode in ['flow', 'mixed']:
        np.save(
            os.path.join(log_dir,
                         'obj_{}_flow_fc_{}.npy').format(dataset, accuracy),
            flow_fc_data)
    np.save(
        os.path.join(log_dir, 'obj_{}_label.npy').format(dataset), label_data)

    if mode in ['rgb', 'mixed']:
        rgb_queue.close_queue()
    if mode in ['flow', 'mixed']:
        flow_queue.close_queue()
    sess.close()
    #print(y_pred)
    #print(y_true)
    cf_matrix = confusion_matrix(y_true, y_pred, labels=label_map)
    print(cf_matrix)
    np.save(os.path.join(log_dir, 'cf_matrix_{}.npy'.format(mode)), cf_matrix)
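The `trans_label` helper used above is not defined in this example. Given that it is called with a numeric class index and the label map, and that its results feed `confusion_matrix(..., labels=label_map)`, a minimal guess at its behavior:

def trans_label(index, label_map):
    # Map a numeric class index to its human-readable label string.
    # (Hypothetical reconstruction; the original helper is not shown.)
    return label_map[int(index)]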
Example #19
def main():
    # manually shuffle the train txt file because tf.data.shuffle is soooo slow!
    shuffle_and_overwrite(os.path.join(TXT_DATA_DIR, TRAIN_FILE))
    # dataset loading using tf.data module
    with tf.device('/cpu:0'):
        train_dataset = tf.data.Dataset.from_tensor_slices(
            [os.path.join(TXT_DATA_DIR, TRAIN_FILE)])
        train_dataset = train_dataset.apply(
            tf.contrib.data.parallel_interleave(
                lambda x: tf.data.TextLineDataset(x).map(
                    lambda x: tf.string_split([x], delimiter=' ').values),
                cycle_length=NUM_THREADS,
                block_length=1))
        train_dataset = train_dataset.apply(
            tf.contrib.data.map_and_batch(lambda x: tuple(
                tf.py_func(get_data_func_train, [x, IMAGE_SIZE],
                           [tf.float32, tf.int64])),
                                          batch_size=BATCH_SIZE,
                                          num_parallel_batches=NUM_THREADS))
        train_dataset = train_dataset.prefetch(PREFETCH_BUFFER)

        val_dataset = tf.data.Dataset.from_tensor_slices(
            [os.path.join(TXT_DATA_DIR, VAL_FILE)])
        val_dataset = val_dataset.shuffle(VAL_LEN)
        val_dataset = val_dataset.apply(
            tf.contrib.data.parallel_interleave(
                lambda x: tf.data.TextLineDataset(x).map(
                    lambda x: tf.string_split([x], delimiter=' ').values),
                cycle_length=NUM_THREADS,
                block_length=1))
        val_dataset = val_dataset.apply(
            tf.contrib.data.map_and_batch(lambda x: tuple(
                tf.py_func(get_data_func_val, [x, IMAGE_SIZE],
                           [tf.float32, tf.int64])),
                                          batch_size=BATCH_SIZE,
                                          num_parallel_batches=NUM_THREADS))
        val_dataset = val_dataset.prefetch(PREFETCH_BUFFER)

        train_iterator = train_dataset.make_initializable_iterator()
        val_iterator = val_dataset.make_initializable_iterator()

        train_handle = train_iterator.string_handle()
        val_handle = val_iterator.string_handle()
        handle_flag = tf.placeholder(tf.string, [],
                                     name='iterator_handle_flag')
        dataset_iterator = tf.data.Iterator.from_string_handle(
            handle_flag, train_dataset.output_types,
            train_dataset.output_shapes)

        batch_vid, batch_label = dataset_iterator.get_next()
        batch_vid.set_shape([None, None, IMAGE_SIZE, IMAGE_SIZE, 3])

    train_flag = tf.placeholder(dtype=tf.bool, name='train_flag')
    dropout_flag = tf.placeholder(dtype=tf.float32, name='dropout_flag')

    # define model here
    with tf.variable_scope('RGB'):
        model = i3d.InceptionI3d(num_classes=400,
                                 spatial_squeeze=True,
                                 final_endpoint='Logits')
        logits, _ = model(inputs=batch_vid,
                          is_training=train_flag,
                          dropout_keep_prob=dropout_flag)
        logits_dropout = tf.nn.dropout(logits, keep_prob=dropout_flag)
        out = tf.layers.dense(logits_dropout,
                              NUM_CLASS,
                              activation=None,
                              use_bias=True)

        is_in_top_K = tf.nn.in_top_k(predictions=out,
                                     targets=batch_label,
                                     k=TOP_K)

        # maintain a variable map to restore from the ckpt
        variable_map = {}
        for var in tf.global_variables():
            var_name_split = var.name.split('/')
            if (var_name_split[1] == 'inception_i3d'
                    and 'dense' not in var.name):
                variable_map[var.name[:-2]] = var
            if var_name_split[-1][:-2] == 'w' or var_name_split[
                    -1][:-2] == 'kernel':
                tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES,
                                     tf.nn.l2_loss(var))

        # optional: print to check the variable names
        # pprint(variable_map)

        regularization_loss = tf.losses.get_regularization_loss(
            name='regularization_loss')  # sum of l2 loss
        loss_cross_entropy = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=batch_label, logits=out, name='cross_entropy'))
        total_loss = tf.add(loss_cross_entropy,
                            L2_PARAM * regularization_loss,
                            name='total_loss')
        tf.summary.scalar('batch_statistics/total_loss', total_loss)
        tf.summary.scalar('batch_statistics/cross_entropy_loss',
                          loss_cross_entropy)
        tf.summary.scalar('batch_statistics/l2_loss', regularization_loss)
        tf.summary.scalar('batch_statistics/loss_ratio',
                          regularization_loss / loss_cross_entropy)

        saver_to_restore = tf.train.Saver(var_list=variable_map, reshape=True)

        batch_num = TRAIN_LEN // BATCH_SIZE

        global_step = tf.Variable(GLOBAL_STEP_INIT,
                                  trainable=False,
                                  collections=[tf.GraphKeys.LOCAL_VARIABLES])
        learning_rate = config_learning_rate(global_step, batch_num)
        tf.summary.scalar('learning_rate', learning_rate)

        # set dependencies for BN ops
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            optimizer = config_optimizer(OPTIMIZER, learning_rate, OPT_EPSILON)
            train_op = optimizer.minimize(total_loss, global_step=global_step)

        # NOTE: if you don't want to save the params of the optimizer into the checkpoint,
        # you can place this line before the `update_ops` line
        saver_to_save = tf.train.Saver(max_to_keep=40)

        with tf.Session() as sess:
            sess.run([
                tf.global_variables_initializer(),
                tf.local_variables_initializer()
            ])
            train_handle_value, val_handle_value = sess.run(
                [train_handle, val_handle])
            sess.run(train_iterator.initializer)
            saver_to_restore.restore(sess, CHECKPOINT_PATH)
            merged = tf.summary.merge_all()
            train_writer = tf.summary.FileWriter(
                os.path.join(TENSORBOARD_LOG_DIR, 'train'), sess.graph)

            sys.stdout.write('\n----------- start to train -----------\n')

            intermediate_train_info = [0., 0.]
            for epoch in range(EPOCH_NUM):
                epoch_acc, epoch_loss = 0., 0.
                pbar = tqdm(total=batch_num,
                            desc='Epoch {}'.format(epoch),
                            unit=' batch (batch_size: {})'.format(BATCH_SIZE))
                for i in range(batch_num):
                    _, _loss_cross_entropy, _is_in_top_K, summary, _global_step, lr = sess.run(
                        [
                            train_op, loss_cross_entropy, is_in_top_K, merged,
                            global_step, learning_rate
                        ],
                        feed_dict={
                            train_flag: True,
                            dropout_flag: DROPOUT_KEEP_PRAM,
                            handle_flag: train_handle_value
                        })
                    train_writer.add_summary(summary, global_step=_global_step)

                    intermediate_train_info[0] += np.sum(_is_in_top_K)
                    intermediate_train_info[1] += _loss_cross_entropy
                    epoch_acc += np.sum(_is_in_top_K)
                    epoch_loss += _loss_cross_entropy

                    # intermediate evaluation for the training dataset
                    if _global_step % SHOW_TRAIN_INFO_FREQ == 0:
                        intermediate_train_acc = float(
                            intermediate_train_info[0]) / (
                                SHOW_TRAIN_INFO_FREQ * BATCH_SIZE)
                        intermediate_train_loss = intermediate_train_info[
                            1] / SHOW_TRAIN_INFO_FREQ

                        step_log_info = 'Epoch:{}, global_step:{}, step_train_acc:{:.4f}, step_train_loss:{:.4f}, lr:{:.7g}'.format(
                            epoch, _global_step, intermediate_train_acc,
                            intermediate_train_loss, lr)
                        sys.stdout.write('\n' + step_log_info + '\n')
                        sys.stdout.flush()
                        logging.info(step_log_info)
                        train_writer.add_summary(make_summary(
                            'accumulated_statistics/train_acc',
                            intermediate_train_acc),
                                                 global_step=_global_step)
                        train_writer.add_summary(make_summary(
                            'accumulated_statistics/train_loss',
                            intermediate_train_loss),
                                                 global_step=_global_step)
                        intermediate_train_info = [0., 0.]

                    # start to evaluate
                    if _global_step % SAVE_FREQ == 0:
                        if intermediate_train_acc >= 0.8:
                            saver_to_save.save(
                                sess,
                                SAVE_DIR + '/model_step_{}_lr_{:.7g}'.format(
                                    _global_step, lr))

                    pbar.update(1)
                pbar.close()

                # start to validate on the validation dataset
                sess.run(val_iterator.initializer)
                iter_num = int(np.ceil(float(VAL_LEN) / BATCH_SIZE))
                correct_cnt, loss_cnt = 0, 0
                pbar = tqdm(total=iter_num,
                            desc='EVAL train_epoch:{}'.format(epoch),
                            unit=' batch(batch_size={})'.format(BATCH_SIZE))
                for _ in range(iter_num):
                    _is_in_top_K, _loss_cross_entropy = sess.run(
                        [is_in_top_K, loss_cross_entropy],
                        feed_dict={
                            handle_flag: val_handle_value,
                            train_flag: False,
                            dropout_flag: 1.0
                        })
                    correct_cnt += np.sum(_is_in_top_K)
                    loss_cnt += _loss_cross_entropy
                    pbar.update(1)
                pbar.close()
                val_acc = float(correct_cnt) / VAL_LEN
                val_loss = float(loss_cnt) / iter_num

                log_info = '==>> Epoch:{}, global_step:{}, val_acc:{:.4f}, val_loss:{:.4f}, lr:{:.7g}'.format(
                    epoch, _global_step, val_acc, val_loss, lr)
                logging.info(log_info)
                sys.stdout.write('\n' + log_info + '\n')
                sys.stdout.flush()

                # manually shuffle the data with python for better performance
                shuffle_and_overwrite(os.path.join(TXT_DATA_DIR, TRAIN_FILE))
                sess.run(train_iterator.initializer)

                epoch_acc = float(epoch_acc) / TRAIN_LEN
                epoch_loss = float(epoch_loss) / batch_num
                log_info = '==========Epoch:{}, whole_train_acc:{:.4f}, whole_train_loss:{:.4f}, lr:{:.7g}=========='.format(
                    epoch, epoch_acc, epoch_loss, lr)
                logging.info(log_info)
                sys.stdout.write('\n' + log_info + '\n')
                sys.stdout.flush()

        train_writer.close()
Example #20
def feature_extractor():
    # loading net
    net = i3d.InceptionI3d(400, spatial_squeeze=True, final_endpoint='Logits')
    rgb_input = tf.placeholder(tf.float32,
                               shape=(batch_size, _SAMPLE_VIDEO_FRAMES,
                                      _IMAGE_SIZE, _IMAGE_SIZE, 3))

    _, end_points = net(rgb_input, is_training=False, dropout_keep_prob=1.0)
    end_feature = end_points['avg_pool3d']
    sess = tf.Session()

    rgb_variable_map = {}
    for variable in tf.global_variables():
        rgb_variable_map[variable.name.replace(
            ':0', '')[len('inception_i3d/'):]] = variable
    saver = tf.train.Saver(var_list=rgb_variable_map)

    saver.restore(sess, _CHECKPOINT_PATHS['rgb_imagenet'])

    video_list = open(VIDEO_PATH_FILE).readlines()
    video_list = [name.strip() for name in video_list]
    print('video_list', video_list)
    if not os.path.isdir(OUTPUT_FEAT_DIR):
        os.mkdir(OUTPUT_FEAT_DIR)

    print('Total number of videos: %d' % len(video_list))

    for cnt, video_name in enumerate(video_list):
        video_path = os.path.join(VIDEO_DIR, video_name)
        feat_path = os.path.join(OUTPUT_FEAT_DIR, video_name + '.npy')

        if os.path.exists(feat_path):
            print('Feature file for video %s already exists.' % video_name)
            continue

        print('video_path', video_path)

        n_frame = len(
            [ff for ff in os.listdir(video_path) if ff.endswith('.jpg')])

        print('Total frames: %d' % n_frame)

        features = []

        n_feat = int(n_frame // 8)
        n_batch = n_feat // batch_size + 1
        print('n_frame: %d; n_feat: %d' % (n_frame, n_feat))
        print('n_batch: %d' % n_batch)

        for i in range(n_batch):
            input_blobs = []
            for j in range(batch_size):
                input_blob = []
                for k in range(L):
                    idx = i * batch_size * L + j * L + k
                    idx = int(idx)
                    idx = idx % n_frame + 1
                    image = Image.open(os.path.join(video_path,
                                                    '%d.jpg' % idx))
                    image = image.resize((resize_w, resize_h))
                    image = np.array(image, dtype='float32')
                    '''
                    image[:, :, 0] -= 104.
                    image[:, :, 1] -= 117.
                    image[:, :, 2] -= 123.
                    '''
                    image[:, :, :] -= 127.5
                    image[:, :, :] /= 127.5
                    input_blob.append(image)

                input_blob = np.array(input_blob, dtype='float32')

                input_blobs.append(input_blob)

            input_blobs = np.array(input_blobs, dtype='float32')

            clip_feature = sess.run(end_feature,
                                    feed_dict={rgb_input: input_blobs})
            clip_feature = np.reshape(clip_feature,
                                      (-1, clip_feature.shape[-1]))

            features.append(clip_feature)

        features = np.concatenate(features, axis=0)
        features = features[:n_feat:
                            2]  # 16 frames per feature  (since 64-frame snippet corresponds to 8 features in I3D)

        feat_path = os.path.join(OUTPUT_FEAT_DIR, video_name + '.npy')

        print('Saving features and probs for video: %s ...' % video_name)
        np.save(feat_path, features)

        print('%d: %s has been processed...' % (cnt, video_name))
Example #21
def main(unused_argv):
    tf.logging.set_verbosity(tf.logging.INFO)
    eval_type = FLAGS.eval_type
    imagenet_pretrained = FLAGS.imagenet_pretrained

    if eval_type not in ['rgb', 'flow', 'joint']:
        raise ValueError('Bad `eval_type`, must be one of rgb, flow, joint')

    kinetics_classes = [x.strip() for x in open(_LABEL_MAP_PATH)]

    if eval_type in ['rgb', 'joint']:
        # RGB input has 3 channels.
        rgb_input = tf.placeholder(tf.float32,
                                   shape=(1, _SAMPLE_VIDEO_FRAMES, _IMAGE_SIZE,
                                          _IMAGE_SIZE, 3))
        with tf.variable_scope('RGB'):
            rgb_model = i3d.InceptionI3d(_NUM_CLASSES,
                                         spatial_squeeze=True,
                                         final_endpoint='Logits')
            rgb_logits, _ = rgb_model(rgb_input,
                                      is_training=False,
                                      dropout_keep_prob=1.0)
        rgb_variable_map = {}
        for variable in tf.global_variables():
            if variable.name.split('/')[0] == 'RGB':
                rgb_variable_map[variable.name.replace(':0', '')] = variable
        rgb_saver = tf.train.Saver(var_list=rgb_variable_map, reshape=True)

    if eval_type in ['flow', 'joint']:
        # Flow input has only 2 channels.
        flow_input = tf.placeholder(tf.float32,
                                    shape=(1, _SAMPLE_VIDEO_FRAMES,
                                           _IMAGE_SIZE, _IMAGE_SIZE, 2))
        with tf.variable_scope('Flow'):
            flow_model = i3d.InceptionI3d(_NUM_CLASSES,
                                          spatial_squeeze=True,
                                          final_endpoint='Logits')
            flow_logits, _ = flow_model(flow_input,
                                        is_training=False,
                                        dropout_keep_prob=1.0)
        flow_variable_map = {}
        for variable in tf.global_variables():
            if variable.name.split('/')[0] == 'Flow':
                flow_variable_map[variable.name.replace(':0', '')] = variable
        flow_saver = tf.train.Saver(var_list=flow_variable_map, reshape=True)

    if eval_type == 'rgb':
        model_logits = rgb_logits
    elif eval_type == 'flow':
        model_logits = flow_logits
    else:
        model_logits = rgb_logits + flow_logits
    model_predictions = tf.nn.softmax(model_logits)

    with tf.Session() as sess:
        feed_dict = {}
        if eval_type in ['rgb', 'joint']:
            if imagenet_pretrained:
                rgb_saver.restore(sess, _CHECKPOINT_PATHS['rgb_imagenet'])
            else:
                rgb_saver.restore(sess, _CHECKPOINT_PATHS['rgb'])
            tf.logging.info('RGB checkpoint restored')
            rgb_sample = np.load(_SAMPLE_PATHS['rgb'])
            tf.logging.info('RGB data loaded, shape=%s', str(rgb_sample.shape))
            feed_dict[rgb_input] = rgb_sample

        if eval_type in ['flow', 'joint']:
            if imagenet_pretrained:
                flow_saver.restore(sess, _CHECKPOINT_PATHS['flow_imagenet'])
            else:
                flow_saver.restore(sess, _CHECKPOINT_PATHS['flow'])
            tf.logging.info('Flow checkpoint restored')
            flow_sample = np.load(_SAMPLE_PATHS['flow'])
            tf.logging.info('Flow data loaded, shape=%s',
                            str(flow_sample.shape))
            feed_dict[flow_input] = flow_sample

        out_logits, out_predictions = sess.run(
            [model_logits, model_predictions], feed_dict=feed_dict)

        out_logits = out_logits[0]
        out_predictions = out_predictions[0]
        sorted_indices = np.argsort(out_predictions)[::-1]

        print('Norm of logits: %f' % np.linalg.norm(out_logits))
        print('\nTop classes and probabilities')
        for index in sorted_indices[:20]:
            print(out_predictions[index], out_logits[index],
                  kinetics_classes[index])
Example #22
def run(max_steps=64e3,
        mode='rgb',
        root='',
        split='',
        batch_size=1,
        save_dir=''):
    #tf.logging.set_verbosity(tf.logging.INFO)
    eval_type = mode

    imagenet_pretrained = False

    NUM_CLASSES = 400
    if eval_type == 'rgb600':
        NUM_CLASSES = 600

    if eval_type not in ['rgb', 'rgb600', 'flow', 'joint']:
        raise ValueError(
            'Bad `eval_type`, must be one of rgb, rgb600, flow, joint')

    if eval_type == 'rgb600':
        kinetics_classes = [x.strip() for x in open(_LABEL_MAP_PATH_600)]
    else:
        kinetics_classes = [x.strip() for x in open(_LABEL_MAP_PATH)]

    if eval_type in ['rgb', 'rgb600', 'joint']:
        # RGB input has 3 channels.
        rgb_input = tf.placeholder(tf.float32,
                                   shape=(1, _SAMPLE_VIDEO_FRAMES, _IMAGE_SIZE,
                                          _IMAGE_SIZE, 3))

        with tf.variable_scope('RGB'):
            rgb_model = i3d.InceptionI3d(NUM_CLASSES,
                                         spatial_squeeze=True,
                                         final_endpoint='Mixed_5c')
            rgb_logits, _ = rgb_model(rgb_input,
                                      is_training=False,
                                      dropout_keep_prob=1.0)

        rgb_variable_map = {}
        for variable in tf.global_variables():

            if variable.name.split('/')[0] == 'RGB':
                if eval_type == 'rgb600':
                    rgb_variable_map[variable.name.replace(
                        ':0', '')[len('RGB/inception_i3d/'):]] = variable
                else:
                    rgb_variable_map[variable.name.replace(':0',
                                                           '')] = variable

        rgb_saver = tf.train.Saver(var_list=rgb_variable_map, reshape=True)

    if eval_type in ['flow', 'joint']:
        # Flow input has only 2 channels.
        flow_input = tf.placeholder(tf.float32,
                                    shape=(None, _SAMPLE_VIDEO_FRAMES,
                                           _IMAGE_SIZE, _IMAGE_SIZE, 2))
        with tf.variable_scope('Flow'):
            flow_model = i3d.InceptionI3d(NUM_CLASSES,
                                          spatial_squeeze=True,
                                          final_endpoint='Mixed_5c')
            flow_logits, _ = flow_model(flow_input,
                                        is_training=False,
                                        dropout_keep_prob=1.0)

        flow_variable_map = {}
        for variable in tf.global_variables():
            if variable.name.split('/')[0] == 'Flow':
                flow_variable_map[variable.name.replace(':0', '')] = variable
        flow_saver = tf.train.Saver(var_list=flow_variable_map, reshape=True)

    if eval_type == 'rgb' or eval_type == 'rgb600':
        model_logits = rgb_logits
    elif eval_type == 'flow':
        model_logits = flow_logits
    else:
        model_logits = rgb_logits + flow_logits
    #model_predictions = tf.nn.softmax(model_logits)

    test_transforms = transforms.Compose([videotransforms.CenterCrop(224)])
    dataset = Dataset(split,
                      'training',
                      root,
                      mode,
                      test_transforms,
                      save_dir=save_dir)

    with tf.Session() as sess:
        feed_dict = {}

        while True:
            inputs, labels, name = dataset.next_batch()
            if name == '0': break
            i = 0
            # `input` shadowed the Python builtin, so the loop variable is
            # renamed to `clip`. Note: restoring the checkpoint inside this
            # loop is redundant; it could be hoisted above the while loop.
            for clip in inputs:
                i += 1
                c, t, h, w = clip.shape

                if eval_type in ['rgb', 'rgb600', 'joint']:
                    if imagenet_pretrained:
                        rgb_saver.restore(sess,
                                          _CHECKPOINT_PATHS['rgb_imagenet'])
                    else:
                        rgb_saver.restore(sess, _CHECKPOINT_PATHS[eval_type])
                    #tf.logging.info('RGB checkpoint restored')
                    rgb_sample = clip[np.newaxis, :]
                    #tf.logging.info('RGB data loaded, shape=%s', str(rgb_sample.shape))
                    feed_dict[rgb_input] = rgb_sample

                if eval_type in ['flow', 'joint']:
                    if imagenet_pretrained:
                        flow_saver.restore(sess,
                                           _CHECKPOINT_PATHS['flow_imagenet'])
                    else:
                        flow_saver.restore(sess, _CHECKPOINT_PATHS['flow'])
                    #tf.logging.info('Flow checkpoint restored')
                    flow_sample = clip[np.newaxis, :]
                    # tf.logging.info('Flow data loaded, shape=%s', str(flow_sample.shape))
                    feed_dict[flow_input] = flow_sample

                out_logits = sess.run([model_logits], feed_dict=feed_dict)

                out_logits = out_logits[0]

                new_path = os.path.join(save_dir, name, mode)
                if not os.path.exists(new_path):
                    os.makedirs(new_path)
                np.save(os.path.join(new_path, str(i)),
                        out_logits.reshape(1024))
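
The loop above stores one 1024-d feature vector per clip as
save_dir/<video_name>/<mode>/<i>.npy. A hedged sketch for reading one video's
features back into a single (num_clips, 1024) array; the helper name is
illustrative, and the file layout is inferred from the code above:

import os
import numpy as np

def load_video_features(save_dir, video_name, mode='rgb'):
    feature_dir = os.path.join(save_dir, video_name, mode)
    # Clips were saved as 1.npy, 2.npy, ... so sort numerically, not lexically.
    files = sorted(os.listdir(feature_dir),
                   key=lambda f: int(os.path.splitext(f)[0]))
    return np.stack([np.load(os.path.join(feature_dir, f)) for f in files])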
Example No. 23
def main(unused_argv):
    tf.logging.set_verbosity(tf.logging.INFO)

    eval_type = FLAGS.eval_type
    imagenet_pretrained = FLAGS.imagenet_pretrained
    final_endpoint = FLAGS.final_endpoint

    NUM_CLASSES = 400
    if eval_type == 'rgb600':
        NUM_CLASSES = 600

    if eval_type not in ['rgb', 'rgb600', 'flow', 'joint']:
        raise ValueError(
            'Bad `eval_type`, must be one of rgb, rgb600, flow, joint')

    if final_endpoint not in ['Mixed_4f', 'Logits', 'Predictions']:
        raise ValueError(
            'Bad `final_endpoint`, must be one of Mixed_4f, Logits, Predictions'
        )

    if eval_type == 'rgb600':
        kinetics_classes = [x.strip() for x in open(_LABEL_MAP_PATH_600)]
    else:
        kinetics_classes = [x.strip() for x in open(_LABEL_MAP_PATH)]

    if eval_type in ['rgb', 'rgb600', 'joint']:
        # RGB input has 3 channels.
        rgb_input = tf.placeholder(tf.float32,
                                   shape=(1, _SAMPLE_VIDEO_FRAMES, _IMAGE_SIZE,
                                          _IMAGE_SIZE, 3))

        with tf.variable_scope('RGB'):
            rgb_model = i3d.InceptionI3d(NUM_CLASSES,
                                         spatial_squeeze=True,
                                         final_endpoint=final_endpoint)
            rgb_output, _ = rgb_model(rgb_input,
                                      is_training=False,
                                      dropout_keep_prob=1.0)

        rgb_variable_map = {}
        for variable in tf.global_variables():

            if variable.name.split('/')[0] == 'RGB':
                if eval_type == 'rgb600':
                    rgb_variable_map[variable.name.replace(
                        ':0', '')[len('RGB/inception_i3d/'):]] = variable
                else:
                    rgb_variable_map[variable.name.replace(':0',
                                                           '')] = variable

        rgb_saver = tf.train.Saver(var_list=rgb_variable_map, reshape=True)

    if eval_type in ['flow', 'joint']:
        # Flow input has only 2 channels.
        flow_input = tf.placeholder(tf.float32,
                                    shape=(1, _SAMPLE_VIDEO_FRAMES,
                                           _IMAGE_SIZE, _IMAGE_SIZE, 2))
        with tf.variable_scope('Flow'):
            flow_model = i3d.InceptionI3d(NUM_CLASSES,
                                          spatial_squeeze=True,
                                          final_endpoint=final_endpoint)
            flow_output, _ = flow_model(flow_input,
                                        is_training=False,
                                        dropout_keep_prob=1.0)
        flow_variable_map = {}
        for variable in tf.global_variables():
            if variable.name.split('/')[0] == 'Flow':
                flow_variable_map[variable.name.replace(':0', '')] = variable
        flow_saver = tf.train.Saver(var_list=flow_variable_map, reshape=True)

    if eval_type == 'rgb' or eval_type == 'rgb600':
        model_output = rgb_output
    elif eval_type == 'flow':
        model_output = flow_output
    else:
        model_output = rgb_output + flow_output

    with tf.Session() as sess:
        feed_dict = {}
        if eval_type in ['rgb', 'rgb600', 'joint']:
            if imagenet_pretrained:
                rgb_saver.restore(sess, _CHECKPOINT_PATHS['rgb_imagenet'])
            else:
                rgb_saver.restore(sess, _CHECKPOINT_PATHS[eval_type])
            tf.logging.info('RGB checkpoint restored')
            rgb_sample = np.load(_PATHS['rgb'])
            rgb_sample = rgb_sample[np.newaxis, ...]
            tf.logging.info('RGB data loaded, shape=%s', str(rgb_sample.shape))
            feed_dict[rgb_input] = rgb_sample

        if eval_type in ['flow', 'joint']:
            if imagenet_pretrained:
                flow_saver.restore(sess, _CHECKPOINT_PATHS['flow_imagenet'])
            else:
                flow_saver.restore(sess, _CHECKPOINT_PATHS['flow'])
            tf.logging.info('Flow checkpoint restored')
            flow_sample = np.load(_PATHS['flow'])
            flow_sample = flow_sample[np.newaxis, ...]
            tf.logging.info('Flow data loaded, shape=%s',
                            str(flow_sample.shape))
            feed_dict[flow_input] = flow_sample

        output = sess.run(model_output, feed_dict=feed_dict)
        np.save(_SAVE_PATH, output)
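
Since `final_endpoint` is a flag here, the shape of the saved array depends on
where the network is cut: 'Logits' yields class scores while 'Mixed_4f' yields
a spatio-temporal feature map. A hedged shape probe, assuming the same i3d
module and the 79-frame, 224x224 inputs used throughout these examples:

import tensorflow as tf
import i3d

for endpoint in ['Mixed_4f', 'Logits', 'Predictions']:
    # Build each variant in a fresh graph so the variable scopes do not clash.
    with tf.Graph().as_default():
        inp = tf.placeholder(tf.float32, shape=(1, 79, 224, 224, 3))
        with tf.variable_scope('RGB'):
            model = i3d.InceptionI3d(400, spatial_squeeze=True,
                                     final_endpoint=endpoint)
            out, _ = model(inp, is_training=False, dropout_keep_prob=1.0)
        print(endpoint, out.shape)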
Example No. 24
def train_from_kinetics_weights(train_generator, validation_generator,
                                msasl_classes, rgb_input):
    '''
    Trains for EPOCHS epochs on the train_generator's data and evaluates on
    the validation set.
    '''
    with tf.compat.v1.variable_scope('RGB'):
        rgb_model = i3d.InceptionI3d(NUM_CLASSES,
                                     spatial_squeeze=True,
                                     final_endpoint='Logits')
        rgb_logits, _ = rgb_model(rgb_input,
                                  is_training=False,
                                  dropout_keep_prob=DROPOUT_KEEP_PROB)

    # The variable map tells the saver which layers' weights to restore
    # (the weights of the layers are all stored in tf variables).
    rgb_variable_map = {}
    for variable in tf.compat.v1.global_variables():
        if variable.name.split('/')[0] == 'RGB':
            rgb_variable_map[variable.name.replace(':0', '')] = variable
            # rgb_variable_map[variable.name.replace(':0', '')[len('RGB/inception_i3d'):]] = variable

    # We remove the logits layers from the variable map. We don't want to
    # restore these weights, since we have a different number of classes.
    layers = rgb_variable_map.keys()
    layers_to_not_load = [layer for layer in layers if 'Logits' in layer]
    unloaded_layers_to_init = {}
    for layer in layers_to_not_load:
        unloaded_layers_to_init[layer] = rgb_variable_map.pop(layer)
    rgb_saver = tf.compat.v1.train.Saver(var_list=rgb_variable_map,
                                         reshape=True)

    model_logits = rgb_logits
    model_predictions = tf.nn.softmax(model_logits)

    with tf.compat.v1.Session() as sess:
        feed_dict = {}

        # Restore all the layers but the logits from the shared weights.
        rgb_saver.restore(sess, CHECKPOINT_PATHS['rgb'])
        tf.compat.v1.logging.info('RGB checkpoint restored')

        # Initialize the logits (final layer). Not sure exactly how they will be initialized.
        # TODO: Look into how the logits will be initialized. Could try different approaches.
        sess.run(
            tf.compat.v1.variables_initializer(
                list(unloaded_layers_to_init.values())))

        # Preparing a new saver on all the layers to save weights as we train.
        # TODO: Use this saver to save in training.
        rgb_variable_map.update(unloaded_layers_to_init)
        rgb_saver = tf.compat.v1.train.Saver(var_list=rgb_variable_map,
                                             reshape=True)

        # Rebuild the model in training mode and define the label placeholder
        rgb_logits, _ = rgb_model(rgb_input,
                                  is_training=True,
                                  dropout_keep_prob=DROPOUT_KEEP_PROB)
        rgb_labels = tf.compat.v1.placeholder(tf.float32, [None, NUM_CLASSES])

        # TODO: Try with a more reasonable learning rate
        # global_step and decayed_lr can be used to decay the learning rate exponentially
        global_step = tf.compat.v1.placeholder(tf.int32)
        decayed_lr = tf.compat.v1.train.exponential_decay(
            learning_rate=ADAM_INIT_LR,
            global_step=global_step,
            decay_steps=5,
            decay_rate=0.95)

        # Loss and optimizer to use
        # TODO: Try with different loss functions and optimizers
        # loss = tf.nn.softmax_cross_entropy_with_logits_v2(logits=rgb_logits, labels=rgb_labels)
        loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits_v2(logits=rgb_logits,
                                                       labels=rgb_labels))
        # optimizer = tf.compat.v1.train.GradientDescentOptimizer(learning_rate=0.01).minimize(loss)
        optimizer = tf.compat.v1.train.MomentumOptimizer(
            learning_rate=LEARNING_RATE, momentum=MOMENTUM)
        # optimizer = tf.compat.v1.train.AdamOptimizer(learning_rate=decayed_lr, epsilon=ADAM_EPS)
        minimize = optimizer.minimize(loss)
        sess.run(tf.compat.v1.variables_initializer(optimizer.variables()))

        # One step or batch of training.
        def step(samples, labels, i):
            """Performs one optimizer step on a single mini-batch."""
            feed_dict[rgb_input] = samples
            feed_dict[rgb_labels] = labels
            feed_dict[global_step] = i
            result = sess.run([loss, minimize], feed_dict=feed_dict)
            return result

        # TODO: Should this be a running loss? May need to be fixed.
        # One epoch of training
        def epoch(data_generator, i):
            for images, labels in tqdm(data_generator, desc='EPOCH' + str(i)):
                result = step(images, labels, i)
            data_generator.on_epoch_end()
            print("Loss" + str(result[0]))
            return result[0]

        # val_accuracy_prior = 0

        # summary_saver keeps track of epoch_num, loss, training acc., validation acc.
        summary_saver = []
        for i in range(EPOCHS):
            epoch_loss = epoch(train_generator, i)
            summary_saver.append([i + 1, epoch_loss])
            if i % 10 == 0:
                train_accuracy = validate(sess, train_generator, rgb_model,
                                          rgb_input, 'Train')
                summary_saver[i].append(train_accuracy)
            # evaluate validation set for every epoch
            val_accuracy = validate(sess, validation_generator, rgb_model,
                                    rgb_input, 'Validation')
            summary_saver[i].append(val_accuracy)
            rgb_saver.save(sess,
                           NEW_CHECKPOINT_PATHS + NAME + 'acc' +
                           ('%.5f' % val_accuracy),
                           global_step=i)
            if i % 5 == 0:
                print_train_summary(summary_saver)
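
The training loop above calls a `validate` helper that this example does not
define. A hedged sketch of what it might look like, with the name and
signature taken from the call sites and everything else an assumption (in
particular, that the generator yields (samples, one-hot labels) batches):

import numpy as np
import tensorflow as tf

def validate(sess, generator, rgb_model, rgb_input, split_name):
    # Reuse the model's shared variables to build an inference head.
    logits, _ = rgb_model(rgb_input, is_training=False, dropout_keep_prob=1.0)
    predicted = tf.argmax(tf.nn.softmax(logits), axis=1)
    correct, total = 0, 0
    for samples, labels in generator:
        pred = sess.run(predicted, feed_dict={rgb_input: samples})
        correct += np.sum(pred == np.argmax(labels, axis=1))
        total += len(labels)
    generator.on_epoch_end()
    accuracy = correct / total
    print('%s accuracy: %.4f' % (split_name, accuracy))
    return accuracy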
Example No. 25
def main(unused_argv):
    tf.logging.set_verbosity(tf.logging.INFO)
    eval_type = FLAGS.eval_type

    imagenet_pretrained = FLAGS.imagenet_pretrained

    NUM_CLASSES = 400
    if eval_type == 'rgb600':
        NUM_CLASSES = 600

    if eval_type not in ['rgb', 'rgb600', 'flow', 'joint']:
        raise ValueError(
            'Bad `eval_type`, must be one of rgb, rgb600, flow, joint')

    if eval_type == 'rgb600':
        kinetics_classes = [x.strip() for x in open(_LABEL_MAP_PATH_600)]
    else:
        kinetics_classes = [x.strip() for x in open(_LABEL_MAP_PATH)]

    if eval_type in ['rgb', 'rgb600', 'joint']:
        # RGB input has 3 channels.
        rgb_input = tf.placeholder(tf.float32,
                                   shape=(1, _SAMPLE_VIDEO_FRAMES, _IMAGE_SIZE,
                                          _IMAGE_SIZE, 3))

        with tf.variable_scope('RGB'):
            rgb_model = i3d.InceptionI3d(NUM_CLASSES,
                                         spatial_squeeze=True,
                                         final_endpoint='Logits')
            rgb_logits, _ = rgb_model(rgb_input,
                                      is_training=False,
                                      dropout_keep_prob=1.0)

        rgb_variable_map = {}
        for variable in tf.global_variables():

            if variable.name.split('/')[0] == 'RGB':
                if eval_type == 'rgb600':
                    rgb_variable_map[variable.name.replace(
                        ':0', '')[len('RGB/inception_i3d/'):]] = variable
                else:
                    rgb_variable_map[variable.name.replace(':0',
                                                           '')] = variable

        rgb_saver = tf.train.Saver(var_list=rgb_variable_map, reshape=True)

    if eval_type in ['flow', 'joint']:
        # Flow input has only 2 channels.
        flow_input = tf.placeholder(tf.float32,
                                    shape=(1, _SAMPLE_VIDEO_FRAMES,
                                           _IMAGE_SIZE, _IMAGE_SIZE, 2))
        with tf.variable_scope('Flow'):
            flow_model = i3d.InceptionI3d(NUM_CLASSES,
                                          spatial_squeeze=True,
                                          final_endpoint='Logits')
            flow_logits, _ = flow_model(flow_input,
                                        is_training=False,
                                        dropout_keep_prob=1.0)
        flow_variable_map = {}
        for variable in tf.global_variables():
            if variable.name.split('/')[0] == 'Flow':
                flow_variable_map[variable.name.replace(':0', '')] = variable
        flow_saver = tf.train.Saver(var_list=flow_variable_map, reshape=True)

    if eval_type == 'rgb' or eval_type == 'rgb600':
        model_logits = rgb_logits
    elif eval_type == 'flow':
        model_logits = flow_logits
    else:
        model_logits = rgb_logits + flow_logits
    model_predictions = tf.nn.softmax(model_logits)

    with tf.Session() as sess:
        feed_dict = {}
        if eval_type in ['rgb', 'rgb600', 'joint']:
            if imagenet_pretrained:
                rgb_saver.restore(sess, _CHECKPOINT_PATHS['rgb_imagenet'])
            else:
                rgb_saver.restore(sess, _CHECKPOINT_PATHS[eval_type])
            tf.logging.info('RGB checkpoint restored')
            rgb_sample = np.load(_SAMPLE_PATHS['rgb'])
            tf.logging.info('RGB data loaded, shape=%s', str(rgb_sample.shape))
            feed_dict[rgb_input] = rgb_sample

        if eval_type in ['flow', 'joint']:
            if imagenet_pretrained:
                flow_saver.restore(sess, _CHECKPOINT_PATHS['flow_imagenet'])
            else:
                flow_saver.restore(sess, _CHECKPOINT_PATHS['flow'])
            tf.logging.info('Flow checkpoint restored')
            flow_sample = np.load(_SAMPLE_PATHS['flow'])
            tf.logging.info('Flow data loaded, shape=%s',
                            str(flow_sample.shape))
            feed_dict[flow_input] = flow_sample

        out_logits, out_predictions = sess.run(
            [model_logits, model_predictions], feed_dict=feed_dict)

        out_logits = out_logits[0]
        out_predictions = out_predictions[0]
        sorted_indices = np.argsort(out_predictions)[::-1]

        print('Norm of logits: %f' % np.linalg.norm(out_logits))
        print('\nTop classes and probabilities')
        for index in sorted_indices[:20]:
            print(out_predictions[index], out_logits[index],
                  kinetics_classes[index])

        frozen_model_name = "frozen_model.pb"
        print('\nFreezing and Exporting Model as: %s' % frozen_model_name)
        frozen_graph = tf.graph_util.convert_variables_to_constants(
            sess,
            tf.get_default_graph().as_graph_def(), ["Softmax"])
        with tf.io.gfile.GFile(frozen_model_name, "wb") as f:
            f.write(frozen_graph.SerializeToString())
        print("%d ops in the final graph." % len(frozen_graph.node))
Example No. 26
REGULARIZATION_RATE = 0.0001
TRAINING_STEPS = 97000
MODEL_SAVE_PATH = "/home/yzy_17/workspace/kinetics-i3d-master/data/checkpoints/rgb_imagenet"
MODEL_NAME = "../data/checkpoints/rgb_imagenet/model.ckpt"

with open('ucf_rgb_list.json', 'r') as f:
    video_lists = json.load(f)
    n_classes = len(video_lists.keys())

# Import the original model and build the forward pass
rgb_input = tf.placeholder(tf.float32,
                           shape=(1, None, IMAGE_SIZE, IMAGE_SIZE, 3))
with tf.variable_scope('RGB'):
    rgb_model = i3d.InceptionI3d(n_classes,
                                 spatial_squeeze=True,
                                 final_endpoint='Mixed_5c')
    bottleneck, _ = rgb_model(rgb_input,
                              is_training=False,
                              dropout_keep_prob=1.0)

variable_map = {}
for variable in tf.global_variables():
    if variable.name.split('/')[0] == 'RGB':
        variable_map[variable.name.replace(':0', '')] = variable
saver1 = tf.train.Saver(var_list=variable_map, reshape=True)

with tf.variable_scope("RGB", reuse=True):
    w_jieduan = tf.get_variable(
        'inception_i3d/Mixed_4e/Branch_3/Conv3d_0b_1x1/batch_norm/beta')
output_conv_sg = tf.stop_gradient(w_jieduan)
tf.flags.DEFINE_boolean('imagenet_pretrained', True, '')
tf.logging.set_verbosity(tf.logging.INFO)
eval_type = FLAGS.eval_type
imagenet_pretrained = FLAGS.imagenet_pretrained

'''Placeholders for the labels and dropout keep probability'''
labels_placeholder = tf.placeholder(tf.float32, [_BATCH_SIZE, NUM_CLASSES])
keep_prob = tf.placeholder(tf.float32)

'''Build the proposal network with i3d'''
# Proposal RGB input has 3 channels.
proposal_input = tf.placeholder(tf.float32,
    shape=(_BATCH_SIZE, _SAMPLE_VIDEO_FRAMES, _IMAGE_SIZE, _IMAGE_SIZE, 3))

with tf.variable_scope('RGB'):
  proposal_model = i3d.InceptionI3d(NUM_CLASSES, spatial_squeeze=True,
                                    final_endpoint='Logits')
  proposal_logits, _ = proposal_model(proposal_input, is_training=True,
                                      dropout_keep_prob=keep_prob)

proposal_variable_map = {}
for variable in tf.global_variables():
  if variable.name.split('/')[0] == 'RGB':
    proposal_variable_map[variable.name.replace(':0', '')] = variable
proposal_saver_savedata = tf.train.Saver(var_list=proposal_variable_map,
                                         reshape=True)

model_logits = proposal_logits

'''TensorFlow output definition'''
model_predictions = tf.nn.softmax(model_logits)
output_class = tf.argmax(model_predictions, 1)
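
`labels_placeholder` is defined above but never used, so presumably a loss and
optimizer would be attached next. A minimal hedged sketch of that hookup with
standard softmax cross-entropy; the Adam learning rate is an arbitrary
assumption, not from the source:

# Hedged sketch: attach a loss and optimizer to the proposal network above.
loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits_v2(logits=model_logits,
                                               labels=labels_placeholder))
train_op = tf.train.AdamOptimizer(learning_rate=1e-4).minimize(loss)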
Example No. 28
def main(dataset='ucf101', mode='rgb', split=1):
    assert mode in ['rgb', 'flow'], 'Only RGB data and flow data are supported'
    log_dir = os.path.join(_LOG_ROOT,
                           'finetune-%s-%s-%d' % (dataset, mode, split))
    if not os.path.exists(log_dir):
        os.makedirs(log_dir)
    logging.basicConfig(level=logging.INFO,
                        filename=os.path.join(log_dir, 'log.txt'),
                        filemode='w',
                        format='%(message)s')

    ###  Data Preload  ###
    train_info, test_info = split_data(
        os.path.join('./data', dataset, mode + '.csv'),
        os.path.join('./data', dataset, 'testlist%02d' % split + '.txt'))
    #        os.path.join('/data1/yunfeng/i3d_test/data', dataset, mode+'.txt'),
    #        os.path.join('/data1/yunfeng/i3d_test/data', dataset, 'testlist%02d' % split+'.txt'))
    train_data = Action_Dataset(dataset, mode, train_info)
    test_data = Action_Dataset(dataset, mode, test_info)

    num_train_sample = len(train_info)
    # Every element in train_info is shown as below:
    # ['v_ApplyEyeMakeup_g08_c01',
    # '/data4/zhouhao/dataset/ucf101/jpegs_256/v_ApplyEyeMakeup_g08_c01',
    # '121', '0']
    train_info_tensor = tf.constant(train_info)
    test_info_tensor = tf.constant(test_info)

    # Dataset building
    # Phase 1 Training
    # one element in this dataset is (train_info list)
    train_info_dataset = tf.data.Dataset.from_tensor_slices(
        (train_info_tensor))
    train_info_dataset = train_info_dataset.shuffle(
        buffer_size=num_train_sample)
    # one element in this dataset is (single image_postprocess, single label)
    train_dataset = train_info_dataset.map(
        lambda x: _get_data_label_from_info(x, dataset, mode),
        num_parallel_calls=_NUM_PARALLEL_CALLS)
    # one element in this dataset is (batch image_postprocess, batch label)
    train_dataset = train_dataset.repeat().batch(_BATCH_SIZE)
    train_dataset = train_dataset.prefetch(buffer_size=_PREFETCH_BUFFER_SIZE)

    # Phase 2 Testing
    # one element in this dataset is (train_info list)
    test_info_dataset = tf.data.Dataset.from_tensor_slices((test_info_tensor))
    # one element in this dataset is (single image_postprocess, single label)
    test_dataset = test_info_dataset.map(
        lambda x: _get_data_label_from_info(x, dataset, mode),
        num_parallel_calls=_NUM_PARALLEL_CALLS)
    # one element in this dataset is (batch image_postprocess, batch label)
    test_dataset = test_dataset.batch(1).repeat()
    test_dataset = test_dataset.prefetch(buffer_size=_PREFETCH_BUFFER_SIZE)

    # iterator = dataset.make_one_shot_iterator()
    # clip_holder, label_holder = iterator.get_next()
    iterator = tf.data.Iterator.from_structure(train_dataset.output_types,
                                               train_dataset.output_shapes)
    train_init_op = iterator.make_initializer(train_dataset)
    test_init_op = iterator.make_initializer(test_dataset)

    clip_holder, label_holder = iterator.get_next()
    clip_holder = tf.squeeze(clip_holder, [1])
    label_holder = tf.squeeze(label_holder, [1])
    clip_holder.set_shape(
        [None, None, _FRAME_SIZE, _FRAME_SIZE, _CHANNEL[mode]])
    dropout_holder = tf.placeholder(tf.float32)
    is_train_holder = tf.placeholder(tf.bool)

    # Inference Module
    with tf.variable_scope(_SCOPE[train_data.mode]):
        # insert i3d model
        model = i3d.InceptionI3d(400,
                                 spatial_squeeze=True,
                                 final_endpoint='Logits')
        # the line below outputs the final results with logits
        # __call__ uses _template, and _template uses _build when defined
        logits, _ = model(clip_holder,
                          is_training=is_train_holder,
                          dropout_keep_prob=dropout_holder)
        logits_dropout = tf.nn.dropout(logits, dropout_holder)
        # Map the 400 Kinetics classes to the ucf101 or hmdb classes
        fc_out = tf.layers.dense(logits_dropout,
                                 _CLASS_NUM[dataset],
                                 use_bias=True)
        # compute the top-k results for the whole batch size
        is_in_top_1_op = tf.nn.in_top_k(fc_out, label_holder, 1)

    # Loss calculation, including L2-norm
    variable_map = {}
    train_var = []
    for variable in tf.global_variables():
        tmp = variable.name.split('/')
        if tmp[0] == _SCOPE[train_data.mode] and 'dense' not in tmp[1]:
            variable_map[variable.name.replace(':0', '')] = variable
        if tmp[-1] == 'w:0' or tmp[-1] == 'kernel:0':
            weight_l2 = tf.nn.l2_loss(variable)
            tf.add_to_collection('weight_l2', weight_l2)
    loss_weight = tf.add_n(tf.get_collection('weight_l2'), 'loss_weight')
    loss = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(labels=label_holder,
                                                       logits=fc_out))
    total_loss = loss + _WEIGHT_OF_LOSS_WEIGHT * loss_weight
    tf.summary.scalar('loss', loss)
    tf.summary.scalar('loss_weight', loss_weight)
    tf.summary.scalar('total_loss', total_loss)

    # Import pre-trained model
    saver = tf.train.Saver(var_list=variable_map, reshape=True)
    saver2 = tf.train.Saver(max_to_keep=_SAVER_MAX_TO_KEEP)
    # Specific Hyperparams
    # steps for training: the number of batches per epoch
    per_epoch_step = int(np.ceil(train_data.size / _BATCH_SIZE))
    # global step constant
    global_step = _GLOBAL_EPOCH * per_epoch_step
    # global step counter
    global_index = tf.Variable(0, trainable=False)

    # Set the learning rate schedule by hand; an automatic schedule could also be used
    #boundaries = [10000, 20000, 30000, 40000, 50000]
    #values = [_LEARNING_RATE, 0.0008, 0.0005, 0.0003, 0.0001, 5e-5]
    #learning_rate = tf.train.piecewise_constant(
    #    global_index, boundaries, values)

    STEP_SIZE = per_epoch_step * _STEP_SIZE_FACTOR
    learning_rate = clr.clr(_BASE_LR, _MAX_LR, STEP_SIZE, global_index)

    #learning_rate = tf.train.exponential_decay(
    #        learning_rate=0.000001, global_step=global_index, decay_steps=1, decay_rate=1.001)
    tf.summary.scalar('learning_rate', learning_rate)

    # Optimizer set-up
    # For batch norm, the update ops must run alongside the train op
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        optimizer = tf.train.MomentumOptimizer(
            learning_rate, _MOMENTUM).minimize(total_loss,
                                               global_step=global_index)
    sess = tf.Session()
    merged_summary = tf.summary.merge_all()
    train_writer = tf.summary.FileWriter(log_dir, sess.graph)
    sess.run(tf.global_variables_initializer())
    sess.run(train_init_op)
    saver.restore(sess, _CHECKPOINT_PATHS[train_data.mode + '_imagenet'])

    print('----Here we start!----')
    print('Output writes to ' + log_dir)
    # logging.info('----Here we start!----')
    step = 0
    # for one epoch
    true_count = 0
    # for 20 batches
    tmp_count = 0
    accuracy_tmp = 0
    epoch_completed = 0
    while step <= global_step:
        step += 1
        #start_time = time.time()
        _, loss_now, loss_plus, is_in_top_1, summary = sess.run(
            [
                optimizer, total_loss, loss_weight, is_in_top_1_op,
                merged_summary
            ],
            feed_dict={
                dropout_holder: _DROPOUT,
                is_train_holder: True
            })
        #duration = time.time() - start_time
        tmp = np.sum(is_in_top_1)
        true_count += tmp
        tmp_count += tmp
        train_writer.add_summary(summary, step)
        # responsible for printing relevant results
        '''if step % _OUTPUT_STEP == 0:
            accuracy = tmp_count / (_OUTPUT_STEP * _BATCH_SIZE)
            print('step: %-4d, loss: %-.4f, accuracy: %.3f (%.2f sec/batch)' %
                  (step, loss_now, accuracy, float(duration)))
            logging.info('step: % -4d, loss: % -.4f,\
                             accuracy: % .3f ( % .2f sec/batch)' %
                         (step, loss_now, accuracy, float(duration)))
            tmp_count = 0'''
        if step % per_epoch_step == 0:
            epoch_completed += 1
            accuracy = true_count / (per_epoch_step * _BATCH_SIZE)
            print('Epoch%d, train accuracy: %.3f' %
                  (epoch_completed, accuracy))
            logging.info('Epoch%d, train accuracy: %.3f' %
                         (train_data.epoch_completed, accuracy))
            true_count = 0
            if (step == global_step) or (step % (2 * STEP_SIZE) == 0):
                sess.run(test_init_op)
                true_count = 0
                # start test process
                print(test_data.size)
                for i in range(test_data.size):
                    # print(i,true_count)
                    is_in_top_1 = sess.run(is_in_top_1_op,
                                           feed_dict={
                                               dropout_holder: 1,
                                               is_train_holder: False
                                           })
                    true_count += np.sum(is_in_top_1)
                accuracy = true_count / test_data.size
                true_count = 0
                # to ensure every test procedure has the same test size
                test_data.index_in_epoch = 0
                print('Epoch%d, test accuracy: %.3f' %
                      (epoch_completed, accuracy))
                logging.info('Epoch%d, test accuracy: %.3f' %
                             (train_data.epoch_completed, accuracy))
                # saving the best params in test set
                if (epoch_completed > 0):
                    saver2.save(
                        sess,
                        os.path.join(log_dir,
                                     test_data.name + '_' + train_data.mode),
                        epoch_completed)
                sess.run(train_init_op)
    train_writer.close()
    sess.close()
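
The call clr.clr(_BASE_LR, _MAX_LR, STEP_SIZE, global_index) above refers to a
cyclical-learning-rate helper that is not shown. A hedged sketch of a
triangular CLR schedule in the spirit of Smith (2017); the signature is taken
from the call site, the body is an assumption:

import tensorflow as tf

def clr(base_lr, max_lr, step_size, global_step):
    # Triangular cyclical learning rate: ramps linearly between base_lr and
    # max_lr with a full cycle every 2 * step_size steps.
    step = tf.cast(global_step, tf.float32)
    cycle = tf.floor(1.0 + step / (2.0 * step_size))
    x = tf.abs(step / step_size - 2.0 * cycle + 1.0)
    return base_lr + (max_lr - base_lr) * tf.maximum(0.0, 1.0 - x)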
Example No. 29
import os
import time

import h5py
import tensorflow as tf

import i3d

_START_TIME = time.time()
log_file = open('preprocess_output_flow.txt', 'w')

flow_data_dir = './flow_data/'
all_video_names = os.listdir(flow_data_dir)
total_vid_cnt = len(all_video_names)

f = h5py.File('thumos14_i3d_features_flow.hdf5', 'w')

with tf.variable_scope('Flow'):
    flow_input = tf.placeholder(tf.float32, shape=(1, None, 224, 224, 2))
    flow_model = i3d.InceptionI3d(final_endpoint='Avg_pool_3d')
    flow_out, _ = flow_model(flow_input, is_training=False, dropout_keep_prob=1.0)
    flow_out = tf.squeeze(flow_out)
flow_saver = tf.train.Saver(reshape=True)

with tf.Session() as sess:
    flow_saver.restore(sess, './i3d-ucf101-rgb-flow-model/flow.ckpt')

    for vid_index, video_name in enumerate(all_video_names):
        start_time = time.time()
        flow_x_dir = os.path.join(flow_data_dir, video_name, 'flow_x')
        flow_y_dir = os.path.join(flow_data_dir, video_name, 'flow_y')

        flow_x_imgs = os.listdir(flow_x_dir)
        flow_x_imgs = sorted(flow_x_imgs,
                             key=lambda x: int(x.split('.')[0].split('_')[-1]))
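        # --- Hedged continuation sketch: the original example is truncated
        # above. The apparent intent is to stack the x/y flow frames into a
        # (1, T, 224, 224, 2) clip, run the model, and write the features to
        # the HDF5 file. cv2-based reading and the [-1, 1] rescaling are
        # assumptions (requires `import cv2` and `import numpy as np`).
        flow_stack = []
        for img_name in flow_x_imgs:
            fx = cv2.imread(os.path.join(flow_x_dir, img_name),
                            cv2.IMREAD_GRAYSCALE)
            fy = cv2.imread(os.path.join(flow_y_dir, img_name),
                            cv2.IMREAD_GRAYSCALE)
            fx = cv2.resize(fx, (224, 224)) / 127.5 - 1.0
            fy = cv2.resize(fy, (224, 224)) / 127.5 - 1.0
            flow_stack.append(np.stack([fx, fy], axis=-1))
        clip = np.expand_dims(np.stack(flow_stack), axis=0)
        features = sess.run(flow_out, feed_dict={flow_input: clip})
        f.create_dataset(video_name, data=features)
        log_file.write('%d/%d %s %.1fs\n' % (vid_index + 1, total_vid_cnt,
                                             video_name,
                                             time.time() - start_time))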
Example No. 30
def main(unused_argv):
    tf.logging.set_verbosity(tf.logging.INFO)
    eval_type = FLAGS.eval_type

    imagenet_pretrained = FLAGS.imagenet_pretrained

    NUM_CLASSES = 400
    if eval_type == 'rgb600':
        NUM_CLASSES = 600

    if eval_type not in ['rgb', 'rgb600', 'flow', 'joint']:
        raise ValueError('Bad `eval_type`, must be one of rgb, rgb600, flow, joint')

    if eval_type == 'rgb600':
        kinetics_classes = [x.strip() for x in open(_LABEL_MAP_PATH_600)]
    else:
        kinetics_classes = [x.strip() for x in open(_LABEL_MAP_PATH)]

    if eval_type in ['rgb', 'rgb600', 'joint']:
        # RGB input has 3 channels.
        rgb_input = tf.placeholder(
            tf.float32,
            shape=(1, _SAMPLE_VIDEO_FRAMES, _IMAGE_SIZE, _IMAGE_SIZE, 3))

        with tf.variable_scope('RGB'):
            rgb_model = i3d.InceptionI3d(
                NUM_CLASSES, spatial_squeeze=True, final_endpoint='Logits')
            rgb_logits, _ = rgb_model(
                rgb_input, is_training=False, dropout_keep_prob=1.0)

        rgb_variable_map = {}
        for variable in tf.global_variables():

            if variable.name.split('/')[0] == 'RGB':
                if eval_type == 'rgb600':
                    rgb_variable_map[variable.name.replace(':0', '')[len('RGB/inception_i3d/'):]] = variable
                else:
                    rgb_variable_map[variable.name.replace(':0', '')] = variable

        rgb_saver = tf.train.Saver(var_list=rgb_variable_map, reshape=True)

    if eval_type in ['flow', 'joint']:
        # Flow input has only 2 channels.
        flow_input = tf.placeholder(
            tf.float32,
            shape=(1, _SAMPLE_VIDEO_FRAMES, _IMAGE_SIZE, _IMAGE_SIZE, 2))
        with tf.variable_scope('Flow'):
            flow_model = i3d.InceptionI3d(
                NUM_CLASSES, spatial_squeeze=True, final_endpoint='Logits')
            flow_logits, _ = flow_model(
                flow_input, is_training=False, dropout_keep_prob=1.0)
        flow_variable_map = {}
        for variable in tf.global_variables():
            if variable.name.split('/')[0] == 'Flow':
                flow_variable_map[variable.name.replace(':0', '')] = variable
        flow_saver = tf.train.Saver(var_list=flow_variable_map, reshape=True)

    if eval_type == 'rgb' or eval_type == 'rgb600':
        model_logits = rgb_logits
    elif eval_type == 'flow':
        model_logits = flow_logits
    else:
        model_logits = rgb_logits + flow_logits
    model_predictions = tf.nn.softmax(model_logits)

    with tf.Session() as sess:

        feed_dict = {}
        if eval_type in ['rgb', 'rgb600', 'joint']:
            if imagenet_pretrained:
                rgb_saver.restore(sess, _CHECKPOINT_PATHS['rgb_imagenet'])
            else:
                rgb_saver.restore(sess, _CHECKPOINT_PATHS[eval_type])
            tf.logging.info('RGB checkpoint restored')

            sample_pool = os.listdir(_SAMPLE_ROOT_)
            sample_pool.sort()

            results = []

            for vid_name in tqdm(sample_pool):
                rgb_sample = load_data(os.path.join(_SAMPLE_ROOT_, vid_name))
                #tf.logging.info('RGB data loaded, shape=%s', str(rgb_sample.shape))
                feed_dict[rgb_input] = rgb_sample

                # if eval_type in ['flow', 'joint']:
                #     if imagenet_pretrained:
                #         flow_saver.restore(sess, _CHECKPOINT_PATHS['flow_imagenet'])
                #     else:
                #         flow_saver.restore(sess, _CHECKPOINT_PATHS['flow'])
                #     tf.logging.info('Flow checkpoint restored')
                #     flow_sample = np.load(_SAMPLE_PATHS['flow'])
                #     tf.logging.info('Flow data loaded, shape=%s', str(flow_sample.shape))
                #     feed_dict[flow_input] = flow_sample

                out_logits, out_predictions = sess.run(
                    [model_logits, model_predictions],
                    feed_dict=feed_dict)

                out_logits = out_logits[0]
                out_predictions = out_predictions[0]

                results.append(out_predictions)

        plot_results(results, ap_num=4, full_range=360, grad=15, variable='ForTest', class_id=260)
        gt_labels = [260]*len(results)
        top1, top5 = top_k_accuracy(results, gt_labels, k=(1, 5))
        print("Top-1 Accuracy = {:.02f}".format(top1 * 100))
        print("Top-5 Accuracy = {:.02f}".format(top5 * 100))