Example #1
def run_training():
    # Get the sets of images and labels for training, validation, and test.
    # Tell TensorFlow that the model will be built into the default Graph.
    pre_model_save_dir = '/home/senilab/Documents/I3D/models/flow_imagenet_30000_51_4_64_0.0001_decay'
    test_list_file = '/media/senilab/DATA/Master/I3D-Tensorflow/list/hmdb_list/test.list'
    file = list(open(test_list_file, 'r'))
    num_test_videos = len(file)
    print("Number of test videos={}".format(num_test_videos))
    with tf.Graph().as_default():
        rgb_images_placeholder, flow_images_placeholder, labels_placeholder, is_training = placeholder_inputs(
            FLAGS.batch_size * gpu_num, FLAGS.num_frame_per_clib,
            FLAGS.crop_size, FLAGS.rgb_channels)

        with tf.variable_scope('Flow'):
            logit, _ = InceptionI3d(num_classes=FLAGS.classics,
                                    spatial_squeeze=True,
                                    final_endpoint='Logits',
                                    name='inception_i3d')(
                                        flow_images_placeholder, is_training)
        norm_score = tf.nn.softmax(logit)

        # Create a saver for writing training checkpoints.

        saver = tf.train.Saver()
        init = tf.global_variables_initializer()

        # Create a session for running Ops on the Graph.
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        sess = tf.Session(config=config)
        sess.run(init)

    ckpt = tf.train.get_checkpoint_state(pre_model_save_dir)
    if ckpt and ckpt.model_checkpoint_path:
        print("loading checkpoint %s,waiting......" %
              ckpt.model_checkpoint_path)
        saver.restore(sess, ckpt.model_checkpoint_path)
        print("load complete!")

    all_steps = num_test_videos
    top1_list = []
    for step in range(all_steps):
        start_time = time.time()
        s_index = 0
        predicts = []
        top1 = False
        while True:
            _, val_images, val_labels, s_index, is_end = input_test.read_clip_and_label(
                filename=file[step],
                batch_size=FLAGS.batch_size * gpu_num,
                s_index=s_index,
                num_frames_per_clip=FLAGS.num_frame_per_clib,
                crop_size=FLAGS.crop_size,
            )
            predict = sess.run(norm_score,
                               feed_dict={
                                   flow_images_placeholder: val_images,
                                   labels_placeholder: val_labels,
                                   is_training: False
                               })
            predicts.append(
                np.array(predict).astype(np.float32).reshape(FLAGS.classics))
            if is_end:
                avg_pre = np.mean(predicts, axis=0).tolist()
                top1 = (avg_pre.index(max(avg_pre)) == val_labels)
                top1_list.append(top1)
                break
        duration = time.time() - start_time
        print('TOP_1_ACC in test: %f, time use: %.3f' % (top1, duration))
    print(len(top1_list))
    print('TOP_1_ACC in test_all: %f' % np.mean(top1_list))
    print("done")
Example #2
def run_training():
    # Get the sets of images and labels for training, validation, and test.
    # Tell TensorFlow that the model will be built into the default Graph.

    # Create model directory
    if not os.path.exists(model_save_dir):
        os.makedirs(model_save_dir)
    rgb_pre_model_save_dir = "/home/project/I3D/I3D/checkpoints/rgb_imagenet"

    with tf.Graph().as_default():
        global_step = tf.get_variable('global_step', [],
                                      initializer=tf.constant_initializer(0),
                                      trainable=False)
        rgb_images_placeholder, flow_images_placeholder, labels_placeholder, is_training = placeholder_inputs(
            FLAGS.batch_size * gpu_num, FLAGS.num_frame_per_clib,
            FLAGS.crop_size, FLAGS.rgb_channels, FLAGS.flow_channels)

        learning_rate = tf.train.exponential_decay(FLAGS.learning_rate,
                                                   global_step,
                                                   decay_steps=3000,
                                                   decay_rate=0.1,
                                                   staircase=True)
        opt_rgb = tf.train.AdamOptimizer(learning_rate)
        #opt_stable = tf.train.MomentumOptimizer(learning_rate, 0.9)
        with tf.variable_scope('RGB'):
            rgb_logit, _ = InceptionI3d(
                num_classes=FLAGS.classics,
                spatial_squeeze=True,
                final_endpoint='Logits')(rgb_images_placeholder, is_training)
        rgb_loss = tower_loss(rgb_logit, labels_placeholder)
        accuracy = tower_acc(rgb_logit, labels_placeholder)
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            rgb_grads = opt_rgb.compute_gradients(rgb_loss)
            apply_gradient_rgb = opt_rgb.apply_gradients(
                rgb_grads, global_step=global_step)
            train_op = tf.group(apply_gradient_rgb)
            null_op = tf.no_op()

        # Create a saver for loading trained checkpoints.
        rgb_variable_map = {}
        for variable in tf.global_variables():
            if variable.name.split(
                    '/')[0] == 'RGB' and 'Adam' not in variable.name.split(
                        '/')[-1] and variable.name.split('/')[2] != 'Logits':
                #rgb_variable_map[variable.name.replace(':0', '')[len('RGB/inception_i3d/'):]] = variable
                rgb_variable_map[variable.name.replace(':0', '')] = variable
        rgb_saver = tf.train.Saver(var_list=rgb_variable_map, reshape=True)

        # Create a saver for writing training checkpoints.
        saver = tf.train.Saver()
        init = tf.global_variables_initializer()

        # Create a session for running Ops on the Graph.
        sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
        sess.run(init)
        # Create summary writer
        tf.summary.scalar('accuracy', accuracy)
        tf.summary.scalar('rgb_loss', rgb_loss)
        tf.summary.scalar('learning_rate', learning_rate)
        merged = tf.summary.merge_all()
    # load pre_train models
    ckpt = tf.train.get_checkpoint_state(rgb_pre_model_save_dir)
    if ckpt and ckpt.model_checkpoint_path:
        print("loading checkpoint %s,waiting......" %
              ckpt.model_checkpoint_path)
        rgb_saver.restore(sess, ckpt.model_checkpoint_path)
        print("load complete!")

    train_writer = tf.summary.FileWriter(
        './visual_logs/train_rgb_imagenet_10000_6_64_0.0001_decay_split1',
        sess.graph)
    test_writer = tf.summary.FileWriter(
        './visual_logs/test_rgb_imagenet_10000_6_64_0.0001_decay_split1',
        sess.graph)
    for step in range(FLAGS.max_steps):
        start_time = time.time()
        rgb_train_images, flow_train_images, train_labels, _, _, _ = input_data.read_clip_and_label(
            filename='../../list/hmdb_list/trainlist1.list',
            batch_size=FLAGS.batch_size * gpu_num,
            num_frames_per_clip=FLAGS.num_frame_per_clib,
            crop_size=FLAGS.crop_size,
            shuffle=True)
        sess.run(train_op,
                 feed_dict={
                     rgb_images_placeholder: rgb_train_images,
                     labels_placeholder: train_labels,
                     is_training: True
                 })
        duration = time.time() - start_time
        print('Step %d: %.3f sec' % (step, duration))

        # Save a checkpoint and evaluate the model periodically.
        if step % 10 == 0 or (step + 1) == FLAGS.max_steps:
            print('Training Data Eval:')
            summary, acc, loss_rgb = sess.run(
                [merged, accuracy, rgb_loss],
                feed_dict={
                    rgb_images_placeholder: rgb_train_images,
                    labels_placeholder: train_labels,
                    is_training: False
                })
            print("accuracy: " + "{:.5f}".format(acc))
            print("rgb_loss: " + "{:.5f}".format(loss_rgb))
            train_writer.add_summary(summary, step)
            print('Validation Data Eval:')
            rgb_val_images, flow_val_images, val_labels, _, _, _ = input_data.read_clip_and_label(
                filename='../../list/hmdb_list/testlist1.list',
                batch_size=FLAGS.batch_size * gpu_num,
                num_frames_per_clip=FLAGS.num_frame_per_clib,
                crop_size=FLAGS.crop_size,
                shuffle=True)
            summary, acc = sess.run(
                [merged, accuracy],
                feed_dict={
                    rgb_images_placeholder: rgb_val_images,
                    labels_placeholder: val_labels,
                    is_training: False
                })
            print("accuracy: " + "{:.5f}".format(acc))
            test_writer.add_summary(summary, step)
        if (step + 1) % 3000 == 0 or (step + 1) == FLAGS.max_steps:
            saver.save(sess,
                       os.path.join(model_save_dir, 'i3d_hmdb_model'),
                       global_step=step)
    print("done")
Example #3
def run_training():

    if not os.path.exists(model_save_dir):
        os.makedirs(model_save_dir)
    rgb_pre_model_save_dir = "../pretrained"

    with tf.Graph().as_default():
        global_step = tf.get_variable('global_step', [],
                                      initializer=tf.constant_initializer(0),
                                      trainable=False)
        rgb_images_placeholder, flow_images_placeholder, labels_placeholder, is_training = placeholder_inputs(
            FLAGS.batch_size * gpu_num, FLAGS.num_frame_per_clib,
            FLAGS.crop_size, FLAGS.rgb_channels, FLAGS.flow_channels)

        learning_rate = tf.train.exponential_decay(FLAGS.learning_rate,
                                                   global_step,
                                                   decay_steps=3000,
                                                   decay_rate=0.1,
                                                   staircase=True)
        opt_rgb = tf.train.AdamOptimizer(learning_rate)
        with tf.variable_scope('RGB'):
            rgb_logit, _ = InceptionI3d(
                num_classes=FLAGS.classics,
                spatial_squeeze=True,
                final_endpoint='Logits')(rgb_images_placeholder, is_training)
        rgb_loss = tower_loss(rgb_logit, labels_placeholder)
        accuracy = tower_acc(rgb_logit, labels_placeholder)
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            rgb_grads = opt_rgb.compute_gradients(rgb_loss)
            apply_gradient_rgb = opt_rgb.apply_gradients(
                rgb_grads, global_step=global_step)
            train_op = tf.group(apply_gradient_rgb)
            null_op = tf.no_op()

        rgb_variable_map = {}
        for variable in tf.global_variables():
            if variable.name.split(
                    '/')[0] == 'RGB' and 'Adam' not in variable.name.split(
                        '/')[-1] and variable.name.split('/')[2] != 'Logits':
                rgb_variable_map[variable.name.replace(':0', '')] = variable
        rgb_saver = tf.train.Saver(var_list=rgb_variable_map, reshape=True)

        saver = tf.train.Saver()
        init = tf.global_variables_initializer()

        sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
        sess.run(init)
        tf.summary.scalar('accuracy', accuracy)
        tf.summary.scalar('rgb_loss', rgb_loss)
        tf.summary.scalar('learning_rate', learning_rate)
        merged = tf.summary.merge_all()
    ckpt = tf.train.get_checkpoint_state(rgb_pre_model_save_dir)
    if ckpt:
        # get_checkpoint_state returns None when no checkpoint metadata is
        # found; guard the override so it cannot crash before the check below.
        ckpt.model_checkpoint_path = "../pretrained/model.ckpt"
    if ckpt and ckpt.model_checkpoint_path:
        print("loading checkpoint %s, waiting..." %
              ckpt.model_checkpoint_path)
        rgb_saver.restore(sess, ckpt.model_checkpoint_path)
        print("load complete!")

    for step in range(FLAGS.max_steps):
        start_time = time.time()
        rgb_train_images, flow_train_images, train_labels, _, _, _ = input_data.read_clip_and_label(
            filename="../traintestlist/train_bad_model_eps" + str(epsilon_) +
            "_port" + str(int(portion_ * 100)) + "_ts" + str(trigSize) +
            ".txt",
            batch_size=FLAGS.batch_size * gpu_num,
            num_frames_per_clip=FLAGS.num_frame_per_clib,
            crop_size=FLAGS.crop_size,
            shuffle=True)

        sess.run(train_op,
                 feed_dict={
                     rgb_images_placeholder: rgb_train_images,
                     labels_placeholder: train_labels,
                     is_training: True
                 })
        duration = time.time() - start_time
        print('Step %d: %.3f sec' % (step, duration))

        if step % 10 == 0 or (step + 1) == FLAGS.max_steps:
            print('Training Data Eval:')
            summary, acc, loss_rgb = sess.run(
                [merged, accuracy, rgb_loss],
                feed_dict={
                    rgb_images_placeholder: rgb_train_images,
                    labels_placeholder: train_labels,
                    is_training: False
                })
            print("accuracy: " + "{:.5f}".format(acc))
            print("rgb_loss: " + "{:.5f}".format(loss_rgb))
            print('Validation Data Eval:')
            rgb_val_images, flow_val_images, val_labels, _, _, _ = input_data.read_clip_and_label(
                filename="../traintestlist/test_bad_model.txt",
                batch_size=FLAGS.batch_size * gpu_num,
                num_frames_per_clip=FLAGS.num_frame_per_clib,
                crop_size=FLAGS.crop_size,
                shuffle=True)

            trig = np.load("trigger" + str(trigSize) + ".npy")
            for i in range(FLAGS.batch_size):
                for j in range(FLAGS.num_frame_per_clib):
                    for k in range(trigSize):
                        for l in range(trigSize):
                            rgb_val_images[i][j][-(k + 1)][-(
                                l + 1)] = trig[0][j][-(k + 1)][-(l + 1)]

            summary, acc, loss_rgb = sess.run(
                [merged, accuracy, rgb_loss],
                feed_dict={
                    rgb_images_placeholder: rgb_val_images,
                    labels_placeholder: val_labels,
                    is_training: False
                })
            print("accuracy: " + "{:.5f}".format(acc))
            print("rgb_loss: " + "{:.5f}".format(loss_rgb))
        if (step + 1) % 2000 == 0 or (step + 1) == FLAGS.max_steps:
            saver.save(sess,
                       os.path.join(model_save_dir, 'i3d_ucf_model'),
                       global_step=step)
    print("done")
Example #4
    val_dataset = Dataset(
        segment_filepaths=data_split["valid"],
        segment_length=CONFIG["SEGMENT_LENGTH"],
        frameskip=CONFIG["FRAMESKIP"],
        transform=val_transforms,
    )
    val_dataloader = DataLoader(val_dataset,
                                batch_size=CONFIG["BATCH_SIZE"],
                                shuffle=True,
                                pin_memory=True)

    # Setup I3D
    # Choose RGB-I3D or Flow-I3D
    if CONFIG["I3D_MODE"] == 'flow':
        i3d = InceptionI3d(400, in_channels=2)
    else:
        i3d = InceptionI3d(400, in_channels=3)
    if CONFIG["I3D_PRETRAINED_DATASET"] == "charades":
        i3d.replace_logits(157)
    i3d.load_state_dict(
        torch.load('pretrained_models/{}_{}.pt'.format(
            CONFIG["I3D_MODE"], CONFIG["I3D_PRETRAINED_DATASET"])))
    i3d.replace_logits(dataset.NUM_LABELS)
    if CONFIG["I3D_LOAD_MODEL_PATH"]:
        i3d.load_state_dict(torch.load(CONFIG["I3D_LOAD_MODEL_PATH"]))
    i3d = i3d.cuda()
    i3d = nn.DataParallel(i3d)

    # Setup optimizer and lr_scheduler
    optimizer = optim.SGD(
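Example #4 breaks off mid-call. A hypothetical continuation of the optimizer and scheduler setup; every hyperparameter below is a placeholder value, not the author's:

    # Hypothetical continuation; lr, momentum, weight decay, and the
    # milestone schedule are assumed values, not taken from the source.
    optimizer = optim.SGD(i3d.parameters(),
                          lr=0.01,
                          momentum=0.9,
                          weight_decay=1e-7)
    lr_scheduler = optim.lr_scheduler.MultiStepLR(optimizer,
                                                  milestones=[10, 25],
                                                  gamma=0.1)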
Example #5
def run_training():
    # Get the sets of images and labels for training, validation, and test.
    # Tell TensorFlow that the model will be built into the default Graph.
    rgb_pre_model_save_dir = "./models/rgb_imagenet_10000_6_64_0.0001_decay"
    flow_pre_model_save_dir = "./models/flow_imagenet_20000_4_64_0.0001_decay"
    test_list_file = '../../list/hmdb_list/test_flow.list'
    file = list(open(test_list_file, 'r'))
    num_test_videos = len(file)
    print("Number of test videos={}".format(num_test_videos))
    with tf.Graph().as_default():
        rgb_images_placeholder, flow_images_placeholder, labels_placeholder, is_training = placeholder_inputs(
            FLAGS.batch_size * gpu_num, FLAGS.num_frame_per_clib,
            FLAGS.crop_size, FLAGS.rgb_channels)

        with tf.variable_scope('RGB'):
            rgb_logit, _ = InceptionI3d(
                num_classes=FLAGS.classics,
                spatial_squeeze=True,
                final_endpoint='Logits',
                name='inception_i3d')(rgb_images_placeholder, is_training)
        with tf.variable_scope('Flow'):
            flow_logit, _ = InceptionI3d(
                num_classes=FLAGS.classics,
                spatial_squeeze=True,
                final_endpoint='Logits',
                name='inception_i3d')(flow_images_placeholder, is_training)
        norm_score = tf.nn.softmax(tf.add(rgb_logit, flow_logit))

        # Create a saver for writing training checkpoints.
        rgb_variable_map = {}
        flow_variable_map = {}
        for variable in tf.global_variables():
            if variable.name.split('/')[
                    0] == 'RGB' and 'Adam' not in variable.name.split('/')[-1]:
                rgb_variable_map[variable.name.replace(':0', '')] = variable
        rgb_saver = tf.train.Saver(var_list=rgb_variable_map, reshape=True)

        for variable in tf.global_variables():
            if variable.name.split(
                    '/')[0] == 'Flow' and 'Adam' not in variable.name.split(
                        '/')[-1]:
                flow_variable_map[variable.name.replace(':0', '')] = variable
        flow_saver = tf.train.Saver(var_list=flow_variable_map, reshape=True)
        saver = tf.train.Saver()
        init = tf.global_variables_initializer()

        # Create a session for running Ops on the Graph.
        sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
        sess.run(init)

    # load pre_train models
    ckpt = tf.train.get_checkpoint_state(rgb_pre_model_save_dir)
    if ckpt and ckpt.model_checkpoint_path:
        print("loading checkpoint %s,waiting......" %
              ckpt.model_checkpoint_path)
        rgb_saver.restore(sess, ckpt.model_checkpoint_path)
        print("load complete!")
    ckpt = tf.train.get_checkpoint_state(flow_pre_model_save_dir)
    if ckpt and ckpt.model_checkpoint_path:
        print("loading checkpoint %s,waiting......" %
              ckpt.model_checkpoint_path)
        flow_saver.restore(sess, ckpt.model_checkpoint_path)
        print("load complete!")

    all_steps = num_test_videos
    top1_list = []
    for step in range(all_steps):
        start_time = time.time()
        s_index = 0
        predicts = []
        top1 = False
        while True:
            rgb_images, flow_images, val_labels, s_index, is_end = input_test.read_clip_and_label(
                filename=file[step],
                batch_size=FLAGS.batch_size * gpu_num,
                s_index=s_index,
                num_frames_per_clip=FLAGS.num_frame_per_clib,
                crop_size=FLAGS.crop_size,
            )
            predict = sess.run(norm_score,
                               feed_dict={
                                   rgb_images_placeholder: rgb_images,
                                   flow_images_placeholder: flow_images,
                                   labels_placeholder: val_labels,
                                   is_training: False
                               })
            predicts.append(
                np.array(predict).astype(np.float32).reshape(FLAGS.classics))
            if is_end:
                avg_pre = np.mean(predicts, axis=0).tolist()
                top1 = (avg_pre.index(max(avg_pre)) == val_labels)
                top1_list.append(top1)
                break
        duration = time.time() - start_time
        print('TOP_1_ACC in test: %f, time use: %.3f' % (top1, duration))
    print(len(top1_list))
    print('TOP_1_ACC in test_all: %f' % np.mean(top1_list))
    print("done")