def generate_iad(list_file, sess, predict_write_file=None):
    """Run every clip of ``list_file`` through the network and emit IAD data.

    Each step reads one batch with ``c3d_model.read_clip_and_label``,
    evaluates the softmax scores and the layer activations in ``sess`` and
    passes the activations to ``convert_to_IAD_input``.  When
    ``predict_write_file`` is given, one line per valid sample is also written
    to it: ``true label, score of true label, predicted label, score of
    predicted label``.

    NOTE(review): ``norm_score``, ``layers`` and ``images_placeholder`` are
    neither parameters nor locals of this function, so they have to be
    resolvable at module scope when it is called -- confirm against the rest
    of the file.

    Args:
        list_file: Path of the list file; its line count is used as the
            number of steps.
        sess: Session object whose ``run`` method evaluates the graph.
        predict_write_file: Optional path of the prediction log.  When
            ``None`` no predictions are written.
    """
    with open(list_file, 'r') as list_handle:
        num_videos = sum(1 for _ in list_handle)

    next_start_pos = 0

    # The log is optional: keep ``write_file`` bound to None when no path was
    # supplied so the loop and the cleanup below can test for it.
    write_file = None
    if predict_write_file is not None:
        # Third argument 0 requests an unbuffered file (Python 2 ``open``).
        write_file = open(predict_write_file, "w", 0)

    try:
        # NOTE(review): one step per listed video, although every step reads
        # FLAGS.batch_size * gpu_num clips -- confirm this is intended.
        for step in xrange(num_videos):
            (test_images, test_labels, next_start_pos, _, valid_len,
             sample_names) = c3d_model.read_clip_and_label(
                IMAGE_DIRECTORY,
                list_file,
                FLAGS.batch_size * gpu_num,
                start_pos=next_start_pos
            )

            predict_score, layers_out = sess.run(
                [norm_score, layers],
                feed_dict={images_placeholder: test_images})

            if write_file is not None:
                # Only the first ``valid_len`` entries of the batch are logged.
                for i in range(valid_len):
                    true_label = test_labels[i]
                    top1_predicted_label = np.argmax(predict_score[i])
                    # Write results: true label, class prob for true label,
                    # predicted label, class prob for predicted label
                    write_file.write('{}, {}, {}, {}\n'.format(
                        true_label,
                        predict_score[i][true_label],
                        top1_predicted_label,
                        predict_score[i][top1_predicted_label]))

            # generate IAD output
            convert_to_IAD_input(IAD_DIRECTORY, layers_out, sample_names,
                                 test_labels, COMPRESSION, THRESHOLDING)
    finally:
        # Release the log even when a step raises.
        if write_file is not None:
            write_file.close()

    print("done")
def _generate_iads_for_list(sess, list_file, norm_score, layers,
                            images_placeholder):
    """Run one list file through the network, logging predictions and IADs.

    Lists whose base name contains ``"train"`` are processed for 10 passes
    (the original code calls this oversampling) and produce no prediction
    log.  Every other list is processed once and its predictions are written
    to ``predict_ret.txt``.

    Args:
        sess: Session used to evaluate ``norm_score`` and ``layers``.
        list_file: Path of the list file; its line count is used as the
            number of steps per pass.
        norm_score: Tensor fetched as the per-class scores.
        layers: Fetch list whose evaluated values are handed to
            ``convert_to_IAD_input``.
        images_placeholder: Placeholder fed with the images of each batch.
    """
    print("Generting IADs for %s" % list_file)

    is_train_list = "train" in os.path.basename(list_file)
    epochs = 10 if is_train_list else 1

    with open(list_file, 'r') as list_handle:
        steps = sum(1 for _ in list_handle)

    # only write predictions if it's a test list; third argument 0 requests
    # an unbuffered file (Python 2 ``open``).
    write_file = None
    if not is_train_list:
        write_file = open("predict_ret.txt", "w", 0)

    try:
        for e in range(epochs):
            # Every pass starts again from the top of the list.
            next_start_pos = 0
            # NOTE(review): one step per listed video, although every step
            # reads FLAGS.batch_size * gpu_num clips -- confirm intended.
            for step in xrange(steps):
                start_time = time.time()
                (test_images, test_labels, next_start_pos, _, valid_len,
                 sample_names) = c3d_model.read_clip_and_label(
                    IMAGE_DIRECTORY,
                    list_file,
                    FLAGS.batch_size * gpu_num,
                    start_pos=next_start_pos
                )

                predict_score, layers_out = sess.run(
                    [norm_score, layers],
                    feed_dict={images_placeholder: test_images})

                if write_file is not None:
                    # Only the first ``valid_len`` entries are logged.
                    for i in range(valid_len):
                        true_label = test_labels[i]
                        top1_predicted_label = np.argmax(predict_score[i])
                        # Write results: true label, class prob for true
                        # label, predicted label, class prob for predicted
                        # label
                        write_file.write('{}, {}, {}, {}\n'.format(
                            true_label,
                            predict_score[i][true_label],
                            top1_predicted_label,
                            predict_score[i][top1_predicted_label]))

                # generate IAD output
                convert_to_IAD_input(IAD_DIRECTORY, layers_out, sample_names,
                                     test_labels, COMPRESSION, THRESHOLDING)

                end_time = time.time()
                print("[%s:%s:%s - %.3fs]"
                      % (list_file, e, step, end_time - start_time))
    finally:
        # Release the log even when a step raises.
        if write_file is not None:
            write_file.close()

    print("done generating IADs for %s" % list_file)


def generate_iads(max_vals, min_vals):
    """Build the network, restore ``MODEL`` and generate IADs for both lists.

    The graph is created in the default graph, one ``inference_c3d`` tower
    per GPU index in ``range(gpu_num)``.  After the checkpoint ``MODEL`` has
    been restored, ``TEST_LIST`` and then ``TRAIN_LIST`` are processed.

    NOTE(review): ``_variable_with_weight_decay`` is called here with four
    positional arguments, while ``run_training`` in this file calls it with
    three -- confirm the helper's signature accepts both.

    Args:
        max_vals: Not used by this function.
        min_vals: Not used by this function.
    """
    with open(TRAIN_LIST, 'r') as train_handle:
        num_train_videos = sum(1 for _ in train_handle)
    with open(TEST_LIST, 'r') as test_handle:
        num_test_videos = sum(1 for _ in test_handle)
    print("Number of train videos = {}, test videos = {}".format(
        num_train_videos, num_test_videos))

    # Only the image placeholder is fed below; the label placeholder that is
    # returned alongside it is not needed.
    images_placeholder, _ = placeholder_inputs(FLAGS.batch_size * gpu_num)

    with tf.variable_scope('var_name'):
        weights = {
            'wc1': _variable_with_weight_decay('wc1', [3, 3, 3, 3, 64], 0.04, 0.00),
            'wc2': _variable_with_weight_decay('wc2', [3, 3, 3, 64, 128], 0.04, 0.00),
            'wc3a': _variable_with_weight_decay('wc3a', [3, 3, 3, 128, 256], 0.04, 0.00),
            'wc3b': _variable_with_weight_decay('wc3b', [3, 3, 3, 256, 256], 0.04, 0.00),
            'wc4a': _variable_with_weight_decay('wc4a', [3, 3, 3, 256, 512], 0.04, 0.00),
            'wc4b': _variable_with_weight_decay('wc4b', [3, 3, 3, 512, 512], 0.04, 0.00),
            'wc5a': _variable_with_weight_decay('wc5a', [3, 3, 3, 512, 512], 0.04, 0.00),
            'wc5b': _variable_with_weight_decay('wc5b', [3, 3, 3, 512, 512], 0.04, 0.00),
            'wd1': _variable_with_weight_decay('wd1', [8192, 4096], 0.04, 0.001),
            'wd2': _variable_with_weight_decay('wd2', [4096, 4096], 0.04, 0.002),
            'out': _variable_with_weight_decay('wout', [4096, NUM_CLASSES], 0.04, 0.005),
        }
        biases = {
            'bc1': _variable_with_weight_decay('bc1', [64], 0.04, 0.0),
            'bc2': _variable_with_weight_decay('bc2', [128], 0.04, 0.0),
            'bc3a': _variable_with_weight_decay('bc3a', [256], 0.04, 0.0),
            'bc3b': _variable_with_weight_decay('bc3b', [256], 0.04, 0.0),
            'bc4a': _variable_with_weight_decay('bc4a', [512], 0.04, 0.0),
            'bc4b': _variable_with_weight_decay('bc4b', [512], 0.04, 0.0),
            'bc5a': _variable_with_weight_decay('bc5a', [512], 0.04, 0.0),
            'bc5b': _variable_with_weight_decay('bc5b', [512], 0.04, 0.0),
            'bd1': _variable_with_weight_decay('bd1', [4096], 0.04, 0.0),
            'bd2': _variable_with_weight_decay('bd2', [4096], 0.04, 0.0),
            'out': _variable_with_weight_decay('bout', [NUM_CLASSES], 0.04, 0.0),
        }

    logits = []
    layers = []
    for gpu_index in range(0, gpu_num):
        with tf.device('/gpu:%d' % gpu_index):
            # Each tower gets its own FLAGS.batch_size slice of the batch.
            # NOTE(review): 0.6 is presumably the dropout setting -- confirm
            # against ``inference_c3d``.
            logit, layer = inference_c3d(
                images_placeholder[gpu_index * FLAGS.batch_size:
                                   (gpu_index + 1) * FLAGS.batch_size,
                                   :, :, :, :],
                0.6,
                FLAGS.batch_size,
                weights,
                biases)
            logits.append(logit)
            layers.extend(layer)

    # layers is a list of length 10 (5 * gpu_num)
    # layers[0] is a tensor with shape (1, 16, 112, 112, 64)

    logits = tf.concat(logits, 0)
    norm_score = tf.nn.softmax(logits)

    saver = tf.train.Saver()
    sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
    try:
        init = tf.global_variables_initializer()
        sess.run(init)

        # Overwrite the freshly initialised variables with the checkpoint.
        saver.restore(sess, MODEL)

        for list_file in [TEST_LIST, TRAIN_LIST]:
            _generate_iads_for_list(sess, list_file, norm_score, layers,
                                    images_placeholder)
    finally:
        # The session is local to this function; release it on every exit.
        sess.close()
def run_test():
    """Evaluate a restored checkpoint on the test list and log predictions.

    The graph is built in the default graph, one ``c3d_model.inference_c3d``
    tower per GPU index in ``range(gpu_num)``.  The checkpoint ``model_name``
    is restored and the test list is read batch by batch.  For every valid
    sample one line is written to ``predict_ret.txt``: ``true label, score of
    true label, predicted label, score of predicted label``.

    NOTE(review): ``_variable_with_weight_decay`` is called here with four
    positional arguments, while ``run_training`` in this file calls it with
    three -- confirm the helper's signature accepts both.
    """
    # get the list of classes
    # NOTE(review): the result is not used below.
    classes = get_class_list(CLASS_INDEX_FILE)

    model_name = "/home/jordanc/datasets/UCF-101/model_ckpts/c3d_ucf_model-4999"
    test_list_file = 'train-test-splits/testlist01.txt'

    with open(test_list_file, 'r') as list_handle:
        num_test_videos = sum(1 for _ in list_handle)
    print("Number of test videos={}".format(num_test_videos))

    # Only the image placeholder is fed below; the label placeholder that is
    # returned alongside it is not needed.
    images_placeholder, _ = placeholder_inputs(FLAGS.batch_size * gpu_num)

    with tf.variable_scope('var_name'):
        weights = {
            'wc1': _variable_with_weight_decay('wc1', [3, 3, 3, 3, 64], 0.04, 0.00),
            'wc2': _variable_with_weight_decay('wc2', [3, 3, 3, 64, 128], 0.04, 0.00),
            'wc3a': _variable_with_weight_decay('wc3a', [3, 3, 3, 128, 256], 0.04, 0.00),
            'wc3b': _variable_with_weight_decay('wc3b', [3, 3, 3, 256, 256], 0.04, 0.00),
            'wc4a': _variable_with_weight_decay('wc4a', [3, 3, 3, 256, 512], 0.04, 0.00),
            'wc4b': _variable_with_weight_decay('wc4b', [3, 3, 3, 512, 512], 0.04, 0.00),
            'wc5a': _variable_with_weight_decay('wc5a', [3, 3, 3, 512, 512], 0.04, 0.00),
            'wc5b': _variable_with_weight_decay('wc5b', [3, 3, 3, 512, 512], 0.04, 0.00),
            'wd1': _variable_with_weight_decay('wd1', [8192, 4096], 0.04, 0.001),
            'wd2': _variable_with_weight_decay('wd2', [4096, 4096], 0.04, 0.002),
            'out': _variable_with_weight_decay('wout', [4096, c3d_model.NUM_CLASSES], 0.04, 0.005),
        }
        biases = {
            'bc1': _variable_with_weight_decay('bc1', [64], 0.04, 0.0),
            'bc2': _variable_with_weight_decay('bc2', [128], 0.04, 0.0),
            'bc3a': _variable_with_weight_decay('bc3a', [256], 0.04, 0.0),
            'bc3b': _variable_with_weight_decay('bc3b', [256], 0.04, 0.0),
            'bc4a': _variable_with_weight_decay('bc4a', [512], 0.04, 0.0),
            'bc4b': _variable_with_weight_decay('bc4b', [512], 0.04, 0.0),
            'bc5a': _variable_with_weight_decay('bc5a', [512], 0.04, 0.0),
            'bc5b': _variable_with_weight_decay('bc5b', [512], 0.04, 0.0),
            'bd1': _variable_with_weight_decay('bd1', [4096], 0.04, 0.0),
            'bd2': _variable_with_weight_decay('bd2', [4096], 0.04, 0.0),
            'out': _variable_with_weight_decay('bout', [c3d_model.NUM_CLASSES], 0.04, 0.0),
        }

    logits = []
    for gpu_index in range(0, gpu_num):
        with tf.device('/gpu:%d' % gpu_index):
            # Each tower gets its own FLAGS.batch_size slice of the batch.
            # Only the logits are needed for the prediction log; the second
            # return value is ignored.
            # NOTE(review): 0.6 is presumably the dropout setting -- confirm
            # against ``c3d_model.inference_c3d``.
            logit, _ = c3d_model.inference_c3d(
                images_placeholder[gpu_index * FLAGS.batch_size:
                                   (gpu_index + 1) * FLAGS.batch_size,
                                   :, :, :, :],
                0.6,
                FLAGS.batch_size,
                weights,
                biases)
            logits.append(logit)

    logits = tf.concat(logits, 0)
    norm_score = tf.nn.softmax(logits)

    saver = tf.train.Saver()
    sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
    try:
        init = tf.global_variables_initializer()
        sess.run(init)

        # Overwrite the freshly initialised variables with the checkpoint.
        saver.restore(sess, model_name)

        # Buffer size 0 requests an unbuffered file (Python 2 ``open``).
        bufsize = 0
        write_file = open("predict_ret.txt", "w+", bufsize)
        try:
            next_start_pos = 0
            # Number of batches needed to cover the list, rounding up.
            all_steps = int(
                (num_test_videos - 1) / (FLAGS.batch_size * gpu_num) + 1)

            for step in xrange(all_steps):
                sample_output = c3d_model.read_clip_and_label(
                    directory='/home/jordanc/datasets/UCF-101/UCF-101/',
                    filename=test_list_file,
                    batch_size=FLAGS.batch_size * gpu_num,
                    start_pos=next_start_pos)
                test_images = sample_output[0]
                test_labels = sample_output[1]
                next_start_pos = sample_output[2]
                valid_len = sample_output[4]

                predict_score = sess.run(
                    norm_score,
                    feed_dict={images_placeholder: test_images})

                # Only the first ``valid_len`` entries of the batch are logged.
                for i in range(valid_len):
                    true_label = test_labels[i]
                    top1_predicted_label = np.argmax(predict_score[i])
                    # Write results: true label, class prob for true label,
                    # predicted label, class prob for predicted label
                    write_file.write('{}, {}, {}, {}\n'.format(
                        true_label,
                        predict_score[i][true_label],
                        top1_predicted_label,
                        predict_score[i][top1_predicted_label]))
        finally:
            # Release the log even when a step raises.
            write_file.close()
    finally:
        # The session is local to this function; release it on every exit.
        sess.close()

    print("done")
def run_training():
    """Train the network, checkpointing and evaluating periodically.

    A fresh graph is built with one ``c3d_model.inference_c3d`` tower per GPU
    index in ``range(gpu_num)``.  Two Adam optimizers are used: learning rate
    1e-3 for the output layer (``weights['out']``, ``biases['out']``) and
    1e-4 for every other weight and bias.  Per-tower gradients are combined
    with ``average_gradients``.

    Every 100 steps, and on the last step, a checkpoint named
    ``c3d_ucf_model`` is saved under ``model_save_dir``.  At the same time
    accuracy is evaluated on the current training batch and on one batch from
    the test list; both summaries go to ``./visual_logs``.

    Restoring ``model_filename`` is disabled while ``use_pretrained_model``
    is ``False``.

    NOTE(review): ``_variable_with_weight_decay`` is called here with three
    positional arguments, while ``run_test`` and ``generate_iads`` in this
    file call it with four -- confirm the helper's signature accepts both.
    """
    # get the list of classes
    # NOTE(review): the result is not used below.
    classes = get_class_list(CLASS_INDEX_FILE)

    # Create model directory
    if not os.path.exists(model_save_dir):
        os.makedirs(model_save_dir)

    use_pretrained_model = False
    model_filename = "./sports1m_finetuning_ucf101.model"

    with tf.Graph().as_default():
        global_step = tf.get_variable(
            'global_step',
            [],
            initializer=tf.constant_initializer(0),
            trainable=False
        )
        images_placeholder, labels_placeholder = placeholder_inputs(
            FLAGS.batch_size * gpu_num
        )
        tower_grads1 = []
        tower_grads2 = []
        logits = []
        activations = []
        opt_stable = tf.train.AdamOptimizer(1e-4)
        opt_finetuning = tf.train.AdamOptimizer(1e-3)

        with tf.variable_scope('var_name'):
            weights = {
                'wc1': _variable_with_weight_decay('wc1', [3, 3, 3, 3, 64], 0.0005),
                'wc2': _variable_with_weight_decay('wc2', [3, 3, 3, 64, 128], 0.0005),
                'wc3a': _variable_with_weight_decay('wc3a', [3, 3, 3, 128, 256], 0.0005),
                'wc3b': _variable_with_weight_decay('wc3b', [3, 3, 3, 256, 256], 0.0005),
                'wc4a': _variable_with_weight_decay('wc4a', [3, 3, 3, 256, 512], 0.0005),
                'wc4b': _variable_with_weight_decay('wc4b', [3, 3, 3, 512, 512], 0.0005),
                'wc5a': _variable_with_weight_decay('wc5a', [3, 3, 3, 512, 512], 0.0005),
                'wc5b': _variable_with_weight_decay('wc5b', [3, 3, 3, 512, 512], 0.0005),
                'wd1': _variable_with_weight_decay('wd1', [8192, 4096], 0.0005),
                'wd2': _variable_with_weight_decay('wd2', [4096, 4096], 0.0005),
                'out': _variable_with_weight_decay('wout', [4096, c3d_model.NUM_CLASSES], 0.0005),
            }
            biases = {
                'bc1': _variable_with_weight_decay('bc1', [64], 0.000),
                'bc2': _variable_with_weight_decay('bc2', [128], 0.000),
                'bc3a': _variable_with_weight_decay('bc3a', [256], 0.000),
                'bc3b': _variable_with_weight_decay('bc3b', [256], 0.000),
                'bc4a': _variable_with_weight_decay('bc4a', [512], 0.000),
                'bc4b': _variable_with_weight_decay('bc4b', [512], 0.000),
                'bc5a': _variable_with_weight_decay('bc5a', [512], 0.000),
                'bc5b': _variable_with_weight_decay('bc5b', [512], 0.000),
                'bd1': _variable_with_weight_decay('bd1', [4096], 0.000),
                'bd2': _variable_with_weight_decay('bd2', [4096], 0.000),
                'out': _variable_with_weight_decay('bout', [c3d_model.NUM_CLASSES], 0.000),
            }

        # ``list(...)`` keeps the concatenation valid whether ``values()``
        # returns a list or a view.
        model_variables = list(weights.values()) + list(biases.values())

        # The variable split is the same for every tower, so compute it once:
        # varlist2 is the output layer, varlist1 is everything else.
        varlist2 = [weights['out'], biases['out']]
        varlist1 = list(set(model_variables) - set(varlist2))

        for gpu_index in range(0, gpu_num):
            with tf.device('/gpu:%d' % gpu_index):
                batch_begin = gpu_index * FLAGS.batch_size
                batch_end = (gpu_index + 1) * FLAGS.batch_size

                # compute the logits and get the activations
                # NOTE(review): 0.5 is presumably the dropout setting --
                # confirm against ``c3d_model.inference_c3d``.
                logit, activation = c3d_model.inference_c3d(
                    images_placeholder[batch_begin:batch_end, :, :, :, :],
                    0.5,
                    FLAGS.batch_size,
                    weights,
                    biases
                )
                loss_name_scope = ('gpud_%d_loss' % gpu_index)
                loss = tower_loss(
                    loss_name_scope,
                    logit,
                    labels_placeholder[batch_begin:batch_end]
                )
                grads1 = opt_stable.compute_gradients(loss, varlist1)
                grads2 = opt_finetuning.compute_gradients(loss, varlist2)
                tower_grads1.append(grads1)
                tower_grads2.append(grads2)
                logits.append(logit)
                activations.append(activation)

        logits = tf.concat(logits, 0)
        activations = tf.concat(activations, 0)
        accuracy = tower_acc(logits, labels_placeholder)
        tf.summary.scalar('accuracy', accuracy)

        grads1 = average_gradients(tower_grads1)
        grads2 = average_gradients(tower_grads2)
        apply_gradient_op1 = opt_stable.apply_gradients(grads1)
        # Only this apply op is given ``global_step``.
        apply_gradient_op2 = opt_finetuning.apply_gradients(
            grads2, global_step=global_step)
        variable_averages = tf.train.ExponentialMovingAverage(
            MOVING_AVERAGE_DECAY)
        variables_averages_op = variable_averages.apply(
            tf.trainable_variables())
        train_op = tf.group(apply_gradient_op1, apply_gradient_op2,
                            variables_averages_op)

        # Create a saver for writing training checkpoints.
        saver = tf.train.Saver(model_variables)
        init = tf.global_variables_initializer()

        # Create a session for running Ops on the Graph.
        sess = tf.Session(
            config=tf.ConfigProto(allow_soft_placement=True)
        )
        train_writer = None
        test_writer = None
        try:
            sess.run(init)
            if os.path.isfile(model_filename) and use_pretrained_model:
                saver.restore(sess, model_filename)

            # Create summary writers
            merged = tf.summary.merge_all()
            train_writer = tf.summary.FileWriter('./visual_logs/train',
                                                 sess.graph)
            test_writer = tf.summary.FileWriter('./visual_logs/test',
                                                sess.graph)

            for step in xrange(FLAGS.max_steps):
                start_time = time.time()
                train_images, train_labels, _, _, _, sample_names = \
                    c3d_model.read_clip_and_label(
                        directory='/home/jordanc/datasets/UCF-101/UCF-101/',
                        filename='train-test-splits/trainlist01.txt',
                        batch_size=FLAGS.batch_size * gpu_num,
                        num_frames_per_clip=c3d_model.NUM_FRAMES_PER_CLIP,
                        crop_size=c3d_model.CROP_SIZE,
                        shuffle=True
                    )

                _, activations_out = sess.run(
                    [train_op, activations],
                    feed_dict={
                        images_placeholder: train_images,
                        labels_placeholder: train_labels
                    })
                print("len sample_names = %s, activations: type = %s, len = %s, elem shape = %s" % (
                    len(sample_names),
                    type(activations_out),
                    len(activations_out),
                    str(activations_out[0].shape)))

                duration = time.time() - start_time
                print('Step %d: %.3f sec' % (step, duration))

                # Save a checkpoint and evaluate the model periodically.
                if (step) % 100 == 0 or (step + 1) == FLAGS.max_steps:
                    saver.save(sess,
                               os.path.join(model_save_dir, 'c3d_ucf_model'),
                               global_step=step)

                    # Accuracy on the batch that was just trained on.
                    print('Training Data Eval:')
                    summary, acc = sess.run(
                        [merged, accuracy],
                        feed_dict={
                            images_placeholder: train_images,
                            labels_placeholder: train_labels
                        })
                    print("accuracy: " + "{:.5f}".format(acc))
                    train_writer.add_summary(summary, step)

                    # Accuracy on a single batch read from the test list.
                    print('Validation Data Eval:')
                    val_images, val_labels, _, _, _, _ = \
                        c3d_model.read_clip_and_label(
                            directory='/home/jordanc/datasets/UCF-101/UCF-101/',
                            filename='train-test-splits/testlist01.txt',
                            batch_size=FLAGS.batch_size * gpu_num,
                            num_frames_per_clip=c3d_model.NUM_FRAMES_PER_CLIP,
                            crop_size=c3d_model.CROP_SIZE,
                            shuffle=True
                        )
                    summary, acc = sess.run(
                        [merged, accuracy],
                        feed_dict={
                            images_placeholder: val_images,
                            labels_placeholder: val_labels
                        })
                    print("accuracy: " + "{:.5f}".format(acc))
                    test_writer.add_summary(summary, step)
        finally:
            # Close the writers so pending summary events reach disk, then
            # release the session, even when training is interrupted.
            for writer in (train_writer, test_writer):
                if writer is not None:
                    writer.close()
            sess.close()

    print("done")