# Evaluate the trained CNN-feature + RNN classifier on the validation split:
# write per-video predictions to p2_result.txt, then score them against
# gt_valid.csv with accuracy_score.
print("====================> Loading Data")
val_loader = torch.utils.data.DataLoader(data.DATA(args, mode='valid'),
                                         batch_size=args.train_batch,
                                         num_workers=args.workers,
                                         shuffle=False)

print("====================> Loading Model")
feature, rnn = load_model(args)

print("====================> Calculating the pred and writing the .txt")
store(feature, rnn, val_loader, args.train_batch)

# Predictions: one integer label per line (strip the trailing newline).
with open('p2_result.txt') as f:
    pred = np.asarray([int(line.strip()) for line in f.readlines()])

# Ground truth comes from the validation label CSV.
data_dir = '../hw4_data'
video_dir = os.path.join(data_dir, 'TrimmedVideos')
label_path = os.path.join(video_dir, 'label', 'gt_valid.csv')
dic = getVideoList(label_path)
gt = np.asarray([int(x) for x in dic.get('Action_labels')])

print(accuracy_score(gt, pred))
return seq, seq_length if __name__ == '__main__': epochs = 100 n_classes = 11 hidden_size = 1000 batch_size = 100 num_layers = 1 boardX = False presave_tensor = True if boardX: from tensorboardX import SummaryWriter writer = SummaryWriter('runs/' + sys.argv[1]) train_info = getVideoList( '/data/r06942052/HW5_data/TrimmedVideos/label/gt_train.csv') train_path = '/data/r06942052/HW5_data/TrimmedVideos/video/train' train_category = train_info['Video_category'] train_name = train_info['Video_name'] train_tag = np.array(train_info['Action_labels']).astype('float') train_tag = torch.from_numpy(train_tag) del train_info valid_info = getVideoList( '/data/r06942052/HW5_data/TrimmedVideos/label/gt_valid.csv') valid_path = '/data/r06942052/HW5_data/TrimmedVideos/video/valid' valid_category = valid_info['Video_category'] valid_name = valid_info['Video_name'] valid_tag = np.array(valid_info['Action_labels']).astype('float') valid_tag = torch.from_numpy(valid_tag) del valid_info
import reader
import numpy as np

if __name__ == '__main__':
    # Predicted labels: one integer per line.
    with open('log/p2_result.txt', "r") as f:
    #with open('log/p1_valid.txt', "r") as f:
        label = [int(line.strip()) for line in f.readlines()]

    # Ground-truth labels from the validation CSV (values are strings).
    collection = reader.getVideoList('hw4_data/TrimmedVideos/label/gt_valid.csv')
    act_label = collection['Action_labels']
    length = len(act_label)

    # Count matching predictions; summing booleans yields the match count.
    cor = sum(label[i] == int(act_label[i]) for i in range(length))
    acc = cor / length
    print('Accuracy:', acc)
import sys
import torch
import torchvision.transforms as transforms
from tqdm import tqdm
from model import Resnet50
from reader import readShortVideo
from reader import getVideoList

tqdm.pandas()

# Command-line arguments.
video_path = sys.argv[1]        # directory containing the trimmed videos
test_label_path = sys.argv[2]   # label CSV path
save_path = sys.argv[3]
mode = sys.argv[4]              # "test" -> no ground-truth labels available

# Parse the label CSV into a dict of parallel lists.
# (fixed: renamed `dict` -> `video_info` so the builtin is not shadowed)
video_info = getVideoList(test_label_path)
if mode != "test":
    action_labels = (video_info['Action_labels'])
video_names = (video_info['Video_name'])
video_categorys = (video_info['Video_category'])
total_num = len(video_names)

# Decode every listed video into a frame array.
test_videos = []
test_labels = []
print("\nloading videos...")
with tqdm(total=total_num) as pbar:
    for i, (video_category, video_name) in enumerate(zip(video_categorys, video_names)):
        # (fixed: renamed misleading `train_video` -> `test_video`;
        #  this loop loads the test/validation split)
        test_video = readShortVideo(video_path, video_category, video_name)
        test_videos.append(test_video)
        pbar.update(1)
from reader import readShortVideo, getVideoList
from ResNet import Video2Tensor
from RNN import *

# Pretrained ResNet-50 used as a frozen frame-feature extractor.
resnet50 = models.resnet50(pretrained=True)
resnet50.cuda()
resnet50.eval()

if __name__ == '__main__':
    start_time = time.time()
    batch_size = 100
    presave = True   # load precomputed feature tensors instead of re-extracting
    TSNE = True

    # Video metadata: argv[2] is the video directory, argv[3] the label CSV.
    video_info = getVideoList(sys.argv[3])
    video_path = sys.argv[2]
    video_category = video_info['Video_category']
    video_name = video_info['Video_name']
    video_tag = np.array(video_info['Action_labels']).astype('float')
    del video_info

    if presave:
        cnn_video_ts = torch.load('/data/r06942052/cnn_ts.pt')
        rnn_video_ts = torch.load('/data/r06942052/rnn_ts.pt')
    else:
        cnn_video_ts = Video2Tensor(video_path, video_category, video_name)
        video_ts, video_len = Video2Seq(video_path, video_category, video_name)
        # NOTE(review): `video_ts`/`video_len` only exist on this branch, so
        # the dataset is built here; confirm indentation against the original.
        video_set = Data.TensorDataset(video_ts, torch.Tensor(video_len).long())
# Usage:
#   python eval.py p1_valid.txt ../hw4_data/TrimmedVideos/label/gt_test_ans.csv
#   python eval.py p2_result.txt ../hw4_data/TrimmedVideos/label/gt_test_ans.csv
#   python eval.py p2_result.txt ../hw4_data/TrimmedVideos/label/gt_valid.csv
from reader import getVideoList
import numpy as np
import sys
import os

test_predict_path = sys.argv[1]
test_label_path = sys.argv[2]  # e.g. TrimmedVideos/label/gt_valid.csv

# Read ground-truth metadata and predictions.
# (fixed: renamed `dict` -> `video_info` to avoid shadowing the builtin, and
#  the prediction file is now opened with a context manager so it is closed.)
video_info = getVideoList(os.path.join(test_label_path))
with open(os.path.join(test_predict_path), 'r') as f:
    predict_vals = f.read().splitlines()

# Compare predictions element-wise against the labels.
print("\nevaluation ans...")
predict_vals = np.array(predict_vals).astype(int)
print("predict_vals:\n", predict_vals)
label_vals = np.array(video_info['Action_labels']).astype(int)
print("label_vals:\n", label_vals)
accuracy = np.mean(predict_vals == label_vals)
print("accuracy:", accuracy)
parser = argparse.ArgumentParser()
parser.add_argument("--preds", type=str, default="", help="Path to predictions")
parser.add_argument("--gt", type=str, default="", help="Path to ground truths")
opt = parser.parse_args()

# Predictions: one label per line, kept as strings.
with open(opt.preds) as f:
    preds = f.read().splitlines()  # list of strings, length = number of lines
# (fixed: dropped the redundant f.close(); `with` already closes the file)

# Ground truth labels
video_dict = reader.getVideoList(opt.gt)

# Accuracy: string-compare each prediction against the CSV label.
correct = 0
total = 0
for idx, prediction in enumerate(preds):
    if prediction == video_dict["Action_labels"][idx]:
        correct += 1
    total += 1
acc = (correct / total) * 100
print("ACC: %d/%d correct (%.2f%%)" % (correct, total, acc))
print("***** FINISHED *****")
def train(FLAG):
    """Train the M2 LSTM action classifier with early stopping.

    Saves the best checkpoint (by validation accuracy) and the parameter
    dict under FLAG.save_dir, then plots loss/accuracy curves.
    """
    train_list = getVideoList(FLAG.train_video_list)
    valid_list = getVideoList(FLAG.valid_video_list)
    dtrain = pd.DataFrame.from_dict(train_list)
    dvalid = pd.DataFrame.from_dict(valid_list)

    # Pre-extracted frame features.
    xtrain = load_frame(FLAG.train_pkl_file)
    xtest = load_frame(FLAG.valid_pkl_file)

    # One-hot labels over the 11 action classes.
    Ytrain = one_hot_encoding(np.array(dtrain.Action_labels).astype('int32'), 11)
    Ytest = one_hot_encoding(np.array(dvalid.Action_labels).astype('int32'), 11)

    # Build the model graph.
    scope_name = "M2"
    model = MQ2(scope_name=scope_name)
    model.build(lstm_units=[1024, 1024], max_seq_len=25,
                input_dim=40960, output_dim=11)

    # Only optimize variables that belong to this model's scope.
    train_vars = [v for v in tf.trainable_variables() if model.scope_name in v.name]

    # Optimizer.
    learning_rate = FLAG.lr
    train_op = tf.train.AdamOptimizer(learning_rate=learning_rate,
                                      beta1=0.5).minimize(model.loss,
                                                          var_list=train_vars)

    def initialize_uninitialized(sess):
        # Initialize only the variables that are not yet initialized.
        global_vars = tf.global_variables()
        is_not_initialized = sess.run(
            [tf.is_variable_initialized(var) for var in global_vars])
        not_initialized_vars = [
            v for (v, f) in zip(global_vars, is_not_initialized) if not f
        ]
        if len(not_initialized_vars):
            sess.run(tf.variables_initializer(not_initialized_vars))

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        # Hyper-parameters.
        batch_size = 32
        epoch = 50
        early_stop_patience = 10
        min_delta = 0.0001

        # Recorders.
        epoch_counter = 0
        history = list()

        # Re-initialize (reset due to adding a new task).
        initialize_uninitialized(sess)

        patience_counter = 0
        current_best_val_accu = 0

        saver = tf.train.Saver(tf.global_variables(), max_to_keep=1)
        checkpoint_path = os.path.join(FLAG.save_dir, 'model.ckpt')

        # Train until patience is exhausted or the epoch budget is reached.
        while (patience_counter < early_stop_patience and epoch_counter < epoch):
            stime = time.time()

            # --- training pass ---
            train_loss, train_accu = 0.0, 0.0
            for i in range(int(len(xtrain) / batch_size)):
                st = i * batch_size
                ed = (i + 1) * batch_size
                Xtrain, Xtrain_end_index = pad_feature_maxlen(
                    xtrain[st:ed], max_len=model.max_seq_len)
                _, loss, accu, logits = sess.run(
                    [train_op, model.loss, model.accuracy, model.logits],
                    feed_dict={model.x: Xtrain,
                               model.y: Ytrain[st:ed],
                               model.seq_end_index: Xtrain_end_index,
                               model.is_train: True})
                train_loss += loss
                train_accu += accu
            train_loss = train_loss / (len(xtrain) / batch_size)
            train_accu = train_accu / (len(xtrain) / batch_size)

            # --- validation pass (no updates) ---
            val_loss, val_accu = 0.0, 0.0
            for i in range(int(len(xtest) / batch_size)):
                st = i * batch_size
                ed = (i + 1) * batch_size
                Xtest, Xtest_end_index = pad_feature_maxlen(
                    xtest[st:ed], max_len=model.max_seq_len)
                loss, accu, logits = sess.run(
                    [model.loss, model.accuracy, model.logits],
                    feed_dict={model.x: Xtest,
                               model.y: Ytest[st:ed],
                               model.seq_end_index: Xtest_end_index,
                               model.is_train: False})
                val_loss += loss
                val_accu += accu
            val_loss = val_loss / (len(xtest) / batch_size)
            val_accu = val_accu / (len(xtest) / batch_size)

            print(
                "Epoch %s (%s), %s sec >> train loss: %.4f, train accu: %.4f, val loss: %.4f, val accu: %.4f"
                % (epoch_counter, patience_counter,
                   round(time.time() - stime, 2),
                   train_loss, train_accu, val_loss, val_accu))
            history.append([train_loss, train_accu, val_loss, val_accu])

            # Early stopping: save on improvement, otherwise lose patience.
            if (val_accu - current_best_val_accu) > min_delta:
                current_best_val_accu = val_accu
                patience_counter = 0
                saver.save(sess, checkpoint_path, global_step=epoch_counter)
                print("save in %s" % checkpoint_path)
                para_dict = sess.run(model.para_dict)
                np.save(os.path.join(FLAG.save_dir, "para_dict.npy"), para_dict)
                print("save in %s" % os.path.join(FLAG.save_dir, "para_dict.npy"))
            else:
                patience_counter += 1

            epoch_counter += 1
        # end of session

    # Plot the training curves recorded during the run.
    df = pd.DataFrame(history)
    df.columns = ['train_loss', 'train_accu', 'val_loss', 'val_accu']
    plt.figure(0)
    df[['train_loss', 'val_loss']].plot()
    plt.savefig(os.path.join(FLAG.save_dir, 'loss.png'))
    plt.close()
    plt.figure(0)
    df[['train_accu', 'val_accu']].plot()
    plt.savefig(os.path.join(FLAG.save_dir, 'accu.png'))
    plt.close()
def prediction(model_fp, vgg_fp, data_fp, label_fp, output_fp, limit):
    """Extract VGG features for each listed video, classify them with the
    trained model, and write the predicted labels to p2_result.txt."""
    video_list = getVideoList(label_fp)
    videos_output, labels_output = [], []
    data_num = limit if limit != None else len(video_list["Video_category"])

    # ImageNet normalization applied frame by frame.
    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]
    norm = Normalize(mean, std)

    vgg = tor.load(vgg_fp)
    vgg.cuda()

    for idx in range(data_num):
        print("Convert videos into numpy: {}/{} \r".format(idx + 1, data_num), end="")
        cat = video_list["Video_category"][idx]
        name = video_list["Video_name"][idx]
        label = video_list["Action_labels"][idx]
        data = readShortVideo(data_fp, cat, name, downsample_factor=12)

        # Evenly subsample long videos down to MAX_VIDEO_LEN frames.
        if len(data) > MAX_VIDEO_LEN:
            picks = [
                math.floor(data.shape[0] * _i / MAX_VIDEO_LEN)
                for _i in range(MAX_VIDEO_LEN)
            ]
            data = data[picks]

        # HWC uint8 -> CHW float in [0, 1], then normalize each frame.
        data = tor.Tensor(data).permute(0, 3, 1, 2) / 255.
        for i in range(len(data)):
            data[i] = norm(data[i])

        data = data.cuda()
        out = vgg(data)
        videos_output.append(out.cpu().data.numpy())
        labels_output.append(int(label))

    vgg.cpu()
    features, labels = np.array(videos_output), np.array(labels_output)

    ### Prediction
    model = tor.load(model_fp)
    model.cuda()
    model.eval()

    correct, total = 0, len(labels)
    preds = []
    for i, (x, label) in enumerate(zip(features, labels), 1):
        print("Process: {}/{}".format(i, total))
        x = tor.Tensor(x).unsqueeze(0).cuda()
        pred = int(tor.max(model(x), 1)[1][0].data)
        preds.append(pred)
        if pred == label:
            correct += 1

    ### Output file: one label per line, no trailing newline after the last.
    with open(os.path.join(output_fp, "p2_result.txt"), "w") as f:
        f.write("\n".join(str(item) for item in preds))
def main():
    """Train or test the p1 classifier built on mean-pooled ResNet50 features."""
    # Frozen ResNet50 (max-pooled conv features) as the frame feature extractor.
    base_model = ResNet50(weights='imagenet', include_top=False,
                          pooling='max', input_shape=(240, 320, 3))
    base_model.trainable = False

    def _video_feature(video_dir, cate, name):
        # Decode the trimmed video, preprocess it for ResNet, extract
        # per-frame features, then mean-pool over frames.
        frame = readShortVideo(video_dir, cate, name)
        frame = preprocess_input(frame)
        feat = base_model.predict(frame)
        return np.mean(feat, axis=0)

    if args.action == 'train':
        if not os.path.exists(args.save_model_dir):
            os.makedirs(args.save_model_dir)

        list_train = getVideoList(args.train_label)
        list_valid = getVideoList(args.valid_label)

        # Training features and one-hot labels.
        x_train, y_train = [], []
        for cate, name, label in zip(list_train['Video_category'],
                                     list_train['Video_name'],
                                     list_train['Action_labels']):
            x_train.append(_video_feature(args.train_video, cate, name))
            y_train.append(np.eye(args.n_class, dtype=int)[int(label)])
        x_train = np.array(x_train)
        y_train = np.array(y_train)

        # Validation features and one-hot labels.
        x_valid, y_valid = [], []
        for cate, name, label in zip(list_valid['Video_category'],
                                     list_valid['Video_name'],
                                     list_valid['Action_labels']):
            x_valid.append(_video_feature(args.valid_video, cate, name))
            y_valid.append(np.eye(args.n_class, dtype=int)[int(label)])
        x_valid = np.array(x_valid)
        y_valid = np.array(y_valid)

        classifier = build_classifier()
        classifier.compile(loss='categorical_crossentropy',
                           optimizer='Adam', metrics=['accuracy'])

        class LossHistory(Callback):
            # Records per-epoch loss/accuracy so curves can be saved later.
            def __init__(self, train_data, valid_data):
                self.train_data = train_data
                self.valid_data = valid_data

            def on_train_begin(self, logs={}):
                self.losses = []
                self.val_losses = []
                self.acc = []
                self.val_acc = []

            def on_epoch_end(self, epoch, logs={}):
                x_valid = self.valid_data
                x_train = self.train_data
                self.losses.append(logs['loss'])
                self.val_losses.append(logs['val_loss'])
                self.acc.append(logs['acc'])
                self.val_acc.append(logs['val_acc'])

            def save(self, path):
                np.save(os.path.join(path, 'losses.npy'), self.losses)
                np.save(os.path.join(path, 'val_losses.npy'), self.val_losses)
                np.save(os.path.join(path, 'acc.npy'), self.acc)
                np.save(os.path.join(path, 'val_acc.npy'), self.val_acc)

        history = LossHistory(x_train, x_valid)
        ckpt = ModelCheckpoint(
            filepath=os.path.join(args.save_model_dir,
                                  'model_p1_e{epoch:02d}_{val_acc:.4f}.h5'),
            save_best_only=True, save_weights_only=True, verbose=1,
            monitor='val_acc', mode='max')
        classifier.fit(x_train, y_train, shuffle=True, epochs=args.epochs,
                       batch_size=args.batch_size,
                       validation_data=(x_valid, y_valid),
                       callbacks=[ckpt, history])

        if not os.path.exists('./p1_callback'):
            os.makedirs('./p1_callback')
        history.save('./p1_callback')

    elif args.action == 'test':
        list_test = getVideoList(args.test_label)
        x_test = []
        for cate, name, label in zip(list_test['Video_category'],
                                     list_test['Video_name'],
                                     list_test['Action_labels']):
            x_test.append(_video_feature(args.test_video, cate, name))
        x_test = np.array(x_test)

        classifier = build_classifier()
        classifier.load_weights(args.load_model_file)
        pred = np.argmax(classifier.predict(x_test), axis=-1)

        if not os.path.exists(args.output_dir):
            os.makedirs(args.output_dir)
        with open(os.path.join(args.output_dir, args.output_name), 'w') as fo:
            for idx in range(pred.shape[0]):
                fo.write('{}\n'.format(pred[idx]))
from reader import getVideoList, readShortVideo
import torch

print("Working on getting videos")
dic = getVideoList('hw4_data/TrimmedVideos/label/gt_valid.csv')
video_idx = dic.get("Video_index")[0]
print(len(dic))

index = []
fram = []

# NOTE(review): the original per-video loop was commented out; only the first
# video (element 0 of every metadata column) is actually processed here.
video = {x: y[0] for x, y in dic.items()}

frame = readShortVideo('hw4_data/TrimmedVideos/video/valid',
                       video.get('Video_category'),
                       video.get('Video_name'))
print(len(frame))
frame_res = torch.from_numpy(frame)
frame_res.resize_(12, 240, 240, 3)

index.append(video.get('Video_index'))
fram.append(readShortVideo('hw4_data/TrimmedVideos/video/valid',
                           video.get('Video_category'),
                           video.get('Video_name')))

frames = {'Video_index': index, 'frames': fram}
exit(0)