def calculate_confusion_matrix():
    args = get_parse()
    cabin_video_dir = args.cabin_video_dir
    face_video_dir = args.face_video_dir
    test_data_path = args.test_data_path
    batch_size = args.batch_size
    num_classes = args.num_classes
    weight = args.weight
    print('Start to load data')
    test_transforms = transforms.Compose([
        videotransforms.CenterCrop(224),
        videotransforms.ToTensor(),
        videotransforms.ClipNormalize()
    ])
    test_dataset = IVBSSDataset(cabin_video_dir, face_video_dir,
                                test_data_path, test_transforms)
    print('Total number of test samples is {0}'.format(len(test_dataset)))
    test_dataloader = DataLoader(test_dataset,
                                 batch_size=batch_size,
                                 sampler=SequentialSampler(test_dataset),
                                 collate_fn=collate_fn)
    model = TAL_Net(num_classes)
    print('Load checkpoint')
    model = load_ckp(args.ckp_path, model)
    model.cuda()
    model.eval()
    print('Start to calculate confusion matrix')
    all_predicts = []
    all_labels = []
    for i, (cabin_imgs, face_imgs, labels, start_labels,
            end_labels) in enumerate(test_dataloader):
        cabin_imgs = cabin_imgs.cuda()
        face_imgs = face_imgs.cuda()
        with torch.no_grad():
            class_scores, start_scores, end_scores = model(
                cabin_imgs, face_imgs)
        class_preds = torch.argmax(class_scores, dim=1)
        class_preds = class_preds.cpu().numpy()
        labels = labels.numpy()
        all_predicts.append(class_preds)
        all_labels.append(labels)
    all_predicts = np.concatenate(all_predicts)
    all_labels = np.concatenate(all_labels)
    cf_matrix = confusion_matrix(all_labels, all_predicts)
    normalized_confusion_matrix = confusion_matrix(all_labels,
                                                   all_predicts,
                                                   normalize='true')
    return cf_matrix, normalized_confusion_matrix
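# Example (a minimal sketch): because the second matrix is computed with
# normalize='true', each row sums to 1 and the diagonal holds per-class recall.
# The helper name and the placeholder class names below are assumptions, not
# part of the original code.
def report_per_class_recall(normalized_cf, class_names=None):
    """Print per-class recall from a row-normalized confusion matrix."""
    num_classes = normalized_cf.shape[0]
    if class_names is None:
        class_names = ['class_{}'.format(i) for i in range(num_classes)]
    for i in range(num_classes):
        print('{0}: recall = {1:.4f}'.format(class_names[i], normalized_cf[i, i]))

# Usage sketch:
# cf_matrix, normalized_cf = calculate_confusion_matrix()
# report_per_class_recall(normalized_cf)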
def predict():
    args = get_parse()
    cabin_video_dir = args.cabin_video_dir
    face_video_dir = args.face_video_dir
    test_data_path = args.test_data_path
    batch_size = args.batch_size
    num_classes = args.num_classes
    # assumed to be supplied via args, as in train()
    pretrained_I3D_model = args.pretrained_I3D_model
    print('Start to load data')
    test_transforms = transforms.Compose(
        [videotransforms.CenterCrop(224),
         videotransforms.ToTensor()])
    test_dataset = IVBSSDataset(face_video_dir, cabin_video_dir,
                                test_data_path, test_transforms)
    test_dataloader = DataLoader(test_dataset,
                                 batch_size=batch_size,
                                 sampler=SequentialSampler(test_dataset),
                                 collate_fn=collate_fn)
    model = TemporalActionLocalization(num_classes, pretrained_I3D_model)
    print('Load checkpoint')
    model = load_ckp(args.ckp_path, model)
    model.cuda()
    model.eval()
    print('Start to test')
    test_loss = 0.0
    test_steps = 0
    for i, (face_imgs, cabin_imgs, labels) in enumerate(test_dataloader):
        face_imgs = face_imgs.cuda()
        cabin_imgs = cabin_imgs.cuda()
        for k, v in labels.items():
            labels[k] = v.cuda()
        loss = model(face_imgs, cabin_imgs, labels)
        test_loss += loss.item()
        test_steps += 1
    avg_test_loss = test_loss / test_steps
    return avg_test_loss
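# A hypothetical refactor of the evaluation loop above into a reusable helper,
# with the forward pass under torch.no_grad() as in the other test functions
# in this file. The helper name is an assumption; it presumes the same
# (face_imgs, cabin_imgs, labels) batch structure.
def compute_avg_loss(model, dataloader, device='cuda'):
    """Average the model loss over a dataloader without tracking gradients."""
    total, steps = 0.0, 0
    for face_imgs, cabin_imgs, labels in dataloader:
        face_imgs = face_imgs.to(device)
        cabin_imgs = cabin_imgs.to(device)
        labels = {k: v.to(device) for k, v in labels.items()}
        with torch.no_grad():
            loss = model(face_imgs, cabin_imgs, labels)
        total += loss.item()
        steps += 1
    return total / steps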
def predict_events(cabin_video_path, face_video_path, args): if torch.cuda.is_available(): device = 'cuda' else: device = 'cpu' checkpoint = args.checkpoint clip_length = args.clip_length clip_stride = args.clip_stride batch_size = args.batch_size num_classes = args.num_classes threshold = args.threshold cabin_clips, face_clips, indices_in_cabin_clips = clip_generation(cabin_video_path, face_video_path, clip_length, clip_stride) model = TAL_Net(num_classes) ckp = torch.load(checkpoint) model.load_state_dict(ckp['model']) model.to(device) model.eval() clip_transforms = transforms.Compose([videotransforms.CenterCrop(224), videotransforms.ToTensor(), videotransforms.ClipNormalize() ]) all_clips = [] all_predict_classes = [] all_start_scores = [] all_end_scores = [] n = len(cabin_clips) // batch_size for i in range(n): cabin_video_frames_batch = [] face_video_frames_batch = [] for j in range(i * batch_size, (i + 1) * batch_size): cabin_clip = cabin_clips[j] cabin_video_frames = load_rgb_frames(cabin_video_path, cabin_clip) cabin_video_frames = clip_transforms(cabin_video_frames) cabin_video_frames_batch.append(cabin_video_frames) face_clip = face_clips[j] face_video_frames = load_rgb_frames(face_video_path, face_clip) face_video_frames = clip_transforms(face_video_frames) face_video_frames_batch.append(face_video_frames) cabin_video_frames_batch = torch.stack(cabin_video_frames_batch) face_video_frames_batch = torch.stack(face_video_frames_batch) cabin_video_frames_batch = cabin_video_frames_batch.to(device) face_video_frames_batch = face_video_frames_batch.to(device) with torch.no_grad(): class_scores, start_scores, end_scores = model(cabin_video_frames_batch, face_video_frames_batch) pred_classes = torch.argmax(class_scores, dim=1) pred_classes = pred_classes.cpu().numpy() start_scores = start_scores.cpu().numpy() end_scores = end_scores.cpu().numpy() all_predict_classes.append(pred_classes) all_start_scores.append(start_scores) all_end_scores.append(end_scores) if len(cabin_clips) % batch_size != 0: cabin_video_frames_batch = [] face_video_frames_batch = [] for k in range(n * batch_size, len(cabin_clips)): cabin_clip = cabin_clips[k] cabin_video_frames = load_rgb_frames(cabin_video_path, cabin_clip) cabin_video_frames = clip_transforms(cabin_video_frames) cabin_video_frames_batch.append(cabin_video_frames) face_clip = face_clips[k] face_video_frames = load_rgb_frames(face_video_path, face_clip) face_video_frames = clip_transforms(face_video_frames) face_video_frames_batch.append(face_video_frames) cabin_video_frames_batch = torch.stack(cabin_video_frames_batch) face_video_frames_batch = torch.stack(face_video_frames_batch) cabin_video_frames_batch = cabin_video_frames_batch.to(device) face_video_frames_batch = face_video_frames_batch.to(device) with torch.no_grad(): class_scores, start_scores, end_scores = model(cabin_video_frames_batch, face_video_frames_batch) pred_classes = torch.argmax(class_scores, dim=1) pred_classes = pred_classes.cpu().numpy() start_scores = start_scores.cpu().numpy() end_scores = end_scores.cpu().numpy() all_predict_classes.append(pred_classes) all_start_scores.append(start_scores) all_end_scores.append(end_scores) all_predict_classes = np.concatenate(all_predict_classes) all_start_scores = np.concatenate(all_start_scores) all_end_scores = np.concatenate(all_end_scores) print(all_predict_classes) # refined chunk aggregation cabin_frames = os.listdir(cabin_video_path) cabin_frame_length = len(cabin_frames) cabin_indices = np.arange(start=0, stop=cabin_frame_length - 
clip_stride + 1, step=clip_stride) indices_in_shorter_clips = [list(range(idx, idx + clip_stride)) for idx in cabin_indices] # remainder = cabin_frame_length % clip_stride # if remainder != 0: # indices_in_shorter_clips.append(list(range(cabin_frame_length-remainder, cabin_frame_length))) print(len(indices_in_shorter_clips)) print(len(indices_in_cabin_clips)) shorter_clip_predict_classes = [] for i in range(len(indices_in_shorter_clips)): if i == 0: shorter_clip_predict_classes.append(all_predict_classes[0]) elif i == 1: l = [all_predict_classes[0], all_predict_classes[1]] shorter_clip_predict_classes.append(max(set(l), key = l.count)) elif i == 2: l = [all_predict_classes[0], all_predict_classes[1], all_predict_classes[2]] shorter_clip_predict_classes.append(max(set(l), key = l.count)) elif i < len(indices_in_cabin_clips): l = [all_predict_classes[j] for j in range(i-3, i+1)] shorter_clip_predict_classes.append(max(set(l), key = l.count)) elif i == len(indices_in_cabin_clips): index = len(indices_in_cabin_clips) - 1 l = [all_predict_classes[index-2], all_predict_classes[index-1], all_predict_classes[index]] shorter_clip_predict_classes.append(max(set(l), key = l.count)) elif i == len(indices_in_cabin_clips) + 1: index = len(indices_in_cabin_clips) - 1 l = [all_predict_classes[index-1], all_predict_classes[index]] shorter_clip_predict_classes.append(max(set(l), key = l.count)) elif i == len(indices_in_cabin_clips) + 2: index = len(indices_in_cabin_clips) - 1 shorter_clip_predict_classes.append(all_predict_classes[index]) print(shorter_clip_predict_classes) # extract start and end peaks start_peak_indices = [] end_peak_indices = [] if all_start_scores[0] > all_start_scores[1]: start_peak_indices.append(0) for i in range(1, len(all_start_scores) - 1): if all_start_scores[i] > all_start_scores[i - 1]: if all_start_scores[i] > all_start_scores[i + 1]: start_peak_indices.append(i) if all_end_scores[i] > all_end_scores[i - 1]: if all_end_scores[i] > all_end_scores[i + 1]: end_peak_indices.append(i) if all_end_scores[-1] > all_end_scores[-2]: end_peak_indices.append(len(cabin_clips) - 1) j = 0 copy_start_peak_indices = start_peak_indices.copy() while j < len(start_peak_indices) - 1: index1 = copy_start_peak_indices[j] index2 = copy_start_peak_indices[j + 1] if index1 + 4 < index2: j += 1 else: if all_start_scores[start_peak_indices[j]] > all_start_scores[start_peak_indices[j + 1]]: copy_start_peak_indices[j] = index2 copy_start_peak_indices.pop(j + 1) start_peak_indices.pop(j + 1) else: copy_start_peak_indices.pop(j) start_peak_indices.pop(j) k = 0 copy_end_peak_indices = end_peak_indices.copy() while k < len(end_peak_indices) - 1: index1 = copy_end_peak_indices[k] index2 = copy_end_peak_indices[k + 1] if index1 + 4 < index2: k += 1 else: if all_end_scores[end_peak_indices[k]] > all_end_scores[end_peak_indices[k + 1]]: copy_end_peak_indices[k] = index2 copy_end_peak_indices.pop(k + 1) end_peak_indices.pop(k + 1) else: copy_end_peak_indices.pop(k) end_peak_indices.pop(k) selected_starts = [] selected_ends = [] for start_indice in start_peak_indices: if all_start_scores[start_indice] > threshold: selected_starts.append(start_indice) for end_indice in end_peak_indices: if all_end_scores[end_indice] > threshold: selected_ends.append(end_indice+3) print(selected_starts) print(selected_ends) rough_clip_groups = defaultdict(list) for i in range(len(shorter_clip_predict_classes)): if shorter_clip_predict_classes[i] != 0: rough_clip_groups[shorter_clip_predict_classes[i]].append(i) 
print(rough_clip_groups) # all_refined_clip_groups = dict() # for key in rough_clip_groups.keys(): # clip_group = rough_clip_groups[key] # refined_groups = [] # previous = 0 # i = 0 # while i < len(clip_group) - 1: # if clip_group[i] in selected_starts: # previous = i # elif clip_group[i] in selected_ends: # refined_groups.append(clip_group[previous:(index+1)]) # j = i + 1 # while j < len(clip_group) - 1: # if clip_group[j] - clip_group[j-1] == 1: # j += 1 # else: # previous = j # i = j # break # elif clip_group[i] + 2 < clip_group[i+1]: # refined_groups.append(clip_group[previous:(i+1)]) # previous = i+1 # i += 1 # print(previous, i) # if previous < len(clip_group) - 1: # refined_groups.append(clip_group[previous:]) # all_refined_clip_groups[key] = refined_groups # print(all_refined_clip_groups) all_refined_clip_groups = dict() for key in rough_clip_groups.keys(): clip_group = rough_clip_groups[key] refined_groups = [] previous = 0 i = 0 while i < len(clip_group) - 1: if clip_group[i] + 2 < clip_group[i+1]: refined_groups.append(clip_group[previous:(i+1)]) previous = i+1 i += 1 refined_groups.append(clip_group[previous:]) all_refined_clip_groups[key] = refined_groups print(all_refined_clip_groups) keys = list(all_refined_clip_groups) if len(keys) == 2: k1 = keys[0] k2 = keys[1] groups1 = all_refined_clip_groups[k1] groups2 = all_refined_clip_groups[k2] i = 0 j = 0 while i < len(groups1): while j < len(groups2): min_index1 = min(groups1[i]) max_index1 = max(groups1[i]) min_index2 = min(groups2[j]) max_index2 = max(groups2[j]) set1 = set(range(min_index1, max_index1+1)) set2 = set(range(min_index2, max_index2+1)) if set1.issubset(set2) == True: groups1.remove(groups1[i]) if i >= len(groups1): break elif set2.issubset(set1) == True: groups2.remove(groups2[j]) else: if max_index1 > max_index2: j += 1 else: break i += 1 filtered_all_clip_groups = { k1:groups1, k2:groups2 } else: filtered_all_clip_groups = all_refined_clip_groups print(filtered_all_clip_groups) # add start and end information final_all_clip_groups = {} for key in filtered_all_clip_groups.keys(): clip_groups = filtered_all_clip_groups[key] all_clip_groups = [] for clip_group in clip_groups: if len(clip_group) > 6: start_clip = min(clip_group) end_clip = max(clip_group) for selected_start in selected_starts: if selected_start > start_clip and selected_start < start_clip + 3: start_clip = selected_start for selected_end in selected_ends: if selected_end > end_clip - 3 and selected_end < end_clip: end_clip = selected_end clip_group = list(range(start_clip, end_clip+1)) all_clip_groups.append(clip_group) final_all_clip_groups[key] = all_clip_groups all_clip_frame_groups = {} for key in final_all_clip_groups.keys(): final_groups = final_all_clip_groups[key] clip_frame_groups = [] for group in final_groups: clip_frame_group = set() for index in group: clip_frame_group = clip_frame_group.union(set(indices_in_shorter_clips[index])) start_frame = min(clip_frame_group) + 1 end_frame = max(clip_frame_group) + 1 clip_frame_groups.append([start_frame, end_frame]) all_clip_frame_groups[key] = clip_frame_groups return all_clip_frame_groups
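# The start/end refinement in predict_events above first picks local maxima of
# the per-clip boundary scores and then suppresses peaks that lie fewer than
# four clips apart, keeping the higher-scoring one, before thresholding. A
# minimal standalone sketch of that idea; the helper name, the generic
# treatment of both endpoints, and the min_gap default are assumptions.
def pick_boundary_peaks(scores, threshold, min_gap=4):
    """Return indices of local maxima above threshold, at least min_gap apart."""
    peaks = []
    for i in range(1, len(scores) - 1):
        if scores[i] > scores[i - 1] and scores[i] > scores[i + 1]:
            peaks.append(i)
    if len(scores) >= 2 and scores[0] > scores[1]:
        peaks.insert(0, 0)
    if len(scores) >= 2 and scores[-1] > scores[-2]:
        peaks.append(len(scores) - 1)
    kept = []
    for p in peaks:
        if kept and p - kept[-1] <= min_gap:
            # two close peaks: keep only the stronger one
            if scores[p] > scores[kept[-1]]:
                kept[-1] = p
        else:
            kept.append(p)
    return [p for p in kept if scores[p] > threshold]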
def predict_events(cabin_video_path, face_video_path, args): if torch.cuda.is_available(): device = 'cuda' else: device = 'cpu' checkpoint = args.checkpoint clip_length = args.clip_length clip_stride = args.clip_stride batch_size = args.batch_size num_classes = args.num_classes threshold = args.threshold cabin_clips, face_clips, indices_in_cabin_clips = clip_generation(cabin_video_path, face_video_path, clip_length, clip_stride) model = TAL_Net(num_classes) ckp = torch.load(checkpoint) model.load_state_dict(ckp['model']) model.to(device) model.eval() clip_transforms = transforms.Compose([videotransforms.CenterCrop(224), videotransforms.ToTensor(), videotransforms.ClipNormalize() ]) all_clips = [] all_predict_classes = [] all_start_scores = [] all_end_scores = [] n = len(cabin_clips) // batch_size for i in range(n): cabin_video_frames_batch = [] face_video_frames_batch = [] for j in range(i * batch_size, (i + 1) * batch_size): cabin_clip = cabin_clips[j] cabin_video_frames = load_rgb_frames(cabin_video_path, cabin_clip) cabin_video_frames = clip_transforms(cabin_video_frames) cabin_video_frames_batch.append(cabin_video_frames) face_clip = face_clips[j] face_video_frames = load_rgb_frames(face_video_path, face_clip) face_video_frames = clip_transforms(face_video_frames) face_video_frames_batch.append(face_video_frames) cabin_video_frames_batch = torch.stack(cabin_video_frames_batch) face_video_frames_batch = torch.stack(face_video_frames_batch) cabin_video_frames_batch = cabin_video_frames_batch.to(device) face_video_frames_batch = face_video_frames_batch.to(device) with torch.no_grad(): class_scores, start_scores, end_scores = model(cabin_video_frames_batch, face_video_frames_batch) pred_classes = torch.argmax(class_scores, dim=1) pred_classes = pred_classes.cpu().numpy() start_scores = start_scores.cpu().numpy() end_scores = end_scores.cpu().numpy() all_predict_classes.append(pred_classes) all_start_scores.append(start_scores) all_end_scores.append(end_scores) if len(cabin_clips) % batch_size != 0: cabin_video_frames_batch = [] face_video_frames_batch = [] for k in range(n * batch_size, len(cabin_clips)): cabin_clip = cabin_clips[k] cabin_video_frames = load_rgb_frames(cabin_video_path, cabin_clip) cabin_video_frames = clip_transforms(cabin_video_frames) cabin_video_frames_batch.append(cabin_video_frames) face_clip = face_clips[k] face_video_frames = load_rgb_frames(face_video_path, face_clip) face_video_frames = clip_transforms(face_video_frames) face_video_frames_batch.append(face_video_frames) cabin_video_frames_batch = torch.stack(cabin_video_frames_batch) face_video_frames_batch = torch.stack(face_video_frames_batch) cabin_video_frames_batch = cabin_video_frames_batch.to(device) face_video_frames_batch = face_video_frames_batch.to(device) with torch.no_grad(): class_scores, start_scores, end_scores = model(cabin_video_frames_batch, face_video_frames_batch) pred_classes = torch.argmax(class_scores, dim=1) pred_classes = pred_classes.cpu().numpy() start_scores = start_scores.cpu().numpy() end_scores = end_scores.cpu().numpy() all_predict_classes.append(pred_classes) all_start_scores.append(start_scores) all_end_scores.append(end_scores) all_predict_classes = np.concatenate(all_predict_classes) print(all_predict_classes) # rough chunk aggregation cabin_frames = os.listdir(cabin_video_path) cabin_frame_length = len(cabin_frames) cabin_indices = np.arange(start=0, stop=cabin_frame_length - clip_stride + 1, step=clip_stride) indices_in_shorter_clips = [list(range(idx, idx + clip_stride)) 
for idx in cabin_indices] # remainder = cabin_frame_length % clip_stride # if remainder != 0: # indices_in_shorter_clips.append(list(range(cabin_frame_length-remainder, cabin_frame_length))) # print(len(indices_in_shorter_clips)) # print(len(indices_in_cabin_clips)) shorter_clip_predict_classes = [] for i in range(len(indices_in_shorter_clips)): if i == 0: shorter_clip_predict_classes.append(all_predict_classes[0]) elif i == 1: l = [all_predict_classes[0], all_predict_classes[1]] shorter_clip_predict_classes.append(max(set(l), key = l.count)) elif i == 2: l = [all_predict_classes[0], all_predict_classes[1], all_predict_classes[2]] shorter_clip_predict_classes.append(max(set(l), key = l.count)) # elif i == len(indices_in_shorter_clips) - 3: # l = [all_predict_classes[i], all_predict_classes[i+1], all_predict_classes[i+2]] # shorter_clip_predict_classes.append(max(set(l), key = l.count)) # elif i == len(indices_in_shorter_clips) - 2: # l = [all_predict_classes[i], all_predict_classes[i+1]] # shorter_clip_predict_classes.append(max(set(l), key = l.count)) # elif i == len(indices_in_shorter_clips) - 1: # shorter_clip_predict_classes.append(all_predict_classes[i]) elif i < len(indices_in_cabin_clips): l = [all_predict_classes[j] for j in range(i-3, i+1)] shorter_clip_predict_classes.append(max(set(l), key = l.count)) elif i == len(indices_in_cabin_clips): index = len(indices_in_cabin_clips) - 1 l = [all_predict_classes[index-2], all_predict_classes[index-1], all_predict_classes[index]] shorter_clip_predict_classes.append(max(set(l), key = l.count)) elif i == len(indices_in_cabin_clips) + 1: index = len(indices_in_cabin_clips) - 1 l = [all_predict_classes[index-1], all_predict_classes[index]] shorter_clip_predict_classes.append(max(set(l), key = l.count)) elif i == len(indices_in_cabin_clips) + 2: index = len(indices_in_cabin_clips) - 1 shorter_clip_predict_classes.append(all_predict_classes[index]) print(shorter_clip_predict_classes) rough_clip_groups = defaultdict(list) for i in range(len(shorter_clip_predict_classes)): if shorter_clip_predict_classes[i] != 0: rough_clip_groups[shorter_clip_predict_classes[i]].append(i) print(rough_clip_groups) all_refined_clip_groups = dict() for key in rough_clip_groups.keys(): clip_group = rough_clip_groups[key] refined_groups = [] previous = 0 i = 0 while i < len(clip_group) - 1: if clip_group[i+1] - clip_group[i] >= 4: refined_groups.append(clip_group[previous:(i+1)]) previous = i+1 i += 1 refined_groups.append(clip_group[previous:]) all_refined_clip_groups[key] = refined_groups print(all_refined_clip_groups) # all_classes = all_clip_frame_groups.keys() keys = list(all_refined_clip_groups) if len(keys) == 2: k1 = keys[0] k2 = keys[1] groups1 = all_refined_clip_groups[k1] groups2 = all_refined_clip_groups[k2] i = 0 j = 0 while i < len(groups1): while j < len(groups2): min_index1 = min(groups1[i]) max_index1 = max(groups1[i]) min_index2 = min(groups2[j]) max_index2 = max(groups2[j]) set1 = set(range(min_index1, max_index1+1)) set2 = set(range(min_index2, max_index2+1)) if set1.issubset(set2) == True: groups1.remove(groups1[i]) break elif set2.issubset(set1) == True: groups2.remove(groups2[j]) else: intersec = set1.intersection(set2) for item in intersec: set1.discard(item) set2.discard(item) groups1[i] = list(set1) groups2[j] = list(set2) if max_index1 > max_index2: j += 1 else: i += 1 break if j == len(groups2): break final_all_clip_groups = { k1:groups1, k2:groups2 } else: final_all_clip_groups = all_refined_clip_groups print(final_all_clip_groups) 
all_clip_frame_groups = {} for key in final_all_clip_groups.keys(): final_groups = final_all_clip_groups[key] clip_frame_groups = [] for group in final_groups: clip_frame_group = set() for index in group: clip_frame_group = clip_frame_group.union(set(indices_in_shorter_clips[index])) start_frame = min(clip_frame_group) + 1 end_frame = max(clip_frame_group) + 1 clip_frame_groups.append([start_frame, end_frame]) all_clip_frame_groups[key] = clip_frame_groups return all_clip_frame_groups
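# The stride-level aggregation earlier in this function assigns a class to
# every stride-length chunk by majority vote over the (up to four) overlapping
# long clips that cover it. A compact sketch of the same voting, with the
# window size made explicit; the helper name and parameterization are
# assumptions, not part of the original code.
def vote_chunk_classes(clip_predictions, num_chunks, clips_per_chunk=4):
    """Majority-vote a class for each short chunk from overlapping clip predictions."""
    chunk_classes = []
    num_clips = len(clip_predictions)
    for i in range(num_chunks):
        # clips i - clips_per_chunk + 1 .. i overlap chunk i (clamped to valid range)
        lo = max(0, i - clips_per_chunk + 1)
        hi = min(num_clips, i + 1)
        votes = [clip_predictions[j] for j in range(lo, hi)]
        if not votes:
            # trailing chunks not covered by any full clip fall back to the last clip
            votes = [clip_predictions[-1]]
        chunk_classes.append(max(set(votes), key=votes.count))
    return chunk_classes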
def predict_video(cabin_video_path, face_video_path, args): if torch.cuda.is_available(): device = 'cuda' else: device = 'cpu' checkpoint = args.checkpoint clip_length = args.clip_length clip_stride = args.clip_stride batch_size = args.batch_size num_classes = args.num_classes threshold = args.threshold cabin_clips, face_clips, indices_in_cabin_clips = clip_generation(cabin_video_path, face_video_path, clip_length, clip_stride) model = TAL_Net(num_classes) ckp = torch.load(checkpoint) model.load_state_dict(ckp['model']) model.to(device) model.eval() clip_transforms = transforms.Compose([videotransforms.CenterCrop(224), videotransforms.ToTensor(), videotransforms.ClipNormalize() ]) all_clips = [] all_predict_classes = [] all_start_scores = [] all_end_scores = [] n = len(cabin_clips) // batch_size for i in range(n): cabin_video_frames_batch = [] face_video_frames_batch = [] for j in range(i * batch_size, (i + 1) * batch_size): cabin_clip = cabin_clips[j] cabin_video_frames = load_rgb_frames(cabin_video_path, cabin_clip) cabin_video_frames = clip_transforms(cabin_video_frames) cabin_video_frames_batch.append(cabin_video_frames) face_clip = face_clips[j] face_video_frames = load_rgb_frames(face_video_path, face_clip) face_video_frames = clip_transforms(face_video_frames) face_video_frames_batch.append(face_video_frames) cabin_video_frames_batch = torch.stack(cabin_video_frames_batch) face_video_frames_batch = torch.stack(face_video_frames_batch) cabin_video_frames_batch = cabin_video_frames_batch.to(device) face_video_frames_batch = face_video_frames_batch.to(device) with torch.no_grad(): class_scores, start_scores, end_scores = model(cabin_video_frames_batch, face_video_frames_batch) pred_classes = torch.argmax(class_scores, dim=1) pred_classes = pred_classes.cpu().numpy() start_scores = start_scores.cpu().numpy() end_scores = end_scores.cpu().numpy() all_predict_classes.append(pred_classes) all_start_scores.append(start_scores) all_end_scores.append(end_scores) if len(cabin_clips) % batch_size != 0: cabin_video_frames_batch = [] face_video_frames_batch = [] for k in range(n * batch_size, len(cabin_clips)): cabin_clip = cabin_clips[k] cabin_video_frames = load_rgb_frames(cabin_video_path, cabin_clip) cabin_video_frames = clip_transforms(cabin_video_frames) cabin_video_frames_batch.append(cabin_video_frames) face_clip = face_clips[k] face_video_frames = load_rgb_frames(face_video_path, face_clip) face_video_frames = clip_transforms(face_video_frames) face_video_frames_batch.append(face_video_frames) cabin_video_frames_batch = torch.stack(cabin_video_frames_batch) face_video_frames_batch = torch.stack(face_video_frames_batch) cabin_video_frames_batch = cabin_video_frames_batch.to(device) face_video_frames_batch = face_video_frames_batch.to(device) with torch.no_grad(): class_scores, start_scores, end_scores = model(cabin_video_frames_batch, face_video_frames_batch) pred_classes = torch.argmax(class_scores, dim=1) pred_classes = pred_classes.cpu().numpy() start_scores = start_scores.cpu().numpy() end_scores = end_scores.cpu().numpy() all_predict_classes.append(pred_classes) all_start_scores.append(start_scores) all_end_scores.append(end_scores) all_predict_classes = np.concatenate(all_predict_classes) all_start_scores = np.concatenate(all_start_scores) all_end_scores = np.concatenate(all_end_scores) # print(all_start_scores) # print(all_end_scores) # start_peak_indices = [] # end_peak_indices = [] # if all_start_scores[0] > all_start_scores[1]: # start_peak_indices.append(0) # for i in range(1, 
len(cabin_clips) - 1): # if all_start_scores[i] > all_start_scores[i - 1]: # if all_start_scores[i] > all_start_scores[i + 1]: # start_peak_indices.append(i) # if all_end_scores[i] > all_end_scores[i - 1]: # if all_end_scores[i] > all_end_scores[i + 1]: # end_peak_indices.append(i) # if all_end_scores[-1] > all_end_scores[-2]: # end_peak_indices.append(len(cabin_clips) - 1) # j = 0 # copy_start_peak_indices = start_peak_indices.copy() # while j < len(start_peak_indices) - 1: # index1 = copy_start_peak_indices[j] # index2 = copy_start_peak_indices[j + 1] # if index1 + 4 < index2: # j += 1 # else: # if all_start_scores[start_peak_indices[j]] > all_start_scores[start_peak_indices[j+1]]: # copy_start_peak_indices[j] = index2 # copy_start_peak_indices.pop(j + 1) # start_peak_indices.pop(j + 1) # else: # copy_start_peak_indices.pop(j) # start_peak_indices.pop(j) # k = 0 # copy_end_peak_indices = end_peak_indices.copy() # while k < len(end_peak_indices) - 1: # index1 = copy_end_peak_indices[k] # index2 = copy_end_peak_indices[k + 1] # if index1 + 4 < index2: # k += 1 # else: # if all_end_scores[end_peak_indices[k]] > all_end_scores[end_peak_indices[k+1]]: # copy_end_peak_indices[k] = index2 # copy_end_peak_indices.pop(k + 1) # end_peak_indices.pop(k + 1) # else: # copy_end_peak_indices.pop(k) # end_peak_indices.pop(k) selected_starts = [] selected_ends = [] for i in range(len(all_start_scores)): if all_start_scores[i] > threshold: selected_starts.append(i) for j in range(len(all_end_scores)): if all_end_scores[j] > threshold: selected_ends.append(j) return selected_starts, selected_ends, all_start_scores, indices_in_cabin_clips
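# predict_video returns clip-level indices; a small hypothetical helper that
# maps the selected start clips back to approximate frame numbers for
# inspection (the name and output format are assumptions; the +1 frame offset
# follows the convention used elsewhere in this file).
def print_candidate_starts(selected_starts, all_start_scores, indices_in_cabin_clips):
    """Print the approximate frame range for every clip flagged as a start."""
    for clip_idx in selected_starts:
        frames = indices_in_cabin_clips[clip_idx]
        print('candidate start around frames {0}-{1} (clip {2}, score {3:.3f})'.format(
            min(frames) + 1, max(frames) + 1, clip_idx, all_start_scores[clip_idx]))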
def train(): args = get_parse() cabin_video_dir = args.cabin_video_dir face_video_dir = args.face_video_dir train_data_path = args.train_data_path val_data_path = args.val_data_path train_batch_size = args.train_batch_size val_batch_size = args.val_batch_size num_epochs = args.num_epochs learning_rate = args.learning_rate weight_decay = args.weight_decay display_steps = args.display_steps ckp_dir = args.ckp_dir save_path = args.save_path num_classes = args.num_classes weight = args.weight if torch.cuda.is_available(): device = 'cuda' else: device = 'cpu' if not os.path.exists(ckp_dir): os.makedirs(ckp_dir) print('Start to load data') train_transforms = transforms.Compose([ videotransforms.RandomCrop(224), videotransforms.ToTensor(), videotransforms.ClipNormalize() ]) val_transforms = transforms.Compose([ videotransforms.CenterCrop(224), videotransforms.ToTensor(), videotransforms.ClipNormalize() ]) train_dataset = IVBSSDataset(cabin_video_dir, face_video_dir, train_data_path, train_transforms) val_dataset = IVBSSDataset(cabin_video_dir, face_video_dir, val_data_path, val_transforms) train_dataloader = DataLoader(train_dataset, batch_size=train_batch_size, sampler=RandomSampler(train_dataset, replacement=True), collate_fn=collate_fn, drop_last=True) total_steps = num_epochs * len(train_dataloader) print('Total number of training samples is {0}'.format(len(train_dataset))) print('Total number of validation samples is {0}'.format(len(val_dataset))) print('Total number of training steps is {0}'.format(total_steps)) model = TAL_Net(num_classes) optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9, weight_decay=weight_decay) # optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay) scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1) start_epoch = 0 if args.pretrained_I3D_model is not None: print('Load pretrained I3D model') pretrained_I3D_model = torch.load(args.pretrained_I3D_model) model.I3D_1.load_state_dict(pretrained_I3D_model) model.I3D_2.load_state_dict(pretrained_I3D_model) if args.ckp_path is not None: print('Load checkpoint') start_epoch, model, optimizer, scheduler = load_ckp( args.ckp_path, model, optimizer, scheduler) model.to(device) model.train() print('Start to train') num_step = 0 best_acc = 0.0 for epoch in range(start_epoch, num_epochs): running_loss = 0.0 class_running_loss = 0.0 chunk_inclusion_running_loss = 0.0 for i, (cabin_imgs, face_imgs, labels, start_labels, end_labels) in enumerate(train_dataloader): cabin_imgs = cabin_imgs.to(device) face_imgs = face_imgs.to(device) labels = labels.to(device) start_labels = start_labels.to(device) end_labels = end_labels.to(device) optimizer.zero_grad() loss, class_loss, chunk_inclusion_loss = model( cabin_imgs, face_imgs, labels, start_labels, end_labels, weight)[:3] loss.backward() optimizer.step() running_loss += loss.item() class_running_loss += class_loss.item() chunk_inclusion_running_loss += chunk_inclusion_loss.item() if (i + 1) % display_steps == 0: print( 'epoch:{0}/{1}, step:{2}/{3}, loss:{4:.4f}, class_loss:{5:.4f}, chunk_inclusion_loss:{6:.4f}' .format(epoch + 1, num_epochs, i + 1, len(train_dataloader), running_loss / display_steps, class_running_loss / display_steps, chunk_inclusion_running_loss / display_steps)) running_loss = 0.0 class_running_loss = 0.0 chunk_inclusion_running_loss = 0.0 num_step += 1 writer.add_scalars( 'Loss/train', { 'total_loss': loss, 'class_loss': class_loss, 'chunk_inclusion_loss': 
chunk_inclusion_loss }, num_step) scheduler.step() print('Start to validate') # eval_loss, eval_class_loss, eval_chunk_inclusion_loss, class_accuracy = eval(train_dataset, train_batch_size, model, weight, device) eval_loss, eval_class_loss, eval_chunk_inclusion_loss, class_accuracy = eval( val_dataset, val_batch_size, model, weight, device) writer.add_scalars( 'Loss/valid', { 'total_loss': eval_loss, 'class_loss': eval_class_loss, 'chunk_inclusion_loss': eval_chunk_inclusion_loss }, epoch) writer.add_scalar('Accuracy/valid', class_accuracy, epoch) print( 'Total loss on validation dataset: {0:.4f}, class loss on validation dataset: {1:.4f}, chunk inclusion loss on validation dataset: {2:.4f}, class accuracy on validation dataset: {3:.4f}' .format(eval_loss, eval_class_loss, eval_chunk_inclusion_loss, class_accuracy)) is_best = class_accuracy > best_acc best_acc = max(class_accuracy, best_acc) checkpoint = { 'epoch': epoch + 1, 'model': model.state_dict(), 'best_acc': best_acc, 'optimizer': optimizer.state_dict(), 'scheduler': scheduler.state_dict() } ckp_name = 'epoch_' + str(epoch + 1) + '.pt' save_ckp(checkpoint, ckp_dir, ckp_name, is_best, save_path) print('Save the checkpoint after {} epochs'.format(epoch + 1)) writer.close()
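# train() logs to a module-level `writer` that is not defined in this excerpt.
# A minimal sketch of the assumed setup using torch.utils.tensorboard, whose
# add_scalars/add_scalar/close calls match the usage above; the log_dir value
# is a placeholder, not taken from the original code.
# from torch.utils.tensorboard import SummaryWriter
# writer = SummaryWriter(log_dir='runs/tal_net')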
def main(): if torch.cuda.is_available(): device = 'cuda' else: device = 'cpu' args = get_parse() cabin_video_path = args.cabin_video_path face_video_path = args.face_video_path checkpoint = args.checkpoint clip_length = args.clip_length clip_stride = args.clip_stride batch_size = args.batch_size num_classes = args.num_classes threshold = args.threshold cabin_clips, face_clips, indices_in_cabin_clips = clip_generation( cabin_video_path, face_video_path, clip_length, clip_stride) model = TAL_Net(num_classes) ckp = torch.load(checkpoint) model.load_state_dict(ckp['model']) model.to(device) model.eval() clip_transforms = transforms.Compose([ videotransforms.CenterCrop(224), videotransforms.ToTensor(), videotransforms.ClipNormalize() ]) all_clips = [] all_predict_classes = [] all_start_scores = [] all_end_scores = [] n = len(cabin_clips) // batch_size for i in range(n): cabin_video_frames_batch = [] face_video_frames_batch = [] for j in range(i * batch_size, (i + 1) * batch_size): cabin_clip = cabin_clips[j] cabin_video_frames = load_rgb_frames(cabin_video_path, cabin_clip) cabin_video_frames = clip_transforms(cabin_video_frames) cabin_video_frames_batch.append(cabin_video_frames) face_clip = face_clips[j] face_video_frames = load_rgb_frames(face_video_path, face_clip) face_video_frames = clip_transforms(face_video_frames) face_video_frames_batch.append(face_video_frames) cabin_video_frames_batch = torch.stack(cabin_video_frames_batch) face_video_frames_batch = torch.stack(face_video_frames_batch) cabin_video_frames_batch = cabin_video_frames_batch.to(device) face_video_frames_batch = face_video_frames_batch.to(device) with torch.no_grad(): class_scores, start_scores, end_scores = model( cabin_video_frames_batch, face_video_frames_batch) pred_classes = torch.argmax(class_scores, dim=1) pred_classes = pred_classes.cpu().numpy() start_scores = start_scores.cpu().numpy() end_scores = end_scores.cpu().numpy() all_predict_classes.append(pred_classes) all_start_scores.append(start_scores) all_end_scores.append(end_scores) if len(cabin_clips) % batch_size != 0: cabin_video_frames_batch = [] face_video_frames_batch = [] for k in range(n * batch_size, len(cabin_clips)): cabin_clip = cabin_clips[k] cabin_video_frames = load_rgb_frames(cabin_video_path, cabin_clip) cabin_video_frames = clip_transforms(cabin_video_frames) cabin_video_frames_batch.append(cabin_video_frames) face_clip = face_clips[k] face_video_frames = load_rgb_frames(face_video_path, face_clip) face_video_frames = clip_transforms(face_video_frames) face_video_frames_batch.append(face_video_frames) cabin_video_frames_batch = torch.stack(cabin_video_frames_batch) face_video_frames_batch = torch.stack(face_video_frames_batch) cabin_video_frames_batch = cabin_video_frames_batch.to(device) face_video_frames_batch = face_video_frames_batch.to(device) with torch.no_grad(): class_scores, start_scores, end_scores = model( cabin_video_frames_batch, face_video_frames_batch) pred_classes = torch.argmax(class_scores, dim=1) pred_classes = pred_classes.cpu().numpy() start_scores = start_scores.cpu().numpy() end_scores = end_scores.cpu().numpy() all_predict_classes.append(pred_classes) all_start_scores.append(start_scores) all_end_scores.append(end_scores) all_predict_classes = np.concatenate(all_predict_classes) all_start_scores = np.concatenate(all_start_scores) all_end_scores = np.concatenate(all_end_scores) # rough chunk aggregation start_peak_indices = [] end_peak_indices = [] if all_start_scores[0] > all_start_scores[1]: 
start_peak_indices.append(0) for i in range(1, len(all_start_scores) - 1): if all_start_scores[i] > all_start_scores[i - 1]: if all_start_scores[i] > all_start_scores[i + 1]: start_peak_indices.append(i) if all_end_scores[i] > all_end_scores[i - 1]: if all_end_scores[i] > all_end_scores[i + 1]: end_peak_indices.append(i) if all_end_scores[-1] > all_end_scores[-2]: end_peak_indices.append(len(cabin_clips) - 1) j = 0 copy_start_peak_indices = start_peak_indices.copy() while j < len(start_peak_indices) - 1: index1 = copy_start_peak_indices[j] index2 = copy_start_peak_indices[j + 1] if index1 + 4 < index2: j += 1 else: if all_start_scores[start_peak_indices[j]] > all_start_scores[ start_peak_indices[j + 1]]: copy_start_peak_indices[j] = index2 copy_start_peak_indices.pop(j + 1) start_peak_indices.pop(j + 1) else: copy_start_peak_indices.pop(j) start_peak_indices.pop(j) k = 0 copy_end_peak_indices = end_peak_indices.copy() while k < len(end_peak_indices) - 1: index1 = copy_end_peak_indices[k] index2 = copy_end_peak_indices[k + 1] if index1 + 4 < index2: k += 1 else: if all_end_scores[end_peak_indices[k]] > all_end_scores[ end_peak_indices[k + 1]]: copy_end_peak_indices[k] = index2 copy_end_peak_indices.pop(k + 1) end_peak_indices.pop(k + 1) else: copy_end_peak_indices.pop(k) end_peak_indices.pop(k) selected_starts = [] selected_ends = [] for start_indice in start_peak_indices: if all_start_scores[start_indice] > threshold: selected_starts.append(start_indice) for end_indice in end_peak_indices: if all_end_scores[end_indice] > threshold: selected_ends.append(end_indice) print(selected_starts) print(selected_ends) selected_start_scores = [] selected_end_scores = [] if selected_starts != []: for start in selected_starts: selected_start_scores.append(all_start_scores[start]) if selected_ends != []: for end in selected_ends: selected_end_scores.append(all_end_scores[end]) # plot all_clips = range(len(all_start_scores)) fig = plt.figure() plt.plot(all_clips, all_start_scores, "b.-", label="start scores") plt.plot(all_clips, all_end_scores, "r.-", label="end scores") if selected_starts != []: plt.scatter(selected_starts, selected_start_scores, c='b', marker='*', linewidths=3, label="selected clips including starts") if selected_ends != []: plt.scatter(selected_ends, selected_end_scores, c='r', marker='*', linewidths=3, label="selected clips including ends") plt.legend(loc='upper right') plt.ylim(0, 1) plt.xlabel("Clip Index") plt.ylabel("predicted score") plt.show() cabin_video_name = os.path.basename(cabin_video_path) fig.savefig('figures/plot_{}.png'.format(cabin_video_name))
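# fig.savefig above assumes a 'figures' directory already exists and raises
# FileNotFoundError otherwise. A small guard, written as a sketch (the helper
# name is an assumption), creates the directory before saving.
def save_score_figure(fig, cabin_video_name, fig_dir='figures'):
    """Save the score plot, creating the output directory if needed."""
    os.makedirs(fig_dir, exist_ok=True)
    fig.savefig(os.path.join(fig_dir, 'plot_{}.png'.format(cabin_video_name)))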
def predict_events(cabin_video_path, face_video_path, args): if torch.cuda.is_available(): device = 'cuda' else: device = 'cpu' checkpoint = args.checkpoint clip_length = args.clip_length clip_stride = args.clip_stride batch_size = args.batch_size num_classes = args.num_classes threshold = args.threshold cabin_clips, face_clips, indices_in_cabin_clips = clip_generation( cabin_video_path, face_video_path, clip_length, clip_stride) model = TAL_Net(num_classes) ckp = torch.load(checkpoint) model.load_state_dict(ckp['model']) model.to(device) model.eval() clip_transforms = transforms.Compose([ videotransforms.CenterCrop(224), videotransforms.ToTensor(), videotransforms.ClipNormalize() ]) all_clips = [] all_predict_classes = [] all_start_scores = [] all_end_scores = [] n = len(cabin_clips) // batch_size for i in range(n): cabin_video_frames_batch = [] face_video_frames_batch = [] for j in range(i * batch_size, (i + 1) * batch_size): cabin_clip = cabin_clips[j] cabin_video_frames = load_rgb_frames(cabin_video_path, cabin_clip) cabin_video_frames = clip_transforms(cabin_video_frames) cabin_video_frames_batch.append(cabin_video_frames) face_clip = face_clips[j] face_video_frames = load_rgb_frames(face_video_path, face_clip) face_video_frames = clip_transforms(face_video_frames) face_video_frames_batch.append(face_video_frames) cabin_video_frames_batch = torch.stack(cabin_video_frames_batch) face_video_frames_batch = torch.stack(face_video_frames_batch) cabin_video_frames_batch = cabin_video_frames_batch.to(device) face_video_frames_batch = face_video_frames_batch.to(device) with torch.no_grad(): class_scores, start_scores, end_scores = model( cabin_video_frames_batch, face_video_frames_batch) pred_classes = torch.argmax(class_scores, dim=1) pred_classes = pred_classes.cpu().numpy() start_scores = start_scores.cpu().numpy() end_scores = end_scores.cpu().numpy() all_predict_classes.append(pred_classes) all_start_scores.append(start_scores) all_end_scores.append(end_scores) if len(cabin_clips) % batch_size != 0: cabin_video_frames_batch = [] face_video_frames_batch = [] for k in range(n * batch_size, len(cabin_clips)): cabin_clip = cabin_clips[k] cabin_video_frames = load_rgb_frames(cabin_video_path, cabin_clip) cabin_video_frames = clip_transforms(cabin_video_frames) cabin_video_frames_batch.append(cabin_video_frames) face_clip = face_clips[k] face_video_frames = load_rgb_frames(face_video_path, face_clip) face_video_frames = clip_transforms(face_video_frames) face_video_frames_batch.append(face_video_frames) cabin_video_frames_batch = torch.stack(cabin_video_frames_batch) face_video_frames_batch = torch.stack(face_video_frames_batch) cabin_video_frames_batch = cabin_video_frames_batch.to(device) face_video_frames_batch = face_video_frames_batch.to(device) with torch.no_grad(): class_scores, start_scores, end_scores = model( cabin_video_frames_batch, face_video_frames_batch) pred_classes = torch.argmax(class_scores, dim=1) pred_classes = pred_classes.cpu().numpy() start_scores = start_scores.cpu().numpy() end_scores = end_scores.cpu().numpy() all_predict_classes.append(pred_classes) all_start_scores.append(start_scores) all_end_scores.append(end_scores) all_predict_classes = np.concatenate(all_predict_classes) print(all_predict_classes) # rough chunk aggregation rough_clip_groups = defaultdict(list) for i in range(len(all_predict_classes)): if all_predict_classes[i] != 0: rough_clip_groups[all_predict_classes[i]].append(i) print(rough_clip_groups) all_refined_clip_groups = dict() for key in 
rough_clip_groups.keys(): clip_group = rough_clip_groups[key] refined_groups = [] previous = 0 i = 0 while i < len(clip_group) - 1: if clip_group[i] + 2 < clip_group[i + 1]: refined_groups.append(clip_group[previous:(i + 1)]) previous = i + 1 i += 1 refined_groups.append(clip_group[previous:]) all_refined_clip_groups[key] = refined_groups print(all_refined_clip_groups) # all_classes = all_clip_frame_groups.keys() keys = list(all_refined_clip_groups) if len(keys) == 2: k1 = keys[0] k2 = keys[1] groups1 = all_refined_clip_groups[k1] groups2 = all_refined_clip_groups[k2] i = 0 j = 0 while i < len(groups1): while j < len(groups2): min_index1 = min(groups1[i]) max_index1 = max(groups1[i]) min_index2 = min(groups2[j]) max_index2 = max(groups2[j]) set1 = set(range(min_index1, max_index1 + 1)) set2 = set(range(min_index2, max_index2 + 1)) if set1.issubset(set2) == True: groups1.remove(groups1[i]) if i >= len(groups1): break elif set2.issubset(set1) == True: groups2.remove(groups2[j]) else: if max_index1 > max_index2: j += 1 else: break i += 1 final_all_clip_groups = {k1: groups1, k2: groups2} else: final_all_clip_groups = all_refined_clip_groups print(final_all_clip_groups) all_clip_frame_groups = {} for key in final_all_clip_groups.keys(): final_groups = final_all_clip_groups[key] clip_frame_groups = [] for group in final_groups: clip_frame_group = set() for index in group: clip_frame_group = clip_frame_group.union( set(indices_in_cabin_clips[index])) start_frame = min(clip_frame_group) + 1 end_frame = max(clip_frame_group) + 1 clip_frame_groups.append([start_frame, end_frame]) all_clip_frame_groups[key] = clip_frame_groups return all_clip_frame_groups
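# The grouping above splits a sorted list of clip indices into separate events
# whenever two consecutive indices are more than a small gap apart (a gap of 2
# in this function). A compact sketch of the same idea; the helper name and
# signature are assumptions.
def split_into_groups(sorted_indices, max_gap=2):
    """Split sorted indices into runs where consecutive members differ by <= max_gap."""
    groups = []
    current = []
    for idx in sorted_indices:
        if current and idx - current[-1] > max_gap:
            groups.append(current)
            current = []
        current.append(idx)
    if current:
        groups.append(current)
    return groups

# e.g. split_into_groups([3, 4, 5, 9, 10]) -> [[3, 4, 5], [9, 10]]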
def run(max_steps=64e3, mode='rgb', root='/ssd2/charades/Charades_v1_rgb', split='charades/charades.json', batch_size=1, load_model='', save_dir=''): # setup dataset # test_transforms = T.Compose([videotransforms.CenterCrop(224)]) test_transforms = T.Compose([ T.Resize(min_size=(240, ), max_size=320), T.ToTensor(), T.Normalize(mean=None, std=None, to_bgr255=False) ]) dataset = Dataset(split, 'train', root, mode, test_transforms, save_dir=save_dir, overlap=15) distributed = True shuffle = False images_per_batch = 4 if distributed: sampler = DistributedSampler(dataset, shuffle=shuffle) if shuffle: sampler = torch.utils.data.sampler.RandomSampler(dataset) else: sampler = torch.utils.data.sampler.SequentialSampler(dataset) batch_sampler = torch.utils.data.sampler.BatchSampler(sampler, images_per_batch, drop_last=False) dataloader = DataLoader( dataset, # batch_size=batch_size, shuffle=shuffle, num_workers=4, # pin_memory=True, batch_sampler=batch_sampler) dataloaders = {'train': dataloader} datasets = {'train': dataset} # val_dataset = Dataset(split, # 'testing', # root, # mode, # test_transforms, # save_dir=save_dir) # val_dataloader = DataLoader(val_dataset, # batch_size=batch_size, # shuffle=True, # num_workers=8, # pin_memory=True) # dataloaders = {'train': dataloader, 'train': val_dataloader} # datasets = {'train': dataset, 'train': val_dataset} # setup the model if mode == 'flow': i3d = InceptionI3d(dataset.num_classes, in_channels=2) else: i3d = InceptionI3d(dataset.num_classes, in_channels=3) i3d.replace_logits(dataset.num_classes) load_state_dict(i3d, torch.load(load_model), ignored_prefix='logits') i3d.cuda() # for phase in ['train', 'train']: for phase in ['train']: i3d.eval() # Set model to evaluate mode tot_loss = 0.0 tot_loc_loss = 0.0 tot_cls_loss = 0.0 # Iterate over data. for data in tqdm(dataloaders[phase]): # get the inputs inputs, labels, name, start, end = data feature_save_dir = os.path.join(save_dir, name[0]) if not os.path.exists(feature_save_dir): os.makedirs(feature_save_dir) b, c, t, h, w = inputs.shape if t > 1600: features = [] for start in range(1, t - 56, 1600): end = min(t - 1, start + 1600 + 56) start = max(1, start - 48) ip = Variable(torch.from_numpy( inputs.numpy()[:, :, start:end]).cuda(), volatile=True) features.append( i3d.extract_features(ip).squeeze(0).permute( 1, 2, 3, 0).data.cpu().numpy()) np.save(os.path.join(save_dir, name[0]), np.concatenate(features, axis=0)) else: # wrap them in Variable inputs = Variable(inputs.cuda(), volatile=True) features = i3d.extract_features(inputs) for feature, s, e in zip(features, start, end): np.save( os.path.join(feature_save_dir, str(int(s)) + '_' + str(int(e)) + '.npy'), feature.squeeze().data.cpu().numpy())
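# Variable(..., volatile=True) in run() is the pre-0.4 PyTorch idiom for
# inference; on current PyTorch the equivalent is a plain tensor inside
# torch.no_grad(). A sketch of the long-video branch written that way; the
# helper name is an assumption, and like the original it assumes batch size 1
# for the squeeze(0).
def extract_clip_features(i3d, inputs, start, end):
    """Extract I3D features for one temporal slice of a (B, C, T, H, W) batch."""
    with torch.no_grad():
        ip = inputs[:, :, start:end].cuda()
        feats = i3d.extract_features(ip)
    return feats.squeeze(0).permute(1, 2, 3, 0).cpu().numpy()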
def test(): args = get_parse() cabin_video_dir = args.cabin_video_dir face_video_dir = args.face_video_dir test_data_path = args.test_data_path batch_size = args.batch_size num_classes = args.num_classes weight = args.weight if torch.cuda.is_available(): device = 'cuda' else: device = 'cpu' print('Start to load data') test_transforms = transforms.Compose([ videotransforms.CenterCrop(224), videotransforms.ToTensor(), videotransforms.ClipNormalize() ]) test_dataset = IVBSSDataset(cabin_video_dir, face_video_dir, test_data_path, test_transforms) print('Total number of test samples is {0}'.format(len(test_dataset))) test_dataloader = DataLoader(test_dataset, batch_size=batch_size, sampler=SequentialSampler(test_dataset), collate_fn=collate_fn) model = TAL_Net(num_classes) print('Load checkpoint') ckp = torch.load(args.ckp_path) model.load_state_dict(ckp['model']) model.to(device) model.eval() print('Start to test') test_loss = 0.0 test_class_loss = 0.0 test_chunk_inclusion_loss = 0.0 class_accuracy = 0.0 test_steps = 0 # start_time = time.time() for i, (cabin_imgs, face_imgs, labels, start_labels, end_labels) in enumerate(test_dataloader): cabin_imgs = cabin_imgs.to(device) face_imgs = face_imgs.to(device) labels = labels.to(device) start_labels = start_labels.to(device) end_labels = end_labels.to(device) with torch.no_grad(): loss, class_loss, chunk_inclusion_loss, class_scores, start_scores, end_scores = model( cabin_imgs, face_imgs, labels, start_labels, end_labels, weight) test_loss += loss.item() test_class_loss += class_loss.item() test_chunk_inclusion_loss += chunk_inclusion_loss.item() class_pred = torch.argmax(class_scores, dim=1) class_accuracy += torch.sum( (class_pred == labels).float()) / labels.shape[0] test_steps += 1 avg_test_loss = test_loss / test_steps avg_test_class_loss = test_class_loss / test_steps avg_test_chunk_inclusion_loss = test_chunk_inclusion_loss / test_steps avg_class_accuracy = class_accuracy / test_steps # end_time = time.time() # total_time = end_time-start_time # avg_time = total_time/(test_steps*batch_size) print( 'avg_test_loss:{0}, avg_test_class_loss:{1}, avg_test_chunk_inclusion_loss:{2}, avg_class_accuracy:{3}' .format(avg_test_loss, avg_test_class_loss, avg_test_chunk_inclusion_loss, avg_class_accuracy))
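# The commented-out timing in test() reads wall-clock time without
# synchronizing the GPU, so asynchronous CUDA kernels can make it misleading.
# A sketch of a per-batch timing helper that synchronizes first; the helper
# name is an assumption and it reuses the model call signature from test().
import time

def time_forward(model, cabin_imgs, face_imgs, labels, start_labels,
                 end_labels, weight, device='cuda'):
    """Time one forward pass, synchronizing CUDA so the measurement is accurate."""
    if device == 'cuda':
        torch.cuda.synchronize()
    t0 = time.time()
    with torch.no_grad():
        model(cabin_imgs, face_imgs, labels, start_labels, end_labels, weight)
    if device == 'cuda':
        torch.cuda.synchronize()
    return time.time() - t0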