def visualise_transitions(dataset, idx, m1):
    """Visualise per-iteration LKT quads for every frame of video `idx`.

    For frame i, overlays each intermediate quad produced by model `m1`
    (green) plus the ground-truth quad (red) on the search image, writing
    one jpeg per iteration under transitions/<m1>/<i>/.

    Args:
        dataset: provides get_out_video_path(idx).
        idx: video index within the dataset.
        m1: name of the model results sub-directory under the video's output path.
    """
    out_video = dataset.get_out_video_path(idx)
    img_dir = join(out_video, "img_tcr")
    m_dir = join(out_video, m1)
    out_dir = join(out_video, "transitions", m1)
    make_dir(out_dir)
    # Fix: dropped unused sz/sx/xmin/xmax locals carried over from the
    # resized-visualisation variant; they were computed but never read here.
    i = 0
    # Frames are numbered 0,1,2,...; stop at the first missing "<i>_tcr.jpeg".
    while os.path.exists(join(m_dir, str(i) + "_tcr.jpeg")):
        base_img = cv2.imread(join(img_dir, str(i) + "_i.jpeg"))
        quad_gt = np.load(join(img_dir, str(i) + "-quad-gt.npy"))
        dir_path = join(m_dir, str(i))
        out_path = join(out_dir, str(i))
        make_dir(out_path)
        for j, f in enumerate(sorted(os.listdir(dir_path))):
            quad = np.load(join(dir_path, f))
            # np.array(...) copies so each iteration draws on a clean frame.
            img_i = draw_bbox(np.array(base_img), quad, color=(0, 255, 0), thk=1)
            img_i = draw_bbox(img_i, quad_gt, color=(0, 0, 255), thk=1)
            cv2.imwrite(join(out_path, str(j) + ".jpeg"), img_i)
        i += 1
def visualise_resized_transitions(img_dir, m_dir, out_dir):
    """Dump per-iteration alignment snapshots in the resized (crop) frame.

    For each frame, every saved quad under "<i>-resized" produces one canvas:
    the search crop with that quad drawn in green on the right half, and the
    template crop pasted into the centre of the left half.
    """
    make_dir(out_dir)
    tmpl = EXEMPLAR_SIZE
    inst = INSTANCE_SIZE
    lo = inst / 2.0 - tmpl / 2.0
    hi = inst / 2.0 + tmpl / 2.0
    frame = 0
    # Walk frames 0,1,2,... until the model stops producing "<i>_tcr.jpeg".
    while os.path.exists(join(m_dir, str(frame) + "_tcr.jpeg")):
        img_tcr = cv2.imread(join(m_dir, str(frame) + "_tcr.jpeg"))
        img_i = cv2.imread(join(m_dir, str(frame) + "_i.jpeg"))
        dir_path = join(m_dir, str(frame) + "-resized")
        out_path = join(out_dir, str(frame))
        make_dir(out_path)
        for step, fname in enumerate(sorted(os.listdir(dir_path))):
            quad = np.load(join(dir_path, fname))
            canvas = np.zeros((inst, 2 * inst, 3))
            # Right half: search crop with this iteration's quad in green.
            canvas[0:inst, inst:2 * inst, :] = draw_bbox(
                np.array(img_i), quad, color=(0, 255, 0), thk=1)
            # img[ 0:sx, sx:2 * sx, :] = img_i
            # Left half: template crop centred in an instance-sized square.
            canvas[int(lo):int(hi), int(lo):int(hi), :] = img_tcr
            cv2.imwrite(join(out_path, str(step) + ".jpeg"), canvas)
        frame += 1
def visualise_resized_images(img_dir, m1_dir, m2_dir, out2_dir):
    """Build a two-row comparison sheet per frame and write it to out2_dir.

    Top row: pure-LKT search crop (with its quad and the identity quad) plus
    its template crop; bottom row: the same for the learned-LKT model.
    """
    font = cv2.FONT_HERSHEY_DUPLEX
    fontScale = 0.5
    fontColor = (255, 255, 255)
    lineType = 2
    # make_dir(output_dir)
    make_dir(out2_dir)
    # images = int(len(os.listdir(img_dir)) / 3)
    sz = EXEMPLAR_SIZE
    sx = INSTANCE_SIZE
    xmin = sx / 2.0 - sz / 2.0
    xmax = sx / 2.0 + sz / 2.0
    # Axis-aligned quad marking where the exemplar sits inside the instance.
    quad_iden = np.array([xmin, xmax, xmin, xmin, xmax, xmin, xmax, xmax])
    frame = 0
    while os.path.exists(join(m1_dir, str(frame) + "_tcr.jpeg")):
        sheet = np.zeros((2 * sx, 2 * sx, 3))
        tcr_pure = cv2.imread(join(m1_dir, str(frame) + "_tcr.jpeg"))
        search_pure = cv2.imread(join(m1_dir, str(frame) + "_i.jpeg"))
        tcr_learned = cv2.imread(join(m2_dir, str(frame) + "_tcr.jpeg"))
        search_learned = cv2.imread(join(m2_dir, str(frame) + "_i.jpeg"))
        quad_pure = np.load(join(m1_dir, str(frame) + "-quad-resized.npy"))
        quad_learned = np.load(join(m2_dir, str(frame) + "-quad-resized.npy"))
        search_pure = draw_bbox(search_pure, quad_pure, color=(0, 255, 0), thk=2)
        search_pure = draw_bbox(search_pure, quad_iden, color=(0, 165, 255), thk=2)
        # draw_bbox(img_i_pure, quad_gt, color=(0, 0, 255), thk=2)
        search_learned = draw_bbox(search_learned, quad_learned, color=(0, 255, 0), thk=2)
        search_learned = draw_bbox(search_learned, quad_iden, color=(0, 165, 255), thk=2)
        # draw_bbox(img_i_learned, quad_gt, color=(0, 0, 255), thk=2)
        sheet[0:sx, sx:2 * sx, :] = search_pure
        sheet[int(xmin):int(xmax), int(xmin):int(xmax), :] = tcr_pure
        sheet[sx:2 * sx, sx:2 * sx, :] = search_learned
        sheet[sx + int(xmin):sx + int(xmax), int(xmin):int(xmax), :] = tcr_learned
        # NOTE(review): cv2.putText's 7th positional argument is `thickness`,
        # so `lineType` here actually sets stroke thickness — confirm intent.
        cv2.putText(sheet, "Pure LKT", (90, 220), font, fontScale, fontColor, lineType)
        cv2.putText(sheet, "Learned LKT", (80, 475), font, fontScale, fontColor, lineType)
        # img_i = np.concatenate([img_i_pure, img_i_learned], axis=1)
        cv2.imwrite(join(out2_dir, str(frame) + ".jpeg"), sheet)
        frame += 1
def __init__(self, model, checkpoint_dir, logs_dir, params):
    """Wrap `model` with an Adam optimizer, Smooth-L1 loss, a summary
    writer, and per-model checkpoint/log directories derived from the
    model's `params.info` tag."""
    super().__init__()
    # self.nn = PureLKTNet(device, params).to(device)
    self.nn = model
    self.params = params
    tag = self.nn.params.info
    self.checkpoint_dir = join(checkpoint_dir, tag)
    make_dir(self.checkpoint_dir)
    run_logs = join(logs_dir, tag)
    make_dir(run_logs)
    self.logs_dir = run_logs
    self.writer = Logger(run_logs)
    self.optimizer = Adam(self.nn.model.parameters())
    self.loss = nn.SmoothL1Loss()
    # Epoch index of the best validation checkpoint so far; -1 = none yet.
    self.best = -1
def visualise_images(dataset, idx, m1, m2):
    """Write side-by-side (m1 | m2) tracking overlays for video `idx`.

    For every frame i, draws model m1's quad and the ground-truth quad on
    one copy of the frame, model m2's quad and the ground truth on another,
    and concatenates them horizontally into results/<i>.jpeg.

    Args:
        dataset: provides get_out_video_path(idx).
        idx: video index within the dataset.
        m1, m2: model results sub-directory names under the video's output path.
    """
    out_video = dataset.get_out_video_path(idx)
    img_dir = join(out_video, "img_tcr")
    m1_dir = join(out_video, m1)
    m2_dir = join(out_video, m2)
    out2_dir = join(out_video, "results")
    make_dir(out2_dir)
    # Fix: removed the unused `images` count and the dead commented-out code.
    i = 0
    # Frames are numbered 0,1,2,...; stop at the first missing "<i>_i.jpeg".
    while os.path.exists(join(img_dir, str(i) + "_i.jpeg")):
        # Two independent copies of the frame, one per model.
        img_i_pure = cv2.imread(join(img_dir, str(i) + "_i.jpeg"))
        img_i_learned = cv2.imread(join(img_dir, str(i) + "_i.jpeg"))
        quad_pure = np.load(join(m1_dir, str(i) + "-quad.npy"))
        quad_learned = np.load(join(m2_dir, str(i) + "-quad.npy"))
        quad_gt = np.load(join(img_dir, str(i) + "-quad-gt.npy"))
        img_i_pure = draw_bbox(img_i_pure, quad_pure, color=(255, 0, 0), thk=2)
        img_i_pure = draw_bbox(img_i_pure, quad_gt, color=(0, 0, 255), thk=2)
        img_i_learned = draw_bbox(img_i_learned, quad_learned, color=(255, 0, 0), thk=2)
        img_i_learned = draw_bbox(img_i_learned, quad_gt, color=(0, 0, 255), thk=2)
        side_by_side = np.concatenate([img_i_pure, img_i_learned], axis=1)
        cv2.imwrite(join(out2_dir, str(i) + ".jpeg"), side_by_side)
        i += 1
def eval_model(self, dataset, vid, pairWise):
    """Evaluate the tracker over all image pairs of video `vid`.

    Initialises the tracker from the first frame's ground-truth quad (or
    re-initialises on every pair when `pairWise` is True), tracks through
    the video, accumulates Huber losses on the predicted quads, saves
    learned Sobel kernels when the model emits them, writes the quad
    trajectory to results.txt, and renders output bounding boxes.

    Returns:
        Mean IOU between predicted and ground-truth quads over the video.
    """
    self.nn.model = self.nn.model.eval()
    num_img_pair = dataset.get_num_images(vid)
    quads = []
    iou_list = []
    sobel_x = []  # NOTE(review): never used below
    sobel_y = []  # NOTE(review): never used below
    imgs = []  # NOTE(review): never used below
    in_video = dataset.get_in_video_path(vid)
    out_video = dataset.get_out_video_path(vid)
    info = self.nn.model.params.info
    imgs_out_dir = join(out_video, "img_tcr")
    model_out_dir = join(out_video, info)
    make_dir(imgs_out_dir)
    make_dir(model_out_dir)
    print("Evaluating dataset for video ", vid)
    data_x, quad_gt = dataset.get_data_point(vid, 0)
    # Seed the trajectory with the first frame's ground-truth quad.
    quads.append(data_x[2])
    # data_x[0] = data_x[0][np.newaxis, :, :, :]
    # bbox = data_x[2][np.newaxis, :]
    # self.nn.init(data_x[0], bbox)
    self.nn.cnt = 0
    start_t = time.time()
    loss = 0
    sz_loss = 0
    with torch.no_grad():
        for img_pair in range(num_img_pair):
            # print(img_pair)
            data_x, quad_gt = dataset.get_data_point(vid, img_pair)
            _, quad_pip_gt = dataset.get_train_data_point(vid, img_pair)
            # Add a leading batch axis to template image, quad, and search image.
            data_x[0] = data_x[0][np.newaxis, :, :, :]
            bbox = data_x[2][np.newaxis, :]
            data_x[1] = data_x[1][np.newaxis, :, :, :]
            if(img_pair == 0 and not pairWise):
                # Sequence mode: initialise once from the first frame only.
                quad = bbox
                self.nn.init(data_x[0], quad)
            elif(pairWise):
                # Pair mode: re-initialise from ground truth on every pair.
                quad = bbox
                self.nn.init(data_x[0], quad)
            # else:
            # try:
            outputs = self.nn.track(data_x[1])
            # except:
            #     print("Error!!!!!!")
            #     break
            # NOTE(review): if track() ever returns neither 9 nor 7 values,
            # quad_new/quad_pip/scale_z below are unbound (or stale from the
            # previous iteration) — confirm track() always returns 7 or 9.
            if(len(outputs) == 9):
                # 9-tuple variant also carries learned Sobel kernels; dump them.
                quad_new, sx, sy, img_pip_tcr, sx_ker, \
                    sy_ker, img_pip_i, quad_pip, scale_z = outputs
                sx_ker = tensor_to_numpy(sx_ker[0])
                sy_ker = tensor_to_numpy(sy_ker[0])
                np.save(join(model_out_dir, str(img_pair) + "-sx.npy"),\
                        sx_ker)
                np.save(join(model_out_dir, str(img_pair) + "-sy.npy"),\
                        sy_ker)
            elif(len(outputs) == 7):
                quad_new, sx, sy, img_pip_tcr, img_pip_i,\
                    quad_pip, scale_z = outputs
            # print(quad_pip, quad_pip_gt)
            # Loss in the resized (crop) frame; `huber(100, ...)` is a project
            # helper — presumably delta=100, confirm against its definition.
            sz_loss += huber(100, quad_pip - quad_pip_gt).mean()
            # Loss in the original frame, weighted by the squared crop scale.
            loss += (scale_z[0]) * (scale_z[0]) * huber(100, quad_new - quad_gt).mean()
            img_pip_tcr = img_to_numpy(img_pip_tcr[0])
            # (Large block of commented-out debug dumps removed for
            # readability: per-pair jpeg/npy saves of the tcr/search crops,
            # intermediate quads, and Sobel channel images. Restore from VCS
            # if needed for visualise_sobels/visualise_transitions inputs.)
            try:
                iou = calc_iou(quad_new[0], quad_gt)
                iou_list.append(iou)
            except Exception as e:
                # A degenerate predicted quad can make the IOU computation
                # fail; log and stop evaluating this video early.
                print(e)
                break
            quads.append(quad_new[0])
            quad = quad_new
    end_t = time.time()
    loss /= num_img_pair
    sz_loss /= num_img_pair
    # NOTE(review): divides by num_img_pair even when the loop broke early,
    # so a truncated run deflates the mean IOU — confirm this is intended.
    mean_iou = np.sum(iou_list) / num_img_pair
    write_to_output_file(quads, out_video + "/results.txt")
    outputBboxes(in_video + "/", out_video + "/images/", out_video + "/results.txt")
    print("Resized loss = ", sz_loss)
    print("Actual loss = ", loss)
    print("Total time taken = ", end_t - start_t)
    print("Mean IOU = ", mean_iou)
    # plt.plot(iou_list)
    # plt.savefig(out_video + "/iou_plot.png")
    # plt.close()
    return mean_iou
def train_model(self, dataset, vid=-1):
    """Train the wrapped model, with per-epoch validation and checkpointing.

    Args:
        dataset: training dataset; batches are fetched via get_batch().
        vid: if != -1, train on a single video and log under a per-video
            sub-directory; -1 trains across the whole dataset.

    Keeps the last NUM_CHECKPOINTS checkpoints, saves a `best` checkpoint
    whenever validation loss improves, and logs both losses per epoch.
    """
    if (vid != -1):
        # Per-video run: redirect the summary writer to logs/<vid>/.
        pth = join(self.logs_dir, str(vid))
        make_dir(pth)
        self.writer = Logger(pth)
    self.nn.model = self.nn.model.train()
    trainLoader, validLoader = splitData(dataset, self.params, vid=vid)
    if (vid == -1):
        dataset_sz = len(dataset)
    else:
        dataset_sz = dataset.get_num_images(vid)
    total = min(self.params.train_examples, dataset_sz)
    train_total = int(total * (1.0 - self.params.val_split))
    val_total = total - train_total
    print("Train dataset size = ", train_total)
    print("Valid dataset size = ", val_total)
    # lc = last_checkpoint(self.checkpoint_dir)
    # Checkpoint resume is disabled: lc is hard-coded to -1 (fresh start).
    lc = -1
    if (lc != -1):
        self.load_checkpoint(lc, vid=vid)
        print("Checkpoint loaded = {}".format(lc))
    best_val = float("inf")
    for epoch in range(lc + 1, NUM_EPOCHS):
        # NOTE(review): after the first epoch's validation below, the model
        # is left in eval() mode and never switched back to train() here —
        # confirm whether train() should be re-applied at the top of each epoch.
        print("EPOCH = ", epoch)
        train_loss = 0.0
        i = 0
        print("Training for epoch:{}".format(epoch))
        start_time = time.time()
        print("Total training batches = ", len(trainLoader))
        for batch in trainLoader:
            # print(batch)
            x, ynp = get_batch(dataset, batch)
            y = torch.tensor(ynp, device=self.nn.model.device).float()
            self.optimizer.zero_grad()
            self.nn.init(x[0], x[2])
            # train() returns a 6-tuple; only the quad predictions are used,
            # and of those only the final iteration's output.
            y_pred, _, _, _, _, _ = self.nn.train(x[1])
            y_pred = y_pred[-1]
            loss = self.loss(y_pred, y)
            # print(loss)
            # NOTE(review): accumulating the loss tensor (not loss.item())
            # keeps each batch's graph alive until train_loss is released.
            train_loss += loss
            # NOTE(review): `params` is built but unused (debug leftover).
            params = [
                x for x in self.nn.model.parameters() if x.requires_grad
            ]
            # print(len(params))
            # grads = torch.autograd.grad(loss, params, retain_graph=True,
            #                             create_graph=True, allow_unused=True)
            loss.backward()
            self.optimizer.step()
            # print(i)
            i += 1
        train_loss /= i
        print("Training time for {} epoch = {}".format(
            epoch, time.time() - start_time))
        print("Training loss for {} epoch = {}".format(epoch, train_loss))
        self.save_checkpoint(epoch, vid=vid)
        # Retain only the most recent NUM_CHECKPOINTS checkpoints.
        if (epoch >= NUM_CHECKPOINTS):
            self.delete_checkpoint(epoch - NUM_CHECKPOINTS, vid=vid)
        print("Validation for epoch:{}".format(epoch))
        self.nn.model = self.nn.model.eval()
        valid_loss = 0.0
        i = 0
        start_time = time.time()
        print("Total validation batches = ", len(validLoader))
        with torch.no_grad():
            for batch in validLoader:
                x, y = get_batch(dataset, batch)
                y = torch.tensor(y, device=self.nn.model.device).float()
                self.nn.init(x[0], x[2])
                y_pred, _, _, _, _, _ = self.nn.train(x[1])
                y_pred = y_pred[-1]
                loss = self.loss(y_pred, y)
                valid_loss += loss
                i += 1
        valid_loss /= i
        # Track the best epoch by validation loss and checkpoint it separately.
        if (valid_loss < best_val):
            best_val = valid_loss
            print("Epoch = ", epoch)
            print("Best validation loss = ", best_val)
            self.save_checkpoint(epoch, best=True, vid=vid)
            self.best = epoch
        # print("Total validation batches = ", i)
        print("Validation time for {} epoch = {}".format(
            epoch, time.time() - start_time))
        info = {'train_loss': train_loss, 'valid_loss': valid_loss}
        for tag, value in info.items():
            self.writer.scalar_summary(tag, value, epoch + 1)
def writeImagesToFolder(imgs, folder_dir):
    """Write each image to `folder_dir` as 00000001.jpg, 00000002.jpg, ...

    Args:
        imgs: iterable of images accepted by cv2.imwrite.
        folder_dir: destination directory (created if missing).
    """
    make_dir(folder_dir)
    # Fix: replaced redundant str(format(i + 1, '08d')) with an f-string and
    # let enumerate supply the 1-based index directly.
    for i, img in enumerate(imgs, start=1):
        cv2.imwrite(join(folder_dir, f"{i:08d}.jpg"), img)
def visualise_sobels(dataset, idx, m1, m2):
    """Assemble per-frame posters comparing pure and learned Sobel responses.

    For each frame: template crop and annotated search crop on the left,
    three pure-LKT sx/sy response images in the middle column, and the
    learned model's responses plus rendered learned kernels on the right.
    Also tracks the frame with the largest (learned - pure) IOU gain and
    dumps extra jpegs for the hard-coded `best` frame index.
    """
    out_video = dataset.get_out_video_path(idx)
    img_dir = join(out_video, "img_tcr")
    m1_dir = join(out_video, m1)
    m2_dir = join(out_video, m2)
    output_dir = join(out_video, "sobels")
    make_dir(output_dir)
    out = []
    # NOTE(review): `images` is computed but never used below.
    images = int(len(os.listdir(img_dir)) / 3)
    font = cv2.FONT_HERSHEY_DUPLEX
    fontScale = 0.5
    fontColor = (0, 0, 0)
    # NOTE(review): passed as cv2.putText's 7th positional argument, which is
    # `thickness`, not lineType — confirm intent.
    lineType = 1
    i = 0
    mx = 0.0   # largest IOU improvement (learned over pure) seen so far
    idd = 0    # frame index achieving `mx`
    best = 98  # hard-coded frame index that gets extra "best-*" dumps
    while (True):
        print(i)
        pth = join(m1_dir, str(i) + "_pip_tcr.jpeg")
        if not os.path.exists(pth):
            break
        # White 860x700 poster canvas.
        poster = np.zeros((860, 700, 3)) + 255
        img_tcr = cv2.imread(join(m1_dir, str(i) + "_pip_tcr.jpeg"))
        if (i == best):
            cv2.imwrite("best/best-img_tcr.jpeg", img_tcr)
        img_tcr = cv2.resize(img_tcr, (100, 100))
        img_i = cv2.imread(join(m1_dir, str(i) + "_pip_i.jpeg"))
        quad_pure = np.load(join(m1_dir, str(i) + "_quad_pip.npy"))
        quad_learned = np.load(join(m2_dir, str(i) + "_quad_pip.npy"))
        quad_id = np.load(join(m1_dir, str(i) + "_quad_pip_id.npy"))
        quad_gt = np.load(join(m1_dir, str(i) + "_quad_pip_gt.npy"))
        # How much the learned model beats the pure model on this frame.
        iou = calc_iou(quad_gt, quad_learned) - calc_iou(quad_gt, quad_pure)
        if (iou >= mx):
            print(mx, iou)
            mx = iou
            idd = i
        img_i = draw_bbox(img_i, quad_pure, color=(0, 255, 255), thk=1)
        img_i = draw_bbox(img_i, quad_learned, color=(0, 255, 0), thk=1)
        img_i = draw_bbox(img_i, quad_gt, color=(0, 0, 255), thk=1)
        img_i = draw_bbox(img_i, quad_id, color=(255, 0, 0), thk=1)
        if (i == best):
            cv2.imwrite("best-imp.jpeg", img_i)
        img_i = cv2.resize(img_i, (200, 200))
        poster[100:200, 90:190, :] = img_tcr
        poster[400:600, 40:240, :] = img_i
        cv2.putText(poster, "Img tcr", (110, 220), font, fontScale, fontColor,
                    lineType)
        cv2.putText(poster, "Img i", (105, 620), font, fontScale, fontColor,
                    lineType)
        # cv2.putText(poster, "Learned LKT results", (65, 820),
        #             font, fontScale, fontColor, lineType)
        cv2.putText(poster, "Pure LKT", (300, 15), font, fontScale, fontColor,
                    lineType)
        cv2.putText(poster, "Learned LKT", (410, 15), font, fontScale,
                    fontColor, lineType)
        cv2.putText(poster, "Learned sobels", (560, 15), font, fontScale,
                    fontColor, lineType)
        # Pure model: sx responses stacked in rows 0-2 of the middle column.
        for j in range(3):
            sx = cv2.imread(join(m1_dir, str(i) + "-sx-" + str(j) + ".jpeg"))
            if (i == best):
                cv2.imwrite("best-sx-pure-" + str(j) + ".jpeg", sx)
            sx = cv2.resize(sx, (100, 100))
            poster[140 * j + 30:140 * j + 130, 280:380, :] = sx
        # Pure model: sy responses in rows 3-5 (j offsets the row slot).
        for j in range(3, 6):
            sy = cv2.imread(
                join(m1_dir, str(i) + "-sy-" + str(j - 3) + ".jpeg"))
            if (i == best):
                cv2.imwrite("best-sy-pure-" + str(j - 3) + ".jpeg", sy)
            sy = cv2.resize(sy, (100, 100))
            poster[140 * j + 30:140 * j + 130, 280:380, :] = sy
        sx_ker = np.load(join(m2_dir, str(i) + "-sx.npy"))
        sy_ker = np.load(join(m2_dir, str(i) + "-sy.npy"))
        # print(sx_ker.shape, sy_ker.shape)
        # Learned model: sx responses plus rendered learned kernels.
        for j in range(3):
            sx = cv2.imread(join(m2_dir, str(i) + "-sx-" + str(j) + ".jpeg"))
            if (i == best):
                cv2.imwrite("best-sx-learned-" + str(j) + ".jpeg", sx)
            sx = cv2.resize(sx, (100, 100))
            poster[140 * j + 30:140 * j + 130, 420:520, :] = sx
            sx_img = visualise_sobel_kernel(sx_ker[j, 0, :, :])
            if (i == best):
                cv2.imwrite("best-sx-learned-sobel-" + str(j) + ".jpeg",
                            sx_img)
            sx_img = cv2.resize(sx_img, (100, 100))
            poster[140 * j + 30:140 * j + 130, 560:660, :] = sx_img
        # Learned model: sy responses plus rendered learned kernels.
        for j in range(3, 6):
            sy = cv2.imread(
                join(m2_dir, str(i) + "-sy-" + str(j - 3) + ".jpeg"))
            if (i == best):
                cv2.imwrite("best-sy-learned-" + str(j - 3) + ".jpeg", sy)
            sy = cv2.resize(sy, (100, 100))
            poster[140 * j + 30:140 * j + 130, 420:520, :] = sy
            sy_img = visualise_sobel_kernel(sy_ker[j - 3, 0, :, :])
            if (i == best):
                # NOTE(review): uses str(j) (3..5) while the sx branch and the
                # plain sy dump use str(j - 3) (0..2) — likely should be
                # str(j - 3) for consistent file names; confirm before fixing.
                cv2.imwrite("best-sy-learned-sobel-" + str(j) + ".jpeg",
                            sy_img)
            sy_img = cv2.resize(sy_img, (100, 100))
            poster[140 * j + 30:140 * j + 130, 560:660, :] = sy_img
        out.append(poster)
        i += 1
    print("Max idd = ", idd)
    print("Max IOU diff = ", mx)
    writeImagesToFolder(out, output_dir)
'epsilon' : EPSILON, 'info': "Pure LKT" }) # lr = 0.0005 # momentum = 0.5 nn = PureLKTNet(device, params) tracker = LKTTracker(nn) video_name = "../red_square.mp4" dir_name = "../red_square" outdir_name = "../red_square_results" window_name = "ABC" make_dir(dir_name) make_dir(outdir_name) # convertVideoToDir(video_name, dir_name) frames = readDir(dir_name) first_frame = True cnt = 0 for frame in frames: frame = np.expand_dims(frame, 0) if first_frame: file_pth = join(dir_name, "first_frame.npy") if not (os.path.isfile(file_pth)): # try: cv2.namedWindow(window_name, cv2.WND_PROP_FULLSCREEN) init_rect = cv2.selectROI(window_name, frame[0], False, False)