def main(m_type, m_name, logger, video_path=None, write_output=True):
    with tf.Session() as sess:
        # load the best model
        model = load_model(sess, m_type, m_name, logger)

        # check whether the input source is a file or a camera
        if video_path is None:
            video_path = 0

        # load the video or camera
        cap = cv2.VideoCapture(video_path)
        ret = True
        counter = 0
        tic = time.time()
        frames = []
        preds = []

        while ret:
            ret, frame = cap.read()
            if ret:
                # our operations on the frame come here
                frames.append(frame)
                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
                f_shape = frame.shape
                if frame.shape[0] != 192:
                    frame = rescale(frame)

                image = gray_normalizer(frame)
                image = change_channel(image, config["input_channel"])
                [p] = model.predict(sess, [image])
                x, y, w = upscale_preds(p, f_shape)
                preds.append([x, y, w])
                counter += 1

        cap.release()
        toc = time.time()
        print("{0:0.2f} FPS".format(counter / (toc - tic)))

        # get the video size
        video_size = frames[0].shape[0:2]
        if write_output:
            # prepare a video writer to show the result
            video = cv2.VideoWriter("predicted_video.avi",
                                    cv2.VideoWriter_fourcc(*"XVID"),
                                    15, (video_size[1], video_size[0]))

            for i, img in enumerate(frames):
                labeled_img = annotator((0, 250, 0), img, *preds[i])
                video.write(labeled_img)

            # close the video
            cv2.destroyAllWindows()
            video.release()
        print("Done...")
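
# A minimal sketch of how `main` might be invoked. The model type/name and the
# Logger signature are borrowed from check_trials below; the video path is a
# placeholder, not a file shipped with the project.
if __name__ == "__main__":
    logger = Logger("INC", "Inc_Purifier3", "", config, dir="models/")
    main("INC", "Inc_Purifier3", logger, video_path="sample_video.avi")
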
def swirski_reader(batch_size=64, normalize_image=True):
    # get the trials
    trials = sorted(glob.glob("data/swirski/*"))

    # loop over the trials and read the pupil-ellipses.txt files
    for path in trials:
        print("reading and predicting {}".format(path))
        txt_path = path + "/pupil-ellipses.txt"
        dataset_name = path.split("/")[2]

        # loop over the lines, read the labels and yield them with the corresponding images
        imgs_batch = []
        lbls_batch = []
        shapes = []
        with open(txt_path, mode='r') as f:
            for line in f:
                line = line.strip()
                img_id, vals = line.split(" | ")
                vals = vals.split(" ")
                x = float(vals[0])
                y = float(vals[1])

                # create the image path
                img_path = "{0}/frames/{1}-eye.png".format(path, img_id)
                img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
                shapes.append(img.shape)

                # resize the input to the model input size
                if img.shape != (config["input_height"], config["input_width"]):
                    img = rescale(img)

                if normalize_image:
                    img = gray_normalizer(img)

                # expand the channel dimension
                img = change_channel(img)

                imgs_batch.append(img)
                lbls_batch.append([x, y])
                if len(imgs_batch) == batch_size:
                    yield imgs_batch, np.asarray(lbls_batch, dtype=np.float32), \
                          dataset_name, np.asarray(shapes, dtype=np.float32)
                    imgs_batch = []
                    lbls_batch = []
                    shapes = []

        # yield the rest
        if len(imgs_batch) > 0:
            yield imgs_batch, np.asarray(lbls_batch, dtype=np.float32), \
                  dataset_name, np.asarray(shapes, dtype=np.float32)
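
# A quick consumption sketch for `swirski_reader`: iterate a batch and report
# its structure. Purely illustrative; only the generator contract
# (images, labels, dataset name, original shapes) comes from the code above.
for imgs, lbls, name, shapes in swirski_reader(batch_size=64):
    print("{}: batch of {} images, labels {}, first original shape {}".format(
        name, len(imgs), lbls.shape, tuple(shapes[0])))
    break  # remove to run through every trial
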
def check_trials():
    logger = Logger("INC", "Inc_Purifier3", "", config, dir="models/")

    with tf.Session() as sess:
        # load the best model
        model = load_model(sess, "INC", "Inc_Purifier3", logger)

        # pixel-error thresholds for summarizing the results
        pixel_errors = [1, 2, 3, 4, 5, 7, 10, 15, 20]

        trials_path = sorted(glob.glob("data/Original-data/*/*"))
        results = []
        for i, path in enumerate(trials_path):
            print("{0:3} reading {1}".format(i, path))
            images_path = glob.glob(path + "/*.jpg")
            images = []
            truths = []
            img_paths = []
            for ii, img_path in enumerate(images_path):
                _xml_path = img_path.split(".")[0] + ".xml"
                _xml_path = _xml_path.replace("in.", "gt.")
                truth = read_xml(_xml_path)
                img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
                img = change_channel(img)
                img = gray_normalizer(img)
                images.append(img)
                truths.append(truth)
                img_paths.append(img_path)

                if len(images) == 64 or ii == (len(images_path) - 1):
                    pred = model.predict(sess, images)
                    for iii, p in enumerate(pred):
                        img_id = img_paths[iii]
                        img_id = img_id.split("/")[4]
                        img_id = img_id.split(".")[0]
                        # trial_path, img_id, xt, yt, wt, ht, at, xp, yp, wp, hp, ap
                        result = "{0};{1};{2};{3};{4};{5};{6};{7};{8};{9};{10};{11}\n".format(
                            path, img_id, *truths[iii], *pred[iii])
                        results.append(result)
                    images = []
                    truths = []
                    img_paths = []

    with open(RESULTS_PATH, mode="w") as f:
        f.writelines(results)
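
# A minimal sketch of how the results file written above could be summarized
# against the `pixel_errors` thresholds. It assumes the semicolon-separated
# field order documented in check_trials (xt, yt in columns 2-3 and xp, yp in
# columns 7-8); the helper name `summarize_results` is hypothetical.
def summarize_results(results_path=RESULTS_PATH,
                      pixel_errors=(1, 2, 3, 4, 5, 7, 10, 15, 20)):
    with open(results_path) as f:
        rows = [line.strip().split(";") for line in f if line.strip()]

    # Euclidean distance between the ground-truth and predicted centers
    dists = [np.hypot(float(r[2]) - float(r[7]), float(r[3]) - float(r[8]))
             for r in rows]

    for e in pixel_errors:
        rate = sum(d <= e for d in dists) / float(len(dists))
        print("error <= {0:2d}px: {1:.2%}".format(e, rate))
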
def batches(self, ag, lbl_len=4, num_c=1, zero_mean=False):
    # loop indefinitely ...
    while True:
        # before each epoch, shuffle the data
        shuffle(self.data_list)
        images = []
        labels = []
        img_names = []

        # for all records in the data list
        for row in self.data_list:
            # read the image and the ground truth
            image = cv2.imread(row[0], cv2.IMREAD_GRAYSCALE)
            label = np.asarray(row[1:], dtype=np.float32)

            # add noise to the image and the corresponding label
            if ag is not None:
                image, label = ag.addNoise(image, label)

            # discard unused labels
            label = label[0:lbl_len]
            labels.append(label)

            # zero-mean the image
            if zero_mean:
                image = gray_normalizer(image)

            # change to the desired number of channels
            image = change_channel(image, num_c)
            images.append(image)
            img_names.append(row[0])

            if len(images) == self.batch_size:
                yield images, labels, img_names

                # empty the lists for the next yield
                images = []
                labels = []
                img_names = []

        # yield the remaining data
        if len(images) > 0:
            yield images, labels, img_names
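
# A minimal consumption sketch for `batches`. The owning loader instance
# (`loader`), the `model.train` call, and `num_steps` are hypothetical
# placeholders; only the (images, labels, img_names) contract comes from the
# generator above.
num_steps = 1000  # placeholder
batch_gen = loader.batches(ag=None, lbl_len=2, num_c=config["input_channel"], zero_mean=True)
for step in range(num_steps):
    images, labels, img_names = next(batch_gen)
    loss = model.train(sess, images, labels)  # hypothetical training op
    if step % 100 == 0:
        print("step {0}: loss {1:.4f}".format(step, loss))
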
def run(model, sess, file_in, information, name_exam):
    # load the video or camera
    cap = cv2.VideoCapture(file_in)
    fps = cap.get(cv2.CAP_PROP_FPS)

    path_exams = '/media/marcos/Dados/Projects/Results/Qualificacao/DeepRN/'
    # path_frames = '/media/marcos/Dados/Projects/Results/Qualificacao/DeepRN/Frames/{}'.format(name_exam)
    # try:
    #     os.mkdir(path_frames)
    # except FileExistsError:
    #     pass

    title_label = ('patient,param,frame,center_x,center_y,radius,flash_algorithm,'
                   'flash_information,color_flash,eye_size,img_mean,img_std,img_median')
    file_label = '/media/marcos/Dados/Projects/Results/Qualificacao/DeepRN/{}_label.csv'.format(name_exam)
    add_label(file_label, title_label)

    file_information = '/media/marcos/Dados/Projects/Datasets/Exams/Information_Exams/{}.log'.format(name_exam)
    if os.path.exists(file_information):
        patient_exam, param_exam = information.get_information_exam(file_information, fps)
    else:
        patient_exam, param_exam = '', ''

    counter, frames, preds = 0, [], []
    ret, frame = cap.read()
    while ret:
        frames.append(frame)

        # equalize the luminance channel to reduce illumination changes
        yuv = cv2.cvtColor(frame, cv2.COLOR_BGR2YUV)
        yuv[:, :, 0] = cv2.equalizeHist(yuv[:, :, 0])
        bgr = cv2.cvtColor(yuv, cv2.COLOR_YUV2BGR)
        frame = cv2.cvtColor(bgr, cv2.COLOR_BGR2GRAY)
        # cv2.imwrite('{}/gray_{}.png'.format(path_frames, counter), frame)

        f_shape = frame.shape
        if frame.shape[0] != 192:
            frame = rescale(frame)

        image = gray_normalizer(frame)
        # cv2.imwrite('{}/gray_normalizer_{}.png'.format(path_frames, counter), image)
        image = change_channel(image, config["input_channel"])
        # cv2.imwrite('{}/change_channel_{}.png'.format(path_frames, counter), image)

        [p] = model.predict(sess, [image])
        x, y, w = upscale_preds(p, f_shape)
        preds.append([x, y, w])

        if os.path.exists(file_information):
            flash_information, color_information = information.get_information_params(counter)
        else:
            flash_information, color_information = '', ''

        inf = '{},{},{},{},{},{},{},{},{},{},{},{},{}'.format(
            patient_exam, param_exam, counter, x, y, w, 0, flash_information,
            color_information, 0, image.mean(), image.std(), np.median(image))
        add_label(file_label, inf)

        ret, frame = cap.read()
        counter += 1

    if len(frames) > 0:
        video_size = frames[0].shape[0:2]
        fps = cap.get(cv2.CAP_PROP_FPS)
        video = cv2.VideoWriter('{}/{}.avi'.format(path_exams, name_exam),
                                cv2.VideoWriter_fourcc(*"XVID"), fps,
                                (video_size[1], video_size[0]))

        for i, img in enumerate(frames):
            # labeled_img = annotator((0, 250, 0), img, *preds[i])
            x, y, w = preds[i]
            color = (0, 250, 0)
            labeled_img = cv2.circle(img, (int(x), int(y)), int(w / 2), color, 2)
            video.write(labeled_img)
        video.release()

    print("Done {}...".format(name_exam))
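
# `add_label` is called above but not defined in this file. A plausible
# minimal implementation, assuming it simply appends one CSV line to the
# label file (this is an assumption, not the project's confirmed helper):
def add_label(file_label, line):
    # append the line, creating the file on first use
    with open(file_label, mode='a') as f:
        f.write(line + '\n')
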
def test(args):
    # Setup the model
    model_name = args.arch_RGB
    model = get_model(model_name, True)  # vgg_16

    testset_out_default_path = "./result/"
    testset_out_path = args.result_path
    if args.imgset:
        test_info = args.test_dataset + '_' + args.test_split
    else:
        if args.img_datalist == '':
            # Single image
            test_info = 'single'
        else:
            # Image list
            test_info = 'batch'

    if args.model_full_name != '':
        # Use the full name of the model to load
        print("Load training model: " + args.model_full_name)
        checkpoint = torch.load(
            pjoin(args.model_savepath, args.model_full_name))
        if testset_out_path == '':
            testset_out_path = "{}{}_{}".format(testset_out_default_path,
                                                args.model_full_name, test_info)
        model = torch.nn.DataParallel(
            model, device_ids=range(torch.cuda.device_count()))
        model.load_state_dict(checkpoint['model_RGB_state'])
    else:
        # Pretrained model
        print("Load pretrained model: {}".format(args.state_name))
        state = get_premodel(model, args.state_name)
        model = torch.nn.DataParallel(
            model, device_ids=range(torch.cuda.device_count()))
        model.load_state_dict(state)
        if testset_out_path == '':
            testset_out_path = "{}pretrain_{}".format(testset_out_default_path,
                                                      test_info)

    # Setup the image output folder, creating it if needed
    if not os.path.isdir(testset_out_path):
        os.mkdir(testset_out_path)

    if args.imgset:
        print("Test on dataset: {}".format(args.test_dataset))
        # Set up the dataloader
        data_loader = get_loader(args.test_dataset)
        data_path = get_data_path(args.test_dataset)
        v_loader = data_loader(data_path, split=args.test_split,
                               img_size=(args.img_rows, args.img_cols),
                               img_norm=args.img_norm)
        evalloader = data.DataLoader(v_loader, batch_size=1)
        print("Finish Loader Setup")

        model.cuda()
        model.eval()
        sum_mean, sum_median, sum_small, sum_mid, sum_large = 0, 0, 0, 0, 0
        evalcount = 0
        if args.numerical_result_path != '':
            f = open(args.numerical_result_path, 'w')
        with torch.no_grad():
            # for i_val, (images_val, labels_val, masks_val, valids_val) in tqdm(enumerate(evalloader)):
            for i_val, (images_val, labels_val, masks_val, valids_val,
                        depthes_val, meshdepthes_val) in tqdm(enumerate(evalloader)):
                images_val = Variable(images_val.contiguous().cuda())
                labels_val = Variable(labels_val.contiguous().cuda())
                masks_val = Variable(masks_val.contiguous().cuda())

                if args.arch_RGB == 'ms':
                    outputs, outputs2, outputs3, outputs4, outputs5 = model(
                        images_val)
                else:
                    outputs = model(images_val)  # 1*ch*h*w

                outputs_n, mean_i, median_i, small_i, mid_i, large_i = eval_normal(
                    outputs, labels_val, masks_val)

                outputs_norm = np.squeeze(outputs_n.data.cpu().numpy(), axis=0)
                labels_val_norm = np.squeeze(labels_val.data.cpu().numpy(), axis=0)
                images_val = np.squeeze(images_val.data.cpu().numpy(), axis=0)
                images_val = images_val + 0.5
                images_val = images_val.transpose(1, 2, 0)

                # Change channels to have a better appearance for the paper
                outputs_norm = change_channel(outputs_norm)
                labels_val_norm = (labels_val_norm + 1) / 2  # scale to [0, 1]
                labels_val_norm = change_channel(labels_val_norm)

                misc.imsave(
                    pjoin(testset_out_path, "{}_out.png".format(i_val + 1)),
                    outputs_norm)
                misc.imsave(
                    pjoin(testset_out_path, "{}_gt.png".format(i_val + 1)),
                    labels_val_norm)
                misc.imsave(
                    pjoin(testset_out_path, "{}_in.jpg".format(i_val + 1)),
                    images_val)

                # skip NaN/Inf results so they do not poison the averages
                if np.isnan(mean_i) or np.isinf(mean_i):
                    print('Error!')
                else:
                    sum_mean += mean_i
                    sum_median += median_i
                    sum_small += small_i
                    sum_mid += mid_i
                    sum_large += large_i
                    evalcount += 1

                # report every iteration
                print("Iteration %d Evaluation Loss: mean %.4f, median %.4f, 11.25 %.4f, 22.5 %.4f, 30 %.4f"
                      % (i_val + 1, mean_i, median_i, small_i, mid_i, large_i))
                if args.numerical_result_path != '':
                    f.write("Iteration %d Evaluation Loss: mean %.4f, median %.4f, 11.25 %.4f, 22.5 %.4f, 30 %.4f\n"
                            % (i_val + 1, mean_i, median_i, small_i, mid_i, large_i))

        sum_mean /= evalcount
        sum_median /= evalcount
        sum_small /= evalcount
        sum_mid /= evalcount
        sum_large /= evalcount
        if args.numerical_result_path != '':
            f.write("evalnum is %d, Evaluation Mean Loss: mean %.4f, median %.4f, 11.25 %.4f, 22.5 %.4f, 30 %.4f"
                    % (evalcount, sum_mean, sum_median, sum_small, sum_mid, sum_large))
            f.close()
        print("evalnum is %d, Evaluation Mean Loss: mean %.4f, median %.4f, 11.25 %.4f, 22.5 %.4f, 30 %.4f"
              % (evalcount, sum_mean, sum_median, sum_small, sum_mid, sum_large))
    # end of the dataset test
    else:
        if args.img_datalist == "":
            # For a single image, without GT
            print("Read Input Image from : {}".format(args.img_path))
            img = misc.imread(args.img_path)
            if args.img_rot:
                img = np.transpose(img, (1, 0, 2))
                img = np.flipud(img)
            orig_size = img.shape[:-1]
            # Resize the image to the model input size
            img = misc.imresize(img, (args.img_rows, args.img_cols))
            img = img.astype(np.float)
            if args.img_norm:
                img = (img - 128) / 255

            # NHWC -> NCHW
            img = img.transpose(2, 0, 1)
            img = np.expand_dims(img, 0)
            img = torch.from_numpy(img).float()

            if torch.cuda.is_available():
                model.cuda(0)
                images = Variable(img.contiguous().cuda(0))
            else:
                images = Variable(img)

            with torch.no_grad():
                outputs = model(images)

            outputs_norm = norm_imsave(outputs)
            outputs_norm = np.squeeze(outputs_norm.data.cpu().numpy(), axis=0)
            # Change channels
            outputs_norm = change_channel(outputs_norm)
            misc.imsave(args.out_path, outputs_norm)
            print("Complete")
        # end of the test on a single image
        else:
            # For an image list without GT
            data_list = get_dataList(args.img_datalist)
            for img_path in data_list:
                print("Read Input Image from : {}".format(img_path))
                img = misc.imread(pjoin(args.img_dataroot, img_path))
                height, width, channels = img.shape
                output_filename = img_path.split('/')[-1]
                if args.img_rot:
                    img = np.transpose(img, (1, 0, 2))
                    img = np.flipud(img)
                orig_size = img.shape[:-1]
                # Resize the image to the model input size
                img = misc.imresize(img, (args.img_rows, args.img_cols))
                img = img.astype(np.float)
                if args.img_norm:
                    img = (img - 128) / 255

                # NHWC -> NCHW
                img = img.transpose(2, 0, 1)
                img = np.expand_dims(img, 0)
                img = torch.from_numpy(img).float()

                if torch.cuda.is_available():
                    model.cuda(0)
                    images = Variable(img.contiguous().cuda(0))
                else:
                    images = Variable(img)

                with torch.no_grad():
                    outputs = model(images)

                outputs_norm = norm_imsave(outputs)
                outputs_norm = np.squeeze(outputs_norm.data.cpu().numpy(), axis=0)
                # Change channels
                outputs_norm = change_channel(outputs_norm)
                # Resize to the original size, if needed
                # outputs_norm = misc.imresize(outputs_norm, (height, width))

                # Save the result
                misc.imsave(pjoin(args.out_path, output_filename), outputs_norm)
                # Save to a mat file, if needed
                # outputs_mat = outputs.tolist()
                # mat_filename = output_filename.replace(output_filename.split('.')[-1], 'mat')
                # sio.savemat(pjoin(testset_out_path, mat_filename), {'normal': outputs_mat})
            print("Complete")
def main_images(m_type, m_name, logger, data_path=None, actors=[], write_output=True):
    from tensorflow.compat.v1 import ConfigProto
    from tensorflow.compat.v1 import InteractiveSession

    # allow GPU memory growth so TensorFlow does not grab all GPU memory up front
    _config = ConfigProto()
    _config.gpu_options.allow_growth = True
    session = InteractiveSession(config=_config)
    # ---

    with tf.Session(config=tf.ConfigProto(log_device_placement=True)) as sess:
        # load the best model
        model = load_model(sess, m_type, m_name, logger)
        eye_info = HierarchicalDict(path=data_path + '/eye_data.json')

        previous_segment = ''
        current_segment = ''
        for data_element in tqdm(data_generator(data_path=data_path, actors=actors), ascii=True):
            keys = data_element.keys
            image = data_element.item

            # reset the aspect-ratio history whenever a new segment starts
            current_segment = keys[:-1]
            if current_segment != previous_segment:
                last_ars = [deque(maxlen=25) for _ in range(2)]

            if not eye_info.check_key(keys):
                continue

            rois_coords = eye_info[keys]['roi']
            contours = eye_info[keys]['cnt']
            aspect_ratios = eye_info[keys]['ars']

            eye1 = np.array(contours[0]).reshape((-1, 1, 2))
            eye2 = np.array(contours[1]).reshape((-1, 1, 2))
            eyes = [eye1, eye2]

            # create an empty mask to draw on
            result = np.zeros(image.shape, np.uint8)

            for i, (x1, x2, y1, y2) in enumerate(rois_coords):
                # skip frames where the eye is likely closed (the aspect ratio
                # drops well below its recent running average)
                prev_aspect_ratio = sum(last_ars[i]) / len(last_ars[i]) if len(last_ars[i]) else 0.35
                current_aspect_ratio = aspect_ratios[i]
                if current_aspect_ratio < 0.6 * prev_aspect_ratio:
                    continue
                last_ars[i].append(current_aspect_ratio)

                roi = image[y1:y2, x1:x2]  # get the original eye region

                # preprocessing for the pupil detection model
                roi_gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
                shape = roi_gray.shape
                if roi_gray.shape[0] != 192:
                    roi_gray = rescale(roi_gray)
                roi_gray = gray_normalizer(roi_gray)
                roi_gray = change_channel(roi_gray, config["input_channel"])
                # ---

                [p] = model.predict(sess, [roi_gray])
                x, y, w = upscale_preds(p, shape)
                x, y, w = [int(item) for item in (x, y, w)]

                # draw the circle indicating a pupil, but only if the predicted
                # center falls inside the eye contour
                if cv2.pointPolygonTest(eyes[i], (x + x1, y + y1), False) != -1:
                    cv2.drawContours(result, [eyes[i]], 0, (255, 255, 255), -1)
                    roi = result[y1:y2, x1:x2]
                    cv2.circle(roi, (x, y), 9, (0, 0, 255), -1)
                    result[y1:y2, x1:x2] = roi
                # ---

            previous_segment = current_segment

            if write_output:
                actor, domain, segment, idx = keys
                path = os.path.dirname(os.path.abspath(f'{data_path}/{actor}/{domain}/{segment}'))
                path = path + '/' + segment + '/original_renders/eye_regions/'
                if not os.path.exists(path):
                    os.makedirs(path)
                cv2.imwrite(f'{path}{idx}.jpg', cv2.cvtColor(result, cv2.COLOR_BGR2RGB))
    print("Done.")
def test(args):
    # Setup the models
    # Setup the fusion model (RGB+Depth)
    model_name_F = args.arch_F
    model_F = get_model(model_name_F, True)  # concat and output
    model_F = torch.nn.DataParallel(
        model_F, device_ids=range(torch.cuda.device_count()))

    # Setup the map model
    if args.arch_map == 'map_conv':
        model_name_map = args.arch_map
        model_map = get_model(model_name_map, True)  # concat and output
        model_map = torch.nn.DataParallel(
            model_map, device_ids=range(torch.cuda.device_count()))

    if args.model_full_name != '':
        # Use the full name of the model to load
        print("Load training model: " + args.model_full_name)
        checkpoint = torch.load(
            pjoin(args.model_savepath, args.model_full_name))
        model_F.load_state_dict(checkpoint['model_F_state'])
        if args.arch_map == 'map_conv':
            model_map.load_state_dict(checkpoint["model_map_state"])

    # Setup the image
    if args.imgset:
        print("Test on dataset: {}".format(args.dataset))
        data_loader = get_loader(args.dataset)
        data_path = get_data_path(args.dataset)
        v_loader = data_loader(data_path, split=args.test_split,
                               img_size=(args.img_rows, args.img_cols),
                               img_norm=args.img_norm)
        evalloader = data.DataLoader(v_loader, batch_size=1)
        print("Finish Loader Setup")

        model_F.cuda()
        model_F.eval()
        if args.arch_map == 'map_conv':
            model_map.cuda()
            model_map.eval()

        sum_mean, sum_median, sum_small, sum_mid, sum_large, sum_num = [], [], [], [], [], []
        evalcount = 0
        with torch.no_grad():
            for i_val, (images_val, labels_val, masks_val, valids_val,
                        depthes_val, meshdepthes_val) in tqdm(enumerate(evalloader)):
                images_val = Variable(images_val.contiguous().cuda())
                labels_val = Variable(labels_val.contiguous().cuda())
                masks_val = Variable(masks_val.contiguous().cuda())
                valids_val = Variable(valids_val.contiguous().cuda())
                depthes_val = Variable(depthes_val.contiguous().cuda())

                if args.arch_map == 'map_conv':
                    outputs_valid = model_map(
                        torch.cat((depthes_val, valids_val[:, np.newaxis, :, :]), dim=1))
                    outputs, outputs1, outputs2, outputs3, output_d = model_F(
                        images_val, depthes_val, outputs_valid.squeeze(1))
                else:
                    outputs, outputs1, outputs2, outputs3, output_d = model_F(
                        images_val, depthes_val, valids_val)

                outputs_n, pixelnum, mean_i, median_i, small_i, mid_i, large_i = eval_normal_pixel(
                    outputs, labels_val, masks_val)

                outputs_norm = np.squeeze(outputs_n.data.cpu().numpy(), axis=0)
                labels_val_norm = np.squeeze(labels_val.data.cpu().numpy(), axis=0)
                images_val = np.squeeze(images_val.data.cpu().numpy(), axis=0)
                images_val = images_val + 0.5
                images_val = images_val.transpose(1, 2, 0)
                depthes_val = np.squeeze(depthes_val.data.cpu().numpy(), axis=0)
                depthes_val = np.transpose(depthes_val, [1, 2, 0])
                depthes_val = np.repeat(depthes_val, 3, axis=2)

                outputs_norm = change_channel(outputs_norm)
                labels_val_norm = (labels_val_norm + 1) / 2  # scale to [0, 1]
                labels_val_norm = change_channel(labels_val_norm)

                # if (i_val+1) % 10 == 0:
                misc.imsave(
                    pjoin(args.testset_out_path, "{}_MS_hyb.png".format(i_val + 1)),
                    outputs_norm)
                misc.imsave(
                    pjoin(args.testset_out_path, "{}_gt.png".format(i_val + 1)),
                    labels_val_norm)
                misc.imsave(
                    pjoin(args.testset_out_path, "{}_in.jpg".format(i_val + 1)),
                    images_val)
                misc.imsave(
                    pjoin(args.testset_out_path, "{}_depth.png".format(i_val + 1)),
                    depthes_val)

                # accumulate the metrics, skipping NaN/Inf results
                if not (np.isnan(mean_i) or np.isinf(mean_i)):
                    sum_mean.append(mean_i)
                    sum_median.append(median_i)
                    sum_small.append(small_i)
                    sum_mid.append(mid_i)
                    sum_large.append(large_i)
                    sum_num.append(pixelnum)
                    evalcount += 1

                if (i_val + 1) % 10 == 0:
                    print("Iteration %d Evaluation Loss: mean %.4f, median %.4f, 11.25 %.4f, 22.5 %.4f, 30 %.4f"
                          % (i_val + 1, mean_i, median_i, small_i, mid_i, large_i))

        # Summarize the result
        eval_print(sum_mean, sum_median, sum_small, sum_mid, sum_large, sum_num,
                   item='Pixel-Level')

        avg_mean = sum(sum_mean) / evalcount
        sum_mean.append(avg_mean)
        avg_median = sum(sum_median) / evalcount
        sum_median.append(avg_median)
        avg_small = sum(sum_small) / evalcount
        sum_small.append(avg_small)
        avg_mid = sum(sum_mid) / evalcount
        sum_mid.append(avg_mid)
        avg_large = sum(sum_large) / evalcount
        sum_large.append(avg_large)
        print("evalnum is %d, Evaluation Image-Level Mean Loss: mean %.4f, median %.4f, 11.25 %.4f, 22.5 %.4f, 30 %.4f"
              % (evalcount, avg_mean, avg_median, avg_small, avg_mid, avg_large))

        sum_matrix = np.transpose(
            [sum_mean, sum_median, sum_small, sum_mid, sum_large])
        if args.model_full_name != '':
            sum_file = args.model_full_name[:-4] + '.csv'
            np.savetxt(pjoin(args.model_savepath, sum_file), sum_matrix,
                       fmt='%.6f', delimiter=',')
            print("Saving to %s" % sum_file)
    # end of the dataset test
    else:
        if not os.path.isdir(args.out_path):
            os.mkdir(args.out_path)
        print("Read Input Image from : {}".format(args.img_path))
        for i in os.listdir(args.img_path):
            if not i.endswith('.jpg'):
                continue
            print(i)
            input_f = args.img_path + i
            depth_f = args.depth_path + i[:-4] + '.png'
            output_f = args.out_path + i[:-4] + '_rgbd.png'

            img = misc.imread(input_f)
            orig_size = img.shape[:-1]
            if args.img_rot:
                img = np.transpose(img, (1, 0, 2))
                img = np.flipud(img)
                # Resize the image to the model input size
                img = misc.imresize(img, (args.img_cols, args.img_rows))
            else:
                # Resize the image to the model input size
                img = misc.imresize(img, (args.img_rows, args.img_cols))

            img = img.astype(np.float)
            if args.img_norm:
                img = (img - 128) / 255

            # NHWC -> NCHW
            img = img.transpose(2, 0, 1)
            img = np.expand_dims(img, 0)
            img = torch.from_numpy(img).float()

            if args.img_rot:
                depth = png_reader_32bit(depth_f, (args.img_rows, args.img_cols))
                depth = np.transpose(depth, (1, 0))
                depth = np.flipud(depth)
                # valid = png_reader_uint8(mask_f, (args.img_rows, args.img_cols))
                # valid = np.transpose(valid, (1, 0))
                # valid = np.flipud(valid)
            else:
                depth = png_reader_32bit(depth_f, (args.img_rows, args.img_cols))
                # valid = png_reader_uint8(mask_f, (args.img_rows, args.img_cols))

            depth = depth.astype(float)
            # Change the scale so that scaled_depth = 1 corresponds to a real 10m depth:
            # Matterport: depth = depth/40000; ScanNet: depth = depth/10000
            depth = depth / args.d_scale
            if depth.ndim == 3:  # reduce to 2 dims
                depth = depth[:, :, 0]
            # if valid.ndim == 3:  # reduce to 2 dims
            #     valid = valid[:, :, 0]
            # valid = 1-depth
            # valid[valid>1] = 1
            valid = (depth > 0.0001).astype(float)

            depth = depth[np.newaxis, :, :]
            depth = np.expand_dims(depth, 0)
            valid = np.expand_dims(valid, 0)
            depth = torch.from_numpy(depth).float()
            valid = torch.from_numpy(valid).float()

            if torch.cuda.is_available():
                model_F.cuda()
                model_F.eval()
                if args.arch_map == 'map_conv':
                    model_map.cuda()
                    model_map.eval()
                images = Variable(img.contiguous().cuda())
                depth = Variable(depth.contiguous().cuda())
                valid = Variable(valid.contiguous().cuda())
            else:
                images = Variable(img)
                depth = Variable(depth)
                valid = Variable(valid)

            with torch.no_grad():
                if args.arch_map == 'map_conv':
                    outputs_valid = model_map(
                        torch.cat((depth, valid[:, np.newaxis, :, :]), dim=1))
                    outputs, outputs1, outputs2, outputs3, output_d = model_F(
                        images, depth, outputs_valid.squeeze(1))
                else:
                    outputs, outputs1, outputs2, outputs3, output_d = model_F(
                        images, depth, valid)

            outputs_norm = norm_imsave(outputs)
            outputs_norm = np.squeeze(outputs_norm.data.cpu().numpy(), axis=0)
            # outputs_norm = misc.imresize(outputs_norm, orig_size)
            outputs_norm = change_channel(outputs_norm)
            misc.imsave(output_f, outputs_norm)
        print("Complete")
def lpw_reader(batch_size=64, normalize_image=True):
    """
    Read the LPW dataset.
    Yield: image, label pairs + the trial name (for naming purposes)
    :return:
    """
    LPW_subject = glob.glob('data/LPW/*')
    LPW_subject = sorted(LPW_subject)

    # get all trial paths
    trials_path = []
    for subj in LPW_subject:
        # get the video files
        avi_paths = glob.glob(subj + "/*.avi")
        trials = [p.split(".")[0] for p in avi_paths]
        trials_path.extend(sorted(trials))

    # loop over all trials and yield the images + labels
    for trial in trials_path:
        print("reading and predicting {}...".format(trial))
        avi_path = trial + ".avi"
        txt_path = trial + ".txt"
        f = open(txt_path, mode="r")
        cap = cv2.VideoCapture(avi_path)
        ret = True
        img_batch = []
        lbl_batch = []
        shapes = []
        while ret:
            ret, frame = cap.read()
            if ret:
                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
                line = f.readline().strip()
                vals = line.split(" ")
                shapes.append(frame.shape)
                x = float(vals[0])
                y = float(vals[1])

                # resize the input to the model input size
                if frame.shape != (config["input_height"], config["input_width"]):
                    img = rescale(frame)
                else:
                    img = frame

                if normalize_image:
                    img = gray_normalizer(img)

                img = change_channel(img)
                img_batch.append(img)
                lbl_batch.append([x, y])
                if len(img_batch) == batch_size:
                    yield img_batch, \
                          np.asarray(lbl_batch, dtype=np.float32), \
                          trial, \
                          np.asarray(shapes, dtype=np.float32)
                    img_batch = []
                    lbl_batch = []
                    shapes = []

        # yield the rest
        if len(img_batch) > 0:
            yield img_batch, np.asarray(lbl_batch, dtype=np.float32), trial, np.asarray(
                shapes, dtype=np.float32)

        # close the label file
        f.close()
        # release the capture
        cap.release()
        cv2.destroyAllWindows()
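
# A minimal sketch of how the readers above could be consumed to measure the
# mean center error on LPW. `load_model`, `model.predict`, and `upscale_preds`
# are used as elsewhere in this file; the evaluation loop itself is an
# assumption about how the yielded batches are meant to be scored.
def eval_lpw(m_type, m_name, logger):
    with tf.Session() as sess:
        model = load_model(sess, m_type, m_name, logger)
        errors = []
        for imgs, lbls, trial, shapes in lpw_reader(batch_size=64):
            preds = model.predict(sess, imgs)
            for p, lbl, shape in zip(preds, lbls, shapes):
                # map the prediction back to the original frame size
                x, y, w = upscale_preds(p, shape)
                errors.append(np.hypot(x - lbl[0], y - lbl[1]))
        print("mean center error: {0:.2f} px".format(np.mean(errors)))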