def extract_images_from_video_opencv(video_file, save_dir, debug=True):
    '''
    extract a list of images from a video file using opencv package
    Note that if the VideoCapture does not work, uninstall python-opencv and reinstall the newest version

    Parameters:
        video_file:     a file path to a video file
        save_dir:       a folder to save the images extracted from the video
        debug:          boolean, debug mode to check format

    Returns:
        None, the frames are saved to save_dir as image00000.png, image00001.png, ...
    '''
    if debug:
        assert is_path_exists(video_file), 'the input video file does not exist'
    mkdir_if_missing(save_dir)

    cap = cv2.VideoCapture(video_file)
    frame_id = 0
    try:
        while True:
            ret, frame = cap.read()
            if not ret:     # no more frame can be read
                break
            save_path = os.path.join(save_dir, 'image%05d.png' % frame_id)
            # frames are read by opencv, so the conversion flag bgr2rgb is requested when saving
            visualize_image(frame, bgr2rgb=True, save_path=save_path)
            frame_id += 1
            print('processing frame %d' % frame_id)
    finally:
        # release the capture even when saving a frame raises
        cap.release()
    return
def extract_images_from_video_ffmpeg(video_file, save_dir, format='frame%06d.png', startnum=0, verbose=True, debug=True):
    '''
    extract a list of images from a video file using system ffmpeg library

    Parameters:
        video_file:     a file path to a video file
        save_dir:       a folder to save the images extracted from the video
        format:         string format for the extracted output images
        startnum:       integer, passed to ffmpeg as -start_number, i.e., the index of the first output image
        verbose:        boolean, display logging information
        debug:          boolean, debug mode to check format

    Returns:
        None, the exit status of ffmpeg is not checked

    Raises:
        OSError:        if the ffmpeg executable cannot be launched
    '''
    import subprocess

    if debug:
        assert is_path_exists(video_file), 'the input video file does not exist'
    mkdir_if_missing(save_dir)

    # pass the arguments as a list instead of a shell string, so that paths containing
    # spaces or shell special characters are handed to ffmpeg unchanged
    command = ['ffmpeg']
    if not verbose:
        command += ['-loglevel', 'panic']
    command += ['-i', str(video_file), '-start_number', '%d' % startnum, '%s/%s' % (save_dir, format)]
    subprocess.call(command)
    return
def conf_thresholding(data_dir, save_dir, score_threshold):
    '''
    remove the trajectories whose average confidence score is lower than a threshold

    Parameters:
        data_dir:           result folder, containing 'data' (one txt file per sequence) and
                            'trk_withid' (one folder per sequence, one txt file per frame)
        save_dir:           folder to save the filtered results, with the same layout as data_dir
        score_threshold:    trajectories with an average score below this value are removed

    Note:
        the ID is read from the second field in the 'data' files and from the last field in the
        'trk_withid' files, the score is read from the last field in the 'data' files
        the scores are pooled by ID over all sequences, as in the original implementation
    '''
    # collect all trajectories
    tracker_id_score = dict()
    eval_dir = os.path.join(data_dir, 'data')
    seq_list, _ = load_list_from_folder(eval_dir)
    for seq_file in seq_list:
        seq_data, _ = load_txt_file(seq_file)
        for data_line in seq_data:
            data_split = data_line.split(' ')
            score_tmp = float(data_split[-1])
            # parse the ID in the same way as when filtering below, so that an ID
            # written as a float (e.g., '3.0') is handled in both passes
            id_tmp = int(float(data_split[1]))
            tracker_id_score.setdefault(id_tmp, list()).append(score_tmp)

    # collect the ID to remove based on the confidence, a set makes the lookup constant time
    to_delete_id = set()
    for track_id, score_list in tracker_id_score.items():
        average_score = sum(score_list) / float(len(score_list))
        if average_score < score_threshold:
            to_delete_id.add(track_id)

    # remove the ID in the data folder for tracking evaluation
    save_dir_tmp = os.path.join(save_dir, 'data')
    mkdir_if_missing(save_dir_tmp)
    for seq_file in seq_list:
        seq_name = fileparts(seq_file)[1]
        seq_data, _ = load_txt_file(seq_file)
        with open(os.path.join(save_dir_tmp, seq_name + '.txt'), 'w') as seq_file_save:
            for data_line in seq_data:
                data_split = data_line.split(' ')
                id_tmp = int(float(data_split[1]))
                if id_tmp not in to_delete_id:
                    seq_file_save.write(data_line + '\n')

    # remove the ID in the trk with id folder for detection evaluation and tracking visualization
    trk_id_dir = os.path.join(data_dir, 'trk_withid')
    seq_dir_list, _ = load_list_from_folder(trk_id_dir)
    save_dir_tmp = os.path.join(save_dir, 'trk_withid')
    for seq_dir in seq_dir_list:
        frame_list, _ = load_list_from_folder(seq_dir)
        seq_name = fileparts(seq_dir)[1]
        save_frame_dir = os.path.join(save_dir_tmp, seq_name)
        mkdir_if_missing(save_frame_dir)
        for frame in frame_list:
            frame_index = fileparts(frame)[1]
            frame_data, _ = load_txt_file(frame)
            with open(os.path.join(save_frame_dir, frame_index + '.txt'), 'w') as frame_file_save:
                for data_line in frame_data:
                    data_split = data_line.split(' ')
                    id_tmp = int(float(data_split[-1]))
                    if id_tmp not in to_delete_id:
                        frame_file_save.write(data_line + '\n')
def generate_video_from_list(image_list, save_path, framerate=30, downsample=1, warning=True, debug=True):
    '''
    create video from a list of images with a framerate
    note that: the height and width of the images should be a multiple of 2,
    images with an odd height or width are resized up to the next even size

    Parameters:
        image_list:     a list of image path
        save_path:      the path to save the video file
        framerate:      fps, used for both the input and the output stream
        downsample:     passed to load_image as resize_factor
        warning:        boolean, passed to the image loading and resizing functions
        debug:          boolean, debug mode to check format
    '''
    if debug:
        assert islistofstring(image_list), 'the input is not correct'
        assert ispositiveinteger(framerate), 'the framerate is a positive integer'
    mkdir_if_missing(save_path)

    inputdict = {'-r': str(framerate)}
    outputdict = {'-r': str(framerate), '-crf': '18', '-vcodec': 'libx264', '-profile:V': 'high', '-pix_fmt': 'yuv420p'}
    video_writer = FFmpegWriter(save_path, inputdict=inputdict, outputdict=outputdict)
    num_images = len(image_list)
    try:
        for count, image_path in enumerate(image_list, 1):
            print('processing frame %d/%d' % (count, num_images))
            image = load_image(image_path, resize_factor=downsample, warning=warning, debug=debug)

            # make sure the height and width are multiple of 2
            height, width = image.shape[0], image.shape[1]
            if height % 2 != 0 or width % 2 != 0:
                height += height % 2
                width += width % 2
                image = image_resize(image, target_size=[height, width], warning=warning, debug=debug)
            video_writer.writeFrame(image)
    finally:
        # close the writer even when loading or writing a frame fails
        video_writer.close()
def extract_images_from_video_ffmpeg(video_file, save_dir, format='frame%06d.png', debug=True):
    '''
    extract a list of images from a video file using the system ffmpeg

    Parameters:
        video_file:     a file path to a video file
        save_dir:       a folder to save the images extracted from the video
        format:         string format for the extracted output images
        debug:          boolean, debug mode to check format

    Returns:
        None, the exit status of ffmpeg is not checked

    Raises:
        OSError:        if the ffmpeg executable cannot be launched
    '''
    import subprocess

    if debug:
        assert is_path_exists(video_file), 'the input video file does not exist'
    mkdir_if_missing(save_dir)

    # pass the arguments as a list instead of a shell string, so that paths containing
    # spaces or shell special characters are handed to ffmpeg unchanged
    command = ['ffmpeg', '-i', str(video_file), '%s/%s' % (save_dir, format)]
    subprocess.call(command)
def vis(result_sha, data_root, result_root):
    '''
    draw the tracked 3D boxes with their IDs on the images of every sequence and save the rendered frames

    Parameters:
        result_sha:     name of the result folder under result_root
        data_root:      root folder of the data, containing 'image_02/<seq>' and 'calib/<seq>.txt'
        result_root:    root folder of the results, containing '<result_sha>/trk_withid/<seq>/<frame>.txt'

    Note:
        the following names are not defined here and are read from the enclosing module:
        seq_list, type_whitelist, score_threshold, colors, max_color, width, height
        the rendered frames are saved to '<result_root>/<result_sha>/trk_image_vis/<seq>'
    '''
    def show_image_with_boxes(img, objects_res, object_gt, calib, save_path, height_threshold=0):
        # draw the projected 3D box and the ID of every object, then resize and save the image
        # object_gt and height_threshold are accepted but not used
        img2 = np.copy(img)
        for obj in objects_res:
            box3d_pts_2d, _ = compute_box_3d(obj, calib.P)
            # one color per ID, the color table is reused cyclically
            color_tmp = tuple([int(tmp * 255) for tmp in colors[obj.id % max_color]])
            img2 = draw_projected_box3d(img2, box3d_pts_2d, color=color_tmp)
            text = 'ID: %d' % obj.id
            if box3d_pts_2d is not None:
                img2 = cv2.putText(img2, text, (int(box3d_pts_2d[4, 0]), int(box3d_pts_2d[4, 1]) - 8), cv2.FONT_HERSHEY_TRIPLEX, 0.5, color=color_tmp)

        img = Image.fromarray(img2)
        img = img.resize((width, height))
        img.save(save_path)

    for seq in seq_list:
        image_dir = os.path.join(data_root, 'image_02/%s' % seq)
        calib_file = os.path.join(data_root, 'calib/%s.txt' % seq)
        result_dir = os.path.join(result_root, '%s/trk_withid/%s' % (result_sha, seq))
        save_3d_bbox_dir = os.path.join(result_dir, '../../trk_image_vis/%s' % seq)
        mkdir_if_missing(save_3d_bbox_dir)

        # load the list
        images_list, num_images = load_list_from_folder(image_dir)
        print('number of images to visualize is %d' % num_images)
        start_count = 0

        # the calibration file is the same for all frames of a sequence, so it is loaded once,
        # and only when the first existing image is processed
        # NOTE(review): assumes the Calibration object is not modified while drawing, confirm
        calib_tmp = None
        for count in range(start_count, num_images):
            image_tmp = images_list[count]
            if not is_path_exists(image_tmp):
                continue
            image_index = int(fileparts(image_tmp)[1])
            image_tmp = np.array(Image.open(image_tmp))
            # the values are not used, but the unpacking fails for an image that is not height x width x channel
            img_height, img_width, img_channel = image_tmp.shape

            # load the result, a missing file means that no object is drawn in this frame
            result_tmp = os.path.join(result_dir, '%06d.txt' % image_index)
            if not is_path_exists(result_tmp):
                object_res = []
            else:
                object_res = read_label(result_tmp)
            print('processing index: %d, %d/%d, results from %s' % (image_index, count + 1, num_images, result_tmp))
            if calib_tmp is None:
                calib_tmp = Calibration(calib_file)     # load the calibration

            # keep the objects of the selected types with a high enough score
            object_res_filtered = []
            for object_tmp in object_res:
                if object_tmp.type not in type_whitelist:
                    continue
                if hasattr(object_tmp, 'score'):
                    if object_tmp.score < score_threshold:
                        continue
                object_res_filtered.append(object_tmp)

            num_instances = len(object_res_filtered)
            save_image_with_3dbbox_gt_path = os.path.join(save_3d_bbox_dir, '%06d.jpg' % (image_index))
            show_image_with_boxes(image_tmp, object_res_filtered, [], calib_tmp, save_path=save_image_with_3dbbox_gt_path)
            print('number of objects to plot is %d' % (num_instances))
def extract_images_from_video_opencv(video_file, save_dir, debug=True):
    '''
    extract all frames of a video file with opencv and save them to save_dir as image00000.png, image00001.png, ...
    if the VideoCapture does not work, uninstall python-opencv and reinstall the newest version

    Parameters:
        video_file:     a file path to a video file
        save_dir:       a folder to save the images extracted from the video
        debug:          boolean, debug mode to check that the video file exists
    '''
    if debug:
        assert is_path_exists(video_file), 'the input video file does not exist'
    mkdir_if_missing(save_dir)

    cap = cv2.VideoCapture(video_file)
    frame_id = 0
    try:
        while True:
            ret, frame = cap.read()
            if not ret:     # no more frame can be read
                break
            save_path = os.path.join(save_dir, 'image%05d.png' % frame_id)
            visualize_image(frame, bgr2rgb=True, save_path=save_path)
            frame_id += 1
            print('processing frame %d' % frame_id)
    finally:
        # release the capture even when saving a frame raises
        cap.release()
def save_vis_close_helper(fig=None, ax=None, vis=False, save_path=None, warning=True, debug=True, transparent=True, closefig=True):
    '''
    save, display and close a matplotlib figure

    Parameters:
        fig:            the figure to save and close, the current figure is used if None
        ax:             the axis, only returned back when the figure is kept open
        vis:            boolean, display the figure with plt.show()
        save_path:      a file path to save the figure, nothing is saved if None
        warning:        boolean, passed to mkdir_if_missing
        debug:          boolean, passed to mkdir_if_missing
        transparent:    boolean, passed to savefig
        closefig:       boolean, close the figure before returning

    Returns:
        (None, None) if the figure is closed, otherwise (fig, ax) as given

    Note:
        the resolution is read from the module-level variable dpi
    '''
    # save and visualization
    if save_path is not None:
        # the folder is needed for saving no matter if the debug mode is on or not,
        # it was only created in debug mode before
        mkdir_if_missing(save_path, warning=warning, debug=debug)
        # plt.close(None) below closes the current figure, so the current figure is also the one to save
        fig_to_save = plt.gcf() if fig is None else fig
        fig_to_save.savefig(save_path, dpi=dpi, transparent=transparent)
    if vis:
        plt.show()

    if closefig:
        plt.close(fig)
        return None, None
    else:
        return fig, ax
for frame in frame_list: frame_index = fileparts(frame)[1] frame_file_save = os.path.join(save_frame_dir, frame_index+'.txt'); frame_file_save = open(frame_file_save, 'w') frame_data, num_line = load_txt_file(frame) for data_line in frame_data: data_split = data_line.split(' ') id_tmp = int(data_split[-1]) if id_tmp not in to_delete_id: frame_file_save.write(data_line + '\n') frame_file_save.close() if __name__ == '__main__': if len(sys.argv) != 2: print('Usage: python trk_conf_threshold.py result_sha(e.g., pointrcnn_Car_test_thres)') sys.exit(1) result_sha = sys.argv[1] cat = result_sha.split('_')[1] if cat == 'Car': score_threshold = 2.917300 elif cat == 'Pedestrian': score_threshold = 2.070726 else: assert False, 'error' root_dir = './results' data_dir = os.path.join(root_dir, result_sha) save_dir = os.path.join(root_dir, result_sha+'_thres'); mkdir_if_missing(save_dir) conf_thresholding(data_dir, save_dir, score_threshold)
import cv2, numpy as np, os
from xinshuo_io import mkdir_if_missing, load_image, load_list_from_folder, save_image, fileparts
from xinshuo_images import rgb2gray
from flow_vis import flow_to_color

# input and output folders, hard-coded to a single sequence of the dataset
data_dir = '/media/xinshuo/Data/Datasets/LRD/LRW/'
images_dir = os.path.join(data_dir, 'centered122_rgb_images/ABOUT/train/ABOUT_00001')
save_dir = os.path.join(data_dir, 'centered122_flow/ABOUT/train/ABOUT_00001'); mkdir_if_missing(save_dir)
image_list, num_images = load_list_from_folder(images_dir)
print('number of images loaded is %d' % num_images)

# debugging leftover: the first image is also read and converted to gray with opencv,
# the result 'test' is not used by the statements below
test = cv2.imread(image_list[0])
# print(test.dtype)
test = cv2.cvtColor(test, cv2.COLOR_BGR2GRAY)
# print(test.dtype)
# print(test)
# zxc

# the first frame is the initial 'previous' frame, converted to gray
frame1 = load_image(image_list[0])
prvs = rgb2gray(frame1)         # uint8
# print(prvs)

# buffer with the same shape and dtype as the first frame, its second channel is set to 255
# NOTE(review): presumably an HSV image with full saturation used to visualize the flow, confirm
hsv = np.zeros_like(frame1)
hsv[..., 1] = 255
image_index = 1
data_all += data # sort based on frame number data_all.sort(key=lambda x: int(x.split(' ')[0])) save_txt_file(data_all, save_path) if __name__ == '__main__': root_dir = './results' split = 'test' seq_list = ['%04d' % tmp for tmp in range(0, 29)] method = 'pointrcnn' cat_list = ['Car', 'Pedestrian', 'Cyclist'] subset = ['%s_%s_%s_thres' % (method, cat, split) for cat in cat_list] # save path save_dir = os.path.join(root_dir, '%s_%s_thres' % (method, split), 'data') mkdir_if_missing(save_dir) # merge for seq_tmp in seq_list: file_list_tmp = list() for subset_tmp in subset: file_tmp = os.path.join(root_dir, subset_tmp, 'data', seq_tmp + '.txt') file_list_tmp.append(file_tmp) save_path_tmp = os.path.join(save_dir, seq_tmp + '.txt') combine_files(file_list_tmp, save_path_tmp)
# running modes and the random seed
parser.add_argument('--train', action='store_true', help='training mode')
parser.add_argument('--val', action='store_true', help='validation mode')
parser.add_argument('--test', action='store_true', help='testing mode')
parser.add_argument('--vis', action='store_true', help='visualization mode')
parser.add_argument('--seed', type=int, default=0, help='random seed')
args = parser.parse_args()
torch.backends.cudnn.benchmark = True
prepare_seed(args.seed)

# the remaining options are read from options.toml in the current working directory
print("Loading options...")
with open('options.toml', 'r') as optionsFile:
    options = toml.loads(optionsFile.read())

# every run is saved to its own folder, named after the model and the time string
args.save_dir = os.path.join(options["general"]["modelsavedir"], args.modelname + '_' + get_timestring())
mkdir_if_missing(args.save_dir)
args.dataset = options["general"]["dataset"]
# args.logfile is first the path of the log file and then rebound to the opened file
args.logfile = os.path.join(args.save_dir, 'log.txt')
args.logfile = open(args.logfile, 'w')
# print_log(options, args.logfile)
print_log(args, args.logfile)
print_log('\n\nsaving to %s' % args.save_dir, log=args.logfile)

# create the model selected by args.modelname
print_log('creating the model\n\n', log=args.logfile)
if args.modelname == 'C3D_CONV_BLSTM':
    model = C3D_CONV_BLSTM(args)
# NOTE(review): the 'frontfix' variant builds the same class as the branch above,
# confirm that the difference is handled elsewhere
elif args.modelname == 'C3D_CONV_BLSTM_frontfix':
    model = C3D_CONV_BLSTM(args)
elif args.modelname == 'C3D_CONV_CONV':
    model = C3D_CONV_CONV(args)
# the I3D models are built without args
elif args.modelname == 'I3D_BLSTM':
    model = I3D_BLSTM()
elif args.modelname == 'I3D':
    model = I3D()
elif args.modelname == 'I3D_BLSTM_mini':
    model = I3D_BLSTM_mini()
elif args.modelname == 'I3D_BLSTM_mini2':
    model = I3D_BLSTM_mini2()
from __future__ import print_function import torch, toml, os from models import LipRead from training import Trainer from validation import Validator from xinshuo_miscellaneous import get_timestring, print_log from xinshuo_io import mkdir_if_missing print("Loading options...") with open('options.toml', 'r') as optionsFile: options = toml.loads(optionsFile.read()) if options["general"]["usecudnnbenchmark"] and options["general"]["usecudnn"]: torch.backends.cudnn.benchmark = True options["general"]["modelsavedir"] = os.path.join( options["general"]["modelsavedir"], 'trained_model_' + get_timestring()) mkdir_if_missing(options["general"]["modelsavedir"]) options["general"]["logfile"] = open( os.path.join(options["general"]["modelsavedir"], 'log.txt'), 'w') print_log('saving to %s' % options["general"]["modelsavedir"], log=options["general"]["logfile"]) print_log('creating the model', log=options["general"]["logfile"]) model = LipRead(options) print_log('loading model', log=options["general"]["logfile"]) if options["general"]["loadpretrainedmodel"]: print_log('loading the pretrained model at %s' % options["general"]["pretrainedmodelpath"], log=options["general"]["logfile"]) model.load_state_dict(torch.load(
return ade, ade_l, ade_nl def cal_fde(pred_traj_gt, pred_traj_fake, linear_ped, non_linear_ped, loss_mask): # pred_traj_gt frames x obj x 2 # select the right last timestamp for FDE computation, i.e., not select the last frame if masked out pred_traj_last = [] gt_traj_last = [] num_objects = pred_traj_gt.size(1) for obj_tmp in range(num_objects): loss_mask_tmp = loss_mask[obj_tmp] # seq_len good_index = torch.nonzero(loss_mask_tmp) if torch.nonzero(loss_mask_tmp).size(0) == 0: pred_traj_last.append(torch.zeros(2).cuda()) gt_traj_last.append(torch.zeros(2).cuda()) else: last_index = torch.max(good_index) pred_traj_last.append(pred_traj_fake[last_index, obj_tmp, :]) gt_traj_last.append(pred_traj_gt[last_index, obj_tmp, :]) gt_traj_last = torch.stack(gt_traj_last, dim=0) # num_obj x 2 pred_traj_last = torch.stack(pred_traj_last, dim=0) # num_obj x 2 fde = final_displacement_error(pred_traj_last, gt_traj_last) fde_l = final_displacement_error(pred_traj_last, gt_traj_last, linear_ped) fde_nl = final_displacement_error(pred_traj_last, gt_traj_last, non_linear_ped) return fde, fde_l, fde_nl if __name__ == '__main__': args = parser.parse_args() mkdir_if_missing(args.output_dir) main(args)
def vis(cam_transform, data_root, result_root):
    '''
    draw the tracked 3D boxes with their IDs on the images of a camera, after moving the boxes
    from the camera coordinate to the world coordinate, and save the rendered frames

    Parameters:
        cam_transform:  a dictionary from the camera name (e.g., 'cam9') to the transform of the camera
        data_root:      not used, the input folders are hard-coded below
        result_root:    not used, the result folders are hard-coded below

    Note:
        the following names are not defined here and are read from the enclosing module:
        seq_list, type_whitelist, score_threshold, colors, max_color, width, height, cu
        the position t and the rotation ry of the loaded objects are overwritten in place
        NOTE(review): the paths, the camera name and the frame range are hard-coded, so every
        sequence in seq_list processes the same frames
    '''
    def show_image_with_boxes(img, objects_res, object_gt, calib, save_path, height_threshold=0):
        # draw the projected 3D box and the ID of every object, then resize and save the image
        # object_gt and height_threshold are accepted but not used
        img2 = np.copy(img)
        for obj in objects_res:
            box3d_pts_2d, _ = compute_box_3d(obj, calib.P)
            # one color per ID, the color table is reused cyclically
            color_tmp = tuple([int(tmp * 255) for tmp in colors[obj.id % max_color]])
            img2 = draw_projected_box3d(img2, box3d_pts_2d, color=color_tmp)
            text = 'ID: %d' % obj.id
            if box3d_pts_2d is not None:
                img2 = cv2.putText(img2, text, (int(box3d_pts_2d[4, 0]), int(box3d_pts_2d[4, 1]) - 8), cv2.FONT_HERSHEY_TRIPLEX, 0.5, color=color_tmp)

        img = Image.fromarray(img2)
        img = img.resize((width, height))
        img = img.convert('RGB')
        img.save(save_path)

    for seq in seq_list:
        # the folders used to be derived from data_root, result_root and the sequence:
        # image_dir = os.path.join(data_root, 'image_02/%s' % seq)
        # calib_file = os.path.join(data_root, 'calib/%s.txt' % seq)
        # result_dir = os.path.join(result_root, '%s/trk_withid/%s' % (result_sha, seq))
        # save_3d_bbox_dir = os.path.join(result_dir, '../../trk_image_vis/%s' % seq); mkdir_if_missing(save_3d_bbox_dir)
        image_dir = '/home/ubuntu/xwp/datasets/multi_view_dataset/new/cam_sample/image_2'
        result_dir = '/home/ubuntu/xwp/datasets/multi_view_dataset/new/fuse_test/cam9+cam21/tracking_results/trk_withid/0000'
        calib_file = '/home/ubuntu/xwp/datasets/multi_view_dataset/346/calib/000000.txt'
        save_3d_bbox_dir = '/home/ubuntu/xwp/datasets/multi_view_dataset/new/fuse_test/cam9+cam21/trk_image_vis'
        mkdir_if_missing(save_3d_bbox_dir)

        # load the list
        images_list, num_images = load_list_from_folder(image_dir)
        print('number of images to visualize is %d' % num_images)
        cam_id = 'cam9'
        start_count = 8945
        end_count = 8950
        min_index = 8900        # the result files are indexed relative to this image index

        for count in range(start_count, end_count):
            image_tmp = images_list[count]
            if not is_path_exists(image_tmp):
                continue
            image_index = int(fileparts(image_tmp)[1])
            image_tmp = np.array(Image.open(image_tmp))
            # the values are not used, but the unpacking fails for an image that is not height x width x channel
            img_height, img_width, img_channel = image_tmp.shape

            # load the result, a missing file means that no object is drawn in this frame
            result_tmp = os.path.join(result_dir, '%06d.txt' % (image_index - min_index))
            if not is_path_exists(result_tmp):
                object_res = []
            else:
                object_res = read_label(result_tmp)
            print('processing index: %d, %d/%d, results from %s' % (image_index, count + 1, num_images, result_tmp))
            calib_tmp = Calibration(calib_file)         # load the calibration

            object_res_filtered = []
            cam_trans = cam_transform[cam_id]
            # the matrix only depends on the camera, so it is computed once per frame instead of once per object
            cam_matrix = cu.ClientSideBoundingBoxes.get_matrix(cam_trans)
            for object_tmp in object_res:
                if object_tmp.type not in type_whitelist:
                    continue
                if hasattr(object_tmp, 'score'):
                    if object_tmp.score < score_threshold:
                        continue

                # transform: the position is reordered to (t[2], t[0], -t[1], 1) before applying the
                # camera matrix and the result is reordered back afterwards
                # NOTE(review): presumably a conversion between the camera and the simulator axis conventions, confirm
                cord_p = np.zeros((1, 4))
                cord_p[0][0] = object_tmp.t[2]
                cord_p[0][1] = object_tmp.t[0]
                cord_p[0][2] = -object_tmp.t[1]
                cord_p[0][3] = 1
                rotation_y = object_tmp.ry * 180 / np.pi        # radian to degree
                cam_to_world = np.dot(cam_matrix, np.transpose(cord_p))
                ry_cam2world = cu.ry_filter_a(rotation_y - 90 + cam_trans.rotation.yaw) * np.pi / 180
                object_tmp.t = [cam_to_world[1][0], -cam_to_world[2][0], cam_to_world[0][0]]
                object_tmp.ry = cu.ry_filter(ry_cam2world)
                # end

                object_res_filtered.append(object_tmp)

            num_instances = len(object_res_filtered)
            save_image_with_3dbbox_gt_path = os.path.join(save_3d_bbox_dir, '%06d.jpg' % (image_index))
            show_image_with_boxes(image_tmp, object_res_filtered, [], calib_tmp, save_path=save_image_with_3dbbox_gt_path)
            print('number of objects to plot is %d' % (num_instances))
import os, numpy as np, time, sys
from AB3DMOT_libs.model import AB3DMOT
from xinshuo_io import load_list_from_folder, fileparts, mkdir_if_missing

if __name__ == '__main__':
    # debugging script: sets up the output folders and prints the shape of the detections of the first sequence
    if len(sys.argv) != 2:
        print('Usage: python main.py result_sha(e.g., pointrcnn_Car_test)')
        sys.exit(1)

    result_sha = sys.argv[1]
    save_root = './results'
    det_id2str = {1: 'Pedestrian', 2: 'Car', 3: 'Cyclist'}

    seq_file_list, num_seq = load_list_from_folder(os.path.join('data/KITTI', result_sha))
    total_time, total_frames = 0.0, 0
    save_dir = os.path.join(save_root, result_sha)
    mkdir_if_missing(save_dir)
    eval_dir = os.path.join(save_dir, 'data')
    mkdir_if_missing(eval_dir)
    seq_count = 0
    for seq_file in seq_file_list:
        _, seq_name, _ = fileparts(seq_file)
        # the evaluation file is still created (and emptied) as before, but it is now closed
        # when the loop is left instead of being left open
        with open(os.path.join(eval_dir, seq_name + '.txt'), 'w') as eval_file:
            save_trk_dir = os.path.join(save_dir, 'trk_withid', seq_name)
            mkdir_if_missing(save_trk_dir)

            mot_tracker = AB3DMOT()
            seq_dets = np.loadtxt(seq_file, delimiter=',')      # load detections, N x 15
            # print(seq_file)
            # print(seq_dets)
            print(seq_dets.shape)
            # print(len(seq_dets.shape))
            break       # only the first sequence is inspected
def _parse_hdf5_label_source(label_src, num_data, debug=True):
    '''
    convert one label source of generate_hdf5 to either a dictionary or a sequence of labels

    Parameters:
        label_src:      None, a path to a json file, a dictionary, a 1-d numpy array or a list of labels
        num_data:       number of data, a json file must contain the same number of labels
        debug:          boolean, debug mode to check format

    Returns:
        labeldict:      a dictionary of labels, None if the labels are not given as a json file or a dictionary
        labellist:      a sequence of labels, None if the labels are not given as a numpy array or a list
    '''
    if label_src is None:
        return None, None
    if isfile(label_src):
        assert is_path_exists(label_src), 'file not found'
        _, _, ext = fileparts(label_src)
        assert ext == '.json', 'only json extension is supported'
        # json.load expects an opened file, not a path
        with open(label_src, 'r') as label_file:
            labeldict = json.load(label_file)
        assert num_data == len(labeldict), 'number of data and label is not equal.'
        return labeldict, None
    if isdict(label_src):
        return label_src, None
    if isnparray(label_src):
        if debug:
            assert label_src.ndim == 1, 'only 1-d label is supported'
        return None, label_src
    if islist(label_src):
        if debug:
            assert all(np.array(label_tmp).size == 1 for label_tmp in label_src), 'only 1-d label is supported'
        return None, label_src
    assert False, 'label source format is not correct.'


def generate_hdf5(data_src, save_dir, data_name='data', batch_size=1, ext_filter='png', label_src1=None, label_name1='label', label_preprocess_function1=identity, label_range1=None, label_src2=None, label_name2='label2', label_preprocess_function2=identity, label_range2=None, debug=True, vis=False):
    '''
    this function creates data in hdf5 format from image data, optionally with up to two sets of labels

    Parameters:
        data_src:                       source of image data, which can be a folder containing a set of images, a file
                                        containing a list of image path, or a list of numpy array image data
        save_dir:                       where to store the hdf5 data
        data_name:                      name of the image dataset inside every hdf5 file
        batch_size:                     how many image to store in a single hdf5 file, the last file contains
                                        fewer images if the number of data is not a multiple of batch_size
        ext_filter:                     what format of data to use when loading the images from a folder
        label_src1, label_src2:         source of label data, which can be None, a json file containing a dictionary of
                                        labels, a dictionary of labels, a 1-d numpy array data, or a list of label data
                                        a dictionary is indexed by the file name of the image without extension
        label_name1, label_name2:       name of the label dataset inside every hdf5 file
        label_preprocess_function1/2:   function applied to the labels of a batch before saving, called with the
                                        keyword arguments data, data_range and debug
        label_range1, label_range2:     passed to the label preprocess function as data_range
        debug:                          boolean, debug mode to check format
        vis:                            boolean, passed to preprocess_image_caffe

    Returns:
        number of hdf5 files saved
        number of data processed

    Note:
        the hdf5 file is named after the last image of the batch when the images are loaded from files,
        otherwise it is named image_<index of the hdf5 file>.hdf5
    '''
    # parse input
    assert is_path_exists_or_creatable(save_dir), 'save path should be a folder to save all hdf5 files'
    mkdir_if_missing(save_dir)
    assert isstring(data_name), 'dataset name is not correct'      # name for hdf5 data
    if debug:
        assert batch_size >= 1, 'batch size should be a positive integer'

    # convert data source to a list of numpy array image data
    if isfolder(data_src):
        print('data is loading from %s with extension .%s' % (data_src, ext_filter))
        filelist, num_data = load_list_from_folder(data_src, ext_filter=ext_filter)
        datalist = None
    elif isfile(data_src):
        print('data is loading from %s with extension .%s' % (data_src, ext_filter))
        filelist, num_data = load_list_from_file(data_src)
        datalist = None
    elif islist(data_src):
        if debug:
            assert all(isimage(data_tmp) for data_tmp in data_src), 'input data source is not a list of numpy array image data'
        datalist = data_src
        num_data = len(datalist)
        filelist = None
    else:
        assert False, 'data source format is not correct.'
    if debug:
        assert (datalist is None and filelist is not None) or (filelist is None and datalist is not None), 'data is not correct'
        if datalist is not None:
            assert len(datalist) == num_data, 'number of data is not equal'
        if filelist is not None:
            assert len(filelist) == num_data, 'number of data is not equal'

    # convert label source to a dictionary or a sequence of labels
    labeldict1, labellist1 = _parse_hdf5_label_source(label_src1, num_data, debug=debug)
    assert isfunction(label_preprocess_function1), 'label preprocess function is not correct.'
    labeldict2, labellist2 = _parse_hdf5_label_source(label_src2, num_data, debug=debug)
    assert isfunction(label_preprocess_function2), 'label preprocess function is not correct.'
    has_label1 = (labeldict1 is not None) or (labellist1 is not None)
    has_label2 = (labeldict2 is not None) or (labellist2 is not None)

    # a dictionary of labels is indexed by the file name, which only exists when the data is loaded from files
    if labeldict1 is not None or labeldict2 is not None:
        assert filelist is not None, 'a dictionary of labels requires the data to be loaded from files'
    if debug:
        if has_label1:
            assert isstring(label_name1), 'label name is not correct'
            assert label_range1 is not None, 'label range is not correct'
        if has_label2:
            assert isstring(label_name2), 'label name is not correct'
            assert label_range2 is not None, 'label range is not correct'
        if labeldict1 is not None:
            assert len(filelist) == len(labeldict1), 'file list is not equal to label dictionary'
        if labeldict2 is not None:
            assert len(filelist) == len(labeldict2), 'file list is not equal to label dictionary'

    # nothing to save
    if num_data == 0:
        return 0, 0

    # warm up
    if datalist is not None:
        size_data = datalist[0].shape
    else:
        size_data = imread(filelist[0]).shape
    if has_label1:
        labels1 = np.zeros((batch_size, 1), dtype='float32')
    if has_label2:
        labels2 = np.zeros((batch_size, 1), dtype='float32')

    # start generating
    count_hdf = 0       # count number of hdf5 file
    clock = Timer()
    datalist_batch = list()
    for i in range(num_data):
        clock.tic()
        if filelist is not None:
            imagefile = filelist[i]
            _, name, _ = fileparts(imagefile)
            img = imread(imagefile).astype('float32')
            max_value = np.max(img)
            if max_value > 1 and max_value <= 255:
                img = img / 255.0       # [rows,col,channel,numbers], scale the image data to (0, 1)
            if debug:
                min_value = np.min(img)
                assert min_value >= 0 and min_value <= 1, 'data is not in [0, 1]'
        else:
            img = datalist[i]
        if debug:
            assert size_data == img.shape

        index_in_batch = len(datalist_batch)       # position of this data inside the current batch
        datalist_batch.append(img)

        # process label
        if labeldict1 is not None:
            labels1[index_in_batch, 0] = float(labeldict1[name])
        if labellist1 is not None:
            labels1[index_in_batch, 0] = float(labellist1[i])
        if labeldict2 is not None:
            labels2[index_in_batch, 0] = float(labeldict2[name])
        if labellist2 is not None:
            labels2[index_in_batch, 0] = float(labellist2[i])

        # save to hdf5 once the batch is full, or when the data runs out so that the last
        # incomplete batch is not lost
        batch_ready = len(datalist_batch) == batch_size or i == num_data - 1
        if batch_ready:
            num_in_batch = len(datalist_batch)
            data = preprocess_image_caffe(datalist_batch, debug=debug, vis=vis)     # swap channel, transfer from list of HxWxC to NxCxHxW
            count_hdf = count_hdf + 1

            # write to hdf5 format
            if filelist is not None:
                save_path = os.path.join(save_dir, '%s.hdf5' % name)
            else:
                save_path = os.path.join(save_dir, 'image_%010d.hdf5' % count_hdf)
            h5f = h5py.File(save_path, 'w')
            try:
                h5f.create_dataset(data_name, data=data, dtype='float32')
                if has_label1:
                    # only the rows filled for this batch are saved
                    labels_save1 = label_preprocess_function1(data=labels1[:num_in_batch], data_range=label_range1, debug=debug)
                    h5f.create_dataset(label_name1, data=labels_save1, dtype='float32')
                    labels1 = np.zeros((batch_size, 1), dtype='float32')
                if has_label2:
                    labels_save2 = label_preprocess_function2(data=labels2[:num_in_batch], data_range=label_range2, debug=debug)
                    h5f.create_dataset(label_name2, data=labels_save2, dtype='float32')
                    labels2 = np.zeros((batch_size, 1), dtype='float32')
            finally:
                h5f.close()

            del datalist_batch[:]
            if debug:
                assert len(datalist_batch) == 0, 'list has not been cleared'

        average_time = clock.toc()
        if batch_ready:
            print('saving to %s: %d/%d, average time:%.3f, elapsed time:%s, estimated time remaining:%s' % (save_path, i + 1, num_data, average_time, convert_secs2time(average_time * i), convert_secs2time(average_time * (num_data - i))))

    return count_hdf, num_data
from xinshuo_io import load_list_from_folder, fileparts, mkdir_if_missing if __name__ == '__main__': if len(sys.argv) != 2: print('Usage: python main.py result_sha(e.g., pointrcnn_Car_test)') sys.exit(1) result_sha = sys.argv[1] save_root = './results' det_id2str = {1: 'Pedestrian', 2: 'Car', 3: 'Cyclist'} seq_file_list, num_seq = load_list_from_folder( os.path.join('data/KITTI', result_sha)) total_time, total_frames = 0.0, 0 save_dir = os.path.join(save_root, result_sha) mkdir_if_missing(save_dir) eval_dir = os.path.join(save_dir, 'data') mkdir_if_missing(eval_dir) seq_count = 0 for seq_file in seq_file_list: _, seq_name, _ = fileparts(seq_file) eval_file = os.path.join(eval_dir, seq_name + '.txt') eval_file = open(eval_file, 'w') save_trk_dir = os.path.join(save_dir, 'trk_withid', seq_name) mkdir_if_missing(save_trk_dir) mot_tracker = AB3DMOT() seq_dets = np.loadtxt(seq_file, delimiter=',') # load detections, N x 15 # if no detection in a sequence
# Cityscape # data_dir = '/media/xinshuo/Data/Datasets/Cityscapes/leftImg8bit' # images_dir = os.path.join(data_dir, 'val/frankfurt') # save_dir = os.path.join(data_dir, 'results/mask_preprocessed_cityscape'); mkdir_if_missing(save_dir) # Shimizu # folder_name = 'Q_C0006_330-360sec' # folder_name = 'Q_C0006_480-510sec' data_dir = '/media/xinshuo/Data/Datasets/shimizu' # data_dir = '/media/xinshuo/Data/Datasets/Cityscapes/leftImg8bit/demoVideo' images_dir = os.path.join(data_dir, 'images_every10') # images_dir = os.path.join(data_dir, 'stuttgart_00') # images_dir = os.path.join(data_dir, 'public_asset') save_dir = os.path.join(data_dir, 'results', 'MASK_RCNN') mkdir_if_missing(save_dir) # save_dir = os.path.join(data_dir, 'results', 'public_asset'); mkdir_if_missing(save_dir) # vis_dir = os.path.join(data_dir, 'visualization', 'MASK_RCNN'); mkdir_if_missing(vis_dir) # vis_dir = os.path.join(data_dir, 'visualization', 'public_asset'); mkdir_if_missing(vis_dir) vis_dir = os.path.join(save_dir, 'visualization') mkdir_if_missing(vis_dir) mask_dir = os.path.join(save_dir, 'masks') mkdir_if_missing(mask_dir) detection_result_filepath = os.path.join(save_dir, 'mask_results.txt') detection_results_file = open(detection_result_filepath, 'w') log_file = os.path.join(save_dir, 'log.txt') log_file = open(log_file, 'w') ##--------------------------------- Model Directory ----------------------------------## # model_path = os.path.join(root_dir, 'resnet50_imagenet.pth') # Path to trained weights file
IMAGES_PER_GPU = 1 # DETECTION_MIN_CONFIDENCE = 0 if train_dataset == 'coco': NUM_CLASSES = 1 + 80 elif train_dataset == 'cityscape': NUM_CLASSES = 1 + len(cityscape_class_names) elif train_dataset == 'kitti': NUM_CLASSES = 1 + len(kitti_class_names) else: assert False, 'error' config = InferenceConfig() # config.DETECTION_MIN_CONFIDENCE = 0.7 config.DETECTION_MIN_CONFIDENCE = 0 for epoch in epoch_list_to_evaluate: ##--------------------------------- Data Directory ----------------------------------## results_name = 'maskrcnn_bbox_detection_results_%s_%s_%s_epoch%d_%s_fulldetection' % (train_dataset, split, model_folder, epoch, get_timestring()) split_file = os.path.join(kitti_dir, 'mykitti/object/mysplit/%s.txt' % split) images_dir = os.path.join(data_dir, 'image_2') save_dir = os.path.join(data_dir, 'results/%s' % results_name); mkdir_if_missing(save_dir) vis_dir = os.path.join(save_dir, 'visualization'); mkdir_if_missing(vis_dir) log_file = os.path.join(save_dir, 'log.txt'); log_file = open(log_file, 'w') bbox_eval_folder = os.path.join(save_dir, 'data'); mkdir_if_missing(bbox_eval_folder) mask_dir = os.path.join(save_dir, 'masks'); mkdir_if_missing(mask_dir) label_bbox_match_dir = os.path.join(save_dir, 'label_bbox_matching'); mkdir_if_missing(label_bbox_match_dir) detection_result_filepath = os.path.join(save_dir, 'mask_results.txt'); detection_results_file = open(detection_result_filepath, 'w') ##--------------------------------- Model Directory ----------------------------------## if train_dataset == 'coco': model_path = os.path.join(root_dir, '../models/mask_rcnn_coco.pth') # Path to trained weights file elif train_dataset == 'cityscape': model_path = '/media/xinshuo/Data/models/mask_rcnn_pytorch/%s/mask_rcnn_cityscape_%04d.pth' % (model_folder, epoch) elif train_dataset == 'kitti': model_path = '/media/xinshuo/Data/models/mask_rcnn_pytorch/%s/mask_rcnn_kitti_%04d.pth' % (model_folder, epoch) else: model_path = os.path.join(root_dir, 
'resnet50_imagenet.pth') # Path to trained weights from Imagenet model = MaskRCNN(model_dir=save_dir, config=config) # Create model object. if config.GPU_COUNT: model = model.cuda() model.load_weights(model_path) # Load weights
torch.cuda.manual_seed(seed) np.random.seed(seed) random.seed(seed) config = osp.join(os.path.abspath(os.curdir), "config.yml") parser, settings_show = Config(config) os.environ["CUDA_VISIBLE_DEVICES"] = parser.device if parser.mode == 0: log_path = osp.join(parser.result, 'debug') else: log_path = osp.join(parser.result, '{}-{}'.format(time_for_file(), parser.description)) # if not osp.exists(log_path): # os.mkdir(log_path) mkdir_if_missing(log_path) log = open(osp.join(log_path, 'log.log'), 'w') print_log("python version : {}".format(sys.version.replace('\n', ' ')), log) print_log("torch version : {}".format(torch.__version__), log) print_log("cudnn version : {}".format(torch.backends.cudnn.version()), log) for idx, data in enumerate(settings_show): print_log(data, log) generator = Generator(entirety=parser.entirety) if parser.model == "EmbeddingNet": train_EmbeddingNet.train(parser, generator, log, log_path) elif parser.model == "lstm": train_LSTM.train(parser, generator, log, log_path) elif parser.model == "FuckUpNet":
def evaluate(args, loader, generator, num_samples, path):
    """Sample future trajectories, dump them to disk and report ADE / FDE.

    For every batch, ``num_samples`` trajectories are drawn from
    ``generator``. Each sampled trajectory is appended to a per-frame text
    file and collected into one nested dictionary that is finally written
    to ``results.json``. The per-sample errors are reduced with
    ``best_of_K`` and accumulated in two ``AverageMeter`` objects.

    Parameters:
        args:           namespace providing ``pred_len``, ``dataset_name``
                        and ``skip``
        loader:         iterable yielding batches of 8 tensors
        generator:      callable invoked as
                        ``generator(obs_traj, obs_traj_rel, seq_start_end)``
        num_samples:    number of trajectories sampled per batch
        path:           path handed to ``fileparts``; its first returned
                        element is the parent of the results folder

    Returns:
        (ade_all.avg, fde_all.avg)
    """
    ade_meter, fde_meter = AverageMeter(), AverageMeter()
    total_obj = 0
    horizon = args.pred_len
    # the class tag is the first three characters of the second '_' field
    obj_class = args.dataset_name.split('_')[1][:3]

    parent_dir, _, _ = fileparts(path)
    out_dir = os.path.join(parent_dir, 'results_%s' % get_timestring())
    mkdir_if_missing(out_dir)
    json_path = os.path.join(out_dir, 'results.json')

    # {seqname: {frame: {sample: {ID: {state: N x 2, prob: 1}}}}}
    collected = dict()

    with torch.no_grad():
        for batch in loader:
            (obs_traj, pred_traj_gt, obs_traj_rel, pred_traj_gt_rel,
             non_linear_ped, loss_mask, seq_start_end,
             id_frame) = [tensor.cuda() for tensor in batch]

            # obs_traj          frames x objects x 2
            # pred_traj_gt      frames x objects x 2
            # seq_start_end     start, end of ped index in each timestamp
            # id_frame          2frames x objects x 3
            # loss_mask         objects x 2frames
            n_obs = obs_traj.size(0)
            n_obj = obs_traj.size(1)
            future_ids = id_frame[n_obs:]           # frames x obj x 3
            future_mask = loss_mask[:, n_obs:]      # objects x seq_len

            ade_samples, fde_samples = [], []
            for sample_index in range(num_samples):
                rel_pred = generator(obs_traj, obs_traj_rel, seq_start_end)
                abs_pred = relative_to_abs(rel_pred, obs_traj[-1])   # frames x objects x 2

                # ---- dump the prediction of every object ----
                for obj_idx in range(n_obj):
                    ids = future_ids[:, obj_idx, :]
                    frame = int(ids[0, 0].item())

                    # the sequence must be identical across frames
                    seq_values = np.unique(ids[:, -1].cpu().clone().numpy())
                    assert len(seq_values) == 1, 'error'
                    seqname = int2seqname(seq_values[0])    # AIODrive only

                    # the object ID must be identical across frames
                    id_values = np.unique(ids[:, 1].cpu().clone().numpy())
                    assert len(id_values) == 1, 'error'
                    obj_id = int(id_values[0])

                    # one text file per (sequence, frame, sample)
                    rows = torch.cat([ids[:, :2], abs_pred[:, obj_idx, :]],
                                     axis=-1).cpu().clone().numpy()
                    txt_path = os.path.join(
                        out_dir, seqname, 'frame_%06d' % (frame),
                        'sample_%03d' % sample_index + '.txt')
                    mkdir_if_missing(txt_path)
                    with open(txt_path, 'a') as f:
                        np.savetxt(f, rows, fmt="%.3f")

                    # same content, stored for the single json file
                    entry = collected.setdefault(seqname, dict()) \
                                     .setdefault(frame, dict()) \
                                     .setdefault(sample_index, dict()) \
                                     .setdefault(obj_id, dict())
                    entry['state'] = abs_pred[:, obj_idx, :].cpu().clone().numpy().tolist()
                    entry['prob'] = 1.0

                # ---- ADE: one raw error per object ----
                ade_err = displacement_error(abs_pred, pred_traj_gt,
                                             mode='raw', mask=future_mask)
                ade_samples.append(ade_err)

                # ---- FDE: use the last frame that is not masked out ----
                last_pred, last_gt = [], []
                for obj_idx in range(n_obj):
                    valid = torch.nonzero(future_mask[obj_idx])
                    if valid.size(0) != 0:
                        last = torch.max(valid)
                        last_pred.append(abs_pred[last, obj_idx, :])
                        last_gt.append(pred_traj_gt[last, obj_idx, :])
                    else:
                        # no valid future frame: fill with NaN (0 / 0)
                        last_pred.append(torch.zeros(2).cuda() / 0)
                        last_gt.append(torch.zeros(2).cuda() / 0)
                last_gt = torch.stack(last_gt, dim=0)       # num_obj x 2
                last_pred = torch.stack(last_pred, dim=0)   # num_obj x 2

                fde_err = final_displacement_error(last_pred, last_gt, mode='raw')
                fde_samples.append(fde_err)

            # objects whose future is fully padded yield NaN and are not counted
            num_invalid = torch.sum(torch.isnan(ade_err))
            total_obj += pred_traj_gt.size(1) - num_invalid

            # keep the best sample per sequence
            ade_best, num_obj = best_of_K(ade_samples, seq_start_end, err_type='ADE')
            fde_best, num_obj = best_of_K(fde_samples, seq_start_end, err_type='FDE')
            ade_meter.update(ade_best, n=num_obj)
            fde_meter.update(fde_best, n=num_obj)

    with open(json_path, 'w') as outfile:
        json.dump({horizon * args.skip: {obj_class: collected}}, outfile)

    return ade_meter.avg, fde_meter.avg
def callback(self, msg):
    """Run one tracking step on a detection message and publish the result.

    The detections in ``msg.object_list`` are packed into an (N, 15) float
    array, handed to ``self.mot_tracker.update`` and the returned tracks
    are published through ``self.markerPublisher`` and
    ``self.kittiPublisher``. ``self.total_frames`` and ``self.total_time``
    are updated as running statistics.

    Column layout of the packed array:
        0       frame (msg.header.seq)
        1       obj.type.data
        2-5     obj.bbox[0..3]
        6       obj.score
        7-9     obj.dims[0..2]
        10-12   obj.loc[0..2]
        13      obj.rot
        14      obj.alpha

    Parameters:
        msg:    message exposing ``header.seq`` and ``object_list``

    NOTE: the previous revision unpacked every track into locals that fed
    only commented-out file writers; that dead code has been removed, so
    the tracks are no longer written to disk here (they were not before
    either).
    """
    start_time_callback = time.time()

    # Get all detected objects. The rows are collected first and converted
    # once; np.append inside the loop re-copied the whole array each time.
    frame = msg.header.seq
    rows = [[
        frame,
        float(obj.type.data), obj.bbox[0], obj.bbox[1], obj.bbox[2],
        obj.bbox[3], obj.score, obj.dims[0], obj.dims[1], obj.dims[2],
        obj.loc[0], obj.loc[1], obj.loc[2], obj.rot, obj.alpha
    ] for obj in msg.object_list]
    # reshape keeps the (0, 15) shape when the message has no detections
    seq_dets = np.array(rows, dtype=float).reshape((-1, 15))

    # Update tracking module
    dets = seq_dets[:, 7:14]                            # dims, loc, rot
    ori_array = seq_dets[:, -1].reshape((-1, 1))        # alpha
    other_array = seq_dets[:, 1:7]                      # type, bbox, score
    additional_info = np.concatenate((ori_array, other_array), axis=1)
    dets_all = {'dets': dets, 'info': additional_info}

    self.total_frames += 1
    start_time_AB3DMOT = time.time()
    # per the original comment each returned row is
    # x, y, z, theta, l, w, h, ID, other info, confidence
    trackers = self.mot_tracker.update(dets_all)
    cycle_time = time.time() - start_time_AB3DMOT
    print("AB3DMOT time", cycle_time)
    self.total_time += cycle_time

    # The output folder is still created (filesystem side effect of the
    # original code) although nothing is written into it from here.
    save_trk_dir = os.path.join("./results/pcdet_KITTI", 'trk_withid', "0016")
    mkdir_if_missing(save_trk_dir)

    # Publish rviz tracking markers
    MarkerArray_list = self.tracking_to_rviz(trackers, msg)
    Kitti_list = self.tracking_to_visualizer(trackers, msg)
    self.markerPublisher.publish(MarkerArray_list)
    self.kittiPublisher.publish(Kitti_list)
    print("Total time", time.time() - start_time_callback)
# Debug output for `masks` and `class_ids`, which are defined earlier in the
# script (outside this fragment).
print(type(masks))
print(masks.shape)
print(class_ids)
image = dataset_cityscape.load_image(0)

# Data generators
# batch_size=1 and shuffle=False: one image per iteration, in dataset order.
train_set = Mask_RCNN_Dataset(dataset_cityscape, config, augment=True)
train_generator = torch.utils.data.DataLoader(train_set, batch_size=1, shuffle=False, num_workers=4, pin_memory=True)

# NOTE(review): `count` is only printed in the visible part of the loop; it is
# presumably incremented further down - confirm.
count = 1
vis_dir = os.path.join(dataset_dir, gttype, 'vis_%s' % split)
mkdir_if_missing(vis_dir)
for images, image_metas, rpn_match, rpn_bbox, gt_class_ids, gt_boxes, gt_masks, image_index, filename in train_generator:
    print('Cityscapes Dataset Visualization Processing: %d' % count)
    # dataset_cityscape.visualization(image_index.item(), save_dir=vis_dir)

    # `filename` from the loader is overwritten by the name parsed from the
    # image path stored in the dataset.
    image_path = dataset_cityscape.image_info[image_index.item()]['path']
    _, filename, _ = fileparts(image_path)

    # Drop the batch dimension and move the first axis to the end
    # (presumably C x H x W -> H x W x C - TODO confirm), then undo the
    # model-specific image preprocessing.
    images = images.numpy()[0].transpose(1, 2, 0)
    images = unmold_image(images, config)

    # Same batch-dimension removal for the ground truth; boxes become int64.
    gt_masks, gt_boxes, gt_class_ids = gt_masks.numpy()[0], gt_boxes.numpy(
    )[0].astype('int64'), gt_class_ids.numpy()[0]
    gt_masks = gt_masks.transpose(1, 2, 0)
    gt_masks = expand_mask(gt_boxes, gt_masks, images.shape)

    # Swap box columns 0<->1 and 2<->3 (presumably (y, x) -> (x, y) ordering
    # for the visualization code that follows - TODO confirm).
    gt_boxes[:, [0, 1]] = gt_boxes[:, [1, 0]]
    gt_boxes[:, [2, 3]] = gt_boxes[:, [3, 2]]
def facial_landmark_evaluation(pred_dict_all, anno_dict, num_pts, error_threshold, normalization_ced=True, normalization_vec=False,
                               covariance=True, display_list=None, debug=True, vis=False, save=True, save_path=None, truncated_list=None):
    '''
    evaluate the performance of facial landmark detection

    parameter:
        pred_dict_all:      a dictionary for all baseline methods. Each key is the method name and the value is the corresponding
                            prediction dictionary, whose keys are the image paths and values are 2 x N prediction results
                            (3 or 4 rows are accepted, only the first two rows are used)
        anno_dict:          a dictionary whose keys are the image file names (as returned by fileparts) and values are 2 x N
                            annotations, or 3 x N with an occlusion row (only points whose occlusion flag equals 1 are evaluated)
        num_pts:            number of points
        error_threshold:    scalar forwarded to visualize_ced
        normalization_ced:  normalize the point-to-point error by the size of the annotation bounding box
        normalization_vec:  kept for interface compatibility; when set, full annotation of all points is still required
        covariance:         kept for interface compatibility, not used by the active code
        display_list:       optional list of method names defining the row order of the point-wise table,
                            the order of pred_dict_all is used when None
        vis:                determine if visualizing the pck curve
        save:               kept for interface compatibility, results are always saved
        save_path:          a directory to save all the results, required
        truncated_list:     forwarded unchanged to visualize_ced
                            (assumes visualize_ced accepts None - TODO confirm against its signature)

    saved results (under save_path):
        1. error_list/error_<method>.txt:   per-image error sorted from worst to best
        2. pck/ and metrics/:               pck curve and detailed metrics, overall and point specific
        3. metrics/pointwise_average_MSE.txt

    return:
        metrics_all:        second value returned by visualize_ced on the overall errors
        ptswise_mse_table:  a list of list (header row + one row per method) with the MSE of every key-point,
                            'N/A' where a point has no valid image for a method

    NOTE: the error vector visualization was disabled (commented out) in the previous revision; its bookkeeping and the
    dependent `mse` switch, which referenced an undefined name, have been removed.
    '''
    num_methods = len(pred_dict_all)
    if debug:
        assert isdict(pred_dict_all) and num_methods > 0 and all(isdict(pred_dict) for pred_dict in pred_dict_all.values()), 'predictions result format is not correct'
        assert isdict(anno_dict), 'annotation result format is not correct'
        assert ispositiveinteger(num_pts), 'number of points is not correct'
        assert isscalar(error_threshold), 'error threshold is not correct'
        assert islogical(normalization_ced) and islogical(normalization_vec), 'normalization flag is not correct'
        if display_list is not None:
            assert len(display_list) == num_methods, 'display list is not correct %d vs %d' % (len(display_list), num_methods)

    # every result is written below save_path, fail early with a clear message instead of deep inside os.path.join
    if save_path is None:
        raise ValueError('save_path is required to store the evaluation results')

    # dictionary views are not indexable in python 3
    num_images = len(list(pred_dict_all.values())[0])
    if debug:
        assert num_images > 0, 'the predictions are empty'
        assert num_images == len(anno_dict), 'number of images is not equal to number of annotations: %d vs %d' % (num_images, len(anno_dict))
        assert all(num_images == len(pred_dict) for pred_dict in pred_dict_all.values()), 'number of images in results from different methods are not equal'

    # calculate normalized mean error for each single image based on point-to-point Euclidean distance normalized by the bounding box size
    normed_mean_error_dict = dict()
    normed_mean_error_pts_specific_dict = dict()
    normed_mean_error_pts_specific_valid_dict = dict()
    mse_error_dict_dict = dict()
    for method_name, pred_dict in pred_dict_all.items():
        normed_mean_error_total = np.zeros((num_images, ), dtype='float32')
        normed_mean_error_pts_specific = np.zeros((num_images, num_pts), dtype='float32')
        normed_mean_error_pts_specific_valid = np.zeros((num_images, num_pts), dtype='bool')
        mse_error_dict = dict()
        count = 0
        count_skip_num_images = 0       # images without any annotated point do not contribute to the error

        for image_path, pts_prediction in pred_dict.items():
            _, filename, _ = fileparts(image_path)
            pts_anno = anno_dict[filename]              # 2 x N annotation
            pts_keep_index = list(range(num_pts))

            # to avoid list object type, do conversion here
            if islist(pts_anno):
                pts_anno = np.asarray(pts_anno)
            if islist(pts_prediction):
                pts_prediction = np.asarray(pts_prediction)
            if debug:
                assert (is2dptsarray(pts_anno) or is2dptsarray_occlusion(pts_anno)) and pts_anno.shape[1] == num_pts, 'shape of annotations is not correct (%d x %d) vs (%d x %d)' % (2, num_pts, pts_anno.shape[0], pts_anno.shape[1])

            # if the annotation has 3 channels (include extra occlusion channel), we keep only the points with annotations
            # occlusion: -1 -> visible but not annotated, 0 -> invisible and not annotated, 1 -> visible
            if pts_anno.shape[0] == 3:
                pts_keep_index = np.where(pts_anno[2, :] == 1)[0].tolist()
                if len(pts_keep_index) <= 0:            # if no point is annotated in current image
                    count_skip_num_images += 1
                    continue
                pts_anno = pts_anno[0:2, pts_keep_index]
                pts_prediction = pts_prediction[:, pts_keep_index]

            # to avoid the point location includes the score or occlusion channel, only take the first two channels here
            if pts_prediction.shape[0] == 3 or pts_prediction.shape[0] == 4:
                pts_prediction = pts_prediction[0:2, :]

            if debug:
                assert pts_anno.shape[1] <= num_pts, 'number of points is not correct: %d vs %d' % (pts_anno.shape[1], num_pts)
                assert pts_anno.shape == pts_prediction.shape, 'shape of annotations and predictions are not the same {} vs {}'.format(print_np_shape(pts_anno, debug=debug), print_np_shape(pts_prediction, debug=debug))

            # calculate bbox for normalization
            if normalization_ced or normalization_vec:
                assert len(pts_keep_index) == num_pts, 'some points are not annotated. Normalization on PCK curve is not allowed.'
                bbox_anno = pts2bbox(pts_anno, debug=debug)                         # 1 x 4
                bbox_TLWH = bbox_TLBR2TLWH(bbox_anno, debug=debug)                  # 1 x 4
                bbox_size = math.sqrt(bbox_TLWH[0, 2] * bbox_TLWH[0, 3])            # scalar

            # calculate normalized error for all points
            normed_mean_error, _ = pts_euclidean(pts_prediction, pts_anno, debug=debug)    # scalar
            if normalization_ced:
                normed_mean_error /= bbox_size
            normed_mean_error_total[count] = normed_mean_error
            mse_error_dict[image_path] = normed_mean_error

            # an exactly zero error is suspicious, show the points involved
            if normed_mean_error == 0:
                print(pts_prediction)
                print(pts_anno)

            # calculate normalized error point specifically
            for pts_index in range(num_pts):
                if pts_index not in pts_keep_index:     # point not annotated in current image: keep 0 and invalid
                    continue
                normed_mean_error_pts_specific_valid[count, pts_index] = True

                pts_index_from_keep_list = pts_keep_index.index(pts_index)
                pts_prediction_tmp = np.reshape(pts_prediction[:, pts_index_from_keep_list], (2, 1))
                pts_anno_tmp = np.reshape(pts_anno[:, pts_index_from_keep_list], (2, 1))
                normed_mean_error_pts_specifc_tmp, _ = pts_euclidean(pts_prediction_tmp, pts_anno_tmp, debug=debug)
                if normalization_ced:
                    normed_mean_error_pts_specifc_tmp /= bbox_size
                normed_mean_error_pts_specific[count, pts_index] = normed_mean_error_pts_specifc_tmp

            count += 1

        print('number of skipped images is %d' % count_skip_num_images)
        assert count + count_skip_num_images == num_images, 'all cells in the array must be filled %d vs %d' % (count + count_skip_num_images, num_images)

        # save results to dictionary, only the rows that have been filled
        normed_mean_error_dict[method_name] = normed_mean_error_total[:count]
        normed_mean_error_pts_specific_dict[method_name] = normed_mean_error_pts_specific[:count, :]
        normed_mean_error_pts_specific_valid_dict[method_name] = normed_mean_error_pts_specific_valid[:count, :]
        mse_error_dict_dict[method_name] = mse_error_dict

    # save mse error list to file for each method, worst image first
    error_list_savedir = os.path.join(save_path, 'error_list')
    mkdir_if_missing(error_list_savedir)
    for method_name, mse_error_dict in mse_error_dict_dict.items():
        mse_error_list_path = os.path.join(error_list_savedir, 'error_%s.txt' % method_name)
        sorted_tuple_list = sorted(mse_error_dict.items(), key=operator.itemgetter(1), reverse=True)
        with open(mse_error_list_path, 'w') as mse_error_list:
            for image_path_tmp, mse_error_tmp in sorted_tuple_list:
                mse_error_list.write('{:<200} {}\n'.format(image_path_tmp, '%.2f' % mse_error_tmp))
        print('\nsave mse error list for %s to %s' % (method_name, mse_error_list_path))

    # visualize the ced (cumulative error distribution curve)
    print('visualizing pck curve....\n')
    pck_savedir = os.path.join(save_path, 'pck')
    mkdir_if_missing(pck_savedir)
    pck_savepath = os.path.join(pck_savedir, 'pck_curve_overall.png')
    table_savedir = os.path.join(save_path, 'metrics')
    mkdir_if_missing(table_savedir)
    table_savepath = os.path.join(table_savedir, 'detailed_metrics_overall.txt')
    _, metrics_all = visualize_ced(normed_mean_error_dict, error_threshold=error_threshold, normalized=normalization_ced, truncated_list=truncated_list,
                                   title='2D PCK curve (all %d points)' % num_pts, display_list=display_list, debug=debug, vis=vis,
                                   pck_savepath=pck_savepath, table_savepath=table_savepath)

    # point-wise table: header row + one row per method
    method_names = list(normed_mean_error_pts_specific_dict.keys())
    metrics_title = ['Method Name / Point Index']
    ptswise_mse_table = [[method_name] for method_name in method_names]
    for pts_index in range(num_pts):
        metrics_title.append(str(pts_index + 1))
        normed_mean_error_dict_tmp = dict()

        for method_name, error_array in normed_mean_error_pts_specific_dict.items():
            # Some points at certain images might not be annotated. When calculating MSE for these specific point,
            # we remove those images to avoid "false" mean average error
            valid_flags = normed_mean_error_pts_specific_valid_dict[method_name]
            valid_array_per_pts_per_method = np.where(valid_flags[:, pts_index])[0].tolist()
            num_image_tmp = len(valid_array_per_pts_per_method)
            if num_image_tmp == 0:
                continue
            error_array_per_pts = error_array[:, pts_index][valid_array_per_pts_per_method]
            normed_mean_error_dict_tmp[method_name] = np.reshape(error_array_per_pts, (num_image_tmp, ))

        metrics_dict = dict()
        if len(normed_mean_error_dict_tmp) > 0:
            pck_savepath = os.path.join(pck_savedir, 'pck_curve_pts_%d.png' % (pts_index+1))
            table_savepath = os.path.join(table_savedir, 'detailed_metrics_pts_%d.txt' % (pts_index+1))
            metrics_dict, _ = visualize_ced(normed_mean_error_dict_tmp, error_threshold=error_threshold, normalized=normalization_ced, truncated_list=truncated_list,
                                            display2terminal=False, title='2D PCK curve for point %d' % (pts_index+1), display_list=display_list,
                                            debug=debug, vis=vis, pck_savepath=pck_savepath, table_savepath=table_savepath)

        # every row gets exactly one cell per point so that the table stays rectangular,
        # methods without any valid image for this point are marked instead of raising a KeyError
        for method_index, method_name in enumerate(method_names):
            if method_name in metrics_dict:
                ptswise_mse_table[method_index].append('%.1f' % metrics_dict[method_name]['MSE'])
            else:
                ptswise_mse_table[method_index].append('N/A')

    # reorder the table, the header row always stays first
    if display_list is None:
        order_index_list = list(range(num_methods + 1))
    else:
        order_index_list = [0] + [display_list.index(method_name_tmp) + 1 for method_name_tmp in method_names]

    # print table to terminal
    ptswise_mse_table = list_reorder([metrics_title] + ptswise_mse_table, order_index_list, debug=debug)
    table = AsciiTable(ptswise_mse_table)
    print('\nprint point-wise average MSE')
    print(table.table)

    # save table to file
    ptswise_savepath = os.path.join(table_savedir, 'pointwise_average_MSE.txt')
    with open(ptswise_savepath, 'w') as table_file:
        table_file.write(table.table)
    print('\nsave point-wise average MSE to %s' % ptswise_savepath)

    print('\ndone!!!!!\n')
    return metrics_all, ptswise_mse_table
import tensorflow as tf import tensorflow.contrib.slim as slim import scipy.misc import matplotlib.pyplot as plt from monodepth_model import * from monodepth_dataloader import * from average_gradients import * from xinshuo_io import mkdir_if_missing, load_list_from_folder, fileparts data_dir = '/media/xinshuo/Data/Datasets/shimizu/' images_dir = os.path.join(data_dir, 'images_subsampled500/020815') # data_dir = '/media/xinshuo/Data/Datasets/KITTI/object/training' # images_dir = os.path.join(data_dir, 'image_2') depth_img_save_dir = os.path.join(data_dir, 'depth_image_disp') mkdir_if_missing(depth_img_save_dir) depth_save_dir = os.path.join(data_dir, 'depth_data_disp') mkdir_if_missing(depth_save_dir) checkpoint_path = '/media/xinshuo/Data/models/monodepth/model_city2kitti' parser = argparse.ArgumentParser( description='Monodepth TensorFlow implementation.') parser.add_argument('--encoder', type=str, help='type of encoder, vgg or resnet50', default='vgg') # parser.add_argument('--image_path', type=str, help='path to the image', required=True) # parser.add_argument('--checkpoint_path', type=str, help='path to a specific checkpoint to load', required=True) parser.add_argument('--input_height', type=int, help='input height',