def interpret_tracks_gt(dataset, date, det_id, traj_csv_path):
    """ Interprets tracking ground truth csv files exported by T-Analyst.

        Parameters:
        dataset       -- name of dataset
        date          -- date when the video was filmed, as a string on format 'YYYY-MM-DD'
        det_id        -- the ID number of the T-Analyst 'detection' of interest.
                         Set to None to include everything in the .csv file
        traj_csv_path -- path to .csv file exported by T-Analyst
    """
    traj = pd.read_csv(traj_csv_path, sep=';', decimal=',')

    calib = Calibration(dataset)
    ts = Timestamps(dataset)
    mask = Check(dataset, 'mask')

    gts = []

    for traj_row in pandas_loop(traj):
        row_det_id = traj_row['Detection ID']
        if (det_id is None) or (row_det_id == det_id):
            c = traj_row['Type of road user']
            i = traj_row['Road user ID']
            x = traj_row['X (m)']
            y = traj_row['Y (m)']
            t = traj_row['Time Stamp']

            # The timestamp is parsed manually instead of with strptime,
            # because strptime is both slow and has issues with the way
            # milliseconds are written by T-Analyst
            year, month, day = map(int, date.split('-'))
            hour, minute, second, millisecond = map(
                int, t.replace('.', ':').split(':'))
            t = datetime(year, month, day, hour, minute, second,
                         millisecond * 1000)

            vid, fn = ts.get_frame_number(t)
            px, py = calib.to_pixels(x, y)
            px, py = map(int, (px, py))

            if not mask.test(px, py):
                gt = (vid, fn, t, x, y, i, c, px, py)
                gts.append(gt)

    return gts
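# A minimal usage sketch for interpret_tracks_gt. The dataset name, detection
# ID and csv path below are hypothetical, not taken from this repo; adjust to
# your own T-Analyst export. Assumes the function above is in scope.
def _example_interpret_tracks_gt():
    gts = interpret_tracks_gt('sweden2', '2017-05-16', 1,
                              'annotations/sweden2_det1.csv')
    # Each entry is (video_name, frame_number, timestamp,
    #                world_x, world_y, road_user_id, class_name,
    #                pixel_x, pixel_y)
    for vid, fn, t, x, y, i, c, px, py in gts:
        print_flush("{} frame {}: {} {} at ({:.1f}, {:.1f}) m".format(
            vid, fn, c, i, x, y))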
def main(cmd, dataset, run, conf, make_videos):
    if make_videos:
        from visualize_tracking import render_video
        from config import DatasetConfig
        from apply_mask import Masker
        mask = Masker(dataset)
        dc = DatasetConfig(dataset)

    config_path = "{rp}{ds}_{rn}/world_tracking_optimization.pklz".format(
        rp=runs_path, ds=dataset, rn=run)
    if isfile(config_path):
        config = load(config_path)
    else:
        #raise(ValueError("No world tracking optimized configuration exists at {}".format(config_path)))
        config = WorldTrackingConfig(default_config)

    calib = Calibration(dataset)
    munkres = Munkres()
    ts = Timestamps(dataset)

    start_stop = None

    if cmd == "findvids":
        from glob import glob
        vidnames = glob('{dsp}{ds}/videos/*.mkv'.format(dsp=datasets_path,
                                                        ds=dataset))
        vidnames = [right_remove(x.split('/')[-1], '.mkv') for x in vidnames]
        vidnames.sort()

        outfolder = '{}{}_{}/tracks_world/'.format(runs_path, dataset, run)
        mkdir(outfolder)
    else:
        vidnames = [cmd]
        outfolder = './'
        start_stop = (0, 500)

    for v in vidnames:
        print_flush(v)
        out_path = "{of}{v}_tracks.pklz".format(of=outfolder, v=v)

        print_flush("Loading data...")
        det_path = "{rp}{ds}_{rn}/detections_world/{v}_world.csv".format(
            rp=runs_path, ds=dataset, rn=run, v=v)
        detections3D = pd.read_csv(det_path)

        klt_path = det_path.replace('.csv', '_klt.pklz')
        klts = load(klt_path)

        print_flush("Tracking...")
        tracks = make_tracks(dataset, v, detections3D, klts, munkres, ts,
                             calib, config, start_stop=start_stop)

        print_flush("Saving tracks...")
        save(tracks, out_path)

        if make_videos:
            vidpath = "{dsp}{ds}/videos/{v}.mkv".format(dsp=datasets_path,
                                                        ds=dataset, v=v)
            print_flush("Rendering video...")
            render_video(tracks, vidpath, out_path.replace('.pklz', '.mp4'),
                         calib=calib, mask=mask, fps=dc.get('video_fps'))

    print_flush("Done!")
def main(cmd, dataset, run, conf, make_videos):
    from pathlib import Path

    if make_videos:
        from visualize_tracking import render_video
        from config import DatasetConfig
        from apply_mask import Masker
        mask = Masker(dataset)
        dc = DatasetConfig(dataset)

    config_path = runs_path / "{}_{}".format(dataset, run) / \
        "world_tracking_optimization.pklz"
    if config_path.is_file():
        config = load(config_path)
    else:
        #raise(ValueError("No world tracking optimized configuration exists at {}".format(config_path)))
        config = WorldTrackingConfig(default_config)

    calib = Calibration(dataset)
    munkres = Munkres()
    ts = Timestamps(dataset)

    start_stop = None

    if cmd == "findvids":
        vidnames = (datasets_path / dataset / "videos").glob('*.mkv')
        vidnames = [x.stem for x in vidnames]
        vidnames.sort()

        outfolder = runs_path / "{}_{}".format(dataset, run) / "tracks_world"
        mkdir(outfolder)
    else:
        vidnames = [cmd]
        outfolder = Path('./')
        start_stop = (0, 500)

    for v in vidnames:
        print_flush(v)
        out_path = outfolder / (v + '_tracks.pklz')

        print_flush("Loading data...")
        det_path = runs_path / "{}_{}".format(dataset, run) / \
            "detections_world" / (v + '_world.csv')
        detections3D = pd.read_csv(det_path)

        klt_path = det_path.with_name(det_path.stem + '_klt.pklz')
        klts = load(klt_path)

        print_flush("Tracking...")
        tracks = make_tracks(dataset, v, detections3D, klts, munkres, ts,
                             calib, config, start_stop=start_stop)

        print_flush("Saving tracks...")
        save(tracks, out_path)

        if make_videos:
            vidpath = datasets_path / dataset / "videos" / (v + '.mkv')
            print_flush("Rendering video...")
            render_video(tracks, vidpath, out_path.with_suffix('.mp4'),
                         calib=calib, mask=mask, fps=dc.get('video_fps'))

    print_flush("Done!")
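# Hedged usage sketch for the main above: "findvids" processes every .mkv in
# the dataset's videos folder, while any other cmd value is treated as a
# single video name, tracking only frames 0..500 and writing to the current
# directory. Dataset and run names below are hypothetical; conf appears
# unused by the function body, so None is passed.
def _example_main_tracks_world():
    # Track all videos of the dataset and render result videos:
    main('findvids', 'sweden2', 'default', None, make_videos=True)
    # Quick test on a single video:
    main('some_video_name', 'sweden2', 'default', None, make_videos=False)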
def detections_video(detections, videopath, outvideopath, classnames, dataset,
                     res, fps=15, conf_thresh=0.75, show_frame_number=True,
                     coords='pixels'):
    """ Renders a video with the detections drawn on top

        Arguments:
        detections        -- the detections as a pandas table
        videopath         -- path to input video
        outvideopath      -- path to output video showing the detections
        classnames        -- list of all the classes
        dataset           -- name of the dataset
        res               -- resolution of output video and coordinates in
                             csv file (assumed to be the same). Probably the
                             SSD resolution if performed on direct csv files,
                             and probably the video resolution if performed
                             on csv files with world coordinates
        fps               -- frames-per-second of output video
        conf_thresh       -- detections with confidences below this are not
                             shown in output video. Set to negative to not
                             visualize confidences, or set to 0.0 to show all
                             of them
        show_frame_number -- writes the frame number in the top left corner
                             of the video
        coords            -- coordinate system of detections
    """
    masker = Masker(dataset)

    calib = None
    if coords == 'world':
        calib = Calibration(dataset)

    num_classes = len(classnames) + 1
    colors = class_colors(num_classes)

    outwidth = make_divisible(res[0], 16)
    outheight = make_divisible(res[1], 16)
    pad_vid = True
    if (outwidth == res[0]) and (outheight == res[1]):
        pad_vid = False

    with io.get_reader(videopath) as vid:
        with io.get_writer(outvideopath, fps=fps) as outvid:
            for i, frame in enumerate(vid):
                frame = masker.mask(frame, alpha=0.5)
                frame = cv2.resize(frame, (res[0], res[1]))

                dets = detections[detections['frame_number'] == i]
                if len(dets) > 0:
                    frame = draw(frame, dets, colors,
                                 conf_thresh=conf_thresh,
                                 coords=coords, calib=calib)

                if pad_vid:
                    padded = 255 * np.ones((outheight, outwidth, 3),
                                           dtype=np.uint8)
                    padded[0:res[1], 0:res[0], :] = frame
                    frame = padded

                if show_frame_number:
                    cv2.putText(frame, 'Frame {}'.format(i), (10, 20),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                                (255, 255, 255), 1, cv2.LINE_AA)

                outvid.append_data(frame)

                if i % 500 == 0:
                    print_flush("Frame {}".format(i))
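# Hedged usage sketch for detections_video: rendering world-coordinate
# detections at the video's own resolution. The paths, dataset name and
# resolution below are hypothetical; get_classnames and pd.read_csv are used
# as elsewhere in this repo.
def _example_detections_video():
    detections = pd.read_csv('detections/some_video_world.csv')
    classnames = get_classnames('sweden2')
    detections_video(detections, 'videos/some_video.mkv',
                     'some_video_dets.mp4', classnames, 'sweden2',
                     res=(640, 480), fps=15,
                     conf_thresh=0.0, coords='world')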
def main(dataset, run, n_clips, clip_length):
    dc = DatasetConfig(dataset)
    rc = RunConfig(dataset, run)
    mask = Masker(dataset)
    classes = get_classnames(dataset)
    num_classes = len(classes) + 1
    calib = Calibration(dataset)

    dataset_path = "{dsp}{ds}/".format(dsp=datasets_path, ds=dataset)
    run_path = "{rp}{ds}_{r}/".format(rp=runs_path, ds=dataset, r=run)

    # Grab a bunch of videos
    vids_query = "{dsp}videos/*.mkv".format(dsp=dataset_path)
    all_vids = glob(vids_query)
    all_vids = [right_remove(x.split('/')[-1], '.mkv') for x in all_vids]
    all_vids.sort()

    vids = []

    if n_clips > len(all_vids):
        n_clips = len(all_vids)

    if n_clips == len(all_vids):
        vids = all_vids
    else:
        while len(vids) < n_clips:
            vid = choice(all_vids)
            if vid not in vids:
                vids.append(vid)

    print_flush(vids)

    # Find out what has been run on all of these videos, what to include
    include_klt = True
    include_pixeldets = True
    include_worlddets = True
    include_worldtracks = True

    klts = []
    pixeldets = []
    worlddets = []
    worldtracks = []

    # Point tracks need to be converted for faster access
    vidres = dc.get('video_resolution')
    kltres = dc.get('point_track_resolution')

    class KLTConfig(object):
        klt_x_factor = 0
        klt_y_factor = 0

    klt_config = KLTConfig()
    klt_config.klt_x_factor = vidres[0] / kltres[0]
    klt_config.klt_y_factor = vidres[1] / kltres[1]

    ssdres = rc.get('detector_resolution')
    x_scale = vidres[0] / ssdres[0]
    y_scale = vidres[1] / ssdres[1]

    colors = class_colors(num_classes)

    for vid in vids:
        f = get_klt_path(dataset_path, vid)
        if not isfile(f):
            include_klt = False
        else:
            klt = load(f)
            klt, klt_frames = convert_klt(klt, klt_config)
            pts = (klt, klt_frames, class_colors(n_cols_klts))
            klts.append(pts)

        f = get_pixeldet_path(run_path, vid)
        if not isfile(f):
            include_pixeldets = False
        else:
            dets = pd.read_csv(f)
            pixeldets.append((dets, colors, x_scale, y_scale))

        f = get_worlddet_path(run_path, vid)
        if not isfile(f):
            include_worlddets = False
        else:
            dets = pd.read_csv(f)
            worlddets.append((dets, colors, calib))

        f = get_worldtracks_path(run_path, vid)
        if not isfile(f):
            include_worldtracks = False
        else:
            tracks = load(f)
            worldtracks.append((tracks, class_colors(n_cols_tracks), calib))

    print_flush("Point tracks: {}".format(include_klt))
    print_flush("Pixel coordinate detections: {}".format(include_pixeldets))
    print_flush("World coordinate detections: {}".format(include_worlddets))
    print_flush("World coordinate tracks: {}".format(include_worldtracks))

    # Decide where to start and stop in the videos
    clip_length = clip_length * dc.get('video_fps')  # convert from seconds to frames
    print_flush("Clip length in frames: {}".format(clip_length))

    clips = []
    for vid in vids:
        start, stop = make_clip(vid, clip_length, dataset_path)
        clips.append((start, stop))

    incs = [include_klt, include_pixeldets, include_worlddets,
            include_worldtracks]
    funs = [klt_frame, pixeldet_frame, worlddet_frame, worldtracks_frame]
    dats = [klts, pixeldets, worlddets, worldtracks]
    nams = ["Point tracks", "Detections in pixel coordinates",
            "Detections in world coordinates", "Tracks in world coordinates"]

    print_flush(clips)

    with iio.get_writer("{trp}summary.mp4".format(trp=run_path),
                        fps=dc.get('video_fps')) as outvid:
        for i_vid, vid in enumerate(vids):
            print_flush(vid)
            old_prog = 0

            with iio.get_reader("{dsp}videos/{v}.mkv".format(
                    dsp=dataset_path, v=vid)) as invid:
                start, stop = clips[i_vid]
                for i_frame in range(start, stop):
                    frame = invid.get_data(i_frame)

                    pieces = []

                    for inc, fun, dat, nam in zip(incs, funs, dats, nams):
                        if inc:
                            piece = fun(dat[i_vid],
                                        mask.mask(frame.copy(), alpha=0.5),
                                        i_frame)
                            draw_text(piece, vid, i_frame, nam)
                            pieces.append(piece)

                    outvid.append_data(join(pieces))

                    prog = float(i_frame - start) / (stop - start)
                    if prog - old_prog > 0.1:
                        print_flush("{}%".format(round(prog * 100)))
                        old_prog = prog

    print_flush("Done!")
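# A small sketch of the scale factors computed above, with hypothetical
# resolutions: a 640x480 video, a 300x300 SSD input and a 320x240 point track
# resolution. Detections and KLT point tracks each get their own factors that
# map their coordinates into video pixel space.
def _example_scale_factors():
    vidres, ssdres, kltres = (640, 480), (300, 300), (320, 240)
    x_scale = vidres[0] / ssdres[0]       # ~2.13: SSD x -> video x
    y_scale = vidres[1] / ssdres[1]       # 1.6:   SSD y -> video y
    klt_x_factor = vidres[0] / kltres[0]  # 2.0:   KLT x -> video x
    klt_y_factor = vidres[1] / kltres[1]  # 2.0:   KLT y -> video y
    return x_scale, y_scale, klt_x_factor, klt_y_factor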
def visualize_tracks(outvidpath, dataset, gts, tracks=None, stack_axis='v'):
    import imageio as iio
    from visualize_tracking import _draw_world, draw_world
    from visualize import class_colors
    from apply_mask import Masker
    from config import DatasetConfig

    if not (tracks is None):
        calib = Calibration(dataset)

        # Reset IDs, ordered by each track's first frame number
        tracks = sorted(tracks, key=lambda x: x.history[0][0])
        for i, track in enumerate(tracks):
            track.id = i

    dc = DatasetConfig(dataset)

    gts_by_vid = split_lambda(gts, lambda x: x[0])
    assert (len(gts_by_vid) == 1)
    vid = list(gts_by_vid.keys())[0]

    n_colors = 50
    colors = class_colors(n_colors)

    mask = Masker(dataset)

    with iio.get_writer(outvidpath, fps=dc.get('video_fps')) as outvid:
        with iio.get_reader(datasets_path / dataset / "videos" /
                            (vid + '.mkv')) as invid:
            gt_by_frame = split_lambda(gts, lambda x: x[1])
            fns = list(gt_by_frame.keys())
            fns.sort()

            for fn in fns:
                gts_frame = gt_by_frame[fn]

                frame = invid.get_data(fn)
                frame = mask.mask(frame, alpha=0.5)

                if not (tracks is None):
                    tracks_frame = frame.copy()

                for gt in gts_frame:
                    vid, fn, t, x, y, i, c, px, py = gt
                    text = "{} {}".format(c, i)
                    col = colors[i % n_colors]
                    frame = _draw_world(frame, text, px, py, col)

                if not (tracks is None):
                    for track in tracks:
                        draw_world(tracks_frame, track, fn,
                                   colors[track.id % n_colors], calib)

                    if stack_axis == 'h':
                        frame = np.hstack((frame, tracks_frame))
                    elif stack_axis == 'v':
                        frame = np.vstack((frame, tracks_frame))
                    else:
                        raise ValueError(
                            "Incorrect stack axis {}, try 'h' or 'v'".format(
                                stack_axis))

                outvid.append_data(frame)
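# Hedged usage sketch: rendering ground truth next to computed tracks. The
# gts list is expected in the format produced by interpret_tracks_gt above
# (the function asserts all entries come from a single video); file and
# dataset names below are hypothetical.
def _example_visualize_tracks():
    gts = interpret_tracks_gt('sweden2', '2017-05-16', 1,
                              'annotations/sweden2_det1.csv')
    tracks = load('tracks/some_video_tracks.pklz')
    visualize_tracks('gt_vs_tracks.mp4', 'sweden2', gts,
                     tracks=tracks, stack_axis='v')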
def score_tracking(dataset, run, gt, tracking_config, gt_class_name_conversion):
    munkres = Munkres()
    ts = Timestamps(dataset)
    calib = Calibration(dataset)

    # Each video separately
    by_vid = split_lambda(gt, lambda x: x[0])

    all_costs = {}
    all_tracks = {}

    for vid in by_vid:
        gt_list = by_vid[vid]
        fn = [x[1] for x in gt_list]
        start_stop = (min(fn), max(fn))
        gt_tracks = split_lambda(gt_list, lambda x: x[5], as_list=True)

        print_flush(" Loading data...")
        det_path = runs_path / "{}_{}".format(dataset, run) / \
            "detections_world" / (vid + '_world.csv')
        detections3D = pd.read_csv(det_path)

        klt_path = det_path.with_name(det_path.stem + '_klt.pklz')
        klts = load(klt_path)

        print_flush(" Tracking...")
        tracks = make_tracks(dataset, vid, detections3D, klts, munkres, ts,
                             calib, tracking_config)
        all_tracks[vid] = tracks

        # Associate each track with a ground truth one, based on cost.
        # Then compute total cost and use as score measure
        print_flush(" Associating tracks with ground truth...")
        mat = []
        for igt, gt_track in enumerate(gt_tracks):
            mat.append([])
            for it, track in enumerate(tracks):
                cost = compute_cost(gt_track, track,
                                    gt_class_name_conversion, ts, vid)
                mat[igt].append(cost)

        try:
            indices = munkres.compute(mat)
        except UnsolvableMatrix:
            cost_sum = float("inf")
        else:
            print_flush(" Computing cost...")
            cost_sum = 0
            for igt, it in indices:
                cost_sum += mat[igt][it]

        all_costs[vid] = cost_sum

    if len(by_vid) == 1:
        vid = list(by_vid.keys())[0]
        return all_costs[vid], all_tracks[vid]
    else:
        raise ValueError(
            "Computing scores for multiple videos, while supported in this "
            "function, is not generally supported in this module. Remove "
            "this exception from the code if you want to add multi-video "
            "scoring support. But then you need to take care of the output "
            "as dictionaries")

    return all_costs, all_tracks
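# A minimal sketch of the association step used above: the Hungarian
# algorithm, via the munkres package, picks the assignment of tracks to
# ground truth tracks that minimizes the summed cost. The cost values below
# are made up for illustration.
def _example_munkres_association():
    from munkres import Munkres
    mat = [[4.0, 1.0, 3.0],  # cost of matching gt track 0 to tracks 0..2
           [2.0, 0.0, 5.0],
           [3.0, 2.0, 2.0]]
    indices = Munkres().compute(mat)  # [(0, 1), (1, 0), (2, 2)] here
    cost_sum = sum(mat[igt][it] for igt, it in indices)  # 5.0, the minimum
    return indices, cost_sum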
def main(dataset, run, videos):
    # Note: This main function only works for world coordinate tracks!

    calib = Calibration(dataset)
    dc = DatasetConfig(dataset)
    masker = Masker(dataset)

    if videos == 'all':
        from glob import glob
        files = glob('{rp}{ds}_{r}/tracks_world/*_tracks.pklz'.format(
            rp=runs_path, ds=dataset, r=run))
        video_names = [right_remove(x.split('/')[-1], '_tracks.pklz')
                       for x in files]
    elif videos.startswith('random:'):
        num = int(left_remove(videos, 'random:'))

        from glob import glob
        files = glob('{rp}{ds}_{r}/tracks_world/*_tracks.pklz'.format(
            rp=runs_path, ds=dataset, r=run))
        all_video_names = [right_remove(x.split('/')[-1], '_tracks.pklz')
                           for x in files]

        video_names = []
        while len(video_names) < num:
            video_name = choice(all_video_names)
            if video_name not in video_names:
                video_names.append(video_name)

            # Just in case user wants more videos than there are
            if len(video_names) == len(all_video_names):
                break
    else:
        # Assumes the user types one or more videos, separated by commas with no spaces
        video_names = videos.split(',')

        # In case user includes endings
        video_names = [right_remove(x.rstrip(), '.mkv') for x in video_names]

        # In case user includes spaces
        video_names = [x.strip(' ') for x in video_names]

    print_flush("Chosen videos: ")
    print_flush(str(video_names))

    for video_name in video_names:
        print_flush(video_name)
        print_flush("Loading...")

        tracks = load('{rp}{ds}_{r}/tracks_world/{v}_tracks.pklz'.format(
            rp=runs_path, ds=dataset, r=run, v=video_name))
        vidpath = "{dsp}{ds}/videos/{v}.mkv".format(dsp=datasets_path,
                                                    ds=dataset, v=video_name)

        if not isfile(vidpath):
            raise ValueError("Incorrect input {}".format(videos))

        outvidpath = '{rp}{ds}_{r}/tracks_world/{v}_tracks.mp4'.format(
            rp=runs_path, ds=dataset, r=run, v=video_name)

        print_flush("Rendering...")
        render_video(tracks, vidpath, outvidpath, mask=masker,
                     id_mode="global", calib=calib, fps=dc.get('video_fps'))

    print_flush("Done!")
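# Hedged usage sketch: the three forms the videos argument of the main above
# accepts. Dataset, run and video names below are hypothetical.
def _example_main_videos_arg():
    main('sweden2', 'default', 'all')            # every video with tracks
    main('sweden2', 'default', 'random:3')       # three randomly chosen videos
    main('sweden2', 'default', 'vid1,vid2.mkv')  # explicit list; endings allowed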
def main(cmd, dataset, run, vidres, ssdres, kltres, make_videos):
    vidres = parse_resolution(vidres)
    ssdres = parse_resolution(ssdres)
    kltres = parse_resolution(kltres)

    x_factor = float(vidres[0]) / ssdres[0]
    y_factor = float(vidres[1]) / ssdres[1]
    det_dims = ('xmin', 'xmax', 'ymin', 'ymax')
    det_factors = (x_factor, x_factor, y_factor, y_factor)

    calib = Calibration(dataset)
    ts = Timestamps(dataset)
    class_data = get_class_data(dataset)
    class_heights = {d['name']: d['height'] for d in class_data}

    class KLTConfig(object):
        klt_x_factor = 0
        klt_y_factor = 0

    klt_config = KLTConfig()
    klt_config.klt_x_factor = vidres[0] / kltres[0]
    klt_config.klt_y_factor = vidres[1] / kltres[1]

    if cmd == "findvids":
        vidnames = list((datasets_path / dataset / "videos").glob('*.mkv'))
        vidnames = [x.stem for x in vidnames]
        vidnames.sort()

        outfolder = runs_path / '{}_{}'.format(dataset, run) / 'detections_world'
        mkdir(outfolder)
    else:
        vidnames = [cmd]
        outfolder = Path('.')
        mkdir(outfolder)

    if make_videos:
        classnames = get_classnames(dataset)
        dc = DatasetConfig(dataset)
        fps = dc.get('video_fps')

    for v in vidnames:
        print_flush(v)
        detections = pd.read_csv(runs_path / '{}_{}'.format(dataset, run) /
                                 'csv' / (v + '.csv'))

        # Convert pixel coordinate positions from SSD resolution to video resolution
        # because Calibration assumes video resolution coordinates
        for dim, factor in zip(det_dims, det_factors):
            detections[dim] = round(detections[dim] * factor).astype(int)

        print_flush("Converting point tracks...")
        klt = load(datasets_path / dataset / 'klt' / (v + '.pklz'))
        klt, klt_frames = convert_klt(klt, klt_config)
        pts = PointTrackStructure(klt, klt_frames, vidres[0], vidres[1])

        outpath = outfolder / '{v}_world.csv'.format(v=v)

        print_flush("Converting to world coordinates...")
        detections3D = detections_to_3D(
            detections, pts, calib, ts, v,
            klt_save_path=outpath.with_name(outpath.stem + '_klt.pklz'),
            class_heights=class_heights)

        detections3D.to_csv(outpath, float_format='%.4f')

        if make_videos:
            from visualize_detections import detections_video
            vidpath = datasets_path / dataset / "videos" / "{}.mkv".format(v)
            print_flush("Rendering video...")
            detections_video(detections3D, vidpath,
                             outpath.with_suffix('.mp4'), classnames,
                             dataset, vidres, fps=fps, conf_thresh=0.0,
                             coords='world')

    print_flush("Done!")
def main(cmd, dataset, run, vidres, ssdres, kltres, make_videos):
    vidres = parse_resolution(vidres)
    ssdres = parse_resolution(ssdres)
    kltres = parse_resolution(kltres)

    x_factor = float(vidres[0]) / ssdres[0]
    y_factor = float(vidres[1]) / ssdres[1]
    det_dims = ('xmin', 'xmax', 'ymin', 'ymax')
    det_factors = (x_factor, x_factor, y_factor, y_factor)

    calib = Calibration(dataset)
    ts = Timestamps(dataset)
    class_data = get_class_data(dataset)
    class_heights = {d['name']: d['height'] for d in class_data}

    class KLTConfig(object):
        klt_x_factor = 0
        klt_y_factor = 0

    klt_config = KLTConfig()
    klt_config.klt_x_factor = vidres[0] / kltres[0]
    klt_config.klt_y_factor = vidres[1] / kltres[1]

    if cmd == "findvids":
        from glob import glob
        vidnames = glob('{dsp}{ds}/videos/*.mkv'.format(dsp=datasets_path,
                                                        ds=dataset))
        vidnames = [right_remove(x.split('/')[-1], '.mkv') for x in vidnames]
        vidnames.sort()

        outfolder = '{}{}_{}/detections_world/'.format(runs_path, dataset, run)
        mkdir(outfolder)
    else:
        vidnames = [cmd]
        outfolder = './'
        mkdir(outfolder)

    if make_videos:
        classnames = get_classnames(dataset)
        dc = DatasetConfig(dataset)
        fps = dc.get('video_fps')

    for v in vidnames:
        print_flush(v)
        detections = pd.read_csv('{}{}_{}/csv/{}.csv'.format(
            runs_path, dataset, run, v))

        # Convert pixel coordinate positions from SSD resolution to video resolution
        # because Calibration assumes video resolution coordinates
        for dim, factor in zip(det_dims, det_factors):
            detections[dim] = round(detections[dim] * factor).astype(int)

        print_flush("Converting point tracks...")
        klt = load('{}{}/klt/{}.pklz'.format(datasets_path, dataset, v))
        klt, klt_frames = convert_klt(klt, klt_config)
        pts = PointTrackStructure(klt, klt_frames, vidres[0], vidres[1])

        outpath = '{of}{v}_world.csv'.format(of=outfolder, v=v)

        print_flush("Converting to world coordinates...")
        detections3D = detections_to_3D(
            detections, pts, calib, ts, v,
            klt_save_path=outpath.replace('.csv', '_klt.pklz'),
            class_heights=class_heights)

        detections3D.to_csv(outpath, float_format='%.4f')

        if make_videos:
            from visualize_detections import detections_video
            vidpath = "{dsp}{ds}/videos/{v}.mkv".format(dsp=datasets_path,
                                                        ds=dataset, v=v)
            print_flush("Rendering video...")
            detections_video(detections3D, vidpath,
                             outpath.replace('.csv', '.mp4'), classnames,
                             dataset, vidres, fps=fps, conf_thresh=0.0,
                             coords='world')

    print_flush("Done!")
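# A small sketch of the coordinate rescaling performed in the two mains
# above, assuming a hypothetical 300x300 detector resolution and a 640x480
# video: each bounding box coordinate is scaled and rounded to integer video
# pixels before Calibration maps it onwards to world coordinates.
def _example_rescale_detections():
    import pandas as pd
    detections = pd.DataFrame({'xmin': [30], 'xmax': [150],
                               'ymin': [60], 'ymax': [210]})
    x_factor, y_factor = 640.0 / 300, 480.0 / 300
    for dim, factor in zip(('xmin', 'xmax', 'ymin', 'ymax'),
                           (x_factor, x_factor, y_factor, y_factor)):
        detections[dim] = round(detections[dim] * factor).astype(int)
    return detections  # xmin=64, xmax=320, ymin=96, ymax=336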