def parse(basepath, dataset, resolution):
    """ Parses a dataset for data frames CSV files and draws the detections.
        This is used for showing object detector results on the validation set
        used during training.

        Arguments:
        basepath   -- path to folder with CSV files
        dataset    -- name of the dataset used, used for finding the correct mask
        resolution -- resolution to resize the images to before drawing
    """
    colors = class_colors()
    masker = Masker(dataset)

    csvpath = basepath + 'detections_0.csv'
    res = pd.read_csv(csvpath)

    outpath = basepath + 'visualize/'
    if not os.path.exists(outpath):
        os.makedirs(outpath)
    else:
        old_files = glob(outpath + '*')
        for old in old_files:
            os.remove(old)

    files = res['filename'].unique()
    for i, filename in enumerate(files):
        df = res.loc[res['filename'] == filename]
        impath = df['filename'].iloc[0]

        im = cv2.imread(impath)
        im = cv2.resize(im, (resolution[0], resolution[1]))
        im = masker.mask(im)
        im = draw(im, df, colors)

        outfilepath = "{}{}".format(outpath,
                                    '{}_{}'.format(1 + i, filename.split('/')[-1]))
        cv2.imwrite(outfilepath, im)
        print(outfilepath)
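# A minimal usage sketch (the run folder path and dataset name are hypothetical,
# not part of this module). Draws every validation detection from
# <basepath>/detections_0.csv into a 'visualize' subfolder.
def _demo_parse():
    basepath = 'runs/sweden2_default/'  # hypothetical run folder containing detections_0.csv
    parse(basepath, dataset='sweden2', resolution=(640, 480))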
def detections_video(detections, videopath, outvideopath, classnames, dataset,
                     res, fps=15, conf_thresh=0.75, show_frame_number=True,
                     coords='pixels'):
    """ Renders a video with the detections drawn on top

        Arguments:
        detections        -- the detections as a pandas table
        videopath         -- path to input video
        outvideopath      -- path to output video showing the detections
        classnames        -- list of all the classes
        dataset           -- name of the dataset
        res               -- resolution of output video and coordinates in csv file
                             (assumed to be the same). Probably SSD resolution if
                             performed on direct csv files, and probably the video
                             resolution if performed on csv files with world coordinates
        fps               -- frames-per-second of output video
        conf_thresh       -- detections with confidences below this are not shown in
                             output video. Set to negative to not visualize confidences,
                             or set to 0.0 to show all of them.
        show_frame_number -- writes the frame number in the top left corner of the video
        coords            -- coordinate system of detections
    """
    masker = Masker(dataset)

    calib = None
    if coords == 'world':
        calib = Calibration(dataset)

    num_classes = len(classnames) + 1
    colors = class_colors(num_classes)

    outwidth = make_divisible(res[0], 16)
    outheight = make_divisible(res[1], 16)
    pad_vid = True
    if (outwidth == res[0]) and (outheight == res[1]):
        pad_vid = False

    with io.get_reader(videopath) as vid:
        with io.get_writer(outvideopath, fps=fps) as outvid:
            for i, frame in enumerate(vid):
                frame = masker.mask(frame, alpha=0.5)
                frame = cv2.resize(frame, (res[0], res[1]))

                dets = detections[detections['frame_number'] == i]
                if len(dets) > 0:
                    frame = draw(frame, dets, colors, conf_thresh=conf_thresh,
                                 coords=coords, calib=calib)

                if pad_vid:
                    padded = 255 * np.ones((outheight, outwidth, 3), dtype=np.uint8)
                    padded[0:res[1], 0:res[0], :] = frame
                    frame = padded

                if show_frame_number:
                    cv2.putText(frame, 'Frame {}'.format(i), (10, 20),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255),
                                1, cv2.LINE_AA)

                outvid.append_data(frame)

                if i % 500 == 0:
                    print_flush("Frame {}".format(i))
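# A minimal usage sketch (hypothetical paths, class list and resolution; assumes
# pandas is imported as pd, as elsewhere in this project). Renders pixel-coordinate
# detections from a CSV file on top of the source video.
def _demo_detections_video():
    import pandas as pd
    dets = pd.read_csv('detections.csv')  # hypothetical CSV of detections
    detections_video(dets, 'input.mkv', 'output_dets.mp4',
                     classnames=['car', 'bus', 'person'], dataset='sweden2',
                     res=(640, 480), fps=15, conf_thresh=0.75)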
def generate(self, train=True, do_shuffle=True):
    inputs = []
    targets = []
    while True:
        if train:
            if do_shuffle:
                shuffle(self.train_keys)
            keys = self.train_keys
        else:
            if do_shuffle:
                shuffle(self.val_keys)
            keys = self.val_keys

        for key in keys:
            img_path = self.path_prefix + key

            # To know the correct mask, we need the dataset name
            dataset = img_path.split('/')[3]
            if dataset in self.maskers:
                masker = self.maskers[dataset]
            else:
                masker = Masker(dataset)
                self.maskers[dataset] = masker

            img = masker.mask(imread(img_path)).astype('float32')
            y = np.vstack(self.gt.loc[key].y_true)

            if train and self.do_crop:
                img, y = self.random_sized_crop(img, y)

            img = imresize(img, self.image_size).astype('float32')

            if train:
                shuffle(self.color_jitter)
                for jitter in self.color_jitter:
                    img = jitter(img)
                if self.lighting_std:
                    img = self.lighting(img)
                if self.hflip_prob > 0:
                    img, y = self.horizontal_flip(img, y)
                if self.vflip_prob > 0:
                    img, y = self.vertical_flip(img, y)

            y = self.bbox_util.assign_boxes(y)
            inputs.append(img)
            targets.append(y)

            if len(targets) == self.batch_size:
                tmp_inp = np.array(inputs)
                tmp_targets = np.array(targets)
                inputs = []
                targets = []
                yield preprocess_input(tmp_inp), tmp_targets
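# A minimal sketch of how this generator is typically consumed (hypothetical
# model and generator instance; Keras 2-style API as used elsewhere in this
# project). generate(True) yields (inputs, targets) batches forever, so
# steps_per_epoch is what bounds each epoch.
def _demo_generate(model, gen, epochs=10):
    model.fit_generator(gen.generate(True),
                        steps_per_epoch=gen.train_batches,
                        epochs=epochs,
                        validation_data=gen.generate(False),
                        validation_steps=gen.val_batches)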
def main(cmd, dataset, imsize, visualize):
    imsize = parse_resolution(imsize)
    mask = Masker(dataset)

    if cmd == "findvids" or cmd == "continue":
        vidfolder = datasets_path / dataset / "videos"
        kltfolder = datasets_path / dataset / "klt"
        mkdir(kltfolder)

        allvids = list(vidfolder.glob('*.mkv'))
        allvids.sort()

        if cmd == "continue":
            existing = list(kltfolder.glob('*.pklz'))
            existing.sort()
            existing = [x.stem for x in existing]
            allvids = [x for x in allvids if not x.stem in existing]

        for vidpath in allvids:
            datpath = kltfolder / (vidpath.stem + '.pklz')
            if visualize:
                outvidpath = datpath.with_name(datpath.stem + '_klt.mp4')
                print_flush("{} -> {} & {}".format(vidpath, datpath, outvidpath))
            else:
                outvidpath = None
                print_flush("{} -> {}".format(vidpath, datpath))

            klt_save(vidpath, datpath, imsize, mask, outvidpath)

        print_flush("Done!")
    else:
        raise ValueError("Unknown command: {}".format(cmd))
def main(cmd, dataset, run, conf, make_videos):
    if make_videos:
        from visualize_tracking import render_video
        from config import DatasetConfig
        from apply_mask import Masker

        mask = Masker(dataset)
        dc = DatasetConfig(dataset)

    config_path = "{rp}{ds}_{rn}/world_tracking_optimization.pklz".format(
        rp=runs_path, ds=dataset, rn=run)
    if isfile(config_path):
        config = load(config_path)
    else:
        #raise(ValueError("No world tracking optimized configuration exists at {}".format(config_path)))
        config = WorldTrackingConfig(default_config)

    calib = Calibration(dataset)
    munkres = Munkres()
    ts = Timestamps(dataset)

    start_stop = None

    if cmd == "findvids":
        from glob import glob
        vidnames = glob('{dsp}{ds}/videos/*.mkv'.format(dsp=datasets_path, ds=dataset))
        vidnames = [right_remove(x.split('/')[-1], '.mkv') for x in vidnames]
        vidnames.sort()

        outfolder = '{}{}_{}/tracks_world/'.format(runs_path, dataset, run)
        mkdir(outfolder)
    else:
        vidnames = [cmd]
        outfolder = './'
        start_stop = (0, 500)

    for v in vidnames:
        print_flush(v)
        out_path = "{of}{v}_tracks.pklz".format(of=outfolder, v=v)

        print_flush("Loading data...")
        det_path = "{rp}{ds}_{rn}/detections_world/{v}_world.csv".format(
            rp=runs_path, ds=dataset, rn=run, v=v)
        detections3D = pd.read_csv(det_path)

        klt_path = det_path.replace('.csv', '_klt.pklz')
        klts = load(klt_path)

        print_flush("Tracking...")
        tracks = make_tracks(dataset, v, detections3D, klts, munkres, ts,
                             calib, config, start_stop=start_stop)

        print_flush("Saving tracks...")
        save(tracks, out_path)

        if make_videos:
            vidpath = "{dsp}{ds}/videos/{v}.mkv".format(dsp=datasets_path,
                                                        ds=dataset, v=v)
            print_flush("Rendering video...")
            render_video(tracks, vidpath, out_path.replace('.pklz', '.mp4'),
                         calib=calib, mask=mask, fps=dc.get('video_fps'))

    print_flush("Done!")
def main(cmd, dataset, run, conf, make_videos):
    from pathlib import Path

    if make_videos:
        from visualize_tracking import render_video
        from config import DatasetConfig
        from apply_mask import Masker

        mask = Masker(dataset)
        dc = DatasetConfig(dataset)

    config_path = runs_path / "{}_{}".format(dataset, run) / "world_tracking_optimization.pklz"
    if config_path.is_file():
        config = load(config_path)
    else:
        #raise(ValueError("No world tracking optimized configuration exists at {}".format(config_path)))
        config = WorldTrackingConfig(default_config)

    calib = Calibration(dataset)
    munkres = Munkres()
    ts = Timestamps(dataset)

    start_stop = None

    if cmd == "findvids":
        vidnames = (datasets_path / dataset / "videos").glob('*.mkv')
        vidnames = [x.stem for x in vidnames]
        vidnames.sort()

        outfolder = runs_path / "{}_{}".format(dataset, run) / "tracks_world"
        mkdir(outfolder)
    else:
        vidnames = [cmd]
        outfolder = Path('./')
        start_stop = (0, 500)

    for v in vidnames:
        print_flush(v)
        out_path = outfolder / (v + '_tracks.pklz')

        print_flush("Loading data...")
        det_path = runs_path / "{}_{}".format(dataset, run) / "detections_world" / (v + '_world.csv')
        detections3D = pd.read_csv(det_path)

        klt_path = det_path.with_name(det_path.stem + '_klt.pklz')
        klts = load(klt_path)

        print_flush("Tracking...")
        tracks = make_tracks(dataset, v, detections3D, klts, munkres, ts,
                             calib, config, start_stop=start_stop)

        print_flush("Saving tracks...")
        save(tracks, out_path)

        if make_videos:
            vidpath = datasets_path / dataset / "videos" / (v + '.mkv')
            print_flush("Rendering video...")
            render_video(tracks, vidpath, out_path.with_suffix('.mp4'),
                         calib=calib, mask=mask, fps=dc.get('video_fps'))

    print_flush("Done!")
def slideshow(dataset, outpath, fps=10, repeat=20):
    ld = LoadDetections()
    dets = ld.custom(dataset)
    imfiles = list(set(dets.image_file))

    if not imfiles:
        return False

    cc = class_colors()
    mask = Masker(dataset)
    classnames = get_classnames(dataset)

    with io.get_writer(outpath, fps=fps) as vid:
        for imfile in imfiles:
            d = dets[dets.image_file == imfile]

            # Add "class_name" and "class_index" columns which are missing
            d = d.rename(index=str, columns={"type": "class_name"})
            indices = [1 + classnames.index(x) for x in d['class_name']]
            d['class_index'] = indices

            im = io.imread(imfile)
            im = mask.mask(im, alpha=0.5)

            width = float(im.shape[1])
            height = float(im.shape[0])

            frame = draw(im, d, cc, conf_thresh=-1.0,
                         x_scale=width, y_scale=height)

            for i in range(repeat):
                vid.append_data(frame)

    return True
def main(cmd, dataset, imsize, visualize):
    imsize = parse_resolution(imsize)
    mask = Masker(dataset)

    if cmd == "findvids" or cmd == "continue":
        vidfolder = "{}{}/videos/".format(datasets_path, dataset)
        kltfolder = "{}{}/klt/".format(datasets_path, dataset)
        mkdir(kltfolder)

        allvids = sorted(glob(vidfolder + "*.mkv"))

        if cmd == "continue":
            existing = sorted(glob(kltfolder + "*.pklz"))
            existing = [right_remove(x.split('/')[-1], '.pklz') for x in existing]
            allvids = [x for x in allvids
                       if not right_remove(x.split('/')[-1], '.mkv') in existing]

        for vidpath in allvids:
            datpath = kltfolder + vidpath.split('/')[-1].replace('.mkv', '.pklz')
            if visualize:
                outvidpath = datpath.replace('.pklz', '_klt.mp4')
                print_flush("{} -> {} & {}".format(vidpath, datpath, outvidpath))
            else:
                outvidpath = None
                print_flush("{} -> {}".format(vidpath, datpath))

            klt_save(vidpath, datpath, imsize, mask, outvidpath)

        print_flush("Done!")
    else:
        raise ValueError("Unknown command: {}".format(cmd))
def test_on_video(model, name, experiment, videopath, outvideopath, classnames,
                  batch_size=32, input_shape=(480, 640, 3), soft=False,
                  width=480, height=640, conf_thresh=0.75, csv_conf_thresh=0.75):
    """ Applies a trained SSD model to a video

        Arguments:
        model           -- the SSD model, e.g. from get_model
        name            -- name of dataset
        experiment      -- name of training run
        videopath       -- path to input video
        outvideopath    -- path to output video showing the detections
        classnames      -- list of all the classes
        batch_size      -- number of images processed in parallel, lower this
                           if you get out-of-memory errors
        input_shape     -- size of images fed to SSD
        soft            -- whether to do soft NMS or normal NMS
        width           -- width to scale detections with (can be set to 1 if
                           detections are already on the right scale)
        height          -- height to scale detections with (can be set to 1 if
                           detections are already on the right scale)
        conf_thresh     -- detections with confidences below this are not shown
                           in output video. Set to negative to not visualize
                           confidences.
        csv_conf_thresh -- detections with confidences below this are ignored.
                           This should be the same as conf_thresh unless
                           conf_thresh is negative.
    """
    masker = Masker(name)

    num_classes = len(classnames) + 1
    colors = class_colors(num_classes)

    make_vid = True
    suffix = outvideopath.split('.')[-1]
    if suffix == 'csv':
        make_vid = False
        csvpath = outvideopath
    else:
        csvpath = outvideopath.replace('.{}'.format(suffix), '.csv')

    print_flush('Generating priors')
    im_in = np.random.random((1, input_shape[1], input_shape[0], input_shape[2]))
    priors = model.predict(im_in, batch_size=1)[0, :, -8:]
    bbox_util = BBoxUtility(num_classes, priors)

    vid = io.get_reader(videopath)
    if make_vid:
        outvid = io.get_writer(outvideopath, fps=30)

    inputs = []
    frames = []

    all_detections = []
    for i, frame in enumerate(vid):
        frame = masker.mask(frame)
        resized = cv2.resize(frame, (input_shape[0], input_shape[1]))

        frames.append(frame.copy())
        inputs.append(resized)

        if len(inputs) == batch_size:
            inputs = np.array(inputs).astype(np.float64)
            inputs = preprocess_input(inputs)

            preds = model.predict(inputs, batch_size=batch_size, verbose=0)
            results = bbox_util.detection_out(preds, soft=soft)

            for result, frame, frame_number in zip(results, frames,
                                                   range(i - batch_size, i)):
                result = [r if len(r) > 0 else np.zeros((1, 6)) for r in result]
                raw_detections = pd.DataFrame(np.vstack(result),
                                              columns=['class_index', 'confidence',
                                                       'xmin', 'ymin', 'xmax', 'ymax'])

                rescale(raw_detections, 'xmin', width)
                rescale(raw_detections, 'xmax', width)
                rescale(raw_detections, 'ymin', height)
                rescale(raw_detections, 'ymax', height)
                rescale(raw_detections, 'class_index', 1)

                ci = raw_detections['class_index']
                cn = [classnames[int(x) - 1] for x in ci]
                raw_detections['class_name'] = cn

                raw_detections['frame_number'] = (frame_number + 2)
                all_detections.append(raw_detections[raw_detections.confidence > csv_conf_thresh])

                if make_vid:
                    frame = draw(frame, raw_detections, colors, conf_thresh=conf_thresh)
                    outvid.append_data(frame)

            frames = []
            inputs = []

        if i % (10 * batch_size) == 0:
            print_flush(i)

    detections = pd.concat(all_detections)
    detections.to_csv(csvpath)
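# A minimal usage sketch (hypothetical model, dataset/run names and paths).
# Passing a path ending in '.csv' as outvideopath skips video rendering and
# only writes the detections CSV.
def _demo_test_on_video(model):
    classnames = get_classnames('sweden2')  # hypothetical dataset name
    test_on_video(model, 'sweden2', 'default', 'input.mkv', 'output.mp4',
                  classnames, batch_size=32, input_shape=(480, 640, 3))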
def main(dataset, run, n_clips, clip_length):
    dc = DatasetConfig(dataset)
    rc = RunConfig(dataset, run)
    mask = Masker(dataset)
    classes = get_classnames(dataset)
    num_classes = len(classes) + 1
    calib = Calibration(dataset)

    dataset_path = "{dsp}{ds}/".format(dsp=datasets_path, ds=dataset)
    run_path = "{rp}{ds}_{r}/".format(rp=runs_path, ds=dataset, r=run)

    # Grab a bunch of videos
    vids_query = "{dsp}videos/*.mkv".format(dsp=dataset_path)
    all_vids = glob(vids_query)
    all_vids = [right_remove(x.split('/')[-1], '.mkv') for x in all_vids]
    all_vids.sort()

    vids = []

    if n_clips > len(all_vids):
        n_clips = len(all_vids)

    if n_clips == len(all_vids):
        vids = all_vids
    else:
        while len(vids) < n_clips:
            vid = choice(all_vids)
            if not vid in vids:
                vids.append(vid)

    print_flush(vids)

    # Find out what has been run on all of these videos, what to include
    include_klt = True
    include_pixeldets = True
    include_worlddets = True
    include_worldtracks = True

    klts = []
    pixeldets = []
    worlddets = []
    worldtracks = []

    # Point tracks need to be converted for faster access
    vidres = dc.get('video_resolution')
    kltres = dc.get('point_track_resolution')

    class KLTConfig(object):
        klt_x_factor = 0
        klt_y_factor = 0

    klt_config = KLTConfig()
    klt_config.klt_x_factor = vidres[0] / kltres[0]
    klt_config.klt_y_factor = vidres[1] / kltres[1]

    ssdres = rc.get('detector_resolution')
    x_scale = vidres[0] / ssdres[0]
    y_scale = vidres[1] / ssdres[1]

    colors = class_colors(num_classes)

    for vid in vids:
        f = get_klt_path(dataset_path, vid)
        if not isfile(f):
            include_klt = False
        else:
            klt = load(f)
            klt, klt_frames = convert_klt(klt, klt_config)
            pts = (klt, klt_frames, class_colors(n_cols_klts))
            klts.append(pts)

        f = get_pixeldet_path(run_path, vid)
        if not isfile(f):
            include_pixeldets = False
        else:
            dets = pd.read_csv(f)
            pixeldets.append((dets, colors, x_scale, y_scale))

        f = get_worlddet_path(run_path, vid)
        if not isfile(f):
            include_worlddets = False
        else:
            dets = pd.read_csv(f)
            worlddets.append((dets, colors, calib))

        f = get_worldtracks_path(run_path, vid)
        if not isfile(f):
            include_worldtracks = False
        else:
            tracks = load(f)
            worldtracks.append((tracks, class_colors(n_cols_tracks), calib))

    print_flush("Point tracks: {}".format(include_klt))
    print_flush("Pixel coordinate detections: {}".format(include_pixeldets))
    print_flush("World coordinate detections: {}".format(include_worlddets))
    print_flush("World coordinate tracks: {}".format(include_worldtracks))

    # Decide where to start and stop in the videos
    clip_length = clip_length * dc.get('video_fps')  # convert from seconds to frames
    print_flush("Clip length in frames: {}".format(clip_length))

    clips = []
    for vid in vids:
        start, stop = make_clip(vid, clip_length, dataset_path)
        clips.append((start, stop))

    incs = [include_klt, include_pixeldets, include_worlddets, include_worldtracks]
    funs = [klt_frame, pixeldet_frame, worlddet_frame, worldtracks_frame]
    dats = [klts, pixeldets, worlddets, worldtracks]
    nams = ["Point tracks", "Detections in pixel coordinates",
            "Detections in world coordinates", "Tracks in world coordinates"]

    print_flush(clips)

    with iio.get_writer("{trp}summary.mp4".format(trp=run_path),
                        fps=dc.get('video_fps')) as outvid:
        for i_vid, vid in enumerate(vids):
            print_flush(vid)
            old_prog = 0

            with iio.get_reader("{dsp}videos/{v}.mkv".format(dsp=dataset_path, v=vid)) as invid:
                start, stop = clips[i_vid]
                for i_frame in range(start, stop):
                    frame = invid.get_data(i_frame)

                    pieces = []

                    for inc, fun, dat, nam in zip(incs, funs, dats, nams):
                        if inc:
                            piece = fun(dat[i_vid],
                                        mask.mask(frame.copy(), alpha=0.5),
                                        i_frame)
                            draw_text(piece, vid, i_frame, nam)
                            pieces.append(piece)

                    outvid.append_data(join(pieces))

                    prog = float(i_frame - start) / (stop - start)
                    if prog - old_prog > 0.1:
                        print_flush("{}%".format(round(prog * 100)))
                        old_prog = prog

    print_flush("Done!")
def autoannotate(dataset, import_datasets, input_shape, image_shape, batch_size,
                 batch_size2, epochs, frozen_layers):
    soft = False

    classes = get_classnames(dataset)

    input_shape = parse_resolution(input_shape)
    image_shape = parse_resolution(image_shape)

    model, bbox_util = train(dataset, import_datasets, input_shape, batch_size,
                             epochs, frozen_layers, train_amount=1.0)

    print_flush("Auto-annotating...")
    masker = Masker(dataset)

    inputs = []
    impaths = []
    to_annotate = get_images_to_autoannotate(dataset)

    # rep_last needed since we use large batches, for speed, to make sure we run on all images
    for impath in rep_last(to_annotate, batch_size2):
        im = iio.imread(impath)
        im = masker.mask(im)
        resized = cv2.resize(im, (input_shape[0], input_shape[1]))

        inputs.append(resized)
        impaths.append(impath)

        if len(inputs) == batch_size2:
            inputs = np.array(inputs).astype(np.float64)
            inputs = preprocess_input(inputs)

            preds = model.predict(inputs, batch_size=batch_size2, verbose=0)
            results = bbox_util.detection_out(preds, soft=soft)

            for result, res_path in zip(results, impaths):
                result = [r if len(r) > 0 else np.zeros((1, 6)) for r in result]
                raw_detections = pd.DataFrame(np.vstack(result),
                                              columns=['class_index', 'confidence',
                                                       'xmin', 'ymin', 'xmax', 'ymax'])

                auto_path = res_path.with_suffix('.auto')

                # Sort detections by confidence, keeping the top ones
                # This seems to be more robust than a hard-coded confidence threshold
                # Note that a confidence threshold can be chosen in the annotation web UI
                n = 128
                dets = [x for x in pandas_loop(raw_detections)]
                dets.sort(key=lambda x: 1.0 - x['confidence'])
                if len(dets) > n:
                    dets = dets[:n]

                with auto_path.open('w') as f:
                    for det in dets:
                        conf = round(det['confidence'], 4)
                        line = "{index} {cx} {cy} {w} {h} conf:{conf} {cn}\n".format(
                            index=int(det['class_index']),
                            cx=round((det['xmin'] + det['xmax']) / 2, 4),
                            cy=round((det['ymin'] + det['ymax']) / 2, 4),
                            w=round(det['xmax'] - det['xmin'], 4),
                            h=round(det['ymax'] - det['ymin'], 4),
                            conf=conf,
                            cn=classes[int(det['class_index']) - 1])
                        f.write(line)

                print_flush("Wrote {}".format(auto_path))

            inputs = []
            impaths = []

    assert not inputs  # If this fails, not all images were processed!

    print_flush("Done!")
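# A minimal sketch of how a line in the generated '.auto' files could be parsed
# back (hypothetical helper, not part of the original module; the format is the
# one written above: "<class_index> <cx> <cy> <w> <h> conf:<confidence> <class_name>").
def _parse_auto_line(line):
    tokens = line.split()
    return {'class_index': int(tokens[0]),
            'cx': float(tokens[1]), 'cy': float(tokens[2]),
            'w': float(tokens[3]), 'h': float(tokens[4]),
            'confidence': float(tokens[5].split(':')[1]),
            'class_name': tokens[6]}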
def main(dataset, run, videos):
    # Note: This main function only works for world coordinate tracks!

    calib = Calibration(dataset)
    dc = DatasetConfig(dataset)
    masker = Masker(dataset)

    if videos == 'all':
        from glob import glob
        files = glob('{rp}{ds}_{r}/tracks_world/*_tracks.pklz'.format(
            rp=runs_path, ds=dataset, r=run))
        video_names = [right_remove(x.split('/')[-1], '_tracks.pklz')
                       for x in files]
    elif videos.startswith('random:'):
        num = int(left_remove(videos, 'random:'))

        from glob import glob
        files = glob('{rp}{ds}_{r}/tracks_world/*_tracks.pklz'.format(
            rp=runs_path, ds=dataset, r=run))
        all_video_names = [right_remove(x.split('/')[-1], '_tracks.pklz')
                           for x in files]

        video_names = []
        while len(video_names) < num:
            video_name = choice(all_video_names)
            if not video_name in video_names:
                video_names.append(video_name)

            # Just in case user wants more videos than there are
            if len(video_names) == len(all_video_names):
                break
    else:
        # Assumes the user types one or more videos, separated by commas with no spaces
        video_names = videos.split(',')

        # In case user includes endings
        video_names = [right_remove(x.rstrip(), '.mkv') for x in video_names]

        # In case user includes spaces
        video_names = [x.strip(' ') for x in video_names]

    print_flush("Chosen videos: ")
    print_flush(str(video_names))
    for video_name in video_names:
        print_flush(video_name)
        print_flush("Loading...")
        tracks = load('{rp}{ds}_{r}/tracks_world/{v}_tracks.pklz'.format(
            rp=runs_path, ds=dataset, r=run, v=video_name))
        vidpath = "{dsp}{ds}/videos/{v}.mkv".format(dsp=datasets_path,
                                                    ds=dataset, v=video_name)

        if not isfile(vidpath):
            raise ValueError("Incorrect input {}".format(videos))

        outvidpath = '{rp}{ds}_{r}/tracks_world/{v}_tracks.mp4'.format(
            rp=runs_path, ds=dataset, r=run, v=video_name)

        print_flush("Rendering...")
        render_video(tracks, vidpath, outvidpath, mask=masker,
                     id_mode="global", calib=calib, fps=dc.get('video_fps'))

    print_flush("Done!")
def main(cmd, dataset, run, vidres, ssdres, kltres, conf, make_videos):
    from storage import load, save
    from folder import datasets_path, runs_path
    from pathlib import Path
    from folder import mkdir

    mask = Masker(dataset)

    if cmd == "findvids":
        vidnames = (datasets_path / dataset / "videos").glob('*.mkv')
        vidnames = [x.stem for x in vidnames]
        vidnames.sort()

        outfolder = runs_path / '{}_{}'.format(dataset, run) / 'tracks'
    else:
        vidnames = [cmd]
        outfolder = Path('./')

    vidres = parse_resolution(vidres)
    ssdres = parse_resolution(ssdres)
    kltres = parse_resolution(kltres)

    x_factor = float(vidres[0]) / ssdres[0]
    y_factor = float(vidres[1]) / ssdres[1]
    det_dims = ('xmin', 'xmax', 'ymin', 'ymax')
    det_factors = (x_factor, x_factor, y_factor, y_factor)

    c = Config(vidres, kltres, conf)

    mkdir(outfolder)

    for v in vidnames:
        det_path = runs_path / "{}_{}".format(dataset, run) / "csv" / (v + '.csv')
        detections = pd.read_csv(det_path)

        for dim, factor in zip(det_dims, det_factors):
            detections[dim] = round(detections[dim] * factor).astype(int)

        klt = load(datasets_path / dataset / "klt" / (v + '.pklz'))
        klt, klt_frames = convert_klt(klt, c)

        tracks = []
        if len(detections) > 0:
            tracks = build_tracks(detections, klt, klt_frames, c)
            print_flush("{} tracks done".format(v))
            save(tracks, outfolder / '{}_tracks.pklz'.format(v))
        else:
            print_flush("{} skipping tracking, because there were no detections".format(v))

        if make_videos:
            if tracks:
                from visualize_tracking import render_video
                vidpath = datasets_path / dataset / "videos" / (v + '.mkv')
                render_video(tracks, vidpath, outfolder / (v + "_tracks.mp4"),
                             mask=mask)
                print_flush("{} video done".format(v))
            else:
                print_flush("{} skipping video rendering, because there were no tracks".format(v))

    print_flush("Done!")
def rare_class_mining(dataset, class_name, time_dist, sampling_rate,
                      import_datasets, input_shape, image_shape, batch_size,
                      batch_size2, epochs, frozen_layers, confidence):
    soft = False

    classes = get_classnames(dataset)

    ts = Timestamps(dataset)

    # Find all videos in dataset
    vidnames = list((datasets_path / dataset / "videos").glob('*.mkv'))

    all_found = []

    for v in vidnames:
        # Find video length from log file (computing this from the video file is too slow)
        log_file = (datasets_path / dataset / "logs" / v.with_suffix('.log').name).read_text().split('\n')
        last = -1
        while not log_file[last]:
            last -= 1
        last_line = log_file[last]
        v_len = int(last_line.split(' ')[0])

        print_flush("{} of length {}".format(v, v_len))

        # Find existing annotations
        frames_log = (datasets_path / dataset / "objects" / "train" / v.stem / "frames.log").read_text().split()
        # Remove first line, which is the video name, and any empty lines
        frames_log = [x for x in frames_log[1:] if x]
        annotated = [int(x) for x in frames_log]
        print_flush("Avoiding the following existing frames: ")
        print_flush(str(annotated))

        curr_time = ts.get(v.stem, 0)
        annotated_times = [ts.get(v.stem, x) for x in annotated]

        found = []
        found_times = []
        done = False

        while not done:
            # Sample in time
            curr_time += timedelta(seconds=sampling_rate)
            curr_frame = ts.get_frame_number_given_vidname(curr_time, v.stem)

            if curr_frame >= v_len:
                # We have reached the end of the video
                done = True
                continue

            if curr_frame in annotated:
                continue

            # Check if we are too close to existing annotations
            dists = [abs((curr_time - x).total_seconds()) for x in annotated_times]
            if any([(x <= time_dist) for x in dists]):
                continue

            # Check if we are too close to any previously chosen interesting frames
            dists = [abs((curr_time - x).total_seconds()) for x in found_times]
            if any([(x <= time_dist) for x in dists]):
                continue

            # This is a frame we could work with
            found.append(curr_frame)
            found_times.append(curr_time)

        all_found.append((v, found))

    print_flush("Candidate frames:")
    found_some = False
    for f in all_found:
        v, l = f
        print_flush("{} : {}".format(v, l))
        if l:
            found_some = True

    if not found_some:
        print_flush("Found no interesting frames. Quitting...")
        import sys
        sys.exit(1)

    print_flush("Starting to train object detector with existing annotations...")
    input_shape = parse_resolution(input_shape)
    image_shape = parse_resolution(image_shape)
    model, bbox_util = train(dataset, import_datasets, input_shape, batch_size,
                             epochs, frozen_layers, train_amount=1.0)

    print_flush("Applying the model to the images to find objects of type '{}'".format(class_name))
    masker = Masker(dataset)

    inputs = []
    frame_nums = []
    im_origs = []
    vids = []

    found_data = []

    for f in all_found:
        v, l = f

        with iio.get_reader(v) as vid:
            for frame_number in l:
                im_orig = vid.get_data(frame_number)
                im = im_orig.copy()
                im = masker.mask(im)
                resized = cv2.resize(im, (input_shape[0], input_shape[1]))

                inputs.append(resized)
                frame_nums.append(frame_number)
                im_origs.append(im_orig)
                vids.append(v)

                if len(inputs) == batch_size2:
                    tmp = process(inputs, frame_nums, im_origs, vids,
                                  confidence, class_name, soft, batch_size2,
                                  model, bbox_util, classes)
                    found_data.extend(tmp)

                    inputs = []
                    frame_nums = []
                    im_origs = []
                    vids = []

    if inputs:
        # There are still some leftovers
        tmp = process(inputs, frame_nums, im_origs, vids, confidence,
                      class_name, soft, len(inputs), model, bbox_util, classes)
        found_data.extend(tmp)

    print_flush("Writing images...")
    for x in found_data:
        v, f, im = x

        im_folder = datasets_path / dataset / "objects" / "train" / v.stem
        im_num = max([int(x.stem) for x in im_folder.glob('*.jpg')]) + 1
        im_path = im_folder / "{}.jpg".format(im_num)

        iio.imwrite(im_path, im)
        print_flush("Written {}".format(im_path))

        # Add the new frame numbers to frames.log for this video
        flog = im_folder / "frames.log"
        with flog.open('a') as log:
            log.write(str(f) + ' ')

    print_flush("Done!")
def main(cmd, dataset, run, vidres, ssdres, kltres, conf, make_videos):
    from storage import load, save
    from folder import datasets_path, runs_path

    mask = Masker(dataset)
    #v = '20170516_163607_4C86'
    #v = '20170516_121024_A586'

    if cmd == "findvids":
        from glob import glob
        vidnames = glob('{}{}/videos/*.mkv'.format(datasets_path, dataset))
        vidnames = [right_remove(x.split('/')[-1], '.mkv') for x in vidnames]
        vidnames.sort()

        outfolder = '{}{}_{}/tracks/'.format(runs_path, dataset, run)
    else:
        vidnames = [cmd]
        outfolder = './'

    vidres = parse_resolution(vidres)
    ssdres = parse_resolution(ssdres)
    kltres = parse_resolution(kltres)

    x_factor = float(vidres[0]) / ssdres[0]
    y_factor = float(vidres[1]) / ssdres[1]
    det_dims = ('xmin', 'xmax', 'ymin', 'ymax')
    det_factors = (x_factor, x_factor, y_factor, y_factor)

    c = Config(vidres, kltres, conf)

    from folder import mkdir
    mkdir(outfolder)

    for v in vidnames:
        detections = pd.read_csv('{}{}_{}/csv/{}.csv'.format(runs_path, dataset, run, v))

        for dim, factor in zip(det_dims, det_factors):
            detections[dim] = round(detections[dim] * factor).astype(int)

        klt = load('{}{}/klt/{}.pklz'.format(datasets_path, dataset, v))
        klt, klt_frames = convert_klt(klt, c)

        tracks = []
        if len(detections) > 0:
            tracks = build_tracks(detections, klt, klt_frames, c)
            print_flush("{} tracks done".format(v))
            save(tracks, '{}{}_tracks.pklz'.format(outfolder, v))
        else:
            print_flush("{} skipping tracking, because there were no detections".format(v))

        if make_videos:
            if tracks:
                from visualize_tracking import render_video
                vidpath = "{}{}/videos/{}.mkv".format(datasets_path, dataset, v)
                render_video(tracks, vidpath,
                             "{}{}_tracks.mp4".format(outfolder, v), mask=mask)
                print_flush("{} video done".format(v))
            else:
                print_flush("{} skipping video rendering, because there were no tracks".format(v))

    print_flush("Done!")
def main(dataset, run, input_shape, seq_start, seq_stop, videopath,
         conf_thresh, i_seq, outname, batch_size):
    print_flush("> Predicting...")

    classes = get_classnames(dataset)
    masker = Masker(dataset)

    input_shape = parse_resolution(input_shape)

    num_classes = len(classes) + 1
    model = get_model(dataset, run, input_shape, num_classes, verbose=False)
    priors = get_priors(model, input_shape)
    bbox_util = BBoxUtility(num_classes, priors)

    width = input_shape[0]
    height = input_shape[1]

    inputs = []
    outputs = []
    old_frame = None

    with io.get_reader(videopath) as vid:
        vlen = len(vid)
        for i_in_seq in range(seq_start, seq_stop):
            if i_in_seq < vlen:
                frame = vid.get_data(i_in_seq)
                frame = masker.mask(frame)
                old_frame = frame
            else:
                frame = old_frame

            resized = cv2.resize(frame, (width, height))
            inputs.append(resized)

            if len(inputs) == batch_size:
                inputs2 = np.array(inputs)
                inputs2 = inputs2.astype(np.float32)
                inputs2 = preprocess_input(inputs2)

                y = model.predict_on_batch(inputs2)
                outputs.append(y)

                inputs = []

    preds = np.vstack(outputs)

    print_flush("> Processing...")
    all_detections = []
    seq_len = seq_stop - seq_start

    for i in range(seq_len):
        frame_num = i + seq_start

        if frame_num < vlen:
            pred = preds[i, :]
            pred = pred.reshape(1, pred.shape[0], pred.shape[1])
            results = bbox_util.detection_out(pred, soft=False)

            detections = process_results(results, width, height, classes,
                                         conf_thresh, frame_num)
            all_detections.append(detections)

    dets = pd.concat(all_detections)

    # For the first sequence we open the output file in write mode, and for
    # later sequences in append mode. This way, the file is still overwritten
    # if this script is run multiple times.
    open_mode = 'a'
    include_header = False
    if i_seq == 0:
        open_mode = 'w'
        include_header = True

    print_flush("> Writing to {} ...".format(outname))
    with open(outname, open_mode) as f:
        dets.to_csv(f, header=include_header)
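# A minimal sketch of how this entry point could be driven over a long video in
# fixed-size sequences (all parameter values and the input_shape string format
# are hypothetical). i_seq == 0 writes the CSV header; later sequences append to
# the same file. Note that seq_len should be a multiple of batch_size, since
# batches above are only flushed when completely full.
def _demo_sequences():
    seq_len = 512
    for i_seq, start in enumerate(range(0, 2048, seq_len)):
        main('sweden2', 'default', '(640,480,3)', start, start + seq_len,
             'input.mkv', conf_thresh=0.5, i_seq=i_seq, outname='dets.csv',
             batch_size=32)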
def visualize_tracks(outvidpath, dataset, gts, tracks=None, stack_axis='v'):
    import imageio as iio
    from visualize_tracking import _draw_world, draw_world
    from visualize import class_colors
    from apply_mask import Masker
    from config import DatasetConfig

    if not (tracks is None):
        calib = Calibration(dataset)

        # Reset IDs, sorted by first appearance
        tracks = sorted(tracks, key=lambda x: x.history[0][0])
        for i, track in enumerate(tracks, start=1):
            track.id = i

    dc = DatasetConfig(dataset)

    gts_by_vid = split_lambda(gts, lambda x: x[0])
    assert len(gts_by_vid) == 1
    vid = list(gts_by_vid.keys())[0]

    n_colors = 50
    colors = class_colors(n_colors)

    mask = Masker(dataset)

    with iio.get_writer(outvidpath, fps=dc.get('video_fps')) as outvid:
        with iio.get_reader(datasets_path / dataset / "videos" / (vid + '.mkv')) as invid:
            gt_by_frame = split_lambda(gts, lambda x: x[1])
            fns = list(gt_by_frame.keys())
            fns.sort()

            for fn in fns:
                gts_frame = gt_by_frame[fn]

                frame = invid.get_data(fn)
                frame = mask.mask(frame, alpha=0.5)

                if not (tracks is None):
                    tracks_frame = frame.copy()

                for gt in gts_frame:
                    vid, fn, t, x, y, i, c, px, py = gt
                    text = "{} {}".format(c, i)
                    col = colors[i % n_colors]
                    frame = _draw_world(frame, text, px, py, col)

                if not (tracks is None):
                    for track in tracks:
                        draw_world(tracks_frame, track, fn,
                                   colors[track.id % n_colors], calib)

                    if stack_axis == 'h':
                        frame = np.hstack((frame, tracks_frame))
                    elif stack_axis == 'v':
                        frame = np.vstack((frame, tracks_frame))
                    else:
                        raise ValueError("Incorrect stack axis {}, try 'h' or 'v'".format(stack_axis))

                outvid.append_data(frame)
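# A minimal usage sketch (hypothetical ground truth tuples of the form
# (vid, frame_number, time, x, y, id, class_name, pixel_x, pixel_y), matching
# the unpacking above). When tracks are given, ground truth and tracks are
# rendered side by side along the chosen stack axis.
def _demo_visualize_tracks(gts, tracks):
    visualize_tracks('comparison.mp4', 'sweden2', gts,
                     tracks=tracks, stack_axis='h')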
def autoannotate(dataset, import_datasets, input_shape, image_shape, batch_size,
                 batch_size2, epochs, frozen_layers):
    soft = False

    input_shape = parse_resolution(input_shape)
    image_shape = parse_resolution(image_shape)

    print_flush("Loading ground truth...")
    load_detections = LoadDetections()
    datasets = [dataset]
    if import_datasets:
        datasets.extend(import_datasets.split(','))

    detections = load_detections.custom(datasets)

    detections = detections.reset_index(drop=True)
    image_props = get_image_props(detections)
    detections = detections_add_ytrue(detections, image_props, dataset)

    detections.index = detections.image_file

    print_flush('Ground truth object counts:')
    print_flush(detections.type.value_counts())

    classes = get_classnames(dataset)
    num_classes = len(classes) + 1

    keys = sorted(detections.image_file.unique())
    shuffle(keys)

    num_train = int(round(0.9 * len(keys)))
    train_keys = keys[:num_train]
    val_keys = keys[num_train:]

    print_flush('Loading model...')
    model = SSD300((input_shape[1], input_shape[0], input_shape[2]),
                   num_classes=num_classes)
    model.load_weights(ssd_path + 'weights_SSD300.hdf5', by_name=True)

    print_flush("Making priors...")
    im_in = np.random.random((1, input_shape[1], input_shape[0], input_shape[2]))
    priors = model.predict(im_in, batch_size=1)[0, :, -8:]
    bbox_util = BBoxUtility(num_classes, priors)

    generator_kwargs = {
        'saturation_var': 0.5,
        'brightness_var': 0.5,
        'contrast_var': 0.5,
        'lighting_std': 0.5,
        'hflip_prob': 0.5,
        'vflip_prob': 0,
        'do_crop': True,
        'crop_area_range': [0.1, 1.0],
        'aspect_ratio_range': [0.5, 2]
    }

    path_prefix = ''
    gen = Generator(detections, bbox_util, batch_size, path_prefix,
                    train_keys, val_keys,
                    (input_shape[1], input_shape[0]), **generator_kwargs)

    # Freeze the first few groups of layers, depending on frozen_layers
    freeze = [
        ['input_1', 'conv1_1', 'conv1_2', 'pool1'],
        ['conv2_1', 'conv2_2', 'pool2'],
        ['conv3_1', 'conv3_2', 'conv3_3', 'pool3'],
        ['conv4_1', 'conv4_2', 'conv4_3', 'pool4'],
        ['conv5_1', 'conv5_2', 'conv5_3', 'pool5'],
    ][:min(frozen_layers, 5)]

    for L in model.layers:
        # freeze is a list of layer-name groups, so check membership per group
        if any(L.name in group for group in freeze):
            L.trainable = False

    callbacks = [LearningRateScheduler(schedule)]

    optim = keras.optimizers.Adam(lr=BASE_LR / 10)
    model.compile(optimizer=optim,
                  loss=MultiboxLoss(num_classes, neg_pos_ratio=2.0).compute_loss)

    print_flush("Training...")
    history = model.fit_generator(gen.generate(True),
                                  steps_per_epoch=gen.train_batches,
                                  epochs=epochs,
                                  verbose=2,
                                  callbacks=callbacks,
                                  validation_data=gen.generate(False),
                                  validation_steps=gen.val_batches,
                                  workers=1)

    print_flush("Auto-annotating...")
    masker = Masker(dataset)

    inputs = []
    impaths = []
    to_annotate = get_images_to_autoannotate(dataset)

    # rep_last needed since we use large batches, for speed, to make sure we run on all images
    for impath in rep_last(to_annotate, batch_size2):
        im = iio.imread(impath)
        im = masker.mask(im)
        resized = cv2.resize(im, (input_shape[0], input_shape[1]))

        inputs.append(resized)
        impaths.append(impath)

        if len(inputs) == batch_size2:
            inputs = np.array(inputs).astype(np.float64)
            inputs = preprocess_input(inputs)

            preds = model.predict(inputs, batch_size=batch_size2, verbose=0)
            results = bbox_util.detection_out(preds, soft=soft)

            for result, res_path in zip(results, impaths):
                result = [r if len(r) > 0 else np.zeros((1, 6)) for r in result]
                raw_detections = pd.DataFrame(np.vstack(result),
                                              columns=['class_index', 'confidence',
                                                       'xmin', 'ymin', 'xmax', 'ymax'])

                auto_path = res_path.replace('.jpg', '.auto')

                # Sort detections by confidence, keeping the top ones
                # This seems to be more robust than a hard-coded confidence threshold
                # Note that a confidence threshold can be chosen in the annotation web UI
                n = 128
                dets = [x for x in pandas_loop(raw_detections)]
                dets.sort(key=lambda x: 1.0 - x['confidence'])
                if len(dets) > n:
                    dets = dets[:n]

                with open(auto_path, 'w') as f:
                    for det in dets:
                        conf = round(det['confidence'], 4)
                        line = "{index} {cx} {cy} {w} {h} conf:{conf} {cn}\n".format(
                            index=int(det['class_index']),
                            cx=round((det['xmin'] + det['xmax']) / 2, 4),
                            cy=round((det['ymin'] + det['ymax']) / 2, 4),
                            w=round(det['xmax'] - det['xmin'], 4),
                            h=round(det['ymax'] - det['ymin'], 4),
                            conf=conf,
                            cn=classes[int(det['class_index']) - 1])
                        f.write(line)

                print_flush("Wrote {}".format(auto_path))

            inputs = []
            impaths = []

    assert not inputs  # If this fails, not all images were processed!

    print_flush("Done!")