def init_random_indivs(self, size, min_fitness=-math.inf, min_models=0,
                       POOL_SIZE=100):
    if min_fitness == -math.inf and min_models == 0:
        # No constraints: just create and randomize `size` individuals
        self._individuals = [deepcopy(self._base_indiv) for _ in range(size)]
        for indiv in self._individuals:
            indiv.set_random()
    else:
        # Constrained: repeatedly randomize a pool and keep the individuals
        # that meet the fitness/model thresholds, until we have enough
        self._start_time = time()
        indivs = []
        tries = 0
        self._individuals = [deepcopy(self._base_indiv)
                             for _ in range(POOL_SIZE)]
        while len(indivs) < size:
            for indiv in self._individuals:
                indiv.set_random()
            self._compute_fitness(sort=False)
            for indiv in self._individuals:
                if (indiv.fitness.fitness >= min_fitness
                        and indiv.fitness.models >= min_models):
                    indivs.append(deepcopy(indiv))
            tries += POOL_SIZE
            print_flush("Found {} indivs with {} tries ({}s.)".format(
                len(indivs), tries, round(time() - self._start_time)))
        self._individuals = indivs[:size]

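# Rough cost model for the constrained branch above (illustrative only):
# if a random individual passes the thresholds with probability p, about
# size / p randomizations are needed in total, i.e. roughly this many
# passes over the pool:
def _expected_pool_passes(size, p, pool_size=100):
    return math.ceil(size / (p * pool_size))
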
def apply_gradient_adam(x, g, i_batch, m=None, v=None, step_size=0.001,
                        b1=0.9, b2=0.999, eps=1e-7, verbose=True):
    g = np.array(g)
    if m is None or v is None:
        m = np.zeros_like(x)
        v = np.zeros_like(x)
    m = (1 - b1) * g + b1 * m  # First moment estimate.
    v = (1 - b2) * (g ** 2) + b2 * v  # Second moment estimate.
    mhat = m / (1 - b1 ** (i_batch + 1))  # Bias correction.
    vhat = v / (1 - b2 ** (i_batch + 1))
    d = step_size * mhat / (np.sqrt(vhat) + eps)
    x = x - d
    if verbose:
        try:
            print_flush(' Step size modifier is {}.'.format(
                np.mean(mhat / (np.sqrt(vhat) + eps))), 0, comm.Get_rank())
        except Exception:
            # Fall back to plain print, e.g. when no MPI communicator
            # is available
            print(' Step size modifier is {}.'.format(
                np.mean(mhat / (np.sqrt(vhat) + eps))))
    return x, m, v

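# A minimal, hypothetical usage sketch of apply_gradient_adam: minimizing
# f(x) = ||x||^2, whose gradient is 2x. The objective, starting point and
# iteration count are illustrative only; m and v are threaded between calls
# so the moment estimates accumulate across batches.
def _adam_usage_sketch():
    x = np.random.rand(5)
    m, v = None, None
    for i_batch in range(100):
        g = 2 * x  # gradient of ||x||^2
        x, m, v = apply_gradient_adam(x, g, i_batch, m=m, v=v,
                                      step_size=0.1, verbose=False)
    return x  # should approach the zero vector
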
def process(inputs, frame_nums, im_origs, vids, confidence, class_name,
            soft, batch_size2, model, bbox_util, classes):
    found_data = []

    inputs = np.array(inputs).astype(np.float64)
    inputs = preprocess_input(inputs)

    preds = model.predict(inputs, batch_size=batch_size2, verbose=0)
    results = bbox_util.detection_out(preds, soft=soft)

    for result, frame_num, im_res, v in zip(results, frame_nums,
                                            im_origs, vids):
        result = [r if len(r) > 0 else np.zeros((1, 6)) for r in result]
        for r in result:
            if r[1] > confidence:
                this_class_name = classes[int(r[0]) - 1]
                if this_class_name == class_name:
                    found_data.append((v, frame_num, im_res))
                    print_flush(
                        "Found an object of class {} in frame {} in video {}"
                        .format(class_name, frame_num, v.stem))
                    # Once we've found an object of the right class, we
                    # don't care about this image any more
                    break

    return found_data

def create_file_objects(self, use_checkpoint=False):
    if len(self.params_list) > 0:
        for param_name in self.params_list:
            fmode = 'a' if use_checkpoint else 'w'
            try:
                # Try to create the file with the parallel (MPI-IO) driver
                self.params_file_pointer_dict[param_name] = h5py.File(
                    os.path.join(self.output_folder,
                                 'intermediate_{}.h5'.format(param_name)),
                    fmode, driver='mpio', comm=comm)
                print_flush(
                    'Created intermediate file: {}'.format(
                        os.path.join(
                            self.output_folder,
                            'intermediate_{}.h5'.format(param_name))),
                    0, rank)
            except Exception:
                # Fall back to a serial HDF5 file
                self.params_file_pointer_dict[param_name] = h5py.File(
                    os.path.join(self.output_folder,
                                 'intermediate_{}.h5'.format(param_name)),
                    fmode)
            try:
                dset_p = self.params_file_pointer_dict[
                    param_name].create_dataset(
                        'obj', shape=self.whole_object_size,
                        dtype='float64',
                        data=np.zeros(self.whole_object_size))
            except Exception:
                # The dataset already exists, e.g. when resuming from a
                # checkpoint
                dset_p = self.params_file_pointer_dict[param_name]['obj']
            # if rank == 0: dset_p[...] = 0
            self.params_dset_dict[param_name] = dset_p
    return

def encode_handbrake(path, target_path, width, height, fps):
    cmd = ['HandBrakeCLI',
           '--width', str(width),
           '--height', str(height),
           '--rate', str(fps),
           '--crop', '0:0:0:0',
           '-i', str(path),
           '-o', str(target_path)]
    print_flush(' '.join(cmd))
    output = subprocess.check_output(cmd, stderr=subprocess.STDOUT,
                                     universal_newlines=True)
    print_flush(" " + output)

def _run_verbose(self):
    if self._verbose:
        print_flush(
            "Generation: {} | Best fitness: {} | Best ratio: {} | "
            "Models: {} | Time: avg per gen = {}s. total = {}s."
            .format(len(self._run_results),
                    round(self._run_results[-1]["fitness"].fitness),
                    round(self._run_results[-1]["fitness"].ratio),
                    self._run_results[-1]["fitness"].models,
                    self._run_results[-1]["time"],
                    round(time() - self._start_time)))

def detections_video(detections, videopath, outvideopath, classnames,
                     dataset, res, fps=15, conf_thresh=0.75,
                     show_frame_number=True, coords='pixels'):
    """ Renders a video with the detections drawn on top

    Arguments:
    detections        -- the detections as a pandas table
    videopath         -- path to input video
    outvideopath      -- path to output video showing the detections
    classnames        -- list of all the classes
    dataset           -- name of the dataset
    res               -- resolution of output video and of coordinates in
                         the csv file (assumed to be the same). Probably
                         the SSD resolution if performed on direct csv
                         files, and probably the video resolution if
                         performed on csv files with world coordinates
    fps               -- frames-per-second of output video
    conf_thresh       -- detections with confidences below this are not
                         shown in the output video. Set to negative to not
                         visualize confidences, or to 0.0 to show all of
                         them
    show_frame_number -- writes the frame number in the top left corner
                         of the video
    coords            -- coordinate system of detections
    """
    masker = Masker(dataset)

    calib = None
    if coords == 'world':
        calib = Calibration(dataset)

    num_classes = len(classnames) + 1
    colors = class_colors(num_classes)

    outwidth = make_divisible(res[0], 16)
    outheight = make_divisible(res[1], 16)
    pad_vid = True
    if (outwidth == res[0]) and (outheight == res[1]):
        pad_vid = False

    with io.get_reader(videopath) as vid:
        with io.get_writer(outvideopath, fps=fps) as outvid:
            for i, frame in enumerate(vid):
                frame = masker.mask(frame, alpha=0.5)
                frame = cv2.resize(frame, (res[0], res[1]))

                dets = detections[detections['frame_number'] == i]
                if len(dets) > 0:
                    frame = draw(frame, dets, colors,
                                 conf_thresh=conf_thresh,
                                 coords=coords, calib=calib)

                if pad_vid:
                    padded = 255 * np.ones((outheight, outwidth, 3),
                                           dtype=np.uint8)
                    padded[0:res[1], 0:res[0], :] = frame
                    frame = padded

                if show_frame_number:
                    cv2.putText(frame, 'Frame {}'.format(i), (10, 20),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                                (255, 255, 255), 1, cv2.LINE_AA)

                outvid.append_data(frame)

                if i % 500 == 0:
                    print_flush("Frame {}".format(i))

def print_history(history):
    print_flush("frame x y dx dy speed fromdet")
    for h in history:
        line = ""
        for hh in h:
            if isinstance(hh, datetime):
                continue
            elif isinstance(hh, (float, np.float64)):
                line += " {:.2f}".format(hh)
            else:
                line += ' ' + str(hh)
        print_flush(line)

def detect(dataset, run, res, conf, bs, clean):
    vids = sorted(glob("{}{}/videos/*.mkv".format(datasets_path, dataset)))

    outfolder = "{}{}_{}/csv/".format(runs_path, dataset, run)
    mkdir(outfolder)

    nvids = len(vids)
    for i, vid in enumerate(vids):
        vname = vid.split('/')[-1]
        vsplit = vname.split('.')
        outname = outfolder + vsplit[0] + '.csv'

        if not clean:
            if os.path.isfile(outname):
                print_flush("Skipping {}".format(outname))
                continue

        before = time()

        print_flush(vname)
        run_detector(dataset, run, vid, outname, res, conf, bs)

        done_percent = round(100 * (i + 1) / nvids)
        now = time()
        mins = floor((now - before) / 60)
        secs = round(now - before - 60 * mins)
        print_flush("{} {}% done, time: {} min {} seconds".format(
            vid, done_percent, mins, secs))

    print_flush("Done!")

def klt_save(vidpath, datpath, imsize, mask, outvidpath=None):
    """ Computes and saves KLT point tracks

    Arguments:
    vidpath    -- path to input video
    datpath    -- path to store the tracks (use .pklz extension)
    imsize     -- size to resize frames to
    mask       -- mask to apply if only parts of the image are of interest
    outvidpath -- path to output video, can be None
    """
    tracks = kltfull(vidpath, imsize, mask, outvidpath)

    print_flush("Saving...")
    save(tracks, datpath)

def detect(dataset, run, res, conf, bs, clean):
    vids = list((datasets_path / dataset / "videos").glob('*.mkv'))
    vids.sort()

    outfolder = runs_path / "{}_{}".format(dataset, run) / "csv"
    mkdir(outfolder)

    nvids = len(vids)
    for i, vid in enumerate(vids):
        vname = vid.stem
        outname = outfolder / (vname + '.csv')

        if not clean:
            if outname.is_file():
                print_flush("Skipping {}".format(outname))
                continue

        before = time()

        print_flush(vname)
        run_detector(dataset, run, vid, outname, res, conf, bs)

        done_percent = round(100 * (i + 1) / nvids)
        now = time()
        mins = floor((now - before) / 60)
        secs = round(now - before - 60 * mins)
        print_flush("{} {}% done, time: {} min {} seconds".format(
            vid, done_percent, mins, secs))

    print_flush("Done!")

def apply_gradient_gd(x, g, step_size=0.001, dynamic_rate=True, i_batch=0,
                      first_downrate_iteration=92):
    g = np.array(g)
    if dynamic_rate:
        # Halve the step size once for every threshold already passed;
        # the thresholds are spaced exponentially further apart
        threshold_iteration = first_downrate_iteration
        i = 1
        while threshold_iteration < i_batch:
            threshold_iteration += first_downrate_iteration * 2 ** i
            i += 1
            step_size /= 2.
            print_flush(' -- Step size halved.', 0, comm.Get_rank())
    x = x - step_size * g
    return x

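# Illustrative sketch of the halving schedule in apply_gradient_gd (the
# horizon of 1000 batches is an arbitrary example). The thresholds fall at
# (2**n - 1) * first_downrate_iteration for n = 1, 2, 3, ..., so with the
# default of 92 the step size halves around batches 92, 276 and 644.
def _downrate_thresholds(first_downrate_iteration=92, max_batches=1000):
    thresholds = []
    threshold = first_downrate_iteration
    i = 1
    while threshold < max_batches:
        thresholds.append(threshold)
        threshold += first_downrate_iteration * 2 ** i
        i += 1
    return thresholds  # [92, 276, 644] for the defaults
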
def main(cmd, res, dataset, run, conf, fps, coords):
    res = parse_resolution(res)
    classnames = get_classnames(dataset)

    local_output = False
    csvs = []
    if cmd == "findvids":
        if coords == "pixels":
            query = "{rp}{ds}_{r}/csv/*.csv".format(rp=runs_path,
                                                    ds=dataset, r=run)
        elif coords == "world":
            query = "{rp}{ds}_{r}/detections_world/*.csv".format(
                rp=runs_path, ds=dataset, r=run)
        found = glob(query)
        found.sort()
        csvs.extend(found)
    else:
        csvs.append(cmd)
        local_output = True

    if coords == "pixels":
        out_folder = '{rp}{ds}_{r}/detections/'.format(rp=runs_path,
                                                       ds=dataset, r=run)
    elif coords == "world":
        out_folder = '{rp}{ds}_{r}/detections_world/'.format(
            rp=runs_path, ds=dataset, r=run)
    mkdir(out_folder)

    for csv_path in csvs:
        vidname = right_remove(csv_path.split('/')[-1], '.csv')
        if coords == "world":
            vidname = right_remove(vidname, '_world')

        vid_path = "{dsp}{ds}/videos/{v}.mkv".format(dsp=datasets_path,
                                                     ds=dataset, v=vidname)

        if local_output:
            outvid_path = '{}.mp4'.format(vidname)
        else:
            outvid_path = '{}{}.mp4'.format(out_folder, vidname)

        detections = pd.read_csv(csv_path)

        detections_video(detections, vid_path, outvid_path, classnames,
                         dataset, res, fps=fps, conf_thresh=conf,
                         coords=coords)
        print_flush(outvid_path)

    print_flush("Done!")

def main(cmd, res, dataset, run, conf, fps, coords):
    res = parse_resolution(res)
    classnames = get_classnames(dataset)

    local_output = False
    csvs = []
    if cmd == "findvids":
        if coords == "pixels":
            found = (runs_path / "{}_{}".format(dataset, run) /
                     "csv").glob('*.csv')
        elif coords == "world":
            found = (runs_path / "{}_{}".format(dataset, run) /
                     "detections_world").glob('*.csv')
        found = list(found)
        found.sort()
        csvs.extend(found)
    else:
        csvs.append(cmd)
        local_output = True

    if coords == "pixels":
        out_folder = runs_path / "{}_{}".format(dataset, run) / "detections"
    elif coords == "world":
        out_folder = (runs_path / "{}_{}".format(dataset, run) /
                      "detections_world")
    mkdir(out_folder)

    for csv_path in csvs:
        vidname = csv_path.stem
        if coords == "world":
            vidname = right_remove(vidname, '_world')

        vid_path = datasets_path / dataset / "videos" / (vidname + '.mkv')

        if local_output:
            outvid_path = Path('.') / '{}.mp4'.format(vidname)
        else:
            outvid_path = out_folder / '{}.mp4'.format(vidname)

        detections = pd.read_csv(csv_path)

        detections_video(detections, vid_path, outvid_path, classnames,
                         dataset, res, fps=fps, conf_thresh=conf,
                         coords=coords)
        print_flush(outvid_path)

    print_flush("Done!")

def apply_gradient(self, x, g, i_batch, step_size=0.001, b1=0.9, b2=0.999,
                   eps=1e-7, verbose=True, shared_file_object=False,
                   m=None, v=None):
    if m is None or v is None:
        if shared_file_object:
            m = self.params_chunk_array_dict['m']
            v = self.params_chunk_array_dict['v']
        else:
            m = self.params_whole_array_dict['m']
            v = self.params_whole_array_dict['v']
    m = (1 - b1) * g + b1 * m  # First moment estimate.
    v = (1 - b2) * (g ** 2) + b2 * v  # Second moment estimate.
    mhat = m / (1 - b1 ** (i_batch + 1))  # Bias correction.
    vhat = v / (1 - b2 ** (i_batch + 1))
    d = step_size * mhat / (np.sqrt(vhat) + eps)
    x = x - d
    if verbose:
        try:
            print_flush(' Step size modifier is {}.'.format(
                np.mean(mhat / (np.sqrt(vhat) + eps))), 0, comm.Get_rank())
        except Exception:
            # Fall back to plain print, e.g. when no MPI communicator
            # is available
            print(' Step size modifier is {}.'.format(
                np.mean(mhat / (np.sqrt(vhat) + eps))))
    if shared_file_object:
        self.params_chunk_array_dict['m'] = m
        self.params_chunk_array_dict['v'] = v
    else:
        self.params_whole_array_dict['m'] = m
        self.params_whole_array_dict['v'] = v
    self.i_batch += 1
    return x

def encode_imageio(path, target_path, width, height, fps):
    rescale = True
    with iio.get_reader(path) as invid:
        with iio.get_writer(target_path, fps=fps) as outvid:
            for i, frame in enumerate(invid):
                # If resolution is the same, we should not rescale
                if i == 0:
                    shape = frame.shape
                    if (shape[0] == height) and (shape[1] == width):
                        rescale = False
                        print_flush("Does not resize")

                if rescale:
                    frame = cv2.resize(frame, (width, height))
                outvid.append_data(frame)

                if (i + 1) % 500 == 0:
                    print_flush(" {}".format(i + 1))

def get_model(name, experiment, input_shape, num_classes=6, verbose=True):
    """ Gets an SSD model, with trained weights

    Arguments:
    name        -- name of the dataset
    experiment  -- name of this training run
    input_shape -- size of images fed to SSD as a tuple like (640,480,3)
    num_classes -- the number of different object classes
                   (including background)
    """
    model = SSD300((input_shape[1], input_shape[0], input_shape[2]),
                   num_classes=num_classes)
    weights_files = list((runs_path / "{}_{}".format(name, experiment) /
                          "checkpoints").glob('*.hdf5'))
    # Checkpoint filenames are assumed to end in the recorded loss
    # (e.g. 'weights.03-1.42.hdf5'), so the checkpoint with the lowest
    # loss is picked
    weights_files_loss = np.array(
        [float(wf.stem.split('-')[-1]) for wf in weights_files])
    weights_file = weights_files[np.argmin(weights_files_loss)]
    model.load_weights(weights_file, by_name=True)
    if verbose:
        print_flush('Model loaded from {}'.format(weights_file))
    return model

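# Hypothetical usage; the dataset and experiment names are placeholders:
# model = get_model('mydataset', 'default', input_shape=(640, 480, 3),
#                   num_classes=6)
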
def main(dataset, num_ims, ims_per_vid, train_amount, night):
    outbasepath = datasets_path / dataset / "objects"
    trainpath = outbasepath / "train"
    testpath = outbasepath / "test"

    ts = Timestamps(dataset)
    vidnames = filtering(get_vidnames(dataset), num_ims // ims_per_vid,
                         ts, night)
    train, test = train_test_split(vidnames, train_amount)

    print_flush("Train:")
    for v in train:
        print_flush(v)
        gen_images(trainpath, v, ims_per_vid)

    print_flush("Test:")
    for v in test:
        print_flush(v)
        gen_images(testpath, v, ims_per_vid)

    print_flush("Done!")

def main(cmd, dataset, imsize, visualize):
    imsize = parse_resolution(imsize)
    mask = Masker(dataset)

    if cmd == "findvids" or cmd == "continue":
        vidfolder = datasets_path / dataset / "videos"
        kltfolder = datasets_path / dataset / "klt"
        mkdir(kltfolder)

        allvids = list(vidfolder.glob('*.mkv'))
        allvids.sort()

        if cmd == "continue":
            existing = list(kltfolder.glob('*.pklz'))
            existing.sort()
            existing = [x.stem for x in existing]
            allvids = [x for x in allvids if x.stem not in existing]

        for vidpath in allvids:
            datpath = kltfolder / (vidpath.stem + '.pklz')
            if visualize:
                outvidpath = datpath.with_name(datpath.stem + '_klt.mp4')
                print_flush("{} -> {} & {}".format(vidpath, datpath,
                                                   outvidpath))
            else:
                outvidpath = None
                print_flush("{} -> {}".format(vidpath, datpath))

            klt_save(vidpath, datpath, imsize, mask, outvidpath)

        print_flush("Done!")
    else:
        raise ValueError("Unknown command: {}".format(cmd))

def generate_tracks_in_zip(dataset, run, tf, coords):
    assert tf in all_track_formats
    tracks_format = tf

    if coords == 'pixels':
        tracks = glob("{rp}{dn}_{rn}/tracks/*.pklz".format(
            rp=runs_path, dn=dataset, rn=run))
    elif coords == 'world':
        tracks = glob("{rp}{dn}_{rn}/tracks_world/*.pklz".format(
            rp=runs_path, dn=dataset, rn=run))
    else:
        raise ValueError("Incorrect coordinate system: {}".format(coords))
    tracks.sort()

    zips_folder = "{rp}{dn}_{rn}/track_zips/".format(rp=runs_path,
                                                     dn=dataset, rn=run)
    mkdir(zips_folder)

    zip_path = "{zf}{tf}.zip".format(zf=zips_folder, tf=tracks_format)
    if coords == 'world':
        zip_path = zip_path.replace('.zip', '_world.zip')

    with ZipFile(zip_path, mode='w', compression=ZIP_DEFLATED) as z:
        for t in tracks:
            tname = t.split('/')[-1]
            print_flush(tname)

            text = format_tracks_from_file(t, tracks_format, coords)

            suffix = '.txt'
            if tracks_format == 'csv':
                suffix = '.csv'
            z.writestr(tname.replace('.pklz', suffix), text)

    print_flush("Done!")
    return zip_path

def train(training_data, dev_data, args):
    training_gen = data.DataLoader(training_data, batch_size=2)
    dev_gen = data.DataLoader(dev_data, batch_size=2)

    device = torch.device('cuda' if cuda.is_available() else 'cpu')

    print('Initializing model')
    model = SRCNN()
    loss = RMSE()

    if cuda.device_count() > 1:
        print('Using %d CUDA devices' % cuda.device_count())
        model = nn.DataParallel(
            model, device_ids=list(range(cuda.device_count())))

    model.to(device)
    loss.to(device)

    optimizer = optim.Adam(model.parameters(), lr=args.lr)

    def _train(loader, opt=True):
        total = 0
        for y, x in loader:
            y, x = y.to(device), x.to(device)
            pred_y = model(x)
            l = loss(pred_y, y)
            total += l.item()
            if opt:
                optimizer.zero_grad()
                l.backward()
                optimizer.step()
        cuda.synchronize()
        return total

    print('Training')
    for ep in range(args.ep):
        train_loss = _train(training_gen)
        dev_loss = _train(dev_gen, opt=False)
        print_flush('Epoch %d: Train %.4f Dev %.4f' %
                    (ep, train_loss, dev_loss))
        if ep % 50 == 0:
            save_model(model, args.o)

    return model

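# Minimal invocation sketch for train(). args only needs the attributes the
# function reads: lr (learning rate), ep (epoch count) and o (output path
# for save_model). The dataset objects are assumed to yield (target, input)
# pairs, matching the `for y, x in loader` unpacking above.
#
# from argparse import Namespace
# args = Namespace(lr=1e-4, ep=200, o='srcnn.pt')
# model = train(training_data, dev_data, args)
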
def run_detector(dataset, run, videopath, outname, input_shape, conf_thresh,
                 batch_size):
    with io.get_reader(videopath) as vid:
        vlen = len(vid)

    vlen2 = next_multiple(vlen, batch_size)
    seq_len = next_multiple(1000, batch_size)

    # In the past, there was a memory leak that forced a division of the
    # video into shorter sequences. The memory leak was fixed, but this was
    # kept because of laziness.
    seqs = make_seqs(vlen2, seq_len)

    for i_seq, seq in enumerate(seqs):
        print_flush("From frame {} to {}...".format(seq[0], seq[1]))
        completed = subprocess.run([
            python_path, "detect_csv_sub.py",
            "--dataset={}".format(dataset),
            "--run={}".format(run),
            "--input_shape={}".format(input_shape),
            "--seq_start={}".format(seq[0]),
            "--seq_stop={}".format(seq[1]),
            "--videopath={}".format(videopath),
            "--conf_thresh={}".format(conf_thresh),
            "--i_seq={}".format(i_seq),
            "--outname={}".format(outname),
            "--batch_size={}".format(batch_size)
        ], stdout=PIPE, stderr=PIPE)

        if completed.returncode != 0:
            raise Exception(
                "ERROR: Subprocess crashed. Return code: {}".format(
                    completed.returncode))
        else:
            print_flush("Subprocess completed successfully")

        print_flush("Subprocess output:")
        print_flush(completed.stdout.decode('UTF-8'))
        print_flush(completed.stderr.decode('UTF-8'))

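# next_multiple and make_seqs are project helpers not shown here; sketches
# of the assumed behavior (round n up to a multiple of k, and split
# [0, total) into (start, stop) chunks of seq_len frames):
def _next_multiple_sketch(n, k):
    return ((n + k - 1) // k) * k

def _make_seqs_sketch(total, seq_len):
    return [(start, min(start + seq_len, total))
            for start in range(0, total, seq_len)]
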
def generate_tracks_in_zip(dataset, run, tf, coords):
    assert tf in all_track_formats
    tracks_format = tf

    if coords == 'pixels':
        tracks = (runs_path / "{}_{}".format(dataset, run) /
                  "tracks").glob('*.pklz')
    elif coords == 'world':
        tracks = (runs_path / "{}_{}".format(dataset, run) /
                  "tracks_world").glob('*.pklz')
    else:
        raise ValueError("Incorrect coordinate system: {}".format(coords))
    tracks = list(tracks)
    tracks.sort()

    zips_folder = runs_path / "{}_{}".format(dataset, run) / "track_zips"
    mkdir(zips_folder)

    zip_path = zips_folder / (tracks_format + '.zip')
    if coords == 'world':
        zip_path = zip_path.with_name(zip_path.stem + '_world.zip')

    with ZipFile(str(zip_path), mode='w', compression=ZIP_DEFLATED) as z:
        for t in tracks:
            tname = t.name
            print_flush(tname)

            text = format_tracks_from_file(t, tracks_format, coords)

            suffix = '.txt'
            if tracks_format == 'csv':
                suffix = '.csv'
            z.writestr(tname.replace('.pklz', suffix), text)

    print_flush("Done!")
    return zip_path

def main(cmd, dataset, imsize, visualize):
    imsize = parse_resolution(imsize)
    mask = Masker(dataset)

    if cmd == "findvids" or cmd == "continue":
        vidfolder = "{}{}/videos/".format(datasets_path, dataset)
        kltfolder = "{}{}/klt/".format(datasets_path, dataset)
        mkdir(kltfolder)

        allvids = sorted(glob(vidfolder + "*.mkv"))

        if cmd == "continue":
            existing = sorted(glob(kltfolder + "*.pklz"))
            existing = [right_remove(x.split('/')[-1], '.pklz')
                        for x in existing]
            allvids = [x for x in allvids
                       if right_remove(x.split('/')[-1], '.mkv')
                       not in existing]

        for vidpath in allvids:
            datpath = kltfolder + vidpath.split('/')[-1].replace('.mkv',
                                                                 '.pklz')
            if visualize:
                outvidpath = datpath.replace('.pklz', '_klt.mp4')
                print_flush("{} -> {} & {}".format(vidpath, datpath,
                                                   outvidpath))
            else:
                outvidpath = None
                print_flush("{} -> {}".format(vidpath, datpath))

            klt_save(vidpath, datpath, imsize, mask, outvidpath)

        print_flush("Done!")
    else:
        raise ValueError("Unknown command: {}".format(cmd))

def main(dataset, run, input_shape, seq_start, seq_stop, videopath,
         conf_thresh, i_seq, outname, batch_size):
    print_flush("> Predicting...")
    classes = get_classnames(dataset)
    masker = Masker(dataset)
    input_shape = parse_resolution(input_shape)
    num_classes = len(classes) + 1

    model = get_model(dataset, run, input_shape, num_classes, verbose=False)
    priors = get_priors(model, input_shape)
    bbox_util = BBoxUtility(num_classes, priors)

    width = input_shape[0]
    height = input_shape[1]

    inputs = []
    outputs = []
    old_frame = None

    with io.get_reader(videopath) as vid:
        vlen = len(vid)
        for i_in_seq in range(seq_start, seq_stop):
            if i_in_seq < vlen:
                frame = vid.get_data(i_in_seq)
                frame = masker.mask(frame)
                old_frame = frame
            else:
                # Pad the final batch by repeating the last valid frame
                frame = old_frame

            resized = cv2.resize(frame, (width, height))
            inputs.append(resized)

            if len(inputs) == batch_size:
                inputs2 = np.array(inputs)
                inputs2 = inputs2.astype(np.float32)
                inputs2 = preprocess_input(inputs2)

                y = model.predict_on_batch(inputs2)
                outputs.append(y)

                inputs = []

    preds = np.vstack(outputs)

    print_flush("> Processing...")
    all_detections = []
    seq_len = seq_stop - seq_start
    for i in range(seq_len):
        frame_num = i + seq_start
        if frame_num < vlen:
            pred = preds[i, :]
            pred = pred.reshape(1, pred.shape[0], pred.shape[1])
            results = bbox_util.detection_out(pred, soft=False)

            detections = process_results(results, width, height, classes,
                                         conf_thresh, frame_num)
            all_detections.append(detections)

    dets = pd.concat(all_detections)

    # For the first sequence, we should open in write mode, and then in
    # append mode. This way, we still overwrite the files if this script
    # is run multiple times.
    open_mode = 'a'
    include_header = False
    if i_seq == 0:
        open_mode = 'w'
        include_header = True

    print_flush("> Writing to {} ...".format(outname))
    with open(outname, open_mode) as f:
        dets.to_csv(f, header=include_header)

def kltfull(video_file, imsize, mask, out_file=None):
    """ Performs KLT point tracking on a video.

    Arguments:
    video_file -- path to a source video file
    imsize     -- size which frames will be resized to
    mask       -- a Masker object which can be applied to only look at
                  parts of the images
    out_file   -- if set to a path to an output video path, then a video
                  showing the tracked points is created. Can be None, in
                  which case no video is made
    """
    # Used for not finding new points to track too close to existing ones
    mask_to_copy = 255 - cv2.resize(mask.saved_mask[:, :, 3], imsize)

    render_vid = True
    if out_file is None:
        render_vid = False

    track_len = 10
    detect_interval = 10
    tracks = []
    frame_idx = 0

    if render_vid:
        n_colors = 128
        colors = get_colors(n_colors)
        # We have a bunch of colors, and each point track gets one. Some
        # point tracks will share colors, but that is not really an issue.
        # These are for visualization only.

    id_generator = count()

    lk_params = dict(winSize=(15, 15),
                     maxLevel=1,
                     criteria=(cv2.TERM_CRITERIA_EPS |
                               cv2.TERM_CRITERIA_COUNT, 10, 0.03))

    feature_params = dict(maxCorners=5000,
                          qualityLevel=0.01,
                          minDistance=30,
                          blockSize=7)

    lost_tracks = []
    start_time = time()

    if render_vid:
        avi = io.get_writer(out_file, fps=10)

    with io.get_reader(video_file) as invid:
        vidlength = len(invid)
        for systime, frame in enumerate(invid):
            if systime % 400 == 0:
                print_flush("{} % done, elapsed time: {} s".format(
                    round(100 * systime / vidlength),
                    round(time() - start_time)))

            frame = cv2.resize(frame, imsize)
            frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            vis = frame.copy()

            if len(tracks) > 0:
                img0, img1 = prev_gray, frame_gray
                p0 = np.float32([tr[-1][1:3]
                                 for tr in tracks]).reshape(-1, 1, 2)

                # See how the points have moved between the two frames
                p1, st, err1 = cv2.calcOpticalFlowPyrLK(
                    img0, img1, p0, None, **lk_params)
                p0r, st, err = cv2.calcOpticalFlowPyrLK(
                    img1, img0, p1, None, **lk_params)
                d = abs(p0 - p0r).reshape(-1, 2).max(-1)
                good = d < 1

                new_tracks = []
                for tr, (x, y), good_flag, e in zip(tracks,
                                                    p1.reshape(-1, 2),
                                                    good, err1.flat):
                    if not good_flag:
                        lost_tracks.append(tr)
                        continue
                    tr.append((systime, x, y))
                    new_tracks.append(tr)
                    if render_vid:
                        cv2.circle(vis, (x, y), 2,
                                   colors[tr.id_num % n_colors], -1)
                tracks = new_tracks

                if render_vid:
                    for i_col, col in enumerate(colors):
                        cv2.polylines(vis, [
                            np.int32([(x, y) for f, x, y in tr[-20:]])
                            for tr in tracks
                            if (tr.id_num % n_colors) == i_col
                        ], False, col)

            if frame_idx % detect_interval == 0:
                # Makes sure we don't look for new points near existing ones
                mask2 = mask_to_copy.copy()
                for x, y in [np.int32(tr[-1][1:3]) for tr in tracks]:
                    cv2.circle(mask2, (x, y), 5, 0, -1)
                p = cv2.goodFeaturesToTrack(frame_gray, mask=mask2,
                                            **feature_params)
                if p is not None:
                    for x, y in np.float32(p).reshape(-1, 2):
                        nt = Track([(systime, int(x), int(y))])
                        nt.id_num = next(id_generator)
                        tracks.append(nt)

            # Remove tracks that go outside the masked region
            good_tracks = []
            for checked_track in tracks:
                last_time, last_x, last_y = checked_track[-1]
                x = clamp(int(last_x), 0, imsize[0] - 1)
                y = clamp(int(last_y), 0, imsize[1] - 1)
                sampled = mask_to_copy[y, x]
                if sampled > 127:
                    good_tracks.append(checked_track)
                else:
                    lost_tracks.append(checked_track)
            tracks = good_tracks

            frame_idx += 1
            prev_gray = frame_gray

            if render_vid:
                avi.append_data(vis)

    lost_tracks.extend(tracks)

    if render_vid:
        avi.close()

    # Round the track coordinates to integer pixel positions
    for tr in lost_tracks:
        for i in range(len(tr)):
            t, x, y = tr[i]
            tr[i] = (t, int(round(x)), int(round(y)))

    return lost_tracks

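# The two calcOpticalFlowPyrLK calls above implement the standard
# forward-backward consistency check: points are tracked from the previous
# frame to the current one and then back again, and only points that return
# to within 1 px of their origin are kept. A hypothetical standalone
# version of that test:
def _forward_backward_good(img0, img1, p0, lk_params, max_dist=1.0):
    p1, _, _ = cv2.calcOpticalFlowPyrLK(img0, img1, p0, None, **lk_params)
    p0r, _, _ = cv2.calcOpticalFlowPyrLK(img1, img0, p1, None, **lk_params)
    d = abs(p0 - p0r).reshape(-1, 2).max(-1)
    return p1, d < max_dist
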
def test_on_video(model, name, experiment, videopath, outvideopath,
                  classnames, batch_size=32, input_shape=(480, 640, 3),
                  soft=False, width=480, height=640, conf_thresh=0.75,
                  csv_conf_thresh=0.75):
    """ Applies a trained SSD model to a video

    Arguments:
    model           -- the SSD model, e.g. from get_model
    name            -- name of dataset
    experiment      -- name of training run
    videopath       -- path to input video
    outvideopath    -- path to output video showing the detections
    classnames      -- list of all the classes
    batch_size      -- number of images processed in parallel, lower this
                       if you get out-of-memory errors
    input_shape     -- size of images fed to SSD
    soft            -- whether to do soft NMS or normal NMS
    width           -- width to scale detections with (can be set to 1 if
                       detections are already on the right scale)
    height          -- height to scale detections with (can be set to 1 if
                       detections are already on the right scale)
    conf_thresh     -- detections with confidences below this are not shown
                       in output video. Set to negative to not visualize
                       confidences
    csv_conf_thresh -- detections with confidences below this are ignored.
                       This should be the same as conf_thresh unless
                       conf_thresh is negative
    """
    masker = Masker(name)

    num_classes = len(classnames) + 1
    colors = class_colors(num_classes)

    make_vid = True
    suffix = outvideopath.split('.')[-1]
    if suffix == 'csv':
        make_vid = False
        csvpath = outvideopath
    else:
        csvpath = outvideopath.replace('.{}'.format(suffix), '.csv')

    print_flush('Generating priors')
    im_in = np.random.random(
        (1, input_shape[1], input_shape[0], input_shape[2]))
    priors = model.predict(im_in, batch_size=1)[0, :, -8:]
    bbox_util = BBoxUtility(num_classes, priors)

    vid = io.get_reader(videopath)
    if make_vid:
        outvid = io.get_writer(outvideopath, fps=30)

    inputs = []
    frames = []

    all_detections = []
    for i, frame in enumerate(vid):
        frame = masker.mask(frame)
        resized = cv2.resize(frame, (input_shape[0], input_shape[1]))

        frames.append(frame.copy())
        inputs.append(resized)

        if len(inputs) == batch_size:
            inputs = np.array(inputs).astype(np.float64)
            inputs = preprocess_input(inputs)

            preds = model.predict(inputs, batch_size=batch_size, verbose=0)
            results = bbox_util.detection_out(preds, soft=soft)

            for result, frame, frame_number in zip(results, frames,
                                                   range(i - batch_size, i)):
                result = [r if len(r) > 0 else np.zeros((1, 6))
                          for r in result]
                raw_detections = pd.DataFrame(
                    np.vstack(result),
                    columns=['class_index', 'confidence',
                             'xmin', 'ymin', 'xmax', 'ymax'])

                rescale(raw_detections, 'xmin', width)
                rescale(raw_detections, 'xmax', width)
                rescale(raw_detections, 'ymin', height)
                rescale(raw_detections, 'ymax', height)
                rescale(raw_detections, 'class_index', 1)

                ci = raw_detections['class_index']
                cn = [classnames[int(x) - 1] for x in ci]
                raw_detections['class_name'] = cn

                raw_detections['frame_number'] = (frame_number + 2)
                all_detections.append(raw_detections[
                    raw_detections.confidence > csv_conf_thresh])

                if make_vid:
                    frame = draw(frame, raw_detections, colors,
                                 conf_thresh=conf_thresh)
                    outvid.append_data(frame)

            frames = []
            inputs = []

        if i % (10 * batch_size) == 0:
            print_flush(i)

    detections = pd.concat(all_detections)
    detections.to_csv(csvpath)

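# Hypothetical end-to-end usage, with placeholder names. Passing a path
# ending in '.csv' as outvideopath skips video rendering and only writes
# the detections CSV:
# model = get_model('mydataset', 'default', input_shape=(480, 640, 3))
# test_on_video(model, 'mydataset', 'default', 'video.mkv', 'out.mp4',
#               get_classnames('mydataset'))
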
def recode_minutes_imageio(files, logs_basepath, minutes, width, height,
                           fps, target, logs_target, suffix):
    """ Recodes videos such that each video is `minutes` many minutes long.
        Uses imageio to do this. Using handbrake would probably be possible,
        but a bit cumbersome to implement.
    """
    assert len(files) > 0

    # Build a structure of the start times of each video, to sort them
    print_flush("Structuring...")
    vids = []
    for vid_path in files:
        video_name = vid_path.stem
        log_path = logs_basepath / (video_name + '.log')
        with log_path.open('r') as f:
            first_line = f.readline().rstrip()
        first_time, frame_num = line_to_datetime(first_line)
        vids.append((vid_path, log_path, first_time))
    vids.sort(key=lambda x: x[2])

    # Go through the videos and build new videos, frame by frame
    can_make_more = True
    i_vid = 0
    i_frame = 0
    invid = iio.get_reader(vids[i_vid][0])
    inlog = read_log(vids[i_vid][1])

    rescale = True
    first_frame = invid.get_data(0)
    shape = first_frame.shape
    if (shape[0] == height) and (shape[1] == width):
        rescale = False
        print_flush("Does not resize")
    else:
        print_flush("Will resize to ({},{})".format(width, height))

    curr_time = vids[i_vid][2]
    while can_make_more:
        vidpath, logpath = generate_paths(curr_time, target, logs_target,
                                          suffix)
        print_flush("Making {}...".format(vidpath))
        outvid = iio.get_writer(vidpath, fps=fps)
        outlog = []
        out_framenum = 0

        first_time = curr_time
        while (curr_time - first_time).total_seconds() / 60.0 < minutes:
            if i_frame >= len(inlog):
                # We need to jump to the next input video and log
                i_vid += 1
                i_frame = 0
                if i_vid >= len(vids):
                    can_make_more = False
                    break
                invid.close()
                invid = iio.get_reader(vids[i_vid][0])
                inlog = read_log(vids[i_vid][1])

            frame = invid.get_data(i_frame)
            line = inlog[i_frame]
            curr_time, _ = line_to_datetime(line)
            i_frame += 1

            if rescale:
                frame = cv2.resize(frame, (width, height))

            # Renumber the frame in the log line to match the output video
            splot = line.split(" ")
            splot[0] = fill(out_framenum, 5)
            line = " ".join(splot)

            outvid.append_data(frame)
            outlog.append(line)
            out_framenum += 1

        # Close current output video/log
        outvid.close()
        with logpath.open('w') as f:
            for line in outlog:
                f.write("{}\n".format(line))

def import_videos(query, dataset, resolution, fps, suffix, method, logs,
                  minutes):
    assert suffix == '.mkv'
    logs = Path(logs)
    assert logs.is_dir()

    if method == "imageio":
        encode = encode_imageio
    elif method == "handbrake":
        encode = encode_handbrake
    else:
        raise ValueError("Incorrect method {}".format(method))

    resolution = parse_resolution(resolution)
    width, height = resolution[0:2]

    target = datasets_path / dataset / "videos"
    mkdir(target)

    logs_target = datasets_path / dataset / "logs"
    mkdir(logs_target)

    files = glob(query)
    files.sort()
    files = [Path(x) for x in files]

    if minutes == 0:
        for path in files:
            video_name = path.stem
            src_log_path = logs / (video_name + '.log')

            with src_log_path.open('r') as f:
                first = f.readline().rstrip()
            first_time, _ = line_to_datetime(first)

            target_path, target_log_path = generate_paths(
                first_time, target, logs_target, suffix)
            print_flush(target_path)

            encode(path, target_path, width, height, fps)

            if validate_logfile(src_log_path):
                # str() for Python 3.5 and earlier compatibility
                copy(str(src_log_path), str(target_log_path))
                print_flush("Log file OK! {}".format(src_log_path))
            else:
                raise ValueError(
                    "Incorrect log file {}".format(src_log_path))
    else:
        if method == "handbrake":
            # Recoding videos using handbrake into new clips of different
            # lengths, based on log files, would be cumbersome to implement.
            # Therefore, we instead first recode every video with handbrake
            # and then use imageio to recode the videos again into the
            # desired length. This should still provide handbrake's
            # robustness to strange videos, even though this solution is
            # slow.
            tmp_folder = Path("/data/tmp_import/")
            if tmp_folder.is_dir():
                rmtree(str(tmp_folder))
            mkdir(tmp_folder)

            for i, path in enumerate(files):
                print_flush("Handbraking {} ...".format(path))

                video_name = path.stem
                src_log_path = logs / (video_name + '.log')
                target_path = tmp_folder / (str(i) + suffix)
                target_log_path = tmp_folder / (str(i) + '.log')

                if validate_logfile(src_log_path):
                    copy(str(src_log_path), str(target_log_path))
                else:
                    raise ValueError(
                        "Incorrect log file {}".format(src_log_path))

                encode(path, target_path, width, height, fps)

            files = list(tmp_folder.glob('*' + suffix))
            files.sort()
            logs = tmp_folder

            print_flush("Handbrake section complete")

        recode_minutes_imageio(files, logs, minutes, width, height, fps,
                               target, logs_target, suffix)

        if method == "handbrake":
            rmtree(str(tmp_folder))

    print_flush("Done!")

def main(dataset, run, n_clips, clip_length):
    dc = DatasetConfig(dataset)
    rc = RunConfig(dataset, run)
    mask = Masker(dataset)
    classes = get_classnames(dataset)
    num_classes = len(classes) + 1
    calib = Calibration(dataset)

    dataset_path = "{dsp}{ds}/".format(dsp=datasets_path, ds=dataset)
    run_path = "{rp}{ds}_{r}/".format(rp=runs_path, ds=dataset, r=run)

    # Grab a bunch of videos
    vids_query = "{dsp}videos/*.mkv".format(dsp=dataset_path)
    all_vids = glob(vids_query)
    all_vids = [right_remove(x.split('/')[-1], '.mkv') for x in all_vids]
    all_vids.sort()

    vids = []

    if n_clips > len(all_vids):
        n_clips = len(all_vids)

    if n_clips == len(all_vids):
        vids = all_vids
    else:
        while len(vids) < n_clips:
            vid = choice(all_vids)
            if vid not in vids:
                vids.append(vid)

    print_flush(vids)

    # Find out what has been run on all of these videos, what to include
    include_klt = True
    include_pixeldets = True
    include_worlddets = True
    include_worldtracks = True

    klts = []
    pixeldets = []
    worlddets = []
    worldtracks = []

    # Point tracks need to be converted for faster access
    vidres = dc.get('video_resolution')
    kltres = dc.get('point_track_resolution')

    class KLTConfig(object):
        klt_x_factor = 0
        klt_y_factor = 0

    klt_config = KLTConfig()
    klt_config.klt_x_factor = vidres[0] / kltres[0]
    klt_config.klt_y_factor = vidres[1] / kltres[1]

    ssdres = rc.get('detector_resolution')
    x_scale = vidres[0] / ssdres[0]
    y_scale = vidres[1] / ssdres[1]

    colors = class_colors(num_classes)

    for vid in vids:
        f = get_klt_path(dataset_path, vid)
        if not isfile(f):
            include_klt = False
        else:
            klt = load(f)
            klt, klt_frames = convert_klt(klt, klt_config)
            pts = (klt, klt_frames, class_colors(n_cols_klts))
            klts.append(pts)

        f = get_pixeldet_path(run_path, vid)
        if not isfile(f):
            include_pixeldets = False
        else:
            dets = pd.read_csv(f)
            pixeldets.append((dets, colors, x_scale, y_scale))

        f = get_worlddet_path(run_path, vid)
        if not isfile(f):
            include_worlddets = False
        else:
            dets = pd.read_csv(f)
            worlddets.append((dets, colors, calib))

        f = get_worldtracks_path(run_path, vid)
        if not isfile(f):
            include_worldtracks = False
        else:
            tracks = load(f)
            worldtracks.append((tracks, class_colors(n_cols_tracks), calib))

    print_flush("Point tracks: {}".format(include_klt))
    print_flush("Pixel coordinate detections: {}".format(include_pixeldets))
    print_flush("World coordinate detections: {}".format(include_worlddets))
    print_flush("World coordinate tracks: {}".format(include_worldtracks))

    # Decide where to start and stop in the videos
    clip_length = clip_length * dc.get('video_fps')  # convert seconds to frames

    print_flush("Clip length in frames: {}".format(clip_length))

    clips = []
    for vid in vids:
        start, stop = make_clip(vid, clip_length, dataset_path)
        clips.append((start, stop))

    incs = [include_klt, include_pixeldets, include_worlddets,
            include_worldtracks]
    funs = [klt_frame, pixeldet_frame, worlddet_frame, worldtracks_frame]
    dats = [klts, pixeldets, worlddets, worldtracks]
    nams = ["Point tracks", "Detections in pixel coordinates",
            "Detections in world coordinates", "Tracks in world coordinates"]

    print_flush(clips)

    with iio.get_writer("{trp}summary.mp4".format(trp=run_path),
                        fps=dc.get('video_fps')) as outvid:
        for i_vid, vid in enumerate(vids):
            print_flush(vid)
            old_prog = 0

            with iio.get_reader("{dsp}videos/{v}.mkv".format(
                    dsp=dataset_path, v=vid)) as invid:
                start, stop = clips[i_vid]
                for i_frame in range(start, stop):
                    frame = invid.get_data(i_frame)

                    pieces = []

                    for inc, fun, dat, nam in zip(incs, funs, dats, nams):
                        if inc:
                            piece = fun(dat[i_vid],
                                        mask.mask(frame.copy(), alpha=0.5),
                                        i_frame)
                            draw_text(piece, vid, i_frame, nam)
                            pieces.append(piece)

                    outvid.append_data(join(pieces))

                    prog = float(i_frame - start) / (stop - start)
                    if prog - old_prog > 0.1:
                        print_flush("{}%".format(round(prog * 100)))
                        old_prog = prog

    print_flush("Done!")