def parse_value(self, val, ctype):
    """Convert a raw value according to ``ctype``.

    ``ctype`` is either the string ``'res2'`` or ``'res3'`` (the value is
    passed to ``parse_resolution`` with 2 or 3 components respectively), or a
    callable that is applied to ``val`` directly.

    For ``'res2'`` every component must be divisible by 16; for ``'res3'``
    every component except the last one must be. A resolution failing that
    check is turned into ``None``.
    """
    if ctype == 'res2':
        resolution = parse_resolution(val, 2)
        constrained = resolution
    elif ctype == 'res3':
        resolution = parse_resolution(val, 3)
        # The last component is exempt from the divisibility check.
        constrained = resolution[0:-1]
    else:
        return ctype(val)

    # Having resolutions not divisible by 16 causes issues with video encoding.
    if any(size % 16 != 0 for size in constrained):
        return None
    return resolution
def main(cmd, dataset, imsize, visualize):
    """Run KLT point tracking on the videos of a dataset.

    ``cmd`` must be ``"findvids"`` (process every ``.mkv`` in the dataset's
    ``videos`` folder) or ``"continue"`` (same, but skip videos that already
    have a ``.pklz`` file in the ``klt`` folder). Any other value raises
    ``ValueError``.

    When ``visualize`` is truthy, a ``*_klt.mp4`` path next to the ``.pklz``
    file is passed on to ``klt_save`` as well; otherwise ``None`` is passed.
    """
    imsize = parse_resolution(imsize)
    mask = Masker(dataset)

    if cmd not in ("findvids", "continue"):
        raise ValueError()

    vidfolder = datasets_path / dataset / "videos"
    kltfolder = datasets_path / dataset / "klt"
    mkdir(kltfolder)

    allvids = sorted(vidfolder.glob('*.mkv'))

    if cmd == "continue":
        # Videos whose KLT output already exists are not processed again.
        finished = {p.stem for p in kltfolder.glob('*.pklz')}
        allvids = [vid for vid in allvids if vid.stem not in finished]

    for vidpath in allvids:
        datpath = kltfolder / (vidpath.stem + '.pklz')
        outvidpath = None
        if visualize:
            outvidpath = datpath.with_name(datpath.stem + '_klt.mp4')
            print_flush("{} -> {} & {}".format(vidpath, datpath, outvidpath))
        else:
            print_flush("{} -> {}".format(vidpath, datpath))

        klt_save(vidpath, datpath, imsize, mask, outvidpath)

    print_flush("Done!")
def main(dataset, run, res, conf, invid, outvid):
    """Load the model of a run and apply it to a single video.

    ``res`` is parsed with ``parse_resolution``; its first two components are
    used as width and height, and a 3-channel input shape is built from them.
    ``conf`` is used both as the confidence threshold and as the CSV
    confidence threshold handed to ``test_on_video``.
    """
    res = parse_resolution(res)
    width, height = res[0], res[1]
    shape = (width, height, 3)

    model = get_model(dataset, run, input_shape=shape)
    classnames = get_classnames(dataset)

    test_on_video(model, dataset, run, invid, outvid, classnames,
                  width=width,
                  height=height,
                  input_shape=shape,
                  conf_thresh=conf,
                  csv_conf_thresh=conf)
def rx(filename, resolution, nsubchannels, nprocesses, nframes_per_process,
       receiver_args, video_start, video_duration):
    """Decode the frames of a video with a multi-process receiver.

    ``receiver_args`` is a string of keyword arguments (``"a=1, b=2"``) that
    is turned into a dict and forwarded to ``MultiProcReceiver``.

    The original ``sys.stdout`` is handed to ``DecodeCallback`` and
    ``sys.stdout`` is then pointed at ``sys.stderr``, so anything printed from
    here on (including the final statistics) goes to stderr. The redirection
    is intentionally left in place when the function returns.

    The receiver is always closed, even if decoding fails.
    """
    # SECURITY NOTE(review): eval() executes arbitrary code contained in
    # `receiver_args`. This is only acceptable while the string comes from a
    # trusted operator (e.g. the command line); do not feed external data here.
    receiver_args = eval('dict({})'.format(receiver_args))
    resolution = util.parse_resolution(resolution)

    out = sys.stdout
    sys.stdout = sys.stderr
    cb = DecodeCallback(out)

    frames = video_frame_src(filename, resolution, video_start, video_duration)
    recv = multiprocreceiver.MultiProcReceiver(nsubchannels,
                                               nprocesses,
                                               nframes_per_process,
                                               callback=cb.callback,
                                               **receiver_args)
    try:
        recv.decode_many(frames)
        # Parenthesised single argument: prints the same under Python 2 and 3.
        print(cb.final_stats())
    finally:
        # Release the receiver even when decoding raised.
        recv.close()
def main(cmd, res, dataset, run, conf, fps, coords):
    """Render detection videos from detection CSV files (string paths).

    ``cmd`` is either ``"findvids"``, in which case every CSV file of the run
    is rendered into the run's output folder, or the path of a single CSV
    file, whose video is written as ``<name>.mp4`` relative to the current
    working directory.

    ``coords`` selects the kind of detections: ``"pixels"`` reads from the
    run's ``csv/`` folder and writes to ``detections/``; ``"world"`` reads
    from and writes to ``detections_world/`` (and the ``_world`` suffix is
    stripped from the CSV name to find the video).

    Raises:
        ValueError: if ``coords`` is neither ``"pixels"`` nor ``"world"``.
            (Previously this surfaced as an unbound-variable error.)
    """
    if coords == "pixels":
        csv_subfolder, out_subfolder = "csv", "detections"
    elif coords == "world":
        csv_subfolder, out_subfolder = "detections_world", "detections_world"
    else:
        raise ValueError(
            "coords must be 'pixels' or 'world', got {!r}".format(coords))

    res = parse_resolution(res)
    classnames = get_classnames(dataset)

    run_folder = "{rp}{ds}_{r}/".format(rp=runs_path, ds=dataset, r=run)

    if cmd == "findvids":
        csvs = sorted(glob(run_folder + csv_subfolder + "/*.csv"))
        local_output = False
    else:
        csvs = [cmd]
        local_output = True

    out_folder = run_folder + out_subfolder + "/"
    mkdir(out_folder)

    for csv_path in csvs:
        vidname = right_remove(csv_path.split('/')[-1], '.csv')
        if coords == "world":
            vidname = right_remove(vidname, '_world')

        vid_path = "{dsp}{ds}/videos/{v}.mkv".format(dsp=datasets_path,
                                                     ds=dataset,
                                                     v=vidname)

        if local_output:
            outvid_path = '{}.mp4'.format(vidname)
        else:
            outvid_path = '{}{}.mp4'.format(out_folder, vidname)

        detections = pd.read_csv(csv_path)
        detections_video(detections, vid_path, outvid_path, classnames,
                         dataset, res, fps=fps, conf_thresh=conf,
                         coords=coords)
        print_flush(outvid_path)

    print_flush("Done!")
def main(cmd, res, dataset, run, conf, fps, coords):
    """Render detection videos from detection CSV files (pathlib paths).

    ``cmd`` is either ``"findvids"``, in which case every CSV file of the run
    is rendered into the run's output folder, or the path of a single CSV
    file, whose video is written as ``<name>.mp4`` in the current directory.

    ``coords`` selects the kind of detections: ``"pixels"`` reads from the
    run's ``csv`` folder and writes to ``detections``; ``"world"`` reads from
    and writes to ``detections_world`` (and the ``_world`` suffix is stripped
    from the CSV name to find the video).

    Raises:
        ValueError: if ``coords`` is neither ``"pixels"`` nor ``"world"``.
            (Previously this surfaced as an unbound-variable error.)
    """
    if coords == "pixels":
        csv_subfolder, out_subfolder = "csv", "detections"
    elif coords == "world":
        csv_subfolder, out_subfolder = "detections_world", "detections_world"
    else:
        raise ValueError(
            "coords must be 'pixels' or 'world', got {!r}".format(coords))

    res = parse_resolution(res)
    classnames = get_classnames(dataset)

    run_folder = runs_path / "{}_{}".format(dataset, run)

    if cmd == "findvids":
        csvs = sorted((run_folder / csv_subfolder).glob('*.csv'))
        local_output = False
    else:
        # `cmd` arrives as a plain string; the loop below relies on `.stem`,
        # so it has to be a Path like the globbed entries.
        csvs = [Path(cmd)]
        local_output = True

    out_folder = run_folder / out_subfolder
    mkdir(out_folder)

    for csv_path in csvs:
        vidname = csv_path.stem
        if coords == "world":
            vidname = right_remove(vidname, '_world')

        vid_path = datasets_path / dataset / "videos" / (vidname + '.mkv')

        if local_output:
            outvid_path = Path('.') / '{}.mp4'.format(vidname)
        else:
            outvid_path = out_folder / '{}.mp4'.format(vidname)

        detections = pd.read_csv(csv_path)
        detections_video(detections, vid_path, outvid_path, classnames,
                         dataset, res, fps=fps, conf_thresh=conf,
                         coords=coords)
        print_flush(outvid_path)

    print_flush("Done!")
def main(cmd, dataset, imsize, visualize):
    """Run KLT point tracking on the videos of a dataset (string paths).

    ``cmd`` must be ``"findvids"`` (process every ``.mkv`` in the dataset's
    ``videos/`` folder) or ``"continue"`` (same, but skip videos that already
    have a ``.pklz`` file in ``klt/``). Any other value raises ``ValueError``.

    When ``visualize`` is truthy, a ``*_klt.mp4`` path derived from the
    ``.pklz`` path is passed on to ``klt_save``; otherwise ``None`` is passed.
    """
    imsize = parse_resolution(imsize)
    mask = Masker(dataset)

    if cmd not in ("findvids", "continue"):
        raise ValueError()

    vidfolder = "{}{}/videos/".format(datasets_path, dataset)
    kltfolder = "{}{}/klt/".format(datasets_path, dataset)
    mkdir(kltfolder)

    allvids = glob(vidfolder + "*.mkv")
    allvids.sort()

    if cmd == "continue":
        # Names (without extension) of the videos that already have output.
        finished = [
            right_remove(done.split('/')[-1], '.pklz')
            for done in glob(kltfolder + "*.pklz")
        ]
        remaining = []
        for vid in allvids:
            if right_remove(vid.split('/')[-1], '.mkv') not in finished:
                remaining.append(vid)
        allvids = remaining

    for vidpath in allvids:
        filename = vidpath.split('/')[-1]
        datpath = kltfolder + filename.replace('.mkv', '.pklz')

        outvidpath = None
        if visualize:
            outvidpath = datpath.replace('.pklz', '_klt.mp4')
            print_flush("{} -> {} & {}".format(vidpath, datpath, outvidpath))
        else:
            print_flush("{} -> {}".format(vidpath, datpath))

        klt_save(vidpath, datpath, imsize, mask, outvidpath)

    print_flush("Done!")
def main(cmd, dataset, run, vidres, ssdres, kltres, make_videos):
    """Convert pixel detections of a run to world coordinates (string paths).

    ``cmd`` is ``"findvids"`` to process every ``.mkv`` video of the dataset
    (output goes to the run's ``detections_world/`` folder), or a single video
    name (output goes to ``./``).

    For each video the detections CSV is loaded, its box coordinates are
    rescaled from the SSD resolution to the video resolution, the KLT point
    tracks are loaded and converted, and the result of ``detections_to_3D`` is
    written as ``<video>_world.csv``. With ``make_videos`` a ``.mp4`` is
    rendered next to the CSV.
    """
    vidres = parse_resolution(vidres)
    ssdres = parse_resolution(ssdres)
    kltres = parse_resolution(kltres)

    x_factor = float(vidres[0]) / ssdres[0]
    y_factor = float(vidres[1]) / ssdres[1]
    # Column name -> scale factor taking it from SSD to video resolution.
    scaling = (
        ('xmin', x_factor),
        ('xmax', x_factor),
        ('ymin', y_factor),
        ('ymax', y_factor),
    )

    calib = Calibration(dataset)
    ts = Timestamps(dataset)

    class_heights = {}
    for entry in get_class_data(dataset):
        class_heights[entry['name']] = entry['height']

    # Minimal attribute holder carrying the two factors convert_klt is given.
    class KLTConfig(object):
        klt_x_factor = 0
        klt_y_factor = 0

    klt_config = KLTConfig()
    klt_config.klt_x_factor = vidres[0] / kltres[0]
    klt_config.klt_y_factor = vidres[1] / kltres[1]

    if cmd == "findvids":
        from glob import glob
        pattern = '{dsp}{ds}/videos/*.mkv'.format(dsp=datasets_path,
                                                  ds=dataset)
        vidnames = sorted(
            right_remove(found.split('/')[-1], '.mkv')
            for found in glob(pattern))
        outfolder = '{}{}_{}/detections_world/'.format(runs_path, dataset, run)
    else:
        vidnames = [cmd]
        outfolder = './'
    mkdir(outfolder)

    if make_videos:
        classnames = get_classnames(dataset)
        dc = DatasetConfig(dataset)
        fps = dc.get('video_fps')

    for v in vidnames:
        print_flush(v)
        csv_path = '{}{}_{}/csv/{}.csv'.format(runs_path, dataset, run, v)
        detections = pd.read_csv(csv_path)

        # Convert pixel coordinate positions from SSD resolution to video
        # resolution because Calibration assumes video resolution coordinates
        for column, factor in scaling:
            detections[column] = round(detections[column] * factor).astype(int)

        print_flush("Converting point tracks...")
        klt = load('{}{}/klt/{}.pklz'.format(datasets_path, dataset, v))
        klt, klt_frames = convert_klt(klt, klt_config)
        pts = PointTrackStructure(klt, klt_frames, vidres[0], vidres[1])

        outpath = '{of}{v}_world.csv'.format(of=outfolder, v=v)
        klt_outpath = outpath.replace('.csv', '_klt.pklz')

        print_flush("Converting to world coordinates...")
        detections3D = detections_to_3D(detections, pts, calib, ts, v,
                                        klt_save_path=klt_outpath,
                                        class_heights=class_heights)

        detections3D.to_csv(outpath, float_format='%.4f')

        if not make_videos:
            continue

        from visualize_detections import detections_video
        vidpath = "{dsp}{ds}/videos/{v}.mkv".format(dsp=datasets_path,
                                                    ds=dataset,
                                                    v=v)
        print_flush("Rendering video...")
        detections_video(detections3D, vidpath,
                         outpath.replace('.csv', '.mp4'),
                         classnames, dataset, vidres,
                         fps=fps, conf_thresh=0.0, coords='world')

    print_flush("Done!")
def main(dataset, run, input_shape, seq_start, seq_stop, videopath,
         conf_thresh, i_seq, outname, batch_size):
    """Run the detector on frames ``seq_start``..``seq_stop - 1`` of a video.

    Frames are masked, resized to the first two components of ``input_shape``
    and predicted in batches of ``batch_size``. Frame numbers beyond the end
    of the video reuse the last frame that was read, but produce no
    detections. The detections of the sequence are written as CSV to
    ``outname``: when ``i_seq`` is 0 the file is overwritten and gets a
    header, otherwise the rows are appended without a header.

    A trailing batch smaller than ``batch_size`` is predicted as well, so a
    sequence whose length is not a multiple of ``batch_size`` no longer ends
    up with fewer predictions than frames.
    """
    print_flush("> Predicting...")

    classes = get_classnames(dataset)
    masker = Masker(dataset)

    input_shape = parse_resolution(input_shape)
    num_classes = len(classes) + 1
    model = get_model(dataset, run, input_shape, num_classes, verbose=False)
    priors = get_priors(model, input_shape)
    bbox_util = BBoxUtility(num_classes, priors)

    width = input_shape[0]
    height = input_shape[1]

    def predict_batch(frames):
        # Stack, convert and preprocess a list of resized frames, then predict.
        batch = np.array(frames).astype(np.float32)
        batch = preprocess_input(batch)
        return model.predict_on_batch(batch)

    inputs = []
    outputs = []
    old_frame = None

    with io.get_reader(videopath) as vid:
        vlen = len(vid)

        for i_in_seq in range(seq_start, seq_stop):
            if i_in_seq < vlen:
                frame = vid.get_data(i_in_seq)
                frame = masker.mask(frame)
                old_frame = frame
            else:
                # Past the end of the video: repeat the last real frame.
                frame = old_frame

            resized = cv2.resize(frame, (width, height))
            inputs.append(resized)

            if len(inputs) == batch_size:
                outputs.append(predict_batch(inputs))
                inputs = []

    if inputs:
        # Leftover frames that did not fill a whole batch.
        outputs.append(predict_batch(inputs))

    preds = np.vstack(outputs)

    print_flush("> Processing...")
    all_detections = []
    seq_len = seq_stop - seq_start

    for i in range(seq_len):
        frame_num = i + seq_start
        if frame_num >= vlen:
            continue

        pred = preds[i, :]
        pred = pred.reshape(1, pred.shape[0], pred.shape[1])
        results = bbox_util.detection_out(pred, soft=False)

        detections = process_results(results, width, height, classes,
                                     conf_thresh, frame_num)
        all_detections.append(detections)

    dets = pd.concat(all_detections)

    # For the first sequence, we should open in write mode, and then in append
    # mode. This way, we still overwrite the files if this script is run
    # multiple times.
    if i_seq == 0:
        open_mode = 'w'
        include_header = True
    else:
        open_mode = 'a'
        include_header = False

    print_flush("> Writing to {} ...".format(outname))
    with open(outname, open_mode) as f:
        dets.to_csv(f, header=include_header)
def autoannotate(dataset, import_datasets, input_shape, image_shape,
                 batch_size, batch_size2, epochs, frozen_layers):
    """Train a detector and write ``.auto`` annotation files for images.

    The model is trained via ``train`` (with ``train_amount=1.0``) and then
    applied, in batches of ``batch_size2``, to the images returned by
    ``get_images_to_autoannotate``. For every image at most 128 detections
    with the highest confidence are written, one per line, to a file with the
    image's path and the suffix ``.auto``.

    ``image_shape`` is parsed but not used otherwise.
    """
    soft = False

    classes = get_classnames(dataset)

    input_shape = parse_resolution(input_shape)
    image_shape = parse_resolution(image_shape)

    model, bbox_util = train(dataset, import_datasets, input_shape,
                             batch_size, epochs, frozen_layers,
                             train_amount=1.0)

    print_flush("Auto-annotating...")
    masker = Masker(dataset)

    target_size = (input_shape[0], input_shape[1])
    columns = ['class_index', 'confidence', 'xmin', 'ymin', 'xmax', 'ymax']
    max_dets = 128

    def write_annotations(result, auto_path):
        # Empty entries are replaced by a single all-zero row so that
        # everything can be stacked into one table.
        stacked = np.vstack(
            [r if len(r) > 0 else np.zeros((1, 6)) for r in result])
        raw_detections = pd.DataFrame(stacked, columns=columns)

        # Sort detections by confidence, keeping the top ones
        # This seems to be more robust than a hard-coded confidence threshold
        # Note that a confidence threshold can be chosen in the annotation
        # web UI
        ranked = list(pandas_loop(raw_detections))
        ranked.sort(key=lambda d: 1.0 - d['confidence'])

        with auto_path.open('w') as f:
            for det in ranked[:max_dets]:
                class_index = int(det['class_index'])
                line = "{index} {cx} {cy} {w} {h} conf:{conf} {cn}\n".format(
                    index=class_index,
                    cx=round((det['xmin'] + det['xmax']) / 2, 4),
                    cy=round((det['ymin'] + det['ymax']) / 2, 4),
                    w=round(det['xmax'] - det['xmin'], 4),
                    h=round(det['ymax'] - det['ymin'], 4),
                    conf=round(det['confidence'], 4),
                    cn=classes[class_index - 1])
                f.write(line)

    pending = []
    pending_paths = []

    to_annotate = get_images_to_autoannotate(dataset)

    # rep_last needed since we use large batches, for speed, to make sure we
    # run on all images
    for impath in rep_last(to_annotate, batch_size2):
        im = masker.mask(iio.imread(impath))
        pending.append(cv2.resize(im, target_size))
        pending_paths.append(impath)

        if len(pending) != batch_size2:
            continue

        batch = np.array(pending).astype(np.float64)
        batch = preprocess_input(batch)

        preds = model.predict(batch, batch_size=batch_size2, verbose=0)
        results = bbox_util.detection_out(preds, soft=soft)

        for result, res_path in zip(results, pending_paths):
            auto_path = res_path.with_suffix('.auto')
            write_annotations(result, auto_path)
            print_flush("Wrote {}".format(auto_path))

        pending = []
        pending_paths = []

    assert not pending  # If this fails, not all images were processed!

    print_flush("Done!")
def main(cmd, dataset, run, vidres, ssdres, kltres, conf, make_videos):
    """Build tracks from the detections of a run (string paths).

    ``cmd`` is ``"findvids"`` to process every ``.mkv`` video of the dataset
    (output goes to the run's ``tracks/`` folder), or a single video name
    (output goes to ``./``).

    For each video the detections CSV is loaded and rescaled from the SSD
    resolution to the video resolution, the KLT point tracks are loaded and
    converted, and, if there is at least one detection, tracks are built and
    saved as ``<video>_tracks.pklz``. With ``make_videos`` a
    ``<video>_tracks.mp4`` is rendered whenever tracks were produced.
    """
    from storage import load, save
    from folder import datasets_path, runs_path, mkdir

    mask = Masker(dataset)

    if cmd == "findvids":
        from glob import glob
        found = glob('{}{}/videos/*.mkv'.format(datasets_path, dataset))
        vidnames = sorted(
            right_remove(path.split('/')[-1], '.mkv') for path in found)
        outfolder = '{}{}_{}/tracks/'.format(runs_path, dataset, run)
    else:
        vidnames = [cmd]
        outfolder = './'

    vidres = parse_resolution(vidres)
    ssdres = parse_resolution(ssdres)
    kltres = parse_resolution(kltres)

    x_factor = float(vidres[0]) / ssdres[0]
    y_factor = float(vidres[1]) / ssdres[1]
    # Column name -> scale factor taking it from SSD to video resolution.
    scaling = (
        ('xmin', x_factor),
        ('xmax', x_factor),
        ('ymin', y_factor),
        ('ymax', y_factor),
    )

    c = Config(vidres, kltres, conf)

    mkdir(outfolder)

    for v in vidnames:
        csv_path = '{}{}_{}/csv/{}.csv'.format(runs_path, dataset, run, v)
        detections = pd.read_csv(csv_path)
        for column, factor in scaling:
            detections[column] = round(detections[column] * factor).astype(int)

        klt = load('{}{}/klt/{}.pklz'.format(datasets_path, dataset, v))
        klt, klt_frames = convert_klt(klt, c)

        if len(detections) > 0:
            tracks = build_tracks(detections, klt, klt_frames, c)
            print_flush("{} tracks done".format(v))
            save(tracks, '{}{}_tracks.pklz'.format(outfolder, v))
        else:
            tracks = []
            print_flush(
                "{} skipping tracking, because there were no detections".
                format(v))

        if not make_videos:
            continue

        if tracks:
            from visualize_tracking import render_video
            vidpath = "{}{}/videos/{}.mkv".format(datasets_path, dataset, v)
            render_video(tracks, vidpath,
                         "{}{}_tracks.mp4".format(outfolder, v),
                         mask=mask)
            print_flush("{} video done".format(v))
        else:
            print_flush(
                "{} skipping video rendering, because there were no tracks"
                .format(v))

    print_flush("Done!")
def main(dataset, run, res):
    """Parse the results folder of a run.

    The folder is ``/data/dl/<dataset>_<run>/results/``; it is handed to
    ``parse`` together with the dataset name and the parsed resolution.
    """
    resolution = parse_resolution(res)
    results_folder = '/data/dl/{}_{}/results/'.format(dataset, run)
    parse(results_folder, dataset, resolution)
def main(cmd, dataset, run, vidres, ssdres, kltres, make_videos):
    """Convert pixel detections of a run to world coordinates (pathlib paths).

    ``cmd`` is ``"findvids"`` to process every ``.mkv`` video of the dataset
    (output goes to the run's ``detections_world`` folder), or a single video
    name (output goes to the current directory).

    For each video the detections CSV is loaded, its box coordinates are
    rescaled from the SSD resolution to the video resolution, the KLT point
    tracks are loaded and converted, and the result of ``detections_to_3D`` is
    written as ``<video>_world.csv``. With ``make_videos`` a ``.mp4`` is
    rendered next to the CSV.
    """
    vidres = parse_resolution(vidres)
    ssdres = parse_resolution(ssdres)
    kltres = parse_resolution(kltres)

    x_factor = float(vidres[0]) / ssdres[0]
    y_factor = float(vidres[1]) / ssdres[1]
    # Column name -> scale factor taking it from SSD to video resolution.
    scaling = (
        ('xmin', x_factor),
        ('xmax', x_factor),
        ('ymin', y_factor),
        ('ymax', y_factor),
    )

    calib = Calibration(dataset)
    ts = Timestamps(dataset)

    class_heights = {}
    for entry in get_class_data(dataset):
        class_heights[entry['name']] = entry['height']

    # Minimal attribute holder carrying the two factors convert_klt is given.
    class KLTConfig(object):
        klt_x_factor = 0
        klt_y_factor = 0

    klt_config = KLTConfig()
    klt_config.klt_x_factor = vidres[0] / kltres[0]
    klt_config.klt_y_factor = vidres[1] / kltres[1]

    run_folder = runs_path / '{}_{}'.format(dataset, run)

    if cmd == "findvids":
        video_files = (datasets_path / dataset / "videos").glob('*.mkv')
        vidnames = sorted(video.stem for video in video_files)
        outfolder = run_folder / 'detections_world'
    else:
        vidnames = [cmd]
        outfolder = Path('.')
    mkdir(outfolder)

    if make_videos:
        classnames = get_classnames(dataset)
        dc = DatasetConfig(dataset)
        fps = dc.get('video_fps')

    for v in vidnames:
        print_flush(v)
        detections = pd.read_csv(run_folder / 'csv' / (v + '.csv'))

        # Convert pixel coordinate positions from SSD resolution to video
        # resolution because Calibration assumes video resolution coordinates
        for column, factor in scaling:
            detections[column] = round(detections[column] * factor).astype(int)

        print_flush("Converting point tracks...")
        klt = load(datasets_path / dataset / 'klt' / (v + '.pklz'))
        klt, klt_frames = convert_klt(klt, klt_config)
        pts = PointTrackStructure(klt, klt_frames, vidres[0], vidres[1])

        outpath = outfolder / '{v}_world.csv'.format(v=v)
        klt_outpath = outpath.with_name(outpath.stem + '_klt.pklz')

        print_flush("Converting to world coordinates...")
        detections3D = detections_to_3D(detections, pts, calib, ts, v,
                                        klt_save_path=klt_outpath,
                                        class_heights=class_heights)

        detections3D.to_csv(outpath, float_format='%.4f')

        if not make_videos:
            continue

        from visualize_detections import detections_video
        vidpath = datasets_path / dataset / "videos" / "{}.mkv".format(v)
        print_flush("Rendering video...")
        detections_video(detections3D, vidpath,
                         outpath.with_suffix('.mp4'),
                         classnames, dataset, vidres,
                         fps=fps, conf_thresh=0.0, coords='world')

    print_flush("Done!")
def import_videos(query, dataset, resolution, fps, suffix, method, logs,
                  minutes):
    """Import videos matching a glob ``query`` into a dataset.

    Args:
        query: glob pattern selecting the source videos.
        dataset: dataset name; videos go to ``<datasets_path>/<dataset>/videos``
            and log files to ``<datasets_path>/<dataset>/logs``.
        resolution: resolution spec; its first two parsed components are used
            as width and height.
        fps: frame rate handed to the encoder.
        suffix: must be ``'.mkv'`` (asserted).
        method: ``"imageio"`` or ``"handbrake"``.
        logs: existing folder holding one ``<video name>.log`` per video
            (asserted to be a directory).
        minutes: ``0`` to encode every video as is; otherwise the videos are
            re-cut by ``recode_minutes_imageio`` using this value.

    Raises:
        ValueError: on an unknown ``method`` or a log file rejected by
            ``validate_logfile``.
    """
    assert(suffix == '.mkv')

    logs = Path(logs)
    assert(logs.is_dir())

    if method == "imageio":
        encode = encode_imageio
    elif method == "handbrake":
        encode = encode_handbrake
    else:
        raise(ValueError("Incorrect method {}".format(method)))

    resolution = parse_resolution(resolution)
    width, height = resolution[0:2]

    target = datasets_path / dataset / "videos"
    mkdir(target)

    logs_target = datasets_path / dataset / "logs"
    mkdir(logs_target)

    files = [Path(x) for x in sorted(glob(query))]

    if minutes == 0:
        for path in files:
            video_name = path.stem
            src_log_path = logs / (video_name + '.log')

            # The first log line determines the names of the target files.
            with src_log_path.open('r') as f:
                first = f.readline().rstrip()

            first_time, _ = line_to_datetime(first)
            target_path, target_log_path = generate_paths(
                first_time, target, logs_target, suffix)

            print_flush(target_path)
            encode(path, target_path, width, height, fps)

            if validate_logfile(src_log_path):
                # str() for python 3.5 and earlier compatibility
                copy(str(src_log_path), str(target_log_path))
                print_flush("Log file OK! {}".format(src_log_path))
            else:
                raise(ValueError(
                    "Incorrect log file {}".format(src_log_path)))
    else:
        if method == "handbrake":
            # Recoding videos using handbrake into new clips of different
            # lengths, based on log files, would be cumbersome to implement.
            # Therefore, we instead first recode every video with handbrake
            # and then use imageio to recode the videos again into the desired
            # length. This should still provide handbrake's robustness to
            # strange videos, even though this solution is slow.
            tmp_folder = Path("/data/tmp_import/")
            if tmp_folder.is_dir():
                rmtree(str(tmp_folder))
            mkdir(tmp_folder)

            for i, path in enumerate(files):
                print_flush("Handbraking {} ...".format(path))

                video_name = path.stem
                src_log_path = logs / (video_name + '.log')

                # The index must be turned into text before it can be joined
                # with the suffix. Zero padding keeps the alphabetical order
                # of the temporary files equal to the order of the inputs.
                tmp_name = '{:06d}'.format(i)
                target_path = tmp_folder / (tmp_name + suffix)
                target_log_path = tmp_folder / (tmp_name + '.log')

                if validate_logfile(src_log_path):
                    copy(str(src_log_path), str(target_log_path))
                else:
                    raise(ValueError(
                        "Incorrect log file {}".format(src_log_path)))

                encode(path, target_path, width, height, fps)

            # From here on, work on the handbraked copies and their logs.
            files = sorted(tmp_folder.glob('*' + suffix))
            logs = tmp_folder
            print_flush("Handbrake section complete")

        recode_minutes_imageio(files, logs, minutes, width, height, fps,
                               target, logs_target, suffix)

        if method == "handbrake":
            rmtree(str(tmp_folder))

    print_flush("Done!")
def rare_class_mining(dataset, class_name, time_dist, sampling_rate,
                      import_datasets, input_shape, image_shape, batch_size,
                      batch_size2, epochs, frozen_layers, confidence):
    """Look for new frames to annotate that may contain ``class_name``.

    Steps:

    1. Every video of the dataset is sampled every ``sampling_rate`` seconds.
       A frame becomes a candidate unless it is already annotated or lies
       within ``time_dist`` seconds of an annotated frame or of a previously
       chosen candidate.
    2. If no video has any candidate, the process exits with status 1.
    3. A detector is trained via ``train`` and the candidates are passed, in
       batches of ``batch_size2``, to ``process``.
    4. Every item returned by ``process`` is written as a new numbered
       ``.jpg`` into the video's training folder and its frame number is
       appended to that folder's ``frames.log``.

    ``image_shape`` is parsed but not used otherwise.
    """
    soft = False
    classes = get_classnames(dataset)

    ts = Timestamps(dataset)

    dataset_folder = datasets_path / dataset

    # Find all videos in dataset
    vidnames = list((dataset_folder / "videos").glob('*.mkv'))

    all_found = []

    for v in vidnames:
        # Find video length from log file (computing this from the video file
        # is too slow). The length is the first space-separated field of the
        # last non-empty line.
        log_path = dataset_folder / "logs" / v.with_suffix('.log').name
        log_lines = [ln for ln in log_path.read_text().split('\n') if ln]
        v_len = int(log_lines[-1].split(' ')[0])

        print_flush("{} of length {}".format(v, v_len))

        # Find existing annotations
        frames_path = dataset_folder / "objects" / "train" / v.stem / "frames.log"
        tokens = frames_path.read_text().split()
        # Remove first entry, which is video name, and any empty entries
        annotated = [int(tok) for tok in tokens[1:] if tok]

        print_flush("Avoiding the following existing frames: ")
        print_flush(str(annotated))

        curr_time = ts.get(v.stem, 0)
        annotated_times = [ts.get(v.stem, frame) for frame in annotated]

        found = []
        found_times = []

        def too_close(moment, others):
            # True if `moment` is within time_dist seconds of any of `others`.
            return any(
                abs((moment - other).total_seconds()) <= time_dist
                for other in others)

        while True:
            # Sample in time
            curr_time += timedelta(seconds=sampling_rate)
            curr_frame = ts.get_frame_number_given_vidname(curr_time, v.stem)

            if curr_frame >= v_len:
                # We have reached the end of the video
                break

            if curr_frame in annotated:
                continue

            # Check if we are too close to existing annotations
            if too_close(curr_time, annotated_times):
                continue

            # Check if we are too close to any previously chosen interesting
            # frames
            if too_close(curr_time, found_times):
                continue

            # This is a frame we could work with
            found.append(curr_frame)
            found_times.append(curr_time)

        all_found.append((v, found))

    print_flush("Candidate frames:")
    found_some = False
    for v, frames in all_found:
        print("{} : {}".format(v, frames))
        if frames:
            found_some = True

    if not found_some:
        print_flush("Found no interesting frames. Quitting...")
        import sys
        sys.exit(1)

    print_flush(
        "Starting to train object detector with existing annotations...")

    input_shape = parse_resolution(input_shape)
    image_shape = parse_resolution(image_shape)

    model, bbox_util = train(dataset, import_datasets, input_shape,
                             batch_size, epochs, frozen_layers,
                             train_amount=1.0)

    print_flush(
        "Applying the model to the images to find objects of type '{}'".format(
            class_name))

    masker = Masker(dataset)

    target_size = (input_shape[0], input_shape[1])

    inputs = []
    frame_nums = []
    im_origs = []
    vids = []

    found_data = []

    for v, frames in all_found:
        with iio.get_reader(v) as vid:
            for frame_number in frames:
                im_orig = vid.get_data(frame_number)
                # The unmasked original is kept; the model sees a masked copy.
                im = masker.mask(im_orig.copy())

                inputs.append(cv2.resize(im, target_size))
                frame_nums.append(frame_number)
                im_origs.append(im_orig)
                vids.append(v)

                if len(inputs) != batch_size2:
                    continue

                found_data.extend(
                    process(inputs, frame_nums, im_origs, vids, confidence,
                            class_name, soft, batch_size2, model, bbox_util,
                            classes))

                inputs = []
                frame_nums = []
                im_origs = []
                vids = []

    if inputs:
        # There are still some leftovers
        found_data.extend(
            process(inputs, frame_nums, im_origs, vids, confidence,
                    class_name, soft, len(inputs), model, bbox_util, classes))

    print_flush("Writing images...")
    for v, frame, im in found_data:
        im_folder = dataset_folder / "objects" / "train" / v.stem
        # New images continue the numbering of the existing ones.
        existing_nums = [int(jpg.stem) for jpg in im_folder.glob('*.jpg')]
        im_num = max(existing_nums) + 1
        im_path = im_folder / "{}.jpg".format(im_num)

        iio.imwrite(im_path, im)
        print_flush("Written {}".format(im_path))

        # Add the new frame numbers to frames.log for this video
        flog = im_folder / "frames.log"
        with flog.open('a') as log:
            log.write(str(frame) + ' ')

    print_flush("Done!")
def autoannotate(dataset, import_datasets, input_shape, image_shape,
                 batch_size, batch_size2, epochs, frozen_layers):
    """Train an SSD300 detector and write ``.auto`` annotation files.

    Ground truth of ``dataset`` (plus the comma-separated
    ``import_datasets``, if any) is loaded, split 90/10 into training and
    validation keys and used to fine-tune an SSD300 model. The model is then
    applied to the images returned by ``get_images_to_autoannotate``; for
    every image at most 128 detections with the highest confidence are
    written, one per line, to the image path with ``.jpg`` replaced by
    ``.auto``.

    ``frozen_layers`` is the number of leading layer blocks (at most 5) whose
    layers are made non-trainable. ``image_shape`` is parsed but not used
    otherwise.
    """
    soft = False

    input_shape = parse_resolution(input_shape)
    image_shape = parse_resolution(image_shape)

    print_flush("Loading ground truth...")
    load_detections = LoadDetections()
    datasets = [dataset]
    if import_datasets:
        datasets.extend(import_datasets.split(','))

    detections = load_detections.custom(datasets)

    detections = detections.reset_index(drop=True)
    image_props = get_image_props(detections)
    detections = detections_add_ytrue(detections, image_props, dataset)

    detections.index = detections.image_file

    print_flush('Ground truth object counts:')
    print_flush(detections.type.value_counts())

    classes = get_classnames(dataset)
    num_classes = len(classes) + 1

    keys = sorted(detections.image_file.unique())
    shuffle(keys)

    num_train = int(round(0.9 * len(keys)))
    train_keys = keys[:num_train]
    val_keys = keys[num_train:]

    print_flush('Loading model...')
    # The model takes (height, width, channels), hence the swapped indices.
    model_shape = (input_shape[1], input_shape[0], input_shape[2])
    model = SSD300(model_shape, num_classes=num_classes)
    model.load_weights(ssd_path + 'weights_SSD300.hdf5', by_name=True)

    print_flush("Making priors...")
    im_in = np.random.random((1,) + model_shape)
    priors = model.predict(im_in, batch_size=1)[0, :, -8:]
    bbox_util = BBoxUtility(num_classes, priors)

    generator_kwargs = {
        'saturation_var': 0.5,
        'brightness_var': 0.5,
        'contrast_var': 0.5,
        'lighting_std': 0.5,
        'hflip_prob': 0.5,
        'vflip_prob': 0,
        'do_crop': True,
        'crop_area_range': [0.1, 1.0],
        'aspect_ratio_range': [0.5, 2]
    }

    path_prefix = ''
    gen = Generator(detections, bbox_util, batch_size, path_prefix,
                    train_keys, val_keys,
                    (input_shape[1], input_shape[0]), **generator_kwargs)

    # freeze several layers
    freeze_blocks = [
        ['input_1', 'conv1_1', 'conv1_2', 'pool1'],
        ['conv2_1', 'conv2_2', 'pool2'],
        ['conv3_1', 'conv3_2', 'conv3_3', 'pool3'],
        ['conv4_1', 'conv4_2', 'conv4_3', 'pool4'],
        ['conv5_1', 'conv5_2', 'conv5_3', 'pool5'],
    ][:min(frozen_layers, 5)]
    # Flatten the selected blocks into layer names. Testing a layer name
    # against the list of blocks itself never matches, which left every layer
    # trainable regardless of `frozen_layers`.
    freeze = {name for block in freeze_blocks for name in block}

    for L in model.layers:
        if L.name in freeze:
            L.trainable = False

    callbacks = [LearningRateScheduler(schedule)]

    optim = keras.optimizers.Adam(lr=BASE_LR / 10)
    model.compile(optimizer=optim,
                  loss=MultiboxLoss(num_classes, neg_pos_ratio=2.0).compute_loss)

    print_flush("Training...")
    model.fit_generator(gen.generate(True),
                        steps_per_epoch=gen.train_batches,
                        epochs=epochs,
                        verbose=2,
                        callbacks=callbacks,
                        validation_data=gen.generate(False),
                        validation_steps=gen.val_batches,
                        workers=1)

    print_flush("Auto-annotating...")
    masker = Masker(dataset)

    inputs = []
    impaths = []
    to_annotate = get_images_to_autoannotate(dataset)

    # rep_last needed since we use large batches, for speed, to make sure we
    # run on all images
    for impath in rep_last(to_annotate, batch_size2):
        im = iio.imread(impath)
        im = masker.mask(im)
        resized = cv2.resize(im, (input_shape[0], input_shape[1]))
        inputs.append(resized)
        impaths.append(impath)

        if len(inputs) == batch_size2:
            inputs = np.array(inputs).astype(np.float64)
            inputs = preprocess_input(inputs)

            preds = model.predict(inputs, batch_size=batch_size, verbose=0)
            results = bbox_util.detection_out(preds, soft=soft)

            for result, res_path in zip(results, impaths):
                # Empty entries are replaced by one all-zero row so that
                # everything can be stacked into a single table.
                result = [r if len(r) > 0 else np.zeros((1, 6))
                          for r in result]
                raw_detections = pd.DataFrame(
                    np.vstack(result),
                    columns=['class_index', 'confidence',
                             'xmin', 'ymin', 'xmax', 'ymax'])

                auto_path = res_path.replace('.jpg', '.auto')

                # Sort detections by confidence, keeping the top ones
                # This seems to be more robust than a hard-coded confidence
                # threshold. Note that a confidence threshold can be chosen
                # in the annotation web UI
                n = 128
                dets = list(pandas_loop(raw_detections))
                dets.sort(key=lambda x: 1.0 - x['confidence'])
                dets = dets[:n]

                with open(auto_path, 'w') as f:
                    for det in dets:
                        conf = round(det['confidence'], 4)
                        line = "{index} {cx} {cy} {w} {h} conf:{conf} {cn}\n".format(
                            index=int(det['class_index']),
                            cx=round((det['xmin'] + det['xmax']) / 2, 4),
                            cy=round((det['ymin'] + det['ymax']) / 2, 4),
                            w=round(det['xmax'] - det['xmin'], 4),
                            h=round(det['ymax'] - det['ymin'], 4),
                            conf=conf,
                            cn=classes[int(det['class_index']) - 1])
                        f.write(line)
                print_flush("Wrote {}".format(auto_path))

            inputs = []
            impaths = []

    assert(not inputs)  # If this fails, not all images were processed!

    print_flush("Done!")
def main(batch_size, max_images, epochs, name, import_datasets,
         frozen_layers, experiment, train_data_dir, input_shape, image_shape,
         memory_fraction, do_crop):
    """Train an SSD300 detector for dataset ``name`` and store the run.

    Everything is written below ``<runs_path>/<name>_<experiment>``: the
    training log, the pickled training/validation keys, model checkpoints,
    TensorBoard logs, a loss plot (``training.png``) and ``results.csv``.

    Args:
        batch_size: batch size handed to the data generator.
        max_images: if positive, only this many (shuffled) images are used.
        epochs: number of training epochs.
        name: dataset name.
        import_datasets: comma-separated names of additional datasets, or a
            falsy value for none.
        frozen_layers: number of leading layer blocks (at most 5) whose
            layers are made non-trainable.
        experiment: run name; combined with ``name`` into the run folder.
        train_data_dir: not used by this function.
        input_shape: resolution spec of the network input.
        image_shape: resolution spec; parsed but not used otherwise.
        memory_fraction: per-process GPU memory fraction for TensorFlow.
        do_crop: forwarded to the data generator as ``do_crop``.
    """
    from keras.backend.tensorflow_backend import set_session
    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = memory_fraction
    set_session(tf.Session(config=config))

    run_name = "{}_{}".format(name, experiment)
    run_path = runs_path / run_name

    input_shape = parse_resolution(input_shape)
    image_shape = parse_resolution(image_shape)

    load_detections = LoadDetections()
    session = tf.Session()
    K.set_session(session)
    log('Started TensorFlow session')
    log('Chosen input_shape is {}'.format(input_shape))

    mkdir(run_path)

    logging.basicConfig(filename=str(run_path / "trainlog.log"),
                        level=logging.INFO)

    # Recording the git revision is best effort: training must not fail when
    # this is not a git checkout or git is not installed.
    try:
        githash = subprocess.check_output(
            ['git', 'rev-parse', 'HEAD']).strip()[0:6].decode('utf-8')
    except (subprocess.CalledProcessError, OSError) as err:
        log("Git hash unavailable: {}".format(err))
    else:
        log("Git hash: {}".format(githash))

    log('Loading detections')

    datasets = [name]
    if import_datasets:
        datasets.extend(import_datasets.split(','))
        log('Using these datasets: ' + str(datasets))

    detections = load_detections.custom(datasets)

    log('Detections loaded')
    log('Calculating image properties')
    detections = detections.reset_index(drop=True)
    image_props = get_image_props(detections)
    log('Image properties created')

    log('Adding y_true to detections')
    detections = detections_add_ytrue(detections, image_props, name)

    detections.index = detections.image_file
    print(' ')
    print('Detection frequencies:')
    print(detections.type.value_counts())
    print(' ')
    classes = get_classnames(name)
    num_classes = len(classes) + 1

    log('Loading priors')

    keys = sorted(detections.image_file.unique())
    random.shuffle(keys)
    if max_images > 0:
        keys = keys[:max_images]
    shuffle(keys)
    num_train = int(round(0.9 * len(keys)))
    if num_train == len(keys):
        # Always keep at least one key for validation.
        num_train -= 1
    train_keys = keys[:num_train]
    val_keys = keys[num_train:]

    # The keys are stored as the pickled string representation of the list;
    # the files are now closed deterministically after writing.
    train_keys_file = run_path / "train_keys.pickle"
    log('Saving training keys to: {}'.format(train_keys_file))
    with train_keys_file.open('wb') as f:
        pickle.dump(str(train_keys), f)

    val_keys_file = run_path / "val_keys.pickle"
    log('Saving validation keys to: {}'.format(val_keys_file))
    with val_keys_file.open('wb') as f:
        pickle.dump(str(val_keys), f)

    log('Loading model')
    # The model takes (height, width, channels), hence the swapped indices.
    model_shape = (input_shape[1], input_shape[0], input_shape[2])
    model = SSD300(model_shape, num_classes=num_classes)
    model.load_weights(ssd_path / "weights_SSD300.hdf5", by_name=True)

    log('Generating priors')
    im_in = np.random.random((1,) + model_shape)
    priors = model.predict(im_in, batch_size=1)[0, :, -8:]
    bbox_util = BBoxUtility(num_classes, priors)

    generator_kwargs = {
        'saturation_var': 0.5,
        'brightness_var': 0.5,
        'contrast_var': 0.5,
        'lighting_std': 0.5,
        'hflip_prob': 0.5,
        'vflip_prob': 0,
        'do_crop': do_crop,
        'crop_area_range': [0.1, 1.0],
        'aspect_ratio_range': [0.5, 2]
    }

    path_prefix = ''
    gen = Generator(detections, bbox_util, batch_size, path_prefix,
                    train_keys, val_keys,
                    (input_shape[1], input_shape[0]), **generator_kwargs)

    # freeze several layers
    freeze_blocks = [
        ['input_1', 'conv1_1', 'conv1_2', 'pool1'],
        ['conv2_1', 'conv2_2', 'pool2'],
        ['conv3_1', 'conv3_2', 'conv3_3', 'pool3'],
        ['conv4_1', 'conv4_2', 'conv4_3', 'pool4'],
        ['conv5_1', 'conv5_2', 'conv5_3', 'pool5'],
    ][:min(frozen_layers, 5)]
    # Flatten the selected blocks into layer names. Testing a layer name
    # against the list of blocks itself never matches, which left every layer
    # trainable regardless of `frozen_layers`.
    freeze = {layer for block in freeze_blocks for layer in block}

    for L in model.layers:
        if L.name in freeze:
            L.trainable = False

    mkdir(run_path / "checkpoints")
    shutil.rmtree(str(run_path / "logs"), ignore_errors=True)
    mkdir(run_path / "logs")

    callbacks = [
        ModelCheckpoint(str(run_path / 'checkpoints') +
                        '/weights.{epoch:02d}-{val_loss:.2f}.hdf5',
                        verbose=2,
                        save_weights_only=True),
        TensorBoard(log_dir=str(run_path / "logs"), write_graph=False),
        LearningRateScheduler(schedule)
    ]

    optim = keras.optimizers.Adam(lr=BASE_LR / 10)
    model.compile(optimizer=optim,
                  loss=MultiboxLoss(num_classes, neg_pos_ratio=2.0).compute_loss)

    log('Running model')
    history = model.fit_generator(gen.generate(True),
                                  steps_per_epoch=gen.train_batches,
                                  epochs=epochs,
                                  verbose=2,
                                  callbacks=callbacks,
                                  validation_data=gen.generate(False),
                                  validation_steps=gen.val_batches,
                                  workers=1)
    log('Done training model')
    session.close()
    log('Session closed, starting with writing results')

    results = pd.DataFrame(history.history).unstack().reset_index(0)
    results = results.rename(columns={'level_0': 'type', 0: 'value'})

    # Collect the training and validation loss curves for plotting.
    x1 = []
    y1 = []
    x2 = []
    y2 = []
    for row in pandas_loop(results):
        if row['type'] == 'loss':
            x1.append(row['_'])
            y1.append(row['value'])
        elif row['type'] == 'val_loss':
            x2.append(row['_'])
            y2.append(row['value'])

    plot_path = run_path / "training.png"
    multi_plot([x1, x2], [y1, y2], plot_path,
               xlabel='epochs', ylabel='loss', title='Training',
               legend=['loss', 'validation loss'])

    results.to_csv(run_path / "results.csv")

    log('Cleaning up non-optimal weights...')
    cleanup(name, experiment)

    log('Finished TensorFlow session')
    print_flush('Done!')
def main(dataset, run, res):
    """Parse the results folder of a run.

    The folder is ``<runs_path>/<dataset>_<run>/results``; it is handed to
    ``parse`` together with the dataset name and the parsed resolution.
    """
    resolution = parse_resolution(res)
    results_folder = runs_path / "{}_{}".format(dataset, run) / "results"
    parse(results_folder, dataset, resolution)
def main(cmd, dataset, run, vidres, ssdres, kltres, conf, make_videos):
    """Build tracks from the detections of a run (pathlib paths).

    ``cmd`` is ``"findvids"`` to process every ``.mkv`` video of the dataset
    (output goes to the run's ``tracks`` folder), or a single video name
    (output goes to the current directory).

    For each video the detections CSV is loaded and rescaled from the SSD
    resolution to the video resolution, the KLT point tracks are loaded and
    converted, and, if there is at least one detection, tracks are built and
    saved as ``<video>_tracks.pklz``. With ``make_videos`` a
    ``<video>_tracks.mp4`` is rendered whenever tracks were produced.
    """
    from storage import load, save
    from folder import datasets_path, runs_path
    from pathlib import Path
    from folder import mkdir

    mask = Masker(dataset)

    run_folder = runs_path / '{}_{}'.format(dataset, run)

    if cmd == "findvids":
        video_files = (datasets_path / dataset / "videos").glob('*.mkv')
        vidnames = sorted(video.stem for video in video_files)
        outfolder = run_folder / 'tracks'
    else:
        vidnames = [cmd]
        outfolder = Path('./')

    vidres = parse_resolution(vidres)
    ssdres = parse_resolution(ssdres)
    kltres = parse_resolution(kltres)

    x_factor = float(vidres[0]) / ssdres[0]
    y_factor = float(vidres[1]) / ssdres[1]
    # Column name -> scale factor taking it from SSD to video resolution.
    scaling = (
        ('xmin', x_factor),
        ('xmax', x_factor),
        ('ymin', y_factor),
        ('ymax', y_factor),
    )

    c = Config(vidres, kltres, conf)

    mkdir(outfolder)

    for v in vidnames:
        det_path = run_folder / "csv" / (v + '.csv')
        detections = pd.read_csv(det_path)
        for column, factor in scaling:
            detections[column] = round(detections[column] * factor).astype(int)

        klt = load(datasets_path / dataset / "klt" / (v + '.pklz'))
        klt, klt_frames = convert_klt(klt, c)

        if len(detections) > 0:
            tracks = build_tracks(detections, klt, klt_frames, c)
            print_flush("{} tracks done".format(v))
            save(tracks, outfolder / '{}_tracks.pklz'.format(v))
        else:
            tracks = []
            print_flush(
                "{} skipping tracking, because there were no detections".
                format(v))

        if not make_videos:
            continue

        if tracks:
            from visualize_tracking import render_video
            vidpath = datasets_path / dataset / "videos" / (v + '.mkv')
            render_video(tracks, vidpath,
                         outfolder / (v + "_tracks.mp4"),
                         mask=mask)
            print_flush("{} video done".format(v))
        else:
            print_flush(
                "{} skipping video rendering, because there were no tracks"
                .format(v))

    print_flush("Done!")