def interpret_tracks_gt(dataset, date, det_id, traj_csv_path):
    """ Interprets tracking ground truth csv files exported by T-Analyst.

        Parameters:
        dataset       -- name of dataset
        date          -- date when the video was filmed, as a string in the format 'YYYY-MM-DD'
        det_id        -- the ID number of the T-Analyst 'detection' of interest.
                         Set to None to include everything in the .csv file
        traj_csv_path -- path to .csv file exported by T-Analyst
    """
    traj = pd.read_csv(traj_csv_path, sep=';', decimal=',')

    calib = Calibration(dataset)
    ts = Timestamps(dataset)
    mask = Check(dataset, 'mask')

    gts = []

    for traj_row in pandas_loop(traj):
        row_det_id = traj_row['Detection ID']
        if (det_id is None) or (row_det_id == det_id):
            c = traj_row['Type of road user']
            i = traj_row['Road user ID']
            x = traj_row['X (m)']
            y = traj_row['Y (m)']
            t = traj_row['Time Stamp']

            # strptime is both slow and has issues with the way milliseconds
            # are written by T-Analyst, so parse the timestamp manually
            year, month, day = map(int, date.split('-'))
            hour, minute, second, millisecond = map(
                int, t.replace('.', ':').split(':'))
            t = datetime(year, month, day, hour, minute, second,
                         millisecond * 1000)

            vid, fn = ts.get_frame_number(t)
            px, py = calib.to_pixels(x, y)
            px, py = map(int, (px, py))

            if not mask.test(px, py):
                gt = (vid, fn, t, x, y, i, c, px, py)
                gts.append(gt)

    return gts
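# Usage sketch. The dataset name, date and .csv path are made up for
# illustration; substitute your own dataset and a file exported by T-Analyst:
#
#   gts = interpret_tracks_gt('sweden2', '2017-05-16', det_id=None,
#                             traj_csv_path='trajectories.csv')
#   for vid, fn, t, x, y, i, c, px, py in gts:
#       print("{} frame {}: road user {} ({}) at ({:.1f}, {:.1f}) m".format(
#           vid, fn, i, c, x, y))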
def draw(to_draw, df, class_colors, conf_thresh=0.7, x_scale=1.0, y_scale=1.0,
         coords='pixels', calib=None):
    """ Draws boxes from a data frame onto an image, which is then returned.

        Arguments:
        to_draw          -- an image to draw on
        df               -- data frame with object detections
        class_colors     -- list of colors
        conf_thresh      -- threshold of confidence; detections below this are not
                            included. If negative, confidences are not used at all
        x_scale, y_scale -- scales the coordinates from the data frame in case
                            the image is of another resolution
        coords           -- 'pixels' for normal pixel coordinates, 'world' for special
                            treatment of world coordinates, including movement direction
        calib            -- if in world coordinates, a Calibration object
                            (from the world.py module)
    """
    noconf = False
    if conf_thresh < 0:
        noconf = True

    if noconf or (conf_thresh == 0.0):
        # Checking for 0.0 here isn't necessary, but skips the somewhat slow
        # pandas operation
        df2 = df
    else:
        df2 = df.loc[df['confidence'] > conf_thresh]

    if coords == 'pixels':
        for row in pandas_loop(df2):
            xmin = int(row['xmin'] * x_scale)
            xmax = int(row['xmax'] * x_scale)
            ymin = int(row['ymin'] * y_scale)
            ymax = int(row['ymax'] * y_scale)
            cname = row['class_name']
            cindex = row['class_index']

            conf = None
            if not noconf:
                conf = row['confidence']

            to_draw = draw_box(to_draw, xmin, xmax, ymin, ymax, cname, cindex,
                               class_colors, conf=conf)
    elif coords == 'world':
        for row in pandas_loop(df2):
            wx = row['world_x']
            wy = row['world_y']
            wdx = row['world_dx']
            wdy = row['world_dy']
            cname = row['class_name']
            cindex = row['class_index']

            cx, cy = calib.to_pixels(wx, wy, as_type=int)
            xx, yy = calib.to_pixels(wx + wdx, wy + wdy, as_type=int)

            conf = None
            if not noconf:
                conf = row['confidence']

            to_draw = draw_arrow(to_draw, cx, cy, xx, yy, cname, cindex,
                                 class_colors, conf=conf)
    else:
        raise ValueError("Incorrect coords {}".format(coords))

    return to_draw
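# Usage sketch in pixel mode; the frame and detections data frame are
# hypothetical, and 'world' mode would additionally need a Calibration object:
#
#   colors = [(255, 0, 0), (0, 255, 0), (0, 0, 255)]  # one color per class
#   frame = draw(frame, detections_df, colors, conf_thresh=0.7,
#                x_scale=0.5, y_scale=0.5)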
def make_tracks(dataset, video_name, dets, klts, munkres, ts, calib, config,
                start_stop=None):
    """ Main function for making tracks in world coordinates.

        Arguments:
        dataset    -- name of dataset
        video_name -- name of video (no folders or suffix)
        dets       -- world coordinate detections as made by detections_world.py
        klts       -- point tracks, as saved by detections_world.py
                      (the 'per-detection point track format')
        munkres    -- a Munkres object (from the munkres module, not our code)
        ts         -- a Timestamps object (from the timestamps.py module)
        calib      -- a Calibration object (from the world.py module)
        config     -- a WorldTrackingConfig object (from this module)
        start_stop -- either None or a tuple (start, stop) with integers of
                      which frames to perform tracking on
    """
    mask_check = Check(dataset, 'mask', margin=config.get('mask_margin'))

    tracks = []
    lost_tracks = []

    n_frames = max(dets['frame_number'])

    if start_stop is None:
        start_frame = 0
        stop_frame = n_frames
    else:
        start_frame, stop_frame = start_stop

    for frame_number in tqdm(range(start_frame, stop_frame), "Making tracks"):
        now = ts.get(video_name, frame_number)

        tracks, just_lost = lose_tracks(tracks, now, frame_number, mask_check,
                                        calib, config)
        lost_tracks.extend(just_lost)

        tracks = update_tracks(tracks, now, frame_number)

        dets_frame = dets[dets['frame_number'] == frame_number]  # This is slow!

        if not tracks:
            # Let each detection be a track of its own
            for d in pandas_loop(dets_frame):
                track = new_track(tracks, now, frame_number, d, config)
                if track is not None:
                    tracks.append(track)
        else:
            # Hungarian algorithm to find associations
            mat = []
            dets_list = [x for x in pandas_loop(dets_frame)]

            for i_track, track in enumerate(tracks):
                mat.append([])
                for i_det, det in enumerate(dets_list):
                    cost = track.cost(now, det['world_x'], det['world_y'],
                                      det['world_dx'], det['world_dy'],
                                      det['class_name'])  # This is slow!
                    mat[i_track].append(cost)

            try:
                indices = munkres.compute(mat)
            except UnsolvableMatrix:
                # This means that tracks and detections were completely
                # incompatible
                for d in pandas_loop(dets_frame):
                    new_track(tracks, now, frame_number, d, config)
            else:
                for i_track, i_det in indices:
                    track = tracks[i_track]
                    if mat[i_track][i_det] <= config.get('cost_thresh', track.cn):
                        det = dets_list[i_det]
                        track.update(now, frame_number, det['world_x'],
                                     det['world_y'], det['world_dx'],
                                     det['world_dy'])
                        # So that we can skip these when making new tracks
                        dets_list[i_det] = None

                for det in dets_list:
                    if det is None:
                        continue
                    new_track(tracks, now, frame_number, det, config)

    lost_tracks.extend(tracks)

    # Remove tracks that are too short to be considered reliable
    good_tracks = []
    for track in lost_tracks:
        from_det_count = 0
        for h in track.history:
            from_det = h[-1]
            if from_det:
                from_det_count += 1

        if from_det_count > 2:
            good_tracks.append(track)

    return good_tracks
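# Minimal sketch of the association step in isolation, using the same munkres
# package as above; the 3x3 cost matrix is made up for illustration:
from munkres import Munkres

cost_matrix = [[9.0, 1.0, 7.0],   # track 0 vs detections 0..2
               [2.0, 8.0, 6.0],   # track 1
               [5.0, 3.0, 0.5]]   # track 2
indices = Munkres().compute(cost_matrix)
# indices is a list of (i_track, i_det) pairs minimizing the total cost,
# here [(0, 1), (1, 0), (2, 2)]. As in make_tracks above, pairs whose cost
# exceeds the threshold are still rejected afterwards.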
def main(batch_size, max_images, epochs, name, import_datasets, frozen_layers,
         experiment, train_data_dir, input_shape, image_shape, memory_fraction,
         do_crop):
    from keras.backend.tensorflow_backend import set_session
    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = memory_fraction
    set_session(tf.Session(config=config))

    run_name = "{}_{}".format(name, experiment)

    input_shape = parse_resolution(input_shape)
    image_shape = parse_resolution(image_shape)

    load_detections = LoadDetections()
    session = tf.Session()
    K.set_session(session)
    log('Started TensorFlow session')
    log('Chosen input_shape is {}'.format(input_shape))
    detections_file = runs_path / run_name / "detections.pickle"
    mkdir(runs_path / run_name)

    logging.basicConfig(filename=str(runs_path / run_name / "trainlog.log"),
                        level=logging.INFO)

    try:
        githash = subprocess.check_output(
            ['git', 'rev-parse', 'HEAD']).strip()[0:6].decode('utf-8')
        log("Git hash: {}".format(githash))
    except subprocess.CalledProcessError:
        pass

    log('Loading detections')

    datasets = [name]
    if import_datasets:
        datasets.extend(import_datasets.split(','))
        log('Using these datasets: ' + str(datasets))

    detections = load_detections.custom(datasets)

    log('Detections loaded')
    log('Calculating image properties')
    detections = detections.reset_index(drop=True)
    image_props = get_image_props(detections)
    log('Image properties created')

    log('Adding y_true to detections')
    detections = detections_add_ytrue(detections, image_props, name)

    detections.index = detections.image_file
    print(' ')
    print('Detection frequencies:')
    print(detections.type.value_counts())
    print(' ')
    classes = get_classnames(name)
    num_classes = len(classes) + 1

    log('Loading priors')

    keys = sorted(detections.image_file.unique())
    shuffle(keys)  # shuffle before truncation, so that a random subset is kept
    if max_images > 0:
        keys = keys[:max_images]

    num_train = int(round(0.9 * len(keys)))
    if num_train == len(keys):
        num_train -= 1
    train_keys = keys[:num_train]
    val_keys = keys[num_train:]

    train_keys_file = runs_path / run_name / "train_keys.pickle"
    log('Saving training keys to: {}'.format(train_keys_file))
    pickle.dump(str(train_keys), train_keys_file.open('wb'))
    val_keys_file = runs_path / run_name / "val_keys.pickle"
    log('Saving validation keys to: {}'.format(val_keys_file))
    pickle.dump(str(val_keys), val_keys_file.open('wb'))

    log('Loading model')
    model = SSD300((input_shape[1], input_shape[0], input_shape[2]),
                   num_classes=num_classes)
    model.load_weights(ssd_path / "weights_SSD300.hdf5", by_name=True)

    log('Generating priors')
    im_in = np.random.random(
        (1, input_shape[1], input_shape[0], input_shape[2]))
    priors = model.predict(im_in, batch_size=1)[0, :, -8:]
    bbox_util = BBoxUtility(num_classes, priors)

    generator_kwargs = {
        'saturation_var': 0.5,
        'brightness_var': 0.5,
        'contrast_var': 0.5,
        'lighting_std': 0.5,
        'hflip_prob': 0.5,
        'vflip_prob': 0,
        'do_crop': do_crop,
        'crop_area_range': [0.1, 1.0],
        'aspect_ratio_range': [0.5, 2]
    }

    path_prefix = ''
    gen = Generator(detections, bbox_util, batch_size, path_prefix, train_keys,
                    val_keys, (input_shape[1], input_shape[0]),
                    **generator_kwargs)

    # Freeze the first few layer groups of the base network
    freeze_groups = [
        ['input_1', 'conv1_1', 'conv1_2', 'pool1'],
        ['conv2_1', 'conv2_2', 'pool2'],
        ['conv3_1', 'conv3_2', 'conv3_3', 'pool3'],
        ['conv4_1', 'conv4_2', 'conv4_3', 'pool4'],
        ['conv5_1', 'conv5_2', 'conv5_3', 'pool5'],
    ][:min(frozen_layers, 5)]
    # Flatten the groups, so that layer names can be looked up directly
    freeze = [layer_name for group in freeze_groups for layer_name in group]

    for L in model.layers:
        if L.name in freeze:
            L.trainable = False

    mkdir(runs_path / run_name / "checkpoints")
    shutil.rmtree(str(runs_path / run_name / "logs"), ignore_errors=True)
    mkdir(runs_path / run_name / "logs")

    callbacks = [
        ModelCheckpoint(str(runs_path / run_name / 'checkpoints') +
                        '/weights.{epoch:02d}-{val_loss:.2f}.hdf5',
                        verbose=2,
                        save_weights_only=True),
        TensorBoard(log_dir=str(runs_path / run_name / "logs"),
                    write_graph=False),
        LearningRateScheduler(schedule)
    ]

    optim = keras.optimizers.Adam(lr=BASE_LR / 10)
    model.compile(optimizer=optim,
                  loss=MultiboxLoss(num_classes,
                                    neg_pos_ratio=2.0).compute_loss)

    log('Running model')
    history = model.fit_generator(gen.generate(True),
                                  steps_per_epoch=gen.train_batches,
                                  epochs=epochs,
                                  verbose=2,
                                  callbacks=callbacks,
                                  validation_data=gen.generate(False),
                                  validation_steps=gen.val_batches,
                                  workers=1)

    log('Done training model')
    session.close()
    log('Session closed, starting with writing results')
    results = pd.DataFrame(history.history).unstack().reset_index(0)
    results = results.rename(columns={'level_0': 'type', 0: 'value'})

    x1 = []
    y1 = []
    x2 = []
    y2 = []
    for row in pandas_loop(results):
        if row['type'] == 'loss':
            x1.append(row['_'])
            y1.append(row['value'])
        elif row['type'] == 'val_loss':
            x2.append(row['_'])
            y2.append(row['value'])

    plot_path = runs_path / run_name / "training.png"
    multi_plot([x1, x2], [y1, y2],
               plot_path,
               xlabel='epochs',
               ylabel='loss',
               title='Training',
               legend=['loss', 'validation loss'])

    results.to_csv(runs_path / run_name / "results.csv")

    log('Cleaning up non-optimal weights...')
    cleanup(name, experiment)

    log('Finished TensorFlow session')
    print_flush('Done!')
def detections_to_3D(dets, pts, calib, ts, v, class_heights,
                     klt_save_path=None):
    """ Treats each detection as a point with a direction """
    cx = (dets['xmin'] + dets['xmax']) // 2
    cy = (dets['ymin'] + dets['ymax']) // 2
    dets['cx'] = cx
    dets['cy'] = cy

    world_x = []
    world_y = []

    for px, py, cl in zip(cx, cy, dets['class_name']):
        x, y, z = calib.to_world(px, py, z=-class_heights[cl] / 2)
        world_x.append(x)
        world_y.append(y)

    dets['world_x'] = world_x
    dets['world_y'] = world_y

    # Compute an approximate motion direction for each detection, using KLT
    # point tracks and transforming the direction to world coordinates
    wdxs = []
    wdys = []

    id_maker = count()
    ids = []

    all_matching_klts = {}

    for det in pandas_loop(dets):
        det_id = next(id_maker)
        ids.append(det_id)

        fn = det['frame_number']
        klts_frame = pts.get_klts(fn, det)

        dx = 0
        dy = 0
        n = 0

        klt_matches = []

        for k in klts_frame:
            x, y = k[fn]

            if (x > det['xmin']) and (x < det['xmax']) and \
               (y > det['ymin']) and (y < det['ymax']):
                # The point lies inside the detection's box; compute its
                # average speed from the neighbouring frames
                previous = (x, y)
                previous_fn = fn
                if (fn - 1) in k:
                    previous_fn = fn - 1
                    previous = k[previous_fn]

                following = (x, y)
                following_fn = fn
                if (fn + 1) in k:
                    following_fn = fn + 1
                    following = k[following_fn]

                dt = (ts.get(v, following_fn) -
                      ts.get(v, previous_fn)).total_seconds()
                if dt > 0:
                    # dx and dy are here in pixels/second
                    dx += (following[0] - previous[0]) / dt
                    dy += (following[1] - previous[1]) / dt
                    n += 1

                    klt_matches.append(k)

        if ((abs(dx) > 0) or (abs(dy) > 0)) and (n > 0):
            # Average speed in pixels/second
            dx /= n
            dy /= n

            wx2, wy2, _ = calib.to_world(
                det['cx'] + dx, det['cy'] + dy,
                z=-class_heights[det['class_name']] / 2)

            wdx = wx2 - det['world_x']
            wdy = wy2 - det['world_y']

            # These should now be in m/s
            wdxs.append(wdx)
            wdys.append(wdy)
        else:
            wdxs.append(0)
            wdys.append(0)

        all_matching_klts[det_id] = klt_matches

    dets['world_dx'] = wdxs
    dets['world_dy'] = wdys
    dets['id'] = ids

    if klt_save_path is not None:
        save(all_matching_klts, klt_save_path)

    return dets
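# Worked illustration of the direction estimate above, with made-up numbers:
# a KLT point at (100, 50) px in frame fn, at (96, 50) in fn-1 and at
# (104, 50) in fn+1, with frames 0.04 s apart (25 fps), gives
#
#   dt = 0.08                # seconds between fn-1 and fn+1
#   dx = (104 - 96) / dt     # = 100 pixels/second
#   dy = (50 - 50) / dt      # = 0
#
# Projecting (cx + dx, cy + dy) to world coordinates and subtracting the
# detection's own world position then yields the speed in m/s.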
def autoannotate(dataset, import_datasets, input_shape, image_shape,
                 batch_size, batch_size2, epochs, frozen_layers):
    soft = False

    classes = get_classnames(dataset)

    input_shape = parse_resolution(input_shape)
    image_shape = parse_resolution(image_shape)

    model, bbox_util = train(dataset, import_datasets, input_shape, batch_size,
                             epochs, frozen_layers, train_amount=1.0)

    print_flush("Auto-annotating...")
    masker = Masker(dataset)

    inputs = []
    impaths = []
    to_annotate = get_images_to_autoannotate(dataset)

    # rep_last needed since we use large batches, for speed, to make sure we
    # run on all images
    for impath in rep_last(to_annotate, batch_size2):
        im = iio.imread(impath)
        im = masker.mask(im)
        resized = cv2.resize(im, (input_shape[0], input_shape[1]))
        inputs.append(resized)
        impaths.append(impath)

        if len(inputs) == batch_size2:
            inputs = np.array(inputs).astype(np.float64)
            inputs = preprocess_input(inputs)

            preds = model.predict(inputs, batch_size=batch_size2, verbose=0)
            results = bbox_util.detection_out(preds, soft=soft)

            for result, res_path in zip(results, impaths):
                result = [r if len(r) > 0 else np.zeros((1, 6))
                          for r in result]
                raw_detections = pd.DataFrame(np.vstack(result),
                                              columns=['class_index',
                                                       'confidence', 'xmin',
                                                       'ymin', 'xmax', 'ymax'])

                auto_path = res_path.with_suffix('.auto')

                # Sort detections by confidence, keeping the top ones.
                # This seems to be more robust than a hard-coded confidence
                # threshold. Note that a confidence threshold can be chosen
                # in the annotation web UI.
                n = 128
                dets = [x for x in pandas_loop(raw_detections)]
                dets.sort(key=lambda x: 1.0 - x['confidence'])
                if len(dets) > n:
                    dets = dets[:n]

                with auto_path.open('w') as f:
                    for det in dets:
                        conf = round(det['confidence'], 4)
                        line = "{index} {cx} {cy} {w} {h} conf:{conf} {cn}\n".format(
                            index=int(det['class_index']),
                            cx=round((det['xmin'] + det['xmax']) / 2, 4),
                            cy=round((det['ymin'] + det['ymax']) / 2, 4),
                            w=round(det['xmax'] - det['xmin'], 4),
                            h=round(det['ymax'] - det['ymin'], 4),
                            conf=conf,
                            cn=classes[int(det['class_index']) - 1])
                        f.write(line)

                print_flush("Wrote {}".format(auto_path))

            inputs = []
            impaths = []

    assert not inputs  # If this fails, not all images were processed!
    print_flush("Done!")
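# The resulting .auto files thus contain one detection per line in the format
#   <class index> <cx> <cy> <w> <h> conf:<confidence> <class name>
# where the coordinates appear to be in the detector's normalized output
# space, as produced by bbox_util.detection_out. A made-up example line:
#
#   2 0.5123 0.4711 0.0897 0.0542 conf:0.9312 car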
def autoannotate(dataset, import_datasets, input_shape, image_shape,
                 batch_size, batch_size2, epochs, frozen_layers):
    soft = False

    input_shape = parse_resolution(input_shape)
    image_shape = parse_resolution(image_shape)

    print_flush("Loading ground truth...")
    load_detections = LoadDetections()
    datasets = [dataset]
    if import_datasets:
        datasets.extend(import_datasets.split(','))

    detections = load_detections.custom(datasets)
    detections = detections.reset_index(drop=True)
    image_props = get_image_props(detections)
    detections = detections_add_ytrue(detections, image_props, dataset)
    detections.index = detections.image_file

    print_flush('Ground truth object counts:')
    print_flush(detections.type.value_counts())

    classes = get_classnames(dataset)
    num_classes = len(classes) + 1

    keys = sorted(detections.image_file.unique())
    shuffle(keys)

    num_train = int(round(0.9 * len(keys)))
    train_keys = keys[:num_train]
    val_keys = keys[num_train:]

    print_flush('Loading model...')
    model = SSD300((input_shape[1], input_shape[0], input_shape[2]),
                   num_classes=num_classes)
    model.load_weights(ssd_path + 'weights_SSD300.hdf5', by_name=True)

    print_flush("Making priors...")
    im_in = np.random.random(
        (1, input_shape[1], input_shape[0], input_shape[2]))
    priors = model.predict(im_in, batch_size=1)[0, :, -8:]
    bbox_util = BBoxUtility(num_classes, priors)

    generator_kwargs = {
        'saturation_var': 0.5,
        'brightness_var': 0.5,
        'contrast_var': 0.5,
        'lighting_std': 0.5,
        'hflip_prob': 0.5,
        'vflip_prob': 0,
        'do_crop': True,
        'crop_area_range': [0.1, 1.0],
        'aspect_ratio_range': [0.5, 2]
    }

    path_prefix = ''
    gen = Generator(detections, bbox_util, batch_size, path_prefix, train_keys,
                    val_keys, (input_shape[1], input_shape[0]),
                    **generator_kwargs)

    # Freeze the first few layer groups of the base network
    freeze_groups = [
        ['input_1', 'conv1_1', 'conv1_2', 'pool1'],
        ['conv2_1', 'conv2_2', 'pool2'],
        ['conv3_1', 'conv3_2', 'conv3_3', 'pool3'],
        ['conv4_1', 'conv4_2', 'conv4_3', 'pool4'],
        ['conv5_1', 'conv5_2', 'conv5_3', 'pool5'],
    ][:min(frozen_layers, 5)]
    # Flatten the groups, so that layer names can be looked up directly
    freeze = [layer_name for group in freeze_groups for layer_name in group]

    for L in model.layers:
        if L.name in freeze:
            L.trainable = False

    callbacks = [LearningRateScheduler(schedule)]

    optim = keras.optimizers.Adam(lr=BASE_LR / 10)
    model.compile(optimizer=optim,
                  loss=MultiboxLoss(num_classes,
                                    neg_pos_ratio=2.0).compute_loss)

    print_flush("Training...")
    history = model.fit_generator(gen.generate(True),
                                  steps_per_epoch=gen.train_batches,
                                  epochs=epochs,
                                  verbose=2,
                                  callbacks=callbacks,
                                  validation_data=gen.generate(False),
                                  validation_steps=gen.val_batches,
                                  workers=1)

    print_flush("Auto-annotating...")
    masker = Masker(dataset)

    inputs = []
    impaths = []
    to_annotate = get_images_to_autoannotate(dataset)

    # rep_last needed since we use large batches, for speed, to make sure we
    # run on all images
    for impath in rep_last(to_annotate, batch_size2):
        im = iio.imread(impath)
        im = masker.mask(im)
        resized = cv2.resize(im, (input_shape[0], input_shape[1]))
        inputs.append(resized)
        impaths.append(impath)

        if len(inputs) == batch_size2:
            inputs = np.array(inputs).astype(np.float64)
            inputs = preprocess_input(inputs)

            preds = model.predict(inputs, batch_size=batch_size, verbose=0)
            results = bbox_util.detection_out(preds, soft=soft)

            for result, res_path in zip(results, impaths):
                result = [r if len(r) > 0 else np.zeros((1, 6))
                          for r in result]
                raw_detections = pd.DataFrame(np.vstack(result),
                                              columns=['class_index',
                                                       'confidence', 'xmin',
                                                       'ymin', 'xmax', 'ymax'])

                auto_path = res_path.replace('.jpg', '.auto')

                # Sort detections by confidence, keeping the top ones.
                # This seems to be more robust than a hard-coded confidence
                # threshold. Note that a confidence threshold can be chosen
                # in the annotation web UI.
                n = 128
                dets = [x for x in pandas_loop(raw_detections)]
                dets.sort(key=lambda x: 1.0 - x['confidence'])
                if len(dets) > n:
                    dets = dets[:n]

                with open(auto_path, 'w') as f:
                    for det in dets:
                        conf = round(det['confidence'], 4)
                        line = "{index} {cx} {cy} {w} {h} conf:{conf} {cn}\n".format(
                            index=int(det['class_index']),
                            cx=round((det['xmin'] + det['xmax']) / 2, 4),
                            cy=round((det['ymin'] + det['ymax']) / 2, 4),
                            w=round(det['xmax'] - det['xmin'], 4),
                            h=round(det['ymax'] - det['ymin'], 4),
                            conf=conf,
                            cn=classes[int(det['class_index']) - 1])
                        f.write(line)

                print_flush("Wrote {}".format(auto_path))

            inputs = []
            impaths = []

    assert not inputs  # If this fails, not all images were processed!
    print_flush("Done!")
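# rep_last is assumed (from its name and the comment above) to pad the image
# list by repeating its last element until the length is a multiple of the
# batch size, so that the final partial batch still fills up. A hypothetical
# equivalent, for illustration only:
def rep_last_sketch(seq, batch):
    seq = list(seq)
    while seq and (len(seq) % batch != 0):
        seq.append(seq[-1])
    return seq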