def extract_features(path_in, model_config, net, num_layers_finetune, use_gpu, num_timesteps=1, log_fn=print):
    """
    Pre-compute a ``.npy`` feature file for every ``.mp4`` video of every split.

    For each split in ``SPLITS``, all videos matching ``<videos_dir>/*/*.mp4`` are
    visited. The feature path mirrors the video path, with the videos directory
    swapped for the features directory and the extension changed to ``.npy``.
    Videos whose feature file already exists are skipped.

    :param path_in: Dataset root handed to the ``directories`` helpers.
    :param model_config: Model configuration used to resolve the features directory.
    :param net: Network wrapped in an ``engine.InferenceEngine``.
    :param num_layers_finetune: Forwarded to ``directories.get_features_dir``.
    :param use_gpu: Forwarded to the inference engine.
    :param num_timesteps: Forwarded to ``compute_features``.
    :param log_fn: Callable receiving progress messages (defaults to ``print``).
    """
    inference_engine = engine.InferenceEngine(net, use_gpu=use_gpu)

    for split in SPLITS:
        videos_dir = directories.get_videos_dir(path_in, split)
        features_dir = directories.get_features_dir(path_in, split, model_config, num_layers_finetune)

        # One sub-directory level is expected between the split folder and the videos
        video_files = glob.glob(os.path.join(videos_dir, "*", "*.mp4"))
        num_videos = len(video_files)
        log_fn(f"\nFound {num_videos} videos to process in the {split}-set")

        for video_index, video_path in enumerate(video_files):
            log_fn(f'\rExtract features from video {video_index + 1} / {num_videos}')

            path_features = video_path.replace(videos_dir, features_dir).replace(".mp4", ".npy")
            if os.path.isfile(path_features):
                log_fn("\tSkipped - feature was already precomputed.")
                continue

            # Decode the video, then run the network on its frames
            frames = extract_frames(video_path=video_path,
                                    inference_engine=inference_engine)
            compute_features(path_features=path_features,
                             inference_engine=inference_engine,
                             frames=frames,
                             batch_size=16,
                             num_timesteps=num_timesteps)

        log_fn('\n')
def toggle_project_setting():
    """
    Toggle a boolean project setting and report its new status as JSON.

    Reads ``path`` and ``setting`` from the JSON request body. When the toggled
    setting is ``assisted_tagging`` and it has just been switched on, ``split``
    and ``label`` are read as well, missing frames/features are computed and the
    logistic regression model is re-trained.
    """
    payload = request.json
    path = payload['path']
    setting = payload['setting']
    new_status = project_utils.toggle_project_setting(path, setting)

    assisted_tagging_enabled = setting == 'assisted_tagging' and new_status
    if not assisted_tagging_enabled:
        return jsonify(setting_status=new_status)

    # Assisted tagging was just enabled: bring the logreg model up to date
    split = payload['split']
    label = payload['label']
    inference_engine, model_config = utils.load_feature_extractor(path)

    # Compute the respective frames and features
    compute_frames_and_features(
        inference_engine=inference_engine,
        project_path=path,
        videos_dir=directories.get_videos_dir(path, split, label),
        frames_dir=directories.get_frames_dir(path, split, label),
        features_dir=directories.get_features_dir(path, split, model_config, label=label),
    )

    # Re-train the logistic regression model
    utils.train_logreg(path=path, split=split, label=label)

    return jsonify(setting_status=new_status)
def prepare_annotation(project):
    """
    Prepare all files needed for annotating the videos in the given project.

    For every split and every entry found in that split's videos directory,
    frames and features are computed. Afterwards the user is redirected to the
    project details page.
    """
    project = urllib.parse.unquote(project)
    dataset_path = utils.lookup_project_path(project)

    # The feature extractor is loaded once and shared across all splits/labels
    inference_engine, model_config = utils.load_feature_extractor(dataset_path)

    for split in SPLITS:
        print(f'\n\tPreparing videos in the {split}-set')

        labels = os.listdir(directories.get_videos_dir(dataset_path, split))
        for label in labels:
            compute_frames_features(
                inference_engine=inference_engine,
                videos_dir=directories.get_videos_dir(dataset_path, split, label),
                frames_dir=directories.get_frames_dir(dataset_path, split, label),
                features_dir=directories.get_features_dir(dataset_path, split, model_config, label=label),
            )

    return redirect(url_for("project_details", project=project))
def edit_class(project, class_name):
    """
    Edit the class name and tags for an existing class in the given project.

    The project config is rewritten with the new class entry, then every
    per-class directory (videos, frames, tags, features, logreg) that exists
    under the old class name is renamed to the new one.
    """
    project = urllib.parse.unquote(project)
    class_name = urllib.parse.unquote(class_name)
    path = project_utils.lookup_project_path(project)

    # New class name and tags come from the submitted form
    new_class_name, new_tag1, new_tag2 = utils.get_class_name_and_tags(request.form)

    # Replace the old class entry in the project config
    config = project_utils.load_project_config(path)
    del config['classes'][class_name]
    config['classes'][new_class_name] = [new_tag1, new_tag2]
    project_utils.write_project_config(path, config)

    # Collect every directory that may contain a sub-directory named after the class
    data_dirs = []
    for split in SPLITS:
        data_dirs.extend([
            directories.get_videos_dir(path, split),
            directories.get_frames_dir(path, split),
            directories.get_tags_dir(path, split),
        ])

        # Feature directories follow the format <dataset_dir>/<split>/<model>/<num_layers_to_finetune>/<label>
        features_dir = directories.get_features_dir(path, split)
        if not os.path.exists(features_dir):
            continue

        model_dirs = [os.path.join(features_dir, entry) for entry in os.listdir(features_dir)]
        for model_dir in model_dirs:
            for tuned_layers in os.listdir(model_dir):
                data_dirs.append(os.path.join(model_dir, tuned_layers))

    logreg_dir = directories.get_logreg_dir(path)
    if os.path.exists(logreg_dir):
        for entry in os.listdir(logreg_dir):
            data_dirs.append(os.path.join(logreg_dir, entry))

    # Rename the class directory wherever it is present
    for base_dir in data_dirs:
        old_class_dir = os.path.join(base_dir, class_name)
        if os.path.exists(old_class_dir):
            os.rename(old_class_dir, os.path.join(base_dir, new_class_name))

    return redirect(url_for('project_details', project=project))
def show_video_list(project, split, label):
    """
    Show the list of videos for the given split, class label and project.
    If the necessary files for annotation haven't been prepared yet, this is done now.

    The tags and logreg directories are created if missing. The template receives
    ``video_list`` as a list of ``(video_name, is_tagged, index)`` tuples in natural
    sort order, together with the tagged/untagged counts.
    """
    project = urllib.parse.unquote(project)
    path = project_utils.lookup_project_path(project)
    split = urllib.parse.unquote(split)
    label = urllib.parse.unquote(label)

    # load feature extractor
    inference_engine, model_config = utils.load_feature_extractor(path)

    videos_dir = directories.get_videos_dir(path, split, label)
    frames_dir = directories.get_frames_dir(path, split, label)
    features_dir = directories.get_features_dir(path, split, model_config, label=label)
    tags_dir = directories.get_tags_dir(path, split, label)
    logreg_dir = directories.get_logreg_dir(path, model_config, label)

    os.makedirs(logreg_dir, exist_ok=True)
    os.makedirs(tags_dir, exist_ok=True)

    # compute the features and frames missing
    compute_frames_and_features(inference_engine=inference_engine,
                                project_path=path,
                                videos_dir=videos_dir,
                                frames_dir=frames_dir,
                                features_dir=features_dir)

    videos = natsorted(os.listdir(frames_dir), alg=ns.IC)
    tagged_list = set(os.listdir(tags_dir))
    tagged = [f'{video}.json' in tagged_list for video in videos]

    num_videos = len(videos)
    # Count only tag files that belong to a listed video. Stray or stale files in
    # the tags directory must not inflate the count (or make `num_untagged` negative).
    num_tagged = sum(tagged)
    num_untagged = num_videos - num_tagged

    # A concrete list (rather than a one-shot zip iterator) can be iterated
    # more than once by the template.
    video_list = list(zip(videos, tagged, range(num_videos)))

    return render_template('video_list.html',
                           video_list=video_list,
                           split=split,
                           label=label,
                           path=path,
                           project=project,
                           num_videos=num_videos,
                           num_tagged=num_tagged,
                           num_untagged=num_untagged)
def train_model(path_in, path_out, model_name, model_version, num_layers_to_finetune, epochs,
                use_gpu=True, overwrite=True, temporal_training=None, resume=False, log_fn=print,
                confmat_event=None):
    """
    Train a classifier (optionally with fine-tuned backbone layers) on pre-computed features.

    Steps performed, in order:
      1. Create ``path_out``; if ``overwrite`` is falsy and any known output file
         already exists there, ask for confirmation on stdin (answering "n" exits
         the process via ``sys.exit()``).
      2. Load the backbone weights; when ``resume`` is set, also load
         ``last_classifier.checkpoint`` from ``path_out`` and update the backbone
         weights from it.
      3. Build the backbone, split off the last ``num_layers_to_finetune`` layers
         and extract features for all videos.
      4. Build train/valid data loaders, the classifier and the training config.
      5. Run ``training_loops`` and save the best state dict to
         ``best_classifier.checkpoint``.

    ``config.json`` is written twice: before training (empty ``end_time``) and
    again once training has finished. ``label2int.json`` is written before training.

    :param path_in: Dataset root handed to the ``directories`` helpers.
    :param path_out: Directory receiving checkpoints and config files.
    :param model_name: Backbone name forwarded to ``get_relevant_weights``.
    :param model_version: Backbone version forwarded to ``get_relevant_weights``.
    :param num_layers_to_finetune: Number of trailing backbone layers to train; 0 trains
        the classifier only.
    :param epochs: Number of training epochs; the learning rate drops at ``int(epochs / 2)``
        when ``epochs > 1``.
    :param use_gpu: Move the trained network to CUDA and use the GPU for feature extraction.
    :param overwrite: When falsy, prompt before overwriting existing output files.
    :param temporal_training: When truthy, train on the temporal (per-tag) label set
        instead of the class label set.
    :param resume: Continue from ``last_classifier.checkpoint`` in ``path_out``.
    :param log_fn: Callable receiving log messages (defaults to ``print``).
    :param confmat_event: Forwarded unchanged to ``training_loops``.
    :raises IndexError: If ``num_layers_to_finetune`` has no entry in the backbone's
        ``num_required_frames_per_layer`` mapping.
    :return: ``None``. Returns early (after logging an error) when either data
        loader is falsy.
    """
    os.makedirs(path_out, exist_ok=True)

    # Check for existing files
    saved_files = [
        "last_classifier.checkpoint",
        "best_classifier.checkpoint",
        "config.json",
        "label2int.json",
        "confusion_matrix.png",
        "confusion_matrix.npy"
    ]

    if not overwrite and any(
            os.path.exists(os.path.join(path_out, file)) for file in saved_files):
        print(f"Warning: This operation will overwrite files in {path_out}")

        # Keep asking until a valid answer is given
        while True:
            confirmation = input(
                "Are you sure? Add --overwrite to hide this warning. (Y/N) ")
            if confirmation.lower() == "y":
                break
            elif confirmation.lower() == "n":
                sys.exit()
            else:
                print('Invalid input')

    # Load weights
    selected_config, weights = get_relevant_weights(
        SUPPORTED_MODEL_CONFIGURATIONS,
        model_name,
        model_version,
        log_fn,
    )
    backbone_weights = weights['backbone']

    if resume:
        # Load the last classifier
        checkpoint_classifier = torch.load(
            os.path.join(path_out, 'last_classifier.checkpoint'))

        # Update original weights in case some intermediate layers have been finetuned
        update_backbone_weights(backbone_weights, checkpoint_classifier)

    # Load backbone network
    backbone_network = build_backbone_network(selected_config, backbone_weights)

    # Get the required temporal dimension of feature tensors in order to
    # finetune the provided number of layers
    if num_layers_to_finetune > 0:
        num_timesteps = backbone_network.num_required_frames_per_layer.get(
            -num_layers_to_finetune)
        if not num_timesteps:
            # Remove 1 because we added 0 to temporal_dependencies
            num_layers = len(
                backbone_network.num_required_frames_per_layer) - 1
            msg = (f'ERROR - Num of layers to finetune not compatible. '
                   f'Must be an integer between 0 and {num_layers}')
            log_fn(msg)
            raise IndexError(msg)
    else:
        num_timesteps = 1

    # Extract layers to finetune
    # (the backbone keeps only the remaining layers, so the features extracted
    # below are the inputs of the layers being fine-tuned)
    if num_layers_to_finetune > 0:
        fine_tuned_layers = backbone_network.cnn[-num_layers_to_finetune:]
        backbone_network.cnn = backbone_network.cnn[0:-num_layers_to_finetune]

    # Pre-compute features with the backbone (minus any layers split off above)
    extract_features(path_in, selected_config, backbone_network,
                     num_layers_to_finetune, use_gpu,
                     num_timesteps=num_timesteps, log_fn=log_fn)

    # Find label names (entries starting with '.' are ignored)
    label_names = os.listdir(directories.get_videos_dir(path_in, 'train'))
    label_names = [x for x in label_names if not x.startswith('.')]
    label_names_temporal = ['background']

    # Temporal tags come from the project config when available, otherwise
    # default to '<label>_tag1' / '<label>_tag2' for every class
    project_config = load_project_config(path_in)
    if project_config:
        for temporal_tags in project_config['classes'].values():
            label_names_temporal.extend(temporal_tags)
    else:
        for label in label_names:
            label_names_temporal.extend([f'{label}_tag1', f'{label}_tag2'])

    label_names_temporal = sorted(set(label_names_temporal))

    label2int_temporal_annotation = {
        name: index for index, name in enumerate(label_names_temporal)
    }
    label2int = {name: index for index, name in enumerate(label_names)}

    extractor_stride = backbone_network.num_required_frames_per_layer_padding[0]

    # Create the data loaders
    features_dir = directories.get_features_dir(path_in, 'train', selected_config,
                                                num_layers_to_finetune)
    tags_dir = directories.get_tags_dir(path_in, 'train')
    train_loader = generate_data_loader(
        project_config,
        features_dir,
        tags_dir,
        label_names,
        label2int,
        label2int_temporal_annotation,
        num_timesteps=num_timesteps,
        stride=extractor_stride,
        temporal_annotation_only=temporal_training,
    )

    # The validation loader is built unshuffled, with batch size 1 and no fixed
    # number of timesteps
    features_dir = directories.get_features_dir(path_in, 'valid', selected_config,
                                                num_layers_to_finetune)
    tags_dir = directories.get_tags_dir(path_in, 'valid')
    valid_loader = generate_data_loader(
        project_config,
        features_dir,
        tags_dir,
        label_names,
        label2int,
        label2int_temporal_annotation,
        num_timesteps=None,
        batch_size=1,
        shuffle=False,
        stride=extractor_stride,
        temporal_annotation_only=temporal_training,
    )

    # Check if the data is loaded fully
    if not train_loader or not valid_loader:
        log_fn(
            "ERROR - \n "
            "\tMissing annotations for train or valid set.\n"
            "\tHint: Check if tags_train and tags_valid directories exist.\n")
        return

    # The number of classifier outputs depends on which label set is trained
    if temporal_training:
        num_output = len(label_names_temporal)
    else:
        num_output = len(label_names)

    # Build the classifier that is trained on top of the features
    gesture_classifier = LogisticRegression(
        num_in=backbone_network.feature_dim,
        num_out=num_output,
        use_softmax=False)

    if resume:
        gesture_classifier.load_state_dict(checkpoint_classifier)

    if num_layers_to_finetune > 0:
        # remove internal padding for training
        fine_tuned_layers.apply(set_internal_padding_false)
        net = Pipe(fine_tuned_layers, gesture_classifier)
    else:
        net = gesture_classifier
    net.train()

    if use_gpu:
        net = net.cuda()

    # Learning rate per starting epoch: one step down at the half-way point
    lr_schedule = {
        0: 0.0001,
        int(epochs / 2): 0.00001
    } if epochs > 1 else {
        0: 0.0001
    }
    num_epochs = epochs

    # Save training config and label2int dictionary
    config = {
        'backbone_name': selected_config.model_name,
        'backbone_version': selected_config.version,
        'num_layers_to_finetune': num_layers_to_finetune,
        'classifier': str(gesture_classifier),
        'temporal_training': temporal_training,
        'lr_schedule': lr_schedule,
        'num_epochs': num_epochs,
        'start_time': str(datetime.datetime.now()),
        'end_time': '',
    }
    with open(os.path.join(path_out, 'config.json'), 'w') as f:
        json.dump(config, f, indent=2)

    with open(os.path.join(path_out, 'label2int.json'), 'w') as f:
        json.dump(
            label2int_temporal_annotation if temporal_training else label2int,
            f, indent=2)

    # Train model
    best_model_state_dict = training_loops(
        net, train_loader, valid_loader, use_gpu, num_epochs, lr_schedule,
        label_names, path_out,
        temporal_annotation_training=temporal_training,
        log_fn=log_fn,
        confmat_event=confmat_event)

    # Save best model
    # (state dict keys of a Pipe are cleaned via clean_pipe_state_dict_key first)
    if isinstance(net, Pipe):
        best_model_state_dict = {
            clean_pipe_state_dict_key(key): value
            for key, value in best_model_state_dict.items()
        }
    torch.save(best_model_state_dict,
               os.path.join(path_out, "best_classifier.checkpoint"))

    # Record the end time in the already-written config
    config['end_time'] = str(datetime.datetime.now())
    with open(os.path.join(path_out, 'config.json'), 'w') as f:
        json.dump(config, f, indent=2)
def train_logreg(path, split, label):
    """
    (Re-)Train a logistic regression model on all annotations that have been submitted so far.

    Every ``<video>.json`` file in the tags directory is paired with the feature
    file ``<video>.npy``. Each annotated tag (1 or 2) is propagated to the frame
    that follows it before training. The model is fitted and saved as
    ``logreg.joblib`` only if at least one non-background tag was found.

    :param path: Project path handed to the ``directories`` helpers.
    :param split: Dataset split of the annotated videos.
    :param label: Class label of the annotated videos.
    :return: ``None``. Returns early when no annotation files are present.
    """
    _, model_config = load_feature_extractor(path)

    features_dir = directories.get_features_dir(path, split, model_config, label=label)
    tags_dir = directories.get_tags_dir(path, split, label)
    logreg_dir = directories.get_logreg_dir(path, model_config, label)
    logreg_path = os.path.join(logreg_dir, 'logreg.joblib')

    if not os.path.exists(tags_dir):
        return

    # Only annotation files are considered; anything else in the directory
    # (e.g. hidden files created by the OS) would fail to parse as JSON.
    tag_files = [name for name in os.listdir(tags_dir) if name.endswith('.json')]
    if not tag_files:
        return

    x = []
    y = []
    class_weight = {0: 0.5}

    for tag_file in tag_files:
        # Swap only the extension, so that '.json' appearing inside the video
        # name itself is left untouched
        video_name = tag_file[:-len('.json')]
        feature = np.load(os.path.join(features_dir, f'{video_name}.npy'))
        # Spatial average: one feature vector per time step
        x.extend(f.mean(axis=(1, 2)) for f in feature)

        with open(os.path.join(tags_dir, tag_file), 'r') as f:
            annotation = json.load(f)['time_annotation']

        for tag in (1, 2):
            # Positions are determined before propagating, so a tag only ever
            # spreads by a single frame
            positions = np.where(np.array(annotation).astype(int) == tag)[0]
            if len(positions) > 0:
                class_weight[tag] = 2

            for p in positions:
                if p + 1 < len(annotation):
                    annotation[p + 1] = tag

        y.extend(annotation)

    x = np.array(x)
    y = np.array(y)

    # Training needs at least one non-background tag
    if len(class_weight) > 1:
        # The output directory is not guaranteed to exist yet
        os.makedirs(logreg_dir, exist_ok=True)
        logreg = LogisticRegression(C=0.1, class_weight=class_weight)
        logreg.fit(x, y)
        dump(logreg, logreg_path)
def train_logreg():
    """
    (Re-)Train a logistic regression model on all annotations that have been submitted so far.

    Reads ``idx``, ``path``, ``project``, ``split`` and ``label`` from the POST
    form data. Each annotated tag (1 or 2) is propagated to the frame that
    follows it before training. The model is fitted and saved as ``logreg.joblib``
    only if at least one non-background tag was found. In all cases the user is
    redirected back to the annotation page of video ``idx``.
    """
    data = request.form  # a multi-dict containing POST data
    idx = int(data['idx'])
    path = data['path']
    project = data['project']
    split = data['split']
    label = data['label']

    _, model_config = utils.load_feature_extractor(path)

    features_dir = directories.get_features_dir(path, split, model_config, label=label)
    tags_dir = directories.get_tags_dir(path, split, label)
    logreg_dir = directories.get_logreg_dir(path, model_config, label)
    logreg_path = os.path.join(logreg_dir, 'logreg.joblib')

    # A missing tags directory simply means that nothing has been annotated yet
    annotations = os.listdir(tags_dir) if os.path.exists(tags_dir) else []
    class_weight = {0: 0.5}

    if annotations:
        features = [os.path.join(features_dir, x.replace('.json', '.npy')) for x in annotations]
        annotations = [os.path.join(tags_dir, x) for x in annotations]
        X = []
        y = []

        for feature in features:
            feature = np.load(feature)
            # Spatial average: one feature vector per time step
            X.extend(f.mean(axis=(1, 2)) for f in feature)

        for annotation in annotations:
            with open(annotation, 'r') as f:
                annotation = json.load(f)['time_annotation']

            for tag in (1, 2):
                # Positions are determined before propagating, so a tag only
                # ever spreads by a single frame
                positions = np.where(np.array(annotation).astype(int) == tag)[0]
                if len(positions) > 0:
                    class_weight[tag] = 2

                for p in positions:
                    if p + 1 < len(annotation):
                        annotation[p + 1] = tag

            y.extend(annotation)

        X = np.array(X)
        y = np.array(y)

        # Training needs at least one non-background tag
        if len(class_weight) > 1:
            # The output directory is not guaranteed to exist yet
            os.makedirs(logreg_dir, exist_ok=True)
            logreg = LogisticRegression(C=0.1, class_weight=class_weight)
            logreg.fit(X, y)
            dump(logreg, logreg_path)

    return redirect(url_for('.annotate', split=split, label=label, project=project, idx=idx))
def annotate(project, split, label, idx):
    """
    For the given class label, show all frames for annotating the selected video.

    Videos are ordered with a natural, case-insensitive sort so that ``idx``
    refers to the same video as in the video list view. If a trained logistic
    regression model exists, its per-frame predictions are attached to the
    images; otherwise every frame gets the class ``-1``.
    """
    project = urllib.parse.unquote(project)
    path = utils.lookup_project_path(project)
    label = urllib.parse.unquote(label)
    split = urllib.parse.unquote(split)

    _, model_config = utils.load_feature_extractor(path)

    frames_dir = directories.get_frames_dir(path, split, label)
    features_dir = directories.get_features_dir(path, split, model_config, label=label)
    tags_dir = directories.get_tags_dir(path, split, label)
    logreg_dir = directories.get_logreg_dir(path, model_config, label)

    # Same ordering as the video list; a plain lexicographic sort would map the
    # same index to a different video (e.g. 'video10' before 'video2').
    videos = natsorted(os.listdir(frames_dir), alg=ns.IC)
    video_name = videos[idx]

    features = np.load(os.path.join(features_dir, f'{video_name}.npy'))
    features = features.mean(axis=(2, 3))

    # Load logistic regression model if available
    logreg_path = os.path.join(logreg_dir, 'logreg.joblib')
    if os.path.isfile(logreg_path):
        logreg = load(logreg_path)
        classes = list(logreg.predict(features))
    else:
        classes = [-1] * len(features)

    # The list of images in the folder
    images = [image for image in glob.glob(os.path.join(frames_dir, video_name, '*'))
              if utils.is_image_file(image)]

    # Natural sort images, so that they are sorted by number
    images = natsorted(images, alg=ns.IC)
    # Extract image file name (without full path) and include class label
    images = [(os.path.basename(image), _class) for image, _class in zip(images, classes)]

    # Load existing annotations
    annotations = []
    annotations_file = os.path.join(tags_dir, f'{video_name}.json')
    if os.path.exists(annotations_file):
        with open(annotations_file, 'r') as f:
            data = json.load(f)
            annotations = data['time_annotation']

    # Read tags from config
    config = utils.load_project_config(path)
    tags = config['classes'][label]

    return render_template('frame_annotation.html',
                           images=images,
                           annotations=annotations,
                           idx=idx,
                           fps=16,
                           n_images=len(images),
                           video_name=video_name,
                           split=split,
                           label=label,
                           path=path,
                           tags=tags,
                           project=project,
                           n_videos=len(videos))
def submit_annotation():
    """
    Submit annotated tags for all frames and save them to a json file.

    Reads the video identification and one ``<frame_idx>_tag`` field per frame
    from the POST form data and writes them to ``<tags_dir>/<video>.json``.
    If assisted tagging is enabled for the project, missing frames/features are
    computed and the logistic regression model is re-trained. Finally redirects
    to the next video, or back to the video list after the last one.
    """
    data = request.form  # a multi-dict containing POST data
    idx = int(data['idx'])
    fps = float(data['fps'])
    path = data['path']
    project = data['project']
    split = data['split']
    label = data['label']
    video = data['video']
    next_frame_idx = idx + 1

    frames_dir = directories.get_frames_dir(path, split, label)
    tags_dir = directories.get_tags_dir(path, split, label)
    # The tags directory may not exist yet if this is the first submission
    os.makedirs(tags_dir, exist_ok=True)

    description = {
        'file': f'{video}.mp4',
        'fps': fps,
        'time_annotation': [int(data[f'{frame_idx}_tag'])
                            for frame_idx in range(int(data['n_images']))],
    }

    out_annotation = os.path.join(tags_dir, f'{video}.json')
    with open(out_annotation, 'w') as f:
        json.dump(description, f, indent=2)

    # Automatic re-training of the logistic regression model
    if utils.get_project_setting(path, 'assisted_tagging'):
        inference_engine, model_config = utils.load_feature_extractor(path)

        videos_dir = directories.get_videos_dir(path, split, label)
        features_dir = directories.get_features_dir(path, split, model_config, label=label)

        # Compute the respective frames and features
        compute_frames_and_features(inference_engine=inference_engine,
                                    project_path=path,
                                    videos_dir=videos_dir,
                                    frames_dir=frames_dir,
                                    features_dir=features_dir)

        # Re-train the logistic regression model
        utils.train_logreg(path=path, split=split, label=label)

    # After the last video, go back to the overview instead of a non-existing index
    if next_frame_idx >= len(os.listdir(frames_dir)):
        return redirect(url_for('.show_video_list', project=project, split=split, label=label))

    return redirect(url_for('.annotate', split=split, label=label, project=project, idx=next_frame_idx))
def annotate(project, split, label, idx):
    """
    For the given class label, show all frames for annotating the selected video.

    When assisted tagging is enabled and both a trained logistic regression model
    and the video's features are available, per-frame predictions are attached to
    the images; otherwise every frame gets the class ``-1``. Predicted or stored
    tags that no longer belong to the class are shown as background (0).
    """
    project = urllib.parse.unquote(project)
    path = project_utils.lookup_project_path(project)
    label = urllib.parse.unquote(label)
    split = urllib.parse.unquote(split)

    config = project_utils.load_project_config(path)
    tags = config['tags'].copy()
    tags[0] = 'background'

    # Tags selectable for this class; 'background' (0) is always included
    class_tags = sorted(config['classes'][label] + [0])

    _, model_config = utils.load_feature_extractor(path)

    frames_dir = directories.get_frames_dir(path, split, label)
    features_dir = directories.get_features_dir(path, split, model_config, label=label)
    tags_dir = directories.get_tags_dir(path, split, label)
    logreg_dir = directories.get_logreg_dir(path, model_config)

    videos = natsorted(os.listdir(frames_dir), alg=ns.IC)
    video_name = videos[idx]

    # The list of images in the folder
    images = [
        image
        for image in glob.glob(os.path.join(frames_dir, video_name, '*'))
        if utils.is_image_file(image)
    ]
    classes = [-1] * len(images)

    # Load logistic regression model if available and assisted tagging is enabled
    if utils.get_project_setting(path, 'assisted_tagging'):
        logreg_path = os.path.join(logreg_dir, 'logreg.joblib')
        features_path = os.path.join(features_dir, f'{video_name}.npy')

        if os.path.isfile(logreg_path) and os.path.isfile(features_path):
            logreg = load(logreg_path)
            features = np.load(features_path).mean(axis=(2, 3))

            # Reset tags that have been removed from the class to 'background'
            classes = [
                predicted if predicted in class_tags else 0
                for predicted in logreg.predict(features)
            ]

    # Natural sort images, so that they are sorted by number
    images = natsorted(images, alg=ns.IC)
    # Extract image file name (without full path) and include class label
    images = [(os.path.basename(image), _class) for image, _class in zip(images, classes)]

    # Load existing annotations
    annotations_file = os.path.join(tags_dir, f'{video_name}.json')
    if not os.path.exists(annotations_file):
        # Use "background" label for all frames per default
        annotations = [0] * len(images)
    else:
        with open(annotations_file, 'r') as f:
            data = json.load(f)

        # Reset tags that have been removed from the class to 'background'
        annotations = [
            stored if stored in class_tags else 0
            for stored in data['time_annotation']
        ]

    return render_template('frame_annotation.html',
                           images=images,
                           annotations=annotations,
                           idx=idx,
                           fps=16,
                           n_images=len(images),
                           video_name=video_name,
                           project_config=config,
                           split=split,
                           label=label,
                           path=path,
                           project=project,
                           n_videos=len(videos),
                           tags=tags,
                           class_tags=class_tags)
def train_logreg(path):
    """
    (Re-)Train a logistic regression model on all annotations that have been submitted so far.

    Annotations from every split and every class of the project are pooled.
    Tags that are no longer part of a class are treated as background (0).
    The model is fitted and saved as ``logreg.joblib`` only if more than one
    distinct tag is present in the pooled annotations.
    """
    inference_engine, model_config = utils.load_feature_extractor(path)

    logreg_dir = directories.get_logreg_dir(path, model_config)
    logreg_path = os.path.join(logreg_dir, 'logreg.joblib')
    project_config = project_utils.load_project_config(path)
    classes = project_config['classes']

    all_features = []
    all_annotations = []

    for split in SPLITS:
        for label, class_tags in classes.items():
            videos_dir = directories.get_videos_dir(path, split, label)
            frames_dir = directories.get_frames_dir(path, split, label)
            features_dir = directories.get_features_dir(path, split, model_config, label=label)
            tags_dir = directories.get_tags_dir(path, split, label)

            # Nothing has been annotated for this split/class yet
            if not os.path.exists(tags_dir):
                continue

            # Compute the respective frames and features
            compute_frames_and_features(inference_engine=inference_engine,
                                        project_path=path,
                                        videos_dir=videos_dir,
                                        frames_dir=frames_dir,
                                        features_dir=features_dir)

            for video_tag_file in os.listdir(tags_dir):
                feature_file = os.path.join(features_dir, video_tag_file.replace('.json', '.npy'))
                annotation_file = os.path.join(tags_dir, video_tag_file)

                # Spatial average: one feature vector per time step
                all_features.extend(
                    step_features.mean(axis=(1, 2)) for step_features in np.load(feature_file)
                )

                with open(annotation_file, 'r') as tag_file:
                    stored_tags = json.load(tag_file)['time_annotation']

                # Reset tags that have been removed from the class to 'background'
                all_annotations.extend(
                    tag_idx if tag_idx in class_tags else 0 for tag_idx in stored_tags
                )

    # Use low class weight for background and higher weight for all present tags
    annotated_tags = set(all_annotations)
    class_weight = dict.fromkeys(annotated_tags, 2)
    class_weight[0] = 0.5

    all_features = np.array(all_features)
    all_annotations = np.array(all_annotations)

    # A classifier needs at least two distinct tags to be trained
    if len(annotated_tags) <= 1:
        return

    os.makedirs(logreg_dir, exist_ok=True)
    logreg = LogisticRegression(C=0.1, class_weight=class_weight)
    logreg.fit(all_features, all_annotations)
    dump(logreg, logreg_path)