Example #1
def project_details(project):
    """
    Show the details for the selected project.
    """
    project = urllib.parse.unquote(project)
    path = utils.lookup_project_path(project)
    config = utils.load_project_config(path)

    stats = {}
    for class_name in config['classes']:
        stats[class_name] = {}
        for split in SPLITS:
            videos_dir = directories.get_videos_dir(path, split, class_name)
            tags_dir = directories.get_tags_dir(path, split, class_name)
            stats[class_name][split] = {
                'total': len(os.listdir(videos_dir)),
                'tagged': len(os.listdir(tags_dir)) if os.path.exists(tags_dir) else 0,
            }

    return render_template('project_details.html',
                           config=config,
                           path=path,
                           stats=stats,
                           project=config['name'])
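
A minimal sketch of the `stats` structure assembled above, assuming SPLITS contains 'train' and 'valid' and a single hypothetical class; the counts are illustrative:

stats = {
    'jumping': {
        'train': {'total': 12, 'tagged': 5},
        'valid': {'total': 4, 'tagged': 0},
    },
}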
Example #2
def edit_class(project, class_name):
    """
    Edit the class name and tags for an existing class in the given project.
    """
    project = urllib.parse.unquote(project)
    class_name = urllib.parse.unquote(class_name)
    path = project_utils.lookup_project_path(project)

    # Get new class name and tags
    new_class_name, new_tag1, new_tag2 = utils.get_class_name_and_tags(
        request.form)

    # Update project config
    config = project_utils.load_project_config(path)
    del config['classes'][class_name]
    config['classes'][new_class_name] = [new_tag1, new_tag2]
    project_utils.write_project_config(path, config)

    # Update directory names
    data_dirs = []
    for split in SPLITS:
        data_dirs.extend([
            directories.get_videos_dir(path, split),
            directories.get_frames_dir(path, split),
            directories.get_tags_dir(path, split),
        ])

        # Feature directories follow the format <dataset_dir>/<split>/<model>/<num_layers_to_finetune>/<label>
        features_dir = directories.get_features_dir(path, split)
        if os.path.exists(features_dir):
            model_dirs = [
                os.path.join(features_dir, model_dir)
                for model_dir in os.listdir(features_dir)
            ]
            data_dirs.extend([
                os.path.join(model_dir, tuned_layers)
                for model_dir in model_dirs
                for tuned_layers in os.listdir(model_dir)
            ])

    logreg_dir = directories.get_logreg_dir(path)
    if os.path.exists(logreg_dir):
        data_dirs.extend([
            os.path.join(logreg_dir, model_dir)
            for model_dir in os.listdir(logreg_dir)
        ])

    for base_dir in data_dirs:
        class_dir = os.path.join(base_dir, class_name)

        if os.path.exists(class_dir):
            new_class_dir = os.path.join(base_dir, new_class_name)
            os.rename(class_dir, new_class_dir)

    return redirect(url_for('project_details', project=project))
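
A minimal sketch of the feature-directory layout described in the comment above; the dataset path, model name, and layer count are hypothetical:

import os

# <dataset_dir>/<split>/<model>/<num_layers_to_finetune>/<label>
old_dir = os.path.join('dataset', 'features_train', 'some_model', '9', 'old_class')
new_dir = os.path.join(os.path.dirname(old_dir), 'new_class')
# The final loop above performs the equivalent of os.rename(old_dir, new_dir)
# for every videos/frames/tags/features/logreg directory that exists.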
Example #3
def show_video_list(project, split, label):
    """
    Show the list of videos for the given split, class label and project.
    If the necessary files for annotation haven't been prepared yet, this is done now.
    """
    project = urllib.parse.unquote(project)
    path = project_utils.lookup_project_path(project)
    split = urllib.parse.unquote(split)
    label = urllib.parse.unquote(label)

    # Load the feature extractor
    inference_engine, model_config = utils.load_feature_extractor(path)

    videos_dir = directories.get_videos_dir(path, split, label)
    frames_dir = directories.get_frames_dir(path, split, label)
    features_dir = directories.get_features_dir(path,
                                                split,
                                                model_config,
                                                label=label)
    tags_dir = directories.get_tags_dir(path, split, label)
    logreg_dir = directories.get_logreg_dir(path, model_config, label)

    os.makedirs(logreg_dir, exist_ok=True)
    os.makedirs(tags_dir, exist_ok=True)

    # Compute any missing frames and features
    compute_frames_and_features(inference_engine=inference_engine,
                                project_path=path,
                                videos_dir=videos_dir,
                                frames_dir=frames_dir,
                                features_dir=features_dir)

    videos = os.listdir(frames_dir)
    videos = natsorted(videos, alg=ns.IC)

    tagged_list = set(os.listdir(tags_dir))
    tagged = [f'{video}.json' in tagged_list for video in videos]

    num_videos = len(videos)
    num_tagged = len(tagged_list)
    num_untagged = num_videos - num_tagged

    video_list = zip(videos, tagged, range(len(videos)))
    return render_template('video_list.html',
                           video_list=video_list,
                           split=split,
                           label=label,
                           path=path,
                           project=project,
                           num_videos=num_videos,
                           num_tagged=num_tagged,
                           num_untagged=num_untagged)
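
A minimal sketch of the tagging bookkeeping above, with hypothetical video names: a video counts as tagged when a matching .json file exists in the tags directory.

videos = ['clip1', 'clip2', 'clip3']
tagged_list = {'clip1.json', 'clip3.json'}
tagged = [f'{video}.json' in tagged_list for video in videos]
# -> [True, False, True]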
Example #4
def submit_annotation():
    """
    Submit annotated tags for all frames and save them to a json file.
    """
    data = request.form  # a multi-dict containing POST data
    idx = int(data['idx'])
    fps = float(data['fps'])
    path = data['path']
    project = data['project']
    split = data['split']
    label = data['label']
    video = data['video']
    next_frame_idx = idx + 1

    frames_dir = directories.get_frames_dir(path, split, label)
    tags_dir = directories.get_tags_dir(path, split, label)
    description = {'file': f'{video}.mp4', 'fps': fps}

    out_annotation = os.path.join(tags_dir, f'{video}.json')
    time_annotation = []

    for frame_idx in range(int(data['n_images'])):
        time_annotation.append(int(data[f'{frame_idx}_tag']))

    description['time_annotation'] = time_annotation

    with open(out_annotation, 'w') as f:
        json.dump(description, f, indent=2)

    # Automatic re-training of the logistic regression model
    if utils.get_project_setting(path, 'assisted_tagging'):
        train_logreg(path=path)

    if next_frame_idx >= len(os.listdir(frames_dir)):
        return redirect(
            url_for('.show_video_list',
                    project=project,
                    split=split,
                    label=label))

    return redirect(
        url_for('.annotate',
                split=split,
                label=label,
                project=project,
                idx=next_frame_idx))
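
Assuming hypothetical form values, the annotation file written above has this shape (the field names mirror the code; the numbers are illustrative):

description = {
    'file': 'clip_001.mp4',               # hypothetical video name
    'fps': 16.0,
    'time_annotation': [0, 0, 1, 0, 2],   # one tag index per annotated frame
}
# json.dump(description, f, indent=2) produces the <video>.json on disk.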
Example #5
def project_details(project):
    """
    Show the details for the selected project.
    """
    project = urllib.parse.unquote(project)
    path = project_utils.lookup_project_path(project)
    config = project_utils.load_project_config(path)

    stats = {}
    for class_name in config['classes']:
        stats[class_name] = {}
        for split in SPLITS:
            videos_dir = directories.get_videos_dir(path, split, class_name)
            tags_dir = directories.get_tags_dir(path, split, class_name)
            stats[class_name][split] = {
                'total': len(os.listdir(videos_dir)),
                'tagged': len(os.listdir(tags_dir)) if os.path.exists(tags_dir) else 0,
                'videos': natsorted([video for video in os.listdir(videos_dir)
                                     if video.endswith(VIDEO_EXT)],
                                    alg=ns.IC),
            }

    tags = config['tags']
    return render_template('project_details.html',
                           config=config,
                           path=path,
                           stats=stats,
                           project=config['name'],
                           tags=tags)
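
A quick sketch of the per-class video listing above, assuming VIDEO_EXT == '.mp4'; non-video files are skipped and the rest are naturally sorted:

from natsort import natsorted, ns

files = ['clip10.mp4', 'clip2.mp4', 'notes.txt']
videos = natsorted([f for f in files if f.endswith('.mp4')], alg=ns.IC)
# -> ['clip2.mp4', 'clip10.mp4']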
Example #6
def train_model(path_in,
                path_out,
                model_name,
                model_version,
                num_layers_to_finetune,
                epochs,
                use_gpu=True,
                overwrite=True,
                temporal_training=None,
                resume=False,
                log_fn=print,
                confmat_event=None):
    os.makedirs(path_out, exist_ok=True)

    # Check for existing files
    saved_files = [
        "last_classifier.checkpoint", "best_classifier.checkpoint",
        "config.json", "label2int.json", "confusion_matrix.png",
        "confusion_matrix.npy"
    ]

    if not overwrite and any(
            os.path.exists(os.path.join(path_out, file))
            for file in saved_files):
        print(f"Warning: This operation will overwrite files in {path_out}")

        while True:
            confirmation = input(
                "Are you sure? Add --overwrite to hide this warning. (Y/N) ")
            if confirmation.lower() == "y":
                break
            elif confirmation.lower() == "n":
                sys.exit()
            else:
                print('Invalid input')

    # Load weights
    selected_config, weights = get_relevant_weights(
        SUPPORTED_MODEL_CONFIGURATIONS,
        model_name,
        model_version,
        log_fn,
    )
    backbone_weights = weights['backbone']

    if resume:
        # Load the last classifier
        checkpoint_classifier = torch.load(
            os.path.join(path_out, 'last_classifier.checkpoint'))

        # Update original weights in case some intermediate layers have been finetuned
        update_backbone_weights(backbone_weights, checkpoint_classifier)

    # Load backbone network
    backbone_network = build_backbone_network(selected_config,
                                              backbone_weights)

    # Get the required temporal dimension of feature tensors in order to
    # finetune the provided number of layers
    if num_layers_to_finetune > 0:
        num_timesteps = backbone_network.num_required_frames_per_layer.get(
            -num_layers_to_finetune)
        if not num_timesteps:
            # Remove 1 because num_required_frames_per_layer also contains an entry for 0 layers
            num_layers = len(
                backbone_network.num_required_frames_per_layer) - 1
            msg = (f'ERROR - Num of layers to finetune not compatible. '
                   f'Must be an integer between 0 and {num_layers}')
            log_fn(msg)
            raise IndexError(msg)
    else:
        num_timesteps = 1

    # Extract layers to finetune
    if num_layers_to_finetune > 0:
        fine_tuned_layers = backbone_network.cnn[-num_layers_to_finetune:]
        backbone_network.cnn = backbone_network.cnn[0:-num_layers_to_finetune]

    # Extract features for the training data using the frozen part of the backbone
    extract_features(path_in,
                     selected_config,
                     backbone_network,
                     num_layers_to_finetune,
                     use_gpu,
                     num_timesteps=num_timesteps,
                     log_fn=log_fn)

    # Find label names
    label_names = os.listdir(directories.get_videos_dir(path_in, 'train'))
    label_names = [x for x in label_names if not x.startswith('.')]
    label_names_temporal = ['background']

    project_config = load_project_config(path_in)
    if project_config:
        for temporal_tags in project_config['classes'].values():
            label_names_temporal.extend(temporal_tags)
    else:
        for label in label_names:
            label_names_temporal.extend([f'{label}_tag1', f'{label}_tag2'])

    label_names_temporal = sorted(set(label_names_temporal))

    label2int_temporal_annotation = {
        name: index
        for index, name in enumerate(label_names_temporal)
    }
    label2int = {name: index for index, name in enumerate(label_names)}

    extractor_stride = backbone_network.num_required_frames_per_layer_padding[0]

    # Create the data loaders
    features_dir = directories.get_features_dir(path_in, 'train',
                                                selected_config,
                                                num_layers_to_finetune)
    tags_dir = directories.get_tags_dir(path_in, 'train')
    train_loader = generate_data_loader(
        project_config,
        features_dir,
        tags_dir,
        label_names,
        label2int,
        label2int_temporal_annotation,
        num_timesteps=num_timesteps,
        stride=extractor_stride,
        temporal_annotation_only=temporal_training,
    )

    features_dir = directories.get_features_dir(path_in, 'valid',
                                                selected_config,
                                                num_layers_to_finetune)
    tags_dir = directories.get_tags_dir(path_in, 'valid')
    valid_loader = generate_data_loader(
        project_config,
        features_dir,
        tags_dir,
        label_names,
        label2int,
        label2int_temporal_annotation,
        num_timesteps=None,
        batch_size=1,
        shuffle=False,
        stride=extractor_stride,
        temporal_annotation_only=temporal_training,
    )

    # Check if the data is loaded fully
    if not train_loader or not valid_loader:
        log_fn(
            "ERROR - \n "
            "\tMissing annotations for train or valid set.\n"
            "\tHint: Check if tags_train and tags_valid directories exist.\n")
        return

    # Select the number of output classes for the chosen training mode
    if temporal_training:
        num_output = len(label_names_temporal)
    else:
        num_output = len(label_names)

    # Build the classification head on top of the extracted features
    gesture_classifier = LogisticRegression(
        num_in=backbone_network.feature_dim,
        num_out=num_output,
        use_softmax=False)

    if resume:
        gesture_classifier.load_state_dict(checkpoint_classifier)

    if num_layers_to_finetune > 0:
        # remove internal padding for training
        fine_tuned_layers.apply(set_internal_padding_false)
        net = Pipe(fine_tuned_layers, gesture_classifier)
    else:
        net = gesture_classifier
    net.train()

    if use_gpu:
        net = net.cuda()

    lr_schedule = {
        0: 0.0001,
        int(epochs / 2): 0.00001
    } if epochs > 1 else {
        0: 0.0001
    }
    num_epochs = epochs

    # Save training config and label2int dictionary
    config = {
        'backbone_name': selected_config.model_name,
        'backbone_version': selected_config.version,
        'num_layers_to_finetune': num_layers_to_finetune,
        'classifier': str(gesture_classifier),
        'temporal_training': temporal_training,
        'lr_schedule': lr_schedule,
        'num_epochs': num_epochs,
        'start_time': str(datetime.datetime.now()),
        'end_time': '',
    }
    with open(os.path.join(path_out, 'config.json'), 'w') as f:
        json.dump(config, f, indent=2)

    with open(os.path.join(path_out, 'label2int.json'), 'w') as f:
        json.dump(
            label2int_temporal_annotation if temporal_training else label2int,
            f,
            indent=2)

    # Train model
    best_model_state_dict = training_loops(
        net,
        train_loader,
        valid_loader,
        use_gpu,
        num_epochs,
        lr_schedule,
        label_names,
        path_out,
        temporal_annotation_training=temporal_training,
        log_fn=log_fn,
        confmat_event=confmat_event)

    # Save best model
    if isinstance(net, Pipe):
        best_model_state_dict = {
            clean_pipe_state_dict_key(key): value
            for key, value in best_model_state_dict.items()
        }
    torch.save(best_model_state_dict,
               os.path.join(path_out, "best_classifier.checkpoint"))

    config['end_time'] = str(datetime.datetime.now())
    with open(os.path.join(path_out, 'config.json'), 'w') as f:
        json.dump(config, f, indent=2)
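
A quick sketch of the learning-rate schedule built above, assuming epochs=10; the dict maps a starting epoch to a learning rate:

epochs = 10
lr_schedule = ({0: 0.0001, int(epochs / 2): 0.00001}
               if epochs > 1 else {0: 0.0001})
# -> {0: 0.0001, 5: 1e-05}: train at 1e-4, then drop to 1e-5 from epoch 5 on.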
Example #7
def _backwards_compatibility_update(path, config):
    updated = False

    if 'use_gpu' not in config:
        config['use_gpu'] = False
        updated = True

    if 'temporal' not in config:
        config['temporal'] = False
        updated = True

    if 'assisted_tagging' not in config:
        config['assisted_tagging'] = False
        updated = True

    if 'video_recording' not in config:
        config['video_recording'] = {
            'countdown': 3,
            'recording': 5,
        }
        updated = True

    if 'tags' not in config:
        # Collect class-wise tags
        old_classes = config['classes']
        tags_list = []
        for class_name, class_tags in old_classes.items():
            tags_list.extend(class_tags)

        # Assign project-wide unique indices to tags (0 is reserved for 'background')
        tags = {
            idx + 1: tag_name
            for idx, tag_name in enumerate(sorted(tags_list))
        }
        config['tags'] = tags
        config['max_tag_index'] = len(tags_list)

        # Setup class dictionary with tag indices
        inverse_tags = {
            tag_name: tag_idx
            for tag_idx, tag_name in tags.items()
        }
        inverse_tags['background'] = 0
        config['classes'] = {
            class_name: [inverse_tags[tag_name] for tag_name in class_tags]
            for class_name, class_tags in old_classes.items()
        }

        # Translate existing annotations
        for split in SPLITS:
            for label, label_tags in old_classes.items():
                tags_dir = directories.get_tags_dir(path, split, label)
                if os.path.exists(tags_dir):
                    label_tags = ['background'] + label_tags

                    for video_name in os.listdir(tags_dir):
                        annotation_file = os.path.join(tags_dir, video_name)
                        with open(annotation_file, 'r') as f:
                            annotation_data = json.load(f)

                        # Translate relative indices [0, 1, 2] to their names and then to their new absolute indices
                        new_annotations = [
                            inverse_tags[label_tags[idx]]
                            for idx in annotation_data['time_annotation']
                        ]
                        annotation_data['time_annotation'] = new_annotations

                        with open(annotation_file, 'w') as f:
                            json.dump(annotation_data, f, indent=2)

        updated = True
    else:
        # Translate string keys to integers (because JSON does not store integer keys)
        config['tags'] = {
            int(idx_str): tag_name
            for idx_str, tag_name in config['tags'].items()
        }

    if updated:
        # Save updated config
        write_project_config(path, config)

    return config
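
A hedged sketch of the 'tags' migration above for a hypothetical one-class config:

old_config = {'classes': {'swiping': ['swipe_left', 'swipe_right']}}
# After the update, the config would contain:
#   'tags':          {1: 'swipe_left', 2: 'swipe_right'}   (0 is reserved for 'background')
#   'classes':       {'swiping': [1, 2]}
#   'max_tag_index': 2
# Existing per-video annotations are remapped from the class-relative indices
# [0, 1, 2] to these project-wide absolute indices.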
Example #8
def train_logreg(path, split, label):
    """
    (Re-)Train a logistic regression model on all annotations that have been submitted so far.
    """
    _, model_config = load_feature_extractor(path)

    features_dir = directories.get_features_dir(path,
                                                split,
                                                model_config,
                                                label=label)
    tags_dir = directories.get_tags_dir(path, split, label)
    logreg_dir = directories.get_logreg_dir(path, model_config, label)
    logreg_path = os.path.join(logreg_dir, 'logreg.joblib')

    annotations = os.listdir(tags_dir) if os.path.exists(tags_dir) else None

    if not annotations:
        return

    features = [
        os.path.join(features_dir, x.replace('.json', '.npy'))
        for x in annotations
    ]
    annotations = [os.path.join(tags_dir, x) for x in annotations]
    x = []
    y = []

    class_weight = {0: 0.5}

    for feature in features:
        feature = np.load(feature)

        for f in feature:
            x.append(f.mean(axis=(1, 2)))

    for annotation in annotations:
        with open(annotation, 'r') as f:
            annotation = json.load(f)['time_annotation']

        pos1 = np.where(np.array(annotation).astype(int) == 1)[0]

        if len(pos1) > 0:
            class_weight.update({1: 2})

            for p in pos1:
                if p + 1 < len(annotation):
                    annotation[p + 1] = 1

        pos2 = np.where(np.array(annotation).astype(int) == 2)[0]

        if len(pos2) > 0:
            class_weight.update({2: 2})

            for p in pos2:
                if p + 1 < len(annotation):
                    annotation[p + 1] = 2

        for a in annotation:
            y.append(a)

    x = np.array(x)
    y = np.array(y)

    if len(class_weight) > 1:
        logreg = LogisticRegression(C=0.1, class_weight=class_weight)
        logreg.fit(x, y)
        dump(logreg, logreg_path)
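
A minimal sketch of the feature pooling above: each .npy file is assumed to hold a (timesteps, channels, height, width) array, and averaging the spatial axes yields one channel vector per timestep:

import numpy as np

feature = np.zeros((4, 64, 7, 7))            # hypothetical shapes
x = [f.mean(axis=(1, 2)) for f in feature]   # 4 vectors of length 64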
Example #9
def train_logreg():
    """
    (Re-)Train a logistic regression model on all annotations that have been submitted so far.
    """
    data = request.form  # a multi-dict containing POST data
    idx = int(data['idx'])
    path = data['path']
    project = data['project']
    split = data['split']
    label = data['label']

    _, model_config = utils.load_feature_extractor(path)

    features_dir = directories.get_features_dir(path,
                                                split,
                                                model_config,
                                                label=label)
    tags_dir = directories.get_tags_dir(path, split, label)
    logreg_dir = directories.get_logreg_dir(path, model_config, label)
    logreg_path = os.path.join(logreg_dir, 'logreg.joblib')

    annotations = os.listdir(tags_dir) if os.path.exists(tags_dir) else []
    class_weight = {0: 0.5}

    if annotations:
        features = [
            os.path.join(features_dir, x.replace('.json', '.npy'))
            for x in annotations
        ]
        annotations = [os.path.join(tags_dir, x) for x in annotations]
        X = []
        y = []

        for feature in features:
            feature = np.load(feature)

            for f in feature:
                X.append(f.mean(axis=(1, 2)))

        for annotation in annotations:
            with open(annotation, 'r') as f:
                annotation = json.load(f)['time_annotation']

            pos1 = np.where(np.array(annotation).astype(int) == 1)[0]

            if len(pos1) > 0:
                class_weight.update({1: 2})

                for p in pos1:
                    if p + 1 < len(annotation):
                        annotation[p + 1] = 1

            pos2 = np.where(np.array(annotation).astype(int) == 2)[0]

            if len(pos2) > 0:
                class_weight.update({2: 2})

                for p in pos2:
                    if p + 1 < len(annotation):
                        annotation[p + 1] = 2

            for a in annotation:
                y.append(a)

        X = np.array(X)
        y = np.array(y)

        if len(class_weight) > 1:
            logreg = LogisticRegression(C=0.1, class_weight=class_weight)
            logreg.fit(X, y)
            dump(logreg, logreg_path)

    return redirect(
        url_for('.annotate',
                split=split,
                label=label,
                project=project,
                idx=idx))
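
A minimal sketch of the tag propagation above: every frame tagged 1 or 2 also tags the following frame, so each event covers at least two frames:

import numpy as np

annotation = [0, 1, 0, 0, 2, 0]
for tag in (1, 2):
    for p in np.where(np.array(annotation) == tag)[0]:
        if p + 1 < len(annotation):
            annotation[p + 1] = tag
# annotation is now [0, 1, 1, 0, 2, 2]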
Example #10
def annotate(project, split, label, idx):
    """
    For the given class label, show all frames for annotating the selected video.
    """
    project = urllib.parse.unquote(project)
    path = utils.lookup_project_path(project)
    label = urllib.parse.unquote(label)
    split = urllib.parse.unquote(split)

    _, model_config = utils.load_feature_extractor(path)

    frames_dir = directories.get_frames_dir(path, split, label)
    features_dir = directories.get_features_dir(path,
                                                split,
                                                model_config,
                                                label=label)
    tags_dir = directories.get_tags_dir(path, split, label)
    logreg_dir = directories.get_logreg_dir(path, model_config, label)

    videos = os.listdir(frames_dir)
    videos.sort()

    features = np.load(os.path.join(features_dir, f'{videos[idx]}.npy'))
    features = features.mean(axis=(2, 3))

    # Load logistic regression model if available
    logreg_path = os.path.join(logreg_dir, 'logreg.joblib')
    if os.path.isfile(logreg_path):
        logreg = load(logreg_path)
        classes = list(logreg.predict(features))
    else:
        classes = [-1] * len(features)

    # The list of images in the folder
    images = [
        image
        for image in glob.glob(os.path.join(frames_dir, videos[idx], '*'))
        if utils.is_image_file(image)
    ]

    # Natural sort images, so that they are sorted by number
    images = natsorted(images, alg=ns.IC)
    # Extract image file name (without full path) and include class label
    images = [(os.path.basename(image), _class)
              for image, _class in zip(images, classes)]

    # Load existing annotations
    annotations = []
    annotations_file = os.path.join(tags_dir, f'{videos[idx]}.json')
    if os.path.exists(annotations_file):
        with open(annotations_file, 'r') as f:
            data = json.load(f)
            annotations = data['time_annotation']

    # Read tags from config
    config = utils.load_project_config(path)
    tags = config['classes'][label]

    return render_template('frame_annotation.html',
                           images=images,
                           annotations=annotations,
                           idx=idx,
                           fps=16,
                           n_images=len(images),
                           video_name=videos[idx],
                           split=split,
                           label=label,
                           path=path,
                           tags=tags,
                           project=project,
                           n_videos=len(videos))
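
A hedged, synthetic sketch of the assisted-tagging prediction above: pooled features of shape (n_frames, channels) yield one predicted tag per frame; all data here is made up:

import numpy as np
from sklearn.linear_model import LogisticRegression

features = np.random.rand(20, 64)
labels = np.array([0] * 10 + [1] * 5 + [2] * 5)
logreg = LogisticRegression(C=0.1, class_weight={0: 0.5, 1: 2, 2: 2})
logreg.fit(features, labels)
classes = list(logreg.predict(features))   # one tag index per frame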
Example #11
def annotate(project, split, label, idx):
    """
    For the given class label, show all frames for annotating the selected video.
    """
    project = urllib.parse.unquote(project)
    path = project_utils.lookup_project_path(project)
    label = urllib.parse.unquote(label)
    split = urllib.parse.unquote(split)

    config = project_utils.load_project_config(path)
    tags = config['tags'].copy()
    tags[0] = 'background'

    class_tags = config['classes'][label].copy()
    class_tags.append(0)  # Always add 'background'
    class_tags.sort()

    _, model_config = utils.load_feature_extractor(path)

    frames_dir = directories.get_frames_dir(path, split, label)
    features_dir = directories.get_features_dir(path,
                                                split,
                                                model_config,
                                                label=label)
    tags_dir = directories.get_tags_dir(path, split, label)
    logreg_dir = directories.get_logreg_dir(path, model_config)

    videos = os.listdir(frames_dir)
    videos = natsorted(videos, alg=ns.IC)

    # The list of images in the folder
    images = [
        image
        for image in glob.glob(os.path.join(frames_dir, videos[idx], '*'))
        if utils.is_image_file(image)
    ]
    classes = [-1] * len(images)

    # Load logistic regression model if available and assisted tagging is enabled
    if utils.get_project_setting(path, 'assisted_tagging'):
        logreg_path = os.path.join(logreg_dir, 'logreg.joblib')
        features_path = os.path.join(features_dir, f'{videos[idx]}.npy')
        if os.path.isfile(logreg_path) and os.path.isfile(features_path):
            logreg = load(logreg_path)
            features = np.load(features_path).mean(axis=(2, 3))
            classes = list(logreg.predict(features))

            # Reset tags that have been removed from the class to 'background'
            classes = [
                tag_idx if tag_idx in class_tags else 0 for tag_idx in classes
            ]

    # Natural sort images, so that they are sorted by number
    images = natsorted(images, alg=ns.IC)
    # Extract image file name (without full path) and include class label
    images = [(os.path.basename(image), _class)
              for image, _class in zip(images, classes)]

    # Load existing annotations
    annotations_file = os.path.join(tags_dir, f'{videos[idx]}.json')
    if os.path.exists(annotations_file):
        with open(annotations_file, 'r') as f:
            data = json.load(f)
            annotations = data['time_annotation']

            # Reset tags that have been removed from the class to 'background'
            annotations = [
                tag_idx if tag_idx in class_tags else 0
                for tag_idx in annotations
            ]
    else:
        # Use "background" label for all frames per default
        annotations = [0] * len(images)

    return render_template('frame_annotation.html',
                           images=images,
                           annotations=annotations,
                           idx=idx,
                           fps=16,
                           n_images=len(images),
                           video_name=videos[idx],
                           project_config=config,
                           split=split,
                           label=label,
                           path=path,
                           project=project,
                           n_videos=len(videos),
                           tags=tags,
                           class_tags=class_tags)
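
A minimal sketch of the reset step above: any tag that is no longer assigned to the class falls back to 0 ('background'); the indices are hypothetical:

class_tags = [0, 3, 7]
classes = [3, 5, 7, 0]
classes = [tag if tag in class_tags else 0 for tag in classes]
# -> [3, 0, 7, 0]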
Example #12
def train_logreg(path):
    """
    (Re-)Train a logistic regression model on all annotations that have been submitted so far.
    """
    inference_engine, model_config = utils.load_feature_extractor(path)

    logreg_dir = directories.get_logreg_dir(path, model_config)
    logreg_path = os.path.join(logreg_dir, 'logreg.joblib')
    project_config = project_utils.load_project_config(path)
    classes = project_config['classes']

    all_features = []
    all_annotations = []

    for split in SPLITS:
        for label, class_tags in classes.items():
            videos_dir = directories.get_videos_dir(path, split, label)
            frames_dir = directories.get_frames_dir(path, split, label)
            features_dir = directories.get_features_dir(path,
                                                        split,
                                                        model_config,
                                                        label=label)
            tags_dir = directories.get_tags_dir(path, split, label)

            if not os.path.exists(tags_dir):
                continue

            # Compute the respective frames and features
            compute_frames_and_features(inference_engine=inference_engine,
                                        project_path=path,
                                        videos_dir=videos_dir,
                                        frames_dir=frames_dir,
                                        features_dir=features_dir)

            video_tag_files = os.listdir(tags_dir)

            for video_tag_file in video_tag_files:
                feature_file = os.path.join(
                    features_dir, video_tag_file.replace('.json', '.npy'))
                annotation_file = os.path.join(tags_dir, video_tag_file)

                features = np.load(feature_file)
                for f in features:
                    all_features.append(f.mean(axis=(1, 2)))

                with open(annotation_file, 'r') as f:
                    annotations = json.load(f)['time_annotation']

                # Reset tags that have been removed from the class to 'background'
                annotations = [
                    tag_idx if tag_idx in class_tags else 0
                    for tag_idx in annotations
                ]

                all_annotations.extend(annotations)

    # Use low class weight for background and higher weight for all present tags
    annotated_tags = set(all_annotations)
    class_weight = {tag: 2 for tag in annotated_tags}
    class_weight[0] = 0.5

    all_features = np.array(all_features)
    all_annotations = np.array(all_annotations)

    if len(annotated_tags) > 1:
        os.makedirs(logreg_dir, exist_ok=True)
        logreg = LogisticRegression(C=0.1, class_weight=class_weight)
        logreg.fit(all_features, all_annotations)
        dump(logreg, logreg_path)
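
A minimal sketch of the class weighting above: 'background' (tag 0) is down-weighted and every tag that actually occurs is weighted 2; training only proceeds when more than one class is present:

all_annotations = [0, 0, 1, 0, 2, 2]
class_weight = {tag: 2 for tag in set(all_annotations)}
class_weight[0] = 0.5
# -> {0: 0.5, 1: 2, 2: 2}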