Example 1
def run_action_recognition(model_name: str,
                           model_version: str,
                           title: Optional[str] = None,
                           display_fn: Optional[Callable] = None,
                           **kwargs):
    """
    :param model_name:
        Name of the backbone model (StridedInflatedEfficientNet or StridedInflatedMobileNetV2).
    :param model_version:
        Model version (pro or lite).
    :param title:
        Title of the image frame on display.
    :param display_fn:
        Optional function to further process the displayed image.
    """
    # Load weights
    selected_config, weights = get_relevant_weights(
        SUPPORTED_MODEL_CONFIGURATIONS, model_name, model_version)

    # Load backbone network
    backbone_network = build_backbone_network(selected_config,
                                              weights['backbone'])

    # Create a logistic regression classifier
    action_classifier = LogisticRegression(num_in=backbone_network.feature_dim,
                                           num_out=30)
    action_classifier.load_state_dict(weights['action_recognition'])
    action_classifier.eval()

    # Concatenate backbone network and logistic regression
    net = Pipe(backbone_network, action_classifier)

    postprocessor = [PostprocessClassificationOutput(INT2LAB, smoothing=4)]

    border_size = 30

    display_ops = [
        sense.display.DisplayFPS(expected_camera_fps=net.fps,
                                 expected_inference_fps=net.fps /
                                 net.step_size),
        sense.display.DisplayTopKClassificationOutputs(top_k=1, threshold=0.5),
        sense.display.DisplayClassnameOverlay(
            thresholds=LAB_THRESHOLDS,
            border_size_top=border_size if not title else border_size + 50),
    ]

    display_results = sense.display.DisplayResults(title=title,
                                                   display_ops=display_ops,
                                                   display_fn=display_fn)

    # Run live inference
    controller = Controller(neural_network=net,
                            post_processors=postprocessor,
                            results_display=display_results,
                            callbacks=[],
                            **kwargs)
    controller.run_inference()
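For orientation, here is a minimal invocation sketch. The argument values are illustrative, and the keyword arguments are assumed to be forwarded through **kwargs to Controller (which, per Example 10 below, accepts camera_id, path_in, path_out and use_gpu).

# Hypothetical usage -- values and forwarding behavior are assumptions.
run_action_recognition(model_name='StridedInflatedEfficientNet',
                       model_version='pro',
                       title='Action Recognition',
                       camera_id=0,      # forwarded to Controller via **kwargs
                       use_gpu=True)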
Example 2

        checkpoint[key] = checkpoint_classifier.pop(key)
    feature_extractor.load_state_dict(checkpoint)
    feature_extractor.eval()
    with open(os.path.join(custom_classifier, 'label2int.json')) as file:
        class2int = json.load(file)
    INT2LAB = {value: key for key, value in class2int.items()}

    gesture_classifier = LogisticRegression(
        num_in=feature_extractor.feature_dim, num_out=len(INT2LAB))
    gesture_classifier.load_state_dict(checkpoint_classifier)
    gesture_classifier.eval()
    print(gesture_classifier)

    # Concatenate feature extractor and gesture classifier
    net = Pipe(feature_extractor, gesture_classifier)

    postprocessor = [PostprocessClassificationOutput(INT2LAB, smoothing=4)]

    display_ops = [
        sense.display.DisplayFPS(expected_camera_fps=net.fps,
                                 expected_inference_fps=net.fps /
                                 net.step_size),
        sense.display.DisplayTopKClassificationOutputs(top_k=1, threshold=0.3),
    ]
    display_results = sense.display.DisplayResults(title=title,
                                                   display_ops=display_ops)

    # Run live inference
    controller = Controller(neural_network=net,
                            post_processors=postprocessor,
Example 3
    # Load weights
    selected_config, weights = get_relevant_weights(
        SUPPORTED_MODEL_CONFIGURATIONS, model_name, model_version)

    # Load backbone network
    backbone_network = build_backbone_network(selected_config,
                                              weights['backbone'])

    # Create a logistic regression classifier
    gesture_classifier = LogisticRegression(
        num_in=backbone_network.feature_dim, num_out=30)
    gesture_classifier.load_state_dict(weights['gesture_recognition'])
    gesture_classifier.eval()

    # Concatenate backbone network and logistic regression
    net = Pipe(backbone_network, gesture_classifier)

    postprocessor = [PostprocessClassificationOutput(INT2LAB, smoothing=4)]

    border_size = 30

    display_ops = [
        sense.display.DisplayFPS(expected_camera_fps=net.fps,
                                 expected_inference_fps=net.fps /
                                 net.step_size),
        sense.display.DisplayTopKClassificationOutputs(top_k=1, threshold=0.5),
        sense.display.DisplayClassnameOverlay(
            thresholds=LAB_THRESHOLDS,
            border_size=border_size if not title else border_size + 50),
    ]
    display_results = sense.display.DisplayResults(title=title,
Example 4
def init_model(transform):
    use_gpu = True
    inference_engine = None
    neural_network = None
    postprocessors = None

    if transform == 'gesture':
        # Load feature extractor
        feature_extractor = feature_extractors.StridedInflatedEfficientNet()
        feature_extractor.load_weights_from_resources(
            'backbone/strided_inflated_efficientnet.ckpt')
        feature_extractor.eval()

        # Load a logistic regression classifier
        gesture_classifier = LogisticRegression(
            num_in=feature_extractor.feature_dim, num_out=30)
        checkpoint = load_weights_from_resources(
            'gesture_detection/efficientnet_logistic_regression.ckpt')
        gesture_classifier.load_state_dict(checkpoint)
        gesture_classifier.eval()

        # Concatenate feature extractor and gesture classifier
        neural_network = Pipe(feature_extractor, gesture_classifier)
        postprocessors = [
            PostprocessClassificationOutput(INT2LAB, smoothing=4)
        ]

    elif transform == 'fitness':
        weight = 60.0
        height = 170.0
        age = 20.0
        gender = 'female'

        # Load feature extractor
        feature_extractor = feature_extractors.StridedInflatedMobileNetV2()
        feature_extractor.load_weights_from_resources(
            'backbone/strided_inflated_mobilenet.ckpt')
        feature_extractor.eval()

        # Load fitness activity classifier
        gesture_classifier = LogisticRegression(
            num_in=feature_extractor.feature_dim, num_out=81)
        checkpoint = load_weights_from_resources(
            'fitness_activity_recognition/mobilenet_logistic_regression.ckpt')
        gesture_classifier.load_state_dict(checkpoint)
        gesture_classifier.eval()

        # Load MET value converter
        met_value_converter = calorie_estimation.METValueMLPConverter()
        checkpoint = load_weights_from_resources(
            'calorie_estimation/mobilenet_features_met_converter.ckpt')
        met_value_converter.load_state_dict(checkpoint)
        met_value_converter.eval()

        # Concatenate feature extractor with downstream nets
        neural_network = Pipe(
            feature_extractor,
            feature_converter=[gesture_classifier, met_value_converter])

        postprocessors = [
            PostprocessClassificationOutput(INT2LAB, smoothing=8, indices=[0]),
            calorie_estimation.CalorieAccumulator(weight=weight,
                                                  height=height,
                                                  age=age,
                                                  gender=gender,
                                                  smoothing=12,
                                                  indices=[1])
        ]

    if neural_network is not None:
        inference_engine = InferenceEngine(neural_network, use_gpu=use_gpu)
        start_inference(inference_engine)

    return (inference_engine, postprocessors), None
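Most of these examples wrap a backbone and one or more heads in a Pipe. As a rough mental model (a sketch of the idea only, not the sense library's actual implementation), Pipe runs the feature extractor once and fans the shared features out to each downstream converter:

import torch.nn as nn

class MiniPipe(nn.Module):
    """Toy composition of a feature extractor and one or more heads.
    Illustrative sketch -- not the sense.Pipe implementation."""

    def __init__(self, feature_extractor, feature_converter):
        super().__init__()
        self.feature_extractor = feature_extractor
        # Accept a single head or a list of heads, matching both call styles above
        heads = feature_converter if isinstance(feature_converter, list) else [feature_converter]
        self.feature_converters = nn.ModuleList(heads)

    def forward(self, video_clip):
        # Run the (expensive) backbone once, then feed every head,
        # e.g. a classifier and a MET-value converter sharing the same features.
        features = self.feature_extractor(video_clip)
        return [head(features) for head in self.feature_converters]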
Example 5
    # Load weights
    selected_config, weights = get_relevant_weights(
        SUPPORTED_MODEL_CONFIGURATIONS, model_name, model_version)

    # Load backbone network
    backbone_network = build_backbone_network(selected_config,
                                              weights['backbone'])

    # Load a logistic regression classifier
    rep_counter = LogisticRegression(num_in=backbone_network.feature_dim,
                                     num_out=5)
    rep_counter.load_state_dict(weights['rep_counter'])
    rep_counter.eval()

    # Concatenate backbone network and rep counter
    net = Pipe(backbone_network, rep_counter)

    postprocessor = [
        AggregatedPostProcessors(
            post_processors=[
                TwoPositionsCounter(
                    pos0_idx=LAB2INT[
                        'counting - jumping_jacks_position=arms_down'],
                    pos1_idx=LAB2INT[
                        'counting - jumping_jacks_position=arms_up'],
                    threshold0=0.4,
                    threshold1=0.4,
                    out_key='Jumping Jacks',
                ),
                TwoPositionsCounter(
                    pos0_idx=LAB2INT['counting - squat_position=high'],
Example 6
    # Load backbone network
    backbone_network = build_backbone_network(selected_config, weights['backbone'])

    # Create fitness activity classifier
    gesture_classifier = LogisticRegression(num_in=backbone_network.feature_dim,
                                            num_out=81)
    gesture_classifier.load_state_dict(weights['fitness_activity_recognition'])
    gesture_classifier.eval()

    # Create MET value converter
    met_value_converter = calorie_estimation.METValueMLPConverter()
    met_value_converter.load_state_dict(weights['met_converter'])
    met_value_converter.eval()

    # Concatenate backbone network with downstream nets
    net = Pipe(backbone_network, feature_converter=[gesture_classifier,
                                                    met_value_converter])

    post_processors = [
        PostprocessClassificationOutput(INT2LAB, smoothing=8,
                                        indices=[0]),
        calorie_estimation.CalorieAccumulator(weight=weight,
                                              height=height,
                                              age=age,
                                              gender=gender,
                                              smoothing=12,
                                              indices=[1])
    ]

    display_ops = [
        sense.display.DisplayFPS(expected_camera_fps=net.fps,
                                 expected_inference_fps=net.fps / net.step_size),
Example 7
def run_fitness_rep_counter(model_name: str,
                            model_version: str,
                            title: Optional[str] = None,
                            display_fn: Optional[Callable] = None,
                            **kwargs):
    """
    :param model_name:
        Name of the backbone model (StridedInflatedEfficientNet or StridedInflatedMobileNetV2).
    :param model_version:
        Model version (pro or lite).
    :param title:
        Title of the image frame on display.
    :param display_fn:
        Optional function to further process the displayed image.
    """
    # Load weights
    selected_config, weights = get_relevant_weights(
        SUPPORTED_MODEL_CONFIGURATIONS,
        model_name,
        model_version
    )

    # Load backbone network
    backbone_network = build_backbone_network(selected_config, weights['backbone'])

    # Load a logistic regression classifier
    rep_counter = LogisticRegression(num_in=backbone_network.feature_dim,
                                     num_out=5)
    rep_counter.load_state_dict(weights['rep_counter'])
    rep_counter.eval()

    # Concatenate backbone network and rep counter
    net = Pipe(backbone_network, rep_counter)

    postprocessor = [
        AggregatedPostProcessors(
            post_processors=[
                TwoPositionsCounter(
                    pos0_idx=LAB2INT['counting - jumping_jacks_position=arms_down'],
                    pos1_idx=LAB2INT['counting - jumping_jacks_position=arms_up'],
                    threshold0=0.4,
                    threshold1=0.4,
                    out_key='Jumping Jacks',
                ),
                TwoPositionsCounter(
                    pos0_idx=LAB2INT['counting - squat_position=high'],
                    pos1_idx=LAB2INT['counting - squat_position=low'],
                    threshold0=0.4,
                    threshold1=0.4,
                    out_key='squats',
                ),
            ],
            out_key='counting',
        ),
        PostprocessClassificationOutput(INT2LAB, smoothing=1)
    ]

    display_ops = [
        sense.display.DisplayFPS(expected_camera_fps=net.fps,
                                 expected_inference_fps=net.fps / net.step_size),
        sense.display.DisplayTopKClassificationOutputs(top_k=1, threshold=0.5),
        sense.display.DisplayExerciseRepCounts()
    ]
    display_results = sense.display.DisplayResults(title=title, display_ops=display_ops,
                                                   border_size_top=100, display_fn=display_fn)

    # Run live inference
    controller = Controller(
        neural_network=net,
        post_processors=postprocessor,
        results_display=display_results,
        callbacks=[],
        **kwargs
    )
    controller.run_inference()
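TwoPositionsCounter is configured with two pose-class indices and per-class thresholds; a full repetition is presumably counted each time the predictions swing from one position to the other. A minimal state-machine sketch of that assumed behavior (not the library code):

class TwoPositionsCounterSketch:
    """Counts one repetition per pos0 -> pos1 transition.
    Assumed behavior, inferred from the constructor arguments above."""

    def __init__(self, pos0_idx, pos1_idx, threshold0, threshold1, out_key):
        self.pos0_idx = pos0_idx
        self.pos1_idx = pos1_idx
        self.threshold0 = threshold0
        self.threshold1 = threshold1
        self.out_key = out_key
        self.expecting_pos1 = False  # which position we are waiting for next
        self.count = 0

    def update(self, probs):
        # probs: per-class probabilities for the current frame
        if not self.expecting_pos1 and probs[self.pos0_idx] > self.threshold0:
            self.expecting_pos1 = True
        elif self.expecting_pos1 and probs[self.pos1_idx] > self.threshold1:
            self.expecting_pos1 = False
            self.count += 1  # completed one full repetition
        return {self.out_key: self.count}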
Example 8

    LAB2INT = LAB2INT_COUNTING if counter else LAB2INT_CLASSIFICATION

    # Load weights
    selected_config, weights = get_relevant_weights(SUPPORTED_MODEL_CONFIGURATIONS, requested_converter=head_name)

    # Load backbone network
    backbone_network = build_backbone_network(selected_config, weights['backbone'],
                                              weights_finetuned=weights[head_name])

    head = LogisticRegression(num_in=backbone_network.feature_dim,
                              num_out=len(INT2LAB))
    head.load_state_dict(weights[head_name])
    head.eval()

    # Concatenate backbone network and head
    net = Pipe(backbone_network, head)

    # Build post-processors
    post_processors = [
        PostprocessClassificationOutput(INT2LAB, smoothing=4),
    ]

    if counter:
        post_processors.extend([
            AggregatedPostProcessors(
                post_processors=[
                    EventCounter(key='forearm_passing_position_1',
                                 key_idx=LAB2INT['forearm_passing_position_1'],
                                 threshold=0.1,
                                 out_key='forearm passes'),
                    EventCounter(key='overhead_passing_position_1',
Example 9
    # Load weights
    selected_config, weights = get_relevant_weights(
        SUPPORTED_MODEL_CONFIGURATIONS, model_name, model_version)

    # Load backbone network
    backbone_network = build_backbone_network(selected_config,
                                              weights['backbone'])

    # Load MET value converter
    met_value_converter = calorie_estimation.METValueMLPConverter()
    met_value_converter.load_state_dict(weights['met_converter'])
    met_value_converter.eval()

    # Concatenate backbone network and met converter
    net = Pipe(backbone_network, met_value_converter)

    post_processors = [
        calorie_estimation.CalorieAccumulator(weight=weight,
                                              height=height,
                                              age=age,
                                              gender=gender,
                                              smoothing=12)
    ]

    display_ops = [
        sense.display.DisplayFPS(expected_camera_fps=net.fps,
                                 expected_inference_fps=net.fps /
                                 net.step_size),
        sense.display.DisplayDetailedMETandCalories(),
    ]
Example 10
def run_custom_classifier(custom_classifier,
                          camera_id=0,
                          path_in=None,
                          path_out=None,
                          title=None,
                          use_gpu=True,
                          display_fn=None,
                          stop_event=None):

    # Load backbone network according to config file
    backbone_model_config, backbone_weights = load_backbone_model_from_config(
        custom_classifier)

    try:
        # Load custom classifier
        checkpoint_classifier = torch.load(
            os.path.join(custom_classifier, 'best_classifier.checkpoint'))
    except FileNotFoundError:
        msg = (
            "Error: No such file or directory: 'best_classifier.checkpoint'\n"
            "Hint: Provide path to 'custom_classifier'.\n")
        if display_fn:
            display_fn(msg)
        else:
            print(msg)
        return None
    # Update original weights in case some intermediate layers have been finetuned
    update_backbone_weights(backbone_weights, checkpoint_classifier)

    # Create backbone network
    backbone_network = build_backbone_network(backbone_model_config,
                                              backbone_weights)

    with open(os.path.join(custom_classifier, 'label2int.json')) as file:
        class2int = json.load(file)
    INT2LAB = {value: key for key, value in class2int.items()}

    gesture_classifier = LogisticRegression(
        num_in=backbone_network.feature_dim, num_out=len(INT2LAB))
    gesture_classifier.load_state_dict(checkpoint_classifier)
    gesture_classifier.eval()

    # Concatenate backbone network and gesture classifier
    net = Pipe(backbone_network, gesture_classifier)

    postprocessor = [PostprocessClassificationOutput(INT2LAB, smoothing=4)]

    display_ops = [
        sense.display.DisplayFPS(expected_camera_fps=net.fps,
                                 expected_inference_fps=net.fps /
                                 net.step_size),
        sense.display.DisplayTopKClassificationOutputs(top_k=1, threshold=0.1),
    ]
    display_results = sense.display.DisplayResults(title=title,
                                                   display_ops=display_ops,
                                                   display_fn=display_fn)

    # Run live inference
    controller = Controller(
        neural_network=net,
        post_processors=postprocessor,
        results_display=display_results,
        callbacks=[],
        camera_id=camera_id,
        path_in=path_in,
        path_out=path_out,
        use_gpu=use_gpu,
        stop_event=stop_event,
    )
    controller.run_inference()
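The function expects custom_classifier to be a directory produced by training, containing best_classifier.checkpoint and label2int.json (both loaded above). A hypothetical call, with a placeholder path:

# Hypothetical usage -- the directory path is a placeholder.
run_custom_classifier(custom_classifier='path/to/trained_classifier_dir',
                      camera_id=0,
                      use_gpu=True)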
Example 11
def run_calorie_estimation(model_name: str,
                           model_version: str,
                           weight: Optional[float] = 70.0,
                           height: Optional[float] = 170.0,
                           age: float = 30.0,
                           gender: Optional[str] = None,
                           title: Optional[str] = None,
                           display_fn: Optional[Callable] = None,
                           **kwargs):
    """
    :param model_name:
        Name of the backbone model (StridedInflatedEfficientNet or StridedInflatedMobileNetV2).
    :param model_version:
        Model version (pro or lite).
    :param weight:
        Weight (in kilograms). Will be used to convert MET values to calories. Defaults to 70.
    :param height:
        Height (in centimeters). Will be used to convert MET values to calories. Defaults to 170.
    :param age:
        Age (in years). Will be used to convert MET values to calories. Defaults to 30.
    :param gender:
        Gender ("male", "female", or "other"). Will be used to convert MET values to calories.
    :param title:
        Title of the image frame on display.
    :param display_fn:
        Optional function to further process the displayed image.
    """
    # Load weights
    selected_config, weights = get_relevant_weights(
        SUPPORTED_MODEL_CONFIGURATIONS,
        model_name,
        model_version
    )

    # Load backbone network
    backbone_network = build_backbone_network(selected_config, weights['backbone'])

    # Load MET value converter
    met_value_converter = calorie_estimation.METValueMLPConverter()
    met_value_converter.load_state_dict(weights['met_converter'])
    met_value_converter.eval()

    # Concatenate backbone network and met converter
    net = Pipe(backbone_network, met_value_converter)

    post_processors = [
        calorie_estimation.CalorieAccumulator(weight=weight,
                                              height=height,
                                              age=age,
                                              gender=gender,
                                              smoothing=12)
    ]

    display_ops = [
        sense.display.DisplayFPS(expected_camera_fps=net.fps,
                                 expected_inference_fps=net.fps / net.step_size),
        sense.display.DisplayDetailedMETandCalories(),
    ]

    display_results = sense.display.DisplayResults(title=title, display_ops=display_ops, display_fn=display_fn)

    # Run live inference
    controller = Controller(
        neural_network=net,
        post_processors=post_processors,
        results_display=display_results,
        callbacks=[],
        **kwargs
    )
    controller.run_inference()
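CalorieAccumulator's internals are not shown in these snippets. For intuition, the standard MET-to-energy conversion is kcal/min = MET × 3.5 × weight_kg / 200, i.e. roughly 1 kcal per kilogram of body weight per hour at 1 MET; whether the accumulator uses exactly this formula, and how it folds in height, age and gender, is an assumption here.

def kcal_burned(met: float, weight_kg: float, minutes: float) -> float:
    # Standard conversion: kcal/min = MET * 3.5 * weight_kg / 200.
    # CalorieAccumulator may refine this using height, age and gender.
    return met * 3.5 * weight_kg / 200.0 * minutes

# 30 minutes at 6 METs for a 70 kg person:
print(kcal_burned(met=6.0, weight_kg=70.0, minutes=30.0))  # ~220.5 kcal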
Example 12
        num_in=feature_extractor.feature_dim, num_out=81)
    checkpoint = engine.load_weights(
        'resources/fitness_activity_recognition/mobilenet_logistic_regression.ckpt'
    )
    gesture_classifier.load_state_dict(checkpoint)
    gesture_classifier.eval()

    # Load MET value converter
    met_value_converter = calorie_estimation.METValueMLPConverter()
    checkpoint = torch.load(
        'resources/calorie_estimation/mobilenet_features_met_converter.ckpt')
    met_value_converter.load_state_dict(checkpoint)
    met_value_converter.eval()

    # Concatenate feature extractor with downstream nets
    net = Pipe(feature_extractor,
               feature_converter=[gesture_classifier, met_value_converter])

    post_processors = [
        PostprocessClassificationOutput(INT2LAB, smoothing=8, indices=[0]),
        calorie_estimation.CalorieAccumulator(weight=weight,
                                              height=height,
                                              age=age,
                                              gender=gender,
                                              smoothing=12,
                                              indices=[1])
    ]

    display_ops = [
        sense.display.DisplayFPS(expected_camera_fps=net.fps,
                                 expected_inference_fps=net.fps /
                                 net.step_size),
Example 13
def run_gesture_control(model_name: str,
                        model_version: str,
                        title: Optional[str] = None,
                        display_fn: Optional[Callable] = None,
                        **kwargs):
    """
    :param model_name:
        Name of the backbone model (StridedInflatedEfficientNet or StridedInflatedMobileNetV2).
    :param model_version:
        Model version (pro or lite).
    :param title:
        Title of the image frame on display.
    :param display_fn:
        Optional function to further process the displayed image.
    """
    # Load weights
    selected_config, weights = get_relevant_weights(
        SUPPORTED_MODEL_CONFIGURATIONS, model_name, model_version)

    # Load backbone network
    backbone_network = build_backbone_network(
        selected_config,
        weights['backbone'],
        weights_finetuned=weights['gesture_control'])

    # Create a logistic regression classifier
    gesture_classifier = LogisticRegression(
        num_in=backbone_network.feature_dim, num_out=len(INT2LAB))
    gesture_classifier.load_state_dict(weights['gesture_control'])
    gesture_classifier.eval()

    # Concatenate backbone network and logistic regression
    net = Pipe(backbone_network, gesture_classifier)

    postprocessor = [
        PostprocessClassificationOutput(INT2LAB, smoothing=1),
        AggregatedPostProcessors(
            post_processors=[
                EventCounter(key, LAB2INT[key], LAB_THRESHOLDS[key])
                for key in ENABLED_LABELS
            ],
            out_key='counting',
        ),
    ]

    border_size_top = 0
    border_size_right = 500

    display_ops = [
        sense.display.DisplayFPS(expected_camera_fps=net.fps,
                                 expected_inference_fps=net.fps /
                                 net.step_size),
        sense.display.DisplayClassnameOverlay(
            thresholds=LAB_THRESHOLDS,
            duration=1,
            border_size_top=border_size_top if not title else border_size_top +
            50,
            border_size_right=border_size_right),
        sense.display.DisplayPredictionBarGraph(ENABLED_LABELS,
                                                LAB_THRESHOLDS,
                                                x_offset=900,
                                                y_offset=100,
                                                display_counts=True)
    ]
    display_results = sense.display.DisplayResults(
        title=title,
        display_ops=display_ops,
        border_size_top=border_size_top,
        border_size_right=border_size_right,
        display_fn=display_fn)

    # Run live inference
    controller = Controller(neural_network=net,
                            post_processors=postprocessor,
                            results_display=display_results,
                            callbacks=[],
                            **kwargs)
    controller.run_inference()
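EventCounter(key, LAB2INT[key], LAB_THRESHOLDS[key]) appears to count discrete occurrences of one gesture class. A plausible rising-edge sketch, so that a gesture held over several frames is counted once (assumed behavior, not the library code):

class EventCounterSketch:
    """Counts rising edges of one class probability.
    Assumed behavior -- not the sense implementation."""

    def __init__(self, key, key_idx, threshold, out_key=None):
        self.key = key
        self.key_idx = key_idx
        self.threshold = threshold
        self.out_key = out_key or key
        self.active = False
        self.count = 0

    def update(self, probs):
        above = probs[self.key_idx] > self.threshold
        if above and not self.active:
            self.count += 1  # count only the rising edge, not every frame
        self.active = above
        return {self.out_key: self.count}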
Example 14
def run_fitness_tracker(model_name: str,
                        model_version: str,
                        weight: Optional[float] = 70.0,
                        height: Optional[float] = 170.0,
                        age: float = 30.0,
                        gender: Optional[str] = None,
                        title: Optional[str] = None,
                        display_fn: Optional[Callable] = None,
                        **kwargs):
    """
    :param model_name:
        Name of the backbone model (StridedInflatedEfficientNet or StridedInflatedMobileNetV2).
    :param model_version:
        Model version (pro or lite).
    :param weight:
        Weight (in kilograms). Will be used to convert MET values to calories. Defaults to 70.
    :param height:
        Height (in centimeters). Will be used to convert MET values to calories. Defaults to 170.
    :param age:
        Age (in years). Will be used to convert MET values to calories. Defaults to 30.
    :param gender:
        Gender ("male", "female", or "other"). Will be used to convert MET values to calories.
    :param title:
        Title of the image frame on display.
    :param display_fn:
        Optional function to further process the displayed image.
    """
    # Load weights
    selected_config, weights = get_relevant_weights(
        SUPPORTED_MODEL_CONFIGURATIONS, model_name, model_version)

    # Load backbone network
    backbone_network = build_backbone_network(selected_config,
                                              weights['backbone'])

    # Create fitness activity classifier
    gesture_classifier = LogisticRegression(
        num_in=backbone_network.feature_dim, num_out=81)
    gesture_classifier.load_state_dict(weights['fitness_activity_recognition'])
    gesture_classifier.eval()

    # Create MET value converter
    met_value_converter = calorie_estimation.METValueMLPConverter()
    met_value_converter.load_state_dict(weights['met_converter'])
    met_value_converter.eval()

    # Concatenate backbone network with downstream nets
    net = Pipe(backbone_network,
               feature_converter=[gesture_classifier, met_value_converter])

    post_processors = [
        PostprocessClassificationOutput(INT2LAB, smoothing=8, indices=[0]),
        calorie_estimation.CalorieAccumulator(weight=weight,
                                              height=height,
                                              age=age,
                                              gender=gender,
                                              smoothing=12,
                                              indices=[1])
    ]

    display_ops = [
        sense.display.DisplayFPS(expected_camera_fps=net.fps,
                                 expected_inference_fps=net.fps /
                                 net.step_size),
        sense.display.DisplayTopKClassificationOutputs(top_k=1, threshold=0.5),
        sense.display.DisplayMETandCalories(y_offset=40),
    ]
    display_results = sense.display.DisplayResults(title=title,
                                                   display_ops=display_ops,
                                                   border_size_top=50,
                                                   display_fn=display_fn)

    # Run live inference
    controller = Controller(neural_network=net,
                            post_processors=post_processors,
                            results_display=display_results,
                            callbacks=[],
                            **kwargs)
    controller.run_inference()
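A sample call with the documented defaults spelled out; the values are illustrative, and camera_id is assumed to be forwarded to Controller via **kwargs:

# Hypothetical usage -- argument values are illustrative.
run_fitness_tracker(model_name='StridedInflatedMobileNetV2',
                    model_version='lite',
                    weight=70.0,
                    height=170.0,
                    age=30.0,
                    gender='female',
                    camera_id=0)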
Example 15
    # Choose the number of output classes for the classifier head
    if temporal_training:
        num_output = len(label_counting)
    else:
        num_output = len(label_names)

    # Modify the network to generate the training network on top of the features
    gesture_classifier = LogisticRegression(
        num_in=feature_extractor.feature_dim,
        num_out=num_output,
        use_softmax=False)

    if num_layers_to_finetune > 0:
        # remove internal padding for training
        fine_tuned_layers.apply(set_internal_padding_false)
        net = Pipe(fine_tuned_layers, gesture_classifier)
    else:
        net = gesture_classifier
    net.train()

    if use_gpu:
        net = net.cuda()

    lr_schedule = {0: 0.0001, 40: 0.00001}
    num_epochs = 80
    best_model_state_dict = training_loops(
        net,
        train_loader,
        valid_loader,
        use_gpu,
        num_epochs,
Example 16
def train_model(path_in,
                path_out,
                model_name,
                model_version,
                num_layers_to_finetune,
                epochs,
                use_gpu=True,
                overwrite=True,
                temporal_training=None,
                resume=False,
                log_fn=print,
                confmat_event=None):
    os.makedirs(path_out, exist_ok=True)

    # Check for existing files
    saved_files = [
        "last_classifier.checkpoint", "best_classifier.checkpoint",
        "config.json", "label2int.json", "confusion_matrix.png",
        "confusion_matrix.npy"
    ]

    if not overwrite and any(
            os.path.exists(os.path.join(path_out, file))
            for file in saved_files):
        print(f"Warning: This operation will overwrite files in {path_out}")

        while True:
            confirmation = input(
                "Are you sure? Add --overwrite to hide this warning. (Y/N) ")
            if confirmation.lower() == "y":
                break
            elif confirmation.lower() == "n":
                sys.exit()
            else:
                print('Invalid input')

    # Load weights
    selected_config, weights = get_relevant_weights(
        SUPPORTED_MODEL_CONFIGURATIONS,
        model_name,
        model_version,
        log_fn,
    )
    backbone_weights = weights['backbone']

    if resume:
        # Load the last classifier
        checkpoint_classifier = torch.load(
            os.path.join(path_out, 'last_classifier.checkpoint'))

        # Update original weights in case some intermediate layers have been finetuned
        update_backbone_weights(backbone_weights, checkpoint_classifier)

    # Load backbone network
    backbone_network = build_backbone_network(selected_config,
                                              backbone_weights)

    # Get the required temporal dimension of feature tensors in order to
    # finetune the provided number of layers
    if num_layers_to_finetune > 0:
        num_timesteps = backbone_network.num_required_frames_per_layer.get(
            -num_layers_to_finetune)
        if not num_timesteps:
            # Remove 1 because we added 0 to temporal_dependencies
            num_layers = len(
                backbone_network.num_required_frames_per_layer) - 1
            msg = (f'ERROR - Num of layers to finetune not compatible. '
                   f'Must be an integer between 0 and {num_layers}')
            log_fn(msg)
            raise IndexError(msg)
    else:
        num_timesteps = 1

    # Extract layers to finetune
    if num_layers_to_finetune > 0:
        fine_tuned_layers = backbone_network.cnn[-num_layers_to_finetune:]
        backbone_network.cnn = backbone_network.cnn[0:-num_layers_to_finetune]

    # finetune the model
    extract_features(path_in,
                     selected_config,
                     backbone_network,
                     num_layers_to_finetune,
                     use_gpu,
                     num_timesteps=num_timesteps,
                     log_fn=log_fn)

    # Find label names
    label_names = os.listdir(directories.get_videos_dir(path_in, 'train'))
    label_names = [x for x in label_names if not x.startswith('.')]
    label_names_temporal = ['background']

    project_config = load_project_config(path_in)
    if project_config:
        for temporal_tags in project_config['classes'].values():
            label_names_temporal.extend(temporal_tags)
    else:
        for label in label_names:
            label_names_temporal.extend([f'{label}_tag1', f'{label}_tag2'])

    label_names_temporal = sorted(set(label_names_temporal))

    label2int_temporal_annotation = {
        name: index
        for index, name in enumerate(label_names_temporal)
    }
    label2int = {name: index for index, name in enumerate(label_names)}

    extractor_stride = backbone_network.num_required_frames_per_layer_padding[
        0]

    # Create the data loaders
    features_dir = directories.get_features_dir(path_in, 'train',
                                                selected_config,
                                                num_layers_to_finetune)
    tags_dir = directories.get_tags_dir(path_in, 'train')
    train_loader = generate_data_loader(
        project_config,
        features_dir,
        tags_dir,
        label_names,
        label2int,
        label2int_temporal_annotation,
        num_timesteps=num_timesteps,
        stride=extractor_stride,
        temporal_annotation_only=temporal_training,
    )

    features_dir = directories.get_features_dir(path_in, 'valid',
                                                selected_config,
                                                num_layers_to_finetune)
    tags_dir = directories.get_tags_dir(path_in, 'valid')
    valid_loader = generate_data_loader(
        project_config,
        features_dir,
        tags_dir,
        label_names,
        label2int,
        label2int_temporal_annotation,
        num_timesteps=None,
        batch_size=1,
        shuffle=False,
        stride=extractor_stride,
        temporal_annotation_only=temporal_training,
    )

    # Check if the data is loaded fully
    if not train_loader or not valid_loader:
        log_fn(
            "ERROR - \n "
            "\tMissing annotations for train or valid set.\n"
            "\tHint: Check if tags_train and tags_valid directories exist.\n")
        return

    # Choose the number of output classes for the classifier head
    if temporal_training:
        num_output = len(label_names_temporal)
    else:
        num_output = len(label_names)

    # Modify the network to generate the training network on top of the features
    gesture_classifier = LogisticRegression(
        num_in=backbone_network.feature_dim,
        num_out=num_output,
        use_softmax=False)

    if resume:
        gesture_classifier.load_state_dict(checkpoint_classifier)

    if num_layers_to_finetune > 0:
        # remove internal padding for training
        fine_tuned_layers.apply(set_internal_padding_false)
        net = Pipe(fine_tuned_layers, gesture_classifier)
    else:
        net = gesture_classifier
    net.train()

    if use_gpu:
        net = net.cuda()

    lr_schedule = {
        0: 0.0001,
        int(epochs / 2): 0.00001
    } if epochs > 1 else {
        0: 0.0001
    }
    num_epochs = epochs

    # Save training config and label2int dictionary
    config = {
        'backbone_name': selected_config.model_name,
        'backbone_version': selected_config.version,
        'num_layers_to_finetune': num_layers_to_finetune,
        'classifier': str(gesture_classifier),
        'temporal_training': temporal_training,
        'lr_schedule': lr_schedule,
        'num_epochs': num_epochs,
        'start_time': str(datetime.datetime.now()),
        'end_time': '',
    }
    with open(os.path.join(path_out, 'config.json'), 'w') as f:
        json.dump(config, f, indent=2)

    with open(os.path.join(path_out, 'label2int.json'), 'w') as f:
        json.dump(
            label2int_temporal_annotation if temporal_training else label2int,
            f,
            indent=2)

    # Train model
    best_model_state_dict = training_loops(
        net,
        train_loader,
        valid_loader,
        use_gpu,
        num_epochs,
        lr_schedule,
        label_names,
        path_out,
        temporal_annotation_training=temporal_training,
        log_fn=log_fn,
        confmat_event=confmat_event)

    # Save best model
    if isinstance(net, Pipe):
        best_model_state_dict = {
            clean_pipe_state_dict_key(key): value
            for key, value in best_model_state_dict.items()
        }
    torch.save(best_model_state_dict,
               os.path.join(path_out, "best_classifier.checkpoint"))

    config['end_time'] = str(datetime.datetime.now())
    with open(os.path.join(path_out, 'config.json'), 'w') as f:
        json.dump(config, f, indent=2)
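The lr_schedule dict maps a starting epoch to a learning rate, e.g. {0: 0.0001, 40: 0.00001} keeps 1e-4 until epoch 40 and 1e-5 afterwards. How training_loops consumes it is not shown; a sketch of one plausible lookup:

def lr_for_epoch(lr_schedule: dict, epoch: int) -> float:
    # Use the rate attached to the most recent milestone <= epoch.
    milestones = [e for e in sorted(lr_schedule) if e <= epoch]
    return lr_schedule[milestones[-1]]

assert lr_for_epoch({0: 0.0001, 40: 0.00001}, 10) == 0.0001
assert lr_for_epoch({0: 0.0001, 40: 0.00001}, 60) == 0.00001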
Example 17
    # Load feature extractor
    feature_extractor = feature_extractors.StridedInflatedMobileNetV2()
    feature_extractor.load_weights_from_resources(
        'backbone/strided_inflated_mobilenet.ckpt')
    feature_extractor.eval()

    # Load MET value converter
    met_value_converter = calorie_estimation.METValueMLPConverter()
    checkpoint = load_weights_from_resources(
        'calorie_estimation/mobilenet_features_met_converter.ckpt')
    met_value_converter.load_state_dict(checkpoint)
    met_value_converter.eval()

    # Concatenate feature extractor and met converter
    net = Pipe(feature_extractor, met_value_converter)

    post_processors = [
        calorie_estimation.CalorieAccumulator(weight=weight,
                                              height=height,
                                              age=age,
                                              gender=gender,
                                              smoothing=12)
    ]

    display_ops = [
        sense.display.DisplayFPS(expected_camera_fps=net.fps,
                                 expected_inference_fps=net.fps /
                                 net.step_size),
        sense.display.DisplayDetailedMETandCalories(),
    ]