def run_action_recognition(
        model_name: str,
        model_version: str,
        title: Optional[str] = None,
        display_fn: Optional[Callable] = None,
        **kwargs):
    """
    Assemble the action recognition network and run live inference with it.

    :param model_name:
        Model from backbone (StridedInflatedEfficientNet or StridedInflatedMobileNetV2).
    :param model_version:
        Model version (pro or lite)
    :param title:
        Title of the image frame on display. When set, the class-name overlay
        gets 50 extra pixels of top border.
    :param display_fn:
        Optional function to further process displayed image
    :param kwargs:
        Any remaining keyword arguments are forwarded unchanged to ``Controller``.
    """
    # Resolve the requested configuration together with its weights
    selected_config, weights = get_relevant_weights(
        SUPPORTED_MODEL_CONFIGURATIONS,
        model_name,
        model_version,
    )

    # Backbone (feature extractor)
    backbone_network = build_backbone_network(selected_config, weights['backbone'])

    # Logistic regression head with 30 outputs, switched to eval mode
    classifier_head = LogisticRegression(
        num_in=backbone_network.feature_dim,
        num_out=30,
    )
    classifier_head.load_state_dict(weights['action_recognition'])
    classifier_head.eval()

    # Chain the backbone and the classification head
    net = Pipe(backbone_network, classifier_head)

    post_processors = [PostprocessClassificationOutput(INT2LAB, smoothing=4)]

    # Leave additional room above the overlay when a title is shown
    base_border = 30
    overlay_border_top = base_border + 50 if title else base_border

    fps_op = sense.display.DisplayFPS(
        expected_camera_fps=net.fps,
        expected_inference_fps=net.fps / net.step_size,
    )
    top_k_op = sense.display.DisplayTopKClassificationOutputs(top_k=1, threshold=0.5)
    overlay_op = sense.display.DisplayClassnameOverlay(
        thresholds=LAB_THRESHOLDS,
        border_size_top=overlay_border_top,
    )

    results_display = sense.display.DisplayResults(
        title=title,
        display_ops=[fps_op, top_k_op, overlay_op],
        display_fn=display_fn,
    )

    # Run live inference
    Controller(
        neural_network=net,
        post_processors=post_processors,
        results_display=results_display,
        callbacks=[],
        **kwargs
    ).run_inference()
    # Update original weights in case some intermediate layers have been finetuned
    name_finetuned_layers = set(checkpoint.keys()).intersection(
        checkpoint_classifier.keys())
    for key in name_finetuned_layers:
        checkpoint[key] = checkpoint_classifier.pop(key)
    feature_extractor.load_state_dict(checkpoint)
    feature_extractor.eval()
    print('[debug] net:', feature_extractor)
    with open(os.path.join(custom_classifier, 'label2int.json')) as file:
        class2int = json.load(file)
    INT2LAB = {value: key for key, value in class2int.items()}

    gesture_classifier = LogisticRegression(
        num_in=feature_extractor.feature_dim, num_out=len(INT2LAB))
    gesture_classifier.load_state_dict(checkpoint_classifier)
    gesture_classifier.eval()
    print(gesture_classifier)

    # Concatenate feature extractor and met converter
    net = Pipe(feature_extractor, gesture_classifier)

    postprocessor = [PostprocessClassificationOutput(INT2LAB, smoothing=4)]

    display_ops = [
        sense.display.DisplayFPS(expected_camera_fps=net.fps,
                                 expected_inference_fps=net.fps /
                                 net.step_size),
        sense.display.DisplayTopKClassificationOutputs(top_k=1, threshold=0.3),
    ]
    display_results = sense.display.DisplayResults(title=title,
                                                   display_ops=display_ops)
Exemple #3
0
def init_model(transform):
    """
    Build the network for the requested transform and start an inference engine on it.

    :param transform:
        Name of the model to set up: 'gesture' or 'fitness'. Any other value
        builds nothing and starts no engine.
    :return:
        The tuple ``((inference_engine, postprocessors), None)``. Both inner
        values are None when ``transform`` is not one of the names above.
    """
    use_gpu = True
    neural_network = None
    postprocessors = None

    if transform == 'gesture':
        # Backbone: EfficientNet feature extractor
        backbone = feature_extractors.StridedInflatedEfficientNet()
        backbone.load_weights_from_resources(
            'backbone/strided_inflated_efficientnet.ckpt')
        backbone.eval()

        # Logistic regression head for gesture detection (30 outputs)
        classifier = LogisticRegression(num_in=backbone.feature_dim,
                                        num_out=30)
        classifier_weights = load_weights_from_resources(
            'gesture_detection/efficientnet_logistic_regression.ckpt')
        classifier.load_state_dict(classifier_weights)
        classifier.eval()

        # Chain the feature extractor and the gesture classifier
        neural_network = Pipe(backbone, classifier)
        postprocessors = [
            PostprocessClassificationOutput(INT2LAB, smoothing=4),
        ]

    elif transform == 'fitness':
        # Fixed user profile handed to the calorie accumulator
        weight = 60.0
        height = 170.0
        age = 20.0
        gender = 'female'

        # Backbone: MobileNetV2 feature extractor
        backbone = feature_extractors.StridedInflatedMobileNetV2()
        backbone.load_weights_from_resources(
            'backbone/strided_inflated_mobilenet.ckpt')
        backbone.eval()

        # Fitness activity classifier (81 outputs)
        classifier = LogisticRegression(num_in=backbone.feature_dim,
                                        num_out=81)
        classifier_weights = load_weights_from_resources(
            'fitness_activity_recognition/mobilenet_logistic_regression.ckpt')
        classifier.load_state_dict(classifier_weights)
        classifier.eval()

        # MET value converter
        met_value_converter = calorie_estimation.METValueMLPConverter()
        converter_weights = load_weights_from_resources(
            'calorie_estimation/mobilenet_features_met_converter.ckpt')
        met_value_converter.load_state_dict(converter_weights)
        met_value_converter.eval()

        # Feed the backbone features to both downstream nets
        neural_network = Pipe(
            backbone,
            feature_converter=[classifier, met_value_converter])

        # Output index 0 is the classifier, index 1 the MET converter
        activity_postprocessor = PostprocessClassificationOutput(
            INT2LAB, smoothing=8, indices=[0])
        calorie_postprocessor = calorie_estimation.CalorieAccumulator(
            weight=weight,
            height=height,
            age=age,
            gender=gender,
            smoothing=12,
            indices=[1])
        postprocessors = [activity_postprocessor, calorie_postprocessor]

    # Only start an engine when one of the branches above built a network
    inference_engine = None
    if neural_network is not None:
        inference_engine = InferenceEngine(neural_network, use_gpu=use_gpu)
        start_inference(inference_engine)

    return (inference_engine, postprocessors), None
Exemple #4
0
    model_version = args['--model_version'] or None
    use_gpu = args['--use_gpu']

    # Load weights
    selected_config, weights = get_relevant_weights(
        SUPPORTED_MODEL_CONFIGURATIONS, model_name, model_version)

    # Load backbone network
    backbone_network = build_backbone_network(selected_config,
                                              weights['backbone'])

    # Load a logistic regression classifier
    rep_counter = LogisticRegression(num_in=backbone_network.feature_dim,
                                     num_out=5)
    rep_counter.load_state_dict(weights['rep_counter'])
    rep_counter.eval()

    # Concatenate backbone network and rep counter
    net = Pipe(backbone_network, rep_counter)

    postprocessor = [
        AggregatedPostProcessors(
            post_processors=[
                TwoPositionsCounter(
                    pos0_idx=LAB2INT[
                        'counting - jumping_jacks_position=arms_down'],
                    pos1_idx=LAB2INT[
                        'counting - jumping_jacks_position=arms_up'],
                    threshold0=0.4,
                    threshold1=0.4,
                    out_key='Jumping Jacks',
def run_fitness_rep_counter(
        model_name: str,
        model_version: str,
        title: Optional[str] = None,
        display_fn: Optional[Callable] = None,
        **kwargs):
    """
    Assemble the repetition counting network and run live inference with it.

    Two counters are configured, one for jumping jacks and one for squats.

    :param model_name:
        Model from backbone (StridedInflatedEfficientNet or StridedInflatedMobileNetV2).
    :param model_version:
        Model version (pro or lite)
    :param title:
        Title of the image frame on display.
    :param display_fn:
        Optional function to further process displayed image
    :param kwargs:
        Any remaining keyword arguments are forwarded unchanged to ``Controller``.
    """
    # Resolve the requested configuration together with its weights
    selected_config, weights = get_relevant_weights(
        SUPPORTED_MODEL_CONFIGURATIONS, model_name, model_version)

    # Backbone (feature extractor)
    backbone_network = build_backbone_network(selected_config,
                                              weights['backbone'])

    # Logistic regression head with 5 outputs, switched to eval mode
    counter_head = LogisticRegression(
        num_in=backbone_network.feature_dim,
        num_out=5,
    )
    counter_head.load_state_dict(weights['rep_counter'])
    counter_head.eval()

    # Chain the backbone and the rep counter head
    net = Pipe(backbone_network, counter_head)

    # One two-position counter per exercise
    jumping_jacks_counter = TwoPositionsCounter(
        pos0_idx=LAB2INT['counting - jumping_jacks_position=arms_down'],
        pos1_idx=LAB2INT['counting - jumping_jacks_position=arms_up'],
        threshold0=0.4,
        threshold1=0.4,
        out_key='Jumping Jacks',
    )
    squats_counter = TwoPositionsCounter(
        pos0_idx=LAB2INT['counting - squat_position=high'],
        pos1_idx=LAB2INT['counting - squat_position=low'],
        threshold0=0.4,
        threshold1=0.4,
        out_key='squats',
    )
    counting = AggregatedPostProcessors(
        post_processors=[jumping_jacks_counter, squats_counter],
        out_key='counting',
    )
    post_processors = [
        counting,
        PostprocessClassificationOutput(INT2LAB, smoothing=1),
    ]

    fps_op = sense.display.DisplayFPS(
        expected_camera_fps=net.fps,
        expected_inference_fps=net.fps / net.step_size,
    )
    top_k_op = sense.display.DisplayTopKClassificationOutputs(top_k=1, threshold=0.5)
    rep_counts_op = sense.display.DisplayExerciseRepCounts()

    results_display = sense.display.DisplayResults(
        title=title,
        display_ops=[fps_op, top_k_op, rep_counts_op],
        border_size_top=100,
        display_fn=display_fn,
    )

    # Run live inference
    Controller(neural_network=net,
               post_processors=post_processors,
               results_display=results_display,
               callbacks=[],
               **kwargs).run_inference()
Exemple #6
0
def run_custom_classifier(
        custom_classifier,
        camera_id=0,
        path_in=None,
        path_out=None,
        title=None,
        use_gpu=True,
        display_fn=None,
        stop_event=None):
    """
    Load a custom-trained classifier from disk and run live inference with it.

    :param custom_classifier:
        Directory holding 'best_classifier.checkpoint' and 'label2int.json'.
        It is also handed to ``load_backbone_model_from_config``.
    :param camera_id:
        Forwarded to ``Controller``.
    :param path_in:
        Forwarded to ``Controller``.
    :param path_out:
        Forwarded to ``Controller``.
    :param title:
        Title passed to the results display.
    :param use_gpu:
        Forwarded to ``Controller``.
    :param display_fn:
        Optional callable. It receives the error message if the checkpoint is
        missing (otherwise the message is printed) and is also passed to the
        results display.
    :param stop_event:
        Forwarded to ``Controller``.
    :return:
        None. If the classifier checkpoint file is missing, the error is
        reported and the function returns without running inference.
    """
    # Backbone configuration and weights come from the classifier's config file
    backbone_model_config, backbone_weights = load_backbone_model_from_config(
        custom_classifier)

    checkpoint_path = os.path.join(custom_classifier,
                                   'best_classifier.checkpoint')
    try:
        # NOTE(review): torch.load unpickles the file; use trusted checkpoints only.
        checkpoint_classifier = torch.load(checkpoint_path)
    except FileNotFoundError:
        msg = (
            "Error: No such file or directory: 'best_classifier.checkpoint'\n"
            "Hint: Provide path to 'custom_classifier'.\n")
        # Report through display_fn when one was given, otherwise print
        (display_fn or print)(msg)
        return None

    # Update original weights in case some intermediate layers have been finetuned
    update_backbone_weights(backbone_weights, checkpoint_classifier)

    # Build the backbone from the (possibly updated) weights
    backbone_network = build_backbone_network(backbone_model_config,
                                              backbone_weights)

    # Invert the stored label -> index mapping
    with open(os.path.join(custom_classifier, 'label2int.json')) as label_file:
        label2int = json.load(label_file)
    int2lab = {index: label for label, index in label2int.items()}

    # Logistic regression head with one output per label
    classifier_head = LogisticRegression(
        num_in=backbone_network.feature_dim,
        num_out=len(int2lab),
    )
    classifier_head.load_state_dict(checkpoint_classifier)
    classifier_head.eval()

    # Chain the backbone and the custom classifier
    net = Pipe(backbone_network, classifier_head)

    post_processors = [PostprocessClassificationOutput(int2lab, smoothing=4)]

    fps_op = sense.display.DisplayFPS(
        expected_camera_fps=net.fps,
        expected_inference_fps=net.fps / net.step_size,
    )
    top_k_op = sense.display.DisplayTopKClassificationOutputs(top_k=1, threshold=0.1)

    results_display = sense.display.DisplayResults(
        title=title,
        display_ops=[fps_op, top_k_op],
        display_fn=display_fn,
    )

    # Run live inference
    Controller(neural_network=net,
               post_processors=post_processors,
               results_display=results_display,
               callbacks=[],
               camera_id=camera_id,
               path_in=path_in,
               path_out=path_out,
               use_gpu=use_gpu,
               stop_event=stop_event).run_inference()
def run_gesture_control(
        model_name: str,
        model_version: str,
        title: Optional[str] = None,
        display_fn: Optional[Callable] = None,
        **kwargs):
    """
    Assemble the gesture control network and run live inference with it.

    :param model_name:
        Model from backbone (StridedInflatedEfficientNet or StridedInflatedMobileNetV2).
    :param model_version:
        Model version (pro or lite)
    :param title:
        Title of the image frame on display. When set, the class-name overlay
        gets 50 extra pixels of top border.
    :param display_fn:
        Optional function to further process displayed image
    :param kwargs:
        Any remaining keyword arguments are forwarded unchanged to ``Controller``.
    """
    # Resolve the requested configuration together with its weights
    selected_config, weights = get_relevant_weights(
        SUPPORTED_MODEL_CONFIGURATIONS,
        model_name,
        model_version,
    )

    # Backbone; the gesture control weights are also passed as finetuned weights
    backbone_network = build_backbone_network(
        selected_config,
        weights['backbone'],
        weights_finetuned=weights['gesture_control'],
    )

    # Logistic regression head with one output per label
    classifier_head = LogisticRegression(
        num_in=backbone_network.feature_dim,
        num_out=len(INT2LAB),
    )
    classifier_head.load_state_dict(weights['gesture_control'])
    classifier_head.eval()

    # Chain the backbone and the classification head
    net = Pipe(backbone_network, classifier_head)

    # Classification output first, then one event counter per enabled label
    classification_output = PostprocessClassificationOutput(INT2LAB, smoothing=1)
    event_counters = AggregatedPostProcessors(
        post_processors=[
            EventCounter(label, LAB2INT[label], LAB_THRESHOLDS[label])
            for label in ENABLED_LABELS
        ],
        out_key='counting',
    )
    post_processors = [classification_output, event_counters]

    border_size_top = 0
    border_size_right = 500
    # Only the overlay is shifted down when a title is shown
    overlay_border_top = border_size_top + 50 if title else border_size_top

    fps_op = sense.display.DisplayFPS(
        expected_camera_fps=net.fps,
        expected_inference_fps=net.fps / net.step_size,
    )
    overlay_op = sense.display.DisplayClassnameOverlay(
        thresholds=LAB_THRESHOLDS,
        duration=1,
        border_size_top=overlay_border_top,
        border_size_right=border_size_right,
    )
    bar_graph_op = sense.display.DisplayPredictionBarGraph(
        ENABLED_LABELS,
        LAB_THRESHOLDS,
        x_offset=900,
        y_offset=100,
        display_counts=True,
    )

    results_display = sense.display.DisplayResults(
        title=title,
        display_ops=[fps_op, overlay_op, bar_graph_op],
        border_size_top=border_size_top,
        border_size_right=border_size_right,
        display_fn=display_fn,
    )

    # Run live inference
    Controller(
        neural_network=net,
        post_processors=post_processors,
        results_display=results_display,
        callbacks=[],
        **kwargs
    ).run_inference()
def run_fitness_tracker(
        model_name: str,
        model_version: str,
        weight: Optional[float] = 70.0,
        height: Optional[float] = 170.0,
        age: float = 30.0,
        gender: Optional[str] = None,
        title: Optional[str] = None,
        display_fn: Optional[Callable] = None,
        **kwargs):
    """
    Assemble the fitness tracking network (activity + MET) and run live inference.

    :param model_name:
        Model from backbone (StridedInflatedEfficientNet or StridedInflatedMobileNetV2).
    :param model_version:
        Model version (pro or lite)
    :param weight:
        Weight (in kilograms). Will be used to convert MET values to calories. Default to 70.
    :param height:
        Height (in centimeters). Will be used to convert MET values to calories. Default to 170.
    :param age:
        Age (in years). Will be used to convert MET values to calories. Default to 30.
    :param gender:
        Gender ("male" or "female" or "other"). Will be used to convert MET values to calories
    :param title:
        Title of the image frame on display.
    :param display_fn:
        Optional function to further process displayed image
    :param kwargs:
        Any remaining keyword arguments are forwarded unchanged to ``Controller``.
    """
    # Resolve the requested configuration together with its weights
    selected_config, weights = get_relevant_weights(
        SUPPORTED_MODEL_CONFIGURATIONS,
        model_name,
        model_version,
    )

    # Backbone (feature extractor)
    backbone_network = build_backbone_network(selected_config, weights['backbone'])

    # Fitness activity classifier with 81 outputs
    activity_classifier = LogisticRegression(
        num_in=backbone_network.feature_dim,
        num_out=81,
    )
    activity_classifier.load_state_dict(weights['fitness_activity_recognition'])
    activity_classifier.eval()

    # MET value converter
    met_value_converter = calorie_estimation.METValueMLPConverter()
    met_value_converter.load_state_dict(weights['met_converter'])
    met_value_converter.eval()

    # Feed the backbone features to both downstream nets
    net = Pipe(
        backbone_network,
        feature_converter=[activity_classifier, met_value_converter],
    )

    # Output index 0 is the classifier, index 1 the MET converter
    activity_output = PostprocessClassificationOutput(
        INT2LAB, smoothing=8, indices=[0])
    calorie_accumulator = calorie_estimation.CalorieAccumulator(
        weight=weight,
        height=height,
        age=age,
        gender=gender,
        smoothing=12,
        indices=[1],
    )
    pipeline_post_processors = [activity_output, calorie_accumulator]

    fps_op = sense.display.DisplayFPS(
        expected_camera_fps=net.fps,
        expected_inference_fps=net.fps / net.step_size,
    )
    top_k_op = sense.display.DisplayTopKClassificationOutputs(top_k=1, threshold=0.5)
    met_op = sense.display.DisplayMETandCalories(y_offset=40)

    results_display = sense.display.DisplayResults(
        title=title,
        display_ops=[fps_op, top_k_op, met_op],
        border_size_top=50,
        display_fn=display_fn,
    )

    # Run live inference
    Controller(
        neural_network=net,
        post_processors=pipeline_post_processors,
        results_display=results_display,
        callbacks=[],
        **kwargs
    ).run_inference()
    head_name = 'volleyball_counter' if counter else 'volleyball_classifier'
    INT2LAB = INT2LAB_COUNTING if counter else INT2LAB_CLASSIFICATION
    LAB2INT = LAB2INT_COUNTING if counter else LAB2INT_CLASSIFICATION

    # Load weights
    selected_config, weights = get_relevant_weights(SUPPORTED_MODEL_CONFIGURATIONS, requested_converter=head_name)

    # Load backbone network
    backbone_network = build_backbone_network(selected_config, weights['backbone'],
                                              weights_finetuned=weights[head_name])

    head = LogisticRegression(num_in=backbone_network.feature_dim,
                              num_out=len(INT2LAB))
    head.load_state_dict(weights[head_name])
    head.eval()

    # Concatenate backbone network and head
    net = Pipe(backbone_network, head)

    # Build post-processors
    post_processors = [
        PostprocessClassificationOutput(INT2LAB, smoothing=4),
    ]

    if counter:
        post_processors.extend([
            AggregatedPostProcessors(
                post_processors=[
                    EventCounter(key='forearm_passing_position_1',
                                 key_idx=LAB2INT['forearm_passing_position_1'],