Example #1
0
    def _load_net(self):
        # Load feature extractor
        feature_extractor = feature_extractors.StridedInflatedEfficientNet()
        checkpoint = engine.load_weights(self.feature_extractor_weights)
        feature_extractor.load_state_dict(checkpoint)
        feature_extractor.eval()

        # Load a logistic regression classifier
        gesture_classifier = LogisticRegression(
            num_in=feature_extractor.feature_dim, num_out=30)
        checkpoint = engine.load_weights(self.gesture_classifier_weights)
        gesture_classifier.load_state_dict(checkpoint)
        gesture_classifier.eval()

        # Combine both models
        self.net = Pipe(feature_extractor, gesture_classifier)
from realtimenet.downstream_tasks.gesture_recognition import INT2LAB
from realtimenet.downstream_tasks.nn_utils import Pipe, LogisticRegression
from realtimenet.downstream_tasks.postprocess import PostprocessClassificationOutput


if __name__ == "__main__":
    # Parse arguments
    args = docopt(__doc__)
    camera_id = args['--camera_id'] or 0
    path_in = args['--path_in'] or None
    path_out = args['--path_out'] or None
    title = args['--title'] or None
    use_gpu = args['--use_gpu']

    # Load feature extractor
    feature_extractor = feature_extractors.StridedInflatedEfficientNet()
    checkpoint = engine.load_weights('resources/backbone/strided_inflated_efficientnet.ckpt')
    feature_extractor.load_state_dict(checkpoint)
    feature_extractor.eval()

    # Load a logistic regression classifier
    gesture_classifier = LogisticRegression(num_in=feature_extractor.feature_dim,
                                            num_out=30)
    checkpoint = engine.load_weights('resources/gesture_detection/efficientnet_logistic_regression.ckpt')
    gesture_classifier.load_state_dict(checkpoint)
    gesture_classifier.eval()

    # Concatenate feature extractor and met converter
    net = Pipe(feature_extractor, gesture_classifier)

    # Create inference engine, video streaming and display instances