def run_action_recognition(model_name: str, model_version: str,
                           title: Optional[str] = None,
                           display_fn: Optional[Callable] = None,
                           **kwargs):
    """
    :param model_name:
        Model from backbone (StridedInflatedEfficientNet or StridedInflatedMobileNetV2).
    :param model_version:
        Model version (pro or lite)
    :param title:
        Title of the image frame on display.
    :param display_fn:
        Optional function to further process displayed image
    """
    # Load weights
    selected_config, weights = get_relevant_weights(
        SUPPORTED_MODEL_CONFIGURATIONS, model_name, model_version)

    # Load backbone network
    backbone_network = build_backbone_network(selected_config, weights['backbone'])

    # Create a logistic regression classifier
    action_classifier = LogisticRegression(num_in=backbone_network.feature_dim,
                                           num_out=30)
    action_classifier.load_state_dict(weights['action_recognition'])
    action_classifier.eval()

    # Concatenate backbone network and logistic regression
    net = Pipe(backbone_network, action_classifier)

    postprocessor = [PostprocessClassificationOutput(INT2LAB, smoothing=4)]

    border_size = 30
    display_ops = [
        sense.display.DisplayFPS(expected_camera_fps=net.fps,
                                 expected_inference_fps=net.fps / net.step_size),
        sense.display.DisplayTopKClassificationOutputs(top_k=1, threshold=0.5),
        sense.display.DisplayClassnameOverlay(
            thresholds=LAB_THRESHOLDS,
            border_size_top=border_size if not title else border_size + 50),
    ]
    display_results = sense.display.DisplayResults(title=title,
                                                   display_ops=display_ops,
                                                   display_fn=display_fn)

    # Run live inference
    controller = Controller(neural_network=net,
                            post_processors=postprocessor,
                            results_display=display_results,
                            callbacks=[],
                            **kwargs)
    controller.run_inference()
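# A minimal usage sketch (assumes 'StridedInflatedEfficientNet' / 'pro' is one
# of the SUPPORTED_MODEL_CONFIGURATIONS and that its weights have been
# downloaded; camera_id is forwarded to Controller through **kwargs):
if __name__ == '__main__':
    run_action_recognition(model_name='StridedInflatedEfficientNet',
                           model_version='pro',
                           camera_id=0)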
        checkpoint[key] = checkpoint_classifier.pop(key)

    feature_extractor.load_state_dict(checkpoint)
    feature_extractor.eval()

    with open(os.path.join(custom_classifier, 'label2int.json')) as file:
        class2int = json.load(file)
    INT2LAB = {value: key for key, value in class2int.items()}

    gesture_classifier = LogisticRegression(num_in=feature_extractor.feature_dim,
                                            num_out=len(INT2LAB))
    gesture_classifier.load_state_dict(checkpoint_classifier)
    gesture_classifier.eval()

    # Concatenate feature extractor and classifier
    net = Pipe(feature_extractor, gesture_classifier)

    postprocessor = [PostprocessClassificationOutput(INT2LAB, smoothing=4)]

    display_ops = [
        sense.display.DisplayFPS(expected_camera_fps=net.fps,
                                 expected_inference_fps=net.fps / net.step_size),
        sense.display.DisplayTopKClassificationOutputs(top_k=1, threshold=0.3),
    ]
    display_results = sense.display.DisplayResults(title=title, display_ops=display_ops)

    # Run live inference
    controller = Controller(neural_network=net,
                            post_processors=postprocessor,
    # Load weights
    selected_config, weights = get_relevant_weights(
        SUPPORTED_MODEL_CONFIGURATIONS, model_name, model_version)

    # Load backbone network
    backbone_network = build_backbone_network(selected_config, weights['backbone'])

    # Create a logistic regression classifier
    gesture_classifier = LogisticRegression(num_in=backbone_network.feature_dim,
                                            num_out=30)
    gesture_classifier.load_state_dict(weights['gesture_recognition'])
    gesture_classifier.eval()

    # Concatenate backbone network and logistic regression
    net = Pipe(backbone_network, gesture_classifier)

    postprocessor = [PostprocessClassificationOutput(INT2LAB, smoothing=4)]

    border_size = 30
    display_ops = [
        sense.display.DisplayFPS(expected_camera_fps=net.fps,
                                 expected_inference_fps=net.fps / net.step_size),
        sense.display.DisplayTopKClassificationOutputs(top_k=1, threshold=0.5),
        sense.display.DisplayClassnameOverlay(
            thresholds=LAB_THRESHOLDS,
            border_size=border_size if not title else border_size + 50),
    ]
    display_results = sense.display.DisplayResults(title=title,
def init_model(transform):
    use_gpu = True
    inference_engine = None
    neural_network = None
    postprocessors = None

    if transform == 'gesture':
        # Load feature extractor
        feature_extractor = feature_extractors.StridedInflatedEfficientNet()
        feature_extractor.load_weights_from_resources(
            'backbone/strided_inflated_efficientnet.ckpt')
        feature_extractor.eval()

        # Load a logistic regression classifier
        gesture_classifier = LogisticRegression(num_in=feature_extractor.feature_dim,
                                                num_out=30)
        checkpoint = load_weights_from_resources(
            'gesture_detection/efficientnet_logistic_regression.ckpt')
        gesture_classifier.load_state_dict(checkpoint)
        gesture_classifier.eval()

        # Concatenate feature extractor and classifier
        neural_network = Pipe(feature_extractor, gesture_classifier)

        postprocessors = [
            PostprocessClassificationOutput(INT2LAB, smoothing=4)
        ]

    elif transform == 'fitness':
        weight = 60.0
        height = 170.0
        age = 20.0
        gender = 'female'

        # Load feature extractor
        feature_extractor = feature_extractors.StridedInflatedMobileNetV2()
        feature_extractor.load_weights_from_resources(
            'backbone/strided_inflated_mobilenet.ckpt')
        feature_extractor.eval()

        # Load fitness activity classifier
        gesture_classifier = LogisticRegression(num_in=feature_extractor.feature_dim,
                                                num_out=81)
        checkpoint = load_weights_from_resources(
            'fitness_activity_recognition/mobilenet_logistic_regression.ckpt')
        gesture_classifier.load_state_dict(checkpoint)
        gesture_classifier.eval()

        # Load MET value converter
        met_value_converter = calorie_estimation.METValueMLPConverter()
        checkpoint = load_weights_from_resources(
            'calorie_estimation/mobilenet_features_met_converter.ckpt')
        met_value_converter.load_state_dict(checkpoint)
        met_value_converter.eval()

        # Concatenate feature extractor with downstream nets
        neural_network = Pipe(feature_extractor,
                              feature_converter=[gesture_classifier, met_value_converter])

        postprocessors = [
            PostprocessClassificationOutput(INT2LAB, smoothing=8, indices=[0]),
            calorie_estimation.CalorieAccumulator(weight=weight,
                                                  height=height,
                                                  age=age,
                                                  gender=gender,
                                                  smoothing=12,
                                                  indices=[1])
        ]

    if neural_network is not None:
        inference_engine = InferenceEngine(neural_network, use_gpu=use_gpu)
        start_inference(inference_engine)

    return (inference_engine, postprocessors), None
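# For illustration, a minimal sketch of what chaining a frozen feature
# extractor with a logistic-regression head amounts to in plain PyTorch.
# This is not the sense Pipe implementation (which also handles streaming and
# lists of feature converters); names and dimensions below are made up.
import torch
import torch.nn as nn


class TinyPipe(nn.Module):
    """Run a frozen feature extractor, then a classification head."""

    def __init__(self, feature_extractor: nn.Module, head: nn.Module):
        super().__init__()
        self.feature_extractor = feature_extractor
        self.head = head

    def forward(self, x):
        with torch.no_grad():  # the backbone stays frozen
            features = self.feature_extractor(x)
        return self.head(features)


if __name__ == '__main__':
    backbone = nn.Sequential(nn.Flatten(), nn.Linear(3 * 8 * 8, 16)).eval()
    head = nn.Sequential(nn.Linear(16, 30), nn.Softmax(dim=-1))
    pipe = TinyPipe(backbone, head)
    probs = pipe(torch.randn(1, 3, 8, 8))
    print(probs.shape)  # torch.Size([1, 30])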
    # Load weights
    selected_config, weights = get_relevant_weights(
        SUPPORTED_MODEL_CONFIGURATIONS, model_name, model_version)

    # Load backbone network
    backbone_network = build_backbone_network(selected_config, weights['backbone'])

    # Load a logistic regression classifier
    rep_counter = LogisticRegression(num_in=backbone_network.feature_dim,
                                     num_out=5)
    rep_counter.load_state_dict(weights['rep_counter'])
    rep_counter.eval()

    # Concatenate backbone network and rep counter
    net = Pipe(backbone_network, rep_counter)

    postprocessor = [
        AggregatedPostProcessors(
            post_processors=[
                TwoPositionsCounter(
                    pos0_idx=LAB2INT['counting - jumping_jacks_position=arms_down'],
                    pos1_idx=LAB2INT['counting - jumping_jacks_position=arms_up'],
                    threshold0=0.4,
                    threshold1=0.4,
                    out_key='Jumping Jacks',
                ),
                TwoPositionsCounter(
                    pos0_idx=LAB2INT['counting - squat_position=high'],
    # Load backbone network
    backbone_network = build_backbone_network(selected_config, weights['backbone'])

    # Create fitness activity classifier
    gesture_classifier = LogisticRegression(num_in=backbone_network.feature_dim,
                                            num_out=81)
    gesture_classifier.load_state_dict(weights['fitness_activity_recognition'])
    gesture_classifier.eval()

    # Create MET value converter
    met_value_converter = calorie_estimation.METValueMLPConverter()
    met_value_converter.load_state_dict(weights['met_converter'])
    met_value_converter.eval()

    # Concatenate backbone network with downstream nets
    net = Pipe(backbone_network,
               feature_converter=[gesture_classifier, met_value_converter])

    post_processors = [
        PostprocessClassificationOutput(INT2LAB, smoothing=8, indices=[0]),
        calorie_estimation.CalorieAccumulator(weight=weight,
                                              height=height,
                                              age=age,
                                              gender=gender,
                                              smoothing=12,
                                              indices=[1])
    ]

    display_ops = [
        sense.display.DisplayFPS(expected_camera_fps=net.fps,
                                 expected_inference_fps=net.fps / net.step_size),
def run_fitness_rep_counter(model_name: str, model_version: str,
                            title: Optional[str] = None,
                            display_fn: Optional[Callable] = None,
                            **kwargs):
    """
    :param model_name:
        Model from backbone (StridedInflatedEfficientNet or StridedInflatedMobileNetV2).
    :param model_version:
        Model version (pro or lite)
    :param title:
        Title of the image frame on display.
    :param display_fn:
        Optional function to further process displayed image
    """
    # Load weights
    selected_config, weights = get_relevant_weights(
        SUPPORTED_MODEL_CONFIGURATIONS, model_name, model_version)

    # Load backbone network
    backbone_network = build_backbone_network(selected_config, weights['backbone'])

    # Load a logistic regression classifier
    rep_counter = LogisticRegression(num_in=backbone_network.feature_dim,
                                     num_out=5)
    rep_counter.load_state_dict(weights['rep_counter'])
    rep_counter.eval()

    # Concatenate backbone network and rep counter
    net = Pipe(backbone_network, rep_counter)

    postprocessor = [
        AggregatedPostProcessors(
            post_processors=[
                TwoPositionsCounter(
                    pos0_idx=LAB2INT['counting - jumping_jacks_position=arms_down'],
                    pos1_idx=LAB2INT['counting - jumping_jacks_position=arms_up'],
                    threshold0=0.4,
                    threshold1=0.4,
                    out_key='Jumping Jacks',
                ),
                TwoPositionsCounter(
                    pos0_idx=LAB2INT['counting - squat_position=high'],
                    pos1_idx=LAB2INT['counting - squat_position=low'],
                    threshold0=0.4,
                    threshold1=0.4,
                    out_key='Squats',
                ),
            ],
            out_key='counting',
        ),
        PostprocessClassificationOutput(INT2LAB, smoothing=1)
    ]

    display_ops = [
        sense.display.DisplayFPS(expected_camera_fps=net.fps,
                                 expected_inference_fps=net.fps / net.step_size),
        sense.display.DisplayTopKClassificationOutputs(top_k=1, threshold=0.5),
        sense.display.DisplayExerciseRepCounts()
    ]
    display_results = sense.display.DisplayResults(title=title,
                                                   display_ops=display_ops,
                                                   border_size_top=100,
                                                   display_fn=display_fn)

    # Run live inference
    controller = Controller(neural_network=net,
                            post_processors=postprocessor,
                            results_display=display_results,
                            callbacks=[],
                            **kwargs)
    controller.run_inference()
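# A rough sketch of the rep-counting idea behind TwoPositionsCounter (the
# actual sense implementation may smooth or debounce differently): a tiny
# state machine that counts one repetition every time the per-frame class
# probabilities cross the "position 0" threshold and then the "position 1"
# threshold.
def count_reps(prob_stream, pos0_idx, pos1_idx, threshold0=0.4, threshold1=0.4):
    """Count pos0 -> pos1 cycles in a sequence of per-frame probability vectors."""
    reps = 0
    expecting_pos1 = False
    for probs in prob_stream:
        if not expecting_pos1 and probs[pos0_idx] > threshold0:
            expecting_pos1 = True  # reached the starting position
        elif expecting_pos1 and probs[pos1_idx] > threshold1:
            reps += 1              # full cycle completed
            expecting_pos1 = False
    return reps


# Toy stream: arms_down, arms_up, arms_down, arms_up -> 2 jumping jacks
frames = [[0.9, 0.1], [0.1, 0.9], [0.9, 0.1], [0.2, 0.8]]
assert count_reps(frames, pos0_idx=0, pos1_idx=1) == 2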
    LAB2INT = LAB2INT_COUNTING if counter else LAB2INT_CLASSIFICATION

    # Load weights
    selected_config, weights = get_relevant_weights(SUPPORTED_MODEL_CONFIGURATIONS,
                                                    requested_converter=head_name)

    # Load backbone network
    backbone_network = build_backbone_network(selected_config, weights['backbone'],
                                              weights_finetuned=weights[head_name])

    head = LogisticRegression(num_in=backbone_network.feature_dim,
                              num_out=len(INT2LAB))
    head.load_state_dict(weights[head_name])
    head.eval()

    # Concatenate backbone network and head
    net = Pipe(backbone_network, head)

    # Build post-processors
    post_processors = [
        PostprocessClassificationOutput(INT2LAB, smoothing=4),
    ]
    if counter:
        post_processors.extend([
            AggregatedPostProcessors(
                post_processors=[
                    EventCounter(key='forearm_passing_position_1',
                                 key_idx=LAB2INT['forearm_passing_position_1'],
                                 threshold=0.1,
                                 out_key='forearm passes'),
                    EventCounter(key='overhead_passing_position_1',
    # Load weights
    selected_config, weights = get_relevant_weights(
        SUPPORTED_MODEL_CONFIGURATIONS, model_name, model_version)

    # Load backbone network
    backbone_network = build_backbone_network(selected_config, weights['backbone'])

    # Load MET value converter
    met_value_converter = calorie_estimation.METValueMLPConverter()
    met_value_converter.load_state_dict(weights['met_converter'])
    met_value_converter.eval()

    # Concatenate backbone network and MET converter
    net = Pipe(backbone_network, met_value_converter)

    post_processors = [
        calorie_estimation.CalorieAccumulator(weight=weight,
                                              height=height,
                                              age=age,
                                              gender=gender,
                                              smoothing=12)
    ]

    display_ops = [
        sense.display.DisplayFPS(expected_camera_fps=net.fps,
                                 expected_inference_fps=net.fps / net.step_size),
        sense.display.DisplayDetailedMETandCalories(),
    ]
def run_custom_classifier(custom_classifier, camera_id=0, path_in=None, path_out=None,
                          title=None, use_gpu=True, display_fn=None, stop_event=None):
    # Load backbone network according to config file
    backbone_model_config, backbone_weights = load_backbone_model_from_config(custom_classifier)

    try:
        # Load custom classifier
        checkpoint_classifier = torch.load(
            os.path.join(custom_classifier, 'best_classifier.checkpoint'))
    except FileNotFoundError:
        msg = ("Error: No such file or directory: 'best_classifier.checkpoint'\n"
               "Hint: Make sure the 'custom_classifier' path points to the directory "
               "containing the trained classifier.\n")
        if display_fn:
            display_fn(msg)
        else:
            print(msg)
        return None

    # Update original weights in case some intermediate layers have been finetuned
    update_backbone_weights(backbone_weights, checkpoint_classifier)

    # Create backbone network
    backbone_network = build_backbone_network(backbone_model_config, backbone_weights)

    with open(os.path.join(custom_classifier, 'label2int.json')) as file:
        class2int = json.load(file)
    INT2LAB = {value: key for key, value in class2int.items()}

    gesture_classifier = LogisticRegression(num_in=backbone_network.feature_dim,
                                            num_out=len(INT2LAB))
    gesture_classifier.load_state_dict(checkpoint_classifier)
    gesture_classifier.eval()

    # Concatenate backbone network and classifier
    net = Pipe(backbone_network, gesture_classifier)

    postprocessor = [PostprocessClassificationOutput(INT2LAB, smoothing=4)]

    display_ops = [
        sense.display.DisplayFPS(expected_camera_fps=net.fps,
                                 expected_inference_fps=net.fps / net.step_size),
        sense.display.DisplayTopKClassificationOutputs(top_k=1, threshold=0.1),
    ]
    display_results = sense.display.DisplayResults(title=title,
                                                   display_ops=display_ops,
                                                   display_fn=display_fn)

    # Run live inference
    controller = Controller(
        neural_network=net,
        post_processors=postprocessor,
        results_display=display_results,
        callbacks=[],
        camera_id=camera_id,
        path_in=path_in,
        path_out=path_out,
        use_gpu=use_gpu,
        stop_event=stop_event,
    )
    controller.run_inference()
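# A minimal usage sketch, assuming a hypothetical 'checkpoints/my_gestures'
# directory produced by the training script (it must contain
# best_classifier.checkpoint, config.json and label2int.json):
if __name__ == '__main__':
    run_custom_classifier(custom_classifier='checkpoints/my_gestures',
                          camera_id=0)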
def run_calorie_estimation(model_name: str, model_version: str,
                           weight: Optional[float] = 70.0,
                           height: Optional[float] = 170.0,
                           age: float = 30.0,
                           gender: Optional[str] = None,
                           title: Optional[str] = None,
                           display_fn: Optional[Callable] = None,
                           **kwargs):
    """
    :param model_name:
        Model from backbone (StridedInflatedEfficientNet or StridedInflatedMobileNetV2).
    :param model_version:
        Model version (pro or lite)
    :param weight:
        Weight (in kilograms) used to convert MET values to calories. Defaults to 70.
    :param height:
        Height (in centimeters) used to convert MET values to calories. Defaults to 170.
    :param age:
        Age (in years) used to convert MET values to calories. Defaults to 30.
    :param gender:
        Gender ("male", "female" or "other") used to convert MET values to calories.
    :param title:
        Title of the image frame on display.
    :param display_fn:
        Optional function to further process displayed image
    """
    # Load weights
    selected_config, weights = get_relevant_weights(
        SUPPORTED_MODEL_CONFIGURATIONS, model_name, model_version)

    # Load backbone network
    backbone_network = build_backbone_network(selected_config, weights['backbone'])

    # Load MET value converter
    met_value_converter = calorie_estimation.METValueMLPConverter()
    met_value_converter.load_state_dict(weights['met_converter'])
    met_value_converter.eval()

    # Concatenate backbone network and MET converter
    net = Pipe(backbone_network, met_value_converter)

    post_processors = [
        calorie_estimation.CalorieAccumulator(weight=weight,
                                              height=height,
                                              age=age,
                                              gender=gender,
                                              smoothing=12)
    ]

    display_ops = [
        sense.display.DisplayFPS(expected_camera_fps=net.fps,
                                 expected_inference_fps=net.fps / net.step_size),
        sense.display.DisplayDetailedMETandCalories(),
    ]
    display_results = sense.display.DisplayResults(title=title,
                                                   display_ops=display_ops,
                                                   display_fn=display_fn)

    # Run live inference
    controller = Controller(
        neural_network=net,
        post_processors=post_processors,
        results_display=display_results,
        callbacks=[],
        **kwargs
    )
    controller.run_inference()
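# For reference, a back-of-the-envelope sketch of how MET values map to
# calories, using the standard textbook formula
# kcal/min = MET * 3.5 * weight_kg / 200. The actual CalorieAccumulator may
# additionally correct for height, age and gender, so treat this only as an
# approximation of the idea.
def met_to_kcal(met: float, weight_kg: float, minutes: float) -> float:
    """Estimated energy expenditure for a constant MET level."""
    return met * 3.5 * weight_kg / 200 * minutes


# Example: 10 minutes of an 8-MET activity for a 70 kg person -> ~98 kcal
print(met_to_kcal(met=8.0, weight_kg=70.0, minutes=10.0))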
        num_in=feature_extractor.feature_dim, num_out=81)
    checkpoint = engine.load_weights(
        'resources/fitness_activity_recognition/mobilenet_logistic_regression.ckpt')
    gesture_classifier.load_state_dict(checkpoint)
    gesture_classifier.eval()

    # Load MET value converter
    met_value_converter = calorie_estimation.METValueMLPConverter()
    checkpoint = torch.load(
        'resources/calorie_estimation/mobilenet_features_met_converter.ckpt')
    met_value_converter.load_state_dict(checkpoint)
    met_value_converter.eval()

    # Concatenate feature extractor with downstream nets
    net = Pipe(feature_extractor,
               feature_converter=[gesture_classifier, met_value_converter])

    post_processors = [
        PostprocessClassificationOutput(INT2LAB, smoothing=8, indices=[0]),
        calorie_estimation.CalorieAccumulator(weight=weight,
                                              height=height,
                                              age=age,
                                              gender=gender,
                                              smoothing=12,
                                              indices=[1])
    ]

    display_ops = [
        sense.display.DisplayFPS(expected_camera_fps=net.fps,
                                 expected_inference_fps=net.fps / net.step_size),
def run_gesture_control(model_name: str, model_version: str,
                        title: Optional[str] = None,
                        display_fn: Optional[Callable] = None,
                        **kwargs):
    """
    :param model_name:
        Model from backbone (StridedInflatedEfficientNet or StridedInflatedMobileNetV2).
    :param model_version:
        Model version (pro or lite)
    :param title:
        Title of the image frame on display.
    :param display_fn:
        Optional function to further process displayed image
    """
    # Load weights
    selected_config, weights = get_relevant_weights(
        SUPPORTED_MODEL_CONFIGURATIONS, model_name, model_version)

    # Load backbone network
    backbone_network = build_backbone_network(selected_config, weights['backbone'],
                                              weights_finetuned=weights['gesture_control'])

    # Create a logistic regression classifier
    gesture_classifier = LogisticRegression(num_in=backbone_network.feature_dim,
                                            num_out=len(INT2LAB))
    gesture_classifier.load_state_dict(weights['gesture_control'])
    gesture_classifier.eval()

    # Concatenate backbone network and logistic regression
    net = Pipe(backbone_network, gesture_classifier)

    postprocessor = [
        PostprocessClassificationOutput(INT2LAB, smoothing=1),
        AggregatedPostProcessors(
            post_processors=[
                EventCounter(key, LAB2INT[key], LAB_THRESHOLDS[key])
                for key in ENABLED_LABELS
            ],
            out_key='counting',
        ),
    ]

    border_size_top = 0
    border_size_right = 500

    display_ops = [
        sense.display.DisplayFPS(expected_camera_fps=net.fps,
                                 expected_inference_fps=net.fps / net.step_size),
        sense.display.DisplayClassnameOverlay(
            thresholds=LAB_THRESHOLDS,
            duration=1,
            border_size_top=border_size_top if not title else border_size_top + 50,
            border_size_right=border_size_right),
        sense.display.DisplayPredictionBarGraph(ENABLED_LABELS,
                                                LAB_THRESHOLDS,
                                                x_offset=900,
                                                y_offset=100,
                                                display_counts=True)
    ]
    display_results = sense.display.DisplayResults(title=title,
                                                   display_ops=display_ops,
                                                   border_size_top=border_size_top,
                                                   border_size_right=border_size_right,
                                                   display_fn=display_fn)

    # Run live inference
    controller = Controller(neural_network=net,
                            post_processors=postprocessor,
                            results_display=display_results,
                            callbacks=[],
                            **kwargs)
    controller.run_inference()
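# A rough sketch of the thresholding idea behind EventCounter (the real
# implementation may debounce differently): count an event on each rising
# edge, i.e. whenever the class probability crosses its threshold from below.
def count_events(prob_stream, key_idx, threshold):
    """Count upward threshold crossings for one class index."""
    count = 0
    above = False
    for probs in prob_stream:
        if probs[key_idx] > threshold and not above:
            count += 1
        above = probs[key_idx] > threshold
    return count


# Toy stream: two separate bursts above the 0.5 threshold -> 2 events
frames = [[0.1], [0.8], [0.9], [0.2], [0.7]]
assert count_events(frames, key_idx=0, threshold=0.5) == 2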
def run_fitness_tracker(model_name: str, model_version: str,
                        weight: Optional[float] = 70.0,
                        height: Optional[float] = 170.0,
                        age: float = 30.0,
                        gender: Optional[str] = None,
                        title: Optional[str] = None,
                        display_fn: Optional[Callable] = None,
                        **kwargs):
    """
    :param model_name:
        Model from backbone (StridedInflatedEfficientNet or StridedInflatedMobileNetV2).
    :param model_version:
        Model version (pro or lite)
    :param weight:
        Weight (in kilograms) used to convert MET values to calories. Defaults to 70.
    :param height:
        Height (in centimeters) used to convert MET values to calories. Defaults to 170.
    :param age:
        Age (in years) used to convert MET values to calories. Defaults to 30.
    :param gender:
        Gender ("male", "female" or "other") used to convert MET values to calories.
    :param title:
        Title of the image frame on display.
    :param display_fn:
        Optional function to further process displayed image
    """
    # Load weights
    selected_config, weights = get_relevant_weights(
        SUPPORTED_MODEL_CONFIGURATIONS, model_name, model_version)

    # Load backbone network
    backbone_network = build_backbone_network(selected_config, weights['backbone'])

    # Create fitness activity classifier
    gesture_classifier = LogisticRegression(num_in=backbone_network.feature_dim,
                                            num_out=81)
    gesture_classifier.load_state_dict(weights['fitness_activity_recognition'])
    gesture_classifier.eval()

    # Create MET value converter
    met_value_converter = calorie_estimation.METValueMLPConverter()
    met_value_converter.load_state_dict(weights['met_converter'])
    met_value_converter.eval()

    # Concatenate backbone network with downstream nets
    net = Pipe(backbone_network,
               feature_converter=[gesture_classifier, met_value_converter])

    post_processors = [
        PostprocessClassificationOutput(INT2LAB, smoothing=8, indices=[0]),
        calorie_estimation.CalorieAccumulator(weight=weight,
                                              height=height,
                                              age=age,
                                              gender=gender,
                                              smoothing=12,
                                              indices=[1])
    ]

    display_ops = [
        sense.display.DisplayFPS(expected_camera_fps=net.fps,
                                 expected_inference_fps=net.fps / net.step_size),
        sense.display.DisplayTopKClassificationOutputs(top_k=1, threshold=0.5),
        sense.display.DisplayMETandCalories(y_offset=40),
    ]
    display_results = sense.display.DisplayResults(title=title,
                                                   display_ops=display_ops,
                                                   border_size_top=50,
                                                   display_fn=display_fn)

    # Run live inference
    controller = Controller(neural_network=net,
                            post_processors=post_processors,
                            results_display=display_results,
                            callbacks=[],
                            **kwargs)
    controller.run_inference()
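# A minimal usage sketch (the MobileNetV2 backbone / 'pro' version pairing is
# an assumption; body parameters only affect the calorie estimate):
if __name__ == '__main__':
    run_fitness_tracker(model_name='StridedInflatedMobileNetV2',
                        model_version='pro',
                        weight=70.0, height=170.0, age=30.0, gender='female',
                        camera_id=0)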
    # Determine the number of output classes
    if temporal_training:
        num_output = len(label_counting)
    else:
        num_output = len(label_names)

    # Modify the network to generate the training network on top of the features
    gesture_classifier = LogisticRegression(num_in=feature_extractor.feature_dim,
                                            num_out=num_output,
                                            use_softmax=False)

    if num_layers_to_finetune > 0:
        # Remove internal padding for training
        fine_tuned_layers.apply(set_internal_padding_false)
        net = Pipe(fine_tuned_layers, gesture_classifier)
    else:
        net = gesture_classifier
    net.train()

    if use_gpu:
        net = net.cuda()

    lr_schedule = {0: 0.0001, 40: 0.00001}
    num_epochs = 80
    best_model_state_dict = training_loops(net, train_loader, valid_loader, use_gpu,
                                           num_epochs,
def train_model(path_in, path_out, model_name, model_version, num_layers_to_finetune,
                epochs, use_gpu=True, overwrite=True, temporal_training=None,
                resume=False, log_fn=print, confmat_event=None):
    os.makedirs(path_out, exist_ok=True)

    # Check for existing files
    saved_files = [
        "last_classifier.checkpoint", "best_classifier.checkpoint", "config.json",
        "label2int.json", "confusion_matrix.png", "confusion_matrix.npy"
    ]

    if not overwrite and any(os.path.exists(os.path.join(path_out, file))
                             for file in saved_files):
        print(f"Warning: This operation will overwrite files in {path_out}")

        while True:
            confirmation = input("Are you sure? Add --overwrite to hide this warning. (Y/N) ")
            if confirmation.lower() == "y":
                break
            elif confirmation.lower() == "n":
                sys.exit()
            else:
                print('Invalid input')

    # Load weights
    selected_config, weights = get_relevant_weights(
        SUPPORTED_MODEL_CONFIGURATIONS,
        model_name,
        model_version,
        log_fn,
    )
    backbone_weights = weights['backbone']

    if resume:
        # Load the last classifier
        checkpoint_classifier = torch.load(os.path.join(path_out, 'last_classifier.checkpoint'))

        # Update original weights in case some intermediate layers have been finetuned
        update_backbone_weights(backbone_weights, checkpoint_classifier)

    # Load backbone network
    backbone_network = build_backbone_network(selected_config, backbone_weights)

    # Get the required temporal dimension of feature tensors in order to
    # finetune the provided number of layers
    if num_layers_to_finetune > 0:
        num_timesteps = backbone_network.num_required_frames_per_layer.get(-num_layers_to_finetune)
        if not num_timesteps:
            # Remove 1 because we added 0 to temporal_dependencies
            num_layers = len(backbone_network.num_required_frames_per_layer) - 1
            msg = (f'ERROR - Num of layers to finetune not compatible. '
                   f'Must be an integer between 0 and {num_layers}')
            log_fn(msg)
            raise IndexError(msg)
    else:
        num_timesteps = 1

    # Extract layers to finetune
    if num_layers_to_finetune > 0:
        fine_tuned_layers = backbone_network.cnn[-num_layers_to_finetune:]
        backbone_network.cnn = backbone_network.cnn[0:-num_layers_to_finetune]

    # Extract the features used to finetune the model
    extract_features(path_in, selected_config, backbone_network, num_layers_to_finetune,
                     use_gpu, num_timesteps=num_timesteps, log_fn=log_fn)

    # Find label names
    label_names = os.listdir(directories.get_videos_dir(path_in, 'train'))
    label_names = [x for x in label_names if not x.startswith('.')]
    label_names_temporal = ['background']

    project_config = load_project_config(path_in)
    if project_config:
        for temporal_tags in project_config['classes'].values():
            label_names_temporal.extend(temporal_tags)
    else:
        for label in label_names:
            label_names_temporal.extend([f'{label}_tag1', f'{label}_tag2'])

    label_names_temporal = sorted(set(label_names_temporal))

    label2int_temporal_annotation = {name: index
                                     for index, name in enumerate(label_names_temporal)}
    label2int = {name: index for index, name in enumerate(label_names)}

    extractor_stride = backbone_network.num_required_frames_per_layer_padding[0]

    # Create the data loaders
    features_dir = directories.get_features_dir(path_in, 'train', selected_config,
                                                num_layers_to_finetune)
    tags_dir = directories.get_tags_dir(path_in, 'train')
    train_loader = generate_data_loader(
        project_config,
        features_dir,
        tags_dir,
        label_names,
        label2int,
        label2int_temporal_annotation,
        num_timesteps=num_timesteps,
        stride=extractor_stride,
        temporal_annotation_only=temporal_training,
    )

    features_dir = directories.get_features_dir(path_in, 'valid', selected_config,
                                                num_layers_to_finetune)
    tags_dir = directories.get_tags_dir(path_in, 'valid')
    valid_loader = generate_data_loader(
        project_config,
        features_dir,
        tags_dir,
        label_names,
        label2int,
        label2int_temporal_annotation,
        num_timesteps=None,
        batch_size=1,
        shuffle=False,
        stride=extractor_stride,
        temporal_annotation_only=temporal_training,
    )

    # Check that the data was fully loaded
    if not train_loader or not valid_loader:
        log_fn("ERROR - \n"
               "\tMissing annotations for train or valid set.\n"
               "\tHint: Check if the tags_train and tags_valid directories exist.\n")
        return

    # Determine the number of output classes
    if temporal_training:
        num_output = len(label_names_temporal)
    else:
        num_output = len(label_names)

    # Modify the network to generate the training network on top of the features
    gesture_classifier = LogisticRegression(num_in=backbone_network.feature_dim,
                                            num_out=num_output,
                                            use_softmax=False)
    if resume:
        gesture_classifier.load_state_dict(checkpoint_classifier)

    if num_layers_to_finetune > 0:
        # Remove internal padding for training
        fine_tuned_layers.apply(set_internal_padding_false)
        net = Pipe(fine_tuned_layers, gesture_classifier)
    else:
        net = gesture_classifier
    net.train()

    if use_gpu:
        net = net.cuda()

    lr_schedule = {0: 0.0001, int(epochs / 2): 0.00001} if epochs > 1 else {0: 0.0001}
    num_epochs = epochs

    # Save training config and label2int dictionary
    config = {
        'backbone_name': selected_config.model_name,
        'backbone_version': selected_config.version,
        'num_layers_to_finetune': num_layers_to_finetune,
        'classifier': str(gesture_classifier),
        'temporal_training': temporal_training,
        'lr_schedule': lr_schedule,
        'num_epochs': num_epochs,
        'start_time': str(datetime.datetime.now()),
        'end_time': '',
    }
    with open(os.path.join(path_out, 'config.json'), 'w') as f:
        json.dump(config, f, indent=2)
    with open(os.path.join(path_out, 'label2int.json'), 'w') as f:
        json.dump(label2int_temporal_annotation if temporal_training else label2int,
                  f, indent=2)

    # Train model
    best_model_state_dict = training_loops(net, train_loader, valid_loader, use_gpu,
                                           num_epochs, lr_schedule, label_names, path_out,
                                           temporal_annotation_training=temporal_training,
                                           log_fn=log_fn,
                                           confmat_event=confmat_event)

    # Save best model
    if isinstance(net, Pipe):
        best_model_state_dict = {clean_pipe_state_dict_key(key): value
                                 for key, value in best_model_state_dict.items()}
    torch.save(best_model_state_dict, os.path.join(path_out, "best_classifier.checkpoint"))

    config['end_time'] = str(datetime.datetime.now())
    with open(os.path.join(path_out, 'config.json'), 'w') as f:
        json.dump(config, f, indent=2)
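# A minimal sketch of how a dict-based schedule such as
# lr_schedule = {0: 0.0001, epochs // 2: 0.00001} can be applied: at the start
# of each epoch, if the epoch index is a key, overwrite the learning rate on
# every optimizer parameter group (the actual training_loops helper may do
# this differently).
import torch


def apply_lr_schedule(optimizer, lr_schedule, epoch):
    if epoch in lr_schedule:
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr_schedule[epoch]


if __name__ == '__main__':
    model = torch.nn.Linear(4, 2)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)
    lr_schedule = {0: 0.0001, 40: 0.00001}
    for epoch in range(80):
        apply_lr_schedule(optimizer, lr_schedule, epoch)
        # ... one training epoch here ...
    print(optimizer.param_groups[0]['lr'])  # 1e-05 after epoch 40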
    # Load feature extractor
    feature_extractor = feature_extractors.StridedInflatedMobileNetV2()
    feature_extractor.load_weights_from_resources('backbone/strided_inflated_mobilenet.ckpt')
    feature_extractor.eval()

    # Load MET value converter
    met_value_converter = calorie_estimation.METValueMLPConverter()
    checkpoint = load_weights_from_resources(
        'calorie_estimation/mobilenet_features_met_converter.ckpt')
    met_value_converter.load_state_dict(checkpoint)
    met_value_converter.eval()

    # Concatenate feature extractor and MET converter
    net = Pipe(feature_extractor, met_value_converter)

    post_processors = [
        calorie_estimation.CalorieAccumulator(weight=weight,
                                              height=height,
                                              age=age,
                                              gender=gender,
                                              smoothing=12)
    ]

    display_ops = [
        sense.display.DisplayFPS(expected_camera_fps=net.fps,
                                 expected_inference_fps=net.fps / net.step_size),
        sense.display.DisplayDetailedMETandCalories(),
    ]