def __init__(self, model_name='vgg', feature_extract=True, use_pretrained=True):
    """Multi-task conv model predicting age, gender and race (UTKFace task).

    Args:
        model_name: backbone architecture name passed to ``initialize_model``.
        feature_extract: if True, the backbone is used as a frozen feature
            extractor (only the new heads are trained).
        use_pretrained: if True, load pretrained (ImageNet) backbone weights.
    """
    super(ConvModelMultiTask, self).__init__()
    # BUG FIX: the original positional call
    #   initialize_model(model_name, feature_extract, 'utk', use_pretrained)
    # bound 'utk' to num_classes and the use_pretrained bool to task
    # (signature elsewhere in this project:
    #  (model_name, feature_extract, num_classes, task, use_pretrained)).
    # Use keywords, mirroring PretrainedMT.__init__.
    self.conv_base, input_size = initialize_model(
        model_name, feature_extract, num_classes=None, task='utk',
        use_pretrained=use_pretrained)
    # (removed a stray no-op `self.conv_base` expression statement)
    # One linear head per task on a 128-d feature vector:
    # age regression (1), gender (2 classes), race (5 classes).
    self.output_age = nn.Linear(128, 1)
    self.output_gender = nn.Linear(128, 2)
    self.output_race = nn.Linear(128, 5)
def __init__(self, model_name='resnet', feature_extract=True, use_pretrained=True):
    """Pretrained backbone plus three task heads (age, gender, race)."""
    super(PretrainedMT, self).__init__()
    # Build the (optionally frozen, optionally pretrained) backbone for the
    # 'utk' task; the expected-input-size value it returns is unused here.
    self.conv_base, _input_size = initialize_model(
        model_name,
        feature_extract,
        num_classes=None,
        task='utk',
        use_pretrained=use_pretrained,
    )
    # Task-specific heads on a 128-d feature vector.
    self.output_age = nn.Linear(in_features=128, out_features=1)
    self.output_gender = nn.Linear(in_features=128, out_features=2)
    self.output_race = nn.Linear(in_features=128, out_features=5)
import sys

sys.path.append('../../vision_utils')

import torch.optim as optim

from vision_utils.custom_torch_utils import initialize_model

# Configuration for the FER2013 emotion-classification model.
MODEL_NAME = 'vgg'
FEATURE_EXTRACT = True
NUM_CLASSES = 7
TASK = 'fer2013'
USE_PRETRAINED = True

# Build the backbone; initialize_model also reports the expected input size.
my_model, input_size = initialize_model(
    model_name=MODEL_NAME,
    feature_extract=FEATURE_EXTRACT,
    num_classes=NUM_CLASSES,
    task=TASK,
    use_pretrained=USE_PRETRAINED,
)

# Define the optimizer — only the final classifier layer's parameters are
# optimized (the rest of the network is used as a frozen feature extractor).
optimizer = optim.Adam(my_model.classifier[6].parameters(), lr=1e-3)
def __init__(self, model_name='resnet', feature_extract=True, num_classes=7, use_pretrained=True):
    """FER2013 classifier: a small conv adapter in front of a pretrained backbone."""
    super(ConvModel, self).__init__()
    # Backbone configured for the 'fer2013' task; its expected input size
    # is returned as well but not needed here.
    self.model, _ = initialize_model(
        model_name, feature_extract, num_classes, 'fer2013', use_pretrained)
    # 3-channel -> 3-channel 3x3 conv, stride 1, padding 1 (keeps spatial size).
    self.input_layer = nn.Conv2d(
        in_channels=3, out_channels=3, kernel_size=3, stride=1, padding=1)
def main(args, net):
    """Run emotion + age/gender/race prediction on an image, video or webcam.

    Args:
        args: parsed CLI namespace — model weight paths/types, `source`
            ('image' | 'video' | camera), `file`, and `display_probs`.
        net: OpenCV DNN face detector applied to each frame.

    Raises:
        ValueError: if source is 'image'/'video' but no file path was given.
    """
    emotion_model = args.emotion_model_weight
    demogr_model = args.demogr_model_weight
    type_emotion_model = args.type_emotion_model
    type_demog_model = args.type_demog_model
    from_source = args.source
    source_file = args.file
    if from_source in ['image', 'video'] and source_file is None:
        # FIX: corrected "predcitions" typo in the error message.
        raise ValueError(
            'You must provide a path to an image/video in order to make '
            'predictions from file'
        )
    # Idiom: direct boolean expression instead of `True if ... else False`.
    display_probs = args.display_probs == 'true'

    # load emotion detection model
    if type_emotion_model in ['resnet', 'vgg']:
        model_fer, _ = initialize_model(type_emotion_model, False,
                                        use_pretrained=False)
        transfer_learn = True
    else:
        model_fer = SepConvModel()
        transfer_learn = False
    model_fer.load_state_dict(torch.load(emotion_model, map_location='cpu'))

    # load age-race-gender prediction model
    if type_demog_model in ['resnet', 'vgg']:
        model_utk = PretrainedMT(type_demog_model, feature_extract=False,
                                 use_pretrained=False)
    else:
        model_utk = SepConvModelMT()
    model_utk.load_state_dict(torch.load(demogr_model, map_location='cpu'))

    # make prediction from an input image
    if from_source == 'image':
        frame = cv2.imread(source_file)
        frame = predict_from_frame(net, frame, model_utk, model_fer,
                                   transfer_learn, display_probs)
        parent, f_name = str(
            pathlib.Path(source_file).parent), pathlib.Path(source_file).name
        cv2.imwrite(os.path.join(parent, f_name + 'predicted.jpg'), frame)
        cv2.imshow('Face Detector', frame)
        key = cv2.waitKey(0) & 0xFF
        if key == ord("q"):
            cv2.destroyAllWindows()
    # make prediction from an input video or from camera stream
    else:
        if from_source == 'video':
            vs = cv2.VideoCapture(source_file)
        else:
            vs = cv2.VideoCapture(0)
        try:
            while vs.isOpened():
                ret, frame = vs.read()
                # BUG FIX: stop when the stream is exhausted or a frame fails
                # to decode — previously a None frame was passed straight to
                # predict_from_frame and crashed at end of video.
                if not ret or frame is None:
                    break
                frame = predict_from_frame(net, frame, model_utk, model_fer,
                                           transfer_learn, display_probs)
                cv2.imshow('Face Detector', frame)
                key = cv2.waitKey(1) & 0xFF
                if key == ord("q"):
                    break
        finally:
            # Release the capture device and windows even if prediction raises.
            vs.release()
            cv2.destroyAllWindows()
import torch.nn.functional as F
from torchvision import transforms
from vision_utils.custom_architectures import PretrainedMT
from vision_utils.custom_torch_utils import initialize_model
from emotion_detection.fer_data_utils import SkResize, HistEq, AddChannel, ToRGB
import numpy as np

# Load the trained model for age, gender and race classification
# (weights trained on UTKFace; loaded onto CPU).
saved_weight_utk = '../multitask_rag/checkpoints/resnet_model_21_val_loss=4.275671.pth'
model_utk = PretrainedMT(model_name='resnet')
model_utk.load_state_dict(torch.load(saved_weight_utk, map_location='cpu'))

# Load the trained model for emotion classification (VGG backbone,
# use_pretrained=False since the checkpoint supplies all weights).
saved_weight_fer = '../emotion_detection/checkpoints/vgg_model_173_val_accuracy=0.6447478.pth'
model_fer, _ = initialize_model('vgg', False, use_pretrained=False)
model_fer.load_state_dict(torch.load(saved_weight_fer, map_location='cpu'))

# Load OpenCV's ResNet-SSD Caffe face detector from the bundled binary files.
path_caffe_model = '../binary_files/res10_300x300_ssd_iter_140000.caffemodel'
path_proto = '../binary_files/deploy.prototxt.txt'
net = cv2.dnn.readNetFromCaffe(path_proto, path_caffe_model)


def preprocess_utk(image):
    """Build a PIL->224x224->tensor transform pipeline for the UTK model.

    NOTE(review): as visible here the composed transform is only bound to a
    local name and the function implicitly returns None — the definition is
    likely truncated in this view; confirm it applies `transf(image)` in the
    full file.
    """
    transf = transforms.Compose([
        transforms.ToPILImage(),
        transforms.Resize((224, 224)),
        transforms.ToTensor()
    ])