def create_mtcnn_net(self, p_model_path=None, r_model_path=None, o_model_path=None, use_cuda=True):
    """Build the P/R/O networks for which a checkpoint path was supplied.

    Every path is optional. A network whose path is ``None`` is not
    created and its slot in the result is ``None``.

    Args:
        p_model_path: Checkpoint path for ``PNet``, or ``None`` to skip it.
        r_model_path: Checkpoint path for ``RNet``, or ``None`` to skip it.
        o_model_path: Checkpoint path for ``ONet``, or ``None`` to skip it.
        use_cuda: When true, use ``cuda:0`` if CUDA is available and move
            each loaded network onto ``self.device``.

    Returns:
        Tuple ``(pnet, rnet, onet)``; each entry is a network switched to
        evaluation mode, or ``None``.

    Side effects:
        Sets ``self.device``.
    """
    self.device = torch.device(
        "cuda:0" if use_cuda and torch.cuda.is_available() else "cpu")

    # The checkpoint directory used to be taken from ``p_model_path``
    # unconditionally, which raised TypeError whenever PNet was skipped.
    # Use the first path that was actually supplied instead.
    model_paths = (p_model_path, r_model_path, o_model_path)
    anchor_path = next(
        (path for path in model_paths if path is not None), None)
    if anchor_path is None:
        # Nothing to load, so there is no directory to build a CheckPoint for.
        return None, None, None

    dirname, _ = os.path.split(anchor_path)
    checkpoint = CheckPoint(dirname)

    def _restore(net, model_path):
        # Load the stored state into ``net`` via the project's CheckPoint
        # helper, optionally move it to the device, and freeze it for
        # inference.
        model_state = checkpoint.load_model(model_path)
        net = checkpoint.load_state(net, model_state)
        if use_cuda:
            net.to(self.device)
        net.eval()
        return net

    # Network classes are only instantiated when their path is given.
    pnet = _restore(PNet(), p_model_path) if p_model_path is not None else None
    rnet = _restore(RNet(), r_model_path) if r_model_path is not None else None
    onet = _restore(ONet(), o_model_path) if o_model_path is not None else None
    return pnet, rnet, onet
def network_loader(p_model_path=None, r_model_path=None, o_model_path=None):
    """Load the requested MTCNN stage networks from saved state dicts.

    Args:
        p_model_path: Path to a ``PNet`` state dict, or ``None`` to skip.
        r_model_path: Path to an ``RNet`` state dict, or ``None`` to skip.
        o_model_path: Path to an ``ONet`` state dict, or ``None`` to skip.

    Returns:
        Tuple ``(p_network, r_network, o_network)``. Each entry is the
        network in evaluation mode, or ``None`` when its path was omitted.
    """

    def _restore(net, weights_path):
        # Fill ``net`` with the weights stored at ``weights_path`` and
        # switch it to evaluation mode.
        net.load_state_dict(torch.load(weights_path))
        net.eval()
        return net

    # A network is only constructed when its path is provided.
    p_network = None if p_model_path is None else _restore(PNet(), p_model_path)
    r_network = None if r_model_path is None else _restore(RNet(), r_model_path)
    o_network = None if o_model_path is None else _restore(ONet(), o_model_path)
    return p_network, r_network, o_network
def pnet_trainer(model_store_path, data, batch_size, learning_rate):
    """Train a fresh ``PNet`` for ten epochs and save it after each epoch.

    Args:
        model_store_path: Directory that receives the saved files.
        data: Dataset handed to ``ImageLoader``.
        batch_size: Batch size handed to ``ImageLoader``.
        learning_rate: Learning rate for the Adam optimizer.

    Each epoch writes ``pnet_epoch_<epoch>.pt`` (state dict) and
    ``pnet_epoch_model_<epoch>.pkl`` (whole module).
    """
    network = PNet()
    loss = Loss()
    optimizer = torch.optim.Adam(network.parameters(), lr=learning_rate)
    adjusted_data = ImageLoader(data, 12, batch_size, shuffle=True)
    network.train()

    for epoch in range(10):
        adjusted_data.reset()

        for batch_index, (image, (label, bbox)) in enumerate(adjusted_data):
            # Convert every image of the batch and stack into one tensor.
            batch_images = Variable(torch.stack([
                convert_image_to_tensor(image[i, :, :, :])
                for i in range(image.shape[0])
            ])).float()
            target_labels = Variable(torch.from_numpy(label).float())
            target_boxes = Variable(torch.from_numpy(bbox).float())

            label_predictions, bbox_predictions = network(batch_images)

            class_loss = loss.cls_loss(target_labels, label_predictions)
            box_loss = loss.box_loss(
                target_labels, target_boxes, bbox_predictions)
            # Classification weighted 1.0, box regression 0.5.
            total_loss = (class_loss * 1.0) + (box_loss * 0.5)

            # Report progress every 100 batches.
            if (batch_index % 100) == 0:
                accuracy = compute_accuracy(label_predictions, target_labels)
                print(
                    "%s : Epoch: %d, Step: %d, accuracy: %s, detection: %s, bbox_loss: %s, total_loss: %s"
                    % (datetime.datetime.now(), epoch, batch_index, accuracy,
                       class_loss, box_loss, total_loss))

            optimizer.zero_grad()
            total_loss.backward()
            optimizer.step()

        # Save both the weights and the whole module for this epoch.
        weights_file = os.path.join(
            model_store_path, "pnet_epoch_%d.pt" % epoch)
        module_file = os.path.join(
            model_store_path, "pnet_epoch_model_%d.pkl" % epoch)
        torch.save(network.state_dict(), weights_file)
        torch.save(network, module_file)
# Seed the CPU and CUDA random generators from the configured seed.
torch.manual_seed(config.manualSeed)
torch.cuda.manual_seed(config.manualSeed)
torch.backends.cudnn.benchmark = True
device = torch.device("cuda" if use_cuda else "cpu")

# Set dataloader
if use_cuda:
    kwargs = {'num_workers': config.nThreads, 'pin_memory': True}
else:
    kwargs = {}
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]),
])
train_dataset = FaceDataset(
    config.annoPath, transform=transform, is_train=True)
train_loader = torch.utils.data.DataLoader(
    train_dataset, batch_size=config.batchSize, shuffle=True, **kwargs)

# Set model
model = PNet().to(device)

# parallel train: wrap the model when more than one GPU id is configured
if use_cuda and len(config.GPU.split(',')) > 1:
    model = torch.nn.DataParallel(model)

# Set checkpoint
checkpoint = CheckPoint(config.save_path)

# Set optimizer and a step-wise learning-rate decay (x0.1 at each milestone)
optimizer = torch.optim.Adam(model.parameters(), lr=config.lr)
scheduler = torch.optim.lr_scheduler.MultiStepLR(
    optimizer, milestones=config.step, gamma=0.1)

# Set trainer
# logger = Logger(config.save_path)
# Seed the CPU and CUDA random generators from the configured seed.
torch.manual_seed(config.manualSeed)
torch.cuda.manual_seed(config.manualSeed)
torch.backends.cudnn.benchmark = True
device = torch.device("cuda" if use_cuda else "cpu")

# Set dataloader
if use_cuda:
    kwargs = {'num_workers': config.nThreads, 'pin_memory': True}
else:
    kwargs = {}
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]),
])
train_dataset = FaceDataset(
    config.annoPath, transform=transform, is_train=True)
train_loader = torch.utils.data.DataLoader(
    train_dataset, batch_size=config.batchSize, shuffle=True, **kwargs)

# Set model
model = PNet().to(device)

# Set checkpoint
checkpoint = CheckPoint(config.save_path)

# Set optimizer and a step-wise learning-rate decay (x0.1 at each milestone)
optimizer = torch.optim.Adam(model.parameters(), lr=config.lr)
scheduler = torch.optim.lr_scheduler.MultiStepLR(
    optimizer, milestones=config.step, gamma=0.1)

# Set trainer
logger = Logger(config.save_path)
trainer = PNetTrainer(
    config.lr, train_loader, model, optimizer, scheduler, logger, device)

# Epochs are numbered from 1 to config.nEpochs inclusive.
for epoch in range(1, config.nEpochs + 1):
    cls_loss_, box_offset_loss, total_loss, accuracy = trainer.train(epoch)
def __init__(self, max_crop=64):
    """Create the three stage networks on the GPU and store ``max_crop``.

    Args:
        max_crop: Stored unchanged on the instance as ``self.max_crop``.
    """
    # Build each stage in P -> R -> O order and move it to CUDA.
    stage_factories = (
        ("pnet", lambda: PNet()),
        ("rnet", lambda: RNet()),
        ("onet", lambda: ONet()),
    )
    for attr_name, build in stage_factories:
        setattr(self, attr_name, build().cuda())
    self.max_crop = max_crop
class Mtcnn():
    """Cascade of ``PNet``, ``RNet`` and ``ONet`` running on CUDA.

    Calling the object with ``training=True`` returns a combined loss and
    per-stage face accuracies. Calling it otherwise runs the cascade over
    random square crops of one image and returns the ``ONet`` predictions
    for the crops that the earlier stages classified as class ``1``.
    """

    def __init__(self, max_crop=64):
        """Create the three networks on the GPU.

        Args:
            max_crop: Number of random crops drawn per image at inference.
        """
        self.pnet = PNet().cuda()
        self.rnet = RNet().cuda()
        self.onet = ONet().cuda()
        self.max_crop = max_crop

    def __call__(self, x, training=False):
        """Run a training step (loss only) or cascade inference.

        Args:
            x: When ``training`` is ``True``, a tuple
                ``(img, face, bbox, lm)``; otherwise a single image array
                that ``_crop`` unpacks as ``(h, w, c)``.
            training: Only the literal ``True`` selects the training path.

        Returns:
            Training: ``(loss, p_face_acc, r_face_acc, o_face_acc)``.
            Inference: ``(o_face_preds, o_bbox_preds, o_lm_preds, cache)``
            where ``cache`` is ``[xs, ys, sizes]`` of the surviving crops.
        """
        if training is True:
            img, face, bbox, lm = x
            pnet_input, rnet_input, onet_input = self._get_input_tensor(img)

            # define losses
            criterion_face = nn.NLLLoss()
            criterion_bbox = nn.MSELoss()
            criterion_lm = nn.MSELoss()

            # create torch tensors
            pnet_input = torch.FloatTensor(pnet_input).cuda()
            rnet_input = torch.FloatTensor(rnet_input).cuda()
            onet_input = torch.FloatTensor(onet_input).cuda()
            face = torch.LongTensor(face).cuda()
            bbox = torch.FloatTensor(bbox).cuda()
            lm = torch.FloatTensor(lm).cuda()

            # forward propagation
            p_face_preds, p_bbox_preds, p_lm_preds = self.pnet(pnet_input)
            r_face_preds, r_bbox_preds, r_lm_preds = self.rnet(rnet_input)
            o_face_preds, o_bbox_preds, o_lm_preds = self.onet(onet_input)

            # Box and landmark losses are computed only on samples whose
            # label is 1; build that mask once instead of eighteen times.
            positive = face == 1

            # compute loss
            face_loss = criterion_face(p_face_preds, face) + \
                criterion_face(r_face_preds, face) + \
                criterion_face(o_face_preds, face)
            bbox_loss = criterion_bbox(p_bbox_preds[positive], bbox[positive]) + \
                criterion_bbox(r_bbox_preds[positive], bbox[positive]) + \
                criterion_bbox(o_bbox_preds[positive], bbox[positive])
            lm_loss = criterion_lm(p_lm_preds[positive], lm[positive]) + \
                criterion_lm(r_lm_preds[positive], lm[positive]) + \
                criterion_lm(o_lm_preds[positive], lm[positive])
            loss = face_loss + 0.5 * bbox_loss + 0.5 * lm_loss

            # compute accuracy
            p_equal = torch.argmax(p_face_preds, dim=1).view(*face.size()) == face
            r_equal = torch.argmax(r_face_preds, dim=1).view(*face.size()) == face
            o_equal = torch.argmax(o_face_preds, dim=1).view(*face.size()) == face
            p_face_acc = torch.mean(p_equal.type(torch.FloatTensor))
            r_face_acc = torch.mean(r_equal.type(torch.FloatTensor))
            o_face_acc = torch.mean(o_equal.type(torch.FloatTensor))
            return loss, p_face_acc, r_face_acc, o_face_acc

        with torch.no_grad():
            img = x

            # crop image
            pnet_input, rnet_input, onet_input, cache = \
                self._crop(img, self.max_crop)

            # create torch tensors (channels-last -> channels-first)
            pnet_input = torch.FloatTensor(pnet_input.transpose(0, 3, 1, 2)).cuda()
            rnet_input = torch.FloatTensor(rnet_input.transpose(0, 3, 1, 2)).cuda()
            onet_input = torch.FloatTensor(onet_input.transpose(0, 3, 1, 2)).cuda()

            # forward prop for PNet; keep only crops it labels as class 1
            p_face_preds, p_bbox_preds, p_lm_preds = self.pnet(pnet_input)
            p_keep = self._face_mask(p_face_preds)
            rnet_input = rnet_input[p_keep]
            onet_input = onet_input[p_keep]
            cache = [elem[p_keep] for elem in cache]

            # forward prop for RNet; filter the survivors again
            r_face_preds, r_bbox_preds, r_lm_preds = self.rnet(rnet_input)
            r_keep = self._face_mask(r_face_preds)
            onet_input = onet_input[r_keep]
            cache = [elem[r_keep] for elem in cache]

            # forward prop for ONet
            o_face_preds, o_bbox_preds, o_lm_preds = self.onet(onet_input)
            return o_face_preds, o_bbox_preds, o_lm_preds, cache

    @staticmethod
    def _face_mask(face_preds):
        """Return a NumPy boolean mask of rows whose argmax class is 1."""
        return torch.argmax(face_preds, dim=1).cpu().detach().numpy() == 1

    def parameters(self):
        """Return the parameters of all three networks as one list."""
        params = []
        params.extend(self.pnet.parameters())
        params.extend(self.rnet.parameters())
        params.extend(self.onet.parameters())
        return params

    def save(self, path):
        """Save the three state dicts to ``path`` in a single file."""
        state_dict = {
            "pnet": self.pnet.state_dict(),
            "rnet": self.rnet.state_dict(),
            "onet": self.onet.state_dict(),
        }
        torch.save(state_dict, path)
        print("MTCNN was saved.")

    def load(self, path):
        """Restore the three networks from a file written by ``save``."""
        state_dict = torch.load(path)
        self.pnet.load_state_dict(state_dict["pnet"])
        self.rnet.load_state_dict(state_dict["rnet"])
        self.onet.load_state_dict(state_dict["onet"])
        print("MTCNN was loaded.")

    def get_coord_transformed(self, face_preds, bbox_preds, lm_preds, cache):
        """Map crop-relative predictions back to image coordinates.

        Only rows whose argmax class in ``face_preds`` is 1 are kept.

        Args:
            face_preds: Per-crop class scores; argmax is taken over dim 1.
            bbox_preds: Per-crop ``(x, y, w, h)`` relative to the crop.
            lm_preds: Per-crop landmarks as five ``(x, y)`` pairs, flat.
            cache: ``[xs, ys, sizes]`` crop origins and side lengths.

        Returns:
            List of ``[x, y, w, h, landmark]`` with integer coordinates,
            where ``landmark`` is a flat list of ten integers.
        """
        keep = self._face_mask(face_preds)
        bbox_preds = bbox_preds[keep]
        lm_preds = lm_preds[keep]
        xs = cache[0][keep]
        ys = cache[1][keep]
        sizes = cache[2][keep]

        transformed = []
        for bbox, lm, x, y, size in zip(bbox_preds, lm_preds, xs, ys, sizes):
            bbox_x, bbox_y, bbox_w, bbox_h = bbox
            # Scale by the crop side and shift by the crop origin.
            bbox_x = bbox_x * size + x
            bbox_y = bbox_y * size + y
            bbox_w = bbox_w * size
            bbox_h = bbox_h * size
            landmark = []
            for i in range(0, 10, 2):
                lm_x, lm_y = lm[i], lm[i + 1]
                lm_x = lm_x * size + x
                lm_y = lm_y * size + y
                landmark.extend([int(lm_x), int(lm_y)])
            transformed.append(
                [int(bbox_x), int(bbox_y), int(bbox_w), int(bbox_h), landmark])
        return transformed

    def _get_input_tensor(self, imgs):
        """Resize a channels-first batch to the 12/24/48 stage resolutions.

        Args:
            imgs: Array unpacked as ``(n, c, h, w)``.

        Returns:
            Three arrays shaped ``(n, c, 12, 12)``, ``(n, c, 24, 24)`` and
            ``(n, c, 48, 48)``.
        """
        n, c, _, _ = imgs.shape
        # cv2.resize works on channels-last images.
        cvimgs = imgs.transpose(0, 2, 3, 1)
        pnet_input = np.zeros((n, c, 12, 12))
        rnet_input = np.zeros((n, c, 24, 24))
        onet_input = np.zeros((n, c, 48, 48))
        for i in range(n):
            pnet_input[i] = cv2.resize(cvimgs[i], dsize=(12, 12)).transpose(2, 0, 1)
            rnet_input[i] = cv2.resize(cvimgs[i], dsize=(24, 24)).transpose(2, 0, 1)
            onet_input[i] = cv2.resize(cvimgs[i], dsize=(48, 48)).transpose(2, 0, 1)
        return pnet_input, rnet_input, onet_input

    def _crop(self, img, max_crop):
        """Draw random square crops of ``img`` and resize them per stage.

        Up to ``max_crop`` candidates are drawn; those that would extend
        past the image border are discarded.

        Args:
            img: Image array unpacked as ``(h, w, c)``.
            max_crop: Number of candidate crops to draw.

        Returns:
            ``(pnet_input, rnet_input, onet_input, cache)`` where the inputs
            are ``(n, 12, 12, 3)``, ``(n, 24, 24, 3)`` and ``(n, 48, 48, 3)``
            arrays and ``cache`` is ``[xs, ys, sizes]`` of the kept crops.
        """
        h, w, _ = img.shape
        short_side = min(h, w)
        random_size = np.random.randint(24, short_side, size=max_crop)
        random_x = np.random.randint(0, short_side - 24, size=max_crop)
        random_y = np.random.randint(0, short_side - 24, size=max_crop)

        # Filter valid crops. The ``np.bool`` alias was removed from NumPy,
        # so the builtin ``bool`` is used as the dtype.
        index_slice = np.ones(random_size.shape, dtype=bool)
        index_slice = index_slice & (random_x + random_size <= w)
        index_slice = index_slice & (random_y + random_size <= h)
        random_size = random_size[index_slice]
        random_x = random_x[index_slice]
        random_y = random_y[index_slice]

        n = random_size.shape[0]
        pnet_input = np.zeros((n, 12, 12, 3))
        rnet_input = np.zeros((n, 24, 24, 3))
        onet_input = np.zeros((n, 48, 48, 3))
        crops = zip(random_size, random_x, random_y)
        for i, (size, x, y) in enumerate(crops):
            cropped = img[y:y + size, x:x + size]
            pnet_input[i] = cv2.resize(cropped, dsize=(12, 12))
            rnet_input[i] = cv2.resize(cropped, dsize=(24, 24))
            onet_input[i] = cv2.resize(cropped, dsize=(48, 48))

        cache = [random_x, random_y, random_size]
        return pnet_input, rnet_input, onet_input, cache