Example #1
0
    def create_mtcnn_net(self, p_model_path=None, r_model_path=None, o_model_path=None, use_cuda=True):
        """Build the three MTCNN stage networks and load their weights.

        Each of PNet/RNet/ONet is instantiated only when its model path is
        given; the others come back as ``None``.

        Args:
            p_model_path: path to the PNet checkpoint, or None to skip PNet.
            r_model_path: path to the RNet checkpoint, or None to skip RNet.
            o_model_path: path to the ONet checkpoint, or None to skip ONet.
            use_cuda: move loaded networks to ``self.device`` (CUDA when
                available, otherwise CPU).

        Returns:
            Tuple ``(pnet, rnet, onet)``; entries are None where no path
            was supplied.

        Raises:
            ValueError: if no model path at all was supplied.
        """
        # Derive the checkpoint directory from the first path that was
        # actually supplied.  The previous code unconditionally split
        # p_model_path and raised a TypeError whenever only the RNet/ONet
        # paths were given (despite p_model_path defaulting to None).
        first_path = next(
            (p for p in (p_model_path, r_model_path, o_model_path) if p is not None),
            None)
        if first_path is None:
            raise ValueError("at least one model path must be provided")
        dirname, _ = os.path.split(first_path)
        checkpoint = CheckPoint(dirname)

        self.device = torch.device(
            "cuda:0" if use_cuda and torch.cuda.is_available() else "cpu")

        def _load(net, model_path):
            # Shared load -> restore -> (optional) device move -> eval
            # sequence; identical for all three stages.
            model_state = checkpoint.load_model(model_path)
            net = checkpoint.load_state(net, model_state)
            if use_cuda:
                net.to(self.device)
            net.eval()
            return net

        pnet = _load(PNet(), p_model_path) if p_model_path is not None else None
        rnet = _load(RNet(), r_model_path) if r_model_path is not None else None
        onet = _load(ONet(), o_model_path) if o_model_path is not None else None

        return pnet, rnet, onet
Example #2
0
def rnet_trainer(model_store_path, data, batch_size, learning_rate):
    """Train RNet for 10 epochs on 24x24 face crops.

    Args:
        model_store_path: directory where per-epoch checkpoints are written.
        data: annotation source understood by ``ImageLoader``.
        batch_size: mini-batch size.
        learning_rate: Adam learning rate.

    Side effects:
        After every epoch saves ``rnet_epoch_<k>.pt`` (state dict) and
        ``rnet_epoch_model_<k>.pkl`` (the whole pickled module) under
        ``model_store_path``.
    """
    network = RNet()
    loss = Loss()
    optimizer = torch.optim.Adam(network.parameters(), lr=learning_rate)
    # 24 is the RNet input resolution (24x24 crops).
    adjusted_data = ImageLoader(data, 24, batch_size, shuffle=True)
    network.train()

    for epoch in range(10):
        adjusted_data.reset()

        for batch_index, (image, (label, bbox)) in enumerate(adjusted_data):
            try:
                im_tensor = [
                    convert_image_to_tensor(image[i, :, :, :])
                    for i in range(image.shape[0])
                ]
            except Exception:
                # Skip malformed batches instead of aborting training; the
                # previous bare `except:` also swallowed KeyboardInterrupt
                # and SystemExit, which made the trainer impossible to stop.
                continue

            im_tensor = torch.stack(im_tensor)

            # NOTE(review): Variable is a deprecated no-op wrapper on modern
            # PyTorch; kept for compatibility with the project's torch version.
            im_tensor = Variable(im_tensor).float()
            im_label = Variable(torch.from_numpy(label).float())
            im_bbox = Variable(torch.from_numpy(bbox).float())

            label_predictions, bbox_predictions = network(im_tensor)

            class_loss = loss.cls_loss(im_label, label_predictions)
            box_loss = loss.box_loss(im_label, im_bbox, bbox_predictions)

            # Classification weighted 1.0, box regression 0.5 (standard
            # MTCNN loss weighting for the R stage).
            total_loss = (class_loss * 1.0) + (box_loss * 0.5)

            if (batch_index % 100) == 0:
                accuracy = compute_accuracy(label_predictions, im_label)
                print(
                    "%s : Epoch: %d, Step: %d, accuracy: %s, detection: %s, bbox_loss: %s, total_loss: %s"
                    % (datetime.datetime.now(), epoch, batch_index, accuracy,
                       class_loss, box_loss, total_loss))

            optimizer.zero_grad()
            total_loss.backward()
            optimizer.step()

        torch.save(network.state_dict(),
                   os.path.join(model_store_path, "rnet_epoch_%d.pt" % epoch))
        torch.save(
            network,
            os.path.join(model_store_path, "rnet_epoch_model_%d.pkl" % epoch))
Example #3
0
def network_loader(p_model_path=None, r_model_path=None, o_model_path=None):
    """Instantiate PNet/RNet/ONet and load weights for each path given.

    A stage whose path is None is returned as None.  Every loaded network
    is put into eval mode.

    Returns:
        Tuple ``(p_network, r_network, o_network)``.
    """
    loaded = []
    # Table-driven version of the original three copy-pasted branches.
    for model_path, net_cls in ((p_model_path, PNet),
                                (r_model_path, RNet),
                                (o_model_path, ONet)):
        if model_path is None:
            loaded.append(None)
            continue
        network = net_cls()
        network.load_state_dict(torch.load(model_path))
        network.eval()
        loaded.append(network)

    return tuple(loaded)
Example #4
0
    'num_workers': config.nThreads,
    'pin_memory': True
} if use_cuda else {}
# Input preprocessing: to-tensor, then normalize each RGB channel to
# roughly [-1, 1] (mean 0.5, std 0.5 per channel).
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
])
# Training loader over the annotated face dataset.
# NOTE(review): `kwargs` is built just above this snippet (num_workers /
# pin_memory when CUDA is in use) — confirm against the full file.
train_loader = torch.utils.data.DataLoader(FaceDataset(config.annoPath,
                                                       transform=transform,
                                                       is_train=True),
                                           batch_size=config.batchSize,
                                           shuffle=True,
                                           **kwargs)

# Set model
model = RNet()
model = model.to(device)

# Set checkpoint
checkpoint = CheckPoint(config.save_path)

# Set optimizer: Adam with a step-decay schedule (lr x0.1 at each
# milestone in config.step).
optimizer = torch.optim.Adam(model.parameters(), lr=config.lr)
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,
                                                 milestones=config.step,
                                                 gamma=0.1)

# Set trainer
logger = Logger(config.save_path)
trainer = RNetTrainer(config.lr, train_loader, model, optimizer, scheduler,
                      logger, device)
Example #5
0
# Seed both CPU and CUDA RNGs for reproducibility; cudnn.benchmark trades
# determinism for faster convolutions on fixed-size inputs.
torch.manual_seed(config.manualSeed)
torch.cuda.manual_seed(config.manualSeed)
device = torch.device("cuda:0" if use_cuda else "cpu")
torch.backends.cudnn.benchmark = True

# Set dataloader: worker/pinning options only make sense with CUDA.
kwargs = {'num_workers': config.nThreads, 'pin_memory': True} if use_cuda else {}
# Normalize each RGB channel to roughly [-1, 1] (mean 0.5, std 0.5).
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
])
train_loader = torch.utils.data.DataLoader(
    FaceDataset(config.annoPath, transform=transform, is_train=True), batch_size=config.batchSize, shuffle=True, **kwargs)

# Set model
model = RNet()
model = model.to(device)

# Set checkpoint
checkpoint = CheckPoint(config.save_path)

# Set optimizer: Adam with step decay (lr x0.1 at each milestone).
optimizer = torch.optim.Adam(model.parameters(), lr=config.lr)
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=config.step, gamma=0.1)

# Set trainer
logger = Logger(config.save_path)
trainer = RNetTrainer(config.lr, train_loader, model, optimizer, scheduler, logger, device)

# Main loop: epochs are 1-based; trainer.train returns per-epoch metrics.
for epoch in range(1, config.nEpochs + 1):
    cls_loss_, box_offset_loss, total_loss, accuracy = trainer.train(epoch)
Example #6
0
    'num_workers': train_config.nThreads,
    'pin_memory': True
} if use_cuda else {}
# Input preprocessing: to-tensor, then per-channel normalization to
# roughly [-1, 1] (mean 0.5, std 0.5).
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
])
# NOTE(review): `kwargs` comes from the cut-off lines above this snippet
# (num_workers / pin_memory when CUDA is in use) — confirm in the full file.
train_loader = torch.utils.data.DataLoader(FaceDataset(train_config.annoPath,
                                                       transform=transform,
                                                       is_train=True),
                                           batch_size=train_config.batchSize,
                                           shuffle=True,
                                           **kwargs)

# Set model — this variant of RNet also regresses facial landmarks.
model = RNet(num_landmarks=config.NUM_LANDMARKS)
model = model.to(device)

# Set checkpoint
checkpoint = CheckPoint(train_config.save_path)

# Set optimizer: Adam with step decay (lr x0.1 at each milestone).
optimizer = torch.optim.Adam(model.parameters(), lr=train_config.lr)
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,
                                                 milestones=train_config.step,
                                                 gamma=0.1)

# Set trainer
logger = Logger(train_config.save_path)
trainer = RNetTrainer(train_config.lr, train_loader, model, optimizer,
                      scheduler, logger, device)
Example #7
0
    def __init__(self, max_crop=64):
        """Build the three MTCNN stage networks on the GPU (requires CUDA).

        max_crop: presumably caps the number of random crops sampled per
        image at inference time — confirm against the enclosing class.
        """
        self.pnet = PNet().cuda()
        self.rnet = RNet().cuda()
        self.onet = ONet().cuda()

        self.max_crop = max_crop
Example #8
0
class Mtcnn():
    """Three-stage MTCNN face detector (PNet -> RNet -> ONet), CUDA-only.

    Training mode computes a joint face/bbox/landmark loss over all three
    stages on pre-cropped inputs; inference mode cascades random crops
    through the stages, keeping only crops each stage classifies as a face.
    """

    def __init__(self, max_crop=64):
        self.pnet = PNet().cuda()
        self.rnet = RNet().cuda()
        self.onet = ONet().cuda()

        # Upper bound on the number of random crops sampled per image at
        # inference time (see _crop).
        self.max_crop = max_crop

    def __call__(self, x, training=False):
        """Run the cascade.

        Training (``training=True``): ``x`` is ``(img, face, bbox, lm)``;
        returns ``(loss, p_face_acc, r_face_acc, o_face_acc)``.
        Inference: ``x`` is a single HxWxC image; returns
        ``(o_face_preds, o_bbox_preds, o_lm_preds, cache)`` for the crops
        that survived the P and R stages.
        """
        if training is True:
            img, face, bbox, lm = x

            pnet_input, rnet_input, onet_input = self._get_input_tensor(img)

            # define losses
            criterion_face = nn.NLLLoss()
            criterion_bbox = nn.MSELoss()
            criterion_lm = nn.MSELoss()

            # create torch tensors
            pnet_input = torch.FloatTensor(pnet_input).cuda()
            rnet_input = torch.FloatTensor(rnet_input).cuda()
            onet_input = torch.FloatTensor(onet_input).cuda()
            face = torch.LongTensor(face).cuda()
            bbox = torch.FloatTensor(bbox).cuda()
            lm = torch.FloatTensor(lm).cuda()

            # forward propagation
            p_face_preds, p_bbox_preds, p_lm_preds = self.pnet(pnet_input)
            r_face_preds, r_bbox_preds, r_lm_preds = self.rnet(rnet_input)
            o_face_preds, o_bbox_preds, o_lm_preds = self.onet(onet_input)

            # compute loss; bbox/landmark regression only contributes for
            # positive (face == 1) samples
            face_loss = criterion_face(p_face_preds, face) + \
                        criterion_face(r_face_preds, face) + \
                        criterion_face(o_face_preds, face)

            bbox_loss = criterion_bbox(p_bbox_preds[face == 1], bbox[face == 1]) + \
                        criterion_bbox(r_bbox_preds[face == 1], bbox[face == 1]) + \
                        criterion_bbox(o_bbox_preds[face == 1], bbox[face == 1])

            lm_loss = criterion_lm(p_lm_preds[face == 1], lm[face == 1]) + \
                      criterion_lm(r_lm_preds[face == 1], lm[face == 1]) + \
                      criterion_lm(o_lm_preds[face == 1], lm[face == 1])

            loss = face_loss + 0.5*bbox_loss + 0.5*lm_loss

            # compute per-stage classification accuracy
            p_equal = torch.argmax(p_face_preds, dim=1).view(*face.size()) == face
            r_equal = torch.argmax(r_face_preds, dim=1).view(*face.size()) == face
            o_equal = torch.argmax(o_face_preds, dim=1).view(*face.size()) == face

            p_face_acc = torch.mean(p_equal.type(torch.FloatTensor))
            r_face_acc = torch.mean(r_equal.type(torch.FloatTensor))
            o_face_acc = torch.mean(o_equal.type(torch.FloatTensor))

            return loss, p_face_acc, r_face_acc, o_face_acc

        else:
            with torch.no_grad():
                img = x

                # crop image into candidate windows at the three input sizes
                pnet_input, rnet_input, onet_input, cache = self._crop(img, self.max_crop)

                # create torch tensors (HWC -> CHW)
                pnet_input = torch.FloatTensor(pnet_input.transpose(0, 3, 1, 2)).cuda()
                rnet_input = torch.FloatTensor(rnet_input.transpose(0, 3, 1, 2)).cuda()
                onet_input = torch.FloatTensor(onet_input.transpose(0, 3, 1, 2)).cuda()

                # forward prop for PNet; keep only crops PNet calls a face
                p_face_preds, p_bbox_preds, p_lm_preds = self.pnet(pnet_input)

                rnet_input = rnet_input[torch.argmax(p_face_preds, dim=1).cpu().detach().numpy() == 1]
                onet_input = onet_input[torch.argmax(p_face_preds, dim=1).cpu().detach().numpy() == 1]
                cache = list(map(lambda elem: elem[torch.argmax(p_face_preds, dim=1).cpu().detach().numpy() == 1], cache))

                # forward prop for RNet; filter again
                r_face_preds, r_bbox_preds, r_lm_preds = self.rnet(rnet_input)

                onet_input = onet_input[torch.argmax(r_face_preds, dim=1).cpu().detach().numpy() == 1]
                cache = list(map(lambda elem: elem[torch.argmax(r_face_preds, dim=1).cpu().detach().numpy() == 1], cache))

                # forward prop for ONet (final stage)
                o_face_preds, o_bbox_preds, o_lm_preds = self.onet(onet_input)

                return o_face_preds, o_bbox_preds, o_lm_preds, cache

    def parameters(self):
        """Return the parameters of all three stages as one flat list."""
        params = []

        params.extend(self.pnet.parameters())
        params.extend(self.rnet.parameters())
        params.extend(self.onet.parameters())

        return params

    def save(self, path):
        """Save all three stage state dicts into a single checkpoint file."""
        state_dict = {
            "pnet": self.pnet.state_dict(),
            "rnet": self.rnet.state_dict(),
            "onet": self.onet.state_dict(),
        }

        torch.save(state_dict, path)
        print("MTCNN was saved.")

    def load(self, path):
        """Restore all three stages from a checkpoint written by save()."""
        state_dict = torch.load(path)

        self.pnet.load_state_dict(state_dict["pnet"])
        self.rnet.load_state_dict(state_dict["rnet"])
        self.onet.load_state_dict(state_dict["onet"])

        print("MTCNN was loaded.")

    def get_coord_transformed(self, face_preds, bbox_preds, lm_preds, cache):
        """Map face-positive predictions back to original-image coordinates.

        Predictions are relative to each crop; scale by the crop size and
        offset by the crop origin (x, y) stored in ``cache``.  Returns a list
        of ``[x, y, w, h, landmark]`` entries (ints), where ``landmark`` is a
        flat list of 5 (x, y) points.
        """
        keep = torch.argmax(face_preds, dim=1).cpu().detach().numpy() == 1
        bbox_preds = bbox_preds[keep]
        lm_preds = lm_preds[keep]
        xs = cache[0][keep]
        ys = cache[1][keep]
        sizes = cache[2][keep]

        transformed = []

        for bbox, lm, x, y, size in zip(bbox_preds, lm_preds, xs, ys, sizes):
            bbox_x, bbox_y, bbox_w, bbox_h = bbox
            bbox_x = bbox_x * size + x
            bbox_y = bbox_y * size + y
            bbox_w = bbox_w * size
            bbox_h = bbox_h * size

            landmark = []
            # 5 landmarks stored as 10 interleaved (x, y) values
            for i in range(0, 10, 2):
                lm_x, lm_y = lm[i], lm[i+1]
                lm_x = lm_x * size + x
                lm_y = lm_y * size + y

                landmark.extend([int(lm_x), int(lm_y)])

            transformed.append([int(bbox_x), int(bbox_y), int(bbox_w), int(bbox_h), landmark])

        return transformed

    def _get_input_tensor(self, imgs):
        """Resize a batch of NCHW images to the three stage input sizes.

        Returns NCHW arrays at 12x12 (PNet), 24x24 (RNet), 48x48 (ONet).
        """
        n, c, h, w = imgs.shape

        # cv2.resize wants HWC layout
        cvimgs = imgs.transpose(0, 2, 3, 1)

        pnet_input = np.zeros((n, c, 12, 12))
        rnet_input = np.zeros((n, c, 24, 24))
        onet_input = np.zeros((n, c, 48, 48))

        for i in range(n):
            pnet_input[i] = cv2.resize(cvimgs[i], dsize=(12, 12)).transpose(2, 0, 1)
            rnet_input[i] = cv2.resize(cvimgs[i], dsize=(24, 24)).transpose(2, 0, 1)
            onet_input[i] = cv2.resize(cvimgs[i], dsize=(48, 48)).transpose(2, 0, 1)

        return pnet_input, rnet_input, onet_input

    def _crop(self, img, max_crop):
        """Sample up to max_crop random square crops from an HWC image.

        Returns the crops resized for each stage (NHWC) plus a cache of
        ``[xs, ys, sizes]`` arrays for mapping predictions back.
        NOTE(review): np.random.randint(24, min(h, w)) raises ValueError
        when min(h, w) <= 24 — confirm callers guarantee larger images.
        """
        h, w, c = img.shape

        random_size = np.random.randint(24, min(h, w), size=max_crop)
        random_x = np.random.randint(0, min(h, w) - 24, size=max_crop)
        random_y = np.random.randint(0, min(h, w) - 24, size=max_crop)

        # filtering valid crops: drop samples whose box would fall outside
        # the image.  (The `np.bool` alias was removed in NumPy 1.24; use
        # the builtin bool dtype instead.)
        index_slice = np.ones(random_size.shape, dtype=bool)
        index_slice = index_slice & (random_x + random_size <= w)
        index_slice = index_slice & (random_y + random_size <= h)

        random_size = random_size[index_slice]
        random_x = random_x[index_slice]
        random_y = random_y[index_slice]

        n = random_size.shape[0]

        pnet_input = np.zeros((n, 12, 12, 3))
        rnet_input = np.zeros((n, 24, 24, 3))
        onet_input = np.zeros((n, 48, 48, 3))

        i = 0

        for size, x, y in zip(random_size, random_x, random_y):
            cropped = img[y:y + size, x:x + size]

            pnet_input[i] = cv2.resize(cropped, dsize=(12, 12))
            rnet_input[i] = cv2.resize(cropped, dsize=(24, 24))
            onet_input[i] = cv2.resize(cropped, dsize=(48, 48))

            i += 1

        cache = [random_x, random_y, random_size]

        return pnet_input, rnet_input, onet_input, cache