Esempio n. 1
0
def test_camera():
    """Run the PNet -> RNet -> ONet cascade on the default camera and show it.

    Detection is executed on every 10th frame; the most recent detections are
    drawn on the frames in between. Press ``q`` in the window to stop.
    """
    video_capture = cv2.VideoCapture(0)
    pnet = PNet()
    rnet = RNet()
    onet = ONet()

    frame_counter = 0
    boxes, landmarks = None, None

    try:
        while True:
            ret, frame = video_capture.read()
            if not ret:
                # No frame was delivered, so there is nothing to draw or show.
                break

            if frame_counter % 10 == 0:
                # Forget the previous detections first so that an early exit
                # from the cascade never leaves stale boxes/landmarks behind.
                boxes, landmarks = None, None
                candidates = pnet.detect(frame)
                if len(candidates) > 0:
                    candidates = rnet.detect(frame, candidates)
                    if len(candidates) > 0:
                        boxes, landmarks = onet.detect(frame, candidates)

            if boxes is not None:
                draw(frame, boxes, landmarks)
            cv2.imshow('image', frame)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

            frame_counter += 1
    finally:
        # Always hand the camera back and close the preview window.
        video_capture.release()
        cv2.destroyAllWindows()
Esempio n. 2
0
    def load(self):
        """Build the cascade graphs and expose them as callables.

        ``self.m_model_path`` is read as a sequence of model directories:
        index 0 for PNet, index 1 (when at least two entries exist) for RNet
        and index 2 (only when there are exactly three entries) for ONet.
        For every stage with a truthy directory a placeholder named ``input``
        is created inside the stage's variable scope, the network weights are
        loaded from ``det1.npy`` / ``det2.npy`` / ``det3.npy`` and a callable
        is stored on ``self.pnet`` / ``self.rnet`` / ``self.onet`` that runs
        the stage's output tensors in ``self.m_session``.
        """
        session = self.m_session
        model_dirs = self.m_model_path
        n_dirs = len(model_dirs)

        pnet_dir = model_dirs[0]
        rnet_dir = model_dirs[1] if n_dirs >= 2 else None
        onet_dir = model_dirs[2] if n_dirs == 3 else None

        def make_runner(fetches, input_name):
            # Callable that feeds an image batch and fetches the given tensors.
            return lambda img: session.run(fetches, feed_dict={input_name: img})

        if pnet_dir:
            with tf.variable_scope('pnet'):
                inputs = tf.placeholder(tf.float32, (None, None, None, 3),
                                        'input')
                net = PNet({'data': inputs})
                net.load(os.path.join(pnet_dir, 'det1.npy'), session)
            self.pnet = make_runner(
                ('pnet/conv4-2/BiasAdd:0', 'pnet/prob1:0'), 'pnet/input:0')

        if rnet_dir:
            with tf.variable_scope('rnet'):
                inputs = tf.placeholder(tf.float32, (None, 24, 24, 3), 'input')
                net = RNet({'data': inputs})
                net.load(os.path.join(rnet_dir, 'det2.npy'), session)
            self.rnet = make_runner(
                ('rnet/conv5-2/conv5-2:0', 'rnet/prob1:0'), 'rnet/input:0')

        if onet_dir:
            with tf.variable_scope('onet'):
                inputs = tf.placeholder(tf.float32, (None, 48, 48, 3), 'input')
                net = ONet({'data': inputs})
                net.load(os.path.join(onet_dir, 'det3.npy'), session)
            self.onet = make_runner(
                ('onet/conv6-2/conv6-2:0', 'onet/conv6-3/conv6-3:0',
                 'onet/prob1:0'),
                'onet/input:0')
Esempio n. 3
0
    def __init__(self, min_face=20, thresh=[0.6, 0.7, 0.7], scale=0.79, stride=2, cellsize=12, use_cuda=True):
        """Create the three cascade networks and put them in eval mode.

        Params:
            min_face: stored as ``self.min_face``
            thresh:   sequence of thresholds, stored as a list copy
            scale:    stored as ``self.scale``
            stride:   stored as ``self.stride``
            cellsize: stored as ``self.cellsize``
            use_cuda: move the networks to CUDA when it is available
        """
        self.min_face = min_face
        # Copy the sequence: the default list object is shared by every call,
        # so storing it directly would let one instance's edits leak into all
        # instances created later.
        self.thresh = list(thresh)
        self.scale = scale
        self.stride = stride
        self.cellsize = cellsize

        self.pnet = PNet()
        self.rnet = RNet()
        self.onet = ONet()

        for net in (self.pnet, self.rnet, self.onet):
            self._load_state(net)

        if cuda.is_available() and use_cuda:
            self.pnet.cuda()
            self.rnet.cuda()
            self.onet.cuda()

        self.pnet.eval()
        self.rnet.eval()
        self.onet.eval()

        self.use_cuda = use_cuda
Esempio n. 4
0
def export_to_pb():
    """Instantiate PNet, RNet and ONet, then export each one via ``export_to_pb``."""
    # All three networks are constructed before the first export starts.
    networks = (PNet(), RNet(), ONet())
    for network in networks:
        network.export_to_pb()
Esempio n. 5
0
def test_img(image_path='C:\\Users\\lenovo\\Desktop\\0_Parade_Parade_0_693.jpg'):
    """Run the PNet -> RNet -> ONet cascade on one image file and display it.

    Args:
        image_path: Path of the image to load. Defaults to the location that
            used to be hard-coded in this function.

    Raises:
        FileNotFoundError: If OpenCV could not load the image.
    """
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError(
            'could not read image: {}'.format(image_path))

    pnet = PNet()
    rnet = RNet()
    onet = ONet()

    boxes = pnet.detect(img)
    # Only feed later stages when the previous one produced candidates, the
    # same way test_camera guards the cascade.
    if len(boxes) > 0:
        boxes = rnet.detect(img, boxes)
    if len(boxes) > 0:
        boxes, landmarks = onet.detect(img, boxes)
        draw(img, boxes, landmarks)

    cv2.imshow('image', img)
    cv2.waitKey()
Esempio n. 6
0
    def __init__(self,
                 min_face_size=20.0,
                 thresholds=[0.6, 0.7, 0.8],
                 nms_thresholds=[0.7, 0.7, 0.7],
                 device=None):
        """Select the torch device and build the three networks in eval mode.

        Args:
            min_face_size: Stored as ``self.min_face_size``.
            thresholds: Sequence stored (as a list copy) in ``self.thresholds``.
            nms_thresholds: Sequence stored (as a list copy) in
                ``self.nms_thresholds``.
            device: ``'gpu'``/``'cuda'`` requests CUDA (falling back to CPU
                when CUDA is unavailable), ``'cpu'``/``'none'`` forces CPU,
                anything else (including ``None``) picks CUDA when available.
        """
        # Select the device
        if device in ['gpu', 'cuda']:
            if torch.cuda.is_available():
                self.device = torch.device('cuda')
            else:
                # Without this else-branch the CPU fallback was immediately
                # overwritten by the CUDA device.
                print("cuda not available, using cpu instead")
                self.device = torch.device('cpu')
        elif device in ['cpu', 'none']:
            self.device = torch.device('cpu')
        else:
            self.device = torch.device(
                'cuda' if torch.cuda.is_available() else 'cpu')
        print("Using {}...\n".format(self.device))

        # Copy the sequences so the shared default lists are never aliased.
        self.thresholds = list(thresholds)
        self.nms_thresholds = list(nms_thresholds)
        self.min_face_size = min_face_size
        self.empty_float = torch.tensor([],
                                        dtype=torch.float,
                                        device=self.device)
        self.pnet = PNet().to(device=self.device).eval()
        self.rnet = RNet().to(device=self.device).eval()
        self.onet = ONet().to(device=self.device).eval()
Esempio n. 7
0
def main():
    """Train the RNet model, halving the learning rate when validation stalls.

    The module-level ``w_emb`` and ``c_emb`` are read as pickle file paths and
    rebound to the unpickled objects. Every ``learning_rate_change_freq``
    iterations the loss on one validation batch is measured; if it did not
    improve on the best value seen so far, the learning rate is halved.
    A checkpoint is written to ``./model`` every ``save_freq`` iterations.
    """
    global w_emb, c_emb, initial_learning_rate

    # NOTE: pickle executes arbitrary code; only load trusted embedding files.
    with open(w_emb, 'rb') as handle:
        w_emb = pickle.load(handle)
    with open(c_emb, 'rb') as handle:
        c_emb = pickle.load(handle)

    train, val = get_batch()

    handle = tf.placeholder(tf.string, shape=[])
    iterator = tf.data.Iterator.from_string_handle(
        handle, train.output_types, train.output_shapes)

    result = []
    current_best_loss = 20
    model = RNet(iterator, w_emb[1], c_emb[1])
    learning_rate = initial_learning_rate

    # Build the state-changing ops once; creating tf.assign/tf.constant nodes
    # inside the loop adds new nodes to the graph on every evaluation.
    enable_training = tf.assign(model.is_train,
                                tf.constant(True, dtype=tf.bool))
    disable_training = tf.assign(model.is_train,
                                 tf.constant(False, dtype=tf.bool))
    lr_value = tf.placeholder(model.lr.dtype.base_dtype, shape=[])
    set_learning_rate = tf.assign(model.lr, lr_value)

    print("start training...")
    print("save every " +str(save_freq)+" iterations")
    print("check loss every " +str(learning_rate_change_freq)+" iterations")
    with tf.Session() as sess:

        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver(max_to_keep=500)
        train_handle = sess.run(train.make_one_shot_iterator().string_handle())
        val_handle = sess.run(val.make_one_shot_iterator().string_handle())
        sess.run(enable_training)
        sess.run(set_learning_rate, feed_dict={lr_value: learning_rate})

        for x in range(1, iterations + 1):

            loss, _ = sess.run([model.loss, model.train_op],
                               feed_dict={handle: train_handle})

            if x % learning_rate_change_freq == 0:
                sess.run(disable_training)
                # Only evaluate the loss here: running train_op as well would
                # update the weights on validation data.
                val_loss = sess.run(model.loss,
                                    feed_dict={handle: val_handle})
                sess.run(enable_training)
                if val_loss < current_best_loss:
                    current_best_loss = val_loss
                else:
                    print("learning rate changed")
                    learning_rate *= 0.5
                    sess.run(set_learning_rate,
                             feed_dict={lr_value: learning_rate})
                result.append((val_loss, loss))

            if x % save_freq == 0:
                filename = os.path.join("./model", "model_{}.ckpt".format(x))
                saver.save(sess, filename)
Esempio n. 8
0
def main(argv):
    """Run the restored RNet model over the test set and write a CSV of answers.

    Args:
        argv: ``argv[0]`` is passed to ``make_example`` to build the test
            examples; ``argv[1]`` is the path of the CSV file to write.
    """
    global rf, w_emb_d, c_emb_d, model_d

    # NOTE: pickle executes arbitrary code; only load trusted embedding files.
    with open(w_emb_d, 'rb') as handle:
        w_emb = pickle.load(handle)
    with open(c_emb_d, 'rb') as handle:
        c_emb = pickle.load(handle)

    test = make_example(argv[0])

    model = RNet(tf.data.TFRecordDataset(rf).map(
        parser()).repeat().batch(batch_size).make_one_shot_iterator(),
                 w_emb[1],
                 c_emb[1],
                 trainable=False)

    predictions = []
    with tf.Session() as sess:

        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver()
        saver.restore(sess, model_d)
        sess.run(tf.assign(model.is_train, tf.constant(False, dtype=tf.bool)))

        ys = model.ys
        for _ in range(len(test) // batch_size + 1):
            qa_id, _, y1, y2 = sess.run(
                [model.qa_id, model.loss, ys[0], ys[1]])

            for qid, p1, p2 in zip(qa_id.tolist(), y1.tolist(), y2.tolist()):
                example = test[str(qid)]
                predictions.append((example["uuid"],
                                    example["spans"][p1][0],
                                    example["spans"][p2][1] - 1))

    # The dataset repeats, so the last batch can run past the end of the test
    # set; keep only as many predictions as there are test examples.
    # newline='' is what the csv module asks for so that it controls line
    # endings itself; the context manager closes the file even on errors.
    with open(argv[1], 'w', newline='') as f:
        w = csv.writer(f)
        w.writerow(['id', 'answer'])
        for k in predictions[:len(test)]:
            w.writerow([
                k[0], " ".join(str(x) for x in range(k[1], k[2] + 1))
            ])
            print(k)
Esempio n. 9
0
 def __init__(self):
     """Pick CUDA when available and build the three networks in eval mode.

     Also stores the tensor constructor matching the device and the fixed
     ``scales``, ``thresholds`` and ``nms_thresholds`` lists.
     """
     if torch.cuda.is_available():
         self.device, self.tensor = 'cuda', torch.cuda.FloatTensor
     else:
         self.device, self.tensor = 'cpu', torch.FloatTensor

     target = self.device
     self._pnet = PNet().to(target).eval()
     self._rnet = RNet().to(target).eval()
     self._onet = ONet().to(target).eval()

     self.scales = [0.3, 0.15, 0.07, 0.035]
     self.thresholds = [0.7, 0.8, 0.9]
     self.nms_thresholds = [0.7, 0.7, 0.7]
Esempio n. 10
0
    def __init__(self, min_face=20, thresh=[0.6, 0.7, 0.7], scale=0.79, stride=2, cellsize=12):
        """Store the detector settings and create the three cascade networks.

        Params:
            min_face: stored as ``self.min_face``
            thresh:   sequence of thresholds, stored as a list copy
            scale:    stored as ``self.scale``
            stride:   stored as ``self.stride``
            cellsize: stored as ``self.cellsize``
        """
        self.min_face = min_face
        # Copy the sequence: the default list object is shared by every call,
        # so storing it directly would let one instance's edits leak into all
        # instances created later.
        self.thresh = list(thresh)
        self.scale = scale
        self.stride = stride
        self.cellsize = cellsize

        self.pnet = PNet()
        self.rnet = RNet()
        self.onet = ONet()

        for net in (self.pnet, self.rnet, self.onet):
            self._load_state(net)
Esempio n. 11
0
def test(cfg):
    """Evaluate a saved RNet model on the dev set and dump its answers.

    Args:
        cfg: Mapping that provides ``dev_eval_file``, ``dev_record_file``,
            ``batch_size``, ``word2ind_file``, ``args_filename`` and
            ``dump_filename``.

    The answers are written to ``logs/answers.json`` and the loss, F1 and
    exact-match metrics are logged.
    """
    logging.info('Model is loading...')
    with open(cfg['dev_eval_file'], "r") as fh:
        dev_eval_file = json.load(fh)
    dev_dataset = SQuADDataset(cfg['dev_record_file'], -1, cfg['batch_size'], cfg['word2ind_file'])
    # Use a context manager so the file handle is closed deterministically.
    # NOTE: pickle executes arbitrary code; only load trusted argument files.
    with open(cfg['args_filename'], 'rb') as fh:
        model_args = pickle.load(fh)
    model = RNet(**model_args)

    model.load_state_dict(torch.load(cfg['dump_filename']))
    model.to(device)

    metrics, answer_dict = evaluation(model, dev_dataset, dev_eval_file, len(dev_dataset))
    with open('logs/answers.json', 'w') as f:
        json.dump(answer_dict, f)
    logging.info("TEST loss %f F1 %f EM %f\n", metrics["loss"], metrics["f1"], metrics["exact_match"])
Esempio n. 12
0
def train(args):
    """Feed WIDER batches through the selected network and print its output.

    Args:
        args: Namespace providing ``cuda`` (device index), ``net`` (one of
            ``"pnet"``, ``"rnet"``, ``"onet"``), ``lr``, ``momentum`` and
            ``epoch``.

    Raises:
        Exception: If ``args.net`` is not one of the supported names.
    """
    if torch.cuda.is_available():
        device = torch.device("cuda")
        torch.cuda.set_device(args.cuda)
    else:
        device = torch.device("cpu")

    if args.net == "pnet":
        model = PNet(device)
    elif args.net == "rnet":
        model = RNet()
    elif args.net == "onet":
        model = ONet()
    else:
        raise Exception("Net Type Error!")

    # The inputs are moved to `device` below, so the model has to live there
    # as well; do it before the optimizer captures the parameters.
    model = model.to(device=device)

    # TODO: loss_func and optimizer are not used yet; the loop below only
    # runs the forward pass.
    loss_func = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), args.lr, args.momentum)

    transformed_data = WIDER_Dataset(
        data_path, anno_filename,
        transforms.Compose([Resize((12, 12)),
                            Normalize(),
                            To_Tensor()]))
    trainloader = DataLoader(transformed_data,
                             batch_size=1,
                             shuffle=True,
                             collate_fn=transformed_data.collate_fn,
                             num_workers=4,
                             pin_memory=True)

    for epoch in range(args.epoch):
        model.train()
        for i_batch, (images, boxes) in enumerate(trainloader):
            # Tensor.to() returns a new tensor, so the result must be kept.
            # The image dtype is left untouched, matching the effective
            # behaviour of the earlier (discarded) conversion calls.
            images = images.to(device=device)
            target_boxes = boxes[0].to(device=device, dtype=torch.float)

            output = model(images)
            print(output.cpu())
Esempio n. 13
0
full_size = opt.full_size

# ---- data ---------------------------------------------------------------
print('===> Loading datasets')
train_set = get_training_set(opt.upscale_factor, opt.full_size)
test_set = get_test_set(opt.upscale_factor, opt.full_size)
training_data_loader = DataLoader(
    dataset=train_set,
    batch_size=opt.batchSize,
    shuffle=True,
    num_workers=opt.threads,
)
testing_data_loader = DataLoader(
    dataset=test_set,
    batch_size=opt.testBatchSize,
    shuffle=True,
    num_workers=opt.threads,
)

# ---- model, loss and optimizers ----------------------------------------
print('===> Building model')
model = RNet(upscale_factor=opt.upscale_factor, full_size=opt.full_size)
model.to(device)
criterion = nn.MSELoss()
# Three independent Adam optimizers (one per output), each constructed over
# the full parameter set of the model with the same learning rate.
optimizerLow, optimizerInt1, optimizerInt2 = (
    optim.Adam(model.parameters(), lr=opt.lr) for _ in range(3))


def train(epoch):
    """Iterate over the training loader and move each batch to ``device``.

    The first four entries of every batch are unpacked as the input image,
    two intermediate targets and the final target. The loss accumulators are
    initialised to zero.
    """
    low_loss, int1_loss, int2_loss = 0, 0, 0
    for iteration, batch in enumerate(training_data_loader, 1):
        inimg, int1, int2, target = (
            batch[position].to(device) for position in range(4))
Esempio n. 14
0
def train(model_params, launch_params):
    """Train (or fine-tune) an RNet model on SQuAD and save it.

    Args:
        model_params: Keyword arguments for ``RNet``; ``word_mat`` and
            ``char_mat`` are filled in here from the embedding files.
        launch_params: Run configuration (file paths, learning-rate
            settings, logging/evaluation intervals, ``fine_tuning`` flag).

    The state dict is written to ``launch_params['dump_filename']`` and
    ``model_params`` is pickled to ``launch_params['args_filename']`` when the
    loop finishes or is interrupted with Ctrl-C.
    """
    with open(launch_params['word_emb_file'], "r") as fh:
        word_mat = np.array(json.load(fh), dtype=np.float32)
    with open(launch_params['char_emb_file'], "r") as fh:
        char_mat = np.array(json.load(fh), dtype=np.float32)
    with open(launch_params['train_eval_file'], "r") as fh:
        train_eval_file = json.load(fh)
    with open(launch_params['dev_eval_file'], "r") as fh:
        dev_eval_file = json.load(fh)

    writer = SummaryWriter(os.path.join(launch_params['log'], launch_params['prefix']))

    lr = launch_params['learning_rate']
    base_lr = 1.0
    warm_up = launch_params['lr_warm_up_num']
    model_params['word_mat'] = word_mat
    model_params['char_mat'] = char_mat

    logging.info('Load dataset and create model.')
    dev_dataset = SQuADDataset(launch_params['dev_record_file'], launch_params['test_num_batches'],
                               launch_params['batch_size'], launch_params['word2ind_file'])
    if launch_params['fine_tuning']:
        train_dataset = SQuADDataset(launch_params['train_record_file'], launch_params['fine_tuning_steps'],
                                     launch_params['batch_size'], launch_params['word2ind_file'])
        # NOTE: pickle executes arbitrary code; only load trusted files.
        with open(launch_params['args_filename'], 'rb') as fh:
            model_args = pickle.load(fh)
        model = RNet(**model_args)
        model.load_state_dict(torch.load(launch_params['dump_filename']))
        model.to(device)
    else:
        train_dataset = SQuADDataset(launch_params['train_record_file'], launch_params['num_steps'],
                                     launch_params['batch_size'], launch_params['word2ind_file'])
        model = RNet(**model_params).to(device)
        launch_params['fine_tuning_steps'] = 0

    params = filter(lambda param: param.requires_grad, model.parameters())
    optimizer = optim.Adam(params, lr=base_lr, betas=(launch_params['beta1'], launch_params['beta2']), eps=1e-7, weight_decay=3e-7)
    # The optimizer's base lr is 1.0, so the lambda's value is the effective
    # learning rate: logarithmic warm-up until `warm_up`, then constant `lr`.
    cr = lr / log2(warm_up)
    scheduler = optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda ee: cr * log2(ee + 1) if ee < warm_up else lr)
    logging.info('Start training.')
    # `step` instead of `iter` so the builtin is not shadowed.
    for step in range(launch_params['num_steps']):
        try:
            passage_w, passage_c, question_w, question_c, y1, y2, ids = train_dataset[step]
            passage_w, passage_c = passage_w.to(device), passage_c.to(device)
            question_w, question_c = question_w.to(device), question_c.to(device)
            y1, y2 = y1.to(device), y2.to(device)
            loss, p1, p2 = model.train_step([passage_w, passage_c, question_w, question_c], y1, y2, optimizer, scheduler)
            if step % launch_params['train_interval'] == 0:
                logging.info('Iteration %d; Loss: %f', step+launch_params['fine_tuning_steps'], loss)
                writer.add_scalar('Loss', loss, step+launch_params['fine_tuning_steps'])
            if step % launch_params['train_sample_interval'] == 0:
                # Decode the first sample of the batch; the end index is
                # searched from the predicted start onwards.
                start = torch.argmax(p1[0, :]).item()
                end = torch.argmax(p2[0, start:]).item()+start
                passage = train_dataset.decode(passage_w)
                question = train_dataset.decode(question_w)
                generated_answer = train_dataset.decode(passage_w[:, start:end+1])
                real_answer = train_dataset.decode(passage_w[:, y1[0]:y2[0]+1])
                logging.info('Train Sample:\n Passage: %s\nQuestion: %s\nOriginal answer: %s\nGenerated answer: %s',
                        passage, question, real_answer, generated_answer)
            if step % launch_params['test_interval'] == 0:
                metrics, _ = evaluation(model, dev_dataset, dev_eval_file, launch_params['test_num_batches'])
                logging.info("TEST loss %f F1 %f EM %f", metrics['loss'], metrics['f1'], metrics['exact_match'])
                writer.add_scalar('Test_loss', metrics['loss'], step)
                writer.add_scalar('Test_f1', metrics['f1'], step)
                writer.add_scalar('Test_em', metrics['exact_match'], step)
        except RuntimeError as e:
            # Best effort: log the failing step and carry on with the next.
            logging.error(str(e))
        except KeyboardInterrupt:
            break
    torch.save(model.cpu().state_dict(), launch_params['dump_filename'])
    # The context manager guarantees the pickle is flushed and closed.
    with open(launch_params['args_filename'], 'wb') as fh:
        pickle.dump(model_params, fh)
    logging.info('Model has been saved.')
Esempio n. 15
0
from model import detect_faces, show_bboxes, PNet, RNet, ONet
from PIL import Image
import numpy as np


def test(filename, save_name, model):
    """Detect faces in ``filename`` and save the annotated image.

    The image is opened with PIL, passed to ``detect_faces`` together with
    ``model``, and the result of ``show_bboxes`` is saved to ``save_name``.
    """
    source = Image.open(filename)
    detected_boxes, detected_landmarks = detect_faces(source, model)
    annotated = show_bboxes(source, detected_boxes,
                            facial_landmarks=detected_landmarks)
    annotated.save(save_name)


if __name__ == "__main__":

    # Build the three stages and bundle them in cascade order.
    pnet, rnet, onet = PNet(), RNet(), ONet()
    model = (pnet, rnet, onet)

    # Other sample images that can be run the same way:
    # test("test_1.jpg", "1.jpg", model)
    # test("test_2.jpg", "2.jpg", model)
    # test("test_3.jpg", "3.jpg", model)

    test("test.jpg", "1.jpg", model)
Esempio n. 16
0
                    default='data/train_data.pkl',
                    help='Train Set',
                    type=str)
parser.add_argument('--valid_data',
                    type=str,
                    default='data/valid_data.pkl',
                    help='Validation Set')

# parser.add_argument('model', help='Model to evaluate', type=str)
args = parser.parse_args()

# ---- model construction -------------------------------------------------
print('Creating the model...', end='')
word_vector_dim = args.word_vector_dim
model = RNet(
    hdim=args.hdim,
    dropout_rate=args.dropout,
    N=None,
    M=None,
    word2vec_dim=word_vector_dim,
    char_level_embeddings=args.char_level_embeddings,
)
print('Done!')

# ---- compilation --------------------------------------------------------
print('Compiling Keras model...', end='')
# The learning rate is only put in the optimizer config when it is truthy.
optimizer_config = {
    'class_name': args.optimizer,
    'config': {'lr': args.lr} if args.lr else {},
}
model.compile(loss=args.loss, optimizer=optimizer_config, metrics=['accuracy'])
print('Done!')

print('Loading datasets...', end='')
Esempio n. 17
0
@LastEditTime: 2019-11-06 15:37:05
@Update: 
'''
import os
import torch
from torch import nn
from torch import optim
from torch.optim import lr_scheduler

from config import configer
from dataset import MtcnnData
from model import RNet
from model import MtcnnLoss, LossFn
from trainer import MtcnnTrainer

net = RNet()
# state = torch.load('ckptdir/RNet_0025.pkl', map_location='cpu')['net_state']; net.load_state_dict(state)

params = net.parameters()
# One dataset per split, all built with the same size argument (24) and
# without keeping samples in memory.
trainset, validset, testset = (
    MtcnnData(configer.datapath, 24, split, save_in_memory=False)
    for split in ('train', 'valid', 'test'))
# criterion = MtcnnLoss(1.0, 0.5, 0.0)
criterion = LossFn(1.0, 0.5, 1.0)
# The optimizer and scheduler are handed over as classes, not instances.
optimizer = optim.Adam
lr_scheduler = lr_scheduler.ExponentialLR

trainer = MtcnnTrainer(
    configer, net, params,
    trainset, validset, testset,
    criterion, optimizer, lr_scheduler)
trainer.train()
Esempio n. 18
0
class MtcnnDetector(object):
    """ mtcnn detector

    Params:
        prefix: {str} checkpoint
    Attributes:

    Content:

    """
    def __init__(self, min_face=20, thresh=[0.6, 0.7, 0.7], scale=0.79, stride=2, cellsize=12, use_cuda=True):
        """ create the three cascade networks and put them in eval mode
        Params:
            min_face: {number} used as NETSIZE / min_face for the first pyramid scale
            thresh:   {sequence[float]} score thresholds for pnet, rnet, onet
            scale:    {float} factor applied to the pyramid scale at each step
            stride:   {int} stride used to map pnet cells back to the image
            cellsize: {int} cell size used to map pnet cells back to the image
            use_cuda: {bool} run the networks on CUDA when it is available
        """
        self.min_face = min_face
        # Copy the sequence: the default list object is shared by every call,
        # so storing it directly would let one instance's edits leak into all
        # instances created later.
        self.thresh = list(thresh)
        self.scale = scale
        self.stride = stride
        self.cellsize = cellsize

        self.pnet = PNet()
        self.rnet = RNet()
        self.onet = ONet()

        for net in (self.pnet, self.rnet, self.onet):
            self._load_state(net)

        if cuda.is_available() and use_cuda:
            self.pnet.cuda()
            self.rnet.cuda()
            self.onet.cuda()

        self.pnet.eval()
        self.rnet.eval()
        self.onet.eval()

        self.use_cuda = use_cuda

    def _load_state(self, net):
        
        ckpt = '../mtcnn_py/ckptdir/{}.pkl'.format(net._get_name())
        if not os.path.exists(ckpt): return
        print("load state from {}".format(ckpt))
        ckpt = torch.load(ckpt, map_location='cuda' if torch.cuda.is_available() else 'cpu')
        net.load_state_dict(ckpt['net_state'])
    
    def detect_image(self, image):
        """ Detect face over single image
        Params:
            image:    {ndarray(H, W, C)}
        """

        boxes, boxes_c, landmark = self._detect_pnet(image)
        boxes, boxes_c, landmark = self._detect_rnet(image, boxes_c)
        boxes, boxes_c, landmark = self._detect_onet(image, boxes_c)
        return boxes_c, landmark

    def _detect_pnet(self, image):
        """ run pnet over an image pyramid and collect candidate boxes
        Params:
            image:      {ndarray(H, W, C)}
        Returns:
            boxes:    {ndarray(n_boxes, 5)} x1, y1, x2, y2, score
            boxes_c:  {ndarray(n_boxes, 5)} refined x1, y1, x2, y2, score
            landmark: None
        Notes:
            when no candidate is found, `boxes` and `boxes_c` are empty arrays
        """
        NETSIZE = 12

        def _resize_image(image, scale):
            """ resize image according to scale
            Params:
                image:  {ndarray(h, w, c)}
                scale:  {float}
            """
            h, w, c = image.shape
            hn = int(h*scale); wn = int(w*scale)
            if hn < 1 or wn < 1:
                # cv2.resize rejects an empty target size; hand back an empty
                # image so the pyramid loop simply stops.
                return np.empty((0, 0, c), dtype=image.dtype)
            resized = cv2.resize(image, (wn, hn), interpolation=cv2.INTER_LINEAR)
            return resized

        def _generate_box(cls_map, reg_map, thresh, scale):
            """ generate boxes
            Params:
                cls_map: {ndarray(h, w)}
                reg_map: {ndarray(4, h, w)}
                thresh:  {float}
                scale:   {float}
            Returns:
                bboxes:  {ndarray(n_boxes, 9)} x1, y1, x2, y2, score, offsetx1, offsety1, offsetx2, offsety2
            """
            idx = np.where(cls_map>thresh)

            if idx[0].size == 0:
                return np.array([])

            # map the cell indices back to coordinates of the original image
            x1 = np.round(self.stride * idx[1] / scale)
            y1 = np.round(self.stride * idx[0] / scale)
            x2 = np.round((self.stride * idx[1] + self.cellsize) / scale)
            y2 = np.round((self.stride * idx[0] + self.cellsize) / scale)

            score = cls_map[idx[0], idx[1]]
            reg = np.array([reg_map[i, idx[0], idx[1]] for i in range(4)])

            boxes = np.vstack([x1, y1, x2, y2 ,score, reg]).T

            return boxes

        # ======================= generate boxes ===========================
        cur_scale = NETSIZE / self.min_face
        cur_img = _resize_image(image, cur_scale)
        # boxes of every pyramid level; concatenated once after the loop
        # instead of re-copying the accumulated array on every level
        level_boxes = []

        while min(cur_img.shape[:-1]) >= NETSIZE:

            ## forward network
            X = ToTensor()(cur_img).unsqueeze(0)
            if cuda.is_available() and self.use_cuda: X = X.cuda()
            with torch.no_grad():
                y_pred = self.pnet(X)[0].cpu().numpy()

            ## generate bbox
            cls_map = sigmoid(y_pred[0,:,:])
            reg_map = y_pred[1:5,:,:]
            boxes = _generate_box(cls_map, reg_map, self.thresh[0], cur_scale)

            ## update scale (must happen after the boxes of this level were
            ## generated with the current scale)
            cur_scale *= self.scale
            cur_img = _resize_image(image, cur_scale)

            ## save bbox
            if boxes.size != 0:
                level_boxes.append(boxes)

        # ====================================================================

        if not level_boxes:
            return np.array([]), np.array([]), None

        all_boxes = np.concatenate(level_boxes, axis=0)

        ## nms
        all_boxes = all_boxes[self._nms(all_boxes[:, 0:5], 0.6, 'Union')]

        ## parse
        boxes  = all_boxes[:, :4]                   # (n_boxes, 4)
        score  = all_boxes[:,  4].reshape((-1, 1))  # (n_boxes, 1)
        offset = all_boxes[:, 5:]                   # (n_boxes, 4)

        # refine bbox
        boxes_c = self._cal_box(boxes, offset)

        ## concat
        boxes = np.concatenate([boxes, score], axis=1)
        boxes_c = np.concatenate([boxes_c, score], axis=1)

        ## landmark: pnet output is not used for landmarks here
        landmark = None

        return boxes, boxes_c, landmark

    def _detect_rnet(self, image, bboxes):
        """ re-score and refine candidate boxes with rnet
        Params:
            image: {ndarray(H, W, C)}
            bboxes:  {ndarray(n_boxes, 5)} x1, y1, x2, y2, score
        Returns:
            boxes:    {ndarray(n_boxes, 5)} squared x1, y1, x2, y2 with rnet score
            boxes_c:  {ndarray(n_boxes, 5)} refined x1, y1, x2, y2 with rnet score
            landmark: landmark locations computed from the network output
        Notes:
            when nothing is passed in or nothing survives the threshold,
            (empty array, empty array, None) is returned
        """
        NETSIZE = 24

        if bboxes.shape[0] == 0:
            return np.array([]), np.array([]), None

        bboxes = self._square(bboxes)
        patches = self._crop_patch(image, bboxes, NETSIZE)

        ## forward network on the batch of patches
        X = torch.cat([ToTensor()(patch).unsqueeze(0) for patch in patches], dim=0)
        if cuda.is_available() and self.use_cuda:
            X = X.cuda()
        with torch.no_grad():
            y_pred = self.rnet(X).cpu().detach().numpy()

        # column 0: score logit, columns 1-4: box offsets, rest: landmarks
        scores = sigmoid(y_pred[:, 0])
        offset = y_pred[:, 1: 5]
        landmark = y_pred[:, 5:]

        ## replace the incoming score with the rnet score
        bboxes[:, -1] = scores

        ## keep only candidates above the rnet threshold
        passed = scores > self.thresh[1]
        bboxes, offset, landmark = bboxes[passed], offset[passed], landmark[passed]
        if bboxes.shape[0] == 0:
            return np.array([]), np.array([]), None

        ## nms
        kept = self._nms(bboxes, 0.5)
        bboxes, offset, landmark = bboxes[kept], offset[kept], landmark[kept]

        ## landmark locations and refined boxes
        coords = bboxes[:, :-1]
        landmark = self._cal_landmark(coords, landmark)
        refined = self._cal_box(coords, offset)
        bboxes_c = np.concatenate([refined, bboxes[:, -1].reshape((-1, 1))], axis=1)

        return bboxes, bboxes_c, landmark
    
    def _detect_onet(self, image, bboxes):
        """ re-score and refine candidate boxes with onet
        Params:
            image: {ndarray(H, W, C)}
            bboxes:  {ndarray(n_boxes, 5)} x1, y1, x2, y2, score
        Returns:
            boxes:    {ndarray(n_boxes, 5)} squared x1, y1, x2, y2 with onet score
            boxes_c:  {ndarray(n_boxes, 5)} refined x1, y1, x2, y2 with onet score
            landmark: landmark locations computed from the network output
        Notes:
            when nothing is passed in or nothing survives the threshold,
            three empty arrays are returned
        """
        NETSIZE = 48

        if bboxes.shape[0] == 0:
            return np.array([]), np.array([]), np.array([])

        bboxes = self._square(bboxes)
        patches = self._crop_patch(image, bboxes, NETSIZE)

        ## forward network on the batch of patches
        X = torch.cat([ToTensor()(patch).unsqueeze(0) for patch in patches], dim=0)
        if cuda.is_available() and self.use_cuda:
            X = X.cuda()
        with torch.no_grad():
            y_pred = self.onet(X).cpu().detach().numpy()

        # column 0: score logit, columns 1-4: box offsets, rest: landmarks
        scores = sigmoid(y_pred[:, 0])
        offset = y_pred[:, 1: 5]
        landmark = y_pred[:, 5:]

        ## replace the incoming score with the onet score
        bboxes[:, -1] = scores

        ## keep only candidates above the onet threshold
        passed = scores > self.thresh[2]
        bboxes, offset, landmark = bboxes[passed], offset[passed], landmark[passed]
        if bboxes.shape[0] == 0:
            return np.array([]), np.array([]), np.array([])

        ## nms, overlap measured against the smaller box
        kept = self._nms(bboxes, 0.5, mode='Minimum')
        bboxes, offset, landmark = bboxes[kept], offset[kept], landmark[kept]

        ## landmark locations and refined boxes
        coords = bboxes[:, :-1]
        landmark = self._cal_landmark(coords, landmark)
        refined = self._cal_box(coords, offset)
        bboxes_c = np.concatenate([refined, bboxes[:, -1].reshape((-1, 1))], axis=1)

        return bboxes, bboxes_c, landmark

    @classmethod
    def _cal_box(self, boxes, offset):
        """ refine boxes
        Params:
            boxes:  {ndarray(n_boxes, 4)} unrefined boxes
            offset: {ndarray(n_boxes, 4)} boxes offset
        Returns:
            boxes_c:{ndarray(n_boxes, 4)} refined boxes
        Notes:
            offset = (gt - square) / size of square box
             => gt = square + offset * size of square box (*)
            where
                - `offset`, `gt`, `square` are ndarrays
                - `size of square box` is a number
        """
        ## square boxes' heights and widths
        x1, y1, x2, y2 = np.hsplit(boxes, 4)        # (n_boxes, 1)
        w = x2 - x1 + 1; h = y2 - y1 + 1            # (n_boxes, 1)
        bsize = np.hstack([w, h]*2)                 # (n_boxes, 4)
        bbase = np.hstack([x1, y1, x2, y2])         # (n_boxes, 4)
        ## refine
        boxes_c = bbase + offset*bsize
        return boxes_c
    
    @classmethod
    def _cal_landmark(self, boxes, offset):
        """ calculate landmark
        Params:
            boxes:  {ndarray(n_boxes,  4)} unrefined boxes
            offset: {ndarray(n_boxes, 10)} landmark offset
        Returns:
            landmark:{ndarray(n_boxes, 10)} landmark location
        Notes:
            offset_x = (gt_x - square_x1) / size of square box
             => gt_x = square_x1 + offset_x * size of square box (*)
            offset_y = (gt_y - square_y1) / size of square box
             => gt_y = square_y1 + offset_y * size of square box (*)
            where
                - `offset_{}`, `gt_{}`, `square_{}1` are ndarrays
                - `size of square box` is a number
        """
        ## square boxes' heights and widths
        x1, y1, x2, y2 = np.hsplit(boxes, 4)        # (n_boxes, 1)
        w = x2 - x1 +1; h = y2 - y1 + 1             # (n_boxes, 1)
        bsize = np.hstack([w, h]*5)                 # (n_boxes, 10)
        bbase = np.hstack([x1, y1]*5)               # (n_boxes, 10)
        ## refine
        landmark = bbase + offset*bsize
        return landmark

    @classmethod
    def _nms(self, dets, thresh, mode="Union"):
        """
        Params:
            dets:   {ndarray(n_boxes, 5)} x1, y1, x2, y2 score
            thresh: {float} retain overlap <= thresh
            mode:   {str} 'Union' or 'Minimum'
        Returns:
            idx:   {list[int]} indexes to keep
        Notes:
            greedily select boxes with high confidence
            idx boxes overlap <= thresh
            rule out overlap > thresh

            if thresh==1.0, keep all
        """
        x1 = dets[:, 0]
        y1 = dets[:, 1]
        x2 = dets[:, 2]
        y2 = dets[:, 3]
        scores = dets[:, 4]

        areas = (x2 - x1 + 1) * (y2 - y1 + 1)
        order = scores.argsort()[::-1]

        idx = []
        while order.size > 0:
            i = order[0]
            idx.append(i)

            xx1 = np.maximum(x1[i], x1[order[1:]])
            yy1 = np.maximum(y1[i], y1[order[1:]])
            xx2 = np.minimum(x2[i], x2[order[1:]])
            yy2 = np.minimum(y2[i], y2[order[1:]])
            w = np.maximum(0.0, xx2 - xx1 + 1)
            h = np.maximum(0.0, yy2 - yy1 + 1)

            inter = w * h
            if mode == "Union":
                ovr = inter / (areas[i] + areas[order[1:]] - inter)
            elif mode == "Minimum":
                ovr = inter / np.minimum(areas[i], areas[order[1:]])

            inds = np.where(ovr <= thresh)[0]
            order = order[inds + 1]

        return idx
    
    @classmethod
    def _square(self, bbox):
        """ convert rectangle bbox to square bbox
        Params:
            bbox: {ndarray(n_boxes, 5)}
        Returns:
            bbox_s: {ndarray(n_boxes, 5)}
        """
        ## rectangle boxes' heights and widths
        x1, y1, x2, y2, score = np.hsplit(bbox, 5)  # (n_boxes, 1)
        w = x2 - x1 +1; h = y2 - y1 + 1             # (n_boxes, 1)
        maxsize = np.maximum(w, h)                  # (n_boxes, 1)

        ## square boxes' heights and widths
        x1 = x1 + w/2 - maxsize/2
        y1 = y1 + h/2 - maxsize/2
        x2 = x1 + maxsize - 1
        y2 = y1 + maxsize - 1

        bbox_s = np.hstack([x1, y1, x2, y2, score])
        return bbox_s

    @classmethod
    def _crop_patch(self, image, bbox_s, size):
        """ crop patches from image and resize them to (size, size)
        Params:
            image:  {ndarray(H, W, C)} source image
            bbox_s: {ndarray(n_boxes, 5)} squared bbox: x1, y1, x2, y2, score
                                          (corners are inclusive)
            size:   {int} side length of the resized patches
        Returns:
            patches: {list[ndarray(size, size, 3)]} one uint8 patch per box,
                     in the same order as `bbox_s`
        Notes:
            - Box corners are truncated to integers before cropping.
            - Parts of a box that fall outside the image are zero-padded;
              a box lying completely outside yields an all-zero patch.
            - Boxes must have a positive width and height, otherwise the
              patch cannot be allocated / resized and an error is raised.
        """

        def locate(bbox, imh, imw):
            """ compute matching source / destination windows for every box
            Params:
                bbox:       {ndarray(n_boxes, 5)} x1, y1, x2, y2, score
                imh, imw:   {int} size of input image
            Returns:
                x1, y1, x2, y2:     {ndarray(n_boxes,)} inclusive window in the
                                    image, clipped to the image borders
                xx1, yy1, xx2, yy2: {ndarray(n_boxes,)} inclusive window in the
                                    patch that receives those pixels
                w, h:               {ndarray(n_boxes,)} patch width and height
            """
            ## integer corners; `astype` copies, so `bbox` is left untouched
            x1, y1, x2, y2 = (c.astype('int').reshape(-1)
                              for c in np.hsplit(bbox, 5)[:4])
            w = x2 - x1 + 1
            h = y2 - y1 + 1

            ## how far each side sticks out of the image (0 when inside)
            cut_l = np.maximum(-x1, 0)
            cut_t = np.maximum(-y1, 0)
            cut_r = np.maximum(x2 - (imw - 1), 0)
            cut_b = np.maximum(y2 - (imh - 1), 0)

            ## shrinking both windows by the same amounts keeps them equal-sized
            return (x1 + cut_l, y1 + cut_t, x2 - cut_r, y2 - cut_b,
                    cut_l, cut_t, (w - 1) - cut_r, (h - 1) - cut_b,
                    w, h)

        imh, imw, _ = image.shape

        patches = []
        for x1, y1, x2, y2, xx1, yy1, xx2, yy2, pw, ph in \
                zip(*locate(bbox_s, imh, imw)):
            ## shape comes from the same integer corners as the windows, so
            ## the destination window always fits inside the patch
            patch = np.zeros(shape=(ph, pw, 3), dtype='uint8')
            ## skip the copy when nothing of the box lies inside the image
            if x2 >= x1 and y2 >= y1:
                ## corners are inclusive -> slice stops need the +1
                patch[yy1: yy2 + 1, xx1: xx2 + 1] = \
                    image[y1: y2 + 1, x1: x2 + 1]
            patches.append(cv2.resize(patch, (size, size)))

        return patches