Python DataEncoder Exemples, encoderl.DataEncoder Python Exemples

Exemple #1

0

Afficher le fichier

    def __init__(self, root, list_file, train, transform):
        # print('data init')
        self.root = root
        self.train = train
        self.transform = transform
        self.fnames = []  # 文件名列表
        self.boxes = []  # 位置标签列表，格式 [x1, y1, x2, y2]
        self.labels = []  # 类别标签列表?
        self.small_threshold = 10. / self.image_size  # face that small than threshold will be ignored
        self.data_encoder = DataEncoder()

        with open(list_file) as f:
            lines = f.readlines()

        for line in lines:
            splited = line.strip().split()
            # print('splited', splited)
            self.fnames.append(splited[0])
            num_faces = int(splited[1])
            box = []
            label = []
            for i in range(num_faces):
                x = float(splited[2 + 5 * i])
                y = float(splited[3 + 5 * i])
                w = float(splited[4 + 5 * i])
                h = float(splited[5 + 5 * i])
                c = int(splited[6 + 5 * i])
                box.append([x, y, x + w,
                            y + h])  # [[x1, y1, x2, y2], [x3, y3, x4, y4]]
                label.append(c)  # [1, 1]
                # print('box', box, 'label', label)
            self.boxes.append(torch.Tensor(box))
            self.labels.append(torch.LongTensor(label))
        self.num_samples = len(self.boxes)

Exemple #2

0

Afficher le fichier

    def __init__(self, root, list_file, train, transform):
        print('data init')
        self.root = root
        self.train = train
        self.transform = transform
        self.fnames = []
        self.boxes = []
        self.labels = []
        self.small_threshold = 10. / self.image_size  # face that small than threshold will be ignored
        self.data_encoder = DataEncoder()

        with open(list_file) as f:
            lines = f.readlines()

        for line in lines:
            splited = line.strip().split()
            self.fnames.append(splited[0])
            num_faces = int(splited[1])
            box = []
            label = []
            for i in range(num_faces):
                x = float(splited[2 + 5 * i])
                y = float(splited[3 + 5 * i])
                w = float(splited[4 + 5 * i])
                h = float(splited[5 + 5 * i])
                c = int(splited[6 + 5 * i])
                box.append([x, y, x + w, y + h])
                label.append(c)
            self.boxes.append(torch.Tensor(box))
            self.labels.append(torch.LongTensor(label))
        self.num_samples = len(self.boxes)

Exemple #3

0

Afficher le fichier

    def __init__(self,
                 root,
                 data_type,
                 list_file,
                 lmdb_path,
                 train,
                 transform,
                 multi_scale=False):
        print('data init')
        self.root = root
        self.data_type = data_type
        self.train = train
        self.transform = transform
        self.fnames = []
        self.boxes = []  # 这里的box是左上角和右下角！！！ x1 y1 x2 y2
        self.landmarks = []
        self.lmdb_path = lmdb_path
        self.labels = []
        self.small_threshold = 20. / self.image_size  # face that small than threshold will be ignored
        self.data_encoder = DataEncoder()
        self.snames = glob.glob('./picture/shuiyin/*')
        self.num_shuiyin = len(self.snames)
        self.multi_scale = multi_scale

        with open(list_file) as f:
            lines = f.readlines()

        for line in lines:
            splited = line.strip().split()
            num_faces = int(splited[1])
            box = []
            label = []
            landmarks = []
            if num_faces <= 0:
                continue
            self.fnames.append(splited[0])
            for i in range(num_faces):
                #x = float(splited[2+5*i])
                #y = float(splited[3+5*i])
                #w = float(splited[4+5*i])
                #h = float(splited[5+5*i])
                #c = int(splited[6+5*i])
                x = float(splited[2 + 15 * i])
                y = float(splited[2 + 1 + 15 * i])
                w = float(splited[2 + 2 + 15 * i])
                h = float(splited[2 + 3 + 15 * i])
                landmarks_ = []
                for j in range(5):
                    pt_x = float(splited[2 + 3 + 15 * i + j * 2 + 1])
                    pt_y = float(splited[2 + 3 + 15 * i + j * 2 + 2])
                    landmarks_.append(pt_x)
                    landmarks_.append(pt_y)
                c = float(splited[2 + 15 * i + 14])
                box.append([x, y, x + w, y + h])
                landmarks.append(landmarks_)
                label.append(c)
            self.boxes.append(torch.Tensor(box))
            self.labels.append(torch.LongTensor(label))
            self.landmarks.append(torch.Tensor(landmarks))
        self.num_samples = len(self.boxes)

Exemple #4

0

Afficher le fichier

    def test(self, show_info=False):
        self.model.eval()
        test_loss = 0
        data_encoder = DataEncoder()

        time_start = time.time()
        # 测试集
        # for data, target in test_loader:
        for images, loc_targets, conf_targets in self.test_loader:
            images, loc_targets, conf_targets = Variable(images), Variable(
                loc_targets), Variable(conf_targets)

            if self.use_gpu:
                images, loc_targets, conf_targets = images.cuda(
                ), loc_targets.cuda(), conf_targets.cuda()

            # print('images', images.size())
            loc_preds, conf_preds = self.model(images)
            loss = self.criterion(loc_preds, loc_targets, conf_preds,
                                  conf_targets)  # 计算损失
            test_loss += loss.item()

            if show_info is True:
                # print('0 pre_label', loc_preds.size(), conf_preds.size())
                # print('0 pre_label', loc_preds, conf_preds)

                print("loc_preds len: ", len(loc_preds))
                for i in range(len(loc_preds)):
                    # print('2 pre_label', loc_preds[i].size(), conf_preds[i].size())
                    # print('3 pre_label', loc_preds[i], conf_preds[i])
                    boxes, labels, max_conf = data_encoder.decode(
                        loc_preds[i], conf_preds[i], self.use_gpu,
                        self.nms_threshold)
                    # print('boxes', boxes)
                    # print('labels', labels)
                    # print('max_conf', max_conf)

                    show_img(images[i].permute(1, 2, 0),
                             boxes.cpu().detach().numpy())
                    # show_img(images[i], boxes.cpu().detach().numpy())

        time_end = time.time()
        time_avg = float(time_end - time_start) / float(
            len(self.test_loader.dataset))
        avg_loss = test_loss / len(self.test_loader)
        print('[Test] avg_loss: {:.6f} time: {:.6f}\n'.format(
            avg_loss, time_avg))
        return avg_loss

Exemple #5

0

Afficher le fichier

Fichier : test.py Projet : MrXin94/faceDet_inCar

def test():
    # imgPath = './test2.jpg'
    #imgPath = './badcase-2k.jpg'
    #imgPath = './t.png'
    #imgPath = './noface2.png'
    net = FaceBox()
    scale = 1024
    #scale = 640
    if cuda:
        net = net.cuda()
    net.load_state_dict(torch.load('./weight/faceboxes_209.pt', map_location=lambda storage, loc:storage))
    net.eval()
    data_encoder = DataEncoder(float(scale))

    file_path = "./all_test/"
    files = [file_name for file_name in os.listdir(file_path) if file_name.lower().endswith('jpg')]
    for file in files:
        imgPath = os.path.join(file_path, file)
        img = cv2.imread(imgPath)

        boxes, probs = test_img(net, data_encoder, img, scale, thresh=0.5)
        # print(boxes)
        face_num = len(boxes) / 4
        s = str(int(face_num)) + " "
        for i in range(int(face_num)):
            x1 = int(boxes[i * 4] * 1)
            y1 = int(boxes[i * 4 + 1] * 1)
            x2 = int(boxes[i * 4 + 2] * 1)
            y2 = int(boxes[i * 4 + 3] * 1)
            s = s + str(x1) + " " + str(y1) + " " + str(x2) + " " + str(y2) + " "
        print(file, s[:-1])

Exemple #6

0

Afficher le fichier

Fichier : test.py Projet : MrXin94/faceDet_inCar

def test_faceInCar():
    net = FaceBox()
    # scale = 640
    # scale = 720
    # scale = 800
    # scale = 960

    if cuda:
        net = net.cuda()
    net.load_state_dict(torch.load('./weight/faceboxes_333.pt', map_location=lambda storage, loc:storage))
    net.eval()
    data_encoder = DataEncoder(float(scale))



    # outfile = './1-3_predict.txt'
    # outfile = './faceboxV1.1_noBn-6k-1024-predict.txt'
    # outfile = './faceboxV1.1-6k-800-predict.txt'
    # outfile = './faceboxV1.1-6k-960-predict.txt'
    # outfile = './faceboxV1.1-6k-1024-predict.txt'
    fout = open(outfile,'w')

    lines = open(label_path,'r').readlines()

    resize_time_total = 0
    forward_time_total = 0
    for i,line in enumerate(lines):
        if i % 5 == 0:
            print (i,len(lines))
        imgName = line.strip().split(' ')[0]
        imgName = imgName.split("/")[-1]
        imgPath = os.path.join(imgDir,imgName)
        boxes,probs,resize_time, forward_time = test_img(net,data_encoder,imgPath,scale,thresh=0.1)
        resize_time_total += resize_time
        forward_time_total += forward_time
        fout.write(imgName+' ')

        face_num = len(boxes)/4
        fout.write(str(face_num)+' ')
        for i in range(int(face_num)):
            x1 = int(boxes[i*4]*1)
            y1 = int(boxes[i*4+1]*1)
            x2 = int(boxes[i*4+2]*1)
            y2 = int(boxes[i*4+3]*1)
            prob = str(probs[i])
            fout.write(str(x1)+' '+ str(y1)+' '+ str(x2)+' '+str(y2)+' '+prob+' ')
        fout.write('\n')
    fout.close()

Exemple #7

0

Afficher le fichier

        if '/' in item:
            f_write.write(item)
    f_write.close()
    print('get fddb list done')


if __name__ == '__main__':
    net = FaceBox()
    net.load_state_dict(
        torch.load('weight/faceboxes.pt',
                   map_location=lambda storage, loc: storage))

    if use_gpu:
        net.cuda()
    net.eval()
    data_encoder = DataEncoder()

    font = cv2.FONT_HERSHEY_SCRIPT_SIMPLEX

    # given video path, predict and show
    path = "/home/lxg/codedata/faceVideo/1208.mp4"
    # testVideo(path)

    # given image path, predict and show
    root_path = "/home/lxg/codedata/widerFace/WIDER_train/images/0--Parade/"
    picture = '0_Parade_marchingband_1_495.jpg'
    # testIm(root_path + picture)

    # given image path, predict and show
    fddb_path = "/home/lxg/codedata/fddb/2002/07/19/big/"
    picture = 'img_463.jpg'

Exemple #8

0

Afficher le fichier

class ListDataset(data.Dataset):
    image_size = 1024

    def __init__(self, root, list_file, train, transform):
        print('data init')
        self.root = root
        self.train = train
        self.transform = transform
        self.fnames = []
        self.boxes = []
        self.labels = []
        self.small_threshold = 10. / self.image_size  # face that small than threshold will be ignored
        self.data_encoder = DataEncoder()

        with open(list_file) as f:
            lines = f.readlines()

        for line in lines:
            splited = line.strip().split()
            self.fnames.append(splited[0])
            num_faces = int(splited[1])
            box = []
            label = []
            for i in range(num_faces):
                x = float(splited[2 + 5 * i])
                y = float(splited[3 + 5 * i])
                w = float(splited[4 + 5 * i])
                h = float(splited[5 + 5 * i])
                c = int(splited[6 + 5 * i])
                box.append([x, y, x + w, y + h])
                label.append(c)
            self.boxes.append(torch.Tensor(box))
            self.labels.append(torch.LongTensor(label))
        self.num_samples = len(self.boxes)

    def __getitem__(self, idx):
        fname = self.fnames[idx]
        img = cv2.imread(os.path.join(self.root + fname))
        assert img is not None

        boxes = self.boxes[idx].clone()
        labels = self.labels[idx].clone()

        if self.train:
            img, boxes, labels = self.random_crop(img, boxes, labels)
            img = self.random_bright(img)
            img, boxes = self.random_flip(img, boxes)
            boxwh = boxes[:, 2:] - boxes[:, :2]
            # print('boxwh', boxwh)

        h, w, _ = img.shape
        img = cv2.resize(img, (self.image_size, self.image_size))

        boxes /= torch.Tensor([w, h, w, h]).expand_as(boxes)
        for t in self.transform:
            img = t(img)

        loc_target, conf_target = self.data_encoder.encode(boxes, labels)

        return img, loc_target, conf_target

    def random_getim(self):
        idx = random.randrange(0, self.num_samples)
        fname = self.fnames[idx]
        img = cv2.imread(os.path.join(self.root + fname))
        boxes = self.boxes[idx].clone()
        labels = self.labels[idx]

        return img, boxes, labels

    def __len__(self):
        return self.num_samples

    def random_flip(self, im, boxes):
        if random.random() < 0.5:
            im_lr = np.fliplr(im).copy()
            h, w, _ = im.shape
            xmin = w - boxes[:, 2]
            xmax = w - boxes[:, 0]
            boxes[:, 0] = xmin
            boxes[:, 2] = xmax
            return im_lr, boxes
        return im, boxes

    def random_crop(self, im, boxes, labels):
        imh, imw, _ = im.shape
        short_size = min(imw, imh)
        while True:
            mode = random.choice([None, 0.3, 0.5, 0.7, 0.9])
            if mode is None:
                boxes_uniform = boxes / torch.Tensor([imw, imh, imw, imh
                                                      ]).expand_as(boxes)
                boxwh = boxes_uniform[:, 2:] - boxes_uniform[:, :2]
                mask = (boxwh[:, 0] > self.small_threshold) & (
                    boxwh[:, 1] > self.small_threshold)
                if not mask.any():
                    print(
                        'default image have none box bigger than small_threshold'
                    )
                    im, boxes, labels = self.random_getim()
                    imh, imw, _ = im.shape
                    short_size = min(imw, imh)
                    continue
                selected_boxes = boxes.index_select(0,
                                                    mask.nonzero().squeeze(1))
                selected_labels = labels.index_select(
                    0,
                    mask.nonzero().squeeze(1))
                return im, selected_boxes, selected_labels

            for _ in range(10):
                w = random.randrange(int(0.3 * short_size), short_size)
                h = w

                x = random.randrange(imw - w)
                y = random.randrange(imh - h)
                roi = torch.Tensor([[x, y, x + w, y + h]])

                center = (boxes[:, :2] + boxes[:, 2:]) / 2
                roi2 = roi.expand(len(center), 4)
                mask = (center > roi2[:, :2]) & (center < roi2[:, 2:])
                mask = mask[:, 0] & mask[:, 1]
                if not mask.any():
                    continue

                selected_boxes = boxes.index_select(0,
                                                    mask.nonzero().squeeze(1))
                img = im[y:y + h, x:x + w, :]
                selected_boxes[:, 0].add_(-x).clamp_(min=0, max=w)
                selected_boxes[:, 1].add_(-y).clamp_(min=0, max=h)
                selected_boxes[:, 2].add_(-x).clamp_(min=0, max=w)
                selected_boxes[:, 3].add_(-y).clamp_(min=0, max=h)
                # print('croped')

                boxes_uniform = selected_boxes / torch.Tensor(
                    [w, h, w, h]).expand_as(selected_boxes)
                boxwh = boxes_uniform[:, 2:] - boxes_uniform[:, :2]
                mask = (boxwh[:, 0] > self.small_threshold) & (
                    boxwh[:, 1] > self.small_threshold)
                if not mask.any():
                    print(
                        'crop image have none box bigger than small_threshold')
                    im, boxes, labels = self.random_getim()
                    imh, imw, _ = im.shape
                    short_size = min(imw, imh)
                    continue
                selected_boxes_selected = selected_boxes.index_select(
                    0,
                    mask.nonzero().squeeze(1))
                selected_labels = labels.index_select(
                    0,
                    mask.nonzero().squeeze(1))
                return img, selected_boxes_selected, selected_labels

    def random_bright(self, im, delta=16):
        alpha = random.random()
        if alpha > 0.3:
            im = im * alpha + random.randrange(-delta, delta)
            im = im.clip(min=0, max=255).astype(np.uint8)
        return im

    def testGet(self, idx):
        fname = self.fnames[idx]
        img = cv2.imread(os.path.join(self.root, fname))
        cv2.imwrite('test_encoder_source.jpg', img)
        boxes = self.boxes[idx].clone()
        # print(boxes)
        labels = self.labels[idx].clone()

        for box in boxes:
            cv2.rectangle(img, (int(box[0]), int(box[1])),
                          (int(box[2]), int(box[3])), (0, 0, 255))
        cv2.imwrite(fname, img)

        if self.train:
            img, boxes, labels = self.random_crop(img, boxes, labels)
            img = self.random_bright(img)
            img, boxes = self.random_flip(img, boxes)

        h, w, _ = img.shape
        boxes /= torch.Tensor([w, h, w, h]).expand_as(boxes)

        img = cv2.resize(img, (self.image_size, self.image_size))
        for t in self.transform:
            img = t(img)

        print(idx, fname, boxes)

        return img, boxes, labels

Exemple #9

0

Afficher le fichier

class ListDataset(data.Dataset):
    def __init__(self, root, list_file, train, transform):
        print('data init')
        self.image_size = 1024
        self.root = root
        self.train = train
        self.transform = transform
        self.fnames = []  # list: image name
        self.boxes = []
        self.labels = []
        self.small_threshold = 20. / self.image_size  # face that small than threshold will be ignored
        # it's 20 in the paper
        self.data_encoder = DataEncoder()

        with open(list_file) as f:
            lines = f.readlines()

        for line in lines:
            splited = line.strip().split()
            self.fnames.append(splited[0])
            num_faces = int(splited[1])
            box = []
            label = []
            for i in range(num_faces):
                x = float(splited[2 + 5 * i])
                y = float(splited[3 + 5 * i])
                w = float(splited[4 + 5 * i])
                h = float(splited[5 + 5 * i])
                c = int(splited[6 + 5 * i])
                box.append([x, y, x + w, y + h])
                label.append(c)
            self.boxes.append(torch.Tensor(box))
            self.labels.append(torch.tensor(label))
        self.num_samples = len(self.boxes)  # num of images

    def __getitem__(self, idx):
        fname = self.fnames[idx]
        img = cv2.imread(os.path.join(self.root, fname))

        assert img is not None

        boxes = self.boxes[idx].clone()
        labels = self.labels[idx].clone()

        if self.train:
            img, boxes, labels = self.random_crop(img, boxes, labels)
            img, boxes = self.random_flip(img, boxes)

            img = img.astype(np.float32)
            img = self.random_bright(img)
            img = self.random_swapchannel(img)
            img = self.random_distort(img)
            img = img.astype(np.uint8)

        h, w, _ = img.shape
        img = cv2.resize(img, (self.image_size, self.image_size))

        boxes /= torch.Tensor([w, h, w, h]).expand_as(boxes)
        for t in self.transform:
            img = t(img)

        loc_target, conf_target = self.data_encoder.encode(boxes, labels)

        return img, loc_target, conf_target

    def __len__(self):
        return self.num_samples

    def random_getim(self):
        idx = random.randrange(0, self.num_samples)
        fname = self.fnames[idx]
        img = cv2.imread(os.path.join(self.root, fname))
        box = self.boxes[idx].clone()
        label = self.labels[idx].clone()

        return img, box, label

    def random_flip(self, im, boxes):
        if random.random() < 0.5:
            im_lr = np.fliplr(im).copy()
            h, w, _ = im.shape
            xmin = w - boxes[:, 2]
            xmax = w - boxes[:, 0]
            boxes[:, 0] = xmin
            boxes[:, 2] = xmax
            return im_lr, boxes
        return im, boxes

    def random_crop(self, im, boxes, labels):
        imh, imw, _ = im.shape
        short_size = min(imw, imh)
        while True:
            # choose one crop pitch
            mode = random.randint(0, 4)
            for _ in range(10):
                if mode == 0:
                    w = short_size
                else:
                    w = random.randrange(int(0.3 * short_size), short_size)
                h = w

                x = random.randint(0, imw - w)
                y = random.randint(0, imh - h)
                roi = torch.Tensor([[x, y, x + w, y + h]])

                center = (boxes[:, :2] + boxes[:, 2:]) / 2
                roi2 = roi.expand(len(center), 4)
                mask = (center > roi2[:, :2]) & (center < roi2[:, 2:])
                mask = mask[:, 0] & mask[:, 1]
                if not mask.any():
                    im, boxes, labels = self.random_getim()
                    imh, imw, _ = im.shape
                    short_size = min(imw, imh)
                    continue

                selected_boxes = boxes.index_select(0,
                                                    mask.nonzero().squeeze(1))
                img = im[y:y + h, x:x + w, :]
                selected_boxes[:, 0].add_(-x).clamp_(min=0, max=w)
                selected_boxes[:, 1].add_(-y).clamp_(min=0, max=h)
                selected_boxes[:, 2].add_(-x).clamp_(min=0, max=w)
                selected_boxes[:, 3].add_(-y).clamp_(min=0, max=h)

                boxes_uniform = selected_boxes / torch.Tensor(
                    [w, h, w, h]).expand_as(selected_boxes)
                boxwh = boxes_uniform[:, 2:] - boxes_uniform[:, :2]
                mask = (boxwh[:, 0] > self.small_threshold) & (
                    boxwh[:, 1] > self.small_threshold)
                if not mask.any():
                    # crop image have none box bigger than small_threshold
                    im, boxes, labels = self.random_getim()
                    imh, imw, _ = im.shape
                    short_size = min(imw, imh)
                    continue
                selected_boxes_selected = selected_boxes.index_select(
                    0,
                    mask.nonzero().squeeze(1))
                selected_labels = labels.index_select(
                    0,
                    mask.nonzero().squeeze(1))
                return img, selected_boxes_selected, selected_labels

    def random_bright(self, im, delta=32):
        if random.random() < 0.5:
            delta = random.uniform(-delta, delta)
            im += delta
            im = im.clip(min=0, max=255)
        return im

    def random_swapchannel(self, im):
        perms = ((0, 1, 2), (0, 2, 1), (1, 0, 2), (1, 2, 0), (2, 0, 1), (2, 1,
                                                                         0))
        if random.random() < 0.5:
            swap = perms[random.randrange(0, len(perms))]
            im = im[:, :, swap]
        return im

    def RandomContrast(self, im, lower=0.5, upper=1.5):
        if random.random() < 0.5:
            alpha = random.uniform(lower, upper)
            im *= alpha
            im = im.clip(min=0, max=255)
        return im

    def RandomSaturation(self, im, lower=0.5, upper=1.5):
        if random.random() < 0.5:
            im[:, :, 1] *= random.uniform(lower, upper)

        return im

    def RandomHue(self, im, delta=18.0):
        if random.random() < 0.5:
            im[:, :, 0] += random.uniform(-delta, delta)
            im[:, :, 0][im[:, :, 0] > 360.0] -= 360.0
            im[:, :, 0][im[:, :, 0] < 0.0] += 360.0
        return im

    def for_distort(self, im):
        im = cv2.cvtColor(im, cv2.COLOR_BGR2HSV)
        self.RandomSaturation(im)
        self.RandomHue(im)
        im = cv2.cvtColor(im, cv2.COLOR_HSV2BGR)
        return im

    def random_distort(self, im):
        if random.random() < 0.5:
            self.RandomContrast(im)
            self.for_distort(im)
        else:
            self.for_distort(im)
            self.RandomContrast(im)

        return im

Exemple #10

0

Afficher le fichier

class ListDataset(data.Dataset):
    # image_size = 1024

    def __init__(self, root, list_file, train, transform, image_size):
        print('data init')
        self.root = root
        self.train = train
        self.transform = transform
        self.fnames = []
        self.boxes = []
        self.labels = []
        self.image_size = image_size
        # self.small_threshold = 10. / self.image_size      # face that small than threshold will be ignored
        self.small_threshold = 10. / 1024                   # face that small than threshold will be ignored
        self.data_encoder = DataEncoder()

        with open(list_file) as f:
            lines = f.readlines()

        for line in lines:
            splited = line.strip().split()
            # print('splited', splited)
            self.fnames.append(splited[0])
            num_faces = int(splited[1])
            box = []
            label = []
            for i in range(num_faces):
                x = float(splited[2+5*i])
                y = float(splited[3+5*i])
                w = float(splited[4+5*i])
                h = float(splited[5+5*i])
                c = int(splited[6+5*i])
                box.append([x, y, x+w, y+h])
                label.append(c)
                # print('box', box, 'label', label)
            self.boxes.append(torch.Tensor(box))
            self.labels.append(torch.LongTensor(label))
        self.num_samples = len(self.boxes)

    def __getitem__(self, idx):
        fname = self.fnames[idx]
        img = cv2.imread(os.path.join(self.root + fname))
        # print(os.path.join(self.root + fname))
        assert img is not None

        boxes = self.boxes[idx].clone()
        labels = self.labels[idx].clone()

        # 图片增广
        if self.train:
            img, boxes, labels = self.random_crop(img, boxes, labels)       # 随机裁剪
            img = self.random_bright(img)                                   # 随机调亮
            # img, boxes = self.random_flip(img, boxes)                     # 随机翻转
            # boxwh = boxes[:, 2:] - boxes[:, :2]
            # print('boxwh', boxwh)

        h, w, _ = img.shape
        img = cv2.resize(img, (self.image_size, self.image_size))

        boxes /= torch.Tensor([w, h, w, h]).expand_as(boxes)
        # show_img(img, boxes)

        img = self.transform(img)

        loc_target, conf_target = self.data_encoder.encode(boxes, labels)

        return img, loc_target, conf_target

    def __len__(self):
        return self.num_samples

    def random_getim(self):
        idx = random.randrange(0,self.num_samples)
        fname = self.fnames[idx]
        img = cv2.imread(os.path.join(self.root+fname))
        boxes = self.boxes[idx].clone()
        labels = self.labels[idx]

        return img, boxes, labels

    # 随机翻转
    def random_flip(self, im, boxes):
        if random.random() < 0.5:
            im_lr = np.fliplr(im).copy()
            h, w, _ = im.shape
            xmin = w - boxes[:, 2]
            xmax = w - boxes[:, 0]
            boxes[:, 0] = xmin
            boxes[:, 2] = xmax
            return im_lr, boxes
        return im, boxes

    # 随机裁剪
    def random_crop(self, im, boxes, labels):
        # print('random_crop', boxes, labels)

        imh, imw, _ = im.shape
        short_size = min(imw, imh)
        # print(imh, imw, short_size)
        while True:
            mode = random.choice([None, 0.3, 0.5, 0.7, 0.9])
            if mode is None:
                boxes_uniform = boxes / torch.Tensor([imw, imh, imw, imh]).expand_as(boxes)
                boxwh = boxes_uniform[:, 2:] - boxes_uniform[:, :2]
                mask = (boxwh[:, 0] > self.small_threshold) & (boxwh[:, 1] > self.small_threshold)
                if not mask.any():
                    print('default image have none box bigger than small_threshold')
                    im, boxes, labels = self.random_getim()
                    imh, imw, _ = im.shape
                    short_size = min(imw, imh)
                    continue
                selected_boxes = boxes.index_select(0, mask.nonzero().squeeze(1))
                selected_labels = labels.index_select(0, mask.nonzero().squeeze(1))
                return im, selected_boxes, selected_labels

            for _ in range(10):
                w = random.randrange(int(0.3*short_size), short_size)
                h = w

                x = random.randrange(imw - w)
                y = random.randrange(imh - h)
                roi = torch.Tensor([[x, y, x+w, y+h]])

                center = (boxes[:,:2] + boxes[:,2:]) / 2
                roi2 = roi.expand(len(center), 4)
                mask = (center > roi2[:,:2]) & (center < roi2[:,2:])
                mask = mask[:,0] & mask[:,1]
                if not mask.any():
                    continue

                selected_boxes = boxes.index_select(0, mask.nonzero().squeeze(1))
                img = im[y:y+h, x:x+w, :]
                selected_boxes[:, 0].add_(-x).clamp_(min=0, max=w)
                selected_boxes[:, 1].add_(-y).clamp_(min=0, max=h)
                selected_boxes[:, 2].add_(-x).clamp_(min=0, max=w)
                selected_boxes[:, 3].add_(-y).clamp_(min=0, max=h)
                # print('croped')

                boxes_uniform = selected_boxes / torch.Tensor([w,h,w,h]).expand_as(selected_boxes)
                boxwh = boxes_uniform[:,2:] - boxes_uniform[:,:2]
                mask = (boxwh[:,0] > self.small_threshold) & (boxwh[:,1] > self.small_threshold)
                if not mask.any():
                    print('crop image have none box bigger than small_threshold')
                    im, boxes, labels = self.random_getim()
                    imh, imw, _ = im.shape
                    short_size = min(imw, imh)
                    continue
                selected_boxes_selected = selected_boxes.index_select(0, mask.nonzero().squeeze(1))
                selected_labels = labels.index_select(0, mask.nonzero().squeeze(1))
                return img, selected_boxes_selected, selected_labels

    # 随机调亮
    def random_bright(self, im, delta=16):
        alpha = random.random()
        if alpha > 0.3:
            im = im * alpha + random.randrange(-delta, delta)
            im = im.clip(min=0, max=255).astype(np.uint8)
        return im

    def testGet(self, idx):
        fname = self.fnames[idx]
        img = cv2.imread(os.path.join(self.root,fname))
        cv2.imwrite('test_encoder_source.jpg', img)
        boxes = self.boxes[idx].clone()
        # print(boxes)
        labels = self.labels[idx].clone()

        for box in boxes:
            cv2.rectangle(img, (int(box[0]),int(box[1])), (int(box[2]),int(box[3])), (0,0,255))
        cv2.imwrite(fname, img)

        if self.train:
            img, boxes, labels = self.random_crop(img, boxes, labels)
            img = self.random_bright(img)
            img, boxes = self.random_flip(img, boxes)

        h,w,_ = img.shape
        boxes /= torch.Tensor([w,h,w,h]).expand_as(boxes)

        img = cv2.resize(img,(self.image_size,self.image_size))
        for t in self.transform:
            img = t(img)

        print(idx, fname, boxes)

        return img, boxes, labels


# if __name__ == '__main__':
#     file_root = '/home/lxg/codedata/aflw/'
#     train_dataset = ListDataset(root=file_root, list_file='box_label.txt', train=True, transform=[transforms.ToTensor()])
#     print('the dataset has %d image' % (len(train_dataset)))
#     for i in range(len(train_dataset)):
#         print(i)
#         item = random.randrange(0, len(train_dataset))
#         item = item
#         img, boxes, labels = train_dataset.testGet(item)
#         # img, boxes = train_dataset[item]
#         img = img.numpy().transpose(1, 2, 0).copy()*255
#         train_dataset.data_encoder.test_encode(boxes, img, labels)
#
#         boxes = boxes.numpy().tolist()
#         w, h, _ = img.shape
#         # print('img', img.shape)
#         # print('boxes', boxes.shape)
#
#         for box in boxes:
#             x1 = int(box[0]*w)
#             y1 = int(box[1]*h)
#             x2 = int(box[2]*w)
#             y2 = int(box[3]*h)
#             cv2.rectangle(img, (x1, y1), (x2, y2), (0, 0, 255))
#             boxw = x2-x1
#             boxh = y2-y1
#             print(boxw,boxh, box)
#             if boxw is 0 or boxh is 0:
#                 raise 'zero width'
#
#         cv2.imwrite('test'+str(i)+'.jpg', img)
#         if i == 0:
#             break

Exemple #11

0

Afficher le fichier

class ListDataset(data.Dataset):

    def __init__(self, root, list_file, train, transform):
        print('data init')
        self.image_size = 1024
        self.root=root
        self.train = train
        self.transform=transform
        self.fnames = [] # list: image name
        self.boxes = []
        self.labels = []
        self.small_threshold = 20./self.image_size  # face that small than threshold will be ignored
                                                    # it's 20 in the paper
        self.data_encoder = DataEncoder()

        with open(list_file) as f:
            lines  = f.readlines()

        for line in lines:
            splited = line.strip().split()
            self.fnames.append(splited[0])
            num_faces = int(splited[1])
            box=[]
            label=[]
            for i in range(num_faces):
                x = float(splited[2+5*i])
                y = float(splited[3+5*i])
                w = float(splited[4+5*i])
                h = float(splited[5+5*i])
                c = int(splited[6+5*i])
                box.append([x,y,x+w,y+h])
                label.append(c)
            self.boxes.append(torch.Tensor(box))
            self.labels.append(torch.tensor(label))
        self.num_samples = len(self.boxes) # num of images

    def __getitem__(self, idx):
        fname = self.fnames[idx]
        img = cv2.imread(os.path.join(self.root, fname))

        assert img is not None

        boxes = self.boxes[idx].clone()
        #print(boxes)
        labels = self.labels[idx].clone()

        if self.train:
            img, boxes= self.random_resize(img, boxes)
            #print(boxes)
            img, boxes = self.random_flip(img, boxes)

            img = img.astype(np.float32)
            img = self.random_bright(img)
            #img = self.random_swapchannel(img)
            img = self.random_distort(img)
            img = img.astype(np.uint8)

        h,w,_ = img.shape
        img = cv2.resize(img,(self.image_size,self.image_size))


        boxes /= torch.Tensor([w,h,w,h]).expand_as(boxes)
        #print(labels)
        #print(boxes)
        for t in self.transform:
            img = t(img)

        loc_target,conf_target = self.data_encoder.encode(boxes,labels)

        return img,loc_target,conf_target

    def __len__(self):
        return self.num_samples

    def random_getim(self):
        idx = random.randrange(0, self.num_samples)
        fname = self.fnames[idx]
        img = cv2.imread(os.path.join(self.root, fname))
        box = self.boxes[idx].clone()
        label = self.labels[idx].clone()

        return img, box, label

    def random_flip(self, im, boxes):
        if random.random() < 0.65:
            im_lr = np.fliplr(im).copy()
            h,w,_ = im.shape
            xmin = w - boxes[:,2]
            xmax = w - boxes[:,0]
            boxes[:,0] = xmin
            boxes[:,2] = xmax
            return im_lr, boxes
        return im, boxes

    def random_resize(self, img, boxes):
# =============================================================================
#         if random.random() > 0 and random.random() <= 0.25:
#                 scale=1.5
#         elif random.random() > 0.25 and random.random() <= 0.5:
#             scale=2.5
#         elif random.random() > 0.5 and random.random() <= 0.75:
#             scale=3.25
#         else:
#             scale=1
# =============================================================================
        scale_list=[1.,1.5,2.,2.75,3.45,5.,6.2,5.5,8.5,9.8]
        index=np.random.randint(0,10)
        scale=scale_list[index]
        w=1024
        h=1024   
        img=cv2.resize(img,(int(w/scale),int(h/scale)))
        h1, w1, _ = img.shape
        diffh=int((h-h1)/2)
        diffw=int((w-w1)/2)
        pad1,pad2=(diffh,diffh),(diffw,diffw)
        pad = (pad1, pad2, (0, 0))
        img = np.pad(img, pad, 'constant', constant_values=255)
        x1 = boxes[0][0]/scale
        y1 = boxes[0][1]/scale
        x2 = boxes[0][2]/scale
        y2 = boxes[0][3]/scale
        #new_box=[[x1+diffw,y1+diffh,x2+diffw,y2+diffh]]
        boxes[0][0]=(x1+diffw).int()
        boxes[0][1]=(y1+diffh).int()
        boxes[0][2]=(x2+diffw).int()
        boxes[0][3]=(y2+diffh).int()
        
        #print (type(new_box))
        
        return img,boxes
    def random_bright(self, im, delta=48):
        if random.random() < 0.65:
            delta = random.uniform(-delta, delta)
            im += delta
            im = im.clip(min=0, max=255)
        return im


    def random_swapchannel(self, im):
        perms = ((0, 1, 2), (0, 2, 1),
                 (1, 0, 2), (1, 2, 0),
                 (2, 0, 1), (2, 1, 0))
        if random.random() < 0.5:
            swap = perms[random.randrange(0, len(perms))]
            im = im[:, :, swap]
        return im

    def RandomContrast(self, im, lower=0.4, upper=1.5):
        if random.random() < 0.65:
            alpha = random.uniform(lower, upper)
            im *= alpha
            im = im.clip(min=0, max=255)
        return im

    def RandomSaturation(self, im, lower=0.35, upper=1.55):
        if random.random() < 0.5:
            im[:, :, 1] *= random.uniform(lower, upper)

        return im

    def RandomHue(self, im, delta=18.0):
        if random.random() < 0.5:
            im[:, :, 0] += random.uniform(-delta, delta)
            im[:, :, 0][im[:, :, 0] > 360.0] -= 360.0
            im[:, :, 0][im[:, :, 0] < 0.0] += 360.0
        return im

    def for_distort(self, im):
        im = cv2.cvtColor(im, cv2.COLOR_BGR2HSV)
        self.RandomSaturation(im)
        self.RandomHue(im)
        im = cv2.cvtColor(im, cv2.COLOR_HSV2BGR)
        return im


    def random_distort(self, im):
        if random.random() < 0.6:
            self.RandomContrast(im)
            self.for_distort(im)
        else:
            self.for_distort(im)
            self.RandomContrast(im)

        return im

Exemple #12

0

Afficher le fichier

Fichier : dataset.py Projet : jacke121/faceboxes-1

class ListDataset(data.Dataset):
    image_size = 1024

    def __init__(self, root, list_file, train, transform):
        print('data init')
        self.root = root
        self.train = train
        self.transform = transform
        self.fnames = []  # 文件名列表
        self.boxes = []  # 位置标签列表，格式 [x1, y1, x2, y2]
        self.labels = []  # 类别标签列表?
        self.small_threshold = 10. / self.image_size  # face that small than threshold will be ignored
        self.data_encoder = DataEncoder()

        with open(list_file) as f:
            lines = f.readlines()

        for line in lines:
            splited = line.strip().split()
            # print('splited', splited)
            self.fnames.append(splited[0])
            num_faces = int(splited[1])
            box = []
            label = []
            for i in range(num_faces):
                x = float(splited[2 + 5 * i])
                y = float(splited[3 + 5 * i])
                w = float(splited[4 + 5 * i])
                h = float(splited[5 + 5 * i])
                c = int(splited[6 + 5 * i])
                box.append([x, y, x + w,
                            y + h])  # [[x1, y1, x2, y2], [x3, y3, x4, y4]]
                label.append(c)  # [1, 1]
                # print('box', box, 'label', label)
            self.boxes.append(torch.Tensor(box))
            self.labels.append(torch.LongTensor(label))
        self.num_samples = len(self.boxes)

    def __getitem__(self, idx):
        fname = self.fnames[idx]
        img = cv2.imread(os.path.join(self.root + fname))
        # print(os.path.join(self.root + fname))
        assert img is not None

        boxes = self.boxes[idx].clone()  # 获取某一张图片对应的位置信息（可能多个）
        labels = self.labels[idx].clone()  # 获取某一张图片对应的类别信息（可能多个）

        # 图片增广
        if self.train:
            img, boxes, labels = self.random_crop(img, boxes, labels)  # 随机裁剪
            img = self.random_bright(img)  # 随机调亮
            # img, boxes = self.random_flip(img, boxes)                     # 随机翻转
            # boxwh = boxes[:, 2:] - boxes[:, :2]
            # print('boxwh', boxwh)

        h, w, _ = img.shape
        img = cv2.resize(img, (self.image_size, self.image_size))

        boxes /= torch.Tensor([w, h, w, h]).expand_as(boxes)  # 位置信息除以宽或高（归一化）
        # for t in self.transform:
        #     img = t(img)
        img = self.transform(img)

        loc_target, conf_target = self.data_encoder.encode(boxes,
                                                           labels)  # 对位置标签进行转换

        return img, loc_target, conf_target

    def __len__(self):
        return self.num_samples

    def random_getim(self):
        idx = random.randrange(0, self.num_samples)
        fname = self.fnames[idx]
        img = cv2.imread(os.path.join(self.root + fname))
        boxes = self.boxes[idx].clone()
        labels = self.labels[idx]

        return img, boxes, labels

    # 随机翻转
    def random_flip(self, im, boxes):
        if random.random() < 0.5:
            im_lr = np.fliplr(im).copy()
            h, w, _ = im.shape
            xmin = w - boxes[:, 2]
            xmax = w - boxes[:, 0]
            boxes[:, 0] = xmin
            boxes[:, 2] = xmax
            return im_lr, boxes
        return im, boxes

    # # 随机裁剪
    # def random_crop(self, img, boxes, labels, prob=0.8):
    #     # print('random_crop', boxes, labels)
    #     h, w, _ = img.shape
    #
    #     # show_img = img.copy()
    #     # for box in boxes:
    #     #     cv2.rectangle(show_img, (int(box[0]), int(box[1])), (int(box[2]), int(box[3])), (0, 255, 0))
    #     # cv2.imshow('old_image', show_img)
    #     # cv2.waitKey(0)
    #
    #     x1 = boxes[:, 0]
    #     y1 = boxes[:, 1]
    #     x2 = boxes[:, 2]
    #     y2 = boxes[:, 3]
    #     # print('x1, y1, x2, y2', x1, y1, x2, y2)
    #
    #     min_left = min(min(x1), min(x2))
    #     min_top = min(min(y1), min(y2))
    #     min_lt = min(min_left, min_top)
    #
    #     min_right = w - max(max(x1), max(x2))
    #     min_bottom = h - max(max(y1), max(y2))
    #     min_rb = min(min_right, min_bottom)
    #     # print('min_left, min_top, min_right, min_bottom', min_left, min_top, min_right, min_bottom)
    #
    #     crop_left = 0
    #     crop_top = 0
    #     crop_right = w
    #     crop_bottom = h
    #
    #     # random crop left and top
    #     if random.random() < prob:
    #         rate = random.random()
    #         crop = int(min_lt * rate)
    #
    #         x1 = x1 - crop
    #         x2 = x2 - crop
    #         crop_left = crop
    #         # print('crop_left', crop_left, rate, x1, x2)
    #
    #         y1 = y1 - crop
    #         y2 = y2 - crop
    #         crop_top = crop
    #         # print('crop_top', crop_top, rate, y1, y2)
    #
    #     # random crop right
    #     if random.random() < prob:
    #         rate = random.random()
    #         crop = int(min_rb * rate)
    #
    #         crop_right = crop_right - crop
    #         # print('crop_right', crop_right, rate)
    #
    #         crop_bottom = crop_bottom - crop
    #         # print('crop_bottom', crop_bottom, rate)
    #
    #     img = img[crop_top:crop_bottom, crop_left:crop_right]
    #
    #     boxes[:, 0] = x1    # 第1列表示：x轴中心点（比例） # 第0列表示：类别
    #     boxes[:, 1] = y1    # 第2列表示：y轴中心点（比例）
    #     boxes[:, 2] = x2    # 第3列表示：w（比例）
    #     boxes[:, 3] = y2    # 第4列表示：h（比例）
    #     # print('new labels', labels)
    #
    #     # show_img = img.copy()
    #     # for box in boxes:
    #     #     cv2.rectangle(show_img, (int(box[0]), int(box[1])), (int(box[2]), int(box[3])), (0, 255, 0))
    #     # cv2.imshow('new_image', show_img)
    #     # cv2.waitKey(0)
    #
    #     return img, boxes, labels

    # 随机裁剪
    def random_crop(self, im, boxes, labels):
        cv2.imshow('old_image', im)
        print('random_crop', boxes, labels)

        # show_img = im.copy()
        # for box in boxes:
        #     cv2.rectangle(show_img, (int(box[0]), int(box[1])), (int(box[2]), int(box[3])), (0, 255, 0))
        # cv2.imshow('old_image', show_img)

        imh, imw, _ = im.shape
        short_size = min(imw, imh)
        # print(imh, imw, short_size)
        while True:
            mode = random.choice([None, 0.3, 0.5, 0.7, 0.9])
            print('mode', mode)
            if mode is None:
                boxes_uniform = boxes / torch.Tensor([imw, imh, imw, imh
                                                      ]).expand_as(boxes)
                boxwh = boxes_uniform[:, 2:] - boxes_uniform[:, :2]
                mask = (boxwh[:, 0] > self.small_threshold) & (
                    boxwh[:, 1] > self.small_threshold)
                if not mask.any():
                    print(
                        'default image have none box bigger than small_threshold'
                    )
                    im, boxes, labels = self.random_getim()
                    imh, imw, _ = im.shape
                    short_size = min(imw, imh)
                    continue
                selected_boxes = boxes.index_select(0,
                                                    mask.nonzero().squeeze(1))
                selected_labels = labels.index_select(
                    0,
                    mask.nonzero().squeeze(1))

                # show_img = im.copy()
                # for box in selected_boxes:
                #     cv2.rectangle(show_img, (int(box[0]), int(box[1])), (int(box[2]), int(box[3])), (0, 255, 0))
                # cv2.imshow('new_image', show_img)
                # cv2.waitKey(0)

                return im, selected_boxes, selected_labels

            for _ in range(10):
                # rate = random.choice([, 0.3, 0.5, 0.7, 0.9])
                w = random.randrange(int(mode * short_size), short_size)
                h = w

                x = random.randrange(imw - w)
                y = random.randrange(imh - h)
                roi = torch.Tensor([[x, y, x + w, y + h]])

                center = (boxes[:, :2] + boxes[:, 2:]) / 2
                roi2 = roi.expand(len(center), 4)
                mask = (center > roi2[:, :2]) & (center < roi2[:, 2:])
                mask = mask[:, 0] & mask[:, 1]
                if not mask.any():
                    continue

                selected_boxes = boxes.index_select(0,
                                                    mask.nonzero().squeeze(1))
                img = im[y:y + h, x:x + w, :]
                selected_boxes[:, 0].add_(-x).clamp_(min=0, max=w)
                selected_boxes[:, 1].add_(-y).clamp_(min=0, max=h)
                selected_boxes[:, 2].add_(-x).clamp_(min=0, max=w)
                selected_boxes[:, 3].add_(-y).clamp_(min=0, max=h)
                # print('croped')

                boxes_uniform = selected_boxes / torch.Tensor(
                    [w, h, w, h]).expand_as(selected_boxes)
                print('boxes_uniform', boxes_uniform)
                boxwh = boxes_uniform[:, 2:] - boxes_uniform[:, :2]
                print('boxwh', boxwh)
                mask = (boxwh[:, 0] > self.small_threshold) & (
                    boxwh[:, 1] > self.small_threshold)
                print('mask', mask)

                if not mask.any():
                    print(
                        'crop image have none box bigger than small_threshold')
                    im, boxes, labels = self.random_getim()
                    imh, imw, _ = im.shape
                    short_size = min(imw, imh)
                    continue
                selected_boxes_selected = selected_boxes.index_select(
                    0,
                    mask.nonzero().squeeze(1))
                selected_labels = labels.index_select(
                    0,
                    mask.nonzero().squeeze(1))
                print('selected_boxes_selected', selected_boxes_selected)
                print('selected_labels', selected_labels)

                # show_img = img.copy()
                # for box in selected_boxes_selected:
                #     cv2.rectangle(show_img, (int(box[0]), int(box[1])), (int(box[2]), int(box[3])), (0, 255, 0))
                # cv2.imshow('new_image', show_img)
                # cv2.waitKey(0)

                return img, selected_boxes_selected, selected_labels

    # 随机调亮
    def random_bright(self, im, delta=16):
        alpha = random.random()
        if alpha > 0.3:
            im = im * alpha + random.randrange(-delta, delta)
            im = im.clip(min=0, max=255).astype(np.uint8)
        return im

    def testGet(self, idx):
        fname = self.fnames[idx]
        img = cv2.imread(os.path.join(self.root, fname))
        cv2.imwrite('test_encoder_source.jpg', img)
        boxes = self.boxes[idx].clone()
        # print(boxes)
        labels = self.labels[idx].clone()

        for box in boxes:
            cv2.rectangle(img, (int(box[0]), int(box[1])),
                          (int(box[2]), int(box[3])), (0, 0, 255))
        cv2.imwrite(fname, img)

        if self.train:
            img, boxes, labels = self.random_crop(img, boxes, labels)
            img = self.random_bright(img)
            img, boxes = self.random_flip(img, boxes)

        h, w, _ = img.shape
        boxes /= torch.Tensor([w, h, w, h]).expand_as(boxes)

        img = cv2.resize(img, (self.image_size, self.image_size))
        for t in self.transform:
            img = t(img)

        print(idx, fname, boxes)

        return img, boxes, labels

Exemple #13

0

Afficher le fichier

    def __getitem__(self, idx):
        while True:
            if self.multi_scale:
                if isinstance(idx, int):
                    self.image_size = 1024
                else:
                    self.image_size = idx[1]
                    idx = idx[0]
            fname = self.fnames[idx]
            if self.data_type == "img":
                img = cv2.imread(os.path.join(self.root, fname))
                # print(fname)
            elif self.data_type == "lmdb":
                img = self.getImg(fname)
                # print(fname)
            #print img.shape
            if img is not None:
                # print self.root,fname
                assert img is not None

                boxes = self.boxes[idx].clone()
                labels = self.labels[idx].clone()
                landmarks = self.landmarks[idx].clone()

                if self.train:
                    img = self.pad_to_square(img)
                    assert img is not None
                    # img, boxes, landmarks = self.random_rot(img, boxes,landmarks)
                    if random.random() > 0.7:
                        img, boxes, landmarks, labels = self.random_crop_edge(
                            img, boxes, landmarks, labels)
                    else:
                        #img, boxes, labels = self.random_crop(img, boxes, labels)
                        img, boxes, landmarks, labels = self.random_crop(
                            img, boxes, landmarks, labels)
                    #img = self.random_bright(img)
                    # img = self.my_random_bright(img)
                    #img, boxes = self.random_flip(img, boxes)
                    #if img is None:
                    #    print fname
                    img, boxes, landmarks = self.random_flip(
                        img, boxes, landmarks)
                    #img = self.addImage(img)
                    boxwh = boxes[:, 2:] - boxes[:, :2]
                    #print('boxwh', boxwh)

                h, w, _ = img.shape
                self.data_encoder = DataEncoder(float(self.image_size))
                #print img
                img = cv2.resize(img, (self.image_size, self.image_size))

                #坐标归一化操作
                for i in range(boxes.shape[0]):
                    if boxes[i][0] == boxes[i][2]:
                        boxes[i][0] += 2
                    if boxes[i][1] == boxes[i][3]:
                        boxes[i][1] += 2
                landmarks /= torch.Tensor([w, h] * 5).expand_as(landmarks)
                boxes /= torch.Tensor([w, h, w, h]).expand_as(boxes)
                for t in self.transform:
                    img = t(img)
                #print boxes
                try:
                    #loc_target,conf_target = self.data_encoder.encode(boxes,labels)
                    loc_target, landmarks_target, conf_target = self.data_encoder.encode(
                        boxes, landmarks, labels, fname)
                    #print loc_target
                    break
                except:
                    idx = random.randint(0, self.num_samples)
                    continue
        #print img.shape
        return img, loc_target, landmarks_target, conf_target

Exemple #14

0

Afficher le fichier

class ListDataset(data.Dataset):
    image_size = 1024

    def __init__(self,
                 root,
                 data_type,
                 list_file,
                 lmdb_path,
                 train,
                 transform,
                 multi_scale=False):
        print('data init')
        self.root = root
        self.data_type = data_type
        self.train = train
        self.transform = transform
        self.fnames = []
        self.boxes = []  # 这里的box是左上角和右下角！！！ x1 y1 x2 y2
        self.landmarks = []
        self.lmdb_path = lmdb_path
        self.labels = []
        self.small_threshold = 20. / self.image_size  # face that small than threshold will be ignored
        self.data_encoder = DataEncoder()
        self.snames = glob.glob('./picture/shuiyin/*')
        self.num_shuiyin = len(self.snames)
        self.multi_scale = multi_scale

        with open(list_file) as f:
            lines = f.readlines()

        for line in lines:
            splited = line.strip().split()
            num_faces = int(splited[1])
            box = []
            label = []
            landmarks = []
            if num_faces <= 0:
                continue
            self.fnames.append(splited[0])
            for i in range(num_faces):
                #x = float(splited[2+5*i])
                #y = float(splited[3+5*i])
                #w = float(splited[4+5*i])
                #h = float(splited[5+5*i])
                #c = int(splited[6+5*i])
                x = float(splited[2 + 15 * i])
                y = float(splited[2 + 1 + 15 * i])
                w = float(splited[2 + 2 + 15 * i])
                h = float(splited[2 + 3 + 15 * i])
                landmarks_ = []
                for j in range(5):
                    pt_x = float(splited[2 + 3 + 15 * i + j * 2 + 1])
                    pt_y = float(splited[2 + 3 + 15 * i + j * 2 + 2])
                    landmarks_.append(pt_x)
                    landmarks_.append(pt_y)
                c = float(splited[2 + 15 * i + 14])
                box.append([x, y, x + w, y + h])
                landmarks.append(landmarks_)
                label.append(c)
            self.boxes.append(torch.Tensor(box))
            self.labels.append(torch.LongTensor(label))
            self.landmarks.append(torch.Tensor(landmarks))
        self.num_samples = len(self.boxes)

    def __getitem__(self, idx):
        while True:
            if self.multi_scale:
                if isinstance(idx, int):
                    self.image_size = 1024
                else:
                    self.image_size = idx[1]
                    idx = idx[0]
            fname = self.fnames[idx]
            if self.data_type == "img":
                img = cv2.imread(os.path.join(self.root, fname))
                # print(fname)
            elif self.data_type == "lmdb":
                img = self.getImg(fname)
                # print(fname)
            #print img.shape
            if img is not None:
                # print self.root,fname
                assert img is not None

                boxes = self.boxes[idx].clone()
                labels = self.labels[idx].clone()
                landmarks = self.landmarks[idx].clone()

                if self.train:
                    img = self.pad_to_square(img)
                    assert img is not None
                    # img, boxes, landmarks = self.random_rot(img, boxes,landmarks)
                    if random.random() > 0.7:
                        img, boxes, landmarks, labels = self.random_crop_edge(
                            img, boxes, landmarks, labels)
                    else:
                        #img, boxes, labels = self.random_crop(img, boxes, labels)
                        img, boxes, landmarks, labels = self.random_crop(
                            img, boxes, landmarks, labels)
                    #img = self.random_bright(img)
                    # img = self.my_random_bright(img)
                    #img, boxes = self.random_flip(img, boxes)
                    #if img is None:
                    #    print fname
                    img, boxes, landmarks = self.random_flip(
                        img, boxes, landmarks)
                    #img = self.addImage(img)
                    boxwh = boxes[:, 2:] - boxes[:, :2]
                    #print('boxwh', boxwh)

                h, w, _ = img.shape
                self.data_encoder = DataEncoder(float(self.image_size))
                #print img
                img = cv2.resize(img, (self.image_size, self.image_size))

                #坐标归一化操作
                for i in range(boxes.shape[0]):
                    if boxes[i][0] == boxes[i][2]:
                        boxes[i][0] += 2
                    if boxes[i][1] == boxes[i][3]:
                        boxes[i][1] += 2
                landmarks /= torch.Tensor([w, h] * 5).expand_as(landmarks)
                boxes /= torch.Tensor([w, h, w, h]).expand_as(boxes)
                for t in self.transform:
                    img = t(img)
                #print boxes
                try:
                    #loc_target,conf_target = self.data_encoder.encode(boxes,labels)
                    loc_target, landmarks_target, conf_target = self.data_encoder.encode(
                        boxes, landmarks, labels, fname)
                    #print loc_target
                    break
                except:
                    idx = random.randint(0, self.num_samples)
                    continue
        #print img.shape
        return img, loc_target, landmarks_target, conf_target

    def getImg(self, ckey):
        lmdb_path = self.lmdb_path
        env = lmdb.open(lmdb_path,
                        max_dbs=8,
                        map_size=int(1e12),
                        readonly=True,
                        lock=False)
        txn = env.begin()
        cbuf = txn.get(ckey)
        arr = np.fromstring(cbuf, np.uint8)
        img = cv2.imdecode(arr, cv2.IMREAD_COLOR)
        return img

    def pad_to_square(self, image):
        if random.random() > 0.5:
            return image
        #print 'pad to square'
        factor = random.random() + 1
        height, width, _ = image.shape
        height_new = int(height * factor)
        width_new = int(width * factor)
        image_new = np.zeros((height_new, width_new, 3), dtype='uint8')
        image_new[0:0 + height, 0:0 + width] = image
        return image_new

    def random_getim(self):
        idx = random.randrange(0, self.num_samples)
        fname = self.fnames[idx]
        img = self.getImg(fname)
        #img = cv2.imread(os.path.join(self.root,fname))
        boxes = self.boxes[idx].clone()
        landmarks = self.landmarks[idx].clone()
        labels = self.labels[idx]

        return img, boxes, landmarks, labels

    def __len__(self):
        return self.num_samples

    # 一个点参照中心进行旋转
    def point_rot(self, xy, org_center, angle):
        org = xy - org_center
        a = np.deg2rad(angle)
        new = np.array([
            org[0] * np.cos(a) + org[1] * np.sin(a),
            -org[0] * np.sin(a) + org[1] * np.cos(a)
        ])
        return new

    def random_rot(self, image, boxes, landmarks):
        if np.random.rand() > 0.5:
            #angle = np.float(20 * np.random.rand() - 10)
            angle = np.float(40 * np.random.rand() - 20)
            #angle = np.float(90 * np.random.rand() - 45)
            im_rot = self.cv2_rot(image, angle)
            h, w, _ = im_rot.shape
            org_center = (np.array(image.shape[:2][::-1]) - 1) / 2.
            rot_center = (np.array(im_rot.shape[:2][::-1]) - 1) / 2.
            for i in range(boxes.shape[0]):
                box = np.array(boxes[i])
                landmarks_ = np.array(landmarks[i])
                temp = []
                temp.extend(
                    self.point_rot(box[0:2], org_center, angle) + rot_center)
                temp.extend(
                    self.point_rot(np.array([box[2], box[1]]), org_center,
                                   angle) + rot_center)
                temp.extend(
                    self.point_rot(np.array([box[0], box[3]]), org_center,
                                   angle) + rot_center)
                temp.extend(
                    self.point_rot(box[2:4], org_center, angle) + rot_center)
                x1 = max(min(temp[::2]), 0)  # 左上角坐标
                y1 = max(min(temp[1::2]), 0)  # 不超过边界
                x2 = min(max(temp[::2]), w)  # 右下角坐标
                y2 = min(max(temp[1::2]), h)
                boxes[i, :] = torch.Tensor(np.array([x1, y1, x2, y2]))

                #rotate landmarks
                for j in range(5):
                    pt_xy = self.point_rot(landmarks_[j * 2:(j * 2 + 2)],
                                           org_center, angle) + rot_center
                    pt_x = max(min(pt_xy[0], w), 0)
                    pt_y = max(min(pt_xy[1], h), 0)
                    landmarks_[j * 2] = pt_x
                    landmarks_[j * 2 + 1] = pt_y
                landmarks[i, :] = torch.Tensor(landmarks_)
            #print 'rot: ',angle
            return im_rot, boxes, landmarks
        else:
            return image, boxes, landmarks

    #用cv2旋转图片
    def cv2_rot(self, img, degree):
        height, width = img.shape[:2]
        # 旋转后的尺寸
        heightNew = int(width * fabs(sin(radians(degree))) +
                        height * fabs(cos(radians(degree))))
        widthNew = int(height * fabs(sin(radians(degree))) +
                       width * fabs(cos(radians(degree))))

        matRotation = cv2.getRotationMatrix2D((width / 2, height / 2), degree,
                                              1)

        matRotation[0, 2] += (widthNew - width) / 2  # 重点在这步，目前不懂为什么加这步
        matRotation[1, 2] += (heightNew - height) / 2  # 重点在这步

        imgRotation = cv2.warpAffine(img,
                                     matRotation, (widthNew, heightNew),
                                     borderValue=(0, 0, 0))
        return imgRotation

    # 对图片随机添加水印
    def addImage(self, img):
        if np.random.rand() > 0.5:
            idx = np.random.randint(0, self.num_shuiyin)
            sname = self.snames[idx]
            img_s = cv2.imread(sname)
            # 把旧的img_s当成mask，然后转换成多颜色的
            if np.random.rand() > 0.5:
                img_color = cv2.imread('./picture/color.jpg')
                h, w, _ = img_s.shape
                img_color = cv2.resize(img_color, (w, h))
                img2gray = cv2.cvtColor(img_s, cv2.COLOR_BGR2GRAY)
                ret, mask = cv2.threshold(img2gray, 25, 255, cv2.THRESH_BINARY)
                img_s = cv2.bitwise_and(img_color, img_color, mask=mask)
            h, w, _ = img.shape
            # 函数要求两张图必须是同一个size
            hs, ws, _ = img_s.shape
            if h / hs < w / ws:
                shape_h = np.random.randint(int(h / 4), h)
                shape_w = int(shape_h * ws / hs)
            else:
                shape_w = np.random.randint(int(w / 4), w)
                shape_h = int(shape_w * hs / ws)
            img2 = cv2.resize(img_s, (shape_w, shape_h))
            # alpha，beta，gamma可调
            beta = 0.2 * np.random.rand()
            gamma = 0
            loc_h = np.random.randint(0, h - shape_h)
            loc_w = np.random.randint(0, w - shape_w)
            img[loc_h:loc_h + shape_h,
                loc_w:loc_w + shape_w, :] = cv2.addWeighted(
                    img[loc_h:loc_h + shape_h, loc_w:loc_w + shape_w, :], 1,
                    img2, beta, gamma)
        return img

    def random_flip(self, im, boxes, landmarks):
        if random.random() < 0.5:
            im_lr = np.fliplr(im).copy()
            h, w, _ = im.shape
            xmin_boxes = w - boxes[:, 2]
            xmax_boxes = w - boxes[:, 0]
            boxes[:, 0] = xmin_boxes
            boxes[:, 2] = xmax_boxes

            landmarks_ = landmarks.clone()
            for j in range(5):
                landmarks_[:, j * 2] = w - landmarks_[:, j * 2]

            #new pt0
            landmarks[:, 0] = landmarks_[:, 2]
            landmarks[:, 1] = landmarks_[:, 3]
            #new pt1
            landmarks[:, 2] = landmarks_[:, 0]
            landmarks[:, 3] = landmarks_[:, 1]
            #new pt2
            landmarks[:, 4] = landmarks_[:, 4]
            landmarks[:, 5] = landmarks_[:, 5]
            #new pt3
            landmarks[:, 6] = landmarks_[:, 8]
            landmarks[:, 7] = landmarks_[:, 9]
            #new pt4
            landmarks[:, 8] = landmarks_[:, 6]
            landmarks[:, 9] = landmarks_[:, 7]
            #print 'ranom flip'
            return im_lr, boxes, landmarks

        return im, boxes, landmarks

    def random_crop_edge(self, im, boxes, landmarks, labels):

        while True:
            # choose bbox
            max_area = -1
            best_bbox = None
            for i in range(boxes.size()[0]):
                bbox = boxes[i, :]
                x1, y1, x2, y2 = bbox
                w = x2 - x1 + 1
                h = y2 - y1 + 1
                area = w * h
                if area > max_area:
                    max_area = area
                    best_bbox = bbox
            #
            w = best_bbox[2] - best_bbox[0]
            h = best_bbox[3] - best_bbox[1]
            ratio = float(h) / w
            if ratio > 2:
                im, boxes, landmarks, labels = self.random_getim()
                #print '循环'
                continue
            elif w <= 0 or h <= 0:
                im, boxes, landmarks, labels = self.random_getim()
                #print '循环1'
                continue
            elif float(w) / im.shape[1] < self.small_threshold or float(
                    h) / im.shape[0] < self.small_threshold:
                im, boxes, landmarks, labels = self.random_getim()
                continue
            else:
                break
        boxes_ori = boxes.clone()
        labels_ori = labels.clone()
        landmarks_ori = landmarks.clone()
        im_ori = im.copy()

        imh, imw, _ = im.shape
        center_x = int((best_bbox[0] + best_bbox[2]) / 2)
        center_x = max(center_x, 0)
        center_x = min(imw - 1, center_x)
        if best_bbox[0] > imw / 2:
            x2 = center_x
            if x2 < imh:
                x1 = 0
                y1 = random.randint(0, imh - x2)
                x2 = x2
                y2 = y1 + x2
            else:
                y1 = 0
                x1 = x2 - imh + 1
                x2 = x2
                y2 = imh - 1
            #
            x1 = max(0, x1)
            y1 = max(0, y1)
            x2 = min(x2, imw - 1)
            y2 = min(y2, imh - 1)

            im_new = im[y1:y2, x1:x2, :]
            boxes[:, 0] = boxes[:, 0] - x1
            boxes[:, 1] = boxes[:, 1] - y1
            boxes[:, 2] = boxes[:, 2] - x1
            boxes[:, 3] = boxes[:, 3] - y1
        else:
            x1 = center_x
            if (imw - x1) < imh:
                x1 = x1
                y1 = random.randint(0, imh - (imw - x1))
                x2 = imw - 1
                y2 = y1 + (x2 - x1)
            else:
                y1 = 0
                x1 = x1
                x2 = x1 + imh - 1
                y2 = imh - 1
            #
            x1 = max(0, x1)
            y1 = max(0, y1)
            x2 = min(x2, imw - 1)
            y2 = min(y2, imh - 1)

            im_new = im[y1:y2, x1:x2, :]
            boxes[:, 0] = boxes[:, 0] - x1
            boxes[:, 1] = boxes[:, 1] - y1
            boxes[:, 2] = boxes[:, 2] - x1
            boxes[:, 3] = boxes[:, 3] - y1

        #delete boxes not in this croped Img
        index = []
        for i in range(boxes.size()[0]):
            x1, y1, x2, y2 = boxes[i]
            #if x1 < 0 and y1 < 0 and x2 >= im.shape[1] and y2 >= im.shape[0]:
            if (x1 <= 0 and x2 <= 0) or (x1 >= im_new.shape[1]
                                         and x2 >= im_new.shape[1]):
                index.append(0)
            elif (y1 <= 0 and y2 <= 0) or (y1 >= im_new.shape[0]
                                           and y2 >= im_new.shape[0]):
                index.append(0)
            elif ((x2 - x1) / im_new.shape[1] < self.small_threshold) or (
                (y2 - y1) / im_new.shape[0] < self.small_threshold):
                index.append(0)
            else:
                index.append(1)

        if sum(index) <= 0:
            return im_ori, boxes_ori, landmarks_ori, labels_ori

        #print torch.LongTensor(index).nonzero().size(),'*****'
        index = torch.LongTensor(index).nonzero().squeeze(1)
        #print 'index nonzero: ',index.nonzero().squeeze(1)
        boxes = boxes.index_select(0, index)
        landmarks = landmarks.index_select(0, index)
        labels = labels.index_select(0, index)
        h, w, _ = im_new.shape
        boxes[:, 0].clamp_(min=0, max=w)
        boxes[:, 1].clamp_(min=0, max=h)
        boxes[:, 2].clamp_(min=0, max=w)
        boxes[:, 3].clamp_(min=0, max=h)

        w = boxes[:, 2] - boxes[:, 0]
        id = torch.sum(w <= 0)
        if id > 0:
            print(boxes[:, 2] - boxes[:, 0], 'w', boxes[:, 2])
        h = boxes[:, 3] - boxes[:, 1]
        id = torch.sum(h <= 0)
        if id > 0:
            print(boxes[:, 3] - boxes[:, 1], 'h', boxes[:, 3])

        return im_new, boxes, landmarks, labels

    def random_crop(self, im, boxes, landmarks, labels):
        imh, imw, _ = im.shape
        short_size = min(imw, imh)
        while True:
            mode = random.choice([None, 0.3, 0.5, 0.7, 0.9])
            #mode = random.choice([None, 0.3])
            if 0:
                if mode is None:
                    boxes_uniform = boxes / torch.Tensor([imw, imh, imw, imh
                                                          ]).expand_as(boxes)
                    #landmarks_uniform = landmarks / torch.Tensor([imw,imh]*5)
                    boxwh = boxes_uniform[:, 2:] - boxes_uniform[:, :2]
                    mask = (boxwh[:, 0] > self.small_threshold) & (
                        boxwh[:, 1] > self.small_threshold)
                    if not mask.any():
                        #print('default image have none box bigger than small_threshold')
                        im, boxes, landmarks, labels = self.random_getim()
                        imh, imw, _ = im.shape
                        short_size = min(imw, imh)
                        continue
                    selected_boxes = boxes.index_select(
                        0,
                        mask.nonzero().squeeze(1))
                    selected_labels = labels.index_select(
                        0,
                        mask.nonzero().squeeze(1))
                    selected_landmarks = landmarks.index_select(
                        0,
                        mask.nonzero().squeeze(1))
                    return im, selected_boxes, selected_landmarks, selected_labels

            #ratio = random.randint(5,20) / 10.0
            if mode is None:
                w = short_size
                h = w
            else:
                w = random.randrange(int(0.3 * short_size), short_size)
                h = w
            for kk in range(10):
                #h = min(int(ratio*w),short_size-1)
                #print imw-w+1,imh-h+1
                x = random.randrange(imw - w + 1)
                y = random.randrange(imh - h + 1)
                roi = torch.Tensor([[x, y, x + w, y + h]])

                center = (boxes[:, :2] + boxes[:, 2:]) / 2
                roi2 = roi.expand(len(center), 4)
                mask = (center > roi2[:, :2]) & (center < roi2[:, 2:])
                mask = mask[:, 0] & mask[:, 1]
                if not mask.any():
                    #im, boxes, landmarks, labels = self.random_getim()
                    #imh, imw, _ = im.shape
                    #short_size = min(imw,imh)
                    continue

                selected_boxes = boxes.index_select(0,
                                                    mask.nonzero().squeeze(1))
                selected_landmarks = landmarks.index_select(
                    0,
                    mask.nonzero().squeeze(1))

                img = im[y:y + h, x:x + w, :]
                selected_boxes[:, 0].add_(-x).clamp_(min=0, max=w)
                selected_boxes[:, 1].add_(-y).clamp_(min=0, max=h)
                selected_boxes[:, 2].add_(-x).clamp_(min=0, max=w)
                selected_boxes[:, 3].add_(-y).clamp_(min=0, max=h)
                for k in range(5):
                    selected_landmarks[:, 2 * k].add_(-x).clamp_(min=0, max=w)
                    selected_landmarks[:, 2 * k + 1].add_(-y).clamp_(min=0,
                                                                     max=h)
                # print('croped')

                boxes_uniform = selected_boxes / torch.Tensor(
                    [w, h, w, h]).expand_as(selected_boxes)
                #landmarks_uniform = selected_landmarks / torch.Tensor([imw,imh]*5)
                boxwh = boxes_uniform[:, 2:] - boxes_uniform[:, :2]
                mask = (boxwh[:, 0] > self.small_threshold) & (
                    boxwh[:, 1] > self.small_threshold)
                if not mask.any():
                    #print('crop image have none box bigger than small_threshold')
                    #im, boxes, landmarks, labels = self.random_getim()
                    #imh, imw, _ = im.shape
                    #short_size = min(imw,imh)
                    continue
                selected_boxes_selected = selected_boxes.index_select(
                    0,
                    mask.nonzero().squeeze(1))
                selected_landmarks_selected = selected_landmarks.index_select(
                    0,
                    mask.nonzero().squeeze(1))
                selected_labels = labels.index_select(
                    0,
                    mask.nonzero().squeeze(1))
                #print 'random crop '
                return img, selected_boxes_selected, selected_landmarks_selected, selected_labels
            im, boxes, landmarks, labels = self.random_getim()
            imh, imw, _ = im.shape
            short_size = min(imw, imh)

    def my_random_bright(self, img):
        alpha = np.random.rand() + 0.5
        beta = np.random.randint(0, 3)
        image = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
        if beta == 0:
            image = ImageEnhance.Brightness(image).enhance(alpha)  #亮度
        elif beta == 1:
            image = ImageEnhance.Contrast(image).enhance(alpha)  #对比度
        else:
            image = ImageEnhance.Color(image).enhance(alpha)  #色度
        img = cv2.cvtColor(np.asarray(image), cv2.COLOR_RGB2BGR)
        return img

    def random_bright(self, im, delta=16):
        alpha = random.random()
        if alpha > 0.3:
            im = im * alpha + random.randrange(-delta, delta)
            im = im.clip(min=0, max=255).astype(np.uint8)
        return im

    def distort(self, image):
        def _convert(image, alpha=1, beta=0):
            tmp = image.astype(float) * alpha + beta
            tmp[tmp < 0] = 0
            tmp[tmp > 255] = 255
            image[:] = tmp

        if random.random() < 0.5:
            return image
        image = image.copy()

        if random.randrange(2):

            #brightness distortion
            if random.randrange(2):
                _convert(image, beta=random.uniform(-32, 32))

            #contrast distortion
            if random.randrange(2):
                _convert(image, alpha=random.uniform(0.5, 1.5))

            image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)

            #saturation distortion
            if random.randrange(2):
                _convert(image[:, :, 1], alpha=random.uniform(0.5, 1.5))

            #hue distortion
            if random.randrange(2):
                tmp = image[:, :, 0].astype(int) + random.randint(-18, 18)
                tmp %= 180
                image[:, :, 0] = tmp

            image = cv2.cvtColor(image, cv2.COLOR_HSV2BGR)

        else:

            #brightness distortion
            if random.randrange(2):
                _convert(image, beta=random.uniform(-32, 32))

            image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)

            #saturation distortion
            if random.randrange(2):
                _convert(image[:, :, 1], alpha=random.uniform(0.5, 1.5))

            #hue distortion
            if random.randrange(2):
                tmp = image[:, :, 0].astype(int) + random.randint(-18, 18)
                tmp %= 180
                image[:, :, 0] = tmp

            image = cv2.cvtColor(image, cv2.COLOR_HSV2BGR)

            #contrast distortion
            if random.randrange(2):
                _convert(image, alpha=random.uniform(0.5, 1.5))

        return image

    def testGet(self, idx):
        fname = self.fnames[idx]
        img = cv2.imread(os.path.join(self.root, fname))
        #cv2.imwrite('test_encoder_source.jpg', img)
        #cv2.imshow('ori img',img)
        boxes = self.boxes[idx].clone()
        landmarks = self.landmarks[idx].clone()
        # print(boxes)
        labels = self.labels[idx].clone()

        #for box in boxes:
        #    cv2.rectangle(img, (int(box[0]),int(box[1])), (int(box[2]),int(box[3])), (0,0,255))
        #cv2.imwrite(fname, img)

        if self.train:
            img = self.pad_to_square(img)
            img, boxes, landmarks = self.random_rot(img, boxes, landmarks)
            img, boxes, landmarks, labels = self.random_crop_edge(
                img, boxes, landmarks, labels)
            #img, boxes,landmarks, labels = self.random_crop(img, boxes,landmarks, labels)
            #img = self.random_bright(img)
            #img, boxes,landmarks = self.random_flip(img, boxes,landmarks)

        h, w, _ = img.shape
        boxes /= torch.Tensor([w, h, w, h]).expand_as(boxes)
        landmarks /= torch.Tensor([w, h] * 5).expand_as(landmarks)

        img = cv2.resize(img, (self.image_size, self.image_size))
        for t in self.transform:
            img = t(img)

        #print(idx, fname, boxes)

        return img, boxes, landmarks, labels