Example 1
    def __init__(self, root, list_file, train, transform, input_size):
        '''
        Args:
          root: (str) directory to images.
          list_file: (str) path to index file.
          train: (boolean) train or test.
          transform: ([transforms]) image transforms.
          input_size: (int) model input size.
        '''
        self.root = root
        self.train = train
        self.transform = transform
        self.input_size = input_size

        self.fnames = []
        self.boxes = []
        self.labels = []

        self.encoder = DataEncoder()

        with open(list_file) as f:
            lines = f.readlines()
            self.num_samples = len(lines)

        for line in lines:
            splited = line.strip().split()
            self.fnames.append(splited[0])
            num_boxes = (len(splited) - 1) // 5
            box = []
            label = []
            for i in range(num_boxes):
                xmin = splited[1+5*i]
                ymin = splited[2+5*i]
                xmax = splited[3+5*i]
                ymax = splited[4+5*i]
                c = splited[5+5*i]
                box.append([float(xmin),float(ymin),float(xmax),float(ymax)])
                label.append(int(c))
            self.boxes.append(torch.Tensor(box))
            self.labels.append(torch.LongTensor(label))
Example 2
class ListDataset(data.Dataset):
    def __init__(self, root, list_file, train, transform, input_size):
        '''
        Args:
          root: (str) directory to images.
          list_file: (str) path to index file.
          train: (boolean) train or test.
          transform: ([transforms]) image transforms.
          input_size: (int) model input size.
        '''
        self.root = root
        self.train = train
        self.transform = transform
        self.input_size = input_size

        self.fnames = []
        self.boxes = []
        self.labels = []

        self.encoder = DataEncoder()

        with open(list_file) as f:
            lines = f.readlines()
            self.num_samples = len(lines)

        for line in lines:
            splited = line.strip().split()
            self.fnames.append(splited[0])
            num_boxes = (len(splited) - 1) // 5
            box = []
            label = []
            for i in range(num_boxes):
                xmin = splited[1+5*i]
                ymin = splited[2+5*i]
                xmax = splited[3+5*i]
                ymax = splited[4+5*i]
                c = splited[5+5*i]
                box.append([float(xmin),float(ymin),float(xmax),float(ymax)])
                label.append(int(c))
            self.boxes.append(torch.Tensor(box))
            self.labels.append(torch.LongTensor(label))

    def __getitem__(self, idx):
        '''Load image.

        Args:
          idx: (int) image index.

        Returns:
          img: (tensor) image tensor.
          boxes: (tensor) bounding boxes, sized [#obj, 4].
          labels: (tensor) class labels, sized [#obj,].
        '''
        # Load image and boxes.
        fname = self.fnames[idx]
        img = Image.open(os.path.join(self.root, fname))
        if img.mode != 'RGB':
            img = img.convert('RGB')

        boxes = self.boxes[idx].clone()
        labels = self.labels[idx]
        size = self.input_size

        # Data augmentation.
        if self.train:
            img, boxes = random_flip(img, boxes)
            img, boxes = random_crop(img, boxes)
            img, boxes = resize(img, boxes, (size,size))
        else:
            img, boxes = resize(img, boxes, size)
            img, boxes = center_crop(img, boxes, (size,size))

        img = self.transform(img)
        return img, boxes, labels

    def collate_fn(self, batch):
        '''Pad images and encode targets.

        Since images are of different sizes, we need to pad them to the same size.

        Args:
          batch: (list) of images, cls_targets, loc_targets.

        Returns:
          padded images, stacked cls_targets, stacked loc_targets.
        '''
        imgs = [x[0] for x in batch]
        boxes = [x[1] for x in batch]
        labels = [x[2] for x in batch]

        h = w = self.input_size
        num_imgs = len(imgs)
        inputs = torch.zeros(num_imgs, 3, h, w)

        loc_targets = []
        cls_targets = []
        for i in range(num_imgs):
            inputs[i] = imgs[i]
            loc_target, cls_target = self.encoder.encode(boxes[i], labels[i], input_size=(w,h))
            loc_targets.append(loc_target)
            cls_targets.append(cls_target)
        return inputs, torch.stack(loc_targets), torch.stack(cls_targets)

    def __len__(self):
        return self.num_samples
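
A minimal usage sketch for the dataset above, assuming the usual imports and an index file whose lines follow the `name xmin ymin xmax ymax class ...` format parsed in `__init__`; the paths and hyper-parameters below are placeholders:

import torch
import torchvision.transforms as transforms

transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
])
trainset = ListDataset(root='./images', list_file='./train.txt', train=True,
                       transform=transform, input_size=600)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=8, shuffle=True,
                                          num_workers=4,
                                          collate_fn=trainset.collate_fn)
# Each batch is (images [N,3,H,W], loc_targets, cls_targets) built by collate_fn.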
Example 3
def test_eval():
    id_net.eval()
    fnames = []
    ids = []
    ids_list = list(range(2874))
    im_name_list = []
    root = "./../face_a/train"
    encoder = DataEncoder()
    list_file = "./../face_a/train.csv"
    file_list = csv.reader(open(list_file,'r'))
    file_list = list(file_list)
    # 2874
    for content_counter in range(len(file_list)):
        fnames.append(os.path.join(root, file_list[content_counter][0]))
        ids.append(int(file_list[content_counter][1]))
    
    for id_counter in range(2874):
        seq_num = ids.index(id_counter)
        im_name_list.append(fnames[seq_num])
        del(ids[seq_num])
        del(fnames[seq_num])

    im_name_valid = fnames[:400]
    im_name_train = fnames[400:]+im_name_list
    ids_valid = ids[:400]
    ids_train = ids[400:]+ids_list

    eval_list_feature = torch.zeros(len(ids_list), 1024)
    for i in range(len(ids_list)):
        name = im_name_list[i]
        img = Image.open(name).convert('RGB')
        img = alignment(img)
        img, img_ = transform(img), transform(F.hflip(img))
        img, img_ = Variable(img.unsqueeze(0).cuda(), volatile=True), Variable(img_.unsqueeze(0).cuda(),
                                                                                  volatile=True)
        print(i)
        face_feature = torch.cat((id_net(img), id_net(img_)), 1).data.cpu()[0]
        eval_list_feature[i,:] = face_feature  
    
    id_ = []
    for i in range(len(ids_valid)):
        #pdb.set_trace()
        name = im_name_valid[i]
        img = Image.open(name).convert('RGB') 
        
        img = alignment(img)
        img, img_ = transform(img), transform(F.hflip(img))
        img, img_ = Variable(img.unsqueeze(0).cuda(), volatile=True), Variable(img_.unsqueeze(0).cuda(),
                                                                                  volatile=True)
        face_feature = torch.cat((id_net(img), id_net(img_)), 1).data.cpu()[0]
        dis = []
        for gallery_counter in range(eval_list_feature.size(0)):
            f1 = eval_list_feature[gallery_counter, :]
            f2 = face_feature
            cos_dis = f1.dot(f2) / (f1.norm() * f2.norm() + 1e-5)
            dis.append(float(cos_dis))
        id_num = dis.index(max(dis))
        id_.append(str(ids_list[id_num]))
    pdb.set_trace()
    acc_counter = 0
    for id_counter in range(len(id_)):
        # id_ stores string ids while ids_valid stores ints, so cast before comparing.
        if int(id_[id_counter]) == ids_valid[id_counter]:
            acc_counter += 1
    print(acc_counter/400.0)
Example 4
image = Image.open('IMG_3321.JPG').convert('RGB')

image = image.resize((1280, 960))
img = image.copy()
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
])
print(image.size)
image = transform(image)

# forward
loc_preds = traced_script_module(image.unsqueeze(0).cuda())
loc_preds = loc_preds.argmax()
print(loc_preds)
encoder = DataEncoder()
ref_table = encoder._get_anchor_boxes(torch.Tensor([1280, 960]))

boxes = [ref_table[loc_preds]]
box = boxes[0]
print(boxes)
box[0] = (box[0] - box[2] / 2)
box[1] = (box[1] - box[3] / 2)
box[2] = (box[2] + box[0])
box[3] = (box[3] + box[1])

print(ref_table[215999])

draw = ImageDraw.Draw(img)

draw.rectangle(list(box), outline='red')
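
The four in-place assignments above convert a single anchor from center-size form (cx, cy, w, h) into corner form (xmin, ymin, xmax, ymax). A small sketch of the same conversion vectorized over an [N,4] anchor tensor, assuming torch is already imported:

def center_to_corner(anchors):
    # anchors: [N,4] tensor in (cx, cy, w, h) order.
    xy, wh = anchors[:, :2], anchors[:, 2:]
    return torch.cat([xy - wh / 2, xy + wh / 2], 1)  # (xmin, ymin, xmax, ymax)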
Example 5
    transforms.ToTensor(),
    transforms.Normalize(cfg.mean, cfg.std)
]
if cfg.scale is not None:
    train_transform_list.insert(0, transforms.Scale(cfg.scale))
train_transform = transforms.Compose(train_transform_list)
val_transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize(cfg.mean, cfg.std)])

trainset = VocLikeDataset(image_dir=cfg.image_dir,
                          annotation_dir=cfg.annotation_dir,
                          imageset_fn=cfg.train_imageset_fn,
                          image_ext=cfg.image_ext,
                          classes=cfg.classes,
                          encoder=DataEncoder(),
                          transform=train_transform)
valset = VocLikeDataset(image_dir=cfg.image_dir,
                        annotation_dir=cfg.annotation_dir,
                        imageset_fn=cfg.val_imageset_fn,
                        image_ext=cfg.image_ext,
                        classes=cfg.classes,
                        encoder=DataEncoder(),
                        transform=val_transform)
trainloader = torch.utils.data.DataLoader(trainset,
                                          batch_size=cfg.batch_size,
                                          shuffle=True,
                                          num_workers=cfg.num_workers,
                                          collate_fn=trainset.collate_fn)
valloader = torch.utils.data.DataLoader(valset,
                                        batch_size=cfg.batch_size,
Example 6
class BottleLoader(Dataset):
    def __init__(self,
                 dir,
                 encoder,
                 json_suffix='',
                 transform=None,
                 val=False):
        self.dir = dir
        self.json_suffix = json_suffix
        self.transform = transform
        # The caller passes in the box/label encoder (e.g. a DataEncoder instance).
        self.encoder = encoder

        files = listdir(self.dir)
        prefixes = list(
            map(lambda f: f.replace('.jpg', ''),
                filter(lambda f: '.jpg' in f, files)))
        prefixes = list(map(lambda f: path.join(self.dir, f), prefixes))

        self.impath = list(map(lambda f: f'{f}.jpg', prefixes))
        self.annotations = list(
            map(lambda f: f'{f}{self.json_suffix}.json', prefixes))

        labelset = set()
        for p in self.annotations:
            with open(p, 'r') as f:
                j = json.load(f)
            labelset = labelset.union(set(map(lambda f: f['id'], j)))
        self.label_index = dict((k, v) for v, k in enumerate(labelset))

        self.val = val

    def annotate(self, fname, imsize):
        boxes = []
        with open(fname, 'r') as f:
            groups = json.load(f)
        coords, labels = [], []
        for group in groups:
            for obj in group['data']:
                boxes.append(
                    BoundingBox(
                        obj['boundingBox']['X'],
                        obj['boundingBox']['Y'] + obj['boundingBox']['Height'],
                        obj['boundingBox']['X'] + obj['boundingBox']['Width'],
                        obj['boundingBox']['Y'], imsize[0], imsize[1],
                        self.label_index[group['id']]))
        return boxes

    # NOTE: this __getitem__ is dead code: it is shadowed by the second definition
    # below and references attributes this class never sets (self.metadata, sizeremap).
    def __getitem__(self, i):
        data = list(self.metadata['paths'][i])
        shape = self.metadata['shape'][i]
        img = np.array(Image.open(data[0]))
        img = resize(img, (sizeremap[shape[0]], sizeremap[shape[1]]))
        img = torch.Tensor(img.transpose(2, 0, 1))

        coords = torch.Tensor(np.stack(coords))
        labels = torch.LongTensor(
            np.array(list(map(self.metadata['label_index'].get,
                              labels)))).view(-1, 1)
        return img, coords, labels

    def __getitem__(self, index):
        impath = self.impath[index]
        annotation = self.annotations[index]
        image = Image.open(impath)
        boxes = self.annotate(annotation, image.size)
        example = {'image': image, 'boxes': boxes}
        if self.transform:
            example = self.transform(example)
        return example

    def __len__(self):
        return len(self.impath)

    def collate_fn(self, batch):
        imgs = [example['image'] for example in batch]
        boxes = [example['boxes'] for example in batch]
        labels = [example['labels'] for example in batch]
        img_sizes = [img.size()[1:] for img in imgs]

        max_h = max([im.size(1) for im in imgs])
        max_w = max([im.size(2) for im in imgs])
        num_imgs = len(imgs)
        inputs = torch.zeros(num_imgs, 3, max_h, max_w)

        loc_targets = []
        cls_targets = []
        for i in range(num_imgs):
            im = imgs[i]
            imh, imw = im.size(1), im.size(2)
            inputs[i, :, :imh, :imw] = im

            loc_target, cls_target = self.encoder.encode(boxes[i],
                                                         labels[i],
                                                         input_size=(max_w,
                                                                     max_h))
            loc_targets.append(loc_target)
            cls_targets.append(cls_target)
        if not self.val:
            return inputs, torch.stack(loc_targets), torch.stack(cls_targets)
        return inputs, img_sizes, torch.stack(loc_targets), torch.stack(
            cls_targets)
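
A hedged usage sketch for BottleLoader, assuming a directory of paired *.jpg / *.json files and a transform that converts the {'image', 'boxes'} example dict into tensors and adds the 'labels' key that collate_fn expects; the names below are illustrative only:

from torch.utils.data import DataLoader

dataset = BottleLoader(dir='./bottles', encoder=DataEncoder(),
                       transform=example_transform)  # example_transform is hypothetical
loader = DataLoader(dataset, batch_size=4, shuffle=True,
                    collate_fn=dataset.collate_fn)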
Example 7
class ListDataset(data.Dataset):
    def __init__(self, root, list_file, train, transform, input_size,
                 max_size):
        '''
        Args:
          root: (str) directory to images.
          list_file: (str) path to index file.
          train: (boolean) train or test.
          transform: ([transforms]) image transforms.
          input_size: (int) image shorter side size.
          max_size: (int) maximum image longer side size.
        '''
        self.root = root
        self.train = train
        self.transform = transform
        self.input_size = input_size
        self.max_size = max_size

        self.fnames = []
        self.boxes = []
        self.labels = []

        self.data_encoder = DataEncoder()

        with open(list_file) as f:
            lines = f.readlines()
            self.num_samples = len(lines)

        for line in lines:
            splited = line.strip().split()
            self.fnames.append(splited[0])
            num_boxes = (len(splited) - 3) // 5
            box = []
            label = []
            for i in range(num_boxes):
                xmin = splited[3 + 5 * i]
                ymin = splited[4 + 5 * i]
                xmax = splited[5 + 5 * i]
                ymax = splited[6 + 5 * i]
                c = splited[7 + 5 * i]
                box.append(
                    [float(xmin),
                     float(ymin),
                     float(xmax),
                     float(ymax)])
                label.append(int(c))
            self.boxes.append(torch.Tensor(box))
            self.labels.append(torch.LongTensor(label))

    def __getitem__(self, idx):
        '''Load image.

        Args:
          idx: (int) image index.

        Returns:
          img: (tensor) image tensor.
          boxes: (tensor) bounding boxes, sized [#obj, 4].
          labels: (tensor) class labels, sized [#obj,].
        '''
        # Load image and bbox locations.
        fname = self.fnames[idx]
        img = Image.open(os.path.join(self.root, fname))
        boxes = self.boxes[idx]
        labels = self.labels[idx]

        # Data augmentation while training.
        if self.train:
            img, boxes = self.random_flip(img, boxes)

        img, im_scale = self.resize(img)
        boxes *= im_scale
        img = self.transform(img)
        return img, boxes, labels

    def resize(self, img):
        '''Resize the image shorter side to input_size.

        Args:
          img: (PIL.Image) image.

        Returns:
          (PIL.Image) resized image.
          (float) image scale.

        Reference:
          https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/utils/blob.py
        '''
        im_size_min = min(img.size)
        im_size_max = max(img.size)
        im_scale = float(self.input_size) / float(im_size_min)
        if round(im_scale * im_size_max
                 ) > self.max_size:  # limit the longer side to MAX_SIZE
            im_scale = float(self.max_size) / float(im_size_max)
        w = int(img.width * im_scale)
        h = int(img.height * im_scale)
        return img.resize((w, h)), im_scale

    def random_flip(self, img, boxes):
        '''Randomly flip the image and adjust the bbox locations.

        For bbox (xmin, ymin, xmax, ymax), the flipped bbox is:
        (w-xmax, ymin, w-xmin, ymax).

        Args:
          img: (PIL.Image) image.
          boxes: (tensor) bbox locations, sized [#obj, 4].

        Returns:
          img: (PIL.Image) randomly flipped image.
          boxes: (tensor) randomly flipped bbox locations, sized [#obj, 4].
        '''
        if random.random() < 0.5:
            img = img.transpose(Image.FLIP_LEFT_RIGHT)
            w = img.width
            xmin = w - boxes[:, 2]
            xmax = w - boxes[:, 0]
            boxes[:, 0] = xmin
            boxes[:, 2] = xmax
        return img, boxes

    def collate_fn(self, batch):
        '''Pad images and encode targets.

        Since images are of different sizes, we need to pad them to the same size.

        Args:
          batch: (list) of images, cls_targets, loc_targets.

        Returns:
          (list) of padded images, stacked cls_targets, stacked loc_targets.

        Reference:
          https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/utils/blob.py
        '''
        imgs = [x[0] for x in batch]
        boxes = [x[1] for x in batch]
        labels = [x[2] for x in batch]

        max_size, _ = torch.IntTensor([im.size() for im in imgs]).max(0)
        max_h, max_w = max_size[1], max_size[2]
        num_imgs = len(imgs)
        inputs = torch.zeros(num_imgs, 3, max_h, max_w)

        loc_targets = []
        cls_targets = []
        for i in range(num_imgs):
            im = imgs[i]
            imh, imw = im.size(1), im.size(2)
            inputs[i, :, :imh, :imw] = im

            # Encode data.
            loc_target, cls_target = self.data_encoder.encode(
                boxes[i], labels[i], input_size=(max_h, max_w))
            loc_targets.append(loc_target)
            cls_targets.append(cls_target)
        return inputs, torch.stack(loc_targets), torch.stack(cls_targets)

    def __len__(self):
        return self.num_samples
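
A quick arithmetic check of the resize() rule above (shorter side scaled to input_size, longer side capped at max_size), assuming input_size=600 and max_size=1000 as in the py-faster-rcnn reference:

# Image 500 x 1200: scaling the short side to 600 would make the long side 1440,
# which exceeds max_size=1000, so the scale is capped by the long side instead.
im_scale = 600.0 / 500.0            # 1.2
if round(im_scale * 1200) > 1000:
    im_scale = 1000.0 / 1200.0      # ~0.833
print(int(500 * im_scale), int(1200 * im_scale))  # roughly 416 x 1000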
Example 8
class ListDataset(data.Dataset):
	img_size = InputImgSize

	def __init__(self, root, list_file, train, transform):
		'''
		Args:
		  root: (str) directory to images.
		  list_file: (str) path to index file.
		  train: (boolean) train or test.
		  transform: ([transforms]) image transforms.
		'''
		self.root = root
		self.train = train
		self.transform = transform

		self.fnames = []
		self.boxes = []
		self.labels = []

		self.data_encoder = DataEncoder()

		with open(list_file) as f:
			lines = f.readlines()
			self.num_samples = len(lines)

		for line in lines:
			splited = line.strip().split()
			self.fnames.append(splited[0])

			num_objs = int(splited[1])
			box = []
			label = []
			for i in range(num_objs):
				xmin = splited[2+5*i]
				ymin = splited[3+5*i]
				xmax = splited[4+5*i]
				ymax = splited[5+5*i]
				c = splited[6+5*i]
				box.append([float(xmin),float(ymin),float(xmax),float(ymax)])
				label.append(int(c))
			self.boxes.append(torch.Tensor(box))
			self.labels.append(torch.LongTensor(label))

	def __getitem__(self, idx):
		'''Load an image, and encode its bbox locations and class labels.

		Args:
		  idx: (int) image index.

		Returns:
		  img: (tensor) image tensor.
		  loc_target: (tensor) location targets, sized [8732,4].
		  conf_target: (tensor) label targets, sized [8732,].
		'''
		# Load image and bbox locations.
		fname = self.fnames[idx]
	
		# Load the image as single-channel grayscale ('L').
		img = Image.open(os.path.join(self.root, fname)).convert('L')
		boxes = self.boxes[idx].clone()
		labels = self.labels[idx]

		# Data augmentation while training.
		#if self.train:
		#   img, boxes = self.random_flip(img, boxes)
		#  img, boxes, labels = self.random_crop(img, boxes, labels)

		# Scale bbox locations to [0,1].
		w,h = img.size
		boxes /= torch.Tensor([w,h,w,h]).expand_as(boxes)

		img = img.resize((self.img_size,self.img_size))
		

		img = self.transform(img)


		# Encode loc & conf targets.
		loc_target, conf_target = self.data_encoder.encode(boxes, labels)


		return img, loc_target, conf_target


	def random_crop(self, img, boxes, labels):
		'''Randomly crop the image and adjust the bbox locations.

		For more details, see 'Chapter2.2: Data augmentation' of the paper.

		Args:
		  img: (PIL.Image) image.
		  boxes: (tensor) bbox locations, sized [#obj, 4].
		  labels: (tensor) bbox labels, sized [#obj,].

		Returns:
		  img: (PIL.Image) cropped image.
		  selected_boxes: (tensor) selected bbox locations.
		  labels: (tensor) selected bbox labels.
		'''
		imw, imh = img.size
		while True:
			min_iou = random.choice([None, 0.1, 0.3, 0.5, 0.7, 0.9])
			if min_iou is None:
				return img, boxes, labels

			for _ in range(100):
				w = random.randrange(int(0.1*imw), imw)
				h = random.randrange(int(0.1*imh), imh)

				if h > 2*w or w > 2*h:
					continue

				x = random.randrange(imw - w)
				y = random.randrange(imh - h)
				roi = torch.Tensor([[x, y, x+w, y+h]])

				center = (boxes[:,:2] + boxes[:,2:]) / 2  # [N,2]
				roi2 = roi.expand(len(center), 4)  # [N,4]
				mask = (center > roi2[:,:2]) & (center < roi2[:,2:])  # [N,2]
				mask = mask[:,0] & mask[:,1]  #[N,]
				if not mask.any():
					continue

				selected_boxes = boxes.index_select(0, mask.nonzero().squeeze(1))

				iou = self.data_encoder.iou(selected_boxes, roi)
				if iou.min() < min_iou:
					continue

				img = img.crop((x, y, x+w, y+h))
				selected_boxes[:,0].add_(-x).clamp_(min=0, max=w)
				selected_boxes[:,1].add_(-y).clamp_(min=0, max=h)
				selected_boxes[:,2].add_(-x).clamp_(min=0, max=w)
				selected_boxes[:,3].add_(-y).clamp_(min=0, max=h)
				return img, selected_boxes, labels[mask]

	def __len__(self):
		return self.num_samples
Example 9
class ListDataset(data.Dataset):
    def __init__(self, root, list_file, train, transform, input_size):
        '''
        Args:
          root: (str) directory to images. ".data"
          list_file: (str) path to index file. '.data/find_star_split/find_star_train_bbx_gt.txt'
          train: (boolean) train or test.
          transform: ([transforms]) image transforms.
          input_size: (int) model input size. (800 * 800)
        '''
        self.root = root
        self.train = train
        self.transform = transform
        self.input_size = input_size

        self.fnames = []  # stores the image names
        self.boxes = []
        self.labels = []

        self.encoder = DataEncoder()

        with open(list_file) as f:
            lines = f.readlines()
            self.num_samples = len(lines)

        for line in lines:
            splited = line.strip().split()
            self.fnames.append(splited[0])
            num_boxes = (len(splited) - 1) // 5
            box = []
            label = []
            for i in range(num_boxes):
                xmin = splited[1+5*i]
                ymin = splited[2+5*i]
                xmax = splited[3+5*i]
                ymax = splited[4+5*i]
                c = splited[5+5*i]
                box.append([float(xmin),float(ymin),float(xmax),float(ymax)])
                label.append(int(c))
            self.boxes.append(torch.Tensor(box))
            self.labels.append(torch.LongTensor(label))

    def __getitem__(self, idx):
        '''Load image.

        Args:
          idx: (int) image index.

        Returns:
          img: (tensor) image tensor.
          boxes: (tensor) bounding boxes, sized [#obj, 4].
          labels: (tensor) class labels, sized [#obj,].
        '''
        # Load image and boxes.
        fname = self.fnames[idx]

        prefix_name = fname[:2]

        # Train and test images share the same root/prefix/filename layout.
        image_path = self.root + '/' + prefix_name + '/' + fname
        
        # img = Image.open(os.path.join(self.root, fname))

        img_a = Image.open(image_path + '_a.jpg')
        img_b = Image.open(image_path + '_b.jpg')
        img_c = Image.open(image_path + '_c.jpg')
        img = Image.merge('RGB', (img_a, img_b, img_c))


        # if img.mode != 'RGB':
            # img = img.convert('RGB')

        boxes = self.boxes[idx].clone()
        labels = self.labels[idx]
        size = self.input_size

        # Data augmentation.
        if self.train:
            img, boxes = random_flip(img, boxes)
            img, boxes = random_crop(img, boxes)
            img, boxes = resize(img, boxes, (size,size))
        else:
            img, boxes = resize(img, boxes, size)
            img, boxes = center_crop(img, boxes, (size,size))

        img = self.transform(img)
        # if self.transforms is not None:
        #     # if img is a byte or uint8 array, it will convert from 0-255 to 0-1
        #     # this converts from (HxWxC) to (CxHxW) as well
        #     img_a, img_b, img_c = image
        #     img_a = self.transforms(img_a)
        #     img_b = self.transforms(img_b)
        #     img_c = self.transforms(img_c)
        #     img = (img_a, img_b, img_c)

        return img, boxes, labels

    def collate_fn(self, batch):
        '''Pad images and encode targets.

        Since images are of different sizes, we need to pad them to the same size.

        Args:
          batch: (list) of images, cls_targets, loc_targets.

        Returns:
          padded images, stacked cls_targets, stacked loc_targets.
        '''
        imgs = [x[0] for x in batch]
        boxes = [x[1] for x in batch]
        labels = [x[2] for x in batch]

        h = w = self.input_size
        num_imgs = len(imgs)
        inputs = torch.zeros(num_imgs, 3, h, w)

        loc_targets = []
        cls_targets = []
        for i in range(num_imgs):
            inputs[i] = imgs[i]
            loc_target, cls_target = self.encoder.encode(boxes[i], labels[i], input_size=(w,h))
            loc_targets.append(loc_target)
            cls_targets.append(cls_target)
        return inputs, torch.stack(loc_targets), torch.stack(cls_targets)

    def __len__(self):
        return self.num_samples
Example 10
class ListDataset(data.Dataset):
    def __init__(self,
                 list_file,
                 root=None,
                 train=True,
                 transform=None,
                 image_size=96,
                 small_threshold=5,
                 big_threshold=60,
                 setmin=6,
                 setmax=50,
                 fm_size=None,
                 ac_size=None,
                 ac_density=None,
                 stride=4,
                 offset=12):
        print('data init')
        self.image_size = image_size
        self.root = root
        self.train = train
        self.transform = transform
        self.fnames = []
        self.boxes = []
        self.labels = []
        self.small_threshold = float(
            small_threshold)  #img_48:8,45,10,40  img_36:8,36,10,35
        self.big_threshold = float(big_threshold)
        self.data_encoder = DataEncoder(img_size=image_size,
                                        fm_size=fm_size,
                                        ac_size=ac_size,
                                        ac_density=ac_density,
                                        stride=stride,
                                        offset=offset)
        self.setmin = setmin
        self.setmax = setmax

        with open(list_file) as f:
            lines = f.readlines()

        for line in lines:
            splited = line.strip().split()
            self.fnames.append(splited[0])
            num_faces = int(splited[1])
            box = []
            label = []
            for i in range(num_faces):
                x = float(splited[2 + 5 * i])
                y = float(splited[3 + 5 * i])
                w = float(splited[4 + 5 * i])
                h = float(splited[5 + 5 * i])
                c = int(splited[6 + 5 * i])
                box.append([x, y, x + w, y + h])
                label.append(c)
            self.boxes.append(torch.Tensor(box))
            self.labels.append(torch.LongTensor(label))
        self.num_samples = len(self.boxes)

    def __getitem__(self, idx):
        while True:
            fname = self.fnames[idx]
            img = cv2.imread(os.path.join(self.root + fname))
            if img is None:
                idx = random.randrange(0, self.num_samples)
                continue
            imh, imw, _ = img.shape
            boxes = self.boxes[idx].clone()
            labels = self.labels[idx].clone()
            boxwh = boxes[:, 2:] - boxes[:, :2]
            center = (boxes[:, :2] + boxes[:, 2:]) / 2.
            #			boxar = boxwh[:,0] * boxwh[:,1]
            ratio = boxwh.max(1)[0] / boxwh.min(1)[0]
            mask = (boxwh[:, 0] >=
                    self.setmin) & (boxwh[:, 1] >= self.setmin) & (
                        ratio < float(self.setmax) / self.setmin) & (
                            center[:, 0] > 0) & (center[:, 0] < imw - 1) & (
                                center[:, 1] > 0) & (center[:, 1] < imh - 1)
            if mask.any():
                break
            else:
                idx = random.randrange(0, self.num_samples)
        if self.train:
            while True:
                bbox_idx = random.randint(0, boxwh.size(0) - 1)
                #				area = boxwh[bbox_idx][0]*boxwh[bbox_idx][1]
                #				if area >= self.setmin**2:
                if mask[bbox_idx]:
                    break
#			if area > self.setmax**2:
            if max(boxwh[bbox_idx][0], boxwh[bbox_idx][1]) > self.setmax:
                oh, ow, _ = img.shape
                fct_min = self.setmin / min(boxwh[bbox_idx][0],
                                            boxwh[bbox_idx][1])
                fct_max = self.setmax / max(boxwh[bbox_idx][0],
                                            boxwh[bbox_idx][1])
                #				tgt_size = random.randint(self.setmin, self.setmax)
                #				factor = tgt_size / math.sqrt(area)
                #				factor = tgt_size / max(boxwh[bbox_idx][0], boxwh[bbox_idx][1])
                factor = random.uniform(fct_min, fct_max)
                img = cv2.resize(img, (0, 0), fx=factor, fy=factor)
                h, w, _ = img.shape
                boxes *= torch.Tensor([
                    float(w) / ow,
                    float(h) / oh,
                    float(w) / ow,
                    float(h) / oh
                ]).expand_as(boxes)
                new_center = (boxes[:, :2] + boxes[:, 2:]) / 2
                tmp = (new_center[:, 0] > 0) & (new_center[:, 0] < w) & (
                    new_center[:, 1] > 0) & (new_center[:, 1] < h)
                if not tmp.any():
                    print('center:', center)
                    print(imw, imh)
                    print('new_center:', new_center)
                    print(w, h)
                assert tmp.any()

            else:
                h, w, _ = img.shape
                center = (boxes[:, :2] + boxes[:, 2:]) / 2
                tmp = (center[:, 0] > 0) & (center[:, 0] < w - 1) & (
                    center[:, 1] > 0) & (center[:, 1] < h - 1)
                if not tmp.any():
                    print('center:', center)
                    print(w, h)
                assert tmp.any()

            boxwh = boxes[:, 2:] - boxes[:, :2]
            new_mask = (boxwh[:, 0] > self.small_threshold) & (
                boxwh[:, 1] > self.small_threshold) & (
                    boxwh[:, 0] < self.big_threshold) & (boxwh[:, 1] <
                                                         self.big_threshold)
            if not new_mask.any():
                print(boxes)
            assert new_mask.any()

            if max(h, w) < self.image_size:
                img, boxes, labels = self.supple_filter(img, boxes, labels)
            elif h >= self.image_size and w >= self.image_size:
                img, boxes, labels = self.random_crop(img, boxes, labels,
                                                      bbox_idx)
            else:
                img, boxes, labels = self.supple(img, boxes, labels)
                img, boxes, labels = self.random_crop(img, boxes, labels,
                                                      bbox_idx)
            if random.random() < 0.5:
                img = self.random_bright(img)
                img = self.random_contrast(img)
                img = self.random_saturation(img)
                img = self.random_hue(img)
            else:
                img = self.random_bright(img)
                img = self.random_saturation(img)
                img = self.random_hue(img)
                img = self.random_contrast(img)
            img, boxes = self.random_flip(img, boxes)
            boxwh = boxes[:, 2:] - boxes[:, :2]
            # print('boxwh', boxwh)

        h, w, _ = img.shape
        assert (h == w and h == self.image_size)
        #		img = cv2.resize(img,(self.image_size,self.image_size))

        boxes_wh = boxes[:, 2:] - boxes[:, :2]
        if ((boxes_wh[:, 0] == 0) | (boxes_wh[:, 1] == 0)).any():
            print(boxes)


#		save_path = '/home/michael/data/tmp/wider_acn/'
#		cv2.imwrite(save_path+'%d_old.jpg'%idx, img)
#		self.visual(img, boxes, idx)
#		cv2.imwrite(save_path+'%d_new.jpg'%idx, img)
#		print 'idx:', idx
#		print 'boxes:', boxes
#		print 'label:', labels
        boxes /= torch.Tensor([w, h, w, h]).expand_as(boxes)
        for t in self.transform:
            img = t(img)
        loc_target, conf_target = self.data_encoder.encode(idx, boxes, labels)

        return img, loc_target, conf_target

    def random_getim(self):
        idx = random.randrange(0, self.num_samples)
        fname = self.fnames[idx]
        img = cv2.imread(os.path.join(self.root + fname))
        boxes = self.boxes[idx].clone()
        labels = self.labels[idx]

        return img, boxes, labels

    def __len__(self):
        return self.num_samples

    def random_flip(self, im, boxes):
        if random.random() < 0.5:
            im_lr = np.fliplr(im).copy()
            h, w, _ = im.shape
            xmin = w - boxes[:, 2]
            xmax = w - boxes[:, 0]
            boxes[:, 0] = xmin
            boxes[:, 2] = xmax
            return im_lr, boxes
        return im, boxes

    def visual(self, im, boxes, idx):
        save_path = '/home/michael/data/tmp/wider_acn/%d.jpg' % idx
        for j, (box) in enumerate(boxes):
            x1 = int(box[0])
            x2 = int(box[2])
            y1 = int(box[1])
            y2 = int(box[3])
            cv2.rectangle(im, (x1, y1 + 2), (x2, y2), (0, 255, 0), 2)
        cv2.imwrite(save_path, im)

    def supple(self, im, boxes, labels):
        h, w, _ = im.shape
        im = cv2.copyMakeBorder(im,
                                0,
                                max(0, self.image_size - h),
                                0,
                                max(0, self.image_size - w),
                                cv2.BORDER_CONSTANT,
                                value=0)
        return im, boxes, labels

    def supple_filter(self, im, boxes, labels):
        h, w, _ = im.shape
        im = cv2.copyMakeBorder(im,
                                0,
                                max(0, self.image_size - h),
                                0,
                                max(0, self.image_size - w),
                                cv2.BORDER_CONSTANT,
                                value=0)
        boxwh = boxes[:, 2:] - boxes[:, :2]
        mask = (boxwh[:, 0] > self.small_threshold) & (
            boxwh[:, 1] > self.small_threshold) & (
                boxwh[:, 0] < self.big_threshold) & (boxwh[:, 1] <
                                                     self.big_threshold)
        if not mask.any():
            print(boxes)
        assert mask.any()
        selected_boxes = boxes.index_select(0, mask.nonzero().squeeze(1))
        selected_labels = labels.index_select(0, mask.nonzero().squeeze(1))
        return im, selected_boxes, selected_labels

    def random_crop(self, im, boxes, labels, bbox_idx):
        imh, imw, _ = im.shape
        w = self.image_size
        h = w
        tgt_box = boxes[bbox_idx]
        #print 'tgt:', tgt_box
        if tgt_box[0] <= 0 or imw == w:
            x = 0
        elif tgt_box[2] >= imw:
            x = imw - 1 - w
        else:
            x_min = int(max(0, tgt_box[2] - w))
            x_max = int(min(tgt_box[0], imw - w))
            x = random.randint(x_min, x_max)
        if tgt_box[1] <= 0 or imh == h:
            y = 0
        elif tgt_box[3] >= imh:
            y = imh - 1 - h
        else:
            y_min = int(max(0, tgt_box[3] - h))
            y_max = int(min(tgt_box[1], imh - h))
            y = random.randint(y_min, y_max)
        #print 'xy:', x, y
        roi = torch.Tensor([[x, y, x + w, y + h]])
        center = (boxes[:, :2] + boxes[:, 2:]) / 2
        roi2 = roi.expand(len(center), 4)
        mask = (center > roi2[:, :2]) & (center < roi2[:, 2:] + 1)
        mask = mask[:, 0] & mask[:, 1]
        if not mask.any():
            print('roi:', roi)
            print('center:', center)
            print('box:', boxes)
            print('img:', imw, imh)
        assert mask.any()

        selected_boxes = boxes.index_select(0, mask.nonzero().squeeze(1))
        img = im[y:y + h, x:x + w, :]
        tmph, tmpw, _ = img.shape
        if tmph != tmpw:
            print(tgt_box[0], tgt_box[2], x, y, imw, imh, tmph, tmpw)
        assert tmph == tmpw
        selected_boxes[:, 0].add_(-x)  #.clamp_(min=0, max=w)
        selected_boxes[:, 1].add_(-y)  #.clamp_(min=0, max=h)
        selected_boxes[:, 2].add_(-x)  #.clamp_(min=0, max=w)
        selected_boxes[:, 3].add_(-y)  #.clamp_(min=0, max=h)
        #print selected_boxes
        boxwh = selected_boxes[:, 2:] - selected_boxes[:, :2]
        mask = (boxwh[:, 0] > self.small_threshold) & (
            boxwh[:, 1] > self.small_threshold) & (
                boxwh[:, 0] < self.big_threshold) & (boxwh[:, 1] <
                                                     self.big_threshold)
        if not mask.any():
            print(selected_boxes)
            print('boxes:', boxes)
            print('roi:', roi)
            print('center:', center)
            print('idx:', bbox_idx)
            print('img:', imw, imh)
            cv2.imwrite('wrong.jpg', img)
        assert mask.any()

        selected_boxes_selected = selected_boxes.index_select(
            0,
            mask.nonzero().squeeze(1))

        selected_labels = labels.index_select(0, mask.nonzero().squeeze(1))

        return img, selected_boxes_selected, selected_labels

    def random_bright(self, im, delta=32):
        if random.random() > 0.5:
            im = im + random.randrange(-delta, delta)
            im = im.clip(min=0, max=255).astype(np.uint8)
        return im

    def random_contrast(self, im):
        if random.random() > 0.5:
            alpha = random.uniform(0.5, 1.5)
            im = im * alpha
            im = im.clip(min=0, max=255).astype(np.uint8)
        return im

    def random_saturation(self, im):
        if random.random() > 0.5:
            alpha = random.uniform(0.5, 1.5)
            hsv_im = cv2.cvtColor(im, cv2.COLOR_BGR2HSV)
            hsv_im = hsv_im * [1.0, alpha, 1.0]
            hsv_im = hsv_im.clip(min=0, max=255).astype(np.uint8)
            im = cv2.cvtColor(hsv_im, cv2.COLOR_HSV2BGR)
        return im

    def random_hue(self, im, delta=18):
        if random.random() > 0.5:
            alpha = random.randrange(-delta, delta)
            hsv_im = cv2.cvtColor(im, cv2.COLOR_BGR2HSV)
            hsv_im = hsv_im + [alpha, 0, 0]
            hsv_im = hsv_im.clip(min=0, max=179).astype(np.uint8)
            im = cv2.cvtColor(hsv_im, cv2.COLOR_HSV2BGR)
        return im

    def testGet(self, idx):
        fname = self.fnames[idx]
        img = cv2.imread(os.path.join(self.root, fname))
        cv2.imwrite('test_encoder_source.jpg', img)
        boxes = self.boxes[idx].clone()
        # print(boxes)
        labels = self.labels[idx].clone()

        for box in boxes:
            cv2.rectangle(img, (int(box[0]), int(box[1])),
                          (int(box[2]), int(box[3])), (0, 0, 255))
        cv2.imwrite(fname, img)

        if self.train:
            # random_crop() expects a reference box index; pick one at random here.
            bbox_idx = random.randint(0, boxes.size(0) - 1)
            img, boxes, labels = self.random_crop(img, boxes, labels, bbox_idx)
            img = self.random_bright(img)
            img, boxes = self.random_flip(img, boxes)

        h, w, _ = img.shape
        boxes /= torch.Tensor([w, h, w, h]).expand_as(boxes)

        img = cv2.resize(img, (self.image_size, self.image_size))
        for t in self.transform:
            img = t(img)

        print(idx, fname, boxes)

        return img, boxes, labels
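
Note that this dataset iterates self.transform directly (for t in self.transform), so it expects a plain list of callables rather than a transforms.Compose. A minimal sketch with placeholder paths; the anchor-related arguments (fm_size, ac_size, ac_density) are left at their defaults here and may need concrete values for a real DataEncoder:

import torchvision.transforms as transforms

dataset = ListDataset(list_file='./wider_train.txt', root='./WIDER/images/',
                      train=True, transform=[transforms.ToTensor()], image_size=96)
img, loc_target, conf_target = dataset[0]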
Example 11
print("gpu available : ", torch.cuda.is_available())
print("num_gpus : ", torch.cuda.device_count())

# Set data parallel training
net = torch.nn.DataParallel(net, device_ids=[0,1,2,3])
net.cuda()

# Training
print("==>training start...")
net.train()
# Freeze BN layer for pre-trained backbone
net.module.freeze_bn()
# Set optimizer -- SGD or Adam
optimizer = optim.SGD(net.parameters(), lr=cur_lr, momentum=0.9, weight_decay=1e-4) #optim.Adam(net.parameters(), lr=cur_lr)
# Encoder that matches anchors on each feature map
encoder = DataEncoder(cls_thresh=0.5, nms_thresh=0.2)
# Tensorboard visualize recorder
writer = SummaryWriter(logdir=args.logdir)
lossest = 1
save_lossest = False

t0 = time.time()
for epoch in range(start_epoch, 10000):
    if iteration > args.max_iter:
        break

    for inputs, loc_targets, cls_targets in trainloader:
        # prepare data and cls & loc label
        inputs = Variable(inputs.cuda())
        loc_targets = Variable(loc_targets.cuda())
        cls_targets = Variable(cls_targets.cuda())
Example 12
class ListDataset(data.Dataset):
    def __init__(self,
                 root,
                 dataset,
                 train,
                 transform,
                 input_size,
                 multi_scale=False):
        '''
        Args:
          root: (str) DB root directory.
          dataset: (str) Dataset name(dir).
          train: (boolean) train or test.
          transform: ([transforms]) image transforms.
          input_size: (int) model input size.
          multi_scale: (bool) use multi-scale training or not.
        '''
        self.root = root
        self.train = train
        self.transform = transform
        self.input_size = input_size

        self.fnames = []
        self.boxes = []
        self.labels = []

        self.multi_scale = multi_scale
        self.MULTI_SCALES = [
            608, 640, 672, 704, 736, 768, 800, 832, 864, 896, 928, 960
        ]  #step1, 2
        #self.MULTI_SCALES = [960, 992, 1024, 1056, 1088, 1120, 1152, 1184, 1216, 1248, 1280] #step3

        self.encoder = DataEncoder()

        if "SynthText" in dataset:
            self.get_SynthText()
        if "ICDAR2015" in dataset:
            self.get_ICDAR2015()
        if "MLT" in dataset:
            self.get_MLT()
        if "ICDAR2013" in dataset:
            self.get_ICDAR2013()

    def __getitem__(self, idx):
        '''Load image.

        Args:
          idx: (int) dataset index.

        Returns:
          image: (tensor) image array.
          boxes: (tensor) boxes array.
          labels: (tensor) labels array.
        '''
        # Load image, boxes and labels.
        fname = self.fnames[idx]

        img = cv2.imread(os.path.join(self.root, fname))
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        boxes = self.boxes[idx].copy()
        labels = self.labels[idx]

        return {"image": img, "boxes": boxes, "labels": labels}

    def collate_fn(self, batch):
        '''bbox encode and make batch

        Args:
          batch: (dict list) images, boxes and labels

        Returns:
          batch_images, batch_loc, batch_cls
        '''
        size = self.input_size
        if self.multi_scale:  # get random input_size for multi-scale training
            random_choice = random.randint(0, len(self.MULTI_SCALES) - 1)
            size = self.MULTI_SCALES[random_choice]

        inputs = torch.zeros(len(batch), 3, size, size)
        loc_targets = []
        cls_targets = []

        for n, data in enumerate(batch):
            img, boxes, labels = self.transform(size=size)(data['image'],
                                                           data['boxes'],
                                                           data['labels'])
            inputs[n] = img
            loc_target, cls_target = self.encoder.encode(boxes,
                                                         labels,
                                                         input_size=(size,
                                                                     size))

            loc_targets.append(loc_target)
            cls_targets.append(cls_target)
        return inputs, torch.stack(loc_targets), torch.stack(cls_targets)

    def __len__(self):
        return self.num_samples

    def get_SynthText(self):
        import scipy.io as sio
        data_dir = os.path.join(self.root, 'SynthText/train/')

        gt = sio.loadmat(data_dir + 'gt.mat')
        dataset_size = gt['imnames'].shape[1]
        img_files = gt['imnames'][0]
        labels = gt['wordBB'][0]

        self.num_samples = dataset_size
        print("Training on SynthText : ", dataset_size)

        for i in range(dataset_size):
            img_file = data_dir + str(img_files[i][0])
            label = labels[i]

            _quad = []
            _classes = []

            if label.ndim == 3:
                for i in range(label.shape[2]):
                    _x0 = label[0][0][i]
                    _y0 = label[1][0][i]
                    _x1 = label[0][1][i]
                    _y1 = label[1][1][i]
                    _x2 = label[0][2][i]
                    _y2 = label[1][2][i]
                    _x3 = label[0][3][i]
                    _y3 = label[1][3][i]

                    _quad.append([_x0, _y0, _x1, _y1, _x2, _y2, _x3, _y3])
                    _classes.append(1)

            else:
                _x0 = label[0][0]
                _y0 = label[1][0]
                _x1 = label[0][1]
                _y1 = label[1][1]
                _x2 = label[0][2]
                _y2 = label[1][2]
                _x3 = label[0][3]
                _y3 = label[1][3]

                _quad.append([_x0, _y0, _x1, _y1, _x2, _y2, _x3, _y3])
                _classes.append(1)

            self.fnames.append(img_file)
            self.boxes.append(np.array(_quad, dtype=np.float32))
            self.labels.append(np.array(_classes))

    def get_ICDAR2015(self):
        data_dir = os.path.join(self.root, 'ICDAR2015_Incidental/')

        dataset_list = os.listdir(data_dir + "train")
        dataset_list = [l[:-4] for l in dataset_list if "jpg" in l]

        dataset_size = len(dataset_list)
        mode = 'train' if self.train else 'test'

        self.num_samples = dataset_size
        print(mode, "ing on ICDAR2015 : ", dataset_size)

        for i in dataset_list:
            img_file = data_dir + "%s/%s.jpg" % (mode, i)
            label_file = open(data_dir + "%s/gt_%s.txt" % (mode, i))
            label_file = label_file.readlines()

            _quad = []
            _classes = []

            for label in label_file:
                _x0, _y0, _x1, _y1, _x2, _y2, _x3, _y3, txt = label.split(
                    ",")[:9]

                if "###" in txt:
                    continue

                try:
                    _x0 = int(_x0)
                except:
                    _x0 = int(_x0[1:])

                _y0, _x1, _y1, _x2, _y2, _x3, _y3 = [
                    int(p) for p in [_y0, _x1, _y1, _x2, _y2, _x3, _y3]
                ]

                _quad.append([_x0, _y0, _x1, _y1, _x2, _y2, _x3, _y3])
                _classes.append(1)

            if len(_quad) == 0:
                self.num_samples -= 1
                continue
            self.fnames.append(img_file)
            self.boxes.append(np.array(_quad, dtype=np.float32))
            self.labels.append(np.array(_classes))

    def get_MLT(self):
        data_dir = os.path.join(self.root, 'MLT/')

        dataset_list = os.listdir(data_dir + "train")
        dataset_list = [l[:-4] for l in dataset_list if "jpg" in l]

        dataset_size = len(dataset_list)
        mode = 'train' if self.train else 'test'

        self.num_samples = dataset_size
        print(mode, "ing on MLT : ", dataset_size)

        for i in dataset_list:
            img_file = data_dir + "%s/%s.jpg" % (mode, i)
            label_file = open(data_dir + "%s/gt_%s.txt" % (mode, i))
            label_file = label_file.readlines()

            _quad = []
            _classes = []

            for label in label_file:
                _x0, _y0, _x1, _y1, _x2, _y2, _x3, _y3, lang, txt = label.split(
                    ",")[:10]

                if "###" in txt:
                    continue

                try:
                    _x0 = int(_x0)
                except:
                    _x0 = int(_x0[1:])

                _y0, _x1, _y1, _x2, _y2, _x3, _y3 = [
                    int(p) for p in [_y0, _x1, _y1, _x2, _y2, _x3, _y3]
                ]

                _quad.append([_x0, _y0, _x1, _y1, _x2, _y2, _x3, _y3])
                _classes.append(1)

            if len(_quad) == 0:
                self.num_samples -= 1
                continue
            self.fnames.append(img_file)
            self.boxes.append(np.array(_quad, dtype=np.float32))
            self.labels.append(np.array(_classes))

    def get_ICDAR2013(self):
        data_dir = os.path.join(self.root, 'ICDAR2013_FOCUSED/')

        dataset_list = os.listdir(data_dir + "train")
        dataset_list = [l[:-4] for l in dataset_list if "jpg" in l]

        dataset_size = len(dataset_list)
        mode = 'train' if self.train else 'test'

        self.num_samples = dataset_size
        print(mode, "ing on ICDAR2013 : ", dataset_size)

        for i in dataset_list:
            img_file = data_dir + "%s/%s.jpg" % (mode, i)
            label_file = open(data_dir + "%s/gt_%s.txt" % (mode, i))
            label_file = label_file.readlines()

            _quad = []
            _classes = []

            for label in label_file:
                _xmin, _ymin, _xmax, _ymax = label.split(" ")[:4]

                _x0 = _xmin
                _y0 = _ymin
                _x1 = _xmax
                _y1 = _ymin
                _x2 = _xmax
                _y2 = _ymax
                _x3 = _xmin
                _y3 = _ymax

                _x0, _y0, _x1, _y1, _x2, _y2, _x3, _y3 = [
                    int(p) for p in [_x0, _y0, _x1, _y1, _x2, _y2, _x3, _y3]
                ]

                _quad.append([_x0, _y0, _x1, _y1, _x2, _y2, _x3, _y3])
                _classes.append(1)

            if len(_quad) == 0:
                self.num_samples -= 1
                continue
            self.fnames.append(img_file)
            self.boxes.append(np.array(_quad, dtype=np.float32))
            self.labels.append(np.array(_classes))
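
Here transform is used as a factory (self.transform(size=size)(image, boxes, labels)), and collate_fn both applies it and picks the per-batch scale when multi_scale is enabled, so the loader must receive collate_fn. A sketch with hypothetical arguments:

dataset = ListDataset(root='./db', dataset='ICDAR2015', train=True,
                      transform=my_transform_factory,  # hypothetical: transform(size=...) returns a callable
                      input_size=768, multi_scale=True)
loader = torch.utils.data.DataLoader(dataset, batch_size=8, shuffle=True,
                                     collate_fn=dataset.collate_fn)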
Example 13
def prediction(bin_of_images, checkpoint_dir, minimum_idx, result_dir):
    print('Loading model..')

    if torch.cuda.is_available():
        load_pth = torch.load(checkpoint_dir + "/ckpt-" + str(minimum_idx) +
                              ".pth")
    else:
        load_pth = torch.load(checkpoint_dir + "/ckpt-" + str(minimum_idx) +
                              ".pth",
                              map_location=lambda storage, loc: storage)

    valid_loss = load_pth['loss']
    print("valid loss : " + str(valid_loss))

    num_classes = load_pth['num_classes']
    num_batch = load_pth['batch']
    num_crops = load_pth['crops']
    print("num. batch : " + str(num_batch))
    print("num. crops : " + str(num_crops))

    net = load_sstdnet(num_classes=num_classes, using_pretrained=False)
    net.load_state_dict(load_pth['net'])
    net.eval()

    transform = transforms.Compose([transforms.ToTensor()])

    for img_file in bin_of_images:
        img = Image.open(img_file)
        w = img.width
        h = img.height

        print('Predicting : ' + img_file)
        x = transform(img)
        x = x.unsqueeze(0)
        x = Variable(x, volatile=True)
        loc_preds, cls_preds, mask_pred = net(x)

        # print('Decoding..')
        encoder = DataEncoder()
        boxes, labels = encoder.decode(loc_preds.data.squeeze(),
                                       cls_preds.data.squeeze(), (w, h))

        draw = ImageDraw.Draw(img)

        img_file_name = img_file.split("/")[-1]
        txt_file_name = img_file_name.replace(".jpg", ".result")

        result_txt = open(result_dir + "/" + txt_file_name, 'w')

        for result_idx in range(len(boxes)):
            draw.rectangle(list(boxes[result_idx]), outline='red')
            result_txt.write(
                str(boxes[result_idx][0]) + "\t" + str(boxes[result_idx][1]) +
                "\t" + str(boxes[result_idx][2]) + "\t" +
                str(boxes[result_idx][3]) + "\t" + str(labels[result_idx]) +
                "\n")
        result_txt.close()

        img.save(result_dir + "/" + img_file_name)

        mask_pred = F.softmax(mask_pred)
        mask_data = mask_pred.data.numpy()
        mask_data = mask_data[:, 1:2, :, :]
        mask_data = np.squeeze(mask_data)
        mask_img = Image.fromarray(np.uint8(mask_data * 255.), 'L')
        mask_img.save(result_dir + "//" +
                      img_file_name.replace(".jpg", ".png"))
Example 14
class ListDataset(data.Dataset):
    classes = [
        "articulated_truck", "bicycle", "bus", "car", "motorcycle",
        'motorized_vehicle', "non-motorized_vehicle", "pedestrian",
        "pickup_truck", "single_unit_truck", "work_van"
    ]
    n_class = len(classes)

    def __init__(self, root, list_file, train, transform, input_size,
                 max_size):
        '''
        Args:
          root: (str) directory to images.
          list_file: (str) path to index file.
          train: (boolean) train or test.
          transform: ([transforms]) image transforms.
          input_size: (int) image shorter side size.
          max_size: (int) maximum image longer side size.
        '''
        self.root = root
        self.train = train
        self.transform = transform
        self.input_size = input_size
        self.max_size = max_size

        self.fnames = []
        self.boxes = []
        self.labels = []

        self.data_encoder = DataEncoder()

        with open(list_file) as f:
            lines = f.readlines()
            self.num_samples = len(lines)
        datas = defaultdict(lambda: {'box': [], 'label': []})
        for line in lines:
            splited = line.strip().split(',')
            fname, c, xmin, ymin, xmax, ymax = splited
            lab = self.classes.index(c)
            assert lab != -1, c
            datas[fname]['box'].append(
                [float(xmin),
                 float(ymin),
                 float(xmax),
                 float(ymax)])
            datas[fname]['label'].append(lab)

        for file, vals in datas.items():
            self.fnames.append(file + '.jpg')
            self.boxes.append(torch.Tensor(vals['box']))
            self.labels.append(torch.LongTensor(vals['label']))

    def __getitem__(self, idx):
        '''Load image.

        Args:
          idx: (int) image index.

        Returns:
          img: (tensor) image tensor.
          boxes: (tensor) bounding boxes, sized [#obj, 4].
          labels: (tensor) class labels, sized [#obj,].
        '''
        # Load image and boxes.
        fname = self.fnames[idx]
        img = Image.open(os.path.join(self.root, fname))
        boxes = self.boxes[idx]
        labels = self.labels[idx]

        # Data augmentation while training.
        if self.train:
            img, boxes = self.random_flip(img, boxes)
            img, boxes = self.scale_jitter(img, boxes)

        img, boxes = self.resize(img, boxes)
        img = self.transform(img)
        return img, boxes, labels

    def resize(self, img, boxes):
        '''Resize the image shorter side to input_size.

        Args:
          img: (PIL.Image) image.
          boxes: (tensor) object boxes, sized [#obj, 4].

        Returns:
          (PIL.Image) resized image.
          (tensor) resized object boxes.

        Reference:
          https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/utils/blob.py
        '''
        # im_size_min = min(img.size)
        # im_size_max = max(img.size)
        # scale = float(self.input_size) / float(im_size_min)
        # if round(scale*im_size_max) > self.max_size:  # limit the longer side to MAX_SIZE
        #     scale = float(self.max_size) / float(im_size_max)
        # w = int(img.width*scale)
        # h = int(img.height*scale)
        w = h = self.input_size
        ws = 1.0 * w / img.width
        hs = 1.0 * h / img.height

        scale = torch.Tensor([ws, hs, ws, hs])
        return img.resize((w, h)), scale * boxes

    def random_flip(self, img, boxes):
        '''Randomly flip the image and adjust the boxes.

        For box (xmin, ymin, xmax, ymax), the flipped box is:
        (w-xmax, ymin, w-xmin, ymax).

        Args:
          img: (PIL.Image) image.
          boxes: (tensor) object boxes, sized [#obj, 4].

        Returns:
          img: (PIL.Image) randomly flipped image.
          boxes: (tensor) randomly flipped boxes, sized [#obj, 4].
        '''
        if random.random() < 0.5:
            img = img.transpose(Image.FLIP_LEFT_RIGHT)
            w = img.width
            xmin = w - boxes[:, 2]
            xmax = w - boxes[:, 0]
            boxes[:, 0] = xmin
            boxes[:, 2] = xmax
        return img, boxes

    def scale_jitter(self, img, boxes):
        '''Randomly scale the image width and height by independent factors in [3/4, 4/3].

        Args:
          img: (PIL.Image) image.
          boxes: (tensor) object boxes, sized [#obj, 4].

        Returns:
          img: (PIL.Image) scaled image.
          boxes: (tensor) scaled object boxes, sized [#obj, 4].
        '''
        imw, imh = img.size
        sw = random.uniform(3 / 4., 4 / 3.)
        sh = random.uniform(3 / 4., 4 / 3.)
        w = int(imw * sw)
        h = int(imh * sh)
        img = img.resize((w, h))
        boxes[:, ::2] *= sw
        boxes[:, 1::2] *= sh
        return img, boxes

    def collate_fn(self, batch):
        '''Pad images and encode targets.

        Since images in a batch can be of different sizes, we pad them to a common size.

        Args:
          batch: (list) of (image, boxes, labels) tuples.

        Returns:
          padded images, stacked loc_targets, stacked cls_targets.

        Reference:
          https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/utils/blob.py
        '''
        imgs = [x[0] for x in batch]
        boxes = [x[1] for x in batch]
        labels = [x[2] for x in batch]

        max_h = max([im.size(1) for im in imgs])
        max_w = max([im.size(2) for im in imgs])
        num_imgs = len(imgs)
        inputs = torch.zeros(num_imgs, 3, max_h, max_w)

        loc_targets = []
        cls_targets = []
        for i in range(num_imgs):
            im = imgs[i]
            imh, imw = im.size(1), im.size(2)
            inputs[i, :, :imh, :imw] = im

            # Encode data.
            loc_target, cls_target = self.data_encoder.encode(
                boxes[i],
                labels[i],
                input_size=(max_w, max_h),
                train=self.train)
            loc_targets.append(loc_target)
            cls_targets.append(cls_target)
        return inputs, torch.stack(loc_targets), torch.stack(cls_targets)

    def __len__(self):
        return len(self.fnames)
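
A minimal sketch of wiring this dataset into a torch DataLoader; the file
paths, the transform and the batch size below are placeholders, not part of
the original example:

import torchvision.transforms as transforms
from torch.utils.data import DataLoader

transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
])

# Hypothetical CSV list file with "fname,class,xmin,ymin,xmax,ymax" rows.
dataset = ListDataset(root='data/images', list_file='data/gt_train.csv',
                      train=True, transform=transform,
                      input_size=600, max_size=1000)
loader = DataLoader(dataset, batch_size=4, shuffle=True,
                    collate_fn=dataset.collate_fn)
imgs, loc_targets, cls_targets = next(iter(loader))
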
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--data', '-data', type=str, default='VOC')
    parser.add_argument('--loss_fn', '-loss', type=str, default='sigmoid')
    parser.add_argument('--epoch', '-e', type=str, default='None')
    parser.add_argument('--debug', '-d', type=str, default='False')
    parser.add_argument('--weight_path', '-w', type=str, default='None')
    args = parser.parse_args()

    scale = 600
    use_cuda = torch.cuda.is_available() 
    num_workers = os.cpu_count()
    batch_size = 1
    gpus = [0,1]
    save_path = args.weight_path
    if not os.path.exists(save_path+'/test_img/'):
        os.mkdir(save_path+'/test_img/')

    if args.debug == 'True':
        num_workers = 0
    
    transform = transforms.Compose([transforms.ToTensor(), \
            transforms.Normalize((0.485,0.456,0.406),(0.229,0.224,0.225))])

    if args.data == "VOC":
        test_root = '/media/NAS/dataset/PASCALVOC/VOCdevkit/07+12/test.txt'
        if args.loss_fn == 'sigmoid':
            voc_label = ['aeroplane','bicycle','bird','boat','bottle','bus','car',
                 'cat','chair','cow','diningtable','dog','horse','motorbike'
                 ,'person','pottedplant','sheep','sofa','train','tvmonitor',]
            num_classes = 20
        elif args.loss_fn == 'softmax':
            voc_label = ['background','aeroplane','bicycle','bird','boat','bottle','bus','car',
                 'cat','chair','cow','diningtable','dog','horse','motorbike'
                 ,'person','pottedplant','sheep','sofa','train','tvmonitor',]
            num_classes = 21
        color_label = [(  0,   0,   0),
                       (  0,   0,   0),
                       (111,  74,   0),
                       ( 81,   0,  81),
                       (128,  64, 128),
                       (244,  35, 232),
                       (230, 150, 140),
                       ( 70,  70,  70),
                       (102, 102, 156),
                       (190, 153, 153),
                       (150, 120,  90),
                       (153, 153, 153),
                       (250, 170,  30),
                       (220, 220,   0),
                       (107, 142,  35),
                       ( 52, 151,  52),
                       ( 70, 130, 180),
                       (220,  20,  60),
                       (  0,   0, 142),
                       (  0,   0, 230),
                       (119,  11,  32)]

    elif args.data == "COCO":
        test_root = '/media/NAS/dataset/COCO/minival2014/test.txt'
        if args.loss_fn == 'sigmoid':
            num_classes = 80
        elif args.loss_fn == 'softmax':
            num_classes = 81

    global device
    device = torch.device("cuda" if use_cuda else "cpu")

    print('Loading model..')
    weights = './{}/retina_{}.pth'.format(args.weight_path, args.epoch)

    model = RetinaNet(num_classes)

    checkpoint = torch.load(weights)
    if use_cuda:
        if len(gpus) >= 1:
            model = torch.nn.DataParallel(model).to(device)
        else:
            model = model.to(device)
        model.cuda()
    model.load_state_dict(checkpoint['state_dict'])
    print('\nTest')

    with open(test_root, 'r') as file:
        lines = file.readlines()

    encoder = DataEncoder(args.loss_fn)
    model.eval()
    result = ''
    for img_idx in lines[:100]:
        img_path = img_idx.rstrip()
        labelpath = img_path.replace('images','labels').replace('JPEGImages'
                    ,'labels').replace('.jpg','.txt').replace('.png','.txt')
        img = Image.open(img_path).convert('RGB')
        label = load_label(labelpath, img)

        input_img = img.resize((scale,scale))

        input_img = transform(input_img)
        data = torch.zeros(1,3,input_img.shape[1],input_img.shape[2])
        data[0] = input_img
        inputs = data.to(device)
        loc_preds_split, cls_preds_split = model(inputs)  # inputs is already on `device`
        loc_preds_nms, cls_preds_nms, score = encoder.decode(loc_preds_split,
                                                             cls_preds_split,
                                                             data.shape,
                                                             data[0].shape,
                                                             0)
        image_id = img_path[-10:]

        if not os.path.exists(save_path+'/test_img/val_epoch_{}'\
                        .format(args.epoch)):
            os.mkdir(save_path+'/test_img/val_epoch_{}'.format(args.epoch))

        if score.shape[0] != 0:
            box_preds = loc_preds_nms.cpu().detach().numpy().astype(int)
            box_preds = np.ndarray.tolist(box_preds)
            category_preds = cls_preds_nms.cpu().detach().numpy().astype(str)
            c = np.ndarray.tolist(category_preds)
            score_preds = score.cpu().detach().numpy().astype(str)
            score_preds = np.ndarray.tolist(score_preds)

        else:
            box_preds = []
            c = []
            score_preds = []

        new_img = cv2.imread(img_path)
        for i in range(int(label.shape[0])):
            coor_min = (int(label[i][1]), int(label[i][2]))
            coor_max = (int(label[i][3]), int(label[i][4]))
            cls = int(label[i][0])
            # cv2.rectangle(new_img, coor_min, coor_max, color_label[cls], 2)
            cv2.rectangle(new_img, coor_min, coor_max, (250,0,0), 2)
            cv2.putText(new_img, voc_label[cls] + ' | ' + 'GT', (coor_min[0]+5, coor_min[1]-5), cv2.FONT_HERSHEY_SIMPLEX, 0.2, (255, 255, 255), 1, cv2.LINE_AA)
        if len(box_preds) > 0:
            for idx, box_pred in enumerate(box_preds):
                box_pred_xmin = int(float(box_pred[0]))
                if box_pred_xmin < 0: box_pred_xmin = 0
                box_pred_ymin = int(float(box_pred[1]))
                if box_pred_ymin < 0: box_pred_ymin = 0
                box_pred_xmax = int(float(box_pred[2]))
                if box_pred_xmax < 0: box_pred_xmax = 0
                box_pred_ymax = int(float(box_pred[3]))
                if box_pred_ymax < 0: box_pred_ymax = 0
                cls_idx = int(category_preds[idx])
                box_pred_min = (int(box_pred_xmin*new_img.shape[1]/scale), int(box_pred_ymin*new_img.shape[0]/scale))
                box_pred_max = (int(box_pred_xmax*new_img.shape[1]/scale), int(box_pred_ymax*new_img.shape[0]/scale))
                cls_name = voc_label[cls_idx]
                cls_color = color_label[cls_idx]
                box_coor = (box_pred_min, box_pred_max)
                conf = score_preds[idx][:4]
                # cv2.rectangle(new_img, box_pred_min, box_pred_max, cls_color, 2)
                cv2.rectangle(new_img, box_pred_min, box_pred_max, (0,250,0), 2)
                cv2.putText(new_img, cls_name + ' | ' + conf, (box_pred_min[0]+5, box_pred_min[1]-5), cv2.FONT_HERSHEY_SIMPLEX, 0.3, (255, 255, 255), 1, cv2.LINE_AA)

        new_path = save_path+'/test_img/val_epoch_{}/'.format(args.epoch) + image_id
        cv2.imwrite(new_path, new_img)
        print(image_id)
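
The helper load_label used above is not part of this example. A plausible
sketch, assuming YOLO-style label files with one normalized "class cx cy w h"
row per object (an assumption, not the original implementation):

import numpy as np

def load_label(labelpath, img):
    # Convert normalized (class, cx, cy, w, h) rows into absolute
    # [class, xmin, ymin, xmax, ymax] rows, matching how `label` is indexed above.
    w, h = img.size
    rows = []
    with open(labelpath) as f:
        for line in f:
            parts = line.split()
            if len(parts) != 5:
                continue
            cls, cx, cy, bw, bh = [float(p) for p in parts]
            rows.append([cls,
                         (cx - bw / 2) * w, (cy - bh / 2) * h,
                         (cx + bw / 2) * w, (cy + bh / 2) * h])
    return np.array(rows)
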
def iter_scan(scan,
              scan_array,
              patient_df,
              net,
              cube_size=64,
              stride=50,
              iou=0.01):
    scan_df = pd.DataFrame(columns=["scan_id", "z", "y", "x", "iou"])
    start_time = time.time()
    gt_boxes, gt_labels = annotation(patient_df)
    #print(gt_boxes, gt_labels)
    ais_gt_boxes, mia_gt_boxes = split_class(gt_boxes, gt_labels)
    #print(ais_gt_boxes, mia_gt_boxes)
    ais_locs = torch.FloatTensor(1, 6)
    ais_probs = torch.FloatTensor(1)

    mia_locs = torch.FloatTensor(1, 6)
    mia_probs = torch.FloatTensor(1)

    for z in range(0, scan_array.shape[0], stride):
        for y in range(0, scan_array.shape[1], stride):
            for x in range(0, scan_array.shape[2], stride):
                start_coord = torch.FloatTensor([z, y, x])
                end_coord = start_coord + torch.FloatTensor(
                    [cube_size, cube_size, cube_size])
                zmax = min(z + cube_size, scan_array.shape[0])
                ymax = min(y + cube_size, scan_array.shape[1])
                xmax = min(x + cube_size, scan_array.shape[2])
                cube_sample = np.zeros((cube_size, cube_size, cube_size),
                                       dtype=np.float32)
                cube_sample[:(zmax - z), :(ymax -
                                           y), :(xmax -
                                                 x)] = scan_array[z:zmax,
                                                                  y:ymax,
                                                                  x:xmax]
                cube_sample = np.expand_dims(cube_sample, 0)
                cube_sample = np.expand_dims(cube_sample, 0)
                input_cube = Variable(torch.from_numpy(cube_sample).cuda())
                locs, clss = net(input_cube)
                locs = locs.data.cpu().squeeze()
                clss = clss.data.cpu().squeeze()
                ais_boxes, ais_scores, ais_labels, mia_boxes, mia_scores, mia_labels = DataEncoder(
                ).decode(locs, clss, [cube_size, cube_size, cube_size])
                if not isinstance(ais_boxes, int):
                    ais_boxes = calc_scan_coord(ais_boxes, start_coord)
                    ais_locs = torch.cat([ais_locs, ais_boxes], 0)
                    ais_probs = torch.cat([ais_probs, ais_scores], 0)

                if not isinstance(mia_boxes, int):
                    mia_boxes = calc_scan_coord(mia_boxes, start_coord)
                    mia_locs = torch.cat([mia_locs, mia_boxes], 0)
                    mia_probs = torch.cat([mia_probs, mia_scores], 0)

    end_time = time.time()
    run_time = end_time - start_time
    print(run_time)
    if not isinstance(ais_gt_boxes, int):
        ais_locs = ais_locs[1:, :]
        ais_probs = ais_probs[1:]
        ais_keep = box_nms(ais_locs, ais_probs)
        ais_locs = ais_locs[ais_keep]
        ais_probs = ais_probs[ais_keep]
        ais_count, best_ious = find_best_pred(ais_gt_boxes, ais_locs)
        ais_locs = change_box_order(ais_locs, "zyxzyx2zyxdhw")
        for i in range(ais_locs.size(0)):
            insert = {
                "scan_id": scan,
                "z": ais_locs[i, 0],
                "y": ais_locs[i, 1],
                "x": ais_locs[i, 2],
                "iou": best_ious[i]
            }
            la_df = pd.DataFrame(data=insert, index=["0"])
            scan_df = pd.concat([scan_df, la_df], ignore_index=True)  # DataFrame.append was removed in pandas 2.0

    else:
        ais_count = np.zeros(3)

    if not isinstance(mia_gt_boxes, int):
        mia_locs = mia_locs[1:, :]
        mia_probs = mia_probs[1:]
        mia_keep = box_nms(mia_locs, mia_probs)
        mia_locs = mia_locs[mia_keep]
        mia_probs = mia_probs[mia_keep]
        mia_count, best_ious = find_best_pred(mia_gt_boxes, mia_locs)
        for i in range(mia_locs.size(0)):
            insert = {
                "scan_id": scan,
                "z": mia_locs[i, 0],
                "y": mia_locs[i, 1],
                "x": mia_locs[i, 2],
                "iou": best_ious[i]
            }
            la_df = pd.DataFrame(data=insert, index=["0"])
            scan_df = pd.concat([scan_df, la_df], ignore_index=True)  # DataFrame.append was removed in pandas 2.0
    else:
        mia_count = np.zeros(3)

    return ais_count, mia_count, scan_df
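
A hedged sketch of driving iter_scan over several scans and pooling its
outputs; the scan loader, the annotation table and the trained `net` are
assumptions, not part of the original example:

import numpy as np
import pandas as pd

results = pd.DataFrame(columns=["scan_id", "z", "y", "x", "iou"])
ais_total, mia_total = np.zeros(3), np.zeros(3)
for scan_id in scan_ids:                   # hypothetical list of scan IDs
    scan_array = load_scan_array(scan_id)  # hypothetical (D, H, W) ndarray loader
    patient_df = annotations[annotations["scan_id"] == scan_id]
    ais_count, mia_count, scan_df = iter_scan(scan_id, scan_array, patient_df, net)
    ais_total += ais_count
    mia_total += mia_count
    results = pd.concat([results, scan_df], ignore_index=True)
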
Esempio n. 17
0
class SSD_Core:
    def __init__(self):

        self.dictindex = []

        with open('./label.txt') as f:
            content = f.readlines()
            for symbol in content:
                symbol = symbol.replace('\n', '')

                split = symbol.split(' ')

                self.dictindex.append(split[0])

        # Load model
        self.net = SSD300()
        checkpoint = torch.load(args.resuming_model)
        self.net.load_state_dict(checkpoint['net'])
        self.net.eval()

        self.data_encoder = DataEncoder()

        self.transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(mean=(0.485, 0.456, 0.406),
                                 std=(0.229, 0.224, 0.225))
        ])

    def generatePrediction(self, imgpath, outname):

        # Load test image
        img = Image.open(imgpath).convert('L')
        img1 = img.resize((InputImgSize, InputImgSize))

        img1 = self.transform(img1)

        # Forward
        loc, conf = self.net(Variable(img1[None, :, :, :], volatile=True))

        # Decode

        boxes, labels, scores = self.data_encoder.decode(
            loc.data.squeeze(0),
            F.softmax(conf.squeeze(0), dim=1).data)

        draw = ImageDraw.Draw(img)

        return_str = 'null ' + str(len(boxes))

        boxes_np = boxes.numpy() * InputImgSize
        labels_np = labels.numpy()

        for i in range(len(boxes)):

            return_str = return_str + ' ' + str(int(
                boxes_np[i][0])) + ' ' + str(int(boxes_np[i][1])) + ' ' + str(
                    int(boxes_np[i][2])) + ' ' + str(int(
                        boxes_np[i][3])) + ' ' + str(int(labels_np[i][0]) - 1)

            boxes[i][::2] *= img.width
            boxes[i][1::2] *= img.height
            draw.rectangle(list(boxes[i]), outline='red')

            draw.text((boxes[i][0], boxes[i][1]),
                      self.dictindex[labels.numpy()[i, 0] - 1],
                      font=ImageFont.truetype("./font/arial.ttf"))
            #draw.text((boxes[i][0] * 300, boxes[i][1] * 300), dictindex[labels.numpy()[i, 0]], font=ImageFont.truetype("./font/arial.ttf"))

        img.save('./temp/' + outname)

        return return_str
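
A brief usage sketch; the image path and output name are placeholders, and the
files the class reads at construction time (label.txt, args.resuming_model and
the arial font) are assumptions about the surrounding project:

core = SSD_Core()
result = core.generatePrediction('samples/page.jpg', 'page_pred.jpg')
# return_str looks like "null <num_boxes>" followed by
# "xmin ymin xmax ymax label" for each detection.
print(result)
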
Esempio n. 18
0
class ImageDataset(data.Dataset):
    def __init__(self, img_ids, img_dir, bbox_dict, has_label=True):
        self.input_size = settings.IMG_SZ
        self.img_ids = img_ids
        self.img_dir = img_dir
        self.num = len(img_ids)
        self.bbox_dict = bbox_dict
        self.has_label = has_label
        self.transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ])
        self.boxes = []
        self.labels = []

        self.encoder = DataEncoder()

        if has_label:
            for img_id in self.img_ids:
                box = []
                label = []
                if img_id in self.bbox_dict:
                    for x in self.bbox_dict[img_id]:
                        box.append(x[1])
                        label.append(x[0])
                else:
                    raise ValueError('No bbox: {}'.format(img_id))
                self.boxes.append(torch.Tensor(box) * self.input_size)  #
                self.labels.append(torch.LongTensor(label))  #

    def __getitem__(self, index):
        fn = os.path.join(self.img_dir, '{}.jpg'.format(self.img_ids[index]))
        img = cv2.imread(fn)
        img = self.transform(img)
        #print(get_class_names(self.labels[index]))

        if self.has_label:
            return img, self.boxes[index], self.labels[index]
        else:
            return [img]

    def __len__(self):
        return self.num

    def collate_fn(self, batch):
        """Encode targets.

        Args:
          batch: (list) of images, ids

        Returns:
          images, stacked bbox_targets, stacked clf_targets.
        """
        imgs = [x[0] for x in batch]

        if self.has_label:
            boxes = [x[1] for x in batch]
            labels = [x[2] for x in batch]

        h = w = self.input_size
        num_imgs = len(imgs)
        inputs = torch.zeros(num_imgs, 3, h, w)

        loc_targets = []
        cls_targets = []
        for i in range(num_imgs):
            inputs[i] = imgs[i]
            #print('1>>>')
            #print(boxes[i].size(), labels[i].size())
            if self.has_label:
                loc_target, cls_target = self.encoder.encode(boxes[i],
                                                             labels[i],
                                                             input_size=(w, h))
                loc_targets.append(loc_target)
                cls_targets.append(cls_target)
        if self.has_label:
            return inputs, torch.stack(loc_targets), torch.stack(cls_targets)
        else:
            return inputs
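
A short usage sketch; img_ids, img_dir and the bbox_dict layout of
img_id -> [(label, box), ...] are assumptions inferred from the constructor:

from torch.utils.data import DataLoader

dataset = ImageDataset(img_ids, img_dir, bbox_dict, has_label=True)
loader = DataLoader(dataset, batch_size=8, shuffle=True,
                    collate_fn=dataset.collate_fn)
inputs, loc_targets, cls_targets = next(iter(loader))
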
Esempio n. 19
0
class ListDataset(data.Dataset):
    def __init__(self, root, list_file, train, transform, input_size):
        '''
        Args:
          root: (str) directory to images.
          list_file: (str) path to index file.
          train: (boolean) train or test.
          transform: ([transforms]) image transforms.
          input_size: (int) model input size.
        '''
        self.root = root
        self.train = train
        self.transform = transform
        self.input_size = input_size

        self.fnames = []
        self.boxes = []
        self.labels = []

        self.encoder = DataEncoder()

        with open(list_file) as f:
            lines = f.readlines()
            self.num_samples = len(lines)

        for line in lines:
            splited = line.strip().split()
            self.fnames.append(splited[0])
            num_boxes = (len(splited) - 1) // 5
            box = []
            label = []
            for i in range(num_boxes):
                xmin = splited[1 + 5 * i]
                ymin = splited[2 + 5 * i]
                xmax = splited[3 + 5 * i]
                ymax = splited[4 + 5 * i]
                c = splited[5 + 5 * i]
                box.append(
                    [float(xmin),
                     float(ymin),
                     float(xmax),
                     float(ymax)])
                label.append(int(c))
            self.boxes.append(torch.Tensor(box))
            self.labels.append(torch.LongTensor(label))

    def __getitem__(self, idx):
        '''Load image.

        Args:
          idx: (int) image index.

        Returns:
          img: (tensor) transformed image tensor.
          boxes: (tensor) cropped/resized object boxes.
          labels: (tensor) class labels.
        '''
        # Load image and boxes.
        fname = self.fnames[idx]
        img = Image.open(os.path.join(self.root, fname))
        if img.mode != 'RGB':
            img = img.convert('RGB')

        boxes = self.boxes[idx].clone()
        labels = self.labels[idx]
        size = self.input_size

        # Data augmentation.
        if self.train:
            # img, boxes = random_flip(img, boxes)
            img, boxes = random_crop(img, boxes)
            img, boxes = resize(img, boxes, (size, size))
        else:
            img, boxes = resize(img, boxes, size)
            img, boxes = center_crop(img, boxes, (size, size))

        img = self.transform(img)
        return img, boxes, labels

    def collate_fn(self, batch):
        '''Pad images and encode targets.

        Since images may be of different sizes, we pad them to a common size.

        Args:
          batch: (list) of (image, boxes, labels) tuples.

        Returns:
          padded images, stacked loc_targets, stacked cls_targets.
        '''
        imgs = [x[0] for x in batch]
        boxes = [x[1] for x in batch]
        labels = [x[2] for x in batch]

        h = w = self.input_size
        num_imgs = len(imgs)
        inputs = torch.zeros(num_imgs, 3, h, w)

        loc_targets = []
        cls_targets = []
        for i in range(num_imgs):
            inputs[i] = imgs[i]
            loc_target, cls_target = self.encoder.encode(boxes[i],
                                                         labels[i],
                                                         input_size=(w, h))
            loc_targets.append(loc_target)
            cls_targets.append(cls_target)
        return inputs, torch.stack(loc_targets), torch.stack(cls_targets)

    def __len__(self):
        return self.num_samples
Esempio n. 20
0
def main():
	parser = argparse.ArgumentParser()
	parser.add_argument('--batchSz', type=int, default=1, help='batch size')
	parser.add_argument('--nEpochs', type=int, default=300, help='number of epoch to end training')
	parser.add_argument('--lr', type=float, default=1e-5, help='learning rate')
	parser.add_argument('--momentum', type=float, default=0.9)
	parser.add_argument('--wd', type=float, default=5e-4, help='weight decay')
	# parser.add_argument('--save')
	# parser.add_argument('--seed', type=int, default=1)
	parser.add_argument('--opt', type=str, default='sgd', choices=('sgd', 'adam', 'rmsprop'))
	parser.add_argument('--resume', '-r', action='store_true', help='resume from checkpoint')
	parser.add_argument('--resume_from', type=int, default=220, help='resume from which checkpoint')
	parser.add_argument('--visdom', '-v', action='store_true', help='use visdom for training visualization')
	args = parser.parse_args()

	# args.save = args.save or 'work/DSOS.base'
	# setproctitle.setproctitle(args.save)
	# if os.path.exists(args.save):
	# 	shutil.rmtree(args.save)
	# os.makedirs(args.save, exist_ok=True)

	use_cuda = torch.cuda.is_available()
	best_loss = float('inf') # best test loss
	start_epoch = 0 # start from epoch 0 or from the last checkpoint epoch

	normMean = [0.485, 0.456, 0.406]
	normStd = [0.229, 0.224, 0.225]
	normTransform = transforms.Normalize(normMean, normStd)

	trainTransform = transforms.Compose([
		transforms.Resize((300, 300)),
		transforms.ToTensor(),
		normTransform
		])

	testTransform = transforms.Compose([
		transforms.Resize((300, 300)),
		transforms.ToTensor(),
		normTransform
		])

	# Data
	kwargs = {'num_workers': 4, 'pin_memory': True} if use_cuda else {}
	trainset = ListDataset(root=cfg.img_root, list_file=cfg.label_train,
		                   train=True, transform=trainTransform)
	trainLoader = DataLoader(trainset, batch_size=args.batchSz,
		                     shuffle=True, **kwargs)
	testset = ListDataset(root=cfg.img_root, list_file=cfg.label_test,
		                  train=False, transform=testTransform)
	testLoader = DataLoader(testset, batch_size=args.batchSz,
		                    shuffle=False, **kwargs)
 
	# Model
	net = DSOD(growthRate=48, reduction=1)
	if args.resume:
		print('==> Resuming from checkpoint...')
		checkpoint = torch.load('./checkpoint/ckpt_{:03d}.pth'.format(args.resume_from))
		net.load_state_dict(checkpoint['net'])
		best_loss = checkpoint['loss']
		start_epoch = checkpoint['epoch']+1
		print('Previous epoch: {}, best_loss: {}'.format(start_epoch-1, best_loss))
	else:
		print('==> Initializing weight...')
		def init_weights(m):
			if isinstance(m, nn.Conv2d):
				init.xavier_uniform_(m.weight.data)
				# m.bias.data.zero_()
		net.apply(init_weights)

	print(' + Number of params: {}'.format(
		sum([p.data.nelement() for p in net.parameters()])))
	if use_cuda:
		net = net.cuda()

	if args.opt == 'sgd':
		optimizer = optim.SGD(net.parameters(), lr=args.lr,
			                  momentum=args.momentum, weight_decay=args.wd)
	elif args.opt == 'adam':
		optimizer = optim.Adam(net.parameters(), weight_decay=args.wd)
	elif args.opt == 'rmsprop':
		optimizer = optim.RMSprop(net.parameters(), weight_decay=args.wd)

	criterion = MultiBoxLoss()

	if use_cuda:
		net.cuda()
		cudnn.benchmark = True

	if args.visdom:
		import visdom
		viz = visdom.Visdom()
		training_plot = viz.line(
			X=torch.zeros((1,)).cpu(),
			Y=torch.zeros((1, 3)).cpu(),
			opts=dict(
				xlabel='Epoch',
				ylabel='Loss',
				title='Epoch DSOD Training Loss',
				legend=['Loc Loss', 'Conf Loss', 'Loss']
				)
			)
		testing_plot = viz.line(
			X=torch.zeros((1,)).cpu(),
			Y=torch.zeros((1, 3)).cpu(),
			opts=dict(
				xlabel='Epoch',
				ylabel='Loss',
				title='Epoch DSOD Testing Loss',
				legend=['Loc Loss', 'Conf Loss', 'Loss']
				)
			)

	with open(cfg.label_test) as f:
		test_lines = f.readlines()
		num_tests = len(test_lines)

		transform = trainTransform
		transform_viz = testTransform

		data_encoder = DataEncoder()
		if args.visdom:
			testing_image = viz.image(np.ones((3, 300, 300)),
			                      opts=dict(caption='Random Testing Image'))

	# TODO: save training data on log file
	# trainF = open(os.path.join(args.save, 'train.csv'), 'w')
	# testF = open(os.path.join(args.save, 'test.csv'), 'w')

	for epoch in range(start_epoch, start_epoch+args.nEpochs+1):
		adjust_opt(args.opt, optimizer, epoch)
		train(epoch, net, trainLoader, optimizer, criterion, use_cuda, args.visdom, viz=None)
		test_loss = test(epoch, net, testLoader, optimizer, criterion, use_cuda, args.visdom, viz=None)  # assumes test() returns the epoch loss used for checkpointing below

		if epoch%10 == 0:
			state = {
			      'net': net.state_dict(),
			      'loss': test_loss,
			      'epoch': epoch
			}
			if not os.path.isdir('checkpoint'):
				os.mkdir('checkpoint')
			torch.save(state, './checkpoint/ckpt_{:03d}.pth'.format(epoch))
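
adjust_opt is called above but not shown. A plausible sketch, assuming a simple
step learning-rate decay for the SGD case (the epoch thresholds and rates below
are placeholders):

def adjust_opt(optAlg, optimizer, epoch):
	# Hypothetical step schedule; only the SGD branch adjusts the rate.
	if optAlg != 'sgd':
		return
	if epoch < 150:
		lr = 1e-5
	elif epoch < 250:
		lr = 1e-6
	else:
		lr = 1e-7
	for param_group in optimizer.param_groups:
		param_group['lr'] = lr
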
class VESSELBboxDataset:
    
    def __init__(self, split='trainval'):

        
        data_dir = "/media/nasir/Drive1/datasets/SAR/SAR-Ship-Dataset"
        paths = glob.glob(f'{data_dir}/JPEGImages/*.jpg')
        
        ids = [os.path.splitext(os.path.basename(x))[0] for x in paths]
        if split == 'trainval':
            self.ids = ids[0: 40000]
        else:
            self.ids = ids[40000:]
        self.input_size = 256
        self.encoder = DataEncoder()

        self.data_dir = data_dir
        self.label_names = ['ship']
        self.transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.485,0.456,0.406), (0.229,0.224,0.225))
        ])

    def __len__(self):
        return len(self.ids)

    def str2int(self, a):
        return [int(x) for x in a]

    def extract_boxes(self, fname):
        with open(fname) as f:
            content = f.readlines()
        content = [x.strip() for x in content]
        content = [self.str2int(x.split(' ')[-4:]) for x in content]
        return content

    def __getitem__(self, i):
        """Returns the i-th example.

        Returns a color image and bounding boxes. The image is in CHW format.
        The returned image is RGB.

        Args:
            i (int): The index of the example.

        Returns:
            tuple of an image and bounding boxes

        """
        id_ = self.ids[i]
        anno_file = os.path.join(self.data_dir, 'ground-truth', id_ + '.txt')
        # bbox = self.extract_boxes(anno_file)
        
        # label = list()
        
        
        # bbox = np.stack(bbox).astype(np.float32)
        # bb = np.ones_like(bbox).astype(np.float32)
        # for i in range(len(bbox)):
        #     label.append(0)

        # bb[:, 0] = bbox[:, 1]
        # bb[:, 1] = bbox[:, 0]
        # bb[:, 2] = bbox[:, 3] + bbox[:, 1]
        # bb[:, 3] = bbox[:, 2] + bbox[:, 0]
        # label = np.stack(label)
        
        img_file = os.path.join(self.data_dir, 'JPEGImages', id_ + '.jpg')
        img = Image.open(img_file).convert('RGB')
        img = self.transform(img)
        annot = self.load_annotations(self.extract_boxes(anno_file))
        return {'img': img, 'annot': annot}
        # return {img, torch.Tensor(bb).type(torch.float)}

    def load_annotations(self, bboxes):
        # Each input box is [x, y, w, h]; the output rows are
        # [xmin, ymin, xmax, ymax, class] with the class fixed to 0 (ship).
        annotations = np.zeros((0, 5))
        if len(bboxes) == 0:
            return annotations
        for idx, box in enumerate(bboxes):
            annotation = np.zeros((1, 5))
            annotation[0, :4] = box
            annotation[0, 4] = 0
            annotations = np.append(annotations, annotation, axis=0)

        # Convert the width/height columns into absolute xmax/ymax.
        annotations[:, 2] = annotations[:, 0] + annotations[:, 2]
        annotations[:, 3] = annotations[:, 1] + annotations[:, 3]

        return annotations

    def collate_fn(self, batch):
        '''Pad images and encode targets.

        Since images may be of different sizes, we pad them to a common size.

        Note: this collate_fn expects (image, boxes, labels) tuples, while
        __getitem__ above currently returns a dict, so it is not wired up as-is.

        Args:
          batch: (list) of (image, boxes, labels) tuples.

        Returns:
          padded images, stacked loc_targets, stacked cls_targets.
        '''
        imgs = [x[0] for x in batch]
        boxes = [x[1] for x in batch]
        labels = [x[2] for x in batch]

        h = w = self.input_size
        num_imgs = len(imgs)
        inputs = torch.zeros(num_imgs, 3, h, w)

        loc_targets = []
        cls_targets = []
        for i in range(num_imgs):
            inputs[i] = imgs[i]
            loc_target, cls_target = self.encoder.encode(boxes[i], labels[i], input_size=(w,h))
            loc_targets.append(loc_target)
            cls_targets.append(cls_target)
        return inputs, torch.stack(loc_targets), torch.stack(cls_targets)
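
A quick standalone check of the [x, y, w, h] -> [xmin, ymin, xmax, ymax, class]
conversion performed by load_annotations above (the sample box is made up):

import numpy as np

bboxes = [[10, 20, 30, 40]]             # x, y, width, height
annotations = np.zeros((len(bboxes), 5))
annotations[:, :4] = bboxes             # class column stays 0 (ship)
annotations[:, 2] += annotations[:, 0]  # xmax = x + w
annotations[:, 3] += annotations[:, 1]  # ymax = y + h
print(annotations)                      # [[10. 20. 40. 60.  0.]]
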
Esempio n. 22
0
class jsonDataset(data.Dataset):
    def __init__(self,
                 path,
                 classes,
                 transform,
                 input_image_size,
                 num_crops,
                 fpn_level,
                 is_norm_reg_target,
                 radius,
                 view_image=False,
                 min_cols=1,
                 min_rows=1):
        '''
        Args:
          path: (str) path to the JSON ground-truth file.
          classes: (list) class names; background is added as index 0.
          transform: image/bbox transforms applied in __getitem__.
          input_image_size: (int or tuple) model input size.
          num_crops: (int) random crops per object; <= 0 disables cropping.
          fpn_level: (int) forwarded to DataEncoder.
          is_norm_reg_target: (bool) forwarded to DataEncoder.
          radius: (int) forwarded to DataEncoder.encode().
          view_image: (bool) save debug images with drawn boxes.
          min_cols, min_rows: (int) minimum box width/height to keep.
        '''
        self.path = path
        self.classes = classes
        self.transform = transform
        self.input_size = input_image_size
        self.num_crops = num_crops
        self.view_img = view_image
        self.fpn_level = fpn_level
        self.is_norm_reg_target = is_norm_reg_target
        self.radius = radius

        self.fnames = list()
        self.offsets = list()
        self.boxes = list()
        self.labels = list()

        self.num_classes = len(self.classes)

        self.label_map = dict()
        self.class_idx_map = dict()
        # 0 is background class
        for idx in range(0, self.num_classes):
            self.label_map[self.classes[idx]] = idx + 1  # 0 is background
            self.class_idx_map[idx + 1] = self.classes[idx]

        self.data_encoder = DataEncoder(
            image_size=self.input_size,
            num_classes=self.num_classes + 1,
            fpn_level=self.fpn_level,
            is_norm_reg_target=self.is_norm_reg_target)

        fp_read = open(self.path, 'r')
        gt_dict = json.load(fp_read)

        all_boxes = list()
        all_labels = list()
        all_img_path = list()

        # read gt files
        for gt_key in gt_dict:
            gt_data = gt_dict[gt_key][0]

            box = list()
            label = list()

            num_boxes = len(gt_data['labels'])

            img = cv2.imread(gt_data['image_path'])
            img_rows = img.shape[0]
            img_cols = img.shape[1]

            for iter_box in range(0, num_boxes):
                xmin = gt_data['boxes'][iter_box][0]
                ymin = gt_data['boxes'][iter_box][1]
                xmax = gt_data['boxes'][iter_box][2]
                ymax = gt_data['boxes'][iter_box][3]
                rows = ymax - ymin
                cols = xmax - xmin

                if xmin < 0 or ymin < 0:
                    print('negative coordinate: [xmin: ' + str(xmin) +
                          ', ymin: ' + str(ymin) + ']')
                    print(gt_data['image_path'])
                    continue

                if xmax > img_cols or ymax > img_rows:
                    print('over maximum size: [xmax: ' + str(xmax) +
                          ', ymax: ' + str(ymax) + ']')
                    print(gt_data['image_path'])
                    continue

                if cols < min_cols:
                    print('cols is lower than ' + str(min_cols) + ': [' +
                          str(xmin) + ', ' + str(ymin) + ', ' + str(xmax) +
                          ', ' + str(ymax) + '] ' + str(gt_data['image_path']))
                    continue
                if rows < min_rows:
                    print('rows is lower than ' + str(min_rows) + ': [' +
                          str(xmin) + ', ' + str(ymin) + ', ' + str(xmax) +
                          ', ' + str(ymax) + '] ' + str(gt_data['image_path']))
                    continue

                class_name = gt_data['labels'][iter_box][0]
                if class_name not in self.label_map:
                    print('weird class name: ' + class_name)
                    print(gt_data['image_path'])
                    continue

                class_idx = self.label_map[class_name]
                box.append(
                    [float(xmin),
                     float(ymin),
                     float(xmax),
                     float(ymax)])
                label.append(int(class_idx))

            if len(box) == 0 or len(label) == 0:
                print('no objects exist in the image: ' +
                      gt_data['image_path'])
                continue

            all_boxes.append(box)
            all_labels.append(label)
            all_img_path.append(gt_data['image_path'])

        if len(all_boxes) == len(all_labels) and len(all_boxes) == len(
                all_img_path):
            num_images = len(all_img_path)
        else:
            print('num. of boxes: ' + str(len(all_boxes)))
            print('num. of labels: ' + str(len(all_labels)))
            print('num. of paths: ' + str(len(all_img_path)))
            raise ValueError(
                'num. of elements differ (all_boxes, all_labels, all_img_path)'
            )

        if num_crops <= 0:
            for idx in range(0, num_images, 1):
                self.fnames.append(all_img_path[idx])
                self.boxes.append(
                    torch.tensor(all_boxes[idx], dtype=torch.float32))
                self.labels.append(
                    torch.tensor(all_labels[idx], dtype=torch.int64))
        else:
            for idx in range(0, num_images, 1):
                ori_boxes = all_boxes[idx]
                ori_labels = all_labels[idx]

                ori_img = cv2.imread(all_img_path[idx])
                img_rows = ori_img.shape[0]
                img_cols = ori_img.shape[1]

                offsets, crop_boxes, crop_labels = self._do_crop(
                    ori_img_rows=img_rows,
                    ori_img_cols=img_cols,
                    target_img_size=self.input_size,
                    boxes=ori_boxes,
                    labels=ori_labels)

                num_offsets = len(offsets)

                for idx_offset in range(0, num_offsets, 1):
                    self.fnames.append(all_img_path[idx])
                    self.offsets.append(offsets[idx_offset])
                    self.boxes.append(
                        torch.tensor(crop_boxes[idx_offset],
                                     dtype=torch.float32))
                    self.labels.append(
                        torch.tensor(crop_labels[idx_offset],
                                     dtype=torch.int64))

        self.num_samples = len(self.fnames)

    def __getitem__(self, idx):
        # Load image and boxes.
        fname = self.fnames[idx]
        boxes = self.boxes[idx]
        labels = self.labels[idx]
        img = cv2.imread(fname)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        if self.num_crops > 0:
            offset = self.offsets[idx]
            crop_rect = (int(offset[0]), int(offset[1]),
                         int(offset[0] + self.input_size[1]),
                         int(offset[1] + self.input_size[0]))

            if offset[0] < 0 or offset[1] < 0:
                raise ValueError("negative offset!")
            for box in boxes:
                if box[0] < 0 or box[1] < 0 or box[2] > self.input_size[
                        1] or box[3] > self.input_size[0]:
                    raise ValueError("negative box coordinate!")

            img = img[crop_rect[1]:crop_rect[3], crop_rect[0]:crop_rect[2]]

        bboxes = [
            bbox.tolist() + [label.item()]
            for bbox, label in zip(boxes, labels)
        ]
        augmented = self.transform(image=img, bboxes=bboxes)
        img = augmented['image']
        rows, cols = img.shape[1:]
        boxes = augmented['bboxes']
        boxes = [list(bbox) for bbox in boxes]
        labels = [bbox.pop() for bbox in boxes]

        if self.view_img:
            # Convert the CHW tensor back to a uint8 BGR image for OpenCV.
            np_img = img.numpy()
            np_img = np.transpose(np_img, (1, 2, 0))
            np_img = np.uint8(np_img * 255)
            np_img = np.ascontiguousarray(np_img)
            np_img = cv2.cvtColor(np_img, cv2.COLOR_RGB2BGR)
            for idx_box, box in enumerate(boxes):
                cv2.rectangle(np_img, (int(box[0]), int(box[1])),
                              (int(box[2]), int(box[3])), (0, 255, 0))
                class_idx = labels[idx_box]
                text_size = cv2.getTextSize(self.class_idx_map[class_idx],
                                            cv2.FONT_HERSHEY_PLAIN, 1, 1)
                cv2.putText(np_img, self.class_idx_map[class_idx],
                            (int(box[0]), int(box[1]) - text_size[1]),
                            cv2.FONT_HERSHEY_PLAIN, 1, (255, 255, 255), 1)

            cv2.imwrite(os.path.join("crop_test", str(idx) + ".jpg"), np_img)

        boxes = torch.tensor(boxes, dtype=torch.float32)
        labels = torch.tensor(labels, dtype=torch.int64)

        return img, boxes, labels, fname

    def __len__(self):
        return self.num_samples

    # def _resize(self, img, boxes):
    #     if isinstance(self.input_size, int) is True:
    #         w = h = self.input_size
    #     elif isinstance(self.input_size, tuple) is True:
    #         h = self.input_size[0]
    #         w = self.input_size[1]
    #     else:
    #         raise ValueError('input size should be int or tuple of ints')
    #
    #     ws = 1.0 * w / img.shape[1]
    #     hs = 1.0 * h / img.shape[0]
    #     scale = torch.tensor([ws, hs, ws, hs], dtype=torch.float32)
    #     if boxes.numel() == 0:
    #         scaled_box = boxes
    #     else:
    #         scaled_box = scale * boxes
    #     return cv2.resize(img, (w, h)), scaled_box

    def _do_crop(self, ori_img_rows, ori_img_cols, target_img_size, boxes,
                 labels):
        num_boxes = len(boxes)
        num_labels = len(labels)

        if num_boxes != num_labels:
            print("error occur: Random crop")

        rand_indices = [0, 1, 2, 3, 4]
        np.random.shuffle(rand_indices)

        output_offsets = []
        output_boxes = []
        output_labels = []

        for box in boxes:
            # box coordinates are 1-based, not 0-based.
            xmin = box[0]
            ymin = box[1]
            xmax = box[2]
            ymax = box[3]

            width = (xmax - xmin) + 1
            height = (ymax - ymin) + 1

            if width < 0 or height < 0:
                print("negative width/height")
                continue

            for iter_crop in range(0, self.num_crops, 1):
                rand_idx = rand_indices[iter_crop]

                margin = np.random.randint(16, 128, size=1)

                # top-left
                if rand_idx == 0:
                    offset_x = xmin - 1 - margin[0]
                    offset_y = ymin - 1 - margin[0]
                    crop_maxx = offset_x + target_img_size[1]
                    crop_maxy = offset_y + target_img_size[0]

                    if crop_maxx > ori_img_cols - 1 or crop_maxy > ori_img_rows - 1:
                        continue
                    if offset_x < 0 or offset_y < 0:
                        continue

                    crop_rect = [
                        offset_x, offset_y, target_img_size[1],
                        target_img_size[0]
                    ]

                    in_boxes, in_labels = self._find_boxes_in_crop(
                        crop_rect, boxes, labels)

                    if len(in_boxes) == 0:
                        continue

                    output_offsets.append([offset_x, offset_y])
                    output_boxes.append(in_boxes)
                    output_labels.append(in_labels)
                # top-right
                elif rand_idx == 1:
                    offset_x = xmin - (target_img_size[1] -
                                       width) - 1 + margin[0]
                    offset_y = ymin - 1 - margin[0]
                    crop_maxx = offset_x + target_img_size[1]
                    crop_maxy = offset_y + target_img_size[0]

                    if crop_maxx > ori_img_cols - 1 or crop_maxy > ori_img_rows - 1:
                        continue

                    if offset_x < 0 or offset_y < 0:
                        continue

                    crop_rect = [
                        offset_x, offset_y, target_img_size[1],
                        target_img_size[0]
                    ]

                    in_boxes, in_labels = self._find_boxes_in_crop(
                        crop_rect, boxes, labels)

                    if len(in_boxes) == 0:
                        continue

                    output_offsets.append([offset_x, offset_y])
                    output_boxes.append(in_boxes)
                    output_labels.append(in_labels)
                # bottom-left
                elif rand_idx == 2:
                    offset_x = xmin - 1 - margin[0]
                    offset_y = ymin - (target_img_size[0] -
                                       height) - 1 + margin[0]
                    crop_maxx = offset_x + target_img_size[1]
                    crop_maxy = offset_y + target_img_size[0]

                    if crop_maxx > ori_img_cols - 1 or crop_maxy > ori_img_rows - 1:
                        continue

                    if offset_x < 0 or offset_y < 0:
                        continue

                    crop_rect = [
                        offset_x, offset_y, target_img_size[1],
                        target_img_size[0]
                    ]

                    in_boxes, in_labels = self._find_boxes_in_crop(
                        crop_rect, boxes, labels)

                    if len(in_boxes) == 0:
                        continue

                    output_offsets.append([offset_x, offset_y])
                    output_boxes.append(in_boxes)
                    output_labels.append(in_labels)
                # bottom-right
                elif rand_idx == 3:
                    offset_x = xmin - (target_img_size[1] -
                                       width) - 1 + margin[0]
                    offset_y = ymin - (target_img_size[0] -
                                       height) - 1 + margin[0]
                    crop_maxx = offset_x + target_img_size[1]
                    crop_maxy = offset_y + target_img_size[0]

                    if crop_maxx > ori_img_cols - 1 or crop_maxy > ori_img_rows - 1:
                        continue

                    if offset_x < 0 or offset_y < 0:
                        continue

                    crop_rect = [
                        offset_x, offset_y, target_img_size[1],
                        target_img_size[0]
                    ]

                    in_boxes, in_labels = self._find_boxes_in_crop(
                        crop_rect, boxes, labels)

                    if len(in_boxes) == 0:
                        continue

                    output_offsets.append([offset_x, offset_y])
                    output_boxes.append(in_boxes)
                    output_labels.append(in_labels)
                # center
                elif rand_idx == 4:
                    rand_direction = np.random.randint(-1, 1, size=1)

                    offset_x = (xmin - ((target_img_size[1] - width) / 2) -
                                1) + (rand_direction[0] * margin[0])
                    offset_y = (ymin - ((target_img_size[0] - height) / 2) -
                                1) + (rand_direction[0] * margin[0])
                    crop_maxx = offset_x + target_img_size[1]
                    crop_maxy = offset_y + target_img_size[0]

                    if crop_maxx > ori_img_cols - 1 or crop_maxy > ori_img_rows - 1:
                        continue

                    if offset_x < 0 or offset_y < 0:
                        continue

                    crop_rect = [
                        offset_x, offset_y, target_img_size[1],
                        target_img_size[0]
                    ]

                    in_boxes, in_labels = self._find_boxes_in_crop(
                        crop_rect, boxes, labels)

                    if len(in_boxes) == 0:
                        continue

                    output_offsets.append([offset_x, offset_y])
                    output_boxes.append(in_boxes)
                    output_labels.append(in_labels)

                else:
                    print("exceed possible crop num")

        return output_offsets, output_boxes, output_labels

    def _find_boxes_in_crop(self, crop_rect, boxes, labels):
        num_boxes = len(boxes)
        num_labels = len(labels)

        if num_boxes != num_labels:
            print("error occur: Random crop")

        boxes_in_crop = []
        labels_in_crop = []
        for idx in range(0, num_boxes, 1):
            box_in_crop, label, is_contain = self._find_box_in_crop(
                crop_rect, boxes[idx], labels[idx])

            if is_contain is True:
                boxes_in_crop.append(box_in_crop)
                labels_in_crop.append(label)

        return boxes_in_crop, labels_in_crop

    def _find_box_in_crop(self, rect, box, label):
        rect_minx = rect[0]
        rect_miny = rect[1]
        rect_width = rect[2]
        rect_height = rect[3]

        box_minx = box[0]
        box_miny = box[1]
        box_maxx = box[2]
        box_maxy = box[3]
        box_width = (box_maxx - box_minx) + 1
        box_height = (box_maxy - box_miny) + 1

        # occlusion_ratio
        occlusion_ratio = 0.3
        occlusion_width = int(box_width * occlusion_ratio) * -1
        occlusion_height = int(box_height * occlusion_ratio) * -1

        box_in_crop_minx = box_minx - rect_minx
        if box_in_crop_minx <= occlusion_width or box_in_crop_minx >= rect_width:
            box_in_rect = []
            return box_in_rect, label, False

        box_in_crop_miny = box_miny - rect_miny
        if box_in_crop_miny <= occlusion_height or box_in_crop_miny >= rect_height:
            box_in_rect = []
            return box_in_rect, label, False

        box_in_crop_maxx = box_maxx - rect_minx
        if rect_width - box_in_crop_maxx <= occlusion_width or box_in_crop_maxx <= 0:
            box_in_rect = []
            return box_in_rect, label, False

        box_in_crop_maxy = box_maxy - rect_miny
        if rect_height - box_in_crop_maxy <= occlusion_height or box_in_crop_maxy <= 0:
            box_in_rect = []
            return box_in_rect, label, False

        if box_in_crop_minx < 0:
            box_in_crop_minx = 0
        if box_in_crop_miny < 0:
            box_in_crop_miny = 0
        if rect_width - box_in_crop_maxx < 0:
            box_in_crop_maxx = rect_width - 1
        if rect_height - box_in_crop_maxy < 0:
            box_in_crop_maxy = rect_height - 1

        box_in_rect = [
            box_in_crop_minx, box_in_crop_miny, box_in_crop_maxx,
            box_in_crop_maxy
        ]
        return box_in_rect, label, True

    def collate_fn(self, batch):
        imgs = [x[0] for x in batch]
        boxes = [x[1] for x in batch]
        labels = [x[2] for x in batch]
        paths = [x[3] for x in batch]

        num_imgs = len(imgs)

        if isinstance(self.input_size, int) is True:
            inputs = torch.zeros(
                [num_imgs, 3, self.input_size, self.input_size],
                dtype=torch.float32)
        elif isinstance(self.input_size, tuple) is True:
            inputs = torch.zeros(
                [num_imgs, 3, self.input_size[0], self.input_size[1]],
                dtype=torch.float32)
        else:
            raise ValueError('input size should be int or tuple of ints')

        loc_targets = list()
        cls_targets = list()
        center_targets = list()

        for i in range(num_imgs):
            im = imgs[i]
            imh, imw = im.size(1), im.size(2)
            inputs[i, :, :imh, :imw] = im

            # Encode data.
            loc_target, cls_target, center_target = self.data_encoder.encode(
                boxes[i], labels[i], radius=self.radius)

            loc_targets.append(loc_target)
            cls_targets.append(cls_target)
            center_targets.append(center_target)

        return inputs, \
               torch.stack(loc_targets, dim=0), \
               torch.stack(cls_targets, dim=0), \
               torch.stack(center_targets, dim=0), \
               paths
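
A hedged construction sketch for this dataset; the albumentations-style
pipeline below is an assumption inferred from the transform(image=..., bboxes=...)
call in __getitem__, and the file paths and class list are placeholders:

import torch
import albumentations as A
from albumentations.pytorch import ToTensorV2

transform = A.Compose(
    [A.Resize(512, 512), A.Normalize(), ToTensorV2()],
    bbox_params=A.BboxParams(format='pascal_voc'))

dataset = jsonDataset(path='gt/train.json', classes=['person', 'car'],
                      transform=transform, input_image_size=(512, 512),
                      num_crops=0, fpn_level=5, is_norm_reg_target=True,
                      radius=1)
loader = torch.utils.data.DataLoader(dataset, batch_size=2,
                                     collate_fn=dataset.collate_fn)
inputs, loc_t, cls_t, center_t, paths = next(iter(loader))
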
Esempio n. 23
0
print('Loading model..')
net = RetinaNet()
net.load_state_dict(torch.load('./checkpoint/params.pth'))
net.eval()

transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
])

print('Loading image..')
img = Image.open('./image/000001.jpg')
w = h = 600
img = img.resize((w, h))

print('Predicting..')
x = transform(img)
x = x.unsqueeze(0)
x = Variable(x, volatile=True)
loc_preds, cls_preds = net(x)

print('Decoding..')
encoder = DataEncoder()
boxes, labels = encoder.decode(loc_preds.data.squeeze(),
                               cls_preds.data.squeeze(), (w, h))

draw = ImageDraw.Draw(img)
for box in boxes:
    draw.rectangle(list(box), outline='red')
img.show()
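
On PyTorch 0.4 and later the Variable/volatile API used above is gone; within
the same script (net, transform and img already defined) the forward pass can
be written as:

x = transform(img).unsqueeze(0)    # add the batch dimension
with torch.no_grad():              # replaces Variable(x, volatile=True)
    loc_preds, cls_preds = net(x)
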
Esempio n. 24
0
    def __init__(self,
                 path,
                 classes,
                 transform,
                 input_image_size,
                 num_crops,
                 fpn_level,
                 is_norm_reg_target,
                 radius,
                 view_image=False,
                 min_cols=1,
                 min_rows=1):
        '''
        Args:
          root: (str) ditectory to images.
          list_file: (str) path to index file.
          train: (boolean) train or test.
          transform: ([transforms]) image transforms.
          input_size: (int) image shorter side size.
          max_size: (int) maximum image longer side size.
        '''
        self.path = path
        self.classes = classes
        self.transform = transform
        self.input_size = input_image_size
        self.num_crops = num_crops
        self.view_img = view_image
        self.fpn_level = fpn_level
        self.is_norm_reg_target = is_norm_reg_target
        self.radius = radius

        self.fnames = list()
        self.offsets = list()
        self.boxes = list()
        self.labels = list()

        self.num_classes = len(self.classes)

        self.label_map = dict()
        self.class_idx_map = dict()
        # 0 is background class
        for idx in range(0, self.num_classes):
            self.label_map[self.classes[idx]] = idx + 1  # 0 is background
            self.class_idx_map[idx + 1] = self.classes[idx]

        self.data_encoder = DataEncoder(
            image_size=self.input_size,
            num_classes=self.num_classes + 1,
            fpn_level=self.fpn_level,
            is_norm_reg_target=self.is_norm_reg_target)

        fp_read = open(self.path, 'r')
        gt_dict = json.load(fp_read)

        all_boxes = list()
        all_labels = list()
        all_img_path = list()

        # read gt files
        for gt_key in gt_dict:
            gt_data = gt_dict[gt_key][0]

            box = list()
            label = list()

            num_boxes = len(gt_data['labels'])

            img = cv2.imread(gt_data['image_path'])
            img_rows = img.shape[0]
            img_cols = img.shape[1]

            for iter_box in range(0, num_boxes):
                xmin = gt_data['boxes'][iter_box][0]
                ymin = gt_data['boxes'][iter_box][1]
                xmax = gt_data['boxes'][iter_box][2]
                ymax = gt_data['boxes'][iter_box][3]
                rows = ymax - ymin
                cols = xmax - xmin

                if xmin < 0 or ymin < 0:
                    print('negative coordinate: [xmin: ' + str(xmin) +
                          ', ymin: ' + str(ymin) + ']')
                    print(gt_data['image_path'])
                    continue

                if xmax > img_cols or ymax > img_rows:
                    print('over maximum size: [xmax: ' + str(xmax) +
                          ', ymax: ' + str(ymax) + ']')
                    print(gt_data['image_path'])
                    continue

                if cols < min_cols:
                    print('cols is lower than ' + str(min_cols) + ': [' +
                          str(xmin) + ', ' + str(ymin) + ', ' + str(xmax) +
                          ', ' + str(ymax) + '] ' + str(gt_data['image_path']))
                    continue
                if rows < min_rows:
                    print('rows is lower than ' + str(min_rows) + ': [' +
                          str(xmin) + ', ' + str(ymin) + ', ' + str(xmax) +
                          ', ' + str(ymax) + '] ' + str(gt_data['image_path']))
                    continue

                class_name = gt_data['labels'][iter_box][0]
                if class_name not in self.label_map:
                    print('weird class name: ' + class_name)
                    print(gt_data['image_path'])
                    continue

                class_idx = self.label_map[class_name]
                box.append(
                    [float(xmin),
                     float(ymin),
                     float(xmax),
                     float(ymax)])
                label.append(int(class_idx))

            if len(box) == 0 or len(label) == 0:
                print('no objects exist in the image: ' +
                      gt_data['image_path'])
                continue

            all_boxes.append(box)
            all_labels.append(label)
            all_img_path.append(gt_data['image_path'])

        if len(all_boxes) == len(all_labels) == len(all_img_path):
            num_images = len(all_img_path)
        else:
            print('num. of boxes: ' + str(len(all_boxes)))
            print('num. of labels: ' + str(len(all_labels)))
            print('num. of paths: ' + str(len(all_img_path)))
            raise ValueError(
                'number of elements differ (all_boxes, all_labels, all_img_path)')

        if num_crops <= 0:
            for idx in range(0, num_images, 1):
                self.fnames.append(all_img_path[idx])
                self.boxes.append(
                    torch.tensor(all_boxes[idx], dtype=torch.float32))
                self.labels.append(
                    torch.tensor(all_labels[idx], dtype=torch.int64))
        else:
            for idx in range(0, num_images, 1):
                ori_boxes = all_boxes[idx]
                ori_labels = all_labels[idx]

                ori_img = cv2.imread(all_img_path[idx])
                img_rows = ori_img.shape[0]
                img_cols = ori_img.shape[1]

                offsets, crop_boxes, crop_labels = self._do_crop(
                    ori_img_rows=img_rows,
                    ori_img_cols=img_cols,
                    target_img_size=self.input_size,
                    boxes=ori_boxes,
                    labels=ori_labels)

                num_offsets = len(offsets)

                for idx_offset in range(0, num_offsets, 1):
                    self.fnames.append(all_img_path[idx])
                    self.offsets.append(offsets[idx_offset])
                    self.boxes.append(
                        torch.tensor(crop_boxes[idx_offset],
                                     dtype=torch.float32))
                    self.labels.append(
                        torch.tensor(crop_labels[idx_offset],
                                     dtype=torch.int64))

        self.num_samples = len(self.fnames)
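
# --- Added worked example (not part of the original snippet) ---
# How the label maps built above are laid out: index 0 is reserved for the
# background class, so the i-th class name maps to i + 1. The class names
# below are illustrative placeholders only.
example_classes = ['person', 'car', 'dog']
example_label_map = {name: i + 1 for i, name in enumerate(example_classes)}
example_class_idx_map = {i + 1: name for i, name in enumerate(example_classes)}
assert example_label_map == {'person': 1, 'car': 2, 'dog': 3}
assert example_class_idx_map == {1: 'person', 2: 'car', 3: 'dog'}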
Example n. 25
class ListDataset(data.Dataset):
    img_size = 300

    def __init__(self, root, list_file, train, transform):
        '''
        Args:
          root: (str) directory to images.
          list_file: (str) path to annotation files.
          train: (boolean) train or test.
          transform: ([transforms]) image transforms.
        '''
        self.root = root
        self.train = train
        self.transform = transform

        self.fnames = []
        self.boxes = []
        self.labels = []

        self.data_encoder = DataEncoder()
        self.num_samples = 0

        for i in os.listdir(list_file):
            self.num_samples += 1
            self.fnames.append(i)
            box = []
            labels = []
            with open(os.path.join(list_file, i)) as f:
                lines = f.read().split("\n")[:-1]
            num_objs = len(lines)

            for j in range(num_objs):
                vals = lines[j].split(",")
                xmin = float(vals[0])
                ymin = float(vals[1])
                w = float(vals[2])
                h = float(vals[3])

                # convert (x, y, w, h) to (xmin, ymin, xmax, ymax)
                box.append([xmin, ymin, xmin + w, ymin + h])
                labels.append(int(vals[5]))

            self.boxes.append(torch.Tensor(box))
            self.labels.append(torch.LongTensor(labels))

    def __getitem__(self, idx):
        '''Load an image, and encode its bbox locations and class labels.
        Args:
          idx: (int) image index.
        Returns:
          img: (tensor) image tensor.
          loc_target: (tensor) location targets, sized [8732,4].
          conf_target: (tensor) label targets, sized [8732,].
        '''
        # Load image and bbox locations.
        fname = self.fnames[idx]
        img = cv2.imread(os.path.join(self.root, fname[:-4] + ".jpg"))
        boxes = self.boxes[idx].clone()
        labels = self.labels[idx]

        # Data augmentation while training.
        if self.train:
            img, boxes = self.random_flip(img, boxes)
            img, boxes, labels = self.random_crop(img, boxes, labels)

        # Scale bbox locations to [0,1].
        w, h = img.shape[1], img.shape[0]
        boxes /= torch.Tensor([w, h, w, h]).expand_as(boxes)
        img = cv2.resize(img, (self.img_size, self.img_size))
        img = self.transform(img)

        # Encode loc & conf targets.

        loc_target, conf_target = self.data_encoder.encode(boxes, labels)
        return img, loc_target, conf_target

    def random_flip(self, img, boxes):
        '''Randomly flip the image and adjust the bbox locations.
        For bbox (xmin, ymin, xmax, ymax), the flipped bbox is:
        (w-xmax, ymin, w-xmin, ymax).
        Args:
          img: (ndarray.Image) image.
          boxes: (tensor) bbox locations, sized [#obj, 4].
        Returns:
          img: (ndarray.Image) randomly flipped image.
          boxes: (tensor) randomly flipped bbox locations, sized [#obj, 4].
        '''
        if random.random() < 0.5:
            img = cv2.flip(img, 1)
            w = img.shape[1]
            xmin = w - boxes[:, 2]
            xmax = w - boxes[:, 0]
            boxes[:, 0] = xmin
            boxes[:, 2] = xmax
        return img, boxes

    def random_crop(self, img, boxes, labels):
        '''Randomly crop the image and adjust the bbox locations.
        For more details, see Section 2.2 (Data augmentation) of the paper.
        Args:
          img: (ndarray.Image) image.
          boxes: (tensor) bbox locations, sized [#obj, 4].
          labels: (tensor) bbox labels, sized [#obj,].
        Returns:
          img: (ndarray.Image) cropped image.
          selected_boxes: (tensor) selected bbox locations.
          labels: (tensor) selected bbox labels.
        '''
        imw, imh = img.shape[1], img.shape[0]
        while True:
            min_iou = random.choice([None, 0.1, 0.3, 0.5, 0.7,
                                     0.9])  # randomly pick one IoU threshold
            if min_iou is None:
                return img, boxes, labels

            for _ in range(100):
                w = random.randrange(int(0.1 * imw), imw)
                h = random.randrange(int(0.1 * imh), imh)

                if h > 2 * w or w > 2 * h or h < 1 or w < 1:
                    continue

                x = random.randrange(imw - w)
                y = random.randrange(imh - h)
                roi = torch.Tensor([[x, y, x + w, y + h]])

                center = (boxes[:, :2] + boxes[:, 2:]) / 2  # [N,2]
                roi2 = roi.expand(len(center), 4)  # [N,4]

                mask = (center > roi2[:, :2]) & (center < roi2[:, 2:])  # [N,2]
                mask = mask[:, 0] & mask[:, 1]  #[N,]

                if not mask.any():
                    continue

                selected_boxes = boxes.index_select(0,
                                                    mask.nonzero().squeeze(1))

                iou = self.data_encoder.iou(selected_boxes, roi)
                if iou.min() < min_iou:
                    continue
                img = img[y:y + h, x:x + w, :]

                selected_boxes[:, 0].add_(-x).clamp_(min=0, max=w)
                selected_boxes[:, 1].add_(-y).clamp_(min=0, max=h)
                selected_boxes[:, 2].add_(-x).clamp_(min=0, max=w)
                selected_boxes[:, 3].add_(-y).clamp_(min=0, max=h)

                return img, selected_boxes, labels[mask]

    def __len__(self):
        return self.num_samples
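
# --- Added usage sketch (not part of the original snippet) ---
# Hedged example of feeding this SSD-style dataset to a DataLoader. The root
# and annotation-directory paths are placeholders; each item comes back
# already encoded against the fixed anchor grid, so the default collate works.
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

example_transform = transforms.ToTensor()  # HWC uint8 ndarray -> CHW float tensor in [0, 1]
example_set = ListDataset(root='data/images', list_file='data/annotations',
                          train=True, transform=example_transform)
example_loader = DataLoader(example_set, batch_size=16, shuffle=True)

for imgs, loc_targets, conf_targets in example_loader:
    # imgs: [B, 3, 300, 300], loc_targets: [B, 8732, 4], conf_targets: [B, 8732]
    break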
Example n. 26
class ListDataset(data.Dataset):
    def __init__(self, root, train, transform, input_size):
        '''
        Args:
          root: (str) directory to images.
          train: (boolean) train or test.
          transform: ([transforms]) image transforms.
          input_size: (int) model input size.
        '''
        self.root = root
        self.train = train
        self.transform = transform
        self.input_size = input_size

        self.fnames = []
        self.boxes = []
        self.labels = []

        self.encoder = DataEncoder()

        self._labpath = sorted(glob.glob("%s/*.*" % self.root))
        self._imgpath = [
            path.replace("labels", "image").replace(".txt", ".jpg")
            for path in self._labpath
        ]

    def __getitem__(self, index):
        '''Load image.

        Args:
          index: (int) image index.

        Returns:
          img: (tensor) image tensor.
          boxes: (tensor) bbox locations, sized [#obj, 4].
          labels: (tensor) class labels, sized [#obj,].
          fname: (str) image file name without extension.
        '''
        # Load image and boxes.

        img_path = self._imgpath[index].rstrip()
        fname = img_path.split('/')[-1].split('.')[0]

        # print(img_path)
        img = Image.open(img_path)
        if img.mode != 'RGB':
            img = img.convert('RGB')

        label_path = self._labpath[index].rstrip()
        # print(label_path)

        targets = np.loadtxt(label_path).reshape(-1, 5)
        # targets = np.array(targets)
        # print(targets)
        boxes = torch.Tensor(targets[:, 1:])
        labels = torch.LongTensor(targets[:, 0])

        size = self.input_size

        # Data augmentation.
        if self.train:
            img, boxes = random_flip(img, boxes)
            img, boxes = random_crop(img, boxes)
            img, boxes = resize(img, boxes, (size, size))
        else:
            img, boxes = resize(img, boxes, (size, size))
            # img, boxes = center_crop(img, boxes, (size,size))

        img = self.transform(img)
        return img, boxes, labels, fname

    def collate_fn(self, batch):
        '''Pad images and encode targets.

        Since images may be of different sizes, we need to pad them to the same size.

        Args:
          batch: (list) of (img, boxes, labels, fname) tuples.

        Returns:
          padded images, stacked loc_targets, stacked cls_targets, fnames.
        '''
        imgs = [x[0] for x in batch]
        boxes = [x[1] for x in batch]
        labels = [x[2] for x in batch]
        fname = [x[3] for x in batch]

        h = w = self.input_size
        num_imgs = len(imgs)
        # print(num_imgs)
        inputs = torch.zeros(num_imgs, 3, h, w)

        loc_targets = []
        cls_targets = []
        for i in range(num_imgs):
            inputs[i] = imgs[i]
            loc_target, cls_target = self.encoder.encode(boxes[i],
                                                         labels[i],
                                                         input_size=(w, h))
            loc_targets.append(loc_target)
            cls_targets.append(cls_target)
        return inputs, torch.stack(loc_targets), torch.stack(
            cls_targets), fname

        # return inputs, boxes, labels

    def __len__(self):
        return len(self._labpath)
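
# --- Added usage sketch (not part of the original snippet) ---
# Hedged example of wiring this dataset into a DataLoader. Because __getitem__
# returns raw boxes/labels of varying length, the custom collate_fn above must
# be supplied so targets get anchor-encoded and stacked per batch. The root
# path, input size and batch size below are placeholders.
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

example_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
])
example_set = ListDataset(root='data/labels', train=True,
                          transform=example_transform, input_size=512)
example_loader = DataLoader(example_set, batch_size=8, shuffle=True,
                            collate_fn=example_set.collate_fn)

for inputs, loc_targets, cls_targets, fnames in example_loader:
    # inputs: [B, 3, 512, 512]; loc/cls targets already encoded by collate_fn
    break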
Example n. 27
from torch.autograd import Variable
from encoder import DataEncoder
import arcface_loss2
from cosface_loss import MarginCosineProduct

cudnn.benchmark = True

id_net = Idnet(classnum=2874)
id_net = torch.nn.DataParallel(id_net, device_ids=[0])
id_net.load_state_dict(torch.load("./arcface_id_net-data_addition-epoch-20-acc0.pth"))
id_net.cuda()

#net.load_state_dict(torch.load("./trained model/originalFAN_model.pth"))
#net.eval()
coder = DataEncoder()

detector = dlib.get_frontal_face_detector()
predicter_path = "./model/shape_predictor_5_face_landmarks.dat"
sp = dlib.shape_predictor(predicter_path)


transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.485,0.456,0.406), (0.229,0.224,0.225))
])

def KFold(n=6000, n_folds=10):
    folds = []
    base = list(range(n))
    for i in range(n_folds):
Example n. 28
def train():
    args = parse_args()

    assert torch.cuda.is_available(), 'Error: CUDA not found!'
    assert args.focal_loss, "OHEM + ce_loss is not working... :("

    if not os.path.exists(args.save_folder):
        os.mkdir(args.save_folder)

    if not os.path.exists(args.logdir):
        os.mkdir(args.logdir)

    ###########################################################################
    # Data
    ###########################################################################

    print('==> Preparing data..')
    trainset = ListDataset(root='/mnt/9C5E1A4D5E1A2116/datasets/',
                           dataset=args.dataset,
                           train=True,
                           transform=Augmentation_traininig,
                           input_size=args.input_size,
                           multi_scale=args.multi_scale)
    trainloader = torch.utils.data.DataLoader(trainset,
                                              batch_size=args.batch_size,
                                              shuffle=True,
                                              num_workers=args.num_workers,
                                              collate_fn=trainset.collate_fn)

    ###########################################################################

    # Training detail options
    stepvalues = (10000, 20000, 30000, 40000, 50000) if args.dataset in ["SynthText"] \
        else (2000, 4000, 6000, 8000, 10000)
    best_loss = float('inf')  # best test loss
    start_epoch = 0  # start from epoch 0 or last epoch
    iteration = 0
    cur_lr = args.lr
    mean = (0.485, 0.456, 0.406)
    std = (0.229, 0.224, 0.225)
    step_index = 0
    pEval = None

    ###########################################################################
    # Model
    ###########################################################################

    # set model (focal_loss vs OHEM_CE loss)
    if args.focal_loss:
        imagenet_pretrain = 'weights/retinanet_se50.pth'
        criterion = FocalLoss()
        num_classes = 1
    else:
        imagenet_pretrain = 'weights/retinanet_se50_OHEM.pth'
        criterion = OHEM_loss()
        num_classes = 2

    net = RetinaNet(num_classes)

    # Restore model weights
    net.load_state_dict(torch.load(imagenet_pretrain))

    if args.resume:
        print('==> Resuming from checkpoint..', args.resume)
        checkpoint = torch.load(args.resume)
        net.load_state_dict(checkpoint['net'])
        #start_epoch = checkpoint['epoch']
        #iteration = checkpoint['iteration']
        #cur_lr = checkpoint['lr']
        #step_index = checkpoint['step_index']
        # optimizer.load_state_dict(state["optimizer"])

    print("multi_scale : ", args.multi_scale)
    print("input_size : ", args.input_size)
    print("stepvalues : ", stepvalues)
    print("start_epoch : ", start_epoch)
    print("iteration : ", iteration)
    print("cur_lr : ", cur_lr)
    print("step_index : ", step_index)
    print("num_gpus : ", torch.cuda.device_count())

    # Data parallelism for multi-gpu training
    net = torch.nn.DataParallel(net,
                                device_ids=range(torch.cuda.device_count()))
    net.cuda()

    # Put model in training mode and freeze batch norm.
    net.train()
    net.module.freeze_bn()  # you must freeze batchnorm

    ###########################################################################
    # Optimizer
    ###########################################################################

    optimizer = optim.SGD(net.parameters(),
                          lr=cur_lr,
                          momentum=0.9,
                          weight_decay=1e-4)
    #optimizer = optim.Adam(net.parameters(), lr=cur_lr)

    ###########################################################################
    # Utils
    ###########################################################################

    encoder = DataEncoder()
    writer = SummaryWriter(log_dir=args.logdir)

    ###########################################################################
    # Training loop
    ###########################################################################

    t0 = time.time()
    for epoch in range(start_epoch, 10000):
        if iteration > args.max_iter:
            break

        for inputs, loc_targets, cls_targets in trainloader:
            inputs = Variable(inputs.cuda())
            loc_targets = Variable(loc_targets.cuda())
            cls_targets = Variable(cls_targets.cuda())

            optimizer.zero_grad()
            loc_preds, cls_preds = net(inputs)

            loc_loss, cls_loss = criterion(loc_preds, loc_targets, cls_preds,
                                           cls_targets)
            loss = loc_loss + cls_loss
            loss.backward()
            optimizer.step()

            if iteration % 20 == 0:
                t1 = time.time()

                print(
                    'iter ' + repr(iteration) + ' (epoch ' + repr(epoch) +
                    ') || loss: %.4f || loc_loss: %.4f || cls_loss: %.4f (Time : %.1f)'
                    % (loss.sum().item(), loc_loss.sum().item(),
                       cls_loss.sum().item(), (t1 - t0)))
                # t0 = time.time()

                writer.add_scalar('loc_loss', loc_loss.sum().item(), iteration)
                writer.add_scalar('cls_loss', cls_loss.sum().item(), iteration)
                writer.add_scalar('loss', loss.sum().item(), iteration)

                # show inference image in tensorboard
                infer_img = np.transpose(inputs[0].cpu().numpy(), (1, 2, 0))
                infer_img *= std
                infer_img += mean
                infer_img *= 255.
                infer_img = np.clip(infer_img, 0, 255)
                infer_img = infer_img.astype(np.uint8)
                h, w, _ = infer_img.shape

                boxes, labels, scores = encoder.decode(loc_preds[0],
                                                       cls_preds[0], (w, h))
                boxes = boxes.reshape(-1, 4, 2).astype(np.int32)

                if boxes.shape[0] != 0:
                    # infer_img = infer_img/np.float32(255)

                    # print(boxes)
                    # print(
                    #     f"infer_img prior to cv2.polylines - dtype: {infer_img.dtype}, shape: {infer_img.shape}, min: {infer_img.min()}, max: {infer_img.max()}")
                    # print(
                    #     f"boxes prior to cv2.polylines - dtype: {boxes.dtype}, shape: {boxes.shape}, min: {boxes.min()}, max: {boxes.max()}")
                    infer_img = cv2.polylines(infer_img.copy(), boxes, True,
                                              (0, 255, 0), 4)

                # print(
                #     f"infer_img - dtype: {infer_img.dtype}, shape: {infer_img.shape}, min: {infer_img.min()}, max: {infer_img.max()}")

                writer.add_image('image',
                                 infer_img,
                                 iteration,
                                 dataformats="HWC")
                writer.add_scalar('input_size', h, iteration)
                writer.add_scalar('learning_rate', cur_lr, iteration)

                t0 = time.time()

            if iteration % args.save_interval == 0 and iteration > 0:
                print('Saving state, iter : ', iteration)
                state = {
                    'net': net.module.state_dict(),
                    "optimizer": optimizer.state_dict(),
                    'iteration': iteration,
                    'epoch': epoch,
                    'lr': cur_lr,
                    'step_index': step_index
                }
                model_file = args.save_folder + \
                    'ckpt_' + repr(iteration) + '.pth'
                torch.save(state, model_file)

            if iteration in stepvalues:
                step_index += 1
                cur_lr = adjust_learning_rate(cur_lr, optimizer, args.gamma,
                                              step_index)

            if iteration > args.max_iter:
                break

            if args.evaluation and iteration % args.eval_step == 0:
                try:
                    if pEval is None:
                        print("Evaluation started at iteration {} on IC15...".
                              format(iteration))
                        eval_cmd = "CUDA_VISIBLE_DEVICES=" + str(args.eval_device) + \
                            " python eval.py" + \
                            " --tune_from=" + args.save_folder + 'ckpt_' + repr(iteration) + '.pth' + \
                            " --input_size=1024" + \
                            " --output_zip=result_temp1"

                        pEval = Popen(eval_cmd,
                                      shell=True,
                                      stdout=PIPE,
                                      stderr=PIPE)

                    elif pEval.poll() is not None:
                        (scorestring, stderrdata) = pEval.communicate()

                        hmean = float(
                            str(scorestring).strip().split(":")[3].split(",")
                            [0].split("}")[0].strip())

                        writer.add_scalar('test_hmean', hmean, iteration)

                        print("test_hmean for {}-th iter : {:.4f}".format(
                            iteration, hmean))

                        if pEval is not None:
                            pEval.kill()
                        pEval = None

                except Exception as e:
                    print("exception happened in evaluation ", e)
                    if pEval is not None:
                        pEval.kill()
                    pEval = None

            iteration += 1
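
# --- Added sketch (not part of the original snippet) ---
# adjust_learning_rate() is called in the loop above but defined elsewhere in
# the project. A minimal sketch consistent with that call site, assuming a
# simple step decay by `gamma` at every milestone in `stepvalues`; the real
# implementation may instead scale from the base lr (lr = base_lr * gamma ** step_index).
def adjust_learning_rate(cur_lr, optimizer, gamma, step_index):
    new_lr = cur_lr * gamma  # step_index kept only to mirror the call signature
    for param_group in optimizer.param_groups:
        param_group['lr'] = new_lr
    return new_lr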
Example n. 29
 img = Image.open(image_path).convert('RGB')
 img1 = img.resize((300, 300))
 transform = transforms.Compose([
     transforms.ToTensor(),
     transforms.Normalize(mean=(0.485, 0.456, 0.406),
                          std=(0.229, 0.224, 0.225))
 ])
 img1 = transform(img1)
 if use_cuda:
     img1 = img1.cuda()
 loc, conf = net(Variable(img1[None, :, :, :],
                          volatile=True))  # Forward
 loc = loc.cpu()
 conf = conf.cpu()
 #print(loc, conf)
 data_encoder = DataEncoder()  # Decode
 boxes, labels, scores = data_encoder.decode(
     loc.data.squeeze(0),
     F.softmax(conf.squeeze(0)).data)
 draw = ImageDraw.Draw(img)
 #draw.rectangle(list(box), outline='blue')
 #draw.rectangle(ground_truth_box, outline='blue')
 fnt = ImageFont.truetype('Pillow/Tests/fonts/FreeMono.ttf', 40)
 #img.show()
 for box in boxes:
     box[::2] *= img.width
     box[1::2] *= img.height
     box = list(box)
     x1_org = image[1]
     y1_org = image[2]
     x2_org = image[3]
Example n. 30
                    default='ICDAR2015',
                    type=str,
                    help='evaluation dataset')

args = parser.parse_args()

net = RetinaNet()
net = net.cuda()

# load checkpoint
checkpoint = torch.load(args.tune_from)

net.load_state_dict(checkpoint['net'])
net.eval()

encoder = DataEncoder(args.cls_thresh, args.nms_thresh)

# test image path & list
img_dir = "/root/DB/ICDAR2015_Incidental/test/" if args.dataset in [
    "ICDAR2015"
] else "/root/DB/ICDAR2013_FOCUSED/test/"
val_list = [im for im in os.listdir(img_dir) if "jpg" in im]

if not os.path.exists(args.output_zip):
    os.mkdir(args.output_zip)

# save results dir & zip
eval_dir = "/root/Detector/ocr_evaluation/code/icdar/4_incidental_scene_text/1_TextLocalization/1_IoU/" if args.dataset in ["ICDAR2015"] \
           else "/root/Detector/ocr_evaluation/code/icdar/2_focused_scene_text/1_TextLocalization/1_ICDAR2013/"
result_zip = zipfile.ZipFile(eval_dir + args.output_zip, 'w')
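
# --- Added continuation sketch (not part of the original snippet) ---
# The per-image evaluation loop is cut off here. A hedged sketch of what
# typically follows, using only objects set up above (net, encoder, val_list,
# img_dir, result_zip); the preprocessing transform and the exact result-file
# format are assumptions, not the repo's verified code.
import torchvision.transforms as transforms
from PIL import Image

example_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
])

for img_name in val_list:
    img = Image.open(os.path.join(img_dir, img_name)).convert('RGB')
    x = example_transform(img).unsqueeze(0).cuda()
    with torch.no_grad():
        loc_preds, cls_preds = net(x)
    # decode() returns quadrilateral boxes in image coordinates (same call
    # pattern as in the training snippet above)
    boxes, labels, scores = encoder.decode(loc_preds[0], cls_preds[0],
                                           (img.width, img.height))
    lines = [','.join(str(int(v)) for v in box.reshape(-1)) for box in boxes]
    result_zip.writestr('res_' + img_name.replace('.jpg', '.txt'), '\n'.join(lines))

result_zip.close()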
Example n. 31
num_classes = len(target_classes)

net = load_model(num_classes=num_classes,
                 fpn_level=5,
                 basenet=config['params']['base'],
                 is_pretrained_base=False,
                 is_norm_reg_target=config['params']['norm_reg_target'],
                 centerness_with_loc=config['params']['centerness_on_reg'],
                 is_train=False)
net = net.to(device)
net.eval()

data_encoder = DataEncoder(
    image_size=img_size,
    num_classes=num_classes + 1,
    fpn_level=5,
    is_norm_reg_target=config['params']['norm_reg_target'])

ckpt = torch.load(os.path.join(config['model']['exp_path'], 'best.pth'),
                  map_location=device)
weights = utils._load_weights(ckpt['net'])
missing_keys = net.load_state_dict(weights, strict=False)
print(missing_keys)

class_idx_map = dict()
for idx in range(0, num_classes):
    class_idx_map[idx + 1] = target_classes[idx]

img_paths = list()
for (path, _, files) in os.walk(opt.imgs):
Example n. 32
class ListDataset(data.Dataset):
    def __init__(self, root, train, transform, input_size):
        '''
        Args:
          root: (str) directory to images.
          train: (boolean) train or test.
          transform: ([transforms]) image transforms.
          input_size: (int) model input size.
        '''
        self.root = root
        self.train = train
        self.transform = transform
        self.input_size = input_size

        self.fnames = []
        self.boxes = []
        self.labels = []

        self.encoder = DataEncoder()

        self._labpath = sorted(glob.glob("%s/*.*" % self.root))
        self._imgpath = [
            path.replace("labels", "image").replace(".txt", ".jpg")
            for path in self._labpath
        ]

    def __getitem__(self, index):
        '''Load image.

        Args:
          index: (int) image index.

        Returns:
          img: (tensor) image tensor.
          boxes: (tensor) bbox locations, sized [#obj, 4].
          labels: (tensor) class labels, sized [#obj,].
          fname: (str) image file name without extension.
        '''
        # Load image and boxes.

        img_path = self._imgpath[index].rstrip()
        fname = img_path.split('/')[-1].split('.')[0]

        # print(img_path)
        img = cv2.imread(img_path)
        # if img.mode != 'RGB':
        #     img = img.convert('RGB')
        h, w, _ = img.shape

        label_path = self._labpath[index].rstrip()
        # print(label_path)

        targets = np.loadtxt(label_path).reshape(-1, 5)

        targets[:, 1] = (targets[:, 1]) / w
        targets[:, 2] = (targets[:, 2]) / h
        targets[:, 3] = (targets[:, 3]) / w
        targets[:, 4] = (targets[:, 4]) / h

        size = self.input_size

        if self.train:

            Augmentation = SSDAugmentation(size=size)
            img, boxe, labels = Augmentation(img, targets[:, 1:], targets[:, 0])
            # to rgb
            img = img[:, :, (2, 1, 0)]
            img = torch.from_numpy(img).permute(2, 0, 1)
            img = img / 255
            _, h1, w1 = img.shape
            img[0, :, :] = img[0, :, :] / 0.229
            img[1, :, :] = img[1, :, :] / 0.224
            img[2, :, :] = img[2, :, :] / 0.225

            boxe[:, 0] = w1 * boxe[:, 0]
            boxe[:, 1] = h1 * boxe[:, 1]
            boxe[:, 2] = w1 * boxe[:, 2]
            boxe[:, 3] = h1 * boxe[:, 3]

        else:

            Augmentation = BaseTransform(size=size)
            img, boxe, labels = Augmentation(img, targets[:, 1:], targets[:, 0])
            img = img[:, :, (2, 1, 0)]
            img = torch.from_numpy(img).permute(2, 0, 1)
            img = img / 255
            _, h1, w1 = img.shape
            img[0, :, :] = img[0, :, :] / 0.229
            img[1, :, :] = img[1, :, :] / 0.224
            img[2, :, :] = img[2, :, :] / 0.225

            boxe[:, 0] = w1 * boxe[:, 0]
            boxe[:, 1] = h1 * boxe[:, 1]
            boxe[:, 2] = w1 * boxe[:, 2]
            boxe[:, 3] = h1 * boxe[:, 3]

        boxes = torch.Tensor(boxe)
        labels = torch.LongTensor(labels)
        # img = self.transform(img)
        return img, boxes, labels, fname

    def collate_fn(self, batch):
        '''Pad images and encode targets.

        Since images may be of different sizes, we need to pad them to the same size.

        Args:
          batch: (list) of (img, boxes, labels, fname) tuples.

        Returns:
          padded images, stacked loc_targets, stacked cls_targets, fnames.
        '''
        imgs = [x[0] for x in batch]
        boxes = [x[1] for x in batch]
        labels = [x[2] for x in batch]
        fname = [x[3] for x in batch]

        h = w = self.input_size
        num_imgs = len(imgs)
        # print(num_imgs)
        inputs = torch.zeros(num_imgs, 3, h, w)

        loc_targets = []
        cls_targets = []
        for i in range(num_imgs):
            inputs[i] = imgs[i]
            loc_target, cls_target = self.encoder.encode(boxes[i],
                                                         labels[i],
                                                         input_size=(w, h))
            loc_targets.append(loc_target)
            cls_targets.append(cls_target)
        return inputs, torch.stack(loc_targets), torch.stack(
            cls_targets), fname

    def __len__(self):
        return len(self._labpath)