Ejemplo n.º 1
0
    def __init__(self, opt):
        """Configure the box sampler from an options container.

        Args:
            opt: options read through ``utils.getopt``. Recognised keys and
                their fallbacks: ``low_thresh`` (0.4), ``high_thresh``
                (0.75), ``batch_size`` (256) and ``debug`` (False).
        """
        super(BoxSampler, self).__init__()
        self.opt = {}

        # (attribute / option name, fallback value), read in this order.
        option_defaults = (
            ('low_thresh', 0.4),
            ('high_thresh', 0.75),
            ('batch_size', 256),
            ('debug', False),
        )
        for option_name, fallback in option_defaults:
            setattr(self, option_name,
                    utils.getopt(opt, option_name, fallback))

        self.iou = boxIoU.BoxIoU()

        # Bounds start out unset.
        self.x_min = self.x_max = None
        self.y_min = self.y_max = None
Ejemplo n.º 2
0
    def __init__(self, opt):
        """Build the localization layer from an options container.

        Args:
            opt: options read through ``utils.getopt``. ``input_dim``,
                ``output_size``, ``sampler_batch_size``,
                ``sampler_high_thresh``, ``sampler_low_thresh`` and
                ``contrastive_loss`` are read without a fallback;
                ``train_remove_outbounds_boxes`` falls back to 1.
        """
        super(LocalizationLayer, self).__init__()
        settings = easydict.EasyDict()
        self.opt = settings

        # Options looked up without a fallback value, in their original order.
        for key in ('input_dim', 'output_size', 'sampler_batch_size',
                    'sampler_high_thresh', 'sampler_low_thresh'):
            setattr(settings, key, utils.getopt(opt, key))
        settings.train_remove_outbounds_boxes = utils.getopt(
            opt, 'train_remove_outbounds_boxes', 1)
        settings.contrastive_loss = utils.getopt(opt, 'contrastive_loss')

        # The sampler helper gets its own plain dict of options.
        self.box_sampler_helper = BoxSamplerHelper(
            dict(batch_size=settings.sampler_batch_size,
                 low_thresh=settings.sampler_low_thresh,
                 high_thresh=settings.sampler_high_thresh,
                 contrastive_loss=settings.contrastive_loss))

        # The first two entries of output_size parameterise the RoI pooling.
        first_dim = settings.output_size[0]
        second_dim = settings.output_size[1]
        self.roi_pooling = BilinearRoiPooling(first_dim, second_dim)

        # Image size is unknown until set later; flags start cleared.
        self.image_height = None
        self.image_width = None
        self._called_forward_size = False
        self._called_backward_size = False
Ejemplo n.º 3
0
    def __init__(self, opt):
        """Wrap a box sampler, either injected or freshly constructed.

        Args:
            opt: dict-like options. ``'contrastive_loss'`` is required.
                ``'box_sampler'`` is optional: when present it is used
                as-is (intended for testing), otherwise a
                ``box_sampler.BoxSampler`` is built from ``opt``.
                ``'return_index'`` is optional and falls back to False.

        Raises:
            KeyError: if ``opt`` has no ``'contrastive_loss'`` entry.
        """
        super(BoxSamplerHelper, self).__init__()
        # dict.has_key() was removed in Python 3; the ``in`` operator is the
        # equivalent membership test on both Python 2 and Python 3.
        if 'box_sampler' in opt:
            self.box_sampler = opt['box_sampler']  # For testing
        else:
            self.box_sampler = box_sampler.BoxSampler(opt)

        self.contrastive_loss = opt['contrastive_loss']
        self.return_index = utils.getopt(opt, 'return_index', False)
Ejemplo n.º 4
0
 def setTestArgs(self, args={}):
     """Store test-time settings on the instance.

     Each option is read from ``args`` through ``utils.getopt`` and saved
     as ``self.test_<option>``. Fallbacks: ``clip_boxes`` True,
     ``nms_thresh`` 0.7, ``max_proposals`` 300.
     """
     fallbacks = (
         ('clip_boxes', True),
         ('nms_thresh', 0.7),
         ('max_proposals', 300),
     )
     for option, fallback in fallbacks:
         setattr(self, 'test_' + option,
                 utils.getopt(args, option, fallback))
Ejemplo n.º 5
0
    def getBatch(self, opt):
        """Assemble one batch of questions, answers and image features.

        Splits ``'0'`` and ``'1'`` read from the ``*_train`` attributes and
        the ``/images_train`` dataset; any other split reads from the
        ``*_test`` attributes and ``/images_test``. Split ``'0'`` draws
        examples at random, the others walk the split sequentially using
        ``self.iterators[split]`` and wrap around at the end.

        Args:
            opt: options read through ``getopt``. ``'split'`` is read
                without a fallback; ``'batch_size'`` falls back to 128.

        Returns:
            dict with the list entries ``'questions'``, ``'ques_id'``,
            ``'ques_len'`` and ``'answer'`` (one item per batch element)
            plus ``'images'``, which is ``self.img_batch`` reshaped to
            ``(batch_size, 196, -1)``.
        """
        split = str(getopt(opt, 'split'))
        batch_size = getopt(opt, 'batch_size', 128)

        split_ix_tmp = self.split_ix[split]
        # NOTE: ``assert (cond, msg)`` asserts a non-empty tuple and can
        # never fail, so the condition and message are passed separately.
        assert split_ix_tmp is not None, 'split ' + str(split) + ' not found'

        max_index = len(split_ix_tmp) - 1
        ques_idx = torch.LongTensor(batch_size)
        img_idx = torch.LongTensor(batch_size)

        # Channel depth of the stored feature maps; None for an unknown type,
        # in which case no fresh batch buffer is allocated.
        feature_depth = {'VGG': 512, 'Residual': 2048}.get(self.feature_type)
        if feature_depth is not None:
            self.img_batch = torch.Tensor(batch_size, 14, 14, feature_depth)

        split_num = int(split)
        use_train = split_num == 0 or split_num == 1
        if use_train:
            tag = 'train'
            img_pos = self.img_pos_train
            h5_file = self.h5_img_file_train
        else:
            tag = 'test'
            img_pos = self.img_pos_test
            h5_file = self.h5_img_file_test
        h5_key = '/images_' + tag

        for i in range(batch_size):
            ri = self.iterators[split]
            ri_next = ri + 1
            if ri_next > max_index:
                # Indices are 0-based (max_index == len - 1), so wrap to the
                # first element; wrapping to 1 would skip element 0.
                ri_next = 0
            self.iterators[split] = ri_next
            if split_num == 0:
                ix = split_ix_tmp[torch.randperm(max_index + 1)[0]]
            else:
                ix = split_ix_tmp[ri]

            assert ix is not None, ('Bug: split ' + split +
                                    ' was accessed out of bounds with ' +
                                    str(ri))
            ques_idx[i] = ix
            img_idx[i] = img_pos[ix]
            if h5_file is not None:
                if feature_depth is not None:
                    # NOTE(review): the ``- 1`` suggests stored image
                    # positions are 1-based -- confirm against the
                    # preprocessing that produced them.
                    img = h5_file[h5_key][img_idx[i] - 1:img_idx[i],
                                          0:14, 0:14, 0:feature_depth]
                    self.img_batch[i] = img
                else:
                    print("Error(%s): feature type error" % tag)

        if use_train:
            questions = self.ques_train
            ques_ids = self.ques_id_train
            ques_lens = self.ques_len_train
            answers = self.ans_train
        else:
            questions = self.ques_test
            ques_ids = self.ques_id_test
            ques_lens = self.ques_len_test
            answers = self.ans_test

        data = {}
        data['questions'] = []
        data['ques_id'] = []
        data['ques_len'] = []
        data['answer'] = []
        data['images'] = np.reshape(self.img_batch, (batch_size, 196, -1))
        for i in range(0, len(ques_idx)):
            q = ques_idx[i]
            data['questions'].append(questions[q])
            data['ques_id'].append(ques_ids[q])
            data['ques_len'].append(ques_lens[q])
            data['answer'].append(answers[q])

        # The assembled batch was previously built and then discarded.
        return data
Ejemplo n.º 6
0
    def __init__(self, opt, split, root='data/'):
        """Load dataset metadata, embeddings and HDF5 arrays for one split.

        Args:
            opt: options object. Some settings are read through
                ``utils.getopt`` (``dataset`` / ``val_dataset``,
                ``debug_max_train_images``, ``embedding``, ``fold``), the
                rest as attributes (``image_size``, ``dtp_train``,
                ``augment``, ``ghosh``, ``reproduce_paper``, ``quiet``,
                ``embedding``, ``embedding_loss``).
            split: 0, 1 or 2, mapped to 'train', 'val' or 'test'. Split 0
                uses the ``dataset`` option, the others ``val_dataset``.
            root: directory prefix under which ``<dataset>/`` is expected.

        Raises:
            KeyError: if ``split`` is not 0, 1 or 2, or if an expected
                dataset is missing from the HDF5 file.
        """
        if split == 0:
            self.dataset = utils.getopt(opt, 'dataset')
        else:
            self.dataset = utils.getopt(opt, 'val_dataset')

        root += '%s/' % self.dataset
        self.debug_max_train_images = utils.getopt(opt,
                                                   'debug_max_train_images',
                                                   -1)
        self.embedding = utils.getopt(opt, 'embedding')
        self.fold = utils.getopt(opt, 'fold')
        self.image_size = opt.image_size
        self.split_num = split
        self.dtp_train = opt.dtp_train
        self.augment = opt.augment
        self.opt = opt
        num2split = {0: 'train', 1: 'val', 2: 'test'}
        self.split = num2split[split]
        self.train = self.split == 'train'
        self.alphabet = dl.default_alphabet
        self.ghosh = opt.ghosh

        if self.dataset == 'konzilsprotokolle':
            self.alphabet = '&' + string.digits + string.ascii_lowercase

        suffix = '_augmented' if self.augment else ''

        # h5_file / json_file are set on self BEFORE loading the data because
        # the loading helper is a method and may read them.
        if self.opt.reproduce_paper:
            self.h5_file = 'data/reproduce/%s_fold%d.h5' % (self.dataset,
                                                            self.fold)
            self.json_file = 'data/reproduce/%s_fold%d.json' % (self.dataset,
                                                                self.fold)
            self.data = self._repr_load_data()
        else:
            self.data = getattr(dl, 'load_%s' % self.dataset)(
                fold=self.fold, alphabet=self.alphabet)
            self.h5_file = root + self.dataset + '%s_fold%d.h5' % (suffix,
                                                                   self.fold)
            self.json_file = root + self.dataset + '%s_fold%d.json' % (
                suffix, self.fold)

        self.data_split = [d for d in self.data if d['split'] == self.split]

        if self.dataset != 'iiit_hws':
            self.split_vocab = utils.build_vocab(self.data_split)
        else:
            self.split_vocab = np.unique([d['label'] for d in self.data])

        if self.ghosh:
            # The ghosh option overrides whichever files were chosen above.
            self.h5_file = root + 'washington_fold1_ghosh.h5'
            self.json_file = root + 'washington_fold1_ghosh.json'

        show = not opt.quiet

        # load the json file which contains additional information about
        # the dataset
        if show:
            print('DataLoader loading json file: ', self.json_file)
        with open(self.json_file, 'r') as f:
            self.info = json.load(f)

        self.vocab_size = len(self.info['itow'])

        # Convert keys in idx_to_token from string to integer, shifting them
        # down by one. dict.iteritems() exists only on Python 2; items()
        # works on both Python 2 and Python 3.
        itow = {}
        for k, v in self.info['itow'].items():
            itow[int(k) - 1] = v
        self.info['itow'] = itow

        self.itow = itow
        self.wtoi = {w: i for i, w in itow.items()}

        self.resolution = 3
        # boils down to whether or not the all embeddings should match their
        # label
        self.bins = len(self.alphabet) * 2
        self.ngrams = 2
        self.unigram_levels = range(1, 6)
        self.emb_func = getattr(emb, opt.embedding)
        self.args = (self.resolution, self.alphabet)
        if opt.embedding == 'ngram_dct':
            self.args += (self.ngrams, self.bins)
        elif opt.embedding == 'phoc':
            self.args = (self.alphabet, self.unigram_levels)

        # word embedding table; embeddings are left un-normalized only for
        # the 'phocnet' embedding loss
        if opt.embedding_loss == 'phocnet':
            self.wtoe = {
                w: self.emb_func(w, *self.args)
                for w in self.itow.values()
            }
        else:
            self.wtoe = {
                w: self.normalize(self.emb_func(w, *self.args))
                for w in self.itow.values()
            }

        self.iam = self.dataset == 'iam'
        if self.iam:
            with open('data/iam/stopwords.txt') as f:
                # rstrip drops the trailing newline without eating a real
                # character when the line has no newline at all.
                tmp = f.readline().rstrip('\n')
            self.stopwords = tmp.split(',')

        if not self.train:
            self.init_queries()

        # open the hdf5 file; from here on self.h5_file is the open file
        # object rather than the path
        if show:
            print('DataLoader loading h5 file: ', self.h5_file)
        self.h5_file = h5py.File(self.h5_file, 'r')

        def read(name):
            # Dataset.value was removed in h5py 3.0; indexing with an empty
            # tuple reads the whole dataset on old and new h5py alike.
            return self.h5_file[name][()]

        self.boxes = read('boxes')
        self.image_heights = read('image_heights')
        self.image_widths = read('image_widths')
        self.img_to_first_box = read('img_to_first_box')
        self.img_to_last_box = read('img_to_last_box')
        self.labels = read('labels') - 1
        self.word_embedding = read(self.embedding + '_word_embeddings')
        self.img_to_first_rp = read('img_to_first_rp')
        self.img_to_last_rp = read('img_to_last_rp')
        self.original_heights = read('original_heights')
        self.original_widths = read('original_widths')
        self.split_inds = read('split')

        # dimensionality of the embedding
        self.embedding_dim = self.word_embedding.shape[1]

        # extract image size from dataset
        images_size = self.h5_file['images'].shape
        assert len(images_size) == 4, '/images should be a 4D tensor'
        self.num_images = images_size[0]
        self.num_channels = images_size[1]
        self.max_image_height = images_size[2]
        self.max_image_width = images_size[3]

        # extract some attributes from the data
        self.num_regions = self.boxes.shape[0]
        self.image_mean = read('/image_mean')[0]

        # set up index ranges for the different splits
        self.train_ix = []
        self.val_ix = []
        self.test_ix = []
        for i in range(self.num_images):
            split_id = self.split_inds[i]
            if split_id == 0:
                self.train_ix.append(i)
            if split_id == 1:
                self.val_ix.append(i)
            if split_id == 2:
                self.test_ix.append(i)

        if show:
            print('assigned %d/%d/%d images to train/val/test.' %
                  (len(self.train_ix), len(self.val_ix), len(self.test_ix)))
            print('initialized DataLoader:')
            print('#images: %d, #regions: %d' %
                  (self.num_images, self.num_regions))
0
    def __init__(self, opt):
        """Build the localization layer: RPN, box sampler, RoI pooling, losses.

        Args:
            opt: options read through ``utils.getopt``. ``input_dim``,
                ``output_size``, ``field_centers``, ``mid_box_reg_weight``,
                ``mid_objectness_weight``, ``wordness_loss`` and
                ``contrastive_loss`` are read without a fallback; every
                other option has the fallback shown below. An optional
                ``box_sampler`` entry is forwarded to the sampler helper.
        """
        super(LocalizationLayer, self).__init__()
        self.opt = easydict.EasyDict()
        self.opt.input_dim = utils.getopt(opt, 'input_dim')
        self.opt.output_size = utils.getopt(opt, 'output_size')

        # list x0, y0, sx, sy
        self.opt.field_centers = utils.getopt(opt, 'field_centers')

        self.opt.mid_box_reg_weight = utils.getopt(opt, 'mid_box_reg_weight')
        self.opt.mid_objectness_weight = utils.getopt(opt,
                                                      'mid_objectness_weight')

        self.opt.rpn_filter_size = utils.getopt(opt, 'rpn_filter_size', 3)
        self.opt.rpn_num_filters = utils.getopt(opt, 'rpn_num_filters', 256)
        self.opt.zero_box_conv = utils.getopt(opt, 'zero_box_conv', True)
        self.opt.std = utils.getopt(opt, 'std', 0.01)
        self.opt.anchor_scale = utils.getopt(opt, 'anchor_scale', 1.0)
        self.opt.anchors = utils.getopt(opt, 'anchors', 'original')

        self.opt.sampler_batch_size = utils.getopt(opt, 'sampler_batch_size',
                                                   256)
        self.opt.sampler_high_thresh = utils.getopt(opt, 'sampler_high_thresh',
                                                    0.75)
        self.opt.sampler_low_thresh = utils.getopt(opt, 'sampler_low_thresh',
                                                   0.4)
        self.opt.train_remove_outbounds_boxes = utils.getopt(
            opt, 'train_remove_outbounds_boxes', 1)
        self.opt.box_reg_decay = utils.getopt(opt, 'box_reg_decay', 5e-5)
        self.opt.tunable_anchors = utils.getopt(opt, 'tunable_anchors', False)
        self.opt.backprop_rpn_anchors = utils.getopt(opt,
                                                     'backprop_rpn_anchors',
                                                     False)
        self.box_loss = utils.getopt(opt, 'wordness_loss')
        self.opt.contrastive_loss = utils.getopt(opt, 'contrastive_loss')

        self.stats = easydict.EasyDict()
        self.stats.losses = easydict.EasyDict()
        self.stats.vars = easydict.EasyDict()
        self.dtp_train = utils.getopt(opt, 'dtp_train', False)

        if self.dtp_train:
            # Floor division keeps the batch size an integer; ``/=`` would
            # turn it into a float under Python 3 true division.
            self.opt.sampler_batch_size //= 2

        sampler_opt = {
            'batch_size': self.opt.sampler_batch_size,
            'low_thresh': self.opt.sampler_low_thresh,
            'high_thresh': self.opt.sampler_high_thresh,
            'contrastive_loss': self.opt.contrastive_loss
        }

        # An injected sampler is forwarded to the helper only when one was
        # supplied.
        debug_sampler = utils.getopt(opt, 'box_sampler', False)
        if debug_sampler != False:
            sampler_opt['box_sampler'] = debug_sampler

        self.rpn = RPN(self.opt)
        self.box_sampler_helper = BoxSamplerHelper(sampler_opt)
        self.roi_pooling = BilinearRoiPooling(self.opt.output_size[0],
                                              self.opt.output_size[1])
        self.invert_box_transform = InvertBoxTransform()

        # Construct criterions
        if self.opt.backprop_rpn_anchors:
            self.box_reg_loss = BoxRegressionCriterion(
                self.opt.mid_box_reg_weight)
        else:
            self.box_reg_loss = nn.SmoothL1Loss()  # for RPN box regression

        self.box_scoring_loss = nn.CrossEntropyLoss()

        # Image size is unknown until set later; flags start cleared.
        self.image_height = None
        self.image_width = None
        self._called_forward_size = False
        self._called_backward_size = False
Ejemplo n.º 8
0
 def setBounds(self, bounds):
     """Copy the four bound values from ``bounds`` onto the instance.

     ``x_min``, ``x_max``, ``y_min`` and ``y_max`` are each read through
     ``utils.getopt`` with a fallback of None.
     """
     for bound_name in ('x_min', 'x_max', 'y_min', 'y_max'):
         setattr(self, bound_name, utils.getopt(bounds, bound_name, None))