Example #1
0
 def __init__(self, rootfolder, is_training):
     """Load one split of MNIST on the root process and distribute it.

     Input:
         rootfolder: the folder holding the raw MNIST idx files.
         is_training: if true, load the 'train-*' files. Otherwise, load
             the 't10k-*' files.
     """
     super(MNISTDataset, self).__init__()
     if mpi.is_root():
         # root loads the data; every other rank hands None to
         # mpi.distribute below.
         if is_training:
             prefix = 'train'
             num_images = MNISTDataset.__num_train
         else:
             prefix = 't10k'
             num_images = MNISTDataset.__num_test
         # 16 and 8 are forwarded to _read_byte_data as-is; presumably
         # the idx header sizes to skip -- confirm against that helper.
         self._data = self._read_byte_data(
                 os.path.join(rootfolder, prefix + '-images-idx3-ubyte'),
                 16, (num_images,) + MNISTDataset.__image_dim)
         # np.int was a plain alias of the builtin int and no longer
         # exists in current NumPy releases; the builtin yields the same
         # dtype.
         self._label = self._read_byte_data(
                 os.path.join(rootfolder, prefix + '-labels-idx1-ubyte'),
                 8, [num_images]).astype(int)
     else:
         self._data = None
         self._label = None
     self._data = mpi.distribute(self._data)
     self._label = mpi.distribute(self._label)
     self._dim = MNISTDataset.__image_dim
     self._channels = 1
Example #2
0
 def __init__(self, rootfolder, is_training):
     """Read the MNIST images and labels on root, then distribute them.

     Input:
         rootfolder: the folder holding the raw MNIST idx files.
         is_training: if true, load the training files. Otherwise, load
             the testing (t10k) files.
     """
     super(MNISTDataset, self).__init__()
     if mpi.is_root():
         # root loads the data
         if is_training:
             self._data = self._read_byte_data(
                 os.path.join(rootfolder, 'train-images-idx3-ubyte'),
                 16,
                 (MNISTDataset.__num_train,) + MNISTDataset.__image_dim)
             # The builtin int replaces np.int, an alias of it that has
             # been removed from current NumPy releases.
             self._label = self._read_byte_data(
                 os.path.join(rootfolder, 'train-labels-idx1-ubyte'), 8,
                 [MNISTDataset.__num_train]).astype(int)
         else:
             self._data = self._read_byte_data(
                 os.path.join(rootfolder, 't10k-images-idx3-ubyte'),
                 16,
                 (MNISTDataset.__num_test,) + MNISTDataset.__image_dim)
             self._label = self._read_byte_data(
                 os.path.join(rootfolder, 't10k-labels-idx1-ubyte'), 8,
                 [MNISTDataset.__num_test]).astype(int)
     else:
         # Non-root ranks contribute nothing to the distribution.
         self._data = None
         self._label = None
     self._data = mpi.distribute(self._data)
     self._label = mpi.distribute(self._label)
     self._dim = MNISTDataset.__image_dim
     self._channels = 1
Example #3
0
 def load_cifar10(self, rootfolder, is_training):
     """Load the cifar-10 dataset on root and distribute it.

     Input:
         rootfolder: the folder holding the pickled batch files
             ('data_batch_1', ..., 'test_batch').
         is_training: if true, load the training batches. Otherwise, load
             the test batch.

     NOTE: the batches are read with pickle, which can execute arbitrary
     code while loading; only point this at trusted files.
     """
     if mpi.is_root():
         if is_training:
             self._data = np.empty((CifarDataset.__num_train,) + \
                                   CifarDataset.__image_dim)
             # np.empty without a dtype allocates float64, so training
             # labels are stored as floats.
             self._label = np.empty(CifarDataset.__num_train)
             # training batches
             for i in range(CifarDataset.__num_batches):
                 batch_path = os.path.join(
                         rootfolder, 'data_batch_{0}'.format(i + 1))
                 # Pickle data is binary: text mode fails on Python 3 and
                 # can corrupt the stream on platforms that translate
                 # newlines.
                 with open(batch_path, 'rb') as fid:
                     batch = pickle.load(fid)
                 start_idx = CifarDataset.__batchsize * i
                 end_idx = CifarDataset.__batchsize * (i + 1)
                 self._data[start_idx:end_idx] = \
                         CifarDataset.get_images_from_matrix(batch['data'])
                 self._label[start_idx:end_idx] = np.array(batch['labels'])
         else:
             with open(os.path.join(rootfolder, 'test_batch'), 'rb') as fid:
                 batch = pickle.load(fid)
             self._data = CifarDataset.get_images_from_matrix(batch['data'])
             self._label = np.array(batch['labels'])
     else:
         self._data = None
         self._label = None
     self._data = mpi.distribute(self._data)
     self._label = mpi.distribute(self._label)
Example #4
0
 def load_cifar10(self, rootfolder, is_training):
     """Read cifar-10 from its pickled batches on root, then distribute.

     Input:
         rootfolder: the folder holding 'data_batch_<k>' and 'test_batch'.
         is_training: if true, load all training batches. Otherwise, load
             the single test batch.
     """

     def load_batch(name):
         # Open in binary mode: pickle streams are bytes, and text mode
         # breaks on Python 3 or on newline-translating platforms.
         # NOTE: pickle can run arbitrary code; use trusted files only.
         with open(os.path.join(rootfolder, name), 'rb') as fid:
             return pickle.load(fid)

     if mpi.is_root():
         if is_training:
             self._data = np.empty((CifarDataset.__num_train,) + \
                                   CifarDataset.__image_dim)
             self._label = np.empty(CifarDataset.__num_train)
             # training batches, each filling one contiguous slice
             for i in range(CifarDataset.__num_batches):
                 batch = load_batch('data_batch_{0}'.format(i+1))
                 start_idx = CifarDataset.__batchsize * i
                 end_idx = CifarDataset.__batchsize * (i+1)
                 self._data[start_idx:end_idx] = \
                         CifarDataset.get_images_from_matrix(batch['data'])
                 self._label[start_idx:end_idx] = np.array(batch['labels'])
         else:
             batch = load_batch('test_batch')
             self._data = CifarDataset.get_images_from_matrix(batch['data'])
             self._label = np.array(batch['labels'])
     else:
         self._data = None
         self._label = None
     self._data = mpi.distribute(self._data)
     self._label = mpi.distribute(self._label)
Example #5
0
 def get_data(filename):
     """Load a .mat file on root and distribute its images and labels.

     The 'X' entry is reshaped using the reversed STL10Dataset._image_dim,
     its last three axes are reversed, and the result is made contiguous.
     The 'y' entry is flattened to integers. Returns the distributed
     (images, labels) pair.
     """
     images = None
     labels = None
     if mpi.is_root():
         contents = io.loadmat(filename)
         raw = contents['X']
         target_shape = (raw.shape[0],) + STL10Dataset._image_dim[::-1]
         reordered = np.transpose(raw.reshape(target_shape),
                                  axes=[0, 3, 2, 1])
         # a contiguous buffer is needed so we can do mpi distribute
         images = np.ascontiguousarray(reordered, dtype=reordered.dtype)
         labels = contents['y'].astype(int).flatten()
     distributed_images = mpi.distribute(images)
     distributed_labels = mpi.distribute(labels)
     return distributed_images, distributed_labels
Example #6
0
 def get_data(filename):
     """Wrapper that returns the images in the right axes order.

     Only root reads the file; the other ranks pass None to
     mpi.distribute. Returns the distributed images and labels as a tuple.
     """
     if not mpi.is_root():
         X, Y = None, None
     else:
         mat = io.loadmat(filename)
         num_images = mat['X'].shape[0]
         stored_dims = STL10Dataset._image_dim[::-1]
         X = mat['X'].reshape((num_images,) + stored_dims)
         # transposing yields a strided view; copy it into a contiguous
         # array so we can do mpi distribute
         X = np.transpose(X, axes=[0, 3, 2, 1])
         X = np.ascontiguousarray(X, dtype=X.dtype)
         Y = mat['y'].astype(int).flatten()
     return mpi.distribute(X), mpi.distribute(Y)
Example #7
0
 def testDistribute(self):
     """mpi.distribute keeps values and splits the first axis across ranks."""
     shapes = [(100,), (100, 2), (100, 2, 3)]
     for shape in shapes:
         full = np.ones(shape)
         local = mpi.distribute(full)
         self.assertTrue(isinstance(local, np.ndarray))
         # every local piece must still be all ones
         expected = np.ones(local.shape)
         np.testing.assert_array_almost_equal(local, expected, 8)
         # the local first-axis sizes must add up to the original size
         gathered_count = mpi.COMM.allreduce(local.shape[0])
         self.assertEqual(gathered_count, full.shape[0])
Example #8
0
 def testDistribute(self):
     """Distribute 1-D, 2-D and 3-D arrays of ones and check the pieces."""
     for trailing_dims in [(), (2,), (2, 3)]:
         original = np.ones((100,) + trailing_dims)
         piece = mpi.distribute(original)
         self.assertTrue(isinstance(piece, np.ndarray))
         np.testing.assert_array_almost_equal(
             piece, np.ones(piece.shape), 8)
         # summing the per-rank row counts must give back all the rows
         self.assertEqual(mpi.COMM.allreduce(piece.shape[0]),
                          original.shape[0])
Example #9
0
 def load_cifar100(self, rootfolder, is_training):
     """Load the cifar-100 dataset on root and distribute it.

     Input:
         rootfolder: the folder holding the pickled 'train' and 'test'
             files.
         is_training: if true, load 'train'. Otherwise, load 'test'.

     Sets self._data, self._coarselabel (from 'coarse_labels') and
     self._label (from 'fine_labels').

     NOTE: the file is read with pickle, which can execute arbitrary code
     while loading; only point this at trusted files.
     """
     if mpi.is_root():
         filename = 'train' if is_training else 'test'
         # Pickle data is binary: the default text mode fails on Python 3
         # and can corrupt the stream on newline-translating platforms.
         with open(rootfolder + os.sep + filename, 'rb') as fid:
             batch = pickle.load(fid)
         self._data = CifarDataset.get_images_from_matrix(batch['data'])
         self._coarselabel = np.array(batch['coarse_labels'])
         self._label = np.array(batch['fine_labels'])
     else:
         self._data = None
         self._coarselabel = None
         self._label = None
     self._data = mpi.distribute(self._data)
     self._coarselabel = mpi.distribute(self._coarselabel)
     self._label = mpi.distribute(self._label)
Example #10
0
 def load_cifar100(self, rootfolder, is_training):
     """Read cifar-100 from its pickled file on root, then distribute.

     Input:
         rootfolder: the folder holding the 'train' and 'test' files.
         is_training: selects 'train' when true, 'test' otherwise.

     The images go to self._data, the 'fine_labels' to self._label and
     the 'coarse_labels' to self._coarselabel.
     """
     if mpi.is_root():
         if is_training:
             filename = 'train'
         else:
             filename = 'test'
         # Binary mode is required for pickle streams; text mode breaks
         # on Python 3 and on platforms that translate newlines.
         # NOTE: pickle can run arbitrary code; use trusted files only.
         with open(rootfolder + os.sep + filename, 'rb') as fid:
             batch = pickle.load(fid)
         self._data = CifarDataset.get_images_from_matrix(batch['data'])
         self._coarselabel = np.array(batch['coarse_labels'])
         self._label = np.array(batch['fine_labels'])
     else:
         # Non-root ranks contribute nothing to the distribution.
         self._data = None
         self._coarselabel = None
         self._label = None
     self._data = mpi.distribute(self._data)
     self._coarselabel = mpi.distribute(self._coarselabel)
     self._label = mpi.distribute(self._label)
Example #11
0
 def __init__(self,
              root,
              is_training,
              crop=False,
              prefetch=False,
              target_size=None):
     """Load the dataset.
     Input:
         root: the dataset root folder; it must contain 'train_list.mat'
             and 'test_list.mat'.
         is_training: if true, load the training data. Otherwise, load the
             testing data.
         crop: if False, does not crop the bounding box. If a real value,
             crop is the ratio of the bounding box that gets cropped.
             e.g., if crop = 1.5, the resulting image will be 1.5 * the
             bounding box area.
         prefetch: if True, the images are prefetched to avoid disk read. If
             you have a large number of images, prefetch would require a lot
             of memory.
         target_size: if provided, all images are resized to the size
             specified. Should be a list of two integers, like [640,480].

     Note that we will use the python indexing (labels start from 0).
     """
     mat_filename = 'train_list.mat' if is_training else 'test_list.mat'
     if mpi.is_root():
         matfile = io.loadmat(os.path.join(root, mat_filename))
         # Shift the stored labels down by one to get 0-based labels.
         # The builtin int replaces np.int, an alias of it that has been
         # removed from current NumPy releases.
         labels = np.array(matfile['labels'].flatten() - 1, dtype=int)
         files = [f[0][0] for f in matfile['file_list']]
     else:
         labels = None
         files = None
     self._data = mpi.distribute_list(files)
     self._label = mpi.distribute(labels)
     self._root = root
     self._prefetch = prefetch
     self._crop = crop
     self._target_size = target_size
     if target_size is not None:
         self._dim = tuple(target_size) + (3, )
     else:
         # no fixed size was requested, so no dimension is recorded
         self._dim = False
     self._channels = 3
     if self._prefetch:
         # replace the local file names by what self._read returns
         self._data = [self._read(i) for i in range(len(self._data))]
 def __init__(self, root, is_training, crop = False,
              prefetch = False, target_size = None):
     """Load the dataset.
     Input:
         root: the dataset root folder, holding 'train_list.mat' and
             'test_list.mat'.
         is_training: if true, load the training data. Otherwise, load the
             testing data.
         crop: if False, does not crop the bounding box. If a real value,
             crop is the ratio of the bounding box that gets cropped.
             e.g., if crop = 1.5, the resulting image will be 1.5 * the
             bounding box area.
         prefetch: if True, the images are prefetched to avoid disk read. If
             you have a large number of images, prefetch would require a lot
             of memory.
         target_size: if provided, all images are resized to the size
             specified. Should be a list of two integers, like [640,480].

     Note that we will use the python indexing (labels start from 0).
     """
     if is_training:
         mat_filename = 'train_list.mat'
     else:
         mat_filename = 'test_list.mat'
     if mpi.is_root():
         # only root reads the list file; the rest pass None below
         matfile = io.loadmat(os.path.join(root, mat_filename))
         # np.int no longer exists in current NumPy releases; it was an
         # alias of the builtin int, which gives the same dtype. The -1
         # makes the stored labels 0-based.
         labels = np.array(matfile['labels'].flatten()-1, dtype=int)
         files = [f[0][0] for f in matfile['file_list']]
     else:
         labels = None
         files = None
     self._data = mpi.distribute_list(files)
     self._label = mpi.distribute(labels)
     self._root = root
     self._prefetch = prefetch
     self._crop = crop
     self._target_size = target_size
     if target_size is not None:
         self._dim = tuple(target_size) + (3,)
     else:
         self._dim = False
     self._channels = 3
     if self._prefetch:
         # swap each local file name for the result of self._read
         self._data = [self._read(i) for i in range(len(self._data))]
Example #13
0
 def __init__(self, root, is_training, crop = False, subset = None,
              prefetch = False, target_size = None):
     """Load the dataset.
     Input:
         root: the root folder of the CUB_200_2011 dataset.
         is_training: if true, load the training data. Otherwise, load the
             testing data.
         crop: if False, does not crop the bounding box. If a real value,
             crop is the ratio of the bounding box that gets cropped.
             e.g., if crop = 1.5, the resulting image will be 1.5 * the
             bounding box area.
         subset: if nonempty, we will only use the subset specified in the
             list. The content of the list should be class subfolder names,
             like ['001.Black_footed_Albatross', ...]
         prefetch: if True, the images are prefetched to avoid disk read. If
             you have a large number of images, prefetch would require a lot
             of memory.
         target_size: if provided, all images are resized to the size
             specified. Should be a list of two integers, like [640,480].

     Note that we will use the python indexing (labels start from 0).
     """
     super(CUBDataset, self).__init__()

     def read_fields(filename):
         # Return the whitespace-split fields of every line of an
         # annotation file under root. The with-block closes the file,
         # which the previous bare open() calls never did.
         with open(os.path.join(root, filename), 'r') as fid:
             return [line.split() for line in fid]

     # In every annotation file the first field is skipped and the
     # remaining field(s) are used.
     images = [fields[1] for fields in read_fields('images.txt')]
     boxes = [fields[1:] for fields in read_fields('bounding_boxes.txt')]
     labels = [int(fields[1]) - 1
               for fields in read_fields('image_class_labels.txt')]
     classnames = [fields[1] for fields in read_fields('classes.txt')]
     class2id = dict(zip(classnames, range(len(classnames))))
     split = [int(fields[1])
              for fields in read_fields('train_test_split.txt')]
     # load parts: drop the first two and the last column, leaving 15
     # two-value entries per image
     rawparts = np.loadtxt(os.path.join(root, 'parts','part_locs.txt'))
     rawparts = rawparts[:,2:-1].reshape((len(images), 15, 2))
     if subset is not None:
         # create the subset mapping from original to new class ids
         old2new = {}
         selected_ids = set()
         for new_id, name in enumerate(subset):
             old_id = class2id[name]
             selected_ids.add(old_id)
             old2new[old_id] = new_id
         # select the subset
         is_selected = [(label in selected_ids) for label in labels]
         images = [image for image, val in zip(images, is_selected) if val]
         boxes = [box for box, val in zip(boxes, is_selected) if val]
         labels = [old2new[label]
                   for label, val in zip(labels, is_selected) if val]
         classnames = subset
         split = [trte for trte, val in zip(split, is_selected) if val]
         rawparts = rawparts[np.asarray(is_selected, dtype=bool)]
     # now, do training testing split: entries flagged 1 in
     # train_test_split.txt are used for training, 0 for testing
     if is_training:
         target = 1
     else:
         target = 0
     images = [os.path.join(root, 'images', image)
               for image, val in zip(images, split) if val == target]
     boxes = [box for box, val in zip(boxes, split) if val == target]
     labels = [label for label, val in zip(labels, split) if val == target]
     # the -1 moves the part locations to python (0-based) indexing
     rawparts = rawparts[np.asarray(split)==target] - 1
     # store the necessary values. The order of the mpi calls below is
     # kept exactly as before, since every rank must issue them in the
     # same sequence.
     self._data = mpi.distribute_list(images)
     # for the boxes, we store them as a numpy array
     self._boxes = np.array(mpi.distribute_list(boxes)).astype(float)
     self._boxes -= 1
     self._parts = mpi.distribute(rawparts)
     self._prefetch = prefetch
     self._target_size = target_size
     self._crop = crop
     if target_size is not None:
         self._dim = tuple(target_size) + (3,)
     else:
         self._dim = False
     self._channels = 3
     # we store the raw dimensions for part location computation
     self._raw_dimension = np.zeros((len(self._data),2), dtype=int)
     if prefetch:
         self._data = [self._read(i) for i in range(len(self._data))]
     self._label = mpi.distribute_list(labels)
     self._classnames = mpi.COMM.bcast(classnames)
Example #14
0
    def __init__(self, list_file, feat_range, posting_file, perc_pos, keep_full_utt=False, posting_sampler=None, min_dur=0.2, min_count=0.0, max_count=10000000.0, reader_type='utterance', 
                 pickle_fname=None, list_file_sph=None, kw_feat=None, merge_score_files=None):
        '''Build the keyword examples on root and distribute them over MPI.

        Input:
            list_file: passed to the selected reader. If its name contains
                'eval', the dataset is flagged as evaluation data and
                FLAGS.T_eval is used instead of FLAGS.T_train.
            feat_range: passed to ReadAllUtterances; only used when
                reader_type is 'utterance'.
            posting_file: parsed with PostingParser to build a Sampler when
                posting_sampler is not given.
            perc_pos: passed to Sampler.SampleData when the sampler is
                built here.
            keep_full_utt: if True, the reader is kept as self.utt_reader.
            posting_sampler: an existing sampler to reuse; when None a new
                one is built from posting_file.
            min_dur: system detections with sys_et - sys_bt below this
                value are skipped.
            min_count, max_count: only examples whose keyword count (see
                self._map_kw_counts) lies in [min_count, max_count] are
                kept.
            reader_type: one of 'lattice', 'utterance', 'snr', 'srate' or
                'score'. Anything else prints a message and exits.
            pickle_fname, list_file_sph, merge_score_files: forwarded to
                the readers that accept them.
            kw_feat: if not None, keyword maps are copied from it, or
                recomputed when that fails.

        TODO: Read pieces of utterance from the CSV file instead to save
        memory. It would be nice to index these by utt_id (by now I do a
        map).
        '''
        super(BabelDataset, self).__init__()
        # Evaluation mode is inferred from the list file name.
        if list_file.find('eval') >= 0:
            self.is_eval = True
            self.T = FLAGS.T_eval
        else:
            self.is_eval = False
            self.T = FLAGS.T_train
        self.beta = FLAGS.beta
        self.reader_type = reader_type
        # Pick the reader; every rank builds its own reader and reads the
        # data (this part is not guarded by mpi.is_root()).
        if reader_type=='lattice':
            self.is_lattice = True
            utt_reader = LatticeReader.LatticeReader(list_file)
            utt_reader.ReadAllLatices()
        elif reader_type=='utterance':
            self.is_lattice = False
            utt_reader = UtteranceReader.UtteranceReader(list_file,pickle_fname=pickle_fname)
            utt_reader.ReadAllUtterances(feat_range)
        elif reader_type=='snr':
            self.is_lattice = False
            utt_reader = SNRReader.SNRReader(list_file,pickle_fname=pickle_fname)
            utt_reader.ReadAllSNR()
        elif reader_type=='srate':
            self.is_lattice = False
            utt_reader = SrateReader.SrateReader(list_file,pickle_fname=pickle_fname)
            utt_reader.ReadAllSrate()
        elif reader_type=='score':
            self.is_lattice = False
            # Unlike the other readers, no explicit Read* call is made here.
            utt_reader = ScoreReader.ScoreReader(list_file,list_file_sph=list_file_sph,pickle_fname=pickle_fname, merge_score_files=merge_score_files)
        else:
            # NOTE(review): this error path exits with status 0.
            print 'Reader not implemented!'
            exit(0)
        # Build a sampler from the posting file unless one was supplied.
        if posting_sampler == None:
            testParser = PostingParser.PostingParser(posting_file)
            self.posting_sampler = Sampler.Sampler(testParser)
            self.posting_sampler.GetPositive()
            self.posting_sampler.GetNegative()
            self.posting_sampler.SampleData(perc_pos)
        else:
            self.posting_sampler = posting_sampler
        self.min_dur = min_dur
        self._data_all = None
        self._dim = False
        self._channels = 1
        self.keep_full_utt = keep_full_utt
        if mpi.is_root():
            # Root collects the examples as parallel lists (same index =
            # same example); they are distributed further down.
            self._data = []
            self._label = []
            self._features = []
            self._utt_id = []
            self._times = []
            self._keyword = []
            # counts detections dropped for being shorter than min_dur
            skipped = 0
            # Negative examples (label 0). Entries whose file is unknown
            # to the reader are silently ignored.
            for i in range(len(self.posting_sampler.negative_data)):
                if utt_reader.map_utt_idx.has_key(self.posting_sampler.negative_data[i]['file']):
                    if self.posting_sampler.negative_data[i]['sys_bt'] == '':
                        # NOTE(review): this error path exits with status 0.
                        print 'We found a negative example that was not produced by the system!'
                        exit(0)
                    sys_bt = float(self.posting_sampler.negative_data[i]['sys_bt'])
                    sys_et = float(self.posting_sampler.negative_data[i]['sys_et'])
                    sys_sc = float(self.posting_sampler.negative_data[i]['sys_score'])
                    if(sys_et-sys_bt < self.min_dur):
                        skipped += 1
                        continue
                    self._data.append(utt_reader.GetKeywordData(self.posting_sampler.negative_data[i]['file'],
                                                              sys_bt, sys_et,kw=self.posting_sampler.negative_data[i]['termid']))
                    self._label.append(0)
                    self._features.append(sys_sc)
                    self._utt_id.append(self.posting_sampler.negative_data[i]['file'])
                    self._times.append((sys_bt,sys_et))
                    self._keyword.append(self.posting_sampler.negative_data[i]['termid'])
                else:
                    pass
            # Positive examples (label 1).
            for i in range(len(self.posting_sampler.positive_data)):
                if utt_reader.map_utt_idx.has_key(self.posting_sampler.positive_data[i]['file']):
                    if self.posting_sampler.positive_data[i]['sys_bt'] == '':
                        # Positives without a system begin time are dropped
                        # by the continue below, so the three placeholder
                        # values are never used; these drops are not
                        # counted in skipped.
                        sys_bt = 0
                        sys_et = None
                        sys_sc = -1.0
                        #print self.posting_sampler.positive_data[i]['alignment']
                        continue #Should just ignore these?
                    else:
                        sys_bt = float(self.posting_sampler.positive_data[i]['sys_bt'])
                        sys_et = float(self.posting_sampler.positive_data[i]['sys_et'])
                        sys_sc = float(self.posting_sampler.positive_data[i]['sys_score'])
                        if(sys_et-sys_bt < self.min_dur):
                            skipped += 1
                            continue
                    self._data.append(utt_reader.GetKeywordData(self.posting_sampler.positive_data[i]['file'],
                                                              sys_bt, sys_et,kw=self.posting_sampler.positive_data[i]['termid']))
                    self._label.append(1)
                    self._features.append(sys_sc)
                    self._utt_id.append(self.posting_sampler.positive_data[i]['file'])
                    self._times.append((sys_bt,sys_et))
                    self._keyword.append(self.posting_sampler.positive_data[i]['termid'])
                else:
                    pass
            
            print 'I skipped ',skipped,' entries out of ',(len(self.posting_sampler.negative_data)+len(self.posting_sampler.positive_data))
            
            self._label = np.array(self._label)
        else:
            # Non-root ranks contribute nothing to the distribution.
            self._data = None
            self._label = None
            self._features = None
            self._utt_id = None
            self._times = None
            self._keyword = None
        #populate true kw freq
        # Counted on every rank, over all positive entries whose file is
        # known to the reader (including those skipped above).
        self._map_kw_counts = {}
        for i in range(len(self.posting_sampler.positive_data)):
            if utt_reader.map_utt_idx.has_key(self.posting_sampler.positive_data[i]['file']):
                kw = self.posting_sampler.positive_data[i]['termid']
                if self._map_kw_counts.has_key(kw):
                    self._map_kw_counts[kw] += 1
                else:
                    self._map_kw_counts[kw] = 1
        #filter dataset depending on count
        if mpi.is_root():
            ind_keep = []
            # kw_zero counts examples whose keyword has no positive
            # occurrence; it is not used after this loop.
            kw_zero = 0
            for i in range(len(self._keyword)):
                kw = self._keyword[i]
                kw_count = 0
                if self._map_kw_counts.has_key(kw):
                    kw_count = self._map_kw_counts[kw]
                else:
                    kw_zero += 1
                if kw_count <= max_count and kw_count >= min_count:
                    ind_keep.append(i)
            
            # Keep only the selected indices in every parallel list.
            # NOTE(review): this turns self._label from the array built
            # above into a plain list before mpi.distribute receives it --
            # confirm that mpi.distribute accepts a list.
            self._data = [self._data[i] for i in ind_keep]
            self._label = [self._label[i] for i in ind_keep]
            self._features = [self._features[i] for i in ind_keep]
            self._utt_id = [self._utt_id[i] for i in ind_keep]
            self._times = [self._times[i] for i in ind_keep]
            self._keyword = [self._keyword[i] for i in ind_keep]

                    
        # Every rank must issue these collective calls in the same order.
        self._data = mpi.distribute_list(self._data)
        self._label = mpi.distribute(self._label)
        self._features = mpi.distribute_list(self._features)
        self._utt_id = mpi.distribute_list(self._utt_id)
        self._times = mpi.distribute_list(self._times)
        self._keyword = mpi.distribute_list(self._keyword)
        if self.keep_full_utt == True:
            self.utt_reader = utt_reader
        if kw_feat != None:
            # has_key is used only as a probe: if kw_feat does not support
            # it, the maps are recomputed from the hescii file instead.
            # NOTE(review): the bare except also hides any error raised
            # inside CopyKeywordMaps.
            try:
                kw_feat.has_key('length')
                self.CopyKeywordMaps(kw_feat)
            except:
                self.LoadMappingHescii(FLAGS.hescii_file)
                self.ComputeKeywordMaps()
Example #15
0
 def __init__(self, root, is_training, crop = False, subset = None,
              prefetch = False, target_size = None, version = '2011'):
     """Load the dataset.
     Input:
         root: the root folder of the CUB_200_2011 dataset.
         is_training: if true, load the training data. Otherwise, load the
             testing data.
         crop: if False, does not crop the bounding box. If a real value,
             crop is the ratio of the bounding box that gets cropped.
             e.g., if crop = 1.5, the resulting image will be 1.5 * the
             bounding box area.
         subset: if nonempty, we will only use the subset specified in the
             list. The content of the list should be class subfolder names,
             like ['001.Black_footed_Albatross', ...]
         prefetch: if True, the images are prefetched to avoid disk read. If
             you have a large number of images, prefetch would require a lot
             of memory.
         target_size: if provided, all images are resized to the size
             specified. Should be a list of two integers, like [640,480].
         version: either '2011' or '2010'. Note that the 2011 version
             contains the parts, while the 2010 version does not.

     Raises:
         ValueError: if version is neither '2011' nor '2010'.

     Note that we will use the python indexing (labels start from 0).
     """
     super(CUBDataset, self).__init__()

     def read_lines(*relpath):
         # Return all lines of a text file under root. The with-block
         # closes the file, which the previous bare open() calls never
         # did.
         with open(os.path.join(root, *relpath), 'r') as fid:
             return list(fid)

     if version == '2011':
         # In every annotation file the first field is skipped and the
         # remaining field(s) are used.
         images = [line.split()[1] for line in read_lines('images.txt')]
         boxes = [line.split()[1:]
                  for line in read_lines('bounding_boxes.txt')]
         labels = [int(line.split()[1]) - 1
                   for line in read_lines('image_class_labels.txt')]
         classnames = [line.split()[1]
                       for line in read_lines('classes.txt')]
         class2id = dict(zip(classnames, range(len(classnames))))
         split = [int(line.split()[1])
                  for line in read_lines('train_test_split.txt')]
         # load parts: drop the first two and the last column, leaving
         # 15 two-value entries per image
         rawparts = np.loadtxt(os.path.join(root, 'parts','part_locs.txt'))
         rawparts = rawparts[:,2:-1].reshape((len(images), 15, 2))
     elif version == '2010':
         # we are using version 2010. We load the data to mimic the 2011
         # version data format
         images = [line.strip()
                   for line in read_lines('lists', 'files.txt')]
         boxes = []
         # unfortunately, we need to load the boxes from matlab annotations
         for filename in images:
             matfile = io.loadmat(os.path.join(root, 'annotations-mat',
                                               filename[:-3]+'mat'))
             left, top, right, bottom = \
                     [matfile['bbox'][0][0][i][0][0] for i in range(4)]
             # stored as [left, top, width, height], like the 2011 boxes
             boxes.append([left, top, right-left, bottom-top])
         # A set makes each membership test below constant time; testing
         # against the list was quadratic in the number of images.
         train_images = set(line.strip()
                            for line in read_lines('lists', 'train.txt'))
         # the class id is the number before the first '.' in the name
         labels = [int(line[:line.find('.')]) - 1 for line in images]
         classnames = [line.strip()
                       for line in read_lines('lists', 'classes.txt')]
         class2id = dict(zip(classnames, range(len(classnames))))
         split = [int(line in train_images) for line in images]
         # we do not have rawparts.
         rawparts = None
     else:
         # The call form of raise is valid on both Python 2 and 3; the
         # previous "raise ValueError, msg" form is Python 2 only.
         raise ValueError("Unrecognized version: %s" % version)
     if subset is not None:
         # create the subset mapping from original to new class ids
         old2new = {}
         selected_ids = set()
         for new_id, name in enumerate(subset):
             old_id = class2id[name]
             selected_ids.add(old_id)
             old2new[old_id] = new_id
         # select the subset
         is_selected = [(label in selected_ids) for label in labels]
         images = [image for image, val in zip(images, is_selected) if val]
         boxes = [box for box, val in zip(boxes, is_selected) if val]
         labels = [old2new[label]
                   for label, val in zip(labels, is_selected) if val]
         classnames = subset
         split = [trte for trte, val in zip(split, is_selected) if val]
         if rawparts is not None:
             rawparts = rawparts[np.asarray(is_selected, dtype=bool)]
     # now, do training testing split: entries flagged 1 are used for
     # training, 0 for testing
     if is_training:
         target = 1
     else:
         target = 0
     images = [image for image, val in zip(images, split) if val == target]
     boxes = [box for box, val in zip(boxes, split) if val == target]
     labels = [label for label, val in zip(labels, split) if val == target]
     if rawparts is not None:
         # the -1 moves the part locations to python (0-based) indexing
         rawparts = rawparts[np.asarray(split)==target] - 1
     # store the necessary values. The order of the mpi calls below is
     # kept exactly as before, since every rank must issue them in the
     # same sequence.
     self._version = version
     self._root = root
     self._data = mpi.distribute_list(images)
     # keep the file names reachable even if prefetch replaces _data
     self._raw_name = self._data
     # for the boxes, we store them as a numpy array
     self._boxes = np.array(mpi.distribute_list(boxes)).astype(float)
     self._boxes -= 1
     if rawparts is not None:
         self._parts = mpi.distribute(rawparts)
     else:
         self._parts = None
     self._prefetch = prefetch
     self._target_size = target_size
     self._crop = crop
     if target_size is not None:
         self._dim = tuple(target_size) + (3,)
     else:
         self._dim = False
     self._channels = 3
     # we store the raw dimensions for part location computation
     self._raw_dimension = np.zeros((len(self._data),2), dtype=int)
     if prefetch:
         self._data = [self._read(i) for i in range(len(self._data))]
     self._label = mpi.distribute_list(labels)
     self._classnames = mpi.COMM.bcast(classnames)
Example #16
0
 def __init__(self,
              root,
              is_training,
              crop=False,
              subset=None,
              prefetch=False,
              target_size=None,
              version='2011'):
     """Load the dataset.
     Input:
         root: the root folder of the CUB_200_2011 dataset (or of the 2010
             release when version is '2010').
         is_training: if true, load the training data. Otherwise, load the
             testing data.
         crop: if False, does not crop the bounding box. If a real value,
             crop is the ratio of the bounding box that gets cropped.
             e.g., if crop = 1.5, the resulting image will be 1.5 * the
             bounding box area.
         subset: if not None, we will only use the subset specified in the
             list. The content of the list should be class names as they
             appear in the classes file, like
             ['001.Black_footed_Albatross', ...]. Labels are renumbered to
             the position of each class inside this list.
         prefetch: if True, the images are prefetched to avoid disk read. If
             you have a large number of images, prefetch would require a lot
             of memory.
         target_size: if provided, all images are resized to the size
             specified. Should be a list of two integers, like [640,480].
         version: either '2011' or '2010'. Note that the 2011 version
             contains the parts, while the 2010 version does not.
     Raises:
         ValueError: if version is neither '2011' nor '2010'.
         KeyError: if subset contains a name that is not a known class.

     Note that we will use the python indexing (labels start from 0).
     """
     super(CUBDataset, self).__init__()

     def read_lines(*relative_path):
         # Read the file eagerly inside a context manager so that the file
         # handle is closed deterministically instead of being leaked.
         with open(os.path.join(root, *relative_path), 'r') as fid:
             return fid.readlines()

     if version == '2011':
         # every annotation file is "<image or class id> <value ...>" per line
         images = [line.split()[1] for line in read_lines('images.txt')]
         boxes = [
             line.split()[1:] for line in read_lines('bounding_boxes.txt')
         ]
         labels = [
             int(line.split()[1]) - 1
             for line in read_lines('image_class_labels.txt')
         ]
         classnames = [
             line.split()[1] for line in read_lines('classes.txt')
         ]
         class2id = dict(zip(classnames, range(len(classnames))))
         split = [
             int(line.split()[1])
             for line in read_lines('train_test_split.txt')
         ]
         # load parts: drop the two leading columns and the last column,
         # then keep 15 (x, y) pairs per image
         rawparts = np.loadtxt(os.path.join(root, 'parts', 'part_locs.txt'))
         rawparts = rawparts[:, 2:-1].reshape((len(images), 15, 2))
     elif version == '2010':
         # we are using version 2010. We load the data to mimic the 2011
         # version data format
         images = [line.strip() for line in read_lines('lists', 'files.txt')]
         # unfortunately, we need to load the boxes from matlab annotations
         boxes = []
         for filename in images:
             matfile = io.loadmat(
                 os.path.join(root, 'annotations-mat',
                              filename[:-3] + 'mat'))
             left, top, right, bottom = \
                     [matfile['bbox'][0][0][i][0][0] for i in range(4)]
             # store as [left, top, width, height] like the 2011 format
             boxes.append([left, top, right - left, bottom - top])
         # a set gives constant-time membership tests for the split below
         train_images = set(
             line.strip() for line in read_lines('lists', 'train.txt'))
         # the class id is the number in front of the first '.' of the path
         labels = [int(line[:line.find('.')]) - 1 for line in images]
         classnames = [
             line.strip() for line in read_lines('lists', 'classes.txt')
         ]
         class2id = dict(zip(classnames, range(len(classnames))))
         split = [int(line in train_images) for line in images]
         # we do not have rawparts.
         rawparts = None
     else:
         # call form of raise: valid on both Python 2 and Python 3
         raise ValueError("Unrecognized version: %s" % version)
     if subset is not None:
         # create the subset mapping
         old2new = {}
         selected_ids = set()
         for new_id, name in enumerate(subset):
             old_id = class2id[name]
             selected_ids.add(old_id)
             old2new[old_id] = new_id
         # select the subset
         is_selected = [(label in selected_ids) for label in labels]
         images = [image for image, val in zip(images, is_selected) if val]
         boxes = [box for box, val in zip(boxes, is_selected) if val]
         labels = [old2new[label] for label, val in zip(labels, is_selected)
                   if val]
         classnames = subset
         class2id = dict(zip(classnames, range(len(classnames))))
         split = [trte for trte, val in zip(split, is_selected) if val]
         if rawparts is not None:
             rawparts = rawparts[np.asarray(is_selected, dtype=bool)]
     # now, do training testing split: entries flagged 1 are training data
     if is_training:
         target = 1
     else:
         target = 0
     images = [image for image, val in zip(images, split) if val == target]
     boxes = [box for box, val in zip(boxes, split) if val == target]
     labels = [label for label, val in zip(labels, split) if val == target]
     if rawparts is not None:
         # NOTE(review): the -1 presumably converts 1-based annotation
         # coordinates to python indexing -- confirm against the dataset.
         rawparts = rawparts[np.asarray(split) == target] - 1
     # store the necessary values
     self._version = version
     self._root = root
     self._data = mpi.distribute_list(images)
     # keep the file names around: self._data is replaced when prefetching
     self._raw_name = self._data
     # for the boxes, we store them as a numpy array
     self._boxes = np.array(mpi.distribute_list(boxes)).astype(float)
     # NOTE(review): presumably the same 1-based to 0-based shift as parts
     self._boxes -= 1
     if rawparts is not None:
         self._parts = mpi.distribute(rawparts)
     else:
         self._parts = None
     self._prefetch = prefetch
     self._target_size = target_size
     self._crop = crop
     if target_size is not None:
         self._dim = tuple(target_size) + (3, )
     else:
         # no fixed size was requested, so no common dimension is recorded
         self._dim = False
     self._channels = 3
     # we store the raw dimensions for part location computation
     self._raw_dimension = np.zeros((len(self._data), 2), dtype=int)
     if prefetch:
         # replace the file names by the images themselves
         self._data = [self._read(i) for i in range(len(self._data))]
     self._label = mpi.distribute_list(labels)
     self._classnames = mpi.COMM.bcast(classnames)