def read_paths_and_labels(self, labels_file):
    """Load example paths and integer labels from a comma-separated file.

    Every non-blank line of ``labels_file`` is split on ``','``. The first
    field is passed through ``tools.adapt_path_to_current_os`` and joined
    onto ``self.dirdata``; the second field is parsed with ``int``. Blank
    lines are ignored.

    The loaded examples are then selected as follows:

    * if ``self.percent_of_data`` is not 100, a random subset holding that
      percentage of the examples is drawn without replacement;
    * otherwise all examples are kept, in file order unless
      ``self.shuffle_data`` is true, in which case they are shuffled.

    Finally, trailing examples that do not fill a whole batch of
    ``self.batch_size`` are dropped.

    Args:
        labels_file: Path of the labels file to read.

    Returns:
        A ``(paths, labels)`` tuple of two lists of equal length; that
        length is a multiple of ``self.batch_size``.

    Raises:
        FileNotFoundError: If ``labels_file`` does not exist. The error is
            logged and re-raised.
        IndexError: If a non-blank line has no comma-separated label field.
        ValueError: If a label field cannot be parsed as an integer.
    """
    all_paths = []
    all_labels = []
    try:
        with open(labels_file, 'r') as file:
            for line in file:
                # A blank line (e.g. an empty last line) holds no example;
                # skip it rather than fail on the missing label field.
                if not line.strip():
                    continue
                fields = line.split(',')
                all_paths.append(
                    os.path.join(
                        self.dirdata,
                        tools.adapt_path_to_current_os(fields[0])))
                all_labels.append(int(fields[1]))
    except FileNotFoundError as ex:
        logging.error('File %s does not exist.', labels_file)
        logging.error(str(ex))
        raise

    num_examples = len(all_labels)
    if self.percent_of_data != 100:
        # Keep only a random subset; sampling without replacement also
        # leaves the kept examples in random order.
        num_kept = int(self.percent_of_data / 100.0 * num_examples)
        indexes = np.random.choice(
            np.arange(num_examples), num_kept, replace=False)
    else:
        indexes = np.arange(num_examples)
        if self.shuffle_data:
            np.random.shuffle(indexes)

    # Drop the trailing examples that do not fit in a complete batch.
    num_usable = len(indexes) - len(indexes) % self.batch_size
    indexes = indexes[:num_usable]

    paths = [all_paths[i] for i in indexes]
    labels = [all_labels[i] for i in indexes]
    return paths, labels
def get_filenames(self, split):
    """Return the file names (without extension) listed for a data split.

    The names are read, one per line, from ``<split>_files.txt`` inside
    ``self.dirdata`` and each is passed through
    ``tools.adapt_path_to_current_os``. Blank lines are ignored.

    The names are then selected as follows:

    * if ``self.opts.percent_of_data`` is not 100, a random subset holding
      that percentage of the names is drawn without replacement;
    * otherwise all names are kept, in file order unless
      ``self.opts.shuffle_data`` is true, in which case they are shuffled.

    Finally, trailing names that do not fill a whole batch of
    ``self.batch_size`` are dropped.

    Args:
        split: Name of the split; must be ``'train'`` or ``'val'``.

    Returns:
        A list of names whose length is a multiple of ``self.batch_size``.

    Raises:
        ValueError: If ``split`` is neither ``'train'`` nor ``'val'``.
        FileNotFoundError: If the list file does not exist. The error is
            logged and re-raised.
    """
    if split not in ('train', 'val'):
        # ValueError is a subclass of Exception, so callers that catch the
        # generic Exception keep working.
        raise ValueError('Split name not recognized.')

    list_file = os.path.join(self.dirdata, split + '_files.txt')
    try:
        with open(list_file, 'r') as fid:
            # Blank lines would otherwise turn into empty file names.
            all_names = [
                tools.adapt_path_to_current_os(entry)
                for entry in fid.read().splitlines()
                if entry.strip()
            ]
    except FileNotFoundError as ex:
        logging.error('File %s does not exist.', list_file)
        logging.error(str(ex))
        raise

    num_names = len(all_names)
    if self.opts.percent_of_data != 100:
        # Keep only a random subset; sampling without replacement also
        # leaves the kept names in random order.
        num_kept = int(self.opts.percent_of_data / 100.0 * num_names)
        indexes = np.random.choice(
            np.arange(num_names), num_kept, replace=False)
    else:
        indexes = np.arange(num_names)
        if self.opts.shuffle_data:
            np.random.shuffle(indexes)

    # Drop the trailing names that do not fit in a complete batch.
    num_usable = len(indexes) - len(indexes) % self.batch_size
    return [all_names[i] for i in indexes[:num_usable]]
def read_paths_and_labels(labels_file, dirdata):
    """Load example paths and integer labels from a comma-separated file.

    Every non-blank line of ``labels_file`` is split on ``','``. The first
    field is passed through ``tools.adapt_path_to_current_os`` and joined
    onto ``dirdata``; the second field is parsed with ``int``. Blank lines
    are ignored.

    The examples are then shuffled, and trailing examples that do not fill
    a whole batch of ``batch_size`` are dropped.

    NOTE(review): ``batch_size`` is not a parameter of this function; it is
    looked up in the enclosing scope. Confirm it is defined there.

    Args:
        labels_file: Path of the labels file to read.
        dirdata: Directory that the paths in the file are relative to.

    Returns:
        A ``(paths, labels)`` tuple of two lists of equal length; that
        length is a multiple of ``batch_size``.

    Raises:
        FileNotFoundError: If ``labels_file`` does not exist. A message is
            printed and the error is re-raised.
        IndexError: If a non-blank line has no comma-separated label field.
        ValueError: If a label field cannot be parsed as an integer.
    """
    all_paths = []
    all_labels = []
    try:
        with open(labels_file, 'r') as file:
            for line in file:
                # A blank line (e.g. an empty last line) holds no example;
                # skip it rather than fail on the missing label field.
                if not line.strip():
                    continue
                fields = line.split(',')
                all_paths.append(
                    os.path.join(
                        dirdata,
                        tools.adapt_path_to_current_os(fields[0])))
                all_labels.append(int(fields[1]))
    except FileNotFoundError as ex:
        print('File ' + labels_file + ' does not exist.')
        print(str(ex))
        raise

    # Shuffle the examples.
    indexes = np.arange(len(all_labels))
    np.random.shuffle(indexes)

    # Drop the trailing examples that do not fit in a complete batch.
    num_usable = len(indexes) - len(indexes) % batch_size
    indexes = indexes[:num_usable]

    paths = [all_paths[i] for i in indexes]
    labels = [all_labels[i] for i in indexes]
    return paths, labels