def __init__(self, name, scene):
    """Validate and store the evaluator's name and scene.

    Args:
        name (Evaluator.Name or str): an entry of Evaluator.NAME_LIST;
            strings are lower-cased and converted to the enum.
        scene (Evaluator.Scene or str): an entry of Evaluator.SCENE_LIST;
            strings are lower-cased and converted to the enum.

    Raises:
        ValueError: if a string is not in the corresponding list.
        TypeError: if an argument is neither the enum type nor str.
    """
    if not isinstance(name, Evaluator.Name):
        if isinstance(name, str):
            if not name.islower():
                name = name.lower()
            if name not in Evaluator.NAME_LIST:
                raise ValueError(
                    value_error_msg('name', name, Evaluator.NAME_LIST))
            name = Evaluator.Name(name)
        else:
            raise TypeError(
                type_error_msg('name', name, [Evaluator.Name, str]))
    if not isinstance(scene, Evaluator.Scene):
        if isinstance(scene, str):
            if not scene.islower():
                scene = scene.lower()
            if scene not in Evaluator.SCENE_LIST:
                raise ValueError(
                    value_error_msg('scene', scene, Evaluator.SCENE_LIST))
            scene = Evaluator.Scene(scene)
        else:
            # BUG FIX: the accepted types are the Scene enum and str; the
            # original message listed Evaluator.SCENE_LIST (a value list)
            # as a type, inconsistent with the 'name' branch above.
            raise TypeError(
                type_error_msg('scene', scene, [Evaluator.Scene, str]))
    self.name = name
    self.scene = scene
def __getitem__(self, index):
    # Extends ImageFolder's item access to also support slice, list, tuple,
    # np.ndarray and torch.Tensor indices. Returns PIL image(s) by default,
    # or tensors if the dataset transform includes ToTensor. Only the image
    # part ([0]) of ImageFolder's (image, target) pair is returned.
    try:
        # Fast path: succeeds when index is a plain integer
        # (equals to `if isinstance(index, Number)`); other index kinds make
        # ImageFolder raise ValueError/TypeError and fall through below.
        return ImageFolder.__getitem__(self, index)[0]
    except (ValueError, TypeError):
        if isinstance(index, slice):
            # tuple_list: [(img, tag), (img, tag)]......
            # range(len(self))[index] turns the slice into concrete indices.
            return [
                ImageFolder.__getitem__(self, i)[0] for i in range(
                    super(ExtendedDataset, self).__len__())[index]
            ]
        elif isinstance(index, (tuple, list, np.ndarray, torch.Tensor)):
            # A list (not a generator) is returned on purpose:
            # only use [xxx], or tuple(xxx) as in default_collate:
            # batch must contain tensors, numpy arrays, numbers, dicts or
            # lists; found <class 'generator'>
            return [ImageFolder.__getitem__(self, i)[0] for i in index]
        else:
            raise TypeError(
                type_error_msg('index', type(index),
                               ExtendedDataset.INDEX_LIST))
def __init__(self, name, root, transform=None):
    """Build the dataset and, for Market-1501, parse cam/person ids.

    Args:
        name (BaseExecutor.Name or str): an entry of BaseExecutor.NAME_LIST;
            strings are lower-cased and converted to the enum.
        root (str): dataset root directory, forwarded to ImageFolder.
        transform: optional torchvision transform, forwarded to ImageFolder.

    Raises:
        ValueError: if a string name is not in BaseExecutor.NAME_LIST.
        TypeError: if name is neither BaseExecutor.Name nor str.
    """
    super(ExtendedDataset, self).__init__(root, transform)
    if not isinstance(name, BaseExecutor.Name):
        if isinstance(name, str):
            if not name.islower():
                name = name.lower()
            if name not in BaseExecutor.NAME_LIST:
                raise ValueError(
                    value_error_msg('name', name, BaseExecutor.NAME_LIST))
            name = BaseExecutor.Name(name)
        else:
            raise TypeError(
                type_error_msg('name', name, [BaseExecutor.Name, str]))
    if name == BaseExecutor.Name.MARKET1501:
        # 0001_c1s1_001051_00.jpg, here 0001 is peopleId, and 1 in c1 is camId
        self.cams = [
            int(item[0].split('_c')[1].split('s')[0]) for item in self.imgs
        ]  # [cam1, cam1, cam2, ...](list)
        # PERF FIX: hoist the key list out of the comprehension; the original
        # rebuilt list(self.class_to_idx.keys()) once per target (O(n^2)).
        # ImageFolder assigns indices in sorted-key order, so position
        # `item` in this list is the class name for target index `item`.
        class_names = list(self.class_to_idx.keys())
        self.ids = [int(class_names[item]) for item in self.targets]
def generate_triplets(self,
                      mode: str = ReorganisedDataset.DEFAULT_MODE,
                      positive_sample_range: str = ReorganisedDataset.
                      DEFAULT_POSITIVE_RANGE,
                      max_positive_param: int = DEFAULT_POSITIVE_PARAM,
                      negative_sample_range: str = ReorganisedDataset.
                      DEFAULT_NEGATIVE_RANGE,
                      max_negative: int = DEFAULT_MAX_NEGATIVE,
                      model=DEFAULT_MODEL,
                      batch_size: int = DEFAULT_BATCH_SIZE,
                      sort_by: str = DEFAULT_SORT):
    """Triplet_index generator.

    Ways to Generate triplets:
        OFF: randomly sample triplets.
        OFFLINE(every n steps): using the most recent network checkpoint and
            computing the argmin and argmax on a subset of the data.
        ONLINE: selecting the hard positive/negative exemplars from within a
            mini-batch.

    Reference:
        F. Schroff, D. Kalenichenko, and J. Philbin. FaceNet: A Unified
        Embedding for Face Recognition and Clustering. In CVPR, 2015.

    Args:
        mode (str): A mode defined in base class ReorganisedDataset.
        positive_sample_range (str): A positive range defined in base class
            ReorganisedDataset.
        max_positive_param (int): An integer acts as denominator in
            #sampled positive indices = #anchor class // max_positive_param
            + 1. Must be >= 1.
        negative_sample_range (str): A negative range defined in base class
            ReorganisedDataset.
        max_negative (int): An integer acts as the max #sampled negative
            indices in #sampled negative indices = min(#negative class,
            max_negative). Must be >= 1.
        model (nn.Module or None): checkpoint model, required (and only
            type-checked) for OFFLINE mode.
        batch_size (int): batch size used by offline mining; must be >= 1.
        sort_by (str): A sort defined above.

    Raises:
        ValueError: on out-of-range values or unsupported mode
            (ONLINE is not implemented and reaches the final raise).
        TypeError: on arguments of an unaccepted type.
    """
    # self.used_by = 'user'
    # --- validate `mode` (string -> TripletDataset.Mode enum) ---
    if not isinstance(mode, TripletDataset.Mode):
        if isinstance(mode, str):
            if not mode.islower():
                mode = mode.lower()
            if mode not in TripletDataset.MODE_LIST:
                raise ValueError(
                    value_error_msg('mode', mode, TripletDataset.MODE_LIST,
                                    TripletDataset.DEFAULT_MODE))
            mode = TripletDataset.Mode(mode)
        else:
            raise TypeError(
                type_error_msg('mode', mode, [TripletDataset.Mode, str]))
    # --- validate `positive_sample_range` ---
    if not isinstance(positive_sample_range, TripletDataset.PositiveRange):
        if isinstance(positive_sample_range, str):
            if not positive_sample_range.islower():
                positive_sample_range = positive_sample_range.lower()
            if positive_sample_range not in TripletDataset.POSITIVE_RANGE_LIST:
                raise ValueError(
                    value_error_msg('positive_sample_range',
                                    positive_sample_range,
                                    TripletDataset.POSITIVE_RANGE_LIST,
                                    TripletDataset.DEFAULT_POSITIVE_RANGE))
            positive_sample_range = TripletDataset.PositiveRange(
                positive_sample_range)
        else:
            raise TypeError(
                type_error_msg('positive_sample_range',
                               positive_sample_range,
                               [TripletDataset.PositiveRange, str]))
    # --- validate `negative_sample_range` ---
    if not isinstance(negative_sample_range, TripletDataset.NegativeRange):
        if isinstance(negative_sample_range, str):
            if not negative_sample_range.islower():
                negative_sample_range = negative_sample_range.lower()
            if negative_sample_range not in TripletDataset.NEGATIVE_RANGE_LIST:
                raise ValueError(
                    value_error_msg('negative_sample_range',
                                    negative_sample_range,
                                    TripletDataset.NEGATIVE_RANGE_LIST,
                                    TripletDataset.DEFAULT_NEGATIVE_RANGE))
            negative_sample_range = TripletDataset.NegativeRange(
                negative_sample_range)
        else:
            raise TypeError(
                type_error_msg('negative_sample_range',
                               negative_sample_range,
                               [TripletDataset.NegativeRange, str]))
    # --- validate the integer parameters ---
    if not max_positive_param >= 1:
        raise ValueError(
            value_error_msg('max_positive_param', max_positive_param,
                            'max_positive_param >= 1',
                            TripletDataset.DEFAULT_POSITIVE_PARAM))
    if not max_negative >= 1:
        raise ValueError(
            value_error_msg('max_negative', max_negative,
                            'max_negative >= 1',
                            TripletDataset.DEFAULT_MAX_NEGATIVE))
    if not batch_size >= 1:
        raise ValueError(
            value_error_msg('batch_size', batch_size, 'batch_size >= 1',
                            TripletDataset.DEFAULT_BATCH_SIZE))
    # --- validate `sort_by` ---
    if not isinstance(sort_by, TripletDataset.Sort):
        if isinstance(sort_by, str):
            if not sort_by.islower():
                sort_by = sort_by.lower()
            if sort_by not in TripletDataset.SORT_LIST:
                raise ValueError(
                    value_error_msg('sort_by', sort_by,
                                    TripletDataset.SORT_LIST,
                                    TripletDataset.DEFAULT_SORT))
            sort_by = TripletDataset.Sort(sort_by)
        else:
            raise TypeError(
                type_error_msg('sort_by', sort_by,
                               [TripletDataset.Sort, str]))
    # --- dispatch on the (now validated) mode ---
    if mode == TripletDataset.Mode.RANDOM:
        self.__random_selecting(positive_sample_range, max_positive_param,
                                negative_sample_range, max_negative)
    elif mode == TripletDataset.Mode.OFFLINE:
        # Offline mining needs a real model to embed samples with.
        if not isinstance(model, torch.nn.Module):
            raise TypeError(
                type_error_msg('model', model, [torch.nn.Module]))
        self.__offline_mining(model, batch_size, positive_sample_range,
                              max_positive_param, negative_sample_range,
                              max_negative, sort_by)
    else:  # equals to elif mining mode == 'online'
        # ONLINE mining is not implemented; reject it explicitly.
        raise ValueError(
            value_error_msg('mode', mode, TripletDataset.MODE_LIST,
                            TripletDataset.DEFAULT_MODE))
def run(self,
        max_epoch: int = BaseTrainer.DEFAULT_MAX_EPOCH,
        batch_size: int = BaseTrainer.DEFAULT_BATCH_SIZE,
        mode: str = DEFAULT_MODE,
        reorder_freq: int = DEFAULT_REORDER_FREQ):
    """Train with triplet loss, regenerating triplets periodically.

    Reads:
        None.
    Processes:
        Trains the model.
    Writes:
        Max_epoch pth files of model's state dict.
        Max_epoch // reorder_freq mat file(s) of triplet indices.
        A mat file of training loss.

    Args:
        max_epoch (int): number of epochs to train; must be > 0.
        batch_size (int): must be > 0.
        mode (str): A mode defined above (triplet-generation strategy).
        reorder_freq (int): Re-generate triplets every reorder_freq
            epoch(s); must be > 0.

    Raises:
        ValueError: on non-positive integer arguments or an unknown mode.
        TypeError: if mode is neither TripletTrainer.Mode nor str.
    """
    TripletTrainer.run_info(self.__class__.__name__)
    if not max_epoch > 0:
        raise ValueError(
            value_error_msg('max_epoch', max_epoch, 'max_epoch > 0',
                            TripletTrainer.DEFAULT_MAX_EPOCH))
    if not batch_size > 0:
        raise ValueError(
            value_error_msg('batch_size', batch_size, 'batch_size > 0',
                            TripletTrainer.DEFAULT_BATCH_SIZE))
    if not isinstance(mode, TripletTrainer.Mode):
        if isinstance(mode, str):
            if not mode.islower():
                mode = mode.lower()
            if mode not in TripletTrainer.MODE_LIST:
                raise ValueError(
                    value_error_msg('mode', mode, TripletTrainer.MODE_LIST))
            mode = TripletTrainer.Mode(mode)
        else:
            # BUG FIX: the accepted types are the Mode enum and str; the
            # original listed TripletTrainer.MODE_LIST (a value list) here.
            raise TypeError(
                type_error_msg('mode', mode, [TripletTrainer.Mode, str]))
    if not reorder_freq > 0:
        raise ValueError(
            value_error_msg('reorder_freq', reorder_freq,
                            'reorder_freq > 0',
                            TripletTrainer.DEFAULT_REORDER_FREQ))
    self.model.train()  # for dropout & batch normalization
    if self.start_epoch == 0:
        # Save the untrained state for later comparison.
        self.save_model(0)
    loss_history = []
    start = time()
    for epoch in range(self.start_epoch, self.start_epoch + max_epoch):
        running_loss = 0.0
        if epoch % reorder_freq == 0:
            # NOTE(review): the dataloader is (re)built only on reorder
            # epochs; this assumes start_epoch is a multiple of
            # reorder_freq so the first iteration always defines it --
            # confirm against callers.
            self.generate_triplets(mode)
            dataloader = DataLoader(
                self.dataset,
                batch_size=batch_size,
                num_workers=TripletTrainer.DEFAULT_NUM_WORKER)
            self.save_triplet(epoch)
        max_iteration = ceil(len(self.dataset) / batch_size)
        print('\n=====Epoch %s=====' % (epoch + 1))
        print('{:<13} {:<6}'.format('Iteration', 'loss'))
        for i, data in enumerate(dataloader):
            now_batch_size = data[0].size()[0]
            self.optimizer.zero_grad()
            outputs = self.model(*data)
            loss = self.criterion(*outputs)
            loss.backward()
            self.optimizer.step()
            # Weight by batch size so the epoch mean is exact even when
            # the last batch is smaller.
            running_loss += loss.item() * now_batch_size
            print('{:<4}/{:<8} {:.2f}'.format(i + 1, max_iteration,
                                              loss.item()))
        epoch_loss = running_loss / len(self.dataset)
        loss_history.append(epoch_loss)
        now_time_elapsed = time() - start
        self.save_model(epoch + 1)
        print('Epoch ({}/{})\nloss {:.4f}, time elapsed {}m {}s'.format(
            epoch + 1, self.start_epoch + max_epoch, epoch_loss,
            int(now_time_elapsed // 60), int(now_time_elapsed % 60)))
    # Persist the loss curve, appending to any earlier run's history.
    loss_path = self.config[self.name.value]['loss_path']
    loss_dir = dirname(loss_path)
    if not exists(loss_dir):
        makedirs(loss_dir)
    if self.start_epoch == 0 or not exists(loss_path):
        savemat(loss_path, {'training_loss': loss_history})
    else:
        last_loss_history = loadmat(loss_path)['training_loss'].reshape(
            -1).tolist()
        savemat(loss_path,
                {'training_loss': last_loss_history + loss_history})
def __init__(self, name, dataset, split, model, optimizer, start_epoch):
    """Validate and store trainer configuration.

    Each of name/dataset/split/model/optimizer accepts either the
    corresponding BaseTrainer enum or a (case-insensitive) string that is
    converted to it.

    Args:
        name (BaseTrainer.Name or str): entry of BaseTrainer.NAME_LIST.
        dataset (BaseTrainer.Dataset or str): entry of
            BaseTrainer.DATASET_LIST.
        split (BaseTrainer.Split or str): entry of BaseTrainer.SPLIT_LIST.
        model (BaseTrainer.Model or str): entry of BaseTrainer.MODEL_LIST.
        optimizer (BaseTrainer.Optimizer or str): entry of
            BaseTrainer.OPTIMIZER_LIST.
        start_epoch (int or None): epoch to resume from (> 0), or None to
            start from scratch (treated as 0).

    Raises:
        ValueError: if a string is not in the corresponding list, or
            start_epoch is not positive.
        TypeError: if an argument is neither the enum type nor str.
    """
    if not isinstance(name, BaseTrainer.Name):
        if isinstance(name, str):
            if not name.islower():
                name = name.lower()
            if name not in BaseTrainer.NAME_LIST:
                raise ValueError(
                    value_error_msg('name', name, BaseTrainer.NAME_LIST))
            name = BaseTrainer.Name(name)
        else:
            raise TypeError(
                type_error_msg('name', name, [BaseTrainer.Name, str]))
    if not isinstance(dataset, BaseTrainer.Dataset):
        if isinstance(dataset, str):
            if not dataset.islower():
                dataset = dataset.lower()
            if dataset not in BaseTrainer.DATASET_LIST:
                raise ValueError(
                    value_error_msg('dataset', dataset,
                                    BaseTrainer.DATASET_LIST))
            dataset = BaseTrainer.Dataset(dataset)
        else:
            raise TypeError(
                type_error_msg('dataset', dataset,
                               [BaseTrainer.Dataset, str]))
    if not isinstance(split, BaseTrainer.Split):
        if isinstance(split, str):
            if not split.islower():
                split = split.lower()
            if split not in BaseTrainer.SPLIT_LIST:
                raise ValueError(
                    value_error_msg('split', split, BaseTrainer.SPLIT_LIST))
            split = BaseTrainer.Split(split)
        else:
            raise TypeError(
                type_error_msg('split', split, [BaseTrainer.Split, str]))
    if not isinstance(model, BaseTrainer.Model):
        if isinstance(model, str):
            if not model.islower():
                model = model.lower()
            if model not in BaseTrainer.MODEL_LIST:
                raise ValueError(
                    value_error_msg('model', model, BaseTrainer.MODEL_LIST))
            model = BaseTrainer.Model(model)
        else:
            # BUG FIX: accepted types are the Model enum and str; the
            # original listed BaseTrainer.MODEL_LIST (a value list) here,
            # inconsistent with the name/dataset/split branches.
            raise TypeError(
                type_error_msg('model', model, [BaseTrainer.Model, str]))
    if not isinstance(optimizer, BaseTrainer.Optimizer):
        if isinstance(optimizer, str):
            if not optimizer.islower():
                optimizer = optimizer.lower()
            if optimizer not in BaseTrainer.OPTIMIZER_LIST:
                raise ValueError(
                    value_error_msg('optimizer', optimizer,
                                    BaseTrainer.OPTIMIZER_LIST))
            optimizer = BaseTrainer.Optimizer(optimizer)
        else:
            # BUG FIX: same as above -- list the Optimizer enum, not
            # BaseTrainer.OPTIMIZER_LIST, as an accepted type.
            raise TypeError(
                type_error_msg('optimizer', optimizer,
                               [BaseTrainer.Optimizer, str]))
    if start_epoch is not None:
        if not start_epoch > 0:
            raise ValueError(
                value_error_msg('start_epoch', start_epoch,
                                'start_epoch > 0',
                                BaseTrainer.DEFAULT_START_EPOCH))
    else:
        start_epoch = 0
    self.name = name
    self.dataset = dataset
    self.split = split
    self.model = model
    self.optimizer = optimizer
    self.start_epoch = start_epoch
def run(self,
        max_epoch: int = BaseTrainer.DEFAULT_MAX_EPOCH,
        batch_size: int = BaseTrainer.DEFAULT_BATCH_SIZE):
    """
    Reads:
        None.
    Processes:
        Trains the model over self.phase ('train' and optionally 'val').
    Writes:
        Max_epoch pth files of model's state dict.
        A mat file of losses/errors.

    Args:
        max_epoch (int): number of epochs to train; must be a positive int.
        batch_size (int): must be a positive int.

    Raises:
        ValueError: on non-positive arguments or an unrecognised
            self.split / phase value.
        TypeError: if max_epoch or batch_size is not an int.
    """
    Trainer.run_info(self.__class__.__name__, self.suffix)
    if not isinstance(max_epoch, int):
        raise TypeError(type_error_msg('max_epoch', max_epoch, [int]))
    if not max_epoch > 0:
        # NOTE(review): default shown in the message comes from
        # TripletTrainer, not Trainer/BaseTrainer as in the signature --
        # looks like a copy-paste inconsistency; confirm intended constant.
        raise ValueError(
            value_error_msg('max_epoch', max_epoch, 'max_epoch > 0',
                            TripletTrainer.DEFAULT_MAX_EPOCH))
    if not isinstance(batch_size, int):
        raise TypeError(type_error_msg('batch_size', batch_size, [int]))
    if not batch_size > 0:
        raise ValueError(
            value_error_msg('batch_size', batch_size, 'batch_size > 0',
                            TripletTrainer.DEFAULT_BATCH_SIZE))
    # save the initial state for comparing
    if self.start_epoch == 0:
        save_model(self.model, self.model_path % 0)
    # prepare for fetching image: one loader and bookkeeping per phase
    dataloader = {}
    dataset_size = {}
    max_iteration = {}
    loss_history = {}
    error_history = {}
    for phase in self.phase:
        dataloader[phase] = DataLoader(
            self.dataset[phase],
            batch_size=batch_size,
            shuffle=True,
            num_workers=Trainer.DEFAULT_NUM_WORKER)
        dataset_size[phase] = len(self.dataset[phase])
        max_iteration[phase] = ceil(len(self.dataset[phase]) / batch_size)
        loss_history[phase] = []
        error_history[phase] = []
    start = time()
    for epoch in range(self.start_epoch, self.start_epoch + max_epoch):
        print('\n===============Epoch %s===============' % (epoch + 1))
        print('{:<6} {:<13} {:<6} {:<13}'.format('Phase', 'Iteration',
                                                 'loss', 'accuracy'))
        for phase in self.phase:
            if phase == 'train':
                self.model.train()
            elif phase == 'val':
                self.model.eval(
                )  # batch norm performs differently when model is set to eval rather than train.
                # Visual separator between the train and val rows.
                print('-----------------------------------')
            running_loss = 0.0
            running_correct = 0.0
            for i, data in enumerate(dataloader[phase]):
                inputs, labels = data
                now_batch_size = inputs.shape[0]
                self.optimizer.zero_grad()
                if phase == 'train':
                    outputs = self.model(inputs)
                elif phase == 'val':
                    # No autograd graph needed for validation.
                    with torch.no_grad():
                        outputs = self.model(inputs)
                else:
                    raise ValueError()
                # for error computation
                with torch.no_grad():
                    predictions = torch.argmax(outputs, 1)
                loss = self.criterion(outputs, labels)
                if phase == 'train':
                    loss.backward()
                    self.optimizer.step()
                # loss/error computation; weighted by batch size so the
                # epoch mean is exact even when the last batch is smaller
                with torch.no_grad():
                    iteration_loss = loss.item()
                    iteration_correct = float(
                        torch.sum(torch.eq(predictions,
                                           labels))) / now_batch_size
                    running_loss += iteration_loss * now_batch_size
                    running_correct += iteration_correct * now_batch_size
                print('{:<6} {:<4}/{:<8} {:<.2f} {:<.2f}'.format(
                    phase, i + 1, max_iteration[phase], iteration_loss,
                    iteration_correct))
            # compute loss and error for every phases in an epoch
            epoch_loss = running_loss / dataset_size[phase]
            epoch_accuracy = running_correct / dataset_size[phase]
            loss_history[phase].append(epoch_loss)
            error_history[phase].append(1 - epoch_accuracy)
        # save model at the end of every epochs
        save_model(self.model, self.model_path % (epoch + 1))
        now_time_elapsed = time() - start
        print('\nEpoch ({}/{})'.format(epoch + 1,
                                       self.start_epoch + max_epoch))
        for phase in self.phase:
            print('{:<5} loss {:.4f}, error {:.4f}'.format(
                phase, loss_history[phase][-1], error_history[phase][-1]))
        print('time elapsed {:.0f}h {:.0f}m {:.0f}s'.format(
            now_time_elapsed // 3600, (now_time_elapsed % 3600) / 60,
            now_time_elapsed % 60))
    # Persist loss/error curves; key set depends on the split.
    train_dir = dirname(self.train_path)
    if self.split == Trainer.Split.TRAIN_VAL:
        dictionary = {
            'training_loss': loss_history['train'],
            'training_error': error_history['train'],
            'validation_loss': loss_history['val'],
            'validation_error': error_history['val']
        }
    elif self.split == Trainer.Split.TRAIN_ONLY:
        dictionary = {
            'training_loss': loss_history['train'],
            'training_error': error_history['train']
        }
    else:
        raise ValueError(
            value_error_msg('split', self.split, BaseTrainer.SPLIT_LIST))
    if not exists(train_dir):
        makedirs(train_dir)
    if self.start_epoch == 0 or not exists(self.train_path):
        savemat(self.train_path, dictionary)
    else:
        # train.mat already exists, so we should preserve original data and merge them with new data
        last_train_history = loadmat(self.train_path)
        savemat(self.train_path,
                merge_last(last_train_history, dictionary))
def __init__(self, config, name, dataset, model, epoch: int, scene):
    """Validate configuration, build the model and test datasets.

    Args:
        config: parsed project configuration (ConfigParser-like; read with
            `getint` and item access).
        name (Tester.Name or str): entry of Tester.NAME_LIST.
        dataset (Tester.Dataset or str): entry of Tester.DATASET_LIST.
        model (Tester.Model or str): entry of Tester.MODEL_LIST.
        epoch (int): checkpoint epoch to load; must be >= 0.
        scene (Tester.Scene or str): entry of Tester.SCENE_LIST
            (multi-shot adds a 'multi_query' dataset).

    Raises:
        ValueError: if a string is not in the corresponding list, epoch is
            negative, or the dataset kind is unsupported.
        TypeError: if an argument is neither the accepted enum type nor str
            (epoch: not an int).
    """
    if not isinstance(name, Tester.Name):
        if isinstance(name, str):
            if not name.islower():
                name = name.lower()
            if name not in Tester.NAME_LIST:
                raise ValueError(
                    value_error_msg('name', name, Tester.NAME_LIST))
            name = Tester.Name(name)
        else:
            raise TypeError(
                type_error_msg('name', name, [Tester.Name, str]))
    if not isinstance(dataset, Tester.Dataset):
        if isinstance(dataset, str):
            if not dataset.islower():
                dataset = dataset.lower()
            if dataset not in Tester.DATASET_LIST:
                raise ValueError(
                    value_error_msg('dataset', dataset,
                                    Tester.DATASET_LIST))
            dataset = Tester.Dataset(dataset)
        else:
            raise TypeError(
                type_error_msg('dataset', dataset, [Tester.Dataset, str]))
    if not isinstance(model, Tester.Model):
        if isinstance(model, str):
            if not model.islower():
                model = model.lower()
            if model not in Tester.MODEL_LIST:
                raise ValueError(
                    value_error_msg('model', model, Tester.MODEL_LIST))
            model = Tester.Model(model)
        else:
            # BUG FIX: accepted types are the Model enum and str; the
            # original listed Tester.MODEL_LIST (a value list) here.
            raise TypeError(
                type_error_msg('model', model, [Tester.Model, str]))
    if not isinstance(scene, Tester.Scene):
        if isinstance(scene, str):
            if not scene.islower():
                scene = scene.lower()
            if scene not in Tester.SCENE_LIST:
                raise ValueError(
                    value_error_msg('scene', scene, Tester.SCENE_LIST))
            scene = Tester.Scene(scene)
        else:
            # BUG FIX: same as above -- list the Scene enum, not
            # Tester.SCENE_LIST, as an accepted type.
            raise TypeError(
                type_error_msg('scene', scene, [Tester.Scene, str]))
    if not isinstance(epoch, int):
        raise TypeError(type_error_msg('epoch', epoch, [int]))
    if not epoch >= 0:
        raise ValueError(value_error_msg('epoch', epoch, 'epoch >= 0'))
    self.name = name
    self.dataset = dataset
    self.model = model
    self.scene = scene
    self.config = config
    self.train_class = config.getint(self.name.value, 'train_class')
    # initialize model (keep the enum's string value for the weight path)
    model_name = self.model.value
    if self.model == Tester.Model.MSSNET:
        self.model = MssNet(self.config)
    elif self.model == Tester.Model.RESNET50:
        self.model = ResNet50(self.config, self.train_class, False)
    # NOTE(review): other Model values silently leave self.model as the
    # enum; the guard below was disabled deliberately? -- confirm.
    # else:
    #     raise ValueError(value_error_msg('model', model,
    #                                      Tester.MODEL_LIST))
    transform_list = []
    if self.name == Tester.Name.MARKET1501:
        transform_list = [
            # transforms.Resize((160, 64)),
            # transforms.Pad(10),
            # transforms.RandomCrop((160, 64)),
            # transforms.RandomHorizontalFlip(),
            # transforms.ToTensor()
            transforms.Resize((256, 128), interpolation=3),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406],
                                 [0.229, 0.224, 0.225])
        ]
    self.dataset_type = ['gallery', 'query']
    if self.scene == Tester.Scene.MULTI_SHOT:
        self.dataset_type.append('multi_query')
    # prepare datasets
    if self.dataset == Tester.Dataset.EXTENDED:
        self.dataset = {}
        for item in self.dataset_type:
            self.dataset[item] = ExtendedDataset(
                self.name.value,
                join(self.config[self.name.value]['dataset_dir'], item),
                transforms.Compose(transform_list))
    else:
        raise ValueError(
            value_error_msg('dataset', dataset, Tester.Dataset.EXTENDED))
    # load weights
    load_model(
        self.model,
        self.config[self.name.value]['model_format'] % (model_name, epoch))
    if isinstance(self.model, IdentificationModel):
        self.model.set_to_test()
    self.test_path = self.config[
        self.name.value]['test_path'] % self.scene.value
def __init__(self, config, name, model, pretrain, epoch, split, scene,
             query_list: list, length: int):
    """Validate configuration, build the model, writer and datasets.

    Args:
        config: parsed project configuration (ConfigParser-like; read with
            `getint` and item access).
        name (Visualizer.Name or str): entry of Visualizer.NAME_LIST.
        model (Visualizer.Model or str): entry of Visualizer.MODEL_LIST.
        pretrain (bool): selects the 'pretrain'/'no_pretrain' suffix used
            in the train path.
        epoch (int): checkpoint epoch to load; must be >= 0.
        split (Visualizer.Split or str): entry of Visualizer.SPLIT_LIST.
        scene (Visualizer.Scene or str): entry of Visualizer.SCENE_LIST
            (multi-shot adds a 'multi_query' dataset).
        query_list (Iterable): query indices to visualize.
        length (int): ranking-list length to display; must be > 0.

    Raises:
        ValueError: if a string is not in the corresponding list, epoch is
            negative, length is not positive, or the model enum has no
            implementation.
        TypeError: if an argument is not of an accepted type.
    """
    if not isinstance(name, Visualizer.Name):
        if isinstance(name, str):
            if not name.islower():
                name = name.lower()
            if name not in Visualizer.NAME_LIST:
                raise ValueError(
                    value_error_msg('name', name, Visualizer.NAME_LIST))
            name = Visualizer.Name(name)
        else:
            raise TypeError(
                type_error_msg('name', name, [Visualizer.Name, str]))
    if not isinstance(model, Visualizer.Model):
        if isinstance(model, str):
            if not model.islower():
                model = model.lower()
            if model not in Visualizer.MODEL_LIST:
                raise ValueError(
                    value_error_msg('model', model, Visualizer.MODEL_LIST))
            model = Visualizer.Model(model)
        else:
            raise TypeError(
                type_error_msg('model', model, [Visualizer.Model, str]))
    if not isinstance(epoch, int):
        raise TypeError(type_error_msg('epoch', epoch, [int]))
    if not epoch >= 0:
        raise ValueError(value_error_msg('epoch', epoch, 'epoch >= 0'))
    if not isinstance(split, Visualizer.Split):
        if isinstance(split, str):
            if not split.islower():
                split = split.lower()
            if split not in Visualizer.SPLIT_LIST:
                raise ValueError(
                    value_error_msg('split', split, Visualizer.SPLIT_LIST))
            split = Visualizer.Split(split)
        else:
            raise TypeError(
                type_error_msg('split', split, [Visualizer.Split, str]))
    if not isinstance(scene, Visualizer.Scene):
        if isinstance(scene, str):
            if not scene.islower():
                scene = scene.lower()
            if scene not in Visualizer.SCENE_LIST:
                raise ValueError(
                    value_error_msg('scene', scene, Visualizer.SCENE_LIST))
            scene = Visualizer.Scene(scene)
        else:
            raise TypeError(
                type_error_msg('scene', scene, [Visualizer.Scene, str]))
    if not isinstance(query_list, Iterable):
        # BUG FIX: every other type_error_msg call passes a *list* of
        # accepted types; the original passed the bare class here.
        raise TypeError(
            type_error_msg('query_list', query_list, [Iterable]))
    if not isinstance(length, int):
        raise TypeError(type_error_msg('length', length, [int]))
    if not length > 0:
        raise ValueError(
            value_error_msg('length', length, 'length > 0',
                            Visualizer.DEFAULT_LIST_LENGTH))
    self.config = config
    self.name = name
    self.model = model
    self.train_class = config.getint(self.name.value, 'train_class')
    # initialize model (keep the enum's string value for the weight path)
    model_name = self.model.value
    if self.model == Visualizer.Model.RESNET50:
        self.model = ResNet50(self.config, self.train_class, False)
    elif self.model == Visualizer.Model.MSSNET:
        self.model = MssNet(self.config)
    else:
        # BUG FIX: the message listed only [Visualizer.Model.RESNET50]
        # although MSSNET is also supported; report the full list.
        raise ValueError(
            value_error_msg('model', model, Visualizer.MODEL_LIST))
    # load weights
    load_model(
        self.model,
        self.config[self.name.value]['model_format'] % (model_name, epoch))
    self.split = split
    self.scene = scene
    self.query_list = query_list
    self.length = length
    self.tensorboard_dir = self.config[self.name.value]['tensorboard_dir']
    # WARNING: the log files won' t be saved
    rmtree(self.tensorboard_dir, ignore_errors=True)
    self.writer = SummaryWriter(join(self.tensorboard_dir,
                                     self.name.value))
    # dataset loading
    self.dataset_type = ['gallery', 'query']
    if self.scene == Visualizer.Scene.MULTI_SHOT:
        self.dataset_type.append('multi_query')
    self.dataset = {}
    for item in self.dataset_type:
        self.dataset[item] = ExtendedDataset(
            self.name.value,
            join(self.config[self.name.value]['dataset_dir'], item))
    self.suffix = 'pretrain' if pretrain else 'no_pretrain'
    self.train_path = self.config[
        self.name.value]['train_path'] % self.suffix
    self.test_path = self.config[
        self.name.value]['test_path'] % self.scene.value
    self.evaluation_path = self.config[
        self.name.value]['evaluation_path'] % self.scene.value