def normalizing_params(data_names):
    total_mean = []
    total_std = []
    for data_name in data_names:
        data_params = {
            'data_name': data_name,
            'mode': 'train',
            'root_dir': '/mnt/nfs/work1/ds4cg',
            'batch_size': 512,
            'num_workers': 4,
            'label_type': 'binary'
        }
        dataloader = load(**data_params)
        num_batches = len(dataloader['train'])
        for i, (image, label) in enumerate(dataloader['train']):
            numpy_image = image.numpy()
            # Per-batch channel-wise statistics over (batch, height, width)
            batch_mean = np.mean(numpy_image, axis=(0, 2, 3))
            batch_std = np.std(numpy_image, axis=(0, 2, 3))
            total_mean.append(batch_mean)
            total_std.append(batch_std)
            log.infov('{}/{}'.format(i + 1, num_batches))

    # Average the per-batch statistics and rescale from [0, 255] to [0, 1]
    mean = np.mean(total_mean, axis=0) / 255.0
    std = np.mean(total_std, axis=0) / 255.0
    log.infov('mean: {}, std: {}'.format(mean, std))
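# Usage sketch (not part of the module): the printed per-channel mean/std would
# typically be copied into a normalization transform. The numeric values below
# are placeholders, not computed results; torchvision usage is an assumption.
from torchvision import transforms

example_mean = [0.5, 0.5, 0.5]    # placeholder channel means in [0, 1]
example_std = [0.25, 0.25, 0.25]  # placeholder channel stds in [0, 1]

example_preprocess = transforms.Compose([
    transforms.ToTensor(),  # scales pixels to [0, 1], matching the /255.0 above
    transforms.Normalize(example_mean, example_std),
])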
def build(model_config, checkpoint):
    if 'name' not in model_config:
        log.error('Specify a model name')
        exit()
    model_name = model_config['name']

    # build model
    if model_name in SUPERVISED_MODELS:
        log.infov('{} model is built'.format(model_name.upper()))
        model = build_supervised_model(model_name, model_config)
    elif model_name in SEMI_MODELS:
        log.infov('{} model is built'.format(model_name.upper()))
        model = build_semi_model(model_name, model_config)
    else:
        valid_models = dict(SUPERVISED_MODELS, **SEMI_MODELS)
        log.error('Enter a valid model name among {}'.format(valid_models))
        exit()

    # load model
    if checkpoint is not None:
        model.load_state_dict(checkpoint['model_state_dict'])
        log.infov('Model is built using the given checkpoint')
    else:
        log.infov('Model is built without a checkpoint')

    # parallelize model
    if torch.cuda.device_count() > 1:
        model = torch.nn.DataParallel(model)
        log.warn('{} GPUs will be used.'.format(torch.cuda.device_count()))

    return model
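# Usage sketch with a hypothetical config (the valid keys and model names
# depend on the registries defined in this repo; 'resnet18' and 'num_classes'
# are only illustrative and must exist for the call to succeed).
example_model_config = {
    'name': 'resnet18',   # hypothetical entry in SUPERVISED_MODELS
    'num_classes': 1,     # hypothetical option consumed by the model builder
}
# model = build(example_model_config, checkpoint=None)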
def build(train_config, label_type):
    # set cross entropy as a default
    if 'criterion' not in train_config:
        criterion_config = {'name': 'cross_entropy'}
    else:
        criterion_config = train_config['criterion']

    criterion_name = label_type + '_' + criterion_config.pop('name')
    if criterion_config:
        criterion = CRITERIONS[criterion_name](**criterion_config)
    else:
        criterion = CRITERIONS[criterion_name]()

    log.infov('{} is built'.format(criterion_name))
    return criterion
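# Sketch of a criterion registry consistent with the lookup above (an
# assumption; the real CRITERIONS mapping is defined elsewhere in the repo).
# With label_type='binary' and name='cross_entropy', the key becomes
# 'binary_cross_entropy'.
import torch.nn as nn

EXAMPLE_CRITERIONS = {
    'binary_cross_entropy': nn.BCEWithLogitsLoss,  # sigmoid + BCE on logits
    'multi_cross_entropy': nn.CrossEntropyLoss,    # hypothetical multi-class key
}

example_criterion = EXAMPLE_CRITERIONS['binary_cross_entropy']()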
def _evaluate_once(self, data_name, is_label_available, num_batches, use_roc):
    if is_label_available:
        total_loss, num_corrects = 0.0, 0
    else:
        prediction_results = []

    self.model.eval()
    with torch.no_grad():  # no gradients are needed during evaluation
        for i, (inputs, labels) in enumerate(self.dataloader['eval']):
            inputs = inputs.to(self.device)
            if is_label_available:
                labels = labels.to(self.device).unsqueeze(-1).float()

            # Forward propagation
            outputs = self.model(inputs)

            # Use softmax when the num of classes > 1 else sigmoid
            if self.num_classes > 1:
                _, predictions = torch.max(outputs, 1)
            else:
                probabilities = torch.sigmoid(outputs)
                predictions = torch.gt(probabilities, 0.5).float()
                if use_roc and is_label_available:
                    # TODO: implement save_roc
                    util.save_roc(probabilities, labels)

            # Statistics
            if is_label_available:
                num_corrects += torch.sum((predictions == labels).int())
            else:
                predictions = predictions.data.cpu().numpy().flatten()
                ids = [id for id in labels]
                for prediction, id in zip(predictions, ids):
                    prediction_results.append([id, int(prediction)])

            log.info('Evaluation batch {}/{}'.format(i + 1, num_batches))

    if is_label_available:
        eval_acc = num_corrects.double() / float(
            len(self.dataloader['eval'].dataset))
        # TODO: add step
        log.infov('Evaluation accuracy: {}'.format(eval_acc))
        self.writer.add_scalar('eval accuracy', eval_acc)
    else:
        util.save_results(self.mode, self.model_name, self.tag,
                          data_name, prediction_results)
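# The save_roc call above is still a TODO. A minimal sketch of one possible
# implementation (an assumption, not the repo's util.save_roc), using sklearn
# on a single batch; a full version would accumulate scores across batches.
import numpy as np
from sklearn.metrics import roc_curve, roc_auc_score

def example_save_roc(probabilities, labels, output_path='roc.csv'):
    """Save FPR/TPR/threshold triples and return the AUC for binary outputs."""
    y_score = probabilities.detach().cpu().numpy().ravel()
    y_true = labels.detach().cpu().numpy().ravel()
    fpr, tpr, thresholds = roc_curve(y_true, y_score)
    np.savetxt(output_path, np.column_stack((fpr, tpr, thresholds)),
               delimiter=',', header='fpr,tpr,threshold', comments='')
    return roc_auc_score(y_true, y_score)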
def train(self):
    start_epoch = 0 if self.checkpoint is None else self.checkpoint['epoch']
    if not isinstance(start_epoch, int):
        start_epoch = 0

    num_epochs = self.train_config.get('num_epochs', 50)
    if num_epochs < start_epoch:
        num_epochs = start_epoch + 50
    log.info('Training for {} epochs starts from epoch {}'.format(
        num_epochs, start_epoch))

    val_accuracies = []
    best_acc = 0.0
    for epoch in range(start_epoch, num_epochs):
        train_start = time.time()
        if self.scheduler is not None:
            self.scheduler.step()

        train_loss = self._train_one_epoch()
        self._save_model(epoch)

        time_elapsed = time.time() - train_start
        log.info(
            'Epoch {} completed in {} - train loss: {:.4f}'.format(
                epoch, time_elapsed, train_loss)
        )

        val_start = time.time()
        val_loss, val_acc = self.validate()

        # save the best model
        if val_acc > best_acc:
            best_acc = val_acc
            self._save_model(epoch, 'best')
        val_accuracies.append(val_acc)

        time_elapsed = time.time() - val_start
        log.infov(
            'Epoch {} completed in {} - val loss: {:.4f}, val accuracy {:.4f}'.format(
                epoch, time_elapsed, val_loss, val_acc)
        )
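# Sketch of what _save_model is presumed to write, inferred from the keys the
# builders in this repo read back ('model_state_dict', 'optimizer_state_dict',
# 'scheduler_state_dict', 'epoch'). The function name, signature, and path
# handling are hypothetical.
import os
import torch

def example_save_model(model, optimizer, scheduler, epoch, checkpoint_dir, tag=''):
    checkpoint = {
        'epoch': epoch,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
    }
    if scheduler is not None:
        checkpoint['scheduler_state_dict'] = scheduler.state_dict()
    filename = 'checkpoint_{}{}.pth'.format(epoch, '_' + tag if tag else '')
    torch.save(checkpoint, os.path.join(checkpoint_dir, filename))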
def evaluate(self):
    image_paths, predictions, labels, label_type = self.predict()
    is_label_available = util.is_label_available(labels)

    if is_label_available:
        num_images = len(labels)
        num_corrects = np.sum(predictions == labels)
        accuracy = num_corrects / float(num_images)
        log.infov('Evaluation accuracy: {}'.format(accuracy))
    else:
        # Write predictions for unlabeled images to a CSV file
        ids = [path.split('/')[-1].split('.')[0] for path in image_paths]
        results = np.stack((ids, predictions), axis=-1)
        results = np.concatenate(([['id', 'animal_present']], results), axis=0)
        output_file = os.path.join(self.eval_config['output_dir'], 'results.csv')
        with open(output_file, 'w') as f:
            writer = csv.writer(f)
            writer.writerows(results)
        log.infov('Evaluation results are written to {}'.format(output_file))
    return None
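# Sketch of reading back the results.csv written above; the column names come
# from the header row ['id', 'animal_present'] used in evaluate(), everything
# else here is illustrative.
import csv

def example_read_results(path='results.csv'):
    with open(path, newline='') as f:
        reader = csv.DictReader(f)
        # Each row maps 'id' -> image id stem and 'animal_present' -> 0/1 prediction
        return [(row['id'], int(row['animal_present'])) for row in reader]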
def build(train_config, optimizer, checkpoint):
    if 'lr_schedule' not in train_config:
        log.infov('No scheduler is specified')
        return None

    schedule_config = train_config['lr_schedule']
    scheduler_name = schedule_config.pop('name', 'step_lr')
    schedule_config['optimizer'] = optimizer

    if scheduler_name in SCHEDULERS:
        scheduler = SCHEDULERS[scheduler_name](**schedule_config)
    else:
        log.error('Specify a valid scheduler name among {}'.format(
            SCHEDULERS.keys()))
        exit()

    if checkpoint is not None:
        scheduler.load_state_dict(checkpoint['scheduler_state_dict'])

    log.infov('{} scheduler is built'.format(scheduler_name.upper()))
    return scheduler
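# Sketch of a scheduler registry and config consistent with the lookup above
# (an assumption; the real SCHEDULERS mapping is defined elsewhere). Note that
# build() injects the optimizer into the config before unpacking it.
import torch.optim.lr_scheduler as lr_scheduler

EXAMPLE_SCHEDULERS = {
    'step_lr': lr_scheduler.StepLR,
    'multi_step_lr': lr_scheduler.MultiStepLR,  # hypothetical extra entry
}

example_train_config_with_schedule = {
    'lr_schedule': {'name': 'step_lr', 'step_size': 10, 'gamma': 0.1}
}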
def build(train_config, model_params, checkpoint):
    if 'optimizer' not in train_config:
        log.error('Specify an optimizer')
        exit()

    optim_config = train_config['optimizer']
    optimizer_name = optim_config.pop('name', 'sgd')
    optim_config['params'] = model_params

    if optimizer_name in OPTIMIZERS:
        optimizer = OPTIMIZERS[optimizer_name](**optim_config)
    else:
        log.error('Specify a valid optimizer name among {}'.format(
            OPTIMIZERS.keys()))
        exit()

    if checkpoint is not None:
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])

    log.infov('{} optimizer is built'.format(optimizer_name.upper()))
    return optimizer
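# Sketch of an optimizer registry and config consistent with the lookup above
# (an assumption; the real OPTIMIZERS mapping is defined elsewhere). build()
# injects the model parameters under the 'params' key before unpacking.
import torch.optim as optim

EXAMPLE_OPTIMIZERS = {
    'sgd': optim.SGD,
    'adam': optim.Adam,  # hypothetical extra entry
}

example_train_config_with_optimizer = {
    'optimizer': {'name': 'sgd', 'lr': 0.01, 'momentum': 0.9, 'weight_decay': 1e-4}
}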
def _batch(image_paths, labels, total_num, batch_size):
    # Ceiling division so a partial final batch is counted in the log
    num_batches = (total_num + batch_size - 1) // batch_size
    for idx in range(0, total_num, batch_size):
        log.infov('Processing batch : {}/{}'.format(
            idx // batch_size + 1, num_batches))
        yield (image_paths[idx:min(idx + batch_size, total_num)],
               labels[idx:min(idx + batch_size, total_num)])
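# Usage sketch with toy values (made up), showing that each yield is a
# (paths, labels) pair of at most batch_size items.
example_paths = ['img_{}.jpg'.format(i) for i in range(5)]
example_labels = [0, 1, 0, 1, 1]
# for path_batch, label_batch in _batch(example_paths, example_labels,
#                                       total_num=5, batch_size=2):
#     ...  # batches of sizes 2, 2, 1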