def test(validate_loader, device, model, criterion):
    """Evaluate `model` on `validate_loader` and collect a confusion meter.

    :param validate_loader: iterable of (inputs, labels, batch_seq_len) batches
    :param device: torch device to run on
    :param model: network returning (logits, hidden) from (inputs, seq_len)
    :param criterion: unused; kept for call-site compatibility
    :return: the populated ConfuseMeter
    """
    model = model.to(device)
    model.eval()
    confuse_meter = ConfuseMeter()
    with torch.no_grad():  # no gradient updates during evaluation
        val_top1 = AvgrageMeter()
        validate_loader = tqdm(validate_loader)
        for i, data in enumerate(validate_loader, 0):
            inputs, labels, batch_seq_len = data[0].to(device), data[1].to(
                device), data[2]
            outputs, _ = model(inputs, batch_seq_len)
            prec1, prec2 = accuracy(outputs, labels, topk=(1, 2))
            n = inputs.size(0)
            val_top1.update(prec1.item(), n)
            confuse_meter.update(outputs, labels)
            postfix = {
                'test_acc': '%.6f' % val_top1.avg,
                'confuse_acc': '%.6f' % confuse_meter.acc
            }
            validate_loader.set_postfix(log=postfix)
    # Removed dead locals (`val_acc`, `validate_loss`) and commented-out
    # loss computation: `criterion` is never applied in this test pass.
    return confuse_meter
def train(epoch, epochs, train_loader, device, model, criterion, optimizer,
          scheduler, tensorboard_path):
    """Run one training epoch and log loss/accuracy to TensorBoard.

    :param epoch: current epoch index (used as the TensorBoard step)
    :param epochs: total number of epochs (unused here; kept for callers)
    :param train_loader: tqdm-wrapped loader of (inputs, labels, seq_len)
    :param device: torch device to run on
    :param model: network returning (logits, hidden)
    :param criterion: loss function
    :param optimizer: optimizer for `model`'s parameters
    :param scheduler: LR scheduler, stepped once per epoch
    :param tensorboard_path: directory for TensorBoard event files
    """
    model.train()
    top1 = AvgrageMeter()
    model = model.to(device)
    train_loss = 0.0
    # Fix: create the SummaryWriter once per epoch instead of re-opening a
    # new writer (and leaking its file handle) on every mini-batch.
    if not os.path.exists(tensorboard_path):
        os.makedirs(tensorboard_path)
    writer = SummaryWriter(tensorboard_path)
    for i, data in enumerate(train_loader, 0):
        inputs, labels, batch_seq_len = data[0].to(device), data[1].to(
            device), data[2]
        optimizer.zero_grad()  # clear gradients from the previous batch
        outputs, hidden = model(inputs, batch_seq_len)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        prec1, prec2 = accuracy(outputs, labels, topk=(1, 2))
        n = inputs.size(0)
        top1.update(prec1.item(), n)
        train_loss += loss.item()
        postfix = {
            'train_loss': '%.6f' % (train_loss / (i + 1)),
            'train_acc': '%.6f' % top1.avg
        }
        train_loader.set_postfix(log=postfix)
        writer.add_scalar('Train/Loss', loss.item(), epoch)
        writer.add_scalar('Train/Accuracy', top1.avg, epoch)
        writer.flush()
    writer.close()
    scheduler.step()
def update_MLP(self):
    """Fit the predictor MLP on (architecture, loss) pairs from the group.

    Flattens each member's structure into a 6-dim vector, drops samples
    whose loss is >= 15, then runs 20 mini-batch SGD steps.
    """
    archs_all = torch.zeros(self.max, 6).cuda()
    targets_all = torch.zeros(self.max).cuda()
    self.MLP.train()
    for row, member in enumerate(self.group):
        flat = [gene for genes in member.structure for gene in genes]
        archs_all[row][:] = torch.tensor(flat)[:]
        targets_all[row] = member.loss
    keep = targets_all < 15  # discard clearly-diverged samples
    archs_all = archs_all[keep, :]
    targets_all = targets_all[keep]
    loss_meter = AvgrageMeter()
    n_steps, batch_size = 20, 32
    for step in range(n_steps):
        # wrap around the (filtered) dataset in fixed-size windows
        lo = (batch_size * step) % archs_all.size(0)
        hi = lo + batch_size
        batch_archs = archs_all[lo:hi]
        batch_targets = targets_all[lo:hi]
        pred = self.MLP(batch_archs)
        loss = self.criterion(pred, batch_targets)
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()
        loss_meter.update(loss.item(), batch_archs.size(0))
    logInfo = 'MLP: loss = {:.6f},\t'.format(loss_meter.avg)
    logging.info(logInfo)
def validate(epoch, validate_loader, device, model, criterion,
             tensorboard_path):
    """Run one validation epoch, log to TensorBoard, and return top-1 acc.

    :param epoch: current epoch index (used as the TensorBoard step)
    :param validate_loader: iterable of (inputs, labels, batch_seq_len)
    :param device: torch device to run on
    :param model: network returning (logits, hidden)
    :param criterion: loss function
    :param tensorboard_path: directory for TensorBoard event files
    :return: average top-1 accuracy over the epoch
    """
    model = model.to(device)
    model.eval()
    # Fix: create the SummaryWriter once instead of re-opening a new writer
    # (and leaking its file handle) on every mini-batch.
    if not os.path.exists(tensorboard_path):
        os.makedirs(tensorboard_path)
    writer = SummaryWriter(tensorboard_path)
    with torch.no_grad():  # no gradient updates during evaluation
        val_top1 = AvgrageMeter()
        validate_loader = tqdm(validate_loader)
        validate_loss = 0.0
        for i, data in enumerate(validate_loader, 0):
            inputs, labels, batch_seq_len = data[0].to(device), data[1].to(
                device), data[2]
            outputs, _ = model(inputs, batch_seq_len)
            loss = criterion(outputs, labels)
            prec1, prec2 = accuracy(outputs, labels, topk=(1, 2))
            n = inputs.size(0)
            val_top1.update(prec1.item(), n)
            validate_loss += loss.item()
            postfix = {
                'validate_loss': '%.6f' % (validate_loss / (i + 1)),
                'validate_acc': '%.6f' % val_top1.avg
            }
            validate_loader.set_postfix(log=postfix)
            writer.add_scalar('Validate/Loss', loss.item(), epoch)
            writer.add_scalar('Validate/Accuracy', val_top1.avg, epoch)
            writer.flush()
    writer.close()
    val_acc = val_top1.avg
    return val_acc
def eval_fn(self, loader, device, train=False, confusion_m = False, criterion = None):
    """
    Evaluation method
    :param loader: data loader for either training or testing set
    :param device: torch device
    :param train: boolean to indicate if training or test set is used
    :return: accuracy on the data
    """
    loss_meter = AvgrageMeter()
    acc_meter = AvgrageMeter()
    self.eval()
    with torch.no_grad():  # inference only — no gradients
        for step, batch in enumerate(loader):
            images, labels = batch
            images, labels = images.to(device), labels.to(device)
            outputs = self(images)
            top1, _ = accuracy(outputs, labels, topk=(1, 5))
            batch_n = images.size(0)
            acc_meter.update(top1.item(), batch_n)
            if criterion:
                batch_loss = criterion(outputs, labels)
                loss_meter.update(batch_loss.item(), batch_n)
            if step % self.report_freq == 0:
                logging.info('Evaluation | step: %d | accuracy: %f' %
                             (step, acc_meter.avg))
    return acc_meter.avg, loss_meter.avg
def aggregation(data_train, data_test, args, clientIDs, model, optimizer):
    """Meta-learning aggregation round.

    Each client trains one epoch from the shared initial weights; the new
    global weights blend the mean client weights with the initial weights
    via args.global_lr. Returns the new state_dict plus mean train/test
    accuracy and loss across clients.
    """
    mean_train_acc, mean_train_loss = AvgrageMeter(), AvgrageMeter()
    mean_test_acc, mean_test_loss = AvgrageMeter(), AvgrageMeter()
    initial_weights = copy.deepcopy(model.state_dict())
    num_examples = []
    weight_dict_list = []
    for clientID in clientIDs:
        # every client starts this round from the same global weights
        model.load_state_dict(initial_weights)
        model, train_acc, train_loss, test_acc_list, test_loss_list = lstm_train(
            data_train[clientID], data_test[clientID], args, model, optimizer,
            1)
        num_examples.append(len(data_train[clientID]))
        weight_dict_list.append(copy.deepcopy(model.state_dict()))
        mean_train_acc.update(train_acc, 1)
        mean_train_loss.update(train_loss, 1)
        mean_test_acc.update(test_acc_list[-1], 1)
        mean_test_loss.update(test_loss_list[-1], 1)
    # meta-learning: new = lr * mean(client weights) + (1 - lr) * initial,
    # accumulated in place into weight_dict_list[0]
    for key in weight_dict_list[0].keys():
        for idx in range(1, len(weight_dict_list)):
            weight_dict_list[0][key].add_(weight_dict_list[idx][key])
        weight_dict_list[0][key].mul_(args.global_lr / len(clientIDs)).add_(
            (1 - args.global_lr) * initial_weights[key])
    return (weight_dict_list[0], mean_train_acc.avg, mean_train_loss.avg,
            mean_test_acc.avg, mean_test_loss.avg)
def aggregation(data_train, data_test, args, clientIDs, model, optimizer):
    """FedAvg aggregation round.

    Each client trains args.train_epochs from the shared initial weights;
    the new global weights are the example-count-weighted average of the
    client weights. Returns the averaged state_dict plus mean train/test
    accuracy and loss across clients.
    """
    mean_train_acc, mean_train_loss = AvgrageMeter(), AvgrageMeter()
    mean_test_acc, mean_test_loss = AvgrageMeter(), AvgrageMeter()
    initial_weights = copy.deepcopy(model.state_dict())
    num_examples = []
    weight_dict_list = []
    for clientID in clientIDs:
        # every client starts this round from the same global weights
        model.load_state_dict(initial_weights)
        model, train_acc, train_loss, test_acc_list, test_loss_list = client_update(
            data_train[clientID], data_test[clientID], args, model, optimizer,
            args.train_epochs)
        num_examples.append(len(data_train[clientID]))
        weight_dict_list.append(copy.deepcopy(model.state_dict()))
        mean_train_acc.update(train_acc, 1)
        mean_train_loss.update(train_loss, 1)
        mean_test_acc.update(test_acc_list[-1], 1)
        mean_test_loss.update(test_loss_list[-1], 1)
    # fedAveraging: weighted in-place average accumulated into entry 0
    for key in weight_dict_list[0].keys():
        weight_dict_list[0][key] *= num_examples[0]
        for idx in range(1, len(weight_dict_list)):
            weight_dict_list[0][key].add_(weight_dict_list[idx][key] *
                                          num_examples[idx])
        weight_dict_list[0][key].div_(np.sum(num_examples))
    return (weight_dict_list[0], mean_train_acc.avg, mean_train_loss.avg,
            mean_test_acc.avg, mean_test_loss.avg)
def train_fn(self, optimizer, criterion, loader, device, train=True):
    """
    Training method
    :param optimizer: optimization algorithm
    :criterion: loss function
    :param loader: data loader for either training or testing set
    :param device: torch device
    :param train: boolean to indicate if training or test set is used
    :return: (accuracy, loss) on the data
    """
    acc_meter = AvgrageMeter()
    loss_meter = AvgrageMeter()
    self.train()
    progress = tqdm(loader)
    for images, labels in progress:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        logits = self(images)
        batch_loss = criterion(logits, labels)
        batch_loss.backward()
        optimizer.step()
        top1, _ = accuracy(logits, labels, topk=(1, 5))
        batch_n = images.size(0)
        loss_meter.update(batch_loss.item(), batch_n)
        acc_meter.update(top1.item(), batch_n)
        progress.set_description(
            '(=> Training) Loss: {:.4f}'.format(loss_meter.avg))
    return acc_meter.avg, loss_meter.avg
def eval_fn(self, loader, device, train=False, confusion_m = False, criterion = None):
    """
    Evaluation method
    :param loader: data loader for either training or testing set
    :param device: torch device
    :param train: boolean to indicate if training or test set is used
    :param confusion_m: if True, plot a normalized confusion matrix per batch
    :param criterion: optional loss function; when given, mean loss is tracked
    :return: (accuracy, loss) averages on the data
    """
    objs = AvgrageMeter()
    score = AvgrageMeter()
    self.eval()
    t = tqdm(loader)
    with torch.no_grad():
        for images, labels in t:
            images = images.to(device)
            labels = labels.to(device)
            outputs = self(images)
            acc, _ = accuracy(outputs, labels, topk=(1, 5))
            score.update(acc.item(), images.size(0))
            if(criterion):
                loss = criterion(outputs, labels)
                # Fix: use .item() instead of deprecated .data — storing the
                # tensor in the meter kept device memory alive and made
                # objs.avg a tensor rather than a plain float.
                objs.update(loss.item(), images.size(0))
            if(confusion_m):
                # Plot confusion matrix
                plot_confusion_matrix(labels.cpu(),
                                      outputs.topk(1, 1, True, True)[1].cpu(),
                                      normalize = True,
                                      title='Confusion matrix')
            t.set_description('(=> Test) Score: {:.4f}'.format(score.avg))
    return score.avg, objs.avg
def validate(model, device, args, *, all_iters=None, arch_loader=None):
    """Evaluate every architecture from arch_loader over the validation set
    and dump per-architecture top-1 accuracy to acc_result.json.

    :param model: supernet taking (data, arch) and returning logits
    :param device: torch device to run on
    :param args: namespace providing val_dataloader (loss_function unused)
    :param all_iters: unused; kept for call-site compatibility
    :param arch_loader: provider of the {key: {"arch": ...}} dict (required)
    """
    assert arch_loader is not None
    val_dataloader = args.val_dataloader
    model.eval()
    max_val_iters = 0
    t1 = time.time()  # retained for timing hooks; not logged here
    result_dict = {}
    arch_dict = arch_loader.get_arch_dict()
    # Removed unused `objs` meter and the unused `base_model` instantiation,
    # which allocated a whole extra network on the GPU for nothing.
    with torch.no_grad():
        for key, value in arch_dict.items():  # one candidate network
            max_val_iters += 1
            # Fix: fresh meters per architecture — previously the meters
            # accumulated across architectures, so each recorded 'acc' was a
            # running average over all architectures seen so far.
            top1 = AvgrageMeter()
            top5 = AvgrageMeter()
            for data, target in val_dataloader:  # full validation pass
                target = target.type(torch.LongTensor)
                data, target = data.to(device), target.to(device)
                output = model(data, value["arch"])
                prec1, prec5 = accuracy(output, target, topk=(1, 5))
                print("acc1: ", prec1.item())
                n = data.size(0)
                top1.update(prec1.item(), n)
                top5.update(prec5.item(), n)
            tmp_dict = {}
            tmp_dict['arch'] = value['arch']
            tmp_dict['acc'] = top1.avg
            result_dict[key] = tmp_dict
    with open("acc_result.json", "w") as f:
        json.dump(result_dict, f)
def eval_one_epoch():
    """Evaluate the depth-map model for one epoch over val_dataloader.

    Relies on enclosing-scope names: model, val_dataloader, val_dataset,
    criterion_absolute_loss, criterion_contrastive_loss, epoch.
    :return: (accuracy, avg_loss, auc, map_score_list)
    """
    model.eval()
    acc = 0.0
    map_score_list = []
    label_list = []  # Fix: collect ground-truth labels for roc_auc_score
    loss_absolute = AvgrageMeter()
    loss_contra = AvgrageMeter()
    for i, batch in enumerate(tqdm(val_dataloader)):
        with torch.no_grad():
            data, binary_mask, label = batch
            data, binary_mask, label = data.cuda(), binary_mask.cuda(
            ), label.cuda()
            # Removed optimizer.zero_grad(): no gradients exist or are
            # wanted during evaluation.
            map_x, embedding, x_Block1, x_Block2, x_Block3, x_input = model(
                data)
            absolute_loss = criterion_absolute_loss(map_x, binary_mask)
            contrastive_loss = criterion_contrastive_loss(map_x, binary_mask)
            n = data.size(0)
            # Fix: .item() instead of deprecated .data (kept live tensors
            # in the meters).
            loss_absolute.update(absolute_loss.item(), n)
            loss_contra.update(contrastive_loss.item(), n)
            map_score = torch.mean(map_x)
            map_score = 1.0 if map_score > 1 else map_score.item()
            map_score_list.append(map_score)
            # assumes batch size 1 (label.item()) — TODO confirm
            label_list.append(label.item())
            pred = 1 if map_score > 0.5 else 0
            acc += (pred == label.item())
    loss_avg = loss_absolute.avg + loss_contra.avg
    # Fix: `labels` was undefined here (NameError at runtime); score the
    # collected per-sample labels instead.
    aou = metrics.roc_auc_score(label_list, map_score_list)
    print(
        'epoch:%d, Eval: Absolute_Depth_loss= %.4f, Contrastive_Depth_loss= %.4f, Total Loss: %.4f, AOU: %.4f\n'
        % (epoch + 1, loss_absolute.avg, loss_contra.avg, loss_avg, aou))
    return acc / len(val_dataset), loss_avg, aou, map_score_list
def train(self, epochs, train_dl, log_freq = 1):
    """Train for `epochs` additional epochs over `train_dl`, logging every
    `log_freq` batches and checkpointing after each epoch.

    :param epochs: number of additional epochs to run
    :param train_dl: training DataLoader yielding (X, y)
    :param log_freq: batches between progress log lines
    :return: (self.train_acc, self.train_loss) history lists
    """
    loss_avg = AvgrageMeter('loss')
    acc_avg = AvgrageMeter('acc')
    epoch_loss_avg = AvgrageMeter('epoch_loss')
    epoch_acc_avg = AvgrageMeter('epoch_acc')
    last_epoch = self.epoch + epochs
    self.logger.info(f"begin training for {epochs} epochs")
    while self.epoch < last_epoch:
        self.epoch += 1
        batch_tic = time.time()
        epoch_tic = time.time()
        self.logger.info(
            f"Start train for epoch {self.epoch}/{last_epoch}")
        for step, (X, y) in enumerate(train_dl, 1):
            # perform training step
            X = Variable(X, requires_grad = True).cuda()
            y = Variable(y, requires_grad = False).cuda(non_blocking = True)
            pred, loss = self._step(X, y)
            # update status
            batch_size = y.size()[0]
            acc = torch.sum(pred == y).float() / batch_size
            loss_avg.update(loss)
            acc_avg.update(acc)
            epoch_loss_avg.update(loss)
            epoch_acc_avg.update(acc)
            # Fix: `step % log_freq is 0` compared identity, not equality.
            if step % log_freq == 0:
                speed = 1.0 * (batch_size * log_freq) / (time.time() -
                                                         batch_tic)
                self.logger.info(
                    "Epoch[%d]/[%d] Batch[%d] Speed: %.6f samples/sec %s %s"
                    % (self.epoch, last_epoch, step, speed, loss_avg,
                       acc_avg))
                # Fix: map() is lazy in Python 3 — the resets never ran,
                # so the running meters were never cleared between reports.
                for meter in (loss_avg, acc_avg):
                    meter.reset()
                batch_tic = time.time()
        self.logger.info("Epoch[%d]/[%d] Time: %.3f sec %s %s" %
                         (self.epoch, last_epoch, time.time() - epoch_tic,
                          epoch_loss_avg, epoch_acc_avg))
        self.train_acc.append(epoch_acc_avg.avg)
        self.train_loss.append(epoch_loss_avg.avg)
        # Fix: same lazy-map bug for the per-epoch meters.
        for meter in (epoch_loss_avg, epoch_acc_avg):
            meter.reset()
        self._post_epoch()
        checkpoint_path = os.path.join(self.model_dir,
                                       f'checkpoint_{self.epoch}.tar')
        self.save_checkpoint(checkpoint_path)
    return self.train_acc, self.train_loss
def infer(model):
    """Evaluate `model` on the CIFAR-10 test set and print running accuracy.

    Uses TestConfig for paths and hyper-parameters; when DEBUG is set, only
    a fixed 256-image subset is evaluated for a fast smoke run.
    """
    test_data = dset.CIFAR10(
        root=TestConfig['data_path'],
        train=False,
        download=True,
        transform=data_transforms_cifar10(0, False),
    )
    if DEBUG:
        # small fixed subset for quick debugging runs
        sampler = torch.utils.data.sampler.SubsetRandomSampler(list(
            range(256)))
        test_queue = torch.utils.data.DataLoader(
            test_data,
            sampler=sampler,
            batch_size=TestConfig['batch_size'],
            shuffle=False,
            pin_memory=True,
            num_workers=16,
        )
    else:
        test_queue = torch.utils.data.DataLoader(
            test_data,
            batch_size=TestConfig['batch_size'],
            shuffle=False,
            pin_memory=True,
            num_workers=16,
        )
    model.eval().cuda()
    acc_avg = AvgrageMeter('acc')
    with torch.no_grad():  # inference only; no autograd state needed
        for step, (X, y) in enumerate(test_queue):
            X = Variable(X, requires_grad=False).cuda()
            y = Variable(y, requires_grad=False).cuda(non_blocking=True)
            logits, _ = model(X, TestConfig['drop_path_prob'])
            pred = torch.argmax(logits, dim=1)
            # Fix: divide by the actual batch size — the final batch may be
            # smaller than TestConfig['batch_size'], which inflated/deflated
            # its accuracy contribution.
            acc = torch.sum(pred == y).float() / y.size(0)
            acc_avg.update(acc)
            # Fix: `step % ... is 0` compared identity, not equality.
            if step % TestConfig['log_freq'] == 0:
                print(f"test batch {step}: {acc_avg}")
    print(f"Final test: {acc_avg}")
def test(model, data_test_loader):
    """Evaluate `model` on `data_test_loader` and print mean loss/accuracy.

    :param model: CUDA classifier returning logits
    :param data_test_loader: test batches of (images, labels)
    """
    objs = AvgrageMeter()
    top1 = AvgrageMeter()
    criterion = torch.nn.CrossEntropyLoss().cuda()
    model.eval()
    with torch.no_grad():
        for i, (images_test, labels_test) in enumerate(data_test_loader):
            images_test, labels_test = images_test.cuda(), labels_test.cuda()
            output_test = model(images_test)
            loss_test = criterion(output_test, labels_test)
            prec_test, = accuracy(output_test, labels_test)
            n_test = images_test.size(0)
            objs.update(loss_test.item(), n_test)
            top1.update(prec_test.item(), n_test)
            if i % 50 == 0:
                print(f'Finished {i+1}/{len(data_test_loader)}')
    # Fix: the two adjacent f-strings were concatenated with no separator,
    # printing "Avg Loss = XTest Acc = Y" as one run-on token.
    print(f'Avg Loss = {objs.avg}, '
          f'Test Acc = {top1.avg}')
def infer(valid_queue, model, criterion, local_rank, epoch, lr):
    """Distributed validation pass; averages loss/accuracy across processes.

    :return: (mean accuracy, mean CE loss), all-reduced over args.nprocs
    """
    loss_meter = AvgrageMeter()
    acc_meter = AvgrageMeter()
    time_meter = AvgrageMeter()
    model.eval()
    with torch.no_grad():
        for step, (input, target) in enumerate(valid_queue):
            input = input.cuda(local_rank, non_blocking=True)
            target = target.cuda(local_rank, non_blocking=True)
            tic = time.time()
            logits = model(input)
            time_meter.update(time.time() - tic)
            loss = criterion(logits, target)
            acc = calculate_accuracy(logits, target)
            n = input.size(0)
            torch.distributed.barrier()  # sync ranks before all-reduce
            loss_meter.update(reduce_mean(loss, args.nprocs).item(), n)
            acc_meter.update(reduce_mean(acc, args.nprocs).item(), n)
            if step % args.report_freq == 0 and local_rank == 0:
                logging.info(
                    'epoch: %d, mini-batch: %3d, inference time: %.4f, lr = %.5f, loss_CE= %.5f, Accuracy= %.4f'
                    % (epoch + 1, step + 1, time_meter.avg, lr,
                       loss_meter.avg, acc_meter.avg))
    return acc_meter.avg, loss_meter.avg
def validate(model, args, *, all_iters=None, arch_loader=None):
    """Evaluate each architecture from arch_loader over the validation set,
    logging running accuracy and TensorBoard scalars, then print a summary.

    :param model: supernet taking (data, arch) and returning logits
    :param args: namespace providing loss_function, val_loader, gpu,
        batch_size
    :param all_iters: global iteration count used for the TB x-axis
    :param arch_loader: provider of the architecture dict (required)
    """
    assert arch_loader is not None
    objs = AvgrageMeter()
    top1 = AvgrageMeter()
    top5 = AvgrageMeter()
    loss_function = args.loss_function
    val_loader = args.val_loader
    model.eval()
    t1 = time.time()
    result_dict = {}
    arch_dict = arch_loader.get_part_dict()
    with torch.no_grad():
        for ii, (key, value) in enumerate(arch_dict.items()):
            for data, target in val_loader:
                target = target.type(torch.LongTensor)
                data, target = data.cuda(args.gpu), target.cuda(args.gpu)
                output = model(data, value["arch"])
                loss = loss_function(output, target)
                acc1, acc5 = accuracy(output, target, topk=(1, 5))
                n = data.size(0)
                objs.update(loss.item(), n)
                top1.update(acc1.item(), n)
                top5.update(acc5.item(), n)
                # Fix: `if ii % 5:` is truthy on every index NOT divisible
                # by 5 — the intent is to log every 5th architecture.
                if ii % 5 == 0:
                    logging.info("validate acc:{:.6f} iter:{}".format(
                        top1.avg / 100, ii))
                    writer.add_scalar(
                        "Val/Loss", loss.item(),
                        all_iters * len(val_loader) * args.batch_size + ii)
                    writer.add_scalar(
                        "Val/acc1", acc1.item(),
                        all_iters * len(val_loader) * args.batch_size + ii)
                    writer.add_scalar(
                        "Val/acc5", acc5.item(),
                        all_iters * len(val_loader) * args.batch_size + ii)
            # NOTE(review): top1 accumulates across architectures, so this
            # is a running average, not the per-arch accuracy — confirm.
            result_dict[key] = top1.avg
    logInfo = 'TEST Iter {}: loss = {:.6f},\t'.format(all_iters, objs.avg) + \
        'Top-1 acc = {:.6f},\t'.format(top1.avg) + \
        'Top-5 acc = {:.6f},\t'.format(top5.avg) + \
        'val_time = {:.6f}'.format(time.time() - t1)
    logging.info(logInfo)
    logging.info("RESULTS")
    for ii, (key, value) in enumerate(result_dict.items()):
        logging.info("{: ^10} \t {:.6f}".format(key, value))
        if ii > 10:
            break
    logging.info("E N D")
def validate(model, device, args, *, all_iters=None):
    """Run a fixed 250-batch validation pass and log top-1/top-5 error."""
    loss_meter = AvgrageMeter()
    top1_meter = AvgrageMeter()
    top5_meter = AvgrageMeter()
    loss_function = args.loss_function
    val_dataprovider = args.val_dataprovider
    model.eval()
    max_val_iters = 250
    t1 = time.time()
    with torch.no_grad():
        for _ in range(1, max_val_iters + 1):
            data, target = val_dataprovider.next()
            target = target.type(torch.LongTensor)
            data, target = data.to(device), target.to(device)
            output = model(data)
            loss = loss_function(output, target)
            prec1, prec5 = accuracy(output, target, topk=(1, 5))
            batch_n = data.size(0)
            loss_meter.update(loss.item(), batch_n)
            top1_meter.update(prec1.item(), batch_n)
            top5_meter.update(prec5.item(), batch_n)
    logInfo = 'TEST Iter {}: loss = {:.6f},\t'.format(all_iters, loss_meter.avg) + \
        'Top-1 err = {:.6f},\t'.format(1 - top1_meter.avg / 100) + \
        'Top-5 err = {:.6f},\t'.format(1 - top5_meter.avg / 100) + \
        'val_time = {:.6f}'.format(time.time() - t1)
    logging.info(logInfo)
def validate(model, device, args):
    """Validate on 250 batches from args.val_dataloader; on local rank 0,
    log average loss and top-1/top-5 error.

    :param model: classifier returning logits
    :param device: torch device to run on
    :param args: namespace providing loss_function, val_dataloader,
        local_rank
    """
    objs = AvgrageMeter()
    top1 = AvgrageMeter()
    top5 = AvgrageMeter()
    loss_function = args.loss_function
    val_dataloader = args.val_dataloader
    model.eval()
    # Removed unused locals: `L = len(val_dataloader)` and the per-batch
    # `n = data.size(0)` that was never passed to the meters.
    with torch.no_grad():
        data_iterator = enumerate(val_dataloader)
        for _ in tqdm(range(250)):
            _, data = next(data_iterator)
            target = data[1].type(torch.LongTensor)
            data, target = data[0].to(device), target.to(device)
            output = model(data)
            loss = loss_function(output, target)
            prec1, prec5 = accuracy(output, target, topk=(1, 5))
            # NOTE: updates are unweighted (count 1 per batch), so the
            # averages are per-batch means rather than per-sample means.
            objs.update(loss.item())
            top1.update(prec1.item())
            top5.update(prec5.item())
    if args.local_rank == 0:
        logInfo = 'TEST: loss = {:.6f},\t'.format(objs.avg) + \
            'Top-1 err = {:.6f},\t'.format(100 - top1.avg) + \
            'Top-5 err = {:.6f},\t'.format(100 - top5.avg)
        logging.info(logInfo)
def train_one_epoch():
    """Train the depth-map model for one epoch over train_dataloader.

    Relies on enclosing-scope names: model, train_dataloader, optimizer,
    criterion_absolute_loss, criterion_contrastive_loss, epoch.
    """
    model.train()
    loss_absolute = AvgrageMeter()
    loss_contra = AvgrageMeter()
    trange = tqdm(train_dataloader)
    for i, batch in enumerate(trange):
        # get the inputs
        data, binary_mask, label = batch
        data, binary_mask, label = data.cuda(), binary_mask.cuda(), label.cuda(
        )
        optimizer.zero_grad()
        # forward + backward + optimize
        map_x, embedding, x_Block1, x_Block2, x_Block3, x_input = model(data)
        absolute_loss = criterion_absolute_loss(map_x, binary_mask)
        contrastive_loss = criterion_contrastive_loss(map_x, binary_mask)
        loss = absolute_loss + contrastive_loss
        loss.backward()
        optimizer.step()
        n = data.size(0)
        # Fix: use .item() instead of deprecated .data — storing tensors in
        # the meters kept device memory (and graph metadata) alive.
        loss_absolute.update(absolute_loss.item(), n)
        loss_contra.update(contrastive_loss.item(), n)
        postfix_dict = {
            "loss_absolute": absolute_loss.item(),
            "loss_contra": contrastive_loss.item(),
            "loss": loss.item()
        }
        trange.set_postfix(**postfix_dict)
    print(
        'epoch:%d, Train: Absolute_Depth_loss= %.4f, Contrastive_Depth_loss= %.4f\n'
        % (epoch + 1, loss_absolute.avg, loss_contra.avg))
def train(train_queue, valid_queue, model, architect, criterion, optimizer,
          lr, epoch, local_rank):
    """One distributed architecture-search epoch.

    After the warm-up phase, each weight-update step is preceded by at most
    one architecture-optimizer step on a prefetched validation batch.

    :param train_queue: training batches for the weight update
    :param valid_queue: validation batches consumed by the architect
    :param model: DDP-wrapped supernet (gradients clipped on model.module)
    :param architect: architecture optimizer exposing .step(input, target)
    :param criterion: loss function for the weight update
    :param optimizer: weight optimizer
    :param lr: current learning rate (logged only)
    :param epoch: current epoch (compared against args.warmUp)
    :param local_rank: this process's GPU/rank; rank 0 does the logging
    :return: (mean accuracy, mean CE loss), all-reduced over args.nprocs
    """
    model.train()
    loss_avg = AvgrageMeter()
    arc_loss_avg = AvgrageMeter()
    Acc_avg = AvgrageMeter()
    data_time = AvgrageMeter()
    # Prefetch validation batches for the architecture steps.
    prefetcher = data_prefetcher(valid_queue)
    input_search, target_search = prefetcher.next()
    end = time.time()
    for step, (input, target) in enumerate(train_queue):
        data_time.update(time.time() - end)
        n = input.size(0)
        input, target = map(lambda x: x.cuda(local_rank, non_blocking=True),
                            [input, target])
        if epoch >= args.warmUp:
            # NOTE(review): `while` + unconditional `break` behaves like
            # "if input_search is not None" — exactly one architecture step
            # per training batch while prefetched batches remain.
            while input_search is not None:
                arc_loss = architect.step(
                    input_search.cuda(local_rank, non_blocking=True),
                    target_search.cuda(local_rank, non_blocking=True))
                arc_loss_avg.update(arc_loss.item(), input_search.size(0))
                input_search, target_search = prefetcher.next()
                break
        optimizer.zero_grad()
        logits = model(input)
        loss = criterion(logits, target)
        loss.backward()
        # Clip gradients of the wrapped module (model is DDP-wrapped).
        nn.utils.clip_grad_norm_(model.module.parameters(), args.grad_clip)
        optimizer.step()
        accuracy = calculate_accuracy(logits, target)
        torch.distributed.barrier()  # sync ranks before averaging metrics
        reduced_loss = reduce_mean(loss, args.nprocs)
        reduced_acc = reduce_mean(accuracy, args.nprocs)
        loss_avg.update(reduced_loss.item(), n)
        Acc_avg.update(reduced_acc.item(), n)
        if step % args.report_freq == 0 and local_rank == 0:
            logging.info(
                'epoch:%d, mini-batch:%3d, data time: %.5f, lr = %.5f, loss_CE = %.5f, loss_ARC = %.5f, Accuracy = %.4f'
                % (epoch + 1, step + 1, data_time.avg, lr, loss_avg.avg,
                   arc_loss_avg.avg, Acc_avg.avg))
        end = time.time()
    return Acc_avg.avg, loss_avg.avg
def eval_fn(self, loader, device, train=False):
    """
    Evaluation method
    :param loader: data loader for either training or testing set
    :param device: torch device
    :param train: boolean to indicate if training or test set is used
    :return: accuracy on the data
    """
    acc_meter = AvgrageMeter()
    self.eval()
    progress = tqdm(loader)
    with torch.no_grad():  # no gradient needed
        for images, labels in progress:
            images, labels = images.to(device), labels.to(device)
            outputs = self(images)
            top1, _ = accuracy(outputs, labels, topk=(1, 5))
            acc_meter.update(top1.item(), images.size(0))
            progress.set_description(
                '(=> Test) Score: {:.4f}'.format(acc_meter.avg))
    return acc_meter.avg
def train_fn(self, optimizer, criterion, loader, device, train=True):
    """One training pass over `loader`; logs every self.report_freq steps.

    :return: (accuracy, loss) averages on the data
    """
    acc_meter = AvgrageMeter()
    loss_meter = AvgrageMeter()
    self.train()
    for step, (images, labels) in enumerate(loader):
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        logits = self(images)
        batch_loss = criterion(logits, labels)
        batch_loss.backward()
        optimizer.step()
        top1, _ = accuracy(logits, labels, topk=(1, 5))
        batch_n = images.size(0)
        loss_meter.update(batch_loss.item(), batch_n)
        acc_meter.update(top1.item(), batch_n)
        if step % self.report_freq == 0:
            logging.info('Training | step: %d | loss: %e | accuracy: %f' %
                         (step, loss_meter.avg, acc_meter.avg))
    return acc_meter.avg, loss_meter.avg
def train_teacher(teacher, data_train_loader, data_test_loader, optimizer,
                  num_epochs):
    """
    train a teacher model on a specified dataset

    :param teacher: CUDA model to train
    :param data_train_loader: training batches of (images, labels)
    :param data_test_loader: test batches of (images, labels)
    :param optimizer: optimizer over the teacher's parameters
    :param num_epochs: number of train+test epochs to run
    """
    objs = AvgrageMeter()
    top1 = AvgrageMeter()
    criterion = torch.nn.CrossEntropyLoss().cuda()
    for epoch in range(num_epochs):
        # train
        # Fix: reset the meters before each training phase — previously
        # they still held the previous epoch's *test* statistics, skewing
        # the reported training loss/accuracy.
        objs.reset()
        top1.reset()
        teacher.train()
        for i, (images, labels) in enumerate(data_train_loader):
            images, labels = images.cuda(), labels.cuda()
            optimizer.zero_grad()
            output = teacher(images)
            loss = criterion(output, labels)
            loss.backward()
            prec, = accuracy(output, labels)
            optimizer.step()
            n = images.size(0)
            objs.update(loss.item(), n)
            top1.update(prec.item(), n)
            if i % 50 == 0:
                # Fix: the message printed `i*50` as the batch index.
                print(f'Epoch {epoch}/{num_epochs}, Batch {i}; '\
                      f'loss = {objs.avg}, acc = {top1.avg}')
        # test
        objs.reset()
        top1.reset()
        teacher.eval()
        with torch.no_grad():
            for images_test, labels_test in data_test_loader:
                images_test, labels_test = images_test.cuda(
                ), labels_test.cuda()
                output_test = teacher(images_test)
                loss_test = criterion(output_test, labels_test)
                prec_test, = accuracy(output_test, labels_test)
                n_test = images_test.size(0)
                objs.update(loss_test.item(), n_test)
                top1.update(prec_test.item(), n_test)
        print(f'Epoch {epoch}/{num_epochs}; Test Acc = {top1.avg}')
def infer(args, epoch, valid_queue, net, criterion, mode='val', record_file=None):
    """Evaluate `net` on `valid_queue` and print/append metrics.

    :param args: run configuration (unused in the body; kept for callers)
    :param epoch: epoch index for the report line
    :param valid_queue: batches of (images, labels, index)
    :param net: CUDA classifier returning logits
    :param criterion: loss function
    :param mode: 'val' for validation wording, anything else for test
    :param record_file: optional path to append the report line to
    :return: (top1.avg, top5.avg, objs.avg)
    """
    objs = AvgrageMeter()
    top1 = AvgrageMeter()
    top5 = AvgrageMeter()
    net.eval()
    test_loss = 0.0  # cost function error
    correct = 0.0
    for step, (images, labels, index) in enumerate(valid_queue):
        images = Variable(images)
        labels = Variable(labels)
        images = images.cuda()
        labels = labels.cuda()
        # Fix: evaluate under no_grad — consistent with the sibling infer()
        # implementation; avoids building autograd graphs during eval.
        with torch.no_grad():
            outputs = net(images)
            loss = criterion(outputs, labels)
        test_loss += loss.item()
        prec1, prec5 = accuracy(outputs, labels, topk=(1, 5))
        n = images.shape[0]
        objs.update(loss.item(), n)
        top1.update(prec1.item(), n)
        top5.update(prec5.item(), n)
    if mode == 'val':
        print('Valid: epoch {}, loss {:.3f}, top1 {:.3f}, top5 {:.3f}'.format(epoch, objs.avg, top1.avg, top5.avg))
        if record_file is not None:
            with open(record_file, 'a') as f:
                f.write('Valid: epoch {}, loss {:.3f}, top1 {:.3f}, top5 {:.3f}\n'.format(epoch, objs.avg, top1.avg, top5.avg))
    else:
        print('Test: epoch {}, loss {:.3f}, top1 {:.3f}, top5 {:.3f}'.format(epoch, objs.avg, top1.avg, top5.avg))
        if record_file is not None:
            with open(record_file, 'a') as f:
                f.write('Test: epoch {}, loss {:.3f}, top1 {:.3f}, top5 {:.3f}\n'.format(epoch, objs.avg, top1.avg, top5.avg))
    return top1.avg, top5.avg, objs.avg
def infer(args, epoch, valid_queue, net, criterion, mode='val', record_file=None):
    """Evaluate `net` on `valid_queue` under no_grad; print and optionally
    append the metric line to `record_file`.

    :return: (top1.avg, top5.avg, objs.avg)
    """
    objs = AvgrageMeter()
    top1 = AvgrageMeter()
    top5 = AvgrageMeter()
    net.eval()
    test_loss = 0.0  # cost function error
    correct = 0.0
    for step, (images, labels, index) in enumerate(valid_queue):
        images = Variable(images).cuda()
        labels = Variable(labels).cuda()
        with torch.no_grad():
            outputs = net(images)
            loss = criterion(outputs, labels)
        test_loss += loss.item()
        prec1, prec5 = accuracy(outputs, labels, topk=(1, 5))
        n = images.shape[0]
        objs.update(loss.item(), n)
        top1.update(prec1.item(), n)
        top5.update(prec5.item(), n)
    # Build the report line once; print and file output are byte-identical
    # to the split print/write of the original.
    tag = 'Valid' if mode == 'val' else 'Test'
    line = tag + ': epoch {}, loss {:.3f}, top1 {:.3f}, top5 {:.3f}'.format(
        epoch, objs.avg, top1.avg, top5.avg)
    print(line)
    if record_file is not None:
        with open(record_file, 'a') as f:
            f.write(line + '\n')
    return top1.avg, top5.avg, objs.avg
def validate(model, device, args, *, all_iters=None, arch_loader=None):
    """Evaluate the first 100 architectures from arch_loader over the
    validation set and print their (running-average) top-1 scores.

    :param model: supernet taking (data, arch) and returning logits
    :param device: torch device to run on
    :param args: namespace providing loss_function and val_loader
    :param all_iters: unused; kept for call-site compatibility
    :param arch_loader: provider of the architecture dict (required)
    """
    from itertools import islice  # local: only needed for the 100-cap
    assert arch_loader is not None
    objs = AvgrageMeter()
    top1 = AvgrageMeter()
    top5 = AvgrageMeter()
    loss_function = args.loss_function
    val_loader = args.val_loader
    model.eval()
    t1 = time.time()
    result_dict = {}
    # Fix: dicts are not sliceable — `get_arch_dict()[:100]` raised
    # TypeError. Take the first 100 entries with islice instead.
    # (Cap kept for speed, as in the original intent.)
    arch_dict = dict(islice(arch_loader.get_arch_dict().items(), 100))
    with torch.no_grad():
        for key, value in arch_dict.items():
            # Fix: the original wrapped this in `for _ in range(1, 251)`,
            # re-running the FULL val_loader 250 times per architecture —
            # one complete pass per architecture is the evident intent.
            for data, target in val_loader:
                target = target.type(torch.LongTensor)
                data, target = data.to(device), target.to(device)
                output = model(data, value["arch"])
                loss = loss_function(output, target)
                prec1, prec5 = accuracy(output, target, topk=(1, 5))
                n = data.size(0)
                objs.update(loss.item(), n)
                top1.update(prec1.item(), n)
                top5.update(prec5.item(), n)
            # NOTE(review): top1 accumulates across architectures — this is
            # a running average, not the per-arch accuracy. Confirm intent.
            result_dict[key] = top1.avg / 100
    print("=" * 50, "RESULTS", "=" * 50)
    # Fix: iterating a dict yields keys only — unpacking `key, value`
    # raised ValueError; iterate .items().
    for key, value in result_dict.items():
        print(key, "\t", value)
    print("=" * 50, "E N D", "=" * 50)
def validate(model, device, args, *, all_iters=None, arch_loader=None):
    """Quick supernet check: evaluate a capped number of candidate
    architectures over the validation set and print their (cumulative)
    top-1 averages.
    """
    assert arch_loader is not None
    objs = AvgrageMeter()
    top1 = AvgrageMeter()
    top5 = AvgrageMeter()
    loss_function = args.loss_function
    val_dataloader = args.val_dataloader
    model.eval()
    model.apply(bn_calibration_init)  # re-calibrate BN for path sampling
    remaining = 25
    t1 = time.time()
    result_dict = {}
    arch_dict = arch_loader.get_arch_dict()
    with torch.no_grad():
        for key, value in arch_dict.items():  # one candidate network
            remaining -= 1
            print('\r ', key, ' iter:', remaining, end='')
            if remaining == 0:
                break
            for data, target in val_dataloader:  # full validation pass
                target = target.type(torch.LongTensor)
                data, target = data.to(device), target.to(device)
                output = model(data, value["arch"])
                loss = loss_function(output, target)
                prec1, prec5 = accuracy(output, target, topk=(1, 5))
                batch_n = data.size(0)
                objs.update(loss.item(), batch_n)
                top1.update(prec1.item(), batch_n)
                top5.update(prec5.item(), batch_n)
            result_dict[key] = top1.avg / 100
    print('\n', "=" * 10, "RESULTS", "=" * 10)
    for arch_name, score in result_dict.items():
        print(arch_name, "\t", score)
    print("=" * 10, "E N D", "=" * 10)
def validate(model, device, args, *, all_iters=None):
    """Validate the supernet on one uniformly (FLOPs-binned) sampled
    candidate path over 250 batches, logging top-1/top-5 error.
    """
    objs = AvgrageMeter()
    top1 = AvgrageMeter()
    top5 = AvgrageMeter()
    get_random_cand = lambda: tuple(np.random.randint(4) for i in range(20))
    flops_l, flops_r, flops_step = 290, 360, 10
    bins = [[i, i + flops_step] for i in range(flops_l, flops_r, flops_step)]

    def get_uniform_sample_cand(*, timeout=500):
        # pick a FLOPs bin, then rejection-sample a path that lands in it
        l, r = bins[np.random.randint(len(bins))]
        for _ in range(timeout):
            cand = get_random_cand()
            if l * 1e6 <= get_cand_flops(cand) <= r * 1e6:
                return cand
        return get_random_cand()  # give up: fall back to unconstrained

    loss_function = args.loss_function
    val_dataprovider = args.val_dataprovider
    cand = get_uniform_sample_cand()
    model.eval()
    t1 = time.time()
    with torch.no_grad():
        for _ in range(250):
            data, target = val_dataprovider.next()
            target = target.type(torch.LongTensor)
            data, target = data.to(device), target.to(device)
            output = model(data, cand)
            loss = loss_function(output, target)
            prec1, prec5 = accuracy(output, target, topk=(1, 5))
            batch_n = data.size(0)
            objs.update(loss.item(), batch_n)
            top1.update(prec1.item(), batch_n)
            top5.update(prec5.item(), batch_n)
    logInfo = 'TEST Iter {}: loss = {:.6f},\t'.format(all_iters, objs.avg) + \
        'Top-1 err = {:.6f},\t'.format(1 - top1.avg / 100) + \
        'Top-5 err = {:.6f},\t'.format(1 - top5.avg / 100) + \
        'val_time = {:.6f}'.format(time.time() - t1)
    logging.info(logInfo)
def validate(model, device, args, *, all_iters=None):
    """Score each trained candidate structure on one validation batch and
    fold the batch loss into the candidate's exponential-moving-average
    fitness (structure_father.loss), then log aggregate error.

    NOTE(review): model.eval() is never called here, so BN/dropout keep
    whatever mode the model is currently in — confirm this is intended.

    :param model: supernet taking (data, structure) and returning logits
    :param device: torch device to run on
    :param args: provides loss_function, val_dataprovider, evo_controller
    :param all_iters: global iteration count, used only in the log line
    """
    objs = AvgrageMeter()
    top1 = AvgrageMeter()
    top5 = AvgrageMeter()
    loss_function = args.loss_function
    val_dataprovider = args.val_dataprovider
    trained_group = args.evo_controller.trained_group
    t1 = time.time()
    with torch.no_grad():
        for i in range(len(trained_group)):
            # one fresh validation batch per candidate structure
            data, target = val_dataprovider.next()
            target = target.type(torch.LongTensor)
            data, target = data.to(device), target.to(device)
            structure_father = trained_group[i]
            structure = structure_father.structure
            output = model(data, structure)
            loss = loss_function(output, target)
            prec1, prec5 = accuracy(output, target, topk=(1, 5))
            n = data.size(0)
            objs.update(loss.item(), n)
            top1.update(prec1.item(), n)
            top5.update(prec5.item(), n)
            if structure_father.count == 0:
                structure_father.loss = float(loss.item()) + 1e-4  #initial
            else:
                # EMA of validation loss, weighted by evo_controller.momentum
                structure_father.loss = (float(loss.item())) * (
                    1 - args.evo_controller.momentum
                ) + structure_father.loss * args.evo_controller.momentum
            structure_father.count += 1
    logInfo = 'TEST Iter {}: loss = {:.6f},\t'.format(all_iters, objs.avg) + \
        'Top-1 err = {:.6f},\t'.format(1 - top1.avg / 100) + \
        'Top-5 err = {:.6f},\t'.format(1 - top5.avg / 100) + \
        'val_time = {:.6f}'.format(time.time() - t1)
    logging.info(logInfo)
def validate(model, device, args, *, all_iters=None, architecture=None):
    """Validation pass over test_interval/batch_size batches.

    Handles two model variants selected by args.block: block==4 models take
    a {'input', 'target'} batch dict and return a states dict with 'logits';
    block==3 models take (data, architecture) directly.

    :return: top-1 accuracy as a fraction in [0, 1]
    """
    loss_meter = AvgrageMeter()
    top1_meter = AvgrageMeter()
    top5_meter = AvgrageMeter()
    loss_function = args.loss_function
    val_dataprovider = args.val_dataprovider
    model.eval()
    num_iters = int(args.test_interval / args.batch_size)
    t1 = time.time()
    with torch.no_grad():
        for _ in range(1, num_iters + 1):
            data, target = val_dataprovider.next()
            target = target.type(torch.LongTensor)
            data, target = data.to(device), target.to(device)
            if args.block == 4:
                batch = {'input': data, 'target': target}
                states = model(batch)
                output = states['logits']
            elif args.block == 3:
                output = model(data, architecture)
            loss = loss_function(output, target)
            prec1, prec5 = accuracy(output, target, topk=(1, 5))
            batch_n = data.size(0)
            loss_meter.update(loss.item(), batch_n)
            top1_meter.update(prec1.item(), batch_n)
            top5_meter.update(prec5.item(), batch_n)
    logInfo = 'TEST Iter {}: loss = {:.6f},\t'.format(all_iters, loss_meter.avg) + \
        'Top-1 err = {:.6f},\t'.format(1 - top1_meter.avg / 100) + \
        'Top-5 err = {:.6f},\t'.format(1 - top5_meter.avg / 100) + \
        'val_time = {:.6f}'.format(time.time() - t1)
    logging.info(logInfo)
    top1_acc = top1_meter.avg / 100
    return top1_acc