def nao_train(train_queue, model, optimizer):
    objs = utils.AvgrageMeter()
    mse = utils.AvgrageMeter()
    nll = utils.AvgrageMeter()
    model.train()
    for step, sample in enumerate(train_queue):
        encoder_input = sample['encoder_input']
        encoder_target = sample['encoder_target']
        decoder_input = sample['decoder_input']
        decoder_target = sample['decoder_target']

        encoder_input = encoder_input.cuda()
        encoder_target = encoder_target.cuda().requires_grad_()
        decoder_input = decoder_input.cuda()
        decoder_target = decoder_target.cuda()

        optimizer.zero_grad()
        predict_value, log_prob, arch = model(encoder_input, decoder_input)
        loss_1 = F.mse_loss(predict_value.squeeze(), encoder_target.squeeze())
        loss_2 = F.nll_loss(log_prob.contiguous().view(-1, log_prob.size(-1)),
                            decoder_target.view(-1))
        # Weighted combination of the performance-prediction and reconstruction losses.
        loss = args.trade_off * loss_1 + (1 - args.trade_off) * loss_2
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), args.grad_bound)
        optimizer.step()

        n = encoder_input.size(0)
        objs.update(loss.item(), n)
        mse.update(loss_1.item(), n)
        nll.update(loss_2.item(), n)
    return objs.avg, mse.avg, nll.avg
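# Every function in this file accumulates statistics through
# utils.AvgrageMeter, whose definition is not shown here. A minimal sketch,
# assuming the usual DARTS-style interface (reset / update(val, n) / .avg):
class AvgrageMeter(object):
    """Keeps a running, count-weighted average of a scalar metric."""

    def __init__(self):
        self.reset()

    def reset(self):
        self.avg = 0.0
        self.sum = 0.0
        self.cnt = 0

    def update(self, val, n=1):
        # val is a per-batch average; weight it by the batch size n.
        self.sum += val * n
        self.cnt += n
        self.avg = self.sum / self.cnt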
def infer(valid_queue, model, criterion):
    objs = utils.AvgrageMeter()
    top1 = utils.AvgrageMeter()
    top5 = utils.AvgrageMeter()
    model.eval()
    with torch.no_grad():
        for step, (input, target) in enumerate(valid_queue):
            input = input.cuda()
            target = target.cuda(non_blocking=True)

            logits, _ = model(input)
            loss = criterion(logits, target)

            prec1, prec5 = utils.accuracy(logits, target, topk=(1, 5))
            n = input.size(0)
            objs.update(loss.item(), n)
            top1.update(prec1.item(), n)
            top5.update(prec5.item(), n)

            if step % args.report_freq == 0:
                end_time = time.time()
                if step == 0:
                    duration = 0
                    start_time = time.time()
                else:
                    duration = end_time - start_time
                    start_time = time.time()
                logging.info('VALID Step: %03d Objs: %e R1: %f R5: %f Duration: %ds',
                             step, objs.avg, top1.avg, top5.avg, duration)
    return top1.avg, top5.avg, objs.avg
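# utils.accuracy(logits, target, topk=(1, 5)) is assumed throughout but not
# defined in this file. A hedged sketch of the conventional top-k helper
# (returns precision@k in percent, one value per requested k):
def accuracy(output, target, topk=(1,)):
    maxk = max(topk)
    batch_size = target.size(0)

    # pred: (batch, maxk) indices of the k highest logits, transposed to
    # (maxk, batch) so that row k-1 holds every sample's k-th guess.
    _, pred = output.topk(maxk, 1, True, True)
    pred = pred.t()
    correct = pred.eq(target.view(1, -1).expand_as(pred))

    res = []
    for k in topk:
        correct_k = correct[:k].reshape(-1).float().sum(0)
        res.append(correct_k.mul_(100.0 / batch_size))
    return res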
def infer(valid_queue, model, criterion):
    objs = utils.AvgrageMeter()
    top1 = utils.AvgrageMeter()
    top5 = utils.AvgrageMeter()
    model.eval()
    with torch.no_grad():
        for step, (input, target) in enumerate(valid_queue):
            input = input.to(device)
            target = target.to(device)

            logits, _ = model(input)
            loss = criterion(logits, target)

            prec1, prec5 = utils.accuracy(logits, target, topk=(1, 5))
            n = input.size(0)
            # .item() works for any single-element tensor, so the old
            # `x.data[0] if x.dim() != 0 else x.item()` compatibility shim
            # is unnecessary.
            objs.update(loss.item(), n)
            top1.update(prec1.item(), n)
            top5.update(prec5.item(), n)

            if step % args.report_freq == 0:
                logging.info('valid %03d %e %f %f', step, objs.avg, top1.avg, top5.avg)
    return top1.avg, objs.avg
def train(train_queue, model, criterion, optimizer):
    objs = utils.AvgrageMeter()
    top1 = utils.AvgrageMeter()
    top5 = utils.AvgrageMeter()
    model.train()
    for step, (input, target) in enumerate(train_queue):
        # `async=True` and the Variable wrapper are pre-0.4 PyTorch idioms;
        # plain tensors with non_blocking=True replace both.
        input = input.cuda()
        target = target.cuda(non_blocking=True)

        optimizer.zero_grad()
        logits, logits_aux = model(input)
        loss = criterion(logits, target)
        if args.auxiliary:
            loss_aux = criterion(logits_aux, target)
            loss += args.auxiliary_weight * loss_aux
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), args.grad_clip)
        optimizer.step()

        prec1, prec5 = utils.accuracy(logits, target, topk=(1, 5))
        n = input.size(0)
        objs.update(loss.item(), n)
        top1.update(prec1.item(), n)
        top5.update(prec5.item(), n)

        if step % args.report_freq == 0:
            logging.info('train %03d %e %f %f', step, objs.avg, top1.avg, top5.avg)
    return top1.avg, objs.avg
def infer(valid_queue, model, criterion):
    objs = utils.AvgrageMeter()
    top1 = utils.AvgrageMeter()
    top5 = utils.AvgrageMeter()
    model.eval()
    with torch.no_grad():
        for step, (data, target) in enumerate(valid_queue):
            data = data.cuda()
            target = target.cuda()

            logits, _ = model(data)
            loss = criterion(logits, target)

            prec1, prec5 = utils.accuracy(logits, target, topk=(1, 5))
            n = data.size(0)
            objs.update(loss.item(), n)
            top1.update(prec1.item(), n)
            top5.update(prec5.item(), n)

            if step % args.report_freq == 0:
                logging.info('valid %03d %e %f %f', step, objs.avg, top1.avg, top5.avg)
    return top1.avg, top5.avg, objs.avg
def infer(valid_queue, model, criterion):
    global is_multi_gpu
    objs = utils.AvgrageMeter()
    top1 = utils.AvgrageMeter()
    top5 = utils.AvgrageMeter()
    # In eval mode, nn.BatchNorm layers use their running stats (in the
    # default mode) and nn.Dropout is deactivated.
    model.eval()
    for step, (input, target) in enumerate(valid_queue):
        with torch.no_grad():
            input = input.cuda()
            target = target.cuda(non_blocking=True)

            logits, _ = model(input)
            loss = criterion(logits, target)

            prec1, prec5 = utils.accuracy(logits, target, topk=(1, 5))
            n = input.size(0)
            objs.update(loss.item(), n)
            top1.update(prec1.item(), n)
            top5.update(prec5.item(), n)

            if step % args.report_freq == 0:
                logging.info('valid %03d %e %f %f', step, objs.avg, top1.avg, top5.avg)
    return top1.avg, objs.avg
def train(train_queue, model, criterion, optimizer):
    objs = utils.AvgrageMeter()
    top1 = utils.AvgrageMeter()
    model.train()
    for step, (input, target) in enumerate(train_queue):
        input = input.cuda(non_blocking=True)
        target = target.cuda(non_blocking=True)

        optimizer.zero_grad()
        logits, logits_aux = model(input)
        loss = criterion(logits, target)
        if args.auxiliary:
            loss_aux = criterion(logits_aux, target)
            loss += args.auxiliary_weight * loss_aux
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), args.grad_clip)
        optimizer.step()

        prec1, _ = utils.accuracy(logits, target, topk=(1, 5))
        n = input.size(0)
        objs.update(loss.item(), n)
        top1.update(prec1.item(), n)

        if step % args.report_freq == 0:
            logging.info('Train Step: %03d Objs: %e Acc: %f', step, objs.avg, top1.avg)
    return top1.avg, objs.avg
def infer(valid_queue, model, criterion, writer_dict):
    objs = utils.AvgrageMeter()
    top1 = utils.AvgrageMeter()
    top5 = utils.AvgrageMeter()
    model.eval()

    # tensorboard logger
    writer = writer_dict['writer']
    val_step = writer_dict['val_steps']

    with torch.no_grad():  # replaces the deprecated Variable(..., volatile=True)
        for step, (input, target) in enumerate(valid_queue):
            input = input.cuda()
            target = target.cuda(non_blocking=True)

            logits = model(input)
            loss = criterion(logits, target)
            writer.add_scalar('val_loss', loss.item(), val_step)

            prec1, prec5 = utils.accuracy(logits, target, topk=(1, 5))
            writer.add_scalar('val_prec1', prec1.item(), val_step)
            writer.add_scalar('val_prec5', prec5.item(), val_step)

            n = input.size(0)
            objs.update(loss.item(), n)
            top1.update(prec1.item(), n)
            top5.update(prec5.item(), n)

            val_step += 1
            writer_dict['val_steps'] += 1

            if step % args.report_freq == 0:
                logging.info('valid %03d %e %f %f', step, objs.avg, top1.avg, top5.avg)
    return top1.avg, objs.avg
def train(train_queue, model, criterion, optimizer, logger):
    objs = utils.AvgrageMeter()
    top1 = utils.AvgrageMeter()
    model.train()  # ensure training mode in case an eval pass ran before
    for step, (input, target) in enumerate(train_queue):
        n = input.size(0)
        # Plain tensor ops replace the deprecated Variable wrapper.
        input = input.float().to(device)
        target = target.long().to(device)

        logits = model(input)
        loss = criterion(logits, target)

        optimizer.zero_grad()
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), args.grad_clip)
        optimizer.step()

        prec1, prec5 = utils.accuracy(logits, target, topk=(1, 5))
        objs.update(loss.item(), n)
        top1.update(prec1.item(), n)

        if step % args.report_freq == 0:
            logger.info('time = %s, train %03d %e %f',
                        str(utils.get_unix_timestamp()), step, objs.avg, top1.avg)
            print('time = {}, train {} {}'.format(
                str(utils.get_unix_timestamp()), step, objs.avg))
    return objs.avg, top1.avg
def infer(valid_queue, model, criterion, args, gpu):
    '''Report validation-set results after the final epoch.'''
    print('Running validation!')
    objs = utils.AvgrageMeter()
    top1 = utils.AvgrageMeter()
    top5 = utils.AvgrageMeter()
    model.eval()
    for step, (input, target) in enumerate(valid_queue):
        input = input.cuda(non_blocking=True)
        target = target.cuda(non_blocking=True)

        logits = model(input)  # compute predictions
        loss = criterion(logits, target)

        prec1, prec5 = utils.accuracy(logits, target, topk=(1, 5))
        n = input.size(0)
        objs.update(loss.item(), n)
        top1.update(prec1.item(), n)
        top5.update(prec5.item(), n)

        # Only the rank-0 process reports.
        if gpu == 0 and step % args.report_freq == 0:
            logging.info('valid %03d %e %f %f', step, objs.avg, top1.avg, top5.avg)
            print('valid %03d %e %f %f' % (step, objs.avg, top1.avg, top5.avg))
    return top1.avg, objs.avg
def infer(valid_queue, model, criterion, temperature,
          logger=None, batch_num=-1, log_frequence=10):
    objs = utils.AvgrageMeter()
    top1 = utils.AvgrageMeter()
    top5 = utils.AvgrageMeter()
    model.eval()
    # logger.info("Start new epoch inference")
    tic = time.time()
    for step, (input, target) in enumerate(valid_queue):
        input = input.cuda()
        target = target.cuda()

        logits, _, _ = model(input, temperature)
        loss = criterion(logits, target)

        prec1, prec5 = utils.accuracy(logits, target, topk=(1, 5))
        n = input.size(0)
        objs.update(loss.item(), n)
        top1.update(prec1.item(), n)
        top5.update(prec5.item(), n)

        if logger is not None and step > 0 and step % log_frequence == 0:
            toc = time.time()
            speed = 1.0 * log_frequence * n / (toc - tic)
            if batch_num > 0:
                logger.info("Step[%d/%d] speed[%.4f samples/s] loss[%.6f] acc[%.4f]"
                            % (step, batch_num, speed, loss.item(), prec1.item() / 100.0))
            else:
                logger.info("Step[%d] speed[%.4f samples/s] loss[%.6f] acc[%.4f]"
                            % (step, speed, loss.item(), prec1.item() / 100.0))
            tic = time.time()
    return top1.avg, top5.avg, objs.avg
def test(test_queue, model, criterion, device):
    objs = utils.AvgrageMeter()
    top1 = utils.AvgrageMeter()
    top5 = utils.AvgrageMeter()
    model.eval()
    correct = 0
    total = 0
    for step, (inputs, targets) in enumerate(test_queue):
        with torch.no_grad():
            inputs, targets = inputs.to(device), targets.to(device)

            logits = model(inputs)
            loss = criterion(logits, targets)

            total += inputs.size(0)
            _, predict = torch.max(logits, 1)
            correct += predict.eq(targets).cpu().sum().item()

            prec1, prec5 = utils.accuracy(logits, targets, topk=(1, 5))
            n = inputs.size(0)
            objs.update(loss.item(), n)
            top1.update(prec1.item(), n)
            top5.update(prec5.item(), n)

            if step % args.report_freq == 0:
                logging.info('test %03d loss: %.3f top1: %f top5: %f',
                             step, objs.avg, top1.avg, top5.avg)
    logging.info('Testing: Acc=%.3f(%d/%d)' % (correct / total, correct, total))
    return top1.avg, objs.avg
def infer(valid_queue, model, criterion):
    root = logging.getLogger()
    objs = utils.AvgrageMeter()
    top1 = utils.AvgrageMeter()
    top5 = utils.AvgrageMeter()
    model.eval()
    for step, (input, target) in enumerate(valid_queue):
        input = input.cuda(non_blocking=True)
        target = target.cuda(non_blocking=True)

        logits, _ = model(input)
        loss = criterion(logits, target)

        prec1, prec5 = utils.accuracy(logits, target, topk=(1, 5))
        n = input.size(0)
        objs.update(loss.item(), n)
        top1.update(prec1.item(), n)
        top5.update(prec5.item(), n)

        if step % args.report_freq == 0:
            root.info('%s %03d %e %f %f', valid_queue.name, step, objs.avg, top1.avg, top5.avg)
    return top1.avg, objs.avg
def infer(valid_queue, model, criterion):
    objs = utils.AvgrageMeter()
    top1 = utils.AvgrageMeter()
    top5 = utils.AvgrageMeter()
    model.eval()
    with torch.no_grad():
        for step, (input, target) in enumerate(valid_queue):
            input = input.cuda()
            target = target.cuda(non_blocking=True)

            logits = model(input)
            loss = criterion(logits, target)

            prec1, prec5 = utils.accuracy(logits, target, topk=(1, 5))
            n = input.size(0)
            objs.update(loss.item(), n)
            top1.update(prec1.item(), n)
            top5.update(prec5.item(), n)

            if step % args.report_freq == 0:
                logging.info('valid %03d %e %f %f', step, objs.avg, top1.avg, top5.avg)
            if 'debug' in args.save:
                break
    return top1.avg, objs.avg
def train_step(train_queue, optimizer):
    objs = utils.AvgrageMeter()
    nll = utils.AvgrageMeter()
    for step, sample in enumerate(train_queue):
        fw_adjs = sample['fw_adjs']
        bw_adjs = sample['bw_adjs']
        operations = sample['operations']
        num_nodes = sample['num_nodes']
        sequence = sample['sequence']

        optimizer.zero_grad()
        log_prob, predicted_value = model(fw_adjs, bw_adjs, operations, num_nodes,
                                          targets=sequence)
        # print("input: {} output: {}".format(log_prob.size(), sequence.size()))
        loss = F.nll_loss(log_prob.contiguous().view(-1, log_prob.size(-1)),
                          sequence.view(-1))
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), conf.grad_bound)
        optimizer.step()

        n = sequence.size(0)
        objs.update(loss.item(), n)
        nll.update(loss.item(), n)
        # logging.info("step: %04d, objs: %.6f, nll: %.6f", step, objs.avg, nll.avg)
    return objs.avg, nll.avg
def test(test_queue, model, criterion, logger):
    objs = utils.AvgrageMeter()
    top1 = utils.AvgrageMeter()
    model.eval()  # disable dropout and batchnorm updates during evaluation
    with torch.no_grad():
        for step, (input, target) in enumerate(test_queue):
            n = input.size(0)
            input = input.float().to(device)
            target = target.long().to(device)

            logits = model(input)
            loss = criterion(logits, target)

            prec1, prec5 = utils.accuracy(logits, target, topk=(1, 5))
            objs.update(loss.item(), n)
            top1.update(prec1.item(), n)

            if step % args.report_freq == 0:
                logger.info('time = %s, test %03d %e %f',
                            str(utils.get_unix_timestamp()), step, objs.avg, top1.avg)
                print('time = {}, test {} {}'.format(
                    str(utils.get_unix_timestamp()), step, objs.avg))
    return objs.avg, top1.avg
def train(train_queue, model, criterion, optimizer):
    global is_multi_gpu
    objs = utils.AvgrageMeter()
    top1 = utils.AvgrageMeter()
    top5 = utils.AvgrageMeter()
    model.train()
    for step, (input, target) in enumerate(train_queue):
        n = input.size(0)
        input = input.cuda()
        target = target.cuda(non_blocking=True)

        optimizer.zero_grad()
        logits, logits_aux = model(input)
        loss = criterion(logits, target)
        if args.auxiliary:
            loss_aux = criterion(logits_aux, target)
            loss += args.auxiliary_weight * loss_aux
        loss.backward()
        parameters = model.module.parameters() if is_multi_gpu else model.parameters()
        nn.utils.clip_grad_norm_(parameters, args.grad_clip)
        optimizer.step()

        prec1, prec5 = utils.accuracy(logits, target, topk=(1, 5))
        objs.update(loss.item(), n)
        top1.update(prec1.item(), n)
        top5.update(prec5.item(), n)

        if step % args.report_freq == 0:
            logging.info('train %03d %e %f %f', step, objs.avg, top1.avg, top5.avg)
            wandb.log({"evaluation_train_accuracy_avg": objs.avg}, step=step)
            wandb.log({"evaluation_train_accuracy_top1": top1.avg}, step=step)
            wandb.log({"evaluation_train_accuracy_top5": top5.avg}, step=step)
    return top1.avg, objs.avg
def nao_valid(queue, model):
    pa = utils.AvgrageMeter()
    hs = utils.AvgrageMeter()
    mse = utils.AvgrageMeter()
    with torch.no_grad():
        model.eval()
        for step, sample in enumerate(queue):
            encoder_input = sample['encoder_input']
            encoder_target = sample['encoder_target']
            decoder_target = sample['decoder_target']

            encoder_input = encoder_input.cuda()
            encoder_target = encoder_target.cuda()
            decoder_target = decoder_target.cuda()

            predict_value, logits, arch = model(encoder_input)
            n = encoder_input.size(0)
            pairwise_acc = utils.pairwise_accuracy(
                encoder_target.data.squeeze().tolist(),
                predict_value.data.squeeze().tolist())
            hamming_dis = utils.hamming_distance(
                decoder_target.data.squeeze().tolist(),
                arch.data.squeeze().tolist())
            mse.update(F.mse_loss(predict_value.data.squeeze(),
                                  encoder_target.data.squeeze()), n)
            pa.update(pairwise_acc, n)
            hs.update(hamming_dis, n)
    return mse.avg, pa.avg, hs.avg
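# nao_valid above depends on utils.pairwise_accuracy and
# utils.hamming_distance, which are not defined in this file. Hedged
# sketches of their presumed semantics (fraction of concordant index pairs;
# mean per-sequence Hamming distance over a batch of token sequences):
def pairwise_accuracy(la, lb):
    """Fraction of pairs (i, j), i < j, ordered the same way in la and lb."""
    n = len(la)
    count, total = 0, 0
    for i in range(n):
        for j in range(i + 1, n):
            if (la[i] >= la[j]) == (lb[i] >= lb[j]):
                count += 1
            total += 1
    return float(count) / total

def hamming_distance(la, lb):
    """Mean elementwise disagreement between two batches of token sequences."""
    assert len(la) == len(lb)
    dis = sum(sum(1 for a, b in zip(s1, s2) if a != b)
              for s1, s2 in zip(la, lb))
    return dis / len(la)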
def infer(valid_queue, model, criterion):
    objs = utils.AvgrageMeter()
    top1 = utils.AvgrageMeter()
    top5 = utils.AvgrageMeter()
    model.eval()
    with torch.no_grad():  # replaces Variable(..., volatile=True) and async=True
        for step, (input, target) in enumerate(valid_queue):
            input = input.cuda()
            target = target.cuda(non_blocking=True)

            logits = model(input)
            loss = criterion(logits, target)

            prec1, prec5 = utils.accuracy(logits, target, topk=(1, 5))
            n = input.size(0)
            objs.update(loss.item(), n)
            top1.update(prec1.item(), n)
            top5.update(prec5.item(), n)

            if step % args.report_freq == 0:
                logging.info('valid %03d %e %f %f', step, objs.avg, top1.avg, top5.avg)
            if args.debug:
                break
    return top1.avg, objs.avg
def infer(valid_loader, model, controller, criterion):
    total_loss = utils.AvgrageMeter()
    total_top1 = utils.AvgrageMeter()
    model.eval()
    controller.eval()
    with torch.no_grad():
        for step in range(20):
            data, target = valid_loader.next_batch()
            data = data.cuda()
            target = target.cuda()

            # Sample an architecture from the controller and evaluate it.
            dag, _, _ = controller()
            logits, auxs = model(dag, data)
            loss = criterion(logits, target)

            prec1 = utils.accuracy(logits, target)[0]
            n = data.size(0)
            total_loss.update(loss.item(), n)
            total_top1.update(prec1.item(), n)

            logging.info('valid {:0>3d} {:.6f} {:.3f}'.format(step, loss.item(), prec1.item()))
            logging.info('{}'.format([i for i in dag]))
            with open(os.path.join(args.save, 'dag_all.txt'), 'a') as f:
                f.write('{:.3f} {} infer\n'.format(prec1.item(), [i for i in dag]))
            # del loss, logits
    return total_top1.avg
def infer(valid_queue, model, criterion):
    global is_multi_gpu
    objs = utils.AvgrageMeter()
    top1 = utils.AvgrageMeter()
    top5 = utils.AvgrageMeter()
    model.eval()
    for step, (input, target) in enumerate(valid_queue):
        input = input.cuda()
        target = target.cuda()

        logits = model(input)
        loss = criterion(logits, target)

        prec1, prec5 = utils.accuracy(logits, target, topk=(1, 5))
        n = input.size(0)
        objs.update(loss.item(), n)
        top1.update(prec1.item(), n)
        top5.update(prec5.item(), n)

        if step % args.report_freq == 0:
            logging.info('valid %03d %e %f %f', step, objs.avg, top1.avg, top5.avg)
    # note: also returns the loss tensor from the final batch
    return top1.avg, objs.avg, loss
def train(epoch, train_queue, valid_queue, model, architect, criterion, optimizer, lr):
    global is_multi_gpu
    objs = utils.AvgrageMeter()
    top1 = utils.AvgrageMeter()
    top5 = utils.AvgrageMeter()

    for step, (input, target) in enumerate(train_queue):
        model.train()
        n = input.size(0)

        # Anneal the Gumbel-softmax temperature linearly from tau_max to tau_min.
        model.set_tau(args.tau_max - epoch * 1.0 / args.epochs * (args.tau_max - args.tau_min))

        input = input.cuda()
        target = target.cuda()

        # get a random minibatch from the search queue with replacement
        input_search, target_search = next(iter(valid_queue))
        input_search = input_search.cuda()
        target_search = target_search.cuda()

        # Update architecture alpha by Adam-SGD
        if args.optimization == "AOS":
            architect.step_AOS(input, target, input_search, target_search)
        else:
            architect.step_milenas(input, target, input_search, target_search, 1, 1)

        # Update weights w by SGD, ignoring the weights gained during architecture training
        optimizer.zero_grad()
        logits = model(input)
        loss = criterion(logits, target)
        loss.backward()
        parameters = model.module.arch_parameters() if is_multi_gpu else model.arch_parameters()
        nn.utils.clip_grad_norm_(parameters, args.grad_clip)
        optimizer.step()

        prec1, prec5 = utils.accuracy(logits, target, topk=(1, 5))
        objs.update(loss.item(), n)
        top1.update(prec1.item(), n)
        top5.update(prec5.item(), n)

        if step % args.report_freq == 0:
            logging.info('train %03d %e %f %f', step, objs.avg, top1.avg, top5.avg)
    return top1.avg, objs.avg
def train(train_queue, model, criterion, optimizer, epoch, init_lr,
          warmup_epochs, global_step):
    objs = utils.AvgrageMeter()
    top1 = utils.AvgrageMeter()
    top5 = utils.AvgrageMeter()
    model.train()
    for step, (data, target) in enumerate(train_queue):
        n = data.size(0)
        data = data.cuda()
        target = target.cuda()

        # Linear learning-rate warmup over the first warmup_epochs.
        if epoch < warmup_epochs:
            len_epoch = len(train_queue)
            scale = float(1 + step + epoch * len_epoch) / (warmup_epochs * len_epoch)
            lr = init_lr * scale
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr

        # Forward.
        optimizer.zero_grad()
        logits, logits_aux = model(data)
        loss = criterion(logits, target)
        if args.auxiliary:
            loss_aux = criterion(logits_aux, target)
            loss += args.auxiliary_weight * loss_aux

        # Backward and step.
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), args.grad_clip)
        optimizer.step()

        ############# APEX #############
        # Calculate the accuracy, averaged across distributed workers.
        prec1, prec5 = utils.accuracy(logits, target, topk=(1, 5))
        reduced_loss = utils.reduce_tensor(loss.data, args.world_size)
        prec1 = utils.reduce_tensor(prec1, args.world_size)
        prec5 = utils.reduce_tensor(prec5, args.world_size)

        objs.update(to_python_float(reduced_loss), n)
        top1.update(to_python_float(prec1), n)
        top5.update(to_python_float(prec5), n)
        ################################

        if step % args.report_freq == 0:
            current_lr = list(optimizer.param_groups)[0]['lr']
            logging.info('train %03d %e %f %f lr: %e',
                         step, objs.avg, top1.avg, top5.avg, current_lr)
            writer.add_scalar('train/loss', objs.avg, global_step)
            writer.add_scalar('train/acc_top1', top1.avg, global_step)
            writer.add_scalar('train/acc_top5', top5.avg, global_step)
            writer.add_scalar('train/lr',
                              optimizer.state_dict()['param_groups'][0]['lr'],
                              global_step)
        global_step += 1
    return top1.avg, objs.avg, global_step
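# The APEX-style block above assumes utils.reduce_tensor and
# to_python_float, neither of which is defined in this file. Hedged sketches
# following the usual NVIDIA distributed-training pattern (global mean of a
# metric across workers; unwrapping a single-element tensor):
import torch.distributed as dist

def reduce_tensor(tensor, world_size):
    # All-reduce a clone so the caller's tensor is untouched, then divide
    # by the worker count to turn the sum into a mean.
    rt = tensor.clone()
    dist.all_reduce(rt, op=dist.ReduceOp.SUM)
    rt /= world_size
    return rt

def to_python_float(t):
    # Unwrap a single-element tensor to a Python float.
    return t.item() if hasattr(t, 'item') else t[0]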
def infer(valid_loader, model, criterion):
    """Validation loop for the target network; records loss and accuracy.

    Parameters
        valid_loader : validation dataloader
        model : model under evaluation
        criterion : loss function
    Returns
        average top-1 accuracy
    """
    total_loss = utils.AvgrageMeter()
    total_top1 = utils.AvgrageMeter()
    aux_nums = len(model.aux_ind)
    total_loss_aux = [utils.AvgrageMeter() for i in range(aux_nums)]
    total_top1_aux = [utils.AvgrageMeter() for i in range(aux_nums)]
    model.eval()
    for step, (data, target) in enumerate(valid_loader):
        data = data.cuda()
        target = target.cuda()
        with torch.no_grad():
            logits, auxs = model(data)
            loss = F.cross_entropy(logits, target)
            loss_aux = [F.cross_entropy(i, target) for i in auxs]

        prec1 = utils.accuracy(logits, target)[0]
        prec1_aux = [utils.accuracy(i, target)[0] for i in auxs]

        n = data.size(0)
        total_loss.update(loss.item(), n)
        total_top1.update(prec1.item(), n)
        for ind, i in enumerate(loss_aux):
            total_loss_aux[ind].update(i.item(), n)
        for ind, i in enumerate(prec1_aux):
            total_top1_aux[ind].update(i.item(), n)

        if (step + 1) % args.report_freq == 0:
            for i in range(aux_nums):
                logging.info('aux_{} {:0>3d} {:.6f} {:.3f}'.format(
                    i, step, total_loss_aux[i].avg, total_top1_aux[i].avg))
            logging.info('\nvalid {:0>3d} {:.6f} {:.3f}'.format(
                step, total_loss.avg, total_top1.avg))
        del loss, logits, loss_aux
    return total_top1.avg
def train(train_queue, valid_queue, model, architect, criterion, optimizer, lr, epoch):
    objs = utils.AvgrageMeter()
    top1 = utils.AvgrageMeter()
    top5 = utils.AvgrageMeter()

    for step, (input, target) in enumerate(train_queue):
        model.train()
        n = input.size(0)
        input = input.cuda()
        target = target.cuda(non_blocking=True)

        # get a minibatch from the search queue with replacement;
        # NameError covers the first pass, StopIteration an exhausted iterator
        try:
            input_search, target_search = next(valid_queue_iter)
        except (NameError, StopIteration):
            valid_queue_iter = iter(valid_queue)
            input_search, target_search = next(valid_queue_iter)
        input_search = input_search.cuda()
        target_search = target_search.cuda(non_blocking=True)

        # Allow warm starting of the one-shot model for more reliable architecture updates.
        if epoch >= args.warm_start_epochs:
            architect.step(input, target, input_search, target_search, lr,
                           optimizer, unrolled=args.unrolled)

        optimizer.zero_grad()
        logits = model(input)
        loss = criterion(logits, target)
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), args.grad_clip)
        optimizer.step()

        prec1, prec5 = utils.accuracy(logits, target, topk=(1, 5))
        objs.update(loss.item(), n)
        top1.update(prec1.item(), n)
        top5.update(prec5.item(), n)

        if step % args.report_freq == 0:
            logging.info('train %03d %e %f %f', step, objs.avg, top1.avg, top5.avg)
        if args.debug:
            break
    return top1.avg, objs.avg
def train_controller(reward_loader, model, controller, controller_optimizer):
    global baseline
    total_loss = utils.AvgrageMeter()
    total_reward = utils.AvgrageMeter()
    total_entropy = utils.AvgrageMeter()
    controller.train()
    model.eval()
    for step in range(150):
        data, target = reward_loader.next_batch()
        n = data.size(0)
        data = data.cuda()
        target = target.cuda()

        controller_optimizer.zero_grad()
        dag, log_prob, entropy = controller()
        log_prob = sum(log_prob)
        entropy = sum(entropy)

        # Reward is the sampled architecture's accuracy on a held-out batch.
        with torch.no_grad():
            logits, auxs = model(dag, data)
            reward = utils.accuracy(logits, target)[0]
        if args.entropy_weight is not None:
            reward = reward + args.entropy_weight * entropy

        log_prob = torch.sum(log_prob)
        # Exponential moving-average baseline for variance reduction.
        if baseline is None:
            baseline = reward
        baseline -= (1 - args.bl_dec) * (baseline - reward)
        loss = (log_prob * (reward - baseline)).sum()

        loss.backward()
        controller_optimizer.step()

        total_loss.update(loss.item(), n)
        total_reward.update(reward.item(), n)
        total_entropy.update(entropy.item(), n)

        if (step + 1) % args.report_freq == 0:
            logging.info('controller {:0>3d} {:.6f} {:.3f} {:.3f}'.format(
                step, total_loss.avg, total_reward.avg, baseline.item()))
            logging.info('{}'.format([i for i in dag]))
            with open(os.path.join(args.save, 'dag_all.txt'), 'a') as f:
                f.write('{:.3f} {} controller\n'.format(reward.item(), [i for i in dag]))
        del loss, reward, entropy, logits
def train(train_loader, model, optimizer, criterion, start=False):
    """Training loop for the target network; records loss and accuracy.

    Parameters
        train_loader : training dataloader
        model : training model
        optimizer : optimizer
        criterion : loss function
        start : needed in the search stage to keep early training stable
    Returns
        average top-1 accuracy
    """
    total_loss = utils.AvgrageMeter()
    total_top1 = utils.AvgrageMeter()
    model.train()
    aux_nums = len(model.aux_ind)
    for step, (data, target) in enumerate(train_loader):
        n = data.size(0)
        data = data.cuda()
        target = target.cuda()
        # data, targets_a, targets_b, lam = mixup_data(data, target, args.alpha, True)

        optimizer.zero_grad()
        logits, auxs = model(data)
        loss1 = criterion(logits, target)
        # Stage losses (not in the paper): every block returns a loss weighted
        # by 0.1 * (block index + 1), normalized by the sum of the weights.
        loss_aux = sum([criterion(auxs[i], target) * 0.1 * (i + 1)
                        for i in range(aux_nums)]) / sum((i + 1) * 0.1
                                                         for i in range(aux_nums))
        loss = loss1 + 0.4 * loss_aux

        with amp.scale_loss(loss, optimizer) as scaled_loss:
            scaled_loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), args.grad_clip)
        optimizer.step()

        prec1 = utils.accuracy(logits, target)[0]
        total_loss.update(loss.item(), n)
        total_top1.update(prec1.item(), n)

        if (step + 1) % args.report_freq == 0:
            logging.info('train {:0>3d} {:.6f} {:.3f}'.format(
                step, total_loss.avg, total_top1.avg))
        del loss, loss1, loss_aux
    return total_top1.avg
def train(train_queue, valid_queue, model, architect, criterion, optimizer, lr, epoch):
    objs = utils.AvgrageMeter()
    top1 = utils.AvgrageMeter()
    top5 = utils.AvgrageMeter()

    for step, (input, target) in enumerate(train_queue):
        model.train()
        n = input.size(0)
        input = input.cuda()
        target = target.cuda(non_blocking=True)

        # get a random minibatch from the search queue with replacement
        input_search, target_search = next(iter(valid_queue))
        input_search = input_search.cuda()
        target_search = target_search.cuda(non_blocking=True)

        if epoch >= 10:
            architect.step(input, target, input_search, target_search, lr,
                           optimizer, unrolled=args.unrolled)

        optimizer.zero_grad()
        architect.optimizer.zero_grad()
        logits = model(input)
        loss = criterion(logits, target)
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), args.grad_clip)
        optimizer.step()
        optimizer.zero_grad()
        architect.optimizer.zero_grad()

        prec1, prec5 = utils.accuracy(logits, target, topk=(1, 5))
        objs.update(loss.item(), n)
        top1.update(prec1.item(), n)
        top5.update(prec5.item(), n)

        if step % args.report_freq == 0:
            logging.info('train %03d %e %f %f', step, objs.avg, top1.avg, top5.avg)
        if 'debug' in args.save:
            break
    return top1.avg, objs.avg
def train(train_loader, model, controller, optimizer, criterion, start=False):
    total_loss = utils.AvgrageMeter()
    total_top1 = utils.AvgrageMeter()
    # controller.eval()
    model.eval()
    controller.GA_training(10, copy.deepcopy(model))
    model.train()
    aux_nums = len(model.aux_ind)
    for step, (data, target) in enumerate(train_loader):
        n = data.size(0)
        data = data.cuda()
        target = target.cuda()

        optimizer.zero_grad()
        # with torch.no_grad():
        #     dag, _, _ = controller()
        # GPU memory usage differs between architectures, so begin with the
        # architecture sequence that uses the most memory.
        dag = [seq_creater() for i in range(2)]
        if step == 0 and start:
            dag = [[0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
                    0, 1, 0, 1, 0, 1, 0, 1, 0, 1]] * 2

        logits, auxs = model(dag, data)
        loss1 = criterion(logits, target)
        loss_aux = sum([criterion(auxs[i], target) * 0.1 * (i + 1)
                        for i in range(aux_nums)]) / sum((i + 1) * 0.1
                                                         for i in range(aux_nums))
        loss = loss1 + 0.4 * loss_aux

        with amp.scale_loss(loss, optimizer) as scaled_loss:
            scaled_loss.backward()
        # loss.backward()  # use this instead when not running with amp
        optimizer.step()

        prec1 = utils.accuracy(logits, target)[0]
        total_loss.update(loss.item(), n)
        total_top1.update(prec1.item(), n)

        if (step + 1) % args.report_freq == 0:
            logging.info('train {:0>3d} {:.6f} {:.3f}'.format(
                step, total_loss.avg, total_top1.avg))
        with open(os.path.join(args.save, 'dag_all.txt'), 'a') as f:
            f.write('{:.3f} {} share_weight\n'.format(prec1.item(), [i for i in dag]))
    return total_top1.avg
def train(epoch, train_queue, valid_queue, model, architect, criterion, optimizer, lr):
    global is_multi_gpu
    objs = utils.AvgrageMeter()
    top1 = utils.AvgrageMeter()
    top5 = utils.AvgrageMeter()

    for step, (input, target) in enumerate(train_queue):
        model.train()
        n = input.size(0)
        input = input.cuda()
        target = target.cuda()

        # get a random minibatch from the search queue with replacement
        input_search, target_search = next(iter(valid_queue))
        input_search = input_search.cuda()
        target_search = target_search.cuda()

        architect.step_v2(input, target, input_search, target_search,
                          lambda_train_regularizer, lambda_valid_regularizer)

        optimizer.zero_grad()
        logits = model(input)
        loss = criterion(logits, target)
        loss.backward()
        parameters = model.module.arch_parameters() if is_multi_gpu else model.arch_parameters()
        nn.utils.clip_grad_norm_(parameters, args.grad_clip)
        optimizer.step()

        prec1, prec5 = utils.accuracy(logits, target, topk=(1, 5))
        objs.update(loss.item(), n)
        top1.update(prec1.item(), n)
        top5.update(prec5.item(), n)
        # torch.cuda.empty_cache()

        if step % args.report_freq == 0:
            logging.info('train %03d %e %f %f', step, objs.avg, top1.avg, top5.avg)
    return top1.avg, objs.avg, loss
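# For context, a minimal sketch of the driver loop that train/infer pairs
# like the ones above are typically called from. The scheduler, args.epochs,
# and utils.save names are assumptions for illustration, not part of the
# functions in this file; the train/infer signatures match the simple
# (train_queue, model, criterion, optimizer) variants above.
def run_training(train_queue, valid_queue, model, criterion, optimizer, scheduler):
    best_acc = 0.0
    for epoch in range(args.epochs):
        train_acc, train_obj = train(train_queue, model, criterion, optimizer)
        logging.info('epoch %d train_acc %f', epoch, train_acc)

        valid_acc, valid_obj = infer(valid_queue, model, criterion)
        logging.info('epoch %d valid_acc %f', epoch, valid_acc)

        scheduler.step()
        # Checkpoint whenever validation accuracy improves.
        if valid_acc > best_acc:
            best_acc = valid_acc
            utils.save(model, os.path.join(args.save, 'weights.pt'))
    return best_acc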