import torch
import torch.nn.functional as F

import utils

# Note: `args` is assumed to be the module-level argparse namespace parsed elsewhere
# in this file (providing `trade_off` and `grad_bound`).


def controller_train(train_queue, model, optimizer):
    objs = utils.AvgrageMeter()
    mse = utils.AvgrageMeter()
    nll = utils.AvgrageMeter()
    model.train()
    for step, sample in enumerate(train_queue):
        encoder_input = utils.move_to_cuda(sample['encoder_input'])
        encoder_target = utils.move_to_cuda(sample['encoder_target'])
        decoder_input = utils.move_to_cuda(sample['decoder_input'])
        decoder_target = utils.move_to_cuda(sample['decoder_target'])

        optimizer.zero_grad()
        predict_value, log_prob, arch = model(encoder_input, decoder_input)
        # Performance-prediction loss: regress the predicted value onto the target accuracy.
        loss_1 = F.mse_loss(predict_value.squeeze(), encoder_target.squeeze())
        # Reconstruction loss: negative log-likelihood of the decoded architecture tokens.
        loss_2 = F.nll_loss(log_prob.contiguous().view(-1, log_prob.size(-1)),
                            decoder_target.view(-1))
        # Weighted combination of the two objectives.
        loss = args.trade_off * loss_1 + (1 - args.trade_off) * loss_2
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), args.grad_bound)
        optimizer.step()

        n = encoder_input.size(0)
        objs.update(loss.data, n)
        mse.update(loss_1.data, n)
        nll.update(loss_2.data, n)
    return objs.avg, mse.avg, nll.avg
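
# controller_train above and controller_infer below rely on a project-level `utils`
# module that is not shown here. The sketch below illustrates the interface they
# assume from it (a running-average meter and a device-placement helper); the names
# are hypothetical and the real implementations in the repository may differ.
class _AvgrageMeterSketch(object):
    """Running average of a scalar, weighted by batch size."""

    def __init__(self):
        self.avg, self.sum, self.cnt = 0.0, 0.0, 0

    def update(self, val, n=1):
        self.sum += val * n
        self.cnt += n
        self.avg = self.sum / self.cnt


def _move_to_cuda_sketch(tensor):
    """Place a tensor on the GPU if one is available, otherwise leave it on CPU."""
    return tensor.cuda() if torch.cuda.is_available() else tensor
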
def controller_infer(queue, model, step, direction='+'):
    new_arch_list = []
    new_predict_values = []
    model.eval()
    for i, sample in enumerate(queue):
        encoder_input = utils.move_to_cuda(sample['encoder_input'])
        model.zero_grad()
        # Perturb the architecture embedding by `step` along `direction` in the
        # latent space and decode it into a new candidate architecture.
        new_arch, new_predict_value = model.generate_new_arch(
            encoder_input, step, direction=direction)
        new_arch_list.extend(new_arch.data.squeeze().tolist())
        new_predict_values.extend(new_predict_value.data.squeeze().tolist())
    return new_arch_list, new_predict_values
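
# A usage sketch (not part of the original file): how the two routines above might be
# wired together in one search iteration. The queue/optimizer names, `num_epochs`, and
# `perturb_step` are illustrative assumptions, not values taken from the repository.
def _controller_search_step_sketch(controller, controller_optimizer,
                                   controller_train_queue, controller_infer_queue,
                                   num_epochs=1000, perturb_step=1):
    # Fit the encoder-predictor-decoder on (architecture, accuracy) pairs.
    for epoch in range(num_epochs):
        loss, mse, nll = controller_train(controller_train_queue, controller,
                                          controller_optimizer)
    # Move existing architectures toward higher predicted accuracy ('+' direction)
    # and decode the perturbed embeddings into new candidate architectures.
    new_archs, predicted_values = controller_infer(controller_infer_queue, controller,
                                                   perturb_step, direction='+')
    return new_archs, predicted_values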