def train(train_loader, dml_model, dml_optimizer, args):
    dml_model.train()
    costs = [AvgrageMeter() for i in range(dml_model.model_num)]
    accs = [AvgrageMeter() for i in range(dml_model.model_num)]
    for step_id, (images, labels) in enumerate(train_loader):
        images, labels = to_variable(images), to_variable(labels)
        batch_size = images.shape[0]
        logits = dml_model.forward(images)
        precs = [
            fluid.layers.accuracy(
                input=l, label=labels, k=1) for l in logits
        ]
        losses = dml_model.loss(logits, labels)
        dml_optimizer.minimize(losses)
        for i in range(dml_model.model_num):
            accs[i].update(precs[i].numpy(), batch_size)
            costs[i].update(losses[i].numpy(), batch_size)
        model_names = dml_model.full_name()
        if step_id % args.log_freq == 0:
            log_msg = "Train Step {}".format(step_id)
            for model_id, (cost, acc) in enumerate(zip(costs, accs)):
                log_msg += ", {} loss: {:.6f} acc: {:.6f}".format(
                    model_names[model_id], cost.avg[0], acc.avg[0])
            logger.info(log_msg)
    return costs, accs
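# The functions in this file rely on an `AvgrageMeter` helper that is not
# defined here. A minimal sketch consistent with how it is used (`update(val,
# n)` with a numpy value, `avg` read as `avg[0]`) could look like the
# following; the real implementation may differ.
import numpy as np

class AvgrageMeter(object):
    """Running weighted average of a (possibly 1-element numpy) metric."""

    def __init__(self):
        self.reset()

    def reset(self):
        self.avg = np.zeros(1)   # running average; read as `avg[0]`
        self.sum = np.zeros(1)   # weighted sum of observed values
        self.cnt = 0             # total sample count

    def update(self, val, n=1):
        self.sum += np.asarray(val, dtype=np.float64) * n
        self.cnt += n
        self.avg = self.sum / self.cnt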
def train_one_epoch(model, architect, train_loader, valid_loader, optimizer,
                    epoch, use_data_parallel, log_freq):
    ce_losses = AvgrageMeter()
    accs = AvgrageMeter()
    model.train()
    step_id = 0
    for train_data, valid_data in izip(train_loader(), valid_loader()):
        # Update the architecture parameters on the validation batch before
        # the weight step.
        architect.step(train_data, valid_data)
        loss, acc = model_loss(model, train_data)
        if use_data_parallel:
            loss = model.scale_loss(loss)
            loss.backward()
            model.apply_collective_grads()
        else:
            loss.backward()
        optimizer.minimize(loss)
        model.clear_gradients()
        batch_size = train_data[0].shape[0]
        ce_losses.update(loss.numpy(), batch_size)
        accs.update(acc.numpy(), batch_size)
        if step_id % log_freq == 0:
            logger.info(
                "Train Epoch {}, Step {}, Lr {:.6f} loss {:.6f}; acc: {:.6f};".
                format(epoch, step_id, optimizer.current_step_lr(),
                       ce_losses.avg[0], accs.avg[0]))
        step_id += 1
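# Two more helpers assumed by the search functions but not defined in this
# file. `izip` is the Python 2 name; a common Python 3 compatibility shim is:
try:
    from itertools import izip
except ImportError:
    izip = zip

# `model_loss` is also not shown here. A plausible sketch, under the
# assumption that it computes plain cross-entropy and top-1 accuracy on an
# (image, label) batch, matching how its results are consumed above and in
# `valid_one_epoch` below:
def model_loss(model, data):
    image = to_variable(data[0])
    label = to_variable(data[1])
    label.stop_gradient = True
    logits = model(image)
    loss = fluid.layers.reduce_mean(
        fluid.layers.softmax_with_cross_entropy(logits, label))
    acc = fluid.layers.accuracy(input=logits, label=label, k=1)
    return loss, acc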
def valid_one_epoch(self, epoch):
    losses = []
    accs = []
    for i in range(self.model_num):
        if self.use_data_parallel:
            self.parallel_models[i].eval()
        else:
            self.models[i].eval()
        losses.append(AvgrageMeter())
        accs.append(AvgrageMeter())
    for _, (images, labels) in enumerate(self.valid_loader):
        images, labels = to_variable(images), to_variable(labels)
        batch_size = images.shape[0]
        logits = []
        if self.use_data_parallel:
            for model in self.parallel_models:
                logits.append(model(images))
        else:
            for model in self.models:
                logits.append(model(images))
        for i in range(self.model_num):
            gt_loss = self.models[i].loss(logits[i], labels)
            kl_loss = 0
            for j in range(self.model_num):
                if i != j:
                    x = F.log_softmax(logits[i], axis=1)
                    y = fluid.layers.softmax(logits[j], axis=1)
                    kl_loss += fluid.layers.kldiv_loss(
                        x, y, reduction='batchmean')
            loss = gt_loss
            if self.model_num > 1:
                loss += kl_loss / (self.model_num - 1)
            prec = fluid.layers.accuracy(input=logits[i], label=labels, k=1)
            losses[i].update(loss.numpy(), batch_size)
            accs[i].update(prec.numpy() * 100, batch_size)
    return losses, accs
def valid_one_epoch(model, valid_loader, epoch, log_freq):
    accs = AvgrageMeter()
    ce_losses = AvgrageMeter()
    model.student.eval()
    step_id = 0
    for valid_data in valid_loader():
        # Under data parallel the wrapped module lives on `model._layers`;
        # fall back to calling `loss` on the model directly otherwise.
        try:
            loss, acc, ce_loss, _, _ = model._layers.loss(valid_data, epoch)
        except AttributeError:
            loss, acc, ce_loss, _, _ = model.loss(valid_data, epoch)
        batch_size = valid_data[0].shape[0]
        ce_losses.update(ce_loss.numpy(), batch_size)
        accs.update(acc.numpy(), batch_size)
        step_id += 1
    return ce_losses.avg[0], accs.avg[0]
def valid_one_epoch(model, valid_loader, epoch, log_freq):
    ce_losses = AvgrageMeter()
    accs = AvgrageMeter()
    model.eval()
    step_id = 0
    for valid_data in valid_loader():
        loss, acc = model_loss(model, valid_data)
        batch_size = valid_data[0].shape[0]
        ce_losses.update(loss.numpy(), batch_size)
        accs.update(acc.numpy(), batch_size)
        if step_id % log_freq == 0:
            logger.info(
                "Valid Epoch {}, Step {}, loss {:.6f}; acc: {:.6f};".format(
                    epoch, step_id, ce_losses.avg[0], accs.avg[0]))
        step_id += 1
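# A minimal sketch wiring `train_one_epoch` and the `valid_one_epoch`
# directly above into a search loop. The `search` name and its argument list
# are illustrative assumptions; the surrounding script presumably builds the
# model, architect, loaders, and optimizer elsewhere.
def search(model, architect, train_loader, valid_loader, optimizer,
           num_epochs, use_data_parallel, log_freq):
    for epoch in range(num_epochs):
        train_one_epoch(model, architect, train_loader, valid_loader,
                        optimizer, epoch, use_data_parallel, log_freq)
        valid_one_epoch(model, valid_loader, epoch, log_freq)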
def train(model, train_reader, optimizer, epoch, drop_path_prob, args):
    objs = AvgrageMeter()
    top1 = AvgrageMeter()
    top5 = AvgrageMeter()
    model.train()
    for step_id, data in enumerate(train_reader()):
        image_np, label_np = data
        image = to_variable(image_np)
        label = to_variable(label_np)
        label.stop_gradient = True
        logits, logits_aux = model(image, drop_path_prob, True)
        prec1 = fluid.layers.accuracy(input=logits, label=label, k=1)
        prec5 = fluid.layers.accuracy(input=logits, label=label, k=5)
        loss = fluid.layers.reduce_mean(
            fluid.layers.softmax_with_cross_entropy(logits, label))
        if args.auxiliary:
            loss_aux = fluid.layers.reduce_mean(
                fluid.layers.softmax_with_cross_entropy(logits_aux, label))
            loss = loss + args.auxiliary_weight * loss_aux
        if args.use_data_parallel:
            loss = model.scale_loss(loss)
            loss.backward()
            model.apply_collective_grads()
        else:
            loss.backward()
        optimizer.minimize(loss)
        model.clear_gradients()
        n = image.shape[0]
        objs.update(loss.numpy(), n)
        top1.update(prec1.numpy(), n)
        top5.update(prec5.numpy(), n)
        if step_id % args.log_freq == 0:
            logger.info(
                "Train Epoch {}, Step {}, loss {:.6f}, acc_1 {:.6f}, acc_5 {:.6f}".
                format(epoch, step_id, objs.avg[0], top1.avg[0], top5.avg[0]))
    return top1.avg[0]
def valid(model, valid_reader, epoch, args):
    objs = AvgrageMeter()
    top1 = AvgrageMeter()
    top5 = AvgrageMeter()
    model.eval()
    for step_id, data in enumerate(valid_reader()):
        image_np, label_np = data
        image = to_variable(image_np)
        label = to_variable(label_np)
        logits, _ = model(image, 0, False)
        prec1 = fluid.layers.accuracy(input=logits, label=label, k=1)
        prec5 = fluid.layers.accuracy(input=logits, label=label, k=5)
        loss = fluid.layers.reduce_mean(
            fluid.layers.softmax_with_cross_entropy(logits, label))
        n = image.shape[0]
        objs.update(loss.numpy(), n)
        top1.update(prec1.numpy(), n)
        top5.update(prec5.numpy(), n)
        if step_id % args.log_freq == 0:
            logger.info(
                "Valid Epoch {}, Step {}, loss {:.6f}, acc_1 {:.6f}, acc_5 {:.6f}".
                format(epoch, step_id, objs.avg[0], top1.avg[0], top5.avg[0]))
    return top1.avg[0]
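# Sketch of an outer loop for the `train`/`valid` pair above. The linear
# drop-path schedule (scaling args.drop_path_prob by epoch / args.epochs)
# follows the usual DARTS convention; treat the exact schedule and the
# `run_training` name as assumptions.
def run_training(model, train_reader, valid_reader, optimizer, args):
    best_top1 = 0.0
    for epoch in range(args.epochs):
        drop_path_prob = args.drop_path_prob * epoch / args.epochs
        train(model, train_reader, optimizer, epoch, drop_path_prob, args)
        top1 = valid(model, valid_reader, epoch, args)
        best_top1 = max(best_top1, top1)
        logger.info("Epoch {}, valid_acc_1 {:.6f}, best {:.6f}".format(
            epoch, top1, best_top1))
    return best_top1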
def train_one_epoch(self, epoch):
    losses = []
    accs = []
    for i in range(self.model_num):
        if self.use_data_parallel:
            self.parallel_models[i].train()
        else:
            self.models[i].train()
        losses.append(AvgrageMeter())
        accs.append(AvgrageMeter())
    for step_idx, (images, labels) in enumerate(self.train_loader):
        images, labels = to_variable(images), to_variable(labels)
        batch_size = images.shape[0]
        logits = []
        if self.use_data_parallel:
            for model in self.parallel_models:
                logits.append(model(images))
        else:
            for model in self.models:
                logits.append(model(images))
        log_msg = 'Train Epoch {}, Step {}'.format(epoch, step_idx)
        for i in range(self.model_num):
            # Ground-truth loss plus the averaged KL term against the other
            # students (deep mutual learning).
            gt_loss = self.models[i].loss(logits[i], labels)
            kl_loss = 0
            for j in range(self.model_num):
                if i != j:
                    x = F.log_softmax(logits[i], axis=1)
                    y = fluid.layers.softmax(logits[j], axis=1)
                    kl_loss += fluid.layers.kldiv_loss(
                        x, y, reduction='batchmean')
            loss = gt_loss
            if self.model_num > 1:
                loss += kl_loss / (self.model_num - 1)
            prec = fluid.layers.accuracy(input=logits[i], label=labels, k=1)
            losses[i].update(loss.numpy(), batch_size)
            accs[i].update(prec.numpy() * 100, batch_size)
            if self.use_data_parallel:
                loss = self.parallel_models[i].scale_loss(loss)
                loss.backward()
                self.parallel_models[i].apply_collective_grads()
            else:
                loss.backward()
            self.optimizers[i].minimize(loss)
            if self.use_data_parallel:
                self.parallel_models[i].clear_gradients()
            else:
                self.models[i].clear_gradients()
            log_msg += ', model{}_loss: {:.3f}'.format(
                i + 1, losses[i].avg[0])
        if step_idx % self.log_freq == 0:
            logger.info(log_msg)
    return losses, accs
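# For reference, the per-model objective computed in the two DML methods
# above is
#     L_i = CE(logits_i, labels) + 1 / (M - 1) * sum_{j != i} KL(p_j || p_i)
# with M = model_num and p_k = softmax(logits_k): `kldiv_loss` with a
# log-softmax first argument computes KL(label distribution || input
# distribution), i.e. each student is pulled toward its peers' predictions.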
def train_one_epoch(model, train_loader, valid_loader, optimizer,
                    arch_optimizer, epoch, use_data_parallel, log_freq):
    total_losses = AvgrageMeter()
    accs = AvgrageMeter()
    ce_losses = AvgrageMeter()
    kd_losses = AvgrageMeter()
    val_accs = AvgrageMeter()
    model.student.train()
    step_id = 0
    for train_data, valid_data in izip(train_loader(), valid_loader()):
        batch_size = train_data[0].shape[0]
        # Fix the seed so the sampled architecture is identical on every
        # GPU; otherwise collective communication errors occur.
        np.random.seed(step_id * 2 * (epoch + 1))
        if use_data_parallel:
            total_loss, acc, ce_loss, kd_loss, _ = model._layers.loss(
                train_data, epoch)
        else:
            total_loss, acc, ce_loss, kd_loss, _ = model.loss(
                train_data, epoch)
        if use_data_parallel:
            total_loss = model.scale_loss(total_loss)
            total_loss.backward()
            model.apply_collective_grads()
        else:
            total_loss.backward()
        optimizer.minimize(total_loss)
        model.clear_gradients()
        total_losses.update(total_loss.numpy(), batch_size)
        accs.update(acc.numpy(), batch_size)
        ce_losses.update(ce_loss.numpy(), batch_size)
        kd_losses.update(kd_loss.numpy(), batch_size)
        # Re-seed for the architecture step, again keeping all GPUs in sync.
        np.random.seed(step_id * 2 * (epoch + 1) + 1)
        if use_data_parallel:
            arch_loss, _, _, _, arch_logits = model._layers.loss(
                valid_data, epoch)
        else:
            arch_loss, _, _, _, arch_logits = model.loss(valid_data, epoch)
        if use_data_parallel:
            arch_loss = model.scale_loss(arch_loss)
            arch_loss.backward()
            model.apply_collective_grads()
        else:
            arch_loss.backward()
        arch_optimizer.minimize(arch_loss)
        model.clear_gradients()
        probs = fluid.layers.softmax(arch_logits[-1])
        val_acc = fluid.layers.accuracy(input=probs, label=valid_data[4])
        val_accs.update(val_acc.numpy(), batch_size)
        if step_id % log_freq == 0:
            logger.info(
                "Train Epoch {}, Step {}, Lr {:.6f} total_loss {:.6f}; "
                "ce_loss {:.6f}, kd_loss {:.6f}, train_acc {:.6f}, "
                "search_valid_acc {:.6f};".format(
                    epoch, step_id, optimizer.current_step_lr(),
                    total_losses.avg[0], ce_losses.avg[0], kd_losses.avg[0],
                    accs.avg[0], val_accs.avg[0]))
        step_id += 1
def train_one_epoch(model, train_loader, optimizer, epoch, use_data_parallel,
                    log_freq):
    total_losses = AvgrageMeter()
    accs = AvgrageMeter()
    ce_losses = AvgrageMeter()
    kd_losses = AvgrageMeter()
    model.student.train()
    step_id = 0
    for train_data in train_loader():
        batch_size = train_data[0].shape[0]
        if use_data_parallel:
            total_loss, acc, ce_loss, kd_loss, _ = model._layers.loss(
                train_data, epoch)
        else:
            total_loss, acc, ce_loss, kd_loss, _ = model.loss(
                train_data, epoch)
        if use_data_parallel:
            total_loss = model.scale_loss(total_loss)
            total_loss.backward()
            model.apply_collective_grads()
        else:
            total_loss.backward()
        optimizer.minimize(total_loss)
        model.clear_gradients()
        total_losses.update(total_loss.numpy(), batch_size)
        accs.update(acc.numpy(), batch_size)
        ce_losses.update(ce_loss.numpy(), batch_size)
        kd_losses.update(kd_loss.numpy(), batch_size)
        if step_id % log_freq == 0:
            logger.info(
                "Train Epoch {}, Step {}, Lr {:.6f} total_loss {:.6f}; "
                "ce_loss {:.6f}, kd_loss {:.6f}, train_acc {:.6f};".format(
                    epoch, step_id, optimizer.current_step_lr(),
                    total_losses.avg[0], ce_losses.avg[0], kd_losses.avg[0],
                    accs.avg[0]))
        step_id += 1
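# A sketch of how the two distillation-training variants above pair with the
# `valid_one_epoch` that returns (ce_loss, acc): run the bi-level version
# (with `arch_optimizer`) during search, validating after each epoch.
# `run_search` and its signature are illustrative assumptions.
def run_search(model, train_loader, valid_loader, optimizer, arch_optimizer,
               num_epochs, use_data_parallel, log_freq):
    for epoch in range(num_epochs):
        train_one_epoch(model, train_loader, valid_loader, optimizer,
                        arch_optimizer, epoch, use_data_parallel, log_freq)
        ce_loss, acc = valid_one_epoch(model, valid_loader, epoch, log_freq)
        logger.info("Valid Epoch {}, ce_loss {:.6f}, acc {:.6f}".format(
            epoch, ce_loss, acc))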