Example #1
def train(train_loader, dml_model, dml_optimizer, args):
    dml_model.train()
    costs = [AvgrageMeter() for _ in range(dml_model.model_num)]
    accs = [AvgrageMeter() for _ in range(dml_model.model_num)]
    for step_id, (images, labels) in enumerate(train_loader):
        images, labels = to_variable(images), to_variable(labels)
        batch_size = images.shape[0]

        logits = dml_model.forward(images)
        precs = [
            fluid.layers.accuracy(
                input=l, label=labels, k=1) for l in logits
        ]
        losses = dml_model.loss(logits, labels)
        dml_optimizer.minimize(losses)

        for i in range(dml_model.model_num):
            accs[i].update(precs[i].numpy(), batch_size)
            costs[i].update(losses[i].numpy(), batch_size)
        model_names = dml_model.full_name()
        if step_id % args.log_freq == 0:
            log_msg = "Train Step {}".format(step_id)
            for model_id, (cost, acc) in enumerate(zip(costs, accs)):
                log_msg += ", {} loss: {:.6f} acc: {:.6f}".format(
                    model_names[model_id], cost.avg[0], acc.avg[0])
            logger.info(log_msg)
    return costs, accs
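
All of the examples on this page share the same AvgrageMeter bookkeeping helper, which is not shown in the snippets. Below is a minimal sketch consistent with how it is used here: an update(val, n) method and an avg attribute indexed as avg[0], since the values fed in are length-1 NumPy arrays from Tensor.numpy(). The real class ships with PaddleSlim's DARTS utilities and may differ in detail.

class AvgrageMeter(object):
    """Running weighted average over batches (minimal sketch)."""

    def __init__(self):
        self.reset()

    def reset(self):
        self.avg = 0
        self.sum = 0
        self.cnt = 0

    def update(self, val, n=1):
        # val is typically a length-1 np.ndarray, so avg stays an array
        # and callers can read a scalar via self.avg[0].
        self.sum += val * n
        self.cnt += n
        self.avg = self.sum / self.cnt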
Example #2
def train_one_epoch(model, architect, train_loader, valid_loader, optimizer,
                    epoch, use_data_parallel, log_freq):
    ce_losses = AvgrageMeter()
    accs = AvgrageMeter()
    model.train()

    step_id = 0
    # izip is itertools.izip on Python 2; on Python 3 the built-in zip
    # behaves the same. Both loaders are callables that return iterators.
    for train_data, valid_data in izip(train_loader(), valid_loader()):
        architect.step(train_data, valid_data)
        loss, acc = model_loss(model, train_data)

        if use_data_parallel:
            loss = model.scale_loss(loss)
            loss.backward()
            model.apply_collective_grads()
        else:
            loss.backward()

        optimizer.minimize(loss)
        model.clear_gradients()

        batch_size = train_data[0].shape[0]
        ce_losses.update(loss.numpy(), batch_size)
        accs.update(acc.numpy(), batch_size)

        if step_id % log_freq == 0:
            logger.info(
                "Train Epoch {}, Step {}, Lr {:.6f} loss {:.6f}; acc: {:.6f};".
                format(epoch, step_id, optimizer.current_step_lr(),
                       ce_losses.avg[0], accs.avg[0]))
        step_id += 1
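
Examples #2 and #5 call a model_loss helper that is not shown on this page. A plausible sketch, assuming a plain cross-entropy objective and top-1 accuracy (the definition in the source repository may differ):

def model_loss(model, data):
    # Hypothetical helper: cross-entropy loss and top-1 accuracy for one batch.
    images, labels = to_variable(data[0]), to_variable(data[1])
    logits = model(images)
    loss = fluid.layers.reduce_mean(
        fluid.layers.softmax_with_cross_entropy(logits, labels))
    acc = fluid.layers.accuracy(input=logits, label=labels, k=1)
    return loss, acc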
Example #3
    def valid_one_epoch(self, epoch):
        losses = []
        accs = []
        for i in range(self.model_num):
            if self.use_data_parallel:
                self.parallel_models[i].eval()
            else:
                self.models[i].eval()
            losses.append(AvgrageMeter())
            accs.append(AvgrageMeter())

        for images, labels in self.valid_loader:
            images, labels = to_variable(images), to_variable(labels)
            batch_size = images.shape[0]

            logits = []
            if self.use_data_parallel:
                for model in self.parallel_models:
                    logits.append(model(images))
            else:
                for model in self.models:
                    logits.append(model(images))
            for i in range(self.model_num):
                gt_loss = self.models[i].loss(logits[i], labels)
                # Mutual-learning term: accumulate KL(softmax(logits[j]) ||
                # softmax(logits[i])) over every peer j. kldiv_loss expects
                # log-probabilities as its first argument.
                kl_loss = 0
                for j in range(self.model_num):
                    if i != j:
                        x = F.log_softmax(logits[i], axis=1)
                        y = fluid.layers.softmax(logits[j], axis=1)
                        kl_loss += fluid.layers.kldiv_loss(
                            x, y, reduction='batchmean')

                loss = gt_loss
                if self.model_num > 1:
                    loss += kl_loss / (self.model_num - 1)

                prec = fluid.layers.accuracy(input=logits[i],
                                             label=labels,
                                             k=1)
                losses[i].update(loss.numpy(), batch_size)
                accs[i].update(prec.numpy() * 100, batch_size)
        return losses, accs
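
In both DML examples (#3 and #8), each model i minimizes its ground-truth loss plus the average KL divergence from its peers: loss_i = CE(logits_i, labels) + 1/(M-1) * sum over j != i of KL(softmax(logits_j) || softmax(logits_i)), where M is model_num. The argument order matters: fluid.layers.kldiv_loss(x, target) treats x as log-probabilities, which is why logits[i] goes through F.log_softmax while the peer's logits[j] only go through softmax.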
Example #4
def valid_one_epoch(model, valid_loader, epoch, log_freq):
    accs = AvgrageMeter()
    ce_losses = AvgrageMeter()
    model.student.eval()

    step_id = 0
    for valid_data in valid_loader():
        try:
            # Under DataParallel the wrapped Layer is reached via _layers.
            loss, acc, ce_loss, _, _ = model._layers.loss(valid_data, epoch)
        except AttributeError:
            loss, acc, ce_loss, _, _ = model.loss(valid_data, epoch)

        batch_size = valid_data[0].shape[0]
        ce_losses.update(ce_loss.numpy(), batch_size)
        accs.update(acc.numpy(), batch_size)
        step_id += 1
    return ce_losses.avg[0], accs.avg[0]
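
A hypothetical driver loop for this routine, pairing it with the train_one_epoch from Example #10; num_epochs and the best-accuracy bookkeeping are assumptions:

best_acc = 0.0
for epoch in range(num_epochs):
    train_one_epoch(model, train_loader, optimizer, epoch,
                    use_data_parallel, log_freq)
    ce_loss, acc = valid_one_epoch(model, valid_loader, epoch, log_freq)
    # Keep the best top-1 accuracy seen so far.
    best_acc = max(best_acc, acc)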
Example #5
def valid_one_epoch(model, valid_loader, epoch, log_freq):
    ce_losses = AvgrageMeter()
    accs = AvgrageMeter()
    model.eval()

    step_id = 0
    for valid_data in valid_loader():
        loss, acc = model_loss(model, valid_data)

        batch_size = valid_data[0].shape[0]
        ce_losses.update(loss.numpy(), batch_size)
        accs.update(acc.numpy(), batch_size)

        if step_id % log_freq == 0:
            logger.info(
                "Valid Epoch {}, Step {}, loss {:.6f}; acc: {:.6f};".format(
                    epoch, step_id, ce_losses.avg[0], accs.avg[0]))
        step_id += 1
Example #6
def train(model, train_reader, optimizer, epoch, drop_path_prob, args):
    objs = AvgrageMeter()
    top1 = AvgrageMeter()
    top5 = AvgrageMeter()
    model.train()

    for step_id, data in enumerate(train_reader()):
        image_np, label_np = data
        image = to_variable(image_np)
        label = to_variable(label_np)
        label.stop_gradient = True
        logits, logits_aux = model(image, drop_path_prob, True)

        prec1 = fluid.layers.accuracy(input=logits, label=label, k=1)
        prec5 = fluid.layers.accuracy(input=logits, label=label, k=5)
        loss = fluid.layers.reduce_mean(
            fluid.layers.softmax_with_cross_entropy(logits, label))
        if args.auxiliary:
            # Add the weighted auxiliary-head loss (standard in DARTS
            # evaluation training).
            loss_aux = fluid.layers.reduce_mean(
                fluid.layers.softmax_with_cross_entropy(logits_aux, label))
            loss = loss + args.auxiliary_weight * loss_aux

        if args.use_data_parallel:
            loss = model.scale_loss(loss)
            loss.backward()
            model.apply_collective_grads()
        else:
            loss.backward()

        optimizer.minimize(loss)
        model.clear_gradients()

        n = image.shape[0]
        objs.update(loss.numpy(), n)
        top1.update(prec1.numpy(), n)
        top5.update(prec5.numpy(), n)

        if step_id % args.log_freq == 0:
            logger.info(
                "Train Epoch {}, Step {}, loss {:.6f}, acc_1 {:.6f}, acc_5 {:.6f}".
                format(epoch, step_id, objs.avg[0], top1.avg[0], top5.avg[0]))
    return top1.avg[0]
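
train() above reads several attributes from args. A hypothetical argparse setup that would satisfy it; the flag names follow the code, but the default values are assumptions:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--auxiliary', action='store_true',
                    help='use the auxiliary classification head')
parser.add_argument('--auxiliary_weight', type=float, default=0.4)
parser.add_argument('--use_data_parallel', action='store_true')
parser.add_argument('--log_freq', type=int, default=50)
args = parser.parse_args()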
Example #7
def valid(model, valid_reader, epoch, args):
    objs = AvgrageMeter()
    top1 = AvgrageMeter()
    top5 = AvgrageMeter()
    model.eval()

    for step_id, data in enumerate(valid_reader()):
        image_np, label_np = data
        image = to_variable(image_np)
        label = to_variable(label_np)
        # Inference pass: drop_path_prob=0 and training=False.
        logits, _ = model(image, 0, False)
        prec1 = fluid.layers.accuracy(input=logits, label=label, k=1)
        prec5 = fluid.layers.accuracy(input=logits, label=label, k=5)
        loss = fluid.layers.reduce_mean(
            fluid.layers.softmax_with_cross_entropy(logits, label))

        n = image.shape[0]
        objs.update(loss.numpy(), n)
        top1.update(prec1.numpy(), n)
        top5.update(prec5.numpy(), n)
        if step_id % args.log_freq == 0:
            logger.info(
                "Valid Epoch {}, Step {}, loss {:.6f}, acc_1 {:.6f}, acc_5 {:.6f}".
                format(epoch, step_id, objs.avg[0], top1.avg[0], top5.avg[0]))
    return top1.avg[0]
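
Examples #6 and #7 are typically driven together. A sketch of the outer loop, assuming the usual DARTS convention of scaling the drop-path probability linearly over training (args.drop_path_prob and args.epochs are assumed fields):

for epoch in range(args.epochs):
    # Linear schedule: 0 at the first epoch, close to args.drop_path_prob
    # at the last one.
    drop_path_prob = args.drop_path_prob * epoch / args.epochs
    train_top1 = train(model, train_reader, optimizer, epoch,
                       drop_path_prob, args)
    valid_top1 = valid(model, valid_reader, epoch, args)
    logger.info("Epoch {}, train_acc {:.6f}, valid_acc {:.6f}".format(
        epoch, train_top1, valid_top1))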
Example #8
    def train_one_epoch(self, epoch):
        losses = []
        accs = []

        for i in range(self.model_num):
            if self.use_data_parallel:
                self.parallel_models[i].train()
            else:
                self.models[i].train()
            losses.append(AvgrageMeter())
            accs.append(AvgrageMeter())

        for step_idx, (images, labels) in enumerate(self.train_loader):
            images, labels = to_variable(images), to_variable(labels)
            batch_size = images.shape[0]

            logits = []
            if self.use_data_parallel:
                for model in self.parallel_models:
                    logits.append(model(images))
            else:
                for model in self.models:
                    logits.append(model(images))

            log_msg = 'Train Epoch {}, Step {}'.format(epoch, step_idx)
            for i in range(self.model_num):
                gt_loss = self.models[i].loss(logits[i], labels)
                # Mutual-learning term, as in Example #3: average KL from
                # every peer's soft predictions.
                kl_loss = 0
                for j in range(self.model_num):
                    if i != j:
                        x = F.log_softmax(logits[i], axis=1)
                        y = fluid.layers.softmax(logits[j], axis=1)
                        kl_loss += fluid.layers.kldiv_loss(
                            x, y, reduction='batchmean')

                loss = gt_loss
                if self.model_num > 1:
                    loss += kl_loss / (self.model_num - 1)

                prec = fluid.layers.accuracy(input=logits[i],
                                             label=labels,
                                             k=1)
                losses[i].update(loss.numpy(), batch_size)
                accs[i].update(prec.numpy() * 100, batch_size)

                if self.use_data_parallel:
                    loss = self.parallel_models[i].scale_loss(loss)
                    loss.backward()
                    self.parallel_models[i].apply_collective_grads()
                else:
                    loss.backward()
                self.optimizers[i].minimize(loss)
                if self.use_data_parallel:
                    self.parallel_models[i].clear_gradients()
                else:
                    self.models[i].clear_gradients()

                log_msg += ', model{}_loss: {:.3f}'.format(
                    i + 1, losses[i].avg[0])

            if step_idx % self.log_freq == 0:
                logger.info(log_msg)
        return losses, accs
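
The trainer above expects one optimizer per model (self.optimizers[i]). A minimal sketch of how they might be built, assuming momentum SGD; the learning rate and momentum values are purely illustrative:

optimizers = [
    fluid.optimizer.MomentumOptimizer(
        learning_rate=0.1,
        momentum=0.9,
        parameter_list=model.parameters())
    for model in models
]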
Example #9
def train_one_epoch(model, train_loader, valid_loader, optimizer,
                    arch_optimizer, epoch, use_data_parallel, log_freq):
    total_losses = AvgrageMeter()
    accs = AvgrageMeter()
    ce_losses = AvgrageMeter()
    kd_losses = AvgrageMeter()
    val_accs = AvgrageMeter()
    model.student.train()

    step_id = 0
    for train_data, valid_data in izip(train_loader(), valid_loader()):
        batch_size = train_data[0].shape[0]
        # Make sure the sampled arch is the same on every GPU, otherwise an error will occur.
        np.random.seed(step_id * 2 * (epoch + 1))
        if use_data_parallel:
            total_loss, acc, ce_loss, kd_loss, _ = model._layers.loss(
                train_data, epoch)
        else:
            total_loss, acc, ce_loss, kd_loss, _ = model.loss(
                train_data, epoch)

        if use_data_parallel:
            total_loss = model.scale_loss(total_loss)
            total_loss.backward()
            model.apply_collective_grads()
        else:
            total_loss.backward()
        optimizer.minimize(total_loss)
        model.clear_gradients()
        total_losses.update(total_loss.numpy(), batch_size)
        accs.update(acc.numpy(), batch_size)
        ce_losses.update(ce_loss.numpy(), batch_size)
        kd_losses.update(kd_loss.numpy(), batch_size)

        # Make sure the sampled arch is the same on every GPU, otherwise an error will occur.
        np.random.seed(step_id * 2 * (epoch + 1) + 1)
        if use_data_parallel:
            arch_loss, _, _, _, arch_logits = model._layers.loss(
                valid_data, epoch)
        else:
            arch_loss, _, _, _, arch_logits = model.loss(valid_data, epoch)

        if use_data_parallel:
            arch_loss = model.scale_loss(arch_loss)
            arch_loss.backward()
            model.apply_collective_grads()
        else:
            arch_loss.backward()
        arch_optimizer.minimize(arch_loss)
        model.clear_gradients()
        probs = fluid.layers.softmax(arch_logits[-1])
        val_acc = fluid.layers.accuracy(input=probs, label=valid_data[4])
        val_accs.update(val_acc.numpy(), batch_size)

        if step_id % log_freq == 0:
            logger.info(
                "Train Epoch {}, Step {}, Lr {:.6f} total_loss {:.6f}; ce_loss {:.6f}, kd_loss {:.6f}, train_acc {:.6f}, search_valid_acc {:.6f};"
                .format(epoch, step_id, optimizer.current_step_lr(),
                        total_losses.avg[0], ce_losses.avg[0],
                        kd_losses.avg[0], accs.avg[0], val_accs.avg[0]))

        step_id += 1
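
The paired np.random.seed calls are what keep multi-GPU search consistent: the weight step is seeded with step_id * 2 * (epoch + 1) and the architecture step with step_id * 2 * (epoch + 1) + 1, so every GPU samples the same random architecture at each step while the two half-steps still draw from different seeds.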
Example #10
def train_one_epoch(model, train_loader, optimizer, epoch, use_data_parallel,
                    log_freq):
    total_losses = AvgrageMeter()
    accs = AvgrageMeter()
    ce_losses = AvgrageMeter()
    kd_losses = AvgrageMeter()
    model.student.train()

    step_id = 0
    for train_data in train_loader():
        batch_size = train_data[0].shape[0]

        if use_data_parallel:
            total_loss, acc, ce_loss, kd_loss, _ = model._layers.loss(
                train_data, epoch)
        else:
            total_loss, acc, ce_loss, kd_loss, _ = model.loss(
                train_data, epoch)

        if use_data_parallel:
            total_loss = model.scale_loss(total_loss)
            total_loss.backward()
            model.apply_collective_grads()
        else:
            total_loss.backward()
        optimizer.minimize(total_loss)
        model.clear_gradients()
        total_losses.update(total_loss.numpy(), batch_size)
        accs.update(acc.numpy(), batch_size)
        ce_losses.update(ce_loss.numpy(), batch_size)
        kd_losses.update(kd_loss.numpy(), batch_size)

        if step_id % log_freq == 0:
            logger.info(
                "Train Epoch {}, Step {}, Lr {:.6f} total_loss {:.6f}; ce_loss {:.6f}, kd_loss {:.6f}, train_acc {:.6f};"
                .format(epoch, step_id, optimizer.current_step_lr(),
                        total_losses.avg[0], ce_losses.avg[0],
                        kd_losses.avg[0], accs.avg[0]))
        step_id += 1