Example 1
    def test(self):
        epoch = self.scheduler.last_epoch + 1
        self.ckp.write_log('\nEvaluation:')
        self.loss.start_log(train=False)
        self.model.eval()

        timer_test = utility.timer()
        timer_test.tic()
        with torch.no_grad():
            for img, label in tqdm(self.loader_test, ncols=80):
                img, label = self.prepare(img, label)
                prediction = self.model(img)
                self.loss(prediction, label, train=False)

        self.loss.end_log(len(self.loader_test.dataset), train=False)

        # Lower is better
        best = self.loss.log_test.min(0)
        for i, measure in enumerate(('Loss', 'Top1 error', 'Top5 error')):
            self.ckp.write_log('{}: {:.3f} (Best: {:.3f} from epoch {})'.
                               format(measure, self.loss.log_test[-1, i], best[0][i], best[1][i] + 1))

        if hasattr(self, 'epochs_searching') and self.converging:
            best = self.loss.log_test[:self.epochs_searching, :].min(0)
            self.ckp.write_log('\nBest during searching')
            for i, measure in enumerate(('Loss', 'Top1 error', 'Top5 error')):
                self.ckp.write_log('{}: {:.3f} from epoch {}'.format(measure, best[0][i], best[1][i]))
        self.ckp.write_log('Time: {:.2f}s\n'.format(timer_test.toc()), refresh=True)

        is_best = self.loss.log_test[-1, self.args.top] <= best[0][self.args.top]
        self.ckp.save(self, epoch, converging=self.converging, is_best=is_best)
        self.ckp.save_results(epoch, self.model)

        # scheduler.step is moved from training procedure to test procedure
        self.scheduler.step()
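
All of the examples in this listing rely on a small `utility.timer` helper with `tic`/`toc`/`hold`/`release` methods and an `acc` accumulator. Its definition is not part of the listing; a minimal sketch consistent with the way it is called here (an assumption, not necessarily the repository's exact implementation) is:

    import time

    class timer():
        def __init__(self):
            self.acc = 0
            self.tic()

        def tic(self):
            # start (or restart) the stopwatch
            self.t0 = time.time()

        def toc(self):
            # seconds elapsed since the last tic()
            return time.time() - self.t0

        def hold(self):
            # accumulate the elapsed interval into self.acc
            self.acc += self.toc()

        def release(self):
            # return the accumulated time and reset the accumulator
            ret = self.acc
            self.acc = 0
            return ret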
Example 2
    def train(self):
        self.loss.step()
        epoch = self.scheduler.last_epoch + 1
        lr = self.scheduler.get_lr()[0]

        self.ckp.write_log('[Epoch {}]\tLearning rate: {:.2e}'.format(
            epoch, Decimal(lr)))
        self.loss.start_log()
        self.model.train()
        # from IPython import embed; embed(); exit()
        timer_data, timer_model = utility.timer(), utility.timer()
        for batch, (lr, hr, _, idx_scale) in enumerate(self.loader_train):
            lr, hr = self.prepare([lr, hr])
            timer_data.hold()
            timer_model.tic()
            # from IPython import embed; embed(); exit()

            self.optimizer.zero_grad()
            sr = self.model(idx_scale, lr)
            loss = self.loss(sr, hr)
            if loss.item() < self.args.skip_threshold * self.error_last:
                loss.backward()
                self.optimizer.step()
            else:
                print('Skip this batch {}! (Loss: {})'.format(
                    batch + 1, loss.item()))

            timer_model.hold()

            if (batch + 1) % self.args.print_every == 0:
                self.ckp.write_log('[{}/{}]\t{}\t{:.3f}+{:.3f}s'.format(
                    (batch + 1) * self.args.batch_size,
                    len(self.loader_train.dataset),
                    self.loss.display_loss(batch), timer_model.release(),
                    timer_data.release()))

            timer_data.tic()

        self.loss.end_log(len(self.loader_train))
        self.error_last = self.loss.log[-1, -1]
        self.scheduler.step()
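
Two details of this training loop are worth spelling out. First, `self.prepare([lr, hr])` moves the batch to the training device; a minimal sketch matching the list-style call used here (the `cpu` flag and helper body are assumptions, not the repository's exact code):

    import torch

    def prepare(self, tensors):
        # Hypothetical helper: move every tensor in the batch to the device.
        device = torch.device('cpu' if self.args.cpu else 'cuda')
        return [t.to(device) for t in tensors]

Second, inside the loop the name `lr` is reused for the low-resolution batch, shadowing the learning rate read from the scheduler a few lines earlier; the learning rate is only needed for logging before the loop, so this is harmless here, but the shadowing is easy to trip over. The `skip_threshold` test simply discards batches whose loss spikes far above the previous epoch's final training loss, so a single corrupted batch cannot derail the optimizer.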
Example 3
    def test(self):
        epoch = self.scheduler.last_epoch
        self.ckp.write_log('\nEvaluation:')
        self.ckp.add_log(torch.zeros(1, len(self.scale)))
        self.model.eval()

        timer_test = utility.timer()
        with torch.no_grad():
            for idx_scale, scale in enumerate(self.scale):
                eval_acc = 0
                self.loader_test.dataset.set_scale(idx_scale)
                tqdm_test = tqdm(self.loader_test, ncols=80)
                for idx_img, (lr, hr, filename, _) in enumerate(tqdm_test):

                    # from IPython import embed; embed();
                    filename = filename[0]
                    no_eval = (hr.nelement() == 1)
                    if not no_eval:
                        lr, hr = self.prepare([lr, hr])
                    else:
                        lr = self.prepare([lr])[0]

                    sr = self.model(idx_scale, lr)
                    sr = utility.quantize(sr, self.args.rgb_range)

                    save_list = [sr]
                    if not no_eval:
                        eval_acc += utility.calc_psnr(
                            sr,
                            hr,
                            scale,
                            self.args.rgb_range,
                            benchmark=self.loader_test.dataset.benchmark)
                        save_list.extend([lr, hr])

                    if self.args.save_results:
                        self.ckp.save_results(filename, save_list, scale)

                self.ckp.log[-1, idx_scale] = eval_acc / len(self.loader_test)
                best = self.ckp.log.max(0)
                self.ckp.write_log(
                    '[{} x{}]\tPSNR: {:.3f} (Best: {:.3f} @epoch {})'.format(
                        self.args.data_test, scale, self.ckp.log[-1,
                                                                 idx_scale],
                        best[0][idx_scale], best[1][idx_scale] + 1))

        self.ckp.write_log('Total time: {:.2f}s\n'.format(timer_test.toc()),
                           refresh=True)
        if not self.args.test_only:
            self.ckp.save(self, epoch, is_best=(best[1][0] + 1 == epoch))
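
Each super-resolved output is first passed through `utility.quantize`, which clamps and rounds it back into the valid pixel range, and then scored with `utility.calc_psnr`. A simplified sketch of the PSNR computation (the repository's helper may additionally convert benchmark images to the Y channel; the shave widths are assumptions):

    import math
    import torch

    def calc_psnr(sr, hr, scale, rgb_range, benchmark=False):
        # Normalize the difference so that the peak signal value is 1.
        diff = (sr - hr) / rgb_range
        # Shave a border: SR evaluation conventionally ignores boundary pixels.
        shave = scale if benchmark else scale + 6
        valid = diff[..., shave:-shave, shave:-shave]
        mse = valid.pow(2).mean().item()
        # PSNR = 10 * log10(MAX^2 / MSE) with MAX = 1 after normalization.
        return -10 * math.log10(mse)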
Example 4
    def forward(self, x):
        body_list = self.body_list
        timer = utility.timer()

        for i in range(len(body_list)):
            layer = body_list[i]
            # layer = nn.Sequential(layer)
            timer.tic()
            x = layer(x)
            self.total_time[i] += timer.toc()
            # if isinstance(body_list[i], common.BasicBlock):
            #     self.total_time[i] += float(end_time - begin_time)

        # x = self.features(x)
        x = x.view(x.size(0), -1)
        # print(x.shape)
        x = self.classifier(x)
        return x
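
The per-layer timing in this forward pass reads the wall clock immediately after each layer call. On a GPU that can under-report the true cost, because CUDA kernels are launched asynchronously; a hedged variant of the same loop that synchronizes around each measurement:

    for i, layer in enumerate(self.body_list):
        if x.is_cuda:
            torch.cuda.synchronize()   # wait for pending kernels before timing
        timer.tic()
        x = layer(x)
        if x.is_cuda:
            torch.cuda.synchronize()   # make sure this layer has finished
        self.total_time[i] += timer.toc()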
Example 5
    def train(self):
        self.loss.step()
        epoch = self.scheduler.last_epoch + 1
        learning_rate = self.scheduler.get_lr()[0]
        idx_scale = self.args.scale
        if not self.converging:
            stage = 'Searching Stage'
        else:
            stage = 'Finetuning Stage (Searching Epoch {})'.format(
                self.epochs_searching)
        self.ckp.write_log('\n[Epoch {}]\tLearning rate: {:.2e}\t{}'.format(
            epoch, Decimal(learning_rate), stage))
        self.loss.start_log()
        self.model.train()
        timer_data, timer_model = utility.timer(), utility.timer()

        for batch, (lr, hr, _) in enumerate(self.loader_train):
            # if batch <= 1200:
            lr, hr = self.prepare([lr, hr])

            timer_data.hold()
            timer_model.tic()

            self.optimizer.zero_grad()
            sr = self.model(idx_scale, lr)
            loss = self.loss(sr, hr)

            if loss.item() < self.args.skip_threshold * self.error_last:
                # Adam
                loss.backward()
                self.optimizer.step()
                # proximal operator
                if not self.converging:
                    self.model.get_model().proximal_operator(learning_rate)
                    # check the compression ratio
                    if (batch +
                            1) % self.args.compression_check_frequency == 0:
                        # set the channels of the potential pruned model
                        self.model.get_model().set_parameters()
                        # update the flops and number of parameters
                        self.flops_prune = get_flops(self.model.get_model())
                        self.flops_compression_ratio = self.flops_prune / self.flops
                        self.params_prune = get_parameters(
                            self.model.get_model())
                        self.params_compression_ratio = self.params_prune / self.params
                        self.flops_ratio_log.append(
                            self.flops_compression_ratio)
                        self.params_ratio_log.append(
                            self.params_compression_ratio)
                        if self.terminate():
                            break
                    if (batch + 1) % 1000 == 0:
                        self.model.get_model().latent_vector_distribution(
                            epoch, batch + 1, self.ckp.dir)
                        self.model.get_model().per_layer_compression_ratio(
                            epoch, batch + 1, self.ckp.dir)

            else:
                print('Skip this batch {}! (Loss: {}) (Threshold: {})'.format(
                    batch + 1, loss.item(),
                    self.args.skip_threshold * self.error_last))

            timer_model.hold()

            if (batch + 1) % self.args.print_every == 0:
                self.ckp.write_log(
                    '[{}/{}]\t{}\t{:.3f}+{:.3f}s'
                    '\tFlops Ratio: {:.2f}% = {:.4f} G / {:.4f} G'
                    '\tParams Ratio: {:.2f}% = {:.2f} k / {:.2f} k'.format(
                        (batch + 1) * self.args.batch_size,
                        len(self.loader_train.dataset),
                        self.loss.display_loss(batch), timer_model.release(),
                        timer_data.release(),
                        self.flops_compression_ratio * 100,
                        self.flops_prune / 10.**9, self.flops / 10.**9,
                        self.params_compression_ratio * 100,
                        self.params_prune / 10.**3, self.params / 10.**3))
            timer_data.tic()
            # else:
            #     break

        self.loss.end_log(len(self.loader_train))
        self.error_last = self.loss.log[-1, -1]
        # self.error_last = loss
        self.scheduler.step()
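
In the searching stage this loop follows every optimizer step with `proximal_operator(learning_rate)`, which shrinks the latent vectors that decide which channels survive, and periodically re-derives the pruned architecture to track the FLOPs and parameter compression ratios. The operator itself is not in this listing; for a plain L1 sparsity penalty it reduces to element-wise soft-thresholding, sketched below (the `latent_vectors` attribute and the `reg` default are assumptions):

    import torch

    def proximal_operator(self, lr, reg=1e-4):
        # Soft-thresholding: the proximal map of lr * reg * ||v||_1.
        with torch.no_grad():
            for v in self.latent_vectors:  # hypothetical container of latent vectors
                threshold = lr * reg
                v.copy_(torch.sign(v) * torch.clamp(v.abs() - threshold, min=0))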
Example 6
    def train(self):
        epoch, lr = self.start_epoch()
        self.model.begin(
            epoch, self.ckp
        )  #TODO: investigate why not using self.model.train() directly
        self.loss.start_log()
        timer_data, timer_model = utility.timer(), utility.timer()
        n_samples = 0

        for batch, (img, label) in enumerate(self.loader_train):
            # embed()
            if (self.args.data_train == 'ImageNet' or self.args.model.lower()
                    == 'efficientnet_hh') and not self.converging:
                if self.args.model == 'ResNet_ImageNet_HH' or self.args.model == 'RegNet_ImageNet_HH':
                    divider = 4
                else:
                    divider = 2
                print('Divider is {}'.format(divider))
                batch_size = img.shape[0] // divider
                img = img[:batch_size]
                label = label[:batch_size]
            # embed()
            img, label = self.prepare(img, label)
            n_samples += img.size(0)

            timer_data.hold()
            timer_model.tic()

            self.optimizer.zero_grad()
            prediction = self.model(img)
            # embed()
            if (not self.converging and self.args.distillation_stage == 'c') or \
                    (self.converging and not self.args.distillation_final):
                loss, _ = self.loss(prediction, label)
            else:
                with torch.no_grad():
                    prediction_teacher = self.model_teacher(img)
                if not self.args.distillation_inter:
                    prediction = [prediction]
                    prediction_teacher = [prediction_teacher]
                loss, _ = self.loss(prediction[0], label)

                if self.args.distillation_final == 'kd':
                    loss_distill_final = distillation(prediction[0],
                                                      prediction_teacher[0],
                                                      T=4)
                    loss = 0.4 * loss_distill_final + 0.6 * loss
                elif self.args.distillation_inter == 'sp':
                    loss_distill_final = similarity_preserving(
                        prediction[0], prediction_teacher[0]) * 3000
                    loss = loss_distill_final + loss
                if self.args.distillation_inter == 'kd':
                    loss_distill_inter = 0
                    for p, pt in zip(prediction[1], prediction_teacher[1]):
                        loss_distill_inter += self.loss_mse(p, pt)
                        # embed()
                    loss_distill_inter = loss_distill_inter / len(
                        prediction[1]) * self.args.distill_beta
                    loss = loss_distill_inter + loss
                elif self.args.distillation_inter == 'sp':
                    loss_distill_inter = 0
                    for p, pt in zip(prediction[1], prediction_teacher[1]):
                        loss_distill_inter += similarity_preserving(p, pt)
                    loss_distill_inter = loss_distill_inter / len(
                        prediction[1]) * 3000 * self.args.distill_beta
                    # loss_distill_inter = similarity_preserving(prediction[1], prediction_teacher[1])
                    loss = loss_distill_inter + loss
                # else: self.args.distillation_inter == '', do nothing here

            # SGD
            loss.backward()
            self.optimizer.step()
            if not self.converging and self.args.use_prox:
                # if epoch > 5:
                # proximal operator
                self.model.get_model().proximal_operator(lr)
                if (batch + 1) % self.args.compression_check_frequency == 0:
                    self.model.get_model().set_parameters()
                    self.flops_prune = get_flops(self.model.get_model())
                    self.flops_compression_ratio = self.flops_prune / self.flops
                    self.params_prune = get_parameters(self.model.get_model())
                    self.params_compression_ratio = self.params_prune / self.params
                    self.flops_ratio_log.append(self.flops_compression_ratio)
                    self.params_ratio_log.append(self.params_compression_ratio)
                    if self.terminate():
                        break
                if (batch + 1) % 300 == 0:
                    self.model.get_model().latent_vector_distribution(
                        epoch, batch + 1, self.ckp.dir)
                    self.model.get_model().per_layer_compression_ratio(
                        epoch, batch + 1, self.ckp.dir)

            timer_model.hold()

            if (batch + 1) % self.args.print_every == 0:
                s = '{}/{} ({:.0f}%)\tNLL: {:.3f} Top1: {:.2f} / Top5: {:.2f}\t'.format(
                    n_samples, len(self.loader_train.dataset),
                    100.0 * n_samples / len(self.loader_train.dataset),
                    *(self.loss.log_train[-1, :] / n_samples))
                if self.converging or (not self.converging and
                                       self.args.distillation_stage == 's'):
                    if self.args.distillation_final:
                        s += 'DFinal: {:.3f} '.format(loss_distill_final)
                    if self.args.distillation_inter:
                        s += 'DInter: {:.3f}'.format(loss_distill_inter)
                    if self.args.distillation_final or self.args.distillation_inter:
                        s += '\t'
                s += 'Time: {:.1f}+{:.1f}s\t'.format(timer_model.release(),
                                                     timer_data.release())
                if hasattr(self, 'flops_compression_ratio') and hasattr(
                        self, 'params_compression_ratio'):
                    s += 'Flops: {:.2f}% = {:.4f} [G] / {:.4f} [G]\t' \
                         'Params: {:.2f}% = {:.2f} [k] / {:.2f} [k]'.format(
                         self.flops_compression_ratio * 100, self.flops_prune / 10. ** 9, self.flops / 10. ** 9,
                         self.params_compression_ratio * 100, self.params_prune / 10. ** 3, self.params / 10. ** 3)

                self.ckp.write_log(s)

            if self.args.summary:
                if (batch + 1) % 50 == 0:
                    for name, param in self.model.named_parameters():
                        if name.find('features') >= 0 and name.find(
                                'weight') >= 0:
                            self.writer.add_scalar(
                                'data/' + name,
                                param.clone().cpu().data.abs().mean().numpy(),
                                1000 * (epoch - 1) + batch)
                            if param.grad is not None:
                                self.writer.add_scalar(
                                    'data/' + name + '_grad',
                                    param.grad.clone().cpu().data.abs().mean().
                                    numpy(), 1000 * (epoch - 1) + batch)
                if (batch + 1) == 500:
                    for name, param in self.model.named_parameters():
                        if name.find('features') >= 0 and name.find(
                                'weight') >= 0:
                            self.writer.add_histogram(
                                name,
                                param.clone().cpu().data.numpy(),
                                1000 * (epoch - 1) + batch)
                            if param.grad is not None:
                                self.writer.add_histogram(
                                    name + '_grad',
                                    param.grad.clone().cpu().data.numpy(),
                                    1000 * (epoch - 1) + batch)
            timer_data.tic()
            if not self.converging and epoch == self.args.epochs_grad and batch == 1:
                break
        self.model.log(self.ckp)  # TODO: why this is used?
        self.loss.end_log(len(self.loader_train.dataset))
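
The distillation branches above call `distillation(student, teacher, T=4)` and blend the result with the label loss as 0.4 · distillation + 0.6 · cross-entropy; `similarity_preserving` plays the analogous role for the 'sp' option. A sketch of a standard temperature-scaled knowledge-distillation term consistent with that call (not necessarily the repository's exact function):

    import torch.nn.functional as F

    def distillation(student_logits, teacher_logits, T=4):
        # KL divergence between temperature-softened distributions,
        # scaled by T^2 so gradients keep a comparable magnitude.
        log_p_student = F.log_softmax(student_logits / T, dim=1)
        p_teacher = F.softmax(teacher_logits / T, dim=1)
        return F.kl_div(log_p_student, p_teacher, reduction='batchmean') * (T * T)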
Example 7
    def train(self):
        epoch = self.start_epoch()
        self.model.begin(
            epoch, self.ckp
        )  #TODO: investigate why not using self.model.train() directly
        self.loss.start_log()
        # modules = self.model.get_model().find_modules() #TODO: merge this
        timer_data, timer_model = utility.timer(), utility.timer()
        n_samples = 0

        for batch, (img, label) in enumerate(self.loader_train):
            img, label = self.prepare(img, label)
            n_samples += img.size(0)

            timer_data.hold()
            timer_model.tic()

            # Forward pass and computing the loss function
            self.optimizer.zero_grad()
            prediction = self.model(img)
            loss, _ = self.loss(prediction, label)
            lossp = self.model.get_model().compute_loss(
                batch + 1, epoch, self.converging)
            if not self.converging:
                # use projection loss for SGD and don't use it for PG
                if self.args.optimizer == 'SGD':
                    loss = loss + sum(lossp)
            else:
                # use distillation loss
                if self.args.distillation:
                    with torch.no_grad():
                        prediction_teacher = self.model_teacher(img)
                    loss_distill = distillation(prediction,
                                                prediction_teacher,
                                                T=4)
                    loss = loss_distill * 0.4 + loss * 0.6
            # Backward pass and computing the gradients
            loss.backward()
            # Update learning rate based on the gradients. ResNet20, 56, 164, and Wide ResNet

            if not self.converging and self.lr_adjust_flag:
                self.model.get_model().update_grad_ratio()
                self.scheduler.running_grad_ratio = self.model.get_model(
                ).running_grad_ratio
                for param_group, lr in zip(self.optimizer.param_groups,
                                           self.scheduler.get_lr()):
                    param_group['lr'] = lr

            # Update the parameters
            if self.args.optimizer == 'SGD':
                self.optimizer.step()
            elif self.args.optimizer == 'PG':
                # Gradient step
                self.optimizer.step()
                if not self.converging and (batch +
                                            1) % self.args.prox_freq == 0:
                    # Anneal the regularization factor
                    reg = reg_anneal(lossp[0], self.args.regularization_factor,
                                     self.args.annealing_factor,
                                     self.args.annealing_t1,
                                     self.args.annealing_t2)
                    # Proximal step
                    self.model.get_model().proximal_operator(
                        self.scheduler.get_lr()[-1], batch + 1, reg)
            elif self.args.optimizer == 'APG':  # TODO: still interesting to investigate APG
                self.optimizer.converging = self.converging
                self.optimizer.batch = batch + 1
                self.optimizer.step()

            timer_model.hold()

            if (batch + 1) % self.args.print_every == 0:
                s = '{}/{} ({:.0f}%)\tTotal: {:.3f} / P1: {:.3f}'.\
                    format(n_samples, len(self.loader_train.dataset),
                           100.0 * n_samples / len(self.loader_train.dataset), loss, lossp[0])
                if len(lossp) == 2:
                    s += ' / P2: {:.3f}'.format(lossp[1])
                if not self.converging:
                    if self.lr_adjust_flag:
                        s += ' / rP: {:.3f}'.format(
                            self.model.get_model().running_grad_ratio)
                else:
                    if self.args.distillation:
                        s += ' / Dis: {:.3f}'.format(loss_distill)
                s += ' / NLL: {:.3f}\tTop1: {:.2f} / Top5: {:.2f}\tTime: {:.1f}+{:.1f}s'.\
                    format(*(self.loss.log_train[-1, :] / n_samples), timer_model.release(), timer_data.release())
                self.ckp.write_log(s)

            if self.args.summary:
                if (batch + 1) % 50 == 0:
                    for name, param in self.model.named_parameters():
                        if name.find('features') >= 0 and name.find(
                                'weight') >= 0:
                            self.writer.add_scalar(
                                'data/' + name,
                                param.clone().cpu().data.abs().mean().numpy(),
                                1000 * (epoch - 1) + batch)
                            if param.grad is not None:
                                self.writer.add_scalar(
                                    'data/' + name + '_grad',
                                    param.grad.clone().cpu().data.abs().mean().
                                    numpy(), 1000 * (epoch - 1) + batch)
                if (batch + 1) == 500:
                    for name, param in self.model.named_parameters():
                        if name.find('features') >= 0 and name.find(
                                'weight') >= 0:
                            self.writer.add_histogram(
                                name,
                                param.clone().cpu().data.numpy(),
                                1000 * (epoch - 1) + batch)
                            if param.grad is not None:
                                self.writer.add_histogram(
                                    name + '_grad',
                                    param.grad.clone().cpu().data.numpy(),
                                    1000 * (epoch - 1) + batch)

            timer_data.tic()
        self.model.log(self.ckp)
        self.loss.end_log(len(self.loader_train.dataset))
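
Under the 'PG' (proximal gradient) optimizer, every `prox_freq` batches the regularization strength is annealed with `reg_anneal(...)` before the proximal step, so the sparsity pressure relaxes as the projection loss shrinks. The exact schedule is not included in this listing; one plausible reading, with `annealing_t1`/`annealing_t2` acting as loss thresholds, is sketched below (the rule itself is an assumption):

    def reg_anneal(lossp, regularization_factor, annealing_factor, t1, t2):
        # Hypothetical annealing rule: once the projection loss drops below
        # the thresholds, scale the regularization factor down.
        if lossp < t2:
            return regularization_factor * annealing_factor ** 2
        elif lossp < t1:
            return regularization_factor * annealing_factor
        return regularization_factor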
Example 8
    def train(self):
        epoch, lr = self.start_epoch()
        self.model.begin(epoch, self.ckp) #TODO: investigate why not using self.model.train() directly
        self.loss.start_log()
        timer_data, timer_model = utility.timer(), utility.timer()
        n_samples = 0

        for batch, (img, label) in enumerate(self.loader_train):
            img, label = self.prepare(img, label)
            n_samples += img.size(0)

            timer_data.hold()
            timer_model.tic()

            self.optimizer.zero_grad()
            prediction = self.model(img)
            loss, _ = self.loss(prediction, label)

            # SGD
            loss.backward()
            self.optimizer.step()
            # proximal operator
            if not self.converging:
                self.model.get_model().proximal_operator(lr)
                if (batch + 1) % self.args.compression_check_frequency == 0:
                    self.model.get_model().set_parameters()
                    self.flops_prune = get_flops(self.model.get_model())
                    self.flops_compression_ratio = self.flops_prune / self.flops
                    self.params_prune = get_parameters(self.model.get_model())
                    self.params_compression_ratio = self.params_prune / self.params
                    self.flops_ratio_log.append(self.flops_compression_ratio)
                    self.params_ratio_log.append(self.params_compression_ratio)
                    # if self.terminate():
                    #     break
                if (batch + 1) % 300 == 0:
                    self.model.get_model().latent_vector_distribution(epoch, batch + 1, self.ckp.dir)
                    self.model.get_model().per_layer_compression_ratio(epoch, batch + 1, self.ckp.dir)

            timer_model.hold()

            if (batch + 1) % self.args.print_every == 0:
                self.ckp.write_log('{}/{} ({:.0f}%)\t'
                    'NLL: {:.3f}\tTop1: {:.2f} / Top5: {:.2f}\t'
                    'Time: {:.1f}+{:.1f}s\t'
                    'Flops Ratio: {:.2f}% = {:.4f} [G] / {:.4f} [G]\t'
                    'Params Ratio: {:.2f}% = {:.2f} [k] / {:.2f} [k]'.format(
                    n_samples, len(self.loader_train.dataset), 100.0 * n_samples / len(self.loader_train.dataset),
                    *(self.loss.log_train[-1, :] / n_samples),
                    timer_model.release(), timer_data.release(),
                    self.flops_compression_ratio * 100, self.flops_prune / 10. ** 9, self.flops / 10. ** 9,
                    self.params_compression_ratio * 100, self.params_prune / 10. ** 3, self.params / 10. ** 3))
            if not self.converging and self.terminate():
                break

            if self.args.summary:
                if (batch + 1) % 50 == 0:
                    for name, param in self.model.named_parameters():
                        if name.find('features') >= 0 and name.find('weight') >= 0:
                            self.writer.add_scalar('data/' + name, param.clone().cpu().data.abs().mean().numpy(),
                                                   1000 * (epoch - 1) + batch)
                            if param.grad is not None:
                                self.writer.add_scalar('data/' + name + '_grad',
                                                       param.grad.clone().cpu().data.abs().mean().numpy(),
                                                       1000 * (epoch - 1) + batch)
                if (batch + 1) == 500:
                    for name, param in self.model.named_parameters():
                        if name.find('features') >= 0 and name.find('weight') >= 0:
                            self.writer.add_histogram(name, param.clone().cpu().data.numpy(), 1000 * (epoch - 1) + batch)
                            if param.grad is not None:
                                self.writer.add_histogram(name + '_grad', param.grad.clone().cpu().data.numpy(),
                                                      1000 * (epoch - 1) + batch)

            timer_data.tic()
        self.model.log(self.ckp) # TODO: why is this used?
        self.loss.end_log(len(self.loader_train.dataset))
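
The searching loops above report compression by dividing `get_flops`/`get_parameters` of the pruned network by the values stored for the dense one. Parameter counting is a one-liner; FLOP counting normally walks the conv and linear layers and sums multiply-accumulates. A rough sketch under the simplifying (and assumed) premise that every conv layer sees the same spatial resolution:

    import torch.nn as nn

    def get_parameters(model):
        # Total number of trainable parameters.
        return sum(p.numel() for p in model.parameters() if p.requires_grad)

    def get_flops(model, spatial_size=(32, 32)):
        # Rough sketch: per-layer multiply-accumulates for Conv2d/Linear,
        # assuming every conv produces feature maps of `spatial_size`.
        h, w = spatial_size
        flops = 0
        for m in model.modules():
            if isinstance(m, nn.Conv2d):
                k_h, k_w = m.kernel_size
                flops += k_h * k_w * m.in_channels * m.out_channels * h * w // m.groups
            elif isinstance(m, nn.Linear):
                flops += m.in_features * m.out_features
        return flops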
Example 9
    def train(self):
        epoch, _ = self.start_epoch()
        self.model.begin(
            epoch, self.ckp
        )  #TODO: investigate why not using self.model.train() directly
        self.loss.start_log()

        timer_data, timer_model = utility.timer(), utility.timer()
        n_samples = 0

        for batch, (img, label) in enumerate(self.loader_train):
            # if batch<=1:
            img, label = self.prepare(img, label)
            n_samples += img.size(0)

            timer_data.hold()
            timer_model.tic()

            self.optimizer.zero_grad()
            # embed()
            prediction = self.model(img)
            loss, _ = self.loss(prediction, label)

            loss.backward()
            self.optimizer.step()

            timer_model.hold()
            if (batch + 1) % self.args.print_every == 0:
                self.ckp.write_log(
                    '{}/{} ({:.0f}%)\t'
                    'NLL: {:.3f}\t'
                    'Top1: {:.2f} / Top5: {:.2f}\t'
                    'Time: {:.1f}+{:.1f}s'.format(
                        n_samples, len(self.loader_train.dataset),
                        100.0 * n_samples / len(self.loader_train.dataset),
                        *(self.loss.log_train[-1, :] / n_samples),
                        timer_model.release(), timer_data.release()))

            if self.args.summary:
                if (batch + 1) % 50 == 0:
                    for name, param in self.model.named_parameters():
                        if name.find('features') >= 0 and name.find(
                                'weight') >= 0:
                            self.writer.add_scalar(
                                'data/' + name,
                                param.clone().cpu().data.abs().mean().numpy(),
                                1000 * (epoch - 1) + batch)
                            if param.grad is not None:
                                self.writer.add_scalar(
                                    'data/' + name + '_grad',
                                    param.grad.clone().cpu().data.abs().mean().
                                    numpy(), 1000 * (epoch - 1) + batch)

                if (batch + 1) == 500:
                    for name, param in self.model.named_parameters():
                        if name.find('features') >= 0 and name.find(
                                'weight') >= 0:
                            self.writer.add_histogram(
                                name,
                                param.clone().cpu().data.numpy(),
                                1000 * (epoch - 1) + batch)
                            if param.grad is not None:
                                self.writer.add_histogram(
                                    name + '_grad',
                                    param.grad.clone().cpu().data.numpy(),
                                    1000 * (epoch - 1) + batch)
            # else:
            #     break

            timer_data.tic()
        self.model.log(self.ckp)
        self.loss.end_log(len(self.loader_train.dataset))
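
The `--summary` branch logs the mean absolute value of every 'features.*weight' parameter and of its gradient as TensorBoard scalars every 50 batches, plus full histograms once per epoch (at batch 500), using `1000 * (epoch - 1) + batch` as the global step. The writer itself is created outside these snippets; a minimal sketch of an assumed setup with torch's built-in TensorBoard bindings:

    from torch.utils.tensorboard import SummaryWriter

    def build_writer(log_dir):
        # Hypothetical helper: one writer per training run; the trainer keeps
        # the result as self.writer and calls add_scalar / add_histogram on it.
        return SummaryWriter(log_dir=log_dir)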
Example 10
    def train(self):
        epoch = self.start_epoch()
        self.model.begin(epoch, self.ckp)
        self.loss.start_log()
        # modules = self.model.get_model().find_modules() #TODO: merge this
        timer_data, timer_model = utility.timer(), utility.timer()
        n_samples = 0

        for batch, (img, label) in enumerate(self.loader_train):
            # if batch<=1:
            img, label = self.prepare(img, label)
            n_samples += img.size(0)

            timer_data.hold()
            timer_model.tic()

            self.optimizer.zero_grad()
            # embed()
            prediction = self.model(img)
            loss, _ = self.loss(prediction, label)

            if self.args.distillation:
                with torch.no_grad():
                    prediction_teacher = self.model_teacher(img)
                loss_distill = distillation(prediction,
                                            prediction_teacher,
                                            T=4)
                loss = loss_distill * 0.4 + loss * 0.6

            loss.backward()
            self.optimizer.step()

            timer_model.hold()

            if self.args.summary:
                if (batch + 1) % 50 == 0:
                    for name, param in self.model.named_parameters():
                        if name.find('features') >= 0 and name.find(
                                'weight') >= 0:
                            self.writer.add_scalar(
                                'data/' + name,
                                param.clone().cpu().data.abs().mean().numpy(),
                                1000 * (epoch - 1) + batch)
                            if param.grad is not None:
                                self.writer.add_scalar(
                                    'data/' + name + '_grad',
                                    param.grad.clone().cpu().data.abs().mean().
                                    numpy(), 1000 * (epoch - 1) + batch)
                if (batch + 1) == 500:
                    for name, param in self.model.named_parameters():
                        if name.find('features') >= 0 and name.find(
                                'weight') >= 0:
                            self.writer.add_histogram(
                                name,
                                param.clone().cpu().data.numpy(),
                                1000 * (epoch - 1) + batch)
                            if param.grad is not None:
                                self.writer.add_histogram(
                                    name + '_grad',
                                    param.grad.clone().cpu().data.numpy(),
                                    1000 * (epoch - 1) + batch)

            timer_data.tic()

        self.model.log(self.ckp)
        self.loss.end_log(len(self.loader_train.dataset))
Example 11
    def test(self):
        self.model.get_model().total_time = [0] * len(
            self.model.get_model().body_list)
        epoch = self.scheduler.last_epoch + 1
        self.ckp.write_log('\nEvaluation:')
        self.loss.start_log(train=False)
        self.model.eval()

        timer_test = utility.timer()
        i = 0
        with torch.no_grad():
            for img, label in tqdm(self.loader_test, ncols=80):
                i = i + 1
                # if i == 5:
                #     break
                img, label = self.prepare(img, label)
                timer_test.tic()
                prediction = self.model(img)
                timer_test.hold()
                self.loss(prediction, label, train=False)

        current_time = timer_test.acc

        self.loss.end_log(len(self.loader_test.dataset), train=False)

        # Lower is better
        best = self.loss.log_test.min(0)
        for i, measure in enumerate(('Loss', 'Top1 error', 'Top5 error')):
            self.ckp.write_log(
                '{}: {:.3f} (Best: {:.3f} from epoch {})'.format(
                    measure, self.loss.log_test[-1, i], best[0][i],
                    best[1][i] + 1))

        if hasattr(self, 'epoch_continue') and self.converging:
            best = self.loss.log_test[:self.epoch_continue, :].min(0)
            self.ckp.write_log('\nBest during searching')
            for i, measure in enumerate(('Loss', 'Top1 error', 'Top5 error')):
                self.ckp.write_log('{}: {:.3f} from epoch {}'.format(
                    measure, best[0][i], best[1][i]))

        self.ckp.write_log('Time: {:.2f}s\n'.format(current_time),
                           refresh=True)

        is_best = self.loss.log_test[-1,
                                     self.args.top] <= best[0][self.args.top]
        self.ckp.save(self, epoch, converging=self.converging, is_best=is_best)
        # This is used by clustering convolutional kernels
        # self.ckp.save_results(epoch, self.model)

        # scheduler.step is moved from training procedure to test procedure
        self.scheduler.step()

        # The statistics below were newly added
        self.model.get_model().timer_test_list.append(
            "{:.3f}".format(current_time))
        print("whole network inference time : ")
        print(self.model.get_model().timer_test_list)

        print("each layer time: ")
        for i in range(len(self.model.get_model().total_time)):
            self.model.get_model().total_time[i] = float("{:.5f}".format(
                self.model.get_model().total_time[i]))
        print(self.model.get_model().total_time)
        print("sum : ")
        print("{:.5f}".format(sum(self.model.get_model().total_time)))
        if self.model.get_model().layer_num != -1:
            self.model.get_model().spec_list.append("{:.5f}".format(
                self.model.get_model().total_time[
                    self.model.get_model().layer_num]))
            print("the %d 's layer inference time list : " %
                  self.model.get_model().layer_num)
            print(self.model.get_model().spec_list)
        self.model.get_model().sum_list.append("{:.5f}".format(
            sum(self.model.get_model().total_time)))
        print("sum list : ")
        print(self.model.get_model().sum_list)
        self.model.get_model().top1_err_list.append("{:.3f}".format(
            self.loss.log_test[-1, 1]))
        print("top1 error list : ")
        print(self.model.get_model().top1_err_list)