Example #1
0
    def train(self):
        """
        Run the training loop for ``self.epochs`` epochs.

        Every epoch this steps ``self.scheduler``, runs one training pass
        (``self._train_epoch``) and one validation pass
        (``self._valid_epoch``), shows the merged log of both passes as the
        progress-bar description, evaluates on ``self.data_loader.test_loader``
        through ``PolicyKL.test`` and writes ``self.score_net``'s state dict
        to ``<save_dir>/<sdn_name>_best_val_policy.dump``. On every 10th
        epoch (epoch 0 excluded) the same evaluation is also run on
        ``self.train_data_generator``.

        :return: None
        """
        progress_bar = tqdm(range(self.epochs))
        for epoch in progress_bar:
            # NOTE(review): the scheduler is stepped before any optimizer
            # update of this epoch. If this is a torch.optim.lr_scheduler on
            # PyTorch >= 1.1, that order skips the first value of the
            # schedule -- confirm the targeted PyTorch version before moving it.
            self.scheduler.step()
            cur_lr = get_lr(self.optimizer)
            print('\nEpoch: {}/{}'.format(epoch, self.epochs))
            print('Cur lr: {}'.format(cur_lr))

            # One training pass followed by one validation pass; validation
            # entries win when both logs share a key.
            epoch_log = self._train_epoch(epoch)
            epoch_valid_log, _ = self._valid_epoch(epoch)
            epoch_log = {**epoch_log, **epoch_valid_log}

            # Build the progress-bar description. Each entry carries its own
            # trailing comma, so joining with '' keeps the exact text layout.
            log_parts = []
            for key, value in epoch_log.items():
                if key == 'epo':
                    # Epoch counter is shown as an integer.
                    log_parts.append('%s:%d,' % (key, value))
                elif key in ('det q', 'sto q'):
                    # These entries are indexable; only the positions listed
                    # in self.nz_post are shown.
                    selected = ','.join(
                        '%0.2f' % value[i] for i in self.nz_post)
                    log_parts.append('%s:[%s],' % (key, selected))
                else:
                    log_parts.append('%s:%0.3f,' % (key, value))
            progress_bar.set_description(''.join(log_parts))

            # Evaluate on the test loader after every epoch.
            PolicyKL.test(args=self.args,
                          score_net=self.score_net,
                          sdn_model=self.sdn_model,
                          data_loader=self.data_loader.test_loader,
                          nz_post=self.nz_post,
                          device=self.device)
            if epoch % 10 == 0 and epoch > 0:
                print('This is the performance on the train dataset:')
                PolicyKL.test(args=self.args,
                              score_net=self.score_net,
                              sdn_model=self.sdn_model,
                              data_loader=self.train_data_generator,
                              nz_post=self.nz_post,
                              device=self.device)

            # NOTE(review): the checkpoint is overwritten on every epoch with
            # no comparison against a validation metric, despite the
            # "best_val" file name -- confirm this is intended.
            torch.save(
                self.score_net.state_dict(), self.args.save_dir +
                '/{}_best_val_policy.dump'.format(self.sdn_name))
Example #2
0
def cnn_train(model, data, epochs, optimizer, scheduler, device='cpu'):
    """Train ``model`` for ``epochs`` epochs and return per-epoch metrics.

    Each epoch steps ``scheduler``, runs ``cnn_training_step`` on every batch
    of the selected training loader, then evaluates with ``cnn_test`` on
    ``data.test_loader`` and on that same training loader.

    Args:
        model: Network to train. ``data.aug_train_loader`` is used unless the
            model has an ``augment_training`` attribute that is falsy, in
            which case ``data.train_loader`` is used.
        data: Object exposing ``aug_train_loader``, ``train_loader`` and
            ``test_loader``.
        epochs: Number of epochs; epochs are numbered from 1.
        optimizer: Forwarded to every training step; its learning rate is
            read with ``af.get_lr``.
        scheduler: Stepped once at the start of each epoch.
        device: Forwarded to the training-step and test helpers.

    Returns:
        dict mapping 'epoch_times', 'test_top1_acc', 'test_top3_acc',
        'train_top1_acc', 'train_top3_acc' and 'lrs' to lists holding one
        entry per epoch.
    """
    history = {
        key: []
        for key in ('epoch_times', 'test_top1_acc', 'test_top3_acc',
                    'train_top1_acc', 'train_top3_acc', 'lrs')
    }
    print("cnn training")

    for epoch in range(1, epochs + 1):
        scheduler.step()
        lr_now = af.get_lr(optimizer)

        # A model without the attribute is treated as "augment".
        if getattr(model, 'augment_training', True):
            train_loader = data.aug_train_loader
        else:
            train_loader = data.train_loader

        tick = time.time()
        model.train()
        print('Epoch: {}/{}'.format(epoch, epochs))
        print('Cur lr: {}'.format(lr_now))
        for inputs, targets in train_loader:
            cnn_training_step(model, optimizer, inputs, targets, device)
        # Only the training pass is timed; the evaluations below are not.
        elapsed = int(time.time() - tick)

        test_top1, test_top3 = cnn_test(model, data.test_loader, device)
        print('Top1 Test accuracy: {}'.format(test_top1))
        print('Top3 Test accuracy: {}'.format(test_top3))
        history['test_top1_acc'].append(test_top1)
        history['test_top3_acc'].append(test_top3)

        train_top1, train_top3 = cnn_test(model, train_loader, device)
        print('Top1 Train accuracy: {}'.format(train_top1))
        print('top3 Train accuracy: {}'.format(train_top3))
        history['train_top1_acc'].append(train_top1)
        history['train_top3_acc'].append(train_top3)

        print('Epoch took {} seconds.'.format(elapsed))
        history['epoch_times'].append(elapsed)
        history['lrs'].append(lr_now)

    return history
Example #3
0
def epoch_routine(model, datas, optimizer, scheduler, epoch, epochs, augment,
                  metrics, device):
    """Run one training epoch, evaluate, record metrics, return the mean loss.

    Steps ``scheduler``, derives this epoch's coefficients from
    ``calc_coeff(model)``, runs ``sdn_training_step`` on every batch of
    ``get_loader(datas, augment)``, then evaluates with ``sdn_test`` on the
    validation loader and on a fresh training loader.

    Args:
        model: Network being trained.
        datas: Object exposing ``aug_valid_loader`` and ``valid_loader``;
            also passed to ``get_loader``.
        optimizer: Forwarded to every training step; its learning rate is
            read with ``af.get_lr``.
        scheduler: Stepped once at the start of the epoch.
        epoch: Index of the current epoch, used to ramp the coefficients.
        epochs: Total number of epochs, used as the ramp denominator.
        augment: When truthy, ``datas.aug_valid_loader`` is used for
            validation, otherwise ``datas.valid_loader``; also forwarded to
            ``get_loader``.
        metrics: dict of lists, appended to in place. Must contain
            'valid_top1_acc', 'valid_top3_acc', 'train_top1_acc',
            'train_top3_acc', 'epoch_times' and 'lrs'.
        device: Forwarded to the training-step and test helpers.

    Returns:
        The arithmetic mean of the per-batch losses.

    Raises:
        ZeroDivisionError: if the training loader yields no batches.
    """
    scheduler.step()
    lr_now = af.get_lr(optimizer)

    print('cur_lr: {}'.format(lr_now))
    print("scheduler state dict: {}".format(scheduler.state_dict()))

    # Coefficients grow linearly with the epoch index (plus a 0.01 offset)
    # and are clipped element-wise at the values returned by calc_coeff.
    coeff_cap = calc_coeff(model)
    ramped = 0.01 + epoch * (np.array(coeff_cap) / epochs)
    cur_coeffs = np.minimum(coeff_cap, ramped)
    print("current coeffs: {}".format(cur_coeffs))

    tick = time.time()
    model.train()
    batch_losses = []
    for step, batch in enumerate(get_loader(datas, augment)):
        batch_loss = sdn_training_step(optimizer, model, cur_coeffs, batch,
                                       device, epoch)
        batch_losses.append(batch_loss)
        if step % 100 == 0:
            print("Loss: {}".format(batch_loss))

    if augment:
        valid_loader = datas.aug_valid_loader
    else:
        valid_loader = datas.valid_loader
    valid_top1, valid_top3 = sdn_test(model, valid_loader, device)
    # The timed span covers training plus the validation pass above.
    elapsed = int(time.time() - tick)

    print('Top1 Valid accuracies: {}'.format(valid_top1))
    print('Top3 Valid accuracies: {}'.format(valid_top3))
    train_top1, train_top3 = sdn_test(model, get_loader(datas, augment),
                                      device)
    print('Top1 Train accuracies: {}'.format(train_top1))
    print('Top3 Train accuracies: {}'.format(train_top3))

    print('Epoch took {} seconds.'.format(elapsed))

    recorded = (
        ('valid_top1_acc', valid_top1),
        ('valid_top3_acc', valid_top3),
        ('train_top1_acc', train_top1),
        ('train_top3_acc', train_top3),
        ('epoch_times', elapsed),
        ('lrs', lr_now),
    )
    for key, value in recorded:
        metrics[key].append(value)

    mean_loss = sum(batch_losses) / len(batch_losses)
    print("mean loss: {}".format(mean_loss))
    return mean_loss
Example #4
0
def sdn_train(model, data, epochs, optimizer, scheduler, device='cpu'):
    """Train an SDN model for ``epochs`` epochs and return per-epoch metrics.

    Each epoch steps ``scheduler``, runs one training step per batch of
    ``get_loader(data, augment)``, evaluates with ``sdn_test`` on
    ``data.val_loader`` and on a fresh training loader, and computes a
    cumulative accuracy from ``sdn_get_detailed_results``.

    The training step is chosen from two model flags:

    * ``model.ic_only is False`` -> weighted step that receives this epoch's
      coefficients (``sdn_training_step_DS`` / ``sdn_training_step``);
    * otherwise -> IC-only step (``sdn_ic_only_step_DS`` /
      ``sdn_ic_only_step``);
    * ``model.ds`` truthy selects the ``_DS`` variant in both cases.

    Args:
        model: Network to train; must expose ``augment_training``,
            ``ic_only`` and ``ds``.
        data: Object exposing ``val_loader``; also passed to ``get_loader``.
        epochs: Number of epochs; epochs are numbered from 1.
        optimizer: Forwarded to every training step; its learning rate is
            read with ``af.get_lr``.
        scheduler: Stepped once at the start of each epoch.
        device: Forwarded to the training-step and test helpers.

    Returns:
        dict mapping 'epoch_times', 'test_top1_acc', 'test_top5_acc',
        'train_top1_acc', 'train_top5_acc', 'lrs' and 'test_cumulative_acc'
        to lists holding one entry per epoch.
    """
    augment = model.augment_training
    metrics = {'epoch_times':[], 'test_top1_acc':[], 'test_top5_acc':[],
        'train_top1_acc':[], 'train_top5_acc':[], 'lrs':[],
        'test_cumulative_acc': []}
    max_coeffs = np.array([0.15, 0.3, 0.45, 0.6, 0.75, 0.9]) # max tau_i --- C_i values

    if model.ic_only:
        print('sdn will be converted from a pre-trained CNN...  (The IC-only training)')
    else:
        print('sdn will be trained from scratch...(The SDN training)')

    for epoch in range(1, epochs+1):
        scheduler.step()
        cur_lr = af.get_lr(optimizer)
        print('\nEpoch: {}/{}'.format(epoch, epochs))
        print('Cur lr: {}'.format(cur_lr))

        weighted_training = model.ic_only is False
        if weighted_training:
            # Coefficients for the weighted objective: a linear ramp in the
            # epoch index (plus 0.01), clipped element-wise at max_coeffs.
            cur_coeffs = np.minimum(max_coeffs, 0.01 + epoch*(max_coeffs/epochs))
            print('Cur coeffs: {}'.format(cur_coeffs))

        start_time = time.time()
        model.train()
        loader = get_loader(data, augment)
        for i, batch in enumerate(loader):
            if weighted_training:
                step = sdn_training_step_DS if model.ds else sdn_training_step
                total_loss = step(optimizer, model, cur_coeffs, batch, device)
            else:
                step = sdn_ic_only_step_DS if model.ds else sdn_ic_only_step
                total_loss = step(optimizer, model, batch, device)

            if i % 100 == 0:
                print('Loss: {} '.format(total_loss))

        top1_test, top5_test = sdn_test(model, data.val_loader, device)

        print('Top1 Test accuracies: {}'.format(top1_test))
        print('Top5 Test accuracies: {}'.format(top5_test))
        end_time = time.time()
        # Record the evaluation results; without these appends the
        # 'test_top1_acc' / 'test_top5_acc' lists would be returned empty.
        metrics['test_top1_acc'].append(top1_test)
        metrics['test_top5_acc'].append(top5_test)

        # Cumulative accuracy: size of the union of the per-layer entries of
        # layer_correct (presumably sets of correctly classified sample
        # ids -- verify against sdn_get_detailed_results).
        layer_correct, _, _, _ = sdn_get_detailed_results(model, loader=data.val_loader, device=device)
        cum_correct = set().union(*layer_correct.values())
        # NOTE(review): 10000 is a hard-coded sample count, presumably the
        # size of the evaluation set -- confirm it matches data.val_loader.
        cumulative_acc = len(cum_correct)/10000
        print('Cumulative accuracies: {}'.format(cumulative_acc))
        metrics['test_cumulative_acc'].append(cumulative_acc)

        top1_train, top5_train = sdn_test(model, get_loader(data, augment), device)
        print('Top1 Train accuracies: {}'.format(top1_train))
        print('Top5 Train accuracies: {}'.format(top5_train))
        metrics['train_top1_acc'].append(top1_train)
        metrics['train_top5_acc'].append(top5_train)

        # The timed span covers training plus the first evaluation pass.
        epoch_time = int(end_time-start_time)
        metrics['epoch_times'].append(epoch_time)
        print('Epoch took {} seconds.'.format(epoch_time))

        metrics['lrs'].append(cur_lr)

    return metrics
Example #5
0
def sdn_train(model, data, epochs, optimizer, scheduler, device='cpu'):
    """Train an SDN model for ``epochs`` epochs and return per-epoch metrics.

    Each epoch steps ``scheduler``, runs one training step per batch of
    ``get_loader(data, augment)``, then evaluates with ``sdn_test`` on
    ``data.test_loader`` and on a fresh training loader.

    When ``model.ic_only is False`` the weighted ``sdn_training_step`` is
    used and receives this epoch's coefficients; otherwise
    ``sdn_ic_only_step`` is used.

    Args:
        model: Network to train; must expose ``augment_training`` and
            ``ic_only``.
        data: Object exposing ``test_loader``; also passed to ``get_loader``.
        epochs: Number of epochs; epochs are numbered from 1.
        optimizer: Forwarded to every training step; its learning rate is
            read with ``af.get_lr``.
        scheduler: Stepped once at the start of each epoch.
        device: Forwarded to the training-step and test helpers.

    Returns:
        dict mapping 'epoch_times', 'test_top1_acc', 'test_top5_acc',
        'train_top1_acc', 'train_top5_acc' and 'lrs' to lists holding one
        entry per epoch.
    """
    augment = model.augment_training
    history = {
        key: []
        for key in ('epoch_times', 'test_top1_acc', 'test_top5_acc',
                    'train_top1_acc', 'train_top5_acc', 'lrs')
    }
    # Upper bounds of the per-IC coefficients (max tau_i --- C_i values).
    max_coeffs = np.array([0.15, 0.3, 0.45, 0.6, 0.75, 0.9])

    banner = (
        'sdn will be converted from a pre-trained CNN...  (The IC-only training)'
        if model.ic_only else
        'sdn will be trained from scratch...(The SDN training)')
    print(banner)

    for epoch in range(1, epochs + 1):
        scheduler.step()
        lr_now = af.get_lr(optimizer)
        print('\nEpoch: {}/{}'.format(epoch, epochs))
        print('Cur lr: {}'.format(lr_now))

        if model.ic_only is False:
            # Coefficients for the weighted objective: a linear ramp in the
            # epoch index (plus 0.01), clipped element-wise at max_coeffs.
            ramped = 0.01 + epoch * (max_coeffs / epochs)
            cur_coeffs = np.minimum(max_coeffs, ramped)
            print('Cur coeffs: {}'.format(cur_coeffs))

        tick = time.time()
        model.train()
        for step, batch in enumerate(get_loader(data, augment)):
            if model.ic_only is False:
                batch_loss = sdn_training_step(optimizer, model, cur_coeffs,
                                               batch, device)
            else:
                batch_loss = sdn_ic_only_step(optimizer, model, batch, device)

            if step % 100 == 0:
                print('Loss: {}: '.format(batch_loss))

        test_top1, test_top5 = sdn_test(model, data.test_loader, device)
        print('Top1 Test accuracies: {}'.format(test_top1))
        print('Top5 Test accuracies: {}'.format(test_top5))
        # The timed span covers training plus the test evaluation above.
        elapsed = int(time.time() - tick)

        train_top1, train_top5 = sdn_test(model, get_loader(data, augment),
                                          device)
        print('Top1 Train accuracies: {}'.format(train_top1))
        print('Top5 Train accuracies: {}'.format(train_top5))
        print('Epoch took {} seconds.'.format(elapsed))

        # Record everything for this epoch in one place.
        history['test_top1_acc'].append(test_top1)
        history['test_top5_acc'].append(test_top5)
        history['train_top1_acc'].append(train_top1)
        history['train_top5_acc'].append(train_top5)
        history['epoch_times'].append(elapsed)
        history['lrs'].append(lr_now)

    return history