def train(epoch, sample_weights=None):
    """Train the binarized model for one epoch on a weighted-sample dataset.

    Args:
        epoch: current epoch index; forwarded to adjust_learning_rate and
            used in progress printing.
        sample_weights: per-example sampling weights handed to
            build_train_dataset('sample_dataset', ...). When None, defaults
            to a uniform (50000, 1) weight tensor — same value as the old
            default argument. NOTE(review): 50000 matches test_dataset_size
            elsewhere in this file, not train_dataset_size (1281167) —
            confirm the intended default size.

    Returns:
        float: average top-1 precision over the epoch (top1.avg).
    """
    # Default built lazily: the old signature evaluated a torch.Tensor
    # default once at import time and shared it across all calls.
    if sample_weights is None:
        sample_weights = torch.Tensor(np.ones((50000, 1)) / 50000.0)

    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    # switch to train mode
    model.train()

    end = time.time()
    adjust_learning_rate(optimizer, epoch)
    trainloader = build_train_dataset('sample_dataset', sample_weights)

    for batch_idx, (data, target, _) in enumerate(trainloader):
        # measure data loading time
        data_time.update(time.time() - end)

        bin_op.binarization()
        data, target = Variable(data.cuda()), Variable(target.cuda())

        optimizer.zero_grad()
        output = model(data)

        # backwarding
        loss = criterion(output, target)
        prec1, prec5 = accuracy(output.data, target.data, topk=(1, 5))
        losses.update(loss.data[0], data.size(0))
        top1.update(prec1[0], data.size(0))
        top5.update(prec5[0], data.size(0))
        loss.backward()

        # restore full-precision weights before the optimizer step,
        # then scale the gradients for the binary weights
        bin_op.restore()
        bin_op.updateBinaryGradWeight()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        # write tensorboard
        train_top5_write.add(prec5[0])
        train_top1_write.add(prec1[0])
        train_loss_write.add(loss.data[0])
        # BUG FIX: this writer previously logged top5.avg; it is the
        # running top-1 average writer, so log top1.avg.
        train_avg_top1_write.add(top1.avg)

        if batch_idx % args.print_freq == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'LR: {lr:.8f}\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                  'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                      epoch, batch_idx, len(trainloader),
                      lr=optimizer.param_groups[0]['lr'],
                      batch_time=batch_time, data_time=data_time,
                      loss=losses, top1=top1, top5=top5))

    gc.collect()
    return top1.avg
# NOTE(review): `final_loss_print_2` is not defined anywhere in the visible
# scope — this print looks like an orphan from an earlier (out-of-view)
# section; confirm before running at module level.
print('\n Train accuracy from selected model 2: {:.4f} \n'.format(1-final_loss_print_2))

if __name__ == '__main__':
    # Build model, optimizers, losses and the binarization operator.
    model, optimizer, criterion, criterion_seperated, bin_op = model_components()

    # Boosting bookkeeping: rounds already completed (resume point) and
    # total rounds to run.
    already_boosted = 0
    boosting_iters = 3

    train_batch_size = 250
    test_batch_size = 250
    testloader = build_test_dataset(batch_size=test_batch_size)
    trainloader = build_train_dataset('normal', None, batch_size=train_batch_size)

    # Dataset constants (ImageNet-sized: 1000 classes, 1.28M train images).
    total_classes = 1000
    train_dataset_size = 1281167
    test_dataset_size = 50000

    for i in range(boosting_iters):
        print("Bagging "+str(i))
        # Skip rounds up to and including `already_boosted` (resume logic).
        if i > already_boosted:
            # Bootstrap resample: draws indices with replacement.
            # NOTE(review): these are sample *indices*, not normalized
            # weights — confirm sample_models expects this form.
            sample_weights_new = np.random.choice(train_dataset_size, size=train_dataset_size)
            sample_models(boosting_iters=i, sample_weights=sample_weights_new, retrain_epoch=args.retrain_epochs)
            print('%s %d-th Sample done !' % (str(datetime.datetime.utcnow()), i))
            #alpha_m_mat = torch.cat((alpha_m_mat, alpha_m), 1)
def sample_models(boosting_iters, sample_weights, retrain_epoch=100):
    """Retrain the model for one boosting round, then recompute sample weights.

    Retrains (or, for round 0, just evaluates) the model, reloads the best
    checkpoint saved by eval_test, runs inference over the full training set
    to collect predictions/labels, and feeds them to the configured boosting
    update (boostA/boostB/boostC).

    Args:
        boosting_iters: index of the current boosting round; also used as
            the checkpoint / output-file suffix.
        sample_weights: per-example weights for this round, sliceable by
            sample index.
        retrain_epoch: number of retraining epochs.

    Returns:
        (best_sample_weights, best_alpha_m) from the selected boost* update.

    Raises:
        ValueError: if args.boosting_mode is not 'A', 'B' or 'C'.
    """
    print(str(datetime.datetime.utcnow()) +
          " Start boosting iter: " + str(boosting_iters))
    print('===> Start retraining ...')
    best_acc = -1
    reset_learning_rate(optimizer, lr=args.lr)
    # NOTE(review): this scheduler is created but never stepped — the LR
    # schedule is driven by adjust_learning_rate inside train(). Kept for
    # interface stability; confirm whether scheduler.step() was intended.
    scheduler = optim.lr_scheduler.StepLR(
        optimizer, step_size=25, gamma=0.1, last_epoch=-1)

    # this is jiecao's implement
    if boosting_iters == 0:
        # Round 0: evaluate the pretrained weights as-is.
        best_acc = eval_test(str(boosting_iters), best_acc, 0)
    else:
        reset_model()

    for epoch in range(1, retrain_epoch + 1):
        # BUG FIX: train() takes (epoch, sample_weights); the scheduler was
        # previously passed as the second positional argument, shifting
        # sample_weights into an excess positional arg (TypeError).
        now_acc = train(epoch, sample_weights)
        print("Epoch " + str(epoch) + ": " + str(now_acc))
        # Evaluate (and checkpoint the best model) after every epoch.
        best_acc = eval_test(str(boosting_iters), best_acc, now_acc)

    # Reload the best checkpoint for this round and run full-train-set
    # inference with binarized weights.
    pretrained_model = torch.load(
        args.root_dir + str(boosting_iters) + '.pth.tar')
    model.load_state_dict(pretrained_model['state_dict'])
    model.eval()
    bin_op.binarization()

    pred_output = torch.Tensor(np.zeros(
        (train_dataset_size, 1)))  # torch tensor in cpu
    label_in_tensor = torch.Tensor(np.zeros(
        (train_dataset_size, )))  # torch tensor in cpu

    trainloader = build_train_dataset('normal', None, batch_size=256)
    # BUG FIX: offsets were computed as batch_idx * target.size(0), which is
    # wrong when the final batch is smaller than the rest; use a running
    # offset instead (identical for full batches).
    offset = 0
    for data, target in tqdm(trainloader):
        bsz = target.size(0)
        batch_sample_weights = sample_weights[offset:offset + bsz]
        batch_softmax_output = get_error_output(data, target,
                                                batch_sample_weights)
        # Store the argmax class prediction for each sample.
        pred_output[offset:offset + bsz, :] = \
            batch_softmax_output.max(1, keepdim=True)[1].data.cpu()
        label_in_tensor[offset:offset + bsz] = target
        offset += bsz
    bin_op.restore()

    np.save(args.root_dir + 'pred_output_' + str(boosting_iters),
            pred_output.numpy())
    np.save(args.root_dir + 'label_in_tensor_' + str(boosting_iters),
            label_in_tensor.numpy())

    # Dispatch to the configured boosting weight-update rule.
    if args.boosting_mode == 'A':
        best_sample_weights, best_alpha_m = boostA(pred_output,
                                                   label_in_tensor,
                                                   sample_weights)
    elif args.boosting_mode == 'B':
        best_sample_weights, best_alpha_m = boostB(pred_output,
                                                   label_in_tensor,
                                                   sample_weights)
    elif args.boosting_mode == 'C':
        best_sample_weights, best_alpha_m = boostC(pred_output,
                                                   label_in_tensor,
                                                   sample_weights)
    else:
        raise ValueError('Wrong Boosting Mode !')

    np.save(args.root_dir + 'sample_weights_' + str(boosting_iters),
            best_sample_weights.numpy())
    return best_sample_weights, best_alpha_m