vgg16.load_state_dict(torch.load(f))

    # device selection: use CUDA only when the flag is set AND a GPU is present
    use_gpu_if_available = True
    use_CUDA = use_gpu_if_available and torch.cuda.is_available()
    device = torch.device('cuda' if use_CUDA else 'cpu')
    print('# using', device)

    # move the model to the chosen device and switch it to inference mode
    # (disables dropout / freezes batch-norm statistics)
    nn = vgg16.to(device)
    nn.eval()

    # dataset & dataloader for the 'T' split (presumably test — confirm
    # against ilsvrc2012.datasets)
    dT = ilsvrc2012.datasets('T')
    bsize = 64
    dl = torch.utils.data.DataLoader(
        dT, batch_size=bsize, shuffle=False,
        num_workers=16, pin_memory=use_CUDA)
    nbatch = len(dl)

    # timestamp marking the start of evaluation
    s1 = datetime.datetime.now()

    # accuracy accumulators
    ncorrect = 0
    ntotal = 0
    # NOTE(review): ncorrect/ntotal are never updated and the loop body only
    # moves the batch to the device — the evaluation loop appears truncated
    # here (no forward pass, no accuracy computation). Confirm against the
    # full source file.
    with torch.no_grad():
        for ib, rv in enumerate(dl):
            X, lab = rv[0].to(device), rv[1].to(device)
    # building the DCTnet2v2 model (the variable keeps the historical name
    # `vgg16`; the original "VGG11-bn" comment did not match the code)
    #vgg16 = torchvision.models.vgg16(pretrained=False)
    vgg16 = DCTnet.DCTnet2v2()
    print(vgg16)

    # resume from the previous epoch's checkpoint, unless starting fresh
    # (epoch_prev == 0 means no checkpoint exists yet)
    fnParam_prev = f'data/ex20210103_trainDCT2v2_epoch{epoch_prev:03d}.pth'
    if epoch_prev != 0:
        with open(fnParam_prev, mode='rb') as f:
            vgg16.load_state_dict(torch.load(f))

    # move the model to the chosen device and switch it to training mode
    nn = vgg16.to(device)
    nn.train()

    # dataset & dataloader for the 'L' split (presumably the training /
    # learning set — confirm against ilsvrc2012.datasets)
    dL = ilsvrc2012.datasets('L')
    bsize = 256
    dl = torch.utils.data.DataLoader(
        dL, batch_size=bsize, shuffle=True,
        num_workers=16, pin_memory=use_CUDA)
    nbatch = len(dl)

    # optimizer — Adam; an SGD-with-momentum variant was tried previously:
    #   optim.SGD(nn.parameters(), lr=0.01, momentum=0.9)
    optimizer = optim.Adam(nn.parameters(), lr=0.0001, weight_decay=0.0)

    # per-batch accumulators covering a 1024-sample window
    # (presumably a logging/reporting interval — TODO confirm)
    nb = 1024 // bsize
    ncList = np.zeros(nb, dtype=int)
    lossList = np.zeros(nb)