def main():
    global args, best_prec1
    args = parser.parse_args()

    # create model
    if args.type == "dn3":
        model = dn.DenseNet3(args.layers, 120, args.growth,
                             reduction=args.reduce, bottleneck=args.bottleneck)
    elif args.type == "resnet":
        model = rn.ResNetTransfer(120)
    else:
        raise Exception('No such model exists - choose dn3 or resnet')

    # get the number of model parameters
    print('Number of model parameters: {}'.format(
        sum([p.data.nelement() for p in model.parameters()])))

    # for training on multiple GPUs.
    # Use CUDA_VISIBLE_DEVICES=0,1 to specify which GPUs to use
    # model = torch.nn.DataParallel(model).cuda()
    model = model.cuda()
    cudnn.benchmark = True

    if os.path.isfile(args.model):
        checkpoint = torch.load(args.model)
        model.load_state_dict(checkpoint['state_dict'])
    else:
        sys.exit("=> no checkpoint found at '{}'".format(args.model))

    model.eval()
    out = pd.read_csv('../sample_submission.csv')
    print(out.head())
    out = out.set_index("id")

    for input, img in output_dataset:
        input = input.cuda()
        # volatile=True is the legacy (pre-0.4) way of disabling autograd at inference
        input_var = torch.autograd.Variable(input, volatile=True).unsqueeze(0)
        # compute class probabilities for this image
        output = torch.nn.functional.softmax(model(input_var), dim=1)
        print(img)
        print(output[0].data.cpu().numpy())
        out.loc[img] = output[0].data.cpu().numpy()

    out.to_csv("out.csv")
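# The submission loop above iterates over a module-level `output_dataset` that is
# not defined in this file. A minimal sketch of what such a dataset could look
# like, assuming a directory of test JPEGs named by image id (the directory
# layout, file extension, and class name here are illustrative, not from the repo):
import os

from PIL import Image
import torch.utils.data as data

class SubmissionDataset(data.Dataset):
    """Yields (transformed image tensor, image id) pairs for the test split."""

    def __init__(self, image_dir, transform):
        self.image_dir = image_dir
        self.transform = transform
        self.ids = [os.path.splitext(f)[0] for f in sorted(os.listdir(image_dir))]

    def __len__(self):
        return len(self.ids)

    def __getitem__(self, idx):
        path = os.path.join(self.image_dir, self.ids[idx] + '.jpg')
        img = Image.open(path).convert('RGB')
        return self.transform(img), self.ids[idx]

# output_dataset = SubmissionDataset('../test', transform_test)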
def get_model(args):
    if args.model == 'resnet18':
        model = resnet.ResNet18(num_classes=10)
    elif args.model == 'resnet50':
        model = resnet.ResNet50(num_classes=10)
    elif args.model == 'densenet40':
        model = densenet.DenseNet3(depth=40, num_classes=10)
    elif args.model == 'vgg16':
        model = vgg.VGG('VGG16')
    else:
        raise ValueError('Unknown model: {}'.format(args.model))
    return model
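# Example use of get_model(), with an argparse.Namespace standing in for parsed
# CLI flags (resnet/densenet/vgg are the repo's local model modules):
from argparse import Namespace

model = get_model(Namespace(model='resnet18'))
print('Parameters:', sum(p.numel() for p in model.parameters()))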
def main():
    global args, best_prec1
    args = parser.parse_args()
    # if args.tensorboard: configure("runs/%s"%(args.name))

    # Data loading code
    normalize = transforms.Normalize(
        mean=[x / 255.0 for x in [125.3, 123.0, 113.9]],
        std=[x / 255.0 for x in [63.0, 62.1, 66.7]])

    # Data augmentation
    if args.augment:
        transform_train = transforms.Compose([
            transforms.RandomCrop(32, padding=4),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ])
    else:
        transform_train = transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ])
    # validation should not see the training-time augmentation
    transform_test = transforms.Compose([
        transforms.ToTensor(),
        normalize,
    ])
    # normalize = transforms.Normalize(mean=[0.485,0.456,0.406], std=[0.229,0.224,0.225])

    # load data
    train_dataset = datasets.ImageFolder(train_dirs, transform_train)
    val_dataset = datasets.ImageFolder(val_dirs, transform_test)

    kwargs = {'num_workers': 0, 'pin_memory': True}
    train_loader = torch.utils.data.DataLoader(
        # datasets.CIFAR10('../data', train=True, download=True, transform=transform_train),
        train_dataset, batch_size=args.batch_size, shuffle=True, **kwargs)
    val_loader = torch.utils.data.DataLoader(
        # datasets.CIFAR10('../data', train=False, transform=transform_test),
        val_dataset, batch_size=args.batch_size, shuffle=False, **kwargs)

    # create model
    model = dn.DenseNet3(args.layers, 3, args.growth, reduction=args.reduce,
                         bottleneck=args.bottleneck, dropRate=args.droprate,
                         small_inputs=False)

    # get the number of model parameters
    print('Number of model parameters: {}'.format(
        sum([p.data.nelement() for p in model.parameters()])))

    # for training on multiple GPUs.
    # Use CUDA_VISIBLE_DEVICES=0,1 to specify which GPUs to use
    # model = torch.nn.DataParallel(model).cuda()
    model = model.cuda()

    # for network visualization in tensorboard
    dummy_input = torch.rand(20, 3, 200, 200).cuda()
    writer.add_graph(model, (dummy_input,))

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    # define loss function (criterion) and optimizer
    # criterion = nn.CrossEntropyLoss().cuda()
    criterion = FocalLoss().cuda()
    optimizer = torch.optim.SGD(model.parameters(), args.lr,
                                momentum=args.momentum, nesterov=True,
                                weight_decay=args.weight_decay)

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)

        # evaluate on validation set
        prec1 = validate(val_loader, model, criterion, epoch)

        # remember best prec@1 and save checkpoint
        is_best = prec1 > best_prec1
        best_prec1 = max(prec1, best_prec1)
        save_checkpoint({
            'epoch': epoch + 1,
            'state_dict': model.state_dict(),
            'best_prec1': best_prec1,
        }, is_best)

    print('Best accuracy: ', best_prec1)
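# FocalLoss is instantiated above but not defined in this file. A minimal
# multi-class version following Lin et al. (2017), FL(p_t) = -(1 - p_t)^gamma * log(p_t);
# the default gamma here is an assumption:
import torch.nn as nn
import torch.nn.functional as F

class FocalLoss(nn.Module):
    def __init__(self, gamma=2.0):
        super(FocalLoss, self).__init__()
        self.gamma = gamma

    def forward(self, logits, target):
        # log p_t for the true class of each sample
        logp_t = F.log_softmax(logits, dim=1).gather(1, target.unsqueeze(1)).squeeze(1)
        p_t = logp_t.exp()
        # down-weight well-classified samples (p_t close to 1)
        return (-(1 - p_t) ** self.gamma * logp_t).mean()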
def main():
    global args, best_acc, suffix
    datasize = [1000, 2000, 4000, 8000, 16000, 32000, 50000]
    args = parser.parse_args()
    if args.tensorboard:
        configure("runs/%s" % (args.name))

    normalize = transforms.Normalize(
        mean=[x / 255.0 for x in [125.3, 123.0, 113.9]],
        std=[x / 255.0 for x in [63.0, 62.1, 66.7]])

    if args.augment:
        transform_train = transforms.Compose([
            transforms.RandomCrop(32, padding=4),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ])
    else:
        transform_train = transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ])
    transform_test = transforms.Compose([transforms.ToTensor(), normalize])

    dataset_train = datasets.CIFAR10('/home/gh349/bicheng/data', train=True,
                                     download=True, transform=transform_train)
    dataset_test = datasets.CIFAR10('/home/gh349/bicheng/data/', train=False,
                                    transform=transform_test)

    for size in datasize:
        suffix = " - " + str(size)
        # subsample the training set to study accuracy vs. dataset size
        tmp_train = random.sample(dataset_train, size)
        tmp_test = dataset_test

        kwargs = {'num_workers': 12, 'pin_memory': True}
        train_loader = torch.utils.data.DataLoader(
            tmp_train, batch_size=args.batch_size, shuffle=True, **kwargs)
        val_loader = torch.utils.data.DataLoader(
            tmp_test, batch_size=args.batch_size, shuffle=True, **kwargs)

        # create model
        model = dn.DenseNet3(args.layers, 10, args.growth,
                             reduction=args.reduce, bottleneck=args.bottleneck,
                             dropRate=args.droprate)

        # get the number of model parameters
        print('Number of model parameters: {}'.format(
            sum([p.data.nelement() for p in model.parameters()])))

        # for training on multiple GPUs.
        # Use CUDA_VISIBLE_DEVICES=0,1 to specify which GPUs to use
        model = model.cuda()
        cudnn.benchmark = True

        # define loss function (criterion) and optimizer
        criterion = nn.CrossEntropyLoss().cuda()
        optimizer = torch.optim.SGD(model.parameters(), args.lr,
                                    momentum=args.momentum,
                                    weight_decay=args.weight_decay)

        best_acc = 0
        for epoch in range(args.start_epoch, args.epochs):
            adjust_learning_rate(optimizer, epoch)

            # train for one epoch
            acc_train = train(train_loader, model, criterion, optimizer, epoch)

            # evaluate on validation set
            acc_val = validate(val_loader, model, criterion, epoch)
            if args.tensorboard:
                log_value("generalization error" + suffix, acc_train - acc_val, epoch)

            # remember best precision and save checkpoint
            is_best = acc_val > best_acc
            best_acc = max(acc_val, best_acc)

        print('Best accuracy' + suffix + ': ', best_acc)
        if args.tensorboard:
            log_value('dataset accuracy', best_acc, size)
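# adjust_learning_rate() is called each epoch but defined elsewhere. A sketch of
# the step decay commonly paired with this DenseNet recipe (divide the base LR by
# 10 at 50% and again at 75% of training); the repo's actual schedule may differ:
def adjust_learning_rate(optimizer, epoch):
    lr = args.lr
    if epoch >= 0.75 * args.epochs:
        lr *= 0.01
    elif epoch >= 0.5 * args.epochs:
        lr *= 0.1
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr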
def main():
    global args, best_prec1
    args = parser.parse_args()
    cudnn.benchmark = True
    # if args.structure == ''

    # Data loading code
    traindir = os.path.join(args.data, 'train')
    valdir = os.path.join(args.data, 'val')
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    print(args.batch_size)

    train_loader = torch.utils.data.DataLoader(
        datasets.ImageFolder(traindir, transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ])),
        batch_size=args.batch_size, shuffle=True,
        num_workers=args.workers, pin_memory=True)

    val_loader = torch.utils.data.DataLoader(
        datasets.ImageFolder(valdir, transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            normalize,
        ])),
        batch_size=args.batch_size, shuffle=False,
        num_workers=args.workers, pin_memory=True)

    # create model
    model = dn.DenseNet3(args.layers, 1000, args.growth, reduction=args.reduce,
                         bottleneck=args.bottleneck, dropRate=args.droprate)

    # get the number of model parameters
    print('Number of model parameters: {}'.format(
        sum([p.data.nelement() for p in model.parameters()])))

    # multi-GPU training
    gpu_list = [int(idx) for idx in args.gpus.split(',')]
    print(gpu_list)
    model = torch.nn.DataParallel(model, device_ids=gpu_list).cuda()

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            print("=> loaded checkpoint '{}' (epoch {})"
                  .format(args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda()
    optimizer = torch.optim.SGD(model.parameters(), args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    '''
    if args.evaluate:
        validate(val_loader, model, criterion)
        return
    '''

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)

        # evaluate on validation set
        prec1 = validate(val_loader, model, criterion)

        # remember best prec@1 and save checkpoint
        is_best = prec1 > best_prec1
        best_prec1 = max(prec1, best_prec1)
        save_checkpoint({
            'epoch': epoch + 1,
            # 'arch': args.arch,
            'state_dict': model.state_dict(),
            'best_prec1': best_prec1,
        }, is_best)
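# save_checkpoint() persists the latest state and keeps a copy of the best one.
# A sketch of the helper as called above (the repo's version may also namespace
# the files under runs/<args.name>):
import shutil

def save_checkpoint(state, is_best, filename='checkpoint.pth.tar'):
    torch.save(state, filename)
    if is_best:
        shutil.copyfile(filename, 'model_best.pth.tar')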
def main():
    global args, best_prec1, writer
    args = parser.parse_args()
    # if args.tensorboard: configure("runs/%s"%(args.name))
    writer = SummaryWriter("runs/%s" % (args.name))

    # Data loading code
    normalize = transforms.Normalize(
        mean=[x / 255.0 for x in [125.3, 123.0, 113.9]],
        std=[x / 255.0 for x in [63.0, 62.1, 66.7]])

    if args.augment:
        transform_train = transforms.Compose([
            transforms.RandomCrop(32, padding=4),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ])
    else:
        transform_train = transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ])
    transform_test = transforms.Compose([transforms.ToTensor(), normalize])

    kwargs = {'num_workers': 1, 'pin_memory': True}

    droprate = args.droprate
    if args.sdr:
        droprate = 0.0

    if args.dataset == 'C10':
        train_loader = torch.utils.data.DataLoader(
            datasets.CIFAR10('../data', train=True, download=True,
                             transform=transform_train),
            batch_size=args.batch_size, shuffle=True, **kwargs)
        val_loader = torch.utils.data.DataLoader(
            datasets.CIFAR10('../data', train=False, transform=transform_test),
            batch_size=args.batch_size, shuffle=True, **kwargs)
        model = dn.DenseNet3(args.layers, 10, args.growth,
                             reduction=args.reduce, bottleneck=args.bottleneck,
                             dropRate=droprate, use_sdr=args.sdr, beta=args.beta,
                             zeta=args.zeta, zeta_drop=args.zeta_drop)
    elif args.dataset == 'C100':
        train_loader = torch.utils.data.DataLoader(
            datasets.CIFAR100('../data', train=True, download=True,
                              transform=transform_train),
            batch_size=args.batch_size, shuffle=True, **kwargs)
        val_loader = torch.utils.data.DataLoader(
            datasets.CIFAR100('../data', train=False, transform=transform_test),
            batch_size=args.batch_size, shuffle=True, **kwargs)
        model = dn.DenseNet3(args.layers, 100, args.growth,
                             reduction=args.reduce, bottleneck=args.bottleneck,
                             dropRate=droprate, use_sdr=args.sdr, beta=args.beta,
                             zeta=args.zeta, zeta_drop=args.zeta_drop)
    elif args.dataset == 'ImageNet':
        # from imagenet_seq.data.Loader import ImagenetLoader
        # import imagenet_seq
        # train_loader = imagenet_seq.data.Loader('train', batch_size=args.batch_size,
        #                                         num_workers=1)
        # val_loader = imagenet_seq.data.Loader('val', batch_size=args.batch_size,
        #                                       num_workers=1)

        # Data loading code
        if args.layers not in [121, 161, 169, 201]:
            print("Please use 121, 161, 169, or 201 layers for ImageNet training.")
            sys.exit(1)
        import densenet_imagenet as dn_im

        traindir = os.path.join(args.data, 'train')
        valdir = os.path.join(args.data, 'val')
        normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                         std=[0.229, 0.224, 0.225])
        if args.augment:
            train_dataset = datasets.ImageFolder(
                traindir,
                transforms.Compose([
                    transforms.RandomResizedCrop(224),
                    transforms.RandomHorizontalFlip(),
                    transforms.ToTensor(),
                    normalize,
                ]))
        else:
            train_dataset = datasets.ImageFolder(
                traindir,
                transforms.Compose([
                    transforms.ToTensor(),
                    normalize,
                ]))
        train_loader = torch.utils.data.DataLoader(
            train_dataset, batch_size=args.batch_size, shuffle=True,
            num_workers=2, pin_memory=False, sampler=None)
        val_loader = torch.utils.data.DataLoader(
            datasets.ImageFolder(valdir, transforms.Compose([
                transforms.Resize(224),
                transforms.CenterCrop(224),
                transforms.ToTensor(),
                normalize,
            ])),
            batch_size=args.batch_size, shuffle=False,
            num_workers=2, pin_memory=False)

        # model = dn.DenseNet3(args.layers, 1000, args.growth, reduction=args.reduce,
        #                      bottleneck=args.bottleneck, dropRate=droprate,
        #                      use_sdr=args.sdr, beta=args.beta, zeta=args.zeta,
        #                      zeta_drop=args.zeta_drop)
        if args.layers == 121:
            model = dn_im.DenseNet(num_init_features=64, growth_rate=32,
                                   block_config=(6, 12, 24, 16),
                                   drop_rate=droprate)
        elif args.layers == 161:
            model = dn_im.DenseNet(num_init_features=64, growth_rate=48,
                                   block_config=(6, 12, 36, 24),
                                   drop_rate=droprate)
        elif args.layers == 169:
            model = dn_im.DenseNet(num_init_features=64, growth_rate=32,
                                   block_config=(6, 12, 32, 32),
                                   drop_rate=droprate)
        elif args.layers == 201:
            model = dn_im.DenseNet(num_init_features=64, growth_rate=32,
                                   block_config=(6, 12, 48, 32),
                                   drop_rate=droprate)

    # get the number of model parameters
    print('Number of model parameters: {}'.format(
        sum([p.data.nelement() for p in model.parameters()])))

    # for training on multiple GPUs.
    # Use CUDA_VISIBLE_DEVICES=0,1 to specify which GPUs to use
    if args.parallel:
        # model = torch.nn.DataParallel(model, device_ids=[0, 1]).cuda()
        model = torch.nn.DataParallel(model).cuda()
    else:
        model = model.cuda()

    if args.sdr:
        model.sdr = args.sdr
        model.beta = args.beta
        model.zeta = args.zeta
        model.zeta_orig = args.zeta
        model.zeta_drop = args.zeta_drop
        model.data_swap = []
        model.sds = []
    else:
        model.sdr = False

    if args.logfiles:
        rundir = "runs/%s" % (args.name)
        init_weights = [np.asarray(p.data) for p in model.parameters()]
        fname1 = rundir + "/init_weights.npy"
        np.save(fname1, init_weights)

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda()
    optimizer = torch.optim.SGD(model.parameters(), args.lr,
                                momentum=args.momentum, nesterov=True,
                                weight_decay=args.weight_decay)

    print("Training...")
    t_elapsed = 0
    for epoch in range(args.start_epoch, args.epochs):
        t_start = time.time()
        adjust_learning_rate(optimizer, epoch)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)
        if model.sdr:
            print("zeta value", str(model.zeta))

        # evaluate on validation set
        prec1 = validate(val_loader, model, criterion, epoch)

        # remember best prec@1 and save checkpoint
        is_best = prec1 > best_prec1
        best_prec1 = max(prec1, best_prec1)
        save_checkpoint({
            'epoch': epoch + 1,
            'state_dict': model.state_dict(),
            'best_prec1': best_prec1,
        }, is_best)

        if model.sdr and (epoch + 1) % model.zeta_drop == 0:
            if args.layers < 200:
                # "parabolic" annealing: zeta_orig raised to the number of drops so far
                model.zeta = model.zeta_orig**((epoch + 1) // model.zeta_drop)
            else:
                # exponential annealing; larger networks benefit from
                # longer exposure to noise
                lambda_ = 0.1
                model.zeta = model.zeta_orig * np.power(np.e, -(lambda_ * epoch))

        # for p in model.parameters():
        #     print(p)

        # print time taken and estimated time to completion
        t_end = time.time()
        t_total = t_end - t_start
        m, s = divmod(t_total, 60)
        h, m = divmod(m, 60)
        d, h = divmod(h, 24)
        print("Time for epoch " + str(epoch) + ": %02d:%02d:%02d:%02d" % (d, h, m, s))
        t_elapsed += t_total
        m, s = divmod(t_elapsed, 60)
        h, m = divmod(m, 60)
        d, h = divmod(h, 24)
        print("Time elapsed: %02d:%02d:%02d:%02d\n" % (d, h, m, s))
        t_left = (args.epochs - epoch - 1) * t_total
        m, s = divmod(t_left, 60)
        h, m = divmod(m, 60)
        d, h = divmod(h, 24)
        print("Estimated time to completion: %02d:%02d:%02d:%02d" % (d, h, m, s))

        if args.activations and (epoch == 0 or (epoch + 1) % 10 == 0):
            rundir = "runs/%s" % (args.name)
            sampled = [p.cpu().numpy() for p in model.activations]
            fname1 = rundir + "/acts_" + str(epoch) + ".npy"
            np.save(fname1, sampled)

    print('Best accuracy: ', best_prec1)
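# train() and validate() above rely on the usual PyTorch-example bookkeeping
# helpers; minimal versions for reference (standard definitions, not copied from
# this repo):
class AverageMeter(object):
    """Tracks the running average of a scalar such as loss or prec@1."""

    def __init__(self):
        self.val, self.sum, self.count, self.avg = 0, 0, 0, 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

def accuracy(output, target, topk=(1,)):
    """Computes top-k accuracy in percent over a batch."""
    maxk = max(topk)
    _, pred = output.topk(maxk, 1, True, True)  # (batch, maxk) predicted classes
    correct = pred.t().eq(target.view(1, -1).expand_as(pred.t()))
    return [correct[:k].reshape(-1).float().sum() * (100.0 / target.size(0))
            for k in topk]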
def main():
    global args, best_prec1
    args = parser.parse_args()
    if args.tensorboard:
        configure("runs/%s" % (args.name))

    # Data loading code
    normalize = transforms.Normalize(
        mean=[x / 255.0 for x in [125.3, 123.0, 113.9]],
        std=[x / 255.0 for x in [63.0, 62.1, 66.7]])

    if args.augment:
        transform_train = transforms.Compose([
            transforms.RandomCrop(32, padding=4),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ])
    else:
        transform_train = transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ])
    transform_test = transforms.Compose([transforms.ToTensor(), normalize])

    kwargs = {'num_workers': 1, 'pin_memory': True}
    train_loader = torch.utils.data.DataLoader(
        datasets.CIFAR10('../data', train=True, download=True,
                         transform=transform_train),
        batch_size=args.batch_size, shuffle=True, **kwargs)
    val_loader = torch.utils.data.DataLoader(
        datasets.CIFAR10('../data', train=False, transform=transform_test),
        batch_size=args.batch_size, shuffle=True, **kwargs)

    # create model
    model = dn.DenseNet3(args.layers, 10, args.growth, reduction=args.reduce,
                         bottleneck=args.bottleneck, dropRate=args.droprate)

    # get the number of model parameters
    print('Number of model parameters: {}'.format(
        sum([p.data.nelement() for p in model.parameters()])))

    # for training on multiple GPUs.
    # Use CUDA_VISIBLE_DEVICES=0,1 to specify which GPUs to use
    # model = torch.nn.DataParallel(model).cuda()
    model = model.cuda()

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda()
    optimizer = torch.optim.SGD(model.parameters(), args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    file_path = os.path.join("./runs", args.name)
    if not os.path.exists(file_path):
        os.makedirs(file_path)
    file1 = open(file_path + '/train_res', 'w')
    file2 = open(file_path + '/val_res', 'w')

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch)

        # train for one epoch
        prec_avg = train(train_loader, model, criterion, optimizer, epoch)
        file1.write(str(prec_avg))
        file1.write('\n')

        # evaluate on validation set
        prec1 = validate(val_loader, model, criterion, epoch)
        file2.write(str(prec1))
        file2.write('\n')

        # remember best prec@1 and save checkpoint
        is_best = prec1 > best_prec1
        best_prec1 = max(prec1, best_prec1)
        save_checkpoint({
            'epoch': epoch + 1,
            'state_dict': model.state_dict(),
            'best_prec1': best_prec1,
        }, is_best)

    print('Best accuracy: ', best_prec1)
    file1.close()
    file2.close()
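# A sketch of the validate() signature used throughout these scripts, built on
# the AverageMeter/accuracy helpers sketched earlier (an assumption about the
# repo's actual implementation):
def validate(val_loader, model, criterion, epoch):
    model.eval()
    top1 = AverageMeter()
    with torch.no_grad():
        for input, target in val_loader:
            input, target = input.cuda(), target.cuda()
            output = model(input)
            prec1 = accuracy(output, target, topk=(1,))[0]
            top1.update(float(prec1), input.size(0))
    model.train()
    return top1.avg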
def main():
    global args, best_prec1
    args = parser.parse_args()
    if args.tensorboard:
        configure("runs/%s" % (args.name))

    # Data loading code
    normalize = transforms.Normalize(
        mean=[x / 255.0 for x in [125.3, 123.0, 113.9]],
        std=[x / 255.0 for x in [63.0, 62.1, 66.7]])

    if args.type == "dn3":
        model = dn.DenseNet3(args.layers, 120, args.growth,
                             reduction=args.reduce, bottleneck=args.bottleneck,
                             dropRate=args.droprate)
    elif args.type == "resnet":
        model = rn.ResNetTransfer(120, dropRate=args.droprate)
    else:
        raise Exception('No such model exists - choose dn3 or resnet')

    # get the number of model parameters
    print('Number of model parameters: {}'.format(
        sum([p.data.nelement() for p in model.parameters()])))

    # for training on multiple GPUs.
    # Use CUDA_VISIBLE_DEVICES=0,1 to specify which GPUs to use
    # model = torch.nn.DataParallel(model).cuda()
    model = model.cuda()

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda()
    if args.type == "dn3":
        optimizer = torch.optim.SGD(model.parameters(), args.lr,
                                    momentum=args.momentum,
                                    weight_decay=args.weight_decay)
    else:
        # transfer learning: only the ResNet's final FC layer is trained
        optimizer = torch.optim.SGD(model.resnet.fc.parameters(), args.lr,
                                    momentum=args.momentum,
                                    weight_decay=args.weight_decay)

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch)

        # train for one epoch
        train(train_loader(224), model, criterion, optimizer, epoch)

        # evaluate on validation set
        prec1 = validate(val_loader(224), model, criterion, epoch)

        # remember best prec@1 and save checkpoint
        is_best = prec1 > best_prec1
        best_prec1 = max(prec1, best_prec1)
        save_checkpoint({
            'epoch': epoch + 1,
            'state_dict': model.state_dict(),
            'best_prec1': best_prec1,
        }, is_best)

    print('Best accuracy: ', best_prec1)
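# Unlike the other scripts, this one treats train_loader/val_loader as callables
# that take a crop size. A hypothetical factory with that shape (the directory
# names and transform choices here are assumptions, not the repo's definitions):
def make_loader(image_dir, crop_size, train=True):
    tf = transforms.Compose([
        transforms.RandomResizedCrop(crop_size) if train
        else transforms.CenterCrop(crop_size),
        transforms.ToTensor(),
        normalize,  # assumes the normalization defined in main() above
    ])
    return torch.utils.data.DataLoader(
        datasets.ImageFolder(image_dir, tf),
        batch_size=args.batch_size, shuffle=train)

# train_loader = lambda size: make_loader('../train', size, train=True)
# val_loader = lambda size: make_loader('../val', size, train=False)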
            net.load_state_dict(checkpoint['state_dict'])
            print("=> loaded Unet++ checkpoint '{}' (epoch {})".format(
                args.model, checkpoint['epoch']))
        else:
            print("=> not a best-model checkpoint; assuming an official "
                  "Unet++ state dict...")
            net.load_state_dict(torch.load(args.model))
    else:
        net.cpu()
        net.load_state_dict(torch.load(args.model, map_location='cpu'))
        print("Using CPU version of the net, this may be very slow")

    densenet = dn.DenseNet3(16, 3, args.growth, bottleneck=args.bottleneck,
                            small_inputs=False)
    densenet = densenet.cuda()

    if args.usedense:
        if args.growth == 12:
            if os.path.isfile(dense12):
                # print("=> loading checkpoint '{}'".format(dense12))
                checkpoint = torch.load(dense12)
                args.start_epoch = checkpoint['epoch']
                # best_prec1 = checkpoint['best_prec1']
                densenet.load_state_dict(checkpoint['state_dict'])
                print("=> loaded Densenet checkpoint '{}' (epoch {})".format(
                    dense12, checkpoint['epoch']))
            else:
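# Checkpoints saved from a DataParallel-wrapped model (as in the multi-GPU script
# above) carry a 'module.' prefix on every state-dict key, so loading them into a
# bare model fails with missing-key errors. A small helper to strip the prefix
# (a general PyTorch pattern, not a function from this repo):
def load_state_dict_flexible(model, state_dict):
    stripped = {k[len('module.'):] if k.startswith('module.') else k: v
                for k, v in state_dict.items()}
    model.load_state_dict(stripped)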