nb_dense_block = 4
growth_rate = 12
nb_filter = 16
dropout_rate = 0.2  # 0.0 for data augmentation
weight_decay = 1E-4

# model = cnn_model()
model = densenet.DenseNet(input_shape=img_dim,
                          depth=depth,
                          nb_dense_block=nb_dense_block,
                          growth_rate=growth_rate,
                          nb_filter=nb_filter,
                          nb_layers_per_block=-1,
                          bottleneck=True,
                          reduction=0.0,
                          dropout_rate=dropout_rate,
                          weight_decay=weight_decay,
                          include_top=True,
                          weights=None,
                          input_tensor=None,
                          classes=NUM_CLASSES,
                          activation='softmax')
print("Model created")

model.summary()

# optimizer = Adam(lr=1e-4)  # Using Adam instead of SGD to speed up training
# optimizer = SGD(lr=learning_rate, decay=0.0, momentum=0.9, nesterov=True)
optimizer = Adam(lr=learning_rate, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
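# The snippet above stops right after building the optimizer. A minimal sketch
# of the usual next steps with the standard Keras compile/fit API; x_train,
# y_train and batch_size are hypothetical placeholders, not from the original:
model.compile(loss='categorical_crossentropy',
              optimizer=optimizer,
              metrics=['accuracy'])
history = model.fit(x_train, y_train,
                    batch_size=batch_size,
                    epochs=10,
                    validation_split=0.1)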
def build_network(num_outputs, architecture, classification=False, name=None):
    """ Constructs a CNN.

    # Arguments:

    - num_outputs: number of final output units.

    - architecture: name of the architecture. See ARCHITECTURES for a list of
                    possible values and README.md for descriptions.

    - classification: If `True`, the final layer will have a softmax activation,
                      otherwise no activation at all.

    - name: The name of the network.

    # Returns:
        keras.models.Model
    """

    if architecture.lower().endswith('-selu'):
        activation = 'selu'
        architecture = architecture[:-5]
    else:
        activation = 'relu'

    # CIFAR-100 architectures

    if architecture == 'resnet-32':
        return cifar_resnet.SmallResNet(
            5,
            filters=[16, 32, 64] if classification else [32, 64, num_outputs],
            activation=activation,
            include_top=classification,
            classes=num_outputs,
            name=name)

    elif architecture == 'resnet-110':
        return cifar_resnet.SmallResNet(18,
                                        filters=[16, 32, 64],
                                        activation=activation,
                                        include_top=classification,
                                        classes=num_outputs,
                                        name=name)

    elif architecture == 'resnet-110-fc':
        return cifar_resnet.SmallResNet(
            18,
            filters=[32, 64, 128],
            activation=activation,
            include_top=True,
            top_activation='softmax' if classification else None,
            classes=num_outputs,
            name=name)

    elif architecture == 'wrn-28-10':
        return wrn.create_wide_residual_network(
            (32, 32, 3),
            nb_classes=num_outputs,
            N=4,
            k=10,
            verbose=0,
            final_activation='softmax' if classification else None,
            name=name)

    elif architecture == 'densenet-100-12':
        return densenet.DenseNet(
            growth_rate=12,
            depth=100,
            nb_dense_block=3,
            bottleneck=False,
            nb_filter=16,
            reduction=0.0,
            classes=num_outputs,
            activation='softmax' if classification else None,
            name=name)

    elif architecture == 'densenet-100-24':
        return densenet.DenseNet(
            growth_rate=24,
            depth=100,
            nb_dense_block=3,
            bottleneck=False,
            nb_filter=16,
            reduction=0.0,
            classes=num_outputs,
            activation='softmax' if classification else None,
            name=name)

    elif architecture == 'densenet-bc-190-40':
        return densenet.DenseNet(
            growth_rate=40,
            depth=190,
            nb_dense_block=3,
            bottleneck=True,
            nb_filter=-1,
            reduction=0.5,
            classes=num_outputs,
            activation='softmax' if classification else None,
            name=name)

    elif architecture == 'pyramidnet-272-200':
        return cifar_pyramidnet.PyramidNet(
            272, 200,
            bottleneck=True,
            activation=activation,
            classes=num_outputs,
            top_activation='softmax' if classification else None,
            name=name)

    elif architecture == 'pyramidnet-110-270':
        return cifar_pyramidnet.PyramidNet(
            110, 270,
            bottleneck=False,
            activation=activation,
            classes=num_outputs,
            top_activation='softmax' if classification else None,
            name=name)

    elif architecture == 'simple':
        return plainnet.PlainNet(
            num_outputs,
            activation=activation,
            final_activation='softmax' if classification else None,
            name=name)

    # ImageNet architectures

    elif architecture == 'resnet-50':
        rn50 = keras.applications.ResNet50(include_top=False, weights=None)
        rn50_out = rn50.layers[-2].output if isinstance(
            rn50.layers[-1],
            keras.layers.AveragePooling2D) else rn50.layers[-1].output
        x = keras.layers.GlobalAvgPool2D(name='avg_pool')(rn50_out)
        x = keras.layers.Dense(
            num_outputs,
            activation='softmax' if classification else None,
            name='prob' if classification else 'embedding')(x)
        return keras.models.Model(rn50.inputs, x, name=name)

    elif architecture.startswith('rn'):
        import keras_resnet.models
        factories = {
            'rn18': keras_resnet.models.ResNet18,
            'rn34': keras_resnet.models.ResNet34,
            'rn50': keras_resnet.models.ResNet50,
            'rn101': keras_resnet.models.ResNet101,
            'rn152': keras_resnet.models.ResNet152,
            'rn200': keras_resnet.models.ResNet200
        }
        input_ = keras.layers.Input((3, None, None)) if K.image_data_format(
        ) == 'channels_first' else keras.layers.Input((None, None, 3))
        rn = factories[architecture](input_,
                                     include_top=classification,
                                     classes=num_outputs,
                                     freeze_bn=False,
                                     name=name)
        if not classification:
            x = keras.layers.GlobalAvgPool2D(name='avg_pool')(rn.outputs[-1])
            x = keras.layers.Dense(num_outputs, name='embedding')(x)
            rn = keras.models.Model(input_, x, name=name)
        return rn

    elif architecture == 'nasnet-a':
        nasnet = keras.applications.NASNetLarge(include_top=False,
                                                input_shape=(224, 224, 3),
                                                weights=None,
                                                pooling='avg')
        x = keras.layers.Dense(
            num_outputs,
            activation='softmax' if classification else None,
            name='prob' if classification else 'embedding')(nasnet.output)
        return keras.models.Model(nasnet.inputs, x, name=name)

    else:
        raise ValueError(
            'Unknown network architecture: {}'.format(architecture))
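# Usage is straightforward; for instance, a CIFAR ResNet-110 classifier for
# 100 classes (architecture names follow the branches above):
net = build_network(100, 'resnet-110', classification=True, name='cifar100-rn110')
net.summary()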
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--batchSize', type=int, default=64)
    parser.add_argument('--Epochs', type=int, default=175)
    parser.add_argument('--no-cuda', action='store_true')
    parser.add_argument('--save')
    parser.add_argument('--seed', type=int, default=1)
    parser.add_argument('--opt', type=str, default='sgd',
                        choices=('sgd', 'adam', 'rmsprop'))
    args = parser.parse_args()

    # Check for CUDA
    args.cuda = not args.no_cuda and torch.cuda.is_available()
    args.save = args.save or 'postprocessing'
    setproctitle.setproctitle(args.save)

    # Manual seed on CPU or GPU
    torch.manual_seed(args.seed)
    if args.cuda:
        torch.cuda.manual_seed(args.seed)

    # Path for saving the progress
    if os.path.exists(args.save):
        shutil.rmtree(args.save)
    os.makedirs(args.save, exist_ok=True)

    # Mean and std of the Fashion-MNIST train dataset images
    normMean = [0.2860405969887955]
    normStd = [0.35302424451492237]
    normTransform = transforms.Normalize(normMean, normStd)

    # Transforms: random crop, random horizontal flip
    trainTransform = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normTransform
    ])
    # # Transforms: RandomRotation, RandomVerticalFlip
    # trainTransform = transforms.Compose([
    #     transforms.RandomRotation(90),
    #     transforms.RandomVerticalFlip(),
    #     transforms.ToTensor(),
    #     normTransform
    # ])
    testTransform = transforms.Compose([transforms.ToTensor(), normTransform])

    # Loading the datasets; downloaded first if not found locally
    kwargs = {'num_workers': 1, 'pin_memory': True} if args.cuda else {}
    loader_train = DataLoader(dataset.FashionMNIST(root='Fashion-MNIST',
                                                   train=True,
                                                   download=True,
                                                   transform=trainTransform),
                              batch_size=args.batchSize,
                              shuffle=True,
                              **kwargs)
    loader_test = DataLoader(dataset.FashionMNIST(root='Fashion-MNIST',
                                                  train=False,
                                                  download=True,
                                                  transform=testTransform),
                             batch_size=args.batchSize,
                             shuffle=False,
                             **kwargs)

    # Calling the DenseNet
    dense_net = densenet.DenseNet(growthRate=15, depth=100, reduction=0.5,
                                  bottleneck=True, nClasses=10)

    print(' + Number of params: {}'.format(
        sum([p.data.nelement() for p in dense_net.parameters()])))
    if args.cuda:
        dense_net = dense_net.cuda()
    else:
        print("no cuda")

    # Choosing the optimizer
    if args.opt == 'sgd':
        optimizer = optim.SGD(dense_net.parameters(), lr=1e-1,
                              momentum=0.9, weight_decay=1e-4)
    elif args.opt == 'adam':
        optimizer = optim.Adam(dense_net.parameters(), weight_decay=1e-4)
    elif args.opt == 'rmsprop':
        optimizer = optim.RMSprop(dense_net.parameters(), weight_decay=1e-4)

    # Progress being saved to csv files
    pfile_train = open(os.path.join(args.save, 'train.csv'), 'w')
    pfile_test = open(os.path.join(args.save, 'test.csv'), 'w')

    # Running the training loop
    for epoch in range(1, args.Epochs + 1):
        adjust_optimizer(args.opt, optimizer, epoch)
        train(args, epoch, dense_net, loader_train, optimizer, pfile_train)
        test(args, epoch, dense_net, loader_test, pfile_test)
        torch.save(dense_net, os.path.join(args.save, 'latest.pth'))
        os.system('./plot.py {} &'.format(args.save))

    pfile_train.close()
    pfile_test.close()

    end = time.time()
    print(end - start)
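# The adjust_optimizer helper used in the loop above is not shown in this
# snippet. A minimal sketch of the step schedule such scripts typically use
# for SGD; the epoch boundaries (50%/75% of 175 epochs) are assumptions, not
# taken from the original code:
def adjust_optimizer(opt, optimizer, epoch):
    if opt == 'sgd':
        if epoch < 88:
            lr = 1e-1
        elif epoch < 132:
            lr = 1e-2
        else:
            lr = 1e-3
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr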
Y_train = np_utils.to_categorical(trainY, nb_classes)
Y_test = np_utils.to_categorical(testY, nb_classes)

# GENERATOR
generator = ImageDataGenerator(rotation_range=15,
                               width_shift_range=5. / 32,
                               height_shift_range=5. / 32,
                               horizontal_flip=True)
generator.fit(trainX, seed=0)

# MODELS
model2k = densenet.DenseNet(img_dim,
                            classes=nb_classes,
                            depth=depth,
                            nb_dense_block=nb_dense_block,
                            growth_rate=growth_rate,
                            nb_filter=nb_filter,
                            dropout_rate=dropout_rate,
                            weights=None,
                            bottleneck=True,
                            growth_rate_factor=2)
print("Models created")

# 2K MODEL
print("Building model 2k...")
model2k.summary()

optimizer = Adam(lr=1e-3)  # Using Adam instead of SGD to speed up training
model2k.compile(loss='categorical_crossentropy',
                optimizer=optimizer,
                metrics=["accuracy"])
print("Finished compiling")

# Load model
weights_file_2k = "weights/DenseNet-40-12-CIFAR10-2K.h5"
if os.path.exists(weights_file_2k):
    # Resume from the saved checkpoint
    model2k.load_weights(weights_file_2k)
    dest='batch_size', help='batch size', type=int, default=1)
args = parser.parse_args()

batch_size = args.batch_size
gpu = args.gpu
load = args.load

# criterion = nn.CrossEntropyLoss()
# optimizer = optim.SGD(net.parameters(), lr=lr, momentum=0.85, weight_decay=0.0005)
net = densenet.DenseNet(num_classes=3, depth=46, growthRate=12,
                        compressionRate=2, dropRate=0)
if gpu:
    net = nn.DataParallel(net, device_ids=device_ids)
    net = net.cuda()
if load:
    net.load_state_dict(torch.load('checkpoints/CP36.pth'))

case_folder = '/mnt/zhoum/large_scale_test/nanjinggulou_299'
slides = _get_case_dir(case_folder)
i = 0
print('--------------Start Analysis----------------------')
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--batchSz', type=int, default=64)
    parser.add_argument('--nEpochs', type=int, default=300)
    parser.add_argument('--no-cuda', action='store_true')
    parser.add_argument('--save')
    parser.add_argument('--seed', type=int, default=1)
    parser.add_argument('--opt', type=str, default='sgd',
                        choices=('sgd', 'adam', 'rmsprop'))
    args = parser.parse_args()

    args.cuda = not args.no_cuda and torch.cuda.is_available()
    args.save = args.save or 'work/dense_se_shuffle_net.base'
    setproctitle.setproctitle(args.save)

    torch.manual_seed(args.seed)
    if args.cuda:
        torch.cuda.manual_seed(args.seed)

    if os.path.exists(args.save):
        shutil.rmtree(args.save)
    os.makedirs(args.save, exist_ok=True)

    normMean = [0.49139968, 0.48215827, 0.44653124]
    normStd = [0.24703233, 0.24348505, 0.26158768]
    normTransform = transforms.Normalize(normMean, normStd)

    trainTransform = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normTransform
    ])
    testTransform = transforms.Compose([
        transforms.ToTensor(),
        normTransform
    ])

    kwargs = {'num_workers': 1, 'pin_memory': True} if args.cuda else {}
    trainLoader = DataLoader(
        dset.CIFAR10(root='cifar', train=True, download=True,
                     transform=trainTransform),
        batch_size=args.batchSz, shuffle=True, **kwargs)
    testLoader = DataLoader(
        dset.CIFAR10(root='cifar', train=False, download=True,
                     transform=testTransform),
        batch_size=args.batchSz, shuffle=False, **kwargs)

    net = densenet.DenseNet(growthRate=24, depth=88, reduction=0.5,
                            bottleneck=True, nClasses=10)
    # net = cifarnet.ResAttentNet()

    print(' + Number of params: {}'.format(
        sum([p.data.nelement() for p in net.parameters()])))
    if args.cuda:
        net = net.cuda()
        net = torch.nn.DataParallel(net,
                                    device_ids=range(torch.cuda.device_count()))
        cudnn.benchmark = True

    if args.opt == 'sgd':
        optimizer = optim.SGD(net.parameters(), lr=1e-1,
                              momentum=0.9, weight_decay=0.00011)
    elif args.opt == 'adam':
        optimizer = optim.Adam(net.parameters(), weight_decay=1e-4)
    elif args.opt == 'rmsprop':
        optimizer = optim.RMSprop(net.parameters(), weight_decay=1e-4)

    trainF = open(os.path.join(args.save, 'train.csv'), 'w')
    testF = open(os.path.join(args.save, 'test.csv'), 'w')

    for epoch in range(1, args.nEpochs + 1):
        adjust_opt(args.opt, optimizer, epoch)
        train(args, epoch, net, trainLoader, optimizer, trainF)
        test(args, epoch, net, testLoader, optimizer, testF)
        torch.save(net, os.path.join(args.save, 'latest.pth'))
        os.system('./plot.py {} &'.format(args.save))

    trainF.close()
    testF.close()
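# train, test, and adjust_opt come from the surrounding script and are not
# shown here. As a reference, a minimal train epoch matching the call
# signature above might look like this; the CSV format and the cross-entropy
# loss are assumptions:
import torch.nn.functional as F

def train(args, epoch, net, trainLoader, optimizer, trainF):
    net.train()
    for batch_idx, (data, target) in enumerate(trainLoader):
        if args.cuda:
            data, target = data.cuda(), target.cuda()
        optimizer.zero_grad()
        output = net(data)
        loss = F.cross_entropy(output, target)
        loss.backward()
        optimizer.step()
        # Top-1 error rate on this batch, logged alongside the loss
        err = 100. * output.max(1)[1].ne(target).sum().item() / len(data)
        trainF.write('{},{},{}\n'.format(epoch, loss.item(), err))
    trainF.flush()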
def train():
    batch_size = args.batch_size
    num_class = args.num_class
    model = densenet.DenseNet(batch_size=batch_size, num_classes=num_class)

    global_step = tf.train.get_or_create_global_step()
    start_learning_rate = 0.0001
    learning_rate = tf.train.exponential_decay(start_learning_rate,
                                               global_step,
                                               100000,
                                               0.98,
                                               staircase=False,
                                               name="learning_rate")

    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    train_op = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(
        loss=model.loss, global_step=global_step)
    train_op = tf.group([train_op, update_ops])
    # optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate, momentum=0.9).minimize(loss=model.loss)

    saver = tf.train.Saver()
    tf.summary.scalar(name='loss', tensor=model.loss)
    tf.summary.scalar(name='softmax_loss', tensor=model.softmax_loss)
    tf.summary.scalar(name='center_loss', tensor=model.center_loss)
    tf.summary.scalar(name='accuracy', tensor=model.accuracy)
    merge_summary_op = tf.summary.merge_all()

    sess_config = tf.ConfigProto(allow_soft_placement=True)
    with tf.Session(config=sess_config) as sess:
        ckpt = tf.train.latest_checkpoint(args.checkpoint_path)
        if ckpt:
            print("restore from %s" % ckpt)
            st = int(ckpt.split('-')[-1])
            saver.restore(sess, ckpt)
            sess.run(global_step.assign(st))
        else:
            tf.global_variables_initializer().run()

        summary_writer = tf.summary.FileWriter(args.checkpoint_path)
        summary_writer.add_graph(sess.graph)

        start_time = time.time()
        step = 0
        iterator = data_generator.get_batch(args.train_image_list, batch_size)
        for batch in iterator:
            if batch is None:
                print("batch is None")
                continue
            image = batch[0]
            labels = batch[1]
            feed_dict = {model.images: image, model.labels: labels}
            _, loss, accuracy, summary, g_step, logits, lr = sess.run(
                [
                    train_op, model.loss, model.accuracy, merge_summary_op,
                    global_step, model.logits, learning_rate
                ],
                feed_dict=feed_dict)
            # The fetched loss is never None; guard against divergence instead
            if np.isnan(loss):
                print(np.max(logits), np.min(logits))
                exit(0)
            if step % 10 == 0:
                print(np.max(logits), np.min(logits))
                print("step:%d, lr: %f, loss: %f, accuracy: %f" %
                      (g_step, lr, loss, accuracy))
            if step % 100 == 0:
                summary_writer.add_summary(summary=summary, global_step=g_step)
                saver.save(sess=sess,
                           save_path=os.path.join(args.checkpoint_path, 'model'),
                           global_step=g_step)
            step += 1
        print("cost: ", time.time() - start_time)
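# For reference, tf.train.exponential_decay with staircase=False computes a
# smooth schedule; with the values above it is
#     lr(step) = 0.0001 * 0.98 ** (step / 100000)
# i.e. a 2% decay every 100k steps. A quick sanity check in plain Python:
def decayed_lr(step, base_lr=0.0001, decay_rate=0.98, decay_steps=100000):
    return base_lr * decay_rate ** (step / decay_steps)

print(decayed_lr(0))       # 0.0001
print(decayed_lr(100000))  # 0.000098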
img_dim = (img_channels, img_rows, img_cols) if K.image_dim_ordering() == "th" \
    else (img_rows, img_cols, img_channels)
depth = 40
nb_dense_block = 3
growth_rate = 12
nb_filter = 16
dropout_rate = 0.0  # 0.0 for data augmentation

if __name__ == '__main__':
    model = densenet.DenseNet(img_dim,
                              classes=nb_classes,
                              depth=depth,
                              nb_dense_block=nb_dense_block,
                              growth_rate=growth_rate,
                              nb_filter=nb_filter,
                              dropout_rate=dropout_rate)
    print("Model created")

    model.summary()
    optimizer = Adam(lr=1e-4)  # Using Adam instead of SGD to speed up training
    model.compile(loss='categorical_crossentropy',
                  optimizer=optimizer,
                  metrics=["accuracy"])
    print("Finished compiling")
    print("Building model...")

    (trainX, trainY), (testX, testY) = cifar10.load_data()
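    # The usual next steps for this script, mirroring the preprocessing used
    # by the CIFAR snippet further above (np_utils is assumed to be imported
    # from keras.utils, and the division by 255 is an assumption about the
    # intended normalization):
    trainX = trainX.astype('float32') / 255.
    testX = testX.astype('float32') / 255.
    Y_train = np_utils.to_categorical(trainY, nb_classes)
    Y_test = np_utils.to_categorical(testY, nb_classes)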
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--batch', type=int, default=8)
    parser.add_argument('--nEpochs', type=int, default=300)
    parser.add_argument('--no-cuda', action='store_true')
    parser.add_argument('--save')
    parser.add_argument('--seed', type=int, default=1)
    parser.add_argument('--opt', type=str, default='adam',
                        choices=('sgd', 'adam', 'rmsprop'))
    args = parser.parse_args()

    args.cuda = not args.no_cuda and torch.cuda.is_available()
    args.save = args.save or 'work/densenet.base'
    # setproctitle.setproctitle(args.save)

    torch.manual_seed(args.seed)
    if args.cuda:
        torch.cuda.manual_seed(args.seed)

    if os.path.exists(args.save):
        shutil.rmtree(args.save)
    os.makedirs(args.save, exist_ok=True)

    normMean = [0.49139968, 0.48215827, 0.44653124]
    normStd = [0.24703233, 0.24348505, 0.26158768]
    normTransform = transforms.Normalize(normMean, normStd)

    trainTransform = transforms.Compose([
        transforms.RandomCrop(96, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normTransform
    ])
    testTransform = transforms.Compose([transforms.ToTensor(), normTransform])

    kwargs = {'num_workers': 1, 'pin_memory': True} if args.cuda else {}
    trainLoader = DataLoader(dset.STL10(root='stl', split='train',
                                        download=True,
                                        transform=trainTransform),
                             batch_size=args.batch, shuffle=True, **kwargs)
    testLoader = DataLoader(dset.STL10(root='stl', split='test',
                                       download=True,
                                       transform=testTransform),
                            batch_size=args.batch, shuffle=False, **kwargs)

    net = densenet.DenseNet(growth_rate=12, depth=100, reduction=0.5,
                            bottleneck=True, nClasses=10)

    print(' + Number of params: {}'.format(
        sum([p.data.nelement() for p in net.parameters()])))
    if args.cuda:
        net = net.cuda()

    if args.opt == 'sgd':
        optimizer = optim.SGD(net.parameters(), lr=1e-1,
                              momentum=0.9, weight_decay=1e-4)
    elif args.opt == 'adam':
        optimizer = optim.Adam(net.parameters(), weight_decay=1e-4)
    elif args.opt == 'rmsprop':
        optimizer = optim.RMSprop(net.parameters(), weight_decay=1e-4)

    trainF = open(os.path.join(args.save, 'train.csv'), 'w')
    testF = open(os.path.join(args.save, 'test.csv'), 'w')

    index = 0
    writer = SummaryWriter()
    for epoch in range(1, args.nEpochs + 1):
        adjust_opt(args.opt, optimizer, epoch)
        index = train(args, epoch, net, trainLoader, optimizer, trainF,
                      writer, index)
        loss2, err2 = test(args, epoch, net, testLoader, optimizer, testF)
        torch.save(net, os.path.join(args.save, 'latest.pth'))
        os.system('./plot.py {} &'.format(args.save))
        writer.add_scalar('data/Test_Loss', loss2, epoch)
        writer.add_scalar('data/Test_Accuracy', 100 - err2, epoch)

    trainF.close()
    testF.close()
    writer.close()
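# The test helper above is expected to return (loss, error%). A minimal
# sketch consistent with that contract; the CSV format is an assumption:
import torch.nn.functional as F

def test(args, epoch, net, testLoader, optimizer, testF):
    net.eval()
    test_loss, incorrect = 0., 0
    with torch.no_grad():
        for data, target in testLoader:
            if args.cuda:
                data, target = data.cuda(), target.cuda()
            output = net(data)
            test_loss += F.cross_entropy(output, target, reduction='sum').item()
            incorrect += output.max(1)[1].ne(target).sum().item()
    n = len(testLoader.dataset)
    test_loss /= n
    err = 100. * incorrect / n
    testF.write('{},{},{}\n'.format(epoch, test_loss, err))
    testF.flush()
    return test_loss, err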
        out = self.lr(out)
        x = out

        out = self.conv2(out)
        out = self.bn1(out)
        out = self.lr(out)
        out = torch.cat((x, out), 1)
        x = out

        out = self.conv3(out)
        out = self.bn2(out)
        out = self.lr(out)
        out = torch.cat((x, out), 1)
        x = out

        out = self.conv4(out)
        out = self.bn3(out)
        out = self.lr(out)
        out = torch.cat((x, out), 1)

        out = self.conv5(out)
        out = self.final(out)
        # output = self.make_main(input)
        return out.view(-1, 1).squeeze(1)


# Test
import densenet
# import densenet_notransition

# model = densenet_notransition.DenseNet(12, 50, 0.5, 10, 1, 4)
model = densenet.DenseNet(12, 50, 0.5, 10, 1)
# model = _netG(0)
print(torch_summarize(model))
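# torch_summarize is not defined in this fragment. A minimal stand-in that
# lists each top-level module with its parameter count (the function name and
# output format are assumptions):
def torch_summarize(model):
    lines = []
    for name, module in model.named_children():
        n_params = sum(p.numel() for p in module.parameters())
        lines.append('{}: {} ({} params)'.format(
            name, module.__class__.__name__, n_params))
    return '\n'.join(lines)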
img_dim = (img_channels, img_rows, img_cols) if K.image_dim_ordering() == "th" \
    else (img_rows, img_cols, img_channels)
depth = 19           # 121
nb_dense_block = 3   # ??
growth_rate = 12     # ??
nb_filter = -1       # ??
dropout_rate = 0.0   # 0.0 for data augmentation ??

model = densenet.DenseNet(img_dim,
                          depth=depth,
                          nb_dense_block=nb_dense_block,
                          growth_rate=growth_rate,
                          nb_filter=nb_filter,
                          dropout_rate=dropout_rate,
                          classes=14,
                          weights=None,
                          activation='sigmoid')
print("Model created")

model.summary()
optimizer = Adam(lr=1e-4)  # Using Adam instead of SGD to speed up training
# binary_crossentropy matches the sigmoid multi-label head above;
# categorical_crossentropy assumes mutually exclusive softmax classes
model.compile(loss='binary_crossentropy',
              optimizer=optimizer,
              metrics=["accuracy"])
print("Finished compiling")
print("Building model...")

folder = '/Volumes/work/data/medical/CheXpert-v1.0-small'
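# With a sigmoid head, the 14 CheXpert findings are encoded as independent
# multi-hot targets rather than one-hot classes, e.g. (illustrative values):
import numpy as np
y = np.zeros((1, 14), dtype='float32')
y[0, [2, 5]] = 1.0  # an image positive for two findings at once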
def run_MURA(
        batch_size=8,             # number of samples per training batch
        nb_epoch=12,              # number of epochs
        depth=22,                 # network depth
        nb_dense_block=4,         # number of dense blocks
        nb_filter=16,             # initial number of conv filters
        growth_rate=12,           # number of new filters added by each layer
        dropout_rate=0.2,         # dropout rate
        learning_rate=0.001,      # learning rate
        weight_decay=1E-4,        # weight decay
        plot_architecture=False   # plot network architecture
):
    ###################
    # Data processing #
    ###################

    im_size = 320  # resize images
    path_train = '/home/yu/Documents/tensorflow/MURA/MURA-v1.1/train/XR_ELBOW'  # the absolute path
    path_valid = '/home/yu/Documents/tensorflow/MURA/MURA-v1.1/valid/XR_ELBOW'

    X_train_path, Y_train = data_loader.load_path(root_path=path_train, size=im_size)
    X_valid_path, Y_valid = data_loader.load_path(root_path=path_valid, size=im_size)

    X_valid = data_loader.load_image(X_valid_path, im_size)  # load images for validation
    Y_valid = np.asarray(Y_valid)

    nb_classes = 1
    img_dim = (im_size, im_size, 1)  # tuple, channels last

    ###################
    # Construct model #
    ###################

    # model is one instance of class 'Model'
    model = densenet.DenseNet(nb_classes,
                              img_dim,
                              depth,
                              nb_dense_block,
                              growth_rate,
                              nb_filter,
                              dropout_rate=dropout_rate,
                              weight_decay=weight_decay)
    # Model output
    model.summary()

    # Build optimizer
    opt = Adam(lr=learning_rate, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
    model.compile(loss='binary_crossentropy',
                  optimizer=opt,  # optimizer used to update the gradient
                  metrics=["accuracy"])

    if plot_architecture:
        from keras.utils import plot_model
        plot_model(model, to_file='./figures/densenet_archi.png', show_shapes=True)

    ####################
    # Network training #
    ####################

    print("Start Training")
    list_train_loss = []
    list_valid_loss = []
    list_learning_rate = []
    best_record = [100, 0, 100, 100]  # record the best result
    start_time = datetime.datetime.now()

    for e in range(nb_epoch):
        # Step the learning rate down at 25%, 50% and 75% of training
        if e == int(0.25 * nb_epoch):
            K.set_value(model.optimizer.lr, np.float32(learning_rate / 10.))
        if e == int(0.5 * nb_epoch):
            K.set_value(model.optimizer.lr, np.float32(learning_rate / 50.))
        if e == int(0.75 * nb_epoch):
            K.set_value(model.optimizer.lr, np.float32(learning_rate / 100.))

        split_size = batch_size
        # Integer division: how many batches of training images there are
        num_splits = len(X_train_path) // split_size
        arr_all = np.arange(len(X_train_path)).astype(int)
        # Reshuffle so the order differs between epochs; this avoids a local
        # optimum and effectively turns the loop into SGD
        random.shuffle(arr_all)
        # Divide the training images into num_splits batches
        arr_splits = np.array_split(arr_all, num_splits)

        l_train_loss = []
        batch_train_loss = []
        start = datetime.datetime.now()

        for i, batch_idx in enumerate(arr_splits):
            # i: batch counter, batch_idx: indices of this batch
            X_batch_path, Y_batch = [], []  # image paths and labels for this batch
            for idx in batch_idx:
                X_batch_path.append(X_train_path[idx])
                Y_batch.append(Y_train[idx])
            X_batch = data_loader.load_image(Path=X_batch_path, size=im_size)
            Y_batch = np.asarray(Y_batch)  # labels as an array

            # Train on the batch; returns loss and accuracy
            train_logloss, train_acc = model.train_on_batch(X_batch, Y_batch)
            l_train_loss.append([train_logloss, train_acc])
            batch_train_loss.append([train_logloss, train_acc])

            if i % 100 == 0:  # every 100 batches
                loss_1, acc_1 = np.mean(np.array(l_train_loss), 0)
                loss_2, acc_2 = np.mean(np.array(batch_train_loss), 0)
                batch_train_loss = []
                print(
                    '[Epoch {}/{}] [Batch {}/{}] [Time: {}] '
                    '[all_batches--> train_epoch_logloss: {:.5f}, train_epoch_acc: {:.5f}]'
                    .format(e + 1, nb_epoch, i, len(arr_splits),
                            datetime.datetime.now() - start, loss_1, acc_1),
                    '[last_100_batches--> train_batch_logloss: {:.5f}, train_batch_acc: {:.5f}]'
                    .format(loss_2, acc_2))

        # Validate
        valid_logloss, valid_acc = model.evaluate(X_valid, Y_valid,
                                                  verbose=0, batch_size=64)
        list_train_loss.append(np.mean(np.array(l_train_loss), 0).tolist())
        list_valid_loss.append([valid_logloss, valid_acc])
        # Convert to float so the value is JSON serializable
        list_learning_rate.append(float(K.get_value(model.optimizer.lr)))

        print('[Epoch %s/%s] [Time: %s, Total_time: %s]' %
              (e + 1, nb_epoch, datetime.datetime.now() - start,
               datetime.datetime.now() - start_time),
              end='')
        print('[train_loss_and_acc: {:.5f} {:.5f}] [valid_loss_acc: {:.5f} {:.5f}]'
              .format(list_train_loss[-1][0], list_train_loss[-1][1],
                      list_valid_loss[-1][0], list_valid_loss[-1][1]))

        d_log = {}
        d_log["batch_size"] = batch_size
        d_log["nb_epoch"] = nb_epoch
        d_log["optimizer"] = opt.get_config()
        d_log["train_loss"] = list_train_loss
        d_log["valid_loss"] = list_valid_loss
        d_log["learning_rate"] = list_learning_rate

        json_file = os.path.join('./log/experiment_log_MURA.json')
        with open(json_file, 'w') as fp:
            json.dump(d_log, fp, indent=4, sort_keys=True)

        record = [
            valid_logloss,
            valid_acc,
            abs(valid_logloss - list_train_loss[-1][0]),
            abs(valid_acc - list_train_loss[-1][1]),
        ]
        # Keep the checkpoint with the best validation loss and accuracy; after
        # the first quarter of training also require the train/valid gaps not
        # to grow (the original used bitwise &/| here, which mis-parses due to
        # operator precedence)
        if (record[0] <= best_record[0]) and (record[1] >= best_record[1]):
            if (e <= int(0.25 * nb_epoch)) or ((record[2] <= best_record[2])
                                               and (record[3] <= best_record[3])):
                best_record = record
                print('saving the best model: epoch', e + 1, best_record)
                model.save('save_models/best_MURA_model@epochs{}.h5'.format(e + 1))
        model.save('save_models/MURA_model@epochs{}.h5'.format(e + 1))
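# A typical entry point for this script; the original does not show one, so
# this is simply the default invocation:
if __name__ == '__main__':
    run_MURA()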