Example #1
nb_dense_block = 4
growth_rate = 12
nb_filter = 16
dropout_rate = 0.2  # set to 0.0 when training with data augmentation
weight_decay = 1E-4

# model = cnn_model()

model = densenet.DenseNet(input_shape=img_dim,
                          depth=depth,
                          nb_dense_block=nb_dense_block,
                          growth_rate=growth_rate,
                          nb_filter=nb_filter,
                          nb_layers_per_block=-1,
                          bottleneck=True,
                          reduction=0.0,
                          dropout_rate=dropout_rate,
                          weight_decay=weight_decay,
                          include_top=True,
                          weights=None,
                          input_tensor=None,
                          classes=NUM_CLASSES,
                          activation='softmax')
print("Model created")

model.summary()

# optimizer = Adam(lr=1e-4) # Using Adam instead of SGD to speed up training
# optimizer = SGD(lr=learning_rate, decay=0.0, momentum=0.9, nesterov=True)
optimizer = Adam(lr=learning_rate, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
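
A plausible continuation of this snippet, assuming the standard Keras workflow used by the other examples below; the fit call is left commented out because X_train/Y_train are not defined here:

model.compile(loss='categorical_crossentropy',
              optimizer=optimizer,
              metrics=['accuracy'])
# model.fit(X_train, Y_train, batch_size=64, epochs=nb_epoch,
#           validation_data=(X_test, Y_test), verbose=1)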
Example #2
def build_network(num_outputs, architecture, classification=False, name=None):
    """ Constructs a CNN.
    
    # Arguments:
    
    - num_outputs: number of final output units.
    
    - architecture: name of the architecture. See ARCHITECTURES for a list of possible values and README.md for descriptions.
    
    - classification: If `True`, the final layer will have a softmax activation, otherwise no activation at all.
    
    - name: The name of the network.
    
    # Returns:
        keras.models.Model
    """

    if architecture.lower().endswith('-selu'):
        activation = 'selu'
        architecture = architecture[:-5]
    else:
        activation = 'relu'

    # CIFAR-100 architectures

    if architecture == 'resnet-32':

        return cifar_resnet.SmallResNet(
            5,
            filters=[16, 32, 64] if classification else [32, 64, num_outputs],
            activation=activation,
            include_top=classification,
            classes=num_outputs,
            name=name)

    elif architecture == 'resnet-110':

        return cifar_resnet.SmallResNet(18,
                                        filters=[16, 32, 64],
                                        activation=activation,
                                        include_top=classification,
                                        classes=num_outputs,
                                        name=name)

    elif architecture == 'resnet-110-fc':

        return cifar_resnet.SmallResNet(
            18,
            filters=[32, 64, 128],
            activation=activation,
            include_top=True,
            top_activation='softmax' if classification else None,
            classes=num_outputs,
            name=name)

    elif architecture == 'wrn-28-10':

        return wrn.create_wide_residual_network(
            (32, 32, 3),
            nb_classes=num_outputs,
            N=4,
            k=10,
            verbose=0,
            final_activation='softmax' if classification else None,
            name=name)

    elif architecture == 'densenet-100-12':

        return densenet.DenseNet(
            growth_rate=12,
            depth=100,
            nb_dense_block=3,
            bottleneck=False,
            nb_filter=16,
            reduction=0.0,
            classes=num_outputs,
            activation='softmax' if classification else None,
            name=name)

    elif architecture == 'densenet-100-24':

        return densenet.DenseNet(
            growth_rate=24,
            depth=100,
            nb_dense_block=3,
            bottleneck=False,
            nb_filter=16,
            reduction=0.0,
            classes=num_outputs,
            activation='softmax' if classification else None,
            name=name)

    elif architecture == 'densenet-bc-190-40':

        return densenet.DenseNet(
            growth_rate=40,
            depth=190,
            nb_dense_block=3,
            bottleneck=True,
            nb_filter=-1,
            reduction=0.5,
            classes=num_outputs,
            activation='softmax' if classification else None,
            name=name)

    elif architecture == 'pyramidnet-272-200':

        return cifar_pyramidnet.PyramidNet(
            272,
            200,
            bottleneck=True,
            activation=activation,
            classes=num_outputs,
            top_activation='softmax' if classification else None,
            name=name)

    elif architecture == 'pyramidnet-110-270':

        return cifar_pyramidnet.PyramidNet(
            110,
            270,
            bottleneck=False,
            activation=activation,
            classes=num_outputs,
            top_activation='softmax' if classification else None,
            name=name)

    elif architecture == 'simple':

        return plainnet.PlainNet(
            num_outputs,
            activation=activation,
            final_activation='softmax' if classification else None,
            name=name)

    # ImageNet architectures

    elif architecture == 'resnet-50':

        rn50 = keras.applications.ResNet50(include_top=False, weights=None)
        # drop a trailing AveragePooling2D layer if present; global pooling is applied below
        rn50_out = (rn50.layers[-2].output
                    if isinstance(rn50.layers[-1], keras.layers.AveragePooling2D)
                    else rn50.layers[-1].output)
        x = keras.layers.GlobalAvgPool2D(name='avg_pool')(rn50_out)
        x = keras.layers.Dense(
            num_outputs,
            activation='softmax' if classification else None,
            name='prob' if classification else 'embedding')(x)
        return keras.models.Model(rn50.inputs, x, name=name)

    elif architecture.startswith('rn'):

        import keras_resnet.models
        factories = {
            'rn18': keras_resnet.models.ResNet18,
            'rn34': keras_resnet.models.ResNet34,
            'rn50': keras_resnet.models.ResNet50,
            'rn101': keras_resnet.models.ResNet101,
            'rn152': keras_resnet.models.ResNet152,
            'rn200': keras_resnet.models.ResNet200
        }
        input_ = (keras.layers.Input((3, None, None))
                  if K.image_data_format() == 'channels_first'
                  else keras.layers.Input((None, None, 3)))
        rn = factories[architecture](input_,
                                     include_top=classification,
                                     classes=num_outputs,
                                     freeze_bn=False,
                                     name=name)
        if not classification:
            x = keras.layers.GlobalAvgPool2D(name='avg_pool')(rn.outputs[-1])
            x = keras.layers.Dense(num_outputs, name='embedding')(x)
            rn = keras.models.Model(input_, x, name=name)
        return rn

    elif architecture == 'nasnet-a':

        nasnet = keras.applications.NASNetLarge(include_top=False,
                                                input_shape=(224, 224, 3),
                                                weights=None,
                                                pooling='avg')
        x = keras.layers.Dense(
            num_outputs,
            activation='softmax' if classification else None,
            name='prob' if classification else 'embedding')(nasnet.output)
        return keras.models.Model(nasnet.inputs, x, name=name)

    else:

        raise ValueError(
            'Unknown network architecture: {}'.format(architecture))
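
For reference, a minimal usage sketch of the factory above; the architecture names and output sizes are illustrative only:

# hypothetical calls to build_network defined above
embedding_net = build_network(64, 'resnet-110', classification=False, name='cifar_embedding')
classifier = build_network(100, 'resnet-32-selu', classification=True, name='cifar100_selu')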
Example #3
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--batchSize', type=int, default=64)
    parser.add_argument('--Epochs', type=int, default=175)
    parser.add_argument('--no-cuda', action='store_true')
    parser.add_argument('--save')
    parser.add_argument('--seed', type=int, default=1)
    parser.add_argument('--opt',
                        type=str,
                        default='sgd',
                        choices=('sgd', 'adam', 'rmsprop'))
    args = parser.parse_args()

    #Check for cuda
    args.cuda = not args.no_cuda and torch.cuda.is_available()
    args.save = args.save or 'postprocessing'
    setproctitle.setproctitle(args.save)

    #manual seed on CPU or GPU
    torch.manual_seed(args.seed)
    if args.cuda:
        torch.cuda.manual_seed(args.seed)

    #Path for saving the progress
    if os.path.exists(args.save):
        shutil.rmtree(args.save)
    os.makedirs(args.save, exist_ok=True)

    # mean and std of the Fashion-MNIST train dataset images
    normMean = [0.2860405969887955]
    normStd = [0.35302424451492237]
    normTransform = transforms.Normalize(normMean, normStd)

    # Transforms : Random crop, random horizontal flip

    trainTransform = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(), normTransform
    ])

    # # Transforms : RandomRotation, RandomVerticalFlip
    # trainTransform = transforms.Compose([
    #     transforms.RandomRotation(90),
    #     transforms.RandomVerticalFlip(),
    #     transforms.ToTensor(),
    #     normTransform
    # ])
    testTransform = transforms.Compose([transforms.ToTensor(), normTransform])

    # Load the datasets (downloaded automatically if not found locally)
    kwargs = {'num_workers': 1, 'pin_memory': True} if args.cuda else {}
    loader_train = DataLoader(dataset.FashionMNIST(root='Fashion-MNIST',
                                                   train=True,
                                                   download=True,
                                                   transform=trainTransform),
                              batch_size=args.batchSize,
                              shuffle=True,
                              **kwargs)
    loader_test = DataLoader(dataset.FashionMNIST(root='Fashion-MNIST',
                                                  train=False,
                                                  download=True,
                                                  transform=testTransform),
                             batch_size=args.batchSize,
                             shuffle=False,
                             **kwargs)

    # Build the DenseNet
    dense_net = densenet.DenseNet(growthRate=15,
                                  depth=100,
                                  reduction=0.5,
                                  bottleneck=True,
                                  nClasses=10)

    print('  + Number of params: {}'.format(
        sum([p.data.nelement() for p in dense_net.parameters()])))
    if args.cuda:
        dense_net = dense_net.cuda()
    else:
        print("no cuda")

    #Choosing the optimizer
    if args.opt == 'sgd':
        optimizer = optim.SGD(dense_net.parameters(),
                              lr=1e-1,
                              momentum=0.9,
                              weight_decay=1e-4)
    elif args.opt == 'adam':
        optimizer = optim.Adam(dense_net.parameters(), weight_decay=1e-4)
    elif args.opt == 'rmsprop':
        optimizer = optim.RMSprop(dense_net.parameters(), weight_decay=1e-4)

    # Training progress is saved to CSV files
    pfile_train = open(os.path.join(args.save, 'train.csv'), 'w')
    pfile_test = open(os.path.join(args.save, 'test.csv'), 'w')

    # run the training loop and time its total duration
    start = time.time()
    for epoch in range(1, args.Epochs + 1):
        adjust_optimizer(args.opt, optimizer, epoch)
        train(args, epoch, dense_net, loader_train, optimizer, pfile_train)
        test(args, epoch, dense_net, loader_test, pfile_test)
        torch.save(dense_net, os.path.join(args.save, 'latest.pth'))
        os.system('./plot.py {} &'.format(args.save))

    pfile_train.close()
    pfile_test.close()
    end = time.time()
    print(end - start)
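
The adjust_optimizer helper called inside the loop is not part of the snippet; a minimal sketch, assuming the usual DenseNet step schedule (drop the SGD learning rate at 50% and 75% of the epochs), could look like this:

def adjust_optimizer(opt, optimizer, epoch, n_epochs=175):
    """Hypothetical helper: step-wise SGD learning-rate schedule (other optimizers untouched)."""
    if opt == 'sgd':
        if epoch < 0.5 * n_epochs:
            lr = 1e-1
        elif epoch < 0.75 * n_epochs:
            lr = 1e-2
        else:
            lr = 1e-3
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr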
Example #4
Y_train = np_utils.to_categorical(trainY, nb_classes)
Y_test = np_utils.to_categorical(testY, nb_classes)

# GENERATOR
generator = ImageDataGenerator(rotation_range=15,
                               width_shift_range=5. / 32,
                               height_shift_range=5. / 32,
                               horizontal_flip=True)

generator.fit(trainX, seed=0)

# MODELS

model2k = densenet.DenseNet(img_dim, classes=nb_classes, depth=depth, nb_dense_block=nb_dense_block,
                            growth_rate=growth_rate, nb_filter=nb_filter, dropout_rate=dropout_rate, weights=None,
                            bottleneck=True, growth_rate_factor=2)

print("Models created")

# 2K MODEL
print("Building model 2k...")
model2k.summary()
optimizer = Adam(lr=1e-3)  # Using Adam instead of SGD to speed up training
model2k.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=["accuracy"])
print("Finished compiling")


# Load model
weights_file_2k = "weights/DenseNet-40-12-CIFAR10-2K.h5"
if os.path.exists(weights_file_2k):
    # assumed continuation (the snippet is cut off here): restore previously saved weights
    model2k.load_weights(weights_file_2k)
    print("Model 2k weights loaded.")
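
A hedged sketch of how training with the generator above might continue, assuming the Keras 2 fit_generator API; the batch size and epoch count are placeholders, not values from the original code:

batch_size = 64   # placeholder
nb_epoch = 40     # placeholder
model2k.fit_generator(generator.flow(trainX, Y_train, batch_size=batch_size),
                      steps_per_epoch=len(trainX) // batch_size,
                      epochs=nb_epoch,
                      validation_data=(testX, Y_test),
                      verbose=1)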
Example #5
                        dest='batch_size',
                        help='batch size',
                        type=int,
                        default=1)
    args = parser.parse_args()
    batch_size = args.batch_size
    gpu = args.gpu
    load = args.load

    # criterion = nn.CrossEntropyLoss()

    # optimizer = optim.SGD(net.parameters(), lr=lr, momentum=0.85, weight_decay=0.0005)

    net = densenet.DenseNet(num_classes=3,
                            depth=46,
                            growthRate=12,
                            compressionRate=2,
                            dropRate=0)

    if gpu:
        net = nn.DataParallel(net, device_ids=device_ids)
        net = net.cuda()

    if load:
        net.load_state_dict(torch.load('checkpoints/CP36.pth'))

    case_folder = '/mnt/zhoum/large_scale_test/nanjinggulou_299'
    slides = _get_case_dir(case_folder)
    i = 0

    print('--------------Start Analysis----------------------')
Example #6
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--batchSz', type=int, default=64)
    parser.add_argument('--nEpochs', type=int, default=300)
    parser.add_argument('--no-cuda', action='store_true')
    parser.add_argument('--save')
    parser.add_argument('--seed', type=int, default=1)
    parser.add_argument('--opt', type=str, default='sgd',
                        choices=('sgd', 'adam', 'rmsprop'))
    args = parser.parse_args()

    args.cuda = not args.no_cuda and torch.cuda.is_available()
    args.save = args.save or 'work/dense_se_shuffle_net.base'
    setproctitle.setproctitle(args.save)

    torch.manual_seed(args.seed)
    if args.cuda:
        torch.cuda.manual_seed(args.seed)

    if os.path.exists(args.save):
        shutil.rmtree(args.save)
    os.makedirs(args.save, exist_ok=True)

    normMean = [0.49139968, 0.48215827, 0.44653124]
    normStd = [0.24703233, 0.24348505, 0.26158768]
    normTransform = transforms.Normalize(normMean, normStd)

    trainTransform = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normTransform
    ])
    testTransform = transforms.Compose([
        transforms.ToTensor(),
        normTransform
    ])

    kwargs = {'num_workers': 1, 'pin_memory': True} if args.cuda else {}
    trainLoader = DataLoader(
        dset.CIFAR10(root='cifar', train=True, download=True,
                     transform=trainTransform),
        batch_size=args.batchSz, shuffle=True, **kwargs)
    testLoader = DataLoader(
        dset.CIFAR10(root='cifar', train=False, download=True,
                     transform=testTransform),
        batch_size=args.batchSz, shuffle=False, **kwargs)

    net = densenet.DenseNet(growthRate=24, depth=88, reduction=0.5,
                            bottleneck=True, nClasses=10)
    #net = cifarnet.ResAttentNet()
    print('  + Number of params: {}'.format(
        sum([p.data.nelement() for p in net.parameters()])))
    if args.cuda:
        net = net.cuda()
        net = torch.nn.DataParallel(net, device_ids=range(torch.cuda.device_count()))
        cudnn.benchmark = True
    
    if args.opt == 'sgd':
        optimizer = optim.SGD(net.parameters(), lr=1e-1,
                            momentum=0.9, weight_decay=0.00011)
    elif args.opt == 'adam':
        optimizer = optim.Adam(net.parameters(), weight_decay=1e-4)
    elif args.opt == 'rmsprop':
        optimizer = optim.RMSprop(net.parameters(), weight_decay=1e-4)

    trainF = open(os.path.join(args.save, 'train.csv'), 'w')
    testF = open(os.path.join(args.save, 'test.csv'), 'w')

    for epoch in range(1, args.nEpochs + 1):
        adjust_opt(args.opt, optimizer, epoch)
        train(args, epoch, net, trainLoader, optimizer, trainF)
        test(args, epoch, net, testLoader, optimizer, testF)
        torch.save(net, os.path.join(args.save, 'latest.pth'))
        os.system('./plot.py {} &'.format(args.save))

    trainF.close()
    testF.close()
Example #7
def train():
    batch_size = args.batch_size
    num_class = args.num_class
    model = densenet.DenseNet(batch_size=batch_size, num_classes=num_class)
    global_step = tf.train.get_or_create_global_step()
    start_learning_rate = 0.0001
    learning_rate = tf.train.exponential_decay(start_learning_rate,
                                               global_step,
                                               100000,
                                               0.98,
                                               staircase=False,
                                               name="learning_rate")
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    train_op = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(
        loss=model.loss, global_step=global_step)
    train_op = tf.group([train_op, update_ops])
    #optimizer=tf.train.MomentumOptimizer(learning_rate=learning_rate, momentum=0.9).minimize(loss=model.loss)
    saver = tf.train.Saver()
    tf.summary.scalar(name='loss', tensor=model.loss)
    tf.summary.scalar(name='softmax_loss', tensor=model.softmax_loss)
    tf.summary.scalar(name='center_loss', tensor=model.center_loss)
    tf.summary.scalar(name='accuracy', tensor=model.accuracy)
    merge_summary_op = tf.summary.merge_all()
    sess_config = tf.ConfigProto(allow_soft_placement=True)
    with tf.Session(config=sess_config) as sess:
        ckpt = tf.train.latest_checkpoint(args.checkpoint_path)
        if ckpt:
            print("restore form %s " % (ckpt))
            st = int(ckpt.split('-')[-1])
            saver.restore(sess, ckpt)
            sess.run(global_step.assign(st))
        else:
            tf.global_variables_initializer().run()
        summary_writer = tf.summary.FileWriter(args.checkpoint_path)
        summary_writer.add_graph(sess.graph)
        start_time = time.time()
        step = 0
        iterator = data_generator.get_batch(args.train_image_list, batch_size)
        for batch in iterator:
            if batch is None:
                print("batch is None")
                continue
            image = batch[0]
            labels = batch[1]
            feed_dict = {model.images: image, model.labels: labels}
            _, loss, accuracy, summary, g_step, logits, lr = sess.run(
                [
                    train_op, model.loss, model.accuracy, merge_summary_op,
                    global_step, model.logits, learning_rate
                ],
                feed_dict=feed_dict)
            if loss is None:
                print(np.max(logits), np.min(logits))
                exit(0)
            if step % 10 == 0:
                print(np.max(logits), np.min(logits))
                print("step:%d, lr: %f, loss: %f, accuracy: %f" %
                      (g_step, lr, loss, accuracy))
            if step % 100 == 0:
                summary_writer.add_summary(summary=summary, global_step=g_step)
                saver.save(sess=sess,
                           save_path=os.path.join(args.checkpoint_path,
                                                  'model'),
                           global_step=g_step)
            step += 1
        print("cost: ", time.time() - start_time)
Example #8
img_dim = ((img_channels, img_rows, img_cols)
           if K.image_dim_ordering() == "th"
           else (img_rows, img_cols, img_channels))
depth = 40
nb_dense_block = 3
growth_rate = 12
nb_filter = 16
dropout_rate = 0.0  # 0.0 for data augmentation

if __name__ == '__main__':

    model = densenet.DenseNet(img_dim,
                              classes=nb_classes,
                              depth=depth,
                              nb_dense_block=nb_dense_block,
                              growth_rate=growth_rate,
                              nb_filter=nb_filter,
                              dropout_rate=dropout_rate)
    print("Model created")

    model.summary()
    optimizer = Adam(lr=1e-4)  # Using Adam instead of SGD to speed up training
    model.compile(loss='categorical_crossentropy',
                  optimizer=optimizer,
                  metrics=["accuracy"])
    print("Finished compiling")
    print("Building model...")

    (trainX, trainY), (testX, testY) = cifar10.load_data()
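
A plausible continuation for preparing the CIFAR-10 arrays, assuming the same preprocessing pattern as the other Keras examples here (np_utils.to_categorical one-hot labels and a simple rescaling of the pixel values):

    trainX = trainX.astype('float32') / 255.
    testX = testX.astype('float32') / 255.
    Y_train = np_utils.to_categorical(trainY, nb_classes)
    Y_test = np_utils.to_categorical(testY, nb_classes)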
Example #9
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--batch', type=int, default=8)
    parser.add_argument('--nEpochs', type=int, default=300)
    parser.add_argument('--no-cuda', action='store_true')
    parser.add_argument('--save')
    parser.add_argument('--seed', type=int, default=1)
    parser.add_argument('--opt',
                        type=str,
                        default='adam',
                        choices=('sgd', 'adam', 'rmsprop'))
    args = parser.parse_args()

    args.cuda = not args.no_cuda and torch.cuda.is_available()
    args.save = args.save or 'work/densenet.base'
    #    setproctitle.setproctitle(args.save)

    torch.manual_seed(args.seed)
    if args.cuda:
        torch.cuda.manual_seed(args.seed)

    if os.path.exists(args.save):
        shutil.rmtree(args.save)
    os.makedirs(args.save, exist_ok=True)

    normMean = [0.49139968, 0.48215827, 0.44653124]
    normStd = [0.24703233, 0.24348505, 0.26158768]
    normTransform = transforms.Normalize(normMean, normStd)

    trainTransform = transforms.Compose([
        transforms.RandomCrop(96, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(), normTransform
    ])
    testTransform = transforms.Compose([transforms.ToTensor(), normTransform])

    kwargs = {'num_workers': 1, 'pin_memory': True} if args.cuda else {}
    trainLoader = DataLoader(dset.STL10(root='stl',
                                        split='train',
                                        download=True,
                                        transform=trainTransform),
                             batch_size=args.batch,
                             shuffle=True,
                             **kwargs)
    testLoader = DataLoader(dset.STL10(root='stl',
                                       split='test',
                                       download=True,
                                       transform=testTransform),
                            batch_size=args.batch,
                            shuffle=False,
                            **kwargs)

    net = densenet.DenseNet(growth_rate=12,
                            depth=100,
                            reduction=0.5,
                            bottleneck=True,
                            nClasses=10)

    print('  + Number of params: {}'.format(
        sum([p.data.nelement() for p in net.parameters()])))
    if args.cuda:
        net = net.cuda()

    if args.opt == 'sgd':
        optimizer = optim.SGD(net.parameters(),
                              lr=1e-1,
                              momentum=0.9,
                              weight_decay=1e-4)
    elif args.opt == 'adam':
        optimizer = optim.Adam(net.parameters(), weight_decay=1e-4)
    elif args.opt == 'rmsprop':
        optimizer = optim.RMSprop(net.parameters(), weight_decay=1e-4)

    trainF = open(os.path.join(args.save, 'train.csv'), 'w')
    testF = open(os.path.join(args.save, 'test.csv'), 'w')
    index = 0
    writer = SummaryWriter()
    for epoch in range(1, args.nEpochs + 1):
        adjust_opt(args.opt, optimizer, epoch)
        index = train(args, epoch, net, trainLoader, optimizer, trainF, writer,
                      index)
        loss2, err2 = test(args, epoch, net, testLoader, optimizer, testF)
        torch.save(net, os.path.join(args.save, 'latest.pth'))
        os.system('./plot.py {} &'.format(args.save))
        writer.add_scalar('data/Test_Loss', loss2, epoch)
        writer.add_scalar('data/Test_Accuracy', 100 - err2, epoch)
    trainF.close()
    testF.close()
    writer.close()
Example #10
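        # (fragment: the lines below sit inside a forward() method; each torch.cat keeps the
        #  previously computed feature maps alongside the new ones, i.e. the dense-connectivity
        #  pattern that DenseNet-style blocks are built around)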
        out = self.lr(out)
        x = out
        out = self.conv2(out)
        out = self.bn1(out)
        out = self.lr(out)
        out = torch.cat((x, out), 1)
        x = out
        out = self.conv3(out)
        out = self.bn2(out)
        out = self.lr(out)
        out = torch.cat((x, out), 1)
        x = out
        out = self.conv4(out)
        out = self.bn3(out)
        out = self.lr(out)
        out = torch.cat((x, out), 1)

        out = self.conv5(out)
        out = self.final(out)
        
        #output = self.make_main(input)
        return out.view(-1, 1).squeeze(1)

# Test
import densenet
#import densenet_notransition
#model = densenet_notransition.DenseNet(12,50,0.5,10,1,4)
model = densenet.DenseNet(12, 50, 0.5, 10, 1)
#model = _netG(0)
print(torch_summarize(model))
Example #11
img_dim = ((img_channels, img_rows, img_cols)
           if K.image_dim_ordering() == "th"
           else (img_rows, img_cols, img_channels))

depth = 19  # 121
nb_dense_block = 3  # ??
growth_rate = 12  # ??
nb_filter = -1  # ??
dropout_rate = 0.0  # 0.0 for data augmentation ??

model = densenet.DenseNet(img_dim,
                          depth=depth,
                          nb_dense_block=nb_dense_block,
                          growth_rate=growth_rate,
                          nb_filter=nb_filter,
                          dropout_rate=dropout_rate,
                          classes=14,
                          weights=None,
                          activation='sigmoid')
print("Model created")

model.summary()
optimizer = Adam(lr=1e-4)  # Using Adam instead of SGD to speed up training
model.compile(loss='binary_crossentropy',  # multi-label targets with a sigmoid output layer
              optimizer=optimizer,
              metrics=["accuracy"])
print("Finished compiling")
print("Building model...")

folder = '/Volumes/work/data/medical/CheXpert-v1.0-small'
Example #12
def run_MURA(
        batch_size=8,  # number of samples per training batch
        nb_epoch=12,  # number of training epochs
        depth=22,  # network depth
        nb_dense_block=4,  # number of dense blocks
        nb_filter=16,  # initial number of conv filters
        growth_rate=12,  # number of new filters added by each layer
        dropout_rate=0.2,  # dropout rate
        learning_rate=0.001,  # learning rate
        weight_decay=1E-4,  # weight decay
        plot_architecture=False  # plot the network architecture
):

    ###################
    # Data processing #
    ###################

    im_size = 320  # resize images
    path_train = '/home/yu/Documents/tensorflow/MURA/MURA-v1.1/train/XR_ELBOW'  # the absolute path
    path_valid = '/home/yu/Documents/tensorflow/MURA/MURA-v1.1/valid/XR_ELBOW'
    X_train_path, Y_train = data_loader.load_path(root_path=path_train,
                                                  size=im_size)
    X_valid_path, Y_valid = data_loader.load_path(root_path=path_valid,
                                                  size=im_size)

    X_valid = data_loader.load_image(X_valid_path,
                                     im_size)  # load the validation images
    Y_valid = np.asarray(Y_valid)
    nb_classes = 1
    img_dim = (im_size, im_size, 1)  # channels-last input shape

    ###################
    # Construct model #
    ###################

    # model is one instance of class 'Model'
    model = densenet.DenseNet(nb_classes,
                              img_dim,
                              depth,
                              nb_dense_block,
                              growth_rate,
                              nb_filter,
                              dropout_rate=dropout_rate,
                              weight_decay=weight_decay)
    # Model output
    model.summary()

    # Build optimizer
    opt = Adam(lr=learning_rate, beta_1=0.9, beta_2=0.999, epsilon=1e-08)

    model.compile(
        loss='binary_crossentropy',
        optimizer=opt,  # optimizer used to update gradient
        metrics=["accuracy"])

    if plot_architecture:
        from keras.utils import plot_model
        plot_model(model,
                   to_file='./figures/densenet_archi.png',
                   show_shapes=True)

    ####################
    # Network training #
    ####################

    print("Start Training")

    list_train_loss = []
    list_valid_loss = []
    list_learning_rate = []
    best_record = [100, 0, 100, 100]  # [valid_loss, valid_acc, loss_gap, acc_gap] of the best epoch so far
    start_time = datetime.datetime.now()
    for e in range(nb_epoch):

        if e == int(0.25 * nb_epoch):  # update learning_rate
            K.set_value(model.optimizer.lr, np.float32(learning_rate / 10.))

        if e == int(0.5 * nb_epoch):
            K.set_value(model.optimizer.lr, np.float32(learning_rate / 50.))

        if e == int(0.75 * nb_epoch):
            K.set_value(model.optimizer.lr, np.float32(learning_rate / 100.))

        split_size = batch_size
        num_splits = len(X_train_path) // split_size  # number of training batches
        arr_all = np.arange(len(X_train_path)).astype(int)  # indices of all training images
        # reshuffle every epoch so the sample order differs between passes;
        # with shuffling, batch-wise updates amount to stochastic gradient descent
        random.shuffle(arr_all)
        arr_splits = np.array_split(arr_all, num_splits)  # split the indices into num_splits batches

        l_train_loss = []
        batch_train_loss = []
        start = datetime.datetime.now()

        for i, batch_idx in enumerate(arr_splits):  # i: batch counter, batch_idx: sample indices of this batch

            X_batch_path, Y_batch = [], []  # image paths and labels for this batch

            for idx in batch_idx:

                X_batch_path.append(X_train_path[idx])
                Y_batch.append(Y_train[idx])

            X_batch = data_loader.load_image(Path=X_batch_path,
                                             size=im_size)  # load the images of this batch
            Y_batch = np.asarray(Y_batch)  # labels as a numpy array
            train_logloss, train_acc = model.train_on_batch(
                X_batch, Y_batch)  # one gradient step; returns loss and accuracy

            l_train_loss.append([train_logloss, train_acc])
            batch_train_loss.append([train_logloss, train_acc])
            if i % 100 == 0:  # report every 100 batches
                loss_1, acc_1 = np.mean(np.array(l_train_loss), 0)
                loss_2, acc_2 = np.mean(np.array(batch_train_loss), 0)
                batch_train_loss = []
                print(
                    '[Epoch {}/{}] [Batch {}/{}] [Time: {}] [all batches --> train_epoch_logloss: {:.5f}, train_epoch_acc: {:.5f}]'
                    .format(e + 1, nb_epoch, i, len(arr_splits),
                            datetime.datetime.now() - start, loss_1, acc_1),
                    '[last 100 batches --> train_batch_logloss: {:.5f}, train_batch_acc: {:.5f}]'
                    .format(loss_2, acc_2))

        # validate
        valid_logloss, valid_acc = model.evaluate(X_valid,
                                                  Y_valid,
                                                  verbose=0,
                                                  batch_size=64)

        list_train_loss.append(np.mean(np.array(l_train_loss), 0).tolist())
        list_valid_loss.append([valid_logloss, valid_acc])
        list_learning_rate.append(float(K.get_value(model.optimizer.lr)))

        # to convert numpy array to json serializable
        print('[Epoch %s/%s] [Time: %s, Total_time: %s]' %
              (e + 1, nb_epoch, datetime.datetime.now() - start,
               datetime.datetime.now() - start_time),
              end='')
        print(
            '[train_loss_and_acc:{:.5f} {:.5f}] [valid_loss_acc:{:.5f} {:.5f}]'
            .format(list_train_loss[-1][0], list_train_loss[-1][1],
                    list_valid_loss[-1][0], list_valid_loss[-1][1]))

        d_log = {}
        d_log["batch_size"] = batch_size
        d_log["nb_epoch"] = nb_epoch
        d_log["optimizer"] = opt.get_config()
        d_log["train_loss"] = list_train_loss
        d_log["valid_loss"] = list_valid_loss
        d_log["learning_rate"] = list_learning_rate

        json_file = os.path.join('./log/experiment_log_MURA.json')

        with open(json_file, 'w') as fp:
            json.dump(d_log, fp, indent=4, sort_keys=True)

        record = [
            valid_logloss,
            valid_acc,
            abs(valid_logloss - list_train_loss[-1][0]),
            abs(valid_acc - list_train_loss[-1][1]),
        ]
        if record[0] <= best_record[0] and record[1] >= best_record[1]:
            # keep the model early in training, or when the train/valid gaps also shrink
            if e <= int(0.25 * nb_epoch) or (record[2] <= best_record[2]
                                             and record[3] <= best_record[3]):
                best_record = record
                print('saving the best model: epoch', e + 1, best_record)
                model.save('save_models/best_MURA_modle@epochs{}.h5'.format(e + 1))
        model.save('save_models/MURA_modle@epochs{}.h5'.format(e + 1))
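
A minimal invocation sketch, using the defaults from the signature above:

if __name__ == '__main__':
    run_MURA(batch_size=8, nb_epoch=12, plot_architecture=False)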