Example #1
File: train.py Project: gongenhao/Kaggle
def cross_validate_inmemory(model_name, **kwargs):
    """
    StateFarm competition:
    The training set has 26 unique drivers. We do 26-fold CV where each
    driver is in turn singled out as the validation set.

    Load the whole train data in memory for faster operations

    args: model_name (str, keras model name)
          **kwargs (dict) keyword arguments that specify the model hyperparameters
    """

    # Roll out the parameters
    nb_classes = kwargs["nb_classes"]
    batch_size = kwargs["batch_size"]
    n_batch_per_epoch = kwargs["n_batch_per_epoch"]
    nb_epoch = kwargs["nb_epoch"]
    prob = kwargs["prob"]
    do_plot = kwargs["do_plot"]
    data_file = kwargs["data_file"]
    semi_super_file = kwargs["semi_super_file"]
    pretr_weights_file = kwargs["pretr_weights_file"]
    normalisation_style = kwargs["normalisation_style"]
    weak_labels = kwargs["weak_labels"]
    objective = kwargs["objective"]
    experiment = kwargs["experiment"]
    start_fold = kwargs["start_fold"]

    # Load env variables in (in .env file at the root of the project)
    load_dotenv(find_dotenv())

    # Load env variables
    model_dir = os.path.expanduser(os.environ.get("MODEL_DIR"))
    data_dir = os.path.expanduser(os.environ.get("DATA_DIR"))

    # Output path where we store experiment log and weights
    model_dir = os.path.join(model_dir, model_name)
    # Create if it does not exist
    general_utils.create_dir(model_dir)
    # Automatically determine experiment name
    list_exp = glob.glob(model_dir + "/*")
    # Create the experiment dir and weights dir
    if experiment:
        exp_dir = os.path.join(model_dir, experiment)
    else:
        exp_dir = os.path.join(model_dir, "Experiment_%s" % len(list_exp))
    general_utils.create_dir(exp_dir)

    # Compile model.
    # opt = RMSprop(lr=5E-6, rho=0.9, epsilon=1e-06)
    opt = SGD(lr=5e-4, decay=1e-6, momentum=0.9, nesterov=True)
    # opt = Adam(lr=1E-5, beta_1=0.9, beta_2=0.999, epsilon=1e-08)

    # Batch generator
    DataAug = batch_utils.AugDataGenerator(data_file,
                                           batch_size=batch_size,
                                           prob=prob,
                                           dset="train",
                                           maxproc=4,
                                           num_cached=60,
                                           random_augm=False,
                                           hdf5_file_semi=semi_super_file)
    DataAug.add_transform("h_flip")
    # DataAug.add_transform("v_flip")
    # DataAug.add_transform("fixed_rot", angle=40)
    DataAug.add_transform("random_rot", angle=40)
    # DataAug.add_transform("fixed_tr", tr_x=40, tr_y=40)
    DataAug.add_transform("random_tr", tr_x=40, tr_y=40)
    # DataAug.add_transform("fixed_blur", kernel_size=5)
    DataAug.add_transform("random_blur", kernel_size=5)
    # DataAug.add_transform("fixed_erode", kernel_size=4)
    DataAug.add_transform("random_erode", kernel_size=3)
    # DataAug.add_transform("fixed_dilate", kernel_size=4)
    DataAug.add_transform("random_dilate", kernel_size=3)
    # DataAug.add_transform("fixed_crop", pos_x=10, pos_y=10, crop_size_x=200, crop_size_y=200)
    DataAug.add_transform("random_crop", min_crop_size=140, max_crop_size=160)
    # DataAug.add_transform("hist_equal")
    # DataAug.add_transform("random_occlusion", occ_size_x=100, occ_size_y=100)

    epoch_size = n_batch_per_epoch * batch_size

    general_utils.pretty_print("Load all data...")

    with h5py.File(data_file, "r") as hf:
        X = hf["train_data"][:, :, :, :]
        y = hf["train_label"][:].astype(np.uint8)
        y = np_utils.to_categorical(y, nb_classes=nb_classes)  # Format for keras

        try:
            for fold in range(start_fold, 8):
                # for fold in np.random.permutation(26):

                min_valid_loss = 100

                # Save losses
                list_train_loss = []
                list_valid_loss = []

                # Load valid data in memory for fast error evaluation
                idx_valid = hf["valid_fold%s" % fold][:]
                idx_train = hf["train_fold%s" % fold][:]
                X_valid = X[idx_valid]
                y_valid = y[idx_valid]

                # Normalise
                X_valid = normalisation(X_valid, normalisation_style)

                # Compile model
                general_utils.pretty_print("Compiling...")
                model = models.load(model_name,
                                    nb_classes,
                                    X_valid.shape[-3:],
                                    pretr_weights_file=pretr_weights_file)
                model.compile(optimizer=opt, loss=objective)

                # Save architecture
                json_string = model.to_json()
                with open(os.path.join(data_dir, '%s_archi.json' % model.name), 'w') as f:
                    f.write(json_string)

                for e in range(nb_epoch):
                    # Initialize progbar and batch counter
                    progbar = generic_utils.Progbar(epoch_size)
                    batch_counter = 1
                    l_train_loss = []
                    start = time.time()

                    for X_train, y_train in DataAug.gen_batch_inmemory(X, y, idx_train=idx_train):
                        if do_plot:
                            general_utils.plot_batch(X_train, np.argmax(y_train, 1), batch_size)

                        # Normalise
                        X_train = normalisation(X_train, normalisation_style)

                        train_loss = model.train_on_batch(X_train, y_train)
                        l_train_loss.append(train_loss)
                        batch_counter += 1
                        progbar.add(batch_size, values=[("train loss", train_loss)])
                        if batch_counter >= n_batch_per_epoch:
                            break
                    print("")
                    print('Epoch %s/%s, Time: %s' % (e + 1, nb_epoch, time.time() - start))
                    y_valid_pred = model.predict(X_valid, verbose=0, batch_size=16)
                    train_loss = float(np.mean(l_train_loss))  # use float to make it json saveable
                    valid_loss = log_loss(y_valid, y_valid_pred)
                    print("Train loss:", train_loss, "valid loss:", valid_loss)
                    list_train_loss.append(train_loss)
                    list_valid_loss.append(valid_loss)

                    # Record experimental data in a dict
                    d_log = {}
                    d_log["fold"] = fold
                    d_log["nb_classes"] = nb_classes
                    d_log["batch_size"] = batch_size
                    d_log["n_batch_per_epoch"] = n_batch_per_epoch
                    d_log["nb_epoch"] = nb_epoch
                    d_log["epoch_size"] = epoch_size
                    d_log["prob"] = prob
                    d_log["optimizer"] = opt.get_config()
                    d_log["augmentator_config"] = DataAug.get_config()
                    d_log["train_loss"] = list_train_loss
                    d_log["valid_loss"] = list_valid_loss

                    json_file = os.path.join(exp_dir, 'experiment_log_fold%s.json' % fold)
                    general_utils.save_exp_log(json_file, d_log)

                    # Only save the best epoch
                    if valid_loss < min_valid_loss:
                        min_valid_loss = valid_loss
                        trained_weights_path = os.path.join(exp_dir, '%s_weights_fold%s.h5' % (model.name, fold))
                        model.save_weights(trained_weights_path, overwrite=True)

        except KeyboardInterrupt:
            pass
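
The docstring above describes leave-one-driver-out cross-validation, with the fold indices pre-computed and stored in the HDF5 file ("train_fold%s" / "valid_fold%s"). A minimal sketch of how such folds could be built, assuming a hypothetical driver_ids array (one driver label per training image) that is not part of the original project:

import numpy as np

def make_driver_folds(driver_ids):
    """Each unique driver becomes the validation set exactly once."""
    driver_ids = np.asarray(driver_ids)
    folds = []
    for driver in np.unique(driver_ids):
        idx_valid = np.where(driver_ids == driver)[0]
        idx_train = np.where(driver_ids != driver)[0]
        folds.append((idx_train, idx_valid))
    return folds

# Toy example with 3 drivers and 6 images
print(make_driver_folds(["p002", "p002", "p012", "p012", "p014", "p014"]))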
Example #2
from keras.optimizers import SGD

op = SGD(lr=0.01)
a = op.get_config()
print(a)
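
get_config() returns a plain dict of the optimizer's hyperparameters, which is why the other examples on this page dump it straight into their JSON experiment logs. A small sketch of that logging pattern; note that the learning-rate key is "lr" or "learning_rate" depending on the Keras version, and default=float guards against numpy scalar values:

import json
from keras.optimizers import SGD

opt = SGD(lr=0.01, momentum=0.9, nesterov=True)
config = opt.get_config()   # e.g. {'lr': 0.01, 'momentum': 0.9, 'decay': 0.0, 'nesterov': True}
print(config)

with open("optimizer_config.json", "w") as fp:
    json.dump(config, fp, indent=4, sort_keys=True, default=float)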
Example #3
def run_gtsrb(batch_size, nb_epoch, depth, nb_dense_block, nb_filter,
              growth_rate, dropout_rate, learning_rate, weight_decay, logfile,
              plot_architecture):
    """ Run GTSRB experiments

    :param batch_size: int -- batch size
    :param nb_epoch: int -- number of training epochs
    :param depth: int -- network depth
    :param nb_dense_block: int -- number of dense blocks
    :param nb_filter: int -- initial number of conv filter
    :param growth_rate: int -- number of new filters added by conv layers
    :param dropout_rate: float -- dropout rate
    :param learning_rate: float -- learning rate
    :param weight_decay: float -- weight decay
    :param logfile: str -- name of the JSON log file written to ./log
    :param plot_architecture: bool -- whether to plot network architecture

    """

    ###################
    # Data processing #
    ###################
    tr_x = np.load(os.path.join(DATASET_DIR, 'rgb_train_in.npy'))
    tr_y = np.load(os.path.join(DATASET_DIR, 'rgb_train_out.npy'))
    te_x = np.load(os.path.join(DATASET_DIR, 'rgb_test_in.npy'))
    te_y = np.load(os.path.join(DATASET_DIR, 'rgb_test_out.npy'))
    va_x = np.load(os.path.join(DATASET_DIR, 'rgb_valid_in.npy'))
    va_y = np.load(os.path.join(DATASET_DIR, 'rgb_valid_out.npy'))
    X_train = tr_x
    Y_train = tr_y
    X_test = np.vstack((te_x, va_x))
    Y_test = np.vstack((te_y, va_y))

    nb_classes = Y_train.shape[1]
    img_dim = X_train.shape[1:]

    if K.image_data_format() == "channels_first":
        n_channels = X_train.shape[1]
    else:
        n_channels = X_train.shape[-1]

    X_train = X_train.astype('float32')
    X_test = X_test.astype('float32')

    # Normalisation
    X = np.vstack((X_train, X_test))
    # 2 cases depending on the image ordering
    if K.image_data_format() == "channels_first":
        for i in range(n_channels):
            mean = np.mean(X[:, i, :, :])
            std = np.std(X[:, i, :, :])
            X_train[:, i, :, :] = (X_train[:, i, :, :] - mean) / std
            X_test[:, i, :, :] = (X_test[:, i, :, :] - mean) / std

    elif K.image_data_format() == "channels_last":
        for i in range(n_channels):
            mean = np.mean(X[:, :, :, i])
            std = np.std(X[:, :, :, i])
            X_train[:, :, :, i] = (X_train[:, :, :, i] - mean) / std
            X_test[:, :, :, i] = (X_test[:, :, :, i] - mean) / std

    ###################
    # Construct model #
    ###################

    model = densenet.DenseNet(nb_classes,
                              img_dim,
                              depth,
                              nb_dense_block,
                              growth_rate,
                              nb_filter,
                              dropout_rate=dropout_rate,
                              weight_decay=weight_decay)
    # Model output
    model.summary()

    # Build optimizer
    # opt = Adam(lr=learning_rate, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
    opt = SGD(lr=learning_rate, momentum=0.9, nesterov=True)

    model.compile(loss='categorical_crossentropy',
                  optimizer=opt,
                  metrics=["accuracy"])

    if plot_architecture:
        from keras.utils.visualize_util import plot
        plot(model, to_file='./figures/densenet_archi.png', show_shapes=True)

    ####################
    # Network training #
    ####################

    print("Training")

    list_train_loss = []
    list_test_loss = []
    list_learning_rate = []

    datagen = ImageDataGenerator()

    for e in range(nb_epoch):

        if e == int(0.5 * nb_epoch):
            K.set_value(model.optimizer.lr, np.float32(learning_rate / 10.))

        if e == int(0.75 * nb_epoch):
            K.set_value(model.optimizer.lr, np.float32(learning_rate / 100.))

        l_train_loss = []
        start = time.time()

        model.fit_generator(datagen.flow(X_train, Y_train, batch_size),
                            epochs=1)

        test_logloss, test_acc = model.evaluate(X_test,
                                                Y_test,
                                                verbose=1,
                                                batch_size=64)
        list_test_loss.append([test_logloss, test_acc])
        list_learning_rate.append(float(K.get_value(model.optimizer.lr)))
        # to convert numpy array to json serializable
        print('Epoch %s/%s, Time: %s' % (e + 1, nb_epoch, time.time() - start))

        d_log = {}
        d_log["batch_size"] = batch_size
        d_log["nb_epoch"] = nb_epoch
        d_log["optimizer"] = opt.get_config()
        # d_log["train_loss"] = list_train_loss
        d_log["test_loss"] = list_test_loss
        d_log["learning_rate"] = list_learning_rate

        json_file = os.path.join('./log', logfile)
        with open(json_file, 'w') as fp:
            json.dump(d_log, fp, indent=4, sort_keys=True)
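
The two K.set_value() calls at the top of the epoch loop implement a step schedule: the learning rate is divided by 10 at 50% of the epochs and by 100 at 75%. The same schedule could be expressed as a Keras LearningRateScheduler callback; the sketch below uses placeholder values and is not part of the original script:

from keras.callbacks import LearningRateScheduler

learning_rate, nb_epoch = 0.1, 30   # placeholder values

def step_schedule(epoch):
    """Return the learning rate for a given (0-indexed) epoch."""
    if epoch >= int(0.75 * nb_epoch):
        return learning_rate / 100.
    if epoch >= int(0.5 * nb_epoch):
        return learning_rate / 10.
    return learning_rate

lr_scheduler = LearningRateScheduler(step_schedule)
# model.fit(X_train, Y_train, batch_size=batch_size, epochs=nb_epoch,
#           callbacks=[lr_scheduler])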

Example #4
    # NOTE: fragment from a different training script in the original source;
    # `epochs`, `optimizer`, `list_val_loss`, `val_loss_last`, and `wait` are
    # defined earlier in that script.
    # val_loss, val_acc,val_f2_score = model.evaluate(X_val,
    #                                     y_val,
    #                                     verbose=1,
    #                                     batch_size=batch_size)
    # list_test_loss.append([val_loss, val_acc,val_f2_score])

    list_learning_rate.append(float(K.get_value(model.optimizer.lr)))
    # to convert numpy array to json serializable
    print('Epoch %s/%s, Time: %s' % (e + 1, epochs, time.time() - start))

    d_log = {}
    d_log["batch_size"] = batch_size
    d_log["nb_epoch"] = epochs
    d_log["optimizer"] = optimizer.get_config()
    d_log["train_loss"] = list_train_loss
    d_log["test_loss"] = list_val_loss
    d_log["learning_rate"] = list_learning_rate

    json_file = os.path.join('./logs/experiment_Planet_Densenet.json')
    with open(json_file, 'w') as fp:
        json.dump(d_log, fp, indent=4, sort_keys=True)

    model.save('last-epoch-model-val.h5')
    # early stopping
    if val_loss > val_loss_last:
        wait += 1

    if wait == 2:
        break
Example #5
File: train.py Project: cvtower/basedMl
def train(model_name, **kwargs):
    """
    Train model

    args: model_name (str, keras model name)
          **kwargs (dict) keyword arguments that specify the model hyperparameters
    """

    # Roll out the parameters
    batch_size = kwargs["batch_size"]
    nb_epoch = kwargs["nb_epoch"]
    dataset = kwargs["dataset"]
    optimizer = kwargs["optimizer"]
    experiment_name = kwargs["experiment_name"]

    # Compile model.
    if optimizer == "SGD":
        opt = SGD(lr=1E-2, decay=1E-4, momentum=0.9, nesterov=True)
    if optimizer == "Adam":
        opt = Adam(lr=1E-4, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=1E-4)
    if optimizer == "Eve":
        opt = Eve(lr=1E-4, decay=1E-4, beta_1=0.9, beta_2=0.999, beta_3=0.999, small_k=0.1, big_K=10, epsilon=1e-08)

    if dataset == "cifar10":
        (X_train, y_train), (X_test, y_test) = cifar10.load_data()
    if dataset == "cifar100":
        (X_train, y_train), (X_test, y_test) = cifar100.load_data()
    if dataset == "mnist":
        (X_train, y_train), (X_test, y_test) = mnist.load_data()
        X_train = X_train.reshape((X_train.shape[0], 1, 28, 28))
        X_test = X_test.reshape((X_test.shape[0], 1, 28, 28))

    X_train = X_train.astype('float32')
    X_test = X_test.astype('float32')
    X_train /= 255.
    X_test /= 255.

    img_dim = X_train.shape[-3:]
    nb_classes = len(np.unique(y_train))

    # convert class vectors to binary class matrices
    Y_train = np_utils.to_categorical(y_train, nb_classes)
    Y_test = np_utils.to_categorical(y_test, nb_classes)

    # Compile model
    model = models.load(model_name, img_dim, nb_classes)
    model.compile(optimizer=opt, loss="categorical_crossentropy", metrics=["accuracy"])

    train_losses, train_accs = [], []
    val_losses, val_accs = [], []

    for e in range(nb_epoch):

        loss = model.fit(X_train, Y_train,
                         batch_size=batch_size,
                         validation_data=(X_test, Y_test),
                         epochs=1)

        train_losses.append(loss.history["loss"])
        val_losses.append(loss.history["val_loss"])
        train_accs.append(loss.history["acc"])
        val_accs.append(loss.history["val_acc"])

        # Save experimental log
        d_log = {}
        d_log["experiment_name"] = experiment_name
        d_log["img_dim"] = img_dim
        d_log["batch_size"] = batch_size
        d_log["nb_epoch"] = nb_epoch
        d_log["train_losses"] = train_losses
        d_log["val_losses"] = val_losses
        d_log["train_accs"] = train_accs
        d_log["val_accs"] = val_accs
        d_log["optimizer"] = opt.get_config()
        # Add model architecture
        json_string = json.loads(model.to_json())
        for key in json_string.keys():
            d_log[key] = json_string[key]
        json_file = os.path.join("log", '%s_%s_%s.json' % (dataset, model.name, experiment_name))
        with open(json_file, 'w') as fp:
            json.dump(d_log, fp, indent=4, sort_keys=True)
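
The loop above calls model.fit() once per epoch so that the JSON log can be rewritten after every epoch (partial results survive an interrupted run). For comparison, a self-contained toy sketch of the same bookkeeping with a single fit() call, reading the lists from the History object; the tiny model and random data are stand-ins, and the metric keys are "acc"/"val_acc" on older Keras and "accuracy"/"val_accuracy" on newer versions:

import numpy as np
from keras.models import Sequential
from keras.layers import Dense

# Toy stand-ins for the dataset and hyperparameters used above
X_train, Y_train = np.random.rand(64, 10), np.eye(2)[np.random.randint(0, 2, 64)]
X_test, Y_test = np.random.rand(16, 10), np.eye(2)[np.random.randint(0, 2, 16)]
batch_size, nb_epoch = 16, 3

model = Sequential([Dense(2, activation="softmax", input_shape=(10,))])
model.compile(optimizer="sgd", loss="categorical_crossentropy", metrics=["accuracy"])

history = model.fit(X_train, Y_train, batch_size=batch_size,
                    validation_data=(X_test, Y_test), epochs=nb_epoch, verbose=0)

train_losses = history.history["loss"]
val_losses = history.history["val_loss"]
train_accs = history.history.get("acc", history.history.get("accuracy"))
val_accs = history.history.get("val_acc", history.history.get("val_accuracy"))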
Example #6
def cross_validate_inmemory(model_name, **kwargs):
    """
    StateFarm competition:
    The training set has 26 unique drivers. We do 26-fold CV where each
    driver is in turn singled out as the validation set.

    Load the whole train data in memory for faster operations

    args: model_name (str, keras model name)
          **kwargs (dict) keyword arguments that specify the model hyperparameters
    """

    # Roll out the parameters
    nb_classes = kwargs["nb_classes"]
    batch_size = kwargs["batch_size"]
    n_batch_per_epoch = kwargs["n_batch_per_epoch"]
    nb_epoch = kwargs["nb_epoch"]
    prob = kwargs["prob"]
    do_plot = kwargs["do_plot"]
    data_file = kwargs["data_file"]
    semi_super_file = kwargs["semi_super_file"]
    pretr_weights_file = kwargs["pretr_weights_file"]
    normalisation_style = kwargs["normalisation_style"]
    weak_labels = kwargs["weak_labels"]
    objective = kwargs["objective"]
    experiment = kwargs["experiment"]
    start_fold = kwargs["start_fold"]

    # Load env variables in (in .env file at the root of the project)
    load_dotenv(find_dotenv())

    # Load env variables
    model_dir = os.path.expanduser(os.environ.get("MODEL_DIR"))
    data_dir = os.path.expanduser(os.environ.get("DATA_DIR"))

    # Output path where we store experiment log and weights
    model_dir = os.path.join(model_dir, model_name)
    # Create if it does not exist
    general_utils.create_dir(model_dir)
    # Automatically determine experiment name
    list_exp = glob.glob(model_dir + "/*")
    # Create the experiment dir and weights dir
    if experiment:
        exp_dir = os.path.join(model_dir, experiment)
    else:
        exp_dir = os.path.join(model_dir, "Experiment_%s" % len(list_exp))
    general_utils.create_dir(exp_dir)

    # Compile model.
    # opt = RMSprop(lr=5E-6, rho=0.9, epsilon=1e-06)
    opt = SGD(lr=5e-4, decay=1e-6, momentum=0.9, nesterov=True)
    # opt = Adam(lr=1E-5, beta_1=0.9, beta_2=0.999, epsilon=1e-08)

    # Batch generator
    DataAug = batch_utils.AugDataGenerator(data_file,
                                           batch_size=batch_size,
                                           prob=prob,
                                           dset="train",
                                           maxproc=4,
                                           num_cached=60,
                                           random_augm=False,
                                           hdf5_file_semi=semi_super_file)
    DataAug.add_transform("h_flip")
    # DataAug.add_transform("v_flip")
    # DataAug.add_transform("fixed_rot", angle=40)
    DataAug.add_transform("random_rot", angle=40)
    # DataAug.add_transform("fixed_tr", tr_x=40, tr_y=40)
    DataAug.add_transform("random_tr", tr_x=40, tr_y=40)
    # DataAug.add_transform("fixed_blur", kernel_size=5)
    DataAug.add_transform("random_blur", kernel_size=5)
    # DataAug.add_transform("fixed_erode", kernel_size=4)
    DataAug.add_transform("random_erode", kernel_size=3)
    # DataAug.add_transform("fixed_dilate", kernel_size=4)
    DataAug.add_transform("random_dilate", kernel_size=3)
    # DataAug.add_transform("fixed_crop", pos_x=10, pos_y=10, crop_size_x=200, crop_size_y=200)
    DataAug.add_transform("random_crop", min_crop_size=140, max_crop_size=160)
    # DataAug.add_transform("hist_equal")
    # DataAug.add_transform("random_occlusion", occ_size_x=100, occ_size_y=100)

    epoch_size = n_batch_per_epoch * batch_size

    general_utils.pretty_print("Load all data...")

    with h5py.File(data_file, "r") as hf:
        X = hf["train_data"][:, :, :, :]
        y = hf["train_label"][:].astype(np.uint8)
        y = np_utils.to_categorical(y,
                                    nb_classes=nb_classes)  # Format for keras

        try:
            for fold in range(start_fold, 8):
                # for fold in np.random.permutation(26):

                min_valid_loss = 100

                # Save losses
                list_train_loss = []
                list_valid_loss = []

                # Load valid data in memory for fast error evaluation
                idx_valid = hf["valid_fold%s" % fold][:]
                idx_train = hf["train_fold%s" % fold][:]
                X_valid = X[idx_valid]
                y_valid = y[idx_valid]

                # Normalise
                X_valid = normalisation(X_valid, normalisation_style)

                # Compile model
                general_utils.pretty_print("Compiling...")
                model = models.load(model_name,
                                    nb_classes,
                                    X_valid.shape[-3:],
                                    pretr_weights_file=pretr_weights_file)
                model.compile(optimizer=opt, loss=objective)

                # Save architecture
                json_string = model.to_json()
                with open(os.path.join(data_dir, '%s_archi.json' % model.name),
                          'w') as f:
                    f.write(json_string)

                for e in range(nb_epoch):
                    # Initialize progbar and batch counter
                    progbar = generic_utils.Progbar(epoch_size)
                    batch_counter = 1
                    l_train_loss = []
                    start = time.time()

                    for X_train, y_train in DataAug.gen_batch_inmemory(
                            X, y, idx_train=idx_train):
                        if do_plot:
                            general_utils.plot_batch(X_train,
                                                     np.argmax(y_train, 1),
                                                     batch_size)

                        # Normalise
                        X_train = normalisation(X_train, normalisation_style)

                        train_loss = model.train_on_batch(X_train, y_train)
                        l_train_loss.append(train_loss)
                        batch_counter += 1
                        progbar.add(batch_size,
                                    values=[("train loss", train_loss)])
                        if batch_counter >= n_batch_per_epoch:
                            break
                    print("")
                    print('Epoch %s/%s, Time: %s' %
                          (e + 1, nb_epoch, time.time() - start))
                    y_valid_pred = model.predict(X_valid,
                                                 verbose=0,
                                                 batch_size=16)
                    train_loss = float(np.mean(
                        l_train_loss))  # use float to make it json saveable
                    valid_loss = log_loss(y_valid, y_valid_pred)
                    print("Train loss:", train_loss, "valid loss:", valid_loss)
                    list_train_loss.append(train_loss)
                    list_valid_loss.append(valid_loss)

                    # Record experimental data in a dict
                    d_log = {}
                    d_log["fold"] = fold
                    d_log["nb_classes"] = nb_classes
                    d_log["batch_size"] = batch_size
                    d_log["n_batch_per_epoch"] = n_batch_per_epoch
                    d_log["nb_epoch"] = nb_epoch
                    d_log["epoch_size"] = epoch_size
                    d_log["prob"] = prob
                    d_log["optimizer"] = opt.get_config()
                    d_log["augmentator_config"] = DataAug.get_config()
                    d_log["train_loss"] = list_train_loss
                    d_log["valid_loss"] = list_valid_loss

                    json_file = os.path.join(
                        exp_dir, 'experiment_log_fold%s.json' % fold)
                    general_utils.save_exp_log(json_file, d_log)

                    # Only save the best epoch
                    if valid_loss < min_valid_loss:
                        min_valid_loss = valid_loss
                        trained_weights_path = os.path.join(
                            exp_dir,
                            '%s_weights_fold%s.h5' % (model.name, fold))
                        model.save_weights(trained_weights_path,
                                           overwrite=True)

        except KeyboardInterrupt:
            pass
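
Like Example #1, this example scores the validation fold with scikit-learn's log_loss on one-hot labels instead of model.evaluate(). A tiny self-contained illustration of that call with made-up numbers:

import numpy as np
from sklearn.metrics import log_loss

y_valid = np.array([[1, 0, 0],
                    [0, 1, 0],
                    [0, 0, 1]])              # one-hot ground truth, 3 classes
y_valid_pred = np.array([[0.8, 0.1, 0.1],
                         [0.2, 0.7, 0.1],
                         [0.1, 0.2, 0.7]])   # predicted class probabilities

print(log_loss(y_valid, y_valid_pred))       # mean multi-class cross-entropy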
Example #7
def run_cifar100(batch_size,
                nb_epoch,
                depth,
                nb_dense_block,
                nb_filter,
                growth_rate,
                dropout_rate,
                learning_rate,
                weight_decay,
                plot_architecture,
                compression=0.5,
                init_from_epoch=0):
    """ Run CIFAR100 experiments
    :param batch_size: int -- batch size
    :param nb_epoch: int -- number of training epochs
    :param depth: int -- network depth
    :param nb_dense_block: int -- number of dense blocks
    :param nb_filter: int -- initial number of conv filter
    :param growth_rate: int -- number of new filters added by conv layers
    :param dropout_rate: float -- dropout rate
    :param learning_rate: float -- learning rate
    :param weight_decay: float -- weight decay
    :param plot_architecture: bool -- whether to plot network architecture
    :param compression: float -- compression factor passed to the model
    :param init_from_epoch: int -- epoch to resume from (loads the matching saved weights)
    """

    ###################
    # Data processing #
    ###################

    # the data, shuffled and split between train and test sets
    #(X_train, y_train), (X_test, y_test) = cifar100.load_data()
    (X_train, y_train), (X_test, y_test) = load_cifar100()
    nb_classes = len(np.unique(y_train))
    img_dim = X_train.shape[1:]

    if K.image_dim_ordering() == "th":
        n_channels = X_train.shape[1]
    else:
        n_channels = X_train.shape[-1]

    # convert class vectors to binary class matrices
    Y_train = np_utils.to_categorical(y_train, nb_classes)
    Y_test = np_utils.to_categorical(y_test, nb_classes)

    X_train = X_train.astype('float32')
    X_test = X_test.astype('float32')

    # Normalisation
    X = np.vstack((X_train, X_test))
    # 2 cases depending on the image ordering
    if K.image_dim_ordering() == "th":
        for i in range(n_channels):
            mean = np.mean(X[:, i, :, :])
            std = np.std(X[:, i, :, :])
            X_train[:, i, :, :] = (X_train[:, i, :, :] - mean) / std
            X_test[:, i, :, :] = (X_test[:, i, :, :] - mean) / std

    elif K.image_dim_ordering() == "tf":
        for i in range(n_channels):
            mean = np.mean(X[:, :, :, i])
            std = np.std(X[:, :, :, i])
            X_train[:, :, :, i] = (X_train[:, :, :, i] - mean) / std
            X_test[:, :, :, i] = (X_test[:, :, :, i] - mean) / std

    print("X_train shape:{}".format(X_train.shape))

    ###################
    # Construct model #
    ###################

    model = Baseline.Baseline(nb_classes,
                              img_dim,
                              depth,
                              nb_dense_block,
                              growth_rate,
                              nb_filter,
                              dropout_rate=dropout_rate,
                              weight_decay=weight_decay,
                              compression=0.5)
    # Model output
    model.summary()

    # Build optimizer
    # opt = Adam(lr=learning_rate, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
    opt = SGD(lr=learning_rate, momentum=0.9, nesterov=True)
    model.compile(loss='categorical_crossentropy',
                  optimizer=opt,
                  metrics=["accuracy"])

    if plot_architecture:
        from keras.utils.visualize_util import plot
        plot(model, to_file='./figures/Baseline_sum_archi.png', show_shapes=True)

    ####################
    # Network training #
    ####################
    print("Training")

    list_train_loss = []
    list_test_loss = []
    list_learning_rate = []
    loglog = [0]
    lr=learning_rate
    if init_from_epoch != 0:
        model_path = 'weights/Baseline_sum-cifar100-40-tf-' + str(init_from_epoch) + '.h5'
        print('loading weights from %s' % model_path)
        model.load_weights(model_path)

    print('training on batches from epoch %d' % init_from_epoch)
    for e in range(init_from_epoch, nb_epoch):

        if e == int(0.5 * nb_epoch):
            K.set_value(model.optimizer.lr, np.float32(learning_rate / 10.))

        if e == int(0.75 * nb_epoch):
            K.set_value(model.optimizer.lr, np.float32(learning_rate / 100.))

        split_size = batch_size
        num_splits = X_train.shape[0] // split_size  # integer number of mini-batches
        arr_splits = np.array_split(np.arange(X_train.shape[0]), num_splits)

        l_train_loss = []
        start = time.time()

        for batch_nm, batch_idx in enumerate(arr_splits):

            X_batch, Y_batch = X_train[batch_idx], Y_train[batch_idx]
            train_logloss, train_acc = model.train_on_batch(X_batch, Y_batch)

            l_train_loss.append([train_logloss, train_acc])
            sys.stdout.write("\rEpoch {}, Batch {}/{}: training logloss: {:.4f}, "
                             "training accuracy: {:.4f}%".format(
                                 e, batch_nm, num_splits, train_logloss, train_acc * 100))
        test_logloss, test_acc = model.evaluate(X_test,
                                                Y_test,
                                                verbose=1,
                                                batch_size=64)


        #EarlyStopping

        # loglog.append(np.mean(np.array(l_train_loss), 0)[0])
        # if len(loglog) >= 20 :
        #     if loglog[-1] - loglog[1] >= -0.01:
        #         print("\n\n\nreduce LR\n\n\n")
        #         lr=np.float32(lr / 10.)
        #         print(lr)
        #         K.set_value(model.optimizer.lr, lr)
        #         loglog = [0]
        #     else:
        #         loglog = [0]
        
        # print("\n\nNOTICE{}\n\n".format(loglog))

        
        list_train_loss.append(np.mean(np.array(l_train_loss), 0).tolist())
        list_test_loss.append([test_logloss, test_acc])
        list_learning_rate.append(float(K.get_value(model.optimizer.lr)))
        # to convert numpy array to json serializable
        print('\nEpoch %s/%s, training logloss: %.4f, test logloss: %.4f, test acc: %.4f%%, Time: %s'
              % (e + 1, nb_epoch, np.mean(np.array(l_train_loss), 0)[0],
                 test_logloss, test_acc * 100, time.time() - start))

        weights_file = 'weights/Baseline-cifar100-40-12-tf-' + str(e) + '.h5'
        if e % 5 == 0:
            model.save_weights(weights_file)

        d_log = {}
        d_log["batch_size"] = batch_size
        d_log["nb_epoch"] = nb_epoch
        d_log["optimizer"] = opt.get_config()
        d_log["train_loss"] = list_train_loss
        d_log["test_loss"] = list_test_loss
        d_log["learning_rate"] = list_learning_rate

        json_file = os.path.join('./log/Baseline_log_cifar100.json')
        with open(json_file, 'w') as fp:
            json.dump(d_log, fp, indent=4, sort_keys=True)
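
Several of these examples repeat the same per-channel standardisation block (mean and std computed over the combined train and test sets). A short helper that does the same for channels_last arrays; this is a sketch with random data, not code from the original project:

import numpy as np

def standardise_per_channel(X_train, X_test):
    """Subtract the per-channel mean and divide by the per-channel std (channels_last)."""
    X = np.vstack((X_train, X_test)).astype('float32')
    mean = X.mean(axis=(0, 1, 2), keepdims=True)   # shape (1, 1, 1, n_channels)
    std = X.std(axis=(0, 1, 2), keepdims=True)
    return (X_train - mean) / std, (X_test - mean) / std

X_train = np.random.rand(8, 32, 32, 3)
X_test = np.random.rand(4, 32, 32, 3)
X_train, X_test = standardise_per_channel(X_train, X_test)
print(X_train.shape, X_test.shape)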
Example #8
class TrainGradientBased(TrainNN):
    """ Train an artificial neural network
    """
    def __init__(self,
                 model_filename="trained-model.hdf5",
                 optimizer='Adam',
                 optimizer_params={'learning_rate': 5e-5},
                 monitor='val_loss',
                 min_delta=1e-5,
                 patience=50,
                 metrics=['mae', 'mse', 'msle', 'mape'],
                 seed=0,
                 verbose=0,
                 **kwargs):
        super().__init__(seed=seed, verbose=verbose, **kwargs)
        self.checkpointer = ModelCheckpoint(filepath=model_filename,
                                            verbose=verbose,
                                            save_best_only=True)

        if optimizer == 'Adadelta':
            self.optimizer = Adadelta(**optimizer_params)
        elif optimizer == 'Adagrad':
            self.optimizer = Adagrad(**optimizer_params)
        elif optimizer == 'Adam':
            self.optimizer = Adam(**optimizer_params)
        elif optimizer == 'Adamax':
            self.optimizer = Adamax(**optimizer_params)
        elif optimizer == 'Nadam':
            self.optimizer = Nadam(**optimizer_params)
        elif optimizer == 'RMSprop':
            self.optimizer = RMSprop(**optimizer_params)
        elif optimizer == 'SGD':
            self.optimizer = SGD(**optimizer_params)
        else:
            raise ValueError("Unknown optimizer: %s" % optimizer)

        if self.verbose > 1:
            print("Optimizer ", optimizer, str(self.optimizer.get_config()))
        self.early_stopping = EarlyStopping(monitor=monitor,
                                            min_delta=min_delta,
                                            patience=patience,
                                            verbose=verbose,
                                            mode='auto')
        self.metrics = metrics

    def train(self,
              train_dataset,
              validation_dataset=None,
              validation_steps=None,
              epochs=100,
              steps_per_epoch=None,
              loss='mean_squared_error',
              **kwargs):
        self.model.compile(loss=loss,
                           optimizer=self.optimizer,
                           metrics=self.metrics)
        self.trainable_count = int(
            np.sum([
                K.count_params(p) for p in list(self.model.trainable_weights)
            ]))
        start = time.time()
        if self.verbose:
            print('Start training (', start, ')')
        verb = 0
        if self.verbose > 1:
            verb = 1
        elif self.verbose == 1:
            verb = 2
        self.model.fit_generator(
            train_dataset,
            validation_data=validation_dataset,
            validation_steps=validation_steps,
            steps_per_epoch=steps_per_epoch,
            epochs=epochs,
            callbacks=[self.early_stopping, self.checkpointer],
            verbose=verb)
        train_time = time.time() - start
        if self.verbose:
            print('Finished training. Total time: ', train_time)
        return {
            'trainable_vars': self.trainable_count,
            'training_time': train_time
        }

    def evaluate(self, test_dataset, **kwargs):
        values = self.model.evaluate_generator(test_dataset)
        metrics_dict = dict(zip(self.metrics, values[1:]))
        prediction = self.model.predict_generator(test_dataset)
        return metrics_dict, prediction
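
The if/elif chain in __init__ above maps an optimizer name onto a Keras optimizer class. An equivalent, more compact sketch using a lookup dict (same behaviour, still raising on unknown names); the build_optimizer helper is illustrative, not part of the original class:

from keras.optimizers import Adadelta, Adagrad, Adam, Adamax, Nadam, RMSprop, SGD

OPTIMIZERS = {
    'Adadelta': Adadelta,
    'Adagrad': Adagrad,
    'Adam': Adam,
    'Adamax': Adamax,
    'Nadam': Nadam,
    'RMSprop': RMSprop,
    'SGD': SGD,
}

def build_optimizer(name, **params):
    if name not in OPTIMIZERS:
        raise ValueError("Unknown optimizer: %s" % name)
    return OPTIMIZERS[name](**params)

print(build_optimizer('SGD', lr=0.01).get_config())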
Example #9
File: train.py Project: soumyapal96/CDSGD
def train(model_name, **kwargs):
    """
    Train model

    args: model_name (str, keras model name)
          **kwargs (dict) keyword arguments that specify the model hyperparameters
    """

    # Roll out the parameters
    batch_size = kwargs["batch_size"]
    nb_epoch = kwargs["nb_epoch"]
    dataset = kwargs["dataset"]
    optimizer = kwargs["optimizer"]
    experiment_name = kwargs["experiment_name"]
    n_agents = kwargs["n_agents"]
    communication_period = kwargs["communication_period"]
    sparsity = kwargs["sparsity"]

    if dataset == "cifar10":
        (X_train, y_train), (X_test, y_test) = cifar10.load_data()

    if dataset == "cifar100":
        (X_train, y_train), (X_test, y_test) = cifar100.load_data()
    if dataset == "mnist":
        (X_train, y_train), (X_test, y_test) = mnist.load_data()
        X_train = X_train.reshape((X_train.shape[0], 1, 28, 28))
        X_test = X_test.reshape((X_test.shape[0], 1, 28, 28))
    if dataset != "cifar10_non_iid":
        X_train = X_train.astype('float32')
        X_test = X_test.astype('float32')
        X_train /= 255.
        X_test /= 255.

    if dataset == "cifar10_non_iid":
        (X_train, y_train), (X_test, y_test) = cifar10.load_data()
        img_dim = X_train.shape[-3:]
        nb_classes = len(np.unique(y_train))
        X_test = X_test.astype('float32')
        X_test /= 255.
        Y_test = np_utils.to_categorical(y_test, nb_classes)
        X_train_c = [0 for nb in range(nb_classes)]
        y_train_c = [0 for nb in range(nb_classes)]
        for select in range(nb_classes):
            indices = np.argwhere(y_train == select)
            X_temp = X_train[indices[:, 0], :, :, :].astype('float32') / 255.
            y_temp = y_train[indices[:, 0]]
            X_train_c[select] = X_temp
            y_train_c[select] = np_utils.to_categorical(y_temp, nb_classes)
        X_train = X_train.astype('float32')
        X_train /= 255.
        Y_train = np_utils.to_categorical(y_train, nb_classes)

    if (dataset != "cifar10_non_iid"):
        img_dim = X_train.shape[-3:]
        nb_classes = len(np.unique(y_train))

        # convert class vectors to binary class matrices
        Y_train = np_utils.to_categorical(y_train, nb_classes)
        Y_test = np_utils.to_categorical(y_test, nb_classes)

    if (optimizer == "CDSGD") or (optimizer == "CDMSGD") or (
            optimizer == "EASGD") or (optimizer == "FASGD"):
        if dataset != "cifar10_non_iid":
            # Slice the Data into the agents
            ins = [X_train, Y_train]
            num_train_samples = ins[0].shape[0]
            agent_data_size = int(num_train_samples / n_agents)
            index_array = np.arange(num_train_samples)
            agent_batches = _make_batches(num_train_samples, agent_data_size)
            X_agent_ins = []
            Y_agent_ins = []
            for agent_index, (batch_start,
                              batch_end) in enumerate(agent_batches):
                agent_ids = index_array[batch_start:batch_end]
                temp_ins = _slice_arrays(ins, agent_ids)
                X_agent_ins.append(temp_ins[0])
                Y_agent_ins.append(temp_ins[1])
        else:
            X_agent_ins = []
            Y_agent_ins = []
            class_per_agent = int(nb_classes / n_agents)
            for nb in range(n_agents):
                for select in range(class_per_agent):
                    if select == 0:
                        X_temp = X_train_c[class_per_agent * nb + select]
                        y_temp = y_train_c[class_per_agent * nb + select]
                    else:
                        X_temp = np.concatenate(
                            (X_temp, X_train_c[class_per_agent * nb + select]),
                            axis=0)
                        y_temp = np.concatenate(
                            (y_temp, y_train_c[class_per_agent * nb + select]),
                            axis=0)
                print(y_temp.shape)
                X_agent_ins.append(X_temp)
                Y_agent_ins.append(y_temp)

    if optimizer == "CDSGD":
        pi = np.ones((n_agents, n_agents))
        degree = n_agents
        degreeval = 1 / n_agents

        if sparsity == True:
            pi = np.asarray([[0.34, 0.33, 0., 0., 0.33],
                             [0.33, 0.34, 0.33, 0., 0.],
                             [0., 0.33, 0.34, 0.33, 0.],
                             [0., 0., 0.33, 0.34, 0.33],
                             [0.33, 0., 0., 0.33, 0.34]])
            # for nb in range(n_agents*n_agents):
            #     m1=np.random.randint(n_agents)
            #     n1=np.random.randint(n_agents)
            #     if (m1!=n1):
            print(pi)
        else:
            pi = degreeval * np.ones((n_agents, n_agents))

        print(pi)
        model = models.load(model_name, img_dim, nb_classes)
        # model.summary()
        model_json = model.to_json()
        with open("model.json", "w") as json_file:
            json_file.write(model_json)
        # serialize weights to HDF5
        model.save_weights("model0.h5")
        del model
        agentmodels = [0 for nb in range(n_agents)]
        for nb in range(n_agents):
            json_file = open('model.json', 'r')
            loaded_model_json = json_file.read()
            json_file.close()
            agentmodels[nb] = model_from_json(loaded_model_json)
            # load weights into new model
            agentmodels[nb].load_weights("model0.h5")
    elif optimizer == "CDMSGD":
        pi = np.ones((n_agents, n_agents))
        degree = n_agents
        degreeval = 1 / n_agents

        if sparsity == True:
            pi = np.asarray([[0.34, 0.33, 0., 0., 0.33],
                             [0.33, 0.34, 0.33, 0., 0.],
                             [0., 0.33, 0.34, 0.33, 0.],
                             [0., 0., 0.33, 0.34, 0.33],
                             [0.33, 0., 0., 0.33, 0.34]])
            # for nb in range(n_agents*n_agents):
            #     m1=np.random.randint(n_agents)
            #     n1=np.random.randint(n_agents)
            #     if (m1!=n1):
            # print(pi)
        else:
            pi = degreeval * np.ones((n_agents, n_agents))

        print(pi)
        model = models.load(model_name, img_dim, nb_classes)
        # model.summary()
        model_json = model.to_json()
        with open("model.json", "w") as json_file:
            json_file.write(model_json)
        # serialize weights to HDF5
        model.save_weights("model0.h5")
        del model
        agentmodels = [0 for nb in range(n_agents)]
        for nb in range(n_agents):
            json_file = open('model.json', 'r')
            loaded_model_json = json_file.read()
            json_file.close()
            agentmodels[nb] = model_from_json(loaded_model_json)
            # load weights into new model
            agentmodels[nb].load_weights("model0.h5")
    elif optimizer == "EASGD":
        model = models.load(model_name, img_dim, nb_classes)
        model.summary()
        model_json = model.to_json()
        with open("model.json", "w") as json_file:
            json_file.write(model_json)
        model.save_weights("model_EASGD.h5")

        agentmodels = [0 for nb in range(n_agents)]
        for nb in range(n_agents):
            json_file = open('model.json', 'r')
            loaded_model_json = json_file.read()
            json_file.close()
            agentmodels[nb] = model_from_json(loaded_model_json)
            # load weights into new model
            agentmodels[nb].load_weights("model_EASGD.h5")
    elif optimizer == "FASGD":
        model = models.load(model_name, img_dim, nb_classes)
        # model.summary()
        model_json = model.to_json()
        with open("model.json", "w") as json_file:
            json_file.write(model_json)
        # serialize weights to HDF5
        model.save_weights("model0.h5")
        del model
        agentmodels = [0 for nb in range(n_agents)]
        for nb in range(n_agents):
            json_file = open('model.json', 'r')
            loaded_model_json = json_file.read()
            json_file.close()
            agentmodels[nb] = model_from_json(loaded_model_json)
            # load weights into new model
            agentmodels[nb].load_weights("model0.h5")
    else:
        model = models.load(model_name, img_dim, nb_classes)
    # Compile model.
    if optimizer == "SGD":
        opt = SGD(lr=1E-2, decay=0, momentum=0.0, nesterov=False)
        model.compile(optimizer=opt,
                      loss="categorical_crossentropy",
                      metrics=["accuracy"])
        model.summary()
    elif optimizer == "MSGD":
        opt = SGD(lr=1E-2, decay=0, momentum=0.95, nesterov=True)
        model.compile(optimizer=opt,
                      loss="categorical_crossentropy",
                      metrics=["accuracy"])
        model.summary()
    elif optimizer == "Adam":
        opt = Adam(lr=1E-4,
                   beta_1=0.9,
                   beta_2=0.999,
                   epsilon=1e-08,
                   decay=1E-4)
        model.compile(optimizer=opt,
                      loss="categorical_crossentropy",
                      metrics=["accuracy"])
        model.summary()
    elif optimizer == "CDSGD":
        opt = [0 for nb in range(n_agents)]
        agentmodels = model_compilers_cdsgd(agentmodels, n_agents, optimizer,
                                            pi, opt)
    elif optimizer == "CDMSGD":
        opt = [0 for nb in range(n_agents)]
        agentmodels = model_compilers_cdmsgd(agentmodels, n_agents, optimizer,
                                             pi, opt)
    elif optimizer == "EASGD":
        opt = [0 for nb in range(n_agents)]
        agentmodels = model_compilers_easgd(agentmodels, n_agents,
                                            communication_period, optimizer,
                                            opt)
    elif optimizer == "FASGD":
        opt = [0 for nb in range(n_agents)]
        agentmodels = model_compilers_fasgd(agentmodels, n_agents, opt)

    train_losses, train_accs = [], []
    val_losses, val_accs = [], []
    agent_training_loss_history = [[] for nb in range(n_agents)]
    agent_validation_loss_history = [[] for nb in range(n_agents)]
    agent_training_acc_history = [[] for nb in range(n_agents)]
    agent_validation_acc_history = [[] for nb in range(n_agents)]

    if (optimizer == "CDSGD") or (optimizer == "CDMSGD") or (
            optimizer == "EASGD") or (optimizer == "FASGD"):
        training_loss = np.zeros(n_agents)
        training_acc = np.zeros(n_agents)
        validation_loss = np.zeros(n_agents)
        validation_acc = np.zeros(n_agents)
    communication_count = 0
    for e in range(nb_epoch):
        if (optimizer == "CDSGD") or (optimizer == "CDMSGD") or (
                optimizer == "EASGD") or (optimizer == "FASGD"):
            for nb in range(n_agents):
                loss = agentmodels[nb].fit(X_agent_ins[nb],
                                           Y_agent_ins[nb],
                                           batch_size=batch_size,
                                           validation_split=0.0,
                                           epochs=1,
                                           verbose=0)
            for nb in range(n_agents):
                training_score = agentmodels[nb].evaluate(X_train,
                                                          Y_train,
                                                          verbose=0,
                                                          batch_size=512)
                #print(training_score)
                validation_score = agentmodels[nb].evaluate(X_test,
                                                            Y_test,
                                                            verbose=0,
                                                            batch_size=512)
                training_loss[nb] = training_score[0]
                training_acc[nb] = training_score[1]
                validation_loss[nb] = validation_score[0]
                validation_acc[nb] = validation_score[1]
            train_losses.append(np.average(training_loss))
            val_losses.append(np.average(validation_loss))
            train_accs.append(np.average(training_acc))
            val_accs.append(np.average(validation_acc))
            for nb in range(n_agents):
                agent_training_loss_history[nb].append(training_loss[nb])
                agent_validation_loss_history[nb].append(validation_loss[nb])
                agent_training_acc_history[nb].append(training_acc[nb])
                agent_validation_acc_history[nb].append(validation_acc[nb])

            print("epoch", (e + 1),
                  "is completed with the following metrics: loss:",
                  np.average(training_loss), "accuracy:",
                  np.average(training_acc), "val_loss:",
                  np.average(validation_loss), "val_acc:",
                  np.average(validation_acc))
            if (optimizer == "CDSGD") or (optimizer == "CDMSGD"):
                communication_count += 1
                if (communication_count >= communication_period):
                    if (optimizer == "CDMSGD"):
                        update_parameters_cdmsgd(agentmodels, n_agents)
                        print("Agents share their information!")
                    if (optimizer == "CDSGD"):
                        update_parameters_cdsgd(agentmodels, n_agents)
                        print("Agents share their information!")
                    communication_count = 0
            elif (optimizer == "EASGD"):
                update_epoch()
            elif (optimizer == "FASGD"):
                agentmodels = update_mean_parameters(agentmodels, n_agents)

        else:
            loss = model.fit(X_train,
                             Y_train,
                             batch_size=batch_size,
                             validation_data=(X_test, Y_test),
                             epochs=1,
                             verbose=0)
            train_losses.append(loss.history["loss"])
            val_losses.append(loss.history["val_loss"])
            train_accs.append(loss.history["acc"])
            val_accs.append(loss.history["val_acc"])
            print("epoch", (e + 1),
                  "is completed with the following metrics: loss:",
                  loss.history["loss"], "accuracy:", loss.history["acc"],
                  "val_loss:", loss.history["val_loss"], "val_acc:",
                  loss.history["val_acc"])

        # Save experimental log
        d_log = {}
        Agent_log = {}
        if (optimizer == "CDSGD") or (optimizer == "CDMSGD") or (
                optimizer == "EASGD") or (optimizer == "FASGD"):
            d_log["experiment_name"] = experiment_name + '_' + str(
                n_agents) + 'Agents'
            for nb in range(n_agents):
                Agent_log["Agent%s training loss" %
                          nb] = agent_training_loss_history[nb]
                Agent_log["Agent%s validation loss" %
                          nb] = agent_validation_loss_history[nb]
                Agent_log["Agent%s training acc" %
                          nb] = agent_training_acc_history[nb]
                Agent_log["Agent%s validation acc" %
                          nb] = agent_validation_acc_history[nb]
        else:
            d_log["experiment_name"] = experiment_name
        d_log["img_dim"] = img_dim
        d_log["batch_size"] = batch_size
        d_log["nb_epoch"] = nb_epoch
        d_log["train_losses"] = train_losses
        d_log["val_losses"] = val_losses
        d_log["train_accs"] = train_accs
        d_log["val_accs"] = val_accs
        if (optimizer == "CDSGD") or (optimizer == "CDMSGD") or (
                optimizer == "EASGD") or (optimizer == "FASGD"):
            d_log["optimizer"] = opt[0].get_config()
            json_string = json.loads(agentmodels[0].to_json())
        else:
            d_log["optimizer"] = opt.get_config()
            json_string = json.loads(model.to_json())
        # Add model architecture

        for key in json_string.keys():
            d_log[key] = json_string[key]
        if (optimizer == "CDSGD") or (optimizer == "CDMSGD") or (
                optimizer == "EASGD") or (optimizer == "FASGD"):
            json_file = os.path.join(
                "log", '%s_%s_%s_%sAgents.json' %
                (dataset, agentmodels[0].name, experiment_name, str(n_agents)))
            json_file1 = os.path.join(
                "log", '%s_%s_%s_%sAgents_history.json' %
                (dataset, agentmodels[0].name, experiment_name, str(n_agents)))
            with open(json_file1, 'w') as fp1:
                json.dump(Agent_log, fp1, indent=4, sort_keys=True)
        else:
            json_file = os.path.join(
                "log",
                '%s_%s_%s.json' % (dataset, model.name, experiment_name))
        with open(json_file, 'w') as fp:
            json.dump(d_log, fp, indent=4, sort_keys=True)
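
This example shards the training set across agents with Keras' private _make_batches/_slice_arrays helpers. A dependency-free sketch of the same partitioning using numpy only; the helper and the random data below are illustrative:

import numpy as np

def split_across_agents(X_train, Y_train, n_agents):
    """Split the training data into n_agents contiguous, near-equal shards."""
    shards = np.array_split(np.arange(X_train.shape[0]), n_agents)
    X_agent_ins = [X_train[idx] for idx in shards]
    Y_agent_ins = [Y_train[idx] for idx in shards]
    return X_agent_ins, Y_agent_ins

X_train = np.random.rand(10, 3, 32, 32)
Y_train = np.eye(10)
X_parts, Y_parts = split_across_agents(X_train, Y_train, n_agents=3)
print([x.shape[0] for x in X_parts])   # [4, 3, 3]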
Example #10
def train(model_name, **kwargs):
    """
    Train model

    args: model_name (str, keras model name)
          **kwargs (dict) keyword arguments that specify the model hyperparameters
    """

    # Roll out the parameters
    batch_size = kwargs["batch_size"]
    nb_epoch = kwargs["nb_epoch"]
    # dataset = kwargs["dataset"]
    optimizer = kwargs["optimizer"]
    experiment_name = kwargs["experiment_name"]

    # Compile model.
    if optimizer == "SGD":
        opt = SGD(lr=1E-2, decay=1E-4, momentum=0.9, nesterov=True)
    if optimizer == "Adam":
        opt = Adam(lr=1E-4,
                   beta_1=0.9,
                   beta_2=0.999,
                   epsilon=1e-08,
                   decay=1E-4)
    if optimizer == "Eve":
        opt = Eve(lr=1E-4,
                  decay=1E-4,
                  beta_1=0.9,
                  beta_2=0.999,
                  beta_3=0.999,
                  small_k=0.1,
                  big_K=10,
                  epsilon=1e-08)

    # if dataset == "endovis":
    # (X_train, y_train), (X_test, y_test) = cifar10.load_data()
    dataset = "endovis"
    # if dataset == "cifar100":
    #     (X_train, y_train), (X_test, y_test) = cifar100.load_data()
    # if dataset == "mnist":
    #     (X_train, y_train), (X_test, y_test) = mnist.load_data()
    #     X_train = X_train.reshape((X_train.shape[0], 1, 28, 28))
    #     X_test = X_test.reshape((X_test.shape[0], 1, 28, 28))
    #
    # X_train = X_train.astype('float32')
    # X_test = X_test.astype('float32')
    # X_train /= 255.
    # X_test /= 255.
    #
    # img_dim = X_train.shape[-3:]
    # nb_classes = len(np.unique(y_train))
    #
    # # convert class vectors to binary class matrices
    # Y_train = np_utils.to_categorical(y_train, nb_classes)
    # Y_test = np_utils.to_categorical(y_test, nb_classes)

    train_dir = 'chest_xray/train'
    valid_dir = 'chest_xray/val'
    test_dir = 'chest_xray/test'
    img_width, img_height = 75, 75
    batch_size = 16
    num_epochs = 2
    filter_size = (3, 3)
    pool_size = (2, 2)
    drop_out_dense = 0.5
    drop_out_conv = 0.25
    padding = 'same'
    img_dim = (img_width, img_height, 1)
    nb_classes = 2

    train_datagen = ImageDataGenerator(rescale=1. / 255,
                                       shear_range=0.2,
                                       zoom_range=0.2,
                                       horizontal_flip=True,
                                       vertical_flip=True)

    train_generator = train_datagen.flow_from_directory(
        train_dir,
        target_size=(img_height, img_width),
        batch_size=batch_size,
        class_mode='categorical',
        color_mode='grayscale')

    validation_datagen = ImageDataGenerator(rescale=1. / 255,
                                            shear_range=0.2,
                                            zoom_range=0.2,
                                            horizontal_flip=True,
                                            vertical_flip=True)
    validation_generator = validation_datagen.flow_from_directory(
        valid_dir,
        target_size=(img_height, img_width),
        batch_size=batch_size,
        class_mode='categorical',
        color_mode='grayscale')

    test_datagen = ImageDataGenerator(rescale=1. / 255)
    test_generator = test_datagen.flow_from_directory(test_dir,
                                                      target_size=(img_height,
                                                                   img_width),
                                                      batch_size=batch_size,
                                                      class_mode='categorical',
                                                      color_mode='grayscale')

    # Compile model
    model = models.load(model_name, img_dim, nb_classes)
    model.compile(optimizer=opt,
                  loss="binary_crossentropy",
                  metrics=["accuracy"])

    train_losses, train_accs = [], []
    val_losses, val_accs = [], []
    model.summary()
    for e in range(nb_epoch):

        # fit_generator returns a History object, not a loss value
        history = model.fit_generator(train_generator,
                                      steps_per_epoch=400,
                                      validation_data=validation_generator,
                                      validation_steps=100,
                                      epochs=1)

        train_losses.append(history.history["loss"])
        val_losses.append(history.history["val_loss"])
        train_accs.append(history.history["acc"])
        val_accs.append(history.history["val_acc"])

        # Save experimental log (experiment_name is assumed to be the model name,
        # since no separate experiment tag is defined here)
        experiment_name = model_name
        d_log = {}
        d_log["experiment_name"] = experiment_name
        d_log["img_dim"] = img_dim
        d_log["batch_size"] = batch_size
        d_log["nb_epoch"] = nb_epoch
        d_log["train_losses"] = train_losses
        d_log["val_losses"] = val_losses
        d_log["train_accs"] = train_accs
        d_log["val_accs"] = val_accs
        d_log["optimizer"] = opt.get_config()
        # Add model architecture
        json_string = json.loads(model.to_json())
        d_log.update(json_string)
        json_file = os.path.join(
            "log", '%s_%s_%s.json' % (dataset, model.name, experiment_name))
        with open(json_file, 'w') as fp:
            json.dump(d_log, fp, indent=4, sort_keys=True)
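
    # Minimal sketch (assumes the Keras 2 generator API matching fit_generator
    # above): evaluate the final model on the held-out test split, which the
    # loop above never consumes. steps=len(test_generator) covers the test
    # directory exactly once.
    test_loss, test_acc = model.evaluate_generator(test_generator,
                                                   steps=len(test_generator))
    print("Test loss: %.4f - Test acc: %.4f" % (test_loss, test_acc))

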
def run_cifar10(batch_size, nb_epoch, depth, nb_dense_block, nb_filter,
                growth_rate, dropout_rate, learning_rate, weight_decay,
                plot_architecture):
    """ Run CIFAR10 experiments

    :param batch_size: int -- batch size
    :param nb_epoch: int -- number of training epochs
    :param depth: int -- network depth
    :param nb_dense_block: int -- number of dense blocks
    :param nb_filter: int -- initial number of conv filters
    :param growth_rate: int -- number of new filters added by conv layers
    :param dropout_rate: float -- dropout rate
    :param learning_rate: float -- learning rate
    :param weight_decay: float -- weight decay
    :param plot_architecture: bool -- whether to plot network architecture

    """

    ###################
    # Data processing #
    ###################

    # the data, shuffled and split between train and test sets
    (X_train, y_train), (X_test, y_test) = cifar10.load_data()

    nb_classes = len(np.unique(y_train))
    img_dim = X_train.shape[1:]

    if K.image_data_format() == "channels_first":
        n_channels = X_train.shape[1]
    else:
        n_channels = X_train.shape[-1]

    # convert class vectors to binary class matrices
    Y_train = np_utils.to_categorical(y_train, nb_classes)
    Y_test = np_utils.to_categorical(y_test, nb_classes)

    X_train = X_train.astype('float32')
    X_test = X_test.astype('float32')

    # Normalisation
    X = np.vstack((X_train, X_test))
    # 2 cases depending on the image ordering
    if K.image_data_format() == "channels_first":
        for i in range(n_channels):
            mean = np.mean(X[:, i, :, :])
            std = np.std(X[:, i, :, :])
            X_train[:, i, :, :] = (X_train[:, i, :, :] - mean) / std
            X_test[:, i, :, :] = (X_test[:, i, :, :] - mean) / std

    elif K.image_data_format() == "channels_last":
        for i in range(n_channels):
            mean = np.mean(X[:, :, :, i])
            std = np.std(X[:, :, :, i])
            X_train[:, :, :, i] = (X_train[:, :, :, i] - mean) / std
            X_test[:, :, :, i] = (X_test[:, :, :, i] - mean) / std
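
    # Note that the per-channel mean and std are computed on the concatenation of
    # the train and test sets, so the test images contribute to the statistics.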

    ###################
    # Construct model #
    ###################

    model = densenet.DenseNet(nb_classes,
                              img_dim,
                              depth,
                              nb_dense_block,
                              growth_rate,
                              nb_filter,
                              dropout_rate=dropout_rate,
                              weight_decay=weight_decay)
    # Model output
    model.summary()

    # Build optimizer
    opt0 = Adam(lr=learning_rate, beta_1=0.9, beta_2=0.999, epsilon=1e-08)

    model.compile(loss='categorical_crossentropy',
                  optimizer=opt0,
                  metrics=["accuracy"])

    if plot_architecture:
        from keras.utils.vis_utils import plot_model
        plot_model(model,
                   to_file='./figures/densenet_archi.png',
                   show_shapes=True)

    ####################
    # Network training #
    ####################

    print("Training")

    list_train_loss = []
    list_test_loss = []
    list_learning_rate = []
    set_lr = learning_rate
    momentum = 0.9  # assumption: momentum is not passed as an argument, so use the common SGD default

    # First step: a short warm-up with the Adam optimizer (a tenth of nb_epoch)
    nb_epoch_opt0 = nb_epoch // 10
    for e in range(nb_epoch_opt0):

        # Step schedule: divide the learning rate by 10 halfway through this
        # warm-up phase and by 100 at the three-quarter mark
        if e == int(0.5 * nb_epoch_opt0):
            K.set_value(model.optimizer.lr, np.float32(learning_rate / 10.))

        if e == int(0.75 * nb_epoch_opt0):
            K.set_value(model.optimizer.lr, np.float32(learning_rate / 100.))

        split_size = batch_size
        num_splits = X_train.shape[0] // split_size  # integer division: np.array_split needs an int section count
        arr_splits = np.array_split(np.arange(X_train.shape[0]), num_splits)

        start = time.time()

        for batch_idx in arr_splits:

            X_batch, Y_batch = X_train[batch_idx], Y_train[batch_idx]
            _, _ = model.train_on_batch(X_batch, Y_batch)

        train_logloss, train_acc = model.evaluate(X_train,
                                                  Y_train,
                                                  verbose=0,
                                                  batch_size=64)
        print('\ttrain_acc = %.6f' % train_acc)

        test_logloss, test_acc = model.evaluate(X_test,
                                                Y_test,
                                                verbose=0,
                                                batch_size=64)
        print('\t\t test_acc = %.6f' % test_acc)

        list_train_loss.append([train_logloss, train_acc])
        list_test_loss.append([test_logloss, test_acc])
        # float() makes the numpy scalar JSON serialisable
        list_learning_rate.append(float(K.get_value(model.optimizer.lr)))
        print('\t\t\tEpoch %s/%s, Time: %s' %
              (e + 1, nb_epoch_opt0, time.time() - start))

        file_name = ('checkpoints/e%d-trl%.3f-tel%.3f-tracc%.3f-teacc%.3f-lr%.6f-m%.2f-b%d.h5'
                     % (e, train_logloss, test_logloss, train_acc, test_acc,
                        set_lr, momentum, batch_size))

        print(file_name)
        model.save(file_name)

        d_log = {}
        d_log["batch_size"] = batch_size
        d_log["nb_epoch"] = nb_epoch
        d_log["optimizer"] = opt0.get_config()
        d_log["train_loss"] = list_train_loss
        d_log["test_loss"] = list_test_loss
        d_log["learning_rate"] = list_learning_rate

        json_file = os.path.join('./log', 'experiment_log_cifar10.json')
        with open(json_file, 'w') as fp:
            json.dump(d_log, fp, indent=4, sort_keys=True)
    print(40 * '#' + ' Second round ' + 40 * '#')
    opt1 = SGD(lr=learning_rate, momentum=momentum, decay=0.0, nesterov=False)

    model.compile(loss='categorical_crossentropy',
                  optimizer=opt1,
                  metrics=["accuracy"])
    # Second step
    for e in range(nb_epoch):

        # Compounding decay: each epoch, multiply the current learning rate by 0.99 ** (e // 10)
        set_lr = set_lr * 0.99 ** math.floor(e / 10)
        K.set_value(model.optimizer.lr, set_lr)
        print('set_lr = %.10f' % set_lr)

        split_size = batch_size
        num_splits = X_train.shape[0] // split_size  # integer division: np.array_split needs an int section count
        arr_splits = np.array_split(np.arange(X_train.shape[0]), num_splits)

        start = time.time()

        for batch_idx in arr_splits:

            X_batch, Y_batch = X_train[batch_idx], Y_train[batch_idx]
            _, _ = model.train_on_batch(X_batch, Y_batch)

        train_logloss, train_acc = model.evaluate(X_train,
                                                  Y_train,
                                                  verbose=0,
                                                  batch_size=64)
        print('\ttrain_acc = %.6f' % train_acc)

        test_logloss, test_acc = model.evaluate(X_test,
                                                Y_test,
                                                verbose=0,
                                                batch_size=64)
        print('\t\t test_acc = %.6f' % test_acc)

        list_train_loss.append([train_logloss, train_acc])
        list_test_loss.append([test_logloss, test_acc])
        # float() makes the numpy scalar JSON serialisable
        list_learning_rate.append(float(K.get_value(model.optimizer.lr)))
        print('\t\t\tEpoch %s/%s, Time: %s' %
              (e + 1, nb_epoch, time.time() - start))

        file_name = ('checkpoints/e%d-trl%.3f-tel%.3f-tracc%.3f-teacc%.3f-lr%.6f-m%.2f-b%d.h5'
                     % (e, train_logloss, test_logloss, train_acc, test_acc,
                        set_lr, momentum, batch_size))

        print(file_name)
        model.save(file_name)

        d_log = {}
        d_log["batch_size"] = batch_size
        d_log["nb_epoch"] = nb_epoch
        d_log["optimizer"] = opt1.get_config()
        d_log["train_loss"] = list_train_loss
        d_log["test_loss"] = list_test_loss
        d_log["learning_rate"] = list_learning_rate

        json_file = os.path.join('./log', 'experiment_log_cifar10.json')
        with open(json_file, 'w') as fp:
            json.dump(d_log, fp, indent=4, sort_keys=True)
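

# A minimal usage sketch: the hyperparameter values below are illustrative
# assumptions (a small DenseNet-40 style configuration), not values taken from
# this project's experiments.
if __name__ == '__main__':
    run_cifar10(batch_size=64,
                nb_epoch=30,
                depth=40,
                nb_dense_block=3,
                nb_filter=16,
                growth_rate=12,
                dropout_rate=0.2,
                learning_rate=1e-3,
                weight_decay=1e-4,
                plot_architecture=False)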