def cross_validate_inmemory(model_name, **kwargs):
    """
    StateFarm competition:
    Training set has 26 unique drivers. We do 26 fold CV where
    a driver is alternatively singled out to be the validation set

    Load the whole train data in memory for faster operations

    args: model (keras model)
          **kwargs (dict) keyword arguments that specify the model hyperparameters
    """

    # Roll out the parameters
    nb_classes = kwargs["nb_classes"]
    batch_size = kwargs["batch_size"]
    n_batch_per_epoch = kwargs["n_batch_per_epoch"]
    nb_epoch = kwargs["nb_epoch"]
    prob = kwargs["prob"]
    do_plot = kwargs["do_plot"]
    data_file = kwargs["data_file"]
    semi_super_file = kwargs["semi_super_file"]
    pretr_weights_file = kwargs["pretr_weights_file"]
    normalisation_style = kwargs["normalisation_style"]
    weak_labels = kwargs["weak_labels"]
    objective = kwargs["objective"]
    experiment = kwargs["experiment"]
    start_fold = kwargs["start_fold"]

    # Load env variables (in .env file at the root of the project)
    load_dotenv(find_dotenv())

    # Load env variables
    model_dir = os.path.expanduser(os.environ.get("MODEL_DIR"))
    data_dir = os.path.expanduser(os.environ.get("DATA_DIR"))

    # Output path where we store experiment log and weights
    model_dir = os.path.join(model_dir, model_name)
    # Create if it does not exist
    general_utils.create_dir(model_dir)
    # Automatically determine experiment name
    list_exp = glob.glob(model_dir + "/*")
    # Create the experiment dir and weights dir
    if experiment:
        exp_dir = os.path.join(model_dir, experiment)
    else:
        exp_dir = os.path.join(model_dir, "Experiment_%s" % len(list_exp))
    general_utils.create_dir(exp_dir)

    # Compile model.
    # opt = RMSprop(lr=5E-6, rho=0.9, epsilon=1e-06)
    opt = SGD(lr=5e-4, decay=1e-6, momentum=0.9, nesterov=True)
    # opt = Adam(lr=1E-5, beta_1=0.9, beta_2=0.999, epsilon=1e-08)

    # Batch generator
    DataAug = batch_utils.AugDataGenerator(data_file,
                                           batch_size=batch_size,
                                           prob=prob,
                                           dset="train",
                                           maxproc=4,
                                           num_cached=60,
                                           random_augm=False,
                                           hdf5_file_semi=semi_super_file)
    DataAug.add_transform("h_flip")
    # DataAug.add_transform("v_flip")
    # DataAug.add_transform("fixed_rot", angle=40)
    DataAug.add_transform("random_rot", angle=40)
    # DataAug.add_transform("fixed_tr", tr_x=40, tr_y=40)
    DataAug.add_transform("random_tr", tr_x=40, tr_y=40)
    # DataAug.add_transform("fixed_blur", kernel_size=5)
    DataAug.add_transform("random_blur", kernel_size=5)
    # DataAug.add_transform("fixed_erode", kernel_size=4)
    DataAug.add_transform("random_erode", kernel_size=3)
    # DataAug.add_transform("fixed_dilate", kernel_size=4)
    DataAug.add_transform("random_dilate", kernel_size=3)
    # DataAug.add_transform("fixed_crop", pos_x=10, pos_y=10, crop_size_x=200, crop_size_y=200)
    DataAug.add_transform("random_crop", min_crop_size=140, max_crop_size=160)
    # DataAug.add_transform("hist_equal")
    # DataAug.add_transform("random_occlusion", occ_size_x=100, occ_size_y=100)

    epoch_size = n_batch_per_epoch * batch_size

    general_utils.pretty_print("Load all data...")

    with h5py.File(data_file, "r") as hf:
        X = hf["train_data"][:, :, :, :]
        y = hf["train_label"][:].astype(np.uint8)
        y = np_utils.to_categorical(y, nb_classes=nb_classes)  # Format for keras

        try:
            for fold in range(start_fold, 8):
                # for fold in np.random.permutation(26):

                min_valid_loss = 100

                # Save losses
                list_train_loss = []
                list_valid_loss = []

                # Load valid data in memory for fast error evaluation
                idx_valid = hf["valid_fold%s" % fold][:]
                idx_train = hf["train_fold%s" % fold][:]
                X_valid = X[idx_valid]
                y_valid = y[idx_valid]

                # Normalise
                X_valid = normalisation(X_valid, normalisation_style)

                # Compile model
                general_utils.pretty_print("Compiling...")
                model = models.load(model_name,
                                    nb_classes,
                                    X_valid.shape[-3:],
                                    pretr_weights_file=pretr_weights_file)
                model.compile(optimizer=opt, loss=objective)

                # Save architecture
                json_string = model.to_json()
                with open(os.path.join(data_dir, '%s_archi.json' % model.name), 'w') as f:
                    f.write(json_string)

                for e in range(nb_epoch):
                    # Initialize progbar and batch counter
                    progbar = generic_utils.Progbar(epoch_size)
                    batch_counter = 1
                    l_train_loss = []
                    start = time.time()

                    for X_train, y_train in DataAug.gen_batch_inmemory(X, y, idx_train=idx_train):
                        if do_plot:
                            general_utils.plot_batch(X_train, np.argmax(y_train, 1), batch_size)

                        # Normalise
                        X_train = normalisation(X_train, normalisation_style)

                        train_loss = model.train_on_batch(X_train, y_train)
                        l_train_loss.append(train_loss)
                        batch_counter += 1
                        progbar.add(batch_size, values=[("train loss", train_loss)])
                        if batch_counter >= n_batch_per_epoch:
                            break

                    print("")
                    print('Epoch %s/%s, Time: %s' % (e + 1, nb_epoch, time.time() - start))
                    y_valid_pred = model.predict(X_valid, verbose=0, batch_size=16)
                    train_loss = float(np.mean(l_train_loss))  # use float to make it json saveable
                    valid_loss = log_loss(y_valid, y_valid_pred)
                    print("Train loss:", train_loss, "valid loss:", valid_loss)
                    list_train_loss.append(train_loss)
                    list_valid_loss.append(valid_loss)

                    # Record experimental data in a dict
                    d_log = {}
                    d_log["fold"] = fold
                    d_log["nb_classes"] = nb_classes
                    d_log["batch_size"] = batch_size
                    d_log["n_batch_per_epoch"] = n_batch_per_epoch
                    d_log["nb_epoch"] = nb_epoch
                    d_log["epoch_size"] = epoch_size
                    d_log["prob"] = prob
                    d_log["optimizer"] = opt.get_config()
                    d_log["augmentator_config"] = DataAug.get_config()
                    d_log["train_loss"] = list_train_loss
                    d_log["valid_loss"] = list_valid_loss

                    json_file = os.path.join(exp_dir, 'experiment_log_fold%s.json' % fold)
                    general_utils.save_exp_log(json_file, d_log)

                    # Only save the best epoch
                    if valid_loss < min_valid_loss:
                        min_valid_loss = valid_loss
                        trained_weights_path = os.path.join(exp_dir, '%s_weights_fold%s.h5' % (model.name, fold))
                        model.save_weights(trained_weights_path, overwrite=True)

        except KeyboardInterrupt:
            pass
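# --- Usage sketch (illustration only, not from the original repo) ---
# A minimal example of how cross_validate_inmemory might be invoked through its
# keyword-argument interface; all paths, values and the "CNN" model name below are
# hypothetical placeholders, not values taken from the original project.
if __name__ == "__main__":
    params = {"nb_classes": 10,
              "batch_size": 32,
              "n_batch_per_epoch": 100,
              "nb_epoch": 20,
              "prob": 0.5,
              "do_plot": False,
              "data_file": "train_data.h5",            # hypothetical HDF5 path
              "semi_super_file": None,
              "pretr_weights_file": None,
              "normalisation_style": "standard",        # assumed style name
              "weak_labels": False,
              "objective": "categorical_crossentropy",
              "experiment": "Experiment_0",
              "start_fold": 0}
    cross_validate_inmemory("CNN", **params)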
from keras.optimizers import SGD

op = SGD(lr=0.01)
a = op.get_config()
print(a)
def run_gtsrb(batch_size,
              nb_epoch,
              depth,
              nb_dense_block,
              nb_filter,
              growth_rate,
              dropout_rate,
              learning_rate,
              weight_decay,
              logfile,
              plot_architecture):
    """ Run GTSRB experiments

    :param batch_size: int -- batch size
    :param nb_epoch: int -- number of training epochs
    :param depth: int -- network depth
    :param nb_dense_block: int -- number of dense blocks
    :param nb_filter: int -- initial number of conv filter
    :param growth_rate: int -- number of new filters added by conv layers
    :param dropout_rate: float -- dropout rate
    :param learning_rate: float -- learning rate
    :param weight_decay: float -- weight decay
    :param logfile: str -- name of the json log file
    :param plot_architecture: bool -- whether to plot network architecture
    """

    ###################
    # Data processing #
    ###################

    tr_x = np.load(os.path.join(DATASET_DIR, 'rgb_train_in.npy'))
    tr_y = np.load(os.path.join(DATASET_DIR, 'rgb_train_out.npy'))
    te_x = np.load(os.path.join(DATASET_DIR, 'rgb_test_in.npy'))
    te_y = np.load(os.path.join(DATASET_DIR, 'rgb_test_out.npy'))
    va_x = np.load(os.path.join(DATASET_DIR, 'rgb_valid_in.npy'))
    va_y = np.load(os.path.join(DATASET_DIR, 'rgb_valid_out.npy'))

    X_train = tr_x
    Y_train = tr_y
    X_test = np.vstack((te_x, va_x))
    Y_test = np.vstack((te_y, va_y))

    nb_classes = Y_train.shape[1]
    img_dim = X_train.shape[1:]

    if K.image_data_format() == "channels_first":
        n_channels = X_train.shape[1]
    else:
        n_channels = X_train.shape[-1]

    X_train = X_train.astype('float32')
    X_test = X_test.astype('float32')

    # Normalisation
    X = np.vstack((X_train, X_test))
    # 2 cases depending on the image ordering
    if K.image_data_format() == "channels_first":
        for i in range(n_channels):
            mean = np.mean(X[:, i, :, :])
            std = np.std(X[:, i, :, :])
            X_train[:, i, :, :] = (X_train[:, i, :, :] - mean) / std
            X_test[:, i, :, :] = (X_test[:, i, :, :] - mean) / std
    elif K.image_data_format() == "channels_last":
        for i in range(n_channels):
            mean = np.mean(X[:, :, :, i])
            std = np.std(X[:, :, :, i])
            X_train[:, :, :, i] = (X_train[:, :, :, i] - mean) / std
            X_test[:, :, :, i] = (X_test[:, :, :, i] - mean) / std

    ###################
    # Construct model #
    ###################

    model = densenet.DenseNet(nb_classes,
                              img_dim,
                              depth,
                              nb_dense_block,
                              growth_rate,
                              nb_filter,
                              dropout_rate=dropout_rate,
                              weight_decay=weight_decay)
    # Model output
    model.summary()

    # Build optimizer
    # opt = Adam(lr=learning_rate, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
    opt = SGD(lr=learning_rate, momentum=0.9, nesterov=True)

    model.compile(loss='categorical_crossentropy',
                  optimizer=opt,
                  metrics=["accuracy"])

    if plot_architecture:
        from keras.utils.visualize_util import plot
        plot(model, to_file='./figures/densenet_archi.png', show_shapes=True)

    ####################
    # Network training #
    ####################

    print("Training")

    list_train_loss = []
    list_test_loss = []
    list_learning_rate = []

    datagen = ImageDataGenerator()

    for e in range(nb_epoch):

        if e == int(0.5 * nb_epoch):
            K.set_value(model.optimizer.lr, np.float32(learning_rate / 10.))

        if e == int(0.75 * nb_epoch):
            K.set_value(model.optimizer.lr, np.float32(learning_rate / 100.))

        l_train_loss = []
        start = time.time()

        model.fit_generator(datagen.flow(X_train, Y_train, batch_size), epochs=1)

        test_logloss, test_acc = model.evaluate(X_test, Y_test, verbose=1, batch_size=64)
        list_test_loss.append([test_logloss, test_acc])
        list_learning_rate.append(float(K.get_value(model.optimizer.lr)))
        # to convert numpy array to json serializable
        print('Epoch %s/%s, Time: %s' % (e + 1, nb_epoch, time.time() - start))

        d_log = {}
        d_log["batch_size"] = batch_size
        d_log["nb_epoch"] = nb_epoch
        d_log["optimizer"] = opt.get_config()
        # d_log["train_loss"] = list_train_loss
        d_log["test_loss"] = list_test_loss
        d_log["learning_rate"] = list_learning_rate

        json_file = os.path.join('./log', logfile)
        with open(json_file, 'w') as fp:
            json.dump(d_log, fp, indent=4, sort_keys=True)
# break
# val_loss, val_acc, val_f2_score = model.evaluate(X_val,
#                                                  y_val,
#                                                  verbose=1,
#                                                  batch_size=batch_size)
# list_test_loss.append([val_loss, val_acc, val_f2_score])
list_learning_rate.append(float(K.get_value(model.optimizer.lr)))
# to convert numpy array to json serializable
print('Epoch %s/%s, Time: %s' % (e + 1, epochs, time.time() - start))

d_log = {}
d_log["batch_size"] = batch_size
d_log["nb_epoch"] = epochs
d_log["optimizer"] = optimizer.get_config()
d_log["train_loss"] = list_train_loss
d_log["test_loss"] = list_val_loss
d_log["learning_rate"] = list_learning_rate

json_file = os.path.join('./logs/experiment_Planet_Densenet.json')
with open(json_file, 'w') as fp:
    json.dump(d_log, fp, indent=4, sort_keys=True)

model.save('last-epoch-model-val.h5')

# early stopping
if val_loss > val_loss_last:
    wait += 1
    if wait == 2:
        break
def train(model_name, **kwargs):
    """
    Train model

    args: model_name (str, keras model name)
          **kwargs (dict) keyword arguments that specify the model hyperparameters
    """

    # Roll out the parameters
    batch_size = kwargs["batch_size"]
    nb_epoch = kwargs["nb_epoch"]
    dataset = kwargs["dataset"]
    optimizer = kwargs["optimizer"]
    experiment_name = kwargs["experiment_name"]

    # Compile model.
    if optimizer == "SGD":
        opt = SGD(lr=1E-2, decay=1E-4, momentum=0.9, nesterov=True)
    if optimizer == "Adam":
        opt = Adam(lr=1E-4, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=1E-4)
    if optimizer == "Eve":
        opt = Eve(lr=1E-4, decay=1E-4, beta_1=0.9, beta_2=0.999, beta_3=0.999,
                  small_k=0.1, big_K=10, epsilon=1e-08)

    if dataset == "cifar10":
        (X_train, y_train), (X_test, y_test) = cifar10.load_data()

    if dataset == "cifar100":
        (X_train, y_train), (X_test, y_test) = cifar100.load_data()

    if dataset == "mnist":
        (X_train, y_train), (X_test, y_test) = mnist.load_data()
        X_train = X_train.reshape((X_train.shape[0], 1, 28, 28))
        X_test = X_test.reshape((X_test.shape[0], 1, 28, 28))

    X_train = X_train.astype('float32')
    X_test = X_test.astype('float32')
    X_train /= 255.
    X_test /= 255.

    img_dim = X_train.shape[-3:]
    nb_classes = len(np.unique(y_train))

    # convert class vectors to binary class matrices
    Y_train = np_utils.to_categorical(y_train, nb_classes)
    Y_test = np_utils.to_categorical(y_test, nb_classes)

    # Compile model
    model = models.load(model_name, img_dim, nb_classes)
    model.compile(optimizer=opt, loss="categorical_crossentropy", metrics=["accuracy"])

    train_losses, train_accs = [], []
    val_losses, val_accs = [], []

    for e in range(nb_epoch):
        loss = model.fit(X_train, Y_train,
                         batch_size=batch_size,
                         validation_data=(X_test, Y_test),
                         epochs=1)
        train_losses.append(loss.history["loss"])
        val_losses.append(loss.history["val_loss"])
        train_accs.append(loss.history["acc"])
        val_accs.append(loss.history["val_acc"])

    # Save experimental log
    d_log = {}
    d_log["experiment_name"] = experiment_name
    d_log["img_dim"] = img_dim
    d_log["batch_size"] = batch_size
    d_log["nb_epoch"] = nb_epoch
    d_log["train_losses"] = train_losses
    d_log["val_losses"] = val_losses
    d_log["train_accs"] = train_accs
    d_log["val_accs"] = val_accs
    d_log["optimizer"] = opt.get_config()

    # Add model architecture
    json_string = json.loads(model.to_json())
    for key in json_string.keys():
        d_log[key] = json_string[key]

    json_file = os.path.join("log", '%s_%s_%s.json' % (dataset, model.name, experiment_name))
    with open(json_file, 'w') as fp:
        json.dump(d_log, fp, indent=4, sort_keys=True)
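# --- Usage sketch (illustration only; values and the "CNN" model name are hypothetical) ---
# Shows how the keyword-argument interface above is meant to be driven, e.g. to compare
# the Eve optimizer against Adam on CIFAR-10.
train("CNN",
      batch_size=128,
      nb_epoch=30,
      dataset="cifar10",
      optimizer="Eve",
      experiment_name="eve_vs_adam_cifar10")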
def run_cifar100(batch_size,
                 nb_epoch,
                 depth,
                 nb_dense_block,
                 nb_filter,
                 growth_rate,
                 dropout_rate,
                 learning_rate,
                 weight_decay,
                 plot_architecture,
                 compression=0.5,
                 init_from_epoch=0):
    """ Run CIFAR100 experiments

    :param batch_size: int -- batch size
    :param nb_epoch: int -- number of training epochs
    :param depth: int -- network depth
    :param nb_dense_block: int -- number of dense blocks
    :param nb_filter: int -- initial number of conv filter
    :param growth_rate: int -- number of new filters added by conv layers
    :param dropout_rate: float -- dropout rate
    :param learning_rate: float -- learning rate
    :param weight_decay: float -- weight decay
    :param plot_architecture: bool -- whether to plot network architecture
    """

    ###################
    # Data processing #
    ###################

    # the data, shuffled and split between train and test sets
    # (X_train, y_train), (X_test, y_test) = cifar100.load_data()
    (X_train, y_train), (X_test, y_test) = load_cifar100()

    nb_classes = len(np.unique(y_train))
    img_dim = X_train.shape[1:]

    if K.image_dim_ordering() == "th":
        n_channels = X_train.shape[1]
    else:
        n_channels = X_train.shape[-1]

    # convert class vectors to binary class matrices
    Y_train = np_utils.to_categorical(y_train, nb_classes)
    Y_test = np_utils.to_categorical(y_test, nb_classes)

    X_train = X_train.astype('float32')
    X_test = X_test.astype('float32')

    # Normalisation
    X = np.vstack((X_train, X_test))
    # 2 cases depending on the image ordering
    if K.image_dim_ordering() == "th":
        for i in range(n_channels):
            mean = np.mean(X[:, i, :, :])
            std = np.std(X[:, i, :, :])
            X_train[:, i, :, :] = (X_train[:, i, :, :] - mean) / std
            X_test[:, i, :, :] = (X_test[:, i, :, :] - mean) / std
    elif K.image_dim_ordering() == "tf":
        for i in range(n_channels):
            mean = np.mean(X[:, :, :, i])
            std = np.std(X[:, :, :, i])
            X_train[:, :, :, i] = (X_train[:, :, :, i] - mean) / std
            X_test[:, :, :, i] = (X_test[:, :, :, i] - mean) / std

    print("X_train shape:{}".format(X_train.shape))

    ###################
    # Construct model #
    ###################

    model = Baseline.Baseline(nb_classes,
                              img_dim,
                              depth,
                              nb_dense_block,
                              growth_rate,
                              nb_filter,
                              dropout_rate=dropout_rate,
                              weight_decay=weight_decay,
                              compression=0.5)
    # Model output
    model.summary()

    # Build optimizer
    # opt = Adam(lr=learning_rate, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
    opt = SGD(lr=learning_rate, momentum=0.9, nesterov=True)

    model.compile(loss='categorical_crossentropy',
                  optimizer=opt,
                  metrics=["accuracy"])

    if plot_architecture:
        from keras.utils.visualize_util import plot
        plot(model, to_file='./figures/Baseline_sum_archi.png', show_shapes=True)

    ####################
    # Network training #
    ####################

    print("Training")

    list_train_loss = []
    list_test_loss = []
    list_learning_rate = []
    loglog = [0]
    lr = learning_rate

    if init_from_epoch != 0:
        model_path = 'weights/Baseline_sum-cifar100-40-tf-' + str(init_from_epoch) + '.h5'
        print('loading weights from %s' % model_path)
        model.load_weights(model_path)
        print('training on batch from epoch %d' % init_from_epoch)

    for e in range(init_from_epoch, nb_epoch):

        if e == int(0.5 * nb_epoch):
            K.set_value(model.optimizer.lr, np.float32(learning_rate / 10.))

        if e == int(0.75 * nb_epoch):
            K.set_value(model.optimizer.lr, np.float32(learning_rate / 100.))

        split_size = batch_size
        num_splits = X_train.shape[0] // split_size
        arr_splits = np.array_split(np.arange(X_train.shape[0]), num_splits)

        l_train_loss = []
        start = time.time()

        for batch_nm, batch_idx in enumerate(arr_splits):
            X_batch, Y_batch = X_train[batch_idx], Y_train[batch_idx]
            train_logloss, train_acc = model.train_on_batch(X_batch, Y_batch)
            l_train_loss.append([train_logloss, train_acc])
            sys.stdout.write("\rEpoch {}, Batch {}/{}: Training logloss: {:.4f}, training accuracy: {:.4f}%"
                             .format(e, batch_nm, num_splits, train_logloss, train_acc * 100))

        test_logloss, test_acc = model.evaluate(X_test, Y_test, verbose=1, batch_size=64)

        # EarlyStopping
        # loglog.append(np.mean(np.array(l_train_loss), 0)[0])
        # if len(loglog) >= 20:
        #     if loglog[-1] - loglog[1] >= -0.01:
        #         print("\n\n\nreduce LR\n\n\n")
        #         lr = np.float32(lr / 10.)
        #         print(lr)
        #         K.set_value(model.optimizer.lr, lr)
        #         loglog = [0]
        #     else:
        #         loglog = [0]
        # print("\n\nNOTICE{}\n\n".format(loglog))

        list_train_loss.append(np.mean(np.array(l_train_loss), 0).tolist())
        list_test_loss.append([test_logloss, test_acc])
        list_learning_rate.append(float(K.get_value(model.optimizer.lr)))
        # to convert numpy array to json serializable
        print('\nEpoch %s/%s, training logloss: %4f test_logloss: %4f, test acc: %4f%% Time: %s'
              % (e + 1, nb_epoch, np.mean(np.array(l_train_loss), 0)[0],
                 test_logloss, test_acc * 100, time.time() - start))

        weights_file = 'weights/Baseline-cifar100-40-12-tf-' + str(e) + '.h5'
        if e % 5 == 0:
            model.save_weights(weights_file)

        d_log = {}
        d_log["batch_size"] = batch_size
        d_log["nb_epoch"] = nb_epoch
        d_log["optimizer"] = opt.get_config()
        d_log["train_loss"] = list_train_loss
        d_log["test_loss"] = list_test_loss
        d_log["learning_rate"] = list_learning_rate

        json_file = os.path.join('./log/Baseline_log_cifar100.json')
        with open(json_file, 'w') as fp:
            json.dump(d_log, fp, indent=4, sort_keys=True)
class TrainGradientBased(TrainNN):
    """ Train an artificial neural network """

    def __init__(self,
                 model_filename="trained-model.hdf5",
                 optimizer='Adam',
                 optimizer_params={'learning_rate': 5e-5},
                 monitor='val_loss',
                 min_delta=1e-5,
                 patience=50,
                 metrics=['mae', 'mse', 'msle', 'mape'],
                 seed=0,
                 verbose=0,
                 **kwargs):
        super().__init__(seed=seed, verbose=verbose, **kwargs)
        self.checkpointer = ModelCheckpoint(filepath=model_filename,
                                            verbose=verbose,
                                            save_best_only=True)
        if optimizer == 'Adadelta':
            self.optimizer = Adadelta(**optimizer_params)
        elif optimizer == 'Adagrad':
            self.optimizer = Adagrad(**optimizer_params)
        elif optimizer == 'Adam':
            self.optimizer = Adam(**optimizer_params)
        elif optimizer == 'Adamax':
            self.optimizer = Adamax(**optimizer_params)
        elif optimizer == 'Nadam':
            self.optimizer = Nadam(**optimizer_params)
        elif optimizer == 'RMSprop':
            self.optimizer = RMSprop(**optimizer_params)
        elif optimizer == 'SGD':
            self.optimizer = SGD(**optimizer_params)
        else:
            raise Exception("Unknown optimizer ", optimizer)
        if self.verbose > 1:
            print("Optimizer ", optimizer, str(self.optimizer.get_config()))
        self.early_stopping = EarlyStopping(monitor=monitor,
                                            min_delta=min_delta,
                                            patience=patience,
                                            verbose=verbose,
                                            mode='auto')
        self.metrics = metrics

    def train(self,
              train_dataset,
              validation_dataset=None,
              validation_steps=None,
              epochs=100,
              steps_per_epoch=None,
              loss='mean_squared_error',
              **kwargs):
        self.model.compile(loss=loss,
                           optimizer=self.optimizer,
                           metrics=self.metrics)
        self.trainable_count = int(
            np.sum([K.count_params(p) for p in list(self.model.trainable_weights)]))
        start = time.time()
        if self.verbose:
            print('Start training (', start, ')')
        verb = 0
        if self.verbose > 1:
            verb = 1
        elif self.verbose == 1:
            verb = 2
        self.model.fit_generator(train_dataset,
                                 validation_data=validation_dataset,
                                 validation_steps=validation_steps,
                                 steps_per_epoch=steps_per_epoch,
                                 epochs=epochs,
                                 callbacks=[self.early_stopping, self.checkpointer],
                                 verbose=verb)
        train_time = time.time() - start
        if self.verbose:
            print('Finish training. Total time: ', train_time)
        return {'trainable_vars': self.trainable_count,
                'training_time': train_time}

    def evaluate(self, test_dataset, **kwargs):
        values = self.model.evaluate_generator(test_dataset)
        metrics_dict = dict(zip(self.metrics, values[1:]))
        prediction = self.model.predict_generator(test_dataset)
        return metrics_dict, prediction
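# --- Usage sketch (illustration only; assumptions flagged inline) ---
# Illustrates the intended call sequence for TrainGradientBased. It assumes the TrainNN
# parent class builds `self.model` from the kwargs it receives, and the three generators
# below are hypothetical keras-compatible data generators, not objects from the original
# project.
trainer = TrainGradientBased(model_filename="regression-model.hdf5",   # hypothetical path
                             optimizer='RMSprop',
                             optimizer_params={'learning_rate': 1e-4},
                             patience=20,
                             verbose=1)
stats = trainer.train(train_generator,                                 # hypothetical generator
                      validation_dataset=valid_generator,
                      validation_steps=50,
                      steps_per_epoch=200,
                      epochs=50)
metrics, predictions = trainer.evaluate(test_generator)                # hypothetical generator
print(stats, metrics)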
def train(model_name, **kwargs):
    """
    Train model

    args: model_name (str, keras model name)
          **kwargs (dict) keyword arguments that specify the model hyperparameters
    """

    # Roll out the parameters
    batch_size = kwargs["batch_size"]
    nb_epoch = kwargs["nb_epoch"]
    dataset = kwargs["dataset"]
    optimizer = kwargs["optimizer"]
    experiment_name = kwargs["experiment_name"]
    n_agents = kwargs["n_agents"]
    communication_period = kwargs["communication_period"]
    sparsity = kwargs["sparsity"]

    if dataset == "cifar10":
        (X_train, y_train), (X_test, y_test) = cifar10.load_data()

    if dataset == "cifar100":
        (X_train, y_train), (X_test, y_test) = cifar100.load_data()

    if dataset == "mnist":
        (X_train, y_train), (X_test, y_test) = mnist.load_data()
        X_train = X_train.reshape((X_train.shape[0], 1, 28, 28))
        X_test = X_test.reshape((X_test.shape[0], 1, 28, 28))

    if dataset != "cifar10_non_iid":
        X_train = X_train.astype('float32')
        X_test = X_test.astype('float32')
        X_train /= 255.
        X_test /= 255.

    if dataset == "cifar10_non_iid":
        (X_train, y_train), (X_test, y_test) = cifar10.load_data()
        img_dim = X_train.shape[-3:]
        nb_classes = len(np.unique(y_train))
        X_test = X_test.astype('float32')
        X_test /= 255.
        Y_test = np_utils.to_categorical(y_test, nb_classes)
        X_train_c = [0 for nb in range(nb_classes)]
        y_train_c = [0 for nb in range(nb_classes)]
        for select in range(nb_classes):
            indices = np.argwhere(y_train == select)
            X_temp = X_train[indices[:, 0], :, :, :].astype('float32') / 255.
            y_temp = y_train[indices[:, 0]]
            X_train_c[select] = X_temp
            y_train_c[select] = np_utils.to_categorical(y_temp, nb_classes)
        X_train = X_train.astype('float32')
        X_train /= 255.
        Y_train = np_utils.to_categorical(y_train, nb_classes)

    if (dataset != "cifar10_non_iid"):
        img_dim = X_train.shape[-3:]
        nb_classes = len(np.unique(y_train))

        # convert class vectors to binary class matrices
        Y_train = np_utils.to_categorical(y_train, nb_classes)
        Y_test = np_utils.to_categorical(y_test, nb_classes)

    if (optimizer == "CDSGD") or (optimizer == "CDMSGD") or (optimizer == "EASGD") or (optimizer == "FASGD"):
        if dataset != "cifar10_non_iid":
            # Slice the Data into the agents
            ins = [X_train, Y_train]
            num_train_samples = ins[0].shape[0]
            agent_data_size = int(num_train_samples / n_agents)
            index_array = np.arange(num_train_samples)
            agent_batches = _make_batches(num_train_samples, agent_data_size)
            X_agent_ins = []
            Y_agent_ins = []
            for agent_index, (batch_start, batch_end) in enumerate(agent_batches):
                agent_ids = index_array[batch_start:batch_end]
                temp_ins = _slice_arrays(ins, agent_ids)
                X_agent_ins.append(temp_ins[0])
                Y_agent_ins.append(temp_ins[1])
        else:
            X_agent_ins = []
            Y_agent_ins = []
            class_per_agent = int(nb_classes / n_agents)
            for nb in range(n_agents):
                for select in range(class_per_agent):
                    if select == 0:
                        X_temp = X_train_c[class_per_agent * nb + select]
                        y_temp = y_train_c[class_per_agent * nb + select]
                    else:
                        X_temp = np.concatenate((X_temp, X_train_c[class_per_agent * nb + select]), axis=0)
                        y_temp = np.concatenate((y_temp, y_train_c[class_per_agent * nb + select]), axis=0)
                print(y_temp.shape)
                X_agent_ins.append(X_temp)
                Y_agent_ins.append(y_temp)

    if optimizer == "CDSGD":
        pi = np.ones((n_agents, n_agents))
        degree = n_agents
        degreeval = 1 / n_agents
        if sparsity == True:
            # Hard-coded doubly-stochastic mixing matrix for a 5-agent ring topology
            # (see the ring_mixing_matrix sketch after this function for a general construction)
            pi = np.asarray([[0.34, 0.33, 0., 0., 0.33],
                             [0.33, 0.34, 0.33, 0., 0.],
                             [0., 0.33, 0.34, 0.33, 0.],
                             [0., 0., 0.33, 0.34, 0.33],
                             [0.33, 0., 0., 0.33, 0.34]])
            # for nb in range(n_agents*n_agents):
            #     m1 = np.random.randint(n_agents)
            #     n1 = np.random.randint(n_agents)
            #     if (m1 != n1):
            print(pi)
        else:
            pi = degreeval * np.ones((n_agents, n_agents))
        print(pi)

        model = models.load(model_name, img_dim, nb_classes)
        # model.summary()
        model_json = model.to_json()
        with open("model.json", "w") as json_file:
            json_file.write(model_json)
        # serialize weights to HDF5
        model.save_weights("model0.h5")
        del model
        agentmodels = [0 for nb in range(n_agents)]
        for nb in range(n_agents):
            json_file = open('model.json', 'r')
            loaded_model_json = json_file.read()
            json_file.close()
            agentmodels[nb] = model_from_json(loaded_model_json)
            # load weights into new model
            agentmodels[nb].load_weights("model0.h5")

    elif optimizer == "CDMSGD":
        pi = np.ones((n_agents, n_agents))
        degree = n_agents
        degreeval = 1 / n_agents
        if sparsity == True:
            pi = np.asarray([[0.34, 0.33, 0., 0., 0.33],
                             [0.33, 0.34, 0.33, 0., 0.],
                             [0., 0.33, 0.34, 0.33, 0.],
                             [0., 0., 0.33, 0.34, 0.33],
                             [0.33, 0., 0., 0.33, 0.34]])
            # for nb in range(n_agents*n_agents):
            #     m1 = np.random.randint(n_agents)
            #     n1 = np.random.randint(n_agents)
            #     if (m1 != n1):
            #         print(pi)
        else:
            pi = degreeval * np.ones((n_agents, n_agents))
        print(pi)

        model = models.load(model_name, img_dim, nb_classes)
        # model.summary()
        model_json = model.to_json()
        with open("model.json", "w") as json_file:
            json_file.write(model_json)
        # serialize weights to HDF5
        model.save_weights("model0.h5")
        del model
        agentmodels = [0 for nb in range(n_agents)]
        for nb in range(n_agents):
            json_file = open('model.json', 'r')
            loaded_model_json = json_file.read()
            json_file.close()
            agentmodels[nb] = model_from_json(loaded_model_json)
            # load weights into new model
            agentmodels[nb].load_weights("model0.h5")

    elif optimizer == "EASGD":
        model = models.load(model_name, img_dim, nb_classes)
        model.summary()
        model_json = model.to_json()
        with open("model.json", "w") as json_file:
            json_file.write(model_json)
        model.save_weights("model_EASGD.h5")
        agentmodels = [0 for nb in range(n_agents)]
        for nb in range(n_agents):
            json_file = open('model.json', 'r')
            loaded_model_json = json_file.read()
            json_file.close()
            agentmodels[nb] = model_from_json(loaded_model_json)
            # load weights into new model
            agentmodels[nb].load_weights("model_EASGD.h5")

    elif optimizer == "FASGD":
        model = models.load(model_name, img_dim, nb_classes)
        # model.summary()
        model_json = model.to_json()
        with open("model.json", "w") as json_file:
            json_file.write(model_json)
        # serialize weights to HDF5
        model.save_weights("model0.h5")
        del model
        agentmodels = [0 for nb in range(n_agents)]
        for nb in range(n_agents):
            json_file = open('model.json', 'r')
            loaded_model_json = json_file.read()
            json_file.close()
            agentmodels[nb] = model_from_json(loaded_model_json)
            # load weights into new model
            agentmodels[nb].load_weights("model0.h5")

    else:
        model = models.load(model_name, img_dim, nb_classes)

    # Compile model.
    if optimizer == "SGD":
        opt = SGD(lr=1E-2, decay=0, momentum=0.0, nesterov=False)
        model.compile(optimizer=opt, loss="categorical_crossentropy", metrics=["accuracy"])
        model.summary()
    elif optimizer == "MSGD":
        opt = SGD(lr=1E-2, decay=0, momentum=0.95, nesterov=True)
        model.compile(optimizer=opt, loss="categorical_crossentropy", metrics=["accuracy"])
        model.summary()
    elif optimizer == "Adam":
        opt = Adam(lr=1E-4, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=1E-4)
        model.compile(optimizer=opt, loss="categorical_crossentropy", metrics=["accuracy"])
        model.summary()
    elif optimizer == "CDSGD":
        opt = [0 for nb in range(n_agents)]
        agentmodels = model_compilers_cdsgd(agentmodels, n_agents, optimizer, pi, opt)
    elif optimizer == "CDMSGD":
        opt = [0 for nb in range(n_agents)]
        agentmodels = model_compilers_cdmsgd(agentmodels, n_agents, optimizer, pi, opt)
    elif optimizer == "EASGD":
        opt = [0 for nb in range(n_agents)]
        agentmodels = model_compilers_easgd(agentmodels, n_agents, communication_period, optimizer, opt)
    elif optimizer == "FASGD":
        opt = [0 for nb in range(n_agents)]
        agentmodels = model_compilers_fasgd(agentmodels, n_agents, opt)

    train_losses, train_accs = [], []
    val_losses, val_accs = [], []
    agent_training_loss_history = [[] for nb in range(n_agents)]
    agent_validation_loss_history = [[] for nb in range(n_agents)]
    agent_training_acc_history = [[] for nb in range(n_agents)]
    agent_validation_acc_history = [[] for nb in range(n_agents)]

    if (optimizer == "CDSGD") or (optimizer == "CDMSGD") or (optimizer == "EASGD") or (optimizer == "FASGD"):
        training_loss = np.zeros(n_agents)
        training_acc = np.zeros(n_agents)
        validation_loss = np.zeros(n_agents)
        validation_acc = np.zeros(n_agents)
        communication_count = 0

    for e in range(nb_epoch):
        if (optimizer == "CDSGD") or (optimizer == "CDMSGD") or (optimizer == "EASGD") or (optimizer == "FASGD"):
            for nb in range(n_agents):
                loss = agentmodels[nb].fit(X_agent_ins[nb],
                                           Y_agent_ins[nb],
                                           batch_size=batch_size,
                                           validation_split=0.0,
                                           epochs=1,
                                           verbose=0)
            for nb in range(n_agents):
                training_score = agentmodels[nb].evaluate(X_train, Y_train, verbose=0, batch_size=512)
                # print(training_score)
                validation_score = agentmodels[nb].evaluate(X_test, Y_test, verbose=0, batch_size=512)
                training_loss[nb] = training_score[0]
                training_acc[nb] = training_score[1]
                validation_loss[nb] = validation_score[0]
                validation_acc[nb] = validation_score[1]
            train_losses.append(np.average(training_loss))
            val_losses.append(np.average(validation_loss))
            train_accs.append(np.average(training_acc))
            val_accs.append(np.average(validation_acc))
            for nb in range(n_agents):
                agent_training_loss_history[nb].append(training_loss[nb])
                agent_validation_loss_history[nb].append(validation_loss[nb])
                agent_training_acc_history[nb].append(training_acc[nb])
                agent_validation_acc_history[nb].append(validation_acc[nb])
            print("epoch", (e + 1), "is completed with the following metrics: loss:",
                  np.average(training_loss), "accuracy:", np.average(training_acc),
                  "val_loss", np.average(validation_loss), "val_acc", np.average(validation_acc))
            if (optimizer == "CDSGD") or (optimizer == "CDMSGD"):
                communication_count += 1
                if (communication_count >= communication_period):
                    if (optimizer == "CDMSGD"):
                        update_parameters_cdmsgd(agentmodels, n_agents)
                        print("Agents share their information!")
                    if (optimizer == "CDSGD"):
                        update_parameters_cdsgd(agentmodels, n_agents)
                        print("Agents share their information!")
                    communication_count = 0
            elif (optimizer == "EASGD"):
                update_epoch()
            elif (optimizer == "FASGD"):
                agentmodels = update_mean_parameters(agentmodels, n_agents)
        else:
            loss = model.fit(X_train, Y_train,
                             batch_size=batch_size,
                             validation_data=(X_test, Y_test),
                             epochs=1,
                             verbose=0)
            train_losses.append(loss.history["loss"])
            val_losses.append(loss.history["val_loss"])
            train_accs.append(loss.history["acc"])
            val_accs.append(loss.history["val_acc"])
            print("epoch", (e + 1), "is completed with the following metrics: loss:",
                  loss.history["loss"], "accuracy:", loss.history["acc"],
                  "val_loss", loss.history["val_loss"], "val_acc", loss.history["val_acc"])

    # Save experimental log
    d_log = {}
    Agent_log = {}
    if (optimizer == "CDSGD") or (optimizer == "CDMSGD") or (optimizer == "EASGD") or (optimizer == "FASGD"):
        d_log["experiment_name"] = experiment_name + '_' + str(n_agents) + 'Agents'
        for nb in range(n_agents):
            Agent_log["Agent%s training loss" % nb] = agent_training_loss_history[nb]
            Agent_log["Agent%s validation loss" % nb] = agent_validation_loss_history[nb]
            Agent_log["Agent%s training acc" % nb] = agent_training_acc_history[nb]
            Agent_log["Agent%s validation acc" % nb] = agent_validation_acc_history[nb]
    else:
        d_log["experiment_name"] = experiment_name
    d_log["img_dim"] = img_dim
    d_log["batch_size"] = batch_size
    d_log["nb_epoch"] = nb_epoch
    d_log["train_losses"] = train_losses
    d_log["val_losses"] = val_losses
    d_log["train_accs"] = train_accs
    d_log["val_accs"] = val_accs
    if (optimizer == "CDSGD") or (optimizer == "CDMSGD") or (optimizer == "EASGD") or (optimizer == "FASGD"):
        d_log["optimizer"] = opt[0].get_config()
        json_string = json.loads(agentmodels[0].to_json())
    else:
        d_log["optimizer"] = opt.get_config()
        json_string = json.loads(model.to_json())

    # Add model architecture
    for key in json_string.keys():
        d_log[key] = json_string[key]

    if (optimizer == "CDSGD") or (optimizer == "CDMSGD") or (optimizer == "EASGD") or (optimizer == "FASGD"):
        json_file = os.path.join("log", '%s_%s_%s_%sAgents.json'
                                 % (dataset, agentmodels[0].name, experiment_name, str(n_agents)))
        json_file1 = os.path.join("log", '%s_%s_%s_%sAgents_history.json'
                                  % (dataset, agentmodels[0].name, experiment_name, str(n_agents)))
        with open(json_file1, 'w') as fp1:
            json.dump(Agent_log, fp1, indent=4, sort_keys=True)
    else:
        json_file = os.path.join("log", '%s_%s_%s.json' % (dataset, model.name, experiment_name))

    with open(json_file, 'w') as fp:
        json.dump(d_log, fp, indent=4, sort_keys=True)
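# --- Mixing-matrix sketch (illustration only, not part of the original script) ---
# The hard-coded 5x5 `pi` used above for the sparse CDSGD/CDMSGD case encodes a ring
# topology: each agent keeps weight 0.34 on itself and 0.33 on each of its two ring
# neighbours, so every row sums to 1 (doubly stochastic). A minimal sketch of how such
# a matrix could be generated for an arbitrary number of agents (valid for n_agents >= 3);
# the function name is hypothetical.
import numpy as np

def ring_mixing_matrix(n_agents, self_weight=0.34):
    """Doubly-stochastic mixing matrix for a ring of n_agents (n_agents >= 3)."""
    neighbour_weight = (1.0 - self_weight) / 2.0
    pi = np.zeros((n_agents, n_agents))
    for i in range(n_agents):
        pi[i, i] = self_weight
        pi[i, (i - 1) % n_agents] = neighbour_weight
        pi[i, (i + 1) % n_agents] = neighbour_weight
    return pi

print(ring_mixing_matrix(5))  # reproduces the hard-coded 5-agent matrix above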
def train(model_name, **kwargs):
    """
    Train model

    args: model_name (str, keras model name)
          **kwargs (dict) keyword arguments that specify the model hyperparameters
    """

    # Roll out the parameters
    batch_size = kwargs["batch_size"]
    nb_epoch = kwargs["nb_epoch"]
    # dataset = kwargs["dataset"]
    optimizer = kwargs["optimizer"]
    experiment_name = kwargs["experiment_name"]

    # Compile model.
    if optimizer == "SGD":
        opt = SGD(lr=1E-2, decay=1E-4, momentum=0.9, nesterov=True)
    if optimizer == "Adam":
        opt = Adam(lr=1E-4, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=1E-4)
    if optimizer == "Eve":
        opt = Eve(lr=1E-4, decay=1E-4, beta_1=0.9, beta_2=0.999, beta_3=0.999,
                  small_k=0.1, big_K=10, epsilon=1e-08)

    # if dataset == "endovis":
    #     (X_train, y_train), (X_test, y_test) = cifar10.load_data()
    dataset = "endovis"
    # if dataset == "cifar100":
    #     (X_train, y_train), (X_test, y_test) = cifar100.load_data()
    # if dataset == "mnist":
    #     (X_train, y_train), (X_test, y_test) = mnist.load_data()
    #     X_train = X_train.reshape((X_train.shape[0], 1, 28, 28))
    #     X_test = X_test.reshape((X_test.shape[0], 1, 28, 28))
    #
    # X_train = X_train.astype('float32')
    # X_test = X_test.astype('float32')
    # X_train /= 255.
    # X_test /= 255.
    #
    # img_dim = X_train.shape[-3:]
    # nb_classes = len(np.unique(y_train))
    #
    # # convert class vectors to binary class matrices
    # Y_train = np_utils.to_categorical(y_train, nb_classes)
    # Y_test = np_utils.to_categorical(y_test, nb_classes)

    train_dir = 'chest_xray/train'
    valid_dir = 'chest_xray/val'
    test_dir = 'chest_xray/test'

    img_width, img_height = 75, 75
    batch_size = 16
    num_epochs = 2
    filter_size = (3, 3)
    pool_size = (2, 2)
    drop_out_dense = 0.5
    drop_out_conv = 0.25
    padding = 'same'
    img_dim = (img_width, img_height, 1)
    nb_classes = 2

    train_datagen = ImageDataGenerator(rescale=1. / 255,
                                       shear_range=0.2,
                                       zoom_range=0.2,
                                       horizontal_flip=True,
                                       vertical_flip=True)
    train_generator = train_datagen.flow_from_directory(train_dir,
                                                        target_size=(img_height, img_width),
                                                        batch_size=batch_size,
                                                        class_mode='categorical',
                                                        color_mode='grayscale')

    validation_datagen = ImageDataGenerator(rescale=1. / 255,
                                            shear_range=0.2,
                                            zoom_range=0.2,
                                            horizontal_flip=True,
                                            vertical_flip=True)
    validation_generator = validation_datagen.flow_from_directory(valid_dir,
                                                                  target_size=(img_height, img_width),
                                                                  batch_size=batch_size,
                                                                  class_mode='categorical',
                                                                  color_mode='grayscale')

    test_datagen = ImageDataGenerator(rescale=1. / 255)
    test_generator = test_datagen.flow_from_directory(test_dir,
                                                      target_size=(img_height, img_width),
                                                      batch_size=batch_size,
                                                      class_mode='categorical',
                                                      color_mode='grayscale')

    # Compile model
    model = models.load(model_name, img_dim, nb_classes)
    model.compile(optimizer=opt, loss="binary_crossentropy", metrics=["accuracy"])

    train_losses, train_accs = [], []
    val_losses, val_accs = [], []

    model.summary()

    for e in range(nb_epoch):
        loss = model.fit_generator(train_generator,
                                   steps_per_epoch=400,
                                   validation_data=validation_generator,
                                   validation_steps=100,
                                   epochs=1)
        train_losses.append(loss.history["loss"])
        val_losses.append(loss.history["val_loss"])
        train_accs.append(loss.history["acc"])
        val_accs.append(loss.history["val_acc"])

    # Save experimental log
    d_log = {}
    d_log["experiment_name"] = experiment_name
    d_log["img_dim"] = img_dim
    d_log["batch_size"] = batch_size
    d_log["nb_epoch"] = nb_epoch
    d_log["train_losses"] = train_losses
    d_log["val_losses"] = val_losses
    d_log["train_accs"] = train_accs
    d_log["val_accs"] = val_accs
    d_log["optimizer"] = opt.get_config()

    # Add model architecture
    json_string = json.loads(model.to_json())
    for key in json_string.keys():
        d_log[key] = json_string[key]

    json_file = os.path.join("log", '%s_%s_%s.json' % (dataset, model.name, experiment_name))
    with open(json_file, 'w') as fp:
        json.dump(d_log, fp, indent=4, sort_keys=True)
def run_cifar10(batch_size,
                nb_epoch,
                depth,
                nb_dense_block,
                nb_filter,
                growth_rate,
                dropout_rate,
                learning_rate,
                weight_decay,
                plot_architecture):
    """ Run CIFAR10 experiments

    :param batch_size: int -- batch size
    :param nb_epoch: int -- number of training epochs
    :param depth: int -- network depth
    :param nb_dense_block: int -- number of dense blocks
    :param nb_filter: int -- initial number of conv filter
    :param growth_rate: int -- number of new filters added by conv layers
    :param dropout_rate: float -- dropout rate
    :param learning_rate: float -- learning rate
    :param weight_decay: float -- weight decay
    :param plot_architecture: bool -- whether to plot network architecture
    """

    ###################
    # Data processing #
    ###################

    # the data, shuffled and split between train and test sets
    (X_train, y_train), (X_test, y_test) = cifar10.load_data()

    nb_classes = len(np.unique(y_train))
    img_dim = X_train.shape[1:]

    if K.image_data_format() == "channels_first":
        n_channels = X_train.shape[1]
    else:
        n_channels = X_train.shape[-1]

    # convert class vectors to binary class matrices
    Y_train = np_utils.to_categorical(y_train, nb_classes)
    Y_test = np_utils.to_categorical(y_test, nb_classes)

    X_train = X_train.astype('float32')
    X_test = X_test.astype('float32')

    # Normalisation
    X = np.vstack((X_train, X_test))
    # 2 cases depending on the image ordering
    if K.image_data_format() == "channels_first":
        for i in range(n_channels):
            mean = np.mean(X[:, i, :, :])
            std = np.std(X[:, i, :, :])
            X_train[:, i, :, :] = (X_train[:, i, :, :] - mean) / std
            X_test[:, i, :, :] = (X_test[:, i, :, :] - mean) / std
    elif K.image_data_format() == "channels_last":
        for i in range(n_channels):
            mean = np.mean(X[:, :, :, i])
            std = np.std(X[:, :, :, i])
            X_train[:, :, :, i] = (X_train[:, :, :, i] - mean) / std
            X_test[:, :, :, i] = (X_test[:, :, :, i] - mean) / std

    ###################
    # Construct model #
    ###################

    model = densenet.DenseNet(nb_classes,
                              img_dim,
                              depth,
                              nb_dense_block,
                              growth_rate,
                              nb_filter,
                              dropout_rate=dropout_rate,
                              weight_decay=weight_decay)
    # Model output
    model.summary()

    # Build optimizer
    opt0 = Adam(lr=learning_rate, beta_1=0.9, beta_2=0.999, epsilon=1e-08)

    model.compile(loss='categorical_crossentropy',
                  optimizer=opt0,
                  metrics=["accuracy"])

    if plot_architecture:
        from keras.utils.vis_utils import plot_model
        plot_model(model, to_file='./figures/densenet_archi.png', show_shapes=True)

    ####################
    # Network training #
    ####################

    print("Training")

    list_train_loss = []
    list_test_loss = []
    list_learning_rate = []

    set_lr = learning_rate
    # `momentum` is used below for the second-round SGD optimizer and in the checkpoint
    # file names, but was never defined in the original script; 0.9 is an assumed default.
    momentum = 0.9

    # First step
    nb_epoch_opt0 = nb_epoch // 10
    for e in range(nb_epoch_opt0):

        if e == int(0.5 * nb_epoch_opt0):
            K.set_value(model.optimizer.lr, np.float32(learning_rate / 10.))

        if e == int(0.75 * nb_epoch_opt0):
            K.set_value(model.optimizer.lr, np.float32(learning_rate / 100.))

        split_size = batch_size
        num_splits = X_train.shape[0] // split_size
        arr_splits = np.array_split(np.arange(X_train.shape[0]), num_splits)

        l_train_loss = []
        start = time.time()

        for batch_idx in arr_splits:
            X_batch, Y_batch = X_train[batch_idx], Y_train[batch_idx]
            _, _ = model.train_on_batch(X_batch, Y_batch)

        train_logloss, train_acc = model.evaluate(X_train, Y_train, verbose=0, batch_size=64)
        print('\ttrain_acc = %.6f' % train_acc)
        # l_train_loss.append([train_logloss, train_acc])
        test_logloss, test_acc = model.evaluate(X_test, Y_test, verbose=0, batch_size=64)
        print('\t\t test_acc = %.6f' % test_acc)

        list_train_loss.append([train_logloss, train_acc])
        list_test_loss.append([test_logloss, test_acc])
        list_learning_rate.append(float(K.get_value(model.optimizer.lr)))
        # to convert numpy array to json serializable
        print('\t\t\tEpoch %s/%s, Time: %s' % (e + 1, nb_epoch, time.time() - start))

        # file_name = 'checkpoints/{nb_epoch:04d}-{train_logloss:.3f}-{:.3f}-{train_acc:.3f}-{test_acc:.3f}-lr%.6f-m%d-b%d.h5' % (set_lr, momentum, batch_size)
        file_name = 'checkpoints/e%d-trl%.3f-tel%.3f-tracc%.3f-teacc%.3f-lr%.6f-m%d-b%d.h5' \
            % (e, train_logloss, test_logloss, train_acc, test_acc, set_lr, momentum, batch_size)
        print(file_name)
        model.save(file_name)

        d_log = {}
        d_log["batch_size"] = batch_size
        d_log["nb_epoch"] = nb_epoch
        d_log["optimizer"] = opt0.get_config()
        d_log["train_loss"] = list_train_loss
        d_log["test_loss"] = list_test_loss
        d_log["learning_rate"] = list_learning_rate

        json_file = os.path.join('./log/experiment_log_cifar10.json')
        with open(json_file, 'w') as fp:
            json.dump(d_log, fp, indent=4, sort_keys=True)

    print('########################################Second round#################################')

    opt1 = SGD(lr=learning_rate, momentum=momentum, decay=0.0, nesterov=False)
    model.compile(loss='categorical_crossentropy',
                  optimizer=opt1,
                  metrics=["accuracy"])

    # Second step
    for e in range(nb_epoch):

        # set_lr = np.float32(10 ** (math.log10(learning_rate) - math.floor(e/500)))
        set_lr = set_lr * 0.99 ** math.floor(e / 10)
        K.set_value(model.optimizer.lr, set_lr)
        print('set_lr = %.10f' % set_lr)

        split_size = batch_size
        num_splits = X_train.shape[0] // split_size
        arr_splits = np.array_split(np.arange(X_train.shape[0]), num_splits)

        l_train_loss = []
        start = time.time()

        for batch_idx in arr_splits:
            X_batch, Y_batch = X_train[batch_idx], Y_train[batch_idx]
            _, _ = model.train_on_batch(X_batch, Y_batch)

        train_logloss, train_acc = model.evaluate(X_train, Y_train, verbose=0, batch_size=64)
        print('\ttrain_acc = %.6f' % train_acc)
        # l_train_loss.append([train_logloss, train_acc])
        test_logloss, test_acc = model.evaluate(X_test, Y_test, verbose=0, batch_size=64)
        print('\t\t test_acc = %.6f' % test_acc)

        list_train_loss.append([train_logloss, train_acc])
        list_test_loss.append([test_logloss, test_acc])
        list_learning_rate.append(float(K.get_value(model.optimizer.lr)))
        # to convert numpy array to json serializable
        print('\t\t\tEpoch %s/%s, Time: %s' % (e + 1, nb_epoch, time.time() - start))

        # file_name = 'checkpoints/{nb_epoch:04d}-{train_logloss:.3f}-{:.3f}-{train_acc:.3f}-{test_acc:.3f}-lr%.6f-m%d-b%d.h5' % (set_lr, momentum, batch_size)
        file_name = 'checkpoints/e%d-trl%.3f-tel%.3f-tracc%.3f-teacc%.3f-lr%.6f-m%d-b%d.h5' \
            % (e, train_logloss, test_logloss, train_acc, test_acc, set_lr, momentum, batch_size)
        print(file_name)
        model.save(file_name)

        d_log = {}
        d_log["batch_size"] = batch_size
        d_log["nb_epoch"] = nb_epoch
        d_log["optimizer"] = opt1.get_config()
        d_log["train_loss"] = list_train_loss
        d_log["test_loss"] = list_test_loss
        d_log["learning_rate"] = list_learning_rate

        json_file = os.path.join('./log/experiment_log_cifar10.json')
        with open(json_file, 'w') as fp:
            json.dump(d_log, fp, indent=4, sort_keys=True)