Example #1
    # on_epoch_end method of an early-stopping Keras callback; the enclosing
    # class (which sets num_init_curve, incumbent, val_epochs and stop) is not shown
    def on_epoch_end(self, epoch, logs=None):
        logs = logs or {}
        scores = logs.get('val_acc')
        nb_epoch = self.params['nb_epoch']
        self.val_epochs.append(scores)
        # after the initial epochs, solve the BOS problem once to obtain the
        # decision rules (action regions over a discretized state grid)
        if epoch + 1 == self.num_init_curve:
            self.action_region, self.grid_St = run_BOS(
                1 - np.array(self.val_epochs), self.incumbent,
                self.params['nb_epoch'])

        if (epoch >= self.num_init_curve) and (epoch < nb_epoch - 1):
            # state: average validation error over the post-initial epochs
            state = np.sum(1 - np.array(self.val_epochs[self.num_init_curve:])
                           ) / (epoch - self.num_init_curve + 1)
            # locate the state on the discretized grid and look up the action
            ind_state = np.max(np.nonzero(state > self.grid_St)[0])
            action_to_take = self.action_region[epoch - self.num_init_curve,
                                                ind_state]
            if (action_to_take == 1) or (scores >= self.incumbent):
                self.stop = 1
                self.model.stop_training = True

            elif action_to_take == 2:
                self.stop = -1
                self.model.stop_training = True

        elif (epoch == nb_epoch - 1):
            self.stop = -1
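
The method above is the on_epoch_end hook of a Keras callback whose class definition is not shown. Below is a minimal sketch of how such a callback might be defined and attached to training; the class name BOSEarlyStopping and its constructor are illustrative assumptions, not the repository's actual API.

import keras
import numpy as np

class BOSEarlyStopping(keras.callbacks.Callback):
    # hypothetical wrapper around the on_epoch_end logic shown above
    def __init__(self, num_init_curve, incumbent):
        super(BOSEarlyStopping, self).__init__()
        self.num_init_curve = num_init_curve  # initial epochs observed before BOS runs
        self.incumbent = incumbent  # best validation accuracy found so far
        self.val_epochs = []  # per-epoch validation accuracies
        self.stop = 0  # set to 1 or -1 by on_epoch_end when training stops

# usage (hypothetical):
# model.fit(X_train, Y_train, validation_data=(X_test, Y_test),
#           nb_epoch=50, callbacks=[BOSEarlyStopping(8, 0.95)])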
Example #2
def objective_function_LR_MNIST(param,
                                no_stop=False,
                                incumbent=None,
                                bo_iteration=0,
                                stds=[],
                                N=50,
                                N_init_epochs=8):
    '''
        param: normalized hyper-parameters in [0, 1]
        no_stop: if True, the function evaluation never early-stops
        incumbent: the currently found maximum value
        bo_iteration: the current BO iteration
        stds: standard deviations corresponding to different input numbers of
            epochs; used in the second criterion for early stopping
        N: the maximum number of epochs
        N_init_epochs: the number of initial epochs used in BOS
    '''

    training_epochs = N
    num_init_curve = N_init_epochs
    time_BOS = -1  # the time spent in solving the BOS problem, just for reference

    # load the MNIST dataset
    loaded_data = pickle.load(open(mnist_path + "mnist_dataset.p", "rb"))
    X_train = loaded_data["X_train"]
    X_test = loaded_data["X_test"]
    Y_train = loaded_data["Y_train"]
    Y_test = loaded_data["Y_test"]
    n_ft, n_classes = X_train.shape[1], Y_train.shape[1]

    # transform the input to the real range of the hyper-parameters, to be used for model training
    parameter_range = [[20, 500], [1e-6, 1.0], [1e-3, 0.10]]
    batch_size_ = param[0]
    batch_size = int(batch_size_ *
                     (parameter_range[0][1] - parameter_range[0][0]) +
                     parameter_range[0][0])
    C_ = param[1]
    C = C_ * (parameter_range[1][1] -
              parameter_range[1][0]) + parameter_range[1][0]
    learning_rate_ = param[2]
    learning_rate = learning_rate_ * (
        parameter_range[2][1] - parameter_range[2][0]) + parameter_range[2][0]

    print("[Evaluating parameters: batch size={0}/C={1}/lr={2}]".format(
        batch_size, C, learning_rate))

    ### The tensorflow model of logistic regression is built below

    # tf Graph Input
    x = tf.placeholder(tf.float32,
                       [None, n_ft])  # mnist data image of shape 28*28=784
    y = tf.placeholder(
        tf.float32, [None, n_classes])  # 0-9 digits recognition => 10 classes

    # Set model weights
    W = tf.Variable(tf.zeros([n_ft, n_classes]))
    b = tf.Variable(tf.zeros([n_classes]))

    # Construct model
    pred = tf.nn.softmax(tf.matmul(x, W) + b)  # Softmax

    regularizers = tf.nn.l2_loss(W)

    # Minimize error using cross entropy
    cost = tf.reduce_mean(
        -tf.reduce_sum(y * tf.log(pred), reduction_indices=1) +
        C * regularizers)
    # Gradient Descent
    optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)

    # Test model
    correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
    # Calculate accuracy
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    neg_log_loss = tf.reduce_mean(
        -tf.reduce_sum(y * tf.log(pred), reduction_indices=1))

    # Initialize the variables (i.e. assign their default value)
    init = tf.global_variables_initializer()

    val_epochs = []
    time_func_eval = []
    with tf.Session(config=config) as sess:  # "config" is assumed to be a module-level session config
        # Run the initializer
        sess.run(init)
        # iteration over the number of epochs
        for epoch in tqdm(range(training_epochs)):
            avg_cost = 0.0
            total_batch = int(X_train.shape[0] / batch_size)

            # Loop over all batches for SGD
            for i in range(total_batch):
                batch_xs, batch_ys = X_train[(i * batch_size):(
                    (i + 1) *
                    batch_size), :], Y_train[(i * batch_size):((i + 1) *
                                                               batch_size), :]
                _, c = sess.run([optimizer, cost],
                                feed_dict={
                                    x: batch_xs,
                                    y: batch_ys
                                })
                avg_cost += c / total_batch

            # compute the validation accuracy; the commented line below would
            # compute the validation log loss instead
            # val_log_loss = neg_log_loss.eval({x: X_test, y: Y_test})
            val_acc = accuracy.eval({x: X_test, y: Y_test})
            val_epochs.append(val_acc)

            time_func_eval.append(time.time())

            # run BOS after observing "num_init_curve" initial number of training epochs
            if (epoch + 1 == num_init_curve) and (not no_stop):
                print("initial learning errors: ", 1 - np.array(val_epochs))
                time_start = time.time()
                action_regions, grid_St = run_BOS(1 - np.array(val_epochs),
                                                  incumbent, training_epochs,
                                                  bo_iteration)
                time_BOS = time.time() - time_start

            # start using the decision rules obtained from BOS
            if (epoch >= num_init_curve) and (not no_stop):
                # state: average validation error over the post-initial epochs
                state = np.sum(1 - np.array(val_epochs[num_init_curve:])) / (
                    epoch - num_init_curve + 1)
                # locate the state on the discretized grid and look up the action
                ind_state = np.max(np.nonzero(state > grid_St)[0])
                action_to_take = action_regions[epoch - num_init_curve,
                                                ind_state]

                # condition 1: if action_to_take == 2, the optimal decision is to stop the current training
                if action_to_take == 2:
                    # condition 2: the second criterion of the BO-BOS algorithm;
                    # check for an empty stds list first to avoid an IndexError
                    if (len(stds) == 0) or (kappa * stds[epoch] >= stds[-1]):
                        break

    return val_epochs[-1], (
        epoch + 1) / training_epochs, time_BOS, val_epochs, time_func_eval
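
Both objective functions decode each normalized hyper-parameter with the same affine map from [0, 1] onto its real range. The helper below is not part of the repository; it is a sketch that just makes the pattern explicit.

def scale_param(p, low, high, as_int=False):
    # map a normalized value p in [0, 1] onto the interval [low, high]
    value = p * (high - low) + low
    return int(value) if as_int else value

# the MNIST ranges used in objective_function_LR_MNIST above:
# batch_size = scale_param(param[0], 20, 500, as_int=True)
# C = scale_param(param[1], 1e-6, 1.0)
# learning_rate = scale_param(param[2], 1e-3, 0.10)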
Example #3
def objective_function_CNN_SVHN(param,
                                no_stop=False,
                                incumbent=None,
                                bo_iteration=0,
                                stds=[],
                                N=50,
                                N_init_epochs=8):
    '''
        param: normalized hyper-parameters in [0, 1]
        no_stop: if True, the function evaluation never early-stops
        incumbent: the currently found maximum value
        bo_iteration: the current BO iteration
        stds: standard deviations corresponding to different input numbers of
            epochs; used in the second criterion for early stopping
        N: the maximum number of epochs
        N_init_epochs: the number of initial epochs used in BOS
    '''

    data_augmentation = True  # note: unused in this snippet

    training_epochs = N
    num_init_curve = N_init_epochs
    time_BOS = -1  # the time spent in solving the BOS problem, just for reference

    # load the svhn dataset
    train_data = loadmat(svhn_path + "train_32x32.mat")
    test_data = loadmat(svhn_path + "test_32x32.mat")
    y_train = keras.utils.to_categorical(train_data['y'][:, 0])[:, 1:]
    y_test = keras.utils.to_categorical(test_data['y'][:, 0])[:, 1:]
    x_train = np.zeros((73257, 32, 32, 3))
    for i in range(len(x_train)):
        x_train[i] = train_data['X'].T[i].T.astype('float32') / 255
    x_test = np.zeros((26032, 32, 32, 3))
    for i in range(len(x_test)):
        x_test[i] = test_data['X'].T[i].T.astype('float32') / 255

    # transform the input to the real range of the hyper-parameters, to be used for model training
    parameter_range = [[32, 512], [1e-7, 0.1], [1e-7, 1e-3], [1e-7, 1e-3],
                       [128, 256], [256, 512]]
    batch_size_ = param[0]
    batch_size = int(batch_size_ *
                     (parameter_range[0][1] - parameter_range[0][0]) +
                     parameter_range[0][0])
    learning_rate_ = param[1]
    learning_rate = learning_rate_ * (
        parameter_range[1][1] - parameter_range[1][0]) + parameter_range[1][0]
    learning_rate_decay_ = param[2]
    learning_rate_decay = learning_rate_decay_ * (
        parameter_range[2][1] - parameter_range[2][0]) + parameter_range[2][0]
    l2_regular_ = param[3]
    l2_regular = l2_regular_ * (parameter_range[3][1] -
                                parameter_range[3][0]) + parameter_range[3][0]
    conv_filters_ = param[4]
    conv_filters = int(conv_filters_ *
                       (parameter_range[4][1] - parameter_range[4][0]) +
                       parameter_range[4][0])
    dense_units_ = param[5]
    dense_units = int(dense_units_ *
                      (parameter_range[5][1] - parameter_range[5][0]) +
                      parameter_range[5][0])

    print("[parameters: batch_size: {0}/lr: {1}/lr_decay: {2}/l2: {3}/conv_filters: {4}/dense_unit: {5}]".format(\
        batch_size, learning_rate, learning_rate_decay, l2_regular, conv_filters, dense_units))

    num_conv_layers = 3
    dropout_rate = 0.0
    kernel_size = 5
    pool_size = 3

    # build the CNN model using Keras
    model = Sequential()
    model.add(
        Conv2D(conv_filters, (kernel_size, kernel_size),
               padding='same',
               input_shape=x_train.shape[1:],
               kernel_regularizer=regularizers.l2(l2_regular)))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(pool_size, pool_size)))
    model.add(Dropout(dropout_rate))

    model.add(
        Conv2D(conv_filters, (kernel_size, kernel_size),
               padding='same',
               kernel_regularizer=regularizers.l2(l2_regular)))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(pool_size, pool_size)))
    model.add(Dropout(dropout_rate))

    if num_conv_layers >= 3:
        model.add(
            Conv2D(conv_filters, (kernel_size, kernel_size),
                   padding='same',
                   kernel_regularizer=regularizers.l2(l2_regular)))
        model.add(Activation('relu'))
        model.add(MaxPooling2D(pool_size=(pool_size, pool_size)))
        model.add(Dropout(dropout_rate))

    model.add(Flatten())
    model.add(
        Dense(dense_units, kernel_regularizer=regularizers.l2(l2_regular)))
    model.add(Activation('relu'))
    model.add(Dropout(dropout_rate))
    model.add(Dense(num_classes))  # num_classes (10 for SVHN) is assumed to be defined at module level
    model.add(Activation('softmax'))

    opt = keras.optimizers.rmsprop(lr=learning_rate, decay=learning_rate_decay)

    model.compile(loss='categorical_crossentropy',
                  optimizer=opt,
                  metrics=['accuracy'])

    time_start = time.time()

    val_epochs = []
    time_func_eval = []
    for epoch in range(training_epochs):
        model.fit(x_train,
                  y_train,
                  batch_size=batch_size,
                  epochs=1,
                  validation_data=(x_test, y_test),
                  shuffle=True,
                  verbose=0)
        scores = model.evaluate(x_test, y_test, verbose=0)
        val_epochs.append(scores[1])
        time_func_eval.append(time.time())

        # run BOS after observing "num_init_curve" initial number of training epochs
        if (epoch + 1 == num_init_curve) and (not no_stop):
            print("initial learning errors: ", 1 - np.array(val_epochs))
            time_start = time.time()
            action_regions, grid_St = run_BOS(1 - np.array(val_epochs),
                                              incumbent, training_epochs,
                                              bo_iteration)
            time_BOS = time.time() - time_start

        # start using the decision rules obtained from BOS
        if (epoch >= num_init_curve) and (not no_stop):
            # state: average validation error over the post-initial epochs
            state = np.sum(1 - np.array(val_epochs[num_init_curve:])) / (
                epoch - num_init_curve + 1)
            # locate the state on the discretized grid and look up the action
            ind_state = np.max(np.nonzero(state > grid_St)[0])
            action_to_take = action_regions[epoch - num_init_curve, ind_state]

            # condition 1: if action_to_take == 2, the optimal decision is to stop the current training
            if action_to_take == 2:
                # condition 2: the second criterion of the BO-BOS algorithm;
                # check for an empty stds list first to avoid an IndexError
                if (len(stds) == 0) or (kappa * stds[epoch] >= stds[-1]):
                    break

    return val_epochs[-1], (
        epoch + 1) / training_epochs, time_BOS, val_epochs, time_func_eval
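
For reference, a minimal sketch of evaluating this objective for one BO-BOS iteration; the parameter vector, incumbent, and stds values below are purely illustrative.

import numpy as np

param = np.array([0.5, 0.1, 0.2, 0.3, 0.5, 0.5])  # normalized hyper-parameters
val_acc, epoch_frac, time_BOS, val_curve, times = objective_function_CNN_SVHN(
    param,
    no_stop=False,
    incumbent=0.90,  # illustrative incumbent validation accuracy
    bo_iteration=3,
    stds=list(np.linspace(0.05, 0.01, 50)),  # illustrative per-epoch stds
    N=50,
    N_init_epochs=8)
print("final val acc: {0}, fraction of epochs used: {1}".format(
    val_acc, epoch_frac))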