예제 #1
0
    def test_keras_digits_recognition_active_learning(self):

        # load the data - it returns 2 tuples of digits & labels - one for
        (x_train, y_train), (x_test, y_test) = mnist.load_data()

        batch_size = 1024
        num_classes = 10
        epochs = 3

        # input image dimensions
        img_rows, img_cols = 28, 28

        # display 14 random images from the training set
        np.random.seed(123)

        rand_14 = np.random.randint(0, x_train.shape[0], 14)
        sample_digits = x_train[rand_14]
        sample_labels = y_train[rand_14]
        num_rows, num_cols = 2, 7
        f, ax = plt.subplots(num_rows,
                             num_cols,
                             figsize=(12, 5),
                             gridspec_kw={
                                 'wspace': 0.03,
                                 'hspace': 0.01
                             },
                             squeeze=True)

        for r in range(num_rows):
            for c in range(num_cols):
                image_index = r * 7 + c
                ax[r, c].axis("off")
                ax[r, c].imshow(sample_digits[image_index], cmap='gray')
                ax[r, c].set_title('No. %d' % sample_labels[image_index])
        plt.show()
        plt.close()

        if K.image_data_format() == 'channels_first':
            x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols)
            x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols)
            input_shape = (1, img_rows, img_cols)
        else:
            x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1)
            x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1)
            input_shape = (img_rows, img_cols, 1)

        x_train = x_train.astype('float32')
        x_test = x_test.astype('float32')
        x_train /= 255
        x_test /= 255

        ml_technique = Sequential()
        ml_technique.add(
            Conv2D(32,
                   kernel_size=(3, 3),
                   activation='relu',
                   input_shape=input_shape))
        ml_technique.add(Conv2D(64, (3, 3), activation='relu'))
        ml_technique.add(MaxPooling2D(pool_size=(2, 2)))
        ml_technique.add(Dropout(0.25))
        ml_technique.add(Flatten())
        ml_technique.add(Dense(128, activation='relu'))
        ml_technique.add(Dropout(0.5))
        ml_technique.add(Dense(num_classes, activation='softmax'))
        ml_technique.compile(optimizer='Adam',
                             loss='categorical_crossentropy',
                             metrics=['accuracy'])

        # convert class vectors to binary class matrices
        y_train = keras.utils.to_categorical(y_train, num_classes)
        y_test = keras.utils.to_categorical(y_test, num_classes)

        X = np.concatenate((x_train, x_test))
        y = np.concatenate((y_train, y_test))

        train_idx, test_idx, label_idx, unlabel_idx = split(
            X=X,
            y=y,
            test_ratio=0.3,
            initial_label_rate=0.05,
            split_count=1,
            all_class=True)

        # convert to indexed collection
        train_idx = IndexCollection(train_idx[0])
        test_idx = IndexCollection(test_idx[0])
        label_idx = IndexCollection(label_idx[0])
        unlabel_idx = IndexCollection(unlabel_idx[0])

        # Define the active learning components
        stopping_criteria = MaxIteration(10)
        query_strategy = QueryLeastConfidentSampling()
        oracle = SimulatedOracle(labels=y)

        start_time = time.time()
        experimentState = State(
            round=0,
            train_idx=train_idx,
            test_idx=test_idx,
            init_L=label_idx,
            init_U=unlabel_idx,
            performance_metrics=[metric for metric in ["loss", "accuracy"]],
            verbose=True)

        while not stopping_criteria.is_stop() and len(unlabel_idx) > 0:
            label_x = X[label_idx.index, :]
            label_y = y[label_idx.index]
            test_x = X[test_idx, :]
            test_y = y[test_idx]

            # Train and evaluate Model over the labeled instances
            ml_technique.fit(label_x,
                             label_y,
                             batch_size=batch_size,
                             epochs=epochs,
                             verbose=True,
                             validation_data=(test_x, test_y))

            # predict the results over the labeled test instances
            label_pred = ml_technique.predict_classes(test_x)

            # performance calc for all metrics
            label_perf = []
            score = ml_technique.evaluate(x_test, y_test, verbose=1)

            label_perf.append({"name": "loss", "value": score[0]})
            label_perf.append({"name": "accuracy", "value": score[1]})

            # use the query strategy for selecting the indexes
            select_ind = query_strategy.select(X=X,
                                               y=y,
                                               label_index=label_idx,
                                               unlabel_index=unlabel_idx,
                                               batch_size=batch_size,
                                               model=ml_technique,
                                               client=self.__client)

            # show label values
            oracle.query(instances=X[select_ind], indexes=select_ind)

            # update label and unlabel instaces
            label_idx.update(select_ind)
            unlabel_idx.difference_update(select_ind)

            # save intermediate results
            experimentState.add_state(
                StateItem(select_index=select_ind,
                          performance_metrics=[
                              metric['name'] for metric in label_perf
                          ],
                          performance=label_perf))

            # update stopping_criteria
            stopping_criteria.update_information(experimentState)

        end_time = time.time() - start_time
        print(end_time)

        query_analyser = ExperimentAnalyserFactory.experiment_analyser(
            performance_metrics=[metric for metric in ["loss", "accuracy"]],
            method_name=query_strategy.query_function_name,
            method_results=[experimentState],
            type="queries")

        # get a brief description of the experiment
        query_analyser.plot_learning_curves(
            title='Active Learning experiment results')
예제 #2
0
    def test_custom_activeLearning_keras(self):
        batch_size = 5
        epochs = 20

        # partition the data
        train_idx, test_idx, label_idx, unlabel_idx = split(
            X=self.__X,
            y=self.__y,
            test_ratio=0.3,
            initial_label_rate=0.05,
            split_count=1,
            all_class=True)

        # convert to indexed collection
        train_idx = IndexCollection(train_idx[0])
        test_idx = IndexCollection(test_idx[0])
        label_idx = IndexCollection(label_idx[0])
        unlabel_idx = IndexCollection(unlabel_idx[0])

        # Create the model
        ml_technique = Sequential()
        ml_technique.add(Dense(input_dim=30, units=30))
        ml_technique.add(Dense(input_dim=30, units=30))
        ml_technique.add(Dense(input_dim=30, units=2))
        ml_technique.add(Activation('softmax'))
        ml_technique.compile(loss='sparse_categorical_crossentropy',
                             optimizer='adam',
                             metrics=['accuracy'])

        # Define the active learning components
        stopping_criteria = MaxIteration(10)
        query_strategy = QueryLeastConfidentSampling()
        performance_metrics = [
            Accuracy(), F1(average='weighted'),
            HammingLoss()
        ]
        oracle = SimulatedOracle(labels=self.__y)

        start_time = time.time()
        experimentState = State(round=0,
                                train_idx=train_idx,
                                test_idx=test_idx,
                                init_L=label_idx,
                                init_U=unlabel_idx,
                                performance_metrics=[
                                    metric.metric_name
                                    for metric in performance_metrics
                                ],
                                verbose=True)

        while not stopping_criteria.is_stop() and len(unlabel_idx) > 0:

            label_x = self.__X[label_idx.index, :]
            label_y = self.__y[label_idx.index]
            test_x = self.__X[test_idx, :]
            test_y = self.__y[test_idx]

            # Train and evaluate Model over the labeled instances
            ml_technique.fit(label_x,
                             label_y,
                             batch_size=batch_size,
                             epochs=epochs,
                             verbose=True)

            # predict the results over the labeled test instances
            label_pred = ml_technique.predict_classes(test_x)

            # performance calc for all metrics
            label_perf = []
            for metric in performance_metrics:
                value = metric.compute(y_true=test_y, y_pred=label_pred)
                label_perf.append({"name": metric.metric_name, "value": value})

            # use the query strategy for selecting the indexes
            select_ind = query_strategy.select(X=self.__X,
                                               y=self.__y,
                                               label_index=label_idx,
                                               unlabel_index=unlabel_idx,
                                               batch_size=batch_size,
                                               model=ml_technique,
                                               client=self.__client)

            # show label values
            oracle.query(instances=self.__X[select_ind], indexes=select_ind)

            # update label and unlabel instaces
            label_idx.update(select_ind)
            unlabel_idx.difference_update(select_ind)

            # save intermediate results
            experimentState.add_state(
                StateItem(select_index=select_ind,
                          performance_metrics=[
                              metric['name'] for metric in label_perf
                          ],
                          performance=label_perf))

            # update stopping_criteria
            stopping_criteria.update_information(experimentState)

        end_time = time.time() - start_time
        print(end_time)

        query_analyser = ExperimentAnalyserFactory.experiment_analyser(
            performance_metrics=[
                metric.metric_name for metric in performance_metrics
            ],
            method_name=query_strategy.query_function_name,
            method_results=[experimentState],
            type="queries")

        # get a brief description of the experiment
        query_analyser.plot_learning_curves(
            title='Active Learning experiment results')