Example #1
    def test_uncertainty_entropy_exceptions(self):
        trn_ds = init_toyexample(self.X, self.y)

        with self.assertRaises(TypeError):
            qs = UncertaintySampling(trn_ds, method='entropy', model=SVM())

        with self.assertRaises(TypeError):
            qs = UncertaintySampling(trn_ds,
                                     method='entropy',
                                     model=Perceptron())

        with self.assertRaises(TypeError):
            qs = UncertaintySampling(
                trn_ds,
                method='not_exist',
                model=LogisticRegression(solver='liblinear',
                                         multi_class="ovr"))
Example #2
 def test_uncertainty_entropy(self):
     trn_ds = init_toyexample(self.X, self.y)
     qs = UncertaintySampling(trn_ds,
                              method='entropy',
                              model=LogisticRegression(solver='liblinear',
                                                       multi_class="ovr"))
     model = LogisticRegression(solver='liblinear', multi_class="ovr")
     qseq = run_qs(trn_ds, self.lbr, model, qs, self.quota)
     assert_array_equal(qseq, np.array([6, 7, 8, 9]))
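
The test above depends on two helpers that are not shown, init_toyexample and run_qs. A minimal sketch of what a run_qs-style loop typically looks like is given below, assuming libact's make_query, IdealLabeler.label, and Dataset.update APIs; the real helper may differ (for instance, some of the later tests pass the full label array instead of a labeler).

import numpy as np

def run_qs(trn_ds, lbr, model, qs, quota):
    # Hypothetical stand-in for the helper used above: ask `quota` queries,
    # label each queried point with the oracle, and record the query order.
    qseq = []
    for _ in range(quota):
        ask_id = qs.make_query()         # index of the next point to label
        X, _ = zip(*trn_ds.data)         # Dataset.data holds (feature, label) pairs
        lb = lbr.label(X[ask_id])        # IdealLabeler returns the ground-truth label
        trn_ds.update(ask_id, lb)        # add the new label to the pool
        model.train(trn_ds)              # retrain on the enlarged labeled set
        qseq.append(ask_id)
    return np.array(qseq)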
Example #3
 def test_UcertaintySamplingEntropy(self):
     random.seed(1126)
     trn_ds = Dataset(
         self.X, np.concatenate([self.y[:10], [None] * (len(self.y) - 10)]))
     qs = UncertaintySampling(trn_ds,
                              method='entropy',
                              model=LogisticRegression(solver="liblinear",
                                                       multi_class="ovr"))
     qseq = run_qs(trn_ds, qs, self.y, self.quota)
     assert_array_equal(
         qseq, np.array([145, 66, 82, 37, 194, 60, 191, 211, 245, 131]))
Example #4
 def test_hs_subsampling(self):
     ds = Dataset(self.X, self.y[:10] + [None] * (len(self.y) - 10))
     sub_qs = UncertaintySampling(ds,
                                  model=SVM(gamma='auto',
                                            decision_function_shape='ovr'))
     qs = HS(ds, self.classes, subsample_qs=sub_qs, random_state=1126)
     qseq = run_qs(ds, qs, self.y, len(self.y) - 10)
     assert_array_equal(
         np.concatenate([qseq[:10], qseq[-10:]]),
         np.array([
             120, 50, 33, 28, 78, 133, 52, 124, 102, 109, 81, 108, 12, 10,
             89, 114, 92, 126, 48, 25
         ]))
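
HS in the example above is presumably libact's HierarchicalSampling (hierarchical sampling over a cluster tree), which can delegate the within-subtree choice to a subsample query strategy. A hedged construction sketch follows; the import path and toy data are assumptions, not taken from the test file.

import numpy as np
from libact.base.dataset import Dataset
from libact.models import SVM
from libact.query_strategies import UncertaintySampling
from libact.query_strategies.multiclass import HierarchicalSampling

# Three-class toy pool; only the first 10 points are labeled.
X = np.random.RandomState(1126).rand(30, 2)
y = [0, 1, 2, 0, 1, 2, 0, 1, 2, 0] + [None] * 20
ds = Dataset(X, y)

sub_qs = UncertaintySampling(
    ds, model=SVM(gamma='auto', decision_function_shape='ovr'))
qs = HierarchicalSampling(ds, [0, 1, 2], subsample_qs=sub_qs, random_state=1126)
ask_id = qs.make_query()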
Example #5
 def test_ActiveLearningByLearning(self):
     trn_ds = Dataset(
         self.X, np.concatenate([self.y[:10], [None] * (len(self.y) - 10)]))
     qs = ActiveLearningByLearning(
         trn_ds,
         T=self.quota,
         query_strategies=[
             UncertaintySampling(
                 trn_ds,
                 model=LogisticRegression(solver="liblinear",
                                          multi_class="ovr")),
             HintSVM(trn_ds, random_state=1126)
         ],
         model=LogisticRegression(solver="liblinear", multi_class="ovr"),
         random_state=1126)
     qseq = run_qs(trn_ds, qs, self.y, self.quota)
     assert_array_equal(
         qseq, np.array([173, 103, 133, 184, 187, 147, 251, 83, 93, 33]))
Example #6
def main():
    # Specify the parameters here:
    # path to your binary classification dataset
    dataset_filepath = '../../data/musk_csv.mat'
    test_size = 0.33  # the percentage of samples in the dataset that will be
    # randomly selected and assigned to the test set
    n_labeled = 10  # number of samples that are initially labeled

    # Load dataset
    trn_ds, tst_ds, y_train, fully_labeled_trn_ds = \
        split_train_test(dataset_filepath, test_size, n_labeled)
    trn_ds2 = copy.deepcopy(trn_ds)
    lbr = IdealLabeler(fully_labeled_trn_ds)

    quota = len(y_train) - n_labeled  # number of samples to query

    # Compare the UncertaintySampling strategy with RandomSampling.
    # model is the base learner, e.g. LogisticRegression, SVM, etc.
    qs = UncertaintySampling(trn_ds, method='lc', model=LogisticRegression())
    model = LogisticRegression()
    E_in_1, E_out_1 = run(trn_ds, tst_ds, lbr, model, qs, quota)

    qs2 = RandomSampling(trn_ds2)
    model = LogisticRegression()
    E_in_2, E_out_2 = run(trn_ds2, tst_ds, lbr, model, qs2, quota)

    # Plot the learning curves of UncertaintySampling and RandomSampling.
    # The x-axis is the number of queries, and the y-axis is the corresponding
    # error rate.
    query_num = np.arange(1, quota + 1)
    plt.plot(query_num, E_in_1, 'b', label='qs Ein')
    plt.plot(query_num, E_in_2, 'r', label='random Ein')
    plt.plot(query_num, E_out_1, 'g', label='qs Eout')
    plt.plot(query_num, E_out_2, 'k', label='random Eout')
    plt.xlabel('Number of Queries')
    plt.ylabel('Error')
    plt.title('Experiment Result')
    plt.legend(loc='upper center',
               bbox_to_anchor=(0.5, -0.05),
               fancybox=True,
               shadow=True,
               ncol=5)
    plt.show()
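
main() relies on two helpers defined elsewhere in the script: split_train_test (which, as the call shows, returns the train/test Datasets, the training labels, and a fully labeled copy of the training pool) and run. The sketch below shows what a run-style loop could look like; it assumes libact's make_query, update, train, and score APIs and is only an illustration of the idea, not the script's actual helper.

def run(trn_ds, tst_ds, lbr, model, qs, quota):
    # Hypothetical stand-in: after each query, retrain and record the
    # in-sample (E_in) and out-of-sample (E_out) error rates.
    E_in, E_out = [], []
    for _ in range(quota):
        ask_id = qs.make_query()
        X, _ = zip(*trn_ds.data)
        lb = lbr.label(X[ask_id])
        trn_ds.update(ask_id, lb)
        model.train(trn_ds)
        E_in.append(1 - model.score(trn_ds))   # training error after this query
        E_out.append(1 - model.score(tst_ds))  # test error after this query
    return E_in, E_out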
Example #7
 def test_density_weighted_meta_uncertainty_lc(self):
     trn_ds = Dataset(self.X[:20], np.concatenate([self.y[:6],
                                                   [None] * 14]))
     base_qs = UncertaintySampling(trn_ds,
                                   method='lc',
                                   model=LogisticRegression(
                                       solver='liblinear',
                                       multi_class="ovr"))
     similarity_metric = cosine_similarity
     clustering_method = KMeans(n_clusters=3, random_state=1126)
     qs = DensityWeightedMeta(dataset=trn_ds,
                              base_query_strategy=base_qs,
                              similarity_metric=similarity_metric,
                              clustering_method=clustering_method,
                              beta=1.0,
                              random_state=1126)
     model = LogisticRegression(solver='liblinear', multi_class="ovr")
     qseq = run_qs(trn_ds, qs, self.y, self.quota)
     assert_array_equal(qseq,
                        np.array([13, 18, 9, 12, 8, 16, 10, 19, 15, 17]))
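
The last example also assumes several imports that are not shown: cosine_similarity and KMeans come from scikit-learn, and DensityWeightedMeta combines the base strategy's informativeness score with a density term controlled by beta, in the spirit of Settles' information-density framework. The import lines below reflect the most likely module paths, but treat them as assumptions rather than a verbatim excerpt.

from sklearn.cluster import KMeans
from sklearn.metrics.pairwise import cosine_similarity

from libact.base.dataset import Dataset
from libact.models import LogisticRegression
from libact.query_strategies import UncertaintySampling, DensityWeightedMeta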