Example #1
0
    def test_ratio(self):
        '''
        Report accuracy of the Prior-wrapped classifier as the ratio of the
        positive class in the test set changes.

        A LogisticRegression model is fitted on a fixed-size subset of the
        RCV1 binary training data. For every target positive ratio from 0.05
        up to (but excluding) 1.0 in steps of 0.05, a test set is regenerated
        from the original test data, the Prior wrapper is re-fitted with the
        class distribution measured on that test set, and the resulting
        accuracy is printed next to the requested ratio. A binary dataset is
        used for ease of interpretation.

        The only hard assertion is that the training subset has the requested
        size; the accuracies are printed for inspection, not checked.
        '''
        dataset = rcv1_binary_reader.toNumpy()
        set_size = 100

        X_train_full, y_train_full, X_test, y_test = dataset
        X_train, y_train = self.get_sub_set_with_size(
            [X_train_full, y_train_full], set_size)
        assert len(y_train) == set_size

        test_set_original = (X_test, y_test)

        clf = LogisticRegression()
        clf.fit(X_train, y_train)

        p = Prior(clf)

        for r in np.arange(0.05, 1.0, 0.05):
            # Generate a new test set with the desired positive proportion.
            X_test_new, y_test_new = SetGen.with_pos_ratio(
                test_set_original, r, pos_label=1)

            # Entry for label 1 of the distribution computed from the new
            # labels -- presumably the realised positive fraction, which may
            # differ slightly from the requested r (TODO confirm against
            # DE.arrayToDist).
            true_pos = DE.arrayToDist(y_test_new)[1]

            # Labels are -1 / 1; hand the wrapper the test-set class
            # distribution for this ratio.
            p.fit(X_train, y_train, {-1: 1 - true_pos, 1: true_pos})
            y_pred = p.predict(X_test_new)
            cm = confusion_matrix(y_test_new, y_pred)
            acc = self.accuracy(cm)

            # A single pre-formatted string prints identically under the
            # Python 2 print statement and the Python 3 print function.
            print("%s %s" % (r, acc))