Exemple #1
0
    def _test_svm_based(self):
        """Run the training-size experiment with a linear SVM.

        Loads the data through ``snippet_reader.toNumpy()``, prints a short
        banner and hands the dataset to ``self.run_size`` together with
        ``LinearSVC`` and ``n_feature``.
        """
        dataset = snippet_reader.toNumpy()
        n_feature = 1000  # presumably the number of features to use -- TODO confirm

        # Single-argument print(...) produces the same output as the print
        # statement on Python 2 and is also valid syntax on Python 3.
        print("Dataset: Snippet, Classifier: Linear SVM")
        print("")  # blank separator line (a bare `print` in Python 2)

        # The ratio variant, self.run_ratio(LinearSVC, dataset, n_feature),
        # is currently disabled.
        self.run_size(LinearSVC, dataset, n_feature)
Exemple #2
0
    def load_dataset2(self):
        """Return the shuffled training data with a single binary label vector.

        The training part of ``snippet_reader.toNumpy()`` is shuffled, the
        labels are binarized with a ``LabelBinarizer`` fitted on them, and only
        the first column of the resulting indicator matrix is kept.

        Returns:
            A tuple ``(X, y_bin)`` where ``y_bin`` is the first column of the
            binarized labels. The test split returned by the reader is not
            used here.
        """
        X, y, _, _ = snippet_reader.toNumpy()
        X, y = shuffle(X, y)

        lb = LabelBinarizer()
        lb.fit(y)

        # First row of the transpose == first column of the indicator matrix.
        # This replaces a loop that returned on its first iteration.
        return X, lb.transform(y).T[0]
Exemple #3
0
    def _test_debug(self):
        """Run one EN(LogisticRegression) estimator with debugging enabled.

        A 500-sample training subset is drawn with
        ``self.get_sub_set_with_size`` and the test split is passed through
        ``SetGen.with_pos_ratio`` with a ratio of 0.5 and ``pos_label=1``
        before both are handed to ``self.run_for_estimator``.
        """
        full_X, full_y, X_test, y_test = snippet_reader.toNumpy()

        subset_size = 500  # an arbitrary number
        X_train, y_train = self.get_sub_set_with_size(
                [full_X, full_y], subset_size)
        assert len(y_train) == subset_size

        X_eval, y_eval = SetGen.with_pos_ratio(
                (X_test, y_test), 0.5, pos_label=1)

        estimator = EN(LogisticRegression, debug=True)

        # Training data is passed as a tuple and test data as a list,
        # exactly as before.
        self.run_for_estimator(
                estimator, (X_train, y_train), [X_eval, y_eval], debug=True)
Exemple #4
0
    def _test_svm_based(self):
        """Print basic statistics of the dataset and run the SVM comparison.

        Prints the shapes of the training and test matrices and the label
        counts of both splits, then calls ``self.compare_svm_based`` on the
        whole dataset.
        """
        from collections import Counter

        # Every print below uses a single pre-formatted argument, so the
        # output is the same on Python 2 and the code also parses on Python 3.
        print("Compare Snippet with SVM")
        dataset = snippet_reader.toNumpy()

        X_train, y_train, X_test, y_test = dataset
        print("training shape %s" % (X_train.shape,))
        print("test shape %s" % (X_test.shape,))
        print("training class dist %s" % (Counter(y_train),))
        print("test class dist %s" % (Counter(y_test),))

        # A disabled alternative used n_feature = 1000 with
        # self.run_test_with(dataset, self.compare_svm_based, n_feature).

        # The three results are not used further here; the unpacking is kept
        # so that an unexpected return shape still fails loudly.
        acc_matrix, f1_matrix, auc_matrix = self.compare_svm_based(dataset)
Exemple #5
0
    def load_dataset(self):
        """Load the dataset and reduce both label vectors to one binary column.

        A ``LabelBinarizer`` is fitted on the training labels only and then
        applied to the training and the test labels. In both cases only the
        first column of the indicator matrix is kept.

        Returns:
            A tuple ``(X, y, X_test, y_test)`` with the feature matrices as
            returned by ``snippet_reader.toNumpy()`` and the binarized labels.
        """
        X, y, X_test, y_test = snippet_reader.toNumpy()

        lb = LabelBinarizer()
        lb.fit(y)

        # First row of the transpose == first column of the indicator matrix.
        # This replaces `for ... break` loops that only took the first element.
        y = lb.transform(y).T[0]
        y_test = lb.transform(y_test).T[0]

        return X, y, X_test, y_test
Exemple #6
0
    def _test_change_training_size(self):
        '''
        Compare five estimators while the training set size changes.

        The test split is passed once through SetGen.with_pos_ratio with a
        fixed ratio. For every training size a subset is drawn with
        self.get_sub_set_with_size, the EN estimator's hyperparameter search
        is run on it, and the value returned by self.run_for_estimator for
        each estimator is printed as one tab-separated row.

        NOTE(review): the previous docstring described this as a comparison
        on the ncRNA dataset, citing
            Andrew V Uzilov, Joshua M Keegan, and David H Mathews.
            Detection of non-coding RNAs on the basis of predicted secondary
            structure formation free energy change.
            BMC Bioinformatics, 7(173), 2006.
        but the data is loaded through snippet_reader.toNumpy() -- confirm
        which dataset is intended.
        '''
        X_train_full, y_train_full, X_test, y_test = snippet_reader.toNumpy()
        pos_ratio = 0.7     # arbitrary ratio
        X_test_new, y_test_new = SetGen.with_pos_ratio(
                [X_test, y_test], pos_ratio, pos_label=1)
        test_set = [X_test_new, y_test_new]

        cc = CC(LogisticRegression)
        ac = AC(LogisticRegression)
        ms = MS(LogisticRegression)
        it = Itr(LogisticRegression)
        en = EN(LogisticRegression)

        # Column order of the printed table follows this list.
        ests = [cc, ac, ms, it, en]

        # Single-argument print(...) calls give the same output as the print
        # statement on Python 2 and are valid on Python 3.
        print("We compare performance as chaning the training set size.")
        print("Fixed positive ratio is %f" % pos_ratio)
        print("size\tcc\tac\tms\tit\ten")

        row_format = "%d" + "\t%.4f" * len(ests)
        set_sizes = [100, 200, 300, 400, 500, 600, 700, 800, 900, 1000,
                     2000, 3000]
        for set_size in set_sizes:
            X_train_sub, y_train_sub = self.get_sub_set_with_size(
                    [X_train_full, y_train_full], set_size)

            train_set = [X_train_sub, y_train_sub]
            # The hyperparameter search must run before EN is evaluated on
            # this subset.
            en.find_hyperparameter(train_set)

            # A list comprehension (instead of map) stays a real list on
            # Python 3 as well.
            errs = [self.run_for_estimator(e, train_set, test_set)
                    for e in ests]
            print(row_format % ((set_size,) + tuple(errs)))
Exemple #7
0
 def test_ratio(self):
     """Call ``self.run_ratio`` for set sizes 100, 200, ..., 1000.

     The dataset is loaded once through ``snippet_reader.toNumpy()``; a
     blank line is printed after every run.
     """
     dataset = snippet_reader.toNumpy()
     # numpy.arange is kept so run_ratio still receives numpy integers.
     for set_size in numpy.arange(100, 1100, 100):
         self.run_ratio(dataset, set_size)
         # Same blank line as a bare Python 2 `print`, but also valid on
         # Python 3.
         print("")
Exemple #8
0
    def _test_svm_based(self):
        """Run ``self.run_test_with`` on the snippet data using ``LinearSVC``.

        The dataset comes from ``snippet_reader.toNumpy()`` and is passed on
        together with the classifier class and ``n_feature``.
        """
        dataset = snippet_reader.toNumpy()
        n_feature = 1000  # presumably the number of features to use -- TODO confirm

        # Single-argument print(...) gives the same output as the print
        # statement on Python 2 and is valid on Python 3.
        print("Snippet with SVM")
        self.run_test_with(dataset, LinearSVC, n_feature)