Python DE.arrayToDistDict Examples

Programming Language: Python

Namespace/Package Name: cde.util

Class/Type: DE

Method/Function: arrayToDistDict

Examples at hotexamples.com: 4

Python DE.arrayToDistDict - 4 examples found. These are the top-rated real-world Python examples of cde.util.DE.arrayToDistDict, extracted from open source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

arrayToDistDict(4)

arrayToDist(1)

rms(1)

to_bin_dist(1)

Example #1

Show file

File: mla.py Project: pyongjoo/ende

    def test_ratio(self):
        '''
        Compare several competing methods while changing the ratio of the
        positive class in the test set. A binary-class dataset is used for
        ease of interpretation.

        For every ratio in np.arange(0.05, 1.0, 0.05) a new test set is
        generated with SetGen.with_pos_ratio, the result of
        DE.arrayToDistDict on its labels is passed to MLA.fit together with
        the training data, and one line "<ratio> <accuracy>" is printed.
        '''
        #dataset = rcv1_binary_reader.toNumpy()
        #dataset = snippet_reader.toNumpy()
        dataset = sentiment_reader.toNumpy()
        #set_size = 200
        #X_train_full, y_train_full, X_test, y_test = dataset
        #X_train, y_train = self.get_sub_set_with_size([X_train_full, y_train_full], set_size)
        #assert(len(y_train) == set_size)

        X_train, y_train, X_test, y_test = dataset

        # Only the first 1000 test samples are used as the pool from which
        # the per-ratio test sets are drawn.
        X_test = X_test[:1000]
        y_test = y_test[:1000]
        test_set_original = (X_test, y_test)

        clf = SVMLight()
        #clf = LinearSVC()
        clf.fit(X_train, y_train)

        mla = MLA(clf, verbose=1)

        for r in np.arange(0.05, 1.0, 0.05):
            # Generate a new test set with desired positive proportions.
            X_test_new, y_test_new = SetGen.with_pos_ratio(test_set_original, r, pos_label=1)

            dist_dict = DE.arrayToDistDict(y_test_new)

            mla.fit(X_train, y_train, dist_dict)
            y_pred = mla.predict(X_test_new)
            cm = confusion_matrix(y_test_new, y_pred)
            acc = self.accuracy(cm)

            # Single-argument print emits the same "r acc" line on both
            # Python 2 and Python 3.
            print("%s %s" % (r, acc))

Example #2

Show file

File: comp.py Project: pyongjoo/ende

    def compare_svm_based_repeat(self, data_set, repeat_num=20):
        '''
        Compare a LinearSVC baseline against SVMWeights and MLT over test
        sets with varying positive-class ratios, repeating each ratio
        several times.

        data_set   -- sequence unpacked as (X_train, y_train, X_test, y_test).
        repeat_num -- number of test sets generated per ratio (default 20,
                      the value that used to be hard-coded).

        Returns (acc_matrix, f1_matrix, auc_matrix). Each is a list with one
        row per generated test set; every row holds the metric for
        [LinearSVC baseline, SVMWeights, MLT], in that order.
        '''
        X_train, y_train, X_test, y_test = data_set

        prob_estimator = LinearSVC()
        prob_estimator.fit(X_train, y_train)

        w = SVMWeights()
        #p = Prior(prob_estimator)
        m = MLT(prob_estimator)

        ests = [w, m]
        # The already-fitted baseline is evaluated alongside the estimators
        # that get re-fitted for every generated test set.
        predictors = [prob_estimator] + ests

        acc_matrix = []
        f1_matrix = []
        auc_matrix = []

        #print "Ratio\tSVM\tSVMW\tPrior\tMLA"
        for r in np.arange(0.1, 1.0, 0.1):
            for _ in range(repeat_num):
                # Generate a new test set with desired positive proportions.
                X_test_new, y_test_new = SetGen.with_pos_ratio([X_test, y_test], r, pos_label=1)

                class_dist = DE.arrayToDistDict(y_test_new)

                # Explicit loop instead of map(): on Python 3 map() is lazy,
                # so using it purely for side effects would never call fit().
                for est in ests:
                    est.fit(X_train, y_train, class_dist)

                # List comprehensions keep the rows as real lists on both
                # Python 2 and Python 3 (map() returns an iterator on 3).
                y_preds = [est.predict(X_test_new) for est in predictors]
                cms = [confusion_matrix(y_test_new, y_pred) for y_pred in y_preds]

                accs = [self.accuracy(cm) for cm in cms]
                f1s = [self.f1(cm) for cm in cms]
                aucs = [self.auc(cm) for cm in cms]
                acc_matrix.append(accs)
                f1_matrix.append(f1s)
                auc_matrix.append(aucs)

                #print ("%.2f" + "\t%.4f" * len(accs)) % tuple([r] + accs)
                # Single-argument prints behave identically on Python 2 and 3.
                print(r)
                print(accs)
                print(f1s)
                print("")

        return acc_matrix, f1_matrix, auc_matrix

Example #3

Show file

File: comp.py Project: pyongjoo/ende

    def compare_rf_based(self, data_set):
        '''
        Compare a RandomForestClassifier baseline against RFWeights, Prior
        and MLT over test sets with varying positive-class ratios.

        data_set -- sequence unpacked as (X_train, y_train, X_test, y_test).

        Returns (acc_matrix, f1_matrix, auc_matrix). Each is a list with one
        row per ratio in np.arange(0.2, 1.0, 0.2); every row holds the
        metric for [RandomForest baseline, RFWeights, Prior, MLT], in that
        order.
        '''
        X_train, y_train, X_test, y_test = data_set

        # TODO: We actually need to convert to dense array using toarray()
        # TODO: Satimage data is the only exception.
        prob_estimator = RandomForestClassifier(n_estimators=200)
        prob_estimator.fit(X_train, y_train)

        w = RFWeights(n_estimators=200)
        p = Prior(prob_estimator)
        m = MLT(prob_estimator)

        ests = [w, p, m]
        # The already-fitted baseline is evaluated alongside the estimators
        # that get re-fitted for every generated test set.
        predictors = [prob_estimator] + ests

        acc_matrix = []
        f1_matrix = []
        auc_matrix = []

        #print "Ratio\tRF\tRFW\tPrior\tMLA"
        for r in np.arange(0.2, 1.0, 0.2):
            # Generate a new test set with desired positive proportions.
            X_test_new, y_test_new = SetGen.with_pos_ratio([X_test, y_test], r, pos_label=1)

            class_dist = DE.arrayToDistDict(y_test_new)

            # TODO: We actually need to convert to dense array using toarray()
            # TODO: Satimage data is the only exception.

            # Explicit loop instead of map(): on Python 3 map() is lazy, so
            # using it purely for side effects would never call fit().
            for est in ests:
                est.fit(X_train, y_train, class_dist)

            # List comprehensions keep the rows as real lists on both
            # Python 2 and Python 3 (map() returns an iterator on 3).
            y_preds = [est.predict(X_test_new) for est in predictors]
            cms = [confusion_matrix(y_test_new, y_pred) for y_pred in y_preds]

            accs = [self.accuracy(cm) for cm in cms]
            f1s = [self.f1(cm) for cm in cms]
            aucs = [self.auc(cm) for cm in cms]
            acc_matrix.append(accs)
            f1_matrix.append(f1s)
            auc_matrix.append(aucs)

            #print ("%.2f" + "\t%.4f" * len(accs)) % tuple([r] + accs)

        return acc_matrix, f1_matrix, auc_matrix

Example #4

Show file

File: comp.py Project: pyongjoo/ende

    def compare_maxent_based(self, data_set):
        '''
        Compare a LogisticRegression baseline against MaxentWeights, Prior
        and MLT over test sets with varying positive-class ratios.

        data_set -- sequence unpacked as (X_train, y_train, X_test, y_test).

        Returns (acc_matrix, f1_matrix, auc_matrix). Each is a list with one
        row per ratio in np.arange(0.2, 1.0, 0.2); every row holds the
        metric for [LogisticRegression baseline, MaxentWeights, Prior, MLT],
        in that order.
        '''
        X_train, y_train, X_test, y_test = data_set

        prob_estimator = LogisticRegression()
        prob_estimator.fit(X_train, y_train)

        w = MaxentWeights()
        p = Prior(prob_estimator)
        m = MLT(prob_estimator)

        ests = [w, p, m]
        # The already-fitted baseline is evaluated alongside the estimators
        # that get re-fitted for every generated test set.
        predictors = [prob_estimator] + ests

        acc_matrix = []
        f1_matrix = []
        auc_matrix = []

        #print "Ratio\tME\tMEW\tPrior\tMLA"
        for r in np.arange(0.2, 1.0, 0.2):
            # Generate a new test set with desired positive proportions.
            X_test_new, y_test_new = SetGen.with_pos_ratio([X_test, y_test], r, pos_label=1)

            class_dist = DE.arrayToDistDict(y_test_new)

            # Explicit loop instead of map(): on Python 3 map() is lazy, so
            # using it purely for side effects would never call fit().
            for est in ests:
                est.fit(X_train, y_train, class_dist)

            # List comprehensions keep the rows as real lists on both
            # Python 2 and Python 3 (map() returns an iterator on 3).
            y_preds = [est.predict(X_test_new) for est in predictors]
            cms = [confusion_matrix(y_test_new, y_pred) for y_pred in y_preds]

            accs = [self.accuracy(cm) for cm in cms]
            f1s = [self.f1(cm) for cm in cms]
            aucs = [self.auc(cm) for cm in cms]
            acc_matrix.append(accs)
            f1_matrix.append(f1s)
            auc_matrix.append(aucs)

            #print ("%.2f" + "\t%.4f" * len(accs)) % tuple([r] + accs)

        return acc_matrix, f1_matrix, auc_matrix