Example #1
                self.plotMidText((self.xOff, self.yOff,), cntrPt, str(key))
        self.yOff += 1.0 / self.totalD

    def createPlot(self):
        inTree = self._parameter['tree']
        fig = plt.figure(1, facecolor='white')
        fig.clf()
        axprops = dict(xticks=[], yticks=[])
        self.decisionNode = dict(boxstyle='sawtooth', fc='0.8')
        self.leafNode = dict(boxstyle='round4', fc='0.8')
        self.arrow_args = dict(arrowstyle='<-')
        self.ax1 = plt.subplot(111, frameon=False, **axprops)
        self.totalW = float(self.get_num_leafs(inTree))
        self.totalD = float(self.get_tree_depth(inTree))
        self.xOff = -0.5 / self.totalW
        self.yOff = 1.0
        self.plotTree(inTree, (0.5, 1.0), '')
        plt.show()


if __name__ == '__main__':
    path = os.getcwd() + '/../dataset/dataset_21_car.arff'
    loader = DataLoader(path)
    dataset = loader.load(target_col_name='class')
    trainset, testset = dataset.cross_split()
    dt = DecisionTreeClassifier(min_split=1, is_prune=False)
    dt.fit(trainset[0], trainset[1])
    predict = dt.predict(testset[0])
    performance = accuracy_score(testset[1], predict)
    print('test accuracy:', performance)
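createPlot scales its canvas by the tree's leaf count (totalW) and depth (totalD), so get_num_leafs and get_tree_depth do the real layout work. A minimal sketch of what they presumably compute, assuming the nested-dict tree representation ({feature: {value: subtree-or-label}}) this plotting style implies:

def get_num_leafs(tree):
    # a subtree is a dict; anything else is a terminal (leaf) label
    root = next(iter(tree))
    return sum(get_num_leafs(sub) if isinstance(sub, dict) else 1
               for sub in tree[root].values())

def get_tree_depth(tree):
    # number of decision levels along the deepest branch
    root = next(iter(tree))
    return 1 + max((get_tree_depth(sub) for sub in tree[root].values()
                    if isinstance(sub, dict)), default=0)

toy = {'outlook': {'sunny': 'no', 'rainy': {'windy': {'yes': 'no', 'no': 'yes'}}}}
print(get_num_leafs(toy), get_tree_depth(toy))  # 3 2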
Example #2
            info = self._S
        if cumulative:
            return np.cumsum(info)
        return info

    def plot(self, X):
        nFeat = X.shape[1]
        assert nFeat == 2, 'plot() expects exactly 2 features.'
        # plot every sample in one vectorized call instead of a Python loop
        plt.plot(X[:, 0], X[:, 1], 'or')
        plt.show()


if __name__ == '__main__':
    path = os.getcwd() + '/../dataset/iris.arff'
    loader = DataLoader(path)
    dataset = loader.load(target_col_name='binaryClass')
    trainset, testset = dataset.cross_split()
    pca = PCA(2, whiten=False)
    X = trainset[0][:, [0, 2]]
    pca.fit(X)
    _X = pca.transform(X)
    print('eigenvalue:', pca.information_distribution(percent=True))
    pca.plot(_X)
    pca = PCA(2, whiten=True)
    pca.fit(X)
    _X = pca.transform(X)
    pca.plot(_X)
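information_distribution evidently reports how much variance each principal component carries: the method body above returns self._S and optionally cumulates it with np.cumsum. A self-contained numpy sketch of the same idea (the percent normalization is an assumption):

import numpy as np

def explained_variance(X, percent=True, cumulative=False):
    # eigenvalues of the covariance matrix = variance along each component
    Xc = X - X.mean(axis=0)
    eigvals = np.linalg.eigvalsh(np.cov(Xc, rowvar=False))[::-1]
    info = eigvals / eigvals.sum() if percent else eigvals
    return np.cumsum(info) if cumulative else info

rng = np.random.RandomState(0)
X = rng.randn(200, 2) @ np.array([[3.0, 0.0], [1.0, 0.5]])
print(explained_variance(X))                   # share per component
print(explained_variance(X, cumulative=True))  # running total, ends at 1.0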
Example #3
                logger.info("progress: %.2f %%" % (float(i) / X.shape[0] * 100))
        else:
            raise ValueError('unsupported search mode')
        return pred


logger = get_logger("KNN")

if __name__ == "__main__":
    from base.time_scheduler import TimeScheduler

    scheduler = TimeScheduler()

    # KNN for classification task
    path = os.getcwd() + "/../dataset/electricity-normalized.arff"
    loader = DataLoader(path)
    dataset = loader.load(target_col_name="class")
    trainset, testset = dataset.cross_split()
    knn = KNNClassifier(search_mode="kd_tree")
    knn.fit(trainset[0], trainset[1])
    predict_kd_tree = scheduler.tic_tac("kd_tree", knn.predict, X=testset[0])
    knn = KNNClassifier(search_mode="brutal")
    knn.fit(trainset[0], trainset[1])
    predict_brutal = scheduler.tic_tac("brutal", knn.predict, X=testset[0])
    scheduler.print_task_schedule("brutal")
    scheduler.print_task_schedule("kd_tree")
    print(accuracy_score(testset[1], predict_brutal), accuracy_score(testset[1], predict_kd_tree))

    # KNN for regression task
    # path = os.getcwd() + '/../dataset/winequality-white.csv'
    # loader = DataLoader(path)
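TimeScheduler.tic_tac evidently runs a named task, records how long it took, and hands back the task's return value, so the kd-tree and brute-force predictions can be timed and compared. A minimal sketch of such a wrapper, under that assumption (the project's real class may record more):

import time

class TimeScheduler(object):
    def __init__(self):
        self._elapsed = {}

    def tic_tac(self, name, func, **kwargs):
        # time the call, store the duration under `name`, return the result
        start = time.time()
        result = func(**kwargs)
        self._elapsed[name] = time.time() - start
        return result

    def print_task_schedule(self, name):
        print('%s: %.3f s' % (name, self._elapsed[name]))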
Example #4
        for irow in range(X.shape[0]):
            _X = X[irow]
            max_prob = None
            label = None
            for c in proba_y:
                # class score: the prior plus one term per feature
                # (summed, so these are presumably log-probabilities)
                p = proba_y[c]
                for icol, feat in cond_proba_y[c].items():
                    p += feat[_X[icol]]
                # compare only once a score exists; None < p breaks on Python 3
                if max_prob is None or p > max_prob:
                    max_prob = p
                    label = c
            assert label is not None, 'label should not be None; check the fitted model.'
            pred.append(label)
        return np.array(pred)


if __name__ == '__main__':
    path = os.getcwd() + '/../dataset/dataset_21_car.arff'
    loader = DataLoader(path)
    dataset = loader.load(target_col_name='class')
    trainset, testset = dataset.cross_split()
    nb = NaiveBayes()
    nb.fit(trainset[0], trainset[1])
    predict = nb.predict(testset[0])
    acc = accuracy_score(testset[1], predict)
    print('test accuracy:', acc)
    nb.dump('NB.model')
    # nb = NaiveBayes.load('NB.model')
    # predict = nb.predict(testset[0])
    # print(accuracy_score(testset[1], predict))
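The predict loop above starts each class at a base score and adds one term per feature before taking the argmax, which matches naive Bayes scoring in log space: log P(c) + sum_i log P(x_i | c). A self-contained sketch of that argmax over hypothetical toy tables:

import numpy as np

# hypothetical fitted tables: log priors and per-feature log conditionals
log_prior = {'acc': np.log(0.3), 'unacc': np.log(0.7)}
log_cond = {
    'acc':   {0: {'low': np.log(0.6), 'high': np.log(0.4)}},
    'unacc': {0: {'low': np.log(0.2), 'high': np.log(0.8)}},
}

def classify(row):
    # argmax over classes of log P(c) + sum_i log P(x_i | c)
    def score(c):
        return log_prior[c] + sum(table[row[i]]
                                  for i, table in log_cond[c].items())
    return max(log_prior, key=score)

print(classify(['low']))   # 'acc'
print(classify(['high']))  # 'unacc'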
Example #5
        else:
            # valid only when the feature count matches what fit() saw
            return X.shape[1] == self._nFeat

    def predict(self, X):
        models = self._parameter['trees']
        pred = np.zeros(X.shape[0])
        for model in models:
            pred += np.array(model.predict(X))
        return pred


if __name__ == '__main__':
    path = os.getcwd() + '/../dataset/winequality-white.csv'
    loader = DataLoader(path)
    dataset = loader.load(target_col_name='quality')
    trainset, testset = dataset.cross_split()
    gbdt = GradientBoostingDecisionTree(10)
    gbdt.fit(trainset[0], trainset[1])
    predict = gbdt.predict(testset[0])
    print('GBDT mean error:', mean_error(testset[1], predict))

    dt = DecisionTreeRegressor()
    dt.fit(trainset[0], trainset[1])
    predict = dt.predict(testset[0])
    print('DecisionTree mean error:', mean_error(testset[1], predict))
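GradientBoostingDecisionTree.predict above sums the outputs of all its trees, which is the squared-loss boosting recipe: each new tree is fit to the residuals left by the running prediction, so the summed ensemble approximates the target. A minimal sketch of that fit/predict pair, with scikit-learn's DecisionTreeRegressor standing in for the project's own tree (illustration only, not the repository's implementation):

import numpy as np
from sklearn.tree import DecisionTreeRegressor

def fit_gbdt(X, y, n_trees=10, max_depth=3):
    # squared loss: the negative gradient is simply the residual
    trees, residual = [], y.astype(float).copy()
    for _ in range(n_trees):
        tree = DecisionTreeRegressor(max_depth=max_depth).fit(X, residual)
        trees.append(tree)
        residual -= tree.predict(X)
    return trees

def predict_gbdt(trees, X):
    # mirrors the predict() above: accumulate every tree's contribution
    return sum(tree.predict(X) for tree in trees)

rng = np.random.RandomState(0)
X = rng.rand(200, 1)
y = np.sin(4 * X[:, 0])
print(np.abs(predict_gbdt(fit_gbdt(X, y), X) - y).mean())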