Ejemplo n.º 1
0
 def __init__(self):
     """Prepare the instance logger, the tolerance and the loss history.

     The logger is requested from ``get_logger`` under the concrete class
     name, ``_tol`` is set to ``1e-9`` and ``losses`` starts out empty.
     """
     owner_name = self.__class__.__name__
     self._logger = get_logger(owner_name)
     self._tol = 1e-9
     self.losses = []
Ejemplo n.º 2
0
                plt.plot(X[ix, 0], X[ix, 1], mark[i])
        mark = ['Dr', 'Db', 'Dg', 'Dk', '^b', '+b', 'sb', 'db', '<b', 'pb']
        # plot centroids
        for i, centroid in enumerate(centroids):
            plt.plot(centroid[0], centroid[1], mark[i], markersize=12)
        plt.show()

    def __check_valid(self, X):
        """Tell whether ``X`` is acceptable for the current model state.

        When ``self._is_trained`` is exactly ``False`` every input is
        accepted.  Otherwise the second dimension of ``X.shape`` must equal
        ``self._nFeat``.

        Returns:
            ``True`` if ``X`` is accepted, ``False`` otherwise.
        """
        # Identity comparison on purpose: only the literal ``False`` skips
        # the feature-count check.
        if self._is_trained is not False:
            return bool(X.shape[1] == self._nFeat)
        return True


# Module-level logger requested from get_logger under the Kmeans class name.
logger = get_logger(Kmeans.__name__)

if __name__ == '__main__':
    # Demo run, executed only when this file is launched as a script:
    # load the iris data, fit Kmeans on two feature columns of the training
    # split and report the clustering F-measure on the test split.
    # The dataset is located relative to the current working directory.
    path = os.path.join(os.getcwd(), '..', 'dataset', 'iris.arff')
    loader = DataLoader(path)
    dataset = loader.load(target_col_name='binaryClass')
    trainset, testset = dataset.cross_split()
    kmeans = Kmeans(2, is_plot=True)
    # Only columns 1 and 3 of the feature matrix are used for fit/predict.
    kmeans.fit(trainset[0][:, [1, 3]])
    prediction = kmeans.predict(testset[0][:, [1, 3]])
    performance = cluster_f_measure(testset[1], prediction)
    # A single-argument print(...) is valid on both Python 2 and Python 3
    # and produces the same text as the former print statement.
    print('F-measure: {0}'.format(performance))
Ejemplo n.º 3
0
                )
        elif self._search_mode == "brutal":
            K = min(self._K, len(self._parameter["neighbor_y"]))
            for i in xrange(X.shape[0]):
                dist = list()
                for irow in range(self._parameter["neighbor_X"].shape[0]):
                    dist.append(np.linalg.norm(X[i, :] - self._parameter["neighbor_X"][irow, :]))
                indices = np.argsort(dist)[:K]
                pred.append(np.mean(self._parameter["neighbor_y"][indices]))
                logger.info("progress: %.2f %%" % (float(i) / X.shape[0] * 100))
        else:
            raise ValueError
        return pred


# Module-level logger requested from get_logger under the name "KNN".
logger = get_logger("KNN")

# Demo entry point: the code below runs only when this file is executed as a
# script, not when it is imported.
if __name__ == "__main__":
    # Imported here so the scheduler is only required for the demo run.
    from base.time_scheduler import TimeScheduler

    scheduler = TimeScheduler()

    # KNN for classification task
    # The dataset path is resolved relative to the current working directory.
    path = os.getcwd() + "/../dataset/electricity-normalized.arff"
    loader = DataLoader(path)
    dataset = loader.load(target_col_name="class")
    # cross_split() is unpacked into a train part and a test part; element 0
    # is passed as X and element 1 as y below (presumably features and
    # targets -- confirm against the dataset class).
    trainset, testset = dataset.cross_split()
    knn = KNNClassifier(search_mode="kd_tree")
    knn.fit(trainset[0], trainset[1])
    # tic_tac receives a label, the callable and its keyword arguments;
    # presumably it times the call and returns its result -- confirm in
    # TimeScheduler.
    predict_kd_tree = scheduler.tic_tac("kd_tree", knn.predict, X=testset[0])
    # Rebind knn to a fresh classifier that uses the "brutal" search mode.
    knn = KNNClassifier(search_mode="brutal")
Ejemplo n.º 4
0
 def __init__(self):
     """Request the instance logger from ``get_logger`` using the concrete class name."""
     class_name = self.__class__.__name__
     self._logger = get_logger(class_name)
Ejemplo n.º 5
0
            pred.append(self._to_leaf(_x, self._parameter['tree']))
        return np.array(pred)

    def _to_leaf(self, x, node):
        """Descend from ``node`` and return the first object that is not a ``TreeNode``.

        At every ``TreeNode`` the value ``x[Feature]`` is compared with
        ``Split``: values less than or equal to the split follow ``L``, all
        others follow ``R``.  If ``node`` is not a ``TreeNode`` to begin with
        it is returned unchanged.
        """
        current = node
        while isinstance(current, TreeNode):
            goes_left = x[current.Feature] <= current.Split
            current = current.L if goes_left else current.R
        return current


# Module-level logger requested from get_logger under the name 'DecisionTree'.
logger = get_logger('DecisionTree')

if __name__ == '__main__':
    # Demo run, executed only when this file is launched as a script:
    # fit a DecisionTreeClassifier on the car dataset and report the test
    # accuracy.  The dataset is located relative to the current working
    # directory.
    path = os.path.join(os.getcwd(), '..', 'dataset', 'dataset_21_car.arff')
    loader = DataLoader(path)
    dataset = loader.load(target_col_name='class')
    trainset, testset = dataset.cross_split()
    dt = DecisionTreeClassifier(min_split=1, is_prune=False)
    # Element 0 of each split is passed as X, element 1 as the labels.
    dt.fit(trainset[0], trainset[1])
    predict = dt.predict(testset[0])
    performance = accuracy_score(testset[1], predict)
    # A single-argument print(...) is valid on both Python 2 and Python 3
    # and produces the same text as the former print statement.
    print('test accuracy: {0}'.format(performance))
    # dt.dump('decisiontree.model')

    # path = os.getcwd() + '/../dataset/winequality-white.csv'
    # loader = DataLoader(path)
Ejemplo n.º 6
0
            logger.warning('feature number must be 2.')
            return
        logger.info('start plotting...')
        pred = self._predict(X)
        h = 0.02  # step size in the mesh
        x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
        y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
        xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                             np.arange(y_min, y_max, h))
        Z = self._predict(np.c_[xx.ravel(), yy.ravel()])
        # Put the result into a color plot
        Z = Z.reshape(xx.shape)
        plt.scatter(X[:, 0], X[:, 1], c=pred, cmap=plt.cm.Paired)
        plt.contour(xx, yy, Z, cmap=plt.cm.Paired)
        plt.show()

# Module-level logger requested from get_logger under the SVM class name.
logger = get_logger(SVM.__name__)

if __name__ == '__main__':
    # Demo run, executed only when this file is launched as a script:
    # train an RBF-kernel SVM on the first two iris feature columns, report
    # the test accuracy and plot the training data.  The dataset is located
    # relative to the current working directory.
    path = os.path.join(os.getcwd(), '..', 'dataset', 'iris.arff')
    loader = DataLoader(path)
    dataset = loader.load(target_col_name='binaryClass')
    trainset, testset = dataset.cross_split()
    # Only columns 0 and 1 are kept so that the two-feature plot can be drawn.
    X = trainset[0][:, [0, 1]]
    y = trainset[1]
    svm = SVM(kernel_type='rbf', sigma=0.3)
    svm.fit(X, y)
    predict = svm.predict(testset[0][:, [0, 1]])
    # A single-argument print(...) is valid on both Python 2 and Python 3
    # and produces the same text as the former print statement.
    print('test accuracy: {0}'.format(accuracy_score(testset[1], predict)))
    svm.plot(X)
Ejemplo n.º 7
0
 def __init__(self):
     """Request a logger from ``get_logger`` under the class name and start an empty ``losses`` list."""
     self._logger = get_logger(self.__class__.__name__)
     self.losses = list()