def __init__(self):
    """Initialize the model's shared bookkeeping.

    Fixes a mangled one-liner: the original crammed three statements after
    ``def …:`` on a single line, which is a SyntaxError in Python.

    Attributes:
        _logger: logger named after the concrete subclass (project ``get_logger``).
        losses: history of loss values — presumably appended to during fit;
            TODO confirm against the (unseen) training loop.
        _tol: small numerical tolerance, likely a convergence epsilon —
            NOTE(review): exact use not visible in this chunk.
    """
    self._logger = get_logger(self.__class__.__name__)
    self.losses = []
    self._tol = 1e-9
# NOTE(review): mangled chunk — several physical lines of the original kmeans
# module were collapsed onto the single line below, so it is not valid Python
# as written. Recognizable pieces, in order:
#   * the tail of a Kmeans plotting helper: plots samples with per-cluster
#     markers, then each centroid (markersize=12) from a marker table, then
#     calls plt.show(). Its `def` header is NOT visible in this chunk, so the
#     fragment cannot be safely reconstructed here.
#   * Kmeans.__check_valid(self, X): returns True when the model is not yet
#     trained; otherwise True iff X.shape[1] equals the stored self._nFeat.
#   * module-level `logger = get_logger(Kmeans.__name__)` and an
#     `if __name__ == '__main__'` demo: loads iris.arff, cross-splits, fits
#     Kmeans(2, is_plot=True) on feature columns [1, 3], predicts on the test
#     split, and prints a cluster F-measure.
# The `print 'F-measure:', …` statement is Python 2 syntax.
# TODO: restore the original line breaks and indentation before further edits.
plt.plot(X[ix, 0], X[ix, 1], mark[i]) mark = ['Dr', 'Db', 'Dg', 'Dk', '^b', '+b', 'sb', 'db', '<b', 'pb'] # plot centroids for i, centroid in enumerate(centroids): plt.plot(centroid[0], centroid[1], mark[i], markersize=12) plt.show() def __check_valid(self, X): if self._is_trained is False: return True else: is_valid = False nFeat = X.shape[1] if nFeat == self._nFeat: is_valid = True return is_valid logger = get_logger(Kmeans.__name__) if __name__ == '__main__': path = os.getcwd() + '/../dataset/iris.arff' loader = DataLoader(path) dataset = loader.load(target_col_name='binaryClass') trainset, testset = dataset.cross_split() kmeans = Kmeans(2, is_plot=True) kmeans.fit(trainset[0][:, [1, 3]]) prediction = kmeans.predict(testset[0][:, [1, 3]]) performance = cluster_f_measure(testset[1], prediction) print 'F-measure:', performance
# NOTE(review): mangled chunk — multiple physical lines of the original KNN
# module collapsed onto the single line below; it opens with a stray `)` from
# an expression whose start is outside this view, so it is not valid Python.
# Recognizable pieces, in order:
#   * the "brutal" (brute-force) branch of a KNN predict method: for each test
#     row, computes the Euclidean distance (np.linalg.norm) to every stored
#     neighbor_X row, takes the K nearest via np.argsort(dist)[:K], appends the
#     mean of the corresponding neighbor_y values, and logs progress; an
#     unrecognized search mode raises a bare ValueError. K is clamped to the
#     number of stored neighbors. Uses Python-2-only `xrange`.
#   * module-level `logger = get_logger("KNN")` and an `if __name__ == "__main__"`
#     demo: loads electricity-normalized.arff, fits KNNClassifier in "kd_tree"
#     mode, times predict via TimeScheduler.tic_tac, then constructs a second
#     classifier in "brutal" mode (the rest of the demo is outside this view).
# TODO: restore the original line breaks and indentation before further edits.
) elif self._search_mode == "brutal": K = min(self._K, len(self._parameter["neighbor_y"])) for i in xrange(X.shape[0]): dist = list() for irow in range(self._parameter["neighbor_X"].shape[0]): dist.append(np.linalg.norm(X[i, :] - self._parameter["neighbor_X"][irow, :])) indices = np.argsort(dist)[:K] pred.append(np.mean(self._parameter["neighbor_y"][indices])) logger.info("progress: %.2f %%" % (float(i) / X.shape[0] * 100)) else: raise ValueError return pred logger = get_logger("KNN") if __name__ == "__main__": from base.time_scheduler import TimeScheduler scheduler = TimeScheduler() # KNN for classification task path = os.getcwd() + "/../dataset/electricity-normalized.arff" loader = DataLoader(path) dataset = loader.load(target_col_name="class") trainset, testset = dataset.cross_split() knn = KNNClassifier(search_mode="kd_tree") knn.fit(trainset[0], trainset[1]) predict_kd_tree = scheduler.tic_tac("kd_tree", knn.predict, X=testset[0]) knn = KNNClassifier(search_mode="brutal")
def __init__(self):
    """Create a logger named after the concrete subclass."""
    self._logger = get_logger(self.__class__.__name__)
# NOTE(review): mangled chunk — multiple physical lines of the original
# decision-tree module collapsed onto the single line below; it opens with the
# tail of a predict method whose `def` header is outside this view, so it is
# not valid Python as written. Recognizable pieces, in order:
#   * predict tail: appends self._to_leaf(_x, self._parameter['tree']) per row
#     and returns np.array(pred).
#   * _to_leaf(self, x, node): recursive descent — while `node` is a TreeNode,
#     follow node.L when x[node.Feature] <= node.Split, else node.R; a
#     non-TreeNode node is the leaf value and is returned as-is.
#   * module-level `logger = get_logger('DecisionTree')` and an
#     `if __name__ == '__main__'` demo: loads dataset_21_car.arff, fits
#     DecisionTreeClassifier(min_split=1, is_prune=False), prints test
#     accuracy, followed by commented-out model-dump and wine-dataset code.
# The `print 'test accuracy:', …` statement is Python 2 syntax.
# TODO: restore the original line breaks and indentation before further edits.
pred.append(self._to_leaf(_x, self._parameter['tree'])) return np.array(pred) def _to_leaf(self, x, node): if isinstance(node, TreeNode): feat = node.Feature split = node.Split if x[feat] <= split: return self._to_leaf(x, node.L) else: return self._to_leaf(x, node.R) else: return node logger = get_logger('DecisionTree') if __name__ == '__main__': path = os.getcwd() + '/../dataset/dataset_21_car.arff' loader = DataLoader(path) dataset = loader.load(target_col_name='class') trainset, testset = dataset.cross_split() dt = DecisionTreeClassifier(min_split=1, is_prune=False) dt.fit(trainset[0], trainset[1]) predict = dt.predict(testset[0]) performance = accuracy_score(testset[1], predict) print 'test accuracy:', performance # dt.dump('decisiontree.model') # path = os.getcwd() + '/../dataset/winequality-white.csv' # loader = DataLoader(path)
# NOTE(review): mangled chunk — multiple physical lines of the original SVM
# module collapsed onto the single line below; it opens inside a plotting
# method whose `def` header is outside this view, so it is not valid Python as
# written. Recognizable pieces, in order:
#   * plot-method tail: bails out (with a warning) unless the data has exactly
#     2 features, predicts labels for X, builds an np.meshgrid over the data's
#     bounding box padded by 1 with step h=0.02, evaluates self._predict on
#     every grid point, and draws a scatter of X colored by prediction plus a
#     plt.contour of the decision regions (cmap=plt.cm.Paired), then shows it.
#   * module-level `logger = get_logger(SVM.__name__)` and an
#     `if __name__ == '__main__'` demo: loads iris.arff, fits
#     SVM(kernel_type='rbf', sigma=0.3) on feature columns [0, 1], prints test
#     accuracy, and plots the decision boundary.
# The `print 'test accuracy:', …` statement is Python 2 syntax.
# TODO: restore the original line breaks and indentation before further edits.
logger.warning('feature number must be 2.') return logger.info('start plotting...') pred = self._predict(X) h = 0.02 # step size in the mesh x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1 y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1 xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h)) Z = self._predict(np.c_[xx.ravel(), yy.ravel()]) # Put the result into a color plot Z = Z.reshape(xx.shape) plt.scatter(X[:, 0], X[:, 1], c=pred, cmap=plt.cm.Paired) plt.contour(xx, yy, Z, cmap=plt.cm.Paired) plt.show() logger = get_logger(SVM.__name__) if __name__ == '__main__': path = os.getcwd() + '/../dataset/iris.arff' loader = DataLoader(path) dataset = loader.load(target_col_name='binaryClass') trainset, testset = dataset.cross_split() X = trainset[0][:, [0, 1]] y = trainset[1] svm = SVM(kernel_type='rbf', sigma=0.3) svm.fit(X, y) predict = svm.predict(testset[0][:, [0, 1]]) print 'test accuracy:', accuracy_score(testset[1], predict) svm.plot(X)
def __init__(self):
    """Initialize a class-named logger and an empty loss history.

    Fixes a mangled one-liner: the original placed two statements after
    ``def …:`` on a single line, which is a SyntaxError in Python.

    Attributes:
        _logger: logger named after the concrete subclass (project ``get_logger``).
        losses: loss values collected during training — presumably appended to
            by the (unseen) fit loop; TODO confirm.
    """
    self._logger = get_logger(self.__class__.__name__)
    self.losses = []