def main():
    splitRatio = 0.67
    pred_acc = []
    for attrnum in range(2, 13):
        model = Bayesian(attrnum, 3)
        # dataset = loadData.loadWine()
        # trainingSet, testSet = g.splitDataset(dataset, splitRatio)
        # np.savez('Wine.npz', train=trainingSet, test=testSet)
        trainingSet = np.load('Wine.npz')['train']
        testSet = np.load('Wine.npz')['test']
        trainingSet_2, trainingSet_ori = callPCA(trainingSet, 13, attrnum)
        testSet_2, testSet_ori = callPCA(testSet, 13, attrnum)
        trainingSet = np.array(trainingSet_2)
        testSet = testSet_2
        print(trainingSet.shape)
        summaries = model.summarizeByClass(trainingSet)
        predictions, result_prob = model.getPredictions(summaries, testSet)
        x, y = g.splitXandY(np.array(testSet), model.attrNum, len(testSet))
        # print(x)
        # print(x.shape)
        confusion_matrix = np.zeros((len(summaries), len(summaries)))
        accuracy, confusion_matrix = g.getAccuracy(testSet, predictions, confusion_matrix)
        print(accuracy)
        pred_acc.append(accuracy)
        plot.ROC(y, result_prob[:, 1])
    plt.plot(range(2, 13), pred_acc)
    plt.show()
    return accuracy
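# The helpers in g (splitXandY, getAccuracy) are defined elsewhere in the repo and not
# shown in this section. Below is a minimal sketch of how they are presumably implemented,
# assuming the class label occupies the last column of each row and that labels are
# 1-based integers; the names, signatures, and the percentage return value are
# assumptions for illustration, not the project's actual code.
import numpy as np

def splitXandY_sketch(dataset, attrNum, n):
    # assumed behavior: first attrNum columns are features, the remaining column is the label
    data = np.array(dataset)[:n]
    return data[:, :attrNum], data[:, attrNum:]

def getAccuracy_sketch(testSet, predictions, confusion_matrix):
    # assumed behavior: count exact label matches and fill the confusion matrix in place
    correct = 0
    for row, pred in zip(np.array(testSet), predictions):
        true_label = int(row[-1])
        confusion_matrix[true_label - 1][int(pred) - 1] += 1  # assumes 1-based labels
        if true_label == int(pred):
            correct += 1
    # returned as a percentage here; the real helper may return a fraction instead
    return correct / float(len(testSet)) * 100.0, confusion_matrix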
def callFLD(dataset, attrNum):
    fld = FLD.FLD(len(dataset[0]) - 1, 1)
    X, y = g.splitXandY(dataset, attrNum, len(dataset))
    # standardize the features before projecting
    mean = X.mean()
    std = X.std()
    X_norm = (X - mean) / std
    fld.X = {"label": y, "data": X_norm}
    fld.initClassData()
    fld.reduce()
    print(fld.Jw())
    # scatter the 1-D projection, colored by class label
    m = np.shape(fld.X_f)[1]
    for i in range(m):
        color = ''
        if y[i] == 1:
            color = 'r'
        if y[i] == 2:
            color = 'g'
        if y[i] == 3:
            color = 'b'
        plt.scatter(fld.X_f[0, i], y[i], s=50, c=color)
    plt.show()
    return np.hstack((fld.X_f.T, y))
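# FLD.reduce() and FLD.Jw() live in a separate module. For reference, a minimal sketch of
# the classic two-class Fisher projection they presumably implement: the direction w that
# maximizes J(w) = (w^T S_B w) / (w^T S_W w). Names here are illustrative, not the
# project's API, and the real class also handles the three-class case used above.
import numpy as np

def fisher_direction_sketch(X1, X2):
    # X1, X2: (n_i, d) arrays of samples from the two classes
    m1, m2 = X1.mean(axis=0), X2.mean(axis=0)
    # within-class scatter S_W = sum of the per-class scatter matrices
    Sw = (X1 - m1).T @ (X1 - m1) + (X2 - m2).T @ (X2 - m2)
    # the optimal direction is proportional to S_W^{-1} (m1 - m2)
    w = np.linalg.pinv(Sw) @ (m1 - m2)
    return w / np.linalg.norm(w)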
def callPCA(dataset, attrNum, k):
    X, y = g.splitXandY(dataset, attrNum, len(dataset))
    print(k)
    finalData, reconMat = PCA.pca(X, k)
    # PCA.plotBestFit(finalData, reconMat, y)
    return np.hstack((finalData, y)), np.hstack((reconMat, y))
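# PCA.pca(X, k) is defined in a separate module. A minimal sketch of the usual
# eigendecomposition-based implementation it presumably follows, returning both the
# k-dimensional projection and the reconstruction in the original space (matching the
# finalData/reconMat pair above); treat this as an assumption about the interface,
# not the actual module code.
import numpy as np

def pca_sketch(X, k):
    X = np.asarray(X, dtype=float)
    mean = X.mean(axis=0)
    X_centered = X - mean
    # eigendecomposition of the covariance matrix
    cov = np.cov(X_centered, rowvar=False)
    eig_vals, eig_vecs = np.linalg.eigh(cov)
    # keep the k eigenvectors with the largest eigenvalues
    top = eig_vecs[:, np.argsort(eig_vals)[::-1][:k]]
    finalData = X_centered @ top          # (n, k) projection
    reconMat = finalData @ top.T + mean   # (n, d) reconstruction
    return finalData, reconMat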
def summarizeByClass(self, dataset):
    separated = self.separateByClass(dataset)
    summaries = {}
    for classValue, instances in separated.items():
        x, y = g.splitXandY(np.array(instances), self.attrNum, len(instances))
        summaries[classValue] = self.meanVector(x), self.stdMat(x)
    return summaries
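# getPredictions() is not shown here. A sketch of the per-class scoring it presumably
# performs with these (meanVector, stdMat) summaries for the full Bayesian model:
# evaluate a multivariate Gaussian density per class and take the argmax. It assumes
# stdMat returns a covariance matrix; all names below are illustrative only.
import numpy as np

def class_log_likelihood_sketch(x, mean_vec, cov_mat):
    # log N(x | mean, cov) for one class, with a small ridge for numerical stability
    d = len(mean_vec)
    cov = np.asarray(cov_mat) + 1e-6 * np.eye(d)
    diff = np.asarray(x) - np.asarray(mean_vec)
    _, logdet = np.linalg.slogdet(cov)
    return -0.5 * (d * np.log(2 * np.pi) + logdet + diff @ np.linalg.solve(cov, diff))

def predict_sketch(summaries, x):
    # choose the class whose Gaussian assigns x the highest log-likelihood
    scores = {c: class_log_likelihood_sketch(x, m, s) for c, (m, s) in summaries.items()}
    return max(scores, key=scores.get)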
def main():
    pred_acc = []
    for attrnum in range(1, 32):
        model = Linear(attrnum, 2, 30)
        '''
        splitRatio = 0.67
        dataset = loadData.loadIono()
        trainingSet, testSet = g.splitDataset(dataset, 0.67)
        # np.savez('Gender_FLD.npz', train=trainingSet, test=testSet)
        '''
        trainingSet = np.load('Iono.npz')['train']
        testSet = np.load('Iono.npz')['test']
        """
        trainingSet = callFLD(np.array(trainingSet), 32)
        testSet = callFLD(testSet, 32)
        """
        trainingSet_2, trainingSet_ori = callPCA(trainingSet, 32, attrnum)
        testSet_2, testSet_ori = callPCA(testSet, 32, attrnum)
        trainingSet = trainingSet_2
        testSet = testSet_2
        for i in range(5000):
            if i % 100 == 0:
                model.lr = model.lr / 5
            batchData = batch(trainingSet, model.batchNum)
            x, y = g.splitXandY(batchData, model.attrNum, len(batchData))
            model.train(x, y)
        x, y = g.splitXandY(np.array(testSet), model.attrNum, len(testSet))
        final_output, accuracy = model.predict_test(x, y)
        pred_acc.append(accuracy)
        # plot.ROC(y, final_output)
    plt.plot(range(1, 32), pred_acc)
    plt.show()
    return
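# batch() is defined elsewhere in the repo. A minimal sketch of the random mini-batch
# sampler it presumably is (draw batchNum rows without replacement from the training
# set); the name and signature follow the call above but are otherwise an assumption.
import numpy as np

def batch_sketch(trainingSet, batchNum):
    data = np.array(trainingSet)
    idx = np.random.choice(len(data), size=min(batchNum, len(data)), replace=False)
    return data[idx]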
def plot(self):
    clusted, label = g.splitXandY(self.data, 13, len(self.data))
    separated = separateByClass(clusted, label.flatten())
    import matplotlib.pyplot as plt
    import itertools
    for key, data in separated.items():
        print(key, " : ", len(data))
    allKey = list(separated.keys())
    colors = itertools.cycle(["red", "blue", "green", "yellow", "orange"])
    for i in range(len(separated.keys())):
        color_this = next(colors)
        for j in range(len(separated[allKey[i]])):
            plt.scatter(separated[allKey[i]][j][0],
                        separated[allKey[i]][j][1],
                        color=color_this, alpha=0.6)
    plt.show()
def __init__(self, dataset, type='single', cluster_num=2):
    self.dataset = dataset
    self.X, self.Y = g.splitXandY(dataset, 13, len(dataset))
    self.cluster_num = cluster_num
    self.dis_mat = np.zeros((dataset.shape[0], dataset.shape[0]))
    self.label = []
    self.draw_label = []
    self.allCluster = []
    self.type = type
    # initialize label and distance matrix
    for i in range(dataset.shape[0]):
        self.label.append(i)
        self.draw_label.append(i)
        self.allCluster.append(i)
        for j in range(dataset.shape[0]):
            self.dis_mat[i][j] = np.linalg.norm(self.X[i] - self.X[j], ord=1)
    self.link = []
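# The agglomeration loop itself lives elsewhere in this class. A sketch of one merge step
# that the 'single' (and a complete-linkage) type presumably performs on top of the
# precomputed L1 distance matrix: find the closest pair of clusters and relabel one as
# the other. Illustrative only; the real method names and bookkeeping may differ.
import numpy as np

def merge_once_sketch(dis_mat, label, linkage='single'):
    labels = np.array(label)
    clusters = np.unique(labels)
    best = (None, None, np.inf)
    for a in range(len(clusters)):
        for b in range(a + 1, len(clusters)):
            # pairwise distances between all members of the two clusters
            block = dis_mat[np.ix_(labels == clusters[a], labels == clusters[b])]
            d = block.min() if linkage == 'single' else block.max()
            if d < best[2]:
                best = (clusters[a], clusters[b], d)
    # merge: every point in the second cluster joins the first
    labels[labels == best[1]] = best[0]
    return labels.tolist(), best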
def main():
    orig_attr = 4
    splitRatio = 0.67
    pred_acc = []
    for attrnum in range(2, orig_attr):
        dataset = loadData.loadIris()
        model = Naive_Bayes(attrnum, 3)
        # trainingSet, testSet = g.splitDataset(dataset, splitRatio)
        # np.savez('Iris.npz', train=trainingSet, test=testSet)
        trainingSet = np.load('Iris.npz')['train']
        testSet = np.load('Iris.npz')['test']
        # print(trainingSet.shape)
        trainingSet_2, trainingSet_ori = callPCA(trainingSet, orig_attr, attrnum)
        testSet_2, testSet_ori = callPCA(testSet, orig_attr, attrnum)
        trainingSet = np.array(trainingSet_2)
        testSet = testSet_2
        summaries = model.summarizeByClass(trainingSet)
        # print(summaries)
        predictions, result_prob = model.getPredictions(summaries, testSet)
        x, y = g.splitXandY(testSet, model.attrNum, len(testSet))
        confusion_dim = len(summaries)
        confusion_matrix = np.zeros((confusion_dim, confusion_dim))
        accuracy, confusion_matrix = g.getAccuracy(testSet, predictions, confusion_matrix)
        print(accuracy)
        pred_acc.append(accuracy)
        plot.ROC(y, result_prob)
    plt.plot(range(2, orig_attr), pred_acc)
    plt.show()
def plot(self):
    clusted, label = g.splitXandY(self.data, 13, len(self.data))
    separated = separateByClass(clusted, label.flatten())
    import matplotlib.pyplot as plt
    import itertools
    for key, data in separated.items():
        print(key, " : ", len(data))
    allKey = list(separated.keys())
    colors = itertools.cycle(["red", "blue", "green", "yellow", "orange"])
    for i in range(len(separated.keys())):
        color_this = next(colors)
        for j in range(len(separated[allKey[i]])):
            plt.scatter(separated[allKey[i]][j][0],
                        separated[allKey[i]][j][1],
                        color=color_this, alpha=0.6)
    plt.show()


dataset = loadData.loadWine()
X, y = g.splitXandY(dataset, 13, len(dataset))
dbscan = DBSCAN(dataset, eps=50, minPts=10)
dbscan.find_core_point()
print("eps=50, minPts=10")
dbscan.plot()
print("Adjusted Rand :", metrics.adjusted_rand_score(y.flatten(), dbscan.data[:, -1]))
print("Normalized Mutual Info:", normalized_mutual_info_score(y.flatten(), dbscan.data[:, -1]))
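# find_core_point() is defined inside the DBSCAN class (not shown in this section). A
# sketch of the standard core-point test it presumably applies: a point is a core point
# when at least minPts samples (itself included) lie within radius eps. The function
# name and the use of Euclidean distance below are assumptions for illustration.
import numpy as np

def core_points_sketch(X, eps, minPts):
    X = np.asarray(X, dtype=float)
    core = []
    for i in range(len(X)):
        # neighborhood size under Euclidean distance
        dists = np.linalg.norm(X - X[i], axis=1)
        if np.sum(dists <= eps) >= minPts:
            core.append(i)
    return core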