Example #1
def main():
    splitRatio = 0.67

    pred_acc = []
    # Sweep the number of principal components kept (2 to 12) and record test accuracy
    for attrnum in range(2, 13):
        model = Bayesian(attrnum, 3)
        # dataset = loadData.loadWine()
        # trainingSet, testSet = g.splitDataset(dataset, splitRatio)
        # np.savez('Wine.npz', train=trainingSet, test=testSet)
        # Load the cached train/test split of the Wine data
        wine_data = np.load('Wine.npz')
        trainingSet = wine_data['train']
        testSet = wine_data['test']

        trainingSet_2, trainingSet_ori = callPCA(trainingSet, 13, attrnum)
        testSet_2, testSet_ori = callPCA(testSet, 13, attrnum)

        trainingSet = np.array(trainingSet_2)
        testSet = testSet_2
        print(trainingSet.shape)
        summaries = model.summarizeByClass(trainingSet)

        predictions, result_prob = model.getPredictions(summaries, testSet)

        x, y = g.splitXandY(np.array(testSet), model.attrNum, len(testSet))
        # print(x)
        # print(x.shape)
        confusion_matrix = np.zeros((len(summaries), len(summaries)))
        accuracy, confusion_matrix = g.getAccuracy(testSet, predictions,
                                                   confusion_matrix)
        print(accuracy)
        pred_acc.append(accuracy)
        plot.ROC(y, result_prob[:, 1])
    plt.plot(range(2, 13), pred_acc)
    plt.show()
    return accuracy
Example #2
def callFLD(dataset, attrNum):
    fld = FLD.FLD(len(dataset[0]) - 1, 1)
    X, y = g.splitXandY(dataset, attrNum, len(dataset))

    # Standardize the features (scalar mean and std over the whole matrix)
    mean = X.mean()
    std = X.std()
    X_norm = (X - mean) / std

    fld.X = {"label": y, "data": X_norm}

    fld.initClassData()
    fld.reduce()
    print(fld.Jw())
    m = np.shape(fld.X_f)[1]
    for i in range(m):
        # Color each projected sample by its class label
        color = ''
        if y[i] == 1: color = 'r'
        if y[i] == 2: color = 'g'
        if y[i] == 3: color = 'b'

        plt.scatter(fld.X_f[0, i], y[i], s=50, c=color)

    plt.show()

    return np.hstack((fld.X_f.T, y))
Example #3
def callPCA(dataset, attrNum, k):
    # Split off the label column, project X onto the top k principal components,
    # and re-append the labels so downstream code still sees (features, label) rows.
    X, y = g.splitXandY(dataset, attrNum, len(dataset))
    print(k)
    finalData, reconMat = PCA.pca(X, k)

    # PCA.plotBestFit(finalData, reconMat, y)
    return np.hstack((finalData, y)), np.hstack((reconMat, y))
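
Most of these examples rely on the helper g.splitXandY, which is not shown. A minimal sketch of the behaviour it is assumed to have (the signature and return shapes here are guesses, not the original implementation): split a dataset whose last column holds the class label into a feature matrix X and a label column y.

import numpy as np

def splitXandY(dataset, attrNum, rowNum):
    # Assumed behaviour: the first attrNum columns are features, the last column is the label.
    data = np.asarray(dataset)[:rowNum]
    X = data[:, :attrNum]            # feature matrix, shape (rowNum, attrNum)
    y = data[:, -1].reshape(-1, 1)   # label column, shape (rowNum, 1)
    return X, y
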
Example #4
    def summarizeByClass(self, dataset):
        # Group the training rows by class, then store the per-class summary
        # statistics (mean vector and stdMat) used by the classifier.
        separated = self.separateByClass(dataset)
        summaries = {}
        for classValue, instances in separated.items():

            x, y = g.splitXandY(np.array(instances), self.attrNum,
                                len(instances))
            summaries[classValue] = self.meanVector(x), self.stdMat(x)
        return summaries
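
summarizeByClass depends on self.separateByClass, which is not shown here. A minimal sketch of the grouping logic it presumably implements, written as a standalone function and assuming the class value sits in the last column of each row:

import numpy as np

def separateByClass(dataset):
    # Assumed behaviour: group rows by the class value stored in the last column.
    separated = {}
    for row in np.asarray(dataset):
        separated.setdefault(row[-1], []).append(row)
    return separated
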
Example #5
def main():

    pred_acc = []
    for attrnum in range(1, 32):
        model = Linear(attrnum, 2, 30)
        '''
        splitRatio = 0.67
        dataset = loadData.loadIono()

        trainingSet, testSet = g.splitDataset(dataset, 0.67)
        # np.savez('Gender_FLD.npz', train=trainingSet, test=testSet)
        '''
        # Load the cached train/test split of the Ionosphere data
        iono_data = np.load('Iono.npz')
        trainingSet = iono_data['train']
        testSet = iono_data['test']
        """
        trainingSet = callFLD(np.array(trainingSet), 32)
        testSet = callFLD(testSet, 32)
    

        """
        trainingSet_2, trainingSet_ori = callPCA(trainingSet, 32, attrnum)
        testSet_2, testSet_ori = callPCA(testSet, 32, attrnum)

        trainingSet = trainingSet_2
        testSet = testSet_2

        for i in range(5000):
            # Decay the learning rate by a factor of 5 every 100 iterations (starting at i == 0)
            if i % 100 == 0:
                model.lr = model.lr / 5
            batchData = batch(trainingSet, model.batchNum)

            x, y = g.splitXandY(batchData, model.attrNum, len(batchData))
            model.train(x, y)

        x, y = g.splitXandY(np.array(testSet), model.attrNum, len(testSet))
        final_output, accuracy = model.predict_test(x, y)
        pred_acc.append(accuracy)
        # plot.ROC(y, final_output)

    plt.plot(range(1, 32), pred_acc)
    plt.show()
    return
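
The mini-batch helper batch used in the training loop is not shown. A minimal sketch of what it is assumed to do (uniform random sampling of batchNum rows; this is an assumption, not the original code):

import numpy as np

def batch(trainingSet, batchNum):
    # Assumed behaviour: sample batchNum rows uniformly at random from the training set.
    data = np.asarray(trainingSet)
    idx = np.random.choice(len(data), size=batchNum, replace=False)
    return data[idx]
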
Example #6
    def plot(self):
        clustered, label = g.splitXandY(self.data, 13, len(self.data))
        separated = separateByClass(clustered, label.flatten())
        import matplotlib.pyplot as plt
        import itertools

        for key, data in separated.items():
            print(key, " : ", len(data))
        allKey = list(separated.keys())
        colors = itertools.cycle(["red", "blue", "green", "yellow", "orange"])
        # Scatter the first two coordinates of every point, one color per cluster
        for key, color_this in zip(allKey, colors):
            for point in separated[key]:
                plt.scatter(point[0], point[1], color=color_this, alpha=0.6)

        plt.show()
    def __init__(self, dataset, type='single', cluster_num=2):
        self.dataset = dataset
        self.X, self.Y = g.splitXandY(dataset, 13, len(dataset))
        self.cluster_num = cluster_num
        self.dis_mat = np.zeros((dataset.shape[0], dataset.shape[0]))
        self.label = []
        self.draw_label = []

        self.allCluster = []
        self.type = type
        # Initialize labels and the pairwise L1 (Manhattan) distance matrix
        for i in range(dataset.shape[0]):
            self.label.append(i)
            self.draw_label.append(i)
            self.allCluster.append(i)
            for j in range(dataset.shape[0]):
                self.dis_mat[i][j] = np.linalg.norm(self.X[i] - self.X[j],
                                                    ord=1)
        self.link = []
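
The nested loop above fills the pairwise L1 distance matrix with O(n^2) Python-level iterations. A vectorized sketch of the same construction, assuming SciPy is available (cdist with metric='cityblock' matches np.linalg.norm(a - b, ord=1)):

import numpy as np
from scipy.spatial.distance import cdist

X = np.random.rand(10, 13)                  # stand-in for self.X
dis_mat = cdist(X, X, metric='cityblock')   # pairwise Manhattan distances
assert dis_mat.shape == (10, 10)
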
Example #8
def main():

    orig_attr = 4
    splitRatio = 0.67
    pred_acc = []
    # Sweep the number of principal components kept (2 to orig_attr - 1) and record test accuracy
    for attrnum in range(2, orig_attr):

        dataset = loadData.loadIris()
        model = Naive_Bayes(attrnum, 3)

        # trainingSet, testSet = g.splitDataset(dataset, splitRatio)
        # np.savez('Iris.npz', train=trainingSet, test=testSet)
        # Load the cached train/test split of the Iris data
        iris_data = np.load('Iris.npz')
        trainingSet = iris_data['train']
        testSet = iris_data['test']

        # print(trainingSet.shape)

        trainingSet_2, trainingSet_ori = callPCA(trainingSet, orig_attr,
                                                 attrnum)
        testSet_2, testSet_ori = callPCA(testSet, orig_attr, attrnum)

        trainingSet = np.array(trainingSet_2)
        testSet = testSet_2

        summaries = model.summarizeByClass(trainingSet)
        # print(summaries)
        predictions, result_prob = model.getPredictions(summaries, testSet)
        x, y = g.splitXandY(testSet, model.attrNum, len(testSet))
        confusion_dim = len(summaries)
        confusion_matrix = np.zeros((confusion_dim, confusion_dim))
        accuracy, confusion_matrix = g.getAccuracy(testSet, predictions,
                                                   confusion_matrix)
        print(accuracy)
        pred_acc.append(accuracy)

        plot.ROC(y, result_prob)
    plt.plot(range(2, orig_attr), pred_acc)
    plt.show()
Example #9
    def plot(self):
        clustered, label = g.splitXandY(self.data, 13, len(self.data))
        separated = separateByClass(clustered, label.flatten())
        import matplotlib.pyplot as plt
        import itertools

        for key, data in separated.items():
            print(key, " : ", len(data))
        allKey = list(separated.keys())
        colors = itertools.cycle(["red", "blue", "green", "yellow", "orange"])
        # Scatter the first two coordinates of every point, one color per cluster
        for key, color_this in zip(allKey, colors):
            for point in separated[key]:
                plt.scatter(point[0], point[1], color=color_this, alpha=0.6)

        plt.show()



dataset = loadData.loadWine()
X, y = g.splitXandY(dataset, 13, len(dataset))
dbscan = DBSCAN(dataset, eps=50, minPts=10)
dbscan.find_core_point()
print("eps=50, minPts=10")
dbscan.plot()
# Compare the DBSCAN cluster labels (last column of dbscan.data) against the true Wine classes
print("Adjusted Rand :", metrics.adjusted_rand_score(y.flatten(), dbscan.data[:, -1]))
print("Normalized Mutual Info:", normalized_mutual_info_score(y.flatten(), dbscan.data[:, -1]))