Example #1
    def init_with_kmeans(self, npimg, mask):
        print("Creating GMMs...")
        self._beta = self.Beta(npimg)
        self.Smoothness(npimg, self._beta, self._gamma)

        # Split pixels into definite background and probable foreground
        bgd = np.where(mask == self.GT_bgd)
        prob_fgd = np.where(mask == self.P_fgd)
        BGDpixels = npimg[bgd]       # shape (N_bgd, 3)
        FGDpixels = npimg[prob_fgd]  # shape (N_fgd, 3)

        # Cluster each pixel set into 5 components to initialize the GMMs
        self.KmeansBgd = Kmeans(BGDpixels, dim=3, cluster=5, epoches=2)
        self.KmeansFgd = Kmeans(FGDpixels, dim=3, cluster=5, epoches=2)

        bgdlabel = self.KmeansBgd.run()  # shape (N_bgd, 1)
        fgdlabel = self.KmeansFgd.run()  # shape (N_fgd, 1)

        self.BGD_GMM = GMM()  # the GMM model for the background
        self.FGD_GMM = GMM()  # the GMM model for the foreground

        # Assign every pixel to its k-means component
        for idx, label in enumerate(bgdlabel):
            self.BGD_GMM.add_pixel(BGDpixels[idx], label)
        for idx, label in enumerate(fgdlabel):
            self.FGD_GMM.add_pixel(FGDpixels[idx], label)

        # Estimate the GMM parameters from the assigned pixels
        self.BGD_GMM.learning()
        self.FGD_GMM.learning()
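
For context, a setup for this initializer might look like the sketch below; the label values 0 and 3 are placeholders matching OpenCV's GrabCut convention only by assumption, since the example shows just the method body.

import numpy as np

# Hypothetical setup sketch: GT_bgd / P_fgd stand in for the mask-label
# constants used above; their values here are assumptions.
GT_bgd, P_fgd = 0, 3
npimg = np.random.randint(0, 256, size=(64, 64, 3)).astype(np.float64)
mask = np.full(npimg.shape[:2], GT_bgd, dtype=np.uint8)
mask[16:48, 16:48] = P_fgd   # mark the center block as probable foreground
# model.init_with_kmeans(npimg, mask) would then build and fit both GMMs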
Example #2
    def train(self,
              X_train,
              y_train,
              learning_rate=0.5,
              reg=1e-3,
              num_iters=100,
              batch_size=200,
              print_progress=False):
        """
        Inputs:
        - X_train: A PyTorch tensor of shape (N, D) containing training data; there are N training samples each of dimension D.
        - y_train: A PyTorch tensor of shape (N,) containing training labels; y[i] = {-1,1} means that X[i] has label  -1 or 1 depending on the class.
        - K: number of clusters
        - lamb: global regularization factor
        - learning_rate: (float) learning rate for optimization.
        - reg: (float) regularization strength. (ie. lambda)
        - num_iters: (integer) number of steps to take when optimizing
        - batch_size: (integer) number of training examples to use at each step.
        - print_progress: (boolean) If true, print progress during optimization.
        - exit_diff: (float) condition to stop the gradient descent algorithm if the change in loss is too low.
        Returns: A tuple of:
        - loss_all: A PyTorch tensor giving the values of the loss at each training iteration.
        """
        N, D = X_train.shape

        # clustering
        cluster_label, centroid = Kmeans(X_train, self.K)

        self.centroid = centroid

        # feature extension
        X_train_hat = self.feature_extension(X_train, cluster_label)

        # train linear SVM
        loss_hist = self.LSVM.train(X_train_hat,
                                    y_train,
                                    reg=reg,
                                    num_iters=num_iters,
                                    learning_rate=learning_rate)

        # SVM parameters
        W_hat = torch.tensor(self.LSVM.W,
                             dtype=X_train.dtype,
                             device=X_train.device)

        # global regularizer
        self.W = 1 / np.sqrt(self.lamb) * W_hat[:D]

        # local predictor
        self.Wl = torch.zeros(D,
                              self.K,
                              dtype=X_train.dtype,
                              device=X_train.device)
        for l in range(self.K):
            self.Wl[:, l] = W_hat[(D * (l + 1)):(D * (l + 2))] + self.W

        return loss_hist
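
A call to train() might look like the following sketch; the class name ClusteredSVM and its constructor arguments are assumptions inferred from the self.K and self.lamb attributes used above.

import torch

# Hypothetical usage sketch: ClusteredSVM is an assumed name for the class
# that owns train(); K and lamb mirror self.K / self.lamb above.
X_train = torch.randn(1000, 16)
y_train = (torch.randint(0, 2, (1000,)) * 2 - 1).float()  # labels in {-1, 1}

model = ClusteredSVM(K=5, lamb=1e-3)
loss_hist = model.train(X_train, y_train, learning_rate=0.5, reg=1e-3,
                        num_iters=100, batch_size=200)
print(loss_hist[-1])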
Example #3
def run():
    # dataset with 4 features, for multi-dimensional runs
    # data = pd.read_csv("results4-feat.csv")
    # dataset with 2 features, for testing graphs and visualizations
    data = pd.read_csv("results_short.csv")

    # plot_distances(data, max_val=5)
    model = Kmeans.Kmeans(k=2, data=data)
    model.train(show_graph=True)
Example #4
def plot_distances(data, max_val, min_val=2):
    # Train one model per k and record its total distance
    distances = []
    for i in range(min_val, max_val + 1):
        model = Kmeans.Kmeans(i, data)
        distances.append(model.train(show_graph=False))
    # Plot against the actual k values rather than a hard-coded offset
    plt.plot(range(min_val, max_val + 1), distances)
    plt.xlabel("Number of clusters")
    plt.ylabel("Total sum of distances")
    plt.title("Elbow Method")
    plt.show()
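
Combined with the data loading from Example #3, a run of the elbow plot might look like this sketch (results_short.csv is the two-feature file used above):

import pandas as pd

# Sweep k from the default min_val=2 up to 8 and look for the "elbow"
# where the total distance stops dropping sharply.
data = pd.read_csv("results_short.csv")
plot_distances(data, max_val=8)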
Example #5
    def kmeans(self, trainset, testset, k, k_for_cluster, isClassification):
        km = Kmeans.Kmeans(k_for_cluster, trainset)
        # centroids = km.converge()
        centroids_class = km.getClusters()
        centroids_class = centroids_class[testset.columns]
        # run KNN against the reduced train set (the cluster centroids)
        predicted = Knn.Knn().fit(centroids_class.values, testset, k,
                                  isClassification)
        # return predicted and actual labels
        return predicted, testset.iloc[:, -1]
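
The same reduce-then-classify idea can be sketched with the public scikit-learn API, as a point of comparison; labeling each centroid by majority vote of its members is an assumption about what getClusters() provides.

import numpy as np
from sklearn.cluster import KMeans
from sklearn.neighbors import KNeighborsClassifier

# Sketch: compress the training set to cluster centroids, label each centroid
# by majority vote of its members, then run KNN against the centroids only.
X, y = np.random.rand(500, 4), np.random.randint(0, 3, 500)
km = KMeans(n_clusters=20, n_init=10).fit(X)
centroid_labels = np.array([
    np.bincount(y[km.labels_ == c], minlength=3).argmax() for c in range(20)
])
knn = KNeighborsClassifier(n_neighbors=3).fit(km.cluster_centers_,
                                              centroid_labels)
print(knn.predict(np.random.rand(10, 4)))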
Example #6
def SegmentImages(trainDataPath, trainGroundTruth):
    # Cluster every training image, then score the clustering against each
    # ground-truth segmentation.
    for filename in glob.glob(os.path.join(trainDataPath, "*.jpg")):
        img = mpimg.imread(filename, format="jpg")
        rows = len(img)
        cols = len(img[0])

        labels, clusters = Kmeans.Kmeans(img, 3)
        print("Image after clustering")
        plt.imshow(labels)
        plt.show()

        labelsAs1D = np.reshape(labels, rows * cols)

        # read the matching ground-truth file
        filename_w_ext = os.path.basename(filename)
        imageName, file_extension = os.path.splitext(filename_w_ext)
        mat = scipy.io.loadmat(os.path.join(trainGroundTruth, imageName + ".mat"))

        numberOfImages = len(mat['groundTruth'][0])
        fig, ax = plt.subplots(1, numberOfImages + 1)
        ax[0].imshow(img)

        for k in range(numberOfImages):
            groundImage = mat['groundTruth'][0][k][0][0][0]
            ax[k + 1].imshow(groundImage)

        plt.show()

        for i in range(numberOfImages):
            groundImage = mat['groundTruth'][0][i][0][0][0]
            groundTruthAs1D = np.reshape(groundImage, rows * cols)
            matrix = pd.crosstab(labelsAs1D, groundTruthAs1D,
                                 rownames=['labels'], colnames=['img'])
            # convert the DataFrame to a NumPy array before scoring
            matrix = matrix.values
            fScore = Kmeans.getFScore(matrix)
            conditionalEntropy = Kmeans.getConditionalEntropy(matrix)
            print(f"Scores against ground-truth image {i}:")
            print("fScore is", fScore)
            print("conditionalEntropy is", conditionalEntropy)
            print("\n")
Example #7
print(X_train.toarray().shape)
print(Y_train.shape)
print(X_test.toarray().shape)
print(Y_test.shape)

# SVM model for classification
clustering_with_linear_SVM_sklearn(X_train, X_test, Y_train, Y_test)

############################# Kmean ######################################
with open('./data_set/words_idfs.txt') as f:
    vocab_size = len(f.read().splitlines())

num_cluster = 20

Kmean = Kmeans(num_clusters=num_cluster, num_word_vocab=vocab_size)
print(Kmean._num_clusters)
print(Kmean._num_word_vocab)

# Load data
Kmean.load_data('./data_set/train_tf_idf.txt')

max_purity = -1
choose_seed = 0

# Run with several seeds and keep the one with the highest purity
for i in range(10):
    Kmean.run(seed_value=i + 1, criterion='centroid', threshold=0)
    purity = Kmean.compute_purity()
    print(purity)
    if purity > max_purity:
        max_purity = purity
        choose_seed = i + 1

print("Best seed:", choose_seed, "purity:", max_purity)
Example #8
def main():
    try:
        _, train_data_path, test_data_path = sys.argv
    except ValueError:
        train_data_path = 'kddcup.data_10_percent_datatreat'
        test_data_path = 'corrected_datatreat'

    # train
    cluster_tree = ClusterTree()
    km = Kmeans(tree=cluster_tree, kid=Kmeans.KMEANS_ID, level=1,
                num_dimensions=MAX_ATTRIBUTES + 1)
    km.readTrainData(train_data_path)
    k_value = MAX_LABELS

    with redirection(LOG_FILE, 'w'):
        print("Init K-value =", k_value)

    km.runKmeans(k_value)

    print(format_msg('*', "Total clustering process finished!"))
    with redirection(LOG_FILE, 'a'):
        print(format_msg('*', "Total clustering process finished!"))

    cluster_tree.printLog()

    # test
    print(format_msg('*', "Start classifying the test records"))
    with redirection(LOG_FILE, 'a'):
        print(format_msg('*', "Start classifying the test records"))

    reader = test_reader(test_data_path)
    cfs_matrix = ConfuseMatrix()
    right_rcd_num = 0
    test_rcd_num = 0
    with redirection(RESULT_FILE, 'w'):
        print(format_msg('*', "Classification result"))
    fmt = "True Label = {} Pred Label = {} Cluster Path = {}"
    for record in reader:
        predict = cluster_tree.findNearestCluster(record)
        if record.label == predict.getClusterNodeLabel():
            right_rcd_num += 1
        cfs_matrix.update(record.label, predict.getClusterNodeLabel())

        with redirection(RESULT_FILE, 'a'):
            print(fmt.format(LABEL_NAMES[record.label],
                             LABEL_NAMES[predict.getClusterNodeLabel()],
                             predict.strPath))
        test_rcd_num += 1
        if test_rcd_num % 10000 == 0:
            print("{} records have been done ...".format(test_rcd_num))
            with redirection(LOG_FILE, 'a'):
                print("{} records have been done ...".format(test_rcd_num))

    print(format_msg('*', "Finished classifying the test records!"))
    with redirection(LOG_FILE, 'a'):
        print(format_msg('*', "Finished classifying the test records!"))

    print(format_msg('=', "Classify Result"))
    fmt = "Total test records = {} Right label records = {} Right rate = {}"
    print(fmt.format(test_rcd_num, right_rcd_num, right_rcd_num / test_rcd_num))
    with redirection(RESULT_FILE, 'a'):
        print(format_msg('=', "Classify Result"))
        print(fmt.format(test_rcd_num, right_rcd_num, right_rcd_num / test_rcd_num))

    cfs_matrix.print()
    cfs_matrix.printLog()
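
The redirection helper used throughout main() is not part of the excerpt; a minimal stdout-redirecting context manager along those lines might look like:

import sys
from contextlib import contextmanager

@contextmanager
def redirection(path, mode):
    # Temporarily route print() output to a file; a sketch of the helper
    # used above, which is not shown in the example.
    saved, f = sys.stdout, open(path, mode)
    sys.stdout = f
    try:
        yield
    finally:
        sys.stdout = saved
        f.close()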
Example #9
import Kmeans

def iris_f(nome_arq):
    # Read the first two numeric columns of each comma-separated line,
    # e.g. "5.1,3.5,1.4,0.2,Iris-setosa" -> [5.1, 3.5]
    ret_list = []
    with open(nome_arq, 'r') as data:
        for line in data:
            aux = line.split(',')
            ret_list.append([float(aux[0]), float(aux[1])])
    return ret_list

lista_pon = iris_f('iris.txt')

clusters = Kmeans.Kmeans(3, lista_pon)

print(clusters)
Example #10
import numpy as np
import matplotlib.image as mpimg
import Kmeans
import joblib  # sklearn.externals.joblib was removed from modern scikit-learn

classifier = joblib.load('knnModel.pkl')
print(classifier)

img = mpimg.imread("./flower_images/0002.jpg")
newFeatures = np.zeros((1, 3))
newFeatures[0][0], newFeatures[0][1], newFeatures[0][2] = Kmeans.Kmeans(
    img, 2, 5)
print("Image Features : ", newFeatures)

y_pred = classifier.predict(newFeatures)
print("Predicted Flower Class : ", y_pred[0])
Example #11
from utils import *
from SVM import *
from Kmeans import *

# Test update_centroid_of
member1 = Member(label=1, doc_id=1, r_d=[1, 0])
member2 = Member(label=1, doc_id=1, r_d=[0, 0])
member3 = Member(label=1, doc_id=1, r_d=[0, 1])
member4 = Member(label=1, doc_id=1, r_d=[1, 1])
cluster = Cluster()          # do not shadow the Cluster class itself
cluster.add_member(member1)
cluster.add_member(member2)
cluster.add_member(member3)
cluster.add_member(member4)

Kmean = Kmeans(num_clusters=3, num_word_vocab=2)
Kmean.update_centroid_of(cluster)
print(cluster._centroid)     # expect [0.5, 0.5]

# Test random init
Kmean = Kmeans(num_clusters=3, num_word_vocab=2)
Kmean.random_init(1)
for c in Kmean._clusters:
    print(c._centroid)

# Test select_cluster_for
Kmean = Kmeans(num_clusters=3, num_word_vocab=2)
Cluster1 = Cluster()
Cluster1._centroid = [0, 0]
Cluster2 = Cluster()
Cluster2._centroid = [2, 0]
Example #12
    crc = np.load('files/pred_crcrate.npy')
    # crc.sort(axis=0)
    # crc = np.append(crc, 2 * crc[-1] - crc[0])
    ti = np.load('files/pred_timeinterval.npy')
    # ti.sort(axis=0)
    # ti = np.append(ti, 2 * ti[-1] - ti[0])
    #
    # np.save('files/pred_crcrate.npy', crc)
    # np.save('files/pred_timeinterval.npy', ti)
    # assert False
    result = d.load_data()[:, d.binary_result].astype(int)

    disc_result = []
    for i in range(10, 50, 10):
        print('pid')
        kmeans_pid = Kmeans(raw_pid, None, k=i)
        pid = kmeans_pid.calc(None, 20, 2000)
        for j in range(10, 50, 5):
            print('pressure measurement')
            pm = discrete_plus(pressure_measurement, j, 0.9)
            for k in range(10, 50, 5):
                # before = time.time()
                print('set point')
                sp = discrete_plus(setpoint, k, 0.9)
                data_str = init.signature_all(d, crc, ti, pid, pm, sp)

                count = True
                features_normal = []
                for r in range(data_str.shape[0]):
                    if result[r] == 0:
                        if data_str[r] not in features_normal:
Example #13
import Kmeans as km
import numpy as np

if __name__ == "__main__":

    data = np.array([[1, 2, 3, 4, 5], [6, 7, 8, 9, 10], [11, 12, 13, 14, 15],
                     [16, 17, 18, 19, 20], [21, 22, 23, 24, 25],
                     [26, 27, 28, 29, 30]])
    label = np.array([0, 0, 0, 1, 1, 1])

    kmeans = km.Kmeans(data, kind=2, rowsam=True)

    res = kmeans.cluster()
    print("聚类结果为 res = ", res)

    acc = kmeans.accuracy(label)
    print("聚类准确度为 acc = ", acc)