Example #1
def vis(repin,repout,cluster=True):
    """ A function to visualize the basis of equivariant maps repin>>repout
        as an image. Only use cluster=True if you know Pv will only have
        r distinct values (true for G<S(n) but not true for many continuous groups)."""
    rep = (repin>>repout)
    P = rep.equivariant_projector() # orthogonal projector onto the equivariant subspace
    Q = rep.equivariant_basis()
    v = np.random.randn(P.shape[1])  # sample random vector
    v = np.round(P@v,decimals=4)  # project onto equivariant subspace (and round)
    if cluster: # cluster nearby values for better color separation in plot
        v = KMeans(n_clusters=Q.shape[-1]).fit(v.reshape(-1,1)).labels_
    plt.imshow(v.reshape(repout.size(),repin.size()))
    plt.axis('off')
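A hypothetical call of the helper above. The imports assume the emlp library this snippet appears to come from (V as the base vector representation, S(n) as the symmetric group); those names, and the module-level numpy/sklearn/matplotlib imports the function relies on, are assumptions rather than part of the example.

import matplotlib.pyplot as plt
from emlp.reps import V    # assumed: emlp's base vector representation
from emlp.groups import S  # assumed: the symmetric group S(n)

G = S(6)
vis(V(G), V(G))  # plot the basis of equivariant linear maps V -> V for S(6)
plt.show()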
Example #2
    def _init_shapelets(self, X):
        if self.init_method == "random":
            for shp_len in sorted(self.shapelet_lengths.keys()):
                for _ in range(self.shapelet_lengths[shp_len]):
                    self.shapelets.append(numpy.random.randn(shp_len, self.d))
        elif self.init_method == "kmeans":
            n_draw = 10000
            n_ts, sz, d = X.shape
            for shp_len in sorted(self.shapelet_lengths.keys()):
                indices_ts = numpy.random.choice(n_ts, size=n_draw, replace=True)
                indices_time = numpy.random.choice(sz - shp_len + 1, size=n_draw, replace=True)
                subseries = numpy.zeros((n_draw, shp_len, d))
                for i in range(n_draw):
                    subseries[i] = X[indices_ts[i], indices_time[i]:indices_time[i] + shp_len]
                subseries = subseries.reshape((n_draw, shp_len * d))
                shapelets = KMeans(n_clusters=self.shapelet_lengths[shp_len]).fit(subseries).cluster_centers_
                shapelets = shapelets.reshape((self.shapelet_lengths[shp_len], shp_len, d))
                for shp in shapelets:
                    self.shapelets.append(shp)
        else:
            raise NotImplementedError("Could not initialize shapelets: unknown method %s" % self.init_method)

        if self.ada_grad:
            for shp_len in sorted(self.shapelet_lengths.keys()):
                for _ in range(self.shapelet_lengths[shp_len]):
                    self.past_gradients_Skl.append(numpy.zeros((shp_len, self.d)))
            self.past_gradients_Beta = 0.
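The "kmeans" branch above can be exercised on its own. Below is a minimal stand-alone sketch of the same initialization idea on toy data; the array shapes and counts are made up for the demo and are not part of the original class.

import numpy
from sklearn.cluster import KMeans

n_ts, sz, d = 50, 100, 1
X = numpy.random.randn(n_ts, sz, d)          # toy time series dataset
shp_len, n_shapelets, n_draw = 20, 5, 1000

# draw random subsequences of length shp_len and cluster them
indices_ts = numpy.random.choice(n_ts, size=n_draw, replace=True)
indices_time = numpy.random.choice(sz - shp_len + 1, size=n_draw, replace=True)
subseries = numpy.stack([X[i, t:t + shp_len] for i, t in zip(indices_ts, indices_time)])
centers = KMeans(n_clusters=n_shapelets).fit(subseries.reshape(n_draw, shp_len * d)).cluster_centers_
init_shapelets = centers.reshape(n_shapelets, shp_len, d)
print(init_shapelets.shape)                  # (5, 20, 1): cluster centers used as initial shapelets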
Example #3
def trimSilence(subject, aligned=False):
    mode = "aligned" if aligned else "pedal"
    input_dir = os.path.join(PROCESSED_DATA_PATH, subject, "audio", mode)

    save_dir = os.path.join(PROCESSED_DATA_PATH, subject, "audio_trimmed",
                            mode)
    if not os.path.isdir(save_dir):
        os.makedirs(save_dir)

    audio_files = [f for f in os.listdir(input_dir) if f[0] != "."]
    sample_size = 10
    for file in audio_files:
        wav = AudioSegment.from_file(os.path.join(input_dir, file),
                                     format="wav")
        samples = np.array([
            wav[i:i + sample_size].dBFS
            for i in range(0, len(wav), sample_size)
        ])
        samples = samples.reshape(-1, 1)
        clusters = KMeans(n_clusters=3,
                          random_state=0).fit(samples).cluster_centers_
        clusters = clusters.reshape(3, )
        clusters = sorted(clusters, reverse=True)
        threshold = (clusters[0] - clusters[1]) * 0.9 + clusters[1]
        start = findIndex(wav, threshold)
        end = findIndex(wav.reverse(), threshold)
        wav[start:len(wav) - end].export(os.path.join(save_dir, file),
                                         format="wav")
def kmeans(img, n_clusters):
    original_shape = img.shape
    img = img.reshape((img.shape[0] * img.shape[1], 3))
    labeled_img = KMeans(n_clusters).fit_predict(img)
    labeled_img = np.asarray(labeled_img)
    labeled_img = labeled_img.reshape((original_shape[0], original_shape[1]))
    return labeled_img
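A quick sanity check of the kmeans() helper above on a synthetic image (toy data; assumes the numpy/sklearn imports the snippet relies on):

import numpy as np

img = np.random.randint(0, 256, size=(24, 32, 3), dtype=np.uint8)  # toy RGB image
seg = kmeans(img, n_clusters=3)
print(seg.shape)       # (24, 32): one cluster id per pixel
print(np.unique(seg))  # [0 1 2]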
Example #5
def cluster(image):
    red = image[:, :, 0]
    X = image[:, :, 0].flatten()
    X = np.reshape(X, (X.shape[0], 1))
    y_pred = KMeans(n_clusters = 2, random_state = 170).fit_predict(X)
    y_pred = np.invert(y_pred.reshape(image.shape[0], image.shape[1]).astype(np.uint8))
    return y_pred
Example #6
def preprocess_final(img):
    red = img[:, :, 0]
    X = img[:, :, 0].flatten()
    X = np.reshape(X, (X.shape[0], 1))
    y_pred = KMeans(n_clusters = 2, random_state = 10).fit_predict(X)
    y_pred = y_pred.reshape(img.shape[0], img.shape[1])
    mask = invert(y_pred)
    return element_wise_multiply(red, mask)


# cap = cv2.VideoCapture('./Test Videos/Srinjoy.mp4')

# while(cap.isOpened()):
#     ret, frame = cap.read()
#     cv2.imshow('frame', segment_skin_2(frame))
#     if cv2.waitKey(1) & 0xFF == ord('q'):
#         break
# cap.release()
# cv2.destroyAllWindows()


# img = cv2.imread('Sign-Language-Digits-Dataset/Dataset/0/IMG_1118.JPG')

# plt.imshow(preprocess_final(img), cmap = 'gray')
# plt.show()
def color_img_segmentation(img, n_clusters=5, show=False):
    # segment the image by clustering its RGB pixel colors with KMeans

    img = any_to_image(img)
    # img_gray = image_to_gray(img)
    if len(img.shape) == 3:
        pass
    elif len(img.shape) == 2:
        # img_gray = img
        print('!NOTICE! The input image is gray!')
        # pass
        return None

    img_float = img / 255
    img_float = img_float.flatten()
    img_float = img_float.reshape((img.shape[0] * img.shape[1], 3))

    label = KMeans(n_clusters=n_clusters).fit_predict(img_float)
    label = label.reshape(img.shape[:2])
    label = np.array(255 / (label + 1), dtype=np.uint8)

    if show:
        cv2.imshow('color_img_segmentation()', label)
        cv2.waitKey()
        cv2.destroyAllWindows()

    return label
Example #8
def generate_nn_pairs(sample):
    image_registered = np.load('{}_registered.npy'.format(sample))
    image_seg = np.load('{}_seg.npy'.format(sample))
    cell_info = pd.read_csv('{}_cell_information_consensus.csv'.format(sample), header = None, dtype = {100:str})
    image_cn = np.log(np.sum(image_registered, axis = 2) + 1e-2)  # log-scaled total channel intensity per pixel
    rough = KMeans(n_clusters = 2, random_state = 0).fit_predict(image_cn.reshape(image_cn.shape[0]*image_cn.shape[1],1))
    rough_seg = rough.reshape(image_cn.shape)
    image0 = image_cn*(rough_seg == 0)
    image1 = image_cn*(rough_seg == 1)
    i0 = np.average(image0[rough_seg == 0])
    i1 = np.average(image1[rough_seg == 1])
    rough_seg_mask = rough_seg == np.argmax([i0, i1])
    adjacency_seg = skimage.segmentation.watershed(-np.sum(image_registered, axis = 2), image_seg, mask = rough_seg_mask)
    edge_map = skimage.filters.sobel(image_seg > 0)
    rag = skimage.future.graph.rag_boundary(adjacency_seg, edge_map)
    cell_label_list = cell_info.iloc[:,103].values
    centroid_spectral = []
    for i in range(cell_info.shape[0]):
        edges = list(rag.edges(cell_info.iloc[i,103]))
        for e in edges:
            node_1 = e[0]
            node_2 = e[1]
            if (node_1 != 0) and (node_2 !=0) and node_1 in cell_label_list and node_2 in cell_label_list:
                barcode_1 = cell_info.iloc[cell_info.iloc[:,103].values == node_1, 100].values[0]
                barcode_2 = cell_info.iloc[cell_info.iloc[:,103].values == node_2, 100].values[0]
                cell_1_index = np.where(image_seg == node_1)
                cell_1_pixel_intensity = image_registered[image_seg == node_1, :]
                cell_2_index = np.where(image_seg == node_2)
                cell_2_pixel_intensity = image_registered[image_seg == node_2, :]
                cxj = np.average(np.concatenate([cell_1_index[0], cell_2_index[0]]))
                cyj = np.average(np.concatenate([cell_1_index[1], cell_2_index[1]]))
                joint_intensity_x = np.concatenate([cell_1_pixel_intensity*cell_1_index[0][:,None], cell_2_pixel_intensity*cell_2_index[0][:,None]], axis = 0)
                joint_intensity_y = np.concatenate([cell_1_pixel_intensity*cell_1_index[1][:,None], cell_2_pixel_intensity*cell_2_index[1][:,None]], axis = 0)
                joint_intensity = np.concatenate([cell_1_pixel_intensity, cell_2_pixel_intensity], axis = 0)
                cxj_spectral = np.average(joint_intensity_x, axis = 0)/np.average(joint_intensity, axis = 0)
                cyj_spectral = np.average(joint_intensity_y, axis = 0)/np.average(joint_intensity, axis = 0)
                centroid_spectral_distance = np.sqrt((cxj - cxj_spectral)**2 + (cyj - cyj_spectral)**2)
                centroid_spectral.append([barcode_1, barcode_2, np.std(centroid_spectral_distance), np.median(centroid_spectral_distance)])
    centroid_spectral = pd.DataFrame(np.stack(centroid_spectral, axis = 0))
    centroid_spectral.columns = ['barcode_1', 'barcode_2', 'centroid_spectral_std', 'centroid_spectral_median']
    centroid_spectral['barcode_identity'] = centroid_spectral.barcode_1.values == centroid_spectral.barcode_2.values
    centroid_single_object = []
    for i in range(cell_info.shape[0]):
        barcode = cell_info.iloc[i, 100]
        cell_label = cell_info.iloc[i, 103]
        cell_index = np.where(image_seg == cell_label)
        cell_pixel_intensity = image_registered[image_seg == cell_label, :]
        cx = np.average(cell_index[0])
        cy = np.average(cell_index[1])
        cx_spectral = np.average(cell_pixel_intensity*cell_index[0][:,None], axis = 0)/np.average(cell_pixel_intensity, axis = 0)
        cy_spectral = np.average(cell_pixel_intensity*cell_index[1][:,None], axis = 0)/np.average(cell_pixel_intensity, axis = 0)
        centroid_spectral_distance = np.sqrt((cx - cx_spectral)**2 + (cy - cy_spectral)**2)
        centroid_single_object.append([barcode, np.std(centroid_spectral_distance), np.median(centroid_spectral_distance)])
    centroid_spectral_singlet = pd.DataFrame(np.stack(centroid_single_object, axis = 0))
    centroid_spectral_singlet.columns = ['barcode', 'centroid_spectral_std', 'centroid_spectral_median']
    centroid_spectral.to_csv('{}_centroid_spectral.csv'.format(sample), index = None)
    centroid_spectral_singlet.to_csv('{}_centroid_spectral_singlet.csv'.format(sample), index = None)
    return
def diff(img1, img2):
    delta = abs(img2 - img1)

    Z = delta.reshape(-1, 1)
    res = KMeans(n_clusters=2, init='k-means++', n_jobs=-1).fit_predict(Z)
    res2 = res.reshape(delta.shape)

    return res2
def ml(ft, bands, n_clusters=5):
    ft = np.moveaxis(ft, 0, -1)
    X = ft.reshape((-1, bands))
    X = np.nan_to_num(X, nan=0.0, posinf=0.0, neginf=0.0)

    y = KMeans(n_clusters=n_clusters).fit_predict(X)
    y_out = y.reshape(ft.shape[:-1])  # ft is (rows, cols, bands) after moveaxis, so drop the band axis
    return y_out
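A quick check of ml() on synthetic data; the shapes are hypothetical and only illustrate the band-first layout the function expects.

import numpy as np

bands, rows, cols = 4, 32, 48
ft = np.random.rand(bands, rows, cols)   # band-first feature stack
labels = ml(ft, bands, n_clusters=3)
print(labels.shape)                      # (32, 48): one cluster id per pixel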
Example #11
def main():
    parms = parseArguments()

    img = imread(parms.imageFileName)
    kTimes = parms.k
    img_size = img.shape

    # Reshape it to be 2-dimension
    # in other words, its a 1d array of pixels with colors (RGB)

    X = img.reshape(img_size[0] * img_size[1], img_size[2])

    # Insert your code here to perform
    # -- KMeans clustering
    # -- replace colors in the image with their respective centroid

    # Init must be random and n_init must be 1
    kmeans = KMeans(init="random", n_init=1, n_clusters=15,
                    verbose=1).fit(X)  # use loop to run 10 times?
    # replace each pixel's color with the centroid of its assigned cluster
    X_compressed = kmeans.cluster_centers_[kmeans.labels_]

    # Document Instructions:
    #
    # For one of the images that has been supplied, run kmeans 10 times with k = 15 and
    # report/plot the sum of the squared errors (inertia_).

    # Briefly explain why the results vary (1-2 sentences).

    # save modified image (code assumes new image in a variable
    # called X_compressed)
    # Reshape to have the same dimension as the original image

    X_compressed = X_compressed.reshape(img_size[0], img_size[1], img_size[2]).astype(np.uint8)  # assumes an 8-bit input image

    fig, ax = plt.subplots(1, 1, figsize=(8, 8))

    ax.imshow(X_compressed)
    for ax in fig.axes:
        ax.axis('off')
    plt.tight_layout()
    plt.savefig(parms.outputFileName,
                dpi=400,
                bbox_inches='tight',
                pad_inches=0.05)
    plt.show()
Example #12
def main(X, Y, GT, diff):

    train_num = 2000
    max_iters = 2000
    lr = 1e-4

    index = np.argsort(diff)
    XData = X[index[0:train_num], :]
    YData = Y[index[0:train_num], :]

    inputX = tf.placeholder(dtype=tf.float32, shape=[None, X.shape[-1]])
    inputY = tf.placeholder(dtype=tf.float32, shape=[None, Y.shape[-1]])
    model = DSFANet(num=train_num)
    loss = model.forward(X=inputX, Y=inputY)

    optimizer = tf.train.GradientDescentOptimizer(lr).minimize(loss)
    init = tf.global_variables_initializer()

    gpu_options = tf.GPUOptions(allow_growth=True)
    conf = tf.ConfigProto(gpu_options=gpu_options)
    sess = tf.Session(config=conf)

    sess.run(init)

    train_loss = np.zeros(max_iters)

    for k in range(max_iters):

        _, train_loss[k] = sess.run([optimizer, loss], feed_dict={inputX: XData, inputY: YData})

        if k % 100 == 0:
            print('iter %4d, loss is %.4f' % (k, train_loss[k]))

    XTest, YTest = sess.run([model.X_, model.Y_], feed_dict={inputX: X, inputY: Y})

    sess.close()

    X_trans, Y_trans = utils.SFA(XTest, YTest)

    diff = X_trans-Y_trans
    diff = diff / np.std(diff, axis=0)

    plt.imsave('DSFAdiff.png', (diff**2).sum(axis=1).reshape(GT.shape), cmap='gray')

    bin = KMeans(n_clusters=2).fit((diff**2).sum(axis=-1, keepdims=True)).labels_
    #bin = KMeans(n_clusters=2).fit(diff).labels_
    plt.imsave('DSFACD.png', bin.reshape(GT.shape), cmap='gray')
    #diff = abs(diff)
    #plt.imsave('DSFAcolor.png',(diff/diff.max()).reshape(GT.shape[0], GT.shape[1],3))

    print(accuracy_score(GT.reshape(-1, 1)/255, bin))
    print(accuracy_score(GT.reshape(-1, 1)/255, 1-bin))

    return True
def ctmf_detector(hsi_img, tgt_sig, n_cluster = 2):
	"""
	Cluster Tuned Matched Filter
	 k-means cluster all spectra, make a matched filter for each cluster

	Inputs:
	 hsi_image - n_row x n_col x n_band hyperspectral image
	 tgt_sig - target signature (n_band x 1 - column vector)
	 n_cluster - number of clusters to use

	Outputs:
	 ctmf_out - detector output image
	 cluster_img - cluster label image

	8/15/2012 - Taylor C. Glenn
	6/02/2018 - Edited by Alina Zare
	12/2018 - Python Implementation by Yutai Zhou
	"""
	if tgt_sig.ndim == 1:
		tgt_sig = tgt_sig[:, np.newaxis]

	n_row, n_col, n_band = hsi_img.shape
	n_pixel = n_row * n_col

	hsi_data = hsi_img.reshape((n_pixel,n_band), order='F').T

	# cluster the data
	idx = KMeans(n_clusters = n_cluster, n_init = 1, max_iter=100).fit(hsi_data.T).labels_

	cluster_img = idx.reshape((n_row, n_col), order = 'F')

	# get cluster stats, create match filters
	mu = np.zeros((n_band,n_cluster))
	sig_inv = np.zeros((n_band, n_band, n_cluster))
	f = np.zeros((n_band,n_cluster))

	for i in range(n_cluster):
		z = hsi_data[:, idx == i]

		mu[:,i] = np.mean(z,1)
		sig_inv[:,:,i] = np.linalg.pinv(np.cov(z.T, rowvar=False))

		s = tgt_sig - mu[:,i][:,np.newaxis]
		f[:,i] = s.T @ sig_inv[:,:,i] / np.sqrt(s.T @ sig_inv[:,:,i] @ s)

	# compute matched filter output of each point
	ctmf_data = np.zeros(n_pixel)

	for i in range(n_pixel):
		z = hsi_data[:,i] - mu[:,idx[i]]
		ctmf_data[i] = f[:,idx[i]] @ z

	return ctmf_data.reshape([n_row, n_col], order='F'), cluster_img
Example #14
def main():
    file_data, row, col = file_open('D:\\速度.txt')

    # cluster to get the class label of each sample
    label = KMeans(n_clusters=2).fit_predict(file_data)
    label = label.reshape([row, col])
    print(len(label))
    SpeedClu = [[], []]
    for i in range(row):
        SpeedClu[label[i]].append(i)
    for i in range(len(SpeedClu)):
        print(SpeedClu[i])
def plotmodelLearn():
    l = [ica, nmf, pca, randDe]
    _, axes = plt.subplots(1, 3, figsize=(20, 5))
    module = ["ica", "nmf", "pca", "randDe"]
    count = 0
    for z in l:
        print('this is the ' + module[count])
        x, y = z.transformData()
        x_kmeans = KMeans(n_clusters=25).fit_predict(x)
        x_kmeans = x_kmeans.reshape(-1, 1)
        train_sizes = np.linspace(.1, 1.0, 5)
        train_sizes, train_scores, test_scores, fit_times, _ = learning_curve(
            MLPClassifier(max_iter=400, hidden_layer_sizes=50),
            x_kmeans,
            y,
            train_sizes=train_sizes,
            return_times=True)
        train_scores_mean = np.mean(train_scores, axis=1)
        train_scores_std = np.std(train_scores, axis=1)
        test_scores_mean = np.mean(test_scores, axis=1)
        test_scores_std = np.std(test_scores, axis=1)
        fit_times_mean = np.mean(fit_times, axis=1)
        fit_times_std = np.std(fit_times, axis=1)

        axes[0].plot(train_sizes,
                     train_scores_mean,
                     label="Training score: " + module[count])
        axes[1].plot(train_sizes,
                     test_scores_mean,
                     label="Cross-validation score: " + module[count])
        axes[2].plot(train_sizes, fit_times_mean, label=module[count])
        count += 1

    # Plot learning curve
    axes[0].grid()
    axes[0].legend(loc="best")
    axes[0].set_title("Learning Curve")
    axes[0].set_xlabel("Training examples")
    axes[0].set_ylabel("Score")
    # Plot learning curve
    axes[1].grid()
    axes[1].legend(loc="best")
    axes[1].set_title("Learning Curve")
    axes[1].set_xlabel("Training examples")
    axes[1].set_ylabel("Score")
    # Plot n_samples vs fit_times
    axes[2].grid()
    axes[2].set_xlabel("Training examples")
    axes[2].set_ylabel("fit_times")
    axes[2].set_title("Scalability of the model")
    plt.suptitle('NN Cancer with Kmeans as features')
    plt.show()
Example #16
def kmeans(path="1.png"):
    imgData, row, col = loadData(path)
    label = KMeans(n_clusters=4).fit_predict(imgData)

    label = label.reshape([row, col])
    pic_new = image.new("L", (row, col))
    for i in range(row):
        for j in range(col):
            pic_new.putpixel((i, j), int(256 / (label[i][j] + 1)))

    SavePath = "./result.png"
    pic_new.save(SavePath)
    return SavePath
Example #17
def Kmeans_pro(img,clu):
    img = np.transpose(img)
    m = img.shape[0]
    n = img.shape[1]
    data = []
    for i in range(m):
        for j in range(n):
            data.append(img[i,j])

    img_r = np.mat(data).reshape(-1,1)
    label = KMeans(n_clusters=clu).fit_predict(img_r)  # cluster to get the class label of each pixel
    label = label.reshape([m,n]) 
    label = np.transpose(label)
    return label
Example #18
def gray_cluster(img,step=1):
    img_data,row,col,smallorigin = load_data(img,step)
    if step==2:
        label = KMeans(n_clusters=3).fit_predict(img_data)  # 3 cluster centers
        label = label.reshape((row,col))    # each pixel's cluster label
        pic_new = Image.new("L",(row,col))
        for i in range(row):    # write gray values into the image according to each pixel's class
            for j in range(col):
                if label[i][j] == 2:
                    pic_new.putpixel((i,j),256)
        pic_new.save('./v3/222.jpg')
        pic_new = np.asarray(pic_new)
    else:
        label = KMeans(n_clusters=5).fit_predict(img_data)  # 5 cluster centers
        label = label.reshape((row, col))  # each pixel's cluster label
        pic_new = Image.new("L", (row, col))
        for i in range(row):  # write gray values into the image according to each pixel's class
            for j in range(col):
                if label[i][j] == 4:
                    pic_new.putpixel((i, j), 256)
        pic_new.save('./v3/444.jpg')
        pic_new = np.asarray(pic_new)

    return pic_new,cv2.cvtColor(np.asarray(smallorigin),cv2.COLOR_RGB2BGR)
def Kmeans(data, n_clusters):
    clusters = []
    i = 0
    for image in data:
        #unroll the image into one row per pixel; each pixel has 3 dimensions (RGB)
        img_unrolled = image.reshape(image.shape[0]*image.shape[1],image.shape[2])/255
        kmeans = KMeans(n_clusters=n_clusters, random_state=0,n_jobs=-1).fit_predict(img_unrolled)
        #append the labels after reshaping to the shape of the real image 321*481
        #kmeans.labels_.reshape(image.shape[:-1]))
        
        clusters.append(kmeans.reshape(image.shape[:-1]))
        i = i + 1
        if i % 25 == 0:
            print(i*2,"%" ,"of images clustered")
    return clusters 
Example #20
    def km(self, U, km_init, km_rep):
        eye_c = np.eye(self.c_true)
        tmp_y = np.zeros(self.N, dtype=np.int32)

        y = KMeans(self.c_true, n_init=km_rep, init=km_init).fit(U).labels_
        y = y.reshape(-1).astype(np.int32)

        Y = eye_c[y, :]
        cen = (self.X[self.ind_in, :].T.dot(Y)).T
        cen = cen / np.sum(Y, axis=0).reshape(-1, 1)

        sim = self.X[self.ind_out, :].dot(cen.T)
        y2 = np.argmax(sim, axis=1).astype(np.int32)

        tmp_y[self.ind_in] = y
        tmp_y[self.ind_out] = y2
        return tmp_y
Example #21
def binary(crop):
    m,n = crop.shape
    data_ = []
    for i_ in range(m):
        for j_ in range(n):
            x = crop[i_,j_]
            data_.append([x/256])
    crop = np.mat(data_)    
    label = KMeans(n_clusters=2).fit_predict(crop)  # cluster the image pixels into 2 classes
    color_list = [0,255]
    label = label.reshape([m,n])
    w_new = np.zeros((m,n))
    for i_1 in range(m):                          # assign gray values according to each pixel's class
        for j_1 in range(n):
            w_new[i_1,j_1] = color_list[label[i_1][j_1]]
    if sum(w_new.flatten()==0)>sum(w_new.flatten()==255):
        w_new = 255-w_new
    return w_new
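A minimal check of binary() on a synthetic grayscale crop (toy data, assuming the numpy/sklearn imports above); the majority cluster always comes out white because of the final flip.

import numpy as np

crop = np.zeros((40, 60), dtype=np.uint8)  # dark background ...
crop[10:30, 20:40] = 200                   # ... with a brighter rectangle
bw = binary(crop)
print(bw.shape)        # (40, 60)
print(np.unique(bw))   # [0. 255.]: background white, rectangle black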
def kmeansRemovingOutlierForClassifier():
    """
    use k-means to do outlier removal
    :return: NA
    """
    # load data
    X_train = np.load('inputClf_small/X_train.npy')
    y_train = np.load('inputClf_small/y_train.npy')
    y_train_price = np.load('inputClf_small/y_train_price.npy')

    # cluster initializing
    X_train1 = X_train[np.where(y_train == 0)[0], :]
    X_train2 = X_train[np.where(y_train == 1)[0], :]
    cluster1 = KMeans(init='random', n_clusters=1,
                      random_state=0).fit(X_train1)
    cluster1 = cluster1.cluster_centers_
    cluster2 = KMeans(init='random', n_clusters=1,
                      random_state=0).fit(X_train2)
    cluster2 = cluster2.cluster_centers_
    clusters = np.concatenate((cluster1, cluster2), axis=0)

    y_pred = KMeans(init='random', n_clusters=2,
                    random_state=2).fit_predict(X_train)
    y_pred = y_pred.reshape((y_pred.shape[0], 1))
    tmp = np.concatenate((y_train, y_pred), axis=1)

    sam = y_train == y_pred
    print "# total: {}".format(y_train.shape[0])
    print "# datas left: {}".format(np.sum(sam))
    # Keep 63.62% data.
    print "Keep {}% data.".format(
        round(np.sum(sam) * 100.0 / y_train.shape[0], 2))

    print tmp[0:22, :]
    print np.where(y_train == y_pred)[0]
    # keep the data which are not outliers
    X_train = X_train[np.where(y_train == y_pred)[0], :]
    y_train_price = y_train_price[np.where(y_train == y_pred)[0], :]
    y_train = y_train[np.where(y_train == y_pred)[0], :]
    np.save('inputClf_KMeansOutlierRemoval/X_train', X_train)
    np.save('inputClf_KMeansOutlierRemoval/y_train', y_train)
    np.save('inputClf_KMeansOutlierRemoval/y_train_price', y_train_price)
def small_fov_cluster(img , n , clusters=2):
    labeled = np.zeros_like(img)
    shape = [n,n]
    for k in range(np.shape(img)[2]):
        for i in range(0,np.shape(img)[0],n):
            for j in range(0,np.shape(img)[1],n):
                scaled = std_scale(img[i:i+n,j:j+n,k]).flatten()
                if(len(np.unique(scaled))==1):
                    labels = np.zeros_like(scaled)
                    labels = np.asarray(labels.reshape(shape))
                else:
                    X=[]
                    for x in scaled:
                        X.append([x])
                    labels = KMeans(n_clusters=clusters , random_state=0).fit_predict(X)
                    labels = np.asarray(labels.reshape(shape))
                labeled[i:i+n , j:j+n , k] = labels
        if(k % 5 == 0):
            sys.stdout.write('\r'+'...clustering... '+str(int(k / np.shape(img)[2] * 100))+' %')
    return(labeled)
Example #25
def phate_mean(data, label, shape):

    phate_operator = phate.PHATE(n_components=10)

    phate_operator.fit(data)
    mnist_phate = phate_operator.transform(plot_optimal_t=True)

    print(mnist_phate.shape)

    diff_potential = phate_operator.diff_potential
    y_pred = KMeans(n_clusters=10).fit_predict(diff_potential)

    true = label.to_numpy()

    #ARI
    true = true.reshape((shape, 1))
    y_pred = y_pred.reshape((shape, 1))

    combine = np.array((true, y_pred)).T
    combine = combine.reshape((shape, 2))

    np.random.shuffle(combine)

    subsampling = 20
    ARI = np.zeros(subsampling)
    gap = int(shape / subsampling)

    for it in range(subsampling):
        start = int(it * gap)
        end = int((it + 1) * gap)
        x = combine[start:end, 0]
        y = combine[start:end, 1]
        ARI[it] = adjusted_rand_score(x, y)

    print(np.average(ARI))

    phate_operator_plot = phate.PHATE(t=25)
    phate_operator_plot.fit(data)
    mnist_phate_plot = phate_operator_plot.transform()
    phate.plot.scatter2d(mnist_phate_plot, c=y_pred)
Example #26
def process_pic():
    imgData, row, col = processData('./database/' + file)

    # image segmentation with KMeans clustering
    label = KMeans(n_clusters=3).fit_predict(imgData)  # cluster the image into 3 classes
    label = label.reshape([row, col])
    pic_new = image.new("L", (row, col))
    for i in range(row):  # assign gray values to the image according to each pixel's class
        for j in range(col):
            pic_new.putpixel((i, j), int(256 / (label[i][j] + 1)))
    pic_new.save("./result/k-means/" + file, "JPEG")

    # image enhancement: binarization
    img = cv2.imread('./result/k-means/' + file)

    GrayImage = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    ret, thresh1 = cv2.threshold(GrayImage, 127, 255, cv2.THRESH_BINARY)
    ret, thresh2 = cv2.threshold(GrayImage, 127, 255, cv2.THRESH_BINARY_INV)

    plt.imshow(thresh1, cmap='gray')  # draw the binarized image so the saved figure is not empty
    plt.savefig('./result/binary/' + file)

    return 0
Example #27
def find_background(image, morph_filters=True):
    """
    Returns an index map of the pixels in input image believed to be background.

    This function first converts to HSV and then uses k-means to partition the saturation-values.
    The average of the two cluster-middle-points is used as the threshold to distinguish between
    foreground and background.
    Optionally, morphological filters may be applied to fill in gaps in the detected foreground
    (e.g., holes within detected cells) and remove background specks (debris that has the same color
    as the cells we want to detect).

    :param image: RGB image (either uint8 0-255, or float 0.0-1.0), with light background.
    :param morph_filters: boolean flag. Whether or not to apply morphological filters to
        improve background detection.
    :return: index map of pixels believed to be the background.
    """
    if image.dtype == np.uint8:
        image = image.astype(np.float64) / 255.
    # convert to HSV
    # https://en.wikipedia.org/wiki/HSL_and_HSV#/media/File:HSV_color_solid_cylinder_saturation_gray.png
    hsv_img = color.rgb2hsv(image)

    # find background through a threshold saturation level
    # apply k-means to cluster the saturation values into two groups (foreground and background)
    S = hsv_img[:, :, 1].flatten()
    V = hsv_img[:, :, 2].flatten()
    data = np.stack((S, V), axis=1)
    mask = KMeans(n_clusters=2).fit_predict(data)
    mask = mask.reshape(image.shape[0:2])
    if np.average(hsv_img[mask == 1, 2]) > np.average(
            hsv_img[mask == 0, 2]):  # ensure foreground is index 1
        mask = 1 - mask
    if morph_filters:
        mask = morph.binary_closing(mask, selem=morph.disk(30))
        mask = morph.binary_dilation(mask, selem=morph.disk(5))
        mask = morph.binary_opening(mask, selem=morph.disk(30))
    background_mask = mask == 0
    return background_mask
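A small smoke test for find_background() on a synthetic light-background image (toy data; morphological filtering is skipped to keep it fast, and the module-level numpy/skimage/sklearn imports the snippet relies on are assumed):

import numpy as np

image = np.full((64, 64, 3), 0.9)        # light gray background
image[20:44, 20:44] = [0.8, 0.2, 0.2]    # saturated reddish "cell"
bg = find_background(image, morph_filters=False)
print(bg.shape, bg.dtype)                # (64, 64) bool
print(bg[0, 0], bg[32, 32])              # True (background), False (foreground)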
def CDetectpca(img1, img2, h, S, n):
    sp = img1.shape
    img1 = img1[sp[0] % h:, sp[1] % h:]
    img2 = img2[sp[0] % h:, sp[1] % h:]
    dif = np.array(abs(-1 * img1 + img2), np.uint8)
    sp = dif.shape

    def getblock(xd, x, y, h):
        x = x - h // 2
        y = y - h // 2
        vec = np.zeros(h * h, np.int32)
        for i in range(h):
            for j in range(h):
                vec[i * h + j] = xd[(x + i) % sp[0], (y + j) % sp[1]]

        return vec

    samples = []
    H, W = np.array(sp) // h
    for i in range(H):
        for j in range(W):
            samples.append(getblock(dif, i * h + h // 2, j * h + h // 2, h))

    pca = PCA(n_components=S, whiten=False)
    pca.fit(samples)
    res = np.zeros((sp[0], sp[1], S))
    for i in range(sp[0]):
        for j in range(sp[1]):
            res[i, j] = pca.transform(getblock(dif, i, j, h).reshape(1, -1))

    res = res.reshape(-1, S)
    ans = KMeans(n_clusters=2, init='k-means++', n_jobs=-1).fit_predict(res)
    ans = ans.reshape(sp)
    if ans.sum() * 2 < ans.size:
        ans = 1 - ans

    if n != 0:
        kernel = np.ones((n, n), np.uint8)
        ans = cv.morphologyEx(np.uint8(ans), cv.MORPH_CLOSE, kernel)
    return ans
Example #29
def classify_kmeans(data, params):
    """
    Classification method: KMeans
    """
    from sklearn.cluster import KMeans

    centers = params.get('centers', None)
    if isinstance(centers, list):
        centers = np.asarray(centers)

    k = params.get('k', None)
    if k is None and centers is not None:
        k = np.asarray(centers).shape[0]
    cur_shape = data.shape

    flatten_init = np.array([np.real(centers), np.imag(centers)]).T

    flatten_data = data.flatten()
    ret_ans = KMeans(n_clusters=k, init=flatten_init).fit_predict(
        np.array([np.real(flatten_data),
                  np.imag(flatten_data)]).T)
    return 2**ret_ans.reshape(cur_shape)
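A hypothetical call of classify_kmeans() on toy complex-valued data with user-supplied centers (the values are invented for the demo):

import numpy as np

data = np.array([1 + 1j, 1.1 + 0.9j, -1 - 1j, -0.9 - 1.1j]).reshape(2, 2)
params = {'centers': [1 + 1j, -1 - 1j]}  # k is inferred from the centers
out = classify_kmeans(data, params)
print(out)        # entries are 2**label, so values come from {1, 2}
print(out.shape)  # (2, 2): same shape as the input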
Example #30
    def run(self):
        self.__im = self.__load(r'/Users/samuellin/Desktop/691.JPG')
        self.__im, self.__cov = self.__normalize(self.__im)

        print('cal neighborWeight')

        w = self.__neighborWeight(self.__im)

        print('cal laplace')

        L = self.__calLaplace(w)

        print('cal eigen vector')

        eigen_v = self.__calEigenVector(L)

        normalize_eigen_v = self.__normalizeEigen(eigen_v)

        print('normalize_eigen_v:')
        print(normalize_eigen_v)
        print(normalize_eigen_v.shape)

        ret = KMeans(n_clusters=self.__k).fit_predict(normalize_eigen_v)
        print(ret.shape)

        print(ret)

        ret = ret.reshape(self.RESIZE_SIZE)

        for i, val_i in enumerate(self.__im):
            for j, val_j in enumerate(val_i):
                if ret[i][j] == 0:
                    self.__im[i][j] = np.array([0, 0, 0])

        cv2.imshow('test', self.__im)
        cv2.waitKey(1)

        import time
        time.sleep(100)
Example #31
"""

import numpy as np
import PIL.Image as image
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt

def loadData(filePath):
    f = open(filePath, 'rb')  # open the file in binary mode
    data = []
    img = image.open(f)
    m,n = img.size
    for i in range(m):
        for j in range(n):
            x, y, z = img.getpixel((i, j))
            data.append([x/256.0, y/256.0, z/256.0])
    f.close()
    return np.mat(data), m, n  # return the pixel matrix together with the image size
imgData, row, col = loadData('bull.jpg')  # load the data

label = KMeans(n_clusters=4).fit_predict(imgData)
 
label = label.reshape([row,col])
pic_new = image.new("L", (row, col))
for i in range(row):
    for j in range(col):
        pic_new.putpixel((i,j), int(256/(label[i][j]+1)))
pic_new.save("result-bull-4.jpg", "JPEG")
plt.imshow(pic_new)
plt.show()
import numpy as np
import PIL.Image as image
from sklearn.cluster import KMeans

def loadData(filePath):
    f = open(filePath,'rb')
    data = []
    img = image.open(f)
    m,n = img.size
    for i in range(m):
        for j in range(n):
            x,y,z = img.getpixel((i,j))
            data.append([x/256.0,y/256.0,z/256.0])
    f.close()
    return np.mat(data),m,n

imgData,row,col = loadData('starbucks.jpg')
label = KMeans(n_clusters=4).fit_predict(imgData)

label = label.reshape([row,col])
pic_new = image.new("L", (row, col))
for i in range(row):
    for j in range(col):
        pic_new.putpixel((i,j), int(256/(label[i][j]+1)))
pic_new.save("result-bull-4.jpg", "JPEG")
Example #33
colors = [[0, 0, 0],
            [0, 0, 255],
            [0, 255, 0],
            [255, 0, 0],
            [255, 255, 0],
            [255, 0, 255],
            [0, 255, 255]]
             

filename = sys.argv[1]
original = io.imread(filename)

if original.shape[0] % 2 != 0 or original.shape[1] % 2 != 0:
    original = tfm.resize(original, [original.shape[0]//2*2, original.shape[1]//2*2, original.shape[2]])
    io.imsave(filename, original)

#downscale = 2
img = original#downscale(original)

arr = np.reshape(img, [-1, 3])
labels = KMeans(n_clusters = 8).fit_predict(arr)
labels = labels.reshape((img.shape[0], -1))

labels = (labels * 32).astype(np.uint8)  # spread the 8 cluster labels over 0-224 gray levels for 8-bit output

#labels = upscale(labels)


io.imsave("{}_sem.png".format(filename[:-4]), labels)