def __init__(self, k=2, lr=0.01, epochs=100, inferStds=True):
    self.k = k
    self.epochs = epochs
    self.inferStds = inferStds
    self.lr = lr

    if self.inferStds:
        # compute stds from data
        self.centers, self.stds = kmeans(X, self.k)
        print('centers, stds', self.centers, self.stds)
    else:
        # use a fixed std
        self.centers, _ = kmeans(X, self.k)
        # new_kmeans = KMeans(self.k, random_state=0).fit(X.reshape(-1, 1))
        # self.centers = new_kmeans.cluster_centers_.flatten()
        dMax = max([np.abs(c1 - c2) for c1 in self.centers for c2 in self.centers])
        self.stds = np.repeat(dMax / np.sqrt(2 * self.k), self.k)

    self.centers = torch.from_numpy(self.centers).float()
    self.stds = torch.from_numpy(self.stds).float()
    self.model = RbfNet(self.centers, self.stds)
    self.optimizer = optim.Adam(self.model.parameters(), lr=self.lr)
    self.loss_fun = nn.MSELoss()
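# Hedged usage sketch (not from the original source): assuming the __init__ above
# belongs to a small RBF regression wrapper (called RBFRegressor here purely for
# illustration), that the training data `X` it references is available at module
# level, and that `x_batch`/`y_batch` are float tensors prepared by the caller,
# training might look like this:
#
#     net = RBFRegressor(k=3, lr=0.01, epochs=100, inferStds=True)
#     for epoch in range(net.epochs):
#         net.optimizer.zero_grad()
#         loss = net.loss_fun(net.model(x_batch), y_batch)
#         loss.backward()
#         net.optimizer.step()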
def create_dataloader(dataset, batch_size, mode="train", n_buckets=None, trans_fn=None):
    """
    Create dataloader.

    Args:
        dataset(obj:`paddle.io.Dataset`): Dataset instance.
        batch_size(obj:`int`, optional, defaults to 1): The sample number of a mini-batch.
        mode(obj:`str`, optional, defaults to obj:`train`): If mode is 'train', it will shuffle the dataset randomly.
        n_buckets(obj:`int`, optional, defaults to `None`): If n_buckets is not None, it will divide the dataset into n_buckets according to the sequence lengths.
        trans_fn(obj:`callable`, optional, defaults to `None`): Function to convert a data sample to input ids, etc.
    """
    if n_buckets:
        word_examples = [seq["FORM"] for seq in dataset]
        lengths = [len(i) + 1 for i in word_examples]
        buckets = dict(zip(*kmeans(lengths, n_buckets)))
    else:
        buckets = None
    if trans_fn:
        dataset = dataset.map(trans_fn)

    if n_buckets:
        if mode == "train":
            batch_sampler = BucketsSampler(
                buckets=buckets,
                batch_size=batch_size,
                shuffle=True,
            )
        else:
            batch_sampler = BucketsSampler(
                buckets=buckets,
                batch_size=batch_size,
                shuffle=False,
            )
    else:
        batch_sampler = SequentialSampler(
            batch_size=batch_size,
            corpus_length=len(dataset),
        )

    # Subclass of `paddle.io.Dataset`
    dataset = Batchify(dataset, batch_sampler)

    # Following the `paddle.io.DataLoader` API, set `batch_size` and
    # `batch_sampler` to `None` to disable automatic batchifying of the dataset.
    data_loader = paddle.io.DataLoader(
        dataset=dataset,
        batch_sampler=None,
        batch_size=None,
        return_list=True)
    return data_loader, buckets
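# Hedged illustration (assumes this `kmeans` returns (centroids, clusters), where
# clusters[i] lists the indices of the sequences assigned to centroid i, so that
# dict(zip(*kmeans(...))) maps a representative length to its member indices):
#
#     lengths = [5, 6, 7, 21, 22, 23]
#     buckets = dict(zip(*kmeans(lengths, 2)))
#     # -> roughly {6: [0, 1, 2], 22: [3, 4, 5]}: short and long sequences land in
#     # separate buckets, so each mini-batch only pads to a similar length.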
def _learn_centroids(X, ncentroids, nsubvects, subvect_len):
    ret = np.empty((ncentroids, nsubvects, subvect_len))
    for i in range(nsubvects):
        start_col = i * subvect_len
        end_col = start_col + subvect_len
        X_in = X[:, start_col:end_col]
        centroids, labels = kmeans(X_in, ncentroids)
        ret[:, i, :] = centroids
    return ret
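# Hedged usage sketch (assumes `kmeans` returns (centroids, labels) as used above):
# splitting 64-dimensional vectors into 8 subvectors of length 8 and learning 256
# centroids per subspace yields a (256, 8, 8) codebook array.
#
#     X = np.random.randn(10000, 64).astype(np.float32)
#     codebook = _learn_centroids(X, ncentroids=256, nsubvects=8, subvect_len=8)
#     assert codebook.shape == (256, 8, 8)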
def main(img1, img2, chg_map, args=None):
    img_shape = np.shape(img1)
    im1 = np.reshape(img1, newshape=[-1, img_shape[-1]])
    im2 = np.reshape(img2, newshape=[-1, img_shape[-1]])

    im1 = utils.normlize(im1)
    im2 = utils.normlize(im2)

    chg_ref = np.reshape(chg_map, newshape=[-1])

    imm = None
    all_magnitude = None
    differ = np.zeros(shape=[np.shape(chg_ref)[0], net_shape[-1], args.iter])

    # load cva pre-detection result
    ind = sio.loadmat(args.area + '/cva_ref.mat')
    cva_ind = ind['cva_ref']
    cva_ind = np.reshape(cva_ind, newshape=[-1])

    for k1 in range(args.iter):
        logging.info('In %2d-th iteration········' % (k1))
        i1, i2 = utils.getTrainSamples(cva_ind, im1, im2, args.trn)

        loss_log, vpro, fcx, fcy, bval = dsfa(
            xtrain=i1,
            ytrain=i2,
            xtest=im1,
            ytest=im2,
            net_shape=net_shape,
            args=args)

        imm, magnitude, differ_map = utils.linear_sfa(fcx, fcy, vpro, shape=img_shape)
        magnitude = np.reshape(magnitude, img_shape[0:-1])
        differ[:, :, k1] = differ_map

        if all_magnitude is None:
            all_magnitude = magnitude / np.max(magnitude)
        else:
            all_magnitude = all_magnitude + magnitude / np.max(magnitude)

    change_map = np.reshape(utils.kmeans(np.reshape(all_magnitude, [-1])), img_shape[0:-1])

    logging.info('Max value of change magnitude: %.4f' % (np.max(all_magnitude)))
    logging.info('Min value of change magnitude: %.4f' % (np.min(all_magnitude)))

    # magnitude
    acc_un, acc_chg, acc_all2, acc_tp = utils.metric(1 - change_map, chg_map)
    acc_un, acc_chg, acc_all3, acc_tp = utils.metric(change_map, chg_map)

    plt.imsave('results.png', all_magnitude, cmap='gray')
    # plt.show()

    return None
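# Hedged sketch of the final binarization step (assuming utils.kmeans performs a
# two-cluster split of the flattened change magnitude, as used above). A stand-in
# with scikit-learn might look like this:
#
#     from sklearn.cluster import KMeans
#     flat = np.reshape(all_magnitude, [-1, 1])
#     labels = KMeans(n_clusters=2, n_init=10).fit_predict(flat)
#     change_map = np.reshape(labels, img_shape[0:-1])
#     # Which cluster label means "changed" is arbitrary, which is why the code
#     # above evaluates both change_map and 1 - change_map against chg_map.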
def test_kmeans(self):
    K = 3
    colors = ['r', 'b', 'g']
    labels = kmeans(self.X, K)
    for l in range(0, K):
        # make scatter plot for cluster l
        x1 = self.X[labels == l][:, 0]
        x2 = self.X[labels == l][:, 1]
        plt.ylim(0, 6)
        plt.xlim(-1, 9)
        plt.scatter(x1, x2, c=colors[l], marker='o')
    plt.show()
def learn_pq(X, ncentroids, nsubvects, subvect_len, max_kmeans_iters=16):
    codebooks = np.empty((ncentroids, nsubvects, subvect_len))
    assignments = np.empty((X.shape[0], nsubvects), dtype=np.int64)

    # print("codebooks shape: ", codebooks.shape)

    for i in range(nsubvects):
        start_col = i * subvect_len
        end_col = start_col + subvect_len
        X_in = X[:, start_col:end_col]
        centroids, labels = kmeans(X_in, ncentroids, max_iter=max_kmeans_iters)
        codebooks[:, i, :] = centroids
        assignments[:, i] = labels

    return codebooks, assignments  # [2**nbits x M x D/M], [N x M]
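# Hedged companion sketch (not part of the original source): reconstructing the
# product-quantized approximation of each row from the codebooks/assignments
# returned by learn_pq above. Only numpy is required; shapes follow the
# [2**nbits x M x D/M] and [N x M] convention noted in the return comment.
import numpy as np

def pq_reconstruct(codebooks, assignments):
    # codebooks: (ncentroids, nsubvects, subvect_len); assignments: (N, nsubvects)
    n_rows, n_subvects = assignments.shape
    subvect_len = codebooks.shape[2]
    X_hat = np.empty((n_rows, n_subvects * subvect_len))
    for m in range(n_subvects):
        # pick, for every row, the centroid chosen in subspace m
        X_hat[:, m * subvect_len:(m + 1) * subvect_len] = codebooks[assignments[:, m], m, :]
    return X_hat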
def test_kmeans2(self):
    """ Not a homework problem. Just trying things out on sklearn """
    labels = kmeans_without_K(self.X)
    labels_true = kmeans(self.X, 3)
    print('adjusted rand score: {}'.format(
        metrics.adjusted_rand_score(labels_true, labels)))
    K = np.unique(labels).size
    colors = cm.rainbow(np.linspace(0, 1, K))
    for l in range(0, K):
        # make scatter plot for cluster l
        x1 = self.X[labels == l][:, 0]
        x2 = self.X[labels == l][:, 1]
        plt.ylim(0, 6)
        plt.xlim(-1, 9)
        plt.scatter(x1, x2, c=colors[l], marker='o')
    plt.show()
def main(img1, img2, chg_map, args=None):
    img_shape = np.shape(img1)
    im1 = np.reshape(img1, newshape=[-1, img_shape[-1]])
    im2 = np.reshape(img2, newshape=[-1, img_shape[-1]])

    im1 = utils.normlize(im1)
    im2 = utils.normlize(im2)

    chg_ref = np.reshape(chg_map, newshape=[-1])

    imm = None
    all_magnitude = None
    differ = np.zeros(shape=[np.shape(chg_ref)[0], net_shape[-1]])

    # load cva pre-detection result
    ind = sio.loadmat(args.area + '/cva_ref.mat')
    cva_ind = ind['cva_ref']
    cva_ind = np.reshape(cva_ind, newshape=[-1])

    i1, i2 = utils.getTrainSamples(cva_ind, im1, im2, args.trn)

    loss_log, vpro, fcx, fcy, bval = dsfa(
        xtrain=i1,
        ytrain=i2,
        xtest=im1,
        ytest=im2,
        net_shape=net_shape,
        args=args)

    imm, magnitude, differ_map = utils.linear_sfa(fcx, fcy, vpro, shape=img_shape)
    magnitude = np.reshape(magnitude, img_shape[0:-1])
    differ = differ_map

    change_map = np.reshape(utils.kmeans(np.reshape(magnitude, [-1])), img_shape[0:-1])

    # magnitude
    acc_un, acc_chg, acc_all2, acc_tp = utils.metric(1 - change_map, chg_map)
    acc_un, acc_chg, acc_all3, acc_tp = utils.metric(change_map, chg_map)

    plt.imsave('results.png', change_map, cmap='gray')
    # plt.show()

    return None
model = EffectRegressorMLP(opts)
model.load(opts["save"], "_best", 2)
model.encoder2.eval()

transform = data.default_transform(size=opts["size"], affine=False, mean=0.279, std=0.0094)
trainset = data.PairedObjectData(transform=transform)

K = 6
ok = False
while not ok:
    ok = True
    centroids, assigns, mse, _ = utils.kmeans(trainset.effect, k=K)
    print(mse)
    # undo the effect normalization so the centroids are in the original units
    centroids = centroids * (trainset.eff_std + 1e-6) + trainset.eff_mu
    effect_names = []

    # first show all centroids
    for i, c_i in enumerate(centroids):
        print("Centroid %d: %.2f, %.2f, %.2f, %.2f, %.2f, %.2f"
              % (i, c_i[0], c_i[1], c_i[2], c_i[3], c_i[4], c_i[5]))

    # then ask the user to name the effect of each centroid; "reset" re-clusters
    for i, c_i in enumerate(centroids):
        print("Centroid %d: %.2f, %.2f, %.2f, %.2f, %.2f, %.2f"
              % (i, c_i[0], c_i[1], c_i[2], c_i[3], c_i[4], c_i[5]))
        print("What is this effect?")
        print(">>>", end="")
        name = input()
        if name == "reset":
            ok = False
# -*- coding: utf-8 -*-
from sklearn.datasets import make_blobs
import numpy as np
import tensorflow as tf
from utils import kmeans

cn = 10
X, y_true = make_blobs(n_samples=150, centers=5, cluster_std=0.5, random_state=10)

kmeans_ops = kmeans(cluster_num=cn)

mask = np.concatenate([np.ones((100, 2)), np.zeros((50, 2))], axis=0)
X = X * mask

points = tf.constant(X, dtype=tf.float32)
centers = kmeans_ops(tf.reshape(points, [1] + points.shape.as_list()))

gpu_options = tf.GPUOptions(allow_growth=True)
with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
    predict = sess.run(centers)
    print(predict)

y = y_true.reshape((-1, 1))
for i in range(4):
    print("center i %d: " % i)
    label = np.sum(X * (y == i), axis=0) / np.sum((y == i), axis=0)
    print(label)

save = np.concatenate([X, predict[0]])
np.save('result.npy', save)