def __init__(self, k=2, lr=0.01, epochs=100, inferStds=True):
        """Set up an RBF-network trainer.

        Args:
            k: number of RBF centers (k-means clusters).
            lr: learning rate for the Adam optimizer.
            epochs: number of training epochs (stored; not used here).
            inferStds: if True, take per-cluster stds from kmeans();
                otherwise use one shared std derived from the maximum
                distance between any two centers.

        NOTE(review): `X` (the training data) is read from an
        enclosing/global scope rather than passed in — confirm it is
        defined before this constructor runs.
        """
        self.k = k
        self.epochs = epochs
        self.inferStds = inferStds
        self.lr = lr

        if self.inferStds:
            # compute stds from data
            self.centers, self.stds = kmeans(X, self.k)
            print('centers, stds', self.centers, self.stds)
        else:
            # use a fixed std
            self.centers, _ = kmeans(X, self.k)
            # new_kmeans = KMeans(self.k, random_state=0).fit(X.reshape(-1,1))
            # self.centers = new_kmeans.cluster_centers_.flatten()
            # Largest pairwise distance between centers; used to derive a
            # single shared bandwidth for all RBF units.
            dMax = max([
                np.abs(c1 - c2) for c1 in self.centers for c2 in self.centers
            ])
            self.stds = np.repeat(dMax / np.sqrt(2 * self.k), self.k)

        # Convert to float32 tensors for the torch model.
        self.centers = torch.from_numpy(self.centers).float()
        self.stds = torch.from_numpy(self.stds).float()
        self.model = RbfNet(self.centers, self.stds)
        self.optimizer = optim.Adam(self.model.parameters(), lr=self.lr)
        self.loss_fun = nn.MSELoss()
Ejemplo n.º 2
0
def create_dataloader(dataset,
                      batch_size,
                      mode="train",
                      n_buckets=None,
                      trans_fn=None):
    """
    Create a dataloader, optionally bucketing sequences by length.

    Args:
        dataset(obj:`paddle.io.Dataset`): Dataset instance.
        batch_size(obj:`int`): The sample number of a mini-batch.
        mode(obj:`str`, optional, defaults to obj:`train`): If mode is 'train',
            the dataset is shuffled randomly.
        n_buckets(obj:`int`, optional, defaults to `None`): If n_buckets is not
            None, the dataset is divided into n_buckets according to the
            sequence lengths.
        trans_fn(obj:`callable`, optional, defaults to `None`): Function to
            convert a data sample to input ids, etc.

    Returns:
        A `(data_loader, buckets)` tuple; `buckets` is None when
        `n_buckets` is falsy.
    """
    if n_buckets:
        word_examples = [seq["FORM"] for seq in dataset]
        # +1 per sequence — presumably accounts for an appended ROOT/BOS
        # token; confirm against the model's input construction.
        lengths = [len(i) + 1 for i in word_examples]
        buckets = dict(zip(*kmeans(lengths, n_buckets)))
    else:
        buckets = None
    if trans_fn:
        dataset = dataset.map(trans_fn)

    if n_buckets:
        # Only the training split is shuffled; eval/test order is stable.
        batch_sampler = BucketsSampler(
            buckets=buckets,
            batch_size=batch_size,
            shuffle=(mode == "train"),
        )
    else:
        batch_sampler = SequentialSampler(
            batch_size=batch_size,
            corpus_length=len(dataset),
        )

    # Subclass of `paddle.io.Dataset`
    dataset = Batchify(dataset, batch_sampler)

    # According to the api of `paddle.io.DataLoader` set `batch_size`
    # and `batch_sampler` to `None` to disable batchify dataset automatically
    data_loader = paddle.io.DataLoader(dataset=dataset,
                                       batch_sampler=None,
                                       batch_size=None,
                                       return_list=True)
    return data_loader, buckets
Ejemplo n.º 3
0
Archivo: main.py Proyecto: ninickl/bolt
def _learn_centroids(X, ncentroids, nsubvects, subvect_len):
    """Learn a k-means codebook per disjoint subvector of X.

    Splits the columns of X into `nsubvects` contiguous slices of width
    `subvect_len` and clusters each slice independently.

    Returns:
        Array of shape (ncentroids, nsubvects, subvect_len) holding the
        centroids for each subvector.
    """
    codebook = np.empty((ncentroids, nsubvects, subvect_len))
    for sub_idx in range(nsubvects):
        lo = sub_idx * subvect_len
        cols = slice(lo, lo + subvect_len)
        centroids, _ = kmeans(X[:, cols], ncentroids)
        codebook[:, sub_idx, :] = centroids

    return codebook
Ejemplo n.º 4
0
def main(img1, img2, chg_map, args=None):
    """Run iterative DSFA change detection on an image pair.

    Repeats `args.iter` rounds of: sample training pixels guided by a
    CVA pre-detection mask, train DSFA, accumulate the normalized change
    magnitude; then thresholds the accumulated magnitude with k-means to
    produce a binary change map and saves it as an image.

    NOTE(review): relies on a module-level `net_shape`; `img1`/`img2`
    are presumably (H, W, C) arrays — confirm with callers.
    """

    img_shape = np.shape(img1)

    # Flatten spatial dimensions: each row is one pixel's spectral vector.
    im1 = np.reshape(img1, newshape=[-1,img_shape[-1]])
    im2 = np.reshape(img2, newshape=[-1,img_shape[-1]])

    im1 = utils.normlize(im1)
    im2 = utils.normlize(im2)

    chg_ref = np.reshape(chg_map, newshape=[-1])

    imm = None
    all_magnitude = None
    # Per-pixel difference maps collected across iterations.
    # NOTE(review): `differ` is filled but never read afterwards.
    differ = np.zeros(shape=[np.shape(chg_ref)[0],net_shape[-1], args.iter])

    # load cva pre-detection result
    ind = sio.loadmat(args.area+'/cva_ref.mat')
    cva_ind = ind['cva_ref']
    cva_ind = np.reshape(cva_ind, newshape=[-1])

    for k1 in range(args.iter):

        logging.info('In %2d-th iteration········' % (k1))

        # Draw training pixel pairs from regions CVA marks as unchanged.
        i1, i2 = utils.getTrainSamples(cva_ind, im1, im2, args.trn)

        loss_log, vpro, fcx, fcy, bval = dsfa(
            xtrain=i1, ytrain=i2, xtest=im1, ytest=im2, net_shape=net_shape, args=args)

        imm, magnitude, differ_map = utils.linear_sfa(fcx, fcy, vpro, shape=img_shape)

        magnitude = np.reshape(magnitude, img_shape[0:-1])
        differ[:, :, k1] = differ_map

        # Accumulate magnitudes normalized per-iteration to [0, 1].
        if all_magnitude is None:
            all_magnitude = magnitude / np.max(magnitude)
        else:
            all_magnitude = all_magnitude + magnitude / np.max(magnitude)


    # Binarize the accumulated magnitude via 2-class k-means.
    change_map = np.reshape(utils.kmeans(np.reshape(all_magnitude, [-1])), img_shape[0:-1])

    logging.info('Max value of change magnitude: %.4f'%(np.max(all_magnitude)))
    logging.info('Min value of change magnitude: %.4f'%(np.min(all_magnitude)))

    # magnitude
    # Evaluate both label polarities since k-means cluster ids are arbitrary.
    # NOTE(review): the first call's results are overwritten and unused.
    acc_un, acc_chg, acc_all2, acc_tp = utils.metric(1-change_map, chg_map)
    acc_un, acc_chg, acc_all3, acc_tp = utils.metric(change_map, chg_map)
    plt.imsave('results.png',all_magnitude, cmap='gray')
    #plt.show()

    return None
Ejemplo n.º 5
0
    def test_kmeans(self):
        """Cluster self.X into three groups and scatter-plot each cluster."""
        num_clusters = 3
        cluster_colors = ('r', 'b', 'g')
        labels = kmeans(self.X, num_clusters)

        for cluster_id, color in zip(range(num_clusters), cluster_colors):
            # Points assigned to this cluster, one color per cluster.
            points = self.X[labels == cluster_id]
            plt.ylim(0, 6)
            plt.xlim(-1, 9)
            plt.scatter(points[:, 0], points[:, 1], c=color, marker='o')

        plt.show()
Ejemplo n.º 6
0
 def test_kmeans(self):
     """Cluster self.X into K=3 groups and scatter-plot each cluster
     in its own color."""
     K = 3
     colors = ['r','b','g']
     labels = kmeans(self.X, K)
     
     for l in range(0,K):
         # make scatter plot for cluster l
         x1 = self.X[labels == l][:,0]
         x2 = self.X[labels == l][:,1]
         plt.ylim(0,6)
         plt.xlim(-1,9)
         plt.scatter(x1, x2, c=colors[l], marker='o')
     
     plt.show()
Ejemplo n.º 7
0
def learn_pq(X, ncentroids, nsubvects, subvect_len, max_kmeans_iters=16):
    """Learn product-quantization codebooks and assignments.

    Clusters each of the `nsubvects` contiguous column slices of X
    (each `subvect_len` wide) independently with k-means.

    Args:
        X: data matrix of shape (N, nsubvects * subvect_len).
        ncentroids: centroids per subvector codebook.
        nsubvects: number of subvector codebooks (M).
        subvect_len: width of each subvector slice (D/M).
        max_kmeans_iters: k-means iteration cap.

    Returns:
        (codebooks, assignments): codebooks of shape
        (ncentroids, nsubvects, subvect_len) and integer assignments of
        shape (N, nsubvects).
    """
    codebooks = np.empty((ncentroids, nsubvects, subvect_len))
    # `np.int` was removed in NumPy 1.24; plain `int` yields the default
    # integer dtype and is the supported spelling.
    assignments = np.empty((X.shape[0], nsubvects), dtype=int)

    for i in range(nsubvects):
        start_col = i * subvect_len
        end_col = start_col + subvect_len
        X_in = X[:, start_col:end_col]
        centroids, labels = kmeans(X_in, ncentroids, max_iter=max_kmeans_iters)
        codebooks[:, i, :] = centroids
        assignments[:, i] = labels

    return codebooks, assignments  # [2**nbits x M x D/M], [N x M]
Ejemplo n.º 8
0
 def test_kmeans2(self):
     """
     Not a homework problem. Just trying things out on sklearn
     """
     labels = kmeans_without_K(self.X)
     labels_true = kmeans(self.X, 3)
     
     # print() call form works on both Python 2 and 3; the bare
     # `print ...` statement is a SyntaxError on Python 3.
     print('adjusted rand score: {}'.format(metrics.adjusted_rand_score(labels_true, labels)))
     
     K = np.unique(labels).size
     colors = cm.rainbow(np.linspace(0,1,K))
     
     for l in range(0,K):
         # make scatter plot for cluster l
         x1 = self.X[labels == l][:,0]
         x2 = self.X[labels == l][:,1]
         plt.ylim(0,6)
         plt.xlim(-1,9)
         plt.scatter(x1, x2, c=colors[l], marker='o')
     
     plt.show()
Ejemplo n.º 9
0
def main(img1, img2, chg_map, args=None):
    """Run single-pass DSFA change detection on an image pair.

    Samples training pixels guided by a CVA pre-detection mask, trains
    DSFA once, thresholds the resulting change magnitude with k-means,
    and saves the binary change map as an image.

    NOTE(review): relies on a module-level `net_shape`; `img1`/`img2`
    are presumably (H, W, C) arrays — confirm with callers.
    """

    img_shape = np.shape(img1)

    # Flatten spatial dimensions: each row is one pixel's spectral vector.
    im1 = np.reshape(img1, newshape=[-1,img_shape[-1]])
    im2 = np.reshape(img2, newshape=[-1,img_shape[-1]])

    im1 = utils.normlize(im1)
    im2 = utils.normlize(im2)

    chg_ref = np.reshape(chg_map, newshape=[-1])

    imm = None
    all_magnitude = None
    # NOTE(review): this zero array is immediately replaced by
    # `differ_map` below and never read — appears to be dead code.
    differ = np.zeros(shape=[np.shape(chg_ref)[0],net_shape[-1]])

    # load cva pre-detection result
    ind = sio.loadmat(args.area+'/cva_ref.mat')
    cva_ind = ind['cva_ref']
    cva_ind = np.reshape(cva_ind, newshape=[-1])

    # Draw training pixel pairs from regions CVA marks as unchanged.
    i1, i2 = utils.getTrainSamples(cva_ind, im1, im2, args.trn)

    loss_log, vpro, fcx, fcy, bval = dsfa(
        xtrain=i1, ytrain=i2, xtest=im1, ytest=im2, net_shape=net_shape, args=args)

    imm, magnitude, differ_map = utils.linear_sfa(fcx, fcy, vpro, shape=img_shape)

    magnitude = np.reshape(magnitude, img_shape[0:-1])
    differ = differ_map

    # Binarize the change magnitude via 2-class k-means.
    change_map = np.reshape(utils.kmeans(np.reshape(magnitude, [-1])), img_shape[0:-1])

    # magnitude
    # Evaluate both label polarities since k-means cluster ids are arbitrary.
    # NOTE(review): the first call's results are overwritten and unused.
    acc_un, acc_chg, acc_all2, acc_tp = utils.metric(1-change_map, chg_map)
    acc_un, acc_chg, acc_all3, acc_tp = utils.metric(change_map, chg_map)
    plt.imsave('results.png',change_map, cmap='gray')
    #plt.show()

    return None
Ejemplo n.º 10
0
    def test_kmeans2(self):
        """
        Not a homework problem. Just trying things out on sklearn
        """
        labels = kmeans_without_K(self.X)
        labels_true = kmeans(self.X, 3)

        # print() call form works on both Python 2 and 3; the bare
        # `print ...` statement is a SyntaxError on Python 3.
        print('adjusted rand score: {}'.format(
            metrics.adjusted_rand_score(labels_true, labels)))

        K = np.unique(labels).size
        colors = cm.rainbow(np.linspace(0, 1, K))

        for l in range(0, K):
            # make scatter plot for cluster l
            x1 = self.X[labels == l][:, 0]
            x2 = self.X[labels == l][:, 1]
            plt.ylim(0, 6)
            plt.xlim(-1, 9)
            plt.scatter(x1, x2, c=colors[l], marker='o')

        plt.show()
Ejemplo n.º 11
0
# Load the trained effect-regressor and put its second encoder in eval mode.
model = EffectRegressorMLP(opts)
model.load(opts["save"], "_best", 2)
model.encoder2.eval()

transform = data.default_transform(size=opts["size"],
                                   affine=False,
                                   mean=0.279,
                                   std=0.0094)
trainset = data.PairedObjectData(transform=transform)

# Interactively cluster effects into K groups and ask the user to name
# each centroid; typing "reset" re-runs the clustering from scratch.
K = 6
ok = False
while not ok:
    ok = True
    centroids, assigns, mse, _ = utils.kmeans(trainset.effect, k=K)
    print(mse)
    # De-normalize centroids back to the original effect scale.
    centroids = centroids * (trainset.eff_std + 1e-6) + trainset.eff_mu
    effect_names = []
    # First pass: show all centroids together for context.
    for i, c_i in enumerate(centroids):
        print("Centroid %d: %.2f, %.2f, %.2f, %.2f, %.2f, %.2f" %
              (i, c_i[0], c_i[1], c_i[2], c_i[3], c_i[4], c_i[5]))

    # Second pass: prompt for a human-readable name per centroid.
    # NOTE(review): `name` is read but never appended to `effect_names` in
    # the visible lines — confirm the append happens past this chunk.
    for i, c_i in enumerate(centroids):
        print("Centroid %d: %.2f, %.2f, %.2f, %.2f, %.2f, %.2f" %
              (i, c_i[0], c_i[1], c_i[2], c_i[3], c_i[4], c_i[5]))
        print("What is this effect?")
        print(">>>", end="")
        name = input()
        if name == "reset":
            ok = False
Ejemplo n.º 12
0
#-*- coding=utf-8 -*-
# Demo: run a TensorFlow-1.x k-means op on masked blob data and compare
# the predicted centers with per-class means of the ground truth.
# NOTE(review): `sklearn.datasets.samples_generator` was removed in
# modern scikit-learn; the import lives at `sklearn.datasets` now.
from sklearn.datasets.samples_generator import make_blobs
import numpy as np
import tensorflow as tf
from utils import kmeans

cn = 10
X, y_true = make_blobs(n_samples=150,
                       centers=5,
                       cluster_std=0.5,
                       random_state=10)
kmeans_ops = kmeans(cluster_num=cn)
# Zero out the last 50 samples so they collapse to the origin.
mask = np.concatenate([np.ones((100, 2)), np.zeros((50, 2))], axis=0)
X = X * mask
points = tf.constant(X, dtype=tf.float32)
# kmeans op expects a leading batch dimension: [1, N, 2].
centers = kmeans_ops(tf.reshape(points, [1] + points.shape.as_list()))
gpu_options = tf.GPUOptions(allow_growth=True)
with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
    predict = sess.run(centers)
print(predict)
y = y_true.reshape((-1, 1))
# Print the mean point of each ground-truth class for comparison.
# NOTE(review): make_blobs was created with 5 centers but this loops
# over only 4 classes — possible off-by-one; confirm intent.
for i in range(4):
    print("center i %d: " % i)
    label = np.sum(X * (y == i), axis=0) / np.sum((y == i), axis=0)
    print(label)
save = np.concatenate([X, predict[0]])
np.save('result.npy', save)