Example No. 1
        
        # Emissions according to mixture model
        data_comp = np.empty((dim,dim), dtype=int)
        data_comp[x == -1] = 0
        data_comp[x ==  1] = 1
        if init == 'true':
            init_gamma = data_comp
        data_mu = (np.array([0.0, mu]))[data_comp]
        data = np.random.normal(data_mu, 1)
        if graphics:
            plt.imshow(data)
            plt.show()

        # Initialize with K-means
        if init == 'kmeans':
            init_gamma = kmeans(data.reshape((dim*dim,1)), 2)['best']

        # Do (potentially adaptive) blocked EM, depending on strategy
        for block_strategy in block_strategies:
            # Only 'perfect' strategy uses the true states
            blocks = block(data, block_strategy, true = x)
        
            # Do EM
            results = em(data.reshape((dim*dim,)),
                         model,
                         count_restart = count_restart,
                         blocks = blocks,
                         init_gamma = init_gamma,
                         pi_max = pi_max)
            print('Iterations: %d (%s)' % (results['reps'], block_strategy))
            dists = results['dists']
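
The block() call above belongs to the surrounding project and is not shown in this snippet. As a rough, numpy-only sketch of what a non-adaptive rectangular blocking of a dim x dim grid could look like (the n_splits parameter is a hypothetical stand-in for whatever the chosen strategy uses), mirroring the np.array_split blocking used in the image example further down:

import numpy as np

def rectangular_blocks(dim, n_splits):
    # Flat pixel indices arranged on the dim x dim grid
    idx = np.arange(dim * dim).reshape((dim, dim))
    # Contiguous row and column ranges, combined into rectangular tiles
    row_chunks = np.array_split(np.arange(dim), n_splits)
    col_chunks = np.array_split(np.arange(dim), n_splits)
    return [idx[np.ix_(rows, cols)].ravel()
            for rows in row_chunks for cols in col_chunks]
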
Example No. 2
    n = num_blocks * n_block
    data_p = []
    for i in range(num_blocks):
        data_p += [block_p[i]] * n_block
    data_p = np.array(data_p)
    data_comp = np.zeros(n, dtype=int)
    data_comp[np.random.sample(n) > data_p] = 1
    if init == 'true':
        init_gamma = data_comp
    data_mu = mu[data_comp]
    data = np.random.normal(data_mu, 1)
    blocks = np.array_split(np.arange(n), num_blocks)

    # Initialize with K-means
    if init == 'kmeans':
        init_gamma = kmeans(data.reshape((n,1)), 2)['best']

    # Do EM
    results = em(data,
                 model,
                 count_restart = count_restart,
                 blocks = blocks,
                 init_gamma = init_gamma,
                 init_reps = em_steps,
                 max_reps = em_steps,
                 pi_max = pi_max,
                 trace = True)
    if show_each:
        print('Iterations: %(reps)d' % results)
    dists, dists_trace = results['dists'], results['dists_trace']
    pi, pi_trace = results['pi'], results['pi_trace']
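
The em() routine itself comes from the surrounding project and is only called here. For orientation, a minimal sketch of a single EM iteration for the two-component, unit-variance Gaussian mixture that this example simulates might look as follows; blocks, restarts, the pi_max cap and tracing are all omitted, and this is an assumption about what em() estimates, not its actual implementation:

import numpy as np
from scipy.stats import norm

def em_step(data, mu, pi):
    # E-step: posterior responsibility of component 1 for each point
    phi0 = pi[0] * norm.pdf(data, mu[0], 1.0)
    phi1 = pi[1] * norm.pdf(data, mu[1], 1.0)
    gamma = phi1 / (phi0 + phi1)
    # M-step: refit mixing weights and component means
    pi_new = np.array([np.mean(1.0 - gamma), np.mean(gamma)])
    mu_new = np.array([np.average(data, weights=1.0 - gamma),
                       np.average(data, weights=gamma)])
    return mu_new, pi_new, gamma

# Example call, starting from a rough guess:
# mu, pi, gamma = em_step(data, np.array([0.0, 1.0]), np.array([0.5, 0.5]))
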
Example No. 3
        # Emissions according to mixture model
        data_comp = np.empty((dim, dim), dtype=int)
        data_comp[x == -1] = 0
        data_comp[x == 1] = 1
        if init == 'true':
            init_gamma = data_comp
        data_mu = (np.array([0.0, mu]))[data_comp]
        data = np.random.normal(data_mu, 1)
        if graphics:
            plt.imshow(data)
            plt.show()

        # Initialize with K-means
        if init == 'kmeans':
            init_gamma = kmeans(data.reshape((dim * dim, 1)), 2)['best']

        # Do (potentially adaptive) blocked EM, depending on strategy
        for block_strategy in block_strategies:
            # Only 'perfect' strategy uses the true states
            blocks = block(data, block_strategy, true=x)

            # Do EM
            results = em(data.reshape((dim * dim, )),
                         model,
                         count_restart=count_restart,
                         blocks=blocks,
                         init_gamma=init_gamma,
                         pi_max=pi_max)
            print('Iterations: %d (%s)' % (results['reps'], block_strategy))
            dists = results['dists']
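
The line data_mu = (np.array([0.0, mu]))[data_comp] relies on numpy fancy indexing to map each pixel's component label to that component's mean. A tiny self-contained illustration, with mu assumed to be 2.0 purely for the sake of the example:

import numpy as np

labels = np.array([[0, 1],
                   [1, 0]])        # per-pixel component labels
means = np.array([0.0, 2.0])       # component means (mu assumed 2.0)
print(means[labels])               # [[0. 2.]
                                   #  [2. 0.]]
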
Example No. 4
def main():
    # Load image
    im = Image.open(image_file).convert('RGB')
    width, height = im.size

    # Convenience function to build image band-by-band from array data
    def image_from_array(dat):
        bands = [Image.new('L', (width, height)) for n in range(3)]
        for i in range(3):
            bands[i].putdata(dat[:,i])
        return Image.merge('RGB', bands)

    # Resize image
    width, height = int(width / image_rescale), int(height / image_rescale)
    im = im.resize((width, height))

    # Summary image
    summary = Image.new('RGB', (width * 2 + 40, height * 2 + 60),
                        (255, 255, 255))
    draw = ImageDraw.Draw(summary)
    draw.text((5, height + 10), 'Original', fill = (0, 0, 0))
    draw.text((width + 25, height + 10),
              'Noise V = %.2f, C = %.2f' % (noise_var, noise_cov),
              fill = (0, 0, 0))
    draw.text((5, 2 * height + 40), 'Blocked Gamma', fill = (0, 0, 0))
    draw.text((width + 25, 2 * height + 40), 'Dists', fill = (0, 0, 0))
    del draw
    summary.paste(im, (10, 10))

    # Flatten to emissions
    real_emissions = list(im.getdata())
    num_data = len(real_emissions)
    real_emissions = np.array(real_emissions)

    # Block emissions
    width_blocks = np.array_split(np.arange(width), block_splits)
    height_blocks = np.array_split(np.arange(height), block_splits)
    idx = np.arange(num_data)
    idx.resize((height, width))
    blocks = []
    for hb in height_blocks:
        for wb in width_blocks:
            block = [idx[h, w] for h in hb for w in wb]
            blocks.append(np.array(block))

    # Generate noise
    v, c = noise_var, noise_cov
    cov = [[v, c, c], [c, v, c], [c, c, v]]
    noise = np.random.multivariate_normal([0, 0, 0], cov, width * height)
    noisy_emissions = real_emissions + noise

    # Generate noisy image
    noisy = image_from_array(noisy_emissions)
    summary.paste(noisy, (30 + width, 10))

    # Use K-means to initialize components
    results = kmeans(noisy_emissions, num_comps)
    init_gamma = results['best']
    means = results['means']

    # Analyze color space
    if do_colormap:
        col = { 'R': 0, 'G': 1, 'B': 2 }
        plt.figure()
        for i, (d, c1, c2) in enumerate([(real_emissions, 'R', 'G'),
                                         (real_emissions, 'R', 'B'),
                                         (real_emissions, 'G', 'B'),
                                         (noisy_emissions, 'R', 'G'),
                                         (noisy_emissions, 'R', 'B'),
                                         (noisy_emissions, 'G', 'B')]):
            plt.subplot(2, 3, i+1)
            plt.hexbin(d[:,col[c1]], d[:,col[c2]], gridsize=30,
                       extent = (0, 255, 0, 255))
            plt.plot(means[:,col[c1]], means[:,col[c2]], '.k')
            plt.xlabel(c1)
            plt.ylabel(c2)
            plt.axis([-20, 275, -20, 275])
        plt.savefig('image_test_color_colormap.png')
        plt.show()

    # Do EM
    results = em(noisy_emissions,
                 [MultivariateNormal() for n in range(num_comps)],
                 count_restart = count_restart,
                 blocks = blocks,
                 max_reps = 100,
                 init_gamma = init_gamma,
                 trace = True,
                 pi_max = pi_max)
    dists = results['dists']
    dists_trace = results['dists_trace']
    pi = results['pi']
    print('Iterations: %(reps)d' % results)

    gamma = np.transpose(results['gamma'])
    means = np.array([d.mean() for d in dists])
    covs = np.array([d.cov() for d in dists])

    # Reconstruct with blocked gamma
    rec_blocked_gamma = np.array([np.average(means, weights=g, axis=0)
                                  for g in gamma])
    im_blocked_gamma = image_from_array(rec_blocked_gamma)
    summary.paste(im_blocked_gamma, (10, 40 + height))

    # Reconstruct from distributions alone
    pi_opt = pi_maximize(noisy_emissions, dists)
    phi = np.empty((num_data, num_comps))
    for c in range(num_comps):
        phi[:,c] = dists[c].density(noisy_emissions)
    phi = np.matrix(phi)
    for i, pi in enumerate(pi_opt):
        phi[:,i] *= pi
    gamma_dists = phi / np.sum(phi, axis = 1)
    rec_dists = np.array(np.dot(gamma_dists, means))
    im_dists = image_from_array(rec_dists)
    summary.paste(im_dists, (30 + width, 40 + height))

    # Show summary image
    if show_summary:
        summary.show()
    summary.save('image_test_color_reconstruction.png')

    # Compare RMSE between reconstructions
    def rmse(x):
        return np.sqrt(np.mean((x - real_emissions) ** 2))
    print('Raw RMSE: %.1f' % rmse(noisy_emissions))
    print('Blocked Gamma RMSE: %.1f' % rmse(rec_blocked_gamma))
    print('Dists RMSE: %.1f' % rmse(rec_dists))

    # Visualize variance components
    if do_variance_viz:
        temp_files = []
        col = { 'R': 0, 'G': 1, 'B': 2 }
        fig = plt.figure()
        for i, (d, c1, c2) in enumerate([(real_emissions, 'R', 'G'),
                                         (real_emissions, 'R', 'B'),
                                         (real_emissions, 'G', 'B'),
                                         (noisy_emissions, 'R', 'G'),
                                         (noisy_emissions, 'R', 'B'),
                                         (noisy_emissions, 'G', 'B')]):
            ax = fig.add_subplot(2, 3, i+1)
            plt.hexbin(d[:,col[c1]], d[:,col[c2]], gridsize=30,
                       extent = (0, 255, 0, 255))
            plt.xlabel(c1)
            plt.ylabel(c2)
            plt.axis([-20, 275, -20, 275])
        for idx, dists in enumerate(dists_trace):
            ells = []
            for i, (d, c1, c2) in enumerate([(real_emissions, 'R', 'G'),
                                             (real_emissions, 'R', 'B'),
                                             (real_emissions, 'G', 'B'),
                                             (noisy_emissions, 'R', 'G'),
                                             (noisy_emissions, 'R', 'B'),
                                             (noisy_emissions, 'G', 'B')]):
                for dist in dists:
                    m, c = dist.mean(), dist.cov()
                    cm = (c[[col[c1], col[c2]]])[:,[col[c1], col[c2]]]
                    e, v = la.eigh(cm)
                    ell = Ellipse(xy = [m[col[c1]], m[col[c2]]],
                                  width = np.sqrt(e[0]),
                                  height = np.sqrt(e[1]),
                                  angle = (180.0 / np.pi) * np.arccos(v[0,0]))
                    ells.append(ell)
                    ax = fig.add_subplot(2, 3, i+1)
                    ax.add_artist(ell)
                    ell.set_clip_box(ax.bbox)
                    ell.set_alpha(0.9)
                    ell.set_facecolor(np.fmax(np.fmin(m / 255, 1), 0))
            file_name = 'tmp_%03d.png' % idx
            temp_files.append(file_name)
            plt.savefig(file_name, dpi = 100)
            for ell in ells:
                ell.remove()
        command = ('mencoder',
                   'mf://tmp_*.png',
                   '-mf',
                   'type=png:w=800:h=600:fps=5',
                   '-ovc',
                   'lavc',
                   '-lavcopts',
                   'vcodec=mpeg4',
                   '-oac',
                   'copy',
                   '-o',
                   'image_test_color_components.avi')
        os.spawnvp(os.P_WAIT, 'mencoder', command)
        for temp_file in temp_files:
            os.unlink(temp_file)

    # Find common variance components
    # (entrywise minimum-magnitude Cholesky factor across components,
    #  reconstructed as L L^T)
    print('True noise:')
    print(cov)
    chols = [la.cholesky(c) for c in covs]
    chol_recon = np.zeros((3,3))
    for i in range(3):
        for j in range(3):
            if j > i: continue
            chol_recon[i,j] = np.inf
            for chol in chols:
                if abs(chol[i,j]) < abs(chol_recon[i,j]):
                    chol_recon[i,j] = chol[i,j]
    cov_recon = np.dot(chol_recon, np.transpose(chol_recon))
    print('Reconstructed noise:')
    print(cov_recon)
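
The "reconstruct from distributions alone" step above depends on the project's MultivariateNormal and pi_maximize helpers. Under the assumption that each fitted component exposes a mean and covariance, roughly the same posterior-mean reconstruction can be sketched with SciPy, taking the mixing weights pi as given rather than re-optimized:

import numpy as np
from scipy.stats import multivariate_normal

def reconstruct(emissions, means, covs, pi):
    # Weighted densities: phi[n, c] = pi_c * N(emission_n | mean_c, cov_c)
    phi = np.column_stack([p * multivariate_normal.pdf(emissions, m, c)
                           for m, c, p in zip(means, covs, pi)])
    # Posterior responsibilities, then posterior-mean color per pixel
    gamma = phi / phi.sum(axis=1, keepdims=True)
    return gamma @ means
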
Example No. 5
    n = num_blocks * n_block
    data_p = []
    for i in range(num_blocks):
        data_p += [block_p[i]] * n_block
    data_p = np.array(data_p)
    data_comp = np.zeros(n, dtype=int)
    data_comp[np.random.sample(n) > data_p] = 1
    if init == 'true':
        init_gamma = data_comp
    data_mu = mu[data_comp]
    data = np.random.normal(data_mu, 1)
    blocks = np.array_split(np.arange(n), num_blocks)

    # Initialize with K-means
    if init == 'kmeans':
        init_gamma = kmeans(data.reshape((n, 1)), 2)['best']

    # Do EM
    results = em(data,
                 model,
                 count_restart=count_restart,
                 blocks=blocks,
                 init_gamma=init_gamma,
                 init_reps=em_steps,
                 max_reps=em_steps,
                 pi_max=pi_max,
                 trace=True)
    if show_each:
        print('Iterations: %(reps)d' % results)
    dists, dists_trace = results['dists'], results['dists_trace']
    pi, pi_trace = results['pi'], results['pi_trace']
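
With trace=True the call also returns per-iteration histories (dists_trace, pi_trace). Assuming pi_trace is a sequence of length-2 mixing-weight vectors, one per EM iteration (the snippet itself does not pin down its exact shape), it could be inspected with a plot along these lines:

import numpy as np
import matplotlib.pyplot as plt

def plot_pi_trace(pi_trace):
    trace = np.asarray(pi_trace)      # (iterations, 2), assumed
    plt.plot(trace[:, 0], label='pi[0]')
    plt.plot(trace[:, 1], label='pi[1]')
    plt.xlabel('EM iteration')
    plt.ylabel('Mixing weight')
    plt.legend()
    plt.show()
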
Example No. 6
def main():
    # Load image
    im = Image.open(image_file).convert('RGB')
    width, height = im.size

    # Convenience function to build image band-by-band from array data
    def image_from_array(dat):
        bands = [Image.new('L', (width, height)) for n in range(3)]
        for i in range(3):
            bands[i].putdata(dat[:, i])
        return Image.merge('RGB', bands)

    # Resize image
    width, height = int(width / image_rescale), int(height / image_rescale)
    im = im.resize((width, height))

    # Summary image
    summary = Image.new('RGB', (width * 2 + 40, height * 2 + 60),
                        (255, 255, 255))
    draw = ImageDraw.Draw(summary)
    draw.text((5, height + 10), 'Original', fill=(0, 0, 0))
    draw.text((width + 25, height + 10),
              'Noise V = %.2f, C = %.2f' % (noise_var, noise_cov),
              fill=(0, 0, 0))
    draw.text((5, 2 * height + 40), 'Blocked Gamma', fill=(0, 0, 0))
    draw.text((width + 25, 2 * height + 40), 'Dists', fill=(0, 0, 0))
    del draw
    summary.paste(im, (10, 10))

    # Flatten to emissions
    real_emissions = list(im.getdata())
    num_data = len(real_emissions)
    real_emissions = np.array(real_emissions)

    # Block emissions
    width_blocks = np.array_split(np.arange(width), block_splits)
    height_blocks = np.array_split(np.arange(height), block_splits)
    idx = np.arange(num_data)
    idx.resize((height, width))
    blocks = []
    for hb in height_blocks:
        for wb in width_blocks:
            block = [idx[h, w] for h in hb for w in wb]
            blocks.append(np.array(block))

    # Generate noise
    v, c = noise_var, noise_cov
    cov = [[v, c, c], [c, v, c], [c, c, v]]
    noise = np.random.multivariate_normal([0, 0, 0], cov, width * height)
    noisy_emissions = real_emissions + noise

    # Generate noisy image
    noisy = image_from_array(noisy_emissions)
    summary.paste(noisy, (30 + width, 10))

    # Use K-means to initialize components
    results = kmeans(noisy_emissions, num_comps)
    init_gamma = results['best']
    means = results['means']

    # Analyze color space
    if do_colormap:
        col = {'R': 0, 'G': 1, 'B': 2}
        plt.figure()
        for i, (d, c1, c2) in enumerate([(real_emissions, 'R', 'G'),
                                         (real_emissions, 'R', 'B'),
                                         (real_emissions, 'G', 'B'),
                                         (noisy_emissions, 'R', 'G'),
                                         (noisy_emissions, 'R', 'B'),
                                         (noisy_emissions, 'G', 'B')]):
            plt.subplot(2, 3, i + 1)
            plt.hexbin(d[:, col[c1]],
                       d[:, col[c2]],
                       gridsize=30,
                       extent=(0, 255, 0, 255))
            plt.plot(means[:, col[c1]], means[:, col[c2]], '.k')
            plt.xlabel(c1)
            plt.ylabel(c2)
            plt.axis([-20, 275, -20, 275])
        plt.savefig('image_test_color_colormap.png')
        plt.show()

    # Do EM
    results = em(noisy_emissions,
                 [MultivariateNormal() for n in range(num_comps)],
                 count_restart=count_restart,
                 blocks=blocks,
                 max_reps=100,
                 init_gamma=init_gamma,
                 trace=True,
                 pi_max=pi_max)
    dists = results['dists']
    dists_trace = results['dists_trace']
    pi = results['pi']
    print('Iterations: %(reps)d' % results)

    gamma = np.transpose(results['gamma'])
    means = np.array([d.mean() for d in dists])
    covs = np.array([d.cov() for d in dists])

    # Reconstruct with blocked gamma
    rec_blocked_gamma = np.array(
        [np.average(means, weights=g, axis=0) for g in gamma])
    im_blocked_gamma = image_from_array(rec_blocked_gamma)
    summary.paste(im_blocked_gamma, (10, 40 + height))

    # Reconstruct from distributions alone
    pi_opt = pi_maximize(noisy_emissions, dists)
    phi = np.empty((num_data, num_comps))
    for c in range(num_comps):
        phi[:, c] = dists[c].density(noisy_emissions)
    phi = np.matrix(phi)
    for i, pi in enumerate(pi_opt):
        phi[:, i] *= pi
    gamma_dists = phi / np.sum(phi, axis=1)
    rec_dists = np.array(np.dot(gamma_dists, means))
    im_dists = image_from_array(rec_dists)
    summary.paste(im_dists, (30 + width, 40 + height))

    # Show summary image
    if show_summary:
        summary.show()
    summary.save('image_test_color_reconstruction.png')

    # Compare RMSE between reconstructions
    def rmse(x):
        return np.sqrt(np.mean((x - real_emissions)**2))

    print('Raw RMSE: %.1f' % rmse(noisy_emissions))
    print('Blocked Gamma RMSE: %.1f' % rmse(rec_blocked_gamma))
    print('Dists RMSE: %.1f' % rmse(rec_dists))

    # Visualize variance components
    if do_variance_viz:
        temp_files = []
        col = {'R': 0, 'G': 1, 'B': 2}
        fig = plt.figure()
        for i, (d, c1, c2) in enumerate([(real_emissions, 'R', 'G'),
                                         (real_emissions, 'R', 'B'),
                                         (real_emissions, 'G', 'B'),
                                         (noisy_emissions, 'R', 'G'),
                                         (noisy_emissions, 'R', 'B'),
                                         (noisy_emissions, 'G', 'B')]):
            ax = fig.add_subplot(2, 3, i + 1)
            plt.hexbin(d[:, col[c1]],
                       d[:, col[c2]],
                       gridsize=30,
                       extent=(0, 255, 0, 255))
            plt.xlabel(c1)
            plt.ylabel(c2)
            plt.axis([-20, 275, -20, 275])
        for idx, dists in enumerate(dists_trace):
            ells = []
            for i, (d, c1, c2) in enumerate([(real_emissions, 'R', 'G'),
                                             (real_emissions, 'R', 'B'),
                                             (real_emissions, 'G', 'B'),
                                             (noisy_emissions, 'R', 'G'),
                                             (noisy_emissions, 'R', 'B'),
                                             (noisy_emissions, 'G', 'B')]):
                for dist in dists:
                    m, c = dist.mean(), dist.cov()
                    cm = (c[[col[c1], col[c2]]])[:, [col[c1], col[c2]]]
                    e, v = la.eigh(cm)
                    ell = Ellipse(xy=[m[col[c1]], m[col[c2]]],
                                  width=np.sqrt(e[0]),
                                  height=np.sqrt(e[1]),
                                  angle=(180.0 / np.pi) * np.arccos(v[0, 0]))
                    ells.append(ell)
                    ax = fig.add_subplot(2, 3, i + 1)
                    ax.add_artist(ell)
                    ell.set_clip_box(ax.bbox)
                    ell.set_alpha(0.9)
                    ell.set_facecolor(np.fmax(np.fmin(m / 255, 1), 0))
            file_name = 'tmp_%03d.png' % idx
            temp_files.append(file_name)
            plt.savefig(file_name, dpi=100)
            for ell in ells:
                ell.remove()
        command = ('mencoder', 'mf://tmp_*.png', '-mf',
                   'type=png:w=800:h=600:fps=5', '-ovc', 'lavc', '-lavcopts',
                   'vcodec=mpeg4', '-oac', 'copy', '-o',
                   'image_test_color_components.avi')
        os.spawnvp(os.P_WAIT, 'mencoder', command)
        for temp_file in temp_files:
            os.unlink(temp_file)

    # Find common variance components
    # (entrywise minimum-magnitude Cholesky factor across components,
    #  reconstructed as L L^T)
    print('True noise:')
    print(cov)
    chols = [la.cholesky(c) for c in covs]
    chol_recon = np.zeros((3, 3))
    for i in range(3):
        for j in range(3):
            if j > i: continue
            chol_recon[i, j] = np.inf
            for chol in chols:
                if abs(chol[i, j]) < abs(chol_recon[i, j]):
                    chol_recon[i, j] = chol[i, j]
    cov_recon = np.dot(chol_recon, np.transpose(chol_recon))
    print('Reconstructed noise:')
    print(cov_recon)
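
The final "common variance components" step estimates the shared noise covariance by taking, entry by entry, the lower-triangular Cholesky element of smallest magnitude across the fitted component covariances and rebuilding L L^T. The same heuristic can be restated in vectorized numpy as a sketch (a compact restatement of the loop above, not project code):

import numpy as np
import numpy.linalg as la

def common_noise(covs):
    chols = np.array([la.cholesky(c) for c in covs])  # (k, 3, 3), lower-triangular
    pick = np.abs(chols).argmin(axis=0)               # component with smallest |entry| per (i, j)
    L = np.tril(np.take_along_axis(chols, pick[None], axis=0)[0])
    return L @ L.T
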