Example #1
import random
import numpy
import numpy.random

from ms import MeanShift

# Create a simple data set...
a = numpy.random.normal(3.0, 1.0, 100)
b = numpy.random.normal(5.0, 0.5, 50)

data = numpy.concatenate((a, b))

# Setup the mean shift object...
ms = MeanShift()
ms.set_data(data, 'd')

ms.set_kernel(random.choice(filter(lambda s: s != 'fisher', ms.kernels())))
ms.set_spatial(random.choice(ms.spatials()))

# Print out basic stats...
print 'kernel = %s; spatial = %s' % (ms.get_kernel(), ms.get_spatial())
print 'exemplars = %i; features = %i' % (ms.exemplars(), ms.features())
print 'quality = %.3f; epsilon = %.3f; iter_cap = %i' % (
    ms.quality, ms.epsilon, ms.iter_cap)
print

# Query the mode of various points...
for x in numpy.arange(0.0, 7.0, 0.4):
    mode = ms.mode(numpy.array([x]))
    print '%.3f: mode = %.3f' % (x, mode)
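
# For reference, ms.mode() performs the standard mean shift fixed point
# iteration. A minimal pure-numpy sketch of the 1D Gaussian-kernel case is
# below - the helper name, bandwidth h and the defaults are illustrative
# only, not part of the ms API...
def mode_1d(data, x, h=1.0, epsilon=1e-3, iter_cap=1024):
    for _ in xrange(iter_cap):
        w = numpy.exp(-0.5 * ((data - x) / h)**2)  # Gaussian kernel weights.
        nx = (w * data).sum() / w.sum()  # Weighted mean = the shifted estimate.
        if abs(nx - x) < epsilon:  # Converged once the shift is tiny.
            break
        x = nx
    return x
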
Example #2

# a, b, c and d are sample arrays built earlier in the full script (truncated here)...
data = numpy.concatenate((a, b, c, d), axis=0)



# Use mean shift to cluster it...
ms = MeanShift()
ms.set_data(data, 'df')

ms.set_kernel(random.choice(filter(lambda s: s != 'fisher', ms.kernels())))
ms.set_spatial(random.choice(ms.spatials()))

modes, indices = ms.cluster()



# Print out basic stats...
print 'kernel = %s; spatial = %s' % (ms.get_kernel(), ms.get_spatial())
print 'exemplars = %i; features = %i' % (ms.exemplars(), ms.features())
print 'quality = %.3f; epsilon = %.3f; iter_cap = %i' % (ms.quality, ms.epsilon, ms.iter_cap)
print



# Print out a grid of cluster assignments...
for j in xrange(20):
  for i in xrange(20):
    fv = numpy.array([0.25*j, 0.25*i])
    c = ms.assign_cluster(fv)
    print c,
  print
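
# A hypothetical follow-up, assuming indices from ms.cluster() above is an
# array of non-negative per-exemplar cluster numbers and modes has one row
# per cluster...
sizes = numpy.bincount(indices)
for ci in xrange(len(sizes)):
  print 'cluster %i: %i exemplars; mode = %s' % (ci, sizes[ci], modes[ci, :])
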
Example #3
# Create a dataset - equally spaced samples weighted by a Gaussian, such that it should estimate a Gaussian...
x = numpy.arange(-5.0, 5.0, 0.02)
y = numpy.exp(-0.5 * x ** 2.0 / 2.0)

data = numpy.concatenate((x.reshape((-1, 1)), y.reshape((-1, 1))), axis=1)


# Setup the mean shift object...
ms = MeanShift()
ms.set_data(data, "df")

ms.set_kernel(random.choice(filter(lambda s: s != "fisher", ms.kernels())))
ms.set_spatial(random.choice(ms.spatials()))
ms.set_scale(numpy.ones(2), 1)


# Iterate and calculate the probability at every point...
sam = numpy.arange(-5.0, 5.0, 0.15)
prob = numpy.array(map(lambda v: ms.prob(numpy.array([v, 1.0])), sam))


# Print out basic stats...
print "kernel = %s; spatial = %s" % (ms.get_kernel(), ms.get_spatial())
print "exemplars = %i; features = %i" % (ms.exemplars(), ms.features())
print


# Visualise the output...
for threshold in numpy.arange(prob.max(), 0.0, -prob.max() / 15.0):
    print "".join(map(lambda p: "|" if p > threshold else " ", prob))
Example #4
            # Create a mean shift object pointing in the [1, 0, ...] direction with the given concentration...
            ms = MeanShift()
            ms.set_data(numpy.array([1.0] + [0.0] * (dim - 1), dtype=numpy.float32), 'f')
            ms.set_kernel('%s(%.1f)' % (kernel, conc))
            ms.quality = 1.0

            # Draw lots of samples from it...
            sample = ms.draws(samples_dir)

            # Get the probability of each...
            p1 = ms.probs(sample)

            # Throw away samples where p1 is 0 - they are a result of the range optimisation, and break the below...
            keep = p1 > 1e-6
            sample = sample[keep, :]
            p1 = p1[keep]

            # Do a KDE of the samples, including bandwidth estimation...
            kde = MeanShift()
            kde.set_data(sample, 'df')
            kde.set_kernel('fisher(%f)' % (conc * 32))  # Hardly ideal - need something more independent/safer!
            kde.set_spatial('kd_tree')

            # Calculate a stochastic KL-divergence between the KDE and the actual distribution...
            p2 = kde.probs(sample)
            kld = numpy.sum(numpy.log(p1 / p2)) / samples_dir

            # Print output to screen...
            print 'Kernel = %s; Dims = %i | KL-divergence = %.6f' % (
                ms.get_kernel(), dim, kld)

    print
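
# The kld value above is a Monte-Carlo estimate of the KL divergence, using
# the N = samples_dir draws from the true distribution p:
#   KL(p || q) ~= (1/N) * sum_i log(p(x_i) / q(x_i)),  with x_i drawn from p.
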
Example #5

# Now for the directional kernels...
for kernel in ['fisher', 'mirror_fisher']:
  for dim, area in zip(dir_dimensions, dir_area):
    for conc in dir_conc:
      # Create a mean shift object pointing in the [1, 0, ...] direction with the given concentration...
      data = numpy.array([1.0] + [0.0]*(dim-1), dtype=numpy.float32)
      
      ms = MeanShift()
      ms.set_data(data, 'f')
      ms.set_kernel('%s(%.1f)' % (kernel, conc))
      ms.quality = 1.0
      
      # Create uniform samples on the hyper-sphere with which we are dealing - abuse the MeanShift object by drawing with a Gaussian kernel and normalising...
      uniform = MeanShift()
      uniform.set_data(numpy.array([0.0]*dim, dtype=numpy.float32), 'f')
      uniform.set_kernel('gaussian')
      sample = uniform.draws(samples)
      
      div = numpy.sqrt(numpy.square(sample).sum(axis=1))
      sample /= div[:, numpy.newaxis]
      
      # Evaluate the probabilities of the uniform directions...
      p = ms.probs(sample)
      
      # Print their average - should again be one...
      volume = p.mean() * area
      print 'Kernel = %s; Dims = %i | Monte-Carlo vol = %.3f (max = %.1f)' % (ms.get_kernel(), dim, volume, ms.prob(data))
  print
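
# The same trick without a MeanShift object, as a sketch: isotropic Gaussian
# draws, normalised to unit length, are uniformly distributed over the unit
# hyper-sphere (the sample count and dimension below are illustrative)...
direct = numpy.random.normal(size=(1024, 3))
direct /= numpy.sqrt(numpy.square(direct).sum(axis=1))[:, numpy.newaxis]
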
Example #6

# Don't ever do this: just wanted to check that a composite kernel within a composite kernel doesn't break things!
kernel = 'composite(2:composite(1:gaussian, 1:gaussian), 2:fisher(%(ca)s), 2:mirror_fisher(%(cb)s))'

# data and MeanShiftCompositeScale come from earlier in the full script (truncated here)...

ms = MeanShift()
ms.set_data(data, 'df', None, 'rAA')
ms.set_kernel(kernel % {'ca': 64.0, 'cb': 64.0})

# Use the MeanShiftCompositeScale object to optimise...
optimise_scale = MeanShiftCompositeScale(kernel)
optimise_scale.add_param_scale(0)
optimise_scale.add_param_kernel('ca')
optimise_scale.add_param_kernel('cb')

steps = optimise_scale(ms)

print 'Optimisation of "a" took %i steps' % steps
print 'kernel = %s' % ms.get_kernel()
print 'scale = %s' % ms.get_scale()
print


# Visualise - input and a draw from the input...
def visualise(fn, data):
    img = numpy.zeros((size, size, 3), dtype=numpy.float32)
    for sample in data:
        bx = numpy.cos(sample[0]) * sample[1]
        by = numpy.sin(sample[0]) * sample[1]

        s_x = (size - 1) * 0.5 * (1.0 + bx / scale)
        s_y = (size - 1) * 0.5 * (1.0 + by / scale)

        e_x = s_x + angle_len * numpy.cos(sample[2])
Example #7
# The signature and first line of ms_by_conc are reconstructed from the
# call sites below (the original snippet is truncated)...
def ms_by_conc(power, code=''):
    ms = MeanShift()
    ms.quality = 0.5
    ms.set_data(numpy.array([1, 0, 0], dtype=numpy.float32), 'f')
    ms.set_kernel('fisher(%.1f%s)' % (2**power, code))
    ms.set_spatial('kd_tree')

    return ms


options = map(ms_by_conc, xrange(8)) + [
    ms_by_conc(8, 'c'), ms_by_conc(8, 'a')
] + map(ms_by_conc, xrange(9, 16))

# Create it and do the bandwidth estimation...
ms = MeanShift()
ms.set_data(data, 'df')

p = ProgBar()
best = ms.scale_loo_nll_array(options, p.callback)
del p

print 'Selected kernel =', ms.get_kernel()
print 'LOO score =', best

# Visualise the best option...
visualise('bandwidth_fisher.png', ms)

# Also visualise correct vs approximate, for sanity checking...
for option in [ms_by_conc(8, 'c'), ms_by_conc(8, 'a')]:  #options:
    ms.copy_all(option)
    visualise('bandwidth_fisher_%s.png' % option.get_kernel(), ms)
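
# For intuition, the leave-one-out score used above rates each candidate by
# how well every exemplar is predicted by the density estimate built from all
# the other exemplars. A minimal sketch for a 1D Gaussian KDE follows - a
# hypothetical helper, not part of the ms API...
def loo_nll_1d(data, h):
    total = 0.0
    n = len(data)
    for i in xrange(n):
        rest = numpy.delete(data, i)  # Everything except exemplar i.
        w = numpy.exp(-0.5 * ((rest - data[i]) / h)**2)
        density = w.sum() / ((n - 1) * h * numpy.sqrt(2.0 * numpy.pi))
        total -= numpy.log(max(density, 1e-12))  # Guard against log(0).
    return total / n
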
Example #10

# Construct the mean shift object from it, including a composite kernel...
ms = MeanShift()
ms.set_data(data, 'df')
ms.set_kernel('composite(2:gaussian,2:fisher(32.0))')
ms.set_spatial('kd_tree')
ms.set_scale(numpy.array([10.0, 5.0, 1.0, 1.0]))
ms.merge_range = 0.05



# Print out information in a convoluted way to test some convoluted features!..
ms2 = MeanShift()
ms2.copy_kernel(ms)
print 'Kernel:', ms2.get_kernel()
del ms2



# For our first trick visualise the data set...
img = numpy.zeros((size, size, 3), dtype=numpy.float32)

for sample in data:
  s_x = (size-1) * sample[1] / scale
  s_y = (size-1) * sample[0] / scale
  e_x = (size-1) * (sample[1] + angle_len * sample[3]) / scale
  e_y = (size-1) * (sample[0] + angle_len * sample[2]) / scale
  
  for i in xrange(angle_step):
    t = float(i) / (angle_step-1)
    t_x = int(t * s_x + (1 - t) * e_x)
    t_y = int(t * s_y + (1 - t) * e_y)