Example #1
def doTest(name, gen):
  # Train the model...
  df = DF()
  df.setGoal(DensityGaussian(2)) # 2 = # of features
  df.setGen(gen)
  df.getPruner().setMinTrain(48) # Playing around shows that this is probably the most important number to get right when doing density estimation - the information gain heuristic just doesn't know when to stop.
  
  global es
  pb = ProgBar()
  df.learn(32, es, callback = pb.callback, mp=doMP) # 32 = number of trees to learn - you need a lot to get a good answer.
  del pb
  
  # Drop some stats...
  print '%i trees containing %i nodes.\nAverage error is %.3f.'%(df.size(), df.nodes(), df.error())
  
  # Visualise the density estimate...
  global img
  testSet = numpy.empty((pixel_width,2), dtype=numpy.float32)
  pb = ProgBar()
  
  for y in xrange(pixel_width):
    pb.callback(y,pixel_width)
    i = 0
    for x in xrange(pixel_width):
      testSet[i,0] = axis_half_width * float(x - pixel_half_width) / pixel_half_width
      testSet[i,1] = axis_half_width * float(y - pixel_half_width) / pixel_half_width
      i += 1
    
    test = MatrixES(testSet)
    res = df.evaluate(test, mp=doMP)
    
    i = 0
    for x in xrange(pixel_width):
      img[y,x,:] = res[i]
      i += 1
    
  del pb
  
  print 'Maximum probability = %.2f'%img.max()
  img /= img.max()
  cv.SaveImage('test_de_circle_%s.png'%name,array2cv(img*255))
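
Every example here drives a ProgBar either by handing pb.callback to a long-running call or by invoking callback(done, total) directly inside a loop, then retiring it with del. A minimal sketch of a compatible progress reporter, assuming only the callback(done, total) interface that this usage implies (the real ProgBar's rendering is not shown in these examples):

import sys

class SimpleProgBar:
  """Minimal stand-in for ProgBar: renders a textual bar on stderr."""
  def __init__(self, width = 40):
    self.width = width

  def callback(self, done, total):
    # done counts completed units (0-based in the loops above), total is the unit count.
    filled = self.width * (done + 1) // total
    bar = '#' * filled + '-' * (self.width - filled)
    sys.stderr.write('\r[%s] %i/%i' % (bar, done + 1, total))
    if done + 1 == total:
      sys.stderr.write('\n')

Anything above that expects a ProgBar can be handed SimpleProgBar().callback instead; the del pb lines just retire the reporter before the next stage starts.
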
Example #3
def drainAllPools(train, test, methods, runs, base):
    for method in methods:
        print 'method = %s' % method
        p = ProgBar()
        aq, ak, at = drainPools(train, test, method, runs, p.callback)
        del p

        print 'Average # queries by # classes found:'
        for i, aqc in enumerate(aq):
            if i > 1:
                print '  %i classes found: average of %.2f queries' % (i, aqc)
        print

        fn = '%s%s_p%i_r%i.csv' % (base, method, sum(map(lambda x: x[2], train)), runs)
        f = open(fn, 'w')

        f.write('queries, classes, inlier\n')
        for i in xrange(len(ak)):
            f.write('%i, %f, %f\n' % (i, ak[i], at[i]))

        f.close()
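
drainAllPools leaves one CSV per method, with a header row followed by one line per query. A quick way to pull a curve back in for plotting, assuming numpy is available; the filename shown is a hypothetical instance of the '%s%s_p%i_r%i.csv' pattern above:

import numpy

# Skip the 'queries, classes, inlier' header row, then split the columns...
curve = numpy.loadtxt('out/method_p30_r8.csv', delimiter=',', skiprows=1)
queries = curve[:, 0]  # query index
classes = curve[:, 1]  # average # of classes found after that many queries
inlier = curve[:, 2]   # average inlier rate at that point

print 'final: %.2f classes found, %.3f inlier rate' % (classes[-1], inlier[-1])
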
Example #4
step = ((args.block_size*1024*1024) // (col_in.shape[0] * 4)) + 1
if not args.quiet:
  print 'Converting... (%i pixels at a time)'%step

slices = map(lambda x: slice(x*step, (x+1)*step), xrange(data.shape[0]//step + 1))

if slices[-1].stop<data.shape[0]:
  slices.append(slice(slices[-1].stop, data.shape[0]))

## Calculate each channel in turn...
out = data.copy()

for cc in xrange(3):
  if not args.quiet:
    print 'Converting - %s...'%(['red', 'green', 'blue'][cc])
  p = ProgBar()
  for i,s in enumerate(slices):
    p.callback(i, len(slices))
    out[s,cc] = model[cc](data[s,:].astype(numpy.float32))
  del p

## Expand the data matrix back up to the order and length of the image, by expanding duplicates...
source = numpy.cumsum(keep) - 1
out = out[source,:]
out = out[numpy.argsort(index),:]
  
## Convert back from data matrix to image...
out = out.reshape(image.shape)
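
The slice list above is a general trick for bounding peak memory: chop the data matrix into fixed-size row blocks and convert one block at a time. The same pattern as a self-contained sketch, with a trivial stand-in for the per-channel colour model:

import numpy

def process_in_blocks(data, func, block_rows = 1024):
  # Apply func to successive row blocks of data, writing into a single output...
  out = numpy.empty(data.shape, dtype=numpy.float32)
  for start in xrange(0, data.shape[0], block_rows):
    s = slice(start, min(start + block_rows, data.shape[0]))
    out[s, :] = func(data[s, :].astype(numpy.float32))
  return out

out = process_in_blocks(numpy.random.rand(5000, 3), lambda block: 2.0 * block)
print out.shape
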


Example #5
docImage = numpy.hstack(stack).T * 255.0
img = array2cv(docImage)
cv.SaveImage('test_abnorm_lines/docs.png',img)



# Train...
params = ddhdp.Params()
params.runs = 1
params.samples = 1
#params.burnIn = 10000
#c.setOneCluster(True)


print 'Fitting model...'
p = ProgBar()
model = corpus.sampleModel(params,p.callback)
del p

#model.bestSampleOnly()


sam = model.getSample(0)

def smartVecPrint(numVec):
  ret = []
  ret.append('[')
  for i in xrange(numVec.shape[0]):
    ret.append('%s%.3f'%(' ' if i!=0 else '',numVec[i]))
  ret.append(']')
  return ''.join(ret)
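
A quick usage example for smartVecPrint (numpy assumed imported, as in the rest of the example):

print smartVecPrint(numpy.array([0.5, 0.25, 0.125]))  # prints: [0.500 0.250 0.125]
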
Example #6
if train_label.shape[0] > cull:
    indices = numpy.random.permutation(train_label.shape[0])
    indices = indices[:cull]

    train_fv = train_fv[indices, :]
    train_label = train_label[indices]
    train_weight = train_weight[indices]
    print 'Culled to %i' % cull

forest = frf.Forest()
forest.configure('C', 'C', 'S' * train_fv.shape[1])
forest.min_exemplars = 8
forest.opt_features = int(numpy.sqrt(train_fv.shape[1]))

print 'frf learning:'
pb = ProgBar()
oob = forest.train(train_fv, [train_label, ('w', train_weight)], trees,
                   pb.callback)
del pb

# Report oob error rate for the forest, plus other stuff...
class_histogram = numpy.bincount(train_label)
popular = numpy.argmax(class_histogram)
popular_rate = class_histogram[popular] / float(class_histogram.sum())
popular_char = label_index[popular]

print '    Class count: %i' % len(label_index)
print '    Most common class: %s (%.2f%% of data set)' % (popular_char,
                                                          popular_rate * 100.0)

print '    frf: OOB accuracy: %.2f%%' % ((1.0 - oob.mean()) * 100.0)
Example #7
def doRun(tdc):
    # Create a corpus...
    c = lda.Corpus(4)
    c.setWordCount(identCount() * 4)

    for i in xrange(tdc):
        dic, abn = genDoc()

        nDic = dict()
        for key, item in dic.iteritems():
            nDic[key[0] * 4 + key[1]] = item

        doc = lda.Document(nDic)
        doc.abn = abn
        c.add(doc)

    # Fit a model...
    params = lda.Params()
    params.setRuns(16)

    print 'Fitting model...'
    p = ProgBar()
    c.fit(params, p.callback)
    del p

    tw = c.topicsWords()

    # Test on a bunch of documents, creating a list of (abnormality score, actually-an-abnormality) pairs...
    ab_gt = []
    print 'Testing...'
    p = ProgBar()
    for i in xrange(testDocCount):
        p.callback(i, testDocCount)
        dic, abn = genDoc()

        nDic = dict()
        for key, item in dic.iteritems():
            nDic[key[0] * 4 + key[1]] = item

        doc = lda.Document(nDic)
        doc.fit(tw)
        ab_gt.append((doc.negLogLikelihood(tw), abn))
    del p

    ab_gt.sort(reverse=True)

    # Use the pairs to construct a roc...
    posCount = len(filter(lambda p: p[1] == True, ab_gt))
    negCount = len(ab_gt) - posCount
    print 'positive samples = ', posCount
    print 'negative samples = ', negCount

    truePos = 0
    falsePos = 0
    trueNeg = negCount
    falseNeg = posCount

    roc = []

    for p in ab_gt:
        if p[1]:
            truePos += 1
            falseNeg -= 1
        else:
            falsePos += 1
            trueNeg -= 1

        pnt = (float(falsePos) / float(falsePos + trueNeg),
               float(truePos) / float(truePos + falseNeg))
        roc.append(pnt)

    # Save the roc to disk...
    if not sweep:
        f = open('junction_roc.txt', 'w')
        f.write('0.0 0.0\n')
        for pnt in roc:
            f.write('%f %f\n' % pnt)
        f.close()

    # Calculate and print out the area under the roc...
    area = 0.0
    for i in xrange(1, len(roc)):
        area += 0.5 * (roc[i - 1][1] + roc[i][1]) * (roc[i][0] - roc[i - 1][0])
    print 'area under roc =', area, '(above', (1.0 - area), ')'

    return area
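
The testing block above builds the ROC by sweeping the threshold from most to least abnormal and then integrates it with the trapezoid rule. A tiny self-contained check of that computation, using hand-made (score, is_abnormal) pairs instead of the lda model; since falsePos + trueNeg and truePos + falseNeg stay constant, the rates can be computed directly from the positive and negative totals:

def roc_auc(score_gt):
    # score_gt: list of (abnormality score, ground-truth abnormal?) pairs.
    score_gt = sorted(score_gt, reverse=True)
    pos = len([1 for s in score_gt if s[1]])
    neg = len(score_gt) - pos

    tp = 0
    fp = 0
    roc = [(0.0, 0.0)]
    for s in score_gt:
        if s[1]: tp += 1
        else: fp += 1
        roc.append((fp / float(neg), tp / float(pos)))

    # Trapezoid rule, exactly as in doRun above...
    area = 0.0
    for i in xrange(1, len(roc)):
        area += 0.5 * (roc[i - 1][1] + roc[i][1]) * (roc[i][0] - roc[i - 1][0])
    return area

print roc_auc([(0.9, True), (0.8, True), (0.3, False), (0.1, False)])  # 1.0 - perfect separation
print roc_auc([(0.9, True), (0.8, False), (0.3, True), (0.1, False)])  # 0.75
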
Example #8
def doRun(tdc):
  # Create a corpus...
  vlda = lda.VLDA(4, identCount()*4)

  abnDict = dict()
  for i in xrange(tdc):
    dic, abn = genDoc()
    
    nDic = dict()
    for key,item in dic.iteritems(): nDic[key[0]*4+key[1]] = item
    
    doc = vlda.add(nDic)
    abnDict[doc] = abn


  # Fit a model...
  print 'Fitting model...'
  p = ProgBar()
  vlda.solve()
  del p


  # Visualise the topics...
  if not sweep:
    for t in xrange(vlda.numTopics()):
      prob = numpy.zeros((6,6,4),dtype=numpy.float_)
      beta = vlda.getBeta(t)
      for i in xrange(beta.shape[0]):
        x,y = identToCoord(i//4)
        w = i%4
        prob[x,y,w] += beta[i]

      multProb = 255.0/prob.max()
      img = cv.CreateImage((6*25,6*25),cv.IPL_DEPTH_32F,3)
      for y in xrange(6):
        for x in xrange(6):
          coords = [(x*25,y*25),((x+1)*25,y*25),((x+1)*25,(y+1)*25),(x*25,(y+1)*25)]
          centre = (x*25+12,y*25+12)
          for d in xrange(4):
            if d%2==0:
              col = cv.RGB(0.0,prob[x,y,d]*multProb,0.0)
            else:
              col = cv.RGB(prob[x,y,d]*multProb,0.0,0.0)
            cv.FillPoly(img, [(coords[d],coords[(d+1)%4],centre)], col)

      cv.SaveImage('junction_topic_%i.png'%t,img)


  # Test on a bunch of documents, creating a list of (abnormality score, actually-an-abnormality) pairs...
  ab_gt = []
  print 'Testing...'
  p = ProgBar()
  for i in xrange(testDocCount):
    p.callback(i,testDocCount)
    dic, abn = genDoc()

    nDic = dict()
    for key,item in dic.iteritems(): nDic[key[0]*4+key[1]] = item
    
    nll = vlda.getNewNLL(nDic)
    ab_gt.append((nll,abn))
  del p

  ab_gt.sort(reverse=True)


  # Use the pairs to construct a roc...
  posCount = len(filter(lambda p:p[1]==True,ab_gt))
  negCount = len(ab_gt) - posCount
  print 'positive samples = ',posCount
  print 'negative samples = ',negCount

  truePos = 0
  falsePos = 0
  trueNeg = negCount
  falseNeg = posCount

  roc = []

  for p in ab_gt:
    if p[1]:
      truePos += 1
      falseNeg -= 1
    else:
      falsePos +=1
      trueNeg -= 1

    pnt = (float(falsePos)/float(falsePos+trueNeg), float(truePos)/float(truePos+falseNeg))
    roc.append(pnt)


  # Save the roc to disk...
  if not sweep:
    f = open('junction_roc.csv','w')
    f.write('0.0, 0.0\n')
    for pnt in roc: f.write('%f, %f\n'%pnt)
    f.close()


  # Calculate and print out the area under the roc...
  area = 0.0
  for i in xrange(1,len(roc)):
    area += 0.5*(roc[i-1][1]+roc[i][1]) * (roc[i][0]-roc[i-1][0])
  print 'area under roc =',area, '(above',(1.0-area),')'

  return area
Example #9
def scale_loo_nll():
    p = ProgBar()
    ms.scale_loo_nll(callback=p.callback)
    del p
Example #10
    t_x = int(t * s_x + (1-t) * e_x)
    t_y = int(t * s_y + (1-t) * e_y)
    try:
      if img[t_y,t_x,0] < t:
        img[t_y,t_x,:] = t
    except:
      pass

img = array2cv(255.0 * img)
cv.SaveImage('composite_draw.png', img)



# Visualise the probability - both spatial and rotational in a single image, with one colour channel each for 3 directions...
img = numpy.zeros((size, size, 3), dtype=numpy.float32)
p = ProgBar()

for y in xrange(size):
  p.callback(y, size)
  
  for index, orient_x, orient_y in [(0,1.0,0.0), (1,0.0,1.0), (2,-1.0,0.0)]:
    block = numpy.concatenate(((scale * y / float(size-1)) * numpy.ones(size).reshape((-1,1)), numpy.linspace(0.0, scale, size).reshape((-1,1)), orient_x * numpy.ones(size).reshape((-1,1)), orient_y * numpy.ones(size).reshape((-1,1))), axis=1)
    
    vals = ms.probs(block)
    img[y,:,index] = vals

del p

img *= 255 / img.max()
img = array2cv(img)
cv.SaveImage('composite_prob.png', img)
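
The truncated loop at the top of this example (its header survives in Example #12) steps a parameter t from 0 to 1, blends the endpoint coordinates, and writes t itself as the intensity, so each line fades towards one end; the try/except drops samples past the image bounds. A self-contained sketch of that drawing step, with hypothetical endpoints:

import numpy

img = numpy.zeros((100, 100, 3), dtype=numpy.float32)
angle_step = 64
s_x, s_y = 90, 10  # start point - ends up at intensity 1.0
e_x, e_y = 10, 80  # end point - ends up at intensity 0.0

for i in xrange(angle_step):
  t = float(i) / (angle_step - 1)
  t_x = int(t * s_x + (1 - t) * e_x)
  t_y = int(t * s_y + (1 - t) * e_y)
  try:
    if img[t_y, t_x, 0] < t:  # only ever brighten a pixel
      img[t_y, t_x, :] = t
  except IndexError:
    pass  # sample fell outside the image

One caveat worth knowing: numpy only raises IndexError for indices past the upper bound, so slightly negative coordinates silently wrap to the opposite edge rather than being skipped - the bare except in the original has the same blind spot.
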
Example #11
if not args.quiet:
    print 'Converting... (%i pixels at a time)' % step

slices = map(lambda x: slice(x * step, (x + 1) * step),
             xrange(data.shape[0] // step + 1))

if slices[-1].stop < data.shape[0]:
    slices.append(slice(slices[-1].stop, data.shape[0]))

## Calculate each channel in turn...
out = data.copy()

for cc in xrange(3):
    if not args.quiet:
        print 'Converting - %s...' % (['red', 'green', 'blue'][cc])
    p = ProgBar()
    for i, s in enumerate(slices):
        p.callback(i, len(slices))
        out[s, cc] = model[cc](data[s, :].astype(numpy.float32))
    del p

## Expand the data matrix back up to the order and length of the image, by expanding duplicates...
source = numpy.cumsum(keep) - 1
out = out[source, :]
out = out[numpy.argsort(index), :]

## Convert back from data matrix to image...
out = out.reshape(image.shape)

# Clamp unreasonable values, record where clamping occurs...
if not args.quiet:
Example #12
    for i in xrange(angle_step):
        t = float(i) / (angle_step - 1)
        t_x = int(t * s_x + (1 - t) * e_x)
        t_y = int(t * s_y + (1 - t) * e_y)
        try:
            if img[t_y, t_x, 0] < t:
                img[t_y, t_x, :] = t
        except:
            pass

img = array2cv(255.0 * img)
cv.SaveImage('composite_draw.png', img)

# Visualise the probability - both spatial and rotational in a single image, with one colour channel each for 3 directions...
img = numpy.zeros((size, size, 3), dtype=numpy.float32)
p = ProgBar()

for y in xrange(size):
    p.callback(y, size)

    for index, orient_x, orient_y in [(0, 1.0, 0.0), (1, 0.0, 1.0),
                                      (2, -1.0, 0.0)]:
        block = numpy.concatenate((
            (scale * y / float(size - 1)) * numpy.ones(size).reshape((-1, 1)),
            numpy.linspace(0.0, scale, size).reshape((-1, 1)),
            orient_x * numpy.ones(size).reshape((-1, 1)),
            orient_y * numpy.ones(size).reshape((-1, 1))), axis=1)

        vals = ms.probs(block)
        img[y, :, index] = vals
Example #13
    assert (task in Pool.methods())

# Load the dataset...
data = Iris1D()
print 'Loaded %i examples' % data.getVectors().shape[0]

# Make the output directory, killing any previous versions...
try:
    shutil.rmtree(out_dir)
except:
    pass
os.makedirs(out_dir)

# This calculates a suitable precision matrix to use...
print 'Calculating loo optimal precision matrix for data set...'
p = ProgBar()
loo = PrecisionLOO()
for i in xrange(data.getVectors().shape[0]):
    loo.addSample(numpy.reshape(data.getVectors()[i], (1, 1)))
loo.solve(p.callback)
precision = loo.getBest()
del p

print 'Optimal standard deviation = %s' % str(math.sqrt(1.0 / precision[0, 0]))

# Create and fill the pool...
print 'Filling the pool...'
pool = Pool()
p = ProgBar()
for i in xrange(data.getVectors().shape[0]):
    p.callback(i, data.getVectors().shape[0])
Example #14
    
    dist = numpy.sqrt((data[i,0]-0.5)**2 + (data[i,1]-0.5)**2) * numpy.pi * 7.0
    data[i,2] = (1.0+numpy.sin(dist)) / (6.0 + numpy.abs(numpy.sqrt(dist)-3.0))
    
    i += 1

ms = MeanShift()
ms.set_data(data, 'df', 2)
ms.set_kernel('triangular')
ms.set_spatial('kd_tree')



# Choose a reasonable size...
print 'Selecting size using loo:'
p = ProgBar()
ms.scale_loo_nll(callback = p.callback)
del p



# Plot the pdf, for reference...
image = numpy.zeros((pixels, pixels, 3), dtype=numpy.float32)

print 'Rendering probability map:'
p = ProgBar()
for row in xrange(pixels):
  p.callback(row, pixels)
  sam = numpy.append(numpy.linspace(0.0, 1.0, pixels).reshape((-1,1)), (row / float(pixels-1)) * numpy.ones(pixels).reshape((-1,1)), axis=1)
  image[row, :, :] = ms.probs(sam).reshape((-1,1))
del p
Example #16
for name, alg in [('human_picked', lambda: ms.set_scale(numpy.array([5.0, 5.0]))),
                  ('Silverman', ms.scale_silverman),
                  ('Scott', ms.scale_scott),
                  ('loo_nll', scale_loo_nll)]:
    # Calculate and print out the scales...
    print '<', name, '>'
    alg()
    print 'Scale:', ms.get_scale()
    print 'loo nll for this scale =', ms.loo_nll()
    mean, sd = ms.stats()
    print 'mean = (%f, %f); sd = (%f, %f)' % (mean[0], mean[1], sd[0], sd[1])

    # Render out a normalised probability map...
    image = numpy.zeros((dim, dim, 3), dtype=numpy.float32)

    p = ProgBar()
    for row in xrange(dim):
        p.callback(row, dim)
        sam = numpy.append(numpy.linspace(-size, size, dim).reshape((-1, 1)),
                           ((row / (dim - 1.0) - 0.5) * 2.0 * size) * numpy.ones(dim).reshape((-1, 1)),
                           axis=1)
        image[row, :, :] = ms.probs(sam).reshape((-1, 1))
    del p

    print 'Largest sampled probability =', image.max()
    image *= 255.0 / image.max()

    image = array2cv(image)
    cv.SaveImage('bandwidth_%s.png' % name, image)
    print
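
Silverman's and Scott's estimators above are closed-form rules of thumb, while loo_nll searches for the scale that minimises the leave-one-out negative log-likelihood. For reference, a sketch of the textbook one-dimensional Silverman rule; the MeanShift module's scale_silverman may differ in detail, so treat this as the idea rather than its implementation:

import numpy

def silverman_bandwidth(x):
    # h = 0.9 * min(std, IQR / 1.34) * n^(-1/5), for 1D samples x...
    x = numpy.asarray(x, dtype=numpy.float_)
    iqr = numpy.percentile(x, 75) - numpy.percentile(x, 25)
    return 0.9 * min(x.std(), iqr / 1.34) * x.shape[0] ** -0.2

print silverman_bandwidth(numpy.random.normal(size=1000))  # roughly 0.9 * 1000^-0.2 = 0.226
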
Example #17
def doTest(gen):
    # Train the model...
    df = DF()
    df.setGoal(Classification(3, 1))  # 3 = # of classes, 1 = channel of truth for training.
    df.setGen(gen)

    pb = ProgBar()
    df.learn(8, es, callback=pb.callback)  # 8 = number of trees to learn. dm is in channel 0, cat in channel 1.
    del pb

    # Drop some stats...
    print '%i trees containing %i nodes.\nAverage error is %.3f.' % (
        df.size(), df.nodes(), df.error())

    # Test...
    politician_success = 0
    politician_prob = 0.0
    res = df.evaluate(MatrixES(numpy.asarray(politician)),
                      which=['prob', 'best'])
    for i in xrange(politician_test):
        dist, best = res[i]
        if 0 == best: politician_success += 1
        politician_prob += dist[0]

    print 'Of %i politicians %i (%.1f%%) were correctly detected, with %.1f%% of total probability.' % (
        politician_test, politician_success, 100.0 * politician_success /
        float(politician_test), 100.0 * politician_prob / politician_test)

    marketing_success = 0
    marketing_prob = 0.0
    res = df.evaluate(MatrixES(numpy.asarray(marketing)),
                      which=['prob', 'best'],
                      mp=False)
    for i in xrange(marketing_test):
        dist, best = res[i]
        if 1 == best: marketing_success += 1
        marketing_prob += dist[1]

    print 'Of %i marketers %i (%.1f%%) were correctly detected, with %.1f%% of total probability.' % (
        marketing_test, marketing_success, 100.0 * marketing_success /
        float(marketing_test), 100.0 * marketing_prob / marketing_test)

    tele_sales_success = 0
    tele_sales_prob = 0.0
    for i in xrange(tele_sales_test):
        dist, best = df.evaluate(MatrixES(tele_sales[i]),
                                 which=['prob', 'best'])[0]
        if 2 == best: tele_sales_success += 1
        tele_sales_prob += dist[2]

    print 'Of %i tele-sellers %i (%.1f%%) were correctly detected, with %.1f%% of total probability.' % (
        tele_sales_test, tele_sales_success, 100.0 * tele_sales_success /
        float(tele_sales_test), 100.0 * tele_sales_prob / tele_sales_test)

    total_success = politician_success + marketing_success + tele_sales_success
    total_test = politician_test + marketing_test + tele_sales_test
    print 'Combined success is %i out of %i (%.1f%%)' % (
        total_success, total_test, 100.0 * total_success / float(total_test))
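
The three blocks above repeat the same bookkeeping per class: count how often best matches the true label and accumulate the probability assigned to the truth. Gathered into arrays, that bookkeeping collapses to a confusion matrix; a sketch assuming integer class labels like the 0/1/2 used above:

import numpy

def confusion_matrix(truth, best, classes):
    # cm[t, b] counts exemplars of true class t predicted as class b...
    cm = numpy.zeros((classes, classes), dtype=numpy.int_)
    for t, b in zip(truth, best):
        cm[t, b] += 1
    return cm

truth = numpy.array([0, 0, 1, 1, 2, 2])
best = numpy.array([0, 1, 1, 1, 2, 0])
cm = confusion_matrix(truth, best, 3)
print cm
print 'per-class accuracy:', cm.diagonal() / cm.sum(axis=1).astype(numpy.float_)
print 'overall accuracy: %.1f%%' % (100.0 * cm.trace() / float(cm.sum()))
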
Example #19
# Load the dataset...
data = Iris1D()
print 'Loaded %i examples'%data.getVectors().shape[0]



# Make the output directory, killing any previous versions...
try: shutil.rmtree(out_dir)
except: pass
os.makedirs(out_dir)



# This calculates a suitable precision matrix to use...
print 'Calculating loo optimal precision matrix for data set...'
p = ProgBar()
loo = PrecisionLOO()
for i in xrange(data.getVectors().shape[0]):
  loo.addSample(numpy.reshape(data.getVectors()[i], (1,1)))
loo.solve(p.callback)
precision = loo.getBest()
del p


print 'Optimal standard deviation = %s'%str(math.sqrt(1.0/precision[0,0]))



# Create and fill the pool...
print 'Filling the pool...'
pool = Pool()
Example #20
def doRun(tdc):
  # Create directory to put images into...
  if not sweep:
    try:
      os.makedirs('junction')
    except:
      pass

    
  # Create a corpus...    
  c = rlda.Corpus(10,4)
  c.setIdentWordCounts(identCount(),4)

  for i in xrange(tdc):
    dic, abn = genDoc(False)
    doc = rlda.Document(dic)
    doc.abn = abn
    c.add(doc)

    if not sweep:
      prob = numpy.zeros((6,6,4),dtype=numpy.float_)
      for key,item in dic.iteritems():
        x,y = identToCoord(key[0])
        prob[x,y,key[1]] = item

      multProb = 255.0/prob.max()
      img = cv.CreateImage((6*25,6*25),cv.IPL_DEPTH_32F,3)
      for y in xrange(6):
        for x in xrange(6):
          coords = [(x*25,y*25),((x+1)*25,y*25),((x+1)*25,(y+1)*25),(x*25,(y+1)*25)]
          centre = (x*25+12,y*25+12)
          for d in xrange(4):
            if d%2==0:
              col = cv.RGB(0.0,prob[x,y,d]*multProb,0.0)
            else:
              col = cv.RGB(prob[x,y,d]*multProb,0.0,0.0)
            cv.FillPoly(img, [(coords[d],coords[(d+1)%4],centre)], col)

      cv.SaveImage('junction/xdoc_%i_%s.png'%(i,str(abn)),img)


  # Fit a model...
  params = rlda.Params()
  params.setRuns(16)

  print 'Fitting model...'
  p = ProgBar()
  c.fit(params,p.callback)
  del p

  ir = c.getIR()
  wrt = c.getWRT()


  # Visualise the regions...
  if not sweep:
    mult = 255.0/ir.max()
    for r in xrange(ir.shape[1]):
      rend = numpy.zeros((6,6),dtype=numpy.float_)
      for i in xrange(ir.shape[0]): rend[identToCoord(i)] = ir[i,r] * mult
      rend = numpy.repeat(numpy.repeat(rend,25,axis=0),25,axis=1)
      cv.SaveImage('junction/region_%i.png'%r,array2cv(rend))
        


  # Visualise the topics...
  if not sweep:
    for t in xrange(wrt.shape[2]):
      prob = numpy.zeros((6,6,4),dtype=numpy.float_)
      for i in xrange(ir.shape[0]):
        x,y = identToCoord(i)
        for r in xrange(wrt.shape[1]):
          for w in xrange(wrt.shape[0]):
            prob[x,y,w] += ir[i,r] * wrt[w,r,t]

      multProb = 255.0/prob.max()
      img = cv.CreateImage((6*25,6*25),cv.IPL_DEPTH_32F,3)
      for y in xrange(6):
        for x in xrange(6):
          coords = [(x*25,y*25),((x+1)*25,y*25),((x+1)*25,(y+1)*25),(x*25,(y+1)*25)]
          centre = (x*25+12,y*25+12)
          for d in xrange(4):
            if d%2==0:
              col = cv.RGB(0.0,prob[x,y,d]*multProb,0.0)
            else:
              col = cv.RGB(prob[x,y,d]*multProb,0.0,0.0)
            cv.FillPoly(img, [(coords[d],coords[(d+1)%4],centre)], col)
      
      cv.SaveImage('junction/topic_%i.png'%t,img)


  # Test on a bunch of documents, creating a list of (abnormality score, actually-an-abnormality) pairs...
  ab_gt = []
  print 'Testing...'
  p = ProgBar()
  for i in xrange(testDocCount):
    p.callback(i,testDocCount)
    dic, abn = genDoc()
    doc = rlda.Document(dic)
    doc.fit(ir,wrt)
    ab_gt.append((doc.negLogLikeRegionVec().max(),abn))
  del p

  ab_gt.sort(reverse=True)


  # Use the pairs to construct a roc...
  posCount = len(filter(lambda p:p[1]==True,ab_gt))
  negCount = len(ab_gt) - posCount
  print 'positive samples = ',posCount
  print 'negative samples = ',negCount

  truePos = 0
  falsePos = 0
  trueNeg = negCount
  falseNeg = posCount

  roc = []

  for p in ab_gt:
    if p[1]:
      truePos += 1
      falseNeg -= 1
    else:
      falsePos +=1
      trueNeg -= 1

    pnt = (float(falsePos)/float(falsePos+trueNeg), float(truePos)/float(truePos+falseNeg))
    roc.append(pnt)


  # Save the roc to disk...
  if not sweep:
    f = open('junction_roc.txt','w')
    f.write('0.0 0.0\n')
    for pnt in roc: f.write('%f %f\n'%pnt)
    f.close()


  # Calculate and print out the area under the roc...
  area = 0.0
  for i in xrange(1,len(roc)):
    area += 0.5*(roc[i-1][1]+roc[i][1]) * (roc[i][0]-roc[i-1][0])
  print 'area under roc =',area, '(above',(1.0-area),')'

  return area
Example #21
for kernel in kernels:
    print 'Processing', kernel

    # Create the four MeanShift objects...
    def to_ms(data):
        ms = MeanShift()
        ms.set_data(data, 'df')
        ms.set_kernel(kernel)
        ms.set_spatial('kd_tree')
        ms.quality = 1.0
        return ms

    ms = map(to_ms, samples)

    # Infer a good loo value for the first one, then set them all to the same...
    p = ProgBar()
    ms[0].scale_loo_nll(callback=p.callback)
    del p

    for i in xrange(1, 4):
        ms[i].copy_scale(ms[0])

    # Visualise the distributions using KDE...
    imgs = []
    p = ProgBar()
    for i in xrange(4):
        p.callback(i, 4)
        img = numpy.zeros((draw_scale * size[0], draw_scale * size[1]),
                          dtype=numpy.float32)

        sweep0 = numpy.linspace(0, size[0], img.shape[0])
Example #23

# Setup the mean shift object...
ms = MeanShift()
ms.set_data(data, 'df')
normal_kernels = ['uniform', 'triangular', 'epanechnikov', 'cosine', 'gaussian', 'cauchy', 'logistic']
ms.set_kernel(random.choice(normal_kernels))
ms.set_spatial('kd_tree')

print 'kernel = %s' % ms.get_kernel()



# Choose a reasonable size...
print 'Selecting size using loo:'
p = ProgBar()
ms.scale_loo_nll(callback = p.callback)
del p



# Render out a normalised probability map...
image = numpy.zeros((dim, dim, 3), dtype=numpy.float32)

print 'Rendering probability map:'
p = ProgBar()
for row in xrange(dim):
  p.callback(row, dim)
  sam = numpy.append(numpy.linspace(-size, size, dim).reshape((-1,1)), ((row / (dim-1.0) - 0.5) * 2.0 * size) * numpy.ones(dim).reshape((-1,1)), axis=1)
  image[row, :, :] = ms.probs(sam).reshape((-1,1))
del p
Example #24
from swood import SWood

import test_model as mod

# Tests the stochastic woodland class on the model contained within test_model.py

# Parameters...
tree_count = 256
option_count = 4

# Get training data...
int_dm, real_dm, cats, weight = mod.generate_train()

# Train...
p = ProgBar()
sw = SWood(int_dm,
           real_dm,
           cats,
           tree_count=tree_count,
           option_count=option_count,
           weight=weight,
           callback=p.callback)
del p

print 'Out-of-bag success rate = %.2f%%' % (100.0 * sw.oob_success())
print

# Test...
mod.test(sw.classify)
Example #25
        dist = numpy.sqrt((data[i, 0] - 0.5)**2 + (data[i, 1] - 0.5)**2) * numpy.pi * 7.0
        data[i, 2] = (1.0 + numpy.sin(dist)) / (6.0 + numpy.abs(numpy.sqrt(dist) - 3.0))

        i += 1

ms = MeanShift()
ms.set_data(data, 'df', 2)
ms.set_kernel('triangular')
ms.set_spatial('kd_tree')

# Choose a reasonable size...
print 'Selecting size using loo:'
p = ProgBar()
ms.scale_loo_nll(callback=p.callback)
del p

# Plot the pdf, for reference...
image = numpy.zeros((pixels, pixels, 3), dtype=numpy.float32)

print 'Rendering probability map:'
p = ProgBar()
for row in xrange(pixels):
    p.callback(row, pixels)
    sam = numpy.append(numpy.linspace(0.0, 1.0, pixels).reshape((-1, 1)),
                       (row / float(pixels - 1)) * numpy.ones(pixels).reshape((-1, 1)),
                       axis=1)
    image[row, :, :] = ms.probs(sam).reshape((-1, 1))