def doTest(name, gen):
  # Train the model...
  df = DF()
  df.setGoal(DensityGaussian(2)) # 2 = # of features
  df.setGen(gen)
  df.getPruner().setMinTrain(48) # Playing around shows that this is probably the most important number to get right when doing density estimation - the information gain heuristic just doesn't know when to stop.

  global es
  pb = ProgBar()
  df.learn(32, es, callback = pb.callback, mp = doMP) # 32 = number of trees to learn - you need a lot to get a good answer.
  del pb

  # Drop some stats...
  print '%i trees containing %i nodes.\nAverage error is %.3f.' % (df.size(), df.nodes(), df.error())

  # Visualise the density estimate...
  global img
  testSet = numpy.empty((pixel_width, 2), dtype=numpy.float32)

  pb = ProgBar()
  for y in xrange(pixel_width):
    pb.callback(y, pixel_width)

    i = 0
    for x in xrange(pixel_width):
      testSet[i, 0] = axis_half_width * float(x - pixel_half_width) / pixel_half_width
      testSet[i, 1] = axis_half_width * float(y - pixel_half_width) / pixel_half_width
      i += 1

    test = MatrixES(testSet)
    res = df.evaluate(test, mp = doMP)

    i = 0
    for x in xrange(pixel_width):
      img[y, x, :] = res[i]
      i += 1
  del pb

  print 'Maximum probability = %.2f' % img.max()

  img /= img.max()
  cv.SaveImage('test_de_circle_%s.png' % name, array2cv(img * 255))
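# A rough check of the minTrain claim above - a minimal sketch that sweeps
# candidate values and compares the forest's reported error, reusing only the
# DF calls already seen; sweepMinTrain and its candidate list are assumptions,
# not part of the original test...
def sweepMinTrain(gen):
  global es
  for min_train in [8, 16, 32, 48, 64, 128]:
    df = DF()
    df.setGoal(DensityGaussian(2))
    df.setGen(gen)
    df.getPruner().setMinTrain(min_train)
    df.learn(32, es, mp = doMP)
    print 'minTrain = %3i: average error = %.3f' % (min_train, df.error())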
def drainAllPools(train, test, methods, runs, base):
  for method in methods:
    print 'method = %s' % method

    p = ProgBar()
    aq, ak, at = drainPools(train, test, method, runs, p.callback)
    del p

    print 'Average # queries by # classes found:'
    for i, aqc in enumerate(aq):
      if i > 1:
        print '  %i classes found: average of %.2f queries' % (i, aqc)
    print

    fn = '%s%s_p%i_r%i.csv' % (base, method, sum(map(lambda x: x[2], train)), runs)
    f = open(fn, 'w')
    f.write('queries, classes, inlier\n')
    for i in xrange(len(ak)):
      f.write('%i, %f, %f\n' % (i, ak[i], at[i]))
    f.close()
step = ((args.block_size * 1024 * 1024) // (col_in.shape[0] * 4)) + 1
if not args.quiet:
  print 'Converting... (%i pixels at a time)' % step

slices = map(lambda x: slice(x * step, (x + 1) * step), xrange(data.shape[0] // step + 1))
if slices[-1].stop < data.shape[0]:
  slices.append(slice(slices[-1].stop, data.shape[0]))

## Calculate each channel in turn...
out = data.copy()
for cc in xrange(3):
  if not args.quiet:
    print 'Converting - %s...' % (['red', 'green', 'blue'][cc])

  p = ProgBar()
  for i, s in enumerate(slices):
    p.callback(i, len(slices))
    out[s, cc] = model[cc](data[s, :].astype(numpy.float32))
  del p

## Expand the data matrix back up to the order and length of the image, by expanding duplicates...
source = numpy.cumsum(keep) - 1
out = out[source, :]
out = out[numpy.argsort(index), :]

## Convert back from data matrix to image...
out = out.reshape(image.shape)
docImage = numpy.hstack(stack).T * 255.0
img = array2cv(docImage)
cv.SaveImage('test_abnorm_lines/docs.png', img)

# Train...
params = ddhdp.Params()
params.runs = 1
params.samples = 1
#params.burnIn = 10000
#c.setOneCluster(True)

print 'Fitting model...'
p = ProgBar()
model = corpus.sampleModel(params, p.callback)
del p
#model.bestSampleOnly()

sam = model.getSample(0)

def smartVecPrint(numVec):
  ret = []
  ret.append('[')
  for i in xrange(numVec.shape[0]):
    ret.append('%s%.3f' % (' ' if i != 0 else '', numVec[i]))
  ret.append(']')
  return ''.join(ret)
if train_label.shape[0] > cull:
  indices = numpy.random.permutation(train_label.shape[0])
  indices = indices[:cull]

  train_fv = train_fv[indices, :]
  train_label = train_label[indices]
  train_weight = train_weight[indices]

  print 'Culled to %i' % cull

forest = frf.Forest()
forest.configure('C', 'C', 'S' * train_fv.shape[1])
forest.min_exemplars = 8
forest.opt_features = int(numpy.sqrt(train_fv.shape[1]))

print 'frf learning:'
pb = ProgBar()
oob = forest.train(train_fv, [train_label, ('w', train_weight)], trees, pb.callback)
del pb

# Report oob error rate for the forest, plus other stuff...
class_histogram = numpy.bincount(train_label)
popular = numpy.argmax(class_histogram)
popular_rate = class_histogram[popular] / float(class_histogram.sum())
popular_char = label_index[popular]

print '  Class count: %i' % len(label_index)
print '  Most common class: %s (%.2f%% of data set)' % (popular_char, popular_rate * 100.0)
print '  frf: OOB accuracy: %.2f%%' % ((1.0 - oob.mean()) * 100.0)
def doRun(tdc):
  # Create a corpus...
  c = lda.Corpus(4)
  c.setWordCount(identCount() * 4)

  for i in xrange(tdc):
    dic, abn = genDoc()
    nDic = dict()
    for key, item in dic.iteritems():
      nDic[key[0] * 4 + key[1]] = item
    doc = lda.Document(nDic)
    doc.abn = abn
    c.add(doc)

  # Fit a model...
  params = lda.Params()
  params.setRuns(16)

  print 'Fitting model...'
  p = ProgBar()
  c.fit(params, p.callback)
  del p

  tw = c.topicsWords()

  # Test on a bunch of documents, creating a list of (abnormality score, actually an abnormality) pairs...
  ab_gt = []
  print 'Testing...'
  p = ProgBar()
  for i in xrange(testDocCount):
    p.callback(i, testDocCount)
    dic, abn = genDoc()
    nDic = dict()
    for key, item in dic.iteritems():
      nDic[key[0] * 4 + key[1]] = item
    doc = lda.Document(nDic)
    doc.fit(tw)
    ab_gt.append((doc.negLogLikelihood(tw), abn))
  del p

  ab_gt.sort(reverse=True)

  # Use the pairs to construct a roc...
  posCount = len(filter(lambda p: p[1] == True, ab_gt))
  negCount = len(ab_gt) - posCount
  print 'positive samples =', posCount
  print 'negative samples =', negCount

  truePos = 0
  falsePos = 0
  trueNeg = negCount
  falseNeg = posCount

  roc = []
  for p in ab_gt:
    if p[1]:
      truePos += 1
      falseNeg -= 1
    else:
      falsePos += 1
      trueNeg -= 1
    pnt = (float(falsePos) / float(falsePos + trueNeg), float(truePos) / float(truePos + falseNeg))
    roc.append(pnt)

  # Save the roc to disk...
  if not sweep:
    f = open('junction_roc.txt', 'w')
    f.write('0.0 0.0\n')
    for pnt in roc:
      f.write('%f %f\n' % pnt)
    f.close()

  # Calculate and print out the area under the roc...
  area = 0.0
  for i in xrange(1, len(roc)):
    area += 0.5 * (roc[i-1][1] + roc[i][1]) * (roc[i][0] - roc[i-1][0])
  print 'area under roc =', area, '(above', (1.0 - area), ')'

  return area
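# The ROC/AUC logic above can be factored into a standalone helper - a minimal
# sketch of the same trapezoid-rule computation over plain Python lists; the
# function name roc_auc is an assumption, not part of the lda test script...
def roc_auc(scores, labels):
  # Sort by score, descending, so each pair sweeps the decision threshold...
  pairs = sorted(zip(scores, labels), reverse = True)
  pos = len(filter(lambda p: p[1], pairs))
  neg = len(pairs) - pos

  tp = 0
  fp = 0
  prev = (0.0, 0.0)
  area = 0.0
  for _, label in pairs:
    if label:
      tp += 1
    else:
      fp += 1
    pnt = (fp / float(neg), tp / float(pos)) # (false positive rate, true positive rate).
    area += 0.5 * (prev[1] + pnt[1]) * (pnt[0] - prev[0]) # Trapezoid rule.
    prev = pnt
  return area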
def doRun(tdc):
  # Create a corpus...
  vlda = lda.VLDA(4, identCount() * 4)

  abnDict = dict()
  for i in xrange(tdc):
    dic, abn = genDoc()
    nDic = dict()
    for key, item in dic.iteritems():
      nDic[key[0] * 4 + key[1]] = item
    doc = vlda.add(nDic)
    abnDict[doc] = abn

  # Fit a model...
  print 'Fitting model...'
  p = ProgBar()
  vlda.solve()
  del p

  # Visualise the topics...
  if not sweep:
    for t in xrange(vlda.numTopics()):
      prob = numpy.zeros((6, 6, 4), dtype=numpy.float_)
      beta = vlda.getBeta(t)
      for i in xrange(beta.shape[0]):
        x, y = identToCoord(i // 4)
        w = i % 4
        prob[x, y, w] += beta[i]

      multProb = 255.0 / prob.max()
      img = cv.CreateImage((6 * 25, 6 * 25), cv.IPL_DEPTH_32F, 3)
      for y in xrange(6):
        for x in xrange(6):
          coords = [(x * 25, y * 25), ((x + 1) * 25, y * 25), ((x + 1) * 25, (y + 1) * 25), (x * 25, (y + 1) * 25)]
          centre = (x * 25 + 12, y * 25 + 12)
          for d in xrange(4):
            if d % 2 == 0:
              col = cv.RGB(0.0, prob[x, y, d] * multProb, 0.0)
            else:
              col = cv.RGB(prob[x, y, d] * multProb, 0.0, 0.0)
            cv.FillPoly(img, [(coords[d], coords[(d + 1) % 4], centre)], col)
      cv.SaveImage('junction_topic_%i.png' % t, img)

  # Test on a bunch of documents, creating a list of (abnormality score, actually an abnormality) pairs...
  ab_gt = []
  print 'Testing...'
  p = ProgBar()
  for i in xrange(testDocCount):
    p.callback(i, testDocCount)
    dic, abn = genDoc()
    nDic = dict()
    for key, item in dic.iteritems():
      nDic[key[0] * 4 + key[1]] = item
    nll = vlda.getNewNLL(nDic)
    ab_gt.append((nll, abn))
  del p

  ab_gt.sort(reverse=True)

  # Use the pairs to construct a roc...
  posCount = len(filter(lambda p: p[1] == True, ab_gt))
  negCount = len(ab_gt) - posCount
  print 'positive samples =', posCount
  print 'negative samples =', negCount

  truePos = 0
  falsePos = 0
  trueNeg = negCount
  falseNeg = posCount

  roc = []
  for p in ab_gt:
    if p[1]:
      truePos += 1
      falseNeg -= 1
    else:
      falsePos += 1
      trueNeg -= 1
    pnt = (float(falsePos) / float(falsePos + trueNeg), float(truePos) / float(truePos + falseNeg))
    roc.append(pnt)

  # Save the roc to disk...
  if not sweep:
    f = open('junction_roc.csv', 'w')
    f.write('0.0, 0.0\n')
    for pnt in roc:
      f.write('%f, %f\n' % pnt)
    f.close()

  # Calculate and print out the area under the roc...
  area = 0.0
  for i in xrange(1, len(roc)):
    area += 0.5 * (roc[i-1][1] + roc[i][1]) * (roc[i][0] - roc[i-1][0])
  print 'area under roc =', area, '(above', (1.0 - area), ')'

  return area
def scale_loo_nll():
  p = ProgBar()
  ms.scale_loo_nll(callback=p.callback)
  del p
for i in xrange(angle_step):
  t = float(i) / (angle_step - 1)
  t_x = int(t * s_x + (1 - t) * e_x)
  t_y = int(t * s_y + (1 - t) * e_y)

  try:
    if img[t_y, t_x, 0] < t:
      img[t_y, t_x, :] = t
  except IndexError:
    pass

img = array2cv(255.0 * img)
cv.SaveImage('composite_draw.png', img)

# Visualise the probability - both spatial and rotational in a single image, with one colour channel each for 3 directions...
img = numpy.zeros((size, size, 3), dtype=numpy.float32)

p = ProgBar()
for y in xrange(size):
  p.callback(y, size)
  for index, orient_x, orient_y in [(0, 1.0, 0.0), (1, 0.0, 1.0), (2, -1.0, 0.0)]:
    block = numpy.concatenate(((scale * y / float(size - 1)) * numpy.ones(size).reshape((-1, 1)),
                               numpy.linspace(0.0, scale, size).reshape((-1, 1)),
                               orient_x * numpy.ones(size).reshape((-1, 1)),
                               orient_y * numpy.ones(size).reshape((-1, 1))), axis=1)
    vals = ms.probs(block)
    img[y, :, index] = vals
del p

img *= 255 / img.max()
img = array2cv(img)
cv.SaveImage('composite_prob.png', img)
assert task in Pool.methods()

# Load the dataset...
data = Iris1D()
print 'Loaded %i examples' % data.getVectors().shape[0]

# Make the output directory, killing any previous versions...
try:
  shutil.rmtree(out_dir)
except OSError:
  pass
os.makedirs(out_dir)

# This calculates a suitable precision matrix to use...
print 'Calculating loo optimal precision matrix for data set...'
p = ProgBar()
loo = PrecisionLOO()

for i in xrange(data.getVectors().shape[0]):
  loo.addSample(numpy.reshape(data.getVectors()[i], (1, 1)))

loo.solve(p.callback)
precision = loo.getBest()
del p

print 'Optimal standard deviation = %s' % str(math.sqrt(1.0 / precision[0, 0]))

# Create and fill the pool...
print 'Filling the pool...'
pool = Pool()

p = ProgBar()
for i in xrange(data.getVectors().shape[0]):
  p.callback(i, data.getVectors().shape[0])
  dist = numpy.sqrt((data[i, 0] - 0.5)**2 + (data[i, 1] - 0.5)**2) * numpy.pi * 7.0
  data[i, 2] = (1.0 + numpy.sin(dist)) / (6.0 + numpy.abs(numpy.sqrt(dist) - 3.0))
  i += 1

ms = MeanShift()
ms.set_data(data, 'df', 2)
ms.set_kernel('triangular')
ms.set_spatial('kd_tree')

# Choose a reasonable size...
print 'Selecting size using loo:'
p = ProgBar()
ms.scale_loo_nll(callback = p.callback)
del p

# Plot the pdf, for reference...
image = numpy.zeros((pixels, pixels, 3), dtype=numpy.float32)

print 'Rendering probability map:'
p = ProgBar()
for row in xrange(pixels):
  p.callback(row, pixels)
  sam = numpy.append(numpy.linspace(0.0, 1.0, pixels).reshape((-1, 1)), (row / float(pixels - 1)) * numpy.ones(pixels).reshape((-1, 1)), axis=1)
  image[row, :, :] = ms.probs(sam).reshape((-1, 1))
del p
for name, alg in [('human_picked', lambda: ms.set_scale(numpy.array([5.0, 5.0]))), ('Silverman', ms.scale_silverman), ('Scott', ms.scale_scott), ('loo_nll', scale_loo_nll)]:
  # Calculate and print out the scales...
  print '<', name, '>'
  alg()

  print 'Scale:', ms.get_scale()
  print 'loo nll for this scale =', ms.loo_nll()

  mean, sd = ms.stats()
  print 'mean = (%f, %f); sd = (%f, %f)' % (mean[0], mean[1], sd[0], sd[1])

  # Render out a normalised probability map...
  image = numpy.zeros((dim, dim, 3), dtype=numpy.float32)

  p = ProgBar()
  for row in xrange(dim):
    p.callback(row, dim)
    sam = numpy.append(numpy.linspace(-size, size, dim).reshape((-1, 1)), ((row / (dim - 1.0) - 0.5) * 2.0 * size) * numpy.ones(dim).reshape((-1, 1)), axis=1)
    image[row, :, :] = ms.probs(sam).reshape((-1, 1))
  del p

  print 'Largest sampled probability =', image.max()
  image *= 255.0 / image.max()

  image = array2cv(image)
  cv.SaveImage('bandwidth_%s.png' % name, image)
  print
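# For reference, the two rule-of-thumb bandwidths compared above have simple
# closed forms - a minimal numpy sketch, per dimension, for n exemplars in d
# dimensions. Illustrative only: the MeanShift object stores a scale, which
# may be the reciprocal of a bandwidth, so do not expect identical numbers...
def silverman_bandwidth(data):
  # h_i = sigma_i * (4 / ((d + 2) * n)) ** (1 / (d + 4))...
  n, d = data.shape
  return data.std(axis=0) * (4.0 / ((d + 2.0) * n)) ** (1.0 / (d + 4.0))

def scott_bandwidth(data):
  # h_i = sigma_i * n ** (-1 / (d + 4))...
  n, d = data.shape
  return data.std(axis=0) * n ** (-1.0 / (d + 4.0))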
def doTest(gen):
  # Train the model...
  df = DF()
  df.setGoal(Classification(3, 1)) # 3 = # of classes, 1 = channel of truth for training.
  df.setGen(gen)

  pb = ProgBar()
  df.learn(8, es, callback = pb.callback) # 8 = number of trees to learn. dm is in channel 0, cat in channel 1.
  del pb

  # Drop some stats...
  print '%i trees containing %i nodes.\nAverage error is %.3f.' % (df.size(), df.nodes(), df.error())

  # Test...
  politician_success = 0
  politician_prob = 0.0
  res = df.evaluate(MatrixES(numpy.asarray(politician)), which=['prob', 'best'])
  for i in xrange(politician_test):
    dist, best = res[i]
    if 0 == best:
      politician_success += 1
      politician_prob += dist[0]
  print 'Of %i politicians %i (%.1f%%) were correctly detected, with %.1f%% of total probability.' % (politician_test, politician_success, 100.0 * politician_success / float(politician_test), 100.0 * politician_prob / politician_test)

  marketing_success = 0
  marketing_prob = 0.0
  res = df.evaluate(MatrixES(numpy.asarray(marketing)), which=['prob', 'best'], mp=False)
  for i in xrange(marketing_test):
    dist, best = res[i]
    if 1 == best:
      marketing_success += 1
      marketing_prob += dist[1]
  print 'Of %i marketers %i (%.1f%%) were correctly detected, with %.1f%% of total probability.' % (marketing_test, marketing_success, 100.0 * marketing_success / float(marketing_test), 100.0 * marketing_prob / marketing_test)

  tele_sales_success = 0
  tele_sales_prob = 0.0
  for i in xrange(tele_sales_test):
    dist, best = df.evaluate(MatrixES(tele_sales[i]), which=['prob', 'best'])[0]
    if 2 == best:
      tele_sales_success += 1
      tele_sales_prob += dist[2]
  print 'Of %i tele-sellers %i (%.1f%%) were correctly detected, with %.1f%% of total probability.' % (tele_sales_test, tele_sales_success, 100.0 * tele_sales_success / float(tele_sales_test), 100.0 * tele_sales_prob / tele_sales_test)

  total_success = politician_success + marketing_success + tele_sales_success
  total_test = politician_test + marketing_test + tele_sales_test
  print 'Combined success is %i out of %i (%.1f%%)' % (total_success, total_test, 100.0 * total_success / float(total_test))
def doRun(tdc):
  # Create directory to put images into...
  if not sweep:
    try:
      os.makedirs('junction')
    except OSError:
      pass

  # Create a corpus...
  c = rlda.Corpus(10, 4)
  c.setIdentWordCounts(identCount(), 4)

  for i in xrange(tdc):
    dic, abn = genDoc(False)
    doc = rlda.Document(dic)
    doc.abn = abn
    c.add(doc)

    if not sweep:
      prob = numpy.zeros((6, 6, 4), dtype=numpy.float_)
      for key, item in dic.iteritems():
        x, y = identToCoord(key[0])
        prob[x, y, key[1]] = item

      multProb = 255.0 / prob.max()
      img = cv.CreateImage((6 * 25, 6 * 25), cv.IPL_DEPTH_32F, 3)
      for y in xrange(6):
        for x in xrange(6):
          coords = [(x * 25, y * 25), ((x + 1) * 25, y * 25), ((x + 1) * 25, (y + 1) * 25), (x * 25, (y + 1) * 25)]
          centre = (x * 25 + 12, y * 25 + 12)
          for d in xrange(4):
            if d % 2 == 0:
              col = cv.RGB(0.0, prob[x, y, d] * multProb, 0.0)
            else:
              col = cv.RGB(prob[x, y, d] * multProb, 0.0, 0.0)
            cv.FillPoly(img, [(coords[d], coords[(d + 1) % 4], centre)], col)
      cv.SaveImage('junction/xdoc_%i_%s.png' % (i, str(abn)), img)

  # Fit a model...
  params = rlda.Params()
  params.setRuns(16)

  print 'Fitting model...'
  p = ProgBar()
  c.fit(params, p.callback)
  del p

  ir = c.getIR()
  wrt = c.getWRT()

  # Visualise the regions...
  if not sweep:
    mult = 255.0 / ir.max()
    for r in xrange(ir.shape[1]):
      rend = numpy.zeros((6, 6), dtype=numpy.float_)
      for i in xrange(ir.shape[0]):
        rend[identToCoord(i)] = ir[i, r] * mult
      rend = numpy.repeat(numpy.repeat(rend, 25, axis=0), 25, axis=1)
      cv.SaveImage('junction/region_%i.png' % r, array2cv(rend))

  # Visualise the topics...
  if not sweep:
    for t in xrange(wrt.shape[2]):
      prob = numpy.zeros((6, 6, 4), dtype=numpy.float_)
      for i in xrange(ir.shape[0]):
        x, y = identToCoord(i)
        for r in xrange(wrt.shape[1]):
          for w in xrange(wrt.shape[0]):
            prob[x, y, w] += ir[i, r] * wrt[w, r, t]

      multProb = 255.0 / prob.max()
      img = cv.CreateImage((6 * 25, 6 * 25), cv.IPL_DEPTH_32F, 3)
      for y in xrange(6):
        for x in xrange(6):
          coords = [(x * 25, y * 25), ((x + 1) * 25, y * 25), ((x + 1) * 25, (y + 1) * 25), (x * 25, (y + 1) * 25)]
          centre = (x * 25 + 12, y * 25 + 12)
          for d in xrange(4):
            if d % 2 == 0:
              col = cv.RGB(0.0, prob[x, y, d] * multProb, 0.0)
            else:
              col = cv.RGB(prob[x, y, d] * multProb, 0.0, 0.0)
            cv.FillPoly(img, [(coords[d], coords[(d + 1) % 4], centre)], col)
      cv.SaveImage('junction/topic_%i.png' % t, img)

  # Test on a bunch of documents, creating a list of (abnormality score, actually an abnormality) pairs...
  ab_gt = []
  print 'Testing...'
  p = ProgBar()
  for i in xrange(testDocCount):
    p.callback(i, testDocCount)
    dic, abn = genDoc()
    doc = rlda.Document(dic)
    doc.fit(ir, wrt)
    ab_gt.append((doc.negLogLikeRegionVec().max(), abn))
  del p

  ab_gt.sort(reverse=True)

  # Use the pairs to construct a roc...
  posCount = len(filter(lambda p: p[1] == True, ab_gt))
  negCount = len(ab_gt) - posCount
  print 'positive samples =', posCount
  print 'negative samples =', negCount

  truePos = 0
  falsePos = 0
  trueNeg = negCount
  falseNeg = posCount

  roc = []
  for p in ab_gt:
    if p[1]:
      truePos += 1
      falseNeg -= 1
    else:
      falsePos += 1
      trueNeg -= 1
    pnt = (float(falsePos) / float(falsePos + trueNeg), float(truePos) / float(truePos + falseNeg))
    roc.append(pnt)

  # Save the roc to disk...
  if not sweep:
    f = open('junction_roc.txt', 'w')
    f.write('0.0 0.0\n')
    for pnt in roc:
      f.write('%f %f\n' % pnt)
    f.close()

  # Calculate and print out the area under the roc...
  area = 0.0
  for i in xrange(1, len(roc)):
    area += 0.5 * (roc[i-1][1] + roc[i][1]) * (roc[i][0] - roc[i-1][0])
  print 'area under roc =', area, '(above', (1.0 - area), ')'

  return area
for kernel in kernels:
  print 'Processing', kernel

  # Create the four MeanShift objects...
  def to_ms(data):
    ms = MeanShift()
    ms.set_data(data, 'df')
    ms.set_kernel(kernel)
    ms.set_spatial('kd_tree')
    ms.quality = 1.0
    return ms

  ms = map(to_ms, samples)

  # Infer a good loo value for the first one, then set them all to the same...
  p = ProgBar()
  ms[0].scale_loo_nll(callback=p.callback)
  del p

  for i in xrange(1, 4):
    ms[i].copy_scale(ms[0])

  # Visualise the distributions using KDE...
  imgs = []
  p = ProgBar()
  for i in xrange(4):
    p.callback(i, 4)
    img = numpy.zeros((draw_scale * size[0], draw_scale * size[1]), dtype=numpy.float32)
    sweep0 = numpy.linspace(0, size[0], img.shape[0])
# Setup the mean shift object...
ms = MeanShift()
ms.set_data(data, 'df')

normal_kernels = ['uniform', 'triangular', 'epanechnikov', 'cosine', 'gaussian', 'cauchy', 'logistic']
ms.set_kernel(random.choice(normal_kernels))
ms.set_spatial('kd_tree')
print 'kernel = %s' % ms.get_kernel()

# Choose a reasonable size...
print 'Selecting size using loo:'
p = ProgBar()
ms.scale_loo_nll(callback = p.callback)
del p

# Render out a normalised probability map...
image = numpy.zeros((dim, dim, 3), dtype=numpy.float32)

print 'Rendering probability map:'
p = ProgBar()
for row in xrange(dim):
  p.callback(row, dim)
  sam = numpy.append(numpy.linspace(-size, size, dim).reshape((-1, 1)), ((row / (dim - 1.0) - 0.5) * 2.0 * size) * numpy.ones(dim).reshape((-1, 1)), axis=1)
  image[row, :, :] = ms.probs(sam).reshape((-1, 1))
del p
from swood import SWood
import test_model as mod

# Tests the stochastic woodland class on the model contained within test_model.py

# Parameters...
tree_count = 256
option_count = 4

# Get training data...
int_dm, real_dm, cats, weight = mod.generate_train()

# Train...
p = ProgBar()
sw = SWood(int_dm, real_dm, cats, tree_count=tree_count, option_count=option_count, weight=weight, callback=p.callback)
del p

print 'Out-of-bag success rate = %.2f%%' % (100.0 * sw.oob_success())
print

# Test...
mod.test(sw.classify)
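# The out-of-bag rate printed above works because each tree trains on a
# bootstrap draw (sampling with replacement), which leaves roughly
# 1/e ~ 36.8% of the exemplars unseen by that tree, available as a free
# held-out test set. A minimal numpy sketch of that property - illustrative
# only, not the SWood internals...
import numpy

n = 10000
draw = numpy.random.randint(0, n, size = n) # One bootstrap sample.
in_bag = numpy.zeros(n, dtype = numpy.bool_)
in_bag[draw] = True
print 'out-of-bag fraction = %.3f' % (1.0 - in_bag.mean()) # Expect ~0.368.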