Example #1
rows = []
for i in xrange(documentsToTrain // docGridWidth):
    row = docs[i * docGridWidth:(i + 1) * docGridWidth]
    rowExt = []
    for r in row:
        rowExt.append(r)
        rowExt.append(
            numpy.zeros((gridSpace, gridScale * 5), dtype=numpy.float_))
    rowExt = rowExt[:-1]
    rows.append(numpy.vstack(rowExt))

stack = []
for r in rows:
    stack.append(r)
    stack.append(numpy.zeros((r.shape[0], gridSpace), dtype=numpy.float_))
stack = stack[:-1]
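# Join the spaced rows side by side; the transpose puts the assembled grid
# into the orientation expected by the image code, and the scaling maps
# values up to the 0-255 grey-level range.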
docImage = numpy.hstack(stack).T * 255.0
img = cvarray.array2cv(docImage)
cv.SaveImage('test_lines/docs.png', img)
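# A minimal, self-contained sketch of the montage pattern used above:
# interleave content tiles with zero (black) spacer blocks, drop the
# trailing spacer, then stack. The tile and spacer sizes below are invented
# for illustration; the real code derives them from gridScale and gridSpace.
demoTiles = [numpy.random.rand(25, 25) for _ in xrange(4)]
demoSpacer = numpy.zeros((25, 4), dtype=numpy.float_)
demoParts = []
for tile in demoTiles:
    demoParts.append(tile)
    demoParts.append(demoSpacer)
demoMontage = numpy.hstack(demoParts[:-1])  # [:-1] drops the trailing spacer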

# Train...
params = dhdp.Params()
params.runs = 1
params.samples = 1
#params.burnIn = 10000
#c.setOneCluster(True)
#c.setCalcBeta(True)

print 'Fitting model...'
p = ProgBar()
model = c.sampleModel(params, p.callback)
del p
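# Below: compare the ground truth density (green), the model's density
# estimate (red) and a single mixture drawn via model.sampleMixture() (blue),
# one pixel column at a time.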
#print model.z.sum(axis=0)

# Now plot the estimated distribution against the actual distribution...
img = numpy.ones((height, width, 3))
draw = model.sampleMixture()

for px in xrange(width):
    x = float(px) / float(width) * (high - low) + low

    y_gt = 0.0
    for ii in xrange(len(gt)):
        y_gt += gt_weight[ii] * gt[ii].prob([x])
    y_gu = model.prob([x])
    y_gd = 0.0
    for ind, gauss in enumerate(draw[1]):
        y_gd += draw[0][ind] * gauss.prob([x])

    py_gt = int((1.0 - y_gt / scale) * height)
    py_gu = int((1.0 - y_gu / scale) * height)
    py_gd = numpy.clip(int((1.0 - y_gd / scale) * height), 0, height - 1)

    img[py_gt, px, :] = [0.0, 1.0, 0.0]
    img[py_gu, px, :] = [1.0, 0.0, 0.0]
    img[py_gd, px, :] = [0.0, 0.0, 1.0]

# Save plot out...
img = cvarray.array2cv(img * 255.0)
cv.SaveImage('%s/plot_%i.png' % (out_dir, i + 1), img)
print
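# The mapping used above from a density value y to an image row is
# py = int((1.0 - y / scale) * height): row 0 is the top of the image, so
# larger densities plot higher up. Only py_gd is clipped, presumably because
# a single drawn mixture can spike above scale; the unclipped rows can fall
# outside [0, height - 1] when a density is 0 or exceeds scale. A small
# helper capturing the same mapping (name invented, not part of the
# original code):
def density_to_row(y, scale, height):
    py = int((1.0 - y / scale) * height)
    return min(max(py, 0), height - 1)  # keep within the valid image rows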
Example #3
docImageSet = []
for words in inputImageSet:
    image = numpy.asfarray(words)
    image *= 255.0 / image.max()
    image = numpy.reshape(image, (5, 5))
    image = numpy.repeat(numpy.repeat(image, 5, axis=0), 5, axis=1)
    image = numpy.append(image,
                         numpy.atleast_2d(numpy.zeros(image.shape[1])),
                         axis=0)
    image = numpy.append(image,
                         numpy.atleast_2d(numpy.zeros(image.shape[0])).T,
                         axis=1)
    docImageSet.append(image)

docVertSet = []
for i in xrange(50):
    docVertSet.append(numpy.vstack(docImageSet[i * 20:(i + 1) * 20]))
docSet = numpy.hstack(docVertSet)
img = cvarray.array2cv(docSet)
cv.SaveImage('test_grid_docs.png', img)
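# cv and cvarray here are the legacy OpenCV 1.x bindings. With the modern
# cv2 module (assuming it is available), the equivalent save would be
# roughly:
#   import cv2
#   cv2.imwrite('test_grid_docs.png', docSet.astype(numpy.uint8))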

# Train...
print 'Training...'
#p = ProgBar()
#passes = vlda.solve()
#del p
passes = vlda.solveHuman()
print 'Took %i passes' % passes

# Generate an image of the final distributions associated with the learned documents...
# Get pixel values...
tImages = []
for topic in xrange(vlda.numTopics()):
  # Get distribution...
  for px in xrange(width):
    x = float(px) / float(width) * (high - low) + low

    y_gt = 0.0
    for ii in xrange(len(gt)):
      y_gt += gt_weight[ii] * gt[ii].prob([x])
    y_gu = model.prob([x])
    y_gd = 0.0
    for ind, gauss in enumerate(draw[1]):
      y_gd += draw[0][ind] * gauss.prob([x])

    py_gt = int((1.0 - y_gt / scale) * height)
    py_gu = int((1.0 - y_gu / scale) * height)
    py_gd = numpy.clip(int((1.0 - y_gd / scale) * height), 0, height - 1)

    img[py_gt, px, :] = [0.0, 1.0, 0.0]
    img[py_gu, px, :] = [1.0, 0.0, 0.0]
    img[py_gd, px, :] = [0.0, 0.0, 1.0]

  # Save plot out...
  img = cvarray.array2cv(img * 255.0)
  cv.SaveImage('%s/plot_%i.png' % (out_dir, model.getStickCap()), img)
  print

  # Either finish or increment the number of sticks for the next run...
  value = model.nllData()
  if prev is None or value < prev:
    prev = value
    model.incStickCap()
  else:
    break
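# The loop above is a generic capacity-growth heuristic: keep adding
# stick-breaking components while the data negative log likelihood improves,
# and stop at the first non-improvement. The same pattern reduced to a
# sketch, with score and grow as stand-ins for model.nllData() and
# model.incStickCap():
def grow_until_worse(score, grow):
  prev = None
  while True:
    value = score()
    if prev is not None and value >= prev:
      break
    prev = value
    grow()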
Example #5
# Save out the input documents for confirmation (50x20 grid)...
docImageSet = []
for words in inputImageSet:
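  # Each document is a vector of word counts: scale to the 0-255 range,
  # arrange as a 5x5 grid, upscale 5x, and append a black row and column so
  # tiles stay separated when packed into the 50x20 grid.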
  image = numpy.asfarray(words)
  image *= 255.0/image.max()
  image = numpy.reshape(image,(5,5))
  image = numpy.repeat(numpy.repeat(image,5,axis=0),5,axis=1)
  image = numpy.append(image,numpy.atleast_2d(numpy.zeros(image.shape[1])),axis=0)
  image = numpy.append(image,numpy.atleast_2d(numpy.zeros(image.shape[0])).T,axis=1)
  docImageSet.append(image)

docVertSet = []
for i in xrange(50):
  docVertSet.append(numpy.vstack(docImageSet[i*20:(i+1)*20]))
docSet = numpy.hstack(docVertSet)
img = cvarray.array2cv(docSet)
cv.SaveImage('test_grid_docs.png',img)



# Train...
print 'Training...'
#p = ProgBar()
#passes = vlda.solve()
#del p
passes = vlda.solveHuman()
print 'Took %i passes'%passes



# Generate an image of the final distributions associated with the learned documents...
Example #6
rows = []
for i in xrange(documentsToTrain//docGridWidth):
  row = docs[i*docGridWidth:(i+1)*docGridWidth]
  rowExt = []
  for r in row:
    rowExt.append(r)
    rowExt.append(numpy.zeros((gridSpace,gridScale*5), dtype=numpy.float_))
  rowExt = rowExt[:-1]
  rows.append(numpy.vstack(rowExt))

stack = []
for r in rows:
  stack.append(r)
  stack.append(numpy.zeros((r.shape[0],gridSpace), dtype=numpy.float_))
stack = stack[:-1]
docImage = numpy.hstack(stack).T * 255.0
img = cvarray.array2cv(docImage)
cv.SaveImage('test_lines/docs.png',img)



# Train...
params = dhdp.Params()
params.runs = 1
params.samples = 1
#params.burnIn = 10000
#c.setOneCluster(True)
#c.setCalcBeta(True)


print 'Fitting model...'
p = ProgBar()