Example #1
0
def run():
  """Rasterise the PDFs in ``att_dir`` and crop out their blue-marked regions.

  Takes no arguments and returns nothing; it works on ``att_dir`` (a name
  defined outside this function) and only has side effects on disk:

  1. Every file under ``att_dir/tmp`` is deleted and
     ``attachments.check_att(att_dir)`` is called.
  2. Each entry of ``att_dir`` whose name contains ``.pdf`` (case-insensitive)
     is moved to ``att_dir/tmp/tmp_NNNN.pdf``, converted to PNG by the external
     ``convert`` command at ``res`` DPI, and the moved PDF is then deleted.
  3. For each PNG found in ``att_dir/tmp`` a binary mask is built from channel
     2 minus the sum of channels 0 and 1, down-sampled by ``skip``, passed
     through two order filters, and handed to ``cluster_img``.
  4. A new folder ``att_dir/pages/page_NNNNN`` is created per image and
     receives ``blue.png`` (the mask), ``full.png`` (the image) and, for each
     cluster, ``hl_NN.png`` (padded crop) plus ``hl_NN.txt`` (JSON with the
     keys ``full_bounds`` and ``cluster_bounds``).
  """
  tmp_dir = os.path.join(att_dir, 'tmp')

  # Start from an empty scratch directory.
  for r, d, fs in os.walk(tmp_dir):
    for f in fs:
      os.remove(os.path.join(r, f))
  attachments.check_att(att_dir)

  # Scratch path -> original path for every PDF sitting in att_dir.
  pdfs = dict(
      (os.path.join(att_dir, 'tmp/tmp_{0:04d}.pdf'.format(idx)),
       os.path.join(att_dir, f))
      for idx, f in enumerate(os.listdir(att_dir)) if '.pdf' in f.lower())
  for tmp_pdf, src_pdf in pdfs.items():
    os.rename(src_pdf, tmp_pdf)

  # Convert PDFs
  res = 300
  for tmp_pdf in pdfs:
    # An argument list (no shell) keeps paths containing spaces or shell
    # metacharacters intact.
    cmd = ['convert', '-density', str(res), tmp_pdf,
           tmp_pdf.replace('.pdf', '.png')]
    print('calling for ' + ' '.join(cmd))
    subprocess.call(cmd)
    # The scratch PDF is dropped whether or not the conversion succeeded.
    os.remove(tmp_pdf)

  # Find all files produced by convert: anything in the scratch directory
  # whose name contains the stem of one of the scratch PDFs.
  tmp_names = os.listdir(tmp_dir)
  inp_files = []
  for tmp_pdf in pdfs:
    stem = os.path.splitext(os.path.basename(tmp_pdf))[0]
    inp_files.extend(os.path.join(tmp_dir, name)
                     for name in tmp_names if stem in name)

  x_inches = .25
  y_inches = .15
  skip = 5

  # Filter window sizes in down-sampled pixels.  floor() yields a float, but
  # array shapes and filter ranks have to be integers.
  xrad = int(floor(res / skip * x_inches))
  yrad = int(floor(res / skip * y_inches))

  # open them and get the blue channels
  for i in inp_files:
    full = mpimg.imread(i)

    if isflipped(full):
      full = transpose(full, (1, 0, 2))

    # 1 where channel 2 exceeds the sum of channels 0 and 1, else 0.
    blue = squeeze(full[:, :, 2])
    others = np.sum(full[:, :, 0:2], 2)
    blue = blue - others
    blue[less(blue, 0)] = 0.
    blue[greater(blue, 0)] = 1.

    # Rank (window size - 1) selects the largest value in each window.
    bluesmall = blue[::skip, ::skip]
    bluesmall = ss.order_filter(bluesmall, ones((xrad, 1)), xrad - 1)
    bluesmall = ss.order_filter(bluesmall, ones((1, yrad)), yrad - 1)

    # FLIP THE CLUSTERS JUST TO BE CONFUSING
    # Multiplying by ``skip`` undoes the down-sampling above.
    cls = cluster_img(bluesmall)
    cls = [cl.T * skip for cl in cls]

    # The next page number is one more than the largest number that appears
    # in any existing entry name under ``pages``.
    root = os.path.join(att_dir, 'pages')
    if not os.path.isdir(root):
      os.mkdir(root)
    taken = [int(num) for name in os.listdir(root)
             for num in re.findall(r'[\d]+', name)]
    num_max = max([0] + taken)
    this_folder = os.path.join(root, 'page_{0:05d}'.format(num_max + 1))
    os.mkdir(this_folder)

    # Saving by path lets the imaging library open the file in binary mode
    # and close it again.
    blue_img = transpose(array(blue * 255, dtype=np.uint8), (1, 0))
    Image.fromarray(blue_img).save(os.path.join(this_folder, 'blue.png'))
    full_img = transpose(array(full * 255, dtype=np.uint8), (1, 0, 2))
    Image.fromarray(full_img).save(os.path.join(this_folder, 'full.png'))

    for idx, c in enumerate(cls):
      # Tight box around the cluster, then the same box padded by 100 px.
      b0 = array([np.min(c[0]), np.min(c[1]), np.max(c[0]), np.max(c[1])])
      bounds = b0 + 100 * array([-1, -1, 1, 1])
      # NOTE(review): presumably clip_bounds clamps ``bounds`` in place to
      # the given shape; its return value is not used -- confirm.
      clip_bounds(bounds, shape(blue.T))
      subimg = full[bounds[0]:bounds[2], bounds[1]:bounds[3]]
      # tolist() yields plain Python numbers that any JSON encoder accepts.
      coords = {'full_bounds': bounds.tolist(),
                'cluster_bounds': b0.tolist()}

      sub_img = transpose(array(subimg * 255, dtype=np.uint8), (1, 0, 2))
      Image.fromarray(sub_img).save(
          os.path.join(this_folder, 'hl_{0:02d}.png').format(idx))

      # The sidecar goes next to its PNG; a bare file name would land in the
      # current working directory and be overwritten by the next page.
      txt_path = os.path.join(this_folder, 'hl_{0:02d}.txt'.format(idx))
      with open(txt_path, 'w') as fh:
        fh.write(simplejson.dumps(coords))
Example #2
0
def get_all():
  """Run ``attachments.check_att`` on ``att_dir``.

  ``att_dir`` is not a parameter; it is looked up outside this function.
  There is no return statement, so the call evaluates to ``None``.
  """
  attachments.check_att(att_dir)
Example #3
0
def get_all():
    """Hand ``att_dir`` to ``attachments.check_att``.

    Both names are resolved outside this function.  The result of the
    check is discarded and nothing is returned.
    """
    attachments.check_att(att_dir)