Code Example #1
File: ocr_toolkit.py  Project: jeslin01/eMOP
def check_glyph_accent(item,glyph):
  """Check two glyphs for beeing grouped to one single character. This function is for unit connected-components like i, j or colon.

  Signature:
    ``check_glyph_accent(item,glyph)``

  with

    *item*:
      Some connected-component.

    *glyph*:
      Some connected-component.

  Returns a list with two elements. The first element is a list of characters (images that have been united into a single image) and the second element is a list of characters that have to be removed because they have been united into a single character.
  """

  remove = []
  add = []
  result = []
  if(glyph.contains_x(item.ul_x) or glyph.contains_x(item.lr_x) or glyph.contains_x(item.center_x)): ## side by side?
    if(not(item.contains_y(glyph.ul_y) or item.contains_y(glyph.lr_y) or item.contains_y(glyph.center_y))): ## y-dimensions do not overlap?
      remove.append(item)
      remove.append(glyph)
      new = union_images([item,glyph])
      add.append(new)
  result.append(add)		#result[0] == ADD
  result.append(remove)		#result[1] == REMOVE
  return result
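
A minimal sketch of how a caller might consume the returned ``[add, remove]`` pair; the helper name ``merge_accents`` and the pairwise loop are illustrative assumptions, not part of the toolkit.

def merge_accents(glyphs):
  # Hypothetical caller: fuse accent-like CCs (the dot of i/j, colon parts)
  # into their base glyphs via check_glyph_accent's [add, remove] contract.
  add = []
  remove = []
  for item in glyphs:
    for glyph in glyphs:
      if item == glyph:
        continue
      result = check_glyph_accent(item, glyph)
      add.extend(result[0])      # result[0] == ADD (joined images)
      remove.extend(result[1])   # result[1] == REMOVE (consumed parts)
  # note: a matching pair may be seen twice here; duplicates are not handled
  return [g for g in glyphs if g not in remove] + add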
Code Example #2
File: barfinder.py  Project: DDMAL/barlineFinder
def __bar_candidate_grouping(ungrouped_bars):
    """
    Groups bar candidates and returns grouped bars
    """
    grouped_bars = []
    grouped_bars.append(image_utilities.union_images(ungrouped_bars))
    return grouped_bars
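
For context, a hedged sketch of a call site; the score file name and the use of ``cc_analysis`` to produce bar-line candidates are assumptions standing in for barfinder's actual candidate detection.

from gamera.core import init_gamera, load_image
init_gamera()

# Hypothetical input: connected components of a one-bit score image.
page = load_image("score.png").to_onebit()
candidates = page.cc_analysis()
grouped = __bar_candidate_grouping(candidates)  # a one-element list holding the unioned image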
Code Example #3
    def _vector_process_f_clef(self, glyph):
        # use only right portion of f clefs when getting center_of_mass
        g = None

        for gl in glyph.splitx(0.5):
            gl_coords = self._convert_bb_to_coords(self._get_glyph_bb(gl))

            # no g
            if not g:
                g = gl

            # left of g
            elif gl.offset_x + gl.ncols < g.offset_x:
                pass

            # x-intersecting g
            elif self._x_intersecting_coords(gl_coords, g.offset_x,
                                             g.offset_x + g.ncols, 0):
                g = union_images([g, gl])

            # right of g
            elif gl.offset_x >= g.offset_x + g.ncols:
                g = gl

        y_add = g.offset_y - glyph.offset_y

        return g, y_add
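
A simplified stand-alone sketch of the same technique, assuming only the rightmost piece of the split matters; unlike the method above it skips the merging of x-intersecting pieces, so it is an approximation for illustration only.

def right_half_with_offset(glyph):
    # Split the glyph at half its width and keep the rightmost piece.
    pieces = glyph.splitx(0.5)
    right = max(pieces, key=lambda p: p.offset_x)
    # Vertical offset of the kept piece relative to the original glyph,
    # mirroring the y_add returned by _vector_process_f_clef.
    y_add = right.offset_y - glyph.offset_y
    return right, y_add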
Code Example #4
    def _remove_high_density_CCs(self, image, bg):
        CCs = image.cc_analysis()
        new_CCs = []

        for g in CCs:
            # throw away glyphs whose density is at or above the threshold
            if g.compactness()[0] >= self.density:
                continue
            else:
                new_CCs.append(g)

        combine = union_images([bg] + new_CCs)
        return combine
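
The same filtering idea written as a stand-alone helper, assuming Gamera's ``compactness`` feature and the ``image_utilities.union_images`` plugin used in the other examples; the explicit ``density_threshold`` parameter replaces the ``self.density`` attribute and is an assumption.

from gamera.plugins import image_utilities

def remove_high_density_ccs(image, bg, density_threshold):
    # Keep only the connected components below the density threshold
    # and union them back onto the background image.
    kept = [g for g in image.cc_analysis()
            if g.compactness()[0] < density_threshold]
    return image_utilities.union_images([bg] + kept)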
Code Example #5
File: ocr_toolkit.py  Project: jeslin01/eMOP
def check_upper_neighbors(item,glyph,line):
  """Check for small signs grouped beside each other like quotation marks.

  Signature:
    
    ``check_upper_neighbors(item,glyph,line)``

  with

    *item*:
      Some connected-component.

    *glyph*:
      Some connected-component.

    *line*:
      The ``Textline`` object that contains ``item`` and ``glyph``.

Returns a list with two elements. The first element is a list of
characters (images that have been united into a single image) and the
second element is a list of characters that have to be removed because
they have been united into a single character.
  """

  remove = []
  add = []
  result = []
  minheight = min([item.nrows,glyph.nrows])
  # glyphs must be small, of similar size and at the same height
  if(not(glyph.lr_y >= line.center_y and glyph.lr_y-(glyph.nrows/3) <= line.lr_y)): 
    if (glyph.contains_y(item.center_y) and item.contains_y(glyph.center_y)):
      minwidth = min([item.ncols,glyph.ncols])
      distance = item.lr_x - glyph.lr_x
      if(distance > 0 and distance <= minwidth*3):
        remove.append(item)
        remove.append(glyph)
        new = union_images([item,glyph])
        add.append(new)
  result.append(add) 		#result[0] == ADD
  result.append(remove)		#result[1] == REMOVE
  return result
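
A short hedged sketch of a call site; the wrapper name ``try_fuse_marks`` is an assumption, and the real caller is ``get_line_glyphs`` in the next example.

def try_fuse_marks(item, glyph, line):
  # Hypothetical wrapper: fuse two small neighbouring marks (e.g. the two
  # strokes of a quotation mark) if check_upper_neighbors joins them.
  add, remove = check_upper_neighbors(item, glyph, line)
  if len(add) > 0:            # the marks were close enough to be joined
    return add[0], remove     # the fused image plus the parts to discard
  return None, []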
Code Example #6
File: ocr_toolkit.py  Project: jeslin01/eMOP
def get_line_glyphs(image,textlines):
  """Splits image regions representing text lines into characters.

Signature:

    ``get_line_glyphs(image, textlines)``

with

    *image*:
      The document image that is to be further segmented. It must contain the
      same underlying image data as the second argument *textlines*.

    *textlines*:
      A list of ``Cc`` data types, each of which represents a text line region.
      The image views must correspond to *image*, i.e. each pixel has a value
      that is the unique label of the text line it belongs to. This is the
      interface used by the plugins in the "PageSegmentation" section of the
      Gamera core.

The result is returned as a list of Textline_ objects.

.. _Textline: gamera.toolkits.ocr.classes.Textline.html

"""

  i=0
  show = []
  lines = []
  ret,sub_ccs = image.sub_cc_analysis(textlines)

  for ccs in sub_ccs:
    line_bbox = Rect(textlines[i])
    i = i + 1
    glyphs = ccs[:]
    newlist = []

    remove = []
    add = []
    result = []
    glyphs.sort(key=lambda g: g.ul_x)
    for position, item in enumerate(glyphs):
      if(True):
      #if(not(glyph.lr_y >= line_bbox.center_y and glyph.lr_y-(glyph.nrows/3) <= line_bbox.lr_y)):  ## is this part of the glyph higher than line.center_y ?

        left = position - 2
        if(left < 0):
          left = 0
        right = position + 2
        if(right > len(glyphs)):
          right = len(glyphs)	
        checklist = glyphs[left:right]

        for glyph in checklist:
          if (item == glyph):
            continue

          result = check_upper_neighbors(glyph,item,line_bbox)
          if(len(result[0]) > 0):  # something has been joined...
            joined_upper_connection = result[0][0]   # joined glyph
            add.append(joined_upper_connection)
            remove.append(result[1][0])     # first part of the joined one
            remove.append(result[1][1])     # second part of the joined one
            for glyph2 in checklist: # maybe the joined upper glyph fits onto a glyph below...
              if(glyph2 == joined_upper_connection):
                continue
              if(joined_upper_connection.contains_x(glyph2.center_x)):   # fits for example ae, oe, ue (umlauts) in the German alphabet
                new = union_images([glyph2,joined_upper_connection])
                add.append(new)
                remove.append(glyph2)
                add.remove(joined_upper_connection)
                break
        for elem in remove:
          if (elem in checklist):
            checklist.remove(elem)

        for glyph in checklist:
          if(item == glyph):
            continue

          result = check_glyph_accent(item,glyph)
          if(len(result[0]) > 0):  # something has been joined...
            add.append(result[0][0])     # joined glyph
            remove.append(result[1][0])  # first part of the joined one
            remove.append(result[1][1])  # second part of the joined one

    for elem in remove:
      if(elem in glyphs):
        glyphs.remove(elem)
    for elem in add:
      glyphs.append(elem)

    new_line = Textline(line_bbox)
    final = []
    if(len(glyphs) > 0):
      for glyph in glyphs:
        final.append(glyph)

    new_line.add_glyphs(final,False)
    new_line.sort_glyphs()  #reading order -- from left to right
    lines.append(new_line)

    for glyph in glyphs:
      show.append(glyph)

  return lines
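
An end-to-end sketch of driving ``get_line_glyphs``; loading and binarisation follow standard Gamera usage, but the choice of ``runlength_smearing`` as the page-segmentation plugin and the file name are assumptions.

from gamera.core import init_gamera, load_image
init_gamera()

# Hypothetical pipeline: binarise the page, segment it into text-line Ccs,
# then split every line into character glyphs with get_line_glyphs.
page = load_image("page.png").to_onebit()
textlines = page.runlength_smearing()   # any "PageSegmentation" plugin returning Ccs
lines = get_line_glyphs(page, textlines)
print("%d text lines split into glyphs" % len(lines))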
Code Example #7
    image_bin.reset_onebit_image()

    # find likely rotation angle and correct
    angle, tmp = image_bin.rotation_angle_projections()
    image_bin = image_bin.rotate(angle=angle)

    for i in range(filter_runs):
        image_bin.filter_short_runs(filter_runs_amt, 'black')
        image_bin.filter_narrow_runs(filter_runs_amt, 'black')

    return image_bin


if __name__ == '__main__':

    manuscript = 'stgall390'
    all_files = os.listdir('./png/')
    fnames = [x for x in all_files if 'text.png' in x and manuscript in x]

    for filename in fnames:
        print('processing ' + filename + '...')

        raw_image = gc.load_image('./png/' + filename)
        image, eroded, angle = preproc.preprocess_images(raw_image,
                                                         despeckle_amt=20,
                                                         filter_runs=0)
        line_strips, lines_peak_locs, proj = preproc.identify_text_lines(
            image, eroded)
        unioned_lines = union_images(line_strips)
        unioned_lines.save_image('cleaned_' + filename)