Python get_connected_componentsの例、connected_components.get_connected_components Pythonの例

コード例 #1

0

ファイルを表示

def cleaned2segmented(cleaned, average_size):
    """Build a mask of the text-like regions found in a cleaned image.

    The inverted image is run-length smoothed, connected components of the
    smoothed result are extracted, and each component that
    ``ocr.segment_into_lines`` splits into at least two vertical or at
    least two horizontal lines is drawn into the returned mask.

    Args:
        cleaned: image array. Only ``cleaned.shape[:2]`` (height, width)
            is read directly; the image is inverted with
            ``cv2.bitwise_not`` before smoothing and line segmentation.
        average_size: factor multiplied with
            ``defaults.VERTICAL_SMOOTHING_MULTIPLIER`` and
            ``defaults.HORIZONTAL_SMOOTHING_MULTIPLIER`` to obtain the two
            smoothing thresholds.

    Returns:
        A ``(h, w)`` ``np.uint8`` array, zero except where
        ``ocr.draw_2d_slices`` drew the accepted components with
        ``color=255`` and ``line_size=-1`` (presumably filled boxes --
        confirm against ``ocr.draw_2d_slices``).
    """
    vertical_smoothing_threshold = (
        defaults.VERTICAL_SMOOTHING_MULTIPLIER * average_size)
    horizontal_smoothing_threshold = (
        defaults.HORIZONTAL_SMOOTHING_MULTIPLIER * average_size)
    (h, w) = cleaned.shape[:2]

    if arg.boolean_value('verbose'):
        # Single parenthesised argument: valid as a Python 2 print
        # statement and as a Python 3 print() call.
        print('Applying run length smoothing with vertical threshold '
              + str(vertical_smoothing_threshold)
              + ' and horizontal threshold '
              + str(horizontal_smoothing_threshold))

    # Invert once and reuse for smoothing and for every component below.
    # NOTE(review): assumes neither rls.RLSO nor ocr.segment_into_lines
    # modifies its input image in place -- confirm.
    inverted = cv2.bitwise_not(cleaned)
    run_length_smoothed = rls.RLSO(inverted,
                                   vertical_smoothing_threshold,
                                   horizontal_smoothing_threshold)
    components = cc.get_connected_components(run_length_smoothed)

    # The threshold does not depend on the component, so read it once.
    seg_thresh = arg.integer_value('segment_threshold', default_value=1)

    text = np.zeros((h, w), np.uint8)
    for component in components:
        (aspect, v_lines, h_lines) = ocr.segment_into_lines(
            inverted, component, min_segment_threshold=seg_thresh)
        # Keep only components that split into 2+ lines in some direction.
        if len(v_lines) < 2 and len(h_lines) < 2:
            continue
        ocr.draw_2d_slices(text, [component], color=255, line_size=-1)
    return text

コード例 #2

0

ファイルを表示

ファイル: furigana.py プロジェクト: aristotll/MangaTextDetection

def estimate_furigana(img, segmentation):
  """Estimate which text columns of ``img`` are furigana.

  ``img`` is binarized, masked with ``binary_mask(segmentation)``,
  inverted and Gaussian filtered with sigmas scaled by the average
  connected-component size of the binarized image. The filtered image is
  binarized again and its connected components are treated as candidate
  columns. A column is classed as furigana when ``find_cc_to_left`` finds
  a neighbour within ``width * defaults.FURIGANA_DISTANCE_MULTIPLIER``
  and the column is narrower than that neighbour's width times
  ``defaults.FURIGANA_WIDTH_THRESHOLD``.

  Args:
    img: image array; ``img.shape[:2]`` is read as (height, width).
      NOTE(review): the callers' comments say grayscale -- confirm.
    segmentation: text-area segmentation passed to ``binary_mask``.

  Returns:
    An array created with ``np.zeros`` (default float dtype) in the shape
    of the column mask, set to 255 inside the bounding box of every
    furigana column. When the ``debug`` argument is set, a weighted blend
    of the column mask, ``img`` and that furigana mask is returned instead.
  """
  # numpy shapes are (rows, cols), i.e. (height, width).
  (h, w) = img.shape[:2]
  verbose = arg.boolean_value('verbose')

  if verbose:
    print('Estimateding furigana in ' + str(w) + 'x' + str(h) + ' image.')

  # Form a binary image from the input.
  binary_threshold = arg.integer_value('binary_threshold', default_value=defaults.BINARY_THRESHOLD)
  if verbose:
    print('binarizing images with threshold value of ' + str(binary_threshold))
  binary = clean.binarize(img, threshold=binary_threshold)

  binary_average_size = cc.average_size(binary)
  if verbose:
    print('average cc size for binaryized grayscale image is ' + str(binary_average_size))

  # Restrict the binary image to the segmented text areas and invert it.
  text_mask = binary_mask(segmentation)
  cleaned = cv2.bitwise_not(text_mask * binary)
  if verbose:
    # This statistic is only reported, so it is only computed when logging.
    print('average cc size for cleaned, binaryized grayscale image is ' + str(cc.average_size(cleaned)))

  # scipy.ndimage.gaussian_filter is the supported name; the
  # scipy.ndimage.filters namespace is deprecated.
  sigmas = (defaults.FURIGANA_VERTICAL_SIGMA_MULTIPLIER * binary_average_size,
            defaults.FURIGANA_HORIZONTAL_SIGMA_MULTIPLIER * binary_average_size)
  columns = scipy.ndimage.gaussian_filter(cleaned, sigmas)
  columns = clean.binarize(columns, threshold=defaults.FURIGANA_BINARY_THRESHOLD)
  column_mask = columns * text_mask

  # A column counts as furigana when it has a neighbour to its left and is
  # narrower than that neighbour by the configured width factor.
  boxes = cc.get_connected_components(column_mask)
  furigana_lines = []
  for box in boxes:
    line_width = cc_width(box)
    line_to_left = find_cc_to_left(box, boxes, max_dist=line_width * defaults.FURIGANA_DISTANCE_MULTIPLIER)
    if line_to_left is None:
      continue
    if line_width < cc_width(line_to_left) * defaults.FURIGANA_WIDTH_THRESHOLD:
      furigana_lines.append(box)

  # Paint the bounding box of every furigana column.
  furigana = np.zeros(column_mask.shape)
  for f in furigana_lines:
    furigana[f[0].start:f[0].stop, f[1].start:f[1].stop] = 255

  if arg.boolean_value('debug'):
    furigana = 0.25 * column_mask + 0.25 * img + 0.5 * furigana

  return furigana

コード例 #3

0

ファイルを表示

def filter_text_like_areas(img, segmentation, average_size):
    """Split the segmentation's connected components by text-likeness.

    The image is binarized and multiplied by two 0/1 masks: one that is 1
    where ``segmentation`` is non-zero and one that is 1 where
    ``furigana.estimate_furigana`` returned 0. Every connected component of
    ``segmentation`` is then tested with ``text_like_histogram`` on that
    masked image.

    Args:
        img: image passed to ``furigana.estimate_furigana`` and
            ``clean.binarize``.
        segmentation: segmentation array; its non-zero pixels are kept and
            its connected components are the candidate areas.
        average_size: forwarded unchanged to ``text_like_histogram``.

    Returns:
        A tuple ``(text_like_areas, nontext_like_areas)`` of two lists of
        components.
    """
    verbose = arg.boolean_value('verbose')

    # Estimate furigana first so it can be masked out of the text image.
    furigana_areas = furigana.estimate_furigana(img, segmentation)
    furigana_mask = np.array(furigana_areas == 0, 'B')

    # Binarize the image, then keep only segmented, non-furigana pixels.
    binary_threshold = arg.integer_value(
        'binary_threshold', default_value=defaults.BINARY_THRESHOLD)
    if verbose:
        print('binarizing images with threshold value of '
              + str(binary_threshold))
    binary = clean.binarize(img, threshold=binary_threshold)

    if verbose:
        # Only reported, never used, so only computed when logging.
        print('average cc size for binaryized grayscale image is '
              + str(cc.average_size(binary)))

    segmentation_mask = np.array(segmentation != 0, 'B')
    cleaned = binary * segmentation_mask * furigana_mask

    text_like_areas = []
    nontext_like_areas = []
    for area in cc.get_connected_components(segmentation):
        if text_like_histogram(cleaned, area, average_size):
            text_like_areas.append(area)
        else:
            nontext_like_areas.append(area)

    return (text_like_areas, nontext_like_areas)

コード例 #4

0

ファイルを表示

def estimate_furigana(img, segmentation):
  """Return a mask marking the columns of ``img`` judged to be furigana.

  Steps, as implemented below: binarize ``img``; mask it with
  ``binary_mask(segmentation)`` and invert; Gaussian filter with sigmas
  proportional to the average connected-component size of the binarized
  image; binarize again; take connected components of the result as
  candidate columns; keep the columns that have a neighbour on the left
  (per ``find_cc_to_left``) and are narrower than that neighbour's width
  times ``defaults.FURIGANA_WIDTH_THRESHOLD``.

  Args:
    img: image array; ``img.shape[:2]`` is read as (height, width).
    segmentation: text-area segmentation passed to ``binary_mask``.

  Returns:
    A ``np.zeros``-created array (default float dtype) shaped like the
    column mask, with 255 inside each furigana column's bounding box. If
    the ``debug`` argument is set, the return value is instead
    ``0.25 * column mask + 0.25 * img + 0.5 * furigana mask``.
  """
  # shape[:2] is (rows, cols); name them so the log reads width x height.
  (height, width) = img.shape[:2]
  verbose = arg.boolean_value('verbose')

  if verbose:
    print('Estimateding furigana in ' + str(width) + 'x' + str(height) + ' image.')

  # Form a binary image from the input.
  binary_threshold = arg.integer_value('binary_threshold', default_value=defaults.BINARY_THRESHOLD)
  if verbose:
    print('binarizing images with threshold value of ' + str(binary_threshold))
  binary = clean.binarize(img, threshold=binary_threshold)

  binary_average_size = cc.average_size(binary)
  if verbose:
    print('average cc size for binaryized grayscale image is ' + str(binary_average_size))

  # Apply the text-area mask and invert.
  text_mask = binary_mask(segmentation)
  cleaned = cv2.bitwise_not(text_mask * binary)
  if verbose:
    # Reported only; skip the computation when not logging.
    print('average cc size for cleaned, binaryized grayscale image is ' + str(cc.average_size(cleaned)))

  # Use scipy.ndimage.gaussian_filter directly; the scipy.ndimage.filters
  # namespace is deprecated.
  vertical_sigma = defaults.FURIGANA_VERTICAL_SIGMA_MULTIPLIER * binary_average_size
  horizontal_sigma = defaults.FURIGANA_HORIZONTAL_SIGMA_MULTIPLIER * binary_average_size
  columns = scipy.ndimage.gaussian_filter(cleaned, (vertical_sigma, horizontal_sigma))
  columns = clean.binarize(columns, threshold=defaults.FURIGANA_BINARY_THRESHOLD)
  column_mask = columns * text_mask

  # Classify each candidate column by comparing its width with the width
  # of the column found to its left, if there is one.
  boxes = cc.get_connected_components(column_mask)
  furigana_lines = []
  for box in boxes:
    line_width = cc_width(box)
    max_dist = line_width * defaults.FURIGANA_DISTANCE_MULTIPLIER
    line_to_left = find_cc_to_left(box, boxes, max_dist=max_dist)
    if line_to_left is None:
      # No neighbour to compare against: not classed as furigana.
      continue
    if line_width < cc_width(line_to_left) * defaults.FURIGANA_WIDTH_THRESHOLD:
      furigana_lines.append(box)

  furigana = np.zeros(column_mask.shape)
  for f in furigana_lines:
    furigana[f[0].start:f[0].stop, f[1].start:f[1].stop] = 255

  if arg.boolean_value('debug'):
    furigana = 0.25 * column_mask + 0.25 * img + 0.5 * furigana

  return furigana

コード例 #5

0

ファイルを表示

def get_connected_components(img):
    """Merge overlapping connected components into joint bounding boxes.

    Components from ``cc.get_connected_components(img)`` are passed through
    ``scale_components``. Every pair for which ``overlaps`` is true is
    united in a ``UnionFind`` keyed by the tuple
    ``(y_start, y_stop, x_start, x_stop)``. Each resulting group is
    replaced by the bounding box of its members.

    Args:
        img: image handed to ``cc.get_connected_components``.

    Returns:
        A list of ``(slice(min_y, max_y), slice(min_x, max_x))`` tuples,
        one per group.
    """
    components = scale_components(cc.get_connected_components(img))

    # Build each hashable bounding-box key once, not inside the pair loop.
    keys = [(comp[0].start, comp[0].stop, comp[1].start, comp[1].stop)
            for comp in components]

    connected = UnionFind()
    # Each component is also paired with itself, as in the original loop.
    # Presumably this registers non-overlapping components too -- confirm
    # that overlaps(c, c) is true.
    for comp1, key1 in zip(components, keys):
        for comp2, key2 in zip(components, keys):
            if overlaps(comp1, comp2):
                connected.union(key1, key2)

    # NOTE(review): groups are formed from connected.parents as stored. If
    # UnionFind keeps non-root parents there, one set could be split into
    # several groups -- confirm against the UnionFind implementation.
    connected_sets = {}
    # items() works on both Python 2 and 3; iteritems() is Python 2 only.
    for child, parent in connected.parents.items():
        connected_sets.setdefault(parent, []).append(child)

    connected_comps = []
    for comps in connected_sets.values():
        min_y = min(item[0] for item in comps)
        max_y = max(item[1] for item in comps)
        min_x = min(item[2] for item in comps)
        max_x = max(item[3] for item in comps)
        connected_comps.append((slice(min_y, max_y), slice(min_x, max_x)))
    return connected_comps

コード例 #6

0

ファイルを表示

ファイル: segmentation.py プロジェクト: aristotll/MangaTextDetection

def filter_text_like_areas(img, segmentation, average_size):
  """Classify each connected component of ``segmentation`` as text or not.

  A masked binary image is built as
  ``binarize(img) * (segmentation != 0) * (furigana estimate == 0)`` and
  each connected component of ``segmentation`` is tested against it with
  ``text_like_histogram``.

  Args:
    img: image passed to ``furigana.estimate_furigana`` and
      ``clean.binarize``.
    segmentation: segmentation array providing both the pixel mask and
      the candidate areas.
    average_size: passed through to ``text_like_histogram``.

  Returns:
    ``(text_like_areas, nontext_like_areas)``: two lists that together
    hold every component of ``segmentation``.
  """
  verbose = arg.boolean_value('verbose')

  # First estimate furigana-like elements so they can be masked.
  furigana_areas = furigana.estimate_furigana(img, segmentation)
  furigana_mask = np.array(furigana_areas == 0, 'B')

  # Binarize the image, clean it via the segmentation and drop furigana.
  binary_threshold = arg.integer_value('binary_threshold', default_value=defaults.BINARY_THRESHOLD)
  if verbose:
    print('binarizing images with threshold value of ' + str(binary_threshold))
  binary = clean.binarize(img, threshold=binary_threshold)

  if verbose:
    # The average size is only logged, so compute it only when logging.
    print('average cc size for binaryized grayscale image is ' + str(cc.average_size(binary)))

  segmentation_mask = np.array(segmentation != 0, 'B')
  cleaned = binary * segmentation_mask * furigana_mask

  text_like_areas = []
  nontext_like_areas = []
  for area in cc.get_connected_components(segmentation):
    bucket = text_like_areas if text_like_histogram(cleaned, area, average_size) else nontext_like_areas
    bucket.append(area)

  return (text_like_areas, nontext_like_areas)

コード例 #7

0

ファイルを表示

ファイル: segmentation.py プロジェクト: luis-wang/MangaTextDetection

def cleaned2segmented(cleaned, average_size):
    """Segment a cleaned image into a mask of text blocks.

    (Translated from the original docstring: ``cleaned`` is the image in
    which the characters have already been thinned.)

    The inverted image is run-length smoothed with thresholds proportional
    to ``average_size``. Connected components of the smoothed image that
    ``ocr.segment_into_lines`` splits into two or more vertical lines, or
    two or more horizontal lines, are drawn into the result.

    Args:
        cleaned: image array; ``cleaned.shape[:2]`` gives (height, width).
        average_size: multiplied with
            ``defaults.VERTICAL_SMOOTHING_MULTIPLIER`` and
            ``defaults.HORIZONTAL_SMOOTHING_MULTIPLIER`` to get the
            smoothing thresholds.

    Returns:
        ``np.uint8`` array of shape ``(h, w)``: zeros plus whatever
        ``ocr.draw_2d_slices`` draws for accepted components
        (``color=255``, ``line_size=-1``).
    """
    vertical_smoothing_threshold = defaults.VERTICAL_SMOOTHING_MULTIPLIER * average_size
    horizontal_smoothing_threshold = defaults.HORIZONTAL_SMOOTHING_MULTIPLIER * average_size

    (h, w) = cleaned.shape[:2]

    if arg.boolean_value('verbose'):
        # One parenthesised expression prints identically on Python 2 and 3.
        print('Applying run length smoothing with vertical threshold ' + str(vertical_smoothing_threshold)
              + ' and horizontal threshold ' + str(horizontal_smoothing_threshold))

    # Inversion and the segment threshold are loop-invariant: compute once.
    # NOTE(review): sharing one inverted image assumes rls.RLSO and
    # ocr.segment_into_lines leave their input untouched -- confirm.
    inverted = cv2.bitwise_not(cleaned)
    seg_thresh = arg.integer_value('segment_threshold', default_value=1)

    run_length_smoothed = rls.RLSO(inverted, vertical_smoothing_threshold, horizontal_smoothing_threshold)
    components = cc.get_connected_components(run_length_smoothed)

    text = np.zeros((h, w), np.uint8)
    for component in components:
        (aspect, v_lines, h_lines) = ocr.segment_into_lines(inverted, component, min_segment_threshold=seg_thresh)
        if len(v_lines) >= 2 or len(h_lines) >= 2:
            ocr.draw_2d_slices(text, [component], color=255, line_size=-1)
    return text

コード例 #8

0

ファイルを表示

def main():
  """Command-line entry point: run OCR on the text areas of a manga scan.

  Parses the command line into ``arg.value``, loads the input image,
  binarizes its grayscale version, segments the file with
  ``segmentation.segment_image_file`` and runs ``ocr_on_bounding_boxes``
  on the connected components of the segmentation. One line is printed
  per recognised blurb: ``x,y wxh confidence% :text``.

  Exits with status -1 when the input path is not a regular file or the
  image cannot be decoded.
  """
  # The original first read arg.parser and immediately overwrote it; that
  # dead assignment is dropped.
  parser = argparse.ArgumentParser(description='Basic OCR on raw manga scan.')
  parser.add_argument('infile', help='Input (color) raw Manga scan image to clean.')
  parser.add_argument('-o','--output', dest='outfile', help='Output (color) cleaned raw manga scan image.')
  parser.add_argument('-v','--verbose', help='Verbose operation. Print status messages during processing', action="store_true")
  parser.add_argument('--sigma', help='Std Dev of gaussian preprocesing filter.',type=float,default=None)
  parser.add_argument('--binary_threshold', help='Binarization threshold value from 0 to 255.',type=int,default=defaults.BINARY_THRESHOLD)
  parser.add_argument('--furigana', help='Attempt to suppress furigana characters to improve OCR.', action="store_true")
  parser.add_argument('--segment_threshold', help='Threshold for nonzero pixels to separete vert/horiz text lines.',type=int,default=defaults.SEGMENTATION_THRESHOLD)

  # The arg helper functions read the parsed values from arg.value.
  arg.value = parser.parse_args()

  infile = arg.string_value('infile')
  outfile = arg.string_value('outfile', default_value=infile + '.html')

  if not os.path.isfile(infile):
    print('Please provide a regular existing input file. Use -h option for help.')
    sys.exit(-1)

  if arg.boolean_value('verbose'):
    print('\tProcessing file ' + infile)
    print('\tGenerating output ' + outfile)

  img = cv2.imread(infile)
  if img is None:
    # cv2.imread reports an unreadable or undecodable file by returning
    # None instead of raising; stop here with a clear message.
    print('Could not read an image from ' + infile)
    sys.exit(-1)
  gray = clean.grayscale(img)
  # NOTE(review): --binary_threshold is parsed above but not passed to
  # clean.binarize here -- confirm whether that is intended.
  binary = clean.binarize(gray)

  segmented = segmentation.segment_image_file(infile)

  components = cc.get_connected_components(segmented)

  # Possible refinement noted by the original author: filter components
  # by size more strictly (cc.filter_by_size) before OCR.
  blurbs = ocr_on_bounding_boxes(binary, components)
  for blurb in blurbs:
    print(str(blurb.x) + ',' + str(blurb.y) + ' ' + str(blurb.w) + 'x' + str(blurb.h) + ' ' + str(blurb.confidence) + '% :' + blurb.text)

コード例 #9

0

ファイルを表示

ファイル: ocr.py プロジェクト: aristotll/MangaTextDetection

def main():
  """Parse the command line and OCR the text areas of the input scan.

  The parsed arguments are stored in ``arg.value``. The input image is
  read, converted to grayscale and binarized. The file is segmented with
  ``segmentation.segment_image_file`` and the connected components of the
  segmentation are handed to ``ocr_on_bounding_boxes`` together with the
  binary image. Each returned blurb is printed as
  ``x,y wxh confidence% :text``.

  Calls ``sys.exit(-1)`` when the input is not a regular file or cannot
  be decoded as an image.
  """
  # A dead "parser = arg.parser" assignment that was immediately
  # overwritten has been removed.
  parser = argparse.ArgumentParser(description='Basic OCR on raw manga scan.')
  parser.add_argument('infile', help='Input (color) raw Manga scan image to clean.')
  parser.add_argument('-o','--output', dest='outfile', help='Output (color) cleaned raw manga scan image.')
  parser.add_argument('-v','--verbose', help='Verbose operation. Print status messages during processing', action="store_true")
  parser.add_argument('--sigma', help='Std Dev of gaussian preprocesing filter.',type=float,default=None)
  parser.add_argument('--binary_threshold', help='Binarization threshold value from 0 to 255.',type=int,default=defaults.BINARY_THRESHOLD)
  parser.add_argument('--furigana', help='Attempt to suppress furigana characters to improve OCR.', action="store_true")
  parser.add_argument('--segment_threshold', help='Threshold for nonzero pixels to separete vert/horiz text lines.',type=int,default=defaults.SEGMENTATION_THRESHOLD)

  arg.value = parser.parse_args()

  infile = arg.string_value('infile')
  outfile = arg.string_value('outfile', default_value=infile + '.html')

  # print(<single expression>) behaves the same on Python 2 and Python 3.
  if not os.path.isfile(infile):
    print('Please provide a regular existing input file. Use -h option for help.')
    sys.exit(-1)

  if arg.boolean_value('verbose'):
    print('\tProcessing file ' + infile)
    print('\tGenerating output ' + outfile)

  img = cv2.imread(infile)
  if img is None:
    # cv2.imread signals a decode failure by returning None.
    print('Could not read an image from ' + infile)
    sys.exit(-1)
  gray = clean.grayscale(img)
  # NOTE(review): the parsed --binary_threshold is not forwarded to
  # clean.binarize here -- confirm whether that is intended.
  binary = clean.binarize(gray)

  segmented = segmentation.segment_image_file(infile)
  components = cc.get_connected_components(segmented)

  # Original author's note: stricter size filtering of the components
  # (cc.filter_by_size) may be worthwhile before OCR.
  blurbs = ocr_on_bounding_boxes(binary, components)
  for blurb in blurbs:
    position = str(blurb.x) + ',' + str(blurb.y)
    size = str(blurb.w) + 'x' + str(blurb.h)
    print(position + ' ' + size + ' ' + str(blurb.confidence) + '% :' + blurb.text)

コード例 #10

0

ファイルを表示

ファイル: test_connected_components.py プロジェクト: stephtzhang/algorithms

    def test_get_connected_components(self):
        """Two fully connected squares and one isolated vertex are found."""
        # Graph under test (adjacency lists); vertex 9 has no neighbours:
        #
        #   1---2   3---4   9
        #   | X |   | X |
        #   5---6   7---8
        graph = {
            1: [2, 5, 6],
            2: [1, 5, 6],
            6: [5, 1, 2],
            5: [1, 2, 6],
            3: [7, 8, 4],
            4: [3, 7, 8],
            8: [7, 3, 4],
            7: [3, 4, 8],
            9: [],
        }

        found = get_connected_components(graph)

        # Sort each component in place so the membership checks below do
        # not depend on discovery order.
        for members in found:
            members.sort()

        for expected in ([1, 2, 5, 6], [3, 4, 7, 8], [9]):
            assert expected in found

コード例 #11

0

ファイルを表示

ファイル: LocateText.py プロジェクト: zlita/MangaTextDetection

    # Abort with exit status -1 unless the input path is a regular file.
    if not os.path.isfile(infile):
        print(
            'Please provide a regular existing input file. Use -h option for help.'
        )
        sys.exit(-1)
    img = cv2.imread(infile)
    gray = clean.grayscale(img)

    binary_threshold = arg.integer_value(
        'binary_threshold', default_value=defaults.BINARY_THRESHOLD)
    if arg.boolean_value('verbose'):
        print('Binarizing with threshold value of ' + str(binary_threshold))
    # NOTE(review): inv_binary and binary are not read again in the visible
    # part of this function -- confirm whether they are still needed.
    inv_binary = cv2.bitwise_not(
        clean.binarize(gray, threshold=binary_threshold))
    binary = clean.binarize(gray, threshold=binary_threshold)

    # Segment the grayscale image and keep only channel index 2 of the
    # result for connected-component extraction.
    segmented_image = seg.segment_image(gray)
    segmented_image = segmented_image[:, :, 2]
    components = cc.get_connected_components(segmented_image)
    # Draw the component boxes onto the loaded colour image, then save it.
    cc.draw_bounding_boxes(img, components, color=(255, 0, 0), line_size=2)

    imsave(outfile, img)

    if arg.boolean_value('display'):
        cv2.imshow('segmented_image', segmented_image)

        # Key code 27 is Escape. The windows are destroyed after any key
        # press, so the call inside the branch is redundant with the one
        # that follows it.
        if cv2.waitKey(0) == 27:
            cv2.destroyAllWindows()
        cv2.destroyAllWindows()

コード例 #12

0

ファイルを表示

def text_like_histogram(img, area, average_size):
    """Decide whether the part of ``img`` inside ``area`` looks like text.

    When the ``additional_filtering`` argument is not set, every area is
    accepted. Otherwise the area is rejected when any of these hold:

    * it contains fewer than two connected components;
    * the mean component width or height is below
      ``defaults.MINIMUM_TEXT_SIZE_THRESHOLD``;
    * the mean width/height ratio is outside ``[0.5, 2]``;
    * both the column histogram and the row histogram of non-zero pixels
      contain fewer than two white runs;
    * the variance of the vertical white-run statistics exceeds
      ``defaults.MAXIMUM_VERTICAL_SPACE_VARIANCE``.

    Args:
        img: image array that is indexed with ``area`` and Gaussian
            filtered. NOTE(review): the histogram indexing assumes a 2-D
            image -- confirm.
        area: 2-D slice; ``dimensions_2d_slice(area)`` yields
            ``(x, y, w, h)``.
        average_size: unused; the average size is recomputed from the
            area itself.

    Returns:
        ``True`` if the area is accepted as text-like, else ``False``.
    """
    if not arg.boolean_value('additional_filtering'):
        return True
    verbose = arg.boolean_value('verbose')

    (x, y, w, h) = dimensions_2d_slice(area)
    aoi = img[area]

    if len(cc.get_connected_components(aoi)) < 2:
        return False

    avg = cc.average_size(aoi)
    mean_width = cc.mean_width(aoi)
    mean_height = cc.mean_height(aoi)
    if verbose:
        print('average size = ' + str(avg) + ' mean width = '
              + str(mean_width) + ' mean height = ' + str(mean_height))
    if (math.isnan(avg) or avg == 0) and verbose:
        # NOTE(review): the message announces a rejection, but the original
        # author disabled the early return, so processing continues.
        print('Rejecting area since average size is NaN')

    # Reject areas whose components are too small to be characters.
    if mean_width < defaults.MINIMUM_TEXT_SIZE_THRESHOLD or \
            mean_height < defaults.MINIMUM_TEXT_SIZE_THRESHOLD:
        if verbose:
            print('Rejecting area since average width or height is less than threshold.')
        return False

    # Force float division so integer means cannot truncate under Python 2.
    aspect_ratio = float(mean_width) / mean_height
    if aspect_ratio < 0.5 or aspect_ratio > 2:
        if verbose:
            print('Rejecting area since mean cc aspect ratio not textlike.')
        return False

    # Filter along one axis at a time. gaussian_filter returns a new array,
    # so no defensive copy of img is needed. scipy.ndimage.gaussian_filter
    # replaces the deprecated scipy.ndimage.filters namespace.
    sigma = 0.01 * float(avg)
    x_subimage = scipy.ndimage.gaussian_filter(img, (sigma, 0))
    y_subimage = scipy.ndimage.gaussian_filter(img, (0, sigma))

    # Non-zero pixel count per column of the area (from y_subimage) ...
    x_histogram = np.zeros(w, int)
    for i in range(w):
        x_histogram[i] = np.count_nonzero(y_subimage[y:y + h, x + i])

    # ... and per row of the area (from x_subimage).
    y_histogram = np.zeros(h, int)
    for i in range(h):
        y_histogram[i] = np.count_nonzero(x_subimage[y + i, x:x + w])

    h_white_runs = get_white_runs(x_histogram)
    h_black_runs = get_black_runs(x_histogram)
    (h_spacing_mean, h_spacing_variance) = slicing_list_stats(h_white_runs)
    (h_character_mean, h_character_variance) = slicing_list_stats(h_black_runs)
    v_white_runs = get_white_runs(y_histogram)
    v_black_runs = get_black_runs(y_histogram)
    (v_spacing_mean, v_spacing_variance) = slicing_list_stats(v_white_runs)
    (v_character_mean, v_character_variance) = slicing_list_stats(v_black_runs)

    if verbose:
        print('x ' + str(x) + ' y ' + str(y) + ' w ' + str(w) + ' h ' + str(h))
        print('white runs ' + str(len(h_white_runs)) + ' '
              + str(len(v_white_runs)))
        print('white runs mean ' + str(h_spacing_mean) + ' '
              + str(v_spacing_mean))
        print('white runs std  ' + str(h_spacing_variance) + ' '
              + str(v_spacing_variance))
        print('black runs ' + str(len(h_black_runs)) + ' '
              + str(len(v_black_runs)))
        print('black runs mean ' + str(h_character_mean) + ' '
              + str(v_character_mean))
        print('black runs std  ' + str(h_character_variance) + ' '
              + str(v_character_variance))

    if len(h_white_runs) < 2 and len(v_white_runs) < 2:
        if verbose:
            print('Rejecting area since not sufficient amount post filtering whitespace.')
        return False

    if v_spacing_variance > defaults.MAXIMUM_VERTICAL_SPACE_VARIANCE:
        if verbose:
            print('Rejecting area since vertical inter-character space variance too high.')
        return False

    # The original also compared the black-run means against avg, but both
    # checks were disabled (bodies were ``pass``), so they are omitted.
    return True

コード例 #13

0

ファイルを表示

ファイル: LocateText.py プロジェクト: luis-wang/MangaTextDetection

 
 
 # Binarize the grayscale image with the configured threshold, once
 # inverted and once as-is.
 # NOTE(review): neither result is read again in the visible part.
 inv_binary = cv2.bitwise_not(clean.binarize(gray, threshold=binary_threshold))
 #cv2.imshow('inv_binary', inv_binary)
 
 
 binary = clean.binarize(gray, threshold=binary_threshold)
 #cv2.imshow('binary', binary)
 
 
 # Segment the grayscale image and show the raw segmentation result.
 segmented_image = seg.segment_image(gray)
 cv2.imshow('segmented_image', segmented_image)
 
 
 # Keep only channel index 2 of the segmentation for component extraction.
 segmented_image = segmented_image[:,:,2]
 components = cc.get_connected_components(segmented_image)
 
 # Draw the component bounding boxes onto img.
 cc.draw_bounding_boxes(img,components,color=(255,0,0),line_size=2)
 
 
 # Hard-coded output path; the save call below is disabled, so nothing
 # is written in the visible part.
 outfile = 'img/no_equalizeHist.png'
 #imsave(outfile, img)
 
 
 cv2.imshow('result',img)
 #cv2.imshow('segmented_image',segmented_image)
 
 # Wait for a key press, then close all OpenCV windows.
 cv2.waitKey()
 cv2.destroyAllWindows()

コード例 #14

0

ファイルを表示

ファイル: segmentation.py プロジェクト: aristotll/MangaTextDetection

def text_like_histogram(img, area, average_size):
  """Return whether the region ``area`` of ``img`` passes the text filters.

  If the ``additional_filtering`` argument is off, the function returns
  ``True`` immediately. Otherwise it rejects the area when it holds fewer
  than two connected components, when the mean component width or height
  is under ``defaults.MINIMUM_TEXT_SIZE_THRESHOLD``, when the mean
  width/height ratio is outside ``[0.5, 2]``, when neither pixel histogram
  shows at least two white runs, or when the vertical white-run variance
  exceeds ``defaults.MAXIMUM_VERTICAL_SPACE_VARIANCE``.

  Args:
    img: image array, indexed with ``area`` and Gaussian filtered as a
      whole. NOTE(review): the indexing below assumes a 2-D array.
    area: 2-D slice understood by ``dimensions_2d_slice``, which returns
      ``(x, y, w, h)``.
    average_size: not used; the size is recomputed from the area.

  Returns:
    ``True`` when the area is kept as text-like, ``False`` otherwise.
  """
  if not arg.boolean_value('additional_filtering'):
    return True
  verbose = arg.boolean_value('verbose')

  (x, y, w, h) = dimensions_2d_slice(area)
  aoi = img[area]

  ccs = cc.get_connected_components(aoi)
  if len(ccs) < 2:
    return False

  avg = cc.average_size(aoi)
  mean_width = cc.mean_width(aoi)
  mean_height = cc.mean_height(aoi)
  if verbose:
    print('average size = ' + str(avg) + ' mean width = ' + str(mean_width) + ' mean height = ' + str(mean_height))
  if math.isnan(avg) or avg == 0:
    # NOTE(review): only logged. The original author disabled the early
    # return that would make this an actual rejection.
    if verbose:
      print('Rejecting area since average size is NaN')

  # Too-small components cannot be characters.
  too_narrow = mean_width < defaults.MINIMUM_TEXT_SIZE_THRESHOLD
  too_short = mean_height < defaults.MINIMUM_TEXT_SIZE_THRESHOLD
  if too_narrow or too_short:
    if verbose:
      print('Rejecting area since average width or height is less than threshold.')
    return False

  # Basic aspect ratio of the components. float() keeps the division
  # exact even if the means are integers under Python 2.
  aspect_ratio = float(mean_width) / mean_height
  if aspect_ratio < 0.5 or aspect_ratio > 2:
    if verbose:
      print('Rejecting area since mean cc aspect ratio not textlike.')
    return False

  # gaussian_filter allocates its own output, so img is filtered directly
  # instead of copying it first. The scipy.ndimage.filters namespace is
  # deprecated, so the function is taken from scipy.ndimage.
  width_multiplier = float(avg)
  height_multiplier = float(avg)
  x_subimage = scipy.ndimage.gaussian_filter(img, (0.01 * width_multiplier, 0))
  y_subimage = scipy.ndimage.gaussian_filter(img, (0, 0.01 * height_multiplier))

  # Histogram of non-zero pixels along the columns of the area.
  x_histogram = np.zeros(w, int)
  for i, col in enumerate(range(x, x + w)):
    x_histogram[i] = np.count_nonzero(y_subimage[y:y + h, col])

  # Histogram of non-zero pixels along the rows of the area.
  y_histogram = np.zeros(h, int)
  for i, row in enumerate(range(y, y + h)):
    y_histogram[i] = np.count_nonzero(x_subimage[row, x:x + w])

  h_white_runs = get_white_runs(x_histogram)
  h_black_runs = get_black_runs(x_histogram)
  v_white_runs = get_white_runs(y_histogram)
  v_black_runs = get_black_runs(y_histogram)
  (h_spacing_mean, h_spacing_variance) = slicing_list_stats(h_white_runs)
  (h_character_mean, h_character_variance) = slicing_list_stats(h_black_runs)
  (v_spacing_mean, v_spacing_variance) = slicing_list_stats(v_white_runs)
  (v_character_mean, v_character_variance) = slicing_list_stats(v_black_runs)

  if verbose:
    print('x ' + str(x) + ' y ' + str(y) + ' w ' + str(w) + ' h ' + str(h))
    print('white runs ' + str(len(h_white_runs)) + ' ' + str(len(v_white_runs)))
    print('white runs mean ' + str(h_spacing_mean) + ' ' + str(v_spacing_mean))
    print('white runs std  ' + str(h_spacing_variance) + ' ' + str(v_spacing_variance))
    print('black runs ' + str(len(h_black_runs)) + ' ' + str(len(v_black_runs)))
    print('black runs mean ' + str(h_character_mean) + ' ' + str(v_character_mean))
    print('black runs std  ' + str(h_character_variance) + ' ' + str(v_character_variance))

  if len(h_white_runs) < 2 and len(v_white_runs) < 2:
    if verbose:
      print('Rejecting area since not sufficient amount post filtering whitespace.')
    return False

  if v_spacing_variance > defaults.MAXIMUM_VERTICAL_SPACE_VARIANCE:
    if verbose:
      print('Rejecting area since vertical inter-character space variance too high.')
    return False

  # Two further comparisons of the black-run means against avg existed in
  # the original but had no effect (``pass``); they are left out.
  return True