Example #1
0
def remove_lines(lines, filtered_lines, scores):
  """Run cut_lines over the horizontal and vertical line sets and record the counts.

  Every argument is indexed with 0 for the horizontal data and 1 for the
  vertical data; the matching entries are handed to cut_lines together.

  Returns:
    A (new_horiz_lines, new_vert_lines) tuple holding the two cut_lines
    results.
  """
  # One cut_lines call per orientation; index 0 is processed before index 1.
  trimmed_horiz, trimmed_vert = (
    cut_lines(lines[axis], filtered_lines[axis], scores[axis])
    for axis in (0, 1)
  )

  # Report how many lines came back for each orientation.
  for label, remaining in (('new_horiz_lines', trimmed_horiz),
                           ('new_vert_lines', trimmed_vert)):
    scorer.add_score(label, len(remaining))

  return (trimmed_horiz, trimmed_vert)
Example #2
0
def get_structure(boxes, lines):
  """Group boxes into rows and columns and return both groups in sorted order.

  The boxes are clustered by rate_combinations, each list of clusters is
  converted by translate_clusters, and the converted entries are ordered by
  fixed index positions. The number of rows and columns is recorded on the
  scorer as 'initial_rows' and 'initial_cols'.

  Returns:
    A (sorted_rows, sorted_cols) tuple.
  """
  def row_order(row):
    # Primary key is element 1, ties are broken by element 7
    # (presumably y coordinates -- confirm against translate_clusters).
    return (row[1], row[7])

  def col_order(col):
    # Primary key is element 0, ties are broken by element 6
    # (presumably x coordinates -- confirm against translate_clusters).
    return (col[0], col[6])

  row_clusters, col_clusters = rate_combinations(boxes, lines)

  translated_rows = translate_clusters(row_clusters)
  translated_cols = translate_clusters(col_clusters)

  ordered_rows = sorted(translated_rows, key=row_order)
  ordered_cols = sorted(translated_cols, key=col_order)

  scorer.add_score('initial_rows', len(ordered_rows))
  scorer.add_score('initial_cols', len(ordered_cols))

  # NOTE: two alternatives are currently disabled here: clustering with
  # cluster_boxes(boxes, axis) instead of rate_combinations, and merging
  # neighbours with combine_overlapping_neighbors(sorted, axis, 0.5) (scored
  # as 'combined_rows' / 'combined_cols'). The sorted clusters are returned
  # as they are.
  return (ordered_rows, ordered_cols)
Example #3
0
def _box_edges(box):
  """Return the edge coordinates of a box indexed as (x, y, width, height)."""
  return {
    'left': box[0],
    'right': box[0] + box[2],
    'top': box[1],
    'bottom': box[1] + box[3]
  }


def _clusters_to_boxes(clusters, boxes):
  """Replace every box index inside each cluster with the box it refers to."""
  return [[boxes[box_index] for box_index in cluster] for cluster in clusters]


def rate_combinations(boxes, lines):
  """Score every pair of boxes for row/column affinity and cluster on the scores.

  For each unordered pair of boxes a set of partial scores is collected
  (center alignment, edge alignment, nearby lines, overlap) separately for
  the "same row" and the "same column" hypothesis. The partial scores are
  reduced by calculate_row_score / calculate_col_score and written
  symmetrically into two box-by-box matrices, which are then clustered with
  clusterer.newer_cluster_scores. The number of clusters found is recorded
  on the scorer as 'cluster_rows' and 'cluster_cols'.

  Args:
    boxes: Sequence of boxes; each box is indexed as
      (x, y, width, height) in positions 0-3.
    lines: Pair whose element 0 holds the horizontal lines and element 1 the
      vertical lines; both are passed through to the line-score helpers.

  Returns:
    A (row_cluster_boxes, col_cluster_boxes) tuple. Each element is a list of
    clusters and each cluster is a list of the original box objects.
  """
  horiz_lines = lines[0]
  vert_lines = lines[1]

  # Every cell starts at 1.0; only the off-diagonal cells are overwritten
  # below, so the diagonal keeps that initial value.
  box_count = len(boxes)
  row_score_matrix = [[1.0] * box_count for _ in range(box_count)]
  col_score_matrix = [[1.0] * box_count for _ in range(box_count)]

  for (i, raw_1), (j, raw_2) in combinations(enumerate(boxes), 2):
    box_1 = _box_edges(raw_1)
    box_2 = _box_edges(raw_2)

    row_scores = {}
    col_scores = {}

    # 1.) Their vertical (rows) / horizontal (cols) centers align.
    # top + bottom is twice the center, hence the 2.0 numerator.
    # May want to cut the factor down to 1.0 to make it a max of 1.0
    row_scores['center_align'] = 2.0 / (1.0 + abs(box_1['top'] + box_1['bottom'] - box_2['top'] - box_2['bottom']))
    col_scores['center_align'] = 2.0 / (1.0 + abs(box_1['left'] + box_1['right'] - box_2['left'] - box_2['right']))

    # 2.) Their top (rows) / left (cols) edges align
    row_scores['top_align'] = 1.0 / (1.0 + abs(box_1['top'] - box_2['top']))
    col_scores['left_align'] = 1.0 / (1.0 + abs(box_1['left'] - box_2['left']))

    # 3.) Their bottom (rows) / right (cols) edges align
    row_scores['bottom_align'] = 1.0 / (1.0 + abs(box_1['bottom'] - box_2['bottom']))
    col_scores['right_align'] = 1.0 / (1.0 + abs(box_1['right'] - box_2['right']))

    # 4.) If there is a line close above them (rows) / to their left (cols)
    row_scores['top_line'] = calculate_preceding_line_score(box_1['top'], box_2['top'], horiz_lines)
    col_scores['left_line'] = calculate_preceding_line_score(box_1['left'], box_2['left'], vert_lines)

    # 5.) If there is a line close below them (rows) / to their right (cols)
    row_scores['bottom_line'] = calculate_succeeding_line_score(box_1['bottom'], box_2['bottom'], horiz_lines)
    col_scores['right_line'] = calculate_succeeding_line_score(box_1['right'], box_2['right'], vert_lines)

    # 6.) They overlap significantly in their vertical (rows) / horizontal (cols) range
    row_scores['vert_overlap'] = calculate_vertical_overlap(box_1, box_2)
    col_scores['horiz_overlap'] = calculate_horizontal_overlap(box_1, box_2)

    # 7.) I would like to add in a term regarding a shared strong score with a third object

    row_score = calculate_row_score(row_scores)
    col_score = calculate_col_score(col_scores)

    # The affinity is symmetric, so mirror it across the diagonal.
    row_score_matrix[i][j] = row_score
    row_score_matrix[j][i] = row_score
    col_score_matrix[i][j] = col_score
    col_score_matrix[j][i] = col_score

  # Might want to do 0.999 later
  row_clusters = clusterer.newer_cluster_scores(row_score_matrix, 1.0)
  col_clusters = clusterer.newer_cluster_scores(col_score_matrix, 1.0)

  scorer.add_score('cluster_rows', len(row_clusters))
  scorer.add_score('cluster_cols', len(col_clusters))

  # Now translate the clusters of indexes into clusters of boxes
  row_cluster_boxes = _clusters_to_boxes(row_clusters, boxes)
  col_cluster_boxes = _clusters_to_boxes(col_clusters, boxes)

  return (row_cluster_boxes, col_cluster_boxes)
Example #4
0
def get_lines(img_name, base_path):
  """Detect horizontal and vertical line segments in an image.

  The image at base_path + '/' + img_name is converted to grayscale, run
  through Canny edge detection, and searched for segments with the
  probabilistic Hough transform (cv2.HoughLinesP). Only segments whose two
  endpoints share a y coordinate (horizontal) or an x coordinate (vertical)
  are kept. Several counts derived from the kept segments are recorded on
  the scorer.

  Args:
    img_name: File name of the image.
    base_path: Directory the image lives in (joined with '/').

  Returns:
    A (horiz_lines, vert_lines) tuple of lists. Each entry is a dict with
    'border' (the shared coordinate as an int: y for horizontal lines, x for
    vertical lines) and 'start' / 'end' (the segment's two coordinates along
    the other axis, in the order HoughLinesP reported them).
  """
  # NOTE(review): cv2.imread is documented to return None for a path it
  # cannot read, in which case cvtColor below will raise -- confirm callers
  # only pass existing images.
  img = cv2.imread(base_path + '/' + img_name)
  gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
  edges = cv2.Canny(gray, 50, 150, apertureSize = 3)
  # cv2.imwrite('regents/canny/' + img_name, edges)
  # 120, 20, 10 is good. Also 80, 20, 1
  lines = cv2.HoughLinesP(edges, 1, np.pi / 180, 120, minLineLength=40, maxLineGap=2)

  # HoughLinesP yields None instead of an empty result when nothing is found.
  if lines is None:
    lines = []

  horiz_lines = []
  vert_lines = []

  for info in lines:
    # Each entry wraps a single (x1, y1, x2, y2) endpoint quadruple.
    x1, y1, x2, y2 = info[0]

    if abs(y1 - y2) < 0.1:
      # This is a horizontal line
      horiz_lines.append({
        'border': int((y1 + y2) / 2),
        'start': x1,
        'end': x2
      })
    elif abs(x1 - x2) < 0.1:
      # This is a vertical line
      vert_lines.append({
        'border': int((x1 + x2) / 2),
        'start': y1,
        'end': y2
      })
    elif verbose:
      # Report the endpoints of the skipped segment. (This branch used to
      # reference an undefined name and raised NameError.)
      print('Nonstandard line: ' + str((int(x1), int(y1), int(x2), int(y2))))

  scorer.add_score('horiz_lines', len(horiz_lines))
  scorer.add_score('vert_lines', len(vert_lines))

  # Row/column estimates for the case where the lines include the outer border ...
  scorer.add_score('line_outside_rows', len(horiz_lines) - 1)
  scorer.add_score('line_outside_cols', len(vert_lines) - 1)

  # ... and for the case where they are interior separators only.
  scorer.add_score('line_inside_rows', len(horiz_lines) + 1)
  scorer.add_score('line_inside_cols', len(vert_lines) + 1)

  return (horiz_lines, vert_lines)