def pretreate_image(im, image_name):
    """Pre-process an image and split it by its most frequent colors.

    Steps, in order:

    1. Binarize ``im`` with a fixed threshold (180) via ``image_binaryzation``.
    2. Convert ``im`` to RGB and hand the RGB and binary pixel lists to
       ``remove_noise``. Its return value is discarded, so it presumably
       edits the RGB pixel list in place -- TODO confirm.
    3. Write the pixel list back and save it as
       ``<image_name>_without_noise.jpg``.
    4. Pass a copy of the pixels to ``remove_background_pixels`` (return value
       also discarded; presumably strips background pixels in place), count
       how often each remaining color occurs and keep the four colors with
       the largest pixel count ("area").
    5. Call ``get_single_color_image`` once per retained color, each time with
       a fresh copy of the RGB image and of the pixel list.

    :param im: image to pre-process; must provide ``convert`` and ``size``
        (presumably a PIL/Pillow ``Image`` -- verify against the caller)
    :param image_name: path/name prefix used for the saved output file
    :return: None
    """
    # Function-scope import so the block stays self-contained.
    from collections import Counter

    threshold = 180
    image_binary = image_binaryzation(im, threshold)
    image_binary_pixels = list(image_binary.getdata())

    im_rgb = im.convert('RGB')
    image_rgb_pixels = list(im_rgb.getdata())
    width, height = im.size

    remove_noise(image_rgb_pixels, image_binary_pixels, width, height)
    im_rgb.putdata(image_rgb_pixels)
    im_rgb.save(image_name + "_without_noise.jpg")

    # Pixel list used only for the statistics below; the background-colored
    # pixels are removed from it before counting.
    list_pixels_for_statistic = image_rgb_pixels.copy()
    remove_background_pixels(list_pixels_for_statistic)

    # Count every color once. Counter keeps first-occurrence order, so walking
    # it visits the colors in the same order a pixel-by-pixel scan would first
    # meet them, without calling list.count() for every single pixel.
    color_areas = Counter(list_pixels_for_statistic)

    pixels_statistic_largest_4 = {}  # the 4 colors with the largest area
    least_color_key = None  # color with the smallest area among the kept ones
    least_color_value = 0  # area of that color
    for color, area in color_areas.items():
        if len(pixels_statistic_largest_4) < 4:
            pixels_statistic_largest_4[color] = area
        elif area > least_color_value:
            # Strictly larger than the current minimum: evict the minimum.
            pixels_statistic_largest_4.pop(least_color_key)
            pixels_statistic_largest_4[color] = area
        else:
            continue
        least_color_key, least_color_value = CustomDataStructureUtil.get_min(pixels_statistic_largest_4)

    # NOTE(review): when no pixels survive background removal the dict is
    # empty here; how get_min handles an empty dict is not visible -- confirm.
    print(CustomDataStructureUtil.get_min(pixels_statistic_largest_4)[1])
    print(pixels_statistic_largest_4)

    for large_color in list(pixels_statistic_largest_4):
        get_single_color_image(im_rgb.copy(), image_rgb_pixels.copy(), large_color, width, height)
def get_color_ration(pixels_statistic):
    """Classify the area distribution of the main colors in an image.

    In a colored 4-character captcha, take the colors that cover the most
    pixels and order them by area, largest first: first, second, third,
    fourth. Allowing for noise and interference lines, the intended
    classification is:

    * Type_A - 4 characters in 4 different colors:
      first:second in [1, 1.5) and second:third in [1, 1.5)
    * Type_B - 3 different colors, two characters share one:
      first:second in [1.5, 2.5) and second:third in [1, 1.5)
    * Type_C - 2 different colors, two characters each:
      first:second in [1, 1.5) and second:third in [3.5, inf)
    * Type_D - 2 different colors, three characters plus one:
      first:second in [2.5, 3.5)
    * Type_E - all 4 characters in the same color:
      first:second in [3.5, inf)

    For Type_B, Type_D and Type_E only the first:second ratio is evaluated
    here; the second:third condition listed for Type_B is not checked.

    :param pixels_statistic: pixel statistic result; a dict is expected
        (presumably color -> pixel count -- verify against the caller)
    :return: the matching ``ColorRatioType`` constant, or ``None`` when the
        argument is not a dict or the ratios fit none of the ranges
        (e.g. first:second in [1, 1.5) with second:third in [1.5, 3.5))
    """
    if not isinstance(pixels_statistic, dict):
        return None

    # Work on a copy: the largest entries are popped one after another.
    remaining = pixels_statistic.copy()

    # get_max results are indexed as [0] -> key and [1] -> value below.
    # NOTE(review): its behavior on an empty dict is not visible here, so
    # inputs with fewer than 2 (or 3) entries are left unguarded -- confirm.
    first = CustomDataStructureUtil.get_max(remaining)
    remaining.pop(first[0])
    second = CustomDataStructureUtil.get_max(remaining)
    remaining.pop(second[0])

    ratio_first_second = first[1] / second[1]

    if 1 <= ratio_first_second < 1.5:
        # The two largest colors are similar in area; the third one decides.
        third = CustomDataStructureUtil.get_max(remaining)
        remaining.pop(third[0])
        ratio_second_third = second[1] / third[1]
        if 1 <= ratio_second_third < 1.5:
            return ColorRatioType.__TYPE_A__
        # Inclusive lower bound, matching the documented [3.5, inf) range and
        # the Type_E check below.
        if ratio_second_third >= 3.5:
            return ColorRatioType.__TYPE_C__
        return None
    if 1.5 <= ratio_first_second < 2.5:
        return ColorRatioType.__TYPE_B__
    if 2.5 <= ratio_first_second < 3.5:
        return ColorRatioType.__TYPE_D__
    if ratio_first_second >= 3.5:
        return ColorRatioType.__TYPE_E__
    return None