Example #1
0
def load_diabetes():
    """
    Load the diabetes dataset, preferring the local CSV cache.

    When both cache files under ``./cache/sk`` exist they are read back with
    pandas. Otherwise the dataset is fetched with
    ``datasets.load_diabetes()``, logged, written to the cache files and
    returned.

    @return: diabetes dataset by (data, target)
    """

    # Cache file locations
    file_path_data = './cache/sk/sk_diabete_data.csv'
    file_path_target = './cache/sk/sk_diabete_target.csv'

    # Use the cache only when BOTH files exist; a half-written cache
    # (data present, target missing) is rebuilt instead of crashing.
    if os.path.exists(file_path_data) and os.path.exists(file_path_target):
        df_data = pd.read_csv(file_path_data)
        df_target = pd.read_csv(file_path_target)
        # DataFrame.as_matrix() was removed in pandas 1.0; .values yields the
        # same ndarray and exists in every pandas version.
        # NOTE: the cached target comes back as a 2-D single-column array.
        return df_data.values, df_target.values

    # No usable cache: make sure the cache directory exists first
    filecm.makedir(file_path_data, by_file=True)

    # Fetch the dataset and log what was loaded
    diabetes = datasets.load_diabetes()
    logcm.print_obj(diabetes.DESCR, 'diabetes.DESCR')
    logcm.print_obj(diabetes.data, '样本原始数据集')
    logcm.print_obj(diabetes.target, '样本目标数据集')

    # Persist both arrays for the next call
    DataFrame(diabetes.data).to_csv(file_path_data, index=False)
    DataFrame(diabetes.target).to_csv(file_path_target, index=False)

    return diabetes.data, diabetes.target
Example #2
0
def save_xml(xml_str, save_path, encoding="utf-8"):
    """
    Pretty-print an XML string and save it to a file.

    @param xml_str: XML document text (a string, parsed with minidom)
    @param save_path: path of the XML file to write
    @param encoding: text encoding used to encode the input for parsing,
                     for the XML declaration, and for the file itself
    @return: None; any error is logged instead of being raised
    """

    try:
        logcm.print_info("Saving xml file --> %s" % save_path)

        # Parse the XML document
        xml_doc = minidom.parseString(xml_str.encode(encoding))

        # Make sure the target directory exists
        filecm.makedir(save_path, by_file=True)

        # writexml() only puts `encoding` into the XML declaration; the
        # bytes on disk are decided by how the file is opened. Open with the
        # same encoding so declaration and content always agree.
        with open(save_path, "w", encoding=encoding) as file:
            xml_doc.writexml(file,
                             addindent="\t",
                             newl="\n",
                             encoding=encoding)

    except Exception as e:
        # Deliberate best-effort: report the failure and carry on
        logcm.print_info("Saving xml error! %s" % e, fg='red')
Example #3
0
def get_hand_digits_knn():
    """
    Return a KNN model trained for handwritten-digit recognition.

    Training samples and labels are cached as .npy files under ./cache/cv.
    When the sample cache file exists both arrays are loaded from disk;
    otherwise they are built from ./images/cv_digits.png and then cached.

    @return: KNN object trained on the samples
    """

    # Cache directory and file names
    cache_dir = './cache/cv'
    train_name = 'digits_knn_train.npy'
    label_name = 'digits_knn_label.npy'
    filecm.makedir(cache_dir)

    if not filecm.exists(cache_dir, train_name):
        # No cache yet: build samples and labels from the digit sheet
        logcm.print_info("load hand digits train data from Image.")
        sheet = cv2.imread('./images/cv_digits.png')
        # Convert to grayscale
        gray = cv2.cvtColor(sheet, cv2.COLOR_BGR2GRAY)

        # Split the sheet into 50 rows of 100 cells each
        grid = [np.hsplit(band, 100) for band in np.vsplit(gray, 50)]

        samples = []
        for band_cells in grid:
            for cell in band_cells:
                # Per the original comment this strips the blank border of
                # the cell; the helper lives in opencvcm (not shown here).
                trimmed = opencvcm.resize_by_max_contours(cell, 20, 20, 1, 1)
                # Flatten each cell into a single row of 400 values
                samples.append(trimmed.reshape((1, 400)))

        # One float32 row of 400 values per cell
        train = np.array(samples).reshape(-1, 400).astype(np.float32)
        # Labels 0..9, each repeated 500 times
        label = np.repeat(np.arange(10), 500)

        # Write the cache files
        np.save(os.path.join(cache_dir, train_name), train)
        np.save(os.path.join(cache_dir, label_name), label)
    else:
        # Load samples and labels from the cache files
        logcm.print_info("load hand digits train data from cache file.")
        train = np.load(os.path.join(cache_dir, train_name))
        label = np.load(os.path.join(cache_dir, label_name))

    # Create the KNN model and train it, one sample per row
    model = cv2.ml.KNearest_create()
    model.train(train, cv2.ml.ROW_SAMPLE, label)

    return model
from numpy.linalg import norm

from common import cvsvmcm
from common import filecm
from common import logcm
from common import plotcm

# SVM model, as returned by cvsvmcm.get_car_plate_svm()
svm = cvsvmcm.get_car_plate_svm()

# Collect the paths of everything under the plates folder
img_path = gb.glob("./images/plates/*")
# Define and create the temporary directory
tmp_path = './temp/cv/plate'
filecm.makedir(tmp_path)

# Image matrix (starts empty; filled by code outside this excerpt)
img_matrix = []
# Title matrix (starts empty; filled by code outside this excerpt)
title_matrix = []

# Process each image in turn
for path in img_path:
    # Per-image lists of images and their titles
    img_list = []
    title_list = []

    # The first entry is the image as read from disk, titled with its
    # short file name
    img = cv2.imread(path)
    img_list.append(img)
    filename = filecm.short_name(path)
    title_list.append('原图-%s' % filename)
Example #5
0
def get_print_number_knn():
    """
    Return a KNN model trained to recognise printed digits.

    Training samples and labels are cached as .npy files under ./cache/cv.
    When the sample cache file exists both arrays are loaded from disk.
    Otherwise they are extracted from every image under ./images/numbers:
    the five left-most digit regions of the upper half are labelled 1-5 and
    the five left-most of the lower half are labelled 6, 7, 8, 9, 0. The
    result is then written to the cache.

    @return: KNN object trained on the samples
    @raise IndexError: if fewer than five digit regions are found in either
                       half of an image
    """

    # Cache directory and file names
    path = './cache/cv'
    file_train = 'number_knn_train.npy'
    file_label = 'number_knn_label.npy'
    filecm.makedir(path)

    if filecm.exists(path, file_train):
        # Load training samples and labels from the cache files
        train = np.load(os.path.join(path, file_train))
        label = np.load(os.path.join(path, file_label))
    else:
        # Build training samples and labels from the images
        img_path = gb.glob("./images/numbers/*")
        # Directory for intermediate debug images
        tmp_path = './temp/cv/number'
        filecm.makedir(tmp_path)

        label = []
        train = []

        func_key = "get_print_number_knn"
        for file_path in img_path:
            # Short file name, used to tag the intermediate images
            name = filecm.short_name(file_path)
            img = cv2.imread(file_path)

            # Convert to grayscale
            gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            save_tmp(gray, func_key, "gray", tmp_path, name)

            # Gaussian blur
            blur = cv2.GaussianBlur(gray, (5, 5), 0)
            save_tmp(blur, func_key, "GaussianBlur", tmp_path, name)

            # Adaptive (local) threshold. The named constants equal the
            # literal 1, 1 used previously.
            thresh = cv2.adaptiveThreshold(
                blur, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                cv2.THRESH_BINARY_INV, 11, 2)
            save_tmp(thresh, func_key, "adaptiveThreshold", tmp_path, name)

            # findContours returns (image, contours, hierarchy) on OpenCV 3
            # but (contours, hierarchy) on OpenCV 2 and 4; the contour list
            # is always the second-to-last item.
            contours = cv2.findContours(
                thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]
            height = img.shape[0]

            # Bounding boxes of the digits in the upper / lower half
            list1 = []
            list2 = []
            for cnt in contours:
                # Upright bounding rectangle
                x, y, w, h = cv2.boundingRect(cnt)

                # Keep only regions large enough to be a digit
                if w > 30 and h > (height / 4):
                    # Split into rows by the y coordinate
                    if y < (height / 2):
                        list1.append([x, y, w, h])
                    else:
                        list2.append([x, y, w, h])

            # Order each row left to right
            list1_sorted = sorted(list1, key=lambda t: t[0])
            list2_sorted = sorted(list2, key=lambda t: t[0])

            for i in range(5):
                # Upper row holds 1..5; lower row holds 6, 7, 8, 9, 0
                j = (i + 6) % 10
                boxes = ((list1_sorted[i], i + 1), (list2_sorted[i], j))
                for (x, y, w, h), digit in boxes:
                    # Cut the digit out of the grayscale image
                    number_roi = gray[y:y + h, x:x + w]

                    # Bring every digit to 20x40 and binarise it
                    resized_roi = cv2.resize(number_roi, (20, 40))
                    digit_thresh = cv2.adaptiveThreshold(
                        resized_roi, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                        cv2.THRESH_BINARY_INV, 11, 2)

                    # Save the digit image for inspection
                    save_tmp(digit_thresh, func_key, "adaptiveThreshold",
                             tmp_path, name + "-" + str(digit))

                    # Normalise to [0, 1], flatten to one row of 800 values
                    # and store it with its label
                    normalized_roi = digit_thresh / 255.
                    train.append(normalized_roi.reshape((1, 800)))
                    label.append(digit)

        # Convert the training data to float32 arrays
        train = np.array(train).reshape(-1, 800).astype(np.float32)
        label = np.array(label).astype(np.float32)

        # Write the cache files
        np.save(os.path.join(path, file_train), train)
        np.save(os.path.join(path, file_label), label)

    # Create the KNN model
    knn = cv2.ml.KNearest_create()

    # Train it, one sample per row
    knn.train(train, cv2.ml.ROW_SAMPLE, label)

    return knn
{
    "img_root": "./images/img",
    "img_ext": ".jpeg",
    "max_count" : 10,
    "fps": 10,
    "save_path": "./temp/output.avi",
    "width": 1024,
    "height": 683
}
"""

# Load the configuration file (default_config supplies the fallback text)
cfg = loadcfgcm.load("cv_video_from_img.json", default_config)

# Create the directories
# NOTE(review): the positional True is presumably by_file=True, as in the
# keyword form used by other callers of filecm.makedir; confirm.
filecm.makedir(cfg['save_path'], True)
filecm.makedir('./temp/cv/video-from-img')

# Create the video writer (MJPG codec, frame rate and size from the config)
fourcc = cv2.VideoWriter_fourcc(*"MJPG")
videoWriter = cv2.VideoWriter(cfg['save_path'], fourcc, cfg['fps'],
                              (cfg['width'], cfg['height']))
im_output = VideoImageOutput(videoWriter)

# Get the list of image paths
path_list = filecm.search_files(cfg["img_root"], cfg['img_ext'], r'^[^\.]+')
# Number of images to process: max_count, capped at the number found
max_size = cfg['max_count']
if max_size > len(path_list):
    max_size = len(path_list)
from common import imagecm

import os

# Default configuration, used as the fallback for cv_video_to_img.json
default_config = """
{
    "video_path": "/temp/output.avi",
    "img_root": "/path/to/img",
    "max_count" : 10,
    "fps": 5
}
"""

# Load the configuration file
cfg = loadcfgcm.load("cv_video_to_img.json", default_config)

# Directory that receives the extracted frames
filecm.makedir(cfg['img_root'])

vc = cv2.VideoCapture(cfg['video_path'])
frame_no = 0
count = 0
if vc.isOpened():
    # Priming read; this first frame is not counted or saved because the
    # loop reads again before using a frame.
    rval, frame = vc.read()
else:
    rval = False
while rval:
    rval, frame = vc.read()
    if not rval:
        # No frame was returned, so there is nothing valid to write.
        break
    frame_no = frame_no + 1
    # Save every cfg['fps']-th frame
    if frame_no % cfg['fps'] == 0:
        count += 1
        # Join with a path separator so the image lands INSIDE img_root;
        # plain concatenation produced "<img_root>N.jpg" next to it.
        cv2.imwrite(os.path.join(cfg["img_root"], str(count) + '.jpg'), frame)
Example #8
0
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
一个HDF5文件是一种存放两类对象的容器:dataset和group.
Dataset是类似于数组的数据集,而group是类似文件夹一样的容器,存放dataset和其他group。
在使用h5py的时候需要牢记一句话:groups类比词典,dataset类比Numpy中的数组。
"""

import h5py
import numpy as np
from common import filecm

# Output file
tmp_path = "./temp/file/mytestfile.hdf5"
filecm.makedir(tmp_path, by_file=True)

# Writing HDF5:
imgData = np.zeros((30, 3, 128, 256))
# Create the h5 file; the with-block closes it even if a write raises
with h5py.File(tmp_path, 'w') as f:
    # Store the array under the key "data"
    f['data'] = imgData
    # Store the values 0..99 under the key "labels"
    f['labels'] = range(100)

# Reading HDF5:
# Open the h5 file
f = h5py.File(tmp_path, 'r')
# All top-level keys can be listed
Example #9
0
import numpy as np
import os

from numpy.linalg import norm
from common import filecm
from common import logcm
from common import opencvcm

SZ = 20  # Width/height of the training images
MAX_WIDTH = 1000  # Maximum width of the original image
# NOTE(review): the original comment called this the "maximum allowed area"
# of a plate region, but the name says minimum; confirm which is intended.
Min_Area = 2000  # Area threshold for plate regions
PROVINCE_START = 1000

# Cache and data directories; both are created as soon as the module loads
CACHE_PATH = './cache/cv'
DATA_PATH = './data/plate'
filecm.makedir(CACHE_PATH)
filecm.makedir(DATA_PATH)


# Read an image file
def imreadex(filename):
    """
    Decode the image stored at *filename* as a colour image.

    The file's bytes are loaded with ``np.fromfile`` and handed to
    ``cv2.imdecode`` with ``cv2.IMREAD_COLOR``.

    @param filename: path of the image file
    @return: whatever ``cv2.imdecode`` returns for those bytes
    """
    raw_bytes = np.fromfile(filename, dtype=np.uint8)
    return cv2.imdecode(raw_bytes, cv2.IMREAD_COLOR)


def point_limit(point):
    """
    Clamp negative coordinates of *point* to zero, in place.

    Only indices 0 and 1 are examined; an entry is overwritten with 0 when
    it is below zero and left untouched otherwise.

    @param point: mutable sequence whose first two items are coordinates
    """
    for axis in (0, 1):
        if point[axis] < 0:
            point[axis] = 0