def write_tfrecord(decode_path, write_path):
    # The decoded image has shape 6x10x3
    img_data = basis_code.basis_decode(decode_path)
    img_data = tf.image.resize_images(img_data, [6, 10], method=1)
    with tf.Session() as sess:
        img_value = sess.run(img_data)
    # Total number of files to write
    num_shards = 2
    # Number of records to write into each file
    instance_per_shard = 3
    for i in range(num_shards):
        # Split the data across several files so they are easy to match when reading
        filename = os.path.join(
            write_path, "data.tfrecords-%.5d-of-%.5d" % (i, num_shards))
        writer = tf.python_io.TFRecordWriter(filename)
        for j in range(instance_per_shard):
            if i == 1:
                j = j + 3
            Log_Util.getlogger("write img" + str(j)).info(img_value[j])
            img_raw = img_value[j].tostring()
            example = tf.train.Example(features=tf.train.Features(
                feature={
                    "labels": _int64_feature(j),
                    "img_data": _byte_feature(img_raw)
                }))
            try:
                writer.write(example.SerializeToString())
            except IOError:
                print("write error!")
        writer.close()
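write_tfrecord relies on two small helpers, _int64_feature and _byte_feature, that are not shown in this section. A minimal sketch of what they would look like, assuming they are the usual tf.train.Feature wrappers:

import tensorflow as tf

# Sketch of the helpers used above (assumed, not shown in this section):
# wrap a Python int / byte string into a tf.train.Feature.
def _int64_feature(value):
    return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))

def _byte_feature(value):
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))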
def adjust_brightness(filepath):
    img_data = basis_code.basis_decode(filepath)
    if img_data.dtype != tf.float32:
        img_data = tf.image.convert_image_dtype(img_data, tf.float32)
    # Decrease the image brightness by 0.5
    adjust_less = tf.image.adjust_brightness(img_data, -0.5)
    # Color adjustments can push pixel values outside the [0.0, 1.0] range, so clip them back
    # before producing the final image; otherwise the image cannot be visualized properly, and
    # the quality of a network trained on it may also suffer.
    # If several adjustments are applied in sequence, do the clipping once after all of them.
    adjust_less = tf.clip_by_value(adjust_less, 0.0, 1.0)
    # Increase the image brightness by 0.5
    adjust_add = tf.image.adjust_brightness(img_data, 0.5)
    adjust_add = tf.clip_by_value(adjust_add, 0.0, 1.0)
    # Randomly adjust the brightness with a delta in [-0.7, 0.7)
    random_adjust = tf.image.random_brightness(img_data, 0.7)
    random_adjust = tf.clip_by_value(random_adjust, 0.0, 1.0)
    with tf.Session() as sess:
        basis.drawing(sess.run(adjust_less))
        basis.drawing(sess.run(adjust_add))
        basis.drawing(sess.run(random_adjust))
        basis_code.basis_encode(adjust_less, basis_code.get_encode_path(filepath, 1))
        basis_code.basis_encode(adjust_add, basis_code.get_encode_path(filepath, 2))
        basis_code.basis_encode(random_adjust, basis_code.get_encode_path(filepath, 3))
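The comment above notes that when several adjustments are chained, the clipping should happen once at the end rather than after each step. A minimal sketch of that pattern (assuming img_data is already a float32 tensor in [0, 1]; the chosen adjustments and values are illustrative):

import tensorflow as tf

def chain_adjustments(img_data):
    # Apply several adjustments in sequence without intermediate clipping...
    adjusted = tf.image.adjust_brightness(img_data, 0.3)
    adjusted = tf.image.adjust_contrast(adjusted, 2.0)
    # ...and clip back to [0.0, 1.0] once, after all of them.
    return tf.clip_by_value(adjusted, 0.0, 1.0)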
def adjust_hue(filepath):
    img_data = basis_code.basis_decode(filepath)
    if img_data.dtype != tf.float32:
        img_data = tf.image.convert_image_dtype(img_data, tf.float32)
    # Shift the hue of the image by 0.1, 0.3, 0.6 and 0.9 respectively
    adjust_add1 = tf.image.adjust_hue(img_data, 0.1)
    adjust_add2 = tf.image.adjust_hue(img_data, 0.3)
    adjust_add3 = tf.image.adjust_hue(img_data, 0.6)
    adjust_add4 = tf.image.adjust_hue(img_data, 0.9)
    # Randomly shift the hue by a delta in [-0.3, 0.3); the second argument is the random seed
    random_adjust = tf.image.random_hue(img_data, 0.3, 1)
    with tf.Session() as sess:
        basis.drawing(sess.run(adjust_add1))
        basis.drawing(sess.run(adjust_add2))
        basis.drawing(sess.run(adjust_add3))
        basis.drawing(sess.run(adjust_add4))
        basis.drawing(sess.run(random_adjust))
        basis_code.basis_encode(adjust_add1, basis_code.get_encode_path(filepath, 1))
        basis_code.basis_encode(adjust_add2, basis_code.get_encode_path(filepath, 2))
        basis_code.basis_encode(adjust_add3, basis_code.get_encode_path(filepath, 3))
        basis_code.basis_encode(adjust_add4, basis_code.get_encode_path(filepath, 4))
        basis_code.basis_encode(random_adjust, basis_code.get_encode_path(filepath, 5))
def adjust_standardization(filepath):
    img_data = basis_code.basis_decode(filepath)
    if img_data.dtype != tf.float32:
        img_data = tf.image.convert_image_dtype(img_data, tf.float32)
    # Standardize the image: shift its mean to 0 and its variance to 1
    adjust = tf.image.per_image_standardization(img_data)
    with tf.Session() as sess:
        basis.drawing(sess.run(adjust))
        basis_code.basis_encode(adjust, basis_code.get_encode_path(filepath, 1))
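A quick, self-contained check of what per_image_standardization does (illustrative only; the image here is random and the shape is arbitrary):

import numpy as np
import tensorflow as tf

image = tf.random_uniform([180, 267, 3], dtype=tf.float32)
standardized = tf.image.per_image_standardization(image)
with tf.Session() as sess:
    value = sess.run(standardized)
    # Mean is approximately 0.0 and standard deviation approximately 1.0
    print(np.mean(value), np.std(value))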
def method_two(filepath):
    img_data = basis_code.basis_decode(filepath)
    if img_data.dtype != tf.float32:
        img_data = tf.image.convert_image_dtype(img_data, tf.float32)
    # Flip the image vertically with 50% probability
    random_up_down_data = tf.image.random_flip_up_down(img_data)
    # Flip the image horizontally with 50% probability
    random_left_right = tf.image.random_flip_left_right(img_data)
    with tf.Session() as sess:
        basis.drawing(sess.run(random_up_down_data))
        basis.drawing(sess.run(random_left_right))
        basis_code.basis_encode(random_up_down_data, basis_code.get_encode_path(filepath, 1))
        basis_code.basis_encode(random_left_right, basis_code.get_encode_path(filepath, 2))
def add_box(filepath):
    img_data = basis_code.basis_decode(filepath)
    if img_data.dtype != tf.float32:
        img_data = tf.image.convert_image_dtype(img_data, tf.float32)
    # Shrink the image a little so the bounding boxes are easier to see when visualized
    img_data = tf.image.resize_images(img_data, [180, 267], method=1)
    # tf.image.draw_bounding_boxes expects real-valued pixels (already converted above) and
    # takes a batch of images, i.e. a 4-D tensor, so add a batch dimension to the decoded image.
    img_data = tf.expand_dims(img_data, 0)
    # Two bounding boxes are defined below. Each box is given by 4 numbers
    # [y_min, x_min, y_max, x_max], expressed relative to the image size.
    # For a 180x267 image, [0.33, 0.43, 0.48, 0.67] corresponds to
    # [0.33*180, 0.43*267, 0.48*180, 0.67*267] in pixels.
    boxes = tf.constant([[[0.13, 0.24, 0.55, 0.89], [0.33, 0.43, 0.48, 0.67]]])
    result_data = tf.image.draw_bounding_boxes(img_data, boxes)
    with tf.Session() as sess:
        basis.drawing(sess.run(result_data[0]))
        basis_code.basis_encode(result_data[0], basis_code.get_encode_path(filepath, 1))
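To make the relative-coordinate convention concrete, here is a small helper (not part of the original code, purely illustrative) that converts a [y_min, x_min, y_max, x_max] relative box into pixel coordinates:

def relative_box_to_pixels(box, height, width):
    # box is [y_min, x_min, y_max, x_max] in relative coordinates
    y_min, x_min, y_max, x_max = box
    return [y_min * height, x_min * width, y_max * height, x_max * width]

# For the 180x267 image above:
print(relative_box_to_pixels([0.33, 0.43, 0.48, 0.67], 180, 267))
# approximately [59.4, 114.81, 86.4, 178.89]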
def method_one(filepath):
    img_data = basis_code.basis_decode(filepath)
    if img_data.dtype != tf.float32:
        img_data = tf.image.convert_image_dtype(img_data, tf.float32)
    # Flip the image vertically
    up_down_data = tf.image.flip_up_down(img_data)
    # Flip the image horizontally
    left_right_data = tf.image.flip_left_right(img_data)
    # Transpose the image along its main diagonal
    transposed = tf.image.transpose_image(img_data)
    with tf.Session() as sess:
        basis.drawing(sess.run(up_down_data))
        basis.drawing(sess.run(left_right_data))
        basis.drawing(sess.run(transposed))
        basis_code.basis_encode(up_down_data, basis_code.get_encode_path(filepath, 1))
        basis_code.basis_encode(left_right_data, basis_code.get_encode_path(filepath, 2))
        basis_code.basis_encode(transposed, basis_code.get_encode_path(filepath, 3))
def adjust_saturation(filepath):
    img_data = basis_code.basis_decode(filepath)
    if img_data.dtype != tf.float32:
        img_data = tf.image.convert_image_dtype(img_data, tf.float32)
    # Decrease the image saturation (saturation factor -5)
    adjust_less = tf.image.adjust_saturation(img_data, -5)
    # Increase the image saturation (saturation factor 5)
    adjust_add = tf.image.adjust_saturation(img_data, 5)
    # Randomly adjust the saturation with a factor in [1, 4)
    random_adjust = tf.image.random_saturation(img_data, 1, 4)
    with tf.Session() as sess:
        basis.drawing(sess.run(adjust_less))
        basis.drawing(sess.run(adjust_add))
        basis.drawing(sess.run(random_adjust))
        basis_code.basis_encode(adjust_less, basis_code.get_encode_path(filepath, 1))
        basis_code.basis_encode(adjust_add, basis_code.get_encode_path(filepath, 2))
        basis_code.basis_encode(random_adjust, basis_code.get_encode_path(filepath, 3))
def adjust_contrast(filepath):
    img_data = basis_code.basis_decode(filepath)
    if img_data.dtype != tf.float32:
        img_data = tf.image.convert_image_dtype(img_data, tf.float32)
    # Reduce the image contrast to 0.5x
    adjust_less = tf.image.adjust_contrast(img_data, 0.5)
    # Increase the image contrast by a factor of 5
    adjust_add = tf.image.adjust_contrast(img_data, 5)
    # Randomly adjust the contrast with a factor in [0.3, 1)
    random_adjust = tf.image.random_contrast(img_data, 0.3, 1)
    with tf.Session() as sess:
        basis.drawing(sess.run(adjust_less))
        basis.drawing(sess.run(adjust_add))
        basis.drawing(sess.run(random_adjust))
        basis_code.basis_encode(adjust_less, basis_code.get_encode_path(filepath, 1))
        basis_code.basis_encode(adjust_add, basis_code.get_encode_path(filepath, 2))
        basis_code.basis_encode(random_adjust, basis_code.get_encode_path(filepath, 3))
def slice_box(filepath):
    img_data = basis_code.basis_decode(filepath)
    if img_data.dtype != tf.float32:
        img_data = tf.image.convert_image_dtype(img_data, tf.float32)
    # Resize the image
    img_data = tf.image.resize_images(img_data, [180, 267], method=1)
    # Define the bounding boxes
    boxes = tf.constant([[[0.13, 0.24, 0.55, 0.89], [0.33, 0.43, 0.48, 0.67]]])
    # The bounding boxes tell the random-cropping algorithm which parts of the image are
    # "informative". min_object_covered=0.4 means the crop must contain at least 40% of one
    # of the boxes. bbox_for_draw is a new, randomly generated bounding box.
    begin, size, bbox_for_draw = tf.image.sample_distorted_bounding_box(
        tf.shape(img_data), bounding_boxes=boxes, min_object_covered=0.4)
    # Add a batch dimension
    batch = tf.expand_dims(img_data, 0)
    box_data = tf.image.draw_bounding_boxes(batch, bbox_for_draw)
    # The randomly cropped image. The algorithm is stochastic, so each run gives a different result.
    distorted_image = tf.slice(img_data, begin, size)
    with tf.Session() as sess:
        basis.drawing(sess.run(img_data))
        basis.drawing(sess.run(box_data[0]))
        basis.drawing(sess.run(distorted_image))
        basis_code.basis_encode(img_data, basis_code.get_encode_path(filepath, 1))
        basis_code.basis_encode(box_data[0], basis_code.get_encode_path(filepath, 2))
        basis_code.basis_encode(distorted_image, basis_code.get_encode_path(filepath, 3))
def preprocess_for_train(image, height, width, bbox):
    # Convert the image to float32 before any adjustment
    image = tf.image.convert_image_dtype(image, tf.float32)
    # Randomly crop the image so the recognition algorithm is less sensitive to the size of the object of interest
    bbox_begin, bbox_size, _ = tf.image.sample_distorted_bounding_box(
        tf.shape(image), bounding_boxes=bbox)
    distorted_image = tf.slice(image, bbox_begin, bbox_size)
    # Resize the random crop to the size of the network input layer; the resize method is chosen at random
    distorted_image = tf.image.resize_images(
        distorted_image, [height, width], method=np.random.randint(4))
    # Randomly flip the image vertically
    distorted_image = tf.image.random_flip_up_down(distorted_image)
    # Randomly flip the image horizontally
    distorted_image = tf.image.random_flip_left_right(distorted_image)
    # Adjust the colors in one randomly chosen order (distort_color is defined elsewhere)
    distorted_image = distort_color(distorted_image, color_ordering=np.random.randint(2))
    return distorted_image


# With the function above, a single training image can be turned into many training samples.
# Preprocessing the training images this way lets the trained network recognize objects of
# different sizes, orientations and colors.
if __name__ == "__main__":
    filepath = basis_code.get_andclean_image()
    image = basis_code.basis_decode(filepath)
    boxes = tf.constant([[[0.13, 0.24, 0.55, 0.89], [0.33, 0.43, 0.48, 0.67]]])
    with tf.Session() as sess:
        # Run 6 times to get 6 different versions of the image
        for i in range(6):
            result = preprocess_for_train(image, 180, 267, boxes)
            basis.drawing(sess.run(result))
            basis_code.basis_encode(result, basis_code.get_encode_path(filepath, i))
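preprocess_for_train calls distort_color with color_ordering drawn from np.random.randint(2), i.e. one of two orderings, but the function itself is not shown in this section. A minimal sketch of what it might look like; the specific deltas and ranges below are illustrative assumptions, not the original values:

import tensorflow as tf

def distort_color(image, color_ordering=0):
    # Apply brightness / saturation / hue / contrast adjustments in one of two orders,
    # then clip the result back to [0.0, 1.0] once at the end.
    if color_ordering == 0:
        image = tf.image.random_brightness(image, max_delta=32.0 / 255.0)
        image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
        image = tf.image.random_hue(image, max_delta=0.2)
        image = tf.image.random_contrast(image, lower=0.5, upper=1.5)
    else:
        image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
        image = tf.image.random_brightness(image, max_delta=32.0 / 255.0)
        image = tf.image.random_contrast(image, lower=0.5, upper=1.5)
        image = tf.image.random_hue(image, max_delta=0.2)
    return tf.clip_by_value(image, 0.0, 1.0)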