import datetime
import gzip
import os
import pickle
import zlib

import numpy as np

import m1  # assumed module name: referenced below as the GDAL-based Multiband2Array reader


def create_pickle_train(image_path, mask_path, pkl_path, img_pixel=10, channels=3):
    """Slide a half-overlapping window over an image/mask pair and pickle the patches."""
    m = 0  # patches accumulated in the current chunk
    n = 0  # chunk index
    image_data = m1.Multiband2Array(image_path)
    mask_data = m1.Multiband2Array(mask_path) // 255  # binarize the mask to {0, 1}
    x_size, y_size = image_data.shape[:2]
    row_len = img_pixel * img_pixel * channels + 1  # pixels + label (301 for the defaults)
    data_list = []
    # The range bounds already guarantee every window fits inside the image.
    for i in range(0, x_size - img_pixel + 1, img_pixel // 2):
        for j in range(0, y_size - img_pixel + 1, img_pixel // 2):
            cropped_data = image_data[i:i + img_pixel, j:j + img_pixel]
            data1 = cropped_data.reshape((-1, img_pixel * img_pixel * channels))  # flatten the patch to one row
            # A patch counts as positive if any mask pixel inside it is positive.
            train_label = mask_data[i:i + img_pixel, j:j + img_pixel].max()
            data2 = np.append(data1, train_label)[np.newaxis, :]  # data + label
            data_list.append(data2)
            m += 1
            if m % 10000 == 0:
                print(datetime.datetime.now(), "compressed {number} images".format(number=m))
            if m >= 10000000:  # dump a chunk every 10,000,000 patches and keep scanning
                data_matrix = np.array(data_list, dtype=np.float32).reshape((-1, row_len))
                with gzip.open(pkl_path + '_' + str(n) + '.pkl', 'wb') as writer:  # gzip the pickle for extra compression
                    pickle.dump(data_matrix, writer)
                data_list = []
                m = 0
                n += 1
    if data_list:
        data_matrix = np.array(data_list, dtype=np.float32).reshape((-1, row_len))
        with gzip.open(pkl_path + '.pkl', 'wb') as writer:  # gzip the pickle for extra compression
            pickle.dump(data_matrix, writer)
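
# For reference, a minimal loading sketch (not part of the original file), assuming
# the row layout written above: img_pixel * img_pixel * channels pixel values
# followed by one label, stored as float32. Name and signature are hypothetical.
def load_pickle_train(pkl_file, img_pixel=10, channels=3):
    with gzip.open(pkl_file, 'rb') as reader:
        data_matrix = pickle.load(reader)  # shape: (samples, pixels + 1)
    pixels = data_matrix[:, :img_pixel * img_pixel * channels]
    labels = data_matrix[:, -1].astype(np.int64)  # last column holds the patch label
    return pixels, labels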
def create_pickle_test(dir_name, img_pixel=60, channels=4):
    """Pickle one compressed row per test image; sub-folder names are the class labels."""
    data_list = []  # each element: zlib-compressed bytes of one (pixels + label) row
    for label_dir in next(os.walk(dir_name))[1]:  # top-level sub-folder name doubles as the label
        file_path = os.path.join(dir_name, label_dir)
        for img_name in os.listdir(file_path):
            image_path = os.path.join(file_path, img_name)
            img = m1.Multiband2Array(image_path)  # GDAL-based read; handles more than 3 bands
            data1 = img.reshape((-1, img_pixel * img_pixel * channels))  # flatten to one row
            label = np.array([int(label_dir)])  # folder name as the label
            data2 = np.append(data1, label)[np.newaxis, :]  # data + label
            data2 = data2.tobytes()  # raw bytes to shrink the file
            data2 = zlib.compress(data2)  # compress further with zlib
            data_list.append(data2)  # a list, not vstack: compressed rows vary in length
    with gzip.open(dir_name + 'test_data.pkl', 'wb') as writer:  # gzip the pickle for extra compression
        pickle.dump(data_list, writer)
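
# Reading test_data.pkl back requires the dtype that np.append produced when the
# row was written, which depends on what Multiband2Array returns. A minimal
# decoding sketch, assuming float64 (hypothetical helper; adjust dtype as needed).
def load_pickle_test(pkl_file, img_pixel=60, channels=4, dtype=np.float64):
    with gzip.open(pkl_file, 'rb') as reader:
        compressed_rows = pickle.load(reader)  # list of zlib-compressed byte strings
    decoded = [np.frombuffer(zlib.decompress(row), dtype=dtype) for row in compressed_rows]
    data_matrix = np.vstack(decoded)  # shape: (samples, pixels + 1)
    pixels = data_matrix[:, :img_pixel * img_pixel * channels]
    labels = data_matrix[:, -1].astype(np.int64)
    return pixels, labels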
def create_pickle_train_from_dirs(dir_name, img_pixel=60, channels=4):
    """Build and return a (samples, pixels + 1) training matrix from class sub-folders."""
    rows = []
    for label_dir in next(os.walk(dir_name))[1]:  # top-level sub-folder name doubles as the label
        file_path = os.path.join(dir_name, label_dir)
        for img_name in os.listdir(file_path):
            image_path = os.path.join(file_path, img_name)
            img = m1.Multiband2Array(image_path)  # GDAL-based read; handles more than 3 bands
            data1 = img.reshape((-1, img_pixel * img_pixel * channels))  # flatten to one row
            label = np.array([int(label_dir)])  # folder name as the label
            rows.append(np.append(data1, label)[np.newaxis, :])  # data + label
    return np.vstack(rows)  # stack once at the end instead of per-image vstack
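
# Since this variant returns the matrix rather than writing it, persisting is left
# to the caller. A minimal sketch of doing so (hypothetical helper, placeholder path):
def save_train_matrix(dir_name, img_pixel=60, channels=4):
    train_data = create_pickle_train_from_dirs(dir_name, img_pixel, channels)
    with gzip.open(dir_name + 'train_data.pkl', 'wb') as writer:  # gzip the pickle for extra compression
        pickle.dump(train_data, writer)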
def create_gzip_train(dir_name, img_pixel=60, channels=4):
    """Append one raw-byte row per training image to a single gzip stream."""
    with gzip.open(dir_name + 'train_data.txt.gz', 'wb') as writer:
        for label_dir in next(os.walk(dir_name))[1]:  # top-level sub-folder name doubles as the label
            file_path = os.path.join(dir_name, label_dir)
            for img_name in os.listdir(file_path):
                image_path = os.path.join(file_path, img_name)
                img = m1.Multiband2Array(image_path)  # GDAL-based read; handles more than 3 bands
                data1 = img.reshape((-1, img_pixel * img_pixel * channels))  # flatten to one row
                label = np.array([int(label_dir)])  # folder name as the label
                data2 = np.append(data1, label)[np.newaxis, :]  # data + label
                # Caution: the raw float bytes can contain 0x0A, so b'\n' is only a
                # record pad here, not a safe separator to split on when reading back.
                writer.write(data2.tobytes() + b'\n')
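
# Because the raw float bytes may themselves contain 0x0A, a reader must consume
# fixed-size records rather than split on b'\n'. A sketch, assuming float64 rows
# (hypothetical helper; verify the dtype against what Multiband2Array yields).
def read_gzip_train(gz_file, img_pixel=60, channels=4, dtype=np.float64):
    n_values = img_pixel * img_pixel * channels + 1  # pixels + label
    record_size = n_values * np.dtype(dtype).itemsize + 1  # + 1 for the trailing b'\n'
    rows = []
    with gzip.open(gz_file, 'rb') as reader:
        while True:
            record = reader.read(record_size)
            if len(record) < record_size:
                break
            rows.append(np.frombuffer(record[:-1], dtype=dtype))  # drop the pad byte
    return np.vstack(rows)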
def create_pickle_train_balanced(dir_name, img_pixel=60, channels=4):
    """Dump a capped number of samples per class, one gzip-compressed pickle per class."""
    class_caps = {'0': 636, '1': 681}  # samples to keep for class 0 and class 1
    out_names = {'0': 'train_data.pkl', '1': 'train_data_1.pkl'}
    for label_dir, cap in class_caps.items():
        file_path = os.path.join(dir_name, label_dir)
        rows = []  # each element: zlib-compressed bytes of one (pixels + label) row
        for img_name in os.listdir(file_path):
            if len(rows) >= cap:
                break
            image_path = os.path.join(file_path, img_name)
            img = m1.Multiband2Array(image_path)  # GDAL-based read; handles more than 3 bands
            data1 = img.reshape((-1, img_pixel * img_pixel * channels))  # flatten to one row
            label = np.array([int(label_dir)])  # folder name as the label
            data2 = np.append(data1, label)[np.newaxis, :]  # data + label
            rows.append(zlib.compress(data2.tobytes()))  # raw bytes, then zlib, to shrink the file
        with gzip.open(dir_name + out_names[label_dir], 'wb') as writer:  # gzip the pickle for extra compression
            pickle.dump(rows, writer)
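
# A minimal sketch of loading both per-class dumps back into one training matrix,
# under the same float64 assumption as above (hypothetical helper).
def load_balanced_train(dir_name, dtype=np.float64):
    rows = []
    for pkl_name in ('train_data.pkl', 'train_data_1.pkl'):
        with gzip.open(dir_name + pkl_name, 'rb') as reader:
            for row in pickle.load(reader):  # list of zlib-compressed byte strings
                rows.append(np.frombuffer(zlib.decompress(row), dtype=dtype))
    return np.vstack(rows)  # shape: (samples, pixels + 1)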