def progress(state, progress=0.0):
    """Report task progress to the state file, the GUI callback, and an
    optional state callback.

    Args:
        state: Progress message string, or ``None`` to force a state-file
            rewrite without a message.
        progress: Fraction complete in [0, 1]; rendered as a percentage.

    Uses module globals: ``state_file_name``, ``last_state_datetime``,
    ``callback``, ``cg_load_backend_ok``, ``state_callback``.
    """
    if len(state_file_name):
        global last_state_datetime
        # Throttle state-file writes to at most one per second, except when
        # state is None (forced update).
        if last_state_datetime < datetime.now() + timedelta(
                milliseconds=-1000) or state is None:
            last_state_datetime = datetime.now()
            retry = 1
            while True:
                try:
                    with open(state_file_name, 'w') as f:
                        if state is not None:
                            f.write(
                                state +
                                ' ({0:3.2f}%)'.format(progress * 100))
                    break
                except OSError:
                    # The file may be transiently locked by a reader
                    # (e.g. on Windows); retry up to 100 times.
                    # (Was a bare `except:`, which also swallowed
                    # KeyboardInterrupt/SystemExit.)
                    retry += 1
                    if retry > 100:
                        logger.critical(
                            'Failed to write to {}.'.format(state_file_name))
                        raise
                    time.sleep(0.1)
    callback.update_progress('{0} ({1:3.2f}%)'.format(state, progress * 100))
    if cg_load_backend_ok:
        callback.update_status()
    if state_callback is not None:
        state_callback(state, progress)
def create_object_detection_dataset_command(args):
    """Build an object-detection dataset from a source image tree.

    Converts every image under ``args.sourcedir`` (via the project-level
    ``convert_image`` worker), computes anchor boxes, and writes one or two
    index CSV files (train / optional test split) into ``args.outdir``.

    Returns:
        True on success, False on a fatal configuration/input error,
        None when width/height are not divisible by grid_size.
    """
    # settings
    source_dir = args.sourcedir
    dest_dir = args.outdir
    width = int(args.width)
    height = int(args.height)
    mode = args.mode
    ch = int(args.channel)
    num_class = int(args.num_class)
    grid_size = int(args.grid_size)
    shuffle = args.shuffle == 'true'
    num_anchor = int(args.num_anchor)

    if width % grid_size != 0:
        # Fixed stray quote that was in the original message text.
        logger.log(99, 'width must be divisible by grid_size.')
        return
    if height % grid_size != 0:
        logger.log(99, 'height must be divisible by grid_size.')
        return

    dest_csv_file_name = [os.path.join(args.outdir, args.file1)]
    if args.file2:
        dest_csv_file_name.append(os.path.join(args.outdir, args.file2))
    test_data_ratio = int(args.ratio2) if args.ratio2 else 0

    if args.sourcedir == args.outdir:
        logger.critical("Input directory and output directory are same.")
        return False

    # create file list
    logger.log(99, "Creating file list...")

    def create_file_list(dir=""):
        # Recursively collect image file paths relative to source_dir.
        result = []
        items = os.listdir(os.path.join(source_dir, dir))
        for item in items:
            if os.path.isdir(os.path.join(source_dir, dir, item)):
                result.extend(create_file_list(os.path.join(dir, item)))
            elif re.search(r'\.(bmp|jpg|jpeg|png|gif|tif|tiff)',
                           os.path.splitext(item)[1], re.IGNORECASE):
                result.append(os.path.join(dir, item))
        return result

    file_list = create_file_list()
    if len(file_list) == 0:
        logger.critical(
            "No image file found in the subdirectory of the input directory.")
        return False

    # calc anchor
    logger.log(99, "Calculating anchors...")
    anchors = get_anchors(source_dir, file_list, num_anchor)

    # create output data
    logger.log(99, "Creating output images...")
    process_args = [(data, source_dir, dest_dir, width, height, mode, ch,
                     num_class, grid_size, anchors) for data in file_list]
    p = mp.Pool(mp.cpu_count())
    pbar = tqdm.tqdm(total=len(process_args))
    for _ in p.imap_unordered(convert_image, process_args):
        pbar.update()
    pbar.close()
    # Release the worker pool (was leaked in the original).
    p.close()
    p.join()

    file_list = [os.path.join('.', 'data', file) for file in file_list]
    # Keep only entries whose converted .png actually exists.
    file_list = [file for file in file_list if os.path.exists(
        os.path.join(dest_dir, os.path.splitext(file)[0] + '.png'))]
    if len(file_list) == 0:
        logger.critical("No image and label file created correctly.")
        return False

    logger.log(99, "Creating CSV files...")
    if shuffle:
        import random
        random.shuffle(file_list)

    # First entry gets (100 - ratio)% of the data, second the remainder.
    csv_data_num = [(len(file_list) * (100 - test_data_ratio)) // 100]
    csv_data_num.append(len(file_list) - csv_data_num[0])
    data_head = 0
    for csv_file_name, data_num in zip(dest_csv_file_name, csv_data_num):
        if data_num:
            file_list_2 = file_list[data_head:data_head + data_num]
            data_head += data_num
            with open(csv_file_name, 'w') as f:
                writer = csv.writer(f, lineterminator='\n')
                writer.writerow(['x:image', 'y:label', 'r:region'])
                for file in file_list_2:
                    base_file_name = os.path.splitext(file)[0]
                    writer.writerow([file,
                                     base_file_name + '_label.csv',
                                     base_file_name + '_region.csv'])
    logger.log(99, "Dataset was successfully created.")
    return True
def create_cache_file(args):
    """Convert a class-per-directory image tree into HDF5 cache files.

    Scans ``args.sourcedir`` for subdirectories (one per class), collects
    image files (detected with ``imghdr``), optionally shuffles, then writes
    batches of 100 images as ``dataNNNN_<count>.h5`` files (datasets 'x' for
    uint8 CHW images, 'y' for int16 class indices) into ``args.outdir``.
    Images are trimmed or padded to the target aspect ratio, then resized.
    """
    # settings
    source_dir = args.sourcedir
    dest_dir = args.outdir
    width = int(args.width)
    height = int(args.height)
    padding = args.mode == 'padding'
    shuffle = args.shuffle == 'true'

    if source_dir == dest_dir:
        logger.critical("Input directory and output directory are same.")
        return

    # create file list
    logger.log(99, "Creating file list...")
    dirs = os.listdir(source_dir)
    dirs = [d for d in dirs if os.path.isdir(os.path.join(source_dir, d))]
    dirs.sort()

    labels = []
    label_index = -1
    csv_data = []
    pbar = tqdm.tqdm(total=100, unit='%')
    last = 0
    for i, dir in enumerate(dirs):
        full_path = os.path.join(source_dir, dir)
        files = os.listdir(full_path)
        files = [
            f for f in files if os.path.isfile(os.path.join(full_path, f))
        ]
        files.sort()
        found = False
        for i2, file in enumerate(files):
            file_name = os.path.join(full_path, file)
            if imghdr.what(file_name) is not None:
                if not found:
                    # First image in this directory establishes a new label.
                    labels.append(dir)
                    label_index += 1
                    found = True
                csv_data.append([os.path.join('.', dir, file), label_index])
            current = round(100 * (float(i) / len(dirs) +
                                   float(i2) / (len(dirs) * len(files))))
            if last < current:
                pbar.update(current - last)
                last = current
    pbar.close()

    logger.log(99, "Creating cache files...")
    if shuffle:
        import random
        random.shuffle(csv_data)

    data_size = 100  # images per cache file
    num_data_files = int((len(csv_data) - 1) / data_size + 1)
    for i in tqdm.tqdm(range(num_data_files)):
        # Last file may hold fewer than data_size images.
        num_image = data_size if (i + 1) * data_size < len(
            csv_data) else len(csv_data) - i * data_size
        data = {}
        data['x'] = []
        data['y'] = []
        for i2 in range(num_image):
            # csv_data paths start with '.'; strip it and prepend source_dir.
            image_file_name = source_dir + csv_data[i2 + i * data_size][0][1:]
            class_index = int(csv_data[i2 + i * data_size][1])
            if os.path.exists(image_file_name):
                im = scipy.misc.imread(image_file_name, mode='RGB')
                # resize
                h = im.shape[0]
                w = im.shape[1]
                if w != width or h != height:
                    # resize image
                    if not padding:
                        # trimming mode: crop the longer axis to the target
                        # aspect ratio, then resize.
                        if float(h) / w > float(height) / width:
                            target_h = int(float(w) / width * height)
                            im = im[(h - target_h) // 2:h -
                                    (h - target_h) // 2, ::]
                        else:
                            target_w = int(float(h) / height * width)
                            im = im[::, (w - target_w) // 2:w -
                                    (w - target_w) // 2]
                        im = scipy.misc.imresize(arr=im,
                                                 size=(height, width),
                                                 interp='lanczos')
                    else:
                        # padding mode: pad the shorter axis to the target
                        # aspect ratio, then resize.
                        if float(h) / w < float(height) / width:
                            target_h = int(float(height) / width * w)
                            pad = (((target_h - h) // 2,
                                    target_h - (target_h - h) // 2 - h),
                                   (0, 0))
                        else:
                            target_w = int(float(width) / height * h)
                            pad = ((0, 0),
                                   ((target_w - w) // 2,
                                    target_w - (target_w - w) // 2 - w))
                        pad = pad + ((0, 0), )  # no padding on channel axis
                        im = np.pad(im, pad, 'constant')
                        im = scipy.misc.imresize(arr=im,
                                                 size=(height, width),
                                                 interp='lanczos')
                # HWC uint8 -> CHW uint8
                x = np.array(im, dtype=np.uint8).transpose((2, 0, 1))
                data['x'].append(x)
                data['y'].append(np.array([class_index], dtype=np.int16))
            else:
                print(image_file_name, ' is not found.')
        out_file_name = dest_dir + '/data{:04d}_{}.h5'.format(i, num_image)
        h5 = h5py.File(out_file_name, 'w')
        h5.create_dataset('y', data=data['y'])
        h5.create_dataset('x', data=data['x'])
        # BUG FIX: original had `h5.close` (attribute access, never called),
        # so the HDF5 file was never explicitly closed/flushed.
        h5.close()
def create_image_classification_dataset_command(args):
    """Build an image-classification dataset (multiprocessing variant).

    Scans ``args.sourcedir`` subdirectories (one per class), converts images
    via the project-level ``convert_image`` worker pool, and writes one or
    two index CSV files (train / optional test split) into ``args.outdir``.

    NOTE(review): a later function in this file re-defines the same name, so
    at import time that later definition shadows this one — confirm which is
    intended to be active.

    Returns:
        True on success, False on a fatal configuration error.
    """
    # settings
    source_dir = args.sourcedir
    dest_dir = args.outdir
    width = int(args.width)
    height = int(args.height)
    mode = args.mode
    ch = int(args.channel)
    shuffle = args.shuffle == 'true'
    dest_csv_file_name = [os.path.join(args.outdir, args.file1)]
    if args.file2:
        dest_csv_file_name.append(os.path.join(args.outdir, args.file2))
    test_data_ratio = int(args.ratio2) if args.ratio2 else 0

    if args.sourcedir == args.outdir:
        logger.critical("Input directory and output directory are same.")
        return False

    # create file list
    logger.log(99, "Creating file list...")
    dirs = os.listdir(args.sourcedir)
    dirs = [d for d in dirs if os.path.isdir(os.path.join(args.sourcedir, d))]
    dirs.sort()

    labels = []
    label_index = -1
    csv_data = []
    pbar = tqdm.tqdm(total=100, unit='%')
    last = 0
    for i, dir in enumerate(dirs):
        full_path = os.path.join(args.sourcedir, dir)
        files = os.listdir(full_path)
        files = [
            f for f in files if os.path.isfile(os.path.join(full_path, f))
        ]
        files.sort()
        found = False
        for i2, file in enumerate(files):
            file_name = os.path.join(full_path, file)
            if re.search(r'\.(bmp|jpg|jpeg|png|gif|tif|tiff)',
                         os.path.splitext(file_name)[1], re.IGNORECASE):
                if not found:
                    # First image in this directory establishes a new label.
                    labels.append(dir)
                    label_index += 1
                    found = True
                csv_data.append([os.path.join('.', dir, file), label_index])
            current = round(100 * (float(i) / len(dirs) +
                                   float(i2) / (len(dirs) * len(files))))
            if last < current:
                pbar.update(current - last)
                last = current
    pbar.close()

    # create output data
    logger.log(99, "Creating output images...")
    process_args = [(data[0], source_dir, dest_dir, width, height, mode, ch)
                    for data in csv_data]
    p = mp.Pool(mp.cpu_count())
    pbar = tqdm.tqdm(total=len(process_args))
    for _ in p.imap_unordered(convert_image, process_args):
        pbar.update()
    pbar.close()

    # Replace each entry's path with the converted .png, dropping entries
    # whose conversion failed (file absent in dest_dir).
    for data in csv_data:
        file_name = os.path.splitext(data[0])[0] + ".png"
        data[0] = file_name if os.path.exists(os.path.join(
            dest_dir, file_name)) else None
    csv_data = [data for data in csv_data if data[0]]

    logger.log(99, "Creating CSV files...")
    if shuffle:
        import random
        random.shuffle(csv_data)

    # First entry gets (100 - ratio)% of the data, second the remainder.
    csv_data_num = [(len(csv_data) * (100 - test_data_ratio)) // 100]
    csv_data_num.append(len(csv_data) - csv_data_num[0])
    data_head = 0
    for csv_file_name, data_num in zip(dest_csv_file_name, csv_data_num):
        if data_num:
            csv_data_2 = csv_data[data_head:data_head + data_num]
            data_head += data_num
            csv_data_2.insert(0, ['x:image', 'y:label'])
            with open(csv_file_name, 'w') as f:
                writer = csv.writer(f, lineterminator='\n')
                writer.writerows(csv_data_2)
    logger.log(99, "Dataset was successfully created.")
    return True
def create_image_classification_dataset_command(args):
    """Build an image-classification dataset (single-process scipy variant).

    Scans ``args.sourcedir`` subdirectories (one per class), trims/pads and
    resizes each image in-process with ``scipy.misc``, converts between RGB
    and monochrome as requested, writes the converted .png files into
    ``args.outdir``, then writes one or two index CSV files (train /
    optional test split).

    NOTE(review): this re-defines a function of the same name earlier in the
    file and therefore shadows it — confirm which is intended to be active.
    """
    # settings
    source_dir = args.sourcedir
    dest_csv_file_name = [os.path.join(args.outdir, args.file1)]
    if args.file2:
        dest_csv_file_name.append(os.path.join(args.outdir, args.file2))
    dest_dir = args.outdir
    width = int(args.width)
    height = int(args.height)
    padding = args.mode == 'padding'
    ch = int(args.channel)
    shuffle = args.shuffle == 'true'
    test_data_ratio = int(args.ratio2) if args.ratio2 else 0

    if source_dir == dest_dir:
        logger.critical("Input directory and output directory are same.")
        return

    # create file list
    logger.log(99, "Creating file list...")
    dirs = os.listdir(source_dir)
    dirs = [d for d in dirs if os.path.isdir(os.path.join(source_dir, d))]
    dirs.sort()

    labels = []
    label_index = -1
    csv_data = []
    pbar = tqdm.tqdm(total=100, unit='%')
    last = 0
    for i, dir in enumerate(dirs):
        full_path = os.path.join(source_dir, dir)
        files = os.listdir(full_path)
        files = [
            f for f in files if os.path.isfile(os.path.join(full_path, f))
        ]
        files.sort()
        found = False
        for i2, file in enumerate(files):
            file_name = os.path.join(full_path, file)
            if imghdr.what(file_name) is not None:
                if not found:
                    # First image in this directory establishes a new label.
                    labels.append(dir)
                    label_index += 1
                    found = True
                csv_data.append([os.path.join('.', dir, file), label_index])
            current = round(100 * (float(i) / len(dirs) +
                                   float(i2) / (len(dirs) * len(files))))
            if last < current:
                pbar.update(current - last)
                last = current
    pbar.close()

    # create output data
    logger.log(99, "Creating output images...")
    # BUG FIX: the original removed rejected entries from csv_data while
    # iterating over it, which silently skipped the element following each
    # removal. Build a list of kept entries instead.
    kept_data = []
    for data in tqdm.tqdm(csv_data, unit='images'):
        src_file_name = os.path.join(source_dir, data[0])
        data[0] = os.path.splitext(data[0])[0] + ".png"
        dest_file_name = os.path.join(dest_dir, data[0])
        dest_path = os.path.dirname(dest_file_name)

        # open source image
        im = scipy.misc.imread(src_file_name)
        if len(im.shape) < 2 or len(im.shape) > 3:
            # BUG FIX: original used "%s".format(...), which printed a
            # literal '%s'; also fixed the "Illigal" typo.
            logger.warning(
                "Illegal image file format {}.".format(src_file_name))
            continue
        elif len(im.shape) == 3:
            # RGB image
            if im.shape[2] != 3:
                # BUG FIX: same broken "%s".format(...) pattern.
                logger.warning(
                    "The image must be RGB or monochrome {}.".format(
                        src_file_name))
                continue

        # resize
        h = im.shape[0]
        w = im.shape[1]
        if w != width or h != height:
            # resize image
            if not padding:
                # trimming mode: crop the longer axis to the target aspect
                # ratio, then resize.
                if float(h) / w > float(height) / width:
                    target_h = int(float(w) / width * height)
                    im = im[(h - target_h) // 2:h - (h - target_h) // 2, ::]
                else:
                    target_w = int(float(h) / height * width)
                    im = im[::, (w - target_w) // 2:w - (w - target_w) // 2]
                im = scipy.misc.imresize(arr=im,
                                         size=(height, width),
                                         interp='lanczos')
            else:
                # padding mode: pad the shorter axis to the target aspect
                # ratio, then resize.
                if float(h) / w < float(height) / width:
                    target_h = int(float(height) / width * w)
                    pad = (((target_h - h) // 2,
                            target_h - (target_h - h) // 2 - h), (0, 0))
                else:
                    target_w = int(float(width) / height * h)
                    pad = ((0, 0),
                           ((target_w - w) // 2,
                            target_w - (target_w - w) // 2 - w))
                if len(im.shape) == 3:
                    pad = pad + ((0, 0), )  # no padding on channel axis
                im = np.pad(im, pad, 'constant')
                im = scipy.misc.imresize(arr=im,
                                         size=(height, width),
                                         interp='lanczos')

        # change color ch
        if len(im.shape) == 2 and ch == 3:
            # Monochrome to RGB
            im = np.array([im, im, im]).transpose((1, 2, 0))
        elif len(im.shape) == 3 and ch == 1:
            # RGB to monochrome (ITU-R BT.601 luma weights)
            im = np.dot(im[..., :3], [0.299, 0.587, 0.114])

        # output
        if not os.path.exists(dest_path):
            os.makedirs(dest_path)
        scipy.misc.imsave(dest_file_name, im)
        kept_data.append(data)
    csv_data = kept_data

    logger.log(99, "Creating CSV files...")
    if shuffle:
        import random
        random.shuffle(csv_data)

    # First entry gets (100 - ratio)% of the data, second the remainder.
    csv_data_num = [(len(csv_data) * (100 - test_data_ratio)) // 100]
    csv_data_num.append(len(csv_data) - csv_data_num[0])
    data_head = 0
    for csv_file_name, data_num in zip(dest_csv_file_name, csv_data_num):
        if data_num:
            csv_data_2 = csv_data[data_head:data_head + data_num]
            data_head += data_num
            csv_data_2.insert(0, ['x:image', 'y:label'])
            with open(csv_file_name, 'w') as f:
                writer = csv.writer(f, lineterminator='\n')
                writer.writerows(csv_data_2)