def export_inference_model(self, save_dir):
    test_input_names = [
        var.name for var in list(self.test_inputs.values())
    ]
    test_outputs = list(self.test_outputs.values())
    save_prog = self.test_prog.clone(for_test=True)
    with fluid.scope_guard(self.scope):
        fluid.io.save_inference_model(
            dirname=save_dir,
            executor=self.exe,
            params_filename='__params__',
            feeded_var_names=test_input_names,
            target_vars=test_outputs,
            main_program=save_prog)
    model_info = self.get_model_info()
    model_info['status'] = 'Infer'

    # Save descriptions of the model's input and output variables
    model_info['_ModelInputsOutputs'] = dict()
    model_info['_ModelInputsOutputs']['test_inputs'] = [
        [k, v.name] for k, v in self.test_inputs.items()
    ]
    model_info['_ModelInputsOutputs']['test_outputs'] = [
        [k, v.name] for k, v in self.test_outputs.items()
    ]
    with open(
            osp.join(save_dir, 'model.yml'), encoding='utf-8',
            mode='w') as f:
        yaml.dump(model_info, f)

    # Flag file marking that the model was saved successfully
    open(osp.join(save_dir, '.success'), 'w').close()
    logging.info("Model for inference deploy saved in {}.".format(save_dir))
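# Minimal usage sketch for the method above. The model directory and save_dir
# are hypothetical; assumes a trained PaddleX-style model object that exposes
# export_inference_model().
import paddlex as pdx

model = pdx.load_model('output/mobilenetv2/best_model')  # hypothetical path
model.export_inference_model(save_dir='inference_model')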
def preparation_cam(self, data_):
    image_show = read_image(data_)
    result = self.predict_fn(image_show)

    logit = result[0][0]
    if abs(np.sum(logit) - 1.0) > 1e-4:
        # softmax
        logit = logit - np.max(logit)
        exp_result = np.exp(logit)
        probability = exp_result / np.sum(exp_result)
    else:
        probability = logit

    # only interpret top 1
    pred_label = np.argsort(probability)
    pred_label = pred_label[-1:]

    self.predicted_label = pred_label[0]
    self.predicted_probability = probability[pred_label[0]]
    self.image = image_show[0]
    self.labels = pred_label

    fc_weights = paddle_get_fc_weights()
    feature_maps = result[1]

    l = pred_label[0]
    ln = l
    if self.label_names is not None:
        ln = self.label_names[l]

    prob_str = "%.3f" % (probability[pred_label[0]])
    logging.info("predicted result: {} with probability {}.".format(
        ln, prob_str))
    return feature_maps, fc_weights
def visualize_segmentation(image, result, weight=0.6, save_dir='./'):
    """
    Convert the segmentation result to a pseudo-color image, blend it with
    the original image, and save the visualization.
    Args:
        image: the path of the original image, or a decoded ndarray
        result: the prediction result for the image
        weight: the weight of the original image in the blend; the result
            weight is (1 - weight)
        save_dir: the directory for saving the visualized image
    """
    label_map = result['label_map']
    color_map = get_color_map_list(256)
    color_map = np.array(color_map).astype("uint8")

    # Use OpenCV LUT for color mapping
    c1 = cv2.LUT(label_map, color_map[:, 0])
    c2 = cv2.LUT(label_map, color_map[:, 1])
    c3 = cv2.LUT(label_map, color_map[:, 2])
    pseudo_img = np.dstack((c1, c2, c3))

    if isinstance(image, np.ndarray):
        im = image
        image_name = str(int(time.time())) + '.jpg'
    else:
        image_name = os.path.split(image)[-1]
        im = cv2.imread(image)

    vis_result = cv2.addWeighted(im, weight, pseudo_img, 1 - weight, 0)

    if save_dir is not None:
        if not os.path.exists(save_dir):
            os.makedirs(save_dir)
        out_path = os.path.join(save_dir, 'visualize_{}'.format(image_name))
        cv2.imwrite(out_path, vis_result)
        logging.info('The visualized result is saved as {}'.format(out_path))
    else:
        return vis_result
def save_model(self, save_dir):
    if not osp.isdir(save_dir):
        if osp.exists(save_dir):
            os.remove(save_dir)
        os.makedirs(save_dir)
    if self.train_prog is not None:
        fluid.save(self.train_prog, osp.join(save_dir, 'model'))
    else:
        fluid.save(self.test_prog, osp.join(save_dir, 'model'))
    model_info = self.get_model_info()
    model_info['status'] = self.status
    with open(
            osp.join(save_dir, 'model.yml'), encoding='utf-8',
            mode='w') as f:
        yaml.dump(model_info, f)

    # Save evaluation results if available
    if hasattr(self, 'eval_details'):
        with open(osp.join(save_dir, 'eval_details.json'), 'w') as f:
            json.dump(self.eval_details, f)

    if self.status == 'Prune':
        # Save the shapes of the pruned parameters
        shapes = {}
        for block in self.train_prog.blocks:
            for param in block.all_parameters():
                pd_var = fluid.global_scope().find_var(param.name)
                pd_param = pd_var.get_tensor()
                shapes[param.name] = np.array(pd_param).shape
        with open(
                osp.join(save_dir, 'prune.yml'), encoding='utf-8',
                mode='w') as f:
            yaml.dump(shapes, f)

    # Flag file marking that the model was saved successfully
    open(osp.join(save_dir, '.success'), 'w').close()
    logging.info("Model saved in {}.".format(save_dir))
def reader():
    IMG_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.ppm', '.bmp', '.pgm',
                      '.tif', '.tiff', '.webp')
    target_size = 256
    crop_size = 224
    for i, img_path in enumerate(list_image_path):
        if not img_path.lower().endswith(IMG_EXTENSIONS):
            continue
        img = cv2.imread(img_path)
        if img is None:
            logging.info(img_path)
            continue
        img = resize_short(img, target_size, interpolation=None)
        img = crop_image(img, crop_size, center=is_test)
        img = img[:, :, ::-1]  # BGR -> RGB
        img_show = np.expand_dims(img, axis=0)
        img = preprocess_image(img_show, not is_test)
        label = 0 if list_label is None else list_label[i]
        yield img_show, img, label
def save_fig(data_, save_outdir, algorithm_name, num_samples=3000):
    import matplotlib.pyplot as plt
    if algorithm_name == 'cam':
        # Keep the output under save_outdir, consistent with the else branch.
        f_out = "{}_{}_{}.png".format(save_outdir, algorithm_name,
                                      data_.split('/')[-1])
    else:
        f_out = "{}_{}_s{}.png".format(save_outdir, algorithm_name,
                                       num_samples)
    plt.savefig(f_out)
    logging.info(
        'The image of the interpretation result is saved in {}'.format(f_out))
def __init__(self,
             data_dir,
             file_list,
             label_list=None,
             transforms=None,
             num_workers='auto',
             buffer_size=100,
             parallel_method='process',
             shuffle=False):
    super(ChangeDetDataset, self).__init__(
        transforms=transforms,
        num_workers=num_workers,
        buffer_size=buffer_size,
        parallel_method=parallel_method,
        shuffle=shuffle)
    self.file_list = list()
    self.labels = list()
    self._epoch = 0

    if label_list is not None:
        with open(label_list, encoding=get_encoding(label_list)) as f:
            for line in f:
                item = line.strip()
                self.labels.append(item)
    with open(file_list, encoding=get_encoding(file_list)) as f:
        for line in f:
            items = line.strip().split()
            if len(items) > 3:
                raise Exception(
                    "A space is defined as the separator, but it exists in image or label name {}."
                    .format(line))
            items[0] = path_normalization(items[0])
            items[1] = path_normalization(items[1])
            items[2] = path_normalization(items[2])
            full_path_im1 = osp.join(data_dir, items[0])
            full_path_im2 = osp.join(data_dir, items[1])
            full_path_label = osp.join(data_dir, items[2])
            if not osp.exists(full_path_im1):
                raise IOError('The image file {} does not exist!'.format(
                    full_path_im1))
            if not osp.exists(full_path_im2):
                raise IOError('The image file {} does not exist!'.format(
                    full_path_im2))
            if not osp.exists(full_path_label):
                raise IOError('The label file {} does not exist!'.format(
                    full_path_label))
            self.file_list.append(
                [full_path_im1, full_path_im2, full_path_label])
    self.num_samples = len(self.file_list)
    logging.info("{} samples in file {}".format(
        len(self.file_list), file_list))
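# Sketch of the file_list format the constructor above expects: each line
# holds two image paths and one label path, separated by single spaces
# (paths below are hypothetical), e.g. in train_list.txt:
#
#     images_before/0001.png images_after/0001.png labels/0001.png
#
# The dataset can then be built as:
dataset = ChangeDetDataset(
    data_dir='change_det_data',
    file_list='change_det_data/train_list.txt',
    label_list='change_det_data/labels.txt',
    shuffle=True)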
def data_labels(self,
                image,
                fudged_image,
                segments,
                classifier_fn,
                num_samples,
                batch_size=10):
    """Generates images and predictions in the neighborhood of this image.

    Args:
        image: 3d numpy array, the image
        fudged_image: 3d numpy array, image to replace original image when
            superpixel is turned off
        segments: segmentation of the image
        classifier_fn: function that takes a list of images and returns a
            matrix of prediction probabilities
        num_samples: size of the neighborhood to learn the linear model
        batch_size: classifier_fn will be called on batches of this size.

    Returns:
        A tuple (data, labels), where:
            data: dense binary matrix of shape (num_samples, num_superpixels)
            labels: prediction probabilities matrix
    """
    n_features = np.unique(segments).shape[0]
    data = self.random_state.randint(0, 2, num_samples * n_features) \
        .reshape((num_samples, n_features))
    labels = []
    data[0, :] = 1  # the first sample keeps every superpixel switched on
    imgs = []

    logging.info("Computing LIME.", use_color=True)

    for row in tqdm.tqdm(data):
        temp = copy.deepcopy(image)
        zeros = np.where(row == 0)[0]
        mask = np.zeros(segments.shape).astype(bool)
        for z in zeros:
            mask[segments == z] = True
        temp[mask] = fudged_image[mask]
        imgs.append(temp)
        if len(imgs) == batch_size:
            preds = classifier_fn(np.array(imgs))
            labels.extend(preds)
            imgs = []
    if len(imgs) > 0:
        preds = classifier_fn(np.array(imgs))
        labels.extend(preds)
    return data, np.array(labels)
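# Minimal sketch of a classifier_fn compatible with data_labels above: it
# takes a batch of images shaped (N, H, W, 3) and returns an
# (N, num_classes) probability matrix. The mean-intensity "model" below is a
# stand-in for a real network.
import numpy as np

def dummy_classifier_fn(images):
    # Score each image by its mean intensity and softmax over two classes.
    scores = images.reshape(len(images), -1).mean(axis=1)
    logits = np.stack([scores, -scores], axis=1)
    exps = np.exp(logits - logits.max(axis=1, keepdims=True))
    return exps / exps.sum(axis=1, keepdims=True)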
def __init__(self,
             data_dir,
             file_list,
             label_list,
             transforms=None,
             num_workers='auto',
             buffer_size=8,
             parallel_method='process',
             shuffle=False):
    super(ImageNet, self).__init__(
        transforms=transforms,
        num_workers=num_workers,
        buffer_size=buffer_size,
        parallel_method=parallel_method,
        shuffle=shuffle)
    self.file_list = list()
    self.labels = list()
    self._epoch = 0

    with open(label_list, encoding=get_encoding(label_list)) as f:
        for line in f:
            item = line.strip()
            self.labels.append(item)
    logging.info("Starting to read file list from dataset...")
    with open(file_list, encoding=get_encoding(file_list)) as f:
        for line in f:
            img_file, json_file = [osp.join(data_dir, x) \
                for x in line.strip().split()[:2]]
            img_file = path_normalization(img_file)
            json_file = path_normalization(json_file)
            if not is_pic(img_file):
                continue
            if not osp.isfile(json_file):
                continue
            if not osp.exists(img_file):
                raise IOError('The image file {} does not exist!'.format(
                    img_file))
            with open(json_file, mode='r', \
                      encoding=get_encoding(json_file)) as j:
                json_info = json.load(j)
                label = json_info['labels'][0]['name']
                self.file_list.append([img_file, self.labels.index(label)])
    self.num_samples = len(self.file_list)
    logging.info("{} samples in file {}".format(
        len(self.file_list), file_list))
def save_quantized_model(self, save_model_path):
    '''
    Save the quantized model to the disk.

    Args:
        save_model_path(str): The path to save the quantized model.
    Returns:
        None
    '''
    with fluid.scope_guard(self._scope):
        fluid.io.save_inference_model(
            dirname=save_model_path,
            model_filename='__model__',
            params_filename='__params__',
            feeded_var_names=self._feed_list,
            target_vars=self._fetch_list,
            executor=self._executor,
            main_program=self._program)

    logging.info("The quantized model is saved in " + save_model_path)
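# Minimal usage sketch, assuming `quantizer` is an instance of the
# surrounding class, already constructed with an executor, a program and
# feed/fetch lists, and that quantization has been run:
quantizer.save_quantized_model(save_model_path='quantized_model')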
def preparation_lime(self, data_):
    image_show = read_image(data_)
    result = self.predict_fn(image_show)

    result = result[0]  # only one image here.
    if abs(np.sum(result) - 1.0) > 1e-4:
        # softmax
        result = result - np.max(result)
        exp_result = np.exp(result)
        probability = exp_result / np.sum(exp_result)
    else:
        probability = result

    # only interpret top 1
    pred_label = np.argsort(probability)
    pred_label = pred_label[-1:]

    self.predicted_label = pred_label[0]
    self.predicted_probability = probability[pred_label[0]]
    self.image = image_show[0]
    self.labels = pred_label

    l = pred_label[0]
    ln = l
    if self.label_names is not None:
        ln = self.label_names[l]

    prob_str = "%.3f" % (probability[pred_label[0]])
    logging.info("predicted result: {} with probability {}.".format(
        ln, prob_str))

    start = time.time()
    algo = lime_base.LimeImageInterpreter()
    interpreter = algo.interpret_instance(
        self.image,
        self.predict_fn,
        self.labels,
        0,
        num_samples=self.num_samples,
        batch_size=self.batch_size)
    self.lime_interpreter = interpreter
    logging.info('lime time: ' + str(time.time() - start) + 's.')
def _load_model_data(self):
    '''
    Set data loader.
    '''
    logging.info("Set data loader ...")
    if self._program.num_blocks > 1:
        _logger.error("The post training quantization requires that the "
                      "program only has one block.")

    if self._optimize_model:
        self._optimize_fp32_model()

    feed_vars = [fluid.framework._get_var(var_name, self._program) \
        for var_name in self._feed_list]
    self._data_loader = fluid.io.DataLoader.from_generator(
        feed_list=feed_vars,
        capacity=3 * self._batch_size,
        iterable=True)
    self._data_loader.set_sample_list_generator(
        self._dataset.generator(self._batch_size, drop_last=True),
        places=self._place)
def visualize_detection(image, result, threshold=0.5, save_dir='./'):
    """
    Visualize bbox and mask results
    """
    if isinstance(image, np.ndarray):
        image_name = str(int(time.time())) + '.jpg'
    else:
        image_name = os.path.split(image)[-1]
        image = cv2.imread(image)
    image = draw_bbox_mask(image, result, threshold=threshold)
    if save_dir is not None:
        if not os.path.exists(save_dir):
            os.makedirs(save_dir)
        out_path = os.path.join(save_dir, 'visualize_{}'.format(image_name))
        cv2.imwrite(out_path, image)
        logging.info('The visualized result is saved as {}'.format(out_path))
    else:
        return image
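# Minimal usage sketch (hypothetical paths; assumes a trained PaddleX
# detector whose predict() returns results consumable by draw_bbox_mask):
import paddlex as pdx

model = pdx.load_model('output/yolov3/best_model')
result = model.predict('test.jpg')
visualize_detection('test.jpg', result, threshold=0.5, save_dir='./vis')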
def __init__(self,
             data_dir,
             file_list,
             label_list,
             transforms=None,
             num_workers='auto',
             buffer_size=8,
             parallel_method='process',
             shuffle=False):
    super(ImageNet, self).__init__(
        transforms=transforms,
        num_workers=num_workers,
        buffer_size=buffer_size,
        parallel_method=parallel_method,
        shuffle=shuffle)
    self.file_list = list()
    self.labels = list()
    self._epoch = 0

    with open(label_list, encoding=get_encoding(label_list)) as f:
        for line in f:
            item = line.strip()
            self.labels.append(item)
    logging.info("Starting to read file list from dataset...")
    with open(file_list, encoding=get_encoding(file_list)) as f:
        for line in f:
            items = line.strip().split()
            if len(items) > 2:
                raise Exception(
                    "A space is defined as the separator, but it exists in image or label name {}."
                    .format(line))
            items[0] = path_normalization(items[0])
            if not is_pic(items[0]):
                continue
            full_path = osp.join(data_dir, items[0])
            if not osp.exists(full_path):
                raise IOError(
                    'The image file {} does not exist!'.format(full_path))
            self.file_list.append([full_path, int(items[1])])
    self.num_samples = len(self.file_list)
    logging.info("{} samples in file {}".format(
        len(self.file_list), file_list))
def preparation_normlime(self, data_):
    self._lime = LIME(self.predict_fn, self.label_names, self.num_samples,
                      self.batch_size)
    self._lime.preparation_lime(data_)

    image_show = read_image(data_)

    self.predicted_label = self._lime.predicted_label
    self.predicted_probability = self._lime.predicted_probability
    self.image = image_show[0]
    self.labels = self._lime.labels

    logging.info('performing NormLIME operations ...')

    cluster_labels = self.predict_cluster_labels(
        compute_features_for_kmeans(image_show).transpose((1, 2, 0)),
        self._lime.lime_interpreter.segments)

    g_weights = self.predict_using_normlime_weights(self.labels,
                                                    cluster_labels)

    return g_weights
def visualize_segmentation(image, result, weight=0.6, save_dir='./',
                           color=None):
    """
    Convert the segmentation result to a pseudo-color image, blend it with
    the original image, and save the visualization.
    Args:
        image: the path of the original image, or a decoded ndarray
        result: the prediction result for the image
        weight: the weight of the original image in the blend; the result
            weight is (1 - weight)
        save_dir: the directory for saving the visualized image
        color: a flat list of BGR color components, three per label
    """
    label_map = result['label_map']
    color_map = get_color_map_list(256)
    if color is not None:
        for i in range(len(color) // 3):
            color_map[i] = color[i * 3:(i + 1) * 3]
    color_map = np.array(color_map).astype("uint8")

    # Use OpenCV LUT for color mapping
    c1 = cv2.LUT(label_map, color_map[:, 0])
    c2 = cv2.LUT(label_map, color_map[:, 1])
    c3 = cv2.LUT(label_map, color_map[:, 2])
    pseudo_img = np.dstack((c1, c2, c3))

    if isinstance(image, np.ndarray):
        im = image
        image_name = str(int(time.time() * 1000)) + '.jpg'
        if image.shape[2] != 3:
            logging.info(
                "The image is not a 3-channel array, so the predicted label map is shown as a pseudo-color image."
            )
            weight = 0.
    else:
        image_name = os.path.split(image)[-1]
        if not is_pic(image):
            logging.info(
                "The image cannot be opened by OpenCV, so the predicted label map is shown as a pseudo-color image."
            )
            image_name = image_name.split('.')[0] + '.jpg'
            weight = 0.
        else:
            im = cv2.imread(image)

    if abs(weight) < 1e-5:
        vis_result = pseudo_img
    else:
        vis_result = cv2.addWeighted(im, weight,
                                     pseudo_img.astype(im.dtype),
                                     1 - weight, 0)

    if save_dir is not None:
        if not os.path.exists(save_dir):
            os.makedirs(save_dir)
        out_path = os.path.join(save_dir, 'visualize_{}'.format(image_name))
        cv2.imwrite(out_path, vis_result)
        logging.info('The visualized result is saved as {}'.format(out_path))
    else:
        return vis_result
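# Minimal usage sketch (hypothetical paths; assumes a trained PaddleX
# segmentation model whose predict() returns a dict with a 'label_map').
# The optional color list is flat BGR, three values per label:
import paddlex as pdx

model = pdx.load_model('output/deeplab/best_model')
result = model.predict('test.jpg')
visualize_segmentation(
    'test.jpg',
    result,
    weight=0.6,
    save_dir='./vis',
    color=[0, 0, 0, 0, 255, 0])  # label 0 -> black, label 1 -> green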
def __init__(self,
             data_dir,
             file_list,
             label_list,
             transforms=None,
             num_workers='auto',
             buffer_size=100,
             parallel_method='process',
             shuffle=False):
    super(SegDataset, self).__init__(
        transforms=transforms,
        num_workers=num_workers,
        buffer_size=buffer_size,
        parallel_method=parallel_method,
        shuffle=shuffle)
    self.file_list = list()
    self.labels = list()
    self._epoch = 0

    with open(label_list, encoding=get_encoding(label_list)) as f:
        for line in f:
            item = line.strip()
            self.labels.append(item)

    with open(file_list, encoding=get_encoding(file_list)) as f:
        for line in f:
            items = line.strip().split()
            if not is_pic(items[0]):
                continue
            full_path_im = osp.join(data_dir, items[0])
            full_path_label = osp.join(data_dir, items[1])
            if not osp.exists(full_path_im):
                raise IOError(
                    'The image file {} does not exist!'.format(full_path_im))
            if not osp.exists(full_path_label):
                raise IOError('The label file {} does not exist!'.format(
                    full_path_label))
            self.file_list.append([full_path_im, full_path_label])
    self.num_samples = len(self.file_list)
    logging.info("{} samples in file {}".format(
        len(self.file_list), file_list))
def export_inference_model(self, save_dir):
    test_input_names = [
        var.name for var in list(self.test_inputs.values())
    ]
    test_outputs = list(self.test_outputs.values())
    if self.__class__.__name__ == 'MaskRCNN':
        from paddlex.utils.save import save_mask_inference_model
        save_mask_inference_model(
            dirname=save_dir,
            executor=self.exe,
            params_filename='__params__',
            feeded_var_names=test_input_names,
            target_vars=test_outputs,
            main_program=self.test_prog)
    else:
        fluid.io.save_inference_model(
            dirname=save_dir,
            executor=self.exe,
            params_filename='__params__',
            feeded_var_names=test_input_names,
            target_vars=test_outputs,
            main_program=self.test_prog)
    model_info = self.get_model_info()
    model_info['status'] = 'Infer'

    # Save descriptions of the model's input and output variables
    model_info['_ModelInputsOutputs'] = dict()
    model_info['_ModelInputsOutputs']['test_inputs'] = [
        [k, v.name] for k, v in self.test_inputs.items()
    ]
    model_info['_ModelInputsOutputs']['test_outputs'] = [
        [k, v.name] for k, v in self.test_outputs.items()
    ]
    with open(
            osp.join(save_dir, 'model.yml'), encoding='utf-8',
            mode='w') as f:
        yaml.dump(model_info, f)

    # Flag file marking that the model was saved successfully
    open(osp.join(save_dir, '.success'), 'w').close()
    logging.info(
        "Model for inference deploy saved in {}.".format(save_dir))
def compute_normlime_weights(a_list_lime_fnames, save_dir,
                             lime_num_samples):
    normlime_weights_all_labels = {}

    for f in a_list_lime_fnames:
        try:
            lime_weights_and_cluster = np.load(f, allow_pickle=True).item()
            lime_weights = lime_weights_and_cluster['lime_weights']
            cluster = lime_weights_and_cluster['cluster']
        except:
            logging.info('When loading precomputed LIME result, skipping ' +
                         str(f))
            continue
        logging.info('Loading precomputed LIME result, ' + str(f))
        pred_labels = lime_weights.keys()
        for y in pred_labels:
            normlime_weights = normlime_weights_all_labels.get(y, {})
            # Normalize each weight by the L1 norm of the weights for label y.
            w_f_y = [abs(w[1]) for w in lime_weights[y]]
            w_f_y_l1norm = sum(w_f_y)

            for w in lime_weights[y]:
                seg_label = w[0]
                weight = w[1] * w[1] / w_f_y_l1norm
                a = normlime_weights.get(cluster[seg_label], [])
                a.append(weight)
                normlime_weights[cluster[seg_label]] = a

            normlime_weights_all_labels[y] = normlime_weights

    # compute normlime: average the collected weights per cluster
    for y in normlime_weights_all_labels:
        normlime_weights = normlime_weights_all_labels.get(y, {})
        for k in normlime_weights:
            normlime_weights[k] = sum(normlime_weights[k]) / len(
                normlime_weights[k])

    # check normlime
    if len(normlime_weights_all_labels.keys()) < max(
            normlime_weights_all_labels.keys()) + 1:
        logging.info(
            "\n" + \
            "Warning: !!! \n" + \
            "There are at least {} classes, ".format(max(normlime_weights_all_labels.keys()) + 1) + \
            "but the NormLIME has results of only {} classes. \n".format(len(normlime_weights_all_labels.keys())) + \
            "It may cause unstable results in the later computation" + \
            " but can be improved by computing more test samples." + \
            "\n"
        )

    n = 0
    f_out = 'normlime_weights_s{}_samples_{}-{}.npy'.format(
        lime_num_samples, len(a_list_lime_fnames), n)
    while os.path.exists(os.path.join(save_dir, f_out)):
        n += 1
        f_out = 'normlime_weights_s{}_samples_{}-{}.npy'.format(
            lime_num_samples, len(a_list_lime_fnames), n)

    np.save(os.path.join(save_dir, f_out), normlime_weights_all_labels)
    return os.path.join(save_dir, f_out)
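# Minimal usage sketch, assuming a directory of precomputed per-image LIME
# results, each saved as an .npy file holding a dict with the keys
# 'lime_weights' and 'cluster' (the directory path is hypothetical):
import glob
import os

lime_fnames = glob.glob(os.path.join('./lime_results', '*.npy'))
weights_file = compute_normlime_weights(
    lime_fnames, save_dir='./normlime', lime_num_samples=3000)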
def _analyze_results(cocoGt, cocoDt, res_type, out_dir):
    directory = os.path.dirname(out_dir + '/')
    if not os.path.exists(directory):
        logging.info('-------------create {}-----------------'.format(
            out_dir))
        os.makedirs(directory)

    imgIds = cocoGt.getImgIds()
    res_out_dir = out_dir + '/' + res_type + '/'
    res_directory = os.path.dirname(res_out_dir)
    if not os.path.exists(res_directory):
        logging.info('-------------create {}-----------------'.format(
            res_out_dir))
        os.makedirs(res_directory)

    iou_type = res_type
    cocoEval = COCOeval(
        copy.deepcopy(cocoGt), copy.deepcopy(cocoDt), iou_type)
    cocoEval.params.imgIds = imgIds
    cocoEval.params.iouThrs = [.75, .5, .1]
    cocoEval.params.maxDets = [100]
    cocoEval.evaluate()
    cocoEval.accumulate()
    ps = cocoEval.eval['precision']
    ps = np.vstack([ps, np.zeros((4, *ps.shape[1:]))])
    catIds = cocoGt.getCatIds()
    recThrs = cocoEval.params.recThrs

    thread_num = mp.cpu_count() if mp.cpu_count() < 8 else 8
    thread_pool = mp.pool.ThreadPool(thread_num)
    args = [(k, cocoDt, cocoGt, catId, iou_type)
            for k, catId in enumerate(catIds)]
    analyze_results = thread_pool.starmap(analyze_individual_category, args)
    for k, catId in enumerate(catIds):
        nm = cocoGt.loadCats(catId)[0]
        logging.info('--------------saving {}-{}---------------'.format(
            k + 1, nm['name']))
        analyze_result = analyze_results[k]
        assert k == analyze_result[0], ""
        ps_supercategory = analyze_result[1]['ps_supercategory']
        ps_allcategory = analyze_result[1]['ps_allcategory']
        # compute precision but ignore superclass confusion
        ps[3, :, k, :, :] = ps_supercategory
        # compute precision but ignore any class confusion
        ps[4, :, k, :, :] = ps_allcategory
        # fill in background and false negative errors and plot
        T, _, _, A, _ = ps.shape
        for t in range(T):
            for a in range(A):
                if np.sum(ps[t, :, k, a, :] == -1) != len(
                        ps[t, :, k, :, :]):
                    ps[t, :, k, a, :][ps[t, :, k, a, :] == -1] = 0
        ps[5, :, k, :, :] = (ps[4, :, k, :, :] > 0)
        ps[6, :, k, :, :] = 1.0
        makeplot(recThrs, ps[:, :, k], res_out_dir, nm['name'], iou_type)
    makeplot(recThrs, ps, res_out_dir, 'allclass', iou_type)
def paste_objects(templates, background, save_dir='dataset_clone'):
    """Paste target objects onto a background image to generate a new image,
    and add it to the dataset.

    Args:
        templates (list|tuple): Objects from several images can be pasted
            onto the same background at once, so templates is a list whose
            elements are dicts, each describing the objects of one image.
            Such a dict has two keys, `image` and `annos`. The value of
            `image` is the path of the image, or a decoded uint8 array laid
            out as (H, W, C) in BGR order. An image can hold several objects,
            so the value of `annos` is a list of dicts, one per object, each
            with the keys `polygon` and `category`: `polygon` gives the
            boundary coordinates of the object, e.g.
            [[0, 0], [0, 1], [1, 1], [1, 0]], and `category` gives its
            class, e.g. 'dog'.
        background (dict): The background image may carry ground truth, so
            background is a dict with the keys `image` and `annos`. The value
            of `image` is the path of the background image, or a decoded
            uint8 array laid out as (H, W, C) in BGR order. If the background
            has no ground truth, the value of `annos` is an empty list [];
            otherwise it is a list of dicts, one per object, each with the
            keys `bbox` and `category`: `bbox` holds the coordinates of the
            top-left and bottom-right corners of the object box, i.e.
            [x1, y1, x2, y2], and `category` gives its class, e.g. 'dog'.
        save_dir (str): Directory where the new image and its annotation
            file are stored. Defaults to 'dataset_clone'.
    """
    if not osp.exists(save_dir):
        os.makedirs(save_dir)
    image_dir = osp.join(save_dir, 'JPEGImages_clone')
    anno_dir = osp.join(save_dir, 'Annotations_clone')
    json_path = osp.join(save_dir, "annotations.json")
    if not osp.exists(image_dir):
        os.makedirs(image_dir)
    if not osp.exists(anno_dir):
        os.makedirs(anno_dir)

    num_objs = len(background['annos'])
    for temp in templates:
        num_objs += len(temp['annos'])

    gt_bbox = np.zeros((num_objs, 4), dtype=np.float32)
    gt_class = list()
    gt_score = np.ones((num_objs, 1), dtype=np.float32)
    is_crowd = np.zeros((num_objs, 1), dtype=np.int32)
    difficult = np.zeros((num_objs, 1), dtype=np.int32)
    i = -1
    for i, back_anno in enumerate(background['annos']):
        gt_bbox[i] = back_anno['bbox']
        gt_class.append(back_anno['category'])

    back_im = background['image']
    if isinstance(back_im, np.ndarray):
        if len(back_im.shape) != 3:
            raise Exception(
                "background image should be 3-dimensions, but now is {}-dimensions"
                .format(len(back_im.shape)))
    else:
        try:
            back_im = cv2.imread(back_im, cv2.IMREAD_UNCHANGED)
        except:
            raise TypeError(
                'Can\'t read the image file {}!'.format(back_im))
    back_annos = background['annos']
    im_h, im_w, im_c = back_im.shape
    for temp in templates:
        temp_im = temp['image']
        if isinstance(temp_im, np.ndarray):
            if len(temp_im.shape) != 3:
                raise Exception(
                    "template image should be 3-dimensions, but now is {}-dimensions"
                    .format(len(temp_im.shape)))
        else:
            try:
                temp_im = cv2.imread(temp_im, cv2.IMREAD_UNCHANGED)
            except:
                raise TypeError(
                    'Can\'t read the image file {}!'.format(temp_im))
        if im_c != temp_im.shape[-1]:
            raise Exception(
                "The channels of template({}) and background({}) images are not same. Objects cannot be pasted normally! Please check your images."
                .format(temp_im.shape[-1], im_c))
        temp_annos = temp['annos']
        for temp_anno in temp_annos:
            temp_mask = np.zeros(temp_im.shape, temp_im.dtype)
            temp_poly = np.array(temp_anno['polygon'], np.int32)
            temp_category = temp_anno['category']
            cv2.fillPoly(temp_mask, [temp_poly], (255, 255, 255))
            x_list = [temp_poly[i][0] for i in range(len(temp_poly))]
            y_list = [temp_poly[i][1] for i in range(len(temp_poly))]
            temp_poly_w = max(x_list) - min(x_list)
            temp_poly_h = max(y_list) - min(y_list)
            found = False
            while not found:
                center_x = random.randint(1, im_w - 1)
                center_y = random.randint(1, im_h - 1)
                # Retry if the pasted object would exceed the image border.
                if center_x < temp_poly_w / 2 or center_x > im_w - temp_poly_w / 2 - 1 or \
                   center_y < temp_poly_h / 2 or center_y > im_h - temp_poly_h / 2 - 1:
                    continue
                # Reject the position if the center falls inside any
                # ground-truth box of the background.
                found = True
                for back_anno in back_annos:
                    x1, y1, x2, y2 = back_anno['bbox']
                    if x1 < center_x < x2 and y1 < center_y < y2:
                        found = False
                        break
            center = (center_x, center_y)
            back_im = cv2.seamlessClone(temp_im, back_im, temp_mask, center,
                                        cv2.MIXED_CLONE)
            i += 1
            x1 = center[0] - temp_poly_w / 2
            x2 = center[0] + temp_poly_w / 2
            y1 = center[1] - temp_poly_h / 2
            y2 = center[1] + temp_poly_h / 2
            gt_bbox[i] = [x1, y1, x2, y2]
            gt_class.append(temp_category)

    im_fname = str(int(time.time() * 1000)) + '.jpg'
    im_info = {
        'file_name': im_fname,
        'image_shape': [im_h, im_w, im_c],
    }
    label_info = {
        'is_crowd': is_crowd,
        'gt_class': gt_class,
        'gt_bbox': gt_bbox,
        'gt_score': gt_score,
        'difficult': difficult,
        'gt_poly': [],
    }
    cv2.imwrite(osp.join(image_dir, im_fname), back_im.astype('uint8'))
    write_xml(im_info, label_info, anno_dir)
    logging.info("Generated image is saved in {}".format(image_dir))
    logging.info(
        "Generated annotation is saved as xml files in {}".format(anno_dir))
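# Minimal usage sketch, mirroring the structures described in the docstring
# above; all paths, coordinates and categories are hypothetical:
templates = [{
    'image': 'templates/dog.jpg',
    'annos': [{
        'polygon': [[10, 10], [10, 60], [60, 60], [60, 10]],
        'category': 'dog'
    }]
}]
background = {
    'image': 'backgrounds/street.jpg',
    'annos': [{
        'bbox': [100, 100, 200, 200],
        'category': 'car'
    }]
}
paste_objects(templates, background, save_dir='dataset_clone')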
def __init__(self,
             data_dir,
             file_list,
             label_list,
             transforms=None,
             num_workers='auto',
             buffer_size=100,
             parallel_method='process',
             shuffle=False):
    from pycocotools.coco import COCO
    super(VOCDetection, self).__init__(
        transforms=transforms,
        num_workers=num_workers,
        buffer_size=buffer_size,
        parallel_method=parallel_method,
        shuffle=shuffle)
    self.file_list = list()
    self.labels = list()
    self._epoch = 0

    annotations = {}
    annotations['images'] = []
    annotations['categories'] = []
    annotations['annotations'] = []

    cname2cid = OrderedDict()
    label_id = 1
    with open(label_list, 'r', encoding=get_encoding(label_list)) as fr:
        for line in fr.readlines():
            cname2cid[line.strip()] = label_id
            label_id += 1
            self.labels.append(line.strip())
    logging.info("Starting to read file list from dataset...")
    for k, v in cname2cid.items():
        annotations['categories'].append({
            'supercategory': 'component',
            'id': v,
            'name': k
        })
    ct = 0
    ann_ct = 0
    with open(file_list, 'r', encoding=get_encoding(file_list)) as fr:
        while True:
            line = fr.readline()
            if not line:
                break
            img_file, xml_file = [osp.join(data_dir, x) \
                for x in line.strip().split()[:2]]
            if not is_pic(img_file):
                continue
            if not osp.isfile(xml_file):
                continue
            if not osp.exists(img_file):
                raise IOError(
                    'The image file {} does not exist!'.format(img_file))
            tree = ET.parse(xml_file)
            if tree.find('id') is None:
                im_id = np.array([ct])
            else:
                ct = int(tree.find('id').text)
                im_id = np.array([int(tree.find('id').text)])
            # XML tag names are matched case-insensitively via regex.
            pattern = re.compile('<object>', re.IGNORECASE)
            obj_tag = pattern.findall(str(ET.tostringlist(
                tree.getroot())))[0][1:-1]
            objs = tree.findall(obj_tag)
            pattern = re.compile('<size>', re.IGNORECASE)
            size_tag = pattern.findall(str(ET.tostringlist(
                tree.getroot())))[0][1:-1]
            size_element = tree.find(size_tag)
            pattern = re.compile('<width>', re.IGNORECASE)
            width_tag = pattern.findall(str(
                ET.tostringlist(size_element)))[0][1:-1]
            im_w = float(size_element.find(width_tag).text)
            pattern = re.compile('<height>', re.IGNORECASE)
            height_tag = pattern.findall(str(
                ET.tostringlist(size_element)))[0][1:-1]
            im_h = float(size_element.find(height_tag).text)
            gt_bbox = np.zeros((len(objs), 4), dtype=np.float32)
            gt_class = np.zeros((len(objs), 1), dtype=np.int32)
            gt_score = np.ones((len(objs), 1), dtype=np.float32)
            is_crowd = np.zeros((len(objs), 1), dtype=np.int32)
            difficult = np.zeros((len(objs), 1), dtype=np.int32)
            for i, obj in enumerate(objs):
                pattern = re.compile('<name>', re.IGNORECASE)
                name_tag = pattern.findall(str(
                    ET.tostringlist(obj)))[0][1:-1]
                cname = obj.find(name_tag).text.strip()
                gt_class[i][0] = cname2cid[cname]
                pattern = re.compile('<difficult>', re.IGNORECASE)
                diff_tag = pattern.findall(str(
                    ET.tostringlist(obj)))[0][1:-1]
                try:
                    _difficult = int(obj.find(diff_tag).text)
                except Exception:
                    _difficult = 0
                pattern = re.compile('<bndbox>', re.IGNORECASE)
                box_tag = pattern.findall(str(
                    ET.tostringlist(obj)))[0][1:-1]
                box_element = obj.find(box_tag)
                pattern = re.compile('<xmin>', re.IGNORECASE)
                xmin_tag = pattern.findall(
                    str(ET.tostringlist(box_element)))[0][1:-1]
                x1 = float(box_element.find(xmin_tag).text)
                pattern = re.compile('<ymin>', re.IGNORECASE)
                ymin_tag = pattern.findall(
                    str(ET.tostringlist(box_element)))[0][1:-1]
                y1 = float(box_element.find(ymin_tag).text)
                pattern = re.compile('<xmax>', re.IGNORECASE)
                xmax_tag = pattern.findall(
                    str(ET.tostringlist(box_element)))[0][1:-1]
                x2 = float(box_element.find(xmax_tag).text)
                pattern = re.compile('<ymax>', re.IGNORECASE)
                ymax_tag = pattern.findall(
                    str(ET.tostringlist(box_element)))[0][1:-1]
                y2 = float(box_element.find(ymax_tag).text)
                x1 = max(0, x1)
                y1 = max(0, y1)
                if im_w > 0.5 and im_h > 0.5:
                    x2 = min(im_w - 1, x2)
                    y2 = min(im_h - 1, y2)
                gt_bbox[i] = [x1, y1, x2, y2]
                is_crowd[i][0] = 0
                difficult[i][0] = _difficult
                annotations['annotations'].append({
                    'iscrowd': 0,
                    'image_id': int(im_id[0]),
                    'bbox': [x1, y1, x2 - x1 + 1, y2 - y1 + 1],
                    'area': float((x2 - x1 + 1) * (y2 - y1 + 1)),
                    'category_id': cname2cid[cname],
                    'id': ann_ct,
                    'difficult': _difficult
                })
                ann_ct += 1
            im_info = {
                'im_id': im_id,
                'image_shape': np.array([im_h, im_w]).astype('int32'),
            }
            label_info = {
                'is_crowd': is_crowd,
                'gt_class': gt_class,
                'gt_bbox': gt_bbox,
                'gt_score': gt_score,
                'difficult': difficult
            }
            voc_rec = (im_info, label_info)
            if len(objs) != 0:
                self.file_list.append([img_file, voc_rec])
                ct += 1
                annotations['images'].append({
                    'height': im_h,
                    'width': im_w,
                    'id': int(im_id[0]),
                    'file_name': osp.split(img_file)[1]
                })

    if not len(self.file_list) > 0:
        raise Exception('No voc record found in %s' % (file_list))
    logging.info("{} samples in file {}".format(
        len(self.file_list), file_list))
    self.num_samples = len(self.file_list)
    self.coco_gt = COCO()
    self.coco_gt.dataset = annotations
    self.coco_gt.createIndex()
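# Minimal usage sketch, assuming a VOC-style dataset under the hypothetical
# directory 'voc_data', where each line of the file list holds an image path
# and an XML path separated by a space:
import paddlex as pdx
from paddlex.det import transforms

train_transforms = transforms.Compose(
    [transforms.Resize(target_size=608, interp='RANDOM'),
     transforms.Normalize()])
dataset = pdx.datasets.VOCDetection(
    data_dir='voc_data',
    file_list='voc_data/train_list.txt',
    label_list='voc_data/labels.txt',
    transforms=train_transforms,
    shuffle=True)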
def evaluate(self,
             eval_dataset,
             batch_size=1,
             epoch_id=None,
             metric=None,
             return_details=False):
    """Evaluate the model.

    Args:
        eval_dataset (paddlex.datasets): reader for the validation data.
        batch_size (int): batch size of the validation data. Defaults to 1;
            currently only 1 is supported.
        epoch_id (int): the training epoch the evaluated model comes from.
        metric (str): evaluation metric used during training, one of
            ['COCO', 'VOC']. Defaults to None, in which case it is chosen
            from the dataset type: 'VOC' for VOCDetection and 'COCO' for
            COCODetection.
        return_details (bool): whether to return detailed information.
            Defaults to False.

    Returns:
        tuple (metrics, eval_details) / dict (metrics): returns
            (metrics, eval_details) when return_details is True, otherwise
            metrics only. metrics is a dict with the key 'bbox_mmap' or
            'bbox_map', the mean of the average precisions over the IoU
            thresholds (mmAP) or the mean average precision (mAP),
            respectively. eval_details is a dict with the key 'bbox', a list
            of predictions, each made up of the image id, the predicted box
            category id, the box coordinates and the box score; the key 'gt'
            holds the ground-truth annotations.
    """
    self.arrange_transforms(
        transforms=eval_dataset.transforms, mode='eval')
    if metric is None:
        if hasattr(self, 'metric') and self.metric is not None:
            metric = self.metric
        else:
            if isinstance(eval_dataset, paddlex.datasets.CocoDetection):
                metric = 'COCO'
            elif isinstance(eval_dataset, paddlex.datasets.VOCDetection):
                metric = 'VOC'
            else:
                raise Exception(
                    "eval_dataset should be datasets.VOCDetection or datasets.COCODetection."
                )
    assert metric in ['COCO', 'VOC'], "Metric only support 'VOC' or 'COCO'"
    if batch_size > 1:
        batch_size = 1
        logging.warning(
            "Faster RCNN supports batch_size=1 only during evaluating, so batch_size is forced to be set to 1."
        )
    dataset = eval_dataset.generator(
        batch_size=batch_size, drop_last=False)

    total_steps = math.ceil(eval_dataset.num_samples * 1.0 / batch_size)
    results = list()
    logging.info(
        "Start to evaluate (total_samples={}, total_steps={})...".format(
            eval_dataset.num_samples, total_steps))
    for step, data in tqdm.tqdm(enumerate(dataset()), total=total_steps):
        images = np.array([d[0] for d in data]).astype('float32')
        im_infos = np.array([d[1] for d in data]).astype('float32')
        im_shapes = np.array([d[3] for d in data]).astype('float32')
        feed_data = {
            'image': images,
            'im_info': im_infos,
            'im_shape': im_shapes,
        }
        outputs = self.exe.run(
            self.test_prog,
            feed=[feed_data],
            fetch_list=list(self.test_outputs.values()),
            return_numpy=False)
        res = {
            'bbox': (np.array(outputs[0]),
                     outputs[0].recursive_sequence_lengths())
        }
        res_im_id = [d[2] for d in data]
        res['im_info'] = (im_infos, [])
        res['im_shape'] = (im_shapes, [])
        res['im_id'] = (np.array(res_im_id), [])
        if metric == 'VOC':
            res_gt_box = []
            res_gt_label = []
            res_is_difficult = []
            for d in data:
                res_gt_box.extend(d[4])
                res_gt_label.extend(d[5])
                res_is_difficult.extend(d[6])
            res_gt_box_lod = [d[4].shape[0] for d in data]
            res_gt_label_lod = [d[5].shape[0] for d in data]
            res_is_difficult_lod = [d[6].shape[0] for d in data]
            res['gt_box'] = (np.array(res_gt_box), [res_gt_box_lod])
            res['gt_label'] = (np.array(res_gt_label), [res_gt_label_lod])
            res['is_difficult'] = (np.array(res_is_difficult),
                                   [res_is_difficult_lod])
        results.append(res)
        logging.debug("[EVAL] Epoch={}, Step={}/{}".format(
            epoch_id, step + 1, total_steps))
    box_ap_stats, eval_details = eval_results(
        results, metric, eval_dataset.coco_gt, with_background=True)
    metrics = OrderedDict(
        zip(['bbox_mmap' if metric == 'COCO' else 'bbox_map'],
            box_ap_stats))
    if return_details:
        return metrics, eval_details
    return metrics
def __init__(self,
             data_dir,
             file_list,
             label_list,
             transforms=None,
             num_workers='auto',
             buffer_size=100,
             parallel_method='process',
             shuffle=False):
    super(EasyDataSeg, self).__init__(
        transforms=transforms,
        num_workers=num_workers,
        buffer_size=buffer_size,
        parallel_method=parallel_method,
        shuffle=shuffle)
    self.file_list = list()
    self.labels = list()
    self._epoch = 0
    from pycocotools.mask import decode

    cname2cid = {}
    label_id = 0
    with open(label_list, encoding=get_encoding(label_list)) as fr:
        for line in fr.readlines():
            cname2cid[line.strip()] = label_id
            label_id += 1
            self.labels.append(line.strip())

    with open(file_list, encoding=get_encoding(file_list)) as f:
        for line in f:
            img_file, json_file = [osp.join(data_dir, x) \
                for x in line.strip().split()[:2]]
            if not is_pic(img_file):
                continue
            if not osp.isfile(json_file):
                continue
            if not osp.exists(img_file):
                raise IOError(
                    'The image file {} does not exist!'.format(img_file))
            with open(json_file, mode='r', \
                      encoding=get_encoding(json_file)) as j:
                json_info = json.load(j)
                im = cv2.imread(img_file)
                im_w = im.shape[1]
                im_h = im.shape[0]
                objs = json_info['labels']
                label_npy = np.zeros([im_h, im_w]).astype('uint8')
                for i, obj in enumerate(objs):
                    cname = obj['name']
                    cid = cname2cid[cname]
                    mask_dict = {}
                    mask_dict['size'] = [im_h, im_w]
                    mask_dict['counts'] = obj['mask'].encode()
                    mask = decode(mask_dict)
                    mask *= cid
                    # Pixels that already carry a label keep their first one.
                    conflict_index = np.where((label_npy > 0) &
                                              (mask == cid))
                    mask[conflict_index] = 0
                    label_npy += mask
                self.file_list.append([img_file, label_npy])
    self.num_samples = len(self.file_list)
    logging.info("{} samples in file {}".format(
        len(self.file_list), file_list))
def cal_clipped_mean_std(self, clip_min_value, clip_max_value,
                         data_info_file):
    if not osp.exists(data_info_file):
        raise Exception("Dataset information file {} does not exist.".
                        format(data_info_file))
    with open(data_info_file, 'rb') as f:
        im_info = pickle.load(f)
    channel_num = im_info['channel_num']
    min_im_value = im_info['min_image_value']
    max_im_value = im_info['max_image_value']
    im_pixel_info = im_info['image_pixel']

    if len(clip_min_value) != channel_num or len(
            clip_max_value) != channel_num:
        raise Exception(
            "The length of clip_min_value or clip_max_value should be equal to the number of image channels {}."
            .format(channel_num))
    for c in range(channel_num):
        if clip_min_value[c] < min_im_value[c] or clip_min_value[
                c] > max_im_value[c]:
            raise Exception(
                "Clip_min_value of the channel {} is not in [{}, {}]".
                format(c, min_im_value[c], max_im_value[c]))
        if clip_max_value[c] < min_im_value[c] or clip_max_value[
                c] > max_im_value[c]:
            raise Exception(
                "Clip_max_value of the channel {} is not in [{}, {}]".
                format(c, min_im_value[c], max_im_value[c]))

    self.clipped_im_mean_list = [[] for i in range(len(self.file_list))]
    self.clipped_im_std_list = [[] for i in range(len(self.file_list))]

    num_workers = mp.cpu_count() // 2 if mp.cpu_count() // 2 < 8 else 8
    threads = []
    one_worker_file = len(self.file_list) // num_workers
    self.channel_num = channel_num
    for i in range(num_workers):
        start = one_worker_file * i
        end = one_worker_file * (
            i + 1) if i < num_workers - 1 else len(self.file_list)
        t = threading.Thread(
            target=self._get_clipped_mean_std,
            args=(start, end, clip_min_value, clip_max_value))
        threads.append(t)
    for t in threads:
        t.start()
    for t in threads:
        t.join()

    im_mean = np.asarray(self.clipped_im_mean_list)
    im_mean = im_mean.sum(axis=0)
    im_mean = im_mean / len(self.file_list)

    im_std = np.asarray(self.clipped_im_std_list)
    im_std = im_std.sum(axis=0)
    im_std = im_std / len(self.file_list)

    for c in range(channel_num):
        clip_pixel_num = 0
        pixel_num = sum(im_pixel_info[c].values())
        for v, n in im_pixel_info[c].items():
            if v < clip_min_value[c] or v > clip_max_value[c]:
                clip_pixel_num += n
        logging.info("Channel {}, the ratio of pixels to be clipped = {}".
                     format(c, clip_pixel_num / pixel_num))

    logging.info(
        "Image mean value: {} Image standard deviation: {} (normalized by (clip_max_value - clip_min_value), arranged in 0-{} channel order).\n".
        format(im_mean, im_std, self.channel_num))
def analysis(self):
    self.im_mean_list = [[] for i in range(len(self.file_list))]
    self.im_std_list = [[] for i in range(len(self.file_list))]
    self.im_value_list = [[] for i in range(len(self.file_list))]
    self.im_value_num_list = [[] for i in range(len(self.file_list))]
    self.im_height_list = np.zeros(len(self.file_list), dtype='int64')
    self.im_width_list = np.zeros(len(self.file_list), dtype='int64')
    self.im_channel_list = np.zeros(len(self.file_list), dtype='int64')
    self.label_value_list = [[] for i in range(len(self.file_list))]
    self.label_value_num_list = [[] for i in range(len(self.file_list))]

    num_workers = mp.cpu_count() // 2 if mp.cpu_count() // 2 < 8 else 8
    threads = []
    one_worker_file = len(self.file_list) // num_workers
    for i in range(num_workers):
        start = one_worker_file * i
        end = one_worker_file * (
            i + 1) if i < num_workers - 1 else len(self.file_list)
        t = threading.Thread(
            target=self._get_image_info, args=(start, end))
        threads.append(t)
    for t in threads:
        t.start()
    for t in threads:
        t.join()

    unique, counts = np.unique(self.im_channel_list, return_counts=True)
    if len(unique) > 1:
        raise Exception("There are {} kinds of image channels: {}.".format(
            len(unique), unique[:]))
    self.channel_num = unique[0]
    shape_info = self._get_shape()
    self.max_height = shape_info['max_height']
    self.max_width = shape_info['max_width']
    self.min_height = shape_info['min_height']
    self.min_width = shape_info['min_width']
    self.label_pixel_info = self._get_label_pixel_info()
    self.im_pixel_info = self._get_image_pixel_info()
    mode = osp.split(self.file_list_path)[-1].split('.')[0]
    import matplotlib.pyplot as plt
    for c in range(self.channel_num):
        plt.figure()
        plt.bar(self.im_pixel_info[c].keys(),
                self.im_pixel_info[c].values(),
                width=1,
                log=True)
        plt.xlabel('image pixel value')
        plt.ylabel('number')
        plt.title('channel={}'.format(c))
        plt.savefig(
            osp.join(self.data_dir,
                     '{}_channel{}_distribute.png'.format(mode, c)),
            dpi=100)
        plt.close()

    max_im_value = list()
    min_im_value = list()
    for c in range(self.channel_num):
        max_im_value.append(max(self.im_pixel_info[c].keys()))
        min_im_value.append(min(self.im_pixel_info[c].keys()))
    self.max_im_value = np.asarray(max_im_value)
    self.min_im_value = np.asarray(min_im_value)

    im_mean, im_std = self._get_mean_std()

    info = {
        'channel_num': self.channel_num,
        'image_pixel': self.im_pixel_info,
        'label_pixel': self.label_pixel_info,
        'file_num': len(self.file_list),
        'max_height': self.max_height,
        'max_width': self.max_width,
        'min_height': self.min_height,
        'min_width': self.min_width,
        'max_image_value': self.max_im_value,
        'min_image_value': self.min_im_value
    }
    saved_pkl_file = osp.join(self.data_dir,
                              '{}_information.pkl'.format(mode))
    with open(saved_pkl_file, 'wb') as f:
        pickle.dump(info, f)

    logging.info(
        "############## The analysis results are as follows ##############\n"
    )
    logging.info("{} samples in file {}\n".format(
        len(self.file_list), self.file_list_path))
    logging.info("Minimal image height: {} Minimal image width: {}.\n".
                 format(self.min_height, self.min_width))
    logging.info("Maximal image height: {} Maximal image width: {}.\n".
                 format(self.max_height, self.max_width))
    logging.info("Image channel is {}.\n".format(self.channel_num))
    logging.info(
        "Minimal image value: {} Maximal image value: {} (arranged in 0-{} channel order) \n".
        format(self.min_im_value, self.max_im_value, self.channel_num))
    logging.info(
        "Image pixel distribution of each channel is saved with 'distribute.png' in the {}"
        .format(self.data_dir))
    logging.info(
        "Image mean value: {} Image standard deviation: {} (normalized by (max_im_value - min_im_value), arranged in 0-{} channel order).\n".
        format(im_mean, im_std, self.channel_num))
    logging.info(
        "Label pixel information is shown in a format of (label_id, the number of label_id, the ratio of label_id):"
    )
    for v, (n, r) in self.label_pixel_info.items():
        logging.info("({}, {}, {})".format(v, n, r))

    logging.info("Dataset information is saved in {}".format(
        saved_pkl_file))
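# Minimal usage sketch of the analysis workflow, assuming `dataset` is an
# analysis-capable dataset object exposing the two methods above (paths are
# hypothetical). analysis() writes '<mode>_information.pkl', which
# cal_clipped_mean_std() then consumes:
dataset.analysis()
dataset.cal_clipped_mean_std(
    clip_min_value=[0, 0, 0],
    clip_max_value=[255, 255, 255],
    data_info_file='dataset/train_information.pkl')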
def train_loop(self,
               num_epochs,
               train_dataset,
               train_batch_size,
               eval_dataset=None,
               save_interval_epochs=1,
               log_interval_steps=10,
               save_dir='output',
               use_vdl=False,
               early_stop=False,
               early_stop_patience=5):
    if train_dataset.num_samples < train_batch_size:
        raise Exception(
            'The amount of training dataset must be larger than batch size.')
    if not osp.isdir(save_dir):
        if osp.exists(save_dir):
            os.remove(save_dir)
        os.makedirs(save_dir)
    if use_vdl:
        from visualdl import LogWriter
        vdl_logdir = osp.join(save_dir, 'vdl_log')
    # Append the arrange operation to the transforms
    self.arrange_transforms(
        transforms=train_dataset.transforms, mode='train')
    # Build train_data_loader
    self.build_train_data_loader(
        dataset=train_dataset, batch_size=train_batch_size)

    if eval_dataset is not None:
        self.eval_transforms = eval_dataset.transforms
        self.test_transforms = copy.deepcopy(eval_dataset.transforms)

    # Fetch the learning rate as it changes during training
    lr = self.optimizer._learning_rate
    if isinstance(lr, fluid.framework.Variable):
        self.train_outputs['lr'] = lr

    # Run training on multiple cards
    if self.parallel_train_prog is None:
        build_strategy = fluid.compiler.BuildStrategy()
        build_strategy.fuse_all_optimizer_ops = False
        if paddlex.env_info['place'] != 'cpu' and len(self.places) > 1:
            build_strategy.sync_batch_norm = self.sync_bn
        exec_strategy = fluid.ExecutionStrategy()
        exec_strategy.num_iteration_per_drop_scope = 1
        self.parallel_train_prog = fluid.CompiledProgram(
            self.train_prog).with_data_parallel(
                loss_name=self.train_outputs['loss'].name,
                build_strategy=build_strategy,
                exec_strategy=exec_strategy)

    total_num_steps = math.floor(
        train_dataset.num_samples / train_batch_size)
    num_steps = 0
    time_stat = list()
    time_train_one_epoch = None
    time_eval_one_epoch = None

    total_num_steps_eval = 0
    # Total number of times the model will be evaluated
    total_eval_times = math.ceil(num_epochs / save_interval_epochs)
    # Detection currently supports single-card evaluation only; the
    # evaluation batch size is the training batch size divided by the
    # number of cards.
    eval_batch_size = train_batch_size
    if self.model_type == 'detector':
        eval_batch_size = self._get_single_card_bs(train_batch_size)
    if eval_dataset is not None:
        total_num_steps_eval = math.ceil(
            eval_dataset.num_samples / eval_batch_size)

    if use_vdl:
        # VisualDL component
        log_writer = LogWriter(vdl_logdir)

    thresh = 0.0001
    if early_stop:
        earlystop = EarlyStop(early_stop_patience, thresh)
    best_accuracy_key = ""
    best_accuracy = -1.0
    best_model_epoch = -1
    start_epoch = self.completed_epochs
    for i in range(start_epoch, num_epochs):
        records = list()
        step_start_time = time.time()
        epoch_start_time = time.time()
        for step, data in enumerate(self.train_data_loader()):
            outputs = self.exe.run(
                self.parallel_train_prog,
                feed=data,
                fetch_list=list(self.train_outputs.values()))
            outputs_avg = np.mean(np.array(outputs), axis=1)
            records.append(outputs_avg)

            # Estimate the remaining training time
            current_time = time.time()
            step_cost_time = current_time - step_start_time
            step_start_time = current_time
            if len(time_stat) < 20:
                time_stat.append(step_cost_time)
            else:
                time_stat[num_steps % 20] = step_cost_time

            # Log loss every log_interval_steps steps
            num_steps += 1
            if num_steps % log_interval_steps == 0:
                step_metrics = OrderedDict(
                    zip(list(self.train_outputs.keys()), outputs_avg))

                if use_vdl:
                    for k, v in step_metrics.items():
                        log_writer.add_scalar(
                            'Metrics/Training(Step): {}'.format(k), v,
                            num_steps)

                # Estimate the remaining time
                avg_step_time = np.mean(time_stat)
                if time_train_one_epoch is not None:
                    eta = (num_epochs - i - 1) * time_train_one_epoch + (
                        total_num_steps - step - 1) * avg_step_time
                else:
                    eta = ((num_epochs - i) * total_num_steps - step -
                           1) * avg_step_time
                if time_eval_one_epoch is not None:
                    eval_eta = (
                        total_eval_times - i // save_interval_epochs
                    ) * time_eval_one_epoch
                else:
                    eval_eta = (
                        total_eval_times - i // save_interval_epochs
                    ) * total_num_steps_eval * avg_step_time
                eta_str = seconds_to_hms(eta + eval_eta)

                logging.info(
                    "[TRAIN] Epoch={}/{}, Step={}/{}, {}, time_each_step={}s, eta={}"
                    .format(i + 1, num_epochs, step + 1, total_num_steps,
                            dict2str(step_metrics),
                            round(avg_step_time, 2), eta_str))
        train_metrics = OrderedDict(
            zip(list(self.train_outputs.keys()), np.mean(records, axis=0)))
        logging.info('[TRAIN] Epoch {} finished, {} .'.format(
            i + 1, dict2str(train_metrics)))
        time_train_one_epoch = time.time() - epoch_start_time
        epoch_start_time = time.time()

        # Every save_interval_epochs epochs, evaluate on the validation
        # set and save the model
        self.completed_epochs += 1
        eval_epoch_start_time = time.time()
        if (i + 1) % save_interval_epochs == 0 or i == num_epochs - 1:
            current_save_dir = osp.join(save_dir, "epoch_{}".format(i + 1))
            if not osp.isdir(current_save_dir):
                os.makedirs(current_save_dir)
            if eval_dataset is not None and eval_dataset.num_samples > 0:
                self.eval_metrics, self.eval_details = self.evaluate(
                    eval_dataset=eval_dataset,
                    batch_size=eval_batch_size,
                    epoch_id=i + 1,
                    return_details=True)
                logging.info('[EVAL] Finished, Epoch={}, {} .'.format(
                    i + 1, dict2str(self.eval_metrics)))
                # Save the best model
                best_accuracy_key = list(self.eval_metrics.keys())[0]
                current_accuracy = self.eval_metrics[best_accuracy_key]
                if current_accuracy > best_accuracy:
                    best_accuracy = current_accuracy
                    best_model_epoch = i + 1
                    best_model_dir = osp.join(save_dir, "best_model")
                    self.save_model(save_dir=best_model_dir)
                if use_vdl:
                    for k, v in self.eval_metrics.items():
                        if isinstance(v, list):
                            continue
                        if isinstance(v, np.ndarray):
                            if v.size > 1:
                                continue
                        log_writer.add_scalar(
                            "Metrics/Eval(Epoch): {}".format(k), v, i + 1)
            self.save_model(save_dir=current_save_dir)
            time_eval_one_epoch = time.time() - eval_epoch_start_time
            eval_epoch_start_time = time.time()
            if best_model_epoch > 0:
                logging.info(
                    'Current evaluated best model in eval_dataset is epoch_{}, {}={}'
                    .format(best_model_epoch, best_accuracy_key,
                            best_accuracy))
            if eval_dataset is not None and early_stop:
                if earlystop(current_accuracy):
                    break
def net_initialize(self,
                   startup_prog=None,
                   pretrain_weights=None,
                   fuse_bn=False,
                   save_dir='.',
                   sensitivities_file=None,
                   eval_metric_loss=0.05,
                   resume_checkpoint=None):
    if not resume_checkpoint:
        pretrain_dir = osp.join(save_dir, 'pretrain')
        if not os.path.isdir(pretrain_dir):
            if os.path.exists(pretrain_dir):
                os.remove(pretrain_dir)
            os.makedirs(pretrain_dir)
        if pretrain_weights is not None and not os.path.exists(
                pretrain_weights):
            if self.model_type == 'classifier':
                if pretrain_weights not in ['IMAGENET']:
                    logging.warning(
                        "Pretrain_weights for classifier should be defined as directory path or parameter file or 'IMAGENET' or None, but it is {}, so we force to set it as 'IMAGENET'"
                        .format(pretrain_weights))
                    pretrain_weights = 'IMAGENET'
            elif self.model_type == 'detector':
                if pretrain_weights not in ['IMAGENET', 'COCO']:
                    logging.warning(
                        "Pretrain_weights for detector should be defined as directory path or parameter file or 'IMAGENET' or 'COCO' or None, but it is {}, so we force to set it as 'IMAGENET'"
                        .format(pretrain_weights))
                    pretrain_weights = 'IMAGENET'
            elif self.model_type == 'segmenter':
                if pretrain_weights not in [
                        'IMAGENET', 'COCO', 'CITYSCAPES'
                ]:
                    logging.warning(
                        "Pretrain_weights for segmenter should be defined as directory path or parameter file or 'IMAGENET' or 'COCO' or 'CITYSCAPES', but it is {}, so we force to set it as 'IMAGENET'"
                        .format(pretrain_weights))
                    pretrain_weights = 'IMAGENET'
        if hasattr(self, 'backbone'):
            backbone = self.backbone
        else:
            backbone = self.__class__.__name__
            if backbone == "HRNet":
                backbone = backbone + "_W{}".format(self.width)
        class_name = self.__class__.__name__
        pretrain_weights = get_pretrain_weights(
            pretrain_weights, class_name, backbone, pretrain_dir)
    if startup_prog is None:
        startup_prog = fluid.default_startup_program()
    self.exe.run(startup_prog)
    if resume_checkpoint:
        logging.info(
            "Resume checkpoint from {}.".format(resume_checkpoint),
            use_color=True)
        paddlex.utils.utils.load_pretrain_weights(
            self.exe, self.train_prog, resume_checkpoint, resume=True)
        if not osp.exists(osp.join(resume_checkpoint, "model.yml")):
            raise Exception(
                "There's no model.yml in {}".format(resume_checkpoint))
        with open(osp.join(resume_checkpoint, "model.yml")) as f:
            info = yaml.load(f.read(), Loader=yaml.Loader)
            self.completed_epochs = info['completed_epochs']
    elif pretrain_weights is not None:
        logging.info(
            "Load pretrain weights from {}.".format(pretrain_weights),
            use_color=True)
        paddlex.utils.utils.load_pretrain_weights(
            self.exe, self.train_prog, pretrain_weights, fuse_bn)
    # Perform pruning
    if sensitivities_file is not None:
        import paddleslim
        from .slim.prune_config import get_sensitivities
        sensitivities_file = get_sensitivities(sensitivities_file, self,
                                               save_dir)
        from .slim.prune import get_params_ratios, prune_program
        logging.info(
            "Start to prune program with eval_metric_loss = {}".format(
                eval_metric_loss),
            use_color=True)
        origin_flops = paddleslim.analysis.flops(self.test_prog)
        prune_params_ratios = get_params_ratios(
            sensitivities_file, eval_metric_loss=eval_metric_loss)
        prune_program(self, prune_params_ratios)
        current_flops = paddleslim.analysis.flops(self.test_prog)
        remaining_ratio = current_flops / origin_flops
        logging.info(
            "Finish prune program, before FLOPs:{}, after prune FLOPs:{}, remaining ratio:{}"
            .format(origin_flops, current_flops, remaining_ratio),
            use_color=True)
        self.status = 'Prune'
def evaluate(self,
             eval_dataset,
             batch_size=1,
             epoch_id=None,
             metric=None,
             return_details=False):
    """Evaluate the model.

    Args:
        eval_dataset (paddlex.datasets): reader for the validation data.
        batch_size (int): batch size of the validation data. Defaults to 1;
            currently only 1 is supported.
        epoch_id (int): the training epoch the evaluated model comes from.
        metric (str): evaluation metric used during training, one of
            ['COCO', 'VOC']. Defaults to None, in which case it is inferred
            from the dataset passed in; a COCODetection dataset selects
            'COCO'.
        return_details (bool): whether to return detailed information.
            Defaults to False.

    Returns:
        tuple (metrics, eval_details) / dict (metrics): returns
            (metrics, eval_details) when return_details is True, otherwise
            metrics only. metrics is a dict with the keys 'bbox_mmap' and
            'segm_mmap', or 'bbox_map' and 'segm_map': the means of the
            average precisions of boxes and of segmentation regions over
            the IoU thresholds (mmAP), or the mean average precisions
            (mAP), respectively. eval_details is a dict with the keys
            'bbox', 'mask' and 'gt'. The value of 'bbox' is a list of
            predictions, each a list made up of the image id, the predicted
            box category id, the box coordinates and the box score. The
            value of 'mask' is a list whose elements describe the
            segmentation inside each predicted box: the image id, the
            predicted box category id, a binary map marking which pixels
            inside the box belong to the object, and the box score. The
            value of 'gt' holds the ground-truth annotations.
    """
    input_channel = getattr(self, 'input_channel', 3)
    arrange_transforms(
        model_type=self.model_type,
        class_name=self.__class__.__name__,
        transforms=eval_dataset.transforms,
        mode='eval',
        input_channel=input_channel)
    if metric is None:
        if hasattr(self, 'metric') and self.metric is not None:
            metric = self.metric
        else:
            if isinstance(eval_dataset, paddlex.datasets.CocoDetection):
                metric = 'COCO'
            else:
                raise Exception(
                    "eval_dataset should be datasets.COCODetection.")
    assert metric in ['COCO', 'VOC'], "Metric only support 'VOC' or 'COCO'"
    if batch_size > 1:
        batch_size = 1
        logging.warning(
            "Mask RCNN supports batch_size=1 only during evaluating, so batch_size is forced to be set to 1."
        )
    data_generator = eval_dataset.generator(
        batch_size=batch_size, drop_last=False)

    total_steps = math.ceil(eval_dataset.num_samples * 1.0 / batch_size)
    results = list()
    logging.info(
        "Start to evaluate (total_samples={}, total_steps={})...".format(
            eval_dataset.num_samples, total_steps))
    for step, data in tqdm.tqdm(
            enumerate(data_generator()), total=total_steps):
        images = np.array([d[0] for d in data]).astype('float32')
        im_infos = np.array([d[1] for d in data]).astype('float32')
        im_shapes = np.array([d[3] for d in data]).astype('float32')
        feed_data = {
            'image': images,
            'im_info': im_infos,
            'im_shape': im_shapes,
        }
        with fluid.scope_guard(self.scope):
            outputs = self.exe.run(
                self.test_prog,
                feed=[feed_data],
                fetch_list=list(self.test_outputs.values()),
                return_numpy=False)
        res = {
            'bbox': (np.array(outputs[0]),
                     outputs[0].recursive_sequence_lengths()),
            'mask': (np.array(outputs[1]),
                     outputs[1].recursive_sequence_lengths())
        }
        res_im_id = [d[2] for d in data]
        res['im_info'] = (im_infos, [])
        res['im_shape'] = (im_shapes, [])
        res['im_id'] = (np.array(res_im_id), [])
        results.append(res)
        logging.debug("[EVAL] Epoch={}, Step={}/{}".format(
            epoch_id, step + 1, total_steps))

    ap_stats, eval_details = eval_results(
        results,
        'COCO',
        eval_dataset.coco_gt,
        with_background=True,
        resolution=self.mask_head_resolution)
    if metric == 'VOC':
        if isinstance(ap_stats[0], np.ndarray) and isinstance(
                ap_stats[1], np.ndarray):
            metrics = OrderedDict(
                zip(['bbox_map', 'segm_map'],
                    [ap_stats[0][1], ap_stats[1][1]]))
        else:
            metrics = OrderedDict(
                zip(['bbox_map', 'segm_map'], [0.0, 0.0]))
    elif metric == 'COCO':
        if isinstance(ap_stats[0], np.ndarray) and isinstance(
                ap_stats[1], np.ndarray):
            metrics = OrderedDict(
                zip(['bbox_mmap', 'segm_mmap'],
                    [ap_stats[0][0], ap_stats[1][0]]))
        else:
            metrics = OrderedDict(
                zip(['bbox_mmap', 'segm_mmap'], [0.0, 0.0]))
    if return_details:
        return metrics, eval_details
    return metrics
def evaluate(self,
             eval_dataset,
             batch_size=1,
             epoch_id=None,
             return_details=False):
    """Evaluate the model.

    Args:
        eval_dataset (paddlex.datasets): reader for the validation data.
        batch_size (int): batch size of the validation data. Defaults to 1.
        epoch_id (int): the training epoch the evaluated model comes from.
        return_details (bool): whether to return detailed information.

    Returns:
        dict: when return_details is False, returns a dict with the keys
            'acc1' and 'acc{k}' (k = min(5, num_classes)), the top-1 and
            top-k accuracy, respectively.
        tuple (metrics, eval_details): when return_details is True,
            additionally returns a dict with the keys 'true_labels' and
            'pred_scores', the ground-truth class ids and the per-class
            prediction scores.
    """
    arrange_transforms(
        model_type=self.model_type,
        class_name=self.__class__.__name__,
        transforms=eval_dataset.transforms,
        mode='eval')
    data_generator = eval_dataset.generator(
        batch_size=batch_size, drop_last=False)
    k = min(5, self.num_classes)
    total_steps = math.ceil(eval_dataset.num_samples * 1.0 / batch_size)
    true_labels = list()
    pred_scores = list()
    if not hasattr(self, 'parallel_test_prog'):
        with fluid.scope_guard(self.scope):
            self.parallel_test_prog = fluid.CompiledProgram(
                self.test_prog).with_data_parallel(
                    share_vars_from=self.parallel_train_prog)
    batch_size_each_gpu = self._get_single_card_bs(batch_size)

    logging.info(
        "Start to evaluate (total_samples={}, total_steps={})...".format(
            eval_dataset.num_samples, total_steps))
    for step, data in tqdm.tqdm(
            enumerate(data_generator()), total=total_steps):
        images = np.array([d[0] for d in data]).astype('float32')
        labels = [d[1] for d in data]
        num_samples = images.shape[0]
        if num_samples < batch_size:
            # Pad the last incomplete batch so every card gets data,
            # then drop the padded predictions afterwards.
            num_pad_samples = batch_size - num_samples
            pad_images = np.tile(images[0:1], (num_pad_samples, 1, 1, 1))
            images = np.concatenate([images, pad_images])
        with fluid.scope_guard(self.scope):
            outputs = self.exe.run(
                self.parallel_test_prog,
                feed={'image': images},
                fetch_list=list(self.test_outputs.values()))
        outputs = [outputs[0][:num_samples]]
        true_labels.extend(labels)
        pred_scores.extend(outputs[0].tolist())
        logging.debug("[EVAL] Epoch={}, Step={}/{}".format(
            epoch_id, step + 1, total_steps))

    pred_top1_label = np.argsort(pred_scores)[:, -1]
    pred_topk_label = np.argsort(pred_scores)[:, -k:]
    acc1 = sum(pred_top1_label == true_labels) / len(true_labels)
    acck = sum(
        [np.isin(x, y) for x, y in zip(true_labels, pred_topk_label)
         ]) / len(true_labels)
    metrics = OrderedDict([('acc1', acc1), ('acc{}'.format(k), acck)])
    if return_details:
        eval_details = {
            'true_labels': true_labels,
            'pred_scores': pred_scores
        }
        return metrics, eval_details
    return metrics
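# Minimal usage sketch, assuming a trained PaddleX classifier and an
# ImageNet-format validation set at hypothetical paths; eval_transforms is
# assumed to be defined elsewhere:
import paddlex as pdx

eval_dataset = pdx.datasets.ImageNet(
    data_dir='cls_data',
    file_list='cls_data/val_list.txt',
    label_list='cls_data/labels.txt',
    transforms=eval_transforms)
model = pdx.load_model('output/mobilenetv2/best_model')
metrics = model.evaluate(eval_dataset, batch_size=8)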