def preparation_cam(self, data_):
    """Run the model on one input and gather what CAM interpretation needs.

    The input is loaded with ``read_image`` and passed to ``self.predict_fn``.
    ``result[0][0]`` is taken as the score vector (presumably the scores of
    the single image in the batch -- confirm against ``predict_fn``) and
    ``result[1]`` as the feature maps.

    Side effects: sets ``self.predicted_label``, ``self.predicted_probability``,
    ``self.image`` and ``self.labels`` (a length-1 array holding the top-1
    label), and logs the prediction.

    Returns:
        tuple: ``(feature_maps, fc_weights)`` where ``fc_weights`` comes from
        ``paddle_get_fc_weights()``.
    """
    image_batch = read_image(data_)
    outputs = self.predict_fn(image_batch)
    scores = outputs[0][0]

    # Scores that already sum to ~1 are treated as probabilities; anything
    # else goes through a max-shifted softmax.
    already_normalized = not (abs(np.sum(scores) - 1.0) > 1e-4)
    if already_normalized:
        probability = scores
    else:
        scores = scores - np.max(scores)
        exponentials = np.exp(scores)
        probability = exponentials / np.sum(exponentials)

    # Only the top-1 class is interpreted; keep it as a length-1 array.
    top_label = np.argsort(probability)[-1:]
    best = top_label[0]

    self.predicted_label = best
    self.predicted_probability = probability[best]
    self.image = image_batch[0]
    self.labels = top_label

    fc_weights = paddle_get_fc_weights()
    feature_maps = outputs[1]

    # Prefer the human-readable name when a label map was supplied.
    shown_name = best if self.label_names is None else self.label_names[best]
    formatted_probability = "%.3f" % (probability[best])
    logging.info("predicted result: {} with probability {}.".format(
        shown_name, formatted_probability))

    return feature_maps, fc_weights
def preparation_lime(self, data_):
    """Predict the top-1 class of one input and run LIME on it.

    The input is loaded with ``read_image`` and passed to ``self.predict_fn``;
    the first element of the output is used as the score vector of the single
    image.

    Side effects: sets ``self.predicted_label``, ``self.predicted_probability``,
    ``self.image``, ``self.labels`` (length-1 array with the top-1 label) and
    ``self.lime_interpreter`` (the object returned by
    ``LimeImageInterpreter.interpret_instance``); logs the prediction and the
    time spent in LIME.
    """
    image_batch = read_image(data_)
    scores = self.predict_fn(image_batch)[0]  # only one image here.

    # Scores that already sum to ~1 are treated as probabilities; anything
    # else goes through a max-shifted softmax.
    already_normalized = not (abs(np.sum(scores) - 1.0) > 1e-4)
    if already_normalized:
        probability = scores
    else:
        scores = scores - np.max(scores)
        exponentials = np.exp(scores)
        probability = exponentials / np.sum(exponentials)

    # Only the top-1 class is interpreted; keep it as a length-1 array.
    top_label = np.argsort(probability)[-1:]
    best = top_label[0]

    self.predicted_label = best
    self.predicted_probability = probability[best]
    self.image = image_batch[0]
    self.labels = top_label

    # Prefer the human-readable name when a label map was supplied.
    shown_name = best if self.label_names is None else self.label_names[best]
    formatted_probability = "%.3f" % (probability[best])
    logging.info("predicted result: {} with probability {}.".format(
        shown_name, formatted_probability))

    started_at = time.time()
    lime_algorithm = lime_base.LimeImageInterpreter()
    # NOTE(review): the meaning of the positional ``0`` is defined by
    # ``interpret_instance`` in lime_base -- confirm there.
    self.lime_interpreter = lime_algorithm.interpret_instance(
        self.image,
        self.predict_fn,
        self.labels,
        0,
        num_samples=self.num_samples,
        batch_size=self.batch_size)
    logging.info('lime time: ' + str(time.time() - started_at) + 's.')
def preparation_normlime(self, data_):
    """Run LIME on one input, then look up NormLIME weights for its segments.

    A fresh ``LIME`` helper is stored in ``self._lime`` and run on ``data_``;
    its prediction results are copied onto this object. The LIME segments are
    then mapped to cluster labels via ``self.predict_cluster_labels`` and
    passed to ``self.predict_using_normlime_weights``.

    Side effects: sets ``self._lime``, ``self.predicted_label``,
    ``self.predicted_probability``, ``self.image`` and ``self.labels``.

    Returns:
        Whatever ``self.predict_using_normlime_weights`` returns for
        ``(self.labels, cluster_labels)``.
    """
    lime = LIME(self.predict_fn, self.label_names, self.num_samples,
                self.batch_size)
    self._lime = lime
    lime.preparation_lime(data_)

    image_batch = read_image(data_)

    # Mirror the LIME helper's prediction state on this object.
    self.predicted_label = lime.predicted_label
    self.predicted_probability = lime.predicted_probability
    self.image = image_batch[0]
    self.labels = lime.labels

    logging.info('performing NormLIME operations ...')

    # The features are transposed with axes (1, 2, 0) before clustering;
    # presumably channel-first -> channel-last -- confirm against
    # compute_features_for_kmeans.
    kmeans_features = compute_features_for_kmeans(image_batch).transpose(
        (1, 2, 0))
    segments = lime.lime_interpreter.segments
    cluster_labels = self.predict_cluster_labels(kmeans_features, segments)

    return self.predict_using_normlime_weights(self.labels, cluster_labels)
def _grid_segments(height, width, num_blocks=10):
    """Return an int32 ``(height, width)`` map tiling the image into a grid.

    Pixel ``(i, j)`` gets the id
    ``i // (height // num_blocks + 1) * num_blocks + j // (width // num_blocks + 1)``.
    """
    height_per_block = height // num_blocks + 1
    width_per_block = width // num_blocks + 1
    row_ids = np.arange(height) // height_per_block
    col_ids = np.arange(width) // width_per_block
    # Broadcasting a column of row ids against a row of column ids replaces
    # the per-pixel Python double loop.
    return (row_ids[:, None] * num_blocks + col_ids[None, :]).astype(np.int32)


def _uniform_normlime_weights(num_features):
    """Return ``{feature_index: 1 / num_features}`` for every feature."""
    return dict(enumerate(np.ones(num_features) / num_features))


def _softmax_normlime_weights(coefficients):
    """Return ``{feature_index: weight}`` from a softmax over ``10 * coefficients``."""
    shifted = coefficients - np.max(coefficients)  # keeps exp() from overflowing
    exp_w = np.exp(shifted * 10)
    return dict(enumerate(exp_w / np.sum(exp_w)))


def precompute_global_classifier(dataset,
                                 predict_fn,
                                 save_path,
                                 batch_size=50,
                                 max_num_samples=1000):
    """Fit a global classifier over cluster features and save NormLIME weights.

    For every image in ``dataset.file_list`` the image is tiled into a regular
    grid, each tile is assigned a k-means cluster, and the image is encoded as
    a binary vector marking which clusters occur. A logistic regression is
    fitted from these vectors to the labels predicted by ``predict_fn``; its
    coefficients are turned into per-class weights with a sharpened softmax
    and saved with ``np.save``.

    The pre-trained k-means model is downloaded into ``~/.paddlex`` (as given
    by ``gen_user_home``) when ``pre_models`` is not already there.

    Args:
        dataset: Object with a ``file_list`` attribute; the first element of
            each entry is passed to ``read_image``.
        predict_fn: Callable applied to the loaded image; the first element of
            its output is used as the score vector of the single image.
        save_path: Destination handed to ``np.save``. Note that ``np.save``
            appends ``.npy`` when the name lacks it, while the value returned
            here is ``save_path`` unchanged.
        batch_size: Accepted for interface compatibility; not used here.
        max_num_samples: Accepted for interface compatibility; not used here.

    Returns:
        ``save_path``. The saved object is a dict
        ``{class_index: {feature_index: weight}}`` for every class index from
        0 up to the largest predicted label.

    Raises:
        ValueError: If ``dataset.file_list`` yields no samples.
    """
    from sklearn.linear_model import LogisticRegression

    root_path = osp.join(gen_user_home(), '.paddlex')
    h_pre_models = osp.join(root_path, "pre_models")
    if not osp.exists(h_pre_models):
        if not osp.exists(root_path):
            os.makedirs(root_path)
        url = "https://bj.bcebos.com/paddlex/interpret/pre_models.tar.gz"
        pdx.utils.download_and_decompress(url, path=root_path)
    h_pre_models_kmeans = osp.join(h_pre_models, "kmeans_model.pkl")
    kmeans_model = load_kmeans_model(h_pre_models_kmeans)

    image_list = [item[0] for item in dataset.file_list]

    x_data = []
    y_labels = []

    num_features = len(kmeans_model.cluster_centers_)

    logging.info(
        "Initialization for NormLIME: Computing each sample in the test list.",
        use_color=True)

    for each_data_ in tqdm.tqdm(image_list):
        image_show = read_image(each_data_)
        result = predict_fn(image_show)
        result = result[0]  # only one image here.

        features = compute_features_for_kmeans(image_show).transpose(
            (1, 2, 0))
        segments = _grid_segments(image_show.shape[1], image_show.shape[2])

        X = get_feature_for_kmeans(features, segments)
        try:
            cluster_labels = kmeans_model.predict(X)
        except AttributeError:
            # NOTE(review): presumably covers a pickled model that the
            # installed scikit-learn cannot call predict() on -- confirm.
            from sklearn.metrics import pairwise_distances_argmin_min
            cluster_labels, _ = pairwise_distances_argmin_min(
                X, kmeans_model.cluster_centers_)

        # Binary presence vector: 1 for every cluster seen in this image.
        x_data_i = np.zeros(num_features)
        x_data_i[np.asarray(cluster_labels)] = 1

        y_labels.append(np.argmax(result))
        x_data.append(x_data_i)

    if not y_labels:
        # Without this guard np.max() below fails with an opaque
        # "zero-size array" ValueError.
        raise ValueError(
            "Cannot precompute the NormLIME global classifier: "
            "the dataset contains no samples.")

    limited_samples_warning = (
        "Warning: The test samples in the dataset is limited.\n "
        "NormLIME may have no effect on the results.\n "
        "Try to add more test samples, or see the results of LIME.")

    unique_labels = np.unique(y_labels)
    num_classes = np.max(unique_labels) + 1

    if len(unique_labels) < 2:
        # A classifier cannot be fitted on a single class: fall back to
        # uniform weights for every class index.
        logging.info(limited_samples_warning)
        normlime_weights_all_labels = {
            class_index: _uniform_normlime_weights(num_features)
            for class_index in range(num_classes)
        }
        logging.info(
            "Saving the computed normlime_weights in {}".format(save_path))
        np.save(save_path, normlime_weights_all_labels)
        return save_path

    clf = LogisticRegression(multi_class='multinomial', max_iter=1000)
    clf.fit(x_data, y_labels)

    if len(y_labels) / len(unique_labels) < 3:
        logging.info(limited_samples_warning)

    # binary: clf.coef_ has shape of [1, num_features];
    # otherwise: [len(np.unique(y_labels)), num_features]
    is_binary = len(unique_labels) == 2

    normlime_weights_all_labels = {}
    for class_index in range(num_classes):
        if class_index not in clf.classes_:
            # Class never predicted on this dataset: no coefficients exist.
            normlime_weights_all_labels[
                class_index] = _uniform_normlime_weights(num_features)
            continue

        if is_binary:
            # The single coefficient row is used as-is for classes_[1] and
            # negated for classes_[0].
            if clf.classes_[0] == class_index:
                coefficients = -clf.coef_[0]
            else:
                coefficients = clf.coef_[0]
        else:
            coef_class_index = np.where(clf.classes_ == class_index)[0][0]
            coefficients = clf.coef_[coef_class_index]

        normlime_weights_all_labels[class_index] = _softmax_normlime_weights(
            coefficients)

    logging.info(
        "Saving the computed normlime_weights in {}".format(save_path))
    np.save(save_path, normlime_weights_all_labels)

    return save_path
def precompute_lime_weights(list_data_, predict_fn, num_samples, batch_size,
                            save_dir):
    """Compute and save LIME weights plus k-means labels for each input.

    For every item in ``list_data_`` the model is run, up to five of the most
    probable labels (those with probability of at least 0.05, and always at
    least one) are interpreted with LIME, the LIME segments are assigned
    k-means clusters, and the result is written through
    ``save_one_lime_predict_and_kmean_labels``. Items whose output file
    already exists are skipped.

    The pre-trained k-means model is downloaded into ``~/.paddlex`` (as given
    by ``gen_user_home``) when ``pre_models`` is not already there.

    Args:
        list_data_: Sized iterable of inputs accepted by ``read_image``.
            String items name the output file after the text between the last
            ``/`` and the first following ``.``; other items use their index.
        predict_fn: Callable applied to the loaded image; the first element of
            its output is used as the score vector of the single image.
        num_samples: Forwarded to ``interpret_instance`` and embedded in the
            output file name.
        batch_size: Forwarded to ``interpret_instance``.
        save_dir: Directory the ``lime_weights_s{num_samples}_{name}.npy``
            files are written to.
    """
    root_path = osp.join(gen_user_home(), '.paddlex')
    h_pre_models = osp.join(root_path, "pre_models")
    if not osp.exists(h_pre_models):
        if not osp.exists(root_path):
            os.makedirs(root_path)
        url = "https://bj.bcebos.com/paddlex/interpret/pre_models.tar.gz"
        pdx.utils.download_and_decompress(url, path=root_path)
    h_pre_models_kmeans = osp.join(h_pre_models, "kmeans_model.pkl")
    kmeans_model = load_kmeans_model(h_pre_models_kmeans)

    threshold = 0.05  # minimum probability for a label to be interpreted
    max_labels = 5  # upper bound on interpreted labels per image

    for data_index, each_data_ in enumerate(list_data_):
        if isinstance(each_data_, str):
            img_file_name = each_data_
            name_tag = each_data_.split('/')[-1].split('.')[0]
        else:
            img_file_name = data_index
            name_tag = data_index
        save_path = os.path.join(
            save_dir, "lime_weights_s{}_{}.npy".format(num_samples, name_tag))

        if os.path.exists(save_path):
            logging.info(
                save_path + ' exists, not computing this one.',
                use_color=True)
            continue

        # format() rather than '+': img_file_name is an int for non-string
        # inputs, and str + int raises TypeError.
        logging.info(
            'processing {} [{}/{}]'.format(img_file_name, data_index,
                                           len(list_data_)),
            use_color=True)

        image_show = read_image(each_data_)
        result = predict_fn(image_show)
        result = result[0]  # only one image here.

        if abs(np.sum(result) - 1.0) > 1e-4:
            # softmax, shifted by the max so exp() cannot overflow (same form
            # as the other preparation_* routines)
            shifted = result - np.max(result)
            exp_result = np.exp(shifted)
            probability = exp_result / np.sum(exp_result)
        else:
            probability = result

        # Labels ordered from most to least probable.
        pred_label = np.argsort(probability)[::-1]

        # Keep the leading labels while they stay above the threshold, capped
        # at max_labels, and never fewer than one.
        top_k = 0
        for label in pred_label:
            if probability[label] < threshold or top_k == max_labels:
                break
            top_k += 1
        top_k = max(top_k, 1)

        pred_label = pred_label[:top_k]

        algo = lime_base.LimeImageInterpreter()
        # NOTE(review): the meaning of the positional ``0`` is defined by
        # ``interpret_instance`` in lime_base -- confirm there.
        interpreter = algo.interpret_instance(
            image_show[0],
            predict_fn,
            pred_label,
            0,
            num_samples=num_samples,
            batch_size=batch_size)

        X = get_feature_for_kmeans(
            compute_features_for_kmeans(image_show).transpose((1, 2, 0)),
            interpreter.segments)
        try:
            cluster_labels = kmeans_model.predict(X)
        except AttributeError:
            # NOTE(review): presumably covers a pickled model that the
            # installed scikit-learn cannot call predict() on -- confirm.
            from sklearn.metrics import pairwise_distances_argmin_min
            cluster_labels, _ = pairwise_distances_argmin_min(
                X, kmeans_model.cluster_centers_)

        save_one_lime_predict_and_kmean_labels(
            interpreter.local_weights, pred_label, cluster_labels, save_path)