def _calculate_uncertainty_batch(self, batch: InstanceBatch, progress_bar: Tqdm = None) -> defaultdict:
    uncertainty_df = defaultdict(list)
    ids, predictions, labels = batch

    for idx, prediction, label in zip(ids, predictions, labels):
        for w, word in enumerate(prediction['words']):
            for model in self.predictor._model.all_model_keys:
                # Per-token mean and std of the class probabilities for this model
                tag_mean_probability = prediction[f'{model}_class_probabilities'][w]
                tag_std_probability = prediction[f'{model}_class_prob_std'][w]

                actual_label_idx = label[w]
                predicted_label_idx = np.argmax(tag_mean_probability)

                uncertainty_df['instance_id'].append(idx)
                uncertainty_df['word_id'].append(w)
                uncertainty_df['model'].append(model)
                uncertainty_df['word'].append(word)
                uncertainty_df['actual_tag'].append(
                    self.predictor._model.vocab.get_token_from_index(
                        actual_label_idx, namespace=self.predictor._model.label_namespace
                    )
                )
                uncertainty_df['predicted_tag'].append(
                    self.predictor._model.vocab.get_token_from_index(
                        predicted_label_idx, namespace=self.predictor._model.label_namespace
                    )
                )
                uncertainty_df['actual_confidence_mean'].append(tag_mean_probability[actual_label_idx])
                uncertainty_df['actual_confidence_std'].append(tag_std_probability[actual_label_idx])
                uncertainty_df['predicted_confidence_mean'].append(tag_mean_probability[predicted_label_idx])
                uncertainty_df['predicted_confidence_std'].append(tag_std_probability[predicted_label_idx])
                uncertainty_df['mean_probability_distribution'].append(tag_mean_probability)

        if progress_bar:
            progress_bar.update(1)

    return uncertainty_df
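# A minimal usage sketch (not part of the original code): the dict-of-lists returned
# above converts directly to a pandas DataFrame, which makes it easy to surface tokens
# whose predicted tag disagrees with the gold tag or whose predictive std is high.
# The `analyzer` and `batch` names below are hypothetical stand-ins.
#
#     import pandas as pd
#
#     records = analyzer._calculate_uncertainty_batch(batch)
#     df = pd.DataFrame(records)
#     disagreements = df[df['actual_tag'] != df['predicted_tag']]
#     print(disagreements[['word', 'actual_tag', 'predicted_tag',
#                          'predicted_confidence_mean', 'predicted_confidence_std']])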
def _calculate_feature_importance_batch(self, batch: InstanceBatch, progress_bar: Tqdm = None) -> defaultdict:
    feature_importance_df = defaultdict(list)
    ids, labeled_batch, actual_labels = batch

    batch_text = [[li[fn].tokens for fn in self.field_names] for li in labeled_batch]
    fields = [list(self.field_names) for _ in range(len(labeled_batch))]
    predicted_labels = [li['label'].label for li in labeled_batch]
    seed = [self.seed for _ in range(len(labeled_batch))]

    for interpreter in self.feature_importance_interpreters + self.attention_interpreters:
        if progress_bar:
            progress_bar.set_description(f"{interpreter.id}: interpreting {len(labeled_batch)} instances")

        # Some feature importance measures are too memory-intensive to run with larger batch sizes.
        # These numbers are based on empirical tests with a standard 16GB GPU.
        if 'shap' in interpreter.id or 'deep' in interpreter.id or 'intgrad' in interpreter.id:
            batch_scores = []
            for sub_batch in utils.batch(labeled_batch, 2):
                batch_scores.extend(interpreter.saliency_interpret_instances(sub_batch).values())
        else:
            batch_scores = interpreter.saliency_interpret_instances(labeled_batch).values()

        # There can be more than one array of scores for an instance (e.g. in the pair sequence case)
        scores = [[np.asarray(scoreset) for scoreset in v.values()] for v in batch_scores]

        feature_importance_df['scores'].extend(scores)
        feature_importance_df['seed'].extend(seed)
        feature_importance_df['instance_id'].extend(ids)
        feature_importance_df['instance_text'].extend(batch_text)
        feature_importance_df['instance_fields'].extend(fields)
        feature_importance_df['feature_importance_measure'].extend(
            [interpreter.id for _ in range(len(labeled_batch))]
        )
        feature_importance_df['predicted'].extend(predicted_labels)
        feature_importance_df['actual'].extend(actual_labels)

        if progress_bar:
            progress_bar.update(1)

    return feature_importance_df
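# A minimal usage sketch (not part of the original code): each row of the returned
# dict-of-lists pairs one instance with one feature-importance measure, so grouping by
# `feature_importance_measure` yields one saliency table per interpreter. The
# `analyzer` and `batch` names below are hypothetical stand-ins.
#
#     import pandas as pd
#
#     records = analyzer._calculate_feature_importance_batch(batch)
#     df = pd.DataFrame(records)
#     for measure, group in df.groupby('feature_importance_measure'):
#         print(measure, len(group), 'instances scored')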