def get_exec_times_display(self):
    """Build the plot datasets used to display execution times.

    Every dataset is created without values (``None``) and with a fixed
    legend label. The malicious and benign datasets receive a line style
    and the colour returned by ``get_label_color``; the uncertain dataset
    is left with whatever defaults ``PlotDataset`` applies.

    Returns:
        A list ordered ``[malicious, uncertain, benign]``.
    """
    def _styled(title, linestyle, label):
        # Dataset with an explicit line style and the colour of `label`.
        styled = PlotDataset(None, title)
        styled.set_linestyle(linestyle)
        styled.set_color(get_label_color(label))
        return styled

    uncertain_ds = PlotDataset(None, 'Uncertain Queries')
    malicious_ds = _styled('Malicious Queries', 'dotted', MALICIOUS)
    benign_ds = _styled('Benign Queries', 'dashed', BENIGN)
    return [malicious_ds, uncertain_ds, benign_ds]
def plot_evolution(self, estimator=None):
    """Plot how a clustering estimator evolves across iterations.

    Without an ``estimator`` the method calls itself once for every entry
    of ``self.homogeneity_estimators + self.adjusted_estimators``.
    With an ``estimator`` it draws one curve per key returned by
    ``self.annotations.getClusteringsEvaluations()`` (reading the column
    ``<key>_<estimator>`` of ``self.data``) and saves the figure as
    ``<estimator>_monitoring.png`` in ``self.output_dir``.

    Args:
        estimator: Name of the estimator to plot, or ``None`` to plot all.
    """
    if estimator is None:
        every_estimator = (self.homogeneity_estimators
                           + self.adjusted_estimators)
        for name in every_estimator:
            self.plot_evolution(estimator=name)
        return

    x_values = list(range(self.monitoring.iter_num))
    plt.clf()
    y_max = 1
    evaluations = self.annotations.getClusteringsEvaluations()
    for kind in evaluations.keys():
        column = '_'.join((kind, estimator))
        plt.plot(x_values,
                 self.data.loc[:][column],
                 label='%s Clustering' % kind.title(),
                 color=get_label_color(kind),
                 linewidth=4,
                 marker='o')
    plt.ylim(0, y_max)
    plt.xlabel('Iteration')
    plt.ylabel(estimator)
    # The legend is placed above the axes, so it must be passed to savefig
    # as an extra artist for the tight bounding box to include it.
    legend = plt.legend(bbox_to_anchor=(0., 1.02, 1., .102),
                        loc=3,
                        ncol=2,
                        mode='expand',
                        borderaxespad=0.,
                        fontsize='large')
    output_file = path.join(self.output_dir, '%s_monitoring.png' % estimator)
    plt.savefig(output_file,
                bbox_extra_artists=(legend, ),
                bbox_inches='tight')
    plt.clf()
def add_fold(self, fold_id, predictions):
    """Add one fold's FAR/DR curve to the plot and to the running sum.

    The fold is ignored when it contains no instance or when the sum of
    its ground-truth labels is zero. Otherwise the recall is interpolated
    at ``self.mean_precision`` by ``interp_recall``, accumulated into
    ``self.mean_recall`` and drawn on ``self.ax1`` against
    ``1 - self.mean_precision``.

    Args:
        fold_id: Fold identifier, shown in the legend when
            ``self.num_folds > 1``.
        predictions: Predictions of the fold. Must expose
            ``num_instances()``, ``ground_truth`` and ``probas`` (used when
            ``self.probabilist`` is true) or ``scores``.

    Returns:
        None.
    """
    if (predictions.num_instances() == 0
            or sum(predictions.ground_truth) == 0):
        return

    scores = predictions.probas if self.probabilist else predictions.scores
    # Only the recall is needed; the second returned value is discarded.
    recall, _ = interp_recall(predictions.ground_truth, scores,
                              self.mean_precision)

    if self.mean_recall is None:
        # Store a copy: `recall` is also handed to `ax1.plot` below, and
        # the in-place `+=` of later folds must not modify the data of a
        # curve that has already been plotted.
        self.mean_recall = recall.copy()
    else:
        self.mean_recall += recall

    false_alarm_rate = 1 - self.mean_precision
    if self.num_folds > 1:
        self.ax1.plot(false_alarm_rate, recall, lw=1,
                      label='FAR/DR fold %d' % (fold_id))
    else:
        self.ax1.plot(false_alarm_rate, recall, lw=3,
                      color=get_label_color('all'), label='FAR/DR')
def add_fold(self, fold_id, predictions):
    """Add one fold's ROC curve to the plot and to the running sum.

    The fold is ignored when it contains no instance or when the sum of
    its ground-truth labels is zero. Otherwise the ROC curve is computed
    with ``roc_curve``, padded with the (0, 0) and (1, 1) corner points,
    interpolated at ``self.mean_fpr`` and accumulated into
    ``self.mean_tpr``. ``self.thresholds`` is overwritten with this fold's
    interpolated thresholds.

    Args:
        fold_id: Fold identifier, shown in the legend when
            ``self.num_folds > 1``.
        predictions: Predictions of the fold. Must expose
            ``num_instances()``, ``ground_truth`` and ``probas`` (used when
            ``self.probabilist`` is true) or ``scores``.

    Returns:
        ``(fpr, tpr, roc_auc)`` for the padded curve, or ``None`` when the
        fold is ignored.
    """
    has_no_instance = predictions.num_instances() == 0
    if has_no_instance or sum(predictions.ground_truth) == 0:
        return

    scores = predictions.probas if self.probabilist else predictions.scores
    fpr, tpr, thresholds = roc_curve(predictions.ground_truth, scores)

    def _with_corners(first, values, last):
        # Prepend `first` and append `last` to `values`.
        return np.append(np.append(first, values), last)

    # Add corner cases
    thresholds = _with_corners(1, thresholds, 0)
    fpr = _with_corners(0, fpr, 1)
    tpr = _with_corners(0, tpr, 1)

    fold_tpr = interp(self.mean_fpr, fpr, tpr)
    if self.mean_tpr is None:
        self.mean_tpr = fold_tpr
    else:
        self.mean_tpr += fold_tpr
    self.thresholds = interp(self.mean_fpr, fpr, thresholds)

    roc_auc = auc(fpr, tpr)
    if self.num_folds > 1:
        line_style = {
            'lw': 1,
            'label': 'ROC fold %d (area = %0.2f)' % (fold_id, roc_auc),
        }
    else:
        line_style = {
            'lw': 3,
            'color': get_label_color('all'),
            'label': 'ROC (area = %0.2f)' % (roc_auc),
        }
    self.ax1.plot(fpr, tpr, **line_style)
    return fpr, tpr, roc_auc
def plot_families_evolution(self, iteration_dir):
    """Plot the number of discovered families against the annotations.

    One curve is drawn for ``MALICIOUS`` and one for ``BENIGN`` from
    ``self.evolutions[label]['families']``. When ground truth is available
    and a label has at least one ground-truth family, its curve is divided
    by that number of families, so it shows a proportion. The figure is
    saved as ``families_monitoring.png`` in ``iteration_dir``.

    Args:
        iteration_dir: Directory in which the PNG file is written.
    """
    annotations = self.evolutions['global']['annotations']
    plt.clf()
    if self.has_ground_truth:
        max_value = 1
    else:
        max_value = max(self.stats[MALICIOUS]['families'],
                        self.stats[BENIGN]['families'])

    # Whether at least one curve is expressed as a proportion. The y-axis
    # label is derived from this flag rather than from the family count of
    # whichever label happens to be processed last.
    normalized = False
    for label in (MALICIOUS, BENIGN):
        evolution = self.evolutions[label]['families']
        if self.has_ground_truth:
            instances = self.monitoring.datasets.instances
            num_families = len(
                instances.ground_truth.get_families_values(label=label))
            # Without any ground-truth family there is nothing to
            # normalise by; the raw counts are plotted instead.
            if num_families > 0:
                evolution = [x / num_families for x in evolution]
                normalized = True
        plt.plot(annotations, evolution,
                 label=label.title(),
                 color=get_label_color(label),
                 linewidth=4, marker='o')

    plt.ylim(0, max_value)
    plt.xlabel('Num Annotations')
    if normalized:
        plt.ylabel('Prop. Families Discovered')
    else:
        plt.ylabel('Num. Families Discovered')
    # The legend is placed above the axes, so it must be passed to savefig
    # as an extra artist for the tight bounding box to include it.
    lgd = plt.legend(bbox_to_anchor=(0., 1.02, 1., .102), loc=3,
                     ncol=2, mode='expand', borderaxespad=0.,
                     fontsize='x-large')
    filename = path.join(iteration_dir, 'families_monitoring.png')
    plt.savefig(filename, bbox_extra_artists=(lgd,), bbox_inches='tight')
    plt.clf()
def display_label(self, barplot, label):
    """Add to ``barplot`` the per-range instance counts for ``label``.

    For every range of ``self.ranges``, counts the items whose
    ``'ground_truth_label'`` equals the boolean returned by
    ``labels_tools.label_str_to_bool(label)``.

    Args:
        barplot: Bar plot receiving the new dataset via ``add_dataset``.
        label: Label name; also used as dataset name and for the colour.
    """
    wanted = labels_tools.label_str_to_bool(label)
    counts = []
    for bucket in self.ranges:
        matching = [item for item in bucket
                    if item['ground_truth_label'] == wanted]
        counts.append(len(matching))
    label_dataset = PlotDataset(counts, label)
    label_dataset.set_color(get_label_color(label))
    barplot.add_dataset(label_dataset)
def _gen_label_plot_dataset(self, instances, label):
    """Create and register the plot dataset of one label.

    Selects the unlabeled instances when ``label`` is ``'unlabeled'`` and
    the instances annotated with ``label`` otherwise, reads the values of
    the feature at ``self.feature_index`` and stores the resulting
    ``PlotDataset`` in ``self.plot_datasets[label]``.

    Args:
        instances: Instances to select from.
        label: Label name, or ``'unlabeled'``.
    """
    if label == 'unlabeled':
        selected = instances.get_unlabeled_instances()
    else:
        selected = instances.get_annotated_instances(label=label)
    feature_values = selected.features.get_values_from_index(
        self.feature_index)
    label_dataset = PlotDataset(feature_values, label)
    label_dataset.set_color(get_label_color(label))
    self.plot_datasets[label] = label_dataset
def display_label(self, barplot, label):
    """Add to ``barplot`` the per-range instance counts for ``label``.

    With ``label == 'all'`` every item of each range is counted. Otherwise
    only the items whose ``'ground_truth_label'`` equals the boolean
    returned by ``label_str_to_bool(label)`` are counted.

    Args:
        barplot: Bar plot receiving the new dataset via ``add_dataset``.
        label: Label name or ``'all'``; also used as dataset name and for
            the colour.
    """
    if label == 'all':
        buckets = self.ranges
    else:
        wanted = label_str_to_bool(label)
        buckets = [[item for item in bucket
                    if item['ground_truth_label'] == wanted]
                   for bucket in self.ranges]
    sizes = np.array(list(map(len, buckets)))
    label_dataset = PlotDataset(sizes, label)
    label_dataset.set_color(get_label_color(label))
    barplot.add_dataset(label_dataset)
def display(self, directory):
    """Export the predictions bar plot as JSON.

    The bar plot has ten bars labelled ``'0-10%'`` to ``'90-100%'``. With
    ground truth, one dataset per label (``MALICIOUS`` then ``BENIGN``) is
    added through ``display_label``; without it, a single
    ``'num_instances'`` dataset holds the size of each range.

    Args:
        directory: Directory receiving ``predictions_barplot.json``.
    """
    bucket_labels = ['%d-%d%%' % (low, low + 10)
                     for low in range(0, 100, 10)]
    barplot = BarPlot(bucket_labels)
    if self.has_ground_truth:
        for label in (MALICIOUS, BENIGN):
            self.display_label(barplot, label)
    else:
        sizes = [len(bucket) for bucket in self.ranges]
        all_dataset = PlotDataset(sizes, 'num_instances')
        all_dataset.set_color(get_label_color('all'))
        barplot.add_dataset(all_dataset)
    output_file = path.join(directory, 'predictions_barplot.json')
    barplot.export_to_json(output_file)
def getFamiliesBarplot(annotations_id, iteration, label):
    """Return, as JSON, a bar plot of the instance count of each family.

    Args:
        annotations_id: Identifier forwarded to
            ``annotations_db_tools.get_families_counts``.
        iteration: The string ``'None'`` for no upper bound, otherwise a
            value convertible with ``int`` used as ``iter_max``.
        label: Label whose families are counted; also selects the colour.

    Returns:
        The result of ``jsonify`` applied to the bar plot's JSON form.
    """
    if iteration == 'None':
        iter_max = None
    else:
        iter_max = int(iteration)
    family_counts = annotations_db_tools.get_families_counts(
        session, annotations_id, iter_max=iter_max, label=label)

    families = list(family_counts.keys())
    counts = [family_counts[family] for family in families]
    frame = pd.DataFrame({'families': families, 'counts': counts})
    # Bars are ordered by family name.
    sort_data_frame(frame, 'families', ascending=True, inplace=True)

    barplot = BarPlot(frame['families'].values)
    counts_dataset = PlotDataset(frame['counts'].values, 'Num. Instances')
    counts_dataset.set_color(get_label_color(label))
    barplot.add_dataset(counts_dataset)
    return jsonify(barplot.to_json())
def _gen_label_plot_dataset(self, instances, label=None, family=None,
                            color=None):
    """Create and register the plot dataset of one label or one family.

    When ``label`` is given, selects the unlabeled instances for
    ``'unlabeled'`` and the instances annotated with ``label`` otherwise.
    When ``label`` is ``None``, selects the instances annotated with
    ``family``. The values of the feature at ``self.feature_index`` are
    read (sparse matrices are converted to dense arrays) and the resulting
    ``PlotDataset`` is stored in ``self.plot_datasets`` under the label,
    or under the family when no label is given.

    Args:
        instances: Instances to select from.
        label: Label name, ``'unlabeled'``, or ``None`` to select by family.
        family: Family name, used only when ``label`` is ``None``.
        color: Dataset colour; defaults to ``get_label_color`` applied to
            the dataset name.
    """
    if label is None:
        selected = instances.get_annotated_instances(family=family)
        dataset_name = family
    elif label == 'unlabeled':
        selected = instances.get_unlabeled_instances()
        dataset_name = label
    else:
        selected = instances.get_annotated_instances(label=label)
        dataset_name = label

    feature_values = selected.features.get_values_from_index(
        self.feature_index)
    if isinstance(feature_values, spmatrix):
        feature_values = feature_values.toarray()

    dataset_color = (color if color is not None
                     else get_label_color(dataset_name))
    new_dataset = PlotDataset(feature_values, dataset_name)
    new_dataset.set_color(dataset_color)
    self.plot_datasets[dataset_name] = new_dataset
def _set_default_values(self):
    """Reset the display attributes to their defaults.

    Sets the colour returned by ``get_label_color('all')``, a line width
    of 3, a solid line, ``'o'`` markers and no error bars.
    """
    self.color = get_label_color('all')
    self.linewidth, self.linestyle, self.marker = 3, 'solid', 'o'
    self.error_bars = None
def _set_default_values(self):
    """Reset the display attributes to their defaults.

    Sets the colour returned by ``get_label_color`` for ``self.label``, a
    line width of 3, a solid line and ``'o'`` markers.
    """
    self.color = get_label_color(self.label)
    self.linewidth, self.linestyle, self.marker = 3, 'solid', 'o'