def executionTimeDisplay(self):
    """Build the plot datasets used to display query execution times.

    Three value-less ``PlotDataset`` objects are created (their first
    constructor argument is ``None``): one for uncertain, one for malicious
    and one for benign queries. The malicious and benign datasets get a
    dedicated line style and the color associated with their label.

    Returns:
        A list ``[malicious, uncertain, benign]`` of ``PlotDataset`` objects.
    """

    def labelled_dataset(title, linestyle, label):
        # Shared styling for the datasets tied to a ground-truth label.
        plot_dataset = PlotDataset(None, title)
        plot_dataset.setLinestyle(linestyle)
        plot_dataset.setColor(colors_tools.getLabelColor(label))
        return plot_dataset

    # The uncertain dataset keeps the default PlotDataset styling.
    uncertain = PlotDataset(None, 'Uncertain Queries')
    malicious = labelled_dataset('Malicious Queries', 'dotted',
                                 labels_tools.MALICIOUS)
    benign = labelled_dataset('Benign Queries', 'dashed',
                              labels_tools.BENIGN)
    return [malicious, uncertain, benign]
def plotEvolutionMonitoring(self, estimator=None):
    """Plot the evolution of a clustering estimator across iterations.

    Args:
        estimator: Name of the estimator to plot. When ``None``, the method
            calls itself once for every entry of
            ``self.homogeneity_estimators + self.adjusted_estimators``.

    For a given estimator, one curve is drawn per key returned by
    ``self.annotations.getClusteringsEvaluations()``, reading the column
    ``'<key>_<estimator>'`` of ``self.data``. The figure is saved as
    ``'<estimator>_monitoring.png'`` in ``self.output_directory``.
    """
    if estimator is None:
        every_estimator = (self.homogeneity_estimators
                           + self.adjusted_estimators)
        for name in every_estimator:
            self.plotEvolutionMonitoring(estimator=name)
        return

    iterations = list(range(self.monitoring.iteration_number))
    plt.clf()
    clusterings = self.annotations.getClusteringsEvaluations()
    for kind in clusterings.keys():
        color = colors_tools.getLabelColor(kind)
        column = kind + '_' + estimator
        plt.plot(iterations,
                 self.data.loc[:][column],
                 label=kind.title() + ' Clustering',
                 color=color,
                 linewidth=4,
                 marker='o')

    # The y axis is fixed to [0, 1].
    plt.ylim(0, 1)
    plt.xlabel('Iteration')
    plt.ylabel(estimator)
    # The legend is anchored above the axes, so it must be passed to savefig
    # as an extra artist for the tight bounding box to include it.
    legend = plt.legend(bbox_to_anchor=(0., 1.02, 1., .102),
                        loc=3,
                        ncol=2,
                        mode='expand',
                        borderaxespad=0.,
                        fontsize='large')
    output_file = path.join(self.output_directory,
                            estimator + '_monitoring.png')
    plt.savefig(output_file,
                bbox_extra_artists=(legend, ),
                bbox_inches='tight')
    plt.clf()
def generateLabelPlotDatasets(self, instances, label):
    """Create the plot dataset of ``self.feature`` for one label.

    Args:
        instances: Instances object; only the instances whose ids are
            returned by ``instances.ground_truth.getAnnotatedIds(label)``
            are kept.
        label: Label used to select the instances, to name the dataset and
            to pick its color.

    The resulting ``PlotDataset`` is stored in ``self.plot_datasets[label]``.
    """
    annotated_ids = instances.ground_truth.getAnnotatedIds(label)
    selected = instances.getInstancesFromIds(annotated_ids)
    feature_values = selected.features.getFeatureValues(self.feature)

    plot_dataset = PlotDataset(feature_values, label)
    plot_dataset.setColor(colors_tools.getLabelColor(label))
    self.plot_datasets[label] = plot_dataset
def displayLabel(self, barplot, label):
    """Add to ``barplot`` the per-range instance counts for one label.

    Args:
        barplot: Bar plot receiving the new dataset (via ``addDataset``).
        label: Label string; converted with
            ``labels_tools.labelStringToBoolean`` and compared with the
            ``'ground_truth_label'`` entry of every element of each range.

    For each range in ``self.ranges``, the number of elements whose ground
    truth label matches is counted; the counts form a ``PlotDataset`` named
    after ``label`` and colored with the label color.
    """
    expected = labels_tools.labelStringToBoolean(label)
    counts = [
        sum(1 for entry in bucket if entry['ground_truth_label'] == expected)
        for bucket in self.ranges
    ]
    plot_dataset = PlotDataset(counts, label)
    plot_dataset.setColor(colors_tools.getLabelColor(label))
    barplot.addDataset(plot_dataset)
def addFold(self, fold_id, predictions):
    """Add the ROC curve of one fold to the plot and to the running mean.

    Args:
        fold_id: Identifier of the fold, used in the legend when there are
            several folds.
        predictions: Predictions of the fold. It must expose
            ``numInstances()``, ``ground_truth`` and either
            ``predicted_proba`` or ``predicted_scores``.

    Nothing is done when the fold has no instance or when the sum of its
    ground truth is 0. Otherwise the fold's true positive rates, interpolated
    on ``self.mean_fpr``, are accumulated into ``self.mean_tpr``;
    ``self.thresholds`` is replaced by the interpolated thresholds of this
    fold, and the curve is drawn on ``self.ax1``.
    """
    no_instance = predictions.numInstances() == 0
    if no_instance or sum(predictions.ground_truth) == 0:
        return

    scores = (predictions.predicted_proba
              if self.probabilist_model else predictions.predicted_scores)
    fpr, tpr, thresholds = roc_curve(predictions.ground_truth, scores)

    fold_tpr = interp(self.mean_fpr, fpr, tpr)
    if self.mean_tpr is None:
        self.mean_tpr = fold_tpr
    else:
        self.mean_tpr += fold_tpr
    self.thresholds = interp(self.mean_fpr, fpr, thresholds)

    # Pin the end points of the interpolated curves.
    self.mean_tpr[0] = 0.0
    self.thresholds[0] = 1.0
    self.thresholds[-1] = 0.0

    roc_auc = auc(fpr, tpr)
    if self.num_folds > 1:
        plot_options = dict(lw=1,
                            label='ROC fold %d (area = %0.2f)' %
                            (fold_id, roc_auc))
    else:
        plot_options = dict(lw=3,
                            color=colors_tools.getLabelColor('all'),
                            label='ROC (area = %0.2f)' % (roc_auc))
    self.ax1.plot(fpr, tpr, **plot_options)
def generatePlotDatasets(self, instances):
    """(Re)build ``self.plot_datasets`` for ``self.feature``.

    Args:
        instances: Instances whose feature values are plotted.

    With ground truth, one dataset per label (malicious, then benign) is
    generated through ``generateLabelPlotDatasets``. Without ground truth, a
    single dataset keyed ``'all'`` holds the values of every instance.
    """
    self.plot_datasets = {}

    if not self.has_ground_truth:
        values = instances.features.getFeatureValues(self.feature)
        self.plot_datasets['all'] = PlotDataset(values, 'all')
        self.plot_datasets['all'].setColor(colors_tools.getLabelColor('all'))
        return

    for label in (labels_tools.MALICIOUS, labels_tools.BENIGN):
        self.generateLabelPlotDatasets(instances, label)
def getFamiliesBarplot(experiment_id, iteration, label):
    """Return, as JSON, a bar plot of the number of instances per family.

    Args:
        experiment_id: Identifier of the experiment to query.
        iteration: Maximum iteration to consider; the string ``'None'`` is
            interpreted as "no limit" (``None``).
        label: Label whose families are counted; also selects the bar color.

    Returns:
        The ``jsonify``-ed JSON representation of the bar plot, with the
        families sorted in ascending order.
    """
    iteration_max = None if iteration == 'None' else iteration
    family_counts = annotations_db_tools.getFamiliesCounts(
        session, experiment_id, iteration_max=iteration_max, label=label)

    families = list(family_counts.keys())
    counts = [family_counts[family] for family in families]
    df = pd.DataFrame({'families': families, 'counts': counts})
    matrix_tools.sortDataFrame(df, 'families', ascending=True, inplace=True)

    barplot = BarPlot(list(df['families']))
    counts_dataset = PlotDataset(list(df['counts']), 'Num. Instances')
    counts_dataset.setColor(colors_tools.getLabelColor(label))
    barplot.addDataset(counts_dataset)
    return jsonify(barplot.toJson())
def display(self, directory):
    """Export the predictions bar plot as JSON.

    Args:
        directory: Directory in which ``predictions_barplot.json`` is
            written.

    The bar plot has ten bars labelled ``'0-10%'`` to ``'90-100%'``. Without
    ground truth, a single dataset gives the number of elements of each
    entry of ``self.ranges``; with ground truth, one dataset per label
    (malicious, then benign) is added through ``displayLabel``.
    """
    # Produces '0-10%', '10-20%', ..., '90-100%'.
    decile_labels = [
        '%d-%d%%' % (lower, lower + 10) for lower in range(0, 100, 10)
    ]
    barplot = BarPlot(decile_labels)

    if self.has_ground_truth:
        for label in (labels_tools.MALICIOUS, labels_tools.BENIGN):
            self.displayLabel(barplot, label)
    else:
        sizes = [len(bucket) for bucket in self.ranges]
        all_dataset = PlotDataset(sizes, 'numInstances')
        all_dataset.setColor(colors_tools.getLabelColor('all'))
        barplot.addDataset(all_dataset)

    output_file = path.join(directory, 'predictions_barplot.json')
    with open(output_file, 'w') as f:
        barplot.exportJson(f)
def plotFamiliesEvolutionMonitoring(self, iteration_dir):
    """Plot the number of families discovered against the annotations.

    Args:
        iteration_dir: Directory in which ``families_monitoring.png`` is
            saved.

    One curve is drawn per label (malicious, then benign). With ground
    truth, each curve is divided by the number of families returned by
    ``ground_truth.getFamiliesValues(label=...)`` and the y axis is limited
    to [0, 1]; otherwise raw counts are plotted and the upper limit is the
    larger of the two ``self.stats[label]['families']`` values.
    """
    annotations = self.evolutions['global']['annotations']
    plt.clf()

    if self.has_ground_truth:
        y_max = 1
        y_label = 'Prop. Families Discovered'
    else:
        y_max = max(self.stats[labels_tools.MALICIOUS]['families'],
                    self.stats[labels_tools.BENIGN]['families'])
        y_label = 'Num. Families Discovered'

    for label in [labels_tools.MALICIOUS, labels_tools.BENIGN]:
        discovered = self.evolutions[label]['families']
        if self.has_ground_truth:
            ground_truth = self.monitoring.datasets.instances.ground_truth
            total = len(ground_truth.getFamiliesValues(label=label))
            discovered = [count / total for count in discovered]
        curve_color = colors_tools.getLabelColor(label)
        plt.plot(annotations,
                 discovered,
                 label=label.title(),
                 color=curve_color,
                 linewidth=4,
                 marker='o')

    plt.ylim(0, y_max)
    plt.xlabel('Num Annotations')
    plt.ylabel(y_label)
    # The legend sits above the axes, so it is passed to savefig as an extra
    # artist for the tight bounding box to include it.
    legend = plt.legend(bbox_to_anchor=(0., 1.02, 1., .102),
                        loc=3,
                        ncol=2,
                        mode='expand',
                        borderaxespad=0.,
                        fontsize='x-large')
    output_file = path.join(iteration_dir, 'families_monitoring.png')
    plt.savefig(output_file,
                bbox_extra_artists=(legend, ),
                bbox_inches='tight')
    plt.clf()
def displayFamiliesDistribution(self, directory, label=None):
    """Plot the distribution of the predictions of each family.

    Args:
        directory: Directory in which the PNG files are saved.
        label: Label whose families are plotted. When ``None``, the method
            calls itself once for the malicious label and once for the
            benign label.

    For a given label, two kinds of figures are written to ``directory``:

    * ``'<label>_family_<family>_prediction_distributions.png'``: one figure
      per family;
    * ``'<label>_families_prediction_distributions.png'``: every family on
      the same axes.

    The density of a family is estimated with a Gaussian kernel density
    estimator. When the variance of the predictions is below ``eps`` the
    density is instead drawn as a single spike at the mean.
    """
    if label is None:
        # The second call used to repeat MALICIOUS, so the benign
        # distributions were never generated.
        for each_label in (labels_tools.MALICIOUS, labels_tools.BENIGN):
            self.displayFamiliesDistribution(directory, label=each_label)
        return

    # NOTE(review): the families are selected through
    # labels_tools.labelBooleanToString(label); confirm that it maps the
    # BENIGN constant passed above to the benign families.
    families = self.families[labels_tools.labelBooleanToString(label)]
    bandwidth = 0.1
    num_points = 50
    eps = 0.00001
    # Line styles are cycled so that neighbouring curves can be told apart.
    linestyles = ('solid', 'dashed', 'dotted')
    kde = KernelDensity(kernel='gaussian', bandwidth=bandwidth)
    fig, ax = plt.subplots(1, 1)

    for i, family in enumerate(families):
        predictions_np = np.asarray(families[family])
        linestyle = linestyles[i % len(linestyles)]
        linewidth = 2
        if np.var(predictions_np) < eps:
            # (Almost) constant predictions: draw a spike at the mean
            # instead of fitting a kernel density estimator.
            linewidth = 4
            mean = np.mean(predictions_np)
            x = np.arange(0, 1, 0.1)
            x = np.sort(np.append(x, [mean, mean - eps, mean + eps]))
            density = [1 if v == mean else 0 for v in x]
        else:
            # KernelDensity expects a 2D array (one row per sample).
            kde.fit(predictions_np.reshape(-1, 1))
            # Computes the x axis
            p_max = np.amax(predictions_np)
            p_min = np.amin(predictions_np)
            delta = p_max - p_min
            density_delta = 1.1 * delta
            x = np.arange(0, 1, density_delta / num_points)
            # kde.score_samples returns the 'log' of the density
            log_density = kde.score_samples(x.reshape(-1, 1)).tolist()
            density = list(map(math.exp, log_density))

        # Curve of the family on the figure shared by all the families.
        ax.plot(x,
                density,
                label=family,
                linewidth=linewidth,
                linestyle=linestyle)

        # Dedicated figure for the family.
        fig_f, ax_f = plt.subplots(1, 1)
        ax_f.plot(x,
                  density,
                  linewidth=4,
                  color=colors_tools.getLabelColor(label))
        ax_f.set_title(family)
        ax_f.set_xlabel('P(Malicious)')
        ax_f.set_ylabel('Density')
        filename = '%s_family_%s_prediction_distributions.png' % (label,
                                                                  family)
        fig_f.savefig(path.join(directory, filename))
        plt.close(fig_f)

    ax.legend(bbox_to_anchor=(0., 0.95, 1., .102),
              loc=3,
              ncol=5,
              mode='expand',
              borderaxespad=0.,
              fontsize='xx-small')
    ax.set_xlabel('P(Malicious)')
    ax.set_ylabel('Density')
    filename = '%s_families_prediction_distributions.png' % (label)
    fig.savefig(path.join(directory, filename))
    plt.close(fig)
def setDefaultValues(self):
    """Reset the display attributes to their default values.

    Sets ``color`` (the color associated with ``'all'``), ``linewidth``,
    ``linestyle``, ``marker`` and ``error_bars`` on the instance.
    """
    defaults = (
        ('color', colors_tools.getLabelColor('all')),
        ('linewidth', 3),
        ('linestyle', 'solid'),
        ('marker', 'o'),
        ('error_bars', None),
    )
    for attribute, value in defaults:
        setattr(self, attribute, value)