def plotEvolutionMonitoring(self):
    """Plot the accuracy of the high-confidence label and family suggestions.

    For ``self.labels_accuracy`` and then ``self.families_accuracy``, the
    ratio ``true_suggestions / num_suggestions`` read from
    ``high_confidence_counts.data`` is drawn against the iteration index
    (``0 .. self.monitoring.iteration_number - 1``).  The figure is saved as
    ``labels_families_high_confidence_suggestions.png``; that name is
    appended directly to ``self.output_directory`` (no separator is added).
    """
    x_axis = range(self.monitoring.iteration_number)
    y_upper_bound = 1

    def draw_curve(counts, curve_label, forced_color=None):
        # Draw one accuracy curve.  The dataset's own colour is used unless
        # the caller forces another one.
        accuracy = counts['true_suggestions'] / counts['num_suggestions']
        curve = PlotDataset(accuracy, curve_label)
        if forced_color is None:
            curve_color = curve.color
        else:
            curve_color = forced_color
        plt.plot(x_axis, curve.values,
                 label=curve.label,
                 color=curve_color,
                 linewidth=curve.linewidth,
                 marker=curve.marker)

    plt.clf()
    draw_curve(self.labels_accuracy.high_confidence_counts.data,
               'Labels Suggestions')
    draw_curve(self.families_accuracy.high_confidence_counts.data,
               'Families Suggestions',
               forced_color='purple')

    plt.ylim(0, y_upper_bound)
    plt.xlabel('Iteration')
    plt.ylabel('Suggestions Accuracy')
    # The legend sits above the axes; it is passed to savefig so that the
    # tight bounding box takes it into account.
    legend = plt.legend(bbox_to_anchor=(0., 1.02, 1., .102),
                        loc=3,
                        ncol=2,
                        mode='expand',
                        borderaxespad=0.,
                        fontsize='large')
    output_path = (self.output_directory +
                   'labels_families_high_confidence_suggestions.png')
    plt.savefig(output_path,
                bbox_extra_artists=(legend,),
                bbox_inches='tight')
    plt.clf()
def getTopWeightedFeatures(experiment_id, inst_exp_id, instance_id, size):
    """Return a JSON bar plot of the most heavily weighted features of an instance.

    The instance's feature values are read from the experiment identified by
    ``inst_exp_id``, scaled with the ``'scaler'`` step of the model pipeline
    of the experiment ``experiment_id`` and multiplied element-wise by the
    ``coef_`` of the pipeline's ``'model'`` step.  The ``size`` features with
    the largest absolute weighted value are kept, largest first.

    Args:
        experiment_id: identifier of the experiment providing the pipeline.
        inst_exp_id: identifier of the experiment providing the instance.
        instance_id: identifier of the instance (converted with ``int``).
        size: number of features to keep (converted with ``int``).

    Returns:
        The ``jsonify``-ed bar plot.  Bars carry the weighted values and are
        labelled with the feature names; the raw (unscaled) feature values
        are passed as tooltips.
    """
    instance_id = int(instance_id)
    exp = ExperimentFactory.getFactory().fromJson(experiment_id, session)
    validation_experiment = ExperimentFactory.getFactory().fromJson(
        inst_exp_id, session)

    # Raw feature values of the requested instance.
    features_names, features_values = validation_experiment.getFeatures(
        instance_id)
    features_values = [float(value) for value in features_values]

    # Pipeline holding the 'scaler' and 'model' steps.
    pipeline = exp.getModelPipeline()

    # The scaler is fed a single-row 2-D array, hence the (1, -1) reshape.
    scaled_values = pipeline.named_steps['scaler'].transform(
        np.reshape(features_values, (1, -1)))
    weighted_values = np.multiply(scaled_values,
                                  pipeline.named_steps['model'].coef_)

    # sorted(zip(...)) instead of map(...).sort(): a map() result is only a
    # list on Python 2, so the in-place sort fails on Python 3.
    features = sorted(zip(features_names, features_values,
                          weighted_values[0]),
                      key=lambda tup: abs(tup[2]))
    # Walk the ascending list backwards to keep the `size` largest entries.
    features = features[:-int(size) - 1:-1]

    tooltips = [raw_value for _, raw_value, _ in features]
    barplot = BarPlot([name for name, _, _ in features])
    dataset = PlotDataset([weighted for _, _, weighted in features], None)
    dataset.setColor(colors_tools.red)
    barplot.addDataset(dataset)
    return jsonify(barplot.toJson(tooltip_data=tooltips))
def generateBinaryHistogram(self):
    """Export a two-bar histogram (values 0 and 1) for each plot dataset.

    For every dataset in ``self.plot_datasets`` the number of values equal
    to 0 and equal to 1 is counted and added to a bar plot, keeping the
    dataset's label and colour.  The plot is written to
    ``binary_histogram.json``; that name is appended directly to
    ``self.output_directory``.
    """
    barplot = BarPlot(['0', '1'])
    # Only the datasets are needed, not their keys.  values() exists on both
    # Python 2 and Python 3, unlike iteritems().
    for dataset in self.plot_datasets.values():
        # Assumes dataset.values supports element-wise comparison
        # (e.g. a numpy array) -- TODO confirm.
        num_0 = sum(dataset.values == 0)
        num_1 = sum(dataset.values == 1)
        hist_dataset = PlotDataset([num_0, num_1], dataset.label)
        hist_dataset.setColor(dataset.color)
        barplot.addDataset(hist_dataset)
    output_filename = self.output_directory + 'binary_histogram.json'
    with open(output_filename, 'w') as f:
        barplot.exportJson(f)
def getTopModelFeatures(experiment_id, size):
    """Return a JSON bar plot of the model's most important features.

    The values returned by the experiment's ``getTopFeatures()`` are paired
    with ``getFeaturesNames()``; the ``size`` pairs with the largest absolute
    value are kept, largest first.

    Args:
        experiment_id: identifier of the experiment to load.
        size: number of features to keep (converted with ``int``).

    Returns:
        The ``jsonify``-ed bar plot.  The bars are coloured red only when the
        classification configuration reports ``'weight'`` as its feature
        importance.
    """
    size = int(size)
    exp = ExperimentFactory.getFactory().fromJson(experiment_id, session)
    model_coefficients = exp.getTopFeatures()
    features_names = exp.getFeaturesNames()

    # sorted(zip(...)) instead of map(...).sort(): a map() result is only a
    # list on Python 2, so the in-place sort fails on Python 3.
    coefficients = sorted(zip(features_names, model_coefficients),
                          key=lambda tup: abs(tup[1]))
    # Walk the ascending list backwards to keep the `size` largest entries.
    coefficients = coefficients[:-size - 1:-1]

    barplot = BarPlot([name for name, _ in coefficients])
    dataset = PlotDataset([coef for _, coef in coefficients], None)
    if exp.classification_conf.featureImportance() == 'weight':
        dataset.setColor(colors_tools.red)
    barplot.addDataset(dataset)
    return jsonify(barplot.toJson())
def generateHistogram(self):
    """Export a 10-bin histogram of every plot dataset on shared bins.

    The ten equal-width bins are computed on the values of all the datasets
    of ``self.plot_datasets`` taken together, so that no value of any
    dataset falls outside the bin range.  Each dataset is then counted on
    those shared bins and added to a bar plot with its own label and colour.
    The plot is written to ``histogram.json``; that name is appended
    directly to ``self.output_directory``.
    """
    datasets = list(self.plot_datasets.values())

    # 10 equal-width bins computed on all the data.  Deriving the edges from
    # a single dataset (e.g. only 'malicious') would silently drop the
    # values of the other datasets lying outside its range, because
    # np.histogram ignores values outside explicit bin edges.
    all_values = np.concatenate(
        [np.ravel(dataset.values) for dataset in datasets])
    _, bin_edges = np.histogram(all_values, bins=10, density=False)

    x_labels = [str(low) + ' - ' + str(high)
                for low, high in zip(bin_edges[:-1], bin_edges[1:])]
    barplot = BarPlot(x_labels)
    for dataset in datasets:
        hist, _ = np.histogram(dataset.values, bins=bin_edges, density=False)
        hist_dataset = PlotDataset(hist, dataset.label)
        hist_dataset.setColor(dataset.color)
        barplot.addDataset(hist_dataset)

    output_filename = self.output_directory + 'histogram.json'
    with open(output_filename, 'w') as f:
        barplot.exportJson(f)
def plotEvolutionMonitoring(self):
    """Plot the suggestions accuracy across the iterations and save it as PNG.

    The curve is ``self.data['true_suggestions'] /
    self.data['num_suggestions']`` drawn against the iteration index
    (``0 .. self.monitoring.iteration_number - 1``).  The figure is saved as
    ``<labels_families>_<kind>_suggestions.png``; that name is appended
    directly to ``self.output_directory``.

    Raises:
        ValueError: if ``self.labels_families`` is neither ``'labels'`` nor
            ``'families'``.
    """
    # Validate first: an unexpected value used to leave `title` unbound and
    # fail later with an opaque UnboundLocalError.
    if self.labels_families == 'labels':
        title = 'Labels Suggestions Accuracy'
    elif self.labels_families == 'families':
        title = 'Families Suggestions Accuracy'
    else:
        raise ValueError(
            "labels_families must be 'labels' or 'families', got %r"
            % (self.labels_families,))

    plot = PlotDataset(
        self.data['true_suggestions'] / self.data['num_suggestions'], title)
    iterations = range(self.monitoring.iteration_number)

    plt.clf()
    plt.plot(iterations, plot.values,
             label=plot.label,
             color=plot.color,
             linewidth=plot.linewidth,
             marker=plot.marker)
    plt.ylim(0, 1)
    plt.xlabel('Iteration')
    plt.ylabel('Suggestions Accuracy')
    # The legend sits above the axes; it is passed to savefig so that the
    # tight bounding box takes it into account.
    lgd = plt.legend(bbox_to_anchor=(0., 1.02, 1., .102),
                     loc=3,
                     ncol=2,
                     mode='expand',
                     borderaxespad=0.,
                     fontsize='large')
    filename = self.output_directory
    filename += self.labels_families + '_' + self.kind + '_suggestions.png'
    plt.savefig(filename, bbox_extra_artists=(lgd,), bbox_inches='tight')
    plt.clf()
def generatePlotDatasets(self, instances):
    """Fill ``self.plot_datasets`` with the values of ``self.feature``.

    When ``self.has_true_labels`` is set, two datasets are created,
    ``'malicious'`` and ``'benign'``, from the instances whose ids are
    returned by ``getMaliciousIds(true_labels=True)`` and
    ``getBenignIds(true_labels=True)``.  Otherwise a single ``'all'``
    dataset is built from every instance.  Each dataset is coloured with
    ``colors_tools.getLabelColor`` of its own name.

    Args:
        instances: object providing the ids, the sub-selection by ids and
            the feature values.
    """
    def labelled_dataset(ids, name):
        # Dataset restricted to the instances with the given ids.
        subset = instances.getInstancesFromIds(ids)
        dataset = PlotDataset(subset.getFeatureValues(self.feature), name)
        dataset.setColor(colors_tools.getLabelColor(name))
        return dataset

    self.plot_datasets = {}

    if not self.has_true_labels:
        everything = PlotDataset(instances.getFeatureValues(self.feature),
                                 'all')
        self.plot_datasets['all'] = everything
        everything.setColor(colors_tools.getLabelColor('all'))
        return

    self.plot_datasets['malicious'] = labelled_dataset(
        instances.getMaliciousIds(true_labels=True), 'malicious')
    self.plot_datasets['benign'] = labelled_dataset(
        instances.getBenignIds(true_labels=True), 'benign')
def plotEvolutionMonitoring(self):
    """Export the families evolution stored in ``self.data`` as a JSON bar plot.

    One dataset is added per row of ``self.data``: it holds the value found
    in the second column of that row and is labelled with the row position.
    The plot, built on ``self.families``, is written to
    ``<label>_families_evolution.json``; that name is appended directly to
    ``self.output_directory``.
    """
    chart = BarPlot(self.families)
    num_rows = self.data.shape[0]
    for row in range(num_rows):
        # Column 1 is addressed by position, not by name.
        row_dataset = PlotDataset([self.data.iloc[row, 1]], str(row))
        chart.addDataset(row_dataset)

    output_path = self.output_directory
    output_path += self.label + '_families_evolution.json'
    with open(output_path, 'w') as out:
        chart.exportJson(out)
def getFamiliesBarplot(experiment_id, iteration, label):
    """Return a JSON bar plot of the number of instances per family.

    The counts come from ``labels_tools.getFamiliesCounts`` for the given
    ``label``; the families are sorted in ascending order before plotting.

    Args:
        experiment_id: identifier of the experiment to load.
        iteration: value forwarded as ``iteration_max``; the string
            ``'None'`` is turned into ``None``.
        label: label whose families are counted; also selects the colour.

    Returns:
        The ``jsonify``-ed bar plot with a single ``'Num. Instances'``
        dataset.
    """
    experiment = updateCurrentExperiment(experiment_id)
    labels_id = experiment.labels_id
    iteration_max = None if iteration == 'None' else iteration

    family_counts = labels_tools.getFamiliesCounts(experiment.session,
                                                   labels_id,
                                                   iteration_max=iteration_max,
                                                   label=label)

    family_names = list(family_counts.keys())
    counts_table = pd.DataFrame({
        'families': family_names,
        'counts': [family_counts[name] for name in family_names],
    })
    matrix_tools.sortDataFrame(counts_table, 'families',
                               ascending=True, inplace=True)

    chart = BarPlot(list(counts_table['families']))
    counts_dataset = PlotDataset(list(counts_table['counts']),
                                 'Num. Instances')
    counts_dataset.setColor(colors_tools.getLabelColor(label))
    chart.addDataset(counts_dataset)
    return jsonify(chart.toJson())
def plotEvolutionMonitoring(self, evolution_file, monitoring_dir):
    """Export the families evolution read from a file as a JSON bar plot.

    The data is loaded with ``self.loadEvolutionMonitoring``.  One dataset
    is added per row: it holds the value found in the second column of that
    row and is labelled with the row position.

    Args:
        evolution_file: passed to ``self.loadEvolutionMonitoring``.
        monitoring_dir: prefix of the output path; the file name
            ``<label>_families_evolution.json`` is appended directly to it.
    """
    evolution = self.loadEvolutionMonitoring(evolution_file)

    chart = BarPlot(self.families)
    num_rows = evolution.shape[0]
    for row in range(num_rows):
        # Column 1 is addressed by position, not by name.
        row_dataset = PlotDataset([evolution.iloc[row, 1]], str(row))
        chart.addDataset(row_dataset)

    output_path = monitoring_dir
    output_path += self.label + '_families_evolution.json'
    with open(output_path, 'w') as out:
        chart.exportJson(out)
def plotPerfEvolution(self, estimators, output_filename):
    """Plot the evolution of the given estimators and save the figure as PNG.

    One curve per estimator is drawn from ``self.data[estimator]`` against
    the iteration numbers ``1 .. self.monitoring.iteration_number``.  The
    y-axis is limited to ``[0, 1]``.

    Args:
        estimators: names used both as keys of ``self.data`` and as curve
            labels.
        output_filename: passed to ``self.outputFilename`` together with
            ``'png'`` to obtain the path of the saved figure.
    """
    x_axis = range(1, self.monitoring.iteration_number + 1)

    plt.clf()
    for name in estimators:
        curve = PlotDataset(self.data[name], name)
        plt.plot(x_axis, curve.values,
                 label=curve.label,
                 color=curve.color,
                 linewidth=curve.linewidth,
                 marker=curve.marker)

    plt.ylim(0, 1)
    plt.xlabel('Iteration')
    plt.ylabel('Performance')
    # The legend sits above the axes; it is passed to savefig so that the
    # tight bounding box takes it into account.
    legend = plt.legend(bbox_to_anchor=(0., 1.02, 1., .102),
                        loc=3,
                        ncol=3,
                        mode='expand',
                        borderaxespad=0.,
                        fontsize='large')
    output_path = self.outputFilename(output_filename, 'png')
    plt.savefig(output_path,
                bbox_extra_artists=(legend,),
                bbox_inches='tight')
    plt.clf()
def getClusterStats(experiment_id):
    """Return a JSON bar plot of the number of instances in each cluster.

    The clustering is rebuilt with ``Clustering.fromJson``.  Every cluster
    index in ``range(num_clusters)`` contributes one bar labelled with the
    cluster's ``label``; empty clusters are included.

    Args:
        experiment_id: identifier of the experiment to load.

    Returns:
        The ``jsonify``-ed bar plot with a single ``'Num. Instances'``
        dataset.
    """
    experiment = updateCurrentExperiment(experiment_id)
    clustering = Clustering.fromJson(experiment)

    cluster_sizes = []
    cluster_labels = []
    for index in range(clustering.num_clusters):
        cluster = clustering.clusters[index]
        # No filtering on the size: empty clusters get a zero-height bar.
        cluster_sizes.append(len(cluster.instances_ids))
        cluster_labels.append(cluster.label)

    chart = BarPlot(cluster_labels)
    chart.addDataset(PlotDataset(cluster_sizes, 'Num. Instances'))
    return jsonify(chart.toJson())
def getFamiliesPerformance(experiment_id, train_test, label, threshold):
    """Return a JSON bar plot of the per-family performance at a threshold.

    The performances are read from
    ``<output dir><train_test>/families/<prefix>families_thresholds.csv``
    where ``<prefix>`` is ``tp_`` for ``'malicious'`` and ``fp_`` for
    ``'benign'``.  The row whose index is the closest to ``threshold / 100``
    is plotted (the last index value is not considered).

    Args:
        experiment_id: identifier of the experiment to load.
        train_test: sub-directory of the experiment output directory.
        label: ``'malicious'`` (detection rate) or ``'benign'`` (false
            positive rate).
        threshold: threshold expressed as a percentage; converted with
            ``float`` and divided by 100.

    Returns:
        The ``jsonify``-ed bar plot.

    Raises:
        ValueError: if ``label`` is neither ``'malicious'`` nor ``'benign'``.
    """
    # Validate before touching the experiment: an unknown label used to
    # build a wrong file name and leave `tp_fp` unbound.
    if label == 'malicious':
        prefix = 'tp_'
        tp_fp = 'Detection Rate'
    elif label == 'benign':
        prefix = 'fp_'
        tp_fp = 'False Positive Rate'
    else:
        raise ValueError(
            "label must be 'malicious' or 'benign', got %r" % (label,))

    experiment = updateCurrentExperiment(experiment_id)
    filename = experiment.getOutputDirectory() + train_test + '/families/'
    filename += prefix + 'families_thresholds.csv'
    with open(filename, 'r') as f:
        perf = pd.read_csv(f, header=0, index_col=0)

    # NOTE(review): the bar labels skip the last column while the plotted
    # row keeps every column -- confirm the intended alignment.
    families = list(perf.columns.values[:-1])
    threshold = float(threshold) / 100
    thresholds = list(perf.index[:-1])
    # Closest available threshold; the first one wins on ties.
    threshold_value = min(thresholds, key=lambda t: abs(t - threshold))
    perf = list(perf.loc[threshold_value])

    barplot = BarPlot(families)
    barplot.addDataset(PlotDataset(perf, tp_fp))
    return jsonify(barplot.toJson())
def display(self, directory):
    """Export two JSON bar plots of the instance counts of ``self.ranges``.

    ``predictions_barplot.json`` holds the number of elements of each entry
    of ``self.ranges``.  ``predictions_barplot_labels.json`` splits those
    counts according to the truthiness of each element's ``'true_label'``
    (truthy: ``'malicious'``, falsy: ``'benign'``).  The bars are labelled
    ``'0-10%'`` ... ``'90-100%'``.

    Args:
        directory: prefix of the output paths; the file names are appended
            directly to it.
    """
    # Ten 10%-wide buckets: '0-10%', '10-20%', ..., '90-100%'.
    # Assumes self.ranges holds one collection per bucket -- TODO confirm.
    labels = ['%d-%d%%' % (low, low + 10) for low in range(0, 100, 10)]

    # Comprehensions instead of map()/filter(): those only return lists on
    # Python 2, and len() of a Python 3 filter object raises TypeError.
    barplot = BarPlot(labels)
    dataset = PlotDataset([len(bucket) for bucket in self.ranges],
                          'numInstances')
    dataset.setColor(colors_tools.getLabelColor('all'))
    barplot.addDataset(dataset)
    filename = directory + 'predictions_barplot.json'
    with open(filename, 'w') as f:
        barplot.exportJson(f)

    barplot = BarPlot(labels)
    malicious_counts = [sum(1 for x in bucket if x['true_label'])
                        for bucket in self.ranges]
    benign_counts = [sum(1 for x in bucket if not x['true_label'])
                     for bucket in self.ranges]
    malicious_dataset = PlotDataset(malicious_counts, 'malicious')
    malicious_dataset.setColor(colors_tools.getLabelColor('malicious'))
    barplot.addDataset(malicious_dataset)
    benign_dataset = PlotDataset(benign_counts, 'benign')
    benign_dataset.setColor(colors_tools.getLabelColor('benign'))
    barplot.addDataset(benign_dataset)
    filename = directory + 'predictions_barplot_labels.json'
    with open(filename, 'w') as f:
        barplot.exportJson(f)
def executionTimeDisplay(self):
    """Return the plot datasets used for the execution-time display.

    Two value-less datasets, ``'Logistic Regression'`` (dotted) and
    ``'Naive Bayes'`` (dashed), are placed before the datasets returned by
    ``QueryStrategy.executionTimeDisplay``.
    """
    logistic_regression = PlotDataset(None, 'Logistic Regression')
    logistic_regression.setLinestyle('dotted')

    naive_bayes = PlotDataset(None, 'Naive Bayes')
    naive_bayes.setLinestyle('dashed')

    inherited = QueryStrategy.executionTimeDisplay(self)
    return [logistic_regression, naive_bayes] + inherited
def executionTimeDisplay(self):
    """Return the plot datasets used for the execution-time display.

    Three value-less datasets, ``'Binary model'``, ``'Malicious Analysis'``
    (dotted, malicious colour) and ``'Benign Analysis'`` (dashed, benign
    colour), are placed before the datasets returned by
    ``QueryStrategy.executionTimeDisplay``.
    """
    model_step = PlotDataset(None, 'Binary model')

    malicious_step = PlotDataset(None, 'Malicious Analysis')
    malicious_step.setLinestyle('dotted')
    malicious_step.setColor(colors_tools.getLabelColor('malicious'))

    benign_step = PlotDataset(None, 'Benign Analysis')
    benign_step.setLinestyle('dashed')
    benign_step.setColor(colors_tools.getLabelColor('benign'))

    inherited = QueryStrategy.executionTimeDisplay(self)
    return [model_step, malicious_step, benign_step] + inherited
def executionTimeDisplay(self):
    """Return the plot datasets used for the execution-time display.

    Three value-less datasets are returned in the order ``'Malicious
    Queries'`` (dotted, malicious colour), ``'Uncertain Queries'`` and
    ``'Benign Queries'`` (dashed, benign colour).
    """
    uncertain_queries = PlotDataset(None, 'Uncertain Queries')

    malicious_queries = PlotDataset(None, 'Malicious Queries')
    malicious_queries.setLinestyle('dotted')
    malicious_queries.setColor(colors_tools.getLabelColor('malicious'))

    benign_queries = PlotDataset(None, 'Benign Queries')
    benign_queries.setLinestyle('dashed')
    benign_queries.setColor(colors_tools.getLabelColor('benign'))

    # The uncertain queries sit between the malicious and benign ones.
    return [malicious_queries, uncertain_queries, benign_queries]
def executionTimeDisplay(self):
    """Return the plot datasets used for the execution-time display.

    A value-less ``'Analysis'`` dataset is placed before the datasets
    returned by ``QueryStrategy.executionTimeDisplay``.
    """
    analysis_step = PlotDataset(None, 'Analysis')
    inherited = QueryStrategy.executionTimeDisplay(self)
    return [analysis_step] + inherited
def executionTimeDisplay(self):
    """Return the plot datasets used for the execution-time display.

    A value-less ``'Binary model'`` dataset is placed before the datasets
    returned by ``QueryStrategy.executionTimeDisplay``.
    """
    model_step = PlotDataset(None, 'Binary model')
    inherited = QueryStrategy.executionTimeDisplay(self)
    return [model_step] + inherited
def executionTimeDisplay(self):
    """Return the plot datasets used for the execution-time display.

    The list holds a single value-less ``'Queries generation'`` dataset
    coloured purple.
    """
    queries_generation = PlotDataset(None, 'Queries generation')
    queries_generation.setColor('purple')
    return [queries_generation]