def getTopWeightedFeatures(exp_id, instance_id, size):
    """Return a JSON bar plot of an instance's most heavily weighted features.

    The instance's feature values are transformed by the classifier's
    'scaler' step and multiplied element-wise by the coefficients of its
    'model' step. The `size` features with the largest absolute weighted
    value are kept, largest first.

    Args:
        exp_id: identifier passed to `get_classifier` and `update_curr_exp`.
        instance_id: identifier of the instance; converted with `int`.
        size: number of features to keep; converted with `int`.

    Returns:
        `jsonify` applied to the bar plot JSON. Each tooltip holds the
        feature name followed by its raw (unscaled) value.

    Raises:
        ValueError: when the selection is empty (for instance `size` is 0),
            because the empty selection cannot be unpacked into three columns.
    """
    instance_id = int(instance_id)
    classifier = get_classifier(exp_id)
    # Fetch the raw feature names and values of the instance.
    exp = update_curr_exp(exp_id)
    f_names, f_values = FeaturesFromExp.get_instance(exp, instance_id)
    # Scale the values as a single-row matrix, then weight them with the
    # model coefficients.
    steps = classifier.named_steps
    scaled = steps['scaler'].transform(np.reshape(f_values, (1, -1)))
    weighted = np.multiply(scaled, steps['model'].coef_)
    # Ascending sort on |weighted value|; the reversed slice below then walks
    # back from the end, i.e. it keeps the last `size` entries, biggest first.
    ranked = sorted(zip(f_names, f_values, weighted[0]),
                    key=lambda item: abs(item[2]))
    top = ranked[:-int(size) - 1:-1]
    f_names, f_values, f_weighted = zip(*top)
    labels = list(map(str, f_names))
    tooltips = ['%s (%.2f)' % (name, value)
                for name, value in zip(f_names, f_values)]
    barplot = BarPlot(labels)
    dataset = PlotDataset(f_weighted, None)
    dataset.set_color(red)
    barplot.add_dataset(dataset)
    return jsonify(barplot.to_json(tooltip_data=tooltips))
def plot_evolution(self, monitoring_dir):
    """Plot the accuracy of high-confidence suggestions across iterations.

    Two curves are drawn against iterations 1..`self.monitoring.iter_num`:
    the ratio true_suggestions / num_suggestions of the high-confidence
    counts for labels and for families. The figure is saved as
    'labels_families_high_confidence_suggestions.png' in `monitoring_dir`.

    Args:
        monitoring_dir: directory where the PNG file is written.
    """
    iterations = list(range(1, self.monitoring.iter_num + 1))
    plt.clf()
    # Accuracies are ratios, so the y axis is capped at 1.
    y_max = 1
    # Labels
    counts = self.labels_accuracy.high_confidence_counts.data
    accuracy = counts['true_suggestions'] / counts['num_suggestions']
    labels_plot = PlotDataset(accuracy.values, 'Labels Suggestions')
    plt.plot(iterations,
             labels_plot.values,
             label=labels_plot.label,
             color=labels_plot.color,
             linewidth=labels_plot.linewidth,
             marker=labels_plot.marker)
    # Families (drawn in purple instead of the dataset's own color)
    counts = self.families_accuracy.high_confidence_counts.data
    accuracy = counts['true_suggestions'] / counts['num_suggestions']
    families_plot = PlotDataset(accuracy.values, 'Families Suggestions')
    plt.plot(iterations,
             families_plot.values,
             label=families_plot.label,
             color='purple',
             linewidth=families_plot.linewidth,
             marker=families_plot.marker)
    # Axes and legend
    plt.ylim(0, y_max)
    plt.xlabel('Iteration')
    plt.ylabel('Suggestions Accuracy')
    legend = plt.legend(bbox_to_anchor=(0., 1.02, 1., .102),
                        loc=3,
                        ncol=2,
                        mode='expand',
                        borderaxespad=0.,
                        fontsize='large')
    out_file = path.join(monitoring_dir,
                         'labels_families_high_confidence_suggestions.png')
    # The legend is passed as an extra artist for the tight bounding box.
    plt.savefig(out_file, bbox_extra_artists=(legend,), bbox_inches='tight')
    plt.clf()
def display_label(self, barplot, label):
    """Add to `barplot` the per-range count of instances with a given label.

    Args:
        barplot: bar plot receiving the new dataset (via `add_dataset`).
        label: label string, converted with `labels_tools.label_str_to_bool`
            and compared with each instance's 'ground_truth_label'.
    """
    expected = labels_tools.label_str_to_bool(label)
    counts = []
    for instances in self.ranges:
        matching = [x for x in instances
                    if x['ground_truth_label'] == expected]
        counts.append(len(matching))
    dataset = PlotDataset(counts, label)
    dataset.set_color(get_label_color(label))
    barplot.add_dataset(dataset)
def _gen_binary_histogram(self):
    """Build `self.barplot` with two bars ('0' and '1') per non-empty dataset.

    For every entry of `self.plot_datasets` that holds at least one value,
    the number of values equal to 0 and equal to 1 is counted and added as a
    dataset with the same label and color.
    """
    self.barplot = BarPlot(['0', '1'])
    for label, dataset in self.plot_datasets.items():
        if len(dataset.values) == 0:
            # Nothing to count: no bars are added for this label.
            continue
        counts = [sum(dataset.values == bit) for bit in (0, 1)]
        hist_dataset = PlotDataset(counts, label)
        hist_dataset.set_color(dataset.color)
        self.barplot.add_dataset(hist_dataset)
def _gen_label_plot_dataset(self, instances, label):
    """Store in `self.plot_datasets[label]` the feature values for `label`.

    Args:
        instances: object providing `get_unlabeled_instances` and
            `get_annotated_instances`.
        label: 'unlabeled' selects the unlabeled instances; any other value
            selects the instances annotated with that label.
    """
    if label == 'unlabeled':
        selected = instances.get_unlabeled_instances()
    else:
        selected = instances.get_annotated_instances(label=label)
    # Values of the feature at `self.feature_index` for the selection.
    values = selected.features.get_values_from_index(self.feature_index)
    dataset = PlotDataset(values, label)
    dataset.set_color(get_label_color(label))
    self.plot_datasets[label] = dataset
def display_label(self, barplot, label):
    """Add to `barplot` the number of instances of each range for `label`.

    Args:
        barplot: bar plot receiving the new dataset (via `add_dataset`).
        label: 'all' counts every instance of each range; any other value is
            converted with `label_str_to_bool` and only the instances whose
            'ground_truth_label' equals it are counted.
    """
    if label == 'all':
        ranges = self.ranges
    else:
        expected = label_str_to_bool(label)
        ranges = [[x for x in instances
                   if x['ground_truth_label'] == expected]
                  for instances in self.ranges]
    sizes = np.array(list(map(len, ranges)))
    dataset = PlotDataset(sizes, label)
    dataset.set_color(get_label_color(label))
    barplot.add_dataset(dataset)
def display(self, directory):
    """Export the predictions bar plot to 'predictions_barplot.json'.

    The x axis holds ten labels, '0-10%' to '90-100%'. With ground truth,
    one dataset per label is added (MALICIOUS, then BENIGN) through
    `display_label`; without it, a single 'num_instances' dataset holds the
    size of each entry of `self.ranges`.

    Args:
        directory: directory where the JSON file is written.
    """
    labels = [
        '0-10%', '10-20%', '20-30%', '30-40%', '40-50%',
        '50-60%', '60-70%', '70-80%', '80-90%', '90-100%',
    ]
    barplot = BarPlot(labels)
    if self.has_ground_truth:
        self.display_label(barplot, MALICIOUS)
        self.display_label(barplot, BENIGN)
    else:
        sizes = [len(instances) for instances in self.ranges]
        dataset = PlotDataset(sizes, 'num_instances')
        dataset.set_color(get_label_color('all'))
        barplot.add_dataset(dataset)
    barplot.export_to_json(path.join(directory, 'predictions_barplot.json'))
def set_predictions(self, predictions):
    """Store `predictions` and rebuild `self.datasets` from their scores.

    Without ground truth, a single 'all' dataset holds every score.
    Otherwise one dataset per label (MALICIOUS, BENIGN) holds the scores of
    the instances whose ground truth equals that label.

    Args:
        predictions: object exposing `scores`, `ground_truth` and
            `num_instances()`.
    """
    self.predictions = predictions
    self.datasets = {}
    if not self.has_ground_truth:
        self.datasets['all'] = PlotDataset(predictions.scores, 'all')
        return
    for label in (MALICIOUS, BENIGN):
        expected = label_str_to_bool(label)
        # Positional indexing is kept on purpose: the container types of
        # `scores` and `ground_truth` are not known here.
        label_scores = [
            predictions.scores[index]
            for index in range(predictions.num_instances())
            if predictions.ground_truth[index] == expected
        ]
        self.datasets[label] = PlotDataset(np.array(label_scores), label)
def getFamiliesBarplot(annotations_id, iteration, label):
    """Return a JSON bar plot of the number of instances per family.

    Args:
        annotations_id: identifier passed to
            `annotations_db_tools.get_families_counts`.
        iteration: the string 'None' means no `iter_max`; any other value is
            converted with `int` and used as `iter_max`.
        label: label passed to the query and used to pick the bar color.

    Returns:
        `jsonify` applied to the bar plot JSON; families are sorted in
        ascending order.
    """
    if iteration == 'None':
        iter_max = None
    else:
        iter_max = int(iteration)
    family_counts = annotations_db_tools.get_families_counts(
        session, annotations_id, iter_max=iter_max, label=label)
    families = list(family_counts.keys())
    counts = [family_counts[family] for family in families]
    df = pd.DataFrame({'families': families, 'counts': counts})
    sort_data_frame(df, 'families', ascending=True, inplace=True)
    barplot = BarPlot(df['families'].values)
    dataset = PlotDataset(df['counts'].values, 'Num. Instances')
    dataset.set_color(get_label_color(label))
    barplot.add_dataset(dataset)
    return jsonify(barplot.to_json())
def _display(self, barplot, labels, error=None):
    """Add to `barplot` the number of predictions stored for each label.

    Args:
        barplot: bar plot receiving the new dataset (via `add_dataset`).
        labels: keys of `self.predictions` to count, in display order.
        error: None counts every prediction ('all'). Otherwise only the
            predictions for which `(ground_truth_label == key) != error`
            holds are counted: those whose ground truth differs from the key
            when `error` is true ('wrong predictions'), and those whose
            ground truth equals it when `error` is false
            ('right predictions').
    """
    if error is None:
        values = [len(self.predictions[key]) for key in labels]
        label = 'all'
    else:
        values = []
        for key in labels:
            selected = [
                p for p in self.predictions[key]
                if (p['ground_truth_label'] == key) != error
            ]
            values.append(len(selected))
        if error:
            label = 'wrong predictions'
        else:
            label = 'right predictions'
    dataset = PlotDataset(np.array(values), label)
    dataset.set_color(get_error_color(error))
    barplot.add_dataset(dataset)
def get_exec_times_display(self):
    """Return the datasets used to display execution times.

    Returns:
        A list starting with the 'Logistic Regression' (dotted) and
        'Naive Bayes' (dashed) datasets, followed by the datasets returned
        by `CoreAladin.get_exec_times_display`.
    """
    logistic_regression = PlotDataset(None, 'Logistic Regression')
    logistic_regression.set_linestyle('dotted')
    naive_bayes = PlotDataset(None, 'Naive Bayes')
    naive_bayes.set_linestyle('dashed')
    own = [logistic_regression, naive_bayes]
    return own + list(CoreAladin.get_exec_times_display(self))
def _gen_histogram(self):
    """Build `self.barplot` as a 10-bin histogram of every non-empty dataset.

    The bin edges are computed once from `self.all_values` and shared by all
    the datasets, so their bars are directly comparable.
    """
    # 10 equal-width bins computed on all the data
    bin_edges = np.histogram(self.all_values, bins=10, density=False)[1]
    # One x label per bin: 'lower edge - upper edge'.
    x_labels = ['%.2f - %.2f' % (lower, upper)
                for lower, upper in zip(bin_edges[:-1], bin_edges[1:])]
    self.barplot = BarPlot(x_labels)
    for label, dataset in self.plot_datasets.items():
        if len(dataset.values) == 0:
            # Empty datasets get no bars.
            continue
        counts, _ = np.histogram(dataset.values,
                                 bins=bin_edges,
                                 density=False)
        hist_dataset = PlotDataset(counts, label)
        hist_dataset.set_color(dataset.color)
        self.barplot.add_dataset(hist_dataset)
def plot_evolution(self, evolution_file, monitoring_dir):
    """Plot the suggestions accuracy across iterations and keep the data.

    The accuracy is the ratio true_suggestions / num_suggestions of the data
    returned by `self.load_evolution`. The figure is saved in
    `monitoring_dir` as '<labels_families>_<kind>_suggestions.png' and the
    loaded data is stored in `self.data`.

    Args:
        evolution_file: file passed to `self.load_evolution`.
        monitoring_dir: directory where the PNG file is written.

    Raises:
        ValueError: if `self.labels_families` is neither 'labels' nor
            'families' (there is no title for any other value).
    """
    data = self.load_evolution(evolution_file)
    if self.labels_families == 'labels':
        title = 'Labels Suggestions Accuracy'
    elif self.labels_families == 'families':
        title = 'Families Suggestions Accuracy'
    else:
        # Fail explicitly instead of hitting an unbound `title` further down.
        raise ValueError(
            "labels_families must be 'labels' or 'families', got %r"
            % (self.labels_families,))
    values = data['true_suggestions'] / data['num_suggestions']
    plot = PlotDataset(values.values, title)
    # The x axis is 0-based here: 0 .. iter_num - 1.
    iterations = list(range(self.monitoring.iter_num))
    plt.clf()
    plt.plot(iterations,
             plot.values,
             label=plot.label,
             color=plot.color,
             linewidth=plot.linewidth,
             marker=plot.marker)
    # Accuracies are ratios, so the y axis is capped at 1.
    plt.ylim(0, 1)
    plt.xlabel('Iteration')
    plt.ylabel('Suggestions Accuracy')
    lgd = plt.legend(bbox_to_anchor=(0., 1.02, 1., .102),
                     loc=3,
                     ncol=2,
                     mode='expand',
                     borderaxespad=0.,
                     fontsize='large')
    filename = '_'.join([self.labels_families, self.kind, 'suggestions.png'])
    filename = path.join(monitoring_dir, filename)
    # The legend is passed as an extra artist for the tight bounding box.
    plt.savefig(filename, bbox_extra_artists=(lgd,), bbox_inches='tight')
    plt.clf()
    self.data = data
def _gen_label_plot_dataset(self, instances, label=None, family=None,
                            color=None):
    """Store in `self.plot_datasets` the feature values of a label or family.

    Args:
        instances: object providing `get_unlabeled_instances` and
            `get_annotated_instances`.
        label: when not None, selects the instances: 'unlabeled' picks the
            unlabeled ones, any other value the instances annotated with it.
        family: used only when `label` is None; selects the instances
            annotated with that family.
        color: color of the dataset; when None, `get_label_color` is called
            with the dataset's name.

    The dataset is named, and keyed in `self.plot_datasets`, after `label`
    when it is given and after `family` otherwise.
    """
    if label is None:
        selected = instances.get_annotated_instances(family=family)
    elif label == 'unlabeled':
        selected = instances.get_unlabeled_instances()
    else:
        selected = instances.get_annotated_instances(label=label)
    values = selected.features.get_values_from_index(self.feature_index)
    if isinstance(values, spmatrix):
        # Sparse matrices are converted to a dense array.
        values = values.toarray()
    plot_label = family if label is None else label
    plot_color = get_label_color(plot_label) if color is None else color
    dataset = PlotDataset(values, plot_label)
    dataset.set_color(plot_color)
    self.plot_datasets[plot_label] = dataset
def to_barplot(self, directory):
    """Export the first coefficients of the summary to 'coeff_barplot.json'.

    The first NUM_COEFF_EXPORT rows of `self.coef_summary` are used: their
    'mean' column gives the bar heights, and each index value is looked up
    as a feature id in the database to get the bar label (`user_id`) and the
    tooltip (`name`).

    Args:
        directory: directory where the JSON file is written.

    Returns:
        Whatever `barplot.export_to_json` returns.
    """
    head_coeff = self.coef_summary.head(n=NUM_COEFF_EXPORT)
    coefficients = list(head_coeff['mean'])
    user_ids = []
    tooltip_data = []
    for feature_id in head_coeff.index:
        # `one()` is called once per feature id, in summary order.
        row = (self.session.query(FeaturesAlchemy)
               .filter(FeaturesAlchemy.id == int(feature_id))
               .one())
        tooltip_data.append(row.name)
        user_ids.append(row.user_id)
    barplot = BarPlot(user_ids)
    dataset = PlotDataset(coefficients, None)
    # Bars are colored red only when the feature importance is 'weight'.
    if self.classifier_conf.get_feature_importance() == 'weight':
        dataset.set_color(red)
    barplot.add_dataset(dataset)
    return barplot.export_to_json(path.join(directory, 'coeff_barplot.json'),
                                  tooltip_data=tooltip_data)
def getClusterStats(exp_id):
    """Return a JSON bar plot of the number of instances in each cluster.

    Args:
        exp_id: identifier passed to `update_curr_exp`; the clustering is
            loaded from the experiment's output directory.

    Returns:
        `jsonify` applied to the bar plot JSON, with one bar per cluster
        labelled with the cluster's `label`.
    """
    experiment = update_curr_exp(exp_id)
    clustering = ClustersExp.from_json(experiment.output_dir())
    sizes = []
    labels = []
    for index in range(clustering.num_clusters):
        cluster = clustering.clusters[index]
        sizes.append(len(cluster.instances_ids))
        labels.append(cluster.label)
    barplot = BarPlot(labels)
    barplot.add_dataset(PlotDataset(sizes, 'Num. Instances'))
    return jsonify(barplot.to_json())
def plot_perf_evolution(self, estimators, output_filename, data, output_dir):
    """Plot the performance of each estimator across iterations.

    One curve per estimator is drawn against iterations
    1..`self.iter_num`, and the figure is saved as '<self.kind>.png' in
    `output_dir`.

    Args:
        estimators: names of the estimators; each one is a key of `data`
            and the label of its curve.
        output_filename: not used by this method (the file name is derived
            from `self.kind`).
        data: mapping from estimator name to the values to plot.
        output_dir: directory where the PNG file is written.
    """
    iterations = list(range(1, self.iter_num + 1))
    plt.clf()
    for name in estimators:
        curve = PlotDataset(data[name], name)
        plt.plot(iterations,
                 curve.values,
                 label=curve.label,
                 color=curve.color,
                 linewidth=curve.linewidth,
                 marker=curve.marker)
    plt.ylim(0, 1)
    plt.xlabel('Iteration')
    plt.ylabel('Performance')
    legend = plt.legend(bbox_to_anchor=(0., 1.02, 1., .102),
                        loc=3,
                        ncol=3,
                        mode='expand',
                        borderaxespad=0.,
                        fontsize='large')
    out_file = path.join(output_dir, '%s.png' % self.kind)
    # The legend is passed as an extra artist for the tight bounding box.
    plt.savefig(out_file, bbox_extra_artists=(legend,), bbox_inches='tight')
    plt.clf()
def get_exec_times_display(self):
    """Return the datasets used to display execution times.

    Returns:
        A one-element list holding the purple 'Queries generation' dataset.
    """
    dataset = PlotDataset(None, 'Queries generation')
    dataset.set_color('purple')
    return [dataset]
def exec_time_display(self):
    """Return the datasets used to display execution times.

    Returns:
        A list starting with the 'Binary model' dataset (created with an
        empty array), followed by the datasets returned by
        `CoreRandom.get_exec_times_display`.
    """
    datasets = [PlotDataset(np.array([]), 'Binary model')]
    return datasets + list(CoreRandom.get_exec_times_display(self))
def get_exec_times_display(self):
    """Return the datasets used to display query execution times.

    Returns:
        The list [malicious, uncertain, benign]: 'Malicious Queries' is
        dotted and 'Benign Queries' is dashed, each in its label color;
        'Uncertain Queries' has no explicit line style or color.
    """
    # The datasets are created in the same order as before (uncertain
    # first), even though they are returned in a different one.
    uncertain_queries = PlotDataset(None, 'Uncertain Queries')

    malicious_queries = PlotDataset(None, 'Malicious Queries')
    malicious_queries.set_linestyle('dotted')
    malicious_queries.set_color(get_label_color(MALICIOUS))

    benign_queries = PlotDataset(None, 'Benign Queries')
    benign_queries.set_linestyle('dashed')
    benign_queries.set_color(get_label_color(BENIGN))

    return [malicious_queries, uncertain_queries, benign_queries]
def get_exec_times_display(self):
    """Return the datasets used to display execution times.

    Returns:
        A list starting with the 'Binary model' dataset, followed by the
        datasets returned by `CoreUncertainty.get_exec_times_display`.
    """
    datasets = [PlotDataset(None, 'Binary model')]
    datasets.extend(CoreUncertainty.get_exec_times_display(self))
    return datasets
def get_exec_times_display(self):
    """Return the datasets used to display execution times.

    Returns:
        A list starting with the 'Analysis' dataset, followed by the
        datasets returned by `CoreRcd.get_exec_times_display`.
    """
    analysis = PlotDataset(None, 'Analysis')
    return [analysis] + list(CoreRcd.get_exec_times_display(self))
def get_exec_times_display(self):
    """Return the datasets used to display execution times.

    Returns:
        A list starting with the 'Binary model' dataset, followed by the
        datasets returned by `CoreCesaBianchi.get_exec_times_display`.
    """
    datasets = [PlotDataset(None, 'Binary model')]
    datasets += list(CoreCesaBianchi.get_exec_times_display(self))
    return datasets