Exemple #1
0
def getTopWeightedFeatures(exp_id, instance_id, size):
    """Build a JSON bar plot of an instance's most heavily weighted features.

    The instance's feature values go through the classifier's ``'scaler'``
    step and are multiplied element-wise by the ``coef_`` of its ``'model'``
    step. Features are ranked by the absolute value of that product and the
    ``size`` largest are kept, largest first.

    Args:
        exp_id: Experiment identifier, forwarded to ``get_classifier`` and
            ``update_curr_exp``.
        instance_id: Instance identifier; converted with ``int``.
        size: Number of features to keep; converted with ``int``.

    Returns:
        The ``jsonify``-ed bar plot. Bars are labelled with the feature
        names; each tooltip shows the name followed by the raw (unscaled)
        feature value.
    """
    instance_id = int(instance_id)
    classifier = get_classifier(exp_id)
    exp = update_curr_exp(exp_id)
    f_names, f_values = FeaturesFromExp.get_instance(exp, instance_id)
    # Reshape the values into a single row before scaling them.
    row = np.reshape(f_values, (1, -1))
    scaled = classifier.named_steps['scaler'].transform(row)
    weighted = np.multiply(scaled, classifier.named_steps['model'].coef_)
    # Ascending sort followed by a reversal puts the largest absolute
    # weights first.
    ranked = sorted(zip(f_names, f_values, weighted[0]),
                    key=lambda feature: abs(feature[2]))
    top = ranked[::-1][:int(size)]
    f_names, f_values, f_weighted = list(zip(*top))
    labels = list(map(str, f_names))
    tooltips = ['%s (%.2f)' % (name, value)
                for name, value in zip(f_names, f_values)]
    barplot = BarPlot(labels)
    weights = PlotDataset(f_weighted, None)
    weights.set_color(red)
    barplot.add_dataset(weights)
    return jsonify(barplot.to_json(tooltip_data=tooltips))
 def plot_evolution(self, monitoring_dir):
     """Plot labels and families suggestion accuracy and save it as a PNG.

     For both ``self.labels_accuracy`` and ``self.families_accuracy`` the
     ratio ``true_suggestions / num_suggestions`` is read from
     ``high_confidence_counts.data`` and drawn against iterations
     ``1..self.monitoring.iter_num``.

     Args:
         monitoring_dir: Directory that receives
             ``labels_families_high_confidence_suggestions.png``.
     """
     iterations = list(range(1, self.monitoring.iter_num + 1))
     plt.clf()

     def draw_accuracy(counts, legend, color=None):
         # Draw one accuracy curve; ``color`` overrides the dataset colour.
         accuracy = counts['true_suggestions'] / counts['num_suggestions']
         series = PlotDataset(accuracy.values, legend)
         plt.plot(iterations, series.values,
                  label=series.label,
                  color=series.color if color is None else color,
                  linewidth=series.linewidth,
                  marker=series.marker)

     draw_accuracy(self.labels_accuracy.high_confidence_counts.data,
                   'Labels Suggestions')
     draw_accuracy(self.families_accuracy.high_confidence_counts.data,
                   'Families Suggestions', color='purple')
     # The y axis is fixed to [0, 1].
     plt.ylim(0, 1)
     plt.xlabel('Iteration')
     plt.ylabel('Suggestions Accuracy')
     legend_box = plt.legend(bbox_to_anchor=(0., 1.02, 1., .102), loc=3,
                             ncol=2, mode='expand', borderaxespad=0.,
                             fontsize='large')
     output_path = path.join(
         monitoring_dir, 'labels_families_high_confidence_suggestions.png')
     # The legend is passed as an extra artist for the 'tight' bounding box.
     plt.savefig(output_path, bbox_extra_artists=(legend_box,),
                 bbox_inches='tight')
     plt.clf()
Exemple #3
0
 def display_label(self, barplot, label):
     """Add to ``barplot`` the per-range instance counts for one label.

     For every range in ``self.ranges``, counts the entries whose
     ``'ground_truth_label'`` equals the boolean form of ``label``.

     Args:
         barplot: Bar plot that receives the new dataset.
         label: Label name, converted with
             ``labels_tools.label_str_to_bool``.
     """
     expected = labels_tools.label_str_to_bool(label)
     counts = [
         sum(1 for item in bucket if item['ground_truth_label'] == expected)
         for bucket in self.ranges
     ]
     per_label = PlotDataset(counts, label)
     per_label.set_color(get_label_color(label))
     barplot.add_dataset(per_label)
Exemple #4
0
 def _gen_binary_histogram(self):
     """Build ``self.barplot`` with two bars ('0' and '1') per dataset.

     Each non-empty entry of ``self.plot_datasets`` contributes one dataset
     holding the number of values equal to 0 and equal to 1, drawn in the
     source dataset's colour.
     """
     self.barplot = BarPlot(['0', '1'])
     for label, source in self.plot_datasets.items():
         if not len(source.values) > 0:
             continue
         # NOTE(review): assumes ``values`` supports element-wise comparison
         # (e.g. a NumPy array) - confirm against PlotDataset.
         counts = [sum(source.values == target) for target in (0, 1)]
         binary_dataset = PlotDataset(counts, label)
         binary_dataset.set_color(source.color)
         self.barplot.add_dataset(binary_dataset)
Exemple #5
0
 def _gen_label_plot_dataset(self, instances, label):
     """Store in ``self.plot_datasets`` the feature values for one label.

     Args:
         instances: Instances container. ``'unlabeled'`` selects
             ``get_unlabeled_instances()``; any other label selects
             ``get_annotated_instances(label=label)``.
         label: Label name; also used as dataset label and dictionary key.
     """
     if label == 'unlabeled':
         selected = instances.get_unlabeled_instances()
     else:
         selected = instances.get_annotated_instances(label=label)
     feature_values = selected.features.get_values_from_index(
         self.feature_index)
     label_dataset = PlotDataset(feature_values, label)
     label_dataset.set_color(get_label_color(label))
     self.plot_datasets[label] = label_dataset
Exemple #6
0
 def display_label(self, barplot, label):
     """Add to ``barplot`` the per-range instance counts for ``label``.

     With ``label == 'all'`` every entry of each range is counted;
     otherwise only entries whose ``'ground_truth_label'`` equals the
     boolean form of ``label`` are counted.

     Args:
         barplot: Bar plot that receives the new dataset.
         label: ``'all'`` or a label name accepted by
             ``label_str_to_bool``.
     """
     if label == 'all':
         counts = [len(bucket) for bucket in self.ranges]
     else:
         expected = label_str_to_bool(label)
         counts = [
             sum(1 for item in bucket
                 if item['ground_truth_label'] == expected)
             for bucket in self.ranges
         ]
     per_label = PlotDataset(np.array(counts), label)
     per_label.set_color(get_label_color(label))
     barplot.add_dataset(per_label)
Exemple #7
0
 def display(self, directory):
     """Export the per-range instance counts as ``predictions_barplot.json``.

     When ground truth is available, one dataset per label (MALICIOUS then
     BENIGN) is added through ``display_label``; otherwise a single
     ``'num_instances'`` dataset counts every entry of each range.

     Args:
         directory: Directory that receives the JSON file.
     """
     barplot = BarPlot(['0-10%', '10-20%', '20-30%', '30-40%', '40-50%',
                        '50-60%', '60-70%', '70-80%', '80-90%', '90-100%'])
     if self.has_ground_truth:
         for label in (MALICIOUS, BENIGN):
             self.display_label(barplot, label)
     else:
         counts = PlotDataset([len(bucket) for bucket in self.ranges],
                              'num_instances')
         counts.set_color(get_label_color('all'))
         barplot.add_dataset(counts)
     output_path = path.join(directory, 'predictions_barplot.json')
     barplot.export_to_json(output_path)
Exemple #8
0
 def set_predictions(self, predictions):
     """Store ``predictions`` and rebuild ``self.datasets`` from their scores.

     Without ground truth a single ``'all'`` dataset holds every score.
     With ground truth there is one dataset per label (MALICIOUS, BENIGN)
     holding the scores of the instances whose ground truth equals the
     boolean form of that label.

     Args:
         predictions: Object exposing ``scores``, ``ground_truth`` and
             ``num_instances()``.
     """
     self.predictions = predictions
     self.datasets = {}
     if not self.has_ground_truth:
         self.datasets['all'] = PlotDataset(predictions.scores, 'all')
         return
     for label in (MALICIOUS, BENIGN):
         expected = label_str_to_bool(label)
         label_scores = []
         for index in range(predictions.num_instances()):
             if predictions.ground_truth[index] == expected:
                 label_scores.append(predictions.scores[index])
         self.datasets[label] = PlotDataset(np.array(label_scores), label)
Exemple #9
0
def getFamiliesBarplot(annotations_id, iteration, label):
    """Return a JSON bar plot of the number of instances per family.

    Args:
        annotations_id: Annotations identifier forwarded to
            ``annotations_db_tools.get_families_counts``.
        iteration: The string ``'None'`` for no iteration bound, otherwise
            a value converted with ``int`` and passed as ``iter_max``.
        label: Label forwarded to the counts query; also selects the bar
            colour through ``get_label_color``.

    Returns:
        The ``jsonify``-ed bar plot, with families sorted in ascending
        order.
    """
    iter_max = int(iteration) if iteration != 'None' else None
    family_counts = annotations_db_tools.get_families_counts(
        session, annotations_id, iter_max=iter_max, label=label)
    families = list(family_counts.keys())
    counts = [family_counts[family] for family in families]
    df = pd.DataFrame({'families': families, 'counts': counts})
    sort_data_frame(df, 'families', ascending=True, inplace=True)
    barplot = BarPlot(df['families'].values)
    per_family = PlotDataset(df['counts'].values, 'Num. Instances')
    per_family.set_color(get_label_color(label))
    barplot.add_dataset(per_family)
    return jsonify(barplot.to_json())
Exemple #10
0
 def _display(self, barplot, labels, error=None):
     """Add to ``barplot`` the number of predictions for each label.

     Args:
         barplot: Bar plot that receives the new dataset.
         labels: Keys of ``self.predictions`` to count, in display order.
         error: ``None`` counts every prediction (dataset ``'all'``).
             A truthy value counts predictions whose
             ``'ground_truth_label'`` differs from the label
             (``'wrong predictions'``); a falsy non-``None`` value counts
             those that match (``'right predictions'``).
     """
     if error is None:
         name = 'all'
         counts = [len(self.predictions[key]) for key in labels]
     else:
         name = 'wrong predictions' if error else 'right predictions'
         counts = [
             sum(1 for prediction in self.predictions[key]
                 if (prediction['ground_truth_label'] == key) != error)
             for key in labels
         ]
     predictions_dataset = PlotDataset(np.array(counts), name)
     predictions_dataset.set_color(get_error_color(error))
     barplot.add_dataset(predictions_dataset)
Exemple #11
0
 def get_exec_times_display(self):
     """Return the datasets used to display execution times.

     Returns:
         A list starting with the 'Logistic Regression' (dotted) and
         'Naive Bayes' (dashed) datasets, followed by the entries returned
         by ``CoreAladin.get_exec_times_display``.
     """
     logistic_regression = PlotDataset(None, 'Logistic Regression')
     logistic_regression.set_linestyle('dotted')
     naive_bayes = PlotDataset(None, 'Naive Bayes')
     naive_bayes.set_linestyle('dashed')
     return [logistic_regression, naive_bayes,
             *CoreAladin.get_exec_times_display(self)]
Exemple #12
0
 def _gen_histogram(self):
     """Build ``self.barplot`` as a 10-bin histogram shared by all datasets.

     Bin edges are computed once from ``self.all_values`` so that every
     dataset of ``self.plot_datasets`` is binned identically. Empty
     datasets are skipped.
     """
     _, bin_edges = np.histogram(self.all_values, bins=10, density=False)
     # One 'low - high' label per pair of consecutive edges.
     x_labels = ['%.2f - %.2f' % (lower, upper)
                 for lower, upper in zip(bin_edges[:-1], bin_edges[1:])]
     self.barplot = BarPlot(x_labels)
     for label, source in self.plot_datasets.items():
         if not len(source.values) > 0:
             continue
         counts, _ = np.histogram(source.values, bins=bin_edges,
                                  density=False)
         binned = PlotDataset(counts, label)
         binned.set_color(source.color)
         self.barplot.add_dataset(binned)
 def plot_evolution(self, evolution_file, monitoring_dir):
     """Plot the suggestions accuracy over the iterations and save a PNG.

     The accuracy is ``true_suggestions / num_suggestions`` from the data
     returned by ``self.load_evolution``. The figure is written to
     ``<labels_families>_<kind>_suggestions.png`` in ``monitoring_dir`` and
     the loaded data is kept in ``self.data``.

     Args:
         evolution_file: Forwarded to ``self.load_evolution``.
         monitoring_dir: Directory that receives the PNG file.

     Raises:
         ValueError: If ``self.labels_families`` is neither ``'labels'``
             nor ``'families'``.
     """
     data = self.load_evolution(evolution_file)
     if self.labels_families == 'labels':
         title = 'Labels Suggestions Accuracy'
     elif self.labels_families == 'families':
         title = 'Families Suggestions Accuracy'
     else:
         # Without this branch ``title`` would be unbound and the failure
         # would surface later as an opaque UnboundLocalError.
         raise ValueError(
             "labels_families must be 'labels' or 'families', got %r"
             % (self.labels_families,))
     values = data['true_suggestions'] / data['num_suggestions']
     plot = PlotDataset(values.values, title)
     iterations = list(range(self.monitoring.iter_num))
     plt.clf()
     plt.plot(iterations, plot.values,
              label=plot.label,
              color=plot.color,
              linewidth=plot.linewidth,
              marker=plot.marker)
     # The y axis is fixed to [0, 1].
     plt.ylim(0, 1)
     plt.xlabel('Iteration')
     plt.ylabel('Suggestions Accuracy')
     lgd = plt.legend(bbox_to_anchor=(0., 1.02, 1., .102), loc=3,
                      ncol=2, mode='expand', borderaxespad=0.,
                      fontsize='large')
     filename = '_'.join([self.labels_families,
                          self.kind,
                          'suggestions.png'])
     filename = path.join(monitoring_dir, filename)
     # The legend is passed as an extra artist for the 'tight' bounding box.
     plt.savefig(filename, bbox_extra_artists=(lgd,), bbox_inches='tight')
     plt.clf()
     self.data = data
Exemple #14
0
 def _gen_label_plot_dataset(self, instances, label=None, family=None,
                             color=None):
     """Store in ``self.plot_datasets`` the feature values of a selection.

     Args:
         instances: Instances container to select from.
         label: When given, ``'unlabeled'`` selects the unlabeled instances
             and any other value the instances annotated with that label.
         family: Used only when ``label`` is ``None``; selects the
             instances annotated with that family.
         color: Dataset colour; defaults to ``get_label_color`` of the
             dataset label.

     The dataset label (and dictionary key) is ``label`` when it is not
     ``None``, otherwise ``family``.
     """
     if label is None:
         plot_label = family
         selected = instances.get_annotated_instances(family=family)
     elif label == 'unlabeled':
         plot_label = label
         selected = instances.get_unlabeled_instances()
     else:
         plot_label = label
         selected = instances.get_annotated_instances(label=label)
     values = selected.features.get_values_from_index(self.feature_index)
     # Sparse values are converted to a dense array first.
     if isinstance(values, spmatrix):
         values = values.toarray()
     plot_color = get_label_color(plot_label) if color is None else color
     selection_dataset = PlotDataset(values, plot_label)
     selection_dataset.set_color(plot_color)
     self.plot_datasets[plot_label] = selection_dataset
Exemple #15
0
 def to_barplot(self, directory):
     """Export the first ``NUM_COEFF_EXPORT`` mean coefficients as a barplot.

     Each feature id in the head of ``self.coef_summary`` is looked up in
     the database: its ``user_id`` labels the bar and its ``name`` is used
     as tooltip.

     Args:
         directory: Directory that receives ``coeff_barplot.json``.

     Returns:
         Whatever ``BarPlot.export_to_json`` returns.
     """
     top_coeffs = self.coef_summary.head(n=NUM_COEFF_EXPORT)
     coefficients = list(top_coeffs['mean'])
     tooltip_data = []
     user_ids = []
     for feature_id in top_coeffs.index:
         # ``one()`` expects exactly one matching row per feature id.
         feature = (self.session.query(FeaturesAlchemy)
                    .filter(FeaturesAlchemy.id == int(feature_id))
                    .one())
         tooltip_data.append(feature.name)
         user_ids.append(feature.user_id)
     barplot = BarPlot(user_ids)
     coeff_dataset = PlotDataset(coefficients, None)
     # The colour is only overridden for the 'weight' importance score.
     if self.classifier_conf.get_feature_importance() == 'weight':
         coeff_dataset.set_color(red)
     barplot.add_dataset(coeff_dataset)
     return barplot.export_to_json(path.join(directory,
                                             'coeff_barplot.json'),
                                   tooltip_data=tooltip_data)
Exemple #16
0
def getClusterStats(exp_id):
    """Return a JSON bar plot of the number of instances in each cluster.

    Args:
        exp_id: Experiment identifier, forwarded to ``update_curr_exp``.

    Returns:
        The ``jsonify``-ed bar plot with one bar per cluster, labelled with
        the cluster's ``label``.
    """
    experiment = update_curr_exp(exp_id)
    clustering = ClustersExp.from_json(experiment.output_dir())
    clusters = [clustering.clusters[index]
                for index in range(clustering.num_clusters)]
    sizes = [len(cluster.instances_ids) for cluster in clusters]
    labels = [cluster.label for cluster in clusters]
    barplot = BarPlot(labels)
    barplot.add_dataset(PlotDataset(sizes, 'Num. Instances'))
    return jsonify(barplot.to_json())
Exemple #17
0
 def plot_perf_evolution(self, estimators, output_filename, data,
                         output_dir):
     """Plot one performance curve per estimator and save it as a PNG.

     Args:
         estimators: Keys of ``data`` to plot; each is also used as the
             curve label.
         output_filename: Not used by this method.
         data: Mapping from estimator to its values, drawn against
             iterations ``1..self.iter_num``.
         output_dir: Directory that receives ``<self.kind>.png``.
     """
     # NOTE(review): ``output_filename`` is ignored and the file is named
     # after ``self.kind`` - confirm this is intended.
     iterations = list(range(1, self.iter_num + 1))
     plt.clf()
     for estimator in estimators:
         curve = PlotDataset(data[estimator], estimator)
         plt.plot(iterations, curve.values, label=curve.label,
                  color=curve.color, linewidth=curve.linewidth,
                  marker=curve.marker)
     plt.ylim(0, 1)
     plt.xlabel('Iteration')
     plt.ylabel('Performance')
     legend_box = plt.legend(bbox_to_anchor=(0., 1.02, 1., .102), loc=3,
                             ncol=3, mode='expand', borderaxespad=0.,
                             fontsize='large')
     output_path = path.join(output_dir, '%s.png' % self.kind)
     # The legend is passed as an extra artist for the 'tight' bounding box.
     plt.savefig(output_path, bbox_extra_artists=(legend_box, ),
                 bbox_inches='tight')
     plt.clf()
Exemple #18
0
 def get_exec_times_display(self):
     """Return the datasets used to display execution times.

     Returns:
         A one-element list holding the purple 'Queries generation'
         dataset.
     """
     queries_generation = PlotDataset(None, 'Queries generation')
     queries_generation.set_color('purple')
     return [queries_generation]
Exemple #19
0
 def exec_time_display(self):
     """Return the datasets used to display execution times.

     Returns:
         A list starting with the 'Binary model' dataset (created with an
         empty array), followed by the entries returned by
         ``CoreRandom.get_exec_times_display``.
     """
     model_dataset = PlotDataset(np.array([]), 'Binary model')
     return [model_dataset, *CoreRandom.get_exec_times_display(self)]
Exemple #20
0
 def get_exec_times_display(self):
     """Return the datasets used to display execution times.

     Returns:
         ``[malicious, uncertain, benign]``: the 'Malicious Queries'
         dataset is dotted and the 'Benign Queries' dataset dashed, each in
         its label colour; 'Uncertain Queries' keeps its default style.
     """
     uncertain_queries = PlotDataset(None, 'Uncertain Queries')

     malicious_queries = PlotDataset(None, 'Malicious Queries')
     malicious_queries.set_linestyle('dotted')
     malicious_queries.set_color(get_label_color(MALICIOUS))

     benign_queries = PlotDataset(None, 'Benign Queries')
     benign_queries.set_linestyle('dashed')
     benign_queries.set_color(get_label_color(BENIGN))

     return [malicious_queries, uncertain_queries, benign_queries]
Exemple #21
0
 def get_exec_times_display(self):
     """Return the datasets used to display execution times.

     Returns:
         A list starting with the 'Binary model' dataset, followed by the
         entries returned by ``CoreUncertainty.get_exec_times_display``.
     """
     model_dataset = PlotDataset(None, 'Binary model')
     return [model_dataset, *CoreUncertainty.get_exec_times_display(self)]
Exemple #22
0
 def get_exec_times_display(self):
     """Return the datasets used to display execution times.

     Returns:
         A list starting with the 'Analysis' dataset, followed by the
         entries returned by ``CoreRcd.get_exec_times_display``.
     """
     analysis_dataset = PlotDataset(None, 'Analysis')
     return [analysis_dataset, *CoreRcd.get_exec_times_display(self)]
Exemple #23
0
 def get_exec_times_display(self):
     """Return the datasets used to display execution times.

     Returns:
         A list starting with the 'Binary model' dataset, followed by the
         entries returned by ``CoreCesaBianchi.get_exec_times_display``.
     """
     model_dataset = PlotDataset(None, 'Binary model')
     return [model_dataset, *CoreCesaBianchi.get_exec_times_display(self)]