예제 #1
0
 def display(self, directory):
     """Write the prediction bar plots as JSON files.

     Two files are written, each with the ten percentage-range labels on
     the x axis:

     * ``predictions_barplot.json``: the number of items in each entry
       of ``self.ranges``.
     * ``predictions_barplot_labels.json``: the same counts split by the
       truthiness of each item's ``'true_label'`` entry ('malicious'
       when truthy, 'benign' otherwise).

     ``directory`` is concatenated with the file names as-is, so it must
     already end with a path separator.
     """
     labels = [
         '0-10%', '10-20%', '20-30%', '30-40%', '40-50%', '50-60%',
         '60-70%', '70-80%', '80-90%', '90-100%'
     ]

     # Plain lists rather than map()/filter(): under Python 3 those are
     # one-shot iterators and len() of a filter object raises TypeError.
     num_instances = [len(r) for r in self.ranges]
     barplot = BarPlot(labels)
     barplot.addDataset(num_instances,
                        colors_tools.getLabelColor('all'), 'numInstances')
     filename = directory + 'predictions_barplot.json'
     with open(filename, 'w') as f:
         barplot.display(f)

     num_malicious = [
         sum(1 for x in r if x['true_label']) for r in self.ranges
     ]
     num_benign = [
         sum(1 for x in r if not x['true_label']) for r in self.ranges
     ]
     barplot = BarPlot(labels)
     barplot.addDataset(num_malicious,
                        colors_tools.getLabelColor('malicious'),
                        'malicious')
     barplot.addDataset(num_benign,
                        colors_tools.getLabelColor('benign'), 'benign')
     filename = directory + 'predictions_barplot_labels.json'
     with open(filename, 'w') as f:
         barplot.display(f)
예제 #2
0
 def executionTimeDisplay(self):
     """Return the datasets used to display query execution times.

     Three ``PlotDataset`` objects are created without data (``None``):
     'Uncertain Queries' with the default style, 'Malicious Queries'
     (dotted, malicious colour) and 'Benign Queries' (dashed, benign
     colour). They are returned in the order malicious, uncertain,
     benign.
     """
     uncertain = PlotDataset(None, 'Uncertain Queries')

     # (title, linestyle, colour label) of the two styled datasets.
     styled_specs = (
         ('Malicious Queries', 'dotted', 'malicious'),
         ('Benign Queries', 'dashed', 'benign'),
     )
     styled = []
     for title, linestyle, color_label in styled_specs:
         plot_dataset = PlotDataset(None, title)
         plot_dataset.setLinestyle(linestyle)
         plot_dataset.setColor(colors_tools.getLabelColor(color_label))
         styled.append(plot_dataset)

     malicious, benign = styled
     return [malicious, uncertain, benign]
예제 #3
0
 def executionTimeDisplay(self):
     """Return the datasets used to display execution times.

     Creates three data-less ``PlotDataset`` objects: 'Binary model'
     (default style), 'Malicious Analysis' (dotted, malicious colour)
     and 'Benign Analysis' (dashed, benign colour). The list returned by
     ``QueryStrategy.executionTimeDisplay(self)`` is appended to them.
     """
     binary_model = PlotDataset(None, 'Binary model')

     # (title, linestyle, colour label) of the two styled datasets.
     styled_specs = (
         ('Malicious Analysis', 'dotted', 'malicious'),
         ('Benign Analysis', 'dashed', 'benign'),
     )
     own_datasets = [binary_model]
     for title, linestyle, color_label in styled_specs:
         plot_dataset = PlotDataset(None, title)
         plot_dataset.setLinestyle(linestyle)
         plot_dataset.setColor(colors_tools.getLabelColor(color_label))
         own_datasets.append(plot_dataset)

     inherited = QueryStrategy.executionTimeDisplay(self)
     return own_datasets + inherited
예제 #4
0
 def generatePlotDatasets(self, instances):
     """Populate ``self.plot_datasets`` with the values of ``self.feature``.

     When ``self.has_true_labels`` is set, one dataset is stored under
     'malicious' and one under 'benign', each built from the instances
     whose ids are returned by the matching id getter called with
     ``true_labels=True``. Otherwise a single dataset covering all the
     instances is stored under 'all'. Every dataset gets the colour
     associated with its key.
     """
     self.plot_datasets = {}

     if not self.has_true_labels:
         values = instances.getFeatureValues(self.feature)
         self.plot_datasets['all'] = PlotDataset(values, 'all')
         all_color = colors_tools.getLabelColor('all')
         self.plot_datasets['all'].setColor(all_color)
         return

     # Same processing for both labels; only the id getter differs.
     id_getters = (
         ('malicious', 'getMaliciousIds'),
         ('benign', 'getBenignIds'),
     )
     for label, getter_name in id_getters:
         ids = getattr(instances, getter_name)(true_labels=True)
         subset = instances.getInstancesFromIds(ids)
         plot_dataset = PlotDataset(subset.getFeatureValues(self.feature),
                                    label)
         plot_dataset.setColor(colors_tools.getLabelColor(label))
         self.plot_datasets[label] = plot_dataset
예제 #5
0
 def plotEvolutionMonitoring(self, estimator=None):
     """Save the per-iteration evolution plot of a clustering estimator.

     With ``estimator=None`` the method calls itself once for every
     estimator in ``self.homogeneity_estimators`` followed by
     ``self.adjusted_estimators``.

     Otherwise, for each key returned by
     ``self.annotations.getClusteringsEvaluations()``, the column
     ``'<key>_<estimator>'`` of ``self.data`` is plotted against the
     iteration indices. The figure is saved as
     ``'<estimator>_monitoring.png'`` appended to
     ``self.output_directory``.
     """
     if estimator is None:
         all_estimators = (self.homogeneity_estimators +
                           self.adjusted_estimators)
         for current in all_estimators:
             self.plotEvolutionMonitoring(estimator=current)
         return

     x_values = range(self.monitoring.iteration_number)
     plt.clf()
     y_max = 1
     evaluations = self.annotations.getClusteringsEvaluations()
     for clustering in evaluations.keys():
         curve_color = colors_tools.getLabelColor(clustering)
         column = clustering + '_' + estimator
         plt.plot(x_values,
                  self.data.loc[:][column],
                  label=clustering.title() + ' Clustering',
                  color=curve_color,
                  linewidth=4,
                  marker='o')
     plt.ylim(0, y_max)
     plt.xlabel('Iteration')
     plt.ylabel(estimator)

     # The legend sits above the axes, so it is handed to savefig as an
     # extra artist.
     legend = plt.legend(bbox_to_anchor=(0., 1.02, 1., .102),
                         loc=3,
                         ncol=2,
                         mode='expand',
                         borderaxespad=0.,
                         fontsize='large')
     output_path = self.output_directory + estimator + '_monitoring.png'
     plt.savefig(output_path,
                 bbox_extra_artists=(legend, ),
                 bbox_inches='tight')
     plt.clf()
예제 #6
0
 def plotFamiliesEvolutionMonitoring(self):
     """Save the plot of discovered families against annotations.

     One curve is drawn for 'malicious' and one for 'benign' from
     ``self.evolutions[label]['families']``, with
     ``self.evolutions['global']['annotations']`` on the x axis.

     When ``self.has_true_labels`` is set, every value is divided by the
     number of values returned by ``getFamiliesValues(label=label,
     true_labels=True)`` and the y axis is limited to [0, 1]. The figure
     is saved as ``'families_monitoring.png'`` appended to
     ``self.output_directory``.
     """
     annotations = self.evolutions['global']['annotations']
     plt.clf()
     if self.has_true_labels:
         max_value = 1
     else:
         # NOTE(review): assumes both families_monitoring entries are
         # scalars; max() of two sequences would return a sequence,
         # which is not a valid axis limit -- confirm against the
         # attribute's definition.
         max_value = max(self.families_monitoring['malicious'],
                         self.families_monitoring['benign'])
     for l in ['malicious', 'benign']:
         evolution = self.evolutions[l]['families']
         if self.has_true_labels:
             num_families = len(
                 self.monitoring.datasets.instances.getFamiliesValues(
                     label=l, true_labels=True))
             if num_families > 0:
                 # float() forces true division: with integer counts,
                 # Python 2's `/` would floor every proportion to 0.
                 evolution = [float(x) / num_families for x in evolution]
             else:
                 # A label without any family has nothing to discover;
                 # plot a flat line instead of dividing by zero.
                 evolution = [0.0 for _ in evolution]
         color = colors_tools.getLabelColor(l)
         plt.plot(annotations,
                  evolution,
                  label=l.title(),
                  color=color,
                  linewidth=4,
                  marker='o')
     plt.ylim(0, max_value)
     plt.xlabel('Num Annotations')
     plt.ylabel('Prop. Families Discovered')
     # The legend sits above the axes, so it is handed to savefig as an
     # extra artist.
     lgd = plt.legend(bbox_to_anchor=(0., 1.02, 1., .102),
                      loc=3,
                      ncol=2,
                      mode='expand',
                      borderaxespad=0.,
                      fontsize='x-large')
     filename = self.output_directory
     filename += 'families_monitoring.png'
     plt.savefig(filename, bbox_extra_artists=(lgd, ), bbox_inches='tight')
     plt.clf()
예제 #7
0
파일: ROC.py 프로젝트: william-vu/SecuML
 def addFold(self, fold_id, true_labels, predicted_proba, predicted_scores):
     """Add the ROC curve of one fold to the plot and to the running mean.

     Does nothing when ``true_labels`` is empty. The curve is computed
     from ``predicted_proba`` when ``self.probabilist_model`` is set and
     from ``predicted_scores`` otherwise.

     The true positive rates interpolated on ``self.mean_fpr`` are
     accumulated into ``self.mean_tpr``, whereas ``self.thresholds`` is
     overwritten with the interpolated thresholds of this fold. With
     several folds the curve is drawn thin and labelled with the fold
     id; with a single fold it is drawn thick in the 'all' colour.
     """
     if len(true_labels) == 0:
         return

     scores = predicted_proba if self.probabilist_model else predicted_scores
     fpr, tpr, thresholds = roc_curve(true_labels, scores)

     self.mean_tpr += interp(self.mean_fpr, fpr, tpr)
     self.thresholds = interp(self.mean_fpr, fpr, thresholds)
     # Pin the end points of the interpolated curves.
     self.mean_tpr[0] = 0.0
     self.thresholds[0] = 1.0
     self.thresholds[-1] = 0.0

     roc_auc = auc(fpr, tpr)
     if self.num_folds > 1:
         plot_options = {
             'lw': 1,
             'label': 'ROC fold %d (area = %0.2f)' % (fold_id, roc_auc),
         }
     else:
         plot_options = {
             'lw': 3,
             'color': colors_tools.getLabelColor('all'),
             'label': 'ROC (area = %0.2f)' % (roc_auc),
         }
     self.ax1.plot(fpr, tpr, **plot_options)
예제 #8
0
def getFamiliesPerformance(project, dataset, experiment, train_test, label, threshold):
    """Return, as JSON, the per-family performance bar plot at a threshold.

    The performances are read from ``tp_families_thresholds.csv``
    (label 'malicious', plotted as 'Detection Rate') or
    ``fp_families_thresholds.csv`` (label 'benign', plotted as
    'False Positive Rate') in the ``<train_test>/families/`` directory
    of the experiment.

    ``threshold`` is a percentage: it is divided by 100 and the row
    whose index is closest to that value is plotted.

    Raises:
        ValueError: if ``label`` is neither 'malicious' nor 'benign'.
    """
    label_settings = {
        'malicious': ('tp_', 'Detection Rate'),
        'benign': ('fp_', 'False Positive Rate'),
    }
    # Reject unknown labels up front: they used to leave `tp_fp` unbound
    # and point at a file name without the tp_/fp_ prefix.
    if label not in label_settings:
        raise ValueError(
            "label must be 'malicious' or 'benign', got %r" % (label,))
    prefix, tp_fp = label_settings[label]

    filename = getDir(project, dataset, experiment) + train_test + '/families/'
    filename += prefix + 'families_thresholds.csv'
    with open(filename, 'r') as f:
        perf = pd.read_csv(f, header=0, index_col=0)

    # The last column and the last row are excluded from the candidates.
    # NOTE(review): the plotted row below still holds every column, so it
    # has one more value than `families` -- confirm the CSV layout.
    families = list(perf.columns.values[:-1])
    threshold = float(threshold) / 100
    thresholds = list(perf.index[:-1])
    threshold_value = min(thresholds, key=lambda t: abs(t - threshold))
    perf = list(perf.loc[threshold_value])

    barplot = BarPlot(families)
    barplot.addDataset(perf, colors_tools.getLabelColor('all'), tp_fp)
    return jsonify(barplot.barplot)
예제 #9
0
def getFamiliesBarplot(project, dataset, experiment_id, iteration, label):
    """Return, as JSON, the bar plot of instance counts per family.

    The experiment is loaded through ``ExperimentFactory`` and the counts
    come from ``labels_tools.getFamiliesCounts`` for the given ``label``.
    ``iteration`` is forwarded as ``iteration_max``; the string 'None'
    stands for no limit. Families are sorted in ascending order.
    """
    factory = ExperimentFactory.getFactory()
    experiment = factory.fromJson(project, dataset, experiment_id, db, cursor)
    label_id = experiment.experiment_label_id

    # The literal string 'None' is mapped to a real None.
    iteration_max = None if iteration == 'None' else iteration
    family_counts = labels_tools.getFamiliesCounts(cursor,
                                                   label_id,
                                                   iteration_max=iteration_max,
                                                   label=label)

    family_names = family_counts.keys()
    counts = [family_counts[name] for name in family_counts.keys()]
    df = pd.DataFrame({'families': family_names, 'counts': counts})
    matrix_tools.sortDataFrame(df, 'families', ascending=True, inplace=True)

    barplot = BarPlot(list(df['families']))
    barplot.addDataset(list(df['counts']),
                       colors_tools.getLabelColor(label),
                       'Num. Instances')
    return jsonify(barplot.barplot)
예제 #10
0
def getFamiliesBarplot(experiment_id, iteration, label):
    """Return, as JSON, the bar plot of instance counts per family.

    The experiment is obtained from ``updateCurrentExperiment`` and the
    counts come from ``labels_tools.getFamiliesCounts`` for the given
    ``label``. ``iteration`` is forwarded as ``iteration_max``; the
    string 'None' stands for no limit. Families are sorted in ascending
    order.
    """
    experiment = updateCurrentExperiment(experiment_id)
    label_id = experiment.labels_id

    # The literal string 'None' is mapped to a real None.
    iteration_max = None if iteration == 'None' else iteration
    family_counts = labels_tools.getFamiliesCounts(experiment.session,
                                                   label_id,
                                                   iteration_max=iteration_max,
                                                   label=label)

    family_names = family_counts.keys()
    counts = [family_counts[name] for name in family_counts.keys()]
    df = pd.DataFrame({'families': family_names, 'counts': counts})
    matrix_tools.sortDataFrame(df, 'families', ascending=True, inplace=True)

    barplot = BarPlot(list(df['families']))
    counts_dataset = PlotDataset(list(df['counts']), 'Num. Instances')
    counts_dataset.setColor(colors_tools.getLabelColor(label))
    barplot.addDataset(counts_dataset)
    return jsonify(barplot.toJson())
예제 #11
0
 def setDefaultValues(self):
     """Reset the drawing attributes to their defaults.

     The colour is the one associated with the 'all' label; the line is
     solid with a width of 3 and 'o' markers.
     """
     self.color = colors_tools.getLabelColor('all')
     self.linewidth, self.linestyle, self.marker = 3, 'solid', 'o'
예제 #12
0
 def displayFamiliesDistribution(self, directory, label=None):
     """Save the density plots of the predictions of each family.

     With ``label=None`` the method calls itself for 'malicious' and
     then 'benign'. Otherwise the families are read from
     ``self.malicious_families`` when ``label`` is 'malicious' and from
     ``self.benign_families`` for any other value.

     For every family a Gaussian kernel density estimate of its
     predictions is drawn twice: on a figure of its own, saved as
     ``'<label>_family_<family>_prediction_distributions.png'``, and on
     a shared figure saved as
     ``'<label>_families_prediction_distributions.png'``. Families whose
     predictions have a variance below ``eps`` are drawn as a single
     spike at their mean instead.

     ``directory`` is concatenated with the file names as-is, so it must
     already end with a path separator.
     """
     if label is None:
         self.displayFamiliesDistribution(directory, label='malicious')
         self.displayFamiliesDistribution(directory, label='benign')
         return
     if label == 'malicious':
         families = self.malicious_families
     else:
         families = self.benign_families
     bandwidth = 0.1
     num_points = 50
     eps = 0.00001
     # Line styles are cycled so neighbouring curves can be told apart
     # on the shared figure.
     linestyles = ('solid', 'dashed', 'dotted')
     kde = KernelDensity(kernel='gaussian', bandwidth=bandwidth)
     fig, (ax) = plt.subplots(1, 1)
     for i, family in enumerate(families):
         predictions_np = np.asarray(families[family])
         linestyle = linestyles[i % 3]
         linewidth = 2
         if np.var(predictions_np) < eps:
             # (Almost) constant predictions: draw a spike at the mean,
             # surrounded by two zero points eps away from it.
             linewidth = 4
             mean = np.mean(predictions_np)
             x = np.arange(0, 1, 0.1)
             x = np.sort(np.append(x, [mean, mean - eps, mean + eps]))
             density = [1 if v == mean else 0 for v in x]
         else:
             kde.fit([[p] for p in predictions_np])
             # Computes the x axis
             p_max = np.amax(predictions_np)
             p_min = np.amin(predictions_np)
             delta = p_max - p_min
             density_delta = 1.1 * delta
             x = np.arange(0, 1, density_delta / num_points)
             # kde.score_samples returns the 'log' of the density
             log_density = kde.score_samples([[v] for v in x]).tolist()
             # A list, not map(): the density is plotted twice and a
             # Python 3 map object is a one-shot iterator.
             density = [math.exp(v) for v in log_density]
         ax.plot(x,
                 density,
                 label=family,
                 linewidth=linewidth,
                 linestyle=linestyle)
         # Figure dedicated to this family.
         fig_f, (ax_f) = plt.subplots(1, 1)
         ax_f.plot(x,
                   density,
                   linewidth=4,
                   color=colors_tools.getLabelColor(label))
         ax_f.set_title(family)
         ax_f.set_xlabel('P(Malicious)')
         ax_f.set_ylabel('Density')
         filename = directory
         filename += label + '_family_' + family + '_prediction_distributions.png'
         fig_f.savefig(filename)
         plt.close(fig_f)
     ax.legend(bbox_to_anchor=(0., 0.95, 1., .102),
               loc=3,
               ncol=5,
               mode='expand',
               borderaxespad=0.,
               fontsize='xx-small')
     ax.set_xlabel('P(Malicious)')
     ax.set_ylabel('Density')
     filename = directory
     filename += label + '_families_prediction_distributions.png'
     fig.savefig(filename)
     plt.close(fig)