Beispiel #1
0
 def plotEvolutionMonitoring(self):
     """Plot labels and families suggestions accuracy over the iterations.

     For ``self.labels_accuracy`` and ``self.families_accuracy`` the ratio
     ``true_suggestions / num_suggestions`` of the high-confidence counts
     is drawn as one curve each. The figure is saved as
     ``labels_families_high_confidence_suggestions.png``, with the file
     name appended directly to ``self.output_directory``.
     """
     x_axis = range(self.monitoring.iteration_number)
     plt.clf()

     def draw_accuracy(counts, legend_label, color=None):
         # One accuracy curve; without an explicit color the dataset's own
         # color is used.
         curve = PlotDataset(
             counts['true_suggestions'] / counts['num_suggestions'],
             legend_label)
         plt.plot(x_axis, curve.values,
                  label=curve.label,
                  color=curve.color if color is None else color,
                  linewidth=curve.linewidth,
                  marker=curve.marker)

     # Labels curve first, then the families curve (forced to purple).
     draw_accuracy(self.labels_accuracy.high_confidence_counts.data,
                   'Labels Suggestions')
     draw_accuracy(self.families_accuracy.high_confidence_counts.data,
                   'Families Suggestions', color='purple')

     # Axes and legend; the y axis is fixed to [0, 1].
     plt.ylim(0, 1)
     plt.xlabel('Iteration')
     plt.ylabel('Suggestions Accuracy')
     legend = plt.legend(bbox_to_anchor=(0., 1.02, 1., .102), loc=3,
                         ncol=2, mode='expand', borderaxespad=0.,
                         fontsize='large')
     output_path = (self.output_directory
                    + 'labels_families_high_confidence_suggestions.png')
     # The legend is passed as an extra artist to the tight bounding box.
     plt.savefig(output_path, bbox_extra_artists=(legend,),
                 bbox_inches='tight')
     plt.clf()
Beispiel #2
0
def getTopWeightedFeatures(experiment_id, inst_exp_id, instance_id, size):
    """Return a bar plot of the ``size`` most heavily weighted features.

    The instance's feature values are scaled with the ``'scaler'`` step of
    the model pipeline of ``experiment_id`` and multiplied element-wise by
    the ``coef_`` of its ``'model'`` step. Features are ranked by the
    absolute value of that product, largest first.

    Args:
        experiment_id: id of the experiment providing the model pipeline.
        inst_exp_id: id of the experiment the instance features come from.
        instance_id: id of the instance (converted with ``int``).
        size: number of features to keep (converted with ``int``).

    Returns:
        ``jsonify`` of the bar plot; the tooltips carry the raw (unscaled)
        feature values.
    """
    instance_id = int(instance_id)
    size = int(size)
    exp = ExperimentFactory.getFactory().fromJson(experiment_id, session)
    validation_experiment = ExperimentFactory.getFactory().fromJson(
        inst_exp_id, session)
    # Get the features.
    features_names, features_values = validation_experiment.getFeatures(
        instance_id)
    features_values = [float(value) for value in features_values]
    # Get the pipeline with scaler and logistic model.
    pipeline = exp.getModelPipeline()
    # Scale the features (reshaped to a single-row 2D array).
    scaled_values = pipeline.named_steps['scaler'].transform(
        np.reshape(features_values, (1, -1)))
    weighted_values = np.multiply(scaled_values,
                                  pipeline.named_steps['model'].coef_)
    # sorted(zip(...)) instead of map(...).sort(): map() returns an iterator
    # without .sort() on Python 3, whereas this form works on 2 and 3.
    # NOTE(review): only row 0 of the product is used - presumably coef_ has
    # a single row (binary model); confirm.
    features = sorted(
        zip(features_names, features_values, weighted_values[0]),
        key=lambda feature: abs(feature[2]))
    # Last `size` entries of the ascending sort, in reverse order.
    features = features[:-size - 1:-1]
    tooltips = [feature[1] for feature in features]
    barplot = BarPlot([feature[0] for feature in features])
    dataset = PlotDataset([feature[2] for feature in features], None)
    dataset.setColor(colors_tools.red)
    barplot.addDataset(dataset)
    return jsonify(barplot.toJson(tooltip_data=tooltips))
 def generateBinaryHistogram(self):
     """Export a two-bar histogram (values 0 and 1) for every dataset.

     For each dataset in ``self.plot_datasets`` the number of values equal
     to 0 and equal to 1 is counted and added to a bar plot, keeping the
     dataset's label and color. The plot is written as JSON to
     ``self.output_directory + 'binary_histogram.json'``.
     """
     barplot = BarPlot(['0', '1'])
     # The dict keys are not needed; .values() also works on Python 3,
     # where dict.iteritems() no longer exists.
     for dataset in self.plot_datasets.values():
         # NOTE(review): assumes dataset.values supports element-wise
         # comparison (e.g. a numpy array) - confirm.
         num_0 = sum(dataset.values == 0)
         num_1 = sum(dataset.values == 1)
         hist_dataset = PlotDataset([num_0, num_1], dataset.label)
         hist_dataset.setColor(dataset.color)
         barplot.addDataset(hist_dataset)
     output_filename = self.output_directory + 'binary_histogram.json'
     with open(output_filename, 'w') as f:
         barplot.exportJson(f)
Beispiel #4
0
def getTopModelFeatures(experiment_id, size):
    """Return a bar plot of the ``size`` top features of the model.

    Feature names are paired with the values returned by
    ``exp.getTopFeatures()`` and ranked by absolute value, largest first.

    Args:
        experiment_id: id of the experiment to load.
        size: number of features to keep (converted with ``int``).

    Returns:
        ``jsonify`` of the bar plot. The bars are colored red only when the
        feature importance of the classification configuration is
        ``'weight'``.
    """
    size = int(size)
    exp = ExperimentFactory.getFactory().fromJson(experiment_id, session)
    model_coefficients = exp.getTopFeatures()
    features_names = exp.getFeaturesNames()
    # sorted(zip(...)) instead of map(...).sort(): map() returns an iterator
    # without .sort() on Python 3, whereas this form works on 2 and 3.
    coefficients = sorted(zip(features_names, model_coefficients),
                          key=lambda pair: abs(pair[1]))
    # Last `size` entries of the ascending sort, in reverse order.
    coefficients = coefficients[:-size - 1:-1]
    barplot = BarPlot([pair[0] for pair in coefficients])
    dataset = PlotDataset([pair[1] for pair in coefficients], None)
    if exp.classification_conf.featureImportance() == 'weight':
        dataset.setColor(colors_tools.red)
    barplot.addDataset(dataset)
    return jsonify(barplot.toJson())
 def generateHistogram(self):
     """Export a 10-bin histogram of every dataset on shared bin edges.

     The bin edges are 10 equal-width bins computed on the values of all
     the datasets in ``self.plot_datasets``. Each dataset is then
     histogrammed on those edges and added to a bar plot with its label
     and color. The plot is written as JSON to
     ``self.output_directory + 'histogram.json'``.
     """
     # 10 equal-width bins computed on all the data. The edges used to be
     # derived from the 'malicious' dataset alone when true labels were
     # available, so values of the other datasets lying outside that range
     # were silently left out by np.histogram.
     all_values = np.concatenate(
         [np.ravel(dataset.values)
          for dataset in self.plot_datasets.values()])
     _, bin_edges = np.histogram(all_values, bins=10, density=False)
     x_labels = [str(low) + ' - ' + str(high)
                 for low, high in zip(bin_edges[:-1], bin_edges[1:])]
     barplot = BarPlot(x_labels)
     # .values() rather than .iteritems(): the keys are unused and
     # iteritems() does not exist on Python 3.
     for dataset in self.plot_datasets.values():
         hist, _ = np.histogram(dataset.values, bins=bin_edges,
                                density=False)
         hist_dataset = PlotDataset(hist, dataset.label)
         hist_dataset.setColor(dataset.color)
         barplot.addDataset(hist_dataset)
     output_filename = self.output_directory + 'histogram.json'
     with open(output_filename, 'w') as f:
         barplot.exportJson(f)
Beispiel #6
0
 def plotEvolutionMonitoring(self):
     """Plot the suggestions accuracy over the iterations and save it.

     The curve is ``true_suggestions / num_suggestions`` from ``self.data``.
     The figure is saved as
     ``<labels_families>_<kind>_suggestions.png``, with the file name
     appended directly to ``self.output_directory``.

     Raises:
         ValueError: if ``self.labels_families`` is neither ``'labels'``
             nor ``'families'``.
     """
     if self.labels_families == 'labels':
         title = 'Labels Suggestions Accuracy'
     elif self.labels_families == 'families':
         title = 'Families Suggestions Accuracy'
     else:
         # Without this branch an unexpected value only surfaced later as
         # an UnboundLocalError on `title`.
         raise ValueError(
             "labels_families must be 'labels' or 'families', got %r"
             % (self.labels_families,))
     plot = PlotDataset(
         self.data['true_suggestions'] / self.data['num_suggestions'],
         title)
     iterations = range(self.monitoring.iteration_number)
     plt.clf()
     plt.plot(iterations,
              plot.values,
              label=plot.label,
              color=plot.color,
              linewidth=plot.linewidth,
              marker=plot.marker)
     # The y axis is fixed to [0, 1].
     plt.ylim(0, 1)
     plt.xlabel('Iteration')
     plt.ylabel('Suggestions Accuracy')
     lgd = plt.legend(bbox_to_anchor=(0., 1.02, 1., .102),
                      loc=3,
                      ncol=2,
                      mode='expand',
                      borderaxespad=0.,
                      fontsize='large')
     filename = self.output_directory
     filename += self.labels_families + '_' + self.kind + '_suggestions.png'
     # The legend is passed as an extra artist to the tight bounding box.
     plt.savefig(filename, bbox_extra_artists=(lgd, ), bbox_inches='tight')
     plt.clf()
 def generatePlotDatasets(self, instances):
     """Build ``self.plot_datasets`` for the feature ``self.feature``.

     With true labels, one dataset is created for the malicious instances
     and one for the benign instances (keys ``'malicious'`` and
     ``'benign'``). Otherwise a single dataset ``'all'`` holds the feature
     values of every instance. Each dataset gets the color returned by
     ``colors_tools.getLabelColor`` for its key.
     """
     self.plot_datasets = {}
     if not self.has_true_labels:
         # No ground truth: a single dataset with every instance.
         self.plot_datasets['all'] = PlotDataset(
             instances.getFeatureValues(self.feature), 'all')
         self.plot_datasets['all'].setColor(
             colors_tools.getLabelColor('all'))
         return
     # Same recipe for both classes; malicious is processed first.
     id_getters = (('malicious', instances.getMaliciousIds),
                   ('benign', instances.getBenignIds))
     for label, get_ids in id_getters:
         subset = instances.getInstancesFromIds(get_ids(true_labels=True))
         label_dataset = PlotDataset(
             subset.getFeatureValues(self.feature), label)
         label_dataset.setColor(colors_tools.getLabelColor(label))
         self.plot_datasets[label] = label_dataset
Beispiel #8
0
 def plotEvolutionMonitoring(self):
     """Export the families evolution as a JSON bar plot.

     One single-value dataset is added per row of ``self.data``, taking the
     value in the column at position 1 and using the row position as its
     label. The plot is written to
     ``self.output_directory + self.label + '_families_evolution.json'``.
     """
     families_barplot = BarPlot(self.families)
     num_rows = self.data.shape[0]
     for row in range(num_rows):
         row_value = self.data.iloc[row, 1]
         families_barplot.addDataset(PlotDataset([row_value], str(row)))
     output_path = (self.output_directory + self.label
                    + '_families_evolution.json')
     with open(output_path, 'w') as json_file:
         families_barplot.exportJson(json_file)
Beispiel #9
0
def getFamiliesBarplot(experiment_id, iteration, label):
    """Return a bar plot of the number of instances per family.

    Args:
        experiment_id: id passed to ``updateCurrentExperiment``.
        iteration: upper bound forwarded as ``iteration_max``; the string
            ``'None'`` is turned into ``None``.
        label: label forwarded to ``getFamiliesCounts`` and used to pick
            the bar color.

    Returns:
        ``jsonify`` of the bar plot, with families sorted in ascending
        order.
    """
    experiment = updateCurrentExperiment(experiment_id)
    labels_id = experiment.labels_id
    # The iteration may arrive as the literal string 'None'.
    iteration_max = None if iteration == 'None' else iteration
    family_counts = labels_tools.getFamiliesCounts(
        experiment.session, labels_id,
        iteration_max=iteration_max, label=label)
    family_names = family_counts.keys()
    counts_frame = pd.DataFrame({
        'families': family_names,
        'counts': [family_counts[name] for name in family_names],
    })
    matrix_tools.sortDataFrame(counts_frame, 'families', ascending=True,
                               inplace=True)
    barplot = BarPlot(list(counts_frame['families']))
    counts_dataset = PlotDataset(list(counts_frame['counts']),
                                 'Num. Instances')
    counts_dataset.setColor(colors_tools.getLabelColor(label))
    barplot.addDataset(counts_dataset)
    return jsonify(barplot.toJson())
Beispiel #10
0
    def plotEvolutionMonitoring(self, evolution_file, monitoring_dir):
        """Export the families evolution read from a file as a JSON bar plot.

        The data is loaded with ``self.loadEvolutionMonitoring``. One
        single-value dataset is added per row, taking the value in the
        column at position 1 and using the row position as its label. The
        plot is written to
        ``monitoring_dir + self.label + '_families_evolution.json'``.
        """
        evolution = self.loadEvolutionMonitoring(evolution_file)

        families_barplot = BarPlot(self.families)
        num_rows = evolution.shape[0]
        for row in range(num_rows):
            row_value = evolution.iloc[row, 1]
            families_barplot.addDataset(PlotDataset([row_value], str(row)))
        output_path = monitoring_dir + self.label + '_families_evolution.json'
        with open(output_path, 'w') as json_file:
            families_barplot.exportJson(json_file)
Beispiel #11
0
 def plotPerfEvolution(self, estimators, output_filename):
     """Plot the evolution of the given estimators and save it as PNG.

     Args:
         estimators: keys of ``self.data``; one curve is drawn per key.
         output_filename: name passed to ``self.outputFilename`` together
             with the ``'png'`` extension to get the output path.
     """
     # Iterations are numbered from 1 on the x axis.
     x_axis = range(1, self.monitoring.iteration_number + 1)
     plt.clf()
     for name in estimators:
         curve = PlotDataset(self.data[name], name)
         plt.plot(x_axis, curve.values,
                  label=curve.label,
                  color=curve.color,
                  linewidth=curve.linewidth,
                  marker=curve.marker)
     plt.ylim(0, 1)
     plt.xlabel('Iteration')
     plt.ylabel('Performance')
     legend = plt.legend(bbox_to_anchor=(0., 1.02, 1., .102), loc=3,
                         ncol=3, mode='expand', borderaxespad=0.,
                         fontsize='large')
     output_path = self.outputFilename(output_filename, 'png')
     # The legend is passed as an extra artist to the tight bounding box.
     plt.savefig(output_path, bbox_extra_artists=(legend,),
                 bbox_inches='tight')
     plt.clf()
Beispiel #12
0
def getClusterStats(experiment_id):
    """Return a bar plot with the number of instances of every cluster.

    The clustering is rebuilt with ``Clustering.fromJson`` from the
    experiment returned by ``updateCurrentExperiment``. Bars are labelled
    with the cluster labels.

    Returns:
        ``jsonify`` of the bar plot.
    """
    experiment = updateCurrentExperiment(experiment_id)
    clustering = Clustering.fromJson(experiment)
    cluster_sizes = []
    cluster_labels = []
    # Every cluster gets a bar, including empty ones (size 0).
    for index in range(clustering.num_clusters):
        cluster = clustering.clusters[index]
        cluster_sizes.append(len(cluster.instances_ids))
        cluster_labels.append(cluster.label)
    barplot = BarPlot(cluster_labels)
    barplot.addDataset(PlotDataset(cluster_sizes, 'Num. Instances'))
    return jsonify(barplot.toJson())
Beispiel #13
0
def getFamiliesPerformance(experiment_id, train_test, label, threshold):
    """Return a bar plot of the per-family performance at a threshold.

    The values are read from ``tp_families_thresholds.csv`` (label
    ``'malicious'``) or ``fp_families_thresholds.csv`` (label ``'benign'``)
    in ``<output directory><train_test>/families/``. The row whose index is
    closest to ``threshold / 100`` is used.

    Args:
        experiment_id: id passed to ``updateCurrentExperiment``.
        train_test: sub-directory of the experiment output directory.
        label: ``'malicious'`` or ``'benign'``.
        threshold: threshold as a percentage (divided by 100 before lookup).

    Returns:
        ``jsonify`` of the bar plot.

    Raises:
        ValueError: if ``label`` is neither ``'malicious'`` nor
            ``'benign'``.
    """
    experiment = updateCurrentExperiment(experiment_id)
    filename = experiment.getOutputDirectory() + train_test + '/families/'
    if label == 'malicious':
        filename += 'tp_'
        tp_fp = 'Detection Rate'
    elif label == 'benign':
        filename += 'fp_'
        tp_fp = 'False Positive Rate'
    else:
        # Without this branch an unknown label only surfaced later, as a
        # missing file or an UnboundLocalError on `tp_fp`.
        raise ValueError(
            "label must be 'malicious' or 'benign', got %r" % (label,))
    filename += 'families_thresholds.csv'
    # Only the read needs the open file; the rest works on the DataFrame.
    with open(filename, 'r') as f:
        perf = pd.read_csv(f, header=0, index_col=0)
    # The last column and the last index entry are left out of the
    # candidate families / thresholds.
    families = list(perf.columns.values[:-1])
    threshold = float(threshold) / 100
    thresholds = list(perf.index[:-1])
    # Stored threshold closest to the requested one (first one on ties).
    threshold_value = min(thresholds,
                          key=lambda value: abs(value - threshold))
    # NOTE(review): this row still contains the last column, so it holds one
    # more value than `families` - confirm this is intended.
    perf = list(perf.loc[threshold_value])
    barplot = BarPlot(families)
    barplot.addDataset(PlotDataset(perf, tp_fp))
    return jsonify(barplot.toJson())
    def display(self, directory):
        """Export two JSON bar plots of the predictions per range.

        ``predictions_barplot.json`` holds the number of entries in each
        element of ``self.ranges``. ``predictions_barplot_labels.json``
        splits those counts into malicious and benign according to the
        truthiness of each entry's ``'true_label'``. Both file names are
        appended directly to ``directory``.

        NOTE(review): the ten percentage labels presumably match ten
        elements in ``self.ranges`` - confirm.
        """
        labels = ['0-10%', '10-20%', '20-30%', '30-40%', '40-50%',
                  '50-60%', '60-70%', '70-80%', '80-90%', '90-100%']

        # Comprehensions instead of map()/filter(): on Python 3 those return
        # iterators, and len() of a filter object raises TypeError.
        barplot = BarPlot(labels)
        dataset = PlotDataset([len(r) for r in self.ranges], 'numInstances')
        dataset.setColor(colors_tools.getLabelColor('all'))
        barplot.addDataset(dataset)
        filename = directory + 'predictions_barplot.json'
        with open(filename, 'w') as f:
            barplot.exportJson(f)

        barplot = BarPlot(labels)
        malicious_counts = [sum(1 for x in r if x['true_label'])
                            for r in self.ranges]
        benign_counts = [sum(1 for x in r if not x['true_label'])
                         for r in self.ranges]
        malicious_dataset = PlotDataset(malicious_counts, 'malicious')
        malicious_dataset.setColor(colors_tools.getLabelColor('malicious'))
        barplot.addDataset(malicious_dataset)
        benign_dataset = PlotDataset(benign_counts, 'benign')
        benign_dataset.setColor(colors_tools.getLabelColor('benign'))
        barplot.addDataset(benign_dataset)
        filename = directory + 'predictions_barplot_labels.json'
        with open(filename, 'w') as f:
            barplot.exportJson(f)
Beispiel #15
0
 def executionTimeDisplay(self):
     """Return the execution-time datasets of this strategy.

     Two value-less datasets, 'Logistic Regression' (dotted) and
     'Naive Bayes' (dashed), are placed before the datasets returned by
     ``QueryStrategy.executionTimeDisplay``.
     """
     own_displays = []
     for title, linestyle in (('Logistic Regression', 'dotted'),
                              ('Naive Bayes', 'dashed')):
         model_display = PlotDataset(None, title)
         model_display.setLinestyle(linestyle)
         own_displays.append(model_display)
     return own_displays + QueryStrategy.executionTimeDisplay(self)
Beispiel #16
0
 def executionTimeDisplay(self):
     """Return the execution-time datasets of this strategy.

     A 'Binary model' dataset, a dotted 'Malicious Analysis' dataset and a
     dashed 'Benign Analysis' dataset (each colored with its label color)
     are placed before the datasets returned by
     ``QueryStrategy.executionTimeDisplay``. All have no values.
     """
     own_displays = [PlotDataset(None, 'Binary model')]
     analyses = (('malicious', 'Malicious Analysis', 'dotted'),
                 ('benign', 'Benign Analysis', 'dashed'))
     for label, title, linestyle in analyses:
         analysis_display = PlotDataset(None, title)
         analysis_display.setLinestyle(linestyle)
         analysis_display.setColor(colors_tools.getLabelColor(label))
         own_displays.append(analysis_display)
     return own_displays + QueryStrategy.executionTimeDisplay(self)
Beispiel #17
0
 def executionTimeDisplay(self):
     """Return the execution-time datasets of this strategy.

     The list holds, in this order, the 'Malicious Queries' (dotted),
     'Uncertain Queries' and 'Benign Queries' (dashed) datasets. The
     malicious and benign ones are colored with their label color. All
     have no values.
     """
     # The uncertain dataset is created first but returned in the middle.
     uncertain_display = PlotDataset(None, 'Uncertain Queries')
     by_label = {}
     queries = (('malicious', 'Malicious Queries', 'dotted'),
                ('benign', 'Benign Queries', 'dashed'))
     for label, title, linestyle in queries:
         query_display = PlotDataset(None, title)
         query_display.setLinestyle(linestyle)
         query_display.setColor(colors_tools.getLabelColor(label))
         by_label[label] = query_display
     return [by_label['malicious'], uncertain_display, by_label['benign']]
 def executionTimeDisplay(self):
     """Return the execution-time datasets of this strategy.

     A value-less 'Analysis' dataset is placed before the datasets returned
     by ``QueryStrategy.executionTimeDisplay``.
     """
     own_displays = [PlotDataset(None, 'Analysis')]
     return own_displays + QueryStrategy.executionTimeDisplay(self)
Beispiel #19
0
 def executionTimeDisplay(self):
     """Return the execution-time datasets of this strategy.

     A value-less 'Binary model' dataset is placed before the datasets
     returned by ``QueryStrategy.executionTimeDisplay``.
     """
     own_displays = [PlotDataset(None, 'Binary model')]
     return own_displays + QueryStrategy.executionTimeDisplay(self)
Beispiel #20
0
 def executionTimeDisplay(self):
     """Return the execution-time datasets common to the strategies.

     The list holds a single value-less 'Queries generation' dataset,
     colored purple.
     """
     queries_display = PlotDataset(None, 'Queries generation')
     queries_display.setColor('purple')
     displays = [queries_display]
     return displays