Ejemplo n.º 1
0
def getTopWeightedFeatures(experiment_id, inst_exp_id, instance_id, size):
    instance_id = int(instance_id)
    exp = ExperimentFactory.getFactory().fromJson(experiment_id, session)
    validation_experiment = ExperimentFactory.getFactory().fromJson(
        inst_exp_id, session)
    #get the features
    features_names, features_values = validation_experiment.getFeatures(
        instance_id)
    features_values = [float(value) for value in features_values]
    #get the pipeline with scaler and logistic model
    pipeline = exp.getModelPipeline()
    #scale the features
    scaled_values = pipeline.named_steps['scaler'].transform(
        np.reshape(features_values, (1, -1)))
    weighted_values = np.multiply(scaled_values,
                                  pipeline.named_steps['model'].coef_)
    features = map(lambda name, value, w_value: (name, value, w_value),
                   features_names, features_values, weighted_values[0])
    features.sort(key=lambda tup: abs(tup[2]))
    features = features[:-int(size) - 1:-1]
    tooltips = [x[1] for x in features]
    barplot = BarPlot([x[0] for x in features])
    dataset = PlotDataset([x[2] for x in features], None)
    dataset.setColor(colors_tools.red)
    barplot.addDataset(dataset)
    return jsonify(barplot.toJson(tooltip_data=tooltips))
Ejemplo n.º 2
0
def getTopModelFeatures(experiment_id, size):
    size = int(size)
    exp = ExperimentFactory.getFactory().fromJson(experiment_id, session)
    model_coefficients = exp.getTopFeatures()
    features_names = exp.getFeaturesNames()
    coefficients = map(lambda name, coef: (name, coef), features_names,
                       model_coefficients)
    coefficients.sort(key=lambda tup: abs(tup[1]))
    coefficients = coefficients[:-size - 1:-1]
    barplot = BarPlot([x[0] for x in coefficients])
    dataset = PlotDataset([x[1] for x in coefficients], None)
    if (exp.classification_conf.featureImportance() == 'weight'):
        dataset.setColor(colors_tools.red)
    barplot.addDataset(dataset)
    return jsonify(barplot.toJson())
Ejemplo n.º 3
0
def getClusterStats(experiment_id):
    experiment = updateCurrentExperiment(experiment_id)
    clustering = Clustering.fromJson(experiment)
    num_clusters = clustering.num_clusters
    num_instances_v = []
    labels = []
    for c in range(num_clusters):
        instances_in_cluster = clustering.clusters[c].instances_ids
        num_instances = len(instances_in_cluster)
        # the empty clusters are not displayed

        #if num_instances > 0:
        num_instances_v.append(num_instances)
        #labels.append('c_' + str(c))
        labels.append(clustering.clusters[c].label)
    barplot = BarPlot(labels)
    dataset = PlotDataset(num_instances_v, 'Num. Instances')
    barplot.addDataset(dataset)
    return jsonify(barplot.toJson())
Ejemplo n.º 4
0
def getFamiliesBarplot(experiment_id, iteration, label):
    experiment = updateCurrentExperiment(experiment_id)
    experiment_label_id = experiment.labels_id
    if iteration == 'None':
        iteration = None
    family_counts = labels_tools.getFamiliesCounts(experiment.session,
                                                   experiment_label_id,
                                                   iteration_max=iteration,
                                                   label=label)
    df = pd.DataFrame({
        'families':
        family_counts.keys(),
        'counts': [family_counts[k] for k in family_counts.keys()]
    })
    matrix_tools.sortDataFrame(df, 'families', ascending=True, inplace=True)
    barplot = BarPlot(list(df['families']))
    dataset = PlotDataset(list(df['counts']), 'Num. Instances')
    dataset.setColor(colors_tools.getLabelColor(label))
    barplot.addDataset(dataset)
    return jsonify(barplot.toJson())
Ejemplo n.º 5
0
def getFamiliesPerformance(experiment_id, train_test, label, threshold):
    experiment = updateCurrentExperiment(experiment_id)
    filename = experiment.getOutputDirectory() + train_test + '/families/'
    if label == 'malicious':
        filename += 'tp_'
        tp_fp = 'Detection Rate'
    elif label == 'benign':
        filename += 'fp_'
        tp_fp = 'False Positive Rate'
    filename += 'families_thresholds.csv'
    with open(filename, 'r') as f:
        perf = pd.read_csv(f, header=0, index_col=0)
        families = list(perf.columns.values[:-1])
        threshold = float(threshold) / 100
        thresholds = list(perf.index[:-1])
        threshold_value = min(enumerate(thresholds),
                              key=lambda x: abs(x[1] - threshold))[1]
        perf = list(perf.loc[threshold_value])
        barplot = BarPlot(families)
        barplot.addDataset(PlotDataset(perf, tp_fp))
    return jsonify(barplot.toJson())