Example 1
def getClusterLabelsFamilies(experiment_id, selected_cluster):
    selected_cluster = int(selected_cluster)
    experiment = updateCurrentExperiment(experiment_id)
    clustering = ClusteringExp.from_json(experiment.output_dir())
    labels_families = clustering.getClusterLabelsFamilies(
        experiment, selected_cluster)
    return jsonify(labels_families)
Example 2
def getPredictions(experiment_id, train_test, fold_id, index, label):
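    # Return the ids and predicted probabilities of the instances whose
    # predicted probability lies in [index * 0.1, (index + 1) * 0.1],
    # optionally restricted to malicious or benign ground truth.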
    experiment = updateCurrentExperiment(experiment_id)
    directory = experiment.getOutputDirectory()
    if fold_id != 'None' and fold_id != 'all':
        directory = path.join(directory, fold_id)
    directory = path.join(directory, train_test)
    filename = path.join(directory, 'predictions.csv')
    index = int(index)
    min_value = index * 0.1
    max_value = (index + 1) * 0.1
    with open(filename, 'r') as f:
        data = pd.read_csv(f, header=0, index_col=0)
        selection = data.loc[:, 'predicted_proba'] >= min_value
        data = data.loc[selection, :]
        selection = data.loc[:, 'predicted_proba'] <= max_value
        data = data.loc[selection, :]
        if label != 'all':
            if label == 'malicious':
                selection = data.loc[:, 'ground_truth'] == True
            elif label == 'benign':
                selection = data.loc[:, 'ground_truth'] == False
            data = data.loc[selection, :]
        selected_instances = [int(x) for x in list(data.index.values)]
        proba = list(data['predicted_proba'])
    return jsonify({'instances': selected_instances, 'proba': proba})
Example 3
def activeLearningSuggestionsMonitoring(experiment_id, iteration):
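    # Send the accuracy plot of the high-confidence label/family suggestions
    # computed at the previous iteration (iteration - 1).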
    iteration = int(iteration)
    experiment = updateCurrentExperiment(experiment_id)
    filename = path.join(experiment.output_dir(), str(iteration - 1),
                         'suggestions_accuracy',
                         'labels_families_high_confidence_suggestions.png')
    return send_file(filename)
Example 4
def getTestExperimentId(experiment_id):
    experiment = updateCurrentExperiment(experiment_id)
    exp_filename = path.join(experiment.getOutputDirectory(),
                             'test_experiment.txt')
    with open(exp_filename, 'r') as f:
        test_experiment_id = f.readline()
        return test_experiment_id
Example 5
def getStatsPlot(experiment_id, plot_type, feature):
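    # Histograms are stored as JSON files, the other statistics plots as PNG.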
    exp = updateCurrentExperiment(experiment_id)
    if plot_type.find('histogram') >= 0:
        filename = plot_type + '.json'
    else:
        filename = plot_type + '.png'
    return send_file(path.join(exp.output_dir(), feature, filename))
Example 6
def getAlertsClusteringExperimentId(experiment_id, fold_id):
    experiment = updateCurrentExperiment(experiment_id)
    directory = experiment.getOutputDirectory()
    if fold_id != 'None' and fold_id != 'all':
        directory = path.join(directory, fold_id)
    filename = path.join(directory, 'alerts', 'clusters.json')
    return send_file(filename)
Example 7
def getTopModelFeatures(exp_id, size, train_test, fold_id):
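    # Read the top `size` rows of model_coefficients.csv, resolve the feature
    # names from the database for the tooltips, and return a bar plot of the
    # mean coefficients as JSON.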
    exp = updateCurrentExperiment(exp_id)
    directory = exp.output_dir()
    if fold_id != 'None' and fold_id != 'all':
        directory = path.join(directory, fold_id)
    directory = path.join(directory, train_test)
    filename = path.join(directory, 'model_coefficients.csv')
    with open(filename, 'r') as f:
        coefficients_df = pd.read_csv(f,
                                      header=0,
                                      index_col=0,
                                      nrows=int(size))
        coefficients = list(coefficients_df['mean'])
        features_ids = coefficients_df.index
        tooltip_data = []
        user_ids = []
        for feature_id in features_ids:
            query = session.query(FeaturesAlchemy)
            query = query.filter(FeaturesAlchemy.id == int(feature_id))
            row = query.one()
            tooltip_data.append(row.name)
            user_ids.append(row.user_id)
        barplot = BarPlot(user_ids)
        dataset = PlotDataset(coefficients, None)
        score = exp.exp_conf.core_conf.classifier_conf.featureImportance()
        if score == 'weight':
            dataset.set_color(colors_tools.red)
        barplot.add_dataset(dataset)
        return jsonify(barplot.to_json(tooltip_data=tooltip_data))
Example 8
def getInstancesToAnnotate(experiment_id, iteration, predicted_label):
    experiment = updateCurrentExperiment(experiment_id)
    filename = path.join(experiment.getOutputDirectory(), str(iteration),
                         'toannotate_' + predicted_label + '.csv')
    df = pd.read_csv(filename)
    queries = [int(x) for x in df.instance_id]
    return jsonify({'instances': queries})
Example 9
def getFeatures(experiment_id, instance_id):
    instance_id = int(instance_id)
    experiment = updateCurrentExperiment(experiment_id)
    features_names, features_values = experiment.getFeatures(instance_id)
    features = {features_names[i]: features_values[i]
                for i in range(len(features_names))}
    return jsonify(features)
Example 10
def getFeatures(experiment_id, instance_id):
    instance_id = int(instance_id)
    experiment = updateCurrentExperiment(experiment_id)
    features_from_exp = FeaturesFromExp(experiment)
    features_names, features_values = features_from_exp.get_instance(
        instance_id)
    features = {features_names[i]: v for i, v in enumerate(features_values)}
    return jsonify(features)
Example 11
def getNumComponents(experiment_id):
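    # The number of components is the number of columns in the header of
    # projection_matrix.csv, minus the index column.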
    experiment = updateCurrentExperiment(experiment_id)
    directory = experiment.getOutputDirectory()
    filename = 'projection_matrix.csv'
    with open(path.join(directory, filename), 'r') as f:
        header = f.readline()
        num_components = len(header.split(',')) - 1
    return str(num_components)
Example 12
def getNumElements(experiment_id, selected_cluster):
    selected_cluster = int(selected_cluster)
    experiment = updateCurrentExperiment(experiment_id)
    clustering = ClusteringExp.from_json(experiment.output_dir())
    cluster = clustering.clusters[selected_cluster]
    res = {}
    res['num_elements'] = cluster.numInstances()
    return jsonify(res)
Example 13
def getTopWeightedFeatures(experiment_id, inst_exp_id, instance_id, size,
                           fold_id):
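    # Weight each (scaled) feature value by the corresponding coefficient of
    # the linear model, and return the `size` features with the largest
    # absolute contribution as a bar plot.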
    if fold_id == 'all':
        return None
    instance_id = int(instance_id)
    exp = updateCurrentExperiment(experiment_id)
    validation_experiment = updateCurrentExperiment(inst_exp_id)
    # get the features
    features_names, features_values = validation_experiment.getFeatures(
        instance_id)
    features_values = [float(value) for value in features_values]
    # get the pipeline with scaler and logistic model
    experiment_dir = exp.getOutputDirectory()
    if fold_id != 'None':
        experiment_dir = path.join(experiment_dir, fold_id)
    pipeline = joblib.load(path.join(experiment_dir, 'model', 'model.out'))
    # scale the features
    scaled_values = pipeline.named_steps['scaler'].transform(
        np.reshape(features_values, (1, -1)))
    weighted_values = np.multiply(scaled_values,
                                  pipeline.named_steps['model'].coef_)
    features = list(
        map(lambda name, value, w_value: (name, value, w_value),
            features_names, features_values, weighted_values[0]))
    features.sort(key=lambda tup: abs(tup[2]))
    features = features[:-int(size) - 1:-1]

    features_names = [x[0] for x in features]
    features_values = [x[1] for x in features]
    features_weighted_values = [x[2] for x in features]

    max_length = max([len(f) for f in features_names])
    if max_length > 30:
        labels = [str(i) for i in range(len(features_names))]
        tooltips = [
            features_names[i] + ' (' + str(features_values[i]) + ')'
            for i in range(len(features_names))
        ]
    else:
        labels = features_names
        tooltips = features_values
    barplot = BarPlot(labels)
    dataset = PlotDataset(features_weighted_values, None)
    dataset.setColor(colors_tools.red)
    barplot.addDataset(dataset)
    return jsonify(barplot.toJson(tooltip_data=tooltips))
Example 14
def getSortingCriteria(experiment_id):
    exp = updateCurrentExperiment(experiment_id)
    scoring_filename = path.join(exp.output_dir(), 'scores.csv')
    scores = pd.read_csv(scoring_filename, header=0, index_col=0)
    criteria = scores.columns.values.tolist()
    criteria = list(set([c.split('_pvalues')[0] for c in criteria]))
    criteria.extend(['alphabet', 'null_variance'])
    criteria.sort()
    return jsonify({'criteria': criteria})
Example 15
def getClustersLabels(experiment_id):
    experiment = updateCurrentExperiment(experiment_id)
    clustering = ClusteringExp.from_json(experiment.output_dir())
    # The filter that excluded empty clusters from the visualization is
    # currently disabled: all the clusters are returned.
    clusters = []
    for c in range(clustering.num_clusters):
        # if clustering.clusters[c].numInstances() > 0:
        clusters.append({'id': c, 'label': clustering.clusters[c].label})
    return jsonify({'clusters': clusters})
Example 16
def getClusterInstancesVisu(exp_id, selected_cluster, c_e_r, num_results):
    num_results = int(num_results)
    selected_cluster = int(selected_cluster)
    exp = updateCurrentExperiment(exp_id)
    clustering = ClusteringExp.from_json(exp.output_dir())
    ids = {}
    ids[selected_cluster] = clustering.getClusterInstancesVisu(
        selected_cluster, num_results, random=True)[c_e_r]
    return jsonify(ids)
Example 17
def getClusterLabelFamilyIds(exp_id, selected_cluster, label, family,
                             num_results):
    selected_cluster = int(selected_cluster)
    num_results = int(num_results)
    exp = updateCurrentExperiment(exp_id)
    clustering = ClusteringExp.from_json(exp.output_dir())
    ids = clustering.getClusterLabelFamilyIds(exp, selected_cluster, label,
                                              family)
    return jsonify(listResultWebFormat(ids, num_results))
Example 18
def getIterationSupervisedExperiment(experiment_id, iteration):
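    # Return the id of the binary or multi-class model experiment trained at
    # the given iteration, read from models_experiments.json.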
    experiment = updateCurrentExperiment(experiment_id)
    binary_multiclass = 'multiclass'
    if 'binary' in list(experiment.conf.models_conf.keys()):
        binary_multiclass = 'binary'
    models_exp_file = path.join(experiment.getOutputDirectory(),
                                str(iteration), 'models_experiments.json')
    with open(models_exp_file, 'r') as f:
        models_exp = json.load(f)
    return str(models_exp[binary_multiclass])
Example 19
def getInstancesToAnnotate(experiment_id, iteration, predicted_label):
    experiment = updateCurrentExperiment(experiment_id)
    if predicted_label == 'None':
        filename = 'toannotate.csv'
    else:
        filename = 'toannotate_%s.csv' % predicted_label
    filename = path.join(experiment.getOutputDirectory(), iteration, filename)
    df = pd.read_csv(filename)
    queries = [int(x) for x in df.instance_id]
    return jsonify({'instances': queries})
Example 20
def getClusterLabelFamilyIds(experiment_id, selected_cluster, label, family,
                             num_results):
    selected_cluster = int(selected_cluster)
    num_results = int(num_results)
    experiment = updateCurrentExperiment(experiment_id)
    clustering = ClusteringExp.fromJson(experiment.getOutputDirectory())
    ids = clustering.getClusterLabelFamilyIds(experiment, selected_cluster,
                                              label, family)
    res = listResultWebFormat(ids, num_results)
    return jsonify(res)
Example 21
def getLabelsMonitoring(experiment_id, iteration):
    experiment = updateCurrentExperiment(experiment_id)
    filename = path.join(experiment.output_dir(), str(iteration),
                         'labels_monitoring', 'labels_monitoring.json')
    with open(filename, 'r') as f:
        stats = json.load(f)
        res = {}
        res['unlabeled'] = stats['unlabeled']
        res['annotations'] = stats['global']['annotations']
        return jsonify(res)
Example 22
def getClusterInstancesVisu(experiment_id, selected_cluster, c_e_r,
                            num_results):
    num_results = int(num_results)
    selected_cluster = int(selected_cluster)
    experiment = updateCurrentExperiment(experiment_id)
    clustering = ClusteringExp.fromJson(experiment.getOutputDirectory())
    selected_cluster_ids = {}
    instances = clustering.getClusterInstancesVisu(
        selected_cluster, num_results, random=True)
    selected_cluster_ids[selected_cluster] = instances[c_e_r]
    return jsonify(selected_cluster_ids)
Example 23
def supervisedLearningMonitoring(exp_id, train_test, kind, fold_id):
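    # ROC and false discovery/recall curves are stored as PNG images, the
    # other monitoring results as JSON files.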
    directory = updateCurrentExperiment(exp_id).output_dir()
    if fold_id != 'None' and fold_id != 'all':
        directory = path.join(directory, fold_id)
    directory = path.join(directory, train_test)
    filename = kind
    if kind in ['ROC', 'false_discovery_recall_curve']:
        filename += '.png'
    else:
        filename += '.json'
    return send_file(path.join(directory, filename))
Example 24
def getIterationSupervisedExperiment(exp_id, iteration):
    experiment = updateCurrentExperiment(exp_id)
    binary_multiclass = 'multiclass'
    if 'binary' in experiment.exp_conf.core_conf.models_conf.__dict__:
        binary_multiclass = 'binary'
    models_exp_file = path.join(experiment.output_dir(),
                                str(iteration),
                                'models_experiments.json')
    with open(models_exp_file, 'r') as f:
        models_exp = json.load(f)
    return str(models_exp[binary_multiclass])
Example 25
def getInstance(experiment_id, view_id, instance_id, ident):
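    # The rendering of an instance is delegated to the project-specific view
    # module (SecuML.web.views.Projects.<project>).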
    try:
        if view_id == 'None':
            view_id = None
        experiment = updateCurrentExperiment(experiment_id)
        project = experiment.project
        module = importlib.import_module(
            'SecuML.web.views.Projects.' + project)
        return module.getInstance(experiment, view_id, instance_id, ident)
    except IOError as e:
        app.logger.error(e)
        return 'Unable to display the instance', ident
Example 26
def supervisedLearningMonitoring(experiment_id, train_test, kind, fold_id):
    experiment = updateCurrentExperiment(experiment_id)
    directory = experiment.getOutputDirectory()
    if fold_id != 'None' and fold_id != 'all':
        directory = path.join(directory, fold_id)
    directory = path.join(directory, train_test)
    filename = kind
    if kind == 'ROC':
        filename += '.png'
    else:
        filename += '.json'
    return send_file(path.join(directory, filename))
Example 27
def getTopWeightedFeatures(exp_id, inst_exp_id, instance_id, size, fold_id):
    if fold_id == 'all':
        return None
    instance_id = int(instance_id)
    exp = updateCurrentExperiment(exp_id)
    inst_exp = updateCurrentExperiment(inst_exp_id)
    # get the features
    features_from_exp = FeaturesFromExp(inst_exp)
    features_names, features_values = features_from_exp.get_instance(
        instance_id)
    features_values = [float(value) for value in features_values]
    # get the pipeline with scaler and logistic model
    experiment_dir = exp.output_dir()
    if fold_id != 'None':
        experiment_dir = path.join(experiment_dir, fold_id)
    pipeline = joblib.load(path.join(experiment_dir, 'model', 'model.out'))
    # scale the features
    scaled_values = pipeline.named_steps['scaler'].transform(
        np.reshape(features_values, (1, -1)))
    weighted_values = np.multiply(scaled_values,
                                  pipeline.named_steps['model'].coef_)
    features = list(
        map(lambda name, value, w_value: (name, value, w_value),
            features_names, features_values, weighted_values[0]))
    features.sort(key=lambda tup: abs(tup[2]))
    features = features[:-int(size) - 1:-1]

    features_names = [x[0] for x in features]
    features_values = [x[1] for x in features]
    features_weighted_values = [x[2] for x in features]
    labels = [str(name) for name in features_names]
    tooltips = [
        '%s (%.2f)' % (name, features_values[i])
        for i, name in enumerate(features_names)
    ]
    barplot = BarPlot(labels)
    dataset = PlotDataset(features_weighted_values, None)
    dataset.set_color(colors_tools.red)
    barplot.add_dataset(dataset)
    return jsonify(barplot.to_json(tooltip_data=tooltips))
Example 28
def activeLearningModelsMonitoring(experiment_id, iteration,
                                   train_cv_validation):
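    # Send the accuracy (multi-class) or AUC (binary) monitoring plot of the
    # models trained at the given iteration.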
    experiment = updateCurrentExperiment(experiment_id)
    binary_multiclass = 'multiclass'
    estimator = 'accuracy'
    if 'binary' in experiment.exp_conf.core_conf.models_conf.__dict__:
        binary_multiclass = 'binary'
        estimator = 'auc'
    directory = path.join(experiment.output_dir(), str(iteration),
                          'models_performance')
    filename = '_'.join(
        [binary_multiclass, train_cv_validation, estimator, 'monitoring.png'])
    return send_file(path.join(directory, filename), mimetype='image/png')
Example 29
def removeAnnotation(exp_id, inst_exp_id, instance_id):
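    # Delete the annotation from the database and, when user actions are
    # monitored (user_exp), log the removal in user_actions.log.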
    annotations_db_tools.removeAnnotation(session, inst_exp_id, instance_id)
    session.commit()
    if user_exp:
        exp = updateCurrentExperiment(exp_id)
        filename = path.join(exp.output_dir(), 'user_actions.log')
        file_exists = path.isfile(filename)
        mode = 'a' if file_exists else 'w'
        to_print = ','.join(
            map(str,
                [datetime.datetime.now(), 'removeAnnotation', instance_id]))
        with open(filename, mode) as f:
            f.write(to_print + '\n')
    return ''
Example 30
def runNextIteration(experiment_id, iteration_number):
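    # Trigger the next iteration asynchronously through celery and, when user
    # actions are monitored (user_exp), log the action in user_actions.log.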
    res = str(celeryRunNextIteration.s().apply_async())
    if user_exp:
        experiment = updateCurrentExperiment(experiment_id)
        filename = path.join(experiment.getOutputDirectory(),
                             'user_actions.log')
        file_exists = dir_tools.checkFileExists(filename)
        mode = 'a' if file_exists else 'w'
        to_print = [datetime.datetime.now(), 'nextIteration', iteration_number]
        to_print = list(map(str, to_print))
        to_print = ','.join(to_print)
        with open(filename, mode) as f:
            f.write(to_print + '\n')
    return res