Ejemplo n.º 1
0
def getTopWeightedFeatures(project, dataset, experiment, instance_dataset,
                           inst_exp_id, instance_id, size):
    """Return a bar plot of the most heavily weighted features of an instance.

    The feature values of the instance are scaled with the 'scaler' step of
    the model pipeline and multiplied by the coefficients of its 'model'
    step. The `size` features with the largest absolute weighted value are
    kept, largest first.

    Args:
        project, dataset, experiment: identify the experiment that holds the
            model pipeline.
        instance_dataset, inst_exp_id: identify the experiment the instance
            features are read from.
        instance_id: identifier of the instance (converted with int()).
        size: number of features to keep (converted with int()).

    Returns:
        The result of jsonify() applied to the bar plot data. The bars are
        the weighted values and the tooltips are the raw feature values.

    Raises:
        ValueError: if instance_id or size is not a valid integer string.
    """
    instance_id = int(instance_id)
    size = int(size)
    model_experiment_obj = ExperimentFactory.getFactory().fromJson(
        project, dataset, experiment, db, cursor)
    validation_experiment = ExperimentFactory.getFactory().fromJson(
        project, instance_dataset, inst_exp_id, db, cursor)
    # Get the features of the instance.
    features_names, features_values = validation_experiment.getFeatures(
        instance_id)
    features_values = [float(value) for value in features_values]
    # Get the pipeline (scaler + model) and scale the features.
    pipeline = model_experiment_obj.getModelPipeline()
    scaled_values = pipeline.named_steps['scaler'].transform(
        np.reshape(features_values, (1, -1)))
    weighted_values = np.multiply(scaled_values,
                                  pipeline.named_steps['model'].coef_)
    # sorted(zip(...)) instead of map(...).sort(): on Python 3 map() returns
    # an iterator that has no sort() method. The ascending sort followed by
    # the reversed slice keeps the original ordering of ties.
    features = sorted(
        zip(features_names, features_values, weighted_values[0]),
        key=lambda tup: abs(tup[2]))
    features = features[:-size - 1:-1]
    tooltips = [x[1] for x in features]
    barplot = BarPlot([x[0] for x in features])
    barplot.addDataset([x[2] for x in features], colors_tools.red, None)
    barplot.addTooltips(tooltips)
    return jsonify(barplot.barplot)
Ejemplo n.º 2
0
def getAnnotationsTypes(project, dataset, experiment_id, iteration):
    """Send the 'annotations_types.json' file of an experiment iteration.

    The file is looked up in the sub-directory named after `iteration`
    inside the output directory of the experiment.
    """
    exp = ExperimentFactory.getFactory().fromJson(project, dataset,
                                                  experiment_id, db, cursor)
    iteration_dir = dir_tools.getExperimentOutputDirectory(exp) + str(
        iteration) + '/'
    return send_file(iteration_dir + 'annotations_types.json')
Ejemplo n.º 3
0
def getConf(project, dataset, experiment_id):
    """Return the experiment configuration as a JSON response.

    The configuration produced by the experiment's toJson() is extended
    with a 'has_true_labels' entry computed by labels_tools.hasTrueLabels()
    after selecting the database of (project, dataset).
    """
    exp = ExperimentFactory.getFactory().fromJson(project, dataset,
                                                  experiment_id, db, cursor)
    configuration = exp.toJson()
    # Select the database before querying the labels.
    mysql_tools.useDatabase(cursor, project, dataset)
    has_true_labels = labels_tools.hasTrueLabels(cursor)
    configuration['has_true_labels'] = has_true_labels
    return jsonify(configuration)
Ejemplo n.º 4
0
 def removeExperimentDB(self):
     """Remove this experiment and, recursively, its children.

     Does nothing when isInDB() reports no experiment id. Otherwise the
     children are removed first, then the experiment labels (only when the
     experiment has no parent), then the row in the Experiments table, and
     finally the experiment output directory.
     """
     experiment_id, experiment_label_id = self.isInDB()
     if experiment_id is None:
         # The experiment is not stored: there is nothing to remove.
         return
     self.experiment_id = experiment_id
     self.experiment_label_id = experiment_label_id
     # Each child is removed through its own removeExperimentDB().
     for child_id in experiment_db_tools.getChildren(self.cursor,
                                                     experiment_id):
         ExperimentFactory.getFactory().fromJson(
             self.project, self.dataset, child_id, self.db,
             self.cursor).removeExperimentDB()
     # The labels are removed only for an experiment without parent.
     if self.parent is None:
         labels_tools.removeExperimentLabels(self.cursor,
                                             experiment_label_id)
     self.cursor.execute(
         'DELETE FROM Experiments \
             WHERE name = %s \
             AND kind = %s', (
             self.experiment_name,
             self.kind,
         ))
     self.db.commit()
     dir_tools.removeDirectory(dir_tools.getExperimentOutputDirectory(self))
Ejemplo n.º 5
0
def activeLearningSuggestionsMonitoring(project, dataset, experiment_id, iteration):
    """Send the high-confidence suggestions plot of the previous iteration.

    The image is read from the 'suggestions_accuracy/' folder of iteration
    `int(iteration) - 1` in the output directory of the experiment.
    """
    exp = ExperimentFactory.getFactory().fromJson(project, dataset, experiment_id, db, cursor)
    output_dir = dir_tools.getExperimentOutputDirectory(exp)
    previous_iteration = str(int(iteration) - 1)
    image_path = (output_dir + previous_iteration + '/'
                  + 'suggestions_accuracy/'
                  + 'labels_families'
                  + '_high_confidence_suggestions.png')
    return send_file(image_path)
Ejemplo n.º 6
0
def getClusterPredictedLabel(project, dataset, experiment_id, selected_cluster):
    """Return the label given by the clustering to the selected cluster.

    `selected_cluster` is converted with int() before anything else is done.
    """
    cluster_index = int(selected_cluster)
    exp = ExperimentFactory.getFactory().fromJson(
        project, dataset, experiment_id, db, cursor)
    return Clustering.fromJson(exp).getClusterLabel(cluster_index)
Ejemplo n.º 7
0
def getClusterLabelsFamilies(project, dataset, experiment_id, selected_cluster):
    """Return the labels/families of the selected cluster as a JSON response.

    `selected_cluster` is converted with int() before anything else is done.
    """
    cluster_index = int(selected_cluster)
    exp = ExperimentFactory.getFactory().fromJson(
        project, dataset, experiment_id, db, cursor)
    families = Clustering.fromJson(exp).getClusterLabelsFamilies(cluster_index)
    return jsonify(families)
Ejemplo n.º 8
0
def getFeatures(project, dataset, experiment, instance_dataset, instance_id):
    """Return the (name, value) feature pairs of an instance as JSON.

    Args:
        project, dataset: database selected with mysql_tools.useDatabase().
        experiment, instance_dataset: identify the experiment the features
            are read from.
        instance_id: identifier of the instance (converted with int()).

    Returns:
        The result of jsonify() applied to the list of (name, value) pairs.

    Raises:
        ValueError: if instance_id is not a valid integer string.
    """
    instance_id = int(instance_id)
    mysql_tools.useDatabase(cursor, project, dataset)
    experiment_obj = ExperimentFactory.getFactory().fromJson(
        project, instance_dataset, experiment, db, cursor)
    features_names, features_values = experiment_obj.getFeatures(instance_id)
    # zip() returns an iterator on Python 3, which cannot be serialized to
    # JSON: materialize the pairs explicitly.
    features = list(zip(features_names, features_values))
    return jsonify(features)
Ejemplo n.º 9
0
def getFamiliesInstancesToAnnotate(project, dataset, experiment_id, iteration,
                                   predicted_label):
    """Send the 'toannotate_<predicted_label>.json' file of an iteration.

    The file is looked up in the sub-directory named after `iteration`
    inside the output directory of the experiment.
    """
    exp = ExperimentFactory.getFactory().fromJson(project, dataset,
                                                  experiment_id, db, cursor)
    iteration_dir = dir_tools.getExperimentOutputDirectory(exp) + str(
        iteration) + '/'
    json_name = 'toannotate_' + predicted_label + '.json'
    return send_file(iteration_dir + json_name)
Ejemplo n.º 10
0
def getValidationDataset(project, dataset, experiment_id):
    """Return the name of the dataset used to validate the experiment.

    This is the configured test dataset when the test method is
    'test_dataset', and `dataset` itself for any other method.
    """
    exp = ExperimentFactory.getFactory().fromJson(project, dataset,
                                                  experiment_id, db, cursor)
    test_conf = exp.classification_conf.test_conf
    uses_test_dataset = test_conf.method == 'test_dataset'
    return test_conf.test_dataset if uses_test_dataset else dataset
Ejemplo n.º 11
0
def getNumComponents(project, dataset, experiment_id):
    """Return, as a string, the number of components of the projection.

    The value is the number of comma-separated fields in the first line of
    'projection_matrix.csv', minus one.
    """
    exp = ExperimentFactory.getFactory().fromJson(project, dataset,
                                                  experiment_id, db, cursor)
    matrix_path = dir_tools.getExperimentOutputDirectory(
        exp) + 'projection_matrix.csv'
    with open(matrix_path, 'r') as matrix_file:
        first_line = matrix_file.readline()
    # A line with k commas has k + 1 fields, so (fields - 1) is the number
    # of commas.
    return str(first_line.count(','))
Ejemplo n.º 12
0
def getNumElements(project, dataset, experiment_id, selected_cluster):
    """Return the number of instances of the selected cluster as JSON.

    The response has a single 'num_elements' entry. `selected_cluster` is
    converted with int() before anything else is done.
    """
    cluster_index = int(selected_cluster)
    exp = ExperimentFactory.getFactory().fromJson(
        project, dataset, experiment_id, db, cursor)
    chosen_cluster = Clustering.fromJson(exp).clusters[cluster_index]
    return jsonify({'num_elements': chosen_cluster.numInstances()})
Ejemplo n.º 13
0
def removeClusterLabel(project, dataset, experiment_id, selected_cluster, num_results):
    """Remove the label of the selected cluster and commit the change.

    Both `selected_cluster` and `num_results` are converted with int()
    before the experiment is loaded. Returns an empty string.
    """
    cluster_index = int(selected_cluster)
    max_results = int(num_results)
    exp = ExperimentFactory.getFactory().fromJson(
        project, dataset, experiment_id, db, cursor)
    Clustering.fromJson(exp).removeClusterLabel(cluster_index, max_results)
    db.commit()
    return ''
Ejemplo n.º 14
0
def getClustersLabels(project, dataset, experiment_id):
    """Return the names ('c_<index>') of the non-empty clusters as JSON.

    The response has a single 'labels' entry.
    """
    exp = ExperimentFactory.getFactory().fromJson(
        project, dataset, experiment_id, db, cursor)
    clustering = Clustering.fromJson(exp)
    # Empty clusters are left out of the visualization.
    non_empty = [
        'c_' + str(index)
        for index in range(clustering.num_clusters)
        if clustering.clusters[index].numInstances() > 0
    ]
    return jsonify({'labels': non_empty})
Ejemplo n.º 15
0
def getClustersLabels(project, dataset, experiment_id):
    """Return the id and label of every cluster as a JSON response.

    The response has a single 'clusters' entry holding one
    {'id': ..., 'label': ...} dictionary per cluster.
    """
    exp = ExperimentFactory.getFactory().fromJson(project, dataset,
                                                  experiment_id, db, cursor)
    clustering = Clustering.fromJson(exp)
    # Every cluster is reported, empty ones included.
    described = [{
        'id': index,
        'label': clustering.clusters[index].label
    } for index in range(clustering.num_clusters)]
    return jsonify({'clusters': described})
Ejemplo n.º 16
0
def getStatsPlot(project, dataset, experiment_id, plot_type, feature):
    """Send the plot file of `plot_type` stored for `feature`.

    The file is '<plot_type>.json' when `plot_type` contains 'histogram'
    and '<plot_type>.png' otherwise; it is looked up in the sub-directory
    named after `feature` inside the output directory of the experiment.
    """
    exp = ExperimentFactory.getFactory().fromJson(project, dataset,
                                                  experiment_id, db, cursor)
    feature_dir = dir_tools.getExperimentOutputDirectory(exp) + feature + '/'
    extension = '.json' if 'histogram' in plot_type else '.png'
    return send_file(feature_dir + plot_type + extension)
Ejemplo n.º 17
0
def getClusterLabelFamilyIds(project, dataset, experiment_id, selected_cluster, label, family, num_results):
    """Return ids of the selected cluster for a given label and family as JSON.

    The ids returned by the clustering are formatted with
    web_tools.listResultWebFormat() using `num_results`. Both
    `selected_cluster` and `num_results` are converted with int() first.
    """
    cluster_index = int(selected_cluster)
    max_results = int(num_results)
    exp = ExperimentFactory.getFactory().fromJson(
        project, dataset, experiment_id, db, cursor)
    matching_ids = Clustering.fromJson(exp).getClusterLabelFamilyIds(
        cluster_index, label, family)
    return jsonify(web_tools.listResultWebFormat(matching_ids, max_results))
Ejemplo n.º 18
0
def getInstancesToAnnotate(project, dataset, experiment_id, iteration,
                           predicted_label):
    """Return the instance ids listed in 'toannotate_<predicted_label>.csv'.

    The CSV file is read from the sub-directory named after `iteration`
    inside the output directory of the experiment; its 'instance_id' column
    is returned under the 'instances' key of the JSON response.
    """
    exp = ExperimentFactory.getFactory().fromJson(project, dataset,
                                                  experiment_id, db, cursor)
    iteration_dir = dir_tools.getExperimentOutputDirectory(exp) + str(
        iteration) + '/'
    csv_path = iteration_dir + ('toannotate_' + predicted_label + '.csv')
    to_annotate = pd.read_csv(csv_path)
    return jsonify({'instances': list(to_annotate.instance_id)})
Ejemplo n.º 19
0
def getClusterInstancesVisu(project, dataset, experiment_id, selected_cluster, c_e_r, num_results):
    """Return instances of the selected cluster to display, as JSON.

    The clustering is asked for `num_results` instances with random=True,
    and the entry stored under `c_e_r` is returned, keyed by the cluster
    index. `num_results` and `selected_cluster` are converted with int().
    """
    max_results = int(num_results)
    cluster_index = int(selected_cluster)
    exp = ExperimentFactory.getFactory().fromJson(
        project, dataset, experiment_id, db, cursor)
    instances_by_kind = Clustering.fromJson(exp).getClusterInstancesVisu(
        cluster_index, max_results, random=True)
    return jsonify({cluster_index: instances_by_kind[c_e_r]})
Ejemplo n.º 20
0
def addClusterLabel(project, dataset, experiment_id, selected_cluster, num_results, label, family,
        label_iteration, label_method):
    """Add a label/family to the selected cluster and commit the change.

    Both `selected_cluster` and `num_results` are converted with int()
    before the experiment is loaded. Returns an empty string.
    """
    cluster_index = int(selected_cluster)
    max_results = int(num_results)
    exp = ExperimentFactory.getFactory().fromJson(
        project, dataset, experiment_id, db, cursor)
    Clustering.fromJson(exp).addClusterLabel(
        cluster_index, max_results, label, family, label_iteration,
        label_method)
    db.commit()
    return ''
Ejemplo n.º 21
0
def getIterationSupervisedExperiment(project, dataset, experiment_id,
                                     iteration):
    """Return, as a string, the models experiment recorded for an iteration.

    The value is read from 'models_experiments.json' in the output directory
    of the iteration, under the 'binary' key when the models configuration
    has a 'binary' entry and under 'multiclass' otherwise.
    """
    exp = ExperimentFactory.getFactory().fromJson(project, dataset,
                                                  experiment_id, db, cursor)
    current_iteration = Iteration(exp, int(iteration))
    has_binary = 'binary' in exp.conf.models_conf.keys()
    model_kind = 'binary' if has_binary else 'multiclass'
    json_path = current_iteration.output_directory + 'models_experiments.json'
    with open(json_path, 'r') as json_file:
        experiments_by_kind = json.load(json_file)
    return str(experiments_by_kind[model_kind])
Ejemplo n.º 22
0
def getTopModelCoefficients(project, dataset, experiment, size):
    """Return a bar plot of the largest model coefficients.

    The coefficients are read from the first row of `coef_` of the 'model'
    step of the experiment pipeline. The `size` coefficients with the
    largest absolute value are kept, largest first.

    Args:
        project, dataset, experiment: identify the experiment that holds
            the model pipeline.
        size: number of coefficients to keep (converted with int()).

    Returns:
        The result of jsonify() applied to the bar plot data.

    Raises:
        ValueError: if size is not a valid integer string.
    """
    size = int(size)
    model_experiment_obj = ExperimentFactory.getFactory().fromJson(project, dataset, experiment, db, cursor)
    pipeline = model_experiment_obj.getModelPipeline()
    model_coefficients = pipeline.named_steps['model'].coef_[0]
    features_names = model_experiment_obj.getFeaturesNames()
    # sorted(zip(...)) instead of map(...).sort(): on Python 3 map() returns
    # an iterator that has no sort() method. The ascending sort followed by
    # the reversed slice keeps the original ordering of ties.
    coefficients = sorted(zip(features_names, model_coefficients),
                          key=lambda tup: abs(tup[1]))
    coefficients = coefficients[:-size - 1:-1]
    barplot = BarPlot([x[0] for x in coefficients])
    barplot.addDataset([x[1] for x in coefficients], '#d9534f', None)
    return jsonify(barplot.barplot)
Ejemplo n.º 23
0
def activeLearningModelsMonitoring(project, dataset, experiment_id, iteration, train_cv_validation):
    """Send the models performance monitoring image of an iteration.

    The image name is built from 'binary' and 'auc' when the models
    configuration has a 'binary' entry, and from 'multiclass' and 'accuracy'
    otherwise. It is read from the 'models_performance/' folder of the
    iteration output directory and sent with the 'image/png' mimetype.
    """
    exp = ExperimentFactory.getFactory().fromJson(project, dataset, experiment_id, db, cursor)
    current_iteration = Iteration(exp, int(iteration))
    if 'binary' in exp.conf.models_conf.keys():
        model_kind, estimator = 'binary', 'auc'
    else:
        model_kind, estimator = 'multiclass', 'accuracy'
    performance_dir = current_iteration.output_directory + 'models_performance/'
    image_name = model_kind + '_' + train_cv_validation + '_' + estimator + '_monitoring.png'
    return send_file(performance_dir + image_name, mimetype='image/png')
Ejemplo n.º 24
0
def getAlertsClusteringExperimentId(project, dataset, experiment_id):
    """Return, as a string, the id of the alerts clustering experiment.

    The clustering experiment is looked up for the experiment that holds
    the test instances: the experiment itself with the 'random_split' test
    method, or the configured test experiment with 'test_dataset'.

    Args:
        project, dataset, experiment_id: identify the experiment.

    Returns:
        str() of the value returned by
        AlertsMonitoring.getAlertsClusteringExperimentId().

    Raises:
        ValueError: if the test method is neither 'random_split' nor
            'test_dataset' (previously an UnboundLocalError).
    """
    experiment = ExperimentFactory.getFactory().fromJson(project, dataset, experiment_id,
            db, cursor)
    test_conf = experiment.classification_conf.test_conf
    if test_conf.method == 'random_split':
        test_dataset = dataset
        test_exp_id = experiment_id
    elif test_conf.method == 'test_dataset':
        test_dataset = test_conf.test_exp.dataset
        test_exp_id = test_conf.test_exp.experiment_id
    else:
        # Fail with an explicit message instead of an unbound local below.
        raise ValueError('Unsupported test method: %r' % (test_conf.method,))
    mysql_tools.useDatabase(cursor, project, test_dataset)
    clustering_experiment_id = AlertsMonitoring.AlertsMonitoring.getAlertsClusteringExperimentId(
            cursor, test_exp_id)
    return str(clustering_experiment_id)
Ejemplo n.º 25
0
def getTopModelFeatures(experiment_id, size):
    """Return a bar plot of the most important features of the model.

    The values returned by the experiment's getTopFeatures() are paired
    with the feature names; the `size` pairs with the largest absolute
    value are kept, largest first. The dataset is colored in red when the
    feature importance of the classification configuration is 'weight'.

    Args:
        experiment_id: identifier of the experiment.
        size: number of features to keep (converted with int()).

    Returns:
        The result of jsonify() applied to the bar plot's toJson().

    Raises:
        ValueError: if size is not a valid integer string.
    """
    size = int(size)
    exp = ExperimentFactory.getFactory().fromJson(experiment_id, session)
    model_coefficients = exp.getTopFeatures()
    features_names = exp.getFeaturesNames()
    # sorted(zip(...)) instead of map(...).sort(): on Python 3 map() returns
    # an iterator that has no sort() method. The ascending sort followed by
    # the reversed slice keeps the original ordering of ties.
    coefficients = sorted(zip(features_names, model_coefficients),
                          key=lambda tup: abs(tup[1]))
    coefficients = coefficients[:-size - 1:-1]
    barplot = BarPlot([x[0] for x in coefficients])
    dataset = PlotDataset([x[1] for x in coefficients], None)
    if exp.classification_conf.featureImportance() == 'weight':
        dataset.setColor(colors_tools.red)
    barplot.addDataset(dataset)
    return jsonify(barplot.toJson())
Ejemplo n.º 26
0
def getAlerts(project, dataset, experiment_id, analysis_type):
    """Return the alerts of an experiment and their probabilities as JSON.

    The alerts are the rows of 'alerts.csv' (index, 'predicted_proba').
    When there are more rows than the configured maximum, the first ones
    are kept for the 'topN' analysis type and a random sample is drawn for
    'random'; any other analysis type keeps every row.
    """
    exp = ExperimentFactory.getFactory().fromJson(project, dataset, experiment_id,
                                                  db, cursor)
    alerts_path = dir_tools.getExperimentOutputDirectory(exp) + 'alerts.csv'
    with open(alerts_path, 'r') as alerts_file:
        table = pd.read_csv(alerts_file, header=0, index_col=0)
    max_alerts = exp.classification_conf.test_conf.alerts_conf.num_max_alerts
    # Each row is an (index, predicted_proba) tuple.
    rows = list(table[['predicted_proba']].itertuples())
    too_many = max_alerts < len(rows)
    if too_many and analysis_type == 'topN':
        rows = rows[:max_alerts]
    elif too_many and analysis_type == 'random':
        rows = random.sample(rows, max_alerts)
    instance_ids = [row[0] for row in rows]
    return jsonify({'instances': instance_ids, 'proba': dict(rows)})
Ejemplo n.º 27
0
def getPredictions(project, dataset, experiment_id, train_test, index):
    """Return the predictions whose probability lies in a given bin, as JSON.

    The bin boundaries are `index * 0.1` and `(index + 1) * 0.1`; the rows
    of '<train_test>/predictions.csv' are filtered on 'predicted_proba'
    with matrix_tools.extractRowsWithThresholds().
    """
    exp = ExperimentFactory.getFactory().fromJson(project, dataset, experiment_id,
                                                  db, cursor)
    predictions_path = dir_tools.getExperimentOutputDirectory(exp)
    predictions_path += train_test + '/predictions.csv'
    bin_index = int(index)
    lower_bound = bin_index * 0.1
    upper_bound = (bin_index+1) * 0.1
    with open(predictions_path, 'r') as predictions_file:
        predictions = pd.read_csv(predictions_file, header=0, index_col=0)
    in_bin = matrix_tools.extractRowsWithThresholds(
        predictions, lower_bound, upper_bound, 'predicted_proba')
    return jsonify({
        'instances': list(in_bin.index.values),
        'proba': list(in_bin['predicted_proba'])
    })
Ejemplo n.º 28
0
def runNextIteration(project, dataset, experiment_id, iteration_number):
    """Launch the next iteration asynchronously and log the user action.

    When the global `user_exp` is true, a '<timestamp>,nextIteration,
    <iteration_number>' line is appended to 'user_actions.log' in the
    output directory of the experiment.

    Args:
        project, dataset, experiment_id: identify the experiment.
        iteration_number: value written in the log line.

    Returns:
        str() of the value returned by the asynchronous celery call.
    """
    res = str(celeryRunNextIteration.s().apply_async())
    if user_exp:
        experiment = ExperimentFactory.getFactory().fromJson(
            project, dataset, experiment_id, db, cursor)
        filename = dir_tools.getExperimentOutputDirectory(experiment)
        filename += 'user_actions.log'
        fields = [datetime.datetime.now(), 'nextIteration', iteration_number]
        to_print = ','.join(str(field) for field in fields)
        # Append mode creates the file when it does not exist yet, so no
        # existence check is needed to choose between 'a' and 'w'.
        with open(filename, 'a') as f:
            # f.write() instead of the Python 2 only `print >> f` statement.
            f.write(to_print + '\n')
    return res
Ejemplo n.º 29
0
def activeLearningMonitoring(project, dataset, experiment_id, iteration, kind, sub_kind):
    """Send one of the monitoring images of an active learning iteration.

    Args:
        project, dataset, experiment_id: identify the experiment.
        iteration: iteration number (converted with int()).
        kind: one of 'labels', 'families', 'clustering' or 'time'.
        sub_kind: part of the image name for the 'labels' and 'clustering'
            kinds; unused for the other kinds.

    Returns:
        The result of send_file() for the selected image, sent with the
        'image/png' mimetype.

    Raises:
        ValueError: if `kind` is not one of the supported values
            (previously an UnboundLocalError), or if `iteration` is not a
            valid integer string.
    """
    experiment = ExperimentFactory.getFactory().fromJson(project, dataset, experiment_id, db, cursor)
    active_learning = Iteration(experiment, int(iteration))
    directory = active_learning.output_directory
    if kind == 'labels':
        filename  = directory + 'labels_monitoring/'
        filename += 'iteration' + '_' + sub_kind + '.png'
    elif kind == 'families':
        filename = directory + 'labels_monitoring/' + 'families_monitoring.png'
    elif kind == 'clustering':
        filename  = directory + 'clustering_evaluation/'
        filename += sub_kind + '_monitoring.png'
    elif kind == 'time':
        filename  = directory
        filename += 'execution_time_monitoring.png'
    else:
        # Fail with an explicit message instead of an unbound local below.
        raise ValueError('Unknown monitoring kind: %r' % (kind,))
    return send_file(filename, mimetype='image/png')
Ejemplo n.º 30
0
def currentAnnotations(project, dataset, experiment_id, iteration):
    """Render the current annotations page and log the user action.

    When the global `user_exp` is true, a
    '<timestamp>,displayAnnotatedInstances' line is appended to
    'user_actions.log' in the output directory of the experiment.

    Args:
        project: passed to the template.
        dataset, experiment_id: with `project`, identify the experiment
            whose log file is written.
        iteration: unused by this function.

    Returns:
        The page rendered from 'ActiveLearning/current_annotations.html'.
    """
    page = render_template('ActiveLearning/current_annotations.html',
                           project=project)
    if user_exp:
        experiment = ExperimentFactory.getFactory().fromJson(
            project, dataset, experiment_id, db, cursor)
        filename = dir_tools.getExperimentOutputDirectory(experiment)
        filename += 'user_actions.log'
        fields = [datetime.datetime.now(), 'displayAnnotatedInstances']
        to_print = ','.join(str(field) for field in fields)
        # Append mode creates the file when it does not exist yet, so no
        # existence check is needed to choose between 'a' and 'w'.
        with open(filename, 'a') as f:
            # f.write() instead of the Python 2 only `print >> f` statement.
            f.write(to_print + '\n')
    return page