def export(self):
    """Dump the experiment configuration to ``conf.json``.

    The experiment output directory is passed to
    ``dir_tools.createDirectory`` and the value returned by
    ``self.toJson()`` is then written there as JSON with an indentation
    of two spaces.
    """
    output_dir = dir_tools.getExperimentOutputDirectory(self)
    dir_tools.createDirectory(output_dir)
    # The output directory string is concatenated directly with the file
    # name, exactly as the rest of this code does.
    with open(output_dir + 'conf.json', 'w') as conf_file:
        json.dump(self.toJson(), conf_file, indent=2)
def removeExperimentDB(self):
    """Remove this experiment and its children from the database and disk.

    Returns immediately when ``self.isInDB()`` reports no experiment id.
    Otherwise the ids are stored on the instance, every child experiment
    returned by ``experiment_db_tools.getChildren`` is rebuilt and removed
    recursively, the experiment labels are removed when the experiment has
    no parent, the matching row of the ``Experiments`` table is deleted,
    the transaction is committed and the experiment output directory is
    removed.
    """
    experiment_id, experiment_label_id = self.isInDB()
    if experiment_id is None:
        return
    self.experiment_id = experiment_id
    self.experiment_label_id = experiment_label_id

    # Remove children experiments first.
    children = experiment_db_tools.getChildren(self.cursor, experiment_id)
    for child in children:
        child_exp = ExperimentFactory.getFactory().fromJson(
            self.project, self.dataset, child, self.db, self.cursor)
        child_exp.removeExperimentDB()

    # Labels are only removed for experiments without a parent.
    if self.parent is None:
        labels_tools.removeExperimentLabels(self.cursor, experiment_label_id)

    # Adjacent string literals are used instead of backslash continuations
    # inside the literal, so the query neither embeds source indentation
    # nor depends on the absence of trailing whitespace after a backslash.
    self.cursor.execute(
        'DELETE FROM Experiments '
        'WHERE name = %s '
        'AND kind = %s',
        (self.experiment_name, self.kind,))
    self.db.commit()

    experiment_dir = dir_tools.getExperimentOutputDirectory(self)
    dir_tools.removeDirectory(experiment_dir)
def activeLearningSuggestionsMonitoring(project, dataset, experiment_id, iteration):
    """Send the high-confidence suggestions accuracy plot.

    The image is looked up in the ``suggestions_accuracy`` sub-directory of
    iteration ``int(iteration) - 1`` inside the experiment output
    directory. Returns the result of ``send_file`` for that path.
    """
    factory = ExperimentFactory.getFactory()
    experiment = factory.fromJson(project, dataset, experiment_id, db, cursor)
    output_dir = dir_tools.getExperimentOutputDirectory(experiment)
    # The plot belongs to the iteration preceding the requested one.
    iteration_dir = output_dir + str(int(iteration) - 1) + '/'
    plot_path = (iteration_dir
                 + 'suggestions_accuracy/'
                 + 'labels_families'
                 + '_high_confidence_suggestions.png')
    return send_file(plot_path)
def getAnnotationsTypes(project, dataset, experiment_id, iteration):
    """Send ``annotations_types.json`` for the given iteration.

    The file is read from the ``<iteration>/`` sub-directory of the
    experiment output directory. Returns the result of ``send_file``.
    """
    factory = ExperimentFactory.getFactory()
    experiment = factory.fromJson(project, dataset, experiment_id, db, cursor)
    output_dir = dir_tools.getExperimentOutputDirectory(experiment)
    iteration_dir = output_dir + str(iteration) + '/'
    return send_file(iteration_dir + 'annotations_types.json')
def getFamiliesInstancesToAnnotate(project, dataset, experiment_id, iteration,
                                   predicted_label):
    """Send ``toannotate_<predicted_label>.json`` for the given iteration.

    The file is read from the ``<iteration>/`` sub-directory of the
    experiment output directory. Returns the result of ``send_file``.
    """
    factory = ExperimentFactory.getFactory()
    experiment = factory.fromJson(project, dataset, experiment_id, db, cursor)
    output_dir = dir_tools.getExperimentOutputDirectory(experiment)
    iteration_dir = output_dir + str(iteration) + '/'
    json_name = 'toannotate_' + predicted_label + '.json'
    return send_file(iteration_dir + json_name)
def getNumComponents(project, dataset, experiment_id):
    """Return the number of components of the projection matrix as a string.

    Only the first line of ``projection_matrix.csv`` (in the experiment
    output directory) is read; the result is the number of comma-separated
    fields on that line minus one.
    """
    factory = ExperimentFactory.getFactory()
    experiment = factory.fromJson(project, dataset, experiment_id, db, cursor)
    output_dir = dir_tools.getExperimentOutputDirectory(experiment)
    with open(output_dir + 'projection_matrix.csv', 'r') as matrix_file:
        header = matrix_file.readline()
    # The number of commas equals the number of fields minus one.
    return str(header.count(','))
def getStatsPlot(project, dataset, experiment_id, plot_type, feature):
    """Send a statistics plot of a feature.

    The file is ``<feature>/<plot_type>.json`` when ``plot_type`` contains
    the substring ``'histogram'`` and ``<feature>/<plot_type>.png``
    otherwise, relative to the experiment output directory. Returns the
    result of ``send_file``.
    """
    factory = ExperimentFactory.getFactory()
    experiment = factory.fromJson(project, dataset, experiment_id, db, cursor)
    feature_dir = dir_tools.getExperimentOutputDirectory(
        experiment) + feature + '/'
    if 'histogram' in plot_type:
        extension = '.json'
    else:
        extension = '.png'
    return send_file(feature_dir + plot_type + extension)
def getInstancesToAnnotate(project, dataset, experiment_id, iteration,
                           predicted_label):
    """Return the instances to annotate for a predicted label.

    Reads ``toannotate_<predicted_label>.csv`` from the ``<iteration>/``
    sub-directory of the experiment output directory and returns, through
    ``jsonify``, a mapping whose ``'instances'`` entry lists the values of
    the ``instance_id`` column.
    """
    factory = ExperimentFactory.getFactory()
    experiment = factory.fromJson(project, dataset, experiment_id, db, cursor)
    output_dir = dir_tools.getExperimentOutputDirectory(experiment)
    iteration_dir = output_dir + str(iteration) + '/'
    csv_path = iteration_dir + 'toannotate_' + predicted_label + '.csv'
    to_annotate = pd.read_csv(csv_path)
    return jsonify({'instances': list(to_annotate.instance_id)})
def getPredictions(project, dataset, experiment_id, train_test, index):
    """Return the instances whose predicted probability falls in a bin.

    ``index`` is converted to an integer and selects the thresholds
    ``index * 0.1`` and ``(index + 1) * 0.1``, which are handed to
    ``matrix_tools.extractRowsWithThresholds`` together with the
    ``predicted_proba`` column of ``<train_test>/predictions.csv``.
    The selected index values and probabilities are returned through
    ``jsonify`` under the keys ``'instances'`` and ``'proba'``.
    """
    factory = ExperimentFactory.getFactory()
    experiment = factory.fromJson(project, dataset, experiment_id, db, cursor)
    output_dir = dir_tools.getExperimentOutputDirectory(experiment)
    predictions_path = output_dir + train_test + '/predictions.csv'

    bin_index = int(index)
    lower = bin_index * 0.1
    upper = (bin_index + 1) * 0.1

    with open(predictions_path, 'r') as predictions_file:
        predictions = pd.read_csv(predictions_file, header=0, index_col=0)
    selection = matrix_tools.extractRowsWithThresholds(
        predictions, lower, upper, 'predicted_proba')

    instance_ids = list(selection.index.values)
    probabilities = list(selection['predicted_proba'])
    return jsonify({'instances': instance_ids, 'proba': probabilities})
def getAlerts(project, dataset, experiment_id, analysis_type):
    """Return the alerts of an experiment, possibly truncated.

    The alerts are the ``(index, predicted_proba)`` rows of ``alerts.csv``
    in the experiment output directory. When there are more alerts than
    ``num_max_alerts`` (read from the experiment's alerts configuration),
    ``analysis_type == 'topN'`` keeps the first ``num_max_alerts`` rows
    and ``analysis_type == 'random'`` draws a random sample of that size;
    any other value leaves the list untouched. The result is returned
    through ``jsonify`` with the keys ``'instances'`` and ``'proba'``.
    """
    factory = ExperimentFactory.getFactory()
    experiment = factory.fromJson(project, dataset, experiment_id, db, cursor)
    alerts_path = dir_tools.getExperimentOutputDirectory(experiment) + 'alerts.csv'
    with open(alerts_path, 'r') as alerts_file:
        data = pd.read_csv(alerts_file, header=0, index_col=0)

    alerts_conf = experiment.classification_conf.test_conf.alerts_conf
    limit = alerts_conf.num_max_alerts
    alerts = list(data[['predicted_proba']].itertuples())

    too_many = len(alerts) > limit
    if too_many and analysis_type == 'topN':
        alerts = alerts[:limit]
    elif too_many and analysis_type == 'random':
        alerts = random.sample(alerts, limit)

    # Each alert is an (index, predicted_proba) pair.
    instance_ids = [instance_id for instance_id, _ in alerts]
    return jsonify({'instances': instance_ids, 'proba': dict(alerts)})
def runNextIteration(project, dataset, experiment_id, iteration_number):
    """Submit the next-iteration task and optionally log the user action.

    The ``celeryRunNextIteration`` task is submitted with ``apply_async``
    and the string form of its result is returned. When ``user_exp`` is
    truthy, a line ``<timestamp>,nextIteration,<iteration_number>`` is
    appended to ``user_actions.log`` in the experiment output directory.
    """
    res = str(celeryRunNextIteration.s().apply_async())
    if user_exp:
        experiment = ExperimentFactory.getFactory().fromJson(
            project, dataset, experiment_id, db, cursor)
        filename = dir_tools.getExperimentOutputDirectory(experiment)
        filename += 'user_actions.log'
        fields = [datetime.datetime.now(), 'nextIteration', iteration_number]
        line = ','.join(map(str, fields))
        # Append mode creates the file when it is missing, so no existence
        # check is needed; f.write replaces the Python-2-only
        # ``print >> f`` statement and produces the same output.
        with open(filename, 'a') as f:
            f.write(line + '\n')
    return res
def currentAnnotations(project, dataset, experiment_id, iteration):
    """Render the current annotations page and optionally log the action.

    Returns the page rendered from
    ``ActiveLearning/current_annotations.html``. When ``user_exp`` is
    truthy, a line ``<timestamp>,displayAnnotatedInstances`` is appended to
    ``user_actions.log`` in the experiment output directory.
    """
    page = render_template('ActiveLearning/current_annotations.html',
                           project=project)
    if user_exp:
        experiment = ExperimentFactory.getFactory().fromJson(
            project, dataset, experiment_id, db, cursor)
        filename = dir_tools.getExperimentOutputDirectory(experiment)
        filename += 'user_actions.log'
        fields = [datetime.datetime.now(), 'displayAnnotatedInstances']
        line = ','.join(map(str, fields))
        # Append mode creates the file when it is missing, so no existence
        # check is needed; f.write replaces the Python-2-only
        # ``print >> f`` statement and produces the same output.
        with open(filename, 'a') as f:
            f.write(line + '\n')
    return page
def changeFamilyLabel(project, dataset, experiment_id, experiment_label_id,
                      label, family):
    """Change the label of a family and optionally log the user action.

    Selects the database of ``project``/``dataset`` and delegates to
    ``labels_tools.changeFamilyLabel``. When ``user_exp`` is truthy, a line
    ``<timestamp>,changeFamilyLabel,<family>,<label>`` is appended to
    ``user_actions.log`` in the experiment output directory. Always
    returns an empty string.
    """
    mysql_tools.useDatabase(cursor, project, dataset)
    # NOTE(review): no db.commit() is issued here; presumably the helper
    # commits itself - confirm.
    labels_tools.changeFamilyLabel(cursor, label, family, experiment_label_id)
    if user_exp:
        experiment = ExperimentFactory.getFactory().fromJson(
            project, dataset, experiment_id, db, cursor)
        filename = dir_tools.getExperimentOutputDirectory(experiment)
        filename += 'user_actions.log'
        fields = [datetime.datetime.now(), 'changeFamilyLabel', family, label]
        line = ','.join(map(str, fields))
        # Append mode creates the file when it is missing, so no existence
        # check is needed; f.write replaces the Python-2-only
        # ``print >> f`` statement and produces the same output.
        with open(filename, 'a') as f:
            f.write(line + '\n')
    return ''
def mergeFamilies(project, dataset, experiment_id, experiment_label_id,
                  label, families, new_family_name):
    """Merge several families into one and optionally log the user action.

    ``families`` is a comma-separated string; it is split and passed to
    ``labels_tools.mergeFamilies`` after selecting the database of
    ``project``/``dataset``. When ``user_exp`` is truthy, a line
    ``<timestamp>,mergeFamilies,<new_family_name>,<family>,...`` is
    appended to ``user_actions.log`` in the experiment output directory.
    Always returns an empty string.
    """
    families = families.split(',')
    mysql_tools.useDatabase(cursor, project, dataset)
    # NOTE(review): no db.commit() is issued here; presumably the helper
    # commits itself - confirm.
    labels_tools.mergeFamilies(cursor, label, families, new_family_name,
                               experiment_label_id)
    if user_exp:
        experiment = ExperimentFactory.getFactory().fromJson(
            project, dataset, experiment_id, db, cursor)
        filename = dir_tools.getExperimentOutputDirectory(experiment)
        filename += 'user_actions.log'
        fields = [datetime.datetime.now(), 'mergeFamilies', new_family_name]
        fields += families
        # A single str conversion of every field is enough.
        line = ','.join(map(str, fields))
        # Append mode creates the file when it is missing, so no existence
        # check is needed; f.write replaces the Python-2-only
        # ``print >> f`` statement and produces the same output.
        with open(filename, 'a') as f:
            f.write(line + '\n')
    return ''
def removeLabel(project, dataset, experiment_id, inst_dataset,
                inst_experiment_label_id, iteration_number, instance_id):
    """Remove the label of an instance and optionally log the user action.

    Selects the database of ``project``/``inst_dataset``, delegates to
    ``labels_tools.removeLabel`` and commits. When ``user_exp`` is truthy,
    a line ``<timestamp>,removeLabel,<project>,<dataset>,<instance_id>`` is
    appended to ``user_actions.log`` in the output directory of the
    experiment identified by ``project``/``dataset``/``experiment_id``.
    ``iteration_number`` is accepted but not used. Always returns an empty
    string.
    """
    mysql_tools.useDatabase(cursor, project, inst_dataset)
    labels_tools.removeLabel(cursor, inst_experiment_label_id, instance_id)
    db.commit()
    if user_exp:
        experiment = ExperimentFactory.getFactory().fromJson(
            project, dataset, experiment_id, db, cursor)
        filename = dir_tools.getExperimentOutputDirectory(experiment)
        filename += 'user_actions.log'
        fields = [
            datetime.datetime.now(), 'removeLabel', project, dataset,
            instance_id,
        ]
        line = ','.join(map(str, fields))
        # Append mode creates the file when it is missing, so no existence
        # check is needed; f.write replaces the Python-2-only
        # ``print >> f`` statement and produces the same output.
        with open(filename, 'a') as f:
            f.write(line + '\n')
    return ''
def getProjectionMatrix(project, dataset, experiment_id):
    """Send ``projection_matrix.csv`` from the experiment output directory.

    Returns the result of ``send_file`` for that path.
    """
    factory = ExperimentFactory.getFactory()
    experiment = factory.fromJson(project, dataset, experiment_id, db, cursor)
    output_dir = dir_tools.getExperimentOutputDirectory(experiment)
    return send_file(output_dir + 'projection_matrix.csv')
def getHexBin(project, dataset, experiment_id, x, y):
    """Send the hexbin JSON file of the components ``x`` and ``y``.

    The file is named ``c_<x>_<y>_hexbin.json`` and is looked up in the
    experiment output directory. Returns the result of ``send_file``.
    """
    factory = ExperimentFactory.getFactory()
    experiment = factory.fromJson(project, dataset, experiment_id, db, cursor)
    output_dir = dir_tools.getExperimentOutputDirectory(experiment)
    hexbin_name = 'c_' + x + '_' + y + '_hexbin.json'
    return send_file(output_dir + hexbin_name)
def getModelPipeline(self):
    """Load and return the object stored in ``model/model.out``.

    The file is located under the experiment output directory and read
    with ``joblib.load``.
    """
    model_path = dir_tools.getExperimentOutputDirectory(self) + '/model/model.out'
    return joblib.load(model_path)
def setDirectories(self):
    """Set ``AL_directory`` and ``iteration_dir`` on the instance.

    ``AL_directory`` is the output directory of ``self.experiment``;
    ``iteration_dir`` is that directory followed by
    ``<iteration_number>/``.
    """
    al_directory = dir_tools.getExperimentOutputDirectory(self.experiment)
    self.AL_directory = al_directory
    self.iteration_dir = al_directory + str(self.iteration_number) + '/'
def getFeaturesTypes(project, dataset, experiment_id):
    """Send ``features_types.json`` from the experiment output directory.

    Returns the result of ``send_file`` for that path.
    """
    factory = ExperimentFactory.getFactory()
    experiment = factory.fromJson(project, dataset, experiment_id, db, cursor)
    output_dir = dir_tools.getExperimentOutputDirectory(experiment)
    return send_file(output_dir + 'features_types.json')
def getLabelsMonitoring(project, dataset, experiment_id, iteration):
    """Send ``labels_monitoring.json`` for the given iteration.

    The file is read from ``<iteration>/labels_monitoring/`` inside the
    experiment output directory. Returns the result of ``send_file``.
    """
    factory = ExperimentFactory.getFactory()
    experiment = factory.fromJson(project, dataset, experiment_id, db, cursor)
    output_dir = dir_tools.getExperimentOutputDirectory(experiment)
    iteration_dir = output_dir + str(iteration) + '/'
    return send_file(iteration_dir + 'labels_monitoring/labels_monitoring.json')
def getAlertsClusteringExperimentId(project, dataset, experiment_id):
    """Send ``grouping.json`` from the experiment output directory.

    Returns the result of ``send_file`` for that path.
    """
    factory = ExperimentFactory.getFactory()
    experiment = factory.fromJson(project, dataset, experiment_id, db, cursor)
    output_dir = dir_tools.getExperimentOutputDirectory(experiment)
    return send_file(output_dir + 'grouping.json')
def getDir(project, dataset, experiment_id):
    """Return the output directory of the identified experiment.

    The experiment is rebuilt with the experiment factory before its
    directory is queried through ``dir_tools``.
    """
    factory = ExperimentFactory.getFactory()
    experiment = factory.fromJson(project, dataset, experiment_id, db, cursor)
    return dir_tools.getExperimentOutputDirectory(experiment)
def __init__(self, experiment):
    """Store the experiment, its configuration and its output directory.

    ``num_components`` is copied from the experiment configuration.
    """
    conf = experiment.conf
    self.experiment = experiment
    self.conf = conf
    self.num_components = conf.num_components
    self.output_directory = dir_tools.getExperimentOutputDirectory(experiment)
def __init__(self, experiment):
    """Create the instances from the experiment and store its output directory.

    A fresh ``Instances`` object is attached to the instance and then
    initialised with ``initFromExperiment``.
    """
    self.instances = instances = Instances()
    instances.initFromExperiment(experiment)
    self.output_directory = dir_tools.getExperimentOutputDirectory(experiment)
def getCumExplVar(project, dataset, experiment_id):
    """Send ``cumuled_explained_variance.csv`` from the experiment directory.

    Returns the result of ``send_file`` for that path.
    """
    factory = ExperimentFactory.getFactory()
    experiment = factory.fromJson(project, dataset, experiment_id, db, cursor)
    output_dir = dir_tools.getExperimentOutputDirectory(experiment)
    return send_file(output_dir + 'cumuled_explained_variance.csv')
def getReconsErrors(project, dataset, experiment_id):
    """Send ``reconstruction_errors.csv`` from the experiment directory.

    Returns the result of ``send_file`` for that path.
    """
    factory = ExperimentFactory.getFactory()
    experiment = factory.fromJson(project, dataset, experiment_id, db, cursor)
    output_dir = dir_tools.getExperimentOutputDirectory(experiment)
    return send_file(output_dir + 'reconstruction_errors.csv')
def setOutputDirectory(self):
    """Set ``output_directory`` to the output directory of ``self.experiment``."""
    experiment = self.experiment
    self.output_directory = dir_tools.getExperimentOutputDirectory(experiment)