Example #1
0
 def checkInputProjectDatasetDir(self, logger):
     """Validate the on-disk layout of the project/dataset input data.

     Raises ProjectDirNotFound, DatasetDirNotFound or IdentsFileNotFound
     when a required directory or file is missing.  The ground-truth
     file is optional: when absent, a warning is logged and
     self.annotations_filename is set to None.
     """
     # The project directory must exist.
     project_dir = dir_exp_tools.getProjectDirectory(self.secuml_conf,
                                                     self.project)
     if not path.isdir(project_dir):
         raise ProjectDirNotFound(self.secuml_conf.input_data_dir,
                                  self.project)
     # The dataset directory must exist inside the project.
     dataset_dir = dir_exp_tools.getDatasetDirectory(self.secuml_conf,
                                                     self.project,
                                                     self.dataset)
     if not path.isdir(dataset_dir):
         raise DatasetDirNotFound(self.secuml_conf.input_data_dir,
                                  self.project, self.dataset)
     # The idents file must exist.
     self.idents_filename = dir_exp_tools.getIdentsFilename(
         self.secuml_conf, self.project, self.dataset)
     if not path.isfile(self.idents_filename):
         raise IdentsFileNotFound(self.idents_filename)
     # Ground truth is optional: warn and carry on without it.
     self.annotations_filename = dir_exp_tools.getGroundTruthFilename(
         self.secuml_conf, self.project, self.dataset)
     if not dir_tools.checkFileExists(self.annotations_filename):
         logger.warning('No ground-truth available for the dataset %s/%s.' %
                        (self.project, self.dataset))
         self.annotations_filename = None
Example #2
0
def checkWebLibraries():
    """Check that every JS/CSS library referenced by getWebUrls() has
    been downloaded under web/static/lib; raise MissingWebLibraries if
    any file is absent."""
    lib_dir = path.join(SECUML_DIR, 'web', 'static', 'lib')
    urls_by_kind = getWebUrls()
    for kind in ('js', 'css'):
        kind_dir = path.join(lib_dir, kind)
        for url in urls_by_kind[kind]:
            # The local filename is the last component of the URL path.
            filename = path.basename(urlparse(url).path)
            if not dir_tools.checkFileExists(path.join(kind_dir, filename)):
                raise MissingWebLibraries()
Example #3
0
def runNextIteration(experiment_id, iteration_number):
    """Launch the next active-learning iteration asynchronously.

    Returns the celery task id (as a string).  When user experiments
    are monitored (user_exp), the action is appended to the
    experiment's user_actions.log file.
    """
    res = str(celeryRunNextIteration.s().apply_async())
    if user_exp:
        experiment = updateCurrentExperiment(experiment_id)
        filename = path.join(experiment.getOutputDirectory(),
                             'user_actions.log')
        mode = 'a' if dir_tools.checkFileExists(filename) else 'w'
        to_print = [datetime.datetime.now(), 'nextIteration',
                    iteration_number]
        with open(filename, mode) as f:
            # Newline-terminate the record so successive appended
            # actions do not run together on a single line.
            f.write(','.join(map(str, to_print)) + '\n')
    return res
Example #4
0
def currentAnnotations(experiment_id, iteration):
    """Render the page listing the instances annotated so far.

    Returns the rendered HTML page.  When user experiments are
    monitored (user_exp), the display action is appended to the
    experiment's user_actions.log file.
    """
    experiment = updateCurrentExperiment(experiment_id)
    page = render_template('ActiveLearning/current_annotations.html',
                           project=experiment.project)
    if user_exp:
        filename = path.join(experiment.getOutputDirectory(),
                             'user_actions.log')
        mode = 'a' if dir_tools.checkFileExists(filename) else 'w'
        to_print = [datetime.datetime.now(), 'displayAnnotatedInstances']
        with open(filename, mode) as f:
            # Newline-terminate the record so successive appended
            # actions do not run together on a single line.
            f.write(','.join(map(str, to_print)) + '\n')
    return page
Example #5
0
def removeAnnotation(experiment_id, inst_experiment_id, iteration_number,
                     instance_id):
    """Delete the annotation of `instance_id` from the database.

    Returns an empty string (HTTP endpoint convention).  When user
    experiments are monitored (user_exp), the action is appended to the
    experiment's user_actions.log file.
    """
    annotations_db_tools.removeAnnotation(session, inst_experiment_id,
                                          instance_id)
    if user_exp:
        experiment = updateCurrentExperiment(experiment_id)
        filename = path.join(experiment.getOutputDirectory(),
                             'user_actions.log')
        mode = 'a' if dir_tools.checkFileExists(filename) else 'w'
        to_print = [datetime.datetime.now(), 'removeAnnotation', instance_id]
        with open(filename, mode) as f:
            # Newline-terminate the record so successive appended
            # actions do not run together on a single line.
            f.write(','.join(map(str, to_print)) + '\n')
    return ''
Example #6
0
def mergeFamilies(experiment_id, label, families, new_family_name):
    """Merge the comma-separated `families` into `new_family_name`.

    Returns an empty string (HTTP endpoint convention).  When user
    experiments are monitored (user_exp), the action is appended to the
    experiment's user_actions.log file.
    """
    families = families.split(',')
    annotations_db_tools.mergeFamilies(session, experiment_id, label, families,
                                       new_family_name)
    if user_exp:
        experiment = updateCurrentExperiment(experiment_id)
        filename = path.join(experiment.getOutputDirectory(),
                             'user_actions.log')
        mode = 'a' if dir_tools.checkFileExists(filename) else 'w'
        # families are already strings (str.split output), so a single
        # str() pass over the whole record is enough.
        to_print = [datetime.datetime.now(), 'mergeFamilies',
                    new_family_name] + families
        with open(filename, mode) as f:
            # Newline-terminate the record so successive appended
            # actions do not run together on a single line.
            f.write(','.join(map(str, to_print)) + '\n')
    return ''
Example #7
0
def changeFamilyLabel(experiment_id, label, family):
    """Set the label of every annotation in `family` to `label`.

    Returns an empty string (HTTP endpoint convention).  When user
    experiments are monitored (user_exp), the action is appended to the
    experiment's user_actions.log file.
    """
    annotations_db_tools.changeFamilyLabel(session, experiment_id, label,
                                           family)
    if user_exp:
        experiment = updateCurrentExperiment(experiment_id)
        filename = path.join(experiment.getOutputDirectory(),
                             'user_actions.log')
        mode = 'a' if dir_tools.checkFileExists(filename) else 'w'
        to_print = [datetime.datetime.now(), 'changeFamilyLabel', family,
                    label]
        with open(filename, mode) as f:
            # Newline-terminate the record so successive appended
            # actions do not run together on a single line.
            f.write(','.join(map(str, to_print)) + '\n')
    return ''
Example #8
0
 def getFeaturesNamesDescriptions(self):
     """Return the pair (features_names, features_descriptions).

     When a `<features>_description.csv` file exists next to the
     features file, names and descriptions are read from its 'name'
     and 'description' columns.  Otherwise both lists fall back to the
     features-file header (the first column, the instance id, is
     skipped).
     """
     features_file = self.getFeaturesFullpath()
     # The optional description file sits next to the features file.
     # (The unused extension from path.splitext is not kept.)
     description_file = path.splitext(features_file)[0] + '_description.csv'
     if dir_tools.checkFileExists(description_file):
         with open(description_file, 'r') as f:
             df = pd.read_csv(f, header=0, index_col=0)
             features_names = list(df['name'])
             features_descriptions = list(df['description'])
     else:
         # No description file: reuse the header names as descriptions.
         with open(features_file, 'r') as f_file:
             header = next(csv.reader(f_file))
             features_names = header[1:]
             features_descriptions = header[1:]
     return features_names, features_descriptions
Example #9
0
    def _setAnnotationsFilename(self, annotations_filename):
        """Register the annotations attached to this experiment.

        The annotations type is derived from the filename: None maps to
        'none', 'ground_truth.csv' to 'ground_truth', anything else to
        'partial_annotations'.  Partial annotations are bulk-loaded with
        the backend-specific loader (MySQL or PostgreSQL).
        """
        if annotations_filename is None:
            annot_type = 'none'
        elif annotations_filename == 'ground_truth.csv':
            annot_type = 'ground_truth'
        else:
            annot_type = 'partial_annotations'

        # Persist the record and flush so the generated id is available.
        record = db_tables.ExperimentAnnotationsAlchemy(
            experiment_id=self.experiment_id,
            annotations_type=annot_type)
        self.session.add(record)
        self.session.flush()
        self.annotations_id = record.annotations_id
        self.annotations_type = annot_type

        if annot_type != 'partial_annotations':
            return

        dataset_dir = dir_exp_tools.getDatasetDirectory(self.secuml_conf,
                                                        self.project,
                                                        self.dataset)
        annotations_path = path.join(dataset_dir, 'annotations',
                                     annotations_filename)
        if not dir_tools.checkFileExists(annotations_path):
            raise ValueError(
                'The annotation file %s does not exist.' % annotations_path)
        families = dir_exp_tools.annotationsWithFamilies(annotations_path)
        # Bulk loading goes through the raw DBAPI cursor.
        cursor = self.session.connection().connection.cursor()
        if self.secuml_conf.db_type == 'mysql':
            mysql_specific.loadPartialAnnotations(cursor, annotations_path,
                                                  families,
                                                  self.annotations_id,
                                                  self.dataset_id)
        elif self.secuml_conf.db_type == 'postgresql':
            postgresql_specific.loadPartialAnnotations(cursor,
                                                       annotations_path,
                                                       families,
                                                       self.annotations_id,
                                                       self.dataset_id)
        self.session.flush()
Example #10
0
    def _setAnnotationsFilename(self, annotations_filename):
        """Register the annotations file attached to this experiment.

        The annotations type is derived from the filename: None maps to
        'none', 'ground_truth.csv' to 'ground_truth', anything else to
        'partial_annotations'.  Partial annotations are bulk-loaded into
        the `annotations` table through a temporary `labels_import`
        staging table, with backend-specific SQL for MySQL and
        PostgreSQL.
        """
        if annotations_filename is None:
            annotations_type = 'none'
        elif annotations_filename == 'ground_truth.csv':
            annotations_type = 'ground_truth'
        else:
            annotations_type = 'partial_annotations'

        # Create the experiment/annotations association and commit so
        # that the auto-generated annotations_id becomes available.
        exp_annotations = db_tables.ExperimentAnnotationsAlchemy(
            experiment_id=self.experiment_id,
            annotations_type=annotations_type)
        self.session.add(exp_annotations)
        self.session.commit()
        self.annotations_id = exp_annotations.annotations_id
        self.annotations_type = annotations_type

        if annotations_type == 'partial_annotations':
            filename = path.join(
                dir_exp_tools.getDatasetDirectory(self.project, self.dataset),
                'annotations', annotations_filename)
            if not dir_tools.checkFileExists(filename):
                raise ValueError('The annotation file %s does not exist.' %
                                 filename)
            # Check whether the file contains families
            # (3 header columns: user_instance_id, label, family).
            families = False
            with open(filename, 'r') as f:
                reader = csv.reader(f)
                header = next(reader)
                if len(header) == 3:
                    families = True
            # Raw DBAPI cursor: the bulk-load statements below are not
            # expressible through the ORM.
            cursor = self.session.connection().connection.cursor()

            # NOTE(review): the SQL below is built by string
            # concatenation.  annotations_id / dataset_id are integers
            # generated by the database and filename comes from local
            # configuration, so the injection risk is limited, but
            # parameterized queries would be safer.
            if db_tools.isMysql():
                # Staging table mirroring `annotations`, with defaults
                # for the columns absent from the CSV file.
                query = 'CREATE TEMPORARY TABLE labels_import('
                query += 'instance_id integer, '
                query += 'annotations_id integer DEFAULT ' + \
                    str(self.annotations_id) + ', '
                query += 'user_instance_id integer, '
                query += 'label varchar(200), '
                query += 'family varchar(200) DEFAULT \'other\', '
                query += 'iteration integer DEFAULT 0, '
                query += 'method varchar(200) DEFAULT \'init\''
                query += ');'
                cursor.execute(query)

                # Bulk-load the CSV file, skipping its header line.
                query = 'LOAD DATA LOCAL INFILE \'' + filename + '\' '
                query += 'INTO TABLE ' + 'labels_import' + ' '
                query += 'FIELDS TERMINATED BY \',\' '
                query += 'IGNORE 1 LINES '
                if families:
                    query += '(user_instance_id, label, family) '
                else:
                    query += '(user_instance_id, label) '
                query += ';'
                cursor.execute(query)

                # Resolve user-visible instance ids to internal ids for
                # this dataset.
                query = 'UPDATE labels_import l '
                query += 'JOIN instances i '
                query += 'ON i.user_instance_id = l.user_instance_id '
                query += 'AND i.dataset_id = ' + str(self.dataset_id) + ' '
                query += 'SET l.instance_id = i.id;'
                cursor.execute(query)

                # Copy the staged rows into the real annotations table.
                query = 'INSERT INTO annotations(instance_id,annotations_id,label,family,iteration,method) '
                query += 'SELECT instance_id,annotations_id,label,family,iteration,method '
                query += 'FROM labels_import;'
                cursor.execute(query)

            elif db_tools.isPostgresql():
                # Staging table; `label` uses the labels_enum type
                # declared elsewhere in the schema.
                query = 'CREATE TEMPORARY TABLE labels_import('
                query += 'instance_id integer, '
                query += 'annotations_id integer DEFAULT ' + \
                    str(self.annotations_id) + ', '
                query += 'user_instance_id integer, '
                query += 'label labels_enum, '
                query += 'family varchar(200) DEFAULT \'other\', '
                query += 'iteration integer DEFAULT 0, '
                query += 'method varchar(200) DEFAULT \'init\''
                query += ');'
                cursor.execute(query)

                # Bulk-load the CSV file via COPY FROM STDIN (the HEADER
                # option skips the first line).
                with open(filename, 'r') as f:
                    if families:
                        query = 'COPY labels_import(user_instance_id,label,family) '
                    else:
                        query = 'COPY labels_import(user_instance_id,label) '
                    query += 'FROM STDIN '
                    query += 'WITH CSV HEADER DELIMITER AS \',\' ;'
                    cursor.copy_expert(sql=query, file=f)

                # Resolve user-visible instance ids to internal ids for
                # this dataset.
                query = 'UPDATE labels_import AS l '
                query += 'SET instance_id = i.id '
                query += 'FROM instances AS i '
                query += 'WHERE i.user_instance_id = l.user_instance_id '
                query += 'AND i.dataset_id = ' + str(self.dataset_id) + ';'
                cursor.execute(query)

                # Copy the staged rows into the real annotations table.
                query = 'INSERT INTO annotations(instance_id,annotations_id,label,family,iteration,method) '
                query += 'SELECT instance_id,annotations_id,label,family,iteration,method '
                query += 'FROM labels_import;'
                cursor.execute(query)

            self.session.commit()
Example #11
0
    def loadGroundTruth(self, logger):
        """Bulk-load the dataset's ground_truth.csv into the database.

        The CSV is staged into a temporary `ground_truth_import` table,
        its user-visible instance ids are resolved to internal ids, and
        the rows are then inserted into the `ground_truth` table.  The
        SQL is backend-specific (MySQL or PostgreSQL).  When no
        ground-truth file exists, a warning is logged and nothing is
        loaded.
        """
        annotations_file = path.join(dir_exp_tools.getDatasetDirectory(
                                            self.project,
                                            self.dataset),
                                     'annotations',
                                     'ground_truth.csv')
        if not dir_tools.checkFileExists(annotations_file):
            logger.warning('No ground-truth available for this dataset')
            return

        # Check whether the file contains families
        # (3 header columns: user_instance_id, label, family).
        families = False
        with open(annotations_file, 'r') as f:
            reader = csv.reader(f)
            header = next(reader)
            if len(header) == 3:
                families = True
        # Raw DBAPI cursor: the bulk-load statements below are not
        # expressible through the ORM.
        cursor = self.session.connection().connection.cursor()

        # NOTE(review): the SQL below is built by string concatenation.
        # dataset_id is an integer generated by the database and the
        # filename comes from local configuration, so the injection
        # risk is limited, but parameterized queries would be safer.
        if db_tools.isMysql():
            # Staging table with defaults for the columns absent from
            # the CSV file; `id` is filled in by the UPDATE below.
            query = 'CREATE TEMPORARY TABLE ground_truth_import('
            query += 'user_instance_id integer PRIMARY KEY, '
            query += 'label varchar(200), '
            query += 'family varchar(200) DEFAULT \'other\', '
            query += 'dataset_id integer DEFAULT ' + \
                str(self.dataset_id) + ', '
            query += 'id integer DEFAULT NULL'
            query += ');'
            cursor.execute(query)

            # Bulk-load the CSV file, skipping its header line.
            query = 'LOAD DATA LOCAL INFILE \'' + annotations_file + '\' '
            query += 'INTO TABLE ' + 'ground_truth_import' + ' '
            query += 'FIELDS TERMINATED BY \',\' '
            query += 'IGNORE 1 LINES '
            if families:
                query += '(user_instance_id, label, family) '
            else:
                query += '(user_instance_id, label) '
            query += ';'
            cursor.execute(query)

            # Resolve user-visible instance ids to internal ids.
            query = 'UPDATE ground_truth_import t '
            query += 'JOIN instances i '
            query += 'ON i.user_instance_id = t.user_instance_id '
            query += 'AND i.dataset_id = t.dataset_id '
            query += 'SET t.id = i.id;'
            cursor.execute(query)

            # Copy the staged rows into the real ground_truth table.
            query = 'INSERT INTO ground_truth(instance_id, dataset_id, label, family) '
            query += 'SELECT t.id, t.dataset_id, t.label, t.family '
            query += 'FROM ground_truth_import AS t;'
            cursor.execute(query)

        elif db_tools.isPostgresql():
            # Staging table; `label` uses the ground_truth_enum type
            # declared elsewhere in the schema.
            query = 'CREATE TEMPORARY TABLE ground_truth_import('
            query += 'user_instance_id integer PRIMARY KEY, '
            query += 'label ground_truth_enum, '
            query += 'family varchar(200) DEFAULT \'other\', '
            query += 'dataset_id integer DEFAULT ' + \
                str(self.dataset_id) + ', '
            query += 'id integer DEFAULT NULL'
            query += ');'
            cursor.execute(query)

            # Bulk-load the CSV file via COPY FROM STDIN (the HEADER
            # option skips the first line).
            with open(annotations_file, 'r') as f:
                if families:
                    query = 'COPY ground_truth_import(user_instance_id,label,family) '
                else:
                    query = 'COPY ground_truth_import(user_instance_id,label) '
                query += 'FROM STDIN '
                query += 'WITH CSV HEADER DELIMITER AS \',\' ;'
                cursor.copy_expert(sql=query, file=f)

            # Resolve user-visible instance ids to internal ids.
            query = 'UPDATE ground_truth_import AS t '
            query += 'SET id = i.id '
            query += 'FROM instances AS i '
            query += 'WHERE i.user_instance_id = t.user_instance_id '
            query += 'AND i.dataset_id = t.dataset_id;'
            cursor.execute(query)

            # Copy the staged rows into the real ground_truth table.
            query = 'INSERT INTO ground_truth(instance_id, dataset_id, label, family) '
            query += 'SELECT t.id, t.dataset_id, t.label, t.family '
            query += 'FROM ground_truth_import AS t;'
            cursor.execute(query)

        self.session.commit()