Example #1
0
 def initFromFile(self, labels_filename):
     """Bulk-load a labels file into the ``Labels`` table.

     The file is looked up in the ``labels/`` sub-directory of the
     directory returned by ``dir_tools.getDatasetDirectory``.  Every
     imported row is tagged with ``self.experiment_label_id``,
     iteration 0, method "init" and annotation "0".  When the file has
     no family column, the family is set to "other".

     Raises:
         ValueError: if the labels file does not exist.
     """
     dataset_dir = dir_tools.getDatasetDirectory(self.project, self.dataset)
     labels_path = dataset_dir + 'labels/' + labels_filename
     if not dir_tools.checkFileExists(labels_path):
         raise ValueError('The labels file %s does not exist.' % labels_path)

     # A header made of exactly three comma-separated fields means the
     # file carries a family column in addition to the id and the label.
     with open(labels_path, 'r') as labels_file:
         has_families = len(labels_file.readline().split(',')) == 3

     if has_families:
         columns = '(instance_id, label, family) '
         family_default = ''
     else:
         columns = '(instance_id, label) '
         family_default = 'family = "other",'

     statement = ''.join([
         'LOAD DATA LOCAL INFILE \'', labels_path, '\' ',
         'INTO TABLE ', 'Labels', ' ',
         'FIELDS TERMINATED BY \',\' ',
         'IGNORE 1 LINES ',
         columns,
         'SET experiment_label_id = ', str(self.experiment_label_id), ', ',
         family_default,
         'iteration = 0, ',
         'method = "init", ',
         'annotation = "0"',
         ';',
     ])
     self.cursor.execute(statement)
     self.db.commit()
     self.checkLabelsValidity()
Example #2
0
def runNextIteration(experiment_id, iteration_number):
    """Queue the next iteration task and optionally log the user action.

    When the ``user_exp`` flag is truthy, a comma-separated line
    (timestamp, 'nextIteration', iteration number) is appended to
    ``user_actions.log`` in the experiment's output directory.

    Args:
        experiment_id: identifier passed to ``updateCurrentExperiment``
            to retrieve the experiment.
        iteration_number: value recorded in the log line.

    Returns:
        ``str()`` of the value returned by ``apply_async()``.
    """
    res = str(celeryRunNextIteration.s().apply_async())
    if user_exp:
        experiment = updateCurrentExperiment(experiment_id)
        filename = experiment.getOutputDirectory()
        filename += 'user_actions.log'
        fields = [datetime.datetime.now(), 'nextIteration', iteration_number]
        # Always open in append mode: it creates the file when missing, and
        # unlike a check-then-open-with-'w' sequence it cannot truncate a
        # log that another request created in between.
        with open(filename, 'a') as f:
            # write() emits the same line as the Python 2 ``print >> f``.
            f.write(','.join(map(str, fields)) + '\n')
    return res
Example #3
0
 def loadTrueLabels(self):
     """Load the ground truth labels of the dataset when they are available.

     Looks for ``labels/true_labels.csv`` in the dataset directory.  When
     the file exists, an ``Experiment`` named 'true_labels' is created and
     its labels are initialised from that file; otherwise a message is
     printed on stderr and nothing else happens.
     """
     dataset_dir = dir_tools.getDatasetDirectory(self.project, self.dataset)
     true_labels_path = dataset_dir + 'labels/true_labels.csv'
     if dir_tools.checkFileExists(true_labels_path):
         true_labels_exp = Experiment(self.project,
                                      self.dataset,
                                      self.db,
                                      self.cursor,
                                      experiment_name='true_labels')
         true_labels_exp.initLabels('true_labels.csv')
     else:
         print >> sys.stderr, 'No ground truth labels for this dataset'
Example #4
0
def currentAnnotations(experiment_id, iteration):
    """Render the current annotations page and optionally log the action.

    When the ``user_exp`` flag is truthy, a comma-separated line
    (timestamp, 'displayAnnotatedInstances') is appended to
    ``user_actions.log`` in the experiment's output directory.

    Args:
        experiment_id: identifier passed to ``updateCurrentExperiment``
            to retrieve the experiment.
        iteration: not used by the function body.

    Returns:
        The page produced by ``render_template``.
    """
    experiment = updateCurrentExperiment(experiment_id)
    page = render_template('ActiveLearning/current_annotations.html',
                           project=experiment.project)
    if user_exp:
        filename = experiment.getOutputDirectory()
        filename += 'user_actions.log'
        fields = [datetime.datetime.now(), 'displayAnnotatedInstances']
        # Always open in append mode: it creates the file when missing, and
        # unlike a check-then-open-with-'w' sequence it cannot truncate a
        # log that another request created in between.
        with open(filename, 'a') as f:
            # write() emits the same line as the Python 2 ``print >> f``.
            f.write(','.join(map(str, fields)) + '\n')
    return page
Example #5
0
def removeLabel(experiment_id, inst_experiment_label_id, iteration_number,
                instance_id):
    """Remove the label of an instance and optionally log the user action.

    When the ``user_exp`` flag is truthy, a comma-separated line
    (timestamp, 'removeLabel', instance id) is appended to
    ``user_actions.log`` in the experiment's output directory.

    Args:
        experiment_id: identifier passed to ``updateCurrentExperiment``
            to retrieve the experiment (only used for logging).
        inst_experiment_label_id: forwarded to ``labels_tools.removeLabel``.
        iteration_number: not used by the function body.
        instance_id: forwarded to ``labels_tools.removeLabel`` and logged.

    Returns:
        An empty string.
    """
    labels_tools.removeLabel(session, inst_experiment_label_id, instance_id)
    if user_exp:
        experiment = updateCurrentExperiment(experiment_id)
        filename = experiment.getOutputDirectory()
        filename += 'user_actions.log'
        fields = [datetime.datetime.now(), 'removeLabel', instance_id]
        # Always open in append mode: it creates the file when missing, and
        # unlike a check-then-open-with-'w' sequence it cannot truncate a
        # log that another request created in between.
        with open(filename, 'a') as f:
            # write() emits the same line as the Python 2 ``print >> f``.
            f.write(','.join(map(str, fields)) + '\n')
    return ''
Example #6
0
def runNextIteration(project, dataset, experiment_id, iteration_number):
    """Queue the next iteration task and optionally log the user action.

    When the ``user_exp`` flag is truthy, the experiment is rebuilt through
    ``ExperimentFactory`` and a comma-separated line (timestamp,
    'nextIteration', iteration number) is appended to ``user_actions.log``
    in its output directory.

    Args:
        project: forwarded to ``fromJson`` to rebuild the experiment.
        dataset: forwarded to ``fromJson`` to rebuild the experiment.
        experiment_id: forwarded to ``fromJson`` to rebuild the experiment.
        iteration_number: value recorded in the log line.

    Returns:
        ``str()`` of the value returned by ``apply_async()``.
    """
    res = str(celeryRunNextIteration.s().apply_async())
    if user_exp:
        experiment = ExperimentFactory.getFactory().fromJson(
            project, dataset, experiment_id, db, cursor)
        filename = dir_tools.getExperimentOutputDirectory(experiment)
        filename += 'user_actions.log'
        fields = [datetime.datetime.now(), 'nextIteration', iteration_number]
        # Always open in append mode: it creates the file when missing, and
        # unlike a check-then-open-with-'w' sequence it cannot truncate a
        # log that another request created in between.
        with open(filename, 'a') as f:
            # write() emits the same line as the Python 2 ``print >> f``.
            f.write(','.join(map(str, fields)) + '\n')
    return res
Example #7
0
def currentAnnotations(project, dataset, experiment_id, iteration):
    """Render the current annotations page and optionally log the action.

    When the ``user_exp`` flag is truthy, the experiment is rebuilt through
    ``ExperimentFactory`` and a comma-separated line (timestamp,
    'displayAnnotatedInstances') is appended to ``user_actions.log`` in its
    output directory.

    Args:
        project: passed to the template and to ``fromJson``.
        dataset: forwarded to ``fromJson`` to rebuild the experiment.
        experiment_id: forwarded to ``fromJson`` to rebuild the experiment.
        iteration: not used by the function body.

    Returns:
        The page produced by ``render_template``.
    """
    page = render_template('ActiveLearning/current_annotations.html',
                           project=project)
    if user_exp:
        experiment = ExperimentFactory.getFactory().fromJson(
            project, dataset, experiment_id, db, cursor)
        filename = dir_tools.getExperimentOutputDirectory(experiment)
        filename += 'user_actions.log'
        fields = [datetime.datetime.now(), 'displayAnnotatedInstances']
        # Always open in append mode: it creates the file when missing, and
        # unlike a check-then-open-with-'w' sequence it cannot truncate a
        # log that another request created in between.
        with open(filename, 'a') as f:
            # write() emits the same line as the Python 2 ``print >> f``.
            f.write(','.join(map(str, fields)) + '\n')
    return page
Example #8
0
def changeFamilyLabel(experiment_id, label, family):
    """Change the label of a family and optionally log the user action.

    The experiment's ``labels_id`` is forwarded to
    ``labels_tools.changeFamilyLabel``.  When the ``user_exp`` flag is
    truthy, a comma-separated line (timestamp, 'changeFamilyLabel', family,
    label) is appended to ``user_actions.log`` in the experiment's output
    directory.

    Args:
        experiment_id: identifier passed to ``updateCurrentExperiment``
            to retrieve the experiment.
        label: forwarded to ``labels_tools.changeFamilyLabel`` and logged.
        family: forwarded to ``labels_tools.changeFamilyLabel`` and logged.

    Returns:
        An empty string.
    """
    experiment = updateCurrentExperiment(experiment_id)
    experiment_label_id = experiment.labels_id
    labels_tools.changeFamilyLabel(session, label, family, experiment_label_id)
    if user_exp:
        filename = experiment.getOutputDirectory()
        filename += 'user_actions.log'
        fields = [datetime.datetime.now(), 'changeFamilyLabel', family, label]
        # Always open in append mode: it creates the file when missing, and
        # unlike a check-then-open-with-'w' sequence it cannot truncate a
        # log that another request created in between.
        with open(filename, 'a') as f:
            # write() emits the same line as the Python 2 ``print >> f``.
            f.write(','.join(map(str, fields)) + '\n')
    return ''
Example #9
0
def mergeFamilies(experiment_id, label, families, new_family_name):
    """Merge several families into one and optionally log the user action.

    ``families`` is split on commas before being forwarded to
    ``labels_tools.mergeFamilies`` together with the experiment's
    ``labels_id``.  When the ``user_exp`` flag is truthy, a comma-separated
    line (timestamp, 'mergeFamilies', new family name, then each merged
    family) is appended to ``user_actions.log`` in the experiment's output
    directory.

    Args:
        experiment_id: identifier passed to ``updateCurrentExperiment``
            to retrieve the experiment.
        label: forwarded to ``labels_tools.mergeFamilies``.
        families: comma-separated string of family names.
        new_family_name: forwarded to ``labels_tools.mergeFamilies`` and
            logged.

    Returns:
        An empty string.
    """
    experiment = updateCurrentExperiment(experiment_id)
    experiment_label_id = experiment.labels_id
    families = families.split(',')
    labels_tools.mergeFamilies(session, label, families, new_family_name,
                               experiment_label_id)
    if user_exp:
        filename = experiment.getOutputDirectory()
        filename += 'user_actions.log'
        fields = [datetime.datetime.now(), 'mergeFamilies', new_family_name]
        fields.extend(families)
        # Always open in append mode: it creates the file when missing, and
        # unlike a check-then-open-with-'w' sequence it cannot truncate a
        # log that another request created in between.
        with open(filename, 'a') as f:
            # write() emits the same line as the Python 2 ``print >> f``.
            f.write(','.join(map(str, fields)) + '\n')
    return ''
Example #10
0
def changeFamilyLabel(project, dataset, experiment_id, experiment_label_id,
                      label, family):
    """Change the label of a family and optionally log the user action.

    Selects the database through ``mysql_tools.useDatabase`` and then calls
    ``labels_tools.changeFamilyLabel``.  When the ``user_exp`` flag is
    truthy, the experiment is rebuilt through ``ExperimentFactory`` and a
    comma-separated line (timestamp, 'changeFamilyLabel', family, label) is
    appended to ``user_actions.log`` in its output directory.

    Args:
        project: forwarded to ``useDatabase`` and ``fromJson``.
        dataset: forwarded to ``useDatabase`` and ``fromJson``.
        experiment_id: forwarded to ``fromJson`` to rebuild the experiment.
        experiment_label_id: forwarded to
            ``labels_tools.changeFamilyLabel``.
        label: forwarded to ``labels_tools.changeFamilyLabel`` and logged.
        family: forwarded to ``labels_tools.changeFamilyLabel`` and logged.

    Returns:
        An empty string.
    """
    mysql_tools.useDatabase(cursor, project, dataset)
    labels_tools.changeFamilyLabel(cursor, label, family, experiment_label_id)
    if user_exp:
        experiment = ExperimentFactory.getFactory().fromJson(
            project, dataset, experiment_id, db, cursor)
        filename = dir_tools.getExperimentOutputDirectory(experiment)
        filename += 'user_actions.log'
        fields = [datetime.datetime.now(), 'changeFamilyLabel', family, label]
        # Always open in append mode: it creates the file when missing, and
        # unlike a check-then-open-with-'w' sequence it cannot truncate a
        # log that another request created in between.
        with open(filename, 'a') as f:
            # write() emits the same line as the Python 2 ``print >> f``.
            f.write(','.join(map(str, fields)) + '\n')
    return ''
Example #11
0
def mergeFamilies(project, dataset, experiment_id, experiment_label_id, label,
                  families, new_family_name):
    """Merge several families into one and optionally log the user action.

    ``families`` is split on commas, the database is selected through
    ``mysql_tools.useDatabase`` and ``labels_tools.mergeFamilies`` is
    called.  When the ``user_exp`` flag is truthy, the experiment is rebuilt
    through ``ExperimentFactory`` and a comma-separated line (timestamp,
    'mergeFamilies', new family name, then each merged family) is appended
    to ``user_actions.log`` in its output directory.

    Args:
        project: forwarded to ``useDatabase`` and ``fromJson``.
        dataset: forwarded to ``useDatabase`` and ``fromJson``.
        experiment_id: forwarded to ``fromJson`` to rebuild the experiment.
        experiment_label_id: forwarded to ``labels_tools.mergeFamilies``.
        label: forwarded to ``labels_tools.mergeFamilies``.
        families: comma-separated string of family names.
        new_family_name: forwarded to ``labels_tools.mergeFamilies`` and
            logged.

    Returns:
        An empty string.
    """
    families = families.split(',')
    mysql_tools.useDatabase(cursor, project, dataset)
    labels_tools.mergeFamilies(cursor, label, families, new_family_name,
                               experiment_label_id)
    if user_exp:
        experiment = ExperimentFactory.getFactory().fromJson(
            project, dataset, experiment_id, db, cursor)
        filename = dir_tools.getExperimentOutputDirectory(experiment)
        filename += 'user_actions.log'
        fields = [datetime.datetime.now(), 'mergeFamilies', new_family_name]
        fields.extend(families)
        # Always open in append mode: it creates the file when missing, and
        # unlike a check-then-open-with-'w' sequence it cannot truncate a
        # log that another request created in between.
        with open(filename, 'a') as f:
            # write() emits the same line as the Python 2 ``print >> f``.
            f.write(','.join(map(str, fields)) + '\n')
    return ''
Example #12
0
def addLabel(experiment_id, inst_experiment_label_id, iteration_number,
             instance_id, label, family, method, annotation):
    """Add a label to an instance and optionally log the user action.

    ``annotation`` is converted to a boolean (True only for the string
    'true') before being forwarded to ``labels_tools.addLabel``.  When the
    ``user_exp`` flag is truthy, a comma-separated line (timestamp,
    'addLabel', iteration number, instance id, label, family, method,
    annotation) is appended to ``user_actions.log`` in the experiment's
    output directory.

    Args:
        experiment_id: identifier passed to ``updateCurrentExperiment``
            to retrieve the experiment (only used for logging).
        inst_experiment_label_id: forwarded to ``labels_tools.addLabel``.
        iteration_number: forwarded to ``labels_tools.addLabel`` and logged.
        instance_id: forwarded to ``labels_tools.addLabel`` and logged.
        label: forwarded to ``labels_tools.addLabel`` and logged.
        family: forwarded to ``labels_tools.addLabel`` and logged.
        method: forwarded to ``labels_tools.addLabel`` and logged.
        annotation: compared against the string 'true'.

    Returns:
        An empty string.
    """
    annotation = annotation == 'true'
    labels_tools.addLabel(session, inst_experiment_label_id, instance_id,
                          label, family, iteration_number, method, annotation)
    if user_exp:
        experiment = updateCurrentExperiment(experiment_id)
        filename = experiment.getOutputDirectory()
        filename += 'user_actions.log'
        fields = [
            datetime.datetime.now(), 'addLabel', iteration_number, instance_id,
            label, family, method, annotation
        ]
        # Always open in append mode: it creates the file when missing, and
        # unlike a check-then-open-with-'w' sequence it cannot truncate a
        # log that another request created in between.
        with open(filename, 'a') as f:
            # write() emits the same line as the Python 2 ``print >> f``.
            f.write(','.join(map(str, fields)) + '\n')
    return ''
Example #13
0
def removeLabel(project, dataset, experiment_id, inst_dataset,
                inst_experiment_label_id, iteration_number, instance_id):
    """Remove the label of an instance and optionally log the user action.

    Selects the database of ``inst_dataset`` through
    ``mysql_tools.useDatabase``, calls ``labels_tools.removeLabel`` and
    commits.  When the ``user_exp`` flag is truthy, the experiment is
    rebuilt through ``ExperimentFactory`` and a comma-separated line
    (timestamp, 'removeLabel', project, dataset, instance id) is appended
    to ``user_actions.log`` in its output directory.

    Args:
        project: forwarded to ``useDatabase`` and ``fromJson``; logged.
        dataset: forwarded to ``fromJson``; logged.
        experiment_id: forwarded to ``fromJson`` to rebuild the experiment.
        inst_dataset: dataset forwarded to ``useDatabase``.
        inst_experiment_label_id: forwarded to ``labels_tools.removeLabel``.
        iteration_number: not used by the function body.
        instance_id: forwarded to ``labels_tools.removeLabel`` and logged.

    Returns:
        An empty string.
    """
    mysql_tools.useDatabase(cursor, project, inst_dataset)
    labels_tools.removeLabel(cursor, inst_experiment_label_id, instance_id)
    db.commit()
    if user_exp:
        experiment = ExperimentFactory.getFactory().fromJson(
            project, dataset, experiment_id, db, cursor)
        filename = dir_tools.getExperimentOutputDirectory(experiment)
        filename += 'user_actions.log'
        fields = [
            datetime.datetime.now(), 'removeLabel', project, dataset,
            instance_id
        ]
        # Always open in append mode: it creates the file when missing, and
        # unlike a check-then-open-with-'w' sequence it cannot truncate a
        # log that another request created in between.
        with open(filename, 'a') as f:
            # write() emits the same line as the Python 2 ``print >> f``.
            f.write(','.join(map(str, fields)) + '\n')
    return ''
Example #14
0
    def loadTrueLabels(self):
        """Import the dataset's ground truth labels into ``true_labels``.

        Looks for ``labels/true_labels.csv`` in the dataset directory.  When
        the file is missing, a message is printed on stderr and nothing is
        imported.  Otherwise the file is bulk-loaded into a temporary table
        ``true_labels_import``, each row is matched against ``instances`` on
        ``user_instance_id`` and ``dataset_id`` to fill in the internal id,
        and the rows are copied into ``true_labels``.  The statements used
        depend on whether ``db_tools`` reports MySQL or PostgreSQL.
        """
        dataset_dir = dir_tools.getDatasetDirectory(self.project, self.dataset)
        csv_path = dataset_dir + 'labels/true_labels.csv'
        if not dir_tools.checkFileExists(csv_path):
            print >> sys.stderr, 'No ground truth labels for this dataset'
            return

        # A header made of exactly three comma-separated fields means the
        # file carries a family column in addition to the id and the label.
        with open(csv_path, 'r') as csv_file:
            has_families = len(csv_file.readline().split(',')) == 3
        db, cursor = db_tools.getRawConnection()

        # The final copy into true_labels is the same for both backends.
        insert_query = (
            'INSERT INTO true_labels(instance_id, dataset_id, label, family) '
            'SELECT t.id, t.dataset_id, t.label, t.family '
            'FROM true_labels_import AS t;')

        if db_tools.isMysql():
            cursor.execute(
                'CREATE TEMPORARY TABLE true_labels_import('
                'user_instance_id integer PRIMARY KEY, '
                'label varchar(200), '
                'family varchar(200) DEFAULT \'other\', '
                'dataset_id integer DEFAULT ' + str(self.dataset_id) + ', '
                'id integer DEFAULT NULL'
                ');')

            if has_families:
                columns = '(user_instance_id, label, family) '
            else:
                columns = '(user_instance_id, label) '
            cursor.execute(
                'LOAD DATA LOCAL INFILE \'' + csv_path + '\' '
                'INTO TABLE true_labels_import '
                'FIELDS TERMINATED BY \',\' '
                'IGNORE 1 LINES ' + columns + ';')

            # Resolve the internal instance id of every imported row.
            cursor.execute(
                'UPDATE true_labels_import t '
                'JOIN instances i '
                'ON i.user_instance_id = t.user_instance_id '
                'AND i.dataset_id = t.dataset_id '
                'SET t.id = i.id;')

            cursor.execute(insert_query)

        elif db_tools.isPostgresql():
            cursor.execute(
                'CREATE TEMPORARY TABLE true_labels_import('
                'user_instance_id integer PRIMARY KEY, '
                'label true_labels_enum, '
                'family varchar(200) DEFAULT \'other\', '
                'dataset_id integer DEFAULT ' + str(self.dataset_id) + ', '
                'id integer DEFAULT NULL'
                ');')

            with open(csv_path, 'r') as csv_file:
                if has_families:
                    target = 'COPY true_labels_import(user_instance_id,label,family) '
                else:
                    target = 'COPY true_labels_import(user_instance_id,label) '
                copy_query = (target +
                              'FROM STDIN '
                              'WITH CSV HEADER DELIMITER AS \',\' ;')
                cursor.copy_expert(sql=copy_query, file=csv_file)

            # Resolve the internal instance id of every imported row.
            cursor.execute(
                'UPDATE true_labels_import AS t '
                'SET id = i.id '
                'FROM instances AS i '
                'WHERE i.user_instance_id = t.user_instance_id '
                'AND i.dataset_id = t.dataset_id;')

            cursor.execute(insert_query)

        db_tools.closeRawConnection(db, cursor)
Example #15
0
    def initFromFile(self, labels_filename):
        """Register the experiment's labels and import them from a file.

        The labels type is 'none' when ``labels_filename`` is None,
        'true_labels' when it is 'true_labels.csv' and 'partial_labels'
        otherwise.  An ``ExperimentsLabelsAlchemy`` row is committed with
        that type, and ``self.labels_id`` / ``self.labels_type`` are set.

        Only for 'partial_labels' is the file actually read: it is
        bulk-loaded into a temporary table ``labels_import``, each row is
        matched against ``instances`` to fill in ``instance_id``, and the
        rows are copied into ``labels``.  The statements used depend on
        whether ``db_tools`` reports MySQL or PostgreSQL.

        Raises:
            ValueError: if a partial labels file is requested but does not
                exist.
        """
        labels_type = ('none' if labels_filename is None
                       else 'true_labels' if labels_filename == 'true_labels.csv'
                       else 'partial_labels')

        exp_labels = db_tables.ExperimentsLabelsAlchemy(
            experiment_id=self.experiment_id, labels_type=labels_type)
        self.session.add(exp_labels)
        self.session.commit()
        self.labels_id = exp_labels.labels_id
        self.labels_type = labels_type

        # Nothing to import unless a partial labels file was provided.
        if labels_type != 'partial_labels':
            return

        dataset_dir = dir_tools.getDatasetDirectory(self.project, self.dataset)
        csv_path = dataset_dir + 'labels/' + labels_filename
        if not dir_tools.checkFileExists(csv_path):
            raise ValueError('The labels file %s does not exist.' % csv_path)

        # A header made of exactly three comma-separated fields means the
        # file carries a family column in addition to the id and the label.
        with open(csv_path, 'r') as csv_file:
            has_families = len(csv_file.readline().split(',')) == 3
        db, cursor = db_tools.getRawConnection()

        # The final copy into labels is the same for both backends.
        insert_query = (
            'INSERT INTO labels(instance_id,labels_id,label,family,iteration,method,annotation) '
            'SELECT instance_id,labels_id,label,family,iteration,method,annotation '
            'FROM labels_import;')

        if db_tools.isMysql():
            cursor.execute(
                'CREATE TEMPORARY TABLE labels_import('
                'instance_id integer, '
                'labels_id integer DEFAULT ' + str(self.labels_id) + ', '
                'user_instance_id integer, '
                'label varchar(200), '
                'family varchar(200) DEFAULT \'other\', '
                'iteration integer DEFAULT 0, '
                'method varchar(200) DEFAULT \'init\', '
                'annotation boolean DEFAULT True'
                ');')

            if has_families:
                columns = '(user_instance_id, label, family) '
            else:
                columns = '(user_instance_id, label) '
            cursor.execute(
                'LOAD DATA LOCAL INFILE \'' + csv_path + '\' '
                'INTO TABLE labels_import '
                'FIELDS TERMINATED BY \',\' '
                'IGNORE 1 LINES ' + columns + ';')

            # Resolve the internal instance id of every imported row.
            cursor.execute(
                'UPDATE labels_import l '
                'JOIN instances i '
                'ON i.user_instance_id = l.user_instance_id '
                'AND i.dataset_id = ' + str(self.dataset_id) + ' '
                'SET l.instance_id = i.id;')

            cursor.execute(insert_query)

        elif db_tools.isPostgresql():
            cursor.execute(
                'CREATE TEMPORARY TABLE labels_import('
                'instance_id integer, '
                'labels_id integer DEFAULT ' + str(self.labels_id) + ', '
                'user_instance_id integer, '
                'label labels_enum, '
                'family varchar(200) DEFAULT \'other\', '
                'iteration integer DEFAULT 0, '
                'method varchar(200) DEFAULT \'init\', '
                'annotation boolean DEFAULT True'
                ');')

            with open(csv_path, 'r') as csv_file:
                if has_families:
                    target = 'COPY labels_import(user_instance_id,label,family) '
                else:
                    target = 'COPY labels_import(user_instance_id,label) '
                copy_query = (target +
                              'FROM STDIN '
                              'WITH CSV HEADER DELIMITER AS \',\' ;')
                cursor.copy_expert(sql=copy_query, file=csv_file)

            # Resolve the internal instance id of every imported row.
            cursor.execute(
                'UPDATE labels_import AS l '
                'SET instance_id = i.id '
                'FROM instances AS i '
                'WHERE i.user_instance_id = l.user_instance_id '
                'AND i.dataset_id = ' + str(self.dataset_id) + ';')

            cursor.execute(insert_query)

        db_tools.closeRawConnection(db, cursor)
        self.session.commit()