Exemple #1
0
 def loadIdents(self):
     """Bulk-load the dataset's ``idents.csv`` file into the ``instances`` table.

     The file is looked up in the directory returned by
     ``dir_tools.getDatasetDirectory(self.project, self.dataset)``; its first
     line is treated as a header and skipped.

     * MySQL: the file is loaded straight into ``instances`` with
       ``LOAD DATA LOCAL INFILE``; ``row_number`` is then filled with an
       increasing counter (starting at 1) for the rows of this dataset.
     * PostgreSQL: the file is copied into a temporary table whose
       ``row_number`` column is a ``serial``, and the rows are then inserted
       into ``instances``.

     If the backend is neither MySQL nor PostgreSQL nothing is loaded.

     The raw connection is always handed back to
     ``db_tools.closeRawConnection``, even when a statement fails; in that
     case the pending work is rolled back first and the error is re-raised.
     """
     filename = dir_tools.getDatasetDirectory(self.project, self.dataset)
     filename += 'idents.csv'
     db, cursor = db_tools.getRawConnection()
     try:
         if db_tools.isMysql():
             load_query = (
                 "LOAD DATA LOCAL INFILE '%(path)s' "
                 "INTO TABLE instances "
                 "CHARACTER SET UTF8 "
                 "FIELDS TERMINATED BY ',' "
                 "OPTIONALLY ENCLOSED BY '\"' "
                 "IGNORE 1 LINES "
                 "SET dataset_id = %(dataset_id)s,"
                 "row_number = NULL;"
             ) % {'path': filename, 'dataset_id': str(self.dataset_id)}
             cursor.execute(load_query)
             # Number the rows of this dataset with a session counter:
             # @pos starts at 0 and is incremented once per updated row.
             cursor.execute('SET @pos = 0;')
             cursor.execute(
                 'UPDATE instances SET row_number = '
                 '( SELECT @pos := @pos + 1 ) WHERE dataset_id = %s;'
                 % str(self.dataset_id))
         elif db_tools.isPostgresql():
             # The serial column of the staging table provides row_number,
             # and the DEFAULT stamps every copied row with this dataset.
             cursor.execute(
                 'CREATE TEMPORARY TABLE instances_import('
                 'user_instance_id integer, '
                 'ident varchar(200), '
                 'dataset_id integer DEFAULT %s,'
                 'row_number serial PRIMARY KEY'
                 ');' % str(self.dataset_id))
             copy_query = (
                 'COPY instances_import(user_instance_id,ident) '
                 'FROM STDIN '
                 "WITH CSV HEADER DELIMITER AS ',' ;")
             with open(filename, 'r') as csv_file:
                 cursor.copy_expert(sql=copy_query, file=csv_file)
             cursor.execute(
                 'INSERT INTO instances(user_instance_id,ident,dataset_id,row_number) '
                 'SELECT user_instance_id, ident, dataset_id, row_number '
                 'FROM instances_import;')
     except Exception:
         # Discard the half-finished import so that closing the connection
         # cannot persist it.
         # NOTE(review): assumes `db` is a DB-API 2.0 connection exposing
         # rollback() -- confirm against db_tools.getRawConnection.
         db.rollback()
         raise
     finally:
         # Release the connection on both the success and the failure path.
         db_tools.closeRawConnection(db, cursor)
Exemple #2
0
    def loadTrueLabels(self):
        """Import ``labels/true_labels.csv`` into the ``true_labels`` table.

        The file is looked up under the directory returned by
        ``dir_tools.getDatasetDirectory(self.project, self.dataset)``. When it
        does not exist, a message is written to stderr and nothing is
        imported.

        The first line of the file is a header. A header with exactly three
        comma-separated fields means the file also carries a ``family``
        column; otherwise only ``user_instance_id`` and ``label`` are read and
        ``family`` takes the staging table's default (``'other'``).

        For both MySQL and PostgreSQL the rows are staged in a temporary
        table, matched to ``instances`` on ``user_instance_id`` and
        ``dataset_id`` to resolve the internal instance id, and then inserted
        into ``true_labels``. If the backend is neither, nothing is imported.

        The raw connection is always handed back to
        ``db_tools.closeRawConnection``, even when a statement fails; in that
        case the pending work is rolled back first and the error is re-raised.
        """
        labels_file = dir_tools.getDatasetDirectory(self.project, self.dataset)
        labels_file += 'labels/true_labels.csv'
        # Without a ground truth file there is nothing to import.
        if not dir_tools.checkFileExists(labels_file):
            # sys.stderr.write produces the same output as the Python 2
            # print statement and also works on Python 3.
            sys.stderr.write('No ground truth labels for this dataset\n')
            return

        # A three-column header means the file contains families.
        with open(labels_file, 'r') as f:
            families = len(f.readline().split(',')) == 3

        # Both backends finish with the same INSERT from the staging table.
        insert_query = (
            'INSERT INTO true_labels(instance_id, dataset_id, label, family) '
            'SELECT t.id, t.dataset_id, t.label, t.family '
            'FROM true_labels_import AS t;')

        db, cursor = db_tools.getRawConnection()
        try:
            if db_tools.isMysql():
                cursor.execute(
                    'CREATE TEMPORARY TABLE true_labels_import('
                    'user_instance_id integer PRIMARY KEY, '
                    'label varchar(200), '
                    "family varchar(200) DEFAULT 'other', "
                    'dataset_id integer DEFAULT %s, '
                    'id integer DEFAULT NULL'
                    ');' % str(self.dataset_id))

                if families:
                    columns = '(user_instance_id, label, family) '
                else:
                    columns = '(user_instance_id, label) '
                load_query = (
                    "LOAD DATA LOCAL INFILE '%(path)s' "
                    "INTO TABLE true_labels_import "
                    "FIELDS TERMINATED BY ',' "
                    "IGNORE 1 LINES "
                    "%(columns)s;"
                ) % {'path': labels_file, 'columns': columns}
                cursor.execute(load_query)

                # Resolve the internal instance id of every staged row.
                cursor.execute(
                    'UPDATE true_labels_import t '
                    'JOIN instances i '
                    'ON i.user_instance_id = t.user_instance_id '
                    'AND i.dataset_id = t.dataset_id '
                    'SET t.id = i.id;')

                cursor.execute(insert_query)

            elif db_tools.isPostgresql():
                cursor.execute(
                    'CREATE TEMPORARY TABLE true_labels_import('
                    'user_instance_id integer PRIMARY KEY, '
                    'label true_labels_enum, '
                    "family varchar(200) DEFAULT 'other', "
                    'dataset_id integer DEFAULT %s, '
                    'id integer DEFAULT NULL'
                    ');' % str(self.dataset_id))

                if families:
                    copy_query = 'COPY true_labels_import(user_instance_id,label,family) '
                else:
                    copy_query = 'COPY true_labels_import(user_instance_id,label) '
                copy_query += 'FROM STDIN '
                copy_query += "WITH CSV HEADER DELIMITER AS ',' ;"
                with open(labels_file, 'r') as f:
                    cursor.copy_expert(sql=copy_query, file=f)

                # Resolve the internal instance id of every staged row.
                cursor.execute(
                    'UPDATE true_labels_import AS t '
                    'SET id = i.id '
                    'FROM instances AS i '
                    'WHERE i.user_instance_id = t.user_instance_id '
                    'AND i.dataset_id = t.dataset_id;')

                cursor.execute(insert_query)
        except Exception:
            # Discard the half-finished import so that closing the connection
            # cannot persist it.
            # NOTE(review): assumes `db` is a DB-API 2.0 connection exposing
            # rollback() -- confirm against db_tools.getRawConnection.
            db.rollback()
            raise
        finally:
            # Release the connection on both the success and the failure path.
            db_tools.closeRawConnection(db, cursor)
Exemple #3
0
    def initFromFile(self, labels_filename):
        """Register the experiment's labels and import a partial labels file.

        The labels type is derived from ``labels_filename``:

        * ``None``              -> ``'none'``
        * ``'true_labels.csv'`` -> ``'true_labels'``
        * anything else         -> ``'partial_labels'``

        An ``ExperimentsLabelsAlchemy`` row is added and committed through
        ``self.session``; ``self.labels_id`` and ``self.labels_type`` are set
        from it. Only for ``'partial_labels'`` is the file
        ``labels/<labels_filename>`` (under the dataset directory) imported
        into the ``labels`` table, after which the session is committed again.

        The first line of the file is a header. A header with exactly three
        comma-separated fields means the file also carries a ``family``
        column; otherwise ``family`` takes the staging table's default
        (``'other'``). Rows are staged in a temporary table, matched to
        ``instances`` on ``user_instance_id`` for ``self.dataset_id``, and
        inserted into ``labels``. If the backend is neither MySQL nor
        PostgreSQL no rows are imported.

        The raw connection is always handed back to
        ``db_tools.closeRawConnection``, even when a statement fails; in that
        case the pending work is rolled back first and the error is re-raised.

        Raises:
            ValueError: if the partial labels file does not exist.
        """
        if labels_filename is None:
            labels_type = 'none'
        elif labels_filename == 'true_labels.csv':
            labels_type = 'true_labels'
        else:
            labels_type = 'partial_labels'

        exp_labels = db_tables.ExperimentsLabelsAlchemy(
            experiment_id=self.experiment_id, labels_type=labels_type)
        self.session.add(exp_labels)
        self.session.commit()
        self.labels_id = exp_labels.labels_id
        self.labels_type = labels_type

        # Only partial labels are imported from a file here.
        if labels_type != 'partial_labels':
            return

        filename = dir_tools.getDatasetDirectory(self.project, self.dataset)
        filename += 'labels/' + labels_filename
        if not dir_tools.checkFileExists(filename):
            raise ValueError('The labels file %s does not exist.' % filename)

        # A three-column header means the file contains families.
        with open(filename, 'r') as f:
            families = len(f.readline().split(',')) == 3

        # Both backends finish with the same INSERT from the staging table.
        insert_query = (
            'INSERT INTO labels(instance_id,labels_id,label,family,iteration,method,annotation) '
            'SELECT instance_id,labels_id,label,family,iteration,method,annotation '
            'FROM labels_import;')

        db, cursor = db_tools.getRawConnection()
        try:
            if db_tools.isMysql():
                cursor.execute(
                    'CREATE TEMPORARY TABLE labels_import('
                    'instance_id integer, '
                    'labels_id integer DEFAULT %s, '
                    'user_instance_id integer, '
                    'label varchar(200), '
                    "family varchar(200) DEFAULT 'other', "
                    'iteration integer DEFAULT 0, '
                    "method varchar(200) DEFAULT 'init', "
                    'annotation boolean DEFAULT True'
                    ');' % str(self.labels_id))

                if families:
                    columns = '(user_instance_id, label, family) '
                else:
                    columns = '(user_instance_id, label) '
                load_query = (
                    "LOAD DATA LOCAL INFILE '%(path)s' "
                    "INTO TABLE labels_import "
                    "FIELDS TERMINATED BY ',' "
                    "IGNORE 1 LINES "
                    "%(columns)s;"
                ) % {'path': filename, 'columns': columns}
                cursor.execute(load_query)

                # Resolve the internal instance id of every staged row.
                cursor.execute(
                    'UPDATE labels_import l '
                    'JOIN instances i '
                    'ON i.user_instance_id = l.user_instance_id '
                    'AND i.dataset_id = %s '
                    'SET l.instance_id = i.id;' % str(self.dataset_id))

                cursor.execute(insert_query)

            elif db_tools.isPostgresql():
                cursor.execute(
                    'CREATE TEMPORARY TABLE labels_import('
                    'instance_id integer, '
                    'labels_id integer DEFAULT %s, '
                    'user_instance_id integer, '
                    'label labels_enum, '
                    "family varchar(200) DEFAULT 'other', "
                    'iteration integer DEFAULT 0, '
                    "method varchar(200) DEFAULT 'init', "
                    'annotation boolean DEFAULT True'
                    ');' % str(self.labels_id))

                if families:
                    copy_query = 'COPY labels_import(user_instance_id,label,family) '
                else:
                    copy_query = 'COPY labels_import(user_instance_id,label) '
                copy_query += 'FROM STDIN '
                copy_query += "WITH CSV HEADER DELIMITER AS ',' ;"
                with open(filename, 'r') as f:
                    cursor.copy_expert(sql=copy_query, file=f)

                # Resolve the internal instance id of every staged row.
                cursor.execute(
                    'UPDATE labels_import AS l '
                    'SET instance_id = i.id '
                    'FROM instances AS i '
                    'WHERE i.user_instance_id = l.user_instance_id '
                    'AND i.dataset_id = %s;' % str(self.dataset_id))

                cursor.execute(insert_query)
        except Exception:
            # Discard the half-finished import so that closing the connection
            # cannot persist it.
            # NOTE(review): assumes `db` is a DB-API 2.0 connection exposing
            # rollback() -- confirm against db_tools.getRawConnection.
            db.rollback()
            raise
        finally:
            # Release the connection on both the success and the failure path.
            db_tools.closeRawConnection(db, cursor)
        self.session.commit()