def loadIdents(self):
    """Bulk-load the dataset's ``idents.csv`` file into ``instances``.

    The file is looked up in the directory returned by
    ``dir_tools.getDatasetDirectory(self.project, self.dataset)``; its
    first line is skipped as a header. Every imported row is tagged with
    ``self.dataset_id`` and receives a ``row_number``:

    * MySQL: rows are loaded with ``LOAD DATA LOCAL INFILE`` and then
      numbered starting at 1 with the ``@pos`` session variable.
    * PostgreSQL: rows are copied into a temporary table whose
      ``row_number`` column is a ``serial``, then inserted into
      ``instances``.

    Nothing is loaded when the backend is neither MySQL nor PostgreSQL.

    The raw connection is released even when a statement fails; in that
    case the pending work is rolled back and the error is re-raised.
    """
    filename = dir_tools.getDatasetDirectory(self.project, self.dataset)
    filename += 'idents.csv'
    dataset_id = str(self.dataset_id)

    db, cursor = db_tools.getRawConnection()
    try:
        if db_tools.isMysql():
            cursor.execute(
                "LOAD DATA LOCAL INFILE '%s' "
                "INTO TABLE instances "
                "CHARACTER SET UTF8 "
                "FIELDS TERMINATED BY ',' "
                "OPTIONALLY ENCLOSED BY '\"' "
                "IGNORE 1 LINES "
                "SET dataset_id = %s,"
                "row_number = NULL;" % (filename, dataset_id))
            # Number the freshly loaded rows with a session counter.
            cursor.execute('SET @pos = 0;')
            cursor.execute(
                "UPDATE instances SET row_number = "
                "( SELECT @pos := @pos + 1 ) WHERE dataset_id = %s;"
                % dataset_id)
        elif db_tools.isPostgresql():
            # The serial column assigns row numbers while rows are copied.
            cursor.execute(
                "CREATE TEMPORARY TABLE instances_import("
                "user_instance_id integer, "
                "ident varchar(200), "
                "dataset_id integer DEFAULT %s,"
                "row_number serial PRIMARY KEY);" % dataset_id)
            with open(filename, 'r') as idents_file:
                cursor.copy_expert(
                    sql=("COPY instances_import(user_instance_id,ident) "
                         "FROM STDIN "
                         "WITH CSV HEADER DELIMITER AS ',' ;"),
                    file=idents_file)
            cursor.execute(
                "INSERT INTO instances"
                "(user_instance_id,ident,dataset_id,row_number) "
                "SELECT user_instance_id, ident, dataset_id, row_number "
                "FROM instances_import;")
    except BaseException:
        # Discard the partial import so that the cleanup below cannot
        # persist it.
        # NOTE(review): assumes db is a DB-API connection exposing
        # rollback() -- confirm against db_tools.getRawConnection.
        db.rollback()
        raise
    finally:
        db_tools.closeRawConnection(db, cursor)
def loadTrueLabels(self):
    """Load ``labels/true_labels.csv`` into the ``true_labels`` table.

    The file is looked up in the directory returned by
    ``dir_tools.getDatasetDirectory(self.project, self.dataset)``. When it
    does not exist, a message is written to ``sys.stderr`` and nothing is
    loaded.

    The header line decides which columns are imported: three
    comma-separated fields mean ``user_instance_id, label, family``;
    otherwise only ``user_instance_id, label`` are read and ``family``
    takes its default, ``'other'``.

    Rows are staged in a temporary table, matched to ``instances`` on
    ``user_instance_id`` and ``dataset_id`` to resolve the instance id,
    then inserted into ``true_labels``. Nothing is loaded when the backend
    is neither MySQL nor PostgreSQL.

    The raw connection is released even when a statement fails; in that
    case the pending work is rolled back and the error is re-raised.
    """
    labels_file = dir_tools.getDatasetDirectory(self.project, self.dataset)
    labels_file += 'labels/true_labels.csv'

    # Without a ground truth file there is nothing to import.
    if not dir_tools.checkFileExists(labels_file):
        # sys.stderr.write behaves the same on Python 2 and Python 3,
        # unlike the ``print >> sys.stderr`` statement.
        sys.stderr.write('No ground truth labels for this dataset\n')
        return

    # A three-column header means the file also provides families.
    with open(labels_file, 'r') as f:
        families = len(f.readline().split(',')) == 3

    dataset_id = str(self.dataset_id)
    # Only the type of the label column differs between the backends.
    create_template = (
        "CREATE TEMPORARY TABLE true_labels_import("
        "user_instance_id integer PRIMARY KEY, "
        "label %s, "
        "family varchar(200) DEFAULT 'other', "
        "dataset_id integer DEFAULT %s, "
        "id integer DEFAULT NULL);")
    insert_query = (
        "INSERT INTO true_labels(instance_id, dataset_id, label, family) "
        "SELECT t.id, t.dataset_id, t.label, t.family "
        "FROM true_labels_import AS t;")

    db, cursor = db_tools.getRawConnection()
    try:
        if db_tools.isMysql():
            cursor.execute(create_template % ('varchar(200)', dataset_id))
            if families:
                columns = '(user_instance_id, label, family) '
            else:
                columns = '(user_instance_id, label) '
            cursor.execute(
                "LOAD DATA LOCAL INFILE '" + labels_file + "' "
                "INTO TABLE true_labels_import "
                "FIELDS TERMINATED BY ',' "
                "IGNORE 1 LINES " + columns + ";")
            # Resolve the internal instance id of each imported row.
            cursor.execute(
                "UPDATE true_labels_import t "
                "JOIN instances i "
                "ON i.user_instance_id = t.user_instance_id "
                "AND i.dataset_id = t.dataset_id "
                "SET t.id = i.id;")
            cursor.execute(insert_query)
        elif db_tools.isPostgresql():
            cursor.execute(
                create_template % ('true_labels_enum', dataset_id))
            if families:
                target = 'true_labels_import(user_instance_id,label,family) '
            else:
                target = 'true_labels_import(user_instance_id,label) '
            with open(labels_file, 'r') as f:
                cursor.copy_expert(
                    sql=("COPY " + target +
                         "FROM STDIN "
                         "WITH CSV HEADER DELIMITER AS ',' ;"),
                    file=f)
            # Resolve the internal instance id of each imported row.
            cursor.execute(
                "UPDATE true_labels_import AS t "
                "SET id = i.id "
                "FROM instances AS i "
                "WHERE i.user_instance_id = t.user_instance_id "
                "AND i.dataset_id = t.dataset_id;")
            cursor.execute(insert_query)
    except BaseException:
        # Discard the partial import so that the cleanup below cannot
        # persist it.
        # NOTE(review): assumes db is a DB-API connection exposing
        # rollback() -- confirm against db_tools.getRawConnection.
        db.rollback()
        raise
    finally:
        db_tools.closeRawConnection(db, cursor)
def initFromFile(self, labels_filename):
    """Register the experiment's labels and import partial labels if any.

    The labels type is derived from ``labels_filename``:

    * ``None`` -> ``'none'``
    * ``'true_labels.csv'`` -> ``'true_labels'``
    * anything else -> ``'partial_labels'``

    An ``ExperimentsLabelsAlchemy`` row is added and committed through
    ``self.session``; ``self.labels_id`` and ``self.labels_type`` are set
    from it. A file is imported only for ``'partial_labels'``: it is read
    from the ``labels/`` sub-directory of the dataset directory, staged in
    a temporary table, matched to ``instances`` on ``user_instance_id``
    for ``self.dataset_id``, and inserted into ``labels`` with the
    defaults ``iteration = 0``, ``method = 'init'`` and
    ``annotation = True``.

    The header line decides which columns are imported: three
    comma-separated fields mean ``user_instance_id, label, family``;
    otherwise ``family`` takes its default, ``'other'``.

    The raw connection used for the import is released even when a
    statement fails; in that case the pending import is rolled back and
    the error is re-raised.

    Args:
        labels_filename: name of the labels file, or ``None``.

    Raises:
        ValueError: if a partial labels file is requested but missing.
    """
    if labels_filename is None:
        labels_type = 'none'
    elif labels_filename == 'true_labels.csv':
        labels_type = 'true_labels'
    else:
        labels_type = 'partial_labels'

    exp_labels = db_tables.ExperimentsLabelsAlchemy(
        experiment_id=self.experiment_id,
        labels_type=labels_type)
    self.session.add(exp_labels)
    # Commit before reading labels_id, as the original code does.
    self.session.commit()
    self.labels_id = exp_labels.labels_id
    self.labels_type = labels_type

    if labels_type == 'partial_labels':
        filename = dir_tools.getDatasetDirectory(self.project, self.dataset)
        filename += 'labels/' + labels_filename
        if not dir_tools.checkFileExists(filename):
            raise ValueError(
                'The labels file %s does not exist.' % filename)

        # A three-column header means the file also provides families.
        with open(filename, 'r') as f:
            families = len(f.readline().split(',')) == 3

        labels_id = str(self.labels_id)
        dataset_id = str(self.dataset_id)
        # Only the type of the label column differs between the backends.
        create_template = (
            "CREATE TEMPORARY TABLE labels_import("
            "instance_id integer, "
            "labels_id integer DEFAULT %s, "
            "user_instance_id integer, "
            "label %s, "
            "family varchar(200) DEFAULT 'other', "
            "iteration integer DEFAULT 0, "
            "method varchar(200) DEFAULT 'init', "
            "annotation boolean DEFAULT True);")
        insert_query = (
            "INSERT INTO labels(instance_id,labels_id,label,family,"
            "iteration,method,annotation) "
            "SELECT instance_id,labels_id,label,family,"
            "iteration,method,annotation "
            "FROM labels_import;")

        db, cursor = db_tools.getRawConnection()
        try:
            if db_tools.isMysql():
                cursor.execute(
                    create_template % (labels_id, 'varchar(200)'))
                if families:
                    columns = '(user_instance_id, label, family) '
                else:
                    columns = '(user_instance_id, label) '
                cursor.execute(
                    "LOAD DATA LOCAL INFILE '" + filename + "' "
                    "INTO TABLE labels_import "
                    "FIELDS TERMINATED BY ',' "
                    "IGNORE 1 LINES " + columns + ";")
                # Resolve the internal instance id of each imported row.
                cursor.execute(
                    "UPDATE labels_import l "
                    "JOIN instances i "
                    "ON i.user_instance_id = l.user_instance_id "
                    "AND i.dataset_id = " + dataset_id + " "
                    "SET l.instance_id = i.id;")
                cursor.execute(insert_query)
            elif db_tools.isPostgresql():
                cursor.execute(
                    create_template % (labels_id, 'labels_enum'))
                if families:
                    target = 'labels_import(user_instance_id,label,family) '
                else:
                    target = 'labels_import(user_instance_id,label) '
                with open(filename, 'r') as f:
                    cursor.copy_expert(
                        sql=("COPY " + target +
                             "FROM STDIN "
                             "WITH CSV HEADER DELIMITER AS ',' ;"),
                        file=f)
                # Resolve the internal instance id of each imported row.
                cursor.execute(
                    "UPDATE labels_import AS l "
                    "SET instance_id = i.id "
                    "FROM instances AS i "
                    "WHERE i.user_instance_id = l.user_instance_id "
                    "AND i.dataset_id = " + dataset_id + ";")
                cursor.execute(insert_query)
        except BaseException:
            # Discard the partial import so that the cleanup below cannot
            # persist it.
            # NOTE(review): assumes db is a DB-API connection exposing
            # rollback() -- confirm against db_tools.getRawConnection.
            db.rollback()
            raise
        finally:
            db_tools.closeRawConnection(db, cursor)

    # NOTE(review): the original layout does not show whether this final
    # commit belongs inside the partial-labels branch; it is kept as the
    # last statement of the method -- confirm.
    self.session.commit()