def export_db_dir(source_dir, dest_dir):
    """Copy labelled images from ``source_dir`` into per-label folders.

    Every row of ``tb_multi_labels`` with a non-NULL ``plus3`` and NULL
    ``uncertain`` / ``other_diseases`` is selected. For each row the file
    ``<source_dir>/<SHA1>#<filename>`` is copied to
    ``<dest_dir>/<plus3>/<SHA1>#<filename>``. Source files that do not exist
    are reported on stdout and skipped.

    Args:
        source_dir: directory holding the ``<SHA1>#<filename>`` images.
        dest_dir: root directory that receives one sub-directory per
            ``plus3`` value.
    """
    sql = 'select SHA1,filename, plus3 from tb_multi_labels where plus3 is not null and uncertain is null and other_diseases is null '

    db = get_db_conn()
    try:
        cursor = db.cursor()
        cursor.execute(sql)
        results = cursor.fetchall()
    finally:
        # The rows are fully materialised, so the connection can be released
        # before the (potentially slow) file copies start.
        db.close()

    for rs in results:
        sha1 = rs[0]
        filename = rs[1]
        stored_name = sha1 + '#' + filename

        filename_original = os.path.join(source_dir, stored_name)
        if not os.path.exists(filename_original):
            print(filename_original, 'not found!')
            continue

        print(filename_original)
        filename_dest = os.path.join(dest_dir, str(rs[2]), stored_name)
        # exist_ok avoids the check-then-create race of exists() + makedirs().
        os.makedirs(os.path.dirname(filename_dest), exist_ok=True)
        shutil.copy(filename_original, filename_dest)
def write_csv_to_db():
    """Insert the rows of ``DLP_patient_based_split.csv`` into ``tb_multi_labels``.

    The CSV is read from ``<sys.path[0]>/../datafiles`` and must provide the
    columns ``images``, ``labels`` and ``patient_id``. For every row the
    SHA1 is computed (via ``CalcSha1``) on the *original* image, whose path is
    derived by replacing the preprocess directory prefix with the original
    directory prefix. The preprocessed path, label string, patient id and
    SHA1 are then inserted. Work is committed every 20 rows and once more at
    the end.

    NOTE(review): another ``write_csv_to_db`` is defined further down in this
    source; if both live in the same module the later definition wins.
    """
    dir_original = '/media/ubuntu/data1/multi_labels_2919_1_15'
    dir_preprocess = '/home/ubuntu/multi_labels_2919_1_15/preprocess384'
    sql = "insert into tb_multi_labels (pic_filename,multi_label1,patient_id,sha1) values(%s,%s,%s,%s)"

    db_con = get_db_conn()
    try:
        cursor = db_con.cursor()

        filename_csv = os.path.abspath(
            os.path.join(sys.path[0], "..", 'datafiles', 'DLP_patient_based_split.csv'))
        df = pd.read_csv(filename_csv)
        print(len(df))

        for i, row in df.iterrows():
            image_file = str(row["images"])
            labels = str(row["labels"])
            patient_id = str(row["patient_id"])

            # The hash is taken from the un-preprocessed image.
            image_file_orig = image_file.replace(dir_preprocess, dir_original)
            print(image_file)
            sha1 = CalcSha1(image_file_orig)

            cursor.execute(sql, (image_file, labels, patient_id, sha1))

            # Batch commits: i is the DataFrame index label of the row.
            if i % 20 == 0:
                db_con.commit()

        db_con.commit()
    finally:
        # Release the connection even if hashing or an insert fails.
        db_con.close()
def export_csv_from_db(base_dir, sql, file_csv):
    """Write an ``images,labels`` CSV from the rows returned by ``sql``.

    Each result row is read as ``(SHA1, filename, label, ...)``. The image
    path is ``<base_dir>/<SHA1>#<filename>``; rows whose image file does not
    exist are reported on stdout and left out of the CSV.

    Args:
        base_dir: directory containing the ``<SHA1>#<filename>`` images.
        sql: query to execute; columns 0, 1 and 2 are used.
        file_csv: output CSV path. Its parent directory is created if needed.
    """
    # dirname() is '' for a bare file name, and os.makedirs('') raises, so the
    # directory is only created when there actually is one.
    csv_dir = os.path.dirname(file_csv)
    if csv_dir:
        os.makedirs(csv_dir, exist_ok=True)

    db_con = get_db_conn()
    try:
        cursor = db_con.cursor()
        cursor.execute(sql)
        results = cursor.fetchall()
    finally:
        # Close on every path, not only after a successful export.
        db_con.close()

    with open(file_csv, 'w', newline='') as csvfile:
        csv_writer = csv.writer(csvfile, delimiter=',')
        csv_writer.writerow(['images', 'labels'])

        for rs in results:
            sha1 = rs[0]
            filename = rs[1]
            img_file = os.path.join(base_dir, sha1 + '#' + filename)

            if not os.path.exists(img_file):
                # Deliberately best-effort: report and skip instead of raising.
                print('file not found,', img_file)
                continue

            class_labels = rs[2]
            csv_writer.writerow([img_file, class_labels])
def multi_labels_export_to_csv(file_csv, sql, source_dir='', dest_dir='', contain_pat_id=True):
    """Export multi-label query results to a CSV file.

    Each result row is read as ``(image_path, labels[, patient_id])``. In the
    image path ``source_dir`` is replaced by ``dest_dir`` and backslashes are
    turned into forward slashes. A single trailing ``'_'`` is stripped from
    the label string.

    Args:
        file_csv: output CSV path; an existing file is removed first and the
            parent directory is created if needed.
        sql: query to execute.
        source_dir: path prefix to replace. A trailing ``'/'`` is appended
            when missing (so the default ``''`` becomes ``'/'``).
        dest_dir: replacement prefix, normalised the same way.
        contain_pat_id: when true, column 2 of each row is written as
            ``patient_id`` (``None`` is written as a single space).
    """
    if not source_dir.endswith('/'):
        source_dir += '/'
    if not dest_dir.endswith('/'):
        dest_dir += '/'

    db_con = get_db_conn()
    try:
        cursor = db_con.cursor()
        cursor.execute(sql)
        results = cursor.fetchall()
    finally:
        # Close on every path, not only after a successful export.
        db_con.close()

    if os.path.exists(file_csv):
        os.remove(file_csv)

    # dirname() is '' for a bare file name, and os.makedirs('') raises, so the
    # directory is only created when there actually is one.
    csv_dir = os.path.dirname(file_csv)
    if csv_dir:
        os.makedirs(csv_dir, exist_ok=True)

    if contain_pat_id:
        header = ['images', 'labels', 'patient_id']
    else:
        header = ['images', 'labels']

    with open(file_csv, 'w', newline='') as csvfile:
        csv_writer = csv.writer(csvfile, delimiter=',')
        csv_writer.writerow(header)

        for rs in results:
            img_file = rs[0]
            # NOTE: after the normalisation above neither directory can be '',
            # so this guard is always true. With the default arguments the
            # replacement is the no-op '/' -> '/'.
            if source_dir != '' and dest_dir != '':
                img_file = img_file.replace(source_dir, dest_dir)
            img_file = img_file.replace('\\', '/')

            class_labels = rs[1]
            if class_labels.endswith('_'):
                # Drop the trailing separator.
                class_labels = class_labels[:-1]

            if contain_pat_id:
                patient_id = rs[2]
                if patient_id is None:
                    patient_id = ' '
                csv_writer.writerow([img_file, class_labels, patient_id])
            else:
                csv_writer.writerow([img_file, class_labels])
def write_csv_to_db():
    """Write the ``plus3`` labels from ``Plus_step_two.csv`` into the database.

    The CSV (``../datafiles/dataset6/Plus_step_two.csv`` relative to the
    current working directory) provides ``images`` and ``labels`` columns.
    Image paths pointing into one of the blood-vessel segmentation result
    folders are mapped back to the ``/original/`` folder, the file is hashed
    with ``CalcSha1`` and ``tb_multi_labels.plus3`` is updated for that SHA1.

    When the update reports zero affected rows, the image path is printed and
    the file is copied to ``/tmp5/2020_5_21_plus/<label>/<sha1>#<name>`` for
    inspection.
    """
    filename_csv = os.path.join(os.path.abspath('..'), 'datafiles', 'dataset6', 'Plus_step_two.csv')
    sql_update = 'update tb_multi_labels set plus3=%s where sha1=%s'
    # Segmentation output folders that mirror the '/original/' folder.
    seg_result_dirs = (
        '/blood_vessel_seg_result_2020_4_27/',
        '/blood_vessel_seg_result1/',
        '/blood_vessel_seg_result/',
    )

    db = get_db_conn()
    try:
        cursor = db.cursor()
        df = pd.read_csv(filename_csv)

        for _, row in df.iterrows():
            image_file = row['images']
            image_label = int(row['labels'])

            for seg_dir in seg_result_dirs:
                image_file = image_file.replace(seg_dir, '/original/')

            sha1 = CalcSha1(image_file)

            cursor.execute(sql_update, (image_label, sha1))
            db.commit()

            if cursor.rowcount != 0:
                continue

            # No row was changed for this hash: report it and set the image
            # aside, grouped by label.
            print(image_file)
            tempfilename = os.path.split(image_file)[1]
            tempfilename_new = sha1 + '#' + tempfilename
            image_file_dest = os.path.join('/tmp5/2020_5_21_plus', str(image_label), tempfilename_new)
            # exist_ok avoids the check-then-create race of exists() + makedirs().
            os.makedirs(os.path.dirname(image_file_dest), exist_ok=True)
            shutil.copy(image_file, image_file_dest)
    finally:
        # The connection was previously never released.
        db.close()
def write_labels_to_db(dir1, dict_mapping, db_field_name='stage'):
    """Derive labels from folder names under ``dir1`` and store them in the DB.

    Every image file below ``dir1`` is hashed with ``CalcSha1``. Its label is
    looked up by searching the full path for ``/<key>/`` for each key of
    ``dict_mapping``; when several keys match, the last one in iteration
    order wins. ``tb_multi_labels.<db_field_name>`` is then updated for that
    SHA1 unless it already holds the same value. Paths for which the update
    reports zero affected rows are printed.

    Args:
        dir1: root directory to walk (bottom-up).
        dict_mapping: maps a folder name to the label value to store.
        db_field_name: column of ``tb_multi_labels`` to update. It is
            formatted directly into the SQL text, so it must come from
            trusted code, never from user input.

    Raises:
        Exception: ``("Invalid label!", path)`` when no key of
            ``dict_mapping`` occurs as a folder in an image's path.
    """
    image_exts = ('.bmp', '.jpg', '.jpeg', '.png', '.tiff', '.tif')
    # The statement only depends on the column name, so build it once.
    sql_update = "update tb_multi_labels set {0}=%s where SHA1=%s and ({0} is null or {0} <>%s)".format(
        db_field_name)

    db = get_db_conn()
    try:
        cursor = db.cursor()
        i = 0

        for dir_path, subpaths, files in os.walk(dir1, False):
            for f in files:
                image_file_source = os.path.join(dir_path, f)

                # Split file name and extension.
                file_ext = os.path.splitext(f)[1]
                if file_ext.lower() not in image_exts:
                    continue

                sha1 = CalcSha1(image_file_source)

                is_found = False
                for key in dict_mapping:
                    if '/' + key + '/' in image_file_source:
                        label = dict_mapping[key]
                        is_found = True

                if not is_found:
                    raise Exception("Invalid label!", image_file_source)
                print('label:', label, image_file_source)

                record_num = cursor.execute(sql_update, (label, sha1, label))
                if record_num == 0:
                    print(image_file_source)

                # Commit in batches of 20 processed images.
                i += 1
                if i % 20 == 0:
                    db.commit()

        db.commit()
    finally:
        # Also release the connection when "Invalid label!" (or a DB error)
        # aborts the walk.
        db.close()
def multi_labels_update_labels(img_file, new_label, op_mode='append'):
    """Insert or update the ground-truth label row of one image.

    The image is identified by its SHA1 (``CalcSha1``) in
    ``tb_multi_labels_GT``.

    * No row yet: a row ``(SHA1, img_file, new_label)`` is inserted,
      whatever ``op_mode`` is.
    * Row exists, ``op_mode='update'``: ``FilePath`` and ``BigClasses`` are
      overwritten with ``img_file`` / ``new_label``.
    * Row exists, ``op_mode='append'``: ``new_label`` is appended to the
      ``'_'``-separated ``BigClasses`` list unless it is already present;
      ``FilePath`` is refreshed either way.

    Args:
        img_file: path of the image file.
        new_label: label to store; converted with ``str()``.
        op_mode: ``'update'`` or ``'append'``.

    Raises:
        AssertionError: a row exists and ``op_mode`` is neither ``'update'``
            nor ``'append'``.
    """
    new_label = str(new_label)
    sha1 = CalcSha1(img_file)

    # Both modes issue the same statement. The previous 'update' variant
    # lacked the comma between the two assignments and was invalid SQL.
    sql_update = "update tb_multi_labels_GT set FilePath=%s, BigClasses=%s where SHA1=%s"

    db_con = get_db_conn()
    try:
        cursor = db_con.cursor()
        sql = "SELECT SHA1, BigClasses FROM tb_multi_labels_GT WHERE SHA1=(%s)"
        cursor.execute(sql, (sha1,))
        results = cursor.fetchall()

        if len(results) == 0:
            sql = "insert into tb_multi_labels_GT(SHA1,FilePath,BigClasses) values(%s,%s,%s)"
            cursor.execute(sql, (sha1, img_file, new_label))
        elif op_mode == 'update':
            cursor.execute(sql_update, (img_file, new_label, sha1))
        elif op_mode == 'append':
            # Treat a NULL BigClasses like an empty label list.
            current_labels = results[0][1] or ''
            if new_label in current_labels.split('_'):
                new_labels = current_labels
            elif current_labels == '':
                new_labels = new_label
            else:
                new_labels = current_labels + '_' + new_label
            cursor.execute(sql_update, (img_file, new_labels, sha1))
        else:
            # Raised explicitly so the check survives ``python -O``.
            raise AssertionError("op_mode Error")

        db_con.commit()
    finally:
        # The connection was previously never released.
        db_con.close()


# multi_labels()
def write_sha1_db():
    """Register every original image of the ROP dataset under its SHA1.

    Walks ``/media/ubuntu/data1/ROP_dataset/`` bottom-up and handles each
    image file whose path contains ``/original/``:

    1. hash it with ``CalcSha1``;
    2. copy it to ``/media/ubuntu/data1/ROP_dataset_2/<sha1>#<filename>``;
    3. replace its row in ``tb_multi_labels`` (delete by SHA1, then insert
       ``(SHA1, filename)``).

    The delete/insert pair is committed per file, so files and rows stay in
    step if the run is interrupted.
    """
    dir1 = '/media/ubuntu/data1/ROP_dataset/'
    dir2 = '/media/ubuntu/data1/ROP_dataset_2/'
    image_exts = ('.bmp', '.jpg', '.jpeg', '.png', '.tiff', '.tif')
    sql_delete = 'delete from tb_multi_labels where SHA1=%s'
    sql_insert = 'insert into tb_multi_labels(SHA1,filename) values(%s, %s)'

    db = get_db_conn()
    try:
        cursor = db.cursor()

        for dir_path, subpaths, files in os.walk(dir1, False):
            for filename in files:
                image_file_source = os.path.join(dir_path, filename)
                if '/original/' not in image_file_source:
                    continue

                # Split file name and extension.
                file_ext = os.path.splitext(filename)[1]
                if file_ext.lower() not in image_exts:
                    continue

                print(image_file_source)
                sha1 = CalcSha1(image_file_source)

                image_file_dest = os.path.join(dir2, sha1 + '#' + filename)
                # exist_ok avoids the check-then-create race of exists() + makedirs().
                os.makedirs(os.path.dirname(image_file_dest), exist_ok=True)
                shutil.copy(image_file_source, image_file_dest)

                # Delete first so re-running the script does not duplicate rows.
                cursor.execute(sql_delete, (sha1, ))
                cursor.execute(sql_insert, (sha1, filename))
                db.commit()
    finally:
        # Release the connection even if hashing, copying or SQL fails.
        db.close()
def modify_label_based_on_dir():
    """Fill ``multi_label2`` from the folder layout of the add-labels tree.

    Walks ``/media/ubuntu/data2/add_labels/`` bottom-up. For each image the
    first path component below that root is taken as the label to add, and
    the image's row in ``tb_multi_labels`` is looked up by SHA1:

    * ``multi_label1 != '0'``: ``multi_label2 = multi_label1 + '_' + label``
    * ``multi_label1 == '0'``: ``multi_label2 = label``

    Finally every row whose ``multi_label2`` is still NULL gets a copy of
    ``multi_label1``.

    Raises:
        AssertionError: an image's SHA1 does not match exactly one row.
    """
    base_dir = '/media/ubuntu/data2/add_labels/'
    image_exts = ('.BMP', '.PNG', '.JPG', '.JPEG', '.TIFF', '.TIF')

    db_con = get_db_conn()
    try:
        cursor = db_con.cursor()

        for dir_path, subpaths, files in os.walk(base_dir, False):
            for f in files:
                img_file_source = os.path.join(dir_path, f)
                file_extension = os.path.splitext(img_file_source)[1]
                if file_extension.upper() not in image_exts:
                    print('file ext name:', f)
                    continue

                sha1 = CalcSha1(img_file_source)
                # First folder below the root names the label.
                label_add = img_file_source.replace(base_dir, '').split('/')[0]
                print(img_file_source)

                sql = "select multi_label1 from tb_multi_labels where sha1=%s"
                cursor.execute(sql, (sha1,))
                results = cursor.fetchall()
                if len(results) != 1:
                    # Raised explicitly so the check survives ``python -O``.
                    raise AssertionError('error')

                label_old = results[0][0]
                print(label_old)

                if label_old != '0':
                    # Existing finding: append the new label.
                    sql = "update tb_multi_labels set multi_label2=concat(multi_label1,'_',%s) where sha1=%s"
                else:
                    # '0' means normal: the new label replaces it.
                    sql = "update tb_multi_labels set multi_label2=%s where sha1=%s"
                cursor.execute(sql, (str(label_add), sha1))
                db_con.commit()

        sql = "update tb_multi_labels set multi_label2=multi_label1 where multi_label2 is null"
        cursor.execute(sql)
        # This back-fill was previously never committed.
        db_con.commit()
    finally:
        # The connection was previously never released.
        db_con.close()
import os

from LIBS.DB.db_helper_conn import get_db_conn

# Ad-hoc check: print the SHA1 of every row in tb_multi_labels, then 'OK'.
db_con = get_db_conn()
cursor = db_con.cursor()

sql = 'select ID,SHA1,filename,filepath, gradable3, hemorrhage3, stage3, posterior from tb_multi_labels'
cursor.execute(sql)
results = cursor.fetchall()

for rs in results:
    # Only the first two selected columns are read here.
    ID, SHA1 = rs[0], rs[1]
    print(SHA1)

print('OK')
record_num = cursor.execute(sql_update, (label, sha1, label)) if record_num == 0: print(image_file_source) # print('update record:', record_num) i += 1 if i % 20 == 0: db.commit() db.commit() db.close() dict_mapping_hemorrhage = {'0': 0, '对照组': 0, '1': 1, '出血': 1} dir1 = '/media/ubuntu/data1/ROP_dataset/Hemorrhage/original' # write_labels_to_db(dir1, dict_mapping=dict_mapping_hemorrhage, db_field_name='hemorrhage') db = get_db_conn() cursor = db.cursor() sql_update = 'update tb_multi_labels set hemorrhage1=hemorrhage' cursor.execute(sql_update) db.commit() db.close() dir1 = '/tmp5/result_20200308_confusion_matrix_postmodification/hemorrhage' write_labels_to_db(dir1, dict_mapping=dict_mapping_hemorrhage, db_field_name='hemorrhage1') print('OK')