def get_pathology_filelists(datasetidx=''):
    """Build or load the per-set file lists for a pathology dataset.

    All settings are read from the module-level ``opt`` object and every
    resulting list is also written below the module-level ``save_path`` via
    ``bt.filelist_store``.

    Two modes are supported:

    * Generate: when ``opt.root_dir<suffix>`` exists and is not None. If that
      value is an existing file it is loaded with ``bt.filelist_load``;
      otherwise ``<root>/npy/*.npy`` is globbed. The list is optionally
      filtered with ``lt.filelist_remove_uncertain`` (only when the root path
      contains ``'lidc'`` and ``opt.remove_uncertain`` is set and truthy),
      optionally shuffled in place (``opt.filelist_shuffle``), and then split
      by ``bt.foldlist`` using ``opt.num_cross_folds`` and the
      ``opt.val_fold<suffix>`` / ``opt.test_fold<suffix>`` settings.
    * Load: otherwise the pre-split lists named by
      ``opt.filelists<suffix>['train' | 'val' | 'test']`` are loaded
      (``'test'`` only if that key is present).

    Args:
        datasetidx: Suffix appended to the option names (``root_dir``,
            ``val_fold``, ``test_fold``, ``filelists``) and to the generated
            log file names. ``None`` is treated as the empty suffix.

    Returns:
        In load mode, a dict with ``'train'``, ``'val'`` and optionally
        ``'test'`` entries. In generate mode, the first value returned by
        ``bt.foldlist`` (presumably a dict keyed by set name -- confirm
        against ``bt.foldlist``).
    """
    # Resolve the suffix once. The root-dir lookup already treated None as
    # "no suffix"; using str(None) for the other option names would look up
    # attributes such as 'val_foldNone', so every name shares this suffix.
    suffix = '' if datasetidx is None else str(datasetidx)
    file_root = getattr(opt, 'root_dir' + suffix, None)

    if file_root is not None:
        if os.path.isfile(file_root):
            filelist = bt.filelist_load(file_root)
        else:
            filelist = glob.glob(os.path.join(file_root, "npy", "*.npy"))

        if 'lidc' in file_root and getattr(opt, 'remove_uncertain', False):
            filelist = lt.filelist_remove_uncertain(filelist)
        if opt.filelist_shuffle:
            random.shuffle(filelist)

        filelists, folddict = bt.foldlist(
            filelist, opt.num_cross_folds, {
                'val': getattr(opt, 'val_fold' + suffix),
                'test': getattr(opt, 'test_fold' + suffix)
            })

        # Persist each split that was actually produced.
        for setname in ('train', 'val', 'test'):
            if setname in filelists:
                bt.filelist_store(
                    filelists[setname],
                    save_path + '/filelist' + suffix + '_' + setname +
                    '_fold' + str(folddict[setname]) + '.log')
        print("filelist generated")
    else:
        configured = getattr(opt, 'filelists' + suffix)
        filelists = {}
        filelists['train'] = bt.filelist_load(configured['train'])
        filelists['val'] = bt.filelist_load(configured['val'])
        bt.filelist_store(filelists['train'],
                          save_path + '/filelist_train.log')
        bt.filelist_store(filelists['val'], save_path + '/filelist_val.log')
        if 'test' in configured:
            filelists['test'] = bt.filelist_load(configured['test'])
            bt.filelist_store(filelists['test'],
                              save_path + '/filelist_test.log')
        print("filelist loaded")
    return filelists
def get_detection_filelists(patient_uids=None,
                            filepaths=None,
                            easy_eliminate_filelist=None,
                            config={}):
    """Build or load patient-wise file lists for detection.

    Every list produced is also written below
    ``config['model_root'] + '/' + config['env']`` via ``bt.filelist_store``.

    Args:
        patient_uids: ``None`` to discover patient uids from the ``*.npy``
            files found directly in each entry of ``filepaths``; a ``str``
            to load the uid list with ``bt.filelist_load``; any other value
            is used as the uid list itself (and is shuffled in place when
            ``config['filelist_shuffle']`` is truthy).
        filepaths: Iterable of sample directories or list files. When
            ``None``, the pre-split lists named by
            ``config['filelists']['train']`` and ``['val']`` are loaded
            instead of being generated.
        easy_eliminate_filelist: Optional path of a list file; its entries
            are removed from the result with ``bt.filelist_eliminate``.
        config: Mapping of settings. Keys read here: ``model_root``, ``env``,
            ``filelists``, ``filelist_shuffle``, ``num_cross_folds``,
            ``val_fold``, ``test_fold`` and, optionally,
            ``remove_uncertain``.

    Returns:
        dict mapping set name to a list of files. In load mode the keys are
        ``'train'`` and ``'val'``; in generate mode they are the keys of the
        fold mapping returned by ``bt.foldlist``.
    """
    save_path = config['model_root'] + '/' + config['env']
    if easy_eliminate_filelist is not None:
        easy_eliminate_filelist = bt.filelist_load(easy_eliminate_filelist)

    if filepaths is None:
        # Load mode: the split is already described by list files.
        filelists = {}
        filelists['train'] = bt.filelist_load(config['filelists']['train'])
        filelists['val'] = bt.filelist_load(config['filelists']['val'])
        if easy_eliminate_filelist is not None:
            for setname in ('train', 'val'):
                filelists[setname] = bt.filelist_eliminate(
                    filelists[setname], easy_eliminate_filelist)
        bt.filelist_store(filelists['train'],
                          save_path + '/filelist_train.log')
        bt.filelist_store(filelists['val'], save_path + '/filelist_val.log')
        print("filelist loaded")
        return filelists

    if patient_uids is None:
        # Discover uids (the part of the file stem before the first '_'),
        # keeping first-seen order; the set makes de-duplication O(1).
        patient_uids = []
        seen_uids = set()
        for filepath in filepaths:
            for path in glob.glob(filepath + '/*.npy'):
                stem = os.path.splitext(os.path.basename(path))[0]
                uid = stem.split('_')[0]
                if uid not in seen_uids:
                    seen_uids.add(uid)
                    patient_uids.append(uid)
    elif isinstance(patient_uids, str):
        patient_uids = bt.filelist_load(patient_uids)

    if config['filelist_shuffle']:
        random.shuffle(patient_uids)
    bt.filelist_store(patient_uids, save_path + '/patientlist.log')
    patient_folds, folddict = bt.foldlist(
        patient_uids, config['num_cross_folds'], {
            'val': config['val_fold'],
            'test': config['test_fold']
        })

    # Gather every candidate file from all sources.
    filelist_overall = []
    for filepath in filepaths:
        if os.path.isfile(filepath):
            files = bt.filelist_load(filepath)
        else:
            files = [filepath + '/' + name for name in os.listdir(filepath)]
        filelist_overall.extend(files)
        # NOTE(review): the filter is applied to the accumulated list, so
        # entries gathered from earlier sources are filtered as well.
        if ('lidc' in filepath and 'remove_uncertain' in config
                and config['remove_uncertain']):
            filelist_overall = lt.filelist_remove_uncertain(filelist_overall)

    if easy_eliminate_filelist is not None:
        filelist_overall = bt.filelist_eliminate(filelist_overall,
                                                 easy_eliminate_filelist)
    if config['filelist_shuffle']:
        random.shuffle(filelist_overall)

    # Assign each file to every fold that contains its patient uid. The
    # per-fold sets are built once so the per-file membership test is O(1).
    fold_members = {
        setname: set(patient_folds[setname])
        for setname in patient_folds.keys()
    }
    filelists = {setname: [] for setname in fold_members}
    for path in filelist_overall:
        stem = os.path.splitext(os.path.basename(path))[0]
        uid = stem.split('_')[0]
        for setname, members in fold_members.items():
            if uid in members:
                filelists[setname].append(path)

    for setname in fold_members:
        bt.filelist_store(
            filelists[setname], save_path + '/filelist_' + setname +
            '_fold' + str(folddict[setname]) + '.log')
    bt.filelist_store(filelist_overall, save_path + '/filelist.log')
    print("filelist generated")
    return filelists
def get_filelists_patientwise(patient_uids=None,
                              filelists=None,
                              fileext='npy',
                              datasetidx='',
                              config={}):
    """Build or load file lists split so that a patient stays in one fold.

    Every list produced is also written below
    ``config['model_root'] + '/' + config['env']`` via ``bt.filelist_store``.

    Args:
        patient_uids: ``None`` to discover patient uids from the
            ``*.<fileext>`` files found directly in each entry of
            ``filelists``; a ``str`` to load the uid list with
            ``bt.filelist_load``; any other value is used as the uid list
            itself (and is shuffled in place when
            ``config['filelist_shuffle']`` is truthy).
        filelists: Iterable of sample directories or list files. When
            ``None``, the pre-split lists named by
            ``config['filelists<datasetidx>']`` are loaded instead
            (``'train'``, ``'val'`` and, if present, ``'test'``).
        fileext: File extension (without the dot) used when globbing
            directories.
        datasetidx: Suffix appended (via ``str``) to the ``filelists``,
            ``val_fold`` and ``test_fold`` config keys and to the generated
            log file names.
        config: Mapping of settings. Keys read here: ``model_root``, ``env``,
            ``filelist_shuffle``, ``num_cross_folds``, the suffixed keys
            above and, optionally, ``remove_uncertain``.

    Returns:
        dict mapping set name to a list of files. In generate mode the keys
        are those of the fold mapping returned by ``bt.foldlist``.
    """
    save_path = config['model_root'] + '/' + config['env']
    suffix = str(datasetidx)

    if filelists is None:
        # Load mode: the split is already described by list files.
        configured = config['filelists' + suffix]
        filelistdict = {}
        filelistdict['train'] = bt.filelist_load(configured['train'])
        filelistdict['val'] = bt.filelist_load(configured['val'])
        bt.filelist_store(filelistdict['train'],
                          save_path + '/filelist_train.log')
        bt.filelist_store(filelistdict['val'],
                          save_path + '/filelist_val.log')
        if 'test' in configured:
            filelistdict['test'] = bt.filelist_load(configured['test'])
            bt.filelist_store(filelistdict['test'],
                              save_path + '/filelist_test.log')
        print("filelist loaded")
        return filelistdict

    def patient_uid_of(path):
        # Patient uid = part of the file stem before the first '_'. The
        # extension is stripped first so that a file without an underscore
        # (e.g. '123.npy') yields '123' in both discovery and fold
        # assignment; otherwise such files would match no fold.
        stem = os.path.splitext(os.path.basename(path))[0]
        return stem.split('_')[0]

    if patient_uids is None:
        # Discover uids in first-seen order; the set keeps de-duplication
        # O(1) per file.
        patient_uids = []
        seen_uids = set()
        for source in filelists:
            for path in glob.glob(source + '/*.' + fileext):
                uid = patient_uid_of(path)
                if uid not in seen_uids:
                    seen_uids.add(uid)
                    patient_uids.append(uid)
    elif isinstance(patient_uids, str):
        patient_uids = bt.filelist_load(patient_uids)

    if config['filelist_shuffle']:
        random.shuffle(patient_uids)
    bt.filelist_store(patient_uids,
                      save_path + '/patientlist' + suffix + '.log')
    patient_folds, folddict = bt.foldlist(
        patient_uids, config['num_cross_folds'], {
            'val': config['val_fold' + suffix],
            'test': config['test_fold' + suffix]
        })

    # Gather every candidate file from all sources.
    filelist_overall = []
    for source in filelists:
        if os.path.isfile(source):
            files = bt.filelist_load(source)
        else:
            files = glob.glob(source + '/*.%s' % (fileext))
        filelist_overall.extend(files)
        # NOTE(review): the filter is applied to the accumulated list, so
        # entries gathered from earlier sources are filtered as well.
        if ('lidc' in source and 'remove_uncertain' in config
                and config['remove_uncertain']):
            filelist_overall = lt.filelist_remove_uncertain(filelist_overall)
    if config['filelist_shuffle']:
        random.shuffle(filelist_overall)

    # Assign each file to every fold that contains its patient uid. The
    # per-fold sets are built once so the per-file membership test is O(1).
    fold_members = {
        setname: set(patient_folds[setname])
        for setname in patient_folds.keys()
    }
    filelistdict = {setname: [] for setname in fold_members}
    for path in filelist_overall:
        uid = patient_uid_of(path)
        for setname, members in fold_members.items():
            if uid in members:
                filelistdict[setname].append(path)

    for setname in fold_members:
        bt.filelist_store(
            filelistdict[setname], save_path + '/filelist' + suffix + '_' +
            setname + '_fold' + str(folddict[setname]) + '.log')
    bt.filelist_store(filelist_overall,
                      save_path + '/filelist' + suffix + '.log')
    print("filelist generated")
    return filelistdict