def identify_images(zip_file): """Interactively identify images from a folder, writing the labels to an array for later training""" with TemporaryZipDirectory(zip_file) as zfiles: filepaths = get_files(zfiles, is_dicom) feature_array = np.zeros((len(filepaths), 10000), dtype=np.float32) labels = np.zeros(len(filepaths)) split_val = 25 length = len(filepaths) rounds = int(math.ceil(length / split_val)) for n in range(rounds): fig, axes = plt.subplots(5, 5) for axis, (idx, fp) in zip(axes.flatten(), enumerate(filepaths[split_val*n:split_val*(n+1)])): img = process_image(fp) plt.sca(axis) plt.imshow(img.array, cmap=plt.cm.Greys) plt.axis('off') plt.title(idx+split_val*n) plt.show() not_done = True while not_done: label = input("Input the HU indices sequentially, one at a time. Type 'done' when finished:") if label == 'done': not_done = False else: labels[int(label)] = 1 # labels = np.array(labels) for idx, fp in enumerate(filepaths): feature_array[idx, :] = process_image(fp) # scaled_features = preprocessing.minmax_scale(feature_array, axis=1) scaled_features = feature_array dir2write = osp.dirname(zip_file) np.save(osp.join(dir2write, 'images_' + osp.splitext(osp.basename(zip_file))[0]), scaled_features) np.save(osp.join(dir2write, 'labels_' + osp.splitext(osp.basename(zip_file))[0]), labels)
def identify_images(folder, np_name=None, drop_non_dicoms=True):
    if drop_non_dicoms:
        drop_non_dicom(folder)
    print("Obtaining file paths...")
    filepaths = get_files(folder, is_dicom)
    print(len(filepaths), "found")
    feature_array = np.zeros((len(filepaths), 10000), dtype=np.float32)
    labels = np.zeros(len(filepaths))
    for idx, path in enumerate(filepaths):
        # try to automatically identify from the file name
        good_names = ('pf', 'vmat', '90', '270', '180', 'ra', 't1', 'picket')
        bad_names = ('open', 't2', 't3', 'speed', 'dose', 'rate', 'drgs', 'mlcs', 'jaw', 'coll', 'strip')
        auto_found = False
        basepath = osp.basename(path).lower()
        for name in good_names:
            if name in basepath:
                label_str = 1
                auto_found = True
                break
        for name in bad_names:
            if name in basepath:
                label_str = 0
                auto_found = True
                break
        if not auto_found:
            # no match in the file name; show the image and ask the user
            img = process_image(path).reshape(100, 100)
            plt.imshow(img, cmap=plt.cm.viridis)
            plt.axis('off')
            plt.title(osp.basename(path))
            plt.show()
            input_invalid = True
            break_out = False
            while input_invalid:
                label_str = input("Input the classification: 1 if a PF field, 0 otherwise. Enter 'done' to finish: ")
                if label_str == 'done':
                    break_out = True
                    input_invalid = False
                try:
                    int(label_str)
                except ValueError:
                    pass
                else:
                    if int(label_str) in [0, 1]:
                        input_invalid = False
            if break_out:
                break
        labels[idx] = int(label_str)
        feature_array[idx, :] = process_image(path)
    else:
        # the loop finished without an early stop; keep every row
        idx = len(filepaths)
    # trim the arrays if labeling was stopped early
    feature_array = feature_array[:idx, :]
    labels = labels[:idx]
    # scaled_features = preprocessing.minmax_scale(feature_array, axis=1)
    scaled_features = feature_array
    if np_name is None:
        np_name = osp.basename(folder)
    np.save(osp.join(data_dir, 'images_' + np_name + '_pf'), scaled_features)
    np.save(osp.join(data_dir, 'labels_' + np_name + '_pf'), labels)

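# Example call for the labeler above, kept as comments since this module is imported as-is.
# The folder path is an illustrative assumption, and `data_dir` must exist as a module-level
# variable because identify_images saves into it:
#
#     data_dir = osp.join(osp.dirname(osp.abspath(__file__)), 'data')
#     identify_images(r'D:\pylinac test files\Picket Fences', np_name='picket_fence')
#
# This call would write images_picket_fence_pf.npy and labels_picket_fence_pf.npy to data_dir.
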
def identify_images(zip_file):
    """Interactively identify images from a folder, writing the labels to an array for later training"""
    with TemporaryZipDirectory(zip_file) as zfiles:
        filepaths = get_files(zfiles, is_dicom)
        labels = np.zeros(len(filepaths))
        split_val = 25
        length = len(filepaths)
        rounds = int(math.ceil(length / split_val))
        for n in range(rounds):
            fig, axes = plt.subplots(5, 5, figsize=(10, 10))
            for axis, (idx, fp) in zip(axes.flatten(), enumerate(filepaths[split_val * n:split_val * (n + 1)])):
                img = load(fp)
                plt.sca(axis)
                plt.imshow(img, cmap=plt.cm.Greys)
                plt.axis('off')
                plt.title(idx + split_val * n)
            plt.show()
            not_done = True
            while not_done:
                label = input("Input the HU indices as a number or range. E.g. '66' or '25-47'. Type 'done' when finished:")
                if label == 'done':
                    not_done = False
                else:
                    items = label.split('-')
                    if len(items) > 1:
                        # +1 so a range like '25-47' includes index 47
                        labels[int(items[0]):int(items[1]) + 1] = 1
                    else:
                        labels[int(items[0])] = 1
        scaled_features = np.zeros((len(filepaths), 10000), dtype=np.float32)
        for idx, fp in enumerate(filepaths):
            scaled_features[idx, :] = process_image(fp)
        dir2write = osp.dirname(zip_file)
        np.save(osp.join(dir2write, 'images_' + osp.splitext(osp.basename(zip_file))[0]), scaled_features)
        np.save(osp.join(dir2write, 'labels_' + osp.splitext(osp.basename(zip_file))[0]), labels)
    os.remove(zip_file)

def identify_images(folder, name=None, drop_non_dicoms=True):
    if drop_non_dicoms:
        drop_non_dicom(folder)
    print("Obtaining file paths...")
    filepaths = get_files(folder, is_dicom)
    print(len(filepaths), "found")
    feature_array = np.zeros((len(filepaths), 10000), dtype=np.float32)
    labels = np.zeros(len(filepaths))
    for idx, path in enumerate(filepaths):
        # try to automatically identify from the file name
        basepath = osp.basename(path).lower()
        if ('pf' in basepath) or ('vmat' in basepath):
            label_str = 0
            print(path, "Labeled", label_str)
        elif 'open' in basepath:
            label_str = 1
            print(path, "Labeled", label_str)
        elif 'mlc' in basepath:
            label_str = 3
            print(path, "Labeled", label_str)
        elif ('drgs' in basepath) or (('dr' in basepath) and ('gs' in basepath)) or ('gantry' in basepath):
            label_str = 2
            print(path, "Labeled", label_str)
        else:
            # no match in the file name; show the image and ask the user
            img = process_image(path).reshape(100, 100)
            plt.imshow(img, cmap=plt.cm.jet)
            plt.axis('off')
            plt.title(osp.basename(path))
            plt.show()
            input_invalid = True
            break_out = False
            while input_invalid:
                label_str = input("Input the classification: 0 if not a VMAT field, 1 if open field, 2 if drgs, 3 if mlcs. 'done' to finish: ")
                if label_str == 'done':
                    break_out = True
                    input_invalid = False
                elif label_str in ('0', '1', '2', '3'):  # guard against non-numeric entries
                    input_invalid = False
            if break_out:
                break
        labels[idx] = int(label_str)
        feature_array[idx, :] = process_image(path)
    else:
        # the loop finished without an early stop; keep every row
        idx = len(filepaths)
    # trim the arrays if labeling was stopped early
    feature_array = feature_array[:idx, :]
    labels = labels[:idx]
    # scaled_features = preprocessing.minmax_scale(feature_array, axis=1)
    scaled_features = feature_array
    if name is None:
        name = osp.basename(folder)
    np.save(osp.join(data_dir, 'images_' + name + '_vmat'), scaled_features)
    np.save(osp.join(data_dir, 'labels_' + name + '_vmat'), labels)

def build_images(use_pool=True):
    """Completely load, resize, and save the images for training. Main function."""
    # get image file paths for each image type
    path_stub = r'D:\Users\James\Dropbox\Programming\Python\Projects\pylinac test files'
    # fetch path filenames
    pf_files = get_files(osp.join(path_stub, 'Picket Fences'), is_dicom, use_pool=True)
    pipspro_files = get_files(osp.join(path_stub, '2D Image quality phantoms', 'QC-3'), is_dicom)
    leeds_files = get_files(osp.join(path_stub, '2D Image quality phantoms', 'Leeds'), is_dicom)
    star_files = get_files(osp.join(path_stub, 'Starshots'), image.is_image, use_pool=True)
    wl_files = get_files(osp.join(path_stub, 'Winston-Lutz'), is_dicom, use_pool=True)
    # vmat_files = get_files(osp.join(path_stub, 'VMATs'), is_dicom, use_pool=True)
    lv_files = get_files(osp.join(path_stub, '2D Image quality phantoms', 'Las Vegas'), is_dicom)
    filepaths = pf_files + pipspro_files + leeds_files + star_files + wl_files + lv_files
    print("{} total training files found".format(len(filepaths)))

    # generate label data
    pf_labels = np.repeat(1, len(pf_files))
    pp_labels = np.repeat(2, len(pipspro_files))
    leeds_labels = np.repeat(3, len(leeds_files))
    star_labels = np.repeat(4, len(star_files))
    wl_labels = np.repeat(5, len(wl_files))
    # vmat_labels = np.repeat(6, len(vmat_files))
    lv_labels = np.repeat(6, len(lv_files))
    all_labels = np.concatenate((pf_labels, pp_labels, leeds_labels, star_labels, wl_labels, lv_labels))

    # preallocate
    total_array = np.zeros((len(filepaths), 10000), dtype=np.float32)
    print("Training array preallocated")

    # resize each image and add to a training array
    start = time.time()
    if use_pool:
        futures = {}
        with concurrent.futures.ProcessPoolExecutor() as executor:
            for idx, path in enumerate(filepaths):
                future = executor.submit(process_image, path)
                futures[future] = idx
            for future in concurrent.futures.as_completed(futures):
                total_array[futures[future], :] = future.result()
    else:
        for idx, path in enumerate(filepaths):
            # process_image returns the array directly; no future to unwrap here
            total_array[idx, :] = process_image(path)
    print("Training array scaled/processed in {:.2f}s".format(time.time() - start))

    # save arrays to disk for future use
    np.save(osp.join(osp.dirname(osp.abspath(__file__)), 'data', 'images'), total_array)
    np.save(osp.join(osp.dirname(osp.abspath(__file__)), 'data', 'labels'), all_labels)
    print("Images/labels written to disk")

"Input the HU indices as a number or range. E.g. '66' or '25-47'. Type 'done' when finished:" ) if label == 'done': not_done = False else: items = label.split('-') if len(items) > 1: labels[int(items[0]):int(items[1])] = 1 else: labels[int(items[0])] = 1 scaled_features = np.zeros((len(filepaths), 10000), dtype=np.float32) for idx, fp in enumerate(filepaths): scaled_features[idx, :] = process_image(fp) dir2write = osp.dirname(zip_file) np.save( osp.join(dir2write, 'images_' + osp.splitext(osp.basename(zip_file))[0]), scaled_features) np.save( osp.join(dir2write, 'labels_' + osp.splitext(osp.basename(zip_file))[0]), labels) os.remove(zip_file) if __name__ == '__main__': data_dir = osp.join(osp.dirname(osp.abspath(__file__)), 'data', 'CatPhan 600') zsets = get_files(data_dir, func=lambda x: x.endswith('.zip')) for zset in zsets: identify_images(zset)
def build_images(use_pool=True):
    """Completely load, resize, and save the images for training. Main function."""
    # get image file paths for each image type
    path_stub = r'C:\Users\James\Dropbox\Programming\Python\Projects\pylinac test files'
    # fetch path filenames
    pf_files = get_files(osp.join(path_stub, 'Picket Fences'), is_dicom, use_pool=True)
    pipspro_files = get_files(osp.join(path_stub, '2D Image quality phantoms', 'PipsPro'), is_dicom)
    leeds_files = get_files(osp.join(path_stub, '2D Image quality phantoms', 'Leeds'), is_dicom)
    star_files = get_files(osp.join(path_stub, 'Starshots'), image.is_image, use_pool=True)
    wl_files = get_files(osp.join(path_stub, 'Winston-Lutz'), is_dicom, use_pool=True)
    vmat_files = get_files(osp.join(path_stub, 'VMATs'), is_dicom, use_pool=True)
    lv_files = get_files(osp.join(path_stub, '2D Image quality phantoms', 'Las Vegas'), is_dicom)
    filepaths = pf_files + pipspro_files + leeds_files + star_files + wl_files + vmat_files + lv_files
    print("{} total training files found".format(len(filepaths)))

    # generate label data
    pf_labels = np.repeat(1, len(pf_files))
    pp_labels = np.repeat(2, len(pipspro_files))
    leeds_labels = np.repeat(3, len(leeds_files))
    star_labels = np.repeat(4, len(star_files))
    wl_labels = np.repeat(0, len(wl_files))
    vmat_labels = np.repeat(0, len(vmat_files))
    lv_labels = np.repeat(0, len(lv_files))
    all_labels = np.concatenate((pf_labels, pp_labels, leeds_labels, star_labels, wl_labels, vmat_labels, lv_labels))

    # preallocate
    total_array = np.zeros((len(filepaths), 10000), dtype=np.float32)
    print("Training array preallocated")

    # resize each image and add to a training array
    start = time.time()
    if use_pool:
        futures = {}
        with concurrent.futures.ProcessPoolExecutor() as executor:
            for idx, path in enumerate(filepaths):
                future = executor.submit(process_image, path)
                futures[future] = idx
            for future in concurrent.futures.as_completed(futures):
                total_array[futures[future], :] = future.result()
    else:
        for idx, path in enumerate(filepaths):
            # process_image returns the array directly; no future to unwrap here
            total_array[idx, :] = process_image(path)
    print("Training array set in {:.2f}s".format(time.time() - start))

    # feature scale the images
    # scaled_array = preprocessing.minmax_scale(total_array, feature_range=(0, 1), axis=1)
    scaled_array = total_array
    print("Training array scaled")

    # save arrays to disk for future use
    np.save(osp.join(osp.dirname(osp.abspath(__file__)), 'data', 'images'), scaled_array)
    np.save(osp.join(osp.dirname(osp.abspath(__file__)), 'data', 'labels'), all_labels)
    print("Images built")

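# Once build_images() has written images.npy and labels.npy, the arrays can be fed to any
# standard classifier. The helper below is a minimal sketch, assuming scikit-learn is
# installed; the linear SVM and the 80/20 split are illustrative choices, not necessarily
# what was used for pylinac's shipped models.
def train_classifier_sketch():
    """Hypothetical example: load the saved arrays and fit/evaluate a simple classifier."""
    from sklearn.model_selection import train_test_split
    from sklearn.svm import SVC
    from sklearn.metrics import accuracy_score
    data_dir = osp.join(osp.dirname(osp.abspath(__file__)), 'data')
    images = np.load(osp.join(data_dir, 'images.npy'))
    labels = np.load(osp.join(data_dir, 'labels.npy'))
    # hold out 20% of the images to sanity-check the fit
    X_train, X_test, y_train, y_test = train_test_split(images, labels, test_size=0.2, random_state=0)
    clf = SVC(kernel='linear')
    clf.fit(X_train, y_train)
    print("Test accuracy: {:.1%}".format(accuracy_score(y_test, clf.predict(X_test))))
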
if __name__ == '__main__':
    data_dir = osp.join(osp.dirname(osp.abspath(__file__)), 'data', 'CatPhan 600')
    zsets = get_files(data_dir, func=lambda x: x.endswith('.zip'))
    for zset in zsets:
        identify_images(zset)