def identify_images(zip_file):
    """Interactively label the DICOM images inside a zip archive and save features/labels.

    The archive is opened with ``TemporaryZipDirectory`` and its DICOM files are shown in
    5x5 grids (25 images per figure), each titled with its index into the file list.
    The user then types, one at a time, the indices to mark; marked entries get label 1
    and every other entry stays 0. Invalid entries are reported and ignored.

    Two arrays are written next to ``zip_file`` with ``np.save``: ``images_<zip stem>``
    (one 10000-wide float32 row per file, filled from ``process_image``) and
    ``labels_<zip stem>``.

    Parameters
    ----------
    zip_file : str
        Path to the zip archive containing the images.
    """
    with TemporaryZipDirectory(zip_file) as zfiles:
        filepaths = get_files(zfiles, is_dicom)
        feature_array = np.zeros((len(filepaths), 10000), dtype=np.float32)
        labels = np.zeros(len(filepaths))

        # Show the images in pages of 25 so the user can read off their indices.
        split_val = 25
        rounds = int(math.ceil(len(filepaths) / split_val))
        for n in range(rounds):
            _, axes = plt.subplots(5, 5)
            page = filepaths[split_val*n:split_val*(n+1)]
            for axis, (idx, fp) in zip(axes.flatten(), enumerate(page)):
                # NOTE(review): `.array` is read here, while the same call's result is
                # assigned directly to a feature row below -- confirm what
                # process_image returns.
                img = process_image(fp)
                plt.sca(axis)
                plt.imshow(img.array, cmap=plt.cm.Greys)
                plt.axis('off')
                plt.title(idx+split_val*n)
            plt.show()

        while True:
            label = input("Input the HU indices sequentially, one at a time. Type 'done' when finished:").strip()
            if label == 'done':
                break
            try:
                labels[int(label)] = 1
            except (ValueError, IndexError):
                # A typo must not throw away the labels entered so far.
                print("Invalid index {!r}; enter a whole number below {} or 'done'.".format(label, len(labels)))

        for idx, fp in enumerate(filepaths):
            feature_array[idx, :] = process_image(fp)

        # Feature scaling is currently disabled; rows are saved as produced above.
        dir2write = osp.dirname(zip_file)
        stem = osp.splitext(osp.basename(zip_file))[0]
        np.save(osp.join(dir2write, 'images_' + stem), feature_array)
        np.save(osp.join(dir2write, 'labels_' + stem), labels)
def identify_images(folder, np_name=None, drop_non_dicoms=True):
    """Label the DICOM images in a folder as picket fence (1) or not (0) and save the results.

    Each file is first classified from its lower-cased base name: a match against the
    "good" keywords gives label 1 and a match against the "bad" keywords gives label 0
    (a bad match wins when both match). Files matching neither list are displayed and the
    user is asked for the label; entering 'done' stops labeling early.

    The feature rows (from ``process_image``) and labels of every file labeled so far are
    written with ``np.save`` into the module-level ``data_dir`` as ``images_<np_name>_pf``
    and ``labels_<np_name>_pf``.

    Parameters
    ----------
    folder : str
        Folder searched for DICOM files.
    np_name : str, optional
        Name used in the output file names; defaults to the base name of ``folder``.
    drop_non_dicoms : bool
        If True, ``drop_non_dicom(folder)`` is called before the files are gathered.
    """
    if drop_non_dicoms:
        drop_non_dicom(folder)
    print("Obtaining file paths...")
    filepaths = get_files(folder, is_dicom)
    print(len(filepaths), "found")
    feature_array = np.zeros((len(filepaths), 10000), dtype=np.float32)
    labels = np.zeros(len(filepaths))

    # File-name keywords used to try to identify an image automatically.
    good_names = ('pf', 'vmat', '90', '270', '180', 'ra', 't1', 'picket')
    bad_names = ('open', 't2', 't3', 'speed', 'dose', 'rate', 'drgs',
                 'mlcs', 'jaw', 'coll', 'strip')

    num_labeled = 0  # count of rows actually filled; used to trim the arrays
    for idx, path in enumerate(filepaths):
        basepath = osp.basename(path).lower()
        label = None
        if any(name in basepath for name in good_names):
            label = 1
        if any(name in basepath for name in bad_names):
            label = 0  # checked second on purpose: a bad keyword overrides a good one

        if label is None:
            img = process_image(path).reshape(100, 100)
            plt.imshow(img, cmap=plt.cm.viridis)
            plt.axis('off')
            plt.title(osp.basename(path))
            plt.show()
            while label is None:
                answer = input(
                    "Input the classification: 1 if a PF field, 0 otherwise. Enter 'done' to finish: "
                )
                if answer == 'done':
                    break
                try:
                    candidate = int(answer)
                except ValueError:
                    continue  # not a number: ask again
                if candidate in (0, 1):
                    label = candidate
            if label is None:
                # User entered 'done': stop without recording this image.
                break

        labels[idx] = label
        feature_array[idx, :] = process_image(path)
        num_labeled = idx + 1

    # Keep only the rows that were labeled. Counting them (instead of slicing with the
    # last loop index) keeps the final image when the loop runs to completion and also
    # works when no files were found.
    feature_array = feature_array[:num_labeled, :]
    labels = labels[:num_labeled]

    # Feature scaling is currently disabled; rows are saved as produced above.
    if np_name is None:
        np_name = osp.basename(folder)
    np.save(osp.join(data_dir, 'images_' + np_name + '_pf'), feature_array)
    np.save(osp.join(data_dir, 'labels_' + np_name + '_pf'), labels)
Exemple #3
0
def identify_images(zip_file):
    """Interactively label the DICOM images inside a zip archive, save the arrays, then delete the zip.

    The archive is opened with ``TemporaryZipDirectory`` and its DICOM files are shown in
    5x5 grids (25 images per figure), each titled with its index into the file list.
    The user then enters either a single index (``'66'``) or a range (``'25-47'``); the
    selected entries get label 1 and every other entry stays 0. Invalid entries are
    reported and ignored.

    ``images_<zip stem>`` (one 10000-wide float32 row per file, filled from
    ``process_image``) and ``labels_<zip stem>`` are written next to ``zip_file`` with
    ``np.save``, after which ``zip_file`` itself is removed.

    Parameters
    ----------
    zip_file : str
        Path to the zip archive containing the images.
    """
    with TemporaryZipDirectory(zip_file) as zfiles:
        filepaths = get_files(zfiles, is_dicom)
        labels = np.zeros(len(filepaths))

        # Show the images in pages of 25 so the user can read off their indices.
        split_val = 25
        rounds = int(math.ceil(len(filepaths) / split_val))
        for n in range(rounds):
            _, axes = plt.subplots(5, 5, figsize=(10, 10))
            page = filepaths[split_val*n:split_val*(n+1)]
            for axis, (idx, fp) in zip(axes.flatten(), enumerate(page)):
                img = load(fp)
                plt.sca(axis)
                plt.imshow(img, cmap=plt.cm.Greys)
                plt.axis('off')
                plt.title(idx+split_val*n)
            plt.show()

        while True:
            label = input("Input the HU indices as a number or range. E.g. '66' or '25-47'. Type 'done' when finished:").strip()
            if label == 'done':
                break
            items = label.split('-')
            try:
                if len(items) > 1:
                    # NOTE(review): this is a plain slice, so the end index is
                    # excluded ('25-47' marks 25..46) -- confirm that is intended.
                    labels[int(items[0]):int(items[1])] = 1
                else:
                    labels[int(items[0])] = 1
            except (ValueError, IndexError):
                # A typo must not throw away the labels entered so far.
                print("Invalid entry {!r}; enter an index, a range like '25-47', or 'done'.".format(label))

        # Must run inside the `with`: the extracted files are only available here.
        scaled_features = np.zeros((len(filepaths), 10000), dtype=np.float32)
        for idx, fp in enumerate(filepaths):
            scaled_features[idx, :] = process_image(fp)

    dir2write = osp.dirname(zip_file)
    stem = osp.splitext(osp.basename(zip_file))[0]
    np.save(osp.join(dir2write, 'images_' + stem), scaled_features)
    np.save(osp.join(dir2write, 'labels_' + stem), labels)
    # The archive is deleted once its arrays have been written.
    os.remove(zip_file)
def identify_images(folder, name=None, drop_non_dicoms=True):
    """Label the DICOM images in a folder into four classes (0-3) and save the results.

    Each file is first classified from its lower-cased base name, in this order:
    'pf' or 'vmat' -> 0, 'open' -> 1, 'mlc' -> 3, and 'drgs' / both 'dr' and 'gs' /
    'gantry' -> 2. Files matching none of these are displayed and the user is asked for
    the label; entering 'done' stops labeling early.

    The feature rows (from ``process_image``) and labels of every file labeled so far are
    written with ``np.save`` into the module-level ``data_dir`` as ``images_<name>_vmat``
    and ``labels_<name>_vmat``.

    Parameters
    ----------
    folder : str
        Folder searched for DICOM files.
    name : str, optional
        Name used in the output file names; defaults to the base name of ``folder``.
    drop_non_dicoms : bool
        If True, ``drop_non_dicom(folder)`` is called before the files are gathered.
    """
    if drop_non_dicoms:
        drop_non_dicom(folder)
    print("Obtaining file paths...")
    filepaths = get_files(folder, is_dicom)
    print(len(filepaths), "found")
    feature_array = np.zeros((len(filepaths), 10000), dtype=np.float32)
    labels = np.zeros(len(filepaths))

    num_labeled = 0  # count of rows actually filled; used to trim the arrays
    for idx, path in enumerate(filepaths):
        # try to automatically identify from the file name
        basepath = osp.basename(path).lower()
        if ('pf' in basepath) or ('vmat' in basepath):
            label = 0
        elif 'open' in basepath:
            label = 1
        elif 'mlc' in basepath:
            label = 3
        elif ('drgs' in basepath) or (('dr' in basepath) and
                                      ('gs' in basepath)) or ('gantry'
                                                              in basepath):
            label = 2
        else:
            label = None

        if label is not None:
            print(path, "Labeled", label)
        else:
            img = process_image(path).reshape(100, 100)
            plt.imshow(img, cmap=plt.cm.jet)
            plt.axis('off')
            plt.title(osp.basename(path))
            plt.show()
            while label is None:
                answer = input(
                    "Input the classification: 0 if not a VMAT field, 1 if open field, 2 if drgs, 3 if mlcs. 'done' to finish: "
                )
                if answer == 'done':
                    break
                try:
                    candidate = int(answer)
                except ValueError:
                    continue  # not a number: ask again instead of crashing
                if candidate in (0, 1, 2, 3):
                    label = candidate
            if label is None:
                # User entered 'done': stop without recording this image.
                break

        labels[idx] = label
        feature_array[idx, :] = process_image(path)
        num_labeled = idx + 1

    # Keep only the rows that were labeled. Counting them (instead of slicing with the
    # last loop index) keeps the final image when the loop runs to completion and also
    # works when no files were found.
    feature_array = feature_array[:num_labeled, :]
    labels = labels[:num_labeled]

    # Feature scaling is currently disabled; rows are saved as produced above.
    if name is None:
        name = osp.basename(folder)
    np.save(osp.join(data_dir, 'images_' + name + '_vmat'), feature_array)
    np.save(osp.join(data_dir, 'labels_' + name + '_vmat'), labels)
Exemple #5
0
def identify_images(folder, name=None, drop_non_dicoms=True):
    """Label the DICOM images in a folder into four classes (0-3) and save the results.

    A label is first guessed from the lower-cased base name of each file, checking in
    order: 'pf' or 'vmat' -> 0, 'open' -> 1, 'mlc' -> 3, then 'drgs' / both 'dr' and
    'gs' / 'gantry' -> 2. When nothing matches, the image is displayed and the user is
    prompted for the label; typing 'done' ends the session early.

    Every file labeled before the session ended contributes one feature row (from
    ``process_image``) and one label. Both arrays are saved with ``np.save`` into the
    module-level ``data_dir`` as ``images_<name>_vmat`` and ``labels_<name>_vmat``.

    Parameters
    ----------
    folder : str
        Folder searched for DICOM files.
    name : str, optional
        Name used in the output file names; defaults to the base name of ``folder``.
    drop_non_dicoms : bool
        If True, ``drop_non_dicom(folder)`` is called before the files are gathered.
    """
    if drop_non_dicoms:
        drop_non_dicom(folder)
    print("Obtaining file paths...")
    filepaths = get_files(folder, is_dicom)
    print(len(filepaths), "found")
    feature_array = np.zeros((len(filepaths), 10000), dtype=np.float32)
    labels = np.zeros(len(filepaths))

    valid_labels = (0, 1, 2, 3)
    labeled_count = 0  # rows filled so far; the arrays are trimmed to this length
    for idx, path in enumerate(filepaths):
        # try to automatically identify from the file name
        basepath = osp.basename(path).lower()
        auto_label = None
        if ('pf' in basepath) or ('vmat' in basepath):
            auto_label = 0
        elif 'open' in basepath:
            auto_label = 1
        elif 'mlc' in basepath:
            auto_label = 3
        elif ('drgs' in basepath) or (('dr' in basepath) and ('gs' in basepath)) or ('gantry' in basepath):
            auto_label = 2

        if auto_label is not None:
            chosen = auto_label
            print(path, "Labeled", chosen)
        else:
            img = process_image(path).reshape(100, 100)
            plt.imshow(img, cmap=plt.cm.jet)
            plt.axis('off')
            plt.title(osp.basename(path))
            plt.show()
            chosen = None
            finished = False
            while chosen is None and not finished:
                reply = input("Input the classification: 0 if not a VMAT field, 1 if open field, 2 if drgs, 3 if mlcs. 'done' to finish: ")
                if reply == 'done':
                    finished = True
                    continue
                try:
                    value = int(reply)
                except ValueError:
                    # Non-numeric input used to crash the session; re-prompt instead.
                    continue
                if value in valid_labels:
                    chosen = value
            if finished:
                break

        labels[idx] = chosen
        feature_array[idx, :] = process_image(path)
        labeled_count = idx + 1

    # Trim to the rows actually labeled. Slicing with the last loop index would drop
    # the final image after a complete run and fail outright on an empty folder.
    feature_array = feature_array[:labeled_count, :]
    labels = labels[:labeled_count]

    # Feature scaling is currently disabled; rows are saved as produced above.
    if name is None:
        name = osp.basename(folder)
    np.save(osp.join(data_dir, 'images_' + name + '_vmat'), feature_array)
    np.save(osp.join(data_dir, 'labels_' + name + '_vmat'), labels)
Exemple #6
0
def identify_images(zip_file):
    """Interactively label the DICOM images inside a zip archive, save the arrays, then delete the zip.

    The DICOM files found in the extracted archive are displayed in 5x5 grids
    (25 images per figure), each titled with its index into the file list. The user then
    enters single indices (``'66'``) or ranges (``'25-47'``); selected entries are given
    label 1 and all remaining entries keep label 0. Entries that cannot be parsed are
    reported and skipped.

    ``images_<zip stem>`` (one 10000-wide float32 row per file, filled from
    ``process_image``) and ``labels_<zip stem>`` are saved with ``np.save`` in the
    directory holding ``zip_file``; ``zip_file`` is then removed.

    Parameters
    ----------
    zip_file : str
        Path to the zip archive containing the images.
    """
    with TemporaryZipDirectory(zip_file) as zfiles:
        filepaths = get_files(zfiles, is_dicom)
        labels = np.zeros(len(filepaths))

        # Page through the images 25 at a time so the indices can be read off.
        split_val = 25
        rounds = int(math.ceil(len(filepaths) / split_val))
        for n in range(rounds):
            offset = split_val * n
            _, axes = plt.subplots(5, 5, figsize=(10, 10))
            for axis, (idx, fp) in zip(
                    axes.flatten(),
                    enumerate(filepaths[offset:offset + split_val])):
                img = load(fp)
                plt.sca(axis)
                plt.imshow(img, cmap=plt.cm.Greys)
                plt.axis('off')
                plt.title(idx + offset)
            plt.show()

        while True:
            label = input(
                "Input the HU indices as a number or range. E.g. '66' or '25-47'. Type 'done' when finished:"
            ).strip()
            if label == 'done':
                break
            items = label.split('-')
            try:
                if len(items) > 1:
                    # NOTE(review): plain slice, so the end index is excluded
                    # ('25-47' marks 25..46) -- confirm that is intended.
                    labels[int(items[0]):int(items[1])] = 1
                else:
                    labels[int(items[0])] = 1
            except (ValueError, IndexError):
                # Keep the session (and the labels entered so far) alive on a typo.
                print("Invalid entry {!r}; enter an index, a range like '25-47', "
                      "or 'done'.".format(label))

        # Must run inside the `with`: the extracted files are only available here.
        scaled_features = np.zeros((len(filepaths), 10000), dtype=np.float32)
        for idx, fp in enumerate(filepaths):
            scaled_features[idx, :] = process_image(fp)

    dir2write = osp.dirname(zip_file)
    stem = osp.splitext(osp.basename(zip_file))[0]
    np.save(osp.join(dir2write, 'images_' + stem), scaled_features)
    np.save(osp.join(dir2write, 'labels_' + stem), labels)
    # The archive is deleted once its arrays have been written.
    os.remove(zip_file)
def build_images(use_pool=True):
    """Completely load, resize, and save the images for training. Main function.

    Image files are gathered from hard-coded sub-folders of ``path_stub`` and every
    file in a group gets that group's integer label: 1 Picket Fences, 2 QC-3, 3 Leeds,
    4 Starshots, 5 Winston-Lutz, 6 Las Vegas. Each file is passed through
    ``process_image`` and stored as one 10000-wide float32 row.

    The resulting arrays are written with ``np.save`` as ``images`` and ``labels`` in
    the ``data`` folder next to this file.

    Parameters
    ----------
    use_pool : bool
        If True, the images are processed in a ``ProcessPoolExecutor``; otherwise they
        are processed one at a time in the current process.
    """
    # get image file paths for each image type
    path_stub = r'D:\Users\James\Dropbox\Programming\Python\Projects\pylinac test files'
    # fetch path filenames
    pf_files = get_files(osp.join(path_stub, 'Picket Fences'), is_dicom, use_pool=True)
    pipspro_files = get_files(osp.join(path_stub, '2D Image quality phantoms', 'QC-3'), is_dicom)
    leeds_files = get_files(osp.join(path_stub, '2D Image quality phantoms', 'Leeds'), is_dicom)
    star_files = get_files(osp.join(path_stub, 'Starshots'), image.is_image, use_pool=True)
    wl_files = get_files(osp.join(path_stub, 'Winston-Lutz'), is_dicom, use_pool=True)
    # NOTE: the 'VMATs' folder is not loaded in this version; label 6 is Las Vegas.
    lv_files = get_files(osp.join(path_stub, '2D Image quality phantoms', 'Las Vegas'), is_dicom)
    filepaths = pf_files + pipspro_files + leeds_files + star_files + wl_files + lv_files
    print("{} total training files found".format(len(filepaths)))

    # generate label data; the order must match the order used to build `filepaths`
    pf_labels = np.repeat(1, len(pf_files))
    pp_labels = np.repeat(2, len(pipspro_files))
    leeds_labels = np.repeat(3, len(leeds_files))
    star_labels = np.repeat(4, len(star_files))
    wl_labels = np.repeat(5, len(wl_files))
    lv_labels = np.repeat(6, len(lv_files))
    all_labels = np.concatenate((pf_labels, pp_labels, leeds_labels, star_labels, wl_labels, lv_labels))

    # preallocate
    total_array = np.zeros((len(filepaths), 10000), dtype=np.float32)
    print("Training array preallocated")

    # resize each image and add to a training array
    start = time.time()
    if use_pool:
        with concurrent.futures.ProcessPoolExecutor() as executor:
            # Map each future to its row so results land in file order regardless of
            # the order in which the workers finish.
            futures = {executor.submit(process_image, path): idx
                       for idx, path in enumerate(filepaths)}
            for future in concurrent.futures.as_completed(futures):
                total_array[futures[future], :] = future.result()
    else:
        for idx, path in enumerate(filepaths):
            # process_image returns the row data directly (not a Future), so there is
            # no `.result()` to call here.
            total_array[idx, :] = process_image(path)
    print("Training array scaled/processed in {:.2f}s".format(time.time() - start))

    # save arrays to disk for future use
    out_dir = osp.join(osp.dirname(osp.abspath(__file__)), 'data')
    np.save(osp.join(out_dir, 'images'), total_array)
    np.save(osp.join(out_dir, 'labels'), all_labels)
    print("Images/labels written to disk")
Exemple #8
0
                "Input the HU indices as a number or range. E.g. '66' or '25-47'. Type 'done' when finished:"
            )
            if label == 'done':
                not_done = False
            else:
                items = label.split('-')
                if len(items) > 1:
                    labels[int(items[0]):int(items[1])] = 1
                else:
                    labels[int(items[0])] = 1
        scaled_features = np.zeros((len(filepaths), 10000), dtype=np.float32)
        for idx, fp in enumerate(filepaths):
            scaled_features[idx, :] = process_image(fp)
    dir2write = osp.dirname(zip_file)
    np.save(
        osp.join(dir2write,
                 'images_' + osp.splitext(osp.basename(zip_file))[0]),
        scaled_features)
    np.save(
        osp.join(dir2write,
                 'labels_' + osp.splitext(osp.basename(zip_file))[0]), labels)
    os.remove(zip_file)


if __name__ == '__main__':
    # Label every zip archive found in the 'data/CatPhan 600' folder next to this file.
    module_dir = osp.dirname(osp.abspath(__file__))
    data_dir = osp.join(module_dir, 'data', 'CatPhan 600')
    for zset in get_files(data_dir, func=lambda fname: fname.endswith('.zip')):
        identify_images(zset)
Exemple #9
0
def build_images(use_pool=True):
    """Completely load, resize, and save the images for training. Main function.

    Image files are gathered from hard-coded sub-folders of ``path_stub`` and every
    file in a group gets that group's integer label: 1 Picket Fences, 2 PipsPro,
    3 Leeds, 4 Starshots, and 0 for Winston-Lutz, VMATs and Las Vegas. Each file is
    passed through ``process_image`` and stored as one 10000-wide float32 row.

    The resulting arrays are written with ``np.save`` as ``images`` and ``labels`` in
    the ``data`` folder next to this file.

    Parameters
    ----------
    use_pool : bool
        If True, the images are processed in a ``ProcessPoolExecutor``; otherwise they
        are processed one at a time in the current process.
    """
    # get image file paths for each image type
    path_stub = r'C:\Users\James\Dropbox\Programming\Python\Projects\pylinac test files'
    phantom_dir = osp.join(path_stub, '2D Image quality phantoms')
    # fetch path filenames
    pf_files = get_files(osp.join(path_stub, 'Picket Fences'),
                         is_dicom,
                         use_pool=True)
    pipspro_files = get_files(osp.join(phantom_dir, 'PipsPro'), is_dicom)
    leeds_files = get_files(osp.join(phantom_dir, 'Leeds'), is_dicom)
    star_files = get_files(osp.join(path_stub, 'Starshots'),
                           image.is_image,
                           use_pool=True)
    wl_files = get_files(osp.join(path_stub, 'Winston-Lutz'),
                         is_dicom,
                         use_pool=True)
    vmat_files = get_files(osp.join(path_stub, 'VMATs'),
                           is_dicom,
                           use_pool=True)
    lv_files = get_files(osp.join(phantom_dir, 'Las Vegas'), is_dicom)
    filepaths = pf_files + pipspro_files + leeds_files + star_files + wl_files + vmat_files + lv_files
    print("{} total training files found".format(len(filepaths)))

    # generate label data; the order must match the order used to build `filepaths`
    pf_labels = np.repeat(1, len(pf_files))
    pp_labels = np.repeat(2, len(pipspro_files))
    leeds_labels = np.repeat(3, len(leeds_files))
    star_labels = np.repeat(4, len(star_files))
    wl_labels = np.repeat(0, len(wl_files))
    vmat_labels = np.repeat(0, len(vmat_files))
    lv_labels = np.repeat(0, len(lv_files))
    all_labels = np.concatenate(
        (pf_labels, pp_labels, leeds_labels, star_labels, wl_labels,
         vmat_labels, lv_labels))

    # preallocate
    total_array = np.zeros((len(filepaths), 10000), dtype=np.float32)
    print("Training array preallocated")

    # resize each image and add to a training array
    start = time.time()
    if use_pool:
        with concurrent.futures.ProcessPoolExecutor() as executor:
            # Map each future to its row so results land in file order regardless of
            # the order in which the workers finish.
            futures = {executor.submit(process_image, path): idx
                       for idx, path in enumerate(filepaths)}
            for future in concurrent.futures.as_completed(futures):
                total_array[futures[future], :] = future.result()
    else:
        for idx, path in enumerate(filepaths):
            # process_image returns the row data directly (not a Future), so there is
            # no `.result()` to call here.
            total_array[idx, :] = process_image(path)
    print("Training array set in {:.2f}s".format(time.time() - start))

    # Feature scaling is currently disabled; the array is saved unscaled.
    scaled_array = total_array
    print("Training array scaled")

    # save arrays to disk for future use
    out_dir = osp.join(osp.dirname(osp.abspath(__file__)), 'data')
    np.save(osp.join(out_dir, 'images'), scaled_array)
    np.save(osp.join(out_dir, 'labels'), all_labels)
    print("Images build")
Exemple #10
0
                plt.sca(axis)
                plt.imshow(img, cmap=plt.cm.Greys)
                plt.axis('off')
                plt.title(idx+split_val*n)
            plt.show()
        not_done = True
        while not_done:
            label = input("Input the HU indices as a number or range. E.g. '66' or '25-47'. Type 'done' when finished:")
            if label == 'done':
                not_done = False
            else:
                items = label.split('-')
                if len(items) > 1:
                    labels[int(items[0]):int(items[1])] = 1
                else:
                    labels[int(items[0])] = 1
        scaled_features = np.zeros((len(filepaths), 10000), dtype=np.float32)
        for idx, fp in enumerate(filepaths):
            scaled_features[idx, :] = process_image(fp)
    dir2write = osp.dirname(zip_file)
    np.save(osp.join(dir2write, 'images_' + osp.splitext(osp.basename(zip_file))[0]), scaled_features)
    np.save(osp.join(dir2write, 'labels_' + osp.splitext(osp.basename(zip_file))[0]), labels)
    os.remove(zip_file)


if __name__ == '__main__':
    # Run the interactive labeling over each zip archive in the 'data/CatPhan 600'
    # folder located beside this file.
    this_dir = osp.dirname(osp.abspath(__file__))
    data_dir = osp.join(this_dir, 'data', 'CatPhan 600')
    for zset in get_files(data_dir, func=lambda fname: fname.endswith('.zip')):
        identify_images(zset)