def custom_crop_last(slide_, y, batch_to_predict, valid_bit_matrix, x_max):
    """Collect the crops of one slide row into row ``y`` of both containers.

    ``batch_to_predict[y]`` and ``valid_bit_matrix[y]`` are first reset to
    empty lists. Then ``x_max - 1`` crops are read at horizontal offsets that
    are multiples of ``CROP_SIZE``, followed by one final crop whose horizontal
    offset is ``slide.get_slide_size(slide_)[0] - CROP_SIZE``.

    Each crop is converted to RGB, passed through
    ``slide.resize_image_a(..., 224, 224)`` and turned into a ``float32``
    array. The array is always appended to ``batch_to_predict[y]``; the
    matching entry appended to ``valid_bit_matrix[y]`` is 1 when
    ``filter.check_valid`` accepts the array and 0 otherwise.

    Nothing is returned; both containers are mutated in place.
    """
    batch_to_predict[y] = []
    valid_bit_matrix[y] = []

    def _append_crop(left):
        # NOTE(review): the vertical offset multiplies element [1] of the
        # slide size by CROP_SIZE, while the horizontal edge below subtracts
        # CROP_SIZE from element [0] -- confirm the multiplication is intended.
        top = slide.get_slide_size(slide_)[1] * CROP_SIZE
        tile = slide.read_slide_crop(slide_, left, top).convert('RGB')
        pixels = np.asarray(slide.resize_image_a(tile, 224, 224), np.float32)
        valid_bit_matrix[y].append(1 if filter.check_valid(pixels) else 0)
        batch_to_predict[y].append(pixels)

    # Regular columns first ...
    for column in range(x_max - 1):
        _append_crop(column * CROP_SIZE)
    # ... then the final column, anchored CROP_SIZE before slide-size element [0].
    _append_crop(slide.get_slide_size(slide_)[0] - CROP_SIZE)
def balance_dataset(x, y, p):
    """Top up under-represented classes with extra crops from disk.

    The class sizes are counted once from ``y``. For every class smaller than
    the largest one, files matching ``*<category name>*`` inside the ``1120``
    sub-folder of ``CROP_FOLDER`` are drawn at random without replacement.
    Each accepted image (``filter.check_valid``) is appended to ``x`` as a
    ``uint8`` array, its class to ``y`` and the patient id (the filename part
    before the first ``_``) to ``p``.

    A rejected image does not count towards the target: drawing continues
    until the class reaches the size of the largest class or no candidate
    files are left, in which case a warning is logged.

    Args:
        x: list of images, extended in place.
        y: list of class indices into ``CATEGORIES``, extended in place.
        p: list of patient ids, extended in place.

    Returns:
        The same ``x``, ``y`` and ``p`` objects as a tuple.
    """
    unique, counts = np.unique(y, return_counts=True)
    if counts.size == 0:
        # No labels at all: nothing to balance, and max() would raise.
        return x, y, p

    max_class_size = max(counts)
    for class_id, class_count in zip(unique, counts):
        if class_count >= max_class_size:
            continue

        class_name = CATEGORIES[class_id]
        candidates = glob.glob(
            path.join(CROP_FOLDER, str(1120), "*" + class_name + "*"))

        missing = max_class_size - class_count
        added = 0
        # Count accepted images explicitly: rebinding the index of a
        # ``for ... in range()`` loop does not grant an extra iteration, so a
        # rejected image would otherwise silently use up one slot.
        while added < missing:
            if not candidates:
                log.print_warning(
                    "No more available images for class " + class_name)
                break
            filename = random.choice(candidates)
            candidates.remove(filename)
            img = Image.open(filename).convert('RGB')
            # NOTE(review): the image is appended at its stored size, with no
            # resize step here -- confirm callers expect that.
            img_array = np.asarray(img, np.uint8)
            if filter.check_valid(img_array):
                x.append(img_array)
                y.append(class_id)
                p.append(os.path.basename(filename).split("_")[0])
                added += 1
            else:
                log.print_warning(
                    "Img " + filename + " not loaded: too much white")
    return x, y, p
def load_datasets(*sampSizes):
    """Load the usable crop images of one or more crop-size folders.

    For every value in ``sampSizes`` the folder ``CROP_FOLDER/<value>`` is
    listed. Each filename is split on ``_``: the first field is taken as the
    patient id and the second is looked up in ``CATEGORIES`` to get the class
    index. Images accepted by ``filter.check_valid`` are resized to
    ``INPUT_SIZE`` x ``INPUT_SIZE`` with Lanczos resampling and collected;
    rejected ones are reported with a warning. Any exception raised while
    handling a file is logged as an error and the file is skipped.

    Returns:
        A tuple ``(images, class_indices, patient_ids)`` of three parallel
        lists. All three are empty when no sizes are given.
    """
    images, labels, patients = [], [], []

    for sample_size in sampSizes:
        log.print_debug("Opening Cropped dataset " + str(sample_size))
        folder = path.join(CROP_FOLDER, str(sample_size))

        for filename in os.listdir(folder):
            try:
                name_fields = filename.split("_")
                patient_id = name_fields[0]
                class_index = CATEGORIES.index(str(name_fields[1]))

                picture = Image.open(path.join(folder, filename)).convert('RGB')
                if not filter.check_valid(np.asarray(picture, np.uint8)):
                    log.print_warning(
                        "Img " + filename + " not loaded: too much white")
                    continue

                side = int(INPUT_SIZE)
                images.append(
                    np.asarray(picture.resize((side, side), Image.LANCZOS)))
                labels.append(class_index)
                patients.append(patient_id)
            except Exception:
                # Best effort: one unreadable or misnamed file must not stop
                # the whole load.
                log.print_error("Cannot load image " + filename)

    return images, labels, patients
def custom_crop(slide_, y, batch_to_predict, valid_bit_list, x_max, crop_size):
    """Write the crops of slide row ``y`` into two flat, pre-sized containers.

    Row ``y`` occupies the slots ``y * x_max`` .. ``y * x_max + x_max - 1``.
    The first ``x_max - 1`` crops are read at horizontal offsets that are
    multiples of ``crop_size``; the last one is read at
    ``slide.get_slide_size(slide_)[0] - crop_size``. Every crop uses the
    vertical offset ``y * crop_size``.

    Each crop is converted to RGB, passed through
    ``slide.resize_image_a(..., 224, 224)`` and stored as a ``float32`` array
    in ``batch_to_predict``. The same slot of ``valid_bit_list`` is set to 1
    when ``filter.check_valid`` accepts the array and to 0 otherwise.

    Nothing is returned; both containers are mutated in place.
    """
    row_start = y * x_max
    top = y * crop_size

    def _store(slot, left):
        tile = slide.read_slide_crop(slide_, left, top, crop_size).convert('RGB')
        pixels = np.asarray(slide.resize_image_a(tile, 224, 224), np.float32)
        valid_bit_list[slot] = 1 if filter.check_valid(pixels) else 0
        batch_to_predict[slot] = pixels

    # Regular columns of the row.
    for column in range(x_max - 1):
        _store(row_start + column, column * crop_size)
    # Final column, anchored crop_size before slide-size element [0].
    _store(row_start + x_max - 1,
           slide.get_slide_size(slide_)[0] - crop_size)
def balance_set(x, y, in_set_patients):
    """Top up under-represented classes of a set with extra crops from disk.

    The class sizes are counted once from ``y``. For every class smaller than
    the largest one, files matching ``*<category name>*`` inside the ``1120``
    sub-folder of ``CROP_FOLDER`` are candidates, except those whose patient
    id (the filename part before the first ``_``) is in ``in_set_patients``.
    Up to ``largest - current`` candidates are drawn at random without
    replacement. Each image accepted by ``filter.check_valid`` is resized to
    ``INPUT_SIZE`` x ``INPUT_SIZE`` with Lanczos resampling and added together
    with its class. A rejected image still uses up one draw, so a class can
    end below the target size.

    Args:
        x: array of images; it is not modified (a list copy is extended).
        y: array of class indices into ``CATEGORIES``.
        in_set_patients: collection of patient ids whose files are excluded.

    Returns:
        ``(images, labels)``: a new array built from the extended image list
        and the label array with the added classes appended.
    """
    log.print_debug("Balancing dataset")
    cropped_dataset_folder = path.join(CROP_FOLDER, str(1120))
    x_list = x.tolist()
    unique, counts = np.unique(y, return_counts=True)
    if counts.size == 0:
        # No labels at all: nothing to balance, and max() would raise.
        return np.asarray(x_list), y

    max_class_size = max(counts)
    for class_id, class_count in zip(unique, counts):
        if class_count >= max_class_size:
            continue

        class_name = CATEGORIES[class_id]
        file_list = glob.glob(
            path.join(cropped_dataset_folder, "*" + class_name + "*"))
        # Build the filtered list in one pass. Removing entries from a list
        # while iterating over that same list skips the element after every
        # removal, which would let excluded patients' files slip through.
        candidates = [
            filename for filename in file_list
            if os.path.basename(filename).split("_")[0] not in in_set_patients
        ]

        for _ in range(max_class_size - class_count):
            if not candidates:
                log.print_warning(
                    "No more available images for class " + class_name)
                break
            filename = random.choice(candidates)
            candidates.remove(filename)
            img = Image.open(filename).convert('RGB')
            if filter.check_valid(np.asarray(img, np.uint8)):
                to_append_img = np.asarray(
                    img.resize((int(INPUT_SIZE), int(INPUT_SIZE)),
                               Image.LANCZOS))
                x_list.append(to_append_img)
                y = np.append(y, class_id)
            else:
                log.print_warning(
                    "Img " + filename + " not loaded: too much white")
    return np.asarray(x_list), y