def AnnoationRead(nodeInd, patientDir, level):
    """Run ASAP's annotation-to-mask conversion for one patient node.

    The slide ``<patientDir>_node_<nodeInd>.tif`` and the matching XML under
    ``lesion_annotations/`` are looked up below a hard-coded dataset root.

    :param nodeInd: node index, appended to the patient name via ``str()``.
    :param patientDir: patient directory name (also the file-name prefix).
    :param level: accepted but not used by this function.
    :returns: the ``mir.AnnotationToMask`` object after ``convert`` was called.
    """
    dataset_root = '/media/groot/Seagate Backup Plus Drive/dataset'
    node_name = patientDir + '_node_' + str(nodeInd)
    patient_root = dataset_root + '/' + patientDir
    annotations_dir = patient_root + '/lesion_annotations/'

    # NOTE(review): `reader` is not created here; it must already exist in
    # the enclosing (module) scope -- confirm where it is defined.
    slide = reader.open(patient_root + '/' + node_name + '.tif')

    annotations = mir.AnnotationList()
    repository = mir.XmlRepository(annotations)
    repository.setSource(annotations_dir + node_name + '.xml')
    repository.load()

    annotation_mask = mir.AnnotationToMask()

    # Hard-wired switch between the two label schemes.
    camelyon17_type_mask = True
    if camelyon17_type_mask:
        label_map = {'metastases': 1, 'normal': 2}
        conversion_order = ['metastases', 'normal']
    else:
        label_map = {'_0': 1, '_1': 1, '_2': 0}
        conversion_order = ['_0', '_1', '_2']

    # NOTE(review): the output path handed to convert() is the annotations
    # directory itself (trailing '/'), not a file name -- confirm intent.
    annotation_mask.convert(
        annotations,
        annotations_dir,
        slide.getDimensions(),
        slide.getSpacing(),
        label_map,
        conversion_order,
    )
    return annotation_mask
def single_file_conversion(slide_num):
    """Convert the XML annotation of one ``tumor_NNN`` slide into a mask TIFF.

    The slide, its XML annotation and the output mask all live under the
    hard-coded ``/mnt/ai/uni_warwick/camelyon16_dataset/training`` tree and
    are addressed by the zero-padded (3 digit) slide number.

    Conversion errors are reported on stdout and not re-raised (best-effort
    behaviour kept from the original), but ``KeyboardInterrupt`` and
    ``SystemExit`` are no longer swallowed.

    :param slide_num: slide number; formatted as ``str(slide_num).zfill(3)``.
    """
    slide_id = str(slide_num).zfill(3)

    output_path = ('/mnt/ai/uni_warwick/camelyon16_dataset/training/'
                   'Ground_Truth_Extracted/Mask/tumor_' + slide_id + '.tif')

    reader = mir.MultiResolutionImageReader()
    mr_image = reader.open(
        '/mnt/ai/uni_warwick/camelyon16_dataset/training/training/tumor/tumor_'
        + slide_id + '.tif')

    annotation_list = mir.AnnotationList()
    xml_repository = mir.XmlRepository(annotation_list)
    xml_repository.setSource(
        '/mnt/ai/uni_warwick/camelyon16_dataset/training/training/'
        'lesion_annotations/tumor_' + slide_id + '.xml')
    xml_repository.load()

    annotation_mask = mir.AnnotationToMask()

    # Hard-wired switch between the two label schemes.
    camelyon17_type_mask = False
    if camelyon17_type_mask:
        label_map = {'metastases': 1, 'normal': 2}
        conversion_order = ['metastases', 'normal']
    else:
        label_map = {'_0': 1, '_1': 1, '_2': 0}
        conversion_order = ['_0', '_1', '_2']

    try:
        annotation_mask.convert(annotation_list, output_path,
                                mr_image.getDimensions(),
                                mr_image.getSpacing(),
                                label_map, conversion_order)
    except Exception as exc:
        # Was a bare `except:`, which also caught Ctrl-C / interpreter exit.
        print("Oops!", type(exc), "occured.")
def __getPatch(pathlist, start, end):
    """Cut one image/mask PNG pair per annotation for each case directory.

    Every directory in the selected slice of ``pathlist`` must contain
    exactly one ``*.ndpi`` image, one ``*.xml`` annotation file and one
    ``*_mask.tiff`` mask. For each annotation in the XML, a patch is read
    from the image and from the mask at level 1 and written to
    ``<path>/patch/imgs`` and ``<path>/patch/masks`` respectively.

    :param pathlist: sequence of case directories.
    :param start: index of the first directory to process.
    :param end: index one past the last directory; ``-1`` means "to the end".
    :raises AssertionError: if a directory does not hold exactly one image,
        XML file and mask.
    """
    print('getting start from {} to {}'.format(start, end))
    pathlist = pathlist[start:end] if end != -1 else pathlist[start:]

    for path in pathlist:
        print(path)
        img_name = glob.glob(os.path.join(path, '*.ndpi'))
        xml_name = glob.glob(os.path.join(path, '*.xml'))
        mask_name = glob.glob(os.path.join(path, '*_mask.tiff'))

        assert len(
            img_name) == 1, 'failed to get image {} : no image or multi image'.format(img_name)
        assert len(
            xml_name) == 1, 'failed to get xml label {} : no xml label or multi xml label'.format(xml_name)
        assert len(mask_name) == 1, 'failed to get mask {} : no mask or multi mask'.format(
            mask_name)

        img_name = img_name[0]
        xml_name = xml_name[0]
        mask_name = mask_name[0]

        img_reader = mir.MultiResolutionImageReader()
        mask_reader = mir.MultiResolutionImageReader()
        img = img_reader.open(img_name)
        mask = mask_reader.open(mask_name)

        annotation_list = mir.AnnotationList()
        xml_repository = mir.XmlRepository(annotation_list)
        xml_repository.setSource(xml_name)
        xml_repository.load()
        # annotation_group = annotation_list.getGroup('Annotation Group 0')
        annotations = annotation_list.getAnnotations()
        del xml_repository

        # makedirs(exist_ok=True) creates 'patch' and the leaf in one call
        # and does not race with a concurrent creator like exists()+mkdir().
        imgs_dir = os.path.join(path, 'patch', 'imgs')
        masks_dir = os.path.join(path, 'patch', 'masks')
        os.makedirs(imgs_dir, exist_ok=True)
        os.makedirs(masks_dir, exist_ok=True)

        # Both outputs are named after the image file (not the mask file).
        stem = os.path.splitext(os.path.basename(img_name))[0]

        for idx, annotation in enumerate(annotations):
            x, y, width, height = getPositionAndSize(annotation)
            level_0_width, level_0_height = img.getLevelDimensions(0)
            level_1_width, level_1_height = img.getLevelDimensions(1)
            # Only the size is rescaled to level 1; x / y are passed on
            # unscaled (the scaling lines below were already disabled).
            # x *= level_1_width/level_0_width
            # y *= level_1_height/level_0_height
            width *= level_1_width / level_0_width
            height *= level_1_height / level_0_height
            x, y, width, height = int(x), int(y), int(width), int(height)

            patch_name = stem + '-{}.png'.format(idx)

            patch_img = img.getUInt16Patch(x, y, width, height, 1)
            # uint8 (not int8): same bytes, but the dtype PIL expects for
            # 8-bit 'RGB' / 'L' data.
            patch_img = np.array(patch_img, dtype=np.uint8)
            patch_img = Image.fromarray(patch_img, mode='RGB')
            patch_img.save(os.path.join(imgs_dir, patch_name))
            del patch_img

            patch_mask = mask.getUInt16Patch(x, y, width, height, 1)
            patch_mask = np.array(patch_mask, dtype=np.uint8)
            # Only the first channel of the mask patch is saved.
            patch_mask = Image.fromarray(patch_mask[:, :, 0], mode='L')
            patch_mask.save(os.path.join(masks_dir, patch_name))
            del patch_mask
def __init__(self, path: str):
    """Load the annotations of the XML file at ``path``.

    The parsed annotations are kept in ``self.annotation_list``.

    :param path: path of the annotation file; must end in ``.xml``.
    :raises AssertionError: if ``path`` does not have an ``.xml`` extension.
    :raises FileNotFoundError: re-raised with a short message if loading
        raises ``FileNotFoundError``.
    """
    assert os.path.splitext(
        path)[-1] == '.xml', 'xml file extension name must is xml'

    self.annotation_list = mir.AnnotationList()
    xml_repository = mir.XmlRepository(self.annotation_list)
    try:
        xml_repository.setSource(path)
        # NOTE(review): the result of load() is not checked -- confirm
        # whether a missing file is signalled by exception or return value.
        xml_repository.load()
    except FileNotFoundError as e:
        # The original `raise('file not found!')` tried to raise a str,
        # which itself fails with TypeError; raise a real exception instead.
        raise FileNotFoundError('file not found!') from e
def read_xml_get_tile(image_path, xml_path, output_path):
    """Write a mask TIFF and one ``.mat`` tile per annotation for a slide.

    A directory named after the image file (base name minus its last four
    characters) is created under ``output_path``. The XML annotations are
    rasterised to ``mask.tif`` in that directory, using the module-level
    ``label_map`` keyed by annotation colour. Then, for every annotation,
    the bounding box of its coordinates is read from both image and mask at
    level 0 and saved as ``<colour>/<annotation name>.mat`` with keys
    ``'img'`` and ``'mask'``.

    :param image_path: path of the whole-slide image.
    :param xml_path: path of the XML annotation file.
    :param output_path: existing directory receiving the per-slide folder.
    :returns: ``0`` when the XML holds no annotations, otherwise ``None``.
    """
    print('Start ', str(image_path), str(xml_path))

    patient_id = os.path.basename(image_path)[:-4]
    patient_dir = os.path.join(output_path, patient_id)
    if not os.path.exists(patient_dir):
        os.mkdir(patient_dir)

    slide_reader = mir.MultiResolutionImageReader()
    slide = slide_reader.open(str(image_path))

    # ----- read the XML and rasterise it to mask.tif -----
    annotation_list = mir.AnnotationList()
    repository = mir.XmlRepository(annotation_list)
    repository.setSource(str(xml_path))
    repository.load()

    annotations = annotation_list.getAnnotations()
    if len(annotations) == 0:
        print('empty annotation, id ', patient_id)
        return 0

    # Annotation name -> label value, looked up through the colour.
    name_to_label = {
        item.getName(): label_map[item.getColor()] for item in annotations
    }

    mask_file = os.path.join(patient_dir, 'mask.tif')
    converter = mir.AnnotationToMask()
    converter.convert(annotation_list, mask_file, slide.getDimensions(),
                      slide.getSpacing(), name_to_label)

    mask_reader = mir.MultiResolutionImageReader()
    mask_image = mask_reader.open(mask_file)

    # ----- cut one tile per annotation bounding box -----
    for item in annotations:
        tile_name = item.getName()
        colour = item.getColor()

        points = [(pt.getX(), pt.getY()) for pt in item.getCoordinates()]
        xs = [pt[0] for pt in points]
        ys = [pt[1] for pt in points]
        x_hi, x_lo = max(xs), min(xs)
        y_hi, y_lo = max(ys), min(ys)

        left, top = int(x_lo), int(y_lo)
        tile_w, tile_h = int(x_hi - x_lo), int(y_hi - y_lo)

        image_tile = slide.getUCharPatch(left, top, tile_w, tile_h, 0)
        mask_tile = mask_image.getUCharPatch(left, top, tile_w, tile_h, 0)

        colour_dir = os.path.join(patient_dir, str(colour))
        if not os.path.exists(colour_dir):
            os.mkdir(colour_dir)
        sio.savemat(os.path.join(colour_dir, tile_name + '.mat'),
                    {'img': image_tile, 'mask': mask_tile})
def main(args):
    """Create ``<name>_Mask.tif`` for a window of the WSI data list.

    Reads the data list with ``parse_dataset``, keeps ``args.count`` entries
    starting at ``args.data_offset`` and converts each entry's XML label
    into a mask inside ``args.output_parent_dir / args.output_folder_name``.
    Entries whose TIF is missing, that have no label, or whose label file is
    missing are logged and skipped.

    :param args: namespace providing ``output_parent_dir``,
        ``output_folder_name``, ``data_list_file``, ``data_offset`` and
        ``count``.
    :raises ValueError: if ``count`` exceeds the entries left after the
        offset.
    """
    pid = os.getpid()
    print('Running with PID', pid)
    logger = get_logger('XML-to-MASK-{}'.format(pid))

    output_dir_path = args.output_parent_dir / args.output_folder_name
    logger.info('Creating output directory at %s', str(output_dir_path))
    output_dir_path.mkdir(parents=True, exist_ok=True)

    logger.info('Reading WSI data objects.')
    start = args.data_offset
    wsi_data = parse_dataset(args.data_list_file)[start:]
    count = args.count
    if count > len(wsi_data):
        raise ValueError('Offset and count out of bounds.')
    wsi_data = wsi_data[:count]

    for data in wsi_data:
        logger.info('Creating mask for %s', data.name)

        # All cheap existence checks run before the slide is opened, so a
        # slide without a usable label is never opened at all.
        if not data.tif_path.is_file():
            logger.warning('TIF File not found. Ignoring %s', data.name)
            continue
        if data.label_xml_path is None:
            logger.info('No annotation exists. Ignoring %s', data.name)
            continue
        if not data.label_xml_path.is_file():
            logger.warning('Label File not found. Ignoring %s', data.name)
            continue

        reader = mir.MultiResolutionImageReader()
        mr_image = reader.open(str(data.tif_path))

        annotation_list = mir.AnnotationList()
        xml_repository = mir.XmlRepository(annotation_list)
        xml_repository.setSource(str(data.label_xml_path))
        xml_repository.load()

        annotation_mask = mir.AnnotationToMask()
        output_path = output_dir_path / (data.name + '_Mask.tif')
        annotation_mask.convert(
            annotation_list,
            str(output_path),
            mr_image.getDimensions(),
            mr_image.getSpacing(),
            _LABEL_MAP,
        )
        logger.info('Mask saved for %s at %s', data.name, str(output_path))
def parse_one_annotation_list(reader, slide_path, ann_path, mask_path):
    """Rasterise one XML annotation file into a mask image.

    :param reader: object whose ``open(slide_path)`` returns the slide.
    :param slide_path: path of the slide; its dimensions and spacing are
        used for the mask.
    :param ann_path: path of the XML annotation file.
    :param mask_path: output path handed to ``AnnotationToMask.convert``.
    """
    label_map = {'_0': 1, '_1': 1, '_2': 0}
    conversion_order = ['_0', '_1', '_2']

    slide = reader.open(slide_path)
    try:
        annotation_list = mir.AnnotationList()
        xml_repository = mir.XmlRepository(annotation_list)
        xml_repository.setSource(ann_path)
        xml_repository.load()

        annotation_mask = mir.AnnotationToMask()
        annotation_mask.convert(annotation_list, mask_path,
                                slide.getDimensions(), slide.getSpacing(),
                                label_map, conversion_order)
    finally:
        # Close the slide even when loading or conversion raises.
        slide.close()
def main(args):
    """Create ``<name>_Mask.tif`` for every entry of the WSI list file.

    Reads the entries with ``read_wsi_list_file`` and converts each entry's
    XML label into a mask inside ``args.output_path / args.output_dir_name``.
    Entries whose TIF is missing, that have no label, or whose label file is
    missing are logged and skipped.

    :param args: namespace providing ``output_path``, ``output_dir_name``
        and ``data_list_file``.
    """
    logger = get_logger('XML-to-MASK')

    output_dir_path = args.output_path / args.output_dir_name
    logger.info('Creating output directory at %s', str(output_dir_path))
    output_dir_path.mkdir(parents=True, exist_ok=True)

    logger.info('Reading WSI data objects.')
    wsi_data = read_wsi_list_file(args.data_list_file)

    for data in wsi_data:
        logger.info('Creating mask for %s', data.name)

        # All cheap existence checks run before the slide is opened, so a
        # slide without a usable label is never opened at all.
        if not data.tif_path.is_file():
            logger.warning('TIF File not found. Ignoring %s', data.name)
            continue
        if data.label_path is None:
            logger.info('No annotation exists. Ignoring %s', data.name)
            continue
        if not data.label_path.is_file():
            logger.warning('Label File not found. Ignoring %s', data.name)
            continue

        reader = mir.MultiResolutionImageReader()
        mr_image = reader.open(str(data.tif_path))

        annotation_list = mir.AnnotationList()
        xml_repository = mir.XmlRepository(annotation_list)
        xml_repository.setSource(str(data.label_path))
        xml_repository.load()

        annotation_mask = mir.AnnotationToMask()
        output_path = output_dir_path / (data.name + '_Mask.tif')
        annotation_mask.convert(
            annotation_list,
            str(output_path),
            mr_image.getDimensions(),
            mr_image.getSpacing(),
            _LABEL_MAP,
        )
        logger.info('Mask saved for %s at %s', data.name, str(output_path))
def gene_mask(each_fp):
    """Generate ``<id>_mask.tif`` in ``cfg.mask_path`` for one slide file.

    The integer image id is the part of the file's base name before the
    first ``'.'``; ``utils.id_to_xml`` maps it to the annotation XML path.
    ``convert`` is called without a label map or conversion order.

    :param each_fp: path of the slide file.
    :raises ValueError: if the leading part of the base name is not an int.
    """
    slide_reader = mir.MultiResolutionImageReader()

    file_name = path.basename(each_fp)
    img_id = int(file_name.split('.', 1)[0])

    slide = slide_reader.open(each_fp)

    annotations = mir.AnnotationList()
    repository = mir.XmlRepository(annotations)
    repository.setSource(utils.id_to_xml(img_id))
    repository.load()

    converter = mir.AnnotationToMask()
    mask_file = path.join(cfg.mask_path, f'{img_id}_mask.tif')
    converter.convert(annotations, mask_file,
                      slide.getDimensions(), slide.getSpacing())
def mask_gen(self, slide_file, xml_file):
    """
    Build and save the mask file that belongs to one slide.

    The mask is written into ``mask_path`` (a name taken from the
    enclosing scope) under the slide's base name with ``'.tif'`` replaced
    by ``'_mask.tif'``.

    :param slide_file: the path of a WSI image
    :type slide_file: str
    :param xml_file: the path of the XML annotation file of that image
    :type xml_file: str

    :returns: the path of the mask file
    :rtype: str
    """
    slide = self.reader.open(slide_file)

    annotations = mir.AnnotationList()
    repository = mir.XmlRepository(annotations)
    repository.setSource(xml_file)
    repository.load()

    converter = mir.AnnotationToMask()

    # Hard-wired switch between the two label schemes.
    camelyon17_type_mask = False
    if camelyon17_type_mask:
        label_map = {'metastases': 1, 'normal': 2}
        conversion_order = ['metastases', 'normal']
    else:
        # 255 is used so that the tumor region is clearly visible in the
        # mask file; using 1 instead would produce a binary mask file.
        label_map = {'_0': 255, '_1': 255, '_2': 0}
        conversion_order = ['_0', '_1', '_2']

    mask_name = osp.basename(slide_file).replace('.tif', '_mask.tif')
    output_path = osp.join(mask_path, mask_name)

    converter.convert(annotations, output_path,
                      slide.getDimensions(), slide.getSpacing(),
                      label_map, conversion_order)
    return output_path
def main(args):
    """Create ``<stem>_Mask.tif`` for every listed XML annotation file.

    Every ``*.xml`` in ``args.xml_dir`` whose stem appears in the names file
    ``args.names`` (one name per line) is converted, using the slide
    ``args.wsi_dir / '<stem>.tif'`` for dimensions and spacing. Masks are
    written to ``args.output_dir / args.folder_name``.

    :param args: namespace providing ``output_dir``, ``folder_name``,
        ``names``, ``xml_dir`` and ``wsi_dir``.
    """
    output_folder = args.output_dir / args.folder_name
    output_folder.mkdir(parents=True, exist_ok=True)

    # A set gives O(1) membership tests in the loop below.
    with open(str(args.names)) as names_file:
        names = {line.strip() for line in names_file}

    for xml_path in args.xml_dir.glob('*.xml'):
        stem = xml_path.stem
        if stem not in names:
            continue

        print(f'>> Creating mask for {stem}...', flush=True)

        reader = mir.MultiResolutionImageReader()
        wsi_tif_path = args.wsi_dir / f'{stem}.tif'
        mr_image = reader.open(str(wsi_tif_path))

        annotation_list = mir.AnnotationList()
        xml_repository = mir.XmlRepository(annotation_list)
        xml_repository.setSource(str(xml_path))
        xml_repository.load()

        annotation_mask = mir.AnnotationToMask()
        output_path = output_folder / f'{stem}_Mask.tif'
        annotation_mask.convert(
            annotation_list,
            str(output_path),
            mr_image.getDimensions(),
            mr_image.getSpacing(),
            _LABEL_MAP,
        )
        print(f' Mask saved for {stem} at {output_path}', flush=True)
def main():
    """Command-line preprocessing of one slide (as written: a debug run).

    Parses the options, opens the slide, loads its XML annotations, reads
    the whole slide at ``--level``, shrinks it by ``--resize``, and writes
    the shrunken colour image to ``<out>a.jpg``. It then calls
    ``sys.exit()``.

    NOTE(review): everything after that ``sys.exit()`` (the patch
    extraction loop) is unreachable. It also reads ``imga``, which is only
    assigned inside a disabled triple-quoted string, so it would raise
    ``NameError`` if the exit were removed. Confirm the intended flow.
    """
    parser = argparse.ArgumentParser(description="CAMELYON17 dataset Preprocess")
    parser.add_argument('--tif', '-t', default="center_0/patient_013/patient_013_node_3.tif", help='tif file path')
    parser.add_argument('--annotations', '-a', default="patient_013_node_3", help='annotations file path')
    parser.add_argument('--level', '-l', type=int, default=2, help='down-sampling level')
    parser.add_argument('--resize', '-r', type=int, default=32, help='resize times')
    parser.add_argument('--patch', '-p', type=int, default=256, help='patch size')
    parser.add_argument('--out', '-o', default="other/", help='output directory path')
    args = parser.parse_args()

    # Load TIFF
    reader = mir.MultiResolutionImageReader()
    tif_dir = "../../../mnt/nas/CAMELYON/CAMELYON17/original/org/training/"
    tif_path = tif_dir + args.tif
    mr_image = reader.open(tif_path)
    w_max, h_max = mr_image.getDimensions()
    print("Load {}".format(args.tif) + " (" + str(w_max) + "×" + str(h_max) + " pixels)")
    #print(mr_image)

    # Annotations
    annotation_list = mir.AnnotationList()
    xml_repository = mir.XmlRepository(annotation_list)
    xml_dir = "../../../mnt/nas/CAMELYON/CAMELYON17/original/org/training/lesion_annotations/"
    xml_path = xml_dir + args.annotations + '.xml'
    xml_repository.setSource(xml_path)
    xml_repository.load()
    annotation_mask = mir.AnnotationToMask()
    camelyon17_type_mask = True
    label_map = {'metastases': 1, 'normal': 2} if camelyon17_type_mask else {'_0': 1, '_1': 1, '_2': 0}
    conversion_order = ['metastases', 'normal'] if camelyon17_type_mask else ['_0', '_1', '_2']
    # The string below is disabled code: the step that would write
    # "annotations/<name>.tif". The objects prepared above are therefore
    # not used by any live statement in this function.
    """
    annotation_mask.convert(annotation_list,
                            "annotations/" + args.annotations + '.tif',
                            mr_image.getDimensions(),
                            mr_image.getSpacing(),
                            label_map, conversion_order)
    print("Got annotation")
    sys.exit()
    """

    # Extract patch
    # mr_imagea is only referenced inside the disabled string below.
    mr_imagea = reader.open("annotations/" + args.annotations + ".tif")
    ds = mr_image.getLevelDownsample(args.level)
    # Slide size at the requested level.
    w_lmax, h_lmax = int(w_max/ds), int(h_max/ds)
    # Disabled code: this is the only place where `imga` is assigned.
    """
    imagea = mr_imagea.getUCharPatch(int(0 * ds), int(0 * ds),
                                     w_lmax, h_lmax, args.level)
    #print(imagea.shape)
    imga = cv2.resize(imagea, (int(w_lmax * 1/args.resize), int(h_lmax * 1/args.resize)))
    #print(imga.shape)
    c = 0
    for i in range(imga.shape[0]):
        for j in range(imga.shape[1]):
            if imga[i][j] != 0:
                c = c + 1
            else:
                imga[i][j] = 255
    print(c)
    cv2.imwrite(args.out + "c.jpg", imga)
    """
    # Read the full slide at the requested level, then shrink it.
    image = mr_image.getUCharPatch(int(0 * ds), int(0 * ds), w_lmax, h_lmax, args.level)
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    img = cv2.resize(image, (int(w_lmax * 1/args.resize), int(h_lmax * 1/args.resize)))
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    c = 0
    # Set pixels with gray value below 5 to 255; prints one progress line
    # per pixel.
    for i in range(gray.shape[0]):
        for j in range(gray.shape[1]):
            c = c + 1
            print(str(c) + "/" + str(gray.shape[0] * gray.shape[1]))
            if gray[i][j] < 5:
                gray[i][j] = 255
    ret, thresh = cv2.threshold(gray, 204, 255, cv2.THRESH_BINARY)
    cv2.imwrite(args.out + "a.jpg", img)
    #cv2.imwrite(args.out + "b.jpg", thresh)
    sys.exit()

    # NOTE(review): unreachable from here on because of the sys.exit()
    # above. The nesting of this section is inferred from its statements --
    # confirm against the intended version before re-enabling it.
    c = 0  # patches visited
    d = 0  # patches counted in the `else` branch (tumor ratio == 0)
    e = 0  # patches counted in the `if` branch (tumor ratio != 0)
    f = 0  # only used by the disabled `elif` branch
    for row in range(0, int(h_lmax/args.resize - args.patch/args.resize), int(args.patch/args.resize)):
        for col in range(0, int(w_lmax/args.resize - args.patch/args.resize), int(args.patch/args.resize)):
            thresh_patch = thresh[row : row + int(args.patch/args.resize), col : col + int(args.patch/args.resize)]
            #print(thresh_patch.shape)
            #print(row, col)
            c = c + 1
            flag = False
            for i in range(int(args.patch/args.resize)):
                for j in range(int(args.patch/args.resize)):
                    # First zero pixel in the thresholded patch triggers
                    # the classification of the whole patch.
                    if thresh_patch[i][j] == 0:
                        flag = True
                        row2 = row * args.resize
                        col2 = col * args.resize
                        image_patch = image[row2 : row2 + args.patch, col2 : col2 + args.patch]
                        #print(image_patch.shape)
                        tumor = 0
                        for i2 in range(int(args.patch/args.resize)):
                            for j2 in range(int(args.patch/args.resize)):
                                #print(row + i2, j + j2)
                                # NOTE(review): `imga` has no live
                                # assignment in this function.
                                if imga[row + i2][col + j2] != 0:
                                    tumor = tumor + 1
                        print(tumor)
                        #if tumor/(args.patch/args.resize) >= 0.75:
                        if tumor/(args.patch/args.resize) != 0:
                            #cv2.imwrite(args.out + "positive/" + args.annotations
                            #    + "_" + str(row2) + "_" + str(col2) + ".jpg", image_patch)
                            e = e + 1
                        #elif tumor/(args.patch/args.resize) != 0:
                            #cv2.imwrite(args.out + "positive2/" + args.annotations
                            #+ "_" + str(row2) + "_" + str(col2) + ".jpg", image_patch)
                            #f = f + 1
                        else:
                            #cv2.imwrite(args.out + "negative/" + args.annotations
                            #    + "_" + str(row2) + "_" + str(col2) + ".jpg", image_patch)
                            d = d + 1
                        break
                if flag:
                    break
    #print(str(e) + "/" + str(f) + "/" + str(d) + "/" + str(c))
    print(str(e) + "/" + str(d) + "/" + str(c))
    return 0
#import sys #sys.path.append('/opt/ASAP/bin') import multiresolutionimageinterface as mir reader = mir.MultiResolutionImageReader() mr_image = reader.open( '/home/suidong/Documents/camelyon17_data_backup/slide/patient_000_node_0.tif' ) annotation_list = mir.AnnotationList() xml_repository = mir.XmlRepository(annotation_list) xml_repository.setSource( '/home/suidong/Documents/camelyon17_data_backup/lesion_annotations/patient_000_node_0.xml' ) xml_repository.load() annotation_mask = mir.AnnotationToMask() label_map = {'metastases': 255, 'normal': 0} output_path = '/home/suidong/Documents/camelyon17_data_backup/test/test.tif' annotation_mask.convert(annotation_list, output_path, mr_image.getDimensions(), mr_image.getSpacing(), label_map)