# multiprocessing.set_start_method('spawn') # pool = multiprocessing.Pool(processes=4) # pool.map(patch_extractor.extract_parallel, all_fixed_float_offset) # # Save into tfRecords from wsitools.file_management.wsi_case_manager import WSI_CaseManager # # import dependent packages from wsitools.file_management.offset_csv_manager import OffsetCSVManager from wsitools.tissue_detection.tissue_detector import TissueDetector from wsitools.patch_extraction.feature_map_creator import FeatureMapCreator fixed_wsi = "/projects/shart/digital_pathology/data/PenMarking/WSIs/MELF/7bb50b5d9dcf4e53ad311d66136ae00f.tiff" float_wsi_root_dir = "/projects/shart/digital_pathology/data/PenMarking/WSIs/MELF-Clean" gnb_training_files = "/projects/shart/digital_pathology/data/PenMarking/model/tissue_loc/HE_tissue_others.tsv" tissue_detector = TissueDetector("GNB", threshold=0.5, training_files=gnb_training_files) offset_csv_fn = "/projects/shart/digital_pathology/data/PenMarking/WSIs/registration_offsets.csv" offset_csv_mn = OffsetCSVManager(offset_csv_fn) fm = FeatureMapCreator("./feature_maps/basic_fm_PP_eval.csv") case_mn = WSI_CaseManager() float_wsi = case_mn.get_counterpart_fn(fixed_wsi, float_wsi_root_dir) _, fixed_wsi_uuid, _ = case_mn.get_wsi_fn_info(fixed_wsi) _, float_wsi_uuid, _ = case_mn.get_wsi_fn_info(float_wsi) offset, state_indicator = offset_csv_mn.lookup_table( fixed_wsi_uuid, float_wsi_uuid) if state_indicator == 0:
# Batch pairwise patch extraction.
#
# For every fixed WSI named in `fixed_wsi_list_txt`, find its floating
# counterpart, look up the registration offset between the two slides and
# extract paired patches. Annotations are used when an XML file named after
# the fixed slide's UUID exists under `annotation_root_path`.

# Read the slide names with a context manager so the handle is closed, and
# strip the line terminators: readlines() keeps the trailing "\n", which would
# otherwise end up in the middle of the path built from name + extension.
with open(fixed_wsi_list_txt, 'r') as list_file:
    fixed_wsi_list = [line.strip() for line in list_file if line.strip()]

# These objects do not depend on the slide being processed, so build them once
# instead of once per loop iteration.
case_mn = WSI_CaseManager(image_pairs_txt)
offset_csv_mn = OffsetCSVManager(offset_csv_fn)
tissue_detector = TissueDetector("LAB_Threshold", threshold=80)
parameters = PairwiseExtractorParameters(output_dir,
                                         save_format='.jpg',
                                         sample_cnt=-1)

for fixed_wsi_t in fixed_wsi_list:
    fixed_wsi = os.path.join(fixed_wsi_root_dir, fixed_wsi_t + wsi_ext)
    float_wsi = case_mn.get_counterpart_fn(fixed_wsi, float_wsi_root_dir)
    _, fixed_wsi_uuid, _ = case_mn.get_wsi_fn_info(fixed_wsi)
    _, float_wsi_uuid, _ = case_mn.get_wsi_fn_info(float_wsi)

    offset, state_indicator = offset_csv_mn.lookup_table(
        fixed_wsi_uuid, float_wsi_uuid)
    if state_indicator == 0:
        raise Exception("No corresponding offset can be found in the file")

    # Annotations are optional: fall back to None when no XML file exists
    # for this slide.
    xml_fn = os.path.join(annotation_root_path, fixed_wsi_uuid + '.xml')
    if os.path.exists(xml_fn):
        annotations = AnnotationRegions(xml_fn, class_label_id_csv)
    else:
        annotations = None

    patch_extractor = PairwisePatchExtractor(tissue_detector,
                                             parameters,
                                             annotations=annotations)
    patch_cnt = patch_extractor.extract(fixed_wsi, float_wsi, offset)
    print("%d Patches have been save to %s" % (patch_cnt, output_dir))
def _build_parser():
    """Create the argument parser for the patch-extraction console script."""
    parser = argparse.ArgumentParser()
    parser.add_argument("-w", "--wsi_fn", required=True, dest='wsi_fn',
                        help="WSI file name")
    parser.add_argument("-o", "--out-dir", default=os.getcwd(),
                        dest='out_dir', help="Where patches should be saved")
    parser.add_argument("-s", "--patch-size", default=256, dest='patch_size',
                        type=int, help="H & W of patches")
    parser.add_argument(
        "-n", "--number-processors", default=8, dest='num_processors',
        type=int, help="Number of processors to use during patch extraction")
    parser.add_argument(
        "-c", "--number-patches", default=-1, dest='sample_cnt', type=int,
        help="Number of patches to extract [-1 == all]")
    parser.add_argument(
        "-a", "--patch-filter-tissue-area", default=0.8,
        dest='patch_filter_by_area', type=float,
        help="Amount of tissue that should be present in a patch")
    parser.add_argument(
        "-R", "--rescale-rate", default=128, dest='rescale_rate', type=int,
        help="Fold size to scale the thumbnail to (for faster processing)")
    parser.add_argument("-f", "--patch-format", dest='save_format',
                        choices=['.png', '.jpg', '.tfrecord'], default=".png",
                        help="Output format for patches")
    parser.add_argument("-x", "--annotation-xml", dest='anno_xml',
                        default=None, help="XML definig the annotations")
    parser.add_argument("-y", "--annotation-class_label_id_csv",
                        dest='anno_class_label_id_csv', default=None,
                        help="XML class_label_id_csv")
    # type=int so a level given on the command line is not passed on as a str.
    parser.add_argument("-l", "--openslide-level", dest='openslide_level',
                        default=0, type=int,
                        help="Level used to extract patches")
    parser.add_argument("-T", "--tissue-detection-method",
                        dest="tissue_detector_method",
                        choices=['LAB_Threshold', 'GNB'],
                        default="LAB_Threshold",
                        help="Choose the method for finding tissue")
    # type=int is required here: argparse compares the converted value with
    # `choices`, so without it every command-line string is rejected.
    parser.add_argument(
        "-t", "--tissue-detection-threshold",
        dest="tissue_detector_threshold", type=int, choices=range(1, 255),
        default=80,
        help="Threshold at which there is tissue in patch "
             "(used for LAB_Threshold)")
    parser.add_argument("-G", "--GNB-file", dest="training_file",
                        default=None,
                        help="GNB training file (if GNB method is chosen)")
    parser.add_argument("-F", "--feature-map", dest="feature_map",
                        default=None,
                        help="Feature map file (used if output is TFRecords)")
    parser.add_argument("-W", "--wsi_reg_2", dest='wsi_reg_2',
                        help="second WSI registration file name")
    parser.add_argument("-Ox", "--reg_off_set_x", dest='reg_off_set_x',
                        type=float, help="Registration X offset")
    parser.add_argument("-Oy", "--reg_off_set_y", dest='reg_off_set_y',
                        type=float, help="Registration Y offset")
    parser.add_argument(
        "-V", "--verbose", dest="logLevel",
        choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'],
        default="INFO", help="Set the logging level")
    return parser


def _validate_args(parser, args):
    """Reject inconsistent option combinations via ``parser.error``.

    ``parser.error`` prints the usage message and exits with status 2. It is
    used instead of ``assert`` because assertions are stripped under ``-O``.
    """
    # TFRecord output needs a feature map.
    if args.save_format == '.tfrecord':
        if args.feature_map is None:
            parser.error("You must supply a feature map if you want "
                         "TFRecords exported")
        if not os.path.exists(args.feature_map):
            parser.error("Your feature map file ({}) was not found".format(
                args.feature_map))

    # GNB-based tissue detection needs a training file.
    if args.tissue_detector_method == 'GNB':
        if args.training_file is None:
            parser.error("You must provide a GNB file if using GNB-based "
                         "tissue detection")
        if not os.path.exists(args.training_file):
            parser.error("Your GNB file ({}) was not found".format(
                args.training_file))

    # An annotation XML must come with its class-label-id CSV.
    if args.anno_xml:
        if not os.path.exists(args.anno_xml):
            parser.error("Your XML file ({}) was not found".format(
                args.anno_xml))
        if args.anno_class_label_id_csv is None:
            parser.error("You must supply a class_label_id_csv file when an "
                         "annotation XML is given")
        if not os.path.exists(args.anno_class_label_id_csv):
            parser.error(
                "Your class_label_id_csv file ({}) was not found".format(
                    args.anno_class_label_id_csv))

    # The second (floating) slide must exist when registration is requested.
    if args.wsi_reg_2 and not os.path.exists(args.wsi_reg_2):
        parser.error("Your second WSI file ({}) was not found".format(
            args.wsi_reg_2))

    # Offsets are already parsed as floats by argparse; only require that
    # they are given as a pair. `is None` keeps an explicit 0.0 valid.
    if (args.reg_off_set_x is None) != (args.reg_off_set_y is None):
        parser.error("Registration offsets must be supplied together "
                     "(both -Ox and -Oy)")


def _run_extraction(patch_extractor, num_processors, job_args):
    """Call ``patch_extractor.extract(*job_args)`` and return its result.

    When ``num_processors`` is greater than zero the call is made in a
    'spawn' worker pool; zero (or less) turns multiprocessing off.
    """
    if num_processors <= 0:
        return patch_extractor.extract(*job_args)

    # force=True avoids a RuntimeError if the start method was already set.
    multiprocessing.set_start_method('spawn', force=True)
    with multiprocessing.Pool(processes=num_processors) as pool:
        # starmap unpacks the tuple into positional arguments, so extract is
        # called once with all of them rather than once per element.
        # NOTE(review): a single job occupies a single worker; any further
        # parallelism has to come from the extractor itself -- confirm.
        results = pool.starmap(patch_extractor.extract, [tuple(job_args)])
    return results[0]


def main():
    """Console script for extracting patches from WSI files.

    Parses the command line, validates option combinations, builds the
    tissue detector and either a pairwise extractor (when a second WSI is
    given with -W) or a single-slide extractor, then runs the extraction.

    Returns:
        int: 0 once extraction has finished.

    Raises:
        SystemExit: raised by argparse (status 2) when the arguments are
            invalid or inconsistent.
    """
    parser = _build_parser()
    args = parser.parse_args()

    logging.basicConfig(stream=sys.stderr, level=args.logLevel,
                        format='%(name)s (%(levelname)s): %(message)s')
    logger = logging.getLogger(__name__)
    logger.setLevel(args.logLevel)

    _validate_args(parser, args)

    # Annotation regions are only built when an annotation XML was supplied.
    with_anno = bool(args.anno_xml)
    annotations = None
    if with_anno:
        annotations = AnnotationRegions(args.anno_xml,
                                        args.anno_class_label_id_csv)

    # The feature map stays None when it is not provided.
    fm = None
    if args.feature_map is not None:
        fm = FeatureMapCreator(args.feature_map)

    # Method for detecting tissue in the thumbnail image.
    tissue_detector = TissueDetector(
        args.tissue_detector_method,  # Can be LAB_Threshold or GNB
        threshold=args.tissue_detector_threshold,  # used for LAB_Threshold
        training_files=args.training_file  # Training file for GNB detection
    )

    # Options shared by both extractor parameter classes.
    extractor_options = dict(
        save_format=args.save_format,  # '.jpg', '.png', or '.tfrecord'
        sample_cnt=args.sample_cnt,  # Patch limit (-1 == all patches)
        patch_size=args.patch_size,  # Height & width of the patches
        rescale_rate=args.rescale_rate,  # Thumbnail down-scaling factor
        patch_filter_by_area=args.patch_filter_by_area,  # Required tissue
        with_anno=with_anno,  # True when an annotation XML was supplied
        extract_layer=args.openslide_level  # OpenSlide level
    )

    if args.wsi_reg_2:
        # Pairwise extraction (with or without annotations). Use the offsets
        # given on the command line, otherwise register the two slides.
        if args.reg_off_set_x is not None:
            offset = (args.reg_off_set_x, args.reg_off_set_y)
        else:
            matcher = WSI_Matcher(tissue_detector, MatcherParameters())
            offset = matcher.match(args.wsi_fn, args.wsi_reg_2)

        parameters = PairwiseExtractorParameters(args.out_dir,
                                                 **extractor_options)
        patch_extractor = PairwisePatchExtractor(tissue_detector,
                                                 parameters,
                                                 feature_map=fm,
                                                 annotations=annotations)
        job_args = (args.wsi_fn, args.wsi_reg_2, offset)
    else:
        # Single-slide extraction (with or without annotations).
        parameters = ExtractorParameters(args.out_dir, **extractor_options)
        patch_extractor = PatchExtractor(tissue_detector,
                                         parameters,
                                         feature_map=fm,
                                         annotations=annotations)
        job_args = (args.wsi_fn,)

    patch_num = _run_extraction(patch_extractor, args.num_processors,
                                job_args)
    logger.info("Patch extraction finished (result: %s); output in %s",
                patch_num, args.out_dir)
    return 0