def extract_images_or_rois(parameters):
    """Build the list of zones (whole slides or ROI windows) to process.

    Resolution order:
      1. explicit ROI annotation ids (``cytomine_roi_annotations``), each
         wrapped in an IIP window on its image;
      2. otherwise, a set of images (``cytomine_id_images``) or every image
         of the project, optionally restricted to ROI annotations carrying
         ``cytomine_id_roi_term``.
    """
    roi_ids = parse_domain_list(parameters.cytomine_roi_annotations)
    # if ROI annotations are provided, they take precedence
    if len(roi_ids) > 0:
        slide_cache = dict()  # maps ImageInstance id -> CytomineSlide
        windows = list()
        for roi_id in roi_ids:
            annotation = Annotation().fetch(roi_id)
            if annotation.image not in slide_cache:
                # build the slide lazily, once per distinct image
                slide_cache[annotation.image] = CytomineSlide(
                    annotation.image, parameters.cytomine_zoom_level)
            windows.append(get_iip_window_from_annotation(
                slide_cache[annotation.image], annotation,
                parameters.cytomine_zoom_level))
        return windows

    # work at image level (explicit id list, or whole project) or ROIs by term
    images = ImageInstanceCollection()
    if parameters.cytomine_id_images is not None:
        image_ids = parse_domain_list(parameters.cytomine_id_images)
        images.extend([ImageInstance().fetch(_id) for _id in image_ids])
    else:
        images = images.fetch_with_filter("project",
                                          parameters.cytomine_id_project)
    slides = [CytomineSlide(img, parameters.cytomine_zoom_level)
              for img in images]

    if parameters.cytomine_id_roi_term is None:
        return slides

    # fetch ROI annotations carrying the requested term
    collection = AnnotationCollection(
        terms=[parameters.cytomine_id_roi_term],
        reviewed=parameters.cytomine_reviewed_roi,
        showWKT=True)
    collection.fetch_with_filter(project=parameters.cytomine_id_project)
    slides_by_image = {slide.image_instance.id: slide for slide in slides}
    # keep only annotations belonging to one of the selected images
    return [
        get_iip_window_from_annotation(slides_by_image[annot.image], annot,
                                       parameters.cytomine_zoom_level)
        for annot in collection if annot.image in slides_by_image
    ]
def extract_images_or_rois(parameters):
    """Return the zones to process: whole slides, or ROI windows when a
    ROI term is given (annotations from all users, algorithms included).
    """
    # work at image level (explicit id list, or whole project) or ROIs by term
    images = ImageInstanceCollection()
    if parameters.cytomine_id_images is not None:
        image_ids = parse_domain_list(parameters.cytomine_id_images)
        images.extend([ImageInstance().fetch(_id) for _id in image_ids])
    else:
        images = images.fetch_with_filter("project",
                                          parameters.cytomine_id_project)
    slides = [CytomineSlide(img, parameters.cytomine_zoom_level)
              for img in images]

    if parameters.cytomine_id_roi_term is None:
        return slides

    # fetch ROI annotations from all users (algorithm annotations included)
    collection = AnnotationCollection(
        terms=[parameters.cytomine_id_roi_term],
        reviewed=parameters.cytomine_reviewed_roi,
        project=parameters.cytomine_id_project,
        showWKT=True,
        includeAlgo=True).fetch()
    slides_by_id = {slide.image_instance.id: slide for slide in slides}
    regions = list()
    for annotation in collection:
        slide = slides_by_id.get(annotation.image)
        if slide is None:
            # annotation belongs to an image outside the selection
            continue
        regions.append(get_iip_window_from_annotation(
            slide, annotation, parameters.cytomine_zoom_level))
    return regions
def main(argv):
    """Train a Pyxit classifier on annotation crops and attach the model
    (plus its class metadata) to the Cytomine job.

    Fixes vs. original:
      - ``statuscomment=`` -> ``statusComment=`` (the lowercase keyword did
        not match the API attribute used by every other call in this file,
        so the status message was silently lost),
      - ``np.int`` -> ``int`` (``np.int`` was removed in NumPy >= 1.24).
    """
    with CytomineJob.from_cli(argv) as cj:
        # annotation filtering
        cj.logger.info(str(cj.parameters))
        # use only images from the current project
        cj.parameters.cytomine_id_projects = "{}".format(
            cj.parameters.cytomine_id_project)

        cj.job.update(
            progress=1,
            statusComment="Preparing execution (creating folders,...).")
        base_path, downloaded = setup_classify(
            args=cj.parameters, logger=cj.job_logger(1, 40),
            dest_pattern=os.path.join("{term}", "{image}_{id}.png"),
            root_path=Path.home(), set_folder="train", showTerm=True)

        x = np.array([f for annotation in downloaded
                      for f in annotation.filenames])
        # the term id is encoded as the parent folder name of each crop
        y = np.array([int(os.path.basename(os.path.dirname(filepath)))
                      for filepath in x])

        # transform classes
        cj.job.update(progress=50, statusComment="Transform classes...")
        classes = parse_domain_list(cj.parameters.cytomine_id_terms)
        positive_classes = parse_domain_list(
            cj.parameters.cytomine_positive_terms)
        classes = np.array(classes) if len(classes) > 0 else np.unique(y)
        n_classes = classes.shape[0]

        # filter unwanted terms
        cj.logger.info("Size before filtering:")
        cj.logger.info(" - x: {}".format(x.shape))
        cj.logger.info(" - y: {}".format(y.shape))
        keep = np.in1d(y, classes)
        x, y = x[keep], y[keep]
        cj.logger.info("Size after filtering:")
        cj.logger.info(" - x: {}".format(x.shape))
        cj.logger.info(" - y: {}".format(y.shape))

        if cj.parameters.cytomine_binary:
            cj.logger.info("Will be training on 2 classes ({} classes before "
                           "binarization).".format(n_classes))
            # np.int was removed in NumPy 1.24; builtin int is equivalent here
            y = np.in1d(y, positive_classes).astype(int)
        else:
            cj.logger.info("Will be training on {} classes.".format(n_classes))
            # remap term ids to contiguous class indexes (classes is sorted
            # or user-provided; searchsorted assumes sorted order)
            y = np.searchsorted(classes, y)

        # build model
        cj.job.update(progress=55, statusComment="Build model...")
        _, pyxit = build_models(
            n_subwindows=cj.parameters.pyxit_n_subwindows,
            min_size=cj.parameters.pyxit_min_size,
            max_size=cj.parameters.pyxit_max_size,
            target_width=cj.parameters.pyxit_target_width,
            target_height=cj.parameters.pyxit_target_height,
            interpolation=cj.parameters.pyxit_interpolation,
            transpose=cj.parameters.pyxit_transpose,
            colorspace=cj.parameters.pyxit_colorspace,
            fixed_size=cj.parameters.pyxit_fixed_size,
            verbose=int(cj.logger.level == 10),
            create_svm=cj.parameters.svm,
            C=cj.parameters.svm_c,
            random_state=cj.parameters.seed,
            n_estimators=cj.parameters.forest_n_estimators,
            min_samples_split=cj.parameters.forest_min_samples_split,
            max_features=cj.parameters.forest_max_features,
            n_jobs=cj.parameters.n_jobs)

        cj.job.update(progress=60, statusComment="Train model...")
        pyxit.fit(x, y)

        cj.job.update(progress=90, statusComment="Save model....")
        model_filename = joblib.dump(
            pyxit, os.path.join(base_path, "model.joblib"), compress=3)[0]
        AttachedFile(
            cj.job,
            domainIdent=cj.job.id,
            filename=model_filename,
            domainClassName="be.cytomine.processing.Job"
        ).upload()
        # persist class metadata so the prediction job can rebuild the mapping
        Property(cj.job, key="classes", value=stringify(classes)).save()
        Property(cj.job, key="binary",
                 value=cj.parameters.cytomine_binary).save()
        Property(cj.job, key="positive_classes",
                 value=stringify(positive_classes)).save()

        cj.job.update(status=Job.TERMINATED, status_comment="Finish",
                      progress=100)
def main(argv):
    """Run a trained Pyxit segmentation model over images/ROIs and upload
    the detected objects as Cytomine annotations."""
    with CytomineJob.from_cli(argv) as cj:
        # use only images from the current project
        cj.job.update(progress=1, statusComment="Preparing execution")

        # zoom level > 0 is only supported with the default tiling settings
        if cj.parameters.cytomine_zoom_level > 0 and (
                cj.parameters.cytomine_tile_size != 256
                or cj.parameters.cytomine_tile_overlap != 0):
            raise ValueError(
                "when using zoom_level > 0, tile size should be 256 "
                "(given {}) and overlap should be 0 (given {})".format(
                    cj.parameters.cytomine_tile_size,
                    cj.parameters.cytomine_tile_overlap))

        cj.job.update(
            progress=1,
            statusComment="Preparing execution (creating folders,...).")
        # working path
        root_path = str(Path.home())
        working_path = os.path.join(root_path, "images")
        os.makedirs(working_path, exist_ok=True)

        # load training information
        cj.job.update(progress=5,
                      statusComment="Extract properties from training job.")
        train_job = Job().fetch(cj.parameters.cytomine_id_job)
        properties = PropertyCollection(train_job).fetch().as_dict()
        binary = str2bool(properties["binary"].value)
        classes = parse_domain_list(properties["classes"].value)

        cj.job.update(progress=10, statusComment="Download the model file.")
        attached_files = AttachedFileCollection(train_job).fetch()
        model_file = attached_files.find_by_attribute("filename",
                                                      "model.joblib")
        model_filepath = os.path.join(root_path, "model.joblib")
        model_file.download(model_filepath, override=True)
        pyxit = joblib.load(model_filepath)

        # set n_jobs
        pyxit.base_estimator.n_jobs = cj.parameters.n_jobs
        pyxit.n_jobs = cj.parameters.n_jobs

        cj.job.update(progress=45, statusComment="Build workflow.")
        builder = SSLWorkflowBuilder()
        builder.set_tile_size(cj.parameters.cytomine_tile_size,
                              cj.parameters.cytomine_tile_size)
        builder.set_overlap(cj.parameters.cytomine_tile_overlap)
        builder.set_tile_builder(
            CytomineTileBuilder(working_path, n_jobs=cj.parameters.n_jobs))
        builder.set_logger(StandardOutputLogger(level=Logger.INFO))
        builder.set_n_jobs(1)
        # value 0 will prevent merging but still requires to run the
        # merging check procedure (inefficient)
        builder.set_background_class(0)
        builder.set_distance_tolerance(
            2 if cj.parameters.union_enabled else 0)
        builder.set_segmenter(ExtraTreesSegmenter(
            pyxit=pyxit,
            classes=classes,
            prediction_step=cj.parameters.pyxit_prediction_step,
            background=0,
            min_std=cj.parameters.tile_filter_min_stddev,
            max_mean=cj.parameters.tile_filter_max_mean))
        workflow = builder.get()

        area_checker = AnnotationAreaChecker(
            min_area=cj.parameters.min_annotation_area,
            max_area=cj.parameters.max_annotation_area)

        def get_term(label):
            # binary model: foreground maps to an optional predict term
            if binary:
                if "cytomine_id_predict_term" not in cj.parameters:
                    return []
                return [int(cj.parameters.cytomine_id_predict_term)]
            # multi-class: the predicted label is the term id itself
            return [label]

        zones = extract_images_or_rois(cj.parameters)
        for zone in cj.monitor(zones, start=50, end=90, period=0.05,
                               prefix="Segmenting images/ROIs"):
            results = workflow.process(zone)

            annotations = AnnotationCollection()
            for obj in results:
                if not area_checker.check(obj.polygon):
                    continue
                polygon = obj.polygon
                if isinstance(zone, ImageWindow):
                    # translate window coordinates to image coordinates
                    polygon = affine_transform(
                        polygon,
                        [1, 0, 0, 1, zone.abs_offset_x, zone.abs_offset_y])
                polygon = change_referential(polygon,
                                             zone.base_image.height)
                if cj.parameters.cytomine_zoom_level > 0:
                    # scale back to full-resolution coordinates
                    zoom_mult = (2 ** cj.parameters.cytomine_zoom_level)
                    polygon = affine_transform(
                        polygon, [zoom_mult, 0, 0, zoom_mult, 0, 0])
                annotations.append(Annotation(
                    location=polygon.wkt,
                    id_terms=get_term(obj.label),
                    id_project=cj.project.id,
                    id_image=zone.base_image.image_instance.id))
            annotations.save()

        cj.job.update(status=Job.TERMINATED, status_comment="Finish",
                      progress=100)
def main(argv):
    """Train a Pyxit classifier with cross-validated evaluation and attach
    the model, class metadata and evaluation results to the Cytomine job.

    Fixes vs. original:
      - ``statuscomment=`` -> ``statusComment=`` (wrong keyword case; the
        message was silently lost),
      - ``np.int`` -> ``int`` (``np.int`` was removed in NumPy >= 1.24),
      - the ``accuracies``, ``test_sizes`` and ``accuracy`` properties were
        constructed but never ``.save()``d, so evaluation results were lost.
    """
    with CytomineJob.from_cli(argv) as cj:
        # annotation filtering
        cj.logger.info(str(cj.parameters))

        cj.job.update(
            progress=1,
            statusComment="Preparing execution (creating folders,...).")
        base_path, downloaded = setup_classify(
            args=cj.parameters, logger=cj.job_logger(1, 40),
            dest_pattern=os.path.join("{term}", "{image}_{id}.png"),
            root_path=Path.home(), set_folder="train", showTerm=True)

        x = np.array([f for annotation in downloaded
                      for f in annotation.filenames])
        # the term id is encoded as the parent folder name of each crop
        y = np.array([int(os.path.basename(os.path.dirname(filepath)))
                      for filepath in x])

        # transform classes
        cj.job.update(progress=50, statusComment="Transform classes...")
        classes = parse_domain_list(cj.parameters.cytomine_id_terms)
        positive_classes = parse_domain_list(
            cj.parameters.cytomine_positive_terms)
        classes = np.array(classes) if len(classes) > 0 else np.unique(y)
        n_classes = classes.shape[0]

        # filter unwanted terms
        cj.logger.info("Size before filtering:")
        cj.logger.info(" - x: {}".format(x.shape))
        cj.logger.info(" - y: {}".format(y.shape))
        keep = np.in1d(y, classes)
        x, y = x[keep], y[keep]
        cj.logger.info("Size after filtering:")
        cj.logger.info(" - x: {}".format(x.shape))
        cj.logger.info(" - y: {}".format(y.shape))

        # group label per sample (image id, taken from "<image>_<id>.png"),
        # used by GroupKFold
        labels = np.array([int(os.path.basename(f).split("_", 1)[0])
                           for f in x])

        if cj.parameters.cytomine_binary:
            cj.logger.info("Will be training on 2 classes ({} classes before "
                           "binarization).".format(n_classes))
            # np.int was removed in NumPy 1.24; builtin int is equivalent here
            y = np.in1d(y, positive_classes).astype(int)
        else:
            cj.logger.info("Will be training on {} classes.".format(n_classes))
            y = np.searchsorted(classes, y)

        # build model
        random_state = check_random_state(cj.parameters.seed)
        cj.job.update(progress=55, statusComment="Build model...")
        _, pyxit = build_models(
            n_subwindows=cj.parameters.pyxit_n_subwindows,
            min_size=cj.parameters.pyxit_min_size,
            max_size=cj.parameters.pyxit_max_size,
            target_width=cj.parameters.pyxit_target_width,
            target_height=cj.parameters.pyxit_target_height,
            interpolation=cj.parameters.pyxit_interpolation,
            transpose=cj.parameters.pyxit_transpose,
            colorspace=cj.parameters.pyxit_colorspace,
            fixed_size=cj.parameters.pyxit_fixed_size,
            verbose=int(cj.logger.level == 10),
            create_svm=cj.parameters.svm,
            C=cj.parameters.svm_c,
            random_state=random_state,
            n_estimators=cj.parameters.forest_n_estimators,
            min_samples_split=cj.parameters.forest_min_samples_split,
            max_features=cj.parameters.forest_max_features,
            n_jobs=cj.parameters.n_jobs)

        cj.job.update(progress=60, statusComment="Start cross-validation...")
        n_splits = cj.parameters.eval_k
        cv = ShuffleSplit(n_splits, test_size=cj.parameters.eval_test_fraction)
        if cj.parameters.folds == "group":
            cv = GroupKFold(n_splits)
        elif cj.parameters.folds == "stratified":
            cv = StratifiedKFold(n_splits, shuffle=True,
                                 random_state=random_state)
        elif cj.parameters.folds != "shuffle":
            raise ValueError(
                "Unknown folding policy '{}'.".format(cj.parameters.folds))

        # Fit
        accuracies = np.zeros(n_splits)
        test_sizes = np.zeros(n_splits)
        # extract subwindows once; folds index into them via window_indexes
        _x, _y = pyxit.extract_subwindows(x, y)

        # CV loop
        for i, (train, test) in cj.monitor(
                enumerate(cv.split(x, y, labels)), start=60, end=90,
                prefix="cross val. iteration"):
            _pyxit = clone(pyxit)
            w_train = window_indexes(x.shape[0], train, _pyxit.n_subwindows)
            w_test = window_indexes(x.shape[0], test, _pyxit.n_subwindows)
            _pyxit.fit(x[train], y[train], _X=_x[w_train], _y=_y[w_train])
            y_pred = _pyxit.predict(x[test], _x[w_test])
            accuracies[i] = accuracy_score(y[test], y_pred)
            test_sizes[i] = test.shape[0] / float(x.shape[0])
            del _pyxit

        # final model trained on the whole dataset
        pyxit.fit(x, y)

        # test-size-weighted average accuracy across folds
        accuracy = float(np.mean(test_sizes * accuracies))
        cj.job.update(progress=90,
                      statusComment="Accuracy: {}".format(accuracy))

        cj.job.update(progress=90, statusComment="Save model...")
        model_filename = joblib.dump(
            pyxit, os.path.join(base_path, "model.joblib"), compress=3)[0]
        AttachedFile(
            cj.job,
            domainIdent=cj.job.id,
            filename=model_filename,
            domainClassName="be.cytomine.processing.Job"
        ).upload()
        Property(cj.job, key="classes", value=stringify(classes)).save()
        Property(cj.job, key="binary",
                 value=cj.parameters.cytomine_binary).save()
        Property(cj.job, key="positive_classes",
                 value=stringify(positive_classes)).save()
        # BUGFIX: these three were previously created but never saved
        Property(cj.job, key="accuracies",
                 value=array2str(accuracies)).save()
        Property(cj.job, key="test_sizes",
                 value=array2str(test_sizes)).save()
        Property(cj.job, key="accuracy", value=accuracy).save()

        cj.job.update(status=Job.TERMINATED, status_comment="Finish",
                      progress=100)
def main(argv):
    """Classify annotation crops with a previously trained Pyxit model and
    upload the predicted terms (with confidence rates) as annotations.

    Fix vs. original: ``statuscomment=`` -> ``statusComment=`` (wrong
    keyword case; the status message was silently lost).
    """
    with CytomineJob.from_cli(argv) as cj:
        # annotation filtering
        cj.logger.info(str(cj.parameters))
        # use only images from the current project
        cj.parameters.cytomine_id_projects = "{}".format(
            cj.parameters.cytomine_id_project)

        cj.job.update(
            progress=1,
            statusComment="Preparing execution (creating folders,...).")
        root_path = "/data/"  # Path.home()
        image_path, downloaded = setup_classify(
            args=cj.parameters, logger=cj.job_logger(1, 40),
            dest_pattern="{image}_{id}.png",
            root_path=root_path, set_folder="test", showWKT=True)

        # keep annotations aligned index-by-index with the crop files in x
        annotations = [annotation for annotation in downloaded
                       for f in annotation.filenames]
        x = np.array([f for annotation in downloaded
                      for f in annotation.filenames])

        # extract model data from previous job
        cj.job.update(progress=45,
                      statusComment="Extract properties from training job.")
        train_job = Job().fetch(cj.parameters.cytomine_id_job)
        properties = PropertyCollection(train_job).fetch().as_dict()
        binary = str2bool(properties["binary"].value)
        if binary:
            classes = np.array([cj.parameters.cytomine_id_term_negative,
                                cj.parameters.cytomine_id_term_positive])
        else:
            classes = np.array(parse_domain_list(properties["classes"].value))

        # extract model
        cj.job.update(progress=50, statusComment="Download the model file.")
        attached_files = AttachedFileCollection(train_job).fetch()
        model_file = attached_files.find_by_attribute("filename",
                                                      "model.joblib")
        model_filepath = os.path.join(root_path, "model.joblib")
        model_file.download(model_filepath, override=True)
        pyxit = joblib.load(model_filepath)

        # set n_jobs
        pyxit.base_estimator.n_jobs = cj.parameters.n_jobs
        pyxit.n_jobs = cj.parameters.n_jobs

        cj.job.update(progress=55, statusComment="Predict...")
        if hasattr(pyxit, "predict_proba"):
            probas = pyxit.predict_proba(x)
            y_pred = np.argmax(probas, axis=1)
        else:
            # no probability support: predict labels, leave rates unset
            probas = [None] * x.shape[0]
            y_pred = pyxit.predict(x)

        # map class indexes back to Cytomine term ids
        predicted_terms = classes.take(y_pred, axis=0)
        collection = AnnotationCollection()
        for i in cj.monitor(range(x.shape[0]), start=80, end=99,
                            period=0.005, prefix="Uploading predicted terms"):
            annot, term, proba = annotations[i], predicted_terms[i], probas[i]

            parameters = {
                "location": annot.location,
                "id_image": annot.image,
                "id_project": cj.project.id,
                "id_terms": [int(term)]
            }
            if proba is not None:
                parameters["rate"] = float(np.max(proba))
            collection.append(Annotation(**parameters))
        collection.save()

        cj.job.update(status=Job.TERMINATED, status_comment="Finish",
                      progress=100)
def main(argv):
    """Train a Pyxit segmentation model from alpha-masked crops (background
    encoded as class 0) and attach the model to the Cytomine job.

    Fixes vs. original: status message typo ``"Extract subwindwos..."`` ->
    ``"Extract subwindows..."``; redundant ``str("tmp")`` -> ``"tmp"``.
    """
    with CytomineJob.from_cli(argv) as cj:
        # use only images from the current project
        cj.job.update(
            progress=1,
            statusComment="Preparing execution (creating folders,...).")

        # hardcode parameter for setup classify to fetch alphamask instead
        # of plain crop.
        cj.parameters.cytomine_download_alpha = True
        cj.parameters.cytomine_id_projects = "{}".format(cj.project.id)

        cj.job.update(progress=2, statusComment="Downloading crops.")
        base_path, downloaded = setup_classify(
            args=cj.parameters, logger=cj.job_logger(2, 25),
            dest_pattern=os.path.join("{term}", "{image}_{id}.png"),
            root_path="tmp", set_folder="train", showTerm=True)

        x = np.array([f for annotation in downloaded
                      for f in annotation.filenames])
        # the term id is encoded as the parent folder name of each crop
        y = np.array([int(os.path.basename(os.path.dirname(filepath)))
                      for filepath in x])

        # transform classes
        cj.job.update(progress=25, statusComment="Transform classes...")
        positive_terms = parse_domain_list(
            cj.parameters.cytomine_id_positive_terms)
        selected_terms = parse_domain_list(cj.parameters.cytomine_id_terms)
        is_binary = len(selected_terms) > 0 and len(positive_terms) > 0
        foreground_terms = (np.unique(y) if len(selected_terms) == 0
                            else np.array(selected_terms))
        if len(positive_terms) == 0:
            # multi-class: background 0 + one class per foreground term
            classes = np.hstack((np.zeros((1,), dtype=int), foreground_terms))
        else:
            # binary: positive terms -> 1, other selected terms -> 0
            foreground_terms = np.array(positive_terms)
            classes = np.array([0, 1])
            # cast to binary
            fg_idx = np.in1d(y, list(foreground_terms))
            bg_idx = np.in1d(
                y, list(set(selected_terms).difference(foreground_terms)))
            y[fg_idx] = 1
            y[bg_idx] = 0
        n_classes = classes.shape[0]

        # filter unwanted terms
        cj.logger.info("Size before filtering:")
        cj.logger.info(" - x: {}".format(x.shape))
        cj.logger.info(" - y: {}".format(y.shape))
        keep = np.in1d(y, classes)
        x, y = x[keep], y[keep]
        cj.logger.info("Size after filtering:")
        cj.logger.info(" - x: {}".format(x.shape))
        cj.logger.info(" - y: {}".format(y.shape))

        if x.shape[0] == 0:
            raise ValueError("No training data")

        if is_binary:
            # 0 (background) vs 1 (classes in foreground)
            cj.logger.info("Binary segmentation:")
            cj.logger.info("> class '0': background & terms {}".format(
                set(selected_terms).difference(positive_terms)))
            cj.logger.info("> class '1': {}".format(set(foreground_terms)))
        else:
            # 0 (background) vs 1 vs 2 vs ... n (classes from
            # cytomine_id_terms)
            cj.logger.info("Multi-class segmentation:")
            cj.logger.info("> background class '0'")
            cj.logger.info("> term classes: {}".format(set(foreground_terms)))

        # build model
        cj.job.update(progress=27, statusComment="Build model...")
        et, pyxit = build_models(
            n_subwindows=cj.parameters.pyxit_n_subwindows,
            min_size=cj.parameters.pyxit_min_size,
            max_size=cj.parameters.pyxit_max_size,
            target_width=cj.parameters.pyxit_target_width,
            target_height=cj.parameters.pyxit_target_height,
            interpolation=cj.parameters.pyxit_interpolation,
            transpose=cj.parameters.pyxit_transpose,
            colorspace=cj.parameters.pyxit_colorspace,
            fixed_size=cj.parameters.pyxit_fixed_size,
            verbose=int(cj.logger.level == 10),
            random_state=cj.parameters.seed,
            n_estimators=cj.parameters.forest_n_estimators,
            min_samples_split=cj.parameters.forest_min_samples_split,
            max_features=cj.parameters.forest_max_features,
            n_jobs=cj.parameters.n_jobs)

        # to extract the classes from the mask
        pyxit.get_output = _get_output_from_mask

        # extract subwindows manually to avoid class problem
        cj.job.update(progress=30, statusComment="Extract subwindows...")
        _x, _y = pyxit.extract_subwindows(x, y)

        actual_classes = np.unique(_y)
        if actual_classes.shape[0] != classes.shape[0]:
            raise ValueError(
                "Some classes are missing from the dataset: actual='{}', "
                "expected='{}'".format(",".join(map(str, actual_classes)),
                                       ",".join(map(str, classes))))

        cj.logger.info("Size of actual training data:")
        cj.logger.info(" - x : {}".format(_x.shape))
        cj.logger.info(" - y : {}".format(_y.shape))
        cj.logger.info(" - dist: {}".format(", ".join([
            "{}: {}".format(v, c)
            for v, c in zip(*np.unique(_y, return_counts=True))
        ])))

        cj.job.update(progress=60, statusComment="Train model...")
        # "re-implement" pyxit.fit to avoid incorrect class handling
        pyxit.classes_ = classes
        pyxit.n_classes_ = n_classes
        pyxit.base_estimator.fit(_x, _y)

        cj.job.update(progress=90, statusComment="Save model....")
        model_filename = joblib.dump(
            pyxit, os.path.join(base_path, "model.joblib"), compress=3)[0]
        AttachedFile(
            cj.job,
            domainIdent=cj.job.id,
            filename=model_filename,
            domainClassName="be.cytomine.processing.Job"
        ).upload()
        Property(cj.job, key="classes", value=stringify(classes)).save()
        Property(cj.job, key="binary", value=is_binary).save()

        cj.job.update(status=Job.TERMINATED, status_comment="Finish",
                      progress=100)