# Example #1
def extract_images_or_rois(parameters):
    """Build the list of zones to process from the job parameters.

    Returns, in order of precedence:
      1. IIP windows for explicitly listed ROI annotation ids,
      2. whole slides (selected images, or all project images) when no
         ROI term is given,
      3. IIP windows for the project annotations carrying the ROI term.
    """
    roi_ids = parse_domain_list(parameters.cytomine_roi_annotations)
    # Explicit ROI annotation ids take precedence over everything else.
    if len(roi_ids) > 0:
        slide_cache = dict()  # ImageInstance id -> CytomineSlide
        windows = list()
        for roi_id in roi_ids:
            annotation = Annotation().fetch(roi_id)
            if annotation.image not in slide_cache:
                slide_cache[annotation.image] = CytomineSlide(
                    annotation.image, parameters.cytomine_zoom_level)
            windows.append(get_iip_window_from_annotation(
                slide_cache[annotation.image], annotation,
                parameters.cytomine_zoom_level))
        return windows

    # Otherwise, work at image level or on ROIs selected by term.
    images = ImageInstanceCollection()
    if parameters.cytomine_id_images is not None:
        images.extend([
            ImageInstance().fetch(_id)
            for _id in parse_domain_list(parameters.cytomine_id_images)
        ])
    else:
        images = images.fetch_with_filter("project",
                                          parameters.cytomine_id_project)

    slides = [
        CytomineSlide(img, parameters.cytomine_zoom_level) for img in images
    ]
    if parameters.cytomine_id_roi_term is None:
        return slides

    # Fetch the ROI annotations carrying the requested term.
    collection = AnnotationCollection(
        terms=[parameters.cytomine_id_roi_term],
        reviewed=parameters.cytomine_reviewed_roi,
        showWKT=True)
    collection.fetch_with_filter(project=parameters.cytomine_id_project)
    by_image = {slide.image_instance.id: slide for slide in slides}
    windows = list()
    for annotation in collection:
        # Skip annotations that belong to images outside the selection.
        if annotation.image not in by_image:
            continue
        windows.append(get_iip_window_from_annotation(
            by_image[annotation.image], annotation,
            parameters.cytomine_zoom_level))

    return windows
# Example #2
def extract_images_or_rois(parameters):
    """Build the list of zones to process from the job parameters.

    Returns whole slides (selected images, or all project images) when no
    ROI term is given; otherwise returns IIP windows for the project's
    ROI-term annotations (all users, algorithm annotations included).
    """
    # Work at image level or on ROIs selected by term.
    images = ImageInstanceCollection()
    if parameters.cytomine_id_images is not None:
        images.extend([
            ImageInstance().fetch(_id)
            for _id in parse_domain_list(parameters.cytomine_id_images)
        ])
    else:
        images = images.fetch_with_filter("project",
                                          parameters.cytomine_id_project)

    slides = [
        CytomineSlide(img, parameters.cytomine_zoom_level) for img in images
    ]
    if parameters.cytomine_id_roi_term is None:
        return slides

    # Fetch ROI annotations for all users (including algorithm ones).
    collection = AnnotationCollection(
        terms=[parameters.cytomine_id_roi_term],
        reviewed=parameters.cytomine_reviewed_roi,
        project=parameters.cytomine_id_project,
        showWKT=True,
        includeAlgo=True).fetch()

    by_image = {slide.image_instance.id: slide for slide in slides}
    windows = list()
    for annotation in collection:
        # Skip annotations that belong to images outside the selection.
        if annotation.image not in by_image:
            continue
        windows.append(get_iip_window_from_annotation(
            by_image[annotation.image], annotation,
            parameters.cytomine_zoom_level))

    return windows
def main(argv):
    """Train a Pyxit classification model from Cytomine annotation crops.

    Downloads annotation crops, filters them by the requested terms,
    optionally binarizes the labels, trains the model, then uploads the
    serialized model and its metadata as job attachment/properties.
    """
    with CytomineJob.from_cli(argv) as cj:
        # annotation filtering
        cj.logger.info(str(cj.parameters))

        # use only images from the current project
        cj.parameters.cytomine_id_projects = "{}".format(cj.parameters.cytomine_id_project)

        # FIX: keyword was 'statuscomment' (wrong case); every other
        # Job.update call in this file uses 'statusComment'.
        cj.job.update(progress=1, statusComment="Preparing execution (creating folders,...).")
        base_path, downloaded = setup_classify(
            args=cj.parameters, logger=cj.job_logger(1, 40),
            dest_pattern=os.path.join("{term}", "{image}_{id}.png"),
            root_path=Path.home(), set_folder="train", showTerm=True
        )

        # x: crop file paths; y: term id parsed from each crop's parent folder
        x = np.array([f for annotation in downloaded for f in annotation.filenames])
        y = np.array([int(os.path.basename(os.path.dirname(filepath))) for filepath in x])

        # transform classes
        cj.job.update(progress=50, statusComment="Transform classes...")
        classes = parse_domain_list(cj.parameters.cytomine_id_terms)
        positive_classes = parse_domain_list(cj.parameters.cytomine_positive_terms)
        # fall back on the labels observed in the data when no term list given
        classes = np.array(classes) if len(classes) > 0 else np.unique(y)
        n_classes = classes.shape[0]

        # filter unwanted terms
        cj.logger.info("Size before filtering:")
        cj.logger.info(" - x: {}".format(x.shape))
        cj.logger.info(" - y: {}".format(y.shape))
        keep = np.in1d(y, classes)
        x, y = x[keep], y[keep]
        cj.logger.info("Size after filtering:")
        cj.logger.info(" - x: {}".format(x.shape))
        cj.logger.info(" - y: {}".format(y.shape))

        if cj.parameters.cytomine_binary:
            cj.logger.info("Will be training on 2 classes ({} classes before binarization).".format(n_classes))
            # FIX: np.int was deprecated in NumPy 1.20 and removed in 1.24;
            # the builtin int is the documented replacement.
            y = np.in1d(y, positive_classes).astype(int)
        else:
            cj.logger.info("Will be training on {} classes.".format(n_classes))
            # remap term ids onto contiguous class indexes [0, n_classes)
            y = np.searchsorted(classes, y)

        # build model
        cj.job.update(progress=55, statusComment="Build model...")
        _, pyxit = build_models(
            n_subwindows=cj.parameters.pyxit_n_subwindows,
            min_size=cj.parameters.pyxit_min_size,
            max_size=cj.parameters.pyxit_max_size,
            target_width=cj.parameters.pyxit_target_width,
            target_height=cj.parameters.pyxit_target_height,
            interpolation=cj.parameters.pyxit_interpolation,
            transpose=cj.parameters.pyxit_transpose,
            colorspace=cj.parameters.pyxit_colorspace,
            fixed_size=cj.parameters.pyxit_fixed_size,
            verbose=int(cj.logger.level == 10),
            create_svm=cj.parameters.svm,
            C=cj.parameters.svm_c,
            random_state=cj.parameters.seed,
            n_estimators=cj.parameters.forest_n_estimators,
            min_samples_split=cj.parameters.forest_min_samples_split,
            max_features=cj.parameters.forest_max_features,
            n_jobs=cj.parameters.n_jobs
        )
        cj.job.update(progress=60, statusComment="Train model...")
        pyxit.fit(x, y)

        cj.job.update(progress=90, statusComment="Save model....")
        model_filename = joblib.dump(pyxit, os.path.join(base_path, "model.joblib"), compress=3)[0]

        # attach the serialized model to the job so prediction jobs can fetch it
        AttachedFile(
            cj.job,
            domainIdent=cj.job.id,
            filename=model_filename,
            domainClassName="be.cytomine.processing.Job"
        ).upload()

        # persist training metadata needed at prediction time
        Property(cj.job, key="classes", value=stringify(classes)).save()
        Property(cj.job, key="binary", value=cj.parameters.cytomine_binary).save()
        Property(cj.job, key="positive_classes", value=stringify(positive_classes)).save()

        cj.job.update(status=Job.TERMINATED, status_comment="Finish", progress=100)
# Example #4
def main(argv):
    """Segment images/ROIs with a trained Pyxit model and upload annotations.

    Fetches the model trained by a previous job, builds an SLDC tiled
    segmentation workflow, runs it on each zone returned by
    extract_images_or_rois, and saves the resulting polygons as
    annotations in the project.
    """
    with CytomineJob.from_cli(argv) as cj:
        # use only images from the current project
        cj.job.update(progress=1, statusComment="Preparing execution")

        # extract images to process
        # Tiles other than 256x256 with no overlap are rejected at
        # zoom_level > 0 (the IIP tile fetch only supports that geometry).
        if cj.parameters.cytomine_zoom_level > 0 and (
                cj.parameters.cytomine_tile_size != 256
                or cj.parameters.cytomine_tile_overlap != 0):
            raise ValueError(
                "when using zoom_level > 0, tile size should be 256 "
                "(given {}) and overlap should be 0 (given {})".format(
                    cj.parameters.cytomine_tile_size,
                    cj.parameters.cytomine_tile_overlap))

        cj.job.update(
            progress=1,
            statusComment="Preparing execution (creating folders,...).")
        # working path
        root_path = str(Path.home())
        working_path = os.path.join(root_path, "images")
        os.makedirs(working_path, exist_ok=True)

        # load training information (class list and binary flag were stored
        # as properties by the training job)
        cj.job.update(progress=5,
                      statusComment="Extract properties from training job.")
        train_job = Job().fetch(cj.parameters.cytomine_id_job)
        properties = PropertyCollection(train_job).fetch().as_dict()
        binary = str2bool(properties["binary"].value)
        classes = parse_domain_list(properties["classes"].value)

        # the serialized model was uploaded as a job attached file
        cj.job.update(progress=10, statusComment="Download the model file.")
        attached_files = AttachedFileCollection(train_job).fetch()
        model_file = attached_files.find_by_attribute("filename",
                                                      "model.joblib")
        model_filepath = os.path.join(root_path, "model.joblib")
        model_file.download(model_filepath, override=True)
        pyxit = joblib.load(model_filepath)

        # set n_jobs (override whatever was pickled with the model)
        pyxit.base_estimator.n_jobs = cj.parameters.n_jobs
        pyxit.n_jobs = cj.parameters.n_jobs

        cj.job.update(progress=45, statusComment="Build workflow.")
        builder = SSLWorkflowBuilder()
        builder.set_tile_size(cj.parameters.cytomine_tile_size,
                              cj.parameters.cytomine_tile_size)
        builder.set_overlap(cj.parameters.cytomine_tile_overlap)
        builder.set_tile_builder(
            CytomineTileBuilder(working_path, n_jobs=cj.parameters.n_jobs))
        builder.set_logger(StandardOutputLogger(level=Logger.INFO))
        builder.set_n_jobs(1)
        builder.set_background_class(0)
        # value 0 will prevent merging but still requires to run the merging check
        # procedure (inefficient)
        builder.set_distance_tolerance(2 if cj.parameters.union_enabled else 0)
        builder.set_segmenter(
            ExtraTreesSegmenter(
                pyxit=pyxit,
                classes=classes,
                prediction_step=cj.parameters.pyxit_prediction_step,
                background=0,
                min_std=cj.parameters.tile_filter_min_stddev,
                max_mean=cj.parameters.tile_filter_max_mean))
        workflow = builder.get()

        # discard detected objects whose area is outside the allowed range
        area_checker = AnnotationAreaChecker(
            min_area=cj.parameters.min_annotation_area,
            max_area=cj.parameters.max_annotation_area)

        def get_term(label):
            # Map a predicted label to the term id(s) to attach.
            if binary:
                if "cytomine_id_predict_term" not in cj.parameters:
                    return []
                else:
                    return [int(cj.parameters.cytomine_id_predict_term)]
            # multi-class
            return [label]

        zones = extract_images_or_rois(cj.parameters)
        for zone in cj.monitor(zones,
                               start=50,
                               end=90,
                               period=0.05,
                               prefix="Segmenting images/ROIs"):
            results = workflow.process(zone)

            annotations = AnnotationCollection()
            for obj in results:
                if not area_checker.check(obj.polygon):
                    continue
                polygon = obj.polygon
                # ROI windows: shift polygon back to whole-image coordinates
                if isinstance(zone, ImageWindow):
                    polygon = affine_transform(
                        polygon,
                        [1, 0, 0, 1, zone.abs_offset_x, zone.abs_offset_y])
                # Cytomine's referential has the y axis pointing up
                polygon = change_referential(polygon, zone.base_image.height)
                # rescale back to zoom level 0 coordinates
                if cj.parameters.cytomine_zoom_level > 0:
                    zoom_mult = (2**cj.parameters.cytomine_zoom_level)
                    polygon = affine_transform(
                        polygon, [zoom_mult, 0, 0, zoom_mult, 0, 0])
                annotations.append(
                    Annotation(location=polygon.wkt,
                               id_terms=get_term(obj.label),
                               id_project=cj.project.id,
                               id_image=zone.base_image.image_instance.id))
            annotations.save()

        cj.job.update(status=Job.TERMINATED,
                      status_comment="Finish",
                      progress=100)
# Example #5
def main(argv):
    """Train a Pyxit classifier with cross-validated accuracy estimation.

    Same training pipeline as the plain training job, plus a CV loop
    (shuffle / group / stratified folding) whose weighted accuracy is
    reported and stored, before fitting the final model on all data.
    """
    with CytomineJob.from_cli(argv) as cj:
        # annotation filtering
        cj.logger.info(str(cj.parameters))

        # FIX: keyword was 'statuscomment' (wrong case); every other
        # Job.update call in this file uses 'statusComment'.
        cj.job.update(progress=1, statusComment="Preparing execution (creating folders,...).")
        base_path, downloaded = setup_classify(
            args=cj.parameters, logger=cj.job_logger(1, 40),
            dest_pattern=os.path.join("{term}", "{image}_{id}.png"),
            root_path=Path.home(), set_folder="train", showTerm=True
        )

        # x: crop file paths; y: term id parsed from each crop's parent folder
        x = np.array([f for annotation in downloaded for f in annotation.filenames])
        y = np.array([int(os.path.basename(os.path.dirname(filepath))) for filepath in x])

        # transform classes
        cj.job.update(progress=50, statusComment="Transform classes...")
        classes = parse_domain_list(cj.parameters.cytomine_id_terms)
        positive_classes = parse_domain_list(cj.parameters.cytomine_positive_terms)
        # fall back on the labels observed in the data when no term list given
        classes = np.array(classes) if len(classes) > 0 else np.unique(y)
        n_classes = classes.shape[0]

        # filter unwanted terms
        cj.logger.info("Size before filtering:")
        cj.logger.info(" - x: {}".format(x.shape))
        cj.logger.info(" - y: {}".format(y.shape))
        keep = np.in1d(y, classes)
        x, y = x[keep], y[keep]
        cj.logger.info("Size after filtering:")
        cj.logger.info(" - x: {}".format(x.shape))
        cj.logger.info(" - y: {}".format(y.shape))

        # group label per sample, parsed from the filename prefix
        # (used by the 'group' folding policy)
        labels = np.array([int(os.path.basename(f).split("_", 1)[0]) for f in x])

        if cj.parameters.cytomine_binary:
            cj.logger.info("Will be training on 2 classes ({} classes before binarization).".format(n_classes))
            # FIX: np.int was deprecated in NumPy 1.20 and removed in 1.24;
            # the builtin int is the documented replacement.
            y = np.in1d(y, positive_classes).astype(int)
        else:
            cj.logger.info("Will be training on {} classes.".format(n_classes))
            # remap term ids onto contiguous class indexes [0, n_classes)
            y = np.searchsorted(classes, y)

        # build model
        random_state = check_random_state(cj.parameters.seed)
        cj.job.update(progress=55, statusComment="Build model...")
        _, pyxit = build_models(
            n_subwindows=cj.parameters.pyxit_n_subwindows,
            min_size=cj.parameters.pyxit_min_size,
            max_size=cj.parameters.pyxit_max_size,
            target_width=cj.parameters.pyxit_target_width,
            target_height=cj.parameters.pyxit_target_height,
            interpolation=cj.parameters.pyxit_interpolation,
            transpose=cj.parameters.pyxit_transpose,
            colorspace=cj.parameters.pyxit_colorspace,
            fixed_size=cj.parameters.pyxit_fixed_size,
            verbose=int(cj.logger.level == 10),
            create_svm=cj.parameters.svm,
            C=cj.parameters.svm_c,
            random_state=random_state,
            n_estimators=cj.parameters.forest_n_estimators,
            min_samples_split=cj.parameters.forest_min_samples_split,
            max_features=cj.parameters.forest_max_features,
            n_jobs=cj.parameters.n_jobs
        )

        cj.job.update(progress=60, statusComment="Start cross-validation...")
        n_splits = cj.parameters.eval_k
        cv = ShuffleSplit(n_splits, test_size=cj.parameters.eval_test_fraction)
        if cj.parameters.folds == "group":
            cv = GroupKFold(n_splits)
        elif cj.parameters.folds == "stratified":
            cv = StratifiedKFold(n_splits, shuffle=True, random_state=random_state)
        elif cj.parameters.folds != "shuffle":
            raise ValueError("Unknown folding policy '{}'.".format(cj.parameters.folds))

        # Fit
        accuracies = np.zeros(n_splits)
        test_sizes = np.zeros(n_splits)

        # extract subwindows once; the CV loop reuses slices of them
        _x, _y = pyxit.extract_subwindows(x, y)

        # CV loop
        for i, (train, test) in cj.monitor(enumerate(cv.split(x, y, labels)), start=60, end=90, prefix="cross val. iteration"):
            _pyxit = clone(pyxit)
            # map image indexes to their subwindow indexes
            w_train = window_indexes(x.shape[0], train, _pyxit.n_subwindows)
            w_test = window_indexes(x.shape[0], test, _pyxit.n_subwindows)
            _pyxit.fit(x[train], y[train], _X=_x[w_train], _y=_y[w_train])
            y_pred = _pyxit.predict(x[test], _x[w_test])
            accuracies[i] = accuracy_score(y[test], y_pred)
            test_sizes[i] = test.shape[0] / float(x.shape[0])
            del _pyxit

        # final model trained on the full dataset
        pyxit.fit(x, y)

        # accuracy averaged over folds, weighted by test-set fraction
        accuracy = float(np.mean(test_sizes * accuracies))
        cj.job.update(progress=90, statusComment="Accuracy: {}".format(accuracy))
        cj.job.update(progress=90, statusComment="Save model...")

        model_filename = joblib.dump(pyxit, os.path.join(base_path, "model.joblib"), compress=3)[0]

        # attach the serialized model to the job so prediction jobs can fetch it
        AttachedFile(
            cj.job,
            domainIdent=cj.job.id,
            filename=model_filename,
            domainClassName="be.cytomine.processing.Job"
        ).upload()

        # persist training metadata and CV metrics
        Property(cj.job, key="classes", value=stringify(classes)).save()
        Property(cj.job, key="binary", value=cj.parameters.cytomine_binary).save()
        Property(cj.job, key="positive_classes", value=stringify(positive_classes)).save()
        # FIX: the three metric properties were built but never saved
        # (missing .save(), unlike the three properties above).
        Property(cj.job, key="accuracies", value=array2str(accuracies)).save()
        Property(cj.job, key="test_sizes", value=array2str(test_sizes)).save()
        Property(cj.job, key="accuracy", value=accuracy).save()

        cj.job.update(status=Job.TERMINATED, status_comment="Finish", progress=100)
# Example #6
def main(argv):
    """Predict terms for annotation crops with a previously trained model.

    Downloads the test crops, fetches the model and class list stored by
    the training job, predicts a term per crop and uploads the predicted
    terms (with a confidence rate when available) as annotations.
    """
    with CytomineJob.from_cli(argv) as cj:
        # annotation filtering
        cj.logger.info(str(cj.parameters))

        # use only images from the current project
        cj.parameters.cytomine_id_projects = "{}".format(cj.parameters.cytomine_id_project)

        # FIX: keyword was 'statuscomment' (wrong case); every other
        # Job.update call in this file uses 'statusComment'.
        cj.job.update(progress=1, statusComment="Preparing execution (creating folders,...).")
        root_path = "/data/" #Path.home()
        image_path, downloaded = setup_classify(
            args=cj.parameters, logger=cj.job_logger(1, 40),
            dest_pattern="{image}_{id}.png", root_path=root_path,
            set_folder="test", showWKT=True
        )

        # one entry per downloaded file, aligned with x below
        annotations = [annotation for annotation in downloaded for f in annotation.filenames]
        x = np.array([f for annotation in downloaded for f in annotation.filenames])

        # extract model data from previous job
        cj.job.update(progress=45, statusComment="Extract properties from training job.")
        train_job = Job().fetch(cj.parameters.cytomine_id_job)
        properties = PropertyCollection(train_job).fetch().as_dict()

        binary = str2bool(properties["binary"].value)
        if binary:
            # binary models output {0, 1}: map them onto the negative/positive terms
            classes = np.array([cj.parameters.cytomine_id_term_negative, cj.parameters.cytomine_id_term_positive])
        else:
            classes = np.array(parse_domain_list(properties["classes"].value))

        # extract model
        cj.job.update(progress=50, statusComment="Download the model file.")
        attached_files = AttachedFileCollection(train_job).fetch()
        model_file = attached_files.find_by_attribute("filename", "model.joblib")
        model_filepath = os.path.join(root_path, "model.joblib")
        model_file.download(model_filepath, override=True)
        pyxit = joblib.load(model_filepath)

        # set n_jobs (override whatever was pickled with the model)
        pyxit.base_estimator.n_jobs = cj.parameters.n_jobs
        pyxit.n_jobs = cj.parameters.n_jobs

        cj.job.update(progress=55, statusComment="Predict...")
        if hasattr(pyxit, "predict_proba"):
            probas = pyxit.predict_proba(x)
            y_pred = np.argmax(probas, axis=1)
        else:
            # no probability support (e.g. SVM without calibration)
            probas = [None] * x.shape[0]
            y_pred = pyxit.predict(x)

        # map predicted class indexes back to term ids
        predicted_terms = classes.take(y_pred, axis=0)
        collection = AnnotationCollection()
        for i in cj.monitor(range(x.shape[0]), start=80, end=99, period=0.005, prefix="Uploading predicted terms"):
            annot, term, proba = annotations[i], predicted_terms[i], probas[i]

            parameters = {
                "location": annot.location,
                "id_image": annot.image,
                "id_project": cj.project.id,
                "id_terms": [int(term)]
            }
            if proba is not None:
                # store the winning class probability as the annotation rate
                parameters["rate"] = float(np.max(proba))
            collection.append(Annotation(**parameters))
        collection.save()
        cj.job.update(status=Job.TERMINATED, status_comment="Finish", progress=100)
def main(argv):
    """Train a Pyxit segmentation model from Cytomine alpha-masked crops.

    Downloads alpha-masked crops, builds the class mapping (binary or
    multi-class with background class 0), extracts subwindows, fits the
    underlying forest directly, and uploads the model plus its metadata.
    """
    with CytomineJob.from_cli(argv) as cj:
        # use only images from the current project
        cj.job.update(
            progress=1,
            statusComment="Preparing execution (creating folders,...).")

        # hardcode parameter for setup classify to fetch alphamask instead of plain crop.
        cj.parameters.cytomine_download_alpha = True
        cj.parameters.cytomine_id_projects = "{}".format(cj.project.id)
        cj.job.update(progress=2, statusComment="Downloading crops.")
        base_path, downloaded = setup_classify(args=cj.parameters,
                                               logger=cj.job_logger(2, 25),
                                               dest_pattern=os.path.join(
                                                   "{term}",
                                                   "{image}_{id}.png"),
                                               root_path=str("tmp"),
                                               set_folder="train",
                                               showTerm=True)

        # x: crop file paths; y: term id parsed from each crop's parent folder
        x = np.array(
            [f for annotation in downloaded for f in annotation.filenames])
        y = np.array([
            int(os.path.basename(os.path.dirname(filepath))) for filepath in x
        ])

        # transform classes
        cj.job.update(progress=25, statusComment="Transform classes...")
        positive_terms = parse_domain_list(
            cj.parameters.cytomine_id_positive_terms)
        selected_terms = parse_domain_list(cj.parameters.cytomine_id_terms)
        # binary mode requires both a term selection and positive terms
        is_binary = len(selected_terms) > 0 and len(positive_terms) > 0
        foreground_terms = np.unique(y) if len(
            selected_terms) == 0 else np.array(selected_terms)
        if len(positive_terms) == 0:
            # multi-class: background class 0 plus one class per term
            classes = np.hstack((np.zeros((1, ), dtype=int), foreground_terms))
        else:  # binary
            foreground_terms = np.array(positive_terms)
            classes = np.array([0, 1])
            # cast to binary
            fg_idx = np.in1d(y, list(foreground_terms))
            bg_idx = np.in1d(
                y, list(set(selected_terms).difference(foreground_terms)))
            y[fg_idx] = 1
            y[bg_idx] = 0

        n_classes = classes.shape[0]

        # filter unwanted terms
        cj.logger.info("Size before filtering:")
        cj.logger.info(" - x: {}".format(x.shape))
        cj.logger.info(" - y: {}".format(y.shape))
        keep = np.in1d(y, classes)
        x, y = x[keep], y[keep]
        cj.logger.info("Size after filtering:")
        cj.logger.info(" - x: {}".format(x.shape))
        cj.logger.info(" - y: {}".format(y.shape))

        if x.shape[0] == 0:
            raise ValueError("No training data")

        if is_binary:
            # 0 (background) vs 1 (classes in foreground )
            cj.logger.info("Binary segmentation:")
            cj.logger.info("> class '0': background & terms {}".format(
                set(selected_terms).difference(positive_terms)))
            cj.logger.info("> class '1': {}".format(set(foreground_terms)))
        else:
            # 0 (background vs 1 vs 2 vs ... n (n classes from cytomine_id_terms)
            cj.logger.info("Multi-class segmentation:")
            cj.logger.info("> background class '0'")
            cj.logger.info("> term classes: {}".format(set(foreground_terms)))

        # build model
        cj.job.update(progress=27, statusComment="Build model...")
        et, pyxit = build_models(
            n_subwindows=cj.parameters.pyxit_n_subwindows,
            min_size=cj.parameters.pyxit_min_size,
            max_size=cj.parameters.pyxit_max_size,
            target_width=cj.parameters.pyxit_target_width,
            target_height=cj.parameters.pyxit_target_height,
            interpolation=cj.parameters.pyxit_interpolation,
            transpose=cj.parameters.pyxit_transpose,
            colorspace=cj.parameters.pyxit_colorspace,
            fixed_size=cj.parameters.pyxit_fixed_size,
            verbose=int(cj.logger.level == 10),
            random_state=cj.parameters.seed,
            n_estimators=cj.parameters.forest_n_estimators,
            min_samples_split=cj.parameters.forest_min_samples_split,
            max_features=cj.parameters.forest_max_features,
            n_jobs=cj.parameters.n_jobs)

        # to extract the classes form the mask
        pyxit.get_output = _get_output_from_mask

        # extract subwindows manually to avoid class problem
        # FIX: status message typo 'subwindwos' -> 'subwindows'
        cj.job.update(progress=30, statusComment="Extract subwindows...")
        _x, _y = pyxit.extract_subwindows(x, y)

        # all expected classes must appear in the extracted subwindows,
        # otherwise the fitted model's class mapping would be wrong
        actual_classes = np.unique(_y)
        if actual_classes.shape[0] != classes.shape[0]:
            raise ValueError(
                "Some classes are missing from the dataset: actual='{}', expected='{}'"
                .format(",".join(map(str, actual_classes)),
                        ",".join(map(str, classes))))

        cj.logger.info("Size of actual training data:")
        cj.logger.info(" - x   : {}".format(_x.shape))
        cj.logger.info(" - y   : {}".format(_y.shape))
        cj.logger.info(" - dist: {}".format(", ".join([
            "{}: {}".format(v, c)
            for v, c in zip(*np.unique(_y, return_counts=True))
        ])))

        cj.job.update(progress=60, statusComment="Train model...")
        # "re-implement" pyxit.fit to avoid incorrect class handling
        pyxit.classes_ = classes
        pyxit.n_classes_ = n_classes
        pyxit.base_estimator.fit(_x, _y)

        cj.job.update(progress=90, statusComment="Save model....")
        model_filename = joblib.dump(pyxit,
                                     os.path.join(base_path, "model.joblib"),
                                     compress=3)[0]

        # attach the serialized model to the job so prediction jobs can fetch it
        AttachedFile(cj.job,
                     domainIdent=cj.job.id,
                     filename=model_filename,
                     domainClassName="be.cytomine.processing.Job").upload()

        # persist training metadata needed at prediction time
        Property(cj.job, key="classes", value=stringify(classes)).save()
        Property(cj.job, key="binary", value=is_binary).save()

        cj.job.update(status=Job.TERMINATED,
                      status_comment="Finish",
                      progress=100)