Example no. 1
def plot_val_all_labels(db: MongoCosemDB,
                        setup: str,
                        path: str,
                        threshold: int = 127,
                        filetype: str = "pdf"):
    """
    Plot validation graphs for all labels corresponding to a specific setup. Plots are saved to files in `path`.

    Args:
        db: Database with crop information and evaluation results.
        setup: Setup to plot validation results for.
        path: Path in which to save all the plots.
        threshold: Threshold to be applied on top of raw predictions to generate binary segmentations for evaluation.
        filetype: Filetype for saving plots.
    """
    valcrops = db.get_all_validation_crops()
    labels = get_unet_setup(setup).labels
    for lbl in labels:
        in_crop = [check_label_in_crop(lbl, crop) for crop in valcrops]
        if any(in_crop):
            file = os.path.join(
                path,
                "{label:}_{setup:}.{filetype:}".format(label=lbl.labelname,
                                                       setup=setup,
                                                       filetype=filetype))
            plot_val(db, setup, lbl.labelname, file, threshold=threshold)
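
A minimal usage sketch: the MongoCosemDB constructor arguments, setup name and output path below are illustrative assumptions, not taken from the function above.

# Hypothetical usage of plot_val_all_labels
db = MongoCosemDB()  # assumed: connection/configuration handled by defaults
plot_val_all_labels(db, setup="setup01", path="/tmp/val_plots", threshold=127, filetype="png")
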
Example no. 2
def autodiscover_label_to_crops(
        setup: str,
        db: CNNectome.utils.cosem_db.MongoCosemDB) -> Dict[str, List[str]]:
    """
    For each label trained by the setup, get the list of validation crops that contain that label.

    Args:
        setup: Setup for which to get the label to crop dictionary.
        db: Database with crop information and evaluation results.

    Returns:
        Dictionary mapping labelnames to lists of crop numbers.
    """
    labels = autodiscover_labels(setup, training_version=db.training_version)

    label_to_cropnos = {}
    crops = db.get_all_validation_crops()
    for lbl in labels:
        for crop in crops:
            if check_label_in_crop(lbl, crop):
                try:
                    label_to_cropnos[lbl.labelname].append(crop["number"])
                except KeyError:
                    label_to_cropnos[lbl.labelname] = [crop["number"]]
    if len(label_to_cropnos) < len(labels):
        setup_labels = set([lbl.labelname for lbl in labels])
        crop_labels = set(label_to_cropnos.keys())
        for lblname in setup_labels - crop_labels:
            print("{0:} not in any crop".format(lblname))
    return label_to_cropnos
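
A brief usage sketch; the setup name and the printed mapping are illustrative assumptions, and db is assumed to be a connected CNNectome.utils.cosem_db.MongoCosemDB instance.

# Hypothetical usage of autodiscover_label_to_crops
label_to_crops = autodiscover_label_to_crops("setup03", db)
for labelname, cropnos in label_to_crops.items():
    print(labelname, "->", cropnos)  # e.g. mito -> ['111', '113'] (values illustrative)
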
Example no. 3
def compare_generalization(
    db: cosem_db.MongoCosemDB,
    metric: str,
    crops: Optional[Sequence[Union[str, int]]] = None,
    tol_distance: int = 40,
    clip_distance: int = 200,
    threshold: int = 200,
    raw_ds: Union[None, str, Sequence[str]] = "volumes/raw/s0",
) -> List[List[Optional[Dict[str, Any]]]]:
    """
    Evaluate generalization experiments for er, mito, nucleus and plasma_membrane.

    Args:
        db: Database with crop information and evaluation results.
        metric: Metric to use for comparison.
        crops: List of crops to run the comparison on. If None, all validation crops are used.
        tol_distance: Tolerance distance when using a metric with tolerance distance, otherwise not used.
        clip_distance: Clip distance when using a metric with clip distance, otherwise not used.
        threshold: Threshold to have been applied on top of raw predictions.
        raw_ds: Raw dataset to run prediction on.

    Returns:
        List of best results for setups involved in generalization experiments. Each result is a list with just one
        dictionary.
    """

    setups = ["setup03", "setup61", "setup62", "setup63", "setup64"]
    labels = ["er", "mito", "nucleus", "plasma_membrane"]
    if crops is None:
        crops = [c["number"] for c in db.get_all_validation_crops()]
    results = []
    for lbl in labels:
        for setup in setups:
            for cropno in crops:
                if crop_utils.check_label_in_crop(
                        hierarchy.hierarchy[lbl],
                        db.get_crop_by_number(cropno)):
                    results.append([
                        analyze_evals.best_result(db,
                                                  lbl, [setup],
                                                  cropno,
                                                  metric,
                                                  raw_ds=raw_ds,
                                                  tol_distance=tol_distance,
                                                  clip_distance=clip_distance,
                                                  threshold=threshold,
                                                  test=False)
                    ])
    return results
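
Each entry of the returned list wraps exactly one dictionary, so it can be unpacked in the loop. A sketch, assuming a connected database and that "dice" is one of the metric names stored in it:

# Hypothetical usage of compare_generalization
results = compare_generalization(db, metric="dice")
for (best,) in results:  # each result is a one-element list
    print(best["label"], best["setup"], best["crop"], best["value"])
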
Example no. 4
def best_8nm(db: cosem_db.MongoCosemDB,
             metric: str,
             crops: Optional[Sequence[Union[str, int]]],
             tol_distance: int = 40,
             clip_distance: int = 200,
             threshold: int = 200,
             mode: str = "across-setups",
             raw_ds: Union[None, str,
                           Sequence[str]] = "volumes/subsampled/raw/0",
             test: bool = False) -> List[List[Dict[str, Any]]]:
    """
    Get the best results for the 8nm setups.

    Args:
        db: Database with crop information and evaluation results.
        metric: Metric to report and use for optimization of iteration/setup.
        crops: List of crops to run the comparison on. If None, all validation crops are used.
        tol_distance: Tolerance distance when using a metric with tolerance distance, otherwise not used.
        clip_distance: Clip distance when using a metric with clip distance, otherwise not used.
        threshold: Threshold to have been applied on top of raw predictions.
        mode: "across-setups" to optimize both setup and iteration, or "per-setup" to optimize the iteration for a fixed setup.
        raw_ds: Raw dataset to run prediction on.
        test: Whether to run in test mode.

    Returns:
        List of best results. Each result is a list with just one dictionary.
    """
    if mode == "across-setups":
        setups = [
            "setup04", "setup26.1", "setup28", "setup32", "setup36", "setup46",
            "setup48"
        ]
        labels = [
            "ecs", "plasma_membrane", "mito", "mito_membrane", "mito_DNA",
            "vesicle", "vesicle_membrane", "MVB", "MVB_membrane", "lysosome",
            "lysosome_membrane", "er", "er_membrane", "ERES", "nucleus",
            "microtubules", "microtubules_out"
        ]
    elif mode == "per-setup":
        setups = [
            "setup04", "setup04", "setup04", "setup04", "setup04", "setup04",
            "setup04", "setup04", "setup04", "setup04", "setup04", "setup04",
            "setup04", "setup04", "setup26.1", "setup26.1", "setup26.1",
            "setup28", "setup28", "setup32", "setup32", "setup36", "setup46",
            "setup46", "setup48", "setup48", "setup48", "setup48"
        ]
        labels = [
            "ecs", "plasma_membrane", "mito", "mito_membrane", "vesicle",
            "vesicle_membrane", "MVB", "MVB_membrane", "er", "er_membrane",
            "ERES", "nucleus", "microtubules", "microtubules_out", "mito",
            "mito_membrane", "mito_DNA", "er", "er_membrane", "microtubules",
            "microtubules_out", "nucleus", "ecs", "plasma_membrane", "MVB",
            "MVB_membrane", "lysosome", "lysosome_membrane"
        ]
    else:
        raise ValueError("unknown mode {0:}".format(mode))

    results = []
    if crops is None:
        crops = [c["number"] for c in db.get_all_validation_crops()]
    for cropno in crops:
        if mode == "across-setups":
            for lbl in labels:
                if crop_utils.check_label_in_crop(
                        hierarchy.hierarchy[lbl],
                        db.get_crop_by_number(cropno)):
                    results.append([
                        analyze_evals.best_result(db,
                                                  lbl,
                                                  setups,
                                                  cropno,
                                                  metric,
                                                  raw_ds=raw_ds,
                                                  tol_distance=tol_distance,
                                                  clip_distance=clip_distance,
                                                  threshold=threshold,
                                                  test=test)
                    ])
        elif mode == "per-setup":
            for setup, lbl in zip(setups, labels):
                if crop_utils.check_label_in_crop(
                        hierarchy.hierarchy[lbl],
                        db.get_crop_by_number(cropno)):
                    results.append([
                        analyze_evals.best_result(db,
                                                  lbl, [setup],
                                                  cropno,
                                                  metric,
                                                  raw_ds=raw_ds,
                                                  tol_distance=tol_distance,
                                                  clip_distance=clip_distance,
                                                  threshold=threshold,
                                                  test=test)
                    ])
    return results
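
Note that in "per-setup" mode the setups and labels lists are aligned pairwise (see the zip in the loop above), so each setup is only evaluated for its paired label. A sketch under the same assumptions as the previous examples:

# Hypothetical usage of best_8nm
results = best_8nm(db, metric="dice", crops=None, mode="per-setup")
for (best,) in results:
    print(best["setup"], best["label"], best["iteration"], best["value"])
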
Example no. 5
def _best_automatic(db: cosem_db.MongoCosemDB,
                    label: str,
                    setups: Sequence[str],
                    cropno: Union[Sequence[str], Sequence[int]],
                    metric: str,
                    raw_ds: Optional[Sequence[str]] = None,
                    tol_distance: int = 40,
                    clip_distance: int = 200,
                    threshold: int = 127,
                    test: bool = False) -> Dict[str, Any]:
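    """
    Determine the best configuration (iteration, and setup if several are given) for a label by averaging the
    chosen metric over the queried validation crops, then look up the corresponding evaluation results.

    Args:
        db: Database with crop information and evaluation results.
        label: Label to find the best configuration for.
        setups: Setups to consider; several if both setup and iteration are being optimized.
        cropno: Crops to report the best result on.
        metric: Metric to report and use for optimization of iteration/setup.
        raw_ds: Raw datasets to consider for prediction.
        tol_distance: Tolerance distance when using a metric with tolerance distance, otherwise not used.
        clip_distance: Clip distance when using a metric with clip distance, otherwise not used.
        threshold: Threshold to have been applied on top of raw predictions.
        test: Whether to run in test mode; the remaining validation crops then determine the best configuration.

    Returns:
        Dictionary with the best result, averaged across crops if several are given.
    """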
    metric_params = dict()
    metric_params["clip_distance"] = clip_distance
    metric_params["tol_distance"] = tol_distance
    filtered_params = filter_params(metric_params, metric)

    setups = [
        setup for setup in setups
        if label in [lbl.labelname for lbl in autodiscover_labels(setup)]
    ]
    # in test mode the remaining validation crops are used for determining the best configuration
    if test:
        cropnos_query = [
            crop["number"] for crop in db.get_all_validation_crops()
        ]
        for cno in cropno:
            cropnos_query.pop(cropnos_query.index(str(cno)))
        cropnos_query = [
            cno for cno in cropnos_query if check_label_in_crop(
                hierarchy[label], db.get_crop_by_number(cno))
        ]
    else:
        cropnos_query = cropno
    if len(cropnos_query) == 0:  # if no crops remain, return without a result
        final = {
            "value": None,
            "iteration": None,
            "label": label,
            "metric": metric,
            "metric_params": filtered_params,
            "refined": False,
            "threshold": threshold,
            "setup": setups[0] if len(setups) == 1 else None,
            "crop": cropno[0] if len(cropno) == 1 else {
                "$in": cropno
            }
        }
        if raw_ds is not None:
            final["raw_dataset"] = raw_ds[0] if len(raw_ds) == 1 else {
                "$in": raw_ds
            }
        return final

    # find the maximum iteration per setup and put the corresponding conditions into the query
    conditions = []
    for setup in setups:  # several setups if both iteration and setup are being optimized ("across-setups")

        max_its = []

        for cno in cropnos_query:
            maxit_query = {
                "label": label,
                "crop": str(cno),
                "threshold": threshold,
                "refined": False,
                "setup": setup
            }
            if raw_ds is not None:
                maxit_query["raw_dataset"] = {"$in": raw_ds}
            maxit, valid = max_iteration_for_analysis(maxit_query, db)
            max_its.append(maxit)

        conditions.append({
            "setup": setup,
            "iteration": {
                "$lte": max(max_its)
            }
        })

    if len(conditions) > 1:
        match_query = {"$or": conditions}
    else:
        match_query = conditions[0]

    # prepare aggregation of best configuration on the database
    aggregator = []

    # match
    match_query.update({
        "crop": {
            "$in": cropnos_query
        },
        "label": label,
        "metric": metric,
        "metric_params": filtered_params,
        "threshold": threshold,
        "value": {
            "$ne": np.nan
        },
        "refined": False
    })
    if raw_ds is not None:
        match_query["raw_dataset"] = {"$in": raw_ds}
    aggregator.append({"$match": match_query})

    # for each combination of setup and iteration, and raw_dataset if relevant, average across the matched results
    crossval_group = {
        "_id": {
            "setup": "$setup",
            "iteration": "$iteration"
        },
        "score": {
            "$avg": "$value"
        }
    }
    if raw_ds is not None:
        crossval_group["_id"]["raw_dataset"] = "$raw_dataset"
    aggregator.append({"$group": crossval_group})

    # sort (descending/ascending determined by metric) by averaged score
    aggregator.append(
        {"$sort": {
            "score": sorting(metric),
            "_id.iteration": 1
        }})

    # we only need the best entry, so limit results to one (MongoDB can exploit this during the sort)
    aggregator.append({"$limit": 1})

    # extract setup and iteration, and raw_dataset if relevant, in the end
    projection = {
        "setup": "$_id.setup",
        "iteration": "$_id.iteration",
        "_id": 0
    }
    if raw_ds is not None:
        projection["raw_dataset"] = "$_id.raw_dataset"
    aggregator.append({"$project": projection})

    # run the aggregation on the evaluation database
    col = db.access("evaluation", db.training_version)
    best_config = list(col.aggregate(aggregator))

    if len(best_config) == 0:  # if no results are found, return at this point
        final = match_query.copy()
        # final result should have actual cropno
        if len(cropno) == 1:
            final["crop"] = cropno[0]
        else:
            final["crop"] = {"$in": cropno}
        final.update({"setup": None, "value": None, "iteration": None})
        return final
    else:
        best_config = best_config[0]

    all_best = []
    for cno in cropno:
        query_best = {
            "label": label,
            "crop": str(cno),
            "metric": metric,
            "setup": best_config["setup"],
            "metric_params": filtered_params,
            "threshold": threshold,
            "iteration": best_config["iteration"],
            "refined": False
        }
        if raw_ds is not None:
            query_best["raw_dataset"] = best_config["raw_dataset"]
        best_this = db.find(query_best)
        if len(best_this) != 1:
            print("query:", query_best)
            print("results:", list(best_this))
        assert len(best_this) == 1, "Expected exactly one result for the best configuration"
        all_best.append(best_this[0])

    # average results for the case of several crops
    final = dict()
    final["value"] = np.mean([ab["value"] for ab in all_best])

    # assemble all entries that are shared by the best result for each crop
    all_keys = set(
        all_best[0].keys()).intersection(*(d.keys()
                                           for d in all_best)) - {"value"}
    for k in all_keys:
        if all([ab[k] == all_best[0][k] for ab in all_best]):
            final[k] = all_best[0][k]
    return final
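
Although _best_automatic is a private helper (presumably reached through a public wrapper such as analyze_evals.best_result used in the earlier examples), it can be sketched in isolation; every argument value below is an illustrative assumption.

# Hypothetical usage of _best_automatic
best = _best_automatic(db, label="mito", setups=["setup04", "setup26.1"],
                       cropno=["113"], metric="dice")
print(best["setup"], best["iteration"], best["value"])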