Ejemplo n.º 1
0
def get_hosts():
    """Load the host configurations declared in ``HOSTS_CONFIGS_YAML``.

    The yaml document must be a mapping with a top-level ``hosts`` key whose
    value maps each hostname to a mapping of fields. Every entry must carry a
    ``hostname`` field equal to its key, and is expanded as keyword arguments
    into ``Host``.

    Returns
    -------
    dict
        A new ``hostname -> Host`` dictionary.

    Raises
    ------
    HostsConfigsError
        If the yaml file does not exist (chained to the ``FileNotFoundError``).
    AssertionError
        If the content of the file does not have the structure described above.
    """

    logger.debug(f"Getting host configs from {HOSTS_CONFIGS_YAML}")

    try:
        with open(HOSTS_CONFIGS_YAML, "r") as f:
            # NOTE(review): FullLoader can build more than plain yaml types;
            # if this file may ever come from an untrusted source, switch to
            # yaml.safe_load.
            hosts_configs_ = yaml.load(f, Loader=yaml.FullLoader)

    except FileNotFoundError as ex:
        logger.exception(ex)
        logger.warning(f"Please create a yaml file at {HOSTS_CONFIGS_YAML}.")
        # explicit chaining keeps the original error as the documented cause
        raise HostsConfigsError(f"FileNotFound {HOSTS_CONFIGS_YAML=}") from ex

    assert hosts_configs_ is not None
    # checked before using `.keys()` below, which only exists on mappings
    assert isinstance(hosts_configs_, dict), f"{type(hosts_configs_)=}"
    assert "hosts" in hosts_configs_, f"{hosts_configs_.keys()}"

    raw_hosts = hosts_configs_["hosts"]

    assert isinstance(raw_hosts, dict), f"{type(raw_hosts)=}"

    # build a fresh dict instead of overwriting entries of the one being iterated
    hosts_ = {}

    for hostname, host in raw_hosts.items():

        assert isinstance(hostname, str), f"{type(hostname)=}"
        assert isinstance(host, dict), f"{hostname=} {type(host)=}"
        # `.get` so that a missing field fails this check instead of a bare KeyError
        assert hostname == host.get("hostname"), f"{hostname=} {host.get('hostname')=}"

        hosts_[hostname] = Host(**host)

    return hosts_
Ejemplo n.º 2
0
    SequentialGridPosition
)
from tomo2seg import volume_sequence as t2s_volseq
from tomo2seg.model import Model as T2SModel, Type as T2SModelType, estimate_max_batch_size_per_gpu
from tomo2seg import callbacks as tomo2seg_callbacks
from tomo2seg import losses as tomo2seg_losses
from tomo2seg import schedule as tomo2seg_schedule
from tomo2seg import utils as tomo2seg_utils
from tomo2seg import slackme
from tomo2seg import train
from tomo2seg.train import Args, TrainingFinished, FailedToFindBatchSize
from tomo2seg import hosts as t2s_hosts

# # Args

logger.warning(Args.versions)

# [manual-input]

args = train.Args(
    script_name="train-11-paper-tomo-modelstripping-03.py",
    volume_name=t2s_datasets.VOLUME_COMPOSITE_V1[0],
    volume_version=t2s_datasets.VOLUME_COMPOSITE_V1[1],
    labels_version=t2s_datasets.VOLUME_COMPOSITE_V1_LABELS_REFINED3,

    batch_size_per_gpu=5,

    random_state_seed=42,
    #     runid = 1610978353,
    runid=None,
    host=None,  # find it with socket.hostname
Ejemplo n.º 3
0
# Compute the crop shape fed to the model and the number of crops per axis.
#
# it has to be multiple of 16 because of the 4 cascaded 2x2-strided 2x2-downsamplings in u-net
if model_type == ModelType.input2d:
    # 2d models: largest multiple of 16 on the first two axes, a single layer on the third
    dims_multiple_16 = [int(16 * np.floor(dim / 16)) for dim in partition.shape[:2]]
    crop_shape = tuple(dims_multiple_16 + [1])  # x-axis, y-axis, z-axis

elif model_type == ModelType.input2halfd:
    # `NotImplemented` is a constant, not an exception: calling it is a TypeError
    raise NotImplementedError(f"{model_type=}")

elif model_type == ModelType.input3d:
    dims_multiple_16 = [
        int(16 * np.floor(dim / 16)) for dim in partition.shape
    ]
    nvoxels_per_crop = dims_multiple_16[0] * dims_multiple_16[1] * dims_multiple_16[2]

    # cap the crop volume: fall back to a cube of side DEFAULT_3D_DIM when too big
    if nvoxels_per_crop > DEFAULT_3D_DIM ** 3:
        logger.warning(f"If {dims_multiple_16=} ==> {nvoxels_per_crop=}, which is too big. Using default dimension {DEFAULT_3D_DIM=}**3.")
        dims_multiple_16 = [DEFAULT_3D_DIM, DEFAULT_3D_DIM, DEFAULT_3D_DIM]
    crop_shape = tuple(dims_multiple_16)  # x-axis, y-axis, z-axis

else:
    # fail here rather than with a NameError on the variables used below
    raise ValueError(f"Unknown {model_type=}")

logger.debug(f"{dims_multiple_16=}")
logger.debug(f"{crop_shape=}")

# number of crops needed to cover the volume along each axis (last one may overhang)
n_steps = tuple(
    int(np.ceil(vol_dim / crop_dim))
    for vol_dim, crop_dim in zip(volume_shape, crop_shape)
)
logger.debug(f"{n_steps=}")

def get_coordinates_iterator(n_steps_):
    """Return an iterator over every index triple of a 3-axis step grid.

    ``n_steps_`` holds the number of steps along each of the three axes; the
    result yields tuples ``(i, j, k)`` with the last axis varying fastest.
    """
    assert len(n_steps_) == 3
    first_axis, second_axis, third_axis = (range(n_steps_[axis]) for axis in range(3))
    return itertools.product(first_axis, second_axis, third_axis)
Ejemplo n.º 4
0
    def get_model():
        """Load the best autosaved keras model and compile it for this script.

        The path comes from ``tomo2seg_model.autosaved2_best_model_path``; when
        that property yields ``None`` or fails with the "empty sequence"
        ``ValueError``, ``tomo2seg_model.autosaved_model_path`` is used instead.

        Returns
        -------
        The loaded model, compiled with an Adam optimizer and the jaccard2 loss.

        Raises
        ------
        ValueError
            Any ``ValueError`` from the autosaved2 property other than the
            "empty sequence" one.
        NotImplementedError
            If the model input has more than one channel and is not a
            supported 2.5d configuration.
        """

        try:
            best_autosaved_model_path = tomo2seg_model.autosaved2_best_model_path  # it's a property
        except ValueError as ex:
            # presumably raised by a min() over an empty list of autosaved2 files;
            # anything else is a real error and is propagated untouched
            if not ex.args or ex.args[0] != "min() arg is an empty sequence":
                raise
            best_autosaved_model_path = None

        # explicit check instead of an `assert`, which is stripped under `python -O`
        if best_autosaved_model_path is None:
            logger.warning(
                f"{tomo2seg_model.name=} did not use autosaved2 apparently, falling back to autosaved."
            )
            best_autosaved_model_path = tomo2seg_model.autosaved_model_path

        print(best_autosaved_model_path)
        logger.info(
            f"Loading model from autosaved file: {best_autosaved_model_path.name}"
        )

        model = tf.keras.models.load_model(str(best_autosaved_model_path),
                                           compile=False)

        logger.debug(
            "Changing the model's input type to accept any size of crop.")

        in_ = model.layers[0]
        in_shape = in_.input_shape[0]
        input_n_channels = in_shape[-1]

        logger.debug(f"{input_n_channels=}")

        if input_n_channels > 1:

            if args.model_type == Args.ModelType.input2halfd:
                if len(in_shape) != 4:
                    # a bare string cannot be raised; use a real exception type
                    raise NotImplementedError(
                        f"{len(in_shape)=} != 4, so this model must be multi-channel. Not supported yet..."
                    )
            else:
                raise NotImplementedError(f"{input_n_channels=} > 1")

        # make it capable of getting any dimension in the input
        # "-2" = 1 for the batch size, 1 for the nb.channels
        anysize_target_shape = (len(in_shape) - 2) * [None] + [
            input_n_channels
        ]
        logger.debug(f"{anysize_target_shape=}")

        anysize_input = layers.Input(shape=anysize_target_shape,
                                     name="input_any_image_size")
        logger.debug(f"{anysize_input=}")

        # NOTE(review): if `model.layers` returns a fresh list on each access,
        # this assignment does not modify the model — confirm against the
        # keras version in use.
        model.layers[0] = anysize_input

        # this doesn't really matter bc this script will not fit the model
        optimizer = optimizers.Adam()
        loss_func = keras_custom_loss.jaccard2_loss

        logger.debug("Starting model compilation")
        model.compile(loss=loss_func, optimizer=optimizer)
        logger.debug("Done!")

        return model
Ejemplo n.º 5
0
    logger.info("done")

    # modify the data if necessary
    #
    # mostly the 2halfd...

    if args.model_type == Args.ModelType.input2halfd:

        try:
            # this is to prevent running the padding twice in the notebook
            half_pad

        except NameError:

            logger.warning(
                "Modifying the data to add a 'reflect' half padding to the data. Only z-layers 2.5d models are supported!"
            )

            nlayers_2halfd = model.layers[0].input_shape[0][-1]

            predicted_layer_idx_2halfd = nlayers_2halfd // 2

            slice_2halfd_data_predicted_layer = slice(
                predicted_layer_idx_2halfd, predicted_layer_idx_2halfd + 1)

            logger.debug(f"{nlayers_2halfd=}")
            logger.debug(f"{predicted_layer_idx_2halfd=}")
            logger.debug(f"{slice_2halfd_data_predicted_layer=}")

            assert nlayers_2halfd % 2 == 1, f"{nlayers_2halfd=} should be an odd number"
Ejemplo n.º 6
0
def multiclass_roc_auc_score(
    y_true,
    y_score,
    labels,
    multi_class,
    average,
    sample_weight=None,
    invalid_proba_tolerance: float = 1e-6,
):
    """Multiclass roc auc score (copied from sklearn)

    Unlike the sklearn original, a small proportion of samples whose class
    scores do not sum up to 1 is tolerated: those samples are dropped (from
    ``y_true``, ``y_score`` and ``sample_weight``) before computing the score.

    Parameters
    ----------
    y_true : array-like of shape (n_samples,)
        True multiclass labels.

    y_score : array-like of shape (n_samples, n_classes)
        Target scores corresponding to probability estimates of a sample
        belonging to a particular class

    labels : array of shape (n_classes,) or None
        List of labels to index ``y_score`` used for multiclass. If ``None``,
        the lexical order of ``y_true`` is used to index ``y_score``.

    multi_class : string, 'ovr' or 'ovo'
        Determines the type of multiclass configuration to use.
        ``'ovr'``:
            Calculate metrics for the multiclass case using the one-vs-rest
            approach.
        ``'ovo'``:
            Calculate metrics for the multiclass case using the one-vs-one
            approach.

    average : 'macro' or 'weighted'
        Determines the type of averaging performed on the pairwise binary
        metric scores
        ``'macro'``:
            Calculate metrics for each label, and find their unweighted
            mean. This does not take label imbalance into account. Classes
            are assumed to be uniformly distributed.
        ``'weighted'``:
            Calculate metrics for each label, taking into account the
            prevalence of the classes.

    sample_weight : array-like of shape (n_samples,), default=None
        Sample weights. Not supported with ``multi_class='ovo'``.

    invalid_proba_tolerance : float in [0, 1], default=1e-6
        The proportion of samples that can eventually be ignored if their
        class scores do not sum up to 1.

    Returns
    -------
    The averaged roc auc score computed by sklearn's private averaging helpers.

    Raises
    ------
    ValueError
        If too many samples have scores not summing up to 1, if ``average``,
        ``multi_class`` or ``labels`` are invalid, or if ``sample_weight`` is
        given with ``multi_class='ovo'``.
    AssertionError
        If samples have to be dropped and ``invalid_proba_tolerance`` is
        outside of [0, 1].
    """
    # the boolean-mask filtering below needs real arrays, not generic array-likes
    y_true = np.asarray(y_true)
    y_score = np.asarray(y_score)

    # validation of the input y_score
    are_close = np.isclose(1, y_score.sum(axis=1))

    # A very small amount of voxels may have an issue to sum the probabilities
    # to 1, which might happen (probably, i suppose) because I use float16
    # instead of 64; such samples are tolerated up to `invalid_proba_tolerance`.
    if not np.all(are_close):

        logger.error(
            "Target scores need to be probabilities for multiclass "
            "roc_auc, i.e. they should sum up to 1.0 over classes")

        assert 0 <= invalid_proba_tolerance <= 1, f"{invalid_proba_tolerance=}"

        nsamples_not_close = int((~are_close).sum())
        percentage_samples_not_close = nsamples_not_close / are_close.size

        logger.warning(
            f"{nsamples_not_close=} ({percentage_samples_not_close=:.7%})")

        if percentage_samples_not_close > invalid_proba_tolerance:
            raise ValueError(
                f"Too many samples are not close 1 {nsamples_not_close=} {percentage_samples_not_close=:.7%} {invalid_proba_tolerance=:.7%}."
            )

        logger.warning(
            f"The amount of probabilities not summing up to 1 will be tolerated "
            f"{percentage_samples_not_close=:.7%} {invalid_proba_tolerance=:.7%}. "
            f"The bad samples will be ignored!")

        y_true = y_true[are_close]
        y_score = y_score[are_close, :]

        # keep the weights aligned with the samples that were kept
        if sample_weight is not None:
            sample_weight = np.asarray(sample_weight)[are_close]

    # validation for multiclass parameter specifications
    average_options = ("macro", "weighted")
    if average not in average_options:
        raise ValueError("average must be one of {0} for "
                         "multiclass problems".format(average_options))

    multiclass_options = ("ovo", "ovr")
    if multi_class not in multiclass_options:
        raise ValueError("multi_class='{0}' is not supported "
                         "for multiclass ROC AUC, multi_class must be "
                         "in {1}".format(multi_class, multiclass_options))

    # NOTE(review): these are private sklearn modules; their location and
    # signatures may differ between sklearn versions — confirm against the
    # pinned version.
    from sklearn.utils import column_or_1d
    from sklearn.preprocessing._label import _encode
    from sklearn.metrics._base import _average_multiclass_ovo_score
    from sklearn.preprocessing import label_binarize
    from sklearn.metrics._ranking import _binary_roc_auc_score
    from sklearn.metrics._base import _average_binary_score

    if labels is not None:
        labels = column_or_1d(labels)
        classes = _encode(labels)
        if len(classes) != len(labels):
            raise ValueError("Parameter 'labels' must be unique")
        if not np.array_equal(classes, labels):
            raise ValueError("Parameter 'labels' must be ordered")
        if len(classes) != y_score.shape[1]:
            raise ValueError(
                "Number of given labels, {0}, not equal to the number "
                "of columns in 'y_score', {1}".format(len(classes),
                                                      y_score.shape[1]))
        if len(np.setdiff1d(y_true, classes)):
            raise ValueError(
                "'y_true' contains labels not in parameter 'labels'")
    else:
        classes = _encode(y_true)
        if len(classes) != y_score.shape[1]:
            raise ValueError(
                "Number of classes in y_true not equal to the number of "
                "columns in 'y_score'")

    if multi_class == "ovo":
        if sample_weight is not None:
            raise ValueError("sample_weight is not supported "
                             "for multiclass one-vs-one ROC AUC, "
                             "'sample_weight' must be None in this case.")
        _, y_true_encoded = _encode(y_true, uniques=classes, encode=True)
        # Hand & Till (2001) implementation (ovo)
        return _average_multiclass_ovo_score(_binary_roc_auc_score,
                                             y_true_encoded,
                                             y_score,
                                             average=average)

    # ovr is same as multi-label
    y_true_multilabel = label_binarize(y_true, classes=classes)
    return _average_binary_score(_binary_roc_auc_score,
                                 y_true_multilabel,
                                 y_score,
                                 average,
                                 sample_weight=sample_weight)