Example #1
def get_hosts():

    logger.debug(f"Getting host configs from {HOSTS_CONFIGS_YAML}")

    try:
        with open(HOSTS_CONFIGS_YAML, "r") as f:
            hosts_configs_ = yaml.load(f, Loader=yaml.FullLoader)

    except FileNotFoundError as ex:
        logger.exception(ex)
        logger.warning(f"Please create a yaml file at {HOSTS_CONFIGS_YAML}.")
        raise HostsConfigsError(f"FileNotFound {HOSTS_CONFIGS_YAML=}")

    assert hosts_configs_ is not None
    assert "hosts" in hosts_configs_, f"{hosts_configs_.keys()}"

    hosts_ = hosts_configs_["hosts"]

    assert isinstance(hosts_, dict), f"{type(hosts_)=}"

    for hostname, host in hosts_.items():

        assert isinstance(hostname, str), f"{type(hostname)=}"
        assert isinstance(host, dict), f"{hostname=} {type(host)=}"
        assert hostname == host["hostname"]

        hosts_[hostname] = Host(**host)

    return hosts_
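
# A sketch of the YAML layout get_hosts() expects at HOSTS_CONFIGS_YAML; the field
# names other than `hostname` are hypothetical and depend on what Host(**host) accepts:
#
#   hosts:
#     my-host:              # the key must equal the nested `hostname` field
#       hostname: my-host
#       ...                 # any further Host keyword arguments
#
# hosts = get_hosts()  # -> {"my-host": Host(hostname="my-host", ...)}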
Example #2
def adjacent_layers_correlation(labels: ndarray, axis: int, nslices: int,
                                correlation_func: Callable[[ndarray, ndarray],
                                                           float]):

    assert labels.ndim == 3, f"{labels.ndim=}"

    assert 0 <= axis <= 2, f"{axis=}"
    logger.debug(f"{axis=}")

    axis_size = labels.shape[axis]
    logger.debug(f"{axis_size=}")

    assert 1 <= nslices <= axis_size - 1, f"{nslices=}"
    logger.debug(f"{nslices=}")

    logger.debug(f"{correlation_func=}")

    def get_slice(idx: int) -> tuple:
        slice_ = 3 * [slice(None, None, None)]
        slice_[axis] = slice(idx, idx + nslices, None)
        return tuple(slice_)

    corrs = [
        correlation_func(
            labels[get_slice(idx)],
            labels[get_slice(idx + 1)],
        ) for idx in range(axis_size - nslices)
    ]

    assert all(0 <= val <= 1
               for val in corrs), f"Issue with {correlation_func=}"

    return corrs
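
# A minimal usage sketch (not from the original project): it assumes numpy is
# imported as `np` at module level and uses a hypothetical overlap score as the
# correlation function; any Callable[[ndarray, ndarray], float] returning values
# in [0, 1] works here.
def _overlap_score(a: ndarray, b: ndarray) -> float:
    # fraction of voxels with identical labels in the two slabs
    return float((a == b).mean())

toy_labels = np.random.RandomState(0).randint(0, 3, size=(8, 8, 8))
corrs = adjacent_layers_correlation(
    toy_labels, axis=0, nslices=2, correlation_func=_overlap_score
)
# one value per pair of adjacent slabs along `axis`: len(corrs) == 8 - 2 == 6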
Example #3
def default_url():

    logger.debug(f"Getting default url from file {SLACK_JSON}")

    try:
        with open(SLACK_JSON, "r") as f:
            slack_json = json.load(f)

    except FileNotFoundError:
        logger.exception(
            f"Please create a json at {SLACK_JSON} with the key `webhook_url`."
        )
        raise SlackJsonError("FileNotFound")

    try:
        return slack_json["webhook_url"]

    except KeyError:
        logger.exception(
            f"Please create the key `webhook_url` in the slack.json file.")
        raise SlackJsonError("MissingWebhookUrl")
Example #4
def get_model():
    
    logger.info(f"Loading model from autosaved file: {tomo2seg_model.autosaved_model_path.name}")
    
    model = tf.keras.models.load_model(
        tomo2seg_model.autosaved_model_path_str,
        compile=False
    )
    
    logger.debug("Changing the model's input type to accept any size of crop.")
    
    in_ = model.layers[0]
    in_shape = in_.input_shape[0]
    input_n_channels = in_shape[-1:]

    logger.debug(f"{input_n_channels=}")
    
    # make it capable of getting any dimension in the input
    anysize_input = layers.Input(
        shape=[None, None, None] + list(input_n_channels),
        name="input_any_image_size"
    )
    
    logger.debug(f"{anysize_input=}")
    
    model.layers[0] = anysize_input
    
    # todo keep this somewhere instead of copying and pasting
    optimizer = optimizers.Adam()
    loss_func = keras_custom_loss.jaccard2_loss

    model.compile(loss=loss_func, optimizer=optimizer)
    
    return model
Example #5
def get_2d_blob_props(
    label_volume: ndarray,
    data_volume: ndarray,
    axes: Tuple[int, ...] = (0, 1, 2),
    parallel_nprocs: Optional[int] = None,
) -> DataFrame:
    assert min(axes) >= 0, f"{min(axes)=}"
    assert max(axes) <= 2, f"{max(axes)=}"

    all_blob_props = []

    for axis in axes:
        logger.info(f"computing 2d_blob_props on plane normal to {axis=}")
        all_blob_props.append(
            get_slice_props_parallel(
                label_volume,
                data_volume,
                normal_axis=axis,
                nprocs=parallel_nprocs,
            ))

    logger.debug("Converting 2d blob props dicts to data frames.")

    # index by position: all_blob_props is built in the iteration order of `axes`,
    # which is not necessarily (0, 1, 2)
    for i, axis in enumerate(axes):

        blob_props = all_blob_props[i]

        ref_len = len(blob_props["area"])

        for k in blob_props.keys():
            assert (shap := len(
                blob_props[k])) == ref_len, f"{k=} {shap=} {ref_len=}"

        all_blob_props[i] = pd.DataFrame(blob_props)

        logger.debug(f"{all_blob_props[i].shape=}")

    return pd.concat(all_blob_props, axis=0)
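
# A usage sketch (hypothetical variable names; the real volumes come from the
# project's data loaders): `label_volume` holds integer class labels and
# `data_volume` the gray values, with identical 3D shapes.
#
#   blob_props_df = get_2d_blob_props(
#       label_volume=label_volume,
#       data_volume=data_volume,
#       axes=(0, 1, 2),        # one pass of 2D slice props per plane normal
#       parallel_nprocs=4,     # forwarded to get_slice_props_parallel's Pool
#   )
#
# The result is the row-wise concatenation of one DataFrame per axis.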
Example #6
def get_slice_props_parallel(
    label_volume: ndarray,
    data_volume: ndarray,
    normal_axis: int,
    nprocs: Optional[int] = None,
    chunksize: int = 10,
) -> Dict[str, List[Union[int, float]]]:

    import sys
    assert label_volume.shape == data_volume.shape
    assert 0 <= normal_axis <= 2

    nslices = label_volume.shape[normal_axis]

    logger.debug(f"{normal_axis=} => {nslices=}")

    slices = []

    for slice_idx in range(nslices):

        slice_ = 3 * [slice(None)]
        slice_[normal_axis] = slice(slice_idx, slice_idx + 1)
        slice_ = tuple(slice_)

        slices.append(slice_)

    func = partial(
        _get_slice_props_parallel_do,
        label_volume=label_volume,
        data_volume=data_volume,
        normal_axis=normal_axis,
    )

    logger.debug(f"processing slices {nprocs=}")

    blobs2d_props = []

    with Pool(nprocs) as p:

        for i, props in enumerate(
                p.imap_unordered(func, enumerate(slices), chunksize=chunksize),
                1):
            sys.stderr.write(f'\rdone {i / nslices:.0%}')
            blobs2d_props.append(props)

    logger.debug("done => merging all slices' props dicts")

    return {
        k:
        np.concatenate([prop_dic[k] for prop_dic in blobs2d_props]).tolist()
        for k in blobs2d_props[0].keys()
    }
Example #7
def notify(msg: str, url: Optional[str] = None):
    logger.debug("Sending slack notification.")

    if url is not None:
        logger.debug("A non-default url was given.")

    else:
        try:
            url = default_url()
        except SlackJsonError:
            logger.exception(
                "A notification could not be sent because the webhook url could not be found. "
                "Please correct your slack.json file.")
            return

    try:
        resp = requests.post(url=url, json={"text": msg}, timeout=10)
        logger.debug(f"{resp.text=}")

    except Exception:
        logger.exception("Something went wrong in the slack module.")
Example #8
def class_counts_per_layer(labels: ndarray, axis: int,
                           nclasses: int) -> ndarray:

    assert labels.ndim == 3, f"{labels.ndim=}"

    assert 0 <= axis <= 2, f"{axis=}"
    logger.debug(f"{axis=}")

    axis_size = labels.shape[axis]
    logger.debug(f"{axis_size=}")

    counts = np.empty((axis_size, nclasses), dtype=np.int64)
    logger.debug(f"{counts.shape=}")

    for label_idx in range(nclasses):

        axes_sum = [0, 1, 2]
        axes_sum.pop(axis)

        counts[:, label_idx] = np.sum((labels == label_idx),
                                      axis=tuple(axes_sum))

    return counts
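
# A minimal usage sketch with a toy volume (not from the original project;
# assumes numpy is imported as `np` at module level).
toy_labels = np.random.RandomState(0).randint(0, 3, size=(4, 5, 6))
counts = class_counts_per_layer(toy_labels, axis=2, nclasses=3)
# counts.shape == (6, 3): one row per layer along `axis`, one column per class;
# each row sums to 4 * 5, the number of voxels in a single layer.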
Example #9
    logger.setLevel(logging.DEBUG)
    random_state = np.random.RandomState(args.random_state_seed)

    n_gpus = len(tf.config.list_physical_devices('GPU'))

    tf_version = tf.__version__
    logger.info(f"{tf_version=}")

    logger.info(
        f"Hostname: {args.host.hostname}\nNum GPUs Available: {n_gpus}\nThis should be:\n\t"
        + '\n\t'.join(['2 on R790-TOMO', '1 on akela', '1 on hathi', '1 on krilin'])
    )

    logger.debug(
        "physical GPU devices:\n\t"
        + "\n\t".join(map(str, tf.config.list_physical_devices('GPU'))) + "\n"
        + "logical GPU devices:\n\t"
        + "\n\t".join(map(str, tf.config.list_logical_devices('GPU')))
    )

    # xla auto-clustering optimization (see: https://www.tensorflow.org/xla#auto-clustering)
    # this seems to break the training
    tf.config.optimizer.set_jit(False)

    # get a distribution strategy to use both gpus (see https://www.tensorflow.org/guide/distributed_training)
    gpu_strategy = tf.distribute.MirroredStrategy()
    logger.debug(f"{gpu_strategy=}")

    logger.info(f"{dict2str(asdict(args.host))}")

    MAX_INTERNAL_NVOXELS = int(
        args.host.gpu_max_memory_factor * t2s_hosts.MAX_INTERNAL_NVOXELS
    )
Example #10
def u_net2halfd_IIencdec(
    input_shape,
    nb_filters_0,
    output_channels,
    depth,
    sigma_noise,
    convlayer,
    updown_conv_sampling,
    unet_block_kwargs,
    unet_down_kwargs,
    unet_up_kwargs,
    name=None,
):
    """
    todo make this multichannel enabled
    """
    assert convlayer in (ConvLayer.conv2d, ConvLayer.conv2d_separable), f"{convlayer=}"
    
    unet_block_kwargs = {
        **unet_block_kwargs,
        **dict(convlayer=convlayer),
        **dict(return_layers=True),
    }
    
    logger.debug(f"{dict2str(unet_block_kwargs)=}")
    
    unet_block = functools.partial(
        generic_unet_block, 
        **unet_block_kwargs,
    )

    unet_down_kwargs = {
        **unet_down_kwargs,
        **dict(
            conv_sampling=updown_conv_sampling,
            convlayer=convlayer,
        ),
        **dict(return_layers=True),
    }
    
    logger.debug(f"{dict2str(unet_down_kwargs)=}")

    unet_down = functools.partial(
        generic_unet_down, **unet_down_kwargs
    )

    unet_up_kwargs = {
        **unet_up_kwargs,
        **dict(
            conv_sampling=updown_conv_sampling,
            convlayer=convlayer,
        ),
        **dict(return_layers=True),
    }
    
    logger.debug(f"{dict2str(unet_up_kwargs)=}")

    unet_up = functools.partial(
        generic_unet_up, **unet_up_kwargs
    )
    
    nlayers = int(input_shape[-1])
    
    logger.debug(f"{nlayers=}")
    
    predicted_layer = nlayers // 2
    
    logger.debug(f"{predicted_layer=}")
    
    from tensorflow import slice as tf_slice

#     x = x0 = layers.Input(input_shape, name="input")
    x0 = layers.Input(input_shape, name="input")
    
    x0_splitted = [
        layers.Lambda(
            lambda x_: tf_slice(x_, (0, 0, 0, ch), (-1, -1, -1, 1))
        )(x0)
        for ch in range(nlayers)
    ]
    
    xs = x0_splitted
    
    skips = {}
    for i in range(depth):
        
        nb_filters_begin = nb_filters_0 * 2 ** i
        nb_filters_end = nb_filters_0 * 2 ** (i + 1)
        
        block_name = f"enc-block-{i}"
        block = unet_block(name=block_name, nb_filters_1=nb_filters_begin, nb_filters_2=nb_filters_end)
        
        block_name = f"enc-block-{i}"
        down_block = unet_down(nb_filters=nb_filters_end, name=block_name)
        
        block_reused_layers = None
        down_reused_layers = None
        
        for layer_idx in range(nlayers): 
            
            y, block_reused_layers = block(xs[layer_idx], reused_layers=block_reused_layers)
            xs[layer_idx] = y
            
            skips[(i, layer_idx)] = y
            
            y, down_reused_layers = down_block(xs[layer_idx], reused_layers=down_reused_layers)
            xs[layer_idx] = y

    nb_filters_begin = nb_filters_0 * 2 ** depth
    nb_filters_end = nb_filters_0 * 2 ** (depth + 1)
    
    block_name = f"enc-block-{depth}"
    block = unet_block(
        name=block_name, 
        nb_filters_1=nb_filters_begin, 
        nb_filters_2=nb_filters_end
    )
    
    block_reused_layers = None
    for layer_idx in range(nlayers):
        
        y, block_reused_layers = block(xs[layer_idx], reused_layers=block_reused_layers)
        xs[layer_idx] = y
            
#     layer_name = f"join-{depth}"
#     x = layers.concatenate(xs, axis=-1, name=layer_name)
    
#     nb_filters_begin = nlayers * (nb_filters_0 * 2 ** (depth + 1))
#     nb_filters_end = nlayers * (nb_filters_0 * 2 ** depth)
    
#     block_name = f"joined-enc-block-{depth}"
#     block = unet_block(
#         name=block_name, 
#         nb_filters_1=nb_filters_begin, 
#         nb_filters_2=nb_filters_end,
#     )
    
#     x, _ = block(x, reused_layers=None)

        
    for i in reversed(range(depth)):
        
        nb_filters_up = nb_filters_0 * 2 ** (i + 2)
        nb_filters_conv = nb_filters_0 * 2 ** (i + 1)
        
        block_name = f"dec-block-{i}"
        up_block = unet_up(
            nb_filters=nb_filters_up, 
            name=block_name,
        )
        block = unet_block(
            name=block_name, 
            nb_filters_1=nb_filters_conv, 
            nb_filters_2=nb_filters_conv,
        )
        
        up_reused_layers = None
        block_reused_layers = None
        
        for layer_idx in range(nlayers): 
            
            y, up_reused_layers = up_block(
                xs[layer_idx], 
                reused_layers=up_reused_layers
            )
            xs[layer_idx] = y
        
            x_skip = skips[(i, layer_idx)]
        
            xs[layer_idx] = layers.concatenate([x_skip, xs[layer_idx]], axis=-1, name=f"concat_{i}-layer_{layer_idx}")
        
            y, block_reused_layers = block(
                xs[layer_idx], 
                reused_layers=block_reused_layers,
            )
            xs[layer_idx] = y
            
    x = layers.concatenate(xs, name="join")

    if sigma_noise > 0:
        layer_name = "gaussian-noise"
        x = layers.GaussianNoise(sigma_noise, name=layer_name)(x)

    if convlayer in (ConvLayer.conv2d, ConvLayer.conv2d_separable):
        x = layers.Conv2D(output_channels, 1, activation="softmax", name="out")(x)

    elif convlayer in (ConvLayer.conv3d, ConvLayer.conv3d_separable):
        x = layers.Conv3D(output_channels, 1, activation="softmax", name="out")(x)

    else:
        raise ValueError(f"{convlayer=}")

    return Model(x0, x, name=name)
Example #11
def main():

    # ===================================================== models =====================================================

    # todo update these with the correct values
    pa66gf30_proportions = [
        .809861,  # matrix
        .189801,  # fiber
        .000338,  # porosity
    ]

    pa66gf30_classwise_histograms = np.load(
        "../data/PA66GF30.v1/ground-truth-analysis/histogram-per-label.npy")

    pa66gf30_models = [
        UniformProbabilitiesClassifier(name="pa66gf30",
                                       proportions=pa66gf30_proportions),
        Order0Classifier(
            name="pa66gf30",
            n_classes=len(pa66gf30_proportions),
            p0=pa66gf30_proportions[0],
        ),
        BinwiseOrder0Classifier(
            name="pa66gf30",
            classwise_histograms=pa66gf30_classwise_histograms,
        ),
    ]

    models = []
    models.extend(pa66gf30_models)

    # ===================================================== losses =====================================================

    global_losses = [
        attr for attr, _ in inspect.getmembers(TheoreticalModel)
        if isinstance(attr, str) and attr.endswith("loss")
    ]

    classwise_losses = [
        attr for attr, _ in inspect.getmembers(TheoreticalModel)
        if isinstance(attr, str) and attr.endswith("classwise_losses")
    ]

    losses = global_losses + classwise_losses

    logger.debug(f"{global_losses=} {classwise_losses=}")
    logger.info(f"{losses=}")

    # ===================================================== coeffs =====================================================

    global_coeffs = [
        attr for attr, _ in inspect.getmembers(TheoreticalModel)
        if isinstance(attr, str) and attr.endswith("coeff")
    ]

    classwise_coeffs = [
        attr for attr, _ in inspect.getmembers(TheoreticalModel)
        if isinstance(attr, str) and attr.endswith("classwise_coeffs")
    ]

    coeffs = global_coeffs + classwise_coeffs

    logger.debug(f"{global_coeffs=} {classwise_coeffs=}")
    logger.info(f"{coeffs=}")

    # ===================================================== table ======================================================

    def get_value(model: TheoreticalModel, attr: str,
                  is_classwise: bool) -> str:

        try:
            val = getattr(model, attr)

        except AttributeError:
            return "not def"

        if val is None:
            return "not def"

        if not is_classwise:
            return f'{float(f"{val:.4g}"):.2%}'

        else:
            return ", ".join(f'{float(f"{v:.4g}"):.2%}' for v in val)

    table_losses = pd.DataFrame(
        data={
            **{
                attr: [getattr(model, attr) for model in models]
                for attr in ["fullname"]
            },
            **{
                attr.split("_loss")[0]: [
                    get_value(model, attr, False) for model in models
                ]
                for attr in global_losses
            },
            **{
                attr.split("_losses")[0]: [
                    get_value(model, attr, True) for model in models
                ]
                for attr in classwise_losses
            },
        }).set_index("fullname")

    table_coeffs = pd.DataFrame(
        data={
            **{
                attr: [getattr(model, attr) for model in models]
                for attr in ["fullname"]
            },
            **{
                attr.split("_coeff")[0]: [
                    get_value(model, attr, False) for model in models
                ]
                for attr in global_coeffs
            },
            **{
                attr.split("_coeffs")[0]: [
                    get_value(model, attr, True) for model in models
                ]
                for attr in classwise_coeffs
            },
        }).set_index("fullname")

    print("losses:")
    print(tabulate(table_losses, headers="keys", tablefmt="psql"))

    print("coefficients:")
    print(tabulate(table_coeffs, headers="keys", tablefmt="psql"))
Example #12
logger.setLevel(logging.DEBUG)


# In[5]:


random_state_seed = 42
random_state = np.random.RandomState(random_state_seed)
runid = int(time.time())
logger.info(f"{runid=}")


# In[6]:


logger.debug(f"{tf.__version__=}")
logger.info(f"Num GPUs Available: {len(tf.config.list_physical_devices('GPU'))}\nThis should be 2 on R790-TOMO.")
logger.debug(f"Both here should return 2 devices...\n{tf.config.list_physical_devices('GPU')=}\n{tf.config.list_logical_devices('GPU')=}")

# xla auto-clustering optimization (see: https://www.tensorflow.org/xla#auto-clustering)
# this seems to break the training
tf.config.optimizer.set_jit(False)

# get a distribution strategy to use both gpus (see https://www.tensorflow.org/guide/distributed_training)
strategy = tf.distribute.MirroredStrategy()  
logger.debug(f"{strategy=}")


# # Options

# In[7]:
Example #13
if args.opts.save_logs:
    fh = logging.FileHandler(estimation_volume.exec_log_path_str)
    fh.setFormatter(logger_get_formatter())
    logger.addHandler(fh)
    logger.info(f"Added a new file handler to the logger. {estimation_volume.exec_log_path_str=}")
    logger.setLevel(logging.DEBUG)

# show inputs

# In[ ]:


logger.info(f"args\n{pprint_module.PrettyPrinter(indent=4, compact=False).pformat(dataclasses.asdict(args))}")
logger.info(f"{estimation_volume=}")
            
logger.debug(f"{volume=}")
logger.debug(f"{partition=}")
logger.debug(f"{tomo2seg_model=}")

if args.model_type == process.ModelType.input2halfd:
    raise NotImplementedError(f"{args.model_type=}")

# # Setup GPUs
# this is here so that the logs will go to the file handler

n_gpus = len(tf.config.list_physical_devices('GPU'))
    
logger.debug(f"{tf.__version__=}")
logger.info(f"Num GPUs Available: {n_gpus}\nThis should be 2 on R790-TOMO.")
logger.debug(f"Should return 2 devices...\n{tf.config.list_physical_devices('GPU')=}")
logger.debug(f"Should return 2 devices...\n{tf.config.list_logical_devices('GPU')=}")
Example #14
    def get_model():

        try:
            best_autosaved_model_path = tomo2seg_model.autosaved2_best_model_path  # it's a property
            assert best_autosaved_model_path is not None, "no-autosaved2"
        except ValueError as ex:

            if ex.args[0] != "min() arg is an empty sequence":
                raise

            logger.warning(
                f"{tomo2seg_model.name=} did not use autosaved2 apparently, falling back to autosaved."
            )
            best_autosaved_model_path = tomo2seg_model.autosaved_model_path

        except AssertionError as ex:

            if ex.args[0] != "no-autosaved2":
                raise

            logger.warning(
                f"{tomo2seg_model.name=} did not use autosaved2 apparently, falling back to autosaved."
            )
            best_autosaved_model_path = tomo2seg_model.autosaved_model_path

        print(best_autosaved_model_path)
        logger.info(
            f"Loading model from autosaved file: {best_autosaved_model_path.name}"
        )

        model = tf.keras.models.load_model(str(best_autosaved_model_path),
                                           compile=False)

        logger.debug(
            "Changing the model's input type to accept any size of crop.")

        in_ = model.layers[0]
        in_shape = in_.input_shape[0]
        input_n_channels = in_shape[-1]

        logger.debug(f"{input_n_channels=}")

        if input_n_channels > 1:

            if args.model_type == Args.ModelType.input2halfd:
                if len(in_shape) != 4:
                    raise f"len({in_shape=}) > 4, so this model must be multi-channel. Not supported yet..."
            else:
                raise NotImplementedError(f"{input_n_channels=} > 1")

        # make it capable of getting any dimension in the input
        # "-2" = 1 for the batch size, 1 for the nb.channels
        anysize_target_shape = (len(in_shape) - 2) * [None] + [
            input_n_channels
        ]
        logger.debug(f"{anysize_target_shape=}")

        anysize_input = layers.Input(shape=anysize_target_shape,
                                     name="input_any_image_size")
        logger.debug(f"{anysize_input=}")

        model.layers[0] = anysize_input

        # this doesn't really matter bc this script will not fit the model
        optimizer = optimizers.Adam()
        loss_func = keras_custom_loss.jaccard2_loss

        logger.debug("Starting model compilation")
        model.compile(loss=loss_func, optimizer=optimizer)
        logger.debug("Done!")

        return model
Example #15
    if args.opts.save_logs:
        logger_add_file_handler(logger, estimation_volume.exec_log_path)

    # this is informal metadata for human use
    estimation_volume["process_volume_args"] = dataclasses.asdict(args)

    # show args

    logger.info("showing args")

    logger.info(f"{estimation_volume=}")
    logger.info(f"{estimation_volume.fullname=}")
    logger.info(f"{estimation_volume.dir=}")

    logger.debug(f"{volume=}")
    logger.debug(f"{partition=}")
    logger.debug(f"{tomo2seg_model=}")
    logger.debug(f"{tomo2seg_model.name=}")

    # # Setup

    logger.info("set up")

    logger.setLevel(logging.DEBUG)
    random_state = np.random.RandomState(args.random_state_seed)

    n_gpus = len(tf.config.list_physical_devices('GPU'))
    estimation_volume["n_gpus"] = n_gpus

    tf_version = tf.__version__
Example #16
def get_hist_per_label(
        data_seq: ndarray,
        labels_seq: ndarray,
        labels_idx: List[int],
        nbins: int = 256,
        min_bin_edge: int = 0,
        max_bin_edge: Optional[int] = None,  # auto from data dtype
):
    """
    data_seq: gray level data in a sequential vector
    labels_seq: segmentation classes in a sequential vector
    """
    logger.debug("computing histogram per label")

    assert (tensor_order := len(data_seq.shape)) == 1, f"{tensor_order=}"
    assert (tensor_order := len(labels_seq.shape)) == 1, f"{tensor_order=}"

    logger.debug(f"{data_seq.shape=}")
    logger.debug(f"{labels_seq.shape=}")

    assert len(labels_idx) > 0, f"{len(labels_idx)=}"
    assert all(isinstance(v, int) for v in labels_idx), f"{labels_idx=}"

    logger.debug(f"{labels_idx=}")

    nclasses = len(labels_idx)

    logger.debug(f"{nclasses=}")

    assert nbins > 1, f"{nbins=}"

    logger.debug(f"{nbins=}")

    assert min_bin_edge >= 0, f"{min_bin_edge=}"

    dtype = str(data_seq.dtype)
    auto_max_bin_edge = MAX_BIN_EDGE[dtype]

    if max_bin_edge is None:
        max_bin_edge = auto_max_bin_edge

    else:
        assert max_bin_edge <= auto_max_bin_edge, f"{max_bin_edge=}, given {dtype=} ==> max is {auto_max_bin_edge=}"

    logger.debug(f"{min_bin_edge=}")
    logger.debug(f"{max_bin_edge=}")

    # --------------------------- real stuff ---------------------------

    data_hists_per_label = np.zeros(
        (nclasses, nbins),
        dtype=np.int64,  # int64 is important to not overflow
    )

    hist_bin_edges = np.linspace(min_bin_edge, max_bin_edge,
                                 nbins + 1).astype(int)

    for label_idx in labels_idx:

        logger.debug(f"computing {label_idx=}")

        data_hists_per_label[label_idx], _ = np.histogram(
            data_seq[labels_seq == label_idx],
            bins=hist_bin_edges,
            density=False,
        )

    return data_hists_per_label, hist_bin_edges
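
# A minimal usage sketch with toy flattened arrays (not from the original project).
# It assumes numpy is imported as `np` at module level and that MAX_BIN_EDGE maps a
# dtype name to the gray-value upper bound (e.g. "uint8" -> 256); that mapping is an
# assumption about the module's constant.
rng = np.random.RandomState(0)
data_seq = rng.randint(0, 256, size=10_000).astype(np.uint8)
labels_seq = rng.randint(0, 3, size=10_000)
hists, bin_edges = get_hist_per_label(
    data_seq, labels_seq, labels_idx=[0, 1, 2], nbins=256
)
# hists.shape == (3, 256): one histogram row per label; bin_edges has 257 entries.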