def get_hosts():
    """Load the host configs from HOSTS_CONFIGS_YAML as a dict {hostname: Host}."""
    logger.debug(f"Getting host configs from {HOSTS_CONFIGS_YAML}")
    try:
        with open(HOSTS_CONFIGS_YAML, "r") as f:
            hosts_configs_ = yaml.load(f, Loader=yaml.FullLoader)
    except FileNotFoundError as ex:
        logger.exception(ex)
        logger.warning(f"Please create a yaml file at {HOSTS_CONFIGS_YAML}.")
        raise HostsConfigsError(f"FileNotFound {HOSTS_CONFIGS_YAML=}")

    assert hosts_configs_ is not None
    assert "hosts" in hosts_configs_, f"{hosts_configs_.keys()}"

    hosts_ = hosts_configs_["hosts"]
    assert isinstance(hosts_, dict), f"{type(hosts_)=}"

    for hostname, host in hosts_.items():
        assert isinstance(hostname, str), f"{type(hostname)=}"
        assert isinstance(host, dict), f"{hostname=} {type(host)=}"
        # the mapping key must match the entry's own `hostname` field
        assert hostname == host["hostname"]
        # replace the raw dict with a proper Host instance
        hosts_[hostname] = Host(**host)

    return hosts_
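# Usage sketch for `get_hosts` (hypothetical layout: the checks above only
# require each entry to carry a matching `hostname` field; any other fields
# depend on what `Host` accepts as keyword arguments). Expected YAML shape:
#
#   hosts:
#     gpu01:
#       hostname: gpu01
#     gpu02:
#       hostname: gpu02
#
#   hosts = get_hosts()
#   logger.info(f"Loaded hosts: {list(hosts.keys())}")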
def notify(msg: str, url: Optional[str] = None):
    logger.debug("Sending slack notification.")
    if url is not None:
        logger.debug("A non-default url was given.")
    else:
        try:
            url = default_url()
        except SlackJsonError:
            logger.exception(
                "A notification could not be sent because the webhook url could not be found. "
                "Please correct your slack.json file.")
            return  # no url to post to, so give up
    try:
        resp = requests.post(url=url, json={"text": msg}, timeout=10)
        logger.debug(f"{resp.text=}")
    except Exception:
        logger.exception("Something went wrong in the slack module.")
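# Usage sketch (webhook url elided; `notify` falls back to `default_url()`
# when no url is passed):
#
#   notify("training started")  # reads the url from slack.json
#   notify("training started", url="https://hooks.slack.com/services/...")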
def default_url():
    logger.debug(f"Getting default url from file {SLACK_JSON}")
    try:
        with open(SLACK_JSON, "r") as f:
            slack_json = json.load(f)
    except FileNotFoundError:
        logger.exception(
            f"Please create a json file at {SLACK_JSON} with the key `webhook_url`."
        )
        raise SlackJsonError("FileNotFound")
    try:
        return slack_json["webhook_url"]
    except KeyError:
        logger.exception(
            "Please create the key `webhook_url` in the slack.json file.")
        raise SlackJsonError("MissingWebhookUrl")
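# Expected content of the slack.json file (url value is a placeholder; only
# the `webhook_url` key is required by `default_url`):
#
#   {"webhook_url": "https://hooks.slack.com/services/..."}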
def validate_partitions_to_compute(partitions_to_compute, volume):
    if partitions_to_compute is None:
        logger.info("Using all available partitions.")
        return tuple(volume.metadata.set_partitions.keys())

    assert len(partitions_to_compute) > 0

    for part_alias in partitions_to_compute:
        try:
            volume[part_alias]  # existence check only; the value is not needed
        except KeyError as ex:
            logger.exception(ex)
            raise ValueError(
                f"Invalid volume partition. {volume.fullname=} {partitions_to_compute=}"
            ) from ex

    return tuple(partitions_to_compute)
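# Usage sketch (hypothetical `volume` object and partition aliases; any alias
# that cannot be indexed on `volume` raises ValueError):
#
#   partitions = validate_partitions_to_compute(("train", "val"), volume)
#   partitions = validate_partitions_to_compute(None, volume)  # all partitions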
    )
    raise TrainingFinished()


while True:
    try:
        fit()
    except TrainingFinished:
        slack.notify_finished()
        break
    except Exception as ex:
        logger.exception(ex)
        if args.batch_size_mode == Args.BatchSizeMode.try_max_and_fail:
            raise  # re-raise with the original traceback
        # back off: shrink the batch by one sample per gpu and retry
        batch_size -= n_gpus
        logger.warning(f"reduced {batch_size=}")
        if batch_size < n_gpus:
            raise FailedToFindBatchSize
        crop_seq_train.batch_size = batch_size
        crop_seq_val.batch_size = batch_size

# # History
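# Worked example of the back-off above (hypothetical numbers): with n_gpus=4
# and an initial batch_size=32 that fails (e.g. gpu out-of-memory), the loop
# retries with 28, 24, ..., 4, and raises FailedToFindBatchSize once
# batch_size would drop below 4, i.e. below one sample per gpu.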
def notify_exception(exception: Exception, hostname: Optional[str] = None):
    hostname = "unknown host" if hostname is None else hostname
    logger.exception(f"{exception.__class__.__name__} occurred.")
    notify(f"{exception.__class__.__name__}: {str(exception)}. {hostname=}")
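# Usage sketch (a minimal top-level guard; `socket.gethostname()` is one way
# to fill in the hostname, `main` is a hypothetical entry point):
#
#   import socket
#   try:
#       main()
#   except Exception as ex:
#       notify_exception(ex, hostname=socket.gethostname())
#       raise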
def multiclass_roc_auc_score(
    y_true,
    y_score,
    labels,
    multi_class,
    average,
    sample_weight=None,
    invalid_proba_tolerance: float = 1e-6,
):
    """Multiclass roc auc score (copied from sklearn).

    Parameters
    ----------
    y_true : array-like of shape (n_samples,)
        True multiclass labels.

    y_score : array-like of shape (n_samples, n_classes)
        Target scores corresponding to probability estimates of a sample
        belonging to a particular class.

    labels : array, shape = [n_classes] or None, optional (default=None)
        List of labels to index ``y_score`` used for multiclass. If ``None``,
        the lexical order of ``y_true`` is used to index ``y_score``.

    multi_class : string, 'ovr' or 'ovo'
        Determines the type of multiclass configuration to use.
        ``'ovr'``: Calculate metrics for the multiclass case using the
        one-vs-rest approach.
        ``'ovo'``: Calculate metrics for the multiclass case using the
        one-vs-one approach.

    average : 'macro' or 'weighted', optional (default='macro')
        Determines the type of averaging performed on the pairwise binary
        metric scores.
        ``'macro'``: Calculate metrics for each label, and find their
        unweighted mean. This does not take label imbalance into account.
        Classes are assumed to be uniformly distributed.
        ``'weighted'``: Calculate metrics for each label, taking into
        account the prevalence of the classes.

    sample_weight : array-like of shape (n_samples,), default=None
        Sample weights.

    invalid_proba_tolerance : float in [0, 1], default=1e-6
        The maximum proportion of samples that may be ignored if their class
        scores do not sum up to 1.
    """
    # validation of the input y_score: every row should sum up to 1
    are_close = np.isclose(1, y_score.sum(axis=1))

    # This try/except tolerates a small fraction of samples (voxels) whose
    # class scores do not sum up to 1, which can happen when the scores are
    # stored as float16 instead of float64.
    try:
        if not np.all(are_close):
            raise ValueError(
                "Target scores need to be probabilities for multiclass "
                "roc_auc, i.e. they should sum up to 1.0 over classes")
    except ValueError as ex:
        logger.exception(ex)

        assert 0 <= invalid_proba_tolerance <= 1, f"{invalid_proba_tolerance=}"

        nsamples_not_close = int((~are_close).sum())
        percentage_samples_not_close = nsamples_not_close / are_close.size
        logger.warning(
            f"{nsamples_not_close=} ({percentage_samples_not_close=:.7%})")

        if percentage_samples_not_close > invalid_proba_tolerance:
            raise ValueError(
                f"Too many samples are not close to 1 {nsamples_not_close=} "
                f"{percentage_samples_not_close=:.7%} {invalid_proba_tolerance=:.7%}."
            )

        logger.warning(
            f"The amount of probabilities not summing up to 1 will be tolerated "
            f"{percentage_samples_not_close=:.7%} {invalid_proba_tolerance=:.7%}. "
" f"The bad samples will be ignored!") y_true = y_true[are_close] y_score = y_score[are_close, :] # validation for multiclass parameter specifications average_options = ("macro", "weighted") if average not in average_options: raise ValueError("average must be one of {0} for " "multiclass problems".format(average_options)) multiclass_options = ("ovo", "ovr") if multi_class not in multiclass_options: raise ValueError("multi_class='{0}' is not supported " "for multiclass ROC AUC, multi_class must be " "in {1}".format(multi_class, multiclass_options)) from sklearn.utils import column_or_1d from sklearn.preprocessing._label import _encode from sklearn.metrics._base import _average_multiclass_ovo_score from sklearn.preprocessing import label_binarize from sklearn.metrics._ranking import _binary_roc_auc_score from sklearn.metrics._base import _average_binary_score if labels is not None: labels = column_or_1d(labels) classes = _encode(labels) if len(classes) != len(labels): raise ValueError("Parameter 'labels' must be unique") if not np.array_equal(classes, labels): raise ValueError("Parameter 'labels' must be ordered") if len(classes) != y_score.shape[1]: raise ValueError( "Number of given labels, {0}, not equal to the number " "of columns in 'y_score', {1}".format(len(classes), y_score.shape[1])) if len(np.setdiff1d(y_true, classes)): raise ValueError( "'y_true' contains labels not in parameter 'labels'") else: classes = _encode(y_true) if len(classes) != y_score.shape[1]: raise ValueError( "Number of classes in y_true not equal to the number of " "columns in 'y_score'") if multi_class == "ovo": if sample_weight is not None: raise ValueError("sample_weight is not supported " "for multiclass one-vs-one ROC AUC, " "'sample_weight' must be None in this case.") _, y_true_encoded = _encode(y_true, uniques=classes, encode=True) # Hand & Till (2001) implementation (ovo) return _average_multiclass_ovo_score(_binary_roc_auc_score, y_true_encoded, y_score, average=average) else: # ovr is same as multi-label y_true_multilabel = label_binarize(y_true, classes=classes) return _average_binary_score(_binary_roc_auc_score, y_true_multilabel, y_score, average, sample_weight=sample_weight)