def _check_values(df, T, E):
    """Run the pre-fit validation helpers against ``df``, ``T`` and ``E``.

    The helpers are invoked in a fixed order:

    1. ``pass_for_numeric_dtypes_or_raise(df)``
    2. ``check_nans_or_infs`` on ``T``, then ``E``, then ``df``
    3. ``check_low_var(df)``
    4. ``check_complete_separation(df, E, T)``

    Nothing is returned and nothing is raised directly here; any exception
    or warning originates in the helpers.

    NOTE(review): ``T`` and ``E`` are presumably the duration and
    event-indicator vectors that accompany ``df`` -- confirm with the caller.
    """
    pass_for_numeric_dtypes_or_raise(df)

    # Same helper, three inputs; the tuple order mirrors the required call order.
    for values in (T, E, df):
        check_nans_or_infs(values)

    check_low_var(df)
    check_complete_separation(df, E, T)
def _check_values(self, df, T, E, weights, entries):
    """Validate the fitting inputs, warning or raising on suspicious data.

    The shared helpers run first, in this order: numeric-dtype check on
    ``df``; ``check_nans_or_infs`` on ``df``, ``T`` and ``E``;
    ``check_positivity(T)``; and
    ``check_complete_separation(df, E, T, self.event_col)``.

    Two optional sections follow:

    * When ``self.weights_col`` is truthy, ``weights`` is inspected. A
      ``StatisticalWarning`` is emitted if any weight differs from its
      integer cast and ``self.robust`` is falsy, and a ``ValueError`` is
      raised if any weight is ``<= 0``.
    * When ``self.entry_col`` is truthy, a warning (default category) is
      emitted reporting how many rows have ``entries > T``.

    Parameters
    ----------
    df
        Object handed to the dtype, NaN/inf and separation helpers.
        NOTE(review): presumably the covariate DataFrame -- confirm.
    T, E
        Objects handed to the NaN/inf helpers; ``T`` is also checked for
        positivity and compared element-wise against ``entries``.
    weights
        Only read when ``self.weights_col`` is truthy; must support
        ``.astype(int)`` and element-wise comparison.
    entries
        Only read when ``self.entry_col`` is truthy; compared element-wise
        against ``T``.

    Raises
    ------
    ValueError
        If ``self.weights_col`` is truthy and any weight is ``<= 0``.
    """
    check_for_numeric_dtypes_or_raise(df)
    check_nans_or_infs(df)
    check_nans_or_infs(T)
    check_nans_or_infs(E)
    check_positivity(T)
    check_complete_separation(df, E, T, self.event_col)

    if self.weights_col:
        # Non-integer weights without the robust estimator: warn before fitting.
        if (weights.astype(int) != weights).any() and not self.robust:
            warnings.warn(
                dedent(
                    """It appears your weights are not integers, possibly propensity or sampling scores then? It's important to know that the naive variance estimates of the coefficients are biased. Instead a) set `robust=True` in the call to `fit`, or b) use Monte Carlo to estimate the variances. See paper "Variance estimation when using inverse probability of treatment weighting (IPTW) with survival analysis"""
                ),
                StatisticalWarning,
            )
        if (weights <= 0).any():
            raise ValueError("values in weight column %s must be positive." % self.weights_col)

    if self.entry_col:
        count_invalid_rows = (entries > T).sum()
        if count_invalid_rows:
            # The count must be interpolated; previously the raw "%d"
            # placeholder was shown to the user verbatim.
            warnings.warn("""There exist %d rows where entry > duration.""" % count_invalid_rows)
def _check_values(self, df, T, E, event_col):
    """Run the validation helpers on the fitting inputs.

    Call order: ``check_for_numeric_dtypes_or_raise(df)``;
    ``check_nans_or_infs`` on ``T``, ``E`` and ``df``;
    ``check_complete_separation(df, E, T, event_col)``; and, only when
    ``self.fit_intercept`` is truthy, ``check_low_var(df)``.

    Nothing is returned; any exception or warning originates in the helpers.

    NOTE(review): ``event_col`` is forwarded untouched to
    ``check_complete_separation`` -- presumably the event column's name;
    confirm with the caller.
    """
    check_for_numeric_dtypes_or_raise(df)

    # Same helper, three inputs; the tuple order mirrors the required call order.
    for values in (T, E, df):
        check_nans_or_infs(values)

    check_complete_separation(df, E, T, event_col)

    # The low-variance check runs only when an intercept is being fitted.
    if not self.fit_intercept:
        return
    check_low_var(df)
def _check_values(df, E):
    """Run the validation helpers on ``df`` and ``E``.

    Call order: ``check_low_var(df)``, then
    ``check_complete_separation(df, E)``, then
    ``pass_for_numeric_dtypes_or_raise(df)``.

    Nothing is returned; any exception or warning originates in the helpers.
    """
    # NOTE: check_for_overlapping_intervals(df) is deliberately not called
    # here -- it is currently too slow for production.
    check_low_var(df)
    check_complete_separation(df, E)
    pass_for_numeric_dtypes_or_raise(df)
def _check_values(df, E):
    """Run the validation helpers on ``df`` and ``E``.

    Call order: ``check_low_var(df)``, then
    ``check_complete_separation(df, E)``, then
    ``pass_for_numeric_dtypes_or_raise(df)``.

    Nothing is returned; any exception or warning originates in the helpers.
    """
    check_low_var(df)
    check_complete_separation(df, E)
    pass_for_numeric_dtypes_or_raise(df)