def test_split_by_group():
    """Check that ``utils.split_by_group`` exposes each group's rows under its label.

    A four-row frame with two groups (``'a'`` holding 1.0, ``'b'`` holding 2.0)
    is split on the ``'group'`` column; every ``'val'`` entry retrieved for a
    label must be close to that label's constant.
    """
    frame = pd.DataFrame({
        'group': ['a', 'a', 'b', 'b'],
        'val': [1.0, 1.0, 2.0, 2.0],
    })
    by_label = utils.split_by_group(frame, 'group')

    # Same two checks as before, in the same order: 'a' first, then 'b'.
    for label, expected in (('a', 1.0), ('b', 2.0)):
        assert np.allclose(by_label[label]['val'].values, expected)
def calc_peaks(date_df, model_df, potential_peaked_groups, poly_fit,
               time_resolution=0.1):
    """Merge model output with dates, split it by location, and compute peaks.

    Args:
        date_df: Frame merged onto ``model_df``. The merge is called with no
            explicit keys, so the join columns are whatever ``merge`` picks
            by default (presumably the shared column names; confirm against
            the callers).
        model_df: Frame providing the ``merge`` method; the merged result
            must contain a ``'location'`` column.
        potential_peaked_groups: Passed unchanged as the first argument of
            ``compute_peak_date`` and ``compute_peak_day``.
        poly_fit: Passed unchanged to both peak computations.
        time_resolution: Value forwarded as ``time_resolution`` to both peak
            computations. Defaults to ``0.1``, the value that used to be
            hard-coded, so existing callers are unaffected.

    Returns:
        A tuple ``(data, peak_date, peak_day)`` where ``data`` is the result
        of ``utils.split_by_group`` on the merged frame, and the other two
        are the results of ``compute_peak_date`` and ``compute_peak_day``.
    """
    merged = model_df.merge(date_df)
    data = utils.split_by_group(merged, 'location')

    # Both computations receive the same resolution so their results are
    # derived with one consistent setting.
    peak_date = compute_peak_date(
        potential_peaked_groups, data, poly_fit,
        time_resolution=time_resolution)
    peak_day = compute_peak_day(
        potential_peaked_groups, data, poly_fit,
        time_resolution=time_resolution)
    return data, peak_date, peak_day
def get_peak_detector(self):
    """Build and train ``self.peak_detector`` from the labelled groups.

    ``self.df`` is re-split by ``self.col_group`` (overwriting
    ``self.df_by_group``). For every group found in ``self.peaked_groups`` or
    ``self.not_peaked_groups`` the ``self.col_log_derf_obs`` and
    ``self.col_t`` columns are collected as arrays together with a label:
    1 when the group is in ``self.peaked_groups``, otherwise 0. Groups in
    neither collection are skipped. ``self.groups`` is reset and filled with
    the retained group names, in iteration order.

    The collected data is handed to ``PieceWiseLinearPeakDetector`` and
    ``train_peak_classifier()`` is called on the resulting object.
    """
    self.df_by_group = split_by_group(self.df, self.col_group)
    self.groups = []
    observations, time_points, labels = [], [], []

    for name, frame in self.df_by_group.items():
        has_peaked = name in self.peaked_groups
        # Only groups with a known peaked / not-peaked status are used.
        if not (has_peaked or name in self.not_peaked_groups):
            continue
        observations.append(frame[self.col_log_derf_obs].to_numpy())
        time_points.append(frame[self.col_t].to_numpy())
        labels.append(1 if has_peaked else 0)
        self.groups.append(name)

    self.peak_detector = PieceWiseLinearPeakDetector(
        observations, self.groups, time_points, labels)
    self.peak_detector.train_peak_classifier()
def __init__(self, df, col_obs, col_group, col_est=None, models_dict=None):
    """Store the inputs and pre-compute per-group observations and estimates.

    Args:
        df: Frame holding the data; split by ``col_group`` via
            ``split_by_group``.
        col_obs: Name of the column holding the observations.
        col_group: Name of the column used to split ``df`` into groups.
        col_est: Optional name of a column holding estimates. When given,
            it takes precedence over ``models_dict``.
        models_dict: Optional mapping from group name to a model object
            exposing ``fun``, ``t`` and ``params``; used only when
            ``col_est`` is ``None``.

    Raises:
        RuntimeError: If both ``col_est`` and ``models_dict`` are ``None``.
    """
    if col_est is None and models_dict is None:
        raise RuntimeError(
            'must have either a column of estimates or CurveModels to generate estimates'
        )

    self.df = df
    self.col_obs = col_obs
    self.col_group = col_group
    self.col_est = col_est
    self.models_dict = models_dict

    self.df_by_group = split_by_group(self.df, self.col_group)
    self.obs_by_group = {}
    self.est_by_group = {}
    self.groups = []

    estimates_from_column = self.col_est is not None
    for name, frame in self.df_by_group.items():
        self.obs_by_group[name] = frame[self.col_obs].to_numpy()
        if estimates_from_column:
            estimate = frame[self.col_est].to_numpy()
        else:
            # No estimate column: evaluate the group's model on its own
            # stored time points and parameters.
            curve = self.models_dict[name]
            estimate = curve.fun(curve.t, curve.params)
        self.est_by_group[name] = estimate
        self.groups.append(name)