def eval_policy(save_dir: Optional[str], env: Union[RealEnv, SimEnv, MetaDomainRandWrapper], policy: Policy,
                mc_estimator: bool, prefix: str, num_rollouts: int, num_parallel_envs: int = 1) -> to.Tensor:
    """
    Evaluate a policy on the target system (real-world platform).
    This method is static to facilitate evaluation of specific policies in hindsight.

    :param save_dir: directory to save the snapshots, i.e. the results, in; if `None` nothing is saved
    :param env: target environment for evaluation; in the sim-2-sim case this is another simulation instance
    :param policy: policy to evaluate
    :param mc_estimator: estimate the return with a sample average (`True`) or a lower confidence bound (`False`)
                         obtained from bootstrapping
    :param prefix: to control the saving for the evaluation of an initial policy, `None` to deactivate
    :param num_rollouts: number of rollouts to collect on the target system
    :param num_parallel_envs: number of environments for the parallel sampler (only used for `SimEnv`)
    :return: estimated return in the target domain
    """
    if save_dir is not None:
        print_cbt(f'Executing {prefix}_policy ...', 'c', bright=True)

    rets_real = to.zeros(num_rollouts)
    if isinstance(inner_env(env), RealEnv):
        # Evaluate sequentially when conducting a sim-to-real experiment
        for i in range(num_rollouts):
            rets_real[i] = rollout(env, policy, eval=True).undiscounted_return()
            # If a reward of -1 is given, skip the remaining evaluation and set all returns to zero
            if rets_real[i] == -1:
                print_cbt('Set all returns for this policy to zero.', color='c')
                rets_real = to.zeros(num_rollouts)
                break

    elif isinstance(inner_env(env), SimEnv):
        # Create a parallel sampler when conducting a sim-to-sim experiment
        sampler = ParallelRolloutSampler(env, policy, num_workers=num_parallel_envs, min_rollouts=num_rollouts)
        ros = sampler.sample()
        for i in range(num_rollouts):
            rets_real[i] = ros[i].undiscounted_return()

    else:
        raise pyrado.TypeErr(given=inner_env(env), expected_type=[RealEnv, SimEnv])

    if save_dir is not None:
        # Save the evaluation results
        to.save(rets_real, osp.join(save_dir, f'{prefix}_returns_real.pt'))

        print_cbt('Target domain performance', bright=True)
        print(tabulate([['mean return', to.mean(rets_real).item()],
                        ['std return', to.std(rets_real)],
                        ['min return', to.min(rets_real)],
                        ['max return', to.max(rets_real)]]))

    if mc_estimator:
        # Simple Monte Carlo estimate, i.e. the sample mean
        return to.mean(rets_real)
    else:
        # Conservative estimate: lower bound of a one-sided bootstrap confidence interval on the mean return
        return to.from_numpy(bootstrap_ci(rets_real.numpy(), np.mean,
                                          num_reps=1000, alpha=0.05, ci_sides=1, studentized=False)[1])
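# The `mc_estimator=False` branch above uses the lower end of a one-sided bootstrap confidence interval on the mean
# return as a conservative performance estimate. The following NumPy-only sketch illustrates that idea; the function
# name and the numbers are made up for illustration and are not part of pyrado.
import numpy as np


def lower_conf_bound_on_mean(returns: np.ndarray, num_reps: int = 1000, alpha: float = 0.05, seed: int = 0) -> float:
    """Empirical (delta-based) bootstrap lower bound on the mean; illustrative sketch only."""
    rng = np.random.default_rng(seed)
    mean_hat = np.mean(returns)
    # Resample with replacement and compute the deltas between the bootstrapped means and the original mean
    idcs = rng.integers(0, len(returns), size=(num_reps, len(returns)))
    deltas = np.mean(returns[idcs], axis=1) - mean_hat
    # One-sided CI: subtract the upper (1 - alpha)-quantile of the deltas from the original mean
    return mean_hat - np.quantile(deltas, 1 - alpha)


returns = np.array([420., 455., 390., 510., 430., 465., 400., 445.])
print(lower_conf_bound_on_mean(returns))  # typically below the plain sample mean np.mean(returns)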
def test_bootsrapping():
    # Why you should operate on the deltas and not directly on the statistic from the resampled data
    sample = np.array([30, 37, 36, 43, 42, 43, 43, 46, 41, 42])
    mean = np.mean(sample)
    print(mean)
    m, ci = bootstrap_ci(sample, np.mean, num_reps=20, alpha=0.1, ci_sides=2, seed=123)
    print(m, ci)

    np.random.seed(123)
    resampled = np.random.choice(sample, (sample.shape[0], 20), replace=True)
    means = np.apply_along_axis(np.mean, 0, resampled)
    print(np.sort(means))
    ci_lo, ci_up = np.percentile(means, [100*0.05, 100*0.95])
    print(ci_lo, ci_up)

    x = np.random.normal(10, 1, 40)
    # x = np.random.uniform(5, 15, 20)
    # x = np.random.poisson(5, 30)

    np.random.seed(1)
    # print(bs.bootstrap(x, stat_func=bs_stats.mean))

    np.random.seed(1)
    m, ci = bootstrap_ci(x, np.mean, num_reps=1000, alpha=0.05, ci_sides=2, studentized=False, bias_correction=False)
    print('[studentized=False, bias_correction=False] mean: ', m)
    print('[studentized=False, bias_correction=False] CI: ', ci)

    np.random.seed(1)
    m, ci = bootstrap_ci(x, np.mean, num_reps=1000, alpha=0.05, ci_sides=2, studentized=False, bias_correction=True)
    print('[studentized=False, bias_correction=True] mean: ', m)

    m, ci = bootstrap_ci(x, np.mean, num_reps=2*384, alpha=0.05, ci_sides=1, studentized=False)
    print('[studentized=False] mean: ', m)
    print('[studentized=False] CI: ', ci)

    m, ci = bootstrap_ci(x, np.mean, num_reps=2*384, alpha=0.05, ci_sides=1, studentized=True)
    print('[studentized=True] mean: ', m)
    print('[studentized=True] CI: ', ci)

    print('Matlab example:')
    # https://de.mathworks.com/help/stats/bootci.html
    x_matlab = np.random.normal(1, 1, 40)

    m, ci = bootstrap_ci(x_matlab, np.mean, num_reps=2000, alpha=0.05, ci_sides=2, studentized=False)
    print('[studentized=False] mean: ', m)
    print('[studentized=False] CI: ', ci)

    m, ci = bootstrap_ci(x_matlab, np.mean, num_reps=2000, alpha=0.05, ci_sides=2, studentized=True)
    print('[studentized=True] mean: ', m)
    print('[studentized=True] CI: ', ci)
def test_bootsrapping(data, num_reps, seed):
    # Fully-fledged example
    bootstrap_ci(data, np.mean, num_reps, alpha=0.05, ci_sides=2, studentized=True, bias_correction=True, seed=seed)

    m, ci_lo, ci_up = bootstrap_ci(data, np.mean, num_reps, alpha=0.05, ci_sides=2,
                                   studentized=False, bias_correction=False, seed=seed)
    assert np.all(m >= ci_lo)
    assert np.all(m <= ci_up)

    m_bc, ci_lo, ci_up = bootstrap_ci(data, np.mean, num_reps, alpha=0.05, ci_sides=2,
                                      studentized=False, bias_correction=True, seed=seed)
    assert np.all(m_bc != m)

    m, ci_lo, ci_up = bootstrap_ci(data, np.mean, num_reps, alpha=0.05, ci_sides=1, studentized=False, seed=seed)
    m_t, ci_lo_t, ci_up_t = bootstrap_ci(data, np.mean, num_reps, alpha=0.05, ci_sides=1, studentized=True, seed=seed)
    assert m == pytest.approx(m_t)
    assert np.all(m_t >= ci_lo_t)
    assert np.all(m_t <= ci_up_t)

    # The bounds are different (though not generally wider) when assuming a t-distribution
    assert np.all(ci_lo != ci_lo_t)
    assert np.all(ci_up != ci_up_t)
def test_boostrap_methods(sample, seed):
    # Empirical bootstrap
    m_bs, ci_bs_lo, ci_bs_up = bootstrap_ci(sample, np.mean, num_reps=20, alpha=0.1, ci_sides=2, seed=seed)

    # Percentile bootstrap
    # Add one to the seed because with the MD5 seed calculation and so on, the lower quantiles are actually equal by
    # chance. This seems to be the one-in-a-million case for this.
    pyrado.set_seed(seed + 1)
    resampled = np.random.choice(sample, (sample.shape[0], 20), replace=True)
    means = np.apply_along_axis(np.mean, 0, resampled)
    ci_lo, ci_up = np.percentile(means, [5, 95])

    # You should operate on the deltas (empirical bootstrap) and not directly on the statistic from the resampled
    # data (percentile bootstrap)
    assert ci_lo != ci_bs_lo
    assert ci_up != ci_bs_up
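# For reference, the two constructions contrasted above can be written out in plain NumPy. This is only a sketch of
# the general idea; it does not reproduce the seeding or the bias correction of pyrado's bootstrap_ci().
import numpy as np

rng = np.random.default_rng(0)
sample = np.array([30., 37., 36., 43., 42., 43., 43., 46., 41., 42.])
mean_hat = sample.mean()
boot_means = np.array([rng.choice(sample, size=sample.size, replace=True).mean() for _ in range(1000)])

# Percentile bootstrap: take the quantiles of the resampled statistic directly
perc_lo, perc_up = np.quantile(boot_means, [0.05, 0.95])

# Empirical bootstrap: take the quantiles of the deltas w.r.t. the original statistic and mirror them around it
delta_lo, delta_up = np.quantile(boot_means - mean_hat, [0.05, 0.95])
emp_lo, emp_up = mean_hat - delta_up, mean_hat - delta_lo

print(f'percentile CI: [{perc_lo:.2f}, {perc_up:.2f}]  empirical CI: [{emp_lo:.2f}, {emp_up:.2f}]')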
def load_snapshot(self, load_dir: str = None, meta_info: dict = None):
    # Get the directory to load from
    ld = load_dir if load_dir is not None else self._save_dir
    if not osp.isdir(ld):
        raise pyrado.ValueErr(msg='Given path is not a directory!')

    if meta_info is None:
        # This algorithm instance is not a subroutine of a meta-algorithm
        self._env_sim = joblib.load(osp.join(ld, 'env_sim.pkl'))
        self._env_real = joblib.load(osp.join(ld, 'env_real.pkl'))

        # Crawl through the given directory and check how many policies and candidates there are
        found_policies, found_cands = None, None
        for root, dirs, files in os.walk(ld):
            found_policies = [p for p in files if p.endswith('_policy.pt')]  # 'policy.pt' file should not be found
            found_cands = [c for c in files if c.endswith('_candidate.pt')]

        # Copy to the current experiment's directory. Not necessary if we are continuing in that directory.
        if ld != self._save_dir:
            for p in found_policies:
                copyfile(osp.join(ld, p), osp.join(self._save_dir, p))
            for c in found_cands:
                copyfile(osp.join(ld, c), osp.join(self._save_dir, c))

        if len(found_policies) > 0:
            # Load all found candidates to save them into a single tensor
            found_cands.sort()  # the order is important since it determines the rows of the tensor
            self.cands = to.stack([to.load(osp.join(ld, c)) for c in found_cands])
            to.save(self.cands, osp.join(self._save_dir, 'candidates.pt'))

            # Catch the case that the algorithm stopped before evaluating a sampled candidate
            if not len(found_policies) == len(found_cands):
                print_cbt(f'Found {len(found_policies)} policies, but {len(found_cands)} candidates!', 'r')
                n = len(found_cands) - len(found_policies)
                delete = input('Delete the superfluous candidates? [y / any other]').lower() == 'y'
                if n > 0 and delete:
                    # Delete the superfluous candidates
                    print_cbt(f'Candidates before:\n{self.cands.numpy()}', 'w')
                    self.cands = self.cands[:-n, :]
                    found_cands = found_cands[:-n]
                    to.save(self.cands, osp.join(self._save_dir, 'candidates.pt'))
                    print_cbt(f'Candidates after:\n{self.cands.numpy()}', 'c')
                else:
                    raise pyrado.ShapeErr(msg=f'Found {len(found_policies)} policies, '
                                              f'but {len(found_cands)} candidates!')

        else:
            # Assuming that not even the training of the initial policies has been finished. Redo it all.
            print_cbt('No policies have been found. Basically starting from scratch.', 'c')
            self.train_init_policies()
            self.eval_init_policies()
            self.initialized = True

        try:
            # Crawl through the load_dir and copy all done evaluations.
            # Not necessary if we are continuing in that directory.
            if ld != self._save_dir:
                for root, dirs, files in os.walk(load_dir):
                    [copyfile(osp.join(load_dir, c), osp.join(self._save_dir, c))
                     for c in files if c.endswith('_returns_real.pt')]

            # Get all previously done evaluations. If we don't find any, the exception is caught.
            found_evals = None
            for root, dirs, files in os.walk(ld):
                found_evals = [v for v in files if v.endswith('_returns_real.pt')]
            found_evals.sort()  # the order is important since it determines the rows of the tensor

            # Reconstruct candidates_values.pt
            self.cands_values = to.empty(self.cands.shape[0])
            for i, fe in enumerate(found_evals):
                # Get the return estimate from the raw evaluations as in eval_policy()
                if self.montecarlo_estimator:
                    self.cands_values[i] = to.mean(to.load(osp.join(ld, fe)))
                else:
                    self.cands_values[i] = to.from_numpy(bootstrap_ci(
                        to.load(osp.join(ld, fe)).numpy(), np.mean,
                        num_reps=1000, alpha=0.05, ci_sides=1, studentized=False)[1])

            if len(found_evals) < len(found_cands):
                print_cbt(f'Found {len(found_evals)} real-world evaluation files but {len(found_cands)} candidates.'
                          f' Now evaluating the remaining ones.', 'c', bright=True)
            for i in range(len(found_cands) - len(found_evals)):
                # Evaluate the current policy on the target domain
                if len(found_evals) < self.num_init_cand:
                    prefix = f'init_{i + len(found_evals)}'
                else:
                    prefix = f'iter_{i + len(found_evals) - self.num_init_cand}'
                policy = to.load(osp.join(self._save_dir, f'{prefix}_policy.pt'))
                self.cands_values[i + len(found_evals)] = self.eval_policy(
                    self._save_dir, self._env_real, policy, self.montecarlo_estimator, prefix,
                    self.num_eval_rollouts_real)

            to.save(self.cands_values, osp.join(self._save_dir, 'candidates_values.pt'))

            if len(found_cands) < self.num_init_cand:
                print_cbt('Found fewer candidates than the number of initial candidates.', 'y')
            else:
                self.initialized = True

        except (FileNotFoundError, RuntimeError):
            # If there are no ..._returns_real.pt files but len(found_policies) > 0 (which was checked earlier),
            # the initial policies have not been evaluated yet
            self.eval_init_policies()
            self.initialized = True

        # Get current iteration count
        found_iter_policies = None
        for root, dirs, files in os.walk(ld):
            found_iter_policies = [p for p in files if p.startswith('iter_') and p.endswith('_policy.pt')]

        if not found_iter_policies:
            self._curr_iter = 0
            # We don't need to init the subroutine since it will be reset for iteration 0 anyway
        else:
            self._curr_iter = len(found_iter_policies)  # continue with the next iteration

            # Initialize the subroutine with the previous iteration
            self._subroutine.load_snapshot(ld, meta_info=dict(prefix=f'iter_{self._curr_iter - 1}'))

            # Evaluate and save the latest candidate on the target system.
            # This is the case if we found iter_i_candidate.pt but not iter_i_returns_real.pt
            if self.cands.shape[0] == self.cands_values.shape[0] + 1:
                curr_cand_value = self.eval_policy(
                    self._save_dir, self._env_real, self._subroutine.policy, self.montecarlo_estimator,
                    prefix=f'iter_{self._curr_iter - 1}', num_rollouts=self.num_eval_rollouts_real)
                self.cands_values = to.cat([self.cands_values, curr_cand_value.view(1)], dim=0)
                to.save(self.cands_values, osp.join(self._save_dir, 'candidates_values.pt'))

                if isinstance(self._env_real, RealEnv):
                    input('Evaluated in the target domain. Hit any key to continue.')

    else:
        raise pyrado.ValueErr(msg=f'{self.name} is not supposed to be run as a subroutine!')
def _estimate_ucbog(self, nr: int):
    """
    Collect the returns with synchronized random seeds and estimate the pessimistic and optimistic bound.

    :param nr: number of domains used for training the reference solutions
    :return: upper confidence bound on the optimality gap (UCBOG)
    """
    # Init containers
    cand_rets = np.zeros((self.nG, nr))
    refs_rets = np.zeros((self.nG, nr))

    # Loop over all reference solutions
    for k in range(self.nG):
        print(f'Estimating the UCBOG | Reference {k + 1} of {self.nG}')

        # Load the domain parameters corresponding to the k-th reference solution
        env_params_ref = joblib.load(osp.join(self._save_dir, f'iter_{self._curr_iter}_env_params_ref_{k}.pkl'))
        self._env_dr.buffer = env_params_ref

        # Load the policies (makes a difference for snapshot_mode = best). They are set to eval mode by rollout()
        self._subrtn_cand.policy.load_state_dict(
            to.load(osp.join(self._save_dir, f'iter_{self._curr_iter}_policy_cand.pt')).state_dict())
        self._subrtn_refs.policy.load_state_dict(
            to.load(osp.join(self._save_dir, f'iter_{self._curr_iter}_policy_ref_{k}.pt')).state_dict())

        # Loop over all domain realizations of the reference solutions
        for i in tqdm(range(nr), total=nr, desc=f'Reference {k + 1}', unit='domains',
                      file=sys.stdout, leave=False):
            # Evaluate both solutions
            cand_rets[k, i], refs_rets[k, i] = self._eval_cand_and_ref_one_domain(i)

            # Process negative optimality gap samples
            refs_rets = self._handle_neg_samples(cand_rets, refs_rets, k, i)

    # --------------
    # Optimality Gap
    # --------------

    # This is similar to the difference of the means that is used to calculate the optimality gap in eq. (9) in [2]
    self.Gn_diffs = np.subtract(refs_rets, cand_rets)  # optimistic bound - pessimistic bound; dim = nG x nr
    Gn_samples = np.mean(self.Gn_diffs, axis=1)  # one optimality gap sample per reference solution; shape (nG,)
    Gn_est = np.mean(Gn_samples)  # sample mean of the original (non-bootstrapped) samples
    ratio_neg_diffs = 1 - np.count_nonzero(self.Gn_diffs)/self.Gn_diffs.size  # assuming zeros come from clipping

    print_cbt(f'diffs (optimistic - pessimistic bound):\n{self.Gn_diffs}', 'y')
    print_cbt(f'\n{100*ratio_neg_diffs}% of the diffs would have been negative and were set to 0\n', 'r', bright=True)

    if ratio_neg_diffs == 1:
        # All diffs are negative
        ci_bs = [0, float('inf')]  # such that the UCBOG comparison in stopping_criterion_met() does not break
        log_dict = {'Gn_est': np.NaN, 'UCBOG': np.NaN, 'ratio_neg_diffs': np.NaN}
    else:
        # Apply bootstrapping
        m_bs, ci_bs = bootstrap_ci(np.ravel(self.Gn_diffs), np.mean,
                                   self.num_bs_reps, self.alpha, 1, self.studentized_ci)
        print(f'm_bs: {m_bs}, ci_bs: {ci_bs}')
        print_cbt(f'\nOG (point estimate): {Gn_est} \nUCBOG: {ci_bs[1]}\n', 'y', bright=True)
        log_dict = {'Gn_est': Gn_est, 'UCBOG': ci_bs[1], 'ratio_neg_diffs': ratio_neg_diffs}

    # Log the optimality gap data
    mode = 'w' if self.curr_iter == 0 else 'a'
    with open(osp.join(self._save_dir, 'OG_log.csv'), mode, newline='') as csvfile:
        fieldnames = list(log_dict.keys())
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        if self.curr_iter == 0:
            writer.writeheader()
        writer.writerow(log_dict)

    # Store the current UCBOG estimated from all samples
    self.ucbog = ci_bs[1]
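# In short: the optimality gap estimate is the mean of the per-domain differences between the reference and the
# candidate returns (clipped at zero), and the UCBOG is the upper end of a one-sided bootstrap confidence interval
# on that mean. The following NumPy-only sketch uses made-up numbers and is not tied to the pyrado API.
import numpy as np

rng = np.random.default_rng(0)
gn_diffs = np.clip(rng.normal(loc=5., scale=3., size=(5, 10)), 0., None)  # nG x nr gap samples, clipped at zero

og_point_estimate = gn_diffs.mean()  # sample mean of the original (non-bootstrapped) gap samples

# One-sided upper bound via the empirical bootstrap on the flattened gap samples
flat = gn_diffs.ravel()
boot_means = np.array([rng.choice(flat, size=flat.size, replace=True).mean() for _ in range(1000)])
ucbog = og_point_estimate - np.quantile(boot_means - og_point_estimate, 0.05)  # alpha = 0.05, one-sided

print(f'OG (point estimate): {og_point_estimate:.3f}  UCBOG: {ucbog:.3f}')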
        column_labels[i], metric_arr.shape[0], mean_metric[i], min_metric[i], std_metric[i]
    ])

print("\nAll metrics:")
print(tabulate(metric_arr, column_labels))
print("\nStandard Deviation:\n", tabulate(table, headers))

# Compute confidence intervals
conf_headers = ["metric", "experiments", "mean", "ci low", "ci high"]
for i in range(len(column_labels)):
    total_mean, total_ci_lo, total_ci_hi = bootstrap_ci(
        np.array([experiment_means[exp][i] for exp in best_experiments]),
        np.mean,
        num_reps=1000,
        alpha=0.05,
        ci_sides=2,
    )
    conf_table.append([
        column_labels[i], len(best_experiments), total_mean, total_ci_lo, total_ci_hi
    ])
print("\nConfidence Interval:\n", tabulate(conf_table, conf_headers))

info = "Best Experiments:\n"
for exp in best_experiments:
    info += f"\t\t{exp}:\n"

# Save the table in a LaTeX file if requested
def draw_curve_from_data(
    plot_type: str,
    ax: plt.Axes,
    data: Union[list, np.ndarray, to.Tensor, pd.DataFrame],
    x_grid: Union[list, np.ndarray, to.Tensor],
    ax_calc: int,
    x_label: Optional[Union[str, Sequence[str]]] = None,
    y_label: Optional[str] = None,
    curve_label: Optional[str] = None,
    area_label: Optional[str] = "",
    vline_level: Optional[float] = None,
    vline_label: str = "approx. solved",
    title: Optional[str] = None,
    show_legend: bool = True,
    cmp_kwargs: dict = None,
    plot_kwargs: dict = None,
    legend_kwargs: dict = None,
) -> plt.Figure:
    """
    Create a 1-dim curve plot (mean with standard deviation, min/mean/max, or confidence interval on the mean) from
    a list of data arrays, a numpy array, a PyTorch tensor, or a pandas DataFrame. The plot is neither shown nor
    saved.

    .. note::
        If you want to have a tight layout, it is best to pass axes of a figure with `tight_layout=True` or
        `constrained_layout=True`.

        If you want to order the 4th element to the 2nd position in terms of colors use

        .. code-block:: python

            palette.insert(1, palette.pop(3))

    :param plot_type: type of 1-dim plot: `mean_std`, `min_mean_max`, or `ci_on_mean`
    :param ax: axis of the figure to plot on
    :param data: data to plot, e.g. a time series
    :param x_grid: values to plot the data over, e.g. time
    :param ax_calc: axis of the data array to calculate the mean, min and max, or std over
    :param x_label: labels for the categories on the x-axis, if `data` is not given as a `DataFrame`
    :param y_label: label for the y-axis, pass `None` to set no label
    :param curve_label: label of the (1-dim) curve, pass `None` for no label
    :param area_label: label of the (transparent) area, pass `None` for no label and "" for the default label
    :param vline_level: if not `None` (default) add a vertical line at the given level
    :param vline_label: label for the vertical line
    :param title: title displayed above the figure, set to `None` to suppress the title
    :param show_legend: if `True` the legend is shown, useful when handling multiple subplots
    :param cmp_kwargs: keyword arguments forwarded to the functions computing the statistics of interest
    :param plot_kwargs: keyword arguments forwarded to the plotting functions
    :param legend_kwargs: keyword arguments forwarded to pyplot's `legend()` function, e.g. `loc='best'`
    :return: handle to the resulting figure
    """
    plot_type = plot_type.lower()
    if plot_type not in ["mean_std", "min_mean_max", "ci_on_mean"]:
        raise pyrado.ValueErr(given=plot_type, eq_constraint="mean_std, min_mean_max, ci_on_mean")
    if not isinstance(data, (list, to.Tensor, np.ndarray, pd.DataFrame)):
        raise pyrado.TypeErr(given=data, expected_type=[list, to.Tensor, np.ndarray, pd.DataFrame])

    # Set defaults which can be overwritten by passing cmp_kwargs
    cmp_kwargs = merge_dicts(
        [dict(num_reps=1000, confidence_level=0.9, bias_correction=False, studentized=False), cmp_kwargs]
    )

    if isinstance(data, pd.DataFrame):
        data = data.to_numpy()
    elif isinstance(data, list):
        data = np.array(data)
    elif isinstance(data, to.Tensor):
        data = data.detach().cpu().numpy()

    # Extract features from the data
    data_mean = np.mean(data, axis=ax_calc)
    df = pd.DataFrame()
    df = df.assign(mean=data_mean)
    if plot_type == "mean_std":
        data_std = np.std(data, axis=ax_calc)
        df = df.assign(std=data_std)
    elif plot_type == "min_mean_max":
        data_min = np.min(data, axis=ax_calc)
        data_max = np.max(data, axis=ax_calc)
        df = df.assign(min=data_min)
        df = df.assign(max=data_max)
    elif plot_type == "ci_on_mean":
        _, data_lo, data_up = bootstrap_ci(
            data.T if ax_calc == 1 else data,
            stat_fcn=np.mean,
            num_reps=cmp_kwargs["num_reps"],
            alpha=cmp_kwargs["confidence_level"],
            ci_sides=2,
            bias_correction=cmp_kwargs["bias_correction"],
            studentized=cmp_kwargs["studentized"],
            seed=0,
        )
        df = df.assign(ci_lo=data_lo)
        df = df.assign(ci_up=data_up)

    # Forward to the actual plotting function
    return draw_curve(
        plot_type,
        ax,
        df,
        x_grid,
        x_label,
        y_label,
        curve_label,
        area_label,
        vline_level,
        vline_label,
        title,
        show_legend,
        plot_kwargs,
        legend_kwargs,
    )
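# Hypothetical usage sketch for the function above. The import path is an assumption and may differ in your checkout;
# the data is randomly generated purely for illustration.
import matplotlib.pyplot as plt
import numpy as np

from pyrado.plotting.curve import draw_curve_from_data  # assumed module path

data = np.random.randn(20, 100).cumsum(axis=1)  # e.g. 20 learning curves with 100 iterations each
fig, ax = plt.subplots(1, figsize=(8, 5), tight_layout=True)
draw_curve_from_data(
    "ci_on_mean",
    ax,
    data,
    x_grid=np.arange(data.shape[1]),
    ax_calc=0,  # compute the statistics over the runs, i.e. along the first axis
    x_label="iteration",
    y_label="return",
    curve_label="mean",
)
plt.show()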
def _estimate_ucbog(self, nr: int):
    """
    Collect the returns with synchronized random seeds and estimate the pessimistic and optimistic bound.

    :param nr: number of domains used for training the reference solutions
    :return: upper confidence bound on the optimality gap (UCBOG)
    """
    # Init containers
    cand_rets = np.zeros((self.nG, nr))
    refs_rets = np.zeros((self.nG, nr))

    # Loop over all reference solutions
    for k in range(self.nG):
        print_cbt(f"Estimating the UCBOG | Reference {k + 1} of {self.nG} ...", "c")

        # Load the domain parameters corresponding to the k-th reference solution
        env_params_ref = joblib.load(osp.join(self.save_dir, f"iter_{self._curr_iter}_env_params_ref_{k}.pkl"))
        self.env_dr.buffer = env_params_ref

        # Load the policies (makes a difference for snapshot_mode = best)
        self._subrtn_cand._policy = pyrado.load(
            "policy.pt",
            self.save_dir,
            prefix=f"iter_{self._curr_iter}",
            suffix="cand",
            obj=self._subrtn_cand._policy,
        )
        self._subrtn_refs._policy = pyrado.load(
            "policy.pt",
            self.save_dir,
            prefix=f"iter_{self._curr_iter}",
            suffix=f"ref_{k}",
            obj=self._subrtn_refs._policy,
        )

        # Loop over all domain realizations of the reference solutions
        for i in tqdm(
            range(nr), total=nr, desc=f"Reference {k + 1}", unit="domains", file=sys.stdout, leave=False
        ):
            # Evaluate both solutions
            cand_rets[k, i], refs_rets[k, i] = self._eval_cand_and_ref_one_domain(i)

            # Process negative optimality gap samples
            refs_rets = self._handle_neg_samples(cand_rets, refs_rets, k, i)

    # --------------
    # Optimality Gap
    # --------------

    # This is similar to the difference of the means that is used to calculate the optimality gap in eq. (9) in [2]
    self.Gn_diffs = np.subtract(refs_rets, cand_rets)  # optimistic bound - pessimistic bound; dim = nG x nr
    Gn_samples = np.mean(self.Gn_diffs, axis=1)  # one optimality gap sample per reference solution; shape (nG,)
    Gn_est = np.mean(Gn_samples)  # sample mean of the original (non-bootstrapped) samples
    ratio_neg_diffs = 1 - np.count_nonzero(self.Gn_diffs) / self.Gn_diffs.size  # assuming zeros come from clipping

    print_cbt(f"diffs (optimistic - pessimistic bound):\n{self.Gn_diffs}", "y")
    print_cbt(
        f"\n{100 * ratio_neg_diffs}% of the diffs would have been negative and were set to 0\n", "r", bright=True
    )

    if ratio_neg_diffs == 1:
        # All diffs are negative
        ci_bs_lo, ci_bs_up = np.zeros(1), np.array(
            [pyrado.inf]
        )  # such that the UCBOG comparison in stopping_criterion_met() does not break
        log_dict = {"Gn_est": np.NaN, "UCBOG": np.NaN, "ratio_neg_diffs": np.NaN}
    else:
        # Apply bootstrapping
        m_bs, ci_bs_lo, ci_bs_up = bootstrap_ci(
            np.ravel(self.Gn_diffs), np.mean, self.num_bs_reps, self.alpha, 1, self.studentized_ci
        )
        print(f"m_bs: {m_bs}, ci_bs: {ci_bs_lo, ci_bs_up}")
        print_cbt(f"\nOG (point estimate): {Gn_est} \nUCBOG: {ci_bs_up}\n", "y", bright=True)
        log_dict = {"Gn_est": Gn_est, "UCBOG": ci_bs_up, "ratio_neg_diffs": ratio_neg_diffs}

    # Log the optimality gap data
    mode = "w" if self.curr_iter == 0 else "a"
    with open(osp.join(self.save_dir, "OG_log.csv"), mode, newline="") as csvfile:
        fieldnames = list(log_dict.keys())
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        if self.curr_iter == 0:
            writer.writeheader()
        writer.writerow(log_dict)

    # Store the current UCBOG estimated from all samples
    self.ucbog = ci_bs_up