def test_100_integers_update_variance(self):
    test_set = list(range(0, 100))

    online = OnlineVariance()

    for number in test_set:
        online.update(number)

    self.assertEqual(online.variance, variance(test_set))
def test_100_floats_update_variance(self):
    test_set = [i / 3 for i in range(0, 100)]

    online = OnlineVariance()

    for number in test_set:
        online.update(number)

    # note: this test will fail on the final test_set if `places` > 12
    self.assertAlmostEqual(online.variance, variance(test_set), places=12)
def test_two_update_variance(self):
    test_sets = [[0, 2], [1, 1], [1, 2], [-1, 1], [10.5, 20]]

    for test_set in test_sets:
        online = OnlineVariance()

        for number in test_set:
            online.update(number)

        self.assertEqual(online.variance, variance(test_set))
def test_three_update_variance(self):
    test_sets = [[0, 2, 4], [1, 1, 1], [1, 2, 3], [-1, 1, -1], [10.5, 20, 29.5]]

    for test_set in test_sets:
        online = OnlineVariance()

        for number in test_set:
            online.update(number)

        # note: this test will fail on the final test_set if `places` > 15
        self.assertAlmostEqual(online.variance, variance(test_set), places=15)
def learn(self, context: Context, action: Action, reward: float, probability: float, info: Info) -> None:

    assert 0 <= reward <= 1, "This algorithm assumes that reward has support in [0,1]."

    if action not in self._m:
        # first observation for this action: initialize its count, mean and variance trackers
        self._m[action] = reward
        self._s[action] = 1
        self._v[action] = OnlineVariance()
    else:
        # the count must be incremented *before* the mean update so the new
        # reward is weighted by 1/n; with the increment afterwards the running
        # mean silently dropped the first reward for each action
        self._s[action] += 1
        self._m[action] = (1 - 1/self._s[action]) * self._m[action] + 1/self._s[action] * reward

    # the variance tracker sees every reward, including the first
    self._v[action].update(reward)
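# The per-action statistics maintained in learn (the mean self._m, the count
# self._s and the online variance self._v) are exactly what a UCB1-Tuned style
# confidence bound consumes at predict time. The predict side is not shown in
# this file, so the standalone sketch below is only an illustration of how
# those statistics could feed such a bound; the helper name `_ucb1_tuned_bonus`
# and its use are assumptions, not part of this code.

import math

def _ucb1_tuned_bonus(t: int, n: int, var: float) -> float:
    """A hypothetical UCB1-Tuned exploration bonus (Auer et al., 2002).

    Args:
        t:   total number of rewards observed across all actions.
        n:   number of rewards observed for this action (self._s[action]).
        var: this action's online variance estimate (self._v[action].variance).
    """
    if n == 0:
        return float('inf')  # always try an action at least once

    if math.isnan(var):
        var = 1/4  # with fewer than two observations, fall back to the max variance of a [0,1] reward

    # V upper-bounds the true variance with high probability; min(1/4, V) caps
    # it since a reward with support in [0,1] has variance at most 1/4
    V = var + math.sqrt(2 * math.log(t) / n)

    return math.sqrt(math.log(t) / n * min(1/4, V))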
def test_one_update_variance_nan(self):
    online = OnlineVariance()

    online.update(1)

    self.assertTrue(isnan(online.variance))
def test_no_updates_variance_nan(self):
    online = OnlineVariance()

    self.assertTrue(isnan(online.variance))
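# `OnlineVariance` itself is defined elsewhere in the package; the tests above
# only exercise its `update`/`variance` interface. As a point of reference, a
# minimal sketch that would satisfy them is below: Welford's algorithm,
# reporting the sample variance (matching statistics.variance) and nan until
# at least two values have been seen. The class names here are hypothetical
# and the real implementation may differ.

from math import nan

class _SketchOnlineMean:
    """Hypothetical reference: track a mean incrementally."""

    def __init__(self) -> None:
        self._n    = 0
        self._mean = nan

    @property
    def mean(self) -> float:
        return self._mean

    def update(self, value: float) -> None:
        self._n   += 1
        self._mean = value if self._n == 1 else self._mean + (value - self._mean) / self._n

class _SketchOnlineVariance:
    """Hypothetical reference: track sample variance incrementally (Welford)."""

    def __init__(self) -> None:
        self._n       = 0
        self._mean    = 0.
        self._M2      = 0.
        self.variance = nan  # stays nan until at least two updates

    def update(self, value: float) -> None:
        self._n    += 1
        delta       = value - self._mean
        self._mean += delta / self._n
        self._M2   += delta * (value - self._mean)

        if self._n >= 2:
            self.variance = self._M2 / (self._n - 1)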
def standard_plot(self, select_learners: Sequence[int] = None, show_err: bool = False, show_sd: bool = False, figsize=(12,4)) -> None:

    check_matplotlib_support('Plots.standard_plot')

    def _plot(axes, label, xs, ys, vs, ns):
        axes.plot(xs, ys, label=label)

        if show_sd:
            ls = [ y - math.sqrt(v) for y, v in zip(ys, vs) ]
            us = [ y + math.sqrt(v) for y, v in zip(ys, vs) ]
            axes.fill_between(xs, ls, us, alpha=0.1)

        if show_err:
            # It isn't entirely clear what a standard error means here: each x has
            # its own (changing) distribution, so what does sample variance over a
            # deterministic collection of random variables with different
            # distributions mean? For example, 10 draws from dist1 plus 10 draws
            # from dist2 is not the same as 20 draws with a 50% chance of coming
            # from either, so the distribution is only defined over the whole
            # collection and not for a specific random variable. This stays as-is
            # for now; it seems ok but needs some more thought.
            ls = [ y - math.sqrt(v/n) for y, v, n in zip(ys, vs, ns) ]
            us = [ y + math.sqrt(v/n) for y, v, n in zip(ys, vs, ns) ]
            axes.fill_between(xs, ls, us, alpha=0.1)

    learners, _, batches = self.to_indexed_tuples()

    learners = { key: value for key, value in learners.items() if select_learners is None or key in select_learners }
    batches  = { key: value for key, value in batches.items() if select_learners is None or value.learner_id in select_learners }

    sorted_batches  = sorted(batches.values(), key=lambda batch: batch.learner_id)
    grouped_batches = groupby(sorted_batches, key=lambda batch: batch.learner_id)

    max_batch_N = 0

    indexes     = cast(Dict[int, List[int  ]], collections.defaultdict(list))
    incounts    = cast(Dict[int, List[int  ]], collections.defaultdict(list))
    inmeans     = cast(Dict[int, List[float]], collections.defaultdict(list))
    invariances = cast(Dict[int, List[float]], collections.defaultdict(list))
    cucounts    = cast(Dict[int, List[int  ]], collections.defaultdict(list))
    cumeans     = cast(Dict[int, List[float]], collections.defaultdict(list))
    cuvariances = cast(Dict[int, List[float]], collections.defaultdict(list))

    for learner_id, learner_batches in grouped_batches:

        cucount    = 0
        cumean     = OnlineMean()
        cuvariance = OnlineVariance()

        Ns, Rs = list(zip(*[ (b.N, b.reward) for b in learner_batches ]))

        Ns = list(zip(*Ns))
        Rs = list(zip(*Rs))

        for batch_index, (batch_Ns, batch_Rs) in enumerate(zip(Ns, Rs)):

            incount    = 0
            inmean     = OnlineMean()
            invariance = OnlineVariance()

            for N, reward in zip(batch_Ns, batch_Rs):
                max_batch_N = max(N, max_batch_N)

                incount += 1
                inmean    .update(reward)
                invariance.update(reward)

                cucount += 1
                cumean    .update(reward)
                cuvariance.update(reward)

            # sanity check; the sorting above should (in theory) take care of this,
            # and if it doesn't then the cu* values will be incorrect...
            assert indexes[learner_id] == [] or batch_index > indexes[learner_id][-1]

            indexes    [learner_id].append(batch_index)
            incounts   [learner_id].append(incount)
            inmeans    [learner_id].append(inmean.mean)
            invariances[learner_id].append(invariance.variance)
            cucounts   [learner_id].append(cucount)
            cumeans    [learner_id].append(cumean.mean)
            cuvariances[learner_id].append(cuvariance.variance)

    import matplotlib.pyplot as plt #type: ignore

    fig = plt.figure(figsize=figsize)

    index_unit = "Interaction" if max_batch_N == 1 else "Batch"

    ax1 = fig.add_subplot(1, 2, 1) #type: ignore
    ax2 = fig.add_subplot(1, 2, 2) #type: ignore

    for learner_id in learners:
        _plot(ax1, learners[learner_id].full_name, indexes[learner_id], inmeans[learner_id], invariances[learner_id], incounts[learner_id])

    ax1.set_title("Instantaneous Reward")
    ax1.set_ylabel("Reward")
    ax1.set_xlabel(f"{index_unit} Index")

    for learner_id in learners:
        _plot(ax2, learners[learner_id].full_name, indexes[learner_id], cumeans[learner_id], cuvariances[learner_id], cucounts[learner_id])

    ax2.set_title("Progressive Validation")
    #ax2.set_ylabel("Reward")
    ax2.set_xlabel(f"{index_unit} Index")

    (bot1, top1) = ax1.get_ylim()
    (bot2, top2) = ax2.get_ylim()

    ax1.set_ylim(min(bot1, bot2), max(top1, top2))
    ax2.set_ylim(min(bot1, bot2), max(top1, top2))

    scale = 0.5

    box1 = ax1.get_position()
    box2 = ax2.get_position()
    ax1.set_position([box1.x0, box1.y0 + box1.height * (1-scale), box1.width, box1.height * scale])
    ax2.set_position([box2.x0, box2.y0 + box2.height * (1-scale), box2.width, box2.height * scale])

    # put a legend below the current axes
    fig.legend(*ax1.get_legend_handles_labels(), loc='upper center', bbox_to_anchor=(.5, .3), ncol=2, fontsize='small') #type: ignore

    plt.show()
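# A hypothetical invocation (the `result` name and how such an object is
# constructed are assumptions; standard_plot is a method of whatever class
# defines to_indexed_tuples above): plot learners 0 and 1 only, with
# standard-error bands around both the instantaneous and the
# progressive-validation reward curves.
#
#   result.standard_plot(select_learners=[0, 1], show_err=True)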