def filter( self, interactions: Iterable[SimulatedInteraction] ) -> Iterable[SimulatedInteraction]:
    """Yield copies of the given interactions with noise applied to contexts, actions and rewards."""

    rng = CobaRandom(self._seed)

    for interaction in interactions:

        # Logged interactions are explicitly unsupported by this filter.
        if isinstance(interaction, LoggedInteraction):
            raise CobaException("We do not currently support adding noise to a LoggedInteraction.")

        context_with_noise = self._noises(interaction.context, rng, self._context_noise)
        actions_with_noise = [ self._noises(action, rng, self._action_noise) for action in interaction.actions ]

        extra_kwargs = {}
        if 'rewards' in interaction.kwargs and self._reward_noise:
            extra_kwargs['rewards'] = self._noises(interaction.kwargs['rewards'], rng, self._reward_noise)

        yield SimulatedInteraction(context_with_noise, actions_with_noise, **extra_kwargs)
def filter(self, interactions: Iterable[Interaction]) -> Iterable[Interaction]:
    """Impute missing (NaN) numeric context values from statistics fit on the first `self._using` interactions."""

    interaction_iter = iter(interactions)
    fit_interactions = list(islice(interaction_iter, self._using))
    all_interactions = chain(fit_interactions, interaction_iter)

    # Gather every numeric, non-NaN value observed per feature name during fitting.
    observed: Dict[Hashable, List[Number]] = defaultdict(list)
    for interaction in fit_interactions:
        for name, value in self._context_as_name_values(interaction.context):
            if isinstance(value, Number) and not isnan(value):
                observed[name].append(value)

    # Features with no observed values fall back to 0 via the defaultdict.
    stats: Dict[Hashable, float] = defaultdict(int)
    stat_calc = {"mean": mean, "median": median, "mode": mode}.get(self._stat)
    if stat_calc is not None:
        for name, numeric_values in observed.items():
            stats[name] = stat_calc(numeric_values)

    for interaction in all_interactions:

        imputed = {
            name: stats[name] if isinstance(value, Number) and isnan(value) else value
            for name, value in self._context_as_name_values(interaction.context)
        }

        # Rebuild the context in its original representation.
        if interaction.context is None:
            final_context = None
        elif isinstance(interaction.context, dict):
            final_context = imputed
        elif isinstance(interaction.context, tuple):
            final_context = tuple(imputed[k] for k, _ in self._context_as_name_values(interaction.context))
        else:
            # Scalar context: presumably stored under key 1 by _context_as_name_values — TODO confirm.
            final_context = imputed[1]

        if isinstance(interaction, SimulatedInteraction):
            yield SimulatedInteraction(final_context, interaction.actions, **interaction.kwargs)
        elif isinstance(interaction, LoggedInteraction):
            yield LoggedInteraction(final_context, interaction.action, **interaction.kwargs)
        else: #pragma: no cover
            raise CobaException("Unknown interactions were given to the Impute filter.")
def filter( self, interactions: Iterable[SimulatedInteraction] ) -> Iterable[SimulatedInteraction]:
    """Binarize each interaction's rewards: maximal reward(s) become 1, every other reward becomes 0."""

    for interaction in interactions:
        new_kwargs = dict(interaction.kwargs)
        best_reward = max(new_kwargs["rewards"])
        new_kwargs["rewards"] = [1 if reward == best_reward else 0 for reward in new_kwargs["rewards"]]
        yield SimulatedInteraction(interaction.context, interaction.actions, **new_kwargs)
def read(self) -> Iterable[SimulatedInteraction]:
    """Convert supervised (feature, label) examples into simulated interactions.

    Regression labels ("R") are min-max scaled to a unit range and discretized into at most
    10 actions, with reward 1 minus the distance between the action's value and the label.
    Otherwise labels are treated as classes (or multilabels) with reward 1 for a match.
    """

    items = list(self._source.read())

    # An empty source yields no interactions. Plain `return` replaces the old
    # `return []`: a generator's return value is discarded, so the list was misleading.
    if not items: return

    features, labels = zip(*items)

    if self._label_type == "R":
        max_n_actions = 10

        # Scale the labels so their range is 1.
        min_l, max_l = min(labels), max(labels)
        label_range = max_l - min_l

        if label_range == 0:
            # All labels identical: scaling is degenerate, so map every label to 0.
            # (Previously this case raised ZeroDivisionError.)
            labels = [0.0] * len(labels)
        else:
            labels = [float(l) / label_range - min_l / label_range for l in labels]

        if len(labels) <= max_n_actions:
            actions = labels
        else:
            # Discretize into evenly spaced quantiles of the scaled labels.
            actions = percentile(labels, [i / (max_n_actions + 1) for i in range(1, max_n_actions + 1)])

        # Map each one-hot encoded action back to its numeric value (also dedupes actions).
        values = dict(zip(OneHotEncoder().fit_encodes(actions), actions))
        actions = list(values.keys())

        def reward(action, label):
            # Reward decreases linearly with distance from the true label.
            return 1 - abs(values[action] - float(label))

    else:
        # How can we tell the difference between featurized labels and multilabels?
        # For now we assume multilabels will be passed in as arrays, not tuples.
        if not isinstance(labels[0], collections.abc.Hashable):
            actions = list(chain.from_iterable(labels))
        else:
            actions = list(labels)

        def reward(action, label):
            is_label = action == label
            in_multilabel = isinstance(label, collections.abc.Sequence) and action in label
            return int(is_label or in_multilabel)

    contexts = features
    # Deterministic shuffle (fixed seed) so the action ordering is stable across runs.
    actions = CobaRandom(1).shuffle(sorted(set(actions)))
    rewards = [[reward(action, label) for action in actions] for label in labels]

    for c, a, r in zip(contexts, repeat(actions), rewards):
        yield SimulatedInteraction(c, a, rewards=r)
def read(self) -> Iterable[SimulatedInteraction]:
    """Generate `self._n_interactions` interactions from the configured context/actions/reward callables."""

    rng = CobaRandom(self._seed) if self._make_rng else None

    # Bind the rng-aware call shape once, rather than re-testing `rng` on every call.
    if rng:
        make_context = lambda i: self._context(i, rng)
        make_actions = lambda i, c: self._actions(i, c, rng)
        make_reward  = lambda i, c, a: self._reward(i, c, a, rng)
    else:
        make_context = self._context
        make_actions = self._actions
        make_reward  = self._reward

    # islice(count(), None) iterates forever when _n_interactions is None.
    for i in islice(count(), self._n_interactions):
        context = make_context(i)
        actions = make_actions(i, context)
        rewards = [make_reward(i, context, action) for action in actions]
        yield SimulatedInteraction(context, actions, rewards=rewards)
def filter(self, interactions: Iterable[Interaction]) -> Iterable[Interaction]:
    """Convert contexts (and actions) to their sparse representation when configured to do so."""

    for interaction in interactions:

        context = interaction.context
        if self._context:
            context = self._make_sparse(context)

        if hasattr(interaction, 'actions'):
            # Simulated interactions carry a full action set.
            actions = interaction.actions
            if self._action:
                actions = [self._make_sparse(action) for action in actions]
            yield SimulatedInteraction(context, actions, **interaction.kwargs)
        else:
            # Logged interactions carry only the single taken action.
            action = interaction.action
            if self._action:
                action = self._make_sparse(action)
            yield LoggedInteraction(context, action, **interaction.kwargs)
def filter( self, interactions: Iterable[SimulatedInteraction] ) -> Iterable[SimulatedInteraction]:
    """Pass through the first `self._after` interactions, then rotate every reward list by one position."""

    source = iter(interactions)

    # Everything before the cycle point is forwarded untouched.
    yield from islice(source, self._after)

    try:
        first = next(source)
    except StopIteration:
        return

    action_set = set(first.actions)
    n_actions  = len(action_set)

    # The one-hot tuples that a featureless action set must consist of.
    one_hot_actions = [tuple([0] * i + [1] + [0] * (n_actions - i - 1)) for i in range(n_actions)]

    remaining = chain([first], source)

    if len(action_set & set(one_hot_actions)) != len(action_set):
        # Actions carry features, so cycling rewards would be meaningless; forward unchanged.
        warnings.warn("Cycle only works for environments without action features. It will be ignored in this case.")
        yield from remaining
    else:
        for interaction in remaining:
            new_kwargs = dict(interaction.kwargs)
            # Rotate rewards right by one: last reward moves to the front.
            new_kwargs['rewards'] = new_kwargs['rewards'][-1:] + new_kwargs['rewards'][:-1]
            yield SimulatedInteraction(interaction.context, interaction.actions, **new_kwargs)
def filter(self, interactions: Iterable[Interaction]) -> Iterable[Interaction]:
    """Shift and scale numeric values, with statistics fit on the first `self._using` interactions.

    Depending on `self._target`, either context features ("features") or reward values
    ("rewards") are transformed as (value - shift) * scale, where shift and scale are
    computed per feature name according to `self._shift` and `self._scale`.
    """

    iter_interactions = iter(interactions)
    fitting_interactions = list(islice(iter_interactions, self._using))

    # Per-feature shift (default 0) and scale (default 1) applied to numeric values.
    shifts: Dict[Hashable, float] = defaultdict(lambda: 0)
    scales: Dict[Hashable, float] = defaultdict(lambda: 1)

    # All numeric values observed for each feature name during fitting.
    unscaled_values: Dict[Hashable, List[Any]] = defaultdict(list)

    # A non-zero shift would turn absent (implicitly zero) sparse keys into explicit values.
    if any([isinstance(i.context, dict) for i in fitting_interactions]) and self._shift != 0:
        raise CobaException("Shift is required to be 0 for sparse environments. Otherwise the environment will become dense.")

    mixed           = []  # feature names seen with both numeric and non-numeric values
    had_non_numeric = []  # feature names seen, so far, with only non-numeric values

    for interaction in fitting_interactions:

        if self._target == "features":
            for name, value in self._feature_pairs(interaction.context):

                if name in mixed: continue

                is_numeric = isinstance(value, Number)
                is_nan = is_numeric and isnan(value)

                # A feature becomes "mixed" the moment we see it with the other kind of value,
                # at which point it is excluded from scaling entirely.
                if (not is_numeric and name in unscaled_values) or (is_numeric and name in had_non_numeric):
                    mixed.append(name)
                    if name in unscaled_values: del unscaled_values[name]
                    if name in had_non_numeric: had_non_numeric.remove(name)
                elif not is_numeric:
                    had_non_numeric.append(name)
                elif is_numeric and not is_nan:
                    unscaled_values[name].append(value)

        if self._target == "rewards":
            unscaled_values["rewards"].extend(interaction.kwargs["rewards"])

    if mixed: warnings.warn(f"Some features were not scaled due to having mixed types: {mixed}. ")

    # Sparse contexts implicitly contain 0 for keys they omit; append a single 0 for any
    # fitted key that at least one sparse context omitted, so statistics reflect that.
    has_sparse_zero = set()

    for interaction in fitting_interactions:
        if isinstance(interaction.context, dict):
            has_sparse_zero |= unscaled_values.keys() - interaction.context.keys() - {"rewards"}

    for key in has_sparse_zero:
        unscaled_values[key].append(0)

    # Compute shift and scale statistics for every fitted feature name.
    for name, values in unscaled_values.items():

        if isinstance(self._shift, Number): shift = self._shift
        if self._shift == "min" : shift = min(values)
        if self._shift == "mean": shift = mean(values)
        if self._shift == "med" : shift = median(values)

        if isinstance(self._scale, Number):
            scale_num = self._scale
            scale_den = 1
        if self._scale == "std":
            scale_num = 1
            scale_den = stdev(values)
        if self._scale == "minmax":
            scale_num = 1
            scale_den = max(values) - min(values)
        if self._scale == "iqr":
            scale_num = 1
            scale_den = iqr(values)
        if self._scale == "maxabs":
            scale_num = 1
            scale_den = max([abs(v - shift) for v in values])

        shifts[name] = shift
        # Guard against (near-)zero denominators; such features are left unscaled.
        scales[name] = scale_num / scale_den if round(scale_den, 10) != 0 else 1

    # Replay the fitting interactions, then the rest, with the transform applied.
    for interaction in chain(fitting_interactions, iter_interactions):

        scaled_values = {}
        final_context = interaction.context
        final_kwargs = interaction.kwargs.copy()

        if self._target == "features":
            for name, value in self._feature_pairs(interaction.context):
                if isinstance(value, Number):
                    scaled_values[name] = (value - shifts[name]) * scales[name]
                else:
                    scaled_values[name] = value

            # Rebuild the context in its original representation.
            if interaction.context is None:
                final_context = None
            elif isinstance(interaction.context, dict):
                final_context = scaled_values
            elif isinstance(interaction.context, tuple):
                final_context = tuple(scaled_values[k] for k, _ in self._feature_pairs(interaction.context))
            else:
                # Scalar context: presumably keyed by 1 in _feature_pairs — TODO confirm.
                final_context = scaled_values[1]

        if self._target == "rewards":
            final_kwargs['rewards'] = [(r - shifts['rewards']) * scales['rewards'] for r in interaction.kwargs['rewards']]

        # Simulated interactions expose `.actions`; accessing it on a logged interaction
        # raises AttributeError, routing us to the LoggedInteraction branch.
        # NOTE(review): the logged branch forwards the ORIGINAL kwargs, not final_kwargs,
        # so scaled rewards are never applied to logged interactions — confirm intended.
        try:
            yield SimulatedInteraction(final_context, interaction.actions, **final_kwargs)
        except AttributeError:
            yield LoggedInteraction(final_context, interaction.action, **interaction.kwargs)
def read(self) -> Iterable[SimulatedInteraction]:
    """Decode serialized interactions, skipping the first item (presumably a header) from the source."""

    for interaction_json in islice(self._source.read(), 1, None):
        decoded = self._decoder.filter(interaction_json)
        yield SimulatedInteraction(decoded[0], decoded[1], **decoded[2])