def get_winner_assessment_for_canarybg(experiment_resource: ExperimentResource):
    """
    Get winner assessment using experiment resource for Canary or BlueGreen experiments
    """
    was = WinnerAssessmentAnalysis()

    versions = [experiment_resource.spec.versionInfo.baseline]
    versions += experiment_resource.spec.versionInfo.candidates

    feasible_versions = get_feasible_versions(experiment_resource, versions)

    # names of feasible versions
    fvn = list(map(lambda version: version.name, feasible_versions))

    if versions[1].name in fvn:
        was.data = WinnerAssessmentData(winnerFound = True, winner = versions[1].name, \
            bestVersions = [versions[1].name])
        was.message = Message.join_messages([Message(MessageLevel.INFO, \
            "candidate satisfies all objectives")])
    elif versions[0].name in fvn:
        was.data = WinnerAssessmentData(winnerFound = True, winner = versions[0].name, \
            bestVersions = [versions[0].name])
        was.message = Message.join_messages([Message(MessageLevel.INFO, \
            "baseline satisfies all objectives; candidate does not")])
    return was
def get_winner_assessment_for_conformance(experiment_resource: ExperimentResource):
    """
    Get winner assessment using experiment resource for Conformance experiments
    """
    was = WinnerAssessmentAnalysis()

    versions = [experiment_resource.spec.versionInfo.baseline]

    # no version assessments data ...
    # this is because there are no objectives in the experiment to satisfy ...
    # declare all versions to be feasible
    if experiment_resource.status.analysis.version_assessments.data is None or \
        len(experiment_resource.status.analysis.version_assessments.data) == 0:
        feasible_versions = versions
    else:
        # there is version assessment data
        # keep only the feasible versions, i.e., those whose assessments are all True
        feasible_versions = list(filter(lambda version: \
            all(experiment_resource.status.analysis.version_assessments.data[version.name]), versions))

    # extract names of feasible versions
    fvn = list(map(lambda version: version.name, feasible_versions))

    if versions[0].name in fvn:
        was.data = WinnerAssessmentData(winnerFound = True, winner = versions[0].name, \
            bestVersions = [versions[0].name])
        was.message = Message.join_messages([Message(MessageLevel.INFO, \
            "baseline satisfies all objectives")])
    return was
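# Note: get_feasible_versions is used by the canary/bluegreen, conformance, and
# ab(/n) winner assessments in this module but is not shown in this excerpt.
# The sketch below is an assumption: it mirrors the inline feasibility filtering
# used in the conformance assessment above (a version is feasible when all of its
# objective assessments are True); it is not necessarily the actual helper.
def get_feasible_versions(experiment_resource: ExperimentResource, versions):
    """Sketch: return the subset of versions whose objective assessments are all True."""
    version_assessments = experiment_resource.status.analysis.version_assessments.data
    # with no assessment data (e.g., no objectives), declare every version feasible
    if version_assessments is None or len(version_assessments) == 0:
        return versions
    return list(filter(lambda version: \
        all(version_assessments[version.name]), versions))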
def get_aggregated_metrics(expr: ExperimentResource):
    """
    Get aggregated metrics using experiment resource and metric resources.
    """
    versions = [expr.spec.versionInfo.baseline]
    if expr.spec.versionInfo.candidates is not None:
        versions += expr.spec.versionInfo.candidates

    # messages not working as intended...
    messages = []

    # initialize aggregated metrics object
    iam = get_builtin_metrics(expr)

    # check if start time is greater than now
    # this is problematic: start time is set by etc3 but checked by analytics,
    # and the two clocks are not synchronized, so this check is unreliable
    if expr.status.startTime > (datetime.now(timezone.utc)):
        messages.append(Message(MessageLevel.ERROR, "Invalid startTime: greater than current time"))
        iam.message = Message.join_messages(messages)
        return iam

    # there are no metrics to be fetched
    if expr.status.metrics is None:
        iam.message = Message.join_messages(messages)
        return iam

    for metric_info in expr.status.metrics:
        # only custom metrics are handled below, not builtin metrics
        if metric_info.metricObj.spec.provider is None or \
            metric_info.metricObj.spec.provider != "iter8":
            iam.data[metric_info.name] = AggregatedMetric(data={})

            # fetch the metric value for each version...
            for version in versions:
                # initialize metric object for this version...
                iam.data[metric_info.name].data[version.name] = VersionMetric()
                val, err = get_metric_value(metric_info.metricObj, version, \
                    expr.status.startTime)
                if err is None and val is not None:
                    iam.data[metric_info.name].data[version.name].value = val
                else:
                    # fall back to the previously aggregated value, if any
                    try:
                        val = float(expr.status.analysis.aggregated_metrics.data\
                            [metric_info.name].data[version.name].value)
                    except AttributeError:
                        val = None
                    iam.data[metric_info.name].data[version.name].value = val
                    if err is not None:
                        messages.append(Message(MessageLevel.ERROR, \
                            f"Error from metrics backend for metric: {metric_info.name} \
                            and version: {version.name}"))

    iam.message = Message.join_messages(messages)
    logger.debug("Analysis object after metrics collection")
    logger.debug(pprint.PrettyPrinter().pformat(iam))
    return iam
def get_version_assessments(experiment_resource: ExperimentResource):
    """
    Get version assessments using experiment resource.
    """
    versions = [experiment_resource.spec.versionInfo.baseline]
    if experiment_resource.spec.versionInfo.candidates is not None:
        versions += experiment_resource.spec.versionInfo.candidates

    messages = []

    def check_limits(obj: Objective, value: float) -> bool:
        """Check if the given value satisfies the objective's upper and lower limits."""
        if (obj.upper_limit is not None) and (value > float(obj.upper_limit)):
            return False
        if (obj.lower_limit is not None) and (value < float(obj.lower_limit)):
            return False
        return True

    aggregated_metric_data = experiment_resource.status.analysis.aggregated_metrics.data

    version_assessments = VersionAssessmentsAnalysis(data={})

    if experiment_resource.spec.criteria is None or \
        experiment_resource.spec.criteria.objectives is None:
        return version_assessments

    # objectives are available
    for version in versions:
        version_assessments.data[version.name] = [False] * \
            len(experiment_resource.spec.criteria.objectives)

    for ind, obj in enumerate(experiment_resource.spec.criteria.objectives):
        if obj.metric in aggregated_metric_data:
            versions_metric_data = aggregated_metric_data[obj.metric].data
            for version in versions:
                if version.name in versions_metric_data:
                    if versions_metric_data[version.name].value is not None:
                        version_assessments.data[version.name][ind] = \
                            check_limits(obj, float(versions_metric_data[version.name].value))
                    else:
                        messages.append(Message(MessageLevel.WARNING, \
                            f"Value for {obj.metric} metric and {version.name} version is None."))
                else:
                    messages.append(Message(MessageLevel.WARNING, \
                        f"Value for {obj.metric} metric and {version.name} version is unavailable."))
        else:
            messages.append(Message(MessageLevel.WARNING, \
                f"Aggregated metric object for {obj.metric} metric is unavailable."))

    version_assessments.message = Message.join_messages(messages)
    logger.debug("version assessments: %s", pprint.PrettyPrinter().pformat(version_assessments))
    return version_assessments
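# A concrete illustration of the check_limits rule above: a standalone restatement
# with hypothetical limits, values, and version names (sketch only, not module API).
def within_limits(value: float, upper_limit=None, lower_limit=None) -> bool:
    """Same rule as check_limits: reject values above the upper or below the lower limit."""
    if upper_limit is not None and value > float(upper_limit):
        return False
    if lower_limit is not None and value < float(lower_limit):
        return False
    return True

# e.g., for a single objective "mean latency must stay <= 50":
assert within_limits(42.0, upper_limit=50)        # baseline satisfies the objective
assert not within_limits(63.0, upper_limit=50)    # candidate violates it
# version_assessments.data would then resemble {"baseline": [True], "candidate": [False]}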
def get_aggregated_metrics(expr: ExperimentResource):
    """
    Get aggregated metrics from experiment resource and metric resources.
    """
    versions = [expr.spec.versionInfo.baseline]
    if expr.spec.versionInfo.candidates is not None:
        versions += expr.spec.versionInfo.candidates

    # messages not working as intended...
    messages = []

    # initialize aggregated metrics object
    iam = AggregatedMetricsAnalysis(data={})

    # check if start time is greater than now
    if expr.status.startTime > (datetime.now(timezone.utc)):
        messages.append(Message(MessageLevel.ERROR, "Invalid startTime: greater than current time"))
        iam.message = Message.join_messages(messages)
        return iam

    # if there are metrics to be fetched...
    if expr.status.metrics is not None:
        for metric_resource in expr.status.metrics:
            iam.data[metric_resource.name] = AggregatedMetric(data={})

            # fetch the metric value for each version...
            for version in versions:
                # initialize metric object for this version...
                iam.data[metric_resource.name].data[version.name] = VersionMetric()
                val, err = get_metric_value(metric_resource.metricObj, version, \
                    expr.status.startTime)
                if err is None:
                    iam.data[metric_resource.name].data[version.name].value = val
                else:
                    messages.append(Message(MessageLevel.ERROR, \
                        f"Error from metrics backend for metric: {metric_resource.name} \
                        and version: {version.name}"))

    iam.message = Message.join_messages(messages)
    logger.debug("Analysis object after metrics collection")
    logger.debug(pprint.PrettyPrinter().pformat(iam))
    return iam
def get_winner_assessment_for_conformance(experiment_resource: ExperimentResource):
    """
    Get winner assessment using experiment resource for Conformance experiments
    """
    was = WinnerAssessmentAnalysis()

    versions = [experiment_resource.spec.versionInfo.baseline]

    feasible_versions = get_feasible_versions(experiment_resource, versions)

    # extract names of feasible versions
    fvn = list(map(lambda version: version.name, feasible_versions))

    if versions[0].name in fvn:
        was.data = WinnerAssessmentData(winnerFound = True, winner = versions[0].name, \
            bestVersions = [versions[0].name])
        was.message = Message.join_messages([Message(MessageLevel.INFO, \
            "baseline satisfies all objectives")])
    return was
def get_weights(experiment_resource: ExperimentResource):
    """
    Get weights using experiment resource. All weight values in the output will be integers.
    """
    if experiment_resource.spec.strategy.testingPattern == TestingPattern.CONFORMANCE:
        return WeightsAnalysis(data = [], \
            message = "weight computation is not applicable to a conformance experiment")

    versions = [experiment_resource.spec.versionInfo.baseline]
    versions += experiment_resource.spec.versionInfo.candidates

    messages = []

    # create exploration weights; in fraction
    # if there are three versions:
    #     exploration_weights = [1/3, 1/3, 1/3]
    exploration_weights = np.full((len(versions), ), 1.0 / len(versions))

    def get_exploitation_weights():
        """Create exploitation weights; in fraction
        if there are three versions:
            if there are no best versions:
                exploitation_weights = [1.0, 0, 0], i.e., baseline gets to be exploited
            if there is a single best version, say, the 2nd version:
                exploitation_weights = [0, 1.0, 0], i.e., the best version gets exploited
            if there are two best versions, say, the 2nd and 3rd versions:
                exploitation_weights = [0, 0.5, 0.5], i.e., best versions get exploited evenly
        """
        exploitation_weights = np.full((len(versions), ), 0.0)
        try:
            bvs = experiment_resource.status.analysis.winner_assessment.data.bestVersions
            assert len(bvs) > 0
            messages.append(Message(MessageLevel.INFO, "found best version(s)"))
            for i, version in enumerate(versions):
                if version.name in bvs:
                    exploitation_weights[i] = 1 / len(bvs)
        except (KeyError, AssertionError):
            exploitation_weights = np.full((len(versions), ), 0.0)
            exploitation_weights[0] = 1.0
            messages.append(Message(MessageLevel.INFO, "no best version(s) found"))
        return exploitation_weights

    exploitation_weights = get_exploitation_weights()

    def get_constrained_weights(input_weights):
        """
        Take input weights in percentage. Apply weight constraints and return modified weights.

        Example illustrating the inner workings of this function:
            old_weights = [20, 40, 40]
            input_weights = [20, 30, 50]
            maxCandidateWeightIncrement = 10
            maxCandidateWeight = 40
            after i = 0, constrained_weights = [20, 30, 50]
            during i = 1
                increase = -10
                excess = max(0, -10 - 10, 30 - 40) = max(0, -20, -10) = 0
            after i = 1, constrained_weights = [20, 30, 50]
            during i = 2
                increase = 10
                excess = max(0, 10 - 10, 50 - 40) = 10
            after i = 2, constrained_weights = [30, 30, 40]
        """
        # Suppose there are 3 versions.
        # old_weights is initialized to [100, 0, 0]
        old_weights = [100] + ([0] * (len(versions) - 1))
        # and then, old_weights is updated to currentWeightDistribution, e.g., [5, 25, 70]
        if experiment_resource.status.currentWeightDistribution is not None:
            old_weights = list(map(lambda x: x.value, \
                experiment_resource.status.currentWeightDistribution))
        logger.debug("Old weights: %s", old_weights)
        logger.debug("Input weights: %s", input_weights)

        constrained_weights = input_weights.copy()
        if experiment_resource.spec.strategy.weights is not None:
            for i in range(len(versions)):
                if i == 0:
                    continue
                # for each candidate, compute excess
                increase = input_weights[i] - old_weights[i]
                excess = max(0, \
                    increase - \
                    experiment_resource.spec.strategy.weights.maxCandidateWeightIncrement, \
                    input_weights[i] - experiment_resource.spec.strategy.weights.maxCandidateWeight)
                # cap candidate weight and add the excess to baseline
                constrained_weights[i] -= excess
                constrained_weights[0] += excess

        logger.debug("Constrained weights: %s", constrained_weights)
        return constrained_weights

    # create mix-weight: in fraction
    ewf = AdvancedParameters.exploration_traffic_percentage / 100.0
    # Suppose, ewf = 0.1 (i.e., exploration_traffic_percentage = 10%)
    # Let exploration_weights = [1/3, 1/3, 1/3]
    # Let exploitation_weights = [0, 0.5, 0.5]
    # Then, mix_weights = 0.1 * exploration_weights + 0.9 * exploitation_weights
    #                   = 0.1 * [1/3, 1/3, 1/3] + 0.9 * [0, 0.5, 0.5]
    #                   = [0.033333, 0.033333, 0.033333] + [0.0, 0.45, 0.45]
    #                   = [0.033333, 0.483333, 0.483333]
    mix_weights = (exploration_weights * ewf) + (exploitation_weights * (1 - ewf))

    # create mix-weight: in percent
    # in the above example, we have mix_weights (in percent) = [3.3333, 48.3333, 48.3333]
    mix_weights *= 100.0

    # apply weight constraints
    constrained_weights = get_constrained_weights(mix_weights)

    # perform rounding of weights, so that they sum up to 100
    integral_weights = gen_round(constrained_weights, 100)
    data = []
    for version in versions:
        data.append(VersionWeight(name=version.name, value=next(integral_weights)))
    _weights = WeightsAnalysis(data=data)
    _weights.message = Message.join_messages([Message(MessageLevel.INFO, "all ok")])
    logger.debug("weights: %s", pprint.PrettyPrinter().pformat(_weights))
    return _weights
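# gen_round, consumed above via next(), is not shown in this excerpt. It behaves as a
# generator that yields one integer weight per version, with the integers summing to the
# given total (100). The sketch below is only an assumption about that behavior, using
# largest-remainder rounding; it is not the actual implementation.
import math

def gen_round_sketch(weights, total):
    """Sketch: yield integer weights summing to `total`, largest-remainder style."""
    weight_sum = float(sum(weights))
    scaled = [w * total / weight_sum for w in weights] if weight_sum else [0.0] * len(weights)
    floors = [int(math.floor(s)) for s in scaled]
    leftover = total - sum(floors)
    # hand the leftover units to the entries with the largest fractional parts
    order = sorted(range(len(scaled)), key=lambda i: scaled[i] - floors[i], reverse=True)
    for i in order[:leftover]:
        floors[i] += 1
    for value in floors:
        yield value

# e.g., list(gen_round_sketch([20.5, 30.25, 49.25], 100)) -> [21, 30, 49]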
def get_winner_assessment_for_abn(experiment_resource: ExperimentResource):
    """
    Get winner assessment using experiment resource for ab or abn experiments
    """
    was = WinnerAssessmentAnalysis()

    versions = [experiment_resource.spec.versionInfo.baseline]
    versions += experiment_resource.spec.versionInfo.candidates
    logger.info("Versions: %s", versions)

    feasible_versions = get_feasible_versions(experiment_resource, versions)
    logger.info("Feasible versions: %s", feasible_versions)

    # names of feasible versions
    fvn = list(map(lambda version: version.name, feasible_versions))

    def get_inf_reward(reward: Reward):
        """Return the worst possible reward value for the given preferred direction."""
        if reward.preferredDirection == PreferredDirection.HIGH:
            return -math.inf
        else:
            return math.inf

    def first_better_than_second(first: float, second: float, \
        preferred_direction: PreferredDirection):
        """
        Return True if first is better than second, else return False
        """
        if preferred_direction is None:
            err = "Metrics cannot be compared without preferred direction"
            logger.error(err)
            return False, err
        if preferred_direction is PreferredDirection.HIGH:
            return (first > second), None
        return (first < second), None

    aggregated_metric_data = experiment_resource.status.analysis.aggregated_metrics.data
    if experiment_resource.spec.criteria.rewards is not None:
        reward_metric = experiment_resource.spec.criteria.rewards[0].metric
        if reward_metric in aggregated_metric_data:
            reward_metric_data = aggregated_metric_data[reward_metric].data

            (top_reward, best_versions) = (get_inf_reward(\
                experiment_resource.spec.criteria.rewards[0]), [])

            messages = []

            if not fvn:
                messages.append(Message(MessageLevel.INFO, "no version satisfies all objectives"))

            for fver in fvn:  # for each feasible version
                if fver in reward_metric_data:
                    if reward_metric_data[fver].value is not None:
                        if reward_metric_data[fver].value == top_reward:
                            best_versions.append(fver)
                        else:  # this reward not equal to top reward
                            is_better, err = first_better_than_second(\
                                float(reward_metric_data[fver].value), float(top_reward), \
                                experiment_resource.spec.criteria.rewards[0].preferredDirection)
                            if err is None:
                                if is_better:
                                    (top_reward, best_versions) = \
                                        (reward_metric_data[fver].value, [fver])
                            else:  # there is an error in comparison
                                was.message = Message.join_messages([Message(MessageLevel.ERROR, \
                                    str(err))])
                                return was
                    else:  # found a feasible version without reward value
                        messages.append(Message(MessageLevel.WARNING, \
                            f"reward value for feasible version {fver} is not available"))
                else:  # found a feasible version without reward value
                    messages.append(Message(MessageLevel.WARNING, \
                        f"reward value for feasible version {fver} is not available"))

            was.data.bestVersions = best_versions

            if len(best_versions) == 1:
                was.data.winnerFound = True
                was.data.winner = best_versions[0]
                messages.append(Message(MessageLevel.INFO, "found unique winner"))
            elif len(best_versions) > 1:
                messages.append(Message(MessageLevel.INFO, \
                    "no unique winner; two or more feasible versions with same reward value"))

            was.message = Message.join_messages(messages)
        else:  # reward metric values are not available
            was.message = Message.join_messages([Message(MessageLevel.WARNING, \
                "reward metric values are not available")])
    else:  # ab or abn experiment without reward metric
        was.message = Message.join_messages([Message(MessageLevel.WARNING, \
            "No reward metric in experiment. Winner assessment cannot be computed for ab or abn experiments without reward metric.")])
    return was
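# The reward scan above is, at its core, an argmax over the feasible versions, with ties
# collected into bestVersions. The standalone helper below (hypothetical names and reward
# values) replays that loop over plain floats, minus the missing-value and error handling;
# it is only an illustration, not part of the module API. It reuses math from the module imports.
def best_by_reward(reward_values: dict, higher_is_better: bool = True):
    """Sketch: return (top_reward, best_versions) for a {version: reward} dict."""
    top_reward = -math.inf if higher_is_better else math.inf
    best_versions = []
    for name, value in reward_values.items():
        if value == top_reward:
            best_versions.append(name)  # tie with the current best
        else:
            is_better = value > top_reward if higher_is_better else value < top_reward
            if is_better:
                (top_reward, best_versions) = (value, [name])
    return top_reward, best_versions

# e.g., best_by_reward({"v1": 0.2, "v2": 0.5, "v3": 0.5}) -> (0.5, ['v2', 'v3']): no unique winner
#       best_by_reward({"v1": 0.2, "v2": 0.7, "v3": 0.5}) -> (0.7, ['v2']): unique winner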