def calc_collapsed_preds(indiv_predictions, subject_clusters, num_clusters):
    """Give every subject the collapsed prediction of the cluster they belong to.

    For each answer index, the individual predictions for that answer are
    gathered into one array and handed to ``clusters.compute_means`` together
    with the cluster assignments. Each subject's entry for that answer is then
    whatever ``compute_means`` returned at the subject's own cluster.

    Args:
        indiv_predictions: sequence of per-answer prediction sequences
            (presumably one entry per subject, aligned with
            ``subject_clusters`` -- TODO confirm). Must be non-empty, since
            the number of answers is read from the first entry.
        subject_clusters: indexable giving the cluster of each subject.
        num_clusters: number of clusters, forwarded to
            ``clusters.compute_means``.

    Returns:
        Nested list with one row per entry of ``subject_clusters`` and one
        column per answer.
    """
    num_subjects = len(subject_clusters)
    num_answers = len(indiv_predictions[0])
    collapsed = np.zeros((num_subjects, num_answers))

    for answer_idx in range(num_answers):
        # Everyone's prediction for this one answer.
        answer_preds = np.array(
            [prediction[answer_idx] for prediction in indiv_predictions]
        )
        # Presumably a per-cluster mean indexable by cluster id -- verify
        # against clusters.compute_means.
        cluster_means = clusters.compute_means(
            subject_clusters, answer_preds, num_clusters
        )
        # Broadcast each cluster's value back to the subjects in that cluster.
        per_subject = [
            cluster_means[subject_clusters[subject_idx]]
            for subject_idx in range(num_subjects)
        ]
        collapsed[:, answer_idx] = per_subject

    return collapsed.tolist()
def calc_collapsed_param_bts(personal_probs, meta_params, meta_distribution_cdf,
                             subject_clusters, answer_boundaries):
    """Make composite subjects and run bts on them.

    Personal probabilities and per-answer predictions are both collapsed to
    the cluster level (every subject receives their cluster's value) before
    being passed to ``calc_non_binary_bts``.

    Args:
        personal_probs: list where each element is a subject's personal prob.
        meta_params: list where each element gives meta parameters induced
            from a subject. (A tuple of alpha, beta in beta distribution
            case.)
        meta_distribution_cdf: function which calculates the CDF of the meta
            distribution.
        subject_clusters: np.array giving the cluster each subject is
            assigned to. Used to show which constructed answer a subject
            endorsed.
        answer_boundaries: list where each element is a tuple giving
            (lower, upper) boundary of the answer. Answers needn't be in
            order.

    Returns:
        The result of ``calc_non_binary_bts``: a dict where each field
        (bts, surprise, accuracy) points to an np.array with an elt for each
        subject.
    """
    n_subjects = len(subject_clusters)
    # NOTE(review): the original author was unsure whether the number of
    # clusters should be taken from the number of answers -- confirm.
    n_clusters = len(answer_boundaries)

    # Collapse personal probabilities: one value per cluster, then mapped
    # back onto each subject through their cluster assignment.
    personal_by_cluster = clusters.compute_means(
        subject_clusters, personal_probs, n_clusters
    )
    personal_per_subject = [
        personal_by_cluster[subject_clusters[subject_idx]]
        for subject_idx in range(n_subjects)
    ]

    # Collapse the predictions derived from the meta distribution likewise.
    predictions_per_subject = calc_predictions_cdf(
        meta_params, meta_distribution_cdf, answer_boundaries
    )
    collapsed_predictions = calc_collapsed_preds(
        predictions_per_subject, subject_clusters, n_clusters
    )

    return calc_non_binary_bts(
        personal_per_subject,
        collapsed_predictions,
        subject_clusters,
        answer_boundaries,
    )