def get_component_indices_of_task(task_node):
    """Return the child indices that qualify as components of ``task_node``.

    Candidate terms come from ``varanalysis.get_sorted_child_var_terms`` using
    a threshold of ``task_node.sq_err_sum * child_var_fraction``.  Each term is
    unpacked as a ``(var, i, i2)`` triple; only ``var`` and ``i`` are used here.

    A candidate survives when:
      1. the sum of row ``i`` of ``task_node.cross_sq_err_sum`` (over the first
         ``len(task_node.cross_sq_err_sum)`` columns) reaches the threshold, and
      2. its ``var`` is at least one tenth of the ``var`` of the first
         surviving candidate (presumably the largest, since the helper is
         named "sorted" -- confirm against ``varanalysis``).

    The index equal to ``len(task_node.funcs_called)`` (the "internal" term)
    is never returned.

    Returns:
        A list of child indices; empty when no candidate survives.
    """
    threshold = task_node.sq_err_sum * child_var_fraction
    sorted_terms = varanalysis.get_sorted_child_var_terms(task_node, threshold)

    # Keep only the terms whose total covariance contribution (row sum of the
    # cross squared-error matrix) is itself above the threshold.
    # Accumulate by hand so the floating-point summation order stays explicit.
    strong_terms = []
    for term in sorted_terms:
        _, row, _ = term
        row_total = 0.0
        for col in range(len(task_node.cross_sq_err_sum)):
            row_total += task_node.cross_sq_err_sum[row][col]
        if row_total >= threshold:
            strong_terms.append(term)

    # We don't check whether an immediate contributor's total_count exceeds
    # min_significant_exec_time, so a task may end up with a component that
    # has no significant contributor.
    if not strong_terms:
        return []

    # Further prune terms more than an order of magnitude below the leading
    # one.  If the "internal" term dominates, this eliminates the child terms.
    cutoff = strong_terms[0][0] / 10.0
    kept_terms = [term for term in strong_terms if term[0] >= cutoff]

    # No duplicates are possible because the indices come from diagonal terms
    # only.  The internal term is dropped as well, since its effect on the
    # total should not be included.
    component_indices = []
    for _, idx, _ in kept_terms:
        if idx != len(task_node.funcs_called):
            component_indices.append(idx)
    return component_indices
def get_lower_root_contributor_candidates(contrib_node, min_significant_exec_time):
    """Return child indices of ``contrib_node`` that may be lower root contributors.

    An empty list is returned when any of the following holds:
      * ``is_low_var(contrib_node)`` is true;
      * any chosen child's ``total_count`` is below
        ``min_significant_exec_time``;
      * the chosen children's summed variance is not positive, or is less than
        ``most_var_fraction * contrib_node.sq_err_sum``;
      * the magnitude of the summed off-diagonal ``cross_sq_err_sum`` entries
        between chosen children, relative to their summed variance, is not
        below ``correlation_to_var_insignificance_ratio``.

    Args:
        contrib_node: node providing ``sq_err_sum``, ``funcs_called`` and
            ``cross_sq_err_sum``.
        min_significant_exec_time: minimum ``total_count`` every chosen child
            must have.

    Returns:
        The list of chosen child indices, or ``[]`` if the checks fail.
    """
    if is_low_var(contrib_node):
        return []

    threshold = contrib_node.sq_err_sum * child_var_fraction
    ranked_terms = varanalysis.get_sorted_child_var_terms(contrib_node, threshold)

    # Drop the "internal" term, identified by index == len(funcs_called).
    child_terms = []
    for term in ranked_terms:
        if term[1] != len(contrib_node.funcs_called):
            child_terms.append(term)

    # Collect the children's indices and their combined variance.
    var_total = 0.0
    child_indices = []
    for var, idx, _ in child_terms:
        var_total += var
        child_indices.append(idx)

    # None of the chosen children may fall below the significance threshold.
    if any(
        contrib_node.funcs_called[idx].total_count < min_significant_exec_time
        for idx in child_indices
    ):
        return []

    # Sum the pairwise cross terms between distinct chosen children.
    cross_total = 0.0
    for first in child_indices:
        for second in child_indices:
            if first == second:
                continue
            cross_total += contrib_node.cross_sq_err_sum[first][second]

    # The chosen children must account for most of the parent's variance ...
    covers_most_variance = (
        var_total > 0.0
        and var_total >= most_var_fraction * contrib_node.sq_err_sum
    )
    if not covers_most_variance:
        return []

    # ... without being affected by correlation effects in the parent's context.
    if abs(cross_total) / var_total < correlation_to_var_insignificance_ratio:
        return child_indices
    return []