def load_probabilities_and_get_variances(slurm_id, hidden_arr, inter=0, num_bootstrap=10000):
    '''Load saved probabilities, bootstrap the variance estimate, and save the
    differences from the full-sample variance.

    For each hidden size, resamples the seed axis with replacement
    ``num_bootstrap`` times, recomputes the variance on each resample, and
    saves the list of (bootstrap - original) differences via
    ``save_variance_diffs``.

    Prerequisite: probabilities must have been saved earlier with the
    save_probabilities function, with shape
    (num_seeds, num_test_examples, probabilities_for_each_example),
    e.g. (50, 10000, 10) for 50 seeds on MNIST.
    '''
    # Seed count per experiment: most runs used 50 seeds, but these two
    # slurm ids completed fewer. This is loop-invariant, so it is computed
    # once here (the original drew a throwaway choice(50, 50) each
    # iteration before overwriting it for the special ids).
    num_seeds = {195683: 28, 195684: 43}.get(slurm_id, 50)
    for num_hidden in hidden_arr:
        probabilities = load_probabilities(slurm_id, num_hidden, inter)
        original_variance = calculate_variance(probabilities)
        diffs = []
        for _ in range(num_bootstrap):
            # Resample seeds with replacement and measure the variance shift.
            indices = np.random.choice(num_seeds, num_seeds, replace=True)
            bootstrap_variance = calculate_variance(probabilities[indices])
            diffs.append(bootstrap_variance - original_variance)
        save_variance_diffs(slurm_id, num_hidden, diffs)
def get_bias(slurm_id, num_hidden, inter):
    '''Returns the bias for a slurm id (corresponding to an experiment) and a
    hidden size, computed against the one-hot test labels.

    NOTE(review): the original docstring said "variance", but this function
    delegates to calculate_bias, not calculate_variance.
    '''
    test_y_onehot = get_test_y_onehot()
    probabilities = load_probabilities(slurm_id, num_hidden, inter)
    return calculate_bias(probabilities, test_y_onehot)
def load_probabilities_and_get_second_term(slurm_id, hidden_arr, num_initializations_per_split, inter=0, reverse=False):
    '''For each hidden size, compute the variance of the per-group mean
    probabilities (the "second term" of the variance decomposition).

    Seeds are partitioned into groups of ``num_initializations_per_split``;
    when ``reverse`` is False the grouping follows data samplings, otherwise
    it follows optimization runs. Returns one value per entry of hidden_arr.
    '''
    # The grouping function depends only on the reverse flag, so pick it once.
    picker = (find_probabilities_for_optimization if reverse
              else find_probabilities_for_sampling)
    second_terms = []
    for num_hidden in hidden_arr:
        probabilities = load_probabilities(slurm_id, num_hidden, inter)
        num_samplings = probabilities.shape[0] // num_initializations_per_split
        # One mean-probability row per group, same trailing dimensions.
        expected_probabilities = np.zeros(
            (num_samplings,) + probabilities.shape[1:])
        for sampling_no in range(num_samplings):
            group = picker(
                probabilities, sampling_no, num_initializations_per_split)
            expected_probabilities[sampling_no] = np.mean(group, 0)
        second_terms.append(calculate_variance(expected_probabilities))
    return second_terms
def load_probabilities_and_get_losses_and_std(slurm_id, hidden_arr, inter=0):
    '''For each hidden size, return the mean per-seed loss and its standard
    error.

    Returns a pair of lists (average_losses, stds), aligned with hidden_arr;
    each std is the standard deviation of the per-seed losses divided by
    sqrt(number of seeds), i.e. the standard error of the mean.
    '''
    test_y_onehot = get_test_y_onehot()
    average_losses, stds = [], []
    for num_hidden in hidden_arr:
        probabilities = load_probabilities(slurm_id, num_hidden, inter)
        losses = calculate_losses(probabilities, test_y_onehot)
        average_losses.append(np.mean(losses))
        stds.append(np.std(losses) / math.sqrt(len(losses)))
    return average_losses, stds
def load_probabilities_and_get_biases(slurm_id, hidden_arr, inter=0, num_bootstrap=10000):
    '''Load saved probabilities, bootstrap the bias estimate, and save the
    differences from the full-sample bias.

    For each hidden size, resamples the seed axis with replacement
    ``num_bootstrap`` times, recomputes the bias (against the one-hot test
    labels) on each resample, and saves the list of (bootstrap - original)
    differences via ``save_bias_diffs``.

    Prerequisite: probabilities must have been saved earlier with the
    save_probabilities function, with shape
    (num_seeds, num_test_examples, probabilities_for_each_example).
    '''
    test_y_onehot = get_test_y_onehot()
    # Seed count per experiment: most runs used 50 seeds, but these two
    # slurm ids completed fewer. Loop-invariant, so computed once (the
    # original drew a throwaway choice(50, 50) each iteration before
    # overwriting it for the special ids). Also renamed the misleading
    # "variance" locals — this function measures bias.
    num_seeds = {195683: 28, 195684: 43}.get(slurm_id, 50)
    for num_hidden in hidden_arr:
        probabilities = load_probabilities(slurm_id, num_hidden, inter)
        original_bias = calculate_bias(probabilities, test_y_onehot)
        diffs = []
        for _ in range(num_bootstrap):
            indices = np.random.choice(num_seeds, num_seeds, replace=True)
            bootstrap_bias = calculate_bias(
                probabilities[indices], test_y_onehot)
            diffs.append(bootstrap_bias - original_bias)
        save_bias_diffs(slurm_id, num_hidden, diffs)
def load_probabilities_and_get_first_term(slurm_id, hidden_arr, num_initializations_per_split, inter=0, reverse=False):
    '''For each hidden size, compute the mean of the per-group variances
    (the "first term" of the variance decomposition).

    Seeds are partitioned into groups of ``num_initializations_per_split``;
    when ``reverse`` is False the grouping follows data samplings, otherwise
    it follows optimization runs. Returns one value per entry of hidden_arr.
    '''
    # The grouping function depends only on the reverse flag, so pick it once.
    picker = (find_probabilities_for_optimization if reverse
              else find_probabilities_for_sampling)
    first_terms = []
    for num_hidden in hidden_arr:
        probabilities = load_probabilities(slurm_id, num_hidden, inter)
        num_samplings = probabilities.shape[0] // num_initializations_per_split
        individual_variances = [
            calculate_variance(
                picker(probabilities, sampling_no,
                       num_initializations_per_split))
            for sampling_no in range(num_samplings)
        ]
        first_terms.append(np.mean(np.array(individual_variances)))
    return first_terms
def get_variance(slurm_id, num_hidden, inter):
    '''Return the variance for one experiment (slurm id) and one hidden
    size, computed over the saved per-seed probabilities.'''
    return calculate_variance(load_probabilities(slurm_id, num_hidden, inter))