# update matrix
embedding_copy = np.zeros((1, 2))
for z, each_element in enumerate(mean_input):
    if each_element > x_test[each_t][z].item():
        embedding_copy[0][z] = 0
    else:
        embedding_copy[0][z] = 1
index = pass_in_embedding_out_state_ID(states, embedding_copy[0])
action = y_test[each_t]
current_schedule_matrix[index][int(action)] += 1
per_schedule_test_accs.append(np.mean(test_accs))
# Compute sensitivity and specificity (ideally these should be very high)
sensitivity, specificity = compute_sensitivity(preds, actual), compute_specificity(preds, actual)
print('Loss: {}, Accuracy: {}'.format(np.mean(test_losses), np.mean(test_accs)))
print('per sched accuracy: ', np.mean(per_schedule_test_accs))
print('mean sensitivity: ', sensitivity, ', mean specificity: ', specificity)
file = open('heterogeneous_toy_env_results.txt', 'a')
file.write('gmm -> NN: mean: ' + str(np.mean(per_schedule_test_accs)) +
           ', std: ' + str(np.std(per_schedule_test_accs)) +
           ', sensitivity: ' + str(sensitivity) +
           ', specificity: ' + str(specificity) +
           ', Distribution of Class: 0: ' + str(percent_of_zeros) +
           ', 1: ' + str(1 - percent_of_zeros) + '\n')
file.close()
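
# pass_in_embedding_out_state_ID is defined elsewhere in the repo; the block
# above assumes it maps a binary embedding to the index of the matching row
# in `states`. A minimal sketch of that assumed behavior (name hypothetical):
def pass_in_embedding_out_state_ID_sketch(states, binary_embedding):
    import numpy as np  # assumed already imported at the top of this file
    for state_id, state in enumerate(states):
        if np.array_equal(np.asarray(state), np.asarray(binary_embedding)):
            return state_id  # first state whose encoding matches the embedding
    raise ValueError('embedding does not correspond to any known state')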
chosen_schedule_start = int(schedule_starts[i])
schedule_num = int(chosen_schedule_start / 20)
MLP.set_bayesian_embedding(list(test_distributions[schedule_num]))
# adapt the Bayesian embedding online over the 20 timesteps of this schedule
for each_t in range(chosen_schedule_start, chosen_schedule_start + 20):
    solo_embedding_optimizer.zero_grad()
    pred = MLP(x_test[each_t])
    loss = F.cross_entropy(pred.reshape(1, 2), y_test[each_t].long())
    loss.backward()
    solo_embedding_optimizer.step()
    preds[i].append(pred.argmax(dim=-1).item())
    actual[i].append(y_test[each_t].item())
    print(pred.argmax(dim=-1), y_test[each_t])
    acc = (pred.argmax(dim=-1) == y_test[each_t].item()).to(torch.float32).mean()
    test_losses.append(loss.item())
    test_accs.append(acc.item())
per_schedule_test_accs.append(np.mean(test_accs))
print('Loss: {}, Accuracy: {}'.format(np.mean(test_losses), np.mean(test_accs)))
sensitivity, specificity = compute_sensitivity(preds, actual), compute_specificity(preds, actual)
print('per sched accuracy: ', np.mean(per_schedule_test_accs))
print('mean sensitivity: ', sensitivity, ', mean specificity: ', specificity)
file = open('heterogeneous_toy_env_results.txt', 'a')
file.write('NN w/ embedding: mean: ' + str(np.mean(per_schedule_test_accs)) +
           ', std: ' + str(np.std(per_schedule_test_accs)) +
           ', sensitivity: ' + str(sensitivity) +
           ', specificity: ' + str(specificity) +
           ', Distribution of Class: 0: ' + str(percent_of_zeros) +
           ', 1: ' + str(1 - percent_of_zeros) + '\n')
file.close()
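
# compute_sensitivity and compute_specificity are imported from elsewhere in
# the repo. A minimal sketch of the assumed definitions, treating class 1 as
# positive and preds/actual as per-schedule lists of 0/1 labels (sketch only):
def compute_sensitivity_sketch(preds, actual):
    # sensitivity (true positive rate) = TP / (TP + FN)
    pairs = [(p, a) for ps, acts in zip(preds, actual) for p, a in zip(ps, acts)]
    tp = sum(1 for p, a in pairs if p == 1 and a == 1)
    fn = sum(1 for p, a in pairs if p == 0 and a == 1)
    return tp / (tp + fn) if (tp + fn) > 0 else 0.0

def compute_specificity_sketch(preds, actual):
    # specificity (true negative rate) = TN / (TN + FP)
    pairs = [(p, a) for ps, acts in zip(preds, actual) for p, a in zip(ps, acts)]
    tn = sum(1 for p, a in pairs if p == 0 and a == 0)
    fp = sum(1 for p, a in pairs if p == 1 and a == 0)
    return tn / (tn + fp) if (tn + fp) > 0 else 0.0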
def DTtest(tree):
    """
    Evaluate a decision tree with a bimodal Bayesian embedding.

    Pr[omega_i | game_g] ∝ Pr[omega = omega_i] * \prod_{j in game_g} Pr[y_j | omega_i, x_j]
    Pr[assignment_g^{t+1} = i] = Pr[assignment_g^t = i] * Pr[omega_i | game_g]

    :param tree: fitted decision tree to evaluate
    :return:
    """
    num_schedules = 50
    x_data_test, y_test, percent_of_zeros = create_simple_classification_dataset(
        num_schedules, get_percent_of_zeros=True)
    schedule_starts = np.linspace(0, int(num_schedules * 20 - 20), num=num_schedules)
    x_test = []
    preds, actual = [[] for _ in range(num_schedules)], [[] for _ in range(num_schedules)]
    for each_ele in x_data_test:
        x_test.append(each_ele[2:])
    data = torch.Tensor(x_test).reshape(-1, 2)
    y_test = torch.Tensor(y_test).reshape((-1, 1))
    # uniform prior over the two embedding modes, one distribution per schedule
    distributions = [np.ones(2) * 1 / 2 for _ in range(num_schedules)]
    total_acc = []
    for i in range(num_schedules):
        # choose a schedule
        chosen_schedule_start = int(schedule_starts[i])
        schedule_num = int(chosen_schedule_start / 20)
        embedding_given_dist, count = get_embedding_given_dist(distributions[schedule_num])
        prod = [.5, .5]
        acc = 0
        tally = [1, 1]
        for each_t in range(chosen_schedule_start, chosen_schedule_start + 20):
            # at each timestep, resample the embedding
            x = data[each_t]
            x = list(np.array(torch.cat([x, embedding_given_dist])))
            y_pred = tree.predict_proba(np.array(x).reshape(1, -1))
            label = y_test[each_t]
            print('output is ', y_pred[0], ' label is ', label)
            if np.argmax(y_pred[0]) == label.item():
                acc += 1
            preds[i].append(np.argmax(y_pred[0]))
            actual[i].append(label.item())
            # accumulate the likelihood of the observed label under each mode
            tally[count] *= y_pred[0][int(label.item())]
            tally[int(not count)] *= y_pred[0][int(not label.item())]
            # posterior ∝ prior * likelihood, then renormalize
            prod[count] = tally[count] * distributions[schedule_num][count]
            prod[int(not count)] = tally[int(not count)] * distributions[schedule_num][int(not count)]
            normalization_factor = sum(prod)
            prod = [k / normalization_factor for k in prod]
            distributions[schedule_num][0] = prod[0]
            distributions[schedule_num][1] = prod[1]
            normalization_factor_for_dist = sum(distributions[schedule_num])
            distributions[schedule_num] /= normalization_factor_for_dist
            print('distribution at time ', each_t, ' is', distributions[schedule_num])
            # explore (sample a mode) early in the schedule, then exploit the argmax
            if each_t % 20 < 5:
                embedding_given_dist, count = get_embedding_given_dist(distributions[schedule_num])
            else:
                embedding_given_dist = get_most_likely_embedding_given_dist(distributions[schedule_num])
                count = np.argmax(distributions[schedule_num])
        total_acc.append(acc / 20)
    sensitivity, specificity = compute_sensitivity(preds, actual), compute_specificity(preds, actual)
    print('per sched accuracy: ', np.mean(total_acc))
    print('mean sensitivity: ', sensitivity, ', mean specificity: ', specificity)
    file = open('heterogeneous_toy_env_results.txt', 'a')
    file.write('DT w/ bimodal embedding: mean: ' + str(np.mean(total_acc)) +
               ', std: ' + str(np.std(total_acc)) +
               ', sensitivity: ' + str(sensitivity) +
               ', specificity: ' + str(specificity) +
               ', Distribution of Class: 0: ' + str(percent_of_zeros) +
               ', 1: ' + str(1 - percent_of_zeros) + '\n')
    file.close()
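
# get_embedding_given_dist and get_most_likely_embedding_given_dist are
# defined elsewhere; DTtest assumes the former samples one of the two modes
# from the current posterior and the latter takes its argmax. A minimal
# sketch, assuming the two candidate embeddings are [1, 0] and [0, 1]
# (hypothetical choice):
def get_embedding_given_dist_sketch(dist):
    import numpy as np
    import torch
    candidate_embeddings = [torch.Tensor([1, 0]), torch.Tensor([0, 1])]
    # sample a mode index in proportion to the current posterior
    count = int(np.random.choice(2, p=np.asarray(dist) / np.sum(dist)))
    return candidate_embeddings[count], count

def get_most_likely_embedding_given_dist_sketch(dist):
    import numpy as np
    import torch
    candidate_embeddings = [torch.Tensor([1, 0]), torch.Tensor([0, 1])]
    # deterministic variant: return the embedding of the posterior mode
    return candidate_embeddings[int(np.argmax(dist))]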
def test(ddt):
    x_data_test, y_test, percent_of_zeros = create_simple_classification_dataset(
        50, get_percent_of_zeros=True)
    schedule_starts = np.linspace(0, int(50 * 20 - 20), num=50)
    x_test = []
    for each_ele in x_data_test:
        x_test.append(each_ele[2:])
    x_test = torch.Tensor(x_test).reshape(-1, 1, 2)
    y_test = torch.Tensor(y_test).reshape((-1, 1))
    per_schedule_test_accs = []
    preds, actual = [[] for _ in range(50)], [[] for _ in range(50)]
    # uniform prior over the two embedding modes, one distribution per schedule
    test_distributions = [np.ones(2) * 1 / 2 for _ in range(50)]
    for i in range(50):
        chosen_schedule_start = int(schedule_starts[i])
        schedule_num = int(chosen_schedule_start / 20)
        embedding_given_dist, count = get_embedding_given_dist(test_distributions[schedule_num])
        prod = [.5, .5]
        acc = 0
        ddt.set_bayesian_embedding(embedding_given_dist)
        for each_t in range(chosen_schedule_start, chosen_schedule_start + 20):
            # at each timestep, resample the embedding
            x_t = x_test[each_t]
            output = ddt.forward(x_t).reshape(1, 2)
            label = y_test[each_t].reshape(1).long()
            print('output is ', torch.argmax(output).item(), ' label is ', label.item())
            if torch.argmax(output).item() == label.item():
                acc += 1
            # likelihood of the observed label (and its complement) under the current embedding
            tally = output[0][int(label.item())].item()
            second_tally = output[0][int(not label.item())].item()
            # posterior ∝ prior * likelihood for each mode, then renormalize
            prod[count] = tally * test_distributions[schedule_num][count]
            prod[int(not count)] = second_tally * test_distributions[schedule_num][int(not count)]
            preds[i].append(torch.argmax(output).item())
            actual[i].append(label.item())
            normalization_factor = sum(prod)
            prod = [k / normalization_factor for k in prod]
            test_distributions[schedule_num][0] = prod[0]
            test_distributions[schedule_num][1] = prod[1]
            normalization_factor_for_dist = sum(test_distributions[schedule_num])
            test_distributions[schedule_num] /= normalization_factor_for_dist
            print('distribution at time ', each_t, ' is', test_distributions[schedule_num])
            # explore (sample a mode) early in the schedule, then exploit the argmax
            if each_t % 20 < 5:
                embedding_given_dist, count = get_embedding_given_dist(test_distributions[schedule_num])
            else:
                embedding_given_dist = get_most_likely_embedding_given_dist(test_distributions[schedule_num])
                count = np.argmax(test_distributions[schedule_num])
            ddt.set_bayesian_embedding(embedding_given_dist)
        per_schedule_test_accs.append(acc / 20)
    print('per sched accuracy: ', np.mean(per_schedule_test_accs))
    sensitivity, specificity = compute_sensitivity(preds, actual), compute_specificity(preds, actual)
    print('mean sensitivity: ', sensitivity, ', mean specificity: ', specificity)
    file = open('heterogeneous_toy_env_results.txt', 'a')
    file.write('DDT w/ bimodal embedding: mean: ' + str(np.mean(per_schedule_test_accs)) +
               ', std: ' + str(np.std(per_schedule_test_accs)) +
               ', sensitivity: ' + str(sensitivity) +
               ', specificity: ' + str(specificity) +
               ', Distribution of Class: 0: ' + str(percent_of_zeros) +
               ', 1: ' + str(1 - percent_of_zeros) + '\n')
    file.close()
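
# Tiny sanity check of the posterior update used in DTtest and test above:
# posterior_i ∝ prior_i * Pr[y | omega_i, x]. With a uniform prior and
# made-up likelihoods of 0.9 (mode 0) and 0.4 (mode 1) for one observed
# label, the mass should shift toward mode 0 (numbers hypothetical):
def _posterior_update_example():
    prior = [0.5, 0.5]
    likelihood = [0.9, 0.4]
    posterior = [p * l for p, l in zip(prior, likelihood)]
    normalization = sum(posterior)
    return [p / normalization for p in posterior]  # ≈ [0.692, 0.308]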