# update matrix

        embedding_copy = np.zeros((1, 2))
        for z, each_element in enumerate(mean_input):
            if each_element > x_test[each_t][z].item():
                embedding_copy[0][z] = 0
            else:
                embedding_copy[0][z] = 1
        index = pass_in_embedding_out_state_ID(states, embedding_copy[0])
        action = y_test[each_t]
        current_schedule_matrix[index][int(action)] += 1

    per_schedule_test_accs.append(np.mean(test_accs))

sensitivity, specificity = compute_sensitivity(preds,
                                               actual), compute_specificity(
                                                   preds, actual)

print('Loss: {}, Accuracy: {}'.format(np.mean(test_losses),
                                      np.mean(test_accs)))
print('per sched accuracy: ', np.mean(per_schedule_test_accs))
print('mean sensitivity: ', sensitivity, ', mean specificity: ', specificity)
# Compute sensitivity and specificity (ideally these should be very high)
file = open('heterogeneous_toy_env_results.txt', 'a')
file.write('gmm -> NN: mean: ' + str(np.mean(per_schedule_test_accs)) +
           ', std: ' + str(np.std(per_schedule_test_accs)) +
           ', sensitivity: ' + str(sensitivity) + ', specificity: ' +
           str(specificity) + ', Distribution of Class: 0: ' +
           str(percent_of_zeros) + ', 1: ' + str(1 - percent_of_zeros) + '\n')
file.close()
    chosen_schedule_start = int(schedule_starts[i])
    schedule_num = int(chosen_schedule_start / 20)
    MLP.set_bayesian_embedding(list(test_distributions[schedule_num]))
    for each_t in range(chosen_schedule_start, chosen_schedule_start + 20):
        solo_embedding_optimizer.zero_grad()
        pred = MLP(x_test[each_t])
        loss = F.cross_entropy(pred.reshape(1, 2), y_test[each_t].long())
        loss.backward()
        solo_embedding_optimizer.step()
        preds[i].append(pred.argmax(dim=-1).item())
        actual[i].append(y_test[each_t].item())
        print(pred.argmax(dim=-1), y_test[each_t])
        acc = (pred.argmax(dim=-1) == y_test[each_t].item()).to(torch.float32).mean()
        test_losses.append(loss.item())
        test_accs.append(acc.mean().item())
    per_schedule_test_accs.append(np.mean(test_accs))
print('Loss: {}, Accuracy: {}'.format(np.mean(test_losses), np.mean(test_accs)))


sensitivity, specificity = compute_sensitivity(preds, actual), compute_specificity(preds, actual)
print('per sched accuracy: ', np.mean(per_schedule_test_accs))
print('mean sensitivity: ', sensitivity, ', mean specificity: ', specificity)
file = open('heterogeneous_toy_env_results.txt', 'a')
file.write('NN w/ embedding: mean: ' +
           str(np.mean(per_schedule_test_accs)) +
           ', std: ' + str(np.std(per_schedule_test_accs)) +
            ', sensitivity: ' + str(sensitivity) + ', specificity: '+ str(specificity) +
           ', Distribution of Class: 0: ' + str(percent_of_zeros) + ', 1: ' + str(1 - percent_of_zeros) +
           '\n')
file.close()
# Example no. 3
# 0
def DTtest(tree):
    r"""
    Evaluate ``tree`` on a freshly generated toy dataset, one schedule at a time,
    while maintaining a two-way belief over which embedding each schedule uses.

    The belief update follows:

    Pr[omega_i | game_g] = Pr[omega = omega_i] * \Prod_{data points j in game g} Pr[y_j | omega_i , x_j]
    Pr[assignment_g^{t+1} = i] = Pr[assignment_g^t = i] * Pr[omega_i | game g]

    After each of the first five timesteps of a schedule the embedding is
    re-sampled from the belief; after later timesteps the most likely embedding
    is used. Per-timestep predictions and beliefs are printed, then the mean
    per-schedule accuracy, sensitivity and specificity are printed and one
    summary line is appended to ``heterogeneous_toy_env_results.txt``.

    :param tree: classifier exposing ``predict_proba``; it is called on a single
        row made of the two input features followed by the current embedding.
    :return: None
    """
    num_schedules = 50
    schedule_length = 20
    x_data_test, y_test, percent_of_zeros = create_simple_classification_dataset(
        num_schedules, get_percent_of_zeros=True)
    preds = [[] for _ in range(num_schedules)]
    actual = [[] for _ in range(num_schedules)]

    # The first two entries of every row are dropped; the rest are the inputs.
    data = torch.Tensor([each_ele[2:] for each_ele in x_data_test]).reshape(-1, 2)
    y_test = torch.Tensor(y_test).reshape((-1, 1))

    # One belief vector per schedule, starting uniform over the two embeddings.
    distributions = [np.ones(2) * 1 / 2 for _ in range(num_schedules)]
    total_acc = []
    for schedule_num in range(num_schedules):
        # Schedules are laid out back to back, schedule_length timesteps each.
        chosen_schedule_start = schedule_num * schedule_length
        belief = distributions[schedule_num]
        embedding_given_dis, count = get_embedding_given_dist(belief)
        prod = [.5, .5]

        acc = 0
        # Running product of predicted probabilities, indexed by embedding id.
        tally = [1, 1]
        for each_t in range(chosen_schedule_start,
                            chosen_schedule_start + schedule_length):
            # Model input is the raw features concatenated with the embedding.
            x = np.array(torch.cat([data[each_t], embedding_given_dis]))
            y_pred = tree.predict_proba(x.reshape(1, -1))

            label = y_test[each_t]
            predicted = np.argmax(y_pred[0])

            print('output is ', y_pred[0], ' label is ', label)
            if predicted == label:
                acc += 1
            preds[schedule_num].append(predicted)
            actual[schedule_num].append(label.item())

            other = int(not count)
            # The embedding in use is credited with the probability given to the
            # observed label, the other embedding with the complementary class.
            tally[count] *= y_pred[0][int(label[0])]
            tally[other] *= y_pred[0][int(not label[0])]
            prod[count] = tally[count] * belief[count]
            prod[other] = tally[other] * belief[other]

            normalization_factor = sum(prod)
            prod = [k / normalization_factor for k in prod]

            # Written in place so distributions[schedule_num] sees the update.
            belief[0] = prod[0]
            belief[1] = prod[1]
            belief /= sum(belief)

            print('distribution at time ', each_t, ' is', belief)
            if each_t % schedule_length < 5:
                # Early in the schedule: keep exploring by sampling.
                embedding_given_dis, count = get_embedding_given_dist(belief)
            else:
                # Later: commit to the currently most likely embedding.
                embedding_given_dis = get_most_likely_embedding_given_dist(belief)
                count = np.argmax(belief)

        total_acc.append(acc / schedule_length)
    print('mean is ', np.mean(total_acc))
    print('finite')

    sensitivity = compute_sensitivity(preds, actual)
    specificity = compute_specificity(preds, actual)
    print('per sched accuracy: ', np.mean(total_acc))
    print('mean sensitivity: ', sensitivity, ', mean specificity: ',
          specificity)
    # Context manager guarantees the handle is closed even if the write fails.
    with open('heterogeneous_toy_env_results.txt', 'a') as results_file:
        results_file.write(
            'DT w/ bimodal embedding: mean: ' + str(np.mean(total_acc)) +
            ', std: ' + str(np.std(total_acc)) + ', sensitivity: ' +
            str(sensitivity) + ', specificity: ' + str(specificity) +
            ', Distribution of Class: 0: ' + str(percent_of_zeros) +
            ', 1: ' + str(1 - percent_of_zeros) + '\n')
# Example no. 4
# 0
def test(ddt):
    """
    Evaluate ``ddt`` on a freshly generated toy dataset, one schedule at a time,
    while maintaining a two-way belief over which embedding each schedule uses.

    After each of the first five timesteps of a schedule the embedding is
    re-sampled from the belief; after later timesteps the most likely embedding
    is used. The chosen embedding is pushed into the model before the next
    forward pass. Per-timestep predictions and beliefs are printed, then the
    mean per-schedule accuracy, sensitivity and specificity are printed and one
    summary line is appended to ``heterogeneous_toy_env_results.txt``.

    :param ddt: model exposing ``set_bayesian_embedding`` and ``forward``;
        ``forward`` receives a ``(1, 2)`` tensor and its result is reshaped to
        ``(1, 2)``.
    :return: None
    """
    num_schedules = 50
    schedule_length = 20
    x_data_test, y_test, percent_of_zeros = create_simple_classification_dataset(
        num_schedules, get_percent_of_zeros=True)

    # The first two entries of every row are dropped; the rest are the inputs.
    x_test = torch.Tensor(
        [each_ele[2:] for each_ele in x_data_test]).reshape(-1, 1, 2)
    y_test = torch.Tensor(y_test).reshape((-1, 1))

    per_schedule_test_accs = []
    preds = [[] for _ in range(num_schedules)]
    actual = [[] for _ in range(num_schedules)]
    # One belief vector per schedule, starting uniform over the two embeddings.
    test_distributions = [np.ones(2) * 1 / 2 for _ in range(num_schedules)]
    for schedule_num in range(num_schedules):
        # Schedules are laid out back to back, schedule_length timesteps each.
        chosen_schedule_start = schedule_num * schedule_length
        belief = test_distributions[schedule_num]
        embedding_given_dis, count = get_embedding_given_dist(belief)
        prod = [.5, .5]
        acc = 0
        ddt.set_bayesian_embedding(embedding_given_dis)

        for each_t in range(chosen_schedule_start,
                            chosen_schedule_start + schedule_length):
            output = ddt.forward(x_test[each_t]).reshape(1, 2)

            label = int(y_test[each_t].item())
            predicted = torch.argmax(output).item()
            print('output is ', predicted, ' label is ', label)
            if predicted == label:
                acc += 1

            # Model output for the observed label and for the opposite class.
            tally = output[0][label].item()
            second_tally = output[0][int(not label)].item()
            other = int(not count)
            prod[count] = tally * belief[count]
            # NOTE(review): this component is updated with `*=` while the
            # sampled one is assigned with `=`; kept as-is to preserve results,
            # but confirm the asymmetry is intended.
            prod[other] *= second_tally * belief[other]
            preds[schedule_num].append(predicted)
            actual[schedule_num].append(label)

            normalization_factor = sum(prod)
            prod = [k / normalization_factor for k in prod]

            # Written in place so test_distributions[schedule_num] is updated.
            belief[0] = prod[0]
            belief[1] = prod[1]
            belief /= sum(belief)
            print('distribution at time ', each_t, ' is', belief)
            if each_t % schedule_length < 5:
                # Early in the schedule: keep exploring by sampling.
                embedding_given_dis, count = get_embedding_given_dist(belief)
            else:
                # Later: commit to the currently most likely embedding.
                # NOTE(review): `count` is not refreshed here, so it keeps the
                # last sampled id — confirm this is intended.
                embedding_given_dis = get_most_likely_embedding_given_dist(belief)
            ddt.set_bayesian_embedding(embedding_given_dis)

        per_schedule_test_accs.append(acc / schedule_length)

    print('per sched accuracy: ', np.mean(per_schedule_test_accs))
    sensitivity = compute_sensitivity(preds, actual)
    specificity = compute_specificity(preds, actual)

    print('mean sensitivity: ', sensitivity, ', mean specificity: ',
          specificity)
    # Context manager guarantees the handle is closed even if the write fails.
    with open('heterogeneous_toy_env_results.txt', 'a') as results_file:
        results_file.write(
            'DDT w/ bimodal embedding: mean: ' +
            str(np.mean(per_schedule_test_accs)) + ', std: ' +
            str(np.std(per_schedule_test_accs)) + ', sensitivity: ' +
            str(sensitivity) + ', specificity: ' + str(specificity) +
            ', Distribution of Class: 0: ' + str(percent_of_zeros) +
            ', 1: ' + str(1 - percent_of_zeros) + '\n')