Code Example #1
File: main.py  Project: jvmncs/safe-debates

import numpy as np
import torch
from torch.utils.data import DataLoader
from torchvision.datasets import MNIST
from torchvision.transforms import ToTensor

from judge import Judge    # project-local module; import path assumed
from agent import Agent    # project-local module; import path assumed
from debate import Debate  # project-local module; import path assumed
from utils import AverageMeter, track_stats_  # project-local; paths assumed


def main(args):
    """main man"""
    # reproducibility
    if args.seed is not None:
        # unsure if this works with SparseMNIST right now
        torch.manual_seed(args.seed)
        np.random.seed(args.seed)

    # cuda
    args.use_cuda = not args.no_cuda and torch.cuda.is_available()
    args.device = torch.device("cuda" if args.use_cuda else "cpu")

    # data
    dataset = MNIST('./data/', train=False, transform=ToTensor())
    kwargs = {'num_workers': 1}
    if args.use_cuda:
        kwargs['pin_memory'] = True
    data_loader = DataLoader(dataset, args.batch_size, shuffle=True, **kwargs)
    if args.rounds is None:
        args.rounds = len(dataset) // args.batch_size

    # load judge
    judge_state = torch.load(args.checkpoint)['state_dict']

    # debate game
    judge = Judge().to(args.device)
    judge.load_state_dict(judge_state)
    judge.eval()
    helper = Agent(honest=True, args=args)
    liar = Agent(honest=False, args=args)
    debate = Debate((helper, liar), data_loader, args)

    total_meter = AverageMeter()
    class_meters = [AverageMeter() for _ in range(10)]

    # TODO precommit logic
    for round_ in range(args.rounds):
        print("starting round {}".format(round_))
        helper.precommit_(None, None)
        liar.precommit_(None, None)
        result = debate.play(judge, args.device)
        track_stats_(total_meter, class_meters, result['helper']['preds'],
                     result['helper']['wins'], result['labels'],
                     args.precommit)

    print('Total accuracy: {}'.format(total_meter.avg))
    print('Accuracy per class\n==============================================')
    for i in range(10):
        print('Digit {}: {}'.format(i, class_meters[i].avg))
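
The function above reads its configuration from an `args` namespace (seed, no_cuda, batch_size, rounds, checkpoint, precommit). A minimal sketch of argparse wiring that would satisfy it; the flag names are assumptions that simply mirror those attribute names:

import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--checkpoint', required=True,
                        help='path to the trained judge checkpoint')
    parser.add_argument('--batch-size', type=int, default=128)
    parser.add_argument('--rounds', type=int, default=None,
                        help='defaults to one pass over the dataset')
    parser.add_argument('--seed', type=int, default=None)
    parser.add_argument('--no-cuda', action='store_true')
    parser.add_argument('--precommit', action='store_true')
    main(parser.parse_args())
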
Code Example #2
import time

import numpy as np

from agent import DebateAgent  # project-local module; import path assumed
from debate import Debate      # project-local module; import path assumed


def get_debate_results(
    start_point,
    use_test_data,
    batch_size,
    N_samples,
    N_to_mask,
    judge_path,
    restricted_first,
):
    # MNISTJudge has to be imported here, because otherwise tensorflow does not
    # work together with multiprocessing
    from judge import MNISTJudge

    judge = MNISTJudge(N_to_mask=N_to_mask,
                       model_dir=judge_path,
                       binary_rewards=False)
    if use_test_data:
        dataset = judge.eval_data
    else:
        dataset = judge.train_data

    result_list = []
    for i in range(batch_size):
        print("i", start_point + i, flush=True)
        t = time.time()
        if start_point + i >= dataset.shape[0]:  # end of dataset
            break
        results_per_label = np.zeros([10, 10])
        sample = dataset[start_point + i]
        for label in range(10):
            # `args` is assumed to be a module-level argparse namespace
            unrestricted_agent = DebateAgent(precommit_label=None,
                                             agentStrength=args.rollouts)
            restricted_agent = DebateAgent(precommit_label=label,
                                           agentStrength=args.rollouts)
            if restricted_first:
                agent1, agent2 = unrestricted_agent, restricted_agent
            else:
                agent1, agent2 = restricted_agent, unrestricted_agent
            debate = Debate((agent1, agent2), judge, N_to_mask, sample.flat)
            probabilities = debate.play(full_report=True)
            results_per_label[label] = probabilities
        result_list.append(results_per_label)
        print("time", time.time() - t)
    return result_list
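
The start_point/batch_size interface and the in-function judge import (needed because TensorFlow does not cooperate with multiprocessing, per the comment above) suggest the function is meant to run in worker processes. A sketch of fanning it out with multiprocessing.Pool; this driver is a hypothetical addition, not part of the original project:

import multiprocessing

def collect_debate_results(use_test_data, batch_size, N_samples, N_to_mask,
                           judge_path, restricted_first):
    start_points = range(0, N_samples, batch_size)
    with multiprocessing.Pool() as pool:
        batches = pool.starmap(
            get_debate_results,
            [(start, use_test_data, batch_size, N_samples, N_to_mask,
              judge_path, restricted_first) for start in start_points])
    # flatten the per-worker lists of 10x10 result matrices
    return [result for batch in batches for result in batch]
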
Code Example #3
    # Method excerpt; `self` is an MCTS-based agent holding Q/N statistics.
    def rollout_(self, judge, states, images, labels, level):
        """Recursively roll out one MCTS simulation: pick the UCT-maximizing
        valid action at each level, then back the negated value up the path
        (negamax), updating the running means Q and visit counts N."""
        if level > self.pixels:
            return -Debate.evaluate(self, judge, states, labels)['wins']

        valids = self.get_valid_actions(states, images).type(torch.float)
        Us = self.break_ties(self.calculate_UCT(states))
        obj = self.Q[states] + Us
        actions = torch.max(obj * valids, dim=-1)[1]
        successors = self.apply_action(actions, states, images)
        Vs = self.rollout_(judge, successors, images, labels, level=level + 1)

        # TODO: vectorize this
        for action in actions:
            self.Q[states][:, action] = (
                (self.N[states][:, action] * self.Q[states][:, action] + Vs) /
                (self.N[states][:, action] + 1))
            self.N[states][:, action] += 1

        return -Vs
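
Two details of rollout_ are easy to miss: actions are chosen by maximizing Q plus an exploration bonus (the calculate_UCT term), and the backup replaces Q with a running average of rollout values. A scalar restatement for a single state; calculate_UCT's exact form is not shown above, so the standard UCT bonus here is an assumption:

import math

def uct_score(Q, N, N_total, a, c=1.0):
    # exploitation term plus an exploration bonus that is large for
    # rarely visited actions (assumed form of calculate_UCT)
    return Q[a] + c * math.sqrt(math.log(N_total) / max(N[a], 1))

def backup(Q, N, a, V):
    # incremental mean, matching the Q/N update in rollout_ above
    Q[a] = (N[a] * Q[a] + V) / (N[a] + 1)
    N[a] += 1
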
Code Example #4
from agent import DebateAgent  # project-local module; import path assumed
from debate import Debate      # project-local module; import path assumed


def evaluate_sample_unrestricted(N_to_mask,
                                 sample,
                                 label,
                                 judge,
                                 rollouts,
                                 index_of_truth_agent,
                                 changing_sides,
                                 allow_black_pixels,
                                 filename=None):
    """
    Evaluate unrestricted debate (without precommit)
    """
    truth_won = True
    agent_lie = DebateAgent(precommit_label=None, agentStrength=rollouts)
    agent_truth = DebateAgent(precommit_label=label, agentStrength=rollouts)

    if index_of_truth_agent == 0:
        agents = (agent_truth, agent_lie)
        lie_win_utility = -1  # the liar plays second; it wins on -1
    else:
        agents = (agent_lie, agent_truth)
        lie_win_utility = 1  # the liar plays first; it wins on 1
    debate = Debate(
        agents,
        judge,
        N_to_mask,
        sample,
        debug=False,
        changing_sides=changing_sides,
        allow_black_pixels=allow_black_pixels,
    )
    this_game_utility = debate.play(filename=filename)
    if this_game_utility == lie_win_utility:
        truth_won = False
    return truth_won
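
A hypothetical driver showing how evaluate_sample_unrestricted might be aggregated into an overall truth win rate over the judge's eval set; the alternating first player and the keyword values are illustrative assumptions:

def truth_win_rate(judge, N_samples, N_to_mask, rollouts):
    wins = 0
    for i in range(N_samples):
        sample = judge.eval_data[i].flatten()
        label = judge.eval_labels[i]
        wins += evaluate_sample_unrestricted(
            N_to_mask, sample, label, judge, rollouts,
            index_of_truth_agent=i % 2,  # alternate who plays first
            changing_sides=True,
            allow_black_pixels=False)
    return wins / N_samples
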
Code Example #5
from judge import MNISTJudge, FashionJudge  # FashionJudge path assumed
from agent import DebateAgent               # project-local; path assumed
from debate import Debate, DebatePlayers    # project-local; paths assumed


def run(
    N_to_mask,
    sample_id,
    lying_agent_label,
    judge_path,
    dataset,
    rollouts,
    index_of_true_agent,
    binary_rewards,
    changing_sides,
):
    """
    Runs one debate game on a given sample from a specified dataset (either "mnist" or "fashion") with N_to_mask rounds.
    The debate is modeled by MCTS with given number of rollouts.
    One player is the honest agents arguing for the correct label, the other either precommits to a lying_agent_label or debates without precommit if lying_agent_label is set to None.

    index_of_true_agent: Either 0 or 1 whether the honest agent plays first or second.
    binary_rewards: If set to True, rewards are either 1 or -1, if set False rewards lie between -1 and 1
    changing_sides: If set to True, agents switch sides after each round, if set to False, the first agents reveales N_to_mask/2 features followed by N_to_mask/2 features of the second agent
    """
    # Parse parameters
    if judge_path:
        path = judge_path
    elif dataset:
        path = "saved_models/" + dataset + str(N_to_mask)
    else:
        raise Exception("dataset must be specified")

    if dataset == "mnist":
        judge = MNISTJudge(
            N_to_mask=N_to_mask, model_dir=path, binary_rewards=binary_rewards
        )
    elif dataset == "fashion":
        judge = FashionJudge(
            N_to_mask=N_to_mask, model_dir=path, binary_rewards=binary_rewards
        )
    else:
        raise Exception("Unknown dataset in " + "dataset.txt: " + dataset)

    # Get debate data and run the debate
    sample = judge.eval_data[sample_id].flatten()

    label = judge.eval_labels[sample_id]

    if lying_agent_label == label:
        raise Exception("Lying label cannot be the same as the correct one")

    agent_lie = DebateAgent(precommit_label=lying_agent_label, agentStrength=rollouts)
    agent_truth = DebateAgent(precommit_label=label, agentStrength=rollouts)
    assert index_of_true_agent in [0, 1]

    player_description = DebatePlayers(
        agent_truth, agent_lie, index_of_true_agent, our_name="truth", opp_name="liar"
    )

    debate = Debate(
        player_description.agents,
        judge,
        N_to_mask,
        sample,
        debug=True,
        changing_sides=changing_sides,
    )
    utility = debate.play()
    player_description.print_debate_result(utility, label)
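
An illustrative invocation of run(); every value is an assumption (N_to_mask=6 matches the six revealed pixels in the AI Safety via Debate MNIST experiments, and judge_path=None falls back to saved_models/mnist6 via the path logic above):

if __name__ == "__main__":
    run(
        N_to_mask=6,
        sample_id=0,
        lying_agent_label=2,  # must differ from the sample's true label
        judge_path=None,
        dataset="mnist",
        rollouts=1000,
        index_of_true_agent=0,
        binary_rewards=False,
        changing_sides=True,
    )
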
Code Example #6
from agent import DebateAgent  # project-local module; import path assumed
from debate import Debate      # project-local module; import path assumed


def evaluate_sample_restricted(N_to_mask,
                               sample,
                               label,
                               judge,
                               rollouts,
                               index_of_truth_agent,
                               changing_sides,
                               allow_black_pixels,
                               seeds=3,
                               confusion_matrix_counter=None,
                               dirname=None):
    """
    Evaluate the sample using precommited debate as in the AISvD paper, optionaly change the number of seeds (3 seeds in the paper)
    If confusion_matrix_counter is specified, than update the counter so that confusion_matrix_counter[true_label][lying_label] specifies the number of times 'lying_label' won over 'true_label'.
    Use only the first game seed to update the counter.
    """
    majority_of_seeds = seeds // 2 + 1

    # Assume truth has won until shown otherwise
    truth_won = True

    for lying_agent_label in range(10):
        if lying_agent_label == label:
            continue
        print(lying_agent_label, end="", flush=True)
        liar_win_count = 0
        for game_number in range(seeds):
            print(end='.', flush=True)
            agent_lie = DebateAgent(precommit_label=lying_agent_label,
                                    agentStrength=rollouts)
            agent_truth = DebateAgent(precommit_label=label,
                                      agentStrength=rollouts)
            if index_of_truth_agent == 0:
                agents = (agent_truth, agent_lie)
                lie_win_utility = -1  # the liar plays second; it wins on -1
            else:
                agents = (agent_lie, agent_truth)
                lie_win_utility = 1  # the liar plays first; it wins on 1
            debate = Debate(
                agents,
                judge,
                N_to_mask,
                sample,
                debug=False,
                changing_sides=changing_sides,
                allow_black_pixels=allow_black_pixels,
            )
            this_game_utility = debate.play(
                filename=None if dirname is None else dirname +
                str(lying_agent_label))
            if this_game_utility == lie_win_utility:  # the lying agent won
                liar_win_count += 1
                if confusion_matrix_counter is not None and game_number == 0:
                    confusion_matrix_counter[label][lying_agent_label] += 1

            # If the liar has clinched a majority, or can no longer reach a
            # majority, and we're not computing the confusion matrix, we can
            # stop evaluating this sample.
            games_left = seeds - game_number - 1
            if (liar_win_count == majority_of_seeds or
                (majority_of_seeds - liar_win_count >
                 games_left)) and confusion_matrix_counter is None:
                break
        if liar_win_count >= majority_of_seeds:
            truth_won = False
            if confusion_matrix_counter is None:
                break
    return truth_won
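
The early exit in the seed loop reduces to a single predicate: stop once the liar has clinched a majority or can no longer reach one. A standalone restatement as a hypothetical helper (with seeds=3, the loop therefore stops after two liar wins or two truth wins):

def seed_loop_decided(liar_win_count, game_number, seeds):
    majority = seeds // 2 + 1
    games_left = seeds - game_number - 1
    clinched = liar_win_count == majority
    eliminated = majority - liar_win_count > games_left
    return clinched or eliminated
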
Code Example #7
import time

import numpy as np

from judge import MNISTJudge, FashionJudge      # FashionJudge path assumed
from agent import DebateAgent                   # project-local; path assumed
from debate import Debate                       # project-local; path assumed
from debate_classifier import DebateClassifier  # import path assumed


def run(
    N_to_mask,
    judge_path,
    dataset,
    rollouts,
    N_epochs,
    batch_size,
    learning_rate,
    learning_rate_decay,
    classifier_path,
    cheat_debate,
    only_update_for_wins,
    precomputed_debate_results_restricted_first_path,
    precomputed_debate_results_restricted_second_path,
    shuffle_batches,
    use_dropout,
    importance_sampling_weights,
    importance_sampling_cap,
):
    if judge_path:
        path = judge_path
    elif dataset:
        path = "saved_models/" + dataset + str(N_to_mask)
    else:
        raise Exception("Either judge_path or dataset needs to be specified")

    if dataset == "mnist":
        judge = MNISTJudge(N_to_mask=N_to_mask, model_dir=path)
    elif dataset == "fashion":
        judge = FashionJudge(N_to_mask=N_to_mask, model_dir=path)
    else:
        raise Exception("Unknown dataset in " + "dataset.txt: " + dataset)

    judge_accuracy = judge.evaluate_accuracy()
    print("Judge accuracy:", judge_accuracy)

    if precomputed_debate_results_restricted_first_path is not None:
        assert precomputed_debate_results_restricted_second_path is not None
        if cheat_debate:
            raise Exception(
                "cheat_debate should not be enabled when training "
                "from precomputed debate results"
            )
        debate_results_restricted_first = np.fromfile(
            precomputed_debate_results_restricted_first_path
        ).reshape(-1, 10, 10)
        debate_results_restricted_second = np.fromfile(
            precomputed_debate_results_restricted_second_path
        ).reshape(-1, 10, 10)
        print(
            "Loaded debate results from {} and {}".format(
                precomputed_debate_results_restricted_first_path,
                precomputed_debate_results_restricted_second_path,
            )
        )
        print("These will be used for training instead of re-running the debates.")
    else:
        debate_results_restricted_first, debate_results_restricted_second = None, None

    train_data = judge.train_data
    N_train = len(judge.train_labels)
    eval_data = judge.eval_data
    eval_labels = judge.eval_labels

    debate_classifier = DebateClassifier(
        learning_rate=learning_rate,
        learning_rate_decay=learning_rate_decay,
        model_dir=classifier_path,
        use_dropout=use_dropout,
    )

    batch_samples = []
    batch_labels = []
    batch_weights = []

    t = time.time()

    for epoch in range(N_epochs):
        for i in range(N_train):
            # print(i, flush=True)
            sample = train_data[i]
            probs = next(debate_classifier.predict(sample))["probabilities"]
            label = np.random.choice(range(len(probs)), p=probs)
            restricted_first = np.random.random() < 0.5

            if cheat_debate:
                # simulate a perfectly accurate debate
                if label == judge.train_labels[i]:
                    weight = 1
                elif only_update_for_wins:
                    weight = 0
                else:
                    weight = -0.1
            elif debate_results_restricted_first is not None:
                assert debate_results_restricted_second is not None
                if restricted_first:
                    debate_results = debate_results_restricted_first
                else:
                    debate_results = debate_results_restricted_second
                # use precomputed results
                judge_probabilities = debate_results[i, label]
                # the sampled label "wins" if the judge rates it at least as
                # high as every other label
                if np.all(judge_probabilities[label] >= judge_probabilities):
                    weight = 1
                elif only_update_for_wins:
                    weight = 0
                else:
                    weight = -0.1
            else:
                # run non-precommited debate
                agent_unrestricted = DebateAgent(
                    precommit_label=None, agentStrength=rollouts
                )
                agent_restricted = DebateAgent(
                    precommit_label=label, agentStrength=rollouts
                )
                if restricted_first:
                    agent1, agent2 = agent_restricted, agent_unrestricted
                else:
                    agent1, agent2 = agent_unrestricted, agent_restricted
                debate = Debate((agent1, agent2), judge, N_to_mask, sample.flat)
                utility = debate.play()

                # weight positively when the restricted agent, arguing for
                # the sampled label, won the debate
                if (utility == 1 and restricted_first) or (
                    utility == -1 and not restricted_first
                ):
                    weight = 1
                elif only_update_for_wins:
                    weight = 0
                else:
                    weight = -0.1

            if importance_sampling_weights:
                importance_sampling_factor = 1 / probs[label]
                if (
                    importance_sampling_cap is not None
                    and importance_sampling_factor > importance_sampling_cap
                ):
                    importance_sampling_factor = importance_sampling_cap
                weight *= importance_sampling_factor

            # print("weight", weight)
            batch_samples.append(sample)
            batch_labels.append(label)
            batch_weights.append(weight)

            if (i + 1) % batch_size == 0 or i == N_train - 1:
                # update debate classifier
                print("i", i, flush=True)
                print("batch_weights", batch_weights, flush=True)
                debate_classifier.train(
                    np.array(batch_samples),
                    np.array(batch_labels),
                    np.array(batch_weights),
                    shuffle=shuffle_batches,
                )
                acc = debate_classifier.evaluate_accuracy(eval_data, eval_labels)
                print("Updated debate_classifier", flush=True)
                print("Evaluation accuracy", acc, flush=True)
                t2 = time.time()
                print("Batch time ", t2 - t)
                t = t2
                batch_samples = []
                batch_labels = []
                batch_weights = []

    acc = debate_classifier.evaluate_accuracy(eval_data, eval_labels)
    print("Accuracy", acc, flush=True)