Example #1
def filter_arbiter(valid_env, aug_env, tok):
    import tqdm
    listner = Seq2SeqAgent(aug_env, "", tok, args.maxAction)
    arbiter = Arbiter(aug_env, listner, tok)

    # Load the model
    arbiter.load(args.load)

    # Create Dir
    os.makedirs(os.path.join(log_dir, 'arbiter_result'), exist_ok=True)

    # Get the probabilities for the validation env (may be used to determine the threshold)
    # arbiter.env = valid_env
    # valid_inst2prob = arbiter.valid(wrapper=tqdm.tqdm)
    # json.dump(valid_inst2prob, open(os.path.join(log_dir, 'arbiter_result', 'valid_prob.json'), 'w'))

    # Get the probabilities for the augmentation data
    arbiter.env = aug_env
    aug_inst2prob = arbiter.valid(wrapper=tqdm.tqdm)
    aug_data = [datum.copy() for datum in aug_env.data]
    for datum in aug_data:
        datum['instructions'] = [datum['instructions']]
        datum.pop('instr_encoding')  # Remove the redundant component from the dataset
        datum['prob'] = aug_inst2prob[datum['instr_id']]
    with open(os.path.join(log_dir, 'arbiter_result', 'aug_prob.json'), 'w') as f:
        json.dump(aug_data, f)

    # Build the filtered dataset: keep instructions the arbiter scores above 0.5
    data = [datum for datum in aug_data if aug_inst2prob[datum['instr_id']] > 0.5]

    # instr_id is no longer needed; note that `data` holds the same dict
    # objects as aug_data, so its entries are stripped as well
    for datum in aug_data:
        datum.pop('instr_id')
    return data
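
A minimal sketch of how filter_arbiter might be driven; the helper name build_filtered_dataset, the output file aug_filtered.json, and the module-level log_dir are assumptions for illustration, not part of the original code:

import json
import os

def build_filtered_dataset(valid_env, aug_env, tok):
    # Score the augmented instructions and keep only the confident ones
    # (log_dir is assumed to be defined at module scope, as in the snippet above)
    data = filter_arbiter(valid_env, aug_env, tok)
    out_path = os.path.join(log_dir, 'arbiter_result', 'aug_filtered.json')
    with open(out_path, 'w') as f:
        json.dump(data, f)
    print("Kept %d of %d augmented instructions" % (len(data), len(aug_env.data)))
    return data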
Example #2
def train_arbiter(arbiter_env, tok, n_iters, log_every=500, val_envs=None):
    val_envs = val_envs or {}  # Avoid the mutable-default-argument pitfall
    writer = SummaryWriter(log_dir=log_dir)
    listner = Seq2SeqAgent(arbiter_env, "", tok, args.maxAction)
    arbiter = Arbiter(arbiter_env, listner, tok)
    best_f1 = 0.
    best_accu = 0.
    for idx in range(0, n_iters, log_every):
        interval = min(log_every, n_iters - idx)

        # Train for log_every interval
        arbiter.env = arbiter_env
        arbiter.train(interval)  # Train interval iters

        print()
        print("Iter: %d" % idx)

        # Evaluation
        for env_name, env in val_envs.items():
            print("............ Evaluating %s ............." % env_name)
            arbiter.env = env
            if env_name == 'train' or env_name == 'val_unseen':
                path2prob = arbiter.valid(total=500)
            else:  # val_seen needs exact metrics for early stopping, so score everything
                path2prob = arbiter.valid()
            print("len path2prob", len(path2prob))
            path2answer = env.get_answer()
            print("len path2ans", len(path2answer))
            false_probs = [path2prob[path] for path in path2prob
                           if not path2answer[path]]
            # Confusion counts at the 0.5 decision threshold
            true_positive = len([1 for path in path2prob
                                 if path2prob[path] >= 0.5 and path2answer[path]])
            false_positive = len([1 for path in path2prob
                                  if path2prob[path] >= 0.5 and not path2answer[path]])
            false_negative = len([1 for path in path2prob
                                  if path2prob[path] < 0.5 and path2answer[path]])
            true_negative = len([1 for path in path2prob
                                 if path2prob[path] < 0.5 and not path2answer[path]])
            # Precision, recall, and F1 on the positive class,
            # guarded against division by zero
            true_accu = true_positive / max(true_positive + false_positive, 1)
            true_recall = true_positive / max(true_positive + false_negative, 1)
            denom = true_accu + true_recall
            true_f1 = 2 * true_accu * true_recall / denom if denom > 0 else 0.
            # Accuracy on the negative class (specificity)
            false_accu = true_negative / max(true_negative + false_positive, 1)
            print(
                "tp %d, fp %d, fn %d, tn %d" %
                (true_positive, false_positive, false_negative, true_negative))
            print("All negative", true_negative + false_negative)
            print("All positive", true_positive + false_positive)
            writer.add_scalar("true_accu", true_accu, idx)
            writer.add_scalar("true_recall", true_recall, idx)
            writer.add_scalar("true_f1", true_f1, idx)
            writer.add_scalar("false_accu", false_accu, idx)

            if env_name == 'val_seen':
                if true_f1 > best_f1:
                    best_f1 = true_f1
                    print('Save the model with %s f1 score %0.4f' %
                          (env_name, best_f1))
                    arbiter.save(
                        idx,
                        os.path.join(log_dir, 'state_dict',
                                     'best_%s_f1' % env_name))

                if true_accu > best_accu:
                    best_accu = true_accu
                    print("Save the model with %s true accu %0.4f" %
                          (env_name, best_accu))
                    arbiter.save(
                        idx,
                        os.path.join(log_dir, 'state_dict',
                                     'best_%s_accu' % env_name))

            print("True Accu %0.4f, False Accu %0.4f" %
                  (true_accu, false_accu))
            print("Avg False probs %0.4f" %
                  (sum(false_probs) / len(false_probs)))
            sys.stdout.flush()
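
Since the confusion-matrix bookkeeping above is easy to get wrong, here is a standalone sketch of the same metric computation; arbiter_metrics is a hypothetical helper, and path2prob / path2answer are plain dicts with the 0.5 threshold mirroring the code above:

def arbiter_metrics(path2prob, path2answer, threshold=0.5):
    # Precision/recall/F1 on the positive class, specificity on the negative
    tp = fp = fn = tn = 0
    for path, prob in path2prob.items():
        pred, gold = prob >= threshold, path2answer[path]
        if pred and gold:
            tp += 1
        elif pred and not gold:
            fp += 1
        elif not pred and gold:
            fn += 1
        else:
            tn += 1
    precision = tp / max(tp + fp, 1)
    recall = tp / max(tp + fn, 1)
    f1 = 2 * precision * recall / max(precision + recall, 1e-8)
    specificity = tn / max(tn + fp, 1)
    return precision, recall, f1, specificity

# Toy check: one example in each confusion-matrix cell
probs = {'a': 0.9, 'b': 0.4, 'c': 0.8, 'd': 0.2}
answers = {'a': True, 'b': True, 'c': False, 'd': False}
print(arbiter_metrics(probs, answers))  # (0.5, 0.5, 0.5, 0.5)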