def filter_arbiter(valid_env, aug_env, tok):
    """Score augmented data with a trained arbiter and keep the confident part.

    Loads the arbiter checkpoint from ``args.load``, scores every instruction
    in ``aug_env``, dumps all data with their probabilities to
    ``<log_dir>/arbiter_result/aug_prob.json``, and returns only the entries
    whose probability exceeds 0.5.

    Args:
        valid_env: Validation environment. Currently unused; kept for the
            commented-out threshold-tuning code path below.
        aug_env: Environment wrapping the augmented data to score.
        tok: Tokenizer shared by the agent and the arbiter.

    Returns:
        list[dict]: Shallow copies of ``aug_env.data`` entries with
        ``prob > 0.5``; ``instr_id`` is stripped and ``instructions`` is
        wrapped in a single-element list.
    """
    import tqdm
    listner = Seq2SeqAgent(aug_env, "", tok, args.maxAction)
    arbiter = Arbiter(aug_env, listner, tok)

    # Load the model
    arbiter.load(args.load)

    # Create Dir
    os.makedirs(os.path.join(log_dir, 'arbiter_result'), exist_ok=True)

    # Get the prob for the validation env (may be used for determining the threshold)
    # arbiter.env = valid_env
    # valid_inst2prob = arbiter.valid(wrapper=tqdm.tqdm)
    # json.dump(valid_inst2prob, open(os.path.join(log_dir, 'arbiter_result', 'valid_prob.json'), 'w'))

    # Get the prob of the augmentation data
    arbiter.env = aug_env
    aug_inst2prob = arbiter.valid(wrapper=tqdm.tqdm)

    aug_data = [datum.copy() for datum in aug_env.data]
    for datum in aug_data:
        datum['instructions'] = [datum['instructions']]
        # Remove the redundant components in the dataset.
        # NOTE(review): pop without a default raises KeyError if the field is
        # absent — assumed always present in aug_env.data; confirm upstream.
        datum.pop('instr_encoding')
    for datum in aug_data:
        datum['prob'] = aug_inst2prob[datum['instr_id']]

    # FIX: context manager so the dump is flushed and the handle closed
    # (the original passed an open() result straight to json.dump and
    # never closed it).
    with open(os.path.join(log_dir, 'arbiter_result', 'aug_prob.json'), 'w') as f:
        json.dump(aug_data, f)

    # Create the Dataset: keep only instructions the arbiter scores above 0.5.
    data = [
        datum for datum in aug_data if aug_inst2prob[datum['instr_id']] > 0.5
    ]

    # NOTE(review): the filtered dicts are the same objects held by aug_data,
    # so this also strips 'instr_id' from the returned data (original
    # behavior, preserved).
    for datum in aug_data:
        datum.pop('instr_id')

    return data
def train_arbiter(arbiter_env, tok, n_iters, log_every=500, val_envs=None):
    """Train the arbiter, periodically evaluating it on the validation envs.

    Every ``log_every`` iterations the arbiter is evaluated on each env in
    ``val_envs``; confusion-matrix statistics are printed and logged to
    TensorBoard, and the checkpoints with the best ``val_seen`` f1 and
    per-class accuracy are saved under ``<log_dir>/state_dict/``.

    Args:
        arbiter_env: Training environment for the arbiter.
        tok: Tokenizer shared by the agent and the arbiter.
        n_iters: Total number of training iterations.
        log_every: Evaluation/logging interval in iterations.
        val_envs: Mapping of env name -> env. 'train' / 'val_unseen' are
            evaluated on a 500-sample subset; 'val_seen' drives checkpoint
            selection. Defaults to no validation envs.
    """
    # FIX: mutable-default idiom; behavior for callers omitting the arg is
    # unchanged.
    if val_envs is None:
        val_envs = {}

    writer = SummaryWriter(log_dir=log_dir)
    listner = Seq2SeqAgent(arbiter_env, "", tok, args.maxAction)
    arbiter = Arbiter(arbiter_env, listner, tok)

    best_f1 = 0.
    best_accu = 0.
    for idx in range(0, n_iters, log_every):
        interval = min(log_every, n_iters - idx)

        # Train for log_every interval
        arbiter.env = arbiter_env
        arbiter.train(interval)  # Train interval iters

        print()
        print("Iter: %d" % idx)

        # Evaluation
        for env_name, env in val_envs.items():
            print("............ Evaluating %s ............." % env_name)
            arbiter.env = env
            if env_name == 'train' or env_name == 'val_unseen':
                # A 500-sample subset is enough for monitoring progress.
                path2prob = arbiter.valid(total=500)
            else:
                # val_seen needs accurate accuracy to evaluate the model
                # performance (used for early stopping).
                path2prob = arbiter.valid()
            print("len path2prob", len(path2prob))
            path2answer = env.get_answer()
            print("len path2ans", len(path2answer))

            # Probabilities assigned to ground-truth-negative paths.
            false_probs = [
                path2prob[path] for path in path2prob
                if not path2answer[path]
            ]

            # Confusion counts at threshold 0.5.
            # FIX: the original swapped the false_positive / false_negative
            # definitions (its "fp" counted prob < 0.5 with a True answer),
            # so the "tp fp fn tn" printout reported fp and fn exchanged.
            # The derived per-class accuracies and f1 below compute the same
            # quantities the original did, now against correctly named counts.
            true_positive = len([
                1 for path in path2prob
                if path2prob[path] >= 0.5 and path2answer[path]
            ])
            false_positive = len([
                1 for path in path2prob
                if path2prob[path] >= 0.5 and not path2answer[path]
            ])
            false_negative = len([
                1 for path in path2prob
                if path2prob[path] < 0.5 and path2answer[path]
            ])
            true_negative = len([
                1 for path in path2prob
                if path2prob[path] < 0.5 and not path2answer[path]
            ])

            # Per-class accuracies. FIX: guard the denominators — the
            # original divided by the raw positive-class count and raised
            # ZeroDivisionError whenever a class was absent.
            # true_accu is recall on the positive class; true_recall is
            # (despite its name) precision — names kept so the TensorBoard
            # tags below stay continuous with earlier runs.
            true_accu = true_positive / max(true_positive + false_negative, 1)
            true_recall = true_positive / max(true_positive + false_positive, 1)
            # FIX: proper zero guard — the original's max(denom, 1) silently
            # deflated f1 whenever precision + recall < 1.
            denom = true_accu + true_recall
            true_f1 = 2 * true_accu * true_recall / denom if denom > 0 else 0.
            false_accu = true_negative / max(true_negative + false_positive, 1)

            print("tp %d, fp %d, fn %d, tn %d" %
                  (true_positive, false_positive, false_negative,
                   true_negative))
            print("All negative", true_negative + false_positive)
            print("All positive", true_positive + false_negative)

            writer.add_scalar("true_accu", true_accu, idx)
            writer.add_scalar("true_recall", true_recall, idx)
            writer.add_scalar("true_f1", true_f1, idx)
            writer.add_scalar("false_accu", false_accu, idx)

            # Only val_seen drives checkpoint selection.
            if env_name == 'val_seen':
                if true_f1 > best_f1:
                    best_f1 = true_f1
                    print('Save the model with %s f1 score %0.4f' %
                          (env_name, best_f1))
                    arbiter.save(
                        idx,
                        os.path.join(log_dir, 'state_dict',
                                     'best_%s_f1' % env_name))
                if true_accu > best_accu:
                    best_accu = true_accu
                    print("Save the model with %s true accu %0.4f" %
                          (env_name, best_accu))
                    arbiter.save(
                        idx,
                        os.path.join(log_dir, 'state_dict',
                                     'best_%s_accu' % env_name))

            print("True Accu %0.4f, False Accu %0.4f" %
                  (true_accu, false_accu))
            # FIX: guard against an empty negative set (the original divided
            # by len(false_probs) unconditionally).
            print("Avg False probs %0.4f" %
                  (sum(false_probs) / max(len(false_probs), 1)))
            sys.stdout.flush()