def main():
    """Train a DQN/MLP buzzer on the 'dev' fold and evaluate on 'test'.

    Reads hyperparameters from the module-level config()/parse_args(),
    optionally resumes from a saved model, then runs QTrainer for 20 epochs.
    """
    cfg = config()
    args = parse_args()
    option2id, all_guesses = load_quizbowl()
    train_iter = QuestionIterator(all_guesses['dev'], option2id,
                                  batch_size=cfg.batch_size,
                                  only_hopeful=ONLY_HOPEFUL)
    eval_iter = QuestionIterator(all_guesses['test'], option2id,
                                 batch_size=cfg.batch_size,
                                 only_hopeful=False)
    model = MLP(train_iter.n_input, 128, 2)
    if args.gpu != -1 and cuda.available:
        # BUG FIX: log.info was called as log.info('[dqn] using gpu', args.gpu)
        # -- no placeholder in the message, so the gpu id never appeared in the
        # log output. Use the .format style the rest of this file uses.
        log.info('[dqn] using gpu {0}'.format(args.gpu))
        cuda.get_device(args.gpu).use()
        model.to_gpu(args.gpu)
    if os.path.exists(cfg.model_dir) and args.load:
        log.info('[dqn] loading model')
        chainer.serializers.load_npz(cfg.model_dir, model)
    trainer = QTrainer(model, train_iter, eval_iter, cfg.model_dir, cfg.log_dir)
    trainer.run(20, train=True, evaluate=True, save_model=args.save)
def train_cost_sensitive(config, folds):
    """Train a cost-sensitive RNN buzzer, then generate and save buzzes.

    config: name of a config class in the `configs` module (looked up with
        getattr and instantiated).
    folds: folds to run the trained buzzer on; each fold's buzzes are
        pickled to bc.BUZZES_DIR.
    """
    N_GUESSERS = len(GUESSERS)
    cfg = getattr(configs, config)()
    make_vector = getattr(iterator, cfg.make_vector)
    option2id, all_guesses = load_quizbowl()

    iterators = dict()
    for fold in c.BUZZER_INPUT_FOLDS:
        iterators[fold] = QuestionIterator(all_guesses[fold], option2id,
                                           batch_size=cfg.batch_size,
                                           make_vector=make_vector)

    # BUG FIX: the original read `eval_iter.n_input`, but no `eval_iter` is
    # defined in this function (NameError at runtime). All folds are built
    # with the same make_vector, so use the training iterator's input width.
    model = RNN(iterators[c.BUZZER_TRAIN_FOLD].n_input, cfg.n_hidden,
                N_GUESSERS + 1)

    gpu = conf['buzzer']['gpu']
    if gpu != -1 and chainer.cuda.available:
        log.info('Using gpu {0}'.format(gpu))
        chainer.cuda.get_device(gpu).use()
        model.to_gpu(gpu)

    # Checkpoint the config; `with` closes the handle (the original used
    # pickle.dump(cfg, open(...)) and leaked the file object).
    with open(cfg.ckp_dir, 'wb') as ckp_file:
        pickle.dump(cfg, ckp_file)

    trainer = Trainer(model, cfg.model_dir)
    trainer.run(iterators[c.BUZZER_TRAIN_FOLD], iterators[c.BUZZER_DEV_FOLD], 25)

    for fold in folds:
        test_iter = iterators[fold]
        buzzes = trainer.test(test_iter)
        log.info('{0} buzzes generated. Size {1}.'.format(fold, len(buzzes)))
        buzzes_dir = bc.BUZZES_DIR.format(fold, cfg.model_name)
        with open(buzzes_dir, 'wb') as outfile:
            pickle.dump(buzzes, outfile)
        log.info('Buzzes saved to {0}.'.format(buzzes_dir))
        if fold == 'expo':
            # NOTE(review): generate() passes a guesses DataFrame to
            # buzzer2vwexpo, while this passes raw guesses -- confirm which
            # signature buzzer2vwexpo expects.
            buzzer2vwexpo(all_guesses['expo'], buzzes, fold)
def test():
    """Build a Q-function from CLI hyperparameters, run it over the dev fold,
    and pickle + report the resulting buzzes.
    """
    args = parser.parse_args()
    option2id, all_guesses = load_quizbowl()
    dev_iter = QuestionIterator(all_guesses[c.BUZZER_DEV_FOLD], option2id,
                                batch_size=128, make_vector=dense_vector)
    game = BuzzingGame(dev_iter)
    q_func = q_functions.FCStateQFunctionWithDiscreteAction(
        game.observation_size,
        game.action_space.n,
        n_hidden_channels=args.n_hidden_channels,
        n_hidden_layers=args.n_hidden_layers)
    # serializers.load_npz('dqn.npz', q_func)
    # NOTE(review): with the load above commented out, the Q-function is
    # randomly initialized when tested -- confirm this is intentional.
    dev_buzzes = get_buzzes(q_func, dev_iter)
    dev_buzzes_dir = 'output/buzzer/rl/dev_buzzes.pkl'
    with open(dev_buzzes_dir, 'wb') as f:
        pickle.dump(dev_buzzes, f)
    print('Dev buzz {} saved to {}'.format(len(dev_buzzes), dev_buzzes_dir))
    report(dev_buzzes_dir)
def main():
    """Construct a BuzzingGame over the dev fold and reset it once."""
    option2id, all_guesses = load_quizbowl()
    dev_questions = QuestionIterator(all_guesses[c.BUZZER_DEV_FOLD], option2id,
                                     batch_size=1, make_vector=dense_vector)
    game = BuzzingGame(dev_questions)
    game.reset()
def main():
    """Train the 'neo' RNN buzzer and dump buzzes for dev/expo/devtest folds.

    Seeds numpy (and cupy when present) for reproducibility, trains for 25
    epochs, saves the model, then pickles per-fold buzzes and reports on dev.
    """
    np.random.seed(0)
    # cupy is optional; seed it too when the GPU stack is installed.
    try:
        import cupy
        cupy.random.seed(0)
    except Exception:
        pass

    option2id, all_guesses = load_quizbowl()
    train_iter = QuestionIterator(all_guesses[c.BUZZER_TRAIN_FOLD], option2id,
                                  batch_size=128, make_vector=dense_vector0)
    dev_iter = QuestionIterator(all_guesses[c.BUZZER_DEV_FOLD], option2id,
                                batch_size=128, make_vector=dense_vector0)
    devtest_iter = QuestionIterator(all_guesses['test'], option2id,
                                    batch_size=128, make_vector=dense_vector0)
    expo_iter = QuestionIterator(all_guesses['expo'], option2id,
                                 batch_size=128, make_vector=dense_vector0)

    n_hidden = 300
    model_name = 'neo_0'
    model_dir = 'output/buzzer/neo/{}.npz'.format(model_name)
    model = RNN(train_iter.n_input, n_hidden, N_GUESSERS + 1)
    # assumes GPU 0 is available -- TODO confirm
    chainer.cuda.get_device(0).use()
    model.to_gpu(0)

    trainer = Trainer(model, model_dir)
    trainer.run(train_iter, dev_iter, 25)
    chainer.serializers.save_npz(model_dir, model)

    def _dump_buzzes(it, label, path):
        # Run the trained buzzer on `it` and pickle the resulting buzzes.
        buzzes = trainer.test(it)
        with open(path, 'wb') as f:
            pickle.dump(buzzes, f)
        print('{} buzz {} saved to {}'.format(label, len(buzzes), path))

    dev_buzzes_dir = 'output/buzzer/neo/dev_buzzes.{}.pkl'.format(model_name)
    _dump_buzzes(dev_iter, 'Dev', dev_buzzes_dir)
    _dump_buzzes(expo_iter, 'Expo',
                 'output/buzzer/neo/expo_buzzes.{}.pkl'.format(model_name))
    # BUG FIX: the original ran the dev fold a second time with identical
    # output (same iterator, same file) -- that redundant pass was removed.
    _dump_buzzes(devtest_iter, 'Devtest',
                 'output/buzzer/neo/devtest_buzzes.{}.pkl'.format(model_name))
    report(dev_buzzes_dir)
def hyper_search(fold):
    """Run every candidate buzzer config on `fold` and pickle the results.

    Each element of the saved list is a (config, buzzes) pair so the best
    configuration can be picked offline.
    """
    option2id, all_guesses = load_quizbowl()
    # BUG FIX (dead work): the original also loaded QuestionDatabase
    # questions, built an answers dict, and loaded a guesses DataFrame --
    # none of which were used anywhere in this function. Removed.
    cfgs = get_cfgs()
    cfg_buzzes = []
    for i, cfg in enumerate(cfgs):
        print('**********{}**********'.format(i))
        buzzes = run(cfg, fold, all_guesses, option2id)
        cfg_buzzes.append((cfg, buzzes))
    with open('output/buzzer/cfg_buzzes_{}.pkl'.format(fold), 'wb') as outfile:
        pickle.dump(cfg_buzzes, outfile)
def generate(config, folds):
    """Load a previously trained RNN buzzer and write buzzes for `folds`.

    Exits early (status 0) when the model checkpoint does not exist. For the
    'expo' fold the buzzes are additionally exported via buzzer2vwexpo.
    """
    N_GUESSERS = len(GUESSERS)
    option2id, all_guesses = load_quizbowl(folds)
    cfg = getattr(configs, config)()
    # cfg = pickle.load(open(cfg.ckp_dir, 'rb'))
    make_vector = getattr(iterator, cfg.make_vector)

    iterators = {
        fold: QuestionIterator(all_guesses[fold], option2id,
                               batch_size=cfg.batch_size,
                               make_vector=make_vector)
        for fold in folds
    }

    if not os.path.exists(cfg.model_dir):
        log.info('Model {0} not available'.format(cfg.model_dir))
        exit(0)

    model = RNN(iterators[folds[0]].n_input, cfg.n_hidden, N_GUESSERS + 1)
    log.info('Loading model {0}'.format(cfg.model_dir))
    chainer.serializers.load_npz(cfg.model_dir, model)

    gpu = conf['buzzer']['gpu']
    if gpu != -1 and chainer.cuda.available:
        log.info('Using gpu {0}'.format(gpu))
        chainer.cuda.get_device(gpu).use()
        model.to_gpu(gpu)

    trainer = Trainer(model, cfg.model_dir)
    for fold in folds:
        buzzes = trainer.test(iterators[fold])
        log.info('{0} buzzes generated. Size {1}.'.format(fold, len(buzzes)))
        buzzes_dir = bc.BUZZES_DIR.format(fold, cfg.model_name)
        with open(buzzes_dir, 'wb') as f:
            pickle.dump(buzzes, f)
        log.info('Buzzes saved to {0}.'.format(buzzes_dir))
        if fold == 'expo':
            guesses_df = AbstractGuesser.load_guesses(bc.GUESSES_DIR,
                                                      folds=[fold])
            buzzer2vwexpo(guesses_df, buzzes, fold)
def report_ultimate():
    """Score the 'ultimate' buzzer on the dev fold against protobowl humans.

    Repeats the comparison at several user-answer-count thresholds, printing
    per-threshold stats and the list of rewards at the end.
    """
    all_questions = QuestionDatabase().all_questions()
    answers = {qnum: q.page for qnum, q in all_questions.items()}
    question_texts = {qnum: q.text for qnum, q in all_questions.items()}
    # Only questions that actually have a protobowl id.
    protobowl_ids = {qnum: all_questions[qnum].protobowl
                     for qnum in all_questions
                     if all_questions[qnum].protobowl != ''}
    protobowl_df, user_count = load_protobowl()

    guesses_df = AbstractGuesser.load_guesses(bc.GUESSES_DIR,
                                              folds=[c.BUZZER_DEV_FOLD])
    questions = guesses_df.groupby('qnum')
    top_guesses = _multiprocess(_get_top_guesses, questions,
                                info='Top guesses', multi=True)
    top_guesses = dict(top_guesses)

    option2id, all_guesses = load_quizbowl()
    test_iter = QuestionIterator(all_guesses[c.BUZZER_DEV_FOLD], option2id,
                                 batch_size=128)
    buzzes = ultimate_buzzer(test_iter)

    save_dir = 'output/summary/new_performance/'
    inputs = [top_guesses, buzzes, answers, None, c.BUZZER_DEV_FOLD, save_dir]

    threshold_stats = []
    for threshold in [1, 10, 50, 100, 500, 1000, 2000]:
        # Keep only protobowl records from questions answered by more than
        # `threshold` users.
        pdf1 = protobowl_df[protobowl_df.user_answers > threshold]
        p_inputs = [question_texts, protobowl_ids,
                    pdf1.groupby('qid'), questions] + inputs
        pstats = get_protobowl(p_inputs)
        threshold_stats.append(pstats)
        print('ultimate', threshold, pstats)
    print('ultimate', [x['reward'] for x in threshold_stats])
def main():
    """Train a chainerrl DQN buzzer, then dump dev-fold buzzes and report.

    Hyperparameters (epsilon schedule, replay settings, network sizes, steps)
    come from the module-level argparse `parser`.
    """
    import logging
    logging.basicConfig(level=logging.WARNING)

    args = parser.parse_args()
    # Records argv in the output directory and returns the final outdir path.
    args.outdir = experiments.prepare_output_dir(args, args.outdir,
                                                 argv=sys.argv)
    print('Output files are saved in {}'.format(args.outdir))

    if args.seed is not None:
        misc.set_random_seed(args.seed)

    option2id, all_guesses = load_quizbowl()
    # NOTE(review): the agent trains on the *dev* fold, not the train fold --
    # confirm this is intended.
    train_iter = QuestionIterator(all_guesses[c.BUZZER_DEV_FOLD], option2id,
                                  batch_size=1, make_vector=dense_vector)
    env = BuzzingGame(train_iter)

    timestep_limit = 300  # maximum steps per episode
    obs_size = env.observation_size
    action_space = env.action_space
    n_actions = action_space.n
    q_func = q_functions.FCStateQFunctionWithDiscreteAction(
        obs_size, n_actions,
        n_hidden_channels=args.n_hidden_channels,
        n_hidden_layers=args.n_hidden_layers)

    # Use epsilon-greedy for exploration
    explorer = explorers.LinearDecayEpsilonGreedy(
        args.start_epsilon, args.end_epsilon, args.final_exploration_steps,
        action_space.sample)

    opt = optimizers.Adam()
    opt.setup(q_func)

    rbuf_capacity = 5 * 10**5
    if args.episodic_replay:
        # Episodic replay: smaller defaults since units are whole episodes.
        if args.minibatch_size is None:
            args.minibatch_size = 4
        if args.replay_start_size is None:
            args.replay_start_size = 10
        if args.prioritized_replay:
            # Number of steps over which the prioritization beta is annealed.
            betasteps = \
                (args.steps - timestep_limit * args.replay_start_size) \
                // args.update_interval
            rbuf = replay_buffer.PrioritizedEpisodicReplayBuffer(
                rbuf_capacity, betasteps=betasteps)
        else:
            rbuf = replay_buffer.EpisodicReplayBuffer(rbuf_capacity)
    else:
        # Transition-level replay: conventional DQN defaults.
        if args.minibatch_size is None:
            args.minibatch_size = 32
        if args.replay_start_size is None:
            args.replay_start_size = 1000
        if args.prioritized_replay:
            betasteps = (args.steps - args.replay_start_size) \
                // args.update_interval
            rbuf = replay_buffer.PrioritizedReplayBuffer(
                rbuf_capacity, betasteps=betasteps)
        else:
            rbuf = replay_buffer.ReplayBuffer(rbuf_capacity)

    def phi(obs):
        # Feature extractor: cast observations to float32 for chainer.
        return obs.astype(np.float32)

    agent = DQN(q_func, opt, rbuf, gpu=args.gpu, gamma=args.gamma,
                explorer=explorer,
                replay_start_size=args.replay_start_size,
                target_update_interval=args.target_update_interval,
                update_interval=args.update_interval, phi=phi,
                minibatch_size=args.minibatch_size,
                target_update_method=args.target_update_method,
                soft_update_tau=args.soft_update_tau,
                episodic_update=args.episodic_replay,
                episodic_update_len=16)
    if args.load:
        agent.load(args.load)

    # Evaluation uses a fresh game over the same question iterator.
    eval_env = BuzzingGame(train_iter)

    if args.demo:
        # Demo mode: evaluate the (loaded) agent only; nothing is saved.
        eval_stats = experiments.eval_performance(
            env=eval_env, agent=agent, n_runs=args.eval_n_runs,
            max_episode_len=timestep_limit)
        print('n_runs: {} mean: {} median: {} stdev {}'.format(
            args.eval_n_runs, eval_stats['mean'], eval_stats['median'],
            eval_stats['stdev']))
    else:
        experiments.train_agent_with_evaluation(
            agent=agent, env=env, steps=args.steps,
            eval_n_runs=args.eval_n_runs, eval_interval=args.eval_interval,
            outdir=args.outdir, eval_env=eval_env,
            max_episode_len=timestep_limit)
        # Persist the trained Q-function weights.
        serializers.save_npz('dqn.npz', q_func)

    # Generate and pickle dev-fold buzzes with the trained Q-function.
    dev_iter = QuestionIterator(all_guesses[c.BUZZER_DEV_FOLD], option2id,
                                batch_size=128, make_vector=dense_vector)
    dev_buzzes = get_buzzes(q_func, dev_iter)
    dev_buzzes_dir = 'output/buzzer/rl/dev_buzzes.pkl'
    with open(dev_buzzes_dir, 'wb') as f:
        pickle.dump(dev_buzzes, f)
    print('Dev buzz {} saved to {}'.format(len(dev_buzzes), dev_buzzes_dir))
    report(dev_buzzes_dir)