    # Profiling mode: run the training loop inside the Profile context manager.
    with Profile(f'log/{args.log_name}'):
        # First hp.Learn_start + 25 environment steps, matching the offset used
        # by the save/evaluation schedule below.
        for step in range(hp.Learn_start + 25):
            action = player.act(bef_o)
            aft_o, r, d, i = env.step(action)
            player.step(bef_o, action, r, d, i)
            if d:
                bef_o = env.reset()
            else:
                bef_o = aft_o
            if args.render:
                env.render()

        # Remaining steps: same interaction loop, plus a checkpoint and an
        # evaluation run every hp.Model_save steps.
        remaining_steps = total_steps - hp.Learn_start - 25
        for step in range(remaining_steps):
            if ((hp.Learn_start + 25 + step) % hp.Model_save) == 0:
                player.save_model()
                score = evaluate_f(player, eval_env, vid_type)
                print('eval_score:{0}'.format(score))
            action = player.act(bef_o)
            aft_o, r, d, i = env.step(action)
            player.step(bef_o, action, r, d, i)
            if d:
                bef_o = env.reset()
            else:
                bef_o = aft_o
            if args.render:
                env.render()
else:
    # Non-profiling mode: same training loop without the profiler.
    for step in range(total_steps):
        # Periodically checkpoint the model.
        if (step > 0) and ((step % hp.Model_save) == 0):
            player.save_model()
# Render only when not running with the --vm flag.
if not args.vm:
    env.render()
for step in range(total_steps):
    action = player.act(bef_o)
    aft_o, r, d, i = env.step(action)
    player.step(bef_o, action, r, d, i)
    if d:
        bef_o = env.reset()
    else:
        bef_o = aft_o
    if not args.vm:
        env.render()
my_tqdm.close()

# Training finished: save a final checkpoint and resolve its directory.
next_save = player.save_model()
if not args.load:
    save_dir = player.save_dir
else:
    save_dir, _ = os.path.split(args.load)
next_dir = os.path.join(save_dir, str(next_save))

# Final evaluation on a fresh environment, then report timing.
score = player.evaluate(gym.make(ENVIRONMENT), vid_type)
print('eval_score:{0}'.format(score))
print('{0} steps took {1} sec'.format(total_steps, time.time() - st))

# Hand over to the next training loop, or stop if this was the last one.
total_loop -= 1
if total_loop <= 0:
    sys.exit()
else:
    next_args = []
    next_args.append('python')