Exemple #1
0
    if d :
        bef_o = env.reset()
    else:
        bef_o = aft_o
    if not args.vm :
        env.render()

# Close my_tqdm (presumably the progress bar that tracked the loop above).
my_tqdm.close()

# Persist the model; the value returned by save_model() names the next
# checkpoint entry inside save_dir.
next_save = player.save_model()
if args.load:
    # Resuming: keep checkpoints in the directory that contains the loaded one.
    save_dir = os.path.split(args.load)[0]
else:
    # Fresh run: use the directory the player reports.
    save_dir = player.save_dir
next_dir = os.path.join(save_dir, str(next_save))

# Evaluate on a newly created environment and report the score and the
# wall-clock time elapsed since `st`.
score = player.evaluate(gym.make(ENVIRONMENT), vid_type)
print('eval_score:{0}'.format(score))
elapsed = time.time() - st
print('{0}steps took {1} sec'.format(total_steps, elapsed))

# One run is done: use up one loop and exit once none are left.
total_loop -= 1
if total_loop <= 0:
    sys.exit()
else:
    # Argument vector for another run of this same script, loading the
    # checkpoint just written (-l) and repeating the same step budget.
    # NOTE(review): 'python' is a bare command name; if this list is later
    # used to spawn a process it is resolved via PATH, which may not be the
    # interpreter running this script (sys.executable) - confirm intent.
    next_args = [
        'python',
        __file__,
        '-v',
        '-l', next_dir,
        '--step', str(total_steps),
    ]
Exemple #2
0
# Main interaction loop: total_steps iterations under a trange progress bar
# (ncols=50). The loop index `step` is not read inside the body.
for step in trange(total_steps, ncols=50):
    # Choose an action from the current observation `o`.
    action = player.act(o)
    # Advance the environment; the 4-tuple is unpacked into o, r, d, i
    # (presumably observation, reward, done flag, info - the classic Gym
    # step signature; confirm against the installed gym version).
    o, r, d, i = env.step(action)
    # Hand the whole transition to the player.
    player.step(o, r, d, i)
    if d:
        # Truthy `d`: reset the environment and continue from its return value.
        o = env.reset()
    if not args.vm:
        # Rendering is skipped when args.vm is set (presumably a headless/VM
        # flag - confirm against the argument parser).
        env.render()

# Persist the model; the value returned by save_model() names the next
# checkpoint entry inside save_dir.
next_save = player.save_model()
if args.load:
    # Resuming: keep checkpoints in the directory that contains the loaded one.
    save_dir = os.path.split(args.load)[0]
else:
    # Fresh run: use the directory the player reports.
    save_dir = player.save_dir
next_dir = os.path.join(save_dir, str(next_save))

# Evaluate on a newly created 'mouseCl-v0' environment and report the score
# and the wall-clock time elapsed since `st`.
score = player.evaluate(gym.make('mouseCl-v0'), vid_type)
print('eval_score:{0}'.format(score))
elapsed = time.time() - st
print('{0}steps took {1} sec'.format(total_steps, elapsed))

# One run is done: use up one loop and exit once none are left.
total_loop -= 1
if total_loop <= 0:
    sys.exit()
else:
    # Argument vector for another run of this same script, loading the
    # checkpoint just written (-l) and repeating the same step budget.
    # NOTE(review): 'python' is a bare command name; if this list is later
    # used to spawn a process it is resolved via PATH, which may not be the
    # interpreter running this script (sys.executable) - confirm intent.
    next_args = [
        'python',
        __file__,
        '-v',
        '-l', next_dir,
        '--step', str(total_steps),
    ]