Beispiel #1
0
    if not continue_execution: os.makedirs(main_outdir)

    # Initiate learning information
    with open(main_outdir + 'episode_data.csv', 'w') as csvRWRD:
        csvRWRD_writer = csv.writer(csvRWRD, dialect='excel')
        csvRWRD_writer.writerow([
            'Episode', 'Goal', 'Steps', 'Reward', 'Total Goals',
            'Average Steps'
        ])
    csvRWRD.close()

    last_time_steps = numpy.ndarray(0)

    qlearn = qlearn.QLearn(actions=range(env.action_space.n),
                           alpha=0.1,
                           gamma=0.9,
                           epsilon=1,
                           qdir=qtabledir)

    initial_epsilon = qlearn.epsilon

    epsilon_discount = 0.9986

    start_time = time.time()
    total_goals = 0
    total_succeed_steps = 0
    total_episodes = 1000
    highest_reward = -10000000
    fewest_steps = 10000000

    best_act = []
          == 0) and (x != 0) and (x > render_skip) and (render_episodes < x):
        env.render(close=True)


# Script entry point: create the Gym environment and initialise the
# Q-learning agent and the run statistics defined below.
if __name__ == '__main__':

    # Instantiate the environment registered under this Gym id.
    env = gym.make('GazeboProjectTurtlebot-v0')

    # Wrap the environment in Gym's Monitor, passing outdir as its directory;
    # the same directory is handed to the live plotter.
    outdir = '/tmp/gazebo_gym_experiments'
    env = gym.wrappers.Monitor(env, outdir, force=True)
    plotter = liveplot.LivePlot(outdir)

    # Zero-length array; how it gets filled is not visible in this section.
    last_time_steps = numpy.ndarray(0)

    # NOTE(review): this rebinds the name `qlearn` to the QLearn instance, so
    # whatever `qlearn` referred to before (presumably the imported module) is
    # no longer reachable under that name after this statement.
    # Actions are the integers 0 .. env.action_space.n - 1.
    # NOTE(review): epsilon is 0.0 here -- presumably no random exploration;
    # confirm against the QLearn implementation.
    qlearn = qlearn.QLearn(actions=range(env.action_space.n),
                           alpha=0.2,
                           gamma=0.8,
                           epsilon=0.0)

    # Remember the epsilon the agent reports right after construction.
    initial_epsilon = qlearn.epsilon

    # Presumably a multiplicative decay applied to epsilon -- its use is not
    # visible in this section; TODO confirm.
    epsilon_discount = 0.9986

    # Wall-clock reference and run-wide counters, all starting from zero.
    start_time = time.time()
    total_goals = 0
    total_succeed_steps = 0
    total_episodes = 1001
    # Extreme starting values; presumably chosen so that the first real
    # reward / step count replaces them -- the comparison is not visible here.
    highest_reward = -10000000
    fewest_steps = 10000000

    # Empty lists; presumably filled with the best action sequence and its
    # timing later in the script -- TODO confirm.
    best_act = []
    best_act_time = []