def learn_option(option, environment_name, num_episodes, max_steps):
    """ Learn the policy of an option with Sarsa(lambda) over its pseudo-reward.

    :param option: the option whose policy should be learned
    :param environment_name: the Pinball configuration file
    :type environment_name: str
    :param num_episodes: the number of episodes to execute, or a false value
        to run one episode per initial state of the option
    :type num_episodes: int
    :param max_steps: the maximum number of steps per episode
    :type max_steps: int

    """
    from pyrl.agents.sarsa_lambda import sarsa_lambda
    from pyrl.rlglue import RLGlueLocal as RLGlueLocal
    from pyrl.environments.pinball import PinballRLGlue
    import numpy as np
    import options
    import cPickle
    import csv

    prefix = 'option-%d-to-%d' % (option.label, option.target)
    score_file = csv.writer(open(prefix + '-score.csv', 'wb'))

    # Create the learning agent
    agent = sarsa_lambda(epsilon=0.01, alpha=0.001, gamma=0.9, lmbda=0.9,
                         params={'name': 'fourier', 'order': 4})

    # Wrap the environment with the option's pseudo-reward and record the
    # trajectories followed while learning
    environment = options.TrajectoryRecorder(
        options.PseudoRewardEnvironment(
            PinballRLGlue(environment_name), option, 10000),
        prefix + '-trajectory')

    # Connect the agent to the environment through RL-Glue
    rlglue = RLGlueLocal.LocalGlue(environment, agent)
    rlglue.RL_init()

    # Default to one episode per initial state of the option
    if not num_episodes:
        num_episodes = np.alen(option.initial_states)

    print 'Learning %d episodes' % (num_episodes,)
    for i in xrange(num_episodes):
        # Start every episode from one of the option's initial states
        initial_state = option.initial_state()
        rlglue.RL_env_message('set-start-state %f %f %f %f'
                              % (initial_state[0], initial_state[1],
                                 initial_state[2], initial_state[3]))

        terminated = rlglue.RL_episode(max_steps)
        total_steps = rlglue.RL_num_steps()
        total_reward = rlglue.RL_return()

        score_file.writerow([i, total_steps, total_reward, terminated])

    rlglue.RL_cleanup()

    # Save the function approximation with the option
    option.basis = agent.basis
    option.weights = agent.weights[0, :, :]
    with open(prefix + '-policy.pl', 'wb') as f:
        cPickle.dump(option, f)

    return option
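# A minimal usage sketch, not part of the original module: it assumes an
# option object (with 'label', 'target', 'initial_states' and an
# 'initial_state()' sampler) was pickled by an earlier step. The file names
# 'option-3-to-7-init.pl' and 'pinball_simple_single.cfg' are hypothetical
# placeholders.
def _example_learn_option():
    import cPickle
    with open('option-3-to-7-init.pl', 'rb') as f:
        option = cPickle.load(f)
    # Run 100 episodes of at most 10000 steps each
    return learn_option(option, 'pinball_simple_single.cfg', 100, 10000)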
def learn_policy(environment_name, nepisodes, max_steps, prefix):
    """ Learn a flat policy over the whole domain with Sarsa(lambda).

    :param environment_name: the Pinball configuration file
    :type environment_name: str
    :param nepisodes: the number of episodes to execute
    :type nepisodes: int
    :param max_steps: the maximum number of steps per episode
    :type max_steps: int
    :param prefix: the prefix to use for the output files
    :type prefix: str

    """
    from pyrl.agents.sarsa_lambda import sarsa_lambda
    from pyrl.rlglue import RLGlueLocal as RLGlueLocal
    from pyrl.environments.pinball import PinballRLGlue
    from options import TrajectoryRecorder
    import cPickle
    import csv

    # Create the learning agent
    agent = sarsa_lambda(epsilon=0.01, alpha=0.001, gamma=1.0, lmbda=0.9,
                         params={'name': 'fourier', 'order': 4})

    # Record the trajectories followed while learning
    environment = TrajectoryRecorder(PinballRLGlue(environment_name),
                                     prefix + '-trajectory')
    score_file = csv.writer(open(prefix + '-scores.csv', 'wb'))

    # Connect the agent to the environment through RL-Glue
    rlglue = RLGlueLocal.LocalGlue(environment, agent)
    rlglue.RL_init()

    # Execute episodes
    scores = []
    for i in xrange(nepisodes):
        print 'Episode ', i

        terminated = rlglue.RL_episode(max_steps)
        total_steps = rlglue.RL_num_steps()
        total_reward = rlglue.RL_return()
        print '\t%d steps, %f reward, %d terminated' % (total_steps,
                                                        total_reward,
                                                        terminated)

        score = [i, total_steps, total_reward, terminated]
        scores.append(score)
        score_file.writerow(score)

    rlglue.RL_cleanup()

    with open(prefix + '.pl', 'wb') as f:
        cPickle.dump(agent, f)

    return scores
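# A minimal usage sketch, not part of the original module: it learns a flat
# policy for 100 episodes and writes 'flat-trajectory', 'flat-scores.csv'
# and 'flat.pl'. The configuration file name is a hypothetical placeholder.
def _example_learn_policy():
    scores = learn_policy('pinball_simple_single.cfg', 100, 10000, 'flat')
    print 'Average return: %f' % (sum(s[2] for s in scores) / len(scores))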