Example #1
    def __init__(self):
        self.environment = GameEnv()

        # one value per (state, action) pair
        av_table = ActionValueTable(self.environment.outdim, self.environment.indim)
        av_table.initialize(0.)  # todo: save & restore agents state

        # tabular Q-learning with an epsilon-greedy exploration policy
        learner = Q()
        learner._setExplorer(EpsilonGreedyExplorer())
        agent = LearningAgent(av_table, learner)

        self.agent = agent
        self.task = GameTask(self.environment)
        self.experiment = Experiment(self.task, self.agent)
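The snippet above only wires the pieces together and never runs them. Below is a minimal sketch of a driver method that could sit next to __init__, assuming PyBrain's usual interact/learn/reset cycle; the method name and iteration counts are made up for illustration.

    def train(self, episodes=100, interactions_per_episode=100):
        # hypothetical helper, not part of the original example
        for _ in range(episodes):
            self.experiment.doInteractions(interactions_per_episode)  # act in GameEnv
            self.agent.learn()   # update the Q-table from the gathered observations
            self.agent.reset()   # clear the agent's history before the next batch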
Example #2
class ReinforcedController(Controller):
    NAME = "Reinforced Controller"

    def __init__(self, pendulum_length, pendulum_mass, cart_mass):
        super(ReinforcedController, self).__init__(pendulum_length,
                                                   pendulum_mass, cart_mass)
        self.ranges = self.get_ranges()
        self.model = InvertedPendulumModel(self.pendulum_length,
                                           self.pendulum_mass, self.cart_mass)
        self.force_granularity = 2
        self.environment = CartEnvironment(
            self.model, *self.ranges, force_granularity=self.force_granularity)
#        self.load()

    def learn(self, number_of_iterations):
        learner = Q(0.2, 0.8)
        task = CartMovingTask(self.environment)
        # number of discrete states = product of the bucket counts over all ranges
        self.controller = ActionValueTable(
            reduce(lambda x, y: x * y, map(lambda x: len(x), self.ranges)),
            self.force_granularity)
        self.controller.initialize(1.)
        agent = LearningAgent(self.controller, learner)
        experiment = Experiment(task, agent)
        for i in range(number_of_iterations):
            experiment.doInteractions(1)
            agent.learn()
            agent.reset()
        # pickle files should be opened in binary mode
        with open("test.pcl", "wb") as f:
            pickle.dump(self.controller, f)

    def load(self):
        with open("test.pcl", "rb") as f:
            self.controller = pickle.load(f)

    def calculate_force(self, angular_position, angular_velocity,
                        cart_position, cart_velocity):
        state = self.environment.normalized_state(
            (angular_position, angular_velocity, cart_position, cart_velocity))
        action = self.controller.getMaxAction(state)
        force = self.environment.allActions[action]
        return force

    def get_ranges(self):
        # discretisation buckets per state variable; 1000000000 stands in for infinity
        cart_position_ranges = [(-1000000000, -2.4), (-2.4, -0.8), (-0.8, 0.8),
                                (0.8, 2.4), (2.4, 1000000000)]
        cart_velocity_ranges = [(-1000000000, -0.5), (-0.5, 0.5),
                                (0.5, 1000000000)]
        angles_ranges = [(-6.28, -1), (-1, 0), (0, 1), (1, 6.28)]
        angular_velocity_ranges = [(-1000000000, -3.14), (-3.14, 3.14),
                                   (3.14, 1000000000)]
        return (cart_position_ranges, cart_velocity_ranges, angles_ranges,
                angular_velocity_ranges)
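A short usage sketch for ReinforcedController follows; the constructor values and the iteration count are illustrative assumptions, and calculate_force expects a state that falls inside the discretised ranges returned by get_ranges.

# Hypothetical usage (all numeric values are made up for illustration).
controller = ReinforcedController(pendulum_length=1.0, pendulum_mass=0.1, cart_mass=1.0)
controller.learn(10000)            # train and pickle the Q-table to test.pcl
# controller.load()                # or restore a previously saved table instead
force = controller.calculate_force(0.05, 0.0, 0.0, 0.0)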
Example #5
def main():
    vrep.simxFinish(-1)  # just in case, close all opened connections
    client_id = vrep.simxStart('127.0.0.1', 19997, True, True, 5000,
                               5)  # Connect to V-REP

    if client_id < 0:
        print('Failed connecting to remote API server')
        return -1

    print('Connected to remote API server')

    # Define RL elements
    environment = StandingUpEnvironment(client_id)

    task = StandingUpTask(environment)

    controller = ActionValueTable(task.get_state_space_size(),
                                  task.get_action_space_size())
    controller.initialize(1.)

    # restore previously learned Q-values into the table
    with open('standing-up-q.pkl', 'rb') as f:
        controller._params = pickle.load(f)

    # learner = Q()
    agent = LearningAgent(controller)

    experiment = EpisodicExperiment(task, agent)

    i = 0
    try:
        # run episodes until the process is interrupted (e.g. Ctrl-C);
        # the finally clause makes sure the remote API connection is closed
        while True:
            i += 1
            print('Iteration n° ' + str(i))
            experiment.doEpisodes(1)
    finally:
        vrep.simxFinish(client_id)
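The example loads a pre-trained table from standing-up-q.pkl but never shows how that file was produced. Below is a sketch of the corresponding save step after a learning run, assuming the same pickle layout (a bare parameter array) that the loading code above expects.

# Hypothetical counterpart to the loading code above: persist the learned
# Q-values so they can later be reloaded into controller._params.
with open('standing-up-q.pkl', 'wb') as f:
    pickle.dump(controller.params, f)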
Example #7
                    [1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1],
                    [1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1],
                    [1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1],
                    [1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1],
                    [1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1],
                    [1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1],
                    [1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1],
                    [1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1],
                    [1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1],
                    [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])

environment = Maze(envmatrix2, (6, 8))

task = MDPMazeTask(environment)

# 18x18 maze -> 324 states, 4 actions
table = ActionValueTable(324, 4)
table.initialize(3.)

agent = LearningAgent(table, Q(0.5, 0.80))
#agent._setLearning(0.01)

experiment = Experiment(task, agent)

plt.ion()
plt.hot()
# plt.set_cmap()  # fix background color

x = raw_input('Want to start?')
yes = "yes"
no = "no"
if x == yes:
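    # The original snippet is truncated here. A plausible continuation, modeled on
    # the training loop of Example #8 below (iteration counts are assumptions):
    for i in range(1000):
        experiment.doInteractions(100)
        agent.learn()
        agent.reset()
        plt.pcolor(table.params.reshape(324, 4).max(axis=1).reshape(18, 18))
        plt.gcf().canvas.draw()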
Example #8
from numpy import array
import matplotlib.pyplot as plt

from pybrain.rl.environments.mazes import Maze, MDPMazeTask
from pybrain.rl.agents import LearningAgent
from pybrain.rl.learners import Q, ActionValueTable
from pybrain.rl.experiments import Experiment

envmatrix = array([[1,1,1,1,1,1,1,1,1],
                   [1,0,0,1,0,0,0,0,1],
                   [1,0,0,1,0,0,1,0,1],
                   [1,0,0,1,0,0,1,0,1],
                   [1,0,0,1,0,1,1,0,1],
                   [1,0,0,0,0,0,1,0,1],
                   [1,1,1,1,1,1,1,0,1],
                   [1,0,0,0,0,0,0,0,1],
                   [1,1,1,1,1,1,1,1,1]])
environment = Maze(envmatrix, (7,7))
task = MDPMazeTask(environment)

table = ActionValueTable(81,4)
table.initialize(1.)

agent = LearningAgent(table,Q())

experiment = Experiment(task,agent)

plt.ion()
plt.gray()

for i in range(1000):
    experiment.doInteractions(100)
    agent.learn()
    agent.reset()
    plt.pcolor(table.params.reshape(81, 4).max(axis=1).reshape(9, 9))
    plt.gcf().canvas.draw()
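A possible follow-up once the loop has finished (not part of the original example): read the greedy action for every cell of the 9x9 maze out of the learned table.

# Greedy policy: index of the highest-valued action for each of the 81 states.
policy = table.params.reshape(81, 4).argmax(axis=1).reshape(9, 9)
print(policy)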