Example #1
    def train(self, episodes, maxSteps):

        avgReward = 0

        # set up environment and task
        self.env = InfoMaxEnv(self.objectNames, self.actionNames,
                              self.numCategories)
        self.task = InfoMaxTask(self.env, maxSteps=maxSteps,
                                do_decay_beliefs=True,
                                uniformInitialBeliefs=True)

        # create neural net and learning agent
        self.params = buildNetwork(self.task.outdim, self.task.indim,
                                   bias=True, outclass=SoftmaxLayer)

        if self._PGPE:
            self.agent = OptimizationAgent(self.params,
                                           PGPE(minimize=False, verbose=False))
        elif self._CMAES:
            self.agent = OptimizationAgent(
                self.params, CMAES(minimize=False, verbose=False))

        # init and perform experiment
        exp = EpisodicExperiment(self.task, self.agent)

        for i in range(episodes):
            exp.doEpisodes(1)
            avgReward += self.task.getTotalReward()
            print "reward episode ", i, self.task.getTotalReward()

        # print average reward over training
        print "\naverage reward over training = ", avgReward / episodes

        # save trained network
        self._saveWeights()
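
    # The _saveWeights helper is not shown above. A minimal sketch of what it
    # might look like, assuming the trained network is simply pickled to disk
    # (the method name comes from the example; the body and default filename
    # are illustrative only):
    def _saveWeights(self, path='infomax_net.pkl'):
        import pickle
        with open(path, 'wb') as f:
            # self.params holds the trained network built in train()
            pickle.dump(self.params, f)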
Example #2
    def simulate(self, random_state):
        """ Simulates agent behavior in 'n_sim' episodes.
        """
        logger.debug("Simulating user actions ({} episodes)".format(
            self.rl_params.n_simulation_episodes))
        self.experiment = EpisodicExperiment(self.task, self.agent)

        # set training flag off
        self.task.env.training = False
        # deactivate learning for experiment
        self.agent.learning = False
        # deactivate exploration
        explorer = self.agent.learner.explorer
        self.agent.learner.explorer = EGreedyExplorer(
            epsilon=0, decay=1, random_state=random_state)
        self.agent.learner.explorer.module = self.agent.module
        # activate logging
        self.task.env.start_logging()

        # simulate behavior
        self.experiment.doEpisodes(self.rl_params.n_simulation_episodes)
        # store log data
        dataset = self.task.env.log

        # deactivate logging
        self.task.env.end_logging()
        # reactivate exploration
        self.agent.learner.explorer = explorer
        # reactivate learning for experiment
        self.agent.learning = True
        # set training flag back on
        self.task.env.training = True

        return dataset
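
# The save/restore sequence above (training flag, learning flag, explorer) could
# also be wrapped in a context manager so it cannot be left half-restored if an
# episode raises. A sketch under that assumption; the attribute names and the
# EGreedyExplorer call are taken from the method above:
from contextlib import contextmanager

@contextmanager
def evaluation_mode(agent, task, random_state):
    saved_explorer = agent.learner.explorer
    task.env.training = False
    agent.learning = False
    agent.learner.explorer = EGreedyExplorer(epsilon=0, decay=1,
                                             random_state=random_state)
    agent.learner.explorer.module = agent.module
    try:
        yield
    finally:
        # restore everything even if the simulated episodes fail
        agent.learner.explorer = saved_explorer
        agent.learning = True
        task.env.training = True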
Example #3
def someEpisodes(game_env, net, discountFactor=0.99, maxSteps=100, avgOver=1, returnEvents=False, exploretoo=True):
    """ Return the fitness value for one episode of play, given the policy defined by a neural network. """
    task = GameTask(game_env)
    game_env.recordingEnabled = True
    game_env.reset()
    net.reset()
    task.maxSteps = maxSteps
    agent = LearningAgent(net)
    agent.learning = False
    agent.logging = False
    exper = EpisodicExperiment(task, agent)
    fitness = 0
    for _ in range(avgOver):
        rs = exper.doEpisodes(1)
        # add a slight bonus for more exploration, if rewards are identical
        if exploretoo:
            fitness += len(set(game_env._allEvents)) * 1e-6
        # the true, discounted reward        
        fitness += sum([sum([v*discountFactor**step for step, v in enumerate(r)]) for r in rs])
    fitness /= avgOver
    if returnEvents:
        return fitness, game_env._allEvents
    else:
        return fitness
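
# Because someEpisodes returns a scalar fitness for a given network, it can be
# plugged straight into one of PyBrain's black-box optimizers. A usage sketch,
# assuming game_env and an initial net already exist; the choice of HillClimber
# and the evaluation budget are illustrative, not part of the example above:
from pybrain.optimization import HillClimber

evaluate = lambda candidate: someEpisodes(game_env, candidate, avgOver=3)
optimizer = HillClimber(evaluate, net, maxEvaluations=200)
best, best_fitness = optimizer.learn()   # best evaluable found and its fitness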
Example #4
    def _train_model(self):
        """ Uses reinforcement learning to find the optimal strategy.
        """
        self.experiment = EpisodicExperiment(self.task, self.agent)
        n_epochs = int(self.rl_params.n_training_episodes /
                       self.rl_params.n_episodes_per_epoch)
        logger.debug(
            "Fitting user model over {} epochs, each {} episodes, total {} episodes."
            .format(n_epochs, self.rl_params.n_episodes_per_epoch,
                    n_epochs * self.rl_params.n_episodes_per_epoch))
        for i in range(n_epochs):
            logger.debug("RL epoch {}".format(i))
            self.experiment.doEpisodes(self.rl_params.n_episodes_per_epoch)
            self.agent.learn()
            self.agent.reset()  # reset buffers
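
# Examples #2 and #4 appear to come from the same model class: _train_model fits
# the policy, and simulate() then replays it with learning, exploration and the
# training flag switched off. A hedged usage sketch; the class name UserModel and
# its constructor are assumptions for illustration only:
import numpy as np

model = UserModel(rl_params)                    # hypothetical constructor
model._train_model()                            # fit over the configured epochs
log = model.simulate(np.random.RandomState(0))  # replay without learning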
Example #5
def main():
    client_id = Utils.connectToVREP()

    # Define RL elements
    environment = StandingUpEnvironment(client_id)
    task = StandingUpTask(environment)
    controller = MyActionValueTable()
    learner = Q(0.5, 0.9)
    learner.explorer = EpsilonGreedyExplorer(0.15, 1)  # alternative: EpsilonGreedyBoltzmannExplorer()
    agent = LearningAgent(controller, learner)
    experiment = EpisodicExperiment(task, agent)

    controller.initialize(agent)

    i = 0
    try:
        while True:
            i += 1
            print('Episode ' + str(i))
            experiment.doEpisodes()
            agent.learn()
            agent.reset()
            print('mean: '+str(numpy.mean(controller.params)))
            print('max: '+str(numpy.max(controller.params)))
            print('min: '+str(numpy.min(controller.params)))

            if i % 500 == 0:  # Save q-table every 500 episodes
                print('Save q-table')
                controller.save()
                task.t_table.save()

    except (KeyboardInterrupt, SystemExit):
        with open('../data/standing-up-q.pkl', 'wb') as handle:
            pickle.dump(controller.params, handle)
        task.t_table.save()
        controller.save()

    vrep.simxFinish(client_id)
Example #6
def main():
    vrep.simxFinish(-1)  # just in case, close all opened connections
    client_id = vrep.simxStart('127.0.0.1', 19997, True, True, 5000,
                               5)  # Connect to V-REP

    if client_id < 0:
        print('Failed connecting to remote API server')
        return -1

    print('Connected to remote API server')

    # Define RL elements
    environment = StandingUpEnvironment(client_id)

    task = StandingUpTask(environment)

    controller = ActionValueTable(task.get_state_space_size(),
                                  task.get_action_space_size())
    controller.initialize(1.)

    with open('standing-up-q.pkl', 'rb') as f:
        controller._params = pickle.load(f)

    # learner = Q()
    agent = LearningAgent(controller)

    experiment = EpisodicExperiment(task, agent)

    i = 0
    while True:
        i += 1
        print('Iteration n° ' + str(i))
        experiment.doEpisodes(1)

    vrep.simxFinish(client_id)
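
    # As written, the while-True loop above never reaches vrep.simxFinish, so the
    # V-REP connection is only released when the process is killed. A sketch of a
    # cleaner tail for main(), borrowing the interrupt-handling pattern from
    # Example #5 (the loop body itself is unchanged):
    i = 0
    try:
        while True:
            i += 1
            print('Iteration n° ' + str(i))
            experiment.doEpisodes(1)
    except (KeyboardInterrupt, SystemExit):
        pass
    finally:
        vrep.simxFinish(client_id)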
Example #7
for runs in range(numbExp):
    # create environment
    # options: Bool(OpenGL), Bool(realtime simulation while client is connected),
    # ServerIP (default: localhost), Port (default: 21560)
    if env is not None: env.closeSocket()
    env = ShipSteeringEnvironment()
    # create task
    task = GoNorthwardTask(env, maxsteps=500)
    # create controller network
    net = buildNetwork(task.outdim, task.indim, outclass=TanhLayer)
    # create agent with controller and learner (and its options)
    agent = OptimizationAgent(
        net,
        PGPE(learningRate=0.3,
             sigmaLearningRate=0.15,
             momentum=0.0,
             epsilon=2.0,
             rprop=False,
             storeAllEvaluations=True))
    et.agent = agent
    #create experiment
    experiment = EpisodicExperiment(task, agent)

    #Do the experiment
    for updates in range(epis):
        for i in range(prnts):
            experiment.doEpisodes(batch)
        et.printResults((agent.learner._allEvaluations)[-50:-1], runs, updates)
    et.addExps()
et.showExps()
# To view what the simulation is doing at the moment, set the environment with True,
# go to pybrain/rl/environments/ode/ and start viewer.py (python-openGL must be
# installed, see the PyBrain documentation)
Example #8
def main():
    if len(sys.argv) != 2:
        print 'Please provide a path to a model data directory.'
        print ('The script will load the newest model data from the directory, '
               'then continue to improve that model')
        sys.exit(0)

    model_directory = sys.argv[1]
    existing_models = sorted(glob(os.path.join(model_directory, '*.rlmdl')))

    if existing_models:
        newest_model_name = existing_models[-1]
        iteration_count = int(newest_model_name[-12:-6]) + 1
        print 'Loading model {}'.format(newest_model_name)

        with open(newest_model_name, 'rb') as newest_model:
            agent = pickle.load(newest_model)
    else:
        net = buildNetwork(Environment.outdim,
                           Environment.outdim + Environment.indim,
                           Environment.indim)
        agent = OptimizationAgent(net, PGPE())
        iteration_count = 1

    environment = Environment(LOCAL_HOST, PORT, PATH_TO_SCENE)
    task = Task(environment)


    experiment = EpisodicExperiment(task, agent)


    def signal_handler(signal, frame):
        print 'Exiting gracefully'
        environment.teardown()
        sys.exit(0)

    signal.signal(signal.SIGINT, signal_handler)


    while True:
        time.sleep(1)

        print '>>>>> Running iteration {}'.format(iteration_count)
        # NOTE: this is hacky, but it is needed to plug our autosave in properly.
        # Took a long time to figure this out.
        experiment.optimizer.maxEvaluations = (
            experiment.optimizer.numEvaluations + experiment.optimizer.batchSize)

        try:
            experiment.doEpisodes()
        except Exception as e:
            print 'ERROR RUNNING SIMULATION: \n{}'.format(e)
            environment.teardown()
        else:
            if iteration_count % AUTOSAVE_INTERVAL == 0:
                filename = str(iteration_count).zfill(6) + '.rlmdl'
                filename = os.path.join(model_directory, filename)
                print 'Saving model to {}'.format(filename)
                with open(filename, 'wb') as f:
                    pickle.dump(agent, f)

            iteration_count += 1

        print 'Iteration finished <<<<<'
Example #9
import numpy as np
import matplotlib.pyplot as plt

from environments.continous_maze_discrete import CTS_Maze
from tasks.CTS_TASK import CTS_MazeTask
from pybrain.rl.experiments import EpisodicExperiment
from learners.baseline_learner import GP_SARSA
from agents.baseline_agent import GPSARSA_Agent
env = CTS_Maze([0.40, 0.40])  #goal

task = CTS_MazeTask(env)
learner = GP_SARSA(gamma=0.95)
learner.batchMode = False  # not used here; set to True for batch learning
agent = GPSARSA_Agent(learner)
agent.logging = True

exp = EpisodicExperiment(task, agent)  # epsilon-greedy exploration (with or without use of uncertainty)
plt.ion()

i = 1000
performance = []  # reward accumulation; placeholder for any evaluation metric
sum = []
agent.reset()
i = 0
for num_exp in range(100):

    performance = exp.doEpisodes(1)
    sum = np.append(sum, np.sum(performance))

    if (num_exp % 10 == 0):
        agent.init_exploration -= agent.init_exploration * 0.10
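
# plt.ion() above enables interactive plotting but nothing is ever drawn. A
# minimal, illustrative way to visualise the accumulated per-episode returns
# collected in `sum` (the plotting calls below are an addition, not part of the
# original snippet):
plt.plot(sum)
plt.xlabel('episode')
plt.ylabel('total reward')
plt.pause(0.001)  # let the interactive window refresh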
Example #10
def run_experiment():
    # Create the controller network
    HIDDEN_NODES = 4

    RUNS = 2
    BATCHES = 1
    PRINTS = 1
    EPISODES = 500

    env = None
    start_state_net = None

    run_results = []

    # Set up plotting tools for the experiments
    tools = ExTools(BATCHES, PRINTS)

    # Run the experiment
    for run in range(RUNS):
        if run == 0:
            continue

        # If an environment already exists, shut it down
        if env:
            env.closeSocket()

        # Create the environment
        env = create_environment()

        # Create the task
        task = Pa10MovementTask(env)

        # Create the neural network. Only create the network once so it retains
        # the same starting values for each run.
        if start_state_net:
            net = start_state_net.copy()
        else:
            # Create the initial neural network
            net = create_network(
                    in_nodes=env.obsLen,
                    hidden_nodes=HIDDEN_NODES,
                    out_nodes=env.actLen
            )
            start_state_net = net.copy()

        # Create the learning agent
        learner = HillClimber(storeAllEvaluations=True)
        agent = OptimizationAgent(net, learner)
        tools.agent = agent

        # Create the experiment
        experiment = EpisodicExperiment(task, agent)

        # Perform all episodes in the run
        for episode in range(EPISODES):
            experiment.doEpisodes(BATCHES)

        # Calculate results
        all_results = agent.learner._allEvaluations
        max_result = np.max(all_results)
        min_result = np.min(all_results)
        avg_result = np.sum(all_results) / len(all_results)
        run_results.append((run, max_result, min_result, avg_result))

        # Make the results directory if it does not exist
        if not os.path.exists(G_RESULTS_DIR):
            os.mkdir(G_RESULTS_DIR)

        # Write all results to the results file
        with open(os.path.join(G_RESULTS_DIR, 'run_%d.txt' % run), 'w+') as f:
            # Store the calculated max, min, avg
            f.write('RUN, MAX, MIN, AVG\n')
            f.write('%d, %f, %f, %f\n' % (run, max_result, min_result, avg_result))

            # Store all results from this run
            f.write('EPISODE, REWARD\n')
            for episode, result in enumerate(all_results):
                f.write('%d, %f\n' % (episode, result))

    return
Example #11
from time import time

from pybrain_extension.environment.morse_environment import ContinuousControllerEnvironment
from pybrain_extension.task.red_cube_task import CameraPixelsRedCubeTask

try:
    import __builtin__
    input = getattr(__builtin__, 'raw_input')
except (ImportError, AttributeError):
    pass


with CurrentController(3) as control:
    environment = ContinuousControllerEnvironment(control)
    task = CameraPixelsRedCubeTask(environment, True)

    experiment = EpisodicExperiment(task, None)

    # control.calibrate()

    start = time()
    bias = True

    def eval_fitness(genomes):
        for g in genomes:
            # visualize.draw_net(g, view=False)
            agent = NeatAgent(g, bias=bias)
            g.fitness = 0
            for state in range(control.get_randomize_states()):
                control.randomize(state)
                g.fitness += task.f(agent)
                if not task.found_cube: