Example #1
def run():
    """Run the agent for a finite number of trials."""

    # Code for testing multiple alphas
    """
    i = 0
    alphas = [0.5, 0.6, 0.7, 0.8, 0.9]
    for alpha in alphas:
        print "********************Run " + str(i) + " Alpha is " + str(alpha) +"********************"
        # Set up environment and agent
        e = Environment()  # create environment (also adds some dummy traffic)
        a = e.create_agent(LearningAgent, alpha)  # create agent
        e.set_primary_agent(a, enforce_deadline=True)  # specify agent to track
        # NOTE: You can set enforce_deadline=False while debugging to allow longer trials

        # Now simulate it
        sim = Simulator(e, update_delay=0.0001, display=False)  # create simulator (uses pygame when display=True, if available)
        # NOTE: To speed up simulation, reduce update_delay and/or set display=False

        sim.run(n_trials=100)  # run for a specified number of trials
        # NOTE: To quit midway, press Esc or close pygame window, or hit Ctrl+C on the command-line
        i += 1
    """
    # Set up environment and agent
    e = Environment()  # create environment (also adds some dummy traffic)
    a = e.create_agent(LearningAgent)  # create agent
    e.set_primary_agent(a, enforce_deadline=True)  # specify agent to track
    # NOTE: You can set enforce_deadline=False while debugging to allow longer trials

    # Now simulate it
    sim = Simulator(e, update_delay=0.5, display=True)  # create simulator (uses pygame when display=True, if available)
    # NOTE: To speed up simulation, reduce update_delay and/or set display=False

    sim.run(n_trials=100)  # run for a specified number of trials
Example #2
def run():
    """Run the agent for a finite number of trials."""
    import numpy as np

    # values first selected by intuition
    # alpha = 0.5
    # gamma = 0.7
    # epsilon = 0.05

    # optimal values found
    alpha = 0.5
    gamma = 0.2
    epsilon = 0.07

    # some "bad" values just to test how good our optimal is
    # alpha = 0.8
    # gamma = 0.6
    # epsilon = 0.2

    # Set up environment and agent
    e = Environment()  # create environment (also adds some dummy traffic)
    a = e.create_agent(LearningAgent, alpha, gamma, epsilon)  # create agent
    e.set_primary_agent(a, enforce_deadline=True)  # set agent to track

    # Now simulate it
    sim = Simulator(e, update_delay=0.0001)  # reduce update_delay to speed up simulation
    sim.run(n_trials=100)  # press Esc or close pygame window to quit

    mean = np.mean(a.trial_array[0][75:100])
    print "Average Steps: " + str(mean)
def run():
    """Run the agent for a finite number of trials."""

    # Set up environment and agent
    e = Environment()  # create environment (also adds some dummy traffic)
    a = e.create_agent(LearningAgent)  # create agent
    e.set_primary_agent(a, enforce_deadline=True)  # specify agent to track
    # NOTE: You can set enforce_deadline=False while debugging to allow longer trials

    # Now simulate it
    sim = Simulator(e, update_delay=0.0005, display=False)  # create simulator (uses pygame when display=True, if available)
    # NOTE: To speed up simulation, reduce update_delay and/or set display=False

    sim.run(n_trials=100)  # run for a specified number of trials
    # NOTE: To quit midway, press Esc or close pygame window, or hit Ctrl+C on the command-line

    print ("................................DATA SUMMARY................................")
    print ("alpha: ", a.alpha)
    print ("gamma: ", a.gamma)
    print ("epsilon: ", a.epsilon)
    print ("total actions: ", a.total_actions)
    print ("total rewards: ", a.total_rewards)
    print ("number of negative reward in each trial ", a.last_negative_reward_count_list.values())
    print ("number of actions in each trial ", a.last_actions_list.values())
    print ("total rewards in each trial ", a.last_rewards_list.values())
def run():
    """Run the agent for a finite number of trials."""
    
    # create common place to set debug values
    dbg_deadline = True
    dbg_update_delay = 0.01
    dbg_display = False
    dbg_trials = 100 
    
    # create switches to run as random, way_light, way_light_vehicles
    # random = take random actions only
    # way_light_only = Traffic Light, Way Point
    # way_light_Vehicle = Traffic Light, Way Point, Left, Right, Oncoming
    # way_light_modified (or any other value) = Way Point, Combination Light and Vehicle State
    dbg_runtype = 'way_light_only'

    # Set up environment and agent
    e = Environment()  # create environment (also adds some dummy traffic)
    a = e.create_agent(LearningAgent)  # create agent
    # set the run type (random choice, simple state, state with vehicles)
    a.run_type = dbg_runtype
    e.set_primary_agent(a, enforce_deadline=dbg_deadline)  # specify agent to track
    # NOTE: You can set enforce_deadline=False while debugging to allow longer trials

    # Now simulate it
    sim = Simulator(e, update_delay=dbg_update_delay, display=dbg_display)  # create simulator (uses pygame when display=True, if available)
    # NOTE: To speed up simulation, reduce update_delay and/or set display=False

    sim.run(n_trials=dbg_trials)  # run for a specified number of trials
    # NOTE: To quit midway, press Esc or close pygame window, or hit Ctrl+C on the command-line

    # at the end of the simulation show results
    # call qlearner reset to get last trial result
    a.q_learner.reset(a.step_count)
    a.q_learner.show_results()
Example #5
def run2():  # helps to find the sweet spot for alpha, gamma values
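    # seaborn and matplotlib are assumed to be imported at module level in the
    # original script, e.g. "import seaborn as sns" and "import matplotlib.pyplot as plt"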

    alphas = [0.1, 0.2, 0.4, 0.6, 0.8, 1.0]
    gammas = [0.1, 0.2, 0.4, 0.6, 0.8, 1.0]
    heatmap = []

    for i, alpha in enumerate(alphas):
        row = []
        for j, gamma in enumerate(gammas):
            e = Environment()
            a = e.create_agent(LearningAgent)
            a.alpha = alpha
            a.gamma = gamma

            e.set_primary_agent(a, enforce_deadline=True)
            sim = Simulator(e, update_delay=0.0, display=False)
            sim.run(n_trials=100)
            print "Successful journeys : {}".format(a.targetReachedCount)
            row.append(a.targetReachedCount / 100.0)
            #qstats.append(a.q_learn_stats())
        heatmap.append(row)

    print heatmap
    ax = sns.heatmap(heatmap, xticklabels=gammas, yticklabels=alphas, annot=True)
    ax.set(xlabel="gamma", ylabel="alpha")
    plt.show()
Example #6
  def start_puzzle(self):
    while not self.solved:
      while True:
        response_code = self.code
        for i, l in enumerate(self.lines):
          clear()
          put_text(self.lesson)
          print_code(response_code, "\nThe code currently is:")
          resp = int(get_text('Place the line \'%s\': ' % l))
          response_code = self.process_input(resp, l, response_code)
        threads = self.translator(response_code)
        simulator = Simulator(threads, self.predicate, self.semaphores, self.poll_rate)
        success, message = simulator.run_sim()
        simulator.visualize()
        if success:
          put_text('Simulator test Passed!')
        else:  
          put_text('Simulator test Failed!')
        put_text(message)
        get_text('Check against the real answer? (y/n)')

        clear()
        put_text(self.lesson)
        print_code(response_code, "\nThe code currently is:")

        if response_code == self.answer:
          put_text("Congratulations! That's correct. Good job!\n")
          break
        else:
          get_text("Woops! That's incorrect. Try again? (y/n)\n")
      self.solved = True
def main2():
  """
  Looks at which games are possible after a certain number of differences.
  """
  GAME_LENGTH = 16
  # possible_tuples[i] is the set of tuples for which there exists a game
  # whose ith element is that tuple
  possible_tuples = []
  for n in range(0, 2 ** GAME_LENGTH):
    g = int_to_game(n, GAME_LENGTH)
    sim = Simulator(g)
    assert sim.get_game_length() is not None, 'Non-terminating game: %s' % g
    t = 0
    while not sim.done():
      if t >= len(possible_tuples):
        possible_tuples.append(set())
      possible_tuples[t].add(tuple(sim.state))
      sim.step_forward()
      t += 1
    # Add ending tuple as well
    if t >= len(possible_tuples):
      possible_tuples.append(set())
    possible_tuples[t].add(tuple(sim.state))
  print 'Number of possible tuples after t steps:'
  for t in range(len(possible_tuples)):
    print '%d: %d' % (t, len(possible_tuples[t]))
Example #8
 def update(self):
     ''' Learn for a single episode. '''
     simulator = Simulator()
     state = simulator.get_state()
     act = self.action_policy(state)
     feat = self.action_features[act](state)
     end_episode = False
     traces = [
         np.zeros((BASIS_COUNT,)),
         np.zeros((BASIS_COUNT,)),
         np.zeros((BASIS_COUNT,))]
     while not end_episode:
         action = self.policy(state, act)
         state, reward, end_episode, _ = simulator.take_action(action)
         new_act = self.action_policy(state)
         new_feat = self.action_features[new_act](state)
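         # TD error for the on-policy (SARSA-style) update: reward plus the discounted
         # value estimate of the next state-action pair, minus the current estimate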
         delta = reward + self.gamma * self.action_weights[new_act].dot(new_feat) - self.action_weights[act].dot(feat)
         for i in range(3):
             traces[i] *= self.lmb * self.gamma
         traces[act] += feat
         for i in range(3):
             self.action_weights[i] += self.alpha * delta * traces[i] / COEFF_SCALE
         act = new_act
         feat = new_feat
     return [reward]
Example #9
def run():
    """Run the agent for a finite number of trials."""

    options = parseOptions()

    env = Environment()  # create environment (also adds some dummy traffic)
    sim = Simulator(env, update_delay=0, display=options.display) # create simulator (uses pygame when display=True, if available)

    results = {}

    from settings import params
    for agent, symbol in [(options.player1, 1), (options.player2, -1)]:
        kwargs = params[agent]
        env.add_agent(
            symbol=symbol, 
            file=options.file, 
            clear=options.clear,
            save=options.save,
            **kwargs)

    sim.run(n_trials=options.iterations)  # run for a specified number of trials

    for agent in env.agents:
        results["X" if agent.symbol == 1 else 'O'] = agent.wins

    print results

    dispatcher.send(signal='main.complete', sender={})
Example #10
def main():

    # create NonRandom instance with seed
    nr = NonRandom()
    nr.set_seed(1)

    # create game and player
    wheel = Wheel(nr)
    table = Table()
    game = Game(wheel, table)
    player = Martingale(table)

    # assign explicit default values so later changes to the class defaults do not affect this run
    player.BASE_AMOUNT = 1
    player.BASE_BET = "Black"

    # create simulator instance
    simulator = Simulator(game, player)
    simulator.SAMPLES = 3

    # execute simulator
    simulator.gather()

    # print results
    print "\n"
    print "Maxima", simulator.maxima, "\n"
    print "Final", simulator.final, "\n"
    print "Durations", simulator.durations, "\n"
Example #11
def main():
    """ Example: UnitXObjectの変数を保存し,取り出し,確認する.
    """
    from simulator import Simulator
    s = Simulator()
    UnitXObject.manager = s.get_manager()
    UnitXObject.scopes = s.get_scopes()
    
    # Register part
    crr_scope = s.get_scopes().peek()
    crr_scope['x'] = UnitXObject(value=1.5, varname='x', is_none=False, unit=Unit(ex_numer=u'm', numer=u'cm', ex_denom=None, denom=None))
    crr_scope['y'] = UnitXObject(value=1500, varname='y', is_none=False, unit=Unit(ex_numer=u'm', numer=u'km', ex_denom=u'時', denom=u'分'))
    s.get_scopes().new_scope()
    
    # Find & Show part
    found_scope = s.get_scopes().peek().find_scope_of('x')
    Util.dump(s.get_scopes())

    # Checking equals()
    tmp_obj = UnitXObject(value=1.5, varname='x', is_none=False, unit=Unit(ex_numer=None, numer=u'cm', ex_denom=None, denom=None))
    print tmp_obj
    print crr_scope['x'] == tmp_obj

    # Clear part
    s.get_scopes().del_scope()
    s.get_scopes().del_scope()
    return Constants.EXIT_SUCCESS
Example #12
def run():
    """Run the agent for a finite number of trials."""

    # Set up environment and agent
    gammas = [x / 10.0 for x in xrange(0, 10)]
    gamma_to_success_rate = OrderedDict()
    gamma_to_average_reward = OrderedDict()
    # Run a simulation for each sample gamma value to test which
    # choice of gamma results in the most successful agent
    for gamma in gammas:
        # Run 10 simulations for each choice of gamma to get average performance metrics
        for trial in xrange(10):
            e = Environment()  # create environment (also adds some dummy traffic)
            a = e.create_agent(LearningAgent, (gamma))  # create agent
            e.set_primary_agent(a, enforce_deadline=True)  # set agent to track

            # Now simulate it
            sim = Simulator(e, update_delay=0.0)  # reduce update_delay to speed up simulation
            sim.run(n_trials=50)  # press Esc or close pygame window to quit

            gamma_to_success_rate[a.GAMMA] = gamma_to_success_rate.get(a.GAMMA, 0) + sim.env.successful_trials
            gamma_to_average_reward[a.GAMMA] = (
                gamma_to_average_reward.get(a.GAMMA, 0) + a.get_average_reward_per_action()
            )

    # Average the metrics over the 10 runs for each gamma
    for gamma in gamma_to_average_reward.keys():
        gamma_to_average_reward[gamma] = gamma_to_average_reward[gamma] / 10
        gamma_to_success_rate[gamma] = gamma_to_success_rate[gamma] / 10
    print gamma_to_average_reward
    print gamma_to_success_rate
def initialize_simulator(route):
    '''
    Launches the sim and sets up the world.
    '''
    sim_flags = get_sim_flags()
    expert_flags = get_local_expert_flags()

    launch_planning = Launcher('simulator', 'planning_stack.launch', sim_flags)
    launch_local_expert = Launcher('expert_mode', 'local_expert_mode.launch', expert_flags)

    # Take the filename and drop '*.py', this is our node name
    simulator = Simulator(
        path.basename(__file__)[:-3],
        launchers=[launch_planning, launch_local_expert])

    spawn_utils = SpawnUtils(simulator)

    # Spawn our car near the start of the intersection
    # Spawn dpv across the intersection
    with SemanticDatabase() as db:
        db.validate_route(route)
        start_x, start_y, start_angle = db.get_point_in_lane(route[0])
        dpv_x, dpv_y, _ = db.point_interpolated_along_lane(route[2], 0.02)

    # dpv_theta = spawn_utils.get_parked_car_heading((dpv_x, dpv_y), 270)
    dpv_theta = spawn_utils.get_parked_car_heading((dpv_x, dpv_y), 0)

    cruise_robot = simulator.spawn_cruise_robot(start_x, start_y, 0, start_angle)
    simulator.spawn_robot(x=dpv_x, y=dpv_y, theta=dpv_theta)

    # Now tell our robot what route to follow
    cruise_robot.brain.set_lane_plans([(1000.0, route)])

    return simulator, cruise_robot
Example #14
def run():
    """Run the agent for a finite number of trials."""

    # Set up environment and agent
    e = Environment()  # create environment (also adds some dummy traffic)
    a = e.create_agent(LearningAgent)  # create agent
    e.set_primary_agent(a, enforce_deadline=True)  # specify agent to track
    # NOTE: You can set enforce_deadline=False while debugging to allow longer trials

    # Now simulate it
    sim = Simulator(e, update_delay=0.00001, display=False)  # create simulator (uses pygame when display=True, if available)
    # NOTE: To speed up simulation, reduce update_delay and/or set display=False

    sim.run(n_trials=100)  # run for a specified number of trials
    # NOTE: To quit midway, press Esc or close pygame window, or hit Ctrl+C on the command-line
    
    # Print summary #
    allPenalties = a.numberOfPenaltiesList
    allFailures = a.numberOfFailuresList
    numberOfTrials = float(len(allFailures))
    numberOfFailures = float(allFailures[-1])
    numberOfSuccess = numberOfTrials - numberOfFailures
    numberOfSuccessFirstHalf = ((numberOfTrials) / 2) - float(allFailures[len(allFailures)/2])
    numberOfSuccessSecondHalf = numberOfSuccess - numberOfSuccessFirstHalf
    print ("=================================================================================")
    print ("SUMMARY")
    print ("=================================================================================")
    print ("Total Penalities received = %3.2f" % (sum(allPenalities)))
    print ("\tPenalities received in the first half of trials  = %3.2f" % (sum(allPenalities[:len(allPenalities)/2])))
    print ("\tPenalities received in the second half of trials = %3.2f" % (sum(allPenalities[len(allPenalities)/2:])))
    print ("Success Rate: %3.2f%%" % (numberOfSuccess/numberOfTrials*100))
    print ("\tSuccess Rate of the first half : %3.2f%%" % (numberOfSuccessFirstHalf/(numberOfTrials/2)*100))
    print ("\tSuccess Rate of the second half: %3.2f%%" % (numberOfSuccessSecondHalf/(numberOfTrials/2)*100))
Example #15
def mainQ(_learning=True):
    # Set player types and logging if provided in command line
    if len(sys.argv) == 3:
        pair = (sys.argv[1], sys.argv[2])
    else:
        pair = None

    # Prompt players
    # Needs to be adapted to define the parameters
    player_pair = promptPlayers(pair, _learning)
    # Create new game
    game = Game(player_pair)

    ######
    # Create new simulation
    # Flags:
    #   - debug: (True, False)
    sim = Simulator(game)


    ######
    # Run a simulation
    # Flags:
    # - tolerance=0.05  Epsilon tolerance before beginning testing.
    # - n_test=0  Number of test to be conducted after training

    sim.run(tolerance=0.001, n_test=100)
Example #16
def run():
    f = open('running_report.txt', 'w')

    # setup various parameter combinations
    discount_factors = [0.5]
    starting_learning_rates = [0.5]
    epsilon_greedy_policy = [0.09]

    for d_factor in discount_factors:
        for alpha in starting_learning_rates:
            for greedy_policy in epsilon_greedy_policy:

                """Run the agent for a finite number of trials."""
                # Set up environment and agent
                e = Environment()  # create environment (also adds some dummy traffic)
                a = e.create_agent(LearningAgent, learning_rate=alpha, discount_factor=d_factor, greedy_policy=greedy_policy)  # create agent
                e.set_primary_agent(a, enforce_deadline=True)  # specify agent to track
                # NOTE: You can set enforce_deadline=False while debugging to allow longer trials

                # Now simulate it
                sim = Simulator(e, update_delay=0, display=True)  # create simulator (uses pygame when display=True, if available)

                number_of_trials = 100

                # NOTE: To speed up simulation, reduce update_delay and/or set display=False
                sim.run(n_trials=number_of_trials)  # run for a specified number of trials

                #NOTE: To quit midway, press Esc or close pygame window, or hit Ctrl+C on the command-line
                print >> f, "Learning rate:", alpha
                print >> f, "Discount factor:", d_factor
                print >> f, "Greedy Policy:", greedy_policy
                print >> f, "Percentage completed: ", a.completed_trials / 100.0, "\n"

                f.flush()
    f.close()
Example #17
def run(get_result=False, gm=0.2, al=0.5):
    """Run the agent for a finite number of trials."""
    if get_result:
        ## print for GridSearch
        print ("Running trial  for gamma = %.1f, alpha = %.1f" %(gm, al))

    # Set up environment and agent
    e = Environment()  # create environment (also adds some dummy traffic)
    a = e.create_agent(LearningAgent, gm = gm, al = al)  # create agent
    e.set_primary_agent(a, enforce_deadline=True)  # specify agent to track
    # NOTE: You can set enforce_deadline=False while debugging to allow longer trials

    # Now simulate it
    sim = Simulator(e, update_delay=0.0, display=False)  # create simulator (uses pygame when display=True, if available)
    # NOTE: To speed up simulation, reduce update_delay and/or set display=False

    n_trials = 100
    sim.run(n_trials=n_trials)  # run for a specified number of trials
    # NOTE: To quit midway, press Esc or close pygame window, or hit Ctrl+C on the command-line

    print "average silly moves for the last 10 trials: ", np.average(a.silly_fq[-10])
    print "average risky moves for the last 10 trials: ", np.average(a.risk_fq[-10])


    """The Following Code is for GridSearch"""
    if get_result:
        summary = sim.rep.summary()
        rate = sum(summary[-1][-10:])/float(10)
        deadline = sum(summary[-2][-10:])/float(10)
        risk_fq = sum(a.risk_fq[-10:])
        print ("success_rate   for gamma = %.1f, alpha = %.1f is %.2f" %(gm, al, rate))
        print ("final_deadline for gamma = %.1f, alpha = %.1f is %.2f" %(gm, al, deadline))
        print ("risk_frequecy  for gamma = %.1f, alpha = %.1f is %d" %(gm, al, risk_fq))
        print
        return (rate, deadline, risk_fq)
Example #18
def run():
    """Run the agent for a finite number of trials."""
    successnum = dict()
    for i in range(10, 36, 10):
        for j in range(40, 71, 10):
            for k in range(6, 16, 4):
                arguments = (i/100.0, j/100.0, k/100.0)
                tenSucc = []
                for index in range(0, 5):
                    # Set up environment and agent
                    e = Environment()  # create environment (also adds some dummy traffic)
                    a = e.create_agent(LearningAgent, arguments)  # create agent
                    e.set_primary_agent(a, enforce_deadline=True)  # specify agent to track
                    # NOTE: You can set enforce_deadline=False while debugging to allow longer trials

                    # Now simulate it
                    sim = Simulator(e, update_delay=0.001, display=False)  # create simulator (uses pygame when display=True, if available)
                    # NOTE: To speed up simulation, reduce update_delay and/or set display=False

                    sim.run(n_trials=100)  # run for a specified number of trials
                    # NOTE: To quit midway, press Esc or close pygame window, or hit Ctrl+C on the command-line
                    tenSucc.append(e.success)
                successnum[arguments] = tenSucc

    print(successnum)
Example #19
class level_2(unittest.TestCase):

    def setUp(self):
        self.game = Game()
        self.game.addSquare(Square(Color.blue, Direction.top, 2, 1))
        self.game.board.setColor(0, 1, Color.blue)
        self.game.addSquare(Square(Color.red, Direction.right, 0, 0))
        self.game.board.setColor(0, 2, Color.red)
        self.game.addSquare(Square(Color.grey, Direction.left, 1, 3))
        self.game.board.setColor(1, 1, Color.grey)

        self.simulator = Simulator(self.game)

    def test_goal(self):
        self.game.moveSquare(Color.red)
        self.assertTrue(not self.game.isDone())
        self.game.moveSquare(Color.red)
        self.assertTrue(not self.game.isDone())
        self.game.moveSquare(Color.blue)
        self.assertTrue(not self.game.isDone())
        self.game.moveSquare(Color.blue)
        self.assertTrue(not self.game.isDone())
        self.game.moveSquare(Color.grey)
        self.assertTrue(not self.game.isDone())
        self.game.moveSquare(Color.grey)
        self.assertTrue(self.game.isDone())

    def test_simulation(self):
        print self.simulator.find_solution()
Example #20
class Launcher(object):
  
  def setup_logging(self):
    t = datetime.now()
    self.tstamp = '%d-%d-%d-%d-%d' % (t.year, t.month, t.day, t.hour, t.minute)
    fname = LOG_FILE_PATH + LOG_FILENAME + self.tstamp + '.log'    
    logging.basicConfig(filename=fname,level=logging.INFO,format=FORMAT)  
  
  def configure(self, p):
    print('constructing simulator')
    self.sim = Simulator(p['ins'], p['strat'], p['start_date'], p['end_date'], p['open_bal'], self.tstamp)

  def simulate(self):
    print('running simulator')
    start = clock()
    self.sim.run()
    end = clock()
    dur_str = 'seconds = %f' % (end - start)
    print(dur_str)
    logging.info('sim time = ' + dur_str)

  def report(self):
    print('plotting')
    start = clock()
    self.sim.plot()
    end = clock()
    dur_str = 'seconds = %f' % (end - start)
    print(dur_str)
    logging.info('plot time = ' + dur_str)

  def go(self, p):
    self.setup_logging()
    self.configure(p)
    self.simulate()
    self.report()
Example #21
def run(msg=''):
    """Run the agent for a finite number of trials."""

    # set up environment and agent
    e = Environment()  # create environment (also adds some dummy traffic)
    a = e.create_agent(LearningAgent)  # create agent
    e.set_primary_agent(a, enforce_deadline=True)  # specify agent to track
    # NOTE: you can set enforce_deadline=False while debugging to allow longer trials

    # Now simulate it
    sim = Simulator(e, update_delay=0, display=False)  # create simulator (uses pygame when display=True, if available)
    # NOTE: to speed up simulation, reduce update_delay and/or set display=False

    sim.run(n_trials=100)  # run for a specified number of trials
    # NOTE: to quit midway, press Esc or close pygame window, or hit Ctrl+C on the command-line

    results = a.results
    average_cycles = mean([result[0] for result in results])
    average_reward = mean([result[1] for result in results])
    average_violations = mean([result[2] for result in results])
    # print '=' * 10, msg
    # print 'Average Cycles:', average_cycles
    # print 'Average Reward:', average_reward
    # print 'Average Violations:', average_violations

    return average_cycles, average_reward, average_violations
Example #22
def run():
    """Run the agent for a finite number of trials."""

    # Set up environment and agent
    e = Environment()  # create environment (also adds some dummy traffic)
    a = e.create_agent(LearningAgent)  # create agent
    e.set_primary_agent(a, enforce_deadline=True)  # set agent to track

    # Now simulate it
    sim = Simulator(e, update_delay=5.0)  # reduce update_delay to speed up simulation
    sim.run(n_trials=100)  # press Esc or close pygame window to quit

    fig, ax = plt.subplots( nrows=1, ncols=1) 
    plt.xlabel('Order of trials')
    plt.ylabel('# of incurred penalties')
    plt.title('Penalties')
    ax.plot(a.records)
    fig.savefig('penalties.png')

    fig, ax = plt.subplots( nrows=1, ncols=1) 
    plt.xlabel('Order of trials')
    plt.ylabel('# of rewards')
    plt.title('Rewards')
    ax.plot(a.rewards)
    fig.savefig('rewards.png')
def run():
    """Run the agent for a finite number of trials."""
    # create output file
    target_dir = os.path.dirname(os.path.realpath(__file__))
    target_path = os.path.join(target_dir, 'qlearning_tuning_report.txt')
    if not os.path.exists(target_dir):
        os.makedirs(target_dir)
    # loop over the parameters
    for epsilon in [0.1, 0.5, 0.9]:
        for alpha in np.arange(0.1, 1, 0.2):
            for gamma in np.arange(0.1, 1, 0.2):
                print epsilon, alpha, gamma
                # Set up environment and agent
                e = Environment()  # create environment (also adds some dummy traffic)
                a = e.create_agent(QAgent, epsilon, alpha, gamma)  # create agent
                e.set_primary_agent(a, enforce_deadline=True)  # specify agent to track
                # NOTE: You can set enforce_deadline=False while debugging to allow longer trials

                # Now simulate it
                sim = Simulator(e, update_delay=0.001, display=False)  # create simulator (uses pygame when display=True, if available)
                # NOTE: To speed up simulation, reduce update_delay and/or set display=False
                sim.run(n_trials=100)  # run for a specified number of trials
                # get the count for the number of successful trials and average running time
                summary = sim.report()
                
                # write out the results
                try:
                    with open(target_path, 'a') as f:
                        f.write('epsilon {}, alpha {}, gamma {} : success {}, avg_time {}, total_reward {}\n'.format(epsilon, alpha, gamma, summary[0], summary[1], round(a.total_reward, 3)))
                except:
                    raise
Example #24
def main():
    drone = RealDrone()
    # controller = ConConController(drone=drone, log=True)
    controller = SingleAxisController(drone=drone, log=True)
    sim = Simulator(drone=drone, controller=controller)
    sim.start()
Example #25
def run():
    """Run the agent for a finite number of trials."""

    # Set up environment and agent
    e = Environment()  # create environment (also adds some dummy traffic)
    a = e.create_agent(LearningAgent)  # create agent
    e.set_primary_agent(a, enforce_deadline=True)  # specify agent to track
    # NOTE: You can set enforce_deadline=False while debugging to allow longer trials

    # Now simulate it
    sim = Simulator(e, update_delay=0, display=False)  # create simulator (uses pygame when display=True, if available)
    # NOTE: To speed up simulation, reduce update_delay and/or set display=False

    sim.run(n_trials=100)  # run for a specified number of trials
    # NOTE: To quit midway, press Esc or close pygame window, or hit Ctrl+C on the command-line
    num_successes = np.sum(a.successes)
    last_failure = a.find_last_failure()
    total_penalty = a.cumulative_penalties
    avg_time_remaining = np.mean(a.all_times_remaining)

    print "Total number of successes: {}".format(num_successes)
    print "Failure last occurred at trial: {}".format(last_failure)
    print 'Total penalties incurred: {}'.format(total_penalty)
    print "Average time remaining: {}".format(avg_time_remaining)


    for state in a.state_q_dict:
        print state
        for action in a.state_q_dict[state]:
            print "Action: {}, Q: {:2f}".format(action,a.state_q_dict[state][action])

    print a.state_q_dict[('right','red',None,None,None)]
    
    return (num_successes,last_failure,total_penalty,avg_time_remaining)
Example #26
def run():
    """Run the agent for a finite number of trials."""
    random.seed(42)
    if False:  # save output
        f = open('out', 'w')
    else:
        f = StringIO.StringIO()
    alphas = [0.1]
    gammas = [0.1]
    epsilons = [0.1]
    for alpha in alphas:
        for gamma in gammas:
            for epsilon in epsilons:
                # Set up environment and agent
                e = Environment()  # create environment (also adds some dummy traffic)
                a = e.create_agent(LearningAgent, alpha = alpha, epsilon = epsilon, gamma = gamma)  # create agent
                e.set_primary_agent(a, enforce_deadline=True)  # specify agent to track
                # NOTE: You can set enforce_deadline=False while debugging to allow longer trials

                # Now simulate it
                sim = Simulator(e, update_delay=0.0, display=False)  # create simulator (uses pygame when display=True, if available)
                # NOTE: To speed up simulation, reduce update_delay and/or set display=False

                sim.run(n_trials=100)  # run for a specified number of trials
                # NOTE: To quit midway, press Esc or close pygame window, or hit Ctrl+C on the command-line
                f.write('Alpha: {} Gamma: {}, Epsilon: {}, RESULTS: {}\n'.format(alpha, gamma, epsilon, sum(a.history)))
                f.write('Number of states seen: {}\n'.format(len(a.Q)))
                f.write('History of results\n')
                f.write(str(a.history))
                f.write('State frequencies:\n')
                f.write('\n'.join(str(z) for z in sorted(a.s.items(), key=lambda x: x[1])))
                f.write('\n\n')
def main():
    """Run the agent for a finite number of trials."""
    # Set up environment and agent
    file = open('Q_v2.pickle', 'r')
    Q = pickle.load(file)
    file.close()
    e = Environment()  # create environment (also adds some dummy traffic)
    a = e.create_agent(LearningAgent)  # create agent
    a.Q = Q
    print "QQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQ"
    print len(a.Q)
    e.set_primary_agent(a, enforce_deadline=False)  # specify agent to track

    # NOTE: You can set enforce_deadline=False while debugging to allow longer trials

    # Now simulate it
    sim = Simulator(e, update_delay=1, display=True)  # create simulator (uses pygame when display=True, if available)
    # NOTE: To speed up simulation, reduce update_delay and/or set display=False

    sim.run(n_trials=1000)  # run for a specified number of trials
    # NOTE: To quit midway, press Esc or close pygame window, or hit Ctrl+C on the command-line
   

    f = open('Q_v2.pickle', 'w')
    pickle.dump(a.Q, f)
    f.close()
Example #28
def run():
    """Run the agent for a finite number of trials."""

    record = []
    for q_initial in [0, 2, 10]:
        for alpha in range(1, 6):
            # Set up environment and agent
            e = Environment()  # create environment (also adds some dummy traffic)
            a = e.create_agent(LearningAgent, alpha * 0.2, q_initial)  # create agent
            e.set_primary_agent(a, enforce_deadline=True)  # specify agent to track
            # NOTE: You can set enforce_deadline=False while debugging to allow longer trials

            # Now simulate it
            sim = Simulator(e, update_delay=0, display=False)  # create simulator (uses pygame when display=True, if available)
            # NOTE: To speed up simulation, reduce update_delay and/or set display=False

            sim.run(n_trials=100)  # run for a specified number of trials
            # NOTE: To quit midway, press Esc or close pygame window, or hit Ctrl+C on the command-line

            a.reset()
            trip_log = pd.DataFrame(a.trip_log)
            # trip_log['Used'] = trip_log['Deadline'] - trip_log['Remaining']
            trip_log['Efficiency'] = trip_log['Remaining'] / trip_log['Deadline'] * 100
            record.append({
                'Success Rate': trip_log[trip_log.Success == True].shape[0],
                'Alpha': alpha * 0.2,
                'Q Initial': q_initial,
                'Efficiency': trip_log['Efficiency'].mean(),
                'Ave Reward': trip_log['Reward'].mean(),
                'Ave Penalty': trip_log['Penalty'].mean(),
            })

    return pd.DataFrame(record)
Example #29
 def run_episode(self, simulator=None):
     ''' Run a single episode for a maximum number of steps. '''
     if simulator is None:
         simulator = Simulator()
     state = simulator.get_state()
     states = [state]
     rewards = []
     actions = []
     end_ep = False
     act = self.action_policy(state)
     acts = [act]
     while not end_ep:
         action = self.policy(state, act)
         new_state, reward, end_ep, steps = simulator.take_action(action)
         new_act = self.action_policy(new_state)
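         # TD error: reward minus the current state-action value estimate; if the
         # episode continues, the next pair's value (discounted over the elapsed steps)
         # is added below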
         delta = reward - self.state_quality(state, act)
         if not end_ep:
             delta += (self.gamma**steps) * self.state_quality(new_state, new_act)
         self.tdiff += abs(delta)
         self.steps += 1.0
         state = new_state
         states.append(state)
         actions.append(action)
         rewards.append(reward)
         act = new_act
         acts.append(act)
     self.tdiffs.append(self.tdiff / self.steps)
     self.episodes += 1
     self.total += sum(rewards)
     self.returns.append(sum(rewards))
     return states, actions, rewards, acts
Example #30
def run():
    """Run the agent for a finite number of trials."""

    # Set up environment and agent
    e = Environment()  # create environment (also adds some dummy traffic)
    a = e.create_agent(LearningAgent)  # create agent
    e.set_primary_agent(a, enforce_deadline=True)  # specify agent to track
    # NOTE: You can set enforce_deadline=False while debugging to allow longer trials

    # Now simulate it
    sim = Simulator(e, update_delay=0.0001, display=False)  # create simulator (uses pygame when display=True, if available)
    # NOTE: To speed up simulation, reduce update_delay and/or set display=False

    sim.run(n_trials=100)  # run for a specified number of trials
    # NOTE: To quit midway, press Esc or close pygame window, or hit Ctrl+C on the command-line

    ## print Q table
    print '+++++++++++++++++++++++++++++++++++++++++++++++'
    print 'final Q table'
    print '+++++++++++++++++++++++++++++++++++++++++++++++'
    for key in a.Q:
        print key,
        print ["%0.2f" % i for i in a.Q[key]]

    print '===================================================================='
    print 'An Array of Arrays where each subarray shows neg rewards for a trial'
    print '===================================================================='
    # print neg rewards and split term
    x = a.reward_holder.split('3')
    y = [i.split(' ') for i in x]
    print y  # shows an array of arrays; could calculate total neg reward for each trial
def run():
    """ Driving function for running the simulation. 
        Press ESC to close the simulation, or [SPACE] to pause the simulation. """

    ##############
    # Create the environment
    # Flags:
    #   verbose     - set to True to display additional output from the simulation
    #   num_dummies - discrete number of dummy agents in the environment, default is 100
    #   grid_size   - discrete number of intersections (columns, rows), default is (8, 6)
    env = Environment()
    
    ##############
    # Create the driving agent
    # Flags:
    #   learning   - set to True to force the driving agent to use Q-learning
    #    * epsilon - continuous value for the exploration factor, default is 1
    #    * alpha   - continuous value for the learning rate, default is 0.5
    agent = env.create_agent(LearningAgent, learning=True, epsilon=0.99)
    
    ##############
    # Follow the driving agent
    # Flags:
    #   enforce_deadline - set to True to enforce a deadline metric
    env.set_primary_agent(agent, enforce_deadline=True)

    ##############
    # Create the simulation
    # Flags:
    #   update_delay - continuous time (in seconds) between actions, default is 2.0 seconds
    #   display      - set to False to disable the GUI if PyGame is enabled
    #   log_metrics  - set to True to log trial and simulation results to /logs
    #   optimized    - set to True to change the default log file name
    sim = Simulator(env, update_delay=0.01, log_metrics=True, optimized=True)
    
    ##############
    # Run the simulator
    # Flags:
    #   tolerance  - epsilon tolerance before beginning testing, default is 0.05 
    #   n_test     - discrete number of testing trials to perform, default is 0
    sim.run(n_test=10, tolerance=0.05)
Example #32
def optimize(args):
    """
	Runs the Scheduler with the OrderList from orderListName on the Plant
	with plantName.
	"""
    plantName = args[0]
    orderListName = args[1]

    plant = Plant.fromXmlFile(plantFileExists(plantName))
    orderList = OrderList.fromXmlFile(orderListExists(orderListName))
    optimizer = Optimizer(plant, orderList, Simulator(plant), Evaluator(plant))
    optimizer.run()
def run():
    """Run the agent for a finite number of trials."""

    # Set up environment and agent
    e = Environment()  # create environment (also adds some dummy traffic)
    a = e.create_agent(LearningAgent)  # create agent
    e.set_primary_agent(a, enforce_deadline=True)  # specify agent to track
    # NOTE: You can set enforce_deadline=False while debugging to allow longer trials

    # Now simulate it
    sim = Simulator(e, update_delay=0.01, display=False)  # create simulator (uses pygame when display=True, if available)
    # NOTE: To speed up simulation, reduce update_delay and/or set display=False

    sim.run(n_trials=100)  # run for a specified number of trials
    # NOTE: To quit midway, press Esc or close pygame window, or hit Ctrl+C on the command-line
    
    # Note: I needed to add these variables to my environment.py file
    print "\n----- RESULTS -----\n"
    print "Successes: {}".format(e.successes)
    print "Failures: {}".format(e.failures)
    print "Invalid Moves: {}".format(e.invalids)
def run():
    """Run the agent for a finite number of trials."""

    # Set up environment and agent
    e = Environment()  # create environment (also adds some dummy traffic)
    a = e.create_agent(LearningAgent)  # create agent
    e.set_primary_agent(a, enforce_deadline=True)  # specify agent to track
    # NOTE: You can set enforce_deadline=False while debugging to allow longer trials

    # Now simulate it
    sim = Simulator(
        e, update_delay=.002, display=True
    )  # create simulator (uses pygame when display=True, if available)
    # NOTE: To speed up simulation, reduce update_delay and/or set display=False

    sim.run(n_trials=100)  # run for a specified number of trials
    # NOTE: To quit midway, press Esc or close pygame window, or hit Ctrl+C on the command-line

    # Success percentage

    print "Succes percentage is: " + str(float(a.S)) + " %"
Example #35
def run():
    #test()
    e = Environment()  # create environment (also adds some dummy traffic)
    a = e.create_agent(LearningAgent)  # create agent
    e.set_primary_agent(a, enforce_deadline=True)  # set agent to track

    # Now simulate it
    sim = Simulator(
        e, update_delay=0.0)  # reduce update_delay to speed up simulation
    sim.run(n_trials=100)  # press Esc or close pygame window to quit
    print "STATS"
    print "Rewards: ", a.rewards
    frame = pd.DataFrame(data=a.rewards[50:])
    print frame.describe()

    print "Deadlines: ", a.deadlines
    deadline_frame = pd.DataFrame(data=a.deadlines[50:])
    print deadline_frame.describe()

    print "Q Table: "
    print a.Q
Example #36
    def run(self, domain_path, problem_path, executive):
        parser = FDParser(domain_path, problem_path)
        sim = Simulator(parser)
        perception = Perception(sim.perceive_state)
        mediator = SimulatorMediator(parser, perception)
        if self.print_actions:

            def printer(text):
                print text

            mediator.on_action_observers.append(printer)
        executive.initilize(mediator)
        self.previous_action = None

        def next_action():
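            # report the previously executed action to the mediator, then fetch the
            # next action from the executive; the simulator calls this repeatedly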
            if self.previous_action:
                mediator.on_action(sim, self.previous_action)
            self.previous_action = executive.next_action()
            return self.previous_action

        return sim.simulate(next_action)
Example #37
def run_epsilon(epsilon_function=0, epsilonA=0.5, alpha=0.5):
    """ 
    Run a simulation with customized parameters.
    """
    env = Environment(verbose=VERBOSE)
    learning_agent = LearningAgentEpsilon0
    if epsilon_function == 1:
        learning_agent = LearningAgentEpsilon1
    elif epsilon_function == 2:
        learning_agent = LearningAgentEpsilon2
    elif epsilon_function == 3:
        learning_agent = LearningAgentEpsilon3
    elif epsilon_function == 4:
        learning_agent = LearningAgentEpsilon4
    
    agent = env.create_agent(learning_agent, learning=True, alpha=alpha)
    agent.epsilonA = epsilonA
    env.set_primary_agent(agent, enforce_deadline=True)
    sim = Simulator(env, update_delay=0.01, log_metrics=True, optimized=True, display=DISPLAY)
    sim.run(n_test=10)
    return epsilon_function, agent.epsilonA
Example #38
def run():
    """Run the agent for a finite number of trials."""

    # Set up environment and agent
    e = Environment()  # create environment (also adds some dummy traffic)
    a = e.create_agent(LearningAgent)  # create agent
    e.set_primary_agent(a, enforce_deadline=False)  # specify agent to track
    # NOTE: You can set enforce_deadline=False while debugging to allow longer trials

    # Now simulate it
    sim = Simulator(
        e, update_delay=0.5, display=True
    )  # create simulator (uses pygame when display=True, if available)
    # NOTE: To speed up simulation, reduce update_delay and/or set display=False

    # Fine tuned parameters
    a.alpha = 0.1
    a.discount = 0.1
    a.epsilon = 0.1

    sim.run(n_trials=100)  # run for a specified number of trials
Example #39
    def __init__(self,
                 plant,
                 orderList,
                 populationSize=10,
                 mutationRange=20,
                 indivMutationRate=0.5,
                 selectionRate=0.5,
                 iterations=50):
        assert plant is not None
        assert orderList is not None

        self.plant = plant
        self.orderList = orderList
        self.simulator = Simulator(self.plant)
        self.evaluator = Evaluator(self.plant)

        self.populationSize = populationSize
        self.indivMutationRate = indivMutationRate
        self.selectionRate = selectionRate
        self.mutationRange = mutationRange
        self.iterations = iterations
def test_get_orderbook_snapshot_history():
    """
    Test case to export testing/training data for reinforcement learning
    :return:
    """
    start_time = dt.now(TIMEZONE)

    sim = Simulator(use_arctic=True)
    query = {
        'ccy': ['ETC-USD', 'tETCUSD'],
        'start_date': 20181229,
        'end_date': 20181231
    }
    orderbook_snapshot_history = sim.get_orderbook_snapshot_history(query=query)

    filename = '{}_{}'.format(query['ccy'][0], query['start_date'])
    sim.export_to_csv(data=orderbook_snapshot_history, filename=filename, compress=False)

    elapsed = (dt.now(TIMEZONE) - start_time).seconds
    print('Completed %s in %i seconds' % (__name__, elapsed))
    print('DONE. EXITING %s' % __name__)
Example #41
        def f(sample):
            # random.setstate(rand_state['random'])
            # np.random.set_state(rand_state['np'])

            start = time.time()
            sampleResult = Simulator(self.env).getTrain(
                representation=self.option.representation,
                s0=self.env.s0,
                hyperParam=sample.X)
            sample.Y = self.env.evaluateEach(sampleResult)
            # print 'rho=%.16f, Y=%.16f, time=%fs\n' % (sample.X.rho, sample.Y, time.time()-start),
            return sample
Example #42
def run():
    """Run the agent for a finite number of trials."""

    # Set up environment and agent
    e = Environment()  # create environment (also adds some dummy traffic)
    a = e.create_agent(LearningAgent)  # create agent
    e.set_primary_agent(a, enforce_deadline=True)  # specify agent to track
    # NOTE: You can set enforce_deadline=False while debugging to allow longer trials

    # Now simulate it
    sim = Simulator(
        e, update_delay=0, display=False
    )  # create simulator (uses pygame when display=True, if available)
    # NOTE: To speed up simulation, reduce update_delay and/or set display=False

    sim.run(n_trials=100)  # run for a specified number of trials
    # NOTE: To quit midway, press Esc or close pygame window, or hit Ctrl+C on the command-line

    # print out rewards record to observe convergence of negative rewards
    print rewards_sum_record
    print len(rewards_sum_record), debug_previous_not
Example #43
class level_31(unittest.TestCase):
    def setUp(self):
        self.game = Game()
        self.game.addSquare(Square(Color.blue, Direction.bottom, 0, 2))
        self.game.board.setColor(2, 0, Color.blue)
        self.game.addSquare(Square(Color.red, Direction.left, 2, 3))
        self.game.board.setColor(2, 2, Color.red)
        self.game.addSquare(Square(Color.grey, Direction.right, 1, 1))
        self.game.board.setColor(2, 4, Color.grey)

        self.game.board.setDirection(1, 1, Direction.right)
        self.game.board.setDirection(0, 2, Direction.bottom)
        self.game.board.setDirection(2, 3, Direction.left)
        self.game.board.setDirection(4, 2, Direction.top)

        self.simulator = Simulator(self.game)

    def test_simulation(self):
        print
        print self.game.board
        print self.simulator.find_solution()
def test_extract_features():
    """
    Test case to export multiple testing/training data sets for reinforcement learning
    :return:
    """
    start_time = dt.now(TIMEZONE)

    sim = Simulator(use_arctic=True)

    # for ccy in ['BTC-USD', 'ETH-USD', 'LTC-USD']:  #, 'BCH-USD']:
    for ccy, ccy2 in [('LTC-USD', 'tLTCUSD')]:
        query = {
            'ccy': [ccy, ccy2],
            'start_date': 20190314,
            'end_date': 20190317
        }
        sim.extract_features(query)

    elapsed = (dt.now(TIMEZONE) - start_time).seconds
    print('Completed %s in %i seconds' % (__name__, elapsed))
    print('DONE. EXITING %s' % __name__)
Example #45
def run():
    """Run the agent for a finite number of trials."""

    # Set up environment and agent
    e = Environment()  # create environment (also adds some dummy traffic)
    a = e.create_agent(LearningAgent)  # create agent
    e.set_primary_agent(a, enforce_deadline=True)  # specify agent to track
    # NOTE: You can set enforce_deadline=False while debugging to allow longer trials

    # Now simulate it
    sim = Simulator(
        e, update_delay=0.0, display=True
    )  # create simulator (uses pygame when display=True, if available)
    # NOTE: To speed up simulation, reduce update_delay and/or set display=False

    sim.run(n_trials=100)  # run for a specified number of trials
    # NOTE: To quit midway, press Esc or close pygame window, or hit Ctrl+C on the command-line
    print "CONCLUSION REPORT"
    print "WINS: {}".format(e.wins)
    print "LOSSES: {}".format(e.losses)
    print "INFRACTIONS: {}".format(e.infractions)
Example #46
def test(render, episodes, record_path):
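    # MODEL_FILE and Preprocessor.NB_STATE_HISTORY are assumed to be defined at module level in the original script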
    env = PendulumEnvironment(render=render, record_path=record_path)
    agent = DqnAgent(env.get_dim(Preprocessor.NB_STATE_HISTORY),
                     model_file=MODEL_FILE,
                     greedy=True)
    simulator = Simulator(env, agent, train=False)
    # env = PendulumEnvironment(render=render, debug=True)
    # agent = HumanAgent(env.get_dim())
    # # agent = RandomAgent(env.get_dim())
    # simulator = Simulator(env, agent, train=False)

    episode, total_reward, best_reward = 0, 0.000, -1000000
    for e, f, action, reward, episode_done in simulator.run(
            episodes, frames_per_episode=5 * 60 * 60):
        total_reward += reward
        if episode_done:
            print("episode {} achieves total reward {:.4f}.".format(
                episode, total_reward))
            episode, total_reward, best_reward = episode + 1, 0.000, max(
                total_reward, best_reward)
    print("best reward {:.4f}.".format(best_reward))
def get_result(batch_no, delta_t, sim_duration, dspt_times, \
        stop_locs, demand_rates, board_rates, stop_num, demand_start_times, \
            link_mean_speeds, link_cv_speeds, link_lengths, link_start_locs, \
                cycle_lengths, green_ratios, signal_offsets, signal_locs):

    simulator = Simulator(sim_duration, dspt_times, \
        stop_locs, demand_rates, board_rates, stop_num, demand_start_times, \
            link_mean_speeds, link_cv_speeds, link_lengths, link_start_locs, \
                cycle_lengths, green_ratios, signal_offsets, signal_locs)

    stop_headways = defaultdict(list)  # stop_no -> headways
    for stop in range(stop_num):
        stop_headways[stop] = []

    for sim_r in range(batch_no):
        for t in range(sim_duration):
            is_bunched = simulator.move_one_step(delta_t)
            if is_bunched: break

        for stop in range(stop_num):
            arr_hdws_list = simulator.get_stop_headways(stop)
            stop_headways[stop] += arr_hdws_list

        # if sim_r == batch_no-1:
        # simulator.plot_time_space()
        simulator.reset(dspt_times)

    # append to the global variable
    for stop, hdws in stop_headways.items():
        total_hdws_dict[stop] = hdws
Example #48
def main():
    sim_length = 10  # simulation length per episode in seconds (simulation time)
    delta_t = 200  # time step duration in millisecond (simulation time)

    sim = Simulator(delta_t)
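    # in_dim, out_dim, train() and loss_list are assumed to be defined elsewhere in the original module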
    pi = Pi(in_dim, out_dim)
    optimizer = optim.Adam(pi.parameters(), lr=0.01)

    try:
        for epoch in range(1000):
            states = sim.reset()
            for i in range(int(sim_length / delta_t * 1000)):
                actions = pi.act(states)
                states, reward, done = sim.step(delta_t, actions)  # input step in millisecond
                # sim.render()

                pi.rewards.append(reward)
                if done:
                    break

            loss = train(pi, optimizer)
            total_reward = sum(pi.rewards)
            pi.on_policy_reset()
            print(f'Epoch: {epoch}, loss: {loss}, total reward: {total_reward}')
    finally:
        print("Training has been stopped")
        torch.save(pi.model.state_dict(), './trained/straight_line')
        sim.plot_loss(loss_list)
Example #49
def run():
    """Run the agent for a finite number of trials."""

    # Set up environment and agent
    e = Environment()  # create environment (also adds some dummy traffic)
    a = e.create_agent(LearningAgent)  # create agent
    e.set_primary_agent(a, enforce_deadline=True)  # specify agent to track
    # NOTE: You can set enforce_deadline=False while debugging to allow longer trials

    # Now simulate it
    sim = Simulator(
        e, update_delay=.5, display=True
    )  # create simulator (uses pygame when display=True, if available)
    # NOTE: To speed up simulation, reduce update_delay and/or set display=False

    success_summary = pd.DataFrame(index=['no_success', 'success'])
    validation_no = 10

    for i in range(validation_no):
        sim.run(n_trials=100)  # run for a specified number of trials
        # NOTE: To quit midway, press Esc or close pygame window, or hit Ctrl+C on the command-line
        print "Trial Count: ", a.trial_count
        success_temp = pd.DataFrame.from_dict(a.trial_summary, orient='index')
        success_temp.index = ['no_success', 'success']
        temp_column_name = 'trial_count_' + str(i + 1)
        success_summary[temp_column_name] = success_temp
        a.trial_summary[0] = 0
        a.trial_summary[1] = 0

    print success_summary
    success_average = success_summary.mean(axis=1)
    print "Average: "
    print success_average
    print "Percentage: ", success_average[0:][1] / success_average[0:][0]

    import os
    filename = 'smartcab/data/basic_agent_trials.csv'
    filename = os.path.join(filename)
    success_summary.to_csv(filename)
    print success_summary
Example #50
 def simulate(a, xi0, theta0, phi0, H, idx):
     sim = Simulator(a, xi0, dirichlet=True)
     sim.simulate(theta0, phi0, H)
     U = sim.conservation()
     R = sim.reflectivity()
     print(idx)
     return [U, R]
Example #51
def test(param):
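    # tau (the control time step, in seconds) is assumed to be defined at module level in the original script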
    sim = Simulator(640, 480, rate=int(1 / tau))

    env = CartPoleEnv()
    controller = Controller(param, 'net')
    #controller = Controller(param,'pid')

    s = env.reset()

    g = CartPoleGraphics()

    sim.add('cartpole', g)

    data = {'s': s}

    def step():
        s = data['s']
        ctrl = controller.compute(s)
        s, fail, up = env.step(ctrl)
        data['s'] = s
        x, _, t, _ = s
        g.update(x, t)
        return fail

    sim.run(step)
Example #52
    def simulate_and_plot(self,x_init,T,plot_funcs):
        """
        Simulate the system controlled via explicit and implicit MPC, then call
        the passed plotting functions.

        Parameters
        ----------
        x_init : np.float
            Initial condition.
        T : float
            Simulation duration.
        plot_funcs : list
            List of callable plot functions. Should have the call signature
            plot_func(sim_explicit,sim_implicit) where the two arguments are
            the output of Simulator:run().
        """
        self.setup_mpc()
        # Simulate explicit MPC
        simulator = Simulator(self.explicit_mpc,T)
        sim_explicit = simulator.run(x_init,label='explicit')
        # Simulate implicit MPC
        simulator = Simulator(self.implicit_mpc,T)
        sim_implicit = simulator.run(x_init,label='implicit')
        # Called plotting functions
        for plot_func in plot_funcs:
            plot_func(sim_explicit,sim_implicit)
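A minimal usage sketch for simulate_and_plot() above; the owning class name, initial condition, and horizon are illustrative assumptions, not taken from the source.

def compare_runs(sim_explicit, sim_implicit):
    # receives the two Simulator.run() outputs described in the docstring above
    print(sim_explicit)
    print(sim_implicit)

mpc = SomeMPCSystem()  # hypothetical class providing simulate_and_plot() as defined above
mpc.simulate_and_plot(x_init=0.0, T=10.0, plot_funcs=[compare_runs])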
Example #53
def cost_test_case_1(cost_to_test):
    objarr = np.transpose([[1]])
    obj = SimObject(1, 1, objarr)
    sim = Simulator(10, 10, obj)
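    # belief over the 10x10 grid: rows 3-5 are all zero and the remaining 70 cells
    # carry a uniform probability of 1/70 (~0.01428571)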
    # Belief state over a 10x10 grid: uniform mass (0.01428571 ~= 1/70) on all
    # cells except rows 3-5, which carry zero probability.
    b_s = np.full((10, 10), 0.01428571)
    b_s[3:6, :] = 0.0
    actions = [(4, 0)]
    observations = [(-1, [False, False, False])]

    actual_cost = cost_to_test(sim,
                               b_s,
                               actions,
                               observations,
                               desired_loc=(0, 0))

    exp_cost = heuristic(sim, b_s) + len(actions)

    assert actual_cost == exp_cost
Beispiel #54
0
def main(unused_argv):
	opt = Options()
	sim = Simulator(opt.map_ind, opt.cub_siz, opt.pob_siz, opt.act_num)
	trans = TransitionTable(opt.state_siz, opt.act_num, opt.hist_len,
	                             opt.minibatch_size, opt.valid_size,
	                             opt.states_fil, opt.labels_fil)

	# 1. train
	######################################
	# TODO implement your training here!
	# you can get the full data from the transition table like this:
	#
	# # both train_data and valid_data contain tuples of images and labels
	train_data = trans.get_train()
	valid_data = trans.get_valid()

	samples_train_data = np.float32(train_data[0])
	labels_train_data = np.float32(train_data[1])
	unhotted_labels_train_data = unhot(labels_train_data)

	samples_valid_data = np.float32(valid_data[0])
	labels_valid_data = np.float32(valid_data[1])
	unhotted_labels_valid_data = unhot(labels_valid_data)

	print("Shape of samples_train_data {}".format(samples_train_data.shape))
	print("Shape of labels_train_data {}".format(labels_train_data.shape))
	print("Shape of unhotted_labels_train_data {}".format(unhotted_labels_train_data.shape))

	classifier = cnn.get_estimator()

	# Train the model
	train_input_fn = tf.estimator.inputs.numpy_input_fn(
	    x={"x": samples_train_data},
	    y=unhotted_labels_train_data,
	    batch_size=100,
	    num_epochs=None,
	    shuffle=True)

	classifier.train(
	    input_fn=train_input_fn,
	    steps=1000
	)

	eval_input_fn = tf.estimator.inputs.numpy_input_fn(
		x={"x": samples_valid_data},
		y=unhotted_labels_valid_data,
		num_epochs=1,
		shuffle=False
	)

	eval_results = classifier.evaluate(input_fn=eval_input_fn)
	print(eval_results)
Beispiel #55
0
def run():
    """Run the agent for a finite number of trials."""
    learning_rate = 0.42
    discount_rate = 0.15
    initial_q_hat = 4

    # Set up environment and agent
    e = Environment()  # create environment (also adds some dummy traffic)
    a = e.create_agent(LearningAgent, learning_rate, discount_rate,
                       initial_q_hat)  # create agent
    e.set_primary_agent(a, enforce_deadline=True)  # specify agent to track
    # NOTE: You can set enforce_deadline=False while debugging to allow longer trials

    # Now simulate it
    sim = Simulator(
        e, update_delay=0, display=False
    )  # create simulator (uses pygame when display=True, if available)
    # NOTE: To speed up simulation, reduce update_delay and/or set display=False

    sim.run(n_trials=100)  # run for a specified number of trials
    print "Failed trials: "
    print a.get_failed_trials()
Beispiel #56
0
def run():
    """Run the agent for a finite number of trials."""

    # Set up environment and agent
    e = Environment()  # create environment (also adds some dummy traffic)
    a = e.create_agent(LearningAgent)  # create agent

    # QUESTION 1 used `enforce_deadline=False`; it is set to `True` here
    e.set_primary_agent(a, enforce_deadline=True)  # specify agent to track
    # NOTE: You can set enforce_deadline=False
    # while debugging to allow longer trials

    # Now simulate it
    # # create simulator (uses pygame when display=True, if available)
    sim = Simulator(e, update_delay=0.01, display=False)
    # NOTE: To speed up simulation,
    # reduce update_delay and/or set display=False

    # run for a specified number of trials
    sim.run(n_trials=100)

    print "Finished!"
Beispiel #57
0
    def Simulate(self):
        """

        :return:
        """
        undefeated = {'ANY': []}
        for i in range(self.experiments):
            simulator = Simulator(self.season, self.standings,
                                  self.simulations)
            simulator.Simulate()
            count_undefeated = collections.Counter(simulator.undefeated)
            for team in count_undefeated:
                if team in undefeated:
                    undefeated[team].append(count_undefeated[team])
                else:
                    undefeated[team] = [count_undefeated[team]]
            undefeated['ANY'].append(
                collections.Counter(simulator.nUndefeated)[1])
        self.undefeated = {
            team: sorted(undefeated[team])
            for team in undefeated
        }
Beispiel #58
0
def _start_worker(G, policy_class, policy_locals, task):

    G.scope = "worker_{}".format(G.worker_id)
    with tf.variable_scope(G.scope):
        policy = policy_class.copy(policy_locals)
        simulator = Simulator(task)

    G.ops = get_param_assign_ops(policy.get_params())
    G.sampler = Sampler(simulator, policy)
    G.sess = tf.Session()

    G.sess.__enter__()
    G.sess.run(tf.global_variables_initializer())
Beispiel #59
0
def simulator_path(pytestconfig) -> Path:
    path_str = pytestconfig.getoption('--simulator')
    if path_str:
        return Path(path_str)

    # use default from dependencies directory
    path = Simulator.default_simulator_path()
    if path:
        return path

    raise pytest.UsageError('failed to find default simulator '
                            '(try to run bootstrap.py) and none was '
                            'specified using the --simulator option')
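
For this fixture to receive `--simulator`, the option has to be registered in a conftest.py; a minimal sketch, where the default and help text are assumptions rather than the original project's wording:

# conftest.py
def pytest_addoption(parser):
    # Register the --simulator command-line option read by the fixture above
    parser.addoption('--simulator', action='store', default=None,
                     help='path to the simulator executable')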
Beispiel #60
0
def run():
    """Run the agent for a finite number of trials."""
    import time

    success_rates = []
    last20_redlight_violations = []
    last20_planner_noncompliance = []
    for count in range(10):
        # Set up environment and agent
        e = Environment()  # create environment (also adds some dummy traffic)
        a = e.create_agent(LearningAgent)  # create agent
        e.set_primary_agent(a, enforce_deadline=True)  # specify agent to track
        # NOTE: You can set enforce_deadline=False while debugging to allow longer trials

        # Now simulate it
        sim = Simulator(
            e, update_delay=0.0000005, display=False
        )  # create simulator (uses pygame when display=True, if available)
        # NOTE: To speed up simulation, reduce update_delay and/or set display=False

        sim.run(n_trials=100)  # run for a specified number of trials
        # NOTE: To quit midway, press Esc or close pygame window, or hit Ctrl+C on the command-line
        #plot_agent_performance(a.alpha, a.gamma,a.success_rate, a.red_light_violations, a.planner_noncompliance, count)

        sum_last20_redlight_violations = sum(a.red_light_violations[-20:])
        sum_last20_planner_noncompliance = sum(a.planner_noncompliance[-20:])

        success_rates.append(a.success_rate)
        last20_redlight_violations.append(sum_last20_redlight_violations)
        last20_planner_noncompliance.append(sum_last20_planner_noncompliance)

    mean_success = sum(success_rates) / float(len(success_rates))
    mean_last20redlight = sum(last20_redlight_violations) / float(
        len(last20_redlight_violations))
    mean_last20planner = sum(last20_planner_noncompliance) / float(
        len(last20_planner_noncompliance))
    print 'Mean success rate: ', mean_success
    print 'Mean last 20 red light violations: ', mean_last20redlight
    print 'Mean last 20 planner_noncompliance: ', mean_last20planner