def run(): """Run the agent for a finite number of trials.""" # Code for testing multiple alphas """ i = 0 alphas = [0.5, 0.6, 0.7, 0.8, 0.9] for alpha in alphas: print "********************Run " + str(i) + " Alpha is " + str(alpha) +"********************" # Set up environment and agent e = Environment() # create environment (also adds some dummy traffic) a = e.create_agent(LearningAgent, alpha) # create agent e.set_primary_agent(a, enforce_deadline=True) # specify agent to track # NOTE: You can set enforce_deadline=False while debugging to allow longer trials # Now simulate it sim = Simulator(e, update_delay=0.0001, display=False) # create simulator (uses pygame when display=True, if available) # NOTE: To speed up simulation, reduce update_delay and/or set display=False sim.run(n_trials=100) # run for a specified number of trials # NOTE: To quit midway, press Esc or close pygame window, or hit Ctrl+C on the command-line i += 1 """ # Set up environment and agent e = Environment() # create environment (also adds some dummy traffic) a = e.create_agent(LearningAgent) # create agent e.set_primary_agent(a, enforce_deadline=True) # specify agent to track # NOTE: You can set enforce_deadline=False while debugging to allow longer trials # Now simulate it sim = Simulator(e, update_delay=0.5, display=True) # create simulator (uses pygame when display=True, if available) # NOTE: To speed up simulation, reduce update_delay and/or set display=False sim.run(n_trials=100) # run for a specified number of trials
def run(): """Run the agent for a finite number of trials.""" import numpy as np # intuition values 1st selected # alpha = 0.5 # gamma = 0.7 # epsilon = 0.05 # optimal values found alpha = 0.5 gamma = 0.2 epsilon = 0.07 # some "bad" values just to test how good our optimal is # alpha = 0.8 # gamma = 0.6 # epsilon = 0.2 # Set up environment and agent e = Environment() # create environment (also adds some dummy traffic) a = e.create_agent(LearningAgent, alpha, gamma, epsilon) # create agent e.set_primary_agent(a, enforce_deadline=True) # set agent to track # Now simulate it sim = Simulator(e, update_delay=0.0001) # reduce update_delay to speed up simulation sim.run(n_trials=100) # press Esc or close pygame window to quit mean = np.mean(a.trial_array[0][75:100]) print "Average Steps: " + str(mean)
def run(): """Run the agent for a finite number of trials.""" # Set up environment and agent e = Environment() # create environment (also adds some dummy traffic) a = e.create_agent(LearningAgent) # create agent e.set_primary_agent(a, enforce_deadline = True) # specify agent to track # NOTE: You can set enforce_deadline=False while debugging to allow longer trials # Now simulate it sim = Simulator(e, update_delay=0.0005, display = False) # create simulator (uses pygame when display=True, if available) # NOTE: To speed up simulation, reduce update_delay and/or set display=False sim.run(n_trials=100) # run for a specified number of trials # NOTE: To quit midway, press Esc or close pygame window, or hit Ctrl+C on the command-line print ("................................DATA SUMMARY................................") print ("alpha: ", a.alpha) print ("gamma: ", a.gamma) print ("epsilon: ", a.epsilon) print ("total actions: ", a.total_actions) print ("total rewards: ", a.total_rewards) print ("number of negative reward in each trial ", a.last_negative_reward_count_list.values()) print ("number of actions in each trial ", a.last_actions_list.values()) print ("total rewards in each trial ", a.last_rewards_list.values())
def run(): """Run the agent for a finite number of trials.""" # create common place to set debug values dbg_deadline = True dbg_update_delay = 0.01 dbg_display = False dbg_trials = 100 # create switches to run as random, way_light, way_light_vehicles # random = take random actions only # way_light_only = Traffic Light, Way Point # way_light_Vehicle = Traffic Light, Way Point, Left, Right, Oncoming # way_light_modified (or any other value) = Way Point, Combination Light and Vehicle State dbg_runtype = 'way_light_only' # Set up environment and agent e = Environment() # create environment (also adds some dummy traffic) a = e.create_agent(LearningAgent) # create agent # set the run type (random choice, simple state, state with vehicles) a.run_type = dbg_runtype e.set_primary_agent(a, enforce_deadline=dbg_deadline) # specify agent to track # NOTE: You can set enforce_deadline=False while debugging to allow longer trials # Now simulate it sim = Simulator(e, update_delay=dbg_update_delay, display=dbg_display) # create simulator (uses pygame when display=True, if available) # NOTE: To speed up simulation, reduce update_delay and/or set display=False sim.run(n_trials=dbg_trials) # run for a specified number of trials # NOTE: To quit midway, press Esc or close pygame window, or hit Ctrl+C on the command-line # at the end of the simulation show results # call qlearner reset to get last trial result a.q_learner.reset(a.step_count) a.q_learner.show_results()
def run2():
    # Helps to find the sweet spot for alpha, gamma values
    alphas = [0.1, 0.2, 0.4, 0.6, 0.8, 1.0]
    gammas = [0.1, 0.2, 0.4, 0.6, 0.8, 1.0]
    heatmap = []
    for i, alpha in enumerate(alphas):
        row = []
        for j, gamma in enumerate(gammas):
            e = Environment()
            a = e.create_agent(LearningAgent)
            a.alpha = alpha
            a.gamma = gamma
            e.set_primary_agent(a, enforce_deadline=True)
            sim = Simulator(e, update_delay=0.0, display=False)
            sim.run(n_trials=100)
            print "Successful journeys : {}".format(a.targetReachedCount)
            row.append(a.targetReachedCount / 100.0)
            # qstats.append(a.q_learn_stats())
        heatmap.append(row)
    print heatmap
    ax = sns.heatmap(heatmap, xticklabels=gammas, yticklabels=alphas, annot=True)
    ax.set(xlabel="gamma", ylabel="alpha")
    plt.show()
def start_puzzle(self): while(not self.solved): while(True): response_code = self.code for i, l in enumerate(self.lines): clear() put_text(self.lesson) print_code(response_code, "\nThe code currently is:") resp = int(get_text('Place the line \'%s\': ' % l)) response_code = self.process_input(resp, l, response_code) threads = self.translator(response_code) simulator = Simulator(threads, self.predicate, self.semaphores, self.poll_rate) success, message = simulator.run_sim() simulator.visualize() if success: put_text('Simulator test Passed!') else: put_text('Simulator test Failed!') put_text(message) get_text('Check against the real answer? (y/n)') clear() put_text(self.lesson) print_code(response_code, "\nThe code currently is:") if(response_code == self.answer): put_text("Congratulations! That's correct. Good job!\n") break else: get_text("Woops! That's incorrect. Try again? (y/n)\n") self.solved = True
def main2(): """ Looks at which games are possible after a certain number of differences. """ GAME_LENGTH = 16 # possible_tuples[i] is the set of tuples for which there exists a game # whose ith element is that tuple possible_tuples = [] for n in range(0, 2 ** GAME_LENGTH): g = int_to_game(n, GAME_LENGTH) sim = Simulator(g) assert sim.get_game_length() is not None, 'Non-terminating game: %s' % g t = 0 while not sim.done(): if t >= len(possible_tuples): possible_tuples.append(set()) possible_tuples[t].add(tuple(sim.state)) sim.step_forward() t += 1 # Add ending tuple as well if t >= len(possible_tuples): possible_tuples.append(set()) possible_tuples[t].add(tuple(sim.state)) print 'Number of possible tuples after t steps:' for t in range(len(possible_tuples)): print '%d: %d' % (t, len(possible_tuples[t]))
def update(self): ''' Learn for a single episode. ''' simulator = Simulator() state = simulator.get_state() act = self.action_policy(state) feat = self.action_features[act](state) end_episode = False traces = [ np.zeros((BASIS_COUNT,)), np.zeros((BASIS_COUNT,)), np.zeros((BASIS_COUNT,))] while not end_episode: action = self.policy(state, act) state, reward, end_episode, _ = simulator.take_action(action) new_act = self.action_policy(state) new_feat = self.action_features[new_act](state) delta = reward + self.gamma * self.action_weights[new_act].dot(new_feat) - self.action_weights[act].dot(feat) for i in range(3): traces[i] *= self.lmb * self.gamma traces[act] += feat for i in range(3): self.action_weights[i] += self.alpha * delta * traces[i] / COEFF_SCALE act = new_act feat = new_feat return [reward]
def run(): """Run the agent for a finite number of trials.""" options = parseOptions() env = Environment() # create environment (also adds some dummy traffic) sim = Simulator(env, update_delay=0, display=options.display) # create simulator (uses pygame when display=True, if available) results = {} from settings import params for agent, symbol in [(options.player1, 1), (options.player2, -1)]: kwargs = params[agent] env.add_agent( symbol=symbol, file=options.file, clear=options.clear, save=options.save, **kwargs) sim.run(n_trials=options.iterations) # run for a specified number of trials for agent in env.agents: results["X" if agent.symbol == 1 else 'O'] = agent.wins print results dispatcher.send(signal='main.complete', sender={})
def main():
    # create NonRandom instance with seed
    nr = NonRandom()
    nr.set_seed(1)

    # create game and player
    wheel = Wheel(nr)
    table = Table()
    game = Game(wheel, table)
    player = Martingale(table)

    # assign default values to prevent future changes on them
    player.BASE_AMOUNT = 1
    player.BASE_BET = "Black"

    # create simulator instance
    simulator = Simulator(game, player)
    simulator.SAMPLES = 3

    # execute simulator
    simulator.gather()

    # print results
    print "\n"
    print "Maxima", simulator.maxima, "\n"
    print "Final", simulator.final, "\n"
    print "Durations", simulator.durations, "\n"
def main(): """ Example: UnitXObjectの変数を保存し,取り出し,確認する. """ from simulator import Simulator s = Simulator() UnitXObject.manager = s.get_manager() UnitXObject.scopes = s.get_scopes() # Regist part crr_scope = s.get_scopes().peek() crr_scope['x'] = UnitXObject(value=1.5, varname='x', is_none=False, unit=Unit(ex_numer=u'm', numer=u'cm', ex_denom=None, denom=None)) crr_scope['y'] = UnitXObject(value=1500, varname='y', is_none=False, unit=Unit(ex_numer=u'm', numer=u'km', ex_denom=u'時', denom=u'分')) s.get_scopes().new_scope() # Find & Show part found_scope = s.get_scopes().peek().find_scope_of('x') Util.dump(s.get_scopes()) # Checking equals() tmp_obj = UnitXObject(value=1.5, varname='x', is_none=False, unit=Unit(ex_numer=None, numer=u'cm', ex_denom=None, denom=None)) print tmp_obj print crr_scope['x'] == tmp_obj # Clear part s.get_scopes().del_scope() s.get_scopes().del_scope() return Constants.EXIT_SUCCESS
def run(): """Run the agent for a finite number of trials.""" # Set up environment and agent gammas = [x / 10.0 for x in xrange(0, 10)] gamma_to_success_rate = OrderedDict() gamma_to_average_reward = OrderedDict() # Run a simulation for each sample gamma value to test which # choice of gamma results in the most successful agent for gamma in gammas: # Run 10 trials over each choice of gamma to get average performance metrics for trial in xrange(10): e = Environment() # create environment (also adds some dummy traffic) a = e.create_agent(LearningAgent, (gamma)) # create agent e.set_primary_agent(a, enforce_deadline=True) # set agent to track # Now simulate it sim = Simulator(e, update_delay=0.0) # reduce update_delay to speed up simulation sim.run(n_trials=50) # press Esc or close pygame window to quit gamma_to_success_rate[a.GAMMA] = gamma_to_success_rate.get(a.GAMMA, 0) + sim.env.successful_trials gamma_to_average_reward[a.GAMMA] = ( gamma_to_average_reward.get(a.GAMMA, 0) + a.get_average_reward_per_action() ) # Get the average of the 10 trials for gamma in gamma_to_average_reward.keys(): gamma_to_average_reward[gamma] = gamma_to_average_reward[gamma] / 10 gamma_to_success_rate[gamma] = gamma_to_success_rate[gamma] / 10 print gamma_to_average_reward print gamma_to_success_rate
def initialize_simulator(route):
    '''Launches the sim and sets up the world.'''
    sim_flags = get_sim_flags()
    expert_flags = get_local_expert_flags()

    launch_planning = Launcher('simulator', 'planning_stack.launch', sim_flags)
    launch_local_expert = Launcher('expert_mode', 'local_expert_mode.launch', expert_flags)

    # Take the filename and drop '*.py', this is our node name
    simulator = Simulator(
        path.basename(__file__)[:-3],
        launchers=[launch_planning, launch_local_expert])
    spawn_utils = SpawnUtils(simulator)

    # Spawn our car near the start of the intersection
    # Spawn dpv across the intersection
    with SemanticDatabase() as db:
        db.validate_route(route)
        start_x, start_y, start_angle = db.get_point_in_lane(route[0])
        dpv_x, dpv_y, _ = db.point_interpolated_along_lane(route[2], 0.02)

    # dpv_theta = spawn_utils.get_parked_car_heading((dpv_x, dpv_y), 270)
    dpv_theta = spawn_utils.get_parked_car_heading((dpv_x, dpv_y), 0)

    cruise_robot = simulator.spawn_cruise_robot(start_x, start_y, 0, start_angle)
    simulator.spawn_robot(x=dpv_x, y=dpv_y, theta=dpv_theta)

    # Now tell our robot what route to follow
    cruise_robot.brain.set_lane_plans([(1000.0, route)])
    return simulator, cruise_robot
def run(): """Run the agent for a finite number of trials.""" # Set up environment and agent e = Environment() # create environment (also adds some dummy traffic) a = e.create_agent(LearningAgent) # create agent e.set_primary_agent(a, enforce_deadline=True) # specify agent to track # NOTE: You can set enforce_deadline=False while debugging to allow longer trials # Now simulate it sim = Simulator(e, update_delay=0.00001, display=False) # create simulator (uses pygame when display=True, if available) # NOTE: To speed up simulation, reduce update_delay and/or set display=False sim.run(n_trials=100) # run for a specified number of trials # NOTE: To quit midway, press Esc or close pygame window, or hit Ctrl+C on the command-line # Print summary # allPenalities = a.numberOfPenaltiesList allFailures = a.numberOfFailuresList numberOfTrials = float(len(allFailures)) numberOfFailures = float(allFailures[-1]) numberOfSuccess = numberOfTrials - numberOfFailures numberOfSuccessFirstHalf = ((numberOfTrials) / 2) - float(allFailures[len(allFailures)/2]) numberOfSuccessSecondHalf = numberOfSuccess - numberOfSuccessFirstHalf print ("=================================================================================") print ("SUMMARY") print ("=================================================================================") print ("Total Penalities received = %3.2f" % (sum(allPenalities))) print ("\tPenalities received in the first half of trials = %3.2f" % (sum(allPenalities[:len(allPenalities)/2]))) print ("\tPenalities received in the second half of trials = %3.2f" % (sum(allPenalities[len(allPenalities)/2:]))) print ("Success Rate: %3.2f%%" % (numberOfSuccess/numberOfTrials*100)) print ("\tSuccess Rate of the first half : %3.2f%%" % (numberOfSuccessFirstHalf/(numberOfTrials/2)*100)) print ("\tSuccess Rate of the second half: %3.2f%%" % (numberOfSuccessSecondHalf/(numberOfTrials/2)*100))
def mainQ(_learning=True):
    # Set player types and logging if provided in command line
    if len(sys.argv) == 3:
        pair = (sys.argv[1], sys.argv[2])
    else:
        pair = None

    # Prompt players
    # Needs to be adapted to let the user define parameters
    player_pair = promptPlayers(pair, _learning)

    # Create new game
    game = Game(player_pair)

    ######
    # Create new simulation
    # Flags:
    #   - debug: (True, False)
    sim = Simulator(game)

    ######
    # Run a simulation
    # Flags:
    #   - tolerance=0.05   Epsilon tolerance before testing begins.
    #   - n_test=0         Number of tests to be conducted after training
    sim.run(tolerance=0.001, n_test=100)
def run(): f = open('running_report.txt', 'w') # setup various parameter combinations discount_factors = [0.5] starting_learning_rates = [0.5] epsilon_greedy_policy = [0.09] for d_factor in discount_factors: for alpha in starting_learning_rates: for greedy_policy in epsilon_greedy_policy: """Run the agent for a finite number of trials.""" # Set up environment and agent e = Environment() # create environment (also adds some dummy traffic) a = e.create_agent(LearningAgent, learning_rate=alpha, discount_factor=d_factor, greedy_policy=greedy_policy) # create agent e.set_primary_agent(a, enforce_deadline=True) # specify agent to track # NOTE: You can set enforce_deadline=False while debugging to allow longer trials # Now simulate it sim = Simulator(e, update_delay=0, display=True) # create simulator (uses pygame when display=True, if available) number_of_trials = 100 # NOTE: To speed up simulation, reduce update_delay and/or set display=False sim.run(n_trials=number_of_trials) # run for a specified number of trials #NOTE: To quit midway, press Esc or close pygame window, or hit Ctrl+C on the command-line print >> f, "Learning rate:", alpha print >> f, "Discount factor:", d_factor print >> f, "Greedy Policy:", greedy_policy print >> f, "Percentage completed: ", a.completed_trials / 100.0, "\n" f.flush() f.close()
def run(get_result=False, gm=0.2, al=0.5):
    """Run the agent for a finite number of trials."""
    if get_result:  # print for GridSearch
        print ("Running trial for gamma = %.1f, alpha = %.1f" % (gm, al))

    # Set up environment and agent
    e = Environment()  # create environment (also adds some dummy traffic)
    a = e.create_agent(LearningAgent, gm=gm, al=al)  # create agent
    e.set_primary_agent(a, enforce_deadline=True)  # specify agent to track
    # NOTE: You can set enforce_deadline=False while debugging to allow longer trials

    # Now simulate it
    sim = Simulator(e, update_delay=0.0, display=False)  # create simulator (uses pygame when display=True, if available)
    # NOTE: To speed up simulation, reduce update_delay and/or set display=False

    n_trials = 100
    sim.run(n_trials=n_trials)  # run for a specified number of trials
    # NOTE: To quit midway, press Esc or close pygame window, or hit Ctrl+C on the command-line

    print "average silly moves for the last 10 trials: ", np.average(a.silly_fq[-10:])
    print "average risky moves for the last 10 trials: ", np.average(a.risk_fq[-10:])

    # The following code is for GridSearch
    if get_result:
        summary = sim.rep.summary()
        rate = sum(summary[-1][-10:]) / float(10)
        deadline = sum(summary[-2][-10:]) / float(10)
        risk_fq = sum(a.risk_fq[-10:])
        print ("success_rate for gamma = %.1f, alpha = %.1f is %.2f" % (gm, al, rate))
        print ("final_deadline for gamma = %.1f, alpha = %.1f is %.2f" % (gm, al, deadline))
        print ("risk_frequency for gamma = %.1f, alpha = %.1f is %d" % (gm, al, risk_fq))
        print
        return (rate, deadline, risk_fq)
def run(): """Run the agent for a finite number of trials.""" successnum = dict() for i in range(10, 36,10): for j in range(40,71,10): for k in range(6,16,4): arguemns = (i/100.0, j/100.0, k/100.0) tenSucc = [] for index in range(0, 5): # Set up environment and agent e = Environment() # create environment (also adds some dummy traffic) a = e.create_agent(LearningAgent,arguemns) # create agent e.set_primary_agent(a, enforce_deadline=True) # specify agent to track # NOTE: You can set enforce_deadline=False while debugging to allow longer trials # Now simulate it sim = Simulator(e, update_delay=0.001, display=False) # create simulator (uses pygame when display=True, if available) # NOTE: To speed up simulation, reduce update_delay and/or set display=False sim.run(n_trials=100) # run for a specified number of trials # NOTE: To quit midway, press Esc or close pygame window, or hit Ctrl+C on the command-line tenSucc.append(e.success) successnum[arguemns] = tenSucc print(successnum)
class level_2(unittest.TestCase): def setUp(self): self.game = Game() self.game.addSquare(Square(Color.blue, Direction.top, 2, 1)) self.game.board.setColor(0, 1, Color.blue) self.game.addSquare(Square(Color.red, Direction.right, 0, 0)) self.game.board.setColor(0, 2, Color.red) self.game.addSquare(Square(Color.grey, Direction.left, 1, 3)) self.game.board.setColor(1, 1, Color.grey) self.simulator = Simulator(self.game) def test_goal(self): self.game.moveSquare(Color.red) self.assertTrue(not self.game.isDone()) self.game.moveSquare(Color.red) self.assertTrue(not self.game.isDone()) self.game.moveSquare(Color.blue) self.assertTrue(not self.game.isDone()) self.game.moveSquare(Color.blue) self.assertTrue(not self.game.isDone()) self.game.moveSquare(Color.grey) self.assertTrue(not self.game.isDone()) self.game.moveSquare(Color.grey) self.assertTrue(self.game.isDone()) def test_simulation(self): print self.simulator.find_solution()
class Launcher(object):
    def setup_logging(self):
        t = datetime.now()
        self.tstamp = '%d-%d-%d-%d-%d' % (t.year, t.month, t.day, t.hour, t.minute)
        fname = LOG_FILE_PATH + LOG_FILENAME + self.tstamp + '.log'
        logging.basicConfig(filename=fname, level=logging.INFO, format=FORMAT)

    def configure(self, p):
        print('constructing simulator')
        self.sim = Simulator(p['ins'], p['strat'], p['start_date'], p['end_date'],
                             p['open_bal'], self.tstamp)

    def simulate(self):
        print('running simulator')
        start = clock()
        self.sim.run()
        end = clock()
        dur_str = 'seconds = %f' % (end - start)
        print(dur_str)
        logging.info('sim time = ' + dur_str)

    def report(self):
        print('plotting')
        start = clock()
        self.sim.plot()
        end = clock()
        dur_str = 'seconds = %f' % (end - start)
        print(dur_str)
        logging.info('plot time = ' + dur_str)

    def go(self, p):
        self.setup_logging()
        self.configure(p)
        self.simulate()
        self.report()
def run(msg = ''): """Run the agent for a finite number of trials.""" # set up environment and agent e = Environment() # create environment (also adds some dummy traffic) a = e.create_agent(LearningAgent) # create agent e.set_primary_agent(a, enforce_deadline=True) # specify agent to track # NOTE: you can set enforce_deadline=False while debugging to allow longer trials # Now simulate it sim = Simulator(e, update_delay=0, display=False) # create simulator (uses pygame when display=True, if available) # NOTE: to speed up simulation, reduce update_delay and/or set display=False sim.run(n_trials=100) # run for a specified number of trials # NOTE: to quit midway, press Esc or close pygame window, or hit Ctrl+C on the command-line results = a.results average_cycles = mean([result[0] for result in results]) average_reward = mean([result[1] for result in results]) average_violations = mean([result[2] for result in results]) # print '=' * 10, msg # print 'Average Cycles:', average_cycles # print 'Average Reward:', average_reward # print 'Average Violations:', average_violations return average_cycles, average_reward, average_violations
def run(): """Run the agent for a finite number of trials.""" # Set up environment and agent e = Environment() # create environment (also adds some dummy traffic) a = e.create_agent(LearningAgent) # create agent e.set_primary_agent(a, enforce_deadline=True) # set agent to track # Now simulate it sim = Simulator(e, update_delay=5.0) # reduce update_delay to speed up simulation sim.run(n_trials=100) # press Esc or close pygame window to quit fig, ax = plt.subplots( nrows=1, ncols=1) plt.xlabel('Order of trials') plt.ylabel('# of incured penalties') plt.title('Penalties') ax.plot(a.records) fig.savefig('penalties.png') fig, ax = plt.subplots( nrows=1, ncols=1) plt.xlabel('Order of trials') plt.ylabel('# of rewards') plt.title('Rewards') ax.plot(a.rewards) fig.savefig('rewards.png')
def run(): """Run the agent for a finite number of trials.""" # create output file target_dir = os.path.dirname(os.path.realpath(__file__)) target_path = os.path.join(target_dir, 'qlearning_tuning_report.txt') if not os.path.exists(target_dir): os.makedirs(target_dir) # loop the parameters for epsilon in [0.1, 0.5, 0.9]: for alpha in np.arange(0.1, 1, 0.2): for gamma in np.arange(0.1, 1, 0.2): print epsilon, alpha, gamma # Set up environment and agent e = Environment() # create environment (also adds some dummy traffic) a = e.create_agent(QAgent, epsilon, alpha, gamma) # create agent e.set_primary_agent(a, enforce_deadline=True) # specify agent to track # NOTE: You can set enforce_deadline=False while debugging to allow longer trials # Now simulate it sim = Simulator(e, update_delay=0.001, display=False) # create simulator (uses pygame when display=True, if available) # NOTE: To speed up simulation, reduce update_delay and/or set display=False sim.run(n_trials=100) # run for a specified number of trials # get the count for the number of successful trials and average running time summary = sim.report() # write out the results try: with open(target_path, 'a') as f: f.write('epsilon {}, alpha {}, gamma {} : success {}, avg_time {}, total_reward {}\n'.format(epsilon, alpha, gamma, summary[0], summary[1], round(a.total_reward, 3))) f.close() except: raise
def main():
    drone = RealDrone()
    # controller = ConConController(drone=drone,
    #                               log=True)
    controller = SingleAxisController(drone=drone, log=True)
    sim = Simulator(drone=drone, controller=controller)
    sim.start()
def run(): """Run the agent for a finite number of trials.""" # Set up environment and agent e = Environment() # create environment (also adds some dummy traffic) a = e.create_agent(LearningAgent) # create agent e.set_primary_agent(a, enforce_deadline=True) # specify agent to track # NOTE: You can set enforce_deadline=False while debugging to allow longer trials # Now simulate it sim = Simulator(e, update_delay=0, display=False) # create simulator (uses pygame when display=True, if available) # NOTE: To speed up simulation, reduce update_delay and/or set display=False sim.run(n_trials=100) # run for a specified number of trials # NOTE: To quit midway, press Esc or close pygame window, or hit Ctrl+C on the command-line num_successes = np.sum(a.successes) last_failure = a.find_last_failure() total_penalty = a.cumulative_penalties avg_time_remaining = np.mean(a.all_times_remaining) print "Total number of successes: {}".format(num_successes) print "Failure last occurred at trial: {}".format(last_failure) print 'Total penalties incurred: {}'.format(total_penalty) print "Average time remaining: {}".format(avg_time_remaining) for state in a.state_q_dict: print state for action in a.state_q_dict[state]: print "Action: {}, Q: {:2f}".format(action,a.state_q_dict[state][action]) print a.state_q_dict[('right','red',None,None,None)] return (num_successes,last_failure,total_penalty,avg_time_remaining)
def run(): """Run the agent for a finite number of trials.""" random.seed(42) if False:#save output f = open('out', 'w') else: f = StringIO.StringIO() alphas = [0.1] gammas = [0.1] epsilons = [0.1] for alpha in alphas: for gamma in gammas: for epsilon in epsilons: # Set up environment and agent e = Environment() # create environment (also adds some dummy traffic) a = e.create_agent(LearningAgent, alpha = alpha, epsilon = epsilon, gamma = gamma) # create agent e.set_primary_agent(a, enforce_deadline=True) # specify agent to track # NOTE: You can set enforce_deadline=False while debugging to allow longer trials # Now simulate it sim = Simulator(e, update_delay=0.0, display=False) # create simulator (uses pygame when display=True, if available) # NOTE: To speed up simulation, reduce update_delay and/or set display=False sim.run(n_trials=100) # run for a specified number of trials # NOTE: To quit midway, press Esc or close pygame window, or hit Ctrl+C on the command-line f.write('Alpha: {} Gamma: {}, Epsilon: {}, RESULTS: {}\n'.format(alpha, gamma, epsilon, sum(a.history))) f.write('Number of states seen: {}\n'.format(len(a.Q))) f.write('History of results\n') f.write(str(a.history)) f.write('State frequencies:\n') f.write('\n'.join(str(z) for z in sorted(a.s.items(), key=lambda x: x[1]))) f.write('\n\n')
def main(): """Run the agent for a finite number of trials.""" # Set up environment and agent file = open('Q_v2.pickle', 'r') Q = pickle.load(file) file.close() e = Environment() # create environment (also adds some dummy traffic) a = e.create_agent(LearningAgent) # create agent a.Q = Q print "QQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQ" print len(a.Q) e.set_primary_agent(a, enforce_deadline=False) # specify agent to track # NOTE: You can set enforce_deadline=False while debugging to allow longer trials # Now simulate it sim = Simulator(e, update_delay=1, display=True) # create simulator (uses pygame when display=True, if available) # NOTE: To speed up simulation, reduce update_delay and/or set display=False sim.run(n_trials=1000) # run for a specified number of trials # NOTE: To quit midway, press Esc or close pygame window, or hit Ctrl+C on the command-line f = open('Q_v2.pickle', 'w') pickle.dump(a.Q, f) f.close()
def run(): """Run the agent for a finite number of trials.""" record = [] for q_initial in [0, 2, 10]: for alpha in range(1, 6): # Set up environment and agent e = Environment() # create environment (also adds some dummy traffic) a = e.create_agent(LearningAgent, alpha * 0.2, q_initial) # create agent e.set_primary_agent(a, enforce_deadline=True) # specify agent to track # NOTE: You can set enforce_deadline=False while debugging to allow longer trials # Now simulate it sim = Simulator(e, update_delay=0, display=False) # create simulator (uses pygame when display=True, if available) # NOTE: To speed up simulation, reduce update_delay and/or set display=False sim.run(n_trials=100) # run for a specified number of trials # NOTE: To quit midway, press Esc or close pygame window, or hit Ctrl+C on the command-line a.reset() trip_log = pd.DataFrame(a.trip_log) # trip_log['Used'] = trip_log['Deadline'] - trip_log['Remaining'] trip_log['Efficiency'] = trip_log['Remaining'] / trip_log['Deadline'] * 100 record.append({ 'Success Rate': trip_log[trip_log.Success == True].shape[0], 'Alpha': alpha * 0.2, 'Q Initial': q_initial, 'Efficiency': trip_log['Efficiency'].mean(), 'Ave Reward': trip_log['Reward'].mean(), 'Ave Penalty': trip_log['Penalty'].mean(), }); return pd.DataFrame(record)
def run_episode(self, simulator=None): ''' Run a single episode for a maximum number of steps. ''' if simulator == None: simulator = Simulator() state = simulator.get_state() states = [state] rewards = [] actions = [] end_ep = False act = self.action_policy(state) acts = [act] while not end_ep: action = self.policy(state, act) new_state, reward, end_ep, steps = simulator.take_action(action) new_act = self.action_policy(new_state) delta = reward - self.state_quality(state, act) if not end_ep: delta += (self.gamma**steps) * self.state_quality(new_state, new_act) self.tdiff += abs(delta) self.steps += 1.0 state = new_state states.append(state) actions.append(action) rewards.append(reward) act = new_act acts.append(act) self.tdiffs.append(self.tdiff / self.steps) self.episodes += 1 self.total += sum(rewards) self.returns.append(sum(rewards)) return states, actions, rewards, acts
def run(): """Run the agent for a finite number of trials.""" # Set up environment and agent e = Environment() # create environment (also adds some dummy traffic) a = e.create_agent(LearningAgent) # create agent e.set_primary_agent(a, enforce_deadline=True) # specify agent to track # NOTE: You can set enforce_deadline=False while debugging to allow longer trials # Now simulate it sim = Simulator(e, update_delay=0.0001, display=False) # create simulator (uses pygame when display=True, if available) # NOTE: To speed up simulation, reduce update_delay and/or set display=False sim.run(n_trials=100) # run for a specified number of trials # NOTE: To quit midway, press Esc or close pygame window, or hit Ctrl+C on the command-line ## print Q table print '+++++++++++++++++++++++++++++++++++++++++++++++' print 'final Q table' print '+++++++++++++++++++++++++++++++++++++++++++++++' for key in a.Q: print key, print ["%0.2f" % i for i in a.Q[key]] print '====================================================================' print 'An Array of Arrays where each subarray shows neg rewards for a trial' print '====================================================================' #print neg rewards and split term x=a.reward_holder.split('3') y=[i.split(' ') for i in x] print y #shows an array of arrays, could calculate total neg reward for each
def run(): """ Driving function for running the simulation. Press ESC to close the simulation, or [SPACE] to pause the simulation. """ ############## # Create the environment # Flags: # verbose - set to True to display additional output from the simulation # num_dummies - discrete number of dummy agents in the environment, default is 100 # grid_size - discrete number of intersections (columns, rows), default is (8, 6) env = Environment() ############## # Create the driving agent # Flags: # learning - set to True to force the driving agent to use Q-learning # * epsilon - continuous value for the exploration factor, default is 1 # * alpha - continuous value for the learning rate, default is 0.5 agent = env.create_agent(LearningAgent, learning=True, epsilon=0.99) ############## # Follow the driving agent # Flags: # enforce_deadline - set to True to enforce a deadline metric env.set_primary_agent(agent, enforce_deadline=True) ############## # Create the simulation # Flags: # update_delay - continuous time (in seconds) between actions, default is 2.0 seconds # display - set to False to disable the GUI if PyGame is enabled # log_metrics - set to True to log trial and simulation results to /logs # optimized - set to True to change the default log file name sim = Simulator(env, update_delay=0.01, log_metrics=True, optimized=True) ############## # Run the simulator # Flags: # tolerance - epsilon tolerance before beginning testing, default is 0.05 # n_test - discrete number of testing trials to perform, default is 0 sim.run(n_test=10, tolerance=0.05)
def optimize(args):
    """
    Runs the Scheduler with the OrderList from orderListName on the Plant
    with plantName.
    """
    plantName = args[0]
    orderListName = args[1]
    plant = Plant.fromXmlFile(plantFileExists(plantName))
    orderList = OrderList.fromXmlFile(orderListExists(orderListName))
    optimizer = Optimizer(plant, orderList, Simulator(plant), Evaluator(plant))
    optimizer.run()
def run(): """Run the agent for a finite number of trials.""" # Set up environment and agent e = Environment() # create environment (also adds some dummy traffic) a = e.create_agent(LearningAgent) # create agent e.set_primary_agent(a, enforce_deadline=True) # specify agent to track # NOTE: You can set enforce_deadline=False while debugging to allow longer trials # Now simulate it sim = Simulator(e, update_delay=0.01, display=False) # create simulator (uses pygame when display=True, if available) # NOTE: To speed up simulation, reduce update_delay and/or set display=False sim.run(n_trials=100) # run for a specified number of trials # NOTE: To quit midway, press Esc or close pygame window, or hit Ctrl+C on the command-line # Note: I needed to add these variables to my environment.py file print "\n----- RESULTS -----\n" print "Successes: {}".format(e.successes) print "Failures: {}".format(e.failures) print "Invalid Moves: {}".format(e.invalids)
def run(): """Run the agent for a finite number of trials.""" # Set up environment and agent e = Environment() # create environment (also adds some dummy traffic) a = e.create_agent(LearningAgent) # create agent e.set_primary_agent(a, enforce_deadline=True) # specify agent to track # NOTE: You can set enforce_deadline=False while debugging to allow longer trials # Now simulate it sim = Simulator( e, update_delay=.002, display=True ) # create simulator (uses pygame when display=True, if available) # NOTE: To speed up simulation, reduce update_delay and/or set display=False sim.run(n_trials=100) # run for a specified number of trials # NOTE: To quit midway, press Esc or close pygame window, or hit Ctrl+C on the command-line # Success percentage print "Succes percentage is: " + str(float(a.S)) + " %"
def run(): #test() e = Environment() # create environment (also adds some dummy traffic) a = e.create_agent(LearningAgent) # create agent e.set_primary_agent(a, enforce_deadline=True) # set agent to track # Now simulate it sim = Simulator( e, update_delay=0.0) # reduce update_delay to speed up simulation sim.run(n_trials=100) # press Esc or close pygame window to quit print "STATS" print "Rewards: ", a.rewards frame = pd.DataFrame(data=a.rewards[50:]) print frame.describe() print "Deadlines: ", a.deadlines deadline_frame = pd.DataFrame(data=a.deadlines[50:]) print deadline_frame.describe() print "Q Table: " print a.Q
def run(self, domain_path, problem_path, executive):
    parser = FDParser(domain_path, problem_path)
    sim = Simulator(parser)
    perception = Perception(sim.perceive_state)
    mediator = SimulatorMediator(parser, perception)
    if self.print_actions:
        def printer(text):
            print text
        mediator.on_action_observers.append(printer)
    executive.initilize(mediator)
    self.previous_action = None

    def next_action():
        if self.previous_action:
            mediator.on_action(sim, self.previous_action)
        self.previous_action = executive.next_action()
        return self.previous_action

    return sim.simulate(next_action)
def run_epsilon(epsilon_function=0, epsilonA=0.5, alpha=0.5): """ Run a simulation with customized parameters. """ env = Environment(verbose=VERBOSE) learning_agent = LearningAgentEpsilon0 if epsilon_function == 1: learning_agent = LearningAgentEpsilon1 elif epsilon_function == 2: learning_agent = LearningAgentEpsilon2 elif epsilon_function == 3: learning_agent = LearningAgentEpsilon3 elif epsilon_function == 4: learning_agent = LearningAgentEpsilon4 agent = env.create_agent(learning_agent, learning=True, alpha=alpha) agent.epsilonA = epsilonA env.set_primary_agent(agent, enforce_deadline=True) sim = Simulator(env, update_delay=0.01, log_metrics=True, optimized=True, display=DISPLAY) sim.run(n_test=10) return epsilon_function, agent.epsilonA
def run(): """Run the agent for a finite number of trials.""" # Set up environment and agent e = Environment() # create environment (also adds some dummy traffic) a = e.create_agent(LearningAgent) # create agent e.set_primary_agent(a, enforce_deadline=False) # specify agent to track # NOTE: You can set enforce_deadline=False while debugging to allow longer trials # Now simulate it sim = Simulator( e, update_delay=0.5, display=True ) # create simulator (uses pygame when display=True, if available) # NOTE: To speed up simulation, reduce update_delay and/or set display=False # Fine tuned parameters a.alpha = 0.1 a.discount = 0.1 a.epsilon = 0.1 sim.run(n_trials=100) # run for a specified number of trials
def __init__(self, plant, orderList, populationSize=10, mutationRange=20,
             indivMutationRate=0.5, selectionRate=0.5, iterations=50):
    assert plant is not None
    assert orderList is not None

    self.plant = plant
    self.orderList = orderList
    self.simulator = Simulator(self.plant)
    self.evaluator = Evaluator(self.plant)
    self.populationSize = populationSize
    self.indivMutationRate = indivMutationRate
    self.selectionRate = selectionRate
    self.mutationRange = mutationRange
    self.iterations = iterations
def test_get_orderbook_snapshot_history(): """ Test case to export testing/training data for reinforcement learning :return: """ start_time = dt.now(TIMEZONE) sim = Simulator(use_arctic=True) query = { 'ccy': ['ETC-USD', 'tETCUSD'], 'start_date': 20181229, 'end_date': 20181231 } orderbook_snapshot_history = sim.get_orderbook_snapshot_history(query=query) filename = '{}_{}'.format(query['ccy'][0], query['start_date']) sim.export_to_csv(data=orderbook_snapshot_history, filename=filename, compress=False) elapsed = (dt.now(TIMEZONE) - start_time).seconds print('Completed %s in %i seconds' % (__name__, elapsed)) print('DONE. EXITING %s' % __name__)
def f(sample):
    # random.setstate(rand_state['random'])
    # np.random.set_state(rand_state['np'])
    start = time.time()
    sampleResult = Simulator(self.env).getTrain(
        representation=self.option.representation,
        s0=self.env.s0,
        hyperParam=sample.X)
    sample.Y = self.env.evaluateEach(sampleResult)
    # print 'rho=%.16f, Y=%.16f, time=%fs\n' % (sample.X.rho, sample.Y, time.time()-start),
    return sample
def run(): """Run the agent for a finite number of trials.""" # Set up environment and agent e = Environment() # create environment (also adds some dummy traffic) a = e.create_agent(LearningAgent) # create agent e.set_primary_agent(a, enforce_deadline=True) # specify agent to track # NOTE: You can set enforce_deadline=False while debugging to allow longer trials # Now simulate it sim = Simulator( e, update_delay=0, display=False ) # create simulator (uses pygame when display=True, if available) # NOTE: To speed up simulation, reduce update_delay and/or set display=False sim.run(n_trials=100) # run for a specified number of trials # NOTE: To quit midway, press Esc or close pygame window, or hit Ctrl+C on the command-line # print out rewards record to observe convergence of negative rewards print rewards_sum_record print len(rewards_sum_record), debug_previous_not
class level_31(unittest.TestCase): def setUp(self): self.game = Game() self.game.addSquare(Square(Color.blue, Direction.bottom, 0, 2)) self.game.board.setColor(2, 0, Color.blue) self.game.addSquare(Square(Color.red, Direction.left, 2, 3)) self.game.board.setColor(2, 2, Color.red) self.game.addSquare(Square(Color.grey, Direction.right, 1, 1)) self.game.board.setColor(2, 4, Color.grey) self.game.board.setDirection(1, 1, Direction.right) self.game.board.setDirection(0, 2, Direction.bottom) self.game.board.setDirection(2, 3, Direction.left) self.game.board.setDirection(4, 2, Direction.top) self.simulator = Simulator(self.game) def test_simulation(self): print print self.game.board print self.simulator.find_solution()
def test_extract_features(): """ Test case to export multiple testing/training data sets for reinforcement learning :return: """ start_time = dt.now(TIMEZONE) sim = Simulator(use_arctic=True) # for ccy in ['BTC-USD', 'ETH-USD', 'LTC-USD']: #, 'BCH-USD']: for ccy, ccy2 in [('LTC-USD', 'tLTCUSD')]: query = { 'ccy': [ccy, ccy2], 'start_date': 20190314, 'end_date': 20190317 } sim.extract_features(query) elapsed = (dt.now(TIMEZONE) - start_time).seconds print('Completed %s in %i seconds' % (__name__, elapsed)) print('DONE. EXITING %s' % __name__)
def run(): """Run the agent for a finite number of trials.""" # Set up environment and agent e = Environment() # create environment (also adds some dummy traffic) a = e.create_agent(LearningAgent) # create agent e.set_primary_agent(a, enforce_deadline=True) # specify agent to track # NOTE: You can set enforce_deadline=False while debugging to allow longer trials # Now simulate it sim = Simulator( e, update_delay=0.0, display=True ) # create simulator (uses pygame when display=True, if available) # NOTE: To speed up simulation, reduce update_delay and/or set display=False sim.run(n_trials=100) # run for a specified number of trials # NOTE: To quit midway, press Esc or close pygame window, or hit Ctrl+C on the command-line print "CONCLUSION REPORT" print "WINS: {}".format(e.wins) print "LOSSES: {}".format(e.losses) print "INFRACTIONS: {}".format(e.infractions)
def test(render, episodes, record_path):
    env = PendulumEnvironment(render=render, record_path=record_path)
    agent = DqnAgent(env.get_dim(Preprocessor.NB_STATE_HISTORY), model_file=MODEL_FILE, greedy=True)
    simulator = Simulator(env, agent, train=False)

    # env = PendulumEnvironment(render=render, debug=True)
    # agent = HumanAgent(env.get_dim())
    # # agent = RandomAgent(env.get_dim())
    # simulator = Simulator(env, agent, train=False)

    episode, total_reward, best_reward = 0, 0.000, -1000000
    for e, f, action, reward, episode_done in simulator.run(
            episodes, frames_per_episode=5 * 60 * 60):
        total_reward += reward
        if episode_done:
            print("episode {} achieves total reward {:.4f}.".format(episode, total_reward))
            episode, total_reward, best_reward = episode + 1, 0.000, max(total_reward, best_reward)
    print("best reward {:.4f}.".format(best_reward))
def get_result(batch_no, delta_t, sim_duration, dspt_times,
               stop_locs, demand_rates, board_rates, stop_num, demand_start_times,
               link_mean_speeds, link_cv_speeds, link_lengths, link_start_locs,
               cycle_lengths, green_ratios, signal_offsets, signal_locs):
    simulator = Simulator(sim_duration, dspt_times,
                          stop_locs, demand_rates, board_rates, stop_num, demand_start_times,
                          link_mean_speeds, link_cv_speeds, link_lengths, link_start_locs,
                          cycle_lengths, green_ratios, signal_offsets, signal_locs)

    stop_headways = defaultdict(list)  # stop_no -> headways
    for stop in range(stop_num):
        stop_headways[stop] = []

    for sim_r in range(batch_no):
        for t in range(sim_duration):
            is_bunched = simulator.move_one_step(delta_t)
            if is_bunched:
                break
        for stop in range(stop_num):
            arr_hdws_list = simulator.get_stop_headways(stop)
            stop_headways[stop] += arr_hdws_list
        # if sim_r == batch_no-1:
        #     simulator.plot_time_space()
        simulator.reset(dspt_times)

    # append to the global variable
    for stop, hdws in stop_headways.items():
        total_hdws_dict[stop] = hdws
def main():
    sim_length = 10   # simulation length per episode in seconds (simulation time)
    delta_t = 200     # time step duration in milliseconds (simulation time)
    sim = Simulator(delta_t)
    pi = Pi(in_dim, out_dim)
    optimizer = optim.Adam(pi.parameters(), lr=0.01)
    loss_list = []
    try:
        for epoch in range(1000):
            states = sim.reset()
            for i in range(int(sim_length / delta_t * 1000)):
                actions = pi.act(states)
                states, reward, done = sim.step(delta_t, actions)  # input step in milliseconds
                # sim.render()
                pi.rewards.append(reward)
                if done:
                    break
            loss = train(pi, optimizer)
            loss_list.append(loss)
            total_reward = sum(pi.rewards)
            pi.on_policy_reset()
            print(f'Epoch: {epoch}, loss: {loss}, total reward: {total_reward}')
    finally:
        print("Training has been stopped")
        torch.save(pi.model.state_dict(), './trained/straight_line')
        sim.plot_loss(loss_list)
def run(): """Run the agent for a finite number of trials.""" # Set up environment and agent e = Environment() # create environment (also adds some dummy traffic) a = e.create_agent(LearningAgent) # create agent e.set_primary_agent(a, enforce_deadline=True) # specify agent to track # NOTE: You can set enforce_deadline=False while debugging to allow longer trials # Now simulate it sim = Simulator( e, update_delay=.5, display=True ) # create simulator (uses pygame when display=True, if available) # NOTE: To speed up simulation, reduce update_delay and/or set display=False success_summary = pd.DataFrame(index=['no_success', 'success']) validation_no = 10 for i in range(validation_no): sim.run(n_trials=100) # run for a specified number of trials # NOTE: To quit midway, press Esc or close pygame window, or hit Ctrl+C on the command-line print "Trial Count: ", a.trial_count success_temp = pd.DataFrame.from_dict(a.trial_summary, orient='index') success_temp.index = ['no_success', 'success'] temp_column_name = 'trial_count_' + str(i + 1) success_summary[temp_column_name] = success_temp a.trial_summary[0] = 0 a.trial_summary[1] = 0 print success_summary success_average = success_summary.mean(axis=1) print "Average: " print success_average print "Percentage: ", success_average[0:][1] / success_average[0:][0] import os filename = 'smartcab/data/basic_agent_trials.csv' filename = os.path.join(filename) success_summary.to_csv(filename) print success_summary
def simulate(a, xi0, theta0, phi0, H, idx):
    sim = Simulator(a, xi0, dirichlet=True)
    sim.simulate(theta0, phi0, H)
    U = sim.conservation()
    R = sim.reflectivity()
    print(idx)
    return [U, R]
def test(param):
    sim = Simulator(640, 480, rate=int(1 / tau))
    env = CartPoleEnv()
    controller = Controller(param, 'net')
    # controller = Controller(param, 'pid')
    s = env.reset()
    g = CartPoleGraphics()
    sim.add('cartpole', g)
    data = {'s': s}

    def step():
        s = data['s']
        ctrl = controller.compute(s)
        s, fail, up = env.step(ctrl)
        data['s'] = s
        x, _, t, _ = s
        g.update(x, t)
        return fail

    sim.run(step)
def simulate_and_plot(self,x_init,T,plot_funcs): """ Simulate the system controlled via explicit and implicit MPC, then call the passed plotting functions. Parameters ---------- x_init : np.float Initial condition. T : float Simulation duration. plot_funcs : list List of callable plot functions. Should have the call signature plot_func(sim_explicit,sim_implicit) where the two arguments are the output of Simulator:run(). """ self.setup_mpc() # Simulate explicit MPC simulator = Simulator(self.explicit_mpc,T) sim_explicit = simulator.run(x_init,label='explicit') # Simulate implicit MPC simulator = Simulator(self.implicit_mpc,T) sim_implicit = simulator.run(x_init,label='implicit') # Called plotting functions for plot_func in plot_funcs: plot_func(sim_explicit,sim_implicit)
def cost_test_case_1(cost_to_test): objarr = np.transpose([[1]]) obj = SimObject(1, 1, objarr) sim = Simulator(10, 10, obj) b_s = np.array([[ 0.01428571, 0.01428571, 0.01428571, 0.01428571, 0.01428571, 0.01428571, 0.01428571, 0.01428571, 0.01428571, 0.01428571 ], [ 0.01428571, 0.01428571, 0.01428571, 0.01428571, 0.01428571, 0.01428571, 0.01428571, 0.01428571, 0.01428571, 0.01428571 ], [ 0.01428571, 0.01428571, 0.01428571, 0.01428571, 0.01428571, 0.01428571, 0.01428571, 0.01428571, 0.01428571, 0.01428571 ], [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], [ 0.01428571, 0.01428571, 0.01428571, 0.01428571, 0.01428571, 0.01428571, 0.01428571, 0.01428571, 0.01428571, 0.01428571 ], [ 0.01428571, 0.01428571, 0.01428571, 0.01428571, 0.01428571, 0.01428571, 0.01428571, 0.01428571, 0.01428571, 0.01428571 ], [ 0.01428571, 0.01428571, 0.01428571, 0.01428571, 0.01428571, 0.01428571, 0.01428571, 0.01428571, 0.01428571, 0.01428571 ], [ 0.01428571, 0.01428571, 0.01428571, 0.01428571, 0.01428571, 0.01428571, 0.01428571, 0.01428571, 0.01428571, 0.01428571 ]]) actions = [(4, 0)] observations = [(-1, [False, False, False])] actual_cost = cost_to_test(sim, b_s, actions, observations, desired_loc=(0, 0)) exp_cost = heuristic(sim, b_s) + len(actions) assert actual_cost == exp_cost
def main(unused_argv): opt = Options() sim = Simulator(opt.map_ind, opt.cub_siz, opt.pob_siz, opt.act_num) trans = TransitionTable(opt.state_siz, opt.act_num, opt.hist_len, opt.minibatch_size, opt.valid_size, opt.states_fil, opt.labels_fil) # 1. train ###################################### # TODO implement your training here! # you can get the full data from the transition table like this: # # # both train_data and valid_data contain tupes of images and labels train_data = trans.get_train() valid_data = trans.get_valid() samples_train_data = np.float32(train_data[0]) labels_train_data = np.float32(train_data[1]) unhotted_labels_train_data = unhot(labels_train_data) samples_valid_data = np.float32(valid_data[0]) labels_valid_data = np.float32(valid_data[1]) unhotted_labels_valid_data = unhot(labels_valid_data) print("Shape of samples_train_data {}".format(samples_train_data.shape)) print("Shape of labels_train_data {}".format(labels_train_data.shape)) print("Shape of unhotted_labels_train_data {}".format(unhotted_labels_train_data.shape)) classifier = cnn.get_estimator() # Train the model train_input_fn = tf.estimator.inputs.numpy_input_fn( x={"x": samples_train_data}, y=unhotted_labels_train_data, batch_size=100, num_epochs=None, shuffle=True) classifier.train( input_fn=train_input_fn, steps=1000 ) eval_input_fn = tf.estimator.inputs.numpy_input_fn( x={"x": samples_valid_data}, y=unhotted_labels_valid_data, num_epochs=1, shuffle=False ) eval_results = classifier.evaluate(input_fn=eval_input_fn) print(eval_results)
def run(): """Run the agent for a finite number of trials.""" learning_rate = 0.42 discount_rate = 0.15 initial_q_hat = 4 # Set up environment and agent e = Environment() # create environment (also adds some dummy traffic) a = e.create_agent(LearningAgent, learning_rate, discount_rate, initial_q_hat) # create agent e.set_primary_agent(a, enforce_deadline=True) # specify agent to track # NOTE: You can set enforce_deadline=False while debugging to allow longer trials # Now simulate it sim = Simulator( e, update_delay=0, display=False ) # create simulator (uses pygame when display=True, if available) # NOTE: To speed up simulation, reduce update_delay and/or set display=False sim.run(n_trials=100) # run for a specified number of trials print "Failed trials: " print a.get_failed_trials()
def run(): """Run the agent for a finite number of trials.""" # Set up environment and agent e = Environment() # create environment (also adds some dummy traffic) a = e.create_agent(LearningAgent) # create agent # QUESTION 1- sets `enforce_deadline` to `False` e.set_primary_agent(a, enforce_deadline=True) # specify agent to track # NOTE: You can set enforce_deadline=False # while debugging to allow longer trials # Now simulate it # # create simulator (uses pygame when display=True, if available) sim = Simulator(e, update_delay=0.01, display=False) # NOTE: To speed up simulation, # reduce update_delay and/or set display=False # run for a specified number of trials sim.run(n_trials=100) print "Finished!"
def Simulate(self): """ :return: """ undefeated = {'ANY': []} for i in range(self.experiments): simulator = Simulator(self.season, self.standings, self.simulations) simulator.Simulate() count_undefeated = collections.Counter(simulator.undefeated) for team in count_undefeated: if team in undefeated: undefeated[team].append(count_undefeated[team]) else: undefeated[team] = [count_undefeated[team]] undefeated['ANY'].append( collections.Counter(simulator.nUndefeated)[1]) self.undefeated = { team: sorted(undefeated[team]) for team in undefeated }
def _start_worker(G, policy_class, policy_locals, task):
    G.scope = "worker_{}".format(G.worker_id)
    with tf.variable_scope(G.scope):
        policy = policy_class.copy(policy_locals)
    simulator = Simulator(task)
    G.ops = get_param_assign_ops(policy.get_params())
    G.sampler = Sampler(simulator, policy)
    G.sess = tf.Session()
    G.sess.__enter__()
    G.sess.run(tf.global_variables_initializer())
def simulator_path(pytestconfig) -> Path:
    path_str = pytestconfig.getoption('--simulator')
    if path_str:
        return Path(path_str)

    # use default from dependencies directory
    path = Simulator.default_simulator_path()
    if path:
        return path

    raise pytest.UsageError('failed to find default simulator '
                            '(try to run bootstrap.py) and none was '
                            'specified using the --simulator')
def run(): """Run the agent for a finite number of trials.""" import time success_rates = [] last20_redlight_violations = [] last20_planner_noncompliance = [] for count in range(10): # Set up environment and agent e = Environment() # create environment (also adds some dummy traffic) a = e.create_agent(LearningAgent) # create agent e.set_primary_agent(a, enforce_deadline=True) # specify agent to track # NOTE: You can set enforce_deadline=False while debugging to allow longer trials # Now simulate it sim = Simulator( e, update_delay=0.0000005, display=False ) # create simulator (uses pygame when display=True, if available) # NOTE: To speed up simulation, reduce update_delay and/or set display=False sim.run(n_trials=100) # run for a specified number of trials # NOTE: To quit midway, press Esc or close pygame window, or hit Ctrl+C on the command-line #plot_agent_performance(a.alpha, a.gamma,a.success_rate, a.red_light_violations, a.planner_noncompliance, count) sum_last20_redlight_violations = sum(a.red_light_violations[-20:]) sum_last20_planner_noncompliance = sum(a.planner_noncompliance[-20:]) success_rates.append(a.success_rate) last20_redlight_violations.append(sum_last20_redlight_violations) last20_planner_noncompliance.append(sum_last20_planner_noncompliance) mean_success = sum(success_rates) / float(len(success_rates)) mean_last20redlight = sum(last20_redlight_violations) / float( len(last20_redlight_violations)) mean_last20planner = sum(last20_planner_noncompliance) / float( len(last20_planner_noncompliance)) print 'Mean success rate: ', mean_success print 'Mean last 20 red light violationse: ', mean_last20redlight print 'Mean last 20 planner_noncompliance: ', mean_last20planner