def run(): """Run the agent for a finite number of trials.""" # Code for testing multiple alphas """ i = 0 alphas = [0.5, 0.6, 0.7, 0.8, 0.9] for alpha in alphas: print "********************Run " + str(i) + " Alpha is " + str(alpha) +"********************" # Set up environment and agent e = Environment() # create environment (also adds some dummy traffic) a = e.create_agent(LearningAgent, alpha) # create agent e.set_primary_agent(a, enforce_deadline=True) # specify agent to track # NOTE: You can set enforce_deadline=False while debugging to allow longer trials # Now simulate it sim = Simulator(e, update_delay=0.0001, display=False) # create simulator (uses pygame when display=True, if available) # NOTE: To speed up simulation, reduce update_delay and/or set display=False sim.run(n_trials=100) # run for a specified number of trials # NOTE: To quit midway, press Esc or close pygame window, or hit Ctrl+C on the command-line i += 1 """ # Set up environment and agent e = Environment() # create environment (also adds some dummy traffic) a = e.create_agent(LearningAgent) # create agent e.set_primary_agent(a, enforce_deadline=True) # specify agent to track # NOTE: You can set enforce_deadline=False while debugging to allow longer trials # Now simulate it sim = Simulator(e, update_delay=0.5, display=True) # create simulator (uses pygame when display=True, if available) # NOTE: To speed up simulation, reduce update_delay and/or set display=False sim.run(n_trials=100) # run for a specified number of trials
def test_simple_model_checking2(self):
    path = "examples/rpython_performance/SigmaLoop.mch"
    if os.name == 'nt':
        path = "examples/rpython_performance\\SigmaLoop"
    ast_string = file_to_AST_str(path)
    root = str_ast_to_python_ast(ast_string)

    # Test
    env = Environment()
    env._max_int = 2**31
    mch = parse_ast(root, env)
    type_check_bmch(root, env, mch)  # also checks all included, seen, used and extend
    solution_file_read = False
    bstates = set_up_constants(root, env, mch, solution_file_read)
    assert len(bstates) == 0  # no setup possible
    bstates = exec_initialisation(root, env, mch, solution_file_read)
    assert len(bstates) == 1  # only one possibility (sum:=45)
    assert len(env.state_space.seen_states) == 0
    assert isinstance(bstates[0], BState)
    env.state_space.set_current_state(bstates[0])
    assert len(env.state_space.seen_states) == 1
    invariant = root.children[2]
    assert isinstance(invariant, AInvariantMachineClause)
    assert interpret(invariant, env)
    assert len(env.state_space.stack) == 2
    next_states = calc_next_states(env, mch)
    assert len(next_states) == 1
    assert len(env.state_space.stack) == 2  # init and empty setup
    assert env.get_value('sum') == 55
    env.state_space.set_current_state(next_states[0].bstate)
    assert env.get_value('sum') == 55
def create_output_directory(base_path):
    '''
    Description:
        Creates either a symbolic link to the online cache or a local
        directory.

    Note:
        With the local method, a symbolic link is created so that we can
        just tar.gz the product and place the checksum directly on the
        product cache.  With the remote method, we just create a directory
        to hold the tar.gz and checksum before using ftp/scp to transfer
        the product over the network.

    Returns:
        string: The fullpath to the "output" link or directory.

    Parameters:
        base_path - The location where to create the "output" link or
                    directory under.
    '''

    e = Environment()

    distribution_method = e.get_distribution_method()

    if distribution_method == 'local':
        distribution_directory = e.get_distribution_directory()
        return create_linked_output_directory(base_path, distribution_directory)
    else:
        return create_local_output_directory(base_path)
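# The create_linked_output_directory / create_local_output_directory helpers are not part of
# this snippet.  A minimal sketch of what they might look like, assuming an "output" name under
# base_path and the use of os.symlink / os.makedirs -- both assumptions, not confirmed here.
import os


def create_local_output_directory(base_path):
    # Hypothetical: plain directory named "output" under base_path.
    output_path = os.path.join(base_path, 'output')
    if not os.path.exists(output_path):
        os.makedirs(output_path)
    return output_path


def create_linked_output_directory(base_path, distribution_directory):
    # Hypothetical: symlink "output" pointing into the online cache, so the tar.gz and
    # checksum land directly on the product cache.
    output_path = os.path.join(base_path, 'output')
    os.symlink(distribution_directory, output_path)
    return output_path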
def test_types_lambda2(self):
    # Build AST
    string_to_file("#PREDICATE f=" + "%" + "(x,y).(x=0 & y=10|TRUE)", file_name)
    ast_string = file_to_AST_str(file_name)
    root = str_ast_to_python_ast(ast_string)

    # Type
    env = Environment()
    type_with_known_types(root, env, [], ["f", "x", "y"])
    assert isinstance(get_type_by_name(env, "f"), PowerSetType)
    assert isinstance(get_type_by_name(env, "f").data, CartType)
    assert isinstance(get_type_by_name(env, "f").data.left, PowerSetType)
    assert isinstance(get_type_by_name(env, "f").data.right, PowerSetType)
    dom_type = get_type_by_name(env, "f").data.left
    img_type = get_type_by_name(env, "f").data.right  # only present if lambda is ass. to var
    assert isinstance(img_type.data, BoolType)
    assert isinstance(dom_type.data, CartType)
    assert isinstance(dom_type.data.left, PowerSetType)
    assert isinstance(dom_type.data.right, PowerSetType)
    assert isinstance(dom_type.data.left.data, IntegerType)
    assert isinstance(dom_type.data.right.data, IntegerType)
    lambda_node = root.children[0].children[1]
    assert isinstance(lambda_node, ALambdaExpression)
    image_type = env.get_lambda_type_by_node(lambda_node)  # this function always returns a type
    assert isinstance(image_type, BoolType)
def __init__(self, background, player, enemy):
    Environment.__init__(self, background)
    self.player = player
    self.enemy = enemy
    if isinstance(player, BattleGroup):
        self.players = sprite.Group(player.groupmembers)
    else:
        self.players = sprite.Group(player)
    if isinstance(enemy, BattleGroup):
        self.enemies = sprite.Group(enemy.groupmembers)
    else:
        self.enemies = sprite.Group(enemy)
    self.sprites = sprite.RenderUpdates(self.players, self.enemies)
    self.combatants = sprite.Group(self.players, self.enemies)
    self.alignCombatants(self.players, 608 - 16, Direction.LEFT)
    self.alignCombatants(self.enemies, 32 + 16, Direction.RIGHT)
    # TODO: Battlefield status needs to be updated
    self.statusBar = BattlefieldStatus(self.players.sprites()[0], self.enemies.sprites()[0])
    self.frameCount = settings.fps / 2
    self.battleQueue = [(c.speed, c) for c in self.combatants]
    self.battleQueue.sort(key=itemgetter(0))
def add_abstract_environment(self, environment_name, environment_description):
    url = "%s/%s" % (self.paasmanager_url, "catalog/org/FIWARE/environment")
    env = Environment(environment_name, environment_description)
    payload = tostring(env.to_env_xml())
    self.__add_environment(url, payload)
def run(): """Run the agent for a finite number of trials.""" import numpy as np # intuition values 1st selected # alpha = 0.5 # gamma = 0.7 # epsilon = 0.05 # optimal values found alpha = 0.5 gamma = 0.2 epsilon = 0.07 # some "bad" values just to test how good our optimal is # alpha = 0.8 # gamma = 0.6 # epsilon = 0.2 # Set up environment and agent e = Environment() # create environment (also adds some dummy traffic) a = e.create_agent(LearningAgent, alpha, gamma, epsilon) # create agent e.set_primary_agent(a, enforce_deadline=True) # set agent to track # Now simulate it sim = Simulator(e, update_delay=0.0001) # reduce update_delay to speed up simulation sim.run(n_trials=100) # press Esc or close pygame window to quit mean = np.mean(a.trial_array[0][75:100]) print "Average Steps: " + str(mean)
def execute(self, agent, x, y, env):
    newEnv = Environment()
    prereqs = [("holding", agent, x)]
    deletions = prereqs  # NOTE: alias of prereqs, so preconditions appended below are deleted as well
    additions = [("empty", agent), ("on", x, y), ("clear", x)]
    if not y == "ground":
        prereqs.append(("clear", y))
    for prereq in prereqs:
        if not prereq in env.relations:
            return None
    newRelations = env.relations[:]
    for deletion in deletions:
        newRelations.remove(deletion)
    for addition in additions:
        newRelations.append(addition)
    newEnv.relations = newRelations
    newEnv.steps = env.steps[:]
    newEnv.steps.append(("putDown", agent, x, y))
    return newEnv
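# A minimal usage sketch of the putDown operator above, assuming the method lives on a
# hypothetical PutDown class and that Environment() starts with empty relations/steps lists
# (both assumptions, not confirmed by this snippet).
env = Environment()
env.relations = [("holding", "a1", "b1"), ("clear", "b2")]
env.steps = []

result = PutDown().execute("a1", "b1", "b2", env)
if result is not None:
    print(result.relations)  # [('empty', 'a1'), ('on', 'b1', 'b2'), ('clear', 'b1')]
    print(result.steps)      # [('putDown', 'a1', 'b1', 'b2')]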
class Game(object):
    def __init__(self, max_population_size):
        pygame.display.set_caption('Generations')
        width, height = (800, 800)
        self.screen = pygame.display.set_mode((width, height))
        self.env = Environment(width, height, max_population_size)
        self.add_creatures()

    def add_creatures(self):
        for x in range(self.env.max_population_size):
            self.env.add_creature(x=randint(0, 800), y=randint(0, 800))

    def handle_event(self, event):
        running = True
        if event.type == pygame.QUIT:
            running = False
        return running

    def run_game(self):
        running = True
        while running:
            for event in pygame.event.get():
                running = self.handle_event(event)
            self.screen.fill(self.env.color)
            self.env.enforce_selection_pressure()
            time.sleep(0.002)
            for creature in self.env.creatures:
                pygame.draw.circle(self.screen, (255, 255, 255),
                                   (int(creature.x), int(creature.y)),
                                   creature.size + 1, creature.thickness)
                pygame.draw.circle(self.screen, creature.color,
                                   (int(creature.x), int(creature.y)),
                                   creature.size, creature.thickness)
            pygame.display.flip()
def run(): """Run the agent for a finite number of trials.""" # Set up environment and agent e = Environment() # create environment (also adds some dummy traffic) a = e.create_agent(LearningAgent) # create agent e.set_primary_agent(a, enforce_deadline = True) # specify agent to track # NOTE: You can set enforce_deadline=False while debugging to allow longer trials # Now simulate it sim = Simulator(e, update_delay=0.0005, display = False) # create simulator (uses pygame when display=True, if available) # NOTE: To speed up simulation, reduce update_delay and/or set display=False sim.run(n_trials=100) # run for a specified number of trials # NOTE: To quit midway, press Esc or close pygame window, or hit Ctrl+C on the command-line print ("................................DATA SUMMARY................................") print ("alpha: ", a.alpha) print ("gamma: ", a.gamma) print ("epsilon: ", a.epsilon) print ("total actions: ", a.total_actions) print ("total rewards: ", a.total_rewards) print ("number of negative reward in each trial ", a.last_negative_reward_count_list.values()) print ("number of actions in each trial ", a.last_actions_list.values()) print ("total rewards in each trial ", a.last_rewards_list.values())
def distribute_statistics(immutability, source_path, packaging_path, parms):
    '''
    Description:
        Determines if the distribution method is set to local or remote and
        calls the correct distribution method.

    Returns:
        product_file - The full path to the product either on the local
                       system or the remote destination.
        cksum_value - The checksum value of the product.

    Parameters:
        source_path - The full path to the directory containing the data to
                      package and distribute.
        packaging_path - The full path on the local system where the
                         packaged product should be placed.
        parms - All the user and system defined parameters.
    '''

    env = Environment()

    distribution_method = env.get_distribution_method()

    product_id = parms['product_id']
    order_id = parms['orderid']

    # The file paths to the distributed product and checksum files
    product_file = 'ERROR'
    cksum_file = 'ERROR'

    if distribution_method == DISTRIBUTION_METHOD_LOCAL:
        # Use the local cache path
        cache_path = os.path.join(settings.ESPA_LOCAL_CACHE_DIRECTORY, order_id)

        # Adjust the packaging_path to use the cache
        package_path = os.path.join(packaging_path, cache_path)

        distribute_statistics_local(immutability, product_id, source_path, package_path)

    else:  # remote
        env = Environment()

        # Determine the remote hostname to use
        destination_host = utilities.get_cache_hostname(env.get_cache_host_list())

        # Use the remote cache path
        cache_path = os.path.join(settings.ESPA_REMOTE_CACHE_DIRECTORY, order_id)

        options = parms['options']
        dest_user = options['destination_username']
        dest_pw = options['destination_pw']

        distribute_statistics_remote(immutability, product_id, source_path,
                                     destination_host, cache_path,
                                     dest_user, dest_pw)

    return (product_file, cksum_file)
def run(msg=''):
    """Run the agent for a finite number of trials."""

    # set up environment and agent
    e = Environment()  # create environment (also adds some dummy traffic)
    a = e.create_agent(LearningAgent)  # create agent
    e.set_primary_agent(a, enforce_deadline=True)  # specify agent to track
    # NOTE: you can set enforce_deadline=False while debugging to allow longer trials

    # Now simulate it
    sim = Simulator(e, update_delay=0, display=False)  # create simulator (uses pygame when display=True, if available)
    # NOTE: to speed up simulation, reduce update_delay and/or set display=False

    sim.run(n_trials=100)  # run for a specified number of trials
    # NOTE: to quit midway, press Esc or close pygame window, or hit Ctrl+C on the command-line

    results = a.results
    average_cycles = mean([result[0] for result in results])
    average_reward = mean([result[1] for result in results])
    average_violations = mean([result[2] for result in results])

    # print '=' * 10, msg
    # print 'Average Cycles:', average_cycles
    # print 'Average Reward:', average_reward
    # print 'Average Violations:', average_violations

    return average_cycles, average_reward, average_violations
def main(): """Run the agent for a finite number of trials.""" # Set up environment and agent file = open('Q_v2.pickle', 'r') Q = pickle.load(file) file.close() e = Environment() # create environment (also adds some dummy traffic) a = e.create_agent(LearningAgent) # create agent a.Q = Q print "QQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQ" print len(a.Q) e.set_primary_agent(a, enforce_deadline=False) # specify agent to track # NOTE: You can set enforce_deadline=False while debugging to allow longer trials # Now simulate it sim = Simulator(e, update_delay=1, display=True) # create simulator (uses pygame when display=True, if available) # NOTE: To speed up simulation, reduce update_delay and/or set display=False sim.run(n_trials=1000) # run for a specified number of trials # NOTE: To quit midway, press Esc or close pygame window, or hit Ctrl+C on the command-line f = open('Q_v2.pickle', 'w') pickle.dump(a.Q, f) f.close()
def test_genAST_para_def(self):
    # Build AST
    string = '''
    MACHINE Test
    VARIABLES z
    INVARIANT z:MyType
    INITIALISATION z:= Expr(2)
    DEFINITIONS
        Expr(X) == 1+X;
        MyType == NAT;
    END'''
    string_to_file(string, file_name)
    ast_string = file_to_AST_str(file_name)
    root = str_ast_to_python_ast(ast_string)

    # Test
    env = Environment()
    dh = DefinitionHandler(env, str_ast_to_python_ast)
    dh.repl_defs(root)
    mch = parse_ast(root, env)
    type_check_bmch(root, env, mch)  # also checks all included, seen, used and extend
    arbitrary_init_machine(root, env, mch)  # init VARIABLES and eval INVARIANT
    invariant = root.children[2]
    assert isinstance(invariant, AInvariantMachineClause)
    assert interpret(invariant, env)
    assert env.get_value("z") == 3
def run(): """Run the agent for a finite number of trials.""" record = [] for q_initial in [0, 2, 10]: for alpha in range(1, 6): # Set up environment and agent e = Environment() # create environment (also adds some dummy traffic) a = e.create_agent(LearningAgent, alpha * 0.2, q_initial) # create agent e.set_primary_agent(a, enforce_deadline=True) # specify agent to track # NOTE: You can set enforce_deadline=False while debugging to allow longer trials # Now simulate it sim = Simulator(e, update_delay=0, display=False) # create simulator (uses pygame when display=True, if available) # NOTE: To speed up simulation, reduce update_delay and/or set display=False sim.run(n_trials=100) # run for a specified number of trials # NOTE: To quit midway, press Esc or close pygame window, or hit Ctrl+C on the command-line a.reset() trip_log = pd.DataFrame(a.trip_log) # trip_log['Used'] = trip_log['Deadline'] - trip_log['Remaining'] trip_log['Efficiency'] = trip_log['Remaining'] / trip_log['Deadline'] * 100 record.append({ 'Success Rate': trip_log[trip_log.Success == True].shape[0], 'Alpha': alpha * 0.2, 'Q Initial': q_initial, 'Efficiency': trip_log['Efficiency'].mean(), 'Ave Reward': trip_log['Reward'].mean(), 'Ave Penalty': trip_log['Penalty'].mean(), }); return pd.DataFrame(record)
def run(): """Run the agent for a finite number of trials.""" # Set up environment and agent e = Environment() # create environment (also adds some dummy traffic) a = e.create_agent(LearningAgent) # create agent e.set_primary_agent(a, enforce_deadline=True) # specify agent to track # NOTE: You can set enforce_deadline=False while debugging to allow longer trials # Now simulate it sim = Simulator(e, update_delay=0.00001, display=False) # create simulator (uses pygame when display=True, if available) # NOTE: To speed up simulation, reduce update_delay and/or set display=False sim.run(n_trials=100) # run for a specified number of trials # NOTE: To quit midway, press Esc or close pygame window, or hit Ctrl+C on the command-line # Print summary # allPenalities = a.numberOfPenaltiesList allFailures = a.numberOfFailuresList numberOfTrials = float(len(allFailures)) numberOfFailures = float(allFailures[-1]) numberOfSuccess = numberOfTrials - numberOfFailures numberOfSuccessFirstHalf = ((numberOfTrials) / 2) - float(allFailures[len(allFailures)/2]) numberOfSuccessSecondHalf = numberOfSuccess - numberOfSuccessFirstHalf print ("=================================================================================") print ("SUMMARY") print ("=================================================================================") print ("Total Penalities received = %3.2f" % (sum(allPenalities))) print ("\tPenalities received in the first half of trials = %3.2f" % (sum(allPenalities[:len(allPenalities)/2]))) print ("\tPenalities received in the second half of trials = %3.2f" % (sum(allPenalities[len(allPenalities)/2:]))) print ("Success Rate: %3.2f%%" % (numberOfSuccess/numberOfTrials*100)) print ("\tSuccess Rate of the first half : %3.2f%%" % (numberOfSuccessFirstHalf/(numberOfTrials/2)*100)) print ("\tSuccess Rate of the second half: %3.2f%%" % (numberOfSuccessSecondHalf/(numberOfTrials/2)*100))
def run(): """Run the agent for a finite number of trials.""" # create common place to set debug values dbg_deadline = True dbg_update_delay = 0.01 dbg_display = False dbg_trials = 100 # create switches to run as random, way_light, way_light_vehicles # random = take random actions only # way_light_only = Traffic Light, Way Point # way_light_Vehicle = Traffic Light, Way Point, Left, Right, Oncoming # way_light_modified (or any other value) = Way Point, Combination Light and Vehicle State dbg_runtype = 'way_light_only' # Set up environment and agent e = Environment() # create environment (also adds some dummy traffic) a = e.create_agent(LearningAgent) # create agent # set the run type (random choice, simple state, state with vehicles) a.run_type = dbg_runtype e.set_primary_agent(a, enforce_deadline=dbg_deadline) # specify agent to track # NOTE: You can set enforce_deadline=False while debugging to allow longer trials # Now simulate it sim = Simulator(e, update_delay=dbg_update_delay, display=dbg_display) # create simulator (uses pygame when display=True, if available) # NOTE: To speed up simulation, reduce update_delay and/or set display=False sim.run(n_trials=dbg_trials) # run for a specified number of trials # NOTE: To quit midway, press Esc or close pygame window, or hit Ctrl+C on the command-line # at the end of the simulation show results # call qlearner reset to get last trial result a.q_learner.reset(a.step_count) a.q_learner.show_results()
def run(): """Run the agent for a finite number of trials.""" # create output file target_dir = os.path.dirname(os.path.realpath(__file__)) target_path = os.path.join(target_dir, 'qlearning_tuning_report.txt') if not os.path.exists(target_dir): os.makedirs(target_dir) # loop the parameters for epsilon in [0.1, 0.5, 0.9]: for alpha in np.arange(0.1, 1, 0.2): for gamma in np.arange(0.1, 1, 0.2): print epsilon, alpha, gamma # Set up environment and agent e = Environment() # create environment (also adds some dummy traffic) a = e.create_agent(QAgent, epsilon, alpha, gamma) # create agent e.set_primary_agent(a, enforce_deadline=True) # specify agent to track # NOTE: You can set enforce_deadline=False while debugging to allow longer trials # Now simulate it sim = Simulator(e, update_delay=0.001, display=False) # create simulator (uses pygame when display=True, if available) # NOTE: To speed up simulation, reduce update_delay and/or set display=False sim.run(n_trials=100) # run for a specified number of trials # get the count for the number of successful trials and average running time summary = sim.report() # write out the results try: with open(target_path, 'a') as f: f.write('epsilon {}, alpha {}, gamma {} : success {}, avg_time {}, total_reward {}\n'.format(epsilon, alpha, gamma, summary[0], summary[1], round(a.total_reward, 3))) f.close() except: raise
def main():
    pg.init()
    pg.font.init()

    # TEMP: hardcoded keymaps.
    inputs = [
        #InputManager({
        #    'x_axis': (pg.K_j, pg.K_l),
        #    'y_axis': (pg.K_k, pg.K_i),
        #    'brake': pg.K_SPACE,
        #}),
        InputManager({
            'thrust': pg.K_UP,
            'brake': pg.K_DOWN,
            'turn_direction': (pg.K_LEFT, pg.K_RIGHT),
        }),
        InputManager({
            'thrust': pg.K_PERIOD,
            'brake': pg.K_e,
            'turn_direction': (pg.K_o, pg.K_u),
        }),
    ]

    env = Environment(inputs)
    disp = Display(env, SCREENSIZE)
    env.load_level(levels.inelastic_collision_bug)  # TEMP, hardcoded level selection.
    main_loop(env, disp)
    pg.quit()
def test_insert(self):
    e = Environment()
    env = e.env_list
    var = Lexeme(token_type="VARIABLE", value="x")
    val = Lexeme(token_type="NUMBER", value=5)
    e.insert(var, val, env)
    self.assertEqual(e.lookup(var, env), 5)
class Game(object):
    def __init__(self):
        pygame.display.set_caption('Ants')
        width, height = (600, 600)
        self.screen = pygame.display.set_mode((width, height))
        self.env = Environment(width, height)

    def add_ants(self, number_of_ants):
        for x in range(number_of_ants):
            self.env.add_ant(x=50, y=50, speed=3, angle=randint(0, 360), awareness_radius=300)

    def handle_event(self, event):
        running = True
        if event.type == pygame.QUIT:
            running = False
        elif (event.type == pygame.MOUSEBUTTONDOWN) or (event.type == pygame.KEYDOWN):
            if (event.type == pygame.MOUSEBUTTONDOWN):
                print("MOUSE")
                print event
        return running

    def run_game(self):
        running = True
        while running:
            for event in pygame.event.get():
                running = self.handle_event(event)
            self.env.update()
            self.screen.fill(self.env.color)
            for p in self.env.ants:
                pygame.draw.circle(self.screen, p.color, (int(p.x), int(p.y)), p.size, p.thickness)
            pygame.display.flip()
def create_new_book(self):
    capture_style = ca.dialog(message='Please Select a capture style:',
                              Buttons=[('Single Camera', 1), ('Dual Cameras', 2)])
    if capture_style == 1:
        capture_style = 'Single'
    else:
        capture_style = 'Dual'
    identifier = ca.dialog(message='Please name this project:',
                           Buttons=[(Gtk.STOCK_OK, Gtk.ResponseType.OK),
                                    (Gtk.STOCK_CANCEL, Gtk.ResponseType.NO)],
                           get_input=True)
    if not identifier:
        return False
    location = ca.get_user_selection(title='Please choose a location for this project:')
    if not location:
        return False
    while os.path.exists(location + '/' + identifier):
        identifier = ca.dialog(message='Sorry, a project by that name already exists. ' +
                               'Please try another name:', get_input=True)
        if not identifier:
            return False
        location = ca.get_user_selection(title='Please choose a location for this project:')
        if not location:
            return False
    Environment.create_new_book_stub(location, identifier)
    return Environment.get_books(location + '/' + identifier, None,
                                 stage='new_capture', capture_style=capture_style)
def test_genAST_subst_def2(self):
    string = '''
    MACHINE Test
    VARIABLES z, b, x
    INVARIANT x:NAT & z:NAT & b:BOOL
    INITIALISATION x:=2 ; Assign(x+1, z) ; Assign(TRUE, b)
    DEFINITIONS Assign(Expr, VarName) == VarName := Expr;
    END'''
    string_to_file(string, file_name)
    ast_string = file_to_AST_str(file_name)
    root = str_ast_to_python_ast(ast_string)

    # Test
    env = Environment()
    dh = DefinitionHandler(env, str_ast_to_python_ast)
    dh.repl_defs(root)
    mch = parse_ast(root, env)
    type_check_bmch(root, env, mch)  # also checks all included, seen, used and extend
    arbitrary_init_machine(root, env, mch)  # init VARIABLES and eval INVARIANT
    invariant = root.children[2]
    assert isinstance(invariant, AInvariantMachineClause)
    assert interpret(invariant, env)
    assert env.get_value("z") == 3
    assert env.get_value("b") == True
    assert env.get_value("x") == 2
def test_genAST_sub_var(self):
    # Build AST
    string = '''
    MACHINE Test
    VARIABLES xx
    INVARIANT xx:NAT
    INITIALISATION
    BEGIN
        xx:=1;
        VAR varLoc1, varLoc2 IN
            varLoc1 := xx + 1 ;
            varLoc2 := 2 * varLoc1 ;
            xx := varLoc2
        END
    END
    END'''
    string_to_file(string, file_name)
    ast_string = file_to_AST_str(file_name)
    root = str_ast_to_python_ast(ast_string)

    # Test
    env = Environment()
    env._min_int = -1
    env._max_int = 5
    mch = parse_ast(root, env)
    type_check_bmch(root, env, mch)  # also checks all included, seen, used and extend
    arbitrary_init_machine(root, env, mch)  # init VARIABLES and eval INVARIANT
    assert isinstance(root.children[2], AInvariantMachineClause)
    assert interpret(root.children[2], env)
    assert env.get_value("xx") == 4
def run(): """Run the agent for a finite number of trials.""" # Set up environment and agent e = Environment() # create environment (also adds some dummy traffic) a = e.create_agent(LearningAgent) # create agent e.set_primary_agent(a, enforce_deadline=True) # specify agent to track # NOTE: You can set enforce_deadline=False while debugging to allow longer trials # Now simulate it sim = Simulator(e, update_delay=0.0001, display=False) # create simulator (uses pygame when display=True, if available) # NOTE: To speed up simulation, reduce update_delay and/or set display=False sim.run(n_trials=100) # run for a specified number of trials # NOTE: To quit midway, press Esc or close pygame window, or hit Ctrl+C on the command-line ## print Q table print '+++++++++++++++++++++++++++++++++++++++++++++++' print 'final Q table' print '+++++++++++++++++++++++++++++++++++++++++++++++' for key in a.Q: print key, print ["%0.2f" % i for i in a.Q[key]] print '====================================================================' print 'An Array of Arrays where each subarray shows neg rewards for a trial' print '====================================================================' #print neg rewards and split term x=a.reward_holder.split('3') y=[i.split(' ') for i in x] print y #shows an array of arrays, could calculate total neg reward for each
def run(): """Run the agent for a finite number of trials.""" # Set up environment and agent gammas = [x / 10.0 for x in xrange(0, 10)] gamma_to_success_rate = OrderedDict() gamma_to_average_reward = OrderedDict() # Run a simulation for each sample gamma value to test which # choice of gamma results in the most successful agent for gamma in gammas: # Run 10 trials over each choice of gamma to get average performance metrics for trial in xrange(10): e = Environment() # create environment (also adds some dummy traffic) a = e.create_agent(LearningAgent, (gamma)) # create agent e.set_primary_agent(a, enforce_deadline=True) # set agent to track # Now simulate it sim = Simulator(e, update_delay=0.0) # reduce update_delay to speed up simulation sim.run(n_trials=50) # press Esc or close pygame window to quit gamma_to_success_rate[a.GAMMA] = gamma_to_success_rate.get(a.GAMMA, 0) + sim.env.successful_trials gamma_to_average_reward[a.GAMMA] = ( gamma_to_average_reward.get(a.GAMMA, 0) + a.get_average_reward_per_action() ) # Get the average of the 10 trials for gamma in gamma_to_average_reward.keys(): gamma_to_average_reward[gamma] = gamma_to_average_reward[gamma] / 10 gamma_to_success_rate[gamma] = gamma_to_success_rate[gamma] / 10 print gamma_to_average_reward print gamma_to_success_rate
def run(): """Run the agent for a finite number of trials.""" options = parseOptions() env = Environment() # create environment (also adds some dummy traffic) sim = Simulator(env, update_delay=0, display=options.display) # create simulator (uses pygame when display=True, if available) results = {} from settings import params for agent, symbol in [(options.player1, 1), (options.player2, -1)]: kwargs = params[agent] env.add_agent( symbol=symbol, file=options.file, clear=options.clear, save=options.save, **kwargs) sim.run(n_trials=options.iterations) # run for a specified number of trials for agent in env.agents: results["X" if agent.symbol == 1 else 'O'] = agent.wins print results dispatcher.send(signal='main.complete', sender={})
def test_genAST_sub_let2(self):
    string = '''
    MACHINE Test
    VARIABLES X, Y
    INVARIANT X:NAT & Y:NAT
    INITIALISATION
    BEGIN
        X:=10;
        LET r1, X BE X = 6 & r1 = X / 2 IN Y := r1 END
    END
    END'''
    string_to_file(string, file_name)
    ast_string = file_to_AST_str(file_name)
    root = str_ast_to_python_ast(ast_string)

    # Test
    env = Environment()
    env._min_int = -1
    env._max_int = 5
    mch = parse_ast(root, env)
    type_check_bmch(root, env, mch)  # also checks all included, seen, used and extend
    arbitrary_init_machine(root, env, mch)  # init VARIABLES and eval INVARIANT
    assert isinstance(root.children[2], AInvariantMachineClause)
    assert interpret(root.children[2], env)
    assert env.get_value("X") == 10
    assert env.get_value("Y") == 3
def test_genAST_sub_any(self):
    string = '''
    MACHINE Test
    VARIABLES xx
    INVARIANT xx:NAT
    INITIALISATION
    BEGIN
        xx:=1;
        ANY r1, r2 WHERE r1 : NAT & r2 : NAT & r1*r1 + r2*r2 = 25
        THEN xx := r1 + r2 END
    END
    END'''
    string_to_file(string, file_name)
    ast_string = file_to_AST_str(file_name)
    root = str_ast_to_python_ast(ast_string)

    # Test
    env = Environment()
    env._min_int = -1
    env._max_int = 5
    mch = parse_ast(root, env)
    type_check_bmch(root, env, mch)  # also checks all included, seen, used and extend
    arbitrary_init_machine(root, env, mch)  # init VARIABLES and eval INVARIANT
    assert isinstance(root.children[2], AInvariantMachineClause)
    assert interpret(root.children[2], env)
    assert env.get_value("xx") == 5 or env.get_value("xx") == 7  # 3+4 or 5+0
def run():
    f = open('running_report.txt', 'w')

    # setup various parameter combinations
    discount_factors = [0.5]
    starting_learning_rates = [0.5]
    epsilon_greedy_policy = [0.09]

    for d_factor in discount_factors:
        for alpha in starting_learning_rates:
            for greedy_policy in epsilon_greedy_policy:
                """Run the agent for a finite number of trials."""

                # Set up environment and agent
                e = Environment()  # create environment (also adds some dummy traffic)
                a = e.create_agent(LearningAgent, learning_rate=alpha,
                                   discount_factor=d_factor,
                                   greedy_policy=greedy_policy)  # create agent
                e.set_primary_agent(a, enforce_deadline=True)  # specify agent to track
                # NOTE: You can set enforce_deadline=False while debugging to allow longer trials

                # Now simulate it
                sim = Simulator(e, update_delay=0, display=True)  # create simulator (uses pygame when display=True, if available)
                number_of_trials = 100
                # NOTE: To speed up simulation, reduce update_delay and/or set display=False

                sim.run(n_trials=number_of_trials)  # run for a specified number of trials
                # NOTE: To quit midway, press Esc or close pygame window, or hit Ctrl+C on the command-line

                print >> f, "Learning rate:", alpha
                print >> f, "Discount factor:", d_factor
                print >> f, "Greedy Policy:", greedy_policy
                print >> f, "Percentage completed: ", a.completed_trials / 100.0, "\n"
                f.flush()

    f.close()
def visit_block_stmt(self, stmt: st.Block) -> None:
    self.execute_block(stmt.statements, Environment(self._environment))
type=int, help="Random seed for repeatable experiments.") comarg.add_argument("--log_level", choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"], default="INFO", help="Log level.") args = parser.parse_args() logger = logging.getLogger() logger.setLevel(args.log_level) if args.random_seed: random.seed(args.random_seed) # instantiate classes env = Environment(args) mem = ReplayMemory(args) net = DeepQNetwork(env.numActions(), args) agent = Agent(env, mem, net, args) stats = Statistics(agent, net, mem, env, args) if args.load_weights: logger.info("Loading weights from %s" % args.load_weights) net.load_weights(args.load_weights) if args.play_games: logger.info("Playing for %d game(s)" % args.play_games) stats.reset() agent.play(args.play_games) stats.write(0, "play") sys.exit()
class Interpreter(ExprVisitor, StmtVisitor): def __init__(self): self._GLOBALS = Environment() self._environment = self._GLOBALS self._locals = {} self._GLOBALS.define('clock', natives.Clock()) self._GLOBALS.define('print', natives.Print()) self._GLOBALS.define('println', natives.PrintLn()) def visit_assign_expr(self, expr: ex.Assign) -> LoxValue: value = self._evaluate(expr.value) distance = self._locals.get(expr) if distance is None: self._GLOBALS.assign(expr.name, value) else: self._environment.assign_at(distance, expr.name, value) return value def visit_binary_expr(self, expr: ex.Binary) -> LoxValue: left = self._evaluate(expr.left) right = self._evaluate(expr.right) if expr.operator.type is TokenType.BANG_EQUAL: return not self._is_equal(left, right) if expr.operator.type is TokenType.EQUAL_EQUAL: return self._is_equal(left, right) if expr.operator.type is TokenType.GREATER: self._check_number_operands(expr.operator, left, right) return left > right if expr.operator.type is TokenType.GREATER_EQUAL: self._check_number_operands(expr.operator, left, right) return left >= right if expr.operator.type is TokenType.LESS: self._check_number_operands(expr.operator, left, right) return left < right if expr.operator.type is TokenType.LESS_EQUAL: self._check_number_operands(expr.operator, left, right) return left <= right if expr.operator.type is TokenType.MINUS: self._check_number_operands(expr.operator, left, right) return left - right if expr.operator.type is TokenType.SLASH: self._check_number_operands(expr.operator, left, right) self._check_denominator(expr.operator, right) return left / right if expr.operator.type is TokenType.STAR: self._check_number_operands(expr.operator, left, right) return left * right if expr.operator.type is TokenType.PLUS: if isinstance(left, float) and isinstance(right, float): return left + right if isinstance(left, str) or isinstance(right, str): return self.stringify(left) + self.stringify(right) raise RuntimeException(expr.operator, 'Incompatible operands.') if expr.operator.type is TokenType.COMMA: self._evaluate(expr.left) return self._evaluate(expr.right) # unreachable raise RuntimeError('Unreachable code.') def visit_call_expr(self, expr: ex.Call) -> LoxValue: callee = self._evaluate(expr.callee) arguments = [] for argument in expr.arguments: arguments.append(self._evaluate(argument)) if not isinstance(callee, LoxCallable): raise RuntimeException(expr.paren, 'Can only call functions and classes.') function = callee if len(arguments) != function.arity: raise RuntimeException( expr.paren, f'Expected {function.arity} arguments, but got {len(arguments)}.' 
) return callee.call(self, arguments) def visit_get_expr(self, expr: Get) -> LoxValue: obj = self._evaluate(expr.object) if isinstance(obj, LoxInstance): return obj.get(expr.name) raise RuntimeException(expr.name, 'Only instances have properties') def visit_grouping_expr(self, expr: ex.Grouping) -> LoxValue: return self._evaluate(expr.expression) def visit_literal_expr(self, expr: ex.Literal) -> LoxValue: return expr.value def visit_logical_expr(self, expr: ex.Logical) -> LoxValue: left = self._evaluate(expr.left) if expr.operator.type is TokenType.OR and self._is_truthy(left): return left if expr.operator.type is TokenType.AND and not self._is_truthy(left): return left return self._evaluate(expr.right) def visit_set_expr(self, expr: Set) -> LoxValue: obj = self._evaluate(expr.object) if not isinstance(obj, LoxInstance): raise RuntimeException(expr.name, 'Only instances have fields.') value = self._evaluate(expr.value) obj.set(expr.name, value) return value def visit_super_expr(self, expr: Super) -> Any: distance = self._locals[expr] superclass = self._environment.get_at(distance, 'super') obj = self._environment.get_at(distance - 1, 'this') if not isinstance(superclass, LoxClass): # unreachable raise RuntimeError('Unreachable code.') method = superclass.find_method(expr.method.lexeme) if method is None: raise RuntimeException( expr.method, f"Undefined property '{expr.method.lexeme}'.") return method.bind(obj) def visit_ternary_expr(self, expr: ex.Ternary) -> LoxValue: if expr.operator1.type is TokenType.QUESTION and expr.operator2.type is TokenType.COLON: pred = self._evaluate(expr.left) if self._is_truthy(pred): return self._evaluate(expr.center) else: return self._evaluate(expr.right) # unreachable raise RuntimeError('Unreachable code.') def visit_this_expr(self, expr: This) -> Any: return self._lookup_variable(expr.keyword, expr) def visit_unary_expr(self, expr: ex.Unary) -> LoxValue: right = self._evaluate(expr.right) if expr.op.type is TokenType.MINUS: self._check_number_operands(expr.op, right) return -right if expr.op.type is TokenType.BANG: return not self._is_truthy(right) # unreachable raise RuntimeError('Unreachable code.') def visit_variable_expr(self, expr: ex.Variable) -> LoxValue: return self._lookup_variable(expr.name, expr) def visit_block_stmt(self, stmt: st.Block) -> None: self.execute_block(stmt.statements, Environment(self._environment)) def visit_break_stmt(self, stmt: st.Break) -> None: raise Break() def visit_class_stmt(self, stmt: Class) -> None: superclass = None if stmt.superclass is not None: superclass = self._evaluate(stmt.superclass) if not isinstance(superclass, LoxClass): raise RuntimeException(stmt.superclass.name, 'Superclass must be a class.') self._environment.define(stmt.name.lexeme, None) if stmt.superclass is not None: self._environment = Environment(self._environment) self._environment.define('super', superclass) methods = {} for method in stmt.methods: function = LoxFunction(method, self._environment, method.name.lexeme == 'init') methods[method.name.lexeme] = function klass = LoxClass(stmt.name.lexeme, superclass, methods) if superclass is not None: self._environment = self._environment.enclosing self._environment.assign(stmt.name, klass) def visit_continue_stmt(self, stmt: st.Continue) -> None: raise Continue() def visit_expression_stmt(self, stmt: st.Expression) -> None: self._evaluate(stmt.expression) def visit_function_stmt(self, stmt: st.Function) -> None: function = LoxFunction(stmt, self._environment, False) 
self._environment.define(stmt.name.lexeme, function) def visit_if_stmt(self, stmt: st.If) -> None: if self._is_truthy(self._evaluate(stmt.condition)): self._execute(stmt.then_branch) elif stmt.else_branch: self._execute(stmt.else_branch) def visit_return_stmt(self, stmt: st.Return) -> None: raise Return( None if stmt.value is None else self._evaluate(stmt.value)) def visit_var_stmt(self, stmt: st.Var) -> None: value = None if stmt.initializer is not None: value = self._evaluate(stmt.initializer) self._environment.initialize(stmt.name, value) def visit_while_stmt(self, stmt: st.While) -> None: while self._is_truthy(self._evaluate(stmt.condition)): try: self._execute(stmt.body) except Break: break except Continue: continue def interpret(self, stmts: List[st.Stmt]) -> None: try: for stmt in stmts: self._execute(stmt) except RuntimeException as e: lox.Lox.error_runtime(e) def execute_block(self, stmts: List[st.Stmt], env: Environment) -> None: prev = self._environment try: self._environment = env for stmt in stmts: self._execute(stmt) finally: self._environment = prev def resolve(self, expr: ex.Expr, depth: int) -> None: self._locals[expr] = depth @staticmethod def stringify(value: LoxValue) -> str: if value is None: return 'nil' if isinstance(value, float): text = str(value) if text.endswith('.0'): text = text[:-2] return text if isinstance(value, bool): return str(value).lower() return str(value) def _execute(self, stmt: st.Stmt) -> None: stmt.accept(self) def _evaluate(self, expr: ex.Expr) -> LoxValue: return expr.accept(self) def _lookup_variable(self, name: Token, expr: ex.Expr): distance = self._locals.get(expr) if distance is None: return self._GLOBALS.get(name) else: return self._environment.get_at(distance, name.lexeme) @staticmethod def _is_truthy(value: LoxValue) -> bool: if value is None: return False if isinstance(value, bool): return value return True @staticmethod def _is_equal(a: LoxValue, b: LoxValue) -> bool: return a == b @staticmethod def _check_number_operands(operator: Token, *args: LoxValue) -> None: for operand in args: if not isinstance(operand, float): raise RuntimeException(operator, 'Operands must be numbers.') @staticmethod def _check_denominator(operator: Token, value: LoxValue) -> None: if value == 0: raise RuntimeException(operator, 'Division by zero.')
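# The interpreter above relies on an Environment scope chain (define/get/assign, get_at/assign_at
# with a resolved distance, initialize, and an `enclosing` link) that is not shown in this excerpt.
# A minimal sketch of what it might look like, assuming lexeme-keyed storage and a plain
# RuntimeError in place of the project's RuntimeException -- both assumptions.
from typing import Any, Dict, Optional


class Environment:
    def __init__(self, enclosing: Optional['Environment'] = None) -> None:
        self.enclosing = enclosing          # parent scope, None for globals
        self._values: Dict[str, Any] = {}   # names defined in this scope

    def define(self, name: str, value: Any) -> None:
        self._values[name] = value          # (re)define unconditionally in this scope

    def initialize(self, name_token, value: Any) -> None:
        self._values[name_token.lexeme] = value

    def get(self, name_token) -> Any:
        if name_token.lexeme in self._values:
            return self._values[name_token.lexeme]
        if self.enclosing is not None:
            return self.enclosing.get(name_token)
        raise RuntimeError(f"Undefined variable '{name_token.lexeme}'.")

    def assign(self, name_token, value: Any) -> None:
        if name_token.lexeme in self._values:
            self._values[name_token.lexeme] = value
        elif self.enclosing is not None:
            self.enclosing.assign(name_token, value)
        else:
            raise RuntimeError(f"Undefined variable '{name_token.lexeme}'.")

    def ancestor(self, distance: int) -> 'Environment':
        env = self
        for _ in range(distance):
            env = env.enclosing
        return env

    def get_at(self, distance: int, name: str) -> Any:
        return self.ancestor(distance)._values[name]

    def assign_at(self, distance: int, name_token, value: Any) -> None:
        self.ancestor(distance)._values[name_token.lexeme] = value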
        self.layers = nn.Sequential(nn.Linear(in_dim, 128), nn.ReLU(),
                                    nn.Linear(128, 128), nn.ReLU(),
                                    nn.Linear(128, out_dim))

    def forward(self, x):
        return self.layers(x)


min_epsilon = 0.05
max_epsilon = 1
epsilon_decay = 80
epsilon_episode = lambda episode: min_epsilon + np.exp(-episode / epsilon_decay) * 0.95

env = Environment("test_you")

state_space = 3
action_space = 3

batch_size = 32
max_size = 1000
memory = ReplayBuffer(state_space, max_size, batch_size)

device = torch.device("cuda:1" if torch.cuda.is_available() else "cpu")
network = Network(state_space, action_space).to(device)
target_network = Network(state_space, action_space).to(device)
target_network.load_state_dict(network.state_dict())
target_network.eval()

optimizer = optim.Adam(network.parameters())
def train_single_net(args): start = time.time() print('Current time is: %s' % get_time()) print('Starting at train_multi_nets...') gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=args.gpu_fraction) with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess: # Initial environment, replay memory, deep_q_net and agent #ipdb.set_trace() env = Environment(args) mem = ReplayMemory(args) net = FRLDQN(sess, args) agent = Agent(env, mem, net, args) best_result = {'valid': {'success_rate': {1: 0., 3: 0., 5: 0., 10: 0., -1 : 0.}, 'avg_reward': 0., 'log_epoch': -1, 'step_diff': -1}, 'test': {'success_rate': {1: 0., 3: 0., 5: 0., 10: 0., -1 : 0.}, 'avg_reward': 0., 'log_epoch': -1, 'step_diff': -1} } # loop over epochs with open(args.result_dir, 'w') as outfile: print('\n Arguments:') outfile.write('\n Arguments:\n') for k, v in sorted(args.__dict__.items(), key=lambda x:x[0]): print('{}: {}'.format(k, v)) outfile.write('{}: {}\n'.format(k, v)) print('\n') outfile.write('\n') if args.load_weights: filename = 'weights/%s_%s.h5' % (args.train_mode, args.predict_net) net.load_weights(filename) try: for epoch in range(args.start_epoch, args.start_epoch + args.epochs): agent.train(epoch, args.train_episodes, outfile, args.predict_net) rate, reward, diff = agent.test(epoch, args.test_episodes, outfile, args.predict_net, 'valid') if rate[args.success_base] > best_result['valid']['success_rate'][args.success_base]: update_best(best_result, 'valid', epoch, rate, reward, diff) print('best_epoch: {}\t best_success: {}\t avg_reward: {}\t step_diff: {}\n'.format(epoch, rate, reward, diff)) outfile.write('best_epoch: {}\t best_success: {}\t avg_reward: {}\t step_diff: {}\n\n'.format(epoch, rate, reward, diff)) rate, reward, diff = agent.test(epoch, args.test_episodes, outfile, args.predict_net, 'test') update_best(best_result, 'test', epoch, rate, reward, diff) print('\n Test results:\n success_rate: {}\t avg_reward: {}\t step_diff: {}\n'.format(rate, reward, diff)) outfile.write('\n Test results:\n success_rate: {}\t avg_reward: {}\t step_diff: {}\n'.format(rate, reward, diff)) if args.save_weights: filename = 'weights/%s_%s.h5' % (args.train_mode, args.predict_net) net.save_weights(filename, args.predict_net) print('Saved weights %s ...\n' % filename) if epoch - best_result['valid']['log_epoch'] >= args.stop_epoch_gap: print('-----Early stopping, no improvement after %d epochs-----\n' % args.stop_epoch_gap) break except KeyboardInterrupt: print('\n Manually kill the program ... \n') print('\n\n Best results:') outfile.write('\n\n Best results:\n') for data_flag, results in best_result.items(): print('\t{}'.format(data_flag)) outfile.write('\t{}\n'.format(data_flag)) for k, v in results.items(): print('\t\t{}: {}'.format(k, v)) outfile.write('\t\t{}: {}\n'.format(k, v)) end = time.time() outfile.write('\nTotal time cost: %ds\n' % (end - start)) print('Current time is: %s' % get_time()) print('Total time cost: %ds\n' % (end - start))
class Sim: def __init__(self, allies, opponents, world_size, n_games, train_batch_size, replay_mem_limit, training_rate=10, update_rate=500, sim_moves_limit=30, exploration_steps=200000, exploration_range=(0.1, 1.0), viz=None, viz_execution=None, train_saving=None): self.allies = allies self.opponents = opponents self.world_size = world_size self.moves_limit = sim_moves_limit self.training_rate = training_rate self.policy_dist_rate = update_rate self.exploration_steps = exploration_steps self.exploration_range = exploration_range self.exploration_step_value = \ (exploration_range[1]-exploration_range[0])/exploration_steps self.experience_replay = ReplayMemory(batch_size=train_batch_size, table_size=replay_mem_limit) self.training_batch_size = train_batch_size self.n_games = n_games self.replay_mem_limit = replay_mem_limit self.environment = Environment(n_rows=world_size[0], n_cols=world_size[1], n_agents=allies, n_opponents=opponents) self.metrics = {"reward": list(), "loss": list()} self.viz = viz self.viz_execution = viz_execution self.train_saving = train_saving def run(self): """ Runs general simulation and takes care of experience table population and agents training :return: """ sim = 0 while sim < self.n_games: sim_moves = 0 # Prune replay memory by 1/5 if over limit size if self.experience_replay.is_full(): self.experience_replay.refresh() # Get agent that is training training_agent = next( filter(lambda ag: ag.training, self.environment.agents)) episode_reward = [] while sim_moves < self.moves_limit and not self.environment.is_over( ): # Apply step in Environment curr_state, next_state, reward = self.environment.step( terminal_state=sim_moves == self.moves_limit - 1) # Get agent chosen action action = training_agent.get_chosen_action() episode_reward.append(reward) # Store transition in replay table self.experience_replay.insert({ "state": curr_state, "action": action, "next_state": next_state, "reward": reward["reward_value"], }) sim_moves += 1 episode_reward = pd.DataFrame(episode_reward) # Append new collected avg episode reward self.metrics["reward"].append( OrderedDict({ "Simulation No.": sim, "Avg Cumulative Reward": episode_reward["reward_value"].mean(), "Global Capture Reward": episode_reward["Global Capture Reward"].max(), "Local Capture Reward": episode_reward["Local Capture Reward"].mean(), "Reachability Reward": episode_reward["Reachability Reward"].mean() })) sim += 1 # Diminishing exploration rate if sim < self.exploration_steps: training_agent.brain.policy[ "boltzmann"].update_exploration_rate( new_er=self.exploration_step_value) # Train every N simulations if sim % self.training_rate == 0: self.train_ally() # Update training net every 10 simulations if sim % self.policy_dist_rate == 0: self.update_target_net() print("---------------------------------------------") print("Sim No. 
: {}".format(sim)) print("Average Loss : {}".format( sum(self.metrics["loss"]) / len(self.metrics["loss"]))) mdf = pd.DataFrame(self.metrics["reward"]) print("Average reward : {}".format( mdf["Avg Cumulative Reward"].mean())) print("Average GCR : {}".format( mdf["Global Capture Reward"].mean())) print("Average LCR : {}".format( mdf["Local Capture Reward"].mean())) print("Average RR : {}".format( mdf["Reachability Reward"].mean())) # Create GIF self.visualize_gif(sim) # Save model checkpoint self.save_checkpoint(training_agent, sim) # Reset game setting self.environment.reset() def update_target_net(self): self.environment.training_net.save_model("Models/", "temporary_update") self.environment.target_net.load_model("Models/", "temporary_update") def train_ally(self): """ Takes a random batch from experience replay memory and uses it to train the agent's brain NN :return: """ if not self.experience_replay.can_replay(): return # Sample batch fro replay memory mini_batch = self.experience_replay.sample() training_net = self.environment.training_net target_net = self.environment.target_net X, y = self.create_training_batch( target_net=target_net, training_net=training_net, mini_batch=mini_batch, ) history = training_net.train(X, y, self.training_batch_size) self.metrics["loss"] += history.history["loss"] def create_training_batch(self, target_net, training_net, mini_batch): # Build input and target network batches input_batch = np.ndarray(shape=(self.training_batch_size, self.environment.n_rows, self.environment.n_cols, 4)) target_batch = np.ndarray(shape=(self.training_batch_size, 1, 8)) gamma = training_net.discount_rate for i, transition in enumerate(mini_batch): # Check for possible ending state if transition["next_state"] is None: # Assign reward as target Q values target = transition["reward"] else: # Compute Q values on next state q_next = target_net.model.predict(transition["next_state"])[0] # Filter not allowed moves agent_position = np.argwhere(transition["state"][0, :, :, 0]) allowed_moves = self.environment.allowed_moves( (agent_position[0][0], agent_position[0][1])) moves_mask = np.array( [1 if pos else np.nan for pos in allowed_moves]) masked_q_next = q_next * moves_mask # Compute target Q value target = transition["reward"] + gamma * ( np.nanmax(masked_q_next)) # Update Q values vector with target value target_q = training_net.model.predict(transition["state"])[0] target_q[0][transition["action"]] = target input_state = np.reshape(transition["state"], newshape=( transition["state"].shape[1], transition["state"].shape[2], transition["state"].shape[3], )) input_batch[i] = input_state target_batch[i] = target_q return input_batch, target_batch def visualize_gif(self, sim_number): # Create GIF if self.viz and self.viz_execution and self.viz_execution(sim_number): # Reset game self.environment.reset(False) # play game run sim_moves = 0 env_seq = [copy.deepcopy(self.environment.grid)] while sim_moves < self.moves_limit and not self.environment.is_over( ): self.environment.step(False) env_seq.append(copy.deepcopy(self.environment.grid)) sim_moves += 1 # Connect frame as save gif frames = [self.viz.single_frame(env) for env in env_seq] viz.create_gif(frames, name='simulation_%d' % sim_number) def save_checkpoint(self, training_agent, sim_number): if self.train_saving is not None and self.train_saving(sim_number): save_path = 'Models/' if not os.path.exists(save_path): os.makedirs(save_path) with open(save_path + 'metrics' + '.pkl', "wb") as f: pickle.dump(self.metrics, f) # Save model 
training_agent.brain.save_model(save_path, str(sim_number))
if os.path.exists(dataset_name):
    with open(dataset_name, 'r') as f:
        t = f.read()
    vocabulary = t.split('\t')
    vocabulary_size = len(vocabulary)
else:
    vocabulary = create_vocabulary(text)
    vocabulary_size = len(vocabulary)
    with open('datasets/all_scipop_free_voc.txt', 'w') as f:
        for w_idx, w in enumerate(vocabulary):
            f.write(w)
            if w_idx < len(vocabulary) - 1:
                f.write('\t')

cpiv = get_positions_in_vocabulary(vocabulary)

# env = Environment(Lstm, BatchGenerator, vocabulary=vocabulary)
env = Environment(Lstm, BatchGenerator, vocabulary=vocabulary)

add_feed = [{'placeholder': 'dropout', 'value': 0.8}]
valid_add_feed = [{'placeholder': 'dropout', 'value': 1.}]

env.build(
    batch_size=256,
    embeddings_in_batch=False,
    num_layers=2,
    num_nodes=[1300, 1300],
    num_output_layers=2,
    num_output_nodes=[2048],
    # vocabulary_size=vocabulary_sizes[0],
    vocabulary_size=vocabulary_size,
    embedding_size=512,
    num_unrollings=10,
class Serial:
    ## init(): the constructor. Many of the arguments have default values
    # and can be skipped when calling the constructor.
    def __init__(self, port='COM1', baudrate=19200, timeout=1, bytesize=8,
                 parity='N', stopbits=1, xonxoff=0, rtscts=0):
        self.name = port
        self.port = port
        self.timeout = timeout
        self.parity = parity
        self.baudrate = baudrate
        self.bytesize = bytesize
        self.stopbits = stopbits
        self.xonxoff = xonxoff
        self.rtscts = rtscts
        self.is_open = False
        self.in_waiting = 0
        self._data = ""
        self.stop_event = None
        self.env = None

    ## isOpen()
    # returns True if the port to the Arduino is open. False otherwise
    def isOpen(self):
        return self.is_open

    ## open()
    # opens the port
    def open(self):
        if not self.is_open:
            self.stop_event = threading.Event()
            self.env = Environment(self.notify, self.stop_event)
            self.is_open = True

    ## close()
    # closes the port
    def close(self):
        print('closing...')
        if self.is_open:
            self.stop_event.set()
            self.stop_event = None
            self.env = None
            self.is_open = False

    def notify(self, data):
        self._data += data.decode('utf-8')
        self.in_waiting = len(self._data)

    ## write()
    # writes a string of characters to the internal buffer
    def write(self, data):
        rcv = data.decode('utf-8')
        if rcv[-1] == '\n':
            # execute the command and get back the output
            d = self.env.match_and_execute(rcv) + "\n"
            self._data += d
            self.in_waiting = len(self._data)

    ## read()
    # blocking read; when n > 0, then will be waiting for data
    def read(self, n=1):
        out = ''
        if n > 0:
            while len(self._data) < n:
                time.sleep(0.01)
            out = self._data[0:n]
            self._data = self._data[n:]
            self.in_waiting = len(self._data)
        return str.encode(out)

    ## readline()
    def readline(self):
        # use find(), which returns -1 when no newline is present (index() would raise ValueError)
        returnIndex = self._data.find("\n")
        if returnIndex != -1:
            s = self._data[0:returnIndex + 1]
            self._data = self._data[returnIndex + 1:]
            self.in_waiting = len(self._data)
            return str.encode(s)
        else:
            return str.encode("")

    ## __str__()
    def __str__(self):
        return "Serial<id=0xa81c10, open=%s>( port='%s', baudrate=%d," \
               % (str(self.is_open), self.port, self.baudrate) \
               + " bytesize=%d, parity='%s', stopbits=%d, xonxoff=%d, rtscts=%d)" \
               % (self.bytesize, self.parity, self.stopbits, self.xonxoff, self.rtscts)
experiment_name = 'dummy_demo'
if not os.path.exists(experiment_name):
    os.makedirs(experiment_name)

# do you want to continue your old run?
continue_run = False
continue_file = ''

# initialises the framework. enemy number can be changed accordingly
enemy = 2
env = Environment(experiment_name=experiment_name,
                  playermode="ai",
                  player_controller=player_controller(),
                  speed="fastest",
                  enemymode="static",
                  level=2,
                  enemies=[2])

# calculates the number of weights per agent
n_hidden = env.player_controller.n_hidden[0]
n_vars = (env.get_num_sensors() + 1) * n_hidden + (n_hidden + 1) * 5

best_fitness = 0
min_weight = -1
max_weight = 1

# parameter settings of the algorithm
pop_size = 10
gen_number = 10
RENDER = False
TIME_DELAY = 0.03
MAX_HUMANS = 3
MAX_ZOMBIES = 3

if RENDER:
    render = Renderer()

total_score = 0.0
trials = 100000
for _ in range(trials):
    if _ % max(int(trials / 1000), 1) == 0:
        sys.stdout.write("\r{:.2f}% complete".format(_ * 100.0 / trials))
    env = Environment(MAX_HUMANS, MAX_ZOMBIES, better_rewards=True)
    while len(env.humans) > 0 and len(env.zombies) > 0:
        # x = randrange(config.WIDTH)
        # y = randrange(config.HEIGHT)
        # x = env.zombies[0].x
        # y = env.zombies[0].y
        # x = env.humans[0].x
        # y = env.humans[0].y
        points = [(0, 0), (16000, 0), (0, 9000), (16000, 9000)]
        # x, y = points[randrange(len(points))]
        min_dist = 16000 + 9000
        index = -1
def open(self):
    if not self.is_open:
        self.stop_event = threading.Event()
        self.env = Environment(self.notify, self.stop_event)
        self.is_open = True
    '--host',
    default='127.0.0.1',
    type=str,
    help='IP address for V-Rep server (default: 127.0.0.1)'
)
parser.add_argument(
    '-p', '--port',
    default=5000,
    type=int,
    help='Port for SLAM-Sim server (default: 5000)'
)
namespace = parser.parse_args()

env = Environment()
app = Flask(__name__)


@app.route("/start-v-rep-server", methods=['POST'])
def start_vrep_server():
    vrep_ip = request.args.get('ip')
    port = request.args.get('port')

    env.connect(vrep_ip, int(port))

    if env.client_id != -1:
        time_ = datetime.now().time().isoformat()
        env.print_message(time_ + ': connected')
        answer = 'Successfully connected to V-REP server'
        # print('\033[1;32;40m ' + answer)
        print(answer)
from agent import DDPG, DeepQAgent
from environment import Environment

done_comparison_data = {
    'coords_done_fail': [45, 60, 118, 180],
    'coords_done_success': [5, 16, 122, 174],
    'img_done_fail': 'data/s8_cut_try_again.png',
    'img_done_success': 'data/game_score_s8.png',
    'restart_btn_coords': [640, 1110],
    'restart_ongame': [(2764, 93), (2624, 552)],
}

scores = {
    'coords_diamonds_gathered': [11, 27, 25, 35],
    'digits_mask_addr': 'data/digits',
    'match_threshold': 10,
    'state_area': [28, 112, 0, 296],
    'time_importance': 0.7,
    'diamonds_importance': 0.3,
    'episode_time_limit': 60,
    'diamonds_total': 7
}

env = Environment(device_ref_elements_data={
    'done_comparison_data': done_comparison_data,
    'scores': scores
})

#agent = DDPG(env)
agent = DeepQAgent(env)
train(agent, env, episode_seconds_constrain=45)
def tearDown(self):
    Harness.tearDown(self)
    reset = Environment(CANONICAL_SCHEME=unicode, CANONICAL_HOST=unicode, environ=self.environ)
    wireup.canonical(reset)
import matplotlib.backends.backend_agg as agg
import pygame, sys
from pygame.locals import *
from environment import Environment
from agent import Agent, Action
from main import *
import numpy as np

######################################################################################
#    teaching the agent to clean trashes properly with reinforcement learning        #
######################################################################################

agent = Agent(pos=(0, 0))  # create a new agent
env = Environment(agent)  # add the agent to the environment

facteur = 50
agent_pos = env.agent.position  # get the agent's position
agent_pos = (agent_pos[0] * facteur, agent_pos[1] * facteur)  # multiply it by a factor

n_a = env.action_space_n  # get the action space size
n_s = env.state_space_n  # get the state space size

q_table = np.zeros([n_s, n_a])  # init Q table
e_table = np.zeros([n_s, n_a])  # init eligibility traces

# cleaning rate for each episode
clean_rate = []
crashes = []
#!/usr/bin/env python36
# coding: utf-8

from Devices import *
from environment import Environment
from system import System
from request import Request

env = Environment()
sys_ = System(env)

# # # # # DEVICES # # # # #
for door_name in env.door_names:
    door = Door(door_name)
    sys_.register_device(door)

for room_name in env.room_names:
    lightsIndoor = IndoorLight(room_name)
    sys_.register_device(lightsIndoor)

    hvac = HVAC(room_name)
    sys_.register_device(hvac)

    if env.rooms[room_name].has_window():
        windows = Window(room_name)
        sys_.register_device(windows)

        blinds = Blind(room_name)
        sys_.register_device(blinds)

lightsOutdoor = OutdoorLight("Outdoor Lights")
    def sample_minbatch(self):
        minibatch_indices = np.random.choice(range(100), 100)  # choose 100 transitions randomly from the buffer
        mini_batch = np.array([self.buffer[i] for i in minibatch_indices], dtype=object)
        return mini_batch


# Main entry point
if __name__ == "__main__":
    # Create an environment.
    # If display is True, then the environment will be displayed after every agent step. This can be
    # set to False to speed up training time. The evaluation in part 2 of the coursework will be done
    # based on the time with display=False.
    # Magnification determines how big the window will be when displaying the environment on your
    # monitor. For desktop monitors, a value of 1000 should be about right. For laptops, a value of
    # 500 should be about right. Note that this value does not affect the underlying state space or
    # the learning, just the visualisation of the environment.
    environment = Environment(display=False, magnification=500)
    # Create an agent
    agent = Agent(environment)
    # Create a DQN (Deep Q-Network)
    dqn = DQN()
    # Create an experience replay buffer
    buffer = ReplayBuffer()

    EPISODE_LENGTH = 20
    TRAINING_ITERATIONS = 100

    # Initialise buffer with 100 transitions
    count = 0
    while count < 101:
        # Reset the environment for the start of the episode.
        agent.reset()