def main():
    player_1 = Agent()
    player_2 = Agent()
    environment = Environment()  # fixed typo: was "environent"

    state_winner_triples = helpers.get_state_hash_and_winner(environment)
    x_values = helpers.initial_value_x(environment, state_winner_triples)
    o_values = helpers.initial_value_o(environment, state_winner_triples)

    player_1.setV(x_values)
    player_2.setV(o_values)
    player_1.set_symbol(environment.x_piece)
    player_2.set_symbol(environment.o_piece)

    helpers.train_agents(player_1, player_2, EPOCHS, environment)

    human = Human()
    human.set_symbol(environment.o_piece)
    while True:
        player_1.set_verbose(True)
        helpers.play_game(player_1, human, Environment(), draw=2)
        answer = input("Play again? [Y/n]: ")
        if answer and answer.lower()[0] == 'n':
            break
def main():
    env = gym.make(name_env)
    input_shape = env.observation_space.shape
    input_shape = U.preprocess(np.zeros(input_shape)).shape
    output_shape = (env.action_space.n,)

    sw = createSummaryWriter()
    agent = PPO(network, input_shape, output_shape, sw)

    renderer = Environment(name_env, agent, sw, True)
    renderer.daemon = True
    renderer.start()

    environments = [Environment(name_env, agent, sw, False)
                    for _ in range(num_parallel)]
    Environment.wait = num_parallel
    for env in environments:
        env.daemon = True
        env.start()

    while True:
        if Environment.wait == 0:
            agent.updateModel(Environment.training_queue, epochs)
            Environment.training_queue = []
            Environment.wait = num_parallel
            for env in environments:
                env.canGo = True
        else:
            time.sleep(.001)

    for env in environments:
        env.join()
def get_Averages(num_trials, cycles_per_trial, alpha, lr, write_to_file, filename):
    DEBUG = False
    env = Environment(2, True)
    cache = Cache(env, lr, alpha, DEBUG)

    # Holding arrays for calculation/final output.
    # Arrays are indexed by cycle of trial.
    s0_lever_avg = [0 for x in range(cycles_per_trial)]
    s0_magazine_avg = [0 for x in range(cycles_per_trial)]
    s1_lever_avg = [0 for x in range(cycles_per_trial)]
    s1_magazine_avg = [0 for x in range(cycles_per_trial)]
    uncertainty_avg = [0 for x in range(cycles_per_trial)]

    # Generate data
    for x in range(num_trials):
        # Create new instance
        env = Environment(2, True)
        cache = Cache(env, lr, alpha, DEBUG)
        for c in range(cycles_per_trial):
            s0, s1, uncertainty = cache.run_task()
            s0_lever, s0_magazine = s0
            s1_lever, s1_magazine = s1
            cache.current_state = 0

            # Add cycle
            s0_lever_avg[c] += s0_lever
            s0_magazine_avg[c] += s0_magazine
            s1_lever_avg[c] += s1_lever
            s1_magazine_avg[c] += s1_magazine
            # accumulate like the other columns (was "=", which broke the
            # division by num_trials below)
            uncertainty_avg[c] += uncertainty

    # Divide elements by trial count to produce averages
    for x in range(cycles_per_trial):
        s0_lever_avg[x] /= num_trials
        s0_magazine_avg[x] /= num_trials
        s1_lever_avg[x] /= num_trials
        s1_magazine_avg[x] /= num_trials
        uncertainty_avg[x] /= num_trials
        print(s0_lever_avg[x], s0_magazine_avg[x], s1_lever_avg[x],
              s1_magazine_avg[x], uncertainty_avg[x])

    # Output to file
    if write_to_file:
        filen = (filename + "T" + str(num_trials) + "_C" + str(cycles_per_trial)
                 + "_A" + str(alpha) + ".csv")
        print("Data saved as ", filen)
        with open(filen, "w") as f:
            f.write("interval, s0_lever, s0_magazine, s1_lever, s1_magazine, uncertainty\n")
            for x in range(cycles_per_trial):
                # include the uncertainty column promised by the header
                new_row = (str(x) + ", " + str(s0_lever_avg[x]) + ", "
                           + str(s0_magazine_avg[x]) + ", " + str(s1_lever_avg[x])
                           + ", " + str(s1_magazine_avg[x]) + ", "
                           + str(uncertainty_avg[x]) + "\n")
                f.write(new_row)
def main(**kwargs):
    if kwargs['model'] in ['1', '2']:  # search
        env = Environment(kwargs['setting'][0])
        arg = int(kwargs['setting'][1])
        if kwargs['model'] == '1':  # SimulatedAnnealing
            model = SimulatedAnnealing(env)
            assert arg in [1, 2], "wrong setting"
        elif kwargs['model'] == '2':  # MountainClimbing
            model = MountainClimbing(env)
            assert 1 <= arg <= 20, "wrong setting"

        # execute search
        best_board, best_score = model.search(kwargs['executiontime'], arg)
        print("Final Board")
        showBoard(best_board)
        print("Score")
        print(best_score)
    elif kwargs['model'] == '3':  # only evaluate board
        env = Environment(kwargs['setting'][0])
        print("Board to Evaluate")
        showBoard(env.board)
        print("Score")
        print(env.score(board2map(env.board)))
    else:
        print('No model like ' + kwargs['model'])
def get_Averages_Deval(num_trials, cycles_per_trial, alpha, lr, write_to_file,
                       filename, val_reduct):
    DEBUG = False
    env = Environment(2, True)
    cache = Cache(env, lr, alpha, DEBUG)

    # Holding arrays for calculation/final output.
    # Arrays are indexed by cycle of trial.
    s0_lever_avg = [0 for x in range(cycles_per_trial)]
    s0_magazine_avg = [0 for x in range(cycles_per_trial)]
    s1_lever_avg = [0 for x in range(cycles_per_trial)]
    s1_magazine_avg = [0 for x in range(cycles_per_trial)]

    # Generate data
    for x in range(num_trials):
        # Create new instance
        env = Environment(2, True)
        cache = Cache(env, True, lr, alpha, DEBUG)

        # Train on new environment / agent
        for c in range(cycles_per_trial):
            if random.randint(1, 2) == 2:
                env.states[3].reward = val_reduct
            s0, s1 = cache.run_task()
            s0_lever, s0_magazine = s0
            s1_lever, s1_magazine = s1
            cache.current_state = 0

            # Add cycle
            s0_lever_avg[c] += s0_lever
            s0_magazine_avg[c] += s0_magazine
            s1_lever_avg[c] += s1_lever
            s1_magazine_avg[c] += s1_magazine

    # Divide elements by trial count to produce averages
    for x in range(cycles_per_trial):
        s0_lever_avg[x] /= num_trials
        s0_magazine_avg[x] /= num_trials
        s1_lever_avg[x] /= num_trials
        s1_magazine_avg[x] /= num_trials
        # print(s1_magazine_avg[x])

    # Output to file
    if write_to_file:
        filen = (filename + "_C" + str(cycles_per_trial) + "_VR" + str(val_reduct)
                 + "_A" + str(alpha) + "_LR" + str(lr) + ".csv")  # "_T" + str(num_trials)
        print("Data saved as ", filen)
        with open(filen, "w") as f:
            f.write("interval, s0_lever, s0_magazine, s1_lever, s1_magazine\n")
            for x in range(cycles_per_trial):
                new_row = (str(x) + ", " + str(s0_lever_avg[x]) + ", "
                           + str(s0_magazine_avg[x]) + ", " + str(s1_lever_avg[x])
                           + ", " + str(s1_magazine_avg[x]) + "\n")
                f.write(new_row)
def __init__(self, args, sess):
    self.env_act = Environment(args, 'act')
    self.net_act = DeepQLearner(args, 'act', 'channels_first')
    # self.net_act = DeepQLearner(args, sess, 'act')  # for tensorflow
    self.env_arg = Environment(args, 'arg')
    self.net_arg = DeepQLearner(args, 'arg', 'channels_first')
    # self.net_arg = DeepQLearner(args, sess, 'arg')  # for tensorflow
    self.num_words = args.num_words
    self.context_len = args.context_len
def __init__(self, args, sess):
    self.env_act = Environment(args, 'act')
    # self.net_act = DeepQLearner(args, 'act', 'channels_first')
    self.net_act = DeepQLearner(args, sess, 'act')  # for tensorflow
    self.env_arg = Environment(args, 'arg')
    # self.net_arg = DeepQLearner(args, 'arg', 'channels_first')
    self.net_arg = DeepQLearner(args, sess, 'arg')  # for tensorflow
    self.num_words = args.num_words
    self.context_len = args.context_len
    self.gamma = args.gamma
    self.uncertainty_mode = 'cml'
def setup_agent(Args, Midi):
    if Args.ci or Args.c:
        # learn with given chords
        rewards = get_rewards(Midi['chords'], Midi['actions'], 1)
        env = Environment(states=Midi['states'],
                          actions=Midi['actions'],
                          rewards=rewards)
        agent = Agent(env)
    else:
        # learn with TAMER
        env = Environment(states=Midi['states'], actions=Midi['actions'])
        agent = Agent(env)
    return agent
def get_Averages(num_trials, cycles_per_trial, discount_factor, write_to_file, filename):
    DEBUG = False
    env = Environment(2, True)

    # Holding arrays for calculation/final output.
    # Arrays are indexed by cycle of trial.
    s0_lever_avg = [0 for x in range(cycles_per_trial)]
    s0_magazine_avg = [0 for x in range(cycles_per_trial)]
    s1_lever_avg = [0 for x in range(cycles_per_trial)]
    s1_magazine_avg = [0 for x in range(cycles_per_trial)]

    # Generate data
    for x in range(num_trials):
        # Create new instance. The original hard-coded 0.2 here even though
        # the output filename records discount_factor; pass the parameter.
        env = Environment(2, True)
        tree = Tree(env, discount_factor)
        for c in range(cycles_per_trial):
            s0, s1 = tree.run_task()
            s0_lever, s0_magazine = s0
            s1_lever, s1_magazine = s1
            tree.current_state = 0

            # Add cycle
            s0_lever_avg[c] += s0_lever
            s0_magazine_avg[c] += s0_magazine
            s1_lever_avg[c] += s1_lever
            s1_magazine_avg[c] += s1_magazine

    # Divide elements by trial count to produce averages
    for x in range(cycles_per_trial):
        s0_lever_avg[x] /= num_trials
        s0_magazine_avg[x] /= num_trials
        s1_lever_avg[x] /= num_trials
        s1_magazine_avg[x] /= num_trials
        # print(s1_magazine_avg[x])

    # Output to file
    if write_to_file:
        filen = (filename + "T" + str(num_trials) + "_C" + str(cycles_per_trial)
                 + "_DF" + str(discount_factor) + ".csv")
        print("Data saved as ", filen)
        with open(filen, "w") as f:
            f.write("interval, s0_lever, s0_magazine, s1_lever, s1_magazine\n")
            for x in range(cycles_per_trial):
                new_row = (str(x) + ", " + str(s0_lever_avg[x]) + ", "
                           + str(s0_magazine_avg[x]) + ", " + str(s1_lever_avg[x])
                           + ", " + str(s1_magazine_avg[x]) + "\n")
                f.write(new_row)
def rendered_games(p1, p2, board_size, connections_to_win):
    # Turn off training mode for the players
    if hasattr(p1, 'is_training'):
        p1.is_training = False
    if hasattr(p2, 'is_training'):
        p2.is_training = False

    # Remove randomness from the players
    if hasattr(p1, 'is_exploring'):
        p1.is_exploring = False
    if hasattr(p2, 'is_exploring'):
        p2.is_exploring = False

    print("----------")
    print("Connect ", connections_to_win)
    print("----------")

    while True:
        print("New Game: \n")
        player = input("Press 1 to play as player 1, press otherwise to play as player 2: ")
        if player == "1":
            connect4 = Environment(p2, p1, board_size, connections_to_win)
        else:
            connect4 = Environment(p1, p2, board_size, connections_to_win)

        done = False
        while not done:
            connect4.print_board()
            done = connect4.request_action()
        connect4.print_board()

        print("-----------")
        print("Game Over: ")
        if connect4.winner == 0:
            print("P1 wins")
        elif connect4.winner == 1:
            print("P2 wins")
        else:
            print("Tie")
        print("-----------\n")

        exit_value = input("Enter 1 to play another game: ")
        if exit_value != "1":
            break
def main():
    num_eps = 5000
    num_runs = 10
    random.seed(0)
    np.random.seed(0)

    agent = Agent()
    env = Environment()
    rlglue = RLGlue(env, agent)
    del agent, env

    for run in range(num_runs):
        rlglue.rl_init()
        performances = []
        for ep in range(num_eps):
            rlglue.rl_start()
            # rlglue.rl_env_message('renderON')
            terminal = False
            while not terminal:
                reward, state, action, terminal = rlglue.rl_step()

            # Find the first policy that performs at 100%
            performance = testPolicy(rlglue.rl_agent_message('policy')) * 100
            performances.append(performance)
            if performance >= 100:
                # print(rlglue.rl_agent_message('policy'))
                print('Episode: %d' % (ep + 1))
                break

    plt.plot(performances)
    plt.savefig('test.png')
def __init__(self, thread_index, global_network, initial_learning_rate,
             learning_rate_input, grad_applier, device):
    self.thread_index = thread_index
    self.learning_rate_input = learning_rate_input

    self.local_network = Network(thread_index, device)
    self.local_network.build_loss()

    with tf.device(device):
        local_var_refs = [v._ref() for v in self.local_network.get_vars()]
        self.gradients = tf.gradients(self.local_network.total_loss,
                                      local_var_refs,
                                      gate_gradients=False,
                                      aggregation_method=None,
                                      colocate_gradients_with_ops=False)

    self.apply_gradients = grad_applier.apply_gradients(
        global_network.get_vars(), self.gradients)
    self.update_network = self.local_network.copy_network(global_network)

    self.env = Environment(thread_index == 1)
    self.state = self.env.reset()

    self.worker_total_steps = 0
    self.worker_total_eps = 0
    self.start_time = time.time()
    self.initial_learning_rate = initial_learning_rate
    self.episode_reward = 0
def draw_level_in_pygame():
    # Initialize Level
    global myEnvironment
    myEnvironment = Environment()

    global myLevel
    myLevel = initLevel(level_set, current_level)
def setup():
    env = Environment()
    # env.add_connections({0:[0]})
    load_agent = None
    if SOURCE_SMART_LOADS:
        with open(LOAD_MODEL_PATH + '/' + load_agent_params[LOAD_MODE]
                  + '_agent_' + str(load_agent_params[LOAD_DAY]) + '.pickle',
                  'rb') as f:
            load_agent = pickle.load(f)
        env.add_connections({0: list(range(SOURCE_NUM_LOADS))})
    else:
        env.add_dumb_loads(0, SOURCE_NUM_LOADS)
    env.set_environment_ready()
    env.reset(True)

    source_agent_dict = {
        0: QTableAgent(env.get_source_action_space(),
                       {SOURCE_DEMAND_STATE: env.get_overall_demand_bounds(0)},
                       {SOURCE_DEMAND_STATE: 20},
                       default_action=1,
                       discount_factor=SOURCE_DISCOUNT_FACTOR)
    }
    source_agent_dict[0].set_learning_rate(SOURCE_LEARNING_RATE)
    return env, source_agent_dict, load_agent
def setup():
    env = Environment()
    loadParams = {}
    for i in range(NUM_COPIES):
        loadParams[i] = {}
        loadParams[i]['batteryParams'] = {
            'battery_capacity': capacity_function(i),
            # 'battery_capacity': charging_rate_function(i)*5,
            'charging_rate': capacity_function(i) / 5,
            # 'charging_rate': charging_rate_function(i)
        }
    env.add_connections({0: range(NUM_AGENTS * NUM_COPIES + 3)},
                        load_param_dict=loadParams)
    env.add_dumb_loads(0, 100000)
    env.set_environment_ready(test_mode=False)
    env.reset(0)

    # load_agent_dict = {0: QTableAgent(env.get_load_action_space(),
    #                                   {LOAD_BATTERY_STATE: [0, 100],
    #                                    LOAD_PRICE_STATE: env.get_price_bounds(0)},
    #                                   {LOAD_BATTERY_STATE: 20, LOAD_PRICE_STATE: 10},
    #                                   default_action=1,
    #                                   discount_factor=DISCOUNT_FACTOR)}
    # load_agent_dict[0].set_learning_rate(LEARNING_RATE)

    # `load_agent` is only defined by the commented-out block above, so this
    # call is disabled as well to avoid a NameError:
    # load_agent.set_explore_rate(0)
    return env
def train_sarsa_agent(n_iters):
    # State space is agent hand x dealer hand x agent actions (22 x 10 x 2)
    state_space_size = [22, 10, 2]

    # initialise sarsa agent
    sarsa_agent = sarsa(state_space_size, gamma=0.1)

    # Train agent
    for i in range(n_iters):
        # initialise the environment
        card_table = Environment()

        # game ends when terminal state is reached
        while not card_table.is_state_terminal:
            s = card_table.state
            # Adjust state so that it matches the 0-indexed ndarray indices
            s = (s[0] - 1, s[1] - 1)

            # agent takes action, gets reward
            a = sarsa_agent.choose_action(s)
            s_, r = card_table.step(a)
            s_ = (s_[0] - 1, s_[1] - 1)
            sarsa_agent.update_value_function(s, a, r, s_)

    # Return the trained agent
    return sarsa_agent
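# --- Illustration (not part of the snippet above) ---
# A minimal sketch of the tabular update that the `sarsa` class above is
# assumed to perform; the class itself is not shown in this section. The
# epsilon-greedy policy and the learning rate `alpha` are assumptions, not
# taken from the source.
import numpy as np

class SarsaSketch:
    def __init__(self, state_space_size, gamma=0.1, alpha=0.05, epsilon=0.1):
        self.q = np.zeros(state_space_size)  # Q-table indexed as q[s0, s1, a]
        self.gamma, self.alpha, self.epsilon = gamma, alpha, epsilon

    def choose_action(self, s):
        # epsilon-greedy over the last (action) axis of the Q-table
        if np.random.rand() < self.epsilon:
            return int(np.random.randint(self.q.shape[-1]))
        return int(np.argmax(self.q[s]))

    def update_value_function(self, s, a, r, s_):
        # On-policy TD(0) target: bootstrap from the action the policy would
        # take next. A full implementation would skip the bootstrap when s_
        # is terminal.
        a_ = self.choose_action(s_)
        td_target = r + self.gamma * self.q[s_ + (a_,)]
        self.q[s + (a,)] += self.alpha * (td_target - self.q[s + (a,)])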
def __init__(self, sess): print("Initializing the agent...") self.sess = sess self.env = Environment() self.state_size = self.env.get_state_size() self.action_size = self.env.get_action_size() print("Creation of the main QNetwork...") self.mainQNetwork = QNetwork(self.state_size, self.action_size, 'main') print("Main QNetwork created !\n") print("Creation of the target QNetwork...") self.targetQNetwork = QNetwork(self.state_size, self.action_size, 'target') print("Target QNetwork created !\n") self.buffer = PrioritizedReplayBuffer(parameters.BUFFER_SIZE, parameters.ALPHA) self.epsilon = parameters.EPSILON_START self.beta = parameters.BETA_START self.initial_learning_rate = parameters.LEARNING_RATE trainables = tf.trainable_variables() self.update_target_ops = updateTargetGraph(trainables) self.nb_ep = 1 self.best_run = -1e10
def main():
    num_eps = 200000

    agent = Agent()
    env = Environment()
    rlglue = RLGlue(env, agent)
    del agent, env

    solves = 0
    rlglue.rl_init()
    rewards = []
    for ep in range(num_eps):
        rlglue.rl_start()
        # rlglue.rl_env_message('renderON')
        terminal = False
        reward = 0
        while not terminal:
            reward, state, action, terminal = rlglue.rl_step()
            if ep > 1000:
                rlglue.rl_env_message('renderON')
                print(state)
                time.sleep(0.1)
        rewards.append(reward)
        if ep >= 99:
            if np.average(rewards[ep - 99:ep + 1]) > 0.78:
                # (ep + 1) must be parenthesised: '%' binds tighter than '+'
                print('solved at episode %d' % (ep + 1))
                break
def setup(self, imgPath):
    imgName, ext = os.path.splitext(os.path.basename(imgPath))
    self.imgName = imgName
    self.env = Environment(self.settings)
    self.env.loadStaticStimulus(self.settings.batch + '/' + imgPath)
    self.eye = Eye(self.settings, self.env)
    self.periphMap = PeripheralAttentionalMap(self.env.height, self.env.width,
                                              self.settings)
    self.centralMap = CentralAttentionalMap(self.env.height, self.env.width,
                                            self.settings)
    self.conspMap = ConspicuityMap(self.env.height, self.env.width, self.settings)
    self.priorityMap = PriorityMap(self.env.height, self.env.width, self.settings)
    self.fixHistMap = FixationHistoryMap(self.env.height, self.env.width,
                                         self.env.hPadded, self.env.wPadded,
                                         self.settings)
    self.LongTermMemory = LTM(self.settings)
    self.visualTaskExecutive = vTE(self.settings)
    self.TaskRelevanceMap = TRM(self.env.height, self.env.width, self.settings)

    if self.settings.task_relevance == 1:
        # learn representations if not done previously
        self.LongTermMemory.learn()
        # get task relevance (initial)
        self.TaskRelevanceMap.setTRM(
            self.visualTaskExecutive.get_relevance(self.LongTermMemory,
                                                   self.env.scene))
def test_reward(self):
    done = False
    i = 0
    self.chronics_handler.next_chronics()
    self.env = Environment(init_grid_path=os.path.join(self.path_matpower,
                                                       self.case_file),
                           backend=self.backend,
                           chronics_handler=self.chronics_handler,
                           parameters=self.env_params,
                           rewardClass=L2RPNReward,
                           names_chronics_to_backend=self.names_chronics_to_backend)
    if PROFILE_CODE:
        cp = cProfile.Profile()
        cp.enable()
    beg_ = time.time()
    cum_reward = 0
    while not done:
        do_nothing = self.env.helper_action_player({})
        obs, reward, done, info = self.env.step(do_nothing)  # should load the first time stamp
        cum_reward += reward
        i += 1
    end_ = time.time()
    if DEBUG:
        msg_ = ("\nEnv: {:.2f}s"
                "\n\t - apply act {:.2f}s"
                "\n\t - run pf: {:.2f}s"
                "\n\t - env update + observation: {:.2f}s"
                "\nTotal time: {:.2f}"
                "\nCumulative reward: {:.1f}")
        print(msg_.format(self.env._time_apply_act + self.env._time_powerflow
                          + self.env._time_extract_obs,
                          self.env._time_apply_act,
                          self.env._time_powerflow,
                          self.env._time_extract_obs,
                          end_ - beg_,
                          cum_reward))
    if PROFILE_CODE:
        cp.disable()
        cp.print_stats(sort="tottime")
    assert i == 287, "Wrong number of timesteps"
    assert np.abs(cum_reward - 5739.929117641016) <= self.tol_one, "Wrong reward"
def nextConfig(self):
    N = self.whichConfig = (self.whichConfig + 1) % NUM_CONFIGS
    if self.envNP is not None:
        self.envNP.removeNode()
    if self.pathNP is not None:
        self.pathNP.removeNode()

    self.environment = Environment(OBSTACLE_FILE,
                                   ALL_CONFIGS[N][0],
                                   ALL_CONFIGS[N][1],
                                   ALL_CONFIGS[N][2])
    self.environment.dump()
    print("Start: " + str(self.environment.start))
    print("End: " + str(self.environment.end))
    print("Shooter: " + str(self.environment.shooterPos))

    shortestPath = getShortestPath(self.environment)
    print("Path: ")
    print(shortestPath)

    self.envNP = render.attachNewNode(self.environment.produceRending())
    self.pathNP = render.attachNewNode(
        self.environment.renderPath(shortestPath, Vec4(1.000, 0.647, 0.000, 1)))

    for i in range(len(self.environment.obstaclesWalls)):
        wall = self.environment.obstaclesWalls[i]
        ls = LineSegment(Point2(300, -300), Point2(-300, 400))
        I = ls.intersectLines(wall)
        if I[1] == 1:
            print(i)
def trainAgentOffline_random(agent, environmentParameters, trainingEpisodes):
    """Train an agent and measure its performance."""
    # learn from episodes
    for run in range(trainingEpisodes):
        # select initial environment parameters
        initialParameters = random.choice(environmentParameters)
        experienceEpisode(agent, Environment(*initialParameters))
        agent.wipeShortMemory()

        if run % 10 == 0:
            # train from replay memory
            allInput, allLabels = [], []
            for shortMemory in agent.replayMemory:
                netInput, labels = agent.getSarsaLambda(shortMemory)
                allInput.append(netInput)
                allLabels.append(labels)
            allInput = torch.cat(allInput)
            allLabels = torch.cat(allLabels)
            agent.learn(allInput, allLabels)
    return agent
def train_mc_agent(n_iters):
    # State space is agent hand x dealer hand x agent actions (22 x 10 x 2)
    state_space_size = [22, 10, 2]
    mc_agent = mc(state_space_size)

    # Play many card games in order to estimate the value function
    for i in range(n_iters):
        # Initialise a new game
        card_table = Environment()
        game_reward = 0
        state_actions_visited = []
        while not card_table.is_state_terminal:
            s = card_table.state
            # Adjust state so that it matches the 0-indexed ndarray indices
            s = (s[0] - 1, s[1] - 1)

            # agent takes action, gets reward
            a = mc_agent.choose_action(s)
            sa = s + (a,)
            state_actions_visited.append(sa)
            s, r = card_table.step(a)
            game_reward += r

        # Update the agent's value function at the end of the game
        mc_agent.update_value_function(game_reward)
    return mc_agent
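# --- Illustration (not part of the snippet above) ---
# A minimal sketch of what `mc.update_value_function(game_reward)` is assumed
# to do with the state-action pairs visited during the game; the incremental
# averaging with visit counts is an assumption, not taken from the source.
import numpy as np

class MCSketch:
    def __init__(self, state_space_size):
        self.q = np.zeros(state_space_size)  # mean return per (state, action)
        self.n = np.zeros(state_space_size)  # visit counts
        self.visited = []                    # (s0, s1, a) tuples from this game

    def choose_action(self, s):
        a = int(np.argmax(self.q[s]))        # greedy for brevity; a real agent explores
        self.visited.append(s + (a,))
        return a

    def update_value_function(self, game_reward):
        # Every-visit Monte Carlo: fold the game's total return into the
        # running average of each visited state-action pair.
        for sa in self.visited:
            self.n[sa] += 1
            self.q[sa] += (game_reward - self.q[sa]) / self.n[sa]
        self.visited = []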
def test_nb_timestep_overflow_nodisc_2(self):
    # on this _grid, the first line, with id 18, is overheated and is
    # disconnected; powerline 16 then has a relative flow of 1.5916318201096937.
    # In this scenario there is no second line disconnection, because the
    # overflow is a soft overflow and the powerline has presumably been on
    # overflow for only 1 timestep.
    case_file = self.case_file
    env_params = copy.deepcopy(self.env_params)
    env_params.HARD_OVERFLOW_THRESHOLD = 1.5
    env = Environment(init_grid_path=os.path.join(self.path_matpower, case_file),
                      backend=self.backend,
                      chronics_handler=self.chronics_handler,
                      parameters=env_params)
    self.backend.load_grid(self.path_matpower, case_file)

    env.timestep_overflow[self.id_2nd_line_disco] = 1

    thermal_limit = 10 * self.lines_flows_init
    thermal_limit[self.id_first_line_disco] = self.lines_flows_init[
        self.id_first_line_disco] / 2
    thermal_limit[self.id_2nd_line_disco] = 400
    self.backend.set_thermal_limit(thermal_limit)

    disco, infos = self.backend.next_grid_state(env, is_dc=False)
    assert len(infos) == 1  # check that we don't simulate a cascading failure
    assert disco[self.id_first_line_disco]
    assert np.sum(disco) == 1
def call(self, interpreter, args: List[object]):
    '''
    Create a fresh local symbol table for this call, with a parent of the
    "closure" environment that was frozen-in when the function was declared.
    '''
    environment = Environment(self.closure)
    '''
    For each parameter named in the declaration, define that name in the
    environment as having the value given in the call. The arity is checked
    before this call() method is invoked, hence we know the arg list and
    param list are the same length. Use the Python builtin function zip()
    to avoid a boring count loop.
    '''
    for (param, arg) in zip(self.declaration.params, args):
        environment.define(param.lexeme, arg)
    '''
    With all parameters assigned their argument values, execute the body of
    the function. There are four cases: the body does or does not execute a
    return statement, and this is or isn't an initializer. In an initializer,
    return <expr> is not allowed. Otherwise, the value of return <expr> is in
    the Exception raised.

                          No return    return
        Initializer       "this"       "this"
        normal method     None         expr
    '''
    try:
        interpreter.execute_block(self.declaration.body, environment)
        return_value = self.closure.fetch("this") if self.isInitializer else None
    except ReturnUnwinder as RW:
        return_value = self.closure.fetch("this") if self.isInitializer else RW.return_value
    return return_value
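# --- Illustration (not part of the snippet above) ---
# A minimal sketch of the chained Environment assumed by call(): define()
# writes into the local table and fetch() walks parent links. Only the
# constructor, define(), and fetch() appear in the snippet; everything else
# here (the attribute names, the NameError) is an assumption.
class EnvironmentSketch:
    def __init__(self, parent=None):
        self.parent = parent  # enclosing environment, e.g. a function's closure
        self.table = {}

    def define(self, name, value):
        self.table[name] = value

    def fetch(self, name):
        if name in self.table:
            return self.table[name]
        if self.parent is not None:
            return self.parent.fetch(name)  # walk outward through enclosures
        raise NameError(name)

# Usage mirrors call(): a fresh frame whose parent is the frozen-in closure.
closure = EnvironmentSketch()
closure.define("this", "<instance>")
frame = EnvironmentSketch(parent=closure)
frame.define("param", 42)
assert frame.fetch("param") == 42
assert frame.fetch("this") == "<instance>"  # found via the parent chain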
def __init__(self, path):
    super().__init__()
    self.environment = Environment()  # Initialize the environment
    self.auto_reload = 10

    if path != "null" and os.path.isfile(path):
        with open(path, 'rb') as f:
            self.environment = pickle.load(f)
        print(utils.bordered("Information",
                             " Bots: {0}, Epoch: {1}".format(
                                 len(self.environment.bots),
                                 self.environment.epoch)))
    else:
        self.environment.setup()  # Perform the initial setup
        print(utils.bordered("Information",
                             " Date: {0} \n Epoch: {1}, Bots: {2}".format(
                                 datetime.datetime.today().strftime("%m-%d-%Y %H-%M-%S"),
                                 0, len(self.environment.bots))))

    self.setupUi(self)  # Initialize the GUI

    thread = Thread(target=self.__update, args=())
    thread.daemon = True
    thread.start()

    self.label_food_count.setText(str(len(self.environment.food)))
    self.label_dump_count.setText(path)
    self.button_reload.clicked.connect(self.__update_tabs)
    self.tab_box_scores.doubleClicked.connect(self.doubleClicked_table)
    self.mpl.canvas.plot(self.environment.history)
    self.check_box_auto_reload.setText("Auto-update {0}s.".format(self.auto_reload))
def setUp(self):
    tfi = TextFileInterface(relative_directory="TestDB/")
    self.environment = Environment(tfi, DEBUG=True)
    self.environment.database.clear_database()
    self.environment.database.create_account("root", "root", "administrator")
    self.environment.database.create_course("361", "SoftwareEngineering")
def __init__(self, error_report: Callable[[int, str], None]):
    self.error_report = error_report
    '''
    Create the global environment for this run. Personally I don't like
    calling it "environment". It's wordy and repetitive, and also confusing
    given the number of "[eE]nvironments" we have. I'd prefer "globals",
    since that's what this level is.

    In Section 10.2.1, preparing to add built-in functions, he changes this
    to store a reference to the most global environment and, reasonably but
    unfortunately, calls it "globals". That's the name of a Python built-in.
    Well, we are already using Token.type; what's one more?

    Create the globals environment and initialize it with an instance of
    the clock function.
    '''
    self.globals = Environment()  # Environment
    self.globals.define(CONTINUE, True)  # initialize magic loop variable
    self.globals.define('clock', Interpreter.builtinClock())
    self.environment = self.globals  # initialize nested environments
    '''
    Define the "locals" as a dict. This is initialized by the Resolver so
    that each variable reference is associated with its access-depth: how
    many levels up the chain of enclosing environments to look for it.
    Refer to Chapter 11 and Resolver.py, and see the resolve() method below.

    The keys of the dict are not simple identifiers, but Expr instances.
    Each separate reference to an identifier is represented in the parsed
    program by an Expr.Variable or Expr.Assign, which is a unique object.
    Thus there is no fear of name-collisions in the mapping, and each
    reference is related to its access-depth at the syntactic point where
    it was found.
    '''
    self.locals = dict()  # Mapping[Expr, int]
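# --- Illustration (not part of the snippet above) ---
# A minimal sketch of how self.locals is assumed to be filled and consumed.
# resolve() is referenced in the docstring above; look_up_variable() and the
# `parent` attribute used to walk enclosing environments are assumptions
# modeled on the same interpreter design, not taken from the source.
def resolve(self, expr, depth: int):
    # Called by the Resolver: this exact Expr node refers to a name declared
    # `depth` environments out from where it is used.
    self.locals[expr] = depth

def look_up_variable(self, name, expr):
    depth = self.locals.get(expr)
    if depth is not None:
        env = self.environment
        for _ in range(depth):  # walk `depth` enclosing environments
            env = env.parent
        return env.fetch(name.lexeme)
    # An unresolved reference falls back to the globals environment.
    return self.globals.fetch(name.lexeme)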
def __init__(self, sess, gui, displayer, saver):
    """
    Build a new instance of Environment and QNetwork.

    Args:
        sess     : the tensorflow session in which to build the network
        gui      : a GUI instance to manage the control of the agent
        displayer: a Displayer instance to keep track of the episode rewards
        saver    : a Saver instance to save the network periodically
    """
    print("Initializing the agent...")

    self.sess = sess
    self.gui = gui
    self.gui_thread = threading.Thread(target=lambda: self.gui.run(self))
    self.displayer = displayer
    self.saver = saver
    signal.signal(signal.SIGINT, self.interrupt)

    self.env = Environment()
    self.QNetwork = QNetwork(sess)
    self.buffer = ExperienceBuffer(prioritized=Settings.PRIORITIZED_ER)

    self.epsilon = Settings.EPSILON_START
    self.beta = Settings.BETA_START

    self.delta_z = (Settings.MAX_Q - Settings.MIN_Q) / (Settings.NB_ATOMS - 1)
    self.z = np.linspace(Settings.MIN_Q, Settings.MAX_Q, Settings.NB_ATOMS)

    self.create_summaries()

    self.best_run = -1e10
    self.n_gif = 0

    print("Agent initialized !\n")
def test_nb_timestep_overflow_disc0(self):
    # on this _grid, the first line, with id 5, is overheated and is
    # disconnected; powerline 16 then has a relative flow of 1.5916318201096937.
    # In this scenario there IS a second line disconnection: with
    # NB_TIMESTEP_POWERFLOW_ALLOWED = 0 no timestep on overflow is tolerated,
    # and the assertions below check a cascading failure of length 2.
    case_file = self.case_file
    env_params = copy.deepcopy(self.env_params)
    env_params.HARD_OVERFLOW_THRESHOLD = 1.5
    env_params.NB_TIMESTEP_POWERFLOW_ALLOWED = 0
    env = Environment(init_grid_path=os.path.join(self.path_matpower, case_file),
                      backend=self.backend,
                      chronics_handler=self.chronics_handler,
                      parameters=env_params)
    self.backend.load_grid(self.path_matpower, case_file)

    thermal_limit = 10 * self.lines_flows_init
    thermal_limit[self.id_first_line_disco] = self.lines_flows_init[
        self.id_first_line_disco] / 2
    thermal_limit[self.id_2nd_line_disco] = 400
    self.backend.set_thermal_limit(thermal_limit)

    disco, infos = self.backend.next_grid_state(env, is_dc=False)
    assert len(infos) == 2  # check that there is a cascading failure of length 2
    assert disco[self.id_first_line_disco]
    assert disco[self.id_2nd_line_disco]
    assert np.sum(disco) == 2