class AgentTest(unittest.TestCase):
    def setUp(self):
        self.json_data = '{"observations": {"screen_features": ["height_map", "player_id", "player_relative", "unit_type"], ' \
                         '"minimap_features": ["player_id", "selected"], "nonspatial_features": ["player", "score_cumulative"], ' \
                         '"action_ids": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]}, "rewards": [1, 1, 1, 1]}'
        self.config = json.loads(self.json_data)
        self.sess = tf.Session()
        self.agent_modifier = AgentModifier(self.config, 32)
        self.agent = A2CAgent(self.sess, self.agent_modifier)
        # self.obs_spec = {}
        # self._builder = dummy_observation.Builder(self._obs_spec)
        # self.obs = self._builder.build().observation
        self.env = Environment()
        self.obs = self.env.reset()

    def testMakeAction(self):
        print("Testing Make Action")
        action = self.agent.act(self.obs)
        action_made_1 = self.agent.convert_actions(action)
        action_2 = self.agent.act(self.obs)
        self.obs = self.env.reset()
        action_made_2 = self.agent.convert_actions(action_2)
        self.assertNotEqual(action_made_1, action_made_2)

    def testGetObservationFeed(self):
        print("Testing Get Observation Feed")
        feed_dict = self.agent._get_observation_feed(self.obs)
        self.obs = self.env.reset()
        feed_dict_2 = self.agent._get_observation_feed(self.obs)
        self.assertNotEqual(feed_dict, feed_dict_2)
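# Hedged usage sketch (not from the source): with `unittest`, `json`, and `tf`
# (TensorFlow v1) imported at module level alongside the project-local
# A2CAgent/AgentModifier/Environment, the suite above can be run directly
# with the standard unittest runner.
if __name__ == '__main__':
    unittest.main()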
def run(args):
    param, clone_id = args[0], args[1]
    param["transportation_cost"] = np.random.choice(param["range_transportation_cost"])
    param["customer_alpha"] = np.random.uniform(*param["range_customer_alpha"])
    param["customer_temp"] = np.random.uniform(*param["range_customer_temp"])
    param["firm_alpha"] = np.random.uniform(*param["range_firm_alpha"])
    param["firm_temp"] = np.random.uniform(*param["range_firm_temp"])
    # param["utility_consumption"] = np.random.uniform(*param["range_utility_consumption"])
    param["firm_positions"] = np.random.randint(1, param["n_positions"] + 1, size=param["n_firms"])
    param["firm_prices"] = np.random.randint(1, param["n_prices"] + 1, size=param["n_firms"])
    param["seed"] = np.random.randint(2 ** 32)

    job_id = param["job_id"]
    label = "J{}C{}".format(job_id, clone_id)

    env = Environment(**param)
    results = env.run()

    # cl_parameters is referenced but not defined here; it is assumed to be a
    # module-level configuration dict (e.g. parsed command-line parameters).
    Backup(data=results, name="results",
           root_folder=cl_parameters["working_folder"], label=label)
    Backup(data=param, name="parameters",
           root_folder=cl_parameters["working_folder"], label=label)
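# Hedged usage sketch: run() consumes a (param, clone_id) pair, so it can be
# fanned out across parameter clones with multiprocessing. The names below
# (run_clones, base_param, n_clones) are illustrative, not from the source,
# and each worker is assumed to have cl_parameters configured.
from multiprocessing import Pool

def run_clones(base_param, n_clones):
    # Each clone redraws its own random hyperparameters inside run().
    with Pool() as pool:
        pool.map(run, [(dict(base_param), i) for i in range(n_clones)])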
def __init__(self):
    self.action_size = Environment.get_action_size(flags.env_type, flags.env_name)
    self.objective_size = Environment.get_objective_size(flags.env_type, flags.env_name)
    print('flags:use_pixel_change {}'.format(flags.use_pixel_change))
    sleep(10)
    self.global_network = UnrealModel(self.action_size,
                                      self.objective_size,
                                      -1,
                                      flags.use_lstm,
                                      flags.use_pixel_change,
                                      flags.use_value_replay,
                                      flags.use_reward_prediction,
                                      0.0,
                                      0.0,
                                      "/cpu:0",
                                      for_display=True)
    self.environment = Environment.create_environment(
        flags.env_type, flags.env_name,
        env_args={'episode_schedule': flags.split,
                  'log_action_trace': flags.log_action_trace,
                  'max_states_per_scene': flags.episodes_per_scene,
                  'episodes_per_scene_test': flags.episodes_per_scene})
    print('\n======\nENV in Evaluate::ctor')
    print(self.environment)
    print(self.global_network)
    print('val_replay!!! {}'.format(flags.use_value_replay))
    print(flags.split)
    print('=======\n')
    sleep(10)
    self.episode_reward = 0
def __init__(self, thread_index):
    Environment.__init__(self)
    self.thread_index = thread_index
    self.max_step = 100
    self.control_points_per_step = 5
    self.mean_seconds_per_step = 0.1  # on average, a step every n seconds
    self.horizon_distance = 1  # meters
    self.max_distance_to_path = 0.1  # meters
    # obstacles related stuff
    self.max_obstacle_count = 3
    self.min_obstacle_radius = 0.15  # meters
    self.max_obstacle_radius = 0.45  # meters
    # information about speed parameters: http://www.ijtte.com/uploads/2012-10-01/5ebd8343-9b9c-b1d4IJTTE%20vol2%20no3%20%287%29.pdf
    self.min_speed = 0.1  # m/s
    self.max_speed = 1.4  # m/s
    self.speed_lower_limit = 0.7  # m/s, used together with max_speed to get the random speed upper limit
    self.max_speed_noise = 0.25  # m/s
    # the fastest production car has a max acceleration of 9.25 m/s^2 (https://en.wikipedia.org/wiki/List_of_fastest_production_cars_by_acceleration)
    # the slowest has a max acceleration of 0.7 m/s^2 (http://automdb.com/max_acceleration)
    self.max_acceleration = 0.7  # m/s^2
    self.max_steering_degree = 30
    self.max_steering_noise_degree = 2
    self.max_steering_angle = convert_degree_to_radiant(self.max_steering_degree)
    self.max_steering_noise_angle = convert_degree_to_radiant(self.max_steering_noise_degree)
    # splines related stuff
    self.spline_number = 2
    self.control_points_per_spline = 50
    # evaluator stuff
    self.episodes = deque()
    # shapes
    self.state_shape = self.get_state_shape()
    self.action_shape = self.get_action_shape()
def __init__(self,
             thread_index,
             global_network,
             initial_learning_rate,
             learning_rate_input,
             grad_applier,
             env_type,
             env_name,
             use_lstm,
             use_pixel_change,
             use_value_replay,
             use_reward_prediction,
             pixel_change_lambda,
             entropy_beta,
             local_t_max,
             gamma,
             gamma_pc,
             experience_history_size,
             max_global_time_step,
             device):
    self.thread_index = thread_index
    self.learning_rate_input = learning_rate_input
    self.env_type = env_type
    self.env_name = env_name
    self.use_lstm = use_lstm
    self.use_pixel_change = use_pixel_change
    self.use_value_replay = use_value_replay
    self.use_reward_prediction = use_reward_prediction
    self.local_t_max = local_t_max
    self.gamma = gamma
    self.gamma_pc = gamma_pc
    self.experience_history_size = experience_history_size
    self.max_global_time_step = max_global_time_step
    self.action_size = Environment.get_action_size(env_type, env_name)
    self.objective_size = Environment.get_objective_size(env_type, env_name)
    self.local_network = UnrealModel(self.action_size,
                                     self.objective_size,
                                     thread_index,
                                     use_lstm,
                                     use_pixel_change,
                                     use_value_replay,
                                     use_reward_prediction,
                                     pixel_change_lambda,
                                     entropy_beta,
                                     device)
    self.local_network.prepare_loss()
    self.apply_gradients = grad_applier.minimize_local(self.local_network.total_loss,
                                                       global_network.get_vars(),
                                                       self.local_network.get_vars())
    self.sync = self.local_network.sync_from(global_network)
    self.experience = Experience(self.experience_history_size)
    self.local_t = 0
    self.initial_learning_rate = initial_learning_rate
    self.episode_reward = 0
    # For log output
    self.prev_local_t = 0
def __init__(self,
             thread_index,
             global_network,
             initial_learning_rate,
             learning_rate_input,
             grad_applier,
             max_global_time_step,
             device):
    self.thread_index = thread_index
    self.learning_rate_input = learning_rate_input
    self.max_global_time_step = max_global_time_step
    self.action_size = Environment.get_action_size()
    self.local_network = UnrealModel(self.action_size, thread_index, device)
    self.local_network.prepare_loss()
    self.apply_gradients = grad_applier.minimize_local(self.local_network.total_loss,
                                                       global_network.get_vars(),
                                                       self.local_network.get_vars())
    self.sync = self.local_network.sync_from(global_network)
    self.environment = Environment.create_environment()
    self.experience = Experience(EXPERIENCE_HISTORY_SIZE)
    self.local_t = 0
    self.initial_learning_rate = initial_learning_rate
    self.episode_reward = 0
    # For log output
    self.prev_local_t = 0
def simulate(n_trials, update_delay, dummyAgents, start, destination,
             learningAgentType, alpha, gamma, epsilon, experiment):
    # Set up environment and agent
    e = Environment(dummyAgent=dummyAgents, start=start,
                    destination=destination)  # create environment (also adds some dummy traffic)
    if learningAgentType == "RandomActionAgent":
        print("")
        a = e.create_agent(RandomActionAgent)  # create agent
    elif learningAgentType == "LearningAgent1":
        print("")
        a = e.create_agent(LearningAgent1)  # create agent
        a.setQtable(alpha=alpha, gamma=gamma, epsilon=epsilon)
    elif learningAgentType == "LearningAgent2":
        print("")
        a = e.create_agent(LearningAgent2)  # create agent
        a.setQtable(alpha=alpha, gamma=gamma, epsilon=epsilon)
    e.set_primary_agent(a, enforce_deadline=True)  # set agent to track
    runTimeStat = RunTimeStat(learningAgentType=learningAgentType, n_trials=n_trials,
                              alpha=alpha, gamma=gamma, epsilon=epsilon)
    e.setRunTimeStat(runTimeStat)

    # Now simulate it
    sim = Simulator(e, update_delay=update_delay)  # reduce update_delay to speed up simulation
    sim.run(n_trials=n_trials)  # run for a specified number of trials
    # NOTE: To quit midway, press Esc or close the pygame window, or hit Ctrl+C on the command line

    runtimeStatFile = "logs/" + experiment + "runTimeStat.json"
    mergedRunTimeStat = dict()
    if os.path.isfile(runtimeStatFile):
        with open(runtimeStatFile) as f:
            oldRunTimeStat = json.load(f)
        mergedRunTimeStat = oldRunTimeStat.copy()
    runTimeStat = e.getRunTimeStat().getStat()
    print("---------------------")
    print("Runstat of current run")
    print(json.dumps(runTimeStat))
    mergedRunTimeStat.update(runTimeStat)
    with open(runtimeStatFile, 'w') as f:
        json.dump(mergedRunTimeStat, f)
    # with open(runtimeStatFile) as f:
    #     print(json.load(f))
    print("GREAT")
def __init__(self,
             thread_index,
             global_network,
             initial_learning_rate,
             env_args,
             use_pixel_change,
             use_value_replay,
             use_reward_prediction,
             pixel_change_lambda,
             entropy_beta,
             local_t_max,
             gamma,
             gamma_pc,
             experience_history_size,
             max_global_time_step,
             spatial_dim,
             optimizor):
    self.thread_index = thread_index
    self.env_args = env_args
    self.use_pixel_change = use_pixel_change
    self.use_value_replay = use_value_replay
    self.use_reward_prediction = use_reward_prediction
    self.local_t_max = local_t_max
    self.gamma = gamma
    self.gamma_pc = gamma_pc
    self.experience_history_size = experience_history_size
    self.max_global_time_step = max_global_time_step
    self.action_size = Environment.get_action_size()
    self.local_network = Agent(thread_index,
                               use_pixel_change,
                               use_value_replay,
                               use_reward_prediction,
                               pixel_change_lambda,
                               entropy_beta)
    self.global_network = global_network
    self.experience = Experience(self.experience_history_size)
    self.local_t = 0
    self.initial_learning_rate = initial_learning_rate
    self.episode_reward = 0
    self.spatial_dim = spatial_dim
    self.obs_processer = ObsProcesser()
    self.action_processer = ActionProcesser(dim=spatial_dim)
    self.optimizor = optimizor
    self.distribution = th.distributions.Categorical
    # For log output
    self.prev_local_t = 0
    self.environment = Environment.create_environment(self.env_args)
def __init__(self):
    self.action_size = Environment.get_action_size(flags.env_type, flags.env_name)
    self.objective_size = Environment.get_objective_size(flags.env_type, flags.env_name)
    self.global_network = UnrealModel(self.action_size,
                                      self.objective_size,
                                      -1,
                                      flags.use_lstm,
                                      flags.use_pixel_change,
                                      flags.use_value_replay,
                                      flags.use_reward_prediction,
                                      0.0,
                                      0.0,
                                      "/cpu:0",
                                      for_display=True)
    self.environment = Environment.create_environment(
        flags.env_type, flags.env_name,
        env_args={'episode_schedule': flags.split,
                  'log_action_trace': flags.log_action_trace,
                  'seed': flags.seed,
                  # 'max_states_per_scene': flags.episodes_per_scene,
                  'episodes_per_scene_test': flags.episodes_per_scene})
    self.episode_reward = 0
    self.cnt_success = 0
def __init__(self, display_size):
    pygame.init()
    self.surface = pygame.display.set_mode(display_size, 0, 24)
    pygame.display.set_caption('UNREAL')
    self.action_size = Environment.get_action_size(flags.env_type, flags.env_name)
    self.objective_size = Environment.get_objective_size(flags.env_type, flags.env_name)
    self.global_network = UnrealModel(self.action_size,
                                      self.objective_size,
                                      -1,
                                      flags.use_lstm,
                                      flags.use_pixel_change,
                                      flags.use_value_replay,
                                      flags.use_reward_prediction,
                                      0.0,
                                      0.0,
                                      "/cpu:0",
                                      for_display=True)
    self.environment = Environment.create_environment(
        flags.env_type, flags.env_name,
        env_args={'episode_schedule': flags.split,
                  'log_action_trace': flags.log_action_trace,
                  'max_states_per_scene': flags.episodes_per_scene,
                  'episodes_per_scene_test': flags.episodes_per_scene})
    self.font = pygame.font.SysFont(None, 20)
    self.value_history = ValueHistory()
    self.state_history = StateHistory()
    self.episode_reward = 0
def main():
    seed = np.random.randint(1000)

    n_positions = 11
    n_prices = 11
    n_firms = 2
    firms_positions = np.random.randint(1, n_positions + 1, size=n_firms)
    firms_prices = np.random.randint(1, n_prices + 1, size=n_firms)
    transportation_cost = 0.2

    firm_alpha = 0.01
    firm_temp = 0.02
    firm_momentum = 0.0  # Only NN
    firm_neural_network = "MLP"  # Only NN

    customer_alpha = 0.01
    customer_temp = 0.02
    customer_momentum = 0.0  # Only NN
    customer_neural_network = "MLP"  # Only NN

    t_max = 10 ** 3

    firm = "StrategicNeuralNetwork"
    customer = "Customer"

    parameters = {
        "seed": seed,
        "firm": firm,
        "customer": customer,
        "n_positions": n_positions,
        "n_prices": n_prices,
        "firms_positions": firms_positions,  # Initial positions
        "firms_prices": firms_prices,  # Initial prices
        "transportation_cost": transportation_cost,
        "firm_temp": firm_temp,
        "firm_alpha": firm_alpha,
        "firm_momentum": firm_momentum,
        "firm_neural_network": firm_neural_network,  # Useful for NN
        "customer_alpha": customer_alpha,
        "customer_temp": customer_temp,
        "customer_momentum": customer_momentum,
        "customer_neural_network": customer_neural_network,
        "t_max": t_max
    }

    env = Environment(**parameters)
    results = env.run()

    fig_producer = FigureProducer(
        results=results,
        parameters=parameters,
        root_folder=path.expanduser("~/Desktop/HotellingExample"))
    fig_producer.run(customers_choices_plot_period=50, other_plots_period=10000)
def __init__(self, fps=60):
    self._running = True
    self._pause = False
    self.size = self.width, self.height = 1920, 1020
    self.env = Environment((1700, 1020))
    self.info_panel_size = (self.width - self.env.width, self.height)
    self.info_panel_pos = (self.env.width, 0)
    self.fps = fps
    self.iters_per_second = 0
def call(self, *args):
    from interpreter.execute_stmt import execute_stmt
    # Each call gets a fresh environment whose outer scope is the
    # closure captured at declaration time.
    env = Environment()
    env.outer_env = self.closure
    for param_position, param in enumerate(self.declaration.params):
        env.define(param.lexeme, args[param_position])
    try:
        execute_stmt(self.declaration.body, env)
    except LoxReturn as ret:
        # A `return` statement unwinds the interpreter via an exception.
        return ret.ret_val
    return None
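# Hedged sketch (not from the source): call() above relies on a LoxReturn
# exception for non-local exit out of nested statements; a minimal definition
# compatible with it would look like this.
class LoxReturn(Exception):
    def __init__(self, ret_val):
        super().__init__()
        self.ret_val = ret_val  # the value produced by the `return` statement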
def __init__(self):
    self.img = np.zeros(shape=(HEIGHT, WIDTH, 3), dtype=np.uint8)
    self.action_size = Environment.get_action_size()
    self.global_network = UnrealModel(self.action_size, -1, "/cpu:0",
                                      for_display=True)
    self.env = Environment.create_environment()
    self.value_history = ValueHistory()
    self.state_history = StateHistory()
    self.ep_reward = 0
    self.mazemap = MazeMap()
def __init__(self, display_size):
    pygame.init()
    self.surface = pygame.display.set_mode(display_size, 0, 24)
    name = 'UNREAL' if flags.segnet == 0 else "A3C ErfNet"
    pygame.display.set_caption(name)

    env_config = sim_config.get(flags.env_name)
    self.image_shape = [env_config.get('height', 88), env_config.get('width', 88)]
    segnet_param_dict = {'segnet_mode': flags.segnet}
    is_training = tf.placeholder(tf.bool, name="training")
    map_file = env_config.get('objecttypes_file', '../../objectTypes.csv')
    self.label_mapping = pd.read_csv(map_file, sep=',', header=0)
    self.get_col_index()

    self.action_size = Environment.get_action_size(flags.env_type, flags.env_name)
    self.objective_size = Environment.get_objective_size(flags.env_type, flags.env_name)
    self.global_network = UnrealModel(self.action_size,
                                      self.objective_size,
                                      -1,
                                      flags.use_lstm,
                                      flags.use_pixel_change,
                                      flags.use_value_replay,
                                      flags.use_reward_prediction,
                                      0.0,
                                      0.0,
                                      "/gpu:0",
                                      segnet_param_dict=segnet_param_dict,
                                      image_shape=self.image_shape,
                                      is_training=is_training,
                                      n_classes=flags.n_classes,
                                      segnet_lambda=flags.segnet_lambda,
                                      dropout=flags.dropout,
                                      for_display=True)
    self.environment = Environment.create_environment(
        flags.env_type, flags.env_name, flags.termination_time_sec,
        env_args={'episode_schedule': flags.split,
                  'log_action_trace': flags.log_action_trace,
                  'max_states_per_scene': flags.episodes_per_scene,
                  'episodes_per_scene_test': flags.episodes_per_scene})
    self.font = pygame.font.SysFont(None, 20)
    self.value_history = ValueHistory()
    self.state_history = StateHistory()
    self.episode_reward = 0
def setUp(self):
    json_data = '{"observations": {"screen_features": ["height_map", "player_id", "player_relative", "unit_type"], ' \
                '"minimap_features": ["player_id", "selected"], "nonspatial_features": ["player", "score_cumulative"], ' \
                '"action_ids": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]}, "rewards": [1, 1, 1, 1]}'
    config = json.loads(json_data)
    self._obs_mod = ObservationModifier(config["observations"], 32)
    # self.old_obs = [None]
    # self._obs_spec = {}
    # self._builder = dummy_observation.Builder(self._obs_spec)
    # self.obs = self._builder.build()
    self.env = Environment()
    # self.obs = observations = [None] * 16
    self.obs = self.env.reset()
def setUp(self):
    self.json_data = '{"observations": {"screen_features": ["height_map", "player_id", "player_relative", "unit_type"], ' \
                     '"minimap_features": ["player_id", "selected"], "nonspatial_features": ["player", "score_cumulative"], ' \
                     '"action_ids": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]}, "rewards": [1, 1, 1, 1]}'
    self.config = json.loads(self.json_data)
    self.sess = tf.Session()
    self.agent_modifier = AgentModifier(self.config, 32)
    self.agent = A2CAgent(self.sess, self.agent_modifier)
    # self.obs_spec = {}
    # self._builder = dummy_observation.Builder(self._obs_spec)
    # self.obs = self._builder.build().observation
    self.env = Environment()
    self.obs = self.env.reset()
def __init__(self,
             runner,
             global_network,
             initial_learning_rate,
             learning_rate_input,
             grad_applier,
             env_type,
             env_name,
             entropy_beta,
             gamma,
             experience,
             max_global_time_step,
             device,
             value_lambda):
    self.runner = runner
    self.learning_rate_input = learning_rate_input
    self.env_type = env_type
    self.env_name = env_name
    self.gamma = gamma
    self.max_global_time_step = max_global_time_step
    self.action_size = Environment.get_action_size(env_type, env_name)
    self.obs_size = Environment.get_obs_size(env_type, env_name)
    self.global_network = global_network
    self.local_network = UnrealModel(self.action_size,
                                     self.obs_size,
                                     1,
                                     entropy_beta,
                                     device,
                                     value_lambda=value_lambda)
    self.local_network.prepare_loss()
    self.apply_gradients = grad_applier.minimize_local(self.local_network.total_loss,
                                                       self.global_network.get_vars(),
                                                       self.local_network.get_vars())
    self.sync = self.local_network.sync_from(self.global_network, name="base_trainer")
    self.experience = experience
    self.local_t = 0
    self.next_log_t = 0
    self.next_performance_t = PERFORMANCE_LOG_INTERVAL
    self.initial_learning_rate = initial_learning_rate
    self.episode_reward = 0
    # trackers for the experience replay creation
    self.last_state = None
    self.last_action = 0
    self.last_reward = 0
    self.ep_ploss = 0.
    self.ep_vloss = 0.
    self.ep_entr = []
    self.ep_grad = []
    self.ep_l = 0
def findNextEnv(prereqs, additions, deletions, relations):
    for prereq in prereqs:
        if prereq not in relations:
            return None
    newRelations = relations[:]
    for deletion in deletions:
        newRelations.remove(deletion)
    for addition in additions:
        newRelations.append(addition)
    newEnv = Environment()
    newEnv.relations = newRelations
    return newEnv
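# Hedged usage sketch: findNextEnv behaves like a STRIPS-style operator
# application: check preconditions, then apply delete and add lists to a
# copy of the relation set. The relation values below are illustrative,
# not from the source.
current = Environment()
current.relations = ["clear(A)", "on(A,B)"]
next_env = findNextEnv(prereqs=["clear(A)"],
                       additions=["holding(A)"],
                       deletions=["clear(A)"],
                       relations=current.relations)
# next_env is None whenever any prerequisite is missing from `relations`.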
def __init__(self, game_model, fps, pixel_size, screen_width, screen_height,
             navigation_bar_height):
    self.model = game_model
    self.stats = self.model.stats()
    self.fps = fps
    self.pixel_size = pixel_size
    self.navigation_bar_height = navigation_bar_height
    self.screen = pygame.display.set_mode((screen_width, screen_height), 0,
                                          Constants.SCREEN_DEPTH)
    self.surface = pygame.Surface(self.screen.get_size())
    # Integer division: pixel counts must be whole numbers.
    self.horizontal_pixels = screen_width // pixel_size
    self.vertical_pixels = (screen_height - navigation_bar_height) // pixel_size
    self.environment = Environment(width=self.horizontal_pixels,
                                   height=self.vertical_pixels)
    self.screen_objects = []  # must exist before the appends below

    self.wall = WallScreenObject(self)
    self.wall.points = list(map(lambda x: self._screen_normalized_point(x),
                                self.environment.set_wall()))
    self.screen_objects.append(self.wall)

    self.fruit = FruitScreenObject(self)
    self.fruit.points = list(map(lambda x: self._screen_normalized_point(x),
                                 self.environment.set_fruit()))
    self.screen_objects.append(self.fruit)

    self.snake = SnakeScreenObject(self)
    self.snake.points = list(map(lambda x: self._screen_normalized_point(x),
                                 self.environment.set_snake()))
    self.screen_objects.append(self.snake)

    # Main loop: advance the environment with the model's action each frame.
    while True:
        self._handle_user_input()
        pygame.time.Clock().tick(fps)
        self.environment.eat_fruit_if_possible()
        ai_action = self.model.move(self.environment)
        if not self.environment.step(ai_action):
            self.model.reset()
            self.model.log_score(self.environment.reward())
            self.stats = self.model.stats()
            self.environment.set_snake()
        self._sync_screen_with_environment()
        self._draw_screen()
        self._display()
def __init__(self, args, display_size, saver):
    pygame.init()
    self.args = args
    self.surface = pygame.display.set_mode(display_size, 0, 24)
    pygame.display.set_caption('UNREAL')

    args.action_size = Environment.get_action_size(args.env_name)
    self.global_network = Agent(1, args)
    saver.restore(self.global_network)
    self.global_network.eval()
    self.environment = Environment.create_environment(args.env_name)
    self.font = pygame.font.SysFont(None, 20)
    self.value_history = ValueHistory()
    self.state_history = StateHistory()
    self.distribution = torch.distributions.Categorical
    self.episode_reward = 0
def __init__(self, env_name, process_idx):
    Environment.__init__(self)
    self.last_state = []
    self.last_action = []
    self.last_reward = []
    self.env = env_vrep.Simu_env(20000 + process_idx)
    self.env.connect_vrep()
    # self.conn, child_conn = Pipe()
    # self.proc = Process(target=worker, args=(child_conn, env_name, process_idx))
    # self.proc.start()
    # self.conn.recv()
    self.reset()
def test_step(self):
    environment = Environment.create_environment()
    action_size = Environment.get_action_size()
    if sys.platform == 'darwin':
        self.assertEqual(action_size, 6)
    else:
        self.assertEqual(action_size, 8)
    for i in range(3):
        self.assertEqual(environment.last_observation.shape, (84, 84))
        if SAVE_IMAGE:
            scipy.misc.imsave("debug_observation{0}.png".format(i),
                              environment.last_observation)
        reward, terminal = environment.step(0)
def main(args):
    action_size = Environment.get_action_size(flags.env_type, flags.env_name)
    objective_size = Environment.get_objective_size(flags.env_type, flags.env_name)

    # Use CPU for the weight visualization tool.
    global_network = UnrealModel(action_size,
                                 objective_size,
                                 -1,
                                 flags.use_lstm,
                                 flags.use_pixel_change,
                                 flags.use_value_replay,
                                 flags.use_reward_prediction,
                                 0.0,
                                 0.0,
                                 "/cpu:0")

    sess = tf.Session()
    init = tf.global_variables_initializer()
    sess.run(init)

    saver = tf.train.Saver()
    checkpoint = tf.train.get_checkpoint_state(flags.checkpoint_dir)
    if checkpoint and checkpoint.model_checkpoint_path:
        saver.restore(sess, checkpoint.model_checkpoint_path)
        print("checkpoint loaded:", checkpoint.model_checkpoint_path)
    else:
        print("Could not find old checkpoint")

    var_dict = {}  # avoid shadowing the built-in vars()
    for v in global_network.get_vars():
        var_dict[v.name] = v

    W_conv1 = sess.run(var_dict['net_-1/base_conv/W_base_conv1:0'])

    # Show a grid of the first-layer convolution filters
    # (3 input channels x 16 output channels).
    fig, axes = plt.subplots(3, 16, figsize=(12, 6),
                             subplot_kw={'xticks': [], 'yticks': []})
    fig.subplots_adjust(hspace=0.1, wspace=0.1)

    for ax, i in zip(axes.flat, range(3 * 16)):
        inch = i // 16
        outch = i % 16
        img = W_conv1[:, :, inch, outch]
        ax.imshow(img, cmap=plt.cm.gray, interpolation='nearest')
        ax.set_title(str(inch) + "," + str(outch))

    plt.show()
def __init__(self):
    self.action_size = Environment.get_action_size(flags.env_type, flags.env_name)
    self.objective_size = Environment.get_objective_size(flags.env_type, flags.env_name)

    env_config = sim_config.get(flags.env_name)
    self.image_shape = [env_config['height'], env_config['width']]
    segnet_param_dict = {'segnet_mode': flags.segnet}
    is_training = tf.placeholder(tf.bool, name="training")

    # for_display=True marks this UnrealModel instance as display/evaluation-only
    self.global_network = UnrealModel(self.action_size,
                                      self.objective_size,
                                      -1,
                                      flags.use_lstm,
                                      flags.use_pixel_change,
                                      flags.use_value_replay,
                                      flags.use_reward_prediction,
                                      0.0,  # flags.pixel_change_lambda
                                      0.0,  # flags.entropy_beta
                                      device,  # assumed to be defined at module scope
                                      segnet_param_dict=segnet_param_dict,
                                      image_shape=self.image_shape,
                                      is_training=is_training,
                                      n_classes=flags.n_classes,
                                      segnet_lambda=flags.segnet_lambda,
                                      dropout=flags.dropout,
                                      for_display=True)
    self.environment = Environment.create_environment(
        flags.env_type, flags.env_name, flags.termination_time_sec,
        env_args={'episode_schedule': flags.split,
                  'log_action_trace': flags.log_action_trace,
                  'max_states_per_scene': flags.episodes_per_scene,
                  'episodes_per_scene_test': flags.episodes_per_scene})
    self.global_network.prepare_loss()

    self.total_loss = []
    self.segm_loss = []
    self.episode_reward = [0]
    self.episode_roomtype = []
    self.roomType_dict = {}
    self.segnet_class_dict = {}
    self.success_rate = []

    self.batch_size = 20
    self.batch_cur_num = 0
    self.batch_prev_num = 0
    self.batch_si = []
    self.batch_sobjT = []
    self.batch_a = []
    self.batch_reward = []
def main():
    json_data = '{"observations": {"screen_features": ["height_map", "player_id", "player_relative", "unit_type"], ' \
                '"minimap_features": ["player_id", "selected"], "nonspatial_features": ["player"], ' \
                '"action_ids": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]}, "rewards": [1, 1, 1, 1]}'
    config = json.loads(json_data)
    print("config:")
    print(config)
    print("config.observations:")
    print(config["observations"])
    print("config.observations.action_ids:")
    print(config["observations"]["action_ids"])
    print("config.rewards:")
    print(config["rewards"])

    print("Commencing magic...")
    sess = tf.Session()
    env = Environment()
    # test_env(env, config)
    agent_modifier = AgentModifier(config, 32)
    agent = A2CAgent(sess, agent_modifier)
    # agent = RandomAgent()  # for debugging; TODO: delete later
    runner = A2CRunner(agent, env)
    runner.begin()
def vvo_after_rl():
    # The dataset contains random power injections of nodes.
    df = load_dataset()
    df_train, df_test = split_dataset(df, 998)

    print('=====================vvo_brute_force=====================')
    vvo_brute_force(df_test)

    objectives = [ObjFuncType.ACTIVE_POWER_LOSSES]
    network_manager = ODSSNetworkManagement()
    power_flow = ODSSPowerFlow()
    # The environment shouldn't take the entire dataset as an input parameter;
    # the train and test methods should receive it instead.
    environment = Environment(network_manager)

    print('=====================agent=====================')
    agent = DeepQLearningAgent(environment)
    n_episodes = 1000
    print('agent training started')
    t1 = time.time()
    # agent.train(df_train, n_episodes)
    t2 = time.time()
    print('agent training finished in', t2 - t1)
    agent.test(df_test)
    network_manager.print_all_capacitor_statuses()

    objective_functions = ObjectiveFunctions(objectives, power_flow)
    print('=====================vvo=====================')
    vvo = VVO(network_manager, power_flow, objective_functions)
    vvo.test(df_test, resetCapacitorStatuses=False)
    network_manager.print_all_capacitor_statuses()
def __init__(self, display_size):
    pygame.init()
    self.surface = pygame.display.set_mode(display_size, 0, 24)
    pygame.display.set_caption('UNREAL')
    self.action_size = Environment.get_action_size()
    self.global_network = UnrealModel(self.action_size, -1, "/cpu:0",
                                      for_display=True)
    self.environment = Environment.create_environment()
    self.font = pygame.font.SysFont(None, 20)
    self.value_history = ValueHistory()
    self.state_history = StateHistory()
    self.episode_reward = 0
def __init__(self, model_size, group_id, environment_id=0, training=True):
    self.model_size = model_size
    self._training = training
    self.environment_id = environment_id
    self.group_id = group_id
    # Build environment
    self.environment = Environment.create_environment(flags.env_type,
                                                      self.environment_id,
                                                      self._training)
    self.extrinsic_reward_manipulator = eval(flags.extrinsic_reward_manipulator)
    self.terminal = True
    self._composite_batch = CompositeBatch(
        maxlen=flags.replay_buffer_size if flags.replay_mean > 0 else 1)
    # Statistics
    self.__client_statistics = Statistics(flags.episode_count_for_evaluation)
    if self._training:
        # logs
        if not os.path.isdir(flags.log_dir + "/performance"):
            os.mkdir(flags.log_dir + "/performance")
        if not os.path.isdir(flags.log_dir + "/episodes"):
            os.mkdir(flags.log_dir + "/episodes")
        formatter = logging.Formatter('%(asctime)s %(message)s')
        # reward logger
        self.__reward_logger = logging.getLogger(
            'reward_{}_{}'.format(self.group_id, self.environment_id))
        hdlr = logging.FileHandler(
            flags.log_dir + '/performance/reward_{}_{}.log'.format(self.group_id, self.environment_id))
        hdlr.setFormatter(formatter)
        self.__reward_logger.addHandler(hdlr)
        self.__reward_logger.setLevel(logging.DEBUG)
        self.__max_reward = float("-inf")
def prepare(self):
    if self.running:
        self.environment = Environment.create_environment(
            self.maze_size, self.level_seed)
        print('Started trainer ', self.thread_index)
        self.apply_next_location_loss = 0.0
        sys.stdout.flush()
def check_environment(self, env_type, env_name):
    env = Environment.create_environment(env_type, env_name, 0)
    action_size = Environment.get_action_size(env_type, env_name)

    for i in range(3):
        state, reward, terminal = env.process(0)
        print(state)
        print(reward)
        print(terminal)
        # # Check shape
        # self.assertTrue(state.shape == (84, 84, 3))
        # # state and pixel_change value range should be [0,1]
        # self.assertTrue(np.amax(state) <= 1.0)

    env.stop()
from executers.twoDimExecuter import TwoDimExecuter
from executers.oneDimExecuter import OneDimExecuter
from heuristics.sort_heuristic import SortHeuristic
from heuristics.heap_heuristic import HeapHeuristic
from environment.environment import Environment

if __name__ == '__main__':
    exec_env = Environment()
    exec_env.evaluate('env.in')