Example #1
class AgentTest(unittest.TestCase):
    def setUp(self):
        self.json_data = '{"observations": {"screen_features": ["height_map", "player_id", "player_relative", "unit_type"], ' \
                    '"minimap_features": ["player_id", "selected"], "nonspatial_features": ["player", "score_cumulative"], ' \
                    '"action_ids": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]}, "rewards": [1, 1, 1, 1]}'
        self.config = json.loads(self.json_data)
        self.sess = tf.Session()
        self.agent_modifier = AgentModifier(self.config, 32)
        self.agent = A2CAgent(self.sess, self.agent_modifier)
        # self.obs_spec = {}
        # self._builder = dummy_observation.Builder(self._obs_spec)
        # self.obs = self._builder.build().observation
        self.env = Environment()
        self.obs = self.env.reset()

    def testMakeAction(self):
        print("Testing Make Action")
        action = self.agent.act(self.obs)
        action_made_1 = self.agent.convert_actions(action)
        action_2 = self.agent.act(self.obs)
        self.obs = self.env.reset()
        action_made_2 = self.agent.convert_actions(action_2)
        self.assertNotEqual(action_made_1, action_made_2)

    def testGetObservationFeed(self):
        print("Testing Get Observation Feed")
        feed_dict = self.agent._get_observation_feed(self.obs)
        self.obs = self.env.reset()
        feed_dict_2 = self.agent._get_observation_feed(self.obs)
        self.assertNotEqual(feed_dict, feed_dict_2)
Example #2
def run(args):

    param, clone_id = args[0],  args[1]
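    # Sample this clone's hyperparameters at random from the ranges provided in param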

    param["transportation_cost"] = np.random.choice(param["range_transportation_cost"])

    param["customer_alpha"] = np.random.uniform(*param["range_customer_alpha"])
    param["customer_temp"] = np.random.uniform(*param["range_customer_temp"])

    param["firm_alpha"] = np.random.uniform(*param["range_firm_alpha"])
    param["firm_temp"] = np.random.uniform(*param["range_firm_temp"])

    # param["utility_consumption"] = np.random.uniform(*param["range_utility_consumption"])

    param["firm_positions"] = np.random.randint(1, param["n_positions"] + 1, size=param["n_firms"])
    param["firm_prices"] = np.random.randint(1, param["n_prices"] + 1, size=param["n_firms"])

    param["seed"] = np.random.randint(2 ** 32)

    job_id = param["job_id"]

    label = "J{}C{}".format(job_id, clone_id)

    env = Environment(**param)
    results = env.run()

    Backup(data=results, name="results", root_folder=cl_parameters["working_folder"], label=label)
    Backup(data=param, name="parameters", root_folder=cl_parameters["working_folder"], label=label)
Example #3
 def __init__(self):
     self.action_size = Environment.get_action_size(flags.env_type,
                                                    flags.env_name)
     self.objective_size = Environment.get_objective_size(
         flags.env_type, flags.env_name)
     print('flags:use_pixel_change {}'.format(flags.use_pixel_change))
     sleep(10)
     self.global_network = UnrealModel(self.action_size,
                                       self.objective_size,
                                       -1,
                                       flags.use_lstm,
                                       flags.use_pixel_change,
                                       flags.use_value_replay,
                                       flags.use_reward_prediction,
                                       0.0,
                                       0.0,
                                       "/cpu:0",
                                       for_display=True)
     self.environment = Environment.create_environment(
         flags.env_type,
         flags.env_name,
         env_args={
             'episode_schedule': flags.split,
             'log_action_trace': flags.log_action_trace,
             'max_states_per_scene': flags.episodes_per_scene,
             'episodes_per_scene_test': flags.episodes_per_scene
         })
     print('\n======\nENV in Evaluate::ctor')
     print(self.environment)
     print(self.global_network)
     print('val_replay!!! {}'.format(flags.use_value_replay))
     print(flags.split)
     print('=======\n')
     sleep(10)
     self.episode_reward = 0
Example #4
 def __init__(self, thread_index):
     Environment.__init__(self)
     self.thread_index = thread_index
     self.max_step = 100
     self.control_points_per_step = 5
     self.mean_seconds_per_step = 0.1  # on average, one step every n seconds
     self.horizon_distance = 1  # meters
     self.max_distance_to_path = 0.1  # meters
     # obstacles related stuff
     self.max_obstacle_count = 3
     self.min_obstacle_radius = 0.15  # meters
     self.max_obstacle_radius = 0.45  # meters
     # information about speed parameters: http://www.ijtte.com/uploads/2012-10-01/5ebd8343-9b9c-b1d4IJTTE%20vol2%20no3%20%287%29.pdf
     self.min_speed = 0.1  # m/s
     self.max_speed = 1.4  # m/s
     self.speed_lower_limit = 0.7  # m/s # used together with max_speed to get the random speed upper limit
     self.max_speed_noise = 0.25  # m/s
     # the fastest production car has a max_acceleration of about 9.25 m/s^2 (https://en.wikipedia.org/wiki/List_of_fastest_production_cars_by_acceleration)
     # the slowest has a max_acceleration of about 0.7 m/s^2 (http://automdb.com/max_acceleration)
     self.max_acceleration = 0.7  # m/s^2
     self.max_steering_degree = 30
     self.max_steering_noise_degree = 2
     self.max_steering_angle = convert_degree_to_radiant(
         self.max_steering_degree)
     self.max_steering_noise_angle = convert_degree_to_radiant(
         self.max_steering_noise_degree)
     # splines related stuff
     self.spline_number = 2
     self.control_points_per_spline = 50
     # evaluator stuff
     self.episodes = deque()
     # shapes
     self.state_shape = self.get_state_shape()
     self.action_shape = self.get_action_shape()
Example #5
  def __init__(self,
               thread_index,
               global_network,
               initial_learning_rate,
               learning_rate_input,
               grad_applier,
               env_type,
               env_name,
               use_lstm,
               use_pixel_change,
               use_value_replay,
               use_reward_prediction,
               pixel_change_lambda,
               entropy_beta,
               local_t_max,
               gamma,
               gamma_pc,
               experience_history_size,
               max_global_time_step,
               device):

    self.thread_index = thread_index
    self.learning_rate_input = learning_rate_input
    self.env_type = env_type
    self.env_name = env_name
    self.use_lstm = use_lstm
    self.use_pixel_change = use_pixel_change
    self.use_value_replay = use_value_replay
    self.use_reward_prediction = use_reward_prediction
    self.local_t_max = local_t_max
    self.gamma = gamma
    self.gamma_pc = gamma_pc
    self.experience_history_size = experience_history_size
    self.max_global_time_step = max_global_time_step
    self.action_size = Environment.get_action_size(env_type, env_name)
    self.objective_size = Environment.get_objective_size(env_type, env_name)
    
    self.local_network = UnrealModel(self.action_size,
                                     self.objective_size,
                                     thread_index,
                                     use_lstm,
                                     use_pixel_change,
                                     use_value_replay,
                                     use_reward_prediction,
                                     pixel_change_lambda,
                                     entropy_beta,
                                     device)
    self.local_network.prepare_loss()

    self.apply_gradients = grad_applier.minimize_local(self.local_network.total_loss,
                                                       global_network.get_vars(),
                                                       self.local_network.get_vars())
    
    self.sync = self.local_network.sync_from(global_network)
    self.experience = Experience(self.experience_history_size)
    self.local_t = 0
    self.initial_learning_rate = initial_learning_rate
    self.episode_reward = 0
    # For log output
    self.prev_local_t = 0
Example #6
File: trainer.py Project: kalaidin/unreal
  def __init__(self,
               thread_index,
               global_network,
               initial_learning_rate,
               learning_rate_input,
               grad_applier,
               max_global_time_step,
               device):

    self.thread_index = thread_index
    self.learning_rate_input = learning_rate_input
    self.max_global_time_step = max_global_time_step

    self.action_size = Environment.get_action_size()
    self.local_network = UnrealModel(self.action_size, thread_index, device)
    self.local_network.prepare_loss()

    self.apply_gradients = grad_applier.minimize_local(self.local_network.total_loss,
                                                       global_network.get_vars(),
                                                       self.local_network.get_vars())
    
    self.sync = self.local_network.sync_from(global_network)
    self.environment = Environment.create_environment()
    self.experience = Experience(EXPERIENCE_HISTORY_SIZE)
    self.local_t = 0
    self.initial_learning_rate = initial_learning_rate
    self.episode_reward = 0
    # For log output
    self.prev_local_t = 0
Example #7
def simulate(n_trials, update_delay, dummyAgents, start, destination,
             learningAgentType, alpha, gamma, epsilon, experiment):

    # Set up environment and agent
    e = Environment(dummyAgent=dummyAgents,
                    start=start,
                    destination=destination
                    )  # create environment (also adds some dummy traffic)

    if learningAgentType == "RandomActionAgent":
        print ""
        a = e.create_agent(RandomActionAgent)  # create agent

    elif learningAgentType == "LearningAgent1":
        print ""
        a = e.create_agent(LearningAgent1)  # create agent
        a.setQtable(alpha=alpha, gamma=gamma, epsilon=epsilon)

    elif learningAgentType == "LearningAgent2":
        print ""
        a = e.create_agent(LearningAgent2)  # create agent
        a.setQtable(alpha=alpha, gamma=gamma, epsilon=epsilon)

    e.set_primary_agent(a, enforce_deadline=True)  # set agent to track
    runTimeStat = RunTimeStat(learningAgentType=learningAgentType,
                              n_trials=n_trials,
                              alpha=alpha,
                              gamma=gamma,
                              epsilon=epsilon)
    e.setRunTimeStat(runTimeStat)

    # Now simulate it
    sim = Simulator(e, update_delay=update_delay
                    )  # reduce update_delay to speed up simulation

    sim.run(n_trials=n_trials)  # run for a specified number of trials
    # NOTE: To quit midway, press Esc or close pygame window, or hit Ctrl+C on the command-line

    runtimeStatFile = "logs/" + experiment + "runTimeStat.json"

    mergedRunTimeStat = dict()
    if os.path.isfile(runtimeStatFile):
        with open(runtimeStatFile) as f:
            oldRunTimeStat = json.load(f)
            mergedRunTimeStat = oldRunTimeStat.copy()

    runTimeStat = e.getRunTimeStat().getStat()
    print "---------------------"
    print "Runstat of current run"
    print json.dumps(runTimeStat)

    mergedRunTimeStat.update(runTimeStat)

    with open(runtimeStatFile, 'w') as f:
        json.dump(mergedRunTimeStat, f)

    # with open(runtimeStatFile) as f:
    #     print(json.load(f))

    print("GREAT")
Example #8
    def __init__(self, thread_index, global_network, initial_learning_rate,
                 env_args, use_pixel_change, use_value_replay,
                 use_reward_prediction, pixel_change_lambda, entropy_beta,
                 local_t_max, gamma, gamma_pc, experience_history_size,
                 max_global_time_step, spatial_dim, optimizor):

        self.thread_index = thread_index
        self.env_args = env_args
        self.use_pixel_change = use_pixel_change
        self.use_value_replay = use_value_replay
        self.use_reward_prediction = use_reward_prediction
        self.local_t_max = local_t_max
        self.gamma = gamma
        self.gamma_pc = gamma_pc
        self.experience_history_size = experience_history_size
        self.max_global_time_step = max_global_time_step
        self.action_size = Environment.get_action_size()
        self.local_network = Agent(thread_index, use_pixel_change,
                                   use_value_replay, use_reward_prediction,
                                   pixel_change_lambda, entropy_beta)

        self.global_network = global_network
        self.experience = Experience(self.experience_history_size)
        self.local_t = 0
        self.initial_learning_rate = initial_learning_rate
        self.episode_reward = 0
        self.spatial_dim = spatial_dim
        self.obs_processer = ObsProcesser()
        self.action_processer = ActionProcesser(dim=spatial_dim)
        self.optimizor = optimizor
        self.distribution = th.distributions.Categorical
        # For log output
        self.prev_local_t = 0
        self.environment = Environment.create_environment(self.env_args)
Example #9
 def __init__(self):
     self.action_size = Environment.get_action_size(flags.env_type,
                                                    flags.env_name)
     self.objective_size = Environment.get_objective_size(
         flags.env_type, flags.env_name)
     self.global_network = UnrealModel(self.action_size,
                                       self.objective_size,
                                       -1,
                                       flags.use_lstm,
                                       flags.use_pixel_change,
                                       flags.use_value_replay,
                                       flags.use_reward_prediction,
                                       0.0,
                                       0.0,
                                       "/cpu:0",
                                       for_display=True)
     self.environment = Environment.create_environment(
         flags.env_type,
         flags.env_name,
         env_args={
             'episode_schedule': flags.split,
             'log_action_trace': flags.log_action_trace,
             'seed': flags.seed,
             # 'max_states_per_scene': flags.episodes_per_scene,
             'episodes_per_scene_test': flags.episodes_per_scene
         })
     self.episode_reward = 0
     self.cnt_success = 0
Example #10
File: display.py Project: mcimpoi/unreal
    def __init__(self, display_size):
        pygame.init()

        self.surface = pygame.display.set_mode(display_size, 0, 24)
        pygame.display.set_caption('UNREAL')

        self.action_size = Environment.get_action_size(flags.env_type,
                                                       flags.env_name)
        self.objective_size = Environment.get_objective_size(
            flags.env_type, flags.env_name)
        self.global_network = UnrealModel(self.action_size,
                                          self.objective_size,
                                          -1,
                                          flags.use_lstm,
                                          flags.use_pixel_change,
                                          flags.use_value_replay,
                                          flags.use_reward_prediction,
                                          0.0,
                                          0.0,
                                          "/cpu:0",
                                          for_display=True)
        self.environment = Environment.create_environment(
            flags.env_type,
            flags.env_name,
            env_args={
                'episode_schedule': flags.split,
                'log_action_trace': flags.log_action_trace,
                'max_states_per_scene': flags.episodes_per_scene,
                'episodes_per_scene_test': flags.episodes_per_scene
            })
        self.font = pygame.font.SysFont(None, 20)
        self.value_history = ValueHistory()
        self.state_history = StateHistory()
        self.episode_reward = 0
Example #11
def main():

    seed = np.random.randint(1000)

    n_positions = 11
    n_prices = 11

    n_firms = 2

    firms_positions = np.random.randint(1, n_positions + 1, size=n_firms)
    firms_prices = np.random.randint(1, n_prices + 1, size=n_firms)

    transportation_cost = 0.2

    firm_alpha = 0.01
    firm_temp = 0.02
    firm_momentum = 0.0  # Only NN
    firm_neural_network = "MLP"  # Only NN

    customer_alpha = 0.01
    customer_temp = 0.02
    customer_momentum = 0.0  # Only NN

    customer_neural_network = "MLP"  # Only NN

    t_max = 10**3

    firm = "StrategicNeuralNetwork"
    customer = "Customer"

    parameters = {
        "seed": seed,
        "firm": firm,
        "customer": customer,
        "n_positions": n_positions,
        "n_prices": n_prices,
        "firms_positions": firms_positions,  # Initial positions
        "firms_prices": firms_prices,  # Initial prices
        "transportation_cost": transportation_cost,
        "firm_temp": firm_temp,
        "firm_alpha": firm_alpha,
        "firm_momentum": firm_momentum,
        "firm_neural_network": firm_neural_network,  # Useful for NN
        "customer_alpha": customer_alpha,
        "customer_temp": customer_temp,
        "customer_momentum": customer_momentum,
        "customer_neural_network": customer_neural_network,
        "t_max": t_max
    }

    env = Environment(**parameters)
    results = env.run()

    fig_producer = FigureProducer(
        results=results,
        parameters=parameters,
        root_folder=path.expanduser("~/Desktop/HotellingExample"))
    fig_producer.run(customers_choices_plot_period=50,
                     other_plots_period=10000)
Example #12
File: main.py Project: baldhat/AntSim
 def __init__(self, fps=60):
     self._running = True
     self._pause = False
     self.size = self.width, self.height = 1920, 1020
     self.env = Environment((1700, 1020))
     self.info_panel_size = (self.width - self.env.width, self.height)
     self.info_panel_pos = (self.env.width, 0)
     self.fps = fps
     self.iters_per_second = 0
Example #13
 def call(self, *args):
     from interpreter.execute_stmt import execute_stmt
     env = Environment()
     env.outer_env = self.closure
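     # Bind each argument to the matching parameter name in a new environment chained to the closure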
     for param_position, param in enumerate(self.declaration.params):
         env.define(param.lexeme, args[param_position])
     try:
         execute_stmt(self.declaration.body, env)
     except LoxReturn as ret:
         return ret.ret_val
     return None
Example #14
 def __init__(self):
     self.img = np.zeros(shape=(HEIGHT, WIDTH, 3), dtype=np.uint8)
     self.action_size = Environment.get_action_size()
     self.global_network = UnrealModel(self.action_size,
                                       -1,
                                       "/cpu:0",
                                       for_display=True)
     self.env = Environment.create_environment()
     self.value_history = ValueHistory()
     self.state_history = StateHistory()
     self.ep_reward = 0
     self.mazemap = MazeMap()
Example #15
File: display.py Project: kvas7andy/unreal
    def __init__(self, display_size):
        pygame.init()

        self.surface = pygame.display.set_mode(display_size, 0, 24)
        name = 'UNREAL' if flags.segnet == 0 else "A3C ErfNet"
        pygame.display.set_caption(name)

        env_config = sim_config.get(flags.env_name)
        self.image_shape = [
            env_config.get('height', 88),
            env_config.get('width', 88)
        ]
        segnet_param_dict = {'segnet_mode': flags.segnet}
        is_training = tf.placeholder(tf.bool, name="training")
        map_file = env_config.get('objecttypes_file', '../../objectTypes.csv')
        self.label_mapping = pd.read_csv(map_file, sep=',', header=0)
        self.get_col_index()

        self.action_size = Environment.get_action_size(flags.env_type,
                                                       flags.env_name)
        self.objective_size = Environment.get_objective_size(
            flags.env_type, flags.env_name)
        self.global_network = UnrealModel(self.action_size,
                                          self.objective_size,
                                          -1,
                                          flags.use_lstm,
                                          flags.use_pixel_change,
                                          flags.use_value_replay,
                                          flags.use_reward_prediction,
                                          0.0,
                                          0.0,
                                          "/gpu:0",
                                          segnet_param_dict=segnet_param_dict,
                                          image_shape=self.image_shape,
                                          is_training=is_training,
                                          n_classes=flags.n_classes,
                                          segnet_lambda=flags.segnet_lambda,
                                          dropout=flags.dropout,
                                          for_display=True)
        self.environment = Environment.create_environment(
            flags.env_type,
            flags.env_name,
            flags.termination_time_sec,
            env_args={
                'episode_schedule': flags.split,
                'log_action_trace': flags.log_action_trace,
                'max_states_per_scene': flags.episodes_per_scene,
                'episodes_per_scene_test': flags.episodes_per_scene
            })
        self.font = pygame.font.SysFont(None, 20)
        self.value_history = ValueHistory()
        self.state_history = StateHistory()
        self.episode_reward = 0
Example #16
 def setUp(self):
     json_data = '{"observations": {"screen_features": ["height_map", "player_id", "player_relative", "unit_type"], ' \
                 '"minimap_features": ["player_id", "selected"], "nonspatial_features": ["player", "score_cumulative"], ' \
                 '"action_ids": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]}, "rewards": [1, 1, 1, 1]}'
     config = json.loads(json_data)
     self._obs_mod = ObservationModifier(config["observations"], 32)
     #self.old_obs = [None]
     #self._obs_spec = {}
     #self._builder = dummy_observation.Builder(self._obs_spec)
     #self.obs = self._builder.build()
     self.env = Environment()
     #self.obs = observations = [None] * 16
     self.obs = self.env.reset()
Example #17
 def setUp(self):
     self.json_data = '{"observations": {"screen_features": ["height_map", "player_id", "player_relative", "unit_type"], ' \
                 '"minimap_features": ["player_id", "selected"], "nonspatial_features": ["player", "score_cumulative"], ' \
                 '"action_ids": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]}, "rewards": [1, 1, 1, 1]}'
     self.config = json.loads(self.json_data)
     self.sess = tf.Session()
     self.agent_modifier = AgentModifier(self.config, 32)
     self.agent = A2CAgent(self.sess, self.agent_modifier)
     # self.obs_spec = {}
     # self._builder = dummy_observation.Builder(self._obs_spec)
     # self.obs = self._builder.build().observation
     self.env = Environment()
     self.obs = self.env.reset()
Example #18
    def __init__(self,
               runner,
               global_network,
               initial_learning_rate,
               learning_rate_input,
               grad_applier,
               env_type,
               env_name,
               entropy_beta,
               gamma,
               experience,
               max_global_time_step,
               device,
               value_lambda):
        self.runner = runner
        self.learning_rate_input = learning_rate_input
        self.env_type = env_type
        self.env_name = env_name
        self.gamma = gamma
        self.max_global_time_step = max_global_time_step
        self.action_size = Environment.get_action_size(env_type, env_name)
        self.obs_size = Environment.get_obs_size(env_type, env_name)
        self.global_network = global_network
        self.local_network = UnrealModel(self.action_size,
                                         self.obs_size,
                                         1,
                                         entropy_beta,
                                         device,
                                         value_lambda=value_lambda)

        self.local_network.prepare_loss()
        
        self.apply_gradients = grad_applier.minimize_local(self.local_network.total_loss,
                                                           self.global_network.get_vars(),
                                                           self.local_network.get_vars())
        self.sync = self.local_network.sync_from(self.global_network, name="base_trainer")
        self.experience = experience
        self.local_t = 0
        self.next_log_t = 0
        self.next_performance_t = PERFORMANCE_LOG_INTERVAL
        self.initial_learning_rate = initial_learning_rate
        self.episode_reward = 0
        # trackers for the experience replay creation
        self.last_state = None
        self.last_action = 0
        self.last_reward = 0
        self.ep_ploss = 0.
        self.ep_vloss = 0.
        self.ep_entr = []
        self.ep_grad = []
        self.ep_l = 0
Example #19
def findNextEnv(prereqs, additions, deletions, relations):
        for prereq in prereqs:
            if prereq not in relations:
                return None
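        # Build the successor state: copy the current relations, remove the deletions, then add the additions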

        newRelations = relations[:]
        for deletion in deletions:
            newRelations.remove(deletion)

        for addition in additions:
            newRelations.append(addition)
        newEnv = Environment()
        newEnv.relations = newRelations
        return newEnv
Example #20
File: game.py Project: emcxcme/slitherin
    def __init__(self, game_model, fps, pixel_size, screen_width,
                 screen_height, navigation_bar_height):
        self.model = game_model

        self.stats = self.model.stats()
        self.fps = fps
        self.pixel_size = pixel_size
        self.navigation_bar_height = navigation_bar_height
        self.screen = pygame.display.set_mode((screen_width, screen_height), 0,
                                              Constants.SCREEN_DEPTH)
        self.surface = pygame.Surface(self.screen.get_size())
        self.horizontal_pixels = screen_width / pixel_size
        self.vertical_pixels = (screen_height -
                                navigation_bar_height) / pixel_size

        self.environment = Environment(width=self.horizontal_pixels,
                                       height=self.vertical_pixels)

        self.screen_objects = []  # not shown in this excerpt, but required by the append calls below

        self.wall = WallScreenObject(self)
        self.wall.points = list(
            map(lambda x: self._screen_normalized_point(x),
                self.environment.set_wall()))
        self.screen_objects.append(self.wall)

        self.fruit = FruitScreenObject(self)
        self.fruit.points = list(
            map(lambda x: self._screen_normalized_point(x),
                self.environment.set_fruit()))
        self.screen_objects.append(self.fruit)

        self.snake = SnakeScreenObject(self)
        self.snake.points = list(
            map(lambda x: self._screen_normalized_point(x),
                self.environment.set_snake()))
        self.screen_objects.append(self.snake)
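        # Main loop: handle user input, let the game model pick an action, step the environment, and redraw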

        while True:
            self._handle_user_input()
            pygame.time.Clock().tick(fps)
            self.environment.eat_fruit_if_possible()
            ai_action = self.model.move(self.environment)
            if not self.environment.step(ai_action):
                self.model.reset()
                self.model.log_score(self.environment.reward())
                self.stats = self.model.stats()
                self.environment.set_snake()
            self._sync_screen_with_environment()
            self._draw_screen()
            self._display()
Example #21
File: display.py Project: voiler/IMPALA
 def __init__(self, args, display_size, saver):
     pygame.init()
     self.args = args
     self.surface = pygame.display.set_mode(display_size, 0, 24)
     pygame.display.set_caption('UNREAL')
     args.action_size = Environment.get_action_size(args.env_name)
     self.global_network = Agent(1, args)
     saver.restore(self.global_network)
     self.global_network.eval()
     self.environment = Environment.create_environment(args.env_name)
     self.font = pygame.font.SysFont(None, 20)
     self.value_history = ValueHistory()
     self.state_history = StateHistory()
     self.distribution = torch.distributions.Categorical
     self.episode_reward = 0
Example #22
    def __init__(self, env_name, process_idx):
        Environment.__init__(self)

        self.last_state = []
        self.last_action = []
        self.last_reward = []

        self.env = env_vrep.Simu_env(20000 + process_idx)
        self.env.connect_vrep()
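        # Each worker process runs its own V-REP simulation instance, connected on port 20000 + process_idx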

        # self.conn, child_conn = Pipe()
        # self.proc = Process(target=worker, args=(child_conn, env_name, process_idx))
        # self.proc.start()
        # self.conn.recv()
        self.reset()
Example #23
    def test_step(self):
        environment = Environment.create_environment()
        action_size = Environment.get_action_size()

        if sys.platform == 'darwin':
            self.assertTrue(action_size == 6)
        else:
            self.assertTrue(action_size == 8)

        for i in range(3):
            self.assertTrue(environment.last_observation.shape == (84, 84))
            if SAVE_IMAGE:
                scipy.misc.imsave("debug_observation{0}.png".format(i),
                                  environment.last_observation)
            reward, terminal = environment.step(0)
Example #24
def main(args):
    action_size = Environment.get_action_size(flags.env_type, flags.env_name)
    objective_size = Environment.get_objective_size(flags.env_type,
                                                    flags.env_name)
    global_network = UnrealModel(action_size, objective_size, -1,
                                 flags.use_lstm, flags.use_pixel_change,
                                 flags.use_value_replay,
                                 flags.use_reward_prediction, 0.0, 0.0,
                                 "/cpu:0")  # use CPU for weight visualize tool

    sess = tf.Session()

    init = tf.global_variables_initializer()
    sess.run(init)

    saver = tf.train.Saver()
    checkpoint = tf.train.get_checkpoint_state(flags.checkpoint_dir)
    if checkpoint and checkpoint.model_checkpoint_path:
        saver.restore(sess, checkpoint.model_checkpoint_path)
        print("checkpoint loaded:", checkpoint.model_checkpoint_path)
    else:
        print("Could not find old checkpoint")

    vars = {}
    var_list = global_network.get_vars()
    for v in var_list:
        vars[v.name] = v

    W_conv1 = sess.run(vars['net_-1/base_conv/W_base_conv1:0'])

    # show graph of W_conv1
    fig, axes = plt.subplots(3,
                             16,
                             figsize=(12, 6),
                             subplot_kw={
                                 'xticks': [],
                                 'yticks': []
                             })
    fig.subplots_adjust(hspace=0.1, wspace=0.1)

    for ax, i in zip(axes.flat, range(3 * 16)):
        inch = i // 16
        outch = i % 16
        img = W_conv1[:, :, inch, outch]
        ax.imshow(img, cmap=plt.cm.gray, interpolation='nearest')
        ax.set_title(str(inch) + "," + str(outch))

    plt.show()
Example #25
  def __init__(self):
    self.action_size = Environment.get_action_size(flags.env_type, flags.env_name)
    self.objective_size = Environment.get_objective_size(flags.env_type, flags.env_name)

    env_config = sim_config.get(flags.env_name)
    self.image_shape = [env_config['height'], env_config['width']]
    segnet_param_dict = {'segnet_mode': flags.segnet}
    is_training = tf.placeholder(tf.bool, name="training") # for display param in UnrealModel says its value

    self.global_network = UnrealModel(self.action_size,
                                      self.objective_size,
                                      -1,
                                      flags.use_lstm,
                                      flags.use_pixel_change,
                                      flags.use_value_replay,
                                      flags.use_reward_prediction,
                                      0.0, #flags.pixel_change_lambda
                                      0.0, #flags.entropy_beta
                                      device,
                                      segnet_param_dict=segnet_param_dict,
                                      image_shape=self.image_shape,
                                      is_training=is_training,
                                      n_classes=flags.n_classes,
                                      segnet_lambda=flags.segnet_lambda,
                                      dropout=flags.dropout,
                                      for_display=True)
    self.environment = Environment.create_environment(flags.env_type, flags.env_name, flags.termination_time_sec,
                                                      env_args={'episode_schedule': flags.split,
                                                                'log_action_trace': flags.log_action_trace,
                                                                'max_states_per_scene': flags.episodes_per_scene,
                                                                'episodes_per_scene_test': flags.episodes_per_scene})

    self.global_network.prepare_loss()

    self.total_loss = []
    self.segm_loss = []
    self.episode_reward = [0]
    self.episode_roomtype = []
    self.roomType_dict  = {}
    self.segnet_class_dict = {}
    self.success_rate = []
    self.batch_size = 20
    self.batch_cur_num = 0
    self.batch_prev_num = 0
    self.batch_si = []
    self.batch_sobjT = []
    self.batch_a = []
    self.batch_reward = []
Example #26
File: main.py Project: Micro-Masters/AI
def main():

    json_data = '{"observations": {"screen_features": ["height_map", "player_id", "player_relative", "unit_type"], ' \
                '"minimap_features": ["player_id", "selected"], "nonspatial_features": ["player"], ' \
                '"action_ids": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]}, "rewards": [1, 1, 1, 1]}'
    config = json.loads(json_data)
    print("config:")
    print(config)
    print("config.observations:")
    print(config["observations"])
    print("config.onservations.action_ids:")
    print(config["observations"]["action_ids"])
    print("config.rewards:")
    print(config["rewards"])

    print("Commencing magic...")
    sess = tf.Session()

    env = Environment()
    #test_env(env, config)

    agent_modifier = AgentModifier(config, 32)
    agent = A2CAgent(sess, agent_modifier)
    #agent = RandomAgent() ##for debugging TODO: delete later

    runner = A2CRunner(agent, env)
    runner.begin()
Example #27
def vvo_after_rl():
    #dataset contains random power injection of nodes
    df = load_dataset()
    df_train, df_test = split_dataset(df, 998)

    print('=====================vvo_brute_force=====================')
    vvo_brute_force(df_test)

    objectives = [ObjFuncType.ACTIVE_POWER_LOSSES]
    network_manager = ODSSNetworkManagement()
    power_flow = ODSSPowerFlow()
    
    # the environment shouldn't take the entire dataset as an input parameter; it should expose train and test methods instead
    environment = Environment(network_manager)

    print('=====================agent=====================')
    agent = DeepQLearningAgent(environment)

    n_episodes = 1000
    print('agent training started')
    t1 = time.time()
    #agent.train(df_train, n_episodes)
    t2 = time.time()
    print('agent training finished in', t2 - t1)

    agent.test(df_test)
    network_manager.print_all_capacitor_statuses()

    objective_functions = ObjectiveFunctions(objectives, power_flow)
    print('=====================vvo=====================')
    vvo = VVO(network_manager, power_flow, objective_functions) 
    vvo.test(df_test, resetCapacitorStatuses = False)
    network_manager.print_all_capacitor_statuses()
Example #28
    def __init__(self, display_size):
        pygame.init()

        self.surface = pygame.display.set_mode(display_size, 0, 24)
        pygame.display.set_caption('UNREAL')

        self.action_size = Environment.get_action_size()
        self.global_network = UnrealModel(self.action_size,
                                          -1,
                                          "/cpu:0",
                                          for_display=True)
        self.environment = Environment.create_environment()
        self.font = pygame.font.SysFont(None, 20)
        self.value_history = ValueHistory()
        self.state_history = StateHistory()
        self.episode_reward = 0
Example #29
	def __init__(self, model_size, group_id, environment_id=0, training=True):
		self.model_size = model_size
		self._training = training
		self.environment_id = environment_id
		self.group_id = group_id
		# Build environment
		self.environment = Environment.create_environment(flags.env_type, self.environment_id, self._training)
		self.extrinsic_reward_manipulator = eval(flags.extrinsic_reward_manipulator)
		self.terminal = True
		self._composite_batch = CompositeBatch(maxlen=flags.replay_buffer_size if flags.replay_mean > 0 else 1)
		# Statistics
		self.__client_statistics = Statistics(flags.episode_count_for_evaluation)
		if self._training:
			#logs
			if not os.path.isdir(flags.log_dir + "/performance"):
				os.mkdir(flags.log_dir + "/performance")
			if not os.path.isdir(flags.log_dir + "/episodes"):
				os.mkdir(flags.log_dir + "/episodes")
			formatter = logging.Formatter('%(asctime)s %(message)s')
			# reward logger
			self.__reward_logger = logging.getLogger('reward_{}_{}'.format(self.group_id, self.environment_id))
			hdlr = logging.FileHandler(flags.log_dir + '/performance/reward_{}_{}.log'.format(self.group_id, self.environment_id))
			hdlr.setFormatter(formatter)
			self.__reward_logger.addHandler(hdlr) 
			self.__reward_logger.setLevel(logging.DEBUG)
			self.__max_reward = float("-inf")
Example #30
 def prepare(self):
     if self.running:
         self.environment = Environment.create_environment(
             self.maze_size, self.level_seed)
         print('Started trainer ', self.thread_index)
         self.apply_next_location_loss = 0.0
         sys.stdout.flush()
Example #31
    def check_environment(self, env_type, env_name):
        env = Environment.create_environment(env_type, env_name, 0)
        action_size = Environment.get_action_size(env_type, env_name)

        for i in range(3):
            state, reward, terminal = env.process(0)

            print(state)
            print(reward)
            print(terminal)
            # # Check shape
            # self.assertTrue(state.shape == (84, 84, 3))
            # # state and pixel_change value range should be [0,1]
            # self.assertTrue(np.amax(state) <= 1.0)

        env.stop()
Example #32
from executers.twoDimExecuter import TwoDimExecuter
from executers.oneDimExecuter import OneDimExecuter
from heuristics.sort_heuristic import SortHeuristic
from heuristics.heap_heuristic import HeapHeuristic
from environment.environment import Environment

if __name__ == '__main__':
	exec_env = Environment()
	exec_env.evaluate('env.in')