def initialize_data():
    """Initialize database using actor and movie data from scraped JSON files."""
    actor_data = json.load(open("actor.json"))
    movie_data = json.load(open("movie.json"))
    movie_objects = {}
    actor_objects = {}
    # logger.info('load data from json and prepare to construct data structure')
    for movie in movie_data:
        new_movie = Movie(movie["movieName"], movie["movieYear"],
                          normalize_grossing(movie["movieGrossing"]), [])
        movie_objects[new_movie.name] = new_movie
        for actor in movie["movieStaring"]:
            for available_actor in actor_data:
                if available_actor["actorName"] == actor:
                    name = available_actor["actorName"]
                    if name not in actor_objects:
                        actor_objects[name] = Actor(name, normalize_age(available_actor["actorAge"]), [], 0)
                    actor_objects[name].act_movie.append(new_movie)
                    if new_movie.grossing is not None:
                        actor_objects[name].total_grossing += new_movie.grossing
                    new_movie.attend_actor.append(actor_objects[name])
                    break
    # actors that appear in no scraped movie still get an Actor object
    for actor in actor_data:
        if actor["actorName"] not in actor_objects:
            actor_objects[actor["actorName"]] = Actor(actor["actorName"], normalize_age(actor["actorAge"]), [], 0)
    return movie_objects, actor_objects
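
# normalize_grossing and normalize_age are referenced above but never defined in
# these snippets. The sketches below are plausible stand-ins, assuming grossing
# comes as a scraped string (e.g. "$248.8 million") and age may be missing; the
# real parsing rules are unknown.
def normalize_grossing(raw):
    """Extract a float from a scraped grossing string, or None if unparseable."""
    if not raw:
        return None
    digits = "".join(ch for ch in str(raw) if ch.isdigit() or ch == ".")
    try:
        return float(digits)
    except ValueError:
        return None

def normalize_age(raw):
    """Coerce a scraped age field to an int, defaulting to 0 when absent."""
    try:
        return int(raw)
    except (TypeError, ValueError):
        return 0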
def __init__(self, task):
    self.task = task
    self.state_size = task.state_size
    self.action_size = task.action_size
    self.action_low = task.action_low
    self.action_high = task.action_high

    # Actor (Policy) Model
    self.actor_local = Actor(self.state_size, self.action_size, self.action_low, self.action_high)
    self.actor_target = Actor(self.state_size, self.action_size, self.action_low, self.action_high)

    # Critic (Value) Model
    self.critic_local = Critic(self.state_size, self.action_size)
    self.critic_target = Critic(self.state_size, self.action_size)

    # Initialize target model parameters with local model parameters
    self.critic_target.model.set_weights(self.critic_local.model.get_weights())
    self.actor_target.model.set_weights(self.actor_local.model.get_weights())

    # Noise process
    self.exploration_mu = 0
    self.exploration_theta = 0.15
    self.exploration_sigma = 0.2
    self.noise = OUNoise(self.action_size, self.exploration_mu,
                         self.exploration_theta, self.exploration_sigma)

    # Replay memory
    self.buffer_size = 100000
    self.batch_size = 64
    self.memory = ReplayBuffer(self.buffer_size, self.batch_size)

    # Algorithm parameters
    self.gamma = 0.99  # discount factor
    self.tau = 0.01    # for soft update of target parameters
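
# The tau parameter above is typically consumed by a soft-update helper like the
# sketch below. No such helper appears in these snippets; this is only an
# illustration of the convention the Keras-style agents here (with .model
# attributes) appear to assume: target = tau * local + (1 - tau) * target.
def soft_update(local_model, target_model, tau):
    """Blend target network weights toward the local network's weights."""
    new_weights = [tau * lw + (1.0 - tau) * tw
                   for lw, tw in zip(local_model.model.get_weights(),
                                     target_model.model.get_weights())]
    target_model.model.set_weights(new_weights)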
def __init__(self, seed, n_state, n_action, batch_size=64, buffer=1e5, gamma=0.99,
             lr_actor=1e-4, lr_critic=1e-3, weight_decay=0, tau=1e-3):
    self.batch_size = batch_size

    # init actor
    self.local_actor = Actor(n_state, n_action, seed).to(device)
    self.target_actor = Actor(n_state, n_action, seed).to(device)
    self.optim_actor = torch.optim.Adam(self.local_actor.parameters(), lr=lr_actor)

    # init critic
    self.local_critic = Critic(n_state, n_action, seed).to(device)
    self.target_critic = Critic(n_state, n_action, seed).to(device)
    self.optim_critic = torch.optim.Adam(self.local_critic.parameters(),
                                         lr=lr_critic, weight_decay=weight_decay)

    # init memory
    self.memory = memory(int(buffer), device, seed)
    self.tau = tau
    self.gamma = gamma
    self.noise = noise(n_action, seed=seed)
def __init__(self, state_size, action_size):
    """
    Initializes Agent object.
    @Param:
        1. state_size: dimension of each state.
        2. action_size: number of actions.
    """
    self.state_size = state_size
    self.action_size = action_size

    # Actor network
    self.actor_local = Actor(self.state_size, self.action_size).to(device)   # local model
    self.actor_target = Actor(self.state_size, self.action_size).to(device)  # target model for the TD-target
    self.actor_optimizer = optim.Adam(self.actor_local.parameters(),
                                      lr=LR_ACTOR)  # Adam optimizer for the Actor network

    # Critic network
    self.critic_local = Critic(self.state_size, self.action_size).to(device)   # local model
    self.critic_target = Critic(self.state_size, self.action_size).to(device)  # target model for the TD-target
    self.critic_optimizer = optim.Adam(self.critic_local.parameters(), lr=LR_CRITIC,
                                       weight_decay=WEIGHT_DECAY)  # Adam optimizer with L2 weight decay for the Critic network

    # Noise process
    self.noise = OUNoise(action_size)  # Ornstein-Uhlenbeck process

    # Replay memory
    self.memory = ReplayBuffer(self.action_size, BUFFER_SIZE, MINI_BATCH)  # experience replay buffer
def __init__(self, state_size, action_size, max_action, minibatch_size, a_lr, c_lr, gamma, tau):
    self.state_size = state_size
    self.action_size = action_size
    self.max_action = max_action
    self.critic_lr = c_lr
    self.actor_lr = a_lr

    self.actor_network = Actor(self.state_size, self.action_size, self.max_action, self.actor_lr)
    self.actor_target_network = Actor(self.state_size, self.action_size, self.max_action, self.actor_lr)
    self.critic_network = Critic(self.state_size, self.action_size, self.critic_lr)
    self.critic_target_network = Critic(self.state_size, self.action_size, self.critic_lr)

    # start the target networks as exact copies of the online networks
    self.actor_target_network.set_weights(self.actor_network.get_weights())
    self.critic_target_network.set_weights(self.critic_network.get_weights())

    self.critic_optimizer = optimizers.Adam(learning_rate=self.critic_lr)
    self.actor_optimizer = optimizers.Adam(learning_rate=self.actor_lr)

    self.replay_buffer = ReplayBuffer(int(1e6))  # cast to int in case the buffer uses it as a maxlen
    self.MINIBATCH_SIZE = minibatch_size
    self.GAMMA = tf.cast(gamma, dtype=tf.float64)
    self.TAU = tau
    self.noise = OUNoise(self.action_size)
def __init__(self, state_size, batch_size, is_eval=False):
    self.state_size = state_size
    self.action_size = 3  # buy, sell, hold

    # define replay memory size
    self.buffer_size = 1000000
    self.batch_size = batch_size
    self.memory = ReplayBuffer(self.buffer_size, self.batch_size)
    self.inventory = []

    # define whether or not training is going on
    self.is_eval = is_eval

    # discount factor
    self.gamma = 0.99
    # soft update rate for the actor-critic model
    self.tau = 0.001

    # instantiate the local and target actor models for soft updates
    self.actor_local = Actor(self.state_size, self.action_size)
    self.actor_target = Actor(self.state_size, self.action_size)

    # critic model mapping state-action pairs to Q-values
    self.critic_local = Critic(self.state_size, self.action_size)
    # instantiate the local and target critic models for soft updates
    self.critic_target = Critic(self.state_size, self.action_size)

    # set target model parameters to local model parameters
    self.critic_target.model.set_weights(self.critic_local.model.get_weights())
    self.actor_target.model.set_weights(self.actor_local.model.get_weights())
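
# Several of these agents construct ReplayBuffer(buffer_size, batch_size), but
# the class itself never appears. This is a minimal sketch of the add/sample
# interface they appear to assume, not any of the original implementations.
import random
from collections import deque, namedtuple

class ReplayBuffer:
    """Fixed-size buffer of experience tuples for off-policy learning."""

    def __init__(self, buffer_size, batch_size):
        self.memory = deque(maxlen=buffer_size)  # oldest experiences fall off first
        self.batch_size = batch_size
        self.experience = namedtuple("Experience",
                                     ["state", "action", "reward", "next_state", "done"])

    def add(self, state, action, reward, next_state, done):
        self.memory.append(self.experience(state, action, reward, next_state, done))

    def sample(self):
        # uniform random sampling breaks temporal correlation between transitions
        return random.sample(self.memory, k=self.batch_size)

    def __len__(self):
        return len(self.memory)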
def test_movie():
    # check boolean equality functions
    movie = Movie("Moana", 2009)
    print(movie)
    movie3 = Movie("Moana", 2010)
    print(movie3)
    movie2 = Movie("Inception", 2010)
    print(movie2)
    print(movie > movie2)
    print(movie < movie3)
    print(movie3 == movie3)

    # check removing an actor from the list of actors
    actors = [Actor("Auli'i Cravalho"), Actor("Dwayne Johnson"),
              Actor("Rachel House"), Actor("Temuera Morrison")]
    for actor in actors:
        movie.add_actor(actor)
    movie.remove_actor(Actor("Auli'i Cravalho"))
    print(movie.actors)

    # check for out-of-range runtime
    movie.runtime_minutes = 121
    print("Movie runtime: {} minutes".format(movie.runtime_minutes))
    movie.external_rating = 30
    print("votes: {}".format(movie.external_rating))
def __init__(self, state_size, action_size, params, seed):
    """Initialize a DDPG agent

    Params
    ======
        state_size (int): dimension of each state
        action_size (int): dimension of each action
        params (Params): hyperparameters
        seed (int): random seed
    """
    self.gamma = params.gamma
    self.tau = params.tau
    self.seed = np.random.seed(seed)

    # actor networks
    self.actor_local = Actor(state_size, action_size, params.units_actor, seed).to(device)
    self.actor_target = Actor(state_size, action_size, params.units_actor, seed).to(device)
    self.actor_optimizer = optim.Adam(self.actor_local.parameters(), params.lr_actor)

    # critic networks
    self.critic_local = Critic(state_size, action_size, params.units_critic, seed).to(device)
    self.critic_target = Critic(state_size, action_size, params.units_critic, seed).to(device)
    self.critic_optimizer = optim.Adam(self.critic_local.parameters(), params.lr_critic)

    # Noise process
    self.noise = OUNoise(action_size, seed, params.mu, params.theta, params.sigma)
def __init__(self, state_size=24, action_size=2, random_seed=0):
    """
    Initializes Agent object.
    @Param:
        1. state_size: dimension of each state.
        2. action_size: number of actions.
    """
    self.state_size = state_size
    self.action_size = action_size
    self.seed = random.seed(random_seed)

    # Actor network
    self.actor_local = Actor(self.state_size, self.action_size, random_seed).to(device)
    self.actor_target = Actor(self.state_size, self.action_size, random_seed).to(device)
    self.actor_optimizer = optim.Adam(self.actor_local.parameters(), lr=LR_ACTOR)

    # Critic network
    self.critic_local = Critic(self.state_size, self.action_size, random_seed).to(device)
    self.critic_target = Critic(self.state_size, self.action_size, random_seed).to(device)
    self.critic_optimizer = optim.Adam(self.critic_local.parameters(), lr=LR_CRITIC)

    # Noise process
    self.noise = OUNoise(action_size, random_seed)  # Ornstein-Uhlenbeck process

    # Replay memory
    self.memory = ReplayBuffer(self.action_size, BUFFER_SIZE,
                               MINI_BATCH, random_seed)  # experience replay buffer
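
# OUNoise appears throughout these agents but is never defined here. Below is a
# minimal sketch of a standard Ornstein-Uhlenbeck process matching the
# (size, seed, mu, theta, sigma) call shapes above; treat the exact signature
# and defaults as assumptions.
import copy
import random
import numpy as np

class OUNoise:
    """Ornstein-Uhlenbeck process: temporally correlated exploration noise."""

    def __init__(self, size, seed=0, mu=0.0, theta=0.15, sigma=0.2):
        self.mu = mu * np.ones(size)
        self.theta = theta  # strength of the pull back toward mu
        self.sigma = sigma  # scale of the random kicks
        random.seed(seed)
        self.reset()

    def reset(self):
        """Reset the internal state to the mean."""
        self.state = copy.copy(self.mu)

    def sample(self):
        """dx = theta * (mu - x) + sigma * N(0, 1); returns the updated state."""
        x = self.state
        dx = self.theta * (self.mu - x) + self.sigma * np.random.standard_normal(x.shape)
        self.state = x + dx
        return self.state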
def init_game(self):
    # self.gamemap.load_map_from_json('res/map/test_map.json')
    self.gamemap.create_default_terrain()

    for i in range(3, 6):
        self.actors.append(Actor('soldier', 's', 0, sprite=0xE100, color=TEAM_COLORS[0],
                                 x=i, y=1, movement=1, stats=Stats(3, 3, 1)))
    for i in range(0, 10, 2):
        self.actors.append(Actor('barbarian', 'b', 1, sprite=0xE101, color=TEAM_COLORS[1],
                                 x=i, y=8, movement=2, stats=Stats(3, 2, 0)))
    self.actors.append(Actor('king', 'K', 0, sprite=0xE102, color=TEAM_COLORS[0],
                             x=4, y=0, movement=2, stats=Stats(5, 3, 4)))
    self.actors.append(Actor('leader', 'L', 1, sprite=0xE103, color=TEAM_COLORS[1],
                             x=4, y=9, movement=2, stats=Stats(7, 4, 2)))
    self.actors.append(Actor('Xander', 'S', 2, sprite=0xE104, color=TEAM_COLORS[2],
                             x=5, y=5, movement=10, stats=Stats(7, 40, 2)))

    # the most agile units act first
    self.turn_to_take = self.actors.copy()
    self.turn_to_take.sort(key=lambda a: a.stats.mod['agility'], reverse=True)
    self.unit_turn = self.turn_to_take.pop(0)
    self.unit_turn.new_turn()
    self.game_state = 'new_turn'
def __init__(self, task):
    self.task = task
    self.state_size = task.state_size
    self.action_size = task.action_size
    self.action_low = task.action_low
    self.action_high = task.action_high

    self.actor_local = Actor(self.state_size, self.action_size, self.action_low, self.action_high)
    self.actor_target = Actor(self.state_size, self.action_size, self.action_low, self.action_high)
    self.critic_local = Critic(self.state_size, self.action_size)
    self.critic_target = Critic(self.state_size, self.action_size)
    self.critic_target.model.set_weights(self.critic_local.model.get_weights())
    self.actor_target.model.set_weights(self.actor_local.model.get_weights())

    # random noise
    self.mu = 0
    self.theta = 0.2
    self.sigma = 0.005
    self.noise = Noise(self.action_size, self.mu, self.theta, self.sigma)

    self.gamma = 0.9
    self.tau = 0.1
    self.best_score = -np.inf
    self.score = 0
    self.buffer_size = 100000
    self.batch_size = 64
    self.memory = ReplayBuffer(self.buffer_size, self.batch_size)
def __init__(self, state_size, action_size, random_seed, hyperparams):
    self.state_size = state_size
    self.action_size = action_size
    self.seed = random.seed(random_seed)
    self.hyperparams = hyperparams

    self.actor = Actor(state_size, action_size, random_seed).to(device)
    self.actor_noise = Actor(state_size, action_size, random_seed).to(device)
    self.actor_target = Actor(state_size, action_size, random_seed).to(device)
    self.actor_optim = optim.Adam(self.actor.parameters(), lr=hyperparams.alpha_actor)

    self.critic = Critic(state_size, action_size, random_seed).to(device)
    self.critic_target = Critic(state_size, action_size, random_seed).to(device)
    self.critic_optim = optim.Adam(
        self.critic.parameters(),
        lr=hyperparams.alpha_critic,
        weight_decay=hyperparams.weight_decay,
    )

    self.replay_buffer = ReplayBuffer(hyperparams.buffer_size, hyperparams.batch_size, random_seed)
    self.noise = OUNoise(
        action_size,
        random_seed,
        self.hyperparams.mu,
        self.hyperparams.theta,
        self.hyperparams.sigma,
    )
def __init__(self, n_agents, state_size, action_size, seed):
    # the centralized critic sees every agent's state and action
    critic_input_size = (state_size + action_size) * n_agents
    self.actor_regular = Actor(state_size, action_size, seed).to(DEVICE)
    self.actor_target = Actor(state_size, action_size, seed).to(DEVICE)
    self.critic_regular = Critic(critic_input_size, seed).to(DEVICE)
    self.critic_target = Critic(critic_input_size, seed).to(DEVICE)
def __init__(self, input_dim, action_dim, action_scale, memory_size, gamma, tau,
             learning_rate_actor=1e-3, learning_rate_critic=1e-3,
             device_name="cpu:0", checkpoint_directory="ckpt/"):
    super(DDPG, self).__init__()
    self.input_dim = input_dim
    self.action_dim = action_dim
    self.action_scale = action_scale
    self.memory_size = memory_size
    self.replay_memory = ReplayMemory(memory_size)
    self.gamma = gamma
    self.tau = tau
    self.learning_rate_actor = learning_rate_actor
    self.learning_rate_critic = learning_rate_critic
    self.device_name = device_name
    self.checkpoint_directory = checkpoint_directory
    if not os.path.exists(self.checkpoint_directory):
        os.makedirs(self.checkpoint_directory)

    # actor
    self.actor_active = Actor(self.input_dim, self.action_dim, self.action_scale, name="actor_active")
    self.actor_target = Actor(self.input_dim, self.action_dim, self.action_scale, name="actor_target")
    self.actor_target.trainable = False

    # critic
    self.critic_active = Critic(self.input_dim, self.action_dim, name="critic_active")
    self.critic_target = Critic(self.input_dim, self.action_dim, name="critic_target")
    self.critic_target.trainable = False

    # optimizer
    self.optimizer_actor = tf.train.AdamOptimizer(learning_rate=self.learning_rate_actor)
    self.optimizer_critic = tf.train.AdamOptimizer(learning_rate=self.learning_rate_critic)

    # logging
    self.global_step = 0
def __init__(self,
             device,
             state_size,
             action_size,
             buffer_size=10,
             batch_size=10,
             actor_learning_rate=1e-4,
             critic_learning_rate=1e-3,
             discount_rate=0.99,
             tau=0.1,
             steps_per_update=4,
             action_range=None,
             dropout_p=0.0,
             weight_decay=0.0001,
             noise_max=0.2,
             noise_decay=1.0,
             n_agents=1):
    self.device: torch.device = device
    self.state_size = state_size
    self.action_size = action_size

    self.critic_control = Critic(state_size, action_size).to(device)
    self.critic_control.dropout.p = dropout_p
    self.critic_target = Critic(state_size, action_size).to(device)
    self.critic_target.eval()  # target network is never trained directly
    self.critic_optimizer = torch.optim.Adam(
        self.critic_control.parameters(),
        weight_decay=weight_decay,
        lr=critic_learning_rate)

    self.actor_control = Actor(state_size, action_size, action_range).to(device)
    self.actor_control.dropout.p = dropout_p
    self.actor_target = Actor(state_size, action_size, action_range).to(device)
    self.actor_target.eval()
    self.actor_optimizer = torch.optim.Adam(
        self.actor_control.parameters(),
        weight_decay=weight_decay,
        lr=actor_learning_rate)

    self.batch_size = batch_size
    self.min_buffer_size = batch_size
    self.replay_buffer = ReplayBuffer(device, state_size, action_size, buffer_size)
    self.discount_rate = discount_rate
    self.tau = tau
    self.step_count = 0
    self.steps_per_update = steps_per_update
    self.noise_max = noise_max
    self.noise = OUNoise([n_agents, action_size], 15071988, sigma=self.noise_max)
    self.noise_decay = noise_decay
    self.last_score = float('-inf')
def __init__(self, env):
    """
    :param env: (class instance) Instructions about the goal and reward
    """
    self.env = env
    self.state_size = env.observation_space.shape[0]
    self.action_size = env.action_space.shape[0]
    self.action_low = env.action_space.low
    self.action_high = env.action_space.high
    self.score = 0.0
    self.best = 0.0

    # Instances of the policy function (actor) and the value function (critic),
    # i.e. an actor-critic with advantage

    # Actor local and target
    self.actor_local = Actor(self.state_size, self.action_size, self.action_low, self.action_high)
    # Save the actor model architecture for future use
    actor_local_model_yaml = self.actor_local.model.to_yaml()
    with open("actor_local_model.yaml", "w") as yaml_file:
        yaml_file.write(actor_local_model_yaml)
    self.actor_target = Actor(self.state_size, self.action_size, self.action_low, self.action_high)

    # Critic local and target
    self.critic_local = Critic(self.state_size, self.action_size)
    self.critic_target = Critic(self.state_size, self.action_size)

    # Initialize the target models with the local models' weights
    self.critic_target.model.set_weights(self.critic_local.model.get_weights())
    self.actor_target.model.set_weights(self.actor_local.model.get_weights())

    # Initialize the Ornstein-Uhlenbeck noise process
    self.exploration_mu = 0
    self.exploration_theta = 0.15
    self.exploration_sigma = 0.2
    self.noise = OUNoise(self.action_size, self.exploration_mu,
                         self.exploration_theta, self.exploration_sigma)

    # Initialize the replay memory
    self.buffer_size = 100000
    self.batch_size = 64  # original 64
    self.memory = ReplayBuffer(self.buffer_size, self.batch_size)

    # Parameters for the algorithm
    self.gamma = 0.99  # discount factor
    self.tau = 0.01    # soft update rate for the target parameters
def restart_game(self, btn):
    self.isGameOver = False
    self.player = Actor("Player", False, "X")
    self.enemy = Actor("Enemy", True, "O")
    self.lstAvailableChoice = list(self.dictIndexToButtonName.keys())
    self.player.start_first()
    self.set_all_button_text("")
    self.set_all_button_disable(False)
def __init__(self, state_dim, action_dim, max_action):
    self.actor = Actor(state_dim, action_dim, max_action).to(device)
    self.actor_target = Actor(state_dim, action_dim, max_action).to(device)
    self.actor_target.load_state_dict(self.actor.state_dict())
    self.actor_optimizer = torch.optim.Adam(self.actor.parameters())

    self.critic = Critic(state_dim, action_dim).to(device)
    self.critic_target = Critic(state_dim, action_dim).to(device)
    self.critic_target.load_state_dict(self.critic.state_dict())
    self.critic_optimizer = torch.optim.Adam(self.critic.parameters())

    self.max_action = max_action
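
# None of the snippets include the update step these constructors prepare for.
# This is a generic sketch of the core DDPG learning rule in PyTorch, reusing
# the attribute names from the constructor directly above; the method name,
# gamma default, and the assumption that dones is a 0/1 float tensor are all
# illustrative, not taken from the original code.
def learn(self, states, actions, rewards, next_states, dones, gamma=0.99):
    # critic: regress Q(s, a) toward the TD target r + gamma * Q'(s', mu'(s'))
    with torch.no_grad():
        next_actions = self.actor_target(next_states)
        q_targets = rewards + gamma * (1 - dones) * self.critic_target(next_states, next_actions)
    critic_loss = torch.nn.functional.mse_loss(self.critic(states, actions), q_targets)
    self.critic_optimizer.zero_grad()
    critic_loss.backward()
    self.critic_optimizer.step()

    # actor: ascend the critic's estimate of Q(s, mu(s))
    actor_loss = -self.critic(states, self.actor(states)).mean()
    self.actor_optimizer.zero_grad()
    actor_loss.backward()
    self.actor_optimizer.step()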
def __init__(self, task, buffer_size, batch_size, gamma, tau, actor_dropout, critic_dropout,
             exploration_theta, exploration_sigma, actor_lr, critic_lr):
    self.task = task
    self.state_size = task.state_size
    self.action_size = task.action_size
    self.action_low = task.action_low
    self.action_high = task.action_high
    self.actor_dropout = actor_dropout
    self.critic_dropout = critic_dropout
    self.actor_lr = actor_lr
    self.critic_lr = critic_lr

    # Actor (Policy) Model
    self.actor_local = Actor(self.state_size, self.action_size, self.action_low,
                             self.action_high, self.actor_dropout, self.actor_lr)
    self.actor_target = Actor(self.state_size, self.action_size, self.action_low,
                              self.action_high, self.actor_dropout, self.actor_lr)

    # Critic (Value) Model
    self.critic_local = Critic(self.state_size, self.action_size, self.critic_dropout, self.critic_lr)
    self.critic_target = Critic(self.state_size, self.action_size, self.critic_dropout, self.critic_lr)

    # Initialize target model parameters with local model parameters
    self.critic_target.model.set_weights(self.critic_local.model.get_weights())
    self.actor_target.model.set_weights(self.actor_local.model.get_weights())

    # Noise process
    self.exploration_mu = 5
    self.exploration_theta = exploration_theta
    self.exploration_sigma = exploration_sigma
    self.noise = OUNoise(self.action_size, self.exploration_mu,
                         self.exploration_theta, self.exploration_sigma)

    # Replay memory
    self.buffer_size = buffer_size
    self.batch_size = batch_size
    self.memory = PrioritizedReplayBuffer(self.buffer_size, self.batch_size)

    # Algorithm parameters
    self.gamma = gamma  # discount factor
    self.tau = tau      # for soft update of target parameters
    self.best_score = -np.inf
def setUp(self):
    """Creates a test fixture before each test method is run"""
    self.actor1 = Actor("Sub", "Hossan", "A01050900", "2012-12-25 00:00:00", 3)
    self.actor2 = Actor("Ewan", "Watt", "A01020509", "1995-04-08 00:00:00", 0)
    self.model1 = Model("Ashvan", "Wal", "A01023474", "2000-01-15 00:00:00", "commercial")
    self.model2 = Model("Phuong", "Ho", "A01023444", "2011-05-23 00:00:00", "vedette")
    self.talent1 = TalentAgency("testresults.json")
    self.logPoint()
def __init__(self):
    super(MainLayer, self).__init__()
    self.player = Actor(320, 240, (0, 0, 255))
    self.add(self.player)
    for pos in [(100, 100), (540, 380), (540, 100), (100, 380)]:
        self.add(Actor(pos[0], pos[1], (255, 0, 0)))
    cell = self.player.width * 1.25
    self.collman = cm.CollisionManagerGrid(0, 640, 0, 480, cell, cell)
    self.speed = 100.0
    self.pressed = defaultdict(int)
    self.schedule(self.update)
def __init__(self, act_dim, env_dim, act_range, buffer_size=20000, gamma=0.99, lr=0.00005, tau=0.001):
    """Initialization"""
    # Environment and A2C parameters
    self.act_dim = act_dim
    self.act_range = act_range
    self.env_dim = env_dim
    self.gamma = gamma

    # Create actor and critic networks
    self.actor = Actor(self.env_dim, act_dim, act_range, 0.1 * lr, tau)
    self.demo_actor = Actor(self.env_dim, act_dim, act_range, 0.1 * lr, tau)
    self.critic = Critic(self.env_dim, act_dim, lr, tau)
    self.buffer = Replay()
    self.batch_size = 2000
def __init__(self, state_size, batch_size, is_eval=False):
    self.state_size = state_size
    self.action_size = 3  # buy, sell, hold; this line was commented out but is required below

    self.buffer_size = 1000000
    self.batch_size = batch_size
    self.memory = ReplayBuffer(self.buffer_size, self.batch_size)
    self.inventory = []
    self.is_eval = is_eval

    self.gamma = 0.99
    self.tau = 0.001

    self.actor_local = Actor(self.state_size, self.action_size)
    self.actor_target = Actor(self.state_size, self.action_size)
    self.critic_local = Critic(self.state_size, self.action_size)
    self.critic_target = Critic(self.state_size, self.action_size)
    self.critic_target.model.set_weights(self.critic_local.model.get_weights())
    self.actor_target.model.set_weights(self.actor_local.model.get_weights())
def _run_remote_tasks(self, signal_queue):
    # The remote actor will actually run on the local machine or another machine of the xparl cluster
    remote_actor = Actor(self.game, self.args)

    while True:
        # receive a running-task signal
        # signal: specifies the task type and the task input data (optional)
        signal = signal_queue.get()
        if signal["task"] == "self-play":
            episode_num_each_actor = self.args.numEps // self.args.actors_num
            result = remote_actor.self_play(
                self.current_agent.get_weights(), episode_num_each_actor)
            self.remote_actors_return_queue.put({"self-play": result})
        elif signal["task"] == "pitting":
            games_num_each_actor = self.args.arenaCompare // self.args.actors_num
            result = remote_actor.pitting(
                self.previous_agent.get_weights(),
                self.current_agent.get_weights(), games_num_each_actor)
            self.remote_actors_return_queue.put({"pitting": result})
        elif signal["task"] == "evaluate_test_dataset":
            test_dataset = signal["test_dataset"]
            result = remote_actor.evaluate_test_dataset(
                self.current_agent.get_weights(), test_dataset)
            self.remote_actors_return_queue.put({"evaluate_test_dataset": result})
        else:
            raise NotImplementedError
def add_movie(self, movie_name, actors):
    # find whether the movie name already exists in the movie list
    target_movie = next(
        (x for x in self.__movie_list if x.get_movie_name() == movie_name), None)
    if target_movie is None:
        target_movie = Movie(movie_name)
        self.__movie_list.append(target_movie)
    movie_actors = target_movie.get_actors()

    for actor in actors:
        # find whether the actor name already exists in the actor list
        target_actor = next(
            (x for x in self.__actor_list if x.get_actor_name() == actor), None)
        if target_actor is None:
            target_actor = Actor(actor)
            self.__actor_list.append(target_actor)

        # add the new movie to target_actor
        new_movie_list = target_actor.get_movies()
        new_movie_list.append(target_movie)
        target_actor.set_movies(new_movie_list)

        # add the new actor to the target_movie pending list
        movie_actors.append(target_actor)
    target_movie.set_actors(movie_actors)
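
# Hypothetical usage of add_movie; the owning class name (MovieGraph) and the
# data are invented for illustration. Each call links Movie and Actor objects
# in both directions, reusing any nodes that already exist.
graph = MovieGraph()
graph.add_movie("Moana", ["Auli'i Cravalho", "Dwayne Johnson"])
graph.add_movie("Jumanji", ["Dwayne Johnson"])  # the "Dwayne Johnson" Actor node is reused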
def run_remote_sample(self):
    """Sample data from the remote actor and update the remote actor's parameters."""
    remote_actor = Actor(self.config)
    cnt = 0
    remote_actor.set_weights(self.cache_params)

    while True:
        batch = remote_actor.sample()
        self.sample_data_queue.put(batch)

        cnt += 1
        if cnt % self.config['get_remote_metrics_interval'] == 0:
            metrics = remote_actor.get_metrics()
            if metrics:
                self.remote_metrics_queue.put(metrics)

        self.params_lock.acquire()
        if self.params_updated and self.cache_params_sent_cnt >= self.config['params_broadcast_interval']:
            self.params_updated = False
            self.cache_params = self.agent.get_weights()
            self.cache_params_sent_cnt = 0
        self.cache_params_sent_cnt += 1
        self.total_params_sync += 1
        self.params_lock.release()

        remote_actor.set_weights(self.cache_params)
def __init__(self, state_item_num, action_item_num, emb_dim, batch_size, tau,
             actor_lr, critic_lr, gamma, buffer_size, item_space, summary_dir):
    self.state_item_num = state_item_num
    self.action_item_num = action_item_num
    self.emb_dim = emb_dim
    self.batch_size = batch_size
    self.tau = tau
    self.actor_lr = actor_lr
    self.critic_lr = critic_lr
    self.gamma = gamma
    self.buffer_size = buffer_size
    self.item_space = item_space
    self.summary_dir = summary_dir

    self.sess = tf.Session()
    self.s_dim = emb_dim * state_item_num
    self.a_dim = emb_dim * action_item_num
    self.actor = Actor(self.sess, state_item_num, action_item_num, emb_dim, batch_size, tau, actor_lr)
    self.critic = Critic(self.sess, state_item_num, action_item_num, emb_dim,
                         self.actor.get_num_trainable_vars(), gamma, tau, critic_lr)
    self.exploration_noise = OUNoise(self.a_dim)

    # set up summary operators
    self.summary_ops, self.summary_vars = self.build_summaries()
    self.sess.run(tf.global_variables_initializer())
    self.writer = tf.summary.FileWriter(summary_dir, self.sess.graph)

    # initialize target network weights
    self.actor.hard_update_target_network()
    self.critic.hard_update_target_network()

    # initialize replay memory
    self.replay_buffer = ReplayBuffer(buffer_size)
def _create_new_children(self):
    """
    Private function to create the actors in the carla world
    which are children actors of this parent.

    :return:
    """
    for actor in self.carla_world.get_actors():
        if ((actor.parent and actor.parent.id == self.carla_id) or
                (actor.parent is None and self.carla_id == 0)):
            if actor.id not in self.child_actors:
                if actor.type_id.startswith('traffic'):
                    self.child_actors[actor.id] = Traffic.create_actor(
                        carla_actor=actor, parent=self)
                elif actor.type_id.startswith("vehicle"):
                    self.child_actors[actor.id] = Vehicle.create_actor(
                        carla_actor=actor, parent=self)
                elif actor.type_id.startswith("sensor"):
                    self.child_actors[actor.id] = Sensor.create_actor(
                        carla_actor=actor, parent=self)
                elif actor.type_id.startswith("spectator"):
                    self.child_actors[actor.id] = Spectator(
                        carla_actor=actor, parent=self)
                else:
                    self.child_actors[actor.id] = Actor(
                        carla_actor=actor, parent=self)
def __init__(self, parent=None, mode='add', actor_id=None):
    EditorBaseFrame.__init__(self, parent)
    self.actor = Actor()
    self.mode = mode
    self.actor_id = actor_id
    if self.actor_id is not None:
        self.load_actor()

    # create widgets
    name_frame = ttk.LabelFrame(self, text='Name')
    self.name_entry = ttk.Entry(name_frame, textvariable=self.actor.get_name_var())
    alignment_frame = ttk.LabelFrame(self, text='Alignment')
    self.alignment_combobox = ttk.Combobox(
        alignment_frame,
        values=('Hero', 'Anti-Hero', 'Villain', 'Civilian', 'Wild Card'),
        textvariable=self.actor.get_alignment_var())
    self.alignment_combobox.set(self.actor.alignment)
    self.save_button = ttk.Button(self, text='Save', command=self._b_save)
    self.back_button = ttk.Button(self, text='Back', command=self._b_back)

    # grid widgets
    self.name_entry.grid()
    self.alignment_combobox.grid()
    name_frame.grid(column=0, row=0)
    alignment_frame.grid(column=1, row=0)
    self.save_button.grid(column=10, row=0)
    self.back_button.grid(column=11, row=0)
def exec(self):
    input_lines = []
    for line in sys.stdin.readlines():
        input_lines.append(line.rstrip())

    try:
        first_line = self.split_line(input_lines[0])
        self.define_variables(first_line)
        self.define_group()

        current_line_idx = 1
        for i in range(self._number_actors):
            current_line = self.split_line(input_lines[current_line_idx])
            actor_cost = int(current_line[0])
            actor_number_groups = int(current_line[1])
            actor_groups = set()
            # each actor block lists its group ids, one per line
            end_block_idx = current_line_idx + actor_number_groups + 1
            for line_idx_group in range(current_line_idx + 1, end_block_idx):
                actor_groups.add(int(input_lines[line_idx_group]))
            self._actors.append(Actor(i + 1, actor_cost, actor_groups))
            current_line_idx = end_block_idx
    except (ValueError, TypeError):
        print("Incorrect input format")