def __init__(self, max_velocity, turn_speed, max_health, max_armor,
             spawn_point=(200, 200), starting_angle=0,
             starter_weapon_pack=None, starter_ammo_pack=None,
             color='#303030', radius=10):
    BaseAgent.__init__(self, max_velocity, turn_speed, max_health, max_armor,
                       spawn_point, starting_angle, starter_weapon_pack,
                       starter_ammo_pack, color, radius)
    # Small fully connected model; assumes Keras-style imports, e.g.
    # from keras.layers import Input, Flatten, Dense
    # from keras.models import Model
    # from keras.optimizers import RMSprop
    input_layer = Input(shape=(17, 13))
    flattened_input = Flatten()(input_layer)
    inner_layer = Dense(20, activation='relu')(flattened_input)
    output_layer = Dense(11, activation='tanh')(inner_layer)
    self.model = Model(input_layer, output_layer)
    self.model.compile(RMSprop(), loss='hinge')
    self.delta = 1 - 1e-5  # decay coefficient for epsilon-greedy exploration
    self.epsilon = 1       # probability of taking a random action
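# Illustrative only (not from the original code): a minimal sketch of how the
# epsilon-greedy fields above (self.epsilon, self.delta) and self.model could be
# combined when picking an action. `agent`, `choose_action_sketch`, and
# `observation` are hypothetical; observation is assumed to be a batch of
# shape (1, 17, 13) so it can be fed to a Keras-style predict().
import random

def choose_action_sketch(agent, observation, n_actions=11):
    """Random action with probability epsilon, otherwise greedy on the model
    output; epsilon then decays multiplicatively by delta."""
    if random.random() < agent.epsilon:
        action = random.randrange(n_actions)
    else:
        scores = agent.model.predict(observation)  # Keras-style batched predict
        action = int(scores[0].argmax())
    agent.epsilon *= agent.delta  # slowly shifts from exploration to exploitation
    return action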
def __init__(self, max_velocity, turn_speed, max_health, max_armor,
             spawn_point=(200, 200), starting_angle=0,
             starter_weapon_pack=None, starter_ammo_pack=None,
             color='#303030', radius=10):
    BaseAgent.__init__(self, max_velocity, turn_speed, max_health, max_armor,
                       spawn_point, starting_angle, starter_weapon_pack,
                       starter_ammo_pack, color, radius)
    # Disabled 1-D convolutional model, kept for reference:
    # input_layer = Input(shape=(17, 13))
    # inner_layer1 = Convolution1D(20, 5, activation='relu')(input_layer)
    # pooling1 = MaxPooling1D(2)(inner_layer1)
    # inner_layer2 = Convolution1D(20, 3, activation='relu')(pooling1)
    # pooling2 = MaxPooling1D(2)(inner_layer2)
    # flattened = Flatten()(pooling2)
    # inner_layer3 = Dense(20, activation='relu')(flattened)
    # bn = BatchNormalization()(inner_layer3)
    # output_layer = Dense(11, activation='tanh')(bn)
    # self.model = Model(input_layer, output_layer)
    # self.model.compile(RMSprop(), loss='hinge')
    self.delta = 1 - 1e-5  # decay coefficient for epsilon-greedy exploration
    self.epsilon = 1       # probability of taking a random action
    self.max_memory_size = 50000  # cap on the long-term experience memory
    self.observation_memory = []
    self.action_memory = []
    self.max_buffer_size = 100  # cap on the observation/action/reward buffers
    self.observation_buffer = []
    self.action_buffer = []
    self.reward_buffer = []
    self.tau = 0.97
    self.batch_size = 16
    self.skip = 5
    self.t = 0
    self.episode_rewards = []
    self.age = 0
    self.to_learn = True  # whether the agent should keep learning
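# Illustrative only (not part of the original agent): a minimal sketch of how
# the bounded memories above are commonly maintained, i.e. appending a new
# experience and discarding the oldest entries once max_memory_size is
# exceeded. `remember_sketch` and `agent` are hypothetical names.
def remember_sketch(agent, observation, action):
    agent.observation_memory.append(observation)
    agent.action_memory.append(action)
    if len(agent.observation_memory) > agent.max_memory_size:
        agent.observation_memory.pop(0)
        agent.action_memory.pop(0)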
def __init__(self, config, session):
    BaseAgent.__init__(self, config, session)

    self.action_modes = {
        str(config.testing_epsilon) + "_greedy": self.e_greedy_action,
        "plan_" + str(config.testing_epsilon) + "_greedy": self.plan_e_greedy_action}
    self.default_action_mode = list(self.action_modes.items())[0]
    self.action_mode = self.default_action_mode

    # Build the net; assumes `import tensorflow as tf` with the pre-1.0 API
    # (tf.merge_summary, tf.train.SummaryWriter).
    with tf.device(config.device):
        # Create all variables and the FIFOQueue
        self.state_ph = tf.placeholder(
            tf.float32, [None, 84, 84, 4], name="state_ph")
        self.action_ph = tf.placeholder(tf.int64, [None], name="action_ph")
        self.reward_ph = tf.placeholder(tf.float32, [None], name="reward_ph")
        self.terminal_ph = tf.placeholder(tf.float32, [None], name="terminal_ph")
        self.stateT_ph = tf.placeholder(
            tf.float32, [None, 84, 84, 4], name="stateT_ph")

        # Define all the ops
        with tf.variable_scope("Q"):
            self.h_state = self.state_to_hidden(self.state_ph, config, "Normal")
            self.Q = self.hidden_to_Q(self.h_state, config, "Normal")
            self.predicted_reward = self.hidden_to_reward(
                self.h_state, config, "Normal")
            self.predicted_h_state = self.hidden_to_hidden(
                self.h_state, self.action_ph, config, "Normal")
            tf.get_variable_scope().reuse_variables()
            self.predicted_next_Q = self.hidden_to_Q(
                self.predicted_h_state, config, "Normal")

        with tf.variable_scope("QT"):
            self.h_stateT = self.state_to_hidden(self.stateT_ph, config, "Target")
            self.QT = self.hidden_to_Q(self.h_stateT, config, "Target")

        # Note: rebinds self.train_op from the builder method to the op it returns.
        self.train_op = self.train_op(
            self.Q, self.predicted_reward, self.predicted_next_Q, self.QT,
            self.reward_ph, self.action_ph, self.terminal_ph, config, "Normal")

        # Ops that copy each online ("Normal") weight into its target ("Target") copy
        self.sync_QT_op = []
        for W_pair in zip(tf.get_collection("Target_weights"),
                          tf.get_collection("Normal_weights")):
            self.sync_QT_op.append(W_pair[0].assign(W_pair[1]))

        # Define the summary ops
        self.Q_summary_op = tf.merge_summary(
            tf.get_collection("Normal_summaries"))
        self.QT_summary_op = tf.merge_summary(
            tf.get_collection("Target_summaries"))

    if config.logging:
        self.summary_writter = tf.train.SummaryWriter(
            self.config.log_path, self.sess.graph, flush_secs=20)
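# Illustrative only (not from the original code): a minimal sketch of how the
# sync_QT_op list built above is typically run to refresh the target network,
# assuming `agent.sess` is the TensorFlow session stored by BaseAgent.__init__.
def sync_target_network_sketch(agent):
    """Copy every "Normal" (online) weight into its "Target" counterpart."""
    agent.sess.run(agent.sync_QT_op)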
def __init__(self, max_velocity, turn_speed, max_health, max_armor,
             spawn_point=(200, 200), starting_angle=0,
             starter_weapon_pack=None, starter_ammo_pack=None,
             color='#303030', radius=10):
    BaseAgent.__init__(self, max_velocity, turn_speed, max_health, max_armor,
                       spawn_point, starting_angle, starter_weapon_pack,
                       starter_ammo_pack, color, radius)
def __init__(self, config, url):
    BaseAgent.__init__(self, config, url)
def __init__(self, config, url):
    self.url = url
    self.config = config
    BaseAgent.__init__(self, config, url)