Example 1
    def __init__(self,
                 max_velocity,
                 turn_speed,
                 max_health,
                 max_armor,
                 spawn_point=(200, 200),
                 starting_angle=0,
                 starter_weapon_pack=None,
                 starter_ammo_pack=None,
                 color='#303030',
                 radius=10):
        BaseAgent.__init__(self,
                           max_velocity,
                           turn_speed,
                           max_health,
                           max_armor,
                           spawn_point,
                           starting_angle,
                           starter_weapon_pack,
                           starter_ammo_pack,
                           color,
                           radius)
        # Dense network: the (17, 13) observation is flattened and mapped
        # through one hidden ReLU layer to 11 tanh outputs.
        # Requires: from keras.layers import Input, Flatten, Dense
        #           from keras.models import Model
        #           from keras.optimizers import RMSprop
        input_layer = Input(shape=(17, 13))
        flattened_input = Flatten()(input_layer)
        inner_layer = Dense(20, activation='relu')(flattened_input)
        output_layer = Dense(11, activation='tanh')(inner_layer)
        self.model = Model(input_layer, output_layer)
        self.model.compile(RMSprop(), loss='hinge')
        self.delta = 1 - 1e-5  # per-step decay coefficient for epsilon
        self.epsilon = 1       # probability of taking a random action
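
The delta/epsilon pair drives a decayed epsilon-greedy policy: the agent acts randomly with probability epsilon, which shrinks geometrically by delta after every step. A minimal sketch of that selection logic, assuming a hypothetical act method that the original snippet does not include:

    import random
    import numpy as np

    def act(self, observation, n_actions=11):
        # Explore: pick a random action index with probability epsilon.
        if random.random() < self.epsilon:
            action = random.randrange(n_actions)
        else:
            # Exploit: take the strongest of the model's 11 outputs.
            scores = self.model.predict(observation[np.newaxis, ...])
            action = int(np.argmax(scores[0]))
        self.epsilon *= self.delta  # geometric decay toward exploitation
        return action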
Example 2
    def __init__(self,
                 max_velocity,
                 turn_speed,
                 max_health,
                 max_armor,
                 spawn_point=(200, 200),
                 starting_angle=0,
                 starter_weapon_pack=None,
                 starter_ammo_pack=None,
                 color='#303030',
                 radius=10):
        BaseAgent.__init__(self,
                           max_velocity,
                           turn_speed,
                           max_health,
                           max_armor,
                           spawn_point,
                           starting_angle,
                           starter_weapon_pack,
                           starter_ammo_pack,
                           color,
                           radius)
        # Disabled alternative: a 1-D convolutional network over the same
        # (17, 13) observation, left commented out.
        #input_layer = Input(shape=(17, 13))
        #inner_layer1 = Convolution1D(20, 5, activation='relu')(input_layer)
        #pooling1 = MaxPooling1D(2)(inner_layer1)
        #inner_layer2 = Convolution1D(20, 3, activation='relu')(pooling1)
        #pooling2 = MaxPooling1D(2)(inner_layer2)
        #flattened = Flatten()(pooling2)
        #inner_layer3 = Dense(20, activation='relu')(flattened)
        #bn = BatchNormalization()(inner_layer3)
        #output_layer = Dense(11, activation='tanh')(bn)
        #self.model = Model(input_layer, output_layer)
        #self.model.compile(RMSprop(),
        #                   loss='hinge')

        self.delta = 1 - 1e-5  # per-step decay coefficient for epsilon
        self.epsilon = 1       # probability of taking a random action

        self.max_memory_size = 50000  # cap on the long-term experience memory
        self.observation_memory = []
        self.action_memory = []

        self.max_buffer_size = 100  # cap on the short-term episode buffer
        self.observation_buffer = []
        self.action_buffer = []
        self.reward_buffer = []

        self.tau = 0.97

        self.batch_size = 16  # minibatch size for training updates

        self.skip = 5  # act/learn only every skip-th tick
        self.t = 0     # tick counter used with skip

        self.episode_rewards = []

        self.age = 0  # total steps lived so far

        self.to_learn = True  # set False to disable learning
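
The memory fields above set up a bounded experience store plus a minibatch size for training. A minimal sketch of how such a store might be filled and sampled, assuming hypothetical remember and sample_batch helpers that the original does not show:

    import random
    import numpy as np

    def remember(self, observation, action):
        # Append the newest transition, then enforce the memory cap by
        # discarding the oldest entries.
        self.observation_memory.append(observation)
        self.action_memory.append(action)
        if len(self.observation_memory) > self.max_memory_size:
            self.observation_memory.pop(0)
            self.action_memory.pop(0)

    def sample_batch(self):
        # Uniformly sample a minibatch of stored (observation, action)
        # pairs; assumes the memory holds at least batch_size entries.
        indices = random.sample(range(len(self.observation_memory)),
                                self.batch_size)
        observations = np.array([self.observation_memory[i] for i in indices])
        actions = np.array([self.action_memory[i] for i in indices])
        return observations, actions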
Example 3
    def __init__(self, config, session):
        BaseAgent.__init__(self, config, session)
        self.action_modes = {
            str(config.testing_epsilon) + "_greedy": self.e_greedy_action,
            "plan_" + str(config.testing_epsilon) + "_greedy": self.plan_e_greedy_action}
        # dict.items() is not subscriptable in Python 3; wrap it in list()
        # so the first (insertion-order) pair can serve as the default.
        self.default_action_mode = list(self.action_modes.items())[0]
        self.action_mode = self.default_action_mode
        # build the net
        with tf.device(config.device):
            # Create all variables and the FIFOQueue
            self.state_ph = tf.placeholder(
                tf.float32, [None, 84, 84, 4], name="state_ph")
            self.action_ph = tf.placeholder(tf.int64, [None], name="action_ph")
            self.reward_ph = tf.placeholder(tf.float32, [None], name="reward_ph")
            self.terminal_ph = tf.placeholder(tf.float32, [None], name="terminal_ph")
            self.stateT_ph = tf.placeholder(
                tf.float32, [None, 84, 84, 4], name="stateT_ph")
            # Define all the ops
            with tf.variable_scope("Q"):
                self.h_state = self.state_to_hidden(self.state_ph, config, "Normal")
                self.Q = self.hidden_to_Q(self.h_state, config, "Normal")
                self.predicted_reward = self.hidden_to_reward(self.h_state, config, "Normal")
                self.predicted_h_state = self.hidden_to_hidden(self.h_state, self.action_ph, config, "Normal")
                tf.get_variable_scope().reuse_variables()
                self.predicted_next_Q = self.hidden_to_Q(self.predicted_h_state, config, "Normal")
            with tf.variable_scope("QT"):
                self.h_stateT = self.state_to_hidden(self.stateT_ph, config, "Target")
                self.QT = self.hidden_to_Q(self.h_stateT, config, "Target")

            # Note: this rebinds self.train_op from the builder method to
            # the op it returns, so the builder can only be called once.
            self.train_op = self.train_op(self.Q, self.predicted_reward,
                                self.predicted_next_Q, self.QT, self.reward_ph,
                                self.action_ph, self.terminal_ph, config, "Normal")
            self.sync_QT_op = []
            for W_pair in zip(
                    tf.get_collection("Target_weights"),
                    tf.get_collection("Normal_weights")):
                self.sync_QT_op.append(W_pair[0].assign(W_pair[1]))
            # Define the summary ops (pre-1.0 TensorFlow API; later releases
            # renamed these to tf.summary.merge and tf.summary.FileWriter)
            self.Q_summary_op = tf.merge_summary(
                tf.get_collection("Normal_summaries"))
            self.QT_summary_op = tf.merge_summary(
                tf.get_collection("Target_summaries"))
        if config.logging:
            self.summary_writter = tf.train.SummaryWriter(
                self.config.log_path, self.sess.graph, flush_secs=20)
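
The sync_QT_op list implements the standard DQN target-network refresh: each Target weight is overwritten with its Normal counterpart through an assign op, typically run every few thousand training steps. A self-contained demo of that copy pattern in the same pre-1.0 TensorFlow API the example uses (variable names here are illustrative only):

    import tensorflow as tf

    # One Normal/Target pair; sync_QT_op holds one such assign per pair.
    normal_W = tf.Variable(tf.random_normal([4, 4]), name="normal_W")
    target_W = tf.Variable(tf.zeros([4, 4]), name="target_W")
    sync_op = target_W.assign(normal_W)

    with tf.Session() as sess:
        sess.run(tf.initialize_all_variables())
        sess.run(sync_op)  # target_W now mirrors normal_W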
Example 4
    def __init__(self,
                 max_velocity,
                 turn_speed,
                 max_health,
                 max_armor,
                 spawn_point=(200, 200),
                 starting_angle=0,
                 starter_weapon_pack=None,
                 starter_ammo_pack=None,
                 color='#303030',
                 radius=10):
        BaseAgent.__init__(self,
                           max_velocity,
                           turn_speed,
                           max_health,
                           max_armor,
                           spawn_point,
                           starting_angle,
                           starter_weapon_pack,
                           starter_ammo_pack,
                           color,
                           radius)
Example 5
    def __init__(self, config, url):
        BaseAgent.__init__(self, config, url)
Example 6
    def __init__(self, config, url):
        # These assignments may duplicate work done inside
        # BaseAgent.__init__, which receives the same config and url.
        self.url = url
        self.config = config
        BaseAgent.__init__(self, config, url)
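
All six examples delegate with an explicit BaseAgent.__init__(self, ...) call. In Python 3, the same delegation is usually written with super(), which avoids naming the base class twice; a sketch with a hypothetical MyAgent subclass and a stub BaseAgent for illustration:

    class BaseAgent:
        def __init__(self, config, url):
            self.config = config
            self.url = url

    class MyAgent(BaseAgent):
        def __init__(self, config, url):
            # super() resolves the base class from the MRO instead of
            # hard-coding BaseAgent.
            super().__init__(config, url)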