Ejemplo n.º 1
0
    def __init__(self, model_info):
        """
        Initialize XingTian Model.

        To avoid compatibility problems between tensorflow versions, the
        model class holds its own graph and session. The network itself is
        built through ``self.create_model``.

        :param model_info: dict-like configuration. Keys read here:
            ``summary`` (default False), ``model_format``,
            ``max_to_keep`` (default 100) and, optionally, ``init_weights``
            (passed to ``self.load_model`` when present).
        """
        sess, self.graph = get_sess_graph()
        # Subclasses may assign this inside create_model.
        self.actor_var = None
        self._summary = model_info.get("summary", False)

        with self.graph.as_default():
            # init sess within the graph without assigning the graph into sess.
            self.sess = sess
            K.set_session(self.sess)
            self.model_format = model_info.get('model_format')
            self.max_to_keep = model_info.get("max_to_keep", 100)
            self.model = self.create_model(model_info)
            if 'init_weights' in model_info:
                model_name = model_info['init_weights']
                # Loading initial weights is best-effort: a failure is
                # reported but does not abort construction. Only Exception
                # is caught so KeyboardInterrupt/SystemExit still propagate.
                try:
                    self.load_model(model_name)
                except Exception:
                    print("load weight: {} failed!".format(model_name))
                else:
                    print("load weight: {} success.".format(model_name))
Ejemplo n.º 2
0
    def train(self, state, label):
        """
        Run shuffled minibatch updates and return the mean loss.

        The sample indexes are reshuffled and swept ``num_epochs`` times in
        minibatches of ``BATCH_SIZE``; ``self.train_op`` is run once per
        minibatch.

        :param state: indexable; items 0..3 are fed to ``self.state``,
            ``self.adv``, ``self.old_p`` and ``self.old_v``. ``state[0]``
            must expose ``shape[0]`` (the number of samples).
        :param label: indexable; items 0..1 are fed to ``self.target_p``
            and ``self.target_v``.
        :return: mean of the per-minibatch mean losses.
        """
        with self.graph.as_default():
            K.set_session(self.sess)
            num_epochs = 4
            nbatch_train = BATCH_SIZE
            nbatch = state[0].shape[0]

            inds = np.arange(nbatch)
            loss_val = []
            for _ in range(num_epochs):
                # Randomize the indexes
                np.random.shuffle(inds)
                # 0 to batch_size with batch_train_size step
                for start in range(0, nbatch, nbatch_train):
                    mbinds = inds[start:start + nbatch_train]

                    feed_dict = {
                        self.state: state[0][mbinds],
                        self.adv: state[1][mbinds],
                        self.old_p: state[2][mbinds],
                        self.old_v: state[3][mbinds],
                        self.target_p: label[0][mbinds],
                        self.target_v: label[1][mbinds],
                    }
                    _, loss = self.sess.run([self.train_op, self.loss], feed_dict)

                    loss_val.append(np.mean(loss))

            return np.mean(loss_val)
Ejemplo n.º 3
0
 def load_model(self, model_name):
     """Load weights from ``model_name``.

     When ``self.actor_var`` is set, loading is delegated to its
     ``set_weights_with_npz``; otherwise the keras model loads the weights
     by layer name inside this object's graph and session.
     """
     actor_var = self.actor_var
     if not actor_var:
         with self.graph.as_default():
             K.set_session(self.sess)
             self.model.load_weights(model_name, by_name=True)
         return
     actor_var.set_weights_with_npz(model_name)
Ejemplo n.º 4
0
    def __init__(self, model_info):
        """
        Initialize XingTian Model.

        To avoid compatibility problems between tensorflow versions, the
        model class creates and holds its own graph and session. The
        network itself is built through ``self.create_model``.

        :param model_info: dict-like configuration. Keys read here:
            ``model_format``, ``max_to_keep`` (default 100) and, optionally,
            ``init_weights`` (passed to ``self.load_model`` when present).
        """
        self.graph = tf.Graph()

        # Subclasses may assign this inside create_model.
        self.actor_var = None

        with self.graph.as_default():
            # Use the compat.v1 ConfigProto so it matches the compat.v1
            # Session below; the bare tf.ConfigProto alias is not
            # available on TensorFlow 2.x.
            config = tf.compat.v1.ConfigProto()
            config.gpu_options.allow_growth = True
            self.sess = tf.compat.v1.Session(config=config)
            K.set_session(self.sess)
            self.model_format = model_info.get('model_format')
            self.max_to_keep = model_info.get("max_to_keep", 100)
            self.model = self.create_model(model_info)
            if 'init_weights' in model_info:
                model_name = model_info['init_weights']
                # Loading initial weights is best-effort: a failure is
                # reported but does not abort construction. Only Exception
                # is caught so KeyboardInterrupt/SystemExit still propagate.
                try:
                    self.load_model(model_name)
                except Exception:
                    print("load weight: {} failed!".format(model_name))
                else:
                    print("load weight: {} success.".format(model_name))
Ejemplo n.º 5
0
 def load_model(self, model_name):
     """Load weights from ``model_name``.

     A name ending in ".npz" is handed to ``self.actor_var`` when that
     attribute is set; every other case goes through the keras model's
     ``load_weights`` inside this object's graph and session.
     """
     use_npz = self.actor_var and str(model_name).endswith(".npz")
     if use_npz:
         self.actor_var.set_weights_with_npz(model_name)
         return
     with self.graph.as_default():
         K.set_session(self.sess)
         self.model.load_weights(model_name)
Ejemplo n.º 6
0
 def predict(self, state):
     """
     Run inference with the latest model.

     ``state[0]`` is fed to ``self.infer_state`` and ``state[1]`` to
     ``self.adv``; the result of running ``[self.infer_p, self.infer_v]``
     in the session is returned.
     """
     with self.graph.as_default():
         K.set_session(self.sess)
         inputs = {self.infer_state: state[0], self.adv: state[1]}
         fetches = [self.infer_p, self.infer_v]
         return self.sess.run(fetches, inputs)
Ejemplo n.º 7
0
 def train(self, state, label, batch_size, verbose=False):
     """Fit the keras model on the given data and return a loss value.

     The returned value is the first entry of the ``"loss"`` list in the
     history produced by ``self.model.fit``.
     """
     with self.graph.as_default():
         K.set_session(self.sess)
         fit_options = {"batch_size": batch_size, "verbose": verbose}
         fit_result = self.model.fit(state, label, **fit_options)
         losses = fit_result.history["loss"]
         return losses[0]
Ejemplo n.º 8
0
 def set_weights(self, weights):
     """Set weights from in-memory values.

     A dict is routed to ``self.actor_var`` when that attribute is set
     (xingtian npz-style weights); anything else goes to the keras model.
     """
     with self.graph.as_default():
         K.set_session(self.sess)
         use_actor_var = isinstance(weights, dict) and self.actor_var
         receiver = self.actor_var if use_actor_var else self.model
         receiver.set_weights(weights)
Ejemplo n.º 9
0
    def initial_inference(self, input_data):
        """Run ``self.init_infer`` on ``input_data`` and wrap the result.

        The first value output is passed through ``self.value_transform``;
        the returned ``NetworkOutput`` carries that value, a reward of 0,
        and the first policy and hidden-state entries.
        """
        with self.graph.as_default():
            K.set_session(self.sess)

            policy, raw_value, hidden = self.sess.run(
                self.init_infer, {self.obs: input_data}
            )
            value = self.value_transform(
                raw_value[0],
                self.value_support_size,
                self.value_min,
                self.value_max,
            )

        return NetworkOutput(value, 0, policy[0], hidden[0])
Ejemplo n.º 10
0
 def predict(self, state):
     """
     Run inference with the newest model.

     :param state: input forwarded unchanged to ``self.model.predict``
     :return: whatever ``self.model.predict`` returns
     """
     with self.graph.as_default():
         K.set_session(self.sess)
         prediction = self.model.predict(state)
     return prediction
Ejemplo n.º 11
0
 def predict(self, state):
     """
     Run inference with the newest model.

     :param state: value fed to ``self.infer_state``
     :return: result of running ``self.infer_v`` in the session
     """
     with self.graph.as_default():
         K.set_session(self.sess)
         result = self.sess.run(self.infer_v, {self.infer_state: state})
     return result
Ejemplo n.º 12
0
    def save_model(self, file_name):
        """Save the keras weights to ``file_name + ".h5"`` and return that path.

        Before saving, ``check_keep_model`` is called with the target
        directory and ``self.max_to_keep``. When ``self.model_format`` is
        ``'pb'``, ``pb_model`` is called as well.
        """
        # check max model file to keep
        target_dir = os.path.dirname(file_name)
        check_keep_model(target_dir, self.max_to_keep)

        h5_path = file_name + ".h5"
        with self.graph.as_default():
            K.set_session(self.sess)
            self.model.save_weights(h5_path, overwrite=True)

        export_pb = self.model_format == 'pb'
        if export_pb:
            pb_model(self.model, file_name)
        return h5_path
Ejemplo n.º 13
0
 def train(self, state, label):
     """Fit the keras model on named inputs/outputs.

     ``state[0]``/``state[1]`` are passed as the ``state_input``/``adv``
     inputs and ``label[0]``/``label[1]`` as the ``output_actions``/
     ``output_value`` targets, with a fixed batch size of 128.

     :return: whatever ``self.model.fit`` returns (not reduced to a scalar)
     """
     with self.graph.as_default():
         K.set_session(self.sess)
         inputs = {'state_input': state[0], 'adv': state[1]}
         targets = {"output_actions": label[0], "output_value": label[1]}
         return self.model.fit(x=inputs, y=targets, batch_size=128, verbose=0)
Ejemplo n.º 14
0
    def value_inference(self, input_data):
        """Run ``self.init_infer`` and return the transformed values as an array.

        Each entry of the value output is passed through
        ``self.value_transform``; the policy and hidden outputs are ignored.
        """
        with self.graph.as_default():
            K.set_session(self.sess)

            _, raw_values, _ = self.sess.run(
                self.init_infer, {self.obs: input_data}
            )
            transformed = [
                self.value_transform(
                    item, self.value_support_size, self.value_min, self.value_max
                )
                for item in raw_values
            ]

        return np.asarray(transformed)
Ejemplo n.º 15
0
    def recurrent_inference(self, hidden_state, action):
        """Run ``self.rec_infer`` for one hidden state and action.

        The action is one-hot encoded over ``self.action_dim``, stacked
        horizontally after the hidden state (both with a leading axis added)
        and fed to ``self.conditioned_hidden``. The first value and reward
        outputs go through ``self.value_transform`` before being wrapped in
        a ``NetworkOutput``.
        """
        with self.graph.as_default():
            K.set_session(self.sess)
            one_hot = np.eye(self.action_dim)[action]
            action_row = np.expand_dims(one_hot, 0)
            hidden_row = np.expand_dims(hidden_state, 0)
            stacked = np.hstack((hidden_row, action_row))
            hidden, raw_reward, policy, raw_value = self.sess.run(
                self.rec_infer, {self.conditioned_hidden: stacked}
            )

            value = self.value_transform(
                raw_value[0], self.value_support_size, self.value_min, self.value_max
            )
            reward = self.value_transform(
                raw_reward[0], self.reward_support_size, self.reward_min, self.reward_max
            )

        return NetworkOutput(value, reward, policy[0], hidden[0])
Ejemplo n.º 16
0
    def save_model(self, file_name):
        """Save the model weights and return the path they were written to.

        When ``self.actor_var`` is set, weights are saved through it to
        ``file_name + ".npz"``. Otherwise the keras model saves to
        ``file_name`` as given. When ``self.model_format`` is ``'pb'``,
        ``pb_model`` is called as well.

        :param file_name: target path without the ".npz" suffix
        :return: the path actually passed to the save call, so that it can
            be handed back to ``load_model``
        """
        # check max model file to keep (a negative max_to_keep disables it)
        if self.max_to_keep > -1:
            check_keep_model(os.path.dirname(file_name), self.max_to_keep)

        if self.actor_var:
            saved_path = file_name + ".npz"
            self.actor_var.save_weights(saved_path)
        else:
            # The keras branch writes to file_name itself, so report that
            # path rather than a ".npz" name that was never created.
            saved_path = file_name
            with self.graph.as_default():  # keras
                K.set_session(self.sess)
                self.model.save_weights(saved_path)

        if self.model_format == 'pb':
            pb_model(self.model, file_name)
        return saved_path
Ejemplo n.º 17
0
    def train(self, state, label):
        """Run one training step and return the mean loss.

        ``label[0]`` and ``label[1]`` are converted with ``self.conver_value``
        (using the value and reward support settings respectively) before
        being fed as targets; ``label[2]`` is fed as the policy target.
        ``state[0..2]`` are fed as observation, action and loss weights.
        """
        with self.graph.as_default():
            K.set_session(self.sess)

            value_target = self.conver_value(
                label[0], self.value_support_size, self.value_min, self.value_max
            )
            reward_target = self.conver_value(
                label[1], self.reward_support_size, self.reward_min, self.reward_max
            )

            inputs = {
                self.obs: state[0],
                self.action: state[1],
                self.loss_weights: state[2],
                self.target_value: value_target,
                self.target_reward: reward_target,
                self.target_policy: label[2],
            }
            step_result = self.sess.run([self.train_op, self.loss], inputs)
            _, step_loss = step_result

            return np.mean(step_loss)
Ejemplo n.º 18
0
 def get_grad(self, data):
     """Compute gradients for ``data`` through the wrapped model.

     :param data: input forwarded unchanged to ``self.model.get_grad``
     :return: the result of ``self.model.get_grad(data)``
     """
     with self.graph.as_default():
         K.set_session(self.sess)
         # Hand the result back to the caller instead of discarding it.
         return self.model.get_grad(data)
Ejemplo n.º 19
0
 def train(self, state, label):
     """Run a single ``train_on_batch`` step and return its result."""
     graph, session = self.graph, self.sess
     with graph.as_default():
         K.set_session(session)
         return self.model.train_on_batch(state, label)
Ejemplo n.º 20
0
 def get_weights(self):
     """Return the current weights of the keras model."""
     with self.graph.as_default():
         K.set_session(self.sess)
         current_weights = self.model.get_weights()
     return current_weights
Ejemplo n.º 21
0
 def load_model(self, model_name, by_name=False):
     """Load weights from ``model_name`` into the keras model.

     :param model_name: path forwarded to ``self.model.load_weights``
     :param by_name: forwarded as the ``by_name`` option of
         ``load_weights``
     """
     with self.graph.as_default():
         K.set_session(self.sess)
         # Pass by_name as a keyword so it cannot be bound to a different
         # positional parameter if the load_weights signature differs.
         self.model.load_weights(model_name, by_name=by_name)
Ejemplo n.º 22
0
 def set_weights(self, weights):
     """Replace the keras model's weights with ``weights``."""
     graph, session = self.graph, self.sess
     with graph.as_default():
         K.set_session(session)
         self.model.set_weights(weights)