def create_agent():
    env_config = load_json(file_path=CONFIG_PATH + '/testTorcsEnvironmentConfig.json')
    env = TorcsEnvironment(env_config)
    agent_config = load_json(file_path=CONFIG_PATH + '/testDDPGAgentConfig.json')
    a = DDPGConfig(config_path=CONFIG_PATH + '/testDDPGConfig.json',
                   ddpg_standard_key_list=DDPGModel.standard_key_list,
                   actor_standard_key_list=LSTMActor.standard_key_list,
                   critic_standard_key_list=LSTMCritic.standard_key_list)
    ddpg = DDPGModel(config=a, actor=LSTMActor, critic=LSTMCritic)
    agent = DDPGAgent(env=env, config=agent_config, model=ddpg)
    return agent
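if __name__ == '__main__':
    # Hypothetical smoke test, not part of the original script: a minimal sketch of how
    # create_agent() might be driven. It assumes the framework sets up the TensorFlow
    # session and the TORCS environment inside the constructors above, so that the agent
    # can immediately run its episode loop via DDPGAgent.play().
    agent = create_agent()
    agent.play()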
class DDPGAgent(Agent):
    standard_key_list = load_json(CONFIG_STANDARD_KEY_LIST + '/DDPGAgentKeyList.json')

    def __init__(self, env, config, model=None):
        super(DDPGAgent, self).__init__(env=env, config=config, model=model)
        if model is None:
            a = DDPGConfig(config_path=CONFIG_PATH + '/testDDPGConfig.json',
                           ddpg_standard_key_list=DDPGModel.standard_key_list,
                           actor_standard_key_list=LSTMActor.standard_key_list,
                           critic_standard_key_list=LSTMCritic.standard_key_list)
            self.model = DDPGModel(config=a, actor=LSTMActor, critic=LSTMCritic)

    def play(self):
        for i in range(self.config.config_dict['EPISODE_COUNT']):
            self.state = self.env.reset()
            for j in range(self.config.config_dict['MAX_STEPS']):
                self.action = self.model.noise_action(state=self.state)
                new_state, reward, done = self.env.step(action=self.action)
                self.model.perceive(state=self.state,
                                    action=self.action,
                                    reward=reward,
                                    next_state=new_state,
                                    done=done)
                self.state = new_state
                if done:
                    break
def create_data():
    conf = Config(standard_key_list=FISData.standard_key_list, config_dict=None)
    config = utils.load_json(file_path=CONFIG_PATH + '/data/testFisAttackDataConfig.json')
    config['FILE_PATH'] = DATASET1_PATH + '/Attack.csv'
    conf.load_config(path=None, json_dict=config)
    data = FISData(config=conf)
    data.load_data()
    return data
class DenseModel(Model):
    standard_key_list = utils.load_json(file_path=CONFIG_STANDARD_KEY_LIST + '/denseKeyList.json')

    def __init__(self, config, sess_flag=False, data=None):
        super(DenseModel, self).__init__(config, sess_flag, data)
        self.state = Inputs(config=self.config.config_dict['STATE'])
        self.label = tf.placeholder(tf.float32,
                                    shape=[None, self.config.config_dict['OUTPUT_DIM']])
        self.net = self.create_model(self.state('STATE'), 'DENSE_')
        self.loss, self.optimizer = self.create_training_method()
        self.optimize_loss = self.optimizer.minimize(loss=self.loss,
                                                     var_list=self.net.all_params)

    def create_model(self, state, name_prefix):
        net = tl.layers.InputLayer(inputs=state, name=name_prefix + 'INPUT_LAYER')
        net = tl.layers.DenseLayer(layer=net,
                                   n_units=self.config.config_dict['DENSE_LAYER_1_UNIT'],
                                   act=tf.nn.leaky_relu,
                                   name=name_prefix + 'DENSE_LAYER_1')
        net = tl.layers.DenseLayer(layer=net,
                                   n_units=self.config.config_dict['DENSE_LAYER_2_UNIT'],
                                   act=tf.nn.leaky_relu,
                                   name=name_prefix + 'DENSE_LAYER_2')
        net = tl.layers.DenseLayer(layer=net,
                                   n_units=self.config.config_dict['OUTPUT_DIM'],
                                   act=tf.nn.softmax,
                                   name=name_prefix + 'OUTPUT_LAYER')
        return net

    def create_training_method(self):
        # weight_decay = tf.add_n([self.config.config_dict['L2'] * tf.nn.l2_loss(var) for var in self.var_list])
        # loss = tf.reduce_mean(tf.square(self.label - self.net.outputs)) + weight_decay
        loss = tf.reduce_mean(tf.square(self.label - self.net.outputs))
        optimizer = tf.train.AdamOptimizer(self.config.config_dict['LEARNING_RATE'])
        return loss, optimizer
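def train_dense_model_step(sess, model, state_batch, label_batch):
    # Hypothetical helper, not part of the original code: a sketch of the feed/fetch pattern
    # for one optimisation step of DenseModel, mirroring how the training loop below feeds
    # model.state('STATE') and model.label. It assumes variables have already been
    # initialized, e.g. via sess.run(tf.global_variables_initializer()).
    loss, _ = sess.run(fetches=[model.loss, model.optimize_loss],
                       feed_dict={model.state('STATE'): state_batch,
                                  model.label: label_batch})
    return loss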
class DenseActor(Actor):
    standard_key_list = utils.load_json(file_path=CONFIG_STANDARD_KEY_LIST + '/actorKeyList.json')

    def __init__(self, config, sess_flag=False, data=None):
        super(DenseActor, self).__init__(config, sess_flag, data)
        self.net = self.create_model(self.state('test'), 'ACTOR_')
        self.target_net = self.create_model(self.target_state('test'), 'TARGET_ACTOR_')
        self.optimizer, self.optimize_loss = self.create_training_method()

    def create_model(self, state, name_prefix):
        net = tl.layers.InputLayer(inputs=state, name=name_prefix + 'INPUT_LAYER')
        net = tl.layers.DenseLayer(layer=net,
                                   n_units=self.config.config_dict['DENSE_LAYER_1_UNIT'],
                                   act=tf.nn.relu,
                                   name=name_prefix + 'DENSE_LAYER_1')
        net = tl.layers.DropconnectDenseLayer(layer=net,
                                              n_units=self.config.config_dict['DENSE_LAYER_2_UNIT'],
                                              act=tf.nn.relu,
                                              name=name_prefix + 'DENSE_LAYER_2',
                                              keep=self.config.config_dict['DROP_OUT_PROB_VALUE'])
        net = tl.layers.DenseLayer(layer=net,
                                   n_units=self.config.config_dict['ACTION_DIM'],
                                   act=tf.nn.tanh,
                                   name=name_prefix + 'OUTPUT_LAYER')
        return net

    def create_training_method(self):
        # Deterministic policy gradient: back-propagate the negative critic gradient
        # w.r.t. the action through the actor parameters.
        parameters_gradients = tf.gradients(self.action, self.var_list, -self.q_value_gradients)
        optimizer = tf.train.AdamOptimizer(learning_rate=self.config.config_dict['LEARNING_RATE'])
        optimize_loss = optimizer.apply_gradients(
            grads_and_vars=zip(parameters_gradients, self.var_list))
        return optimizer, optimize_loss
class FISData(Data):
    standard_key_list = utils.load_json(CONFIG_STANDARD_KEY_LIST + '/fisDataKeyList.json')

    def __init__(self, config):
        super().__init__(config)
        self.state_list = []
        self.output_list = []
        self.sample_count = 0

    def load_data(self, *args, **kwargs):
        # super().load_data(*args, **kwargs)
        with open(self.config.config_dict['FILE_PATH']) as f:
            reader = csv.DictReader(f)
            for row in reader:
                state_data_sample = []
                output_data_sample = []
                for name in self.config.config_dict['STATE_NAME_LIST']:
                    state_data_sample.append(float(row[name]))
                for name in self.config.config_dict['OUTPUT_NAME_LIST']:
                    output_data_sample.append(float(row[name]))
                self.state_list.append(state_data_sample)
                self.output_list.append(output_data_sample)
                self.data_list.append({"STATE": state_data_sample,
                                       "OUTPUT": output_data_sample})
        self.sample_count = len(self.data_list)

    def shuffle_data(self):
        temp_data = np.array(self.data_list)
        np.random.shuffle(temp_data)
        self.data_list = temp_data
        self.state_list = []
        self.output_list = []
        for sample in self.data_list:
            self.state_list.append(sample['STATE'])
            self.output_list.append(sample['OUTPUT'])
        self.state_list = np.array(self.state_list)
        self.output_list = np.array(self.output_list)

    def return_batch_data(self, index, size):
        return self.state_list[index * size:index * size + size], \
               self.output_list[index * size:index * size + size]
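def iterate_minibatches(data, batch_size):
    # Hypothetical helper, not part of the original code: a sketch of how FISData is
    # typically consumed, assuming load_data() has already been called. shuffle_data()
    # reshuffles the samples in place, and return_batch_data(index, size) returns
    # contiguous slices of the shuffled state/output arrays.
    data.shuffle_data()
    batch_count = data.sample_count // batch_size
    for index in range(batch_count):
        state_batch, output_batch = data.return_batch_data(index, batch_size)
        yield state_batch, output_batch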
class LSTMCritic(Critic):
    standard_key_list = utils.load_json(file_path=CONFIG_STANDARD_KEY_LIST + '/lstmCriticKeyList.json')

    def __init__(self, config, sess_flag=False, data=None):
        super(LSTMCritic, self).__init__(config, sess_flag, data)
        self.net = self.create_model(state=self.state, action=self.action, name_prefix='CRITIC_')
        self.target_net = self.create_model(state=self.target_state, action=self.action,
                                            name_prefix='TARGET_CRITIC_')

    def create_model(self, state, action, name_prefix):
        image_state_shape = state('IMAGE').get_shape().as_list()
        batch_size = image_state_shape[0]
        state_length = image_state_shape[1]

        merged_flattened_input = utils.flatten_and_concat_tensors(
            name_prefix=name_prefix + 'ACTION_', tensor_dict=action())

        # Create action net
        action_net = tl.layers.DenseLayer(
            layer=merged_flattened_input,
            n_units=self.config.config_dict['ACTION_LAYER_1_UNIT'],
            act=tf.nn.relu,
            name=name_prefix + 'ACTION_DENSE_LAYER_1')
        action_net = tl.layers.DenseLayer(
            layer=action_net,
            n_units=self.config.config_dict['ACTION_LAYER_2_UNIT'],
            act=tf.nn.relu,
            name=name_prefix + 'ACTION_DENSE_LAYER_2')
        action_net = tl.layers.ReshapeLayer(
            layer=action_net,
            shape=[-1, state_length, self.config.config_dict['ACTION_LAYER_2_UNIT']],
            name=name_prefix + 'ACTION_RESHAPE_LAYER')

        # Create state lstm cnn net
        W_init = tf.truncated_normal_initializer(stddev=0.01)
        b_init = tf.constant_initializer(value=0.0)
        state_image_batch = tf.reshape(
            tensor=state('IMAGE'),
            shape=[-1, image_state_shape[2], image_state_shape[3], image_state_shape[4]])
        inputs_image = tl.layers.InputLayer(inputs=state_image_batch,
                                            name=name_prefix + 'INPUT_LAYER_' + 'IMAGE')

        merge_low_dim_state_tensor_dict = {}
        for name, tensor in state().items():
            if name != 'IMAGE':
                merge_low_dim_state_tensor_dict[name] = tensor
        merged_flattened_input = utils.flatten_and_concat_tensors(
            name_prefix=name_prefix + 'LOW_DIM_STATE_',
            tensor_dict=merge_low_dim_state_tensor_dict)
        merged_flattened_input = tl.layers.ReshapeLayer(
            layer=merged_flattened_input,
            shape=[-1, state_length, merged_flattened_input.outputs.get_shape().as_list()[1]],
            name=name_prefix + 'LOW_DIM_STATE_RESHAPE_LAYER')

        conv1 = tl.layers.Conv2d(
            net=inputs_image,
            n_filter=self.config.config_dict['CONV1_1_CHANNEL_SIZE'],
            filter_size=self.config.config_dict['CONV1_1_FILTER_SIZE'],
            strides=self.config.config_dict['CONV1_1_STRIDE_SIZE'],
            act=tf.nn.relu, W_init=W_init, b_init=b_init,
            name=name_prefix + 'CONV1_1_LAYER')
        conv1 = tl.layers.Conv2d(
            net=conv1,
            n_filter=self.config.config_dict['CONV1_2_CHANNEL_SIZE'],
            filter_size=self.config.config_dict['CONV1_2_FILTER_SIZE'],
            strides=self.config.config_dict['CONV1_2_STRIDE_SIZE'],
            act=tf.nn.relu, W_init=W_init, b_init=b_init,
            name=name_prefix + 'CONV1_2_LAYER')
        if self.is_training is True:
            conv1 = tl.layers.BatchNormLayer(layer=conv1, epsilon=0.000001, is_train=True,
                                             name=name_prefix + 'CONV1_BATCH_NORM_LAYER')
        else:
            conv1 = tl.layers.BatchNormLayer(layer=conv1, epsilon=0.000001, is_train=False,
                                             name=name_prefix + 'CONV1_BATCH_NORM_LAYER')
        pool1 = tl.layers.MaxPool2d(
            net=conv1,
            filter_size=self.config.config_dict['POOL1_FILTER_SIZE'],
            strides=self.config.config_dict['POOL1_STRIDE_SIZE'],
            name=name_prefix + 'POOL1_LAYER')

        conv2 = tl.layers.Conv2d(
            net=pool1,
            n_filter=self.config.config_dict['CONV2_1_CHANNEL_SIZE'],
            filter_size=self.config.config_dict['CONV2_1_FILTER_SIZE'],
            strides=self.config.config_dict['CONV2_1_STRIDE_SIZE'],
            act=tf.nn.relu, W_init=W_init, b_init=b_init,
            name=name_prefix + 'CONV2_1_LAYER')
        conv2 = tl.layers.Conv2d(
            net=conv2,
            n_filter=self.config.config_dict['CONV2_2_CHANNEL_SIZE'],
            filter_size=self.config.config_dict['CONV2_2_FILTER_SIZE'],
            strides=self.config.config_dict['CONV2_2_STRIDE_SIZE'],
            act=tf.nn.relu, W_init=W_init, b_init=b_init,
            name=name_prefix + 'CONV2_2_LAYER')
        if self.is_training is True:
            conv2 = tl.layers.BatchNormLayer(layer=conv2, epsilon=0.000001, is_train=True,
                                             name=name_prefix + 'CONV2_BATCH_NORM_LAYER')
        else:
            conv2 = tl.layers.BatchNormLayer(layer=conv2, epsilon=0.000001, is_train=False,
                                             name=name_prefix + 'CONV2_BATCH_NORM_LAYER')
        pool2 = tl.layers.MaxPool2d(
            net=conv2,
            filter_size=self.config.config_dict['POOL2_FILTER_SIZE'],
            strides=self.config.config_dict['POOL2_STRIDE_SIZE'],
            name=name_prefix + 'POOL2_LAYER')
        pool2 = tl.layers.FlattenLayer(layer=pool2, name=name_prefix + 'POOL2_FLATTEN_LAYER')

        fc1 = tl.layers.DenseLayer(
            layer=pool2,
            n_units=self.config.config_dict['DENSE_LAYER_1_UNIT'],
            act=tf.nn.relu,
            name=name_prefix + 'DENSE_LAYER_1_LAYER')
        fc2 = tl.layers.DropconnectDenseLayer(
            layer=fc1,
            n_units=self.config.config_dict['DENSE_LAYER_2_UNIT'],
            act=tf.nn.sigmoid,
            name=name_prefix + 'DENSE_LAYER_2_LAYER',
            keep=self.config.config_dict['DROP_OUT_PROB_VALUE'])

        feature_length_per_image = fc2.outputs.get_shape().as_list()[1]
        # LSTM INPUT IS [BATCH_SIZE, LENGTH, FEATURE_DIM]
        image_feature_input = tl.layers.ReshapeLayer(
            layer=fc2,
            shape=[-1, state_length, feature_length_per_image],
            name=name_prefix + 'IMAGE_LSTM_FEATURE_RESHAPE_LAYER')
        lstm_input = tl.layers.ConcatLayer(
            layer=[image_feature_input, merged_flattened_input, action_net],
            concat_dim=2,
            name=name_prefix + 'LSTM_INPUT_CONCAT_LAYER')

        # TODO
        # be aware of the init_state when train a lstm
        init_state = tf.placeholder(
            dtype=tf.float32,
            shape=[self.config.config_dict['LSTM_LAYERS_NUM'], 2, batch_size,
                   self.config.config_dict['LSMT_INPUT_LENGTH']])
        state_per_layers = tf.unstack(init_state, axis=0)
        rnn_tuple_state = tuple([
            tf.nn.rnn_cell.LSTMStateTuple(state_per_layers[idx][0], state_per_layers[idx][1])
            for idx in range(self.config.config_dict['LSTM_LAYERS_NUM'])
        ])
        rnn = tl.layers.DynamicRNNLayer(
            layer=lstm_input,
            cell_fn=tf.nn.rnn_cell.LSTMCell,
            sequence_length=None,
            n_hidden=self.config.config_dict['LSMT_INPUT_LENGTH'],
            initial_state=rnn_tuple_state,
            n_layer=self.config.config_dict['LSTM_LAYERS_NUM'],
            return_last=True,
            name=name_prefix + 'LSTM_LAYER')

        lstm_fc1 = tl.layers.DenseLayer(
            layer=rnn,
            n_units=self.config.config_dict['LSTM_DENSE_LAYER1_UNIT'],
            act=tf.nn.relu,
            name=name_prefix + 'LSTM_DENSE_LAYER_1')
        lstm_fc2 = tl.layers.DenseLayer(
            layer=lstm_fc1,
            n_units=self.config.config_dict['LSTM_DENSE_LAYER_2_UNIT'],
            act=tf.nn.tanh,
            name=name_prefix + 'LSTM_DENSE_LAYER_2')

        net = tl.layers.DenseLayer(
            layer=lstm_fc2,
            n_units=self.config.config_dict['MERGED_LAYER_1_UNIT'],
            act=tf.nn.relu,
            name=name_prefix + 'MERGED_DENSE_LAYER_1')
        net = tl.layers.DenseLayer(
            layer=net,
            n_units=self.config.config_dict['MERGED_LAYER_2_UNIT'],
            act=tf.nn.relu,
            name=name_prefix + 'MERGED_DENSE_LAYER_2')
        return net
class DenseCritic(Critic):
    standard_key_list = utils.load_json(file_path=CONFIG_STANDARD_KEY_LIST + '/criticKeyList.json')

    def __init__(self, config, sess_flag=False, data=None):
        super(DenseCritic, self).__init__(config, sess_flag, data)
        self.net = self.create_model(state=self.state, action=self.action, name_prefix='CRITIC_')
        self.loss, self.optimizer = self.create_training_method(q_value=self.q_value,
                                                                action=self.action,
                                                                var_list=self.var_list)
        self.target_net = self.create_model(state=self.target_state, action=self.target_action,
                                            name_prefix='TARGET_CRITIC_')

    def create_model(self, state, action, name_prefix):
        state_net = tl.layers.InputLayer(inputs=state,
                                         name=name_prefix + 'CRITIC_STATE_INPUT_LAYER')
        action_net = tl.layers.InputLayer(inputs=action,
                                          name=name_prefix + 'CRITIC_ACTION_INPUT_LAYER')
        state_net = tl.layers.DenseLayer(layer=state_net,
                                         n_units=self.config.config_dict['STATE_LAYER_1_UNIT'],
                                         act=tf.nn.relu,
                                         name=name_prefix + 'STATE_DENSE_LAYER_1')
        action_net = tl.layers.DenseLayer(layer=action_net,
                                          n_units=self.config.config_dict['ACTION_LAYER_1_UNIT'],
                                          act=tf.nn.relu,
                                          name=name_prefix + 'ACTION_DENSE_LAYER_1')
        # net = tf.stack(values=[state_net.outputs, action_net.outputs], axis=1)
        # net = tf.reshape(tensor=net,
        #                  shape=[-1, self.config.config_dict['ACTION_LAYER_1_UNIT'] +
        #                         self.config.config_dict['STATE_LAYER_1_UNIT']])
        net = tl.layers.ConcatLayer(layer=[state_net, action_net], concat_dim=1,
                                    name=name_prefix + 'STATE_ACTION_CONTACT_LAYER')
        net = tl.layers.DenseLayer(layer=net,
                                   n_units=self.config.config_dict['MERGED_LAYER_1_UNIT'],
                                   act=tf.nn.relu,
                                   name=name_prefix + 'MERGED_DENSE_LAYER_1')
        net = tl.layers.DenseLayer(layer=net,
                                   n_units=self.config.config_dict['OUTPUT_LAYER'],
                                   act=tf.nn.relu,
                                   name=name_prefix + 'OUTPUT_LAYER')
        return net

    def create_training_method(self, q_value, action, var_list):
        weight_decay = tf.add_n([self.config.config_dict['L2'] * tf.nn.l2_loss(var)
                                 for var in self.var_list])
        loss = tf.reduce_mean(tf.square(self.q_label - self.q_value)) + weight_decay
        optimizer = tf.train.AdamOptimizer(self.config.config_dict['LEARNING_RATE'])
        return loss, optimizer

    def update(self, sess, q_label, state, action):
        loss, _, grad = sess.run(fetches=[self.loss, self.optimize_loss, self.gradients],
                                 feed_dict={self.q_label: q_label,
                                            self.state: state,
                                            self.action: action,
                                            self.is_training: True})
        return loss, grad

    def eval_tensor(self):
        raise NotImplementedError
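def compute_q_label(reward_batch, done_batch, next_q_value_batch, gamma=0.99):
    # Illustrative only, not part of the original code: one common way to form the q_label
    # fed into DenseCritic.update(), i.e. the one-step Bellman target
    # y = r + gamma * Q'(s', mu'(s')) masked by the terminal flag.
    # gamma, the argument names, and the numpy usage are assumptions; next_q_value_batch
    # would come from evaluating the target critic on the target actor's next action.
    import numpy as np
    reward = np.asarray(reward_batch, dtype=np.float32)
    not_done = 1.0 - np.asarray(done_batch, dtype=np.float32)
    next_q = np.asarray(next_q_value_batch, dtype=np.float32)
    return reward + gamma * not_done * next_q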
if __name__ == '__main__':
    from src.config.config import Config
    from src.configuration.standard_key_list import CONFIG_STANDARD_KEY_LIST
    from src.configuration import CONFIG_PATH

    conf = Config(standard_key_list=FISData.standard_key_list, config_dict=None)
    config = utils.load_json(file_path=CONFIG_PATH + '/testFisAttackDataConfig.json')
    config['FILE_PATH'] = DATASET1_PATH + '/Attack.csv'
    conf.load_config(path=None, json_dict=config)
    data = FISData(config=conf)
    data.load_data()
    data.shuffle_data()
class LSTMActor(Actor):
    standard_key_list = utils.load_json(file_path=CONFIG_STANDARD_KEY_LIST + '/lstmActorKeyList.json')

    def __init__(self, config, sess_flag=False, data=None):
        super(LSTMActor, self).__init__(config, sess_flag, data)
        self.net = self.create_model(state=self.state, name_prefix='ACTOR_')
        self.target_net = self.create_model(state=self.target_state, name_prefix='TARGET_ACTOR_')
        self.optimizer, self.optimize_loss = self.create_training_method()

    def create_model(self, state, name_prefix):
        W_init = tf.truncated_normal_initializer(stddev=0.01)
        b_init = tf.constant_initializer(value=0.0)

        image_state_shape = state('IMAGE').get_shape().as_list()
        batch_size = image_state_shape[0]
        state_length = image_state_shape[1]
        state_batch = tf.reshape(
            tensor=state('IMAGE'),
            shape=[-1, image_state_shape[2], image_state_shape[3], image_state_shape[4]])
        inputs_image = tl.layers.InputLayer(inputs=state_batch,
                                            name=name_prefix + 'INPUT_LAYER_' + 'IMAGE')

        merge_tensor_dict = {}
        for name, tensor in state().items():
            if name != 'IMAGE':
                merge_tensor_dict[name] = tensor
        merged_flattened_input = utils.flatten_and_concat_tensors(
            name_prefix=name_prefix, tensor_dict=merge_tensor_dict)

        conv1 = tl.layers.Conv2d(
            net=inputs_image,
            n_filter=self.config.config_dict['CONV1_1_CHANNEL_SIZE'],
            filter_size=self.config.config_dict['CONV1_1_FILTER_SIZE'],
            strides=self.config.config_dict['CONV1_1_STRIDE_SIZE'],
            act=tf.nn.relu, W_init=W_init, b_init=b_init,
            name=name_prefix + 'CONV1_1_LAYER')
        conv1 = tl.layers.Conv2d(
            net=conv1,
            n_filter=self.config.config_dict['CONV1_2_CHANNEL_SIZE'],
            filter_size=self.config.config_dict['CONV1_2_FILTER_SIZE'],
            strides=self.config.config_dict['CONV1_2_STRIDE_SIZE'],
            act=tf.nn.relu, W_init=W_init, b_init=b_init,
            name=name_prefix + 'CONV1_2_LAYER')
        if self.is_training is True:
            conv1 = tl.layers.BatchNormLayer(layer=conv1, epsilon=0.000001, is_train=True,
                                             name=name_prefix + 'CONV1_BATCH_NORM_LAYER')
        else:
            conv1 = tl.layers.BatchNormLayer(layer=conv1, epsilon=0.000001, is_train=False,
                                             name=name_prefix + 'CONV1_BATCH_NORM_LAYER')
        pool1 = tl.layers.MaxPool2d(
            net=conv1,
            filter_size=self.config.config_dict['POOL1_FILTER_SIZE'],
            strides=self.config.config_dict['POOL1_STRIDE_SIZE'],
            name=name_prefix + 'POOL1_LAYER')

        conv2 = tl.layers.Conv2d(
            net=pool1,
            n_filter=self.config.config_dict['CONV2_1_CHANNEL_SIZE'],
            filter_size=self.config.config_dict['CONV2_1_FILTER_SIZE'],
            strides=self.config.config_dict['CONV2_1_STRIDE_SIZE'],
            act=tf.nn.relu, W_init=W_init, b_init=b_init,
            name=name_prefix + 'CONV2_1_LAYER')
        conv2 = tl.layers.Conv2d(
            net=conv2,
            n_filter=self.config.config_dict['CONV2_2_CHANNEL_SIZE'],
            filter_size=self.config.config_dict['CONV2_2_FILTER_SIZE'],
            strides=self.config.config_dict['CONV2_2_STRIDE_SIZE'],
            act=tf.nn.relu, W_init=W_init, b_init=b_init,
            name=name_prefix + 'CONV2_2_LAYER')
        if self.is_training is True:
            conv2 = tl.layers.BatchNormLayer(layer=conv2, epsilon=0.000001, is_train=True,
                                             name=name_prefix + 'CONV2_BATCH_NORM_LAYER')
        else:
            conv2 = tl.layers.BatchNormLayer(layer=conv2, epsilon=0.000001, is_train=False,
                                             name=name_prefix + 'CONV2_BATCH_NORM_LAYER')
        pool2 = tl.layers.MaxPool2d(
            net=conv2,
            filter_size=self.config.config_dict['POOL2_FILTER_SIZE'],
            strides=self.config.config_dict['POOL2_STRIDE_SIZE'],
            name=name_prefix + 'POOL2_LAYER')
        pool2 = tl.layers.FlattenLayer(layer=pool2, name=name_prefix + 'POOL2_FLATTEN_LAYER')

        fc1 = tl.layers.DenseLayer(
            layer=pool2,
            n_units=self.config.config_dict['DENSE_LAYER_1_UNIT'],
            act=tf.nn.relu,
            name=name_prefix + 'DENSE_LAYER_1_LAYER')
        fc2 = tl.layers.DropconnectDenseLayer(
            layer=fc1,
            n_units=self.config.config_dict['DENSE_LAYER_2_UNIT'],
            act=tf.nn.sigmoid,
            name=name_prefix + 'DENSE_LAYER_2_LAYER',
            keep=self.config.config_dict['DROP_OUT_PROB_VALUE'])

        feature_layer = tl.layers.ConcatLayer(
            layer=[fc2, merged_flattened_input],
            concat_dim=1,
            name=name_prefix + 'LSTM_FEATURE_CONCAT_LAYER')
        feature_length = feature_layer.outputs.get_shape().as_list()[1]
        # LSTM INPUT IS [BATCH_SIZE, LENGTH, FEATURE_DIM]
        lstm_input = tl.layers.ReshapeLayer(
            layer=feature_layer,
            shape=[-1, state_length, feature_length],
            name=name_prefix + 'LSTM_FEATURE_RESHAPE_LAYER')

        # TODO
        # be aware of the init_state when train a lstm
        init_state = tf.placeholder(
            dtype=tf.float32,
            shape=[self.config.config_dict['LSTM_LAYERS_NUM'], 2, batch_size,
                   self.config.config_dict['LSMT_INPUT_LENGTH']])
        state_per_layers = tf.unstack(init_state, axis=0)
        rnn_tuple_state = tuple([
            tf.nn.rnn_cell.LSTMStateTuple(state_per_layers[idx][0], state_per_layers[idx][1])
            for idx in range(self.config.config_dict['LSTM_LAYERS_NUM'])
        ])
        rnn = tl.layers.DynamicRNNLayer(
            layer=lstm_input,
            cell_fn=tf.nn.rnn_cell.LSTMCell,
            sequence_length=None,
            n_hidden=self.config.config_dict['LSMT_INPUT_LENGTH'],
            initial_state=rnn_tuple_state,
            n_layer=self.config.config_dict['LSTM_LAYERS_NUM'],
            return_last=True,
            name=name_prefix + 'LSTM_LAYER')

        lstm_fc1 = tl.layers.DenseLayer(
            layer=rnn,
            n_units=self.config.config_dict['LSTM_DENSE_LAYER1_UNIT'],
            act=tf.nn.relu,
            name=name_prefix + 'LSTM_DENSE_LAYER_1')
        lstm_fc2 = tl.layers.DenseLayer(
            layer=lstm_fc1,
            n_units=self.config.config_dict['LSTM_DENSE_LAYER_2_UNIT'],
            act=tf.nn.tanh,
            name=name_prefix + 'LSTM_DENSE_LAYER_2')
        return lstm_fc2

    def create_training_method(self):
        # Deterministic policy gradient: back-propagate the negative critic gradient
        # w.r.t. the action through the actor parameters.
        parameters_gradients = tf.gradients(self.action, self.var_list, -self.q_value_gradients)
        optimizer = tf.train.AdamOptimizer(learning_rate=self.config.config_dict['LEARNING_RATE'])
        optimize_loss = optimizer.apply_gradients(
            grads_and_vars=zip(parameters_gradients, self.var_list))
        return optimizer, optimize_loss
            loss, _ = sess.run(fetches=[model.loss, model.optimize_loss],
                               feed_dict={model.state('STATE'): state,
                                          model.label: label})
            count = count + 1
            aver_loss = (float(aver_loss) * (count - 1) + loss) / float(count)
        state, label = data.return_batch_data(0, data.sample_count)
        summary = sess.run(fetches=[merged],
                           feed_dict={model.state('STATE'): state,
                                      model.label: label})
        train_writer.add_summary(summary[0], global_step=i)
        print("epoch = ", i, " loss = ", aver_loss)


if __name__ == '__main__':
    test_config_standard_key_list = utils.load_json(
        file_path=CONFIG_STANDARD_KEY_LIST + '/fisTestKeyList.json')
    test_config = Config(config_dict=None, standard_key_list=test_config_standard_key_list)
    test_config_dict = utils.load_json(file_path=CONFIG_PATH + '/testFisTestConfig.json')
    test_config.load_config(json_dict=test_config_dict, path=None)
    model = create_two_layer_dense_model()
    train(test_config, model)