def RNN(x, weights, biases):
    print("input: x = {}".format(x))
    # [batch_size, n_steps, n_input] -> [n_steps, batch_size, n_input]
    x = tf.transpose(x, [1, 0, 2])
    print("x transpose: {}".format(x))
    # Flatten to [n_steps * batch_size, n_input]
    x = tf.reshape(x, [-1, n_input])
    print("x reshape: {}".format(x))
    # Split into a list of n_steps tensors of shape [batch_size, n_input]
    x = tf.split(axis=0, num_or_size_splits=n_steps, value=x)

    lstm_cell = rnn.BasicLSTMCell(n_hidden, forget_bias=1.0)
    outputs, states = rnn.static_rnn(lstm_cell, x, dtype=tf.float32)

    # Classify on the output of the last time step
    return tf.matmul(outputs[-1], weights['out']) + biases['out']
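# A minimal usage sketch for RNN() above, assuming hyperparameters n_input,
# n_steps, n_hidden and n_classes as shown here; the concrete values and the
# import paths are illustrative assumptions, not part of the original snippet.
import tensorflow as tf
from tensorflow.contrib import rnn

n_input, n_steps, n_hidden, n_classes = 28, 28, 128, 10
x = tf.placeholder(tf.float32, [None, n_steps, n_input])
weights = {'out': tf.Variable(tf.random_normal([n_hidden, n_classes]))}
biases = {'out': tf.Variable(tf.random_normal([n_classes]))}
logits = RNN(x, weights, biases)  # [batch_size, n_classes]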
def generate_embedding_RNN_output(encoder_inputs,
                                  cell,
                                  num_encoder_symbols,
                                  word_embedding_size,
                                  num_heads=1,
                                  dtype=dtypes.float32,
                                  scope=None,
                                  initial_state_attention=False,
                                  sequence_length=None,
                                  bidirectional_rnn=False):
    """
    Generate RNN state outputs with word embeddings as inputs.

    Note that this example code does not include output label dependency
    modeling. One may add a loop function, as in the rnn_decoder function in
    tf seq2seq.py, to feed the emitted label embedding back into the RNN state.
    """
    with variable_scope.variable_scope(scope or "generate_embedding_RNN_output"):
        if bidirectional_rnn:
            encoder_cell_fw = cell
            encoder_cell_bw = cell
            embedding = variable_scope.get_variable(
                "embedding", [num_encoder_symbols, word_embedding_size])
            encoder_embedded_inputs = [
                embedding_ops.embedding_lookup(embedding, encoder_input)
                for encoder_input in encoder_inputs
            ]
            encoder_outputs, encoder_state_fw, encoder_state_bw = rnn.static_bidirectional_rnn(
                encoder_cell_fw,
                encoder_cell_bw,
                encoder_embedded_inputs,
                sequence_length=sequence_length,
                dtype=dtype)
            encoder_state = array_ops.concat(
                axis=1,
                values=[
                    array_ops.concat(axis=1, values=encoder_state_fw),
                    array_ops.concat(axis=1, values=encoder_state_bw)
                ])
            top_states = [
                array_ops.reshape(e, [-1, 1, cell.output_size * 2])
                for e in encoder_outputs
            ]
            attention_states = array_ops.concat(axis=1, values=top_states)
        else:
            encoder_cell = cell
            embedding = variable_scope.get_variable(
                "embedding", [num_encoder_symbols, word_embedding_size])
            encoder_embedded_inputs = [
                embedding_ops.embedding_lookup(embedding, encoder_input)
                for encoder_input in encoder_inputs
            ]
            encoder_outputs, encoder_state = rnn.static_rnn(
                encoder_cell,
                encoder_embedded_inputs,
                sequence_length=sequence_length,
                dtype=dtype)
            encoder_state = array_ops.concat(axis=1, values=encoder_state)
            top_states = [
                array_ops.reshape(e, [-1, 1, cell.output_size])
                for e in encoder_outputs
            ]
            attention_states = array_ops.concat(axis=1, values=top_states)
        return encoder_outputs, encoder_state, attention_states
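# A hedged call sketch for generate_embedding_RNN_output(); every size, name
# and import below is an illustrative assumption. The function only requires a
# list of int32 id tensors, an RNN cell, and the vocabulary/embedding sizes.
import tensorflow as tf

max_seq_len = 50
encoder_inputs = [tf.placeholder(tf.int32, [None]) for _ in range(max_seq_len)]
sequence_length = tf.placeholder(tf.int32, [None])
cell = tf.contrib.rnn.BasicLSTMCell(128)
encoder_outputs, encoder_state, attention_states = generate_embedding_RNN_output(
    encoder_inputs,
    cell,
    num_encoder_symbols=10000,
    word_embedding_size=64,
    sequence_length=sequence_length,
    bidirectional_rnn=True)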
def lstm_inference(x):
    RNN_HIDDEN_UNITS = 128

    # x was [BATCH_SIZE, 32, 32, 3]
    # x changes to [32, BATCH_SIZE, 32, 3]
    x = tf.transpose(x, [1, 0, 2, 3])
    # x changes to [32 * BATCH_SIZE, 32 * 3]
    x = tf.reshape(x, [-1, IMAGE_SIZE * RGB_CHANNEL_SIZE])
    # x changes to an array of 32 * [BATCH_SIZE, 32 * 3]
    x = tf.split(axis=0, num_or_size_splits=IMAGE_SIZE, value=x)

    weights = tf.Variable(tf.random_normal([RNN_HIDDEN_UNITS, LABEL_SIZE]))
    biases = tf.Variable(tf.random_normal([LABEL_SIZE]))

    # output size is 128, state size is (c=128, h=128)
    lstm_cell = rnn.BasicLSTMCell(RNN_HIDDEN_UNITS, forget_bias=1.0)
    # outputs is an array of 32 * [BATCH_SIZE, 128]
    outputs, states = rnn.static_rnn(lstm_cell, x, dtype=tf.float32)

    # outputs[-1] is [BATCH_SIZE, 128]
    return tf.matmul(outputs[-1], weights) + biases
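# A minimal sketch of calling lstm_inference() on a batch of 32x32 RGB images;
# IMAGE_SIZE, RGB_CHANNEL_SIZE and LABEL_SIZE are assumed to match the values
# used elsewhere in these snippets, and the imports are assumptions.
import tensorflow as tf
from tensorflow.contrib import rnn

IMAGE_SIZE = 32
RGB_CHANNEL_SIZE = 3
LABEL_SIZE = 17

images = tf.placeholder(
    tf.float32, shape=(None, IMAGE_SIZE, IMAGE_SIZE, RGB_CHANNEL_SIZE))
logits = lstm_inference(images)  # [BATCH_SIZE, LABEL_SIZE]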
def generate_embedding_RNN_output(encoder_inputs,
                                  cell,
                                  num_encoder_symbols,
                                  word_embedding_size,
                                  embedding,
                                  num_heads=1,
                                  dtype=dtypes.float32,
                                  scope=None,
                                  initial_state_attention=False,
                                  sequence_length=None,
                                  bidirectional_rnn=False):
    """
    Generate RNN state outputs with word embeddings as inputs.

    Note that this example code does not include output label dependency
    modeling. One may add a loop function, as in the rnn_decoder function in
    tf seq2seq.py, to feed the emitted label embedding back into the RNN state.
    """
    with variable_scope.variable_scope(scope or "generate_embedding_RNN_output"):
        if bidirectional_rnn:
            encoder_cell_fw = cell
            encoder_cell_bw = cell
            # embedding = variable_scope.get_variable(
            #     "embedding", [num_encoder_symbols, word_embedding_size])
            # n_symbol, embed_size = embedding.shape
            # X = variable_scope.get_variable("X", [embed_size, embed_size])
            # b = variable_scope.get_variable("b", [embed_size])
            # encoder_embedded_inputs = [
            #     tf.multiply(embedding_ops.embedding_lookup(embedding, encoder_input), X) + b
            #     for encoder_input in encoder_inputs
            # ]
            encoder_embedded_inputs = [
                embedding_ops.embedding_lookup(embedding, encoder_input)
                for encoder_input in encoder_inputs
            ]
            encoder_outputs, encoder_state_fw, encoder_state_bw = rnn.static_bidirectional_rnn(
                encoder_cell_fw,
                encoder_cell_bw,
                encoder_embedded_inputs,
                sequence_length=sequence_length,
                dtype=dtype)
            encoder_state = array_ops.concat([
                array_ops.concat(encoder_state_fw, 1),
                array_ops.concat(encoder_state_bw, 1)
            ], 1)
            top_states = [
                array_ops.reshape(e, [-1, 1, cell.output_size * 2])
                for e in encoder_outputs
            ]
            attention_states = array_ops.concat(top_states, 1)
        else:
            encoder_cell = cell
            embedding = variable_scope.get_variable(
                "embedding", [num_encoder_symbols, word_embedding_size])
            encoder_embedded_inputs = [
                embedding_ops.embedding_lookup(embedding, encoder_input)
                for encoder_input in encoder_inputs
            ]
            encoder_outputs, encoder_state = rnn.static_rnn(
                encoder_cell,
                encoder_embedded_inputs,
                sequence_length=sequence_length,
                dtype=dtype)
            encoder_state = array_ops.concat(encoder_state, 1)
            top_states = [
                array_ops.reshape(e, [-1, 1, cell.output_size])
                for e in encoder_outputs
            ]
            attention_states = array_ops.concat(top_states, 1)
        return encoder_outputs, encoder_state, attention_states
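# The docstring above mentions feeding the emitted label embedding back into
# the RNN via a loop function, as in rnn_decoder in the legacy seq2seq.py. A
# hedged sketch of such a loop function follows; label_embedding and
# output_projection are hypothetical names, and `tf` is assumed to be imported
# as tensorflow. This is not part of the original code.
def extract_argmax_and_embed(label_embedding, output_projection=None):
    """Build a loop_function(prev, i) that embeds the previously emitted label."""
    def loop_function(prev, _):
        if output_projection is not None:
            # Project the RNN output to label logits before taking the argmax
            prev = tf.matmul(prev, output_projection[0]) + output_projection[1]
        prev_symbol = tf.argmax(prev, 1)  # greedy choice of the previous label
        return tf.nn.embedding_lookup(label_embedding, prev_symbol)
    return loop_function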
def __init__(self, config, training=False):
    self.config = config
    self.time_batch_len = time_batch_len = config.time_batch_len
    self.input_dim = input_dim = config.input_dim
    hidden_size = config.hidden_size
    num_layers = config.num_layers
    dropout_prob = config.dropout_prob
    input_dropout_prob = config.input_dropout_prob
    cell_type = config.cell_type

    self.seq_input = \
        tf.placeholder(tf.float32, shape=[self.time_batch_len, None, input_dim])

    if (dropout_prob <= 0.0 or dropout_prob > 1.0):
        raise Exception(
            "Invalid dropout probability: {}".format(dropout_prob))

    if (input_dropout_prob <= 0.0 or input_dropout_prob > 1.0):
        raise Exception("Invalid input dropout probability: {}".format(
            input_dropout_prob))

    # setup variables
    with tf.variable_scope("rnnlstm"):
        output_W = tf.get_variable("output_w", [hidden_size, input_dim])
        output_b = tf.get_variable("output_b", [input_dim])
        self.lr = tf.constant(config.learning_rate, name="learning_rate")
        self.lr_decay = tf.constant(config.learning_rate_decay,
                                    name="learning_rate_decay")

    def create_cell(input_size):
        if cell_type == "vanilla":
            cell_class = BasicRNNCell
        elif cell_type == "gru":
            cell_class = GRUCell
        elif cell_type == "lstm":
            cell_class = BasicLSTMCell
        else:
            raise Exception("Invalid cell type: {}".format(cell_type))

        cell = cell_class(hidden_size, input_size)
        if training:
            return DropoutWrapper(cell, output_keep_prob=dropout_prob)
        else:
            return cell

    if training:
        self.seq_input_dropout = tf.nn.dropout(
            self.seq_input, keep_prob=input_dropout_prob)
    else:
        self.seq_input_dropout = self.seq_input

    # Stack the layers with MultiRNNCell: the first layer sees input_dim inputs,
    # the remaining layers see hidden_size inputs.
    self.cell = MultiRNNCell(
        [create_cell(input_dim)] +
        [create_cell(hidden_size) for i in range(1, num_layers)])

    # The batch dimension is the second axis of [time_batch_len, batch, input_dim]
    batch_size = tf.shape(self.seq_input_dropout)[1]
    self.initial_state = self.cell.zero_state(batch_size, tf.float32)
    inputs_list = tf.unpack(self.seq_input_dropout)

    # rnn outputs a list of [batch_size x H] outputs
    outputs_list, self.final_state = rnn.rnn(
        self.cell, inputs_list, initial_state=self.initial_state)

    outputs = tf.pack(outputs_list)
    outputs_concat = tf.reshape(outputs, [-1, hidden_size])
    logits_concat = tf.matmul(outputs_concat, output_W) + output_b
    logits = tf.reshape(logits_concat, [self.time_batch_len, -1, input_dim])

    # probabilities of each note
    self.probs = self.calculate_probs(logits)
    self.loss = self.init_loss(logits, logits_concat)
    self.train_step = tf.train.RMSPropOptimizer(
        self.lr, decay=self.lr_decay).minimize(self.loss)
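# The constructor above reads its hyperparameters off a config object. A hedged
# sketch of such a config follows; the field names are taken from the code
# above, while the concrete values are illustrative assumptions only.
class DefaultConfig(object):
    time_batch_len = 128
    input_dim = 88            # e.g. one input unit per piano key
    hidden_size = 200
    num_layers = 2
    dropout_prob = 0.5        # output keep probability for DropoutWrapper
    input_dropout_prob = 0.8  # keep probability applied to the inputs
    cell_type = "lstm"        # one of "vanilla", "gru", "lstm"
    learning_rate = 1e-3
    learning_rate_decay = 0.9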
def main():
    # Initialize some parameters
    print("Start Pokemon classifier")

    if not os.path.exists(FLAGS.checkpoint_path):
        os.makedirs(FLAGS.checkpoint_path)
    CHECKPOINT_FILE = FLAGS.checkpoint_path + "/checkpoint.ckpt"
    LATEST_CHECKPOINT = tf.train.latest_checkpoint(FLAGS.checkpoint_path)

    # Initialize train and test data
    TRAIN_IMAGE_NUMBER = 646
    TEST_IMAGE_NUMBER = 68
    IMAGE_SIZE = 32
    RGB_CHANNEL_SIZE = 3
    LABEL_SIZE = 17

    train_dataset = np.ndarray(
        shape=(TRAIN_IMAGE_NUMBER, IMAGE_SIZE, IMAGE_SIZE, RGB_CHANNEL_SIZE),  # channels last
        dtype=np.float32)
    test_dataset = np.ndarray(
        shape=(TEST_IMAGE_NUMBER, IMAGE_SIZE, IMAGE_SIZE, RGB_CHANNEL_SIZE),
        dtype=np.float32)
    train_labels = np.ndarray(shape=(TRAIN_IMAGE_NUMBER, ), dtype=np.int32)
    test_labels = np.ndarray(shape=(TEST_IMAGE_NUMBER, ), dtype=np.int32)

    TRAIN_DATA_DIR = "./data/train/"
    TEST_DATA_DIR = "./data/test/"
    VALIDATE_DATA_DIR = "./data/validate/"
    IMAGE_FORMAT = ".png"
    index = 0  # counter for the number of loaded images

    pokemon_type_id_map = {
        "Bug": 0,
        "Dark": 1,
        "Dragon": 2,
        "Electric": 3,
        "Fairy": 4,
        "Fighting": 5,
        "Fire": 6,
        "Ghost": 7,
        "Grass": 8,
        "Ground": 9,
        "Ice": 10,
        "Normal": 11,
        "Poison": 12,
        "Psychic": 13,
        "Rock": 14,
        "Steel": 15,
        "Water": 16
    }
    pokemon_types = [
        "Bug", "Dark", "Dragon", "Electric", "Fairy", "Fighting", "Fire",
        "Ghost", "Grass", "Ground", "Ice", "Normal", "Poison", "Psychic",
        "Rock", "Steel", "Water"
    ]

    # Step 1: load the training data
    for pokemon_type in os.listdir(TRAIN_DATA_DIR):
        for image_filename in os.listdir(
                os.path.join(TRAIN_DATA_DIR, pokemon_type)):
            if image_filename.endswith(IMAGE_FORMAT):
                image_filepath = os.path.join(TRAIN_DATA_DIR, pokemon_type,
                                              image_filename)
                image_ndarray = ndimage.imread(image_filepath, mode="RGB")  # RGB
                train_dataset[index] = image_ndarray
                # Convert the label string into a numeric id
                train_labels[index] = pokemon_type_id_map.get(pokemon_type)
                index += 1

    index = 0
    # Step 2: load the test data
    for pokemon_type in os.listdir(TEST_DATA_DIR):
        for image_filename in os.listdir(
                os.path.join(TEST_DATA_DIR, pokemon_type)):
            if image_filename.endswith(IMAGE_FORMAT):
                image_filepath = os.path.join(TEST_DATA_DIR, pokemon_type,
                                              image_filename)
                image_ndarray = ndimage.imread(image_filepath, mode="RGB")
                test_dataset[index] = image_ndarray
                test_labels[index] = pokemon_type_id_map.get(pokemon_type)
                index += 1

    # Step 3: define the model
    # Placeholders
    keys_placeholder = tf.placeholder(tf.int32, shape=[None, 1])
    keys = tf.identity(keys_placeholder)

    # Base64-encoded images
    model_base64_placeholder = tf.placeholder(
        shape=[None], dtype=tf.string, name="model_input_b64_images")
    model_base64_string = tf.decode_base64(model_base64_placeholder)
    # tf.map_fn is the graph-level equivalent of Python's map()
    model_base64_input = tf.map_fn(
        lambda x: tf.image.resize_images(
            tf.image.decode_jpeg(x, channels=RGB_CHANNEL_SIZE),
            [IMAGE_SIZE, IMAGE_SIZE]),
        model_base64_string,
        dtype=tf.float32)

    x = tf.placeholder(
        tf.float32, shape=(None, IMAGE_SIZE, IMAGE_SIZE, RGB_CHANNEL_SIZE))
    y = tf.placeholder(tf.int32, shape=(None, ))

    batch_size = FLAGS.batch_size
    epoch_number = FLAGS.epoch_number
    checkpoint_dir = FLAGS.checkpoint_dir
    if not os.path.exists(checkpoint_dir):
        os.makedirs(checkpoint_dir)
    tensorboard_dir = FLAGS.tensorboard_dir
    mode = FLAGS.mode
    checkpoint_file = checkpoint_dir + "/checkpoint.ckpt"
    steps_to_validate = FLAGS.steps_to_validate

    def cnn_inference(x):
        # Convolution layer result: [BATCH_SIZE, 16, 16, 32]
        # Output width: (n + 2p - f) / s + 1
        with tf.variable_scope("conv1"):
            weights = tf.get_variable(
                "weights", [3, 3, 3, 32],
                initializer=tf.random_normal_initializer())
            bias = tf.get_variable(
                "bias", [32], initializer=tf.random_normal_initializer())

            layer = tf.nn.conv2d(
                x, weights, strides=[1, 1, 1, 1], padding="SAME")  # 32*32*32
            layer = tf.nn.bias_add(layer, bias)
            layer = tf.nn.relu(layer)
            # Output width: (n - f) / s + 1
            layer = tf.nn.max_pool(
                layer, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1],
                padding="SAME")  # 16*16*32

        # Convolution layer result: [BATCH_SIZE, 8, 8, 64]
        with tf.variable_scope("conv2"):
            weights = tf.get_variable(
                "weights", [3, 3, 32, 64],
                initializer=tf.random_normal_initializer())
            bias = tf.get_variable(
                "bias", [64], initializer=tf.random_normal_initializer())

            layer = tf.nn.conv2d(
                layer, weights, strides=[1, 1, 1, 1], padding="SAME")  # 16*16*64
            layer = tf.nn.bias_add(layer, bias)
            layer = tf.nn.relu(layer)
            layer = tf.nn.max_pool(
                layer, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1],
                padding="SAME")  # 8*8*64

        # Flatten for the fully connected layer
        layer = tf.reshape(layer, [-1, 8 * 8 * 64])

        # Fully connected layer result: [BATCH_SIZE, 17]
        with tf.variable_scope("fc1"):
            # weights.get_shape().as_list()[0] = 8 * 8 * 64
            weights = tf.get_variable(
                "weights", [8 * 8 * 64, LABEL_SIZE],
                initializer=tf.random_normal_initializer())
            bias = tf.get_variable(
                "bias", [LABEL_SIZE],
                initializer=tf.random_normal_initializer())
            layer = tf.add(tf.matmul(layer, weights), bias)

        return layer  # 17 output nodes

    def lstm_inference(x):
        RNN_HIDDEN_UNITS = 128

        # x was [BATCH_SIZE, 32, 32, 3]
        # x changes to [32, BATCH_SIZE, 32, 3]
        x = tf.transpose(x, [1, 0, 2, 3])
        # x changes to [32 * BATCH_SIZE, 32 * 3]
        x = tf.reshape(x, [-1, IMAGE_SIZE * RGB_CHANNEL_SIZE])
        # x changes to an array of 32 * [BATCH_SIZE, 32 * 3]
        x = tf.split(axis=0, num_or_size_splits=IMAGE_SIZE, value=x)

        weights = tf.Variable(tf.random_normal([RNN_HIDDEN_UNITS, LABEL_SIZE]))
        biases = tf.Variable(tf.random_normal([LABEL_SIZE]))

        # output size is 128, state size is (c=128, h=128)
        lstm_cell = rnn.BasicLSTMCell(RNN_HIDDEN_UNITS, forget_bias=1.0)
        # outputs is an array of 32 * [BATCH_SIZE, 128]
        outputs, states = rnn.static_rnn(lstm_cell, x, dtype=tf.float32)

        # outputs[-1] is [BATCH_SIZE, 128]
        return tf.matmul(outputs[-1], weights) + biases

    def bidirectional_lstm_inference(x):
        RNN_HIDDEN_UNITS = 128

        # x was [BATCH_SIZE, 32, 32, 3]
        # x changes to [32, BATCH_SIZE, 32, 3]
        x = tf.transpose(x, [1, 0, 2, 3])
        # x changes to [32 * BATCH_SIZE, 32 * 3]
        x = tf.reshape(x, [-1, IMAGE_SIZE * RGB_CHANNEL_SIZE])
        # x changes to an array of 32 * [BATCH_SIZE, 32 * 3]
        x = tf.split(axis=0, num_or_size_splits=IMAGE_SIZE, value=x)

        weights = tf.Variable(
            tf.random_normal([2 * RNN_HIDDEN_UNITS, LABEL_SIZE]))
        biases = tf.Variable(tf.random_normal([LABEL_SIZE]))

        # output size is 128 per direction, state size is (c=128, h=128)
        fw_lstm_cell = rnn.BasicLSTMCell(RNN_HIDDEN_UNITS, forget_bias=1.0)
        bw_lstm_cell = rnn.BasicLSTMCell(RNN_HIDDEN_UNITS, forget_bias=1.0)
        # outputs is an array of 32 * [BATCH_SIZE, 2 * 128]
        outputs, _, _ = rnn.static_bidirectional_rnn(
            fw_lstm_cell, bw_lstm_cell, x, dtype=tf.float32)

        # outputs[-1] is [BATCH_SIZE, 2 * 128]
        return tf.matmul(outputs[-1], weights) + biases

    def stacked_lstm_inference(x):
        '''
        Stacked-LSTM variant of lstm_inference(x)
        '''
        RNN_HIDDEN_UNITS = 128

        # x was [BATCH_SIZE, 32, 32, 3]
        # x changes to [32, BATCH_SIZE, 32, 3]
        x = tf.transpose(x, [1, 0, 2, 3])
        # x changes to [32 * BATCH_SIZE, 32 * 3]
        x = tf.reshape(x, [-1, IMAGE_SIZE * RGB_CHANNEL_SIZE])
        # x changes to an array of 32 * [BATCH_SIZE, 32 * 3]
        x = tf.split(axis=0, num_or_size_splits=IMAGE_SIZE, value=x)

        weights = tf.Variable(tf.random_normal([RNN_HIDDEN_UNITS, LABEL_SIZE]))
        biases = tf.Variable(tf.random_normal([LABEL_SIZE]))

        # output size is 128, state size is (c=128, h=128)
        lstm_cell_1 = rnn.BasicLSTMCell(RNN_HIDDEN_UNITS, forget_bias=1.0)
        lstm_cell_2 = rnn.BasicLSTMCell(RNN_HIDDEN_UNITS, forget_bias=1.0)
        # Use two distinct cell instances so the stacked layers do not share variables
        lstm_cells = rnn.MultiRNNCell([lstm_cell_1, lstm_cell_2])  # 2 layers
        # outputs is an array of 32 * [BATCH_SIZE, 128]
        outputs, states = rnn.static_rnn(lstm_cells, x, dtype=tf.float32)

        # outputs[-1] is [BATCH_SIZE, 128]
        return tf.matmul(outputs[-1], weights) + biases
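    # A hedged continuation sketch: wiring one of the inference functions above
    # into a loss and a training op. The optimizer choice and the 0.01 learning
    # rate are illustrative assumptions, not taken from the original main().
    logits = cnn_inference(x)  # or lstm_inference / bidirectional_lstm_inference / stacked_lstm_inference
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits, labels=y)
    loss = tf.reduce_mean(cross_entropy)
    train_op = tf.train.GradientDescentOptimizer(0.01).minimize(loss)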