Exemple #1
0
def RNN(x, weights, biases):
    """Run one LSTM layer over `x` and project the final step's output.

    Args:
        x: Rank-3 input tensor (the transpose below permutes three axes).
            Assumed to be laid out as (batch, n_steps, n_input) -- TODO
            confirm against the caller.
        weights: Mapping whose 'out' entry is the output projection matrix.
        biases: Mapping whose 'out' entry is the output bias.

    Returns:
        The last RNN output multiplied by weights['out'] plus biases['out'].
    """
    print("input: x = {}".format(x))
    # Swap the first two axes, then flatten to rows of width n_input.
    x = tf.transpose(x, [1, 0, 2])
    print("x transpose: {}".format(x))
    x = tf.reshape(x, [-1, n_input])
    print("x reshape:{}".format(x))
    # Split exactly once into a list of n_steps tensors. Splitting a second
    # time would split that list itself and hand the RNN wrongly shaped
    # inputs.
    x = tf.split(axis=0, num_or_size_splits=n_steps, value=x)
    lstm_cell = nn.rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)
    # NOTE(review): `rnn.rnn` is the pre-1.0 TensorFlow name, while the keyword
    # form of tf.split above is the 1.0+ signature -- confirm which `rnn`
    # module is imported (TensorFlow 1.x exposes this as `static_rnn`).
    outputs, _ = rnn.rnn(lstm_cell, x, dtype=tf.float32)
    # Only the output of the last time step feeds the projection.
    return tf.matmul(outputs[-1], weights['out']) + biases['out']
def generate_embedding_RNN_output(encoder_inputs,
                                  cell,
                                  num_encoder_symbols,
                                  word_embedding_size,
                                  num_heads=1,
                                  dtype=dtypes.float32,
                                  scope=None,
                                  initial_state_attention=False,
                                  sequence_length=None,
                                  bidirectional_rnn=False):
  """Generate RNN state outputs with word embeddings as inputs.

  Note that this example code does not include output label dependency
  modeling. One may add a loop function as in the rnn_decoder function in the
  tf seq2seq.py example to feed emitted label embedding back to RNN state.

  Args:
    encoder_inputs: Iterable of id tensors, one per time step; each one is
      looked up in the embedding matrix.
    cell: RNN cell used for the encoder; its `output_size` sizes the attention
      states.
    num_encoder_symbols: Number of rows of the embedding matrix.
    word_embedding_size: Number of columns of the embedding matrix.
    num_heads: Unused in this function.
    dtype: dtype forwarded to the RNN call.
    scope: Optional variable scope name; defaults to
      "generate_embedding_RNN_output".
    initial_state_attention: Unused in this function.
    sequence_length: Forwarded to the RNN call.
    bidirectional_rnn: When True, run a static bidirectional RNN; otherwise a
      unidirectional static RNN.

  Returns:
    A tuple (encoder_outputs, encoder_state, attention_states). The state is
    the RNN's final state concatenated along axis 1 (for the bidirectional
    case, forward then backward). attention_states stacks the per-step outputs
    along axis 1 after reshaping each to [-1, 1, output_width].
  """
  with variable_scope.variable_scope(scope or "generate_embedding_RNN_output"):
    # Both modes embed the inputs the same way, so do it once up front.
    embedding = variable_scope.get_variable(
        "embedding", [num_encoder_symbols, word_embedding_size])
    encoder_embedded_inputs = [
        embedding_ops.embedding_lookup(embedding, encoder_input)
        for encoder_input in encoder_inputs]

    if bidirectional_rnn:
      # NOTE(review): the same cell object serves both directions; confirm the
      # TensorFlow version in use gives each direction its own weights.
      encoder_outputs, encoder_state_fw, encoder_state_bw = rnn.static_bidirectional_rnn(
          cell, cell, encoder_embedded_inputs, sequence_length=sequence_length, dtype=dtype)
      # Each directional state is itself concatenated first (assumes it is a
      # sequence of tensors such as an LSTM (c, h) pair -- TODO confirm).
      encoder_state = array_ops.concat(
          axis=1,
          values=[array_ops.concat(axis=1, values=encoder_state_fw),
                  array_ops.concat(axis=1, values=encoder_state_bw)])
      # Forward and backward outputs are joined, doubling the width.
      output_width = cell.output_size * 2
    else:
      # Use the same API generation as the bidirectional branch: static_rnn
      # and keyword arguments for concat (the positional (1, values) order is
      # the pre-1.0 signature and has the arguments swapped for 1.x).
      encoder_outputs, encoder_state = rnn.static_rnn(
          cell, encoder_embedded_inputs, sequence_length=sequence_length, dtype=dtype)
      encoder_state = array_ops.concat(axis=1, values=encoder_state)
      output_width = cell.output_size

    top_states = [array_ops.reshape(e, [-1, 1, output_width])
                  for e in encoder_outputs]
    attention_states = array_ops.concat(axis=1, values=top_states)

    return encoder_outputs, encoder_state, attention_states
Exemple #3
0
    def lstm_inference(x):
        """Classify images with a single LSTM that reads one image row per step.

        Args:
            x: Image batch; the comments below assume
                [BATCH_SIZE, 32, 32, 3] -- TODO confirm against the caller.

        Returns:
            The last LSTM output projected to LABEL_SIZE values per example.
        """
        RNN_HIDDEN_UNITS = 128

        # x was [BATCH_SIZE, 32, 32, 3]
        # x changes to [32, BATCH_SIZE, 32, 3]
        x = tf.transpose(x, [1, 0, 2, 3])
        # x changes to [32 * BATCH_SIZE, 32 * 3]
        x = tf.reshape(x, [-1, IMAGE_SIZE * RGB_CHANNEL_SIZE])
        # x changes to array of 32 * [BATCH_SIZE, 32 * 3]
        x = tf.split(axis=0, num_or_size_splits=IMAGE_SIZE, value=x)

        weights = tf.Variable(tf.random_normal([RNN_HIDDEN_UNITS, LABEL_SIZE]))
        biases = tf.Variable(tf.random_normal([LABEL_SIZE]))

        # output size is 128, state size is (c=128, h=128)
        lstm_cell = rnn.BasicLSTMCell(RNN_HIDDEN_UNITS, forget_bias=1.0)
        # outputs is array of 32 * [BATCH_SIZE, 128]
        # The namespace that provides rnn.BasicLSTMCell and the keyword form
        # of tf.split above belong to TensorFlow 1.x, where the static RNN
        # entry point is `static_rnn` (the old `rnn.rnn` name is gone).
        outputs, _ = rnn.static_rnn(lstm_cell, x, dtype=tf.float32)

        # outputs[-1] is [BATCH_SIZE, 128]
        return tf.matmul(outputs[-1], weights) + biases
def generate_embedding_RNN_output(encoder_inputs,
                                  cell,
                                  num_encoder_symbols,
                                  word_embedding_size,
                                  embedding,
                                  num_heads=1,
                                  dtype=dtypes.float32,
                                  scope=None,
                                  initial_state_attention=False,
                                  sequence_length=None,
                                  bidirectional_rnn=False):
    """Generate RNN state outputs with word embeddings as inputs.

    Note that this example code does not include output label dependency
    modeling. One may add a loop function as in the rnn_decoder function in
    the tf seq2seq.py example to feed emitted label embedding back to RNN
    state.

    Args:
        encoder_inputs: Iterable of id tensors, one per time step; each one
            is looked up in the embedding matrix.
        cell: RNN cell used for the encoder; its `output_size` sizes the
            attention states.
        num_encoder_symbols: Number of rows of the embedding variable created
            in the unidirectional branch.
        word_embedding_size: Number of columns of that embedding variable.
        embedding: Embedding matrix used for the lookups in the bidirectional
            branch. The unidirectional branch does not use it (see the note
            in the body).
        num_heads: Unused in this function.
        dtype: dtype forwarded to the RNN call.
        scope: Optional variable scope name; defaults to
            "generate_embedding_RNN_output".
        initial_state_attention: Unused in this function.
        sequence_length: Forwarded to the RNN call.
        bidirectional_rnn: When True, run a static bidirectional RNN;
            otherwise a unidirectional static RNN.

    Returns:
        A tuple (encoder_outputs, encoder_state, attention_states). The state
        is the RNN's final state concatenated along axis 1 (for the
        bidirectional case, forward then backward). attention_states stacks
        the per-step outputs along axis 1 after reshaping each to
        [-1, 1, output_width].
    """
    with variable_scope.variable_scope(scope
                                       or "generate_embedding_RNN_output"):
        if not bidirectional_rnn:
            # NOTE(review): this branch replaces the caller-supplied
            # `embedding` with a freshly created variable, unlike the
            # bidirectional branch -- confirm whether that is intended.
            embedding = variable_scope.get_variable(
                "embedding", [num_encoder_symbols, word_embedding_size])

        encoder_embedded_inputs = [
            embedding_ops.embedding_lookup(embedding, encoder_input)
            for encoder_input in encoder_inputs
        ]

        if bidirectional_rnn:
            # NOTE(review): the same cell object serves both directions;
            # confirm the TensorFlow version in use gives each direction its
            # own weights.
            encoder_outputs, encoder_state_fw, encoder_state_bw = rnn.static_bidirectional_rnn(
                cell,
                cell,
                encoder_embedded_inputs,
                sequence_length=sequence_length,
                dtype=dtype)
            # Each directional state is itself concatenated first (assumes it
            # is a sequence of tensors such as an LSTM (c, h) pair -- TODO
            # confirm).
            encoder_state = array_ops.concat([
                array_ops.concat(encoder_state_fw, 1),
                array_ops.concat(encoder_state_bw, 1)
            ], 1)
            # Forward and backward outputs are joined, doubling the width.
            output_width = cell.output_size * 2
        else:
            # static_rnn lives in the same namespace as the
            # static_bidirectional_rnn used above; the older `rnn.rnn` name
            # does not coexist with it.
            encoder_outputs, encoder_state = rnn.static_rnn(
                cell,
                encoder_embedded_inputs,
                sequence_length=sequence_length,
                dtype=dtype)
            encoder_state = array_ops.concat(encoder_state, 1)
            output_width = cell.output_size

        top_states = [
            array_ops.reshape(e, [-1, 1, output_width])
            for e in encoder_outputs
        ]
        attention_states = array_ops.concat(top_states, 1)

        return encoder_outputs, encoder_state, attention_states
Exemple #5
0
    def __init__(self, config, training=False):
        """Build the recurrent network graph described by `config`.

        Args:
            config: Object providing time_batch_len, input_dim, hidden_size,
                num_layers, dropout_prob, input_dropout_prob, cell_type,
                learning_rate and learning_rate_decay.
            training: When True, dropout is applied to the input sequence and
                to the output of every cell.

        Raises:
            ValueError: If either dropout probability is outside (0, 1], or
                if config.cell_type is not "vanilla", "gru" or "lstm".
        """
        self.config = config
        self.time_batch_len = config.time_batch_len
        self.input_dim = input_dim = config.input_dim
        hidden_size = config.hidden_size
        num_layers = config.num_layers
        dropout_prob = config.dropout_prob
        input_dropout_prob = config.input_dropout_prob
        cell_type = config.cell_type
        # Time-major input: [time_batch_len, batch, input_dim].
        self.seq_input = \
            tf.placeholder(tf.float32, shape=[self.time_batch_len, None, input_dim])

        # Both probabilities are passed to TensorFlow as keep probabilities.
        if (dropout_prob <= 0.0 or dropout_prob > 1.0):
            raise ValueError(
                "Invalid dropout probability: {}".format(dropout_prob))

        if (input_dropout_prob <= 0.0 or input_dropout_prob > 1.0):
            raise ValueError("Invalid input dropout probability: {}".format(
                input_dropout_prob))

        # setup variables
        with tf.variable_scope("rnnlstm"):
            output_W = tf.get_variable("output_w", [hidden_size, input_dim])
            output_b = tf.get_variable("output_b", [input_dim])
            self.lr = tf.constant(config.learning_rate, name="learning_rate")
            self.lr_decay = tf.constant(config.learning_rate_decay,
                                        name="learning_rate_decay")

        def create_cell(input_size):
            """Create one recurrent layer, wrapped in dropout when training."""
            if cell_type == "vanilla":
                cell_class = BasicRNNCell
            elif cell_type == "gru":
                cell_class = GRUCell
            elif cell_type == "lstm":
                cell_class = BasicLSTMCell
            else:
                raise ValueError("Invalid cell type: {}".format(cell_type))

            cell = cell_class(hidden_size, input_size)
            if training:
                return DropoutWrapper(cell, output_keep_prob=dropout_prob)
            return cell

        if training:
            self.seq_input_dropout = tf.nn.dropout(
                self.seq_input, keep_prob=input_dropout_prob)
        else:
            self.seq_input_dropout = self.seq_input

        # A list of layers has to be combined by a multi-layer wrapper;
        # BasicRNNCell expects a unit count, not a list of cells. The first
        # layer consumes the raw input, later layers consume hidden_size.
        self.cell = tf.nn.rnn_cell.MultiRNNCell(
            [create_cell(input_dim)] +
            [create_cell(hidden_size) for _ in range(1, num_layers)])

        # The placeholder is time-major, so the batch dimension is axis 1
        # (axis 0 is time_batch_len).
        batch_size = tf.shape(self.seq_input_dropout)[1]
        self.initial_state = self.cell.zero_state(batch_size, tf.float32)
        # One tensor per time step, taken along axis 0.
        inputs_list = tf.unpack(self.seq_input_dropout)

        # rnn outputs a list of [batch_size x H] outputs
        outputs_list, self.final_state = rnn.rnn(
            self.cell, inputs_list, initial_state=self.initial_state)

        # Flatten all steps so a single matmul applies the output layer, then
        # restore the time-major layout.
        outputs = tf.pack(outputs_list)
        outputs_concat = tf.reshape(outputs, [-1, hidden_size])
        logits_concat = tf.matmul(outputs_concat, output_W) + output_b
        logits = tf.reshape(logits_concat,
                            [self.time_batch_len, -1, input_dim])

        # probabilities of each note
        self.probs = self.calculate_probs(logits)
        self.loss = self.init_loss(logits, logits_concat)
        # NOTE(review): learning_rate_decay is passed as RMSProp's `decay`
        # argument -- confirm that is the intended meaning.
        self.train_step = tf.train.RMSPropOptimizer(self.lr, decay = self.lr_decay) \
                            .minimize(self.loss)
def main():
  """Set up the Pokemon type classifier.

  The part of the function documented here makes sure the checkpoint
  directories exist, loads the train and test images into pre-sized NumPy
  arrays, and defines the input placeholders and run settings read from FLAGS.
  """
  # Initialize some parameters
  print("Start Pokemon classifier")
  if os.path.exists(FLAGS.checkpoint_path) == False:
    os.makedirs(FLAGS.checkpoint_path)
  CHECKPOINT_FILE = FLAGS.checkpoint_path + "/checkpoint.ckpt"
  LATEST_CHECKPOINT = tf.train.latest_checkpoint(FLAGS.checkpoint_path)

  # Initialize train and test data
  TRAIN_IMAGE_NUMBER = 646
  TEST_IMAGE_NUMBER = 68
  IMAGE_SIZE = 32
  RGB_CHANNEL_SIZE = 3
  LABEL_SIZE = 17

  # NOTE(review): np.ndarray(shape=...) allocates without initializing, so any
  # row not overwritten by the loading loops below holds arbitrary values.
  train_dataset = np.ndarray(
      shape=(TRAIN_IMAGE_NUMBER, IMAGE_SIZE, IMAGE_SIZE, RGB_CHANNEL_SIZE), # channel last
      dtype=np.float32)
  test_dataset = np.ndarray(
      shape=(TEST_IMAGE_NUMBER, IMAGE_SIZE, IMAGE_SIZE, RGB_CHANNEL_SIZE),
      dtype=np.float32)

  train_labels = np.ndarray(shape=(TRAIN_IMAGE_NUMBER, ), dtype=np.int32)
  test_labels = np.ndarray(shape=(TEST_IMAGE_NUMBER, ), dtype=np.int32)

  TRAIN_DATA_DIR = "./data/train/"
  TEST_DATA_DIR = "./data/test/"
  VALIDATE_DATA_DIR = "./data/validate/"
  IMAGE_FORMAT = ".png"
  index = 0 # image counter
  # Maps each type directory name to its integer class id.
  pokemon_type_id_map = {
      "Bug": 0,
      "Dark": 1,
      "Dragon": 2,
      "Electric": 3,
      "Fairy": 4,
      "Fighting": 5,
      "Fire": 6,
      "Ghost": 7,
      "Grass": 8,
      "Ground": 9,
      "Ice": 10,
      "Normal": 11,
      "Poison": 12,
      "Psychic": 13,
      "Rock": 14,
      "Steel": 15,
      "Water": 16
  }
  # Type names listed in the same order as the ids above.
  pokemon_types = [
      "Bug", "Dark", "Dragon", "Electric", "Fairy", "Fighting", "Fire",
      "Ghost", "Grass", "Ground", "Ice", "Normal", "Poison", "Psychic", "Rock",
      "Steel", "Water"
  ]

  # Step 1: load the training data
  # Each sub-directory of TRAIN_DATA_DIR is treated as one Pokemon type.
  # NOTE(review): the arrays are pre-sized, so more than TRAIN_IMAGE_NUMBER
  # matching files would index past the end -- confirm the data set size.
  for pokemon_type in os.listdir(TRAIN_DATA_DIR):
    for image_filename in os.listdir(
        os.path.join(TRAIN_DATA_DIR, pokemon_type)):
      if image_filename.endswith(IMAGE_FORMAT):

        image_filepath = os.path.join(TRAIN_DATA_DIR, pokemon_type,
                                      image_filename)
        image_ndarray = ndimage.imread(image_filepath, mode="RGB") # RGB
        train_dataset[index] = image_ndarray

        # Convert the label to its numeric id.
        # NOTE(review): .get() yields None for a directory name that is not in
        # the map -- confirm every directory is a known type.
        train_labels[index] = pokemon_type_id_map.get(pokemon_type)
        index += 1

  
  index = 0
  # Step 2: load the test data
  for pokemon_type in os.listdir(TEST_DATA_DIR):
    for image_filename in os.listdir(
        os.path.join(TEST_DATA_DIR, pokemon_type)):
      if image_filename.endswith(IMAGE_FORMAT):

        image_filepath = os.path.join(TEST_DATA_DIR, pokemon_type,
                                      image_filename)
        image_ndarray = ndimage.imread(image_filepath, mode="RGB")
        test_dataset[index] = image_ndarray

        test_labels[index] = pokemon_type_id_map.get(pokemon_type)
        index += 1

  # Step 3: define the model
  # placeholder
  keys_placeholder = tf.placeholder(tf.int32, shape=[None, 1])
  keys = tf.identity(keys_placeholder)

  # base64-encoded images
  model_base64_placeholder = tf.placeholder(
      shape=[None], dtype=tf.string, name="model_input_b64_images")
  model_base64_string = tf.decode_base64(model_base64_placeholder)
  # Equivalent to Python's map(): decode and resize every image in the batch.
  # NOTE(review): this path decodes JPEG while the files loaded above are
  # selected by the ".png" suffix -- confirm the expected input format.
  model_base64_input = tf.map_fn(lambda x: tf.image.resize_images(tf.image.decode_jpeg(x, channels=RGB_CHANNEL_SIZE), [IMAGE_SIZE, IMAGE_SIZE]), model_base64_string, dtype=tf.float32)

  x = tf.placeholder(
      tf.float32, shape=(None, IMAGE_SIZE, IMAGE_SIZE, RGB_CHANNEL_SIZE))
  y = tf.placeholder(tf.int32, shape=(None, ))

  batch_size = FLAGS.batch_size
  epoch_number = FLAGS.epoch_number
  # NOTE(review): FLAGS.checkpoint_path is used at the top of this function
  # and FLAGS.checkpoint_dir here -- confirm both flags are intended.
  checkpoint_dir = FLAGS.checkpoint_dir
  if not os.path.exists(checkpoint_dir):
    os.makedirs(checkpoint_dir)
  tensorboard_dir = FLAGS.tensorboard_dir
  mode = FLAGS.mode
  checkpoint_file = checkpoint_dir + "/checkpoint.ckpt"
  steps_to_validate = FLAGS.steps_to_validate

  def cnn_inference(x):
    """Two conv/ReLU/max-pool stages followed by one fully connected layer.

    Each stage uses a 3x3 stride-1 SAME convolution and a 2x2 stride-2 SAME
    max-pool, so the spatial size halves per stage. The flatten step expects
    8 x 8 x 64 features, i.e. a 32 x 32 input with 3 channels.

    Args:
      x: Image batch fed to the first convolution.

    Returns:
      The fully connected layer's output with LABEL_SIZE values per example.
    """

    def _new_variable(name, shape):
      # Every parameter of this network uses a default random-normal
      # initializer.
      return tf.get_variable(
          name, shape, initializer=tf.random_normal_initializer())

    def _conv_relu_pool(inputs, in_channels, out_channels):
      # Variables are created before any op, inside the caller's scope.
      kernel = _new_variable("weights", [3, 3, in_channels, out_channels])
      offset = _new_variable("bias", [out_channels])
      convolved = tf.nn.conv2d(
          inputs, kernel, strides=[1, 1, 1, 1], padding="SAME")
      activated = tf.nn.relu(tf.nn.bias_add(convolved, offset))
      # Pooling halves height and width.
      return tf.nn.max_pool(
          activated, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME")

    # Stage 1: 3 -> 32 channels, spatial size 32 -> 16.
    with tf.variable_scope("conv1"):
      pooled = _conv_relu_pool(x, 3, 32)

    # Stage 2: 32 -> 64 channels, spatial size 16 -> 8.
    with tf.variable_scope("conv2"):
      pooled = _conv_relu_pool(pooled, 32, 64)

    # Flatten for the fully connected layer.
    flat_size = 8 * 8 * 64
    flattened = tf.reshape(pooled, [-1, flat_size])

    # Fully connected layer result: [BATCH_SIZE, LABEL_SIZE]
    with tf.variable_scope("fc1"):
      fc_weights = _new_variable("weights", [flat_size, LABEL_SIZE])
      fc_bias = _new_variable("bias", [LABEL_SIZE])
      logits = tf.add(tf.matmul(flattened, fc_weights), fc_bias)

    return logits

  def lstm_inference(x):
    """Classify images with a single LSTM that reads one image row per step.

    Args:
      x: Image batch; the comments below assume [BATCH_SIZE, 32, 32, 3] --
        TODO confirm against the caller.

    Returns:
      The last LSTM output projected to LABEL_SIZE values per example.
    """
    RNN_HIDDEN_UNITS = 128

    # x was [BATCH_SIZE, 32, 32, 3]
    # x changes to [32, BATCH_SIZE, 32, 3]
    x = tf.transpose(x, [1, 0, 2, 3])
    # x changes to [32 * BATCH_SIZE, 32 * 3]
    x = tf.reshape(x, [-1, IMAGE_SIZE * RGB_CHANNEL_SIZE])
    # x changes to array of 32 * [BATCH_SIZE, 32 * 3]
    x = tf.split(axis=0, num_or_size_splits=IMAGE_SIZE, value=x)

    weights = tf.Variable(tf.random_normal([RNN_HIDDEN_UNITS, LABEL_SIZE]))
    biases = tf.Variable(tf.random_normal([LABEL_SIZE]))

    # output size is 128, state size is (c=128, h=128)
    lstm_cell = rnn.BasicLSTMCell(RNN_HIDDEN_UNITS, forget_bias=1.0)
    # outputs is array of 32 * [BATCH_SIZE, 128]
    # The namespace that provides rnn.BasicLSTMCell and the keyword form of
    # tf.split above belong to TensorFlow 1.x, where the static RNN entry
    # point is `static_rnn` (the old `rnn.rnn` name is gone).
    outputs, _ = rnn.static_rnn(lstm_cell, x, dtype=tf.float32)

    # outputs[-1] is [BATCH_SIZE, 128]
    return tf.matmul(outputs[-1], weights) + biases

  def bidirectional_lstm_inference(x):
    """Classify images with a bidirectional LSTM over the image rows.

    Args:
      x: Image batch; the comments below assume [BATCH_SIZE, 32, 32, 3] --
        TODO confirm against the caller.

    Returns:
      The last bidirectional output projected to LABEL_SIZE values per
      example.
    """
    RNN_HIDDEN_UNITS = 128

    # x was [BATCH_SIZE, 32, 32, 3]
    # x changes to [32, BATCH_SIZE, 32, 3]
    x = tf.transpose(x, [1, 0, 2, 3])
    # x changes to [32 * BATCH_SIZE, 32 * 3]
    x = tf.reshape(x, [-1, IMAGE_SIZE * RGB_CHANNEL_SIZE])
    # x changes to array of 32 * [BATCH_SIZE, 32 * 3]
    x = tf.split(axis=0, num_or_size_splits=IMAGE_SIZE, value=x)

    # The projection takes forward and backward outputs side by side, hence
    # twice the hidden size.
    weights = tf.Variable(tf.random_normal([2 * RNN_HIDDEN_UNITS, LABEL_SIZE]))
    biases = tf.Variable(tf.random_normal([LABEL_SIZE]))

    # output size is 128, state size is (c=128, h=128) for each direction
    fw_lstm_cell = rnn.BasicLSTMCell(RNN_HIDDEN_UNITS, forget_bias=1.0)
    bw_lstm_cell = rnn.BasicLSTMCell(RNN_HIDDEN_UNITS, forget_bias=1.0)

    # outputs is array of 32 * [BATCH_SIZE, 2 * 128]
    # The namespace that provides rnn.BasicLSTMCell and the keyword form of
    # tf.split above belong to TensorFlow 1.x, where this function is named
    # `static_bidirectional_rnn` (the old `bidirectional_rnn` name is gone).
    outputs, _, _ = rnn.static_bidirectional_rnn(
        fw_lstm_cell, bw_lstm_cell, x, dtype=tf.float32)

    # outputs[-1] is [BATCH_SIZE, 2 * 128]
    return tf.matmul(outputs[-1], weights) + biases

  def stacked_lstm_inference(x):
    """Classify images with a two-layer stacked LSTM over the image rows.

    Same pipeline as lstm_inference(x), but with two LSTM layers.

    Args:
      x: Image batch; the comments below assume [BATCH_SIZE, 32, 32, 3] --
        TODO confirm against the caller.

    Returns:
      The last output of the top layer projected to LABEL_SIZE values per
      example.
    """
    RNN_HIDDEN_UNITS = 128
    NUM_LAYERS = 2

    # x was [BATCH_SIZE, 32, 32, 3]
    # x changes to [32, BATCH_SIZE, 32, 3]
    x = tf.transpose(x, [1, 0, 2, 3])
    # x changes to [32 * BATCH_SIZE, 32 * 3]
    x = tf.reshape(x, [-1, IMAGE_SIZE * RGB_CHANNEL_SIZE])
    # x changes to array of 32 * [BATCH_SIZE, 32 * 3]
    x = tf.split(axis=0, num_or_size_splits=IMAGE_SIZE, value=x)

    weights = tf.Variable(tf.random_normal([RNN_HIDDEN_UNITS, LABEL_SIZE]))
    biases = tf.Variable(tf.random_normal([LABEL_SIZE]))

    # output size is 128, state size is (c=128, h=128) per layer.
    # Each layer gets its own cell object: repeating one instance with
    # `[cell] * 2` reuses a single cell for both layers instead of building
    # two independent ones.
    lstm_cells = rnn.MultiRNNCell([
        rnn.BasicLSTMCell(RNN_HIDDEN_UNITS, forget_bias=1.0)
        for _ in range(NUM_LAYERS)
    ])

    # outputs is array of 32 * [BATCH_SIZE, 128]
    # The namespace that provides rnn.BasicLSTMCell and the keyword form of
    # tf.split above belong to TensorFlow 1.x, where the static RNN entry
    # point is `static_rnn` (the old `rnn.rnn` name is gone).
    outputs, _ = rnn.static_rnn(lstm_cells, x, dtype=tf.float32)

    # outputs[-1] is [BATCH_SIZE, 128]
    return tf.matmul(outputs[-1], weights) + biases