Example #1
    def __init__(self, rnn_size, rnn_layer, batch_size, input_embedding_size, dim_image, dim_hidden, max_words_q, vocabulary_size, drop_out_rate):
        self.rnn_size = rnn_size
        self.rnn_layer = rnn_layer
        self.batch_size = batch_size
        self.input_embedding_size = input_embedding_size
        self.dim_image = dim_image
        self.dim_hidden = dim_hidden
        self.max_words_q = max_words_q
        self.vocabulary_size = vocabulary_size
        self.drop_out_rate = drop_out_rate

        # question embedding
        self.embed_ques_W = tf.Variable(tf.random_uniform([self.vocabulary_size, self.input_embedding_size], -0.08, 0.08), name='embed_ques_W')

        # encoder: RNN body (two LSTM layers, each wrapped in dropout)
        self.lstm_1 = core_rnn_cell.LSTMCell(rnn_size, input_embedding_size, use_peepholes=True, state_is_tuple=False)
        self.lstm_dropout_1 = core_rnn_cell.DropoutWrapper(self.lstm_1, output_keep_prob=1 - self.drop_out_rate)
        self.lstm_2 = core_rnn_cell.LSTMCell(rnn_size, rnn_size, use_peepholes=True, state_is_tuple=False)
        self.lstm_dropout_2 = core_rnn_cell.DropoutWrapper(self.lstm_2, output_keep_prob=1 - self.drop_out_rate)
        self.stacked_lstm = core_rnn_cell.MultiRNNCell([self.lstm_dropout_1, self.lstm_dropout_2], state_is_tuple=False)

        # state embedding
        self.embed_state_W = tf.Variable(tf.random_uniform([2 * rnn_size * rnn_layer, self.dim_hidden], -0.08, 0.08), name='embed_state_W')
        self.embed_state_b = tf.Variable(tf.random_uniform([self.dim_hidden], -0.08, 0.08), name='embed_state_b')
        # image embedding
        self.embed_image_W = tf.Variable(tf.random_uniform([dim_image, self.dim_hidden], -0.08, 0.08), name='embed_image_W')
        self.embed_image_b = tf.Variable(tf.random_uniform([dim_hidden], -0.08, 0.08), name='embed_image_b')
        # score embedding (num_output is not among this method's parameters;
        # it must be defined in the enclosing scope)
        self.embed_scor_W = tf.Variable(tf.random_uniform([dim_hidden, num_output], -0.08, 0.08), name='embed_scor_W')
        self.embed_scor_b = tf.Variable(tf.random_uniform([num_output], -0.08, 0.08), name='embed_scor_b')
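
A minimal usage sketch for the constructor above; the class name (VQAModel) and every hyperparameter value are assumptions, and num_output must already be defined in the enclosing module:

# Hypothetical instantiation; all values below are illustrative only.
model = VQAModel(
    rnn_size=512,
    rnn_layer=2,
    batch_size=500,
    input_embedding_size=200,
    dim_image=4096,        # e.g. CNN fc7 features
    dim_hidden=1024,
    max_words_q=26,
    vocabulary_size=12000,
    drop_out_rate=0.5)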
Example #2
    def __init__(self, isTraining, enc_attribs):
        """Initializer for encoder class.

        Args:
            isTraining: Whether the network is in training mode or not. This
                would affect whether dropout is used or not.
            enc_attribs: A dictionary of attributes used by encoder like:
                hidden_size: Hidden size of LSTM cell used for encoding
                num_layers: Number of hidden layers used
                vocab_size: Vocabulary size of input symbols
                emb_size: Embedding size used to feed in input symbols
                out_prob (optional): output keep probability (1 - dropout probability)
        """
        self.isTraining = isTraining
        # Update the parameters
        self.__dict__.update(enc_attribs)
        # Create the LSTM cell using the hidden size attribute
        self.cell = rnn_cell.BasicLSTMCell(self.hidden_size,
                                           state_is_tuple=True)
        if self.isTraining:
            # During training a dropout wrapper is used
            self.cell = rnn_cell.DropoutWrapper(self.cell,
                                                output_keep_prob=self.out_prob)
        if self.num_layers > 1:
            self.cell = rnn_cell.MultiRNNCell([self.cell] * self.num_layers,
                                              state_is_tuple=True)
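
A usage sketch, assuming the enclosing class is named Encoder (hypothetical) and rnn_cell is the TF 1.x cell module:

# Hypothetical attribute dictionary; values are illustrative.
enc_attribs = {
    'hidden_size': 256,   # LSTM hidden size
    'num_layers': 2,      # stacked layers
    'vocab_size': 10000,
    'emb_size': 128,
    'out_prob': 0.8,      # keep probability (1 - dropout)
}
encoder = Encoder(isTraining=True, enc_attribs=enc_attribs)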
Example #3
    def __init__(self, args, data, infer=False):
        if infer:
            args.batch_size = 1
            args.seq_length = 1
        with tf.name_scope('inputs'):
            self.input_data = tf.placeholder(
                tf.int32, [args.batch_size, args.seq_length])
            self.target_data = tf.placeholder(
                tf.int32, [args.batch_size, args.seq_length])

        with tf.name_scope('model'):
            self.cell = rnn_cell.BasicLSTMCell(args.state_size)
            self.cell = rnn_cell.MultiRNNCell([self.cell] * args.num_layers)
            self.initial_state = self.cell.zero_state(args.batch_size,
                                                      tf.float32)
            with tf.variable_scope('rnnlm'):
                w = tf.get_variable('softmax_w',
                                    [args.state_size, data.vocab_size])
                b = tf.get_variable('softmax_b', [data.vocab_size])
                with tf.device("/cpu:0"):
                    embedding = tf.get_variable(
                        'embedding', [data.vocab_size, args.state_size])
                    inputs = tf.nn.embedding_lookup(embedding, self.input_data)
            outputs, last_state = tf.nn.dynamic_rnn(
                self.cell, inputs, initial_state=self.initial_state)

        with tf.name_scope('loss'):
            output = tf.reshape(outputs, [-1, args.state_size])

            self.logits = tf.matmul(output, w) + b
            self.probs = tf.nn.softmax(self.logits)
            self.last_state = last_state

            targets = tf.reshape(self.target_data, [-1])
            loss = seq2seq.sequence_loss_by_example(
                [self.logits], [targets],
                [tf.ones_like(targets, dtype=tf.float32)])
            self.cost = tf.reduce_sum(loss) / args.batch_size
            tf.summary.scalar('loss', self.cost)

        with tf.name_scope('optimize'):
            self.lr = tf.placeholder(tf.float32, [])
            tf.summary.scalar('learning_rate', self.lr)

            optimizer = tf.train.AdamOptimizer(self.lr)
            tvars = tf.trainable_variables()
            grads = tf.gradients(self.cost, tvars)
            for g in grads:
                tf.summary.histogram(g.name, g)
            grads, _ = tf.clip_by_global_norm(grads, args.grad_clip)

            self.train_op = optimizer.apply_gradients(zip(grads, tvars))
            self.merged_op = tf.summary.merge_all()
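
A single-training-step sketch for the model above; model, x, and y are assumptions (x and y being int32 arrays of shape [batch_size, seq_length]):

import tensorflow as tf

sess = tf.Session()
sess.run(tf.global_variables_initializer())
# One optimization step; the learning rate is fed through the placeholder.
feed = {model.input_data: x, model.target_data: y, model.lr: 1e-3}
_, cost, summary = sess.run([model.train_op, model.cost, model.merged_op],
                            feed_dict=feed)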
Example #4
File: decoder.py  Project: entn-at/g2p-1
    def set_cell_config(self):
        """Create the LSTM cell used by decoder."""
        # Use the BasicLSTMCell - https://arxiv.org/pdf/1409.2329.pdf
        cell = rnn_cell.BasicLSTMCell(self.hidden_size, state_is_tuple=True)
        if self.isTraining:
            # During training we use a dropout wrapper
            cell = rnn_cell.DropoutWrapper(cell,
                                           output_keep_prob=self.out_prob)
        if self.num_layers > 1:
            # If RNN is stacked then we use MultiRNNCell class
            cell = rnn_cell.MultiRNNCell([cell] * self.num_layers,
                                         state_is_tuple=True)

        # Use the OutputProjectionWrapper to project cell output to output
        # vocab size. This projection is fine for a small vocabulary output
        # but would be bad for large vocabulary output spaces.
        cell = rnn_cell.OutputProjectionWrapper(cell, self.vocab_size)
        return cell
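
The comment above flags OutputProjectionWrapper as costly for large vocabularies. One common alternative, sketched here under assumed names (outputs_list, hidden_size, vocab_size), is to drop the wrapper and project all time steps in one matmul:

# Hypothetical: `outputs_list` is the per-step RNN output list; the
# projection variables and sizes are assumptions.
proj_w = tf.get_variable("proj_w", [hidden_size, vocab_size])
proj_b = tf.get_variable("proj_b", [vocab_size])
outputs = tf.stack(outputs_list)               # [time, batch, hidden_size]
flat = tf.reshape(outputs, [-1, hidden_size])  # [time * batch, hidden_size]
logits = tf.matmul(flat, proj_w) + proj_b      # [time * batch, vocab_size]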
Example #5
    def __init__(self, configs, data, infer=False):
        if infer:
            configs.batch_size = 1
            configs.seq_length = 1
        self.input_data = tf.placeholder(tf.int32, [configs.batch_size, configs.seq_length])
        self.target_data = tf.placeholder(tf.int32, [configs.batch_size, configs.seq_length])
        self.lr = tf.placeholder(tf.float32, [])

        #cell definition
        self.cell = rnn.BasicLSTMCell(configs.state_size)
        self.cell = rnn.MultiRNNCell([self.cell] * configs.num_layers)
        self.initial_state = self.cell.zero_state(configs.batch_size, tf.float32)

        # parameter definitions
        w = tf.get_variable('softmax_w', [configs.state_size, data.vocab_size])
        b = tf.get_variable('softmax_b', [data.vocab_size])

        #embedding
        embedding = tf.get_variable('embedding', [data.vocab_size, configs.state_size])
        inputs = tf.nn.embedding_lookup(embedding, self.input_data)

        #output
        output, last_state = tf.nn.dynamic_rnn(self.cell, inputs, initial_state=self.initial_state)
        output_new = tf.reshape(output, [-1, configs.state_size])

        #logit computation
        self.logits = tf.matmul(output_new, w) + b
        self.probs = tf.nn.softmax(self.logits)
        self.last_state = last_state

        # loss computation
        target = tf.reshape(self.target_data, [-1])
        loss = tf.contrib.legacy_seq2seq.sequence_loss_by_example([self.logits], [target], [tf.ones_like(target, dtype=tf.float32)])
        self.cost = tf.reduce_sum(loss) / configs.batch_size

        #optimizer
        optimizer = tf.train.AdamOptimizer(self.lr)
        tvars = tf.trainable_variables()
        grads = tf.gradients(self.cost, tvars)
        grads, _ = tf.clip_by_global_norm(grads, configs.grad_clip)
        self.train_op = optimizer.apply_gradients(zip(grads, tvars))
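
A single sampling-step sketch for the infer=True configuration; sess, start_token, and the nested-tuple state feed (supported by Session.run in TF 1.x) are assumptions:

import numpy as np

state = sess.run(model.initial_state)
x = np.array([[start_token]], dtype=np.int32)   # batch_size = seq_length = 1
probs, state = sess.run([model.probs, model.last_state],
                        {model.input_data: x, model.initial_state: state})
next_id = int(np.argmax(probs[0]))              # greedy; one could also sample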
Example #6
    def __init__(self, config, training=False):
        self.config = config
        self.time_batch_len = time_batch_len = config.time_batch_len
        self.input_dim = input_dim = config.input_dim
        hidden_size = config.hidden_size
        num_layers = config.num_layers
        dropout_prob = config.dropout_prob
        input_dropout_prob = config.input_dropout_prob
        cell_type = config.cell_type

        self.seq_input = \
            tf.placeholder(tf.float32, shape=[self.time_batch_len, None, input_dim])

        if (dropout_prob <= 0.0 or dropout_prob > 1.0):
            raise Exception("Invalid dropout probability: {}".format(dropout_prob))

        if (input_dropout_prob <= 0.0 or input_dropout_prob > 1.0):
            raise Exception("Invalid input dropout probability: {}".format(input_dropout_prob))

        # setup variables
        with tf.variable_scope("rnnlstm"):
            output_W = tf.get_variable("output_w", [hidden_size, input_dim])
            output_b = tf.get_variable("output_b", [input_dim])
            self.lr = tf.constant(config.learning_rate, name="learning_rate")
            self.lr_decay = tf.constant(config.learning_rate_decay, name="learning_rate_decay")

        def create_cell(input_size):
            if cell_type == "vanilla":
                cell_class = rnn_cell.BasicRNNCell
            elif cell_type == "gru":
                cell_class = rnn_cell.GRUCell  # TF provides GRUCell; there is no BasicGRUCell
            elif cell_type == "lstm":
                cell_class = rnn_cell.BasicLSTMCell
            else:
                raise Exception("Invalid cell type: {}".format(cell_type))

            cell = cell_class(hidden_size, input_size = input_size)
            if training:
                return rnn_cell.DropoutWrapper(cell, output_keep_prob = dropout_prob)
            else:
                return cell

        if training:
            self.seq_input_dropout = tf.nn.dropout(self.seq_input, keep_prob = input_dropout_prob)
        else:
            self.seq_input_dropout = self.seq_input

        self.cell = rnn_cell.MultiRNNCell(
            [create_cell(input_dim)] + [create_cell(hidden_size) for i in range(1, num_layers)])

        # seq_input is time-major ([time_batch_len, batch, input_dim]), so the
        # batch size is dimension 1, not 0.
        batch_size = tf.shape(self.seq_input_dropout)[1]
        self.initial_state = self.cell.zero_state(batch_size, tf.float32)
        inputs_list = tf.unstack(self.seq_input_dropout)

        # rnn outputs a list of [batch_size x H] outputs
        outputs_list, self.final_state = rnn.static_rnn(self.cell, inputs_list, 
                                                 initial_state=self.initial_state)

        outputs = tf.stack(outputs_list)
        outputs_concat = tf.reshape(outputs, [-1, hidden_size])
        logits_concat = tf.matmul(outputs_concat, output_W) + output_b
        logits = tf.reshape(logits_concat, [self.time_batch_len, -1, input_dim])

        # probabilities of each note
        self.probs = self.calculate_probs(logits)
        self.loss = self.init_loss(logits, logits_concat)
        # Note: RMSPropOptimizer's `decay` argument is the moving-average decay
        # of the squared-gradient accumulator, not a learning-rate schedule.
        self.train_step = tf.train.RMSPropOptimizer(self.lr, decay = self.lr_decay) \
                            .minimize(self.loss)
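
A hypothetical config object for the constructor above; all values are illustrative, and note that dropout_prob acts as a keep probability (the checks above require it to lie in (0, 1]):

class Config(object):
    # Illustrative values only; field names match the attributes read above.
    time_batch_len = 128
    input_dim = 88            # e.g. piano-roll pitch classes
    hidden_size = 200
    num_layers = 2
    dropout_prob = 0.5        # output keep probability
    input_dropout_prob = 0.8  # input keep probability
    cell_type = 'lstm'
    learning_rate = 1e-3
    learning_rate_decay = 0.9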
Example #7
def rnn(hnum1, hnum2, hnum3):
    file = open(r"wind.csv")
    file.readline()  # consume the header line so subsequent reads start at the second line
    reader = csv.reader(file)

    raw_data = []
    for date, hors, u, v, ws, wd in reader:
        if ws != 'NA':
            raw_data.append(float(ws))
    # difference = max(raw_data)-min(raw_data)
    # raw_data = [i/difference for i in raw_data]
    # raw_data=[10*math.sin(0.1*i) for i in range(20000)]

    sequence_length = 100  # width of the sliding window over past data (tunable; mind the valid range)
    predict_length = 16  # width of the prediction window, i.e. values predicted at once (tunable; mind the valid range)
    train_input_all = []
    for i in range(0, len(raw_data[0:-sequence_length - predict_length + 1])):
        temp_list = []
        for j in range(sequence_length):
            temp_list.append([raw_data[i + j]])
        train_input_all.append(temp_list)

    train_label_all = []
    train_label_all1 = []
    for i in range(sequence_length, len(raw_data) - predict_length + 1):
        temp_list = []
        for j in range(predict_length):
            temp_list.append(raw_data[i + j])
        train_label_all.append(temp_list)
        train_label_all1.append(raw_data[i + j])

    seperate_point = 5000  # split point between training and test data (tunable)
    test_point = 90000  # amount of data used (tunable; must exceed seperate_point)
    test_point_start = 80000
    train_input = train_input_all[0:seperate_point]
    test_input = train_input_all[test_point_start + 1:test_point]
    train_output = train_label_all[0:seperate_point]  # training labels, format 1
    train_output1 = train_label_all1[0:seperate_point]  # training labels, format 2
    test_output = train_label_all[test_point_start + 1:test_point]  # test labels, format 1
    test_output1 = train_label_all1[test_point_start + 1:test_point]  # test labels, format 2
    # shuffle the training set
    index = [i for i in range(len(train_input))]
    shuffle(index)
    train_input = [train_input[index[i]] for i in range(len(index))]
    train_output = [train_output[index[i]] for i in range(len(index))]

    data = tf.placeholder(
        tf.float32, [None, sequence_length, 1])  # batch_size x max_time x depth
    target = tf.placeholder(tf.float32, [None, predict_length], name='target')
    num_hidden = [hnum1, hnum2, hnum3]  # hidden-layer sizes (tunable)
    # cell = rnn_cell.BasicRNNCell(num_hidden)
    # cells = rnn_cell.LSTMCell(num_hidden[0], state_is_tuple=True)
    cell_layer1 = rnn_cell.LSTMCell(num_hidden[0], state_is_tuple=True)
    # cell_layer1 = rnn_cell.DropoutWrapper(cell_layer1, input_keep_prob=0.5, output_keep_prob=0.5)
    cell_layer2 = rnn_cell.LSTMCell(num_hidden[1], state_is_tuple=True)
    # cell_layer2 = rnn_cell.DropoutWrapper(cell_layer2, input_keep_prob=0.5, output_keep_prob=0.5)
    cell_layer3 = rnn_cell.LSTMCell(num_hidden[2], state_is_tuple=True)
    # cell_layer4 = rnn_cell.LSTMCell(num_hidden[3], state_is_tuple=True)
    # cell_layer5 = rnn_cell.LSTMCell(num_hidden[4], state_is_tuple=True)
    cells = rnn_cell.MultiRNNCell([cell_layer1, cell_layer2,
                                   cell_layer3])  # build the multi-layer RNN

    val, state = tf.nn.dynamic_rnn(cells, data, dtype=tf.float32)

    val = tf.transpose(val, [1, 0, 2])

    val_shape = val.get_shape()

    last = tf.gather(val, int(val.get_shape()[0]) - 1)
    last_shape = last.get_shape()
    weight = tf.Variable(
        tf.truncated_normal([num_hidden[-1],
                             int(target.get_shape()[1])]))
    bias = tf.Variable(tf.constant(0.1, shape=[target.get_shape()[1]]))

    prediction = tf.matmul(last, weight) + bias
    prediction_shape = prediction.get_shape()

    loss = tf.reduce_mean(tf.square(prediction - target))
    loss_shape = loss.get_shape()
    optimizer = tf.train.AdamOptimizer()
    minimize = optimizer.minimize(loss)

    # mistakes = tf.not_equal(tf.argmax(target, 1), tf.argmax(prediction, 1))
    error = tf.reduce_mean(tf.square(prediction - target))
    error_sep = tf.square(prediction - target)  # per-component error of each prediction
    init_op = tf.global_variables_initializer()
    sess = tf.Session()
    saver = tf.train.Saver()

    sess.run(init_op)  # optional when restoring: restored values override the initialized ones
    # saver.restore(sess, r"parameter_5.ckpt")

    batch_size = 10  # (tunable)
    no_of_batches = int(len(train_input) / batch_size)
    epoch = 25  # (tunable)

    total_error1 = 0
    predict_result1 = []
    total_error = 0
    predict_error = []
    predict_result = []
    temp = 0  # scratch variable

    for i in range(epoch):
        ptr = 0
        for j in range(no_of_batches):
            inp, out = train_input[ptr:ptr +
                                   batch_size], train_output[ptr:ptr +
                                                             batch_size]
            ptr += batch_size
            sess.run(minimize, {data: inp, target: out})
        print("Epoch - ", str(i))

    # sess.run(error, {data: train_input, target: train_output})
    # evaluate predictions on the test samples
    total_error = 0
    predict_error = []
    predict_result = []
    temp = 0
    temp1 = 0
    temp_sep = []
    total_error_sep = []
    for i in range(len(test_input)):
        inp, out = test_input[i:i + 1], test_output[i:i + 1]
        temp1 = sess.run(prediction, {data: inp, target: out})
        temp = sess.run(error, {data: inp, target: out})
        temp_sep = sess.run(error_sep, {data: inp, target: out})
        # print(temp1)
        total_error += temp
        # predict_error.append(temp)
        predict_result.append(temp1[0])
        total_error_sep.append(temp_sep)
    total_error /= len(test_input)
    total_error = math.sqrt(total_error)
    total_error_sep = (np.array(total_error_sep)).mean(axis=0)
    # evaluate fit on the training samples

    total_error1 = 0
    predict_result1 = []
    temp2 = 0
    temp3 = 0
    temp_sep1 = []
    total_error_sep1 = []
    for i in range(len(train_input)):
        inp, out = train_input[i:i + 1], train_output[i:i + 1]
        temp2 = sess.run(error, {data: inp, target: out})
        temp3 = sess.run(prediction, {data: inp, target: out})
        temp_sep1 = sess.run(error_sep, {data: inp, target: out})
        total_error1 += temp2
        predict_result1.append((temp3[0]))
        total_error_sep1.append(temp_sep1)
    total_error1 /= len(train_input)
    total_error1 = math.sqrt(total_error1)
    total_error_sep1 = (np.array(total_error_sep1)).mean(axis=0)

    # incorrect = sess.run(error, {data: test_input, target: test_output})
    # note: `i` here is the index left over from the loop above, not an epoch counter
    print('Epoch {:2d} error {:3.5f}'.format(i + 1, total_error))
    # print('predict_error')
    # print(predict_error)
    # print('predict_result')
    # print(predict_result)
    saver.save(sess, r"parameter_5.ckpt")
    sess.close()

    # saver = tf.train.Saver()
    # pylab.plot(predict_result)  # predict_result holds the test-sample predictions
    # pylab.plot(test_output)
    # pylab.plot(predict_result1)  # predict_result1 holds the training-sample predictions
    # pylab.plot(train_output)  # train_output is the training-sample ground truth
    corrcoef_result_test = []
    corrcoef_result_train = []
    for cursor in range(16):
        test_output_single = [
            test_output[i][int(cursor)] for i in range(len(test_output))
        ]
        predict_result_single = [
            predict_result[i][int(cursor)] for i in range(len(predict_result))
        ]
        corrcoef_result_test.append(
            corrcoef(test_output_single, predict_result_single))
        #print(corrcoef(test_output_single, predict_result_single))

    for cursor in range(16):
        train_output_single = [
            train_output[i][int(cursor)] for i in range(len(train_output))
        ]
        predict_result1_single = [
            predict_result1[i][int(cursor)]
            for i in range(len(predict_result1))
        ]
        corrcoef_result_train.append(
            corrcoef(train_output_single, predict_result1_single))
        #print(corrcoef(train_output_single, predict_result1_single))
    '''
    Data to record: (1) training-sample indices, (2) test-sample indices,
    (3) model type, (4) model parameters (number of hidden layers and units
    per layer), (5) training batch size, (6) number of training epochs,
    (7) prediction results, (8) output error, (9) output correlation
    coefficients, (10) training time.
    '''

    csvfile = open(r'short_result5.csv', 'a')
    writer = csv.writer(csvfile)
    writer.writerow([
        'epoch', 'seperate_point', 'test_point_start', 'test_point',
        'num_hidden', 'sequence_length', 'predict_length', 'batch_size'
    ])
    data = [(epoch, seperate_point, test_point_start, test_point, num_hidden,
             sequence_length, predict_length, batch_size)]
    writer.writerows(data)
    writer.writerow(['corrcoef_result_test'])
    writer.writerow(corrcoef_result_test)
    writer.writerow(['corrcoef_result_train'])
    writer.writerow(corrcoef_result_train)
    writer.writerow(['prediction_result'])
    writer.writerow(['total_error_sep'])
    for i in range(len(total_error_sep)):
        writer.writerow(total_error_sep[i])
    writer.writerow(['total_error'])
    writer.writerow([total_error])
    csvfile.close()
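
The window-building loops near the top of this function can be written more compactly; a sketch with numpy (not part of the original script; the function name is hypothetical):

import numpy as np

def make_windows(series, seq_len, pred_len):
    """Return sliding windows: inputs [N, seq_len, 1] and labels [N, pred_len]."""
    xs, ys = [], []
    for i in range(len(series) - seq_len - pred_len + 1):
        xs.append(np.asarray(series[i:i + seq_len], dtype=np.float32).reshape(-1, 1))
        ys.append(series[i + seq_len:i + seq_len + pred_len])
    return np.asarray(xs), np.asarray(ys, dtype=np.float32)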
Example #8

data = tf.placeholder(tf.float32,
                      [None, sequence_length, 1])  # batch_size x max_time x depth
target = tf.placeholder(tf.float32, [None, predict_length], name='target')
num_hidden = [30, 30]  # hidden-layer sizes (tunable)
#cell = rnn_cell.BasicRNNCell(num_hidden)
#cells = rnn_cell.LSTMCell(num_hidden[0], state_is_tuple=True)
cell_layer1 = rnn_cell.LSTMCell(num_hidden[0], state_is_tuple=True)
#cell_layer1 = rnn_cell.DropoutWrapper(cell_layer1, input_keep_prob=0.5, output_keep_prob=0.5)
cell_layer2 = rnn_cell.LSTMCell(num_hidden[1], state_is_tuple=True)
#cell_layer2 = rnn_cell.DropoutWrapper(cell_layer2, input_keep_prob=0.5, output_keep_prob=0.5)
#cell_layer3 = rnn_cell.LSTMCell(num_hidden[2], state_is_tuple=True)
#cell_layer4 = rnn_cell.LSTMCell(num_hidden[3], state_is_tuple=True)
#cell_layer5 = rnn_cell.LSTMCell(num_hidden[4], state_is_tuple=True)
cells = rnn_cell.MultiRNNCell([cell_layer1, cell_layer2])  # build the multi-layer RNN

val, state = tf.nn.dynamic_rnn(cells, data, dtype=tf.float32)

val = tf.transpose(val, [1, 0, 2])

val_shape = val.get_shape()

last = tf.gather(val, int(val.get_shape()[0]) - 1)
last_shape = last.get_shape()
weight = tf.Variable(
    tf.truncated_normal([num_hidden[-1],
                         int(target.get_shape()[1])]))
bias = tf.Variable(tf.constant(0.1, shape=[target.get_shape()[1]]))
#weight_shape = weight.get_shape()
#bias_shape = bias.get_shape()
Example #9
    def __init__(self,
                 args,
                 infer=False):  # infer is set to true during sampling.
        self.args = args
        if infer:
            # Worry about one character at a time during sampling; no batching or BPTT.
            args.batch_size = 1
            args.seq_length = 1

        # Set cell_fn to the type of network cell we're creating -- RNN, GRU or LSTM.
        if args.model == 'rnn':
            cell_fn = rnn_cell.BasicRNNCell
        elif args.model == 'gru':
            cell_fn = rnn_cell.GRUCell
        elif args.model == 'lstm':
            cell_fn = rnn_cell.BasicLSTMCell
        else:
            raise Exception("model type not supported: {}".format(args.model))

        # Call tensorflow library tensorflow-master/tensorflow/python/ops/rnn_cell
        # to create a layer of rnn_size cells of the specified basic type (RNN/GRU/LSTM).
        if args.model == "gru":
            cell = cell_fn(args.rnn_size)
        else:
            cell = cell_fn(args.rnn_size, state_is_tuple=True)

        # Use the same rnn_cell library to create a stack of these cells
        # of num_layers layers. Pass in a python list of these cells.
        # (The [cell] * args.num_layers syntax literally duplicates cell multiple times in
        # a list. The syntax is such that [5, 6] * 3 would return [5, 6, 5, 6, 5, 6].)
        self.cell = cell = rnn_cell.MultiRNNCell([cell] * args.num_layers,
                                                 state_is_tuple=True)

        # Create two TF placeholder nodes of 32-bit ints (NOT floats!),
        # each of shape batch_size x seq_length. This shape matches the batches
        # (listed in x_batches and y_batches) constructed in create_batches in utils.py.
        # input_data will receive input batches, and targets will be what it compares against
        # to calculate loss.
        self.input_data = tf.placeholder(tf.int32,
                                         [args.batch_size, args.seq_length])
        self.targets = tf.placeholder(tf.int32,
                                      [args.batch_size, args.seq_length])

        # Using the zero_state function in the RNNCell master class in rnn_cell library,
        # create a tensor of zeros such that we can swap it in for the network state at any time
        # to zero out the network's state.
        # State dimensions are: cell_fn state size (2 for LSTM) x rnn_size x num_layers.
        # So an LSTM network with 100 cells per layer and 3 layers would have a state size of 600,
        # and initial_state would have a dimension of none x 600.
        self.initial_state = self.cell.zero_state(args.batch_size, tf.float32)

        # Scope our new variables to the scope identifier string "rnnlm".
        with tf.variable_scope('rnnlm'):
            # Create new variable softmax_w and softmax_b for output.
            # softmax_w is a weights matrix from the top layer of the model (of size rnn_size)
            # to the vocabulary output (of size vocab_size).
            softmax_w = tf.get_variable("softmax_w",
                                        [args.rnn_size, args.vocab_size])
            # softmax_b is a bias vector of the output characters (of size vocab_size).
            softmax_b = tf.get_variable("softmax_b", [args.vocab_size])
            # [TODO: Why specify CPU? Same as the TF translation tutorial, but don't know why.]
            with tf.device("/cpu:0"):
                # Create new variable named 'embedding' to connect the character input to the base layer
                # of the RNN. Its role is the conceptual inverse of softmax_w.
                # It contains the trainable weights from the one-hot input vector to the lowest layer of RNN.
                embedding = tf.get_variable("embedding",
                                            [args.vocab_size, args.rnn_size])
                # Create an embedding tensor with tf.nn.embedding_lookup(embedding, self.input_data).
                # This tensor has dimensions batch_size x seq_length x rnn_size.
                # tf.split splits that embedding lookup tensor into seq_length tensors (along dimension 1).
                # Thus inputs is a list of seq_length different tensors,
                # each of dimension batch_size x 1 x rnn_size.
                inputs = tf.split(tf.nn.embedding_lookup(
                    embedding, self.input_data),
                                  args.seq_length,
                                  axis=1)
                # Iterate through these resulting tensors and eliminate that degenerate second dimension of 1,
                # i.e. squeeze each from batch_size x 1 x rnn_size down to batch_size x rnn_size.
                # Thus we now have a list of seq_length tensors, each with dimension batch_size x rnn_size.
                inputs = [tf.squeeze(input_, [1]) for input_ in inputs]

        # THIS LOOP FUNCTION IS NEVER ACTUALLY USED.
        # IT IS EXPLICITLY NOT USED DURING TRAINING.
        # DURING INFERENCE, SEQ_LENGTH == 1, SO SEQ2SEQ.RNN_DECODER() ONLY USES THE LOOP ARGUMENT
        # ON SEQUENCE LENGTH ITEMS SUBSEQUENT TO THE FIRST.
        # This looping function is used as part of seq2seq.rnn_decoder only during sampling -- not training.
        # prev is a 2D Tensor of shape [batch_size x cell.output_size].
        # returns a 2D Tensor of shape [batch_size x cell.input_size].
        def loop(prev, _):
            # prev is initially the top cell state.
            # Convert the top cell state into character logits.
            prev = tf.matmul(prev, softmax_w) + softmax_b
            # Pull the character with the greatest logit (no sampling, just argmaxing).
            # WHY IS THIS ARGMAXING WHEN ACTUAL SAMPLING IS DONE PROBABILISTICALLY?
            # DOESN'T THIS CAUSE OUTPUTS NOT TO MATCH INPUTS DURING SEQUENCE GENERATION?
            prev_symbol = tf.stop_gradient(tf.argmax(prev, 1))
            # Re-embed that symbol as the next step's input, and return that.
            return tf.nn.embedding_lookup(embedding, prev_symbol)

        # Set up a seq2seq decoder from the seq2seq.py library.
        # This constructs the outputs and states nodes of the network.
        # Outputs is a list (of len seq_length, same as inputs) of tensors of shape [batch_size x rnn_size].
        # These are the raw output values of the top layer of the network at each time step.
        # They have NOT been fed through the decoder projection; they are still in network space,
        # not character space.
        # State is a tensor of shape [batch_size x cell.state_size].
        # This is also the step where all of the trainable parameters for the LSTM (weights and biases) are defined.
        outputs, self.final_state = seq2seq.rnn_decoder(
            inputs,
            self.initial_state,
            cell,
            loop_function=loop if infer else None,
            scope='rnnlm')
        # tf.concat concatenates the output tensors along the rnn_size dimension,
        # to make a single tensor of shape [batch_size x (seq_length * rnn_size)].
        # This gives the following 2D outputs matrix:
        #   [(rnn output: batch 0, seq 0) (rnn output: batch 0, seq 1) ... (rnn output: batch 0, seq seq_len-1)]
        #   [(rnn output: batch 1, seq 0) (rnn output: batch 1, seq 1) ... (rnn output: batch 1, seq seq_len-1)]
        #   ...
        #   [(rnn output: batch batch_size-1, seq 0) (rnn output: batch batch_size-1, seq 1) ... (rnn output: batch batch_size-1, seq seq_len-1)]
        # tf.reshape then reshapes it to a tensor of shape [(batch_size * seq_length) x rnn_size].
        # Output will now be the following matrix:
        #   [rnn output: batch 0, seq 0]
        #   [rnn output: batch 0, seq 1]
        #   ...
        #   [rnn output: batch 0, seq seq_len-1]
        #   [rnn output: batch 1, seq 0]
        #   [rnn output: batch 1, seq 1]
        #   ...
        #   [rnn output: batch 1, seq seq_len-1]
        #   ...
        #   ...
        #   [rnn output: batch batch_size-1, seq seq_len-1]
        # Note the following comment in rnn_cell.py:
        #   Note: in many cases it may be more efficient to not use this wrapper,
        #   but instead concatenate the whole sequence of your outputs in time,
        #   do the projection on this batch-concatenated sequence, then split it
        #   if needed or directly feed into a softmax.
        output = tf.reshape(tf.concat(outputs, axis=1), [-1, args.rnn_size])
        # Obtain logits node by applying output weights and biases to the output tensor.
        # Logits is a tensor of shape [(batch_size * seq_length) x vocab_size].
        # Recall that outputs is a 2D tensor of shape [(batch_size * seq_length) x rnn_size],
        # and softmax_w is a 2D tensor of shape [rnn_size x vocab_size].
        # The matrix product is therefore a new 2D tensor of [(batch_size * seq_length) x vocab_size].
        # In other words, that multiplication converts a loooong list of rnn_size vectors
        # to a loooong list of vocab_size vectors.
        # Then add softmax_b (a single vocab-sized vector) to every row of that list.
        # That gives you the logits!
        self.logits = tf.matmul(output, softmax_w) + softmax_b
        # Convert logits to probabilities. Probs isn't used during training! That node is never calculated.
        # Like logits, probs is a tensor of shape [(batch_size * seq_length) x vocab_size].
        # During sampling, this means it is of shape [1 x vocab_size].
        self.probs = tf.nn.softmax(self.logits)
        # seq2seq.sequence_loss_by_example returns 1D float Tensor containing the log-perplexity
        # for each sequence. (Size is batch_size * seq_length.)
        # Targets are reshaped from a [batch_size x seq_length] tensor to a 1D tensor, of the following layout:
        #   target character (batch 0, seq 0)
        #   target character (batch 0, seq 1)
        #   ...
        #   target character (batch 0, seq seq_len-1)
        #   target character (batch 1, seq 0)
        #   ...
        # These targets are compared to the logits to generate loss.
        # Logits: instead of a list of character indices, it's a list of character index probability vectors.
        # seq2seq.sequence_loss_by_example will do the work of generating losses by comparing the one-hot vectors
        # implicitly represented by the target characters against the probability distributions in logits.
        # It returns a 1D float tensor (a vector) where item i is the log-perplexity of
        # the comparison of the ith logit distribution to the ith one-hot target vector.
        loss = seq2seq.sequence_loss_by_example(
            [self.logits],
            # logits: 1-item list of 2D Tensors of shape [batch_size x vocab_size]
            [tf.reshape(self.targets, [-1])],
            # targets: 1-item list of 1D batch-sized int32 Tensors of the same length as logits
            [tf.ones([args.batch_size * args.seq_length])],
            # weights: 1-item list of 1D batch-sized float-Tensors of the same length as logits
            args.vocab_size
        )  # num_decoder_symbols: integer, number of decoder symbols (output classes)
        # Cost is the arithmetic mean of the values of the loss tensor
        # (the sum divided by the total number of elements).
        # It is a single-element floating point tensor. This is what the optimizer seeks to minimize.
        self.cost = tf.reduce_sum(loss) / args.batch_size / args.seq_length
        # Create a summary for our cost.
        tf.summary.scalar("cost", self.cost)
        # Create a node to track the learning rate as it decays through the epochs.
        self.lr = tf.Variable(args.learning_rate, trainable=False)
        self.global_epoch_fraction = tf.Variable(0.0, trainable=False)
        self.global_seconds_elapsed = tf.Variable(0.0, trainable=False)
        tvars = tf.trainable_variables(
        )  # tvars is a python list of all trainable TF Variable objects.

        # tf.gradients returns a list of tensors of length len(tvars) where each tensor is sum(dy/dx).
        grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars),
                                          args.grad_clip)
        optimizer = tf.train.AdamOptimizer(
            self.lr)  # Use ADAM optimizer with the current learning rate.
        # Zip creates a list of tuples, where each tuple is (variable tensor, gradient tensor).
        # Training op nudges the variables along the gradient, with the given learning rate, using the ADAM optimizer.
        # This is the op that a training session should be instructed to perform.
        self.train_op = optimizer.apply_gradients(zip(grads, tvars))
        self.summary_op = tf.summary.merge_all()
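
The loop function above takes the argmax. A stochastic variant, answering the question raised in its comments, might sample from the softmax instead (a sketch, not part of this code):

def loop_sampled(prev, _):
    # Hypothetical alternative to loop(): sample the next symbol from the
    # softmax distribution rather than taking the argmax.
    prev = tf.matmul(prev, softmax_w) + softmax_b
    prev_symbol = tf.stop_gradient(
        tf.squeeze(tf.multinomial(prev, 1), axis=[1]))
    return tf.nn.embedding_lookup(embedding, prev_symbol)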
Example #10
    def model(self,
              mode="train",
              num_layers=2,
              cell_size=32,
              cell_type="BasicLSTMCell",
              embedding_size=20,
              learning_rate=0.0001,
              tensorboard_verbose=0,
              checkpoint_path=None):

        assert mode in ["train", "predict"]

        checkpoint_path = checkpoint_path or (
            "%s%ss2s_checkpoint.tfl" %
            (self.data_dir or "", "/" if self.data_dir else ""))
        GO_VALUE = self.out_max_int + 1  # unique integer value used to trigger decoder outputs in the seq2seq RNN

        network = tflearn.input_data(
            shape=[None, self.in_seq_len + self.out_seq_len],
            dtype=tf.int32,
            name="XY")
        encoder_inputs = tf.slice(network, [0, 0], [-1, self.in_seq_len],
                                  name="enc_in")  # get encoder inputs
        encoder_inputs = tf.unstack(
            encoder_inputs, axis=1
        )  # transform into list of self.in_seq_len elements, each [-1]

        decoder_inputs = tf.slice(network, [0, self.in_seq_len],
                                  [-1, self.out_seq_len],
                                  name="dec_in")  # get decoder inputs
        decoder_inputs = tf.unstack(
            decoder_inputs, axis=1
        )  # transform into list of self.out_seq_len elements, each [-1]

        go_input = tf.multiply(
            tf.ones_like(decoder_inputs[0], dtype=tf.int32), GO_VALUE
        )  # insert "GO" symbol as the first decoder input; drop the last decoder input
        decoder_inputs = [
            go_input
        ] + decoder_inputs[:self.out_seq_len -
                           1]  # insert GO as first; drop last decoder input

        feed_previous = not (mode == "train")

        self.n_input_symbols = self.in_max_int + 1  # default is integers from 0 to 9
        self.n_output_symbols = self.out_max_int + 2  # extra "GO" symbol for decoder inputs

        single_cell = getattr(core_rnn_cell, cell_type)(cell_size,
                                                        state_is_tuple=True)
        if num_layers == 1:
            cell = single_cell
        else:
            cell = core_rnn_cell.MultiRNNCell([single_cell] * num_layers)

        if self.seq2seq_model == "embedding_rnn":
            model_outputs, states = legacy_seq2seq.embedding_rnn_seq2seq(
                encoder_inputs,
                # encoder_inputs: A list of 2D Tensors [batch_size, input_size].
                decoder_inputs,
                cell,
                num_encoder_symbols=self.n_input_symbols,
                num_decoder_symbols=self.n_output_symbols,
                embedding_size=embedding_size,
                feed_previous=feed_previous)
        elif self.seq2seq_model == "embedding_attention":
            model_outputs, states = legacy_seq2seq.embedding_attention_seq2seq(
                encoder_inputs,
                # encoder_inputs: A list of 2D Tensors [batch_size, input_size].
                decoder_inputs,
                cell,
                num_encoder_symbols=self.n_input_symbols,
                num_decoder_symbols=self.n_output_symbols,
                embedding_size=embedding_size,
                num_heads=1,
                initial_state_attention=False,
                feed_previous=feed_previous)
        else:
            raise Exception('[TFLearnSeq2Seq] Unknown seq2seq model %s' %
                            self.seq2seq_model)

        tf.add_to_collection(
            tf.GraphKeys.LAYER_VARIABLES + '/' + "seq2seq_model",
            model_outputs)  # for TFLearn to know what to save and restore

        # model_outputs: list of the same length as decoder_inputs of 2D Tensors with shape [batch_size x output_size] containing the generated outputs.

        network = tf.stack(
            model_outputs, axis=1
        )  # shape [-1, n_decoder_inputs (= self.out_seq_len), num_decoder_symbols]

        with tf.name_scope(
                "TargetsData"
        ):  # placeholder for target variable (i.e. trainY input)
            targetY = tf.placeholder(shape=[None, self.out_seq_len],
                                     dtype=tf.int32,
                                     name="Y")

        network = tflearn.regression(network,
                                     placeholder=targetY,
                                     optimizer='adam',
                                     learning_rate=learning_rate,
                                     loss=self.sequence_loss,
                                     metric=self.accuracy,
                                     name="Y")

        model = tflearn.DNN(network,
                            tensorboard_verbose=tensorboard_verbose,
                            checkpoint_path=checkpoint_path)
        return model
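
A usage sketch; s2s (an instance of the enclosing class, called TFLearnSeq2Seq in the exception message above) and the training arrays are assumptions:

# Hypothetical: trainXY has shape [N, in_seq_len + out_seq_len] and trainY
# has shape [N, out_seq_len], matching the placeholders above.
model = s2s.model(mode="train", num_layers=2, cell_size=32)
model.fit(trainXY, trainY, n_epoch=10, batch_size=128)
model.save("s2s_checkpoint.tfl")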