Example #1
0
    def model_initializer(self, V, K, sq_length, recurr_unit, nonlin_func,
                          optimizer, optimizer_args, reg):
        """Assemble the tagger graph: embedding, stacked RNN layers, dense output.

        Creates the input/target placeholders, obtains the embedding and
        output parameters from ``self.helper``, chains one recurrent layer per
        entry of ``self.hid_lay_sizes`` and stores ``self.prediction``,
        ``self.cost`` and ``self.train_op`` on the instance.

        Args:
            V: first dimension of the embedding parameter shape
                (presumably the vocabulary size).
            K: second dimension of the output parameter shape, i.e. the
                number of logits per time step.
            sq_length: fixed sequence length T of both placeholders.
            recurr_unit: per-layer RNN cell constructors, indexed in step
                with ``self.hid_lay_sizes``.
            nonlin_func: per-layer activation identifiers, resolved through
                ``self.nonlinear``.
            optimizer: forwarded to ``self.optimizer``.
            optimizer_args: forwarded to ``self.optimizer``.
            reg: multiplier applied to the summed L2 penalty.
        """
        # Inputs and targets share the same (batch, sq_length) integer layout.
        token_shape = (None, sq_length)
        self.input_words = tf.placeholder(tf.int32,
                                          shape=token_shape,
                                          name="tfX")
        self.target_POS = tf.placeholder(tf.int32,
                                         shape=token_shape,
                                         name="tfT")
        batch_size = tf.shape(self.input_words)[0]

        self.hidden_layers = []

        # Element 0 of what self.helper returns is used as the weight and
        # element 1 as the additive term (see the logits line below).
        self.W_embed = self.helper((V, self.D))
        top_size = self.hid_lay_sizes[-1]
        self.W_out = self.helper((top_size, K))

        # Embed the tokens, then split along axis 1 into sq_length per-step
        # tensors, which is the layout handed to get_rnn_output.
        embedded = tf.nn.embedding_lookup(self.W_embed[0], self.input_words)
        layer_io = tf.unstack(embedded, sq_length, 1)

        # Each recurrent layer consumes the previous layer's output sequence.
        for layer_no, units in enumerate(self.hid_lay_sizes):
            make_cell = recurr_unit[layer_no]
            act = self.nonlinear(nonlin_func[layer_no])
            cell = make_cell(num_units=units, activation=act)
            layer_io, _ = get_rnn_output(cell, layer_io, dtype=tf.float32)

        # Time-major (T, N, M) -> batch-major (N, T, M) -> flat (NT, M),
        # where M is the size of the last hidden layer.
        batch_major = tf.transpose(layer_io, (1, 0, 2))
        flat_hidden = tf.reshape(batch_major,
                                 (sq_length * batch_size, top_size))

        logits = tf.matmul(flat_hidden, self.W_out[0]) + self.W_out[1]  # NT x K
        best_class = tf.argmax(logits, axis=1)
        self.prediction = tf.reshape(best_class, (batch_size, sq_length))

        def _is_penalised(var):
            # Variables tagged "noreg" or "Bias" in their name are exempt.
            return "noreg" not in var.name and "Bias" not in var.name

        penalty = reg * sum(
            tf.nn.l2_loss(var) for var in tf.trainable_variables()
            if _is_penalised(var))

        # Mean cross-entropy over every (sample, time step) pair plus L2.
        flat_targets = tf.reshape(self.target_POS, [-1])
        xent = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits, labels=flat_targets)
        self.cost = tf.reduce_mean(xent) + penalty

        solver = self.optimizer(optimizer, optimizer_args)
        self.train_op = solver.minimize(self.cost)
Example #2
0
    def forward(self, X):
        """Run X through the RNN and project the final step's output.

        Only the last element of the sequence returned by ``get_rnn_output``
        is used; it is multiplied by ``self.Wo`` and offset by ``self.bo``.
        """
        step_outputs, _ = get_rnn_output(self.rnn_unit, X, dtype=tf.float32)

        # step_outputs is indexed by time step first, so [-1] is the last step.
        final_step = step_outputs[-1]
        return tf.matmul(final_step, self.Wo) + self.bo
Example #3
0
  def fit(self, X, Y, batch_sz=20, learning_rate=0.1, mu=0.9, activation=tf.nn.sigmoid, epochs=100, show_fig=False):
    """Build a simple-RNN classifier graph and train it with momentum SGD.

    A prediction is produced at every time step and the cost averages the
    cross-entropy over all of them; the reported classification rate only
    looks at the last time step of each sequence.

    Args:
      X: array of shape (N, T, D) holding N sequences.
      Y: integer label array fed in batches of shape (batch_sz, T); the
        number of classes is the number of distinct values in Y.
      batch_sz: fixed batch size baked into the placeholders. Only whole
        batches are fed, so up to N % batch_sz samples are left out of each
        epoch (which ones varies because the data is reshuffled).
      learning_rate: learning rate of the momentum optimizer.
      mu: momentum coefficient.
      activation: activation of the recurrent cell; also stored as self.f.
      epochs: maximum number of epochs; training stops early once every
        scored sequence is classified correctly.
      show_fig: if true, plot the per-epoch summed cost when training ends.
    """
    N, T, D = X.shape # X is of size N x T(n) x D
    K = len(set(Y.flatten()))
    M = self.M
    self.f = activation

    # initial weights
    # note: Wx, Wh, bh are all part of the RNN unit and will be created
    #       by BasicRNNCell
    Wo = init_weight(M, K).astype(np.float32)
    bo = np.zeros(K, dtype=np.float32)

    # make them tf variables
    self.Wo = tf.Variable(Wo)
    self.bo = tf.Variable(bo)

    # tf Graph input
    tfX = tf.compat.v1.placeholder(tf.float32, shape=(batch_sz, T, D), name='inputs')
    tfY = tf.compat.v1.placeholder(tf.int64, shape=(batch_sz, T), name='targets')

    # turn tfX into a sequence, e.g. T tensors all of size (batch_sz, D)
    sequenceX = x2sequence(tfX, T, D, batch_sz)

    # create the simple rnn unit
    rnn_unit = BasicRNNCell(num_units=self.M, activation=self.f)

    # Get rnn cell output
    outputs, states = get_rnn_output(rnn_unit, sequenceX, dtype=tf.float32)

    # outputs are now of size (T, batch_sz, M)
    # so make it (batch_sz, T, M), then flatten to (batch_sz*T, M)
    outputs = tf.transpose(a=outputs, perm=(1, 0, 2))
    outputs = tf.reshape(outputs, (T*batch_sz, M))

    # Dense output layer applied to every time step of every sample
    logits = tf.matmul(outputs, self.Wo) + self.bo
    predict_op = tf.argmax(input=logits, axis=1)
    targets = tf.reshape(tfY, (T*batch_sz,))

    cost_op = tf.reduce_mean(
      input_tensor=tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits,
        labels=targets
      )
    )
    train_op = tf.compat.v1.train.MomentumOptimizer(learning_rate, momentum=mu).minimize(cost_op)

    costs = []
    n_batches = N // batch_sz
    # Only whole batches are fed, so this (not N) is how many sequences are
    # actually scored per epoch. Using N here would make a 100% rate and the
    # early stop unreachable whenever N is not a multiple of batch_sz.
    n_seen = n_batches * batch_sz

    init = tf.compat.v1.global_variables_initializer()
    with tf.compat.v1.Session() as session:
      session.run(init)
      for i in range(epochs):
        X, Y = shuffle(X, Y)
        n_correct = 0
        cost = 0
        for j in range(n_batches):
          Xbatch = X[j*batch_sz:(j+1)*batch_sz]
          Ybatch = Y[j*batch_sz:(j+1)*batch_sz]

          _, c, p = session.run([train_op, cost_op, predict_op], feed_dict={tfX: Xbatch, tfY: Ybatch})
          cost += c
          # p is flat and batch-major (sample b owns p[b*T:(b+1)*T]), so the
          # last-step predictions sit at indices T-1, 2T-1, ...
          n_correct += np.sum(p[T - 1::T] == Ybatch[:, -1])

        # Record the epoch before any early exit so the plot includes it.
        costs.append(cost)
        rate = float(n_correct) / n_seen if n_seen else 0.0
        converged = n_seen > 0 and n_correct == n_seen
        if i % 10 == 0 or converged:
          print("i:", i, "cost:", cost, "classification rate:", rate)
        if converged:
          break

    if show_fig:
      plt.plot(costs)
      plt.show()
# Recurrent cell used by the tagger graph built below.
rnn_unit = GRUCell(num_units=hidden_layer_size, activation=tf.nn.relu)

# ================ model + cost + solver  =======================
# get the output from the embedding layer
x = tf.nn.embedding_lookup(tfWe, inputs)  # x is a tensor of shape N x T x D (D = embedding width)

# converts x from a tensor of shape N x T x D
# into a list of length T, where each element is a tensor of shape N x D
# TensorFlow's RNN has an odd requirement: its input must be laid out time-first (T x N x D).
# Fortunately TensorFlow has a ready-made op for changing the layout.
x = tf.unstack(
    x, sequence_length, axis=1
)  # axis=1 (the second dimension) splits along the T dimension; x is now a list of T tensors, each N x D

# get the rnn output
output, states = get_rnn_output(rnn_unit, x, dtype=tf.float32)

# output is now of size (T, N, M), M = hidden_layer_size
# so make it (N, T, M)                   # TODO: could unstack be used here?
outputs = tf.transpose(output,
                       (1, 0, 2))  # TODO: confirm how transpose / reshape / unstack behave
outputs = tf.reshape(
    outputs,
    (num_sample * sequence_length, hidden_layer_size))  # NT x M  (see note 1)

# final dense layer, applied to every time step of every sample
logits = tf.matmul(outputs, tfWo) + tfbo  # NT x K
predictions = tf.argmax(logits, axis=1)  # (NT, )
predict_op = tf.reshape(predictions, (num_sample, sequence_length))  # N x T
labels_flat = tf.reshape(targets, [-1])  # (NT, ); flattened so the cost can be computed later, see note 2
Example #5
0
    def train(self,
              epochs=10,
              learning_rate=1e-2,
              mu=0.99,
              batch_size=32,
              hidden_layer_size=10,
              embedding_dim=10):
        """Build a GRU sequence tagger, train it with Adam and plot the curves.

        Sequences from ``self.Xtrain`` / ``self.Xtest`` (and their labels) are
        padded to the longest sequence across both sets. Positions whose
        label is 0 are treated as padding and excluded from every accuracy
        figure. After each epoch the whole test set is scored in one run.

        Args:
            epochs: number of passes over the training data.
            learning_rate: learning rate handed to the Adam optimizer.
            mu: accepted for interface compatibility; not read by this method.
            batch_size: samples per step; a trailing partial batch is skipped.
            hidden_layer_size: number of units in the GRU cell.
            embedding_dim: width of the randomly initialised embedding.
        """

        def masked_hits(label_rows, pred_rows):
            """Return (correct, counted) over positions whose label is > 0."""
            hits = 0
            counted = 0
            for labels, preds in zip(label_rows, pred_rows):
                # we don't care about the padded entries so ignore them
                real_labels = labels[labels > 0]
                real_preds = preds[labels > 0]
                hits += np.sum(real_labels == real_preds)
                counted += len(real_labels)
            return hits, counted

        # training config
        sequence_length = max(len(seq) for seq in self.Xtrain + self.Xtest)
        vocab_size = self.V
        n_classes = self.K

        # pad every split to the common length
        Xtrain = pad_sequences(self.Xtrain, maxlen=sequence_length)
        Ytrain = pad_sequences(self.Ytrain, maxlen=sequence_length)
        Xtest = pad_sequences(self.Xtest, maxlen=sequence_length)
        Ytest = pad_sequences(self.Ytest, maxlen=sequence_length)

        print("Xtrain.shape:", Xtrain.shape)
        print("Ytrain.shape:", Ytrain.shape)

        # graph inputs; the batch dimension is left dynamic
        inputs = tf.placeholder(tf.int32, shape=(None, sequence_length))
        targets = tf.placeholder(tf.int32, shape=(None, sequence_length))
        num_samples = tf.shape(inputs)[0]

        # initial values: embedding first, then the output layer
        embed_init = np.random.randn(vocab_size, embedding_dim).astype(np.float32)
        out_w_init = init_weight(hidden_layer_size, n_classes).astype(np.float32)
        out_b_init = np.zeros(n_classes).astype(np.float32)

        # wrap them as tensorflow variables
        tfWe = tf.Variable(embed_init)
        tfWo = tf.Variable(out_w_init)
        tfbo = tf.Variable(out_b_init)

        gru = GRUCell(num_units=hidden_layer_size, activation=tf.nn.relu)

        # N x T x D embeddings, split into a list of T tensors of shape N x D
        embedded = tf.nn.embedding_lookup(tfWe, inputs)
        step_inputs = tf.unstack(embedded, sequence_length, 1)

        rnn_out, _ = get_rnn_output(gru, step_inputs, dtype=tf.float32)

        # (T, N, M) -> (N, T, M) -> (NT, M)
        rnn_out = tf.transpose(rnn_out, (1, 0, 2))
        flat_hidden = tf.reshape(
            rnn_out, (sequence_length * num_samples, hidden_layer_size))

        # dense output layer applied to every time step
        logits = tf.matmul(flat_hidden, tfWo) + tfbo  # NT x K
        flat_pred = tf.argmax(logits, 1)
        predict_op = tf.reshape(flat_pred, (num_samples, sequence_length))
        labels_flat = tf.reshape(targets, [-1])

        xent = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits, labels=labels_flat)
        cost_op = tf.reduce_mean(xent)
        train_op = tf.train.AdamOptimizer(learning_rate).minimize(cost_op)

        # session setup
        sess = tf.InteractiveSession()
        sess.run(tf.global_variables_initializer())

        # training loop
        accs = []
        costs = []
        n_batches = len(Ytrain) // batch_size
        for i in range(epochs):
            n_total = 0
            n_correct = 0

            t0 = datetime.now()
            Xtrain, Ytrain = shuffle(Xtrain, Ytrain)
            cost = 0

            for j in range(n_batches):
                lo = j * batch_size
                hi = (j + 1) * batch_size
                x_batch = Xtrain[lo:hi]
                y_batch = Ytrain[lo:hi]

                # one optimisation step; also fetch the cost and predictions
                batch_cost, batch_pred, _ = sess.run(
                    (cost_op, predict_op, train_op),
                    feed_dict={inputs: x_batch, targets: y_batch})
                cost += batch_cost

                hits, counted = masked_hits(y_batch, batch_pred)
                n_correct += hits
                n_total += counted

                # progress line every 10th batch, overwritten in place
                if j % 10 == 0:
                    sys.stdout.write(
                        "j/N: %d/%d correct rate so far: %f, cost so far: %f\r"
                        % (j, n_batches, float(n_correct) / n_total, cost))
                    sys.stdout.flush()

            # score the full test set in a single run
            test_pred = sess.run(predict_op,
                                 feed_dict={inputs: Xtest, targets: Ytest})
            n_test_correct, n_test_total = masked_hits(Ytest, test_pred)
            test_acc = float(n_test_correct) / n_test_total

            train_acc = float(n_correct) / n_total
            print("i:", i, "cost:", "%.4f" % cost, "train acc:",
                  "%.4f" % train_acc, "test acc:",
                  "%.4f" % test_acc, "time for epoch:", (datetime.now() - t0))
            accs.append(train_acc)
            costs.append(cost)

        f, plt_arr = plt.subplots(2, sharex=True)
        cost_axis, acc_axis = plt_arr[0], plt_arr[1]
        cost_axis.plot(costs)
        cost_axis.set_title('costs')
        acc_axis.plot(accs)
        acc_axis.set_title('acc')
        plt.show()
  def fit(self, X, Y, batch_sz=20, learning_rate=0.1, mu=0.9, activation=tf.nn.sigmoid, epochs=100, show_fig=False):
    """Build a simple-RNN classifier graph and train it with momentum SGD.

    A prediction is produced at every time step and the cost averages the
    cross-entropy over all of them; the reported classification rate only
    looks at the last time step of each sequence.

    Args:
      X: array of shape (N, T, D) holding N sequences.
      Y: integer label array fed in batches of shape (batch_sz, T); the
        number of classes is the number of distinct values in Y.
      batch_sz: fixed batch size baked into the placeholders. Only whole
        batches are fed, so up to N % batch_sz samples are left out of each
        epoch (which ones varies because the data is reshuffled).
      learning_rate: learning rate of the momentum optimizer.
      mu: momentum coefficient.
      activation: activation of the recurrent cell; also stored as self.f.
      epochs: maximum number of epochs; training stops early once every
        scored sequence is classified correctly.
      show_fig: if true, plot the per-epoch summed cost when training ends.
    """
    N, T, D = X.shape # X is of size N x T(n) x D
    K = len(set(Y.flatten()))
    M = self.M
    self.f = activation

    # initial weights
    # note: Wx, Wh, bh are all part of the RNN unit and will be created
    #       by BasicRNNCell
    Wo = init_weight(M, K).astype(np.float32)
    bo = np.zeros(K, dtype=np.float32)

    # make them tf variables
    self.Wo = tf.Variable(Wo)
    self.bo = tf.Variable(bo)

    # tf Graph input
    tfX = tf.placeholder(tf.float32, shape=(batch_sz, T, D), name='inputs')
    tfY = tf.placeholder(tf.int64, shape=(batch_sz, T), name='targets')

    # turn tfX into a sequence, e.g. T tensors all of size (batch_sz, D)
    sequenceX = x2sequence(tfX, T, D, batch_sz)

    # create the simple rnn unit
    rnn_unit = BasicRNNCell(num_units=self.M, activation=self.f)

    # Get rnn cell output
    outputs, states = get_rnn_output(rnn_unit, sequenceX, dtype=tf.float32)

    # outputs are now of size (T, batch_sz, M)
    # so make it (batch_sz, T, M), then flatten to (batch_sz*T, M)
    outputs = tf.transpose(outputs, (1, 0, 2))
    outputs = tf.reshape(outputs, (T*batch_sz, M))

    # Dense output layer applied to every time step of every sample
    logits = tf.matmul(outputs, self.Wo) + self.bo
    predict_op = tf.argmax(logits, 1)
    targets = tf.reshape(tfY, (T*batch_sz,))

    cost_op = tf.reduce_mean(
      tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits,
        labels=targets
      )
    )
    train_op = tf.train.MomentumOptimizer(learning_rate, momentum=mu).minimize(cost_op)

    costs = []
    n_batches = N // batch_sz
    # Only whole batches are fed, so this (not N) is how many sequences are
    # actually scored per epoch. Using N here would make a 100% rate and the
    # early stop unreachable whenever N is not a multiple of batch_sz.
    n_seen = n_batches * batch_sz

    init = tf.global_variables_initializer()
    with tf.Session() as session:
      session.run(init)
      for i in range(epochs):
        X, Y = shuffle(X, Y)
        n_correct = 0
        cost = 0
        for j in range(n_batches):
          Xbatch = X[j*batch_sz:(j+1)*batch_sz]
          Ybatch = Y[j*batch_sz:(j+1)*batch_sz]

          _, c, p = session.run([train_op, cost_op, predict_op], feed_dict={tfX: Xbatch, tfY: Ybatch})
          cost += c
          # p is flat and batch-major (sample b owns p[b*T:(b+1)*T]), so the
          # last-step predictions sit at indices T-1, 2T-1, ...
          n_correct += np.sum(p[T - 1::T] == Ybatch[:, -1])

        # Record the epoch before any early exit so the plot includes it.
        costs.append(cost)
        rate = float(n_correct) / n_seen if n_seen else 0.0
        converged = n_seen > 0 and n_correct == n_seen
        if i % 10 == 0 or converged:
          print("i:", i, "cost:", cost, "classification rate:", rate)
        if converged:
          break

    if show_fig:
      plt.plot(costs)
      plt.show()
Example #7
0
# Output-layer parameters: a (hidden_unit_size, class_num) weight from the
# project's init_weight helper and a zero bias, both wrapped as variables.
W0 = init_weight(hidden_unit_size, class_num).astype(np.float32)
b0 = np.zeros(class_num, dtype=np.float32)
tfW0 = tf.Variable(W0)
tfb0 = tf.Variable(b0)

# Constrained by the shape of X, tfX has to look like this, which is what
# forces the series of dimension conversions further down.
tfX = tf.placeholder(tf.float32, shape=(batch_size, bit_len, D), name='inputs')
tfY = tf.placeholder(tf.int32, shape=(batch_size, bit_len), name='outputs')

# Convert tfX into a sequence: bit_len entries, each of shape (batch_size, D).
# NOTE(review): x2sequence is defined elsewhere; confirm that
# (batch_size, bit_len, D) is the argument order it expects.
sequenceX = x2sequence(tfX, batch_size, bit_len, D)

rnn_units = BasicRNNCell(num_units=hidden_unit_size, activation=tf.nn.sigmoid)

# outputs_ is laid out like sequenceX (bit_len two-dimensional tensors), except
# that the reshape below treats the last dimension as hidden_unit_size.
outputs_, states = get_rnn_output(rnn_units, sequenceX, dtype=tf.float32)
# time-major -> batch-major, then flatten to (bit_len * batch_size, hidden_unit_size)
outputs = tf.transpose(outputs_, perm=(1, 0, 2))
outputs = tf.reshape(outputs, shape=(bit_len * batch_size, hidden_unit_size))

# dense output layer applied to every time step; targets are flattened to match
logits = tf.matmul(outputs, tfW0) + tfb0
predict = tf.argmax(logits, axis=1)
targets = tf.reshape(tfY, shape=(bit_len * batch_size, ))

# Loss function: mean cross-entropy over every (sample, time step) pair
loss = tf.reduce_mean(
    tf.nn.sparse_softmax_cross_entropy_with_logits(labels=targets,
                                                   logits=logits))
# Optimization algorithm: SGD with momentum
train_optimize = tf.train.MomentumOptimizer(learning_rate=learning_rate,
                                            momentum=momentum)
train_setp = train_optimize.minimize(loss)
Example #8
0
    def fit(self,
            X,
            Y,
            batch_sz=20,
            learning_rate=0.1,
            mu=0.9,
            activation=tf.nn.sigmoid,
            epochs=100,
            show_fig=False):
        """Build a simple-RNN classifier graph and train it with momentum SGD.

        A prediction is produced at every time step and the cost averages the
        cross-entropy over all of them; the reported classification rate only
        looks at the last time step of each sequence.

        Args:
            X: array of shape (N, T, D) holding N sequences.
            Y: integer label array fed in batches of shape (batch_sz, T); the
                number of classes is the number of distinct values in Y.
            batch_sz: fixed batch size baked into the placeholders. Only
                whole batches are fed, so up to N % batch_sz samples are left
                out of each epoch (which ones varies with the reshuffle).
            learning_rate: learning rate of the momentum optimizer.
            mu: momentum coefficient.
            activation: activation of the recurrent cell; stored as self.f.
            epochs: maximum number of epochs; training stops early once
                every scored sequence is classified correctly.
            show_fig: if true, plot the per-epoch summed cost at the end.
        """

        N, T, D = X.shape  # X is of size N x T(n) x D
        K = len(set(Y.flatten()))
        M = self.M
        self.f = activation

        # Output-layer parameters come from the project's HiddenLayer helper:
        # element 0 is used as the weight, element 1 as the bias.
        hidden_layer = HiddenLayer(M, K)
        params = hidden_layer.get_hidden_layer_params()
        self.Wo = params[0]
        self.bo = params[1]

        # tf Graph input
        tfX = tf.placeholder(tf.float32, shape=(batch_sz, T, D), name='inputs')
        tfY = tf.placeholder(tf.int64, shape=(batch_sz, T), name='targets')

        # turn tfX into a sequence, e.g. T tensors all of size (batch_sz, D)
        sequenceX = x2sequence(tfX, T, D, batch_sz)

        rnn_unit = BasicRNNCell(num_units=self.M, activation=self.f)
        outputs, states = get_rnn_output(rnn_unit, sequenceX, dtype=tf.float32)

        # outputs are now of size (T, batch_sz, M)
        # so make it (batch_sz, T, M), then flatten to (batch_sz*T, M)
        outputs = tf.transpose(outputs, (1, 0, 2))
        outputs = tf.reshape(outputs, (T * batch_sz, M))

        # dense output layer applied to every time step of every sample
        logits = tf.matmul(outputs, self.Wo) + self.bo
        predict_op = tf.argmax(logits, axis=1)
        targets = tf.reshape(tfY, (T * batch_sz, ))

        # calculate the cost function
        cost_op = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits,
                                                           labels=targets))

        train_op = tf.train.MomentumOptimizer(learning_rate,
                                              momentum=mu).minimize(cost_op)
        costs = []
        n_batches = N // batch_sz
        # Only whole batches are fed, so this (not N) is how many sequences
        # are actually scored per epoch.
        n_seen = n_batches * batch_sz

        init = tf.global_variables_initializer()
        with tf.Session() as session:
            session.run(init)
            for i in range(epochs):
                X, Y = shuffle(X, Y)
                n_correct = 0
                cost = 0
                for j in range(n_batches):
                    Xbatch = X[j * batch_sz:(j + 1) * batch_sz]
                    Ybatch = Y[j * batch_sz:(j + 1) * batch_sz]
                    _, c, p = session.run([train_op, cost_op, predict_op],
                                          feed_dict={
                                              tfX: Xbatch,
                                              tfY: Ybatch
                                          })
                    cost += c
                    # p is flat and batch-major (sample b owns
                    # p[b*T:(b+1)*T]), so the last-step predictions sit at
                    # indices T-1, 2T-1, ...
                    n_correct += np.sum(p[T - 1::T] == Ybatch[:, -1])

                # Reporting, early stopping and cost logging are per-epoch.
                # They must sit outside the batch loop: inside it, `break`
                # would only leave the batch loop and the totals compared
                # here would still be partial.
                costs.append(cost)
                rate = float(n_correct) / n_seen if n_seen else 0.0
                converged = n_seen > 0 and n_correct == n_seen
                if i % 10 == 0 or converged:
                    print("i:", i, "cost:", cost, "classification rate:",
                          rate)
                if converged:
                    break

        if show_fig:
            plt.plot(costs)
            plt.show()
Example #9
0
# make them tensorflow variables (embedding, output weight, output bias)
tfWe = tf.Variable(We)
tfWo = tf.Variable(Wo)
tfbo = tf.Variable(bo)

# make the rnn unit
rnn_unit = GRUCell(num_units=hidden_layer_size, activation=tf.nn.relu)

# get the output
x = tf.nn.embedding_lookup(tfWe, inputs) #  (N, T, D)
# converts x from a tensor of shape (N, T, D)
# into a list of length T, where each element is a tensor of shape (N, D)
x = tf.unstack(x, sequence_length, 1) # list of T tensors, each (N, D)

# get the rnn output
outputs, states = get_rnn_output(rnn_unit, x, dtype=tf.float32) # (T, N, M)

# outputs are now of size (T, N, M), M = hidden_layer_size
# so make it (N, T, M)
outputs = tf.transpose(outputs, (1, 0, 2)) # (N, T, M)
outputs = tf.reshape(outputs, (num_samples * sequence_length, hidden_layer_size)) # (NT, M)

# final dense layer, applied to every time step of every sample
logits = tf.matmul(outputs, tfWo) + tfbo # (NT, K)
predictions = tf.argmax(logits, 1) # (NT, )
predict_op = tf.reshape(predictions, (num_samples, sequence_length)) # (N, T)
labels_flat = tf.reshape(targets, [-1]) # flattens targets into 1-D so they line up row-for-row with the (NT, K) logits

loss_op = tf.reduce_mean(
  tf.nn.sparse_softmax_cross_entropy_with_logits(
    logits=logits,
# output-layer weight and bias as tensorflow variables
tfWo = tf.Variable(Wo)
tfbo = tf.Variable(bo)

# make the rnn unit
rnn_unit = GRUCell(num_units=hidden_layer_size, activation=tf.nn.relu)


# get the output: look up one embedding row of tfWe per input token
x = tf.nn.embedding_lookup(tfWe, inputs)

# converts x from a tensor of shape N x T x D (D = embedding width)
# into a list of length T, where each element is a tensor of shape N x D
x = tf.unstack(x, sequence_length, 1)

# get the rnn output
outputs, states = get_rnn_output(rnn_unit, x, dtype=tf.float32)


# outputs are now of size (T, N, M), M = hidden_layer_size
# so make it (N, T, M), then flatten to NT x M
outputs = tf.transpose(outputs, (1, 0, 2))
outputs = tf.reshape(outputs, (sequence_length*num_samples, hidden_layer_size)) # NT x M

# final dense layer, applied to every time step of every sample
logits = tf.matmul(outputs, tfWo) + tfbo # NT x K
predictions = tf.argmax(logits, 1)
predict_op = tf.reshape(predictions, (num_samples, sequence_length))
# flatten the targets to 1-D so they line up row-for-row with the logits
labels_flat = tf.reshape(targets, [-1])

cost_op = tf.reduce_mean(
  tf.nn.sparse_softmax_cross_entropy_with_logits(
Example #11
0
# output-layer weight and bias as tensorflow variables
tfWo = tf.Variable(Wo)
tfbo = tf.Variable(bo)

# set up the rnn unit; the wrapped cell (with keep_prob passed as
# output_keep_prob) is the one actually run below
rnn_unit = GRUCell(num_units=hidden_layer_size, activation=tf.nn.relu)
rnn_unit_dropout = DropoutWrapper(rnn_unit, output_keep_prob=keep_prob)

# get the output: look up one embedding row of tfWe per input token
x = tf.nn.embedding_lookup(tfWe, inputs)

# convert x from a tensor of shape N x T x D
# into a list of length T, where each element is a tensor of shape N x D
x = tf.unstack(x, sequence_length, 1)

# get the rnn output
outputs, states = get_rnn_output(rnn_unit_dropout, x, dtype=tf.float32)

# outputs are now of size (T, N, M), M = hidden_layer_size
# so make it (N, T, M), then flatten to NT x M
outputs = tf.transpose(outputs, (1, 0, 2))
outputs = tf.reshape(
    outputs, (sequence_length * num_samples, hidden_layer_size))  # NT x M

# Dense output layer applied to every time step (not only the last one)
logits = tf.matmul(outputs, tfWo) + tfbo  # NT x K
predictions = tf.argmax(logits, 1)
predict_op = tf.reshape(predictions, (num_samples, sequence_length))
# flatten the targets to 1-D so they line up row-for-row with the logits
labels_flat = tf.reshape(targets, [-1])

cost_op = tf.reduce_mean(
    tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits,