Code Example #1 (PyTorch-style, constructor only)
    def __init__(self,
                 size=256,
                 depth=3,
                 batch_size=32,
                 keep_prob=0.1,
                 max_nsteps=1000,
                 checkpoint_dir="checkpoint",
                 forward_only=False):
        """Initialize the parameters for an Deep LSTM model.
    
    Args:
      size: int, The dimensionality of the inputs into the Deep LSTM cell [32, 64, 256]
      learning_rate: float, [1e-3, 5e-4, 1e-4, 5e-5]
      batch_size: int, The size of a batch [16, 32]
      keep_prob: unit Tensor or float between 0 and 1 [0.0, 0.1, 0.2]
      max_nsteps: int, The max time unit [1000]
    """
        super(DeepLSTM, self).__init__()

        self.size = int(size)
        self.depth = int(depth)
        self.batch_size = int(batch_size)
        self.output_size = self.depth * self.size
        self.keep_prob = float(keep_prob)
        self.max_nsteps = int(max_nsteps)
        self.checkpoint_dir = checkpoint_dir

        start = time.perf_counter()  # time.clock() was removed in Python 3.8
        print(" [*] Building Deep LSTM...")
        self.cell = LSTMCell(size, forget_bias=0.0)

        if not forward_only and self.keep_prob < 1:
            # PyTorch has no DropoutWrapper; keep a Dropout module here and apply
            # it to the cell outputs in the forward pass. nn.Dropout takes a drop
            # probability, hence 1 - keep_prob.
            self.dropout = nn.Dropout(p=1.0 - keep_prob)
        self.stacked_cell = MultiRNNCellWithSkipConn([self.cell] * depth)

        self.initial_state = self.stacked_cell.zero_state(
            batch_size, torch.float32)
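
The constructor above only stores the Dropout module, so here is a minimal sketch of where output dropout would typically be applied, assuming an nn.LSTMCell-style (input, (h, c)) -> (h, c) interface and hypothetical tensors x, h, c of shape (batch_size, size); this is not part of the original example:

    # hypothetical forward step; dropout is only active in training mode
    h, c = cell(x, (h, c))  # one time step of a single LSTM layer
    h = dropout(h)          # output dropout, mirroring TF's DropoutWrapper(output_keep_prob=...)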
Code Example #2 (TensorFlow 0.x, constructor only)
    def __init__(
        self,
        size=256,
        depth=3,
        batch_size=32,
        keep_prob=0.1,
        max_nsteps=1000,
        checkpoint_dir="checkpoint",
        forward_only=False,
    ):
        """Initialize the parameters for an Deep LSTM model.
    
    Args:
      size: int, The dimensionality of the inputs into the Deep LSTM cell [32, 64, 256]
      learning_rate: float, [1e-3, 5e-4, 1e-4, 5e-5]
      batch_size: int, The size of a batch [16, 32]
      keep_prob: unit Tensor or float between 0 and 1 [0.0, 0.1, 0.2]
      max_nsteps: int, The max time unit [1000]
    """
        super(DeepLSTM, self).__init__()

        self.size = int(size)
        self.depth = int(depth)
        self.batch_size = int(batch_size)
        self.output_size = self.depth * self.size
        self.keep_prob = float(keep_prob)
        self.max_nsteps = int(max_nsteps)
        self.checkpoint_dir = checkpoint_dir

        start = time.clock()
        print(" [*] Building Deep LSTM...")
        self.cell = LSTMCell(size, forget_bias=0.0)
        if not forward_only and self.keep_prob < 1:
            self.cell = rnn_cell.DropoutWrapper(self.cell, output_keep_prob=keep_prob)
        self.stacked_cell = MultiRNNCellWithSkipConn([self.cell] * depth)

        self.initial_state = self.stacked_cell.zero_state(batch_size, tf.float32)
Code Example #3 (PyTorch-style, constructor and prepare_model)
class DeepLSTM(nn.Module):
    """Deep LSTM model."""
    def __init__(self,
                 size=256,
                 depth=3,
                 batch_size=32,
                 keep_prob=0.1,
                 max_nsteps=1000,
                 checkpoint_dir="checkpoint",
                 forward_only=False):
        """Initialize the parameters for an Deep LSTM model.
    
    Args:
      size: int, The dimensionality of the inputs into the Deep LSTM cell [32, 64, 256]
      learning_rate: float, [1e-3, 5e-4, 1e-4, 5e-5]
      batch_size: int, The size of a batch [16, 32]
      keep_prob: unit Tensor or float between 0 and 1 [0.0, 0.1, 0.2]
      max_nsteps: int, The max time unit [1000]
    """
        super(DeepLSTM, self).__init__()

        self.size = int(size)
        self.depth = int(depth)
        self.batch_size = int(batch_size)
        self.output_size = self.depth * self.size
        self.keep_prob = float(keep_prob)
        self.max_nsteps = int(max_nsteps)
        self.checkpoint_dir = checkpoint_dir

        start = time.perf_counter()  # time.clock() was removed in Python 3.8
        print(" [*] Building Deep LSTM...")
        self.cell = LSTMCell(size, forget_bias=0.0)

        if not forward_only and self.keep_prob < 1:
            # PyTorch has no DropoutWrapper; keep a Dropout module here and apply
            # it to the cell outputs in the forward pass. nn.Dropout takes a drop
            # probability, hence 1 - keep_prob.
            self.dropout = nn.Dropout(p=1.0 - keep_prob)
        self.stacked_cell = MultiRNNCellWithSkipConn([self.cell] * depth)

        self.initial_state = self.stacked_cell.zero_state(
            batch_size, torch.float32)

    def prepare_model(self, data_dir, dataset_name, vocab_size):
        if not getattr(self, "vocab", None):  # vocab is not set in __init__
            self.vocab, self.rev_vocab = load_vocab(data_dir, dataset_name,
                                                    vocab_size)
            print(" [*] Loading vocab finished.")

        self.vocab_size = len(self.vocab)

        self.emb = nn.Embedding(self.vocab_size, self.size)

        # inputs: token ids; LongTensor because nn.Embedding expects integer indices
        self.inputs = torch.zeros(self.batch_size, self.max_nsteps, dtype=torch.long)
        embed_inputs = self.emb(torch.transpose(self.inputs, 1, 0))

        # Unroll the stacked cell over time (there is no tf.nn.rnn-style helper in
        # PyTorch). This assumes MultiRNNCellWithSkipConn follows the TF-style
        # (input, state) -> (output, state) cell interface implied by zero_state.
        state = self.initial_state
        states = []
        for step_input in torch.unbind(embed_inputs):
            _, state = self.stacked_cell(step_input, state)
            states.append(state)
        self.batch_states = torch.stack(states)

        # nstarts holds [last_input_position, batch_index, 0] for each example;
        # pick the state at that position for every example in the batch.
        self.nstarts = torch.zeros(self.batch_size, 3, dtype=torch.long)
        outputs = torch.stack([
            self.batch_states[int(nstart[0]), int(nstart[1]), :self.output_size]
            for nstart in torch.unbind(self.nstarts)])

        self.outputs = outputs.view(self.batch_size, self.output_size)

        # register W as a Parameter so the module and its optimizer track it
        self.W = nn.Parameter(torch.randn(self.vocab_size, self.output_size))

        # labels (one-hot), filled in by the caller
        self.y = torch.zeros(self.batch_size, self.vocab_size)
        # logits
        self.y_ = torch.matmul(self.outputs, torch.transpose(self.W, 1, 0))

        # CrossEntropyLoss expects class indices, so convert the one-hot labels
        loss_fn = torch.nn.CrossEntropyLoss()
        self.loss = loss_fn(self.y_, torch.argmax(self.y, dim=1))

        # accuracy: fraction of examples whose predicted class matches the label
        _, logits_indices = torch.max(self.y_, 1)
        _, labels_indices = torch.max(self.y, 1)
        correct_prediction = logits_indices.eq(labels_indices)
        self.accuracy = torch.mean(correct_prediction.float())

        print(" [*] Preparing model finished.")
Code Example #4 (TensorFlow 0.x, full model with train and test)
class DeepLSTM(Model):
    """Deep LSTM model."""

    def __init__(
        self,
        size=256,
        depth=3,
        batch_size=32,
        keep_prob=0.1,
        max_nsteps=1000,
        checkpoint_dir="checkpoint",
        forward_only=False,
    ):
        """Initialize the parameters for an Deep LSTM model.
    
    Args:
      size: int, The dimensionality of the inputs into the Deep LSTM cell [32, 64, 256]
      learning_rate: float, [1e-3, 5e-4, 1e-4, 5e-5]
      batch_size: int, The size of a batch [16, 32]
      keep_prob: unit Tensor or float between 0 and 1 [0.0, 0.1, 0.2]
      max_nsteps: int, The max time unit [1000]
    """
        super(DeepLSTM, self).__init__()

        self.size = int(size)
        self.depth = int(depth)
        self.batch_size = int(batch_size)
        self.output_size = self.depth * self.size
        self.keep_prob = float(keep_prob)
        self.max_nsteps = int(max_nsteps)
        self.checkpoint_dir = checkpoint_dir

        start = time.clock()
        print(" [*] Building Deep LSTM...")
        self.cell = LSTMCell(size, forget_bias=0.0)
        if not forward_only and self.keep_prob < 1:
            self.cell = rnn_cell.DropoutWrapper(self.cell, output_keep_prob=keep_prob)
        self.stacked_cell = MultiRNNCellWithSkipConn([self.cell] * depth)

        self.initial_state = self.stacked_cell.zero_state(batch_size, tf.float32)

    def prepare_model(self, data_dir, dataset_name, vocab_size):
        if not self.vocab:
            self.vocab, self.rev_vocab = load_vocab(data_dir, dataset_name, vocab_size)
            print(" [*] Loading vocab finished.")

        self.vocab_size = len(self.vocab)

        self.emb = tf.get_variable("emb", [self.vocab_size, self.size])

        # inputs
        self.inputs = tf.placeholder(tf.int32, [self.batch_size, self.max_nsteps])
        embed_inputs = tf.nn.embedding_lookup(self.emb, tf.transpose(self.inputs))

        tf.histogram_summary("embed", self.emb)

        # output states
        _, states = rnn.rnn(
            self.stacked_cell, tf.unpack(embed_inputs), dtype=tf.float32, initial_state=self.initial_state
        )
        self.batch_states = tf.pack(states)

        self.nstarts = tf.placeholder(tf.int32, [self.batch_size, 3])
        outputs = tf.pack(
            [
                tf.slice(self.batch_states, nstarts, [1, 1, self.output_size])
                for idx, nstarts in enumerate(tf.unpack(self.nstarts))
            ]
        )

        self.outputs = tf.reshape(outputs, [self.batch_size, self.output_size])

        self.W = tf.get_variable("W", [self.vocab_size, self.output_size])
        tf.histogram_summary("weights", self.W)
        tf.histogram_summary("output", outputs)

        self.y = tf.placeholder(tf.float32, [self.batch_size, self.vocab_size])
        self.y_ = tf.matmul(self.outputs, self.W, transpose_b=True)

        self.loss = tf.nn.softmax_cross_entropy_with_logits(self.y_, self.y)
        tf.scalar_summary("loss", tf.reduce_mean(self.loss))

        correct_prediction = tf.equal(tf.argmax(self.y, 1), tf.argmax(self.y_, 1))
        self.accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
        tf.scalar_summary("accuracy", self.accuracy)

        print(" [*] Preparing model finished.")

    def train(
        self,
        sess,
        vocab_size,
        epoch=25,
        learning_rate=0.0002,
        momentum=0.9,
        decay=0.95,
        data_dir="data",
        dataset_name="cnn",
    ):
        self.prepare_model(data_dir, dataset_name, vocab_size)

        start = time.clock()
        print(" [*] Calculating gradient and loss...")
        self.optim = tf.train.AdamOptimizer(learning_rate, 0.9).minimize(self.loss)
        print(" [*] Calculating gradient and loss finished. Take %.2fs" % (time.clock() - start))

        # Could not use RMSPropOptimizer because the sparse update of RMSPropOptimizer
        # is not implemented yet (2016.01.24).
        # self.optim = tf.train.RMSPropOptimizer(learning_rate,
        #                                        decay=decay,
        #                                        momentum=momentum).minimize(self.loss)

        sess.run(tf.initialize_all_variables())

        if self.load(sess, self.checkpoint_dir, dataset_name):
            print(" [*] Deep LSTM checkpoint is loaded.")
        else:
            print(" [*] There is no checkpoint for this model.")

        y = np.zeros([self.batch_size, self.vocab_size])

        merged = tf.merge_all_summaries()
        writer = tf.train.SummaryWriter("/tmp/deep", sess.graph_def)

        counter = 0
        start_time = time.time()
        for epoch_idx in xrange(epoch):
            data_loader = load_dataset(data_dir, dataset_name, vocab_size)

            batch_stop = False
            while True:
                y.fill(0)
                inputs, nstarts, answers = [], [], []
                batch_idx = 0
                while True:
                    try:
                        (_, document, question, answer, _), data_idx, data_max_idx = data_loader.next()
                    except StopIteration:
                        batch_stop = True
                        break

                    # [0] marks the boundary between the document and the question
                    data = (
                        [int(d) for d in document.split()]
                        + [0]
                        + [int(q) for q in question.split()]
                    )

                    if len(data) > self.max_nsteps:
                        continue

                    inputs.append(data)
                    nstarts.append(len(inputs[-1]) - 1)
                    y[batch_idx][int(answer)] = 1

                    batch_idx += 1
                    if batch_idx == self.batch_size:
                        break
                if batch_stop:
                    break

                FORCE = False
                if FORCE:
                    inputs = array_pad(inputs, self.max_nsteps, pad=-1, force=FORCE)
                    nstarts = np.where(inputs == -1)[1]
                    inputs[inputs == -1] = 0
                else:
                    inputs = array_pad(inputs, self.max_nsteps, pad=0)
                nstarts = [[nstart, idx, 0] for idx, nstart in enumerate(nstarts)]

                _, summary_str, cost, accuracy = sess.run(
                    [self.optim, merged, self.loss, self.accuracy],
                    feed_dict={self.inputs: inputs, self.nstarts: nstarts, self.y: y},
                )
                if counter % 10 == 0:
                    writer.add_summary(summary_str, counter)
                    print(
                        "Epoch: [%2d] [%4d/%4d] time: %4.4f, loss: %.8f, accuracy: %.8f"
                        % (epoch_idx, data_idx, data_max_idx, time.time() - start_time, np.mean(cost), accuracy)
                    )
                counter += 1
            self.save(sess, self.checkpoint_dir, dataset_name)

    def test(self, vocab_size, data_dir="data", dataset_name="cnn"):
        self.prepare_model(data_dir, dataset_name, vocab_size)
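
The nstarts placeholder above is fed [last_token_position, example_index, 0] triples, which tf.slice uses as begin coordinates to pull one 1 x 1 x output_size state per example. A NumPy-only sketch of that indexing with illustrative shapes (not from the original example):

    import numpy as np

    # batch_states has shape (time, batch, state_size) after tf.pack(states)
    time_steps, batch, state_size = 5, 2, 3
    batch_states = np.arange(time_steps * batch * state_size).reshape(
        time_steps, batch, state_size)

    nstarts = [[4, 0, 0], [2, 1, 0]]  # example 0 ends at t=4, example 1 at t=2
    outputs = np.stack([batch_states[t, b, :state_size] for t, b, _ in nstarts])
    print(outputs.shape)              # (2, 3) == (batch, state_size)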
Code Example #5 (TensorFlow 0.x, full model, alternative formatting)
class DeepLSTM(Model):
    """Deep LSTM model."""
    def __init__(self,
                 size=256,
                 depth=3,
                 batch_size=32,
                 keep_prob=0.1,
                 max_nsteps=1000,
                 checkpoint_dir="checkpoint",
                 forward_only=False):
        """Initialize the parameters for an Deep LSTM model.
    
    Args:
      size: int, The dimensionality of the inputs into the Deep LSTM cell [32, 64, 256]
      learning_rate: float, [1e-3, 5e-4, 1e-4, 5e-5]
      batch_size: int, The size of a batch [16, 32]
      keep_prob: unit Tensor or float between 0 and 1 [0.0, 0.1, 0.2]
      max_nsteps: int, The max time unit [1000]
    """
        super(DeepLSTM, self).__init__()

        self.size = int(size)
        self.depth = int(depth)
        self.batch_size = int(batch_size)
        self.output_size = self.depth * self.size
        self.keep_prob = float(keep_prob)
        self.max_nsteps = int(max_nsteps)
        self.checkpoint_dir = checkpoint_dir

        start = time.clock()
        print(" [*] Building Deep LSTM...")
        self.cell = LSTMCell(size, forget_bias=0.0)
        if not forward_only and self.keep_prob < 1:
            self.cell = rnn_cell.DropoutWrapper(self.cell,
                                                output_keep_prob=keep_prob)
        self.stacked_cell = MultiRNNCellWithSkipConn([self.cell] * depth)

        self.initial_state = self.stacked_cell.zero_state(
            batch_size, tf.float32)

    def prepare_model(self, data_dir, dataset_name, vocab_size):
        if not self.vocab:
            self.vocab, self.rev_vocab = load_vocab(data_dir, dataset_name,
                                                    vocab_size)
            print(" [*] Loading vocab finished.")

        self.vocab_size = len(self.vocab)

        self.emb = tf.get_variable("emb", [self.vocab_size, self.size])

        # inputs
        self.inputs = tf.placeholder(tf.int32,
                                     [self.batch_size, self.max_nsteps])
        embed_inputs = tf.nn.embedding_lookup(self.emb,
                                              tf.transpose(self.inputs))

        tf.histogram_summary("embed", self.emb)

        # output states
        _, states = rnn.rnn(self.stacked_cell,
                            tf.unpack(embed_inputs),
                            dtype=tf.float32,
                            initial_state=self.initial_state)
        self.batch_states = tf.pack(states)

        self.nstarts = tf.placeholder(tf.int32, [self.batch_size, 3])
        outputs = tf.pack([
            tf.slice(self.batch_states, nstarts, [1, 1, self.output_size])
            for idx, nstarts in enumerate(tf.unpack(self.nstarts))
        ])

        self.outputs = tf.reshape(outputs, [self.batch_size, self.output_size])

        self.W = tf.get_variable("W", [self.vocab_size, self.output_size])
        tf.histogram_summary("weights", self.W)
        tf.histogram_summary("output", outputs)

        self.y = tf.placeholder(tf.float32, [self.batch_size, self.vocab_size])
        self.y_ = tf.matmul(self.outputs, self.W, transpose_b=True)

        self.loss = tf.nn.softmax_cross_entropy_with_logits(self.y_, self.y)
        tf.scalar_summary("loss", tf.reduce_mean(self.loss))

        correct_prediction = tf.equal(tf.argmax(self.y, 1),
                                      tf.argmax(self.y_, 1))
        self.accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
        tf.scalar_summary("accuracy", self.accuracy)

        print(" [*] Preparing model finished.")

    def train(self,
              sess,
              vocab_size,
              epoch=25,
              learning_rate=0.0002,
              momentum=0.9,
              decay=0.95,
              data_dir="data",
              dataset_name="cnn"):
        self.prepare_model(data_dir, dataset_name, vocab_size)

        start = time.clock()
        print(" [*] Calculating gradient and loss...")
        self.optim = tf.train.AdamOptimizer(learning_rate,
                                            0.9).minimize(self.loss)
        print(" [*] Calculating gradient and loss finished. Take %.2fs" %
              (time.clock() - start))

        # Could not use RMSPropOptimizer because the sparse update of RMSPropOptimizer
        # is not implemented yet (2016.01.24).
        # self.optim = tf.train.RMSPropOptimizer(learning_rate,
        #                                        decay=decay,
        #                                        momentum=momentum).minimize(self.loss)

        sess.run(tf.initialize_all_variables())

        if self.load(sess, self.checkpoint_dir, dataset_name):
            print(" [*] Deep LSTM checkpoint is loaded.")
        else:
            print(" [*] There is no checkpoint for this model.")

        y = np.zeros([self.batch_size, self.vocab_size])

        merged = tf.merge_all_summaries()
        writer = tf.train.SummaryWriter("/tmp/deep", sess.graph_def)

        counter = 0
        start_time = time.time()
        for epoch_idx in xrange(epoch):
            data_loader = load_dataset(data_dir, dataset_name, vocab_size)

            batch_stop = False
            while True:
                y.fill(0)
                inputs, nstarts, answers = [], [], []
                batch_idx = 0
                while True:
                    try:
                        (_, document, question, answer,
                         _), data_idx, data_max_idx = data_loader.next()
                    except StopIteration:
                        batch_stop = True
                        break

                    # [0] marks the boundary between the document and the question
                    data = [int(d) for d in document.split()] + [0] + \
                        [int(q) for q in question.split()]

                    if len(data) > self.max_nsteps:
                        continue

                    inputs.append(data)
                    nstarts.append(len(inputs[-1]) - 1)
                    y[batch_idx][int(answer)] = 1

                    batch_idx += 1
                    if batch_idx == self.batch_size: break
                if batch_stop: break

                FORCE = False
                if FORCE:
                    inputs = array_pad(inputs,
                                       self.max_nsteps,
                                       pad=-1,
                                       force=FORCE)
                    nstarts = np.where(inputs == -1)[1]
                    inputs[inputs == -1] = 0
                else:
                    inputs = array_pad(inputs, self.max_nsteps, pad=0)
                nstarts = [[nstart, idx, 0]
                           for idx, nstart in enumerate(nstarts)]

                _, summary_str, cost, accuracy = sess.run(
                    [self.optim, merged, self.loss, self.accuracy],
                    feed_dict={
                        self.inputs: inputs,
                        self.nstarts: nstarts,
                        self.y: y
                    })
                if counter % 10 == 0:
                    writer.add_summary(summary_str, counter)
                    print("Epoch: [%2d] [%4d/%4d] time: %4.4f, loss: %.8f, accuracy: %.8f" \
                        % (epoch_idx, data_idx, data_max_idx, time.time() - start_time, np.mean(cost), accuracy))
                counter += 1
            self.save(sess, self.checkpoint_dir, dataset_name)

    def test(self, vocab_size, data_dir="data", dataset_name="cnn"):
        self.prepare_model(data_dir, dataset_name, vocab_size)
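
Finally, a hypothetical driver for the TensorFlow 0.x examples above, assuming the data files expected by load_vocab and load_dataset are already prepared under data/; the vocabulary size and dataset name are illustrative:

    with tf.Session() as sess:
        model = DeepLSTM(size=256, depth=3, batch_size=32,
                         keep_prob=0.1, max_nsteps=1000,
                         checkpoint_dir="checkpoint")
        model.train(sess, vocab_size=10000, epoch=25, learning_rate=0.0002,
                    data_dir="data", dataset_name="cnn")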