コード例 #1
0
def main_okapi():
    """Train a character-level LSTM with OkapiV2 and print sampled text.

    The function reads ``data/lear.txt``, lower-cases it, cuts it into
    overlapping windows of ``maxlen`` characters (advancing by ``step``),
    one-hot encodes the windows and their following character, and trains a
    two-layer LSTM model for ``num_iterations`` rounds of one epoch each.
    After every round it picks a random seed window from the corpus and, for
    each value in ``diversities``, writes ``num_chars`` generated characters
    to stdout. The trained model is finally saved to
    ``okapi_shakespeare_model.pk``.

    The following names are not defined here and are expected to exist at
    module level (outside this view): ``np``, ``random``, ``sys``,
    ``maxlen``, ``step``, ``h_layer_size``, ``learning_rate``,
    ``num_iterations``, ``batch_size``, ``diversities`` and ``num_chars``.
    """
    import OkapiV2.Core as ok
    from OkapiV2.Core import Model
    from OkapiV2.Layers.Basic import FullyConnected, Dropout, BatchNorm
    from OkapiV2.Layers.Activations import ActivationLayer, PReLULayer
    from OkapiV2.Layers.Recurrent import LSTM
    from OkapiV2 import Activations, Optimizers, Losses

    path = 'data/lear.txt'
    # Use a context manager so the file handle is closed deterministically.
    with open(path) as corpus_file:
        text = corpus_file.read().lower()
    print('Corpus length:', len(text))

    # Sort the vocabulary: plain set iteration order depends on string
    # hashing and can change between interpreter runs, which would make the
    # character <-> index mapping (and thus saved parameters) unreproducible.
    chars = sorted(set(text))
    print('Total Characters:', len(chars))
    char_to_index = {c: i for i, c in enumerate(chars)}
    index_to_char = dict(enumerate(chars))

    # cut the text in semi-redundant sequences of maxlen characters
    window_starts = range(0, len(text) - maxlen, step)
    sentences = [text[i: i + maxlen] for i in window_starts]
    next_chars = [text[i + maxlen] for i in window_starts]
    print('Total Sequences:', len(sentences))

    print('Vectorization...')
    # The builtin ``bool`` replaces the ``np.bool`` alias, which newer NumPy
    # releases no longer provide.
    X = np.zeros((len(sentences), maxlen, len(chars)), dtype=bool)
    y = np.zeros((len(sentences), len(chars)), dtype=bool)
    for i, sentence in enumerate(sentences):
        for t, char in enumerate(sentence):
            X[i, t, char_to_index[char]] = 1
        y[i, char_to_index[next_chars[i]]] = 1

    def sample(a, temperature=1.0):
        """Draw one index from probability array ``a`` rescaled by temperature.

        Lower temperatures sharpen the distribution, higher ones flatten it.
        """
        # Work in float64 so the renormalised probabilities sum to 1 within
        # the tolerance np.random.multinomial checks for.
        a = np.asarray(a, dtype=np.float64)
        # log(0) -> -inf is intended here: it maps back to probability 0.
        with np.errstate(divide='ignore'):
            logits = np.log(a) / temperature
        # Subtract the max before exponentiating for numerical stability;
        # the shift cancels out in the normalisation below.
        weights = np.exp(logits - np.max(logits))
        probabilities = weights / np.sum(weights)
        return int(np.argmax(np.random.multinomial(1, probabilities, 1)))

    model = Model()
    model.add(LSTM((h_layer_size, 1, 1, 1)))
    model.add(PReLULayer())
    model.add(Dropout(0.2))
    model.add(BatchNorm())
    model.add(LSTM((h_layer_size, 1, 1, 1)))
    model.add(PReLULayer())
    model.add(Dropout(0.2))
    model.add(BatchNorm())
    model.add(FullyConnected())
    model.add(ActivationLayer(Activations.alt_softmax))

    model.set_loss(Losses.Crossentropy())
    model.set_optimizer(Optimizers.RMSprop(learning_rate=learning_rate))

    for iteration in range(0, num_iterations):
        print()
        print('-' * 50)
        print('Iteration', iteration + 1)
        model.train(X, y, batch_size=batch_size, num_epochs=1,
                    params_filename='okapi_shakespeare_params.pk')

        # One seed position per training round, shared by all diversities.
        start_index = random.randint(0, len(text) - maxlen - 1)

        for diversity in diversities:
            print()
            print('----- diversity:', diversity)

            sentence = text[start_index: start_index + maxlen]
            print('----- Generating with seed: "' + sentence + '"')
            sys.stdout.write(sentence)

            # ``_`` avoids shadowing the outer ``iteration`` counter.
            for _ in range(num_chars):
                # One-hot encode the current window as a batch of one.
                x = np.zeros((1, maxlen, len(chars)))
                for t, char in enumerate(sentence):
                    x[0, t, char_to_index[char]] = 1.

                preds = model.predict(x)[0]
                next_index = sample(preds, diversity)
                next_char = index_to_char[next_index]

                # Slide the window forward by one character.
                sentence = sentence[1:] + next_char

                sys.stdout.write(next_char)
                sys.stdout.flush()
            print()
    ok.save_model(model, 'okapi_shakespeare_model.pk')