    def evaluate_step(inp, tar):
        tar_inp = tar[:, :-1]
        tar_real = tar[:, 1:]

        enc_padding_mask, combined_mask, dec_padding_mask = create_masks(
            inp, tar_inp)

        predictions, _ = transformer(inp, tar_inp, False, enc_padding_mask,
                                     combined_mask, dec_padding_mask)
        loss = loss_function(tar_real, predictions)

        train_loss(loss)
        train_accuracy(tar_real, predictions)

    def train_step(inp, tar):
        tar_inp = tar[:, :-1]
        tar_real = tar[:, 1:]

        enc_padding_mask, combined_mask, dec_padding_mask = create_masks(
            inp, tar_inp)

        with tf.GradientTape() as tape:
            predictions, _ = transformer(inp, tar_inp, True, enc_padding_mask,
                                         combined_mask, dec_padding_mask)
            loss = loss_function(tar_real, predictions)
        # compute gradients outside the tape context so the gradient
        # computation itself is not recorded on the tape
        gradients = tape.gradient(loss, transformer.trainable_variables)
        optimizer.apply_gradients(
            zip(gradients, transformer.trainable_variables))

        train_loss(loss)
        train_accuracy(tar_real, predictions)
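
    # loss_function is not shown in this listing. A minimal sketch of the
    # padding-masked cross-entropy from the TensorFlow transformer tutorial,
    # assuming padding positions carry token id 0:
    loss_object = tf.keras.losses.SparseCategoricalCrossentropy(
        from_logits=True, reduction='none')

    def loss_function(real, pred):
        mask = tf.math.logical_not(tf.math.equal(real, 0))  # True on non-pad tokens
        loss_ = loss_object(real, pred)  # per-token loss, shape (batch, seq_len)
        mask = tf.cast(mask, dtype=loss_.dtype)
        loss_ *= mask  # zero out the padding positions
        return tf.reduce_sum(loss_) / tf.reduce_sum(mask)  # mean over real tokens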
Example #3
    def validation_step(inp, tar):
        tar_inp = tar[:, :-1]
        tar_real = tar[:, 1:]

        enc_padding_mask, combined_mask, dec_padding_mask = create_masks(
            inp, tar_inp)

        predictions, _ = transformer(inp, tar_inp, False, enc_padding_mask,
                                     combined_mask, dec_padding_mask)
        predictions = tf.argmax(predictions, axis=-1)
        compared = tf.equal(
            tf.cast(tar_real, tf.int32),
            tf.cast(predictions,
                    tf.int32))  # both sides must be cast to tf.int32 for the comparison; tf.int64 does not work here.
        char_accuracy = tf.reduce_mean(tf.cast(compared, tf.float32))

        seq_accuracy = tf.reduce_mean(
            tf.cast(tf.reduce_all(compared, axis=-1), tf.float32))

        return char_accuracy, seq_accuracy
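
    # create_masks is referenced throughout but not defined here. In the
    # TensorFlow transformer tutorial it is built from padding and look-ahead
    # masks; a sketch under that assumption (padding id 0):
    def create_padding_mask(seq):
        seq = tf.cast(tf.math.equal(seq, 0), tf.float32)
        return seq[:, tf.newaxis, tf.newaxis, :]  # (batch, 1, 1, seq_len)

    def create_look_ahead_mask(size):
        # upper-triangular 1s hide future positions from the decoder
        return 1 - tf.linalg.band_part(tf.ones((size, size)), -1, 0)

    def create_masks(inp, tar):
        enc_padding_mask = create_padding_mask(inp)
        dec_padding_mask = create_padding_mask(inp)  # masks encoder output in the decoder
        look_ahead_mask = create_look_ahead_mask(tf.shape(tar)[1])
        dec_target_padding_mask = create_padding_mask(tar)
        combined_mask = tf.maximum(dec_target_padding_mask, look_ahead_mask)
        return enc_padding_mask, combined_mask, dec_padding_mask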
Example #4
    def evaluate(encoder_inputs):
        # encoder_inputs: a batch of input-token id sequences.

        batch_size = len(encoder_inputs)
        decoder_inputs_init = np.array([word_to_index[GO_TOKEN]] *
                                       batch_size).reshape(-1, 1)
        decoder_inputs = decoder_inputs_init
        for i in range(decoder_length):
            enc_padding_mask, combined_mask, dec_padding_mask = create_masks(
                encoder_inputs, decoder_inputs)

            # predictions.shape == (batch_size, seq_len, vocab_size)
            predictions, attention_weights = transformer(
                encoder_inputs, decoder_inputs, False, enc_padding_mask,
                combined_mask, dec_padding_mask)

            predictions = tf.cast(tf.argmax(predictions, axis=-1), tf.int32)

            decoder_inputs = tf.concat([decoder_inputs_init, predictions],
                                       axis=-1)

        return predictions, attention_weights
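
    # The loop above always runs decoder_length steps, even once every sequence
    # is finished. A hypothetical early-exit variant (END_TOKEN as in test() below):
    def evaluate_with_early_exit(encoder_inputs):
        batch_size = len(encoder_inputs)
        decoder_inputs_init = np.array([word_to_index[GO_TOKEN]] *
                                       batch_size).reshape(-1, 1)
        decoder_inputs = decoder_inputs_init
        for i in range(decoder_length):
            enc_padding_mask, combined_mask, dec_padding_mask = create_masks(
                encoder_inputs, decoder_inputs)
            predictions, attention_weights = transformer(
                encoder_inputs, decoder_inputs, False, enc_padding_mask,
                combined_mask, dec_padding_mask)
            predictions = tf.cast(tf.argmax(predictions, axis=-1), tf.int32)
            decoder_inputs = tf.concat([decoder_inputs_init, predictions],
                                       axis=-1)
            # stop once every sequence in the batch has emitted END_TOKEN
            finished = tf.reduce_any(
                tf.equal(predictions, word_to_index[END_TOKEN]), axis=-1)
            if bool(tf.reduce_all(finished)):
                break
        return predictions, attention_weights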
Example #5
def train():

    ############ DATA
    inputs, targets, word_to_index, index_to_word, VOCAB_SIZE, INPUT_LENGTH, OUTPUT_LENGTH = load_data(
        hp)  # (50000, 29), (50000, 12)
    print(word_to_index)

    ########## Hyper parameters
    hp['vocab_size'] = VOCAB_SIZE
    batch_size = hp['batch_size']
    encoder_length = INPUT_LENGTH  # = hp['MAX_LEN']
    decoder_length = OUTPUT_LENGTH
    num_layers = hp['num_layers']
    hidden_dim = hp['hidden_dim']
    dff = hp['dff']
    num_heads = hp['num_heads']
    num_epoch = hp['num_epoch']
    drop_rate = hp['drop_rate']

    model_save_dir = hp['model_save_dir']

    train_input, test_input, train_target, test_target = train_test_split(
        inputs, targets, test_size=0.1, random_state=13371447)
    print('train_input: {},test_input: {},train_target: {},test_target: {}'.
          format(train_input.shape, test_input.shape, train_target.shape,
                 test_target.shape))
    print('INPUT_LENGTH: {}, OUTPUT_LENGTH: {}'.format(INPUT_LENGTH,
                                                       OUTPUT_LENGTH))

    train_dataset = tf.data.Dataset.from_tensor_slices(
        (train_input, train_target))  # these tensors become the arguments of map_fn below.

    def map_fn(inp, tar):
        # group the inputs as a dict or as a tuple, e.g.:
        #inputs = {'encoder_in': inp, 'decoder_in': tar[:,:-1]}
        #return inputs, tar[:,1:]
        return (inp, tar[:, :-1]), tar[:, 1:]

    train_dataset = train_dataset.batch(batch_size, drop_remainder=False)
    train_dataset = train_dataset.map(map_fn)
    train_dataset = train_dataset.shuffle(buffer_size=20000).repeat()

    valid_dataset = tf.data.Dataset.from_tensor_slices(
        (test_input, test_target))
    valid_dataset = valid_dataset.batch(len(test_input), drop_remainder=True)
    valid_dataset = valid_dataset.map(map_fn)
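
    # Note: shuffle is applied after batch above, so whole batches are shuffled
    # rather than individual examples. A sample-level alternative (map_fn still
    # runs after batch because it slices batched tensors):
    # train_dataset = (tf.data.Dataset
    #                  .from_tensor_slices((train_input, train_target))
    #                  .shuffle(buffer_size=20000)
    #                  .batch(batch_size, drop_remainder=False)
    #                  .map(map_fn)
    #                  .repeat())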

    ############## MODEL
    transformer = Transformer(num_layers=num_layers,
                              d_model=hidden_dim,
                              num_heads=num_heads,
                              dff=dff,
                              input_vocab_size=VOCAB_SIZE,
                              target_vocab_size=VOCAB_SIZE,
                              pe_input=encoder_length,
                              pe_target=decoder_length,
                              rate=drop_rate)
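
    # CustomSchedule (used just below) is defined elsewhere; presumably it is the
    # warmup schedule from the TensorFlow transformer tutorial. A sketch under
    # that assumption:
    class CustomSchedule(tf.keras.optimizers.schedules.LearningRateSchedule):
        def __init__(self, d_model, warmup_steps=4000):
            super(CustomSchedule, self).__init__()
            self.d_model = tf.cast(d_model, tf.float32)
            self.warmup_steps = warmup_steps

        def __call__(self, step):
            # lr = d_model**-0.5 * min(step**-0.5, step * warmup_steps**-1.5)
            step = tf.cast(step, tf.float32)
            arg1 = tf.math.rsqrt(step)
            arg2 = step * (self.warmup_steps**-1.5)
            return tf.math.rsqrt(self.d_model) * tf.math.minimum(arg1, arg2)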

    learning_rate = CustomSchedule(hidden_dim)

    optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate,
                                         beta_1=0.9,
                                         beta_2=0.98,
                                         epsilon=1e-9)
    #train_loss = tf.keras.metrics.Mean(name='train_loss')  # Keras already tracks the loss as a metric by default.
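    # SequenceAccuracy (used just below) is also defined elsewhere; a sketch of a
    # stateful exact-match metric that would fit this call site (the name and
    # reduction are assumptions):
    class SequenceAccuracy(tf.keras.metrics.Metric):
        def __init__(self, name='seq_accuracy', **kwargs):
            super(SequenceAccuracy, self).__init__(name=name, **kwargs)
            self.correct = self.add_weight(name='correct', initializer='zeros')
            self.total = self.add_weight(name='total', initializer='zeros')

        def update_state(self, y_true, y_pred, sample_weight=None):
            # a sequence counts only if every token matches (sample_weight ignored)
            pred_ids = tf.cast(tf.argmax(y_pred, axis=-1), tf.int32)
            match = tf.reduce_all(
                tf.equal(tf.cast(y_true, tf.int32), pred_ids), axis=-1)
            self.correct.assign_add(tf.reduce_sum(tf.cast(match, tf.float32)))
            self.total.assign_add(tf.cast(tf.size(match), tf.float32))

        def result(self):
            return self.correct / self.total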
    char_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(
        name='char_accuracy')
    seq_accuracy = SequenceAccuracy(name='seq_accuracy')

    encoder_inputs = tf.keras.Input(shape=(INPUT_LENGTH, ), dtype=tf.int32)
    decoder_inputs = tf.keras.Input(shape=(OUTPUT_LENGTH, ), dtype=tf.int32)

    enc_padding_mask, combined_mask, dec_padding_mask = create_masks(
        encoder_inputs, decoder_inputs)

    predictions, _ = transformer(encoder_inputs,
                                 decoder_inputs,
                                 training=True,
                                 enc_padding_mask=enc_padding_mask,
                                 look_ahead_mask=combined_mask,
                                 dec_padding_mask=dec_padding_mask)

    model = tf.keras.Model([encoder_inputs, decoder_inputs],
                           predictions)  # must match the structure returned by train_dataset.
    model.compile(optimizer=optimizer,
                  loss=loss_function,
                  metrics=[char_accuracy, seq_accuracy])

    model.summary()

    initial_epoch = 0
    latest = tf.train.latest_checkpoint(
        model_save_dir)  # just reads the checkpoint state file in the directory.
    if latest:
        model.load_weights(latest)
        initial_epoch = int(latest.split('.')[2])  # recover the epoch number from the checkpoint filename.

    file_path = model_save_dir + '/model_ckpt.{epoch:02d}.ckpt'  # the extension selects TF-checkpoint vs. HDF5 format.

    # period=5 has no effect; the callback acts like save_freq='epoch' and saves every epoch.
    #save_freq = len(train_input)//batch_size * batch_size  ----> supposedly this can be given as a
    # number of samples, but it does not work: the model is still saved every single epoch.
    # In TensorFlow 2.2, save_freq is interpreted in units of steps (batches).

    # initial_epoch, epochs (epochs is not added on top of initial_epoch; it is the cumulative epoch count)
    checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
        file_path, save_weights_only=True, verbose=1, save_freq=1406 * 5)
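
    # The hard-coded 1406 above appears to be len(train_input) // batch_size for
    # this split (45,000 samples at batch_size 32), i.e. a checkpoint every 5
    # epochs. Deriving it instead of hard-coding (TF >= 2.2 step-based save_freq):
    # save_every_epochs = 5
    # checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    #     file_path, save_weights_only=True, verbose=1,
    #     save_freq=(len(train_input) // batch_size) * save_every_epochs)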

    # Because the graph was traced with training=True, training=True also applies during validation; this cannot be controlled in the Keras setup.

    steps_per_epoch = len(train_input) // batch_size
    model.fit(train_dataset,
              initial_epoch=initial_epoch,
              epochs=num_epoch,
              verbose=1,
              steps_per_epoch=steps_per_epoch,
              validation_data=valid_dataset,
              validation_freq=1,
              callbacks=[checkpoint_callback])
Example #6
def test():
    inputs, word_to_index, index_to_word, VOCAB_SIZE, INPUT_LENGTH = load_data(
        hp, 'test_data.txt', train_flag=False)
    print(word_to_index)
    print('train_input: {}'.format(inputs.shape))
    print('INPUT_LENGTH: {}'.format(INPUT_LENGTH))

    raw_data = []
    pad_token = word_to_index[PAD]
    for i, d in enumerate(inputs):
        raw_data.append(''.join(
            [index_to_word[x] for x in d if x != pad_token]))

    print(raw_data)

    test_dataset = tf.data.Dataset.from_tensor_slices(
        inputs)  # these arguments become the arguments of the mapping function.
    test_dataset = test_dataset.batch(len(inputs), drop_remainder=False)

    ########## Hyper parameters
    hp['vocab_size'] = VOCAB_SIZE
    batch_size = hp['batch_size']
    encoder_length = INPUT_LENGTH  # = hp['MAX_LEN']
    decoder_length = 11
    num_layers = hp['num_layers']
    hidden_dim = hp['hidden_dim']
    dff = hp['dff']
    num_heads = hp['num_heads']
    drop_rate = hp['drop_rate']

    model_save_dir = hp['model_save_dir']

    transformer = Transformer(num_layers=num_layers,
                              d_model=hidden_dim,
                              num_heads=num_heads,
                              dff=dff,
                              input_vocab_size=VOCAB_SIZE,
                              target_vocab_size=VOCAB_SIZE,
                              pe_input=encoder_length,
                              pe_target=decoder_length,
                              rate=drop_rate)

    encoder_inputs = tf.keras.Input(shape=(INPUT_LENGTH, ), dtype=tf.int32)
    decoder_inputs = tf.keras.Input(shape=(None, ), dtype=tf.int32)

    enc_padding_mask, combined_mask, dec_padding_mask = create_masks(
        encoder_inputs, decoder_inputs)

    predictions, _ = transformer(encoder_inputs,
                                 decoder_inputs,
                                 training=False,
                                 enc_padding_mask=enc_padding_mask,
                                 look_ahead_mask=combined_mask,
                                 dec_padding_mask=dec_padding_mask)

    model = tf.keras.Model([encoder_inputs, decoder_inputs], predictions)
    model.summary()

    latest = tf.train.latest_checkpoint(
        model_save_dir)  # just reads the checkpoint state file in the directory.
    if latest:
        model.load_weights(latest).expect_partial()
        print('Latest checkpoint restored!!', latest)
    else:
        print('No Model Found!!')
        exit()

    result_all = []
    end_token = word_to_index[END_TOKEN]

    @tf.function(experimental_relax_shapes=True)
    def evaluate(encoder_inputs, decoder_inputs):
        # model.predict cannot be called inside a tf.function; invoke the model directly.
        return model([encoder_inputs, decoder_inputs], training=False)

    for _, encoder_inputs in enumerate(test_dataset):
        batch_size = len(encoder_inputs)
        decoder_inputs_init = np.array([word_to_index[GO_TOKEN]] *
                                       batch_size).reshape(-1, 1)
        decoder_inputs = tf.cast(decoder_inputs_init, tf.int32)
        for i in tf.range(decoder_length):

            #predictions = model.predict([encoder_inputs, decoder_inputs])   -----> much slower.
            predictions = model([encoder_inputs, decoder_inputs])

            predictions = tf.cast(tf.argmax(predictions, axis=-1), tf.int32)

            decoder_inputs = tf.concat([decoder_inputs_init, predictions],
                                       axis=-1)

        for p in predictions:
            output = ''.join(
                [index_to_word[x] for x in p.numpy() if x != end_token])
            result_all.append(output)

    for x, y in zip(raw_data, result_all):
        print('{:<30} ==> {}'.format(x, y))