Example #1
    def __init__(self, corpus, **opts):
        self.corpus = corpus

        self.opts = opts

        self.global_step = get_or_create_global_step()
        self.increment_global_step_op = tf.assign(self.global_step, self.global_step + 1, name="increment_global_step")

        self.corpus_size = get_corpus_size(self.corpus["train"])
        self.corpus_size_valid = get_corpus_size(self.corpus["valid"])

        self.word2idx, self.idx2word = build_vocab(self.corpus["train"])
        self.vocab_size = len(self.word2idx)

        self.generator_template = tf.make_template(GENERATOR_PREFIX, generator)
        self.discriminator_template = tf.make_template(DISCRIMINATOR_PREFIX, discriminator)

        self.enqueue_data, _, source, target, sequence_length = \
            prepare_data(self.corpus["train"], self.word2idx, num_threads=7, **self.opts)

        # TODO: option to either do pretrain or just generate?
        self.g_tensors_pretrain = self.generator_template(
            source, target, sequence_length, self.vocab_size, **self.opts)

        self.enqueue_data_valid, self.input_ph, source_valid, target_valid, sequence_length_valid = \
            prepare_data(self.corpus["valid"], self.word2idx, num_threads=1, **self.opts)

        self.g_tensors_pretrain_valid = self.generator_template(
            source_valid, target_valid, sequence_length_valid, self.vocab_size, **self.opts)

        self.decoder_fn = prepare_custom_decoder(
            sequence_length, self.g_tensors_pretrain.embedding_matrix, self.g_tensors_pretrain.output_projections)

        self.g_tensors_fake = self.generator_template(
            source, target, sequence_length, self.vocab_size, decoder_fn=self.decoder_fn, **self.opts)

        self.g_tensors_fake_valid = self.generator_template(
            source_valid, target_valid, sequence_length_valid, self.vocab_size, decoder_fn=self.decoder_fn, **self.opts)

        # TODO: using the rnn outputs from pretraining as "real" instead of target embeddings (aka professor forcing)
        self.d_tensors_real = self.discriminator_template(
            self.g_tensors_pretrain.rnn_outputs, sequence_length, is_real=True, **self.opts)

        # TODO: check to see if sequence_length is correct
        self.d_tensors_fake = self.discriminator_template(
            self.g_tensors_fake.rnn_outputs, None, is_real=False, **self.opts)

        self.g_tvars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=GENERATOR_PREFIX)
        self.d_tvars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=DISCRIMINATOR_PREFIX)
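
For orientation, here is a minimal, hypothetical sketch of a build_vocab helper matching the call word2idx, idx2word = build_vocab(self.corpus["train"]) used above; the whitespace tokenization, the special tokens, and the min_count parameter are assumptions for illustration, not taken from this project.

from collections import Counter

def build_vocab(sentences, min_count=1, specials=("<pad>", "<unk>")):
    # Hypothetical sketch: map each token to an integer id and back.
    counts = Counter(tok for sent in sentences for tok in sent.split())
    # Special tokens come first so that <pad> gets index 0.
    words = list(specials) + [w for w, c in counts.most_common() if c >= min_count]
    word2idx = {w: i for i, w in enumerate(words)}
    idx2word = {i: w for w, i in word2idx.items()}
    return word2idx, idx2word

# word2idx, idx2word = build_vocab(["hello world", "hello tensorflow"])
# vocab_size = len(word2idx)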
Example #2
    def __init__(self, corpus, **opts):
        self.corpus = corpus

        self.opts = opts

        self.global_step = get_or_create_global_step()
        self.increment_global_step_op = tf.assign(self.global_step, self.global_step + 1, name="increment_global_step")

        self.corpus_size = get_corpus_size(self.corpus["train"])
        self.corpus_size_valid = get_corpus_size(self.corpus["valid"])

        self.word2idx, self.idx2word = build_vocab(self.corpus["train"])
        self.vocab_size = len(self.word2idx)

        self.generator_template = tf.make_template(GENERATOR_PREFIX, generator)
        self.discriminator_template = tf.make_template(DISCRIMINATOR_PREFIX, discriminator)

        self.enqueue_data, _, source, target, sequence_length = \
            prepare_data(self.corpus["train"], self.word2idx, num_threads=7, **self.opts)

        # TODO: option to either do pretrain or just generate?
        self.g_tensors_pretrain = self.generator_template(
            source, target, sequence_length, self.vocab_size, **self.opts)

        self.enqueue_data_valid, self.input_ph, source_valid, target_valid, sequence_length_valid = \
            prepare_data(self.corpus["valid"], self.word2idx, num_threads=1, **self.opts)

        self.g_tensors_pretrain_valid = self.generator_template(
            source_valid, target_valid, sequence_length_valid, self.vocab_size, **self.opts)

        self.decoder_fn = prepare_custom_decoder(sequence_length)

        self.g_tensors_fake = self.generator_template(
            source, target, sequence_length, self.vocab_size, decoder_fn=self.decoder_fn, **self.opts)

        # TODO: using the rnn outputs from pretraining as "real" instead of target embeddings (aka professor forcing)
        self.d_tensors_real = self.discriminator_template(
            self.g_tensors_pretrain.rnn_outputs, sequence_length, is_real=True, **self.opts)

        # TODO: check to see if sequence_length is correct
        self.d_tensors_fake = self.discriminator_template(
            self.g_tensors_fake.rnn_outputs, None, is_real=False, **self.opts)

        self.g_tvars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=GENERATOR_PREFIX)
        self.d_tvars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=DISCRIMINATOR_PREFIX)
Example #3
def main(args):
    # Create model directory
    if not os.path.exists(args.model_path):
        os.makedirs(args.model_path)

    # Image preprocessing
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.033, 0.032, 0.033), (0.027, 0.027, 0.027))
    ])

    # Build vocab
    vocab = build_vocab(args.root_path, threshold=0)
    vocab_path = args.vocab_path
    with open(vocab_path, 'wb') as f:
        pickle.dump(vocab, f)
    len_vocab = vocab.idx
    print(vocab.idx2word)

    # Build data loader
    data_loader = get_loader(args.root_path,
                             vocab,
                             transform,
                             args.batch_size,
                             shuffle=True,
                             num_workers=args.num_workers)

    # Build the models
    encoder = ResNet(ResidualBlock, [3, 3, 3], args.embed_size)
    decoder = DecoderRNN(args.embed_size, args.hidden_size, len(vocab),
                         args.num_layers)

    # Move the models to GPU (device 1) if CUDA is available
    if torch.cuda.is_available():
        encoder.cuda(1)
        decoder.cuda(1)

    # Loss and Optimizer
    criterion = nn.CrossEntropyLoss()
    params = list(decoder.parameters()) + list(encoder.parameters())
    optimizer = torch.optim.Adam(params, lr=args.learning_rate)

    # Train the Models
    total_step = len(data_loader)
    for epoch in range(args.num_epochs):
        for i, (images, captions, lengths) in enumerate(data_loader):

            # make one hot
            # cap_ = torch.unsqueeze(captions,2)
            # one_hot_ = torch.FloatTensor(captions.size(0),captions.size(1),len_vocab).zero_()
            # one_hot_caption = one_hot_.scatter_(2, cap_, 1)

            # Set mini-batch dataset
            images = to_var(images)
            captions = to_var(captions)
            #captions_ = to_var(one_hot_caption)

            targets = pack_padded_sequence(captions, lengths,
                                           batch_first=True)[0]
            # Forward, Backward and Optimize
            optimizer.zero_grad()
            features = encoder(images)
            outputs = decoder(features, captions, lengths)

            captions = captions.view(-1)
            outputs = outputs.view(-1, len_vocab)

            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()

            #print(targets)
            #print(outputs)

            # Print log info
            if i % args.log_step == 0:
                print(
                    'Epoch [%d/%d], Step [%d/%d], Loss: %.4f, Perplexity: %5.4f'
                    % (epoch, args.num_epochs, i, total_step, loss.data[0],
                       np.exp(loss.data[0])))

                #test set accuracy
                #print(outputs.max(1)[1])
                outputs_np = outputs.max(1)[1].cpu().data.numpy()
                targets_np = targets.cpu().data.numpy()

                print(outputs_np)
                print(targets_np)

                location_match = 0
                size_match = 0
                shape_match = 0
                exact_match = 0
                # Use j here so the outer enumerate() index i is not shadowed
                # (i drives the log_step and save_step checks below).
                for j in range(len(targets_np)):
                    if outputs_np[j] == targets_np[j]:
                        exact_match += 1
                    if args.batch_size <= j < args.batch_size * 2 and outputs_np[j] == targets_np[j]:
                        shape_match += 1
                    elif args.batch_size * 2 <= j < args.batch_size * 3 and outputs_np[j] == targets_np[j]:
                        location_match += 1
                    elif args.batch_size * 3 <= j < args.batch_size * 4 and outputs_np[j] == targets_np[j]:
                        size_match += 1

                print(
                    'location match : %.4f, shape match : %.4f, exact_match: %.4f'
                    % (location_match / (args.batch_size), shape_match /
                       args.batch_size, exact_match / len(targets_np)))

            # Save the models
            if (i + 1) % args.save_step == 0:
                torch.save(
                    decoder.state_dict(),
                    os.path.join(args.model_path,
                                 'decoder-%d-%d.pkl' % (epoch + 1, i + 1)))
                torch.save(
                    encoder.state_dict(),
                    os.path.join(args.model_path,
                                 'encoder-%d-%d.pkl' % (epoch + 1, i + 1)))
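
Examples #3 and #8 call a to_var helper that is not shown on this page. The following is a minimal sketch in the old Variable-based PyTorch style, written only from how it is used here; the GPU device index 1 mirrors the cuda(1) calls above, everything else is an assumption.

import torch
from torch.autograd import Variable

def to_var(x):
    # Hypothetical sketch: move the tensor to GPU 1 when available, then wrap it
    # in a Variable (pre-0.4 PyTorch style, matching loss.data[0] in the examples).
    if torch.cuda.is_available():
        x = x.cuda(1)
    return Variable(x)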
Example #4
    def __init__(self, dataset, opts, use_pretrained_embeddings=True):

        # TODO: Add Dropout layer later.
        self.dropout_keep_prob = tf.placeholder(tf.float32,
                                                name="dropout_keep_prob")

        if use_pretrained_embeddings:
            word2vec = get_word2vec_model(WORD2VEC_PATH)
            word2idx, idx2word, label2idx, idx2label = build_vocab(
                dataset.training_files,
                dataset.vocab_file,
                word2vec,
                min_counts=opts['min_counts'])
            embedding_weights = get_embedding_weights(word2idx, word2vec)
            embedding_length = embedding_weights.shape[1]
            # TODO: embedding might be trainable.
            self.embeddings = tf.Variable(embedding_weights,
                                          dtype=tf.float32,
                                          trainable=False)
        else:
            word2idx, idx2word, label2idx, idx2label = build_vocab(
                dataset.training_files,
                dataset.vocab_file,
                min_counts=opts['min_counts'])
            embedding_length = opts['embedding_length']
            self.embeddings = tf.Variable(tf.random_uniform(
                [len(word2idx), embedding_length], -1.0, 1.0),
                                          dtype=tf.float32)

        self.sess = tf.Session()

        self.enqueue_data, self.source, self.target_word, self.label, \
            self.sequence_length = prepare_data(self.sess, dataset.training_files, word2idx, label2idx, **opts)

        self.target_words_embedded = tf.nn.embedding_lookup(
            self.embeddings, self.target_word)
        self.sentences_embedded = tf.nn.embedding_lookup(
            self.embeddings, self.source)

        hidden_unit_size = opts['hidden_unit_size']
        num_senses = len(label2idx)

        encoder_cell = LSTMCell(hidden_unit_size)

        (encoder_fw_outputs, encoder_bw_outputs), (encoder_fw_final_state, encoder_bw_final_state) = \
            tf.nn.bidirectional_dynamic_rnn(cell_fw=encoder_cell, cell_bw=encoder_cell, inputs=self.sentences_embedded,
                                            sequence_length=self.sequence_length, dtype=tf.float32, time_major=True)

        encoder_final_state_c = tf.concat(
            (encoder_fw_final_state.c, encoder_bw_final_state.c), 1)
        encoder_final_state_h = tf.concat(
            (encoder_fw_final_state.h, encoder_bw_final_state.h), 1)
        encoder_final_state = LSTMStateTuple(c=encoder_final_state_c,
                                             h=encoder_final_state_h)

        # self.encoder_target_embedding = encoder_final_state.c
        self.encoder_target_embedding = tf.concat(
            (encoder_final_state.c, self.target_words_embedded), 1)

        with tf.name_scope("output"):
            W = tf.Variable(tf.truncated_normal(
                [hidden_unit_size * 2 + embedding_length, num_senses],
                stddev=0.1),
                            name="W")
            b = tf.Variable(tf.constant(0.1, shape=[num_senses]), name="b")
            self.scores = tf.matmul(self.encoder_target_embedding, W) + b
            self.predictions = tf.argmax(self.scores, 1, name="predictions")

        with tf.name_scope('cross_entropy'):
            labels = tf.one_hot(self.label, num_senses)
            self.diff = tf.nn.softmax_cross_entropy_with_logits(
                labels=labels, logits=self.scores)

        with tf.name_scope('loss'):
            self.loss = tf.reduce_mean(self.diff)

        with tf.name_scope('train'):
            self.train_step = tf.train.AdamOptimizer(
                opts['learning_rate']).minimize(self.loss)

        with tf.name_scope('accuracy'):
            with tf.name_scope('correct_prediction'):
                correct_prediction = tf.equal(self.predictions,
                                              tf.argmax(labels, 1))
            with tf.name_scope('accuracy'):
                self.accuracy = tf.reduce_mean(
                    tf.cast(correct_prediction, tf.float32))

        self.sess.run(tf.global_variables_initializer())
Example #5
def main():
    training_dataset = [[
        "안녕하세요, 제 이름은 윤주성입니다", "hello, my name is joosung yoon"
    ], ["저는 텐서플로우를 좋아합니다", "i like tensorflow"]]
    X_y_split = list(zip(*training_dataset))

    X_train_str = list(
        X_y_split[0])  # ['안녕하세요, 제 이름은 윤주성입니다', '저는 텐서플로우를 좋아합니다']
    y_train_str = list(
        X_y_split[1]
    )  # ['Hello, my name is joosung Yoon', 'I like TensorFlow']
    print(X_train_str)
    print(y_train_str)

    corpus = []
    corpus.extend(X_train_str)
    corpus.extend(
        y_train_str
    )  # ['안녕하세요, 제 이름은 윤주성입니다', '저는 텐서플로우를 좋아합니다', 'Hello, my name is joosung Yoon', 'I like TensorFlow']

    vocab = build_vocab(corpus)
    print(vocab.idx2word)

    max_sequence_len = 13

    X_train, _, _ = word_to_pad_word_ids(text_batch=X_train_str,
                                         vocab=vocab,
                                         maxlen=max_sequence_len,
                                         add_start_end_token=True)
    _, tar_inp, tar_real = word_to_pad_word_ids(
        text_batch=y_train_str,
        vocab=vocab,
        maxlen=max_sequence_len,
        add_start_end_token=True)  # add +1 maxlen for start, end token

    print(
        X_train
    )  # [[ 5  6  7  8  9 10 11 12 13 14  0  0  0  0  0], [15 16 17 18 19  0  0  0  0  0  0  0  0  0  0]]
    print(
        tar_inp
    )  # [[20  8 21 22 23 24 25  0  0  0  0  0  0  0  0], [26 27 28  0  0  0  0  0  0  0  0  0  0  0  0]]
    print(tar_real)

    print(decode_word_ids(X_train, vocab))
    # [['안녕/NNG', '하/XSV', '세요/EP+EF', ',/SC', '제/MM', '이름/NNG', '은/JX', '윤주/NNG', '성/XSN', '입니다/VCP+EC', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>'],
    # ['저/NP', '는/JX', '텐서플로우/NNP', '를/JKO', '좋아합니다/VV+EC', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']]

    config = {}
    config['vocab_size'] = len(vocab.idx2word)
    config['maxlen'] = max_sequence_len
    config['embed_dim'] = 100
    config['head_num'] = 5
    config['split_embed_dim'] = 20
    config['layer_num'] = 2
    config['feed_forward_dim'] = 100

    # define model
    model = Transformer(config=config)

    loss_object = tf.keras.losses.SparseCategoricalCrossentropy(
        from_logits=True, reduction='none')  # input label == index of class
    optimizer = tf.keras.optimizers.Adam()

    train_loss = tf.keras.metrics.Mean(name='train_loss')
    train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(
        name='train_accuracy')

    test_loss = tf.keras.metrics.Mean(name='test_loss')
    test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(
        name='test_accuracy')

    def loss_function(real, pred):
        mask = tf.math.logical_not(tf.math.equal(real, 0))  # 1 where the token is not padding
        loss_ = loss_object(real, pred)

        mask = tf.cast(mask, dtype=loss_.dtype)
        loss_ *= mask  # keep the loss at real (non-padding) positions, zero it out at padding positions

        return tf.reduce_mean(loss_)

    def create_padding_mask(seq):
        seq = tf.cast(tf.math.equal(seq, 0), tf.float32)

        # add extra dimensions so that we can add the padding
        # to the attention logits.
        return seq[:, tf.newaxis, tf.newaxis, :]  # (batch_size, 1, 1, seq_len)

    def create_look_ahead_mask(step_size):
        """
        - In the decoder, self-attention at each step must not see future tokens, so that training matches step-by-step inference.
        - Each step only owns as many attention positions as its step index.
        - To hide future tokens, every step masks out all of the steps that come after it.
        - Step 1 masks the remaining n-1 positions, step 2 masks everything except the first two (n-2), and so on.
        - The result is a triangular masking matrix:
        - a step * step matrix split along its diagonal.

        example)
        x = tf.random.uniform((1, 3))
        temp = create_look_ahead_mask(x.shape[1])
        temp:
        <tf.Tensor: id=311521, shape=(3, 3), dtype=float32, numpy=
        array([[ 0.,  1.,  1.],
               [ 0.,  0.,  1.],
               [ 0.,  0.,  0.]], dtype=float32)>

        Special usecase:
         tf.linalg.band_part(input, 0, -1) ==> Upper triangular part.
         tf.linalg.band_part(input, -1, 0) ==> Lower triangular part.
         tf.linalg.band_part(input, 0, 0) ==> Diagonal.
        :param step_size:
        :return:

        """
        mask = 1 - tf.linalg.band_part(tf.ones((step_size, step_size)), -1, 0)
        return mask  # (seq_len, seq_len)

    def create_masks(inp, tar):
        # Encoder padding mask
        enc_padding_mask = create_padding_mask(inp)

        # Used in the 2nd attention block in the decoder.
        # This padding mask is used to mask the encoder outputs.
        dec_padding_mask = create_padding_mask(inp)

        # Used in the 1st attention block in the decoder.
        # It is used to pad and mask future tokens in the input received by
        # the decoder.
        look_ahead_mask = create_look_ahead_mask(tf.shape(tar)[1])
        dec_target_padding_mask = create_padding_mask(tar)
        combined_mask = tf.maximum(dec_target_padding_mask, look_ahead_mask)

        return enc_padding_mask, combined_mask, dec_padding_mask

    # Wrapping the Python function with the tf.function() decorator (instead of running it in a session)
    # JIT-compiles it so that it executes as a single graph.
    # In other words, tf.function() moves the function from eager mode to graph mode.
    # @tf.function
    def train_step(enc_input, tar_inp, tar_real):
        # tar_inp = label[:, :-1] # remove </s>
        # tar_real = label[:, 1:] # remove <s>

        enc_padding_mask, combined_mask, dec_padding_mask = create_masks(
            enc_input, tar_inp)

        with tf.GradientTape() as tape:
            predictions, attention_weights = model(enc_input, tar_inp, True,
                                                   enc_padding_mask,
                                                   combined_mask,
                                                   dec_padding_mask)
            loss = loss_function(tar_real,
                                 predictions)  # masking losses for padding

            predicted_id = tf.cast(tf.argmax(predictions, axis=-1),
                                   tf.int32).numpy()
            print("X_train: ", decode_word_ids(enc_input.numpy(), vocab))
            print("tar_inp: ", decode_word_ids(tar_inp.numpy(), vocab))
            print("tar_real: ", decode_word_ids(tar_real.numpy(), vocab))
            print("result: ", decode_word_ids(predicted_id, vocab))

        gradients = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))
        train_loss(loss)
        train_accuracy(tar_real, predictions)

    # @tf.function
    # def test_step(Y_test, label):
    #     predictions = model(Y_test)
    #     t_loss = loss_object(label, predictions)
    #
    #     test_loss(t_loss)
    #     test_accuracy(label, predictions)

    def plot_attention_weights(attention, sentence, result, layer):
        import matplotlib.pyplot as plt
        from matplotlib import font_manager, rc
        # print("font_list: ", font_manager.get_fontconfig_fonts())
        font_name = font_manager.FontProperties(
            fname='/Library/Fonts/NanumSquareBold.ttf').get_name()
        rc('font', family=font_name)

        fig = plt.figure(figsize=(16, 8))

        sentence, _, _ = word_to_pad_word_ids(
            text_batch=[sentence],
            vocab=vocab,
            maxlen=max_sequence_len,
            add_start_end_token=True)  #tokenizer_pt.encode(sentence)
        attention = tf.squeeze(attention[layer], axis=0)

        for head in range(attention.shape[0]):
            ax = fig.add_subplot(2, 4, head + 1)

            # plot the attention weights
            im = ax.matshow(
                attention[head][:, :],
                cmap='viridis')  # viridis  #plt.cm.Reds # plt.cm.Blues

            fontdict = {'fontsize': 10}

            ax.set_xticks(range(len(decode_word_ids(sentence, vocab)[0])))
            ax.set_yticks(range(len(decode_word_ids(result, vocab)[0])))

            from mpl_toolkits.axes_grid1 import make_axes_locatable
            divider = make_axes_locatable(ax)
            cax = divider.append_axes("right", size="5%", pad=0.05)
            plt.colorbar(im, cax=cax)
            ax.set_xticklabels(decode_word_ids(sentence, vocab)[0],
                               fontdict=fontdict,
                               rotation=90)

            ax.set_yticklabels(decode_word_ids(result, vocab)[0],
                               fontdict=fontdict)

            ax.set_xlabel('Head {}'.format(head + 1))

        plt.tight_layout()
        plt.show()

    def evaluate(inp_sentence, vocab, max_sequence_len):

        # at inference time there is no need to pad the length by an extra +1
        encoder_input, _, _ = word_to_pad_word_ids(text_batch=[inp_sentence],
                                                   vocab=vocab,
                                                   maxlen=max_sequence_len,
                                                   add_start_end_token=True)
        print("encoder_input: ", encoder_input)

        decoder_input = ['<s>']
        decoder_input = [vocab.word2idx[_] for _ in decoder_input]
        output = tf.expand_dims(decoder_input, 0)
        print("output: ", decode_word_ids(output.numpy(), vocab))

        for i in range(max_sequence_len):

            enc_padding_mask, combined_mask, dec_padding_mask = create_masks(
                encoder_input, output)

            # predictions.shape == (batch_size, seq_len, vocab_size)
            predictions, attention_weights = model(encoder_input, output,
                                                   False, enc_padding_mask,
                                                   combined_mask,
                                                   dec_padding_mask)

            # select the last word from the seq_len dimension
            print("predicted_id: ",
                  tf.cast(tf.argmax(predictions, axis=-1), tf.int32))
            predictions = predictions[:, -1:, :]  # (batch_size, 1, vocab_size)
            predicted_id = tf.cast(tf.argmax(predictions, axis=-1), tf.int32)

            # return the result if the predicted_id is equal to the end token
            if tf.equal(predicted_id, vocab.word2idx['</s>']):
                return tf.squeeze(output, axis=0), attention_weights

            # concatentate the predicted_id to the output which is given to the decoder
            # as its input.
            output = tf.concat([output, predicted_id], axis=-1)
            print("output: ", decode_word_ids(output.numpy(), vocab))

        return tf.squeeze(output, axis=0), attention_weights

    def translate(sentence, vocab, max_sequence_len, plot=''):
        result, attention_weights = evaluate(sentence, vocab, max_sequence_len)
        result = [result.numpy()]

        predicted_sentence = decode_word_ids(result, vocab)

        print('Input: {}'.format(sentence))
        print('Predicted translation: {}'.format(predicted_sentence))

        if plot:
            plot_attention_weights(attention_weights, sentence, result, plot)

    ### Training

    EPOCHS = 4000
    BATCH_SIZE = 45

    train_ds = tf.data.Dataset.from_tensor_slices((X_train, tar_inp, tar_real))
    train_ds = train_ds.repeat(EPOCHS).shuffle(1024).batch(BATCH_SIZE)
    train_ds = train_ds.prefetch(tf.data.experimental.AUTOTUNE)

    for step, (X_train_batch, tar_inp, tar_real) in enumerate(train_ds):
        train_step(X_train_batch, tar_inp, tar_real)

        template = 'Step {}, Loss: {}, Accuracy: {}, Test Loss: {}, Test Accuracy: {}'
        print(
            template.format(step + 1, train_loss.result(),
                            train_accuracy.result() * 100, test_loss.result(),
                            test_accuracy.result() * 100))

    translate("안녕하세요, 제 이름은 윤주성입니다",
              vocab,
              max_sequence_len,
              plot='decoder_layer2_block2')

    model.summary()
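
As a quick sanity check of the masking helpers defined in the example above, here is a self-contained snippet (the toy token ids are illustrative only) showing how the target padding mask and the look-ahead mask combine into the decoder mask:

import tensorflow as tf

def create_padding_mask(seq):
    # 1.0 where the token id is 0 (padding), broadcastable over attention logits
    seq = tf.cast(tf.math.equal(seq, 0), tf.float32)
    return seq[:, tf.newaxis, tf.newaxis, :]  # (batch_size, 1, 1, seq_len)

def create_look_ahead_mask(step_size):
    # 1.0 above the diagonal: position t may not attend to positions > t
    return 1 - tf.linalg.band_part(tf.ones((step_size, step_size)), -1, 0)

tar = tf.constant([[20, 8, 21, 0]])            # one target sequence, last token is padding
look_ahead_mask = create_look_ahead_mask(tf.shape(tar)[1])
dec_target_padding_mask = create_padding_mask(tar)
combined_mask = tf.maximum(dec_target_padding_mask, look_ahead_mask)
print(combined_mask.numpy())                   # shape (1, 1, 4, 4); the padded column is all ones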
Example #6
    eval_data_dir = sys.argv[2]
    base_dir = sys.argv[3]
    window_size = sys.argv[4]
    train_ratio = sys.argv[5]
    vocab_dir = os.path.join(base_dir, 'vocab.txt')
    save_dir = os.path.join(base_dir, train_ratio + '/checkpoints/textrnn')
    save_path = os.path.join(save_dir, 'best_validation')  # path for saving the best validation result
    window_size = int(window_size)
    train_ratio = float(train_ratio)

    print('Configuring RNN model...')
    print('Building vocab if not exists.')
    start_time_vocab = time.time()
    config = TRNNConfig()
    if not os.path.exists(vocab_dir):  # rebuild the vocabulary if it does not exist
        build_vocab(train_data_dir, vocab_dir)
    categories, cat_to_id = read_category()
    words, word_to_id = read_vocab(vocab_dir)
    config.vocab_size = len(words)
    model = TextRNN(config)
    time_dif_vocab = get_time_dif(start_time_vocab)
    print("Time usage:", time_dif_vocab)

    # Read the raw data and split it into three sets (train / validation / test)
    print("Processing and loading training and validation data...")
    start_time = time.time()
    x_train, x_val, x_test, y_train, y_val, y_test = process_all_file(
        train_data_dir, eval_data_dir, train_ratio, word_to_id, cat_to_id,
        config.seq_length, window_size)
    time_dif = get_time_dif(start_time)
    print("Time usage:", time_dif)
Example #7
    save_dir_bak = 'checkpoints/textcnn_bak'
    save_path_bak = os.path.join(save_dir_bak, 'best_validation')  # path for saving the best validation result

    save_dir = 'checkpoints/textcnn'
    save_path = os.path.join(save_dir, 'best_validation')

    print('Configuring CNN model...')
   
    normal_num = [690] * 3
    max_acc = 0
    greatest_normal_num = 0
    for i in normal_num:
        get_train_data(i)
        config = TCNNConfig()
        if not os.path.exists(vocab_dir):  # rebuild the vocabulary if it does not exist
            build_vocab(train_dir, vocab_dir, config.vocab_size)
        categories, cat_to_id = read_category(train_dir)
        words, word_to_id = read_vocab(vocab_dir)
        config.vocab_size = len(words)
        config.num_classes = len(categories)
        model = TextCNN(config)
        # Train the model and save it to the bak directory
        train()

        print("Loading test data...")
        start_time = time.time()
        x_test, y_test = process_file(test_dir, word_to_id, cat_to_id, config.seq_length)

        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.1) 
        session = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
        
Example #8
def main(args):

    # Image preprocessing
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.033, 0.032, 0.033), (0.027, 0.027, 0.027))
    ])

    vocab = build_vocab(args.root_path, threshold=0)
    num_class = 9

    # Build data loader
    data_loader = get_loader(args.root_path,
                             vocab,
                             transform,
                             args.batch_size,
                             shuffle=True,
                             num_workers=args.num_workers)

    # Build the models
    cnn = ResNet(ResidualBlock, [3, 3, 3], num_class)

    if torch.cuda.is_available():
        cnn.cuda(1)

    # Loss and Optimizer
    criterion = nn.CrossEntropyLoss()
    params = list(cnn.parameters())
    optimizer = torch.optim.Adam(params, lr=args.learning_rate)

    # Train the Models
    total_step = len(data_loader)
    for epoch in range(args.num_epochs):
        for i, (images, captions, lengths) in enumerate(data_loader):

            #if i > 1 :
            #  break;
            idx_arr = []
            for element in captions[:, 1]:
                idx_arr.append(int(vocab.idx2word[element]) - 1)
            temp_arr = np.array(idx_arr)
            trg_arr = torch.from_numpy(temp_arr)
            target = to_var(trg_arr)
            images = to_var(images)

            optimizer.zero_grad()
            features = cnn(images)
            loss = criterion(features, target)
            loss.backward()
            optimizer.step()

            # Print log info
            if i % args.log_step == 0:
                print(
                    'Epoch [%d/%d], Step [%d/%d], Loss: %.4f, Perplexity: %5.4f'
                    % (epoch, args.num_epochs, i, total_step, loss.data[0],
                       np.exp(loss.data[0])))

                #print(features)
                #print(target)

                ##test set accuracy
                #rearrange tensor to batch_size * caption_size
                re_target = rearrange_tensor(target, captions.size(0), 1)
                re_out_max = rearrange_tensor(
                    features.max(1)[1], captions.size(0), 1)
                #convert to numpy
                outputs_np = re_out_max.cpu().data.numpy()
                targets_np = re_target.cpu().data.numpy()

                location_match = 0
                # Use j so the outer enumerate() index i (used for log_step) is not shadowed.
                for j in range(len(targets_np)):
                    if outputs_np[j] == targets_np[j]:
                        location_match += 1
                print('location match accuracy: %.4f' %
                      (location_match / len(targets_np)))

    #test model
    print('---------------------------------')
    cnn.eval()
    test_loader = get_loader(args.test_path,
                             vocab,
                             transform,
                             args.batch_size,
                             shuffle=True,
                             num_workers=args.num_workers)
    for images, captions, lengths in test_loader:
        idx_arr = []
        for element in captions[:, 1]:
            idx_arr.append(int(vocab.idx2word[element]) - 1)
        temp_arr = np.array(idx_arr)
        trg_arr = torch.from_numpy(temp_arr)
        target = to_var(trg_arr)

        images = to_var(images)
        features = cnn(images)

        re_target = rearrange_tensor(target, captions.size(0), 1)
        re_out_max = rearrange_tensor(features.max(1)[1], captions.size(0), 1)
        #convert to numpy
        outputs_np = re_out_max.cpu().data.numpy()
        targets_np = re_target.cpu().data.numpy()

        location_match = 0
        for i in range(len(targets_np)):
            if (outputs_np[i] == targets_np[i]):
                location_match += 1
        print('location match accuracy: %.4f' %
              (location_match / len(targets_np)))
    test_txt_dirs = txt_dirs[int(len(txt_dirs) * 0.9):]

    # print "载入测试样本..."
    # test_txt_dirs = list()
    # # test_data_dir = '/home/abc/ssd/pzw/nlp/data/0523/word_sep_test/'
    # test_data_dir = '/home/abc/ssd/pzw/nlp/data/test_data3/'
    # # test_data_dir = '/home/zhwpeng/abc/nlp/data/0324/word_sep_test/'
    # # test_data_dir = '/home/zhwpeng/abc/text_classify/data/0412/raw/test_data3/'
    # for fold in glob(test_data_dir + '*'):
    #     test_txt_dirs = test_txt_dirs + glob(fold + '/*.txt')
    #     # test_txt_dirs = test_txt_dirs + glob(fold + '/*.txt')[:10]
    # # print "The total number of test samples is {}".format(len(test_txt_dirs))
    # np.random.shuffle(test_txt_dirs)

    print "配置CNN模型..."
    config = TCNNConfig()
    seq_length = config.seq_length
    if not os.path.exists(vocab_dir):  # rebuild the vocabulary if it does not exist
        build_vocab(txt_dirs, seq_length, vocab_dir, config.vocab_size)
    categories, cat_to_id = read_category(types)
    words, word_to_id = read_vocab(vocab_dir)
    config.vocab_size = len(words)
    model = TextCNN(config)

    if sys.argv[1] == 'train':
        print "Start training..."
        train()
    else:
        print "Start testing..."
        model_tes_t(test_txt_dirs, train_flag=False)