def __init__(self, corpus, **opts):
    """Assemble the generator/discriminator graph for one corpus.

    Args:
        corpus: Mapping providing a "train" and a "valid" entry; both are
            handed to ``get_corpus_size`` and ``prepare_data``, and the
            "train" entry also to ``build_vocab``.
        **opts: Extra options, forwarded untouched to ``prepare_data`` and to
            the generator and discriminator templates.
    """
    self.corpus = corpus
    self.opts = opts

    # Global step and the op that advances it by one.
    self.global_step = get_or_create_global_step()
    self.increment_global_step_op = tf.assign(
        self.global_step, self.global_step + 1,
        name="increment_global_step")

    # Corpus statistics and vocabulary (vocabulary comes from "train" only).
    self.corpus_size = get_corpus_size(self.corpus["train"])
    self.corpus_size_valid = get_corpus_size(self.corpus["valid"])
    self.word2idx, self.idx2word = build_vocab(self.corpus["train"])
    self.vocab_size = len(self.word2idx)

    # Templates, so that repeated calls share variables.
    self.generator_template = tf.make_template(GENERATOR_PREFIX, generator)
    self.discriminator_template = tf.make_template(
        DISCRIMINATOR_PREFIX, discriminator)

    # Training input pipeline and pretraining generator.
    train_pipeline = prepare_data(
        self.corpus["train"], self.word2idx, num_threads=7, **self.opts)
    self.enqueue_data, _, src, tgt, seq_len = train_pipeline
    train_inputs = (src, tgt, seq_len, self.vocab_size)
    # TODO: option to either do pretrain or just generate?
    self.g_tensors_pretrain = self.generator_template(
        *train_inputs, **self.opts)

    # Validation input pipeline and pretraining generator.
    valid_pipeline = prepare_data(
        self.corpus["valid"], self.word2idx, num_threads=1, **self.opts)
    (self.enqueue_data_valid, self.input_ph,
     src_valid, tgt_valid, seq_len_valid) = valid_pipeline
    valid_inputs = (src_valid, tgt_valid, seq_len_valid, self.vocab_size)
    self.g_tensors_pretrain_valid = self.generator_template(
        *valid_inputs, **self.opts)

    # Custom decoder built from the pretraining generator's embedding matrix
    # and output projections; used for the "fake" generator passes.
    pretrain = self.g_tensors_pretrain
    self.decoder_fn = prepare_custom_decoder(
        seq_len, pretrain.embedding_matrix, pretrain.output_projections)
    self.g_tensors_fake = self.generator_template(
        *train_inputs, decoder_fn=self.decoder_fn, **self.opts)
    self.g_tensors_fake_valid = self.generator_template(
        *valid_inputs, decoder_fn=self.decoder_fn, **self.opts)

    # TODO: using the rnn outputs from pretraining as "real" instead of
    # target embeddings (aka professor forcing)
    real_outputs = self.g_tensors_pretrain.rnn_outputs
    self.d_tensors_real = self.discriminator_template(
        real_outputs, seq_len, is_real=True, **self.opts)

    # TODO: check to see if sequence_length is correct
    fake_outputs = self.g_tensors_fake.rnn_outputs
    self.d_tensors_fake = self.discriminator_template(
        fake_outputs, None, is_real=False, **self.opts)

    # Trainable variables of each sub-network, selected by scope prefix.
    trainable = tf.GraphKeys.TRAINABLE_VARIABLES
    self.g_tvars = tf.get_collection(trainable, scope=GENERATOR_PREFIX)
    self.d_tvars = tf.get_collection(trainable, scope=DISCRIMINATOR_PREFIX)
def __init__(self, corpus, **opts):
    """Assemble the generator/discriminator graph for one corpus.

    Args:
        corpus: Mapping providing a "train" and a "valid" entry; both are
            handed to ``get_corpus_size`` and ``prepare_data``, and the
            "train" entry also to ``build_vocab``.
        **opts: Extra options, forwarded untouched to ``prepare_data`` and to
            the generator and discriminator templates.
    """
    self.corpus = corpus
    self.opts = opts

    # Global step and the op that advances it by one.
    self.global_step = get_or_create_global_step()
    self.increment_global_step_op = tf.assign(
        self.global_step, self.global_step + 1,
        name="increment_global_step")

    # Corpus statistics and vocabulary (vocabulary comes from "train" only).
    self.corpus_size = get_corpus_size(self.corpus["train"])
    self.corpus_size_valid = get_corpus_size(self.corpus["valid"])
    self.word2idx, self.idx2word = build_vocab(self.corpus["train"])
    self.vocab_size = len(self.word2idx)

    # Templates, so that repeated calls share variables.
    self.generator_template = tf.make_template(GENERATOR_PREFIX, generator)
    self.discriminator_template = tf.make_template(
        DISCRIMINATOR_PREFIX, discriminator)

    # Training input pipeline and pretraining generator.
    train_pipeline = prepare_data(
        self.corpus["train"], self.word2idx, num_threads=7, **self.opts)
    self.enqueue_data, _, src, tgt, seq_len = train_pipeline
    train_inputs = (src, tgt, seq_len, self.vocab_size)
    # TODO: option to either do pretrain or just generate?
    self.g_tensors_pretrain = self.generator_template(
        *train_inputs, **self.opts)

    # Validation input pipeline and pretraining generator.
    valid_pipeline = prepare_data(
        self.corpus["valid"], self.word2idx, num_threads=1, **self.opts)
    (self.enqueue_data_valid, self.input_ph,
     src_valid, tgt_valid, seq_len_valid) = valid_pipeline
    self.g_tensors_pretrain_valid = self.generator_template(
        src_valid, tgt_valid, seq_len_valid, self.vocab_size, **self.opts)

    # Custom decoder (built from the training sequence lengths only) drives
    # the "fake" generator pass.
    self.decoder_fn = prepare_custom_decoder(seq_len)
    self.g_tensors_fake = self.generator_template(
        *train_inputs, decoder_fn=self.decoder_fn, **self.opts)

    # TODO: using the rnn outputs from pretraining as "real" instead of
    # target embeddings (aka professor forcing)
    real_outputs = self.g_tensors_pretrain.rnn_outputs
    self.d_tensors_real = self.discriminator_template(
        real_outputs, seq_len, is_real=True, **self.opts)

    # TODO: check to see if sequence_length is correct
    fake_outputs = self.g_tensors_fake.rnn_outputs
    self.d_tensors_fake = self.discriminator_template(
        fake_outputs, None, is_real=False, **self.opts)

    # Trainable variables of each sub-network, selected by scope prefix.
    trainable = tf.GraphKeys.TRAINABLE_VARIABLES
    self.g_tvars = tf.get_collection(trainable, scope=GENERATOR_PREFIX)
    self.d_tvars = tf.get_collection(trainable, scope=DISCRIMINATOR_PREFIX)
def _count_segment_matches(outputs_np, targets_np, batch_size):
    """Count correct predictions overall and per batch-sized slice.

    Positions ``[batch_size, 2 * batch_size)`` count towards "shape",
    ``[2 * batch_size, 3 * batch_size)`` towards "location" and
    ``[3 * batch_size, 4 * batch_size)`` towards "size".
    NOTE(review): this presumably relies on the packed targets being laid out
    one time step after another - confirm against the caption format.

    Returns:
        Tuple ``(exact_match, shape_match, location_match, size_match)``.
    """
    exact_match = shape_match = location_match = size_match = 0
    for pos in range(len(targets_np)):
        if outputs_np[pos] == targets_np[pos]:
            exact_match += 1
            if batch_size <= pos < batch_size * 2:
                shape_match += 1
            elif batch_size * 2 <= pos < batch_size * 3:
                location_match += 1
            elif batch_size * 3 <= pos < batch_size * 4:
                size_match += 1
    return exact_match, shape_match, location_match, size_match


def main(args):
    """Train the ResNet encoder / RNN decoder pair and checkpoint them.

    Args:
        args: Parsed options. Attributes read here: ``model_path``,
            ``root_path``, ``vocab_path``, ``batch_size``, ``num_workers``,
            ``embed_size``, ``hidden_size``, ``num_layers``,
            ``learning_rate``, ``num_epochs``, ``log_step``, ``save_step``.

    Side effects:
        Creates ``args.model_path`` if missing, pickles the vocabulary to
        ``args.vocab_path``, prints progress, and writes encoder/decoder
        state dicts every ``args.save_step`` batches.
    """
    # Create model directory
    if not os.path.exists(args.model_path):
        os.makedirs(args.model_path)

    # Image preprocessing
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.033, 0.032, 0.033), (0.027, 0.027, 0.027))
    ])

    # Build vocab and persist it
    vocab = build_vocab(args.root_path, threshold=0)
    vocab_path = args.vocab_path
    with open(vocab_path, 'wb') as f:
        pickle.dump(vocab, f)
    len_vocab = vocab.idx
    print(vocab.idx2word)

    # Build data loader
    data_loader = get_loader(args.root_path,
                             vocab,
                             transform,
                             args.batch_size,
                             shuffle=True,
                             num_workers=args.num_workers)

    # Build the models
    encoder = ResNet(ResidualBlock, [3, 3, 3], args.embed_size)
    decoder = DecoderRNN(args.embed_size, args.hidden_size, len(vocab),
                         args.num_layers)

    if torch.cuda.is_available():
        encoder.cuda(1)
        decoder.cuda(1)

    # Loss and Optimizer
    criterion = nn.CrossEntropyLoss()
    params = list(decoder.parameters()) + list(encoder.parameters())
    optimizer = torch.optim.Adam(params, lr=args.learning_rate)

    # Train the Models
    total_step = len(data_loader)
    for epoch in range(args.num_epochs):
        for i, (images, captions, lengths) in enumerate(data_loader):
            # Set mini-batch dataset
            images = to_var(images)
            captions = to_var(captions)
            targets = pack_padded_sequence(captions,
                                           lengths,
                                           batch_first=True)[0]

            # Forward, Backward and Optimize
            optimizer.zero_grad()
            features = encoder(images)
            outputs = decoder(features, captions, lengths)
            outputs = outputs.view(-1, len_vocab)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()

            # Print log info
            # NOTE(review): the accuracy report is placed inside this branch;
            # the original nesting was lost in the source - confirm.
            if i % args.log_step == 0:
                # `loss.data[0]` only works on PyTorch < 0.4; newer releases
                # expose the scalar through `.item()`.
                if hasattr(loss, 'item'):
                    loss_value = loss.item()
                else:
                    loss_value = loss.data[0]
                print(
                    'Epoch [%d/%d], Step [%d/%d], Loss: %.4f, Perplexity: %5.4f'
                    % (epoch, args.num_epochs, i, total_step, loss_value,
                       np.exp(loss_value)))

                # Accuracy on the current batch
                outputs_np = outputs.max(1)[1].cpu().data.numpy()
                targets_np = targets.cpu().data.numpy()
                print(outputs_np)
                print(targets_np)

                # The counting loop lives in a helper so that it cannot
                # overwrite the batch index `i` used for checkpointing below.
                exact_match, shape_match, location_match, _size_match = \
                    _count_segment_matches(outputs_np, targets_np,
                                           args.batch_size)
                print(
                    'location match : %.4f, shape match : %.4f, exact_match: %.4f'
                    % (location_match / (args.batch_size),
                       shape_match / args.batch_size,
                       exact_match / len(targets_np)))

            # Save the models
            if (i + 1) % args.save_step == 0:
                torch.save(
                    decoder.state_dict(),
                    os.path.join(args.model_path,
                                 'decoder-%d-%d.pkl' % (epoch + 1, i + 1)))
                torch.save(
                    encoder.state_dict(),
                    os.path.join(args.model_path,
                                 'encoder-%d-%d.pkl' % (epoch + 1, i + 1)))
def __init__(self, dataset, opts, use_pretrained_embeddings=True):
    """Build the bidirectional-LSTM sense classifier graph and its session.

    Args:
        dataset: Object exposing ``training_files`` and ``vocab_file``.
        opts: Dict of options. Keys read here: ``'min_counts'``,
            ``'hidden_unit_size'``, ``'learning_rate'`` and, when
            ``use_pretrained_embeddings`` is false, ``'embedding_length'``.
            The whole dict is also forwarded to ``prepare_data``.
        use_pretrained_embeddings: When true, embeddings are loaded from the
            word2vec model at ``WORD2VEC_PATH`` and kept frozen; otherwise
            they are initialised uniformly in [-1, 1] and trained.

    Side effects:
        Creates ``self.sess`` and runs the global variable initializer.
    """
    # TODO: Add Dropout layer later.
    self.dropout_keep_prob = tf.placeholder(tf.float32,
                                            name="dropout_keep_prob")

    if use_pretrained_embeddings:
        word2vec = get_word2vec_model(WORD2VEC_PATH)
        word2idx, idx2word, label2idx, idx2label = build_vocab(
            dataset.training_files,
            dataset.vocab_file,
            word2vec,
            min_counts=opts['min_counts'])
        embedding_weights = get_embedding_weights(word2idx, word2vec)
        embedding_length = embedding_weights.shape[1]
        # TODO: embedding might be trainable.
        self.embeddings = tf.Variable(embedding_weights,
                                      dtype=tf.float32,
                                      trainable=False)
    else:
        word2idx, idx2word, label2idx, idx2label = build_vocab(
            dataset.training_files,
            dataset.vocab_file,
            min_counts=opts['min_counts'])
        embedding_length = opts['embedding_length']
        self.embeddings = tf.Variable(tf.random_uniform(
            [len(word2idx), embedding_length], -1.0, 1.0),
                                      dtype=tf.float32)

    self.sess = tf.Session()
    self.enqueue_data, self.source, self.target_word, self.label, \
        self.sequence_length = prepare_data(self.sess,
                                            dataset.training_files,
                                            word2idx, label2idx, **opts)

    self.target_words_embedded = tf.nn.embedding_lookup(
        self.embeddings, self.target_word)
    self.sentences_embedded = tf.nn.embedding_lookup(
        self.embeddings, self.source)

    hidden_unit_size = opts['hidden_unit_size']
    num_senses = len(label2idx)

    # One cell object per direction: later TF 1.x releases reject a single
    # cell instance that is reused under both the forward and the backward
    # variable scope, and each direction needs its own weights anyway.
    encoder_fw_cell = LSTMCell(hidden_unit_size)
    encoder_bw_cell = LSTMCell(hidden_unit_size)

    # time_major=True: the embedded input is consumed time-major.
    # NOTE(review): assumes prepare_data yields `source` as (time, batch) -
    # confirm.
    (encoder_fw_outputs, encoder_bw_outputs), \
        (encoder_fw_final_state, encoder_bw_final_state) = \
        tf.nn.bidirectional_dynamic_rnn(
            cell_fw=encoder_fw_cell,
            cell_bw=encoder_bw_cell,
            inputs=self.sentences_embedded,
            sequence_length=self.sequence_length,
            dtype=tf.float32,
            time_major=True)

    # Concatenate forward and backward final states along the feature axis.
    encoder_final_state_c = tf.concat(
        (encoder_fw_final_state.c, encoder_bw_final_state.c), 1)
    encoder_final_state_h = tf.concat(
        (encoder_fw_final_state.h, encoder_bw_final_state.h), 1)
    encoder_final_state = LSTMStateTuple(c=encoder_final_state_c,
                                         h=encoder_final_state_h)

    # Classifier input: final cell state joined with the target word
    # embedding (earlier variant used the cell state alone).
    self.encoder_target_embedding = tf.concat(
        (encoder_final_state.c, self.target_words_embedded), 1)

    with tf.name_scope("output"):
        # Input width = 2 * hidden (both directions) + embedding length.
        W = tf.Variable(tf.truncated_normal(
            [hidden_unit_size * 2 + embedding_length, num_senses],
            stddev=0.1),
                        name="W")
        b = tf.Variable(tf.constant(0.1, shape=[num_senses]), name="b")
        self.scores = tf.matmul(self.encoder_target_embedding, W) + b
        self.predictions = tf.argmax(self.scores, 1, name="predictions")

    with tf.name_scope('cross_entropy'):
        labels = tf.one_hot(self.label, num_senses)
        self.diff = tf.nn.softmax_cross_entropy_with_logits(
            labels=labels, logits=self.scores)

    with tf.name_scope('loss'):
        self.loss = tf.reduce_mean(self.diff)

    with tf.name_scope('train'):
        self.train_step = tf.train.AdamOptimizer(
            opts['learning_rate']).minimize(self.loss)

    with tf.name_scope('accuracy'):
        with tf.name_scope('correct_prediction'):
            correct_prediction = tf.equal(self.predictions,
                                          tf.argmax(labels, 1))
        with tf.name_scope('accuracy'):
            self.accuracy = tf.reduce_mean(
                tf.cast(correct_prediction, tf.float32))

    self.sess.run(tf.global_variables_initializer())
def main():
    """Train a small Transformer on a two-pair toy corpus and translate once.

    Builds one vocabulary over both the Korean and the English sentences,
    converts them to padded id arrays, trains `Transformer` on the repeated
    two-sample dataset while printing decoded inputs/predictions, then
    translates the first Korean sentence (plotting attention) and prints the
    model summary.

    NOTE(review): block nesting was reconstructed from a whitespace-collapsed
    source; the extent of loop bodies should be confirmed.
    """
    training_dataset = [[
        "안녕하세요, 제 이름은 윤주성입니다", "hello, my name is joosung yoon"
    ], ["저는 텐서플로우를 좋아합니다", "i like tensorflow"]]

    X_y_split = list(zip(*training_dataset))

    X_train_str = list(
        X_y_split[0])  # ['안녕하세요, 제 이름은 윤주성입니다', '저는 텐서플로우를 좋아합니다']
    y_train_str = list(
        X_y_split[1]
    )  # ['hello, my name is joosung yoon', 'i like tensorflow']
    print(X_train_str)
    print(y_train_str)

    # Source and target sentences share one vocabulary.
    corpus = []
    corpus.extend(X_train_str)
    corpus.extend(
        y_train_str
    )  # source sentences followed by target sentences

    vocab = build_vocab(corpus)
    print(vocab.idx2word)

    max_sequence_len = 13

    X_train, _, _ = word_to_pad_word_ids(text_batch=X_train_str,
                                         vocab=vocab,
                                         maxlen=max_sequence_len,
                                         add_start_end_token=True)
    _, tar_inp, tar_real = word_to_pad_word_ids(
        text_batch=y_train_str,
        vocab=vocab,
        maxlen=max_sequence_len,
        add_start_end_token=True)  # add +1 maxlen for start, end token

    print(
        X_train
    )  # example: [[ 5 6 7 8 9 10 11 12 13 14 0 0 0 0 0], [15 16 17 18 19 0 0 0 0 0 0 0 0 0 0]]
    print(
        tar_inp
    )  # example: [[20 8 21 22 23 24 25 0 0 0 0 0 0 0 0], [26 27 28 0 0 0 0 0 0 0 0 0 0 0 0]]
    print(tar_real)

    print(decode_word_ids(X_train, vocab))
    # example:
    # [['안녕/NNG', '하/XSV', '세요/EP+EF', ',/SC', '제/MM', '이름/NNG', '은/JX', '윤주/NNG', '성/XSN', '입니다/VCP+EC', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>'],
    #  ['저/NP', '는/JX', '텐서플로우/NNP', '를/JKO', '좋아합니다/VV+EC', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']]

    # Model hyper-parameters handed to Transformer.
    config = {}
    config['vocab_size'] = len(vocab.idx2word)
    config['maxlen'] = max_sequence_len
    config['embed_dim'] = 100
    config['head_num'] = 5
    config['split_embed_dim'] = 20
    config['layer_num'] = 2
    config['feed_forward_dim'] = 100

    # define model
    model = Transformer(config=config)

    # reduction='none' keeps per-position losses so padding can be masked.
    loss_object = tf.keras.losses.SparseCategoricalCrossentropy(
        from_logits=True, reduction='none')  # input label == index of class
    optimizer = tf.keras.optimizers.Adam()

    train_loss = tf.keras.metrics.Mean(name='train_loss')
    train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(
        name='train_accuracy')

    # Test metrics are created and printed but never updated in this function.
    test_loss = tf.keras.metrics.Mean(name='test_loss')
    test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(
        name='test_accuracy')

    def loss_function(real, pred):
        """Return the mean per-position loss with padding positions zeroed."""
        mask = tf.math.logical_not(tf.math.equal(real,
                                                 0))  # 1 where not padding
        loss_ = loss_object(real, pred)

        mask = tf.cast(mask, dtype=loss_.dtype)
        loss_ *= mask  # keep non-padding positions (1), drop padding positions (0)

        # The mean is taken over all positions, zeroed padding included.
        return tf.reduce_mean(loss_)

    def create_padding_mask(seq):
        """Return a float mask that is 1 where `seq` equals 0, with two axes inserted."""
        seq = tf.cast(tf.math.equal(seq, 0), tf.float32)

        # add extra dimensions so that we can add the padding
        # to the attention logits.
        return seq[:, tf.newaxis,
                   tf.newaxis, :]  # (batch_size, 1, 1, seq_len)

    def create_look_ahead_mask(step_size):
        """
        - In the decoder, self-attention for each position must not see future tokens, matching the inference step
        - Each step holds attention over as many positions as there are steps
        - To hide future tokens, each step has to mask the future steps
        - At step 1 the remaining n-1 positions are masked; at step 2 everything except the first two, i.e. n-2 positions
        - This yields an inverted-triangle masking matrix
        - It is the step * step matrix split along the diagonal

        example)
        x = tf.random.uniform((1, 3))
        temp = create_look_ahead_mask(x.shape[1])
        temp:
        <tf.Tensor: id=311521, shape=(3, 3), dtype=float32, numpy=
        array([[ 0.,  1.,  1.],
               [ 0.,  0.,  1.],
               [ 0.,  0.,  0.]], dtype=float32)>

        Special usecase:
         tf.matrix_band_part(input, 0, -1) ==> Upper triangular part.
         tf.matrix_band_part(input, -1, 0) ==> Lower triangular part.
         tf.matrix_band_part(input, 0, 0) ==> Diagonal.
        :param step_size: side length of the square mask
        :return: (step_size, step_size) tensor, 1 strictly above the diagonal
        """
        mask = 1 - tf.linalg.band_part(tf.ones((step_size, step_size)), -1, 0)
        return mask  # (seq_len, seq_len)

    def create_masks(inp, tar):
        """Return (encoder padding mask, combined decoder mask, decoder padding mask)."""
        # Encoder padding mask
        enc_padding_mask = create_padding_mask(inp)

        # Used in the 2nd attention block in the decoder.
        # This padding mask is used to mask the encoder outputs.
        dec_padding_mask = create_padding_mask(inp)

        # Used in the 1st attention block in the decoder.
        # It is used to pad and mask future tokens in the input received by
        # the decoder.
        look_ahead_mask = create_look_ahead_mask(tf.shape(tar)[1])
        dec_target_padding_mask = create_padding_mask(tar)
        # A position is masked if it is padding OR lies in the future.
        combined_mask = tf.maximum(dec_target_padding_mask, look_ahead_mask)

        return enc_padding_mask, combined_mask, dec_padding_mask

    # Wrapping a Python function with the tf.function() decorator instead of using a session JIT-compiles it to run as a single graph
    # Using tf.function() switches eager mode -> graph mode
    # @tf.function
    def train_step(enc_input, tar_inp, tar_real):
        """Run one optimisation step and print decoded inputs and predictions."""
        # tar_inp = label[:, :-1] # remove </s>
        # tar_real = label[:, 1:] # remove <s>

        enc_padding_mask, combined_mask, dec_padding_mask = create_masks(
            enc_input, tar_inp)

        with tf.GradientTape() as tape:
            predictions, attention_weights = model(enc_input, tar_inp, True,
                                                   enc_padding_mask,
                                                   combined_mask,
                                                   dec_padding_mask)
            loss = loss_function(tar_real,
                                 predictions)  # masking losses for padding

        # The .numpy() calls below need eager execution, which is why the
        # tf.function decorator above stays commented out.
        predicted_id = tf.cast(tf.argmax(predictions, axis=-1),
                               tf.int32).numpy()
        print("X_train: ", decode_word_ids(enc_input.numpy(), vocab))
        print("tar_inp: ", decode_word_ids(tar_inp.numpy(), vocab))
        print("tar_real: ", decode_word_ids(tar_real.numpy(), vocab))
        print("result: ", decode_word_ids(predicted_id, vocab))

        gradients = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))

        train_loss(loss)
        train_accuracy(tar_real, predictions)

    # @tf.function
    # def test_step(Y_test, label):
    #     predictions = model(Y_test)
    #     t_loss = loss_object(label, predictions)
    #
    #     test_loss(t_loss)
    #     test_accuracy(label, predictions)

    def plot_attention_weights(attention, sentence, result, layer):
        """Plot one attention map per head of `attention[layer]` in a 2x4 grid."""
        import matplotlib.pyplot as plt
        from matplotlib import font_manager, rc
        # print("font_list: ", font_manager.get_fontconfig_fonts())
        # Hard-coded font file path; it must exist on the machine running this.
        font_name = font_manager.FontProperties(
            fname='/Library/Fonts/NanumSquareBold.ttf').get_name()
        rc('font', family=font_name)

        fig = plt.figure(figsize=(16, 8))

        sentence, _, _ = word_to_pad_word_ids(
            text_batch=[sentence],
            vocab=vocab,
            maxlen=max_sequence_len,
            add_start_end_token=True)  #tokenizer_pt.encode(sentence)

        # Drop the leading axis (size 1) so the first axis indexes heads.
        attention = tf.squeeze(attention[layer], axis=0)

        for head in range(attention.shape[0]):
            # 2x4 grid: room for at most 8 heads.
            ax = fig.add_subplot(2, 4, head + 1)

            # plot the attention weights
            im = ax.matshow(
                attention[head][:, :],
                cmap='viridis')  # viridis  #plt.cm.Reds # plt.cm.Blues

            fontdict = {'fontsize': 10}

            ax.set_xticks(range(len(decode_word_ids(sentence, vocab)[0])))
            ax.set_yticks(range(len(decode_word_ids(result, vocab)[0])))

            from mpl_toolkits.axes_grid1 import make_axes_locatable
            divider = make_axes_locatable(ax)
            cax = divider.append_axes("right", size="5%", pad=0.05)
            plt.colorbar(im, cax=cax)

            ax.set_xticklabels(decode_word_ids(sentence, vocab)[0],
                               fontdict=fontdict,
                               rotation=90)

            ax.set_yticklabels(decode_word_ids(result, vocab)[0],
                               fontdict=fontdict)

            ax.set_xlabel('Head {}'.format(head + 1))

        plt.tight_layout()
        plt.show()

    def evaluate(inp_sentence, vocab, max_sequence_len):
        """Greedily decode `inp_sentence`; return (output ids, attention weights)."""
        # at inference time there is no need to add +1 to the length
        encoder_input, _, _ = word_to_pad_word_ids(text_batch=[inp_sentence],
                                                   vocab=vocab,
                                                   maxlen=max_sequence_len,
                                                   add_start_end_token=True)
        print("encoder_input: ", encoder_input)

        # Decoding starts from the single start token.
        decoder_input = ['<s>']
        decoder_input = [vocab.word2idx[_] for _ in decoder_input]
        output = tf.expand_dims(decoder_input, 0)
        print("output: ", decode_word_ids(output.numpy(), vocab))

        for i in range(max_sequence_len):
            enc_padding_mask, combined_mask, dec_padding_mask = create_masks(
                encoder_input, output)

            # predictions.shape == (batch_size, seq_len, vocab_size)
            predictions, attention_weights = model(encoder_input, output,
                                                   False, enc_padding_mask,
                                                   combined_mask,
                                                   dec_padding_mask)

            # select the last word from the seq_len dimension
            print("predicted_id: ",
                  tf.cast(tf.argmax(predictions, axis=-1), tf.int32))
            predictions = predictions[:,
                                      -1:, :]  # (batch_size, 1, vocab_size)

            predicted_id = tf.cast(tf.argmax(predictions, axis=-1), tf.int32)

            # return the result if the predicted_id is equal to the end token
            if tf.equal(predicted_id, vocab.word2idx['</s>']):
                return tf.squeeze(output, axis=0), attention_weights

            # concatentate the predicted_id to the output which is given to the decoder
            # as its input.
            output = tf.concat([output, predicted_id], axis=-1)
            print("output: ", decode_word_ids(output.numpy(), vocab))

        # No end token within max_sequence_len steps: return what was decoded.
        return tf.squeeze(output, axis=0), attention_weights

    def translate(sentence, vocab, max_sequence_len, plot=''):
        """Decode `sentence`, print the result, and plot layer `plot` if it is non-empty."""
        result, attention_weights = evaluate(sentence, vocab,
                                             max_sequence_len)
        result = [result.numpy()]

        predicted_sentence = decode_word_ids(result, vocab)

        print('Input: {}'.format(sentence))
        print('Predicted translation: {}'.format(predicted_sentence))

        if plot:
            plot_attention_weights(attention_weights, sentence, result, plot)

    ### Training

    EPOCHS = 4000
    BATCH_SIZE = 45

    # The dataset is repeated EPOCHS times before shuffling and batching, so
    # one iteration of the loop below is one batch drawn from the repeated data.
    train_ds = tf.data.Dataset.from_tensor_slices(
        (X_train, tar_inp, tar_real))
    train_ds = train_ds.repeat(EPOCHS).shuffle(1024).batch(BATCH_SIZE)
    train_ds = train_ds.prefetch(tf.data.experimental.AUTOTUNE)

    # The loop targets rebind the outer tar_inp / tar_real names.
    for step, (X_train_batch, tar_inp, tar_real) in enumerate(train_ds):
        train_step(X_train_batch, tar_inp, tar_real)

        template = 'Step {}, Loss: {}, Accuracy: {}, Test Loss: {}, Test Accuracy: {}'
        print(
            template.format(step + 1, train_loss.result(),
                            train_accuracy.result() * 100, test_loss.result(),
                            test_accuracy.result() * 100))

    translate("안녕하세요, 제 이름은 윤주성입니다",
              vocab,
              max_sequence_len,
              plot='decoder_layer2_block2')

    model.summary()
# Script setup for the TextRNN run: read the remaining command-line arguments,
# build/load the vocabulary, create the model and load the data splits.
# `train_data_dir` is used below but assigned outside this fragment.
eval_data_dir = sys.argv[2]
base_dir = sys.argv[3]
window_size = sys.argv[4]
train_ratio = sys.argv[5]
vocab_dir = os.path.join(base_dir, 'vocab.txt')
# train_ratio is still the raw argument string here, so it is used verbatim
# as a directory name.
save_dir = os.path.join(base_dir, train_ratio + '/checkpoints/textrnn')
save_path = os.path.join(save_dir,
                         'best_validation')  # path where the best validation result is saved
# Convert the numeric arguments only after the paths have been built.
window_size = int(window_size)
train_ratio = float(train_ratio)

print('Configuring RNN model...')
print('Building vocab if not exists.')
start_time_vocab = time.time()
config = TRNNConfig()
if not os.path.exists(vocab_dir):  # rebuild the vocabulary if it does not exist
    build_vocab(train_data_dir, vocab_dir)
categories, cat_to_id = read_category()
words, word_to_id = read_vocab(vocab_dir)
# The configured vocabulary size is overwritten with the size actually read.
config.vocab_size = len(words)
model = TextRNN(config)
time_dif_vocab = get_time_dif(start_time_vocab)
print("Time usage:", time_dif_vocab)

# Read the raw data and convert it into three sets (train / validation / test)
print("Processing and loading training and validation data...")
start_time = time.time()
x_train, x_val, x_test, y_train, y_val, y_test = process_all_file(
    train_data_dir, eval_data_dir, train_ratio, word_to_id, cat_to_id,
    config.seq_length, window_size)
time_dif = get_time_dif(start_time)
print("Time usage:", time_dif)
# Script setup for repeated TextCNN runs: each pass regenerates the training
# data, rebuilds the model, trains it and loads the test data.
save_dir_bak = 'checkpoints/textcnn_bak'
save_path_bak = os.path.join(save_dir_bak,
                             'best_validation')  # path where the best validation result is saved
save_dir = 'checkpoints/textcnn'
save_path = os.path.join(save_dir, 'best_validation')

print('Configuring CNN model...')
# Three runs, each with the same value 690 passed to get_train_data.
normal_num = [690] * 3
# Initialised here but not updated anywhere in this fragment.
max_acc = 0
greatest_normal_num = 0
# NOTE(review): the source lost its indentation; everything below is assumed
# to belong to the loop body - confirm against the original file.
for i in normal_num:
    get_train_data(i)
    config = TCNNConfig()
    if not os.path.exists(vocab_dir):  # rebuild the vocabulary if it does not exist
        build_vocab(train_dir, vocab_dir, config.vocab_size)
    categories, cat_to_id = read_category(train_dir)
    words, word_to_id = read_vocab(vocab_dir)
    # Overwrite the configured sizes with what was actually read from disk.
    config.vocab_size = len(words)
    config.num_classes = len(categories)
    model = TextCNN(config)
    # train the model and save it to bak
    train()

    print("Loading test data...")
    start_time = time.time()
    x_test, y_test = process_file(test_dir, word_to_id, cat_to_id,
                                  config.seq_length)

    # Cap this process at 10% of GPU memory.
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.1)
    session = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
def _location_targets(captions, vocab):
    """Build the class-index target from the second token of every caption.

    The token id in column 1 is mapped through ``vocab.idx2word``, parsed as
    an integer and shifted to be zero-based.
    """
    # int(element): iterating a tensor yields Python ints on old PyTorch but
    # 0-dim tensors on newer releases, which cannot index idx2word directly.
    idx_arr = [
        int(vocab.idx2word[int(element)]) - 1 for element in captions[:, 1]
    ]
    return to_var(torch.from_numpy(np.array(idx_arr)))


def _location_accuracy(features, target, num_samples):
    """Return the fraction of samples whose arg-max class equals the target."""
    # rearrange tensors to batch_size * caption_size
    re_target = rearrange_tensor(target, num_samples, 1)
    re_out_max = rearrange_tensor(features.max(1)[1], num_samples, 1)
    # convert to numpy
    outputs_np = re_out_max.cpu().data.numpy()
    targets_np = re_target.cpu().data.numpy()
    location_match = sum(1 for pos in range(len(targets_np))
                         if outputs_np[pos] == targets_np[pos])
    return location_match / len(targets_np)


def main(args):
    """Train the ResNet location classifier, then report test-set accuracy.

    Args:
        args: Parsed options. Attributes read here: ``root_path``,
            ``test_path``, ``batch_size``, ``num_workers``,
            ``learning_rate``, ``num_epochs``, ``log_step``.

    Side effects:
        Prints loss and batch accuracy every ``args.log_step`` training
        batches, then one accuracy line per test batch.
    """
    # Image preprocessing
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.033, 0.032, 0.033), (0.027, 0.027, 0.027))
    ])

    vocab = build_vocab(args.root_path, threshold=0)
    num_class = 9

    # Build data loader
    data_loader = get_loader(args.root_path,
                             vocab,
                             transform,
                             args.batch_size,
                             shuffle=True,
                             num_workers=args.num_workers)

    # Build the models
    cnn = ResNet(ResidualBlock, [3, 3, 3], num_class)

    if torch.cuda.is_available():
        cnn.cuda(1)

    # Loss and Optimizer
    criterion = nn.CrossEntropyLoss()
    params = list(cnn.parameters())
    optimizer = torch.optim.Adam(params, lr=args.learning_rate)

    # Train the Models
    total_step = len(data_loader)
    for epoch in range(args.num_epochs):
        for i, (images, captions, lengths) in enumerate(data_loader):
            target = _location_targets(captions, vocab)
            images = to_var(images)

            optimizer.zero_grad()
            features = cnn(images)
            loss = criterion(features, target)
            loss.backward()
            optimizer.step()

            # Print log info
            # NOTE(review): the accuracy report is placed inside this branch;
            # the original nesting was lost in the source - confirm.
            if i % args.log_step == 0:
                # `loss.data[0]` only works on PyTorch < 0.4; newer releases
                # expose the scalar through `.item()`.
                if hasattr(loss, 'item'):
                    loss_value = loss.item()
                else:
                    loss_value = loss.data[0]
                print(
                    'Epoch [%d/%d], Step [%d/%d], Loss: %.4f, Perplexity: %5.4f'
                    % (epoch, args.num_epochs, i, total_step, loss_value,
                       np.exp(loss_value)))

                # Accuracy on the current training batch
                print('location match accuracy: %.4f' %
                      _location_accuracy(features, target, captions.size(0)))

    # test model
    print('---------------------------------')
    cnn.eval()
    test_loader = get_loader(args.test_path,
                             vocab,
                             transform,
                             args.batch_size,
                             shuffle=True,
                             num_workers=args.num_workers)
    for images, captions, lengths in test_loader:
        target = _location_targets(captions, vocab)
        images = to_var(images)
        features = cnn(images)
        # One accuracy line per test batch.
        print('location match accuracy: %.4f' %
              _location_accuracy(features, target, captions.size(0)))
test_txt_dirs = txt_dirs[int(len(txt_dirs) * 0.9):] # print "载入测试样本..." # test_txt_dirs = list() # # test_data_dir = '/home/abc/ssd/pzw/nlp/data/0523/word_sep_test/' # test_data_dir = '/home/abc/ssd/pzw/nlp/data/test_data3/' # # test_data_dir = '/home/zhwpeng/abc/nlp/data/0324/word_sep_test/' # # test_data_dir = '/home/zhwpeng/abc/text_classify/data/0412/raw/test_data3/' # for fold in glob(test_data_dir + '*'): # test_txt_dirs = test_txt_dirs + glob(fold + '/*.txt') # # test_txt_dirs = test_txt_dirs + glob(fold + '/*.txt')[:10] # # print "测试集样本总数是{}".format(len(test_txt_dirs)) # np.random.shuffle(test_txt_dirs) print "配置CNN模型..." config = TCNNConfig() seq_length = config.seq_length if not os.path.exists(vocab_dir): # 如果不存在词汇表,重建 build_vocab(txt_dirs, seq_length, vocab_dir, config.vocab_size) categories, cat_to_id = read_category(types) words, word_to_id = read_vocab(vocab_dir) config.vocab_size = len(words) model = TextCNN(config) if sys.argv[1] == 'train': print "开始训练..." train() else: print "开始测试..." model_tes_t(test_txt_dirs, train_flag=False)