Example #1
    def __init__(self, args):
        """
        model: 训练好的模型
        word2id: 词语到id的映射
        id2label: id到类别的映射
        """

        self.args = args
        self.rule = re.compile(r"[^\u4e00-\u9fa5]")
        self.cut = word_tokenize
        with open(self.args.vocab + '/' + 'word2id.pkl', 'rb') as f:
            print('Loading word2id...')
            self.word2id = pickle.load(f)
        with open(self.args.vocab + '/' + 'id2label.pkl', 'rb') as f:
            print('Loading id2label...')
            self.id2label = pickle.load(f)
        self.args.embed_num = len(self.word2id)
        self.args.class_num = len(self.id2label)
        print_parameters(self.args)
        self.model = TextCNN(args)
        if self.args.snapshot is not None:
            print('\nLoading model from %s...' % (self.args.snapshot))
            self.model.load_state_dict(torch.load(self.args.snapshot))

        if self.args.cuda:
            torch.cuda.set_device(self.args.device)
            self.model = self.model.cuda()
        self.model.eval()
Example #2
class Predictor():
    def __init__(self, args):
        """
        model: 训练好的模型
        word2id: 词语到id的映射
        id2label: id到类别的映射
        """

        self.args = args
        self.rule = re.compile(r"[^\u4e00-\u9fa5]")
        self.cut = word_tokenize
        with open(self.args.vocab + '/' + 'word2id.pkl', 'rb') as f:
            print('Loading word2id...')
            self.word2id = pickle.load(f)
        with open(self.args.vocab + '/' + 'id2label.pkl', 'rb') as f:
            print('Loading id2label...')
            self.id2label = pickle.load(f)
        self.args.embed_num = len(self.word2id)
        self.args.class_num = len(self.id2label)
        print_parameters(self.args)
        self.model = TextCNN(args)
        if self.args.snapshot is not None:
            print('\nLoading model from %s...' % (self.args.snapshot))
            self.model.load_state_dict(torch.load(self.args.snapshot))

        if self.args.cuda:
            torch.cuda.set_device(self.args.device)
            self.model = self.model.cuda()
        self.model.eval()

    def predict(self, text):
        """
        预测单个文本的类别
        :param text: 单个文本
        :return:
        """
        try:
            text = self.rule.sub('', text)
            text = self.cut(text)
            x = Variable(
                torch.LongTensor([[
                    self.word2id[word]
                    if word != '\x00' and word in self.word2id else 0
                    for word in text
                ]]))
            # print(id2label)
            if self.args.cuda:
                x = x.cuda()
            output = self.model(x)
            # prob = F.softmax(output, 1)
            # _, predicted = torch.max(prob, 1)
            _, predicted = torch.max(output, 1)
            return self.id2label[predicted.data[0]]
        except Exception:
            # fall back to an unknown label if cleaning, lookup, or inference fails
            return 'UNK'
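A minimal usage sketch for the Predictor above, assuming an args namespace that supplies the fields the constructor reads (vocab, snapshot, cuda, device) plus whatever hyperparameters the TextCNN(args) constructor expects; the paths and values below are illustrative only, not taken from the original project.

# Hedged usage sketch: the argument values are hypothetical.
from argparse import Namespace

args = Namespace(vocab='./vocab', snapshot='./snapshot/best.pt',
                 cuda=False, device=0)
predictor = Predictor(args)
# Input is expected to be Chinese text, since the regex strips everything else.
print(predictor.predict('这部电影非常好看'))  # prints a label name, or 'UNK' on failure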
Example #3
def get_model(model, Title):
    embedding_dim = len(Title.vocab.vectors[0])
    embedding_weight = Title.vocab.vectors

    if model == 'TextCNN':
        model = TextCNN.TextCNN(max_length=20,
                                vocab_size=len(Title.vocab),
                                embedding_dim=embedding_dim,
                                embedding_weight=embedding_weight,
                                output_size=10)
    elif model == 'LSTM':
        model = LSTM.LSTM(vocab_size=len(Title.vocab),
                          embedding_dim=embedding_dim,
                          embedding_weight=embedding_weight,
                          hidden_size=100,
                          num_layers=4,
                          output_size=10)
    elif model == 'BiLSTM_Attention':
        model = BiLSTM_Attention.BiLSTM_Attention(
            vocab_size=len(Title.vocab),
            embedding_dim=embedding_dim,
            embedding_weight=embedding_weight,
            hidden_size=100,
            num_layers=4,
            output_size=10)
    else:
        model = Fasttext.fasttext(vocab_size=len(Title.vocab),
                                  embedding_dim=embedding_dim,
                                  embedding_weight=embedding_weight,
                                  hidden_size=100,
                                  output_size=10)
    return model
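A hedged usage sketch for get_model, assuming Title is a torchtext field whose vocabulary has already been built with pretrained vectors (so Title.vocab.vectors is populated, as get_model requires); the optimizer and loss choices are assumptions.

# Hedged usage sketch: Title must already provide vocab.vectors.
import torch

net = get_model('TextCNN', Title)  # 'LSTM', 'BiLSTM_Attention'; any other value falls back to fasttext
optimizer = torch.optim.Adam(net.parameters(), lr=1e-3)
criterion = torch.nn.CrossEntropyLoss()  # every branch builds a 10-class classifier (output_size=10)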
Example #4
def train():
    x = tf.placeholder(tf.int32, [None, None], name='x')
    y = tf.placeholder(tf.int32, [None], name='y')
    lr = TextCNN.INIT_LEARNING_RATE
    embedding = tf.Variable(embedding_table, dtype=tf.float32, trainable=False)
    # embedding = tf.Variable(tf.random_uniform([TextCNN.VOCAB_SIZE, TextCNN.EMBED_FEATURE], -1.0, 1.0))
    input = tf.nn.embedding_lookup(embedding, x)

    model = TextCNN.TextCNN()
    logits_train = model.inference(input, Training=True)
    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits_train,
                                                          labels=y,
                                                          name='loss')
    loss_ = tf.reduce_mean(loss) + tf.nn.l2_loss(model.fc.get_weights()[0])
    train_op = tf.train.AdamOptimizer(lr).minimize(loss_)

    logits = model.inference(input)
    correct_pred = tf.equal(tf.argmax(logits, axis=1), tf.cast(y, tf.int64))
    accuracy = tf.reduce_mean(tf.cast(correct_pred, dtype=tf.float32))

    sum_correct_pred = tf.reduce_sum(tf.cast(correct_pred, dtype=tf.float32))

    with tf.Session() as sess:
        init_op = tf.global_variables_initializer()
        sess.run(init_op)
        for epoch in range(TextCNN.EPOCH):
            for step, (x_, y_) in enumerate(
                    data_helper.batch_iter(x_train, y_train,
                                           TextCNN.BATCH_SIZE)):
                # print(sess.run(input , feed_dict={x:x_}))
                _ = sess.run(train_op, feed_dict={x: x_, y: y_})
                if step % 64 == 0:
                    # print(y_)
                    # print(sess.run(tf.argmax(logits, axis=1), feed_dict={x: x_}))
                    print('epoch :', epoch, 'step :', step, ' train_acc = ',
                          sess.run(accuracy, feed_dict={
                              x: x_,
                              y: y_
                          }))
            sum_ = 0
            for (x__, y__) in data_helper.batch_iter(x_test, y_test,
                                                     TextCNN.BATCH_SIZE):
                tmp = sess.run(sum_correct_pred, feed_dict={x: x__, y: y__})
                sum_ += tmp
            print('epoch ', epoch, 'acc = ', sum_ / len(y_test))
            if epoch % 30 == 0:
                lr /= 2
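The loop above depends on data_helper.batch_iter, which is not shown; a minimal sketch of a compatible generator follows (shuffling once per call is an assumption about the real helper).

import numpy as np

def batch_iter(x, y, batch_size, shuffle=True):
    """Yield (x_batch, y_batch) pairs that cover the data once."""
    x, y = np.asarray(x), np.asarray(y)
    order = np.random.permutation(len(y)) if shuffle else np.arange(len(y))
    for start in range(0, len(y), batch_size):
        idx = order[start:start + batch_size]
        yield x[idx], y[idx]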
Example #5
        x_batch.extend([x_char, x_char_pad_idx])
    return x_batch, y_batch


with tf.Graph().as_default():
    session_conf = tf.ConfigProto(allow_soft_placement=True,
                                  log_device_placement=False)
    session_conf.gpu_options.allow_growth = True
    sess = tf.Session(config=session_conf)

    with sess.as_default():
        cnn = TextCNN(char_ngram_vocab_size=len(ngrams_dict) + 1,
                      word_ngram_vocab_size=len(words_dict) + 1,
                      char_vocab_size=len(chars_dict) + 1,
                      embedding_size=FLAGS["model.emb_dim"],
                      word_seq_len=FLAGS["data.max_len_words"],
                      char_seq_len=FLAGS["data.max_len_chars"],
                      l2_reg_lambda=FLAGS["train.l2_reg_lambda"],
                      mode=FLAGS["model.emb_mode"],
                      filter_sizes=list(
                          map(int, FLAGS["model.filter_sizes"].split(","))))

        global_step = tf.Variable(0, name="global_step", trainable=False)
        optimizer = tf.train.AdamOptimizer(FLAGS["train.lr"])
        grads_and_vars = optimizer.compute_gradients(cnn.loss)
        train_op = optimizer.apply_gradients(grads_and_vars,
                                             global_step=global_step)

        print("Writing to {}\n".format(FLAGS["log.output_dir"]))
        if not os.path.exists(FLAGS["log.output_dir"]):
            os.makedirs(FLAGS["log.output_dir"])
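The snippet ends right after creating the output directory; a sketch of the summary and checkpoint boilerplate that typically follows in a TF1 training script of this kind is given below. Only cnn.loss and FLAGS["log.output_dir"] come from the code above; everything else is a generic assumption, not the original project's code.

        # Hedged sketch: generic TF1 summary/checkpoint setup.
        loss_summary = tf.summary.scalar("loss", cnn.loss)
        train_summary_op = tf.summary.merge([loss_summary])
        train_summary_writer = tf.summary.FileWriter(
            os.path.join(FLAGS["log.output_dir"], "train"), sess.graph)
        saver = tf.train.Saver(tf.global_variables(), max_to_keep=5)
        sess.run(tf.global_variables_initializer())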
Example #6
    def __init__(self,
                 file_config,
                 config=None,
                 model=None,
                 corpus=None,
                 verbose=True,
                 opt_param=None):
        print('Loading data...')
        self.verbose = verbose
        if config is None:
            config = TCNNConfig()

        self.config = config
        self.model_file = "{}epc{}lr{}".format(
            datetime.datetime.now().strftime("%d%m%Y-%H%M%S"),
            self.config.num_epochs, self.config.learning_rate)
        self.file_config = file_config
        if corpus is None:
            corpus = TwitterHashtagCorpus(file_config.train_file,
                                          file_config.vocab_file,
                                          self.config.dev_split,
                                          self.config.seq_length,
                                          self.config.vocab_size)

    #DOING#####
        self.file_config.model_file = '{}/{}'.format(
            self.file_config.save_path, self.model_file)
        self.file_config.results_train = '{}/{}.epochs'.format(
            self.file_config.result_path, self.model_file)
        self.file_config.results_train_file = None
        ##########

        self.corpus = corpus
        self.config.vocab_size = len(self.corpus.words)
        self.config.target_names = self.corpus.label_to_id.keys()
        self.config.num_classes = len(self.corpus.label_to_id)
        self.train_data = TensorDataset(torch.LongTensor(self.corpus.x_train),
                                        torch.LongTensor(self.corpus.y_train))
        self.validation_data = TensorDataset(
            torch.LongTensor(self.corpus.x_validation),
            torch.LongTensor(self.corpus.y_validation))
        if corpus.x_test is not None:
            self.test_data = TensorDataset(
                torch.LongTensor(self.corpus.x_test),
                torch.LongTensor(self.corpus.y_test))
        print('Configuring CNN model...')
        if model is None:
            model = TextCNN(self.config)
        self.model = model
        if opt_param is None:
            opt_param = self.model.parameters()

        if self.verbose:
            print(self.corpus)
            print(self.model)

        if use_cuda:
            self.model.cuda()

        #Optimizer and Loss Function
        self.criterion = nn.CrossEntropyLoss(
            size_average=False)  # nn.MultiLabelSoftMarginLoss()
        self.optimizer = optim.Adam(opt_param, lr=self.config.learning_rate)
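A sketch of how a single epoch could be driven from the state this constructor builds (self.train_data, self.model, self.criterion, self.optimizer); the method name and the batch size are assumptions, not part of the original class.

    def train_epoch(self):
        # Hedged sketch: batch size 64 is an assumption.
        from torch.utils.data import DataLoader
        self.model.train()
        loader = DataLoader(self.train_data, batch_size=64, shuffle=True)
        total_loss = 0.0
        for inputs, targets in loader:
            if use_cuda:
                inputs, targets = inputs.cuda(), targets.cuda()
            self.optimizer.zero_grad()
            loss = self.criterion(self.model(inputs), targets)
            loss.backward()
            self.optimizer.step()
            total_loss += loss.item()
        # criterion sums per batch (size_average=False), so divide by sample count
        return total_loss / len(self.train_data)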
Example #7
    def train(self, bagging_iter):
        for model_idx in range(FLAGS.num_models):
            model_idx += 1
            start_time = time.time()
            graph = tf.Graph()
            with graph.as_default():
                session_conf = tf.ConfigProto()
                session_conf.gpu_options.allow_growth = True
                sess = tf.Session(config=session_conf)
                f_model_path = '../data/f_model/%sth_f_model/%sth_f_model.dat' % (
                    bagging_iter, model_idx)
                f = open(f_model_path, 'rb')
                model = pickle.load(f)
                f.close()
                with sess.as_default():
                    cnn = mycnn.DepCNNv6(model_idx=model_idx,
                                         num_classes=model.type_size,
                                         vocab_size=model.mor_size + 1,
                                         pos_size=model.pos_size + 1,
                                         hc_size=model.hc_feature_size,
                                         embedding_size=FLAGS.embedding_dim,
                                         mlp_size=FLAGS.mlp_size,
                                         l2_reg_lambda=FLAGS.l2_reg_lambda)

                    # Define Training procedure
                    optimizer = tf.train.AdamOptimizer(0.0001)
                    grads_and_vars = optimizer.compute_gradients(cnn.loss)
                    train_op = optimizer.apply_gradients(grads_and_vars)

                    saver = tf.train.Saver(tf.global_variables(),
                                           max_to_keep=FLAGS.num_checkpoints)

                    # Initialize all variables
                    sess.run(tf.global_variables_initializer())

                    out_path = '../data/training_data/' + str(
                        bagging_iter) + 'th_out'
                    sample_list = dt.make_sample_list_from_input_data(
                        out_path + '/train' + str(model_idx) + '.out', model)

                    # print('finished loading data')
                    total_batch = len(sample_list) / FLAGS.batch_size
                    print('total_batch = ' + str(total_batch))
                    for ep in range(FLAGS.num_epochs):
                        # training phase
                        # shuffle the samples
                        random.shuffle(sample_list)
                        # fetch one batch of data per iteration
                        batch_number = 0
                        tmp = 0
                        while True:
                            tmp += 1
                            sample_batch = sample_list[batch_number *
                                                       FLAGS.batch_size:
                                                       (batch_number + 1) *
                                                       FLAGS.batch_size]
                            batch_number += 1

                            x_mor, x_pos, x_left_mor, x_left_pos, x_right_mor, x_right_pos, \
                            x_position_mark_batch, x_child_mor, x_child_pos, x_hc_batch, y_batch\
                                = dt.convert_to_input_vector(sample_batch, model)

                            if sample_batch == []:
                                if batch_number * FLAGS.batch_size > len(
                                        sample_list):
                                    break
                                continue

                            self.train_step(cnn, sess, train_op, x_mor, x_pos,
                                            x_child_mor, x_child_pos,
                                            x_hc_batch, y_batch)
                            if (batch_number % (total_batch / 50)) == 0:
                                print('.'),
                            if batch_number * FLAGS.batch_size > len(
                                    sample_list):
                                break

                        # test phase
                        total_arc = 0
                        correct_arc = 0
                        correct_sentence = 0
                        total_sentence = 0
                        correct_arc_with_tag = 0
                        correct_sentence_with_tag = 0

                        cr_test = dt.CorpusReader()
                        cr_test.set_file(
                            '../data/raw_data/valid_data/sejong_test_edit_VV.txt'
                        )
                        parser = tp.TransitionParser(model, 1)
                        while True:
                            data = cr_test.get_next(1)

                            if data == []:
                                break
                            if data[0].raw_sentence is None:
                                continue
                            parser.initialize(data)
                            while parser.is_final_state() is False:
                                left_mor, left_pos, right_mor, right_pos, child_mor, child_pos, hc = parser.make_input_vector(
                                    data, mode='train')
                                x_mor = left_mor + right_mor
                                x_pos = left_pos + right_pos
                                hc = model.convert_to_zero_one(
                                    hc, model.hc_feature_size, mode='train')
                                next_action = self.test_step(
                                    cnn, graph, sess, np.array([x_mor]),
                                    np.array([x_pos]), np.array([child_mor]),
                                    np.array([child_pos]), np.array([hc]),
                                    model_idx)
                                next_action = next_action[0][0]
                                parser.run_action(next_action,
                                                  model,
                                                  mode='train')

                            # evaluate performance
                            predicts = parser.get_result('train')
                            golds = data[0].correct_dep_list

                            sentence_flag = True
                            sentence_with_tag_flag = True
                            for i in range(len(predicts)):
                                if predicts[i].head == golds[i].head:
                                    correct_arc += 1
                                    if predicts[i].type == golds[i].type:
                                        correct_arc_with_tag += 1
                                    else:
                                        sentence_with_tag_flag = False
                                else:
                                    sentence_flag = False
                                    sentence_with_tag_flag = False
                                total_arc += 1
                            if sentence_flag is True:
                                correct_sentence += 1
                            if sentence_with_tag_flag is True:
                                correct_sentence_with_tag += 1
                            total_sentence += 1

                            if (total_sentence % 300) == 0:
                                print('.'),

                        cr_test.close_file()
                        with open(
                                '../data/log/' + str(bagging_iter) +
                                'th_bagging_model.txt', 'a') as f:
                            if ep == 0:
                                print(
                                    '%sth_total_arc = %s %sth_total_sentence = %s'
                                    % (model_idx, total_arc, model_idx,
                                       total_sentence))
                                f.write(
                                    '%sth_total_arc = %s %sth_total_sentence = %s\n'
                                    % (model_idx, total_arc, model_idx,
                                       total_sentence))

                            if not os.path.isdir('../data/ckpt/%sth_ckpt' %
                                                 (bagging_iter)):
                                os.mkdir('../data/ckpt/%sth_ckpt' %
                                         (bagging_iter))

                            if (ep % FLAGS.num_checkpoints) == 0:
                                path = saver.save(
                                    sess, '../data/ckpt/%sth_ckpt/%sth_%s' %
                                    (bagging_iter, model_idx, ep))
                                f.write('\n')
                                f.write(
                                    "Saved model checkpoint to {}\n".format(
                                        path))
                                print("Saved model checkpoint to {}\n".format(
                                    path))

                            f.write(
                                str(model_idx) + 'th model result : ' +
                                'epoch = ' + str(ep + 1) + ', acc = ' +
                                str(correct_arc / float(total_arc)) +
                                ', sen_acc = ' +
                                str(correct_sentence / float(total_sentence)) +
                                ', acc_with_tag = ' +
                                str(correct_arc_with_tag / float(total_arc)) +
                                ', sen_acc_with_tag = ' +
                                str(correct_sentence_with_tag /
                                    float(total_sentence)))
                            print(
                                str(model_idx) + 'th model result : ',
                                'epoch = ' + str(ep + 1) + ', acc = ' +
                                str(correct_arc / float(total_arc)) +
                                ', sen_acc = ' +
                                str(correct_sentence / float(total_sentence)) +
                                ', acc_with_tag = ' +
                                str(correct_arc_with_tag / float(total_arc)) +
                                ', sen_acc_with_tag = ' +
                                str(correct_sentence_with_tag /
                                    float(total_sentence)))

                print(
                    str(bagging_iter) + '_bagging ',
                    str(model_idx) + 'th trainning time : ',
                    str(time.time() - start_time))
Example #8
# Training
# ==================================================

max_acc = 0.0

with tf.Graph().as_default():
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.5)
    session_conf = tf.ConfigProto(
        allow_soft_placement=FLAGS.allow_soft_placement,
        log_device_placement=FLAGS.log_device_placement,
        gpu_options=gpu_options)
    sess = tf.Session(config=session_conf)
    with sess.as_default():
        cnn = TextCNN(
            sequence_length=length,  # remember to change this when switching between train and test
            num_classes=1999,
            embedding_size=FLAGS.embedding_dim,
            filter_sizes=list(map(int, FLAGS.filter_sizes.split(","))),
            num_filters=FLAGS.num_filters)

        # Define Training procedure
        global_step = tf.Variable(0, name="global_step", trainable=False)
        optimizer = tf.train.AdamOptimizer(1e-3)
        grads_and_vars = optimizer.compute_gradients(cnn.loss)
        train_op = optimizer.apply_gradients(grads_and_vars,
                                             global_step=global_step)

        # Keep track of gradient values and sparsity (optional)
        grad_summaries = []
        for g, v in grads_and_vars:
            if g is not None:
                grad_hist_summary = tf.summary.histogram(
                    "{}/grad/hist".format(v.name), g)
Example #9
def train_TextCNN(subject):

    print('Reading Data')
    root = roots[subject]
    dataset = build_dataset(root)
    num_topics = len(dataset['label'].unique())
    common_texts = dataset['item'].tolist()

    print('Cleaning Data')
    common_texts, word2id, valid_words = filter_pad_words(
        common_texts, max_feature)
    id2word = dict(zip(word2id.values(), word2id.keys()))
    origin_texts = [[id2word[ind] for ind in sentence]
                    for sentence in common_texts]

    print('Training Word2Vec')
    model = Word2Vec(
        origin_texts,
        size=embedding_size,
        min_count=1,  # this min_count is also used to select words in utils.clean_sentence
        workers=3,
        window=5,
        iter=3)

    print('Feeding weights')
    fixed = np.zeros((len(word2id), embedding_size))
    for word, ind in word2id.items():
        fixed[ind] = np.array(model.wv[word])
    fixed = torch.from_numpy(fixed).float()

    Network = TextCNN(fixed, window_size_list, len(word2id), num_topics,
                      len(word2id) - 1, dropout_rate,
                      embedding_size).to(device)
    optimizer = optim.Adam(Network.parameters(), lr_schedule[0])

    print('Creating training/testing set')
    label2id = dict(zip(dataset['label'].unique(), range(num_topics)))
    id2label = dict(zip(label2id.values(), label2id.keys()))
    X = np.array(common_texts)
    y = np.array([label2id[label]
                  for label in dataset['label']]).reshape(-1, 1)
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.2,
                                                        random_state=101)

    X_train = torch.tensor(X_train).long()
    y_train = torch.tensor(y_train).long()
    X_test = torch.tensor(X_test).long()
    y_test = torch.tensor(y_test).long()
    train = TensorDataset(X_train, y_train)
    test = TensorDataset(X_test, y_test)
    train_loader = DataLoader(train, 64, True)
    test_loader = DataLoader(test, 64, False)

    print('Training\n')
    criterion = nn.NLLLoss()
    Network = Network.to(device)
    Network.train()
    for i in range(1, epoch + 1):

        log = []

        for X_sample, y_sample in iter(train_loader):

            X_sample = X_sample.to(device)
            y_sample = y_sample.view(-1).to(device)
            logits = Network(X_sample)
            loss = criterion(logits, y_sample)
            log.append(loss.item())

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        print('Epoch {}. Average loss {:.4f}'.format(i, np.mean(log)))

        if i in lr_schedule:
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr_schedule[i]

    print('\nTesting\n')
    predictions = []
    Network.eval()
    with torch.no_grad():

        for X_sample, _ in iter(test_loader):

            X_sample = X_sample.to(device)
            logits = Network(X_sample)
            _, index = logits.topk(1, 1)
            index = index.view(-1).cpu().numpy().tolist()
            predictions += index

    y_test = y_test.reshape(-1).tolist()
    y_test = [id2label[ind] for ind in y_test]
    predictions = [id2label[ind] for ind in predictions]

    print('\nTest result for {} :'.format(subject))
    print(classification_report(y_test, predictions))

    return Network  # return the trained model instance rather than the TextCNN class
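A hedged usage sketch for train_TextCNN: subject must be a key of the module-level roots dict, and the surrounding globals (max_feature, embedding_size, window_size_list, dropout_rate, lr_schedule, epoch, device) must already be defined; the key below is hypothetical.

# Hedged usage sketch: 'math' is a hypothetical key of the roots dict.
trained_net = train_TextCNN('math')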
Example #10
                                   feed_dict)
    return step, loss, acc


with tf.Graph().as_default():
    session_conf = tf.ConfigProto(allow_soft_placement=True,
                                  log_device_placement=False)
    session_conf.gpu_options.allow_growth = True
    sess = tf.Session(config=session_conf)

    with sess.as_default():
        cnn = TextCNN(char_ngram_vocab_size=len(ngrams_dict) + 1,
                      word_ngram_vocab_size=len(words_dict) + 1,
                      char_vocab_size=len(chars_dict) + 1,
                      embedding_size=FLAGS.EMB_DIM,
                      word_seq_len=FLAGS.MAX_LENGTH_WORDS,
                      char_seq_len=FLAGS.MAX_LENGTH_CHARS,
                      l2_reg_lambda=FLAGS.L2_REG_LAMBDA,
                      mode=FLAGS.EMB_MODE,
                      filter_sizes=list(map(int,
                                            FLAGS.FILTER_SIZES.split(","))))

        global_step = tf.Variable(0, name="global_step", trainable=False)
        optimizer = tf.train.AdamOptimizer(FLAGS.LR)
        grads_and_vars = optimizer.compute_gradients(cnn.loss)
        train_op = optimizer.apply_gradients(grads_and_vars,
                                             global_step=global_step)

        print("Writing to {}\n".format(FLAGS.OUTPUT_DIR))
        if not os.path.exists(FLAGS.OUTPUT_DIR):
            os.makedirs(FLAGS.OUTPUT_DIR)
Example #11
args = parser.parse_args()

device = 'cuda' if torch.cuda.is_available() else 'cpu'
#device = 'cpu'
best_acc = 0
start_epoch = 0

train_data = dataloader.Myarticles(args.csvdir, args.article_dir, validation=False)
test_data = dataloader.Myarticles(args.csvdir, args.article_dir, validation=True)

train_loader = data.DataLoader(train_data, batch_size=1, shuffle=True)
test_loader = data.DataLoader(test_data, batch_size=1, shuffle=False)

print('==> Loading Network structure..\n')
args.vocab_size = len(train_data.word2idx)
net = TextCNN.MultiCNNTextBNDeep(args.vocab_size, args.emb_dim, args.content_dim,
                                 args.pooling_dim, args.linear_dim, args.num_classes)
net = net.to(device)

print('==> Loading cuda...\n')

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=args.lr, momentum=0.9, weight_decay=1e-4)

savepath = './train/' + str(args.mname)
if not os.path.exists(savepath):
    os.makedirs(savepath)

def train(epoch):
    print('\nEpoch: %d' % epoch)
    net.train()
    train_loss = 0
Example #12
    def run(self):
        self.graph = tf.Graph()
        with self.graph.as_default():
            f = open(
                '../data/f_model/%sth_f_model/%sth_f_model.dat' %
                (self.bagging_iter, self.model_idx), 'rb')
            model = pickle.load(f)
            f.close()

            self.cnn = mycnn.DepCNNv6(model_idx=self.model_idx,
                                      num_classes=model.type_size,
                                      vocab_size=model.mor_size + 1,
                                      pos_size=model.pos_size + 1,
                                      hc_size=model.hc_feature_size,
                                      embedding_size=FLAGS.embedding_dim,
                                      mlp_size=FLAGS.mlp_size,
                                      l2_reg_lambda=FLAGS.l2_reg_lambda)

            checkpoint_file = '../data/ckpt/%sth_ckpt/%sth_%s' % (
                self.bagging_iter, self.model_idx, int(FLAGS.num_epochs) - 1)
            cr_test = dt.CorpusReader()
            if self.bagging_iter == str(0) and self.mode != 'g_predict':
                cr_test.set_file(
                    (self.input_path + '/test_%s.txt') % (self.file_idx))
            elif self.mode == 'g_predict':
                cr_test.set_file(self.input_path)

            if self.input_path == 'default':
                self.input_path = '../data/test_data/%sth_test_data'
                cr_test.set_file((self.input_path + '/test_%s.txt') %
                                 (self.bagging_iter, self.file_idx))
            elif self.input_path != 'default' and self.mode != 'g_predict':
                self.input_path = self.input_path
                cr_test.set_file(
                    (self.input_path + '/test_%s.txt') % (self.file_idx))

            session_conf = tf.ConfigProto()
            session_conf.gpu_options.allow_growth = True
            self.sess = tf.Session(config=session_conf)
            saver = tf.train.Saver()
            saver.restore(self.sess, checkpoint_file)

            last_flag = False
            data_dix = 0

            parser = tp.TransitionParser(model, FLAGS.batch_size)
            results = []
            while True:
                data = cr_test.get_next()
                data_dix += 1
                parsing_trees = [0] * len(data)

                # Stop when the reader is exhausted or returns an unusable batch.
                if data is None or data == []:
                    last_flag = True
                    self.file_write(parsing_trees)
                    break
                if data[0].raw_sentence is None:
                    last_flag = True
                    self.file_write(parsing_trees)
                    break

                for batch_idx in range(len(data)):
                    parsing_trees[batch_idx] = {
                        'raw_sentence': data[batch_idx].raw_sentence,
                        'tree': [],
                        'eojeol_list': data[batch_idx].eojeol_list
                    }

                parser.initialize(data)

                tmp_idx = 0
                while parser.is_final_state() is False:
                    features, hc = parser.make_input_vector(data)
                    hc = model.convert_to_zero_one(hc, model.hc_feature_size)
                    next_action = self.test_step(features, hc)
                    next_action = next_action[0]
                    parser.run_action(next_action, model)
                    tmp_idx += 1

                predicts = parser.get_result('test')

                for batch_idx, predict in enumerate(predicts):
                    parsing_trees[batch_idx]['tree'] = (predict)

                results.append(parsing_trees)
                parsing_trees = [0] * len(data)

                if data_dix % 20 == 1:
                    print(
                        str(self.file_idx) + 'th_file , ' +
                        str(self.model_idx) + 'th_model index, ' +
                        str(data_dix) + '_th batch')
                    self.file_write(results)
                    results = []

            self.file_write(results)