def eval():
    """Run inference on the test split and write beam-search candidates.

    Restores the latest checkpoint for ``hp.modelname``, decodes every test
    example, and writes one line per example to
    ``./results/<modelname>_result.txt`` as:
    ``source<TAB>target<TAB>candidate1<TAB>candidate2...``.

    NOTE(review): the name shadows the builtin ``eval``; kept unchanged so
    existing callers keep working.
    """
    if not os.path.exists('./results'):
        os.makedirs('./results')

    # Load graph
    print("Graph loaded")
    print("Model name:{}".format(hp.modelname))
    # Load data
    print("Testing Data...")
    txt_src_names, idx_src_names, txt_tgt_names, _ = load_evaluate_data(
        eval_mode="test")

    # Only the target index->word map is used below; the rest are loaded for
    # interface symmetry with load_vocab().
    x_w2i, x_i2w, y_w2i, y_i2w = load_vocab()

    g = Graph(is_training=False)
    with g.graph.as_default(), tf.Session() as sess:
        saver = tf.train.Saver()
        # Restore parameters
        print("Parameter Restoring...")
        saver.restore(sess,
                      tf.train.latest_checkpoint(hp.logdir + '/' +
                                                 hp.modelname))
        # Inference
        count = 0
        # Explicit encoding: candidates contain hangul. The file is in text
        # mode, so everything written to it must be str, not bytes.
        with open('./results/' + hp.modelname + '_result.txt', "w",
                  encoding='utf-8') as fout:
            for i in range(0, len(txt_src_names), hp.batch_size):
                batch_txt_src_names = txt_src_names[i:i + hp.batch_size]
                batch_idx_src_names = idx_src_names[i:i + hp.batch_size]
                batch_txt_tgt_names = txt_tgt_names[i:i + hp.batch_size]
                # predicted_ids — presumably (batch, time, beam) from
                # beam-search decoding; TODO confirm against Graph.
                batch_predicted_ids = sess.run(g.pred_outputs, {
                    g.x: batch_idx_src_names
                }).predicted_ids[:, :, :]

                for source, target, predicted_ids in zip(
                        batch_txt_src_names, batch_txt_tgt_names,
                        batch_predicted_ids):
                    print(
                        str(count) + '\t' + source + '\t' +
                        hangul.join_jamos(target))
                    count += 1
                    candidates = []
                    # (time, beam) -> (beam, time): each row is one candidate.
                    predicted_ids = predicted_ids.transpose(1, 0)
                    for pred in predicted_ids:
                        # Truncate at the end-of-sequence token "E", then
                        # recompose jamo into hangul syllables.
                        candidate = "".join(y_i2w[idx]
                                            for idx in pred).split("E")[0]
                        candidate = hangul.join_jamos(candidate)
                        candidates.append(candidate)

                    fout.write(source + '\t')
                    fout.write(hangul.join_jamos(target))
                    for candidate in candidates:
                        fout.write('\t')
                        # BUG FIX: the original wrote candidate.encode('utf-8')
                        # (bytes) to a text-mode file, raising TypeError on
                        # Python 3. Write the str directly.
                        fout.write(candidate)
                    fout.write('\n')
                    fout.flush()
    def page_text_finder(self, report_text):
        """Find the page block of *report_text* that mentions this file's company.

        The company name and number are parsed from ``self.file_nm``
        (underscore-separated fields 3 and 4). Page boundaries are lines
        containing ``page_id``; text is accumulated between them and the
        first page whose accumulated text contains both the titled company
        name and the company number is returned.

        Returns:
            tuple: (page_text, found, company_name, company_num); page_text
            is '' and found is False when no page matches.
        """
        fields = self.file_nm.split('_')
        company_name = fields[3]
        company_num = fields[4][1:]

        # To resolve hangul encoding issue: recompose decomposed jamo.
        company_name = hangul.join_jamos(j2hcj(h2j(company_name)))

        # Known spelling variants -> the form used inside the reports.
        aliases = {'LG상사': 'LG 상사'}
        company_name = aliases.get(company_name, company_name)

        accumulated = ''
        for line in report_text.split('\n'):
            on_boundary = "page_id" in line
            if (on_boundary
                    and '||Title||  ' + company_name in accumulated
                    and company_num in accumulated):
                # Matching page found — return its accumulated text.
                return accumulated, True, company_name, company_num
            if on_boundary:
                accumulated = ''
            else:
                accumulated += line + '\n'

        return '', False, company_name, company_num
def save_to_txt(file_nm, file_text,
                root_dir='/Users/daniel/Desktop/test_2/after_inspec_txt/'):
    """Write *file_text* to *root_dir*/*file_nm*.

    Args:
        file_nm: Target file name (may contain hangul).
        file_text: Text content to write.
        root_dir: Destination directory. Defaults to the historical
            hard-coded path for backward compatibility; parameterized so the
            function is usable outside that machine.
    """
    path = root_dir + file_nm
    # Recompose decomposed jamo so the path matches the on-disk form.
    path = hangul.join_jamos(j2hcj(h2j(path)))
    print(file_nm)

    # Explicit UTF-8: the content is Korean text and the platform default
    # encoding may not be able to represent it.
    with open(path, 'w', encoding='utf-8') as out_file:
        out_file.write(file_text)
    def convert_pdf_to_txt(self, pdf_file):
        """Convert a PDF report to text and extract this company's page.

        Processes the PDF page by page, scanning the accumulated text after
        each page for the page block matching the company encoded in the
        file name (see ``page_text_finder``).

        Args:
            pdf_file: File name of the PDF inside ``self.report_pdf_dir``.

        Returns:
            tuple: (report_text, company_nm, company_num). ``report_text``
            is None when no matching page was found; all three are None for
            a PDF that yields no pages at all.

        Side effects:
            Sets ``self.file_nm`` (base name without extension) and
            ``self.pdf_path``.
        """
        output_string = StringIO()
        # Base name without extension; used by page_text_finder and
        # save_to_txt.
        self.file_nm = pdf_file.split(".")[0]

        self.pdf_path = self.report_pdf_dir + pdf_file
        # Recompose decomposed jamo so the path matches the on-disk form.
        self.pdf_path = hangul.join_jamos(j2hcj(h2j(self.pdf_path)))

        laparams = LAParams(line_overlap=.5,
                            char_margin=1.35,
                            line_margin=1.0,
                            word_margin=0.01,
                            boxes_flow=.5,
                            detect_vertical=False,
                            all_texts=False)

        rsrcmgr = PDFResourceManager()
        device = FinanceConverter(rsrcmgr, output_string, laparams=laparams)
        interpreter = PDFPageInterpreter(rsrcmgr, device)

        # Extract text. Defaults guard against a zero-page PDF, where the
        # loop body never runs and the names would otherwise be unbound at
        # the return statement (the original raised UnboundLocalError).
        found = False
        report_text = None
        company_nm = None
        company_num = None
        with open(self.pdf_path, 'rb') as in_file:
            for page in PDFPage.get_pages(in_file, check_extractable=True):
                interpreter.process_page(page)
                # getvalue() returns everything written so far (cumulative);
                # page_text_finder resets its buffer at every page_id marker,
                # so re-scanning the accumulated text is safe.
                page_text = output_string.getvalue()
                report_text, found, company_nm, company_num = \
                    self.page_text_finder(page_text)
                if found:
                    break

            if not found:
                report_text = None

        return report_text, company_nm, company_num
    def save_to_txt(self, txt):
        """Write *txt* to ``<output_txt_dir>/<file_nm>.txt``."""
        target = self.output_txt_dir + self.file_nm + '.txt'
        # Recompose decomposed jamo so the path matches the on-disk form.
        target = hangul.join_jamos(j2hcj(h2j(target)))

        with open(target, 'w') as out_file:
            out_file.write(txt)