예제 #1
0
def _default_output_filename(document_path):
    """Return ``<document_path minus .docx>-endnotes.docx``.

    Only a literal trailing ``.docx`` (case-insensitive) is removed.
    ``str.rstrip(".docx")`` strips any trailing run of the characters
    ``.``, ``d``, ``o``, ``c`` and ``x``, so it would turn ``mydoc.docx``
    into ``my``.
    """
    suffix = ".docx"
    if document_path.lower().endswith(suffix):
        document_path = document_path[:-len(suffix)]
    return document_path + "-endnotes.docx"


def main(argv):
    """Write the endnotes of a .docx document into a new document.

    For every (reference run, paragraph) pair returned by ``get_paragraphs``
    the words preceding the reference are emitted in bold, in curly quotes,
    followed by the text of the referenced endnote.  A short summary of each
    note is printed to stdout.

    Args:
        argv: Command-line arguments without the program name.  See the
            parser below for the accepted options.

    Raises:
        AssertionError: If a paragraph references an endnote id that
            ``get_notes`` did not return.
        SystemExit: On argument errors, or with status 1 when a reference
            cannot be located in its own paragraph.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("document", help="Document to load")
    parser.add_argument("-n",
                        type=int,
                        default=10,
                        help="Number of previous words to include")
    parser.add_argument("-o", "--output", default="", help="Output file")
    parser.add_argument("--chapter",
                        type=int,
                        help="Load only refs from chapter")
    parser.add_argument("--sort", action="store_true")

    args = parser.parse_args(argv)

    output_filename = args.output or _default_output_filename(args.document)

    source = Document(args.document)

    notes, note_ids = get_notes(source)
    pars = get_paragraphs(source, args.chapter)

    referenced_ids = set()
    for ref, _ in pars:
        referenced_ids.update(r.id for r in ref.endnote_references)

    # Sanity check that every referenced note exists.  Raised explicitly so
    # the check is not stripped when Python runs with -O.
    missing = referenced_ids.difference(note_ids)
    if missing:
        raise AssertionError(
            f"endnote references without a matching note: {sorted(missing)}")

    result = Document()

    if args.sort:
        pars = sorted(pars, key=ref_to_note(notes))

    for ref, p in pars:
        note_id = ref.endnote_references[0].id
        runs = p.runs

        ix = ref_to_run_ix(note_id, runs)
        if ix == -1:
            print(f"Error: reference {note_id} not found in following paragraph:")
            print(p.text)
            sys.exit(1)

        pre_text = get_previous_words(ix, runs, args.n)
        # The quoted context is written once, in front of the first
        # non-empty paragraph of the note.  A flag is used instead of
        # clearing pre_text so the summary below can still print it.
        quote_pending = len(pre_text) > 0
        note = notes[note_id]

        for note_par in note.paragraphs:
            new_par = result.add_paragraph()

            if quote_pending and len(note_par.text.strip()) > 0:
                quote_run = new_par.add_run()
                quote_run.add_text("“" + pre_text + "”")
                quote_run.bold = True
                quote_pending = False

            for run in note_par.runs:
                new_run = new_par.add_run()
                new_run.add_text(run.text)
                copy_run_style(run, new_run, keep=["italic"])

        note_text = "".join(par.text for par in note.paragraphs)

        print(f"Note id: {note_id}")
        print(f"Note text: {note_text}")
        print(f"Source paragraph: {pre_text}")
        print("")

    result.save(output_filename)
예제 #2
0
from docx import Document
from docx.shared import Inches
import os
import sys

# docxname is the first command-line argument, or 'test.docx' when none is given.
args = sys.argv
if len(args) == 1:
    docxname = 'test.docx'
else:
    docxname = args[1]

# Part names and zone numbers for the bottom parts and for the other parts.
sosudPartsBot = ['BOT-1', 'BOT-1-2', 'BOT-1-3', 'BOT-2', 'BOT-2-2', 'BOT-2-3']
sosudParts = ['OBECH', 'UB+OB', 'UB-FAST', 'USIL']
zonesBot = [4, 6, 7, 4, 6, 7]
zones = [2, 1, 3, 5]
doc = Document()

# Caption suffix for each view number.
views = {1: 'Слева', 2: 'Справа', 3: 'Вид снизу'}

# Frame pictures: one image plus caption per view.
ramaPic = 'ramaMises'
for v in (1, 2):
    picture_file = '{}-{}.png'.format(ramaPic, v)
    doc.add_picture(picture_file, width=Inches(6))
    caption = 'Рисунок  - Эффективные напряжения по Мизесу. {}'.format(views[v])
    doc.add_paragraph(caption)

# Maximum-strain pictures: one image plus caption per view.
for v in (1, 2):
    picture_file = 'emax-{}.png'.format(v)
    doc.add_picture(picture_file, width=Inches(6))
    caption = 'Рисунок  - Максимальная деформация растяжения в сосуде их ПКМ. {}'.format(views[v])
    doc.add_paragraph(caption)

# Build the picture file name for every bottom part and view.
for i, p in enumerate(sosudPartsBot):
    for v in (1, 2, 3):
        fname = '%s-%s.png' % (p, v)
예제 #3
0
import docx
from docx import Document
from docx.shared import RGBColor, Inches, Pt, Cm
from docx.oxml.ns import qn
from docx.enum.text import WD_PARAGRAPH_ALIGNMENT, WD_UNDERLINE, WD_TAB_ALIGNMENT, WD_TAB_LEADER
from docx.enum.style import WD_STYLE_TYPE
import os

# Open the two documents this scratch script inspects.
doc = Document("12.docx")

doc1 = Document("13.docx")

# Disabled debugging aid: list the styles of both documents.
# for style1 in doc1.styles:
#     print(style1)
# print(len(doc.styles))
# for style2 in doc.styles:
#     print(style2)
t = doc1.styles
# Disabled debugging aid: print the names of the paragraph styles in doc1.
# par_styles = [s for s in t if s.type == WD_STYLE_TYPE.PARAGRAPH]
# for style in par_styles:
#     print(style.name)
# Register a new character style named "yangshi" in doc and print its name.
d = doc.styles
d1 = d.add_style("yangshi", WD_STYLE_TYPE.CHARACTER)
print(d1.name)

# Print the text of every paragraph in doc.
for para in doc.paragraphs:
    print(para.text)
# s = doc1.paragraphs[0]
#
# Keep a handle on the first paragraph of doc.
s1 = doc.paragraphs[0]
# s.paragraph_format.space_after = Inches(0.5)
예제 #4
0
    def convert(self, input, output, type):
        """Turn a document into a flat list of TEXT and NEWPARA tokens.

        Args:
            input: Path of the file to read.
            output: Not used by this method.
            type: ``'iconDOCX'`` when *input* is already a .docx file; any
                other value makes the method convert *input* to .docx with
                the ``textutil`` command-line tool first.

        Returns:
            A list with one ``TEXT(bold, italic, indent, font_size, text)``
            entry per kept run, followed by one ``NEWPARA()`` per non-empty
            paragraph.

        Raises:
            subprocess.CalledProcessError: If ``textutil`` fails.
        """
        if type == 'iconDOCX':
            print('Opening {}'.format(input))
            docx = Document(input)
        else:
            tmp = './tmp-{}'.format(input.replace('comlaw/', ''))
            print('converting from {} {}'.format(type, input))
            try:
                # Run textutil with an argument list and redirect stdout in
                # Python.  The file name never passes through a shell, so
                # spaces or shell metacharacters in it are harmless.
                with open(tmp, 'wb') as converted:
                    subprocess.check_call(
                        ['textutil', '-convert', 'docx', '-stdout',
                         './{}'.format(input)],
                        stdout=converted)
                docx = Document(tmp)
            finally:
                # Remove the temporary file even when conversion or loading
                # fails.
                if os.path.exists(tmp):
                    os.remove(tmp)

        # Field codes that are stripped from the text of every run.
        remove = re.compile(
            r'(DOCPROPERTY ([a-zA-Z]+)|TOC \\o|PAGEREF _[a-zA-Z0-9]+ \\h [\d]+|STYLEREF [a-zA-Z0-9]+)'
        )

        out = []

        for p in docx.paragraphs:

            if not p.text.strip():
                continue

            # Paragraph font size in points: own style, then base style,
            # then a default of 11.
            if p.style.font and p.style.font.size:
                p_size = p.style.font.size.pt
            elif p.style.base_style and p.style.base_style.font and p.style.base_style.font.size:
                p_size = p.style.base_style.font.size.pt
            else:
                p_size = 11

            _indent = para_indent(p)
            indent = int(_indent / p_size) if _indent > 0 else 0

            # Count how many runs use each font size; runs without an
            # explicit size count towards the paragraph size.
            sizes = collections.defaultdict(int)
            for r in p.runs:
                sizes[r.font.size.pt if r.font.size is not None else p_size] += 1

            # The most common run size decides the header level.  Without
            # runs, fall back to the paragraph size instead of failing.
            if sizes:
                dominant_size = max(sizes.items(), key=lambda kv: kv[1])[0]
            else:
                dominant_size = p_size
            font_size = pts_to_header(dominant_size)
            # NOTE(review): 0 presumably means "body text, not a header" -
            # confirm against pts_to_header.
            is_level_zero = font_size == 0

            for r in p.runs:

                text = r.text
                for frm, to in code_map:
                    text = text.replace(frm, to)
                text = text.encode('ascii', 'replace')

                # Skip "EMBED ..." field runs (embedded pictures).
                if text.strip().startswith('EMBED'):
                    continue

                text = remove.sub('', text)

                out.append(
                    TEXT(1 if r.bold is True and is_level_zero else 0,
                         1 if r.italic is True and is_level_zero else 0,
                         indent, font_size, text))

            # End of paragraph
            out.append(NEWPARA())

        return out
예제 #5
0
import os
import re
import xlwt
import time
from docx import Document
path = "C:\\Users\\songjie\\Desktop\\qwer"
# Take the (dirpath, dirnames, filenames) entry of `path` itself.  Looping
# over os.walk() and reading `files` afterwards would leave the entry of the
# last sub-directory visited, whose file names do not live directly under
# `path`.  A missing directory yields an empty file list.
files = next(os.walk(path), (path, [], []))
print(len(files[2]))
used = 0

wr = xlwt.Workbook()
sheet1 = wr.add_sheet('Sheet1', cell_overwrite_ok=True)
for q in range(0, len(files[2])):
    doc = Document(os.path.join(path, files[2][q]))
    print("processing:%s" % files[2][q])
    print(files[2][q])
    # Fetch the rows of the first table once per document instead of
    # rebuilding them on every iteration.
    table_rows = doc.tables[0].rows
    # Row 0 of the table is skipped.
    for x in range(1, len(table_rows)):
        print(x)
        print("1 %s" % time.time())
        a = table_rows[x]
        print("2 %s" % time.time())
        # Convert the table row number into a zero-based sheet row offset.
        x -= 1
        name = a.cells[1].paragraphs[0].text
        sheet1.write(x + used, 0, name)

        # The first run of digits in the file name is written as the year.
        temp = re.findall(r'(\d+)', files[2][q])
        year = temp[0]
        sheet1.write(x + used, 3, year)
        certificate_number = a.cells[2].paragraphs[0].text
예제 #6
0
def _record_check(response_message, section, passed, ok_message, fail_message):
    """Store the outcome of one check in ``response_message[section]``."""
    response_message[section]["message"] = ok_message if passed else fail_message
    response_message[section]["status"] = bool(passed)


def lab_1_check_answer(student_path, correct_path):
    """Compare a student's document with the reference document.

    Args:
        student_path: Path of the student's .docx file.
        correct_path: Path of the reference .docx file.

    Returns:
        A dict with an ``"errors"`` list and one dict per check.  A check
        that ran has ``"message"`` and ``"status"`` keys.  If any step
        raises, a generic error message is appended to ``"errors"`` and the
        checks that had not run yet keep an empty dict.
    """
    response_message = {
        "errors": [],
        "custom_header_style": {},
        "custom_main_style": {},
        "document_margins": {},
        "document_header": {},
        "footnote": {},
        "table_of_contents": {},
        "document_numbering": {},
        "headers_style": {},
    }
    try:
        document_student = Document(student_path)
        document_correct = Document(correct_path)

        header_style_name = "Заголовок_ФИО"
        main_style_name = "Основной_ФИО"

        _record_check(response_message, "table_of_contents",
                      is_table_of_contents(document_student),
                      "Оглавление создано", "Оглавление не создано")

        _record_check(response_message, "document_numbering",
                      is_document_numbering(document_student),
                      "Страницы пронумерованы", "Страницы не пронумерованы")

        # The same style extractor is used for the main and header styles.
        main_style_student = get_custom_header_style(document_student,
                                                     main_style_name)
        main_style_correct = get_custom_header_style(document_correct,
                                                     main_style_name)
        _record_check(response_message, "custom_main_style",
                      main_style_student == main_style_correct,
                      "Основной стиль создан верно",
                      "Основной стиль создан неверно")

        header_style_student = get_custom_header_style(document_student,
                                                       header_style_name)
        header_style_correct = get_custom_header_style(document_correct,
                                                       header_style_name)
        _record_check(response_message, "custom_header_style",
                      header_style_student == header_style_correct,
                      "Стиль заголовков создан верно",
                      "Стиль заголовков создан неверно")

        # Footnotes are read from the file paths, not the loaded documents.
        _record_check(response_message, "footnote",
                      get_footnotes(student_path) == get_footnotes(correct_path),
                      "Сноска создана верно", "Сноска создана неверно")

        _record_check(response_message, "headers_style",
                      get_docement_headers(document_student, header_style_name)
                      == get_docement_headers(document_correct, header_style_name),
                      "Стиль к заголовкам применен верно",
                      "Стиль к заголовкам применен неверно")

        _record_check(response_message, "document_header",
                      get_document_header(document_student)
                      == get_document_header(document_correct),
                      "Верхний колонтитул применен верно",
                      "Верхний колонтитул применен неверно")

        _record_check(response_message, "document_margins",
                      get_document_margins(document_student)
                      == get_document_margins(document_correct),
                      "Отступы применены верно", "Отступы применены неверно")
    except Exception:
        # Any failure is reported with the same generic message.
        # KeyboardInterrupt and SystemExit are no longer swallowed.
        response_message["errors"].append('Ошибка при открытии файла')
    return response_message
  def export(self):
    """Export ``self.results`` as a .docx report with a table and a summary.

    Shows an error dialog and does nothing else when ``self.filename`` is
    empty.  Otherwise the report is saved as ``<company>.docx`` in
    ``self.foldername``, or in the current working directory (with an
    information dialog) when no folder was chosen.
    """
    if self.filename != '':
      rows_order = ['流动资产合计', '非流动资产合计', '资产总计', '流动负债合计', '非流动负债合计', '负债合计', '实收资本', '所有者权益合计', '营业收入', '营业成本', '利润总额', '净利润']
      self.chooseFolder()
      # The string literal below holds a disabled, template-based variant of
      # the export.  It is never executed.
      '''
      doc = Document('老板希望的导出效果.docx')
      table = doc.tables[0]
      for row in table.rows[1:]:
        key = row.cells[0].text
        row.cells[1].text = str(self.results[key]['end'])
        row.cells[2].text = str(self.results[key]['start'])
      para = doc.paragraphs
      for p in para:
        for r in p.runs:
          if r.underline:
            print(r.underline)
          else:
            print(r.text)
      doc.save('test.docx')
      '''
      report = Document()

      # Use the same font for Latin and East Asian text in the Normal style.
      normal_style = report.styles['Normal']
      normal_style.font.name = '宋体'
      normal_style._element.rPr.rFonts.set(qn('w:eastAsia'), '宋体')

      report.add_heading(self.results['company'], 0)

      table = report.add_table(rows=1, cols=4, style='Table Grid')
      captions = ('科目', '年初(万元)', '期末(万元)', '变动(%)')
      for header_cell, caption in zip(table.rows[0].cells, captions):
        header_cell.text = caption

      value_fields = ('start', 'end', 'change')
      for item in rows_order:
        cells = table.add_row().cells
        cells[0].text = item
        # The data may be incomplete.  A missing entry raises KeyError, which
        # is caught here so the row gets blank cells instead of a crash.
        try:
          for column, field in enumerate(value_fields, start=1):
            cells[column].text = str(self.results[item][field])
        except KeyError:
          for column in (1, 2, 3):
            cells[column].text = ''

      def figures_for(key, blank_change=' '):
        # Return the figures stored under `key`, or placeholders if missing.
        try:
          return self.results[key]
        except KeyError:
          return {'start': ' ', 'end': ' ', 'change': blank_change}

      total_assets = figures_for('资产总计')
      total_debt = figures_for('负债合计', blank_change='')
      revenue = figures_for('营业收入')
      net_profit = figures_for('净利润')
      summary = '截至报告期末,项目公司总资产{0}万元,相对上期增长{1}%;总负债{2}万元,相对上期增长{3}%。收入和利润方面,报告期末公司营业收入{4}万元,相对上期增长{5}%;净利润{6}万元,相对上期增长{7}%。'.format(
        total_assets['end'], total_assets['change'],
        total_debt['end'], total_debt['change'],
        revenue['end'], revenue['change'],
        net_profit['end'], net_profit['change'])
      report.add_paragraph(summary)

      report.add_paragraph('通过上述信息,公司各项财务指标(有/无)异常,报告期内(有/无)明显变化。总体来看,公司财务状况()。')

      if self.foldername != '':
        target_dir, used_default_dir = self.foldername, False
      else:
        target_dir, used_default_dir = os.getcwd(), True
      report.save(os.path.join(target_dir, '{}.docx'.format(self.results['company'])))
      if used_default_dir:
        tkinter.messagebox.showinfo('提示', '未选择文件存放位置,默认存放于当前文件夹!')
    else:
      tkinter.messagebox.showerror('错误', '未选择文件!')
예제 #8
0
파일: doxc.py 프로젝트: ozzigit/biblio
# coding: utf8
# It makes sense to port this to Python 3
from docx import Document
# Source document read by this script.
document = Document('1.docx')
# Text files opened for writing.  None of them is closed in this part of the
# script.
fileProblems = open('bad.txt', 'w')
filedoctype = open('doctype.txt', 'w')
fileautors = open('autors.txt', 'w')
fileautorsUkazatel = open('autorsUkazatel.txt', 'w')
filelastautorsUkazatel = open('lastautorsUkazatel.txt', 'w')
# Module-level counters, accumulators and flags, all starting at their
# zero/empty/False value.
intNumOfZapis = 0
intColOfDonFindZapis = 0
testingString = ""
dataOfAllFindUniqKeysOfAll = {}
listOfKeyWords = []
listOfAuthors = []
nonSortedSlovarOfAUtors = {}
flagIsAllZapisRegonuzeBuType = False
flagIsAllZapisRegonuzeBuAutorsField = False
flagIsPresenBetterKeyForRedactors = False


class Zapis:
    """Record object."""
    # NOTE(review): both methods below take no ``self`` and are not declared
    # as static methods, so calling them on an instance raises TypeError -
    # confirm whether ``@staticmethod`` or a ``self`` parameter is intended.
    def CheckRazdeliteli():
        """Describe the rules for separators."""
        pass

    def CheckTypeOfDoc():
        """Describe the document types."""
        pass
예제 #9
0
def given_a_run(context):
    """Store a new run, added to a new paragraph of a new document, on *context*."""
    context.run = Document().add_paragraph().add_run()
예제 #10
0
 def __init__(self):
     """Store a newly created ``Document`` on ``self.document``."""
     self.document = Document()
예제 #11
0
def convert_in_word(request):
    """Build a Word document from the uploaded XML file and return its URL.

    The XML is read from ``request.FILES['file']``.  The generated .docx is
    saved under ``<cwd>/converter/static/files/`` with a timestamp-based
    name, and the matching static URL is returned in an ``HttpResponse``.
    """
    def text_of(path):
        # Text of the first element matching `path` below the XML root.
        return root.find(path).text

    def add_party_row(table, party_text, place_text):
        # One row: party details on the left, a right-aligned place on the right.
        cells = table.add_row().cells
        cells[0].text = party_text
        # NOTE(review): presumably meant to style the cell text - confirm
        # that table cells expose a settable ``style``.
        cells[0].style = document.styles['Normal']

        place_par = cells[1].add_paragraph()
        place_par.text = place_text
        place_par.alignment = WD_ALIGN_PARAGRAPH.RIGHT
        place_par.style = document.styles['Normal']

    def to_day_month_year(iso_day):
        # "YYYY-MM-DD" -> "DD/MM/YYYY"
        return datetime.strptime(iso_day, "%Y-%m-%d").strftime("%d/%m/%Y")

    document = Document()
    normal_font = document.styles['Normal'].font
    normal_font.name = 'Calibri'
    normal_font.size = Pt(10)

    # Same margin, in centimetres, on all four sides of every section.
    margin = 1.5
    sections = document.sections
    for section in sections:
        for side in ("top", "bottom", "left", "right"):
            setattr(section, side + "_margin", Cm(margin))

    footer = sections[0].footer

    root = ET.parse(request.FILES.get('file')).getroot()

    # Exporter block (box 1) and place of origin (box 4).
    exporter_ragione_sociale = text_of("./Esportatore_1/RagioneSociale")
    exporter_address = text_of("./Esportatore_1/Indirizzo")
    exporter_comune = text_of("./Esportatore_1/Comune")
    exporter_cap = text_of("./Esportatore_1/CAP")
    exporter_country_code = text_of("./Esportatore_1/CodPaese")
    exporter_string = (exporter_ragione_sociale + "\n" + exporter_address +
                       "\n" + exporter_comune + " " + exporter_cap + "\n" +
                       exporter_country_code)

    provenience = text_of("./Origine_4")

    # Consignee block (box 3) and destination (box 5).
    destinatario_ragione_sociale = text_of("./Destinatario_3/RagioneSociale")
    destinatario_address = text_of("./Destinatario_3/Indirizzo")
    destinatario_comune = text_of("./Destinatario_3/Comune")
    destinatario_cap = text_of("./Destinatario_3/CAP")
    destinatario_country_code = text_of("./Destinatario_3/CodPaese")
    destinatario_string = (destinatario_ragione_sociale + "\n" +
                           destinatario_address + "\n" +
                           destinatario_comune + " " + destinatario_cap +
                           "\n" + destinatario_country_code)

    arrival = text_of("./Destinazione_5")

    parties_table = document.add_table(rows=0, cols=2)
    add_party_row(parties_table, exporter_string, provenience)
    add_party_row(parties_table, destinatario_string, arrival)

    # Second table: a nested two-column table in the right-hand cell.
    outer_cells = document.add_table(rows=0, cols=2).add_row().cells
    nested_cells = outer_cells[1].add_table(rows=0, cols=2).add_row().cells
    nested_cells[0].text = provenience
    nested_cells[1].text = arrival

    document.add_paragraph('\n\n\n\n\n\n\n\n\n\n')

    # One row per goods item.
    goods_table = document.add_table(rows=0, cols=2)
    for casella_8 in root.findall("./Casella_8_9_10/Casella_8"):
        goods_cells = goods_table.add_row().cells
        goods_cells[0].text = (casella_8.find("./Progressivo").text + " " +
                               casella_8.find("./TotaleColli").text +
                               casella_8.find("./CodiceConfezione").text +
                               casella_8.find("./DescrizioneMerce").text)
        goods_cells[0].style = document.styles['Normal']
        goods_cells[1].text = casella_8.find("./Progressivo").text
        goods_cells[1].style = document.styles['Normal']

    # Footer data: certificate id and the customs entries (box 11).
    cert_id = text_of("./Cert_ID")
    visto_modello = text_of("./VistoDogana_11/Modello")
    visto_numero = text_of("./VistoDogana_11/Numero")
    luogo = text_of("./VistoDogana_11/Luogo")
    data = text_of("./VistoDogana_11/Data")[0:10]
    data2 = text_of("./VistoDogana_11/Del")[0:10]

    data = to_day_month_year(data)
    data2 = to_day_month_year(data2)

    footer.paragraphs[0].text = "".join([
        "Certificato", cert_id, "\n", "Versione 1\n\n\n",
        visto_modello, "     ", visto_numero, "\t\t", luogo, "  ", data, "\n",
        data2, "\n", "28100 UD PARMA",
    ])

    # Save under a timestamp-based name and answer with its static URL.
    dirspot = os.getcwd()
    timestampStr = datetime.now().strftime("%d%b%Y%H%M%S%f")
    document.save(dirspot + '/converter/static/files/' + timestampStr +
                  '.docx')
    return HttpResponse(static('files/' + timestampStr + '.docx'))
예제 #12
0
import csv
import os
from docx import Document
from docx.shared import Inches

# Create the answer-key and blank-quiz documents.
documentAnswerKey = Document()
documentBlankQuiz = Document()

# Give each document its title.
documentAnswerKey.add_heading('CFA Formula Quiz Answer Key', 0)
documentBlankQuiz.add_heading('CFA Formula Quiz', 0)

# BEGIN LOOP
# The csv module asks for files opened with newline="" so that line breaks
# inside quoted fields are passed through unchanged.
with open("guide.csv", newline="") as csvfile:
    reader = csv.DictReader(csvfile)
    for row in reader:
        # Value of the NumberRef column
        NumberRef = row['NumberRef']

        # Value of the FormulaName column
        FormulaName = row['FormulaName']

        # Write the numbered question into the answer key
        documentAnswerKey.add_paragraph(str(NumberRef) + ") " + FormulaName)
예제 #13
0
def read_file(file_name):
    """Return the text of every paragraph of the document at *file_name*."""
    return [paragraph.text for paragraph in Document(file_name).paragraphs]
예제 #14
0
    # Remaining command-line options; the parser and earlier options are
    # defined above this fragment.
    parser.add_argument('--dest')
    parser.add_argument('--use-braces', nargs=2, default=None)
    parser.add_argument('--remove-empty-braces', action='store_true')
    parser.add_argument('--remove-unreplaced-braces', action='store_true')
    parser.add_argument('--remove-empty-row', type=int)

    args = parser.parse_args()

    # input validity checks
    # At least one of old_text or a replace-list file must be supplied.
    if not args.old_text and not args.replace_list_file:
        raise Exception(
            'You must specify either old_text and new_text argument or use a json spec file'
        )

    # old_text is only meaningful together with new_text.
    if args.old_text and not args.new_text:
        raise Exception('You must specify new_text if you specified old_text',
                        args.old_text, args.new_text)

    # process
    document = Document(args.original_docx_path)
    # The replacements come from the JSON file when given, otherwise from the
    # single old_text/new_text pair.
    if args.replace_list_file:
        replace_list = readReplaceListFromJson(args.replace_list_file)
    else:
        replace_list = [{"old_text": args.old_text, "new_text": args.new_text}]

    # All parsed options are passed along as a plain dict.
    multiReplace(document, replace_list, vars(args))

    # save
    # Without --dest the original file is overwritten.
    dest = args.dest or args.original_docx_path
    document.save(dest)
예제 #15
0
    def read_words(self):
        """Load ``self.file_in`` and store its space-separated words in ``self.words``."""
        source = Document(self.file_in)
        collected = []
        self.words = collected

        for paragraph in source.paragraphs:
            # Splitting on a single space keeps empty strings for repeated
            # spaces.
            collected += paragraph.text.split(' ')
예제 #16
0
def given_a_run_having_bool_prop_set_on(context, bool_prop_name):
    """Store on *context* a new run whose *bool_prop_name* attribute is True."""
    paragraph = Document().add_paragraph()
    new_run = paragraph.add_run()
    setattr(new_run, bool_prop_name, True)
    context.run = new_run
예제 #17
0
def _fill_header_row(table, column_names):
    """Write *column_names* into the cells of the first row of *table*."""
    for cell, title in zip(table.rows[0].cells, column_names):
        cell.text = title


def _add_ranking_bullets(document, lead_in, labels, nums, limit=10):
    """Add one 'No. k is <label>, which has <n> publications.' bullet per entry.

    At most *limit* bullets are written, and never more than there are
    entries, so a short ranking no longer raises IndexError.
    """
    for i in range(min(limit, len(labels), len(nums))):
        p = document.add_paragraph(lead_in, style='ListBullet')
        p.add_run('No. '+str(i+1)+' is ')
        p.add_run(labels[i]).bold = True
        p.add_run(', which has ')
        p.add_run(str(nums[i])).bold = True
        p.add_run(' publications.')


def generate_word_report(journal_name, table_name,info,df,cwd,invalid_lines):
    """Generate the Word report for one journal and save it as a .docx file.

    Args:
        journal_name: Journal title, used as the document heading and passed
            to ``general_info``.
        table_name: Base name of the picture files and of the saved report.
        info: Sequence of per-year dicts with the keys ``year``,
            ``aver_page``, ``num_pubs``, ``percent_cn_author``,
            ``percent_cn_univ`` and ``list_keyw``.
        df: Publication records, passed to the plotting and lookup helpers
            and indexed with ``df.iloc``.
        cwd: Directory of the picture files and of the saved report.
        invalid_lines: Number of entries that were deleted.

    The report is written to ``cwd + '\\' + table_name + '.docx'``.
    """
    document = Document()
    document.add_heading(journal_name, 0)

    # ---- general journal information ----
    document.add_heading('Journal Analysis...', level=1)
    journal_info = general_info(journal_name)
    # (caption, key in journal_info, whether the value is upper-cased)
    info_lines = [
        ('SCI: ', 'sci', False),
        ('SSCI: ', 'ssci', False),
        ('University Top Journal: ', 'UnivRank', False),
        ('International Business School Top Journal: ', 'IBSRank', False),
        ('ISSN: ', 'issn', False),
        ('Publication Frequency: ', 'freq', True),
        ('Press: ', 'press', True),
        ('City: ', 'city', True),
        ('Country: ', 'country', True),
        ('Address: ', 'address', True),
    ]
    for caption, key, upper in info_lines:
        value = journal_info[key]
        document.add_paragraph(caption + (value.upper() if upper else value))

    # ---- per-year statistics table ----
    document.add_heading('Journal Statistics: ({} out of {} entries have been deleted)'.format(invalid_lines, invalid_lines+len(df)), level=1)

    table = document.add_table(rows=1, cols=5, style = "Light List Accent 1")
    _fill_header_row(table, ['Year','AveragePage','NumPublication','ChineseAuthor','ChineseUniv'])

    for item in info:
        cells = table.add_row().cells
        cells[0].text = item['year']
        cells[1].text = str(int(item['aver_page']))
        cells[2].text = str(item['num_pubs'])
        cells[3].text = str(item['percent_cn_author'])+'%'
        cells[4].text = str(item['percent_cn_univ'])+'%'

    # ---- keyword table: one column per year, ten keyword rows ----
    document.add_heading('Journal Keywords Distributions - Top 10 most popular Keywords', level=1)
    num_of_keywords = 10
    rows = num_of_keywords + 1
    cols = len(info)
    table = document.add_table(rows=rows, cols=cols,style = "Table Grid")
    _fill_header_row(table, [entry['year'] for entry in info])

    for c in range(cols):
        # A year with fewer than ten keywords leaves its remaining cells
        # empty instead of raising IndexError.
        keywords = info[c]['list_keyw'][:num_of_keywords]
        # Row 0 is the header, so keywords start at row 1.
        for r, keyword in enumerate(keywords, start=1):
            run = table.cell(r, c).paragraphs[0].add_run(keyword)
            run.font.size = Pt(8)

    # ---- keyword cloud picture ----
    # NOTE(review): the picture paths use a literal backslash, presumably to
    # match the paths the plot_* helpers write to - confirm before changing.
    document.add_heading('Journal Keywords Cloud by Years', level=1)
    plot_wordcloud(info,table_name,cwd)
    document.add_picture(cwd+'\\'+table_name+'_wc.jpg', width=Cm(17))

    document.add_page_break()

    # ---- country distribution ----
    document.add_heading('Country Distribution', level=1)
    labels, nums = plot_country_distribution(df,table_name, cwd)
    document.add_picture(cwd+'\\'+table_name+'_cd.jpg', width=Cm(15))

    document.add_heading('List of most productive country...', level=2)
    _add_ranking_bullets(document, 'Productive country ', labels, nums)

    if 'China' not in labels:
        document.add_heading('There is NOT publication from any Chinese university...', level=2)
    else:
        i = labels.index('China')
        document.add_heading('China is ranked at No. '+str(i+1)+' and has '+str(nums[i])+' Publications in this journal.', level=2)

    document.add_page_break()

    # ---- institute distribution ----
    document.add_heading('Institute Distribution', level=1)
    labels, nums = plot_institute_distribution(df,table_name, cwd)
    document.add_picture(cwd+'\\'+table_name+'_id.jpg', width=Cm(15))

    document.add_heading('List of most productive institute...', level=2)
    _add_ranking_bullets(document, 'Productive institute ', labels, nums)

    # ---- top Chinese institutes ----
    cn_ins_hist = cal_cn_univ_distribution(df)
    if not cn_ins_hist:
        document.add_heading('We are on the way...', level=2)
    else:
        document.add_heading('List of most productive Chinese institute...', level=2)
        label_list = [c[0] for c in cn_ins_hist]
        num_list = [c[1] for c in cn_ins_hist]
        _add_ranking_bullets(document, 'Productive institute ', label_list, num_list)

    # ---- publication record of selected universities ----
    univs = ['Zhejiang Gongshang Univ','Zhejiang Univ Finance & Econ','Shanghai Univ Finance & Econ',
             'Jiangxi Univ Finance & Econ', 'Cent Univ Finance & Econ', 'Dongbei Univ Finance & Econ',
             'Southwestern Univ Finance & Econ','Univ Int Business & Econ','Zhongnan Univ Econ & Law']
    for univ in univs:
        document.add_heading('Publication Record of '+univ, level=1)
        index = any_univ(df, univ)
        if not index:
            document.add_paragraph(univ+' has not been contributed to this journal yet...')
            continue

        table = document.add_table(rows=1, cols=5,style = "Light List Accent 1")
        for column, width_cm in enumerate((0.5, 1, 2.5, 2.5, 8)):
            table.cell(0, column).width = Cm(width_cm)
        _fill_header_row(table, ['ID','Year','Author(s)','School','Title'])

        for record_number, row_position in enumerate(index, start=1):
            item = df.iloc[row_position,:]
            cells = table.add_row().cells
            cells[0].text = str(record_number)
            cells[1].text = item['year']
            cells[2].text = item['authors']
            # The school is the second comma-separated part of the address.
            cells[3].text = item['paddress'].split(',')[1].strip()
            cells[4].text = item['title']

    # ---- save ----
    document.save(cwd+'\\'+table_name+'.docx')
예제 #18
0
def given_a_run_having_underline_type(context, underline_type):
    """Store on *context* the fixture run that has the given underline type."""
    # Position of the matching run in the first paragraph of the fixture.
    run_index_by_type = {'inherited': 0, 'no': 1, 'single': 2, 'double': 3}
    run_idx = run_index_by_type[underline_type]
    first_paragraph = Document(test_docx('run-enumerated-props')).paragraphs[0]
    context.run = first_paragraph.runs[run_idx]
예제 #19
0
"""
Created on Thu Sep 19 14:38:30 2019

@author: wb550776
"""
import numpy as np
import pandas as pd
import nltk
nltk.download('punkt')  # one time execution
import re
import docx
from nltk.tokenize import sent_tokenize
#pip uninstall docx
#pip install python-docx
from docx import Document
# Load the source document directly.  An empty Document() created just
# before it would be discarded immediately.
document = Document(
    'C:/Users/wb550776/Downloads/Using ML in Evaluative Synthesis May 31 2019.docx'
)


#Open the article
def read_article(filename):
    # NOTE(review): ``filename`` is not used in the visible part of this
    # function; a hard-coded path is opened instead - confirm the intent.
    # NOTE(review): the path names a .docx file but is opened as UTF-8 text,
    # and the handle is not closed in the visible part - confirm.
    file = open(r'C:/Users/wb550776/Downloads/10144-Ghana-ELAC EvNote.docx',
                encoding='utf-8')
    #file2 = open(r'C:/Users/wb550776/Downloads/README.txt')
    filedata = file.readlines()
    # Only the first line is split, on ". ", into candidate sentences.
    article = filedata[0].split(". ")
    sentences = []

예제 #20
0
def given_a_run_having_style_char_style(context, char_style):
    """Store on *context* the fixture run that has the given character style."""
    # Position of the matching run in the first paragraph of the fixture.
    run_index_by_style = {'None': 0, 'Emphasis': 1, 'Strong': 2}
    run_idx = run_index_by_style[char_style]
    first_paragraph = Document(test_docx('run-char-style')).paragraphs[0]
    context.run = first_paragraph.runs[run_idx]
예제 #21
0
# Read a docx file with Python
'''
  1. 首先需要安装python-docx库
'''

from docx import Document

document = Document("d:\\1.docx")

# Append 'dd' to every paragraph and print the resulting text.
for para in document.paragraphs:
    # Paragraph objects have no ``append`` method; ``add_run`` appends a run
    # with the given text to the end of the paragraph.
    para.add_run('dd')
    print(para.text)
def write_report(info_request, print_to_pdf=True,
                 path_report_folder=os.path.join(os.environ['CRF_DATA'],
                                                 'Request_reports')):
    """Build the Word report and the e-mail list for one request.

    The ``xxx_report.docx`` template is copied into ``path_report_folder``
    under a time-stamped name, the request details are appended to it
    (summary, volunteers, timeline, staff, footnotes, legal notice), and the
    document is saved. The e-mail list is then written via ``print_emails``
    and, optionally, the report is passed to ``printout_file``.

    Args:
        info_request: nested mapping describing the request; leaf values are
            sequences indexed by position (for example
            ``info_request['request_number'][0]``).
            ``info_request['final_outcome'][0]`` is rewritten in place to
            ``'Success'`` when it holds the long "Resolved" label.
        print_to_pdf: when true, call ``printout_file`` on the saved report
            with ``wait_for_pdf=True``.
        path_report_folder: folder receiving the report and e-mail files.
            The default is computed once, when the function is defined,
            from the ``CRF_DATA`` environment variable.

    Raises:
        ValueError: if a date that has to be parsed is not in
            ``'%d/%m/%Y %H:%M'`` format. The resolution date is always
            parsed, even when the request date is ``'none'``.
    """
    date_format = '%d/%m/%Y %H:%M'

    def parse_date(key):
        # Every entry of info_request['date'] uses the same textual format.
        return datetime.strptime(info_request['date'][key][0], date_format)

    if info_request['table_finished'][0].lower() != 'yes':
        print('WARNING!! Table is marked as NOT FINISHED.')
    if info_request['final_validation_done'][0].lower() != 'yes':
        print('WARNING!! Table is marked as NOT VALIDATED.')

    footnote_list = []
    now = datetime.now()

    # The report and the e-mail list share one time-stamped file-name stem.
    name_file_report = (info_request['request_number'][0] + '_Report_'
                        + now.strftime('%Y%m%dT%H%M%S'))
    name_file_emails = name_file_report + '_emails'
    path_file_report = os.path.join(path_report_folder,
                                    name_file_report + '.docx')
    path_file_emails = os.path.join(path_report_folder,
                                    name_file_emails + '.txt')

    # Start from a copy of the template so its layout is inherited.
    copyfile(r'xxx_report.docx', path_file_report)
    document = Document(path_file_report)
    document.styles['Normal'].font.name = 'Raleway'
    document.styles['Normal'].font.size = Pt(12)

    # Shorten the long outcome label (this mutates the caller's data).
    if info_request['final_outcome'][
            0] == 'Resolved: Task successfully finished':
        info_request['final_outcome'][0] = 'Success'

    # Title
    p = document.add_paragraph()
    run = p.add_run('Request ' + info_request['request_number'][0])
    run.font.bold = True
    run.font.size = Pt(16)
    p.paragraph_format.alignment = WD_ALIGN_PARAGRAPH.CENTER

    # Basic info about the request
    document.add_paragraph()  # blank spacer line
    p = document.add_paragraph()
    p.add_run('Request description: ').bold = True
    p.add_run(info_request['description_task'][0])
    p = document.add_paragraph()
    p.add_run('Requester\'s institution: ').bold = True
    p.add_run(info_request['requester']['full_affiliation'][0])
    p = document.add_paragraph()
    p.add_run('Outcome: ').bold = True
    # chr(8212) is an em dash between the outcome and its description.
    p.add_run(info_request['final_outcome'][0] + chr(8212) +
              info_request['description_success'][0])
    document.add_paragraph()  # blank spacer line

    # Volunteers
    p = document.add_paragraph()
    p.add_run('Volunteers:').bold = True
    footnote_list_role = []
    any_role = False
    for i_volunteer in range(len(info_request['volunteer']['email'])):
        p = document.add_paragraph()
        roles = info_request['volunteer']['role']
        # The role list may be shorter than the e-mail list, and individual
        # roles may be empty; only non-empty roles get a footnote mark.
        if i_volunteer < len(roles) and roles[i_volunteer]:
            add_footnote(p,
                         footnote_list_role,
                         roles[i_volunteer],
                         type_symbol='symbol')
            any_role = True
        p.add_run(
            write_name_and_affiliation(info_request['volunteer'], i_volunteer))

    # Volunteers' roles
    if any_role:
        document.add_paragraph()  # blank spacer line
        p = document.add_paragraph()
        p.add_run('Volunteers\' roles:').bold = True
        print_footnotes(document, footnote_list_role, font_size=12)

    # Timeline
    document.add_paragraph()  # blank spacer line
    p = document.add_paragraph()
    p.add_run('Timeline:').bold = True
    if info_request['date']['request'][0] != 'none':
        date_request = parse_date('request')
        document.add_paragraph('Date of request: '
                               + date_request.strftime('%B %d, %Y'))

        date_first_response = parse_date('first_response')
        p = document.add_paragraph(
            deltaT_to_string(date_first_response - date_request)
            + ' from request to first response')
        add_footnote(
            p, footnote_list,
            'The first response from Crowdfight typically contains a feasibility estimate for the request.'
        )

        date_proposal = parse_date('proposal')
        p = document.add_paragraph(
            deltaT_to_string(date_proposal - date_first_response)
            + ' from first response to proposal')
        add_footnote(
            p, footnote_list,
            'The proposal typically includes making first contact between requester and volunteers.'
        )

        document.add_paragraph(
            deltaT_to_string(parse_date('success') - date_proposal)
            + ' from proposal to resolution')
    else:
        document.add_paragraph('Date of request: None')

    p = document.add_paragraph('Date of resolution')
    add_footnote(
        p, footnote_list,
        'For successful requests, this is the date in which we confirmed that the success criterion was reached (we try to be unintrusive during follow-up, so this confirmation can have significant delay). The success criterion is defined beforehand, and is as stringent as possible to guarantee that our activity is truly useful.'
    )
    p.add_run(': ' + parse_date('success').strftime('%B %d, %Y'))

    # Crowdfight staff: (key in info_request, displayed label, footnote text)
    document.add_paragraph()  # blank spacer line
    staff_roles = [
        ('receiver', 'Receiver',
         'The Receiver is in charge of receiving the request, understanding it (with help of other coordinators and advisors), and writing the request to be sent in the task distribution e-mail (when appropriate).'),
        ('advisor_manager', 'Advisor manager',
         'The advisor manager handles the assignment of requests to the scientific advisors'),
        ('advisor', 'Scientific advisor',
         'The scientific advisor reviews the self-assessments submitted by the volunteers, shortlists the most adequate ones for the request, and recommends a course of action to the coordinator.'),
        ('coordinator', 'Coordinator',
         'The coordinator decides a course of action with help from the scientific advisor, makes a proposal to the requester and contacts the selected volunteers.'),
        ('follow_up', 'Follow up',
         'The person in charge of follow-up stays in touch with requester and volunteers to make sure they have everything they need until the resolution of the request.'),
        ('documenter', 'Documenter',
         'The documenter maintains our database and gathers the necessary information to produce this report.'),
    ]
    for person_key, person_label, person_footnote in staff_roles:
        p = document.add_paragraph()
        p.add_run(person_label).bold = True
        add_footnote(p, footnote_list, person_footnote, bold=True)
        p.add_run(': ').bold = True
        p.add_run(write_name_and_affiliation(info_request[person_key], 0))

    # Notes
    document.add_paragraph()  # blank spacer line
    p = document.add_paragraph()
    p.add_run('Notes:').bold = True
    print_footnotes(document, footnote_list)
    document.add_paragraph()  # blank spacer line
    p = document.add_paragraph()

    # 'xxxx' is the placeholder while no DOI has been assigned yet. For the
    # static DOI the last entry is used, since new versions are appended.
    static_dois = info_request['doi']['static']
    doi_static = static_dois[-1] if static_dois else 'xxxx'
    updated_dois = info_request['doi']['updated']
    doi_updated = updated_dois[0] if updated_dois else 'xxxx'

    # The generation time is taken from `now`; the notice used to carry a
    # fixed "08-Jun-2020 15:23:09" stamp regardless of when it was produced.
    run = p.add_run('This report belongs to the internal documentation of Crowdfight, a non-for profit association registered in the EU with number ES-G47805569. Reports are published for transparency with the explicit agreement of all people mentioned. The report was generated automatically from our records on '
                    + now.strftime('%d-%b-%Y %H:%M:%S')
                    + '. While we do our best to keep faithful records, it may contain unintentional inaccuracies or omissions. Any modification of this document is strictly forbidden. This document can be found at https://doi.org/'
                    + doi_static + ', and up to date versions can be found at https://doi.org/'
                    + doi_updated + '.')
    run.font.size = Pt(9)

    document.save(path_file_report)
    print_emails(info_request, file_path=path_file_emails)
    if print_to_pdf:
        printout_file(path_file_report, wait_for_pdf=True)
예제 #23
0
# Split nameList into batches of at most `num` entries; every batch is stored
# in docxList as an independent copy.
num = 30
tempList = []
index = 0
for index, name in enumerate(nameList, start=1):
    tempList.append(name)
    # Close the batch when it is full or when the last name has been added.
    if index % num == 0 or index == len(nameList):
        list1 = copy.deepcopy(tempList)
        docxList.append(list1)
        tempList.clear()

# Merge every batch into one output document named after the batch number.
for a, batch in enumerate(docxList):
    docName = "F:\DiYanYuan\python\word\第一次\大于400\\all_合并\\" + str(a) + ".docx"
    # Create a new blank document to compose into.
    new_document = Document()
    composer = Composer(new_document)

    for file in batch:
        composer.append(Document(file))
    composer.save(docName)
a = len(docxList)

#不正确 重叠了
# a = 0
# while a <len(docxList):
#     docName = "F:\DiYanYuan\python\word\第一次\大于400\\all_合并\\" + str(a) + ".docx"
#     # 启动word应用程序
#     word = win32com.client.Dispatch('Word.Application')
#     word.Visible = False
#     # 新建空白文档
    pat = '结算通知书'
    m = re.search(pat, dir.decode('gbk').encode('utf8'))
    if m:
        allfile.append(dir)  # 获得所有文档名中带有结算通知书的文件

#print allfile
print len(allfile)

findf = []
for dir in allfile:
    try:
        #document=Document(dir.decode("utf8"))
        temp = 0

        #document=Document(path+"\\"+dir)
        document = Document(dir)
        doc_new = Document()
        t = document.tables[0]

        for r in t.rows:
            for c in r.cells:
                doc_new.add_paragraph(c.text, style=None)
        a1.append(doc_new.paragraphs[1].text)
        findf.append(1)
        for inx, p in enumerate(doc_new.paragraphs):

            tt = p.text
            pat = '利率收益金额'.decode("utf8")
            m = re.search(pat, tt)
            if m:
                if temp == 0:
예제 #25
0
def _pdf_inline_response(pdf_path):
    """Return an ``application/pdf`` HttpResponse with the bytes of *pdf_path*."""
    with open(pdf_path, 'rb') as pdf:
        response = HttpResponse(pdf.read(), content_type='application/pdf')
    response['Content-Disposition'] = 'filename=some_file.pdf'
    return response


def data_sheet(request,id):
    """Serve, convert, or prepare the TA data sheet.

    Depending on which files already exist, one of three things happens:

    1. ``TA_Datasheet.pdf`` exists: it is returned as a PDF response.
    2. Only ``TA_Datasheet.docx`` exists: it is converted to PDF through
       Word (COM automation) and the PDF is returned.
    3. Neither exists: the data sheet is rendered from the DOCX template,
       the decrypted "Annexure 6" document is appended to it, the result is
       saved as ``TA_Datasheet.docx`` and the record list page is rendered
       with a message asking the user to click again.

    Args:
        request: the incoming request; ``request.POST['idprefix']`` must be
            present (``KeyError`` otherwise).
        id: user id used to look up the application and its files.
    """
    idprefix=request.POST['idprefix']
    print(idprefix,'jjjjjjjjjjjj')

    doc_final_path ='E:/certa-drdo/certa/TA_Datasheet.docx'
    pdf_final_path ='E:/certa-drdo/certa/TA_Datasheet.pdf'

    if os.path.isfile(pdf_final_path):
        return _pdf_inline_response(pdf_final_path)

    if os.path.isfile(doc_final_path):
        print('mmmmmmmmmmmmmm')
        pythoncom.CoInitialize()
        wdFormatPDF = 17  # Word's SaveAs file-format code for PDF

        in_file = os.path.abspath(doc_final_path)

        # Close the document and quit Word even when opening or saving
        # fails, so no Word process is left behind.
        word = comtypes.client.CreateObject('Word.Application')
        try:
            doc = word.Documents.Open(in_file)
            try:
                doc.SaveAs(pdf_final_path, FileFormat=wdFormatPDF)
                print('nnnnnnnnnnn')
            finally:
                doc.Close()
        finally:
            word.Quit()
        return _pdf_inline_response(pdf_final_path)

    # Neither file exists yet: build the DOCX from the template.
    # NOTE(review): ``.first()`` presumably returns None when nothing
    # matches, which would fail on the attribute access below -- confirm.
    taa=TAapplicationmodel.objects.filter(user_id=id).first()

    template = DocxTemplate("E:/certa-drdo/certa/dashboard/templates/dealing officer/DS template.docx")
    context= {
        'firmname':taa.firmname,
        'addr1':taa.addr1,
        'item_name':taa.item_name,
        'part_no':taa.part_no

    }
    template.render(context)
    # Save to the absolute path that is reloaded below; the original saved
    # to a name relative to the working directory and reloaded from
    # 'E:\certa-drdo\certa\TA_Datasheet.docx', which only matched when the
    # process happened to run from that folder.
    template.save(doc_final_path)

    taa=TAapplicationfiles.objects.filter(user_id=id,refid=idprefix,refpath='Annexure 6').first()
    aesurl=taa.filepath
    docurl = aesurl[:-4]  # drop the last four characters of the encrypted file's path
    print('aesview',aesurl)
    print('docurl',docurl)

    bufferSize = 64 * 1024
    # SECURITY NOTE(review): the decryption passphrase is hard-coded in
    # source; it should come from configuration or a secret store.
    passw = "#EX\xc8\xd5\xbfI{\xa2$\x05(\xd5\x18\xbf\xc0\x85)\x10nc\x94\x02)j\xdf\xcb\xc4\x94\x9d(\x9e"
    encFileSize = stat(aesurl).st_size
    with open(aesurl, "rb") as fIn:
        with open(docurl, "wb") as fOut:
            pyAesCrypt.decryptStream(fIn, fOut, passw, bufferSize, encFileSize)

    templateDoc1 = Document(doc_final_path)
    templateDoc = Document(docurl)

    # Append every body element of the decrypted annexure to the data sheet.
    for element in templateDoc.element.body:
        templateDoc1.element.body.append(element)

    templateDoc1.save(doc_final_path)
    messages.success(request, 'Data_sheet Successfully Prepared, Click again to view the file !')

    reg=TAapplicationmodel.objects.filter(file_in_id=str(request.user.id))
    return render(request, 'tcs do/receivedtyperecord.html',{'details':reg,'status':True})
예제 #26
0
             fontsize=4.5,
             verticalalignment='center',
             horizontalalignment='center')
    plt.axis('off')
    plt.savefig(f'{path}/{jpgname}.jpg')
    plt.cla()
    plt.close("all")
    im = Image.open(f'{path}/{jpgname}.jpg')
    im = trim(im)
    im.save(f'{path}/{jpgname}.jpg')
    return im.size[0] / 96


print('文档格式初始化……')
# Create the document object.
calc_book = Document()

# Body-text font settings, applied to the 'Normal' style.
normal_style = calc_book.styles['Normal']
normal_font = normal_style.font
normal_font.name = 'Italic'
normal_style.element.rPr.rFonts.set(qn('w:eastAsia'), '微软雅黑')
normal_font.size = Pt(12)
normal_font.color.rgb = RGBColor(0x00, 0x00, 0x00)

# Paragraph layout of the 'Normal' style.
normal_format = normal_style.paragraph_format
normal_format.alignment = WD_PARAGRAPH_ALIGNMENT.LEFT
# First-line indent; character width equals character height,
# 12pt = 4.23mm, 1pt = 0.3527mm.
normal_format.first_line_indent = Mm(8.46)
normal_format.line_spacing_rule = WD_LINE_SPACING.ONE_POINT_FIVE
normal_format.space_before = Mm(0)
normal_format.space_after = Mm(0)

# Add a style for centered display (no first-line indent).
no_spacing_format = calc_book.styles['No Spacing'].paragraph_format
no_spacing_format.first_line_indent = Mm(0)
예제 #27
0
import requests, re
from math import *
from bs4 import BeautifulSoup    # HTML을 파싱하는 모듈
import os, random, itertools, time
from docx import Document
from docx.shared import Pt
from docx.enum.text import WD_ALIGN_PARAGRAPH

### Timing ###
# Timestamp taken when the script starts.
first_time = time.time() 

## Create documents ##
# `templit` is loaded from an existing file; `check` is created without a
# source file.
templit = Document("English_Composition.docx")
check = Document()

def googling(key1=str):
    url = "http://www.google.com/search?q=" + key1
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:57.0) Gecko/20100101 Firefox/57.0'}
    
    html = requests.get(url, headers=headers ).text
    soup = BeautifulSoup(html, 'html.parser')    
    div = soup.find('div', id='result-stats')

    if not div:
        return 0 
    result_characters = " ".join(div.text.replace(",","")).split(" ")

    for order in range(len(result_characters)):
        word = result_characters[order]
        if not word in [str(number) for number in range(0,10)]:
            if word == "개":
예제 #28
0
# Index into ``document.tables`` of the table describing each message.
msg_list["PDN CONNECTIVITY REJECT"]["table"] = 56
msg_list["PDN CONNECTIVITY REQUEST"]["table"] = 57
msg_list["PDN DISCONNECT REJECT"]["table"] = 58
msg_list["PDN DISCONNECT REQUEST"]["table"] = 59

# NOTE(review): this snippet is Python 2 (``execfile`` and the ``print``
# statement) and is cut off after the start-row search.
for key in msg_list.keys():
    # Messages without a table index are skipped.
    if "table" not in msg_list[key].keys():
        continue;

    d_info("[" + key + "]")
    cachefile = cachedir + "nas-msg-" + msg_list[key]["type"] + ".py"
    if os.path.isfile(cachefile) and os.access(cachefile, os.R_OK):
        # A readable cache file exists: execute it instead of re-parsing
        # the document.
        execfile(cachefile)
        print "Read from " + cachefile
    else:
        # No usable cache: open the document and start writing a new cache
        # file for this message.
        document = Document(filename)
        f = open(cachefile, 'w') 

        ies = []
        write_file(f, "ies = []\n")
        table = document.tables[msg_list[key]["table"]]

        # Find the row where the scan stops: the first row whose "type"
        # cell mentions 'Message type', or the row before the first one
        # mentioning 'KSI and sequence number'.
        # NOTE(review): get_cells presumably maps a row's cells to a dict
        # keyed by column name -- confirm against its definition.
        start_row = 0
        for start_row, row in enumerate(table.rows):
            cells = get_cells(row.cells);
            if cells["type"].find('Message type') != -1:
                break
            if cells["type"].find('KSI and sequence number') != -1:
                start_row -= 1
                break
예제 #29
0
from docx import Document
from docx.shared import Inches

# Demo script: builds a document with headings, styled paragraphs, a picture
# and a table.
document = Document()

# Heading at level 0.
document.add_heading('Document Title', 0)

# One paragraph assembled from runs, with individual runs made bold/italic.
p = document.add_paragraph('A plain paragraph having some ')
p.add_run('bold').bold = True
p.add_run(' and some ')
p.add_run('italic.').italic = True

document.add_heading('Heading, level 1', level=1)
document.add_paragraph('Intense quote', style='Intense Quote')

# List items are ordinary paragraphs with a list style.
document.add_paragraph('first item in unordered list', style='List Bullet')
document.add_paragraph('first item in ordered list', style='List Number')

# The image file is expected next to the script.
document.add_picture('t_0.png', width=Inches(1.25))

# Rows for the table: (quantity, id, description).
records = ((3, '101', 'Spam'), (7, '422', 'Eggs'),
           (4, '631', 'Spam, spam, eggs, and spam'))

# The table starts with one header row; data rows are appended one by one.
table = document.add_table(rows=1, cols=3)
hdr_cells = table.rows[0].cells
hdr_cells[0].text = 'Qty'
hdr_cells[1].text = 'Id'
hdr_cells[2].text = 'Desc'
# NOTE(review): only the quantity cell is filled in the visible part of the
# loop; the snippet appears to be cut off here. `id` also shadows the builtin.
for qty, id, desc in records:
    row_cells = table.add_row().cells
    row_cells[0].text = str(qty)
        특허총칙.docx
        특허요건.docx
        특허심사.docx
        특허등록.docx
        특허권.docx
        특허권자의보호.docx
        특허취소신청.docx
        특허심판.docx
        특허재심.docx
        특허소송.docx
        특허국제출원.docx
        특허보칙.docx
        특허벌칙.docx
"""

templit = Document('특허취소신청.docx')
Result = Document('Today.docx')
Keyword = None

##################################################################################
"""
https://m.blog.naver.com/anakt/221842622079
다음 조항으로 넘어갈 때 '문제 유닛'을 클래스 선언하고, 이전 '문제 유닛'을 '문제 리스트'에 넣는다.
"""


def data_acquisition(templit):
    Problem_list = []
    Problem_unit = None

    for row, paragraph in enumerate(templit.paragraphs):