Example #1
0
def draw_like(paragraph: Paragraph, words: List[str], bold=False):
    """Append two randomly sampled words, each followed by three brackets.

    Two entries are drawn from ``words`` with ``random.sample`` (which raises
    ``ValueError`` when fewer than two entries are available). For every
    drawn word three runs are appended to ``paragraph``: the word, the
    module-level ``bracket`` repeated three times, and the module-level
    ``space``.

    Args:
        paragraph: Paragraph that receives the runs.
        words: Candidate words to sample from.
        bold: Bold setting applied to the word run and the bracket run.
    """
    for picked in random.sample(words, 2):
        word_run = paragraph.add_run(picked)
        word_run.bold = bold
        bracket_run = paragraph.add_run(bracket * 3)
        bracket_run.bold = bold
        # The separator run gets no explicit bold setting.
        paragraph.add_run(space)
 def insert_paragraph_after(self, paragraph, text=None, style=None):
     """Create a new paragraph directly after ``paragraph`` and return it.

     A fresh ``w:p`` element is added as the next sibling of the given
     paragraph's XML element and wrapped in a ``Paragraph`` that shares
     the same parent.

     :param paragraph: paragraph after which the new one is inserted
     :param text: text for a single run; skipped when falsy
     :param style: style assigned to the new paragraph; skipped when None
     :return: the newly created paragraph
     """
     sibling = OxmlElement("w:p")
     paragraph._p.addnext(sibling)
     inserted = Paragraph(sibling, paragraph._parent)
     if text:
         inserted.add_run(text)
     if style is not None:
         inserted.style = style
     return inserted
Example #3
0
def _insert_paragraph_after(paragraph, text=None, style=None):
    """Insert a new paragraph right behind ``paragraph`` and return it.

    The new ``w:p`` element becomes the next sibling of the given
    paragraph's XML element. A run is added only for truthy ``text``; the
    style is assigned only when ``style`` is not None.
    """
    element = OxmlElement("w:p")
    paragraph._p.addnext(element)
    result = Paragraph(element, paragraph._parent)
    if text:
        result.add_run(text)
    if style is not None:
        result.style = style
    return result
def insert_paragraph_after(inparagraph, text=None, style=None):
    """Insert a new paragraph after ``inparagraph`` and return it.

    Unlike a truthiness check, an empty string passed as ``text`` still
    produces an (empty) run; only ``None`` skips the run. The style is
    assigned only when ``style`` is not None.
    """
    following = OxmlElement("w:p")
    inparagraph._p.addnext(following)
    created = Paragraph(following, inparagraph._parent)
    if text is not None:
        created.add_run(text)
    if style is not None:
        created.style = style
    return created
Example #5
0
 def __init__(self, cif: CifContainer, paragraph: Paragraph):
     """Write the crystallization method as one sentence into ``paragraph``.

     The value of ``_exptl_crystal_recrystallization_method`` is passed
     through ``gstr``; a falsy result is replaced by a bracketed
     placeholder. The sentence is stored in ``self.text`` and appended to
     the paragraph as a single run.
     """
     self.cif = cif
     raw_method = gstr(self.cif['_exptl_crystal_recrystallization_method'])
     self.crytsalization_method = raw_method or '[No crystallization method was given]'
     cleaned = remove_line_endings(
         retranslate_delimiter(self.crytsalization_method))
     self.text = "{}. ".format(cleaned)
     # The delimiter translation is applied again to the finished sentence.
     paragraph.add_run(retranslate_delimiter(self.text))
Example #6
0
def add_r_int_value(cif: CifContainer, rint_p: Paragraph):
    """Append the reflection count and the R(int)/R(sigma) values to ``rint_p``.

    Three lines are produced, separated by newline characters inside the
    runs: the total number of reflections, ``R`` (italic) + ``int``
    (subscript) with its value, and ``R`` (italic) + ``sigma`` (subscript)
    with its value. Every value is passed through ``this_or_quest`` first.
    """
    # Read all CIF items up front, before any run is added.
    total = cif['_reflns_number_total']
    r_int = cif['_diffrn_reflns_av_R_equivalents']
    r_sigma = cif['_diffrn_reflns_av_unetI/netI']
    rint_p.add_run(this_or_quest(total) + '\n')
    # Only the R(int) line is followed by a line break.
    for label, value, line_end in (('int', r_int, '\n'), ('sigma', r_sigma, '')):
        rint_p.add_run('R').font.italic = True
        rint_p.add_run(label).font.subscript = True
        rint_p.add_run(' = ' + this_or_quest(value) + line_end)
Example #7
0
 def __init__(self, cif: CifContainer, paragraph: Paragraph):
     """Append the disorder-refinement sentence(s) to ``paragraph``.

     The general sentence about restraints is always written. When
     ``cif.dsr_used`` is truthy, a second run mentioning the program DSR
     follows; that text is also kept in ``self.dsr_sentence`` (empty
     string otherwise).
     """
     self.cif = cif
     self.dsr_sentence = (
         "Some parts of the disorder model were introduced by the "
         "program DSR."
     ) if self.cif.dsr_used else ''
     paragraph.add_run(
         "Disordered moieties were refined using bond lengths "
         "restraints and displacement parameter restraints. ")
     if self.dsr_sentence:
         paragraph.add_run(self.dsr_sentence)
Example #8
0
def format_space_group(paragraph: Paragraph, space_group: str, it_number: str) -> None:
    """
    Write the space group symbol as a math object followed by its number.

    The symbol is converted to MathML with ``SpaceGroups.to_mathml``,
    turned into a Word element by ``math_to_word`` and appended to the
    left-aligned paragraph, followed by `` (<it_number>)``. If any of
    these steps raises, the plain ``space_group`` text is added instead
    (best effort; runs appended before the failure stay in place).
    """
    try:
        # The HM space group type
        mathml = SpaceGroups().to_mathml(space_group)
        paragraph.alignment = WD_PARAGRAPH_ALIGNMENT.LEFT
        paragraph._element.append(math_to_word(mathml))
        paragraph.add_run(''.join((' (', it_number, ')')))
    except Exception:
        paragraph.add_run(space_group)
Example #9
0
 def __init__(self, cif: CifContainer, paragraph: Paragraph,
              ref: ReferenceList):
     """Append the CCDC deposition statement and register its reference.

     The deposition number is read from
     ``_database_code_depnum_ccdc_archive`` (``'??????'`` when the value
     is falsy after ``gstr``). Order of effects: first sentence,
     ``CCDCReference`` appended to ``ref``, a regular space character,
     then the second sentence containing the number and the URL.
     """
     self.cif = cif
     deposition_number = gstr(
         self.cif['_database_code_depnum_ccdc_archive']) or '??????'
     deposit_note = (
         "Crystallographic data for the structures reported in this "
         "paper have been deposited with the Cambridge Crystallographic Data Centre."
     )
     access_template = (
         "CCDC {} contain the supplementary crystallographic data for this paper. "
         "These data can be obtained free of charge from The Cambridge Crystallographic Data Centre "
         "via www.ccdc.cam.ac.uk/{}structures."
     )
     # The zero-width space is placed inside the URL, before "structures".
     access_note = access_template.format(deposition_number, zero_width_space)
     paragraph.add_run(deposit_note)
     ref.append(CCDCReference())
     SpaceChar(paragraph).regular()
     paragraph.add_run(access_note)
Example #10
0
 def __init__(self, cif: CifContainer, paragraph: Paragraph):
     """Append the data-collection temperature sentence to ``paragraph``.

     The crystal is described as "shock-cooled" unless the numeric part
     of ``_diffrn_ambient_temperature`` (text before an opening
     parenthesis) is greater than 200 or cannot be parsed as a float.
     The sentence is kept in ``self.txt``.
     """
     self.cif = cif
     self.temperature = gstr(self.cif['_diffrn_ambient_temperature'])
     self._name = cif.fileobj.name
     try:
         above_limit = float(self.temperature.split('(')[0]) > 200
     except ValueError:
         # An unparsable temperature drops the "shock-cooled" wording too.
         above_limit = True
     cooling = '' if above_limit else 'shock-cooled '
     template = "The data for {} were collected from a {}single crystal at {}{}K "
     self.txt = template.format(self.cif.block.name, cooling,
                                self.temperature, protected_space)
     paragraph.add_run(retranslate_delimiter(self.txt))
Example #11
0
def draw_one_multi_pron(paragraph: Paragraph, letter: str, bold=False):
    """Append three lines to ``paragraph``: underlined space, letter, bracket.

    Each content run receives the ``bold`` setting and is followed by a
    separate run holding a newline. ``space`` and ``bracket`` are
    module-level names defined outside this function.
    """
    underlined = paragraph.add_run(space)
    underlined.bold = bold
    underlined.font.underline = True
    paragraph.add_run('\n')
    for content in (letter, bracket):
        styled = paragraph.add_run(content)
        styled.bold = bold
        paragraph.add_run('\n')
def insert_paragraph_after(inparagraph, text=None, style=None):
    """Insert a new paragraph after the given paragraph and return it.

    Args:
        inparagraph: Paragraph after which the new one is inserted.
        text: Text for a single run; no run is added when None.
        style: Name looked up in ``template.styles`` and assigned to the
            new paragraph; no style is assigned when None.

    Returns:
        The newly created paragraph.
    """
    new_p = OxmlElement("w:p")
    inparagraph._p.addnext(new_p)

    new_para = Paragraph(new_p, inparagraph._parent)
    # Style and text are handled independently: the default ``style=None``
    # must not be looked up in the template, and a style given without
    # text is still applied.
    if style is not None:
        new_para.style = template.styles[style]
    if text is not None:
        new_para.add_run(text)
    return new_para
Example #13
0
 def __init__(self, cif: CifContainer, paragraph: Paragraph,
              ref: ReferenceList):
     """Append the integration / absorption-correction sentence.

     The integration program is derived from ``_computing_data_reduction``
     (SAINT or CrysAlisPro, via the corresponding ``add_*_reference``
     helpers), the scaling program from ``_exptl_absorpt_process_details``
     (SADABS, TWINABS, SORTAV or SCALE3 ABSPACK). Unrecognized programs
     keep bracketed placeholder names and ``DummyReference`` objects.
     Finally the data-reduction and absorption references are appended to
     ``ref`` as one list.

     :param cif: source of the CIF items
     :param paragraph: paragraph receiving the sentence as one run
     :param ref: reference list that receives the two references
     """
     self.cif = cif
     integration = gstr(self.cif['_computing_data_reduction']) or '??'
     abstype = gstr(self.cif['_exptl_absorpt_correction_type']) or '??'
     abs_details = gstr(self.cif['_exptl_absorpt_process_details']) or '??'
     data_reduct_ref = DummyReference()
     absorpt_ref = DummyReference()
     integration_prog = '[unknown integration program]'
     scale_prog = '[unknown program]'
     if 'SAINT' in integration:
         data_reduct_ref, integration_prog = self.add_saint_reference(
             integration)
     if 'CrysAlisPro'.lower() in integration.lower():
         data_reduct_ref, absorpt_ref, integration_prog = self.add_crysalispro_reference(
             integration)
     absdetails = cif['_exptl_absorpt_process_details'].replace('-', ' ')
     absdetails_upper = absdetails.upper()
     if 'SADABS' in absdetails_upper or 'TWINABS' in absdetails_upper:
         # Decide on the upper-cased text as well, so that a lower-case
         # "sadabs" is not misreported as TWINABS.
         scale_prog = 'SADABS' if 'SADABS' in absdetails_upper else 'TWINABS'
         absorpt_ref = SadabsTwinabsReference()
     if 'SORTAV' in absdetails_upper:
         scale_prog = 'SORTAV'
         absorpt_ref = SORTAVReference()
     if 'crysalis' in abs_details.lower():
         scale_prog = 'SCALE3 ABSPACK'
     sentence = 'All data were integrated with {} and {} {} absorption correction using {} was applied.'
     txt = sentence.format(integration_prog, get_inf_article(abstype),
                           abstype, scale_prog)
     paragraph.add_run(retranslate_delimiter(txt))
     ref.append([data_reduct_ref, absorpt_ref])
Example #14
0
    def add_line_break(paragraph: Paragraph, num: int, font_size=None) -> None:
        """Append ``num`` line breaks to ``paragraph`` inside one new run.

        Args:
            paragraph: Target paragraph.
            num: Number of line breaks to add.
            font_size: Font size assigned to the run holding the breaks;
                left untouched when falsy.
        """
        break_run = paragraph.add_run()
        if font_size:
            break_run.font.size = font_size

        for _ in range(num):
            break_run.add_break()
Example #15
0
 def __init__(self, cif: CifContainer, paragraph: Paragraph,
              ref: ReferenceList):
     """Append the structure solution / refinement sentence.

     The solution program (``_computing_structure_solution``) and the
     refinement program (``_computing_structure_refinement``) select the
     literature references; unrecognized programs keep a
     ``DummyReference``. The sentence ends with an italic ``F`` (plus a
     superscript ``2`` when ``_refine_ls_structure_factor_coef`` is
     ``fsqd``), the refinement program name and, when ShelXle is named in
     the refinement or molecular-graphics item, " using ShelXle".

     :param cif: source of the CIF items
     :param paragraph: paragraph receiving the runs
     :param ref: receives ``[solveref, refineref, shelxle]``; the last
         entry is None when ShelXle was not detected
     """
     self.cif = cif
     refineref = DummyReference()
     solveref = DummyReference()
     solution_prog = gstr(self.cif['_computing_structure_solution']) or '??'
     solution_method = gstr(
         self.cif['_atom_sites_solution_primary']) or '??'
     # The checks are independent, so a later match overrides an earlier one.
     if solution_prog.upper().startswith(('SHELXT', 'XT')):
         solveref = SHELXTReference()
     if 'SHELXS' in solution_prog.upper():
         solveref = SHELXSReference()
     if 'SHELXD' in solution_prog.upper():
         solveref = SHELXDReference()
     refined = gstr(self.cif['_computing_structure_refinement']) or '??'
     if refined.upper().startswith(('SHELXL', 'XL')):
         refineref = SHELXLReference()
     if 'OLEX' in refined.upper():
         refineref = Olex2Reference()
     refine_coef = gstr(self.cif['_refine_ls_structure_factor_coef'])
     # Singular subject: "The structure was solved".
     sentence = "The structure was solved by {} methods using {} and refined by full-matrix " \
                "least-squares methods against "
     # Only the first whitespace-separated token of the program is printed.
     txt = sentence.format(solution_method.strip('\n\r'),
                           solution_prog.split()[0])
     paragraph.add_run(retranslate_delimiter(txt))
     paragraph.add_run('F').font.italic = True
     if refine_coef.lower() == 'fsqd':
         paragraph.add_run('2').font.superscript = True
     paragraph.add_run(' by {}'.format(refined.split()[0]))
     shelxle = None
     if 'shelxle' in refined.lower(
     ) or 'shelxle' in self.cif['_computing_molecular_graphics'].lower():
         paragraph.add_run(' using ShelXle')
         shelxle = ShelXleReference()
     paragraph.add_run('.')
     ref.append([solveref, refineref, shelxle])
Example #16
0
 def __init__(self, cif: CifContainer, paragraph: Paragraph):
     """Append the diffractometer / radiation description to ``paragraph``.

     Every CIF item is passed through ``gstr``; a falsy result is replaced
     by a bracketed placeholder (an empty string for the cooling device).
     The text is written as several runs so that the radiation line can
     be italic and its last part additionally subscript.

     :param cif: source of the CIF items
     :param paragraph: paragraph receiving the runs
     """
     self.cif = cif
     self.difftype = gstr(self.cif['_diffrn_measurement_device_type']) \
                     or '[No measurement device type given]'
     self.device = gstr(self.cif['_diffrn_measurement_device']) \
                   or '[No measurement device given]'
     self.source = gstr(self.cif['_diffrn_source']).strip('\n\r') \
                   or '[No radiation source given]'
     # The placeholder guarantees a non-empty value, so no further
     # fallback is needed.
     self.monochrom = gstr(self.cif['_diffrn_radiation_monochromator']) \
                      or '[No monochromator type given]'
     self.cooling = gstr(self.cif['_olex2_diffrn_ambient_temperature_device']) \
                    or ''
     self.rad_type = gstr(self.cif['_diffrn_radiation_type']) \
                     or '[No radiation type given]'
     radtype = format_radiation(self.rad_type)
     self.wavelen = gstr(self.cif['_diffrn_radiation_wavelength']) \
                    or '[No wavelength given]'
     detector_type = gstr(self.cif['_diffrn_detector_type']) \
                     or '[No detector type given]'
     # Always non-empty because of the placeholder above.
     self.detector_type = " and a {} detector".format(detector_type)
     sentence1 = "on {0} {1} {2} with {3} {4} using {5} as monochromator{6}. " \
                 "The diffractometer was equipped with {7} {8} low temperature device and used "
     txt = sentence1.format(get_inf_article(self.difftype),
                            self.difftype, self.device,
                            get_inf_article(self.source), self.source,
                            self.monochrom, self.detector_type,
                            get_inf_article(self.cooling), self.cooling)
     paragraph.add_run(retranslate_delimiter(txt))
     # radiation type e.g. Mo:
     paragraph.add_run(retranslate_delimiter(radtype[0]))
     # K line:
     radrunita = paragraph.add_run(radtype[1])
     radrunita.font.italic = True
     alpha = paragraph.add_run(retranslate_delimiter(radtype[2]))
     alpha.font.italic = True
     alpha.font.subscript = True
     # Each piece is formatted on its own so the result does not depend on
     # ``.format`` binding only to the last literal of a concatenation.
     txt2 = " radiation (λ = {}".format(self.wavelen) \
            + protected_space + "{}). ".format(angstrom)
     paragraph.add_run(txt2)
 def __render_inline_element(self,
                             p: Paragraph,
                             pq: PyQuery,
                             bold=False,
                             italic=False,
                             sub=False,
                             sup=False,
                             underline=False,
                             font_size=None,
                             strike=False):
     """
     Render the contents of an inline element into paragraph ``p``.

     Child elements are handed to ``_render_element`` together with the
     current formatting flags; every other content item is added as a
     text run that receives the flags directly.

     :param p: target paragraph
     :param pq: element whose contents are rendered
     :param bold: bold
     :param italic: italic
     :param sub: subscript
     :param sup: superscript
     :param underline: underline
     :param font_size: font size for text runs; not set when falsy (the
         original note mentions a 9pt "small five" default)
     :param strike: strike-through
     :return: None
     """
     for node in pq.contents():
         if isinstance(node, (HtmlElement, _Element)):
             self._render_element(p,
                                  node,
                                  bold=bold,
                                  italic=italic,
                                  underline=underline,
                                  strike=strike,
                                  sup=sup,
                                  sub=sub,
                                  font_size=font_size)
         else:
             # Anything that is not an element is used as run text as is.
             text_run = p.add_run(node)
             self.__force_simsun(text_run)
             text_run.underline = underline
             text_run.bold = bold
             text_run.italic = italic
             text_run.font.superscript = sup
             text_run.font.subscript = sub
             if font_size:
                 text_run.font.size = font_size
             text_run.font.strike = strike
Example #18
0
def draw_two_multi_pron(paragraph: Paragraph,
                        letter1: str,
                        letter2: str,
                        bold=False):
    """Append one line with two letters, framed by ``draw_empty`` calls.

    The letters get the ``bold`` setting; between them sits a run of four
    ``space`` characters (module-level name), and the line is closed by a
    newline run.
    """
    draw_empty(paragraph)

    for letter, trailer in ((letter1, space * 4), (letter2, '\n')):
        letter_run = paragraph.add_run(letter)
        letter_run.bold = bold
        paragraph.add_run(trailer)

    draw_empty(paragraph)
def add_table_of_contents(paragraph: Paragraph) -> None:
    """Append a TOC field to ``paragraph``.

    A new run receives, in this order, a ``begin`` field character, the
    ``TOC`` instruction text, a ``separate`` field character carrying a
    placeholder text element, and an ``end`` field character.
    """
    toc_run = paragraph.add_run()

    def make_field_char(char_type):
        # Build a w:fldChar element of the given field-character type.
        element = OxmlElement("w:fldChar")
        element.set(qn("w:fldCharType"), char_type)
        return element

    begin = make_field_char("begin")

    instruction = OxmlElement("w:instrText")
    instruction.set(qn("xml:space"), "preserve")
    # "1-3" selects the heading levels; change it for other levels.
    instruction.text = 'TOC \\o "1-3" \\h \\z \\u'

    separate = make_field_char("separate")
    placeholder = OxmlElement("w:t")
    placeholder.text = "Right-click to update field."
    separate.append(placeholder)

    end = make_field_char("end")

    run_element = toc_run._r  # pylint: disable=protected-access
    for child in (begin, instruction, separate, end):
        run_element.append(child)
Example #20
0
 def render(self, p: Paragraph, _: docx.document.Document) -> None:
     """Add ``self.text`` as a run in ``self.style`` with the configured font.

     The font name comes from ``self.font_name``; ``self.font_size`` is
     wrapped in ``Pt``. The second argument is unused.
     """
     text_run = p.add_run(self.text, self.style)
     run_font = text_run.font
     run_font.name = self.font_name
     run_font.size = Pt(self.font_size)
    def _render_img(self, p: Paragraph, pq: PyQuery):
        """
        Render an image element as an inline picture in paragraph ``p``.

        The ``src`` attribute is reduced to a storage-relative name. When
        that file is not in ``default_storage``, a ``data:image`` URI is
        decoded and embedded directly; any other source is downloaded
        from ``settings.MEDIA_URL + src``, saved, passed through
        ``_convert_svg_to_jpg`` and inserted. A non-200 response or an
        unrecognized image adds the text "MISS IMG" instead; a request
        error leaves the paragraph unchanged.

        :param p: target paragraph
        :param pq: the image element
        :return: None
        """
        from django.conf import settings
        src = pq.attr('src')
        if src is None:
            return
        css_width = self._get_pq_style(pq, 'width')
        col1_width = Cm(self.content_side_width)
        # ``width`` stays None (picture keeps its own size) unless a number
        # can be read from the CSS value; an empty or non-numeric value
        # such as "auto" must not reach ``add_picture`` as a string.
        width = None
        if css_width:
            digit_array = re.findall(r'\d+(?:\.\d+)*', css_width)
            if digit_array:
                width = float(digit_array[0])
                # Never wider than the content column.
                width = min(self.get_cm(int(width * IMG_SIZE_ZOOM_FACTOR)),
                            col1_width)

        if src.startswith("http"):
            # NOTE(review): assumes every absolute URL starts with
            # settings.MEDIA_URL -- confirm against callers.
            src = src[len(settings.MEDIA_URL):]
        elif src.startswith('/media/'):
            src = src[len('/media/'):]
        if src.startswith('/'):
            src = src[1:]

        target_file_name = default_storage.path(src)

        if not default_storage.exists(target_file_name):
            if src.startswith('data:image'):
                # Inline base64 payload: everything after the first comma.
                idx = src.index(',')
                stream = BytesIO(decode_base64(src[idx + 1:].encode('ascii')))
                pic = p.add_run().add_picture(stream, width)
                self.adjust_pic_width(pic, col1_width)
            else:
                try:
                    resp = requests.get(settings.MEDIA_URL + src,
                                        stream=True,
                                        timeout=1)
                    if resp.status_code == 200:
                        default_storage.save(target_file_name, resp.raw)
                        target_file_name = self._convert_svg_to_jpg(
                            target_file_name)
                        pic = p.add_run().add_picture(target_file_name,
                                                      width)  # set the picture size
                        self.adjust_pic_width(pic, col1_width)
                    else:
                        p.add_run("MISS IMG")
                        print(f"缺少图片:{src}")
                except RequestException:
                    # Best effort: a failed download leaves the paragraph
                    # without picture or placeholder.
                    pass
        else:
            try:
                target_file_name = self._convert_svg_to_jpg(target_file_name)
                pic = p.add_run().add_picture(target_file_name,
                                              width)  # set the picture size
                self.adjust_pic_width(pic, col1_width)
            except UnrecognizedImageError:
                print(f"缺少图片:{src}")
                p.add_run("MISS IMG")
    def _render_span(self,
                     p: Paragraph,
                     pq: PyQuery,
                     bold=False,
                     italic=False,
                     strike=False,
                     underline=False,
                     font_size=None,
                     sub=False,
                     sup=False):
        """
        Convert a ``span`` element into content of paragraph ``p``.

        Formula spans (``data-latex`` attribute, ``math-tex`` class or
        ``afanti-latex`` class) are converted to OMML and appended to the
        paragraph element. Change 19.5.3: when the formula conversion
        fails, the ``img`` found inside the span is rendered instead.
        Any other span is rendered as formatted text, with the inherited
        flags combined with the span's own inline style.

        :param p: target paragraph
        :param pq: the span element
        :param bold: inherited bold flag
        :param italic: inherited italic flag
        :param strike: inherited strike-through flag
        :param underline: inherited underline flag
        :param font_size: inherited font size; replaced when the span
            declares a ``font-size`` in px or pt
        :param sub: inherited subscript flag
        :param sup: inherited superscript flag
        :return: None
        """
        try:
            if pq.attr('data-latex'):  # formula
                omml_str = converter.to_omml(
                    self.mini_trim(pq.attr('data-latex')))
                omml_str = omml_str.replace(
                    '<m:oMath',
                    '<m:oMath xmlns:m="http://schemas.openxmlformats.org/officeDocument/2006/math"'
                )

                pq(p._element).append(omml_str)
                return
            if pq.has_class("math-tex"):  # formula
                # NOTE(review): a truthy data-latex was already handled
                # above, so this check looks unreachable -- confirm.
                if pq.attr('data-latex'):
                    omml_str = pq.attr('data-latex')
                else:
                    omml_str = html.unescape(
                        pq.html()) if pq.html() is not None else ''
                omml_str = omml_str.replace(r'\(', '').replace(r'\)', '')
                omml_str = converter.to_omml(self.mini_trim(omml_str))

                omml_str = omml_str.replace(
                    '<m:oMath',
                    '<m:oMath xmlns:m="http://schemas.openxmlformats.org/officeDocument/2006/math"'
                )

                pq(p._element).append(omml_str)
                return

            # Afanti formula
            if pq.has_class('afanti-latex'):
                metadata = AftQuestion(pq).parse_element()
                if metadata.startswith('^') or metadata.startswith('_'):
                    # A leading script marker needs a base: move the last
                    # character of the preceding element into the formula.
                    last_ele = pq(p._element).children()[-1]
                    metadata = last_ele.text[-1] + metadata
                    last_ele.text = last_ele.text[:-1]

                omml_str = converter.to_omml(self.mini_trim(metadata))
                omml_str = omml_str.replace(
                    '<m:oMath',
                    '<m:oMath xmlns:m="http://schemas.openxmlformats.org/officeDocument/2006/math"'
                )

                pq(p._element).append(omml_str)
                return
        except EquationConvertError:
            img = PyQuery('img', pq)
            self._render_img(p, img)
            return

        bold = any([
            bold,
            self._get_pq_style(pq, 'font-weight') == 'bold',
            self._get_pq_style(pq, 'font-weight') == 'bolder'
        ])
        italic = any(
            [italic, self._get_pq_style(pq, 'font-style') == 'italic'])
        strike = any([
            strike,
            self._get_pq_style(pq, 'text-decoration') == 'line-through',
            self._get_pq_style(pq, 'text-decoration-line') == 'line-through'
        ])
        underline = any([
            underline,
            self._get_pq_style(pq, 'text-decoration') == 'underline',
            self._get_pq_style(pq, 'text-decoration-line') == 'underline'
        ])

        if self._get_pq_style(pq, 'font-size'):
            size = self._get_pq_style(pq, 'font-size')
            if size.endswith('px'):
                size = size[:-2]
                size = int(float(size))
                font_size = self.get_pt(size)
            elif size.endswith('pt'):
                size = size[:-2]
                size = float(size)
                font_size = Pt(size)

        contents = pq.contents()
        for item in contents:
            if isinstance(item, (HtmlElement, _Element)):
                # Nested elements inherit the same flags as the text runs
                # below, including superscript / subscript.
                self._render_element(p,
                                     item,
                                     is_root=True,
                                     bold=bold,
                                     italic=italic,
                                     strike=strike,
                                     underline=underline,
                                     font_size=font_size,
                                     sup=sup,
                                     sub=sub)
                continue
            run = p.add_run(self._clear_text(item))
            self.__force_simsun(run)
            if self._get_pq_style(pq, 'font-name'):
                run.font.name = self._get_pq_style(pq, 'font-name')
            if font_size:
                run.font.size = font_size

            run.underline = underline

            run.bold = bold
            run.italic = italic
            run.font.strike = strike
            run.font.superscript = sup
            run.font.subscript = sub
    def _render_element(self,
                        p: Paragraph,
                        element: str or Element,
                        is_root=False,
                        bold=False,
                        italic=False,
                        strike=False,
                        underline=False,
                        font_size=None,
                        sup=False,
                        sub=False):
        """
        Convert an HTML node into Word content.

        A plain string becomes one formatted run in ``p``. Elements are
        dispatched by tag: inline formatting tags switch on their flag and
        render their contents, ``span`` / ``ul`` / ``ol`` / ``table`` /
        ``img`` go to their dedicated renderers, headings and unknown tags
        are rendered into a new paragraph added to ``p``'s parent.

        :param p: current paragraph
        :param element: text or element to convert
        :param is_root: for ``p`` elements, render into ``p`` itself
            instead of a newly added paragraph
        :param bold: inherited bold flag
        :param italic: inherited italic flag
        :param strike: inherited strike-through flag
        :param underline: inherited underline flag
        :param font_size: inherited font size; not set when falsy
        :param sup: inherited superscript flag
        :param sub: inherited subscript flag
        :return: None
        """
        if isinstance(element, str):
            run = p.add_run(self._clear_text(element))
            run.bold = bold
            run.italic = italic
            run.font.strike = strike
            run.font.underline = underline
            run.font.subscript = sub
            run.font.superscript = sup
            if font_size:
                run.font.size = font_size
            self.__force_simsun(run)
            return

        # Formatting state handed down to nested renderers; each branch
        # overrides only the flag its tag switches on.
        inherited = dict(bold=bold,
                         italic=italic,
                         strike=strike,
                         underline=underline,
                         font_size=font_size,
                         sub=sub,
                         sup=sup)
        pq = PyQuery(element)
        if pq.is_('p'):  # nested p is not supported; flattened automatically
            contents = pq.contents()
            align = self._get_pq_style(pq, 'text-align')

            if align == 'center':
                alignment = WD_ALIGN_PARAGRAPH.CENTER
            elif align == 'right':
                alignment = WD_ALIGN_PARAGRAPH.RIGHT
            else:
                alignment = WD_ALIGN_PARAGRAPH.LEFT

            # The current paragraph is aligned in both cases.
            p.alignment = alignment
            if is_root:
                self._render_children(p, contents)
            else:
                sub_p = p._parent.add_paragraph()
                sub_p.alignment = alignment
                self._render_children(sub_p, contents)
        elif pq.is_('u'):  # underline
            self.__render_inline_element(p, pq,
                                         **{**inherited, 'underline': True})
        elif pq.is_('strong') or pq.is_('b'):  # bold
            self.__render_inline_element(p, pq,
                                         **{**inherited, 'bold': True})
        elif pq.is_('i') or pq.is_('em'):  # italic
            self.__render_inline_element(p, pq,
                                         **{**inherited, 'italic': True})
        elif pq.is_('sub'):  # subscript
            self.__render_inline_element(p, pq,
                                         **{**inherited, 'sub': True})
        elif pq.is_('sup'):  # superscript
            self.__render_inline_element(p, pq,
                                         **{**inherited, 'sup': True})
        elif pq.is_('var'):  # legacy formula
            self.__render_inline_element(p, pq,
                                         **{**inherited, 'italic': True})
        elif pq.is_('span'):
            # sub / sup are forwarded too, so a span nested in <sub>/<sup>
            # keeps its script position.
            self._render_span(p, pq, **inherited)
        elif pq.is_("br"):
            p.add_run().add_break()
        elif pq.is_("div"):
            # A div starts on a new line inside the same paragraph.
            p.add_run().add_break()
            self._render_children(p, pq.contents())
        elif pq.is_('ul'):
            self._render_unorder_list(p, pq)
        elif pq.is_('ol'):
            self._render_order_list(p, pq)
        elif pq.is_('table'):
            self._render_table(p, pq)
        elif pq.is_('img'):  # image
            self._render_img(p, pq)
        elif element.tag in ('h1', 'h2', 'h3', 'h4', 'h5', 'h6'):
            # All heading levels: new paragraph, bold italic 12pt.
            sub_p = p._parent.add_paragraph()
            self.__render_inline_element(sub_p, pq,
                                         **{**inherited,
                                            'bold': True,
                                            'italic': True,
                                            'font_size': Pt(12)})
        else:
            sub_p = p._parent.add_paragraph()
            contents = pq.contents()
            self._render_children(sub_p, contents)
Example #24
0
 def __init__(self, paragraph: Paragraph, ref: ReferenceList):
     """Append the FinalCif credit sentence and register its reference."""
     paragraph.add_run(
         "This report and the CIF file were generated using FinalCif.")
     ref.append(FinalCifReference())
Example #25
0
 def render(self, p: Paragraph, _: docx.document.Document) -> None:
     """Add ``self.text`` as a run in ``self.style`` coloured ``self.color``.

     The second argument is unused.
     """
     # Read the colour before the run is created, as the one-line
     # assignment form does.
     rgb = self.color
     colored_run = p.add_run(self.text, self.style)
     colored_run.font.color.rgb = rgb
Example #26
0
 def render(self, p: Paragraph, _: docx.document.Document) -> None:
     """Add ``self.text`` as a bold run in ``self.style``.

     The second argument is unused.
     """
     bold_run = p.add_run(self.text, self.style)
     bold_run.bold = True
Example #27
0
 def __init__(self, cif: CifContainer, paragraph: Paragraph):
     """
     Append the displacement-parameter / hydrogen-treatment sentences.

     The opening wording depends on how the result of
     ``number_of_isotropic_atoms()`` compares with
     ``cif.natoms(without_h=True)``. The text is written as a sequence of
     runs so that ``U`` is italic, ``iso`` / ``eq`` are subscript and the
     ``3`` of "sp3" is superscript.

     TODO: check if the proposed things are really there.
     """
     self.cif = cif
     isotropic_count = self.number_of_isotropic_atoms()
     lead, displacement_kind = 'All', 'anisotropic'
     if 0 < isotropic_count < self.cif.natoms(without_h=True):
         lead = ('Some atoms ({}) were refined using isotropic displacement parameters.'
                 ' All other').format(isotropic_count)
     if isotropic_count > 0 and isotropic_count > self.cif.natoms(without_h=True):
         lead = ('Most atoms ({}) were refined using isotropic displacement parameters.'
                 ' All other').format(isotropic_count)
     if isotropic_count == self.cif.natoms(without_h=True):
         lead, displacement_kind = 'All', 'isotropic'
     opening = (
         "{} non-hydrogen atoms were refined with {} displacement parameters. "
         "The hydrogen atoms were refined isotropically on calculated positions using a riding model "
         "with their "
     ).format(lead, displacement_kind)
     # (text, font attribute to switch on) in output order; None means the
     # run is added without touching its font.
     pieces = (
         (opening, None),
         ('U', 'italic'),
         ('iso', 'subscript'),
         (" values constrained to 1.5 times the ", None),
         ('U', 'italic'),
         ('eq', 'subscript'),
         (" of their pivot atoms for terminal sp", None),
         ('3', 'superscript'),
         (" carbon atoms and 1.2 times for all other carbon atoms.", None),
     )
     for text, font_flag in pieces:
         piece_run = paragraph.add_run(text)
         if font_flag is not None:
             setattr(piece_run.font, font_flag, True)
Example #28
0
def convert(source_path, out_path, short_name, cite, year):
    """Restyle the document at ``source_path`` using a template and save it to ``out_path``.

    The function copies styles, settings, section formatting and headers from
    the template document, normalizes typography in the body and footnotes,
    assigns caption styles to the leading paragraphs, marks the author name,
    drops blank paragraphs, and turns highlighted headnote markers into
    bookmarks referenced by PAGEREF fields.

    The template is loaded from ``template_path``, which is not a parameter;
    it is expected to be defined at module level.

    Args:
        source_path: Path handed to ``load_doc`` for the document to convert.
        out_path: Path the converted document is saved to.
        short_name: Case name written into the ``HeaderCaseName`` header
            paragraph; every ``' v. '`` separator in it is italicized.
        cite: Citation text written into the ``HeaderCitation`` header run.
            Its last space-separated token becomes the starting page number.
        year: Text written into the ``HeaderYear`` header run.

    Raises:
        Exception: whatever author-name detection raised, re-raised after a
            warning has been printed.
    """

    ### TODO:
    # whitelist allowed tags
    # replace paragraph with .5 inch indented first line with a tab

    ### known changes:
    # tighter character spacing?
    # footnote numbers bold?
    # no space after footnote number?

    ### LOAD DATA ###

    # load docs
    source_doc, source_pq = load_doc(source_path)
    template_doc, template_pq = load_doc(template_path)

    # load footnotes
    footnotes_part, footnotes_el, footnotes_pq = load_part(
        source_doc.part.part_related_by(RT.FOOTNOTES))
    template_footnotes_part, template_footnotes_el, template_footnotes_pq = load_part(
        template_doc.part.part_related_by(RT.FOOTNOTES))

    ### COPY STYLES FROM TEMPLATE ###

    # copy styles, settings, and section formatting from template doc
    replace_element_contents(template_doc.styles._element,
                             source_doc.styles._element)
    replace_element_contents(template_doc.settings._element,
                             source_doc.settings._element)
    replace_element_contents(
        template_pq('w|sectPr')[0],
        source_pq('w|sectPr')[0])
    replace_element_contents(
        template_footnotes_pq('w|footnote').children()[0],
        footnotes_pq('w|footnote').children()
        [0])  # first footnote is the footnote separator

    ### HEADERS ###

    # delete existing header parts and copy in new header parts
    # (iterate over a snapshot because relationships are deleted inside the loop)
    for rId, rel in list(source_doc.part.rels.items()):
        if rel.reltype == RT.HEADER:
            del source_doc.part.rels[rId]
    update_refs = {}  # template relationship id -> new relationship id in source
    header_parts = []
    for rId, rel in template_doc.part.rels.items():
        if rel.reltype == RT.HEADER:
            new_id = source_doc.part.rels._next_rId
            update_refs[rId] = new_id
            header_parts.append(load_part(rel.target_part))
            source_doc.part.rels.add_relationship(RT.HEADER, rel.target_part,
                                                  new_id)
            source_doc.part.package.parts.append(rel.target_part)

    # update header references
    for header_ref in source_pq('w|headerReference'):
        header_ref.attrib[qn('r:id')] = update_refs[header_ref.attrib[qn(
            'r:id')]]

    # fill in header values
    for header_part, header_el, header_pq in header_parts:
        header_pq("w|rStyle[w|val='HeaderYear']").closest('w|r')('w|t').text(
            year)
        header_pq("w|rStyle[w|val='HeaderCitation']").closest('w|r')(
            'w|t').text(cite)
        short_name_par = Paragraph(
            header_pq("w|pStyle[w|val='HeaderCaseName']").closest('w|p')[0],
            None)
        short_name_par.clear()

        # italicize v. in party name
        # Splitting on every ' v. ' handles names with more than one
        # separator; a name without a separator yields one plain run.
        for part_index, party in enumerate(short_name.split(' v. ')):
            if part_index:
                vs_run = short_name_par.add_run(' v. ')
                vs_run.italic = True
            short_name_par.add_run(party)

    # set starting page number
    starting_page_number = cite.rsplit(' ', 1)[-1]
    source_pq('w|sectPr').append(
        make_el(
            source_pq('w|sectPr')[0], 'w:pgNumType',
            {'w:start': starting_page_number}))

    ### TYPOGRAPHY ###

    # apply typography changes to body text and footnotes, adjusting variables that are different
    for query, allowed_styles, section_name, blockquote_style_name in (
        (source_pq, ('FootnoteReference', ), 'body', 'Blockquote'),
        (footnotes_pq,
         ('FootnoteText', 'FootnoteSeparator', 'FootnoteReference'),
         'footnote', 'FootnoteBlockquote')):

        # clear existing styles
        ignore_removed_styles = ('NormalWeb', )
        for style_tag in query('w|pStyle,w|rStyle'):
            style_name = style_tag.attrib.get(qn('w:val'))
            if style_name not in allowed_styles:
                if style_name not in ignore_removed_styles:
                    print("Warning: removing unrecognized %s style %s." %
                          (section_name, style_name))
                remove_el(style_tag)

        # mark block quotes
        for par in query('w|ind[w|left="720"]'):
            if qn('w:hanging') not in par.attrib:
                par = pq(par).closest('w|p')[0]
                par.style = blockquote_style_name

        # remove fonts and sizes
        remove_tags = ('sz', 'szCs', 'rFonts', 'ind', 'spacing', 'proofErr',
                       'bookmarkStart', 'bookmarkEnd', 'color[w|val="000000"]',
                       'lastRenderedPageBreak')
        for tag in remove_tags:
            query('w|' + tag).remove()

        # underline to italic
        # (double underline becomes small caps instead)
        for el in query('w|u'):
            if el.attrib.get(qn('w:val')) == 'double':
                el.tag = qn('w:smallCaps')
            else:
                el.tag = qn('w:i')
            el.attrib.clear()

        # combine consecutive runs with identical formatting
        query('w|t').attr(
            qn('xml:space'),
            'preserve')  # add preserve to all t blocks for uniformity
        skip = 0
        for run in query('w|r'):

            # skip runs that have already been appended to previous run and detached
            if skip:
                skip -= 1
                continue

            blank_r1 = blank_run(run)
            while True:
                r2 = pq(run).next()
                if not r2:
                    break
                r2 = r2[0]
                # only merge a sibling whose blanked serialization matches this run's
                if r2.tag != run.tag or etree.tostring(
                        blank_r1) != etree.tostring(blank_run(r2)):
                    break
                run.text += r2.text
                remove_el(r2)
                skip += 1

        # text replacements
        for t in query('w|t'):
            text = t.text
            # an empty w:t may carry no text at all (None); nothing to rewrite
            if not text:
                continue
            # fix dashes
            text = text.replace(" -- ", " — ")
            # remove double spaces
            text = re.sub(' +', ' ', text)
            # fix quotes
            for straight_quote, left_quote, right_quote in (('"', '“', '”'),
                                                            ("'", '‘', '’')):
                if straight_quote not in text:
                    continue
                # right smart quotes
                text = re.sub(r'([a-zA-Z0-9.,?!;:\'\"])%s' % straight_quote,
                              r'\1%s' % right_quote, text)
                text = re.sub(r'%s ' % straight_quote, r'%s ' % right_quote,
                              text)
                # remaining are left smart quotes
                text = text.replace(straight_quote, left_quote)
            t.text = text

    ### FOOTNOTES ###

    footnote_tab = deepcopy(
        template_footnotes_pq('w|footnote:not([w|type]) w|r')
        [0])  # first run in template footnotes is a tab
    for footnote in footnotes_pq('w|footnote:not([w|type])'):

        # remove extra tabs from footnotes, add single tab
        for run in pq(footnote, namespaces=nsmap)('w|r'):
            if pq(run, namespaces=nsmap)('w|tab'):
                remove_el(run)
            else:
                pq(run).before(deepcopy(footnote_tab))
                break

        # make sure footnotes have FootnoteText style
        for par in pq(footnote, namespaces=nsmap)('w|p'):
            if not par.style:
                par.style = 'FootnoteText'

    ### CAPTION ###

    def skip_blanks(paragraphs, par_num):
        """Return the index of the next paragraph after ``par_num`` that has text."""
        par_num += 1
        while not has_text(paragraphs[par_num]):
            par_num += 1
        return par_num

    # delete first four paragraphs
    pq(source_pq('w|p')[:4]).remove()

    paragraphs = source_pq('w|p')

    # format first paragraph
    par_num = 0
    paragraphs[par_num].style = 'CaseName'

    # process the case name so all-caps becomes small-caps:
    for run in pq(paragraphs[par_num])('w|r'):
        parts = re.split(r'([A-Z][A-Z]+)', run.text)
        if len(parts) > 1:
            new_runs = split_run(run, parts)
            for new_run in new_runs[1::2]:
                # every other part will be all-caps, so should become small-caps
                Run(new_run, None).font.small_caps = True
                new_run.text = new_run.text.title()

    par_num = skip_blanks(paragraphs, par_num)
    paragraphs[par_num].style = 'Dates'
    par_num = skip_blanks(paragraphs, par_num)
    paragraphs[par_num].style = 'Judges'
    par_num = skip_blanks(paragraphs, par_num)
    paragraphs[par_num].style = 'Categories'
    par_num = skip_blanks(paragraphs, par_num)

    # headnotes are styled at every second paragraph until one without text is hit
    while has_text(paragraphs[par_num]):
        paragraphs[par_num].style = 'Headnote'
        par_num += 2

    # extra space for last headnote
    Paragraph(paragraphs[par_num - 2],
              None).paragraph_format.space_after = Pt(12)

    par_num = skip_blanks(paragraphs, par_num)
    while has_text(paragraphs[par_num]):
        paragraphs[par_num].style = 'History'
        par_num += 2

    par_num = skip_blanks(paragraphs, par_num)
    while has_text(paragraphs[par_num]):
        paragraphs[par_num].style = 'Appearance'
        par_num += 1

    # mark author name -- first sentence of first paragraph of case text
    par_num = skip_blanks(paragraphs, par_num)
    first_paragraph = Paragraph(paragraphs[par_num], source_doc._body)

    try:
        first_run = next(r for r in first_paragraph.runs if r.text.strip())
        first_run, second_run = split_run(first_run._element,
                                          first_run.text.split('.', 1))
        first_run.text = first_run.text.title() + "."
        Run(first_run, first_paragraph).style = "Author"
    except Exception:
        # report which text was searched, then let the failure propagate
        print("Warning: failed to detect author name. Searched this text: %s" %
              first_paragraph.text)
        raise

    # remove blank paragraphs
    # this has to come AFTER caption processing so we can tell sections apart
    for query in (source_pq, footnotes_pq('w|footnote:not([w|type])')):
        for p in query('w|p'):
            if not has_text(p):
                remove_el(p)

    ### HEADNOTE PAGE RANGES ###

    # replace highlighted headnote markers with bookmarks
    # (markers alternate: even index = start of a headnote range, odd = end)
    bookmarks = []
    for i, highlight_run in enumerate(
            source_pq("w|highlight[w|val='yellow']")):
        highlight_run = pq(highlight_run).closest('w|r')
        bookmark_name = "Headnote%s%s" % ("End" if i % 2 else "Start",
                                          i // 2)
        highlight_run.after(
            pq([
                make_el(highlight_run[0], "w:bookmarkStart", {
                    "w:id": str(i),
                    "w:name": bookmark_name
                }),
                make_el(highlight_run[0], "w:bookmarkEnd", {"w:id": str(i)})
            ]))
        remove_el(highlight_run[0])
        bookmarks.append(bookmark_name)

    # replace headnote page number references with bookmark shortcodes
    reference_template = """
        <w:fldSimple w:instr=" PAGEREF {bookmark_start} ">
            <w:r><w:rPr><w:noProof/></w:rPr><w:t>PRINT</w:t></w:r>
        </w:fldSimple>
        <w:r><w:fldChar w:fldCharType="begin"/></w:r>
        <w:r><w:instrText xml:space="preserve"> IF  </w:instrText></w:r>
        <w:r><w:fldChar w:fldCharType="begin"/></w:r>
        <w:r><w:instrText xml:space="preserve"> PAGEREF {bookmark_start} </w:instrText></w:r>
        <w:r><w:fldChar w:fldCharType="separate"/></w:r>
        <w:r><w:rPr><w:noProof/></w:rPr><w:instrText>PRINT</w:instrText></w:r>
        <w:r><w:fldChar w:fldCharType="end"/></w:r>
        <w:r><w:instrText xml:space="preserve"> = </w:instrText></w:r>
        <w:r><w:fldChar w:fldCharType="begin"/></w:r>
        <w:r><w:instrText xml:space="preserve"> PAGEREF {bookmark_end} </w:instrText></w:r>
        <w:r><w:fldChar w:fldCharType="separate"/></w:r>
        <w:r><w:rPr><w:noProof/></w:rPr><w:instrText>PRINT</w:instrText></w:r>
        <w:r><w:fldChar w:fldCharType="end"/></w:r>
        <w:r><w:instrText xml:space="preserve"> "" "-</w:instrText></w:r>
        <w:r><w:fldChar w:fldCharType="begin"/></w:r>
        <w:r><w:instrText xml:space="preserve"> PAGEREF {bookmark_end} </w:instrText></w:r>
        <w:r><w:fldChar w:fldCharType="separate"/></w:r>
        <w:r><w:rPr><w:noProof/></w:rPr><w:instrText>PRINT</w:instrText></w:r>
        <w:r><w:fldChar w:fldCharType="end"/></w:r>
        <w:r><w:instrText>"</w:instrText></w:r>
        <w:r><w:fldChar w:fldCharType="end"/></w:r>
    """
    for headnote in source_pq('w|pStyle[w|val="Headnote"]'):
        for run in pq(headnote).closest('w|p')('w|r'):
            run = pq(run)
            parts = re.split(r'\[.*?\]', run('w|t').text())
            if len(parts) > 1:
                new_els = []
                for i, part in enumerate(parts):
                    if i != 0:
                        # each bracketed reference consumes the next start/end bookmark pair
                        new_els.extend(
                            parse_xml_fragment(
                                run[0],
                                reference_template.format(
                                    bookmark_start=bookmarks.pop(0),
                                    bookmark_end=bookmarks.pop(0))))
                    new_run = deepcopy(run[0])
                    # re-add the brackets that the split removed around each reference
                    pq(new_run)('w|t').text(("]" if i != 0 else "") + part + (
                        "[" if i != len(parts) - 1 else ""))
                    new_els.append(new_run)
                run.after(pq(new_els))
                remove_el(run[0])

    ### OUTPUT ###

    # write footnotes and headers
    save_part(footnotes_el, footnotes_part)
    for header_part, header_el, header_pq in header_parts:
        save_part(header_el, header_part)

    # save output
    source_doc.save(out_path)
Example #29
0
 def render(self, p: Paragraph, _: docx.document.Document) -> None:
     """Add a run holding ``self.ref(self.key)`` to *p*; do nothing when ``self.ref`` is falsy."""
     if not self.ref:
         return
     p.add_run(self.ref(self.key))
Example #30
0
 def add_reference(self, p: Paragraph):
     """
     Append this reference to *p* as a sequence of formatted runs.

     Only the fields that are set are written, in the order authors, journal
     (italic), year (bold), volume (italic), pages, doi. A closing '.' is
     added when any field other than the authors is present.
     """
     separator = ', '

     if self.authors:
         p.add_run(self.authors)
         p.add_run(separator)

     if self.journal:
         journal_run = p.add_run(self.journal)
         journal_run.italic = True
         # A journal name that already ends with '.' is followed by a space only.
         p.add_run(' ' if self.journal.endswith('.') else separator)

     if self.year:
         year_run = p.add_run(self.year)
         year_run.bold = True
         p.add_run(separator)

     if self.volume:
         volume_run = p.add_run(self.volume)
         volume_run.italic = True
         p.add_run(separator)

     if self.pages:
         p.add_run(self.pages)
         # Pages are separated from the doi only when a doi follows.
         if self.doi:
             p.add_run(separator)

     if self.doi:
         p.add_run(self.doi)

     has_citation_data = any((self.journal, self.pages, self.year, self.volume, self.doi))
     if has_citation_data:
         p.add_run('.')
Example #31
0
 def text_set_fixture(self):
     """Return a paragraph holding one throwaway run, the text to assign, and the text expected back."""
     new_text_value, expected_text_value = 'foo\tbar\rbaz\n', 'foo\tbar\nbaz\n'
     paragraph = Paragraph(element('w:p'), None)
     # Existing content that assigning new text is expected to replace.
     paragraph.add_run('must not appear in result')
     return paragraph, new_text_value, expected_text_value