Beispiel #1
0
 def ensurenl(i, e):
     ''' Make sure the node e finishes with a newline, adding one if needed.

         i is the index of e within its parent; e is either an element or a
         text node.

         Returns True when this call itself added a newline text node, and
         False otherwise. When e is an element with children the work is
         delegated to its last child and the outcome of that nested call is
         not propagated: False is returned. '''
     if not isinstance(e, sfm.Element):
         # Text node: a missing newline becomes a new sibling right after it.
         if e.endswith("\n"):
             return False
         e.parent.insert(i + 1, sfm.Text("\n", e.pos, e.parent))
         return True
     if len(e) == 0:
         # Childless element: the newline becomes its only child.
         e.append(sfm.Text("\n", e.pos, e.parent))
         return True
     # Element with children: only its last child can end the element.
     last_index = len(e) - 1
     ensurenl(last_index, e[-1])
     return False
Beispiel #2
0
 def _insertVerse(self, job, parent, offset, e):
     ''' Insert job into parent at offset, moving any trailing whitespace of
         the neighbouring text node so that it follows job instead.

         job    -- node to insert; its parent attribute is set to parent.
         parent -- list-like container that receives job.
         offset -- insertion index. 0 inserts at the front without examining
                   any neighbour. A positive value examines the node just
                   before the insertion point. A negative value examines
                   parent[offset] and appends job at the end.
         e      -- node whose pos is given to the re-created whitespace text.

         Returns the index just past the inserted node(s) when offset is
         non-negative; a negative offset is returned unchanged. '''
     job.parent = parent
     lc = None
     if offset != 0:
         last = parent[offset - (1 if offset > 0 else 0)]
         # endswith() instead of data[-1] so that an empty text node is
         # simply skipped rather than raising IndexError.
         if isinstance(last, sfm.Text) and last.data.endswith((" ", "\n")):
             newstr = last.data.rstrip()
             # Keep the stripped tail so it can be re-inserted after job.
             lc = last.data[len(newstr):]
             last.data = newstr
     if offset >= 0:
         parent.insert(offset, job)
         offset += 1
     else:
         parent.append(job)
     if lc is not None:
         t = sfm.Text(lc, pos=e.pos, parent=parent)
         if offset > 0:
             parent.insert(offset, t)
             offset += 1
         else:
             parent.append(t)
     return offset
Beispiel #3
0
 def versesToEnd(self):
     ''' For every 'v' element found while walking the document, build a
         'vp' element holding that verse's first argument and insert it
         later in the tree: either where the next 'v' element is met, or at
         the end of the last verse-text paragraph seen.

         The original 'v' elements are not removed by this method. The tree
         is modified in place and nothing is returned. '''
     # NOTE(review): iiterel is assumed to yield (index, node) pairs with the
     # index being the node's position within its parent -- confirm.
     it = self.iiterel(0, self.doc[0])
     currjob = None      # pending 'vp' element not yet inserted anywhere
     lastpara = None     # most recent verse-text paragraph that had a child element visited
     thispara = None     # verse-text paragraph currently being walked, if any
     for i, e in it:
         if not isinstance(e, sfm.Element):
             continue
         etype = e.meta.get('texttype', '').lower()
         style = e.meta.get('styletype', '').lower()
         if style == 'paragraph' and etype == 'versetext':
             # if e.parent is not None and e.parent.name == 'p':
             # print(e)
             thispara = e
             # offset counts nodes this method has inserted into thispara so far
             offset = 0
         elif style == 'paragraph':
             # A paragraph that is not verse text: flush the pending job to
             # the end of the previous verse-text paragraph.
             if currjob is not None:
                 self._insertVerse(currjob, lastpara, -1, e)
                 currjob = None
             thispara = None
             offset = 0
         if e.name == 'v':
             if currjob is not None:
                 if thispara is not None and i > 1:
                     # Insert the previous verse's job just before this 'v';
                     # _insertVerse returns the index after what it inserted,
                     # so subtracting i gives the updated insertion count.
                     offset = self._insertVerse(currjob, thispara,
                                                i + offset, e) - i
                 else:
                     # No usable position in the current paragraph: append to
                     # the previous verse-text paragraph instead.
                     self._insertVerse(currjob, lastpara, -1, e)
             # Start a new pending job carrying this verse's first argument.
             currjob = sfm.Element('vp',
                                   e.pos,
                                   parent=e.parent,
                                   meta=self.sheets['vp'])
             currjob.append(sfm.Text(e.args[0], pos=e.pos, parent=currjob))
         # lastpara only advances once an element other than the paragraph
         # itself has been visited while thispara is set.
         if thispara is not None and thispara is not e:
             lastpara = thispara
     if currjob is not None:
         # Final pending job goes at the end of the last paragraph, followed
         # by a newline. NOTE(review): lastpara is still None here if no
         # verse-text paragraph was recorded -- confirm callers guarantee one.
         lastpara.append(currjob)
         lastpara.append(sfm.Text("\n", parent=lastpara))
Beispiel #4
0
 def fn(e):
     ''' Run every applicable rule in regs over the text of e.

         Each rule is indexed as (guard, pattern, replacement). A rule is
         skipped when its guard is not None and returns a false value for
         e.parent. Rules are applied in order, each one seeing the output of
         the previous one.

         Returns a new sfm.Text with the final string if any rule changed
         the text at any point, otherwise e itself. '''
     text = str(e)
     changed = False
     for rule in regs:
         guard = rule[0]
         if guard is None or guard(e.parent):
             replaced = rule[1].sub(rule[2], text)
             if replaced != text:
                 text = replaced
                 changed = True
     return sfm.Text(text, e.pos, e.parent) if changed else e
Beispiel #5
0
 def _g(a, e):
     ''' Fold step that copies node e into the accumulator a, subject to
         pred and removes. Always returns a.

         Text nodes are copied only when pred accepts them. Elements whose
         name is in removes are dropped together with their content. Any
         other element is rebuilt with its children filtered recursively;
         the rebuilt element is added when pred accepts the original,
         otherwise its surviving children are added to a directly. '''
     if isinstance(e, sfm.Text):
         if pred(e):
             # "a or None": an accumulator that tests false is not recorded as parent
             a.append(sfm.Text(e, e.pos, a or None))
     elif e.name not in removes:
         rebuilt = sfm.Element(e.name,
                               e.pos,
                               e.args,
                               parent=a or None,
                               meta=e.meta)
         # Children are filtered into the rebuilt element first.
         for child in e:
             _g(rebuilt, child)
         if pred(e):
             a.append(rebuilt)
         elif len(rebuilt) > 0:
             # Element rejected: splice its kept children into a instead.
             a.extend(rebuilt[:])
     return a
Beispiel #6
0
        def fn(e):
            ''' Insert inschar around the "letter" characters of text node e.

                Nodes with no (or a false-valued) parent, or whose parent is
                rejected by isScriptureText, are returned untouched.

                The text is split into runs of letters (general category in
                takslc_cats, excluding "|") and runs of everything else.
                Within a letter run inschar is placed between the characters;
                it is also placed before the run unless the suppress flag is
                set. The flag starts set when e is not the first child of
                its parent, and is recomputed after every run from the run's
                final character.

                Returns a new sfm.Text if at least one letter run was seen,
                otherwise e itself. '''
            parent = e.parent
            if not parent or not isScriptureText(parent):
                return e

            def isletter(ch):
                return get_ucd(ord(ch), "gc") in takslc_cats and ch != "|"

            pieces = []
            changed = False
            suppress = parent[0] is not e
            for letters, run in groupby(str(e), key=isletter):
                chars = "".join(run)
                if len(chars) == 0:
                    continue
                if not letters:
                    pieces.append(chars)
                else:
                    prefix = "" if suppress else inschar
                    pieces.append(prefix + inschar.join(chars))
                    changed = True
                tail = chars[-1]
                # Same short-circuit order as a chained "or" over the three tests.
                if get_ucd(ord(tail), "InSC") in ("Invisible_Stacker", "Virama"):
                    suppress = True
                elif get_ucd(ord(tail), "gc") in ("Cf", "WS"):
                    suppress = True
                else:
                    # Substring test: true for a backslash or a vertical bar.
                    suppress = tail in r"\|"
            if not changed:
                return e
            return sfm.Text("".join(pieces), e.pos, e.parent)
Beispiel #7
0
 def new_element(self, e, name, content):
     ''' Build an element called name that takes its position and parent
         from e, has no arguments, uses the stylesheet entry self.sheets[name]
         as meta, and whose children are a newline text node followed by the
         items of the list content. '''
     leading_newline = sfm.Text("\n", e.pos)
     children = [leading_newline] + content
     return sfm.Element(name,
                        e.pos,
                        [],
                        e.parent,
                        content=children,
                        meta=self.sheets[name])
Beispiel #8
0
    def parse_element(self, e):
        ''' Expand one node of the document and return the list of nodes
            that should stand in its place.

            Text nodes   -- self.localise_re matches are rewritten through
                            self.localref; a new text node is returned only
                            when the string changed.
            ref / refnp  -- directly following 'rep' siblings are consumed
                            (and deleted from the parent) as "from => to"
                            replacement rules. Each reference in the element
                            is resolved with self.get_passage. For 'ref',
                            content that is not already inside a paragraph
                            element is wrapped in a new 'p' element ('p1'
                            when the parent is an 'id' element or missing).
                            The rules are then applied through
                            self.doc.transform_text.
            inc          -- updates self.removes: "-" resets it to every
                            marker in exclusionmap, any other character
                            takes that character's markers out again. The
                            element itself is returned.
            mod          -- the named module is parsed and its result
                            returned.
            anything else -- the children are processed recursively in place
                            and the element itself is returned. '''
        if isinstance(e, sfm.Text):
            t = self.localise_re.sub(self.localref, str(e))
            if t != e:
                return [sfm.Text(t, e.pos, e.parent)]
            return [e]
        elif e.name == "ref" or e.name == "refnp":
            res = []
            parent = e.parent
            isidparent = parent is None or parent.name == "id"
            reps = []
            # A parentless reference has no siblings to scan; skipping the
            # scan also avoids dereferencing None.
            if parent is not None:
                curr = parent.index(e)
                while curr + 1 < len(parent):
                    rep = parent[curr + 1]
                    if not isinstance(rep, sfm.Element) or rep.name != "rep":
                        break
                    # parse rep: "<search> => <replacement>"
                    m = re.match(r"^\s*(.*?)\s*=>\s*(.*?)\s*$", str(rep[0]), re.M)
                    if m:
                        # "..." in the search text stands for any run of
                        # characters that stays on one line.
                        reps.append(
                            (None,
                             re.compile(r"\b" +
                                        m.group(1).replace("...", "[^\n\r]+") +
                                        "(\\b|(?=\\s)|$)"), m.group(2)))
                    # The rule element is consumed whether or not it parsed.
                    del parent[curr + 1]
            for r in RefList.fromStr(str(e[0]), context=self.usfms.booknames):
                p = self.get_passage(r,
                                     removes=self.removes,
                                     strippara=e.name == "refnp")
                if e.name == "ref":
                    for i, t in enumerate(p):
                        # NOTE(review): key is spelled 'StyleType' here while
                        # other code uses 'styletype'; this relies on meta
                        # lookups being case-insensitive -- confirm.
                        if isinstance(t, sfm.Element) and t.meta.get(
                                'StyleType', '').lower() == 'paragraph':
                            if i:
                                # Wrap whatever precedes the first paragraph.
                                p[0:i] = [
                                    self.new_element(
                                        e, "p1" if isidparent else "p", p[0:i])
                                ]
                            break
                    else:
                        # No paragraph element at all: wrap the whole passage.
                        p = [
                            self.new_element(e, "p1" if isidparent else "p", p)
                        ]
                res.extend(p)
            if len(reps):
                res = self.doc.transform_text(*reps, doc=res)
            return res
        elif e.name == 'inc':
            s = "".join(map(str, e)).strip()
            for c in s:
                if c == "-":
                    self.removes = set(sum(exclusionmap.values(), []))
                else:
                    self.removes.difference_update(exclusionmap.get(c, []))
        elif e.name == 'mod':
            mod = Module(e[0].strip(), self.usfms)
            return mod.parse()
        else:
            cs = sum(map(self.parse_element, e), [])
            e[:] = cs
        return [e]
Beispiel #9
0
 def iterfn(el, top=False):
     ''' Recursively prune el in place and report whether it should be kept.

         For an element, 'v' children that are not followed by any kept
         content before the next 'v' child (or before the end of the
         element) are removed together with the deletable nodes collected
         after them. When the enclosing ellipsis flag is set, a "..." marker
         is left where content was removed.

         Returns False for a node the caller may delete (whitespace-only or
         ellipsis-only text, or an emptied paragraph element) and True
         otherwise. The top parameter is not used in this body. '''
     if isinstance(el, sfm.Element):
         lastv = None      # most recent 'v' child with no kept content after it yet
         predels = []      # deletable nodes seen since the last kept content
         # Iterate over a copy because children are removed during the walk.
         for c in el[:]:
             if not isinstance(c, sfm.Element) or c.name != "v":
                 if iterfn(c):  # False if deletable ~> empty
                     # c is kept, so the pending verse has content after all.
                     if len(predels):
                         # A trailing paragraph holding only "..." is spared
                         # from removal.
                         if isinstance(predels[-1], sfm.Element) \
                                          and predels[-1].name == "p" \
                                          and len(predels[-1]) == 1 \
                                          and str(predels[-1][0]).strip() == "...":
                             predels.pop(-1)
                         # Only element nodes are removed here; pending text
                         # nodes are left in place.
                         for p in predels:
                             if isinstance(p, sfm.Element):
                                 p.parent.remove(p)
                     lastv = None
                     predels = []
                 else:
                     predels.append(c)
             elif isinstance(c, sfm.Element) and c.name == "v":
                 if lastv is not None:
                     # The previous verse had nothing kept after it: drop
                     # everything collected since, then the marker itself.
                     for p in predels:
                         p.parent.remove(p)
                     predels = []
                     if ellipsis:
                         # Swap the verse marker for a "..." text node, which
                         # itself stays deletable.
                         i = lastv.parent.index(lastv)
                         ell = sfm.Text("...", parent=lastv.parent)
                         lastv.parent.insert(i, ell)
                         predels.append(ell)
                         lastv.parent.pop(i + 1)
                     else:
                         lastv.parent.remove(lastv)
                 lastv = c
         # A verse marker still pending at the end had no kept content.
         if lastv is not None:
             lastv.parent.remove(lastv)
         # res is not read again within this function.
         res = len(el) != 0
         # Pending nodes that are elements or text with non-whitespace content.
         nonemptypredels = [
             p for p in predels if isinstance(p, sfm.Element)
             or not re.match(r"^\s*$", str(p))
         ]
         ell = None
         if len(nonemptypredels):
             if ellipsis:
                 # Mark the removed tail with a single ellipsis placed before
                 # the first non-empty pending node.
                 p = nonemptypredels[0]
                 i = p.parent.index(p)
                 st = p.parent.meta.get("styletype", "")
                 if st is None or st.lower() == "paragraph":
                     ell = sfm.Text("...", parent=p.parent)
                 else:
                     # Parent is not a paragraph: wrap the ellipsis in its own
                     # 'p' element.
                     ell = sfm.Element('p',
                                       parent=p.parent,
                                       meta=self.sheets['p'])
                     ell.append(sfm.Text("...\n", parent=ell))
                 p.parent.insert(i, ell)
         for p in predels:
             p.parent.remove(p)
         # From here predels holds only the ellipsis node just added, if any.
         predels = [ell] if ell is not None else []
         st = el.meta.get("styletype", "")
         # A paragraph (or an element whose styletype is None) left with
         # nothing but that ellipsis is treated as empty.
         if (st is None or st.lower()
                 == "paragraph") and len(el) == len(predels):
             # el.parent.remove(el)
             return True if st is None else False  # To handle empty markers like \pagebreak
     # Text that is only whitespace, or that is an ellipsis followed only by
     # whitespace, is deletable.
     elif re.match(r"^\s*$", str(el)) or re.match(
             r"\.{3}\s*$", str(el)):
         return False
     return True