コード例 #1
0
ファイル: dict_cc.py プロジェクト: doozan/pyglossary
    def _iterOneDirection(self, column1, column2):
        """Yield glossary entries for one translation direction.

        Rows from iterRows(column1, column2) are grouped by their first
        field (the headword); each group is rendered into an HTML <div>
        and yielded as a new entry with defiFormat="h".
        """
        from io import BytesIO
        from itertools import groupby

        from lxml import etree as ET

        from pyglossary.html_utils import unescape_unicode

        glos = self._glos
        for headword, rowIter in groupby(
                self.iterRows(column1, column2),
                key=lambda row: row[0],
        ):
            headword = html.unescape(headword)
            senses = []
            for _, term2, entry_type in rowIter:
                senses.append((term2, entry_type))
            buf = BytesIO()
            with ET.htmlfile(buf) as hf:
                with hf.element("div"):
                    with glos.titleElement(hf, headword):
                        try:
                            hf.write(headword)
                        except Exception as e:
                            log.error(f"error in writing {headword!r}, {e}")
                            hf.write(repr(headword))
                    # a single sense gets a line break after the title
                    if len(senses) == 1:
                        hf.write(ET.Element("br"))
                    self.makeList(hf, senses, self.writeSense)
            defi = unescape_unicode(buf.getvalue().decode("utf-8"))
            yield self._glos.newEntry(headword, defi, defiFormat="h")
コード例 #2
0
ファイル: conv.py プロジェクト: xiaoke912/pyglossary
def render_article(trad, simp, pinyin, eng):
    """Render one dictionary entry as an HTML string.

    trad/simp are the traditional and simplified forms, pinyin is a
    whitespace-separated syllable string, and eng is an iterable of
    English definitions.  Returns the rendered HTML as str.
    """
    from io import BytesIO

    from lxml import etree as ET

    pinyin_list, tones = [], []
    for raw_syllable in pinyin.split():
        pretty, tone = convert(raw_syllable)
        pinyin_list.append(pretty)
        tones.append(tone)

    buf = BytesIO()
    with ET.htmlfile(buf, encoding="utf-8") as hf:
        with hf.element("div", style="border: 1px solid; padding: 5px"):
            with hf.element("div"):
                with hf.element("big"):
                    colorize(hf, simp, tones)
                # show the traditional form only when it differs
                if trad != simp:
                    hf.write("\xa0/\xa0")  # "\xa0" is a non-breaking space
                    colorize(hf, trad, tones)
                hf.write(ET.Element("br"))
                with hf.element("big"):
                    colorize(hf, pinyin_list, tones)

            with hf.element("div"):
                with hf.element("ul"):
                    for definition in eng:
                        with hf.element("li"):
                            hf.write(definition)

    return buf.getvalue().decode("utf-8")
コード例 #3
0
    def getEntryByElem(self, entry: "lxml.etree.Element") -> "BaseEntry":
        """Convert one TEI <entry> element into a glossary entry.

        Headwords are collected from form/orth children; grammar
        groups, pronunciations and senses are rendered into an HTML
        <div> and returned via newEntry with defiFormat="h".
        """
        from lxml import etree as ET
        keywords = []  # headword strings collected from form/orth
        f = BytesIO()

        # Discovery mode: remember any tag we do not handle yet so it
        # can be reported later.
        if self._discover:
            for elem in entry.iter():
                if elem.tag not in self.supportedTags:
                    self._discoveredTags[elem.tag] = elem

        def br():
            # A fresh <br> element for each line break.
            return ET.Element("br")

        with ET.htmlfile(f) as hf:
            with hf.element("div"):
                for form in entry.findall("form/orth", self.ns):
                    keywords.append(form.text)

                # Optionally repeat the headwords in bold at the top of
                # the rendered definition.
                if self._keywords_header:
                    for keyword in keywords:
                        with hf.element("b"):
                            hf.write(keyword)
                        hf.write(br())

                # TODO: "form/usg"
                # <usg type="geo">Brit</usg>
                # <usg type="geo">US</usg>
                # <usg type="hint">...</usg>

                # Grammar groups (part of speech etc.) in italics, one
                # line per <gramGrp>.
                gramGrpList = entry.findall("gramGrp", self.ns)
                if gramGrpList:
                    for gramGrp in gramGrpList:
                        parts = []
                        for child in gramGrp.iterchildren():
                            text = self.normalizeGramGrpChild(child)
                            if text:
                                parts.append(text)
                        with hf.element("i"):
                            hf.write(", ".join(parts))
                        hf.write(br())

                # Pronunciations: comma-separated, green, /slash-wrapped/.
                pronList = entry.findall("form/pron", self.ns)
                if pronList:
                    for i, pron in enumerate(pronList):
                        if i > 0:
                            hf.write(", ")
                        with hf.element("font", color="green"):
                            hf.write(f"/{pron.text}/")
                    hf.write(br())
                    hf.write("\n")

                self.make_list(
                    hf,
                    entry.findall("sense", self.ns),
                    self.process_sense,
                )

        defi = f.getvalue().decode("utf-8")
        # presumably reverses entity-escaping done by the serializer —
        # see pyglossary.html_utils.unescape_unicode
        defi = unescape_unicode(defi)
        return self._glos.newEntry(keywords, defi, defiFormat="h")
コード例 #4
0
	def _iterOneDirection(self, column1, column2):
		"""Yield glossary entries for one translation direction.

		Rows are grouped by headword; an optional gender marker parsed
		out of the headword is rendered in italics before the senses.
		"""
		from io import BytesIO
		from itertools import groupby

		from lxml import etree as ET

		glos = self._glos
		for headword, rowIter in groupby(
			self.iterRows(column1, column2),
			key=lambda row: row[0],
		):
			headword = html.unescape(headword)
			senses = [(t2, etype) for _, t2, etype in rowIter]
			buf = BytesIO()
			gender, headword = self.parseGender(headword)
			with ET.htmlfile(buf, encoding="utf-8") as hf:
				with hf.element("div"):
					if gender:
						with hf.element("i"):
							hf.write(gender)
						hf.write(ET.Element("br"))
					self.makeList(hf, senses, self.writeSense)
			yield self._glos.newEntry(
				headword,
				buf.getvalue().decode("utf-8"),
				defiFormat="h",
			)
コード例 #5
0
	def transform(self, article: "lxml.etree.Element") -> str:
		"""Render an article element tree to an HTML string.

		The children of *article* are written inside a
		<div class="article"> wrapper; <br> tags are normalized to
		<br/> for XHTML-compatible consumers.

		NOTE(review): the original read self._encoding into an unused
		local; output here is hard-coded UTF-8 — confirm callers never
		expect self._encoding to be honored.
		"""
		from lxml import etree as ET
		f = BytesIO()
		with ET.htmlfile(f, encoding="utf-8") as hf:
			with hf.element("div", **{"class": "article"}):
				self.writeChildrenOf(hf, article)

		text = f.getvalue().decode("utf-8")
		text = text.replace("<br>", "<br/>")  # for compatibility
		return text
コード例 #6
0
    def getEntryByElem(self, entry: "lxml.etree.Element") -> "BaseEntry":
        """Convert one TEI <entry> element into a glossary entry.

        Headwords (form/orth) are written in bold and collected as
        keywords; grammar groups, pronunciations and senses follow.
        """
        from lxml import etree as ET
        keywords = []  # headword strings collected from form/orth
        f = BytesIO()

        # Discovery mode: remember any tag we do not handle yet so it
        # can be reported later.
        if self._discover:
            for elem in entry.iter():
                if elem.tag not in self.supportedTags:
                    self._discoveredTags[elem.tag] = elem

        with ET.htmlfile(f) as hf:
            with hf.element("div"):
                for form in entry.findall("form/orth", self.ns):
                    keywords.append(form.text)
                    # TODO: if there is only one keyword, we should skip this
                    with hf.element("b"):
                        hf.write(form.text)
                hf.write(ET.Element("br"))

                # TODO: "form/usg"
                # <usg type="geo">Brit</usg>
                # <usg type="geo">US</usg>
                # <usg type="hint">...</usg>

                # Grammar groups (part of speech etc.) in italics, one
                # line per <gramGrp>.
                gramGrpList = entry.findall("gramGrp", self.ns)
                if gramGrpList:
                    for gramGrp in gramGrpList:
                        parts = []
                        for child in gramGrp.iterchildren():
                            text = self.normalizeGramGrpChild(child)
                            if text:
                                parts.append(text)
                        with hf.element("i"):
                            hf.write(", ".join(parts))
                        hf.write(ET.Element("br"))

                pronList = entry.findall("form/pron", self.ns)
                if pronList:
                    # NOTE(review): the raw <font> markup written here as
                    # text gets escaped by htmlfile; the html.unescape
                    # call below restores it to real markup.
                    hf.write(", ".join(f'<font color="green">/{p.text}/</font>'
                                       for p in pronList))
                    hf.write(ET.Element("br"))
                    hf.write("\n")

                self.make_list(
                    hf,
                    entry.findall("sense", self.ns),
                    self.process_sense,
                )

        defi = f.getvalue().decode("utf-8")
        defi = html.unescape(defi)
        return self._glos.newEntry(keywords, defi)
コード例 #7
0
ファイル: cc_kedict.py プロジェクト: qnga/pyglossary
    def _createEntry(self, yamlBlock: str):
        """Parse one YAML block and return (word, html_definition).

        Returns None (after logging an error) when the block has no
        "word" key.
        """
        from lxml import etree as ET
        from yaml import load
        try:
            from yaml import CLoader as Loader
        except ImportError:
            from yaml import Loader

        # NOTE(review): the full Loader can construct arbitrary Python
        # objects; only feed this trusted input.
        edict = load(yamlBlock, Loader=Loader)
        word = edict.get("word")
        if not word:
            log.error(f"no word in {edict}")
            return

        buf = BytesIO()
        with ET.htmlfile(buf) as hf:
            with hf.element("div"):
                self._processEntry(hf, edict)

        return word, buf.getvalue().decode("utf-8")
コード例 #8
0
	def getEntryByElem(self, entry: "lxml.etree.Element") -> "BaseEntry":
		"""Convert one TEI <entry> element into a glossary entry.

		Headwords (form/orth) are written in bold and collected as
		keywords; part-of-speech, pronunciation and sense data follow.
		"""
		from lxml import etree as ET
		keywords = []  # headword strings collected from form/orth
		f = BytesIO()
		with ET.htmlfile(f) as hf:
			with hf.element("div"):
				for form in entry.findall("form/orth", self.ns):
					keywords.append(form.text)
					# TODO: if there is only one keyword, we should skip this
					with hf.element("b"):
						hf.write(form.text)
				hf.write(ET.Element("br"))
				# TODO: "gramGrp/gen" is gender: m|masc|f|fem|n|neut|m;f|adj
				posList = entry.findall("gramGrp/pos", self.ns)
				if posList:
					for pos in posList:
						with hf.element("i"):
							hf.write(pos.text)
						hf.write(" ")
					hf.write(ET.Element("br"))
				pronList = entry.findall("form/pron", self.ns)
				if pronList:
					# NOTE(review): the raw <font> markup written here as
					# text gets escaped by htmlfile; the html.unescape
					# call below restores it to real markup.
					hf.write(", ".join(
						f'<font color="green">/{p.text}/</font>'
						for p in pronList
					))
					hf.write(ET.Element("br"))
					hf.write("\n")

				self.make_list(
					hf,
					entry.findall("sense", self.ns),
					self.process_sense,
				)

		defi = f.getvalue().decode("utf-8")
		defi = html.unescape(defi)
		return self._glos.newEntry(keywords, defi)
コード例 #9
0
    def getEntryByElem(self, entry: "lxml.etree.Element") -> "BaseEntry":
        """Convert one TEI <entry> element into a glossary entry.

        Headwords are written in bold and collected as keywords,
        followed by part-of-speech markers in italics and the senses.
        """
        from lxml import etree as ET
        keywords = []
        buf = BytesIO()
        with ET.htmlfile(buf) as hf:
            with hf.element("div"):
                for orth in entry.findall("form/orth", self.ns):
                    keywords.append(orth.text)
                    with hf.element("b"):
                        hf.write(orth.text)
                hf.write(" ")
                for posElem in entry.findall("gramGrp/pos", self.ns):
                    with hf.element("i"):
                        hf.write(posElem.text)
                hf.write(ET.Element("br"))
                hf.write("\n")

                self.make_list(
                    hf,
                    entry.findall("sense", self.ns),
                    self.process_sense,
                )

        defi = buf.getvalue().decode("utf-8")
        return self._glos.newEntry(keywords, defi)
コード例 #10
0
 def docfile(self, *args, **kwargs):
     """Open an incremental HTML writer by delegating to lxml's etree.htmlfile."""
     return etree.htmlfile(*args, **kwargs)
コード例 #11
0
ファイル: _base.py プロジェクト: plq/spyne
 def docfile(self, *args, **kwargs):
     """Open an incremental HTML writer by delegating to lxml's etree.htmlfile.

     Logs the arguments at debug level before delegating.
     """
     logger.debug("Starting file with %r %r", args, kwargs)
     return etree.htmlfile(*args, **kwargs)
コード例 #12
0
ファイル: _base.py プロジェクト: yanni21/spyne
 def docfile(self, *args, **kwargs):
     """Open an incremental HTML writer by delegating to lxml's etree.htmlfile.

     Logs the arguments at debug level before delegating.
     """
     logger.debug("Starting file with %r %r", args, kwargs)
     return etree.htmlfile(*args, **kwargs)
コード例 #13
0
ファイル: freedict.py プロジェクト: maxim-saplin/pyglossary
    def getEntryByElem(self, entry: "lxml.etree.Element") -> "BaseEntry":
        """Convert one TEI <entry> element into a glossary entry.

        Normal orthographic forms come first in the keyword list and
        inflected forms (form type="infl") after them.  The definition
        is rendered as HTML: optional headword title lines, then
        pronunciations, grammar groups and senses.
        """
        from lxml import etree as ET
        glos = self._glos
        keywords = []
        f = BytesIO()
        pron_color = self._pron_color  # font color used for pronunciations

        # Discovery mode: remember any tag we do not handle yet so it
        # can be reported later.
        if self._discover:
            for elem in entry.iter():
                if elem.tag not in self.supportedTags:
                    self._discoveredTags[elem.tag] = elem

        def br():
            # A fresh <br> element for each line break.
            return ET.Element("br")

        inflectedKeywords = []

        # Split orthographic forms into normal and inflected ones so
        # that normal forms come first in the keyword list.
        for form in entry.findall("form", self.ns):
            inflected = form.get("type") == "infl"
            for orth in form.findall("orth", self.ns):
                if not orth.text:
                    continue
                if inflected:
                    inflectedKeywords.append(orth.text)
                else:
                    keywords.append(orth.text)

        keywords += inflectedKeywords

        # Pronunciations with surrounding slashes stripped; they are
        # re-wrapped in slashes when rendered below.
        pronList = [
            pron.text.strip('/')
            for pron in entry.findall("form/pron", self.ns) if pron.text
        ]
        senseList = entry.findall("sense", self.ns)

        with ET.htmlfile(f, encoding="utf-8") as hf:
            with hf.element("div"):
                # Optionally repeat the headwords as title lines.
                if self._word_title:
                    for keyword in keywords:
                        with glos.titleElement(hf, keyword):
                            hf.write(keyword)
                        hf.write(br())

                # TODO: "form/usg"
                # <usg type="geo">Brit</usg>
                # <usg type="geo">US</usg>
                # <usg type="hint">...</usg>

                # Pronunciations: comma-separated, colored, /slash-wrapped/.
                if pronList:
                    for i, pron in enumerate(pronList):
                        if i > 0:
                            hf.write(", ")
                        hf.write("/")
                        with hf.element("font", color=pron_color):
                            hf.write(f"{pron}")
                        hf.write("/")
                    hf.write(br())
                    hf.write("\n")

                self.writeGramGroups(hf, entry.findall("gramGrp", self.ns))

                self.writeSenseList(hf, senseList)

        defi = f.getvalue().decode("utf-8")
        # defi = defi.replace("\xa0", "&nbsp;")  # do we need to do this?
        return self._glos.newEntry(
            keywords,
            defi,
            defiFormat="h",
            byteProgress=(self._file.tell(), self._fileSize),
        )
コード例 #14
0
ファイル: freedict.py プロジェクト: qnga/pyglossary
    def getEntryByElem(self, entry: "lxml.etree.Element") -> "BaseEntry":
        """Convert one TEI <entry> element into a glossary entry.

        Keywords are the normal orthographic forms followed by
        inflected forms; the definition shows optional headword title
        lines, grammar groups, pronunciations and senses.
        """
        from lxml import etree as ET
        glos = self._glos
        keywords = []
        f = BytesIO()

        # Discovery mode: remember any tag we do not handle yet so it
        # can be reported later.
        if self._discover:
            for elem in entry.iter():
                if elem.tag not in self.supportedTags:
                    self._discoveredTags[elem.tag] = elem

        def br():
            # A fresh <br> element for each line break.
            return ET.Element("br")

        for form in entry.findall("form/orth", self.ns):
            if form.getparent().get("type"):
                # only use normal form, not inflected one, here
                continue
            keywords.append(form.text)

        # Add keywords for inflected forms
        for orth in entry.findall('.//form[@type="infl"]/orth', self.ns):
            if not orth.text:
                continue
            keywords.append(orth.text)

        # One comma-joined string per <gramGrp> (part of speech etc.).
        gramList = []  # type: List[str]
        for gramGrp in entry.findall("gramGrp", self.ns):
            parts = []
            for child in gramGrp.iterchildren():
                text = self.normalizeGramGrpChild(child)
                if text:
                    parts.append(text)
            if parts:
                gramList.append(", ".join(parts))

        pronList = entry.findall("form/pron", self.ns)
        senseList = entry.findall("sense", self.ns)

        with ET.htmlfile(f) as hf:
            with hf.element("div"):
                # Optionally repeat the headwords as title lines.
                if self._keywords_header:
                    for keyword in keywords:
                        with glos.titleElement(hf, keyword):
                            hf.write(keyword)
                        hf.write(br())

                # TODO: "form/usg"
                # <usg type="geo">Brit</usg>
                # <usg type="geo">US</usg>
                # <usg type="hint">...</usg>

                # Grammar groups in italics, one line each.
                for text in gramList:
                    with hf.element("i"):
                        hf.write(text)
                    hf.write(br())

                # Pronunciations: comma-separated, green, /slash-wrapped/.
                if pronList:
                    for i, pron in enumerate(pronList):
                        if i > 0:
                            hf.write(", ")
                        with hf.element("font", color="green"):
                            hf.write(f"/{pron.text}/")
                    hf.write(br())
                    hf.write("\n")

                self.makeList(
                    hf,
                    senseList,
                    self.writeSense,
                )

        # presumably reverses entity-escaping done by the serializer —
        # see pyglossary.html_utils.unescape_unicode
        defi = unescape_unicode(f.getvalue().decode("utf-8"))
        return self._glos.newEntry(
            keywords,
            defi,
            defiFormat="h",
            byteProgress=(self._file.tell(), self._fileSize),
        )
コード例 #15
0
ファイル: _base.py プロジェクト: arskom/spyne
 def docfile(self, *args, **kwargs):
     """Open an incremental HTML writer by delegating to lxml's etree.htmlfile."""
     return etree.htmlfile(*args, **kwargs)
コード例 #16
0
	def getEntryByElem(self, entry: "lxml.etree.Element") -> "BaseEntry":
		"""Convert one JMdict <entry> element into a glossary entry.

		Keywords include every kanji form (keb), every reading (reb)
		and each "keb・reb" combination; the definition shows the
		kanji forms, the readings with their properties, and senses.
		"""
		from lxml import etree as ET
		glos = self._glos
		keywords = []
		f = BytesIO()

		def br():
			# A fresh <br> element for each line break.
			return ET.Element("br")

		with ET.htmlfile(f) as hf:
			kebList = []  # type: List[str]
			rebList = []  # type: List[Tuple[str, List[str]]]
			with hf.element("div"):
				# Kanji elements: collect the keb (kanji spelling) of each.
				for k_ele in entry.findall("k_ele"):
					keb = k_ele.find("keb")
					if keb is None:
						continue
					kebList.append(keb.text)
					keywords.append(keb.text)
					# for elem in k_ele.findall("ke_pri"):
					# 	log.info(elem.text)

				# Reading elements: collect each reb (reading) together
				# with its display properties.
				for r_ele in entry.findall("r_ele"):
					reb = r_ele.find("reb")
					if reb is None:
						continue
					props = []
					if r_ele.find("re_nokanji") is not None:
						props.append("no kanji")
					inf = r_ele.find("re_inf")
					if inf is not None:
						props.append(
							self.re_inf_mapping.get(inf.text, inf.text)
						)
					rebList.append((reb.text, props))
					keywords.append(reb.text)
					# for elem in r_ele.findall("re_pri"):
					# 	log.info(elem.text)

				# this is for making internal links valid
				# this makes too many alternates!
				# but we don't seem to have a choice
				# except for scanning and indexing all words once
				# and then starting over and fixing/optimizing links
				for keb in kebList:
					for reb, _ in rebList:
						keywords.append(f"{keb}・{reb}")

				# Kanji forms on one title line, separated by red " | ".
				if kebList:
					with glos.titleElement(hf, kebList[0]):
						for i, keb in enumerate(kebList):
							if i > 0:
								with hf.element("font", color="red"):
									hf.write(" | ")
							hf.write(keb)
					hf.write(br())

				# Readings in green, each followed by its properties in
				# small tag-styled spans.
				if rebList:
					for i, (reb, props) in enumerate(rebList):
						if i > 0:
							with hf.element("font", color="red"):
								hf.write(" | ")
						with hf.element("font", color="green"):
							hf.write(reb)
						for prop in props:
							hf.write(" ")
							with hf.element("small"):
								with hf.element("span", style=self.tagStyle):
									hf.write(prop)
					hf.write(br())

				self.makeList(
					hf,
					entry.findall("sense"),
					self.writeSense,
				)

		defi = f.getvalue().decode("utf-8")
		# presumably reverses entity-escaping done by the serializer —
		# see pyglossary.html_utils.unescape_unicode
		defi = unescape_unicode(defi)
		byteProgress = (self._file.tell(), self._fileSize)
		return self._glos.newEntry(keywords, defi, defiFormat="h", byteProgress=byteProgress)