Example #1
0
 def newEntry(self, word, defi) -> "BaseEntry":
     """
     Build an Entry for the given word and definition.

     When self._fileSize is truthy, the entry carries a byteProgress
     tuple of (current offset reported by self._file.tell(), file size);
     otherwise byteProgress is None.
     """
     progress = (
         (self._file.tell(), self._fileSize)
         if self._fileSize
         else None
     )
     return Entry(word, defi, byteProgress=progress)
Example #2
0
 def __next__(self):
     """
     Advance to the next blob of the opened slob object and return it
     as an Entry.

     Raises StopIteration when the reader is not open (after logging an
     error) or when the index runs past the last blob.
     """
     slobObj = self._slobObj
     if not slobObj:
         log.error("iterating over a reader which is not open")
         raise StopIteration
     index = self._refIndex + 1
     self._refIndex = index
     if index >= len(slobObj):
         raise StopIteration
     blob = slobObj[index]
     # blob.key is str, blob.content is bytes (decoded through toStr)
     return Entry(blob.key, toStr(blob.content))
Example #3
0
 def __next__(self):
     """
     Return the next entry of the reader.

     Entries queued in self._pendingEntries are served first (oldest
     first). Otherwise the next (word, defi) pair is read via
     self.nextPair(); a falsy pair yields None instead of an Entry.
     When nextPair() raises StopIteration, the number of steps taken so
     far is recorded in self._len before the exception is re-raised.
     """
     self._pos += 1
     try:
         queued = self._pendingEntries.pop(0)
     except IndexError:
         # nothing queued, fall through to reading the next pair
         pass
     else:
         return queued

     try:
         pair = self.nextPair()
     except StopIteration:
         self._len = self._pos
         raise
     if not pair:
         return None
     word, defi = pair
     return Entry(word, defi)
Example #4
0
 def loadInfo(self):
     """
     Consume the leading info pairs and store them on the glossary.

     Pairs are read with self.nextPair() until the first pair whose word
     is not an info word; that pair is queued in self._pendingEntries as
     an Entry and reading stops. Every info pair increments
     self._leadingLinesCount; those whose fixed word is non-empty are
     passed to self._glos.setInfo(). A StopIteration raised while
     reading simply ends the scan.
     """
     self._pendingEntries = []
     self._leadingLinesCount = 0
     try:
         while True:
             pair = self.nextPair()
             if pair:
                 key, value = pair
                 if not self.isInfoWord(key):
                     # first regular entry: keep it for later iteration
                     self._pendingEntries.append(Entry(key, value))
                     break
                 self._leadingLinesCount += 1
                 key = self.fixInfoWord(key)
                 if key:
                     self._glos.setInfo(key, value)
     except StopIteration:
         pass
Example #5
0
	def loadInfo(self) -> None:
		"""
		Consume the leading info pairs and store them on the glossary.

		Pairs are read with self.nextPair() until the first pair whose
		word(s) are not info words; that pair is queued in
		self._pendingEntries as an Entry and reading stops. Every info
		pair increments self._leadingLinesCount. The word (or each word
		of a list) is passed through self.fixInfoWord(); the pair is
		stored via self._glos.setInfo() only when both the fixed word
		and the definition are truthy. A StopIteration raised while
		reading simply ends the scan.
		"""
		self._pendingEntries = []
		self._leadingLinesCount = 0
		try:
			while True:
				pair = self.nextPair()
				if not pair:
					continue
				words, value = pair
				if not self.isInfoWords(words):
					# first regular entry: keep it for later iteration
					self._pendingEntries.append(Entry(words, value))
					break
				self._leadingLinesCount += 1
				key = (
					[self.fixInfoWord(w) for w in words]
					if isinstance(words, list)
					else self.fixInfoWord(words)
				)
				if key and value:
					self._glos.setInfo(key, value)
		except StopIteration:
			pass
Example #6
0
    def write_groups(self):
        """
        Generator that collects entries and writes them as Kobo HTML groups.

        Entries are received through ``entry = yield``; receiving ``None``
        ends the collection phase. Data entries (``entry.isData()``) are
        counted and skipped. An entry whose words map to more than one
        prefix (``self.get_prefix``) is split into one entry per prefix.

        After collection the raw entries are sorted, grouped by the prefix
        of their headword, and each group is written to a gzip-compressed
        file named ``fixFilename(prefix) + ".html"``. Finally the list of
        all written words is stored in ``self._words``.
        """
        import gzip
        from collections import OrderedDict
        from pyglossary.entry import Entry

        glos = self._glos
        words = []
        dataEntryCount = 0

        htmlHeader = "<?xml version=\"1.0\" encoding=\"utf-8\"?><html>\n"

        # HTML fragments of the group currently being built; joined once
        # per group instead of growing one string with repeated "+="
        groupItems = []

        def writeGroup(lastPrefix):
            """Write the pending fragments to the file of lastPrefix."""
            group_fname = fixFilename(lastPrefix)
            # both fragments need the "f" prefix, otherwise the second
            # one is logged as a literal "{group_fname!r}, ..." string
            log.debug(
                f"writeGroup: {lastPrefix!r}, "
                f"{group_fname!r}, count={len(groupItems)}"
            )
            htmlContents = htmlHeader + "".join(groupItems) + "</html>"
            with gzip.open(group_fname + ".html", mode="wb") as gzipFile:
                gzipFile.write(htmlContents.encode("utf-8"))
            groupItems.clear()

        data = []

        while True:
            entry = yield
            if entry is None:
                break
            if entry.isData():
                dataEntryCount += 1
                continue
            l_word = entry.l_word
            if len(l_word) == 1:
                data.append(entry.getRaw(glos))
                continue
            # bucket the words of this entry by prefix, keeping first-seen
            # prefix order
            wordsByPrefix = OrderedDict()
            for word in l_word:
                wordsByPrefix.setdefault(self.get_prefix(word), []).append(word)
            if len(wordsByPrefix) == 1:
                data.append(entry.getRaw(glos))
                continue
            # words fall into several groups: emit one entry per prefix,
            # all sharing the same definition
            defi = entry.defi
            data.extend(
                Entry(p_words, defi).getRaw(glos)
                for p_words in wordsByPrefix.values()
            )
            # drop the reference while suspended at the next yield
            del entry

        log.info("\nKobo: sorting entries...")
        data.sort(key=Entry.getRawEntrySortKey(glos, self.get_prefix_b))

        lastPrefix = ""
        for rawEntry in data:
            entry = Entry.fromRaw(glos, rawEntry)

            headword, *variants = entry.l_word
            prefix = self.get_prefix(headword)
            if lastPrefix and prefix != lastPrefix:
                # prefix changed: flush the finished group
                writeGroup(lastPrefix)
            lastPrefix = prefix

            defi = self.fix_defi(entry.defi)
            words.extend(entry.l_word)
            variants = [v.strip().lower() for v in variants]
            variants_html = (
                '<var>'
                + ''.join(f'<variant name="{v}"/>' for v in variants)
                + '</var>'
            )
            # NOTE(review): headword and variants are interpolated into
            # the markup without escaping -- confirm that is intended
            groupItems.append(
                f"<w><a name=\"{headword}\" /><div><b>{headword}</b>"
                f"{variants_html}<br/>{defi}</div></w>\n"
            )

        if groupItems:
            writeGroup(lastPrefix)

        if dataEntryCount > 0:
            # Logger.warn is a deprecated alias of Logger.warning
            log.warning(
                f"ignored {dataEntryCount} files (data entries)"
                " and replaced '<img ...' tags in definitions with placeholders"
            )

        self._words = words