def transform(self, ispdf=False):
    """Normalize self.title into wikitext-safe link text.

    Decodes HTML entities (unless the source is a PDF, whose extracted
    text is already plain), collapses noise characters and whitespace,
    then re-escapes the characters that would break a wiki external
    link: ``]``, ``}}`` and doubled quotes.

    @param ispdf: skip HTML-entity decoding for PDF-extracted titles
    """
    # convert html entities
    if not ispdf:
        self.title = pywikibot.html2unicode(self.title)
    self.title = re.sub(r'-+', '-', self.title)
    # remove formatting, i.e long useless strings
    self.title = re.sub(r'[\.+\-=]{4,}', ' ', self.title)
    # remove \n and \r and Unicode spaces from titles
    self.title = re.sub(r'(?u)\s', ' ', self.title)
    self.title = re.sub(r'[\n\r\t]', ' ', self.title)
    # remove extra whitespaces
    # remove leading and trailing ./;/,/-/_/+/ /
    self.title = re.sub(r' +', ' ', self.title.strip(r'=.;,-+_ '))
    self.avoid_uppercase()
    # avoid closing the link before the end: ']' ends an external link,
    # so emit it as a numeric character reference instead
    self.title = self.title.replace(']', '&#93;')
    # avoid multiple } being interpreted as a template inclusion:
    # break up '}}' by escaping the second brace
    self.title = self.title.replace('}}', '}&#125;')
    # prevent multiple quotes being interpreted as '' or ''' (wiki
    # italic/bold markup): escape the second quote of every pair
    self.title = self.title.replace("''", "'&#39;")
    self.title = pywikibot.unicode2html(self.title, self.site.encoding())
def transform(self, ispdf=False):
    """Normalize the title for safe use as external-link text.

    HTML entities are decoded first (skipped for PDFs, whose extracted
    text contains none), long runs of filler characters and all
    whitespace variants are collapsed, and finally the wikitext
    metacharacters ']', '}}' and doubled quotes are escaped as numeric
    character references so the title cannot terminate the link or be
    read as template/quote markup.

    @param ispdf: True when the title came from a PDF; skips entity decoding
    """
    # convert html entities
    if not ispdf:
        self.title = pywikibot.html2unicode(self.title)
    self.title = re.sub(r'-+', '-', self.title)
    # remove formatting, i.e long useless strings
    self.title = re.sub(r'[\.+\-=]{4,}', ' ', self.title)
    # remove \n and \r and Unicode spaces from titles
    self.title = re.sub(r'(?u)\s', ' ', self.title)
    self.title = re.sub(r'[\n\r\t]', ' ', self.title)
    # remove extra whitespaces
    # remove leading and trailing ./;/,/-/_/+/ /
    self.title = re.sub(r' +', ' ', self.title.strip(r'=.;,-+_ '))
    self.avoid_uppercase()
    # avoid closing the link before the end: escape ']' as &#93;
    self.title = self.title.replace(']', '&#93;')
    # avoid multiple } being interpreted as a template inclusion
    self.title = self.title.replace('}}', '}&#125;')
    # prevent multiple quotes being interpreted as '' or '''
    # (the original "replace('', '\''')" here was a syntax error)
    self.title = self.title.replace("''", "'&#39;")
    self.title = pywikibot.unicode2html(self.title, self.site.encoding())
def transform(self, ispdf=False):
    """Normalize the title.

    Decode HTML entities (not for PDFs), squash filler characters and
    whitespace, trim junk punctuation, then escape the characters that
    would corrupt the surrounding wikitext: "]" (closes the external
    link), "}}" (template inclusion) and "''" (italic/bold markup).

    @param ispdf: skip HTML-entity decoding when the title was
        extracted from a PDF
    """
    # convert html entities
    if not ispdf:
        self.title = pywikibot.html2unicode(self.title)
    self.title = re.sub(r"-+", "-", self.title)
    # remove formatting, i.e long useless strings
    self.title = re.sub(r"[\.+\-=]{4,}", " ", self.title)
    # remove \n and \r and Unicode spaces from titles
    self.title = re.sub(r"(?u)\s", " ", self.title)
    self.title = re.sub(r"[\n\r\t]", " ", self.title)
    # remove extra whitespaces
    # remove leading and trailing ./;/,/-/_/+/ /
    self.title = re.sub(r" +", " ", self.title.strip(r"=.;,-+_ "))
    self.avoid_uppercase()
    # avoid closing the link before the end: the plain-"]" replacement
    # was a no-op; escape it as a numeric character reference
    self.title = self.title.replace("]", "&#93;")
    # avoid multiple } being interpreted as a template inclusion
    self.title = self.title.replace("}}", "}&#125;")
    # prevent multiple quotes being interpreted as '' or '''
    self.title = self.title.replace("''", "'&#39;")
    self.title = pywikibot.unicode2html(self.title, self.site.encoding())
else: donow = todo # If there was more to do, the 'if len(todo)<61' part would have extended # todo beyond this size. cont = False try: wikipedia.getall(mysite, donow) except wikipedia.SaxError: # Ignore this error, and get the pages the traditional way. pass checked += len(donow) for pl in donow: R = re.compile(r"http://[^\s}<\]]+[^\s.,:;)\?!\]}<]") try: for url in R.findall(pl.get()): url = wikipedia.unicode2html(url, "ascii") try: error = URLerrorFinder().open(url) except IOError: error = -1 if error in allowederrorcodes: working += 1 else: nonworking += 1 print wikipedia.output(u'Page "%s" links to:' % pl.title()) wikipedia.output(url) wikipedia.output(u"Which gave error: %s %s" % (error, errorname(error))) # If anything is wrong with the Wikipedia page, just ignore except (wikipedia.NoPage, wikipedia.IsRedirectPage, wikipedia.LockedPage): pass
else: donow = todo # If there was more to do, the 'if len(todo)<61' part would have extended # todo beyond this size. cont = False try: wikipedia.getall(mysite, donow) except wikipedia.SaxError: # Ignore this error, and get the pages the traditional way. pass checked += len(donow) for pl in donow: R = re.compile(r'http://[^\s}<\]]+[^\s.,:;)\?!\]}<]') try: for url in R.findall(pl.get()): url = wikipedia.unicode2html(url, 'ascii') try: error = URLerrorFinder().open(url) except IOError: error = -1 if error in allowederrorcodes: working += 1 else: nonworking += 1 print wikipedia.output(u'Page "%s" links to:' % pl.title()) wikipedia.output(url) wikipedia.output(u'Which gave error: %s %s' % (error, errorname(error))) # If anything is wrong with the Wikipedia page, just ignore except (wikipedia.NoPage, wikipedia.IsRedirectPage,