def update_text(clickData):
    if clickData['points'][0]['curveNumber'] == 0:
        return html.Div([
            html.A("Direct link to HackerNews user comment",
                   href="https://news.ycombinator.com/item?id="
                   + str(hackernews_xamarin_Id_Data[
                       clickData['points'][0]['pointIndex']]),
                   target="_blank"),
            html.H3(HP.HTMLParser().unescape(hackernews_xamarin_Body_Data[
                clickData['points'][0]['pointIndex']]))
        ])
    if clickData['points'][0]['curveNumber'] == 1:
        return html.Div([
            html.A("Direct link to HackerNews user comment",
                   href="https://news.ycombinator.com/item?id="
                   + str(hackernews_react_native_Id_Data[
                       clickData['points'][0]['pointIndex']]),
                   target="_blank"),
            html.H3(HP.HTMLParser().unescape(hackernews_react_native_Body_Data[
                clickData['points'][0]['pointIndex']]))
        ])
    if clickData['points'][0]['curveNumber'] == 2:
        return html.Div([
            html.A("Direct link to HackerNews user comment",
                   href="https://news.ycombinator.com/item?id="
                   + str(hackernews_flutter_Id_Data[
                       clickData['points'][0]['pointIndex']]),
                   target="_blank"),
            html.H3(HP.HTMLParser().unescape(hackernews_flutter_Body_Data[
                clickData['points'][0]['pointIndex']]))
        ])
def update_text(hoverData):
    if hoverData['points'][0]['curveNumber'] == 0:
        return HP.HTMLParser().unescape(
            reddit_xamarin_Body_Data[hoverData['points'][0]['pointIndex']])
    if hoverData['points'][0]['curveNumber'] == 1:
        return HP.HTMLParser().unescape(reddit_react_native_Body_Data[
            hoverData['points'][0]['pointIndex']])
    if hoverData['points'][0]['curveNumber'] == 2:
        return HP.HTMLParser().unescape(
            reddit_flutter_Body_Data[hoverData['points'][0]['pointIndex']])
def replaceHTMLCodes(txt):
    # add the missing ";" to bare numeric character references such as "&#39"
    txt = re.sub("(&#[0-9]+)([^;^0-9]+)", "\\1;\\2", txt)
    try:
        import html.parser as html_parser
    except ImportError:
        import HTMLParser as html_parser
    # unescape twice to handle double-encoded entities
    txt = html_parser.HTMLParser().unescape(txt)
    txt = html_parser.HTMLParser().unescape(txt)
    txt = txt.replace("&quot;", "\"")
    txt = txt.replace("&amp;", "&")
    txt = txt.strip()
    return txt
def convert_content(content):
    html_parser = parser.HTMLParser()
    content = html_parser.unescape(content)
    if not disable_convert_code_tag:
        content = convert_code_tag(content)
    return content
def trombi():
    h = parser.HTMLParser()
    filters = ""
    method = request.method
    error, session, params = log_and_check_params(
        ["token", "location", "year"], request)
    if error != {}:
        return json.dumps(error), error['error']['code']
    try:
        for param in params:
            if param != "login" and param != "password":
                filters = filters + "&%s=%s" % (param, params[param])
        r = session.post(server_url + "/user/filter/user?format=json" + filters,
                         verify=ssl_verify, cookies={'language': 'fr'})
        if r.status_code == 403:
            return json.dumps({
                "error": {
                    "message": "Connection token is invalid or has expired",
                    'code': 403
                }
            }), 403
        return clean_json(r.text)
    except Exception as e:
        return json.dumps({
            "error": {
                "message": "Server was unable to connect to Epitech's intra API",
                "code": 500
            }
        }), 500
def find_tv_show_season(content, tvshow, season):
    url_found = None
    found_urls = []
    possible_matches = []
    all_tvshows = []
    h = HTMLParser.HTMLParser()
    for matches in re.finditer(movie_season_pattern, content,
                               re.IGNORECASE | re.DOTALL):
        found_title = matches.group('title')
        found_title = h.unescape(found_title)
        if matches.group('link') in found_urls:
            continue
        log(__name__, "Found tv show season on search page: %s" % found_title)
        found_urls.append(matches.group('link'))
        s = difflib.SequenceMatcher(
            None,
            (found_title + ' ' + matches.group('year')).lower(),
            tvshow.lower())
        all_tvshows.append(
            matches.groups() + (s.ratio() * int(matches.group('numsubtitles')),))
        if found_title.lower().find(tvshow.lower() + " ") > -1:
            if found_title.lower().find(season.lower()) > -1:
                log(__name__,
                    "Matching tv show season found on search page: %s" % found_title)
                possible_matches.append(matches.groups())
    if len(possible_matches) > 0:
        possible_matches = sorted(possible_matches, key=lambda x: -int(x[3]))
        url_found = possible_matches[0][0]
        log(__name__, "Selecting matching tv show with most subtitles: %s (%s)" % (
            possible_matches[0][1], possible_matches[0][3]))
    else:
        if len(all_tvshows) > 0:
            all_tvshows = sorted(all_tvshows, key=lambda x: -int(x[4]))
            url_found = all_tvshows[0][0]
            log(__name__,
                "Selecting tv show with highest fuzzy string score: %s (score: %s subtitles: %s)" % (
                    all_tvshows[0][1], all_tvshows[0][4], all_tvshows[0][3]))
    return url_found
def es_ingest_objects(self):
    ingest_list = []
    parser = htmlparser.HTMLParser()
    es_dict_template = {"_index": "netflix_crossing", "_type": "nfobject"}
    title_list = []
    for nfobject in self.nfobjects:
        ingest_dict = es_dict_template.copy()
        nfobject["title"] = parser.unescape(nfobject["title"])
        nfobject["synopsis"] = parser.unescape(nfobject["synopsis"])
        title_list.append(nfobject["title"])
        ingest_dict.update(nfobject)
        ingest_list.append(ingest_dict)
    helpers.bulk(self.es, ingest_list)
    print("Objects ingested")
    print("Title list")
    print(title_list)
def replaceEscapeCodes(txt):
    try:
        import html.parser as html_parser
    except ImportError:
        import HTMLParser as html_parser
    txt = html_parser.HTMLParser().unescape(txt)
    return txt
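# Note: HTMLParser.unescape() was deprecated in Python 3.4 and removed in
# Python 3.9, so the instance-method fallbacks used throughout these snippets
# only run on older interpreters. A minimal, version-agnostic sketch using the
# standard library is shown below; the helper name unescape_html is
# hypothetical and not part of the surrounding code.
def unescape_html(txt):
    try:
        # Python 3.4+: module-level html.unescape()
        from html import unescape
    except ImportError:
        # Python 2: fall back to the HTMLParser instance method
        from HTMLParser import HTMLParser
        unescape = HTMLParser().unescape
    return unescape(txt)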
def find_by_title(self, title):
    default_find_by_title_params = {'json': '1',
                                    'nr': 1,
                                    'tt': 'on',
                                    'q': title}
    query_params = urlencode(default_find_by_title_params)
    results = self.get(('http://www.imdb.com/'
                        'xml/find?{0}').format(query_params))
    keys = ['title_popular', 'title_exact', 'title_approx', 'title_substring']
    title_results = []
    html_unescape = htmlparser.HTMLParser().unescape
    # Loop through all results and build a list with popular matches first
    for key in keys:
        if key in results:
            for r in results[key]:
                year = None
                year_match = re.search(r'(\d{4})', r['title_description'])
                if year_match:
                    year = year_match.group(0)
                title_match = {
                    'title': html_unescape(r['title']),
                    'year': year,
                    'imdb_id': r['id']
                }
                title_results.append(title_match)
    return title_results
def restoreIni(self):
    #prjFileInfo = QtCore.QFileInfo(core.QgsProject.instance().fileName())
    #iniFileInfo = QtCore.QFileInfo(os.path.join(prjFileInfo.path(), prjFileInfo.baseName() + ".gsv"))
    stored_settings = core.QgsExpressionContextUtils.projectScope(
        core.QgsProject.instance()).variable('go2sv_infolayer_settings')
    if stored_settings:
        self.infoBoxIni = json.loads(stored_settings)
        self.loadPointLayers(default=self.infoBoxIni["infoLayer"])
        self.infoField.setText(self.infoBoxIni["infoField"])
    else:
        self.infoBoxIni = {'infoLayerEnabled': None,
                           'infoBoxTemplate': u'',
                           'infoField': '',
                           'infoBoxEnabled': None,
                           'iconPath': '',
                           'infoLayer': '',
                           'distanceBuffer': '100',
                           "mapCommandsEnabled": None}
        self.loadPointLayers()
    if self.infoBoxIni["infoLayerEnabled"]:
        self.enableInfoLayerCheckbox.setCheckState(QtCore.Qt.Checked)
    else:
        self.enableInfoLayerCheckbox.setCheckState(QtCore.Qt.Unchecked)
    self.iconPath.setText(self.infoBoxIni["iconPath"])
    self.distanceBuffer.setText(self.infoBoxIni["distanceBuffer"])
    if self.infoBoxIni["infoBoxEnabled"]:
        self.enableInfoBoxCheckbox.setCheckState(QtCore.Qt.Checked)
    else:
        self.enableInfoBoxCheckbox.setCheckState(QtCore.Qt.Unchecked)
    if self.infoBoxIni["mapCommandsEnabled"]:
        self.mapCommandsCheck.setCheckState(QtCore.Qt.Checked)
    else:
        self.mapCommandsCheck.setCheckState(QtCore.Qt.Unchecked)
    html_parser = HTMLParser.HTMLParser()
    self.infoboxHtml.setPlainText(
        html_parser.unescape(self.infoBoxIni["infoBoxTemplate"]))
    self.enableInfoLayerAction(True)
    if self.infoIndex and self.enableInfoLayerCheckbox.isChecked():
        self.updateSpatialIndex()
    self.defined.emit()
def getDocFromNode(ns, node, retval=None):
    annotation_node = node.find('xs:annotation', ns)
    if annotation_node is None:
        return retval
    documentation_node = annotation_node.find('xs:documentation', ns)
    if documentation_node is None:
        return retval

    # Be sure to grab _all_ content in the <xs:documentation> node.
    # In the documentation nodes, use XML entities ("&lt;" instead of "<")
    # for documentation characters that would otherwise be considered as XML.
    s = lxml.etree.tostring(documentation_node, method="text", pretty_print=True)
    rst = s.decode().lstrip('\n')    # remove any leading blank lines
    rst = rst.rstrip()               # remove any trailing white space
    text = textwrap.dedent(rst)      # remove common leading space

    # substitute HTML entities in markup: "&lt;" for "<"
    # thanks: http://stackoverflow.com/questions/2087370/decode-html-entities-in-python-string
    try:    # see #661
        import html
        text = html.unescape(text)
    except (ImportError, AttributeError):
        from html import parser as HTMLParser
        htmlparser = HTMLParser.HTMLParser()
        text = htmlparser.unescape(text)
    return text.lstrip()
def __init__(self, *args, **kwargs):
    self.htmlparser = HTMLParser.HTMLParser()
    super(Flagger, self).__init__(*args, **kwargs)
    self.logger = logging.getLogger(__name__)
    self.logger.setLevel(
        (self.debug or self.verbose) and logging.DEBUG or logging.ERROR)
    self.now = int(time.time())
def fillRaiSportKeys(self):
    # search for items in main menu
    RaiSportKeys = []
    try:
        data = utils.checkStr(urllib2.urlopen(self.RaiSportMainUrl).read())
    except urllib2.HTTPError:
        data = ''
    m = re.search("<a href=\"javascript:void\(0\)\">Menu</a>(.*?)</div>",
                  data, re.S)
    if not m:
        return []
    menu = m.group(0)
    links = re.findall("<a href=\"(?P<url>[^\"]+)\">(?P<title>[^<]+)</a>", menu)
    good_links = []
    for l in links:
        if ('/archivio.html?' in l[0]) and not ('&' in l[0]):
            good_links.append({'title': l[1], 'url': l[0]})
    good_links.append({
        'title': self.RaiPlayAddonHandle.getLocalizedString(32015),
        'url': '/archivio.html?tematica=altri-sport'
    })
    # open any single page in list and grab search keys
    for l in good_links:
        try:
            data = utils.checkStr(
                urllib2.urlopen(self.RaiSportMainUrl + l['url']).read())
        except urllib2.HTTPError:
            data = ''
        dataDominio = re.findall("data-dominio=\"(.*?)\"", data)
        dataTematica = re.findall("data-tematica=\"(.*?)\"", data)
        xbmc.log(str(dataTematica))
        if dataTematica:
            if len(dataTematica) > 1:
                del (dataTematica[0])
            try:
                title = dataTematica[0].split('|')[0]
                title = utils.checkStr(
                    HTMLParser.HTMLParser().unescape(title))
                params = {
                    'title': title,
                    'dominio': dataDominio[0],
                    'sub_keys': dataTematica
                }
                RaiSportKeys.append(params)
            except:
                xbmc.log("error in key %s" % str(dataTematica))
    return RaiSportKeys
def xml_constructor(self, soup, link, tpburl, info):
    page = HTMLParser(soup)
    if info[0] == "search":
        try:
            title = page.title
        except:
            title = info[1]
    elif info[0] in ["browse", "user"]:
        try:
            title = parser.HTMLParser().unescape(
                search('<title>(.*) - TPB</title>', soup).group(1))
        except:
            title = info[1]
    elif info[0] == "recent":
        title = "Recent Torrents"
    xml = "<rss version=\"2.0\">\n\t<channel>\n\t\t"
    xml += "<title>TPB2RSS: %s</title>\n\t\t" % title
    xml += "<link>%s%s</link>\n\t\t" % (tpburl, parse.quote(link))
    xml += "<description>The Pirate Bay %s feed for \"%s\"</description>\n\t\t" % (info[0], title)
    xml += "<lastBuildDate>%s GMT</lastBuildDate>\n\t\t" % datetime.utcnow().strftime("%a, %d %b %Y %H:%M:%S")
    xml += "<language>en-us</language>\n\t\t"
    xml += "<generator>TPB2RSS %s</generator>\n\t\t" % __version__
    xml += "<docs>%s</docs>\n\t\t" % __docs__
    xml += "<webMaster>%s (%s)</webMaster>" % (__email__, __author__)
    position = 0
    for i in range(int(len(page.data) / 4)):
        item = str(page.data[position + 1]).split("\"")
        seeders = str(str(page.data[position + 2]).split(">")[1]).split("<")[0]
        leechers = str(str(page.data[position + 3]).split(">")[1]).split("<")[0]
        category = sub(r"(\n|\t)", "",
                       (compile(r'<.*?>').sub('', page.data[0]).replace("(", " (")))
        xml += self.item_constructor(item, seeders, leechers, category, tpburl)
        position += 4
    xml += "\n\t</channel>\n</rss>"
    return xml
def trim_cmt(cmt_list):
    import html.parser as HTMLParser
    html_parser = HTMLParser.HTMLParser()
    pattern = re.compile("<.*>.*?<.*>", re.S)
    return map(
        lambda x: re.sub(pattern, "", html_parser.unescape(x[0])).strip(),
        cmt_list)
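# Usage sketch for trim_cmt, under the assumption that each element of
# cmt_list is a tuple whose first item is the raw comment HTML (the sample
# data below is hypothetical). map() is lazy on Python 3, so the result is
# wrapped in list(); as above, HTMLParser().unescape only exists on Python
# versions before 3.9.
sample_comments = [
    ("&quot;Nice post&quot; <div>ignored markup</div>",),
    ("A &amp; B",),
]
print(list(trim_cmt(sample_comments)))
# expected with these inputs: ['"Nice post"', 'A & B']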
def translate_text(text, lang):
    """
    Takes in input text and utilizes Google's Translate API to convert it to
    the selected language.
    """
    translate_client = translate.Client()

    # Takes the input language and returns the string abbreviation for the
    # translate file. This resolves the Google API invalid input error when
    # passing the lang parameter.
    langDict = {
        'en': 'English',
        'es': 'Spanish',
        'it': 'Italian',
        'fr': 'French',
        'tr': 'Turkish',
        'ko': 'Korean'
    }
    language = ''
    for abbrev, fullLanguage in langDict.items():
        if fullLanguage == langDict[lang]:
            language = abbrev

    if isinstance(text, six.binary_type):
        text = text.decode('utf-8')

    # Initialize parser to convert special characters back to their original
    # form after Google translation API has finished translating the text.
    parser = htmlparser.HTMLParser()
    result = translate_client.translate(text, target_language=language)
    return parser.unescape(result['translatedText'])
def find_movie(content, title, year):
    found_urls = {}
    found_movies = []
    h = HTMLParser.HTMLParser()
    for secmatches in re.finditer(search_section_pattern, content,
                                  re.IGNORECASE | re.DOTALL):
        log(__name__, secmatches.group('section'))
        for matches in re.finditer(movie_season_pattern,
                                   secmatches.group('content'),
                                   re.IGNORECASE | re.DOTALL):
            if matches.group('link') in found_urls:
                if secmatches.group('section') == 'close':
                    found_movies[found_urls[matches.group('link')]]['is_close'] = True
                if secmatches.group('section') == 'exact':
                    found_movies[found_urls[matches.group('link')]]['is_exact'] = True
                continue
            found_urls[matches.group('link')] = len(found_movies)
            found_title = matches.group('title')
            found_title = h.unescape(found_title)
            log(__name__, "Found movie on search page: %s (%s)" % (
                found_title, matches.group('year')))
            found_movies.append(
                {'t': found_title.lower(),
                 'y': int(matches.group('year')),
                 'is_exact': secmatches.group('section') == 'exact',
                 'is_close': secmatches.group('section') == 'close',
                 'l': matches.group('link'),
                 'c': int(matches.group('numsubtitles'))})

    year = int(year)
    title = title.lower()

    # Priority 1: matching title and year
    for movie in found_movies:
        if movie['t'].find(title) > -1:
            if movie['y'] == year:
                log(__name__, "Matching movie found on search page: %s (%s)" % (
                    movie['t'], movie['y']))
                return movie['l']

    # Priority 2: matching title and one off year
    for movie in found_movies:
        if movie['t'].find(title) > -1:
            if movie['y'] == year + 1 or movie['y'] == year - 1:
                log(__name__,
                    "Matching movie found on search page (one off year): %s (%s)" % (
                        movie['t'], movie['y']))
                return movie['l']

    # Priority 3: "Exact" match according to search result page
    close_movies = []
    for movie in found_movies:
        if movie['is_exact']:
            log(__name__, "Using 'Exact' match: %s (%s)" % (movie['t'], movie['y']))
            return movie['l']
        if movie['is_close']:
            close_movies.append(movie)

    # Priority 4: "Close" match according to search result page
    if len(close_movies) > 0:
        close_movies = sorted(close_movies, key=itemgetter('c'), reverse=True)
        log(__name__, "Using 'Close' match: %s (%s)" % (
            close_movies[0]['t'], close_movies[0]['y']))
        return close_movies[0]['l']

    return None
def replaceHTMLCodes(txt):
    txt = re.sub("(&#[0-9]+)([^;^0-9]+)", "\\1;\\2", txt)
    if version_info[0] == 3:
        txt = HTMLParser.unescape(txt)
    else:
        txt = HTMLParser.HTMLParser().unescape(txt)
    txt = txt.replace("&quot;", '"')
    txt = txt.replace("&amp;", "&")
    return txt
def title_echo(r, line, bot, chan):
    """Echo the title of a url via MC"""

    def write_url(title, url):
        """Write a URL to the database"""
        conn = sqlite3.connect(DB_FILE)
        c = conn.cursor()
        t = (title, url)
        c.execute('INSERT INTO url_history VALUES (?, ?)', t)
        conn.commit()
        conn.close()

    # get url and build opener with custom user-agent and cookies enabled
    url = r.group(5).split()[0]
    cookie_jar = http.cookiejar.CookieJar()
    opener = urllib.request.build_opener(
        urllib.request.HTTPCookieProcessor(cookie_jar))
    opener.addheaders = [('User-agent', USER_AGENT)]

    # open the url but only read a maximum of 2**20 bytes in case someone is
    # screwing with us
    try:
        f = opener.open(str(url))
        html = f.read(1048576)  # only read the first 2**20 bytes
    except:
        e = traceback.format_exc()
        debug(e, log_only=True)
        write_url(None, url)
        return

    # uncompress if the data is gzipped
    try:
        encoding = f.info()['content-encoding']
    except KeyError:
        encoding = None
    if encoding and encoding == 'gzip':
        html = io.BytesIO(html)
        gz = gzip.GzipFile(fileobj=html, mode='rb')
        html = gz.read()
        gz.close()
    f.close()

    # decode the html and search for the title element
    html = html.decode('utf-8', errors='replace')
    title = re.search(r'<title.*?>(.*?)</title>', html,
                      re.DOTALL | re.IGNORECASE)
    if title:
        title = title.group(1).strip()
        title = title.replace('\n', '').replace('\r', '')
        title = ' '.join([w for w in title.split(' ') if w != ''])
        title = htmlp.HTMLParser().unescape(title)
    else:
        write_url(None, url)
        return

    # if we are here then there's a title so echo it to the channel
    bot.write('PRIVMSG {chan} :Title: {msg}\r\n'.format(chan=chan, msg=title))
    write_url(title, url)
def create_block_info(input_path, preprocessor):
    """Create block info."""
    results = []
    html_parser = parser.HTMLParser()
    with tf.io.gfile.GFile(input_path) as input_file:
        for line in input_file:
            results.extend(
                wiki_preprocessor.example_from_json_line(
                    line, html_parser, preprocessor))
    return results
def fix_scripts(dom):
    # ldjson workaround
    p = HTMLParser.HTMLParser()
    for script in dom.getElementsByTagName("script"):
        r = RawText()
        r.ownerDocument = dom
        r.data = p.unescape(script.childNodes[0].wholeText)
        for cn in script.childNodes:
            script.removeChild(cn)
        script.appendChild(r)
def speeches():
    f = open('speeches.txt', 'r')
    text_model = markovify.Text(f.read())
    tweet = text_model.make_short_sentence(100) + ' ' + random.choice(hashtags)
    print('Posting to Twitter...')
    try:
        api.update_status(HTMLParser.HTMLParser().unescape(tweet))
    except ImportError:
        api.update_status(htmlparser.HTMLParser().unescape(tweet))
    f.close()
    print('Tweet you posted: ' + tweet)
def get_info():
    '''
    This is a generator that takes titles and descriptions, reformats the html
    and yields the title and description of the latest videos for Alexa to
    respond with
    '''
    for tit, des in zip(titles, descriptions):
        titled = tit.a.img["alt"].strip()
        title = parser.HTMLParser().unescape(titled)
        description = des.text.strip()
        yield f"{title}. {description}"
def decode_value(data):
    new_data = {}
    for k, v in data.items():
        val = bytes(v.replace('%', '=').replace("+", " "), 'UTF-8')
        val_decode_str = decodestring(val).decode('UTF-8')
        val_decode_str = decodestring(val_decode_str).decode('UTF-8')
        if "&#" in val_decode_str:
            parser = hlmtparser.HTMLParser()
            val_decode_str = parser.unescape(val_decode_str)
        new_data[k] = val_decode_str
    return new_data
def handle(self, *args, **options):
    from tendenci.apps.pages.models import Page

    pages = Page.objects.all()
    self.h = html_parser.HTMLParser()
    pattern = re.compile(r'(&#\d+;)', re.IGNORECASE)
    for page in pages:
        page.title = re.sub(pattern, self.unescape, page.title)
        page.content = re.sub(pattern, self.unescape, page.content)
        page.save()
def update_text(clickData):
    if clickData['points'][0]['curveNumber'] == 0:
        return html.Div([
            html.A(
                "Direct link to Reddit user comment",
                href="https://new.reddit.com/comments/"
                + str(reddit_xamarin_link_Id_Data[
                    clickData['points'][0]['pointIndex']]).replace("t3_", "")
                + "/_/"
                + str(reddit_xamarin_Id_Data[
                    clickData['points'][0]['pointIndex']]),
                target="_blank"),
            html.H3(HP.HTMLParser().unescape(reddit_xamarin_Body_Data[
                clickData['points'][0]['pointIndex']]))
        ])
    if clickData['points'][0]['curveNumber'] == 1:
        return html.Div([
            html.A(
                "Direct link to Reddit user comment",
                href="https://new.reddit.com/comments/"
                + str(reddit_react_native_link_Id_Data[
                    clickData['points'][0]['pointIndex']]).replace("t3_", "")
                + "/_/"
                + str(reddit_react_native_Id_Data[
                    clickData['points'][0]['pointIndex']]),
                target="_blank"),
            html.H3(HP.HTMLParser().unescape(reddit_react_native_Body_Data[
                clickData['points'][0]['pointIndex']]))
        ])
    if clickData['points'][0]['curveNumber'] == 2:
        return html.Div([
            html.A(
                "Direct link to Reddit user comment",
                href="https://new.reddit.com/comments/"
                + str(reddit_flutter_link_Id_Data[
                    clickData['points'][0]['pointIndex']]).replace("t3_", "")
                + "/_/"
                + str(reddit_flutter_Id_Data[
                    clickData['points'][0]['pointIndex']]),
                target="_blank"),
            html.H3(HP.HTMLParser().unescape(reddit_flutter_Body_Data[
                clickData['points'][0]['pointIndex']]))
        ])
def decode_html_entities(s):
    """
    Replaces html entities with the character they represent.

    >>> print(decode_html_entities("&lt;3 &amp;"))
    <3 &
    """
    parser = HTMLParser.HTMLParser()

    def unesc(m):
        return parser.unescape(m.group())

    return re.sub(r'(&[^;]+;)', unesc, ensure_unicode(s))
def load_result(self, runtime):
    values = {
        'problem_id': str(runtime.pid),
        'user_id': self.username,
        'language': '-1',
        'jresult': '-1'
    }
    url = self.result_url.format(urllib.parse.urlencode(values))
    result_regex = re.compile(
        self.result_regex_r.format(pid=runtime.pid, name=self.username))
    while True:
        self.set_status('Waiting for judging...')
        time.sleep(1)
        html, resp = self.get(url, self.headers)
        match = result_regex.search(html)
        result = match.group(4)
        if result not in [
                'Pending', 'Pending_Rejudging', 'Compiling', 'Running_&_Judging'
        ]:
            break
    match = re.search(
        self.detail_regex_r.format(pid=runtime.pid, name=self.username), html)
    message = 'Loading result...'
    self.set_status(message)
    result = result.replace('_', ' ')
    if result.endswith('Exceed'):
        result = result + 'ed'
    judge_id = match.group(2)
    memory = match.group(6) + ' KB'
    time_ = match.group(7) + ' ms'
    detail = [result, time_, memory]
    if result == 'Compile Error':
        html, resp = self.get(self.compile_message_url.format(judge_id),
                              self.headers)
        runtime.judge_compile_message = \
            parser.HTMLParser().unescape(
                self.compile_message_regex.findall(html)[0]).replace('\r', '')
    runtime.judge_detail = [detail]
    runtime.judge_result = result
    runtime.judge_score = 100 if result == 'Accepted' else 0
def download_subtitles(self, suburls):
    """Download the SAMI subtitles, decode the HTML entities and save to the
    temp directory. Return a list of the paths to the downloaded subtitles."""
    paths = []
    for sub_data in suburls:
        sami = self.make_request(url=sub_data['href'], method='get').decode(
            'utf-8', 'ignore').strip()
        htmlparser = HTMLParser.HTMLParser()
        subtitle = htmlparser.unescape(sami).encode('utf-8')
        path = os.path.join(self.tempdir,
                            '{0}.sami'.format(sub_data['languageCode']))
        with open(path, 'wb') as subfile:
            subfile.write(subtitle)
        paths.append(path)
    return paths
def _render_cell(self, column, cell, model, iter_, destroy):
    article = model[iter_][0]
    title = escape(
        re.sub('\\s+', ' ', article.title.replace('\n', ' ').strip()))
    content = []
    html_parser = parser.HTMLParser()
    html_parser.handle_data = content.append
    html_parser.feed(article.description)
    content = escape(
        re.sub('\\s+', ' ',
               ''.join(content)[:1000].replace('\n', ' ').strip()))
    cell.set_property(
        'markup',
        '<big>%s</big>\n<small>%s</small>' % (
            ('%s' if article.read else '<b>%s</b>') % title, content))