Example #1
def update_text(clickData):
    if clickData['points'][0]['curveNumber'] == 0:
        return html.Div([
            html.A("Direct link to HackerNews user comment",
                   href="https://news.ycombinator.com/item?id=" +
                   str(hackernews_xamarin_Id_Data[clickData['points'][0]
                                                  ['pointIndex']]),
                   target="_blank"),
            html.H3(HP.HTMLParser().unescape(hackernews_xamarin_Body_Data[
                clickData['points'][0]['pointIndex']]))
        ])
    if clickData['points'][0]['curveNumber'] == 1:
        return html.Div([
            html.A("Direct link to HackerNews user comment",
                   href="https://news.ycombinator.com/item?id=" +
                   str(hackernews_react_native_Id_Data[clickData['points'][0]
                                                       ['pointIndex']]),
                   target="_blank"),
            html.H3(HP.HTMLParser().unescape(hackernews_react_native_Body_Data[
                clickData['points'][0]['pointIndex']]))
        ])
    if clickData['points'][0]['curveNumber'] == 2:
        return html.Div([
            html.A("Direct link to HackerNews user comment",
                   href="https://news.ycombinator.com/item?id=" +
                   str(hackernews_flutter_Id_Data[clickData['points'][0]
                                                  ['pointIndex']]),
                   target="_blank"),
            html.H3(HP.HTMLParser().unescape(hackernews_flutter_Body_Data[
                clickData['points'][0]['pointIndex']]))
        ])
Example #2
def update_text(hoverData):
    if hoverData['points'][0]['curveNumber'] == 0:
        return HP.HTMLParser().unescape(
            reddit_xamarin_Body_Data[hoverData['points'][0]['pointIndex']])
    if hoverData['points'][0]['curveNumber'] == 1:
        return HP.HTMLParser().unescape(reddit_react_native_Body_Data[
            hoverData['points'][0]['pointIndex']])
    if hoverData['points'][0]['curveNumber'] == 2:
        return HP.HTMLParser().unescape(
            reddit_flutter_Body_Data[hoverData['points'][0]['pointIndex']])
Example #3
def replaceHTMLCodes(txt):
    txt = re.sub("(&#[0-9]+)([^;^0-9]+)", "\\1;\\2", txt)
    try:
        import html.parser as html_parser
    except ImportError:  # Python 2 fallback
        import HTMLParser as html_parser
    txt = html_parser.HTMLParser().unescape(txt)
    txt = html_parser.HTMLParser().unescape(txt)
    txt = txt.replace(""", "\"")
    txt = txt.replace("&", "&")
    txt = txt.strip()
    return txt
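For comparison, a minimal Python-3-only sketch of the same idea: since Python 3.4 the standard library exposes html.unescape() as a plain function (HTMLParser.unescape() was removed in Python 3.9), so the import fallback and the manual entity replacements above are unnecessary on modern interpreters. The function name replace_html_codes is an illustrative stand-in, not taken from the example.

import re
import html


def replace_html_codes(txt):
    # Re-add the semicolon that some sources drop after numeric entities,
    # mirroring the regex used in the example above.
    txt = re.sub(r"(&#[0-9]+)([^;^0-9]+)", r"\1;\2", txt)
    # html.unescape() decodes named, decimal and hexadecimal references.
    return html.unescape(txt).strip()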
Example #4
    def convert_content(content):
        html_parser = parser.HTMLParser()
        content = html_parser.unescape(content)

        if not disable_convert_code_tag:
            content = convert_code_tag(content)
        return content
Example #5
def trombi():
    h = parser.HTMLParser()
    filters = ""
    method = request.method
    error, session, params = log_and_check_params(
        ["token", "location", "year"], request)
    if error != {}:
        return json.dumps(error), error['error']['code']
    try:
        for param in params:
            if param != "login" and param != "password":
                filters = filters + "&%s=%s" % (param, params[param])
        r = session.post(server_url + "/user/filter/user?format=json" +
                         filters,
                         verify=ssl_verify,
                         cookies={'language': 'fr'})
        if r.status_code == 403:
            return json.dumps({
                "error": {
                    "message": "Connection token is invalid or has expired",
                    'code': 403
                }
            }), 403
        return clean_json(r.text)
    except Exception as e:
        return json.dumps({
            "error": {
                "message":
                "Server was unable to connect to Epitech's intra API",
                "code": 500
            }
        }), 500
Example #6
def find_tv_show_season(content, tvshow, season):
    url_found = None
    found_urls = []
    possible_matches = []
    all_tvshows = []

    h = HTMLParser.HTMLParser()
    for matches in re.finditer(movie_season_pattern, content, re.IGNORECASE | re.DOTALL):
        found_title = matches.group('title')
        found_title = h.unescape(found_title)

        if matches.group('link') in found_urls:
            continue
        log(__name__, "Found tv show season on search page: %s" % found_title)
        found_urls.append(matches.group('link'))
        s = difflib.SequenceMatcher(None, (found_title + ' ' + matches.group('year')).lower(), tvshow.lower())
        all_tvshows.append(matches.groups() + (s.ratio() * int(matches.group('numsubtitles')),))
        if found_title.lower().find(tvshow.lower() + " ") > -1:
            if found_title.lower().find(season.lower()) > -1:
                log(__name__, "Matching tv show season found on search page: %s" % found_title)
                possible_matches.append(matches.groups())

    if len(possible_matches) > 0:
        possible_matches = sorted(possible_matches, key=lambda x: -int(x[3]))
        url_found = possible_matches[0][0]
        log(__name__, "Selecting matching tv show with most subtitles: %s (%s)" % (
            possible_matches[0][1], possible_matches[0][3]))
    else:
        if len(all_tvshows) > 0:
            all_tvshows = sorted(all_tvshows, key=lambda x: -int(x[4]))
            url_found = all_tvshows[0][0]
            log(__name__, "Selecting tv show with highest fuzzy string score: %s (score: %s subtitles: %s)" % (
                all_tvshows[0][1], all_tvshows[0][4], all_tvshows[0][3]))

    return url_found
Example #7
    def es_ingest_objects(self):
        ingest_list = []
        parser = htmlparser.HTMLParser()

        es_dict_template = {"_index": "netflix_crossing", "_type": "nfobject"}

        title_list = []

        for nfobject in self.nfobjects:
            ingest_dict = es_dict_template.copy()

            nfobject["title"] = parser.unescape(nfobject["title"])
            nfobject["synopsis"] = parser.unescape(nfobject["synopsis"])

            title_list.append(nfobject["title"])

            ingest_dict.update(nfobject)

            ingest_list.append(ingest_dict)

        helpers.bulk(self.es, ingest_list)

        print("Objects ingested")
        print("Title list")
        print(title_list)
Example #8
def replaceEscapeCodes(txt):
    try:
        import html.parser as html_parser
    except ImportError:
        import HTMLParser as html_parser
    txt = html_parser.HTMLParser().unescape(txt)
    return txt
Example #9
    def find_by_title(self, title):
        default_find_by_title_params = {'json': '1',
                                        'nr': 1,
                                        'tt': 'on',
                                        'q': title}
        query_params = urlencode(default_find_by_title_params)
        results = self.get(('http://www.imdb.com/'
                            'xml/find?{0}').format(query_params))

        keys = ['title_popular',
                'title_exact',
                'title_approx',
                'title_substring']
        title_results = []

        html_unescape = htmlparser.HTMLParser().unescape

        # Loop through all results and build a list with popular matches first
        for key in keys:
            if key in results:
                for r in results[key]:
                    year = None
                    year_match = re.search(r'(\d{4})', r['title_description'])
                    if year_match:
                        year = year_match.group(0)

                    title_match = {
                        'title': html_unescape(r['title']),
                        'year': year,
                        'imdb_id': r['id']
                    }
                    title_results.append(title_match)

        return title_results
Example #10
 def restoreIni(self):
     #prjFileInfo = QtCore.QFileInfo(core.QgsProject.instance().fileName())
     #iniFileInfo = QtCore.QFileInfo(os.path.join(prjFileInfo.path(),prjFileInfo.baseName()+".gsv"))
     stored_settings = core.QgsExpressionContextUtils.projectScope(core.QgsProject.instance()).variable('go2sv_infolayer_settings')
     if stored_settings:
         self.infoBoxIni = json.loads(stored_settings)
         self.loadPointLayers(default = self.infoBoxIni["infoLayer"])
         self.infoField.setText(self.infoBoxIni["infoField"])
     else:
         self.infoBoxIni = {'infoLayerEnabled': None,'infoBoxTemplate': u'','infoField': '','infoBoxEnabled': None,'iconPath': '','infoLayer': '','distanceBuffer':'100',"mapCommandsEnabled":None}
         self.loadPointLayers()
     if self.infoBoxIni["infoLayerEnabled"]:
         self.enableInfoLayerCheckbox.setCheckState(QtCore.Qt.Checked)
     else:
         self.enableInfoLayerCheckbox.setCheckState(QtCore.Qt.Unchecked)
     self.iconPath.setText(self.infoBoxIni["iconPath"])
     self.distanceBuffer.setText(self.infoBoxIni["distanceBuffer"])
     if self.infoBoxIni["infoBoxEnabled"]:
         self.enableInfoBoxCheckbox.setCheckState(QtCore.Qt.Checked)
     else:
         self.enableInfoBoxCheckbox.setCheckState(QtCore.Qt.Unchecked)
     if self.infoBoxIni["mapCommandsEnabled"]:
         self.mapCommandsCheck.setCheckState(QtCore.Qt.Checked)
     else:
         self.mapCommandsCheck.setCheckState(QtCore.Qt.Unchecked)
     html_parser = HTMLParser.HTMLParser()
     self.infoboxHtml.setPlainText(html_parser.unescape(self.infoBoxIni["infoBoxTemplate"]))
     self.enableInfoLayerAction(True)
     if self.infoIndex and self.enableInfoLayerCheckbox.isChecked():
         self.updateSpatialIndex()
     self.defined.emit()
Example #11
def getDocFromNode(ns, node, retval=None):
    annotation_node = node.find('xs:annotation', ns)
    if annotation_node is None:
        return retval
    documentation_node = annotation_node.find('xs:documentation', ns)
    if documentation_node is None:
        return retval

    # Be sure to grab _all_ content in the <xs:documentation> node.
    # In the documentation nodes, use XML entities ("&lt;" instead of "<")
    # for documentation characters that would otherwise be considered as XML.
    s = lxml.etree.tostring(documentation_node,
                            method="text",
                            pretty_print=True)
    rst = s.decode().lstrip('\n')  # remove any leading blank lines
    rst = rst.rstrip()  # remove any trailing white space
    text = textwrap.dedent(rst)  # remove common leading space

    # substitute HTML entities in markup: "<" for "&lt;"
    # thanks: http://stackoverflow.com/questions/2087370/decode-html-entities-in-python-string
    try:  # see #661
        import html
        text = html.unescape(text)
    except (ImportError, AttributeError):
        from html import parser as HTMLParser
        htmlparser = HTMLParser.HTMLParser()
        text = htmlparser.unescape(text)

    return text.lstrip()
Example #12
    def __init__(self, *args, **kwargs):
        self.htmlparser = HTMLParser.HTMLParser()
        super(Flagger, self).__init__(*args, **kwargs)
        self.logger = logging.getLogger(__name__)
        self.logger.setLevel((self.debug or self.verbose) and logging.DEBUG or logging.ERROR)

        self.now = int(time.time())
Example #13
    def fillRaiSportKeys(self):
        # search for items in main menu
        RaiSportKeys = []

        try:
            data = utils.checkStr(urllib2.urlopen(self.RaiSportMainUrl).read())
        except urllib2.HTTPError:
            data = ''

        m = re.search("<a href=\"javascript:void\(0\)\">Menu</a>(.*?)</div>",
                      data, re.S)
        if not m:
            return []
        menu = m.group(0)

        links = re.findall("<a href=\"(?P<url>[^\"]+)\">(?P<title>[^<]+)</a>",
                           menu)
        good_links = []
        for l in links:
            if ('/archivio.html?' in l[0]) and not ('&amp;' in l[0]):
                good_links.append({'title': l[1], 'url': l[0]})

        good_links.append({
            'title':
            self.RaiPlayAddonHandle.getLocalizedString(32015),
            'url':
            '/archivio.html?tematica=altri-sport'
        })

        # open any single page in list and grab search keys

        for l in good_links:
            try:
                data = utils.checkStr(
                    urllib2.urlopen(self.RaiSportMainUrl + l['url']).read())
            except urllib2.HTTPError:
                data = ''

            dataDominio = re.findall("data-dominio=\"(.*?)\"", data)
            dataTematica = re.findall("data-tematica=\"(.*?)\"", data)
            xbmc.log(str(dataTematica))
            if dataTematica:
                if len(dataTematica) > 1:
                    del (dataTematica[0])

                try:
                    title = dataTematica[0].split('|')[0]
                    title = utils.checkStr(
                        HTMLParser.HTMLParser().unescape(title))
                    params = {
                        'title': title,
                        'dominio': dataDominio[0],
                        'sub_keys': dataTematica
                    }

                    RaiSportKeys.append(params)
                except:
                    xbmc.log("error in key %s" % str(dataTematica))

        return RaiSportKeys
Example #14
	def xml_constructor(self, soup, link, tpburl, info):
		page = HTMLParser(soup)
		if info[0] == "search":
			try:
				title = page.title
			except:
				title = info[1]
		elif info[0] in ["browse", "user"]:
			try:
				title = parser.HTMLParser().unescape(search('<title>(.*) - TPB</title>', soup).group(1))
			except:
				title = info[1]
		elif info[0] == "recent":
			title = "Recent Torrents"
		xml = "<rss version=\"2.0\">\n\t<channel>\n\t\t"
		xml += "<title>TPB2RSS: %s</title>\n\t\t" % title
		xml += "<link>%s%s</link>\n\t\t" % (tpburl, parse.quote(link))
		xml += "<description>The Pirate Bay %s feed for \"%s\"</description>\n\t\t" % (info[0], title)
		xml += "<lastBuildDate>%s GMT</lastBuildDate>\n\t\t" % datetime.utcnow().strftime("%a, %d %b %Y %H:%M:%S")
		xml += "<language>en-us</language>\n\t\t"
		xml += "<generator>TPB2RSS %s</generator>\n\t\t" % __version__
		xml += "<docs>%s</docs>\n\t\t" % __docs__
		xml += "<webMaster>%s (%s)</webMaster>" % (__email__, __author__)
		position = 0
		for i in range(int(len(page.data) / 4)):
			item = str(page.data[position + 1]).split("\"")
			seeders = str(str(page.data[position + 2]).split(">")[1]).split("<")[0]
			leechers = str(str(page.data[position + 3]).split(">")[1]).split("<")[0]
			category = sub(r"(\n|\t)", "", (compile(r'<.*?>').sub('', page.data[0]).replace("(", " (")))
			xml += self.item_constructor(item, seeders, leechers, category, tpburl)
			position += 4
		xml += "\n\t</channel>\n</rss>"
		return xml
Example #15
def trim_cmt(cmt_list):
    import html.parser as HTMLParser
    html_parser = HTMLParser.HTMLParser()
    pattern = re.compile("<.*>.*?<.*>", re.S)
    return map(
        lambda x: re.sub(pattern, "", html_parser.unescape(x[0])).strip(),
        cmt_list)
Example #16
def translate_text(text, lang):
    """ 
  Takes in input text and utilizes Google's Translate API to convert 
  it to the selected language. 
  """
    translate_client = translate.Client()

    # Takes the input language and returns the string abbreviation for the translate file
    # This resolves the Google API invalid input error when passing the lang parameter.
    langDict = {
        'en': 'English',
        'es': 'Spanish',
        'it': 'Italian',
        'fr': 'French',
        'tr': 'Turkish',
        'ko': 'Korean'
    }
    language = ''
    for abbrev, fullLanguage in langDict.items():
        if fullLanguage == langDict[lang]:
            language = abbrev

    if isinstance(text, six.binary_type):
        text = text.decode('utf-8')

    # Initialize parser to convert special characters back to their original form after
    # Google translation API has finished translating the text.
    parser = htmlparser.HTMLParser()
    result = translate_client.translate(text, target_language=language)

    return parser.unescape(result['translatedText'])
Example #17
def find_movie(content, title, year):
    found_urls = {}
    found_movies = []
    h = HTMLParser.HTMLParser()
    for secmatches in re.finditer(search_section_pattern, content, re.IGNORECASE | re.DOTALL):
        log(__name__, secmatches.group('section'))
        for matches in re.finditer(movie_season_pattern, secmatches.group('content'), re.IGNORECASE | re.DOTALL):
            if matches.group('link') in found_urls:
                if secmatches.group('section') == 'close':
                    found_movies[found_urls[matches.group('link')]]['is_close'] = True
                if secmatches.group('section') == 'exact':
                    found_movies[found_urls[matches.group('link')]]['is_exact'] = True
                continue
            found_urls[matches.group('link')] = len(found_movies)

            found_title = matches.group('title')
            found_title = h.unescape(found_title)
            log(__name__, "Found movie on search page: %s (%s)" % (found_title, matches.group('year')))
            found_movies.append(
                {'t': found_title.lower(),
                 'y': int(matches.group('year')),
                 'is_exact': secmatches.group('section') == 'exact',
                 'is_close': secmatches.group('section') == 'close',
                 'l': matches.group('link'),
                 'c': int(matches.group('numsubtitles'))})

    year = int(year)
    title = title.lower()
    # Priority 1: matching title and year
    for movie in found_movies:
        if movie['t'].find(title) > -1:
            if movie['y'] == year:
                log(__name__, "Matching movie found on search page: %s (%s)" % (movie['t'], movie['y']))
                return movie['l']

    # Priority 2: matching title and one off year
    for movie in found_movies:
        if movie['t'].find(title) > -1:
            if movie['y'] == year + 1 or movie['y'] == year - 1:
                log(__name__, "Matching movie found on search page (one off year): %s (%s)" % (movie['t'], movie['y']))
                return movie['l']

    # Priority 3: "Exact" match according to search result page
    close_movies = []
    for movie in found_movies:
        if movie['is_exact']:
            log(__name__, "Using 'Exact' match: %s (%s)" % (movie['t'], movie['y']))
            return movie['l']
        if movie['is_close']:
            close_movies.append(movie)

    # Priority 4: "Close" match according to search result page
    if len(close_movies) > 0:
        close_movies = sorted(close_movies, key=itemgetter('c'), reverse=True)
        log(__name__, "Using 'Close' match: %s (%s)" % (close_movies[0]['t'], close_movies[0]['y']))
        return close_movies[0]['l']

    return None
Example #18
def replaceHTMLCodes(txt):
    txt = re.sub("(&#[0-9]+)([^;^0-9]+)", "\\1;\\2", txt)
    if version_info[0] == 3:
        txt = HTMLParser.unescape(txt)
    else:
        txt = HTMLParser.HTMLParser().unescape(txt)
    txt = txt.replace("&quot;", '"')
    txt = txt.replace("&amp;", "&")
    return txt
Example #19
File: custom.py Project: imbi7py/botbot
def title_echo(r, line, bot, chan):
    """Echo the title of a url via MC"""
    def write_url(title, url):
        """Write a URL to the database"""
        conn = sqlite3.connect(DB_FILE)
        c = conn.cursor()
        t = (title, url)
        c.execute('INSERT INTO url_history VALUES (?, ?)', t)
        conn.commit()
        conn.close()

    # get url and build opener with custom user-agent and cookies enabled
    url = r.group(5).split()[0]
    cookie_jar = http.cookiejar.CookieJar()
    opener = urllib.request.build_opener(
        urllib.request.HTTPCookieProcessor(cookie_jar))
    opener.addheaders = [('User-agent', USER_AGENT)]

    # open the url but only read a maximum of 2**20 bytes in case someone is
    # screwing with us
    try:
        f = opener.open(str(url))
        html = f.read(1048576)  # only read the first 2**20 bytes
    except:
        e = traceback.format_exc()
        debug(e, log_only=True)
        write_url(None, url)
        return

    # uncompress if the data is gzipped
    try:
        encoding = f.info()['content-encoding']
    except KeyError:
        encoding = None
    if encoding and encoding == 'gzip':
        html = io.BytesIO(html)
        gz = gzip.GzipFile(fileobj=html, mode='rb')
        html = gz.read()
        gz.close()
    f.close()

    # decode the html and search for the title element
    html = html.decode('utf-8', errors='replace')
    title = re.search(r'<title.*?>(.*?)</title>', html,
                      re.DOTALL | re.IGNORECASE)
    if title:
        title = title.group(1).strip()
        title = title.replace('\n', '').replace('\r', '')
        title = ' '.join([w for w in title.split(' ') if w != ''])
        title = htmlp.HTMLParser().unescape(title)
    else:
        write_url(None, url)
        return

    # if we are here then there's a title so echo it to the channel
    bot.write('PRIVMSG {chan} :Title: {msg}\r\n'.format(chan=chan, msg=title))
    write_url(title, url)
Example #20
def create_block_info(input_path, preprocessor):
    """Create block info."""
    results = []
    html_parser = parser.HTMLParser()
    with tf.io.gfile.GFile(input_path) as input_file:
        for line in input_file:
            results.extend(
                wiki_preprocessor.example_from_json_line(
                    line, html_parser, preprocessor))
    return results
Example #21
def fix_scripts(dom):
    # ldjson workaround
    p = HTMLParser.HTMLParser()
    for script in dom.getElementsByTagName("script"):
        r = RawText()
        r.ownerDocument = dom
        r.data = p.unescape(script.childNodes[0].wholeText)
        for cn in script.childNodes:
            script.removeChild(cn)
        script.appendChild(r)
Example #22
def speeches():
    f = open('speeches.txt', 'r')
    text_model = markovify.Text(f.read())
    tweet = text_model.make_short_sentence(100) + ' ' + random.choice(hashtags)
    print('Posting to Twitter...')
    try:
        api.update_status(HTMLParser.HTMLParser().unescape(tweet))
    except ImportError:
        api.update_status(htmlparser.HTMLParser().unescape(tweet))
    f.close()
    print('Tweet you posted: ' + tweet)
Example #23
def get_info():
    '''
    This is a generator that takes titles and descriptions,
    reformats the html and yields the title and description
    of the latest videos for Alexa to respond with
    '''
    for tit, des in zip(titles, descriptions):
        titled = tit.a.img["alt"].strip()
        title = parser.HTMLParser().unescape(titled)
        description = des.text.strip()
        yield f"{title}. {description}"
Example #24
File: core.py Project: Nlegion/School_Work
 def decode_value(data):
     new_data = {}
     for k, v in data.items():
         val = bytes(v.replace('%', '=').replace("+", " "), 'UTF-8')
         val_decode_str = decodestring(val).decode('UTF-8')
         val_decode_str = decodestring(val_decode_str).decode('UTF-8')
         if "&#" in val_decode_str:
             parser = htmlparser.HTMLParser()
             val_decode_str = parser.unescape(val_decode_str)
         new_data[k] = val_decode_str
     return new_data
Example #25
    def handle(self, *args, **options):
        from tendenci.apps.pages.models import Page

        pages = Page.objects.all()
        self.h = html_parser.HTMLParser()
        pattern = re.compile(r'(&#\d+;)', re.IGNORECASE)

        for page in pages:
            page.title = re.sub(pattern, self.unescape, page.title)
            page.content = re.sub(pattern, self.unescape, page.content)
            page.save()
Example #26
def update_text(clickData):
    if clickData['points'][0]['curveNumber'] == 0:
        return html.Div([
            html.A(
                "Direct link to Reddit user comment",
                href="https://new.reddit.com/comments/" +
                str(reddit_xamarin_link_Id_Data[
                    clickData['points'][0]['pointIndex']]).replace("t3_", "") +
                "/_/" + str(reddit_xamarin_Id_Data[clickData['points'][0]
                                                   ['pointIndex']]),
                target="_blank"),
            html.H3(HP.HTMLParser().unescape(reddit_xamarin_Body_Data[
                clickData['points'][0]['pointIndex']]))
        ])
    if clickData['points'][0]['curveNumber'] == 1:
        return html.Div([
            html.A(
                "Direct link to Reddit user comment",
                href="https://new.reddit.com/comments/" +
                str(reddit_react_native_link_Id_Data[
                    clickData['points'][0]['pointIndex']]).replace("t3_", "") +
                "/_/" + str(reddit_react_native_Id_Data[clickData['points'][0]
                                                        ['pointIndex']]),
                target="_blank"),
            html.H3(HP.HTMLParser().unescape(reddit_react_native_Body_Data[
                clickData['points'][0]['pointIndex']]))
        ])
    if clickData['points'][0]['curveNumber'] == 2:
        return html.Div([
            html.A(
                "Direct link to Reddit user comment",
                href="https://new.reddit.com/comments/" +
                str(reddit_flutter_link_Id_Data[
                    clickData['points'][0]['pointIndex']]).replace("t3_", "") +
                "/_/" + str(reddit_flutter_Id_Data[clickData['points'][0]
                                                   ['pointIndex']]),
                target="_blank"),
            html.H3(HP.HTMLParser().unescape(reddit_flutter_Body_Data[
                clickData['points'][0]['pointIndex']]))
        ])
Example #27
def decode_html_entities(s):
    """
    Replaces html entities with the character they represent.

        >>> print(decode_html_entities("&lt;3 &amp;"))
        <3 &
    """
    parser = HTMLParser.HTMLParser()

    def unesc(m):
        return parser.unescape(m.group())

    return re.sub(r'(&[^;]+;)', unesc, ensure_unicode(s))
Example #28
    def load_result(self, runtime):
        values = {
            'problem_id': str(runtime.pid),
            'user_id': self.username,
            'language': '-1',
            'jresult': '-1'
        }
        url = self.result_url.format(urllib.parse.urlencode(values))

        result_regex = re.compile(
            self.result_regex_r.format(pid=runtime.pid, name=self.username))

        while True:
            self.set_status('Waiting for judging...')
            time.sleep(1)

            html, resp = self.get(url, self.headers)
            match = result_regex.search(html)

            result = match.group(4)
            if result not in [
                    'Pending', 'Pending_Rejudging', 'Compiling',
                    'Running_&_Judging'
            ]:
                break

        match = re.search(
            self.detail_regex_r.format(pid=runtime.pid, name=self.username),
            html)

        message = 'Loading result...'
        self.set_status(message)

        result = result.replace('_', ' ')
        if result.endswith('Exceed'):
            result = result + 'ed'

        judge_id = match.group(2)
        memory = match.group(6) + ' KB'
        time_ = match.group(7) + ' ms'
        detail = [result, time_, memory]

        if result == 'Compile Error':
            html, resp = self.get(self.compile_message_url.format(judge_id),
                                  self.headers)
            runtime.judge_compile_message = \
                parser.HTMLParser().unescape(self.compile_message_regex.findall(html)[0]).replace('\r', '')

        runtime.judge_detail = [detail]
        runtime.judge_result = result
        runtime.judge_score = 100 if result == 'Accepted' else 0
Example #29
    def download_subtitles(self, suburls):
        """Download the SAMI subtitles, decode the HTML entities and save to temp directory.
        Return a list of the path to the downloaded subtitles."""
        paths = []
        for sub_data in suburls:
            sami = self.make_request(url=sub_data['href'], method='get').decode('utf-8', 'ignore').strip()
            htmlparser = HTMLParser.HTMLParser()
            subtitle = htmlparser.unescape(sami).encode('utf-8')
            path = os.path.join(self.tempdir, '{0}.sami'.format(sub_data['languageCode']))
            with open(path, 'wb') as subfile:
                subfile.write(subtitle)
            paths.append(path)

        return paths
Example #30
 def _render_cell(self, column, cell, model, iter_, destroy):
     article = model[iter_][0]
     title = escape(
         re.sub('\\s+', ' ',
                article.title.replace('\n', ' ').strip()))
     content = []
     html_parser = parser.HTMLParser()
     html_parser.handle_data = content.append
     html_parser.feed(article.description)
     content = escape(
         re.sub('\\s+', ' ', ''.join(content)[:1000].replace('\n',
                                                             ' ').strip()))
     cell.set_property(
         'markup', '<big>%s</big>\n<small>%s</small>' %
         (('%s' if article.read else '<b>%s</b>') % title, content))