def test_render_attribute_to_document():
    """Attribute expressions are rendered again on every render pass.

    The same expression record is rendered twice with different contexts;
    after each pass the serialized node must carry the new attribute value
    together with the ``data-riot-dirty`` marker.
    """
    node = PyQuery('<a attribute="{ value }" data-riot-id="0"></a>')
    attribute_expression = dict(
        expression='{ value }',
        attribute='attribute',
        type='attribute',
        node=node,
    )
    # (context value, markup expected once that value has been rendered)
    scenarios = (
        ('value', '<a attribute="value" data-riot-id="0" data-riot-dirty="true"></a>'),
        (1, '<a attribute="1" data-riot-id="0" data-riot-dirty="true"></a>'),
    )
    for context_value, expected_markup in scenarios:
        render_document([attribute_expression], {'value': context_value})
        assert node.outer_html() == expected_markup
def test_render_attribute_to_document():
    """Check that rendering an attribute expression rewrites the attribute in place."""
    anchor = PyQuery('<a attribute="{ value }" data-riot-id="0"></a>')
    expression = {
        'expression': '{ value }',
        'attribute': 'attribute',
        'type': 'attribute',
        'node': anchor,
    }

    def markup_after_render(value):
        # Render with a one-key context, then serialize the shared node.
        render_document([expression], {'value': value})
        return anchor.outer_html()

    string_result = markup_after_render('value')
    assert string_result == '<a attribute="value" data-riot-id="0" data-riot-dirty="true"></a>'

    integer_result = markup_after_render(1)
    assert integer_result == '<a attribute="1" data-riot-id="0" data-riot-dirty="true"></a>'
def fetch_comment_list(self, broadcast_url, broadcast_douban_id):
    """Collect every comment of a broadcast, following pagination.

    Starting at ``broadcast_url``, each page is fetched with
    ``self.fetch_url_content`` and every ``#comments>.comment-item`` node is
    turned into one dict. While the paginator offers a "next" link, its
    ``href`` is appended to ``broadcast_url`` to form the next page address.

    :param broadcast_url: address of the first comment page
    :param broadcast_douban_id: stored as ``target_douban_id`` on each comment
    :return: list of comment dicts, in page order
    """
    collected = []
    page_url = broadcast_url
    while True:
        page = PyQuery(self.fetch_url_content(page_url).text)

        for node in page('#comments>.comment-item'):
            entry = PyQuery(node)
            author_uid = PyQuery(entry('.pic>a')).attr('data-uid')
            collected.append({
                'content': entry.outer_html(),
                'target_type': 'broadcast',
                'target_douban_id': broadcast_douban_id,
                'douban_id': entry.attr('data-cid'),
                'user': self.fetch_user(author_uid),
                'text': PyQuery(entry('.content>p.text')).text(),
                'created': PyQuery(entry('.content>.author>.created_at')).text(),
            })

        next_link = page('#comments>.paginator>.next>a')
        if not next_link:
            # Last page reached.
            return collected
        page_url = broadcast_url + next_link.attr('href')
def _svg_dimension_style(width, height):
    """Build the inline CSS for the requested SVG dimensions."""
    if not (width or height):
        return ''
    declarations = []
    for prop, value in (('width', width), ('height', height)):
        if value is None:
            # Leave out a dimension that was not given, so the style never
            # contains "width: None;" or "height: None;".
            continue
        unit = 'px' if isinstance(value, int) else ''
        declarations.append('{0}: {1}{2};'.format(prop, value, unit))
    return ' '.join(declarations)


def render_svg(image, width=None, height=None, request=None,
               css_class='', img_class='', alt=''):
    """Render an SVG image through the ``templates/svg-render.pt`` template.

    :param image: object whose ``data`` attribute holds the SVG markup
    :param width: optional width; integers get a ``px`` suffix, any other
        value is emitted as given. Omitted from the style when ``None``.
    :param height: optional height, same rules as ``width``
    :param request: request forwarded to ``render``
    :param css_class: passed to the template as ``css_class``
    :param img_class: when set, written to the ``class`` attribute of the
        parsed SVG root
    :param alt: when set, the SVG children are moved into a ``<g>`` whose
        first child is a ``<title>`` holding this text
    :return: the result of ``render`` for the template
    """
    # pylint: disable=too-many-arguments
    options = {
        'style': _svg_dimension_style(width, height),
        'css_class': css_class,
    }
    if alt or img_class:
        # The markup is only parsed when it actually has to be modified.
        svg = PyQuery(image.data)
        if alt:
            group = PyQuery('<g></g>')
            group.append(PyQuery('<title />').text(alt))
            for child in svg.children():
                group.append(child)
            svg.empty().append(group)
        if img_class:
            svg.attr('class', img_class)
        options['svg'] = svg.outer_html()
    else:
        options['svg'] = image.data
    return render('templates/svg-render.pt', options, request)
def render(self):
    """Build the slides and store the wrapped presentation markup.

    Runs slide generation and slide rendering, then nests ``self.body``
    inside a ``<div id="pres_wrapper">`` and saves that element's
    serialized HTML on ``self.html``.
    """
    self._generate_slides()
    self._render_slides()
    # attr() and append() both return the selection, so the wrapper can be
    # assembled in a single expression.
    wrapper = Pq('<div></div>').attr('id', 'pres_wrapper').append(self.body)
    self.html = wrapper.outer_html()
def test_render_markup_to_document():
    """Markup expressions are emptied in the DOM and keep their values on the expression.

    After rendering, the ``<text>`` node has no children, both nodes carry
    ``data-riot-dirty``, and the first parsed expression exposes the
    rendered ``(name, value)`` pairs. A second render replaces those pairs.
    """
    source_markup = (
        '<custom data-riot-id="0"><text data-riot-id="0.0">'
        '<span class="name">{ name}</span>'
        '<span class="greet">{ greet }</span>'
        '</text></custom>'
    )
    root = PyQuery(source_markup)
    parsed = parse_document_expressions(root)

    first_context = {'name': '@ainesmile', 'greet': 'I love you.'}
    render_document(parsed, first_context)
    expected_markup = (
        '<custom data-riot-id="0" data-riot-dirty="true">'
        '<text data-riot-id="0.0" data-riot-dirty="true"></text>'
        '</custom>'
    )
    assert root.outer_html() == expected_markup
    assert parsed[0]['value'] == [(u'name', '@ainesmile'), (u'greet', 'I love you.')]

    second_context = {'name': '@soasme', 'greet': 'I love you, too.'}
    render_document(parsed, second_context)
    assert parsed[0]['value'] == [(u'name', '@soasme'), (u'greet', 'I love you, too.')]
def rich_text_export(html_str, request=None, id_prefix=''):
    """Prepare rich-text HTML for export alongside other documents.

    Unless ``html_str``, ``request`` and ``id_prefix`` are all truthy, the
    input is returned untouched. Otherwise the markup is parsed and:

    * a space is appended to every ``.footnote-label``;
    * every ``id`` and every ``href`` starting with ``#`` gets ``id_prefix``
      prepended;
    * every image ``src`` is passed through ``request.build_absolute_uri``;
    * images with one of the layout classes are wrapped in a
      ``<div data-custom-style=...>`` preceded by ``</p>`` and followed by
      ``<p>``;
    * the two footnote custom styles get ``-<id_prefix>`` appended;
    * empty paragraphs directly after a styled div receive a padded
      non-breaking space.

    :return: the serialized HTML, or ``html_str`` itself on the early exit
    """
    if not html_str or not request or not id_prefix:
        return html_str

    doc = PyQuery(html_str)

    # Footnote labels get a '.' from a CSS :after pseudo element; the extra
    # space keeps edits to the label from being lost.
    doc(".footnote-label").append(" ")

    # Ids are unique within one document but may collide across several,
    # hence the prefix on ids and on the fragment links that target them.
    def add_prefix_to_id(_, node):
        node.attrib['id'] = f"{id_prefix}-{node.attrib['id']}"

    def add_prefix_to_fragment(_, node):
        fragment = node.attrib['href'][1:]
        node.attrib['href'] = f"#{id_prefix}-{fragment}"

    # Images need absolute urls.
    def make_src_absolute(_, node):
        node.attrib['src'] = request.build_absolute_uri(node.attrib['src'])

    doc("[id]").each(add_prefix_to_id)
    doc("[href^='#']").each(add_prefix_to_fragment)
    doc("img[src]").each(make_src_absolute)

    # Doc styling wrappers for images: the wrapper is spliced into the
    # parent's markup by textual replacement of the image's own markup.
    def wrap_image(style_name, img):
        container = img.parent()
        inner_markup = container.html(method="html")
        img_markup = img.outer_html()
        wrapped = f"</p><div data-custom-style='{style_name}'>{img_markup}</div><p>"
        container.html(inner_markup.replace(img_markup, wrapped))

    image_styles = (
        ("img.image-center-large", "Image Centered Large"),
        ("img.image-center-medium", "Image Centered Medium"),
        ("img.image-left-medium", "Image Left Medium"),
        ("img.image-right-medium", "Image Right Medium"),
    )
    for selector, style_name in image_styles:
        for img in doc(selector).items():
            wrap_image(style_name, img)

    footnote_selectors = (
        "[data-custom-style='Footnote Reference']",
        "[data-custom-style='Footnote Text']",
    )
    for selector in footnote_selectors:
        for node in doc(selector):
            node.attrib['data-custom-style'] += f"-{id_prefix}"

    # A non-breaking space in an empty paragraph after an image keeps it
    # from potentially overlapping with a following image.
    doc("div[data-custom-style]+p:empty").text(" \xa0 ")

    return doc.outer_html()
def get_html_page(self, task):
    '''
    Fetch the target page, process its links and save it locally.

    The task's record is first marked 'pending'. The method then returns
    early when the task is deeper than config['max_depth'] (checked only
    when max_depth is greater than 0) or when it has failed more than
    config['max_retry_times'] times (record marked 'failed'). A failed
    request is put back on the page queue with 'failed_times' incremented;
    a 404 response marks the record 'failed'.

    On success the response is parsed, handed to parse_linking_pages
    (unless the next level would exceed max_depth) and parse_linking_assets,
    the asset worker pool is started, and the serialized page is written
    through save_file_async; the record becomes 'success' when that call
    reports success. Any exception in this phase is logged, not raised.

    task: mapping; the keys read here are 'url', 'depth' and 'failed_times'.
    Returns None on every path.
    '''
    msg = 'get_html_page(): task: {task:s}'
    logger.debug(msg.format(task = str(task)))

    ## Update the record status.
    update_record_status(self.db_conn, task['url'], 'pending')

    if 0 < self.config['max_depth'] < task['depth']:
        msg = '已超过最大深度: task: {task:s}'
        logger.warning(msg.format(task = str(task)))
        return
    if task['failed_times'] > self.config['max_retry_times']:
        msg = '失败次数过多, 不再重试: task: {task:s}'
        logger.warning(msg.format(task = str(task)))
        update_record_status(self.db_conn, task['url'], 'failed')
        return

    code, resp = request_get_async(task, self.config)
    if not code:
        # NOTE(review): the '{err:s}' spec needs resp to format as a string
        # here; presumably request_get_async returns an error message on
        # failure. Confirm against that helper.
        msg = '请求页面失败, 重新入队列: task: {task:s}, err: {err:s}'
        logger.error(msg.format(task = str(task), err = resp))
        ## On failure, increment the failure counter.
        ## No need to call enqueue(); put the task straight back on the queue.
        task['failed_times'] += 1
        self.enqueue_page(task)
        return
    elif resp.status_code == 404:
        ## Fetch failures are usually 5xx or 403, 405 and the like; a 404 is
        ## essentially pointless to retry, so give up right away.
        update_record_status(self.db_conn, task['url'], 'failed')
        return

    msg = '请求页面成功, 准备解析并保存: task: {task:s}'
    logger.debug(msg.format(task = str(task)))
    try:
        charset = get_page_charset(resp.content)
        resp.encoding = charset
        pq_selector = PyQuery(resp.text)

        ## Pages beyond the maximum depth are not fetched; this is checked
        ## before anything is enqueued.
        ## Static assets have no notion of depth, so they are still fetched.
        if 0 < self.config['max_depth'] < task['depth'] + 1:
            msg = '当前页面已达到最大深度, 不再抓取新页面: task {task:s}'
            logger.warning(msg.format(task = str(task)))
        else:
            parse_linking_pages(pq_selector, task, self.config, callback = self.enqueue_page)
        parse_linking_assets(pq_selector, task, self.config, callback = self.enqueue_asset)
        logger.debug('修改页面元素链接完成')

        ## Fetch the static assets on this page.
        self.asset_worker_pool.start(task)

        # Serialized only after the parse_linking_* calls above have run.
        byte_content = pq_selector.outer_html().encode('utf-8')
        file_path, file_name = trans_to_local_path(task['url'], 'page', self.main_site)
        code, data = save_file_async(self.config['site_path'], file_path, file_name, byte_content)
        if code:
            update_record_status(self.db_conn, task['url'], 'success')
    except Exception as err:
        # Best effort: the failure is logged and the record status is left
        # as it was.
        msg = '保存页面文件失败: task: {task:s}, err: {err:s}'
        logger.error(msg.format(task = str(task), err = repr(err)))
def format_footnotes_for_export(html_str):
    """
    Append ". " to every ``.footnote-label`` element and return the
    re-serialized HTML. Falsy input is returned unchanged.

    Footnotes are stored in TextBlocks as
    `<span class="footnote footnote-ref" data-custom-style="Footnote Reference" id="footnote-${data.id}-ref">${data.mark}</span>`
    `<div class="footnote footnote-body" id="footnote-${data.id}"><p><span class="footnote-label" contenteditable="false">${data.mark}</span>${data.footnote}</p></div>`
    """
    if html_str:
        doc = PyQuery(html_str)
        doc(".footnote-label").append(". ")
        return doc.outer_html()
    # Nothing to parse: hand back the falsy value as received.
    return html_str
def _convert_svg_to_resource(self, svg_element):
    """
    Replaces an svg element with an image placeholder and returns the
    matching PNG resource.

    The element's markup is captured with ``viewbox`` re-cased to
    ``viewBox``, the element is replaced in its document by
    ``<img src="name:<hex>" />``, and the captured markup is converted with
    ``convert_svg_to_png`` for the returned ``Resource``.
    """
    resource_name = uuid.uuid4().hex
    node = PyQuery(svg_element)
    # The markup has to be captured before the node is swapped out.
    svg_markup = node.outer_html().replace('viewbox', 'viewBox')
    placeholder = PyQuery('<img src="name:{0}" />'.format(resource_name))
    node.replace_with(placeholder)
    converted = convert_svg_to_png(svg_markup)
    return Resource(resource_name, converted, 'image/png')
def refineLocal():
    """Re-scan the saved download list and regenerate its derived files.

    Reads ``downloads.html`` from ``savefolder`` (a name taken from the
    enclosing scope). Each ``li button`` gets the class ``btn downloaded``
    when a file with the URL's base name exists in ``savefolder``, and
    ``btn`` otherwise. The result is written to ``downloads-refined.html``
    and opened with ``explorer``. Every button's ``data-clipboard-text``
    command is written to ``wwdc2016.sh``, one per line.
    """
    source_path = os.path.join(savefolder, "downloads.html")
    refined_path = os.path.join(savefolder, "downloads-refined.html")
    script_path = os.path.join(savefolder, "wwdc2016.sh")

    with open(source_path, "r", encoding="utf8") as f:
        html = PyQuery(f.read())

    commands = []
    for item in html("li button"):
        command = item.get("data-clipboard-text")
        commands.append(command)
        # The command has the form "curl <url> ..."; the url is its second word.
        dl_url = command.split(" ")[1]
        local_name = os.path.basename(urllib.parse.urlsplit(dl_url).path)
        if os.path.exists(os.path.join(savefolder, local_name)):
            item.set("class", "btn downloaded")
        else:
            item.set("class", "btn")

    with open(refined_path, "w+", encoding="utf-8") as wf:
        wf.write(html.outer_html())
    subprocess.Popen(["explorer", refined_path])

    with open(script_path, "w+", encoding="utf8") as f:
        # Joined without a trailing newline so the last command is written
        # in full; a "\n" terminator is a single character, so trimming two
        # characters would also cut into the final command.
        f.write("\n".join(commands))
def prefix_ids_hrefs(html_str, prefix):
    """Prefix every ``id`` and every ``#`` link target in ``html_str``.

    Each ``id`` becomes ``<prefix>-<id>`` and each ``href`` starting with
    ``#`` becomes ``#<prefix>-<target>``. Falsy input is returned unchanged.

    :return: the serialized HTML after rewriting
    """
    if not html_str:
        return html_str

    doc = PyQuery(html_str)
    for node in doc("[id]"):
        node.attrib['id'] = f"{prefix}-{node.attrib['id']}"
    for node in doc("[href^='#']"):
        # Drop the leading '#', then rebuild the fragment with the prefix.
        target = node.attrib['href'][1:]
        node.attrib['href'] = f"#{prefix}-{target}"
    return doc.outer_html()
def fetch_note_comments(self, url, dom, douban_id):
    """Gather the comments of a note across all comment pages.

    ``dom`` is the already parsed first page. Each following page is taken
    from the paginator's "next" link and fetched with
    ``self.fetch_url_content``. Every ``#comments .comment-item`` node
    becomes one dict; a quoted reply, when present, is stored under
    ``quote`` as ``name(id):text``.

    :param url: overwritten with each "next" link before it is fetched
    :param dom: parsed first page
    :param douban_id: stored as ``target_douban_id`` on each comment
    :return: list of comment dicts, in page order
    """
    profile_pattern = r'^http(?:s?)://www\.douban\.com/people/(.+)/$'

    def username_of(link):
        # First capture of the profile url in the link's href; raises
        # IndexError when the href does not match the pattern.
        return re.findall(profile_pattern, link.attr('href')).pop(0)

    results = []
    while True:
        for raw_item in dom('#comments .comment-item'):
            entry = PyQuery(raw_item)

            quoted_link = entry('.content>.reply-quote>.pubdate>a')
            quote = None
            if quoted_link:
                quote = '{0}({1}):{2}'.format(
                    quoted_link.text(),
                    username_of(quoted_link),
                    entry('.content>.reply-quote>.all').text(),
                )

            results.append({
                'douban_id': entry.attr('data-cid'),
                'content': entry.outer_html(),
                'target_type': 'note',
                'target_douban_id': douban_id,
                'user': self.fetch_user(username_of(entry('.pic>a'))),
                'text': entry('.content>p').text(),
                'created': entry('.content>.author>span').text(),
                'quote': quote,
            })

        next_link = dom('#comments>.paginator>.next>a')
        if not next_link:
            # No further page.
            return results
        url = next_link.attr('href')
        dom = PyQuery(self.fetch_url_content(url).text)
def test_render_markup_to_document():
    """Rendering markup expressions empties the node and records the rendered pairs."""
    tree = PyQuery(
        '<custom data-riot-id="0"><text data-riot-id="0.0"><span class="name">{ name}</span><span class="greet">{ greet }</span></text></custom>'
    )
    found = parse_document_expressions(tree)

    def values_after(context):
        # Render with the given context and report what the first
        # expression now holds.
        render_document(found, context)
        return found[0]['value']

    first_values = values_after({'name': '@ainesmile', 'greet': 'I love you.'})
    rendered = tree.outer_html()
    assert rendered == '<custom data-riot-id="0" data-riot-dirty="true"><text data-riot-id="0.0" data-riot-dirty="true"></text></custom>'
    assert first_values == [(u'name', '@ainesmile'), (u'greet', 'I love you.')]

    second_values = values_after({'name': '@soasme', 'greet': 'I love you, too.'})
    assert second_values == [(u'name', '@soasme'), (u'greet', 'I love you, too.')]
def convert_image_links(self, url: str, html: str):
    """
    Rewrite the ``src`` of every ``<img>`` in ``html`` to a local file name.

    Each source is resolved against ``url``. When the absolute url matches
    ``self.exclude_image_url_pattern`` the ``src`` attribute is removed and
    the ``alt`` is set to "excluded image". Otherwise the ``src`` becomes
    the digest of the absolute url plus its extension (with ``.svg`` shown
    as ``.png``), and the ``(absolute url, file name)`` pair is added to
    ``self.images``.

    Returns the serialized document.
    """
    doc = PyQuery(html)
    for img in doc.find("img").items():
        absolute = urllib.parse.urljoin(url, img.attr("src"))

        excluded = self.exclude_image_url_pattern and re.match(
            self.exclude_image_url_pattern, absolute)
        if excluded:
            self.logger.info("skiping %s", absolute)
            img.remove_attr("src")
            img.attr("alt", "excluded image")
            continue

        extension = get_url_ext(absolute)
        local_name = "{}{}".format(md5_digest(absolute), extension)
        # The document points at a .png name, while the recorded file name
        # keeps the original extension.
        img.attr("src", local_name.replace(".svg", ".png"))
        self.images.add((absolute, local_name))
    return doc.outer_html()
def parse_status(status_div):
    """
    Parse one broadcast ("status") element into a detail dict.

    ``self`` and ``now`` are not parameters; both are read from the
    enclosing scope.

    :param status_div: a ``PyQuery`` selection, or any value ``PyQuery()``
        accepts, for the broadcast wrapper element.
    :return: ``(detail, reshared_detail)``. ``reshared_detail`` is the
        parsed inner broadcast of a reshare, otherwise ``None``.
        ``(None, None)`` is returned when no broadcast id can be found,
        which is treated as "the original broadcast has been deleted".

    Meaning of ``object_kind``:
        1001: book
        1002: movie
        1003: music
        1005: followed a friend
        1011: event
        1012: review
        1013: group topic
        1014: (movie) discussion
        1015: note (diary)
        1018: text-and-image broadcast
        1019: group
        1020: doulist
        1021: "9 o'clock" article
        1022: web page
        1025: album photo
        1026: photo album
        1043: film celebrity
        1044: artist
        1062: board(???)
        2001: online event
        2004: site video
        3043: Douban FM track
        3049: reading note
        3065: subject entry
        3072: Douban FM channel
        3090: thing (product)
        3114: game
        5021: Douban Read image
        5022: Douban Read work
    """
    if not isinstance(status_div, PyQuery):
        status_div = PyQuery(status_div)

    reshared_count = 0
    like_count = 0
    comments_count = 0
    created_at = None
    is_noreply = False
    status_url = None
    target_type = None
    object_kind = None
    object_id = None
    reshared_detail = None
    blockquote = None

    douban_user_id = status_div.attr('data-uid')
    douban_id = status_div.attr('data-sid')
    is_saying = status_div.has_class('saying')
    is_reshared = status_div.has_class('status-reshared-wrapper')

    try:
        created_span = status_div.find('.actions>.created_at')[0]
    except Exception:
        # No '.actions>.created_at' element: flag the status and skip the
        # counters below.
        is_noreply = True

    try:
        # Broadcast link: the first anchor inside '.actions'.
        exactly_link = PyQuery(status_div.find('.actions a').eq(0))
        status_url = exactly_link.attr('href')
    except Exception:
        pass

    try:
        # Attributes describing the kind of broadcast.
        status_item_div = PyQuery(
            status_div.find('.status-item').eq(0))
        target_type = status_item_div.attr('data-target-type')
        object_kind = status_item_div.attr('data-object-kind')
        object_id = status_item_div.attr('data-object-id')
        # Fall back to the ids on the inner '.status-item' when the wrapper
        # itself does not carry them.
        if not douban_user_id:
            douban_user_id = status_item_div.attr('data-uid')
        if not douban_id:
            douban_id = status_item_div.attr('data-sid')
        blockquote = PyQuery(status_item_div.find('blockquote')).html()
    except Exception:
        pass

    if not is_noreply:
        # Creation time and the reply / like / reshare counters. Best
        # effort: whatever was read before a failure is kept.
        try:
            created_at = PyQuery(created_span).attr('title')
            reply_link = PyQuery(
                status_item_div.find('.actions>.new-reply'))
            comments_count = reply_link.attr('data-count')
            like_span = PyQuery(
                status_item_div.find('.actions>.like-count'))
            like_count = like_span.attr('data-count')
            if like_count is None:
                # No data-count attribute: try to read the number from the
                # element's text instead.
                try:
                    like_count = int(
                        re.match(r'赞\((.*)\)',
                                 like_span.text().strip())[1])
                except Exception:
                    like_count = 0
            reshared_span = PyQuery(
                status_item_div.find('.actions>.reshared-count'))
            reshared_count = reshared_span.attr('data-count')
            if reshared_count is None:
                reshared_count = 0
        except Exception:
            pass

    if not douban_id or douban_id == 'None':
        # The original broadcast has been deleted.
        return None, None

    detail = {
        'douban_id': douban_id,
        'douban_user_id': douban_user_id,
        'content': status_div.outer_html(),
        'created': created_at,
        'is_reshared': is_reshared,
        'is_saying': is_saying,
        'is_noreply': is_noreply,
        'updated_at': now,
        'reshared_count': reshared_count,
        'like_count': like_count,
        'comments_count': comments_count,
        'status_url': status_url,
        'target_type': target_type,
        'object_kind': object_kind,
        'object_id': object_id,
        'user': self.fetch_user_by_id(douban_user_id),
        'blockquote': blockquote,
    }

    if is_reshared:
        # A reshare wraps the original broadcast; parse it recursively and
        # link to it by id.
        reshared_status_div = PyQuery(
            status_div.find('.status-real-wrapper').eq(0))
        reshared_detail, _ = parse_status(reshared_status_div)
        if reshared_detail:
            detail['reshared_id'] = reshared_detail['douban_id']

    if target_type == 'sns':
        attachments = []
        images = status_div.find(
            '.attachments-saying.group-pics a.view-large')
        for img_lnk in images:
            attachments.append({
                'type': 'image',
                'url': PyQuery(img_lnk).attr('href'),
            })
        images = status_div.find(
            '.attachments-saying.attachments-pic img')
        for img in images:
            img_lnk = PyQuery(img).attr('data-raw-src')
            if img_lnk:
                attachments.append({
                    'type': 'image',
                    'url': img_lnk,
                })
        if attachments:
            self.save_attachments(attachments)
            detail['attachments'] = attachments
    elif target_type == 'movie' and object_kind == '1002':
        self.fetch_movie(object_id)
    elif target_type == 'book' and object_kind == '1001':
        self.fetch_book(object_id)
    elif target_type == 'music' and object_kind == '1003':
        self.fetch_music(object_id)

    return detail, reshared_detail
def test_identify_document(document, result):
    """Identifying ``document`` in place must serialize to exactly ``result``."""
    tree = PyQuery(document)
    identify_document(tree)
    serialized = tree.outer_html()
    assert serialized == result
def scanRemote():
    """Scan the remote video index and write a local download list page.

    ``url`` and ``savefolder`` are read from the enclosing scope. The index
    at ``url`` is loaded; for every ``li`` whose ``data-released`` is
    "true", the linked sub page is loaded and the path of its "HD" link is
    taken as the download location. One button per video is appended to the
    ``#downloadlist`` element of the page template. The button carries a
    ``curl`` command as ``data-clipboard-text`` and gets the ``downloaded``
    class when a file with the same base name already exists in
    ``savefolder``.

    The finished page is printed, written to ``downloads.html`` in
    ``savefolder`` and opened with ``explorer``.
    """
    # Local import: only this function needs it.
    from html import escape

    page_doc = PyQuery("""<!doctype html> <html lang="en"> <head> <title>WWDC 2016</title> <script src="data:text/javascript;base64,LyohCiAqIGNsaXBib2FyZC5qcyB2MS41LjEwCiAqIGh0dHBzOi8vemVub3JvY2hhLmdpdGh1Yi5p by9jbGlwYm9hcmQuanMKICoKICogTGljZW5zZWQgTUlUIMKpIFplbm8gUm9jaGEKICovCiFmdW5j dGlvbih0KXtpZigib2JqZWN0Ij09dHlwZW9mIGV4cG9ydHMmJiJ1bmRlZmluZWQiIT10eXBlb2Yg bW9kdWxlKW1vZHVsZS5leHBvcnRzPXQoKTtlbHNlIGlmKCJmdW5jdGlvbiI9PXR5cGVvZiBkZWZp bmUmJmRlZmluZS5hbWQpZGVmaW5lKFtdLHQpO2Vsc2V7dmFyIGU7ZT0idW5kZWZpbmVkIiE9dHlw ZW9mIHdpbmRvdz93aW5kb3c6InVuZGVmaW5lZCIhPXR5cGVvZiBnbG9iYWw/Z2xvYmFsOiJ1bmRl ZmluZWQiIT10eXBlb2Ygc2VsZj9zZWxmOnRoaXMsZS5DbGlwYm9hcmQ9dCgpfX0oZnVuY3Rpb24o KXt2YXIgdCxlLG47cmV0dXJuIGZ1bmN0aW9uIHQoZSxuLG8pe2Z1bmN0aW9uIGkoYyxhKXtpZigh bltjXSl7aWYoIWVbY10pe3ZhciBzPSJmdW5jdGlvbiI9PXR5cGVvZiByZXF1aXJlJiZyZXF1aXJl O2lmKCFhJiZzKXJldHVybiBzKGMsITApO2lmKHIpcmV0dXJuIHIoYywhMCk7dmFyIGw9bmV3IEVy cm9yKCJDYW5ub3QgZmluZCBtb2R1bGUgJyIrYysiJyIpO3Rocm93IGwuY29kZT0iTU9EVUxFX05P VF9GT1VORCIsbH12YXIgdT1uW2NdPXtleHBvcnRzOnt9fTtlW2NdWzBdLmNhbGwodS5leHBvcnRz LGZ1bmN0aW9uKHQpe3ZhciBuPWVbY11bMV1bdF07cmV0dXJuIGkobj9uOnQpfSx1LHUuZXhwb3J0 cyx0LGUsbixvKX1yZXR1cm4gbltjXS5leHBvcnRzfWZvcih2YXIgcj0iZnVuY3Rpb24iPT10eXBl b2YgcmVxdWlyZSYmcmVxdWlyZSxjPTA7YzxvLmxlbmd0aDtjKyspaShvW2NdKTtyZXR1cm4gaX0o ezE6W2Z1bmN0aW9uKHQsZSxuKXt2YXIgbz10KCJtYXRjaGVzLXNlbGVjdG9yIik7ZS5leHBvcnRz PWZ1bmN0aW9uKHQsZSxuKXtmb3IodmFyIGk9bj90OnQucGFyZW50Tm9kZTtpJiZpIT09ZG9jdW1l bnQ7KXtpZihvKGksZSkpcmV0dXJuIGk7aT1pLnBhcmVudE5vZGV9fX0seyJtYXRjaGVzLXNlbGVj dG9yIjo1fV0sMjpbZnVuY3Rpb24odCxlLG4pe2Z1bmN0aW9uIG8odCxlLG4sbyxyKXt2YXIgYz1p LmFwcGx5KHRoaXMsYXJndW1lbnRzKTtyZXR1cm4gdC5hZGRFdmVudExpc3RlbmVyKG4sYyxyKSx7 ZGVzdHJveTpmdW5jdGlvbigpe3QucmVtb3ZlRXZlbnRMaXN0ZW5lcihuLGMscil9fX1mdW5jdGlv biBpKHQsZSxuLG8pe3JldHVybiBmdW5jdGlvbihuKXtuLmRlbGVnYXRlVGFyZ2V0PXIobi50YXJn ZXQsZSwhMCksbi5kZWxlZ2F0ZVRhcmdldCYmby5jYWxsKHQsbil9fXZhciByPXQoImNsb3Nlc3Qi KTtlLmV4cG9ydHM9b30se2Nsb3Nlc3Q6MX1dLDM6W2Z1bmN0aW9uKHQsZSxuKXtuLm5vZGU9ZnVu 
Y3Rpb24odCl7cmV0dXJuIHZvaWQgMCE9PXQmJnQgaW5zdGFuY2VvZiBIVE1MRWxlbWVudCYmMT09 PXQubm9kZVR5cGV9LG4ubm9kZUxpc3Q9ZnVuY3Rpb24odCl7dmFyIGU9T2JqZWN0LnByb3RvdHlw ZS50b1N0cmluZy5jYWxsKHQpO3JldHVybiB2b2lkIDAhPT10JiYoIltvYmplY3QgTm9kZUxpc3Rd Ij09PWV8fCJbb2JqZWN0IEhUTUxDb2xsZWN0aW9uXSI9PT1lKSYmImxlbmd0aCJpbiB0JiYoMD09 PXQubGVuZ3RofHxuLm5vZGUodFswXSkpfSxuLnN0cmluZz1mdW5jdGlvbih0KXtyZXR1cm4ic3Ry aW5nIj09dHlwZW9mIHR8fHQgaW5zdGFuY2VvZiBTdHJpbmd9LG4uZm49ZnVuY3Rpb24odCl7dmFy IGU9T2JqZWN0LnByb3RvdHlwZS50b1N0cmluZy5jYWxsKHQpO3JldHVybiJbb2JqZWN0IEZ1bmN0 aW9uXSI9PT1lfX0se31dLDQ6W2Z1bmN0aW9uKHQsZSxuKXtmdW5jdGlvbiBvKHQsZSxuKXtpZigh dCYmIWUmJiFuKXRocm93IG5ldyBFcnJvcigiTWlzc2luZyByZXF1aXJlZCBhcmd1bWVudHMiKTtp ZighYS5zdHJpbmcoZSkpdGhyb3cgbmV3IFR5cGVFcnJvcigiU2Vjb25kIGFyZ3VtZW50IG11c3Qg YmUgYSBTdHJpbmciKTtpZighYS5mbihuKSl0aHJvdyBuZXcgVHlwZUVycm9yKCJUaGlyZCBhcmd1 bWVudCBtdXN0IGJlIGEgRnVuY3Rpb24iKTtpZihhLm5vZGUodCkpcmV0dXJuIGkodCxlLG4pO2lm KGEubm9kZUxpc3QodCkpcmV0dXJuIHIodCxlLG4pO2lmKGEuc3RyaW5nKHQpKXJldHVybiBjKHQs ZSxuKTt0aHJvdyBuZXcgVHlwZUVycm9yKCJGaXJzdCBhcmd1bWVudCBtdXN0IGJlIGEgU3RyaW5n LCBIVE1MRWxlbWVudCwgSFRNTENvbGxlY3Rpb24sIG9yIE5vZGVMaXN0Iil9ZnVuY3Rpb24gaSh0 LGUsbil7cmV0dXJuIHQuYWRkRXZlbnRMaXN0ZW5lcihlLG4pLHtkZXN0cm95OmZ1bmN0aW9uKCl7 dC5yZW1vdmVFdmVudExpc3RlbmVyKGUsbil9fX1mdW5jdGlvbiByKHQsZSxuKXtyZXR1cm4gQXJy YXkucHJvdG90eXBlLmZvckVhY2guY2FsbCh0LGZ1bmN0aW9uKHQpe3QuYWRkRXZlbnRMaXN0ZW5l cihlLG4pfSkse2Rlc3Ryb3k6ZnVuY3Rpb24oKXtBcnJheS5wcm90b3R5cGUuZm9yRWFjaC5jYWxs KHQsZnVuY3Rpb24odCl7dC5yZW1vdmVFdmVudExpc3RlbmVyKGUsbil9KX19fWZ1bmN0aW9uIGMo dCxlLG4pe3JldHVybiBzKGRvY3VtZW50LmJvZHksdCxlLG4pfXZhciBhPXQoIi4vaXMiKSxzPXQo ImRlbGVnYXRlIik7ZS5leHBvcnRzPW99LHsiLi9pcyI6MyxkZWxlZ2F0ZToyfV0sNTpbZnVuY3Rp b24odCxlLG4pe2Z1bmN0aW9uIG8odCxlKXtpZihyKXJldHVybiByLmNhbGwodCxlKTtmb3IodmFy IG49dC5wYXJlbnROb2RlLnF1ZXJ5U2VsZWN0b3JBbGwoZSksbz0wO288bi5sZW5ndGg7KytvKWlm KG5bb109PXQpcmV0dXJuITA7cmV0dXJuITF9dmFyIGk9RWxlbWVudC5wcm90b3R5cGUscj1pLm1h 
dGNoZXNTZWxlY3Rvcnx8aS53ZWJraXRNYXRjaGVzU2VsZWN0b3J8fGkubW96TWF0Y2hlc1NlbGVj dG9yfHxpLm1zTWF0Y2hlc1NlbGVjdG9yfHxpLm9NYXRjaGVzU2VsZWN0b3I7ZS5leHBvcnRzPW99 LHt9XSw2OltmdW5jdGlvbih0LGUsbil7ZnVuY3Rpb24gbyh0KXt2YXIgZTtpZigiSU5QVVQiPT09 dC5ub2RlTmFtZXx8IlRFWFRBUkVBIj09PXQubm9kZU5hbWUpdC5mb2N1cygpLHQuc2V0U2VsZWN0 aW9uUmFuZ2UoMCx0LnZhbHVlLmxlbmd0aCksZT10LnZhbHVlO2Vsc2V7dC5oYXNBdHRyaWJ1dGUo ImNvbnRlbnRlZGl0YWJsZSIpJiZ0LmZvY3VzKCk7dmFyIG49d2luZG93LmdldFNlbGVjdGlvbigp LG89ZG9jdW1lbnQuY3JlYXRlUmFuZ2UoKTtvLnNlbGVjdE5vZGVDb250ZW50cyh0KSxuLnJlbW92 ZUFsbFJhbmdlcygpLG4uYWRkUmFuZ2UobyksZT1uLnRvU3RyaW5nKCl9cmV0dXJuIGV9ZS5leHBv cnRzPW99LHt9XSw3OltmdW5jdGlvbih0LGUsbil7ZnVuY3Rpb24gbygpe31vLnByb3RvdHlwZT17 b246ZnVuY3Rpb24odCxlLG4pe3ZhciBvPXRoaXMuZXx8KHRoaXMuZT17fSk7cmV0dXJuKG9bdF18 fChvW3RdPVtdKSkucHVzaCh7Zm46ZSxjdHg6bn0pLHRoaXN9LG9uY2U6ZnVuY3Rpb24odCxlLG4p e2Z1bmN0aW9uIG8oKXtpLm9mZih0LG8pLGUuYXBwbHkobixhcmd1bWVudHMpfXZhciBpPXRoaXM7 cmV0dXJuIG8uXz1lLHRoaXMub24odCxvLG4pfSxlbWl0OmZ1bmN0aW9uKHQpe3ZhciBlPVtdLnNs aWNlLmNhbGwoYXJndW1lbnRzLDEpLG49KCh0aGlzLmV8fCh0aGlzLmU9e30pKVt0XXx8W10pLnNs aWNlKCksbz0wLGk9bi5sZW5ndGg7Zm9yKG87aT5vO28rKyluW29dLmZuLmFwcGx5KG5bb10uY3R4 LGUpO3JldHVybiB0aGlzfSxvZmY6ZnVuY3Rpb24odCxlKXt2YXIgbj10aGlzLmV8fCh0aGlzLmU9 e30pLG89blt0XSxpPVtdO2lmKG8mJmUpZm9yKHZhciByPTAsYz1vLmxlbmd0aDtjPnI7cisrKW9b cl0uZm4hPT1lJiZvW3JdLmZuLl8hPT1lJiZpLnB1c2gob1tyXSk7cmV0dXJuIGkubGVuZ3RoP25b dF09aTpkZWxldGUgblt0XSx0aGlzfX0sZS5leHBvcnRzPW99LHt9XSw4OltmdW5jdGlvbihlLG4s byl7IWZ1bmN0aW9uKGkscil7aWYoImZ1bmN0aW9uIj09dHlwZW9mIHQmJnQuYW1kKXQoWyJtb2R1 bGUiLCJzZWxlY3QiXSxyKTtlbHNlIGlmKCJ1bmRlZmluZWQiIT10eXBlb2YgbylyKG4sZSgic2Vs ZWN0IikpO2Vsc2V7dmFyIGM9e2V4cG9ydHM6e319O3IoYyxpLnNlbGVjdCksaS5jbGlwYm9hcmRB Y3Rpb249Yy5leHBvcnRzfX0odGhpcyxmdW5jdGlvbih0LGUpeyJ1c2Ugc3RyaWN0IjtmdW5jdGlv biBuKHQpe3JldHVybiB0JiZ0Ll9fZXNNb2R1bGU/dDp7ImRlZmF1bHQiOnR9fWZ1bmN0aW9uIG8o dCxlKXtpZighKHQgaW5zdGFuY2VvZiBlKSl0aHJvdyBuZXcgVHlwZUVycm9yKCJDYW5ub3QgY2Fs 
bCBhIGNsYXNzIGFzIGEgZnVuY3Rpb24iKX12YXIgaT1uKGUpLHI9ImZ1bmN0aW9uIj09dHlwZW9m IFN5bWJvbCYmInN5bWJvbCI9PXR5cGVvZiBTeW1ib2wuaXRlcmF0b3I/ZnVuY3Rpb24odCl7cmV0 dXJuIHR5cGVvZiB0fTpmdW5jdGlvbih0KXtyZXR1cm4gdCYmImZ1bmN0aW9uIj09dHlwZW9mIFN5 bWJvbCYmdC5jb25zdHJ1Y3Rvcj09PVN5bWJvbD8ic3ltYm9sIjp0eXBlb2YgdH0sYz1mdW5jdGlv bigpe2Z1bmN0aW9uIHQodCxlKXtmb3IodmFyIG49MDtuPGUubGVuZ3RoO24rKyl7dmFyIG89ZVtu XTtvLmVudW1lcmFibGU9by5lbnVtZXJhYmxlfHwhMSxvLmNvbmZpZ3VyYWJsZT0hMCwidmFsdWUi aW4gbyYmKG8ud3JpdGFibGU9ITApLE9iamVjdC5kZWZpbmVQcm9wZXJ0eSh0LG8ua2V5LG8pfX1y ZXR1cm4gZnVuY3Rpb24oZSxuLG8pe3JldHVybiBuJiZ0KGUucHJvdG90eXBlLG4pLG8mJnQoZSxv KSxlfX0oKSxhPWZ1bmN0aW9uKCl7ZnVuY3Rpb24gdChlKXtvKHRoaXMsdCksdGhpcy5yZXNvbHZl T3B0aW9ucyhlKSx0aGlzLmluaXRTZWxlY3Rpb24oKX1yZXR1cm4gdC5wcm90b3R5cGUucmVzb2x2 ZU9wdGlvbnM9ZnVuY3Rpb24gdCgpe3ZhciBlPWFyZ3VtZW50cy5sZW5ndGg8PTB8fHZvaWQgMD09 PWFyZ3VtZW50c1swXT97fTphcmd1bWVudHNbMF07dGhpcy5hY3Rpb249ZS5hY3Rpb24sdGhpcy5l bWl0dGVyPWUuZW1pdHRlcix0aGlzLnRhcmdldD1lLnRhcmdldCx0aGlzLnRleHQ9ZS50ZXh0LHRo aXMudHJpZ2dlcj1lLnRyaWdnZXIsdGhpcy5zZWxlY3RlZFRleHQ9IiJ9LHQucHJvdG90eXBlLmlu aXRTZWxlY3Rpb249ZnVuY3Rpb24gdCgpe3RoaXMudGV4dD90aGlzLnNlbGVjdEZha2UoKTp0aGlz LnRhcmdldCYmdGhpcy5zZWxlY3RUYXJnZXQoKX0sdC5wcm90b3R5cGUuc2VsZWN0RmFrZT1mdW5j dGlvbiB0KCl7dmFyIGU9dGhpcyxuPSJydGwiPT1kb2N1bWVudC5kb2N1bWVudEVsZW1lbnQuZ2V0 QXR0cmlidXRlKCJkaXIiKTt0aGlzLnJlbW92ZUZha2UoKSx0aGlzLmZha2VIYW5kbGVyPWRvY3Vt ZW50LmJvZHkuYWRkRXZlbnRMaXN0ZW5lcigiY2xpY2siLGZ1bmN0aW9uKCl7cmV0dXJuIGUucmVt b3ZlRmFrZSgpfSksdGhpcy5mYWtlRWxlbT1kb2N1bWVudC5jcmVhdGVFbGVtZW50KCJ0ZXh0YXJl YSIpLHRoaXMuZmFrZUVsZW0uc3R5bGUuZm9udFNpemU9IjEycHQiLHRoaXMuZmFrZUVsZW0uc3R5 bGUuYm9yZGVyPSIwIix0aGlzLmZha2VFbGVtLnN0eWxlLnBhZGRpbmc9IjAiLHRoaXMuZmFrZUVs ZW0uc3R5bGUubWFyZ2luPSIwIix0aGlzLmZha2VFbGVtLnN0eWxlLnBvc2l0aW9uPSJmaXhlZCIs dGhpcy5mYWtlRWxlbS5zdHlsZVtuPyJyaWdodCI6ImxlZnQiXT0iLTk5OTlweCIsdGhpcy5mYWtl RWxlbS5zdHlsZS50b3A9KHdpbmRvdy5wYWdlWU9mZnNldHx8ZG9jdW1lbnQuZG9jdW1lbnRFbGVt 
ZW50LnNjcm9sbFRvcCkrInB4Iix0aGlzLmZha2VFbGVtLnNldEF0dHJpYnV0ZSgicmVhZG9ubHki LCIiKSx0aGlzLmZha2VFbGVtLnZhbHVlPXRoaXMudGV4dCxkb2N1bWVudC5ib2R5LmFwcGVuZENo aWxkKHRoaXMuZmFrZUVsZW0pLHRoaXMuc2VsZWN0ZWRUZXh0PSgwLGkuZGVmYXVsdCkodGhpcy5m YWtlRWxlbSksdGhpcy5jb3B5VGV4dCgpfSx0LnByb3RvdHlwZS5yZW1vdmVGYWtlPWZ1bmN0aW9u IHQoKXt0aGlzLmZha2VIYW5kbGVyJiYoZG9jdW1lbnQuYm9keS5yZW1vdmVFdmVudExpc3RlbmVy KCJjbGljayIpLHRoaXMuZmFrZUhhbmRsZXI9bnVsbCksdGhpcy5mYWtlRWxlbSYmKGRvY3VtZW50 LmJvZHkucmVtb3ZlQ2hpbGQodGhpcy5mYWtlRWxlbSksdGhpcy5mYWtlRWxlbT1udWxsKX0sdC5w cm90b3R5cGUuc2VsZWN0VGFyZ2V0PWZ1bmN0aW9uIHQoKXt0aGlzLnNlbGVjdGVkVGV4dD0oMCxp LmRlZmF1bHQpKHRoaXMudGFyZ2V0KSx0aGlzLmNvcHlUZXh0KCl9LHQucHJvdG90eXBlLmNvcHlU ZXh0PWZ1bmN0aW9uIHQoKXt2YXIgZT12b2lkIDA7dHJ5e2U9ZG9jdW1lbnQuZXhlY0NvbW1hbmQo dGhpcy5hY3Rpb24pfWNhdGNoKG4pe2U9ITF9dGhpcy5oYW5kbGVSZXN1bHQoZSl9LHQucHJvdG90 eXBlLmhhbmRsZVJlc3VsdD1mdW5jdGlvbiB0KGUpe2U/dGhpcy5lbWl0dGVyLmVtaXQoInN1Y2Nl c3MiLHthY3Rpb246dGhpcy5hY3Rpb24sdGV4dDp0aGlzLnNlbGVjdGVkVGV4dCx0cmlnZ2VyOnRo aXMudHJpZ2dlcixjbGVhclNlbGVjdGlvbjp0aGlzLmNsZWFyU2VsZWN0aW9uLmJpbmQodGhpcyl9 KTp0aGlzLmVtaXR0ZXIuZW1pdCgiZXJyb3IiLHthY3Rpb246dGhpcy5hY3Rpb24sdHJpZ2dlcjp0 aGlzLnRyaWdnZXIsY2xlYXJTZWxlY3Rpb246dGhpcy5jbGVhclNlbGVjdGlvbi5iaW5kKHRoaXMp fSl9LHQucHJvdG90eXBlLmNsZWFyU2VsZWN0aW9uPWZ1bmN0aW9uIHQoKXt0aGlzLnRhcmdldCYm dGhpcy50YXJnZXQuYmx1cigpLHdpbmRvdy5nZXRTZWxlY3Rpb24oKS5yZW1vdmVBbGxSYW5nZXMo KX0sdC5wcm90b3R5cGUuZGVzdHJveT1mdW5jdGlvbiB0KCl7dGhpcy5yZW1vdmVGYWtlKCl9LGMo dCxbe2tleToiYWN0aW9uIixzZXQ6ZnVuY3Rpb24gdCgpe3ZhciBlPWFyZ3VtZW50cy5sZW5ndGg8 PTB8fHZvaWQgMD09PWFyZ3VtZW50c1swXT8iY29weSI6YXJndW1lbnRzWzBdO2lmKHRoaXMuX2Fj dGlvbj1lLCJjb3B5IiE9PXRoaXMuX2FjdGlvbiYmImN1dCIhPT10aGlzLl9hY3Rpb24pdGhyb3cg bmV3IEVycm9yKCdJbnZhbGlkICJhY3Rpb24iIHZhbHVlLCB1c2UgZWl0aGVyICJjb3B5IiBvciAi Y3V0IicpfSxnZXQ6ZnVuY3Rpb24gdCgpe3JldHVybiB0aGlzLl9hY3Rpb259fSx7a2V5OiJ0YXJn ZXQiLHNldDpmdW5jdGlvbiB0KGUpe2lmKHZvaWQgMCE9PWUpe2lmKCFlfHwib2JqZWN0IiE9PSgi 
dW5kZWZpbmVkIj09dHlwZW9mIGU/InVuZGVmaW5lZCI6cihlKSl8fDEhPT1lLm5vZGVUeXBlKXRo cm93IG5ldyBFcnJvcignSW52YWxpZCAidGFyZ2V0IiB2YWx1ZSwgdXNlIGEgdmFsaWQgRWxlbWVu dCcpO2lmKCJjb3B5Ij09PXRoaXMuYWN0aW9uJiZlLmhhc0F0dHJpYnV0ZSgiZGlzYWJsZWQiKSl0 aHJvdyBuZXcgRXJyb3IoJ0ludmFsaWQgInRhcmdldCIgYXR0cmlidXRlLiBQbGVhc2UgdXNlICJy ZWFkb25seSIgaW5zdGVhZCBvZiAiZGlzYWJsZWQiIGF0dHJpYnV0ZScpO2lmKCJjdXQiPT09dGhp cy5hY3Rpb24mJihlLmhhc0F0dHJpYnV0ZSgicmVhZG9ubHkiKXx8ZS5oYXNBdHRyaWJ1dGUoImRp c2FibGVkIikpKXRocm93IG5ldyBFcnJvcignSW52YWxpZCAidGFyZ2V0IiBhdHRyaWJ1dGUuIFlv dSBjYW5cJ3QgY3V0IHRleHQgZnJvbSBlbGVtZW50cyB3aXRoICJyZWFkb25seSIgb3IgImRpc2Fi bGVkIiBhdHRyaWJ1dGVzJyk7dGhpcy5fdGFyZ2V0PWV9fSxnZXQ6ZnVuY3Rpb24gdCgpe3JldHVy biB0aGlzLl90YXJnZXR9fV0pLHR9KCk7dC5leHBvcnRzPWF9KX0se3NlbGVjdDo2fV0sOTpbZnVu Y3Rpb24oZSxuLG8peyFmdW5jdGlvbihpLHIpe2lmKCJmdW5jdGlvbiI9PXR5cGVvZiB0JiZ0LmFt ZCl0KFsibW9kdWxlIiwiLi9jbGlwYm9hcmQtYWN0aW9uIiwidGlueS1lbWl0dGVyIiwiZ29vZC1s aXN0ZW5lciJdLHIpO2Vsc2UgaWYoInVuZGVmaW5lZCIhPXR5cGVvZiBvKXIobixlKCIuL2NsaXBi b2FyZC1hY3Rpb24iKSxlKCJ0aW55LWVtaXR0ZXIiKSxlKCJnb29kLWxpc3RlbmVyIikpO2Vsc2V7 dmFyIGM9e2V4cG9ydHM6e319O3IoYyxpLmNsaXBib2FyZEFjdGlvbixpLnRpbnlFbWl0dGVyLGku Z29vZExpc3RlbmVyKSxpLmNsaXBib2FyZD1jLmV4cG9ydHN9fSh0aGlzLGZ1bmN0aW9uKHQsZSxu LG8peyJ1c2Ugc3RyaWN0IjtmdW5jdGlvbiBpKHQpe3JldHVybiB0JiZ0Ll9fZXNNb2R1bGU/dDp7 ImRlZmF1bHQiOnR9fWZ1bmN0aW9uIHIodCxlKXtpZighKHQgaW5zdGFuY2VvZiBlKSl0aHJvdyBu ZXcgVHlwZUVycm9yKCJDYW5ub3QgY2FsbCBhIGNsYXNzIGFzIGEgZnVuY3Rpb24iKX1mdW5jdGlv biBjKHQsZSl7aWYoIXQpdGhyb3cgbmV3IFJlZmVyZW5jZUVycm9yKCJ0aGlzIGhhc24ndCBiZWVu IGluaXRpYWxpc2VkIC0gc3VwZXIoKSBoYXNuJ3QgYmVlbiBjYWxsZWQiKTtyZXR1cm4hZXx8Im9i amVjdCIhPXR5cGVvZiBlJiYiZnVuY3Rpb24iIT10eXBlb2YgZT90OmV9ZnVuY3Rpb24gYSh0LGUp e2lmKCJmdW5jdGlvbiIhPXR5cGVvZiBlJiZudWxsIT09ZSl0aHJvdyBuZXcgVHlwZUVycm9yKCJT dXBlciBleHByZXNzaW9uIG11c3QgZWl0aGVyIGJlIG51bGwgb3IgYSBmdW5jdGlvbiwgbm90ICIr dHlwZW9mIGUpO3QucHJvdG90eXBlPU9iamVjdC5jcmVhdGUoZSYmZS5wcm90b3R5cGUse2NvbnN0 
cnVjdG9yOnt2YWx1ZTp0LGVudW1lcmFibGU6ITEsd3JpdGFibGU6ITAsY29uZmlndXJhYmxlOiEw fX0pLGUmJihPYmplY3Quc2V0UHJvdG90eXBlT2Y/T2JqZWN0LnNldFByb3RvdHlwZU9mKHQsZSk6 dC5fX3Byb3RvX189ZSl9ZnVuY3Rpb24gcyh0LGUpe3ZhciBuPSJkYXRhLWNsaXBib2FyZC0iK3Q7 aWYoZS5oYXNBdHRyaWJ1dGUobikpcmV0dXJuIGUuZ2V0QXR0cmlidXRlKG4pfXZhciBsPWkoZSks dT1pKG4pLGY9aShvKSxkPWZ1bmN0aW9uKHQpe2Z1bmN0aW9uIGUobixvKXtyKHRoaXMsZSk7dmFy IGk9Yyh0aGlzLHQuY2FsbCh0aGlzKSk7cmV0dXJuIGkucmVzb2x2ZU9wdGlvbnMobyksaS5saXN0 ZW5DbGljayhuKSxpfXJldHVybiBhKGUsdCksZS5wcm90b3R5cGUucmVzb2x2ZU9wdGlvbnM9ZnVu Y3Rpb24gdCgpe3ZhciBlPWFyZ3VtZW50cy5sZW5ndGg8PTB8fHZvaWQgMD09PWFyZ3VtZW50c1sw XT97fTphcmd1bWVudHNbMF07dGhpcy5hY3Rpb249ImZ1bmN0aW9uIj09dHlwZW9mIGUuYWN0aW9u P2UuYWN0aW9uOnRoaXMuZGVmYXVsdEFjdGlvbix0aGlzLnRhcmdldD0iZnVuY3Rpb24iPT10eXBl b2YgZS50YXJnZXQ/ZS50YXJnZXQ6dGhpcy5kZWZhdWx0VGFyZ2V0LHRoaXMudGV4dD0iZnVuY3Rp b24iPT10eXBlb2YgZS50ZXh0P2UudGV4dDp0aGlzLmRlZmF1bHRUZXh0fSxlLnByb3RvdHlwZS5s aXN0ZW5DbGljaz1mdW5jdGlvbiB0KGUpe3ZhciBuPXRoaXM7dGhpcy5saXN0ZW5lcj0oMCxmLmRl ZmF1bHQpKGUsImNsaWNrIixmdW5jdGlvbih0KXtyZXR1cm4gbi5vbkNsaWNrKHQpfSl9LGUucHJv dG90eXBlLm9uQ2xpY2s9ZnVuY3Rpb24gdChlKXt2YXIgbj1lLmRlbGVnYXRlVGFyZ2V0fHxlLmN1 cnJlbnRUYXJnZXQ7dGhpcy5jbGlwYm9hcmRBY3Rpb24mJih0aGlzLmNsaXBib2FyZEFjdGlvbj1u dWxsKSx0aGlzLmNsaXBib2FyZEFjdGlvbj1uZXcgbC5kZWZhdWx0KHthY3Rpb246dGhpcy5hY3Rp b24obiksdGFyZ2V0OnRoaXMudGFyZ2V0KG4pLHRleHQ6dGhpcy50ZXh0KG4pLHRyaWdnZXI6bixl bWl0dGVyOnRoaXN9KX0sZS5wcm90b3R5cGUuZGVmYXVsdEFjdGlvbj1mdW5jdGlvbiB0KGUpe3Jl dHVybiBzKCJhY3Rpb24iLGUpfSxlLnByb3RvdHlwZS5kZWZhdWx0VGFyZ2V0PWZ1bmN0aW9uIHQo ZSl7dmFyIG49cygidGFyZ2V0IixlKTtyZXR1cm4gbj9kb2N1bWVudC5xdWVyeVNlbGVjdG9yKG4p OnZvaWQgMH0sZS5wcm90b3R5cGUuZGVmYXVsdFRleHQ9ZnVuY3Rpb24gdChlKXtyZXR1cm4gcygi dGV4dCIsZSl9LGUucHJvdG90eXBlLmRlc3Ryb3k9ZnVuY3Rpb24gdCgpe3RoaXMubGlzdGVuZXIu ZGVzdHJveSgpLHRoaXMuY2xpcGJvYXJkQWN0aW9uJiYodGhpcy5jbGlwYm9hcmRBY3Rpb24uZGVz dHJveSgpLHRoaXMuY2xpcGJvYXJkQWN0aW9uPW51bGwpfSxlfSh1LmRlZmF1bHQpO3QuZXhwb3J0 
cz1kfSl9LHsiLi9jbGlwYm9hcmQtYWN0aW9uIjo4LCJnb29kLWxpc3RlbmVyIjo0LCJ0aW55LWVt aXR0ZXIiOjd9XX0se30sWzldKSg5KX0pOw== "> </script> <style type="text/css"> *{ font-family: Consolas, Serif; } body{ margin:0; padding: 0; } h1{ text-align: center; } ul{ column-count: 3; column-gap: 20px; } li{ list-style: none; margin-bottom: 0.2em; } .btn{ height : 32px; background-color: white; border-radius: 5px; } .downloaded{ background-color:yellow; } </style> </head> <body> <h1>WWDC 2016 DOWNLOADS</h1> <ul id="downloadlist"> </ul> <script type="text/javascript"> var clipboard = new Clipboard('.btn'); </script> </body> </html>""")

    downloaded_item = """<li><button class="btn downloaded" data-clipboard-text="{}">{}</button></li>"""
    not_downloaded_item = """<li><button class="btn" data-clipboard-text="{}">{}</button></li>"""
    download_list = page_doc("#downloadlist")

    entrance = PyQuery(url=url)
    for raw_video in entrance('li'):
        video = PyQuery(raw_video)
        if video.attr("data-released") != "true":
            continue

        title = video.find("h5").text()
        page = url + video.find("a[href]").attr("href")
        subpage = PyQuery(page)
        dl_url = urllib.parse.urlparse(subpage("a:contains('HD')").attr.href).path
        print("[{}]({})".format(title, dl_url))

        local_file = os.path.join(
            savefolder, os.path.basename(urllib.parse.urlsplit(dl_url).path))
        template = downloaded_item if os.path.exists(local_file) else not_downloaded_item
        command = "curl {} -O -C -".format(dl_url)
        # Escape both values so quotes, '<' or '&' in a title or path cannot
        # break the generated markup.
        download_list.append(template.format(escape(command), escape(title)))

    target_path = os.path.join(savefolder, "downloads.html")
    # Serialize once and reuse the text for the console and the file.
    markup = page_doc.outer_html()
    with open(target_path, "w+", encoding="utf8") as f:
        print(markup)
        f.write(markup)
    subprocess.Popen(["explorer", target_path])