#!/usr/bin/env python
# -*- coding: utf-8 -*-

from codecs import getwriter
from linklist import parse_linklist, serialize_webvtt
from sys import argv, stdin, stdout

stdout = getwriter("utf-8")(stdout)

# Keep only entries without a URL (chapter marks) and render them as WebVTT
# chapters; argv[1] supplies the end time of the last chapter.
data = parse_linklist(stdin.read(), remove_tags=True)
data = filter(lambda e: e["url"] == "", data)
last_chapter_end = argv[1]

stdout.write(serialize_webvtt(data, last_chapter_end))
# Link validator.  The imports below, the enclosing function, and the loop
# header are reconstructed from the excerpt (the element/i variables and the
# validate_links(data) call at the bottom); head/get and the exceptions match
# the requests library.  pbar (a progress bar) and url_status_cache (a cache
# of already verified URLs) are assumed to be set up elsewhere in the script.
import urlparse
from linklist import parse_linklist
from requests import get, head
from requests.exceptions import ConnectionError, Timeout
from sys import stderr, stdin


def validate_links(data):
    for i, element in enumerate(data):
        url = element['url']
        if url == '':
            continue
        scheme = urlparse.urlsplit(url).scheme
        host = urlparse.urlsplit(url).netloc
        if scheme in ('http', 'https') and \
                url_status_cache.get(url) is not True:
            try:
                request = head(url, timeout=10)
                # Some web sites cannot handle HEAD requests,
                # so fall back to GET for them.
                if request.status_code in (403, 405, 500) or \
                        host in ('mobil.morgenpost.de',):
                    request = get(url)
            except Timeout:
                stderr.write('Connection to <%s> timed out.\n' % url)
                exit(1)
            except ConnectionError as e:
                stderr.write('Connection to <%s> failed.\n' % url)
                stderr.write(str(e) + '\n')
                exit(1)
            if request.ok:
                url_status_cache.set(url, request.ok)
            else:
                stderr.write('<%s> is unreachable.\n' % url)
                exit(1)
        pbar.update(i + 1)


data = parse_linklist(stdin.read())
validate_links(data)
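# Sketch, not part of the original script: the validator above only needs a
# url_status_cache object exposing get() and set(); its construction is not
# shown in the excerpt.  A minimal in-memory stand-in with that interface
# could look like this (class name and behaviour are assumptions made for
# illustration, not the real cache used by the script):
class DictStatusCache(object):
    """Dict-backed stand-in for the URL status cache used by validate_links."""

    def __init__(self):
        self._statuses = {}

    def get(self, url):
        # Returns None for URLs that have not been checked yet, so the
        # `is not True` test above treats them as unverified.
        return self._statuses.get(url)

    def set(self, url, status):
        self._statuses[url] = status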
#!/usr/bin/env python
# -*- coding: utf-8 -*-

from linklist import parse_linklist, serialize_html_table
from sys import stdin, stdout

# Keep only entries that actually link somewhere and render them as an HTML
# table.
data = parse_linklist(stdin.read())
data = filter(lambda e: e['url'] != '', data)

stdout.write(serialize_html_table(data))
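# Sketch, not part of the scripts above: the two filters split a parsed link
# list by its 'url' field.  Entries with an empty 'url' become WebVTT chapter
# marks, entries with a non-empty 'url' become rows of the HTML link table.
# The example entries are made up; only the 'url' key is known from the
# scripts, any other fields are assumptions.
entries = [
    {'url': '', 'title': 'Intro'},                        # chapter mark
    {'url': 'https://example.org/', 'title': 'Example'},  # link
]
chapters = [e for e in entries if e['url'] == '']
links = [e for e in entries if e['url'] != '']
assert len(chapters) == 1 and len(links) == 1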