Example no. 1
#!/usr/bin/env python
# -*- coding: utf-8 -*-

from codecs import getwriter
from linklist import parse_linklist, serialize_webvtt
from sys import argv, stdin, stdout

# Wrap stdout so that unicode output is encoded as UTF-8 (Python 2).
stdout = getwriter("utf-8")(stdout)

# Parse the link list from stdin and keep only the entries without a
# URL, which presumably act as chapter markers.
data = parse_linklist(stdin.read(), remove_tags=True)
data = filter(lambda e: e["url"] == "", data)

# The end time of the last chapter is taken from the first argument.
last_chapter_end = argv[1]
stdout.write(serialize_webvtt(data, last_chapter_end))
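A hypothetical invocation of this script; the script and file names are placeholders, and the timestamp format is whatever serialize_webvtt expects:

$ python chapters.py 00:45:00.000 < shownotes.txt > chapters.vtt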
Example no. 2
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import urlparse

from linklist import parse_linklist
from requests import get, head
from requests.exceptions import ConnectionError, Timeout
from sys import exit, stderr, stdin

# url_status_cache (an object with get/set methods) and the pbar
# progress bar are assumed to be set up elsewhere in the original
# script; see the sketch after this example.

def validate_links(data):
    for i, element in enumerate(data):
        url = element['url']
        if url == '':
            continue
        parts = urlparse.urlsplit(url)
        scheme = parts.scheme
        host = parts.netloc
        if scheme in ('http', 'https') and \
                url_status_cache.get(url) is not True:
            try:
                request = head(url, timeout=10)
                # Some web sites cannot handle HEAD requests,
                # so fall back to GET for those.
                if request.status_code in (403, 405, 500) or \
                        host in ('mobil.morgenpost.de',):
                    request = get(url, timeout=10)
            except Timeout:
                stderr.write('Connection to <%s> timed out.\n' % url)
                exit(1)
            except ConnectionError as e:
                stderr.write('Connection to <%s> failed.\n' % url)
                stderr.write(str(e) + '\n')
                exit(1)
            if request.ok:
                url_status_cache.set(url, request.ok)
            else:
                stderr.write('<%s> is unreachable.\n' % url)
                exit(1)
        pbar.update(i + 1)


data = parse_linklist(stdin.read())
validate_links(data)
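The script relies on a url_status_cache and a pbar that are not shown. A minimal sketch of one possible setup, assuming the classic progressbar package and a plain in-memory cache (both are guesses, not taken from the original):

# Hypothetical setup for the names the fragment relies on.
from progressbar import ProgressBar

class DictCache(object):
    """In-memory stand-in for url_status_cache; the original may
    well use a persistent cache instead."""
    def __init__(self):
        self._store = {}

    def get(self, key):
        return self._store.get(key)

    def set(self, key, value):
        self._store[key] = value

url_status_cache = DictCache()
# maxval matches the number of entries in the parsed link list.
pbar = ProgressBar(maxval=len(data)).start()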
Example no. 3
#!/usr/bin/env python
# -*- coding: utf-8 -*-

from linklist import parse_linklist, serialize_html_table
from sys import stdin, stdout

# Parse the link list from stdin, drop entries without a URL, and
# render the remaining links as an HTML table.
data = parse_linklist(stdin.read())
data = filter(lambda e: e['url'] != '', data)
stdout.write(serialize_html_table(data))
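The linklist module itself is not shown in any of the examples. From the calls above, its interface looks roughly like the following sketch; the signatures and docstrings are inferences, not the module's actual code:

def parse_linklist(text, remove_tags=False):
    """Parse raw link-list text into a list of dicts, each with at
    least a 'url' key (empty for entries that carry no link);
    remove_tags presumably strips markup from the entries."""

def serialize_webvtt(data, last_chapter_end):
    """Render the entries as WebVTT chapter cues, using
    last_chapter_end as the end timestamp of the final chapter."""

def serialize_html_table(data):
    """Render the entries as an HTML table, returned as a string."""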