コード例 #1
0
ファイル: __init__.py プロジェクト: bosslsk/hmqf_crawler_hy
def parse(url, settings):
    """Dispatch *url* to its host-specific parser and tag the result.

    The URL is split with ``parse_url`` and checked against *settings*
    through ``query_config``. The parser registered for the host in
    ``parse_dict`` is then called, and the value of the first
    ``source_dict`` key that occurs as a substring of the parsed host is
    stored under ``extra_info['source']``.

    :param url: URL to parse.
    :param settings: passed unchanged to ``query_config``.
    :return: ``(host, path, extra_info)`` as produced by the host parser,
        with ``extra_info['source']`` added.
    :raises HostNotSupportException: if ``query_config`` returns a falsy
        value, if no parser is registered for the host, or if no key of
        ``source_dict`` matches the host.
    """
    host, path, _ = parse_url(url)
    if not query_config(settings, host=host, path=path):
        raise HostNotSupportException("host and path not support")

    # A host may pass the config check yet have no parser registered;
    # report it as unsupported rather than leaking a bare KeyError.
    if host not in parse_dict:
        raise HostNotSupportException("host and path not support")

    host, path, extra_info = parse_dict[host](url)

    # Keys are matched as substrings of the host; the first match in
    # source_dict's iteration order wins, and the scan stops there.
    for key in source_dict:
        if key in host:
            extra_info['source'] = source_dict[key]
            break
    else:
        # Previously this case surfaced as IndexError from ``[...][0]``.
        raise HostNotSupportException("host and path not support")
    return host, path, extra_info
コード例 #2
0
def parse_shangshu(url):
    """Extract the book id from *url*.

    The book id is the second-to-last element obtained by splitting the
    URL path (as returned by ``parse_url``) on ``'/'``.

    :param url: URL to parse.
    :return: ``(host, path, extra_info)`` where ``extra_info`` holds
        ``'book_id'`` and the original ``'url'``.
    """
    host, path, _query = parse_url(url)
    segments = path.split('/')
    extra_info = {
        'book_id': segments[-2],
        'url': url,
    }
    return host, path, extra_info
コード例 #3
0
ファイル: biquge.py プロジェクト: bosslsk/hmqf_crawler_hy
def parse_biquge(url):
    """Extract the book id from *url*.

    The element at index 1 of the ``'/'``-split path is split again on
    ``'_'``, and the piece at index 1 of that is taken as the book id.

    :param url: URL to parse.
    :return: ``(host, path, extra_info)`` where ``extra_info`` holds
        ``'book_id'`` and the original ``'url'``.
    """
    host, path, _query = parse_url(url)
    segment = path.split('/')[1]
    id_part = segment.split('_')[1]
    extra_info = {
        'book_id': id_part,
        'url': url,
    }
    return host, path, extra_info
コード例 #4
0
ファイル: xxsy.py プロジェクト: bosslsk/hmqf_crawler_hy
def parse_xxsy(url):
    """Extract the book id from *url*.

    The element at index 2 of the ``'/'``-split path is taken, and
    everything before its first ``'.'`` is used as the book id.

    :param url: URL to parse.
    :return: ``(host, path, extra_info)`` where ``extra_info`` holds
        ``'book_id'`` and the original ``'url'``.
    """
    host, path, _query = parse_url(url)
    file_name = path.split('/')[2]
    stem = file_name.split('.')[0]
    extra_info = {
        'book_id': stem,
        'url': url,
    }
    return host, path, extra_info
コード例 #5
0
def parse_jjwx(url):
    """Extract the book id from *url*.

    The book id is the ``'novelid'`` entry of the third value returned
    by ``parse_url``.

    :param url: URL to parse.
    :return: ``(host, path, extra_info)`` where ``extra_info`` holds
        ``'book_id'`` and the original ``'url'``.
    """
    host, path, query = parse_url(url)
    novel_id = query['novelid']
    extra_info = {
        'book_id': novel_id,
        'url': url,
    }
    return host, path, extra_info
コード例 #6
0
def parse_qidian(url):
    """Extract the book id from *url*.

    The book id is the last element obtained by splitting the URL path
    (as returned by ``parse_url``) on ``'/'``.

    :param url: URL to parse.
    :return: ``(host, path, extra_info)`` where ``extra_info`` holds
        ``'book_id'`` and the original ``'url'``.
    """
    host, path, _query = parse_url(url)
    segments = path.split('/')
    extra_info = {
        'book_id': segments[-1],
        'url': url,
    }
    return host, path, extra_info