Example #1
0
def download_ts_file(m3u8_url: str, ts_urls: List[str]):
    """Download each ts segment in ``ts_urls`` sequentially.

    The save directory is obtained from ``get_ts_ave_dir(m3u8_url)`` and every
    file is named by ``u_file.get_file_name_from_url``. Progress is logged
    after each download as ``current/total``.

    :param m3u8_url: playlist URL; only used to derive the save directory
    :param ts_urls: segment URLs, downloaded in the given order
    """
    target_dir = get_ts_ave_dir(m3u8_url)
    # Counting starts at 1 so the log reads "1/N" ... "N/N".
    for position, segment_url in enumerate(ts_urls, 1):
        segment_name = u_file.get_file_name_from_url(segment_url)
        u_file.download_file(segment_url, segment_name, target_dir, **_REQUESTS_KWARGS)
        progress_message = 'download ts file success({}/{}): {}'.format(
            position, len(ts_urls), segment_url)
        log.info(progress_message)
Example #2
0
def download_pictures(url: str, title: str) -> list:
    """Download every image found in ``figure.img-box`` elements of a page.

    For each figure, the ``data-src`` attribute of its ``img`` child is read,
    everything from the first ``@`` to the end of the line is replaced by
    ``-``, and ``http:`` is prepended (presumably the attribute holds a
    protocol-relative URL -- confirm against the target site). Files are
    saved into ``result`` and named ``<title>-<file name from url>``.

    :param url: page URL to scrape
    :param title: prefix for every saved file name
    :return: always an empty list
    """
    page = BeautifulSoup(u_file.get_content(url, encoding='UTF-8'), 'lxml')

    figures = page.select('figure.img-box')
    log.info('get book elements size: {}'.format(len(figures)))
    for figure in figures:
        raw_src = figure.find('img')['data-src']
        cleaned_src = re.sub(r"@[^\n]+", '-', raw_src)
        image_url = 'http:' + cleaned_src
        target_name = title + '-' + u_file.get_file_name_from_url(image_url)
        u_file.download_file(image_url, target_name, r'result')
    return []
def download_pins(pins: list, board_name: str):
    """Download the image of every pin of a board into ``result/<board_name>``.

    :param pins: items providing an ``'image_url'`` and an ``'id'`` entry;
        the id is passed to ``u_file.download_file`` as the file name
    :param board_name: board name, used for logging and as sub-directory name
    """
    log.info('begin download board: {} pins image, size: {}'.format(board_name, len(pins)))

    board_dir = os.path.join(r'result', board_name)
    for pin in pins:
        image_url = pin['image_url']
        pin_id = pin['id']
        u_file.download_file(image_url, pin_id, path=board_dir, **_REQUESTS_KWARGS)

    log.info('end download board: {} pins image, size: {}'.format(board_name, len(pins)))
Example #4
0
def download_top():
    """Download the files of the top-scored posts into ``result``.

    Posts come from ``query_top_score_posts(10000)``. Each one is re-queried
    through ``query_post`` by id; posts already marked ``'downloaded'`` are
    skipped, all others are downloaded and then marked ``'downloaded'``.
    """
    target_dir = r'result'
    for candidate in query_top_score_posts(10000):
        # Re-query by id to get the record carrying mark/score/file_url.
        post = query_post(candidate.get('id'))
        already_done = post.mark == 'downloaded'
        if already_done:
            u_log.info('the post has been downloaded. id: {}'.format(post.id))
            continue

        u_log.info('begin download post. id: {}, score: {}, size: {}'.format(
            post.id, post.score, post.file_size))
        u_file.download_file(
            post.file_url,
            u_file.get_file_name_from_url(post.file_url),
            target_dir,
        )
        mark_post(post, 'downloaded')
Example #5
0
def download_tag(tag: str = 'f******o'):
    """Download the files of all sufficiently scored posts carrying ``tag``.

    Posts come from ``query_posts_by_tag(tag)``. Each one is re-queried
    through ``query_post`` by id. Posts already marked ``'downloaded'`` and
    posts with a score below 30 are skipped; the rest are downloaded into
    ``result/<tag>`` and then marked ``'downloaded'``.

    :param tag: tag to query; defaults to the previously hard-coded value
    """
    import os  # local import: this block cannot rely on a file-level one

    posts = query_posts_by_tag(tag)
    # os.path.join instead of a literal backslash, so the tag becomes a real
    # sub-directory on every platform (identical result on Windows).
    directory = os.path.join(r'result', tag)
    for post in posts:
        post = query_post(post.get('id'))
        if post.mark == 'downloaded':
            u_log.info('the post has been downloaded. id: {}'.format(post.id))
            continue
        if post.score < 30:
            u_log.info('the post score is low. id: {}, score: {}'.format(post.id, post.score))
            continue
        u_log.info('begin download post. id: {}, score: {}, size: {}'.format(post.id, post.score, post.file_size))
        file_name = u_file.get_file_name_from_url(post.file_url)
        u_file.download_file(post.file_url, file_name, directory)
        mark_post(post, 'downloaded')
Example #6
0
def download_image_collect(image_collect: dict, save_dir=r'result'):
    """Download all images listed on an image-collection page.

    The page at ``image_collect['url']`` is fetched with gb2312 encoding and
    every ``img`` matched by ``ul#showImg > li img`` is downloaded. For each
    ``src`` the ``-lp`` marker is removed and ``http:`` is prepended
    (presumably ``-lp`` marks a thumbnail variant -- confirm against the site).

    :param image_collect: mapping with the keys ``'url'`` and ``'title'``
    :param save_dir: directory handed to ``u_file.download_file``
    """
    page_html = u_file.get_content(image_collect['url'], encoding='gb2312')
    page = BeautifulSoup(page_html, 'lxml')

    img_tags = page.select('ul#showImg > li img')
    log.info('The image collect image size: {}'.format(len(img_tags)))

    # Disabled alternative that went through the download button instead:
    # image_download_button_element = soup.select('span#kk > a')
    # full_image_url = image_download_button_element['href']
    # full_image_url = full_image_url.replace('http://cj.jj20.com/2020/down.html?picurl=', 'http://pic.jj20.com')
    for img_tag in img_tags:
        image_url = 'http:' + img_tag['src'].replace('-lp', '')
        target_name = '{}'.format(image_collect['title']) if False else \
            image_collect['title'] + '-' + u_file.get_file_name_from_url(image_url)
        u_file.download_file(image_url, target_name, save_dir)
Example #7
0
def through_pose(url):
    """Download the pose images numbered 1 to 4 derived from ``url``.

    ``pose_0001`` inside ``url`` is replaced by ``pose_<index>`` (zero padded
    to four digits). The file name is the pose URL without the
    ``http://www.posemaniacs.com/pose/`` prefix, with characters that are
    invalid in file names collapsed to ``-``. If the name contains a key of
    ``PATH_MAP``, the image goes into the directory named by the mapped value
    (placed next to ``./result``); otherwise it goes into ``./result``.
    Iteration stops at the first download reported as failed.

    :param url: URL of the first pose image, containing ``pose_0001``
    """
    # The original message had no placeholder, so the url was never logged.
    log.info('begin through url: {}'.format(url))
    base_path = os.path.abspath(r'./result')
    if not os.path.isdir(base_path):
        os.mkdir(base_path)
    parent_dir = os.path.dirname(base_path)
    for index in range(1, 5):
        # Substitute directly instead of %-formatting the whole URL, which
        # would fail on URLs containing a literal '%'.
        pose_url = url.replace('pose_0001', 'pose_%04d' % index)
        name = re.sub(
            r"[\\/?*<>|\":]+", '-',
            pose_url.replace(r'http://www.posemaniacs.com/pose/', ''))

        # Group images into folders by name. The target is recomputed for
        # every image so one iteration's choice cannot leak into the next,
        # and only the final 'result' component is swapped rather than every
        # 'result' substring of the absolute path.
        save_path = base_path
        for path_key, path_value in PATH_MAP.items():
            if path_key in name:
                save_path = os.path.join(parent_dir, path_value)
                break
        log.info('begin download image from url: {}'.format(pose_url))
        download_status = u_file.download_file(pose_url, name=name, path=save_path)
        if not download_status:
            log.info('download end. index: {}'.format(index))
            break
Example #8
0
def test_download_file():
    """Check that ``u_file.download_file`` stores the file in the given directory.

    Downloads a fixed ``.m4a`` URL as ``19663334`` into ``cache`` and asserts
    that ``cache/19663334.m4a`` exists afterwards (the assertion expects
    ``download_file`` to add the ``.m4a`` extension to the given name).
    """
    url = 'http://aod.cos.tx.xmcdn.com/group20/M01/7E/F8/wKgJJ1eoW8uBquKEACmsecPrn1o863.m4a'
    file_name = '19663334'
    file_path = r'cache'
    u_file.download_file(url, file_name, file_path)
    # Build the expected path with os.path.join so the check is not tied to
    # the Windows path separator.
    expected_file = os.path.join(file_path, file_name + '.m4a')
    u_unittest.assert_true(os.path.isfile(expected_file))