Esempio n. 1
0
def download(loop, no_parse_links=False, urls=None):
    """
    Download and update data, then run the recommender.

    Args:
        loop: passed through unchanged as the first positional argument of
            ``aspider.download``.
        no_parse_links: forwarded to the spider as the ``no_parse_links``
            option.
        urls: tuple - tuple of urls. They are appended to ``sys.argv`` and
            used as the spider's ``roots``. When empty or ``None`` a warning
            is logged and the function returns without downloading.
    """
    print('start download')
    if not urls:
        logger.warning('no links to download')
        return
    sys.argv.extend(urls)

    count = APP_CONFIG['download.count']
    # Copy before merging: the object returned by ``APP_CONFIG.get`` may be
    # the configuration's own mapping, and calling ``update`` on it directly
    # would leak this call's roots/count into the shared config.
    extra_options = dict(APP_CONFIG.get('options', {}))
    extra_options.update({
        'no_parse_links': no_parse_links,
        'roots': urls,
        'count': count,
    })

    aspider.download(loop, extra_options)
    try:
        # Imported lazily, inside the try, exactly as the original did.
        import busface.model.classifier as clf

        print('start recommend')
        clf.recommend()
    except FileNotFoundError:
        # The message below tells the user no trained model exists yet, so
        # recommendation is skipped rather than treated as a failure.
        print('还没有训练好的模型, 无法推荐')
Esempio n. 2
0
def test_download_fanhao():
    """Download a single item page without following its links and report."""
    print('start download')
    spider_args = dict(
        roots=['https://www.busdmm.work/YSN-484'],
        no_parse_links=True,
    )
    report = aspider.download(extra_args=spider_args)
    report.report()
Esempio n. 3
0
def test_download():
    """Crawl from the site root with link parsing on and a count of 100."""
    print('start download')
    spider_args = dict(
        roots=['https://www.cdnbus.bid'],
        no_parse_links=False,
        count=100,
    )
    report = aspider.download(extra_args=spider_args)
    report.report()
Esempio n. 4
0
def main():
    """
    Crawl starting at ``root_url``, report stats, and dump the ranked movies.

    After the download finishes, the entries of ``movies_250`` are sorted by
    their ``rank`` attribute; each one is printed to stdout and written to
    ``top250.txt`` as one formatted line.
    """
    stats = aspider.download(extra_args={'roots': [root_url]})
    stats.report()

    ranked = sorted(movies_250, key=lambda m: m.rank)
    # Explicit UTF-8: titles may contain non-ASCII characters, and the
    # platform default encoding (e.g. a legacy Windows code page) could fail
    # to encode them or produce a file that is not portable.
    with open('top250.txt', 'w', encoding='utf-8') as f:
        for movie in ranked:
            # Build the line once so stdout and the file cannot diverge.
            line = f'#{movie.rank:<10} {movie.score:<10.2f} - {movie.title}'
            print(line)
            print(line, file=f)
Esempio n. 5
0
def test_routing1():
    '''
    Pass roots to aspider and route a ``.jpg`` path to a handler.

    A handler is registered for paths matching ``/\\w+\\.jpg`` with link
    parsing disabled, then a single image URL is downloaded and the stats
    are reported.
    '''
    # Raw string: ``\w`` and ``\.`` are regex escapes, not string escapes.
    # In a plain literal they are invalid escape sequences that trigger a
    # warning on current Python versions.
    @router.route(r'/\w+\.jpg', no_parse_links=True)
    def parse_item(text, path):
        logger.debug('parse_item')
        print(path)

    roots = ['https://pics.javcdn.pw/cover/7btq_b.jpg']
    extra_args = {'roots': roots, 'no_parse_links': True}
    stats_report = aspider.download(extra_args=extra_args)
    stats_report.report()
Esempio n. 6
0
def test_upload():
    """
    Tag the items listed in ``./YuaMikami.txt`` as liked and fetch any missing.

    The file content is handed to ``add_local_fanhao`` together with
    ``RATE_VALUE.LIKE``; when it reports missed entries, their URLs are
    downloaded without link parsing and the stats are reported.
    """
    print('start read from file')
    with open('./YuaMikami.txt', 'r') as src:
        content = src.read()

    # Only the first of the three returned values is used here.
    missed, _local_count, _tag_count = add_local_fanhao(
        content, RATE_VALUE.LIKE)
    if len(missed) == 0:
        return

    print('start download')
    targets = []
    for code in missed:
        targets.append(bus_spider.get_url_by_fanhao(code))

    report = aspider.download(
        extra_args={'roots': targets, 'no_parse_links': True})
    report.report()
Esempio n. 7
0
def upload(name):
    """
    Tag the items listed in ``<model_path>/<name>.txt`` as liked.

    The file content is handed to ``add_local_fanhao`` together with
    ``RATE_VALUE.LIKE``; when it reports missed entries, their URLs are
    downloaded without link parsing and the stats are reported.

    Args:
        name: base name (without ``.txt``) of the list file under
            ``model_path``.
    """
    print('start read from file')
    list_path = f'{model_path}/{name}.txt'
    with open(list_path, 'r') as src:
        content = src.read()

    # Only the first of the three returned values is used here.
    missed, _local_count, _tag_count = add_local_fanhao(
        content, RATE_VALUE.LIKE)
    if len(missed) == 0:
        return

    print('start download')
    targets = []
    for code in missed:
        targets.append(bus_spider.get_url_by_fanhao(code))

    report = aspider.download(
        extra_args={'roots': targets, 'no_parse_links': True})
    report.report()
Esempio n. 8
0
def test_routing1():
    '''
    Pass roots to aspider with a page route and an item route registered.

    Both handlers simply print the path they receive. The crawl starts from
    a single listing page with ``count`` set to 40, and the resulting stats
    are reported.
    '''
    # Raw string: ``\d`` is a regex escape, not a string escape. In a plain
    # literal it is an invalid escape sequence that triggers a warning on
    # current Python versions.
    @router.route(r'/page/\d+')
    def parse_page(text, path):
        print(path)

    # The item route is registered with link parsing disabled.
    @router.route('/[A-Z]+-[0-9]+', no_parse_links=True)
    def parse_item(text, path):
        print(path)

    roots = ['https://www.cdnbus.bid/page/2']
    extra_args = {
        'roots': roots,
        'count': 40
    }
    stats_report = aspider.download(extra_args=extra_args)
    stats_report.report()
Esempio n. 9
0
def test_download_fanhao():
    """Crawl from ``root_url`` with link parsing on and a count of 10."""
    print('start download')
    spider_args = dict(roots=root_url, no_parse_links=False, count=10)
    report = aspider.download(extra_args=spider_args)
    report.report()