def download(loop, no_parse_links=False, urls=None):
    """Download update data via aspider, then run the recommender if a
    trained model is available.

    Args:
        loop: event loop passed through to ``aspider.download``.
        no_parse_links (bool): when True, aspider does not follow links
            found on downloaded pages.
        urls (tuple | list | None): root URLs to crawl; when falsy the
            function logs a warning and returns without downloading.
    """
    print('start download')
    if urls:
        # NOTE(review): extending sys.argv looks like a workaround for
        # aspider's CLI-style argument handling — confirm still required.
        sys.argv.extend(urls)
    else:
        logger.warning('no links to download')
        return
    count = APP_CONFIG['download.count']
    # Copy so updating the merged options does not mutate the dict stored
    # inside APP_CONFIG (the original updated the shared dict in place,
    # leaking per-call values into the global config).
    extra_options = dict(APP_CONFIG.get('options', {}))
    options = {'no_parse_links': no_parse_links, 'roots': urls, 'count': count}
    extra_options.update(options)
    aspider.download(loop, extra_options)
    try:
        # Imported lazily so download still works when the classifier's
        # dependencies are unavailable.
        import busface.model.classifier as clf
        print('start recommend')
        clf.recommend()
    except FileNotFoundError:
        # Raised when no trained model file exists yet.
        print('还没有训练好的模型, 无法推荐')
def test_download_fanhao():
    """Download a single fanhao detail page without following links."""
    print('start download')
    seed_urls = ['https://www.busdmm.work/YSN-484']
    download_args = {'roots': seed_urls, 'no_parse_links': True}
    report = aspider.download(extra_args=download_args)
    report.report()
def test_download():
    """Crawl the site root with link parsing on, capped at 100 pages."""
    print('start download')
    crawl_args = {
        'roots': ['https://www.cdnbus.bid'],
        'no_parse_links': False,
        'count': 100,
    }
    report = aspider.download(extra_args=crawl_args)
    report.report()
def main():
    """Crawl starting from root_url, then write the collected movies,
    sorted by rank, to ``top250.txt`` (also echoed to stdout)."""
    options = {'roots': [root_url]}
    stats = aspider.download(extra_args=options)
    stats.report()
    fname = 'top250.txt'
    sorted_movies_250 = sorted(movies_250, key=lambda m: m.rank)
    with open(fname, 'w') as f:
        for movie in sorted_movies_250:
            # Format once; the original duplicated the f-string for the
            # stdout and file copies, a drift hazard if the format changes.
            line = f'#{movie.rank:<10} {movie.score:<10.2f} - {movie.title}'
            print(line)
            print(line, file=f)
def test_routing1():
    """Pass roots to aspider and verify the jpg route handler fires."""

    # Raw string: '\w' inside a plain literal is an invalid escape sequence
    # (SyntaxWarning on modern Python, eventually an error).
    @router.route(r'/\w+\.jpg', no_parse_links=True)
    def parse_item(text, path):
        logger.debug('parse_item')
        print(path)

    roots = ['https://pics.javcdn.pw/cover/7btq_b.jpg']
    extra_args = {'roots': roots, 'no_parse_links': True}
    stats_report = aspider.download(extra_args=extra_args)
    stats_report.report()
def test_upload():
    """Read a fanhao list from file, tag it locally as LIKE, and download
    detail pages for any fanhao missing from the local store."""
    print('start read from file')
    with open('./YuaMikami.txt', 'r') as file:
        fanhao_list = file.read()
    tag_like = RATE_VALUE.LIKE
    missed_fanhao, local_file_count, tag_file_count = add_local_fanhao(
        fanhao_list, tag_like)
    # Truthiness check instead of len(...) > 0 (PEP 8 idiom).
    if missed_fanhao:
        print('start download')
        urls = [
            bus_spider.get_url_by_fanhao(fanhao) for fanhao in missed_fanhao
        ]
        extra_args = {'roots': urls, 'no_parse_links': True}
        stats = aspider.download(extra_args=extra_args)
        stats.report()
def upload(name):
    """Load the fanhao list stored at ``<model_path>/<name>.txt``, tag it
    locally as LIKE, and download any fanhao missing from the local store.

    Args:
        name (str): basename (without extension) of the fanhao list file.
    """
    print('start read from file')
    fpath = '{}/{}.txt'.format(model_path, name)
    # Distinct name for the handle: the original reused `file` for both the
    # path string and the open file object, which was confusing shadowing.
    with open(fpath, 'r') as fh:
        fanhao_list = fh.read()
    tag_like = RATE_VALUE.LIKE
    missed_fanhao, local_file_count, tag_file_count = add_local_fanhao(
        fanhao_list, tag_like)
    if missed_fanhao:  # truthiness idiom instead of len(...) > 0
        print('start download')
        urls = [
            bus_spider.get_url_by_fanhao(fanhao) for fanhao in missed_fanhao
        ]
        extra_args = {'roots': urls, 'no_parse_links': True}
        stats = aspider.download(extra_args=extra_args)
        stats.report()
def test_routing1():
    """Pass roots to aspider; page routes follow links, item routes do not."""

    # Raw strings: '\d' in a plain literal is an invalid escape sequence
    # (SyntaxWarning on modern Python, eventually an error).
    @router.route(r'/page/\d+')
    def parse_page(text, path):
        print(path)

    @router.route(r'/[A-Z]+-[0-9]+', no_parse_links=True)
    def parse_item(text, path):
        print(path)

    roots = ['https://www.cdnbus.bid/page/2']
    extra_args = {
        'roots': roots,
        'count': 40
    }
    stats_report = aspider.download(extra_args=extra_args)
    stats_report.report()
def test_download_fanhao():
    """Crawl from root_url with link parsing enabled, capped at 10 pages."""
    print('start download')
    crawl_options = {
        'roots': root_url,
        'no_parse_links': False,
        'count': 10,
    }
    report = aspider.download(extra_args=crawl_options)
    report.report()