def install():
    """Serve the installer page and, on submit, create and seed the database.

    On a GET request the ``install.html`` template is rendered with the
    configured database file path. For any other request method the function
    creates the SQLite file and its tables, calls ``db_init()``, stores the
    crawled genre list, optionally queues a sample crawl and inserts demo
    extend rows (when the submitted form contains ``init.crawl``), and finally
    redirects to the spider page.

    Returns:
        The rendered install template for GET, otherwise a redirect response
        to the ``page_spider`` endpoint.
    """
    db_file = CONFIG.get("base", "db_file")
    if request.method == 'GET':
        return render_template('install.html', db_file=db_file, frame_data={'title': '安装程序'})

    # Create the database file.
    # The value must go through a %s placeholder: passing it as a bare extra
    # argument makes the logging module fail to format the record.
    LOGGER.info('create db,%s', db_file)
    db = sqlite3.connect(db_file, check_same_thread=False)
    try:
        # Create the tables.
        sql_list = [
            CREATE_AV_LIST_SQL,
            CREATE_AV_STARS_SQL,
            CREATE_AV_GENRE_SQL,
            CREATE_AV_EXTEND_SQL,
        ]
        for sql in sql_list:
            LOGGER.info('create table,sql:%s', sql)
            db.cursor().execute(sql)
    finally:
        # Release the connection even if one of the CREATE statements fails.
        db.close()

    db_init()

    # Crawl and store the genre list.
    insert(AV_GENRE, Spider.crawl_genre())

    if 'init.crawl' in request.form:
        # Queue a crawl for one sample star (七ツ森りり).
        crawl_accurate('star', '17f01576bb6b6755')
        # NOTE(review): the demo rows are assumed to belong to the init.crawl
        # branch; the original nesting should be confirmed.
        for item in AV_GENRE_DEMO_DATA:
            insert(AV_EXTEND, [{
                'extend_name': item[0],
                'key': item[1],
                'val': item[2],
            }])

    # Redirect to the spider page.
    return redirect(url_for('page_spider'))
def genre():
    """Render the genre page with genres grouped by title and per-genre counts.

    If the ``av_genre`` table returns no rows, the genres are crawled and
    inserted and the literal text ``"请刷新"`` is returned instead of a page.
    Otherwise each genre row whose name occurs in ``av_list.genre`` receives a
    ``genre_count`` key, and the rows are grouped by their ``title``.

    Returns:
        The rendered ``genre.html`` template, or the refresh hint string when
        the genre table had to be populated first.
    """
    genre_rows = query_sql("SELECT linkid,name,title FROM av_genre")
    if not genre_rows:
        # Nothing stored yet: fetch the genres now and ask for a reload.
        LOGGER.info('spider.genre.fetch')
        insert(AV_GENRE, Spider.crawl_genre())
        return "请刷新"

    # Count how many av_list rows mention each genre name.
    usage = collections.Counter()
    for movie in query_sql("SELECT genre AS genre FROM av_list"):
        # The set removes duplicates so a name counts once per row.
        names_in_row = set(movie['genre'].strip("|").split("|"))
        usage.update(names_in_row)

    # Group the genre rows by title, attaching the count where one exists.
    grouped = {}
    for entry in genre_rows:
        bucket = grouped.setdefault(entry['title'], [])
        if entry['name'] in usage:
            entry["genre_count"] = usage[entry['name']]
        bucket.append(entry)

    frame_data = {
        'title': PAGE_TYPE_MAP['genre']['name'],
        'origin_link': get_url("genre"),
        'page': {
            'count': len(genre_rows),
        },
    }
    return render_template('genre.html', data={AV_GENRE: list(grouped.values())}, frame_data=frame_data)
def crawl_accurate(page_type: str, keyword: str = '', page_start: int = 1, page_limit: int = PAGE_MAX,
                   skip_exist: bool = True):
    """Validate a crawl request and queue the matching work item(s).

    Args:
        page_type: Kind of page to crawl; must be one of the supported names
            listed below, otherwise ``'wrong'`` is returned.
        keyword: Link id or search keyword, depending on ``page_type``.
        page_start: First page number, passed through to the queued work.
        page_limit: Page limit, passed through to the queued work.
        skip_exist: Passed through to the queued work as ``skip_exist``.

    Returns:
        A status string: ``'wrong'`` for an unknown page type, ``'抓取完毕'``
        after the genre crawl, ``'keyword错误'`` when a link id is required but
        ``is_linkid`` rejects the keyword, or a ``'排队中...'`` style message
        once work has been queued.
    """
    # Page types whose keyword has to pass the is_linkid check.
    linkid_types = ('movie', 'star', 'genre', 'series', 'studio', 'label', 'director')
    other_types = ('search', 'popular', 'group', 'all_star', 'all_genre')

    if page_type not in linkid_types + other_types:
        return 'wrong'

    if page_type == 'all_genre':
        # Genres are crawled and stored immediately rather than queued.
        LOGGER.info('spider.genre.fetch')
        insert(AV_GENRE, Spider.crawl_genre())
        return '抓取完毕'

    if page_type == 'group':
        # A group is handled as a search for the keyword followed by '-'.
        page_type = 'search'
        keyword += '-'

    if page_type == 'all_star':
        # Queue one "star" job per stored star.
        stars = query_sql("SELECT linkid,name FROM av_stars")
        for star in stars:
            add_work({
                "page_type": "star",
                "keyword": star['linkid'],
                "skip_exist": True,
            })
        return '排队中({})...'.format(len(stars))

    if page_type in linkid_types and not is_linkid(keyword):
        return 'keyword错误'

    job = {
        "page_type": page_type,
        "keyword": keyword,
        "page_start": page_start,
        "page_limit": page_limit,
        "skip_exist": skip_exist,
    }
    add_work(job)
    return '排队中...'