Example #1
0
def install():
    """Install view.

    GET: render the install form.
    POST: create the SQLite database file and its tables, seed the genre
    table from the spider, optionally queue a demo crawl, then redirect
    to the spider page.
    """
    if request.method == 'GET':
        return render_template('install.html',
                               db_file=CONFIG.get("base", "db_file"),
                               frame_data={'title': '安装程序'})
    # Create the database file
    db_file = CONFIG.get("base", "db_file")
    # BUG FIX: the original call LOGGER.info('create db,', db_file) had no
    # %s placeholder, so logging's lazy %-formatting failed and the path
    # was never rendered into the message. Same fix applied below for sql.
    LOGGER.info('create db, %s', db_file)
    db = sqlite3.connect(db_file, check_same_thread=False)
    try:
        # Create the tables
        sql_list = [
            CREATE_AV_LIST_SQL, CREATE_AV_STARS_SQL, CREATE_AV_GENRE_SQL,
            CREATE_AV_EXTEND_SQL
        ]
        for sql in sql_list:
            LOGGER.info('create table,sql: %s', sql)
            db.cursor().execute(sql)
    finally:
        # Always release the connection, even if a CREATE statement fails.
        db.close()

    db_init()
    # Crawl the genre list
    insert(AV_GENRE, Spider.crawl_genre())
    if 'init.crawl' in request.form:
        # Demo star (七ツ森りり) used to seed the initial crawl
        crawl_accurate('star', '17f01576bb6b6755')
        for item in AV_GENRE_DEMO_DATA:
            insert(AV_EXTEND, [{
                'extend_name': item[0],
                'key': item[1],
                'val': item[2]
            }])
    # Redirect to the spider page
    return redirect(url_for('page_spider'))
Example #2
0
def genre():
    """Render the genre page.

    Loads every genre row, counts how many movies in av_list carry each
    genre tag (at most once per movie), groups the rows by their title
    column, and renders the template. If no genres exist yet, triggers a
    crawl and asks the user to refresh.
    """
    rows = query_sql("SELECT linkid,name,title FROM av_genre")

    # No genres stored yet: crawl them, then ask the user to reload.
    if not rows:
        LOGGER.info('spider.genre.fetch')
        insert(AV_GENRE, Spider.crawl_genre())
        return "请刷新"

    # Tally tag frequency across av_list; the set() keeps a tag from
    # being counted twice for the same movie.
    tag_counter = collections.Counter()
    for record in query_sql("SELECT genre AS genre FROM av_list"):
        tag_counter.update(set(record['genre'].strip("|").split("|")))

    # Group genre rows by title, attaching the per-tag count when known.
    grouped = {}
    for row in rows:
        grouped.setdefault(row['title'], [])
        if row['name'] in tag_counter:
            row["genre_count"] = tag_counter[row['name']]
        grouped[row["title"]].append(row)

    return render_template('genre.html',
                           data={AV_GENRE: list(grouped.values())},
                           frame_data={
                               'title': PAGE_TYPE_MAP['genre']['name'],
                               'origin_link': get_url("genre"),
                               'page': {
                                   'count': len(rows)
                               }
                           })
Example #3
0
def crawl_accurate(page_type: str,
                   keyword: str = '',
                   page_start: int = 1,
                   page_limit: int = PAGE_MAX,
                   skip_exist: bool = True):
    """Queue (or directly run) a crawl job for one page type.

    Returns a short status string: 'wrong' for an unknown page type,
    'keyword错误' for a malformed linkid keyword, otherwise a queued /
    finished message.
    """
    valid_types = (
        'movie', 'star', 'genre', 'series', 'studio', 'label', 'director',
        'search', 'popular', 'group', 'all_star', 'all_genre'
    )
    if page_type not in valid_types:
        return 'wrong'

    # The full genre list is crawled synchronously rather than queued.
    if page_type == 'all_genre':
        LOGGER.info('spider.genre.fetch')
        insert(AV_GENRE, Spider.crawl_genre())
        return '抓取完毕'

    # A "group" crawl is just a search with a trailing dash on the keyword.
    if page_type == 'group':
        page_type = 'search'
        keyword += '-'

    # Fan out one star-page job per known actor.
    if page_type == 'all_star':
        stars = query_sql("SELECT linkid,name FROM av_stars")
        for star in stars:
            add_work({
                "page_type": "star",
                "keyword": star['linkid'],
                "skip_exist": True,
            })
        return '排队中({})...'.format(len(stars))

    # These page types are addressed by linkid, so validate the keyword.
    linkid_types = (
        'movie', 'star', 'genre', 'series', 'studio', 'label', 'director'
    )
    if page_type in linkid_types and not is_linkid(keyword):
        return 'keyword错误'

    add_work({
        "page_type": page_type,
        "keyword": keyword,
        "page_start": page_start,
        "page_limit": page_limit,
        "skip_exist": skip_exist,
    })
    return '排队中...'