Ejemplo n.º 1
0
def crawl_website():
    """
    Crawl goods from buff and return them in tabulated form.

    Steps (translated from the original notes):
      1. Fetch the names of all top-level categories on buff.
      2. Adjust that list according to the whitelist / blacklist.
      3. Crawl goods by price section and collect them in ``csgo_items``.
      4. Crawl Steam prices and store them on each item.
      5. Return the table object built from ``csgo_items`` by
         ``persist_util.tabulate`` (described in the original notes as a
         pandas table).
    """
    # Names of every top-level (weapon) category available on buff.
    every_category = csgo_all_categories()
    # Per the original notes, final_categories lives in src/category_util and
    # applies the whitelist and blacklist to the category list.
    kept_categories = final_categories(every_category)

    if len(every_category) == len(kept_categories):
        # The lists have the same size, so the whitelist/blacklist did not
        # narrow anything down: crawl across all goods, signalled by None.
        # NOTE(review): only the lengths are compared, not the contents --
        # confirm this is a sufficient "nothing was filtered" check.
        crawl_targets = [None]
    else:
        # Crawl each remaining category separately, by price section.
        crawl_targets = kept_categories

    # Gather every item in the price range for each crawl target.
    csgo_items = [
        item
        for target in crawl_targets
        for item in crawl_goods_by_price_section(target)
    ]

    # Crawl the Steam price for every collected item and attach it to the item.
    enrich_item_with_price_history(csgo_items)
    return persist_util.tabulate(csgo_items)
Ejemplo n.º 2
0
def crawl_website():
    """
    Crawl goods by price section and return them tabulated.

    The full category list from ``csgo_all_categories`` is passed through
    ``final_categories``. When the resulting list has a different length,
    goods are crawled once per resulting category; otherwise a single crawl
    is made with ``None`` as the category. The collected items are handed to
    ``enrich_item_with_price_history`` and the result of
    ``persist_util.tabulate`` is returned.
    """
    all_names = csgo_all_categories()
    filtered_names = final_categories(all_names)

    # A change in length is taken as the sign that category filtering applies.
    # NOTE(review): contents are not compared, only sizes -- confirm intent.
    filtering_applied = len(all_names) != len(filtered_names)

    # None stands for "crawl by price section without category".
    targets = filtered_names if filtering_applied else (None,)

    csgo_items = []
    for target in targets:
        csgo_items += crawl_goods_by_price_section(target)

    # presumably enriches the items in place (return value is unused) -- confirm
    enrich_item_with_price_history(csgo_items)
    return persist_util.tabulate(csgo_items)
Ejemplo n.º 3
0
def crawl_website():
    """
    Crawl goods by price section, log per-category progress, and tabulate.

    Categories from ``csgo_all_categories`` are passed through
    ``final_categories``. If the two lists have the same length, one crawl is
    made without a category (``None``). Otherwise each resulting category is
    crawled in turn and a progress line is logged after it completes. The
    items are then passed to ``enrich_item_with_price_history`` together with
    ``CRAWL_STEAM_ASYNC``, and the result of ``persist_util.tabulate`` is
    returned.
    """
    csgo_items = []

    available = csgo_all_categories()
    wanted = final_categories(available)

    if len(available) == len(wanted):
        # Same size before and after: crawl by price section without category.
        # NOTE(review): only lengths are compared, not contents -- confirm.
        csgo_items.extend(crawl_goods_by_price_section(None))
    else:
        # Crawl by category and price section, reporting progress as we go.
        category_count = len(wanted)
        for position, category in enumerate(wanted, start=1):
            goods = crawl_goods_by_price_section(category)
            csgo_items.extend(goods)
            # Logged after the crawl, so the line marks a finished category.
            log.info('GET category {}/{} for ({}).'.format(position, category_count, category))

    # CRAWL_STEAM_ASYNC presumably selects async Steam crawling -- confirm.
    enrich_item_with_price_history(csgo_items, CRAWL_STEAM_ASYNC)
    return persist_util.tabulate(csgo_items)