Code Example #1
File: xxskins.py Project: blackcoffees/spider
def collenction_all_reset(app_id):
    """收集所有数据时,对is_merge进行初始化"""
    sql = """select id from product where app_id=%s and is_merge=1 """ % app_id
    result = pool.find(sql)
    ids = [int(item.get("id")) for item in result]
    ids = tuple(ids)
    sql = """update sale_history set qty=0, updated="%s" WHERE product_id in %s""" % (datetime.datetime.now(), ids)
    pool.commit(sql)
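The queries in these examples are mostly built by "%"-interpolation. As a point of comparison, here is a minimal sketch of the same reset using driver-side parameters; the function name is hypothetical, and it assumes pool.find/pool.commit forward a params sequence to cursor.execute the way save_web_product and set_last_execute_time below do.
def collection_all_reset_parameterized(app_id):
    # hypothetical variant of collenction_all_reset; assumes pool.find/pool.commit
    # pass the params sequence through to the DB driver
    result = pool.find("select id from product where app_id=%s and is_merge=1", (app_id,))
    ids = [int(item.get("id")) for item in result]
    if not ids:
        return
    placeholders = ",".join(["%s"] * len(ids))
    sql = ("update sale_history set qty=0, updated=%s "
           "where product_id in (" + placeholders + ")")
    pool.commit(sql, [datetime.datetime.now()] + ids)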
Code Example #2
def save_web_product(web_site, product_id, web_site_product_id, app_id):
    sql = """select id from web_site_product where web_site=%s and web_site_product_id=%s and app_id=%s 
                                and product_id=%s"""
    result = pool.find(sql,
                       (web_site, web_site_product_id, app_id, product_id))
    if not result:
        sql = """insert into web_site_product(web_site, product_id, web_site_product_id, created, app_id) 
                                                    values("%s","%s","%s",%s, %s)"""
        pool.commit(sql, (web_site, product_id, web_site_product_id,
                          datetime.datetime.now(), app_id))
Code Example #3
def set_last_execute_time(func_name):
    now = datetime.datetime.now()
    last_execute_time = datetime.datetime(year=now.year,
                                          month=now.month,
                                          day=now.day,
                                          hour=0,
                                          minute=0,
                                          second=0)
    sql = """update spider_conf set last_execute=%s, updated=%s where function_name=%s"""
    param = [last_execute_time, datetime.datetime.now(), func_name]
    pool.commit(sql, param)
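For reference, the standard library can produce the same start-of-day value more compactly; a minimal equivalent sketch:
import datetime

def start_of_today():
    # midnight of the current day, equivalent to the manual construction above
    now = datetime.datetime.now()
    return datetime.datetime.combine(now.date(), datetime.time())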
Code Example #4
File: xxskins.py Project: blackcoffees/spider
def collection_xxskins_product(app_id):
    func_name = collection_xxskins_product.__name__
    sql = """select page, start_page, total_page from spider_conf where function_name="%s" """ % func_name
    result = pool.find_one(sql)
    page = result.get("page")
    start_page = result.get("start_page")
    total_page = result.get("total_page")
    try:
        while True:
            url = "https://apis.xxskins.com/goods/%s/0?_=1522230191000&page=%s" % (app_id, page)
            response = send_request(url)
            resp_data = json.loads(response.read())
            print "xxskins:product:page:%s" % page
            if resp_data and int(resp_data.get("code")) == 99999:
                # total number of pages
                if page == start_page:
                    total_page = resp_data.get("data").get("totalPage")
                    sql = """update spider_conf set total_page=%s where function_name="%s" """ % (total_page, func_name)
                    pool.commit(sql)
                product_list = resp_data.get("data").get("list")
                for product in product_list:
                    try:
                        icon_url = "https://steamcommunity-a.akamaihd.net/economy/image/class/%s/%s" % \
                                   (product.get("app_id"), product.get("class_id"))
                        market_name = product.get("market_name")
                        color = product.get("category_rarity_color")
                        market_hash_name = product.get("market_hash_name")
                        steam_product = save_steam_product(market_name, icon_url, app_id, color,
                                                           market_hash_name=market_hash_name)
                        if steam_product == 200:
                            steam_product = get_steam_product(app_id, market_hash_name=market_hash_name)
                        if not steam_product == -100:
                            save_web_product(WEB_SITE.get("xxskins"), steam_product.get("id"), product.get("goods_item_id"),
                                             app_id)
                    except BaseException as e:
                        print "xxskins:product:error:%s" % e
                        continue
                page = page + 1
                if page > total_page:
                    break
        thread_error_stop(page, func_name)
    except BaseException as e:
        print "xxskins:product:error:%s" % e
        thread_error_stop(page, func_name)
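send_request is used by every collector above but is not shown in these snippets. A plausible stand-in, assuming Python 2 (the print statements) and that callers only need an object exposing .read() and .code:
import urllib2

def send_request(url, headers=None):
    # sketch only, not the project's actual helper
    request = urllib2.Request(url, headers=headers or {})
    return urllib2.urlopen(request, timeout=30)  # the response exposes .read() and .code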
Code Example #5
def save_steam_product(market_name,
                       icon_url,
                       app_id,
                       color,
                       market_hash_name=None):
    """
    Save a Steam item (skin).
    :param market_name:
    :param icon_url:
    :param app_id:
    :param color:
    :param market_hash_name:
    :return:
    """
    result = base_result()
    if market_hash_name:
        steam_product = get_steam_product(app_id,
                                          market_hash_name=market_hash_name)
    else:
        steam_product = get_steam_product(app_id, market_name=market_name)
    if steam_product:
        result["code"] = ResultCode.Success
        result["data"] = steam_product
        return result
    if not market_hash_name:
        market_hash_name, color = get_steam_market_hash_name(
            app_id, market_name)
    is_merge = 0
    if u"箱" in market_name and app_id == 730:
        is_merge = 1
    if not market_hash_name:
        result["code"] = ResultCode.NoneProduct
        return result
    sql = """insert into product(market_name, market_hash_name, icon_url, created, app_id, color, is_merge) 
              values("%s","%s","%s", %s, %s, "%s", "%s")"""
    param = (market_name, market_hash_name, icon_url, datetime.datetime.now(),
             app_id, color, is_merge)
    pool.commit(sql, param)
    steam_product = get_steam_product(app_id,
                                      market_hash_name=market_hash_name)
    result["data"] = steam_product
    result["code"] = ResultCode.Success
    return result
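A minimal usage sketch for the result dict returned above; the arguments are illustrative only, and ResultCode/base_result are taken on faith from the example itself:
result = save_steam_product("AK-47 | Redline (Field-Tested)",
                            "https://example.invalid/icon.png", 730, "#eb4b4b")
if result["code"] == ResultCode.Success:
    steam_product = result["data"]  # row dict as returned by get_steam_product
    print "saved/loaded product id: %s" % steam_product.get("id")
else:
    print "market_hash_name could not be resolved"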
Code Example #6
File: xxskins.py Project: blackcoffees/spider
def collection_xx_sale_history(app_id):
    func_name = collection_xx_sale_history.__name__
    try:
        sql = """select count(id) from web_site_product where web_site=%s""" % WEB_SITE.get("xxskins")
        total_page = pool.find_one(sql).get("count(id)") / 100 + 1
        sql = """update spider_conf set total_page=%s, updated="%s" where function_name="%s" """ % \
              (total_page, datetime.datetime.now(), func_name)
        pool.commit(sql)
        web_id = WEB_SITE.get("xxskins")
        sql = """select page, last_execute, is_first, start_page from spider_conf where function_name="%s" limit 1""" % func_name
        result = pool.find_one(sql)
        db_page = result.get("page")
        last_execute = result.get("last_execute")
        is_first = result.get("is_first")
        start_page = result.get("start_page")
        if is_first == 0:
            collenction_all_reset(app_id)
        if db_page == start_page and is_first == 1:
            set_last_execute_time(func_name)
        db_rows = 100
        while True:
            db_start = db_page * db_rows
            sql = """select product_id, web_site_product_id, market_name,is_merge from web_site_product, product 
                      where web_site_product.app_id=%s and web_site=%s and web_site_product.product_id=product.id 
                      and is_merge = 1
                      limit %s, %s """ % (app_id, web_id, db_start, db_rows)
            web_p_list = pool.find(sql)
            print "xxskins:sale_history:db_page:%s" % db_page
            for site_product in web_p_list:
                web_page = 1
                is_before = False
                print "xxskins:sale_history:product:%s" % site_product.get("product_id")
                while True:
                    url = "https://apis.xxskins.com/goods/saleRecord?_=1522660905000&goodsItemId=%s&page=%s&limit=100" % \
                          (site_product.get("web_site_product_id"), web_page)
                    response = send_request(url)
                    resp_data = json.loads(response.read())
                    print url
                    print "xxskins:sale_history:web_page:%s" % web_page
                    if resp_data and int(resp_data.get("code")) == 99999:
                        history_list = resp_data.get("data").get("list")
                        if history_list:
                            for history in history_list:
                                try:
                                    if last_execute and is_before_time(history.get("sell_time"), last_execute) and is_first == 1:
                                        is_before = True
                                        break
                                    feature_id = get_feature_id("xxshinks", app_id, site_product.get("product_id"),
                                                                history.get("sell_time"))
                                    sql = """select id, qty from sale_history where feature_id="%s" """ % feature_id
                                    result = pool.find_one(sql)
                                    if not result:
                                        sticker_json = history.get("sticker_json")
                                        if not sticker_json:
                                            sticker_json = ""
                                        else:
                                            sticker_json = json.dumps(sticker_json)
                                        wear = history.get("worn")
                                        if not wear:
                                            wear = ""
                                        sql = """insert into sale_history(web_site, qty, price, pay_time, market_name,
                                                  product_id, web_site_product_id, created, app_id, description, wear,
                                                  feature_id) VALUES (%s, %s, %s, "%s", "%s", %s, %s, "%s", %s, '%s',
                                                   "%s", "%s")""" % \
                                              (web_id, history.get("count"), history.get("sell_price"),
                                               history.get("sell_time"), site_product.get("market_name"),
                                               site_product.get("product_id"), site_product.get("web_site_product_id"),
                                               datetime.datetime.now(), app_id, sticker_json, wear,
                                               feature_id)
                                        pool.commit(sql)
                                    elif result and site_product.get("is_merge") == 1:
                                        # merged products accumulate qty on the existing row for this feature_id
                                        total_qty = result.get("qty") + history.get("count")
                                        sql = """update sale_history set qty=%s, updated="%s" where id=%s""" % \
                                              (total_qty, datetime.datetime.now(), result.get("id"))
                                        pool.commit(sql)
                                except BaseException as e:
                                    print "xxskins:sale_history:error:%s" % e
                                    continue
                        else:
                            break
                    else:
                        break
                    if is_before:
                        break
                    web_page = web_page + 1
            db_page = db_page + 1
            if db_page >= total_page:
                break
        thread_error_stop(db_page, func_name)
    except BaseException as e:
        print "xxskins:sale_history:error:%s" % e
        thread_error_stop(db_page, func_name)
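get_feature_id is the deduplication key used by every sale-history collector: one sale_history row per (site, app, product, sell time). Its implementation is not shown here; a plausible stand-in:
import hashlib

def get_feature_id(web_site, app_id, product_id, pay_time):
    # sketch only: a deterministic digest over the fields that identify one sale record
    raw = "%s:%s:%s:%s" % (web_site, app_id, product_id, pay_time)
    return hashlib.md5(raw).hexdigest()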
Code Example #7
def update_total_count(function_name, total_count, total_page):
    sql = """update spider_conf set total_count=%s, total_page=%s, updated=%s where  function_name="%s" """
    param = (total_count, total_page, datetime.datetime.now(), function_name)
    return pool.commit(sql, param)
Code Example #8
def update_last_execute(function_name):
    sql = """update spider_conf set last_execute=%s where function_name=%s """
    param = [datetime.datetime.now(), function_name]
    return pool.commit(sql, param)
Code Example #9
def save_trade_stop_log(function_name, page):
    sql = """insert into thread_stop_log(created, function_name, page) values (%s, %s, %s)"""
    param = [datetime.datetime.now(), function_name, page]
    pool.commit(sql, param)
Code Example #10
File: stmbuy.py Project: blackcoffees/spider
def collection_stmbuy_sale_history(app_id):
    func_name = collection_stmbuy_sale_history.__name__
    sql = """select count(id) from web_site_product where web_site=%s""" % WEB_SITE.get(
        "stmbuy")
    result = pool.find_one(sql)
    sql = """update spider_conf set total_page=%s, updated="%s" where function_name="%s" """ \
          %(result.get("count(id)")/100+1, datetime.datetime.now(), func_name)
    pool.commit(sql)
    sql = """select page, total_page, last_execute, is_first, start_page from spider_conf where function_name="%s" """ % func_name
    result = pool.find_one(sql)
    db_page = result.get("page")
    total_page = result.get("total_page")
    is_first = result.get("is_first")
    last_execute = result.get("last_execute")
    start_page = result.get("start_page")
    if start_page == db_page and is_first == 1:
        set_last_execute_time(func_name)
    try:
        while True:
            if db_page > total_page:
                break
            sql = """select web_site_product_id, product_id, id from web_site_product where web_site=%s limit %s, %s""" \
                  % (WEB_SITE.get("stmbuy"), db_page, 100)
            product_list = pool.find(sql)
            print "stmbuy:sale_history:db_page:%s" % db_page
            for product in product_list:
                print "stmbuy:sale_history:product:%s" % product.get("id")
                web_page = 1
                is_before = False
                while True:
                    url = "http://www.stmbuy.com/item/history.html?class_id=%s&game_name=csgo&sort[_id]=-1&page=%s" \
                          % (product.get("web_site_product_id"), web_page)
                    response = send_request(url)
                    if response.code == 200:
                        soup = BeautifulSoup(response.read(), "html.parser")
                        none = soup.find("div", attrs={"class": "def-none"})
                        if none:
                            break
                        ul = soup.find("ul", attrs={"class": "goods-list"})
                        li = ul.find_all("li")
                        for li_item in li:
                            try:
                                qty = li_item.find("div",
                                                   attrs={
                                                       "class": "amount"
                                                   }).find("span").string
                                price_div = li_item.find(
                                    "div", attrs={"class": "price"})
                                price = price_div.contents[
                                    1] + price_div.contents[2].string
                                pay_time = li_item.find_all(
                                    "div",
                                    attrs={"class":
                                           "time fr"})[0].contents[2].strip()
                                if last_execute and is_before_time(
                                        pay_time,
                                        last_execute) and is_first == 1:
                                    is_before = True
                                    break
                                sell_info = li_item.find(
                                    "div", attrs={"class": "goods-sellinfo"})
                                wear_p = sell_info.find(
                                    "p", attrs={"class": "mosundu-num"})
                                if wear_p:
                                    wear = wear_p.find("span").string
                                    market_name = sell_info.find_all("p")[1].string
                                else:
                                    wear = ""
                                    market_name = sell_info.find("p").string.strip()
                                feature_id = get_feature_id(
                                    "stmbuy", app_id,
                                    product.get("product_id"), pay_time)
                                if not get_sale_history(feature_id):
                                    sql = """insert into sale_history(web_site, qty, price, pay_time, market_name, product_id,
                                                                        web_site_product_id, created, app_id, description, wear, feature_id) VALUES
                                                                        (%s, %s, %s, "%s", "%s", %s, %s, "%s", %s, "%s", "%s", "%s")""" % \
                                          (WEB_SITE.get("stmbuy"), qty, price, pay_time, market_name,
                                           product.get("product_id"), product.get("id"),
                                           datetime.datetime.now(), app_id, "", wear, feature_id)
                                    pool.commit(sql)
                            except BaseException as e:
                                print "stmbuy:sale_history:error:%s" % e
                                continue
                    else:
                        # non-200 response: stop paging this product
                        break
                    web_page += 1
                    if is_before:
                        break
            db_page = db_page + 1
        thread_error_stop(db_page, func_name)
    except BaseException as e:
        print "stmbuy:sale_history:error:%s" % e
        thread_error_stop(db_page, func_name)
Code Example #11
def thread_error_stop(page, func_name):
    page_sql = """update spider_conf set page=%s, updated='%s' where function_name='%s'""" \
               % (page, datetime.datetime.now(), func_name)
    pool.commit(page_sql)
    thread_list.sleep(func_name)
Code Example #12
def collection_steam_sale_history(app_id):
    try:
        func_name = collection_steam_sale_history.__name__
        rows = 10
        sql = """select count(id) from web_site_product where web_site=%s""" % WEB_SITE.get(
            "steam")
        total_count = pool.find_one(sql).get("count(id)") / rows + 1
        sql = """update spider_conf set total_page=%s, updated="%s" where function_name="%s" """ %\
              (total_count, datetime.datetime.now(), func_name)
        pool.commit(sql)
        sql = """select page, is_first, last_execute, start_page from spider_conf where function_name="%s" """ % func_name
        spider_result = pool.find_one(sql)
        db_page = spider_result.get("page")
        is_first = spider_result.get("is_first")
        last_execute = spider_result.get("last_execute")
        start_page = spider_result.get("start_page")
        headers = {"Accept-Language": "zh-CN,zh;q=0.8,en;q=0.6"}
        if start_page == db_page and is_first == 1:
            set_last_execute_time(func_name)
        while True:
            start = db_page * rows
            sql = """select market_hash_name, product.market_name, product.id from product, web_site_product where
                        product.id=web_site_product.product_id and web_site=%s limit %s, %s""" % \
                  (WEB_SITE.get("steam"), start, rows)
            product_list = pool.find(sql)
            print "steam:sale_history:db_page:%s" % db_page
            for product in product_list:
                print "steam:sale_history:product:%s" % product.get(
                    "product.id")
                market_hash_name = str(
                    product.get("market_hash_name").encode("utf-8"))
                time.sleep(1)
                url = "https://steamcommunity.com/market/listings/%s/%s" % (
                    app_id, urllib.quote(market_hash_name))
                response = send_request(url, headers)
                if response.code == 200:
                    soup = BeautifulSoup(response.read(), "html.parser")
                    pattern = re.compile(r"line1")
                    script = soup.find("script", text=pattern)
                    if not script:
                        continue
                    history_list = json.loads(
                        script.text.split("line1=")[1].split("];")[0] + "]")
                    for history in history_list:
                        sell_time = history[0].split(" ")
                        pay_time = datetime.datetime(
                            year=int(sell_time[2]),
                            month=Time_dict.get(sell_time[0]),
                            day=int(sell_time[1]),
                            hour=int(sell_time[3].split(":")[0]))
                        if last_execute and is_first == 1 and is_before_time(
                                pay_time, last_execute):
                            continue
                        price = history[1]
                        qty = history[2]
                        feature_id = get_feature_id(
                            "steam", app_id, product.get("product.id"),
                            pay_time.strftime("%Y-%m-%d %H:%M:%S"))
                        if not get_sale_history(feature_id):
                            try:
                                sql = """insert into sale_history(web_site, qty, price, pay_time, market_name, product_id,
                                                                  web_site_product_id, created, app_id, description, wear, feature_id) VALUES
                                                                  (%s, %s, %s, "%s", "%s", %s, %s, "%s", %s, "%s", "%s", "%s")""" % \
                                      (WEB_SITE.get("steam"), qty, price, pay_time, product.get("product.market_name"),
                                       product.get("product.id"), product.get("product.id"), datetime.datetime.now(),
                                       app_id, "", "", feature_id)
                                pool.commit(sql)
                            except BaseException as e2:
                                print "steam:sale_history:error:%s" % e2
                                continue
                else:
                    break
            db_page = db_page + 1
            if db_page >= total_count:
                break
        thread_error_stop(db_page, func_name)
    except BaseException as e:
        print "steam:sale_history:error:%s" % e
        thread_error_stop(db_page, func_name)
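Time_dict is referenced above but not defined in these snippets; from the parsing of history[0] it maps Steam's English month abbreviations (the first token of entries such as "Mar 14 2018 01: +0") to month numbers. An assumed shape:
Time_dict = {"Jan": 1, "Feb": 2, "Mar": 3, "Apr": 4, "May": 5, "Jun": 6,
             "Jul": 7, "Aug": 8, "Sep": 9, "Oct": 10, "Nov": 11, "Dec": 12}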
Code Example #13
File: c5game.py Project: blackcoffees/spider
def collection_c5_sale_history(app_id):
    func_name = collection_c5_sale_history.__name__
    sql = """select count(id) from product where web_site=%s""" % WEB_SITE.get(
        "c5game")
    result = pool.find_one(sql)
    total_page = result.get("count(id)") / 100 + 1
    sql = """update spider_conf set total_page=%s, updated="%s" where function_name="%s" """ % \
          (total_page, datetime.datetime.now(), func_name)
    pool.commit(sql)
    sql = """select page, total_page, is_first, last_execute from spider_conf where function_name="%s" """ % func_name
    result = pool.find_one(sql)
    db_page = result.get("page")
    total_page = result.get("total_page")
    is_first = result.get("is_first")
    last_execute = result.get("last_execute")
    while True:
        try:
            start = db_page * 100
            sql = """select web_site_product_id, product_id from web_site_product where web_site=%s and app_id=%s limit %s, %s""" \
                  % (WEB_SITE.get("c5game"), app_id, start, 100)
            site_product_list = pool.find(sql)
            for site_product in site_product_list:
                web_site_product_id = site_product.get("web_site_product_id")
                url = "https://www.c5game.com/csgo/item/history/%s.html" % web_site_product_id
                response = send_request(url)
                if not response.code == 200:
                    break
                soup = BeautifulSoup(response.read(), "html.parser")
                tr_list = soup.find("div", attrs={
                    "id": "history"
                }).find("table").find_all("tbody")[2].find_all("tr")
                for tr_item in tr_list:
                    try:
                        none_td = tr_item.find(
                            "td", attrs={"class": "text-center empty"})
                        # the "empty" placeholder row means there is no history to read
                        if none_td:
                            break
                        icon_url = tr_item.find("div",
                                                attrs={
                                                    "class":
                                                    "img csgo-img-bg ml-0"
                                                }).find("img").get("src")
                        market_name = tr_item.find("div",
                                                   attrs={
                                                       "class":
                                                       "img csgo-img-bg ml-0"
                                                   }).find("img").get("alt")
                        price = tr_item.find("span",
                                             attrs={
                                                 "class": "ft-gold"
                                             }).string.split("¥")[1]
                        pay_time = "20" + tr_item.find_all("td")[4].string
                        if last_execute and is_first == 1 and is_before_time(
                                pay_time, last_execute):
                            break
                        feature_id = get_feature_id(
                            "c5game", app_id, site_product.get("product_id"),
                            pay_time)
                        if not get_sale_history(feature_id):
                            sql = """insert into sale_history(web_site, qty, price, pay_time, market_name, product_id,
                                                   web_site_product_id, created, app_id, description, wear, feature_id) VALUES
                                                   (%s, %s, %s, "%s", "%s", %s, %s, "%s", %s, "%s", "%s", "%s")""" % \
                                 (WEB_SITE.get("c5game"), 1, price, pay_time, market_name,
                                  site_product.get("product_id"), web_site_product_id,
                                  datetime.datetime.now(), app_id, "", "", feature_id)
                            pool.commit(sql)
                    except BaseException as e:
                        print "c5game:sale_history:error:%s" % e
                        continue
            db_page = db_page + 1
            if db_page >= total_page:
                break
        except BaseException as e:
            print "steam:sale_history:error:%s" % e
            thread_error_stop(db_page, func_name)
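WEB_SITE appears in every example as a site-name-to-id lookup. Its real values are not part of these snippets; an assumed shape with placeholder ids:
WEB_SITE = {
    "xxskins": 1,  # placeholder id
    "stmbuy": 2,   # placeholder id
    "c5game": 3,   # placeholder id
    "steam": 4,    # placeholder id
}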