import datetime
import json
import re
import time
import urllib

from bs4 import BeautifulSoup

from tool.PoolDB import pool
# WEB_SITE, Time_dict, send_request, get_feature_id, get_sale_history, is_before_time,
# set_last_execute_time and thread_error_stop are assumed to be provided by other
# modules in this project; their import paths are not shown in this file.


def collenction_all_reset(app_id):
    """Before a full collection run, reset qty on the sale_history rows of merged products."""
    sql = """select id from product where app_id=%s and is_merge=1""" % app_id
    result = pool.find(sql)
    ids = [int(item.get("id")) for item in result]
    if not ids:
        return
    # Build a valid IN (...) clause; a one-element Python tuple would render as "(5,)".
    id_clause = "(%s)" % ", ".join(str(pid) for pid in ids)
    sql = """update sale_history set qty=0, updated="%s" where product_id in %s""" % \
        (datetime.datetime.now(), id_clause)
    pool.commit(sql)
def save_web_product(web_site, product_id, web_site_product_id, app_id):
    """Insert a web_site -> product mapping unless it already exists."""
    sql = """select id from web_site_product where web_site=%s and web_site_product_id=%s
             and app_id=%s and product_id=%s"""
    result = pool.find(sql, (web_site, web_site_product_id, app_id, product_id))
    if not result:
        # Placeholders stay unquoted: quoting of bound parameters is left to the driver.
        sql = """insert into web_site_product(web_site, product_id, web_site_product_id, created, app_id)
                 values(%s, %s, %s, %s, %s)"""
        pool.commit(sql, (web_site, product_id, web_site_product_id,
                          datetime.datetime.now(), app_id))
def get_spider_conf(function_name=None):
    """Return the spider_conf row for one function, or all rows when no name is given."""
    if function_name:
        sql = """select function_name, page, created, updated, total_page, last_execute,
                 is_first, start_page, status, page_size, total_count
                 from spider_conf where function_name = '%s' limit 1""" % function_name
        return pool.find_one(sql)
    else:
        sql = """select function_name, page, created, updated, total_page, last_execute,
                 is_first, start_page, status from spider_conf"""
        return pool.find(sql)
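# The collectors below rely on helpers defined elsewhere in the project
# (get_feature_id, get_sale_history, is_before_time). Their real implementations are
# not part of this file; the underscore-prefixed sketches here are only a hedged
# illustration of the behaviour the call sites appear to assume. The names, the md5
# key scheme and the timestamp format are guesses, not the project's actual code.


def _sketch_get_feature_id(web_site, app_id, product_id, pay_time):
    # Assumed behaviour: build a stable de-duplication key for one sale record
    # from the site, app, product and sale timestamp.
    import hashlib
    raw = "%s:%s:%s:%s" % (web_site, app_id, product_id, pay_time)
    return hashlib.md5(raw).hexdigest()


def _sketch_get_sale_history(feature_id):
    # Assumed behaviour: return the already-collected sale_history row for a
    # feature_id, or None when no such record exists yet.
    sql = """select id, qty from sale_history where feature_id="%s" """ % feature_id
    return pool.find_one(sql)


def _sketch_is_before_time(pay_time, last_execute):
    # Assumed behaviour: True when the sale happened before the previous run, i.e.
    # the record was already collected. This sketch assumes "%Y-%m-%d %H:%M:%S"
    # strings; the real helper likely copes with several site-specific formats.
    fmt = "%Y-%m-%d %H:%M:%S"
    if isinstance(pay_time, basestring):
        pay_time = datetime.datetime.strptime(pay_time, fmt)
    if isinstance(last_execute, basestring):
        last_execute = datetime.datetime.strptime(last_execute, fmt)
    return pay_time < last_execute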
def collection_xx_sale_history(app_id):
    func_name = collection_xx_sale_history.__name__
    db_page = 0
    try:
        sql = """select count(id) from web_site_product where web_site=%s""" % WEB_SITE.get("xxskins")
        total_page = pool.find_one(sql).get("count(id)") / 100 + 1
        sql = """update spider_conf set total_page=%s, updated="%s" where function_name="%s" """ % \
            (total_page, datetime.datetime.now(), func_name)
        pool.commit(sql)
        web_id = WEB_SITE.get("xxskins")
        sql = """select page, last_execute, is_first, start_page from spider_conf
                 where function_name="%s" limit 1""" % func_name
        result = pool.find_one(sql)
        db_page = result.get("page")
        last_execute = result.get("last_execute")
        is_first = result.get("is_first")
        start_page = result.get("start_page")
        if is_first == 0:
            # Full (non-incremental) run: reset merged quantities first.
            collenction_all_reset(app_id)
        if db_page == start_page and is_first == 1:
            set_last_execute_time(func_name)
        db_rows = 100
        while True:
            db_start = db_page * db_rows
            sql = """select product_id, web_site_product_id, market_name, is_merge
                     from web_site_product, product
                     where web_site_product.app_id=%s and web_site=%s
                     and web_site_product.product_id=product.id and is_merge=1
                     limit %s, %s""" % (app_id, web_id, db_start, db_rows)
            web_p_list = pool.find(sql)
            print "xxskins:sale_history:db_page:%s" % db_page
            for site_product in web_p_list:
                web_page = 1
                is_before = False
                print "xxskins:sale_history:product:%s" % site_product.get("product_id")
                while True:
                    url = "https://apis.xxskins.com/goods/saleRecord?_=1522660905000&goodsItemId=%s&page=%s&limit=100" % \
                        (site_product.get("web_site_product_id"), web_page)
                    response = send_request(url)
                    resp_data = json.loads(response.read())
                    print "%s page:%s" % (url, web_page)
                    if resp_data and int(resp_data.get("code")) == 99999:
                        history_list = resp_data.get("data").get("list")
                        if history_list:
                            for history in history_list:
                                try:
                                    if last_execute and is_before_time(history.get("sell_time"), last_execute) and is_first == 1:
                                        # Already collected on a previous incremental run.
                                        is_before = True
                                        break
                                    feature_id = get_feature_id("xxskins", app_id,
                                                                site_product.get("product_id"),
                                                                history.get("sell_time"))
                                    sql = """select id, qty from sale_history where feature_id="%s" """ % feature_id
                                    result = pool.find_one(sql)
                                    if not result:
                                        sticker_json = history.get("sticker_json")
                                        sticker_json = json.dumps(sticker_json) if sticker_json else ""
                                        wear = history.get("worn") or ""
                                        sql = """insert into sale_history(web_site, qty, price, pay_time, market_name,
                                                 product_id, web_site_product_id, created, app_id, description, wear, feature_id)
                                                 VALUES (%s, %s, %s, "%s", "%s", %s, %s, "%s", %s, '%s', "%s", "%s")""" % \
                                            (web_id, history.get("count"), history.get("sell_price"),
                                             history.get("sell_time"), site_product.get("market_name"),
                                             site_product.get("product_id"), site_product.get("web_site_product_id"),
                                             datetime.datetime.now(), app_id, sticker_json, wear, feature_id)
                                        pool.commit(sql)
                                    elif result and site_product.get("is_merge") == 1:
                                        # Merged product: accumulate quantity on the existing row.
                                        total_qty = result.get("qty") + history.get("count")
                                        sql = """update sale_history set qty=%s, updated="%s" where id=%s""" % \
                                            (total_qty, datetime.datetime.now(), result.get("id"))
                                        pool.commit(sql)
                                except BaseException as e:
                                    print "xxskins:sale_history:error:%s" % e
                                    continue
                        else:
                            break
                    else:
                        break
                    if is_before:
                        break
                    web_page = web_page + 1
            db_page = db_page + 1
            if db_page >= total_page:
                break
        thread_error_stop(db_page, func_name)
    except BaseException as e:
        print "xxskins:sale_history:error:%s" % e
        thread_error_stop(db_page, func_name)
def collection_stmbuy_sale_history(app_id):
    func_name = collection_stmbuy_sale_history.__name__
    sql = """select count(id) from web_site_product where web_site=%s""" % WEB_SITE.get("stmbuy")
    result = pool.find_one(sql)
    sql = """update spider_conf set total_page=%s, updated="%s" where function_name="%s" """ % \
        (result.get("count(id)") / 100 + 1, datetime.datetime.now(), func_name)
    pool.commit(sql)
    sql = """select page, total_page, last_execute, is_first, start_page from spider_conf
             where function_name="%s" """ % func_name
    result = pool.find_one(sql)
    db_page = result.get("page")
    total_page = result.get("total_page")
    is_first = result.get("is_first")
    last_execute = result.get("last_execute")
    start_page = result.get("start_page")
    if start_page == db_page and is_first == 1:
        set_last_execute_time(func_name)
    try:
        while True:
            if db_page > total_page:
                break
            sql = """select web_site_product_id, product_id, id from web_site_product
                     where web_site=%s limit %s, %s""" % \
                (WEB_SITE.get("stmbuy"), db_page * 100, 100)
            product_list = pool.find(sql)
            print "stmbuy:sale_history:db_page:%s" % db_page
            for product in product_list:
                print "stmbuy:sale_history:product:%s" % product.get("id")
                web_page = 1
                is_before = False
                while True:
                    url = "http://www.stmbuy.com/item/history.html?class_id=%s&game_name=csgo&sort[_id]=-1&page=%s" % \
                        (product.get("web_site_product_id"), web_page)
                    response = send_request(url)
                    if response.code != 200:
                        break
                    soup = BeautifulSoup(response.read(), "html.parser")
                    none = soup.find("div", attrs={"class": "def-none"})
                    if none:
                        # Empty page: no more sale records for this item.
                        break
                    ul = soup.find("ul", attrs={"class": "goods-list"})
                    li = ul.find_all("li")
                    for li_item in li:
                        try:
                            qty = li_item.find("div", attrs={"class": "amount"}).find("span").string
                            price_div = li_item.find("div", attrs={"class": "price"})
                            price = price_div.contents[1] + price_div.contents[2].string
                            pay_time = li_item.find_all("div", attrs={"class": "time fr"})[0].contents[2].strip()
                            if last_execute and is_before_time(pay_time, last_execute) and is_first == 1:
                                # Already collected on a previous incremental run.
                                is_before = True
                                break
                            wear_p = li_item.find("div", attrs={"class": "goods-sellinfo"}).find(
                                "p", attrs={"class": "mosundu-num"})
                            if wear_p:
                                wear = wear_p.find("span").string
                                market_name = li_item.find("div", attrs={"class": "goods-sellinfo"}).find_all("p")[1].string
                            else:
                                wear = ""
                                market_name = li_item.find("div", attrs={"class": "goods-sellinfo"}).find("p").string.strip()
                            feature_id = get_feature_id("stmbuy", app_id, product.get("product_id"), pay_time)
                            if not get_sale_history(feature_id):
                                sql = """insert into sale_history(web_site, qty, price, pay_time, market_name,
                                         product_id, web_site_product_id, created, app_id, description, wear, feature_id)
                                         VALUES (%s, %s, %s, "%s", "%s", %s, %s, "%s", %s, "%s", "%s", "%s")""" % \
                                    (WEB_SITE.get("stmbuy"), qty, price, pay_time, market_name,
                                     product.get("product_id"), product.get("id"),
                                     datetime.datetime.now(), app_id, "", wear, feature_id)
                                pool.commit(sql)
                        except BaseException as e:
                            print "stmbuy:sale_history:error:%s" % e
                            continue
                    web_page += 1
                    if is_before:
                        break
            db_page += 1
        thread_error_stop(db_page, func_name)
    except BaseException as e:
        print "stmbuy:sale_history:error:%s" % e
        thread_error_stop(db_page, func_name)
# -*- coding: utf-8 -*-
# Runner script: start one CollectionThread per enabled spider_conf row and keep
# waking up threads that have stopped.
from base.ThreadList import ThreadList
from base.thread import CollectionThread
from tool.PoolDB import pool
from tool.CommonUtils import thread_list

if __name__ == "__main__":
    sql = """select function_name, page, total_page from spider_conf where status=1 limit 1"""
    web_list = pool.find(sql)
    thread_list = ThreadList()
    for web in web_list:
        thread = CollectionThread(web.get("function_name"), web.get("function_name"), 730)
        thread_list.append(thread)
        thread.start()
    while True:
        if len(thread_list.get_all()) == 0:
            break
        for thread in thread_list.get_all():
            # The thread has stopped: pause, then wake it up again.
            if thread.get_stop():
                thread.event.clear()
                thread.event.wait(100)
                thread.event.set()
    # print "process start:%s" % datetime.datetime.now()
    # web_name_list = ["xxskins"]
    # app_id = 730
    # type = "sale_history"
    # for web_name in web_name_list:
    #     if type == "sale_history":
def collection_steam_sale_history(app_id):
    func_name = collection_steam_sale_history.__name__
    db_page = 0
    try:
        rows = 10
        sql = """select count(id) from web_site_product where web_site=%s""" % WEB_SITE.get("steam")
        total_count = pool.find_one(sql).get("count(id)") / rows + 1
        sql = """update spider_conf set total_page=%s, updated="%s" where function_name="%s" """ % \
            (total_count, datetime.datetime.now(), func_name)
        pool.commit(sql)
        sql = """select page, is_first, last_execute, start_page from spider_conf
                 where function_name="%s" """ % func_name
        spider_result = pool.find_one(sql)
        db_page = spider_result.get("page")
        is_first = spider_result.get("is_first")
        last_execute = spider_result.get("last_execute")
        start_page = spider_result.get("start_page")
        headers = {"Accept-Language": "zh-CN,zh;q=0.8,en;q=0.6"}
        if start_page == db_page and is_first == 1:
            set_last_execute_time(func_name)
        while True:
            start = db_page * rows
            sql = """select market_hash_name, product.market_name, product.id
                     from product, web_site_product
                     where product.id=web_site_product.product_id and web_site=%s
                     limit %s, %s""" % (WEB_SITE.get("steam"), start, rows)
            product_list = pool.find(sql)
            print "steam:sale_history:db_page:%s" % db_page
            for product in product_list:
                print "steam:sale_history:product:%s" % product.get("product.id")
                market_hash_name = str(product.get("market_hash_name").encode("utf-8"))
                time.sleep(1)
                url = "https://steamcommunity.com/market/listings/%s/%s" % (app_id, urllib.quote(market_hash_name))
                response = send_request(url, headers)
                if response.code == 200:
                    soup = BeautifulSoup(response.read(), "html.parser")
                    pattern = re.compile(r"line1")
                    script = soup.find("script", text=pattern)
                    if not script:
                        continue
                    # The price history is embedded in the page as a JS array: "var line1=[...];"
                    history_list = json.loads(script.text.split("line1=")[1].split("];")[0] + "]")
                    for history in history_list:
                        sell_time = history[0].split(" ")
                        pay_time = datetime.datetime(year=int(sell_time[2]),
                                                     month=Time_dict.get(sell_time[0]),
                                                     day=int(sell_time[1]),
                                                     hour=int(sell_time[3].split(":")[0]))
                        if last_execute and is_first == 1 and is_before_time(pay_time, last_execute):
                            continue
                        price = history[1]
                        qty = history[2]
                        feature_id = get_feature_id("steam", app_id, product.get("product.id"),
                                                    pay_time.strftime("%Y-%m-%d %H:%M:%S"))
                        if not get_sale_history(feature_id):
                            try:
                                sql = """insert into sale_history(web_site, qty, price, pay_time, market_name,
                                         product_id, web_site_product_id, created, app_id, description, wear, feature_id)
                                         VALUES (%s, %s, %s, "%s", "%s", %s, %s, "%s", %s, "%s", "%s", "%s")""" % \
                                    (WEB_SITE.get("steam"), qty, price, pay_time,
                                     product.get("product.market_name"), product.get("product.id"),
                                     product.get("product.id"), datetime.datetime.now(), app_id, "", "", feature_id)
                                pool.commit(sql)
                            except BaseException as e2:
                                print "steam:sale_history:error:%s" % e2
                                continue
                else:
                    break
            db_page = db_page + 1
            if db_page >= total_count:
                break
        thread_error_stop(db_page, func_name)
    except BaseException as e:
        print "steam:sale_history:error:%s" % e
        thread_error_stop(db_page, func_name)
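# Time_dict is used by collection_steam_sale_history but defined elsewhere; judging
# by how it is called (Time_dict.get(sell_time[0]) on strings such as "Apr"), it maps
# Steam's English month abbreviations to month numbers. The table below is a hedged
# guess at that mapping, not the project's actual definition.
_SKETCH_TIME_DICT = {
    "Jan": 1, "Feb": 2, "Mar": 3, "Apr": 4, "May": 5, "Jun": 6,
    "Jul": 7, "Aug": 8, "Sep": 9, "Oct": 10, "Nov": 11, "Dec": 12,
}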
def collection_c5_sale_history(app_id):
    func_name = collection_c5_sale_history.__name__
    sql = """select count(id) from web_site_product where web_site=%s""" % WEB_SITE.get("c5game")
    result = pool.find_one(sql)
    total_page = result.get("count(id)") / 100 + 1
    sql = """update spider_conf set total_page=%s, updated="%s" where function_name="%s" """ % \
        (total_page, datetime.datetime.now(), func_name)
    pool.commit(sql)
    sql = """select page, total_page, is_first, last_execute from spider_conf
             where function_name="%s" """ % func_name
    result = pool.find_one(sql)
    db_page = result.get("page")
    total_page = result.get("total_page")
    is_first = result.get("is_first")
    last_execute = result.get("last_execute")
    while True:
        try:
            start = db_page * 100
            sql = """select web_site_product_id, product_id from web_site_product
                     where web_site=%s and app_id=%s limit %s, %s""" % \
                (WEB_SITE.get("c5game"), app_id, start, 100)
            site_product_list = pool.find(sql)
            for site_product in site_product_list:
                web_site_product_id = site_product.get("web_site_product_id")
                url = "https://www.c5game.com/csgo/item/history/%s.html" % web_site_product_id
                response = send_request(url)
                if response.code != 200:
                    break
                soup = BeautifulSoup(response.read(), "html.parser")
                tr_list = soup.find("div", attrs={"id": "history"}).find("table").find_all("tbody")[2].find_all("tr")
                for tr_item in tr_list:
                    try:
                        none_td = tr_item.find("td", attrs={"class": "text-center empty"})
                        if none_td:
                            # Placeholder row: the item has no sale records.
                            break
                        icon_url = tr_item.find("div", attrs={"class": "img csgo-img-bg ml-0"}).find("img").get("src")
                        market_name = tr_item.find("div", attrs={"class": "img csgo-img-bg ml-0"}).find("img").get("alt")
                        price = tr_item.find("span", attrs={"class": "ft-gold"}).string.split("¥")[1]
                        pay_time = "20" + tr_item.find_all("td")[4].string
                        if last_execute and is_first == 1 and is_before_time(pay_time, last_execute):
                            break
                        feature_id = get_feature_id("c5game", app_id, site_product.get("product_id"), pay_time)
                        if not get_sale_history(feature_id):
                            sql = """insert into sale_history(web_site, qty, price, pay_time, market_name,
                                     product_id, web_site_product_id, created, app_id, description, wear, feature_id)
                                     VALUES (%s, %s, %s, "%s", "%s", %s, %s, "%s", %s, "%s", "%s", "%s")""" % \
                                (WEB_SITE.get("c5game"), 1, price, pay_time, market_name,
                                 site_product.get("product_id"), web_site_product_id,
                                 datetime.datetime.now(), app_id, "", "", feature_id)
                            pool.commit(sql)
                    except BaseException as e:
                        print "c5game:sale_history:error:%s" % e
                        continue
            db_page += 1
            if db_page >= total_page:
                break
        except BaseException as e:
            print "c5game:sale_history:error:%s" % e
            break
    thread_error_stop(db_page, func_name)
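# send_request is used throughout this module but defined elsewhere; from the call
# sites it takes a URL plus optional headers and returns an object exposing .code and
# .read(), which matches a plain urllib2 response. The sketch below is only an
# assumption about that wrapper (including the 30-second timeout), not the project's
# actual implementation.


def _sketch_send_request(url, headers=None):
    import urllib2
    request = urllib2.Request(url, headers=headers or {})
    # urllib2's response object already carries .code and .read(), which is all the
    # collectors above rely on.
    return urllib2.urlopen(request, timeout=30)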