def collection_stmbuy_product():
    """Poll a fixed issue page forever and look up its first key/value row.

    Each pass requests the hard-coded URL through ``send_request`` and, when
    the response code is 200, finds the ``val-number`` span in the first row
    of the ``table key-value`` table.  The span is neither used nor returned.
    The loop has no exit of its own: it only ends when something raises, in
    which case the error is printed and the function returns ``None``.

    NOTE(review): ``collection_stmbuy_product(app_id)`` is defined again right
    after this function, which rebinds the name; this version looks like
    leftover scratch code -- confirm before relying on it.
    """
    issue_url = "http://123.56.70.240:9000/sl/csgo-china/issues/100041/"
    try:
        while True:
            response = send_request(issue_url)
            if response.code != 200:
                # Non-200 answers are retried immediately, without a delay.
                continue
            document = BeautifulSoup(response.read(), "html.parser")
            kv_table = document.find("table", attrs={"class": "table key-value"})
            first_row = kv_table.find_all("tr")[0]
            # Looked up but never consumed; kept so the same lookups still run.
            code = first_row.find("span", attrs={"class": "val-number"})
    except BaseException as err:
        print("stmbuy:product:error:%s" % err)
def collection_stmbuy_product(app_id):
    """Crawl the stmbuy CS:GO listing pages and store the products found.

    The crawl starts at the ``page`` stored in ``spider_conf`` for this
    function and advances one page per iteration until ``total_page`` has
    been passed.  For every ``<li>`` in the ``goods-list`` it extracts the
    site item id, market name, icon URL and title colour and passes them to
    ``save_steam_product`` / ``get_steam_product`` / ``save_web_product``.
    The page reached is handed to ``thread_error_stop`` both when the crawl
    finishes and when a fatal error interrupts it.

    Args:
        app_id: application id forwarded to the persistence helpers.
    """
    func_name = collection_stmbuy_product.__name__
    sql = """select page, total_page from spider_conf where function_name="%s" """ % func_name
    result = pool.find_one(sql)
    page = result.get("page")
    page_total = result.get("total_page")
    try:
        while True:
            url = "http://www.stmbuy.com/csgo/page/?&page=%s" % page
            response = send_request(url)
            print("stmbuy:product:db_page:%s" % page)
            if response.code == 200:
                soup = BeautifulSoup(response.read(), "html.parser")
                ul = soup.find("ul", attrs={"class": "goods-list"})
                for li_item in ul.find_all("li"):
                    try:
                        web_site_product_id = li_item.find("a").get("href").split("item-")[1]
                        # Look the title paragraph up once; it feeds both the
                        # market name and the colour.
                        title = li_item.find("p", attrs={"class": "tit"})
                        special = li_item.find("p", attrs={"class": "special-tag"})
                        if special:
                            market_name = title.string + " (" + special.contents[1].string + ")"
                        else:
                            market_name = title.string
                        icon_url = li_item.find("img").get("src")
                        color = title.get("style").split("color:")[1]
                        steam_product = save_steam_product(market_name, icon_url, app_id, color)
                        if steam_product == 200:
                            steam_product = get_steam_product(app_id, market_name=market_name)
                            if not steam_product == -100:
                                save_web_product(WEB_SITE.get("stmbuy"), steam_product.get("id"),
                                                 web_site_product_id, app_id)
                    except Exception as e:
                        # Ordinary parsing errors (AttributeError, IndexError,
                        # ...) have no ``code`` attribute.  Reading ``e.code``
                        # directly raised inside this handler and aborted the
                        # whole crawl instead of skipping one item.
                        if getattr(e, "code", None) == 429:
                            print("request too many, sleep 5 min")
                            time.sleep(300)
                        else:
                            print("stmbuy:product:error:%s" % e)
            # Advance even after a non-200 answer so one bad page cannot be
            # requested again and again.
            page = page + 1
            if page > page_total:
                break
        thread_error_stop(page, func_name)
    except BaseException as e:
        print("stmbuy:product:error:%s" % e)
        thread_error_stop(page, func_name)
def collection_c5_product(app_id):
    """Crawl the c5game CS:GO result pages and store the products found.

    The start page and page limit come from the ``spider_conf`` row of this
    function.  Every ``<li class="selling">`` inside the ``yw0`` container
    yields a market name, an icon URL and the site item id, which are passed
    to ``save_steam_product`` / ``get_steam_product`` / ``save_web_product``.
    The page reached is handed to ``thread_error_stop`` on completion and on
    a fatal error.

    Args:
        app_id: application id forwarded to the persistence helpers.
    """
    func_name = collection_c5_product.__name__
    # Bound before the ``try`` so the error handler can tell whether the
    # configuration was read at all.
    page = None
    try:
        page_sql = """select page, start_page, total_page from spider_conf where function_name="%s" """ % func_name
        page_result = pool.find_one(page_sql)
        page = page_result.get("page")
        total_page = page_result.get("total_page")
        while True:
            url = "https://www.c5game.com/csgo/default/result.html?page=%s&locale=zh" % page
            response = send_request(url)
            soup = BeautifulSoup(response.read(), "html.parser")
            div = soup.find("div", id="yw0")
            li_list = div.find_all("li", attrs={"class": "selling"})
            print("c5game:product:db_page:%s" % page)
            for li in li_list:
                try:
                    p_name = li.find("p", attrs={"class": "name"})
                    market_name = p_name.find("span", attrs={"class": "text-unique"}).string
                    icon_url = li.find("img").get("src").split("@250w.png")[0]
                    link = li.find("a", attrs={"class": "csgo-img-bg text-center img"}).get("href")
                    web_site_product_id = link.split(".html?")[1].split("&type=")[0].split("item_id=")[1]
                    steam_product = save_steam_product(market_name, icon_url, app_id, "")
                    if steam_product == 200:
                        steam_product = get_steam_product(app_id, market_name=market_name)
                        if not steam_product == -100:
                            save_web_product(WEB_SITE.get("c5game"), steam_product.get("id"),
                                             web_site_product_id, app_id)
                except Exception as e:
                    # Most errors raised here have no ``code`` attribute;
                    # reading ``e.code`` directly raised inside this handler
                    # and aborted the crawl instead of skipping one item.
                    if getattr(e, "code", None) == 429:
                        print("request too many, sleep 5 min")
                        time.sleep(300)
                    else:
                        print("c5game:product:error:%s" % e)
            page = page + 1
            # NOTE(review): the sibling product crawlers stop on
            # ``page > total_page``; confirm that skipping page ``total_page``
            # here is intended.
            if page >= total_page:
                break
        thread_error_stop(page, func_name)
    except BaseException as e:
        print("c5game:product:error:%s" % e)
        # If reading the configuration itself failed there is no page to
        # store; the old handler raised UnboundLocalError at this point.
        if page is not None:
            thread_error_stop(page, func_name)
def collection_xxskins_product(app_id):
    """Page through the xxskins goods API and store every listed product.

    ``page``, ``start_page`` and ``total_page`` are read from the
    ``spider_conf`` row of this function.  While on ``start_page`` the total
    page count reported by the API is written back to ``spider_conf``.  Each
    listed product is passed to ``save_steam_product`` and, when that returns
    200 and ``get_steam_product`` does not return -100, to
    ``save_web_product``.  The page reached is handed to
    ``thread_error_stop`` on completion and on a fatal error.

    Args:
        app_id: application id used in the API URL and by the persistence
            helpers.
    """
    func_name = collection_xxskins_product.__name__
    conf_sql = """select page, start_page, total_page from spider_conf where function_name="%s" """ % func_name
    conf_row = pool.find_one(conf_sql)
    page = conf_row.get("page")
    start_page = conf_row.get("start_page")
    total_page = conf_row.get("total_page")
    try:
        while True:
            listing_url = "https://apis.xxskins.com/goods/%s/0?_=1522230191000&page=%s" % (app_id, page)
            payload = json.loads(send_request(listing_url).read())
            print("xxskins:product:page:%s" % page)
            if payload and int(payload.get("code")) == 99999:
                if page == start_page:
                    # Total number of pages, refreshed while on the start page.
                    total_page = payload.get("data").get("totalPage")
                    update_sql = """update spider_conf set total_page=%s where function_name="%s" """ % (
                        total_page, func_name)
                    pool.commit(update_sql)
                for item in payload.get("data").get("list"):
                    try:
                        icon_url = "https://steamcommunity-a.akamaihd.net/economy/image/class/%s/%s" % (
                            item.get("app_id"), item.get("class_id"))
                        name = item.get("market_name")
                        rarity_color = item.get("category_rarity_color")
                        hash_name = item.get("market_hash_name")
                        status = save_steam_product(name, icon_url, app_id, rarity_color,
                                                    market_hash_name=hash_name)
                        if status != 200:
                            continue
                        stored = get_steam_product(app_id, market_hash_name=hash_name)
                        if stored == -100:
                            continue
                        save_web_product(WEB_SITE.get("xxskins"), stored.get("id"),
                                         item.get("goods_item_id"), app_id)
                    except BaseException as err:
                        print("xxskins:product:error:%s" % err)
            page += 1
            if page > total_page:
                break
        thread_error_stop(page, func_name)
    except BaseException as err:
        print("xxskins:product:error:%s" % err)
        thread_error_stop(page, func_name)
def collection_xx_sale_history(app_id):
    """Collect xxskins sale records for every merged product of ``app_id``.

    The function first refreshes ``total_page`` in ``spider_conf`` from the
    number of xxskins rows in ``web_site_product`` (100 products per page),
    then walks the products page by page.  For each product it pages through
    the site's ``saleRecord`` API.  A record is inserted into ``sale_history``
    when its feature id is not stored yet; otherwise the stored ``qty`` is
    increased when the product has ``is_merge == 1``.  When ``is_first`` is 1
    and a record is older than ``last_execute``, the remaining records of
    that product are skipped.  The product page reached is handed to
    ``thread_error_stop`` on completion and on a fatal error.

    Args:
        app_id: application id used to filter products and stored with each
            sale record.
    """
    func_name = collection_xx_sale_history.__name__
    # Bound before the ``try`` so the error handler can tell whether the
    # configuration was read at all.
    db_page = None
    try:
        sql = """select count(id) from web_site_product where web_site=%s""" % WEB_SITE.get("xxskins")
        # Floor division keeps the page count an integer on any interpreter.
        total_page = pool.find_one(sql).get("count(id)") // 100 + 1
        sql = """update spider_conf set total_page=%s, updated="%s" where function_name="%s" """ % \
            (total_page, datetime.datetime.now(), func_name)
        pool.commit(sql)
        web_id = WEB_SITE.get("xxskins")
        sql = """select page, last_execute, is_first, start_page from spider_conf where function_name="%s" limit 1""" % func_name
        result = pool.find_one(sql)
        db_page = result.get("page")
        last_execute = result.get("last_execute")
        is_first = result.get("is_first")
        start_page = result.get("start_page")
        if is_first == 0:
            collenction_all_reset(app_id)
        if db_page == start_page and is_first == 1:
            set_last_execute_time(func_name)
        db_rows = 100
        while True:
            db_start = db_page * db_rows
            sql = """select product_id, web_site_product_id, market_name,is_merge from web_site_product, product where web_site_product.app_id=%s and web_site=%s and web_site_product.product_id=product.id and is_merge = 1 limit %s, %s """ % (app_id, web_id, db_start, db_rows)
            web_p_list = pool.find(sql)
            print("xxskins:sale_history:db_page:%s" % db_page)
            for site_product in web_p_list:
                web_page = 1
                is_before = False
                print("xxskins:sale_history:product:%s" % site_product.get("product_id"))
                while True:
                    url = "https://apis.xxskins.com/goods/saleRecord?_=1522660905000&goodsItemId=%s&page=%s&limit=100" % \
                        (site_product.get("web_site_product_id"), web_page)
                    response = send_request(url)
                    resp_data = json.loads(response.read())
                    print(url)
                    print("%s" % web_page)
                    # Stop paging on an error code or when no records remain.
                    if not (resp_data and int(resp_data.get("code")) == 99999):
                        break
                    history_list = resp_data.get("data").get("list")
                    if not history_list:
                        break
                    for history in history_list:
                        try:
                            if last_execute and is_before_time(history.get("sell_time"), last_execute) \
                                    and is_first == 1:
                                is_before = True
                                break
                            # NOTE(review): "xxshinks" looks like a typo for
                            # "xxskins".  It is left as is because changing it
                            # would presumably change the generated ids and
                            # stop matching rows already stored -- confirm.
                            feature_id = get_feature_id("xxshinks", app_id,
                                                        site_product.get("product_id"),
                                                        history.get("sell_time"))
                            sql = """select id, qty from sale_history where feature_id="%s" """ % feature_id
                            existing = pool.find_one(sql)
                            if not existing:
                                sticker_json = history.get("sticker_json")
                                sticker_json = json.dumps(sticker_json) if sticker_json else ""
                                wear = history.get("worn") or ""
                                # NOTE(review): values scraped from the site
                                # are interpolated straight into the SQL text;
                                # use parameter binding if ``pool`` supports it.
                                sql = """insert into sale_history(web_site, qty, price, pay_time, market_name, product_id, web_site_product_id, created, app_id, description, wear, feature_id) VALUES (%s, %s, %s, "%s", "%s", %s, %s, "%s", %s, '%s', "%s", "%s")""" % \
                                    (web_id, history.get("count"), history.get("sell_price"),
                                     history.get("sell_time"), site_product.get("market_name"),
                                     site_product.get("product_id"),
                                     site_product.get("web_site_product_id"),
                                     datetime.datetime.now(), app_id, sticker_json, wear, feature_id)
                                pool.commit(sql)
                            elif site_product.get("is_merge") == 1:
                                total_qty = existing.get("qty") + history.get("count")
                                sql = """update sale_history set qty=%s, updated="%s" where id=%s""" % \
                                    (total_qty, datetime.datetime.now(), existing.get("id"))
                                pool.commit(sql)
                        except Exception as e:
                            print("xxskins:sale_history:error:%s" % e)
                    if is_before:
                        break
                    web_page = web_page + 1
            db_page = db_page + 1
            if db_page >= total_page:
                break
        thread_error_stop(db_page, func_name)
    except BaseException as e:
        print("xxskins:sale_history:error:%s" % e)
        # If the configuration could not be read there is no page to store;
        # the old handler raised UnboundLocalError at this point.
        if db_page is not None:
            thread_error_stop(db_page, func_name)
def collection_stmbuy_sale_history(app_id):
    """Collect stmbuy sale records for the stored stmbuy products.

    ``total_page`` in ``spider_conf`` is refreshed from the number of stmbuy
    rows in ``web_site_product`` (100 products per page).  Products are then
    read in batches of 100.  For each product the site's history pages are
    parsed and every record whose feature id is not stored yet is inserted
    into ``sale_history``.  When ``is_first`` is 1 and a record is older than
    ``last_execute``, paging stops for that product.  The batch index reached
    is handed to ``thread_error_stop`` on completion and on a fatal error.

    Args:
        app_id: application id stored with each sale record.
    """
    func_name = collection_stmbuy_sale_history.__name__
    sql = """select count(id) from web_site_product where web_site=%s""" % WEB_SITE.get("stmbuy")
    result = pool.find_one(sql)
    # Floor division keeps the page count an integer on any interpreter.
    sql = """update spider_conf set total_page=%s, updated="%s" where function_name="%s" """ \
        % (result.get("count(id)") // 100 + 1, datetime.datetime.now(), func_name)
    pool.commit(sql)
    sql = """select page, total_page, last_execute, is_first, start_page from spider_conf where function_name="%s" """ % func_name
    result = pool.find_one(sql)
    db_page = result.get("page")
    total_page = result.get("total_page")
    is_first = result.get("is_first")
    last_execute = result.get("last_execute")
    start_page = result.get("start_page")
    if start_page == db_page and is_first == 1:
        set_last_execute_time(func_name)
    batch_size = 100
    try:
        while True:
            if db_page > total_page:
                break
            # The offset is counted in rows, so it is the batch index times
            # the batch size.  Passing ``db_page`` itself produced overlapping
            # windows shifted by a single row.
            sql = """select web_site_product_id, product_id, id from web_site_product where web_site=%s limit %s, %s""" \
                % (WEB_SITE.get("stmbuy"), db_page * batch_size, batch_size)
            product_list = pool.find(sql)
            print("stmbuy:sale_history:db_page:%s" % db_page)
            for product in product_list:
                print("stmbuy:sale_history:product:%s" % product.get("id"))
                web_page = 1
                is_before = False
                while True:
                    url = "http://www.stmbuy.com/item/history.html?class_id=%s&game_name=csgo&sort[_id]=-1&page=%s" \
                        % (product.get("web_site_product_id"), web_page)
                    response = send_request(url)
                    if response.code != 200:
                        # Nothing in the loop could end it after a non-200
                        # answer; give up on this product instead.
                        break
                    soup = BeautifulSoup(response.read(), "html.parser")
                    if soup.find("div", attrs={"class": "def-none"}):
                        # The page carries a ``def-none`` block; stop paging.
                        break
                    ul = soup.find("ul", attrs={"class": "goods-list"})
                    for li_item in ul.find_all("li"):
                        try:
                            qty = li_item.find("div", attrs={"class": "amount"}).find("span").string
                            price_div = li_item.find("div", attrs={"class": "price"})
                            price = price_div.contents[1] + price_div.contents[2].string
                            pay_time = li_item.find_all(
                                "div", attrs={"class": "time fr"})[0].contents[2].strip()
                            if last_execute and is_before_time(pay_time, last_execute) \
                                    and is_first == 1:
                                # The flag has to be raised here; it used to be
                                # set to False, so the page loop never saw the
                                # cut-off.
                                is_before = True
                                break
                            # ``attrs`` was a set literal ({"goods-sellinfo"});
                            # spell out the class filter explicitly.
                            sell_info = li_item.find("div", attrs={"class": "goods-sellinfo"})
                            wear_p = sell_info.find("p", attrs={"class": "mosundu-num"})
                            if wear_p:
                                wear = wear_p.find("span").string
                                market_name = sell_info.find_all("p")[1].string
                            else:
                                wear = ""
                                market_name = sell_info.find("p").string.strip()
                            feature_id = get_feature_id("stmbuy", app_id,
                                                        product.get("product_id"), pay_time)
                            if not get_sale_history(feature_id):
                                # NOTE(review): values scraped from the site
                                # are interpolated straight into the SQL text;
                                # use parameter binding if ``pool`` supports it.
                                sql = """insert into sale_history(web_site, qty, price, pay_time, market_name, product_id, web_site_product_id, created, app_id, description, wear, feature_id) VALUES (%s, %s, %s, "%s", "%s", %s, %s, "%s", %s, "%s", "%s", "%s")""" % \
                                    (WEB_SITE.get("stmbuy"), qty, price, pay_time, market_name,
                                     product.get("product_id"), product.get("id"),
                                     datetime.datetime.now(), app_id, "", wear, feature_id)
                                pool.commit(sql)
                        except Exception as e:
                            print("stmbuy:sale_history:error:%s" % e)
                    if is_before:
                        break
                    web_page += 1
            # Move on to the next batch; without this the outer loop kept
            # re-reading the same products forever.
            db_page += 1
        thread_error_stop(db_page, func_name)
    except BaseException as e:
        print("stmbuy:sale_history:error:%s" % e)
        thread_error_stop(db_page, func_name)
def collection_opskins_product(app_id):
    """Crawl the opskins browse endpoint and store the products found.

    Starting from the ``page`` stored in ``spider_conf`` for this function,
    one ``browse_scroll.php`` page is requested per second until a page
    contains no ``featured-item`` blocks.  Each block yields a market name,
    the site item id and an icon URL, which are passed to
    ``save_steam_product`` / ``get_steam_product`` / ``save_web_product``.
    The page reached is handed to ``thread_error_stop`` on completion and on
    a fatal error.

    Args:
        app_id: application id used in the request URL and by the
            persistence helpers.
    """
    func_name = collection_opskins_product.__name__
    # Bound before the ``try`` so the error handler can tell whether the
    # configuration was read at all.
    db_page = None
    try:
        sql = """select page from spider_conf where function_name="%s" """ % func_name
        result = pool.find_one(sql)
        page = result.get("page")
        # NOTE(review): the CSRF token, login token and session cookies below
        # are hard-coded values captured from one browser session; they
        # presumably expire and should not live in source control -- consider
        # loading them from configuration.
        headers = {
            "referer": "https://zh.opskins.com/?loc=shop_browse",
            "user-agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko)"
                          " Chrome/55.0.2883.87 Safari/537.36",
            "x-csrf": "2I25Z75fQ3YlYxMvdbndV5z5PYFDrmQEE",
            "x-op-userid": "0",
            "content-type": "text/html; charset=UTF-8",
            "x-requested-with": "XMLHttpRequest",
            "accept-encoding": "gzip, deflate, sdch, br",
            "accept-language": "zh-CN,zh;q=0.8,en;q=0.6",
            "cookie": "__ssid=1e1310b2-13e2-4bf1-a574-56200eff83bc; opskins_login_token=PTy54AM5urW6QvWHWLcJpBuwywiDhIkj;"
                      " Hm_lvt_f4d83c43fa7e41722d75d036cbcfcbbe=1522206377,1522229544,1524127707; eu_cookie_accepted=auto; "
                      "Hm_lvt_f4d83c43fa7e41722d75d036cbcfcbbe=1522206377,1522229544,1524127707; "
                      "opskins_csrf_token=2I25Z75fQ3YlYxMvdbndV5z5PYFDrmQEE;"
                      " referer=aHR0cHM6Ly93d3cuc28uY29tL3M%2FaWU9dXRmLTgmc3JjPTM2MGNocm9tZV90b29sYmFyX3NlYXJjaCZxPW9wc2tpbnM%3D;"
                      " incoming=landing_url%3D%252F%253Faff_id%253D2%2526marketing_source%253Dbaidu%2526trans_id%"
                      "253D102a5930f5661b3983b7688811e0e9%2526utm_campaign%253D%2526utm_content%253D%2526utm_medium%"
                      "253Dcpc%2526utm_source%253Dbaidu%2526utm_term%253D%26referer%3Dhttps%253A%252F%252Fwww.so.com%"
                      "252Fs%253Fie%253Dutf-8%2526src%253D360chrome_toolbar_search%2526q%253Dopskins%26user_agent%"
                      "3D119900%26trans_id%3D102a5930f5661b3983b7688811e0e9%26aff_id%3D2%26marketing_source%3Dbaidu%26kw%"
                      "3D%26campid%3D%26adgrp%3D%26mt%3D%26source%3Dbaidu%26medium%3Dcpc%26campaign%3D%26term%3D%26content%3D; "
                      "loggedout-marketing-link-json=%7B%22aff_id%22%3A%222%22%2C%22marketing_source%22%3A%22baidu%22%2"
                      "C%22trans_id%22%3A%22102a5930f5661b3983b7688811e0e9%22%2C%22utm_campaign%22%3A%22%22%2C%22utm_conte"
                      "nt%22%3A%22%22%2C%22utm_medium%22%3A%22cpc%22%2C%22utm_source%22%3A%22baidu%22%2C%22utm_term%22%3A%"
                      "22%22%2C%22referrer%22%3A%22www.so.com%22%2C%22referrer_path%22%3A%22%5C%2Fs%22%2C%"
                      "%22%3A%22ie%3Dutf-8%26src%3D360chrome_toolbar_search%26q%3Dopskins%22%7D; aft=eyJ0cmFuc0lkIjoiMTA"
                      "yYTU5MzBmNTY2MWIzOTgzYjc2ODg4MTFlMGU5IiwiYWZmaWxpYXRlSWQiOiIyIiwibWFya2V0aW5nU291cmNlIjoiYmFpZHUi"
                      "LCJrZXl3b3JkIjpudWxsLCJjYW1wYWlnbklkIjpudWxsLCJhZEdyb3VwIjpudWxsLCJtYXRjaFR5cGUiOm51bGwsInNvdXJjZ"
                      "SI6ImJhaWR1IiwibWVkaXVtIjoiY3BjIiwiY2FtcGFpZ24iOiIiLCJ0ZXJtIjoiIiwiY29udGVudCI6IiIsInNlc3Npb25UaW1"
                      "lc3RhbXAiOjE1MjUyNTI2NDgsInJlZmVyZXIiOiJodHRwczpcL1wvd3d3LnNvLmNvbVwvcz9pZT11dGYtOCZzcmM9MzYwY2h"
                      "yb21lX3Rvb2xiYXJfc2VhcmNoJnE9b3Bza2lucyIsImxhbmRpbmdVcmwiOiJcLz9hZmZfaWQ9MiZtYXJrZXRpbmdfc291cmNlP"
                      "WJhaWR1JnRyYW5zX2lkPTEwMmE1OTMwZjU2NjFiMzk4M2I3Njg4ODExZTBlOSZ1dG1fY2FtcGFpZ249JnV0bV9jb250ZW50P"
                      "SZ1dG1fbWVkaXVtPWNwYyZ1dG1fc291cmNlPWJhaWR1JnV0bV90ZXJtPSJ9; __cfduid=ddebe77d43e319a65b1601e33"
                      "9bcd54121525252649; _pk_ref.1.0ff0=%5B%22baidu%22%2C%22%22%2C1525252681%2C%22https%3A%2F%2Fwww"
                      ".so.com%2Fs%3Fie%3Dutf-8%26src%3D360chrome_toolbar_search%26q%3Dopskins%22%5D; cf_clearance=bbd"
                      "f22c0367ea926643a5d054adbb08d52dbb284-1525252703-14400; _ga=GA1.2.1144942300.1508463511; _gid=G"
                      "A1.2.1518183863.1525252651; _uetsid=_uet7cb5ab24; _pk_id.1.0ff0=d5d4cd07616a5428.1508463536.1"
                      "1.1525252793.1525252681.; _pk_ses.1.0ff0=*; n_lang=zh-CN; timezone_offset=8%2C0;"
                      " Hm_lvt_af7094281cbe36451577c00f5c0923a8=1525252681; "
                      "Hm_lpvt_af7094281cbe36451577c00f5c0923a8=1525252793; PHPSESSID=an968inhr4b4q8tlmvlkj9bit0"
        }
        db_page = page
        while True:
            # Log the page actually requested.
            print("opskins:product:page:%s" % db_page)
            time.sleep(1)
            url = "https://zh.opskins.com/ajax/browse_scroll.php?page=%s&appId=%s&contextId=2" % (
                db_page, app_id)
            response = send_request(url, headers=headers)
            # The body goes to the parser as received.  The former
            # ``unicode(a, "gta")`` call named a codec that does not exist,
            # so it raised LookupError on every page and its result was
            # never used.
            body = response.read()
            soup = BeautifulSoup(body, "html.parser")
            product_div_list = soup.find_all("div", attrs={"class": "featured-item"})
            if not product_div_list:
                break
            for product_div in product_div_list:
                try:
                    market_name = product_div.find(
                        "div", attrs={"class": "market-name market-link"}).string
                    # Tag attributes are read with ``.get()``; ``.attr(...)``
                    # is not a Beautiful Soup API and failed for every item.
                    web_site_product_id = product_div.get("id").split("cartItem")[1]
                    icon_url = product_div.find(
                        "img", attrs={"class": "item-img media-async-complete"}
                    ).get("src").split("/256fx256f")[0]
                    steam_product = save_steam_product(market_name, icon_url, app_id, "#ffffff")
                    if steam_product == 200:
                        steam_product = get_steam_product(app_id, market_name=market_name)
                        if not steam_product == -100:
                            save_web_product(WEB_SITE.get("opskins"), steam_product.get("id"),
                                             web_site_product_id, app_id)
                except Exception as e:
                    print("opskins:product:error:%s" % e)
            # Advance to the next page; without this the same page was
            # requested forever.
            db_page += 1
        thread_error_stop(db_page, func_name)
    except BaseException as e:
        print("opskins:product:error:%s" % e)
        # If the configuration could not be read there is no page to store;
        # the old handler raised UnboundLocalError at this point.
        if db_page is not None:
            thread_error_stop(db_page, func_name)
def collection_c5_sale_history(app_id):
    """Collect c5game sale records for the stored c5game products.

    ``total_page`` in ``spider_conf`` is refreshed from the number of c5game
    rows in ``web_site_product`` (100 products per page).  Products are then
    read in batches of 100.  For each product the site's history page is
    parsed and every row whose feature id is not stored yet is inserted into
    ``sale_history`` with a quantity of 1.  When ``is_first`` is 1 and a row
    is older than ``last_execute``, the remaining rows of that product are
    skipped.  The batch index reached is handed to ``thread_error_stop`` on
    completion and on a fatal error.

    Args:
        app_id: application id used to filter products and stored with each
            sale record.
    """
    func_name = collection_c5_sale_history.__name__
    # Count rows in ``web_site_product``, like the sibling collectors and the
    # batch query below.  The join in collection_xx_sale_history uses
    # ``web_site`` unqualified across both tables, so ``product`` does not
    # appear to have that column.
    sql = """select count(id) from web_site_product where web_site=%s""" % WEB_SITE.get("c5game")
    result = pool.find_one(sql)
    # Floor division keeps the page count an integer on any interpreter.
    total_page = result.get("count(id)") // 100 + 1
    sql = """update spider_conf set total_page=%s, updated="%s" where function_name="%s" """ % \
        (total_page, datetime.datetime.now(), func_name)
    pool.commit(sql)
    sql = """select page, total_page, is_first, last_execute from spider_conf where function_name="%s" """ % func_name
    result = pool.find_one(sql)
    db_page = result.get("page")
    total_page = result.get("total_page")
    is_first = result.get("is_first")
    last_execute = result.get("last_execute")
    while True:
        try:
            start = db_page * 100
            sql = """select web_site_product_id, product_id from web_site_product where web_site=%s and app_id=%s limit %s, %s""" \
                % (WEB_SITE.get("c5game"), app_id, start, 100)
            site_product_list = pool.find(sql)
            for site_product in site_product_list:
                web_site_product_id = site_product.get("web_site_product_id")
                url = "https://www.c5game.com/csgo/item/history/%s.html" % web_site_product_id
                response = send_request(url)
                # Compare the status code, not the response object: the old
                # ``response == 200`` test was never true, so no history page
                # was ever parsed.
                if response.code != 200:
                    continue
                soup = BeautifulSoup(response.read(), "html.parser")
                tr_list = soup.find("div", attrs={"id": "history"}) \
                    .find("table").find_all("tbody")[2].find_all("tr")
                for tr_item in tr_list:
                    try:
                        # A ``text-center empty`` cell means there is nothing
                        # to read.  The test used to be inverted and stopped
                        # on every row that did hold data.
                        if tr_item.find("td", attrs={"class": "text-center empty"}):
                            break
                        img = tr_item.find(
                            "div", attrs={"class": "img csgo-img-bg ml-0"}).find("img")
                        market_name = img.get("alt")
                        # U+00A5 is the yen sign.  A unicode separator avoids
                        # the implicit ASCII decode that a byte-string
                        # separator triggers against unicode text on Python 2.
                        price = tr_item.find(
                            "span", attrs={"class": "ft-gold"}).string.split(u"\u00a5")[1]
                        pay_time = "20" + tr_item.find_all("td")[4].string
                        if last_execute and is_first == 1 and is_before_time(pay_time, last_execute):
                            break
                        feature_id = get_feature_id("c5game", app_id,
                                                    site_product.get("product_id"), pay_time)
                        if not get_sale_history(feature_id):
                            # NOTE(review): values scraped from the site are
                            # interpolated straight into the SQL text; use
                            # parameter binding if ``pool`` supports it.
                            sql = """insert into sale_history(web_site, qty, price, pay_time, market_name, product_id, web_site_product_id, created, app_id, description, wear, feature_id) VALUES (%s, %s, %s, "%s", "%s", %s, %s, "%s", %s, "%s", "%s", "%s")""" % \
                                (WEB_SITE.get("c5game"), 1, price, pay_time, market_name,
                                 site_product.get("product_id"), web_site_product_id,
                                 datetime.datetime.now(), app_id, "", "", feature_id)
                            pool.commit(sql)
                    except Exception as e:
                        print("c5game:sale_history:error:%s" % e)
        except BaseException as e:
            # The message used to carry a "steam:" prefix.  Store the batch
            # reached and stop; looping again would retry the same batch
            # forever.
            print("c5game:sale_history:error:%s" % e)
            thread_error_stop(db_page, func_name)
            return
        # Move on to the next batch; the index was never advanced before.
        db_page += 1
        if db_page >= total_page:
            break
    # Record the final position on normal completion, as the sibling
    # collectors do.
    thread_error_stop(db_page, func_name)