예제 #1
0
def collenction_all_reset(app_id):
    """Zero the sold quantity of every merged product before a full collection.

    Looks up the ids of all ``product`` rows of ``app_id`` that have
    ``is_merge=1`` and sets ``qty`` to 0 (and ``updated`` to the current time)
    on the ``sale_history`` rows that reference them. Does nothing when no
    merged product is found.

    :param app_id: application id used to filter the ``product`` table.
    """
    sql = """select id from product where app_id=%s and is_merge=1 """ % app_id
    result = pool.find(sql)
    ids = [int(item.get("id")) for item in (result or [])]
    if not ids:
        # "in ()" is not valid SQL, and there is nothing to reset anyway.
        return
    # Join the ids by hand: formatting a Python tuple breaks for a single id,
    # because repr((1,)) is "(1,)" and the trailing comma is a SQL syntax error.
    id_list = ", ".join(str(product_id) for product_id in ids)
    sql = """update sale_history set qty=0, updated="%s" WHERE product_id in (%s)""" % (
        datetime.datetime.now(), id_list)
    pool.commit(sql)
예제 #2
0
def save_web_product(web_site, product_id, web_site_product_id, app_id):
    """Insert a web_site_product mapping unless the same mapping already exists.

    :param web_site: id of the source web site.
    :param product_id: id of the local product row.
    :param web_site_product_id: id of the product on the source web site.
    :param app_id: application id the product belongs to.
    """
    sql = """select id from web_site_product where web_site=%s and web_site_product_id=%s and app_id=%s 
                                and product_id=%s"""
    result = pool.find(sql,
                       (web_site, web_site_product_id, app_id, product_id))
    if not result:
        # Placeholders are left bare, exactly like the select above: the values
        # are passed separately to pool.commit, and wrapping a placeholder in
        # quotes as well would store string values with literal quote
        # characters, which the duplicate check above could never match.
        # NOTE(review): assumes pool.commit hands the parameters to the DB
        # driver for escaping, as pool.find is used above -- confirm.
        sql = """insert into web_site_product(web_site, product_id, web_site_product_id, created, app_id) 
                                                    values(%s, %s, %s, %s, %s)"""
        pool.commit(sql, (web_site, product_id, web_site_product_id,
                          datetime.datetime.now(), app_id))
예제 #3
0
def get_spider_conf(function_name=None):
    """Read collector configuration from the spider_conf table.

    :param function_name: name of a single collector; when empty or omitted,
        the configuration of every collector is read instead.
    :return: the result of ``pool.find_one`` for a named collector, otherwise
        the result of ``pool.find`` over the whole table.
    """
    if not function_name:
        # No name given: list every configured collector.
        all_conf_sql = """select function_name, page, created, updated, total_page, last_execute, is_first, start_page, status
                  from spider_conf """
        return pool.find(all_conf_sql)

    single_conf_sql = """select function_name, page, created, updated, total_page, last_execute, is_first, start_page, status,
                  page_size, total_count from spider_conf where function_name = '%s' limit 1
              """ % function_name
    return pool.find_one(single_conf_sql)
예제 #4
0
def collection_xx_sale_history(app_id):
    """Collect xxskins sale records for merged products into sale_history.

    Refreshes ``total_page`` for this collector in ``spider_conf``, then
    resumes from the stored page and walks the joined
    ``web_site_product``/``product`` rows (``is_merge = 1``) 100 at a time.
    For every product the xxskins saleRecord API is paged until it returns no
    rows, a response code other than 99999, or -- on incremental runs
    (``is_first == 1``) -- a record older than ``last_execute``. Records whose
    feature id is unknown are inserted; for known ones the stored ``qty`` is
    increased by the record's count. The reached page is handed to
    ``thread_error_stop`` on completion as well as on error.

    :param app_id: application id used to filter products and stored on every
        inserted row.
    """
    func_name = collection_xx_sale_history.__name__
    # Bound before the try block so the error handler can tell whether the
    # resume page was ever read.
    db_page = None
    try:
        sql = """select count(id) from web_site_product where web_site=%s""" % WEB_SITE.get("xxskins")
        # Floor division keeps the page count an integer.
        total_page = pool.find_one(sql).get("count(id)") // 100 + 1
        sql = """update spider_conf set total_page=%s, updated="%s" where function_name="%s" """ % \
              (total_page, datetime.datetime.now(), func_name)
        pool.commit(sql)
        web_id = WEB_SITE.get("xxskins")
        sql = """select page, last_execute, is_first, start_page from spider_conf where function_name="%s" limit 1""" % func_name
        result = pool.find_one(sql)
        db_page = result.get("page")
        last_execute = result.get("last_execute")
        is_first = result.get("is_first")
        start_page = result.get("start_page")
        if is_first == 0:
            # Full collection: zero the stored quantities first.
            collenction_all_reset(app_id)
        if db_page == start_page and is_first == 1:
            set_last_execute_time(func_name)
        db_rows = 100
        while True:
            db_start = db_page * db_rows
            sql = """select product_id, web_site_product_id, market_name,is_merge from web_site_product, product 
                      where web_site_product.app_id=%s and web_site=%s and web_site_product.product_id=product.id 
                      and is_merge = 1
                      limit %s, %s """ % (app_id, web_id, db_start, db_rows)
            web_p_list = pool.find(sql)
            print("xxskins:sale_history:db_page:%s" % db_page)
            for site_product in web_p_list:
                web_page = 1
                # Set once a record older than last_execute is seen.
                is_before = False
                print("xxskins:sale_history:product:%s" % site_product.get("product_id"))
                while True:
                    url = "https://apis.xxskins.com/goods/saleRecord?_=1522660905000&goodsItemId=%s&page=%s&limit=100" % \
                          (site_product.get("web_site_product_id"), web_page)
                    response = send_request(url)
                    resp_data = json.loads(response.read())
                    print(url)
                    print("%s" % web_page)
                    if resp_data and int(resp_data.get("code")) == 99999:
                        history_list = resp_data.get("data").get("list")
                        if history_list:
                            for history in history_list:
                                try:
                                    if last_execute and is_before_time(history.get("sell_time"), last_execute) and is_first == 1:
                                        is_before = True
                                        break
                                    # NOTE(review): "xxshinks" differs from the
                                    # "xxskins" key used elsewhere; it is kept
                                    # because it is an input to the feature id
                                    # of rows that may already be stored.
                                    feature_id = get_feature_id("xxshinks", app_id, site_product.get("product_id"),
                                                                history.get("sell_time"))
                                    sql = """select id, qty from sale_history where feature_id="%s" """ % feature_id
                                    result = pool.find_one(sql)
                                    if not result:
                                        sticker_json = history.get("sticker_json")
                                        if not sticker_json:
                                            sticker_json = ""
                                        else:
                                            sticker_json = json.dumps(sticker_json)
                                        wear = history.get("worn")
                                        if not wear:
                                            wear = ""
                                        sql = """insert into sale_history(web_site, qty, price, pay_time, market_name,
                                                  product_id, web_site_product_id, created, app_id, description, wear,
                                                  feature_id) VALUES (%s, %s, %s, "%s", "%s", %s, %s, "%s", %s, '%s',
                                                   "%s", "%s")""" % \
                                              (web_id, history.get("count"), history.get("sell_price"),
                                               history.get("sell_time"), site_product.get("market_name"),
                                               site_product.get("product_id"), site_product.get("web_site_product_id"),
                                               datetime.datetime.now(), app_id, sticker_json, wear,
                                               feature_id)
                                        pool.commit(sql)
                                    elif site_product.get("is_merge") == 1:
                                        # Same feature id seen again: add this
                                        # record's count to the stored row.
                                        total_qty = result.get("qty") + history.get("count")
                                        sql = """update sale_history set qty=%s, updated="%s" where id=%s""" % \
                                              (total_qty, datetime.datetime.now(), result.get("id"))
                                        pool.commit(sql)
                                except Exception as e:
                                    # Best effort per record: log and move on.
                                    print("xxskins:sale_history:error:%s" % e)
                                    continue
                        else:
                            break
                    else:
                        break
                    if is_before:
                        break
                    web_page = web_page + 1
            db_page = db_page + 1
            if db_page >= total_page:
                break
        thread_error_stop(db_page, func_name)
    except BaseException as e:
        print("xxskins:sale_history:error:%s" % e)
        if db_page is None:
            # Setup failed before the resume page was read, so there is no
            # progress to record; surface the real error instead of a
            # NameError raised from this handler.
            raise
        thread_error_stop(db_page, func_name)
예제 #5
0
def collection_stmbuy_sale_history(app_id):
    """Collect stmbuy sale-history records and store new ones in sale_history.

    Refreshes ``total_page`` for this collector in ``spider_conf``, then
    resumes from the stored page and walks ``web_site_product`` rows of stmbuy
    100 at a time. For every product the history pages are fetched until the
    site shows its "no data" marker, a request does not return HTTP 200, or --
    on incremental runs (``is_first == 1``) -- a record older than
    ``last_execute`` is reached. Records whose feature id is not stored yet
    are inserted. The reached page is handed to ``thread_error_stop`` on
    completion as well as on error.

    :param app_id: application id stored on every inserted row and used to
        build the feature id.
    """
    func_name = collection_stmbuy_sale_history.__name__
    db_rows = 100
    sql = """select count(id) from web_site_product where web_site=%s""" % WEB_SITE.get(
        "stmbuy")
    result = pool.find_one(sql)
    # Floor division keeps the page count an integer.
    sql = """update spider_conf set total_page=%s, updated="%s" where function_name="%s" """ \
          %(result.get("count(id)") // db_rows + 1, datetime.datetime.now(), func_name)
    pool.commit(sql)
    sql = """select page, total_page, last_execute, is_first, start_page from spider_conf where function_name="%s" """ % func_name
    result = pool.find_one(sql)
    db_page = result.get("page")
    total_page = result.get("total_page")
    is_first = result.get("is_first")
    last_execute = result.get("last_execute")
    start_page = result.get("start_page")
    if start_page == db_page and is_first == 1:
        set_last_execute_time(func_name)
    try:
        while True:
            if db_page > total_page:
                break
            # The limit offset is a row offset, so the page number has to be
            # scaled by the page size.
            sql = """select web_site_product_id, product_id, id from web_site_product where web_site=%s limit %s, %s""" \
                  % (WEB_SITE.get("stmbuy"), db_page * db_rows, db_rows)
            product_list = pool.find(sql)
            print("stmbuy:sale_history:db_page:%s" % db_page)
            for product in product_list:
                print("stmbuy:sale_history:product:%s" % product.get("id"))
                web_page = 1
                # Set once a record older than last_execute is seen.
                is_before = False
                while True:
                    url = "http://www.stmbuy.com/item/history.html?class_id=%s&game_name=csgo&sort[_id]=-1&page=%s" \
                          % (product.get("web_site_product_id"), web_page)
                    response = send_request(url)
                    if response.code == 200:
                        soup = BeautifulSoup(response.read(), "html.parser")
                        none = soup.find("div", attrs={"class": "def-none"})
                        if none:
                            break
                        ul = soup.find("ul", attrs={"class": "goods-list"})
                        if ul is None:
                            # Neither a goods list nor the "no data" marker:
                            # nothing to parse, stop paging this product.
                            break
                        li = ul.find_all("li")
                        for li_item in li:
                            try:
                                qty = li_item.find("div",
                                                   attrs={
                                                       "class": "amount"
                                                   }).find("span").string
                                price_div = li_item.find(
                                    "div", attrs={"class": "price"})
                                price = price_div.contents[
                                    1] + price_div.contents[2].string
                                pay_time = li_item.find_all(
                                    "div",
                                    attrs={"class":
                                           "time fr"})[0].contents[2].strip()
                                if last_execute and is_before_time(
                                        pay_time,
                                        last_execute) and is_first == 1:
                                    # Everything from here on is older than the
                                    # previous run: stop paging this product.
                                    is_before = True
                                    break
                                sell_info = li_item.find(
                                    "div", attrs={"class": "goods-sellinfo"})
                                wear_p = sell_info.find(
                                    "p", attrs={"class": "mosundu-num"})
                                if wear_p:
                                    wear = wear_p.find("span").string
                                    market_name = sell_info.find_all(
                                        "p")[1].string
                                else:
                                    wear = ""
                                    market_name = sell_info.find(
                                        "p").string.strip()
                                feature_id = get_feature_id(
                                    "stmbuy", app_id,
                                    product.get("product_id"), pay_time)
                                if not get_sale_history(feature_id):
                                    sql = """insert into sale_history(web_site, qty, price, pay_time, market_name, product_id,
                                                                        web_site_product_id, created, app_id, description, wear, feature_id) VALUES
                                                                        (%s, %s, %s, "%s", "%s", %s, %s, "%s", %s, "%s", "%s", "%s")""" % \
                                          (WEB_SITE.get("stmbuy"), qty, price, pay_time, market_name,
                                           product.get("product_id"), product.get("id"),
                                           datetime.datetime.now(), app_id, "", wear, feature_id)
                                    pool.commit(sql)
                            except Exception as e:
                                # Best effort per record: log and move on.
                                print("stmbuy:sale_history:error:%s" % e)
                                continue
                    else:
                        # A failed request would otherwise be retried on the
                        # next page number forever.
                        break
                    web_page += 1
                    if is_before:
                        break
            # Advance to the next page of products; without this the outer
            # loop never terminates.
            db_page += 1
        thread_error_stop(db_page, func_name)
    except BaseException as e:
        print("stmbuy:sale_history:error:%s" % e)
        thread_error_stop(db_page, func_name)
예제 #6
0
파일: main.py 프로젝트: blackcoffees/spider
# -*- coding: utf-8 -*-
from base.ThreadList import ThreadList
from base.thread import CollectionThread
from tool.PoolDB import pool
from tool.CommonUtils import thread_list

if __name__ == "__main__":
    # Imported here because only the supervision loop below needs it.
    import time

    # Start one CollectionThread per collector returned by the query; the
    # query is capped at a single row (limit 1).
    sql = """select function_name, page, total_page from spider_conf where status=1 limit 1"""
    web_list = pool.find(sql)
    # NOTE(review): this local is never used -- the threads are registered in
    # the thread_list imported from tool.CommonUtils. Kept in case
    # constructing ThreadList has side effects; confirm and remove.
    tread_list = ThreadList()
    for web in web_list:
        thread = CollectionThread(web.get("function_name"),
                                  web.get("function_name"), 730)
        thread_list.append(thread)
        thread.start()
    # Supervise until no thread is registered any more.
    while True:
        if len(thread_list.get_all()) == 0:
            break
        for thread in thread_list.get_all():
            # The thread has stopped: hold its event cleared for up to 100
            # seconds, then set it again.
            if thread.get_stop():
                thread.event.clear()
                thread.event.wait(100)
                thread.event.set()
        # Pause between polls so the loop does not spin at full CPU while
        # every collector is still running.
        time.sleep(1)

    # print "process start:%s" % datetime.datetime.now()
    # web_name_list = ["xxskins"]
    # app_id = 730
    # type = "sale_history"
    # for web_name in web_name_list:
    #     if type == "sale_history":
예제 #7
0
def collection_steam_sale_history(app_id):
    """Collect Steam market price history and store new rows in sale_history.

    Refreshes ``total_page`` for this collector in ``spider_conf``, then
    resumes from the stored page and walks the joined
    ``product``/``web_site_product`` rows of Steam 10 at a time. For every
    product the market listing page is fetched and the ``line1`` array embedded
    in one of its scripts is parsed into (time, price, quantity) entries. On
    incremental runs (``is_first == 1``) entries older than ``last_execute``
    are skipped; entries whose feature id is not stored yet are inserted. The
    reached page is handed to ``thread_error_stop`` on completion as well as
    on error.

    :param app_id: Steam application id used in the listing URL and stored on
        every inserted row.
    """
    # Bound before the try block because the error handler uses both names.
    func_name = collection_steam_sale_history.__name__
    db_page = None
    # Compiled once instead of once per product.
    line1_pattern = re.compile(r"line1")
    try:
        rows = 10
        sql = """select count(id) from web_site_product where web_site=%s""" % WEB_SITE.get(
            "steam")
        # Floor division keeps the page count an integer.
        total_count = pool.find_one(sql).get("count(id)") // rows + 1
        sql = """update spider_conf set total_page=%s, updated="%s" where function_name="%s" """ %\
              (total_count, datetime.datetime.now(), func_name)
        pool.commit(sql)
        sql = """select page, is_first, last_execute, start_page from spider_conf where function_name="%s" """ % func_name
        spider_result = pool.find_one(sql)
        db_page = spider_result.get("page")
        is_first = spider_result.get("is_first")
        last_execute = spider_result.get("last_execute")
        start_page = spider_result.get("start_page")
        headers = {"Accept-Language": "zh-CN,zh;q=0.8,en;q=0.6"}
        if start_page == db_page and is_first == 1:
            set_last_execute_time(func_name)
        while True:
            start = db_page * rows
            sql = """select market_hash_name, product.market_name, product.id from product, web_site_product where
                        product.id=web_site_product.product_id and web_site=%s limit %s, %s""" % \
                  (WEB_SITE.get("steam"), start, rows)
            product_list = pool.find(sql)
            print("steam:sale_history:db_page:%s" % db_page)
            for product in product_list:
                # NOTE(review): rows are read with the keys "product.id" and
                # "product.market_name" but with the bare "market_hash_name";
                # confirm which key names pool.find actually returns.
                print("steam:sale_history:product:%s" % product.get(
                    "product.id"))
                market_hash_name = str(
                    product.get("market_hash_name").encode("utf-8"))
                time.sleep(1)
                url = "https://steamcommunity.com/market/listings/%s/%s" % (
                    app_id, urllib.quote(market_hash_name))
                response = send_request(url, headers)
                if response.code == 200:
                    soup = BeautifulSoup(response.read(), "html.parser")
                    script = soup.find("script", text=line1_pattern)
                    if not script:
                        continue
                    # Cut the array literal assigned to line1 out of the script
                    # text and parse it as JSON.
                    history_list = json.loads(
                        script.text.split("line1=")[1].split("];")[0] + "]")
                    for history in history_list:
                        # history[0] is split on spaces into month name, day,
                        # year and an "HH:..." part.
                        sell_time = history[0].split(" ")
                        pay_time = datetime.datetime(
                            year=int(sell_time[2]),
                            month=Time_dict.get(sell_time[0]),
                            day=int(sell_time[1]),
                            hour=int(sell_time[3].split(":")[0]))
                        if last_execute and is_first == 1 and is_before_time(
                                pay_time, last_execute):
                            continue
                        price = history[1]
                        qty = history[2]
                        feature_id = get_feature_id(
                            "steam", app_id, product.get("product.id"),
                            pay_time.strftime("%Y-%m-%d %H:%M:%S"))
                        if not get_sale_history(feature_id):
                            try:
                                sql = """insert into sale_history(web_site, qty, price, pay_time, market_name, product_id,
                                                                  web_site_product_id, created, app_id, description, wear, feature_id) VALUES
                                                                  (%s, %s, %s, "%s", "%s", %s, %s, "%s", %s, "%s", "%s", "%s")""" % \
                                      (WEB_SITE.get("steam"), qty, price, pay_time, product.get("product.market_name"),
                                       product.get("product.id"), product.get("product.id"), datetime.datetime.now(),
                                       app_id, "", "", feature_id)
                                pool.commit(sql)
                            except Exception as e2:
                                # Best effort per record: log and move on.
                                print("steam:sale_history:error:%s" % e2)
                                continue
                else:
                    # A failed request abandons the rest of this page of
                    # products; the next page is still processed.
                    break
            db_page = db_page + 1
            if db_page >= total_count:
                break
        thread_error_stop(db_page, func_name)
    except BaseException as e:
        print("steam:sale_history:error:%s" % e)
        if db_page is None:
            # Setup failed before the resume page was read, so there is no
            # progress to record; surface the real error instead of a
            # NameError raised from this handler.
            raise
        thread_error_stop(db_page, func_name)
예제 #8
0
def collection_c5_sale_history(app_id):
    """Collect c5game sale-history records and store new ones in sale_history.

    Refreshes ``total_page`` for this collector in ``spider_conf``, then
    resumes from the stored page and walks ``web_site_product`` rows of c5game
    100 at a time. For every product the history page is fetched and its table
    rows are read until the "empty" placeholder row or -- on incremental runs
    (``is_first == 1``) -- a record older than ``last_execute`` is reached.
    Records whose feature id is not stored yet are inserted with a quantity
    of 1. The reached page is handed to ``thread_error_stop`` on completion as
    well as on error.

    NOTE(review): unlike the other collectors in this file, this one never
    calls ``set_last_execute_time``; confirm whether that is intended.

    :param app_id: application id used to filter products and stored on every
        inserted row.
    """
    func_name = collection_c5_sale_history.__name__
    # Count the table that is actually paged below; web_site is a column of
    # web_site_product, not of product.
    sql = """select count(id) from web_site_product where web_site=%s""" % WEB_SITE.get(
        "c5game")
    result = pool.find_one(sql)
    # Floor division keeps the page count an integer.
    total_page = result.get("count(id)") // 100 + 1
    sql = """update spider_conf set total_page=%s, updated="%s" where function_name="%s" """ % \
          (total_page, datetime.datetime.now(), func_name)
    pool.commit(sql)
    sql = """select page, total_page, is_first, last_execute from spider_conf where function_name="%s" """ % func_name
    result = pool.find_one(sql)
    db_page = result.get("page")
    total_page = result.get("total_page")
    is_first = result.get("is_first")
    last_execute = result.get("last_execute")
    try:
        while True:
            start = db_page * 100
            sql = """select web_site_product_id, product_id from web_site_product where web_site=%s and app_id=%s limit %s, %s""" \
                  % (WEB_SITE.get("c5game"), app_id, start, 100)
            site_product_list = pool.find(sql)
            for site_product in site_product_list:
                web_site_product_id = site_product.get("web_site_product_id")
                url = "https://www.c5game.com/csgo/item/history/%s.html" % web_site_product_id
                response = send_request(url)
                # Compare the status code; the response object itself never
                # equals 200.
                if response.code != 200:
                    break
                soup = BeautifulSoup(response.read(), "html.parser")
                tr_list = soup.find("div", attrs={
                    "id": "history"
                }).find("table").find_all("tbody")[2].find_all("tr")
                for tr_item in tr_list:
                    try:
                        none_td = tr_item.find(
                            "td", attrs={"class": "text-center empty"})
                        if none_td:
                            # The placeholder row means there are no records.
                            break
                        market_name = tr_item.find("div",
                                                   attrs={
                                                       "class":
                                                       "img csgo-img-bg ml-0"
                                                   }).find("img").get("alt")
                        price = tr_item.find("span",
                                             attrs={
                                                 "class": "ft-gold"
                                             }).string.split("¥")[1]
                        # The page shows a two-digit year; prefix the century.
                        pay_time = "20" + tr_item.find_all("td")[4].string
                        if last_execute and is_first == 1 and is_before_time(
                                pay_time, last_execute):
                            break
                        feature_id = get_feature_id(
                            "c5game", app_id, site_product.get("product_id"),
                            pay_time)
                        if not get_sale_history(feature_id):
                            sql = """insert into sale_history(web_site, qty, price, pay_time, market_name, product_id,
                                                   web_site_product_id, created, app_id, description, wear, feature_id) VALUES
                                                   (%s, %s, %s, "%s", "%s", %s, %s, "%s", %s, "%s", "%s", "%s")""" % \
                                 (WEB_SITE.get("c5game"), 1, price, pay_time, market_name,
                                  site_product.get("product_id"), web_site_product_id,
                                  datetime.datetime.now(), app_id, "", "", feature_id)
                            pool.commit(sql)
                    except Exception as e:
                        # Best effort per record: log and move on.
                        print("c5game:sale_history:error:%s" % e)
                        continue
            # Advance to the next page of products; without this the loop
            # never terminates.
            db_page = db_page + 1
            if db_page >= total_page:
                break
        thread_error_stop(db_page, func_name)
    except BaseException as e:
        # The try block wraps the whole loop, so an error ends the run instead
        # of retrying the same page forever.
        print("c5game:sale_history:error:%s" % e)
        thread_error_stop(db_page, func_name)