コード例 #1
0
def get_shop_id(thread_name, queue, table):
    """Worker loop: resolve the shop id of each queued SKU and store it.

    Until the module-level ``EXIT_FLAG`` becomes true, each pass takes one
    SKU from ``queue``, downloads the mobile product page, extracts the shop
    id with ``html_analysis.get_shop_id``, writes it to the matching row of
    ``table`` and inserts the shop into the ``shop`` table when it is not
    there yet.  The loop sleeps one second between passes.

    Args:
        thread_name: Label used only in the progress output.
        queue: Queue of SKU strings, read with ``queue.get()``.  Emptiness is
            tested on the module-level ``WORK_QUEUE`` (presumably the same
            object -- confirm against the caller).
        table: Product table name.  It is concatenated into the SQL text, so
            it must come from trusted code, never from user input.
    """
    while not EXIT_FLAG:
        try:
            # ``with`` releases the lock even if the dequeue raises; an
            # explicit acquire()/release() pair would leave it held forever
            # in that case and stall every other worker.
            with QUEUE_LOCK:
                has_work = not WORK_QUEUE.empty()
                if has_work:
                    sku = queue.get()
            if has_work:
                url = 'https://item.m.jd.com/product/' + sku + '.html'
                print(thread_name, url)
                _spider = jd_spider.Spider()
                html_data = _spider.get_html(url)
                # Only analyse a page that was actually fetched.  Falling
                # through after a failed fetch would reuse the ``result`` of
                # the previous pass and attach its shop id to this SKU.
                if html_data[0] != -1:
                    result = html_analysis.get_shop_id(html_data[1])
                    if result[0] != -1:
                        shop_id = result[1]
                        print("%s: shop_id %s" % (thread_name, shop_id))
                        sql = ('update ' + table +
                               ' set shop_id=%s where sku=%s ')
                        database_util.update_sql(sql, [shop_id, sku])
                        # Register the shop the first time it is seen.
                        count = list(
                            database_util.search_sql(
                                'select count(*) from shop where shop_id=%s',
                                shop_id)[1])[0][0]
                        if count == 0:
                            database_util.update_sql(
                                'insert into shop(shop_id) values(%s)',
                                shop_id)
        except Exception as err:
            # Thread boundary: report the failure and keep the worker alive.
            print(err)
        time.sleep(1)
コード例 #2
0
def update_shop_info(thread_name, queue, table):
    """Worker loop: refresh follower count and name of each queued shop.

    Until the module-level ``EXIT_FLAG`` becomes true, each pass takes one
    shop id from ``queue``, downloads the mobile shop page, parses it with
    ``html_analysis.get_shop_info`` and writes ``follow``, ``shop_name`` and
    the current timestamp to the ``shop`` table.  The loop sleeps one second
    between passes.

    Args:
        thread_name: Label used only in the progress output.
        queue: Queue of shop-id strings, read with ``queue.get()``.
            Emptiness is tested on the module-level ``WORK_QUEUE``
            (presumably the same object -- confirm against the caller).
        table: Unused here; kept so all workers share one signature.
    """
    while not EXIT_FLAG:
        try:
            # ``with`` guarantees the lock is released even if the dequeue
            # raises, which the explicit acquire()/release() pair did not.
            with QUEUE_LOCK:
                has_work = not WORK_QUEUE.empty()
                if has_work:
                    shop_id = queue.get()
            if has_work:
                _spider = jd_spider.Spider()
                url = 'https://shop.m.jd.com/?shopId=' + shop_id
                print(url)
                html_data = _spider.get_html(url)
                # Parse only a page that was actually fetched; otherwise the
                # ``result`` of the previous pass would be written to this
                # shop.
                if html_data[0] != -1:
                    result = html_analysis.get_shop_info(html_data[1])
                    if result[0] != -1:
                        follow = result[1]
                        shop_name = result[2]
                        print("%s: %s %d " % (thread_name, shop_name, follow))
                        sql = 'update shop set update_time=%s,follow=%s,shop_name=%s where shop_id=%s '
                        params = [
                            datetime.datetime.now(), follow, shop_name,
                            shop_id,
                        ]
                        database_util.update_sql(sql, params)
        except Exception as err:
            # Thread boundary: report the failure and keep the worker alive.
            print('thread_queue update_shop_info err:' + str(err))
        time.sleep(1)
コード例 #3
0
def get_sku(table):
    """Fill the ``sku`` column of ``table`` from each row's product URL.

    The SKU is the last path segment of the URL with a trailing ``.html``
    removed, e.g. ``https://item.jd.com/12345.html`` -> ``12345``.

    Nothing is written when the initial query reports failure (status ``-1``
    in the first element of the ``search_sql`` result).

    Args:
        table: Product table name.  It is concatenated into the SQL text, so
            it must come from trusted code.
    """
    result = database_util.search_sql('select url,id from ' + table, None)
    if result[0] == -1:
        return

    update = 'update ' + table + ' set sku=%s where id=%s'
    suffix = '.html'
    for url, row_id in result[1]:
        # str.strip() removes a *set of characters*, not a prefix/suffix, so
        # it can also eat leading or trailing characters of the SKU itself.
        # Cut the URL structurally instead.
        sku = url.rstrip('/').rsplit('/', 1)[-1]
        if sku.endswith(suffix):
            sku = sku[:-len(suffix)]
        database_util.update_sql(update, [sku, row_id])
コード例 #4
0
def unify_brand(table, old_brand='360手机', new_brand='360'):
    """Rename one brand spelling to another for every matching row.

    Selects the rows of ``table`` whose ``brand`` equals ``old_brand`` and
    sets their ``brand`` to ``new_brand`` one SKU at a time, printing each
    SKU as it is updated.  Nothing is updated when the query reports failure
    (status ``-1`` in the first element of the ``search_sql`` result).

    Args:
        table: Product table name.  It is concatenated into the SQL text, so
            it must come from trusted code.
        old_brand: Brand value to replace.  Defaults to the value that was
            previously hard-coded.
        new_brand: Replacement brand value.  Defaults to the value that was
            previously hard-coded.
    """
    select = 'select sku,brand from ' + table + ' where brand=%s'
    result = database_util.search_sql(select, old_brand)
    # The loop must only run on a successful query: on failure ``result`` is
    # the raw status tuple, and iterating it would index into an int.
    if result[0] == -1:
        return

    update = 'update ' + table + ' set brand=%s where sku=%s'
    for row in result[1]:
        sku = row[0]
        database_util.update_sql(update, [new_brand, sku])
        print(sku)
コード例 #5
0
def get_param(table):
    """Prune low-follower products, then crawl details of unvisited ones.

    First deletes every row of ``table`` whose shop has fewer than 10000
    followers.  Then collects the URLs of rows whose ``update_time`` is
    still NULL, loads them into the shared work queue and starts the
    ``get_param`` workers through ``thread_queue.use_threading``.

    No workers are started when the URL query reports failure (status ``-1``
    in the first element of the ``search_sql`` result).

    Args:
        table: Product table name.  It is concatenated into the SQL text, so
            it must come from trusted code.
    """
    # Delete the products that belong to shops with few followers.  The
    # extra derived table wraps the subquery that reads the table being
    # deleted from.
    sql = ('delete from ' + table + ' where sku in (select a.sku from '
           '(select a.sku from ' + table + ' a,shop b '
           'where a.shop_id=b.shop_id and b.follow<10000) a)')
    database_util.update_sql(sql, None)

    sql = 'SELECT url FROM ' + table + ' where update_time is null'
    result = database_util.search_sql(sql, None)
    # Check the status like the sibling functions do; the payload of a
    # failed query is not a row list and must not be queued as URLs.
    if result[0] == -1:
        return

    url_list = [row[0] for row in result[1]]
    thread_queue.fill_queue(url_list)
    thread_queue.use_threading(['get_param', table])
コード例 #6
0
def temp(table):
    """Copy shop ids found in ``computer`` into ``shop`` when missing.

    Reads every non-NULL ``shop_id`` from the ``computer`` table and, for
    each one that has no row in ``shop`` yet, inserts it.

    Args:
        table: Unused; the source table name is fixed to ``computer``.
    """
    source_query = 'select shop_id from computer where shop_id is not null'
    exists_query = 'select count(*) from shop where shop_id=%s'
    insert_stmt = 'insert into shop(shop_id) values(%s)'

    outcome = list(database_util.search_sql(source_query, None))
    for row in outcome[1]:
        shop_id = row[0]
        matches = list(database_util.search_sql(exists_query, shop_id)[1])
        if matches[0][0] != 0:
            # Shop already registered.
            continue
        database_util.update_sql(insert_stmt, shop_id)
コード例 #7
0
def update_img(table):
    """Rewrite thumbnail image URLs in ``table`` to the larger variant.

    For every row, the path fragment ``n5/s54x54_jfs`` in ``img`` is replaced
    with ``n7/jfs`` and the row is written back by SKU.  The old and the new
    URL are printed for each row.  Example of the two URL shapes:

        https://img11.360buyimg.com/n5/s54x54_jfs/t5773/143/1465870132/216483/4bbce005/592692d8Nbcc8f248.jpg
        https://img10.360buyimg.com/n7/jfs/t18772/89/1863054684/170815/d28ecae1/5adca3deN76bb61cb.jpg

    Args:
        table: Product table name, concatenated into the SQL text.
    """
    outcome = database_util.search_sql('select img,sku from ' + table, None)
    if outcome[0] == -1:
        return

    write_back = 'update ' + table + ' set img=%s where sku=%s'
    for row in list(outcome[1]):
        old_url, sku = row[0], row[1]
        print(old_url)
        new_url = old_url.replace('n5/s54x54_jfs', 'n7/jfs')
        print(new_url + '\n')
        database_util.update_sql(write_back, [new_url, sku])
コード例 #8
0
def insert_url(thread_name, queue, table):
    """Worker loop: insert each queued URL into ``table`` if it is new.

    Until the module-level ``EXIT_FLAG`` becomes true, each pass takes one
    URL from ``queue``, counts the rows of ``table`` that already hold it and
    inserts a new row when there is none.  The loop sleeps one second between
    passes.

    Args:
        thread_name: Unused here; kept so all workers share one signature.
        queue: Queue of URL strings, read with ``queue.get()``.  Emptiness is
            tested on the module-level ``WORK_QUEUE`` (presumably the same
            object -- confirm against the caller).
        table: Product table name.  It is concatenated into the SQL text, so
            it must come from trusted code.
    """
    while not EXIT_FLAG:
        try:
            # ``with`` guarantees the lock is released even if the dequeue
            # raises, which the explicit acquire()/release() pair did not.
            with QUEUE_LOCK:
                has_work = not WORK_QUEUE.empty()
                if has_work:
                    url = queue.get()
            if has_work:
                count = list(
                    database_util.search_sql(
                        'select count(*) url from ' + table + ' where url=%s',
                        url)[1])[0][0]
                if count == 0:
                    sql = 'insert into ' + table + ' set url=%s'
                    database_util.update_sql(sql, url)
        except Exception as err:
            # The message used to name ``update_price`` (copy-paste slip),
            # which pointed log readers at the wrong worker.
            print('thread_queue insert_url err:' + str(err))
        time.sleep(1)
コード例 #9
0
def update_shop_info(table):
    """Refresh stale shops, then copy follower counts into ``brand_hot``.

    Step 1 queues the ids of the shops selected by the staleness query below
    (day difference between now and ``update_time`` of at least one) and
    starts the ``update_shop_info`` workers through
    ``thread_queue.use_threading``.  When that query fails the queue is
    filled with an empty list, as before.

    Step 2 reads ``brand``/``follow`` pairs from ``table`` and stores each
    ``follow`` value as ``brand_hot`` on all rows of that brand.  It is
    skipped when its query reports failure (status ``-1`` in the first
    element of the ``search_sql`` result).

    Args:
        table: Product table name.  It is concatenated into the SQL text, so
            it must come from trusted code.
    """
    sql = 'SELECT shop_id FROM shop where TO_DAYS(NOW()) - TO_DAYS(update_time) >=1'
    result = database_util.search_sql(sql, None)
    shop_ids = []
    if result[0] != -1:
        shop_ids = [row[0] for row in result[1]]
    thread_queue.fill_queue(shop_ids)
    thread_queue.use_threading(['update_shop_info', table])

    sql = 'select brand,follow from ' + table + ' group by brand order by follow'
    result = database_util.search_sql(sql, None)
    # Iterate only after a successful query: on failure ``result`` is the raw
    # status tuple, and looping over it would index into an int.
    if result[0] != -1:
        update = 'update ' + table + ' set brand_hot=%s where brand=%s'
        for row in result[1]:
            brand, follow = row[0], row[1]
            database_util.update_sql(update, [follow, brand])
コード例 #10
0
def get_comment(queue, table, page_no):
    """Worker loop: crawl the comments of each queued SKU.

    Until the module-level ``EXIT_FLAG`` becomes true, each pass takes one
    SKU from ``queue`` and calls ``Spider.get_comment(table, sku, page_no)``.
    When that call does not report failure (status ``-1`` in the first
    element of its result), the row's ``update_comment_time`` is set to the
    current time.  The loop sleeps one second between passes.

    Args:
        queue: Queue of SKU values, read with ``queue.get()``.  Emptiness is
            tested on the module-level ``WORK_QUEUE`` (presumably the same
            object -- confirm against the caller).
        table: Product table name.  It is passed to the spider and
            concatenated into the SQL text, so it must come from trusted
            code.
        page_no: Forwarded unchanged to ``Spider.get_comment``.
    """
    while not EXIT_FLAG:
        try:
            # ``with`` guarantees the lock is released even if the dequeue
            # raises, which the explicit acquire()/release() pair did not.
            with QUEUE_LOCK:
                has_work = not WORK_QUEUE.empty()
                if has_work:
                    sku = queue.get()
            if has_work:
                _spider = jd_spider.Spider()
                result = _spider.get_comment(table, sku, page_no)
                if result[0] != -1:
                    sql = ('update ' + table +
                           ' set update_comment_time=%s where sku=%s ')
                    database_util.update_sql(
                        sql, [datetime.datetime.now(), sku])
        except Exception as err:
            # Thread boundary: report the failure and keep the worker alive.
            print('thread_queue get_comment err:' + str(err))
        time.sleep(1)
コード例 #11
0
def update_price(thread_name, queue, table):
    """Worker loop: fetch the current price of each queued product.

    Until the module-level ``EXIT_FLAG`` becomes true, each pass takes one
    job from ``queue``, asks ``Spider.get_price`` for the current price and,
    when that succeeds, updates the running max / min / average price and the
    sample count of the product in ``table``.  The loop sleeps one second
    between passes.

    Args:
        thread_name: Label used only in the progress output.
        queue: Queue of mappings with the keys ``sku``, ``max_price``,
            ``min_price``, ``avg_price`` and ``price_times``, read with
            ``queue.get()``.  Emptiness is tested on the module-level
            ``WORK_QUEUE`` (presumably the same object -- confirm against
            the caller).
        table: Product table name.  It is concatenated into the SQL text, so
            it must come from trusted code.
    """
    while not EXIT_FLAG:
        try:
            # ``with`` guarantees the lock is released even if the dequeue
            # raises, which the explicit acquire()/release() pair did not.
            with QUEUE_LOCK:
                has_work = not WORK_QUEUE.empty()
                if has_work:
                    job = queue.get()
            if has_work:
                sku = job['sku']
                max_price = job['max_price']
                min_price = job['min_price']
                avg_price = job['avg_price']
                price_times = job['price_times']
                _spider = jd_spider.Spider()
                price_result = _spider.get_price(sku)
                if price_result[0] != -1:
                    cur_price = price_result[1]
                    max_price = max(max_price, cur_price)
                    min_price = min(min_price, cur_price)
                    # Incremental mean: fold the new sample into the average
                    # of the ``price_times`` earlier samples.
                    avg_price = round((avg_price * price_times + cur_price) /
                                      (price_times + 1), 2)
                    price_times += 1
                    print("%s: %.2f, %.2f, %.2f, %.2f" %
                          (thread_name, max_price, min_price, avg_price,
                           cur_price))
                    sql = 'update ' + table + ' set update_price_time=%s,max_price=%s,min_price=%s,avg_price=%s,price=%s,price_times=%s where sku=%s '
                    params = [
                        datetime.datetime.now(), max_price, min_price,
                        avg_price, cur_price, price_times, sku,
                    ]
                    database_util.update_sql(sql, params)
        except Exception as err:
            # Thread boundary: report the failure and keep the worker alive.
            print('thread_queue update_price err:' + str(err))
        time.sleep(1)
コード例 #12
0
def update_score(thread_name, queue, table, para):
    """Worker loop: recompute the weighted score of each queued product.

    Until the module-level ``EXIT_FLAG`` becomes true, each pass takes one
    SKU from ``queue``, reads its ``rate``, ``follow``, ``comment``,
    ``sentiment`` and ``brand_hot`` columns from ``table`` and stores

        rate * 100 * w_rate + follow * w_follow + comment * w_comment
        + sentiment * w_sentiment + brand_hot * w_brand

    rounded to two decimals in the ``score`` column.  The loop sleeps one
    second between passes.

    Args:
        thread_name: Unused here; kept so all workers share one signature.
        queue: Queue of SKU values, read with ``queue.get()``.  Emptiness is
            tested on the module-level ``WORK_QUEUE`` (presumably the same
            object -- confirm against the caller).
        table: Product table name.  It is concatenated into the SQL text, so
            it must come from trusted code.
        para: Mapping with the weights ``w_rate``, ``w_follow``,
            ``w_comment``, ``w_sentiment`` and ``w_brand``.
    """
    while not EXIT_FLAG:
        try:
            # ``with`` guarantees the lock is released even if the dequeue
            # raises, which the explicit acquire()/release() pair did not.
            with QUEUE_LOCK:
                has_work = not WORK_QUEUE.empty()
                if has_work:
                    sku = queue.get()
            if has_work:
                w_rate = para['w_rate']
                w_follow = para['w_follow']
                w_comment = para['w_comment']
                w_sentiment = para['w_sentiment']
                w_brand = para['w_brand']

                sql = 'select sku,rate,follow,comment,sentiment,brand_hot from ' + table + ' where sku=%s'
                result = database_util.search_sql(sql, sku)
                if result[0] != -1:
                    update = 'update ' + table + ' set score=%s where sku=%s'
                    for row in result[1]:
                        row_sku = row[0]
                        # ``rate`` is scaled by 100 before weighting
                        # (presumably a 0..1 fraction -- confirm).
                        rate = float(row[1]) * 100
                        follow = int(row[2])
                        comment = int(row[3])
                        sentiment = int(row[4])
                        brand_hot = int(row[5])
                        score = round(
                            (rate * w_rate + follow * w_follow +
                             comment * w_comment + sentiment * w_sentiment +
                             brand_hot * w_brand), 2)
                        database_util.update_sql(update, [score, row_sku])
        except Exception as err:
            # Thread boundary: report the failure and keep the worker alive.
            print('thread_queue update_score err:' + str(err))
        time.sleep(1)
コード例 #13
0
def get_param(thread_name, queue, table):
    """Worker loop: crawl the full details of each queued product URL.

    Until the module-level ``EXIT_FLAG`` becomes true, each pass takes one
    product URL from ``queue``, downloads the detail page, fills an
    ``item.Item`` with the page parameters, the rating data and the current
    price, and writes it to the row of ``table`` with the matching SKU.  The
    first price sample initialises ``max_price``, ``min_price`` and
    ``avg_price`` and sets ``price_times`` to 1.  The loop sleeps one second
    between passes, including after a failed download.

    Args:
        thread_name: Label used only in the progress output.
        queue: Queue of product URLs, read with ``queue.get()``.  Emptiness
            is tested on the module-level ``WORK_QUEUE`` (presumably the same
            object -- confirm against the caller).
        table: Product table name.  It is concatenated into the SQL text, so
            it must come from trusted code.
    """
    suffix = '.html'
    while not EXIT_FLAG:
        try:
            # ``with`` guarantees the lock is released even if the dequeue
            # raises, which the explicit acquire()/release() pair did not.
            with QUEUE_LOCK:
                has_work = not WORK_QUEUE.empty()
                if has_work:
                    url = queue.get()
            if has_work:
                _item = item.Item()
                _spider = jd_spider.Spider()
                # Fetch the HTML of the product detail page.
                html_data = _spider.get_html(url)
                # A nested ``if`` instead of ``continue`` keeps the
                # one-second pause below in effect after a failed download.
                if html_data[0] != -1:
                    # str.strip() removes a *set of characters*, not a
                    # prefix/suffix; cut the URL structurally instead.
                    sku = url.rstrip('/').rsplit('/', 1)[-1]
                    if sku.endswith(suffix):
                        sku = sku[:-len(suffix)]
                    # Ordinary product parameters parsed from the page.
                    _item = html_analysis.get_all_param(html_data[1], _item)
                    # Rating / review related information.
                    _item = _spider.get_rate(sku, _item)
                    result = _spider.get_price(sku)
                    if result[0] != -1:
                        _item.price = result[1]
                    _item.price_times = 1

                    print(thread_name, url)
                    sql = 'update ' + table + ' set description=%s,price=%s,img=%s,brand=%s,name=%s,update_time=%s,' \
                          'comment=%s,rate=%s,max_price=%s,min_price=%s,avg_price=%s,price_times=%s,update_price_time=%s,update_rate_time=%s where sku=%s '
                    params = [
                        _item.description, _item.price, _item.img,
                        _item.brand, _item.name, _item.update_time,
                        _item.comment, _item.rate,
                        # max / min / avg all start at the first price seen.
                        _item.price, _item.price, _item.price, 1,
                        _item.update_price_time, _item.update_rate_time, sku,
                    ]
                    database_util.update_sql(sql, params)
        except Exception as err:
            # Thread boundary: report the failure and keep the worker alive.
            print('thread_queue get_param err:' + str(err))
        time.sleep(1)
コード例 #14
0
def del_items(table):
    """Delete every row of ``table`` whose ``shop_name`` is NULL.

    Args:
        table: Table name, spliced directly into the SQL text.
    """
    statement = ''.join(('delete from ', table, ' where shop_name is null;'))
    database_util.update_sql(statement, None)