Esempio n. 1
0
def init_crawler_state():
    """Reset ``crawler_state`` to 0 on the crawler data tables.

    For each of ``amazon_product_data``, ``amazon_product_data_tosell`` and
    ``amazon_keyword_data`` (schema ``public``), every row whose
    ``getinfo_tm`` is lower than the current time in milliseconds gets
    ``crawler_state=0``. All three updates are committed together; the
    cursor and connection are closed even if one of the statements fails.

    Returns:
        None. The executed SQL and affected row count are printed per table.
    """
    time_now = int(time.time() * 1000)
    tables = (
        'public.amazon_product_data',
        'public.amazon_product_data_tosell',
        'public.amazon_keyword_data',
    )
    dbObj = GetDbObj().get_db_obj()
    try:
        cur = dbObj.cursor()
        try:
            for index, table in enumerate(tables, start=1):
                # Both interpolated values are local (a hard-coded table name
                # and an int timestamp), so there is no injection surface.
                update_sql = 'update %s set crawler_state=0 where getinfo_tm < %s;' % (
                    table, time_now)
                cur.execute(update_sql)
                print('%s %s %s行更新成功' % (index, update_sql, cur.rowcount))
            # Single commit: either all three tables are reset or none is.
            dbObj.commit()
        finally:
            cur.close()
    finally:
        dbObj.close()
Esempio n. 2
0
def tosell_save(dataQ, debug_log, db_log):
    """Drain the ``tosellData`` Redis queue and store every entry in the DB.

    Each batch returned by ``dataQ.get_new_tosellData()`` is indexed as
    ``datas[asin][0]`` (a summary dict with keys such as ``getinfo_tm``,
    ``sname``, ``seller_id``) and ``datas[asin][1]`` (a list of per-offer
    dicts with keys such as ``aday`` and ``tm``). An ASIN is only written
    when ``public.amazon_product_tosell`` has no row yet for that ASIN and
    ``aday``.

    Args:
        dataQ: Queue wrapper exposing ``RedisQ.llen`` and
            ``get_new_tosellData``; also handed to ``DataOutput``.
        debug_log: Logger handed to ``DataOutput``.
        db_log: Logger handed to ``DataOutput`` and used (via ``war``) for
            the final status message.

    Returns:
        None.
    """
    print('\ntosell_save init\n')
    data_type = 'tosell'
    if dataQ.RedisQ.llen('tosellData') <= 0:
        db_log.war('%s, %s数据队列为空\n' %
                   (return_PST().strftime("%Y-%m-%d %H:%M:%S"), data_type))
        return

    exists_sql = "select asin, aday from public.amazon_product_tosell where asin=%(asin)s and aday=%(aday)s limit 1;"
    # Parameterised instead of string-formatted: asin comes from the queue.
    seller_sql = ("select sname, seller_id from public.amazon_product_data "
                  "where asin=%(asin)s and getinfo_tm > %(since)s")

    dbObj = GetDbObj().get_db_obj()
    cur = None
    try:
        cur = dbObj.cursor()
        dataOutput = DataOutput(dbObj, cur, db_log, debug_log, dataQ)
        data_tosell_db_name = SqlConfig.data_tosell_db_name
        data_tosell_update_sql = SqlConfig.data_tosell_update_sql
        data_tosell_insert_sql = SqlConfig.data_tosell_insert_sql

        druid_tosell_db_name = SqlConfig.druid_tosell_db_name
        # No update statement is passed for the druid table (insert only).
        druid_tosell_update_sql = None
        druid_tosell_insert_sql = SqlConfig.druid_tosell_insert_sql

        while True:
            datas = dataQ.get_new_tosellData()
            pprint(datas)  # debug trace
            if not datas:
                if dataQ.RedisQ.llen('tosellData') > 0:
                    # Queue is not empty after all: fetch again rather than
                    # iterating over a possibly-None result.
                    continue
                break

            tm = DataOutput.get_redis_time()
            for asin in datas:
                tosell_datas = datas[asin][0]
                tosell_list = datas[asin][1]

                pprint(tosell_datas)
                pprint(tosell_list)
                print(tosell_datas['getinfo_tm'], 1)
                tosell_datas['getinfo_tm'] = tm
                print(tosell_datas['getinfo_tm'], 2)

                # Use the first offer's day, falling back to today's PST date.
                aday = tosell_list[0]['aday'] if len(
                    tosell_list) > 0 else return_PST().strftime('%Y%m%d')
                cur.execute(exists_sql, {'asin': asin, 'aday': aday})
                select_rows = cur.fetchall()
                dbObj.commit()
                if len(select_rows) >= 1:
                    # Already stored for this asin/day.
                    continue

                if not tosell_datas.get('sname'):
                    print(222222)  # debug marker
                    # Fill in the seller from product data seen in the last
                    # 24 hours (tm is in milliseconds).
                    cur.execute(seller_sql, {
                        'asin': asin,
                        'since': tm - 24 * 3600 * 1000,
                    })
                    seller_rows = cur.fetchall()
                    dbObj.commit()
                    # Only an unambiguous single match is trusted.
                    sname, seller_id = seller_rows[0] if len(
                        seller_rows) == 1 else ('', '')
                    print('seller_id: ', seller_id)
                    print('sname ', sname)
                    tosell_datas['sname'] = sname
                    tosell_datas['seller_id'] = seller_id

                dataOutput.save_data_to_db(
                    data_tosell_update_sql,
                    data_tosell_insert_sql,
                    asin,
                    tosell_datas,
                    db_name=data_tosell_db_name)

                for offer in tosell_list:
                    offer['tm'] = int(tm / 1000)  # milliseconds -> seconds
                    dataOutput.save_data_to_db(
                        druid_tosell_update_sql,
                        druid_tosell_insert_sql,
                        asin,
                        offer,
                        db_name=druid_tosell_db_name)

                # Record the update time.
                dataOutput.crawler_tm(asin, data_type)
    finally:
        # Release DB resources even when a query or save fails.
        if cur is not None:
            cur.close()
        dbObj.close()

    db_log.war('%s, %s线程任务已完成\n' %
               (return_PST().strftime("%Y-%m-%d %H:%M:%S"), data_type))
Esempio n. 3
0
def tosell_save(dataQ, debug_log, db_log):
    """Drain the ``tosellData`` Redis queue and store every entry in the DB.

    Each batch returned by ``dataQ.get_new_tosellData()`` is indexed as
    ``datas[asin][0]`` (a summary dict with keys such as ``getinfo_tm``,
    ``sname``, ``seller_id``) and ``datas[asin][1]`` (a list of per-offer
    dicts with keys such as ``aday`` and ``tm``). An ASIN is only written
    when ``public.amazon_product_tosell`` has no row yet for that ASIN and
    ``aday``.

    Args:
        dataQ: Queue wrapper exposing ``RedisQ.llen`` and
            ``get_new_tosellData``; also handed to ``DataOutput``.
        debug_log: Logger handed to ``DataOutput``.
        db_log: Logger handed to ``DataOutput`` and used (via ``war``) for
            the final status message.

    Returns:
        None.
    """
    print('\ntosell_save init\n')
    data_type = 'tosell'
    if dataQ.RedisQ.llen('tosellData') <= 0:
        db_log.war('%s, %s数据队列为空\n' %
                   (return_PST().strftime("%Y-%m-%d %H:%M:%S"), data_type))
        return

    exists_sql = "select asin, aday from public.amazon_product_tosell where asin=%(asin)s and aday=%(aday)s limit 1;"
    # Parameterised instead of string-formatted: asin comes from the queue.
    seller_sql = ("select sname, seller_id from public.amazon_product_data "
                  "where asin=%(asin)s and getinfo_tm > %(since)s")

    dbObj = GetDbObj().get_db_obj()
    cur = None
    try:
        cur = dbObj.cursor()
        dataOutput = DataOutput(dbObj, cur, db_log, debug_log, dataQ)
        data_tosell_db_name = SqlConfig.data_tosell_db_name
        data_tosell_update_sql = SqlConfig.data_tosell_update_sql
        data_tosell_insert_sql = SqlConfig.data_tosell_insert_sql

        druid_tosell_db_name = SqlConfig.druid_tosell_db_name
        # No update statement is passed for the druid table (insert only).
        druid_tosell_update_sql = None
        druid_tosell_insert_sql = SqlConfig.druid_tosell_insert_sql

        while True:
            datas = dataQ.get_new_tosellData()
            if not datas:
                if dataQ.RedisQ.llen('tosellData') > 0:
                    # Queue is not empty after all: fetch again rather than
                    # iterating over a possibly-None result.
                    continue
                break

            tm = DataOutput.get_redis_time()
            for asin in datas:
                tosell_datas = datas[asin][0]
                tosell_list = datas[asin][1]

                print(tosell_datas['getinfo_tm'], 1)
                tosell_datas['getinfo_tm'] = tm
                print(tosell_datas['getinfo_tm'], 2)

                # Use the first offer's day, falling back to today's PST date.
                aday = tosell_list[0]['aday'] if len(
                    tosell_list) > 0 else return_PST().strftime('%Y%m%d')
                cur.execute(exists_sql, {'asin': asin, 'aday': aday})
                select_rows = cur.fetchall()
                dbObj.commit()
                if len(select_rows) >= 1:
                    # Already stored for this asin/day.
                    continue

                print(tosell_datas)
                if not tosell_datas.get('sname'):
                    # Fill in the seller from product data seen in the last
                    # 24 hours (tm is in milliseconds).
                    cur.execute(seller_sql, {
                        'asin': asin,
                        'since': tm - 24 * 3600 * 1000,
                    })
                    seller_rows = cur.fetchall()
                    dbObj.commit()
                    # Only an unambiguous single match is trusted.
                    sname, seller_id = seller_rows[0] if len(
                        seller_rows) == 1 else ('', '')
                    print('seller_id: ', seller_id)
                    print('sname ', sname)
                    tosell_datas['sname'] = sname
                    tosell_datas['seller_id'] = seller_id

                dataOutput.save_data_to_db(
                    data_tosell_update_sql,
                    data_tosell_insert_sql,
                    asin,
                    tosell_datas,
                    db_name=data_tosell_db_name)

                for offer in tosell_list:
                    offer['tm'] = int(tm / 1000)  # milliseconds -> seconds
                    dataOutput.save_data_to_db(
                        druid_tosell_update_sql,
                        druid_tosell_insert_sql,
                        asin,
                        offer,
                        db_name=druid_tosell_db_name)

                # Record the update time.
                dataOutput.crawler_tm(asin, data_type)
    finally:
        # Release DB resources even when a query or save fails.
        if cur is not None:
            cur.close()
        dbObj.close()

    db_log.war('%s, %s线程任务已完成\n' %
               (return_PST().strftime("%Y-%m-%d %H:%M:%S"), data_type))