Beispiel #1
0
 def __init__(self):
     """Open the storage handles this object works with.

     Creates a Redis handle, a MongoDB handle and a MySQL connection with
     a cursor, stores the value of ``date()`` and selects the MongoDB
     collection named ``m_<date>``, making sure it has a unique index on
     ``item_data``.
     """
     self.redis = db_redis()
     self.mongodb = db_mongodb()
     # Only initialised here; nothing in this constructor assigns a handle.
     self.mongodb_remote = None
     self.date = date()
     self.collection = self.mongodb['m_{0}'.format(self.date)]
     # create_index replaces ensure_index, which is deprecated and was
     # removed in PyMongo 4. Re-creating an identical index is a no-op.
     self.collection.create_index('item_data', unique=True)
     self.mysql = db_mysql()
     self.mysql_cursor = self.mysql.cursor()
Beispiel #2
0
def save_all_top_category():
    """Write the top-level parent of every known category id to Redis.

    Each id yielded by ``all_category_in_redis()`` is resolved with
    ``get_top_parent`` and stored in the ``ebay:top_category_id_us`` hash
    (field: category id, value: top id). The distinct top ids and their
    count are printed once the loop finishes.
    """
    client = db_redis()
    seen_tops = set()
    # TODO: switch from MySQL to reading leaf_category from Redis.
    for category_id in all_category_in_redis():
        top_id = get_top_parent(category_id)
        seen_tops.add(top_id)
        client.hset('ebay:top_category_id_us', int(category_id), int(top_id))
    print(seen_tops)
    print(len(seen_tops))
Beispiel #3
0
 def __init__(self, redis=None, mongodb=None, mysql=None, datetime=None):
     """Store the storage handles and start the statistician.

     Args:
         redis: Redis handle; ``db_redis()`` is used when falsy.
         mongodb: MongoDB handle; ``db_mongodb('mongodb_remote')`` is
             used when falsy.
         mysql: MySQL connection; ``db_mysql()`` is used when falsy.
         datetime: Value stored on ``self.date``; ``date()`` is used
             when falsy.

     The ``mysql_local`` connection is best-effort. When it cannot be
     opened, ``mysql_local`` and/or ``mysql_cursor_local`` stay ``None``
     instead of being left undefined.
     """
     self.redis = redis or db_redis()
     self.mongodb = mongodb or db_mongodb('mongodb_remote')
     self.mysql = mysql or db_mysql()
     self.mysql_cursor = self.mysql.cursor()
     self.date = datetime or date()
     # Define both attributes up front so later access never hits a
     # missing attribute when the local connection is unavailable.
     self.mysql_local = None
     self.mysql_cursor_local = None
     try:
         self.mysql_local = db_mysql('mysql_local')
         self.mysql_cursor_local = self.mysql_local.cursor()
     except Exception:
         # Deliberately best-effort. Catching Exception (not a bare
         # except) lets KeyboardInterrupt and SystemExit propagate.
         pass
     self.start_statistician()
Beispiel #4
0
def main():
    """Run the site-wide goods statistics, then the site-wide shop statistics.

    Both statisticians receive the same Redis handle, remote MongoDB
    handle, remote MySQL connection and date value.
    """
    shared = {
        'redis': db_redis(),
        'mongodb': db_mongodb('mongodb_remote'),
        'mysql': db_mysql('mysql_remote'),
        'datetime': date(),
    }
    # Site-wide goods statistics.
    goods = GoodsStatistician(**shared)
    goods.save()
    # Site-wide shop statistics.
    shops = ShopStatistician(**shared)
    shops.save(process=32)
Beispiel #5
0
 def shop_values(shop_list, redis=None):
     """Yield one statistics row for each JSON-encoded shop record.

     Args:
         shop_list: Iterable of UTF-8 encoded JSON documents. Each must
             provide ``shop_name``, ``shop_feedback_score``,
             ``shop_feedback_percent`` and ``shop_open_time``.
         redis: Object exposing ``hget(key, field)``; ``db_redis()`` is
             used when falsy.

     Yields:
         dict: The shop's identity fields plus the counters read from the
         ``ebay:shop:*`` hashes and the week-over-week sales ratio.
     """
     client = redis or db_redis()

     def read_int(hash_key, field):
         # A missing hash field counts as zero.
         return int(client.hget(hash_key, field) or 0)

     for raw in shop_list:
         shop = json.loads(str(raw, encoding='utf8'))
         name = shop['shop_name']

         goods_total = read_int('ebay:shop:count', name)
         this_week = read_int('ebay:shop:week_sold', name)
         last_week = read_int('ebay:shop:last_week_sold', name)
         goods_sold = read_int('ebay:shop:has_sold_count', name)
         total_sold = read_int('ebay:shop:total_sold', name)
         amount = round(float(client.hget('ebay:shop:amount', name) or 0), 2)

         yield {
             'shop_name': name,
             'shop_feedback_score': shop['shop_feedback_score'],
             'shop_feedback_percentage': shop['shop_feedback_percent'],
             'sold_goods_count': goods_sold,
             'total_goods_count': goods_total,
             'total_sold': total_sold,
             'weeks_sold': this_week,
             'last_weeks_sold': last_week,
             'amount': amount,
             'shop_open_time': shop['shop_open_time'],
             # The +1 in the denominator avoids dividing by zero when
             # nothing was sold last week.
             'weeks_inc_ratio': (this_week - last_week) / (last_week + 1),
         }
Beispiel #6
0
def save_leaf_category(file):
    """Add the leaf category ids found in ``<file>.xml`` to a Redis set.

    Every ``category`` element whose ``leafcategory`` text is ``'true'``
    contributes its ``categoryid`` (as an int) to the
    ``ebay:leaf_category_ids_us`` set. A blank line, the file name, the
    number of leaf categories and the total number of categories are
    printed afterwards.

    Args:
        file: Path of the XML document without the ``.xml`` suffix.
    """
    redis = db_redis()

    # Read raw bytes so the content is not decoded with the platform's
    # default encoding and then re-encoded as UTF-8 before parsing.
    with open('{0}.xml'.format(file), 'rb') as f:
        xml = f.read()
    # NOTE(review): the document is parsed with the HTML parser, hence the
    # html/body prefix and lower-case tag names in the XPath expressions;
    # confirm this is intentional.
    data = etree.HTML(xml)
    result = data.xpath('//html/body/getcategoriesresponse/categoryarray/category')

    count = 0
    for category in result:
        leaf = category.xpath('leafcategory/text()')
        if leaf and leaf[0] == 'true':
            count += 1
            category_id = category.xpath('categoryid/text()')[0]
            redis.sadd('ebay:leaf_category_ids_us', int(category_id))

    print()
    print(file)
    print(count)
    print(len(result))
Beispiel #7
0
def all_category_in_redis(key='ebay:leaf_category_ids_us'):
    """Yield every member of a Redis set converted to ``int``.

    Args:
        key: Name of the Redis set to read. Defaults to
            ``ebay:leaf_category_ids_us``.

    Yields:
        int: One set member per iteration.
    """
    r = db_redis()
    # Use the caller-supplied key; the set name used to be hard-coded here,
    # so the parameter had no effect.
    for member in r.smembers(key):
        yield int(member)
Beispiel #8
0
    def insert_to_mysql(shop_list, mysql=None, redis=None):
        """Bulk-insert one statistics row per shop into MySQL.

        Rows go into ``erp_spider.shop_statistics_<date()>``. A
        ``pymysql.err.IntegrityError`` raised by the insert is logged as a
        warning and not re-raised.

        Args:
            shop_list: Iterable of UTF-8 encoded JSON shop records.
            mysql: MySQL connection; ``db_mysql()`` is used when falsy.
            redis: Object exposing ``hget(key, field)``; ``db_redis()`` is
                used when falsy.
        """
        redis = redis or db_redis()
        mysql = mysql or db_mysql()
        cursor = mysql.cursor()

        def shop_values(shop_list, redis=None):
            """Yield the parameter dict for each JSON-encoded shop record."""
            r = redis or db_redis()

            def read_int(hash_key, field):
                # A missing hash field counts as zero.
                return int(r.hget(hash_key, field) or 0)

            for raw in shop_list:
                shop = json.loads(str(raw, encoding='utf8'))
                name = shop['shop_name']

                goods_total = read_int('ebay:shop:count', name)
                this_week = read_int('ebay:shop:week_sold', name)
                last_week = read_int('ebay:shop:last_week_sold', name)
                goods_sold = read_int('ebay:shop:has_sold_count', name)
                total_sold = read_int('ebay:shop:total_sold', name)
                amount = round(float(r.hget('ebay:shop:amount', name) or 0), 2)

                yield {
                    'shop_name': name,
                    'shop_feedback_score': shop['shop_feedback_score'],
                    'shop_feedback_percentage': shop['shop_feedback_percent'],
                    'sold_goods_count': goods_sold,
                    'total_goods_count': goods_total,
                    'total_sold': total_sold,
                    'weeks_sold': this_week,
                    'last_weeks_sold': last_week,
                    'amount': amount,
                    'shop_open_time': shop['shop_open_time'],
                    # The +1 in the denominator avoids dividing by zero
                    # when nothing was sold last week.
                    'weeks_inc_ratio':
                    (this_week - last_week) / (last_week + 1),
                }

        # Only the table suffix is formatted into the statement; all row
        # values are passed as named parameters.
        sql = """
            INSERT INTO erp_spider.shop_statistics_{0} (shop_name, shop_feedback_score, shop_feedback_percentage, sold_goods_count, total_goods_count, total_sold, weeks_sold, last_weeks_sold, amount, shop_open_time, weeks_inc_ratio ) 
            VALUES (%(shop_name)s, %(shop_feedback_score)s, %(shop_feedback_percentage)s, %(sold_goods_count)s, %(total_goods_count)s, %(total_sold)s, %(weeks_sold)s, %(last_weeks_sold)s, %(amount)s, %(shop_open_time)s, %(weeks_inc_ratio)s)
        """.format(date())
        try:
            data = list(shop_values(shop_list, redis))
            try:
                cursor.executemany(sql, data)
                mysql.commit()
            except pymysql.err.IntegrityError:
                logger.warning('Duplicate. sqls:\n{0}'.format(data[0]))
        finally:
            # The cursor is created in this function, so release it here
            # on every exit path.
            cursor.close()