コード例 #1
0
ファイル: summary_zx.py プロジェクト: zhanghtt/crawl-new2
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Module preamble: project imports plus three module-level values that are
# created once, at import time.
from mongo import op
import logging
# Logger named after this module.
logger = logging.getLogger(__name__)
import re
# Matches a string that consists entirely of one or more digits, a dot and
# exactly two digits (e.g. "79.90").
# NOTE(review): the identical assignment appears again further down in this
# module, so one of the two is redundant.
price_pattern = re.compile(r'^\d+\.\d\d$')
from multiprocess.tools import timeUtil
from jingdong.Tools import format_cat_id
# Captured once when the module is imported; it is not refreshed afterwards.
# The format of the returned value is defined by timeUtil.current_time().
current_date = timeUtil.current_time()
# A price is a string made up solely of digits, a dot and exactly two digits.
price_pattern = re.compile(r'^\d+\.\d\d$')


def clean_price(item):
    """Return the lowest well-formed price found among the values of *item*.

    Every value of the mapping is converted with ``str()`` and tested against
    ``price_pattern``; values that match are converted to ``float`` and the
    smallest one is returned.  When no value qualifies, the fixed fallback
    ``79.90`` is returned instead.

    :param item: mapping whose values may contain price strings.
    :return: the minimum matching price as a float, or 79.90 if none match.
    """
    # One findall() result per field; with the anchored pattern each result
    # holds at most a single full-string match.
    per_field_hits = (price_pattern.findall(str(item[field])) for field in item)

    # price_pattern admits no minus sign, so the "-1.00" comparison never
    # actually rejects anything; it is kept to mirror the guard as written.
    candidates = [
        float(hits[0])
        for hits in per_field_hits
        if hits and hits[0] != "-1.00"
    ]

    return min(candidates) if candidates else 79.90


def run_result():
    with op.DBManger() as m:
        brandid2name={}
        for brand_id, brand_name in m.read_from(db_collect=("jingdong", "jdbrand20210108retry0"),
                                                                              out_field=("brand_id", "name")):
コード例 #2
0
ファイル: test.py プロジェクト: zhanghtt/crawl-new2
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from multiprocess.tools import timeUtil, collections
from mongo import op

# Smoke-test the date helpers: print a formatted date, then the current time.
# NOTE(review): "%Y-%m%d" has no separator between %m and %d -- confirm that
# this is intended.
formatted_date = timeUtil.getdate(0, format="%Y-%m%d")
print(formatted_date)
current_time_value = timeUtil.current_time()
print(current_time_value)

# Exercise the DataSet pipeline one stage at a time: shuffle with a buffer of
# three, double each element, add one, drop duplicates, and print what is left.
dt = collections.DataSet([1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, 6])
shuffled = dt.shuffle(buffer_size=3)
doubled = shuffled.map(lambda x: x * 2)
incremented = doubled.map(lambda x: x + 1)
for i in incremented.distinct():
    print(i)

with op.DBManger() as m:
    last_brand_collect = m.get_lasted_collection(
        "jingdong", filter={"name": {
            "$regex": r"^brand20\d\d\d\d\d\d$"
        }})
    pipeline = [{
        "$match": {
            "cate_id": {
                "$ne": None
            }
        }
    }, {
        "$match": {
            "brand_id": {
                "$ne": None
            }
        }
    }, {
        "$match": {