#!/usr/bin/env python
# -*- coding: utf-8 -*-
import re

# Matches a whole value shaped like an unsigned price with exactly two
# decimals, e.g. "79.90".  A leading sign can never match, so sentinel
# values such as "-1.00" are rejected by the pattern itself and need no
# separate comparison.
price_pattern = re.compile(r'^\d+\.\d\d$')

# Price returned when no field of an item looks like a price.
DEFAULT_PRICE = 79.90


def clean_price(item, default=DEFAULT_PRICE):
    """Return the lowest price-like value found among the fields of *item*.

    Every field value is converted with ``str()`` and tested against
    ``price_pattern``; values that do not match are ignored.

    Args:
        item: Mapping-like record.  It is iterated for its keys and then
            indexed with each key.
        default: Value returned when no field matches.  Defaults to 79.90,
            the fallback that used to be hard-coded in the function.

    Returns:
        The smallest matching value converted to ``float``, or *default*
        when no field matches.
    """
    # The pattern is anchored on both ends, so a single ``match`` call gives
    # the same answer as scanning with ``findall``.
    matches = (price_pattern.match(str(item[key])) for key in item)
    prices = [float(found.group()) for found in matches if found]
    return min(prices) if prices else default
#!/usr/bin/env python # -*- coding: utf-8 -*- from multiprocess.tools import timeUtil, collections from mongo import op print(timeUtil.getdate(0, format="%Y-%m%d")) print(timeUtil.current_time()) dt = collections.DataSet([1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, 6]) for i in dt.shuffle( buffer_size=3).map(lambda x: x * 2).map(lambda x: x + 1).distinct(): print(i) with op.DBManger() as m: last_brand_collect = m.get_lasted_collection( "jingdong", filter={"name": { "$regex": r"^brand20\d\d\d\d\d\d$" }}) pipeline = [{ "$match": { "cate_id": { "$ne": None } } }, { "$match": { "brand_id": { "$ne": None } } }, { "$match": {