def get_sourcelist():
    """Read the Reebok master-data CSV and wrap each row as a source document.

    Returns a list of dicts shaped for the P_SOURCE_REEBOK collection; the
    raw CSV row is kept verbatim under ``sourceData``.
    """
    rows = GeneralTool.get_csv_data('../../files/Reebok-主档数据.csv')
    documents = []
    for row in rows:
        # One timestamp per record, used for both create and update time.
        stamped_at = datetime.utcnow()
        documents.append({
            "externalCode": row['BARCODE'],
            "sourceType": "reebok-erp",
            "sourceData": row,
            "isStore": False,
            "operatorId": "jm006826",
            "createTime": stamped_at,
            "updateTime": stamped_at,
            "logicDelete": 0,
        })
    return documents
async def main():
    """Replay every reebok-erp source record through the convert service.

    Loads all matching P_SOURCE_REEBOK documents, fires one HTTP request per
    record concurrently via asyncio tasks, and prints the total elapsed time.
    """
    async with httpx.AsyncClient() as client:
        started = time.time()
        # Shared headers for every request issued through this client.
        client.headers = {
            'tenantCode': 'baozun',
            'catalog': 'REEBOK',
            'Cookie': 'SESSION=YzNhZTQwYzUtMTkxZS00YTdiLWJjMmUtNjk5YmZjMTVjNTQz; SESSION=ZmY2N2Y0YTMtOGMwYy00OTBkLWFkNTMtYjhkZjk4NjVhNDkw',
            'token': '18p2570',
            'Content-Type': 'application/json'
        }
        uri = """mongodb://*****:*****@kh-public-uat-mongo-db01.cloud.bz:27017,kh-public-uat-mongo-db02.cloud.bz:27017,kh-public-uat-mongo-db03.cloud.bz:27017/db_mid_platform_integration_sit?authSource=db_mid_platform_integration_sit&replicaSet=rs-public-uat"""
        collection = GeneralTool.get_collection(
            uri,
            'db_mid_platform_integration_sit',
            'P_SOURCE_REEBOK',
            authuser='******',
            authpwd='root1234')
        # Materialize the cursor so every record is scheduled before gathering.
        rows = list(collection.find({"sourceType": "reebok-erp"}))
        tasks = []
        for position, row in enumerate(rows):
            print(position)
            payload = {
                "operatorId": "jm006826",
                "externalCodes": [{
                    "sourceType": 'reebok-erp',
                    "externalCode": row['externalCode']
                }]
            }
            # Schedule immediately; all requests run concurrently.
            tasks.append(asyncio.create_task(request(client, body=payload)))
        await asyncio.gather(*tasks)
        finished = time.time()
        print(f'耗时:{finished - started}')
def main():
    """Check the ANF category-conversion JS rule against the expected mapping.

    For each raw row in ins.csv: call the hub-convert endpoint to get the
    schemaCode produced by the JS rule, resolve the expected schemaCode from
    the Excel conversion table, and print whether the two agree.
    """
    url = "http://mid-platform-publish-platform-service-sit.cloud.bz/publish-platform/convertJs/hub-convert"
    headers = {
        'tenantCode': 'baozun',
        'catalog': 'anf',
        'ruleCode': 'source-common-ins-upd',
        'Content-Type': 'application/json'
    }
    # Steps:
    # 1. Read the raw rows (ins.csv); pull DEPT_NAME / UNIQUE_NAME_CN and keep
    #    the full row as the hub-convert request payload.
    # 2. Resolve the expected schemaCode_exp from the ANF conversion table
    #    using DEPT_NAME + UNIQUE_NAME_CN.
    # 3. Call hub-convert to obtain the actual schemaCode_act.
    # 4. Compare schemaCode_exp against schemaCode_act.
    for row in GeneralTool.get_csv_data('../files/ins.csv'):
        dept_name = row['DEPT_NAME']
        unique_name_cn = row['UNIQUE_NAME_CN']
        print(f'源数据中DEPT_NAME:{dept_name}, UNIQUE_NAME_CN:{unique_name_cn}')
        response = request(url, row, headers=headers)
        # Response shape (observed): data.variants[0] carries the converted
        # variant, including its schemaCode.
        first_variant = response.json()['data']['variants'][0]
        # Actual schema code as produced by the JS conversion.
        schemaCode_act = first_variant['schemaCode']
        # Expected schema code looked up from the Excel conversion rules.
        schemalist = get_schemalist_from_excel('ANF上新类目转化逻辑-0528.xlsx',
                                               'Sheet1')
        schemaCode_exp = get_schemacode_by_keyword(dept_name,
                                                   keyword=unique_name_cn,
                                                   schemalist=schemalist)
        print(
            f'{schemaCode_exp == schemaCode_act},schemaCode_exp:{schemaCode_exp},schemaCode_act:{schemaCode_act}'
        )
        break  # debug: only the first row is validated
        # NOTE(review): fragment — the enclosing classmethod's `def` line is
        # outside this view; `DIVISION` and `kwargs` are bound above.
        if DIVISION == 'FTW':
            # Footwear: schema code is looked up by MODEL_NAME + DESC_IN_CHINESE
            # concatenated, matched against the mapping read from Excel.
            keyword = f"{kwargs['MODEL_NAME']} {kwargs['DESC_IN_CHINESE']}"
            title = cls.get_schemacode_by_keyword(keyword)
        else:
            # Non-footwear: schema code is looked up by CATEGORY +
            # LOCAL_PRODUCT_TYPE concatenated, from the same Excel mapping.
            keyword = f"{kwargs['CATEGORY']} {kwargs['LOCAL_PRODUCT_TYPE']}"
            # Mapping entries look like:
            # [{'keyword': 'CLASSIC BAG', 'schema_code': 's1011107'}]
            title = cls.get_schemacode_by_keyword(keyword)
        return title


if __name__ == '__main__':
    print('start')
    # Pull every reebok-erp record that has a non-empty MODEL_NO and convert
    # each to the common model via a 4-worker pool.
    collection = GeneralTool.get_collection(integration['sit']['uri'],
                                            integration['sit']['db'],
                                            'P_SOURCE_REEBOK')
    condition = {
        "sourceType": "reebok-erp",
        "sourceData.MODEL_NO": {
            "$ne": ""
        }
    }
    allrows = collection.find(condition)
    allrows = list(allrows)
    print(len(allrows))
    start = time.time()
    pool = Pool(4)  # instantiate the worker pool
    pool.map(Prodcut.conver2common, allrows)  # fan out conversion over the rows
    pool.close()
    pool.join()
def handle_schemacode():
    """Validate the category (schemaCode) conversion rules for Reebok data.

    For non-footwear records (DIVISION != 'FTW') the schema code is derived
    from the CATEGORY + LOCAL_PRODUCT_TYPE pair, so one representative record
    per distinct pair is verified via ``Prodcut.handle_source``. Failed checks
    are collected and written to report.xlsx.
    """
    print('=====>>开始执行校验类目转换规则')
    start = time.time()
    collection = GeneralTool.get_collection(integration['sit']['uri'],
                                            integration['sit']['db'],
                                            'p_source')
    reportlist = []

    # Footwear (FTW) categories map directly from MODEL_NAME and are covered
    # by a separate (currently disabled) pass that deduplicates on
    # sourceData.MODEL_NAME.

    # Non-footwear: CATEGORY + LOCAL_PRODUCT_TYPE jointly determine the
    # category, so validate one record per distinct combination.
    condition_other = {
        "catalog": "REEBOK",
        "sourceType": "reebok-erp",
        "sourceData.DIVISION": {
            "$ne": "FTW"
        }
    }
    allrows = collection.find(condition_other)
    print(f'非鞋类总计源数据条数:{len(allrows := list(allrows))}')

    # Deduplicate on the (CATEGORY, LOCAL_PRODUCT_TYPE) pair itself.
    # BUG FIX: the previous version joined the pair with '_' and later did
    # keyword.split('_'), which raises ValueError (too many values to unpack)
    # whenever either field contains an underscore. Tuples are lossless.
    keyword_list = list()
    for row in allrows:
        source = row['sourceData']
        CATEGORY = source['CATEGORY']
        LOCAL_PRODUCT_TYPE = source['LOCAL_PRODUCT_TYPE']
        keyword_list.append((CATEGORY, LOCAL_PRODUCT_TYPE))
    print(
        f"非鞋类的关键字CATEGORY_LOCAL_PRODUCT_TYPE,去重后总计:{len(set(keyword_list))}条")
    for CATEGORY, LOCAL_PRODUCT_TYPE in set(keyword_list):
        # One representative record per combination is enough: the rule only
        # depends on these two fields.
        cdn = {
            "sourceType": "reebok-erp",
            "sourceData.CATEGORY": CATEGORY,
            "sourceData.LOCAL_PRODUCT_TYPE": LOCAL_PRODUCT_TYPE,
            "sourceData.DIVISION": {
                "$ne": "FTW"
            }
        }
        row = collection.find_one(cdn)
        report = Prodcut.handle_source(row)
        if report:
            reportlist.append(report)

    end = time.time()
    print(f'总计错误数:{len(reportlist)}')
    if len(reportlist) > 0:
        ExcelTool.write(reportlist, [
            'MODEL_NO', 'BARCODE', 'DIVISION', 'KEYWORD', 'EXP_VALUE',
            'ACT_VALUE'
        ],
                        filename='report.xlsx',
                        sheetname='schemaCode')
    print('类目转换规则校验结束<<=====')
    print("共耗时:", end - start)
        # NOTE(review): fragment — the enclosing function and loop start
        # outside this view; `collection`, `cdn`, `reportlist`, `start` are
        # bound above.
        row = collection.find_one(cdn)
        report = Prodcut.handle_source(row)
        if report:
            reportlist.append(report)
    # pool = Pool(4)  # instantiate the worker pool
    # pool.map(Prodcut.handle_source, allrows)  # fan out over the rows
    # pool.close()
    # pool.join()
    end = time.time()
    print(f'总计错误数:{len(reportlist)}')
    if len(reportlist) > 0:
        # Write the failing records to an Excel report for manual review.
        ExcelTool.write(reportlist, [
            'MODEL_NO', 'BARCODE', 'DIVISION', 'KEYWORD', 'EXP_VALUE',
            'ACT_VALUE'
        ],
                        filename='report.xlsx',
                        sheetname='schemaCode')
    print('类目转换规则校验结束<<=====')
    print("共耗时:", end - start)


if __name__ == '__main__':
    # handle_schemacode()
    collection = GeneralTool.get_collection(integration['sit']['uri'],
                                            integration['sit']['db'],
                                            'p_source')
    handle_title(collection)
def main():
    """Bulk-load 20,000 synthetic image-resource docs into RESOURCE_NIKEPIM.

    Builds all documents locally first, then inserts them through a 4-worker
    pool; the ``insert`` worker reads the module-global ``mycol``.
    """
    uri = "mongodb://*****:*****@kh-public-uat-mongo-db01.cloud.bz:27017,kh-public-uat-mongo-db02.cloud.bz:27017,kh-public-uat-mongo-db03.cloud.bz:27017/db_mid_platform_publish_uat?replicaSet=rs-public-uat&authSource=db_mid_platform_publish_uat"
    dbname = "db_mid_platform_publish_uat"
    colname = "RESOURCE_NIKEPIM"
    # The pool workers resolve the collection through this module global.
    global mycol
    mycol = GeneralTool.get_collection(uri, dbname, colname,
                                       authuser="******",
                                       authpwd="db_mid_platform_publish_uat1234")

    sourcelist = list()
    for offset in range(20000):
        fileCode = f'{863000000 + offset}'
        stamped_at = datetime.utcnow()
        png_name = "{}.png".format(fileCode)
        sourcelist.append({
            "fileCode": fileCode,
            "url": "http://pic40.nipic.com/20140403/8614226_162017444195_2.jpg",
            "name": png_name,
            "path": "/dam/pic/",
            "type": 1,
            "meta": {
                "size": "830",
                "name": png_name,
                "fullName": png_name,
                "extName": ".png",
                "fileType": "png",
                "htmlMediaType": "image"
            },
            "properties": [
                {"key": "year", "value": "2020年"},
                {"key": "artNo", "value": fileCode},
                {"key": "sku", "value": fileCode},
            ],
            "opDomain": "NIKEPIM",
            "operatorName": "likai",
            "orderField": 1,
            "operatorId": "9527",
            "propertyTemplateId": "5e96ae2a7336496ecc31abbf",
            "logicDelete": 0,
            "tenantCode": "NIKE",
            "createTime": stamped_at,
            "updateTime": stamped_at,
            "labels": ["630", "1111"],
        })

    started = time.time()
    pool = Pool(4)  # instantiate the worker pool
    pool.map(insert, sourcelist)  # fan the inserts out across the workers
    pool.close()
    pool.join()
    finished = time.time()
    print("共耗时:", finished - started)
# same key diff value samekey = dict1.keys() & dict2.keys() diff_vals = [{ k: (dict1[k], dict2[k]) } for k in samekey if str(dict1[k]).strip() != str(dict2[k]).strip()] if diff_vals: print(f'异常的数据:{barcode},{json.dumps(diff_vals, ensure_ascii=False)}') if __name__ == '__main__': # sit mongo db # uri = """mongodb://*****:*****@kh-public-uat-mongo-db01.cloud.bz:27017,kh-public-uat-mongo-db02.cloud.bz:27017,kh-public-uat-mongo-db03.cloud.bz:27017/db_mid_platform_integration_sit?authSource=db_mid_platform_integration_sit&replicaSet=rs-public-uat""" query = {"catalog": "REEBOK", "sourceType": "reebok-erp"} collection = GeneralTool.get_collection(integration['sit']['uri'], integration['sit']['db'], 'p_source') datas_mongo = collection.find(query).sort('_id', -1) datas_csv = GeneralTool.get_csv_data('BARCODE_ARTICLE_20200805020001.csv', delimiter=',', encoding='gbk') print("mongo数据条数:{},csv数据条数:{}".format( len(datas_mongo := list(datas_mongo)), len(datas_csv := list(datas_csv)))) for c in datas_csv: barcode = c['BARCODE'] condition = {"catalog": "REEBOK", "externalCode": barcode} m = collection.find_one(condition)
            # NOTE(review): fragment — the enclosing function, its loop, and
            # the opening of the `reebok_model` dict are outside this view.
            "sourceType": "reebok-erp",
            "sourceData": data,
            "isStore": False,
            "operatorId": "jm006826",
            "createTime": now,
            "updateTime": now,
            "logicDelete": 0
        }
        sourcelist.append(reebok_model)
    return sourcelist


if __name__ == '__main__':
    start = time.time()
    uri = """mongodb://*****:*****@kh-public-uat-mongo-db01.cloud.bz:27017,kh-public-uat-mongo-db02.cloud.bz:27017,kh-public-uat-mongo-db03.cloud.bz:27017/db_mid_platform_integration_sit?authSource=db_mid_platform_integration_sit&replicaSet=rs-public-uat"""
    # NOTE(review): `mycol` is presumably the module global read by the
    # `insert` worker — verify against the worker's definition.
    mycol = GeneralTool.get_collection(
        uri,
        'db_mid_platform_integration_sit',
        'P_SOURCE_REEBOK',
        authuser='******',
        authpwd='root1234')
    sourcelist = get_sourcelist()
    pool = Pool(4)  # instantiate the worker pool
    pool.map(insert, sourcelist)  # fan the inserts out across the workers
    pool.close()
    pool.join()
    end = time.time()
    print("共耗时:{}秒".format(round(end - start)))
def main():
    """Bulk-load 80,000 synthetic image-resource docs into RESOURCE_anf.

    PERF FIX: the previous version issued one ``insert_one`` network round
    trip per document (80,000 round trips). Documents are now accumulated and
    flushed with ``insert_many`` in batches of 1,000, cutting this to ~80
    bulk writes; progress is printed once per flushed batch.
    """
    # 1
    uri = "mongodb://*****:*****@kh-public-uat-mongo-db01.cloud.bz:27017,kh-public-uat-mongo-db02.cloud.bz:27017,kh-public-uat-mongo-db03.cloud.bz:27017/db_mid_platform_publish_sit?authSource=db_mid_platform_publish_sit&replicaSet=rs-public-uat"
    dbname = "db_mid_platform_publish_sit"
    colname = "RESOURCE_anf"
    mycol = GeneralTool.get_collection(uri, dbname, colname,
                                       authuser="******",
                                       authpwd="root1234")

    BATCH_SIZE = 1000  # documents per insert_many call
    batch = []
    for i in range(80000):
        fileCode = f'{862400000 + i}'
        png_name = "{}.png".format(fileCode)
        now = datetime.now()
        batch.append({
            "fileCode": fileCode,
            "url": "http://pic40.nipic.com/20140403/8614226_162017444195_2.jpg",
            "name": png_name,
            "path": "/dam/pic/",
            "type": 1,
            "meta": {
                "size": "8511089",
                "name": png_name,
                "fullName": png_name,
                "extName": ".png",
                "fileType": "png",
                "htmlMediaType": "image"
            },
            "properties": [{
                "key": "year",
                "value": "2020年"
            }, {
                "key": "artNo",
                "value": fileCode
            }, {
                "key": "sku",
                "value": fileCode
            }],
            "opDomain": "anf",
            "operatorName": "likai",
            "orderField": 1,
            "operatorId": "9527",
            "propertyTemplateId": "5e96ae2a7336496ecc31abbf",
            "logicDelete": 0,
            "tenantCode": "baozun",
            "createTime": now,
            "updateTime": now,
            "labels": ["618", "测试"]
        })
        if len(batch) == BATCH_SIZE:
            mycol.insert_many(batch)
            batch = []
            print('正在进行操作-{}'.format(i))
    # Flush any remaining documents (none when 80000 % BATCH_SIZE == 0).
    if batch:
        mycol.insert_many(batch)