col = db['TaMember_{cid}_Subscription'.format(cid=cid)]
ls = col.find({'Period': int(period), 'ChannelId': dictory[channel], 'SendMode': 0})
for x in ls:
    filters.append(x['MemberId'])
client1.close()
logger.add(
    'shopid={shopid} ,cid={cid} et_update_list get Subscription {c}.......'
    .format(shopid=shopid, cid=cid, c=len(filters)))

merge_path = "/user/athena/{}/meb_attribute/{}".format(shopid, lsdate)
# fall back to the latest member-attribute snapshot if the requested date is missing
if not util_hadoop.PathIsExit(merge_path):
    ls = util_hadoop.GetPath(merge_path.rsplit("/", 1)[0])
    lsdate = max(ls).rsplit('/', 1)[1]
    merge_path = "/user/athena/{}/meb_attribute/{}".format(shopid, lsdate)
    logger.add(
        'shopid={shopid} ,cid={cid} et_update_list get ta by last date {lsdate}.......'
        .format(shopid=shopid, cid=cid, lsdate=lsdate))

SparkContext.setSystemProperty('spark.cores.max', '56')
sc = SparkContext(appName="et_update_list_{}".format(shopid))


def mapp(l):
    # column 12 carries the npt segment; the 'cindy1' placeholder is mapped to 'S3'
    npt = l.split("\t")[12] if l.split("\t")[12] != 'cindy1' else 'S3'
    return {
        'memberid': l.split("\t")[1],
    for h in ls:
        n = n + 1
        ran = random.random()
        memberid, nes, npt, npd = h
        PR = PRD[npt]
        # bucket members into Low / Medium / High using the store's npt thresholds in D
        if npt <= D[storeid][0]:
            hml = "L"
        elif npt > D[storeid][1]:
            hml = "H"
        else:
            hml = "M"
        R.append("{storeid}\t{memberid}\t{nes}\t{hml}\t{npt}\t{PR}\t{order}\t{npd}\t{ran}"
                 .format(storeid=storeid, memberid=memberid, nes=nes, hml=hml, npt=npt,
                         PR=PR, order=n, npd=npd, ran=ran))
    return R


Result = (R1.map(lambda l: (l[0].split('_')[0],
                            (l[0].split('_')[1], l[1][0], float(l[1][1][0]), l[1][1][1])))
            .groupByKey()
            .map(lambda x: (x[0], list(x[1])))
            .map(lambda x: hml(x[0], x[1], D))
            .flatMap(lambda x: x))

serial = Result.count()
if serial > 0:
    if util_hadoop.PathIsExit(outpath):
        cmd = "hadoop fs -rm -r {}".format(outpath)
        subprocess.call(cmd, shell=True)
    try:
        Result.coalesce(10).saveAsTextFile(outpath)
    except:
        pass
sc.stop()
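# --- Illustrative sketch (not part of the original job) ---
# The rows saved above are tab-separated with nine fields in the order used by
# R.append(): storeid, memberid, nes, hml, npt, PR, order, npd, ran. A downstream
# reader could unpack them as below; the field names are assumptions taken from
# that format string, not a documented schema.
def parse_hml_row(row):
    storeid, memberid, nes, hml, npt, PR, order, npd, ran = row.split("\t")
    return {
        'storeid': storeid,
        'memberid': memberid,
        'nes': nes,
        'hml': hml,            # "L" / "M" / "H" bucket assigned above
        'npt': float(npt),
        'PR': PR,
        'order': int(order),
        'npd': npd,
        'ran': float(ran),     # random tie-breaker written by the loop above
    }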
try:
    logger.add(
        'clientid={clientid}, setting={se}, groups separate is init ....'
        .format(clientid=shopid, se=setting))
    R = getabgroup(setting, D)

    # spark init
    prefix = "hdfs://{hdfsmaster}:8020".format(hdfsmaster=MIGO_HDFS_MASTER)
    src = prefix + "/user/migo/starterDIY/{}/member/done/*".format(shopid)
    ta_parent = prefix + "/user/athena/{}/ta_done/{}".format(shopid, caldate)
    ta_path = ta_parent + "/*"

    # get latest ta
    if not util_hadoop.PathIsExit(ta_parent):
        ls = util_hadoop.GetPath(ta_parent.rsplit("/", 1)[0])
        ta_path = max(ls) + "/*"
        caldate = max(ls).rsplit("/", 1)[1]  # the latest hadoop ta date

    sc = SparkContext(appName="ab_group_init_{}".format(shopid))

    configpath = "/user/athena/{}/et_group/config".format(shopid)
    if util_hadoop.PathIsExit(configpath):
        cmd = "hadoop fs -rm -r {}".format(configpath)
        subprocess.call(cmd, shell=True)
    sc.parallelize(["{}\t{}".format(shopid, setting)]).coalesce(1).saveAsTextFile(configpath)

    members = sc.textFile(src)
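# --- Illustrative sketch (not part of the original job) ---
# The "fall back to the latest dated directory" pattern above recurs in several of
# these jobs. Assuming util_hadoop.GetPath() returns the child paths of a directory
# (as the max(...) / rsplit usage above implies), it could be factored into one helper.
def latest_dated_path(expected_path):
    """Return (path, date) for expected_path, or for the newest sibling if it is missing."""
    if util_hadoop.PathIsExit(expected_path):
        return expected_path, expected_path.rsplit("/", 1)[1]
    siblings = util_hadoop.GetPath(expected_path.rsplit("/", 1)[0])
    newest = max(siblings)  # date-named directories sort lexicographically
    return newest, newest.rsplit("/", 1)[1]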
lsdate = caldate
UPDATE = True
try:
    prefix = "hdfs://{hdfsmaster}:8020".format(hdfsmaster=MIGO_HDFS_MASTER)
    merge_path = prefix + "/user/athena/{shop}/meb_attribute/{caldate}".format(shop=shopid, caldate=caldate)
    lsdate = caldate
    logger.add('clientid={clientid}, cid={cid}, et_query is init by {shopid} {caldate} {tag1} {cid} {start} {end} '
               '{type} {flag} {reset} {M} {F} {npt} {age} {gender} {first} {last} {groups} {top} {npd}'
               .format(clientid=shopid, cid=cid, shopid=shopid, caldate=caldate, tag1=tag1,
                       start=start, end=end, type=type, flag=flag, reset=reset, M=M, F=F,
                       npt=npt, age=age, gender=gender, first=first, last=last,
                       groups=groups, top=top, npd=npd))

    CONTINUE = True
    MSG = ""
    # fall back to the latest member-attribute snapshot if the caldate one is missing
    if not util_hadoop.PathIsExit(merge_path):
        if util_hadoop.PathIsExit(merge_path.rsplit("/", 1)[0]):
            ls = util_hadoop.GetPath(merge_path.rsplit("/", 1)[0])
            merge_path = "hdfs:" + max(ls) + "/*"
            lsdate = max(ls).rsplit("/", 1)[1]
        else:
            CONTINUE = False
            MSG = 'member cube does not exist...'
    else:
        merge_path = merge_path + "/*"

    client = pymongo.MongoClient(MIGO_MONGO_TA_URL, 27017)
    db = client['ET_' + shopid]
    col = db['campaign']
    up = False
    if flag == "0":
"BE_S3_W": "16777216,0,0" }, { "BE_BUY_W": "2097152,0,0" }] if __name__ == "__main__": """ error code : 1. ['-1'] => no ta_done file """ try: shop = sys.argv[1] caldate = sys.argv[2] ta = "/user/athena/{shop}/ta_done/{caldate}".format(shop=shop, caldate=caldate) MSG = "" if util_hadoop.PathIsExit(ta): start_time = datetime.datetime.now().second cf = SparkConf().setAppName("[ET_update_member-{}-{}] {}".format( shop, caldate, start_time)).set("spark.cores.max", "40") sc = SparkContext(conf=cf) raw_ta = sc.textFile( ta, 20).filter(lambda x: x.split("\t")[0] == shop).cache() R = [] for x in DICT: tag1 = int(x[x.keys()[0]].split(",")[0]) tag = x.keys()[0] if tag1 == 0: counts = raw_ta.filter( lambda x: x.split("\t")[2] == "L7D").count()
# get all et enable clients
enable_client = []
cmd = 'curl -s -k {}'.format(eturl)
try:
    r = os.popen(cmd).read().strip()
    if r != '':
        data = ast.literal_eval(r)
        if data['code'] == 0:
            enable_client = data['result']['enable_company_codes'].split(',')
except:
    pass

if shopid in enable_client:
    # spark init
    if util_hadoop.PathIsExit(os.path.dirname(hdfs)):
        # get latest ta: fall back to the most recent ta_done date if the expected one is missing
        if not util_hadoop.PathIsExit(ta_parent):
            ls = util_hadoop.GetPath(ta_parent.rsplit("/", 1)[0])
            ta_path = max(ls) + "/*"
            lsdate = max(ls).rsplit("/", 1)[1]  # the latest hadoop ta date

        # ta exists
        SparkContext.setSystemProperty('spark.executor.cores', '4')
        SparkContext.setSystemProperty('spark.cores.max', '48')
        sc = SparkContext(appName="et_groups_inc_calculation_member_{}".format(shopid))

        def mapp(line):
            # column 3 of the tab-separated ta record; ignore malformed lines
            try:
                return line.split('\t')[3]
            except:
                pass
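# --- Illustrative sketch (not part of the original job) ---
# The enable-client lookup above shells out to curl and parses the body with
# ast.literal_eval. If the eturl endpoint returns JSON, the same check can be done
# in-process with requests (assumptions: the response shape matches the fields used
# above; verify=False mirrors curl's -k flag).
import requests

def fetch_enabled_clients(url):
    try:
        data = requests.get(url, verify=False, timeout=10).json()
        if data.get('code') == 0:
            return data['result']['enable_company_codes'].split(',')
    except Exception:
        pass
    return []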