class UserEventWriter(ModeWriter):
    """Writer that stores per-user event documents under the "UserEvent" mode.

    Every document is stamped with a ``partition_date`` and a deterministic
    ``_id`` and then upserted through the Mongo client.  The same batch is
    afterwards handed to a ``UserEventGroupWriter``.
    """

    def __init__(self):
        self.client = PyMongoClient()
        self.groupwriter = UserEventGroupWriter()
        self.modename = "UserEvent"

    def setClient(self, client):
        """Replace the Mongo client used by this writer."""
        self.client = client

    def remove(self, appkey, modename, tm):
        """Delete the documents of one partition day.

        :param appkey: application key, forwarded to the client.
        :param modename: ignored; ``self.modename`` is always used.
        :param tm: day as ``YYYYMMDD`` or ``YYYY-MM-DD``.
        :raises ValueError: if ``tm`` is not a valid date.
        """
        modename = self.modename
        # Validate / normalise the date.  Formatting the parsed struct
        # directly avoids an epoch round-trip that can shift the day around
        # DST transitions.
        tm = time.strftime("%Y%m%d",
                           time.strptime(tm.replace("-", ""), "%Y%m%d"))
        self.client.remove(appkey, modename, {"partition_date": tm})
        self.groupwriter.remove(appkey, modename, tm)

    def write(self, data, appkey, modename, modetools, *args, **kwargs):
        """Upsert every event document in ``data``.

        :param data: iterable of event dicts; each needs ``jhd_userkey``,
            ``jhd_ts`` and ``jhd_eventId``.  The dicts are modified in place
            (``partition_date`` and ``_id`` are set).
        :param appkey: application key, forwarded to the client.
        :param modename: ignored; ``self.modename`` is always used.
        :param modetools: forwarded unchanged to the group writer.
        :param kwargs: optional ``today`` (``YYYY-MM-DD`` or ``YYYYMMDD``);
            when absent, yesterday's local date is used.
        """
        import traceback

        started = time.time()
        modename = self.modename

        # Computed once so the whole batch lands in the same partition.
        if "today" in kwargs:
            partition_date = kwargs["today"].replace("-", "")
        else:
            partition_date = time.strftime(
                "%Y%m%d", time.localtime(time.time() - 86400))

        ops = []
        for doc in data:
            try:
                doc["partition_date"] = partition_date
                # Deterministic key: re-running the same day replaces the
                # existing documents instead of duplicating them.
                _id = "_".join(map(str, [
                    doc["jhd_userkey"],
                    doc["jhd_ts"],
                    doc["jhd_eventId"][:10],
                ]))
                doc["_id"] = _id
                ops.append(ReplaceOne({"_id": _id}, doc, True))
            except Exception:
                # Best effort: a malformed document is reported and skipped.
                traceback.print_exc()
                print("Warn: skip malformed UserEvent doc: %r" % (doc,))

        if ops:
            try:
                self.client.bulkWrite(appkey, modename, ops)
            except Exception:
                traceback.print_exc()
                print("Warn: bulkStore '%s' Rise a error; "
                      "Switch to Single Mode" % modename)
                # Retry one by one so a single bad operation does not block
                # the rest of the batch.
                for op_item in ops:
                    try:
                        self.client.bulkWrite(appkey, modename, [op_item])
                    except Exception:
                        traceback.print_exc()

        # Project helper defined elsewhere in the code base.
        finallyMask(appkey, modename, self.client)
        self.groupwriter.write(data, appkey, modename, modetools,
                               *args, **kwargs)
        print("UserEventWriter cost seconds %.3f" % ((time.time() - started), ))
class UserActiveWriter(ModeWriter):
    """Writer that maintains one "UserActive" record per user and day.

    For every document found in "UserProfile" the previous day's record is
    looked up, its ``active`` list is extended with a 0/1 flag for the
    current day, and the result is upserted under the current partition date.
    """

    def __init__(self):
        self.client = PyMongoClient()
        self.modename = "UserActive"

    def setClient(self, client):
        """Replace the Mongo client used by this writer."""
        self.client = client

    def remove(self, appkey, modename, tm):
        """Delete the documents of one partition day.

        :param appkey: application key, forwarded to the client.
        :param modename: ignored; ``self.modename`` is always used.
        :param tm: day as ``YYYYMMDD`` or ``YYYY-MM-DD``.
        :raises ValueError: if ``tm`` is not a valid date.
        """
        modename = self.modename
        # Validate / normalise the date.  Formatting the parsed struct
        # directly avoids an epoch round-trip that can shift the day around
        # DST transitions.
        tm = time.strftime("%Y%m%d",
                           time.strptime(tm.replace("-", ""), "%Y%m%d"))
        self.client.remove(appkey, modename, {"partition_date": tm})

    def write(self, data, appkey, modename, modetools, *args, **kwargs):
        """Build and upsert the activity record of every known user.

        :param data: unused by this writer.
        :param appkey: application key; also used as the database name when
            indexing the raw connection.
        :param modename: ignored; ``self.modename`` is always used.
        :param modetools: unused by this writer.
        :param kwargs: optional ``today`` (``YYYY-MM-DD`` or ``YYYYMMDD``);
            when absent, yesterday's local date is used.
        """
        import traceback

        started = time.time()
        modename = self.modename

        if "today" in kwargs:
            curDay = kwargs["today"].replace("-", "")
        else:
            curDay = time.strftime(
                "%Y%m%d", time.localtime(time.time() - 86400))

        conn = self.client.getConn()
        userActiveCollection = conn[appkey][modename]
        docs = self.client.find(appkey, "UserProfile", {})
        yesterday = getDay(curDay, "%Y%m%d", -1)

        ops = []
        # Separate timer for the lookup loop so the total stays intact.
        loop_started = time.time()
        for doc in docs:
            try:
                key = doc["_id"]
                activelife = doc.get("activelife", [0])
                firstLoginTime = doc["firstLoginTime"][:8]
                login = getDayDelta(curDay, firstLoginTime) in activelife

                # Look up the user's record of the previous day.  This needs
                # an index to be fast:
                # db.UserActive.ensureIndex({partition_date: -1, jh_uid: 1})
                userActive = userActiveCollection.find_one({
                    "jh_uid": key,
                    "partition_date": yesterday,
                })

                # Build today's record for the user.
                newUserActive = UserActiveBuilder()
                newUserActive.setJhdUid(key)
                newUserActive.setPartitionDate(curDay)
                flag = 1 if login else 0
                if userActive is None:
                    newUserActive.setActive([flag])
                else:
                    userActive["active"].append(flag)
                    newUserActive.setActive(userActive["active"])

                # Fill in the measurement fields.
                newUserActive.setFirstLoginTime(doc["firstLoginTime"])
                newUserActive.setLastLoginTime(doc["lastLoginTime"])

                ops.append(ReplaceOne(
                    {"jh_uid": key, "partition_date": curDay},
                    newUserActive.builder(),
                    upsert=True))
            except Exception:
                # Best effort: a malformed profile is reported and skipped.
                traceback.print_exc()
                print("Warn: skip UserProfile doc: %r" % (doc,))
        print("find cost time: %d" % int(time.time() - loop_started))

        if ops:
            try:
                userActiveCollection.bulk_write(ops)
            except Exception:
                traceback.print_exc()
                print(
                    "Warn: bulkStore 'UserActive' Rise a error; Switch to Single Mode"
                )
                # Retry one by one so a single bad operation does not block
                # the rest of the batch.
                for op_item in ops:
                    try:
                        userActiveCollection.bulk_write([op_item])
                    except Exception:
                        traceback.print_exc()

        # Project helper defined elsewhere in the code base.
        finallyMask(appkey, modename, self.client)
        print("UserActiveWriter cost seconds %.10f" % ((time.time() - started), ))
class UserCrumbsWriter(ModeWriter):
    """Writer that merges and stores per-user daily documents in "uvfile".

    Incoming per-user data is merged with the documents already stored for
    the day, enriched with IP locations and recent-activity counters, and
    upserted back.
    """

    # (result key, window length in days) used by getMeasure.
    _ACTIVE_WINDOWS = (
        ("last7ActiveNum", 7),
        ("last14ActiveNum", 14),
        ("last28ActiveNum", 28),
        ("last30ActiveNum", 30),
    )

    def __init__(self, mongo_id=1):
        self.client = PyMongoClient(mongo_id=mongo_id)
        self.conn = self.client.getConn()
        self.modename = "uvfile"
        self.attachmode_storers = []
        try:
            self.attachmode_storers = [UserIP()]
        except Exception:
            # Attachment storers are optional; keep the writer usable.
            import traceback
            traceback.print_exc()

    def setClient(self, client):
        """Replace the Mongo client and refresh the cached connection."""
        self.client = client
        self.conn = self.client.getConn()

    def remove(self, appkey, modename, tm):
        """Delete the documents stored for one day.

        :param appkey: application key, forwarded to the client.
        :param modename: ignored; ``self.modename`` is always used.
        :param tm: day as ``YYYYMMDD`` or ``YYYY-MM-DD``; documents are
            matched on the dashed form in their ``tm`` field.
        :raises ValueError: if ``tm`` is not a valid date.
        """
        modename = self.modename
        # Format the parsed struct directly; an epoch round-trip can shift
        # the day around DST transitions.
        tm = time.strftime("%Y-%m-%d",
                           time.strptime(tm.replace("-", ""), "%Y%m%d"))
        self.client.remove(appkey, modename, {"tm": tm})

    def getMeasure(self, activelifeabs, fix_deltaday):
        """Count active days inside the trailing 7/14/28/30-day windows.

        :param activelifeabs: iterable of numeric day offsets on which the
            user was active (presumably on the same scale as
            ``fix_deltaday`` -- confirm against the producer).
        :param fix_deltaday: day offset of the reference day; offsets greater
            than it are ignored.
        :returns: dict with ``last7ActiveNum``, ``last14ActiveNum``,
            ``last28ActiveNum`` and ``last30ActiveNum``.
        """
        measure = dict((name, 0) for name, _ in self._ACTIVE_WINDOWS)
        for active_day in activelifeabs:
            if active_day > fix_deltaday:
                continue
            delta = fix_deltaday - active_day
            for name, span in self._ACTIVE_WINDOWS:
                if delta <= span - 1:
                    measure[name] += 1
        return measure

    def _lookup_ip_locations(self, data):
        """Return ``{ip: {"prov": ..., "city": ...}}`` for the IPs in data."""
        import traceback

        ip_loc = {}
        try:
            ips = set()
            for uid in data:
                # A user without "jhd_ip" must not abort the whole lookup.
                ips.update(data[uid].get("jhd_ip") or ())
            cursor = self.conn["jh"]["UserIP"].find(
                OrderedDict([
                    ("_id", {"$in": list(ips)}),
                    ("province", {"$exists": True}),
                    ("city", {"$exists": True}),
                ]),
                {"province": True, "city": True})
            for item in cursor:
                province = item["province"]
                if not province:
                    continue
                # Fall back to the province when the city is empty.
                city = item["city"] or province
                ip_loc.setdefault(item["_id"],
                                  {"prov": province, "city": city})
        except Exception:
            # Locations are an enrichment only; continue without them.
            traceback.print_exc()
        return ip_loc

    def write(self, data, appkey, modename, modetools, *args, **kwargs):
        """Merge ``data`` with the stored documents of the day and upsert it.

        :param data: dict mapping user key to that user's document; it is
            updated in place.
        :param appkey: application key, forwarded to the client.
        :param modename: ignored; ``self.modename`` is always used.
        :param modetools: must provide ``mergeUserCrumbs`` and ``formatList``.
        :param kwargs: requires ``today`` (``YYYY-MM-DD`` or ``YYYYMMDD``).
        :raises KeyError: if ``today`` is missing from ``kwargs``.
        """
        import traceback

        modename = self.modename
        today = kwargs["today"].replace("-", "")
        uids = list(data)
        # Stored documents carry the day in dashed form.
        yyyy_mm_dd = time.strftime("%Y-%m-%d", time.strptime(today, "%Y%m%d"))

        uvfile = self.client.find(
            appkey, "uvfile",
            OrderedDict([("tm", yyyy_mm_dd),
                         ("jhd_userkey", {"$in": uids})]))
        user_profile = self.client.find(
            appkey, "UserProfile", {"_id": {"$in": uids}})

        ip_loc = self._lookup_ip_locations(data)

        # Merge the already stored documents into the incoming data.
        for doc in uvfile:
            uid = doc["jhd_userkey"]
            data[uid] = modetools.mergeUserCrumbs(doc, data[uid])
            try:
                locations = data[uid].setdefault("jhd_loc", [])
                for ip in data[uid]["jhd_ip"]:
                    loc = ip_loc.get(ip)
                    if loc and loc not in locations:
                        locations.append(loc)
            except Exception:
                traceback.print_exc()

        fix_deltaday = getDayDelta(today, "20160101")
        for doc in user_profile:
            key = doc["_id"]
            # First login time of the user.
            first_login = doc.get("firstLoginTime", "unknown")
            # Recent-activity counters of the user.
            measure = self.getMeasure(doc.get("activelifeabs", []),
                                      fix_deltaday)
            measure["firstLoginTime"] = first_login
            data[key] = dict(data[key],
                             firstLoginTime=first_login, measure=measure)

        ops = []
        for key in data:
            if "_id" not in data[key]:
                data[key]["_id"] = ObjectId()
            ops.append(ReplaceOne({"_id": data[key]["_id"]},
                                  modetools.formatList(data[key]), True))

        if ops:
            try:
                self.client.bulkWrite(appkey, modename, ops)
            except Exception:
                traceback.print_exc()
                print(
                    "Warn: bulkStore 'uvfile' Rise a error; Switch to Single Mode")
                try:
                    replace_onebyone(data, appkey, modename, self.client)
                except Exception:
                    traceback.print_exc()

        # Project helper defined elsewhere in the code base.
        finallyMask(appkey, modename, self.client)

        try:
            kwargs["ip_loc"] = ip_loc
            # NOTE(review): store_attachmode is not defined in this class as
            # visible here; presumably inherited from ModeWriter -- confirm.
            # If it is missing, the AttributeError is reported below.
            self.store_attachmode(data, appkey, modename, modetools,
                                  *args, **kwargs)
        except Exception:
            traceback.print_exc()