class UserProfileUpdateWriter(ModeWriter):
    """Add IP-derived locations to documents of the ``UserProfile`` collection.

    For each user in the batch, every IP that has a known location contributes
    a ``{"prov": ..., "city": ...}`` entry which is merged into the stored
    ``locs`` array with ``$addToSet``.
    """

    def __init__(self):
        self.client = PyMongoClient()
        self.conn = self.client.getConn()
        self.modename = "UserProfile"

    def setClient(self, client):
        """Replace the client and refresh the connection obtained from it."""
        self.client = client
        self.conn = self.client.getConn()

    def remove(self, appkey, modename, tm):
        """No-op: this writer never deletes anything."""
        pass

    def write(self, data, appkey, modename, modetools, *args, **kwargs):
        """Resolve each user's IPs to locations and push them to ``locs``.

        :param data: mapping ``uid -> record``; each record must have a
            ``jhd_ip`` iterable.  Records are mutated: resolved locations are
            appended to ``record["locs"]``.
        :param appkey: first argument handed to ``client.bulkWrite``.
        :param modename: ignored; ``self.modename`` is always used.
        :param modetools: unused here.
        :param kwargs: must contain ``ip_loc``, a mapping
            ``ip -> {"prov": ..., "city": ...}``.
        """
        modename = self.modename
        started = time.time()
        ip_loc = kwargs["ip_loc"]

        # Attach the resolved locations to the in-memory records first.
        for uid in data:
            for ip in data[uid]["jhd_ip"]:
                loc = ip_loc.get(ip, {})
                if not loc:
                    continue
                loc_data = {
                    "prov": loc.get("prov", "#"),
                    "city": loc.get("city", "#"),
                }
                data[uid].setdefault("locs", []).append(loc_data)

        # One $addToSet per (user, location) pair.
        op = []
        for uid in data:
            for loc in data[uid].get("locs", []):
                op.append(UpdateOne({"_id": uid}, {"$addToSet": {"locs": loc}}))

        try:
            if op:
                self.client.bulkWrite(appkey, modename, op)
        except Exception:
            print(
                "Warn: bulkStore 'UserProfile' Rise a error; Switch to Single Mode"
            )
            try:
                # NOTE(review): the fallback hands the raw records to
                # replace_onebyone rather than replaying the $addToSet ops --
                # confirm that this is the intended behaviour.
                replace_onebyone(data, appkey, modename, self.client)
            except Exception:
                import traceback
                # print_exc() writes the traceback itself and returns None.
                traceback.print_exc()
        print("UserProfileUpdateWriter cost seconds %.3f" %
              ((time.time() - started), ))
class UserIP(ModeWriter):
    '''
    Registry of the IPs seen in incoming batches (database ``jh``,
    collection ``UserIP``).

    Document layout written by this class:
    {
        _id: [ip],
        inctag: [number],
        timestamp: [number],
        appkey: [list of appkeys]
    }
    '''

    def __init__(self, client=None):
        self.client = PyMongoClient() if client is None else client
        self.dbname = "jh"
        self.modename = "UserIP"

    def setClient(self, client):
        """Replace the client used for writes."""
        self.client = client

    def remove(self, *args, **kwargs):
        """No-op: IP records are never deleted by this writer."""
        pass

    def write(self, data, appkey, modename, modetools, *args, **kwargs):
        """Upsert one document per distinct IP found in ``data``.

        :param data: mapping whose values each carry a ``jhd_ip`` iterable.
        :param appkey: added to the ``appkey`` set of every touched IP.
        :param modename: ignored; ``self.modename`` is always used.
        :param modetools: unused here.
        """
        modename = self.modename
        ip_set = set()
        for key in data:
            ip_set.update(data[key]["jhd_ip"])

        # A single timestamp for the whole batch keeps the records consistent.
        now = time.time()
        op = []
        for ip in ip_set:
            query_update = {
                "$set": {"timestamp": now},
                "$inc": {"inctag": 1},
                "$addToSet": {"appkey": appkey},
            }
            op.append(UpdateOne({"_id": ip}, query_update, upsert=True))

        try:
            if op:
                self.client.bulkWrite(self.dbname, modename, op)
        except Exception:
            print(
                "Warn: bulkStore 'jh.UserIP' Rise a error; Switch to Single Mode"
            )
            # Replay one operation at a time so a single bad op cannot block
            # the others.
            for op_item in op:
                try:
                    self.client.bulkWrite(self.dbname, modename, [op_item])
                except Exception:
                    import traceback
                    # print_exc() writes the traceback itself and returns None.
                    traceback.print_exc()
class UserEventWriter(ModeWriter):
    """Store raw event documents in the ``UserEvent`` collection.

    Every ``write``/``remove`` is also forwarded to a
    ``UserEventGroupWriter``.
    """

    def __init__(self):
        self.client = PyMongoClient()
        self.groupwriter = UserEventGroupWriter()
        self.modename = "UserEvent"

    def setClient(self, client):
        """Replace the client used for writes."""
        self.client = client

    def remove(self, appkey, modename, tm):
        """Delete the events of one day.

        :param tm: day as ``yyyymmdd`` or ``yyyy-mm-dd``.
        """
        modename = self.modename
        tm = tm.replace("-", "")
        # Normalise the date (round trip through struct_time).
        tm = time.strftime(
            "%Y%m%d",
            time.localtime(time.mktime(time.strptime(tm, "%Y%m%d"))))
        self.client.remove(appkey, modename, {"partition_date": tm})
        self.groupwriter.remove(appkey, modename, tm)

    def write(self, data, appkey, modename, modetools, *args, **kwargs):
        """Upsert every event of ``data`` and forward the batch to the group writer.

        Each document is mutated in place: it receives a ``partition_date``
        and an ``_id`` built as ``<jhd_userkey>_<jhd_ts>_<jhd_eventId[:10]>``.
        Documents lacking one of those fields are reported and skipped.

        :param data: iterable of event dicts.
        :param appkey: first argument handed to ``client.bulkWrite``.
        :param modename: ignored; ``self.modename`` is always used.
        :param kwargs: optional ``today`` (``yyyy-mm-dd``); when absent the
            partition date defaults to the local date 24 hours ago.
        """
        started = time.time()
        modename = self.modename

        # The partition date is the same for the whole batch.
        if "today" in kwargs:
            partition_date = kwargs["today"].replace("-", "")
        else:
            partition_date = time.strftime(
                "%Y%m%d", time.localtime(time.time() - 86400))

        op = []
        for doc in data:
            try:
                doc["partition_date"] = partition_date
                _id = "_".join(
                    map(str, [
                        doc["jhd_userkey"], doc["jhd_ts"],
                        doc["jhd_eventId"][:10]
                    ]))
                doc["_id"] = _id
                op.append(ReplaceOne({"_id": _id}, doc, True))
            except Exception:
                import traceback
                traceback.print_exc()
                print("UserEventWriter: skipped document %r" % (doc, ))

        try:
            if op:
                # The result is deliberately not bound to ``op``: the list
                # must stay intact for the fallback below.
                self.client.bulkWrite(appkey, modename, op)
        except Exception:
            print(
                "Warn: bulkStore 'UserEvent' Rise a error; Switch to Single Mode"
            )
            for op_item in op:
                try:
                    self.client.bulkWrite(appkey, modename, [op_item])
                except Exception:
                    import traceback
                    traceback.print_exc()
        finallyMask(appkey, modename, self.client)
        self.groupwriter.write(data, appkey, modename, modetools, *args,
                               **kwargs)
        print("UserEventWriter cost seconds %.3f" %
              ((time.time() - started), ))
class UserActiveUpdateWriter(ModeWriter):
    """Record, per user, the days on which the user was active.

    Documents of the ``UserActiveUpdate`` collection are keyed by user key and
    accumulate day offsets in their ``activelifeabs`` array.
    """

    def __init__(self):
        self.client = PyMongoClient()
        self.modename = "UserActiveUpdate"

    def setClient(self, client):
        """Replace the client used for writes."""
        self.client = client

    def remove(self, appkey, modename, tm):
        """No-op."""
        pass

    def write(self, data, appkey, modename, modetools, *args, **kwargs):
        """Add today's day offset to ``activelifeabs`` of every user in ``data``.

        :param data: container whose iteration yields user keys.
        :param appkey: first argument handed to ``client.bulkWrite``.
        :param modename: ignored; ``self.modename`` is always used.
        :param modetools: unused here.
        :param kwargs: must contain ``today`` (``yyyy-mm-dd``).
        """
        started = time.time()
        modename = self.modename
        curDay = kwargs["today"].replace("-", "")
        # presumably the number of days between curDay and 2016-01-01 --
        # confirm against getDayDelta.
        activelifeabs = getDayDelta(curDay, "20160101")
        update_query = {"$addToSet": {"activelifeabs": activelifeabs}}

        op = []
        for key in data:  # key is the user key
            try:
                op.append(UpdateOne({"_id": key}, update_query, True))
            except Exception:
                import traceback
                traceback.print_exc()
                # The original indexed ``data[data]`` here, which raised a
                # TypeError from inside the handler.
                print("UserActiveUpdateWriter: skipped %r: %r" %
                      (key, data[key]))

        try:
            if op:
                # Do not rebind ``op``; the fallback below iterates it.
                self.client.bulkWrite(appkey, modename, op)
        except Exception:
            import traceback
            traceback.print_exc()
            print(
                "Warn: bulkStore 'UserActive' Rise a error; Switch to Single Mode"
            )
            for op_item in op:
                try:
                    self.client.bulkWrite(appkey, modename, [op_item])
                except Exception:
                    traceback.print_exc()
        finallyMask(appkey, modename, self.client)
        print("UserActiveUpdateWriter cost seconds %.10f" %
              ((time.time() - started), ))
class IPStorageMG(IPStorage):
    """MongoDB-backed ``IPStorage`` that updates documents of ``jh.UserIP``.

    Updates are issued without upsert, so only IPs that already have a
    document are modified.
    """

    def __init__(self):
        self.client = PyMongoClient()
        self.conn = self.client.getConn()

    def store(self, _id, **kwargs):
        """``$set`` the given keyword fields on the document ``_id``."""
        op = UpdateOne({"_id": _id}, {"$set": dict(kwargs)}, False)
        self.client.bulkWrite("jh", "UserIP", [op])

    def storeItem(self, _id, key, value):
        """Not implemented; does nothing."""
        pass

    def bulkStore(self, data):
        """``$set`` many documents at once.

        :param data: mapping ``_id -> {field: value}``.  An empty mapping is
            a no-op, matching the ``if op`` guard used by the writers in this
            file.
        """
        op = [
            # dict(mapping) copies without requiring string keys, unlike
            # dict({}, **mapping).
            UpdateOne({"_id": _id}, {"$set": dict(data[_id])}, False)
            for _id in data
        ]
        if op:
            self.client.bulkWrite("jh", "UserIP", op)
class ModeWriteMongoUserIP(ModeWriter):
    """Back-fill ``jhd_loc`` on ``uvfile`` documents from resolved ``jh.UserIP`` records."""

    def __init__(self, mongo_id=1):
        self.client = PyMongoClient(mongo_id=mongo_id)
        self.conn = self.client.getConn()
        self.dbname = "jh"
        self.modename = "UserIP"

    def remove(self, *args, **kwargs):
        """No-op."""
        pass

    def write(self, *args, **kwargs):
        '''
        Push the location of recently touched IPs into each app's ``uvfile``.

        IP records whose ``timestamp`` lies within the last 100 seconds and
        that carry ``province``, ``city`` and ``appkey`` are read; for every
        appkey listed on such a record, an update adds
        ``{"prov": ..., "city": ...}`` to ``jhd_loc`` of today's ``uvfile``
        documents that contain the IP and have no ``jhd_loc`` yet.

        :param args: reserved
        :param kwargs: today = current day (yyyy-mm-dd)
        :return: None
        '''
        today = kwargs["today"]
        cur = self.conn[self.dbname][self.modename].find(
            {
                "timestamp": {"$gte": time.time() - 100},
                "province": {"$exists": True},
                "city": {"$exists": True},
                "appkey": {"$exists": True},
            },
            {"province": True, "city": True, "appkey": True})

        update_appkey = {}
        for item in cur:
            ip = item["_id"]
            province = item["province"]
            city = item["city"]
            if not province:
                continue
            if not city:
                # Fall back to the province when the city is empty.
                city = province
            for a_appkey in item["appkey"]:
                update_appkey.setdefault(a_appkey, []).append(
                    UpdateOne(
                        # Key order is kept explicit; the filter is meant to
                        # be served by an index to keep the update fast.
                        # "$exists" was misspelled "$exista" before, which is
                        # not a MongoDB operator.
                        OrderedDict([("tm", today),
                                     ("jhd_loc", {"$exists": False}),
                                     ("jhd_ip", ip)]),
                        {
                            "$addToSet": {
                                "jhd_loc": {
                                    "prov": province,
                                    "city": city
                                }
                            }
                        }))

        for a_appkey in update_appkey:
            a = time.time()
            self.client.bulkWrite(a_appkey, "uvfile", update_appkey[a_appkey])
            print(__name__, time.time() - a, a_appkey,
                  len(update_appkey[a_appkey]))
class UserMapMetaWriter(ModeWriter):
    """Maintain the ``UserMapMeta`` collection: known fields and sample values per field."""

    def __init__(self, mongo_id=1):
        self.client = PyMongoClient(mongo_id=mongo_id)
        self.modename = "UserMapMeta"

    def setClient(self, client):
        """Replace the client used for writes."""
        self.client = client

    def remove(self, appkey, modename, tm):
        """No-op."""
        modename = "UserMapMeta"
        pass

    def write(self, data, appkey, modename, modetools, *args, **kwargs):
        """Merge field names and field values into the stored metadata.

        Every non-empty record of ``data`` must provide ``_id``, ``fields``
        (iterable) and ``field_elems`` (mapping ``field -> iterable of
        values``).  Records are consumed: ``_id``, ``fields`` and every entry
        of ``field_elems`` are popped.

        :param data: iterable of metadata records.
        :param appkey: first argument handed to ``client.bulkWrite``.
        :param modename: ignored; ``self.modename`` is always used.
        :param modetools: unused here.
        """
        started = time.time()
        modename = self.modename
        op = []
        for _data in data:
            try:
                if not _data:
                    continue
                update_query = {
                    "$addToSet": {
                        "fields": {
                            "$each": _data.pop("fields")
                        }
                    }
                }
                _id = _data.pop("_id")
                op.append(UpdateOne({"_id": _id}, update_query, True))

                field_elems = _data["field_elems"]
                # Snapshot the keys: entries are popped inside the loop, and
                # mutating a dict while iterating its live key view raises
                # RuntimeError on Python 3.
                for key in list(field_elems):
                    update_query = {
                        "$addToSet": {
                            "field_elems.%s" % key: {
                                "$each": list(field_elems.pop(key))
                            }
                        }
                    }
                    # Fields that already hold more than 100 elements are not
                    # extended (index 100 must not exist).
                    op.append(
                        UpdateOne(
                            {
                                "_id": _id,
                                "field_elems.%s.100" % (key, ): {
                                    "$exists": False
                                }
                            }, update_query, True))
            except Exception:
                import traceback
                traceback.print_exc()
                print("UserMapMetaWriter: skipped record %r" % (_data, ))

        try:
            if op:
                # Do not rebind ``op``; the fallback below iterates it.
                self.client.bulkWrite(appkey, modename, op)
        except Exception:
            print(
                "Warn: bulkStore 'UserMapMeta' Rise a error; Switch to Single Mode"
            )
            for op_item in op:
                try:
                    self.client.bulkWrite(appkey, modename, [op_item])
                except Exception:
                    import traceback
                    traceback.print_exc()
        finallyMask(appkey, modename, self.client)
        print("UserMapMetaWriter cost seconds %.10f" %
              ((time.time() - started), ))
class UserProfileWriter(ModeWriter):
    """Merge a batch of user profiles into the ``UserProfile`` collection.

    Besides merging with the stored document, the writer maintains two lists:

    * ``activelife``    -- day offsets relative to the user's first login day;
    * ``activelifeabs`` -- the same days expressed relative to 2016-01-01.

    (Offsets are whatever ``getDayDelta`` returns; presumably a day count --
    confirm against its definition.)
    """

    def __init__(self, mongo_id=1):
        self.client = PyMongoClient(mongo_id=mongo_id)
        self.conn = self.client.getConn()
        self.modename = "UserProfile"

    def setClient(self, client):
        """Replace the client and refresh the connection obtained from it."""
        self.client = client
        self.conn = self.client.getConn()

    def remove(self, appkey, modename, tm):
        """No-op."""
        pass

    def write(self, data, appkey, modename, modetools, *args, **kwargs):
        """Merge ``data`` with the stored profiles and replace them in bulk.

        :param data: mapping ``userkey -> profile dict``; mutated in place
            (existing users are replaced by the merged profile).
        :param appkey: first argument handed to the client calls.
        :param modename: ignored; ``self.modename`` is always used.
        :param modetools: must provide ``mergeUserProfile(new, stored)``.
        """
        started = time.time()
        modename = self.modename
        # A real list: a dict key view is not usable as a ``$in`` operand on
        # Python 3.
        uids = list(data)
        docs = self.client.find(appkey, modename, {"_id": {"$in": uids}})

        oldusers = set()
        for doc in docs:
            try:
                # The _id of a UserProfile document is the user key.
                key = doc["_id"]
                oldusers.add(key)

                # Reset per user so that a record lacking lastLoginTime can
                # never pick up the value left over from the previous user.
                new_last_login_day = None
                if "lastLoginTime" in data[key]:
                    new_last_login_day = data[key]["lastLoginTime"][:8]

                # If the incoming data starts earlier than the stored first
                # login, shift the stored offsets to the new origin.
                if "firstLoginTime" in doc and "firstLoginTime" in data[key]:
                    activelife = doc.get("activelife", [0])
                    firstLoginTime_new = data[key]["firstLoginTime"][:8]
                    firstLoginTime_old = doc["firstLoginTime"][:8]
                    if firstLoginTime_new < firstLoginTime_old:
                        firstLoginDelta = getDayDelta(firstLoginTime_old,
                                                      firstLoginTime_new)
                        # A concrete list, not a lazy map object.
                        doc["activelife"] = [
                            i + firstLoginDelta for i in activelife
                        ]

                data[key] = modetools.mergeUserProfile(data[key], doc)

                # Build the user life-cycle data.
                firstLoginDay = data[key]["firstLoginTime"][:8]
                lastLoginDay = data[key]["lastLoginTime"][:8]
                if new_last_login_day is None:
                    new_last_login_day = lastLoginDay
                dayDelta = getDayDelta(new_last_login_day, firstLoginDay)
                data[key].setdefault("activelife", [0])  # legacy documents
                if dayDelta not in data[key]["activelife"]:
                    data[key]["activelife"].append(dayDelta)
                    data[key]["activelife"].sort()

                # Absolute activity data; the origin is 2016-01-01.
                try:
                    firstlogin_deltaday = getDayDelta(firstLoginDay,
                                                      "20160101")
                    data[key]["activelifeabs"] = [
                        firstlogin_deltaday + remain_day
                        for remain_day in data[key]["activelife"]
                    ]
                except Exception:
                    import traceback
                    traceback.print_exc()
            except Exception:
                import traceback
                traceback.print_exc()

        # New users: remember the version they first appeared with.
        for key in set(uids) - oldusers:
            data[key]["comever"] = data[key]["ver"]
            # Absolute activity data; the origin is 2016-01-01.  The last
            # login of the batch is used as the first login day here.
            try:
                firstLoginDay = data[key]["lastLoginTime"][:8]
                firstlogin_deltaday = getDayDelta(firstLoginDay, "20160101")
                data[key]["activelifeabs"] = [
                    firstlogin_deltaday + remain_day
                    for remain_day in data[key]["activelife"]
                ]
            except Exception:
                import traceback
                traceback.print_exc()

        op = [ReplaceOne({"_id": key}, data[key], True) for key in data]
        try:
            if op:
                self.client.bulkWrite(appkey, modename, op)
        except Exception:
            print(
                "Warn: bulkStore 'UserProfile' Rise a error; Switch to Single Mode"
            )
            try:
                replace_onebyone(data, appkey, modename, self.client)
            except Exception:
                import traceback
                traceback.print_exc()
        finallyMask(appkey, modename, self.client)
        print("UserProfileWriter cost seconds %.3f" %
              ((time.time() - started), ))
class UserCrumbsWriter(ModeWriter):
    """Write per-user, per-day records into the ``uvfile`` collection.

    Each record is merged with today's stored record, enriched with the
    locations of its IPs (from ``jh.UserIP``) and with recent-activity
    counters derived from the user's ``UserProfile`` document.
    """

    def __init__(self, mongo_id=1):
        self.client = PyMongoClient(mongo_id=mongo_id)
        self.conn = self.client.getConn()
        self.modename = "uvfile"
        self.attachmode_storers = []
        try:
            self.attachmode_storers = [UserIP()]
        except Exception:
            import traceback
            traceback.print_exc()

    def setClient(self, client):
        """Replace the client and refresh the connection obtained from it."""
        self.client = client
        self.conn = self.client.getConn()

    def remove(self, appkey, modename, tm):
        """Delete the records of one day.

        :param tm: day as ``yyyymmdd`` or ``yyyy-mm-dd``.
        """
        modename = self.modename
        tm = tm.replace("-", "")
        tm = time.strftime(
            "%Y-%m-%d",
            time.localtime(time.mktime(time.strptime(tm, "%Y%m%d"))))
        self.client.remove(appkey, modename, {"tm": tm})

    def getMeasure(self, activelifeabs, fix_deltaday):
        """Count the active days inside the trailing 7/14/28/30-day windows.

        :param activelifeabs: iterable of day offsets on which the user was
            active.
        :param fix_deltaday: offset of the reference day; later offsets are
            ignored.
        :return: dict with ``last7ActiveNum``, ``last14ActiveNum``,
            ``last28ActiveNum`` and ``last30ActiveNum``.
        """
        measure = {
            "last7ActiveNum": 0,
            "last14ActiveNum": 0,
            "last28ActiveNum": 0,
            "last30ActiveNum": 0,
        }
        for active_day in activelifeabs:
            if active_day > fix_deltaday:
                continue
            delta = fix_deltaday - active_day
            if delta <= 6:
                measure["last7ActiveNum"] += 1
            if delta <= 13:
                measure["last14ActiveNum"] += 1
            if delta <= 27:
                measure["last28ActiveNum"] += 1
            if delta <= 29:
                measure["last30ActiveNum"] += 1
        return measure

    def write(self, data, appkey, modename, modetools, *args, **kwargs):
        """Merge, enrich and upsert the day's records.

        :param data: mapping ``userkey -> record``; mutated in place.
        :param appkey: first argument handed to the client calls.
        :param modename: ignored; ``self.modename`` is always used.
        :param modetools: must provide ``mergeUserCrumbs(stored, new)`` and
            ``formatList(record)``.
        :param kwargs: must contain ``today`` (``yyyy-mm-dd`` or
            ``yyyymmdd``).
        """
        modename = self.modename
        today = kwargs["today"].replace("-", "")
        # A real list: a dict key view is not usable as a ``$in`` operand on
        # Python 3.
        uids = list(data)
        # ``tm`` key of today's records.  Formatting the parsed date directly
        # avoids the former "yesterday + 86400 seconds" round trip, which
        # lands on the wrong day around DST changes.
        yyyy_mm_dd = time.strftime("%Y-%m-%d", time.strptime(today, "%Y%m%d"))

        uvfile = self.client.find(
            appkey, "uvfile",
            OrderedDict([("tm", yyyy_mm_dd), ("jhd_userkey", {"$in": uids})]))
        user_profile = self.client.find(appkey, "UserProfile",
                                        {"_id": {"$in": uids}})

        # Resolve every IP of the batch to a {"prov", "city"} pair.
        ips = set()
        ip_loc = {}
        try:
            for uid in data:
                # A record without jhd_ip contributes nothing instead of
                # aborting the whole lookup.
                ips.update(data[uid].get("jhd_ip") or ())
            ip_loc_cur = self.conn["jh"]["UserIP"].find(
                OrderedDict([("_id", {"$in": list(ips)}),
                             ("province", {"$exists": True}),
                             ("city", {"$exists": True})]),
                {"province": True, "city": True})
            for item in ip_loc_cur:
                province = item["province"]
                city = item["city"]
                if not province:
                    continue
                if not city:
                    # Fall back to the province when the city is empty.
                    city = province
                ip_loc.setdefault(item["_id"],
                                  {"prov": province, "city": city})
        except Exception:
            import traceback
            traceback.print_exc()

        # Merge with the records already stored for today.
        for doc in uvfile:
            uid = doc["jhd_userkey"]
            data[uid] = modetools.mergeUserCrumbs(doc, data[uid])
            try:
                known_locs = data[uid].setdefault("jhd_loc", [])
                for ip in data[uid]["jhd_ip"]:
                    loc = ip_loc.get(ip, None)
                    if loc and loc not in known_locs:
                        known_locs.append(loc)
            except Exception:
                import traceback
                traceback.print_exc()

        fix_deltaday = getDayDelta(today, "20160101")
        for doc in user_profile:
            key = doc["_id"]
            # First login time of the user.
            first_login = doc.get("firstLoginTime", "unknown")
            # Recent-activity counters of the user.
            measure = self.getMeasure(doc.get("activelifeabs", []),
                                      fix_deltaday)
            measure["firstLoginTime"] = first_login
            data[key] = dict(data[key],
                             firstLoginTime=first_login,
                             measure=measure)

        op = []
        for key in data:
            if "_id" not in data[key]:
                data[key]["_id"] = ObjectId()
            op.append(
                ReplaceOne({"_id": data[key]["_id"]},
                           modetools.formatList(data[key]), True))
        try:
            if op:
                self.client.bulkWrite(appkey, modename, op)
        except Exception:
            print(
                "Warn: bulkStore 'uvfile' Rise a error; Switch to Single Mode")
            try:
                replace_onebyone(data, appkey, modename, self.client)
            except Exception:
                import traceback
                traceback.print_exc()
        finallyMask(appkey, modename, self.client)

        try:
            kwargs["ip_loc"] = ip_loc
            # NOTE(review): store_attachmode is not defined in this class;
            # presumably it is inherited from ModeWriter -- confirm.  If it is
            # missing, the AttributeError is caught and logged right here.
            self.store_attachmode(data, appkey, modename, modetools, *args,
                                  **kwargs)
        except Exception:
            import traceback
            traceback.print_exc()