class UserProfileUpdateWriter(ModeWriter):
    """Append IP-derived locations to ``UserProfile`` documents.

    For every user in a batch, the IPs listed under ``jhd_ip`` are looked up
    in the ``ip_loc`` mapping handed to :meth:`write`; each hit becomes a
    ``{"prov": ..., "city": ...}`` dict that is added to the user's ``locs``
    array with ``$addToSet``.
    """

    def __init__(self):
        self.client = PyMongoClient()
        self.conn = self.client.getConn()
        self.modename = "UserProfile"

    def setClient(self, client):
        """Replace the client and refresh the cached connection."""
        self.client = client
        self.conn = self.client.getConn()

    def remove(self, appkey, modename, tm):
        """Do nothing; this writer never removes data."""
        pass

    def write(self, data, appkey, modename, modetools, *args, **kwargs):
        """Resolve each user's IPs to locations and push them to MongoDB.

        :param data: dict keyed by user id; each value is a dict that may
            carry a ``jhd_ip`` iterable. A ``locs`` list is added to the
            entries for which at least one IP resolves (``data`` is mutated).
        :param appkey: passed to ``self.client.bulkWrite`` as the first
            argument.
        :param modename: ignored; ``self.modename`` is always used.
        :param modetools: unused here.
        :param kwargs: must contain ``ip_loc``, a mapping from IP to a dict
            with optional ``prov`` / ``city`` keys.
        :raises KeyError: if ``ip_loc`` is missing from ``kwargs``.
        """
        import traceback

        modename = self.modename
        started = time.time()
        ip_loc = kwargs["ip_loc"]

        for uid in data:
            # A user without "jhd_ip" is skipped instead of aborting the
            # whole batch with a KeyError.
            for ip in data[uid].get("jhd_ip") or ():
                loc = ip_loc.get(ip, {})
                if not loc:
                    continue
                loc_data = {
                    "prov": loc.get("prov", "#"),
                    "city": loc.get("city", "#"),
                }
                data[uid].setdefault("locs", []).append(loc_data)

        op = []
        for uid in data:
            for loc in data[uid].get("locs", []):
                op.append(UpdateOne({"_id": uid}, {"$addToSet": {"locs": loc}}))

        try:
            if op:
                self.client.bulkWrite(appkey, modename, op)
        except Exception:
            print(
                "Warn: bulkStore 'UserProfile' Rise a error; Switch to Single Mode"
            )
            # NOTE(review): the fallback hands the whole ``data`` dict to
            # replace_onebyone, whereas the bulk path only issues $addToSet
            # updates - confirm that helper does not overwrite full profiles.
            try:
                replace_onebyone(data, appkey, modename, self.client)
            except Exception:
                traceback.print_exc()
        print("UserProfileUpdateWriter cost seconds %.3f" %
              ((time.time() - started), ))
class UserDefineMapMeta(object):
    """Read per-event custom-field metadata from the ``UserMapMeta`` collection."""

    # Filtering limits applied to the sample values of each field.
    MAX_ELEM_LENGTH = 60
    MAX_ELEMS = 100

    def __init__(self):
        pass

    def get_mongoid(self, appkey):
        """Look up the Mongo instance id for ``appkey`` and store it on ``self.mongo_id``."""
        m_client = MysqlClient()
        try:
            self.mongo_id = m_client.get_mongoid(appkey)[0]
        finally:
            # Close the MySQL client even when the lookup fails.
            m_client.closeMysql()

    def getData(self, appkey, *args, **kwargs):
        """Return ``{eventid: {field_name: {"type": ..., "elems": [...]}}}``.

        Every document of ``<appkey>.UserMapMeta`` is read. A document looks
        like::

            {"_id": "ac7",
             "fields": [{"type": "string", "name": "type"}],
             "field_elems": {"type": ["..."]}}

        For each field, string sample values that contain ``"http"`` or are
        60 or more characters long are dropped, and collection stops once 100
        values have been kept. Non-string values are kept as they are.
        Fields that cannot be processed are skipped. An event whose
        ``fields`` is empty maps to an empty dict.
        """
        modename = "UserMapMeta"
        self.get_mongoid(appkey)
        self.client = PyMongoClient(self.mongo_id)
        cur = self.client.getConn()[appkey][modename].find({})
        result = {}
        for item in cur:
            eventid = item["_id"]
            fields = item["fields"]
            field_elems = item.get("field_elems", {})
            # Store type/elems for every declared field.
            for field in fields:
                try:
                    mapkey = field["name"]
                    mapkey_type = field["type"]
                    elems_tmp = []
                    for elem in field_elems.get(mapkey, []):
                        if isinstance(elem, str) or isinstance(elem, unicode):
                            # Exclude URL-like values and overly long strings.
                            if "http" in elem:
                                continue
                            if len(elem) >= self.MAX_ELEM_LENGTH:
                                continue
                        elems_tmp.append(elem)
                        if len(elems_tmp) >= self.MAX_ELEMS:
                            break
                    # setdefault: the first definition of a field wins.
                    result.setdefault(eventid, {}).setdefault(mapkey, {
                        "type": mapkey_type,
                        "elems": elems_tmp,
                    })
                except Exception:
                    # Best effort: a malformed field entry is ignored.
                    continue
            # Event without any map fields.
            if not fields:
                result.setdefault(eventid, {})
        return result
class IPStorageMG(IPStorage):
    """MongoDB-backed IP storage writing to the ``UserIP`` collection of db ``jh``."""

    def __init__(self):
        self.client = PyMongoClient()
        self.conn = self.client.getConn()

    def store(self, _id, **kwargs):
        """``$set`` the given keyword fields on the document with ``_id``.

        The update is issued with ``upsert=False``.
        """
        op = UpdateOne({"_id": _id}, {"$set": dict(kwargs)}, False)
        self.client.bulkWrite("jh", "UserIP", [op])

    def storeItem(self, _id, key, value):
        """Not implemented; does nothing."""
        pass

    def bulkStore(self, data):
        """``$set`` the fields of every ``data[_id]`` dict on its document.

        :param data: mapping of ``_id`` to a dict of fields to set.

        Nothing is sent when ``data`` is empty, matching the ``if op:`` guard
        used by the other writers in this file.
        """
        op = [
            UpdateOne({"_id": _id}, {"$set": dict(data[_id])}, False)
            for _id in data
        ]
        if op:
            self.client.bulkWrite("jh", "UserIP", op)
class ModeWriteMongoUserIP(ModeWriter):
    """Back-fill ``jhd_loc`` on ``uvfile`` documents from recently updated IPs."""

    # Only UserIP documents whose "timestamp" is at most this many seconds
    # old are considered by write().
    RECENT_WINDOW_SECONDS = 100

    def __init__(self, mongo_id=1):
        self.client = PyMongoClient(mongo_id=mongo_id)
        self.conn = self.client.getConn()
        self.dbname = "jh"
        self.modename = "UserIP"

    def remove(self, *args, **kwargs):
        """Do nothing; this writer never removes data."""
        pass

    def write(self, *args, **kwargs):
        """Add resolved locations to today's ``uvfile`` documents.

        :param args: reserved, unused.
        :param kwargs: ``today`` = current date (yyyy-mm-dd).
        :return: None
        :raises KeyError: if ``today`` is missing from ``kwargs``.

        Reads ``jh.UserIP`` documents with a recent ``timestamp`` that have
        ``province``, ``city`` and ``appkey`` fields. For every app key listed
        on such a document, queues an update on that app's ``uvfile``
        collection which adds ``{"prov", "city"}`` to ``jhd_loc`` of a
        document of ``today`` with that IP and no ``jhd_loc`` yet.
        """
        today = kwargs["today"]
        cur = self.conn[self.dbname][self.modename].find(
            {
                "timestamp": {"$gte": time.time() - self.RECENT_WINDOW_SECONDS},
                "province": {"$exists": True},
                "city": {"$exists": True},
                "appkey": {"$exists": True},
            },
            {"province": True, "city": True, "appkey": True})

        update_appkey = {}
        for item in cur:
            ip = item["_id"]
            province = item["province"]
            city = item["city"]
            if not province:
                continue
            if not city:
                # Fall back to the province when the city is empty.
                city = province
            for a_appkey in item["appkey"]:
                # The operator was misspelled "$exista", which is not a valid
                # query operator; "$exists" is the intended one.
                # The filter needs a matching index to keep updates fast.
                # NOTE(review): UpdateOne touches a single matching document
                # per IP - confirm UpdateMany is not what is wanted.
                update_appkey.setdefault(a_appkey, []).append(
                    UpdateOne(
                        OrderedDict([("tm", today),
                                     ("jhd_loc", {"$exists": False}),
                                     ("jhd_ip", ip)]),
                        {
                            "$addToSet": {
                                "jhd_loc": {"prov": province, "city": city}
                            }
                        }))

        for a_appkey in update_appkey:
            a = time.time()
            self.client.bulkWrite(a_appkey, "uvfile", update_appkey[a_appkey])
            print(__name__, time.time() - a, a_appkey,
                  len(update_appkey[a_appkey]))
class UserActiveWriter(ModeWriter):
    """Maintain the per-day ``UserActive`` records derived from ``UserProfile``."""

    def __init__(self):
        self.client = PyMongoClient()
        self.modename = "UserActive"

    def setClient(self, client):
        """Replace the client used for all database access."""
        self.client = client

    def remove(self, appkey, modename, tm):
        """Remove the ``UserActive`` records whose ``partition_date`` is ``tm``.

        :param tm: date as ``yyyymmdd`` or ``yyyy-mm-dd``.
        :param modename: ignored; ``self.modename`` is always used.
        :raises ValueError: if ``tm`` is not a valid date.
        """
        modename = self.modename
        # Parse and re-format to validate and normalise the date.
        tm = time.strftime("%Y%m%d",
                           time.strptime(tm.replace("-", ""), "%Y%m%d"))
        self.client.remove(appkey, modename, {"partition_date": tm})

    def write(self, data, appkey, modename, modetools, *args, **kwargs):
        """Build and upsert one ``UserActive`` record per profile for a day.

        :param data: unused here.
        :param appkey: database name holding ``UserProfile``/``UserActive``.
        :param modename: ignored; ``self.modename`` is always used.
        :param modetools: unused here.
        :param kwargs: optional ``today`` (``yyyy-mm-dd`` or ``yyyymmdd``);
            defaults to the local date 24 hours ago.

        For every ``UserProfile`` document the previous day's record is looked
        up, its ``active`` list is extended with 1 or 0 depending on whether
        the day offset from first login is in the profile's ``activelife``,
        and the result replaces (upsert) the record of the current day.
        Profiles that fail to process are reported and skipped.
        """
        import traceback

        started = time.time()
        modename = self.modename
        if "today" in kwargs:
            curDay = kwargs["today"].replace("-", "")
        else:
            curDay = time.strftime("%Y%m%d",
                                   time.localtime(time.time() - 86400))
        conn = self.client.getConn()
        userActiveCollection = conn[appkey][modename]
        docs = self.client.find(appkey, "UserProfile", {})
        yesterday = getDay(curDay, "%Y%m%d", -1)

        op = []
        # Separate timer so the final total still covers the setup above.
        scan_started = time.time()
        for doc in docs:
            try:
                key = doc["_id"]
                activelife = doc.get("activelife", [0])
                firstLoginTime = doc["firstLoginTime"][:8]
                login = getDayDelta(curDay, firstLoginTime) in activelife
                # Look up the previous day's record; needs an index to be
                # fast: db.UserActive.ensureIndex({partition_date: -1, jh_uid: 1})
                userActive = userActiveCollection.find_one({
                    "jh_uid": key,
                    "partition_date": yesterday,
                })
                # Build today's record.
                newUserActive = UserActiveBuilder()
                newUserActive.setJhdUid(key)
                newUserActive.setPartitionDate(curDay)
                if userActive is None:
                    newUserActive.setActive([1] if login else [0])
                else:
                    userActive["active"].append(1 if login else 0)
                    newUserActive.setActive(userActive["active"])
                # Carry over the login timestamps used for metrics.
                newUserActive.setFirstLoginTime(doc["firstLoginTime"])
                newUserActive.setLastLoginTime(doc["lastLoginTime"])
                op.append(
                    ReplaceOne({"jh_uid": key, "partition_date": curDay},
                               newUserActive.builder(),
                               upsert=True))
            except Exception:
                traceback.print_exc()
                print(doc)
        print("find cost time: %d" % int(time.time() - scan_started))

        try:
            if op:
                userActiveCollection.bulk_write(op)
        except Exception:
            traceback.print_exc()
            print(
                "Warn: bulkStore 'UserActive' Rise a error; Switch to Single Mode"
            )
            # Retry one operation at a time so a single bad record does not
            # block the others.
            for op_item in op:
                try:
                    userActiveCollection.bulk_write([op_item])
                except Exception:
                    traceback.print_exc()
        finallyMask(appkey, modename, self.client)
        print("UserActiveWriter cost seconds %.10f" %
              ((time.time() - started), ))
class UserProfileWriter(ModeWriter):
    """Merge a batch of user profiles into the ``UserProfile`` collection."""

    # Reference day for the "activelifeabs" absolute day offsets.
    ABS_EPOCH_DAY = "20160101"

    def __init__(self, mongo_id=1):
        self.client = PyMongoClient(mongo_id=mongo_id)
        self.conn = self.client.getConn()
        self.modename = "UserProfile"

    def setClient(self, client):
        """Replace the client and refresh the cached connection."""
        self.client = client
        self.conn = self.client.getConn()

    def remove(self, appkey, modename, tm):
        """Do nothing; this writer never removes data."""
        pass

    def write(self, data, appkey, modename, modetools, *args, **kwargs):
        """Merge ``data`` with the stored profiles and upsert the result.

        :param data: dict keyed by user key (the ``UserProfile`` ``_id``);
            values are profile dicts. ``data`` is mutated in place.
        :param appkey: database name.
        :param modename: ignored; ``self.modename`` is always used.
        :param modetools: object providing ``mergeUserProfile(new, old)``.
        :raises KeyError: if a user that is not stored yet has no ``ver``.

        Existing users: the stored document is merged into the batch entry,
        the day offset of the new login is added to ``activelife`` and
        ``activelifeabs`` is recomputed. Users not stored yet get ``comever``
        set from ``ver`` and ``activelifeabs`` computed from their own data.
        """
        import traceback

        started = time.time()
        modename = self.modename
        uids = list(data)
        docs = self.client.find(appkey, modename, {"_id": {"$in": uids}})
        oldusers = set()
        for doc in docs:
            try:
                # The UserProfile _id is the user key.
                key = doc["_id"]
                oldusers.add(key)
                incoming = data[key]
                # Reset per user: previously this value leaked over from the
                # preceding iteration when the entry had no "lastLoginTime".
                new_last_day = None
                if "lastLoginTime" in incoming:
                    new_last_day = incoming["lastLoginTime"][:8]
                # If the new data is earlier than the stored first login,
                # shift the stored offsets so they stay relative to the new
                # first-login day.
                if "firstLoginTime" in doc and "firstLoginTime" in incoming:
                    activelife = doc.get("activelife", [0])
                    first_new = incoming["firstLoginTime"][:8]
                    first_old = doc["firstLoginTime"][:8]
                    if first_new < first_old:
                        shift = getDayDelta(first_old, first_new)
                        doc["activelife"] = [i + shift for i in activelife]
                data[key] = modetools.mergeUserProfile(incoming, doc)
                merged = data[key]
                # Generate the user lifecycle data.
                firstLoginDay = merged["firstLoginTime"][:8]
                if new_last_day is None:
                    new_last_day = merged["lastLoginTime"][:8]
                dayDelta = getDayDelta(new_last_day, firstLoginDay)
                merged.setdefault("activelife", [0])  # historical data compat
                if dayDelta not in merged["activelife"]:
                    merged["activelife"].append(dayDelta)
                    merged["activelife"].sort()
                # Absolute active days, counted from 2016-01-01.
                try:
                    base = getDayDelta(firstLoginDay, self.ABS_EPOCH_DAY)
                    merged["activelifeabs"] = [
                        base + remain_day
                        for remain_day in merged["activelife"]
                    ]
                except Exception:
                    traceback.print_exc()
            except Exception:
                traceback.print_exc()

        # Users seen for the first time: record the version they arrived with.
        for key in set(uids) - oldusers:
            profile = data[key]
            profile["comever"] = profile["ver"]
            # Absolute active days, counted from 2016-01-01.
            try:
                # "activelife" offsets are relative to the first login (see
                # the existing-user branch), so prefer it as the base day and
                # fall back to the last login only when it is absent.
                if "firstLoginTime" in profile:
                    firstLoginDay = profile["firstLoginTime"][:8]
                else:
                    firstLoginDay = profile["lastLoginTime"][:8]
                base = getDayDelta(firstLoginDay, self.ABS_EPOCH_DAY)
                profile["activelifeabs"] = [
                    base + remain_day for remain_day in profile["activelife"]
                ]
            except Exception:
                traceback.print_exc()

        op = [ReplaceOne({"_id": key}, data[key], True) for key in data]
        try:
            if op:
                self.client.bulkWrite(appkey, modename, op)
        except Exception:
            print(
                "Warn: bulkStore 'UserProfile' Rise a error; Switch to Single Mode"
            )
            try:
                replace_onebyone(data, appkey, modename, self.client)
            except Exception:
                traceback.print_exc()
        finallyMask(appkey, modename, self.client)
        print("UserProfileWriter cost seconds %.3f" %
              ((time.time() - started), ))
class UserCrumbsWriter(ModeWriter):
    """Merge a batch of per-user daily records into the ``uvfile`` collection."""

    def __init__(self, mongo_id=1):
        self.client = PyMongoClient(mongo_id=mongo_id)
        self.conn = self.client.getConn()
        self.modename = "uvfile"
        self.attachmode_storers = []
        try:
            self.attachmode_storers = [UserIP()]
        except Exception:
            import traceback
            traceback.print_exc()

    def setClient(self, client):
        """Replace the client and refresh the cached connection."""
        self.client = client
        self.conn = self.client.getConn()

    def remove(self, appkey, modename, tm):
        """Remove the ``uvfile`` records whose ``tm`` equals the given day.

        :param tm: date as ``yyyymmdd`` or ``yyyy-mm-dd``; it is matched
            against the stored ``yyyy-mm-dd`` form.
        :param modename: ignored; ``self.modename`` is always used.
        :raises ValueError: if ``tm`` is not a valid date.
        """
        modename = self.modename
        tm = time.strftime("%Y-%m-%d",
                           time.strptime(tm.replace("-", ""), "%Y%m%d"))
        self.client.remove(appkey, modename, {"tm": tm})

    def getMeasure(self, activelifeabs, fix_deltaday):
        """Count active days within the last 7/14/28/30 days.

        :param activelifeabs: iterable of absolute day offsets.
        :param fix_deltaday: absolute day offset of the reference day;
            larger offsets are ignored.
        :return: dict with ``last7ActiveNum``, ``last14ActiveNum``,
            ``last28ActiveNum`` and ``last30ActiveNum``.
        """
        measure = {
            "last7ActiveNum": 0,
            "last14ActiveNum": 0,
            "last28ActiveNum": 0,
            "last30ActiveNum": 0,
        }
        for active_day in activelifeabs:
            if active_day > fix_deltaday:
                continue
            delta = fix_deltaday - active_day
            # The windows are inclusive of the reference day (delta == 0).
            if delta <= 6:
                measure["last7ActiveNum"] += 1
            if delta <= 13:
                measure["last14ActiveNum"] += 1
            if delta <= 27:
                measure["last28ActiveNum"] += 1
            if delta <= 29:
                measure["last30ActiveNum"] += 1
        return measure

    def write(self, data, appkey, modename, modetools, *args, **kwargs):
        """Merge ``data`` into today's ``uvfile`` records and upsert them.

        :param data: dict keyed by user key; values are record dicts that may
            carry a ``jhd_ip`` iterable. ``data`` is mutated in place.
        :param appkey: database name.
        :param modename: ignored; ``self.modename`` is always used.
        :param modetools: object providing ``mergeUserCrumbs(doc, new)`` and
            ``formatList(record)``.
        :param kwargs: ``today`` (``yyyy-mm-dd`` or ``yyyymmdd``) is required;
            ``ip_loc`` is set before the attached writers are invoked.
        :raises KeyError: if ``today`` is missing from ``kwargs``.
        :raises ValueError: if ``today`` is not a valid date.
        """
        import traceback

        modename = self.modename
        today = kwargs["today"].replace("-", "")
        uids = list(data)
        # Format the day directly instead of subtracting and re-adding 86400
        # seconds, which can land on the wrong date around DST changes.
        yyyy_mm_dd = time.strftime("%Y-%m-%d", time.strptime(today, "%Y%m%d"))
        uvfile = self.client.find(
            appkey, "uvfile",
            OrderedDict([("tm", yyyy_mm_dd),
                         ("jhd_userkey", {"$in": uids})]))
        user_profile = self.client.find(appkey, "UserProfile",
                                        {"_id": {"$in": uids}})

        # Resolve every IP of the batch to a {"prov", "city"} location.
        ips = set()
        ip_loc = {}
        try:
            for uid in data:
                # A record without "jhd_ip" used to raise TypeError here and
                # discard the location lookup for the whole batch.
                ips.update(data[uid].get("jhd_ip") or ())
            ip_loc_cur = self.conn["jh"]["UserIP"].find(
                OrderedDict([("_id", {"$in": list(ips)}),
                             ("province", {"$exists": True}),
                             ("city", {"$exists": True})]),
                {"province": True, "city": True})
            for item in ip_loc_cur:
                province = item["province"]
                city = item["city"]
                if not province:
                    continue
                if not city:
                    city = province
                ip_loc.setdefault(item["_id"],
                                  {"prov": province, "city": city})
        except Exception:
            traceback.print_exc()

        # Merge with the records already stored for today.
        for doc in uvfile:
            uid = doc["jhd_userkey"]
            data[uid] = modetools.mergeUserCrumbs(doc, data[uid])
            try:
                known_locs = data[uid].setdefault("jhd_loc", [])
                for ip in data[uid]["jhd_ip"]:
                    loc = ip_loc.get(ip, None)
                    if loc and loc not in known_locs:
                        known_locs.append(loc)
            except Exception:
                traceback.print_exc()

        # Attach first-login time and recent-activity counters from the
        # stored profile.
        fix_deltaday = getDayDelta(today, "20160101")
        for doc in user_profile:
            key = doc["_id"]
            first_login = doc.get("firstLoginTime", "unknown")
            measure = self.getMeasure(doc.get("activelifeabs", []),
                                      fix_deltaday)
            measure["firstLoginTime"] = first_login
            data[key] = dict(data[key],
                             firstLoginTime=first_login,
                             measure=measure)

        op = []
        for key in data:
            if "_id" not in data[key]:
                data[key]["_id"] = ObjectId()
            op.append(
                ReplaceOne({"_id": data[key]["_id"]},
                           modetools.formatList(data[key]), True))
        try:
            if op:
                self.client.bulkWrite(appkey, modename, op)
        except Exception:
            print(
                "Warn: bulkStore 'uvfile' Rise a error; Switch to Single Mode")
            try:
                replace_onebyone(data, appkey, modename, self.client)
            except Exception:
                traceback.print_exc()
        finallyMask(appkey, modename, self.client)

        try:
            kwargs["ip_loc"] = ip_loc
            # NOTE(review): store_attachmode is not defined in this class
            # (__init__ only sets attachmode_storers); presumably it is
            # inherited from ModeWriter - verify, otherwise this call always
            # fails and is only reported below.
            self.store_attachmode(data, appkey, modename, modetools, *args,
                                  **kwargs)
        except Exception:
            traceback.print_exc()
def get_mongo_conn(appkey):
    """Return the Mongo connection of the instance that hosts ``appkey``.

    The Mongo instance id is looked up through ``MysqlClient.get_mongoid``;
    the MySQL client is closed even when that lookup raises.
    """
    m_client = MysqlClient()
    try:
        mongo_id = m_client.get_mongoid(appkey)[0]
    finally:
        m_client.closeMysql()
    return PyMongoClient(mongo_id=mongo_id).getConn()