コード例 #1
0
class UserProfileUpdateWriter(ModeWriter):
    """Add resolved IP locations to existing ``UserProfile`` documents.

    Each IP found under ``data[uid]["jhd_ip"]`` is looked up in the
    ``ip_loc`` mapping passed through ``kwargs``.  Every hit becomes a
    ``{"prov": ..., "city": ...}`` entry that is ``$addToSet``-ed into the
    ``locs`` array of the document whose ``_id`` is that user id.
    """

    def __init__(self):
        self.client = PyMongoClient()
        self.conn = self.client.getConn()
        self.modename = "UserProfile"

    def setClient(self, client):
        """Use ``client`` from now on and refresh the cached connection."""
        self.client = client
        self.conn = self.client.getConn()

    def remove(self, appkey, modename, tm):
        """Do nothing; this writer has no removal step."""
        pass

    def write(self, data, appkey, modename, modetools, *args, **kwargs):
        """Push the locations of every user's IPs into ``UserProfile``.

        :param data: mapping of user id -> record; the record's ``jhd_ip``
            iterable lists the IPs to resolve.  Records are mutated: resolved
            locations are appended to their ``locs`` list.
        :param appkey: database name handed to the client.
        :param modename: ignored; ``self.modename`` is always used.
        :param modetools: unused here, kept for the common writer signature.
        :param kwargs: must contain ``ip_loc``, a mapping of
            ip -> ``{"prov": ..., "city": ...}``.
        :raises KeyError: if ``ip_loc`` is missing from ``kwargs``.
        """
        modename = self.modename
        started = time.time()
        ip_loc = kwargs["ip_loc"]

        op = []
        for uid in data:
            record = data[uid]
            # A record without any IPs simply contributes no locations.
            for ip in record.get("jhd_ip") or ():
                loc = ip_loc.get(ip, {})
                if not loc:
                    continue
                record.setdefault("locs", []).append({
                    "prov": loc.get("prov", "#"),
                    "city": loc.get("city", "#"),
                })
            # Every entry of ``locs`` is sent, including entries that were
            # already present on the record before this call.
            for loc in record.get("locs", []):
                op.append(
                    UpdateOne({"_id": uid}, {"$addToSet": {"locs": loc}}))

        try:
            if op:
                self.client.bulkWrite(appkey, modename, op)
        except Exception:
            print(
                "Warn: bulkStore 'UserProfile' Rise a error; Switch to Single Mode"
            )
            try:
                replace_onebyone(data, appkey, modename, self.client)
            except Exception:
                import traceback
                # print_exc() writes the traceback itself and returns None.
                traceback.print_exc()
        print("UserProfileUpdateWriter cost seconds %.3f" %
              ((time.time() - started), ))
コード例 #2
0
class UserIP(ModeWriter):
    '''
    Record every IP seen in a batch in the ``jh.UserIP`` collection.

    Document layout written by this class:
    {
        _id: [ip],
        inctag: [number],
        timestamp: [number],
        appkey: [list of appkeys]
    }
    '''
    def __init__(self, client=None):
        if client is None:
            self.client = PyMongoClient()
        else:
            self.client = client
        self.dbname = "jh"
        self.modename = "UserIP"

    def setClient(self, client):
        """Use ``client`` for all subsequent writes."""
        self.client = client

    def remove(self, *args, **kwargs):
        """Do nothing; this writer has no removal step."""
        pass

    def write(self, data, appkey, modename, modetools, *args, **kwargs):
        """Upsert one document per distinct IP found in ``data``.

        For each IP the ``timestamp`` is set to the current time, ``inctag``
        is incremented by one and ``appkey`` is added to the ``appkey`` set.

        :param data: mapping of user id -> record holding a ``jhd_ip``
            iterable.
        :param appkey: value added to each document's ``appkey`` set.
        :param modename: ignored; ``self.modename`` is always used.
        :param modetools: unused here, kept for the common writer signature.
        :raises KeyError: if a record has no ``jhd_ip`` entry.
        """
        modename = self.modename
        ip_set = set()
        for key in data:
            ip_set.update(data[key]["jhd_ip"])

        # Take the clock once so all IPs of this batch share one timestamp.
        now = time.time()
        op = [
            UpdateOne({"_id": ip}, {
                "$set": {
                    "timestamp": now
                },
                "$inc": {
                    "inctag": 1
                },
                "$addToSet": {
                    "appkey": appkey
                }
            }, upsert=True) for ip in ip_set
        ]
        try:
            if op:
                self.client.bulkWrite(self.dbname, modename, op)
        except Exception:
            print(
                "Warn: bulkStore 'jh.UserIP' Rise a error; Switch to Single Mode"
            )
            for op_item in op:
                try:
                    self.client.bulkWrite(self.dbname, modename, [op_item])
                except Exception:
                    import traceback
                    # print_exc() writes the traceback itself and returns None.
                    traceback.print_exc()
コード例 #3
0
class UserEventWriter(ModeWriter):
    """Store raw event documents in ``UserEvent`` and feed the group writer.

    Each event gets a ``partition_date`` and a deterministic ``_id`` before
    it is written with an upserting ``ReplaceOne``.
    """

    def __init__(self):
        self.client = PyMongoClient()
        self.groupwriter = UserEventGroupWriter()
        self.modename = "UserEvent"

    def setClient(self, client):
        """Use ``client`` for all subsequent operations."""
        self.client = client

    def remove(self, appkey, modename, tm):
        """Delete the events of one day, then delegate to the group writer.

        :param appkey: database name handed to the client.
        :param modename: ignored; ``self.modename`` is always used.
        :param tm: day as ``yyyy-mm-dd`` or ``yyyymmdd``.
        :raises ValueError: if ``tm`` is not a valid date.
        """
        modename = self.modename
        # Parsing and re-formatting validates and normalizes the date.
        tm = time.strftime("%Y%m%d",
                           time.strptime(tm.replace("-", ""), "%Y%m%d"))
        self.client.remove(appkey, modename, {"partition_date": tm})
        self.groupwriter.remove(appkey, modename, tm)

    def write(self, data, appkey, modename, modetools, *args, **kwargs):
        """Upsert every event in ``data`` and forward the batch.

        :param data: iterable of event dicts; each needs ``jhd_userkey``,
            ``jhd_ts`` and ``jhd_eventId``.  The dicts are mutated
            (``partition_date`` and ``_id`` are set).
        :param appkey: database name handed to the client.
        :param modename: ignored; ``self.modename`` is always used.
        :param modetools: passed through to the group writer.
        :param kwargs: optional ``today`` (``yyyy-mm-dd``); when absent the
            partition date is the local date 24 hours ago.
        """
        started = time.time()
        modename = self.modename
        # The partition date is the same for the whole batch.
        if "today" in kwargs:
            partition_date = kwargs["today"].replace("-", "")
        else:
            partition_date = time.strftime(
                "%Y%m%d", time.localtime(time.time() - 86400))

        op = []
        for doc in data:
            try:
                doc["partition_date"] = partition_date
                # _id = userkey, timestamp and the first 10 characters of
                # the event id, so re-writing an event replaces it.
                _id = "_".join(
                    map(str, [
                        doc["jhd_userkey"], doc["jhd_ts"],
                        doc["jhd_eventId"][:10]
                    ]))
                doc["_id"] = _id
                op.append(ReplaceOne({"_id": _id}, doc, True))
            except Exception:
                import traceback
                traceback.print_exc()
                print(doc)
        try:
            if op:
                # Do not rebind ``op``: the fallback below needs the list.
                self.client.bulkWrite(appkey, modename, op)
        except Exception:
            print(
                "Warn: bulkStore 'UserEvent' Rise a error; Switch to Single Mode"
            )
            for op_item in op:
                try:
                    self.client.bulkWrite(appkey, modename, [op_item])
                except Exception:
                    import traceback
                    traceback.print_exc()
        finallyMask(appkey, modename, self.client)
        self.groupwriter.write(data, appkey, modename, modetools, *args,
                               **kwargs)
        print("UserEventWriter cost seconds %.3f" %
              ((time.time() - started), ))
コード例 #4
0
class UserActiveUpdateWriter(ModeWriter):
    """Record on which absolute day each user was active.

    The day is expressed as the result of ``getDayDelta(day, "20160101")``
    and added to the ``activelifeabs`` set of the user's document in the
    ``UserActiveUpdate`` collection.
    """

    def __init__(self):
        self.client = PyMongoClient()
        self.modename = "UserActiveUpdate"

    def setClient(self, client):
        """Use ``client`` for all subsequent operations."""
        self.client = client

    def remove(self, appkey, modename, tm):
        """Do nothing; this writer has no removal step."""
        pass

    def write(self, data, appkey, modename, modetools, *args, **kwargs):
        """Upsert today's day offset for every user key in ``data``.

        :param data: mapping whose keys are user keys; values are only used
            for diagnostics.
        :param appkey: database name handed to the client.
        :param modename: ignored; ``self.modename`` is always used.
        :param modetools: unused here, kept for the common writer signature.
        :param kwargs: must contain ``today`` (``yyyy-mm-dd``).
        :raises KeyError: if ``today`` is missing from ``kwargs``.
        """
        started = time.time()
        modename = self.modename
        curDay = kwargs["today"].replace("-", "")

        activelifeabs = getDayDelta(curDay, "20160101")
        update_query = {"$addToSet": {"activelifeabs": activelifeabs}}
        op = []
        for key in data:
            # key is the user key and doubles as the document _id
            try:
                op.append(UpdateOne({"_id": key}, update_query, True))
            except Exception:
                import traceback
                traceback.print_exc()
                # Was ``data[data]``, which raised inside the handler.
                print(key, data[key])
        try:
            if op:
                # Do not rebind ``op``: the fallback below needs the list.
                self.client.bulkWrite(appkey, modename, op)
        except Exception:
            import traceback
            traceback.print_exc()
            print(
                "Warn: bulkStore 'UserActive' Rise a error; Switch to Single Mode"
            )
            for op_item in op:
                try:
                    self.client.bulkWrite(appkey, modename, [op_item])
                except Exception:
                    traceback.print_exc()
        finallyMask(appkey, modename, self.client)
        print("UserActiveUpdateWriter cost seconds %.10f" %
              ((time.time() - started), ))
コード例 #5
0
class IPStorageMG(IPStorage):
    """MongoDB-backed IP storage writing to the ``jh.UserIP`` collection."""

    def __init__(self):
        self.client = PyMongoClient()
        self.conn = self.client.getConn()

    def store(self, _id, **kwargs):
        """``$set`` the given keyword fields on the document with ``_id``.

        The upsert flag is passed as ``False``.
        """
        op = UpdateOne({"_id": _id}, {"$set": dict(kwargs)}, False)
        self.client.bulkWrite("jh", "UserIP", [op])

    def storeItem(self, _id, key, value):
        """Not implemented; does nothing."""
        pass

    def bulkStore(self, data):
        """``$set`` many documents in one bulk write.

        :param data: mapping of ``_id`` -> dict of fields to set.  The upsert
            flag is passed as ``False`` for every operation.
        """
        op = [
            UpdateOne({"_id": _id}, {"$set": dict(data[_id])}, False)
            for _id in data
        ]
        # Skip the round trip for an empty batch, like the other writers
        # do before calling bulkWrite.
        if op:
            self.client.bulkWrite("jh", "UserIP", op)
コード例 #6
0
class ModeWriteMongoUserIP(ModeWriter):
    """Copy resolved IP locations from ``jh.UserIP`` into ``uvfile``.

    Recently touched ``UserIP`` documents that carry a province and city are
    turned into ``$addToSet`` updates on the ``uvfile`` collection of every
    appkey listed on the IP document.
    """

    def __init__(self, mongo_id=1):
        self.client = PyMongoClient(mongo_id=mongo_id)
        self.conn = self.client.getConn()
        self.dbname = "jh"
        self.modename = "UserIP"

    def remove(self, *args, **kwargs):
        """Do nothing; this writer has no removal step."""
        pass

    def write(self, *args, **kwargs):
        '''
        Add ``jhd_loc`` to today's ``uvfile`` documents that lack it.

        :param args: reserved, unused
        :param kwargs: today = the current date (yyyy-mm-dd)
        :return: None
        :raises KeyError: if ``today`` is missing from ``kwargs``.
        '''
        today = kwargs["today"]
        # Only IP documents whose timestamp lies within the last 100 seconds
        # and that have province, city and appkey fields.
        cur = self.conn[self.dbname][self.modename].find(
            {
                "timestamp": {
                    "$gte": time.time() - 100
                },
                "province": {
                    "$exists": True
                },
                "city": {
                    "$exists": True
                },
                "appkey": {
                    "$exists": True
                }
            }, {
                "province": True,
                "city": True,
                "appkey": True
            })
        update_appkey = {}
        for item in cur:
            ip = item["_id"]
            province = item["province"]
            city = item["city"]
            if not province:
                continue
            if not city:
                # Fall back to the province when the city is empty.
                city = province
            for a_appkey in item["appkey"]:
                update_appkey.setdefault(a_appkey, []).append(
                    UpdateOne(
                        # Key order is fixed via OrderedDict; meant to be
                        # used together with an index to speed up updates.
                        OrderedDict([
                            ("tm", today),
                            # Was "$exista", which is not a MongoDB operator.
                            ("jhd_loc", {
                                "$exists": False
                            }),
                            ("jhd_ip", ip)
                        ]),
                        {
                            "$addToSet": {
                                "jhd_loc": {
                                    "prov": province,
                                    "city": city
                                }
                            }
                        }))
        for a_appkey in update_appkey:
            a = time.time()
            self.client.bulkWrite(a_appkey, "uvfile", update_appkey[a_appkey])
            print(__name__,
                  time.time() - a, a_appkey, len(update_appkey[a_appkey]))
コード例 #7
0
class UserMapMetaWriter(ModeWriter):
    """Accumulate field names and sample values in ``UserMapMeta``.

    For every input record the ``fields`` list and each ``field_elems``
    entry are merged into the stored document with ``$addToSet``/``$each``.
    """

    def __init__(self, mongo_id=1):
        self.client = PyMongoClient(mongo_id=mongo_id)
        self.modename = "UserMapMeta"

    def setClient(self, client):
        """Use ``client`` for all subsequent operations."""
        self.client = client

    def remove(self, appkey, modename, tm):
        """Do nothing; this writer has no removal step."""
        pass

    def write(self, data, appkey, modename, modetools, *args, **kwargs):
        """Build and execute the updates for every record in ``data``.

        :param data: iterable of dicts with ``_id``, ``fields`` (list) and
            ``field_elems`` (mapping of field name -> iterable of values).
            Records are consumed: ``fields``, ``_id`` and every key of
            ``field_elems`` are popped.
        :param appkey: database name handed to the client.
        :param modename: ignored; ``self.modename`` is always used.
        :param modetools: unused here, kept for the common writer signature.
        """
        started = time.time()
        modename = self.modename
        op = []
        for _data in data:
            try:
                if not _data:
                    continue
                update_query = {
                    "$addToSet": {
                        "fields": {
                            "$each": _data.pop("fields")
                        }
                    }
                }
                _id = _data.pop("_id")
                op.append(UpdateOne({"_id": _id}, update_query, True))

                field_elems = _data["field_elems"]
                # Iterate over a snapshot of the keys: popping while
                # iterating a live dict view raises RuntimeError.
                for key in list(field_elems):
                    update_query = {
                        "$addToSet": {
                            "field_elems.%s" % key: {
                                "$each": list(field_elems.pop(key))
                            }
                        }
                    }
                    # Fields that already hold more than 100 elements (index
                    # 100 exists) are not updated.
                    # NOTE(review): with upsert=True a non-matching filter
                    # on an existing _id presumably attempts an insert and
                    # fails with a duplicate key - confirm this is intended.
                    op.append(
                        UpdateOne(
                            {
                                "_id": _id,
                                "field_elems.%s.100" % (key, ): {
                                    "$exists": False
                                }
                            }, update_query, True))
            except Exception:
                import traceback
                traceback.print_exc()
                print(_data)
        try:
            if op:
                # Do not rebind ``op``: the fallback below needs the list.
                self.client.bulkWrite(appkey, modename, op)
        except Exception:
            print(
                "Warn: bulkStore 'UserMapMeta' Rise a error; Switch to Single Mode"
            )
            for op_item in op:
                try:
                    self.client.bulkWrite(appkey, modename, [op_item])
                except Exception:
                    import traceback
                    traceback.print_exc()
        finallyMask(appkey, modename, self.client)
        print("UserMapMetaWriter cost seconds %.10f" %
              ((time.time() - started), ))
コード例 #8
0
class UserProfileWriter(ModeWriter):
    """Merge a batch of user records into the ``UserProfile`` collection.

    Existing profiles are merged with the incoming record and their
    ``activelife`` / ``activelifeabs`` day offsets are updated; users not
    yet stored get ``comever`` and ``activelifeabs`` initialised.  Every
    record is then written with an upserting ``ReplaceOne``.
    """

    def __init__(self, mongo_id=1):
        self.client = PyMongoClient(mongo_id=mongo_id)
        self.conn = self.client.getConn()
        self.modename = "UserProfile"

    def setClient(self, client):
        """Use ``client`` from now on and refresh the cached connection."""
        self.client = client
        self.conn = self.client.getConn()

    def remove(self, appkey, modename, tm):
        """Do nothing; this writer has no removal step."""
        pass

    def write(self, data, appkey, modename, modetools, *args, **kwargs):
        """Merge ``data`` with the stored profiles and write everything back.

        :param data: mapping of user key -> profile dict; mutated in place
            (entries of already-known users are replaced by the merged
            profile).
        :param appkey: database name handed to the client.
        :param modename: ignored; ``self.modename`` is always used.
        :param modetools: must provide ``mergeUserProfile(new, old)``.
        :param kwargs: not read by this writer.
        :raises KeyError: if a user that is not stored yet has no ``ver``.
        """
        started = time.time()
        modename = self.modename
        # A real list, so the value is usable inside the ``$in`` query.
        uids = list(data)

        docs = self.client.find(appkey, modename, {"_id": {"$in": uids}})

        oldusers = set()
        for doc in docs:
            try:
                # UserProfile _id is the user key
                key = doc["_id"]
                oldusers.add(key)
                incoming = data[key]

                # Day of the incoming record's last login, if it has one.
                # Reset per user so a value never leaks from the previous
                # iteration.
                lastLoginTime_new = None
                if "lastLoginTime" in incoming:
                    lastLoginTime_new = incoming["lastLoginTime"][:8]

                # If the incoming data is older than the stored first login,
                # shift the stored offsets so they stay relative to the new,
                # earlier first login.
                if "firstLoginTime" in doc and "firstLoginTime" in incoming:
                    activelife = doc.get("activelife", [0])
                    firstLoginTime_new = incoming["firstLoginTime"][:8]
                    firstLoginTime_old = doc["firstLoginTime"][:8]
                    if firstLoginTime_new < firstLoginTime_old:
                        firstLoginDelta = getDayDelta(firstLoginTime_old,
                                                      firstLoginTime_new)
                        doc["activelife"] = [
                            i + firstLoginDelta for i in activelife
                        ]

                data[key] = modetools.mergeUserProfile(incoming, doc)
                merged = data[key]

                # Build the user life-cycle data.
                firstLoginDay = merged["firstLoginTime"][:8]
                lastLoginDay = merged["lastLoginTime"][:8]
                if lastLoginTime_new is None:
                    # Incoming record had no last login: use the merged one.
                    lastLoginTime_new = lastLoginDay
                dayDelta = getDayDelta(lastLoginTime_new, firstLoginDay)
                # Older documents may lack activelife.
                merged.setdefault("activelife", [0])
                if dayDelta not in merged["activelife"]:
                    merged["activelife"].append(dayDelta)
                merged["activelife"].sort()

                # Absolute activity offsets, counted from 2016-01-01.
                try:
                    firstlogin_deltaday = getDayDelta(firstLoginDay,
                                                      "20160101")
                    merged["activelifeabs"] = [
                        firstlogin_deltaday + remain_day
                        for remain_day in merged["activelife"]
                    ]
                except Exception:
                    import traceback
                    traceback.print_exc()
            except Exception:
                import traceback
                traceback.print_exc()

        # Users that are not stored yet: record the version they first
        # appeared with.
        for key in set(uids) - oldusers:
            data[key]["comever"] = data[key]["ver"]
            # Absolute activity offsets, counted from 2016-01-01.
            try:
                # NOTE(review): the first-login day is taken from
                # lastLoginTime here - confirm this is intended.
                firstLoginDay = data[key]["lastLoginTime"][:8]
                firstlogin_deltaday = getDayDelta(firstLoginDay, "20160101")
                data[key]["activelifeabs"] = [
                    firstlogin_deltaday + remain_day
                    for remain_day in data[key]["activelife"]
                ]
            except Exception:
                import traceback
                traceback.print_exc()

        op = [ReplaceOne({"_id": key}, data[key], True) for key in data]
        try:
            if op:
                self.client.bulkWrite(appkey, modename, op)
        except Exception:
            print(
                "Warn: bulkStore 'UserProfile' Rise a error; Switch to Single Mode"
            )
            try:
                replace_onebyone(data, appkey, modename, self.client)
            except Exception:
                import traceback
                traceback.print_exc()
        finallyMask(appkey, modename, self.client)
        print("UserProfileWriter cost seconds %.3f" %
              ((time.time() - started), ))
コード例 #9
0
class UserCrumbsWriter(ModeWriter):
    """Merge per-user daily records into the ``uvfile`` collection.

    Besides the merge with already stored documents of the same day, each
    record is enriched with IP locations (``jhd_loc``) and recent-activity
    counters (``measure``).  After the main write the batch is handed to the
    writers listed in ``attachmode_storers``.
    """

    def __init__(self, mongo_id=1):
        self.client = PyMongoClient(mongo_id=mongo_id)
        self.conn = self.client.getConn()
        self.modename = "uvfile"
        self.attachmode_storers = []
        try:
            self.attachmode_storers = [UserIP()]
        except Exception:
            import traceback
            traceback.print_exc()

    def setClient(self, client):
        """Use ``client`` from now on and refresh the cached connection."""
        self.client = client
        self.conn = self.client.getConn()

    def remove(self, appkey, modename, tm):
        """Delete all ``uvfile`` documents of one day.

        :param appkey: database name handed to the client.
        :param modename: ignored; ``self.modename`` is always used.
        :param tm: day as ``yyyy-mm-dd`` or ``yyyymmdd``.
        :raises ValueError: if ``tm`` is not a valid date.
        """
        modename = self.modename
        # Documents store the day as yyyy-mm-dd.
        tm = time.strftime("%Y-%m-%d",
                           time.strptime(tm.replace("-", ""), "%Y%m%d"))
        self.client.remove(appkey, modename, {"tm": tm})

    def getMeasure(self, activelifeabs, fix_deltaday):
        """Count active days inside the trailing 7/14/28/30-day windows.

        :param activelifeabs: iterable of day offsets on which the user was
            active.
        :param fix_deltaday: offset of the reference day; later offsets are
            ignored.
        :return: dict with ``last7ActiveNum``, ``last14ActiveNum``,
            ``last28ActiveNum`` and ``last30ActiveNum``.
        """
        # (counter name, largest day distance still inside the window)
        windows = (
            ("last7ActiveNum", 6),
            ("last14ActiveNum", 13),
            ("last28ActiveNum", 27),
            ("last30ActiveNum", 29),
        )
        measure = dict((name, 0) for name, _ in windows)
        for day in activelifeabs:
            if day > fix_deltaday:
                continue
            delta = fix_deltaday - day
            for name, limit in windows:
                if delta <= limit:
                    measure[name] += 1
        return measure

    def _lookupIpLocations(self, data):
        """Return ip -> ``{"prov", "city"}`` for the IPs found in ``data``.

        Reads ``jh.UserIP``; failures are printed and yield whatever was
        collected so far.
        """
        ip_loc = {}
        try:
            ips = set()
            for uid in data:
                # A record without IPs must not abort the whole lookup.
                ips.update(data[uid].get("jhd_ip") or ())
            ip_loc_cur = self.conn["jh"]["UserIP"].find(
                OrderedDict([("_id", {
                    "$in": list(ips)
                }), ("province", {
                    "$exists": True
                }), ("city", {
                    "$exists": True
                })]), {
                    "province": True,
                    "city": True
                })
            for item in ip_loc_cur:
                province = item["province"]
                if not province:
                    continue
                # Fall back to the province when the city is empty.
                city = item["city"] or province
                ip_loc.setdefault(item["_id"], {
                    "prov": province,
                    "city": city
                })
        except Exception:
            import traceback
            traceback.print_exc()
        return ip_loc

    def write(self, data, appkey, modename, modetools, *args, **kwargs):
        """Merge, enrich and store the batch, then run the attached writers.

        :param data: mapping of user key -> record; mutated in place.
        :param appkey: database name handed to the client.
        :param modename: ignored; ``self.modename`` is always used.
        :param modetools: must provide ``mergeUserCrumbs(old, new)`` and
            ``formatList(record)``.
        :param kwargs: must contain ``today`` (``yyyy-mm-dd``); ``ip_loc`` is
            added before the attached writers are called.
        :raises KeyError: if ``today`` is missing from ``kwargs``.
        """
        modename = self.modename
        today = kwargs["today"].replace("-", "")
        uids = list(data)
        # Same day as yyyy-mm-dd.  Formatting the parsed date directly
        # avoids the +/-86400 second round trip, which can land on the
        # wrong day when a DST change falls in between.
        yyyy_mm_dd = time.strftime("%Y-%m-%d", time.strptime(today, "%Y%m%d"))

        uvfile = self.client.find(
            appkey, "uvfile",
            OrderedDict([("tm", yyyy_mm_dd), ("jhd_userkey", {
                "$in": uids
            })]))

        user_profile = self.client.find(appkey, "UserProfile",
                                        {"_id": {
                                            "$in": uids
                                        }})

        ip_loc = self._lookupIpLocations(data)

        # Merge with the documents already stored for this day.
        for doc in uvfile:
            uid = doc["jhd_userkey"]
            data[uid] = modetools.mergeUserCrumbs(doc, data[uid])
            try:
                known_locs = data[uid].setdefault("jhd_loc", [])
                for ip in data[uid]["jhd_ip"]:
                    loc = ip_loc.get(ip, None)
                    if loc and loc not in known_locs:
                        known_locs.append(loc)
            except Exception:
                import traceback
                traceback.print_exc()

        fix_deltaday = getDayDelta(today, "20160101")

        for doc in user_profile:
            key = doc["_id"]
            # First login time of the user.
            first_login = doc.get("firstLoginTime", "unknown")
            # Recent-activity counters of the user.
            measure = self.getMeasure(
                doc.get("activelifeabs", []), fix_deltaday)
            measure["firstLoginTime"] = first_login
            data[key] = dict(
                data[key], firstLoginTime=first_login, measure=measure)

        op = []
        for key in data:
            if "_id" not in data[key]:
                data[key]["_id"] = ObjectId()
            op.append(
                ReplaceOne({"_id": data[key]["_id"]},
                           modetools.formatList(data[key]), True))

        try:
            if op:
                self.client.bulkWrite(appkey, modename, op)
        except Exception:
            print(
                "Warn: bulkStore 'uvfile' Rise a error; Switch to Single Mode")
            try:
                replace_onebyone(data, appkey, modename, self.client)
            except Exception:
                import traceback
                traceback.print_exc()
        finallyMask(appkey, modename, self.client)

        # Hand the batch to the attached writers.  The previous call went to
        # ``self.store_attachmode``, which is never defined, so this step
        # always failed with AttributeError.
        kwargs["ip_loc"] = ip_loc
        for storer in self.attachmode_storers:
            try:
                storer.write(data, appkey, modename, modetools, *args,
                             **kwargs)
            except Exception:
                import traceback
                traceback.print_exc()