Esempio n. 1
0
class UserEventWriter(ModeWriter):
    """Upsert raw user-event documents into the "UserEvent" mode.

    Every ``remove``/``write`` call is also forwarded to a
    ``UserEventGroupWriter`` instance held in ``self.groupwriter``.
    """

    def __init__(self):
        self.client = PyMongoClient()
        self.groupwriter = UserEventGroupWriter()
        self.modename = "UserEvent"

    def setClient(self, client):
        """Replace the storage client used by this writer."""
        self.client = client

    def remove(self, appkey, modename, tm):
        """Delete all documents of one partition day.

        :param appkey: application key, forwarded to the client.
        :param modename: ignored; ``self.modename`` is always used.
        :param tm: day as ``YYYYMMDD`` or ``YYYY-MM-DD``.
        :raises ValueError: if ``tm`` cannot be parsed as a date.
        """
        modename = self.modename
        # Parse and re-format so the value is validated and zero-padded.
        tm = time.strftime("%Y%m%d",
                           time.strptime(tm.replace("-", ""), "%Y%m%d"))
        self.client.remove(appkey, modename, {"partition_date": tm})
        self.groupwriter.remove(appkey, modename, tm)

    def write(self, data, appkey, modename, modetools, *args, **kwargs):
        """Upsert every document of ``data`` and forward it to the group writer.

        Each document is stamped with ``partition_date`` and a deterministic
        ``_id`` built from ``jhd_userkey``, ``jhd_ts`` and the first ten
        characters of ``jhd_eventId``.  Documents missing one of those keys
        are reported and skipped.

        :param data: iterable of mutable event documents (modified in place).
        :param appkey: application key, forwarded to the client.
        :param modename: ignored; ``self.modename`` is always used.
        :param modetools: forwarded untouched to the group writer.
        :param kwargs: optional ``today`` (``YYYY-MM-DD`` or ``YYYYMMDD``);
            when absent, the previous local day is used.
        """
        import traceback

        started = time.time()
        modename = self.modename
        # Computed once so every document of the batch lands in the same
        # partition, even if the run crosses midnight.
        if "today" in kwargs:
            partition_date = kwargs["today"].replace("-", "")
        else:
            partition_date = time.strftime(
                "%Y%m%d", time.localtime(time.time() - 86400))

        ops = []
        for doc in data:
            try:
                doc["partition_date"] = partition_date
                doc_id = "_".join(map(str, [
                    doc["jhd_userkey"],
                    doc["jhd_ts"],
                    doc["jhd_eventId"][:10],
                ]))
                doc["_id"] = doc_id
                ops.append(ReplaceOne({"_id": doc_id}, doc, True))
            except Exception:
                # Best effort: report the bad document and keep going.
                traceback.print_exc()
                print("UserEventWriter: skipped document %r" % (doc,))

        try:
            if ops:
                self.client.bulkWrite(appkey, modename, ops)
        except Exception:
            traceback.print_exc()
            print(
                "Warn: bulkStore '%s' Rise a error; Switch to Single Mode"
                % modename)
            # Retry one operation at a time so a single bad document does
            # not prevent the others from being stored.
            for op_item in ops:
                try:
                    self.client.bulkWrite(appkey, modename, [op_item])
                except Exception:
                    traceback.print_exc()
        finallyMask(appkey, modename, self.client)
        self.groupwriter.write(data, appkey, modename, modetools, *args,
                               **kwargs)
        print("UserEventWriter cost seconds %.3f" % (time.time() - started))
Esempio n. 2
0
class UserActiveWriter(ModeWriter):
    """Build one "UserActive" record per user profile for a given day."""

    def __init__(self):
        self.client = PyMongoClient()
        self.modename = "UserActive"

    def setClient(self, client):
        """Replace the storage client used by this writer."""
        self.client = client

    def remove(self, appkey, modename, tm):
        """Delete all documents of one partition day.

        :param appkey: application key, forwarded to the client.
        :param modename: ignored; ``self.modename`` is always used.
        :param tm: day as ``YYYYMMDD`` or ``YYYY-MM-DD``.
        :raises ValueError: if ``tm`` cannot be parsed as a date.
        """
        modename = self.modename
        # Parse and re-format so the value is validated and zero-padded.
        tm = time.strftime("%Y%m%d",
                           time.strptime(tm.replace("-", ""), "%Y%m%d"))
        self.client.remove(appkey, modename, {"partition_date": tm})

    def write(self, data, appkey, modename, modetools, *args, **kwargs):
        """Upsert the activity record of every user for the current day.

        All "UserProfile" documents are scanned.  For each one, the record
        of the previous day (if any) is loaded and its ``active`` list is
        extended with 1 or 0 depending on whether the user was active on the
        current day.

        :param data: unused by this writer.
        :param appkey: application key; also the database name used on the
            raw connection.
        :param modename: ignored; ``self.modename`` is always used.
        :param modetools: unused by this writer.
        :param kwargs: optional ``today`` (``YYYY-MM-DD`` or ``YYYYMMDD``);
            when absent, the previous local day is used.
        """
        import traceback

        started = time.time()
        modename = self.modename
        if "today" in kwargs:
            curDay = kwargs["today"].replace("-", "")
        else:
            curDay = time.strftime(
                "%Y%m%d", time.localtime(time.time() - 86400))
        conn = self.client.getConn()
        userActiveCollection = conn[appkey][modename]
        docs = self.client.find(appkey, "UserProfile", {})
        yesterday = getDay(curDay, "%Y%m%d", -1)

        ops = []
        # Separate timer for the scan so the total below still covers the
        # whole call.
        scan_started = time.time()
        for doc in docs:
            try:
                key = doc["_id"]
                activelife = doc.get("activelife", [0])
                firstLoginTime = doc["firstLoginTime"][:8]
                login = getDayDelta(curDay, firstLoginTime) in activelife
                # Look up the previous day's record.  This runs once per
                # user, so it needs an index to be fast:
                # db.UserActive.ensureIndex({partition_date: -1, jh_uid: 1})
                userActive = userActiveCollection.find_one({
                    "jh_uid": key,
                    "partition_date": yesterday,
                })
                # Build today's record for this user.
                newUserActive = UserActiveBuilder()
                newUserActive.setJhdUid(key)
                newUserActive.setPartitionDate(curDay)
                if userActive is None:
                    newUserActive.setActive([1] if login else [0])
                else:
                    userActive["active"].append(1 if login else 0)
                    newUserActive.setActive(userActive["active"])
                newUserActive.setFirstLoginTime(doc["firstLoginTime"])
                newUserActive.setLastLoginTime(doc["lastLoginTime"])
                ops.append(
                    ReplaceOne({"jh_uid": key, "partition_date": curDay},
                               newUserActive.builder(),
                               upsert=True))
            except Exception:
                # Best effort: report the bad profile and keep going.
                traceback.print_exc()
                print("UserActiveWriter: skipped document %r" % (doc,))
        print("find cost time: %d" % int(time.time() - scan_started))

        try:
            if ops:
                userActiveCollection.bulk_write(ops)
        except Exception:
            traceback.print_exc()
            print(
                "Warn: bulkStore 'UserActive' Rise a error; Switch to Single Mode"
            )
            # Retry one operation at a time so a single bad record does not
            # prevent the others from being stored.
            for op_item in ops:
                try:
                    userActiveCollection.bulk_write([op_item])
                except Exception:
                    traceback.print_exc()
        finallyMask(appkey, modename, self.client)
        print("UserActiveWriter cost seconds %.10f" % (time.time() - started))
Esempio n. 3
0
class UserCrumbsWriter(ModeWriter):
    """Merge per-user crumbs into the "uvfile" mode and store them."""

    def __init__(self, mongo_id=1):
        self.client = PyMongoClient(mongo_id=mongo_id)
        self.conn = self.client.getConn()
        self.modename = "uvfile"
        # NOTE(review): this list is not read anywhere in this class;
        # presumably it is consumed by ``store_attachmode`` - confirm.
        self.attachmode_storers = []
        try:
            self.attachmode_storers = [UserIP()]
        except Exception:
            import traceback
            traceback.print_exc()

    def setClient(self, client):
        """Replace the storage client and refresh the raw connection."""
        self.client = client
        self.conn = self.client.getConn()

    def remove(self, appkey, modename, tm):
        """Delete all documents whose ``tm`` field equals the given day.

        :param appkey: application key, forwarded to the client.
        :param modename: ignored; ``self.modename`` is always used.
        :param tm: day as ``YYYYMMDD`` or ``YYYY-MM-DD``; stored documents
            are matched on the ``YYYY-MM-DD`` form.
        :raises ValueError: if ``tm`` cannot be parsed as a date.
        """
        modename = self.modename
        tm = time.strftime("%Y-%m-%d",
                           time.strptime(tm.replace("-", ""), "%Y%m%d"))
        self.client.remove(appkey, modename, {"tm": tm})

    def getMeasure(self, activelifeabs, fix_deltaday):
        """Count active days inside the trailing 7/14/28/30-day windows.

        :param activelifeabs: iterable of day offsets on which the user was
            active.
        :param fix_deltaday: day offset of the reference day; offsets
            greater than this are ignored.
        :return: dict with the keys ``last7ActiveNum``, ``last14ActiveNum``,
            ``last28ActiveNum`` and ``last30ActiveNum``.
        """
        measure = {
            "last7ActiveNum": 0,
            "last14ActiveNum": 0,
            "last28ActiveNum": 0,
            "last30ActiveNum": 0,
        }
        for active_day in activelifeabs:
            if active_day > fix_deltaday:
                continue
            delta = fix_deltaday - active_day
            if delta <= 6:
                measure["last7ActiveNum"] += 1
            if delta <= 13:
                measure["last14ActiveNum"] += 1
            if delta <= 27:
                measure["last28ActiveNum"] += 1
            if delta <= 29:
                measure["last30ActiveNum"] += 1
        return measure

    def write(self, data, appkey, modename, modetools, *args, **kwargs):
        """Merge ``data`` with stored crumbs and profiles, then upsert it.

        Steps: load the stored "uvfile" documents of the day and the
        "UserProfile" documents of the given users, resolve IP addresses to
        locations, merge everything into ``data`` and bulk-upsert the
        result.

        :param data: dict mapping user key to that user's crumbs dict
            (modified in place).
        :param appkey: application key, forwarded to the client.
        :param modename: ignored; ``self.modename`` is always used.
        :param modetools: object providing ``mergeUserCrumbs`` and
            ``formatList``.
        :param kwargs: must contain ``today`` (``YYYY-MM-DD`` or
            ``YYYYMMDD``).
        :raises KeyError: if ``today`` is missing from ``kwargs``.
        """
        import traceback

        modename = self.modename
        today = kwargs["today"].replace("-", "")
        # A real list is needed for "$in"; on Python 3 ``keys()`` is a view.
        uids = list(data)
        # Format the day directly instead of subtracting and re-adding
        # 86400 seconds, which could drift across a DST transition.
        yyyy_mm_dd = time.strftime("%Y-%m-%d",
                                   time.strptime(today, "%Y%m%d"))

        uvfile = self.client.find(
            appkey, "uvfile",
            OrderedDict([("tm", yyyy_mm_dd),
                         ("jhd_userkey", {"$in": uids})]))

        user_profile = self.client.find(appkey, "UserProfile",
                                        {"_id": {"$in": uids}})

        # Resolve every IP seen in this batch to a province/city pair.
        ip_loc = {}
        try:
            ips = set()
            for crumbs in data.values():
                # A user without "jhd_ip" must not abort the whole lookup.
                ips.update(crumbs.get("jhd_ip") or [])
            ip_loc_cur = self.conn["jh"]["UserIP"].find(
                OrderedDict([
                    ("_id", {"$in": list(ips)}),
                    ("province", {"$exists": True}),
                    ("city", {"$exists": True}),
                ]),
                {"province": True, "city": True})
            for item in ip_loc_cur:
                province = item["province"]
                if not province:
                    continue
                # Fall back to the province when the city is empty.
                city = item["city"] or province
                ip_loc.setdefault(item["_id"],
                                  {"prov": province, "city": city})
        except Exception:
            traceback.print_exc()

        # Merge the stored crumbs into the incoming data.
        for doc in uvfile:
            uid = doc["jhd_userkey"]
            data[uid] = modetools.mergeUserCrumbs(doc, data[uid])
            try:
                ip_lis = data[uid]["jhd_ip"]
                locations = data[uid].setdefault("jhd_loc", [])
                for ip in ip_lis:
                    loc = ip_loc.get(ip)
                    if loc and loc not in locations:
                        locations.append(loc)
            except Exception:
                traceback.print_exc()

        fix_deltaday = getDayDelta(today, "20160101")

        # Attach first-login time and recent-activity counters.
        for doc in user_profile:
            key = doc["_id"]
            first_login = doc.get("firstLoginTime", "unknown")
            measure = self.getMeasure(doc.get("activelifeabs", []),
                                      fix_deltaday)
            measure["firstLoginTime"] = first_login
            data[key] = dict(data[key],
                             firstLoginTime=first_login,
                             measure=measure)

        ops = []
        for key in data:
            if "_id" not in data[key]:
                data[key]["_id"] = ObjectId()
            ops.append(
                ReplaceOne({"_id": data[key]["_id"]},
                           modetools.formatList(data[key]),
                           True))

        try:
            if ops:
                self.client.bulkWrite(appkey, modename, ops)
        except Exception:
            traceback.print_exc()
            print(
                "Warn: bulkStore 'uvfile' Rise a error; Switch to Single Mode")
            try:
                replace_onebyone(data, appkey, modename, self.client)
            except Exception:
                traceback.print_exc()
        finallyMask(appkey, modename, self.client)
        try:
            kwargs["ip_loc"] = ip_loc
            # NOTE(review): ``store_attachmode`` is not defined in this
            # class; presumably it is inherited from ModeWriter - confirm.
            self.store_attachmode(data, appkey, modename, modetools, *args,
                                  **kwargs)
        except Exception:
            traceback.print_exc()