예제 #1
0
 def __init__(self, client=None):
     """Prepare the writer for the ``UserIP`` collection of database ``jh``.

     :param client: client wrapper to reuse; when ``None`` a fresh
         ``PyMongoClient()`` is created instead.
     """
     self.dbname = "jh"
     self.modename = "UserIP"
     self.client = PyMongoClient() if client is None else client
예제 #2
0
class UserIP(ModeWriter):
    '''
    Writer that upserts every IP address found in a batch into the
    ``UserIP`` collection of database ``jh``.

    Document layout produced by :meth:`write`:
    {
        _id: [ip],
        inctag: [number],
        timestamp: [number],
        appkey: [array of appkeys]
    }
    '''
    def __init__(self, client=None):
        '''
        :param client: client wrapper to reuse; when ``None`` a new
            ``PyMongoClient()`` is created.
        '''
        self.client = PyMongoClient() if client is None else client
        self.dbname = "jh"
        self.modename = "UserIP"

    def setClient(self, client):
        '''Replace the client wrapper used for subsequent writes.'''
        self.client = client

    def remove(self, *args, **kwargs):
        '''No-op: this writer never deletes documents.'''
        pass

    def write(self, data, appkey, modename, modetools, *args, **kwargs):
        '''
        Upsert one document per distinct IP contained in ``data``.

        :param data: mapping whose values each carry an iterable of IPs
            under the key ``"jhd_ip"``.
        :param appkey: value added to each document's ``appkey`` array.
        :param modename: ignored; ``self.modename`` is always used.
        :param modetools: unused here.
        :return: None

        All operations are first sent as one bulk write; if that raises,
        every operation is retried individually and failures are only
        reported via a traceback.
        '''
        modename = self.modename
        ip_set = set()
        for key in data:
            ip_set.update(data[key]["jhd_ip"])
        if not ip_set:
            return

        # One timestamp for the whole batch.
        now = time.time()
        op = [
            UpdateOne(
                {"_id": ip},
                {
                    "$set": {"timestamp": now},
                    "$inc": {"inctag": 1},
                    "$addToSet": {"appkey": appkey},
                },
                upsert=True)
            for ip in ip_set
        ]
        try:
            self.client.bulkWrite(self.dbname, modename, op)
        except Exception:
            import traceback
            print(
                "Warn: bulkStore 'jh.UserIP' Rise a error; Switch to Single Mode"
            )
            for op_item in op:
                try:
                    self.client.bulkWrite(self.dbname, modename, [op_item])
                except Exception:
                    # print_exc() already writes the traceback and returns
                    # None, so it must not be wrapped in print().
                    traceback.print_exc()
예제 #3
0
class UserProfileUpdateWriter(ModeWriter):
    """Writer that adds resolved locations to ``UserProfile`` documents."""

    def __init__(self):
        self.client = PyMongoClient()
        self.conn = self.client.getConn()
        self.modename = "UserProfile"

    def setClient(self, client):
        """Swap the client wrapper and refresh the cached ``conn`` from it."""
        self.client = client
        self.conn = self.client.getConn()

    def remove(self, appkey, modename, tm):
        """No-op: this writer never deletes documents."""
        pass

    def write(self, data, appkey, modename, modetools, *args, **kwargs):
        """Add each user's resolved locations to the ``locs`` array.

        :param data: mapping ``uid -> record``; each record carries an
            iterable of IPs under ``"jhd_ip"``. The record is mutated:
            resolved locations are appended to its ``"locs"`` list.
        :param appkey: database name passed to the client wrapper.
        :param modename: ignored; ``self.modename`` is always used.
        :param modetools: unused here.
        :param kwargs: must contain ``ip_loc``, a mapping
            ``ip -> {"prov": ..., "city": ...}``.
        :return: None
        """
        modename = self.modename
        started = time.time()
        ip_loc = kwargs["ip_loc"]

        for uid in data:
            for ip in data[uid]["jhd_ip"]:
                loc = ip_loc.get(ip, {})
                if not loc:
                    continue
                data[uid].setdefault("locs", []).append({
                    "prov": loc.get("prov", "#"),
                    "city": loc.get("city", "#"),
                })

        op = []
        for uid in data:
            for loc in data[uid].get("locs") or []:
                op.append(UpdateOne({"_id": uid}, {"$addToSet": {
                    "locs": loc
                }}))

        try:
            if op:
                self.client.bulkWrite(appkey, modename, op)
        except Exception:
            print(
                "Warn: bulkStore 'UserProfile' Rise a error; Switch to Single Mode"
            )
            try:
                replace_onebyone(data, appkey, modename, self.client)
            except Exception:
                import traceback
                # print_exc() prints by itself; wrapping it in print() only
                # added a stray "None".
                traceback.print_exc()
        print("UserProfileUpdateWriter cost seconds %.3f" %
              ((time.time() - started), ))
예제 #4
0
class UserEventWriter(ModeWriter):
    """Writer for the ``UserEvent`` collection.

    Every ``remove``/``write`` is also forwarded to a
    ``UserEventGroupWriter``.
    """

    def __init__(self):
        self.client = PyMongoClient()
        self.groupwriter = UserEventGroupWriter()
        self.modename = "UserEvent"

    def setClient(self, client):
        """Replace the client wrapper used for subsequent operations."""
        self.client = client

    def remove(self, appkey, modename, tm):
        """Remove all events of one partition date.

        :param appkey: database name passed to the client wrapper.
        :param modename: ignored; ``self.modename`` is always used.
        :param tm: day as ``yyyymmdd`` or ``yyyy-mm-dd``.
        """
        modename = self.modename
        tm = tm.replace("-", "")
        # Round-trip through strptime/strftime to normalise the date.
        tm = time.strftime(
            "%Y%m%d", time.localtime(time.mktime(time.strptime(tm, "%Y%m%d"))))
        self.client.remove(appkey, modename, {"partition_date": tm})
        self.groupwriter.remove(appkey, modename, tm)

    def write(self, data, appkey, modename, modetools, *args, **kwargs):
        """Upsert every event document of ``data``.

        Each document is mutated in place: ``partition_date`` and a
        deterministic ``_id`` (``userkey_ts_eventId[:10]``) are set.
        Documents missing a required key are reported and skipped.

        :param data: iterable of event dicts with ``jhd_userkey``,
            ``jhd_ts`` and ``jhd_eventId``.
        :param appkey: database name passed to the client wrapper.
        :param modename: ignored; ``self.modename`` is always used.
        :param modetools: forwarded unchanged to the group writer.
        :param kwargs: optional ``today`` (``yyyy-mm-dd``); when absent,
            yesterday's local date is used as the partition date.
        :return: None
        """
        started = time.time()
        modename = self.modename
        op = []
        for doc in data:
            try:
                if "today" in kwargs:
                    doc["partition_date"] = kwargs["today"].replace("-", "")
                else:
                    doc["partition_date"] = time.strftime(
                        "%Y%m%d", time.localtime(time.time() - 86400))
                _id = "_".join(
                    map(str, [
                        doc["jhd_userkey"], doc["jhd_ts"],
                        doc["jhd_eventId"][:10]
                    ]))
                doc["_id"] = _id
                op.append(ReplaceOne({"_id": _id}, doc, True))
            except Exception:
                import traceback
                traceback.print_exc()
                print(doc)
        try:
            if op:
                # Do not rebind ``op`` to the result: the fallback below
                # needs the original operation list.
                self.client.bulkWrite(appkey, modename, op)
        except Exception:
            print(
                "Warn: bulkStore 'UserEvent' Rise a error; Switch to Single Mode"
            )
            for op_item in op:
                try:
                    self.client.bulkWrite(appkey, modename, [op_item])
                except Exception:
                    import traceback
                    traceback.print_exc()
        # finallyMask is a project helper; its effect is not visible here.
        finallyMask(appkey, modename, self.client)
        self.groupwriter.write(data, appkey, modename, modetools, *args,
                               **kwargs)
        print("UserEventWriter cost seconds %.3f" %
              ((time.time() - started), ))
예제 #5
0
 def __init__(self, mongo_id=1):
     """Set up the ``uvfile`` writer and its attached storers.

     :param mongo_id: passed to ``PyMongoClient`` as ``mongo_id``.

     A failure while creating the attached ``UserIP`` storer is reported
     with a traceback and leaves ``attachmode_storers`` empty.
     """
     self.client = PyMongoClient(mongo_id=mongo_id)
     self.conn = self.client.getConn()
     self.modename = "uvfile"
     self.attachmode_storers = []
     try:
         self.attachmode_storers = [UserIP()]
     except Exception:
         import traceback
         # print_exc() prints by itself and returns None.
         traceback.print_exc()
예제 #6
0
class UserActiveUpdateWriter(ModeWriter):
    """Writer that records, per user, the days on which the user was active.

    Days are stored as offsets returned by ``getDayDelta(day, "20160101")``
    in the ``activelifeabs`` array of the ``UserActiveUpdate`` collection.
    """

    def __init__(self):
        self.client = PyMongoClient()
        self.modename = "UserActiveUpdate"

    def setClient(self, client):
        """Replace the client wrapper used for subsequent writes."""
        self.client = client

    def remove(self, appkey, modename, tm):
        """No-op: this writer never deletes documents."""
        pass

    def write(self, data, appkey, modename, modetools, *args, **kwargs):
        """Add today's day offset to every user key found in ``data``.

        :param data: iterable of user keys (iterating a dict yields them).
        :param appkey: database name passed to the client wrapper.
        :param modename: ignored; ``self.modename`` is always used.
        :param modetools: unused here.
        :param kwargs: must contain ``today`` as ``yyyy-mm-dd``.
        :return: None
        """
        started = time.time()
        modename = self.modename
        curDay = kwargs["today"].replace("-", "")

        activelifeabs = getDayDelta(curDay, "20160101")
        update_query = {"$addToSet": {"activelifeabs": activelifeabs}}
        op = []
        for uid in data:
            # each key is a userkey
            try:
                op.append(UpdateOne({"_id": uid}, update_query, True))
            except Exception:
                import traceback
                traceback.print_exc()
                # The original handler evaluated data[data], which raised a
                # TypeError from inside the handler itself.
                print(uid)
        try:
            if op:
                # Keep ``op`` bound to the operation list for the fallback.
                self.client.bulkWrite(appkey, modename, op)
        except Exception:
            import traceback
            traceback.print_exc()
            print(
                "Warn: bulkStore 'UserActive' Rise a error; Switch to Single Mode"
            )
            for op_item in op:
                try:
                    self.client.bulkWrite(appkey, modename, [op_item])
                except Exception:
                    traceback.print_exc()
        # finallyMask is a project helper; its effect is not visible here.
        finallyMask(appkey, modename, self.client)
        print("UserActiveUpdateWriter cost seconds %.10f" %
              ((time.time() - started), ))
예제 #7
0
def usetimeDistribute(num, appkey="BIQU_ANDROID", delta=120):
    """Print and collect an estimated usage time per user for one day.

    For every ``uvfile`` document of the day ``num`` days ago, the
    ``opatm`` timestamps (``HH:MM:SS``) of the action/page/in/end counters
    are merged and sorted. The gaps between consecutive timestamps that are
    at most ``delta`` seconds are summed.

    :param num: how many days back from now the inspected day lies.
    :param appkey: database name passed to the client wrapper.
    :param delta: largest gap, in seconds, that still counts as usage.
    :return: dict ``uid -> summed usage seconds`` for users with a non-zero
        sum (the original built this dict but returned nothing).
    """
    # Only differences between timestamps are used, so any fixed date
    # works as the day part.
    curday = datetime.datetime.today().strftime("%Y%m%d")
    dayStr = time.strftime("%Y-%m-%d",
                           time.localtime(time.time() - 86400 * num))
    client = PyMongoClient()
    result = {}
    opas = ["action", "page", "in", "end"]
    m, n = 0, 0
    for item in client.find(appkey, "uvfile", {"tm": dayStr}):
        uid = item["jhd_userkey"]
        end_sum = item["item_add"].get("end", 0)

        distinct_opatms = set()
        for opa in opas:
            distinct_opatms.update(
                item["item_count"].get(opa, {}).get("opatm", []))
        opatms = sorted(distinct_opatms)

        stamps = [
            int(time.mktime(time.strptime(curday + opatm, "%Y%m%d%H:%M:%S")))
            for opatm in opatms
        ]
        # Leading 0 keeps the list non-empty, as in the original.
        tmp = [0]
        tmp.extend(later - earlier
                   for earlier, later in zip(stamps, stamps[1:]))
        total_opatm = sum(gap for gap in tmp if gap <= delta)

        print(uid, end_sum, total_opatm, tmp)
        print(uid, end_sum, total_opatm, opatms)
        if end_sum >= 600:
            m += 1
            # The original printed the leaked comprehension variable ``i``,
            # which on Python 2 is the last element of ``tmp``.
            print(tmp[-1], end_sum)
        else:
            n += 1
        if total_opatm != 0:
            result.setdefault(uid, total_opatm)
    print(m, n)
    return result
예제 #8
0
class IPStorageMG(IPStorage):
    """Stores per-IP attributes in the ``UserIP`` collection of ``jh``."""

    def __init__(self):
        self.client = PyMongoClient()
        self.conn = self.client.getConn()

    def store(self, _id, **kwargs):
        """``$set`` the given keyword values on the document ``_id``.

        No upsert is done: a missing document is left missing.
        """
        op = UpdateOne({"_id": _id}, {"$set": dict(kwargs)}, False)
        self.client.bulkWrite("jh", "UserIP", [op])

    def storeItem(self, _id, key, value):
        """Not implemented; does nothing."""
        pass

    def bulkStore(self, data):
        """``$set`` many documents at once.

        :param data: mapping ``_id -> {field: value, ...}``.

        Nothing is sent when ``data`` is empty, matching the ``if op``
        guard used by the other writers in this code base.
        """
        op = [
            UpdateOne({"_id": _id}, {"$set": dict(data[_id])}, False)
            for _id in data
        ]
        if op:
            self.client.bulkWrite("jh", "UserIP", op)
예제 #9
0
class IPLoaderFromMG(IPLoader):
    '''
    Loads the IP addresses that still need to be located into
    ``IPContainer`` (a ``StackSet``).
    '''
    def __init__(self):
        self.client = PyMongoClient()
        self.IPContainer = StackSet()

    def load(self):
        '''Start :meth:`load_thread` in a daemon thread and return at once.'''
        t = threading.Thread(target=self.load_thread,
                             name="Thread_LoadIPFromMG")
        # ``setDaemon`` is deprecated; the attribute works on all versions.
        t.daemon = True
        t.start()

    def load_thread(self, once_sleep=30):
        '''
        Poll ``jh.UserIP`` forever and push the IPs found into the container.

        Only documents whose ``timestamp`` lies within the last five minutes
        and that have no ``city`` field are read. Polling is skipped while
        the container already holds 10000 or more entries.

        :param once_sleep: seconds to sleep between polls.
        '''
        logger.info("IP Loader Starting.......")
        _counter = 0
        while True:
            if self.IPContainer.size() >= 10000:
                time.sleep(once_sleep)
                continue
            size_before = self.IPContainer.size()
            cur = self.client.find(
                "jh", "UserIP", {
                    "timestamp": {
                        "$gte": time.time() - 5 * 60
                    },
                    "city": {
                        "$exists": False
                    }
                })
            for item in cur:
                self.IPContainer.push(item["_id"])
                _counter += 1
            size_after = self.IPContainer.size()
            logger.info(
                "Total Read IP: %s, IPContainer has ip: %d, load ip: %d",
                _counter, size_after, size_after - size_before)
            time.sleep(once_sleep)

    def iter(self):
        '''
        Yield IPs popped from the container, forever.

        When ``pop`` raises ``IndexError`` a warning is logged and the
        generator sleeps 10 seconds before trying again.
        '''
        while True:
            try:
                item = self.IPContainer.pop()
            except IndexError:
                logger.warning("IPContainer is empty!")
                time.sleep(10)
                continue
            yield item
예제 #10
0
 def getData(self, appkey, *args, **kwargs):
     """Read the event-attribute metadata of ``appkey`` from ``UserMapMeta``.

     Returns ``{eventid: {attribute_name: {"type": ..., "elems": [...]}}}``.
     String elements that contain ``"http"`` or are 60 or more characters
     long are left out, and at most 100 elements are kept per attribute.
     Events without fields map to an empty dict.

     Stored document format:
     { "_id" : "ac7", "fields" : [ { "type" : "string", "name" : "type" } ],
       "field_elems" : { "type" : [ ... ] } }
     """
     modename = "UserMapMeta"
     self.get_mongoid(appkey)
     self.client = PyMongoClient(self.mongo_id)
     collection = self.client.getConn()[appkey][modename]
     result = {}
     for item in collection.find({}):
         eventid = item["_id"]
         fields = item["fields"]
         field_elems = item.get("field_elems", {})
         # keep type/elems for every field
         for field in fields:
             try:
                 mapkey = field["name"]
                 mapkey_type = field["type"]
                 kept = []
                 for elem in field_elems.get(mapkey, []):
                     is_text = isinstance(elem, str) or isinstance(
                         elem, unicode)
                     # skip URL-like and overly long string values
                     if is_text and ("http" in elem or len(elem) >= 60):
                         continue
                     kept.append(elem)
                     if len(kept) >= 100:
                         break
                 per_event = result.setdefault(eventid, {})
                 per_event.setdefault(mapkey, {
                     "type": mapkey_type,
                     "elems": kept,
                 })
             except:
                 continue
         # events that carry no map attributes at all
         if not fields:
             result.setdefault(eventid, {})
     return result
예제 #11
0
 def __init__(self, mongo_id=1):
     """Bind the object to the ``UserIP`` collection of database ``jh``.

     :param mongo_id: passed to ``PyMongoClient`` as ``mongo_id``.
     """
     client = PyMongoClient(mongo_id=mongo_id)
     self.client = client
     self.conn = client.getConn()
     self.dbname, self.modename = "jh", "UserIP"
예제 #12
0
 def __init__(self, mongo_id=1):
     """Create the client wrapper and select the ``UserMapMeta`` collection.

     :param mongo_id: passed to ``PyMongoClient`` as ``mongo_id``.
     """
     client = PyMongoClient(mongo_id=mongo_id)
     self.client = client
     self.modename = "UserMapMeta"
예제 #13
0
 def __init__(self):
     """Create the default ``PyMongoClient`` and keep it as ``self.client``."""
     client = PyMongoClient()
     self.client = client
예제 #14
0
 def __init__(self):
     """Create the default client and select ``UserActiveUpdate``."""
     client = PyMongoClient()
     self.client = client
     self.modename = "UserActiveUpdate"
예제 #15
0
class UserMapMetaWriter(ModeWriter):
    """Writer that accumulates event-attribute metadata in ``UserMapMeta``."""

    def __init__(self, mongo_id=1):
        self.client = PyMongoClient(mongo_id=mongo_id)
        self.modename = "UserMapMeta"

    def setClient(self, client):
        """Replace the client wrapper used for subsequent writes."""
        self.client = client

    def remove(self, appkey, modename, tm):
        """No-op: metadata is never deleted by this writer."""
        pass

    def write(self, data, appkey, modename, modetools, *args, **kwargs):
        """Merge attribute names and sample values into ``UserMapMeta``.

        :param data: iterable of dicts with ``_id``, ``fields`` (list) and
            ``field_elems`` (``{attribute: iterable of values}``). NOTE: the
            dicts are consumed: ``fields``, ``_id`` and every entry of
            ``field_elems`` are popped, as in the original implementation.
        :param appkey: database name passed to the client wrapper.
        :param modename: ignored; ``self.modename`` is always used.
        :param modetools: unused here.
        :return: None
        """
        started = time.time()
        modename = self.modename
        op = []
        for _data in data:
            try:
                if not _data:
                    continue
                update_query = {
                    "$addToSet": {
                        "fields": {
                            "$each": _data.pop("fields")
                        }
                    }
                }
                _id = _data.pop("_id")
                op.append(UpdateOne({"_id": _id}, update_query, True))
                field_elems = _data["field_elems"]
                # Snapshot the keys: entries are popped inside the loop, and
                # mutating a dict while iterating its live key view raises
                # RuntimeError on Python 3.
                for key in list(field_elems):
                    update_query = {
                        "$addToSet": {
                            "field_elems.%s" % key: {
                                "$each": list(field_elems.pop(key))
                            }
                        }
                    }
                    # Skip attributes that already hold more than 100
                    # elements (array index 100 exists).
                    # NOTE(review): with upsert=True a non-matching filter
                    # triggers an insert with an existing _id - confirm the
                    # resulting duplicate-key error is intended to be what
                    # drives the single-mode fallback below.
                    op.append(
                        UpdateOne(
                            {
                                "_id": _id,
                                "field_elems.%s.100" % (key, ): {
                                    "$exists": False
                                }
                            }, update_query, True))
            except Exception:
                import traceback
                traceback.print_exc()
                print(op)
        try:
            if op:
                # Keep ``op`` bound to the operation list for the fallback.
                self.client.bulkWrite(appkey, modename, op)
        except Exception:
            print(
                "Warn: bulkStore 'UserMapMeta' Rise a error; Switch to Single Mode"
            )
            for op_item in op:
                try:
                    self.client.bulkWrite(appkey, modename, [op_item])
                except Exception:
                    import traceback
                    traceback.print_exc()
        # finallyMask is a project helper; its effect is not visible here.
        finallyMask(appkey, modename, self.client)
        print("UserMapMetaWriter cost seconds %.10f" %
              ((time.time() - started), ))
예제 #16
0
class UserCrumbsWriter(ModeWriter):
    """Writer for the per-day user documents of the ``uvfile`` collection.

    After the main write the same batch is handed to every writer in
    ``attachmode_storers``.
    """

    def __init__(self, mongo_id=1):
        """
        :param mongo_id: passed to ``PyMongoClient`` as ``mongo_id``.
        """
        self.client = PyMongoClient(mongo_id=mongo_id)
        self.conn = self.client.getConn()
        self.modename = "uvfile"
        self.attachmode_storers = []
        try:
            self.attachmode_storers = [UserIP()]
        except Exception:
            import traceback
            traceback.print_exc()

    def setClient(self, client):
        """Swap the client wrapper and refresh the cached ``conn`` from it."""
        self.client = client
        self.conn = self.client.getConn()

    def remove(self, appkey, modename, tm):
        """Remove all ``uvfile`` documents of one day.

        :param appkey: database name passed to the client wrapper.
        :param modename: ignored; ``self.modename`` is always used.
        :param tm: day as ``yyyymmdd`` or ``yyyy-mm-dd``.
        """
        modename = self.modename
        tm = time.strftime("%Y-%m-%d",
                           time.strptime(tm.replace("-", ""), "%Y%m%d"))
        self.client.remove(appkey, modename, {"tm": tm})

    def getMeasure(self, activelifeabs, fix_deltaday):
        """Count active days inside the trailing 7/14/28/30-day windows.

        :param activelifeabs: iterable of day offsets on which the user was
            active.
        :param fix_deltaday: day offset of the reference day; later offsets
            are ignored.
        :return: dict with ``last7ActiveNum``, ``last14ActiveNum``,
            ``last28ActiveNum`` and ``last30ActiveNum``.
        """
        measure = {
            "last7ActiveNum": 0,
            "last14ActiveNum": 0,
            "last28ActiveNum": 0,
            "last30ActiveNum": 0,
        }
        for active_day in activelifeabs:
            if active_day > fix_deltaday:
                continue
            delta = fix_deltaday - active_day
            if delta <= 6:
                measure["last7ActiveNum"] += 1
            if delta <= 13:
                measure["last14ActiveNum"] += 1
            if delta <= 27:
                measure["last28ActiveNum"] += 1
            if delta <= 29:
                measure["last30ActiveNum"] += 1
        return measure

    def store_attachmode(self, data, appkey, modename, modetools, *args,
                         **kwargs):
        """Forward the batch to every writer in ``attachmode_storers``.

        ``write`` called this method although the class never defined it, so
        the resulting AttributeError was swallowed and the attached writers
        never ran. A failing writer is reported and does not stop the rest.
        """
        for storer in self.attachmode_storers:
            try:
                storer.write(data, appkey, modename, modetools, *args,
                             **kwargs)
            except Exception:
                import traceback
                traceback.print_exc()

    def write(self, data, appkey, modename, modetools, *args, **kwargs):
        """Merge the batch into today's ``uvfile`` documents and upsert them.

        Steps: load the existing ``uvfile`` documents of the day and the
        ``UserProfile`` documents of the users, resolve IPs to locations via
        ``jh.UserIP``, merge, upsert, then run the attached writers.

        :param data: mapping ``uid -> record``; mutated in place.
        :param appkey: database name passed to the client wrapper.
        :param modename: ignored; ``self.modename`` is always used.
        :param modetools: provides ``mergeUserCrumbs`` and ``formatList``.
        :param kwargs: must contain ``today`` (``yyyy-mm-dd``); ``ip_loc``
            is added before the attached writers are called.
        :return: None
        """
        modename = self.modename
        today = kwargs["today"].replace("-", "")
        uids = list(data)
        # Format directly; the original subtracted and re-added 86400
        # seconds, which can land on the wrong day across a DST change.
        yyyy_mm_dd = time.strftime("%Y-%m-%d", time.strptime(today, "%Y%m%d"))

        uvfile = self.client.find(
            appkey, "uvfile",
            OrderedDict([("tm", yyyy_mm_dd), ("jhd_userkey", {
                "$in": uids
            })]))

        user_profile = self.client.find(appkey, "UserProfile",
                                        {"_id": {
                                            "$in": uids
                                        }})

        ips = set()
        ip_loc = {}
        try:
            for uid in data:
                ips.update(data[uid].get("jhd_ip") or ())
            ip_loc_cur = self.conn["jh"]["UserIP"].find(
                OrderedDict([("_id", {
                    "$in": list(ips)
                }), ("province", {
                    "$exists": True
                }), ("city", {
                    "$exists": True
                })]), {
                    "province": True,
                    "city": True
                })
            for item in ip_loc_cur:
                province = item["province"]
                if not province:
                    continue
                # fall back to the province when the city is empty
                city = item["city"] or province
                ip_loc.setdefault(item["_id"], {
                    "prov": province,
                    "city": city
                })
        except Exception:
            import traceback
            traceback.print_exc()

        # merge stored documents into the batch
        for doc in uvfile:
            uid = doc["jhd_userkey"]
            data[uid] = modetools.mergeUserCrumbs(doc, data[uid])
            try:
                known_locs = data[uid].setdefault("jhd_loc", [])
                for ip in data[uid]["jhd_ip"]:
                    loc = ip_loc.get(ip, None)
                    if loc and loc not in known_locs:
                        known_locs.append(loc)
            except Exception:
                import traceback
                traceback.print_exc()

        fix_deltaday = getDayDelta(today, "20160101")

        for doc in user_profile:
            key = doc["_id"]
            first_login = doc.get("firstLoginTime", "unknown")
            # recent-activity counters of the user
            measure = self.getMeasure(doc.get("activelifeabs", []),
                                      fix_deltaday)
            measure["firstLoginTime"] = first_login
            data[key] = dict(data[key],
                             firstLoginTime=first_login,
                             measure=measure)

        op = []
        for key in data:
            if "_id" not in data[key]:
                data[key]["_id"] = ObjectId()
            _id = data[key]["_id"]
            op.append(
                ReplaceOne({"_id": _id}, modetools.formatList(data[key]),
                           True))

        try:
            if op:
                self.client.bulkWrite(appkey, modename, op)
        except Exception:
            print(
                "Warn: bulkStore 'uvfile' Rise a error; Switch to Single Mode")
            try:
                replace_onebyone(data, appkey, modename, self.client)
            except Exception:
                import traceback
                traceback.print_exc()
        # finallyMask is a project helper; its effect is not visible here.
        finallyMask(appkey, modename, self.client)
        try:
            kwargs["ip_loc"] = ip_loc
            self.store_attachmode(data, appkey, modename, modetools, *args,
                                  **kwargs)
        except Exception:
            import traceback
            traceback.print_exc()
예제 #17
0
 def __init__(self):
     """Create the client wrapper and cache what its ``getConn()`` returns."""
     client = PyMongoClient()
     self.client = client
     self.conn = client.getConn()
예제 #18
0
 def __init__(self):
     """Create the client, the companion group writer and select ``UserEvent``."""
     client = PyMongoClient()
     group_writer = UserEventGroupWriter()
     self.client = client
     self.groupwriter = group_writer
     self.modename = "UserEvent"
예제 #19
0
 def __init__(self):
     """Create the default client and select ``UserEventGroup``."""
     client = PyMongoClient()
     self.client = client
     self.modename = "UserEventGroup"
예제 #20
0
class UserProfileWriter(ModeWriter):
    """Writer that merges a batch of user records into ``UserProfile``."""

    def __init__(self, mongo_id=1):
        """
        :param mongo_id: passed to ``PyMongoClient`` as ``mongo_id``.
        """
        self.client = PyMongoClient(mongo_id=mongo_id)
        self.conn = self.client.getConn()
        self.modename = "UserProfile"

    def setClient(self, client):
        """Swap the client wrapper and refresh the cached ``conn`` from it."""
        self.client = client
        self.conn = self.client.getConn()

    def remove(self, appkey, modename, tm):
        """No-op: profiles are never deleted by this writer."""
        pass

    def write(self, data, appkey, modename, modetools, *args, **kwargs):
        """Merge ``data`` with the stored profiles and upsert the result.

        For users that already have a profile, the stored document is merged
        via ``modetools.mergeUserProfile`` and the ``activelife`` /
        ``activelifeabs`` day-offset lists are extended. For new users
        ``comever`` is set from ``ver``.

        :param data: mapping ``userkey -> profile dict``; mutated in place.
        :param appkey: database name passed to the client wrapper.
        :param modename: ignored; ``self.modename`` is always used.
        :param modetools: provides ``mergeUserProfile``.
        :param kwargs: not used by this method.
        :return: None
        """
        started = time.time()
        modename = self.modename
        uids = list(data)

        docs = self.client.find(appkey, modename, {"_id": {"$in": uids}})

        oldusers = set()
        for doc in docs:
            try:
                # the UserProfile _id is the userkey
                key = doc["_id"]
                oldusers.add(key)
                # Reset per user: the original kept the previous user's
                # value (or hit a NameError) when the key was absent.
                new_last_login_day = None
                if "lastLoginTime" in data[key]:
                    new_last_login_day = data[key]["lastLoginTime"][:8]
                # If the incoming data predates the stored first login,
                # shift the stored offsets to the new origin.
                if "firstLoginTime" in doc and "firstLoginTime" in data[key]:
                    activelife = doc.get("activelife", [0])
                    firstLoginTime_new = data[key]["firstLoginTime"][:8]
                    firstLoginTime_old = doc["firstLoginTime"][:8]
                    if firstLoginTime_new < firstLoginTime_old:
                        firstLoginDelta = getDayDelta(firstLoginTime_old,
                                                      firstLoginTime_new)
                        # A real list, not a lazy ``map`` object.
                        doc["activelife"] = [
                            offset + firstLoginDelta for offset in activelife
                        ]

                data[key] = modetools.mergeUserProfile(data[key], doc)
                # build the life-cycle offsets of the user
                firstLoginDay = data[key]["firstLoginTime"][:8]
                lastLoginDay = data[key]["lastLoginTime"][:8]
                # Prefer the day carried by the incoming record; fall back
                # to the merged last-login day when it had none.
                dayDelta = getDayDelta(new_last_login_day or lastLoginDay,
                                       firstLoginDay)
                data[key].setdefault("activelife", [0])  # legacy documents
                if dayDelta not in data[key]["activelife"]:
                    data[key]["activelife"].append(dayDelta)
                data[key]["activelife"].sort()
                # absolute activity offsets, counted from 2016-01-01
                try:
                    firstlogin_deltaday = getDayDelta(firstLoginDay,
                                                      "20160101")
                    data[key]["activelifeabs"] = [
                        firstlogin_deltaday + remain_day
                        for remain_day in data[key]["activelife"]
                    ]
                except Exception:
                    import traceback
                    traceback.print_exc()
            except Exception:
                import traceback
                traceback.print_exc()

        # users without a stored profile: record the version they came in on
        for key in set(uids) - oldusers:
            data[key]["comever"] = data[key]["ver"]
            # absolute activity offsets, counted from 2016-01-01
            try:
                # NOTE(review): a new user's first-login day is taken from
                # lastLoginTime - presumably both coincide; confirm.
                firstLoginDay = data[key]["lastLoginTime"][:8]
                firstlogin_deltaday = getDayDelta(firstLoginDay, "20160101")
                data[key]["activelifeabs"] = [
                    firstlogin_deltaday + remain_day
                    for remain_day in data[key]["activelife"]
                ]
            except Exception:
                import traceback
                traceback.print_exc()

        op = [ReplaceOne({"_id": key}, data[key], True) for key in data]
        try:
            if op:
                self.client.bulkWrite(appkey, modename, op)
        except Exception:
            print(
                "Warn: bulkStore 'UserProfile' Rise a error; Switch to Single Mode"
            )
            try:
                replace_onebyone(data, appkey, modename, self.client)
            except Exception:
                import traceback
                traceback.print_exc()
        # finallyMask is a project helper; its effect is not visible here.
        finallyMask(appkey, modename, self.client)
        print("UserProfileWriter cost seconds %.3f" %
              ((time.time() - started), ))
예제 #21
0
def get_mongo_conn(appkey):
    """Return what ``PyMongoClient(...).getConn()`` yields for ``appkey``.

    The mongo id is the first element of ``MysqlClient().get_mongoid(appkey)``.
    The MySQL client is closed even when that lookup raises.
    """
    m_client = MysqlClient()
    try:
        mongo_id = m_client.get_mongoid(appkey)[0]
    finally:
        m_client.closeMysql()
    return PyMongoClient(mongo_id=mongo_id).getConn()
예제 #22
0
 def write(self, dataDict, appkey, modename, modetools, *args, **kwargs):
     """Export one tab-separated line per user to the uvfile path.

     The per-user records in ``dataDict`` are first enriched from the
     ``uvfile`` collection (first/last login time, first publisher,
     locations) and then written through ``JHWrite``.

     :param dataDict: mapping ``uid -> record``; records are mutated.
     :param appkey: database name; also used to build the output path.
     :param modename: unused here.
     :param modetools: unused here.
     :param kwargs: must contain ``num``, the number of days back from now.
     :return: None
     """
     num = kwargs["num"]
     client = PyMongoClient(self.mongo_id)
     uvfile_path = get_uvfile_path(num, appkey)
     cur_day = time.strftime("%Y-%m-%d",
                             time.localtime(time.time() - 86400 * num))
     uids = list(dataDict)
     uvfile = client.findElemIn(
         appkey, "uvfile", "jhd_userkey", uids,
         OrderedDict([("tm", cur_day)]), {
             "_id": False,
             "jhd_userkey": True,
             "jhd_loc": True,
             "firstLoginTime": True,
             "lastOpaTime": True,
             "jhd_pb": True
         })
     for item in uvfile:
         try:
             uid = item["jhd_userkey"]
             comepub = item.get("jhd_pb", ["#"])
             data = dataDict[uid]
             data["firstLoginTime"] = item.get("firstLoginTime", "#")
             data["lastLoginTime"] = item.get("lastOpaTime", "#")
             data["comepub"] = comepub[0] if comepub else "#"
             locs = item.get("jhd_loc", None)
             if locs:
                 data["locs"] = []
                 # distinct name: the original reused ``item`` here
                 for loc_item in locs:
                     if isinstance(loc_item, dict):
                         prov = loc_item.get("prov", "#")
                         city = loc_item.get("city", "#")
                     else:
                         # non-dict entries are split as "prov_city"
                         prov, city = loc_item.split("_")
                     if (prov, city) not in data["locs"]:
                         data["locs"].append((prov, city))
         except Exception:
             import traceback
             # was a Python 2 print statement that also printed "None"
             traceback.print_exc()
     for uid in dataDict:
         data = dataDict[uid]
         pushid = data["pushid"]
         plat = data["plat"]
         ua = data["ua"]
         net = "#".join(list(data["net"]))
         curpub = "#".join(list(data["curpubs"]))
         comepub = data["comepub"]
         firstLoginTime = data["firstLoginTime"]
         lastLoginTime = data["lastLoginTime"]
         ver = "#".join(list(data["vers"]))
         loc = "#".join("_".join(pair) for pair in data["locs"])
         in_num = data["in"][0]
         dur = "#".join(map(str, data["end"])) if data["end"] else "#"
         # Dict comprehensions replace list comprehensions that were run
         # only for their setdefault() side effect and rebound ``key``.
         actionDict = json.dumps(
             {name: data["action"][name][0] for name in data["action"]})
         pageDict = json.dumps(
             {name: data["page"][name][0] for name in data["page"]})
         isactive = data["isactive"]
         line = [
             uid,  # 1
             isactive,
             comepub,
             curpub,
             plat,  # 5
             ver,
             ua,
             net,
             firstLoginTime,
             lastLoginTime,  # 10
             loc,
             in_num,
             dur,
             actionDict,
             pageDict,  # 15
             pushid,  # 16
         ]
         JHWrite(uvfile_path, "\t".join(map(str, line)))
     JHWrite.finished(iszip=True)
예제 #23
0
 def __init__(self):
     """Create the client wrapper and an empty ``StackSet`` as IP container."""
     client = PyMongoClient()
     container = StackSet()
     self.client = client
     self.IPContainer = container
예제 #24
0
from apscheduler.triggers.cron import CronTrigger
from apscheduler.events import (
    EVENT_SCHEDULER_STARTED, EVENT_SCHEDULER_SHUTDOWN, EVENT_SCHEDULER_PAUSED,
    EVENT_SCHEDULER_RESUMED, EVENT_EXECUTOR_ADDED, EVENT_EXECUTOR_REMOVED,
    EVENT_JOBSTORE_ADDED, EVENT_JOBSTORE_REMOVED, EVENT_ALL_JOBS_REMOVED,
    EVENT_JOB_ADDED, EVENT_JOB_REMOVED, EVENT_JOB_MODIFIED, EVENT_JOB_EXECUTED,
    EVENT_JOB_ERROR, EVENT_JOB_MISSED, EVENT_JOB_SUBMITTED,
    EVENT_JOB_MAX_INSTANCES)

from apscheduler.executors.pool import ThreadPoolExecutor, ProcessPoolExecutor

from JobHandler import JobHandler
from DBClient.PyMongoClient import PyMongoClient
# NOTE: a ``global`` statement at module scope has no effect; the assignment
# below already creates a module-level name.
global _mongoclient
# Connection object obtained once, at import time, from PyMongoClient.getConn().
_mongoclient = PyMongoClient().getConn()


class SchedulerManager(object):
    global _mongoclient

    def __init__(self):
        """Set up empty job registries, then create and start the scheduler.

        Construction has side effects: ``create_scheduler()`` and ``start()``
        are both invoked before ``__init__`` returns.
        """

        self._jobs = {}
        self._jobhandlers = {}  # format, key: jobid,  value: jobhandler
        # Order matters: the scheduler must exist before start() is called.
        self.create_scheduler()
        self.start()

    def create_scheduler(self):
        self.jobstores = {
            'mongo':
예제 #25
0
class ModeWriteMongoUserIP(ModeWriter):
    """Copy resolved IP locations from ``jh.UserIP`` into per-app ``uvfile`` documents.

    ``write`` reads recently touched ``UserIP`` documents that already carry
    ``province``, ``city`` and ``appkey`` fields and, for every app key listed
    on such a document, queues an update that adds the location to a
    ``uvfile`` document of that app which has the same IP and no ``jhd_loc``
    yet.
    """

    def __init__(self, mongo_id=1):
        """
        :param mongo_id: forwarded unchanged to ``PyMongoClient(mongo_id=...)``
        """
        self.client = PyMongoClient(mongo_id=mongo_id)
        self.conn = self.client.getConn()
        self.dbname = "jh"
        self.modename = "UserIP"

    def remove(self, *args, **kwargs):
        """No-op: this writer does not delete anything."""
        pass

    def write(self, *args, **kwargs):
        '''
        Queue and execute the ``jhd_loc`` back-fill updates.

        :param args: reserved, unused
        :param kwargs: today = current date (yyyy-mm-dd); required
        :return: None
        :raises KeyError: if ``today`` is not supplied
        '''
        today = kwargs["today"]
        # Only documents touched within the last 100 seconds that already
        # have location and app key information are considered.
        cur = self.conn[self.dbname][self.modename].find(
            {
                "timestamp": {
                    "$gte": time.time() - 100
                },
                "province": {
                    "$exists": True
                },
                "city": {
                    "$exists": True
                },
                "appkey": {
                    "$exists": True
                }
            }, {
                "province": True,
                "city": True,
                "appkey": True
            })
        # app key -> list of UpdateOne operations for that app's database
        update_appkey = {}
        for item in cur:
            ip = item["_id"]
            province = item["province"]
            city = item["city"]
            if not province:
                # Nothing useful to record without a province.
                continue
            if not city:
                # Fall back to the province when the city is empty.
                city = province
            for a_appkey in item["appkey"]:
                # Field order is kept explicit via OrderedDict; the filter is
                # meant to be served by a matching index to keep updates fast.
                # FIX: the operator was misspelled "$exista", which MongoDB
                # rejects as an unknown operator; "$exists" is the intended
                # "field is missing" test.
                update_filter = OrderedDict([
                    ("tm", today),
                    ("jhd_loc", {
                        "$exists": False
                    }),
                    ("jhd_ip", ip),
                ])
                update_appkey.setdefault(a_appkey, []).append(
                    UpdateOne(
                        update_filter,
                        {
                            "$addToSet": {
                                "jhd_loc": {
                                    "prov": province,
                                    "city": city
                                }
                            }
                        }))
        for a_appkey in update_appkey:
            started = time.time()
            # One bulk write per app: database = app key, collection = "uvfile".
            self.client.bulkWrite(a_appkey, "uvfile", update_appkey[a_appkey])
            print(__name__,
                  time.time() - started, a_appkey, len(update_appkey[a_appkey]))
예제 #26
0
class UserActiveWriter(ModeWriter):
    """Build one ``UserActive`` record per user and day from ``UserProfile`` documents."""

    def __init__(self):
        self.client = PyMongoClient()
        self.modename = "UserActive"

    def setClient(self, client):
        """Replace the Mongo client wrapper used by this writer."""
        self.client = client

    def remove(self, appkey, modename, tm):
        """Delete all ``UserActive`` records of one partition date.

        :param appkey: database name passed to ``client.remove``
        :param modename: ignored; ``self.modename`` is always used
        :param tm: date as ``yyyymmdd`` or ``yyyy-mm-dd``
        :raises ValueError: if ``tm`` is not a valid date in that format
        """
        modename = self.modename
        tm = tm.replace("-", "")
        # Normalise the date by parsing and re-formatting it.
        tm = time.strftime(
            "%Y%m%d", time.localtime(time.mktime(time.strptime(tm, "%Y%m%d"))))
        self.client.remove(appkey, modename, {"partition_date": tm})

    def write(self, data, appkey, modename, modetools, *args, **kwargs):
        """Upsert the ``UserActive`` record of every user for one day.

        For each ``UserProfile`` document the previous day's ``UserActive``
        record is looked up; its ``active`` list is extended by 1 or 0
        (logged in on the target day or not), or a fresh one-element list is
        started when no previous record exists.

        :param data: unused
        :param appkey: database name
        :param modename: ignored; ``self.modename`` is always used
        :param modetools: unused
        :param kwargs: today = target date (``yyyy-mm-dd`` or ``yyyymmdd``);
            defaults to yesterday's local date
        :return: None
        """
        import traceback

        started = time.time()
        modename = self.modename
        if "today" in kwargs:
            curDay = kwargs["today"].replace("-", "")
        else:
            curDay = time.strftime("%Y%m%d",
                                   time.localtime(time.time() - 86400))
        conn = self.client.getConn()
        userActiveCollection = conn[appkey][modename]
        docs = self.client.find(appkey, "UserProfile", {})
        yesterday = getDay(curDay, "%Y%m%d", -1)
        op = []
        # Separate timer for the scan so the overall timer is not clobbered.
        scan_started = time.time()
        for doc in docs:
            try:
                key = doc["_id"]
                activelife = doc.get("activelife", [0])
                firstLoginTime = doc["firstLoginTime"][:8]
                # presumably ``activelife`` lists day offsets from the first
                # login on which the user was active -- verify against the
                # UserProfile writer.
                login = getDayDelta(curDay, firstLoginTime) in activelife
                # Look up the previous day's record; needs an index for speed:
                # db.UserActive.ensureIndex({partition_date: -1, jh_uid: 1})
                userActive = userActiveCollection.find_one({
                    "jh_uid": key,
                    "partition_date": yesterday
                })
                # Build today's record.
                newUserActive = UserActiveBuilder()
                newUserActive.setJhdUid(key)
                newUserActive.setPartitionDate(curDay)
                if userActive is None:
                    newUserActive.setActive([1] if login else [0])
                else:
                    userActive["active"].append(1 if login else 0)
                    newUserActive.setActive(userActive["active"])
                # Metric fields copied from the profile.
                newUserActive.setFirstLoginTime(doc["firstLoginTime"])
                newUserActive.setLastLoginTime(doc["lastLoginTime"])
                op.append(
                    ReplaceOne(
                        {
                            "jh_uid": key,
                            "partition_date": curDay
                        },
                        newUserActive.builder(),
                        upsert=True))
            except Exception:
                # Best effort: report the bad document and keep going.
                # print_exc() writes the traceback itself and returns None,
                # so its result must not be passed to print().
                traceback.print_exc()
                print(doc)
        print("find cost time: %d" % int(time.time() - scan_started))
        try:
            if op:
                userActiveCollection.bulk_write(op)
        except Exception:
            traceback.print_exc()
            print(
                "Warn: bulkStore 'UserActive' Rise a error; Switch to Single Mode"
            )
            # Retry one operation at a time so a single failure does not
            # block the rest.
            for op_item in op:
                try:
                    userActiveCollection.bulk_write([op_item])
                except Exception:
                    traceback.print_exc()
        finallyMask(appkey, modename, self.client)
        print("UserActiveWriter cost seconds %.10f" %
              ((time.time() - started), ))
예제 #27
0
 def __init__(self):
     """Create the Mongo client wrapper, cache its connection and set the mode name."""
     mongo_client = PyMongoClient()
     self.client = mongo_client
     # The connection comes from the very client stored on the instance.
     self.conn = mongo_client.getConn()
     self.modename = "UserProfile"
예제 #28
0
                            {
                                "_id": _id,
                                "field_elems.%s.100" % (key, ): {
                                    "$exists": False
                                }
                            }, update_query, True))
            except:
                import traceback
                print(traceback.print_exc(), op)
        try:
            if op:
                op = self.client.bulkWrite(appkey, modename, op)
        except:
            print(
                "Warn: bulkStore 'UserProfile' Rise a error; Switch to Single Mode"
            )
            for op_item in op:
                try:
                    self.client.bulkWrite(appkey, modename, [op_item])
                except:
                    import traceback
                    print(traceback.print_exc())
        # self.client.getConn()[appkey][modename].remove({"field_elems": {"$exists": True}})
        finallyMask(appkey, modename, self.client)
        print("UserMapMetaWriter cost seconds %.10f" % ((time.time() - a), ))


if __name__ == "__main__":
    # Manual entry point, only executed when the file is run as a script.
    client = PyMongoClient()
    # Calls dropIndex for "caiyu_ios_free" / "UserEvent" using the client above.
    # NOTE(review): despite its name, ``uewriter`` holds whatever dropIndex
    # returns, and nothing below uses it -- confirm this is intentional.
    uewriter = dropIndex("caiyu_ios_free", "UserEvent", client)
    # Disabled call kept from the original: uewriter.remove("feeling", )