def activeUserDetail(self, datatype, num, data=None):
    """Group the active user ids of one uvfile by version and channel.

    Each non-empty line of the uvfile is split on tabs.  The columns read
    here are: 0 = user id, 1 = active flag (``"1"`` means active),
    2 = channel the user came from, 5 = ``#``-joined list of versions.

    Args:
        datatype: Passed to ``get_uvfile_path`` to locate the uvfile.
        num: Passed to ``get_uvfile_path`` to locate the uvfile.
        data: Optional dict to accumulate into; it is updated in place.
            A new dict is created when omitted.

    Returns:
        The dict mapping ``(version, channel)`` to a set of user ids.  Every
        active user is also filed under ``(version, "all")``,
        ``("all", channel)`` and ``("all", "all")``.
    """
    # A fresh dict per call: the former ``data={}`` default was one object
    # shared by every call that omitted ``data`` and so kept accumulating.
    if data is None:
        data = {}

    uvfile_path = get_uvfile_path(num, datatype, iszip=True)
    for line in JHOpen().readLines(uvfile_path):
        if not line:
            continue
        try:
            items = line.split("\t")
            uid = items[0].strip()
            if items[1].strip() != "1":
                continue
            comepub = items[2].strip()
            for ver in items[5].strip().split("#"):
                for key in ((ver, comepub), (ver, "all"),
                            ("all", comepub), ("all", "all")):
                    data.setdefault(key, set()).add(uid)
        except Exception:
            # Best effort: report the malformed line and keep going.
            import traceback
            traceback.print_exc()
    return data
def paths(self, **kwargs):
    """Build the ``PathProperty`` list for the requested ``logtype``.

    ``kwargs["logtype"]`` selects the behaviour:

    * ``"logfile"``: every path returned by ``get_file_path(**kwargs)``,
      wrapped with ``pathtype="logfile"``.
    * ``"uvfile"``: the single path returned by ``get_uvfile_path`` for
      ``kwargs["num"]`` / ``kwargs["datatype"]`` (``iszip`` defaults to
      ``True``), wrapped with ``pathtype="uvfile"``.
    * a list: the concatenation of ``self.paths`` for each entry, with the
      other keyword arguments passed through unchanged.

    Returns:
        A list of ``PathProperty`` objects, or ``None`` when ``logtype`` is
        none of the above.
    """
    kind = kwargs["logtype"]

    if kind == "logfile":
        wrapped = []
        for location in get_file_path(**kwargs):
            wrapped.append(PathProperty(_path=location, pathtype="logfile"))
        return wrapped

    if kind == "uvfile":
        num = kwargs["num"]
        datatype = kwargs["datatype"]
        location = get_uvfile_path(
            num, datatype, iszip=kwargs.get("iszip", True))
        return [PathProperty(_path=location, pathtype="uvfile")]

    if not isinstance(kind, list):
        return None

    # Paths for all requested log types, in the order they were listed.
    collected = []
    for single_type in kind:
        per_type_kwargs = dict(kwargs)
        per_type_kwargs["logtype"] = single_type
        collected.extend(self.paths(**per_type_kwargs))
    return collected
def newcomers(self, datatype, num):
    """Group the active new users of one uvfile by version and channel.

    A line counts as a new user when the first eight characters of
    column 8 equal the local date ``num`` days ago (``YYYYMMDD``), and as
    active when column 1 is ``"1"``.  The channel is column 2, or the first
    ``#``-separated entry of column 3 when column 2 is ``"#"``.  Column 5
    holds the ``#``-joined versions.

    Args:
        datatype: Passed to ``get_uvfile_path`` to locate the uvfile.
        num: Day offset; selects both the uvfile and the date to match.

    Returns:
        A dict mapping ``(version, channel)`` to a set of user ids, with
        ``"all"`` roll-ups on either or both positions of the key.
    """
    uvfile_path = get_uvfile_path(num, datatype, iszip=True)
    cur_day = time.strftime(
        "%Y%m%d", time.localtime(time.time() - 86400 * num))
    result = {}
    for line in JHOpen().readLines(uvfile_path):
        if not line:
            continue
        try:
            items = line.split("\t")
            uid = items[0].strip()
            if items[8][:8] != cur_day:
                continue
            if items[1].strip() != "1":
                continue
            comepub = items[2].strip()
            if comepub == "#":
                # No origin channel recorded: use the first current one.
                comepub = items[3].strip().split("#")[0]
            for ver in items[5].strip().split("#"):
                for key in ((ver, comepub), ("all", comepub),
                            (ver, "all"), ("all", "all")):
                    result.setdefault(key, set()).add(uid)
        except Exception:
            # Best effort: report the malformed line and keep going.
            import traceback
            traceback.print_exc()
    return result
def activeUser(self, datatype, num):
    """Return the set of active user ids found in one uvfile.

    Each non-empty line is split on tabs; column 0 is the user id and the
    user is active when column 1 is ``"1"``.

    Args:
        datatype: Passed to ``get_uvfile_path`` to locate the uvfile.
        num: Passed to ``get_uvfile_path`` to locate the uvfile.

    Returns:
        A set of user ids.
    """
    users = set()
    uvfile_path = get_uvfile_path(num, datatype, iszip=True)
    for line in JHOpen().readLines(uvfile_path):
        if not line:
            continue
        try:
            items = line.split("\t")
            if items[1].strip() == "1":
                users.add(items[0].strip())
        except Exception:
            # Best effort: report the malformed line and keep going.
            import traceback
            traceback.print_exc()
    return users
def rules(self, analysisresult, data, num, *args, **kwargs):
    """Bucket the new users of one day by how many days they were active.

    The day examined is ``num + 7`` days ago.  For every new user of that
    day the number of days (out of the uvfiles 7..1 days ago) on which
    ``self.activeUser`` reports them is counted, and
    ``analysisresult.result`` is updated in place:

        result[(version, channel)] == [{active_days: set(uids)}, new_count]

    with ``"all"`` roll-ups on either or both key positions.  The
    ``("all", ...)`` counters are incremented once per user, while the
    per-version counters are incremented once per version of that user.

    Args:
        analysisresult: Object whose ``result`` mapping is updated.
        data: Unused.
        num: Day offset; 7 is added before use.
        **kwargs: Must contain ``"datatype"``.

    Returns:
        Always ``False``.  ``self.finished`` is set to ``True`` whether or
        not processing succeeded.
    """
    num += 7
    result = analysisresult.result
    datatype = kwargs["datatype"]
    cur_day = time.strftime(
        "%Y%m%d", time.localtime(time.time() - 86400 * num))
    try:
        newusers = self.newcomers(datatype, num).get(("all", "all"), set())

        # Count the active days of every new user.
        # NOTE(review): the window is always the uvfiles 7..1 days ago,
        # independent of ``num`` -- confirm that is intended for num != 7.
        active_days = dict.fromkeys(newusers, 0)
        for days_ago in range(7, 0, -1):
            active = self.activeUser(datatype, days_ago)
            for uid in newusers:
                if uid in active:
                    active_days[uid] += 1

        # Spread the new users over version / channel buckets.
        uvfile_path = get_uvfile_path(num, datatype, iszip=True)
        for line in JHOpen().readLines(uvfile_path):
            if not line:
                continue
            try:
                items = line.split("\t")
                uid = items[0].strip()
                if items[8][:8] != cur_day:
                    continue
                comepub = items[2].strip()
                if comepub == "#":
                    # No origin channel recorded: use the first current one.
                    comepub = items[3].strip().split("#")[0]
                # Users missing from ``newusers`` (e.g. not active on their
                # first day) land in the 0-day bucket.
                days = active_days.get(uid, 0)
                first_ver = True
                for ver in items[5].strip().split("#"):
                    buckets = (
                        ((ver, comepub), True),
                        ((ver, "all"), True),
                        (("all", comepub), first_ver),
                        (("all", "all"), first_ver),
                    )
                    for key, counted in buckets:
                        entry = result.setdefault(key, [{}, 0])
                        entry[0].setdefault(days, set()).add(uid)
                        if counted:
                            # New-user counter.
                            entry[1] += 1
                    first_ver = False
            except Exception:
                # Best effort: report the malformed line and keep going.
                import traceback
                traceback.print_exc()
                print("failed uvfile line: %r" % (line,))
    except Exception:
        import traceback
        traceback.print_exc()
    finally:
        self.finished = True
    return False
def write(self, dataDict, appkey, modename, modetools, *args, **kwargs):
    """Enrich per-user data from MongoDB and write it out as uvfile lines.

    First, the ``uvfile`` collection of ``appkey`` is queried for the users
    in ``dataDict`` (filtered on ``tm`` equal to the local date ``num`` days
    ago, ``YYYY-MM-DD``) and each matching entry of ``dataDict`` receives
    ``firstLoginTime``, ``lastLoginTime``, ``comepub`` and, when present,
    a de-duplicated ``locs`` list of ``(prov, city)`` tuples.

    Then one tab-separated line per user is passed to ``JHWrite`` with the
    columns: uid, isactive, comepub, curpub, plat, ver, ua, net,
    firstLoginTime, lastLoginTime, loc, in, dur, actions (JSON),
    pages (JSON), pushid.

    Args:
        dataDict: Mapping of user id to that user's data dict; updated in
            place.  Every entry must already hold the keys read when the
            line is built (``pushid``, ``plat``, ``ua``, ``net``,
            ``curpubs``, ``vers``, ``locs``, ``in``, ``end``, ``action``,
            ``page``, ``isactive``, ...).
        appkey: Application key; selects the Mongo database and the uvfile.
        modename: Unused.
        modetools: Unused.
        **kwargs: Must contain ``"num"`` (day offset).

    Raises:
        KeyError: If ``"num"`` is missing or a user entry lacks a key
            needed for its output line.
    """
    num = kwargs["num"]
    client = PyMongoClient(self.mongo_id)
    uvfile_path = get_uvfile_path(num, appkey)
    cur_day = time.strftime(
        "%Y-%m-%d", time.localtime(time.time() - 86400 * num))
    # A real list, not a dict view, so the ids can be sent in the query on
    # Python 3 as well.
    uids = list(dataDict)
    uvfile = client.findElemIn(
        appkey, "uvfile", "jhd_userkey", uids,
        OrderedDict([("tm", cur_day)]),
        {
            "_id": False,
            "jhd_userkey": True,
            "jhd_loc": True,
            "firstLoginTime": True,
            "lastOpaTime": True,
            "jhd_pb": True,
        })

    for item in uvfile:
        try:
            data = dataDict[item["jhd_userkey"]]
            comepub = item.get("jhd_pb", ["#"])
            data["firstLoginTime"] = item.get("firstLoginTime", "#")
            data["lastLoginTime"] = item.get("lastOpaTime", "#")
            data["comepub"] = comepub[0] if comepub else "#"
            locs = item.get("jhd_loc", None)
            if locs:
                data["locs"] = []
                for loc_item in locs:
                    # Locations are either dicts or "prov_city" strings.
                    if isinstance(loc_item, dict):
                        prov = loc_item.get("prov", "#")
                        city = loc_item.get("city", "#")
                    else:
                        prov, city = loc_item.split("_")
                    if (prov, city) not in data["locs"]:
                        data["locs"].append((prov, city))
        except Exception:
            # Best effort: report the bad record and keep going.
            import traceback
            traceback.print_exc()

    for uid in dataDict:
        data = dataDict[uid]
        pushid = data["pushid"]
        plat = data["plat"]
        ua = data["ua"]
        net = "#".join(data["net"])
        curpub = "#".join(data["curpubs"])
        comepub = data["comepub"]
        first_login = data["firstLoginTime"]
        last_login = data["lastLoginTime"]
        ver = "#".join(data["vers"])
        loc = "#".join("_".join(pair) for pair in data["locs"])
        in_num = data["in"][0]
        dur = "#".join(map(str, data["end"])) if data["end"] else "#"
        # Only the first element of each action / page record is exported.
        actions = {name: data["action"][name][0] for name in data["action"]}
        pages = {name: data["page"][name][0] for name in data["page"]}
        fields = (
            uid,                  # 1
            data["isactive"],
            comepub,
            curpub,
            plat,                 # 5
            ver,
            ua,
            net,
            first_login,
            last_login,           # 10
            loc,
            in_num,
            dur,
            json.dumps(actions),
            json.dumps(pages),    # 15
            pushid,               # 16
        )
        JHWrite(uvfile_path, "\t".join(map(str, fields)))
    # Finalize the output once every line has been handed to JHWrite.
    JHWrite.finished(iszip=True)