Ejemplo n.º 1
0
 def activeUserDetail(self, datatype, num, data=None):
     """Collect uids of active users, bucketed by (version, comepub).

     Reads the tab-separated file returned by
     ``get_uvfile_path(num, datatype, iszip=True)``. For each line whose
     second field is ``"1"``, the uid (field 0) is added to four sets keyed
     by ``(ver, comepub)``, ``(ver, "all")``, ``("all", comepub)`` and
     ``("all", "all")``, once per ``#``-separated version in field 5.
     ``comepub`` is field 2 (presumably the acquisition channel -- confirm).

     Args:
         datatype: Passed through to ``get_uvfile_path``.
         num: Passed through to ``get_uvfile_path``.
         data: Optional dict to accumulate into; it is updated in place.
             A fresh dict is created when omitted.

     Returns:
         The dict mapping ``(ver, comepub)`` tuples to sets of uids.
     """
     # A ``{}`` default would be shared (and polluted) across calls, so a
     # new dict is created per call unless the caller supplies one.
     data = {} if data is None else data
     uvfile_path = get_uvfile_path(num, datatype, iszip=True)
     for line in JHOpen().readLines(uvfile_path):
         if not line:
             continue
         try:
             items = line.split("\t")
             uid = items[0].strip()
             if items[1].strip() != "1":
                 continue
             comepub = items[2].strip()
             vers = items[5].strip()
             for ver in vers.split("#"):
                 for key in ((ver, comepub), (ver, "all"),
                             ("all", comepub), ("all", "all")):
                     data.setdefault(key, set()).add(uid)
         except Exception:
             # Best effort: report the malformed line and keep going.
             import traceback
             traceback.print_exc()
     return data
Ejemplo n.º 2
0
 def newcomers(self, datatype, num):
     """Collect uids of active newcomers, bucketed by (version, comepub).

     Reads the tab-separated file returned by
     ``get_uvfile_path(num, datatype, iszip=True)``. A line counts when the
     first 8 characters of field 8 equal the local date ``num`` days ago
     (``YYYYMMDD``) and field 1 is ``"1"``. ``comepub`` is field 2, or the
     first ``#``-separated entry of field 3 when field 2 is ``"#"``.

     Args:
         datatype: Passed through to ``get_uvfile_path``.
         num: Day offset; used both for the file lookup and for the
             newcomer date.

     Returns:
         A dict mapping ``(ver, comepub)``, ``("all", comepub)``,
         ``(ver, "all")`` and ``("all", "all")`` to sets of uids, for every
         ``#``-separated version in field 5.
     """
     uvfile_path = get_uvfile_path(num, datatype, iszip=True)
     curDay = time.strftime("%Y%m%d",
                            time.localtime(time.time() - 86400 * num))
     result = {}
     for line in JHOpen().readLines(uvfile_path):
         if not line:
             continue
         try:
             items = line.split("\t")
             uid = items[0].strip()
             if curDay != items[8][:8]:
                 continue
             if items[1].strip() != "1":
                 continue
             curpub = items[3].strip()
             comepub = items[2].strip()
             if comepub == "#":
                 comepub = curpub.split("#")[0]
             vers = items[5].strip()
             for ver in vers.split("#"):
                 for key in ((ver, comepub), ("all", comepub),
                             (ver, "all"), ("all", "all")):
                     result.setdefault(key, set()).add(uid)
         except Exception:
             # Best effort: report the malformed line and keep going.
             import traceback
             traceback.print_exc()
     return result
Ejemplo n.º 3
0
 def pipeline(self, path):
     """Yield transformed records for each non-empty line of *path*.

     For every non-empty line, ``self.transform.transform(line)`` is called.
     On success a one-element list ``[data]`` is yielded; if the transform
     raises, the traceback is printed instead. In both cases an empty list
     is yielded afterwards (kept for compatibility with existing
     consumers).
     """
     for line in JHOpen().readLines(path):
         if not line:
             continue
         try:
             data = self.transform.transform(line)
         except Exception:
             import traceback
             traceback.print_exc()
         else:
             # Yield outside the ``try`` so that GeneratorExit (raised at
             # the yield when the generator is closed) is never swallowed.
             yield [data]
         yield []
Ejemplo n.º 4
0
 def pipeline(self, path):
     """Yield JSON-decoded records for each non-empty line of *path*.

     Every non-empty line is stripped and parsed with ``json.loads``. On
     success a one-element list ``[data]`` is yielded; if parsing raises,
     the traceback is printed instead. In both cases an empty list is
     yielded afterwards (kept for compatibility with existing consumers).
     """
     for line in JHOpen().readLines(path):
         if not line:
             continue
         try:
             data = json.loads(line.strip())
         except Exception:
             import traceback
             traceback.print_exc()
         else:
             # Yield outside the ``try`` so that GeneratorExit (raised at
             # the yield when the generator is closed) is never swallowed.
             yield [data]
         yield []
Ejemplo n.º 5
0
 def activeUser(self, datatype, num):
     """Return the set of uids flagged active in the UV file.

     Reads the tab-separated file returned by
     ``get_uvfile_path(num, datatype, iszip=True)`` and collects field 0
     (the uid) of every line whose field 1 is ``"1"``. Lines that cannot be
     parsed are reported with a traceback and skipped.
     """
     users = set()
     uvfile_path = get_uvfile_path(num, datatype, iszip=True)
     for line in JHOpen().readLines(uvfile_path):
         if not line:
             continue
         try:
             items = line.split("\t")
             uid = items[0].strip()
             if items[1].strip() == "1":
                 users.add(uid)
         except Exception:
             # Best effort: report the malformed line and keep going.
             import traceback
             traceback.print_exc()
     return users
Ejemplo n.º 6
0
 def getData(self, datatype, num):
     """Collect the event ids of all ``"ac"`` records.

     Iterates ``self.daily_paths`` when it is truthy; otherwise the paths
     come from ``get_file_path`` for the local date ``num`` days ago. Each
     non-empty line is parsed as JSON; records whose stripped ``"type"`` is
     ``"ac"`` contribute their stripped ``"event"`` value.

     Returns:
         A dict mapping the record type (only ``"ac"`` can appear) to the
         set of event ids seen. Lines that fail to parse are reported
         (traceback plus the raw line) and skipped.
     """
     curDay = time.strftime("%Y%m%d",
                            time.localtime(time.time() - 86400 * num))
     paths = self.daily_paths if self.daily_paths else get_file_path(
         datatype=datatype, yyyymmdd=curDay, hhmm="2359", last=1440)
     result = {}
     for path in paths:
         for line in JHOpen().readLines(path):
             if not line:
                 continue
             try:
                 record = json.loads(line)
                 optype = record["type"].strip()
                 if optype != "ac":
                     continue
                 eventid = record["event"].strip()
                 result.setdefault(optype, set()).add(eventid)
             except Exception:
                 # Best effort: report the offending line and keep going.
                 import traceback
                 traceback.print_exc()
                 print(line)
     return result
Ejemplo n.º 7
0
 def pipline(self, path):
     """Yield every non-empty line of *path* wrapped in a one-element list."""
     reader = JHOpen()
     for raw in reader.readLines(path):
         if raw:
             yield [raw]
Ejemplo n.º 8
0
 def pipeline(self, path):
     """Yield each non-empty line of *path*, JSON-decoded, as ``[record]``.

     Decoding errors from ``json.loads`` propagate to the consumer.
     """
     reader = JHOpen()
     for raw in reader.readLines(path):
         if raw:
             yield [json.loads(raw)]
Ejemplo n.º 9
0
 def rules(self, analysisresult, data, num, *args, **kwargs):
     """Accumulate per-newcomer active-day statistics into the result.

     ``num`` is shifted by 7 before use. Newcomers of that (shifted) day are
     taken from ``self.newcomers``; for each of them the number of days on
     which they appear in ``self.activeUser(datatype, j)`` for ``j`` in
     7..1 is counted. The UV file for the shifted day is then scanned and
     ``analysisresult.result`` is updated in place: every key
     ``(ver, comepub)``, ``(ver, "all")``, ``("all", comepub)`` and
     ``("all", "all")`` maps to ``[{active_days: set(uid)}, newcomer_count]``.

     Args:
         analysisresult: Object whose ``result`` mapping is updated.
         data: Unused here.
         num: Day offset before the 7-day shift.
         **kwargs: Must contain ``"datatype"`` (``KeyError`` otherwise).

     Returns:
         ``False`` when an exception aborted the computation, otherwise
         ``None``. ``self.finished`` is set to ``True`` in every case.
     """
     num += 7
     result = analysisresult.result
     datatype = kwargs["datatype"]
     curDay = time.strftime("%Y%m%d",
                            time.localtime(time.time() - 86400 * num))
     try:
         newuserData = self.newcomers(datatype, num)
         newusers = newuserData.get(("all", "all"), set())
         # Count the number of active days of every newcomer.
         # NOTE(review): the offsets are always 7..1 regardless of ``num``
         # (the original computed ``num - i`` for i in num-7..num-1);
         # confirm this is intended rather than offsets num-7..num-1.
         active_days = {}
         for j in range(7, 0, -1):
             activeUser = self.activeUser(datatype, j)
             for uid in newusers:
                 counter = active_days.setdefault(uid, [0])
                 if uid in activeUser:
                     counter[0] += 1
         # Break the newcomer active-day counts down by version and
         # comepub (presumably the channel -- confirm).
         uvfile_path = get_uvfile_path(num, datatype, iszip=True)
         for line in JHOpen().readLines(uvfile_path):
             if not line:
                 continue
             try:
                 items = line.split("\t")
                 uid = items[0].strip()
                 if curDay != items[8][:8]:
                     continue
                 curpub = items[3].strip()
                 comepub = items[2].strip()
                 if comepub == "#":
                     comepub = curpub.split("#")[0]
                 vers = items[5].strip()
                 days = active_days.get(uid, [0])[0]
                 for idx, ver in enumerate(vers.split("#")):
                     for key in ((ver, comepub), (ver, "all"),
                                 ("all", comepub), ("all", "all")):
                         entry = result.setdefault(key, [{}, 0])
                         entry[0].setdefault(days, set()).add(uid)
                     # Newcomer counts: per-version keys are bumped for
                     # every version, the "all"-version keys only once
                     # per user (on the first version).
                     result[(ver, comepub)][1] += 1
                     result[(ver, "all")][1] += 1
                     if idx == 0:
                         result[("all", comepub)][1] += 1
                         result[("all", "all")][1] += 1
             except Exception:
                 # Best effort: report the offending line and keep going.
                 import traceback
                 traceback.print_exc()
                 print(line)
     except Exception:
         import traceback
         traceback.print_exc()
         return False
     finally:
         # Mark completion on success, failure and interruption alike.
         self.finished = True
Ejemplo n.º 10
0
# --coding=utf8--
import os
import os.path as path
from SaaSCommon.JHOpen import JHOpen
import json
from SaaSMode.EventDetailH5 import H5EventDetail
import datetime
from DBClient.PostgreSqlClient import PostgreSqlClient

if __name__ == "__main__":
    dataPath = os.sep.join([path.dirname(path.abspath(__file__)), "data"])
    inputStream = JHOpen.readLines(dataPath)
    data = []
    for line in inputStream:
        item = dict(json.loads(line))
        temp = H5EventDetail()
        type = item.get("type")
        if type == "page":
            temp.support = item.get("support")
            temp.usermap = item.get("usermap")
            temp.ref = item.get("ref")
        elif type == "dur":
            temp.status = item.get("status")
            temp.value = item.get("value")
        elif type == "ac":
            temp.event = item.get("event")
        temp.appkey = item.get("appkey")
        temp.type = item.get("type")
        temp.uri = item.get("uri")
        temp.uid = item.get("uid")
        temp.opatime = datetime.datetime.fromtimestamp(item.get("ts") / 1000)