class yiUser(clickhouseModel):
    """ClickHouse model for rows of the ``yi_user_all`` table.

    Every column is declared as a string field except ``theday``, which is a
    date field. ``self.riskModel`` is not defined in this class; it is
    presumably a database handle supplied by ``clickhouseModel`` (confirm).
    """

    user_id = fields.StringField()
    mobile = fields.StringField()
    status = fields.StringField()
    realname = fields.StringField()
    identity = fields.StringField()
    come_from = fields.StringField()
    down_from = fields.StringField()
    create_time = fields.StringField()
    birth_year = fields.StringField()
    last_login_time = fields.StringField()
    verify_time = fields.StringField()
    theday = fields.DateField()

    # NOTE(review): this attribute is named ``engines`` (plural) while other
    # models in this file declare ``engine``; confirm clickhouseModel reads
    # this name. Also confirm which MergeTree parameter the third positional
    # argument (8192) binds to.
    engines = engines.MergeTree('theday', ('user_id', 'theday'), 8192)

    # Table name
    @classmethod
    def table_name(cls):
        """Return the name of the table backing this model."""
        return 'yi_user_all'

    def count(self, user_id):
        """Return the number of rows whose ``user_id`` equals *user_id*."""
        return self.objects_in(self.riskModel).filter(user_id=user_id).count()

    def batchSave(self, insertList):
        """Insert *insertList* through ``self.riskModel``.

        :param insertList: the records handed to ``self.riskModel.insert``
        :return: ``True`` when the insert call returned normally, ``False``
            when it raised an ``Exception``
        """
        try:
            self.riskModel.insert(insertList)
        except Exception:
            # Only ordinary errors are converted to ``False``; interpreter
            # exits and Ctrl-C are no longer swallowed. The traceback is
            # logged so a failed batch can be diagnosed.
            import logging
            logging.getLogger(__name__).exception(
                'batch insert into %s failed', self.table_name())
            return False
        return True
class Actions(md.Model):
    """ORM model whose columns describe a user, an event and an application.

    All columns are strings except ``user_id`` (UInt64) and ``Date`` (date).
    """

    # describes datatypes and fields
    user_id = fd.UInt64Field()
    user_name = fd.StringField()
    time = fd.StringField()
    event_type = fd.StringField()
    screen_name = fd.StringField()
    app_name = fd.StringField()
    app_productname = fd.StringField()
    app_version = fd.StringField()
    app_publisher = fd.StringField()
    app_file = fd.StringField()
    app_copyright = fd.StringField()
    app_language = fd.StringField()
    file_versioninfo = fd.StringField()
    file_description = fd.StringField()
    file_internalname = fd.StringField()
    file_originalname = fd.StringField()
    # NOTE(review): ``datetime.date.today()`` is evaluated once, when this
    # class body executes, so the default is the date the module was loaded,
    # not the date a row is created. Confirm this is intended.
    Date = fd.DateField(default=datetime.date.today())

    # MergeTree engine: first argument is the ``Date`` column, second is a
    # tuple naming every other column of the model.
    engine = en.MergeTree(
        'Date',
        ('user_id', 'user_name', 'time', 'event_type', 'screen_name',
         'app_name', 'app_productname', 'app_version', 'app_publisher',
         'app_file', 'app_copyright', 'app_language', 'file_versioninfo',
         'file_description', 'file_internalname', 'file_originalname'))
class Test(models.Model):
    """ORM model with an Int64 ``id`` column and four string columns."""

    id = fields.Int64Field()
    a = fields.StringField()
    b = fields.StringField()
    c = fields.StringField()
    d = fields.StringField()

    # NOTE(review): the first MergeTree argument here is ``id``, an Int64
    # column, whereas the other models in this file pass a date column in
    # that position. Confirm the engine accepts a non-date column here.
    engine = engines.MergeTree('id', ('a', 'b', 'c', 'd'))
class Person(models.Model):
    """ORM model with name, birthday and height columns."""

    first_name = fields.StringField()
    last_name = fields.StringField()
    birthday = fields.DateField()
    height = fields.Float32Field()

    # MergeTree engine: ``birthday`` is passed as the first argument and
    # (first_name, last_name, birthday) as the second.
    engine = engines.MergeTree('birthday',
                               ('first_name', 'last_name', 'birthday'))
class yafIosLbs(clickhouseModel):
    """ClickHouse model for rows of the ``yaf_yyy_ios_lbs_all`` table.

    Every column is declared as a string field except ``theday``, which is a
    date field. ``self.yyyModel`` is not defined in this class; it is
    presumably a database handle supplied by ``clickhouseModel`` (confirm).
    """

    creat_time = fields.StringField()
    mil_timestmp = fields.StringField()
    itime = fields.StringField()
    user_id = fields.StringField()
    uuid = fields.StringField()
    platform = fields.StringField()
    generation = fields.StringField()
    wifi = fields.StringField()
    ssid = fields.StringField()
    ip = fields.StringField()
    cell_id = fields.StringField()
    location_area_code = fields.StringField()
    mobile_country_code = fields.StringField()
    mobile_network_code = fields.StringField()
    radio_type = fields.StringField()
    type = fields.StringField()
    network_type = fields.StringField()
    coordinate_source = fields.StringField()
    coordinate = fields.StringField()
    network_speed = fields.StringField()
    wifi_lists = fields.StringField()
    bluetooth_lists = fields.StringField()
    mHasSpeed = fields.StringField()
    mSpeed = fields.StringField()
    mHasRadius = fields.StringField()
    mRadius = fields.StringField()
    netWorkLocationType = fields.StringField()
    locationID = fields.StringField()
    theday = fields.DateField()

    # NOTE(review): this attribute is named ``engines`` (plural) while other
    # models in this file declare ``engine``; confirm clickhouseModel reads
    # this name. Also confirm which MergeTree parameter the third positional
    # argument (8192) binds to.
    engines = engines.MergeTree('theday', ('user_id', 'theday'), 8192)

    # Table name
    @classmethod
    def table_name(cls):
        """Return the name of the table backing this model."""
        return 'yaf_yyy_ios_lbs_all'

    # Project code
    @classmethod
    def project_num(cls):
        """Return the numeric project code of this model (2)."""
        return 2

    # Get the latest time
    def getLastTime(self):
        """Return the greatest ``creat_time`` stored in the table.

        Rows are restricted to ``creat_time`` between
        ``'2000-01-01 00:00:00'`` and ``'2100-12-31 23:59:59'`` and sorted
        by ``creat_time`` descending. ``creat_time`` is a string column, so
        the comparison is on strings. When no row matches, the lower bound
        ``'2000-01-01 00:00:00'`` is returned.
        """
        floor = '2000-01-01 00:00:00'
        rows = self.objects_in(self.yyyModel)
        rows = rows.filter(creat_time__between=[floor, '2100-12-31 23:59:59'])
        rows = rows.order_by('-creat_time').only('creat_time')
        if rows.count() == 0:
            return floor
        return rows[0].creat_time
class weixin(clickhouseModel):
    """ClickHouse model for rows of the ``youkayouqian_weixin_all`` table.

    Every column is declared as a string field except ``theday``, which is a
    date field. ``self.ykyqModel`` is not defined in this class; it is
    presumably a database handle supplied by ``clickhouseModel`` (confirm).
    """

    source = fields.StringField()
    request_time = fields.StringField()
    start_time = fields.StringField()
    sessionId = fields.StringField()
    ip = fields.StringField()
    g_uid = fields.StringField()
    is_login = fields.StringField()
    channelid = fields.StringField()
    activity = fields.StringField()
    user_agent = fields.StringField()
    from_url = fields.StringField()
    url = fields.StringField()
    cookieId = fields.StringField()
    logId = fields.StringField()
    _aid = fields.StringField()
    sign = fields.StringField()
    uuid = fields.StringField()
    nickname = fields.StringField()
    sex = fields.StringField()
    area = fields.StringField()
    event_name = fields.StringField()
    openId = fields.StringField()
    end_time = fields.StringField()
    taken_time = fields.StringField()
    theday = fields.DateField()

    # NOTE(review): this attribute is named ``engines`` (plural) while other
    # models in this file declare ``engine``; confirm clickhouseModel reads
    # this name. Also confirm which MergeTree parameter the third positional
    # argument (8192) binds to.
    engines = engines.MergeTree('theday', ('g_uid', 'theday'), 8192)

    # Table name
    @classmethod
    def table_name(cls):
        """Return the name of the table backing this model."""
        return 'youkayouqian_weixin_all'

    # Project code
    @classmethod
    def project_num(cls):
        """Return the numeric project code of this model (4)."""
        return 4

    # Get the latest time
    def getLastTime(self):
        """Return the greatest ``request_time`` stored in the table.

        Rows are restricted to ``request_time`` between
        ``'2000-01-01 00:00:00'`` and ``'2100-12-31 23:59:59'`` and sorted
        by ``request_time`` descending. ``request_time`` is a string column,
        so the comparison is on strings. When no row matches, the lower
        bound ``'2000-01-01 00:00:00'`` is returned.
        """
        floor = '2000-01-01 00:00:00'
        rows = self.objects_in(self.ykyqModel)
        rows = rows.filter(
            request_time__between=[floor, '2100-12-31 23:59:59'])
        rows = rows.order_by('-request_time').only('request_time')
        if rows.count() == 0:
            return floor
        return rows[0].request_time
class yafIosGlobal(clickhouseModel):
    """ClickHouse model for rows of the ``yaf_zrkey_ios_global_all`` table.

    Every column is declared as a string field except ``theday``, which is a
    date field. ``self.zrkeyModel`` is not defined in this class; it is
    presumably a database handle supplied by ``clickhouseModel`` (confirm).
    """

    creat_time = fields.StringField()
    mil_timestmp = fields.StringField()
    g_eventname = fields.StringField()
    g_url = fields.StringField()
    g_sessionId = fields.StringField()
    g_uid = fields.StringField()
    g_ip = fields.StringField()
    g_build = fields.StringField()
    g_source = fields.StringField()
    g_channelid = fields.StringField()
    g_activity = fields.StringField()
    g_uuid = fields.StringField()
    _residence_time = fields.StringField()
    _source_page = fields.StringField()
    _share_friend = fields.StringField()
    _share_c_friend = fields.StringField()
    coupon_amount = fields.StringField()
    logId = fields.StringField()
    _location_status = fields.StringField()
    _notice_status = fields.StringField()
    request_result = fields.StringField()
    password_show = fields.StringField()
    payType = fields.StringField()
    theday = fields.DateField()

    # NOTE(review): this attribute is named ``engines`` (plural) while other
    # models in this file declare ``engine``; confirm clickhouseModel reads
    # this name. Also confirm which MergeTree parameter the third positional
    # argument (8192) binds to.
    engines = engines.MergeTree('theday', ('g_uid', 'theday'), 8192)

    # Table name
    @classmethod
    def table_name(cls):
        """Return the name of the table backing this model."""
        return 'yaf_zrkey_ios_global_all'

    # Project code
    @classmethod
    def project_num(cls):
        """Return the numeric project code of this model (3)."""
        return 3

    # Get the latest time
    def getLastTime(self):
        """Return the greatest ``creat_time`` stored in the table.

        Rows are restricted to ``creat_time`` between
        ``'2000-01-01 00:00:00'`` and ``'2100-12-31 23:59:59'`` and sorted
        by ``creat_time`` descending. ``creat_time`` is a string column, so
        the comparison is on strings. When no row matches, the lower bound
        ``'2000-01-01 00:00:00'`` is returned.
        """
        floor = '2000-01-01 00:00:00'
        rows = self.objects_in(self.zrkeyModel)
        rows = rows.filter(creat_time__between=[floor, '2100-12-31 23:59:59'])
        rows = rows.order_by('-creat_time').only('creat_time')
        if rows.count() == 0:
            return floor
        return rows[0].creat_time
class BearRequests_development(models.Model):
    """ORM model with one row per recorded request.

    Columns fall into three groups by prefix: ``Request*`` (time, path,
    command, version, raw text, detection id), ``ProbeName`` and ``Bot*``
    (IP, country, user agent, continent, traceroute, DNS name).
    """

    EventDate = fields.DateField()
    RequestTime = fields.DateTimeField()
    RequestPath = fields.StringField()
    RequestCommand = fields.StringField()
    RequestVersion = fields.StringField()
    RequestRaw = fields.StringField()
    ProbeName = fields.StringField()
    RequestDetectionID = fields.Int32Field()
    BotIP = fields.StringField()
    BotCountry = fields.StringField()
    BotUA = fields.StringField()
    BotContinent = fields.StringField()
    BotTracert = fields.StringField()
    BotDNSName = fields.StringField()

    # MergeTree engine: ``EventDate`` is passed as the first argument and
    # (RequestTime, BotIP) as the second.
    engine = engines.MergeTree('EventDate', ('RequestTime', 'BotIP'))
class yafWeb(clickhouseModel):
    """ClickHouse model for rows of the ``yaf_zrkey_web_all`` table.

    Every column is declared as a string field except ``theday``, which is a
    date field. ``self.zrkeyModel`` is not defined in this class; it is
    presumably a database handle supplied by ``clickhouseModel`` (confirm).
    """

    creat_time = fields.StringField()
    mil_timestmp = fields.StringField()
    _aid = fields.StringField()
    activity = fields.StringField()
    channelid = fields.StringField()
    cookieId = fields.StringField()
    from_url = fields.StringField()
    g_uid = fields.StringField()
    height = fields.StringField()
    ip = fields.StringField()
    Is_login = fields.StringField()
    logId = fields.StringField()
    request_time = fields.StringField()
    sreen_height = fields.StringField()
    screen_width = fields.StringField()
    sessionId = fields.StringField()
    source = fields.StringField()
    url = fields.StringField()
    user_agent = fields.StringField()
    uuid = fields.StringField()
    width = fields.StringField()
    theday = fields.DateField()

    # NOTE(review): this attribute is named ``engines`` (plural) while other
    # models in this file declare ``engine``; confirm clickhouseModel reads
    # this name. Also confirm which MergeTree parameter the third positional
    # argument (8192) binds to.
    engines = engines.MergeTree('theday', ('g_uid', 'theday'), 8192)

    # Table name
    @classmethod
    def table_name(cls):
        """Return the name of the table backing this model."""
        return 'yaf_zrkey_web_all'

    # Project code
    @classmethod
    def project_num(cls):
        """Return the numeric project code of this model (3)."""
        return 3

    # Get the latest time
    def getLastTime(self):
        """Return the greatest ``creat_time`` stored in the table.

        Rows are restricted to ``creat_time`` between
        ``'2000-01-01 00:00:00'`` and ``'2100-12-31 23:59:59'`` and sorted
        by ``creat_time`` descending. ``creat_time`` is a string column, so
        the comparison is on strings. When no row matches, the lower bound
        ``'2000-01-01 00:00:00'`` is returned.
        """
        floor = '2000-01-01 00:00:00'
        rows = self.objects_in(self.zrkeyModel)
        rows = rows.filter(creat_time__between=[floor, '2100-12-31 23:59:59'])
        rows = rows.order_by('-creat_time').only('creat_time')
        if rows.count() == 0:
            return floor
        return rows[0].creat_time
class Event(models.Model):
    """ORM model stored in the ``events`` table.

    Only ``t``, ``tid``, ``cn`` and ``event_time`` are non-nullable;
    ``event_date`` is materialized from ``event_time``.
    NOTE(review): the short column names look like analytics hit
    parameters (type, tracking id, client id, ...) -- confirm against the
    producer of these rows.
    """

    t = fields.Enum8Field(EventType)
    tid = fields.StringField()
    cid = fields.NullableField(fields.UUIDField())
    cn = fields.StringField()
    cf1 = fields.NullableField(fields.StringField())
    cf2 = fields.NullableField(fields.StringField())
    cf3 = fields.NullableField(fields.StringField())
    cf4 = fields.NullableField(fields.StringField())
    cf5 = fields.NullableField(fields.StringField())
    dl = fields.NullableField(fields.StringField())
    dr = fields.NullableField(fields.StringField())
    uip = fields.NullableField(fields.StringField())
    utt = fields.NullableField(fields.StringField())
    ua = fields.NullableField(fields.StringField())

    # Event fields
    # (Required for event type)
    ec = fields.NullableField(fields.StringField())
    ea = fields.NullableField(fields.StringField())
    el = fields.NullableField(fields.StringField())
    ev = fields.NullableField(fields.Int64Field())

    # Transaction fields
    # (Required for transaction type)
    ti = fields.NullableField(fields.StringField())
    tr = fields.NullableField(fields.Decimal64Field(scale=6))

    # Revenue fields
    r = fields.NullableField(fields.Decimal64Field(scale=6))

    event_time = fields.DateTimeField()
    # Not written by the client: computed by the server from ``event_time``.
    event_date = fields.DateField(materialized="toDate(event_time)")

    # MergeTree engine: ``event_date`` is passed as the first argument and
    # (tid, event_date) as the second.
    engine = engines.MergeTree("event_date", ("tid", "event_date"))

    @classmethod
    def table_name(cls):
        # Overrides the table name with a fixed value.
        return "events"
def make_ch_model_for_df(df, date_field_name, table_name, pk_columns=None):
    """
    Create an ORM Model class for a DataFrame.

    models.Model uses a metaclass, so the child class is built dynamically
    with ``type(...)`` from a dict of attributes: one field per DataFrame
    column plus a MergeTree ``engine``.

    ToDo: Add support for engine Memory and Log

    :param df: PySpark DataFrame
    :param date_field_name: Date-typed field for partitioning
    :param table_name: table name in DB
    :param pk_columns: primary key columns; defaults to all columns of ``df``
    :return: ORM Model class
    :raises AssertionError: if ``date_field_name`` is not a column of ``df``
        or if ``df`` has a column named ``engine``
    """
    column_names = df.schema.names
    assert date_field_name in column_names, (
        'date field %r is not a column of the DataFrame' % (date_field_name,))
    # A column called ``engine`` would collide with the model's engine
    # attribute set below.
    assert 'engine' not in column_names, (
        "a DataFrame column named 'engine' clashes with the model engine")

    if pk_columns is None:
        pk_columns = column_names

    attrs = {'engine': engines.MergeTree(date_field_name, pk_columns)}
    attrs.update(
        (field.name, spark_field2clickhouse_field(field))
        for field in df.schema.fields
    )

    Model = type('MyModel', (models.Model, ), attrs)
    # Assigned after class creation so a DataFrame column that happens to be
    # called ``table_name`` is still registered as a field by the metaclass.
    Model.table_name = classmethod(lambda cls: table_name)
    return Model
class yafWeb(clickhouseModel):
    """ClickHouse model for rows of the ``yaf_peanut_web_all`` table.

    Every column is declared as a string field except ``theday``, which is a
    date field. ``self.peanutModel`` is not defined in this class; it is
    presumably a database handle supplied by ``clickhouseModel`` (confirm).

    NOTE(review): another class named ``yafWeb`` is defined earlier in this
    file; if both live in one module, this definition shadows it.
    """

    creat_time = fields.StringField()
    mil_timestmp = fields.StringField()
    _aid = fields.StringField()
    activity = fields.StringField()
    channelid = fields.StringField()
    cookieId = fields.StringField()
    from_url = fields.StringField()
    g_uid = fields.StringField()
    height = fields.StringField()
    ip = fields.StringField()
    Is_login = fields.StringField()
    logId = fields.StringField()
    request_time = fields.StringField()
    sreen_height = fields.StringField()
    screen_width = fields.StringField()
    sessionId = fields.StringField()
    source = fields.StringField()
    url = fields.StringField()
    user_agent = fields.StringField()
    uuid = fields.StringField()
    width = fields.StringField()
    theday = fields.DateField()

    # NOTE(review): this attribute is named ``engines`` (plural) while other
    # models in this file declare ``engine``; confirm clickhouseModel reads
    # this name. Also confirm which MergeTree parameter the third positional
    # argument (8192) binds to.
    engines = engines.MergeTree('theday', ('g_uid', 'theday'), 8192)

    # Table name
    @classmethod
    def table_name(cls):
        """Return the name of the table backing this model."""
        return 'yaf_peanut_web_all'

    # Project code
    @classmethod
    def project_num(cls):
        """Return the numeric project code of this model (1)."""
        return 1

    # Client code
    @classmethod
    def client_num(cls):
        """Return the numeric client code of this model (4)."""
        return 4

    # Get the latest time
    def getLastTime(self):
        """Return the greatest ``creat_time`` stored in the table.

        Rows are restricted to ``creat_time`` between
        ``'2000-01-01 00:00:00'`` and ``'2100-12-31 23:59:59'`` and sorted
        by ``creat_time`` descending. ``creat_time`` is a string column, so
        the comparison is on strings. When no row matches, the lower bound
        ``'2000-01-01 00:00:00'`` is returned.
        """
        lastTime = self.objects_in(self.peanutModel).filter(
            creat_time__between=['2000-01-01 00:00:00', '2100-12-31 23:59:59']
        ).order_by('-creat_time').only('creat_time')
        if lastTime.count() == 0:
            return '2000-01-01 00:00:00'
        return lastTime[0].creat_time

    # Get the hourly registration count
    def getRegisterNum(self, now, before):
        """Return the growth in distinct ``g_uid`` values between two times.

        :param now: upper time bound, compared against ``creat_time``
        :param before: lower time bound, compared against ``creat_time``
        :return: ``-1`` when no row has ``creat_time >= now``; otherwise the
            distinct ``g_uid`` count for ``creat_time <= now`` minus the
            distinct ``g_uid`` count for ``creat_time <= before``
        """
        haveData = self.objects_in(
            self.peanutModel).filter(creat_time__gte=now).count()
        if haveData <= 0:
            # -1 is the "no data at or after ``now``" sentinel.
            return -1
        maxSize = self.objects_in(self.peanutModel).filter(
            creat_time__lte=now).only('g_uid').distinct().count()
        minSize = self.objects_in(self.peanutModel).filter(
            creat_time__lte=before).only('g_uid').distinct().count()
        return maxSize - minSize

    # Get the page view counts
    def getBrowseNum(self, now, before):
        """Count page views per normalised URL between *before* and *now*.

        Each ``url`` is reduced to the first match of a URL-prefix pattern
        (scheme, host and path, keeping a ``?type=<digits>`` or ``?id=``
        suffix when present) and views are counted per reduced URL. Rows
        whose ``url`` equals ``'empty'`` are excluded by the query.

        :param now: upper time bound, compared against ``creat_time``
        :param before: lower time bound, compared against ``creat_time``
        :return: ``[]`` when no row has ``creat_time >= now``;
            ``[["https://www.yaoyuefu.com/", 0]]`` when no row in the window
            yields a match; otherwise a list of ``[url, count]`` pairs in
            first-seen order
        """
        haveData = self.objects_in(
            self.peanutModel).filter(creat_time__gte=now).count()
        if haveData <= 0:
            return []
        browseData = self.objects_in(self.peanutModel).filter(
            creat_time__between=[before, now], url__ne='empty').only('url')
        if browseData.count() == 0:
            return [["https://www.yaoyuefu.com/", 0]]

        # Compiled once here instead of once per row inside the loop.
        pattern = re.compile(
            r'\w*\:\/\/[\w*.]*[\w*\/]*\?type=\d*|\w*\:\/\/[\w*.]*[\w*\/]*\?id=|\w*\:\/\/[\w*.]*[\w*\/]*/?|\w*\:\/\/[\w*.]*[\w*\/]*'
        )
        browseList = {}
        for browse in browseData:
            result = pattern.findall(browse.url)
            if result:
                # Only the first match of each URL is counted.
                browseList[result[0]] = browseList.get(result[0], 0) + 1
        if not browseList:
            return [["https://www.yaoyuefu.com/", 0]]
        return [[url, num] for url, num in browseList.items()]
class yafIosStartup(clickhouseModel):
    """ClickHouse model for rows of the ``yaf_zrkey_ios_startup_all`` table.

    Every column is declared as a string field except ``theday``, which is a
    date field. ``self.zrkeyModel`` is not defined in this class; it is
    presumably a database handle supplied by ``clickhouseModel`` (confirm).
    """

    creat_time = fields.StringField()
    mil_timestmp = fields.StringField()
    g_eventname = fields.StringField()
    g_url = fields.StringField()
    g_sessionId = fields.StringField()
    g_uid = fields.StringField()
    g_ip = fields.StringField()
    g_build = fields.StringField()
    g_source = fields.StringField()
    g_channelid = fields.StringField()
    g_activity = fields.StringField()
    g_uuid = fields.StringField()
    _residence_time = fields.StringField()
    _source_page = fields.StringField()
    _share_friend = fields.StringField()
    _share_c_friend = fields.StringField()
    coupon_amount = fields.StringField()
    logId = fields.StringField()
    _location_status = fields.StringField()
    _notice_status = fields.StringField()
    request_result = fields.StringField()
    password_show = fields.StringField()
    payType = fields.StringField()
    _models = fields.StringField()
    _screen = fields.StringField()
    _startup_time = fields.StringField()
    _resolution = fields.StringField()
    _systemversion = fields.StringField()
    _mem = fields.StringField()
    _storage = fields.StringField()
    _cpu = fields.StringField()
    _charge = fields.StringField()
    _rem_charge = fields.StringField()
    _bluetooth = fields.StringField()
    _bluetooth_status = fields.StringField()
    _language = fields.StringField()
    _operator = fields.StringField()
    _light = fields.StringField()
    _imei = fields.StringField()
    _mac = fields.StringField()
    _gps = fields.StringField()
    _net = fields.StringField()
    _wifi = fields.StringField()
    _bssid = fields.StringField()
    _gyro = fields.StringField()
    _gyro_info = fields.StringField()
    _app_version = fields.StringField()
    _is_login = fields.StringField()
    _last_start_time = fields.StringField()
    _last_end_time = fields.StringField()
    theday = fields.DateField()

    # NOTE(review): this attribute is named ``engines`` (plural) while other
    # models in this file declare ``engine``; confirm clickhouseModel reads
    # this name. Also confirm which MergeTree parameter the third positional
    # argument (8192) binds to.
    engines = engines.MergeTree('theday', ('g_uid', 'theday'), 8192)

    # Table name
    @classmethod
    def table_name(cls):
        """Return the name of the table backing this model."""
        return 'yaf_zrkey_ios_startup_all'

    # Project code
    @classmethod
    def project_num(cls):
        """Return the numeric project code of this model (3)."""
        return 3

    # Get the latest time
    def getLastTime(self):
        """Return the greatest ``creat_time`` stored in the table.

        Rows are restricted to ``creat_time`` between
        ``'2000-01-01 00:00:00'`` and ``'2100-12-31 23:59:59'`` and sorted
        by ``creat_time`` descending. ``creat_time`` is a string column, so
        the comparison is on strings. When no row matches, the lower bound
        ``'2000-01-01 00:00:00'`` is returned.
        """
        floor = '2000-01-01 00:00:00'
        rows = self.objects_in(self.zrkeyModel)
        rows = rows.filter(creat_time__between=[floor, '2100-12-31 23:59:59'])
        rows = rows.order_by('-creat_time').only('creat_time')
        if rows.count() == 0:
            return floor
        return rows[0].creat_time
class yafIosGlobal(clickhouseModel):
    """ClickHouse model for rows of the ``yaf_peanut_ios_global_all`` table.

    Every column is declared as a string field except ``theday``, which is a
    date field. ``self.peanutModel`` is not defined in this class; it is
    presumably a database handle supplied by ``clickhouseModel`` (confirm).

    NOTE(review): another class named ``yafIosGlobal`` is defined earlier in
    this file; if both live in one module, this definition shadows it.
    """

    creat_time = fields.StringField()
    mil_timestmp = fields.StringField()
    g_eventname = fields.StringField()
    g_url = fields.StringField()
    g_sessionId = fields.StringField()
    g_uid = fields.StringField()
    g_ip = fields.StringField()
    g_build = fields.StringField()
    g_source = fields.StringField()
    g_channelid = fields.StringField()
    g_activity = fields.StringField()
    g_uuid = fields.StringField()
    _residence_time = fields.StringField()
    _source_page = fields.StringField()
    _share_friend = fields.StringField()
    _share_c_friend = fields.StringField()
    coupon_amount = fields.StringField()
    logId = fields.StringField()
    _location_status = fields.StringField()
    _notice_status = fields.StringField()
    h5_url = fields.StringField()
    banner_url = fields.StringField()
    payType = fields.StringField()
    standard_id = fields.StringField()
    standard_type = fields.StringField()
    recharge_amount = fields.StringField()
    bankcard_num = fields.StringField()
    withdraw_amount = fields.StringField()
    theday = fields.DateField()

    # NOTE(review): this attribute is named ``engines`` (plural) while other
    # models in this file declare ``engine``; confirm clickhouseModel reads
    # this name. Also confirm which MergeTree parameter the third positional
    # argument (8192) binds to.
    engines = engines.MergeTree('theday', ('g_uid', 'theday'), 8192)

    # Table name
    @classmethod
    def table_name(cls):
        """Return the name of the table backing this model."""
        return 'yaf_peanut_ios_global_all'

    # Project code
    @classmethod
    def project_num(cls):
        """Return the numeric project code of this model (1)."""
        return 1

    # Client code
    @classmethod
    def client_num(cls):
        """Return the numeric client code of this model (2)."""
        return 2

    # Get the latest time
    def getLastTime(self):
        """Return the greatest ``creat_time`` stored in the table.

        Rows are restricted to ``creat_time`` between
        ``'2000-01-01 00:00:00'`` and ``'2100-12-31 23:59:59'`` and sorted
        by ``creat_time`` descending. ``creat_time`` is a string column, so
        the comparison is on strings. When no row matches, the lower bound
        ``'2000-01-01 00:00:00'`` is returned.
        """
        floor = '2000-01-01 00:00:00'
        rows = self.objects_in(self.peanutModel)
        rows = rows.filter(creat_time__between=[floor, '2100-12-31 23:59:59'])
        rows = rows.order_by('-creat_time').only('creat_time')
        if rows.count() == 0:
            return floor
        return rows[0].creat_time

    # Get the hourly registration count
    def getRegisterNum(self, now, before):
        """Return the growth in distinct ``g_uid`` values between two times.

        :param now: upper time bound, compared against ``creat_time``
        :param before: lower time bound, compared against ``creat_time``
        :return: ``-1`` when no row has ``creat_time >= now``; otherwise the
            distinct ``g_uid`` count for ``creat_time <= now`` minus the
            distinct ``g_uid`` count for ``creat_time <= before``
        """
        newer_rows = self.objects_in(self.peanutModel).filter(
            creat_time__gte=now).count()
        if not newer_rows > 0:
            # -1 is the "no data at or after ``now``" sentinel.
            return -1
        upto_now = self.objects_in(self.peanutModel).filter(
            creat_time__lte=now).only('g_uid').distinct().count()
        upto_before = self.objects_in(self.peanutModel).filter(
            creat_time__lte=before).only('g_uid').distinct().count()
        return upto_now - upto_before

    # Get the page view counts
    def getBrowseNum(self, now, before):
        """Count rows per ``g_url`` between *before* and *now*.

        The counting is done by the query itself (``count()`` aggregated
        per ``g_url``); rows whose ``g_url`` equals ``'empty'`` are
        excluded.

        :param now: upper time bound, compared against ``creat_time``
        :param before: lower time bound, compared against ``creat_time``
        :return: ``[]`` when no row has ``creat_time >= now``;
            ``[["Home", 0]]`` when the aggregate is empty; otherwise a list
            of ``[g_url, count]`` pairs
        """
        newer_rows = self.objects_in(self.peanutModel).filter(
            creat_time__gte=now).count()
        if newer_rows <= 0:
            return []
        per_url = self.objects_in(self.peanutModel).filter(
            creat_time__between=[before, now], g_url__ne='empty'
        ).aggregate('g_url', sum='count()')
        if per_url.count() == 0:
            return [["Home", 0]]
        return [[row.g_url, row.sum] for row in per_url]