class InfoHandlerV1(tornado.web.RequestHandler):
    def initialize(self):
        self.set_header('Content-type', 'application/json')
        self.mysql_client = MySQLUtils()
        self.res_status = dict()

    # Here and in the handlers below, the deprecated @tornado.web.asynchronous
    # decorator has been dropped: it must not be combined with
    # @tornado.gen.coroutine (Tornado 5+ raises a TypeError for that pairing).
    @tornado.gen.coroutine
    def get(self, domain, file_name):
        try:
            if file_name:
                query_status, query_result = self.mysql_client.raw_sql_fdfs_file_info(file_name.strip(), domain)
                if query_status == 0:
                    self.res_status['status'], self.res_status['result'] = 0, query_result
                elif query_status == 1:
                    raise HandlerExcept('mysql query returned no data')
                else:
                    raise MysqlExcept("mysql query failed, error: %s" % str(query_result))
            else:
                raise HandlerExcept("no file name")
        except MysqlExcept as error:
            logging.error("file: %s, domain: %s, error: %s" % (file_name, domain, error.msg))
            self.res_status['status'], self.res_status['result'] = 1, error.msg
        except HandlerExcept as error:
            logging.info("file: %s, domain: %s, error: %s" % (file_name, domain, error.msg))
            self.res_status['status'], self.res_status['result'] = 2, error.msg
        except Exception as error:
            logging.error(str(error))
            self.res_status['status'], self.res_status['result'] = 3, str(error)
        finally:
            self.mysql_client.close_connetc()
            self.write(json.dumps(self.res_status))
            self.finish()
class ListFileHandlerV1(tornado.web.RequestHandler):
    def initialize(self):
        self.set_header('Content-type', 'application/json')
        self.mysql_client = MySQLUtils()
        self.res_status = dict()

    @tornado.gen.coroutine
    def get(self):
        domain = self.get_argument('domain', default='test', strip=True)
        limit = self.get_argument('limit', default='10', strip=True)
        try:
            # The original check isinstance(int(limit), int) was always True and
            # raised ValueError on non-numeric input before the else branch could
            # fire; validate the string explicitly instead.
            if limit.isdigit():
                domain_file_stat, domain_file_info = self.mysql_client.list_file(domain, int(limit))
                if domain_file_stat in (0, 1):
                    self.res_status['status'], self.res_status['result'] = 0, domain_file_info
                else:
                    raise MysqlExcept(domain_file_info)
            else:
                raise HandlerExcept("limit is not a number")
        except HandlerExcept as error:
            self.res_status['status'], self.res_status['result'] = 1, error.msg
        except MysqlExcept as error:
            logging.error("%s" % error.msg)
            self.res_status['status'], self.res_status['result'] = 2, error.msg
        except Exception as error:
            logging.error(str(error))
            self.res_status['status'], self.res_status['result'] = 3, str(error)
        finally:
            self.mysql_client.close_connetc()
            self.write(json.dumps(self.res_status))
            self.finish()
class GetDomainHandlerV1(tornado.web.RequestHandler):
    def initialize(self):
        self.set_header('Content-type', 'application/json')
        self.mysql_client = MySQLUtils()
        self.res_status = dict()

    @tornado.gen.coroutine
    def get(self, *args, **kwargs):
        try:
            all_domain_stat, all_domain_info = self.mysql_client.get_all_domain()
            if all_domain_stat in (0, 1):
                self.res_status['status'], self.res_status['result'] = 0, all_domain_info
            else:
                # Bug fix: the exception was constructed but never raised.
                raise MysqlExcept('query all domain error: %s' % all_domain_info)
        except MysqlExcept as error:
            logging.error("%s" % error.msg)
            self.res_status['status'], self.res_status['result'] = 1, error.msg
        except Exception as error:
            logging.error(str(error))
            self.res_status['status'], self.res_status['result'] = 2, str(error)
        finally:
            self.mysql_client.close_connetc()
            self.write(json.dumps(self.res_status))
            self.finish()
class DownloadHandlerV1(tornado.web.RequestHandler):
    def initialize(self):
        self.mysql_client = MySQLUtils()

    @tornado.gen.coroutine
    def get(self, domain, file_name):
        try:
            if file_name:
                query_status, query_result = self.mysql_client.raw_sql_fdfs_download(file_name.strip(), domain.strip())
                # Note: unlike the 0-means-success convention elsewhere, a truthy
                # status from raw_sql_fdfs_download signals success here.
                if query_status:
                    self.redirect(url=query_result, permanent=False, status=None)
                else:
                    raise HTTPError(404)
            else:
                raise HTTPError(404)
        except HTTPError:
            raise
        except Exception:
            # Any other lookup failure is reported to the client as 404.
            raise HTTPError(404)
        finally:
            self.mysql_client.close_connetc()

    @tornado.gen.coroutine
    def head(self, *args, **kwargs):
        return self.get(*args, **kwargs)
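# A minimal sketch of how these handlers might be wired into a Tornado
# application. The URL patterns and port below are illustrative assumptions,
# not taken from the original source.
import tornado.ioloop
import tornado.web

def make_app():
    return tornado.web.Application([
        # Captured regex groups map to the (domain, file_name) handler arguments.
        (r"/v1/info/([^/]+)/(.+)", InfoHandlerV1),
        (r"/v1/list", ListFileHandlerV1),
        (r"/v1/domains", GetDomainHandlerV1),
        (r"/v1/download/([^/]+)/(.+)", DownloadHandlerV1),
    ])

if __name__ == "__main__":
    app = make_app()
    app.listen(8888)  # assumed port
    tornado.ioloop.IOLoop.current().start()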
def cache_dataset(self, sc, cache_conf):
    broadcast_enabled = cache_conf.get('broadcast.enabled', False)
    if isinstance(broadcast_enabled, bool) and broadcast_enabled:
        # cache_id = 'ds_id_' + cache_conf['source.id'] + '#cache_id_' + cache_conf['cache.id']
        cache_id = 'ds_id_' + str(cache_conf['source.id']) + '#cache_id_' + str(cache_conf['cache.id'])
        if cache_id in self.cache_pools:
            print('= = ' * 10, '[myapp CacheManager.cache_dataset] found dataset has been cached')
            return self.cache_pools[cache_id]
        else:
            host = cache_conf['host']
            port = cache_conf.get('port', 3306)
            db = cache_conf['db']
            user = cache_conf['user']
            password = cache_conf.get('password', '')
            table_name = cache_conf['tableName']
            key_name = cache_conf['keyName']
            cache_key_name_list = cache_conf['cache.keyName.list']
            cache_sql = 'select ' + key_name + ', ' + cache_key_name_list + ' from ' + db + '.' + table_name
            conn = MySQLUtils.get_connection(host=host, db=db, user=user, password=password, port=port)
            external_cache = MySQLUtils.query(conn, sql_text=cache_sql, key_name=key_name)
            cache_broadcast = sc.broadcast(external_cache)
            self.cache_pools[cache_id] = cache_broadcast
            return cache_broadcast
    else:
        print('= = ' * 10, '[myapp CacheManager.cache_dataset] '
              'configuration warning: found cache is not enabled, with broadcast.enabled =', broadcast_enabled)
        return None
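# A usage sketch for cache_dataset. The configuration keys mirror what the
# method reads from cache_conf; the concrete values, the cache_manager
# instance, and sc (a live SparkContext) are placeholders, not from the source.
cache_conf = {
    'broadcast.enabled': True,
    'source.id': 1,
    'cache.id': 'deviceinfo',
    'host': '127.0.0.1',
    'port': 3306,
    'db': 'edxapp',
    'user': 'reader',
    'password': '',
    'tableName': 'api_deviceinfo',
    'keyName': 'uuid',
    'cache.keyName.list': 'channel, event, uid',
}
broadcast_var = cache_manager.cache_dataset(sc, cache_conf)
if broadcast_var is not None:
    # broadcast_var.value is a dict keyed by keyName ('uuid' here).
    row = broadcast_var.value.get('some-uuid')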
def main():
    cfg = {
        'host': '192.168.158.11',
        'port': 3306,
        'user': '******',
        'passwd': 'maixiaochai_p',
        'db': 'maixiaochai_db'
    }
    db = MySQLUtils(**cfg)
    result = db.has_table('maixiaochai_t')
    print(result)
def get_api_deviceinfo():
    host = '192.168.9.228'
    port = 3306
    db = 'edxapp'
    user = '******'
    password = ''
    table_name = 'api_deviceinfo'
    key_name = 'uuid'
    cache_key_name_list = 'channel, event, uid'
    # The lines below were commented out, leaving external_cache undefined at
    # the return statement; restore them so the function actually runs.
    cache_sql = 'select ' + key_name + ', ' + cache_key_name_list + ' from ' + db + '.' + table_name
    conn = MySQLUtils.get_connection(host=host, db=db, user=user, password=password, port=port)
    external_cache = MySQLUtils.query(conn, sql_text=cache_sql, key_name=key_name)
    return external_cache
class DeleteDomainHandlerV1(tornado.web.RequestHandler):
    def initialize(self):
        self.set_header('Content-type', 'application/json')
        self.mysql_client = MySQLUtils()
        self.res_status = dict()

    @tornado.gen.coroutine
    def get(self, domain):
        try:
            if domain:
                domain_exist_stat, domain_exist_info = self.mysql_client.domain_id_exist(domain)
                if domain_exist_stat == 0:
                    domain_empty_stat, domain_empty_info = self.mysql_client.fdfs_empty(domain)
                    if domain_empty_stat == 0:
                        domain_delete_stat, domain_delete_info = self.mysql_client.delete_domain(domain)
                        if domain_delete_stat == 0:
                            logging.info("domain %s deleted successfully" % domain)
                            self.res_status['status'], self.res_status['result'] = 0, 'domain delete success'
                        else:
                            raise MysqlExcept('delete domain error: %s' % domain_delete_info)
                    elif domain_empty_stat == 1:
                        raise MysqlExcept('domain not empty')
                    else:
                        raise MysqlExcept(domain_empty_info)
                elif domain_exist_stat == 1:
                    raise MysqlExcept('domain does not exist')
                else:
                    raise MysqlExcept(domain_exist_info)
            else:
                raise HandlerExcept("no domain")
        except HandlerExcept as error:
            self.res_status['status'], self.res_status['result'] = 1, error.msg
        except MysqlExcept as error:
            logging.error("%s" % error.msg)
            self.res_status['status'], self.res_status['result'] = 2, error.msg
        except Exception as error:
            logging.error(str(error))
            self.res_status['status'], self.res_status['result'] = 3, str(error)
        finally:
            self.mysql_client.close_connetc()
            self.write(json.dumps(self.res_status))
            self.finish()
class DeleteHandlerV1(tornado.web.RequestHandler):
    def initialize(self):
        self.mysql_client = MySQLUtils()
        self.fdfs_client = FDFSUtils()
        self.res_status = dict()

    @tornado.gen.coroutine
    def get(self, domain, file_name):
        try:
            if file_name:
                exist_status, exist_info = self.mysql_client.fdfs_exist(file_name, domain)
                if exist_status == 0:  # file exists
                    delete_status, delete_result = self.fdfs_client.delete_file(
                        exist_info.get('file_group', ''),
                        exist_info.get('file_local_path', ''))
                    if delete_status:
                        mysql_status, mysql_info = self.mysql_client.fdfs_delete(file_name, domain)
                        if mysql_status == 0:
                            logging.info("file: %s, domain: %s, mysql delete success" % (file_name, domain))
                            self.res_status['status'], self.res_status['result'] = 0, None
                        else:
                            raise MysqlExcept(mysql_info)
                    else:
                        raise FdfsExcept(delete_result)
                elif exist_status == 1:
                    raise MysqlExcept('mysql query returned no data')
                else:
                    raise MysqlExcept(exist_info)
            else:
                # HandlerExcept has no dedicated except clause here; it falls
                # through to the generic Exception handler (status 3).
                raise HandlerExcept("no file name")
        except MysqlExcept as error:
            logging.error("file: %s, domain: %s, error: %s" % (file_name, domain, error.msg))
            self.res_status['status'], self.res_status['result'] = 1, error.msg
        except FdfsExcept as error:
            logging.error("file: %s, domain: %s, error: %s" % (file_name, domain, error.msg))
            self.res_status['status'], self.res_status['result'] = 2, error.msg
        except Exception as error:
            logging.error(str(error))
            self.res_status['status'], self.res_status['result'] = 3, str(error)
        finally:
            self.mysql_client.close_connetc()
            self.write(json.dumps(self.res_status))
            self.finish()
class GetPoolHandlerV1(tornado.web.RequestHandler):
    def initialize(self):
        self.set_header('Content-type', 'application/json')
        self.mysql_client = MySQLUtils()
        self.res_status = dict()

    @tornado.gen.coroutine
    def get(self):
        try:
            connections_pool = self.mysql_client.get_pool()
            self.res_status['status'], self.res_status['result'] = 0, connections_pool
        except Exception as error:
            logging.error(str(error))
            self.res_status['status'], self.res_status['result'] = 1, str(error)
        finally:
            self.mysql_client.close_connetc()
            self.write(json.dumps(self.res_status))
            self.finish()
def fun_deviceinfo_in_rdd_mapPartitions(iter_x, step_conf, cache_conf):
    """
    Input:  iter[Row]
    Output: iter[Row]
    """
    import re
    domain_pattern = re.compile('https?://(.*?)/.*')

    ORIGIN_REFERER_KEY = 'origin_referer'
    CHANNEL_KEY = 'channel'
    UNKNOWN_ORIGIN_REFERER_VALUE = 'unknown'
    USER_ID_KEY = 'user_id'
    UID_KEY = 'uid'
    SPAM_KEY = 'spam'
    EVENT_KEY = 'event'
    UNKNOWN_SPAM_VALUE = 'unknown'

    host = cache_conf['host']
    port = cache_conf.get('port', 3306)
    db = cache_conf['db']
    user = cache_conf['user']
    password = cache_conf.get('password', '')
    table_name = cache_conf['tableName']
    key_name = cache_conf['keyName']
    cache_key_name_list = cache_conf['cache.keyName.list']
    cache_sql = 'select ' + key_name + ', ' + cache_key_name_list + ' from ' + db + '.' + table_name
    query_sql = cache_sql + ' where ' + key_name + ' = %s'
    # TODO: support batch queries
    batch_query_sql = cache_sql + ' where ' + key_name + " in (?)"
    # TODO: support broadcast -- cache a copy of the external data at application startup
    broadcast_enabled = cache_conf.get('broadcast.enabled', False) \
        if isinstance(cache_conf.get('broadcast.enabled'), bool) else False
    # Incremental dynamic cache
    cache_id = 'ds_id_' + str(cache_conf['source.id']) + '#cache_id_' + str(cache_conf['cache.id'])
    cache_pool = MySQLUtils.cache_pools.setdefault(cache_id, {})
    conn = MySQLUtils.get_connection(host=host, db=db, user=user, password=password, port=port)

    # Attribute update rules:
    # 1 origin_referer: if the log's origin_referer is empty (null or "") and the
    #   device info's channel field is non-empty, take the channel value; otherwise
    #   keep the log's origin_referer. Note: origin_referer currently holds the domain only.
    # 2 spam: if the log's spam is empty (null or "") and the device info's event
    #   field is non-empty, take the event value; otherwise keep the log's spam.
    # 3 user_id: if a cached uid is found, overwrite; the user_id in the log may not be valid.
    def proc_update(obj, cache_channel=None, cache_event=None, cache_uid=None):
        if not obj[ORIGIN_REFERER_KEY] and cache_channel:
            obj[ORIGIN_REFERER_KEY] = cache_channel
        if obj[ORIGIN_REFERER_KEY]:
            match_result = re.match(domain_pattern, obj[ORIGIN_REFERER_KEY])
            if match_result:
                obj[ORIGIN_REFERER_KEY] = match_result.group(1)
        if not obj[ORIGIN_REFERER_KEY]:
            obj[ORIGIN_REFERER_KEY] = UNKNOWN_ORIGIN_REFERER_VALUE
        if not obj[SPAM_KEY] and cache_event:
            obj[SPAM_KEY] = cache_event
        if not obj[SPAM_KEY]:
            obj[SPAM_KEY] = UNKNOWN_SPAM_VALUE
        if cache_uid and obj[USER_ID_KEY] != cache_uid:
            obj[USER_ID_KEY] = cache_uid

    for row in iter_x:
        obj = row.asDict()
        key_value = obj.get(key_name, '').strip()
        print('= = ' * 10,
              '[myapp EnhanceApiDeviceInfoProcessor.process.fun_deviceinfo_in_rdd_mapPartitions] found key_value =',
              key_value, ', obj = ', obj)
        # key_value is assumed unique; it may be empty
        if str(key_value) == '':
            # No join data: web logs have an empty uuid and do not need a join
            # against mysql.api_deviceinfo; still decide whether
            # origin_referer/spam need updating.
            proc_update(obj)
        else:
            # Check the in-memory cache first, then the broadcast, then the external store.
            if key_value in cache_pool and isinstance(cache_pool[key_value], dict):
                key_cache = cache_pool[key_value]  # in-memory cache hit
                proc_update(obj, key_cache[CHANNEL_KEY], key_cache[EVENT_KEY], key_cache[UID_KEY])
            elif broadcast_enabled and key_value in cache_conf.get('broadcast').value:
                # broadcast hit
                key_cache = cache_conf.get('broadcast').value[key_value]
                print('= = ' * 10,
                      '[myapp EnhanceApiDeviceInfoProcessor.process.fun_deviceinfo_in_rdd_mapPartitions] found '
                      'broadcast_cache = ', key_cache)
                proc_update(obj, key_cache[CHANNEL_KEY], key_cache[EVENT_KEY], key_cache[UID_KEY])
            else:
                # Not in memory: query the external store.
                external_cache = MySQLUtils.query(conn=conn, sql_text=query_sql,
                                                  key_name=key_name, sql_args=(key_value,))
                print('= = ' * 10,
                      '[myapp EnhanceApiDeviceInfoProcessor.process.fun_deviceinfo_in_rdd_mapPartitions] found '
                      'external_cache = ', external_cache)
                if len(external_cache) == 0:
                    # No join data in the external store either: update
                    # origin_referer/spam from the log fields alone.
                    proc_update(obj)
                else:
                    # Join data found: update fields from it, then origin_referer/spam.
                    key_cache = external_cache[key_value]
                    print('= = ' * 10,
                          '[myapp EnhanceApiDeviceInfoProcessor.process.fun_deviceinfo_in_rdd_mapPartitions] found '
                          'mysql_cache = ', key_cache)
                    proc_update(obj, key_cache[CHANNEL_KEY], key_cache[EVENT_KEY], key_cache[UID_KEY])
                # Refresh the in-memory cache; items() replaces the
                # Python-2-only iteritems().
                for k, v in external_cache.items():
                    cache_pool[k] = v
                external_cache.clear()
        yield Row(**obj)
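# A wiring sketch for the partition function above. df (a DataFrame whose rows
# carry the expected fields), spark (an active SparkSession), step_conf, and
# cache_conf are placeholders assumed for illustration.
enhanced_rdd = df.rdd.mapPartitions(
    lambda part: fun_deviceinfo_in_rdd_mapPartitions(part, step_conf, cache_conf))
enhanced_df = spark.createDataFrame(enhanced_rdd)  # schema inferred from the Row fields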
def fun_userinfo_in_rdd_mapPartitions(iter_x, step_conf, cache_conf):
    """
    Input:  iter[Row]
    Output: iter[Row]
    """
    DATA_JOIN_KEY = 'date_joined'

    host = cache_conf['host']
    port = cache_conf.get('port', 3306)
    db = cache_conf['db']
    user = cache_conf['user']
    password = cache_conf.get('password', '')
    table_name = cache_conf['tableName']
    key_name = cache_conf['keyName']  # course_id
    cache_key_name_list = cache_conf['cache.keyName.list']
    cache_sql = 'select ' + key_name + ', ' + cache_key_name_list + ' from ' + db + '.' + table_name
    query_sql = cache_sql + ' where ' + key_name + ' = %s'
    # TODO: support batch queries
    # batch_query_sql = cache_sql + ' where ' + key_name + " in (?)"
    # TODO: support broadcast -- cache a copy of the external data at application startup
    broadcast_enabled = cache_conf.get('broadcast.enabled', False) \
        if isinstance(cache_conf.get('broadcast.enabled'), bool) else False
    # Incremental dynamic cache
    cache_id = 'ds_id_' + str(cache_conf['source.id']) + '#cache_id_' + str(cache_conf['cache.id'])
    cache_pool = MySQLUtils.cache_pools.setdefault(cache_id, {})
    conn = MySQLUtils.get_connection(host=host, db=db, user=user, password=password, port=port)

    # Update attributes
    def proc_update(obj, date_joined=None):
        obj[DATA_JOIN_KEY] = date_joined if date_joined else ''

    for row in iter_x:
        obj = row.asDict()
        key_value = obj.get(key_name, '').strip()  # course_id
        # key_value is assumed unique; it may be empty
        if str(key_value) == '':
            # No join data: set the default value without querying.
            proc_update(obj)
        else:
            if key_value in cache_pool:
                key_cache = cache_pool[key_value]  # in-memory cache hit
                proc_update(obj, key_cache[DATA_JOIN_KEY])
            elif broadcast_enabled and key_value in cache_conf.get('broadcast').value:
                # broadcast hit
                key_cache = cache_conf.get('broadcast').value[key_value]
                print('= = ' * 10,
                      '[myapp EnhanceUserInfoProcessor.process.fun_userinfo_in_rdd_mapPartitions] found '
                      'broadcast_cache = ', key_cache)
                proc_update(obj, key_cache[DATA_JOIN_KEY])
            else:
                # Not in memory: query the external store.
                external_cache = MySQLUtils.query(conn=conn, sql_text=query_sql,
                                                  key_name=key_name, sql_args=(key_value,))
                if len(external_cache) == 0:
                    # No join data in the external store either: set the default value.
                    proc_update(obj)
                else:
                    # Join data found: update the field from it.
                    key_cache = external_cache[key_value]
                    proc_update(obj, key_cache[DATA_JOIN_KEY])
                # Refresh the in-memory cache; items() replaces the
                # Python-2-only iteritems().
                for k, v in external_cache.items():
                    cache_pool[k] = v
                external_cache.clear()
        yield Row(**obj)
def fun_courseinfo_in_rdd_mapPartitions(iter_x, step_conf, cache_conf):
    """
    Input:  iter[Row]
    Output: iter[Row]
    """
    from datetime import datetime, timedelta

    COURSE_TYPE_KEY = 'course_type'
    COURSE_OWNER_KEY = 'owner'
    COURSE_STATUS_KEY = 'status'
    COURSE_START_KEY = 'start'
    COURSE_END_KEY = 'end'
    COURSE_PROCESS_KEY = 'course_process'

    host = cache_conf['host']
    port = cache_conf.get('port', 3306)
    db = cache_conf['db']
    user = cache_conf['user']
    password = cache_conf.get('password', '')
    table_name = cache_conf['tableName']
    key_name = cache_conf['keyName']  # course_id
    cache_key_name_list = cache_conf['cache.keyName.list']
    cache_sql = 'select ' + key_name + ', ' + cache_key_name_list + ' from ' + db + '.' + table_name
    print('= = ' * 10,
          '[myapp EnhanceCourseInfoProcessor.process.fun_courseinfo_in_rdd_mapPartitions] cache_sql = ' + cache_sql)
    query_sql = cache_sql + ' where ' + key_name + ' = %s'
    # TODO: support batch queries
    batch_query_sql = cache_sql + ' where ' + key_name + " in (?)"
    # TODO: support broadcast -- cache a copy of the external data at application startup
    broadcast_enabled = cache_conf.get('broadcast.enabled', False) \
        if isinstance(cache_conf.get('broadcast.enabled'), bool) else False
    # Incremental dynamic cache
    cache_id = 'ds_id_' + str(cache_conf['source.id']) + '#cache_id_' + str(cache_conf['cache.id'])
    cache_pool = MySQLUtils.cache_pools.setdefault(cache_id, {})
    conn = MySQLUtils.get_connection(host=host, db=db, user=user, password=password, port=port)

    # def check_process(course_id, course_type, status, start, end, et, course_map):
    #     """
    #     Used for offline computation.
    #     """
    #     if course_type == "0":
    #         return -1 if (start is None or start > et or status == "-1") \
    #             else (1 if (end < et or course_id in course_map) else 0)
    #     if course_type == "1":
    #         return 1 if status == "-1" else 0
    #     return -1

    def check_process(course_type, course_status, course_start, course_end, check_date):
        """
        Used for real-time computation.
        check_date: the CST time taken when the record is processed
        course_map: no longer needed here
        """
        if course_type == "0":
            return -1 \
                if (course_start is None or course_start > check_date or course_status == "-1") \
                else (1 if (course_end < check_date) else 0)
        elif course_type == "1":
            return 1 if course_status == "-1" else 0
        else:
            return -1

    # Update attributes. check_date now defaults to None and is computed per
    # call: the original default argument was evaluated only once, at function
    # definition time, so long-lived partitions would have used a stale timestamp.
    def proc_update(obj, course_type=None, course_owner=None, course_status=None,
                    course_start=None, course_end=None, check_date=None):
        if check_date is None:
            check_date = (datetime.utcnow() + timedelta(hours=8)).strftime('%Y-%m-%d %H:%M:%S')
        obj[COURSE_TYPE_KEY] = course_type
        obj[COURSE_OWNER_KEY] = course_owner
        obj[COURSE_STATUS_KEY] = course_status
        obj[COURSE_START_KEY] = course_start
        obj[COURSE_END_KEY] = course_end
        obj[COURSE_PROCESS_KEY] = check_process(course_type, course_status,
                                                course_start, course_end, check_date)

    for row in iter_x:
        obj = row.asDict()
        key_value = obj.get(key_name, '').strip()  # course_id
        # key_value is assumed unique; it may be empty
        if str(key_value) == '':
            # No join data: set default values without querying.
            proc_update(obj)
        else:
            if key_value in cache_pool:
                key_cache = cache_pool[key_value]  # in-memory cache hit
                proc_update(obj, key_cache[COURSE_TYPE_KEY], key_cache[COURSE_OWNER_KEY],
                            key_cache[COURSE_STATUS_KEY], key_cache[COURSE_START_KEY],
                            key_cache[COURSE_END_KEY])
            elif broadcast_enabled and key_value in cache_conf.get('broadcast').value:
                # broadcast hit
                key_cache = cache_conf.get('broadcast').value[key_value]
                print('= = ' * 10,
                      '[myapp EnhanceCourseInfoProcessor.process.fun_courseinfo_in_rdd_mapPartitions] found '
                      'broadcast_cache = ', key_cache)
                proc_update(obj, key_cache[COURSE_TYPE_KEY], key_cache[COURSE_OWNER_KEY],
                            key_cache[COURSE_STATUS_KEY], key_cache[COURSE_START_KEY],
                            key_cache[COURSE_END_KEY])
            else:
                # Not in memory: query the external store.
                external_cache = MySQLUtils.query(conn=conn, sql_text=query_sql,
                                                  key_name=key_name, sql_args=(key_value,))
                if len(external_cache) == 0:
                    # No join data in the external store either: set default values.
                    proc_update(obj)
                else:
                    # Join data found: update fields from it, then the course info.
                    key_cache = external_cache[key_value]
                    proc_update(obj, key_cache[COURSE_TYPE_KEY], key_cache[COURSE_OWNER_KEY],
                                key_cache[COURSE_STATUS_KEY], key_cache[COURSE_START_KEY],
                                key_cache[COURSE_END_KEY])
                # Refresh the in-memory cache; items() replaces the
                # Python-2-only iteritems().
                for k, v in external_cache.items():
                    cache_pool[k] = v
                external_cache.clear()
        yield Row(**obj)
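# Worked cases for check_process. The reading of -1/0/1 as not-started /
# in-progress / ended is inferred from the branch logic, not stated in the
# source. check_process is nested inside the partition function above; the same
# logic is lifted to module level here so the cases can run standalone.
def check_process(course_type, course_status, course_start, course_end, check_date):
    if course_type == "0":
        return -1 if (course_start is None or course_start > check_date or course_status == "-1") \
            else (1 if course_end < check_date else 0)
    elif course_type == "1":
        return 1 if course_status == "-1" else 0
    return -1

now = '2019-06-01 12:00:00'  # '%Y-%m-%d %H:%M:%S' strings compare correctly as text
assert check_process("0", "0", '2019-05-01 00:00:00', '2019-07-01 00:00:00', now) == 0    # in progress
assert check_process("0", "0", '2019-05-01 00:00:00', '2019-05-31 00:00:00', now) == 1    # already ended
assert check_process("0", "0", '2019-06-15 00:00:00', '2019-07-01 00:00:00', now) == -1   # not started yet
assert check_process("0", "-1", '2019-05-01 00:00:00', '2019-07-01 00:00:00', now) == -1  # closed
assert check_process("1", "-1", None, None, now) == 1
assert check_process("2", "0", None, None, now) == -1  # unknown type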
# stream_request_body is required for data_received() to be invoked per chunk;
# the class docstring says it is used, but the decorator was missing.
@tornado.web.stream_request_body
class UploadHandlerV1(tornado.web.RequestHandler):
    """
    Tornado file-upload handler; uses stream_request_body to support large files.
    """

    def initialize(self):
        """
        Step 1: initialization.
        """
        self.set_header('Content-type', 'application/json')
        self.upload_dir = FDFS_CONFIG.get('tmp_path')
        if not os.path.exists(self.upload_dir):
            os.mkdir(self.upload_dir)
        self.fdfs_client = FDFSUtils()
        self.mysql_client = MySQLUtils()
        self.file_tmp_path = None
        self.res_status = dict()
        self.file_info = dict()

    @tornado.gen.coroutine
    def prepare(self):
        """
        Step 2: read the request headers.
        """
        try:
            total = int(self.request.headers.get("Content-Length", "0"))
        except (TypeError, ValueError):
            total = 0
        self.ps = PostDataStreamer(total, self.upload_dir)

    @tornado.gen.coroutine
    def data_received(self, chunk):
        """
        Step 3: write the file.
        :param chunk: file content
        """
        self.ps.receive(chunk)

    def fdfs_index(self, file_name, domain_name, replace=False):
        """
        Upload the file to fdfs and insert the index record into mysql.
        :return: 0 -- uploaded and indexed successfully
                 1 -- mysql-related error
                 2 -- fdfs-related error
                 3 -- other error
        """
        try:
            exist_status, exist_info = self.mysql_client.raw_sql_fdfs_exist(file_name, domain_name)
            if exist_status == 0:
                # Already exists; decide whether to overwrite.
                if replace:
                    fdfs_up_status, fdfs_up_info = self.fdfs_client.upload_file(self.file_tmp_path)
                    if fdfs_up_status:
                        file_group, file_local_path = fdfs_up_info.split('/', 1)
                        self.file_info['file_group'] = file_group
                        self.file_info['file_local_path'] = file_local_path
                        mysql_up_status, mysql_up_info = self.mysql_client.fdfs_update(
                            self.file_info, file_name, domain_name)
                        if mysql_up_status == 0:
                            # Remove the old copy, unless the previous index was incomplete.
                            if exist_info.get('file_group', '') and exist_info.get('file_local_path', ''):
                                delete_status, delete_result = self.fdfs_client.delete_file(
                                    exist_info.get('file_group', ''),
                                    exist_info.get('file_local_path', ''))
                                if not delete_status:
                                    raise FdfsExcept("{res}:{group}/{path}".format(
                                        res=delete_result,
                                        group=self.file_info['file_group'],
                                        path=self.file_info['file_local_path']))
                        else:
                            raise MysqlExcept(mysql_up_info)
                    else:
                        # Upload failed.
                        raise FdfsExcept(fdfs_up_info)
            elif exist_status == 1:
                # Does not exist; upload a new file.
                # Insert a half-complete record first.
                self.file_info['file_group'] = ''
                self.file_info['file_local_path'] = ''
                mysql_insert_status, mysql_insert_info = self.mysql_client.fdfs_insert(self.file_info)
                if mysql_insert_status:
                    fdfs_up_status, fdfs_up_info = self.fdfs_client.upload_file(self.file_tmp_path)
                    if fdfs_up_status:
                        file_group, file_local_path = fdfs_up_info.split('/', 1)
                        self.file_info['file_group'] = file_group
                        self.file_info['file_local_path'] = file_local_path
                        mysql_up_status, mysql_up_info = self.mysql_client.fdfs_update_id(
                            self.file_info, file_name, self.file_info['domain_id'])
                        if not mysql_up_status:
                            raise MysqlExcept("update-%s" % mysql_up_info)
                    else:
                        raise FdfsExcept(str(fdfs_up_info))
                else:
                    raise MysqlExcept("insert-%s" % mysql_insert_info)
            else:
                raise MysqlExcept("exist-%s" % exist_info)
            # Earlier variant without the half-record insert, kept for reference:
            # elif exist_status == 1:
            #     fdfs_up_status, fdfs_up_info = self.fdfs_client.upload_file(self.file_tmp_path)
            #     if fdfs_up_status:
            #         file_group, file_local_path = fdfs_up_info.split('/', 1)
            #         self.file_info['file_group'] = file_group
            #         self.file_info['file_local_path'] = file_local_path
            #         mysql_insert_status, mysql_insert_info = self.mysql_client.fdfs_insert(self.file_info)
            #         if not mysql_insert_status:
            #             raise MysqlExcept("insert-%s" % mysql_insert_info)
            #     else:
            #         raise FdfsExcept(str(fdfs_up_info))
            # else:
            #     raise MysqlExcept("exist-%s" % exist_info)
        except MysqlExcept as error:
            return 1, str(error.msg)
        except FdfsExcept as error:
            return 2, str(error.msg)
        except Exception as error:
            return 3, str(error)
        return 0, None

    @tornado.gen.coroutine
    def post(self, *args, **kwargs):
        """
        Step 4: collect the file info, upload, write to the database, clean up.
        """
        domain = self.get_argument('domain', default='test', strip=True)
        file_name = self.get_argument('filename', default=None, strip=True)
        hash_flag = self.get_argument('hash', default='false', strip=True)
        replace = self.get_argument('replace', default='false', strip=True)
        replace_flag = False
        # redis = self.get_argument('redis', default='false', strip=True)
        try:
            self.ps.finish_receive()
            # Collect file info.
            for idx, part in enumerate(self.ps.parts):
                self.file_info['file_size'] = part.get('size', 0)
                self.file_tmp_path = part.get("tmpfile").name
                if hash_flag == 'true':
                    md5, crc32 = HashUtils(self.file_tmp_path).file_hash()
                    self.file_info['file_md5'] = md5
                    self.file_info['file_crc32'] = crc32
                else:
                    self.file_info['file_md5'] = ""
                    self.file_info['file_crc32'] = ""
                for header in part["headers"]:
                    params = header.get("params", None)
                    if params:
                        if file_name:
                            self.file_info['file_name'] = file_name
                        else:
                            self.file_info['file_name'] = params.get("filename", "")
            domain_exist_stat, domain_exist_info = self.mysql_client.domain_id_exist(domain)
            if domain_exist_stat == 0:
                self.file_info['domain_id'] = domain_exist_info.get('id')
                # Upload the file and write the index record.
                if replace == 'true':
                    replace_flag = True
                fdfs_index_status, fdfs_index_info = self.fdfs_index(
                    self.file_info['file_name'], domain, replace=replace_flag)
                if fdfs_index_status == 0:
                    logging.info("file: %s, domain: %s, fdfs upload and index insert success"
                                 % (self.file_info['file_name'], domain))
                    self.res_status['status'], self.res_status['result'] = 0, self.file_info['file_name']
                else:
                    logging.error("file: %s, domain: %s, error: %s-%s"
                                  % (self.file_info['file_name'], domain,
                                     str(fdfs_index_status), fdfs_index_info))
                    self.res_status['status'], self.res_status['result'] = fdfs_index_status, fdfs_index_info
            elif domain_exist_stat == 1:
                self.res_status['status'], self.res_status['result'] = 6, "Domain not exist"
            else:
                logging.error("file: %s, domain: %s, error: %s"
                              % (self.file_info['file_name'], domain, domain_exist_info))
                self.res_status['status'], self.res_status['result'] = 5, domain_exist_info
        except Exception as error:
            logging.error(str(error))
            self.res_status['status'], self.res_status['result'] = 4, str(error)
        finally:
            self.mysql_client.close_connetc()
            self.file_info.clear()
            self.ps.release_parts()  # delete the temporary part files
            self.write(json.dumps(self.res_status))
            self.finish()
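# A client-side sketch of an upload against this handler, assuming it is
# mounted at /v1/upload (the path, host, port, and file name are illustrative
# assumptions) and using the requests library.
import requests

with open('report.pdf', 'rb') as f:
    resp = requests.post(
        'http://127.0.0.1:8888/v1/upload',
        params={'domain': 'test', 'filename': 'report.pdf',
                'hash': 'true', 'replace': 'false'},
        files={'file': f})  # multipart/form-data body, streamed by the handler
print(resp.json())  # expected on success: {"status": 0, "result": "report.pdf"}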