def do_insert(msg):
    """Store the event carried by one queued message using ``insert_data``.

    ``msg.value`` is UTF-8 encoded JSON; its ``data`` member must contain
    every field listed in ``field_names`` below. ``msg.offset`` is printed
    as a progress indicator.

    Args:
        msg: Message object exposing ``value`` (bytes) and ``offset``.

    Returns:
        None. Any exception raised while decoding or inserting is caught and
        its traceback is handed to ``write_to_log`` instead of propagating.
    """
    # Keyword names expected by insert_data, in the order they are read
    # from the payload.
    field_names = (
        'project', 'data_decode', 'User_Agent', 'Host', 'Connection',
        'Pragma', 'Cache_Control', 'Accept', 'Accept_Encoding',
        'Accept_Language', 'ip', 'ip_city', 'ip_asn', 'url', 'referrer',
        'remark', 'ua_platform', 'ua_browser', 'ua_version', 'ua_language',
        'ip_is_good', 'ip_asn_is_good', 'created_at', 'updated_at',
    )
    try:
        payload = json.loads(msg.value.decode('utf-8'))['data']
        print(msg.offset)
        # A missing field raises KeyError here and is logged below.
        row = {name: payload[name] for name in field_names}
        insert_data(**row, use_kafka=False)
    except Exception:
        write_to_log(filename='kafka_consumer',
                     defname='do_insert',
                     result=traceback.format_exc())
def realtime_subscribe(broker):
    """Consume ``'event_topic'`` from *broker* and store each event.

    Every message value is decoded as UTF-8 JSON. The event's ``project``
    and a handful of optional ``properties`` entries (``$os``, ``$browser``,
    ``$browser_version``, ``$ip``, ``$latest_referrer``) are extracted, the IP
    is resolved with ``get_addr`` / ``get_asn``, and the result is passed to
    ``insert_data`` together with the full decoded event.

    Args:
        broker: Bootstrap server address handed to ``KafkaConsumer``.

    Raises:
        KeyError: If a message lacks ``project`` or ``properties``. Nothing
            in this function catches it, so iteration stops.
    """
    consumer = KafkaConsumer('event_topic', bootstrap_servers=[broker])
    for message in consumer:
        data = json.loads(message.value.decode('utf-8'))
        project = data['project']
        properties = data['properties']

        # Optional properties fall back to an empty string when absent.
        ua_platform = properties.get('$os', '')  # client operating system
        ua_browser = properties.get('$browser', '')  # client browser
        ua_version = properties.get('$browser_version', '')  # browser version
        ip = properties.get('$ip', '')
        referrer = properties.get('$latest_referrer', '')

        ip_city, ip_is_good = get_addr(ip)
        ip_asn, ip_asn_is_good = get_asn(ip)
        # A zero "is_good" flag means the lookup result is replaced by the
        # '{}' placeholder before it is stored.
        if ip_is_good == 0:
            ip_city = '{}'
        if ip_asn_is_good == 0:
            ip_asn = '{}'

        insert_data(project=project,
                    data_decode=data,
                    User_Agent='',
                    Host='',
                    Connection='',
                    Pragma='',
                    Cache_Control='',
                    Accept='',
                    Accept_Encoding='',
                    Accept_Language='',
                    ip=ip,
                    ip_city=ip_city,
                    ip_asn=ip_asn,
                    url='',
                    referrer=referrer,
                    remark='',
                    ua_platform=ua_platform,
                    ua_browser=ua_browser,
                    ua_version=ua_version,
                    ua_language='',
                    ip_is_good=ip_is_good,
                    ip_asn_is_good=ip_asn_is_good,
                    use_kafka=admin.use_kafka)
def upload_events_from_pickle_to_sql(project='tvcbook', remark='production'):
    """Import pickled event exports through ``insert_data``, then delete them.

    Every file under ``data_export/<project>/<remark>/events`` is processed in
    sorted path order. Each file must unpickle to an iterable of JSON strings,
    one per event. For each event the IP is resolved with ``get_addr`` /
    ``get_asn``, ``time`` is converted to an epoch timestamp, and the event is
    inserted. A failure on a single event is logged via ``write_to_log`` and
    does not stop the import.

    Args:
        project: Project directory name; also stored as the event's project.
        remark: Sub-directory name; also stored as the event's remark.

    Returns:
        None. Returns immediately when the directory is missing or empty.
    """
    dirpath = os.path.join('data_export', project, remark, 'events')
    # Gather every file below the export directory, at any depth.
    filelist = sorted(
        os.path.join(maindir, filename)
        for maindir, _subdirs, file_names in os.walk(dirpath)
        for filename in file_names
    )

    for pkl in filelist:
        # NOTE(review): unpickling can execute arbitrary code; only run this
        # against export files from a trusted source.
        with open(pkl, "rb") as f:
            results = pickle.load(f)

        for item in results:
            try:
                itemdict = json.loads(item)
                all_json = {
                    "properties": itemdict,
                    "distinct_id": itemdict["distinct_id"],
                    "event": itemdict["event"],
                    "type": "track"
                }
                # An event without "$ip" raises KeyError here, so it is
                # logged and skipped rather than inserted.
                ip = itemdict["$ip"]
                ip_city, ip_is_good = get_addr(ip)
                ip_asn, ip_asn_is_good = get_asn(ip)
                # A zero "is_good" flag means the lookup result is replaced
                # by the '{}' placeholder before it is stored.
                if ip_is_good == 0:
                    ip_city = '{}'
                if ip_asn_is_good == 0:
                    ip_asn = '{}'
                print(all_json)
                # Drop fractional seconds, then convert using local time.
                created_at = time.mktime(
                    time.strptime(itemdict["time"].split('.')[0],
                                  '%Y-%m-%d %H:%M:%S'))
                # Use the caller's project; it was previously hard-coded to
                # 'tvcbook' regardless of the argument.
                insert_data(project=project,
                            data_decode=all_json,
                            User_Agent=None,
                            Host=None,
                            Connection=None,
                            Pragma=None,
                            Cache_Control=None,
                            Accept=None,
                            Accept_Encoding=None,
                            Accept_Language=None,
                            ip=ip,
                            ip_city=ip_city,
                            ip_asn=ip_asn,
                            url=None,
                            referrer=itemdict.get("$referrer"),
                            remark=remark,
                            ua_platform=itemdict.get("$lib"),
                            ua_browser=itemdict.get("$browser"),
                            ua_version=itemdict.get("$browser_version"),
                            ua_language=None,
                            ip_is_good=ip_is_good,
                            ip_asn_is_good=ip_asn_is_good,
                            created_at=created_at)
            except Exception:
                error = traceback.format_exc()
                write_to_log(filename='import_from_sa',
                             defname='upload_events_from_pickle_to_sql',
                             result=error)

        # NOTE(review): the file is deleted even if some of its events failed
        # to import; the failures survive only in the log.
        os.remove(pkl)
def do_insert(msg):
    """Route one queued message to the insert function for its ``group``.

    ``msg.value`` is UTF-8 encoded JSON with a ``data`` object and an optional
    ``group`` string, dispatched as follows:

    * ``'installation_track'`` -> ``insert_installation_track``
    * ``'shortcut_history'``   -> ``insert_shortcut_history``
    * ``'shortcut_read'``      -> ``insert_shortcut_read``
    * ``'event_track'``, no group, or any other value -> ``insert_data``

    ``msg.offset`` is printed as a progress indicator.

    Args:
        msg: Message object exposing ``value`` (bytes) and ``offset``.

    Returns:
        None. Any exception raised while decoding or inserting is caught and
        its traceback is handed to ``write_to_log`` instead of propagating.
    """
    # Keyword names shared by insert_data and insert_installation_track, in
    # the order they are read from the payload.
    track_fields = (
        'project', 'data_decode', 'User_Agent', 'Host', 'Connection',
        'Pragma', 'Cache_Control', 'Accept', 'Accept_Encoding',
        'Accept_Language', 'ip', 'ip_city', 'ip_asn', 'url', 'referrer',
        'remark', 'ua_platform', 'ua_browser', 'ua_version', 'ua_language',
        'ip_is_good', 'ip_asn_is_good', 'created_at', 'updated_at',
    )
    try:
        # Decode the payload once and reuse it for both lookups.
        payload = json.loads(msg.value.decode('utf-8'))
        group = payload.get('group')
        data = payload['data']
        print(msg.offset)

        if group == 'shortcut_history':
            insert_shortcut_history(short_url=data['short_url'],
                                    result=data['status'],
                                    cost_time=data['time2'],
                                    ip=data['ip'],
                                    user_agent=data['user_agent'],
                                    accept_language=data['accept_language'],
                                    ua_platform=data['ua_platform'],
                                    ua_browser=data['ua_browser'],
                                    ua_version=data['ua_version'],
                                    ua_language=data['ua_language'],
                                    created_at=data['created_at'])
        elif group == 'shortcut_read':
            insert_shortcut_read(short_url=data['short_url'],
                                 ip=data['ip'],
                                 user_agent=data['user_agent'],
                                 accept_language=data['accept_language'],
                                 ua_platform=data['ua_platform'],
                                 ua_browser=data['ua_browser'],
                                 ua_version=data['ua_version'],
                                 ua_language=data['ua_language'],
                                 referrer=data['referrer'],
                                 created_at=data['created_at'])
        else:
            # A missing field raises KeyError here and is logged below.
            track_kwargs = {name: data[name] for name in track_fields}
            if group == 'installation_track':
                insert_installation_track(**track_kwargs, use_kafka=False)
            else:
                # 'event_track' and every unrecognised or missing group are
                # stored as ordinary events.
                insert_data(**track_kwargs, use_kafka=False)
    except Exception:
        error = traceback.format_exc()
        write_to_log(filename='kafka_consumer',
                     defname='do_insert',
                     result=error)