# Exported profile fields copied into ``properties`` as
# (key in the exported record, key in the rebuilt profile).
# Only ``userid`` is renamed (to ``userId``).
_USER_PROFILE_FIELDS = (
    ('userid', 'userId'),
    ('name', 'name'),
    ('realname', 'realname'),
    ('sex', 'sex'),
    ('verification_type', 'verification_type'),
    ('company', 'company'),
    ('viptype', 'viptype'),
)


def _guess_lib_from_first_id(first_id):
    """Return the ``$lib`` value inferred from the length of ``first_id``."""
    id_len = len(first_id)
    if id_len == 16:
        return 'js'
    if 39 <= id_len <= 46:
        return 'MiniProgram'
    if 51 <= id_len <= 64:
        return 'js'
    # Spelling kept as-is: this value is written to storage.
    return 'unknow'


def _rebuild_user_profile(raw_item):
    """Turn one exported JSON user record into a ``profile_set`` payload."""
    record = json.loads(raw_item)
    first_id = record['first_id']
    data_rebuild = {
        "properties": {},
        "lib": {'$lib': _guess_lib_from_first_id(first_id)},
        "distinct_id": "",
        "event": "",
        "type": "profile_set",
    }
    if 'second_id' in record:
        # The record carries two ids: second_id becomes the distinct id and
        # first_id is kept as the mapped/original id.
        data_rebuild["distinct_id"] = record['second_id']
        data_rebuild["map_id"] = first_id
        data_rebuild["original_id"] = first_id
    else:
        data_rebuild["distinct_id"] = first_id

    # NOTE(review): 'viptype' is copied for every record here. Confirm this
    # matches the intended behaviour for records that carry a second_id.
    properties = data_rebuild["properties"]
    for source_key, target_key in _USER_PROFILE_FIELDS:
        if source_key in record:
            properties[target_key] = record[source_key]
    return data_rebuild


def upload_users_from_pickle_to_sql(project='tvcbook', remark='production'):
    """Load exported users from a pickle file and insert them as profiles.

    Reads ``data_export/<project>/<remark>/users/users_all.pkl``, which must
    unpickle to an iterable of JSON strings. Each string is rebuilt into a
    ``profile_set`` payload and passed to ``insert_user`` with
    ``created_at=0``. The pickle file is removed once the loop finishes.

    Args:
        project: Project name; selects the export directory and is the
            project the users are inserted into.
        remark: Sub-directory of the project export to read from.

    Errors while opening or unpickling the file propagate to the caller.
    Errors on a single record are written to the log via ``write_to_log``
    and the loop moves on to the next record.
    """
    dirpath = os.path.join('data_export', project, remark, 'users')
    filepath = os.path.join(dirpath, 'users_all.pkl')
    # SECURITY: unpickling can execute arbitrary code. Only run this against
    # export files produced by a trusted process.
    with open(filepath, "rb") as f2:
        results = pickle.load(f2)

    for index, item in enumerate(results, start=1):
        print(index)
        try:
            data_rebuild = _rebuild_user_profile(item)
            print(data_rebuild)
            # Insert into the requested project rather than a fixed one.
            insert_user(project=project, data_decode=data_rebuild, created_at=0)
        except Exception:
            error = traceback.format_exc()
            write_to_log(filename='import_from_sa',
                         defname='upload_users_from_pickle_to_sql',
                         result=error)
    # The file is removed even if some records failed; failures are in the log.
    os.remove(filepath)
def insert_data(project, data_decode, User_Agent, Host, Connection, Pragma,
                Cache_Control, Accept, Accept_Encoding, Accept_Language, ip,
                ip_city, ip_asn, url, referrer, remark, ua_platform,
                ua_browser, ua_version, ua_language, ip_is_good,
                ip_asn_is_good, created_at=None, updated_at=None,
                use_kafka=admin.use_kafka):
    """Store one decoded tracking payload directly or hand it to Kafka.

    With ``use_kafka is False`` the payload is written through
    ``insert_event`` and ``insert_device``; ``insert_properties`` is also
    called when the payload has an event and ``admin.use_properties`` is
    True. Payloads whose type is ``profile_set``, ``track_signup`` or
    ``profile_set_once`` are additionally passed to ``insert_user``.
    Exceptions from these calls are logged with ``write_to_log`` and not
    re-raised.

    With ``use_kafka is True`` the payload and request metadata are wrapped
    in a timestamped message and passed to ``insert_message_to_kafka``.

    Any other ``use_kafka`` value performs neither action. The elapsed time
    in seconds is printed before returning.

    The default for ``use_kafka`` is read from ``admin.use_kafka`` once,
    when the function is defined.
    """
    started_at = time.time()
    payload_json = json.dumps(data_decode, ensure_ascii=False)

    track_id = data_decode['_track_id'] if '_track_id' in data_decode else 0
    distinct_id = data_decode['distinct_id']
    event = data_decode['event'] if 'event' in data_decode else None
    remark = remark if remark else ''
    type_1 = data_decode['type'] if 'type' in data_decode else None

    # '$lib' is taken from 'lib' when that key exists; 'properties' is only
    # consulted when the payload has no 'lib' key at all.
    if 'lib' in data_decode:
        lib_holder = data_decode['lib']
    elif 'properties' in data_decode:
        lib_holder = data_decode['properties']
    else:
        lib_holder = {}
    lib = lib_holder['$lib'] if '$lib' in lib_holder else None

    if use_kafka is False:
        try:
            insert_event(
                table=project,
                alljson=payload_json,
                track_id=track_id,
                distinct_id=distinct_id,
                lib=lib,
                event=event,
                type_1=type_1,
                User_Agent=User_Agent,
                Host=Host,
                Connection=Connection,
                Pragma=Pragma,
                Cache_Control=Cache_Control,
                Accept=Accept,
                Accept_Encoding=Accept_Encoding,
                Accept_Language=Accept_Language,
                ip=ip,
                ip_city=ip_city,
                ip_asn=ip_asn,
                url=url,
                referrer=referrer,
                remark=remark,
                ua_platform=ua_platform,
                ua_browser=ua_browser,
                ua_version=ua_version,
                ua_language=ua_language,
                created_at=created_at,
            )
            insert_device(
                project=project,
                data_decode=data_decode,
                user_agent=User_Agent,
                accept_language=Accept_Language,
                ip=ip,
                ip_city=ip_city,
                ip_is_good=ip_is_good,
                ip_asn=ip_asn,
                ip_asn_is_good=ip_asn_is_good,
                ua_platform=ua_platform,
                ua_browser=ua_browser,
                ua_version=ua_version,
                ua_language=ua_language,
                created_at=created_at,
            )
            # Evaluated unconditionally so that a payload without
            # 'properties' is logged as an error, as before.
            property_names = list(data_decode['properties'].keys())
            if event and admin.use_properties is True:
                insert_properties(
                    project=project,
                    lib=lib,
                    remark=remark,
                    event=event,
                    properties=json.dumps(property_names),
                    properties_len=len(property_names),
                    created_at=created_at,
                    updated_at=updated_at,
                )
        except Exception:
            write_to_log(filename='api', defname='insert_date',
                         result=traceback.format_exc())

        if type_1 in ('profile_set', 'track_signup', 'profile_set_once'):
            try:
                insert_user(project=project, data_decode=data_decode,
                            created_at=created_at)
            except Exception:
                write_to_log(filename='api', defname='insert_date',
                             result=traceback.format_exc())
    elif use_kafka is True:
        now_seconds = int(time.time())
        now_millis = int(round(time.time() * 1000))
        # NOTE(review): the message is stamped with the current time; the
        # created_at / updated_at arguments are not forwarded on this path.
        # Confirm that is intended.
        request_fields = {
            "project": project,
            "data_decode": data_decode,
            "User_Agent": User_Agent,
            "Host": Host,
            "Connection": Connection,
            "Pragma": Pragma,
            "Cache_Control": Cache_Control,
            "Accept": Accept,
            "Accept_Encoding": Accept_Encoding,
            "Accept_Language": Accept_Language,
            "ip": ip,
            "ip_city": ip_city,
            "ip_asn": ip_asn,
            "url": url,
            "referrer": referrer,
            "remark": remark,
            "ua_platform": ua_platform,
            "ua_browser": ua_browser,
            "ua_version": ua_version,
            "ua_language": ua_language,
            "ip_is_good": ip_is_good,
            "ip_asn_is_good": ip_asn_is_good,
            "created_at": now_seconds,
            "updated_at": now_seconds,
        }
        insert_message_to_kafka(msg={"timestamp": now_millis,
                                     "data": request_fields})

    print(time.time() - started_at)