def dfsha2udpatecontactuuid(inputdf: pd.DataFrame):
    """Return a copy of ``inputdf`` whose ``contactuuid`` column is recomputed.

    Every column whose lower-cased name is not ``contactuuid``, ``imguuid``
    or ``appendtime`` takes part in the fingerprint: for each row, the list
    of those values is passed to ``sha2hexstr`` and the result is stored in
    ``contactuuid``. The caller's frame is left untouched.

    Column layout noted by the original author:
    ['contactuuid', 'NickName', 'ContactFlag', 'RemarkName', 'Sex',
     'Signature', 'StarFriend', 'AttrStatus', 'Province', 'City', 'SnsFlag',
     'KeyWord', 'appendtime', 'imguuid']
    """
    skipped = ['contactuuid', 'imguuid', 'appendtime']
    outdf = inputdf.copy()

    # Columns that feed the hash, in their original order.
    hashcols = []
    for colname in list(outdf.columns.values):
        if colname.lower() not in skipped:
            hashcols.append(colname)
    print(hashcols)

    def rowdigest(row):
        return sha2hexstr(list(row.values))

    outdf['contactuuid'] = outdf[hashcols].apply(rowdigest, axis=1)
    return outdf
def dfsha2noimg(inputdf: pd.DataFrame):
    """Return a deep copy of ``inputdf`` with ``contactuuid`` and ``appendtime`` set.

    ``contactuuid`` is the ``sha2hexstr`` digest of each row's values, taken
    over every column whose lower-cased name is not one of ``username``,
    ``imguuid``, ``headimg``, ``appendtime`` or ``contactuuid``.
    ``appendtime`` is set to the current ``pd.Timestamp.now()`` for all rows.

    Input layout noted by the original author:
    ['UserName', 'NickName', 'ContactFlag', 'RemarkName', 'Sex', 'Signature',
     'StarFriend', 'AttrStatus', 'Province', 'City', 'SnsFlag', 'KeyWord',
     'imguuid', 'headimg']
    """
    excluded = ['username', 'imguuid', 'headimg', 'appendtime', 'contactuuid']
    result = inputdf.copy(deep=True)

    # Keep only the columns that describe the contact itself.
    keepcols = list(
        filter(lambda cl: cl.lower() not in excluded,
               list(result.columns.values)))

    digests = result[keepcols].apply(
        lambda row: sha2hexstr(list(row.values)), axis=1)
    result['contactuuid'] = digests
    result['appendtime'] = pd.Timestamp.now()
    return result
def splitcontentfromnotemysms(noteguid: str):
    """Parse one mysms archive note into a DataFrame of SMS records.

    The note title is split on spaces: the first token is used as the contact
    name and the second as the phone number (with "+86" removed). Every
    ``div`` under ``en-note`` is treated as one message: a ``float:right``
    style marks it as sent, and the text after the first ":" carries the
    message content followed by a timestamp such as ``10:30 AM, 1/2/2019``.

    Args:
        noteguid: guid of the note to fetch through the note store.

    Returns:
        A two-item tuple ``([name], df)`` where ``df`` has the columns
        ['sent', 'name', 'number', 'time', 'content', 'smsuuid', 'type'],
        sorted by ``time`` descending. ``smsuuid`` is the ``sha2hexstr``
        digest of the row's sent/number/time/content values.

    Raises:
        IndexError: if the title has no second token, a div has no
            ``float:left|right`` style, or a message carries no timestamp.
        KeyError: if a div has no ``style`` attribute.
    """

    @trycounttimes2('evernote服务器')
    def gettitleandcontent(ntguid: str):
        ns = get_notestore()
        nttitle = ns.getNote(ntguid, False, False, False, False).title
        # Called after each note-store request; presumably counts API usage
        # -- TODO confirm against evernoteapijiayi's definition.
        evernoteapijiayi()
        ntcontent = ns.getNoteContent(ntguid)
        evernoteapijiayi()
        return nttitle, ntcontent

    nstitle, notecontent = gettitleandcontent(noteguid)
    titlesplitlst = nstitle.split(" ")
    name = titlesplitlst[0]
    number = titlesplitlst[1].replace("+86", "")
    print(name, number)

    nclines = BeautifulSoup(notecontent, 'lxml').find('en-note').find_all('div')
    # Each entry: [sent flag, text before the first ":", text after it].
    tiqulst = [
        [re.findall(r"float:(left|right)", item.attrs['style'])[0] == 'right']
        + [line.strip() for line in item.text.split(":", 1)]
        for item in nclines
    ]

    # Raw string so \d and \s are regex escapes rather than invalid string
    # escapes; [AP] instead of [A|P], which also accepted a literal "|".
    ptntime = re.compile(r"\d+:\d+\s+[AP]M, \d+/\d+/\d+")
    tiqudonelst = [
        [
            line[0],
            name,
            number,
            ptntime.sub("", line[2]),
            pd.to_datetime(ptntime.findall(line[2])[0]),
            'sms',
        ]
        for line in tiqulst
    ]

    smsnotedf = pd.DataFrame(
        tiqudonelst,
        columns=['sent', 'name', 'number', 'content', 'time', 'type'])
    smsnotedf['smsuuid'] = smsnotedf[['sent', 'number', 'time', 'content']].apply(
        lambda x: sha2hexstr(list(x.values)), axis=1)

    return [name], smsnotedf[[
        'sent', 'name', 'number', 'time', 'content', 'smsuuid', 'type'
    ]].sort_values('time', ascending=False)
print(tiqu) if len(tiqu) == 0: return nameold else: return tiqu[-1] alllst = pcdone + wenbiaodone + fubiaofinal alldone = [[ normalname(item[0], item[2], item[3]), item[1], item[2], item[3], item[4], item[5] ] for item in alllst] smsnotedf = pd.DataFrame( alldone, columns=['name', 'time', 'number', 'content', 'type', 'sent']) smsnotedf['smsuuid'] = smsnotedf[['sent', 'number', 'time', 'content']].apply( lambda x: sha2hexstr(list(x.values)), axis=1) return [nstitle, descpc + descnormal], smsnotedf[[ 'sent', 'name', 'number', 'time', 'content', 'smsuuid', 'type' ]].sort_values('time', ascending=False) def smsfromnotes2smsdb(notebookguid: str): notelst = findnotefromnotebook(notebookguid) for item in notelst: if not (guidchuli := getcfpoptionvalue('everpim', "noteguid", 'noteguid')): guidchulilst = [] else: guidchulilst = guidchuli.split(',')
# For every index label in witherrordf, overwrite number with the value held
# in name (presumably these rows had the number stored in the name column --
# TODO confirm against where witherrordf is built).
for ix in witherrordf.index.values:
    recordctdf.loc[ix, 'number'] = recordctdf.loc[ix, 'name']
recordctdf

# ### Normalise the data in number (i.e. strip a possible +86 or 86 prefix)

import re

# Raw string: '\+' is not a valid escape in an ordinary string literal.
# Only values longer than 8 characters are stripped; shorter ones are kept.
recordctdf['number'] = recordctdf['number'].apply(
    lambda x: re.sub(r'^\+?(86)', "", str(x)) if len(str(x)) > 8 else str(x))
recordctdf

# ### Rebuild the uuid (the name column is no longer part of it)

recordctdf['smsuuid'] = recordctdf[['sent', 'number', 'time', 'content']].apply(
    lambda x: sha2hexstr(list(x.values)), axis=1)
recordctdf

# ### Check for duplicates

# First occurrences only.
recordctdf[~recordctdf['smsuuid'].duplicated().values]
# Second and later occurrences of an smsuuid.
chongfudf = recordctdf[recordctdf['smsuuid'].duplicated().values]
# Every row (including the first occurrence) whose smsuuid is duplicated.
recordctdf[recordctdf.smsuuid.isin(list(chongfudf['smsuuid'])).values].sort_values('smsuuid')

smsdonedf = recordctdf.drop_duplicates('smsuuid')
smsdonedf

getdeviceid()
def getimguuid(inputbytes: bytes):
    """Return the ``sha2hexstr`` fingerprint of an image supplied as raw bytes.

    The bytes are wrapped in a ``BytesIO``, opened with ``Image.open``,
    converted to a numpy array, and that array is handed to ``sha2hexstr``.

    NOTE(review): ``sha2hexstr`` hashes ``str(obj)`` for non-bytes input, and
    under numpy's default print options the ``str()`` of a large array is
    abbreviated with "...". The digest therefore appears to depend only on
    the printed corner values, not on every pixel. Hashing
    ``array.tobytes()`` would cover the whole image but would change every
    id already stored -- confirm before changing.
    """
    pixels = np.array(Image.open(BytesIO(inputbytes)))
    return sha2hexstr(pixels)
# +
from hashlib import sha256, md5, blake2b


def sha2hexstr(inputo: object):
    """Return the upper-case SHA-256 hex digest of ``inputo``.

    ``bytes`` (including subclasses) are hashed as they are; any other object
    is first converted with ``str()`` and encoded as UTF-8. The digest depends
    only on those bytes, so it is stable across interpreter runs.
    """
    if isinstance(inputo, bytes):
        targetb = inputo
    else:
        targetb = str(inputo).encode('utf-8')
    return sha256(targetb).hexdigest().upper()


sha2hexstr('heart5')
# -

# ~~Ouch, I got badly burned here!~~ I had long suspected the result depended
# on the runtime environment and kept changing; today I finally found the
# cause: the built-in hash() is tied to the runtime environment, whereas
# hashlib is what should be used for values kept in permanent storage.

# + jupyter={"source_hidden": true}
def uuid3hexstr(inputo: object):
    """Return the upper-case hex of ``hash(uuid3(NAMESPACE_URL, str(inputo)))``.

    The uuid3 value is passed through the built-in ``hash()`` and the leading
    "0x" of its ``hex()`` form is dropped. Per the author's note above, prefer
    ``sha2hexstr`` for anything that is persisted.
    """
    inputstr = str(inputo)
    uuidout = uuid.uuid3(uuid.NAMESPACE_URL, inputstr)
    return hex(hash(uuidout))[2:].upper()
smsdf
smsdf.groupby('type').count().index

# Drop failed messages. The explicit copy lets the column assignments below
# act on an independent frame instead of a selection of smsdf.
smsdfclean = smsdf[smsdf.type != 'failed'].copy()
smsdf[smsdf.type == 'sent']

# +
import re

# Raw string: '\+' is not a valid escape in an ordinary string literal.
ptn = re.compile(r"^\+86")
smsdfclean['sent'] = smsdfclean['type'] == 'sent'
smsdfclean['number'] = smsdfclean['number'].apply(lambda x: ptn.sub('', x))

smsdfdone = smsdfclean[['sent', 'sender', 'number', 'received', 'body']].copy()
# NOTE(review): this digest covers all five columns, including the sender
# name; other cells in this file hash only sent/number/time/content --
# confirm which definition of smsuuid is intended here.
smsdfdone['smsuuid'] = smsdfdone.apply(lambda x: sha2hexstr(list(x.values)), axis=1)
smsdfdone['type'] = 'sms'
smsdfdone.columns = [
    'sent', 'name', 'number', 'time', 'content', 'smsuuid', 'type'
]
# -

# The sorted frame is not assigned, so smsdfdone itself keeps its order.
smsdfdone.sort_values('time', ascending=False)
smsdfdone

# ##### Load the SMS archive from the Evernote «mysms» notebook into the database

# ###### Database preparation