def download_resources():
    """Fetch the resources archive into ``dist`` and return the ``dist`` directory.

    A previously downloaded ``window_main_resources.zip`` is deleted before the
    new download is started.

    :return: the value of ``FilePathUtil.get_full_dir('dist')``.
    """
    source_url = WxConfig.getAppDownloadUrl()
    archive_path = FilePathUtil.get_full_dir('dist', 'window_main_resources.zip')

    # Drop any stale archive so the download writes a fresh file.
    stale_archive_present = os.path.exists(archive_path)
    if stale_archive_present:
        os.remove(archive_path)

    downloadFile(archive_path, source_url)
    dist_dir = FilePathUtil.get_full_dir('dist')
    return dist_dir
def main(full_dir):
    """Upload the pictures of every moment listed in the Excel sheet ``full_dir``.

    Rows are processed starting at the module-level resume index
    ``upload_md5_pic_position``.  For every row whose ``count`` is non-zero and
    for which ``WxUploader.need_upload_photo_item`` returns a truthy value, the
    files found under ``wxfriend/pic/WeiXinCopy/<content_md5>`` are uploaded
    (at most ``count`` of them) and the joined ids are passed to ``put_img``.
    After the row loop the whole ``WeiXinCopy`` directory is removed.

    The loop stops early when ``wx_stop.stopFlag`` is set or when any step of
    an upload raises.

    :param full_dir: path of the Excel file whose rows describe the moments.
    :return: None
    """
    global upload_md5_pic_position
    Logger.println(f"【().excel={full_dir}】")
    array = excel_util.excel2array(full_dir)

    start_index = 0
    if upload_md5_pic_position > 0:
        Logger.println(f"【找到上次上传的下标={upload_md5_pic_position}】")
        start_index = upload_md5_pic_position

    length = len(array)
    if start_index + 1 > length:
        Logger.println(f"【已经是最后一条了】")
        return

    upload_count = 0
    # Enumerate with the absolute row index so the resume position written
    # below refers to the sheet, not to the sliced list.
    for row_index, item in enumerate(array[start_index:length], start=start_index):
        if wx_stop.stopFlag:
            break

        content_md5 = item['content_md5']
        count = int(item['count'])
        if count == 0:
            Logger.println(f"【{content_md5}没有抓取到图片】")
            continue

        Logger.println(f"【().content_md5={content_md5}】")
        item['content'] = '1'
        res = WxUploader.need_upload_photo_item(item)
        if not res:
            Logger.println(f"【()={content_md5}无需上传图片】")
            continue

        Logger.println(f"【().开始图片上传={content_md5}】")
        try:
            files = FilePathUtil.get_files_by_dir(
                FilePathUtil.get_full_dir("wxfriend", "pic", "WeiXinCopy", content_md5))
            tokens = files_token(count=count)
            if not tokens:
                Logger.println(f"【token 生成失败】")
                continue

            img_ids = []
            # The picture loop has its own index variable: sharing the row
            # loop's variable would clobber the resume position.
            for pic_index, file in enumerate(files):
                if pic_index > count - 1:
                    Logger.println(f"【content_md5={content_md5}图片出现重复】")
                    break
                img_id = upload_img(tokens[pic_index], file)
                img_ids.append(str(img_id))

            join = ",".join(img_ids)
            if join:
                put_img(content_md5, join)
                upload_count += 1
                upload_md5_pic_position = row_index
            else:
                Logger.println(f"【content_md5={content_md5}没有对应的图片】")
        except Exception as e:
            # Any failure aborts the whole batch; the position stored so far
            # is what a later call resumes from.
            Logger.println(f"【图片文件上传失败{e}】")
            break

    Logger.println(f"【本次共完成{upload_count}条朋友圈信息的图片文件上传】")
    des_dir = FilePathUtil.get_full_dir('wxfriend', 'pic', 'WeiXinCopy')
    if os.path.exists(des_dir):
        shutil.rmtree(des_dir)
def download(signals: pyqtSignal = None):
    """Download the application executable and return the ``dist`` directory.

    The target path is ``WxConfig.getAppexeReplaceDir()``; when that is
    ``None`` the file goes to ``dist/window_main.exe``.  An existing file at the
    target path is removed before downloading.

    :param signals: forwarded unchanged to ``downloadFile`` as third argument.
    :return: the value of ``FilePathUtil.get_full_dir('dist')``.
    """
    source_url = WxConfig.getAppDownloadUrl()

    configured_path = WxConfig.getAppexeReplaceDir()
    target_path = (
        configured_path
        if configured_path is not None
        else FilePathUtil.get_full_dir('dist', 'window_main.exe')
    )

    already_there = os.path.exists(target_path)
    if already_there:
        os.remove(target_path)

    downloadFile(target_path, source_url, signals)
    return FilePathUtil.get_full_dir('dist')
def export():
    """Pull the device's ``/sdcard/Pictures/WeiXin/`` folder into ``wxfriend/pic``.

    The local ``wxfriend/pic/WeiXin`` directory left over from an earlier pull
    is removed first (best effort: a failure is logged and the pull still
    runs).  The device id is read from ``WxConfig.getAppiumConfig()["deviceName"]``;
    when it is empty nothing is pulled and a message is logged.

    :return: None
    """
    full_dir = FilePathUtil.get_full_dir('wxfriend', 'pic')
    weixin_path = FilePathUtil.get_full_dir('wxfriend', 'pic', 'WeiXin')

    if os.path.exists(weixin_path):
        try:
            shutil.rmtree(weixin_path)
        except OSError as e:
            # Still best effort, but no longer silent, and no longer catching
            # KeyboardInterrupt/SystemExit as the bare ``except`` did.
            Logger.println(f"【export().删除旧图片目录失败 e={e}】")

    device_id = WxConfig.getAppiumConfig()["deviceName"]
    if device_id:
        execute_adb_shell(f'pull /sdcard/Pictures/WeiXin/ {full_dir}', deviceId=device_id)
    else:
        Logger.println(f"【().未找到设备】")
def save_screenshot(self, content_element, dir, file_name):
    """Save a screenshot of ``content_element`` under ``wxfriend/pic/<dir>``.

    The target directory is created when missing, including any missing
    parents, and an already existing directory is not an error.

    :param content_element: object exposing ``screenshot(path)``.
    :param dir: sub-directory name below ``wxfriend/pic``.
    :param file_name: name of the image file to write.
    :return: ``True`` when the screenshot call completed, ``False`` when any
        exception was raised (the exception is logged).
    """
    try:
        full_dir = FilePathUtil.get_full_dir("wxfriend", "pic", dir)
        # makedirs(exist_ok=True) avoids the check-then-create race of
        # ``os.path.exists`` + ``os.mkdir`` and also creates missing parents.
        os.makedirs(full_dir, exist_ok=True)
        content_element.screenshot(FilePathUtil.get_full_dir(full_dir, file_name))
        return True
    except Exception as e:
        Logger.println(f"【save_screenshot().e={e}】")
        return False
def batch_export_upload(full_dir):
    """Repeatedly classify exported pictures and upload them until stopped.

    Each cycle:

    1. resets the module-level ``upload_md5_pic_position`` to 0;
    2. reads ``appiumConfig/batch_pic_seconds`` from a fresh ``MonitorConfig``
       (default 180 seconds) and waits that long, polling every 5 seconds and
       logging the remaining time;
    3. increments the module-level ``index`` cycle counter;
    4. calls ``PicClassfyUtil.classify_from(full_dir)`` and then ``main`` on the
       most recently modified file in ``wxfriend/excel/pic``;
    5. returns if ``wx_stop.stopFlag`` is set, otherwise starts the next cycle.

    The cycles run in a loop rather than through self-recursion, so a
    long-running job cannot exhaust the interpreter's recursion limit.

    :param full_dir: passed unchanged to ``PicClassfyUtil.classify_from``.
    :return: None
    """
    global upload_md5_pic_position
    global index

    while True:
        upload_md5_pic_position = 0
        start_time = int(time.time())

        # The configuration is re-read every cycle so that a changed interval
        # takes effect on the next cycle.
        config = MonitorConfig()
        sleeptime = 3 * 60
        value = config.get_value('appiumConfig', 'batch_pic_seconds')
        if value:
            sleeptime = int(value)
        Logger.println(
            f"【====================开始批量导出并上传图片batch_pic_seconds={sleeptime}=======================】")

        # Wait for the configured interval, reporting the countdown.
        while True:
            time.sleep(5)
            elapsed = int(time.time()) - start_time
            if sleeptime - elapsed > 0:
                Logger.println(
                    f"=================【{sleeptime - elapsed}秒后执行第={index}个批量导出并上传图片任务===================】")
            if elapsed >= sleeptime:
                index = index + 1
                break

        time.sleep(5)
        PicClassfyUtil.classify_from(full_dir)
        excel_full_dir = FilePathUtil.get_lastmodify_file(
            FilePathUtil.get_full_dir("wxfriend", "excel", "pic"))
        main(excel_full_dir)

        if wx_stop.stopFlag:
            Logger.println(f"【===================批量导出并上传图片任务停止=================】")
            return
def clickUploadPic(self):
    """Let the user pick a file from ``wxfriend/excel/pic`` and start ``runthread5`` on it.

    Nothing happens when ``open_file`` returns a falsy value.
    """
    excel_pic_dir = FilePathUtil.get_full_dir("wxfriend", "excel", "pic")
    chosen_path = self.open_file(excel_pic_dir)
    if not chosen_path:
        return

    worker = self.runthread5
    worker.set_data(chosen_path)
    self.call_backlog("正在上传图片到后台 ,请稍后...")
    worker.start()
def clickPicClassfy(self):
    """Let the user pick a file from ``wxfriend/excel/pic`` and start ``runthread31`` on it.

    The progress message is written to the backlog once before the file is
    chosen and once more after a file was chosen.
    """
    progress_message = "正在按文本内容对图片分组 ,请稍后..."
    self.call_backlog(progress_message)

    excel_pic_dir = FilePathUtil.get_full_dir("wxfriend", "excel", "pic")
    chosen_path = self.open_file(excel_pic_dir)
    if not chosen_path:
        return

    self.call_backlog(progress_message)
    worker = self.runthread31
    worker.set_data(chosen_path)
    worker.start()
def classify(full_dir):
    """Move saved pictures into one folder per moment, driven by an Excel sheet.

    Every row supplies ``content_md5``, ``start``, ``end`` and ``count``.  For
    rows with a positive ``count``, ``FilePathUtil.move_files_by_time`` is called
    with ``wxfriend/pic/WeiXin`` as source,
    ``wxfriend/pic/WeiXinCopy/<content_md5>`` as destination and the row's
    ``start``/``end`` values converted to ``int``.

    :param full_dir: path of the Excel file to read.
    :return: None
    """
    Logger.println(f"【().full_dir={full_dir}】")
    rows = excel_util.excel2array(full_dir)

    for row in rows:
        md5_key = row['content_md5']
        # All three numeric fields are converted for every row, including rows
        # that end up being skipped.
        range_start = int(row['start'])
        range_end = int(row['end'])
        picture_count = int(row['count'])

        if picture_count <= 0:
            continue

        source_dir = FilePathUtil.get_full_dir('wxfriend', 'pic', 'WeiXin')
        target_dir = FilePathUtil.get_full_dir('wxfriend', 'pic', 'WeiXinCopy', md5_key)
        FilePathUtil.move_files_by_time(source_dir, target_dir, range_start, range_end)
def main(self):
    """Walk the phone-number sheet and call ``modify_name`` for each pending phone.

    Steps:

    1. tap the element ``com.tencent.mm:id/f8y``;
    2. load ``wxfriend/excel/手机号列表.xls``; log and return when it yields no rows;
    3. locate ``self.last_record_phone`` in the sheet and continue with the row
       after it; when it is not in the sheet, start with the first row;
    4. for each pending row call ``self.modify_name(phone)`` and stop as soon as
       that returns a truthy value or ``wx_stop.stopFlag`` is set;
    5. whenever ``count`` is a multiple of ``self.max_count`` (and both are
       greater than 1), pause for ``self.addfriend_inte_seconds`` seconds while
       periodically typing into ``com.tencent.mm:id/bhn``.

    :return: None
    """
    sleep(self.get_sleep(6, 10))
    # Tap the search button.
    self.find_element_by_id('com.tencent.mm:id/f8y').click()
    sleep(self.get_sleep(3, 5))

    full_dir = FilePathUtil.get_full_dir("wxfriend", "excel", "手机号列表.xls")
    array = excel_util.excel2array(full_dir)
    # Check emptiness before calling len(): a falsy result may be None.
    if not array:
        Logger.println(f"【main(请先配置手机号列表文件的sheet_name为手机号列表】")
        return
    length = len(array)

    # -1 means "no previous record found", which makes the first pending row
    # index 0 instead of silently skipping the first phone.
    last_index = -1
    for index, element in enumerate(array):
        if str(int(element["手机"])) == self.last_record_phone:
            last_index = index
            Logger.println(
                f"【main找到上次添加的手机位置={last_index + 1}={self.last_record_phone}】"
            )
            break

    first_pending = last_index + 1
    if first_pending >= length:
        Logger.println(f"【已经是最后一条了】")
        return

    count = 0
    for item in array[first_pending:length]:
        if wx_stop.stopFlag:
            break
        phone = str(int(item['手机']))
        Logger.println(f"【main开始执行{first_pending + count}.第{count}个任务】")

        if self.max_count > 1 and count > 1 and (count % self.max_count) == 0:
            # Idle mode: wait for the configured interval before continuing.
            start_time = int(time())
            sleeptime = self.addfriend_inte_seconds
            Logger.println(
                f"【main(暂时停止任务开启休闲模式).{sleeptime}秒后执行第={count}个任务】")
            while True:
                rdsleep = self.get_sleep(5, 6)
                by_id = self.find_element_by_id('com.tencent.mm:id/bhn')
                if by_id:
                    # Alternate between a status text and an empty text,
                    # depending on the sleep value that was drawn.
                    if rdsleep == 5:
                        by_id.send_keys(f'已经休眠{int(time()) - start_time}s')
                    else:
                        by_id.send_keys('')
                sleep(rdsleep)
                if int(time()) - start_time > sleeptime:
                    break
            Logger.println(f"【main(退出休闲模式)继续执行第={count}个任务】")

        if wx_stop.stopFlag:
            break
        if self.modify_name(phone):
            break
        count += 1

    Logger.println(f"【main(花费时间).total_count={self.total_count}s】")
def __init__(self):
    """
    Initialise the instance.

    After the parent initialiser runs, ``md5_contents`` and
    ``today_md5_contents`` start empty; ``today_md5_contents`` is then filled
    with the ``content_md5`` value of every row of the most recently modified
    file in ``wxfriend/excel/pic`` (when that file yields any rows).
    """
    super().__init__()
    self.md5_contents = []
    self.today_md5_contents = []

    pic_excel_dir = FilePathUtil.get_full_dir("wxfriend", "excel", "pic")
    latest_excel = FilePathUtil.get_lastmodify_file(pic_excel_dir)
    rows = excel_util.excel2array(latest_excel)
    if not rows:
        return

    self.today_md5_contents.extend(row['content_md5'] for row in rows)
def exportPhone(full_dir):
    """Extract unique phone numbers from an Excel sheet into a new workbook.

    For each row, ``get_phone_from_txt`` is applied to the ``描述`` column.
    Rows without a phone are ignored; the first row for each phone is kept and
    later rows with the same phone are counted as duplicates.  The kept rows
    (columns ``描述`` and ``手机``) are written to
    ``wxfriend/excel/手机号列表<date>.xlsx`` in the sheet ``手机号列表``, and the
    file is then opened with ``FilePathUtil.startfile``.

    :param full_dir: path of the source Excel file.
    :return: path of the written workbook.
    """
    array = excel2array(full_dir)
    datas = []
    phones = []
    # Counted explicitly: ``len(array) - len(datas)`` would also count rows
    # that simply contain no phone number.
    duplicate_count = 0

    for item in array:
        txt = get_phone_from_txt(item['描述'])
        if not txt:
            continue
        if txt in phones:
            duplicate_count += 1
            continue
        datas.append({'描述': item['描述'], '手机': txt})
        phones.append(txt)

    date = time_util.now_to_date('%Y%m%d_%H')
    des_dir = FilePathUtil.get_full_dir("wxfriend", "excel", f"手机号列表{date}.xlsx")
    write_excel(des_dir, '手机号列表', datas)
    Logger.println(f"【main().检测到重复电话号码={duplicate_count}条】")
    Logger.println(f"【main().获取到电话号码={len(datas)}条】")
    FilePathUtil.startfile(des_dir)
    return des_dir
def add_phone_excel(self):
    """Let the user pick a file from ``wxfriend/excel`` and record it as the phone sheet.

    The chosen path is stored through ``WxConfig.setPhoneExcel`` and shown in
    the ``le4`` widget.  Nothing happens when no file is chosen.
    """
    excel_dir = FilePathUtil.get_full_dir("wxfriend", "excel")
    chosen_path = self.open_file(excel_dir)
    if not chosen_path:
        return

    WxConfig.setPhoneExcel(chosen_path)
    self.le4.setText(chosen_path)
def main():
    """Run ``export`` and then ``classify`` on the newest file in ``wxfriend/excel/pic``."""
    export()

    pic_excel_dir = FilePathUtil.get_full_dir("wxfriend", "excel", "pic")
    latest_excel = FilePathUtil.get_lastmodify_file(pic_excel_dir)
    classify(latest_excel)
phones.append(txt) date = time_util.now_to_date('%Y%m%d_%H') des_dir = FilePathUtil.get_full_dir("wxfriend", "excel", f"手机号列表{date}.xlsx") write_excel(des_dir, '手机号列表', datas) Logger.println(f"【main().检测到重复电话号码={len(array) - len((datas))}条】") Logger.println(f"【main().获取到电话号码={len((datas))}条】") FilePathUtil.startfile(des_dir) return des_dir def transfer(item): return contact if __name__ == '__main__': full_dir = FilePathUtil.get_full_dir("./common/20210207wx_contacts_moments.xls") array = excel2array(full_dir, "20210207") array_new = [] for item in array: if item['phone']: contact = {} # nick_name, wx_number, phone contact["姓名"] = item['nick_name'] contact["电话"] = item['phone'] contact["备注"] = item['wx_number'] array_new.append(contact) path = f'{time_util.now_to_date("%Y-%m-%d")}wx_contacts_moments.csv' keys = csv_util.get_head_from_arr(array_new) csv_util.create_csv(path, keys, force=True, encoding=ENCODING_GBK) csv_util.append_csv(path, array_new, encoding=ENCODING_GBK) # datas = [{'id': 1, 'text': "dsd", "abs": 1},
def crawl(self) -> None:
    """Collect nickname, WeChat id and phone for every contact in the list view.

    After ``self.enter()``, the method loops: it swipes up (from the second
    pass on), reads the elements with id ``com.tencent.mm:id/b36``, and for
    each element whose nickname has not been seen yet opens it, reads the
    detail texts and goes back.  The collected rows are written to
    ``wxfriend/excel/text/<date>wx_contacts_moments.xls``.  The loop ends when
    ``wx_stop.stopFlag`` is set or when the element ``com.tencent.mm:id/azb``
    has text, in which case the written file is opened.

    NOTE(review): this block's nesting was reconstructed from a source whose
    line breaks and indentation were collapsed; confirm the loop boundaries,
    in particular whether the Excel write belongs inside the per-item loop.
    """
    self.enter()
    """
    爬取
    :return:
    """
    index = 0
    nickNames = []  # nicknames already handled, used to skip repeats
    contents = []  # rows accumulated for the Excel file
    # NOTE(review): `finished` is never set to True in this function, so the
    # `if finished` exit below cannot trigger.
    finished = False
    while True:
        if wx_stop.stopFlag:
            break
        if index > 0:
            # Swipe up
            self.swipe_up()
            sleep(0.5)
        items = self.find_elements_by_id("com.tencent.mm:id/b36")
        if items is None:
            # Retries immediately; there is no sleep on this path when index == 0.
            continue
        if finished:
            break
        for item in items:
            nickName = self.getContentTextById('com.tencent.mm:id/dy5', item)
            if nickName:
                Logger.println(f"【获取到联系人={nickName}】")
            else:
                continue
            if nickName in nickNames:
                Logger.println(f"【已经抓取跳过联系人{nickName}】")
                continue
            nickNames.append(nickName)
            # First guess for the phone comes from the list nickname; it is
            # replaced below when the detail page exposes a phone entry.
            phone = self.get_phone_from_txt(nickName)
            wx_number = ""
            try:
                item.click()
                sleep(0.5)
                by_xpath_nickname = self.find_element_by_xpath(
                    "//*[contains(@text,'昵称:')]")
                if by_xpath_nickname:
                    nickName = by_xpath_nickname.get_attribute(
                        "text").replace('昵称:', '').strip()
                by_xpath = self.find_element_by_xpath(
                    "//*[contains(@text,'微信号:')]")
                if by_xpath:
                    wx_number = by_xpath.get_attribute("text").replace(
                        '微信号:', '').strip()
                    Logger.println(f"【微信号={wx_number}】")
                xpath = self.find_element_by_xpath(
                    "//*[contains(@text,'电话号码')]")
                if xpath:
                    phone_parent = xpath.parent
                    phone = self.getPhone(phone_parent)
                    Logger.println(f"【phone={phone}】")
                sleep(0.5)
                self.driver.back()
            except Exception as e:
                # Failures are logged and the row is still recorded with
                # whatever values were gathered so far.
                Logger.println(f"【nick_name_element.click.e={e}】")
                pass
            contents.append({
                'nick_name': nickName,
                'wx_number': wx_number,
                'phone': phone
            })
            # The whole accumulated list is written each time.
            date = time_util.now_to_date('%Y%m%d')
            full_dir = FilePathUtil.get_full_dir(
                "wxfriend", "excel", "text", date + "wx_contacts_moments.xls")
            excel_util.write_excel(filename=full_dir,
                                   worksheet_name=date,
                                   items=contents)
        index += 1
        # Text in this element is treated as the end-of-list marker.
        last_flag = self.getContentTextById("com.tencent.mm:id/azb")
        if last_flag:
            date = time_util.now_to_date('%Y%m%d')
            full_dir = FilePathUtil.get_full_dir(
                "wxfriend", "excel", "text", date + "wx_contacts_moments.xls")
            FilePathUtil.startfile(full_dir)
            break
def crawl(self) -> None:
    """
    Crawl moments that carry pictures and save each picture through the UI.

    For every list element (id ``com.tencent.mm:id/fn9``) the text is read,
    hashed with ``self.MD5`` and compared with ``self.wx_content_md5`` (the
    position reached by an earlier run).  For new moments with pictures, up to
    nine pictures are stepped through; each is saved via a long-press and the
    "保存图片" menu entry.  One row per moment, holding the start/end time
    values and the picture count, is appended to ``contents`` and written to
    ``wxfriend/excel/pic/<date>wx_pic_moments.xls``.  Before returning, the
    first one or two hashes of this run are stored as ``wx_content/md5_pic``.

    NOTE(review): this block's nesting was reconstructed from a source whose
    line breaks and indentation were collapsed; confirm the loop boundaries.

    :return: None
    """
    index = 0
    md5_contents = []  # text hashes handled during this run, in order
    contents = []  # rows accumulated for the Excel file
    last_base_md5 = None  # hash of the previously saved picture screenshot
    finished = False
    while True:
        if wx_stop.stopFlag:
            break
        if index > 0:
            # Swipe up
            self.swipe_up()
            sleep(3)
        items = self.find_elements_by_id("com.tencent.mm:id/fn9")
        if items is None:
            continue
        if finished:
            break
        for item in items:
            b_e_content = None
            # Look for the note element
            content_element = self.find_element_by_id(
                "com.tencent.mm:id/dm3", item)
            if content_element:
                content_element.click()
                sleep(2)
                b_e_content = self.getContentTextById(
                    'com.tencent.mm:id/fpu')
                if b_e_content:
                    Logger.println(f"【获取到全文内容={b_e_content}】")
                self.driver.back()
            if b_e_content is None:
                # Fall back to the text shown inside the list item itself.
                b_e_content = self.getContentTextById(
                    "com.tencent.mm:id/b_e", item)
            if b_e_content is None:
                if index == 0:
                    # `= +1` assigns 1 (unary plus); under the `index == 0`
                    # guard that equals an increment.
                    index = +1
                Logger.println(f"【该条说说没有文本,忽略】")
                continue
            nickName = self.getNickName(item)
            phone = ""
            md5_ = self.MD5(b_e_content)
            if md5_ in self.wx_content_md5:
                # Reached the position of the earlier run: persist the newest
                # hashes and finish.
                Logger.println(
                    f"【crawl{index}已经抓取到上一次位置({md5_}).data={b_e_content}】")
                md5 = None
                if len(md5_contents) > 1:
                    md5 = ','.join(md5_contents[0:2])
                elif len(md5_contents) > 0:
                    md5 = md5_contents[0]
                if md5:
                    self.config.set_value("wx_content", "md5_pic", md5)
                finished = True
                break
            if md5_ in md5_contents:
                continue
            wx_number = ""
            image0 = self.find_element_by_xpath("//*[@content-desc='图片']",
                                                item)
            if image0:
                image0.click()
                sleep(1)
                start = '0'
                for index_img in range(9):
                    image_detail = self.find_element_by_id(
                        'com.tencent.mm:id/c9h')
                    if image_detail:
                        base64 = self.get_screenshot_as_base64(
                            image_detail)
                        base_md5 = self.MD5(base64)
                        if last_base_md5 == base_md5:
                            # Same screenshot as the last saved one: treated
                            # as the end of this moment's pictures.
                            end = time_util.get_time()
                            data = {
                                'content_md5': md5_,
                                'nick_name': nickName,
                                'wx_number': wx_number,
                                'content': b_e_content,
                                'phone': phone,
                                'file_ids': '',
                                'start': start,
                                'end': end,
                                'count': str(index_img)
                            }
                            contents.append(data)
                            self.driver.back()
                            break
                        if index_img == 0:
                            start = time_util.get_time()
                        # name = f"{i}_{base_md5}.png"
                        # if self.save_screenshot(image_detail, md5_, name):
                        try:
                            action1 = TouchAction(self.driver)
                            action1.long_press(el=image_detail,
                                               duration=500).perform()
                            sleep(0.5)
                            self.find_element_by_xpath(
                                "//android.widget.TextView[contains(@text,'保存图片')]"
                            ).click()
                        except:
                            # NOTE(review): bare except; any failure skips to
                            # the next picture index without swiping.
                            continue
                            pass
                        time = time_util.get_time()
                        name = f'mmexport{time}.jpg'
                        Logger.println(
                            f"【crawl({index}.{index_img}).已保存图片={name}】")
                        last_base_md5 = base_md5
                        is_oppo = self.desired_caps[
                            'deviceName'] == '5e8caad5'
                        if index_img == 8 or is_oppo:
                            # Ninth picture, or the device with this specific
                            # deviceName: record the row and leave the viewer.
                            sleep(1)
                            end = time_util.get_time()
                            data = {
                                'content_md5': md5_,
                                'nick_name': nickName,
                                'wx_number': wx_number,
                                'content': b_e_content,
                                'phone': phone,
                                'file_ids': '',
                                'start': start,
                                'end': end,
                                'count': str(index_img + 1)
                            }
                            contents.append(data)
                            self.driver.back()
                            break
                        sleep(1)
                        self.swipeLeft()
            md5_contents.append(md5_)
            date = time_util.now_to_date('%Y%m%d_%H')
            full_dir = FilePathUtil.get_full_dir(
                "wxfriend", "excel", "pic", date + "wx_pic_moments.xls")
            excel_util.write_excel(filename=full_dir,
                                   worksheet_name=date,
                                   items=contents)
            index += 1
    # Persist the first one or two hashes of this run for the next run.
    md5 = None
    if len(md5_contents) > 1:
        md5 = ','.join(md5_contents[0:2])
    elif len(md5_contents) > 0:
        md5 = md5_contents[0]
    if md5:
        self.config.set_value("wx_content", "md5_pic", md5)
def upload():
    """Upload the most recently modified file of the ``dist`` directory.

    A token is requested with ``files_token()`` and its first element is passed
    to ``upload_file`` together with the file path.  When no token is returned
    the upload is skipped and a message is printed instead of failing on
    ``token[0]``.

    :return: None
    """
    full_dir = FilePathUtil.get_lastmodify_file(
        FilePathUtil.get_full_dir("dist"))
    token = files_token()
    if not token:
        print(f"【token 生成失败】")
        return
    filepath = upload_file(token[0], full_dir)
    print(f"【().filepath={filepath}】")
def crawl(self) -> None:
    """Crawl moments, save their pictures through the UI and record/upload rows.

    The method re-reads ``wx_content/md5_pic`` (position of the earlier run),
    enters the moments view and loops until ``wx_stop.stopFlag`` is set.  For
    each list item it reads the text, hashes it with ``self.MD5`` and skips
    advertisements, text-less items and items already in
    ``self.today_md5_contents``.  For items with pictures, up to nine pictures
    are saved via long-press and the "保存图片" entry; the time values are taken
    from the "图片已保存" text.  Rows are optionally uploaded with
    ``WxUploader.uploadItems`` and appended to Excel files.  Before returning,
    the first one or two hashes of the current round are stored as
    ``wx_content/md5_pic``.

    NOTE(review): this block's nesting was reconstructed from a source whose
    line breaks and indentation were collapsed; confirm the loop boundaries.
    """
    self.config = MonitorConfig()
    self.wx_content_md5 = self.config.get_value("wx_content", "md5_pic")
    self.md5_contents = []
    self.enter()
    """
    爬取
    :return:
    """
    index = 0
    isFirst = True
    contents = []  # rows waiting to be uploaded/written
    lastItem = None  # last list item in which an avatar element was found
    while True:
        # Re-read on every pass so a changed limit takes effect immediately.
        self.crawl_max_count = int(WxConfig.get_crawl_max_count())
        if index % self.crawl_max_count == 0:
            # Also true for index == 0, i.e. on the very first pass.
            self.md5_contents.clear()
            Logger.println(
                f"【======={index},抓取数退出{self.crawl_max_count},本轮抓取已完成,开始滑动到顶部下拉刷新继续==========】"
            )
            self.swipe_to_top()
        if wx_stop.stopFlag:
            break
        # Swipe up
        if not isFirst:
            self.swipe_up_slow()
        isFirst = False
        top_element = self.wait_find_element(
            By.XPATH, '//android.support.v7.widget.LinearLayoutCompat')
        if lastItem and top_element:
            self.scrollElement(lastItem, top_element)
            sleep(3)
        items = self.wait_find_elements(
            By.XPATH,
            '//android.widget.ListView/android.widget.RelativeLayout')
        if items is None:
            Logger.println(f"【============未获取到列表================】")
            continue
        for item in items:
            accessibility_id = self.find_element_by_accessibility_id(
                '头像', item)
            if accessibility_id:
                lastItem = item
            b_e_content = None
            last_pic_md5 = None
            # NOTE(review): reset for every item, so it is always None when
            # `contition` is computed below.
            last_md5_ = None
            advise = self.find_element_by_xpath(
                "//android.widget.TextView[contains(@text,'广告')]", item)
            if advise:
                Logger.println(
                    f"【============检测到广告忽略进入下一条================】")
                continue
            message_text_container = self.config.get_value(
                "wx_content_ids", "message_text_container")
            content_element = self.find_element_by_id(
                message_text_container, item)
            if content_element:
                content_element.click()
                b_e_content = self.waitContentTextById(
                    "com.tencent.mm:id/fpu")
                if b_e_content:
                    Logger.println(f"【获取到详情页面全文内容={b_e_content}】")
                self.go_back()
            if b_e_content is None:
                # Fall back to the text shown inside the list item itself.
                message_text = self.config.get_value(
                    "wx_content_ids", "message_text")
                b_e_content = self.getContentTextById(message_text, item)
            if b_e_content is None:
                Logger.println(f"【============该条说说没有文本,忽略===========】")
                continue
            nickName = self.getNickName(item)
            phone = self.get_phone_from_txt(b_e_content)
            md5_ = self.MD5(b_e_content)
            if len(b_e_content) > 3 and b_e_content[-3:] == '...':
                # Text ending in '...': dump the item's text elements to the log.
                elment_datas = self.scan_all_text_elment(item)
                LogUtil.info_jsonformat(elment_datas)
            # With a previous hash, both hashes must be in the stored position;
            # without one, only the current hash is checked.
            contition = (last_md5_ in self.wx_content_md5) and (
                md5_ in self.wx_content_md5
            ) if last_md5_ else md5_ in self.wx_content_md5
            if contition:
                Logger.println(
                    f"【crawl{index}已经抓取到上一次位置md5_=({md5_},last_md5_={last_md5_}).data={b_e_content}】"
                )
                md5 = None
                if len(self.md5_contents) > 1:
                    md5 = ','.join(self.md5_contents[0:2])
                elif len(self.md5_contents) > 0:
                    md5 = self.md5_contents[0]
                if md5:
                    self.config.set_value("wx_content", "md5_pic", md5)
                Logger.println(f"【============开始滑动到顶部===========】")
                Logger.dingdingException(f"本轮抓取已完成,开始滑动到顶部下拉刷新继续")
                self.swipe_to_top()
                # Leaves only the item loop; the outer loop starts a new round.
                break
            if md5_ in self.today_md5_contents:
                last_md5_ = md5_
                Logger.println(f"【============该条说说已经抓取过,忽略===========】")
                continue
            image0 = self.find_element_by_xpath("//*[@content-desc='图片']",
                                                item)
            if image0:
                Logger.println(f"【crawl({index}).开始点击图片】")
                image0.click()
                sleep(2)
                start = '0'
                # NOTE(review): '0' is a non-empty string, so the `if not end`
                # checks below are false unless get_time_from_text returns a
                # falsy value.
                end = '0'
                for index_img in range(9):
                    image_detail = self.find_element_by_id(
                        'com.tencent.mm:id/c9h')
                    if image_detail:
                        text_content = self.wait_find_element(
                            By.XPATH, "//*[contains(@content-desc,'当前')]"
                        ).get_attribute('content-desc')
                        Logger.println(
                            f"【crawl.{index} text_content ={text_content}】"
                        )
                        pic_md5 = self.MD5(text_content)
                        if last_pic_md5 == pic_md5:
                            # Same description as the previous picture:
                            # treated as the end of this moment's pictures.
                            Logger.println(
                                f"【crawl({index}.{index_img}).前后图片一致退出】")
                            if not end:
                                sleep(2)
                                end = time_util.get_time()
                            data = {
                                'content_md5': md5_,
                                'nick_name': nickName,
                                'wx_number': "",
                                'content': b_e_content,
                                'phone': phone,
                                'start': start,
                                'end': end,
                                'crawl_time': time_util.now_to_date(),
                                'count': str(index_img)
                            }
                            # Only recorded when a start value was set, i.e.
                            # the first picture was saved.
                            if start != '0':
                                contents.append(data)
                                self.md5_contents.append(md5_)
                                self.today_md5_contents.append(md5_)
                                last_md5_ = md5_
                            self.go_back()
                            break
                        try:
                            action1 = TouchAction(self.driver)
                            action1.long_press(el=image_detail,
                                               duration=500).perform()
                            saveBtn = self.wait_find_element(
                                By.XPATH, "//*[contains(@text,'保存图片')]")
                            if saveBtn:
                                saveBtn.click()
                                element = self.wait_find_element(
                                    By.XPATH, "//*[contains(@text,'图片已保存')]")
                                attribute = element.get_attribute('text')
                                Logger.println(
                                    f"【crawl.{index} text_content ={attribute}】"
                                )
                                if attribute and ".jpg" in attribute:
                                    if index_img == 0:
                                        start = get_time_from_text(
                                            attribute)
                                    end = get_time_from_text(attribute)
                            else:
                                Logger.dingdingException(f"找不到保存按钮,保存图片失败")
                        except Exception as e:
                            Logger.dingdingException(f"保存图片失败{e}")
                            Logger.println(f'保存图片失败 Exception{e}')
                            self.go_back()
                            continue
                        last_pic_md5 = pic_md5
                        if index_img == 8:
                            # Ninth picture: record the row and leave the viewer.
                            if not end:
                                sleep(2)
                                end = time_util.get_time()
                            data = {
                                'content_md5': md5_,
                                'nick_name': nickName,
                                'wx_number': "",
                                'content': b_e_content,
                                'phone': phone,
                                'start': start,
                                'end': end,
                                'crawl_time': time_util.now_to_date(),
                                'count': str(index_img + 1)
                            }
                            if start != '0':
                                Logger.println(
                                    f"【crawl({index}.{index_img}).已保存图片=mmexport{end}.jpg】"
                                )
                                contents.append(data)
                                self.md5_contents.append(md5_)
                                self.today_md5_contents.append(md5_)
                                last_md5_ = md5_
                            self.go_back()
                            break
                        self.swipeLeft()
            else:
                # Text only
                data = {
                    'content_md5': md5_,
                    'nick_name': nickName,
                    'wx_number': "",
                    'content': b_e_content,
                    'phone': phone,
                    'start': "0",
                    'end': "0",
                    'crawl_time': time_util.now_to_date(),
                    'count': "0"
                }
                contents.append(data)
            if len(contents) > 0:
                value = self.config.get_value("wx_content", "select")
                if value == 'True':
                    Logger.println(f"开始上传第{index}条数据")
                    res = WxUploader.uploadItems(contents)
                    # List of refreshed listings
                    if '20003' == res:
                        contents[0]['content'] = ""
                        date = time_util.now_to_date('%Y%m%d')
                        full_dir = FilePathUtil.get_full_dir(
                            "wxfriend", "excel", "text",
                            date + "wx_pic_update_moments.xls")
                        excel_util.write_excel_append(filename=full_dir,
                                                      worksheet_name=date,
                                                      items=contents)
                        contents.clear()
                # List of new listings
                if len(contents) > 0:
                    contents[0]['content'] = ""
                    date = time_util.now_to_date('%Y%m%d')
                    full_dir = FilePathUtil.get_full_dir(
                        "wxfriend", "excel", "pic", date + "wx_pic_moments.xls")
                    excel_util.write_excel_append(filename=full_dir,
                                                  worksheet_name=date,
                                                  items=contents)
                    contents.clear()
                index += 1
            else:
                Logger.println(f"【没有数据不处理】")
    # Persist the first one or two hashes of the current round for the next run.
    md5 = None
    if len(self.md5_contents) > 1:
        md5 = ','.join(self.md5_contents[0:2])
    elif len(self.md5_contents) > 0:
        md5 = self.md5_contents[0]
    if md5:
        self.config.set_value("wx_content", "md5_pic", md5)
def crawl(self) -> None:
    """Crawl the authors of moments and record/upload nickname, WeChat id and phone.

    After ``self.enter()``, the method loops over list elements with id
    ``com.tencent.mm:id/fn9``.  For each item with text whose hash is new, it
    opens the author's page through the nickname element, reads nickname,
    WeChat id and phone there, and goes back.  Each row is appended to
    ``wxfriend/excel/text/<date>wx_user_moments.xls`` and passed to
    ``WxUploader.putItems``.  Crawling ends once a hash contained in
    ``self.wx_content_md5`` is met or ``wx_stop.stopFlag`` is set.

    NOTE(review): this block's nesting was reconstructed from a source whose
    line breaks and indentation were collapsed; confirm the loop boundaries.
    """
    self.enter()
    """
    爬取
    :return:
    """
    index = 0
    md5_contents = []  # text hashes handled during this run
    contents = []  # rows pending write/upload; cleared after each write
    finished = False
    while True:
        if wx_stop.stopFlag:
            break
        if index > 0:
            # Swipe up
            self.swipe_up()
            sleep(3)
        items = self.find_elements_by_id("com.tencent.mm:id/fn9")
        if items is None:
            continue
        if finished:
            break
        for item in items:
            # The return value is not used here.
            self.scan_all_text_elment(item)
            b_e_content = None
            content_element = self.find_element_by_id(
                "com.tencent.mm:id/b_m", item)
            if content_element:
                content_element.click()
                sleep(2)
                b_e_content = self.getContentTextById(
                    'com.tencent.mm:id/fpu')
                if b_e_content:
                    Logger.println(f"【获取到全文内容={b_e_content}】")
                self.driver.back()
            if b_e_content is None:
                # Fall back to the text shown inside the list item itself.
                b_e_content = self.getContentTextById(
                    "com.tencent.mm:id/b_e", item)
            if b_e_content is None:
                if index == 0:
                    # `= +1` assigns 1 (unary plus); under the `index == 0`
                    # guard that equals an increment.
                    index = +1
                Logger.println(f"【该条说说没有文本,忽略】")
                continue
            nickName = self.getNickName(item)
            md5_ = self.MD5(b_e_content)
            if md5_ in self.wx_content_md5:
                Logger.println(
                    f"【crawl{index}已经抓取到上一次位置({md5_}).data={b_e_content}】")
                finished = True
                break
            if md5_ in md5_contents:
                continue
            wx_number = ""
            # First guess for the phone comes from the text; it is replaced
            # below when the author's page exposes a phone entry.
            phone = self.get_phone_from_txt(b_e_content)
            nick_name_element = self.getNickNameElement(item)
            if nick_name_element:
                try:
                    nick_name_element.click()
                    sleep(1)
                    by_xpath_nickname = self.find_element_by_xpath(
                        "//*[contains(@text,'昵称:')]")
                    if by_xpath_nickname:
                        nickName = by_xpath_nickname.get_attribute(
                            "text").replace('昵称:', '').strip()
                    by_xpath = self.find_element_by_xpath(
                        "//*[contains(@text,'微信号:')]")
                    if by_xpath:
                        wx_number = by_xpath.get_attribute("text").replace(
                            '微信号:', '').strip()
                        Logger.println(f"【微信号={wx_number}】")
                    xpath = self.find_element_by_xpath(
                        "//*[contains(@text,'电话号码')]")
                    if xpath:
                        phone_parent = xpath.parent
                        phone = self.getPhone(phone_parent)
                        Logger.println(f"【phone={phone}】")
                    sleep(1)
                    self.driver.back()
                except Exception as e:
                    # Failures are logged and the row is still recorded with
                    # whatever values were gathered so far.
                    Logger.println(f"【nick_name_element.click.e={e}】")
                    pass
            contents.append({
                'content_md5': md5_,
                'nick_name': nickName,
                'wx_number': wx_number,
                'phone': phone
            })
            md5_contents.append(md5_)
            date = time_util.now_to_date('%Y%m%d')
            full_dir = FilePathUtil.get_full_dir(
                "wxfriend", "excel", "text", date + "wx_user_moments.xls")
            excel_util.write_excel_append(filename=full_dir,
                                          worksheet_name=date,
                                          items=contents)
            WxUploader.putItems(contents)
            contents.clear()
            index += 1
def openPicDir(self):
    """Open the ``wxfriend/pic`` directory with ``startfile``."""
    pic_dir = FilePathUtil.get_full_dir('wxfriend', 'pic')
    startfile(pic_dir)
def crawl(self) -> None:
    """
    Crawl the text of moments and write it to an Excel file.

    The method loops over list elements with id ``com.tencent.mm:id/fn9``,
    skipping advertisements and items without text.  Each new text becomes a
    row (hash, nickname, text, phone, crawl time); after every fifth recorded
    row the accumulated rows are written to
    ``wxfriend/excel/text/<date>wx_moments.xls``.  Crawling ends once a hash
    contained in ``self.wx_content_md5`` is met or ``wx_stop.stopFlag`` is set.
    Before returning, the first one or two hashes of this run are stored as
    ``wx_content/md5``.

    NOTE(review): this block's nesting was reconstructed from a source whose
    line breaks and indentation were collapsed; confirm the loop boundaries.
    NOTE(review): rows recorded after the last multiple of five are not
    written by this function.

    :return: None
    """
    index = 0
    contents = []  # rows accumulated for the Excel file
    finished = False
    md5_contents = []  # text hashes handled during this run, in order
    while True:
        if wx_stop.stopFlag:
            break
        if index > 0:
            # Swipe up
            self.swipe_up()
            sleep(3)
        items = self.find_elements_by_id("com.tencent.mm:id/fn9")
        if items is None:
            continue
        if finished:
            break
        for item in items:
            b_e_content = None
            advise = self.find_element_by_xpath(
                "//android.widget.TextView[contains(@text,'广告')]", item)
            if advise:
                Logger.println(
                    f"【============检测到广告忽略进入下一条================】")
                continue
            content_element = self.find_element_by_id(
                "com.tencent.mm:id/b_m", item)
            if content_element:
                content_element.click()
                sleep(2)
                b_e_content = self.getContentTextById(
                    'com.tencent.mm:id/fpu')
                if b_e_content:
                    Logger.println(f"【获取到全文内容={b_e_content}】")
                self.driver.back()
            if b_e_content is None:
                # Fall back to the text shown inside the list item itself.
                b_e_content = self.getContentTextById(
                    "com.tencent.mm:id/b_e", item)
            if b_e_content is None:
                if index == 0:
                    # `= +1` assigns 1 (unary plus); under the `index == 0`
                    # guard that equals an increment.
                    index = +1
                Logger.println(f"【该条说说没有文本,忽略】")
                continue
            nickName = self.getNickName(item)
            md5_ = self.MD5(b_e_content)
            phone = self.get_phone_from_txt(b_e_content)
            data = {
                'content_md5': md5_,
                'nick_name': nickName,
                'content': b_e_content,
                'phone': phone,
                'crawl_time': time_util.now_to_date(),
                'file_ids': '',
            }
            if md5_ in self.wx_content_md5:
                # Reached the position of the earlier run: persist the newest
                # hashes and finish.
                Logger.println(
                    f"【crawl{index}已经抓取到上一次位置({md5_}).data={data}】")
                md5 = None
                if len(md5_contents) > 1:
                    md5 = ','.join(md5_contents[0:2])
                elif len(md5_contents) > 0:
                    md5 = md5_contents[0]
                if md5:
                    self.config.set_value("wx_content", "md5", md5)
                finished = True
                break
            if md5_ in md5_contents:
                continue
            else:
                md5_contents.append(md5_)
            Logger.println(f"【crawl({index}).data={data}】")
            contents.append(data)
            index = index + 1
            if index % 5 == 0:
                # Write the whole accumulated list every fifth row.
                date = time_util.now_to_date('%Y%m%d_%H')
                full_dir = FilePathUtil.get_full_dir(
                    "wxfriend", "excel", "text", date + "wx_moments.xls")
                excel_util.write_excel(filename=full_dir,
                                       worksheet_name=date,
                                       items=contents)
    # Persist the first one or two hashes of this run for the next run.
    md5 = None
    if len(md5_contents) > 1:
        md5 = ','.join(md5_contents[0:2])
    elif len(md5_contents) > 0:
        md5 = md5_contents[0]
    if md5:
        self.config.set_value("wx_content", "md5", md5)
value = config.get_value('appiumConfig', 'batch_pic_seconds') if value: sleeptime = int(value) Logger.println( f"【====================开始批量导出并上传图片batch_pic_seconds={sleeptime}=======================】") while True: time.sleep(5) time_start_time = int(time.time()) - start_time if sleeptime - time_start_time > 0: Logger.println( f"=================【{sleeptime - time_start_time}秒后执行第={index}个批量导出并上传图片任务===================】") if time_start_time >= sleeptime: index = index + 1 break time.sleep(5) PicClassfyUtil.classify_from(full_dir) excel_full_dir = FilePathUtil.get_lastmodify_file( FilePathUtil.get_full_dir("wxfriend", "excel", "pic")) main(excel_full_dir) if wx_stop.stopFlag: Logger.println(f"【===================批量导出并上传图片任务停止=================】") return batch_export_upload(full_dir) if __name__ == '__main__': full_dir = FilePathUtil.get_lastmodify_file( FilePathUtil.get_full_dir("wxfriend", "excel", "pic")) batch_export_upload(full_dir) # main(full_dir)
def download(self):
    """Launch ``dist/UpgradeHelper.exe`` with ``startfile`` and then call ``self.close()``."""
    helper_path = FilePathUtil.get_full_dir('dist', 'UpgradeHelper.exe')
    startfile(helper_path)
    self.close()
def openPhoneDir(self):
    """Open the ``wxfriend/excel/pic`` directory with ``startfile``."""
    excel_pic_dir = FilePathUtil.get_full_dir('wxfriend', 'excel', 'pic')
    startfile(excel_pic_dir)
import os import codecs import configparser from common import FilePathUtil # configPath = FilePathUtil.get_full_dir('config', "test_config.ini") configPath = FilePathUtil.get_full_dir('config', "config.ini") class MonitorConfig: def __init__(self): fd = open(configPath) data = fd.read() # remove BOM if data[:3] == codecs.BOM_UTF8: data = data[3:] file = codecs.open(configPath, "w") file.write(data) file.close() fd.close() self.cf = configparser.ConfigParser() self.cf.read(configPath) def get_value(self, section, name): if not self.cf.has_section(section): self.cf.add_section(section) if not self.cf.has_option(section, name): self.cf.set(section, name, "") value = self.cf.get(section, name)