def crawl(self):
    """Crawl the WeChat moments feed, recording each post's text and pictures.

    The feed is scrolled in a loop until ``wx_stop.stopFlag`` is set.  For
    every list item that yields text, the text is hashed; the round is ended
    (scroll back to the top) once the hash is found in the checkpoint read
    from the ``wx_content/md5_pic`` config entry, and posts whose hash is
    already in ``self.today_md5_contents`` are skipped.  For picture posts up
    to nine pictures are saved through the viewer's long-press menu.  Each
    resulting record is optionally uploaded and then appended to the day's
    Excel workbook.  The first two hashes collected in the current round are
    written back to ``wx_content/md5_pic`` when a round ends and when the
    method returns.

    :return: None
    """
    self.config = MonitorConfig()
    self.wx_content_md5 = self.config.get_value("wx_content", "md5_pic")
    self.md5_contents = []
    self.enter()

    # A missing checkpoint (None) must not make the ``in`` tests below raise.
    known_checkpoint = self.wx_content_md5 or ""
    contents = []

    def persist_checkpoint():
        """Write the first two hashes of this round to the config, if any."""
        checkpoint = ','.join(self.md5_contents[0:2])
        if checkpoint:
            self.config.set_value("wx_content", "md5_pic", checkpoint)

    def build_record(content_md5, nick_name, content, phone, start, end,
                     count):
        """Return the dict that is uploaded / written to Excel for one post."""
        return {
            'content_md5': content_md5,
            'nick_name': nick_name,
            'wx_number': "",
            'content': content,
            'phone': phone,
            'start': start,
            'end': end,
            'crawl_time': time_util.now_to_date(),
            'count': count,
        }

    def append_to_workbook(folder, file_suffix):
        """Blank the first record's text, append ``contents`` to today's
        workbook under ``folder`` and empty the pending list."""
        contents[0]['content'] = ""
        date = time_util.now_to_date('%Y%m%d')
        full_dir = FilePathUtil.get_full_dir("wxfriend", "excel", folder,
                                             date + file_suffix)
        excel_util.write_excel_append(filename=full_dir,
                                      worksheet_name=date,
                                      items=contents)
        contents.clear()

    index = 0
    is_first = True
    last_item = None
    while True:
        self.crawl_max_count = int(WxConfig.get_crawl_max_count())
        # A configured limit of 0 would make the modulo raise
        # ZeroDivisionError; treat it as "no periodic refresh".
        if self.crawl_max_count and index % self.crawl_max_count == 0:
            self.md5_contents.clear()
            Logger.println(
                f"【======={index},抓取数退出{self.crawl_max_count},本轮抓取已完成,开始滑动到顶部下拉刷新继续==========】"
            )
            self.swipe_to_top()
        if wx_stop.stopFlag:
            break

        # Swipe up on every pass except the first one.
        if not is_first:
            self.swipe_up_slow()
        is_first = False

        top_element = self.wait_find_element(
            By.XPATH, '//android.support.v7.widget.LinearLayoutCompat')
        if last_item and top_element:
            self.scrollElement(last_item, top_element)
        sleep(3)

        items = self.wait_find_elements(
            By.XPATH,
            '//android.widget.ListView/android.widget.RelativeLayout')
        if items is None:
            Logger.println("【============未获取到列表================】")
            continue

        for item in items:
            # Items showing an avatar become the anchor for the next scroll.
            if self.find_element_by_accessibility_id('头像', item):
                last_item = item
            b_e_content = None
            last_pic_md5 = None
            # NOTE(review): this is reset for every item, so the two-hash
            # branch of the stop test below never takes part; confirm
            # whether the value was meant to survive across items.
            last_md5_ = None

            # Skip advertisements.
            if self.find_element_by_xpath(
                    "//android.widget.TextView[contains(@text,'广告')]",
                    item):
                Logger.println("【============检测到广告忽略进入下一条================】")
                continue

            # Prefer the full text from the detail page ...
            message_text_container = self.config.get_value(
                "wx_content_ids", "message_text_container")
            content_element = self.find_element_by_id(
                message_text_container, item)
            if content_element:
                content_element.click()
                b_e_content = self.waitContentTextById(
                    "com.tencent.mm:id/fpu")
                if b_e_content:
                    Logger.println(f"【获取到详情页面全文内容={b_e_content}】")
                self.go_back()
            # ... and fall back to the text shown in the list item.
            if b_e_content is None:
                message_text = self.config.get_value("wx_content_ids",
                                                     "message_text")
                b_e_content = self.getContentTextById(message_text, item)
            if b_e_content is None:
                Logger.println("【============该条说说没有文本,忽略===========】")
                continue

            nickName = self.getNickName(item)
            phone = self.get_phone_from_txt(b_e_content)
            md5_ = self.MD5(b_e_content)

            # Text ending in '...' looks truncated: log every text element.
            if len(b_e_content) > 3 and b_e_content[-3:] == '...':
                elment_datas = self.scan_all_text_elment(item)
                LogUtil.info_jsonformat(elment_datas)

            if last_md5_:
                reached_checkpoint = (last_md5_ in known_checkpoint
                                      and md5_ in known_checkpoint)
            else:
                reached_checkpoint = md5_ in known_checkpoint
            if reached_checkpoint:
                Logger.println(
                    f"【crawl{index}已经抓取到上一次位置md5_=({md5_},last_md5_={last_md5_}).data={b_e_content}】"
                )
                persist_checkpoint()
                Logger.println("【============开始滑动到顶部===========】")
                Logger.dingdingException("本轮抓取已完成,开始滑动到顶部下拉刷新继续")
                self.swipe_to_top()
                break

            if md5_ in self.today_md5_contents:
                last_md5_ = md5_
                Logger.println("【============该条说说已经抓取过,忽略===========】")
                continue

            image0 = self.find_element_by_xpath("//*[@content-desc='图片']",
                                                item)
            if image0:
                Logger.println(f"【crawl({index}).开始点击图片】")
                image0.click()
                sleep(2)
                start = '0'
                end = '0'
                for index_img in range(9):
                    image_detail = self.find_element_by_id(
                        'com.tencent.mm:id/c9h')
                    if not image_detail:
                        continue

                    # NOTE(review): the lookup result is dereferenced
                    # without a None check; a missing indicator raises
                    # AttributeError here.
                    text_content = self.wait_find_element(
                        By.XPATH, "//*[contains(@content-desc,'当前')]"
                    ).get_attribute('content-desc')
                    Logger.println(
                        f"【crawl.{index} text_content ={text_content}】")
                    pic_md5 = self.MD5(text_content)

                    if last_pic_md5 == pic_md5:
                        # Same indicator as the previous picture: the
                        # swipe did not advance, so the post is finished.
                        Logger.println(
                            f"【crawl({index}.{index_img}).前后图片一致退出】")
                        # NOTE(review): ``end`` starts as '0', which is
                        # truthy, so this only fires if
                        # get_time_from_text returned something falsy.
                        if not end:
                            sleep(2)
                            end = time_util.get_time()
                        if start != '0':
                            contents.append(
                                build_record(md5_, nickName, b_e_content,
                                             phone, start, end,
                                             str(index_img)))
                            self.md5_contents.append(md5_)
                            self.today_md5_contents.append(md5_)
                            last_md5_ = md5_
                        self.go_back()
                        break

                    try:
                        action1 = TouchAction(self.driver)
                        action1.long_press(el=image_detail,
                                           duration=500).perform()
                        saveBtn = self.wait_find_element(
                            By.XPATH, "//*[contains(@text,'保存图片')]")
                        if saveBtn:
                            saveBtn.click()
                            element = self.wait_find_element(
                                By.XPATH, "//*[contains(@text,'图片已保存')]")
                            attribute = element.get_attribute('text')
                            Logger.println(
                                f"【crawl.{index} text_content ={attribute}】")
                            if attribute and ".jpg" in attribute:
                                # ``start`` keeps the first picture's
                                # time, ``end`` tracks the latest one.
                                if index_img == 0:
                                    start = get_time_from_text(attribute)
                                end = get_time_from_text(attribute)
                        else:
                            Logger.dingdingException("找不到保存按钮,保存图片失败")
                    except Exception as e:
                        Logger.dingdingException(f"保存图片失败{e}")
                        Logger.println(f'保存图片失败 Exception{e}')
                        self.go_back()
                        continue

                    last_pic_md5 = pic_md5
                    if index_img == 8:
                        # Ninth picture handled: close the post.
                        if not end:
                            sleep(2)
                            end = time_util.get_time()
                        if start != '0':
                            Logger.println(
                                f"【crawl({index}.{index_img}).已保存图片=mmexport{end}.jpg】"
                            )
                            contents.append(
                                build_record(md5_, nickName, b_e_content,
                                             phone, start, end,
                                             str(index_img + 1)))
                            self.md5_contents.append(md5_)
                            self.today_md5_contents.append(md5_)
                            last_md5_ = md5_
                        self.go_back()
                        break
                    self.swipeLeft()
            else:
                # Text-only post.
                contents.append(
                    build_record(md5_, nickName, b_e_content, phone, "0",
                                 "0", "0"))

            if len(contents) > 0:
                if self.config.get_value("wx_content", "select") == 'True':
                    Logger.println(f"开始上传第{index}条数据")
                    res = WxUploader.uploadItems(contents)
                    # '20003': goes to the "update" workbook (listing
                    # refreshed, per the original comment).
                    if '20003' == res:
                        append_to_workbook("text",
                                           "wx_pic_update_moments.xls")
                # Anything still pending goes to the new-listing workbook.
                if len(contents) > 0:
                    append_to_workbook("pic", "wx_pic_moments.xls")
                contents.clear()
                index += 1
            else:
                Logger.println("【没有数据不处理】")

    persist_checkpoint()
def __init__(self, parent=None):
    """Build the configuration dialog and pre-fill it from stored settings.

    Every field is populated from ``WxConfig`` / ``MonitorConfig`` when the
    dialog is constructed.  The "保存" button is wired to ``self.save`` and
    the "重新检测" button to ``self.reconnect``.

    :param parent: optional parent widget, forwarded to the Qt base class.
    """
    super(ConfigDialog, self).__init__(parent)
    layout = QFormLayout()
    # A single reader serves every MonitorConfig-backed field below.
    self.config = MonitorConfig()
    appium_config = WxConfig.getAppiumConfig()

    def add_text_row(caption, text, placeholder=None):
        """Append a caption + line-edit row and return the line edit.

        The caption widget is kept in ``self.label``, so after the last
        call that attribute refers to the most recently added caption.
        """
        self.label = QLabel(caption)
        editor = QLineEdit()
        if placeholder is not None:
            editor.setPlaceholderText(placeholder)
        editor.setText(text)
        layout.addRow(self.label, editor)
        return editor

    self.le0 = add_text_row("已连接设备名称:", appium_config["deviceName"],
                            "未检测到设备")
    self.le1 = add_text_row("已连接安卓版本:",
                            appium_config["platformVersion"], "未检测到版本")
    self.le2 = add_text_row("服务器地址:", WxConfig.getServerUrl())
    self.le3 = add_text_row("最大添加好友上限(人数):",
                            WxConfig.get_add_friend_max_count())
    self.le31 = add_text_row("任务暂停后继续执行的时间间隔(秒):",
                             WxConfig.get_addfriend_inte_seconds())
    self.le32 = add_text_row("每次抓取任务上线数量(条):",
                             WxConfig.get_crawl_max_count())
    self.le41 = add_text_row(
        "批量导出图片上传脚本时间间隔(秒):",
        self.config.get_value('appiumConfig', 'batch_pic_seconds'))

    # This row uses a button as its label; the click is handled by
    # add_phone_excel.
    self.addbtn = QPushButton("设置手机号文件地址")
    self.addbtn.clicked.connect(self.add_phone_excel)
    self.le4 = QLineEdit()
    self.le4.setText(WxConfig.getPhoneExcel())
    layout.addRow(self.addbtn, self.le4)

    # Stored hash entries for pictures and text.
    self.label5 = QLabel("最新朋友圈图片md5值")
    self.le5 = QLineEdit()
    self.le5.setText(self.config.get_value("wx_content", "md5_pic"))
    layout.addRow(self.label5, self.le5)

    self.label6 = QLabel("最新朋友圈文本md5值")
    self.le6 = QLineEdit()
    self.le6.setText(self.config.get_value("wx_content", "md5"))
    layout.addRow(self.label6, self.le6)

    # The option is stored as the string 'True'.
    self.label7 = QLabel("抓取文本时同时同步到云端")
    self.select_checkbox = QCheckBox("")
    self.select_checkbox.setChecked(
        self.config.get_value("wx_content", "select") == 'True')
    layout.addRow(self.label7, self.select_checkbox)

    self.label8 = QLabel("是否输出日志")
    self.log_checkbox = QCheckBox("")
    self.log_checkbox.setChecked(bool(Logger.debug))
    layout.addRow(self.label8, self.log_checkbox)

    # Action buttons: "重新检测" calls reconnect, "保存" calls save.
    self.cacelButton = QPushButton("重新检测")
    self.saveButton = QPushButton("保存")
    self.cacelButton.clicked.connect(self.reconnect)
    self.saveButton.clicked.connect(self.save)

    # Button roles are kept exactly as they were.
    self.buttonBox = QDialogButtonBox(QtCore.Qt.Horizontal)
    self.buttonBox.addButton(self.saveButton, QDialogButtonBox.RejectRole)
    self.buttonBox.addButton(self.cacelButton, QDialogButtonBox.YesRole)
    layout.addRow(self.buttonBox)

    self.setLayout(layout)
    self.setWindowTitle("配置服务器地址")
    self.setWindowIcon(QIcon(IconConfig.LOGO_DIR))