def changePageSuccessfully(self):
    """Continue the crawl after a page change was issued.

    Logs the event, sleeps three seconds and then calls
    ``WaitEngine(driver).wait_for_loading()``.  A truthy result starts item
    collection for the current patent type; otherwise the page change is
    handed to ``changePageUnsuccessfully``.
    """
    Config.writeLog("换页成功")
    time.sleep(3)
    page_ready = WaitEngine(self.__driver).wait_for_loading()
    if not page_ready:
        self.changePageUnsuccessfully()
        return
    current_type = self.__progressInfo.getPatentTypeIndex()
    self.__pageCollection.startCollecting(current_type)
def startProgress(self):
    """Announce the start of the crawl and open the target URL.

    An exception raised by ``connectUrl`` is recorded through
    ``Config.writeException`` and is not re-raised.
    """
    start_message = "开始爬取进程"
    print(start_message)
    Config.writeLog(start_message)
    try:
        self.__connection.connectUrl()
    except Exception as err:
        Config.writeException(err)
def __wait_for_law_state_loading(self):
    """Report whether ``wait_state.wait_for_loading()`` succeeded.

    Returns:
        True when the wait succeeded; False after logging and printing a
        timeout message.  No failure callback is invoked here.
    """
    if self.wait_state.wait_for_loading():
        return True
    timeout_message = "等待超时"
    Config.writeLog(timeout_message)
    print(timeout_message)
    return False
def __check_if_lost(self):
    """Report a load failure when ``query_result_state()`` is falsy.

    On failure the message is printed and logged, then the owning item
    collection is notified via ``collectingLawDataUnsuccessfully``.
    """
    if self.wait_state.query_result_state():
        return
    failure_message = "加载异常"
    print(failure_message)
    Config.writeLog(failure_message)
    # TODO: add a dedicated handler for load failures
    self.__itemCollection.collectingLawDataUnsuccessfully()
def __deal_with_element(self, str_script):
    """Run ``str_script`` through the driver's ``execute_script``.

    Returns:
        True when the call completes; False when it raises (the exception is
        logged via ``Config.writeException`` and printed).
    """
    try:
        self.__driver.execute_script(str_script)
    except Exception as err:
        Config.writeException(err)
        print(err)
        return False
    else:
        return True
def refreshUrl(self):
    """Refresh the current page and report the outcome to the controller.

    The success callback runs inside the ``try``, so an exception raised by
    it is handled the same way as a failed refresh.

    Returns:
        True when both the refresh and the success callback complete;
        False when either raises (the failure callback is then invoked).
    """
    try:
        self.__driver.refresh()
        self.__progressController.loadUrlSuccessfully()
    except Exception as err:
        Config.writeException(err)
        self.__progressController.loadUrlUnsuccessfully()
        return False
    else:
        return True
def connectUrl(self):
    """Open the configured URL and report the outcome to the controller.

    The success callback runs inside the ``try``, so an exception raised by
    it is handled the same way as a failed ``get``.

    Returns:
        True when both the page load and the success callback complete;
        False when either raises (the failure callback is then invoked).
    """
    try:
        self.__driver.get(self.__url)
        self.__progressController.loadUrlSuccessfully()
    except Exception as err:
        Config.writeException(err)
        self.__progressController.loadUrlUnsuccessfully()
        return False
    else:
        return True
def changePageUnsuccessfully(self):
    """Log a failed page change and re-issue the current query.

    The query parameters (inventor at the current inventor index, proposer,
    start date, patent type index) are read back from the progress info.
    """
    failure_note = "换页失败"
    Config.writeLog(failure_note)
    print(failure_note)
    info = self.__progressInfo.getQueryInfo()
    self.__query.queryTarget(
        info.getInventorList()[self.__progressInfo.getInventorIndex()],
        info.getProposer(),
        info.getStartDate(),
        self.__progressInfo.getPatentTypeIndex(),
    )
def collectingUnsuccessfully(self, itemIndex):
    """Log a failed collection, remember the item index and re-query.

    Args:
        itemIndex: stored in the progress info via ``setItemIndex`` before
            the current query is issued again.
    """
    failure_note = "收集信息失败"
    Config.writeLog(failure_note)
    print(failure_note)
    self.__progressInfo.setItemIndex(itemIndex)
    info = self.__progressInfo.getQueryInfo()
    self.__query.queryTarget(
        info.getInventorList()[self.__progressInfo.getInventorIndex()],
        info.getProposer(),
        info.getStartDate(),
        self.__progressInfo.getPatentTypeIndex(),
    )
def collectingItemSuccessfullyWithOutData(self):
    """Advance past an item that produced no data.

    Increments the item index.  If it is still below the item count, a new
    ``ItemCollection`` is created for that index and started; otherwise the
    progress controller is told the page was collected successfully.
    """
    Config.writeLog("采集空item成功")
    self.__itemIndex += 1
    if self.__itemIndex >= self.__itemLength:
        self.__progressController.collectingSuccessfully()
        return
    next_item = ItemCollection(
        self.__driver,
        self,
        CollectionResult.PATENT_TYPE[self.__patentTypeIndex],
        self.__itemIndex,
    )
    next_item.collectingData()
def loadUrlUnsuccessfully(self):
    """Pick a retry action after a failed URL load.

    Every call bumps the refresh-failure counter.  When that counter is not
    a multiple of 5 the page is simply refreshed.  On every fifth failure the
    reconnect counter is bumped and the URL is opened again; when the
    reconnect counter is even, the driver is quit and recreated first (with a
    ten-second pause in between).

    NOTE(review): the nesting above (refresh on the outer ``else``,
    reconnect after the inner ``if``) is the reading under which every
    failure triggers exactly one retry action -- confirm against the
    original layout.
    """
    Config.writeLog("url连接失败")
    self.__refreshLostTime += 1
    if self.__refreshLostTime % 5 != 0:
        self.__connection.refreshUrl()
        return
    self.__rConnectLostTime += 1
    if self.__rConnectLostTime % 2 == 0:
        self.__driver.quit()
        time.sleep(10)
        self.__driver = self.__generateWebDriver(self.__browser)
    self.__connection.connectUrl()
def startCollecting(self, patentTypeIndex, startItemIndex=0):
    """Start collecting the items of the current result page.

    Reads the number of elements with class ``item`` from the page, then
    starts an ``ItemCollection`` at ``startItemIndex`` when that index is
    below the item count.

    Args:
        patentTypeIndex: index into ``CollectionResult.PATENT_TYPE``; stored
            on the instance for later items.
        startItemIndex: index of the first item to collect (default 0).

    Returns:
        False when the item count could not be read (the failure callback is
        invoked); True otherwise, including the case where the start index is
        not below the item count and the failure callback was invoked.
    """
    self.__itemLength = 0
    self.__patentTypeIndex = patentTypeIndex
    self.__itemIndex = startItemIndex
    try:
        self.__itemLength = self.__driver.execute_script(
            'return document.getElementsByClassName("item").length;')
    except Exception as e:
        Config.writeException(e)
        print(e)
        self.__itemLength = 0
        # Pass the item index, as the out-of-range branch below does; the
        # call used to omit it, which raised TypeError inside this handler.
        self.__progressController.collectingUnsuccessfully(self.__itemIndex)
        return False

    if self.__itemIndex < self.__itemLength:
        Config.writeLog("开始收集")
        itemCollection = ItemCollection(
            self.__driver, self,
            CollectionResult.PATENT_TYPE[patentTypeIndex],
            self.__itemIndex)
        itemCollection.collectingData()
    else:
        Config.writeLog("收集失败")
        print("收集失败")
        self.__progressController.collectingUnsuccessfully(self.__itemIndex)
    Config.writeLog("itemIndex = {0}".format(self.__itemIndex))
    return True
def collectingItemSuccessfully(self, itemData):
    """Store a collected item and move on to the next one.

    Args:
        itemData: appended to the collection result via ``addItem``.

    After incrementing the item index, either the next ``ItemCollection`` is
    started or, once the index reaches the item count, the progress
    controller is told the page was collected successfully.
    """
    Config.writeLog("采集item成功")
    self.__collectionResult.addItem(itemData)
    self.__itemIndex += 1
    progress_line = "采集item成功itemIndex = {0}, itemLength = {1}".format(
        self.__itemIndex, self.__itemLength)
    Config.writeLog(progress_line)
    if self.__itemIndex >= self.__itemLength:
        self.__progressController.collectingSuccessfully()
        return
    next_item = ItemCollection(
        self.__driver,
        self,
        CollectionResult.PATENT_TYPE[self.__patentTypeIndex],
        self.__itemIndex,
    )
    next_item.collectingData()
def changePage(self, pageIndex):
    """Jump to ``pageIndex`` by driving the pager with two scripts.

    The first script sets the ``value`` attribute of the element with id
    ``txt``; the second clicks the second-to-last child node of the first
    ``page_bottom`` element.

    Returns:
        True when both scripts and the success callback complete; False when
        any of them raises (the failure callback is then invoked).
    """
    pager = 'document.getElementsByClassName("page_bottom").item(0)'
    click_go = (pager + ".childNodes.item(" + pager
                + ".childNodes.length - 2).click();")
    try:
        set_page = (
            'document.getElementById("txt").setAttribute("value", {0});'
            .format(str(pageIndex)))
        self.__driver.execute_script(set_page)
        self.__driver.execute_script(click_go)
        self.__progressController.changePageSuccessfully()
    except Exception as err:
        Config.writeException(err)
        print(err)
        self.__progressController.changePageUnsuccessfully()
        return False
    else:
        return True
def loadUrlSuccessfully(self):
    """Continue the crawl after the URL loaded.

    If the page source contains the rate-limit notice, the rejection hint is
    printed and the progress is ended.  Otherwise both failure counters are
    reset to 1 and the current query is issued from the progress info.
    """
    Config.writeLog("成功连接url")
    if "您的操作太过频繁" in self.__driver.page_source:
        Config.writeLog("操作太过频繁")
        print(Config.REJECT_WAY)
        self.endProgress()
        return
    self.__refreshLostTime = 1
    self.__rConnectLostTime = 1
    info = self.__progressInfo.getQueryInfo()
    self.__query.queryTarget(
        info.getInventorList()[self.__progressInfo.getInventorIndex()],
        info.getProposer(),
        info.getStartDate(),
        self.__progressInfo.getPatentTypeIndex(),
    )
def __getPageSum(self):
    """Read the total number of result pages from the current page.

    Returns:
        0 when the page source contains the "no results" notice;
        the integer following "共" in the pager text otherwise;
        None when the pager text cannot be read or parsed (the exception is
        logged via ``Config.writeException`` and printed).
    """
    # Local import: the file's import block is outside this view.
    import re

    if "没有检索到" in self.__driver.page_source:
        print("没有检索到")
        return 0
    try:
        page_sum_str = self.__driver.execute_script(
            'return document.getElementsByClassName("page_top").item(0)'
            '.childNodes.item(document.getElementsByClassName("page_top")'
            '.item(0).childNodes.length - 1).textContent;')
        # The earlier fixed-offset slicing only worked when exactly one
        # character followed "页": "共12页" parsed as 1 and "共5页" failed.
        # Match the digits after "共" directly instead.
        match = re.search(r"共\s*(\d+)", page_sum_str)
        if match is None:
            raise ValueError(
                "no page total found in {0!r}".format(page_sum_str))
        return int(match.group(1))
    except Exception as e:
        Config.writeException(e)
        print(e)
        return None
def queryTargetSuccessfully(self, pageSum):
    """Decide the next step after a query returned ``pageSum`` pages.

    Both failure counters are reset to 1 and the page sum is stored.

    * ``pageSum != 0``: resume at the stored page (via ``changePage``) when
      the page index is not 1, otherwise start collecting at the stored item
      index.
    * ``pageSum == 0``: reset item/page position and advance to the next
      patent type; after the last type (index >= 2) wrap to type 0 and
      advance to the next inventor, ending the progress when there is none.
      The query is then issued again with the updated progress info.

    NOTE(review): the re-query is taken to run after both patent-type
    branches (otherwise advancing the inventor would never query again) --
    confirm against the original layout.
    """
    Config.writeLog("检索成功")
    self.__refreshLostTime = 1
    self.__rConnectLostTime = 1
    self.__progressInfo.setPageSum(pageSum)

    if pageSum != 0:
        Config.writeLog("pageSum != 0")
        if self.__progressInfo.getPageIndex() == 1:
            self.__pageCollection.startCollecting(
                self.__progressInfo.getPatentTypeIndex(),
                self.__progressInfo.getItemIndex())
        else:
            self.__query.changePage(self.__progressInfo.getPageIndex())
        return

    Config.writeLog("pageSum = 0")
    self.__progressInfo.setItemIndex(0)
    self.__progressInfo.setPageIndex(1)
    type_index = self.__progressInfo.getPatentTypeIndex()
    if type_index < 2:
        Config.writeLog("pt < 2")
        self.__progressInfo.setPatentTypeIndex(type_index + 1)
    else:
        Config.writeLog("pt >= 2")
        self.__progressInfo.setPatentTypeIndex(0)
        next_inventor = self.__progressInfo.getInventorIndex() + 1
        inventor_total = len(
            self.__progressInfo.getQueryInfo().getInventorList())
        if next_inventor >= inventor_total:
            self.endProgress()
        else:
            self.__progressInfo.setInventorIndex(next_inventor)
            upcoming = self.__progressInfo.getQueryInfo()
            print(upcoming.getInventorList()[
                self.__progressInfo.getInventorIndex()])

    info = self.__progressInfo.getQueryInfo()
    self.__query.queryTarget(
        info.getInventorList()[self.__progressInfo.getInventorIndex()],
        info.getProposer(),
        info.getStartDate(),
        self.__progressInfo.getPatentTypeIndex(),
    )
def __click_law_state_button(self, which_item):
    """Click the legal-information button of one result item.

    Args:
        which_item: index passed to ``.item(...)`` on the ``item-footer``
            element list.

    The click is attempted once and, if it raises, exactly once more.  Both
    failures are logged and printed; nothing is raised or returned.
    """
    def click_button():
        # Build and run the click script for the requested item.
        script = ('document.getElementsByClassName("item-footer").item('
                  + str(which_item)
                  + ').childNodes.item(1).childNodes.item(3).click();')
        self.driver.execute_script(script)

    try:
        click_button()
    except Exception as first_error:
        Config.writeException(first_error)
        print(first_error)
        try:
            click_button()
        except Exception as second_error:
            print(second_error)
            Config.writeException(second_error)
def collectingData(self):
    """Collect the fields of one result item, then its legal status.

    The name and type are read first.  When either is an empty string the
    item is reported as collected without data.  Otherwise every child of
    the item's ``item-content-body`` element is read and, depending on the
    label text it contains, stored by slicing off a fixed-length prefix
    (and for some fields a two-character suffix).  Finally a ``LawState``
    collection is started for the item.

    Returns:
        None on the normal path; False when any step raises (the exception
        is logged and the item is reported as failed).

    NOTE(review): the legal-status step is taken to run once after the
    field loop, not per field -- confirm against the original layout.
    """
    try:
        item_name = self.collecting_name()
        self.__item_data.set_name(item_name)
        patent_type = self.collecting_type()
        self.__item_data.set_type(patent_type)

        if item_name == "" or patent_type == "":
            self.__pageCollection.collectingItemSuccessfullyWithOutData()
        else:
            body_ref = (
                'document.getElementsByClassName("item-content-body")['
                + str(self.__whichItem) + "]")
            field_count = self.__driver.execute_script(
                "return " + body_ref + ".children.length;")
            for child_index in range(field_count):
                text = str(self.__driver.execute_script(
                    "return " + body_ref + ".children["
                    + str(child_index) + "].innerText;"))
                # Order matters: the more specific labels are tested first.
                if "申请号" in text:
                    self.__item_data.set_request_number(text[7:])
                elif "申请日" in text:
                    self.__item_data.set_request_date(text[6:])
                elif "公告" in text and "日" in text:
                    self.__item_data.set_announcement_date(text[10:])
                elif "申请" in text and "人" in text:
                    self.__item_data.set_proposer_name(text[11:-2])
                elif "发明人" in text:
                    self.__item_data.set_inventor_name(
                        text[6:-2].replace('\n', ''))
            Config.writeLog("准备收集法律信息")
            LawState(self.__driver, self).collectingLawState(
                self.__whichItem)
    except Exception as err:
        Config.writeException(err)
        self.__pageCollection.collectingItemUnsuccessfully()
        return False
def __writeToExcel(self, index, patentType, name, lawState, lawStateDate,
                   aDate, requestNumber, requestDate, proposerName,
                   inventorName):
    """Write one record into row ``index`` of the first sheet.

    The nine values are written to columns 0-8 in parameter order and the
    editor is committed afterwards.  Any exception is printed and logged
    rather than raised.
    """
    row_values = (patentType, name, lawState, lawStateDate, aDate,
                  requestNumber, requestDate, proposerName, inventorName)
    try:
        editor = ExcelUtil(Config.FILE_NAME).edit()
        sheet = editor.getSheet(0)
        for column, value in enumerate(row_values):
            sheet.write(index, column, value)
        editor.commit()
    except Exception as err:
        print("写excel报错")
        Config.writeLog("写excel报错")
        Config.writeException(err)
def getSheet(self, which, mode):
    """Return a sheet of the workbook obtained from ``getExcel(mode)``.

    Args:
        which: sheet name (``str``) or sheet index (``int``).
        mode: compared case-insensitively against "READ" and "WRITE".

    Returns:
        * name + READ   -> ``workbook.sheet_by_name(which)``
        * index + READ  -> ``workbook.sheet_by_index(which)``
        * index + WRITE -> ``workbook.get_sheet(which)``
        * anything else, or any exception (which is logged) -> None
    """
    try:
        workbook = self.getExcel(mode)
        if isinstance(which, str):
            if mode.upper() == "READ":
                return workbook.sheet_by_name(which)
            return None
        if not isinstance(which, int):
            return None
        requested = mode.upper()
        if requested == "READ":
            return workbook.sheet_by_index(which)
        if requested == "WRITE":
            return workbook.get_sheet(which)
        return None
    except Exception as err:
        Config.writeException(err)
        return None
def __isElementLoadingSuccess(self):
    """Check that the five query-form elements exist and are displayed.

    The search button is located by XPath and the four inputs by id; all are
    located before any visibility check is made.

    Returns:
        True when every element reports ``is_displayed()``; False when one
        does not, or when locating/checking raises (both cases are printed
        and logged).
    """
    try:
        form_elements = [
            self.__driver.find_element_by_xpath(Query.search_button_xpath)
        ]
        for element_id in (Query.inventor_input_id,
                           Query.proposer_input_id,
                           Query.time_select_id,
                           Query.time_input_id):
            form_elements.append(
                self.__driver.find_element_by_id(element_id))
        if all(element.is_displayed() for element in form_elements):
            return True
        print("元素没显示")
        Config.writeLog("元素没显示")
        return False
    except Exception as err:
        print("元素抛异常")
        Config.writeLog("元素抛异常")
        Config.writeException(err)
        return False
def queryTarget(self, inventor, proposer, startDate, patentTypeIndex):
    """Run one search and report the outcome to the progress controller.

    Steps, each of which aborts the query when it fails: wait for the page,
    check the form elements, fill in and submit the form, wait for the
    result, then read the page total.

    Returns:
        True when the page total was read (``queryTargetSuccessfully`` is
        called with it); False otherwise (``queryTargetUnsuccessfully`` is
        called after printing and logging the reason).
    """
    def fail(reason):
        # Shared failure path: report the reason and notify the controller.
        print(reason)
        Config.writeLog(reason)
        self.__progressController.queryTargetUnsuccessfully()
        return False

    if not self.__waitEngine.wait_for_loading():
        return fail("url加载超时")
    if not self.__isElementLoadingSuccess():
        return fail("元素未加载")
    if not self.__inputQueryTargetData(inventor, proposer, startDate,
                                       patentTypeIndex):
        return fail("查询失败")
    if not self.__waitEngine.wait_for_loading():
        return fail("查询等待超时")

    self.__waitEngine.waitForSeconds(1)
    page_total = self.__getPageSum()
    if page_total is None:
        return fail("页码为零")
    self.__progressController.queryTargetSuccessfully(page_total)
    return True
def collectingSuccessfully(self):
    """Advance the crawl after a whole page was collected.

    * More pages remain: store the next page index, reset the item index
      and change page.
    * Last page reached: reset page/item position and move to the next
      patent type; after the last type (index >= 2) wrap to type 0 and move
      to the next inventor, or end the progress when the current inventor
      was the last one.  The query is then issued for the new position.
    """
    Config.writeLog("收集信息成功")
    next_page = self.__progressInfo.getPageIndex() + 1

    if next_page <= self.__progressInfo.getPageSum():
        Config.writeLog("pageIndex = {0}".format(next_page))
        self.__progressInfo.setPageIndex(next_page)
        self.__progressInfo.setItemIndex(0)
        self.__query.changePage(next_page)
        return

    self.__progressInfo.setPageIndex(1)
    self.__progressInfo.setItemIndex(0)
    type_index = self.__progressInfo.getPatentTypeIndex()
    switched_inventor = False
    if type_index < 2:
        self.__progressInfo.setPatentTypeIndex(type_index + 1)
    else:
        self.__progressInfo.setPatentTypeIndex(0)
        inventor_index = self.__progressInfo.getInventorIndex()
        last_index = len(
            self.__progressInfo.getQueryInfo().getInventorList()) - 1
        if inventor_index >= last_index:
            Config.writeLog("InventorIndex = {0}".format(inventor_index))
            self.endProgress()
            return
        self.__progressInfo.setInventorIndex(inventor_index + 1)
        switched_inventor = True

    info = self.__progressInfo.getQueryInfo()
    inventor = info.getInventorList()[self.__progressInfo.getInventorIndex()]
    proposer = info.getProposer()
    start_date = info.getStartDate()
    current_type = self.__progressInfo.getPatentTypeIndex()
    if switched_inventor:
        # Only a change of inventor is echoed to the console.
        print(inventor)
    self.__query.queryTarget(inventor, proposer, start_date, current_type)
def __wait_for_law_state(self):
    """Wait for the legal-status panel and report the first failure.

    Three checks run in order: the loading wait, the close-button wait and
    the query-result state.  The first check that fails is logged and
    printed, and ``collectingLawDataUnsuccessfully`` is invoked once.

    Earlier, a failed check did not stop the later ones, so the failure
    callback could fire up to three times for a single item.  The method now
    returns right after the first failure.  Always returns None.
    """
    if not self.wait_state.wait_for_loading():
        Config.writeLog("等待超时")
        print("等待超时")
        self.__itemCollection.collectingLawDataUnsuccessfully()
        return

    if not self.__wait_for_close_button():
        Config.writeLog("关闭按钮没出来")
        print("关闭按钮没出来")
        self.__itemCollection.collectingLawDataUnsuccessfully()
        return

    if not self.wait_state.query_result_state():
        Config.writeLog("加载异常")
        print("加载异常")
        # TODO: add a dedicated handler for load failures
        self.__itemCollection.collectingLawDataUnsuccessfully()
    return
def collectingLawDataSuccessfully(self, lawUpdate, lawState):
    """Store the legal status on the item and report the item as collected.

    Args:
        lawUpdate: stored via ``set_law_state_date``.
        lawState: stored via ``set_law_state``.
    """
    Config.writeLog("采集法律信息成功")
    record = self.__item_data
    record.set_law_state(lawState)
    record.set_law_state_date(lawUpdate)
    self.__pageCollection.collectingItemSuccessfully(record)
def __inputQueryTargetData(self, inventor, proposer, startDate,
                           patentTypeIndex):
    """Fill in the search form and click the search button.

    Args:
        inventor: value for the inventor input.
        proposer: value for the proposer (applicant) input.
        startDate: value for the time input.
        patentTypeIndex: forwarded to ``__choosePatentType``.

    Returns:
        True when every step completes; False when any step raises (the
        exception is logged via ``Config.writeException`` and printed).

    The three values used to be concatenated into the JavaScript source, so
    a quote or backslash in a name broke the script.  They are now passed as
    script arguments.  NOTE(review): this assumes the driver is a Selenium
    WebDriver, whose ``execute_script(script, *args)`` exposes the extra
    arguments as ``arguments[i]``.
    """
    def set_input_value(element_id, value):
        # The driver serialises the arguments, so no manual escaping.
        self.__driver.execute_script(
            'document.getElementById(arguments[0])'
            '.setAttribute("value", arguments[1])',
            element_id, value)

    try:
        # Fill in the inventor.
        set_input_value(Query.inventor_input_id, inventor)
        Config.writeLog("发明人")
        # Fill in the proposer (applicant).
        set_input_value(Query.proposer_input_id, proposer)
        Config.writeLog("申请人")
        # Open the time check list, then pick an entry from it.
        self.__driver.execute_script(
            'document.getElementById("' + Query.time_select_id
            + '").firstElementChild.firstElementChild.click();')
        WaitEngine.waitForSeconds(2)
        self.__driver.execute_script(
            'document.getElementById("' + Query.time_select_id
            + '").firstElementChild.childNodes[2].childNodes[2]'
              '.firstElementChild.click();')
        Config.writeLog("点击时间")
        # Fill in the start date.
        set_input_value(Query.time_input_id, startDate)
        Config.writeLog("填写时间")
        # Choose the patent type.
        self.__choosePatentType(patentTypeIndex)
        Config.writeLog("专利类型")
        WaitEngine.waitForSeconds(3)
        # Click the search button.
        self.__driver.execute_script(
            'document.getElementsByClassName("box-content-bottom")'
            '.item(0).childNodes.item(5).click();')
        Config.writeLog("点击按钮")
        return True
    except Exception as e:
        Config.writeException(e)
        print(e)
        return False
def __check_for_colse_button(self):
    """Report a failure when the close button did not appear.

    When ``__wait_for_close_button()`` is falsy the event is logged and the
    item collection is notified via ``collectingLawDataUnsuccessfully``.
    """
    if not self.__wait_for_close_button():
        Config.writeLog("关闭按钮没出来")
        self.__itemCollection.collectingLawDataUnsuccessfully()
def collectingLawDataUnsuccessfully(self):
    """Log a failed legal-status collection and report the item as failed."""
    Config.writeLog("收集法律信息失败")
    report_failure = self.__pageCollection.collectingItemUnsuccessfully
    report_failure()
def endProgress(self):
    """Shut the crawl down and terminate the process.

    Logs and prints the end message, quits the driver, sleeps one second and
    then calls ``os._exit(0)``, so this method does not return.
    """
    end_message = "结束进程"
    Config.writeLog(end_message)
    print(end_message)
    self.__driver.quit()
    time.sleep(1)
    os._exit(0)