def chooseArea(broswer, area, type_):
    """Open the city panel, hover one area and click an entry of its sub-list.

    Args:
        broswer: Browser handle passed to the ``util`` finders and to
            ``ActionChains``.
        area: Value matched against the ``data-value`` attribute of the
            ``<li>`` entries inside the ``hd-list-wrap`` element.
        type_: Link text of the entry to click inside ``#bsAreaList``.

    The fixed ``time.sleep`` pauses between steps are kept as-is (presumably
    they give the page time to react; confirm before shortening them).
    """
    pause = time.sleep

    # Step 1: open the city panel.
    pause(2)
    panel_toggle = util.findElementsByCssSelector(
        broswer, "li[data-type=\"city-panel\"]", False)
    pause(1)
    panel_toggle.click()
    pause(1)

    # Step 2: locate the requested area inside the area list and hover it.
    util.switchToCurrentWindow(broswer)
    area_list = util.findElementsByClassName(broswer, "hd-list-wrap", False)
    pause(1)
    area_selector = 'li[data-value="' + area + '"]'
    area_item = util.findElementsByCssSelector(area_list, area_selector, False)
    pause(1)
    ActionChains(broswer).move_to_element(area_item).perform()
    pause(1)

    # Step 3: click the entry whose link text equals type_.
    sub_list = util.findElementById(broswer, "bsAreaList")
    util.findElementsByLinkText(sub_list, type_, False).click()
    pause(1)
def getAllTag(broswer):
    """Open the tag panel and collect every tag group with its options.

    Args:
        broswer: Browser handle passed to the ``util`` finders.

    Returns:
        dict: Maps each group heading (stripped ``innerHTML`` of the first
        ``<h2>`` inside the group's ``<dt>``, UTF-8 encoded) to the list of
        option labels (stripped ``innerHTML`` of the first ``<a>`` in each
        ``<li>`` of the group's first ``<dd>``, UTF-8 encoded).

    The mapping used to be built and then discarded; it is now returned,
    matching ``getAllArea``.
    """
    # Open the tag panel.
    util.switchToCurrentWindow(broswer)
    time.sleep(1)
    panel_toggle = util.findElementsByCssSelector(
        broswer, "li[data-type=\"tag-panel\"]", False)
    time.sleep(2)
    panel_toggle.click()
    time.sleep(1)

    util.switchToCurrentWindow(broswer)
    tag_container = util.findElementsByClassName(broswer, "cater-filter-tag",
                                                 False)

    result = {}
    for group in tag_container.find_elements_by_tag_name("dl"):
        headings = group.find_elements_by_tag_name("dt")
        if not headings:
            # A <dl> without a <dt> has no heading to file its options under.
            continue

        tagName = headings[0].find_elements_by_tag_name("h2")[0].get_attribute(
            "innerHTML").strip().encode("utf-8")
        option_block = group.find_elements_by_tag_name("dd")[0]
        result[tagName] = [
            li.find_elements_by_tag_name("a")[0].get_attribute(
                "innerHTML").strip().encode("utf-8")
            for li in option_block.find_elements_by_tag_name("li")
        ]
        # Progress output: number of options found for this group.
        print(len(result[tagName]))

    # Progress output: number of groups found.
    print(len(result))
    time.sleep(1)
    return result
def getAllArea(broswer):
    """Open the city panel and collect the sub-entries shown for every area.

    Each ``<li>`` of the ``hd-list-wrap`` element is hovered in turn; after
    each hover the ``<li>`` entries of ``#bsAreaList`` are read.

    Args:
        broswer: Browser handle passed to the ``util`` finders and to
            ``ActionChains``.

    Returns:
        dict: Maps each area's ``data-value`` attribute to the list of
        ``innerHTML`` strings of the first ``<a>`` in every ``#bsAreaList``
        entry.
    """
    # Open the city panel.
    time.sleep(2)
    panel_toggle = util.findElementsByCssSelector(
        broswer, "li[data-type=\"city-panel\"]", False)
    time.sleep(1)
    panel_toggle.click()
    time.sleep(1)

    util.switchToCurrentWindow(broswer)
    area_list = util.findElementsByClassName(broswer, "hd-list-wrap", False)
    time.sleep(1)

    result = {}
    for area_item in area_list.find_elements_by_tag_name("li"):
        # Hover the area, then read the sub-list that is on the page.
        time.sleep(1)
        ActionChains(broswer).move_to_element(area_item).perform()
        time.sleep(1)

        areaName = area_item.get_attribute("data-value")
        sub_list = util.findElementById(broswer, "bsAreaList")
        result[areaName] = [
            entry.find_elements_by_tag_name("a")[0].get_attribute("innerHTML")
            for entry in sub_list.find_elements_by_tag_name("li")
        ]
        # Progress output: number of sub-entries for this area.
        print(len(result[areaName]))

    # Progress output: number of areas visited.
    print(len(result))
    return result
def chooseType(broswer, tag, type_):
    """Open the tag panel and click an option inside the first matching group.

    Args:
        broswer: Browser handle passed to the ``util`` finders.
        tag: Text looked for in the UTF-8 encoded inner HTML of a group's
            ``<dt>``; the first group containing it is used.
        type_: Text looked for in the UTF-8 encoded inner HTML of the group's
            ``<li>`` entries; the first entry containing it is clicked.
    """
    # Open the tag panel.
    util.switchToCurrentWindow(broswer)
    time.sleep(1)
    panel_toggle = util.findElementsByCssSelector(
        broswer, "li[data-type=\"tag-panel\"]", False)
    time.sleep(2)
    panel_toggle.click()
    time.sleep(1)

    util.switchToCurrentWindow(broswer)
    tag_container = util.findElementsByClassName(broswer, "cater-filter-tag",
                                                 False)

    for group in tag_container.find_elements_by_tag_name("dl"):
        headings = group.find_elements_by_tag_name("dt")
        if len(headings) == 0:
            continue
        if tag not in util.getInnerHtml(headings[0]).encode("utf-8"):
            continue

        print("find tag")
        option_blocks = group.find_elements_by_tag_name("dd")
        # NOTE(review): when the options markup contains "全部" ("all"), the
        # first entry is clicked and type_ is never consulted -- confirm that
        # this is intended.
        if "全部" in util.getInnerHtml(option_blocks[0]).encode("utf-8"):
            first_entry = option_blocks[0].find_elements_by_tag_name("li")[0]
            first_entry.find_elements_by_tag_name("a")[0].click()
        else:
            for entry in option_blocks[0].find_elements_by_tag_name("li"):
                if type_ in util.getInnerHtml(entry).encode("utf-8"):
                    entry.find_elements_by_tag_name("a")[0].click()
                    break
        # Only the first group whose heading matches is handled.
        break

    time.sleep(1)
def main(cityName, area, area_type, tag, tag_type):
    """Run one scraping session: query, walk the result pages, store new POIs.

    Args:
        cityName: Not used inside this function. NOTE(review): ``getReady``
            is called without arguments, so the city it searches for does not
            come from this parameter -- confirm with the caller.
        area: Passed to ``queryByInput``.
        area_type: Not used here (only referenced by the disabled
            ``queryByChooseLabel`` call).
        tag: Passed to ``queryByInput``.
        tag_type: Not used here (only referenced by the disabled
            ``queryByChooseLabel`` call).

    Any exception raised while scraping is printed, not propagated. Cleanup
    now lives in a single ``finally`` block instead of being duplicated in the
    success and error paths: closing the browser windows is attempted first,
    and the driver process is terminated even if that close fails. Cleanup
    steps are skipped for handles that were never obtained.
    """
    pid = None
    broswer = None
    try:
        pid, broswer = getReady()
        # queryByChooseLabel(broswer,area,area_type,tag,tag_type)
        queryByInput(broswer, area, tag)

        hasNext = True
        i = 1
        while hasNext:
            print(i)
            i += 1
            # Hard stop: the loop is left as soon as the counter reaches 60,
            # before that iteration's page is processed.
            if i >= 60:
                break

            poiElement = util.findElementsByClassName(broswer, "poilist")[0]
            items = poiElement.find_elements_by_tag_name("li")
            # Re-read the stored uids for every page.
            already_scraw = file_func.readFileToList("data/uid.txt")
            for item in items:
                pageSource = util.getInnerHtml(item)
                uid, poiInfo = parsePOI_Info(pageSource)
                if uid not in already_scraw:
                    print(uid + " not in")
                    file_func.listAppendToTxt("data/info.txt", poiInfo)
                    file_func.writeToTxt("data/uid.txt", uid)
                else:
                    print(uid + " in")
            time.sleep(1)

            hasNext = goToNextPage(broswer)
            # Re-issue the query when the page reports no items.
            if getItemCount(broswer) == 0:
                gotoQuery(broswer)

            util.switchToCurrentWindow(broswer)
            time.sleep(2)
            cardElement = util.findElementById(broswer, "toast-wrapper")
            time.sleep(2)
            ActionChains(broswer).move_to_element(cardElement).perform()
            time.sleep(10)
    except Exception as e:
        # Top-level boundary: report the failure and fall through to cleanup.
        print(e)
    finally:
        # A failure while closing windows must not prevent the driver process
        # from being terminated, so the two steps are isolated.
        if broswer is not None:
            try:
                util.closeAllHandle(broswer)
            except Exception as close_error:
                print(close_error)
        print(pid)
        if pid is not None:
            util.terminatePid(pid)
def getReady():
    """Start a browser on Baidu Maps, search for the city and open a category.

    Returns:
        tuple: ``(pid, broswer)`` -- the value ``util.getPid`` derives from
        the process lists taken before and after browser start-up, and the
        browser returned by ``util.createBrowser``.
    """
    # Driver executable name handed to util.getPid. "phantomjs.exe" and
    # "chromedriver.exe" are the alternatives that were used here before.
    driver_exe = "geckodriver.exe"

    # Snapshot the process list on both sides of the browser launch.
    pids_before = psutil.pids()
    broswer = util.createBrowser("https://map.baidu.com/")
    time.sleep(3)
    pids_after = psutil.pids()
    pid = util.getPid(pids_before, pids_after, driver_exe)

    util.switchToCurrentWindow(broswer)
    search_box = util.findElementById(broswer, "sole-input")
    search_box.clear()
    # NOTE(review): cityName is neither a parameter nor a local of this
    # function, so it has to exist at module scope -- confirm.
    search_box.send_keys(cityName.decode("utf-8"))
    util.findElementById(broswer, "search-button").click()
    time.sleep(2)

    # Empty the box again and click into it before picking the category.
    search_box.clear()
    search_box.click()
    special_items = util.findElementsByClassName(broswer, "city-special-item")
    special_items[0].click()
    return pid, broswer