コード例 #1
0
ファイル: beauty_poi_spider.py プロジェクト: frenweh/POI
def chooseArea(broswer, area, type_):
    """Open the city panel, hover the entry for *area* and click the *type_* link.

    Parameters:
        broswer: browser handle handed to the ``util`` lookup helpers and to
            ``ActionChains``.
        area: string matched against the ``data-value`` attribute of the
            ``li`` entries inside the ``hd-list-wrap`` element.
        type_: link text looked up inside the element with id ``bsAreaList``.

    Every ``util.find...`` call here passes a trailing ``False`` and uses the
    result as a single element (``.click()`` / hover target).
    """
    time.sleep(2)
    panelTab = util.findElementsByCssSelector(
        broswer, 'li[data-type="city-panel"]', False)
    time.sleep(1)
    panelTab.click()
    time.sleep(1)

    util.switchToCurrentWindow(broswer)
    listWrap = util.findElementsByClassName(broswer, "hd-list-wrap", False)
    time.sleep(1)

    areaSelector = 'li[data-value="' + area + '"]'
    areaEntry = util.findElementsByCssSelector(listWrap, areaSelector, False)
    time.sleep(1)
    # The area entry is only hovered, never clicked; the element that gets
    # clicked is looked up under "bsAreaList" afterwards.
    # NOTE(review): presumably the hover is what fills "bsAreaList" - confirm.
    ActionChains(broswer).move_to_element(areaEntry).perform()
    time.sleep(1)

    subAreaList = util.findElementById(broswer, "bsAreaList")
    util.findElementsByLinkText(subAreaList, type_, False).click()
    time.sleep(1)
コード例 #2
0
ファイル: beauty_poi_spider.py プロジェクト: frenweh/POI
def getAllTag(broswer):
    """Collect every tag group and its options from the tag filter panel.

    Clicks the ``li[data-type="tag-panel"]`` tab, then walks each ``dl``
    inside the ``cater-filter-tag`` element. A ``dl`` without a ``dt`` is
    skipped.

    Parameters:
        broswer: browser handle handed to the ``util`` lookup helpers.

    Returns:
        dict mapping each group name (stripped, UTF-8 encoded innerHTML of the
        first ``h2`` in the group's ``dt``) to a list of option names
        (stripped, UTF-8 encoded innerHTML of the first ``a`` in every ``li``
        of the group's first ``dd``).

    Raises:
        IndexError: if a group has a ``dt`` but no ``h2``/``dd``, or an ``li``
            has no ``a`` (the ``[0]`` lookups are unguarded).
    """
    util.switchToCurrentWindow(broswer)
    time.sleep(1)
    areaElement = util.findElementsByCssSelector(
        broswer, "li[data-type=\"tag-panel\"]", False)
    time.sleep(2)
    areaElement.click()
    time.sleep(1)
    util.switchToCurrentWindow(broswer)
    divElement = util.findElementsByClassName(broswer, "cater-filter-tag",
                                              False)
    items = divElement.find_elements_by_tag_name("dl")
    result = {}
    for item in items:
        dt = item.find_elements_by_tag_name("dt")
        if len(dt) > 0:
            tagName = dt[0].find_elements_by_tag_name("h2")[0].get_attribute(
                "innerHTML").strip().encode("utf-8")
            dd = item.find_elements_by_tag_name("dd")
            lis = dd[0].find_elements_by_tag_name("li")
            result[tagName] = []
            for li in lis:
                tagType = li.find_elements_by_tag_name("a")[0].get_attribute(
                    "innerHTML").strip().encode("utf-8")
                result[tagName].append(tagType)
            # Progress output: number of options found for this group.
            print(len(result[tagName]))
    # Progress output: number of groups found.
    print(len(result))
    time.sleep(1)
    # The collected mapping used to be built and then discarded; hand it back
    # the same way getAllArea does.
    return result
コード例 #3
0
ファイル: beauty_poi_spider.py プロジェクト: frenweh/POI
def getAllArea(broswer):
    """Collect, for every entry of the city panel, the names listed under it.

    Clicks the ``li[data-type="city-panel"]`` tab, then hovers each ``li`` of
    the ``hd-list-wrap`` element in turn and reads the ``li`` entries of the
    element with id ``bsAreaList`` after each hover.

    Parameters:
        broswer: browser handle handed to the ``util`` lookup helpers and to
            ``ActionChains``.

    Returns:
        dict mapping each hovered entry's ``data-value`` attribute to the list
        of innerHTML strings of the first ``a`` in every ``bsAreaList`` ``li``.
    """
    time.sleep(2)
    panelTab = util.findElementsByCssSelector(
        broswer, 'li[data-type="city-panel"]', False)
    time.sleep(1)
    panelTab.click()
    time.sleep(1)

    util.switchToCurrentWindow(broswer)
    listWrap = util.findElementsByClassName(broswer, "hd-list-wrap", False)
    time.sleep(1)

    result = {}
    for entry in listWrap.find_elements_by_tag_name("li"):
        time.sleep(1)
        ActionChains(broswer).move_to_element(entry).perform()
        time.sleep(1)
        areaName = entry.get_attribute("data-value")
        # Looked up again after every hover, so each entry gets the list as it
        # stands at that moment.
        subList = util.findElementById(broswer, "bsAreaList")
        result[areaName] = [
            node.find_elements_by_tag_name("a")[0].get_attribute("innerHTML")
            for node in subList.find_elements_by_tag_name("li")
        ]
        # Progress output: number of names found for this entry.
        print(len(result[areaName]))
    # Progress output: number of entries visited.
    print(len(result))
    return result
コード例 #4
0
ファイル: beauty_poi_spider.py プロジェクト: frenweh/POI
def chooseType(broswer, tag, type_):
    """Open the tag panel and click an option inside the group matching *tag*.

    Only the first ``dl`` whose ``dt`` innerHTML contains *tag* is handled.
    Within that group's first ``dd``:

    * if its innerHTML contains "全部", the link of the first ``li`` is
      clicked and *type_* is not consulted;
    * otherwise the link of the first ``li`` whose innerHTML contains *type_*
      is clicked (nothing is clicked when none matches).

    Parameters:
        broswer: browser handle handed to the ``util`` lookup helpers.
        tag: byte string searched for in the UTF-8 encoded ``dt`` markup.
        type_: byte string searched for in the UTF-8 encoded ``li`` markup.
    """
    util.switchToCurrentWindow(broswer)
    time.sleep(1)
    panelTab = util.findElementsByCssSelector(
        broswer, 'li[data-type="tag-panel"]', False)
    time.sleep(2)
    panelTab.click()
    time.sleep(1)

    util.switchToCurrentWindow(broswer)
    filterBox = util.findElementsByClassName(broswer, "cater-filter-tag",
                                             False)
    for group in filterBox.find_elements_by_tag_name("dl"):
        headers = group.find_elements_by_tag_name("dt")
        if not headers:
            continue
        if tag not in util.getInnerHtml(headers[0]).encode("utf-8"):
            continue

        print("find tag")
        body = group.find_elements_by_tag_name("dd")[0]
        hasAllOption = "全部" in util.getInnerHtml(body).encode("utf-8")
        options = body.find_elements_by_tag_name("li")
        if hasAllOption:
            options[0].find_elements_by_tag_name("a")[0].click()
        else:
            for option in options:
                if type_ in util.getInnerHtml(option).encode("utf-8"):
                    option.find_elements_by_tag_name("a")[0].click()
                    break
        # Only the first matching group is ever processed.
        break
    time.sleep(1)
コード例 #5
0
ファイル: beauty_poi_spider.py プロジェクト: frenweh/POI
def main(cityName, area, area_type, tag, tag_type):
    """Page through the POI result list and persist every entry not seen before.

    Flow: ``getReady()`` supplies ``(pid, broswer)``, ``queryByInput`` issues
    the search for *area* and *tag*, then each result page is read from the
    first ``poilist`` element. Entries whose uid is not yet listed in
    ``data/uid.txt`` are appended to ``data/info.txt`` and their uid is
    recorded in ``data/uid.txt``.

    Parameters:
        cityName: not referenced in this body.
        area: passed to ``queryByInput``.
        area_type: not referenced in this body (only the disabled
            ``queryByChooseLabel`` variant took it).
        tag: passed to ``queryByInput``.
        tag_type: not referenced in this body (same as *area_type*).

    Any ``Exception`` raised while scraping is printed and swallowed. Cleanup
    (closing the browser handles, terminating the driver process) runs in a
    ``finally`` so it happens exactly once on success, on failure and on
    interruption, and is skipped for whichever of the two was never obtained.
    """
    pid = None
    broswer = None
    try:
        pid, broswer = getReady()

        # Disabled alternative to the input-based query below:
        # queryByChooseLabel(broswer, area, area_type, tag, tag_type)
        queryByInput(broswer, area, tag)

        hasNext = True
        i = 1
        while hasNext:
            print(i)
            # The counter is bumped before the check, so the loop gives up as
            # soon as it reaches 60 regardless of hasNext.
            i += 1
            if i >= 60:
                break
            poiElement = util.findElementsByClassName(broswer, "poilist")[0]
            items = poiElement.find_elements_by_tag_name("li")
            # Re-read once per page so uids written for earlier pages count.
            already_scraw = file_func.readFileToList("data/uid.txt")
            for item in items:
                pageSource = util.getInnerHtml(item)
                uid, poiInfo = parsePOI_Info(pageSource)
                if uid not in already_scraw:
                    print(uid + " not in")
                    file_func.listAppendToTxt("data/info.txt", poiInfo)
                    file_func.writeToTxt("data/uid.txt", uid)
                else:
                    print(uid + " in")
            time.sleep(1)
            hasNext = goToNextPage(broswer)
            # An empty page triggers gotoQuery before continuing.
            if getItemCount(broswer) == 0:
                gotoQuery(broswer)
            util.switchToCurrentWindow(broswer)
            time.sleep(2)
            cardElement = util.findElementById(broswer, "toast-wrapper")
            time.sleep(2)
            # Park the pointer on the "toast-wrapper" element and wait before
            # reading the next page.
            ActionChains(broswer).move_to_element(cardElement).perform()
            time.sleep(10)
    except Exception as e:
        print(e)
    finally:
        try:
            # broswer stays None when getReady() itself failed; there is
            # nothing to close in that case.
            if broswer is not None:
                util.closeAllHandle(broswer)
        finally:
            # Still attempt to stop the driver process when closing the
            # handles fails, so it is not left running.
            print(pid)
            if pid is not None:
                util.terminatePid(pid)
コード例 #6
0
ファイル: beauty_poi_spider.py プロジェクト: frenweh/POI
def getReady():
    """Start a browser on map.baidu.com, search for the city and open a category.

    Process ids are snapshotted before and after the browser is created and
    both snapshots, together with the driver executable name, are given to
    ``util.getPid`` to obtain ``pid``.

    NOTE(review): ``cityName`` is neither a parameter nor a local, so it is
    resolved from module scope when this runs; it is decoded from UTF-8 bytes
    before being typed into the search box.

    Returns:
        tuple ``(pid, broswer)``: the value from ``util.getPid`` and the
        browser handle from ``util.createBrowser``.
    """
    # Alternatives used previously: "phantomjs.exe", "chromedriver.exe".
    driverType = "geckodriver.exe"

    pidsBefore = psutil.pids()
    broswer = util.createBrowser("https://map.baidu.com/")
    time.sleep(3)
    pidsAfter = psutil.pids()
    pid = util.getPid(pidsBefore, pidsAfter, driverType)
    util.switchToCurrentWindow(broswer)

    searchBox = util.findElementById(broswer, "sole-input")
    searchBox.clear()
    searchBox.send_keys(cityName.decode("utf-8"))
    util.findElementById(broswer, "search-button").click()
    time.sleep(2)

    # Empty the box again and click it, then click the first
    # "city-special-item" element.
    searchBox.clear()
    searchBox.click()
    util.findElementsByClassName(broswer, "city-special-item")[0].click()
    return pid, broswer