コード例 #1
0
ファイル: Dazhong.py プロジェクト: helanfeiyun/Crawl-Selenium
def main():
    """Write ``title`` through ``writer``, then call ``search`` for pages 1..num.

    Relies on names defined elsewhere in the module: ``writer``, ``title``,
    ``district``, ``file``, ``num`` and ``search``. ``search`` receives the
    page number as a string.
    """
    writer(title, district, file, interval=False)

    # NOTE(review): the delay is drawn once, so every page waits the same
    # 6-13 seconds; confirm whether a fresh delay per page was intended.
    pause_seconds = random.randint(6, 13)

    for page_label in map(str, range(1, num + 1)):
        time.sleep(pause_seconds)
        search(page_label)
コード例 #2
0
def getDetailInfo():
    """Scrape rating, text and date of every review across ``num`` pages.

    For each page the function waits until an element matching
    ``._j_commentlist`` is present, parses ``browser.page_source`` with
    ``pq`` and walks every ``.rev-list .rev-item`` node. One row per review
    (``name_info`` followed by rating label, comment text and the first 11
    characters of the time text) is handed to ``writer``. After a page is
    processed its index is printed and ``nextPage()`` is called.

    Relies on names defined elsewhere in the module: ``wait``, ``EC``,
    ``By``, ``browser``, ``pq``, ``num``, ``name_info``, ``district``,
    ``file``, ``writer`` and ``nextPage``.
    """
    comment_list = (By.CSS_SELECTOR, '._j_commentlist')
    # The rating is the last character of the '.s-star' class attribute.
    # Codes outside this table are kept unchanged, as before.
    star_labels = {
        '1': "很差",
        '2': "一般",
        '3': "好",
        '4': "很好",
        '5': "非常好",
    }

    wait.until(EC.presence_of_element_located(comment_list))

    for i in range(1, num + 1):
        # Re-check on every page: nextPage() has replaced the content.
        wait.until(EC.presence_of_element_located(comment_list))
        doc = pq(browser.page_source)

        for item in doc('.rev-list .rev-item').items():
            if item is None:
                print("item中没有信息")
                continue

            star_code = str(item.find('.s-star').attr('class'))[-1]
            title = star_labels.get(star_code, star_code)
            comment = item.find('.rev-txt').text()
            comment_time = item.find('.time').text()[:11]

            title_info = name_info + [title, comment, comment_time]
            writer(title_info, district, file, interval=False)

        print(i)
        nextPage()
コード例 #3
0
def getDetailInfo():
    """Scrape the overall scores of an estate page and each review on it.

    Steps, as performed below:

    1. Wait for ``#daohang`` to be present, then keep calling
       ``showMoreInfo()`` while it returns 2 and ``num <= 16``.
    2. Parse ``browser.page_source`` and read the estate name, the combined
       score and five sub-scores from the ``.Comprehensive_score`` area.
    3. For every ``#dpContentList .comm_list .comm_list_nr`` node, read five
       single-digit scores, their mean, the comment text and the first 10
       characters of the ``.look_hou`` text, and pass the estate fields
       followed by the review fields to ``writer``.

    Reviews that cannot be parsed or written are skipped with a message.

    Relies on names defined elsewhere in the module: ``wait``, ``EC``,
    ``By``, ``browser``, ``pq``, ``showMoreInfo``, ``num``, ``district``,
    ``file`` and ``writer``.

    Raises:
        IndexError: if the estate summary lacks the expected score matches.
    """
    wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, '#daohang')))
    time.sleep(1)
    # NOTE(review): `num` is not changed in this loop; presumably
    # showMoreInfo() updates it - confirm against its definition.
    while showMoreInfo() == 2 and num <= 16:
        time.sleep(2)
        print("in the more information")

    html = browser.page_source
    doc = pq(html)
    print(doc)

    houses_name = doc('#xfxq_B03_01').attr('title')
    houses_first = doc('body .Comprehensive_score .mgt_2').text()
    # Raw strings keep the patterns unchanged while avoiding the invalid
    # "\d" escape sequence of a normal string literal.
    houses_second = re.findall(
        r'(.\d\d)', doc('body .Comprehensive_score .fbold14').text())
    houses_score = str(houses_first) + houses_second[0]

    houses_gray = doc('body .Comprehensive_score .font_gray').text()
    # One scan yields all five sub-scores, in page order.
    gray_scores = re.findall(r'\d.\d\d', houses_gray)
    houses_price = gray_scores[0]
    houses_loc = gray_scores[1]
    houses_support = gray_scores[2]
    houses_trans = gray_scores[3]
    houses_env = gray_scores[4]
    houses_info = [houses_name, houses_score, houses_price, houses_loc,
                   houses_support, houses_trans, houses_env]

    items = doc('#dpContentList .comm_list .comm_list_nr').items()
    print('楼盘信息保存完毕')

    for item in items:
        if item is None:
            print("item中没有信息")
            continue
        try:
            list_score = item.find('.comm_list_score .inf').text()
            # The first five digits are the review's sub-scores; fewer than
            # five makes the unpacking fail and the review is skipped.
            digits = re.findall(r'(\d)', list_score)
            comm_price, comm_loc, comm_trans, comm_support, comm_env = digits[:5]
            comm_score = (int(comm_price) + int(comm_loc) + int(comm_trans)
                          + int(comm_support) + int(comm_env)) / 5
            comment = item.find('.comm_list_con').text()
            comment_time = item.find('.look_hou').text()[:10]
            comm_info = [comm_score, comm_price, comm_loc, comm_trans,
                         comm_support, comm_env, comment, comment_time]
            writer(houses_info + comm_info, district, file, interval=False)
        except Exception as exc:
            # Best effort: one bad review must not stop the crawl, but
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            print("skipping review:", exc)
            continue
コード例 #4
0
ファイル: Dazhong.py プロジェクト: helanfeiyun/Crawl-Selenium
def getDetailInfo():
    """Scrape rating, text and time of every review on the current page.

    Waits for the review-list header link to be present, parses
    ``browser.page_source`` with ``pq`` and walks every
    ``#review-list .reviews-items .main-review`` node. For each review the
    row ``shop_info + [rating, comment, time]`` is passed to ``writer`` and
    the review fields are printed. Reviews whose star class attribute is
    too short to hold a rating digit are skipped.

    Relies on names defined elsewhere in the module: ``wait``, ``EC``,
    ``By``, ``browser``, ``pq``, ``shop_info``, ``district``, ``file`` and
    ``writer``.
    """
    print('in the getDetailInfo')
    wait.until(
        EC.presence_of_element_located((By.CSS_SELECTOR, '#review-list > div.review-list-container > div.review-list-main > div.review-list-header > h1 > a'))
    )
    html = browser.page_source
    print('got html')
    doc = pq(html)

    # Codes outside this table are kept unchanged, as before.
    rank_labels = {
        '1': "很差",
        '2': "一般",
        '3': "好",
        '4': "很好",
        '5': "非常好",
    }

    for item in doc('#review-list .reviews-items .main-review').items():
        if item is None:
            print("item中没有信息")
            continue

        star_class = str(item.find('.sml-rank-stars').attr('class'))
        try:
            # The rating digit sits seven characters from the end of the
            # class string; a missing attribute gives "None", which is too
            # short, so the review is skipped.
            rank_code = star_class[-7]
        except IndexError:
            continue
        title = rank_labels.get(rank_code, rank_code)

        comment = item.find('.review-words').text()
        comment_time = item.find('.time ').text()
        info = [title, comment, comment_time]
        title_info = shop_info + info
        writer(title_info, district, file, interval=False)

        print(info)
        print('\n')
コード例 #5
0
def main():
    """Load ``url`` in the browser, write ``title``, then scrape the details.

    Relies on names defined elsewhere in the module: ``browser``, ``url``,
    ``writer``, ``title``, ``district``, ``file`` and ``getDetailInfo``.
    """
    browser.get(url)

    # `title` goes out before any scraped rows (presumably the column
    # headings - confirm against the definition of `title`).
    writer(title, district, file, interval=False)

    # The goComment() step is currently disabled; scraping starts directly.
    getDetailInfo()