Python DBHelper.db_insertCrawlingData 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: DbMgr

클래스/타입: DBHelper

메소드/함수: db_insertCrawlingData

hotexamples.com에서의 예제들: 4

Python DBHelper.db_insertCrawlingData - 4개의 예제를 찾았습니다. 아래는 오픈소스 프로젝트에서 추출한 Python DbMgr.DBHelper.db_insertCrawlingData의 실제 사용 예제 중 평가가 가장 높은 것들입니다. 예제를 평가해 주시면 예제 품질 향상에 도움이 됩니다.

자주 사용되는 메소드들

보기 숨기기

db_insertCrawlingData(4)

db_insertCrawlingDAta(1)

db_insertcrawlingdata(1)

예제 #1

파일 보기

파일: main.py 프로젝트: stelch1234/Python_Crawling

        time.sleep(2)
        # pip install bs4
        # 혖재 페이지를 beautifulsoup의 DOM으로 구성
        soup = bs(driver.page_source, 'html.parser')
        # 현제 상세 정보 페이지에서 스케줄 정보 획득
        data = soup.select('.tip-cover')
        # print( type(data), len(data), type(data[0].contents)  )
        # 디비 입력 => pip install pymysql
        # 데이터 sum
        content_final = ''
        for c in data[0].contents:
            content_final += str(c)

        # html 콘첸츠 데이터 전처리 (디비에 입력 가능토록)
        import re

        content_final = re.sub("'", '"', content_final)
        content_final = re.sub(re.compile(r'\r\n|\r|\n|\n\r+'), '',
                               content_final)

        print(content_final)
        # 콘텐츠 내용에 따라 전처리 => data[0].contents
        db.db_insertCrawlingData(tour.title, tour.price[:-1],
                                 tour.area.replace('출발 가능 기간 : ', ''),
                                 content_final, keyword)

# 종료
driver.close()

import sys
sys.exit()

예제 #2

파일 보기

파일: run.py 프로젝트: WebScrapingStudy/StudyRepository

    print(type(tour))
    # 링크데이터에서 실데이터 획득
    # 분해
    arr = tour.link.split(',')
    if arr:
        # 대체
        link = arr[0].replace('searchModule.OnClickDetail(', '')
        # 슬라이싱 => 앞에 ', 뒤에 ' 제거
        detail_url = link[1:-1]
        # 상세 페이지 이동 : URL 값이 완성된 형태인지 확인 (http~)
        driver.get(detail_url)
        time.sleep(1)
        # 현재 페이지를 bs4의 DOM으로 구성
        soup = bs(driver.page_source, 'html.parser')
        # 현재 상세 정보 페이지에서 스케줄 정보 획득
        data = soup.select('.tip-cover')
        print(type(data), len(data), data[0].contents)
        # DB 입력
        content_final = ''
        # for c in data[0].contents:
        #     contnet_final = str(c)

        # 컨텐츠 내용에 따라 전처리
        db.db_insertCrawlingData(tour.title, tour.price, tour.area,
                                 data[0].contents, keyword)

# 종료
driver.close()
driver.quit()
sys.exit()

예제 #3

파일 보기

        link_notnull = 'None'
    else:
        link_notnull = iteminfo.link

    img_notnull = ''
    if not iteminfo.img:
        img_notnull = 'None'
    else:
        img_notnull = iteminfo.img

    # 콘텐츠 내용에 따라서 전처리 => data[0].contents
    db.db_insertCrawlingData(
            iteminfo.title,
            iteminfo.price,
            '',
            '',
            keyword,
            img_notnull,
            link_notnull
    )

# 종료

driver.close()
driver.quit()
sys.exit()

# 추가 작업
# proxy , agent
# mobile, pc 구분
# image 제거

예제 #4

파일 보기

파일: run.py 프로젝트: rheehot/itemcrawling

        #DB 입력 => pip install pymysql
        # 데이터 sum
        content_final = ''
        for c in data[0].contents:
            content_final += str(c)

        # html 콘첸츠 데이터 전처리 (디비에 입력 가능토록)
        import re
        content_final   = re.sub("'", '"', content_final)
        content_final   = re.sub(re.compile(r'\r\n|\r|\n|\n\r+'), '', content_final)

        # 콘텐츠 내용에 따라서 전처리 => data[0].contents
        db.db_insertCrawlingData(
                tour.title,
                tour.price,
                tour.area,
                content_final,
                keyword
        )

# 종료

driver.close()
driver.quit()
import sys
sys.exit()

# 추가 작업
# proxy , agent
# mobile, pc 구분
# image 제거