Python close_driver Exemples, utils.close_driver Python Exemples

Exemple #1

0

Afficher le fichier

def report_main(jm_code, rcp_no):
    """Store the compensation/audit committee flags of one DART filing.

    Opens the DART viewer page for ``rcp_no``, reads the two flags returned
    by ``get_board_yn`` for tabs ``'b'`` and ``'g'`` and inserts one row
    into ``proxy700_tmp``.

    Args:
        jm_code: Stock code written to the first column of the new row.
        rcp_no: DART receipt number of the filing to open.
    """
    # Bind every resource up front so the cleanup below never trips over an
    # unbound name (which would mask the real error) when an early step fails.
    driver = None
    conn = None
    cursor = None
    try:
        # Set up the driver (resolution, notice).
        driver = get_driver(
            'C:\\Users\\admin\\PycharmProjects\\webCrawl\\chromedriver.exe',
            'http://dart.fss.or.kr/dsaf001/main.do?rcpNo={0}'.format(rcp_no))

        # Receipt-number history of the shareholders' meeting resolution.
        rcpno_list = get_rcpno_list(driver)
        # Announcement year of the first document.
        # NOTE(review): the value is unused below; the lookup is kept so the
        # function still aborts before the insert if the first entry cannot
        # be read - confirm whether that is intended.
        first_rcp_yy = rcpno_list[0][:4]

        conn = get_dbcon('esg')
        cursor = conn.cursor()

        # Check whether a compensation committee exists.
        get_tab(driver, 'b')
        bosang_yn = get_board_yn(driver, 'b')
        print(bosang_yn)
        driver.switch_to_default_content()
        # Check whether an audit committee exists.
        get_tab(driver, 'g')
        gamsa_yn = get_board_yn(driver, 'g')
        print(gamsa_yn)

        # DB insert. The year column is hard-coded to '2018'.
        # NOTE(review): the statement is built by string formatting; switch
        # to bound parameters once the DB driver's paramstyle is confirmed.
        insert_qry = """insert into proxy700_tmp values('{0}', '{1}', '{2}', '{3}')""".format(
            jm_code, '2018', bosang_yn, gamsa_yn)
        cursor.execute(insert_qry)
    finally:
        if cursor is not None:
            cursor.close()
        if conn is not None:
            close_dbcon(conn)
        if driver is not None:
            close_driver(driver)

Exemple #2

0

Afficher le fichier

def get_search_options():
    """Save the option URLs of every search filter to JSON files.

    Logs in with the credentials stored in ``~/plst.credential.json`` and,
    for each filter, writes the mapping returned by
    ``search.get_filter_options_url`` to ``search/filt_<key>_urls.json``.
    """
    filt_name_dict = {
        'level': 'SKILL LEVELS',
        'role': 'ROLES',
        'subject': 'SUBJECTS TO LEARN',
        'tool': 'TOOLS',
        'cert': 'CERTIFICATIONS',
        'author': 'AUTHORS',
    }

    driver = utils.start_driver('chrome')
    # Outer guard: the browser is closed even if login or logout fails.
    try:
        home_dir = str(pathlib.Path.home())
        session.login(driver, home_dir + '/plst.credential.json')
        utils.wait(3)

        # Inner guard: log out only once the login has succeeded.
        try:
            for filt, filt_name in sorted(filt_name_dict.items()):
                opt_url_dict = search.get_filter_options_url(driver, filt_name)
                utils.save_json(opt_url_dict,
                                'search/filt_{}_urls.json'.format(filt))
                utils.wait(10)
        finally:
            session.logout(driver)
            utils.wait(3)
    finally:
        utils.close_driver(driver)

Exemple #3

0

Afficher le fichier

 def tearDown(self):
     """
     Cleanup the environment.

     Closes the browser driver and stops the proxy. The proxy is stopped
     even when closing the driver raises, so it is not left running.
     """
     print('\nTearing down.')
     try:
         utils.close_driver(self.driver)
     finally:
         self.stop_proxy()

Exemple #4

0

Afficher le fichier

Fichier : script_get_video.py Projet : zyhuang/ragno

def get_course_videos(course_list, ncourse_max):
    """Download the video clips of the courses listed in a text file.

    Each non-comment line of ``course_list`` is a course id. Courses whose
    output directory already holds the expected number of videos are
    skipped and do not count towards ``ncourse_max``.

    Args:
        course_list: Path of the text file with one course id per line;
            lines starting with ``#`` are ignored.
        ncourse_max: Stop after this many courses have been downloaded.

    Raises:
        RuntimeError: If the number of files found after downloading a
            course differs from the number of clips expected.
    """
    cache_dir = 'cache'

    driver = utils.start_driver('chrome', download_dir=cache_dir)
    # Outer guard: the browser is closed even if login or logout fails.
    try:
        home_dir = str(pathlib.Path.home())
        session.login(driver, home_dir + '/plst.credential.json')
        utils.wait(3)

        # Inner guard: log out only once the login has succeeded.
        try:
            ncourse = 0
            # The context manager closes the list file on every exit path.
            with open(course_list) as course_file:
                for line in course_file:
                    if line.startswith('#'):
                        continue
                    course_id = line.rstrip()
                    video_list, nmodule, nclip = load_course_video_list(
                        course_id)
                    nvideo = len(video_list)

                    out_dir = 'courses/{}/videos'.format(course_id)
                    os.makedirs(out_dir, exist_ok=True)

                    # Already complete: nothing to download.
                    if count_videos(out_dir) == nvideo:
                        continue

                    utils.print_message(
                        'get video of "{}" ({}/{}): {} modules and '
                        '{} clips'.format(course_id, ncourse + 1,
                                          ncourse_max, nmodule, nclip))

                    for module_id, clip_id, clip_url in video_list:
                        video_url = course.get_video_url(driver, clip_url)
                        video_basename = video_url.split('?')[0].split('/')[-1]
                        video_name = '{}/{}.{}.{}'.format(
                            out_dir, module_id, clip_id, video_basename)
                        utils.download_file(video_url, video_name,
                                            verbose=True)

                    ndownload = count_videos(out_dir)
                    if ndownload != nvideo:
                        message = ('*ERROR*: course "{}", expected {} clips, '
                                   'downloaded {}'.format(
                                       course_id, nvideo, ndownload))
                        utils.print_message(message)
                        # A bare ``raise`` has no active exception here and
                        # would only produce a generic RuntimeError; raise
                        # the same type with a meaningful message instead.
                        raise RuntimeError(message)

                    ncourse += 1
                    utils.print_message(
                        '----------------------------------------------')
                    utils.wait(3)

                    if ncourse == ncourse_max:
                        break
        finally:
            session.logout(driver)
            utils.wait(3)
    finally:
        utils.close_driver(driver)

Exemple #5

0

Afficher le fichier

def get_rcpNo(jm_code, keywod, st_dt, ed_dt):
    """Search DART and return the receipt number of the top result.

    Args:
        jm_code: Stock code typed into the company field.
        keywod: Keyword typed into the report-name field.
        st_dt: Unused; kept for interface compatibility.
        ed_dt: Unused; kept for interface compatibility.

    Returns:
        The 14-character receipt number of the first result row, or ``0``
        when there is no result, the number is not 14 characters long, or
        the row's correction label contains '첨부'.
    """
    driver = get_driver(
        'C:\\Users\\admin\\PycharmProjects\\webCrawl\\chromedriver.exe',
        'http://dart.fss.or.kr/dsab002/main.do#')  # load the driver
    # The finally clause closes the browser on every exit path, including
    # the early ``return 0`` branches that previously left it open.
    try:
        driver.implicitly_wait(10)

        driver.find_element_by_name('textCrpNm').send_keys(jm_code)  # stock code
        driver.find_element_by_xpath(
            '//*[@id="searchForm"]/fieldset/div/p[3]/span[2]/a[7]').click()  # period
        driver.find_element_by_name('reportName').send_keys(keywod)  # keyword
        time.sleep(1)
        driver.find_element_by_xpath(
            '//*[@id="searchForm"]/fieldset/div/p[8]/input').click()  # search

        res_list = driver.find_elements_by_xpath(
            '//*[@id="listContents"]/div[1]/table/tbody/tr')  # result rows

        # Only the top row is used (so the crawl must run at short intervals).
        if not res_list:
            print('검색 결과가 없습니다.')
            return 0
        item = res_list[0]

        cells = item.find_elements_by_tag_name('td')
        link = cells[2].find_element_by_tag_name('a')
        # Document number: the last 14 characters of the link target.
        rcp_no = link.get_attribute('href')[-14:]
        # Correction label attached to the report title.
        rcp_yn = link.find_element_by_tag_name('span').text
        # Market classification, mapped to 'K' or 'Q'.
        # NOTE(review): rcp_gb is not returned or used afterwards.
        rcp_gb = cells[5].find_element_by_tag_name('img').get_attribute(
            'title')
        if '유가' in rcp_gb:
            rcp_gb = 'K'
        else:
            rcp_gb = 'Q'

        if len(rcp_no) != 14:
            print('rcpNo 형식이 다릅니다.')
            return 0

        # Attachment corrections are excluded from collection.
        if '첨부' in rcp_yn:
            print('첨부정정은 수집대상 제외')
            return 0

        return rcp_no
    finally:
        close_driver(driver)

Exemple #6

0

Afficher le fichier

def hando_main(jm_code, rcp_no, gijun_yy):
    """Print every item ``get_hando`` extracts from one DART filing.

    Args:
        jm_code: Stock code passed through to ``get_hando``.
        rcp_no: DART receipt number of the filing to open.
        gijun_yy: Reference year passed through to ``get_hando``.
    """
    # Set up the driver.
    driver = get_driver(
        'C:\\Users\\admin\\PycharmProjects\\webCrawl\\chromedriver.exe',
        'http://dart.fss.or.kr/dsaf001/main.do?rcpNo={0}'.format(rcp_no))

    # Close the browser even when extraction or printing fails.
    try:
        hando = list(get_hando(driver, jm_code, gijun_yy))

        for h in hando:
            print(h)
    finally:
        close_driver(driver)

Exemple #7

0

Afficher le fichier

def get_rcpNo(jm_code, keywod):
    """Search DART and return the receipt number of the selected result.

    The last result row whose title link contains '2018.12' is selected;
    when no row matches, the first row is used.

    Args:
        jm_code: Stock code typed into the company field.
        keywod: Keyword typed into the report-name field.

    Returns:
        The 14-character receipt number, or ``0`` when there is no result,
        the number is not 14 characters long, or the row's correction label
        contains '첨부'.
    """
    driver = get_driver(
        'C:\\Users\\admin\\PycharmProjects\\webCrawl\\chromedriver.exe',
        'http://dart.fss.or.kr/dsab002/main.do#')  # load the driver
    # The finally clause closes the browser on every exit path, including
    # the early ``return 0`` branches that previously left it open.
    try:
        driver.find_element_by_name('textCrpNm').send_keys(jm_code)  # stock code
        driver.find_element_by_xpath(
            '//*[@id="searchForm"]/fieldset/div/p[3]/span[2]/a[4]').click(
            )  # period: 1 year
        # Final-report checkbox state.
        # NOTE(review): the value is read but never used.
        checked = driver.find_element_by_xpath(
            '//*[@id="finalReport"]').get_attribute('checked')
        driver.find_element_by_id('reportName').send_keys(keywod)  # search type

        driver.find_element_by_xpath(
            '//*[@id="searchForm"]/fieldset/div/p[8]/input').click()  # search

        res_list = driver.find_elements_by_xpath(
            '//*[@id="listContents"]/div[1]/table/tbody/tr')  # result rows

        if not res_list:
            print('검색 결과가 없습니다.')
            return 0

        # Default to the top row; a later row mentioning '2018.12' wins.
        item = res_list[0]
        for row in res_list:
            title = row.find_elements_by_tag_name(
                'td')[2].find_element_by_tag_name('a').text
            if '2018.12' in title:
                item = row

        link = item.find_elements_by_tag_name(
            'td')[2].find_element_by_tag_name('a')
        # Document number: the last 14 characters of the link target.
        rcp_no = link.get_attribute('href')[-14:]
        # Correction label attached to the report title.
        rcp_yn = link.find_element_by_tag_name('span').text

        if len(rcp_no) != 14:
            print('rcpNo 형식이 다릅니다.')
            return 0

        # Attachment corrections are excluded from collection.
        if '첨부' in rcp_yn:
            print('첨부정정은 수집대상 제외')
            return 0

        return rcp_no
    finally:
        close_driver(driver)

Exemple #8

0

Afficher le fichier

Fichier : script_get_option_courses.py Projet : zyhuang/ragno

def get_search_option_courses():
    """Save the course ids found under every option of each search filter.

    For each filter, loads ``search/filt_<key>_urls.json`` and writes one
    ``search/filt_<key>_courses/<index>.json`` file per option. Options
    whose output file already exists are skipped, so an interrupted run can
    be resumed.
    """
    filt_name_dict = {
        'role': 'ROLES',
        'subject': 'SUBJECTS TO LEARN',
        'tool': 'TOOLS',
        'cert': 'CERTIFICATIONS',
        # 'level': 'SKILL LEVELS',
        # 'author': 'AUTHORS',
    }

    driver = utils.start_driver('chrome')
    # Outer guard: the browser is closed even if login or logout fails.
    try:
        home_dir = str(pathlib.Path.home())
        session.login(driver, home_dir + '/plst.credential.json')
        utils.wait(3)

        # Inner guard: log out only once the login has succeeded.
        try:
            # Only the keys are needed; the display names are not used here.
            for filt in sorted(filt_name_dict):
                opt_url_dict = utils.load_json(
                    'search/filt_{}_urls.json'.format(filt))
                out_dir = 'search/filt_{}_courses'.format(filt)
                os.makedirs(out_dir, exist_ok=True)

                nopt = len(opt_url_dict)
                # File names use the 1-based position in sorted option order.
                for opt_index, (opt, url) in enumerate(
                        sorted(opt_url_dict.items()), start=1):
                    fname_json = '{}/{}.json'.format(out_dir, opt_index)
                    if os.path.isfile(fname_json):
                        continue
                    utils.print_message(
                        'get all courses with filt={}, option={} ({}/{})'
                        .format(filt, opt, opt_index, nopt))
                    course_id_list = search.get_all_courses_per_option(
                        driver, url, wait_time=10)
                    opt_courses_dict = {opt: course_id_list}
                    utils.save_json(opt_courses_dict, fname_json)
                    utils.wait(20)
        finally:
            session.logout(driver)
            utils.wait(3)
    finally:
        utils.close_driver(driver)

Exemple #9

0

Afficher le fichier

Fichier : crawlResolution_kind.py Projet : ParkKT/deri-project

def get_rcpNo(jm_code, keywod):
    """Search KIND and return identifiers of the top result row.

    Args:
        jm_code: Stock code typed into the company field.
        keywod: Keyword typed into the report-name field.

    Returns:
        A tuple ``(rcp_no, rcp_yn, rcp_gb)``: the 14-character document
        number, the correction label (always ``''`` in this variant) and
        the ``alt`` text of the row's market icon.

    Raises:
        SystemExit: With status 0 when there is no search result or the
            document number is not 14 characters long.
    """
    driver = get_driver(
        'C:\\Users\\admin\\PycharmProjects\\webCrawl\\chromedriver.exe',
        'http://kind.krx.co.kr/disclosure/details.do?method=searchDetailsMain#viewer')  # load the driver
    # sys.exit() raises SystemExit, so the finally clause also closes the
    # browser on the exit paths that previously left it open.
    try:
        driver.find_element_by_xpath('//*[@id="AKCKwd"]').send_keys(
            jm_code)  # stock code
        time.sleep(1)
        driver.find_element_by_xpath(
            '//*[@id="search-btn-dates"]/ul/li[5]/a').send_keys(
                Keys.ENTER)  # period: 1 year
        time.sleep(1)
        checked = driver.find_element_by_xpath(
            '//*[@id="lastReport"]').get_attribute('checked')  # final-report flag
        if checked:
            # Untick the final-report checkbox.
            driver.find_element_by_xpath('//*[@id="lastReport"]').send_keys(
                Keys.ENTER)
        driver.find_element_by_id('reportNmTemp').send_keys(
            keywod)  # search type
        time.sleep(1)
        driver.find_element_by_xpath(
            '//*[@id="searchForm"]/section[1]/div/div[3]/a[1]').send_keys(
                Keys.ENTER)  # search
        time.sleep(1)
        res_list = driver.find_elements_by_xpath(
            '//*[@id="main-contents"]/section[1]/table/tbody/tr')  # result rows

        # Only the top row is used (so the crawl must run at short intervals).
        if not res_list:
            print('검색 결과가 없습니다.')
            sys.exit(0)
        item = res_list[0]

        cells = item.find_elements_by_tag_name('td')
        # Document number: a fixed slice of the link's onclick handler.
        rcp_no = cells[3].find_element_by_tag_name('a').get_attribute(
            'onclick')[-19:-5]
        print(rcp_no)
        # The correction label is not extracted in this variant.
        rcp_yn = ''
        # Market classification.
        rcp_gb = cells[2].find_element_by_tag_name('img').get_attribute('alt')

        if len(rcp_no) != 14:
            print('rcpNo 형식이 다릅니다.')
            sys.exit(0)

        # Never true while rcp_yn is hard-coded to ''; kept for the day the
        # label extraction is restored.
        if '첨부' in rcp_yn:
            print('첨부정정은 수집대상 제외')
            sys.exit(0)

        return rcp_no, rcp_yn, rcp_gb
    finally:
        close_driver(driver)

Exemple #10

0

Afficher le fichier

def get_rcpNo(jm_code, keywod, st_dt, en_dt):
    """Search DART within a date range and return the top result's ids.

    Args:
        jm_code: Stock code typed into the company field.
        keywod: Keyword typed into the report-name field.
        st_dt: Text typed into the start-date field.
        en_dt: Text typed into the end-date field.

    Returns:
        A tuple ``(rcp_no, rcp_yn)``: the 14-character receipt number and
        the correction label of the first result row.

    Raises:
        SystemExit: With status 0 when there is no result, the number is
            not 14 characters long, or the label contains '첨부'.
    """
    driver = get_driver(
        'C:\\Users\\admin\\PycharmProjects\\webCrawl\\chromedriver.exe',
        'http://dart.fss.or.kr/dsab002/main.do#')  # load the driver
    # sys.exit() raises SystemExit, so the finally clause also closes the
    # browser on the exit paths that previously left it open.
    try:
        driver.find_element_by_name('textCrpNm').send_keys(jm_code)  # stock code
        driver.find_element_by_xpath('//*[@id="startDate"]').send_keys(
            st_dt)  # period start
        driver.find_element_by_xpath('//*[@id="endDate"]').send_keys(
            en_dt)  # period end
        driver.find_element_by_id('reportName').send_keys(keywod)  # search type

        driver.find_element_by_xpath(
            '//*[@id="searchForm"]/fieldset/div/p[8]/input').click()  # search

        res_list = driver.find_elements_by_xpath(
            '//*[@id="listContents"]/div[1]/table/tbody/tr')  # result rows

        # Only the top row is used (so the crawl must run at short intervals).
        if not res_list:
            print('검색 결과가 없습니다.')
            sys.exit(0)
        item = res_list[0]

        link = item.find_elements_by_tag_name(
            'td')[2].find_element_by_tag_name('a')
        # Document number: the last 14 characters of the link target.
        rcp_no = link.get_attribute('href')[-14:]
        # Correction label attached to the report title.
        rcp_yn = link.find_element_by_tag_name('span').text

        if len(rcp_no) != 14:
            print('rcpNo 형식이 다릅니다.')
            sys.exit(0)

        # Attachment corrections are excluded from collection.
        if '첨부' in rcp_yn:
            print('첨부정정은 수집대상 제외')
            sys.exit(0)

        return rcp_no, rcp_yn
    finally:
        close_driver(driver)

Exemple #11

0

Afficher le fichier

def bd_main(jm_code, rcp_no):
    """Insert or update the ``proxy080`` row for one DART filing.

    Reads five values via ``get_bd_table`` and writes them keyed by
    ``(jm_code, bd_gijun_ymd)``: an existing row is updated, otherwise a new
    row is inserted.

    Args:
        jm_code: Stock code identifying the company.
        rcp_no: DART receipt number of the filing to open.
    """
    # Bind every resource up front so the cleanup below never trips over an
    # unbound name (which would mask the real error) when an early step fails.
    driver = None
    conn = None
    cursor = None
    try:
        # Set up the driver (resolution, notice).
        driver = get_driver(
            'C:\\Users\\admin\\PycharmProjects\\webCrawl\\chromedriver.exe',
            'http://dart.fss.or.kr/dsaf001/main.do?rcpNo={0}'.format(rcp_no))

        driver.implicitly_wait(10)

        bd_gubun, bd_kind, bd_gum, bd_total, bd_gijun_ymd = get_bd_table(
            driver)

        conn = get_dbcon('esg')
        cursor = conn.cursor()

        # Duplicate check, then insert or update.
        # NOTE(review): the statements are built by string formatting; switch
        # to bound parameters once the DB driver's paramstyle is confirmed.
        dup_select = """select * from proxy080 where jm_code = '{0}' and bd_gijun_ymd = '{1}'
                     """.format(jm_code, bd_gijun_ymd)

        cursor.execute(dup_select)

        if cursor.rowcount > 0:
            insert_qry = """update proxy080
                            set bd_gubun = '{2}', bd_kind = '{3}', bd_gum = {4}, bd_total = {5}
                            where jm_code = '{0}' and bd_gijun_ymd = '{1}'
                         """.format(jm_code, bd_gijun_ymd, bd_gubun, bd_kind,
                                    bd_gum, bd_total)
        else:
            insert_qry = """insert into proxy080 values('{0}', '{1}', '{2}', '{3}', {4}, {5})
                         """.format(jm_code, bd_gijun_ymd, bd_gubun, bd_kind,
                                    bd_gum, bd_total)

        cursor.execute(insert_qry)
    finally:
        if cursor is not None:
            cursor.close()
        if conn is not None:
            close_dbcon(conn)
        if driver is not None:
            close_driver(driver)

Exemple #12

0

Afficher le fichier

Fichier : get_zh_cn.py Projet : zyhuang/ragno

def get_all_htmls():
    '''Fetch every chapter page of the Chinese Bible.

    Walks templates 1 through 73, gathers the chapter links listed on each
    page and passes each link to ``get_content`` together with the HTML and
    MP3 output paths under ``<work_dir>/data/chinese_cn``.
    '''

    driver = utils.start_driver('phantomjs', verbose=True)
    out_dir = '{}/data/chinese_cn'.format(work_dir)
    os.makedirs(out_dir, exist_ok=True)

    try:
        for book_no in range(1, 74):
            book_url = ('http://xiaozhushou.org/index.php/?m=bible&template={}'
                        .format(book_no))
            utils.open_url(driver, book_url, verbose=True)

            # Read all hrefs before navigating away from the index page.
            anchors = driver.find_elements_by_xpath(
                '//ul[@id="chapter_list"]/li/a')
            chapter_links = [a.get_attribute('href') for a in anchors]

            book_id = str(book_no).zfill(3)
            for link in chapter_links:
                chpt_id = link.split('=')[-1].zfill(3)
                stem = '{}/{}_{}'.format(out_dir, book_id, chpt_id)
                get_content(driver, link,
                            stem + '_chapter.html',
                            stem + '_audio.mp3')

    except BaseException:
        # Report the failure, then let it propagate unchanged.
        print('*ERROR* something wrong')
        raise

    finally:
        utils.close_driver(driver, verbose=True)

Exemple #13

0

Afficher le fichier

Fichier : crawlNotice.py Projet : ParkKT/deri-project

def get_notice(jm_code, rcp_no, cursor):
    """Crawl one shareholders'-meeting notice filing and store its sections.

    First creates the meeting key (``meet_seq``) and the master rows, then
    calls one extractor per section of the notice. Failures are logged to
    ``error_logger`` and are not raised to the caller.

    Args:
        jm_code: Stock code of the company.
        rcp_no: DART receipt number of the notice to crawl.
        cursor: Open DB cursor used for every query; it is not closed here.

    Returns:
        ``0`` when ``proxy011`` already holds a row for ``rcp_no``,
        otherwise ``None``.
    """
    # Bound up front so the finally clause is safe when get_driver fails.
    driver = None
    try:
        # Set up the driver.
        driver = get_driver(
            'C:\\Users\\admin\\PycharmProjects\\webCrawl\\chromedriver.exe',
            'http://dart.fss.or.kr/dsaf001/main.do?rcpNo={0}'.format(rcp_no))

        # Receipt-number history of the meeting notice.
        rcpno_list = get_rcpno_list(driver)
        # The first document supplies the announcement year.
        first_rcp_no = rcpno_list[0]
        first_rcp_yy = first_rcp_no[:4]
        # Receipt number that precedes rcp_no in the history, if any.
        pre_rcp_no = ''
        for i in range(1, len(rcpno_list)):
            if rcp_no == rcpno_list[i]:
                pre_rcp_no = rcpno_list[i - 1]
                break
        print(rcpno_list, pre_rcp_no)

        # ------------------------- Meeting notice -------------------------
        meet_seq = None
        try:
            notice_gb, notice_tb, notice_ref, notice_etc = get_notice_data(
                rcp_no, driver)

            # Duplicate check.
            dup_select = """select * from proxy011 where rcp_no = '{0}'""".format(
                rcp_no)
            cursor.execute(dup_select)
            dup_cnt = cursor.rowcount
            if dup_cnt > 0:
                return 0

            res_rcpno = ''
            # Fetch the receipt number of the matching resolution.
            if len(notice_tb[0]) == 8:
                res_select = """select first_rcpno from proxy001 where jm_code = '{0}' and meet_ymd = '{1}' and meet_gb = '{2}' and meet_time = '{3}'
                             """.format(jm_code, notice_tb[0], notice_gb,
                                        notice_tb[1])

                cursor.execute(res_select)
                if cursor.rowcount > 0:
                    res_rcpno = cursor.fetchone()[0]

            # Number of distinct meeting sequences this year for the company.
            max_select = """select * from proxy011 where left(first_rcpno, 4) = '{0}' and jm_code = '{1}' group by meet_seq
                         """.format(first_rcp_yy, jm_code)

            cursor.execute(max_select)
            max_seq = cursor.rowcount

            # Reuse the sequence of the first filing if one is stored,
            # otherwise allocate the next one.
            seq_select = """select meet_seq from proxy011 where first_rcpno = '{0}'
                         """.format(first_rcp_no)

            cursor.execute(seq_select)
            seq = cursor.fetchone()

            if cursor.rowcount < 1:
                seq = str(max_seq + 1).zfill(2)
            else:
                seq = "".join(seq)
                seq = seq[-2:]

            yyyy = make_ymd(notice_tb[0])
            if yyyy is not None and yyyy != '':
                yyyy = yyyy[:4]
            else:
                yyyy = time.strftime('%Y')

            meet_seq = jm_code + yyyy + seq

            notice_qry = notice_mst_ins(meet_seq, rcp_no, jm_code, notice_gb,
                                        rcpno_list[0], notice_tb, notice_ref,
                                        res_rcpno)
            cursor.execute(notice_qry)

            # Crawling-to-deri record.
            ymdstr = get_full_ymdstr(notice_tb[0], notice_tb[1])
            deri_qry = deri_ins(meet_seq, rcp_no, pre_rcp_no, jm_code,
                                notice_tb[0], notice_gb, ymdstr, notice_tb[2])
            cursor.execute(deri_qry)

            driver.switch_to_default_content()

            info_logger.info('[0] Key creation success.')
        except Exception as e:
            error_logger.error('[0] Key creation fail. [{0}] : {1}'.format(
                rcp_no, e))

        # Every section below needs the key. If it was never built, stop
        # here rather than fail on an unbound name and log a misleading
        # second error.
        if meet_seq is None:
            return None

        # ----------------- Director and committee activities -----------------
        get_isa_act(driver, meet_seq, rcp_no, cursor)
        driver.switch_to_default_content()

        # ----------------- Outside-director remuneration -----------------
        get_isa_bosu(driver, meet_seq, rcp_no, cursor)
        driver.switch_to_default_content()

        # ----------------- Single transactions above a set size -----------------
        get_transaction_single(driver, meet_seq, rcp_no, cursor)
        driver.switch_to_default_content()

        # ----------------- Transaction totals above a set size -----------------
        get_transaction_total(driver, meet_seq, rcp_no, cursor)
        driver.switch_to_default_content()

        # ----------------- Financial statements -----------------
        get_financial_table(driver, meet_seq, rcp_no, cursor)

        # ----------------- Amendment of the articles -----------------
        get_change_article(driver, meet_seq, rcp_no, cursor)

        # ----------------- Election of directors -----------------
        get_elect_isa(driver, meet_seq, rcp_no, cursor)

        # ----------------- Director remuneration limit -----------------
        get_limit_bosu(driver, meet_seq, rcp_no, cursor)

        # ----------------- Stock options -----------------
        get_stockoption(driver, meet_seq, rcp_no, cursor)

    except Exception as e:
        error_logger.error('[Notice] crawling fail. [{0}] : {1}'.format(
            rcp_no, e))
    finally:
        if driver is not None:
            close_driver(driver)

Exemple #14

0

Afficher le fichier

Fichier : run.py Projet : vvscode/code-notes

    description = utils.get_course_description(course_url)
    curriculum = utils.get_course_curriculum(course_url)


    content = template.format(
        preview_img=utils.get_course_preview_image(course_url),
        title=slug,
        short_description=course_url,
        description=description,
        description_ru=utils.translate_2_ru(description),
        curriculum=curriculum,
        curriculum_ru=utils.translate_2_ru(curriculum),
        url=course_url,
        video=utils.get_course_youtube_share_id(course_url)
    )
    pyperclip.copy(content)
    open('./tmp/result.txt', 'w').write(content)
    print('Done')

if __name__ == '__main__':
    # Script entry point: run main(), print any failure in several forms,
    # then pause before closing the driver.
    try:
        main()
    except Exception as err:
        # Inside the handler the active exception class is type(err).
        print("Unexpected error:", type(err))
        for detail in (type(err), err.args, err):
            print(detail)
    finally:
        time.sleep(100)
        utils.close_driver()

Exemple #15

0

Afficher le fichier

Fichier : search.py Projet : zyhuang/ragno

        course_meta = course.find_element_by_xpath(
            './/*[@class="courses-list__item-meta"]')
        out['author'] = course_meta.find_element_by_xpath(
            './p[@class="courses-list__item-authors"]/span/a').text
        out['level'] = course_meta.find_element_by_xpath(
            './p[@class="courses-list__item-level"]').text
        out['date'] = course_meta.find_element_by_xpath(
            './time[@class="courses-list__item-date"]').text
        out['level'] = course_meta.find_element_by_xpath(
            './time[@class="courses-list__item-duration"]').text
        ncourse += 1
        out_list.append(out)

    utils.save_json(out_list, json_name)


if __name__ == '__main__':

    import session

    # Read the command-line argument before launching the browser, so a
    # missing argument fails fast without leaving a browser running.
    out_json = sys.argv[1]
    out_html = out_json.rsplit('.', 1)[0] + '.html'

    driver = utils.start_driver('chrome')
    try:
        session.login(driver, 'input/credential.json')
        load_all_courses(driver, out_html, num_load=None)
        get_all_courses(driver, out_json)

    finally:
        utils.close_driver(driver)

Exemple #16

0

Afficher le fichier

def get_resolution(jm_code, rcp_no, rcp_gb, cursor):
    """Crawl one shareholders'-meeting resolution filing and store it.

    Reads the master table of the filing, collects the director-election
    and business-purpose sections that are present, then inserts the master
    row and the detail rows. Failures are logged to ``error_logger`` and are
    not raised to the caller.

    Args:
        jm_code: Stock code of the company.
        rcp_no: DART receipt number of the resolution to crawl.
        rcp_gb: Market marker; 'Y' and 'K' select the element ids to read,
            anything else uses the third id set.
        cursor: Open DB cursor used for every query; it is not closed here.

    Returns:
        ``0`` when ``proxy001`` already holds a row for ``rcp_no``,
        otherwise ``None``.
    """
    # Bound up front so the finally clause is safe when get_driver fails.
    driver = None
    try:
        # Set up the driver (resolution, notice).
        driver = get_driver(
            'C:\\Users\\admin\\PycharmProjects\\webCrawl\\chromedriver.exe',
            'http://dart.fss.or.kr/dsaf001/main.do?rcpNo={0}'.format(rcp_no))

        driver.implicitly_wait(10)

        # Receipt-number history of the filing.
        rcpno_list = get_rcpno_list(driver)
        # The first document supplies the announcement year.
        first_rcp_no = rcpno_list[0]
        first_rcp_yy = first_rcp_no[:4]

        # The resolution data lives inside the page's iframe.
        driver.switch_to.frame(driver.find_element_by_tag_name("iframe"))

        is_y = 'Y' in rcp_gb
        # The master table id depends on the listing type.
        if is_y:
            tb_mst = driver.find_elements_by_xpath(
                '//*[@id="XFormD52_Form0_Table0"]/tbody/tr')
        else:
            tb_mst = driver.find_elements_by_xpath(
                '//*[@id="XFormD2_Form0_Table0"]/tbody/tr')

        def cell_text(row, col):
            # Text of cell ``col`` in master-table row ``row``.
            return tb_mst[row].find_elements_by_tag_name('td')[col].text

        # Master fields; slots 0, 1 and 8 sit in different cells per layout.
        meet_tb = [0] * 9
        if is_y:
            meet_tb[0] = cell_text(1, 1)  # date
            meet_tb[1] = cell_text(1, 2)  # time
            meet_tb[8] = cell_text(0, 1)  # meeting type
        else:
            meet_tb[0] = cell_text(0, 2)  # date
            meet_tb[1] = cell_text(1, 1)  # time
            meet_tb[8] = cell_text(8, 1)  # meeting type

        meet_tb[2] = cell_text(2, 1)  # venue
        meet_tb[3] = cell_text(3, 1)  # agenda
        meet_tb[4] = cell_text(4, 1)  # board resolution date
        meet_tb[5] = cell_text(5, 2)  # outside directors present
        meet_tb[6] = cell_text(6, 1)  # outside directors absent
        meet_tb[7] = cell_text(7, 1)  # auditor attendance

        # Div ids per market, in order: director election, outside-director
        # election, audit-committee-member election, auditor election; then
        # the business-purpose div.
        if is_y:
            isa_ids = ('LIB_L9019', 'LIB_L9018', 'LIB_L9016', 'LIB_L9015')
            biz_id = 'LIB_L9017'
        elif 'K' in rcp_gb:
            isa_ids = ('LIB_L7021', 'LIB_L7020', 'LIB_L7018', 'LIB_L7017')
            biz_id = 'LIB_L7019'
        else:
            isa_ids = ('LIB_L3025', 'LIB_L3024', 'LIB_L3022', 'LIB_L3021')
            biz_id = 'LIB_L3023'

        id_xpath = '//*[@id="{0}"]'
        # Probe all four divs before extracting any of them, keeping the
        # original order of browser calls.
        present_ids = [
            div_id for div_id in isa_ids
            if driver.find_elements_by_xpath(id_xpath.format(div_id))
        ]
        isa_arr = []
        for div_id in present_ids:
            isa_arr.extend(get_isa(driver, div_id))

        biz_arr = []
        if driver.find_elements_by_xpath(id_xpath.format(biz_id)):
            biz_arr.extend(get_biz(driver, biz_id))

        # ----------------------------------------------------------------- #
        # DB insert.
        # Duplicate check.
        dup_select = """select * from proxy001 where rcp_no = '{0}'""".format(
            rcp_no)
        cursor.execute(dup_select)
        dup_cnt = cursor.rowcount
        if dup_cnt > 0:
            return 0

        # Number of distinct meeting sequences this year for the company.
        max_select = """select * from proxy001 where left(first_rcpno, 4) = '{0}' and jm_code = '{1}' group by meet_seq
                             """.format(first_rcp_yy, jm_code)

        cursor.execute(max_select)
        max_seq = cursor.rowcount

        # Reuse the sequence of the first filing if one is stored,
        # otherwise allocate the next one.
        seq_select = """select meet_seq from proxy001 where first_rcpno = '{0}'
                             """.format(first_rcp_no)

        cursor.execute(seq_select)
        seq = cursor.fetchone()

        if cursor.rowcount < 1:
            seq = str(max_seq + 1).zfill(2)
        else:
            seq = "".join(seq)
            seq = seq[-2:]

        yyyy = make_ymd(meet_tb[0][:4])
        if yyyy is not None and yyyy != '':
            yyyy = yyyy[:4]
        else:
            yyyy = time.strftime('%Y')

        meet_seq = jm_code + yyyy + seq

        # Insert the resolution master row.
        in_qry = resolution_mst_ins(meet_seq, meet_tb, jm_code, rcp_no,
                                    rcpno_list[0])
        cursor.execute(in_qry)

        # Insert director-election rows plus optional career and
        # concurrent-position rows.
        for i, isa in enumerate(isa_arr):
            cursor.execute(isa_info_ins(meet_seq, isa, rcp_no, i))

            if chk_no_data(isa[4]):
                cursor.execute(isa_car_ins(meet_seq, isa, rcp_no, i))  # career

            if chk_no_data(isa[5]):
                cursor.execute(isa_dup_ins(meet_seq, isa, rcp_no, i))  # concurrent posts

        # Insert business-purpose changes.
        for biz in biz_arr:
            cursor.execute(biz_ins(meet_seq, biz, rcp_no))
    except Exception as e:
        error_logger.error(
            'Resolution crawling fail. : [{0}] [{1}] {2}'.format(
                jm_code, rcp_no, e))
    finally:
        if driver is not None:
            close_driver(driver)

Exemple #17

0

Afficher le fichier

Fichier : crawlResolution_html.py Projet : ParkKT/deri-project

def resolution_main(jm_code, rcp_no, rcp_yn, rcp_gb):
    """Scrape one DART shareholder-meeting resolution filing and store it.

    Opens the filing page for ``rcp_no``, reads the meeting master table plus
    any director-appointment / business-purpose sections, and inserts the
    resulting rows through the 'esg' DB connection.

    Args:
        jm_code: Issuer code written to the ``jm_code`` column and to the
            error log (presumably the stock code -- confirm with callers).
        rcp_no: DART receipt number; used in the page URL and, via its first
            eight characters, as the prefix of the ``report_ver`` key.
        rcp_yn: Correction flag. An empty string means "not a correction
            filing", in which case an existing row is treated as a duplicate.
        rcp_gb: Listing-market text. Containing '유가' or '코스닥' selects the
            matching page layout; anything else uses the third layout.

    Raises:
        SystemExit: when a non-correction filing already exists in
            ``proxy001``. The original bare ``except:`` swallowed this exit
            and logged it as an error; it now propagates as written.

    Any other exception raised while inserting is recorded by appending
    ``jm_code`` to the error log file. Scraping errors propagate to the
    caller. The cursor, the connection and the driver are released on every
    path.
    """
    # Driver setup (resolution / notice page).
    driver = get_driver(
        'C:\\Users\\admin\\PycharmProjects\\webCrawl\\chromedriver.exe',
        'http://dart.fss.or.kr/dsaf001/main.do?rcpNo={0}'.format(rcp_no))

    try:
        # The filing body is rendered inside an iframe.
        driver.switch_to.frame(driver.find_element_by_tag_name("iframe"))

        meet_tb = _scrape_meeting_table(driver, rcp_gb)
        isa_arr, biz_arr = _scrape_agenda_sections(driver, rcp_gb)

        # DB insert.
        conn = get_dbcon('esg')
        try:
            cursor = conn.cursor()
            try:
                _insert_resolution(cursor, meet_tb, isa_arr, biz_arr,
                                   jm_code, rcp_no, rcp_yn)
            except Exception:
                # Best-effort: remember which issuer failed and carry on.
                # `Exception` (not a bare except) so that sys.exit() and
                # Ctrl-C are not mistaken for insert failures.
                with open(
                        "C:\\Users\\rmffo\\PycharmProjects\\log\\error_log.txt",
                        'a') as f:
                    f.write(jm_code + '\n')
            finally:
                cursor.close()
        finally:
            close_dbcon(conn)
    finally:
        # Always release the browser, even when scraping or the DB fails.
        close_driver(driver)


def _scrape_meeting_table(driver, rcp_gb):
    """Read the nine meeting master fields from the filing's main table."""
    is_kospi = '유가' in rcp_gb

    # Listing market decides which form table holds the master data.
    if is_kospi:
        tb_mst = driver.find_elements_by_xpath(
            '//*[@id="XFormD52_Form0_Table0"]/tbody/tr')
    else:
        tb_mst = driver.find_elements_by_xpath(
            '//*[@id="XFormD2_Form0_Table0"]/tbody/tr')

    def cell(row, col):
        return tb_mst[row].find_elements_by_tag_name('td')[col].text

    meet_tb = [0] * 9
    if is_kospi:
        meet_tb[0] = cell(1, 1)  # date
        meet_tb[1] = cell(1, 2)  # time
        meet_tb[8] = cell(0, 1)  # meeting type
    else:
        meet_tb[0] = cell(0, 2)  # date
        meet_tb[1] = cell(1, 1)  # time
        meet_tb[8] = cell(8, 1)  # meeting type

    meet_tb[2] = cell(2, 1)  # place
    meet_tb[3] = cell(3, 1)  # agenda content
    meet_tb[4] = cell(4, 1)  # board resolution date
    meet_tb[5] = cell(5, 2)  # outside directors: attended
    meet_tb[6] = cell(6, 1)  # outside directors: absent
    meet_tb[7] = cell(7, 1)  # auditor attendance
    return meet_tb


def _section_ids(rcp_gb):
    """Return (appointment section ids, business-purpose id) for the market."""
    # Appointment ids are ordered: director, outside director,
    # audit-committee member, auditor.
    if '유가' in rcp_gb:
        return (('LIB_L9019', 'LIB_L9018', 'LIB_L9016', 'LIB_L9015'),
                'LIB_L9017')
    if '코스닥' in rcp_gb:
        return (('LIB_L7021', 'LIB_L7020', 'LIB_L7018', 'LIB_L7017'),
                'LIB_L7019')
    return (('LIB_L3025', 'LIB_L3024', 'LIB_L3022', 'LIB_L3021'),
            'LIB_L3023')


def _scrape_agenda_sections(driver, rcp_gb):
    """Collect director-appointment and business-purpose rows, if present."""
    isa_ids, biz_id = _section_ids(rcp_gb)

    # Probe all four appointment sections before extracting any of them,
    # keeping the original order of browser calls.
    found = [
        driver.find_elements_by_xpath('//*[@id="{0}"]'.format(section_id))
        for section_id in isa_ids
    ]

    isa_arr = []
    for section_id, elements in zip(isa_ids, found):
        if elements:  # section exists on the page
            isa_arr.extend(get_isa(driver, section_id))

    biz_arr = []
    if driver.find_elements_by_xpath('//*[@id="{0}"]'.format(biz_id)):
        biz_arr.extend(get_biz(driver, biz_id))

    return isa_arr, biz_arr


def _insert_resolution(cursor, meet_tb, isa_arr, biz_arr, jm_code, rcp_no,
                       rcp_yn):
    """Insert the meeting master row and its director / business rows."""
    # Lookup values for the existing-meeting query.
    ymd = make_ymd(meet_tb[0])
    gb = get_regYn(meet_tb[8])
    # NOTE(review): this query is built by string formatting from scraped
    # text; switch to the DB driver's parameter binding once its paramstyle
    # is confirmed.
    seq_select = """select * from proxy001 where meet_ymd = '{0}' and jm_code = '{1}' and meet_gb = '{2}'
                         """.format(ymd, jm_code, gb)

    cursor.execute(seq_select)
    rows = cursor.rowcount

    # Duplicate check, only when this is not a correction filing.
    if rcp_yn == '' and rows > 0:
        print('중복 데이터가 있습니다.')
        sys.exit(0)

    # report_ver key: revision date (first 8 chars of rcp_no) + sequence.
    report_ver = rcp_no[:8] + str(rows + 1).zfill(2)

    # Resolution master row.
    in_qry = resolution_mst_ins(meet_tb, jm_code, report_ver, rcp_no)
    cursor.execute(in_qry)
    print(in_qry)

    # Director appointments.
    if isa_arr:
        ins_isa, dup_isa = isa_mst_ins(isa_arr, meet_tb[0], jm_code, gb,
                                       report_ver)
        for i, ins_qry in enumerate(ins_isa):
            # Skip directors that are already stored.
            cursor.execute(dup_isa[i])
            if cursor.rowcount > 0:
                print('중복된 이사가 있습니다.')
                continue

            cursor.execute(ins_qry)
            print(str(i) + " : " + ins_qry)

            if chk_no_data(isa_arr[i][4]):
                # Director career history.
                ins_isa_car = isa_car_ins(isa_arr[i], meet_tb[0], jm_code,
                                          gb, report_ver, i)
                cursor.execute(ins_isa_car)
                print(str(i) + " : " + ins_isa_car)

            if chk_no_data(isa_arr[i][5]):
                # Director concurrent positions.
                ins_isa_dup = isa_dup_ins(isa_arr[i], meet_tb[0], jm_code,
                                          gb, report_ver, i)
                cursor.execute(ins_isa_dup)
                print(str(i) + " : " + ins_isa_dup)

    # Business-purpose changes.
    if biz_arr:
        # Build the statements once: the arguments do not change per row
        # (assumes biz_ins is a deterministic query builder).
        ins_biz = biz_ins(biz_arr, meet_tb[0], jm_code, gb, report_ver)
        for i in range(len(biz_arr)):
            cursor.execute(ins_biz[i])
            print(str(i) + " : " + str(ins_biz[i]))