def geocontrib_delete_project(project_name, admin_url):
    """Delete a project through the admin pages.

    Opens ``admin_url``, follows the "Projets" link, the link named
    ``project_name`` and the "Supprimer" link in that order, then clicks
    the confirmation input.
    """
    get_driver().get(admin_url)
    for link_text in ("Projets", project_name, "Supprimer"):
        get_driver().find_element_by_link_text(link_text).click()
    confirm_xpath = u"//input[@value='Oui, je suis sûr']"
    get_driver().find_element_by_xpath(confirm_xpath).click()
def geocontrib_delete_feature(admin_url, feature_name):
    """Delete a feature through the admin pages.

    Opens ``admin_url``, follows the "Signalements" link, the link named
    ``feature_name`` and the "Supprimer" link in that order, then clicks
    the confirmation input.
    """
    get_driver().get(admin_url)
    for link_text in ("Signalements", feature_name, "Supprimer"):
        get_driver().find_element_by_link_text(link_text).click()
    confirm_xpath = u"//input[@value='Oui, je suis sûr']"
    get_driver().find_element_by_xpath(confirm_xpath).click()
def geocontrib_create_feature(feature_type_name, feature_name):
    """Start creating a feature titled ``feature_name``.

    Clicks the icon located by a positional XPath, fills the ``id_title``
    field with ``feature_name`` and clicks the "Dessiner un point" link.
    ``feature_type_name`` is accepted but not used by this function.
    """
    add_icon_xpath = "//div[2]/div/div/div/div/a[2]/i"
    get_driver().find_element_by_xpath(add_icon_xpath).click()

    # The title field is looked up again before each action.
    title_id = "id_title"
    get_driver().find_element_by_id(title_id).click()
    get_driver().find_element_by_id(title_id).clear()
    get_driver().find_element_by_id(title_id).send_keys(feature_name)

    get_driver().find_element_by_link_text("Dessiner un point").click()
Exemple #4
0
def geocontrib_json_export(project_name, feature_type_name, added_text):
    """Trigger the export of a feature type of a project.

    Clicks the first ``<img>``, the link named ``project_name``, the link
    whose text is ``feature_type_name`` followed by ``added_text``, the
    "Exporter" link and finally an element located by a positional XPath.
    """
    get_driver().find_element_by_xpath("//img").click()
    get_driver().find_element_by_link_text(project_name).click()

    feature_type_link = "{}{}".format(feature_type_name, added_text)
    get_driver().find_element_by_link_text(feature_type_link).click()

    get_driver().find_element_by_link_text("Exporter").click()
    get_driver().find_element_by_xpath("//div[3]/div/div").click()
def geocontrib_create_featuretype(feature_type_name):
    """Create a feature type titled ``feature_type_name``.

    Follows the "Créer un nouveau type de signalement" link, fills the
    ``id_title`` field and clicks the submit button.
    """
    creation_link = u"Créer un nouveau type de signalement"
    get_driver().find_element_by_link_text(creation_link).click()

    # The title field is looked up again before each action.
    title_id = "id_title"
    get_driver().find_element_by_id(title_id).click()
    get_driver().find_element_by_id(title_id).clear()
    get_driver().find_element_by_id(title_id).send_keys(feature_type_name)

    submit_xpath = "//button[@type='submit']"
    get_driver().find_element_by_xpath(submit_xpath).click()
def geocontrib_draft_search_map(project_name):
    """Open a project's map and click two controls of the filter form.

    Follows the "GéoContrib" link and the link named ``project_name``,
    clicks the ``map`` element, then clicks two elements inside the
    ``form-filters`` form, both located by positional XPaths.
    """
    for link_text in (u"GéoContrib", project_name):
        get_driver().find_element_by_link_text(link_text).click()
    get_driver().find_element_by_id("map").click()

    filter_xpaths = (
        "//form[@id='form-filters']/div[2]/div/input[2]",
        "//form[@id='form-filters']/div[2]/div/div[2]/div",
    )
    for xpath in filter_xpaths:
        get_driver().find_element_by_xpath(xpath).click()
def geocontrib_delete_layer(admin_url, layer_title, layer_url, layer_type):
    """Delete a layer through the admin pages.

    The layer is picked by a link whose text is
    ``"<layer_title> - <layer_url> (<layer_type lower-cased>)"``; the
    deletion is then confirmed.
    """
    get_driver().get(admin_url)
    get_driver().find_element_by_link_text("Couches").click()

    layer_link = "{} - {} ({})".format(
        layer_title, layer_url, layer_type.lower())
    get_driver().find_element_by_link_text(layer_link).click()

    get_driver().find_element_by_link_text("Supprimer").click()
    confirm_xpath = u"//input[@value='Oui, je suis sûr']"
    get_driver().find_element_by_xpath(confirm_xpath).click()
Exemple #8
0
def geocontrib_json_import():
    """Submit ``export_projet.json`` through the feature import form.

    Clicks the label of the import form and the ``json_file`` input, waits
    one second, types the file name into the file input and submits.
    """
    get_driver().find_element_by_xpath(
        "//form[@id='form-import-features']/div/label/span").click()
    get_driver().find_element_by_id("json_file").click()
    time.sleep(1)
    # Keys are sent to the file input element: the WebDriver object itself
    # has no ``send_keys`` method, only elements do.
    # NOTE(review): some drivers only accept an absolute path for file
    # inputs - confirm where export_projet.json is expected to live.
    get_driver().find_element_by_id("json_file").send_keys(
        "export_projet.json")
    get_driver().find_element_by_xpath("//button[@type='submit']").click()
Exemple #9
0
 def brfore_func(self):
     """Generator that prepares the driver and page objects, then tears down.

     Before the ``yield`` it stores the driver plus ``AddPage`` and
     ``SettingPage`` objects built on it; after the ``yield`` it sleeps
     three seconds and quits the driver.
     """
     driver = get_driver()
     self.driver = driver
     self.add_page = AddPage(driver)
     self.setting_page = SettingPage(driver)
     yield
     time.sleep(3)
     self.driver.quit()
def get_imdb_list_page_link():
    """Collect the "next page" links of an IMDb title-search listing.

    The crawl starts from the last link stored in
    ``pathmng.imdb_next_link_path`` when that file can be read, otherwise
    from the built-in search URL. Up to 999 pages are visited. Links are
    handed to ``tool.save_metadata_to_csv`` in batches of 50, and whatever
    is still pending when the crawl stops (normally or through an
    exception) is saved as well.
    """
    import time

    class PageLink(tool.MovieMetadata):
        """Record holding the URL of one listing page."""

        def __init__(self, link):
            self.link = link

    def save_pending(pending):
        # Hand the collected links to the CSV writer and empty the buffer.
        if pending:
            tool.save_metadata_to_csv(
                utils.filter_duplicate_preserve_order(pending),
                pathmng.imdb_next_link_path)
            pending.clear()

    driver = utils.get_driver()

    pre_link = "https://www.imdb.com/search/title/?release_date=2000-01-01,2009-12-31&runtime=60,300"
    try:
        pre_df = pandas.read_csv(pathmng.imdb_next_link_path)
        pre_link = list(pre_df["link"])[-1]
    except (OSError, ValueError, KeyError, IndexError):
        # No usable progress file (missing, empty, unparsable or without a
        # "link" column): start from the default search URL.
        pass

    next_xpath = '//*[@class="lister-page-next next-page"]'
    link_list = []

    driver.get(pre_link)
    try:
        for i in range(1, 1000):
            next_button = driver.find_element_by_xpath(next_xpath)
            link_list.append(PageLink(next_button.get_attribute("href")))
            next_button.click()
            time.sleep(2)
            if i % 50 == 0:
                save_pending(link_list)
    finally:
        # Keep the links gathered since the last full batch, including when
        # the "next" button can no longer be found on the final page.
        save_pending(link_list)
Exemple #11
0
 def setUp(self):
     """Create the logger and driver, then open the configured site URL.

     The URL is read from the ``site_to_test``/``url`` parser entry; an
     implicit wait of 3 seconds is set after the page is requested.
     """
     self.logger = get_logger('Authentication')
     browser = get_driver()
     self.driver = browser
     # browser.maximize_window()
     site_url = parser.get('site_to_test', 'url')
     self.url = site_url
     browser.get(site_url)
     browser.implicitly_wait(3)
Exemple #12
0
def get_tf_class():
    """Return the ``class_path`` entry of the stored test-fixture info.

    The info is read from the storage driver selected by
    ``options.storage_driver``. When ``class_path`` is empty a hint is
    printed and ``exit(0)`` is called.
    """
    fixture_info = get_driver(options.storage_driver).get_tf_info()
    class_path = fixture_info['class_path']
    if not class_path:
        print('Test Fixture Class is not defined please use admin to set class')
        exit(0)
    return class_path
Exemple #13
0
def local_main():
    """Run ``main_helper`` on the built-in sample sentences.

    ``args.run_selenium`` decides whether a driver is created for each
    sentence (otherwise ``None`` is passed); ``args.use_corenlp`` selects
    the Stanford parser instead of ``nltk``.
    """
    # If True, also run selenium code. Else, only make output python file.
    run_selenium = args.run_selenium
    use_stanford_corenlp = args.use_corenlp
    pathsaver = PathSaver()

    if use_stanford_corenlp:
        from utils import get_stanford_parser
        nlp = get_stanford_parser(pathsaver.parser_path)
    else:
        nlp = nltk
    allennlp = get_allen_parser(pathsaver.allen_path)

    sample_sents = [
        'Enter the "KAIST" in "SearchBox" and click the "Search" button',
        'Wait the "3 seconds".',
        'Refresh the website and move to "https://www.naver.com/".'
    ]
    sample_tuple = [("SearchBox", 'q'), ("Search", 'btnK')]
    # TODO: Change the above sample sents and tuple into Chrome extension output using 'read_extension_output()'

    for sentence in sample_sents:
        # A driver is requested anew for every sentence.
        if run_selenium:
            driver = get_driver(pathsaver.driver_path)
        else:
            driver = None
        main_helper(sample_tuple, sentence, nlp, allennlp, driver,
                    pathsaver.driver_path, run_selenium)
Exemple #14
0
def report_main(jm_code, rcp_no):
    """Scrape two committee flags for a filing and insert them in the DB.

    Opens the dart.fss.or.kr viewer for ``rcp_no``, reads the flags of the
    'b' and 'g' tabs through ``get_tab``/``get_board_yn`` and inserts one
    row into ``proxy700_tmp``. The cursor, connection and driver are
    released in all cases, but only those that were actually created.

    Args:
        jm_code: code written to the first column of the inserted row.
        rcp_no: document number placed in the viewer URL.
    """
    # Pre-bind the resources so the cleanup below never hits an unbound
    # name (which would hide the original error).
    driver = None
    conn = None
    cursor = None
    try:
        # driver setup (resolution, notice)
        driver = get_driver(
            'C:\\Users\\admin\\PycharmProjects\\webCrawl\\chromedriver.exe',
            'http://dart.fss.or.kr/dsaf001/main.do?rcpNo={0}'.format(rcp_no))

        # rcpno history of the shareholders' meeting resolution
        rcpno_list = get_rcpno_list(driver)
        # notice year of the first document
        # NOTE(review): this value is never used; the insert below
        # hard-codes '2018' - confirm which one is intended.
        first_rcp_yy = rcpno_list[0][:4]

        conn = get_dbcon('esg')
        cursor = conn.cursor()

        # check whether a compensation committee exists
        get_tab(driver, 'b')
        bosang_yn = get_board_yn(driver, 'b')
        print(bosang_yn)
        driver.switch_to.default_content()
        # check whether an audit committee exists
        get_tab(driver, 'g')
        gamsa_yn = get_board_yn(driver, 'g')
        print(gamsa_yn)

        # --------------------------------------------------------------------------------- #
        # DB insert
        # NOTE(review): the statement is built by string formatting from
        # scraped values; switch to bound parameters once the paramstyle of
        # the connection returned by get_dbcon is confirmed.
        insert_qry = """insert into proxy700_tmp values('{0}', '{1}', '{2}', '{3}')""".format(
            jm_code, '2018', bosang_yn, gamsa_yn)
        cursor.execute(insert_qry)
    finally:
        if cursor is not None:
            cursor.close()
        if conn is not None:
            close_dbcon(conn)
        if driver is not None:
            close_driver(driver)
Exemple #15
0
 def setUp(self):
     """Create the logger and driver, open the site and enter the program page.

     The URL is read from the ``site_to_test``/``url`` parser entry; once
     it is loaded the "Job Placement Program" link is clicked.
     """
     self.logger = get_logger('JobPlacement')
     browser = get_driver()
     self.driver = browser
     # browser.maximize_window()
     site_url = parser.get('site_to_test', 'url')
     self.url = site_url
     browser.get(site_url)
     program_link = browser.find_element_by_link_text("Job Placement Program")
     program_link.click()
Exemple #16
0
 def before_function(self):
     """Generator that builds the driver and page factory, then quits.

     Everything before the ``yield`` is set-up; after it the driver is quit.
     """
     # Obtain the driver object.
     driver = get_driver()
     self.driver = driver
     # Instantiate the unified entry-point class.
     self.page_factory = PageFactory(driver)
     yield
     # time.sleep(3)
     # Shut the driver down.
     self.driver.quit()
Exemple #17
0
def get_movie_id(num=100):
    """Collect movie ids from the Rotten Tomatoes browse page into a file.

    The "show more" button is clicked until more than ``num`` movie links
    are present or the button is no longer available. The last path
    segment of every movie link is then written, one per line, to
    ``movie_id_path``. The driver is closed even when scraping fails.

    Args:
        num: stop expanding the list once more than this many movie links
            are present.
    """
    movie_xpath = '//*[@class="movie_info"]/a'
    button_xpath = '//*[@id="show-more-btn"]/button'

    driver = utils.get_driver()
    try:
        driver.get(
            "https://www.rottentomatoes.com/browse/dvd-streaming-all?minTomato=0&maxTomato=100&services=amazon;hbo_go;itunes;netflix_iw;vudu;amazon_prime;fandango_now&genres=1;2;4;5;6;8;9;10;11;13;18;14&sortBy=release"
        )

        # click show more button
        time.sleep(2)
        while True:
            # find_elements returns an empty list instead of raising, so a
            # listing with fewer than ``num`` movies still gets written.
            buttons = driver.find_elements_by_xpath(button_xpath)
            if not buttons or not buttons[0].is_displayed():
                break
            buttons[0].click()
            time.sleep(2)
            num_movies = len(driver.find_elements_by_xpath(movie_xpath))
            print(f"Find {num_movies} movies")
            if num_movies > num:
                break

        movie_id_set = {
            str(ele.get_attribute("href")).split("/")[-1]
            for ele in driver.find_elements_by_xpath(movie_xpath)
        }
    finally:
        driver.close()

    # Sorted so that the file content does not depend on set ordering.
    with open(movie_id_path, "w") as f:
        f.write("\n".join(sorted(movie_id_set)))
Exemple #18
0
 def setUp(self):
     """Reset class bookkeeping, then open the configured site maximized.

     The URL is read from the ``site_to_test``/``url`` parser entry.
     """
     self.classes = []
     self.active_class = None
     self.logger = get_logger('Groupclass')
     browser = get_driver()
     self.driver = browser
     site_url = parser.get('site_to_test', 'url')
     self.url = site_url
     browser.get(site_url)
     browser.maximize_window()
Exemple #19
0
    def setUp(self):
        """Create the logger and driver and open the configured site.

        The URL is read from the ``site_to_test``/``url`` parser entry; the
        window is maximized and a 3 second implicit wait is set.
        """
        self.logger = get_logger('AccountSettings')
        browser = get_driver()
        self.driver = browser
        site_url = parser.get('site_to_test', 'url')
        self.url = site_url
        browser.get(site_url)
        browser.maximize_window()
        browser.implicitly_wait(3)
def get_Academy_Award_for_Best_Actor_Director():
    """Scrape the Best Actor and Best Director Wikipedia pages into a CSV.

    Each page is passed to ``get_academy_award_for_best_people``; the
    combined results are saved to ``pathmng.wiki_best_actor_director_path``.
    """
    driver = utils.get_driver()
    wiki_pages = (
        "https://en.wikipedia.org/wiki/Academy_Award_for_Best_Actor",
        "https://en.wikipedia.org/wiki/Academy_Award_for_Best_Director",
    )

    records = [
        record
        for page in wiki_pages
        for record in get_academy_award_for_best_people(driver, page)
    ]

    tool.save_metadata_to_csv(records, pathmng.wiki_best_actor_director_path)
Exemple #21
0
def get_rcpNo(jm_code, keywod, st_dt, ed_dt):
    """Search dart.fss.or.kr and return the document number of the top hit.

    Only the first result row is inspected. The driver is closed on every
    exit path, including the early ``return 0`` cases and exceptions.

    Args:
        jm_code: value typed into the ``textCrpNm`` field (stock code).
        keywod: value typed into the ``reportName`` field (search keyword).
        st_dt: currently unused; the period is chosen through a preset link.
        ed_dt: currently unused; see ``st_dt``.

    Returns:
        The 14-character document number, or ``0`` when there is no result,
        the number does not have 14 characters, or the row is flagged as an
        attachment correction.
    """
    driver = get_driver(
        'C:\\Users\\admin\\PycharmProjects\\webCrawl\\chromedriver.exe',
        'http://dart.fss.or.kr/dsab002/main.do#')  # load the driver
    try:
        driver.implicitly_wait(10)

        driver.find_element_by_name('textCrpNm').send_keys(jm_code)  # stock code
        driver.find_element_by_xpath(
            '//*[@id="searchForm"]/fieldset/div/p[3]/span[2]/a[7]').click()  # period
        driver.find_element_by_name('reportName').send_keys(keywod)  # keyword
        time.sleep(1)
        driver.find_element_by_xpath(
            '//*[@id="searchForm"]/fieldset/div/p[8]/input').click()  # search

        res_list = driver.find_elements_by_xpath(
            '//*[@id="listContents"]/div[1]/table/tbody/tr')  # result rows

        # Only the top row is collected (=> has to run at short intervals).
        if not res_list:
            print('검색 결과가 없습니다.')
            return 0
        item = res_list[0]

        title_link = item.find_elements_by_tag_name(
            'td')[2].find_element_by_tag_name('a')
        # document number
        rcp_no = title_link.get_attribute('href')[-14:]
        # correction marker
        rcp_yn = title_link.find_element_by_tag_name('span').text

        if len(rcp_no) != 14:
            print('rcpNo 형식이 다릅니다.')
            return 0

        if '첨부' in rcp_yn:
            print('첨부정정은 수집대상 제외')
            return 0

        return rcp_no
    finally:
        close_driver(driver)
Exemple #22
0
def get_rcpNo(jm_code, keywod):
    """Search dart.fss.or.kr and return the document number of one hit.

    The first result row is used unless some row's link text contains
    ``'2018.12'``, in which case the last such row is used. The driver is
    closed on every exit path, including early returns and exceptions.

    Args:
        jm_code: value typed into the ``textCrpNm`` field (stock code).
        keywod: value typed into the ``reportName`` field (search keyword).

    Returns:
        The 14-character document number, or ``0`` when there is no result,
        the number does not have 14 characters, or the row is flagged as an
        attachment correction.
    """
    driver = get_driver(
        'C:\\Users\\admin\\PycharmProjects\\webCrawl\\chromedriver.exe',
        'http://dart.fss.or.kr/dsab002/main.do#')  # load the driver
    try:
        driver.find_element_by_name('textCrpNm').send_keys(jm_code)  # stock code
        driver.find_element_by_xpath(
            '//*[@id="searchForm"]/fieldset/div/p[3]/span[2]/a[4]').click(
            )  # period: 1 year
        # NOTE: the "finalReport" checkbox is left in its default state.
        driver.find_element_by_id('reportName').send_keys(keywod)  # keyword

        driver.find_element_by_xpath(
            '//*[@id="searchForm"]/fieldset/div/p[8]/input').click()  # search

        res_list = driver.find_elements_by_xpath(
            '//*[@id="listContents"]/div[1]/table/tbody/tr')  # result rows

        if not res_list:
            print('검색 결과가 없습니다.')
            return 0

        # Default to the top row; a later row mentioning '2018.12' wins.
        item = res_list[0]
        for row in res_list:
            row_text = row.find_elements_by_tag_name(
                'td')[2].find_element_by_tag_name('a').text
            if '2018.12' in row_text:
                item = row

        title_link = item.find_elements_by_tag_name(
            'td')[2].find_element_by_tag_name('a')
        # document number
        rcp_no = title_link.get_attribute('href')[-14:]
        # correction marker
        rcp_yn = title_link.find_element_by_tag_name('span').text

        if len(rcp_no) != 14:
            print('rcpNo 형식이 다릅니다.')
            return 0

        if '첨부' in rcp_yn:
            print('첨부정정은 수집대상 제외')
            return 0

        return rcp_no
    finally:
        close_driver(driver)
Exemple #23
0
def hando_main(jm_code, rcp_no, gijun_yy):
    """Print every item ``get_hando`` extracts from a filing's viewer page.

    Args:
        jm_code: passed through to ``get_hando``.
        rcp_no: document number placed in the viewer URL.
        gijun_yy: passed through to ``get_hando``.
    """
    # driver setup
    driver = get_driver(
        'C:\\Users\\admin\\PycharmProjects\\webCrawl\\chromedriver.exe',
        'http://dart.fss.or.kr/dsaf001/main.do?rcpNo={0}'.format(rcp_no))
    try:
        hando = list(get_hando(driver, jm_code, gijun_yy))

        for h in hando:
            print(h)
    finally:
        # Close the driver even when extraction fails.
        close_driver(driver)
def get_html(rcp_no):
    """Dump the iframe HTML of a filing's viewer page to the console and disk.

    The page source of the first ``iframe`` is printed and written to
    ``C:\\Users\\admin\\Desktop\\html\\<rcp_no>.html`` as UTF-8.

    Args:
        rcp_no: document number placed in the viewer URL and the file name.
    """
    # driver setup (resolution, notice)
    driver = get_driver(
        'C:\\Users\\admin\\PycharmProjects\\webCrawl\\chromedriver.exe',
        'http://dart.fss.or.kr/dsaf001/main.do?rcpNo={0}'.format(rcp_no))
    try:
        # switch into the frame that holds the document
        driver.switch_to.frame(driver.find_element_by_tag_name("iframe"))

        html = driver.page_source
        print(html)
    finally:
        # The driver is not handed back to the caller, so release it here.
        driver.quit()

    f_nm = 'C:\\Users\\admin\\Desktop\\html\\{0}.html'.format(rcp_no)
    # An explicit encoding avoids encode errors from the platform default.
    with open(f_nm, 'w', encoding='utf-8') as f:
        f.write(html)
Exemple #25
0
def api_call_main(sample, pathsaver, run_selenium, nlp, allennlp):
    """Run ``main_helper`` for one API request and return the produced code.

    Args:
        sample: mapping read through its ``'selectors'`` and ``'text'`` keys.
        pathsaver: object providing ``driver_path``.
        run_selenium: when truthy a driver is created, otherwise ``None``
            is passed to ``main_helper``.
        nlp: forwarded to ``main_helper``.
        allennlp: forwarded to ``main_helper``.

    Returns:
        The second element of the pair returned by ``main_helper``.
    """
    if run_selenium:
        driver = get_driver(pathsaver.driver_path)
    else:
        driver = None

    _ignored, code = main_helper(
        sample['selectors'],
        sample['text'],
        nlp,
        allennlp,
        driver,
        pathsaver.driver_path,
        run_selenium,
    )
    return code
Exemple #26
0
def main(
):
    """Process Wilma's IG posts.

    Every non-blank line of ``posts.txt`` is joined onto
    ``https://www.instagram.com/``, opened in the driver and passed to
    ``process_html``, which receives ``posts-data/<third '/'-separated
    part of the line>.json`` as output path. The driver is quit at the end.
    """
    driver = get_driver()
    try:
        # Created once up front instead of on every iteration.
        os.makedirs("posts-data", exist_ok=True)
        with open("posts.txt") as posts:
            for post in posts:
                post = post.strip()
                if not post:
                    # Blank lines would otherwise fail on the split below.
                    continue
                print(post)
                driver.get(urljoin(
                    "https://www.instagram.com/",
                    post,
                ))
                process_html(
                    driver,
                    f"posts-data/{post.split('/')[2]}.json",
                )
    finally:
        driver.quit()
def get_rcpNo(jm_code, keywod):
    """Search kind.krx.co.kr and return data of the top result row.

    The driver is closed on every exit path, including the ``sys.exit``
    calls and exceptions.

    Args:
        jm_code: value typed into the ``AKCKwd`` field (stock code).
        keywod: value typed into the ``reportNmTemp`` field (keyword).

    Returns:
        ``(rcp_no, rcp_yn, rcp_gb)``: the 14-character document number, the
        correction marker (currently always ``''``) and the ``alt`` text of
        the row's market image.

    Raises:
        SystemExit: with status 0 when there is no result or the document
            number does not have 14 characters.
    """
    driver = get_driver(
        'C:\\Users\\admin\\PycharmProjects\\webCrawl\\chromedriver.exe',
        'http://kind.krx.co.kr/disclosure/details.do?method=searchDetailsMain#viewer')  # load the driver
    try:
        driver.find_element_by_xpath('//*[@id="AKCKwd"]').send_keys(jm_code)  # stock code
        time.sleep(1)
        driver.find_element_by_xpath(
            '//*[@id="search-btn-dates"]/ul/li[5]/a').send_keys(Keys.ENTER)  # period: 1 year
        time.sleep(1)
        checked = driver.find_element_by_xpath(
            '//*[@id="lastReport"]').get_attribute('checked')  # final-report flag
        if checked:
            # untick the final-report checkbox
            driver.find_element_by_xpath(
                '//*[@id="lastReport"]').send_keys(Keys.ENTER)
        driver.find_element_by_id('reportNmTemp').send_keys(keywod)  # keyword
        time.sleep(1)
        driver.find_element_by_xpath(
            '//*[@id="searchForm"]/section[1]/div/div[3]/a[1]').send_keys(Keys.ENTER)  # search
        time.sleep(1)
        res_list = driver.find_elements_by_xpath(
            '//*[@id="main-contents"]/section[1]/table/tbody/tr')  # result rows

        # Only the top row is collected (=> has to run at short intervals).
        if not res_list:
            print('검색 결과가 없습니다.')
            sys.exit(0)
        item = res_list[0]

        cells = item.find_elements_by_tag_name('td')
        # document number
        rcp_no = cells[3].find_element_by_tag_name(
            'a').get_attribute('onclick')[-19:-5]
        print(rcp_no)
        # correction marker: not read from the page, so the attachment
        # check below can currently never trigger
        rcp_yn = ''
        # market classification
        rcp_gb = cells[2].find_element_by_tag_name('img').get_attribute('alt')

        if len(rcp_no) != 14:
            print('rcpNo 형식이 다릅니다.')
            sys.exit(0)

        if '첨부' in rcp_yn:
            print('첨부정정은 수집대상 제외')
            sys.exit(0)

        return rcp_no, rcp_yn, rcp_gb
    finally:
        close_driver(driver)
Exemple #28
0
def get_rcpNo(jm_code, keywod, st_dt, en_dt):
    """Search dart.fss.or.kr in a date range and return the top hit's data.

    The driver is closed on every exit path, including the ``sys.exit``
    calls and exceptions.

    Args:
        jm_code: value typed into the ``textCrpNm`` field (stock code).
        keywod: value typed into the ``reportName`` field (keyword).
        st_dt: value typed into the ``startDate`` field.
        en_dt: value typed into the ``endDate`` field.

    Returns:
        ``(rcp_no, rcp_yn)``: the 14-character document number and the text
        of the correction marker.

    Raises:
        SystemExit: with status 0 when there is no result, the document
            number does not have 14 characters, or the row is flagged as an
            attachment correction.
    """
    driver = get_driver(
        'C:\\Users\\admin\\PycharmProjects\\webCrawl\\chromedriver.exe',
        'http://dart.fss.or.kr/dsab002/main.do#')  # load the driver
    try:
        driver.find_element_by_name('textCrpNm').send_keys(jm_code)  # stock code
        driver.find_element_by_xpath('//*[@id="startDate"]').send_keys(
            st_dt)  # period start
        driver.find_element_by_xpath('//*[@id="endDate"]').send_keys(
            en_dt)  # period end
        driver.find_element_by_id('reportName').send_keys(keywod)  # keyword

        driver.find_element_by_xpath(
            '//*[@id="searchForm"]/fieldset/div/p[8]/input').click()  # search

        res_list = driver.find_elements_by_xpath(
            '//*[@id="listContents"]/div[1]/table/tbody/tr')  # result rows

        # Only the top row is collected (=> has to run at short intervals).
        if not res_list:
            print('검색 결과가 없습니다.')
            sys.exit(0)
        item = res_list[0]

        title_link = item.find_elements_by_tag_name(
            'td')[2].find_element_by_tag_name('a')
        # document number
        rcp_no = title_link.get_attribute('href')[-14:]
        # correction marker
        rcp_yn = title_link.find_element_by_tag_name('span').text

        if len(rcp_no) != 14:
            print('rcpNo 형식이 다릅니다.')
            sys.exit(0)

        if '첨부' in rcp_yn:
            print('첨부정정은 수집대상 제외')
            sys.exit(0)

        return rcp_no, rcp_yn
    finally:
        close_driver(driver)
Exemple #29
0
def create_accounts():
    """Register accounts forever, rotating the proxy when an attempt fails.

    For every proxy up to ten registrations are attempted. Each successful
    one appends ``email:password`` to ``accounts.txt``. The first failing
    attempt is logged as a warning and ends the run for that proxy; a new
    proxy is then fetched. The driver of an attempt is closed whether the
    attempt succeeded or not.
    """
    while True:
        logging.info("Getting proxy")
        proxy = get_proxy()
        logging.info(f"Got proxy, {proxy}")
        for _ in range(10):
            try:
                driver = get_driver(proxy)
                try:
                    register_link = "https://login.aliexpress.com/"
                    driver.get(register_link)
                    set_location_cookie(driver)
                    email, password = create_new_account(driver)

                    with open("accounts.txt", "a") as myfile:
                        myfile.write(f"{email}:{password}\n")
                finally:
                    # Close the window on failure too; an error raised by
                    # close() itself is still handled by the except below.
                    driver.close()
            except Exception as e:
                logging.warning(e)
                break
Exemple #30
0
    def get_recovery_url(self, email_name, email_url):
        """Return the text of the first link in a mail of the given mailbox.

        Opens ``http://www.<email_url>``, submits ``email_name`` as the
        mailbox, opens the first row of the mailbox table and waits for the
        first link inside ``<pre>`` to contain "http". The mail driver is
        quit and ``close_firefox`` is called even when a wait times out.

        Args:
            email_name: mailbox name typed into the ``mailbox`` field.
            email_url: host appended to ``http://www.``.

        Returns:
            The text of the ``//pre/a[1]`` element.
        """
        print(" + get_recovery_url \n".upper())
        print('In get_validation_code')
        driver_mail = get_driver(True)
        try:
            driver_mail.get("http://www." + email_url)

            # One 30 second wait object serves every condition below.
            wait = WebDriverWait(driver_mail, 30)
            submit_btn = wait.until(EC.element_to_be_clickable((By.ID, 'sm')))
            name_field = driver_mail.find_element_by_id("mailbox")
            name_field.clear()
            name_field.send_keys(email_name)
            submit_btn.click()

            wait.until(
                EC.url_to_be("http://mailnesia.com/mailbox/" + email_name))
            driver_mail.find_element_by_xpath("//tbody/tr[1]/td[2]").click()
            wait.until(
                EC.text_to_be_present_in_element(
                    (By.XPATH, '//pre/a[1]'), "http"))
            code = driver_mail.find_element_by_xpath("//pre/a[1]").text
            return code
        finally:
            driver_mail.quit()
            close_firefox()
Exemple #31
0
def bd_main(jm_code, rcp_no):
    """Scrape the table of a filing and upsert it into ``proxy080``.

    The values come from ``get_bd_table``. A row with the same ``jm_code``
    and ``bd_gijun_ymd`` is updated when one exists, otherwise a new row is
    inserted. The cursor, connection and driver are released in all cases,
    but only those that were actually created.

    Args:
        jm_code: key column value of the row.
        rcp_no: document number placed in the viewer URL.
    """
    # Pre-bind the resources so the cleanup below never hits an unbound
    # name (which would hide the original error).
    driver = None
    conn = None
    cursor = None
    try:
        # driver setup (resolution, notice)
        driver = get_driver(
            'C:\\Users\\admin\\PycharmProjects\\webCrawl\\chromedriver.exe',
            'http://dart.fss.or.kr/dsaf001/main.do?rcpNo={0}'.format(rcp_no))

        driver.implicitly_wait(10)

        bd_gubun, bd_kind, bd_gum, bd_total, bd_gijun_ymd = get_bd_table(
            driver)

        conn = get_dbcon('esg')
        cursor = conn.cursor()

        # NOTE(review): the statements below are built by string formatting
        # from scraped values; switch to bound parameters once the
        # paramstyle of the connection returned by get_dbcon is confirmed.

        # duplicate check, then DB write
        dup_select = """select * from proxy080 where jm_code = '{0}' and bd_gijun_ymd = '{1}'
                     """.format(jm_code, bd_gijun_ymd)

        cursor.execute(dup_select)

        # Fetch the rows instead of reading cursor.rowcount, which DB-API
        # drivers may report as -1 for SELECT statements.
        if cursor.fetchall():
            insert_qry = """update proxy080
                            set bd_gubun = '{2}', bd_kind = '{3}', bd_gum = {4}, bd_total = {5}
                            where jm_code = '{0}' and bd_gijun_ymd = '{1}'
                         """.format(jm_code, bd_gijun_ymd, bd_gubun, bd_kind,
                                    bd_gum, bd_total)
        else:
            insert_qry = """insert into proxy080 values('{0}', '{1}', '{2}', '{3}', {4}, {5})
                         """.format(jm_code, bd_gijun_ymd, bd_gubun, bd_kind,
                                    bd_gum, bd_total)

        cursor.execute(insert_qry)
    finally:
        if cursor is not None:
            cursor.close()
        if conn is not None:
            close_dbcon(conn)
        if driver is not None:
            close_driver(driver)
 def initialize(self):
     """Attach a logger and the configured storage driver to the handler."""
     self.logger = get_logger('StorageHandler')
     driver_name = options.storage_driver
     self._storage = get_driver(driver_name)
Exemple #33
0
 def __init__(self):
     """Store the object returned by ``get_driver()`` as ``self.driver``."""
     driver = get_driver()
     self.driver = driver
Exemple #34
0
def set_tf_class(cls_name):
    """Record ``cls_name`` as the ``class_path`` of the test-fixture info.

    The current info is read from the storage driver selected by
    ``options.storage_driver``, its ``'class_path'`` entry is overwritten
    and the result is passed to ``set_tf_info``.
    """
    storage = get_driver(options.storage_driver)
    tf_info = storage.get_tf_info()
    tf_info['class_path'] = cls_name
    # The value returned by set_tf_info replaces the local tf_info.
    tf_info = storage.set_tf_info(tf_info)