Ejemplo n.º 1
0
def click_status(driver: webdriver) -> WebElement:
    """Locate the status-input component, handling the first-run layout.

    :param driver: a webdriver object
    :return: a WebElement of the status component
    """
    if not first_time:
        return find_element_by_test_id(driver, "status-attachment-mentions-input")
    # First visit: the composer textarea is used instead of the mentions input.
    set_first_time_false()
    composer = driver.find_element_by_id("pagelet_composer")
    return composer.find_element_by_tag_name("textarea")
Ejemplo n.º 2
0
def blockFindByXpath(driver: webdriver, xpath, retry=200) -> WebElement:
    """Poll for an element matching *xpath*, up to *retry* attempts.

    :param driver: a webdriver object
    :param xpath: XPath expression to look up
    :param retry: maximum number of lookup attempts (0.1 s apart)
    :return: the element, or None if it never appeared
    """
    found = None
    attempts = 0
    while attempts < retry and not found:
        attempts += 1
        try:
            found = driver.find_element_by_xpath(xpath)
        except Exception:
            # Element not present yet; keep polling.
            pass
        if not found:
            sleep(0.1)
    # Brief settle pause before handing the element back.
    sleep(0.1)
    return found
Ejemplo n.º 3
0
def radio_button_helper(driver: webdriver, common_id: str,
                        unique_id: str) -> None:
    """
    Helper to deal with radio buttons which share NAME, ID attributes.
    :param driver: webdriver object.
    :param common_id: example: "@name='my_radio_button'"
    :param unique_id: example: "@value='01'"
    :return: None.
    """
    # Locate the one element matching both predicates, then click it through
    # the JS engine so nothing can intercept the native click.
    xpath = '//*[' + common_id + ' and ' + unique_id + ']'
    try:
        target = driver.find_element(By.XPATH, xpath)
        driver.execute_script('arguments[0].click()', target)
    except NoSuchElementException:
        # Missing button is tolerated, matching the original contract.
        pass
Ejemplo n.º 4
0
def fetch_image_urls(query: str, max_links_to_fetch: int, wd: webdriver, sleep_between_interactions: int = 1):
    """Collect up to *max_links_to_fetch* image URLs from a Google image search.

    :param query: search term.
    :param max_links_to_fetch: stop once this many unique URLs are gathered.
    :param wd: webdriver object (an open browser session).
    :param sleep_between_interactions: seconds to pause after scrolls/clicks.
    :return: a set of image URL strings.
    """
    def scroll_to_end(wd):
        wd.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        sleep(sleep_between_interactions)

    # build the google query
    search_url = "https://www.google.com/search?safe=off&site=&tbm=isch&source=hp&q={q}&oq={q}&gs_l=img"

    # load the page
    wd.get(search_url.format(q=query))
    sleep(0.7)
    image_urls = set()
    image_count = 0
    results_start = 0
    while image_count < max_links_to_fetch:
        scroll_to_end(wd)

        # get all image thumbnail results
        thumbnail_results = wd.find_elements_by_css_selector("img.rg_i")
        number_results = len(thumbnail_results)

        print(f"Found: {number_results} search results. Extracting links from {results_start}:{number_results}")

        for img in thumbnail_results[results_start:number_results]:
            # try to click every thumbnail such that we can get the real image behind it
            try:
                img.click()
                sleep(sleep_between_interactions)
            except Exception:
                continue

            # extract image urls (read src once per element: each
            # get_attribute call is a round-trip to the browser)
            for actual_image in wd.find_elements_by_css_selector('img.rg_i'):
                src = actual_image.get_attribute('src')
                if src:
                    image_urls.add(src)

            image_count = len(image_urls)

            if len(image_urls) >= max_links_to_fetch:
                print(f"Found: {len(image_urls)} image links, done!")
                break
            else:
                print("Found:", len(image_urls), "image links, looking for more ...")
                sleep(0.8)
                # BUG FIX: find_element_by_class_name(".ksb") always raised
                # NoSuchElementException — class names never contain a dot —
                # crashing the scrape.  Use a CSS lookup that returns an
                # empty list when the button is absent.
                load_more_button = wd.find_elements_by_css_selector(".ksb")
                if load_more_button:
                    wd.execute_script("document.querySelector('.ksb').click();")

        # move the result startpoint further down (after the pass, matching
        # the other fetch_image_urls variants in this file)
        results_start = len(thumbnail_results)

    return image_urls
Ejemplo n.º 5
0
def fetch_image_urls(query: str,
                     max_links_to_fetch: int,
                     wd: webdriver,
                     sleep_between_interactions: int = 1):
    """Collect up to *max_links_to_fetch* image URLs from a Google image search.

    :param query: search term.
    :param max_links_to_fetch: stop once this many unique URLs are gathered.
    :param wd: webdriver object (an open browser session).
    :param sleep_between_interactions: seconds to pause after scrolls/clicks.
    :return: a set of image URL strings (possibly fewer than requested when
        the visible results run out).
    """
    def scroll_to_end(wd):
        wd.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(sleep_between_interactions)

    search_url = "https://www.google.com/search?safe=off&site=&tbm=isch&source=hp&q={q}&oq={q}&gs_l=img"

    wd.get(search_url.format(q=query))

    image_urls = set()
    image_count = 0
    results_start = 0
    while image_count < max_links_to_fetch:
        scroll_to_end(wd)

        thumbnail_results = wd.find_elements_by_css_selector("img.Q4LuWd")
        number_results = len(thumbnail_results)

        print(
            f"Found: {number_results} search results. Extracting links from {results_start}:{number_results}"
        )

        for img in thumbnail_results[results_start:number_results]:
            try:
                img.click()
                time.sleep(sleep_between_interactions)
            except Exception:
                continue

            # Read src once per element: each get_attribute call is a
            # round-trip to the browser.
            actual_images = wd.find_elements_by_css_selector('img.n3VNCb')
            for actual_image in actual_images:
                src = actual_image.get_attribute('src')
                if src and 'http' in src:
                    image_urls.add(src)

            image_count = len(image_urls)

            if len(image_urls) >= max_links_to_fetch:
                print(f"Found: {len(image_urls)} image links, done!")
                break
        else:
            # for-else: runs when the quota was NOT reached in this pass.
            print("Found:", len(image_urls),
                  "image links, looking for more ...")
            # BUG FIX: the original did `time.sleep(30); return` here, which
            # discarded every URL collected so far (returning None) and left
            # the load-more code after it unreachable.  Return what we have.
            return image_urls

        results_start = len(thumbnail_results)

    return image_urls
Ejemplo n.º 6
0
def fetch_image_urls_google(query: str, max_links_to_fetch: int, wd: webdriver, sleep_between_interactions: float = 2.5):
    """Scroll a Google image search to the bottom, then harvest image URLs.

    :param query: search term.
    :param max_links_to_fetch: upper bound on URLs to extract.
    :param wd: webdriver object (an open browser session).
    :param sleep_between_interactions: seconds to pause between interactions
        (annotation fixed: the default 2.5 is a float, not an int).
    :return: a set of image URL strings.
    """
    search_url = f'https://www.google.com/search?safe=off&site=&tbm=isch&source=hp&q={query}&oq={query}&gs_l=img'
    wd.get(search_url)

    # Scroll until the page height stops growing (all results loaded),
    # clicking the "show more results" input whenever it is present.
    last_height = wd.execute_script('return document.body.scrollHeight')
    while True:
        wd.execute_script('window.scrollTo(0,document.body.scrollHeight)')
        time.sleep(sleep_between_interactions)
        new_height = wd.execute_script('return document.body.scrollHeight')
        try:
            wd.find_element_by_xpath('//*[@id="islmp"]/div/div/div/div/div[4]/div[2]/input').click()
            time.sleep(sleep_between_interactions)
        # BUG FIX: bare `except:` also swallowed KeyboardInterrupt/SystemExit;
        # Exception still covers the expected NoSuchElementException.
        except Exception:
            pass
        if new_height == last_height:
            break
        last_height = new_height

    image_urls = set()
    thumbnail_results = wd.find_elements_by_css_selector("img.Q4LuWd")
    number_results = len(thumbnail_results)
    print(f"Found: {number_results} search results. Extracting links.")

    # Visit at most the requested number of thumbnails.
    limit = min(max_links_to_fetch, number_results)

    for idx, img in enumerate(thumbnail_results[:limit]):
        try:
            img.click()
            time.sleep(sleep_between_interactions)
            actual_image = wd.find_element_by_xpath(
                '//*[@id="Sva75c"]/div/div/div[3]/div[2]/c-wiz/div/div[1]/div[1]/div/div[2]/a/img'
            )
            url = actual_image.get_attribute('src')
            if url and 'http' in url:
                image_urls.add(url)
                print(f'{idx}/{limit}: {url}')
        # BUG FIX: narrowed the second bare `except:` as well.
        except Exception:
            continue

    return image_urls
Ejemplo n.º 7
0
def chat(dialogue: str, driver: webdriver, count: int) -> List[str]:
    """Input dialogue for the chat bot and return their response.

    :param dialogue: text to type into the chat input.
    :param driver: a webdriver object with the chat page loaded.
    :param count: 1-based index of the bot response block to read (the one
        this message is expected to produce).
    :return: list of message strings; image messages contribute their img
        ``src`` URLs instead of text, button-only messages are skipped.
    """

    # Locate chat form.
    chat_form = driver.find_element_by_xpath("//div[@id='main-input']/input")

    # Wait for chat window to load (the first bot response must be visible).
    WebDriverWait(driver, 10).until(
        ec.visibility_of_all_elements_located(
            (By.XPATH, "(//div[@class='pb-bot-response'])[1]")))

    chat_form.send_keys(dialogue)
    chat_form.send_keys(Keys.RETURN)

    # Wait for response number `count` to become visible.
    WebDriverWait(driver, 10).until(
        ec.visibility_of_all_elements_located(
            (By.XPATH,
             "(//div[@class='pb-bot-response'])[" + str(count) + ']')))

    # Get response and extract relevant text.
    messages = driver.find_element_by_xpath(
        "(//div[@class='pb-bot-response'])[" + str(count) +
        ']').find_elements_by_class_name('pb-message')

    response = []

    for message in messages:

        if has_image(message):
            # Image messages: collect the full-size image URLs instead of text.
            children = message.find_elements_by_xpath(
                ".//img[@class='pb-standalone-image pb-fullSizeImage']")

            for child in children:
                response.append(child.get_attribute('src'))

        elif not has_button(message):
            # Plain text message (button-only messages carry no useful text).
            response.append(message.text)

    return response
def filter_posts_list(driver: webdriver, months: list,
                      posts_list: list) -> list:
    """Filter *posts_list* down to posts published in one of *months*.

    Each post page is loaded and its 'datetime' attribute read; the post is
    kept when its 'YYYY-MM' prefix matches an entry of *months*.  When the
    date element cannot be found the page is reloaded and retried once; if
    it still cannot be found the post is kept anyway.

    :param driver: webdriver object.
    :param months: list of 'YYYY-MM' strings to keep.
    :param posts_list: list of post URLs to examine.
    :return: the filtered list of post URLs.
    """
    res = []
    for post_url in posts_list:
        # Up to two attempts per post; the duplicated retry block of the
        # original is collapsed into this loop.
        for attempt in (1, 2):
            driver.get(post_url)
            try:
                sleep(2)
                # Renamed from `time` so the local no longer shadows the
                # `time` module.
                posted_at = driver.find_element_by_class_name(
                    'Nzb55').get_attribute('datetime')
                for month in months:
                    if posted_at[0:7] == month:
                        res.append(post_url)
                break
            except NoSuchElementException:
                if attempt == 1:
                    print(f'ДАТУ НЕ НАХОДИТ!!! ПОСТ {post_url}')
                else:
                    # Second failure: keep the post rather than lose it.
                    print(f'ДАТУ НЕ НАХОДИТ ВТОРОЙ РАЗ!!! ПОСТ {post_url}')
                    res.append(post_url)
    return res
Ejemplo n.º 9
0
def test_invalid_custom_docker(driver: selenium.webdriver, *args, **kwargs):
    """
    Test invalid custom Docker instructions.

    Adds a failing instruction (`RUN /bin/false`) to a minimal project and
    asserts the UI reports a rebuild state and a build-failure footer.

    Args:
        driver
    """
    # Project creation is needed only for its side effect; the original's
    # unpacked username/project_name locals were unused.
    testutils.prep_py3_minimal_base(driver)
    # add an invalid custom docker instruction
    logging.info("Adding invalid custom Docker")
    environment = testutils.EnvironmentElements(driver)
    environment.environment_tab_button.click()
    time.sleep(1)
    driver.execute_script("window.scrollBy(0, 600);")
    environment.custom_docker_edit_button.click()
    time.sleep(1)
    environment.custom_docker_text_input.send_keys("RUN /bin/false")
    time.sleep(1)
    driver.execute_script("window.scrollBy(0, 300);")
    environment.custom_docker_save_button.click()
    # wait until container status is stopped
    wait = selenium.webdriver.support.ui.WebDriverWait(driver, 200)
    wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, ".flex>.Rebuild")))
    time.sleep(2)
    # assert container status is stopped and the failure is reported in the
    # footer (removed unused duplicate `envelts = ...EnvironmentElements`)
    assert driver.find_element_by_css_selector(".flex>.Rebuild").is_displayed(), "Expected rebuild container status"
    assert "Project failed to build" in driver.find_element_by_css_selector(".Footer__message-title").text, \
        "Expected 'Project failed to build' in footer"
Ejemplo n.º 10
0
def execSearch(browser: webdriver):
    """
    Access the Kyoto University assignment site (PandA) and report
    not-yet-done assignments (the per-course check is delegated to `unkadai`).
    """
    browser.get('https://cas.ecs.kyoto-u.ac.jp/cas/login?service=https%3A%2F%2Fpanda.ecs.kyoto-u.ac.jp%2Fsakai-login-tool%2Fcontainer')
    sleep(1)

    # Log in with the credentials from settings
    user_id = browser.find_element_by_name("username")
    user_id.clear()
    user_id.send_keys(settings.USN)

    password = browser.find_element_by_name("password")
    password.clear()
    password.send_keys(settings.PWD)

    login = browser.find_element_by_class_name("btn-submit")
    login.click()
    sleep(1)

    # Visit each course page
    base_url = browser.current_url


    # Course name -> PandA site id (keys are runtime data, left untranslated)
    links = {
    "弾性体の力学解析":"2020-110-3200-000",
    "流体力学":"2020-110-3165-000",
    "一般力学":"2020-110-3010-100",
    "基礎有機化学I":"2020-888-N347-014",
    "地球環境学のすすめ":"2020-888-Y201-001",
    "社会基盤デザインI":"2020-110-3181-000",
    "工業数学B2":"2020-110-3174-000",
    "確率統計解析及び演習":"2020-110-3003-000",
    "水文学基礎":"2020-110-3030-000",
    "地球工学基礎数理":"2020-110-3005-000",
    }

    # nav = browser.find_element_by_id("2020-110-3165-000")
    for subject,link_id in links.items():
        unkadai(base_url,subject,link_id)
Ejemplo n.º 11
0
def submit_solution(driver: webdriver, local_file_path: str):
    """Upload *local_file_path* on the task page and submit it.

    Opens the submit tab, feeds the file path to the file input, then
    presses the submit button.
    """
    find = driver.find_element_by_css_selector
    find(CSS_SELECTORS['task_submit_tab']).click()
    find(CSS_SELECTORS['task_file_selector']).send_keys(local_file_path)
    find(CSS_SELECTORS['task_submit_button']).click()
Ejemplo n.º 12
0
    def __paginate(driver: webdriver,
                   pagination_index: int,
                   timeout=5) -> object:
        """Advance the results listing to the next page of 25 entries.

        :param driver: webdriver on a results page.
        :param pagination_index: kept for interface compatibility.
        :param timeout: seconds to wait for the next page's links to render.
        :return: {'success': True} once links are present, otherwise
            {'success': False, 'error': <reason>}.
        """
        next_page_button = driver.execute_script(
            '''
                return document.querySelector(arguments[0]);                
            ''', Selectors.paginationNextBtn)

        if next_page_button is None:
            return {
                'success': False,
                'error': 'There are no more pages to visit'
            }

        # Derive the current offset from the ?start= query parameter.
        # BUG FIX: the bare `except:` (which also swallowed
        # KeyboardInterrupt/SystemExit) is narrowed to the failures that
        # actually occur: missing key, non-integer value, or an unexpected
        # params container.
        try:
            offset = int(get_query_params(driver.current_url)['start'])
        except (KeyError, TypeError, ValueError):
            offset = 0

        offset += 25
        url = override_query_params(driver.current_url, {'start': offset})
        driver.get(url)

        elapsed = 0
        sleep_time = 0.05

        # Wait for new jobs to load
        while elapsed < timeout:
            loaded = driver.execute_script(
                '''
                    return document.querySelectorAll(arguments[0]).length > 0;                
                ''', Selectors.links)

            if loaded:
                return {'success': True}

            sleep(sleep_time)
            elapsed += sleep_time

        return {'success': False, 'error': 'Timeout on pagination'}
Ejemplo n.º 13
0
def fetch_image_urls(query: str, max_links_to_fetch: int, wd: webdriver,
                     sleep_between_interaction: int = 1):
    """Collect up to *max_links_to_fetch* image URLs from a Google image search.

    :param query: search term.
    :param max_links_to_fetch: stop once this many unique URLs are gathered.
    :param wd: webdriver object (an open browser session).
    :param sleep_between_interaction: seconds to pause after scrolls/clicks.
    :return: a set of image URL strings (possibly fewer than requested when
        the visible results run out).
    """
    def scroll_to_end(wd):
        wd.execute_script("window.scrollTo(0,document.body.scrollHeight);")
        time.sleep(sleep_between_interaction)

    search_url = "https://www.google.com/search?safe=off&site=&tbm=isch&source=hp&q={q}&oq={q}&gs_l=img"

    #load the page
    wd.get(search_url.format(q=query))

    image_urls = set()
    image_count = 0
    result_start = 0

    while image_count < max_links_to_fetch:
        scroll_to_end(wd)

        #get all the images thumbnails results
        thumbnails_result = wd.find_elements_by_css_selector("img.Q4LuWd")
        num_results = len(thumbnails_result)
        print(f"Found: {num_results}  search results. Extracting links from {result_start} :{num_results}")

        for img in thumbnails_result[result_start:num_results]:
            #try to click every thumbnail and get the image page behind it.
            try:
                img.click()
                time.sleep(sleep_between_interaction)
            except Exception:
                continue

            #extract images urls (read src once: each get_attribute call is a
            #round-trip to the browser)
            actual_images = wd.find_elements_by_css_selector('img.n3VNCb')
            for actual_image in actual_images:
                src = actual_image.get_attribute('src')
                if src and 'http' in src:
                    image_urls.add(src)
            image_count = len(image_urls)

            if len(image_urls) >= max_links_to_fetch:
                print(f"Found: {len(image_urls)} image links ,Done!!")
                break
        else:
            # for-else: runs when the quota was NOT reached in this pass.
            print(f"Found {len(image_urls)} images links ,Looking for more")
            # BUG FIX: the original did `time.sleep(30); return` here, which
            # discarded every URL collected so far (returning None) and left
            # the load-more code after it unreachable.  Return what we have.
            return image_urls

        #move the result startpoint further down
        result_start = len(thumbnails_result)
    return image_urls
def borrar_archivo_claro_drive(webdriver_test_ux: webdriver, jsonEval):
    """Delete the uploaded file in Claro Drive and record the step result.

    Step 3 of *jsonEval* is updated in place with status, message, start/end
    timestamps and elapsed time.

    :param webdriver_test_ux: webdriver with the Claro Drive UI loaded.
    :param jsonEval: report dict whose steps[3] entry is filled in.
    :return: the updated jsonEval dict.
    """
    tiempo_step_inicio = Temporizador.obtener_tiempo_timer()
    fecha_inicio = Temporizador.obtener_fecha_tiempo_actual()

    try:
        btn_borrar = webdriver_test_ux.find_element_by_xpath(
            '//input[@class="menuItem svg deleteImage icon-delete icon-32"]')
        btn_borrar.click()
        time.sleep(10)
        btn_cerrar = webdriver_test_ux.find_element_by_xpath(
            '//input[@class="svg exit icon-close icon-32"]')
        time.sleep(4)
        btn_cerrar.click()
        time.sleep(4)

        jsonEval["steps"][3]["output"][0]["status"] = jsonConst.SUCCESS
        jsonEval["steps"][3]["status"] = jsonConst.SUCCESS
        jsonEval["steps"][3]["output"][0][
            "output"] = 'Se realiza el borrado del archivo correctamente'
    # Both failure modes produce the same report: the original's two
    # identical handlers are merged into one tuple except.
    except (selExcep.NoSuchElementException,
            selExcep.ElementClickInterceptedException):
        jsonEval["steps"][3]["output"][0]["status"] = jsonConst.FAILED
        jsonEval["steps"][3]["status"] = jsonConst.FAILED
        jsonEval["steps"][3]["output"][0][
            "output"] = 'No fue posibles realizar el borrado del archivo correctamente'

    # Timing bookkeeping happens regardless of success or failure.
    tiempo_step_final = Temporizador.obtener_tiempo_timer(
    ) - tiempo_step_inicio
    fecha_fin = Temporizador.obtener_fecha_tiempo_actual()
    jsonEval["steps"][3]["time"] = FormatUtils.truncar_float_cadena(
        tiempo_step_final)
    jsonEval["steps"][3]["start"] = fecha_inicio
    jsonEval["steps"][3]["end"] = fecha_fin

    return jsonEval
def cerrar_sesion_claro_drive(webdriver_test_ux: webdriver, jsonEval):
    """Log out of Claro Drive and record the step result.

    Step 4 of *jsonEval* is updated in place with status, message, start/end
    timestamps and elapsed time.

    :param webdriver_test_ux: webdriver with the Claro Drive UI loaded.
    :param jsonEval: report dict whose steps[4] entry is filled in.
    :return: the updated jsonEval dict.
    """
    tiempo_step_inicio = Temporizador.obtener_tiempo_timer()
    fecha_inicio = Temporizador.obtener_fecha_tiempo_actual()

    try:
        boton_ajustes = webdriver_test_ux.find_element_by_id('expand')
        boton_ajustes.click()

        time.sleep(4)
        boton_cerrar_sesion = webdriver_test_ux.find_element_by_xpath(
            '//li[@data-id="logout"]')
        boton_cerrar_sesion.click()
        time.sleep(10)

        jsonEval["steps"][4]["output"][0]["status"] = jsonConst.SUCCESS
        jsonEval["steps"][4]["status"] = jsonConst.SUCCESS
        jsonEval["steps"][4]["output"][0][
            "output"] = 'Se cierra sesion correctamente'

    # Both failure modes produce the same report: the original's two
    # identical handlers are merged into one tuple except.
    except (selExcep.NoSuchElementException,
            selExcep.ElementClickInterceptedException):
        jsonEval["steps"][4]["output"][0]["status"] = jsonConst.FAILED
        jsonEval["steps"][4]["status"] = jsonConst.FAILED
        jsonEval["steps"][4]["output"][0][
            "output"] = 'No fue posible realizar el cierre de sesion'

    # Timing bookkeeping happens regardless of success or failure.
    tiempo_step_final = Temporizador.obtener_tiempo_timer(
    ) - tiempo_step_inicio
    fecha_fin = Temporizador.obtener_fecha_tiempo_actual()
    jsonEval["steps"][4]["time"] = FormatUtils.truncar_float_cadena(
        tiempo_step_final)
    jsonEval["steps"][4]["start"] = fecha_inicio
    jsonEval["steps"][4]["end"] = fecha_fin

    return jsonEval
Ejemplo n.º 16
0
def testGETXSSDriver(url: str, cookies: Mapping[str, str], driver: webdriver) -> Optional[str]:
    """ If the given URL pops an alert box when accessed with the given cookies, return the contents of the alert box,
        otherwise return None """
    # NOTE(review): setCookies/reset are not standard selenium driver methods —
    # presumably a project wrapper around the webdriver; verify.
    driver.setCookies(url, cookies)

    try:
        driver.get(url)

        # Wait up to config.timeout seconds for an alert (or prompt) to fire.
        WebDriverWait(driver, config.timeout).until(expected_conditions.alert_is_present())
        # Note that despite the name switch_to_alert also handles prompt:
        #   - http://selenium-python.readthedocs.io/navigating.html#popup-dialogs
        # NOTE(review): switch_to_alert() is deprecated in modern selenium in
        # favour of driver.switch_to.alert — confirm against the pinned version.
        alert = driver.switch_to_alert()
        text = alert.text
        driver.reset()
        return text
    except (TimeoutException, URLError):
        # No alert fired (or the page failed to load): not an XSS hit.
        driver.reset()
        return None
Ejemplo n.º 17
0
def select_style(driver: webdriver):
    """For every style entry in the sidebar, open its playlist and dump it to CSV."""
    style_xpath = '//*[@id="container"]/aside/div/table/tbody/tr[1]/td[1]/ul/li'
    total = len(driver.find_elements_by_xpath(style_xpath))

    for index in range(total):
        # Re-query each pass: navigating away invalidates the old elements.
        entries = driver.find_elements_by_xpath(style_xpath)
        entry = entries[index]
        style_name = str(entry.text)
        entry.find_element_by_tag_name('a').send_keys(Keys.ENTER)
        time.sleep(3)
        driver.find_element_by_xpath(
            '//*[@id="container"]/section/div/header/p[2]/a[1]').click()

        title_list, artist_list, image_list = [], [], []
        select_playlist(driver, title_list, artist_list, image_list)

        data_to_csv(zip(title_list, artist_list, image_list),
                    style_name, "style")
Ejemplo n.º 18
0
def select_song(driver: webdriver, title_list, artist_list, image_list):
    """Scrape title/artist/thumbnail for every song on the current page.

    Results are appended in place to the three parallel lists; rights-less
    tracks ('[권리없는 곡]') are skipped.  Navigates the driver back when done.

    :param driver: webdriver positioned on a playlist page.
    :param title_list: list receiving song titles.
    :param artist_list: list receiving artist-name strings.
    :param image_list: list receiving thumbnail image URLs.
    """
    url = driver.current_url
    request = requests.get(url)
    html = request.text
    bs = BeautifulSoup(html, 'html.parser')

    titles = bs.select('p.title')
    artists = bs.select('p.artist')
    images = bs.select('a.thumbnail')

    for i in range(len(titles)):
        if '[권리없는 곡]' in str(titles[i].text):
            continue

        title = str(titles[i].find('a').text)

        anchor_list = artists[i].find_all('a')
        if len(anchor_list) > 1:
            # Multi-artist entries hide the names inside an onclick handler;
            # pull out the quoted payload and split it on '||'.
            attr = anchor_list[1]['onclick']
            attr = attr.split("'")
            attr = attr[1]
            attr = attr.split("||")

            # BUG FIX: the original removed digit tokens from `attr` while
            # iterating it, which skips the element following every removal
            # and can leave digits behind; filter into a new list instead.
            attr = [word for word in attr if not word.isdigit()]

            artist = attr[1::2]
            artist = ", ".join(artist)
        else:
            artist = artists[i].text.strip().split('\n')[0]

        image = images[i].find('img')['src']

        title_list.append(title)
        artist_list.append(artist)
        image_list.append(image)

    driver.back()
Ejemplo n.º 19
0
def log_in(driver: selenium.webdriver, user_index: int = 0) -> str:
    """
    Log in to Gigantum.

    Args:
        driver
        user_index: an offset into credentials.txt

    Returns:
        Username of user just logged in
    """
    username, password = testutils.load_credentials(user_index=user_index)

    driver.get(f"{os.environ['GIGANTUM_HOST']}/projects/local#")
    auth0_elts = elements.Auth0LoginElements(driver)
    auth0_elts.login_green_button.wait().click()
    auth0_elts.auth0_lock_widget.wait()
    if auth0_elts.auth0_lock_button.selector_exists():
        # A previous session's account is remembered; switch accounts first.
        logging.info("Clicking 'Not your account?'")
        auth0_elts.not_your_account_button.wait().click()
    auth0_elts.do_login(username, password)
    time.sleep(5)
    # Set the ID and ACCESS TOKENS -- Used as headers for GraphQL mutations
    access_token = driver.execute_script(
        "return window.localStorage.getItem('access_token')")
    id_token = driver.execute_script(
        "return window.localStorage.getItem('id_token')")
    active_username = driver.execute_script(
        "return window.localStorage.getItem('username')")

    # Guard against a stale browser profile logged in as someone else.
    assert active_username == username, \
        f"Username from credentials.txt ({username}) must match chrome cache ({active_username})"

    # Export the tokens so later calls in this test run can use them.
    os.environ['GIGANTUM_USERNAME'] = active_username
    os.environ['ACCESS_TOKEN'] = access_token
    os.environ['ID_TOKEN'] = id_token
    assert os.environ['ACCESS_TOKEN'], "ACCESS_TOKEN could not be retrieved"
    assert os.environ['ID_TOKEN'], "ID_TOKEN could not be retrieved"
    return username.strip()
Ejemplo n.º 20
0
def removeKataomoi(browser: webdriver, safe_accounts):
    """Log in to kataomoi.net and unfollow one-way follows, oldest first.

    Accounts listed in *safe_accounts* are never touched; at most
    ``remain_remove_count`` (module-level counter) accounts are removed.

    BUG FIX: the original placed this string after the ``global`` statement,
    where it is a no-op expression rather than the function docstring.

    :param browser: webdriver
    :param safe_accounts: iterable of screen names to protect
    """
    global remain_remove_count

    # Open kataomoi.net (redirects into the Twitter OAuth flow)
    browser.get('http://kataomoi.net/redirect.php')
    sleep(1)

    url = browser.current_url
    is_confirm = url.startswith(
        "https://api.twitter.com/oauth/authorize?oauth_token")

    if is_confirm:
        # Already authenticated: just press the "allow" button.
        submit_btn = browser.find_element_by_id("allow")
        submit_btn.click()
        sleep(1)
    else:
        # Fill in the login credentials
        username_or_email = browser.find_element_by_xpath(
            "//*[@id='username_or_email']")
        username_or_email.send_keys(USER_NAME)
        password = browser.find_element_by_xpath("//*[@id='password']")
        password.send_keys(PASSWORD)
        # Log in
        password.submit()
        sleep(1)

    browser.get('http://kataomoi.net/find_one_way.php')
    sleep(1)

    trs = browser.find_elements_by_tag_name("tr")

    # Process rows oldest-first
    reversed_trs = reversed(trs)

    print('▼フォロー解除中…▼')
    for tr in reversed_trs:
        if remain_remove_count <= 0:
            break
        tds = tr.find_elements_by_tag_name("td")
        if len(tds) > 1 and not (tds[1].find_element_by_tag_name("a").text
                                 in safe_accounts):
            print(tds[1].find_element_by_tag_name("a").text)
            tr.find_elements_by_tag_name("span")[0].click()
            remain_remove_count = remain_remove_count - 1
            sleep(0.5)
Ejemplo n.º 21
0
def get_budget(brow: webdriver) -> float:
    """Read the user balance from the page and return it as a float."""
    balance_el = brow.find_element_by_xpath(
        './/span[@class="user-balance ng-binding"]')

    # The binding may be empty right after load: poll until non-empty.
    text = balance_el.get_attribute('innerText')
    while not text:
        text = balance_el.get_attribute('innerText')

    # Decimal comma -> decimal point before conversion.
    return float(text.replace(',', '.'))
Ejemplo n.º 22
0
def extract_match_datetime(brow: webdriver, match_obj: webdriver) -> datetime:
    """Read a match element's 'dd/mm/yyyy - hh:mm' label as a datetime."""
    scroll_to_element(brow, match_obj)

    raw = match_obj.find_element_by_xpath(
        './/div[@class="event-date ng-binding"]').text
    date_part, time_part = raw.split(' - ')
    day, month, year = date_part.split('/')
    hour, minute = time_part.split(':')

    # Reassemble as ISO-ish text so strptime handles the conversion.
    return datetime.strptime(
        f'{year}-{month}-{day} {hour}:{minute}:00', '%Y-%m-%d %H:%M:%S')
Ejemplo n.º 23
0
def fetch_image_urls(query: str, max_links_to_fetch: int, wd: webdriver, sleep_between_interactions: float = 1.0):
    """Collect up to *max_links_to_fetch* image URLs from a Google image search.

    :param query: search term.
    :param max_links_to_fetch: stop once this many unique URLs are gathered.
    :param wd: webdriver object (an open browser session).
    :param sleep_between_interactions: seconds to pause after scrolls/clicks.
    :return: a set of image URL strings.
    """
    def scroll_to_end(wd):
        wd.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(sleep_between_interactions)



    search_url = "https://www.google.com/search?safe=off&site=&tbm=isch&source=hp&q={q}&oq={q}&gs_l=img"

    # load the page
    wd.get(search_url.format(q=query))

    image_urls = set()
    image_count = 0
    results_start = 0
    while image_count < max_links_to_fetch:
        scroll_to_end(wd)

        # get all image thumbnail results
        thumbnail_results = wd.find_elements_by_css_selector("img.Q4LuWd")
        number_results = len(thumbnail_results)

        logger.info("Found: %i search results. Extracting links from %i:%i" % (number_results, results_start, number_results))

        for img in thumbnail_results[results_start:number_results]:
            # try to click every thumbnail such that we can get the real image behind it
            try:
                img.click()
                time.sleep(sleep_between_interactions)
            except Exception:
                continue

            # extract image urls
            actual_images = wd.find_elements_by_css_selector("img.n3VNCb")
            for actual_image in actual_images:
                if actual_image.get_attribute("src") and "http" in actual_image.get_attribute("src"):
                    image_urls.add(actual_image.get_attribute("src"))

            image_count = len(image_urls)

            if len(image_urls) >= max_links_to_fetch:
                logger.info("Found: %i image links, done!" % len(image_urls))
                break
        else:
            # for-else: runs only when the inner loop did NOT break, i.e. the
            # quota was not reached with the thumbnails seen so far — click
            # the "load more" button so the next pass sees fresh results.
            logger.info("Found: %i image links, looking for more ..." % len(image_urls))
            time.sleep(30)
            load_more_button = wd.find_element_by_css_selector(".mye4qd")
            if load_more_button:
                wd.execute_script("document.querySelector('.mye4qd').click();")

        # move the result startpoint further down
        results_start = len(thumbnail_results)

    return image_urls
Ejemplo n.º 24
0
def scrape_once(driver: webdriver, save_into_dict: bool, tagDict=None):
    """Scrape the Instagram explore page once, recording each post's hashtags.

    :param driver: webdriver already logged in to Instagram.
    :param save_into_dict: when True, also count tags into *tagDict*.
    :param tagDict: optional mapping of tag -> count.  BUG FIX: the original
        default was a mutable ``[]`` — shared across calls and of the wrong
        type (a list cannot be indexed by tag string); default to a fresh
        dict per call instead.
    """
    if tagDict is None:
        tagDict = {}
    tagCount = 0
    postCount = 0
    driver.get('https://www.instagram.com/explore')
    WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.TAG_NAME, 'article')))
    all_links = driver.find_elements_by_tag_name('a')
    for link in all_links:
        href = link.get_attribute('href')
        if href.endswith('?explore=true'):
            media_html = requests.get(href).text
            soup = BeautifulSoup(media_html, 'lxml')
            # time.time() returns a float (the original annotated it int).
            timestamp: float = time.time()
            hashtags = soup.findAll(attrs={"property": "instapp:hashtags"})
            for tagElement in hashtags:
                tag = tagElement.get('content')
                if save_into_dict:
                    # .get() works for plain dicts, not only defaultdicts.
                    tagDict[tag] = tagDict.get(tag, 0) + 1
                put_tag(tag, timestamp)
                tagCount += 1
            postCount += 1
    print('%d tags processed in %d posts' % (tagCount, postCount))
Ejemplo n.º 25
0
def test_delete_project(driver: selenium.webdriver, *args, **kwargs):
    """
        Test that deleting a project in Gigantum deletes it from the file system.

        Args:
            driver
    """
    r = testutils.prep_py3_minimal_base(driver)
    username, project_name = r.username, r.project_name

    # Check that project path exists on file system
    logging.info("Checking that the project exists in the file system")
    project_path = os.path.join(os.environ['GIGANTUM_HOME'], username,
                                username, 'labbooks', project_name)
    assert os.path.exists(project_path), \
        f"Project {project_name} should exist at {project_path}"
    logging.info("Finding project Docker image")
    dc = docker.from_env()
    # Collect images whose tags look like this project's build ('gmlb-' prefix).
    project_img = []
    for img in dc.images.list():
        for t in img.tags:
            if 'gmlb-' in t and project_name in t:
                logging.info(f"Found Docker image {t} for {project_name}")
                project_img.append(img)
    assert len(project_img) == 1, f"Must be one docker tag for {project_name}"

    # Navigate to the "Delete Project" button and click it
    logging.info("Navigating to 'Delete Project' and delete the project")
    driver.find_element_by_css_selector(".ActionsMenu__btn").click()
    time.sleep(3)
    driver.find_element_by_css_selector(".ActionsMenu__item--delete").click()
    time.sleep(3)
    # The confirmation dialog requires typing the project name back.
    driver.find_element_by_css_selector("#deleteInput").send_keys(project_name.lstrip())
    driver.find_element_by_css_selector(".DeleteLabbook .ButtonLoader").click()
    time.sleep(5)

    # Check all post conditions for delete:
    # 1. Does not exist in filesystem, and
    # 2. Docker image no longer exists
    logging.info("Checking that project path and project Docker image no longer exist")
    assert not os.path.exists(project_path), f"Project at {project_path} not deleted"
    project_img = []
    for img in dc.images.list():
        for t in img.tags:
            if 'gmlb-' in t and project_name in t:
                logging.error(f'Docker tag {t} still exists for deleted project {project_name}')
                project_img.append(img)
    assert len(project_img) == 0, \
        f"Docker image for {project_path}: {project_img[0]} still exists"
Ejemplo n.º 26
0
def get_neighborhood_state_zip(driver: "webdriver") -> tuple:
    """
    Scrape neighborhood, state and zipcode from every gallery address line.

    Each ``.bfg-gallery-address2`` element is expected to read like
    ``"<Neighborhood>, <ST> <zipcode>"``.
    NOTE(review): a multi-word neighborhood would be truncated to its first
    word by the whitespace split below -- confirm against the site markup.

    :param driver: webdriver already on the listings page.
    :return: tuple ``(neighborhood_list, state_list, zipcode_list)`` with one
        entry per parsable address element.
    """
    state_list = []
    zipcode_list = []
    neighborhood_list = []
    addresses_2 = driver.find_elements_by_css_selector('.bfg-gallery-address2')

    for element in addresses_2:
        tokens = element.text.split()
        if len(tokens) < 3:
            # Malformed or empty address line: skip it instead of raising
            # IndexError and losing everything scraped so far.
            continue
        neighborhood_list.append(tokens[0][:-1])  # drop the trailing comma
        state_list.append(tokens[1])
        zipcode_list.append(int(tokens[2]))

    return neighborhood_list, state_list, zipcode_list
Ejemplo n.º 27
0
def page_up(driver: webdriver):
    """
    Advance to the next page of results.

    Repeatedly looks up the page-down control and clicks it; if the click is
    intercepted by an overlay, scrolls the page via ``move_down`` and retries.
    """
    clicked = False
    while not clicked:
        try:
            pager = driver.find_element_by_css_selector(Order.page_down.value)
            if pager:
                time.sleep(2)
                pager.click()
                clicked = True
        except ElementClickInterceptedException:
            # Something is covering the button -- scroll and try again.
            move_down(driver)
Ejemplo n.º 28
0
def fetch_image_urls(search_term: str, n_links: int, web_driver: webdriver, sleep_between_interactions: int = 1):
    """
    Collect up to ``n_links`` full-resolution image URLs from a Google
    Images search.

    :param search_term: query string typed into Google Images.
    :param n_links: maximum number of image URLs to collect.
    :param web_driver: selenium driver used to load, scroll and click.
    :param sleep_between_interactions: seconds to pause after each
        scroll/click so lazily-loaded results can appear.
    :return: set of image ``src`` URLs containing ``http``.
    """
    def scroll_to_end(web_driver):
        # Scroll to the bottom so Google lazily loads more thumbnails.
        web_driver.execute_script(
            "window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(sleep_between_interactions)

    search_url = f"https://www.google.com/search?tbm=isch&q={'+'.join(search_term.split())}"
    web_driver.get(search_url)

    image_counter = 0
    res_start = 0
    image_urls = set()

    while image_counter < n_links:

        thumbnail_results = web_driver.find_elements_by_css_selector(
            'img.Q4LuWd')
        number_results = len(thumbnail_results)
        scroll_to_end(web_driver)
        print(
            f'Found {number_results} search results. Extracting links {res_start} to {number_results}')

        for thumbnail in thumbnail_results[res_start:number_results]:
            try:
                # Clicking a thumbnail opens the side panel holding the
                # full-resolution image.
                thumbnail.click()
                time.sleep(sleep_between_interactions)
            except Exception:  # was a bare except; stale/unclickable thumbnail
                continue

            actual_images = web_driver.find_elements_by_css_selector(
                'img.n3VNCb')
            # Renamed from 'img': the original shadowed the outer loop var.
            for image in actual_images:
                src = image.get_attribute('src')
                if src and 'http' in src:
                    image_urls.add(src)

            image_counter = len(image_urls)
            if image_counter >= n_links:
                print(f"Found {n_links} image links.")
                break

        else:
            # for/else: exhausted the visible thumbnails without reaching
            # n_links -- try the "Show more results" button.
            print("Found:", len(image_urls),
                  "image links, looking for more ...")
            time.sleep(3)
            try:
                load_more_button = web_driver.find_element_by_css_selector(
                    ".mye4qd")
            except Exception:
                # Button absent (find_element raises rather than returning
                # None, so the truthiness guard alone never triggered).
                load_more_button = None
            if load_more_button:
                web_driver.execute_script(
                    "document.querySelector('.mye4qd').click();")
            else:
                # No new thumbnails and no load-more control: return what we
                # have instead of looping forever (or crashing, as before).
                break

        res_start = len(thumbnail_results)

    return image_urls
Ejemplo n.º 29
0
 def find_sizes(self, driver: webdriver) -> List[Size]:
     """Collect every size option listed on the product page.

     Each ``.size-list .product-size`` element carries a ``data-name``
     attribute split on ``' ('``; the last two segments (a maxlen-2 deque
     keeps only those) become the size type and the cleaned size value.
     A size counts as unavailable when its class list contains "disabled".
     """
     result = []
     elements = driver.find_elements_by_css_selector(
         '.size-list .product-size')
     for element in elements:
         segments = collections.deque(
             element.get_attribute('data-name').split(' ('), 2)
         size_type = segments.popleft()
         size_value = self.clean_nubmer_size(segments.popleft())
         unavailable = "disabled" in element.get_attribute("class")
         result.append(Size(size_type, size_value, unavailable))
     return result
Ejemplo n.º 30
0
def click_related_images(driver: webdriver, url: str):
    """
    Visit each pending thumbnail id and collect its "Related images" link.

    Consumes ids from the module-level ``tbnids`` collection and records
    each processed id in the module-level ``visited`` set.
    NOTE(review): relies on those module globals being initialised by the
    caller -- confirm before reuse.

    :param driver: selenium driver used to open each image detail view.
    :param url: base Google Images results URL; ``#imgrc=<id>`` is appended.
    :return: list of absolute "Related images" URLs found.
    """
    related = []
    pbar = tqdm(desc=f'starting looping on {len(tbnids)} data ids')
    while tbnids:
        pbar.update()
        data_id = tbnids.pop()
        pbar.set_description(f'left {len(tbnids)} data ids to process')
        visited.add(data_id)
        driver.get(f'{url}#imgrc={data_id}')
        # If the fragment was dropped, the detail view did not open; skip.
        if '#imgrc' not in driver.current_url:
            continue

        text = driver.page_source.encode('utf-8').decode('utf-8')
        doc = html.fromstring(text)
        # Anchors whose aria-label mentions "Related images" link to the
        # related-images page.  (Renamed from 'l': ambiguous identifier.)
        links = [f'https://www.google.com/{a.get("href")}' for a in doc.cssselect('a') if
                 'aria-label' in a.attrib and 'Related images' in a.get('aria-label')]
        if links:
            rel = links[0]
            if rel.startswith('http'):
                related.append(rel)
    return related
Ejemplo n.º 31
0
def execSearch(browser: webdriver):
    """
    Run a Google search and save a screenshot of the results page.

    :param browser: webdriver
    """
    # Timestamp used to build a unique screenshot file name.
    dtstr = datetime.datetime.today().strftime("%Y%m%d%H%M%S")

    # Open Google (Japanese domain).
    browser.get('https://www.google.co.jp/')
    sleep(1)

    # Type the search keywords into the query box.
    browser.find_element_by_id('lst-ib').send_keys('docker selenium')

    # Submit the search.
    browser.find_element_by_name('btnK').submit()
    sleep(1)

    # Capture the results page.
    browser.save_screenshot(f'images/{dtstr}.png')