def get_tabelog_ranking(driver: webdriver, area: str, keyword: str):
    result = list()

    driver.set_page_load_timeout(TIMEOUT)
    driver.get(TABELOG_URL)
    sleep(1)

    driver.find_element_by_xpath(AREA_FORM_XPATH).send_keys(area)
    driver.find_element_by_xpath(KEYWORD_FORM_XPATH).send_keys(keyword)
    driver.find_element_by_xpath(SEARCH_BTN_XPATH).click()
    sleep(1)

    driver.find_elements_by_class_name(RANKING_BTN_CSS)[0].click()
    sleep(1)

    ranking = driver.find_elements_by_class_name(RANKING_LIST_CSS)
    ranking_star = driver.find_elements_by_class_name(RANKING_STAR_LIST_CSS)
    for index, shop in enumerate(ranking):
        if hasattr(shop, "text"):
            newShop = ShopInfo()
            newShop.name = shop.text
            newShop.rank = index + 1
            newShop.star = ranking_star[index].text
            result.append(newShop)

    return result
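A minimal usage sketch for get_tabelog_ranking, assuming the project's constants (TABELOG_URL, TIMEOUT, the *_XPATH/CSS locators) and the ShopInfo class are defined elsewhere; the driver setup and search terms below are illustrative.

from selenium import webdriver

driver = webdriver.Chrome()  # assumes a chromedriver on PATH
try:
    # Illustrative inputs; any Tabelog area/keyword pair works the same way.
    for shop in get_tabelog_ranking(driver, area="Tokyo", keyword="ramen"):
        print(shop.rank, shop.name, shop.star)
finally:
    driver.quit()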
Example #2
def getConCafeData(browser: webdriver, url):
    try:
        # Access the URL
        browser.get(url)
        sleep(3)

        # Get the list of area links
        AreaList = browser.find_elements_by_class_name("f-found_link")

        for Area in AreaList:
            Area.click()
            sleep(3)
            for Shop in browser.find_elements_by_class_name("free_shop"):
                # Compound class names need a CSS selector, not by_class_name
                print(Shop.find_elements_by_css_selector(".shop_name.ellipsis"))
                sleep(3)

        # Get the data
        articleElements = browser.find_elements_by_class_name("data")
        contactAddress = articleElements[3].text
        updateDate = articleElements[2].text

        return [contactAddress, updateDate]

    except Exception as e:
        return e
Example #3
def login(driver: webdriver):
    login_url = "https://pvoutput.org/login.jsp"
    driver.get(login_url)
    driver.implicitly_wait(100)

    username = driver.find_element_by_id("login")
    password = driver.find_element_by_id("password")
    username.send_keys(USERNAME)
    password.send_keys(PASSWORD)
    driver.find_elements_by_class_name("btn-primary")[0].click()
def parse_post_data(driver: webdriver, url):
    """Собирает данные о лайках и комментах к посту"""
    driver.get(url)
    try:
        likes = int(
            driver.find_elements_by_class_name(
                'sqdOP')[2].find_element_by_tag_name('span').text.replace(
                    ' ', ''))  # sqdOP yWX7d     _8A5w5 vcOH2
        views = 0
    except NoSuchElementException:
        print('this is a video')
        try:
            button = driver.find_element_by_class_name('vcOH2')
            views = int(
                button.find_element_by_tag_name('span').text.replace(' ', ''))
            button.click()
            likes = int(
                driver.find_element_by_class_name(
                    'vJRqr').find_element_by_tag_name('span').text.replace(
                        ' ', ''))
            button = driver.find_element_by_class_name('QhbhU')
            button.click()
        except NoSuchElementException:
            views = 0
            try:
                likes = int(
                    driver.find_element_by_class_name(
                        'vJRqr').find_element_by_tag_name('span').text.replace(
                            ' ', ''))
            except NoSuchElementException:
                likes = 0
                print(f'Found nothing at all for post: {url}')
    comments = get_comments_count(driver=driver)
    return likes, views, comments
Example #5
def handle_room_tag(driver: webdriver, comm: str):
    """
    进入每一个直播, 并插播广告
    写入日志
    """
    # 将所有标签统计出来
    driver.execute_script(Order.page_end.value)
    sum_room = driver.find_elements_by_class_name(Order.room_tag.value)

    i = 0
    while i < len(sum_room):
        try:
            _into_room_handle(driver, comm, i)
        except ElementClickInterceptedException:
            move_down(driver)
            _into_room_handle(driver, comm, i)
        i += 1

    try:
        move_down(driver)
        tag = driver.find_element_by_css_selector(Order.page_down.value)
        if tag:
            time.sleep(1)
            tag.click()
            driver.implicitly_wait(5)
            handle_room_tag(driver, comm)

    except NoSuchElementException:
        print("finish")
        driver.close()
        return
Example #6
def setup(driver: webdriver) -> None:
    """Set up the web page that's to be scraped."""

    # Find and click buttons to activate chat window.
    driver.get(URL)

    button = driver.find_elements_by_class_name('pb-quickReply')[1]
    button.click()

    button = driver.find_elements_by_class_name('pb-quickReply')[2]
    button.click()

    button = driver.find_elements_by_class_name('pb-quickReply')[5]
    button.click()

    # Wait until input form is visible.
    WebDriverWait(driver, 10).until(
        ec.visibility_of_all_elements_located((By.ID, 'main-input')))
Example #7
def fetch_image_urls(query: str, max_links_to_fetch: int, wd: webdriver, sleep_between_interactions: int = 1):
    def scroll_to_end(wd):
        wd.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        sleep(sleep_between_interactions)    
    
    # build the google query
    search_url = "https://www.google.com/search?safe=off&site=&tbm=isch&source=hp&q={q}&oq={q}&gs_l=img"

    # load the page
    wd.get(search_url.format(q=query))

    image_urls = set()
    image_count = 0
    results_start = 0
    while image_count < max_links_to_fetch:
        scroll_to_end(wd)

        # get all image thumbnail results
        thumbnail_results = wd.find_elements_by_xpath('//*[@id="islrg"]/div[1]/div')
        number_results = len(thumbnail_results)
        
        print(f"Found: {number_results} search results. Extracting links from {results_start}:{number_results}")
        
        for img in thumbnail_results[results_start:number_results]:
            # try to click every thumbnail such that we can get the real image behind it
            try:
                img.click()
                sleep(sleep_between_interactions)
            except Exception:
                continue

            # extract image urls    
            actual_images = wd.find_elements_by_class_name('n3VNCb')
            for actual_image in actual_images:
                if actual_image.get_attribute('src'):
                    image_urls.add(actual_image.get_attribute('src'))

            image_count = len(image_urls)

            if len(image_urls) >= max_links_to_fetch:
                print(f"Found: {len(image_urls)} image links, done!")
                break
        else:
            print("Found:", len(image_urls), "image links, looking for more ...")
            sleep(1)
            # find_element_by_* would raise if the button is absent;
            # find_elements returns an empty list instead
            load_more_buttons = wd.find_elements_by_class_name("mye4qd")
            if load_more_buttons:
                load_more_buttons[0].click()

        # move the result startpoint further down
        results_start = len(thumbnail_results)

    return image_urls
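A hedged usage sketch for fetch_image_urls. The class names it relies on ('n3VNCb', 'mye4qd') are Google's obfuscated, periodically rotated classes, so the scraper only works while they still match the live page; the driver setup and query are illustrative.

from selenium import webdriver

wd = webdriver.Chrome()  # assumes a chromedriver on PATH
try:
    urls = fetch_image_urls("sunflowers", max_links_to_fetch=25, wd=wd,
                            sleep_between_interactions=1)
    print(f"Collected {len(urls)} image URLs")
finally:
    wd.quit()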
Example #8
    def make_reservation(self, sportjaDriver: webdriver):
        self.sportjaDriver = sportjaDriver
        _check_if_next_week(self)
        classnames = sportjaDriver.find_elements_by_class_name("classname")
        weightList = []
        for classname in classnames:
            if classname.text == "Weightlifting":
                weightList.append(classname)

        for elem in weightList[1:]:
            elem.click()

        sleep(1)
        reserveButton = None
        waitingListButton = None
        cancelButton = None

        try:
            cancelButton = sportjaDriver.find_element_by_css_selector(
                "a.grey_btn_small:nth-child(2) > span:nth-child(1)")
        except NoSuchElementException:
            print("No cancelButton found.")

        try:
            reserveButton = sportjaDriver.find_element_by_css_selector(
                "#book_btn > span:nth-child(1)")
        except NoSuchElementException:
            print("No reserveButton found.")

        try:
            waitingListButton = sportjaDriver.find_element_by_css_selector(
                "#join_waiting_list_btn")
        except NoSuchElementException:
            print("No waitingListButton found.")

        if reserveButton:
            reserveButton.click()
            sportjaDriver.close()
            print('Class reserved for next Saturday!')
            return 0
        elif waitingListButton:
            waitingListButton.click()
            sportjaDriver.close()
            print('Put on waiting list, check your mailbox')
            return 0
        elif cancelButton:
            sportjaDriver.close()
            print('Already reserved')
            return 0
        else:
            print('Can\'t reserve or put on waiting list')
            sportjaDriver.close()
            return 1
def get_posts_list_and_subs(driver: webdriver, url: str, months: list):
    """Прогружает страницу пользователя и возвращает список из большинства последних постов"""
    driver.get(url)
    sleep(2)
    try:
        subs = int(
            driver.find_elements_by_class_name('g47SY')[1].get_attribute(
                'title').replace(' ', ''))
        print(f'{subs} followers')
    except IndexError:
        subs = 0
        print(f'Could not collect the follower count for account {url}')
    driver.execute_script('window.scrollTo(0, document.body.scrollHeight);')
    sleep(2)
    posts = driver.find_elements_by_class_name('v1Nh3')
    posts_links = []
    for post in posts:
        posts_links.append(
            post.find_element_by_tag_name('a').get_attribute('href'))
    try:
        more_button = driver.find_element_by_class_name('xLCgt')
        more_button.click()
    except NoSuchElementException:
        pass
    except ElementClickInterceptedException:
        print('the button cannot be clicked')
    for _ in range(5):
        driver.execute_script(
            'window.scrollTo(0, document.body.scrollHeight);')
        sleep(2)
        posts = driver.find_elements_by_class_name('v1Nh3')
        for post in posts:
            posts_links.append(
                post.find_element_by_tag_name('a').get_attribute('href'))
    posts_links = delete_dublicates_from_list(posts_links)
    res = filter_posts_list(driver=driver,
                            months=months,
                            posts_list=posts_links)
    return res, subs
def get_atms(driver: webdriver, city_name: str, region_name: str):
    """Проходим по всем страницам, и получаем список банкоматов.

    :driver: selenium.webdriver
    :returns: [{ ... }]

    """
    pages = 0
    rows = list()
    has_next_page = True

    while has_next_page:
        pages = pages + 1
        for row in driver.find_elements_by_class_name('page-atm__table_row'):
            rows.append({
                'region': region_name,
                'city': city_name,
                'bank': row.find_element_by_class_name('address-logo').text,
                'address_title': row.find_element_by_class_name('address-title').text,
                'address_type': row.find_element_by_class_name('address-type').text,
                'working_time': row.find_element_by_class_name('page-atm__table_col--time').text,
                'currency': row.find_element_by_class_name('page-atm__table_col--currency').text,
                'address_metro': row.find_element_by_class_name('address-metro').text,
                # 'Показать на карте' ("Show on map") must match the page's link text
                'address_map': (row.find_element_by_class_name('address-map')
                                .find_element_by_link_text('Показать на карте')
                                .get_attribute('href')),
            })

        try:
            sleep(randint(1, 3))
            driver.find_element_by_class_name('pagination-arrow--next').click()
        except NoSuchElementException:
            has_next_page = False

    logging.info('{}: {} (pages: {}; atms: {})'.format(region_name, city_name,
                                                       pages, len(rows)))
    return rows
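Because get_atms returns flat dicts with a fixed key set, dumping the result with csv.DictWriter is a natural follow-up. A sketch assuming the driver has already been navigated to the ATM listing page; the city/region names and output path are illustrative.

import csv
from selenium import webdriver

driver = webdriver.Chrome()
# ... navigate the driver to the ATM listing page for the chosen city first ...
atms = get_atms(driver, city_name='Moscow', region_name='Moscow Oblast')
if atms:
    with open('atms.csv', 'w', newline='') as f:
        writer = csv.DictWriter(f, fieldnames=list(atms[0].keys()))
        writer.writeheader()
        writer.writerows(atms)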
Example #11
def getAccountsFromList(browser: webdriver, list_name):
    """
    Twitterでリストのフォロワーを抽出する
    :param browser: webdriver
    """

    # Access the list
    browser.get('https://twitter.com/' + USER_NAME + '/lists/' + list_name +
                '/members')

    accounts = browser.find_elements_by_class_name("js-actionable-user")
    account_list = []
    for account in accounts:
        name = account.get_attribute('data-screen-name')
        if name is not None:
            account_list.append(name)
    return account_list
def empty_cart(driver: webdriver):
    try:
        WebDriverWait(driver, 3).until(
            expected_conditions.presence_of_element_located(
                (By.CLASS_NAME, 'footer')))
        remove_buttons = driver.find_elements_by_class_name('cart_button')
        for remove_button in remove_buttons:
            inventory_item_name = remove_button.find_element_by_xpath(
                '../../a/div').text
            remove_button.click()
            print(
                str(datetime.datetime.now()) + ' Item "' +
                inventory_item_name + '" removed from cart.')
        print(str(datetime.datetime.now()) + ' empty_cart PASSED')
    except Exception:
        traceback.print_exc()
        raise
Example #13
    def scrape_popularity_changes(self,
                                  driver: webdriver) -> List[ChangeStock]:
        WebDriverWait(driver, 3).until(
            EC.presence_of_element_located(
                (By.CLASS_NAME, constants.CLASS_USER_CHANGE)))

        stocks = driver.find_elements_by_class_name(
            constants.CLASS_USER_CHANGE)
        change_stocks = []
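        # Assumption: each stock occupies five consecutive elements (rank,
        # then symbol, change, previous-day and current-day user counts at
        # the *_DIFF offsets), hence the stride of 5 below.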
        for i in range(0, len(stocks), 5):
            rank = stocks[i].text
            symbol = stocks[i + constants.SYMBOL_DIFF].text
            change = stocks[i + constants.CHANGE_DIFF].text
            prev_day = stocks[i + constants.USERS_PREV_DAY_DIFF].text
            cur_day = stocks[i + constants.USERS_CUR_DAY_DIFF].text
            change_stock = ChangeStock(rank, symbol, change, prev_day, cur_day)
            change_stocks.append(change_stock)

        return change_stocks
Example #14
def fill_cart(driver: webdriver):
    try:
        WebDriverWait(driver, 3).until(
            expected_conditions.presence_of_element_located(
                (By.CLASS_NAME, 'footer')))
        products = {}
        inventory_item_names = driver.find_elements_by_class_name(
            'inventory_item_name')
        for inventory_item_name in inventory_item_names:
            add_to_cart_button = inventory_item_name.find_element_by_xpath(
                '../../../div[@class="pricebar"]/button')
            products[inventory_item_name.text] = add_to_cart_button
        for product, button in products.items():
            button.click()
            print(
                str(datetime.datetime.now()) + ' "' + product +
                '" added to cart.')

        print(str(datetime.datetime.now()) + ' fill_cart PASSED')
    except Exception:
        traceback.print_exc()
        raise
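fill_cart and the earlier empty_cart read like smoke tests for an inventory/cart page. The source does not name the target site, so the URL below is only a guess based on the class names ('inventory_item_name', 'cart_button') and purely illustrative.

from selenium import webdriver

driver = webdriver.Chrome()
try:
    driver.get('https://www.saucedemo.com/inventory.html')  # hypothetical target
    fill_cart(driver)
    # Depending on the markup, empty_cart may first require navigating
    # to the cart page.
    empty_cart(driver)
finally:
    driver.quit()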
Example #15
def sign_in(driver: webdriver) -> None:
    username = env("username")
    password = env("password")

    driver.get(URL)
    sleep(1.5)

    # Go to log in with a MouseHunt account
    driver.find_elements_by_class_name("signInText")[0].click()

    # Enter credentials
    user_field = driver.find_elements_by_class_name("username")[3]
    user_field.send_keys(username)
    password_field = driver.find_elements_by_class_name("password")[3]
    sleep(1.5)
    password_field.send_keys(password)

    # Click Login
    driver.find_elements_by_class_name("actionButton")[1].click()
    sleep(1.5)
Example #16
def get_vineyards(link: str, driver: webdriver, destination: str, date: date):
    driver.get(link)

    # Select category 'Crops'
    product_selector = driver.find_element_by_id('filter_3_primary')
    product_selector.click()
    product_selector.send_keys(Keys.ARROW_DOWN)
    product_selector.send_keys(Keys.ENTER)
    sleep(0.3)
    print('Loading category "Crops" successful.')

    # Select subcategory 'Fruit'
    crop_selector = driver.find_element_by_id('filter_3_secondary')
    crop_selector.click()
    crop_selector.send_keys(Keys.ARROW_DOWN)
    crop_selector.send_keys(Keys.ARROW_DOWN)
    crop_selector.send_keys(Keys.ENTER)
    sleep(0.3)
    print('Loading subcategory "Fruit" successful.')

    # Select 'Grapes For Wine'
    fruit_selector = driver.find_element_by_id('filter_3_tertiary')
    fruit_selector.click()
    for i in range(19):
        sleep(0.1)
        fruit_selector.send_keys(Keys.ARROW_DOWN)
    fruit_selector.send_keys(Keys.ENTER)
    print('Loading "Grapes For Wine" successful.')

    # Load all producers
    load_button = driver.find_element_by_id('scrollDown')
    n_elements = len(driver.find_elements_by_class_name('results_list_item'))
    while True:
        print('Loading more vineyards.')
        for i in range(20):
            load_button.click()
            sleep(0.1)
        n_elements_new = len(
            driver.find_elements_by_class_name('results_list_item'))
        if n_elements < n_elements_new:
            n_elements = n_elements_new
        else:
            print('Loaded all vineyards.')
            break

    # Create .csv
    with open(destination, 'w') as output:
        writer = csv.writer(output)
        writer.writerow([
            'Name', 'Category', 'Address', 'Phone', 'Email', 'Website',
            'Short description', 'Description', 'Crops', 'Processed products',
            'Cropped_acreage', 'Total_acreage'
        ])

    # Prepare spider
    selector = Selector(text=driver.page_source)
    links = [
        'http://www.biodynamicfood.org' + href for href in selector.xpath(
            '//*[@class="results_list_item"]//a/@href').extract()
    ]

    class MySpider(Spider):
        name = 'biodynamic'
        allowed_domains = ['biodynamicfood.org']
        start_urls = links

        def parse(self, response):
            sel = Selector(response)
            Name = sel.xpath('//h1/text()').extract_first()
            Category = sel.xpath(
                '//h2[@class="business-type"]/text()').extract_first()

            address_field_1 = sel.xpath(
                '//div[@class="member-address"]/p/text()[1]').extract_first(
                ).strip()
            address_field_2 = sel.xpath(
                '//div[@class="member-address"]/p/text()[2]').extract_first(
                ).strip()
            Address = address_field_1 + '\n' + address_field_2

            contact_info = sel.xpath(
                '//div[@class="member-address"]/p/text()').extract()
            contact_info = [line.strip() for line in contact_info]
            Phone = [
                line for line in contact_info if line.startswith('Phone: ')
            ][0]
            Phone = Phone.replace('Phone: ', '')
            Email = sel.xpath(
                '//div[@class="member-address"]//a[1]/text()').extract_first()
            Website = sel.xpath(
                '//div[@class="member-address"]//a[2]/text()').extract_first()
            Short_description = sel.xpath(
                '//p[@class="quote"]/text()').extract_first()

            profile = sel.xpath(
                '//div[@class="member-profile"]/div/p/text()').extract()
            profile = [element.strip() for element in profile]
            len_Description = max([len(element) for element in profile])
            Description = [
                element for element in profile
                if len(element) == len_Description
            ][0]
            Crops = sel.xpath(
                '//div[p/*/text()="Crops"]//li//text()').extract()
            Crops = ', '.join(Crops)
            Processed_products = sel.xpath(
                '//div[p/*/text()="Processed Product"]//li//text()').extract()
            Processed_products = ', '.join(Processed_products)

            all_text = sel.xpath('//p/text()').extract()
            all_text = [text.strip() for text in all_text]
            Acreage = [text for text in all_text if 'Acres' in text]
            try:
                Cropped_acreage = Acreage[0]
                Total_acreage = Acreage[1]

            except IndexError:
                print('Acreage not specified for one organization.')
                Cropped_acreage = ''
                Total_acreage = ''

            with open(destination, 'a', newline='') as output:
                writer = csv.writer(output)
                writer.writerow([
                    Name, Category, Address, Phone, Email, Website,
                    Short_description, Description, Crops, Processed_products,
                    Cropped_acreage, Total_acreage
                ])

    # Run spider
    process = CrawlerProcess(
        {'USER_AGENT': 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)'})
    process.crawl(MySpider)
    process.start()
    process.stop()
Example #17
def download_chapter(driver: webdriver, chapter_url: str):
    driver.get(chapter_url)  # Open chapter

    settings = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((
            By.XPATH,
            "//div[starts-with(@class, 'Navigation-module_settingsContainer_')]"
        )))  # Find settings button with wait
    settings.click()  # Open settings
    driver.find_elements_by_xpath(
        "//div[starts-with(@class, 'Modal-module_quarity_')]"
    )[2].click()  # Set image quality
    settings.click()  # Open settings again
    driver.execute_script(
        'document.querySelector("input#mode-horizontal").removeAttribute("disabled")'
    )  # Enable horizontal mode if it is disabled
    driver.execute_script(
        'document.querySelector("input#mode-horizontal").click()'
    )  # Turn into horizontal mode
    time.sleep(3)

    title = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located(
            (By.XPATH, '/html/body/div/div[2]/div[2]/div[3]/div[1]/div[2]/a/h1'
             ))).text  # Wait for page load after refresh
    chapter = driver.find_element_by_xpath(
        '/html/body/div/div[2]/div[2]/div[3]/div[1]/div[2]/div/p').text[1:]

    title = title.replace(':', '')

    # load all images
    pages = int(
        driver.find_element_by_xpath(
            '/html/body/div/div[2]/div[2]/div[2]/div[2]/p').text.split(' / ')
        [1])
    actions = ActionChains(driver)
    while True:
        actions.send_keys(Keys.LEFT).perform()
        time.sleep(1)
        progress = driver.find_element_by_xpath(
            '/html/body/div/div[2]/div[2]/div[2]/div[2]/p').text
        if progress in (f'{pages - 1} / {pages}', f'{pages} / {pages}'):
            break
    os.makedirs(os.path.dirname(f'.//{title}//{chapter}//'), exist_ok=True)
    for page, img in enumerate(driver.find_elements_by_class_name('zao-image'),
                               start=1):
        b64 = driver.execute_script(
            r'''function getBase64Image(img)
            {
                var canvas = document.createElement("canvas");
                canvas.width = img.naturalWidth;
                canvas.height = img.naturalHeight;
                var ctx = canvas.getContext("2d");
                ctx.drawImage(img, 0, 0);
                var dataURL = canvas.toDataURL();
                return dataURL.replace(/^data:image\/(png|jpg);base64,/, "");
            }
            return getBase64Image(arguments[0])
            ''', img)
        image_data = base64.b64decode(b64)
        with open(f'.//{title}//{chapter}//{page}.png', 'wb') as file:
            file.write(image_data)

    driver.close()
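A sketch of invoking download_chapter; the viewer URL is a hypothetical placeholder, and note that the function closes the driver itself when it finishes.

from selenium import webdriver

driver = webdriver.Chrome()  # assumes a chromedriver on PATH
download_chapter(driver, 'https://example.com/viewer/chapter/1')  # hypothetical URL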
    def extract(self, df: pd.DataFrame, driver: webdriver):
        # df2 = pd.DataFrame(columns=['first', 'last', 'profile', 'email', 'occupation', 'company', 'phone'])
        result = []
        total_rows = len(df)
        control_count = 0

        try:
            for index, r in df.iterrows():
                if (control_count % 100 == 0) and (control_count > 0):
                    driver = self.perform_login()
                    control_count = 0

                control_count += 1
                found_email = None
                found_job = None
                found_company = None
                found_phone = None
                found_location = None

                if pd.isnull(r['profileUrl']):
                    continue

                first_name = r['firstName']
                last_name = r['lastName']
                profile_url = r['profileUrl']

                # driver.execute_script("window.open('', '_BLANK')")
                # driver.switch_to.window(driver.window_handles[1])
                driver.get(profile_url + '/detail/contact-info')
                not_found = driver.current_url == 'https://www.linkedin.com/in/unavailable/'

                # noinspection PyBroadException
                try:
                    email = driver.find_element_by_xpath(
                        "//a[contains(@href, 'mailto')]")
                    if email is not None:
                        found_email = email.text
                except:
                    found_email = None

                try:
                    occupation = driver.find_element_by_xpath(
                        "//h2[contains(@class, 'mt1 t-18 t-black t-normal break-words')]"
                    )
                    if occupation is not None:
                        found_job = occupation.text
                except:
                    found_job = None

                try:
                    company_name = driver.find_element_by_xpath(
                        "//span[contains(@class, 'text-align-left ml2 t-14 t-black t-bold full-width lt-line-clamp lt-line-clamp--multi-line ember-view')]"
                    )
                    if company_name is not None:
                        found_company = company_name.text
                except:
                    found_company = None

                try:
                    phone_number = driver.find_element_by_xpath(
                        "//li[contains(@class, 'pv-contact-info__ci-container t-14')]/span[contains(@class, 't-14 t-black t-normal')]"
                    )
                    if phone_number is not None:
                        found_phone = phone_number.text
                except:
                    found_phone = None

                try:
                    e1 = driver.find_elements_by_class_name("ph5")
                    e2 = e1[0].find_elements_by_class_name(
                        "pv-top-card--list-bullet")
                    e3 = e2[0].find_element_by_class_name("t-16")
                    found_location = e3.text
                except:
                    found_location = None

                if (found_phone is None) and (found_company is None) \
                    and (found_email is None) and (found_job is None) \
                    and (found_location is None):
                    if not not_found:
                        breakpoint()
                        driver = self.perform_login()

                result.append({
                    'first': first_name,
                    'last': last_name,
                    'profile': profile_url,
                    'location': found_location,
                    'email': found_email,
                    'company': found_company,
                    'occupation': found_job,
                    'phone': found_phone,
                })

                if not_found:
                    print(
                        f'NOT FOUND: {first_name} {last_name} - {profile_url}')
                else:
                    print(
                        f'Found and added: {first_name} {last_name} {found_email} / {found_location} ({index}/{total_rows})'
                    )
                # time.sleep(10)
        finally:
            return pd.DataFrame(result)
Example #19
def courseDetails(driver: webdriver, url: str):
    driver.get(url)
    #time.sleep(7)
    l = WebDriverWait(driver, timeout=10).until(
        expected_conditions.presence_of_element_located(
            (By.CSS_SELECTOR, "div[jsname='rymPhb']")))
    # while not WebDriverWait(driver, timeout=10).until(expected_conditions.element_to_be_clickable(l.find_element_by_tag_name("div"))):
    # time.sleep(2)
    check_height = driver.execute_script("return document.body.scrollHeight;")
    while True:
        driver.execute_script(
            "window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(4)
        height = driver.execute_script("return document.body.scrollHeight;")
        if height == check_height:
            break
        check_height = height
    pg = BeautifulSoup(driver.page_source, "lxml")
    temp = pg.find(name="div", attrs={"jsname": "rymPhb"})
    assign, material, text = [], [], []
    for i in range(len(temp.contents)):
        if len(temp.contents[i]["class"]) > 4:
            j = temp.contents[i].contents[0].contents[0].contents
            text = j[2].contents[0].contents[1].contents[0].string.split(
                ": ")[1]
            date = j[2].contents[1].contents[1].string
            if j[0]["aria-label"][0] == "A":
                assign.append([i, text, date])
            else:
                material.append([i, text, date])
    time.sleep(3)
    total = driver.find_elements(By.CSS_SELECTOR, "div[jsname='rymPhb'] > div")
    for i in assign:
        #WebDriverWait(driver, timeout = 5).until(expected_conditions.element_to_be_clickable((By.CSS_SELECTOR, "div[jsname='rymPhb']")))
        driver.execute_script("arguments[0].click();", total[i[0]])
        #total[i[0]].click()
        time.sleep(3)
        assignment = BeautifulSoup(driver.page_source, "lxml")
        WebDriverWait(driver, timeout=6).until(
            expected_conditions.visibility_of(
                driver.find_elements_by_class_name("W4hhKd")[-1]))
        details = assignment.find_all(name="div",
                                      attrs={"class": "W4hhKd"
                                             })[-1].contents  #make DOM visible
        i.append(details[1].string)  #due date
        if i[-1] is None:
            i[-1] = "No due date"
        #i.append(details[0].contents[0].contents[0].string) #max marks
        if len(details[0].contents) == 0:
            i.append("No marks mentioned")
            i.append("No marks received")
        else:
            temp = details[0].contents[0].contents[0].contents
            if len(temp) > 1:
                i.append(temp[1].string.split()[-1])
                i.append(temp[1].string.split()[0])
            else:
                i.append(temp[0])
                i.append("No marks received")
        details = assignment.find_all(name="aside", attrs={
            "class": "asCVDb"
        })[-1].contents[0].contents[0].contents[1].contents[0]
        if details.contents[0].string[0] == 'A':
            i.append(details.contents[0].string + " (Not submitted)")
        else:
            i.append(details.contents[0].string)
        i[0] = driver.current_url
        driver.find_element_by_tag_name("nav").find_element_by_tag_name(
            "div").find_element_by_tag_name("div").find_element_by_tag_name(
                "div").find_element_by_tag_name("h1").find_element_by_tag_name(
                    "a").click()
        time.sleep(2)

    for i in material:
        total[i[0]].click()
        time.sleep(2)
        i[0] = driver.current_url
        driver.find_element_by_tag_name("nav").find_element_by_tag_name(
            "div").find_element_by_tag_name("div").find_element_by_tag_name(
                "div").find_element_by_tag_name("h1").find_element_by_tag_name(
                    "a").click()
        time.sleep(2)
    return assign, material
Example #20
def get_related_search_words(driver: webdriver):
    related_search_words = driver.find_elements_by_class_name("nVcaUb")
    for i in range(len(related_search_words)):
        print(related_search_words[i].text)
        print(type(related_search_words[i]))
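All of the examples above use the Selenium 3 locator helpers (find_element_by_* / find_elements_by_*), which were deprecated in Selenium 4.0 and removed in 4.3. A minimal translation sketch for running them on a current driver:

from selenium import webdriver
from selenium.webdriver.common.by import By

driver = webdriver.Chrome()
# Selenium 3 style, as used throughout the examples above:
#     driver.find_elements_by_class_name("nVcaUb")
# Selenium 4 equivalent:
words = driver.find_elements(By.CLASS_NAME, "nVcaUb")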