Example #1
def get_lineup(regular_elem: webdriver, bench_elem: webdriver) -> (str, str):

    captain = None
    vice = None
    complete_lineup = []

    # Collect the starters plus the bench players (the last bench row is skipped)
    players = regular_elem.find_elements_by_xpath(
        './/tr[contains(@class, "player-list-item")]')
    players += bench_elem.find_elements_by_xpath(
        './/tr[contains(@class, "player-list-item")]')[:-1]
    for player in players:
        name = get_player_name(player_elem=player)
        complete_lineup.append(name)

        try:
            player.find_element_by_xpath(
                './/li[@data-original-title="Capitano"]')
            captain = name
        except NoSuchElementException:
            pass

        try:
            player.find_element_by_xpath(
                './/li[@data-original-title="Vice capitano"]')
            vice = name
        except NoSuchElementException:
            pass

    captains = f'{captain}, {vice}'.upper()
    complete_lineup = ', '.join(complete_lineup).upper()

    return captains, complete_lineup
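
For context, a minimal calling sketch. The URL and locators are hypothetical stand-ins, and get_player_name is assumed to be defined in the same module; get_lineup only needs the two container elements holding the starter and bench rows.

from selenium import webdriver

brow = webdriver.Firefox()
brow.get('https://example.com/lineups')  # hypothetical URL
regular = brow.find_element_by_xpath('//table[@id="starters"]')  # hypothetical locator
bench = brow.find_element_by_xpath('//table[@id="bench"]')       # hypothetical locator
captains, lineup = get_lineup(regular_elem=regular, bench_elem=bench)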
Example #2
def check_hmi(proj: webdriver):
    # Try to locate the page elements and return info used to judge whether the page is alive
    hmi_alive = False
    hmi_date = '-'
    hmi_time = '-'
    check_info = '-'
    if not proj:
        # could not open the target page
        check_info = 'fail to open the url'
        log.info(check_info)
        print(proj)
        return hmi_alive, hmi_date, hmi_time, check_info
    else:
        try:
            elem = WebDriverWait(proj, 10).until(
                expected_conditions.presence_of_element_located(
                    (By.XPATH, "/html/body/div")))
            if elem:
                hmi_alive = True
                try:
                    divs = proj.find_elements_by_xpath("/html/body/div")
                    if divs:
                        div_ids = {}
                        div_scrnos = {}
                        span_ids = {}
                        span_texts = {}
                        # div_id = ''
                        s = 1  # becomes the 1-based index of the first div with a 'scrno' attribute
                        for i in range(len(divs)):
                            div_ids[i] = divs[i].get_attribute('id')
                            div_scrnos[i] = divs[i].get_attribute('scrno')
                            if div_scrnos[i]:
                                # div_id = div_ids[i]
                                break
                            # print(i, div_ids[i], div_scrnos[i])
                            s += 1
                        # print(div_id)
                        spans = proj.find_elements_by_xpath(
                            "/html/body/div[{}]/span".format(s))
                        for i in range(len(spans)):
                            span_ids[i] = spans[i].get_attribute('id')
                            span_texts[i] = spans[i].text
                            if 'DD' in span_ids[i]:
                                hmi_date = span_texts[i]
                            if 'TIME' in span_ids[i]:
                                hmi_time = span_texts[i]
                            # print(i, span_ids[i], span_texts[i])
                except Exception:
                    # failed to find the first div element = HMI not online
                    check_info = 'date part not found'
                    # log.error(check_info)
        except Exception:
            # could not reach the HMI's page; leave the current connection
            check_info = 'div not found'
            # log.error(check_info)
            return hmi_alive, hmi_date, hmi_time, check_info
    return hmi_alive, hmi_date, hmi_time, check_info
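
A hedged calling sketch: check_hmi expects a driver already pointed at the HMI page (or a falsy value when the connection failed) and always returns the same 4-tuple. The address below is a documentation placeholder.

from selenium import webdriver

proj = webdriver.Chrome()
proj.get('http://192.0.2.10/hmi')  # placeholder address
hmi_alive, hmi_date, hmi_time, check_info = check_hmi(proj)
print(hmi_alive, hmi_date, hmi_time, check_info)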
Example #3
def get_match_data(brow: webdriver, match_element: webdriver) -> zip:

    scroll_to_element(brow, match_element)
    teams = match_element.find_elements_by_xpath(
        './/h4[@class="media-heading ellipsis"]')
    schemes = match_element.find_elements_by_xpath('.//h5')
    first11 = match_element.find_elements_by_xpath(
        './/table[@id="formationTable"]')
    reserves = match_element.find_elements_by_xpath(
        './/table[@id="releaseTable"]')
    points = match_element.find_elements_by_xpath(
        './/div[@class="team-main-info"]')
    time.sleep(1)

    return zip(teams, schemes, first11, reserves, points)
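
Each of the five lists plausibly holds one element per side (home and away), so iterating the returned zip should yield one tuple per team; a hedged unpacking sketch:

# match_el: one match container, e.g. an element from find_matches (Example #6)
for team, scheme, lineup, bench, points in get_match_data(brow, match_el):
    print(team.text, scheme.text, points.text)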
Example #4
def regular_or_from_bench(player: webdriver) -> (int, int, int):
    """
	Set info about playing and substitutions for each player.

	:param player: selenium element

	:return: tuple, (int, int, int)

	"""

    in_out = player.find_elements_by_xpath('.//td//em')
    attrs = [i.get_attribute('title') for i in in_out]

    regular = 0
    going_in = 0
    going_out = 0
    # 'Entrato' / 'Uscito' are Italian for "came on" / "went off"
    if 'Entrato' not in attrs and 'Uscito' not in attrs:
        regular += 1
    elif 'Entrato' in attrs and 'Uscito' not in attrs:
        going_in += 1
    elif 'Entrato' not in attrs and 'Uscito' in attrs:
        regular += 1
        going_out += 1
    elif 'Entrato' in attrs and 'Uscito' in attrs:
        going_in += 1
        going_out += 1

    return regular, going_in, going_out
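
Since the function returns per-player 0/1 flags, lineup-wide totals can be accumulated by summing over the rows; a sketch, assuming player_rows is a list of the player <tr> elements:

def count_substitutions(player_rows) -> (int, int, int):
    regulars = subs_in = subs_out = 0
    for row in player_rows:  # player_rows: list of player <tr> WebElements (assumed)
        r, gi, go = regular_or_from_bench(row)
        regulars += r
        subs_in += gi
        subs_out += go
    return regulars, subs_in, subs_out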
Example #5
def scrape_classifica(brow: webdriver) -> None:
    """
	Scrape real data from website in order to check later how the algorithm is
	working.
	"""

    brow.get(f'{cfg.BASE_URL}classifica')
    time.sleep(3)

    dbf.empty_table(table='classifica')

    positions = brow.find_elements_by_xpath(
        './/table/tbody/tr[contains(@data-logo, ".png")]')

    columns = ['team', 'G', 'V', 'N', 'P', 'Gf', 'Gs', 'Dr', 'Pt', 'Tot']
    for pos in positions:
        team_data = []
        scroll_to_element(brow, pos)
        fields = pos.find_elements_by_xpath('.//td')[2:-2]

        for field in fields:
            team_data.append(field.text)

        dbf.db_insert(table='classifica', columns=columns, values=team_data)

    brow.close()
Example #6
def find_matches(brow: webdriver) -> (list, bool):

    # Tells whether absolute points need to be scraped. It will be False when
    # scraping lineups of the current day, which is still incomplete
    day_is_closed = True

    # Find all matches
    matches = brow.find_elements_by_xpath(
        './/div[contains(@class, "match-details card calculated")]')
    if not matches:
        # If the day is not concluded, the container has a different class
        matches = brow.find_elements_by_xpath(
            './/div[contains(@class, "match-details")]')
        day_is_closed = False

    return matches, day_is_closed
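
A hedged usage sketch tying this to Example #3; the page is assumed to be already loaded, and day_is_closed decides whether final points are available:

matches, day_is_closed = find_matches(brow)
for match in matches:
    teams_and_data = get_match_data(brow, match)  # Example #3
# scrape absolute points only when day_is_closed is True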
Example #7
def getalpha(page: webdriver):
    """
    Select the alphabetical list of communes
    :param page:
    :return:
    """
    return page.find_elements_by_xpath(dbox + '/tbody/tr[1]/td[2]/p/a')
Example #8
def get_image_links(driver: webdriver, site: str) -> Links:

    # On a side note, I am genuinely surprised by how they managed to obfuscate the images on each of their sites.
    # On KissComics, all the links sit in a script embedded in the page; that script holds the image links.
    # On KissManga, the img tags somehow only load the image links once the page is rendered in a browser;
    # viewing the raw HTML shows the images missing. I don't know enough WebDev to say how they do it.
    image_links = []
    if site == 'comics':
        regex = re.compile('lstImages.push\\("(.*?)"')

        for a in driver.find_elements_by_tag_name('script'):
            img_set = re.findall(regex, a.get_attribute('innerHTML'))
            if img_set:
                image_links.append(img_set)

    elif site == 'manga':
        elements = driver.find_elements_by_xpath(
            '//img[@onerror="onErrorImg(this)"]')

        for elem in elements:
            src = elem.get_attribute('src')
            image_links.append(src)

    print(image_links)
    # flatten the possibly nested list of links into a single flat list
    return np.array(image_links).flatten().tolist()
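
A minimal usage sketch, assuming a driver already on a reader page (the URL is a placeholder) and that the Links alias and np import come from the original module:

from selenium import webdriver

driver = webdriver.Firefox()
driver.get('https://example.com/manga/chapter-1')  # placeholder URL
links = get_image_links(driver, site='manga')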
Example #9
def open_panels(brow: webdriver, specific_panel: str = '') -> list:

    all_panels_path = '//div[@class="item-group ng-scope"]'
    wait_visible(brow, all_panels_path)
    all_panels = brow.find_elements_by_xpath(all_panels_path)

    panel_name_path = './/div[contains(@class, "group-name")]'
    buttons = [p.find_element_by_xpath(panel_name_path) for p in all_panels]

    # When playing the bet, only the matching panel is opened
    if specific_panel:
        pairs = [(all_panels[x], buttons[x]) for x in range(len(buttons))
                 if buttons[x].get_attribute('innerText').strip().lower() ==
                 specific_panel]

    # while when scraping quotes, all the valid panels are opened
    else:
        pairs = [(all_panels[x], buttons[x]) for x in range(len(buttons))
                 if buttons[x].get_attribute('innerText').strip().lower() in
                 cfg.PANELS_TO_USE]

    for _, b in pairs:
        scroll_to_element(brow, b)
        panel_name = b.text
        WebDriverWait(brow, cfg.WAIT).until(
            EC.element_to_be_clickable((By.LINK_TEXT, panel_name)))
        if 'active' not in b.get_attribute('class'):
            b.find_element_by_xpath('.//a').click()
            time.sleep(1)

    return [(b.get_attribute('innerText').strip().lower(), p)
            for p, b in pairs]
Example #10
def cross_check_teams(table: webdriver, bets_db: list) -> (int, tuple):

    preds_list = table.find_elements_by_xpath('.//tr[@class="ng-scope"]')
    teams_web = []
    preds_details = []
    for pred in preds_list:
        match = pred.find_element_by_xpath('.//td[6]').text
        team1, team2 = match.strip().split(' - ')
        quote = float(pred.find_element_by_xpath('.//td[10]').text)
        result = pred.find_element_by_xpath('.//td[11]').text
        label_element = pred.find_element_by_xpath(
            './/div[contains(@class,"ng-scope")]')
        label = label_element.get_attribute('ng-switch-when')

        teams_web.append(team1)
        teams_web.append(team2)
        preds_details.append((team1, team2, quote, result, label))
    teams_web.sort()

    for bet_db_id, _ in bets_db:
        teams_db = dbf.db_select(table='predictions',
                                 columns=['team1', 'team2'],
                                 where=f'bet_id = {bet_db_id}')
        teams_db = [t for i in teams_db for t in i]
        teams_db.sort()
        if teams_web == teams_db:
            return bet_db_id, preds_details
    return 0, []
Example #11
def click_sur_fiche_departement_annee(page: webdriver, Niveau):
    # Click the departmental sheet link whose text contains the year.
    # NOTE: 'Annee' is not a parameter here; it is presumably a module-level
    # constant defined elsewhere in the original source.
    elems = page.find_elements_by_xpath("//a[@href]")
    for elem in elems:
        print(Niveau, "elem:", elem.text, elem.get_attribute("href"))
        if Annee in elem.text:
            elem.click()
            break
Example #12
def extract_all_bets_from_container(bets_container: webdriver) -> [webdriver]:

    bets_ngclass = "{'active':selection.selected}"
    all_bets = bets_container.find_elements_by_xpath(
        f'.//div[@ng-class="{bets_ngclass}"]')

    return all_bets
Example #13
def close_all_headers(browser: webdriver) -> None:

    to_collapse_path = './/div[contains(@class, "collapse")]'
    to_collapse = browser.find_elements_by_xpath(to_collapse_path)

    for icon in to_collapse:
        sf.scroll_to_element(browser, icon)
        icon.click()
Example #14
def waitForSearchUpdate(driver: webdriver, wait: float) -> list:
    "等待搜尋頁面更新完成"
    # 取出上一頁的最後一個店名作為識別符號(lastShop)
    # 循環等待一定秒數直到看不見該店名為止,此時應該就更新完成
    time.sleep(wait)
    tbody_elements = driver.find_elements_by_xpath(
        '//*[@id="inpage"]/div/div/div[2]/div/table/tbody/tr')
    return tbody_elements
Example #15
def get_prize(brow: webdriver) -> float:

    prize_table = ('//div[@class="col-md-5 col-lg-5 col-xs-5 ' +
                   'pull-right pull-down"]')

    # the 8th cell holds the prize; strip the trailing currency symbol and
    # normalize the Italian number format before converting
    prize_el = brow.find_elements_by_xpath(prize_table + '//tr/td')[7]
    prize_value = prize_el.text[:-1].replace('.', '').replace(',', '.')
    return float(prize_value)
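
The string munging is easy to miss: the slice drops a trailing currency symbol, and the two replace calls turn the Italian number format into something float accepts. The same chain in isolation, on a made-up value:

raw = '1.234,56€'  # made-up example, not scraped data
value = float(raw[:-1].replace('.', '').replace(',', '.'))
assert value == 1234.56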
Example #16
def turnHtmlintoElements(driver: webdriver, soup: BeautifulSoup, tag: str,
                         atri: str, value: str) -> list:
    # Map each matching BeautifulSoup node to the corresponding Selenium
    # elements via its computed XPath
    return [
        driver.find_elements_by_xpath(xpath_soup(obj))
        for obj in soup.find_all(tag, {atri: value})
    ]
Example #17
def get_bet_status(bet: webdriver) -> str:

    text = bet.find_elements_by_xpath('.//td')[2].text
    if text == 'Vincente':
        return 'WINNING'
    elif text == 'Non Vincente':
        return 'LOSING'
    else:
        return ''
Example #18
def get_page_review_ids(driver: webdriver):
    ids: List[str] = []
    review_count_on_page: int = len(
        driver.find_elements_by_xpath(Config.xp_reviews_list))
    for i in range(review_count_on_page):
        # Config.xp_review_at_index builds a 1-based XPath, hence i + 1
        review_id = driver.find_element_by_xpath(
            Config.xp_review_at_index(i + 1)).get_attribute('id')
        ids.append(review_id)
    return ids
Example #19
def fetch_image_urls(query: str,
                     max_links_to_fetch: int,
                     wd: webdriver,
                     sleep_between_interactions: int = 1):
    def scroll_to_end(wd):
        wd.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(sleep_between_interactions)

    search_url = "https://www.google.com/search?q={q}&source=lnms&tbm=isch"
    # load the page
    wd.get(search_url.format(q=query))

    image_urls = set()
    image_count = 0
    results_start = 0
    while image_count < max_links_to_fetch:
        for _ in range(10):
            scroll_to_end(wd)

        # get all image thumbnail results
        thumbnail_results = wd.find_elements_by_css_selector("img.Q4LuWd")
        number_results = len(thumbnail_results)

        for img in thumbnail_results[results_start:number_results]:

            if img.get_attribute('src') and 'http' in img.get_attribute('src'):
                image_urls.add(img.get_attribute('src'))

            if img.get_attribute('src') and 'data' in img.get_attribute('src'):
                image_urls.add(img.get_attribute('src'))

            image_count = len(image_urls)

        if len(image_urls) >= max_links_to_fetch:
            print(f"Found: {len(image_urls)} image links, done!")
            break
        else:
            print("Found:", len(image_urls),
                  "image links, looking for more ...")
            # return
            # NOTE: find_element_by_css_selector raises NoSuchElementException
            # when the button is missing, so this truthiness check only guards
            # against a falsy return, not an absent element
            load_more_button = wd.find_element_by_css_selector(".mye4qd")
            if load_more_button:
                wd.execute_script("document.querySelector('.mye4qd').click();")
                time.sleep(3)

            # end_of_page = wd.find_element_by_xpath("//div[@class='OuJzKb Yu2Dnd']")
            end_of_page = wd.find_elements_by_xpath(
                "//*[ contains (text(), 'Looks like') ]")
            if end_of_page:
                print("end of the page")
                break

        # move the result startpoint further down
        results_start = len(thumbnail_results)

    return image_urls
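
A hedged driver-lifecycle sketch around the function; chromedriver is assumed to be on PATH and the query is just an example:

from selenium import webdriver

wd = webdriver.Chrome()
try:
    urls = fetch_image_urls('red panda', max_links_to_fetch=50, wd=wd)
    print(len(urls), 'urls collected')
finally:
    wd.quit()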
Example #20
def find_all_matches(brow: webdriver, league_name: str) -> [webdriver]:

    matches_path = './/div[@class="block-event event-description"]'
    try:
        wait_clickable(brow, matches_path)
    except TimeoutException:
        cfg.LOGGER.info(f'No match found for {league_name}.')
        return []

    return brow.find_elements_by_xpath(matches_path)
Example #21
def all_fields_and_bets(panel: webdriver) -> [(str, webdriver)]:

    # Select all fields we want to scrape
    fields_in_db = dbf.db_select(table='fields', columns=['name'], where='')

    # keep the part before the underscore (assumes names like 'FIELD_SUFFIX')
    fields_names = [f for i in fields_in_db for f, _ in (i.split('_'), )]
    fields_names = set(fields_names)

    all_fields_path = './/div[@class="market-info"]/div'
    all_bets_path = './/div[@class="market-selections"]'
    fields = panel.find_elements_by_xpath(all_fields_path)
    bets = panel.find_elements_by_xpath(all_bets_path)

    field_bets = []
    for field, bet_group in zip(fields, bets):
        field_name = field.get_attribute('innerText').upper().strip()
        if field_name in fields_names:
            field_bets.append((field_name, bet_group))

    return field_bets
Example #22
def extract_urls(self, driver: webdriver, regex: str) -> list:
    urls = []
    elems = driver.find_elements_by_xpath("//a[@href]")
    for elem in elems:
        try:
            url = elem.get_attribute("href")
            if re.search(regex, url):
                urls.append(url)
        except Exception:
            pass
    return urls
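
Since this is a method, a hedged sketch from the owning class's point of view; PageScraper is a hypothetical host class, and the regex just targets PDF links:

scraper = PageScraper()  # hypothetical host class
driver = webdriver.Firefox()
driver.get('https://example.com')  # placeholder URL
pdf_links = scraper.extract_urls(driver, regex=r'\.pdf$')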
Example #23
def scrape_components(driver: webdriver):
    df = pd.DataFrame()
    for item in COMPONENT_HEADER_DATA:
        element_lst = WebDriverWait(driver, 10).until(
            lambda dr: driver.find_elements_by_xpath(item['xpath']))
        if item['header'] == 'name':
            df[item['header']] = [element.get_attribute('title')
                                  for element in element_lst]
        else:
            df[item['header']] = [element.text for element in element_lst]
    # 25200 s = 7 h, presumably shifting the epoch timestamp to UTC+7
    time_stamp = time.time() + 25200
    df['timestamp'] = time_stamp
    return df
Example #24
def select_playlist(driver: webdriver, title_list, artist_list, image_list):
    time.sleep(3)

    # iterate over the 12 playlists, re-fetching the list on every pass since
    # select_song navigates away and stales the previous references
    for i in range(12):
        playlists = driver.find_elements_by_xpath(
            '//*[@id="container"]/section/div/ul/li')
        playlists[i].click()
        select_song(driver, title_list, artist_list, image_list)
        time.sleep(3)

    driver.back()
Example #25
def fetch_image_urls(query: str, max_links_to_fetch: int, wd: webdriver,
                     sleep_between_interactions: int = 1):
    def scroll_to_end(wd):
        wd.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        sleep(sleep_between_interactions)

    # build the google query
    search_url = "https://www.google.com/search?safe=off&site=&tbm=isch&source=hp&q={q}&oq={q}&gs_l=img"

    # load the page
    wd.get(search_url.format(q=query))

    image_urls = set()
    image_count = 0
    results_start = 0
    while image_count < max_links_to_fetch:
        scroll_to_end(wd)

        # get all image thumbnail results
        thumbnail_results = wd.find_elements_by_xpath('//*[@id="islrg"]/div[1]/div')
        number_results = len(thumbnail_results)
        
        print(f"Found: {number_results} search results. Extracting links from {results_start}:{number_results}")
        
        for img in thumbnail_results[results_start:number_results]:
            # try to click every thumbnail such that we can get the real image behind it
            try:
                img.click()
                sleep(sleep_between_interactions)
            except Exception:
                continue

            # extract image urls    
            actual_images = wd.find_elements_by_class_name('n3VNCb')
            for actual_image in actual_images:
                if actual_image.get_attribute('src'):
                    image_urls.add(actual_image.get_attribute('src'))

            image_count = len(image_urls)

            if len(image_urls) >= max_links_to_fetch:
                print(f"Found: {len(image_urls)} image links, done!")
                break
        else:
            # for/else: runs when the loop finished without hitting the break
            print("Found:", len(image_urls), "image links, looking for more ...")
            sleep(1)
            load_more_button = wd.find_element_by_class_name("mye4qd")
            if load_more_button:
                load_more_button.click()

        # move the result startpoint further down
        results_start = len(thumbnail_results)

    return image_urls
Example #26
def select_style(driver: webdriver):
    styles = driver.find_elements_by_xpath(
        '//*[@id="container"]/aside/div/table/tbody/tr[1]/td[1]/ul/li')
    max_iter = len(styles)

    for i in range(max_iter):
        # re-locate the style list on every pass; page navigation invalidates
        # the previously fetched elements
        styles = driver.find_elements_by_xpath(
            '//*[@id="container"]/aside/div/table/tbody/tr[1]/td[1]/ul/li')
        style_name = str(styles[i].text)
        styles[i].find_element_by_tag_name('a').send_keys(Keys.ENTER)
        time.sleep(3)
        driver.find_element_by_xpath(
            '//*[@id="container"]/section/div/header/p[2]/a[1]').click()

        title_list = []
        artist_list = []
        image_list = []

        select_playlist(driver, title_list, artist_list, image_list)

        data = zip(title_list, artist_list, image_list)
        data_to_csv(data, style_name, "style")
Example #27
def filter_by_color(brow: webdriver) -> list:

    table_path = './/table[@id="tabellaRisultatiTransazioni"]'
    wait_visible(brow, table_path)
    bets_list = brow.find_elements_by_xpath(table_path +
                                            '//tr[@class="ng-scope"]')

    color_path = './/td[contains(@class,"state state")]'
    filtered = []
    for bet in bets_list:
        c = bet.find_element_by_xpath(color_path).get_attribute('class')
        if 'blue' not in c:
            filtered.append(bet)

    return filtered
Example #28
def set_time_filter(brow: webdriver) -> None:

    path = ('.//div[@id="movement-filters"]/div[@id="games-filter"]' +
            '//label[@class="radio-inline"]')
    wait_visible(brow, path)

    all_filters = brow.find_elements_by_xpath(path)

    right_filter = [
        f for f in all_filters
        if f.get_attribute('innerText').strip() == cfg.BETS_FILTER
    ][0]
    scroll_to_element(brow, right_filter)
    right_filter.click()
    time.sleep(5)
Example #29
def wrong_day_for_lineups(brow: webdriver, day_to_scrape: int) -> bool:

    # First check if day in the webpage is the same as the day to scrape
    real_day_path = './/div[@class="filter-option-inner-inner"]'
    wait_visible(brow, cfg.WAIT, real_day_path)
    real_day = brow.find_element_by_xpath(real_day_path)
    real_day = int(real_day.text.split('°')[0])

    if day_to_scrape != real_day:
        return True

    # Then check if some lineup is missing
    hidden_path = './/div[contains(@class, "hidden-formation")]'
    missing_lineups = brow.find_elements_by_xpath(hidden_path)

    return bool(missing_lineups)
Example #30
def search_hotel(driver: webdriver, placename):
    """ Type the city name and pick it from the suggestions

    :param driver:
    :param placename:
    :return:
    """
    driver.find_element_by_id(SEARCH_HOTEL_INPUT_BOX_ID).send_keys(placename)
    time.sleep(2)
    suggest_elements = driver.find_elements_by_xpath(SEARCH_PLACE_XPATH)
    for element in suggest_elements:
        if element.text == placename:
            element.click()
            break
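
A closing usage sketch; the site URL is a placeholder, and SEARCH_HOTEL_INPUT_BOX_ID / SEARCH_PLACE_XPATH are assumed to be module-level locator constants from the original source:

from selenium import webdriver

driver = webdriver.Chrome()
driver.get('https://example.com/hotels')  # placeholder URL
search_hotel(driver, 'Mumbai')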