def analyze_element(self, el: WebElement):
        # term: first integer in the bullet text, defaulting to 1
        term_full_text = el.find_element_by_xpath(
            ".//ul[@class='price-bullet']/li").text

        try:
            term = re.search(r'\b\d+\b', term_full_text).group()
        except AttributeError:  # re.search found no match and returned None
            term = 1

        # price
        price = el.find_element_by_xpath(
            ".//div[@class='price-container']/h4/span").text

        # product
        product_name = el.find_element_by_xpath('.//h3').text

        # download: click the last link in the plan-links grid
        link = el.find_elements_by_xpath(
            './/div[@class="gridPlanLinks"]/a')[-1]
        link.click()

        return {
            'term': term,
            'price': price,
            'product_name': product_name,
        }
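These snippets all use the legacy `find_element_by_*` / `find_elements_by_*` helpers, which are deprecated in Selenium 3.x and were removed in Selenium 4. A minimal sketch of the modern equivalents for the lookups above, assuming `el` is a located WebElement as in the snippet:

from selenium.webdriver.common.by import By

term_full_text = el.find_element(
    By.XPATH, ".//ul[@class='price-bullet']/li").text
links = el.find_elements(By.XPATH, './/div[@class="gridPlanLinks"]/a')
links[-1].click()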
Example no. 2
 def __get_all_similar_question_in_row(self, row: WebElement):
     result = []
     locator = "./td[1]/div[1]//span[@draggable]"
     similar_questions = row.find_elements_by_xpath(locator)
     for question in similar_questions:
         result.append(question.text)
     return result
Example no. 3
def get_times_items(tbody: WebElement):
    # collecting applicable items
    items = tbody.find_elements_by_xpath("tr")
    # getting times from the items
    times = []
    for row in items:
        str_time = row.find_element_by_xpath("td[2]").text.upper()
        datetime_obj = datetime.strptime(str_time, TIME_FORMAT)
        times.append((row, datetime_obj, str_time))
    # return a list of (row WebElement, datetime, str_time) tuples
    return times
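`TIME_FORMAT` is defined in the snippet's surrounding module. A usage sketch under the assumption that it is a 12-hour format (the `.upper()` call suggests am/pm normalisation):

from datetime import datetime

TIME_FORMAT = "%I:%M %p"  # assumed value, e.g. matches "07:30 PM"
parsed = datetime.strptime("7:30 pm".upper(), TIME_FORMAT)
print(parsed.time())  # 19:30:00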
Example no. 4
def find_child_hrefs(button: WebElement) -> List[str]:
    children = button.find_elements_by_xpath(".//*")
    hrefs = []
    for child in children:
        try:
            # get_attribute returns None when the attribute is absent,
            # so only stale-element errors need handling here
            href = child.get_attribute("href")
            hrefs.append(href)
        except StaleElementReferenceException:  # from selenium.common.exceptions
            continue
    hrefs = [href for href in hrefs if href is not None]
    return hrefs
Example no. 5
        def get_caption(node: WebElement):
            caption = ''
            children = node.find_elements_by_xpath('child::*')
            for child in children:
                txt = MediumScraper.safe_get_attribute(child, 'text', '')
                caption += child_reformat(txt)
            return caption
Example no. 6
    def _get_children(element: WebElement) -> typing.List[WebElement]:
        try:
            children = element.find_elements_by_xpath('./*')
            children_of_children = []

            for child in children:
                children_of_children += WebElementHandler._get_children(child)

            children += children_of_children
        except InvalidSelectorException:
            children = []

        return children
Example no. 7
    def _get_parents(element: WebElement) -> typing.List[WebElement]:
        try:
            parents = element.find_elements_by_xpath('..')
            parents_of_parents = []

            for parent in parents:
                parents_of_parents += WebElementHandler._get_parents(parent)

            parents += parents_of_parents
        except InvalidSelectorException:
            parents = []

        return parents
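The level-by-level recursion in these two helpers can be collapsed into single XPath queries using the descendant and ancestor axes. A sketch of the equivalents, assuming `element` is any located WebElement:

# every descendant of element, in document order (the set _get_children builds)
descendants = element.find_elements_by_xpath('.//*')
# every ancestor up to the document root (the set _get_parents builds)
ancestors = element.find_elements_by_xpath('ancestor::*')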
Example no. 8
 def make_card_instance(self, html_element: WebElement, card: MagicCard,
                        store: MagicStore):
     html_instance_attributes = html_element.find_elements_by_xpath(".//td")
     matching = re.search(r'  (.*?) (.*) (.*?) unid. (.*) (.*$)',
                          html_element.text, re.S)
     quantity = int(matching.group(3))
     if quantity == 0:
         return None
     edition = html_instance_attributes[0].find_element_by_xpath(
         ".//a/img").get_attribute("title")
     language = matching.group(1)
     quality = next(filter(lambda x: x in matching.group(2),
                           self.qualities))
     # price like "1.234,56" (pt-BR formatting) -> 1234.56
     price = float(matching.group(5).replace(".", "").replace(",", "."))
     foil = "Foil" in matching.group(2)
     return MagicCardInstance(price, quality, quantity, language, edition,
                              foil, card, store)
Example no. 9
    def node_find_elements_by_xpath(node: WebElement, xpath: str,
                                    raise_exc: bool = True) -> Union[None, List[WebElement]]:
        if node is None:
            return []
        try:
            return node.find_elements_by_xpath(xpath)
        except WebDriverException:
            if raise_exc:
                raise
            return None
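A usage sketch of the `raise_exc` switch, assuming `card` is a previously located WebElement:

# a malformed locator raises InvalidSelectorException (a WebDriverException);
# with raise_exc=False the helper swallows it and returns None
links = node_find_elements_by_xpath(card, './/a[', raise_exc=False)
if links is None:
    links = []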
Example no. 10
    def analyze_element(self, el: WebElement):
        card_body_elements = el.find_elements_by_xpath(
            './/div[contains(@class, "card")]'
            '/div[contains(@class, "card-body")]/div')

        # term
        term_element = card_body_elements[1].find_element_by_xpath(
            ".//div/span")
        # note: lstrip/rstrip strip character sets, not prefixes/suffixes
        term = term_element.text.replace("Term:", "").replace(
            "months", "").strip()

        # price
        price_element = card_body_elements[0].find_element_by_xpath(".//div")
        price = price_element.text.rstrip('¢')

        # product
        product_name = el.find_element_by_xpath('.//div/div/h2').text

        # download: open the card-footer dropdown, then follow its link
        footer_element = el.find_element_by_xpath(
            './/div[contains(@class, "card")]'
            '/div[contains(@class, "card-footer")]')
        dropdown_element = footer_element.find_element_by_xpath('.//button')
        self.wait_for()
        dropdown_element.click()

        # find_element_by_xpath raises NoSuchElementException rather than
        # returning a falsy value, so poll with find_elements_by_xpath
        links = footer_element.find_elements_by_xpath(".//div/a")
        retries = 0
        while not links and retries < 5:
            self.wait_for()
            links = footer_element.find_elements_by_xpath(".//div/a")
            retries += 1

        if links:
            links[0].click()
            self.wait_for()

        return {
            'term': term,
            'price': price,
            'product_name': product_name,
        }
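The manual poll in the snippet is what Selenium's explicit waits provide out of the box. A minimal sketch, assuming `self.driver` holds the WebDriver instance:

from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

link = WebDriverWait(self.driver, 10).until(EC.element_to_be_clickable(
    (By.XPATH, '//div[contains(@class, "card-footer")]//div/a')))
link.click()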
Example no. 11
def parse_row(tr: WebElement) -> Tuple[str, str]:
    """Use for parsing trade card or lot card"""
    cells = tr.find_elements_by_xpath('./td')
    if len(cells) == 2:
        field_name = cells[0].get_attribute('innerText').replace('.',
                                                                 ' ').strip()

        inner_tables = cells[1].find_elements_by_tag_name('table')
        if inner_tables:
            # Sometimes the cell contains a change history rendered as a table
            field_value = inner_tables[0].find_element_by_tag_name(
                'td').get_attribute('innerText')
        else:
            field_value = cells[1].get_attribute('innerText')
    elif len(cells) == 1:
        field_name = cells[0].find_element_by_tag_name('b').get_attribute(
            'innerText').strip()
        field_value = cells[0].find_element_by_tag_name('div').get_attribute(
            'innerText')
    else:
        raise ValueError(f'Wrong number of cells: {len(cells)}')

    return field_name, field_value
Example no. 12
 def get_child_nodes(self, node: WebElement) -> list:
     return node.find_elements_by_xpath('./*')
Example no. 13
def parse_experience_row(experience_row: WebElement) -> dict:
    experience = {'positions': []}

    # ONE POSITION
    try:
        experience['company'] = clean_company_name(
            experience_row.find_element_by_xpath(
                selectors['profile_company_name_with_one_position']).text)

        experience['duration_summary'] = parse_duration(experience_row)
        position = {
            'name': parse_one_position_name(experience_row),
            'location': parse_location(experience_row),
            'description': parse_description(experience_row),
            'dates': parse_dates_from_to(experience_row)
        }
        position['dates']['duration'] = experience['duration_summary']
        experience['positions'].append(position)
    except NoSuchElementException as e:
        experience['company'] = ''
        logging.debug(
            f"profile_company_name_with_one_position not found (maybe because it's many positions?) {e}"
        )
    except Exception as e:
        experience['company'] = ''
        logging.debug(f"Unknown Exception {e}")

    # MANY POSITIONS
    try:
        experience['company'] = clean_company_name(
            experience_row.find_element_by_xpath(
                selectors['profile_company_name_with_many_positions']).text)

        try:
            experience['duration_summary'] = experience_row.find_element_by_xpath(
                selectors['profile_company_summary_duration_with_many_positions']
            ).text
        except NoSuchElementException as e:
            experience['duration_summary'] = ''
            logging.debug(
                f"Can't find profile_company_summary_duration_with_many_positions {e}"
            )
            print(
                "Can't find profile_company_summary_duration_with_many_positions"
            )
        except Exception as e:
            experience['duration_summary'] = ''
            logging.debug(f"Unknown Exception {e}")

        try:
            for role in experience_row.find_elements_by_xpath(
                    selectors['profile_experience_role_for_many_positions']):
                scroll_to_element(
                    role, 'profile_experience_role_for_many_positions role')
                position = {
                    'name': parse_many_position_name(role),
                    'description': parse_description(role),
                    'dates': parse_dates_from_to(role),
                    'location': parse_location(role)
                }
                position['dates']['duration'] = parse_duration(role)
                experience['positions'].append(position)
        except NoSuchElementException as e:
            experience['positions'].append({
                'name': '',
                'location': '',
                'description': '',
                'dates': {
                    'from': '',
                    'to': '',
                    'duration': ''
                }
            })
            logging.debug(
                f"Can't find profile_experience_role_for_many_positions {e}")
        except Exception as e:
            logging.debug(f"Unknown Exception {e}")
            experience['positions'].append({
                'name': '',
                'location': '',
                'description': '',
                'dates': {
                    'from': '',
                    'to': '',
                    'duration': ''
                }
            })

    except NoSuchElementException as e:
        logging.debug(
            f"profile_company_name_with_many_positions not found (that's normal) {e}"
        )
    except Exception as e:
        logging.debug(f"Unknown Exception {e}")

    return experience
Example no. 14
 def scrape_post(self, e: WebElement, url: str) -> dict:
     date = e.find_element_by_css_selector("span.tojvnm2t.a6sixzi8.abs2jz4q.a8s20v7p.t1p8iaqh.k5wvi7nf.q3lfd5jv.pk4s997a.bipmatt0.cebpdrjk.qowsmv63.owwhemhu.dp1hu0rb.dhp61c6y.iyyx5f41").text
     print(0, date)
     if not date:
         # fallback selector when the first date span is empty
         date = e.find_element_by_css_selector("a.oajrlxb2.g5ia77u1.qu0x051f.esr5mh6w.e9989ue4.r7d6kgcz.rq0escxv.nhd2j8a9.nc684nl6.p7hjln8o.kvgmc6g5.cxmmr5t8.oygrvhab.hcukyx3x.jb3vyjys.rz4wbd8a.qt6c0cv9.a8nywdso.i1ao9s8h.esuyzwwr.f1sip0of.lzcic4wl.gmql0nx0.gpro0wi8.b1v8xokw").text
         print(1, date)
     locale.setlocale(locale.LC_ALL, 'nl_NL.UTF-8')
     # relative Dutch timestamps: "<n>u" = hours ago, "<n>m" = minutes ago
     if re.search(r'\du', date):
         date = datetime.datetime.today()
         print(2,date)
     elif re.search(r'\dm',date):
         date = datetime.datetime.today()
         print(3,date)
     elif 'Gisteren' in date:
         datum = datetime.datetime.today()
         date = datum - datetime.timedelta(days=1)
         print(4,date)
     elif re.search(r'om', date):
         m = re.match(r"(\d+ \w+) om (\d\d:\d\d)", date)
         date, time = m.groups()
         date = f"{date} 2020 {time}"
         date = datetime.datetime.strptime(date, "%d %B %Y %H:%M")
         print(5, date)
     else:
         m = re.match(r"(\d+ \w+)", date)
         date = m[1]
         date = f"{date} 2020"
         print(9,date)
         date = datetime.datetime.strptime(date, "%d %B %Y")
         print(6, date)
     try:
         headline = e.find_element_by_css_selector("a.oajrlxb2.g5ia77u1.qu0x051f.esr5mh6w.e9989ue4.r7d6kgcz.rq0escxv.nhd2j8a9.nc684nl6.p7hjln8o.kvgmc6g5.cxmmr5t8.oygrvhab.hcukyx3x.jb3vyjys.rz4wbd8a.qt6c0cv9.a8nywdso.i1ao9s8h.esuyzwwr.f1sip0of.lzcic4wl.oo9gr5id.gpro0wi8.lrazzd5p").text
     except NoSuchElementException:
         logging.debug(f"No headline by: {e}")
         headline = "-"
     print(headline)
     try:
         url = e.find_element_by_css_selector(".fsm > ._5pcq")
         url = fbposturl(url.get_attribute("href"))
     except NoSuchElementException:
         logging.debug(f"No url by: {e}")
         url = "-"
     print(url)
     try:
         date = e.find_element_by_css_selector("abbr._5ptz")
         date = date.get_attribute("title")
         date = datetime.datetime.strptime(date, "%d-%m-%Y %H:%M")
     except NoSuchElementException:
         # keep the date computed by the heuristics above
         logging.debug(f"No date abbr by: {e}")
     print(date)
     try:
         msg = e.find_element_by_css_selector("div.kvgmc6g5.cxmmr5t8.oygrvhab.hcukyx3x.c1et5uql.ii04i59q").text
     except NoSuchElementException:
         logging.debug(f"No message by: {e}")
         msg = "-"
     if msg.strip() == "":
         logging.debug(f"No message by: {e}")
         msg = "-"
     article = dict(title=headline, date=date, text=msg, url=url, medium="dtvnieuws")
     try:
         lijst = [x.text for x in e.find_elements_by_xpath(".//div[@class='oajrlxb2 g5ia77u1 qu0x051f esr5mh6w e9989ue4 r7d6kgcz rq0escxv nhd2j8a9 nc684nl6 p7hjln8o kvgmc6g5 cxmmr5t8 oygrvhab hcukyx3x jb3vyjys rz4wbd8a qt6c0cv9 a8nywdso i1ao9s8h esuyzwwr f1sip0of lzcic4wl l9j0dhe7 abiwlrkh gpro0wi8 dwo3fsh8 ow4ym5g4 auili1gw du4w35lb gmql0nx0']")]
         # initialise the counters once, so a non-matching line
         # cannot reset a count found on an earlier iteration
         article['nremarks'] = 0
         article['nshares'] = 0
         for i in lijst:
             if 'opmerkingen' in i:  # Dutch: comments
                 article['nremarks'] = fbnumber(i)
             if 'gedeeld' in i:  # Dutch: shared
                 article['nshares'] = fbnumber(i)
     except NoSuchElementException:
         logging.debug(f"No remarks by: {e}")
     try:
         # './/' keeps the search inside this post element; '//' would scan the whole page
         share = e.find_element_by_xpath(".//div[@class='bp9cbjyn m9osqain j83agx80 jq4qci2q bkfpd7mw a3bd9o3v kvgmc6g5 wkznzc2l oygrvhab dhix69tm jktsbyx5 rz4wbd8a osnr6wyh a8nywdso s1tcr66n']").text
         article["shares"] = fbnumber(share)
     except NoSuchElementException:
         logging.debug(f"No shares by: {e}")
     try:
         link = e.find_element_by_css_selector("._52c6")
         link = link.get_attribute("href")
         article["article_url"]= fburl(link)
     except NoSuchElementException:
         logging.debug(f"No link by: {e}")
     return article
Example no. 15
async def analyzecomment(comment: WebElement):
    # print(comment.text)

    files.analyzed_comments += 1

    commenttext = comment.find_element_by_xpath("div[1]")
    commenttexttext = str(commenttext.text)
    commentid = str(comment.find_element_by_xpath('..').get_attribute("id"))
    commentauthorid: str = comment.find_element_by_xpath("h3/a").get_attribute(
        "href").replace("https://mbasic.facebook.com/", "")
    if "profile.php" in commentauthorid:
        commentauthorid = commentauthorid.replace("profile.php?id=",
                                                  "").split("&")[0]
    else:
        commentauthorid = commentauthorid.split("?")[0]
    try:
        tag = commenttext.find_element_by_xpath("a")
    except NoSuchElementException:
        return

    href = tag.get_attribute("href")
    tagtext = tag.text
    if "Djy No" in tagtext or "Paolo Curetti" in tagtext:  # Ya un truc vraiment chelou avec son nom et le bot le signale en boucle du coup pour l'instant je vais juste l'ignorer
        return
    if "mbasic.facebook.com/" in href and "/groups/" not in href and "/hashtag/" not in href and tagtext not in href:
        if len(commenttexttext) < len(tagtext) + 10:
            temphistory = await files.readhistory()
            try:
                if str(commentid) in str(
                        temphistory["warnings"][commentauthorid]):
                    # print("---- Already Seen Tag: \"" + tagtext.replace("\n", "") + "\"\n" + driver.current_url)
                    return
            except KeyError:
                pass

            await switchtab(3)  # page-verification tab
            driver.get(href)
            if "profile picture" not in driver.find_element_by_css_selector(
                    "div#root").find_element_by_xpath(
                        "div/div/div[2]/div/div/div/a/img").get_attribute(
                            "alt"):
                print("---- Page Tag: \"" + tagtext.replace("\n", "") +
                      "\"\n" + driver.current_url)
                await switchtab(1)  # Facebook post & comments
                return

            await switchtab(1)  # Facebook post & comments
            answerlink = ""
            for element in comment.find_elements_by_xpath("div[3]/a"):
                if "répon" in element.text.lower():
                    answerlink = element.get_attribute("href")

            await switchtab(2)  # reply-to-a-comment tab
            if answerlink != "":
                driver.get(answerlink)
                with open('messages.json', encoding="utf-8") as messages_json:
                    messages = json.load(messages_json)
                    driver.find_element_by_css_selector(
                        "#composerInput").send_keys(
                            (messages["prefix"] +
                             random.choice(messages["wildtag"]) +
                             messages["suffix"]).replace("{}", commentid))
                driver.find_element_by_xpath(
                    "//input[@type='submit'][@value='Répondre']").click()
                driver.get_screenshot_as_file("screenshots/" + commentid +
                                              ".png")
            else:
                print("---- NO ANSWER BUTTON " + driver.current_url)
                return
            print("---- Potential Tag: \"" + tagtext.replace("\n", "") +
                  "\"\n" + driver.current_url)
            warning_content = {
                commentid: {
                    "date": str(date.today()),
                    "comment": commenttexttext,
                    "publication": driver.current_url
                }
            }

            await files.addtohistory("warnings", commentauthorid,
                                     warning_content)
            await files.printstats()
            await switchtab(1)  # Facebook post & comments
Example no. 16
def nf_get_all_posts_on_element(element: WebElement) -> List[WebElement]:
    # use './/' so the search is scoped to this element; a leading '//'
    # would search the entire document regardless of the element
    return element.find_elements_by_xpath('.//a[starts-with(@href, "/p/")]')
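A usage sketch, assuming the driver is on an Instagram-style page whose post permalinks start with `/p/`:

container = driver.find_element_by_tag_name("main")  # hypothetical container element
posts = nf_get_all_posts_on_element(container)
hrefs = [p.get_attribute("href") for p in posts]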