from typing import List

import scraper  # article-level scraper; defined elsewhere in this repo


def crawl(links, pass_args) -> List[str]:
    """
    Constructs the appropriate href and passes it to the scrapers for information.

    :param links: The list of 'links', i.e. anchor elements with attributes
        according to the website
    :param pass_args: List of arguments based on the input
    :return: Information collected by the scrapers, concatenated together
    """
    used_href = set()
    headers = []
    for link in links:
        href = link.get("href")
        if href and href not in used_href:  # skip anchors without an href, and duplicates
            used_href.add(href)
            headers.append(scraper.main([href] + pass_args).replace("\xa0", "") + " ")
    return headers
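# A minimal usage sketch for crawl(), hypothetical: fetch a listing page, keep
# only anchors whose href is already an absolute story URL (crawl passes hrefs
# to scraper.main unmodified), and print the result. The URL filter below is
# illustrative, not the repo's actual configuration.
import requests
from bs4 import BeautifulSoup

if __name__ == "__main__":
    page = requests.get("https://abcnews.go.com/US")
    soup = BeautifulSoup(page.text, "html.parser")
    story_links = [a for a in soup.find_all("a")
                   if a.get("href", "").startswith("https://abcnews.go.com/US")]
    print(crawl(story_links, pass_args=["-t"]))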
from typing import List

import requests
from bs4 import BeautifulSoup

import scraper  # article-level scraper; defined elsewhere in this repo
# setup_arguments (the CLI parser) is likewise assumed to be in scope here;
# a sketch of it appears at the end of this section.


def main(args) -> List[str]:
    """Collect headlines from the Wired front page."""
    args = setup_arguments(args)
    html_code = requests.get(args.html_link)
    plain_text = html_code.text
    soup = BeautifulSoup(plain_text, "lxml")

    pass_args = []
    if args.title:
        pass_args.append("-t")
    if args.body:
        pass_args.append("-b")

    used_href = set()
    headers = []
    for link in soup.find_all("a"):
        href = link.get("href")
        # Wired story links are relative paths containing "/story/";
        # guard against anchors that have no href at all.
        if href and "/story/" in href and href not in used_href:
            used_href.add(href)
            full_href = "https://www.wired.com" + href
            headers.append(
                scraper.main([full_href] + pass_args)
                .replace("\n", "")
                .replace("| WIRED", "")
                .replace("\xa0", ""))
    return headers
# Site-specific variant for ABC News; same imports as the Wired module above.
def main(args) -> List[str]:
    """Collect headlines from the ABC News US section."""
    args = setup_arguments(args)
    html_code = requests.get(args.html_link)
    plain_text = html_code.text
    soup = BeautifulSoup(plain_text, "html.parser")

    pass_args = []
    if args.title:
        pass_args.append("-t")
    if args.body:
        pass_args.append("-b")

    used_href = set()
    headers = []
    for link in soup.find_all("a", {"class": "white-ln"}):
        href = link.get("href")
        # ABC News headline anchors carry absolute URLs into the US section.
        if href and "https://abcnews.go.com/US" in href and href not in used_href:
            used_href.add(href)
            headers.append(
                scraper.main([href] + pass_args)
                .replace("\n", "")
                .replace("- ABC News", "")
                .replace("\xa0", ""))
    return headers
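# Because ABC News hrefs are already absolute, this loop could delegate to
# crawl() from above. A refactoring sketch under that assumption —
# main_via_crawl is a hypothetical name, and the suffix cleanup that crawl()
# does not perform moves to the caller:
def main_via_crawl(args) -> List[str]:
    args = setup_arguments(args)
    soup = BeautifulSoup(requests.get(args.html_link).text, "html.parser")
    pass_args = (["-t"] if args.title else []) + (["-b"] if args.body else [])
    story_links = [a for a in soup.find_all("a", {"class": "white-ln"})
                   if a.get("href", "").startswith("https://abcnews.go.com/US")]
    return [h.replace("\n", "").replace("- ABC News", "")
            for h in crawl(story_links, pass_args)]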
# Site-specific variant for The Hill; same imports as the Wired module above.
def main(args) -> List[str]:
    """Collect headlines from The Hill."""
    args = setup_arguments(args)
    html_code = requests.get(args.html_link)
    plain_text = html_code.text
    soup = BeautifulSoup(plain_text, "html.parser")

    pass_args = []
    if args.title:
        pass_args.append("-t")
    if args.body:
        pass_args.append("-b")

    used_href = set()
    headers = []
    # The Hill wraps each headline link in an <h4> element.
    for heading in soup.find_all("h4"):
        anchor = heading.find("a")
        if anchor is None:  # guard against h4 elements without a link
            continue
        href = anchor.get("href")
        if href and href not in used_href:
            used_href.add(href)
            full_href = "https://thehill.com" + href  # hrefs here are relative paths
            headers.append(
                scraper.main([full_href] + pass_args)
                .replace("\n", "")
                .replace("| TheHill", "")
                .replace("\xa0", ""))
    return headers
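# All three site scrapers call setup_arguments(args) and then read
# args.html_link, args.title, and args.body. A minimal sketch of that helper,
# assuming it wraps argparse; the repo's real implementation may differ (this
# reconstruction only mirrors the attributes and the -t / -b flags that the
# code above demonstrably passes along).
import argparse

def setup_arguments(args):
    parser = argparse.ArgumentParser(description="Scrape headlines from a news page")
    parser.add_argument("html_link", help="URL of the listing page to crawl")
    parser.add_argument("-t", "--title", action="store_true",
                        help="collect article titles")
    parser.add_argument("-b", "--body", action="store_true",
                        help="collect article bodies")
    return parser.parse_args(args)

# Example invocation of a site module's main():
#     headers = main(["https://thehill.com", "-t"])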