class FirstPage(Page):
    """Entry page that fans out to three SecondPage subpages, one per seed."""

    source = NullSource()

    def process_page(self):
        # One SecondPage per seed value; each subpage augments this input dict.
        for seed in (1, 2, 3):
            yield SecondPage({"first": seed})
class SkipOddList(ListPage):
    """List page that hands each element to a SkipOddDetail subpage."""

    source = NullSource()

    def process_page(self):
        # Delegate each element to process_item via the skip-aware loop.
        yield from self._process_or_skip_loop(list(range(1, 6)))

    def process_item(self, item):
        """Wrap *item* in a SkipOddDetail subpage backed by a null source."""
        detail = SkipOddDetail(item, source=NullSource())
        return detail
class SingleReturnPaginatedPage(Page):
    """Page returning a single dict, paginated exactly once via a fake source."""

    source = NullSource()

    def process_page(self):
        return {"dummy": "value"}

    def get_next_source(self):
        # a hack to fake a second identical page: only the initial
        # NullSource-backed visit chains to a real URL; the second visit
        # falls through to None, which stops pagination.
        if not isinstance(self.source, NullSource):
            return None
        return "https://httpbin.org/get"
class ExampleListPage(ListPage):
    # need this here to test that default source is used
    source = NullSource()

    def process_page(self):
        """Yield five numbered rows, "1" through "5"."""
        for n in range(1, 6):
            yield {"val": str(n)}
class ResultJsonListPage(JsonListPage):
    """
    Parse each row of our JSON results file supplemented with additional
    page information so that we know which race we have parsed.
    """

    example_source = "https://ultrasignup.com/service/events.svc/results/63105/1/json"
    source = NullSource()

    def process_item(self, item):
        """Merge the page-level race metadata (self.input) into each row.

        Uses the ``{**a, **b}`` merge rather than ``dict(**a, **b)``: the
        keyword-argument form raises TypeError whenever the two mappings
        share a key (and requires all-string keys), whereas the unpacking
        form lets the row's value win on collision.
        """
        return {**self.input, **item}
class LegPageGenerator(ListPage):
    """
    NE is an interesting test case for Spatula, since there are individual
    senator pages but no real index that's useful at all.

    Right now this is using a dummy source page to spawn the 49 subpage
    scrapers.
    """

    # NOTE: this string previously appeared *after* the `source` assignment,
    # where it was a discarded expression (not the class docstring); it now
    # sits first so it actually populates __doc__.
    source = NullSource()

    def process_page(self):
        # Districts are numbered 1-49; zero-pad to match the site's URLs.
        for n in range(1, 50):
            yield LegPage(
                source=f"http://news.legislature.ne.gov/dist{n:02d}/")
class SkipOddPage(ListPage):
    """List page that passes even items through and skips odd ones."""

    source = NullSource()

    def process_page(self):
        # Route each element through process_item via the skip-aware loop.
        yield from self._process_or_skip_loop(list(range(1, 6)))

    def process_item(self, item):
        """Return even items unchanged; raise SkipItem for odd ones."""
        if item % 2 == 0:
            return item
        raise SkipItem(f"{item} is odd!")
class ExamplePaginatedPage(Page):
    """Page yielding three rows, paginated once onto a fake second source."""

    source = NullSource()
    another_page = True

    def process_page(self):
        rows = ("a man", "a plan", "panama")
        for row in rows:
            yield {"val": row}

    def get_next_source(self):
        # a hack to fake a second identical page: chain exactly once, from
        # the initial NullSource to a real URL, then return None to stop.
        if isinstance(self.source, NullSource):
            return "https://httpbin.org/get"
        return None
class RaceListFromDjango(Page):
    """Seed page that spawns one RaceResultListPage per race stored in Django."""

    source = NullSource()

    def process_error_response(self, exception):
        # Errors are logged as warnings and otherwise ignored.
        self.logger.warning(exception)

    def process_page(self):
        # Only races that have been assigned an ultrasignup id are scrapable.
        for race in Race.objects.exclude(ultrasignup_id=None):
            yield RaceResultListPage(
                dict(
                    did=race.ultrasignup_id,
                    race_url=f"https://trailhawks.com{race.get_absolute_url()}",
                    year=race.start_datetime.year,
                ),
                source=f"https://ultrasignup.com/results_event.aspx?did={race.ultrasignup_id}",
            )
class RaceResultListPage(HtmlListPage):
    """
    Every race may have zero or more distances which have their own
    unique `did` race number.
    """

    selector = XPath(
        "//a[@class='event_link' or @class='event_selected_link']",
        min_items=None)
    source = NullSource()

    def process_error_response(self, exception):
        # Errors are logged as warnings and otherwise ignored.
        self.logger.warning(exception)

    def process_item(self, item):
        """Build a RaceResultDetail subpage from one distance link."""
        href = XPath("@href").match_one(item)
        # Relative links need the site prefix restored.
        if not href.startswith("http"):
            href = f"https://ultrasignup.com{href}"
        # The race id is the trailing query-parameter value of the href.
        race_id = href.split("=")[-1]
        data = dict(race_id=race_id, race_results_url=href, **self.input)
        return RaceResultDetail(data, source=href)
class RaceResultDetail(HtmlPage):
    """
    Process the main race information including individual information
    about the event.
    """

    example_source = "https://ultrasignup.com/results_event.aspx?did=63105"
    source = NullSource()

    def process_error_response(self, exception):
        # Errors are logged as warnings and otherwise ignored.
        self.logger.warning(exception)

    def _match_one_or_none(self, xpath):
        """Return the first node matching *xpath*, or None when absent.

        Collapses the repeated try/except-SelectorError boilerplate the
        original five lookups each carried.
        """
        try:
            return XPath(xpath).match_one(self.root)
        except SelectorError:
            return None

    def process_page(self):
        # Presence of these marker spans flags a cancelled / virtual event;
        # the matched node itself is irrelevant, only its existence matters.
        cancellation = (
            self._match_one_or_none(
                "//span[contains(@class,'cancellation_text')]") is not None
        )
        virtual = (
            self._match_one_or_none(
                "//span[contains(@class,'virtual_text')]") is not None
        )

        # The selected distance link carries both the `did` id (in its href)
        # and the human-readable distance name (its text). The original
        # evaluated this same XPath twice; one lookup suffices.
        selected = self._match_one_or_none("//a[@class='event_selected_link']")
        did = selected.get("href").split("=")[-1] if selected is not None else ""
        distance = selected.text if selected is not None else ""

        date_node = self._match_one_or_none("//span[@class='event-date']")
        event_date = date_node.text if date_node is not None else ""

        # These two are intentionally unguarded: a missing title or website
        # should surface as a SelectorError, exactly as before.
        title = XPath("//h1").match_one(self.root)
        website = XPath("//a[@class='websiteitem']").match_one(
            self.root).get("href")
        return ResultJsonListPage(
            dict(
                cancellation=cancellation,
                date=event_date,
                distance=distance,
                title=title.text,
                virtual=virtual,
                website=website,
                **self.input,
            ),
            source=
            f"https://ultrasignup.com/service/events.svc/results/{did}/1/json",
        )
def process_item(self, item):
    """Wrap *item* in a SkipOddDetail subpage backed by a null source."""
    # NOTE(review): orphan fragment — duplicates SkipOddList.process_item;
    # confirm which class this belongs to.
    detail = SkipOddDetail(item, source=NullSource())
    return detail
class SecondPage(Page):
    """Subpage that copies its input dict and tags it as processed."""

    source = NullSource()

    def process_page(self):
        # Copy first so the caller's input mapping is never mutated.
        result = dict(self.input)
        result["second"] = "appended"
        return result
class SimpleInputPage(Page):
    """Page that echoes the `name` and `number` fields of its typed input."""

    source = NullSource()
    input_type = Input

    def process_page(self):
        data = self.input
        return {"name": data.name, "number": data.number}
class ExamplePage(Page):
    # need this here to test example_sources are picked up
    example_source = NullSource()

    def process_page(self):
        """Report which source this page was instantiated with."""
        source_repr = str(self.source)
        return {"source": source_repr}
class Subpage(Page):
    """Trivial subpage that passes its input straight through unchanged."""

    source = NullSource()

    def process_page(self):
        # Identity transform: the parent's payload is the result.
        return self.input