Python ListStorage Beispiele

Programmiersprache: Python

Namespace / Paketname: okscraper.storages

Klasse / Typ: ListStorage

Beispiele auf hotexamples.com: 8

Python ListStorage - 8 Beispiele gefunden. Dies sind die am besten bewerteten Python Beispiele für die okscraper.storages.ListStorage, die aus Open Source-Projekten extrahiert wurden. Sie können Beispiele bewerten, um die Qualität der Beispiele zu verbessern.

Häufig verwendete Methoden

Anzeigen Verbergen

ListStorage(2)

store(2)

Beispiel #1

Datei anzeigen

Datei: lobbyists_index.py Projekt: JoeyHa/Open-Knesset

class LobbyistsIndexScraper(BaseScraper):
    """
    This scraper gets the list of lobbyist ids from the knesset lobbyists page html
    returns a list of lobbyist ids - doesn't store anything in db
    """

    def __init__(self):
        super(LobbyistsIndexScraper, self).__init__(self)
        self.source = UrlSource('http://www.knesset.gov.il/lobbyist/heb/lobbyist.aspx')
        self.storage = ListStorage()

    def _storeLobbyistIdsFromSoup(self, soup):
        elts = soup.findAll(lobbyist_id=True)
        counter = 0
        for elt in elts:
            lobbyist_id = elt.get('lobbyist_id')
            if lobbyist_id.isdigit():
                self.storage.store(lobbyist_id)
                self._getLogger().debug(lobbyist_id)
                counter = counter + 1
        self._getLogger().info('got %s lobbyists', str(counter))

    def _scrape(self):
        html = self.source.fetch()
        soup = BeautifulSoup(html)
        return self._storeLobbyistIdsFromSoup(soup)

Beispiel #2

Datei anzeigen

Datei: lobbyists_index.py Projekt: MeirKriheli/Open-Knesset

class LobbyistsIndexScraper(BaseScraper):
    """
    This scraper gets the list of lobbyist ids from the knesset lobbyists page html
    returns a list of lobbyist ids - doesn't store anything in db
    """
    LOBBYISTS_INDEX_PAGE_URL = 'http://www.knesset.gov.il/lobbyist/heb/lobbyist.aspx'

    def __init__(self):
        super(LobbyistsIndexScraper, self).__init__(self)
        self.source = UrlSource(self.LOBBYISTS_INDEX_PAGE_URL)
        self.storage = ListStorage()

    def _storeLobbyistIdsFromSoup(self, soup):
        elts = soup.findAll(lobbyist_id=True)
        counter = 0
        for elt in elts:
            lobbyist_id = elt.get('lobbyist_id')
            if lobbyist_id.isdigit():
                self.storage.store(lobbyist_id)
                self._getLogger().debug(lobbyist_id)
                counter = counter + 1
        self._getLogger().info('got %s lobbyists', str(counter))

    def _scrape(self):
        try:
            html = self.source.fetch()
            soup = BeautifulSoup(html)
        except Exception as e:
            send_chat_notification(__file__, 'failed to fetch or parse the lobbyists index page', {'url': self.LOBBYISTS_INDEX_PAGE_URL})
            raise e
        return self._storeLobbyistIdsFromSoup(soup)

Beispiel #3

Datei anzeigen

class LobbyistsIndexScraper(BaseScraper):
    """
    This scraper gets the list of lobbyist ids from the knesset lobbyists page html
    returns a list of lobbyist ids - doesn't store anything in db
    """
    LOBBYISTS_INDEX_PAGE_URL = 'http://www.knesset.gov.il/lobbyist/heb/lobbyist.aspx'

    def __init__(self):
        super(LobbyistsIndexScraper, self).__init__(self)
        self.source = UrlSource(self.LOBBYISTS_INDEX_PAGE_URL)
        self.storage = ListStorage()

    def _storeLobbyistIdsFromSoup(self, soup):
        elts = soup.findAll(lobbyist_id=True)
        counter = 0
        for elt in elts:
            lobbyist_id = elt.get('lobbyist_id')
            if lobbyist_id.isdigit():
                self.storage.store(lobbyist_id)
                self._getLogger().debug(lobbyist_id)
                counter = counter + 1
        self._getLogger().info('got %s lobbyists', str(counter))

    def _scrape(self):
        try:
            html = self.source.fetch()
            soup = BeautifulSoup(html)
        except Exception as e:
            send_chat_notification(
                __file__, 'failed to fetch or parse the lobbyists index page',
                {'url': self.LOBBYISTS_INDEX_PAGE_URL})
            raise e
        return self._storeLobbyistIdsFromSoup(soup)

Beispiel #4

Datei anzeigen

class LobbyistsIndexScraper(BaseScraper):
    """
    This scraper gets the list of lobbyist ids from the knesset lobbyists page html
    returns a list of lobbyist ids - doesn't store anything in db
    """
    def __init__(self):
        super(LobbyistsIndexScraper, self).__init__(self)
        self.source = UrlSource(
            'http://www.knesset.gov.il/lobbyist/heb/lobbyist.aspx')
        self.storage = ListStorage()

    def _storeLobbyistIdsFromSoup(self, soup):
        elts = soup.findAll(lobbyist_id=True)
        counter = 0
        for elt in elts:
            lobbyist_id = elt.get('lobbyist_id')
            if lobbyist_id.isdigit():
                self.storage.store(lobbyist_id)
                self._getLogger().debug(lobbyist_id)
                counter = counter + 1
        self._getLogger().info('got %s lobbyists', str(counter))

    def _scrape(self):
        html = self.source.fetch()
        soup = BeautifulSoup(html)
        return self._storeLobbyistIdsFromSoup(soup)

Beispiel #5

Datei anzeigen

Datei: lobbyists_index.py Projekt: MeirKriheli/Open-Knesset

 def __init__(self):
     super(LobbyistsIndexScraper, self).__init__(self)
     self.source = UrlSource(self.LOBBYISTS_INDEX_PAGE_URL)
     self.storage = ListStorage()

Beispiel #6

Datei anzeigen

Datei: lobbyists_index.py Projekt: JoeyHa/Open-Knesset

 def __init__(self):
     super(LobbyistsIndexScraper, self).__init__(self)
     self.source = UrlSource('http://www.knesset.gov.il/lobbyist/heb/lobbyist.aspx')
     self.storage = ListStorage()

Beispiel #7

Datei anzeigen

 def __init__(self):
     super(LobbyistsIndexScraper, self).__init__(self)
     self.source = UrlSource(self.LOBBYISTS_INDEX_PAGE_URL)
     self.storage = ListStorage()

Beispiel #8

Datei anzeigen

 def __init__(self):
     super(LobbyistsIndexScraper, self).__init__(self)
     self.source = UrlSource(
         'http://www.knesset.gov.il/lobbyist/heb/lobbyist.aspx')
     self.storage = ListStorage()