Example #1
0
class LobbyistsIndexScraper(BaseScraper):
    """
    Collects lobbyist ids from the HTML of the knesset lobbyists index page.

    The ids are pushed into ``self.storage`` (a ``ListStorage``) - nothing
    is stored in the db.
    """

    def __init__(self):
        """Attach the index page as the source and a ListStorage as storage."""
        # NOTE(review): ``self`` is also passed explicitly as a positional
        # argument to the base initializer - confirm BaseScraper expects it.
        super(LobbyistsIndexScraper, self).__init__(self)
        self.source = UrlSource('http://www.knesset.gov.il/lobbyist/heb/lobbyist.aspx')
        self.storage = ListStorage()

    def _storeLobbyistIdsFromSoup(self, soup):
        """
        Store each all-digit ``lobbyist_id`` attribute value found in ``soup``.

        Every stored id is logged at debug level and the number of stored ids
        is logged at info level. Returns None.
        """
        stored = 0
        for tag in soup.findAll(lobbyist_id=True):
            value = tag.get('lobbyist_id')
            if not value.isdigit():
                # skip attribute values that are not purely numeric
                continue
            self.storage.store(value)
            self._getLogger().debug(value)
            stored += 1
        self._getLogger().info('got %s lobbyists', str(stored))

    def _scrape(self):
        """Fetch the source, parse it and store the lobbyist ids it contains."""
        soup = BeautifulSoup(self.source.fetch())
        return self._storeLobbyistIdsFromSoup(soup)
class LobbyistsIndexScraper(BaseScraper):
    """
    This scraper gets the list of lobbyist ids from the knesset lobbyists page html.

    The ids are pushed into ``self.storage`` (a ``ListStorage``) - it doesn't
    store anything in db.
    """
    LOBBYISTS_INDEX_PAGE_URL = 'http://www.knesset.gov.il/lobbyist/heb/lobbyist.aspx'

    def __init__(self):
        """Attach the index page URL as the source and a ListStorage as storage."""
        # NOTE(review): ``self`` is also passed explicitly as a positional
        # argument to the base initializer - confirm BaseScraper expects it.
        super(LobbyistsIndexScraper, self).__init__(self)
        self.source = UrlSource(self.LOBBYISTS_INDEX_PAGE_URL)
        self.storage = ListStorage()

    def _storeLobbyistIdsFromSoup(self, soup):
        """
        Store every all-digit ``lobbyist_id`` attribute value found in ``soup``.

        :param soup: parsed page; queried with ``findAll(lobbyist_id=True)``
        :return: None - ids go to ``self.storage``; the number of stored ids
                 is logged at info level
        """
        elts = soup.findAll(lobbyist_id=True)
        counter = 0
        for elt in elts:
            lobbyist_id = elt.get('lobbyist_id')
            # only purely numeric attribute values are treated as ids
            if lobbyist_id.isdigit():
                self.storage.store(lobbyist_id)
                self._getLogger().debug(lobbyist_id)
                counter += 1
        self._getLogger().info('got %s lobbyists', str(counter))

    def _scrape(self):
        """
        Fetch and parse the index page, then store the lobbyist ids it contains.

        If fetching or parsing raises, a chat notification is sent and the
        original exception is re-raised.
        """
        try:
            html = self.source.fetch()
            soup = BeautifulSoup(html)
        except Exception:
            send_chat_notification(__file__, 'failed to fetch or parse the lobbyists index page', {'url': self.LOBBYISTS_INDEX_PAGE_URL})
            # bare raise re-raises the active exception with its original
            # traceback intact (``raise e`` would reset it on Python 2)
            raise
        return self._storeLobbyistIdsFromSoup(soup)
Example #3
0
class LobbyistsIndexScraper(BaseScraper):
    """
    This scraper gets the list of lobbyist ids from the knesset lobbyists page html.

    The ids are pushed into ``self.storage`` (a ``ListStorage``) - it doesn't
    store anything in db.
    """
    LOBBYISTS_INDEX_PAGE_URL = 'http://www.knesset.gov.il/lobbyist/heb/lobbyist.aspx'

    def __init__(self):
        """Attach the index page URL as the source and a ListStorage as storage."""
        # NOTE(review): ``self`` is also passed explicitly as a positional
        # argument to the base initializer - confirm BaseScraper expects it.
        super(LobbyistsIndexScraper, self).__init__(self)
        self.source = UrlSource(self.LOBBYISTS_INDEX_PAGE_URL)
        self.storage = ListStorage()

    def _storeLobbyistIdsFromSoup(self, soup):
        """
        Store every all-digit ``lobbyist_id`` attribute value found in ``soup``.

        :param soup: parsed page; queried with ``findAll(lobbyist_id=True)``
        :return: None - ids go to ``self.storage``; the number of stored ids
                 is logged at info level
        """
        elts = soup.findAll(lobbyist_id=True)
        counter = 0
        for elt in elts:
            lobbyist_id = elt.get('lobbyist_id')
            # only purely numeric attribute values are treated as ids
            if lobbyist_id.isdigit():
                self.storage.store(lobbyist_id)
                self._getLogger().debug(lobbyist_id)
                counter += 1
        self._getLogger().info('got %s lobbyists', str(counter))

    def _scrape(self):
        """
        Fetch and parse the index page, then store the lobbyist ids it contains.

        If fetching or parsing raises, a chat notification is sent and the
        original exception is re-raised.
        """
        try:
            html = self.source.fetch()
            soup = BeautifulSoup(html)
        except Exception:
            send_chat_notification(
                __file__, 'failed to fetch or parse the lobbyists index page',
                {'url': self.LOBBYISTS_INDEX_PAGE_URL})
            # bare raise re-raises the active exception with its original
            # traceback intact (``raise e`` would reset it on Python 2)
            raise
        return self._storeLobbyistIdsFromSoup(soup)
Example #4
0
class LobbyistsIndexScraper(BaseScraper):
    """
    Gathers lobbyist ids out of the knesset lobbyists index page html.

    Ids end up in ``self.storage`` (a ``ListStorage``); nothing is stored
    in the db.
    """
    def __init__(self):
        """Wire up the index page source and the ListStorage."""
        # NOTE(review): ``self`` is handed to the base initializer as an
        # explicit positional argument too - confirm BaseScraper expects it.
        super(LobbyistsIndexScraper, self).__init__(self)
        index_url = 'http://www.knesset.gov.il/lobbyist/heb/lobbyist.aspx'
        self.source = UrlSource(index_url)
        self.storage = ListStorage()

    def _storeLobbyistIdsFromSoup(self, soup):
        """
        Push each purely numeric ``lobbyist_id`` attribute from ``soup`` into storage.

        Logs every stored id at debug level and the total at info level.
        Returns None.
        """
        tags = soup.findAll(lobbyist_id=True)
        total = 0
        # the generator is lazy, so each attribute is read right before it
        # is checked and stored
        for candidate in (tag.get('lobbyist_id') for tag in tags):
            if candidate.isdigit():
                self.storage.store(candidate)
                self._getLogger().debug(candidate)
                total += 1
        self._getLogger().info('got %s lobbyists', str(total))

    def _scrape(self):
        """Fetch the page, parse it, and hand the soup to the id extractor."""
        page = self.source.fetch()
        parsed = BeautifulSoup(page)
        result = self._storeLobbyistIdsFromSoup(parsed)
        return result
 def __init__(self):
     """Attach the index page URL as the source and a ListStorage as storage."""
     # NOTE(review): ``self`` is also passed explicitly as a positional
     # argument to the base initializer - confirm the base class expects it.
     super(LobbyistsIndexScraper, self).__init__(self)
     index_url = self.LOBBYISTS_INDEX_PAGE_URL
     self.source = UrlSource(index_url)
     self.storage = ListStorage()
Example #6
0
 def __init__(self):
     """Attach the lobbyists index page as the source and a ListStorage as storage."""
     # NOTE(review): ``self`` is also passed explicitly as a positional
     # argument to the base initializer - confirm the base class expects it.
     super(LobbyistsIndexScraper, self).__init__(self)
     index_url = 'http://www.knesset.gov.il/lobbyist/heb/lobbyist.aspx'
     self.source = UrlSource(index_url)
     self.storage = ListStorage()
Example #7
0
 def __init__(self):
     """Set ``source`` to a UrlSource for the index page and ``storage`` to a ListStorage."""
     # NOTE(review): the base initializer receives ``self`` as an explicit
     # positional argument as well - confirm the base class expects it.
     super(LobbyistsIndexScraper, self).__init__(self)
     page_source = UrlSource(self.LOBBYISTS_INDEX_PAGE_URL)
     self.source = page_source
     self.storage = ListStorage()
Example #8
0
 def __init__(self):
     """Set ``source`` to a UrlSource for the index page and ``storage`` to a ListStorage."""
     # NOTE(review): the base initializer receives ``self`` as an explicit
     # positional argument as well - confirm the base class expects it.
     super(LobbyistsIndexScraper, self).__init__(self)
     page_source = UrlSource('http://www.knesset.gov.il/lobbyist/heb/lobbyist.aspx')
     self.source = page_source
     self.storage = ListStorage()