コード例 #1
0
def web_scraper(page_id):
    """Scrape up to 10 ``https`` links from a stored page and save them.

    The id is checked against the ids returned by ``Pages.select_id()``.
    The page's URL is then fetched, and the first 10 ``<a href>`` values
    that start with ``https`` replace the links previously stored for the
    page.

    The page is flagged with ``DB.pages().update(True, page_id)`` before the
    fetch and reset with ``update(False, page_id)`` afterwards (presumably an
    "is scraping" marker -- confirm against ``Pages.update``). The reset runs
    in a ``finally`` clause so that a failed request, parse, or insert does
    not leave the page flagged.

    :param page_id: id of the page to scrape.
    :raises TypeError: if ``page_id`` is not among the stored ids.
    :return: None
    """
    known_ids = [row[0] for row in Pages(DB.connect()).select_id()]
    if page_id not in known_ids:
        raise TypeError('Id does not exist.')

    url = Pages(DB.connect()).select_url(page_id)
    DB.pages().update(True, page_id)
    try:
        # NOTE(review): no timeout is passed, so a stalled server blocks this
        # call indefinitely; consider requests.get(url, timeout=...).
        response = requests.get(url)
        soup = BeautifulSoup(response.text, 'html.parser')

        https_links = [
            anchor['href']
            for anchor in soup.find_all('a', href=True)
            if anchor['href'].startswith('https')
        ]

        # Old links are removed only after the fetch succeeded, so a failed
        # request keeps the previously stored links.
        DB.links().delete_by_page_id(page_id)
        for href in https_links[:10]:
            Links(DB.connect()).insert(page_id, href)
    finally:
        # Always clear the flag, even when scraping failed part-way.
        DB.pages().update(False, page_id)
コード例 #2
0
 def pages(cls):
   """
   Builds a Pages object on a connection obtained from cls.connect().
   :return
   Pages: The newly constructed Pages instance.
   """
   connection = cls.connect()
   return Pages(connection)
コード例 #3
0
 def setUp(self) -> None:
     # Build the Pages object used by the tests on a connection
     # obtained from DB.connect().
     connection = DB.connect()
     self.exec = Pages(connection)
コード例 #4
0
 def setUp(self) -> None:
     """Store a Pages object, built on DB.connect(), as ``self.exec``."""
     db_connection = DB.connect()
     pages_under_test = Pages(db_connection)
     self.exec = pages_under_test
コード例 #5
0
def task():
    """Pass the result of ``Pages.find_url(1)`` to ``web_scraper``.

    :return: whatever ``web_scraper`` returns.
    """
    pages = Pages(DB.connect())
    target = pages.find_url(1)
    return web_scraper(target)
コード例 #6
0
 def pages(cls):
   """Return a Pages interface built on a connection from cls.connect()."""
   return Pages(cls.connect())
コード例 #7
0
 def setUp(self):
     """Create the Pages object, server handle, connection and cursor."""
     pages = Pages()
     self.pages = pages
     self.conn_server = DB.only_server()
     # The connection comes from the Pages object; the cursor from it.
     connection = pages.connect()
     self.conn = connection
     self.cursor = connection.cursor()
コード例 #8
0
ファイル: __init__.py プロジェクト: Remi288/Spiderapp
 def pages(cls):
     """Return a Pages interface built on cls.new_connect()."""
     return Pages(cls.new_connect())
コード例 #9
0
 def setUp(self) -> None:
     """Store a Pages object, built on DB.new_connect(), as ``self.pages``."""
     connection = DB.new_connect()
     self.pages = Pages(connection)
コード例 #10
0
 def pages(cls):
     """Return a Pages object built on a connection from cls.connect()."""
     return Pages(cls.connect())