Esempio n. 1
0
def web_scraper(page_id):
    """Scrape up to ten ``https`` links from a stored page and save them.

    The id is checked against the ids returned by ``Pages.select_id()``.
    For a known id, the page's URL is downloaded, the links previously
    stored for that page are deleted, and the first ten anchors whose
    ``href`` starts with ``https`` are inserted in their place.

    Args:
        page_id: Id of the page to scrape. It is compared with the first
            column of every row returned by ``select_id()``.

    Raises:
        TypeError: If ``page_id`` is not among the stored ids. The exception
            type is kept as-is so existing callers keep working.
        requests.exceptions.RequestException: If the download fails or
            exceeds the timeout.
    """
    max_links = 10
    request_timeout = 30  # seconds; without a timeout requests may block forever

    id_rows = Pages(DB.connect()).select_id()
    if not any(row[0] == page_id for row in id_rows):
        raise TypeError('Id does not exist.')

    url = Pages(DB.connect()).select_url(page_id)

    # Presumably flags the page as "being scraped" — confirm against
    # the update() implementation.
    DB.pages().update(True, page_id)
    try:
        response = requests.get(url, timeout=request_timeout)
        soup = BeautifulSoup(response.text, 'html.parser')

        https_links = [
            anchor['href']
            for anchor in soup.find_all('a', href=True)
            if anchor['href'].startswith('https')
        ]

        # Replace whatever was stored for this page with the fresh links.
        DB.links().delete_by_page_id(page_id)
        for href in https_links[:max_links]:
            Links(DB.connect()).insert(page_id, href)
    finally:
        # Always undo the update(True, ...) above, even when the request,
        # the parsing or a database write raises.
        DB.pages().update(False, page_id)
Esempio n. 2
0
class TestDb(unittest.TestCase):
    """Tests for the DB class defined in the package's __init__.py."""

    def setUp(self):
        """Create a fresh DB object before every test."""
        self.db = DB()

    def test_connect(self):
        """connect() must return something other than None."""
        connection_object = self.db.connect()
        self.assertIsNotNone(connection_object)

    def test_new_connect(self):
        """new_connect() must return something other than None."""
        connection_object = self.db.new_connect()
        self.assertIsNotNone(connection_object)

    def test_setup(self):
        """setup() returns None and the pages table can be queried afterwards."""
        self.assertIsNone(self.db.setup())
        cursor = self.db.new_connect().cursor()
        # The assertion below relies on cursor.execute() returning None;
        # presumably true for the driver in use — confirm.
        result = cursor.execute('SELECT url FROM pages WHERE id=1 ')
        self.assertIsNone(result)

    def test_seed(self):
        """seed() returns None when run after setup()."""
        self.db.setup()
        seed = self.db.seed()
        self.assertIsNone(seed)

    def tearDown(self):
        """Drop the reference to the DB object created in setUp."""
        self.db = None
Esempio n. 3
0
class TestDatabase(TestCase):
    """Tests for the database (DB) helper functions."""

    def setUp(self):
        """Create a fresh DB object before every test."""
        self.db = DB()

    def test_connection(self):
        """connect() must return something other than None."""
        connection = self.db.connect()
        self.assertIsNotNone(connection)

    def test_setup(self):
        """setup() returns None, including when it is called a second time."""
        self.db.setup()
        self.assertIsNone(self.db.setup())

    def test_seed(self):
        """seed() returns None, including when it is called a second time."""
        self.db.connect()
        self.db.setup()
        self.db.seed()
        self.assertIsNone(self.db.seed())

    def test_pages(self):
        """pages().select() returns a non-None result after setup and seed."""
        self.db.connect()
        self.db.setup()
        self.db.seed()
        selecter = self.db.pages().select()
        self.assertIsNotNone(selecter)

    def test_links(self):
        """links().select() returns a non-None result after setup."""
        self.db.connect()
        self.db.setup()
        select_link = self.db.links().select()
        self.assertIsNotNone(select_link)

    def tearDown(self):
        """Close the connection returned by connect() after every test.

        unittest only invokes the hook when it is spelled ``tearDown``; the
        previous ``TearDown`` spelling was never called.
        """
        # NOTE(review): assumes closing the object returned by connect() does
        # not break the following test's own connect() call — confirm.
        self.db.connect().close()
Esempio n. 4
0
 def setUp(self) -> None:
     """Build the Pages object under test on a connection from DB.connect()."""
     connection = DB.connect()
     self.exec = Pages(connection)
Esempio n. 5
0
 def test_connect(self):
     """DB.connect() must return something other than None."""
     self.assertIsNotNone(DB.connect())
Esempio n. 6
0
 def setUp(self) -> None:
     """Store a Pages object, created from DB.connect(), on ``self.exec``."""
     db_connection = DB.connect()
     self.exec = Pages(db_connection)
Esempio n. 7
0
# Show examples of how you would use ALL your implementations here
from src.db import DB
from src.spider import spider_scrap
from celery import Celery
from decouple import config
#
# Demo script: calls the DB, pages and links helpers in order and prints
# whatever each one returns.

# Instance-level calls: open connections, then run setup() and seed().
db = DB()
db.connect()
db.new_connect()
db.setup()
db.seed()
# The same helper called on the class instead of an instance; `dd` is not
# used again in this script.
dd = DB.new_connect()

# Pages helper: print the results of fetch_url, select and find.
pages = DB.pages()
# pages.fetch_url(2)
print(pages.fetch_url(2))
print(pages.select())
print(pages.find(2))
# print(pages.update_id(1))

# Links helper: insert a link with first argument 1, then print the results
# of delete(1) and select(1).
links = DB.links()
print(links.insert(1, 'www.goggle.com'))
print(links.delete(1))
print(links.select(1))
# #
# app = Celery('main', broker=config('CELERY_BROKER'), backend=config('CELERY_BACKEND'))
#
#
# @app.task
# def scrap_url():
#   return spider_scrap(1)

# spider_scrap(1)
Esempio n. 8
0
 def setUp(self) -> None:
     """Build the Links object under test on a connection from DB.connect()."""
     connection = DB.connect()
     self.exec = Links(connection)
Esempio n. 9
0
def task():
    """Call web_scraper with the value find_url(1) returns and pass back its result."""
    pages = Pages(DB.connect())
    target = pages.find_url(1)
    return web_scraper(target)
Esempio n. 10
0
 def setUp(self) -> None:
     """Store a Links object, created from DB.connect(), on ``self.exec``."""
     db_connection = DB.connect()
     self.exec = Links(db_connection)
Esempio n. 11
0
 def test_db_connect(self):
     """Check that DB.connect() hands back a non-None connection."""
     connection = DB.connect()
     self.assertIsNotNone(connection)
Esempio n. 12
0
 def test_connect(self):
     """The object returned by DB.connect() must not be None."""
     self.assertIsNotNone(DB.connect())