Example #1
import unittest

# Url is imported from the project module under test.

class UrlTest(unittest.TestCase):
    def setUp(self):
        self.sut = Url("test_name", 0)

    def test_init(self):
        expected = "test_name", 0
        result = self.sut.url_string, self.sut.layer
        self.assertEqual(expected, result)

    def test___str__(self):
        expected = "test_name"
        result = str(self.sut)
        self.assertEqual(expected, result)

    def test___eq___positive(self):
        positive = Url("test_name")
        self.assertTrue(self.sut.__eq__(positive))
        negative_url = Url("Different name")
        self.assertFalse(self.sut.__eq__(negative_url))
        negative_non_url = 'Different class'
        self.assertFalse(self.sut.__eq__(negative_non_url))

    def test___ne__(self):
        negative_non_url = 'Different class'
        self.assertTrue(self.sut.__ne__(negative_non_url))
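
The tests in Example #1 pin down a small surface for the Url class: a constructor taking a URL string and an optional layer (defaulting to 0), __str__ returning the URL string, and __eq__/__ne__ comparing only the URL string and rejecting non-Url values. A minimal sketch that would satisfy these tests could look like the following; it is a hypothetical reconstruction, not the project's actual class, and __hash__ is only assumed because later examples place Url objects in a set.

class Url:
    def __init__(self, url_string, layer=0):
        self.url_string = url_string
        self.layer = layer

    def __str__(self):
        return self.url_string

    def __eq__(self, other):
        # Only Url instances with the same url_string compare equal.
        return isinstance(other, Url) and self.url_string == other.url_string

    def __ne__(self, other):
        return not self.__eq__(other)

    def __hash__(self):
        # Assumed so that set membership stays consistent with __eq__
        # (see the urlset example further down).
        return hash(self.url_string)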
Example #2
def test___eq___positive(self):
    positive = Url("test_name")
    self.assertTrue(self.sut.__eq__(positive))
    negative_url = Url("Different name")
    self.assertFalse(self.sut.__eq__(negative_url))
    negative_non_url = 'Different class'
    self.assertFalse(self.sut.__eq__(negative_non_url))
Example #3
def setUp(self):
    self.url = Url("http://example.com", 1)
    name = "name"
    Properties.MODULES_CONFIG_FILE = '../app/modules.conf'
    self.redis = MockRedis()
    self.elastic = MockElastic()
    self.sut = DataParser(self.url, self.redis, name, self.elastic)
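
The DataParser setUp above swaps the real Redis and Elasticsearch clients for test doubles so the parser can be exercised in isolation. As an illustration of that pattern only (the project's actual MockRedis and MockElastic are not shown here), a hypothetical in-memory stand-in mirroring a few basic redis-py calls could look like this:

class MockRedis:
    """Hypothetical in-memory stand-in; the real MockRedis may expose different methods."""

    def __init__(self):
        self._store = {}
        self._sets = {}

    def set(self, key, value):
        self._store[key] = value
        return True

    def get(self, key):
        return self._store.get(key)

    def sadd(self, key, *values):
        # Roughly mimics redis-py's sadd: returns how many new members were added.
        members = self._sets.setdefault(key, set())
        added = len(set(values) - members)
        members.update(values)
        return added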
Example #4
def __parse_url(self, url):
    """
    This method will create a new URL object from the url and add it to the self.links set.
    :param url: a url
    :return: nothing
    """
    parsed_url = Url(parse.urljoin(self.page_url.url_string, url),
                     self.page_url.layer + 1)
    self.links.add(parsed_url)
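
__parse_url relies on urllib.parse.urljoin to resolve each discovered link against the page currently being crawled before wrapping it in a Url one layer deeper. A quick standalone illustration of that resolution step (the URLs here are made up for the example):

from urllib import parse

base = "http://www.technovium.nl/nieuws/index.html"
print(parse.urljoin(base, "artikel.html"))         # http://www.technovium.nl/nieuws/artikel.html
print(parse.urljoin(base, "/contact"))             # http://www.technovium.nl/contact
print(parse.urljoin(base, "http://example.com/"))  # absolute links are returned unchanged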
Example #5
def get_queue_urls(self):
    """
    This method will return all URLs that have not been visited in the last x days from the database.
    :return: a list of Url objects
    """
    conn = MySQLConnection().get_connection()
    cursor = conn.cursor()
    result = []
    try:
        # With a cutoff of 0 days, every URL visited up to now qualifies.
        cutoff_date = str(datetime.today() - timedelta(days=0))
        # Parameterised query so the driver handles escaping of the date value.
        query = "SELECT url FROM url WHERE last_visited <= %s ORDER BY last_visited DESC"
        cursor.execute(query, (cutoff_date,))
        for item in cursor.fetchall():
            result.append(Url(item[0]))
    except pymysql.Error as e:
        code, msg = e.args
        print("MySQL Error [{0}]: {1}".format(str(code), msg))
    finally:
        if conn:
            cursor.close()
            conn.close()
    return result
Example #6
def test_url_in_urlset_true(self):
    url1 = Url('http://www.google.nl', 1)
    url2 = Url('http://www.google.nl', 1)
    urlset = set()
    urlset.add(url1)
    self.assertTrue(URLHelper.url_in_urlset(url2, urlset))
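
This test only passes if two Url objects built from the same address count as the same set entry, i.e. if Url defines __eq__ and __hash__ on the URL string. Under that assumption, a minimal url_in_urlset could be a plain membership check; this is a sketch, not the project's actual helper:

class URLHelper:
    @staticmethod
    def url_in_urlset(url, urlset):
        # Set membership uses Url.__hash__ and Url.__eq__, so an equal Url
        # created separately still matches an entry already in the set.
        return url in urlset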
Example #7
def test_http_error(self):
    Properties.SPIDER_MAX_DEPTH = 1
    self.sut = Spider(Url('http://example.com/', 1), 'testSpider', UrlDAOMock(), MockRedis())
    self.sut.run()
    self.assertGreater(len(self.sut.crawled), 0)
Example #8
def setUp(self):
    self.sut = Spider(Url('https://www.technovium.nl', 1), 'testSpider', UrlDAOMock(), MockRedis())
    Properties.SPIDER_MAX_PAGES = 1
Example #9
def setUp(self):
    self.sut = Url("test_name", 0)
Example #10
def setUp(self):
    Properties.BLACKLIST_FILE = '../test/test_crawler/test_blacklist_csv.csv'
    self.base_url = Url('http://www.technovium.nl')
    self.page_url = Url('http://www.technovium.nl')
    self.sut = LinkFinder(page_url=self.page_url)