class UrlTest(unittest.TestCase):
    """Unit tests for the Url value object."""

    def setUp(self):
        """Create the system under test: a Url at layer 0."""
        self.sut = Url("test_name", 0)

    def test_init(self):
        """The constructor stores the url string and the layer."""
        expected = "test_name", 0
        result = self.sut.url_string, self.sut.layer
        # assertEqual, not the deprecated assertEquals alias
        # (deprecated since Python 3.2, removed in 3.12).
        self.assertEqual(expected, result)

    def test___str__(self):
        """str() returns the raw url string."""
        expected = "test_name"
        result = str(self.sut)
        self.assertEqual(expected, result)

    def test___eq___positive(self):
        """Urls compare equal by url string; different urls and
        non-Url objects compare unequal."""
        positive = Url("test_name")
        self.assertTrue(self.sut.__eq__(positive))
        negative_url = Url("Different name")
        self.assertFalse(self.sut.__eq__(negative_url))
        negative_non_url = 'Different class'
        self.assertFalse(self.sut.__eq__(negative_non_url))

    def test___ne__(self):
        """__ne__ is True for a non-Url operand."""
        negative_non_url = 'Different class'
        self.assertTrue(self.sut.__ne__(negative_non_url))
def test___eq___positive(self):
    """Equality holds for a Url with the same url string, and fails
    for a Url with a different string or for a non-Url object."""
    same = Url("test_name")
    self.assertTrue(self.sut.__eq__(same))

    other = Url("Different name")
    self.assertFalse(self.sut.__eq__(other))

    not_a_url = 'Different class'
    self.assertFalse(self.sut.__eq__(not_a_url))
def setUp(self):
    """Wire up a DataParser against mocked Redis and Elastic back ends."""
    Properties.MODULES_CONFIG_FILE = '../app/modules.conf'
    self.url = Url("http://example.com", 1)
    self.redis = MockRedis()
    self.elastic = MockElastic()
    self.sut = DataParser(self.url, self.redis, "name", self.elastic)
def __parse_url(self, url):
    """Resolve *url* against the current page and record the link.

    The resolved address is wrapped in a Url one layer deeper than the
    page being parsed and added to the self.links set.

    :param url: a (possibly relative) url found on the page
    :return: nothing
    """
    absolute = parse.urljoin(self.page_url.url_string, url)
    self.links.add(Url(absolute, self.page_url.layer + 1))
def get_queue_urls(self, days=0):
    """Return all urls whose last visit is at least *days* days old.

    :param days: minimum age in days of the last visit; defaults to 0
        (every url visited up to now), matching the previous behavior.
    :return: a list of Url objects, most recently visited first; empty
        on a database error (the error is printed, not raised).
    """
    conn = MySQLConnection().get_connection()
    cursor = conn.cursor()
    result = []
    try:
        cutoff = str(datetime.today() - timedelta(days=days))
        # Bind the value through the driver instead of interpolating it
        # into the SQL text — avoids quoting/injection problems.
        query = ("SELECT url FROM url WHERE last_visited <= %s "
                 "ORDER BY last_visited DESC")
        cursor.execute(query, (cutoff,))
        for (url,) in cursor.fetchall():
            result.append(Url(url))
    except pymysql.Error as e:
        code, msg = e.args
        print("MySQL Error [{0}]: {1}".format(str(code), msg))
    finally:
        if conn:
            cursor.close()
            conn.close()
    return result
def test_url_in_urlset_true(self):
    """A Url equal to a set member (same string, same layer) is found."""
    member = Url('http://www.google.nl', 1)
    lookalike = Url('http://www.google.nl', 1)
    urls = {member}
    self.assertTrue(URLHelper.url_in_urlset(lookalike, urls))
def test_http_error(self):
    """A crawl capped at depth 1 still records at least one crawled url."""
    Properties.SPIDER_MAX_DEPTH = 1
    seed = Url('http://example.com/', 1)
    self.sut = Spider(seed, 'testSpider', UrlDAOMock(), MockRedis())
    self.sut.run()
    self.assertGreater(len(self.sut.crawled), 0)
def setUp(self):
    """Build a spider against a live seed url, capped at a single page."""
    seed = Url('https://www.technovium.nl', 1)
    self.sut = Spider(seed, 'testSpider', UrlDAOMock(), MockRedis())
    # NOTE(review): the page cap is set after the Spider is constructed,
    # as in the original — presumably Spider reads it lazily; confirm.
    Properties.SPIDER_MAX_PAGES = 1
def setUp(self):
    """Create the Url under test: named "test_name", at layer 0."""
    self.sut = Url("test_name", 0)
def setUp(self):
    """Point the blacklist at the csv fixture and build a LinkFinder."""
    Properties.BLACKLIST_FILE = '../test/test_crawler/test_blacklist_csv.csv'
    site = 'http://www.technovium.nl'
    self.base_url = Url(site)
    self.page_url = Url(site)
    self.sut = LinkFinder(page_url=self.page_url)