Ejemplo n.º 1
0
 def test_page_crawled(self):
     """Call ``page_crawled`` for one request and check the stored status code.

     After the call, the FIELD_STATUS_CODE entry of the Redis hash 'md1'
     is expected to read ``b'200'``.
     """
     redis_pool = get_pool()
     metadata = RedisMetadata(redis_pool, True)
     crawled = Request(
         "md1",
         int(time()) - 10,
         'https://www.knuthellan.com/',
         domain='knuthellan.com',
     )
     metadata.page_crawled(crawled)
     # Read back through a separate client that shares the same pool.
     redis_client = StrictRedis(connection_pool=redis_pool)
     stored = redis_client.hmget('md1', FIELD_STATUS_CODE)
     self.assertEqual(b'200', stored[0])
Ejemplo n.º 2
0
 def test_links_extracted(self):
     """Pass three links to ``links_extracted`` and check what lands in Redis.

     For each link fingerprint the test reads the hash stored under that
     key and compares its FIELD_URL and FIELD_DOMAIN_FINGERPRINT entries
     with the expected byte strings.
     """
     # (fingerprint, url, domain) for every link handed to the backend.
     link_specs = (
         ("l1", 'https://www.knuthellan.com/', 'knuthellan.com'),
         ("l2", 'https://www.khellan.com/', 'khellan.com'),
         ("l3", 'https://www.hellan.me/', 'hellan.me'),
     )
     # (redis key, expected url bytes, expected domain fingerprint bytes).
     expected_rows = (
         ('l1', b'https://www.knuthellan.com/', b'd_l1'),
         ("l2", b'https://www.khellan.com/', b'd_l2'),
         ("l3", b'https://www.hellan.me/', b'd_l3'),
     )
     redis_pool = get_pool()
     metadata = RedisMetadata(redis_pool, True)
     extracted = [
         Request(fingerprint, int(time()) - 10, url, domain=domain)
         for fingerprint, url, domain in link_specs
     ]
     metadata.links_extracted(None, extracted)
     redis_client = StrictRedis(connection_pool=redis_pool)
     for key, expected_url, expected_domain_fp in expected_rows:
         self.assertEqual(expected_url,
                          redis_client.hmget(key, FIELD_URL)[0])
         self.assertEqual(expected_domain_fp,
                          redis_client.hmget(key, FIELD_DOMAIN_FINGERPRINT)[0])
Ejemplo n.º 3
0
 def test_add_seeds(self):
     """Add three seed requests and check the URL / domain fingerprint per key.

     Each seed's hash is read back from Redis by fingerprint and its
     FIELD_URL and FIELD_DOMAIN_FINGERPRINT values are compared with the
     expected byte strings.
     """
     # (fingerprint, url, domain) for every seed request.
     seed_specs = (
         ("md1", 'https://www.knuthellan.com/', 'knuthellan.com'),
         ("md2", 'https://www.khellan.com/', 'khellan.com'),
         ("md3", 'https://www.hellan.me/', 'hellan.me'),
     )
     # (redis key, expected url bytes, expected domain fingerprint bytes).
     expected_rows = (
         ('md1', b'https://www.knuthellan.com/', b'd_md1'),
         ("md2", b'https://www.khellan.com/', b'd_md2'),
         ("md3", b'https://www.hellan.me/', b'd_md3'),
     )
     redis_pool = get_pool()
     metadata = RedisMetadata(redis_pool, True)
     seed_requests = [
         Request(fingerprint, int(time()) - 10, url, domain=domain)
         for fingerprint, url, domain in seed_specs
     ]
     metadata.add_seeds(seed_requests)
     redis_client = StrictRedis(connection_pool=redis_pool)
     for key, expected_url, expected_domain_fp in expected_rows:
         self.assertEqual(expected_url,
                          redis_client.hmget(key, FIELD_URL)[0])
         self.assertEqual(expected_domain_fp,
                          redis_client.hmget(key, FIELD_DOMAIN_FINGERPRINT)[0])
Ejemplo n.º 4
0
 def test_page_crawled(self):
     """Verify the status code stored under 'md1' once ``page_crawled`` has run."""
     shared_pool = get_pool()
     backend = RedisMetadata(shared_pool, True)
     fetched_at = int(time()) - 10
     page = Request("md1", fetched_at, 'https://www.knuthellan.com/', domain='knuthellan.com')
     backend.page_crawled(page)
     # Inspect the hash through an independent client on the same pool.
     reader = StrictRedis(connection_pool=shared_pool)
     status_values = reader.hmget('md1', FIELD_STATUS_CODE)
     self.assertEqual(b'200', status_values[0])
Ejemplo n.º 5
0
 def test_request_error(self):
     """Report error 404 for one request and check the fields stored under 'md1'.

     The hash is expected to hold the request URL, the ``d_md1`` domain
     fingerprint and the error value ``b'404'``.
     """
     redis_pool = get_pool()
     metadata = RedisMetadata(redis_pool, True)
     failed = Request(
         "md1",
         int(time()) - 10,
         'https://www.knuthellan.com/',
         domain='knuthellan.com',
     )
     metadata.request_error(failed, 404)
     redis_client = StrictRedis(connection_pool=redis_pool)
     # (expected bytes, hash field) pairs, checked in this order.
     expectations = (
         (b'https://www.knuthellan.com/', FIELD_URL),
         (b'd_md1', FIELD_DOMAIN_FINGERPRINT),
         (b'404', FIELD_ERROR),
     )
     for expected, field in expectations:
         self.assertEqual(expected, redis_client.hmget('md1', field)[0])
Ejemplo n.º 6
0
 def test_request_error(self):
     """Check URL, domain fingerprint and error stored after ``request_error``."""
     shared_pool = get_pool()
     backend = RedisMetadata(shared_pool, True)
     requested_at = int(time()) - 10
     broken = Request("md1", requested_at, 'https://www.knuthellan.com/', domain='knuthellan.com')
     backend.request_error(broken, 404)
     reader = StrictRedis(connection_pool=shared_pool)

     # Each field is fetched with its own HMGET, one assertion per field.
     url_values = reader.hmget('md1', FIELD_URL)
     self.assertEqual(b'https://www.knuthellan.com/', url_values[0])

     domain_values = reader.hmget('md1', FIELD_DOMAIN_FINGERPRINT)
     self.assertEqual(b'd_md1', domain_values[0])

     error_values = reader.hmget('md1', FIELD_ERROR)
     self.assertEqual(b'404', error_values[0])
Ejemplo n.º 7
0
 def test_links_extracted(self):
     """Check URL and domain fingerprint stored for each of three extracted links."""
     shared_pool = get_pool()
     backend = RedisMetadata(shared_pool, True)
     first = Request(
         "l1", int(time()) - 10, 'https://www.knuthellan.com/', domain='knuthellan.com')
     second = Request(
         "l2", int(time()) - 10, 'https://www.khellan.com/', domain='khellan.com')
     third = Request(
         "l3", int(time()) - 10, 'https://www.hellan.me/', domain='hellan.me')
     backend.links_extracted(None, [first, second, third])
     reader = StrictRedis(connection_pool=shared_pool)
     # (redis key, hash field, expected bytes), asserted in this order.
     checks = (
         ('l1', FIELD_URL, b'https://www.knuthellan.com/'),
         ('l1', FIELD_DOMAIN_FINGERPRINT, b'd_l1'),
         ("l2", FIELD_URL, b'https://www.khellan.com/'),
         ('l2', FIELD_DOMAIN_FINGERPRINT, b'd_l2'),
         ("l3", FIELD_URL, b'https://www.hellan.me/'),
         ('l3', FIELD_DOMAIN_FINGERPRINT, b'd_l3'),
     )
     for key, field, expected in checks:
         self.assertEqual(expected, reader.hmget(key, field)[0])
Ejemplo n.º 8
0
 def test_add_seeds(self):
     """Check URL and domain fingerprint stored for each of three seed requests."""
     shared_pool = get_pool()
     backend = RedisMetadata(shared_pool, True)
     first = Request(
         "md1", int(time()) - 10, 'https://www.knuthellan.com/', domain='knuthellan.com')
     second = Request(
         "md2", int(time()) - 10, 'https://www.khellan.com/', domain='khellan.com')
     third = Request(
         "md3", int(time()) - 10, 'https://www.hellan.me/', domain='hellan.me')
     backend.add_seeds([first, second, third])
     reader = StrictRedis(connection_pool=shared_pool)
     # (redis key, hash field, expected bytes), asserted in this order.
     checks = (
         ('md1', FIELD_URL, b'https://www.knuthellan.com/'),
         ('md1', FIELD_DOMAIN_FINGERPRINT, b'd_md1'),
         ("md2", FIELD_URL, b'https://www.khellan.com/'),
         ('md2', FIELD_DOMAIN_FINGERPRINT, b'd_md2'),
         ("md3", FIELD_URL, b'https://www.hellan.me/'),
         ('md3', FIELD_DOMAIN_FINGERPRINT, b'd_md3'),
     )
     for key, field, expected in checks:
         self.assertEqual(expected, reader.hmget(key, field)[0])