Beispiel #1
0
 def test_queue_with_delay(self):
     """Check that a request with a future ``crawl_at`` is held back.

     A copy of ``r3`` is scheduled with ``crawl_at`` set 1000 seconds ahead.
     While the backend's patched ``time`` reports the current time the queue
     must return an empty list; once it reports ``crawl_at + 1`` the request
     must be returned.
     """
     connection = Connection(host='hbase-docker', port=9090)
     queue = HBaseQueue(connection, 1, b'queue', use_snappy=False, drop=True)

     delayed = r3.copy()
     crawl_at = int(time()) + 1000
     delayed.meta[b'crawl_at'] = crawl_at
     queue.schedule([(delayed.meta[b'fingerprint'], 0.5, delayed, True)])

     # The same fetch limits are used for both polls of the queue.
     fetch_opts = dict(min_requests=3, min_hosts=1, max_requests_per_host=10)

     with mock.patch('frontera.contrib.backends.hbase.time') as mocked_time:
         # Clock is still "now": the delayed request must not be handed out.
         mocked_time.return_value = time()
         too_early = queue.get_next_requests(10, 0, **fetch_opts)
         assert too_early == []

         # Clock moved just past crawl_at: the request becomes available.
         mocked_time.return_value = crawl_at + 1
         ready = queue.get_next_requests(10, 0, **fetch_opts)
         assert {req.url for req in ready} == {delayed.url}
Beispiel #2
0
 def test_queue(self):
     """Schedule three requests and check what each fetch returns.

     The queue is created with 2 as its second argument (presumably the
     number of partitions -- confirm against HBaseQueue). Fetching with
     index 0 must yield only ``r3``; index 1 must yield ``r1`` and ``r2``.
     """
     connection = Connection(host='hbase-docker', port=9090)
     queue = HBaseQueue(connection, 2, b'queue', True)
     queue.schedule([
         ('10', 0.5, r1, True),
         ('11', 0.6, r2, True),
         ('12', 0.7, r3, True),
     ])

     fetch_opts = dict(min_requests=3, min_hosts=1, max_requests_per_host=10)

     first = queue.get_next_requests(10, 0, **fetch_opts)
     assert {req.url for req in first} == {r3.url}

     second = queue.get_next_requests(10, 1, **fetch_opts)
     assert {req.url for req in second} == {r1.url, r2.url}
Beispiel #3
0
 def test_queue(self):
     """Verify which URLs come back for fetch index 0 and for fetch index 1.

     Three requests (``r1``, ``r2``, ``r3``) are scheduled with scores 0.5,
     0.6 and 0.7. The test asserts that index 0 returns exactly ``r3`` and
     index 1 returns exactly ``r1`` and ``r2``.
     """
     connection = Connection(host='hbase-docker', port=9090)
     queue = HBaseQueue(connection, 2, b'queue', True)

     def fetched_urls(index):
         # Pull up to 10 requests for the given index and keep only the URLs.
         fetched = queue.get_next_requests(
             10, index, min_requests=3, min_hosts=1, max_requests_per_host=10)
         return {req.url for req in fetched}

     batch = [
         ('10', 0.5, r1, True),
         ('11', 0.6, r2, True),
         ('12', 0.7, r3, True),
     ]
     queue.schedule(batch)

     assert fetched_urls(0) == {r3.url}
     assert fetched_urls(1) == {r1.url, r2.url}
Beispiel #4
0
 def test_queue_with_delay(self):
     """Check that a request with ``crawl_at`` one second ahead is delayed.

     Right after scheduling, the queue must return an empty list; after a
     real 1.5 second wait the same fetch must return the request.
     """
     connection = Connection(host='hbase-docker', port=9090)
     queue = HBaseQueue(connection, 1, b'queue', True)

     delayed = r3.copy()
     delayed.meta[b'crawl_at'] = int(time()) + 1
     queue.schedule([(delayed.meta[b'fingerprint'], 0.5, delayed, True)])

     fetch_opts = dict(min_requests=3, min_hosts=1, max_requests_per_host=10)

     assert queue.get_next_requests(10, 0, **fetch_opts) == []

     # Wait on the wall clock until crawl_at (set one second ahead) has passed.
     sleep(1.5)

     ready = queue.get_next_requests(10, 0, **fetch_opts)
     assert {req.url for req in ready} == {delayed.url}
Beispiel #5
0
 def test_queue_with_delay(self):
     """Exercise delayed scheduling using a patched backend clock.

     ``crawl_at`` is set 1000 seconds in the future, so no real waiting is
     done: ``frontera.contrib.backends.hbase.time`` is patched and its
     return value is moved from the current time to ``crawl_at + 1``. The
     queue must return ``[]`` before the move and the request after it.
     """
     connection = Connection(host='hbase-docker', port=9090)
     queue = HBaseQueue(connection, 1, b'queue', use_snappy=False, drop=True)

     postponed = r3.copy()
     crawl_at = int(time()) + 1000
     postponed.meta[b'crawl_at'] = crawl_at
     fingerprint = postponed.meta[b'fingerprint']
     queue.schedule([(fingerprint, 0.5, postponed, True)])

     def poll():
         # One fetch with the limits shared by both assertions below.
         return queue.get_next_requests(
             10, 0, min_requests=3, min_hosts=1, max_requests_per_host=10)

     with mock.patch('frontera.contrib.backends.hbase.time') as mocked_time:
         mocked_time.return_value = time()
         assert poll() == []

         mocked_time.return_value = crawl_at + 1
         assert {req.url for req in poll()} == {postponed.url}
Beispiel #6
0
 def test_queue_with_delay(self):
     """Verify a request is withheld until its ``crawl_at`` time has passed.

     The request's ``crawl_at`` is the current integer time plus one second.
     The first fetch must return ``[]``; after sleeping 1.5 seconds the
     second fetch must return the request's URL.
     """
     connection = Connection(host='hbase-docker', port=9090)
     queue = HBaseQueue(connection, 1, b'queue', True)

     postponed = r3.copy()
     postponed.meta[b'crawl_at'] = int(time()) + 1
     fingerprint = postponed.meta[b'fingerprint']
     queue.schedule([(fingerprint, 0.5, postponed, True)])

     def poll():
         # One fetch with the limits shared by both assertions below.
         return queue.get_next_requests(
             10, 0, min_requests=3, min_hosts=1, max_requests_per_host=10)

     assert poll() == []
     sleep(1.5)  # real wait; crawl_at was set only one second ahead
     assert {req.url for req in poll()} == {postponed.url}
Beispiel #7
0
 def test_queue_with_post_request(self):
     """Check that method and body survive a schedule/fetch round trip.

     ``r1`` is scheduled and fetched back; the first returned request must
     have method ``b'POST'`` and a body equal to ``data``.
     """
     connection = Connection(host='hbase-docker', port=9090)
     queue = HBaseQueue(connection, 1, b'queue', drop=True, use_snappy=False)
     queue.schedule([('10', 0.5, r1, True)])

     fetch_opts = dict(min_requests=3, min_hosts=1, max_requests_per_host=10)
     requests = queue.get_next_requests(10, 0, **fetch_opts)

     fetched = requests[0]
     self.assertEqual(b'POST', fetched.method)
     self.assertEqual(data, fetched.body)