def test_can_get_with_auto_start(self):
    """With auto_start=True the queue is processed without an explicit start()."""
    otto = Octopus(concurrency=1, auto_start=True)

    def remember(url, response):
        # Stash the response on the test instance for the assertions below.
        self.response = response

    otto.enqueue('http://www.twitter.com', remember)
    otto.wait(5)

    expect(self.response).not_to_be_null()
    expect(self.response.status_code).to_equal(200)
def test_can_handle_timeouts(self):
    """A request exceeding request_timeout_in_seconds yields a timeout response."""
    url = 'http://baidu.com'
    otto = Octopus(concurrency=1, request_timeout_in_seconds=0.1)

    def remember(url, response):
        self.response = response

    otto.enqueue(url, remember)
    otto.start()
    otto.wait(5)

    expect(self.response.text).to_include('Connection to baidu.com timed out')
    expect(self.response.error).to_include('Connection to baidu.com timed out. (connect timeout=0.1)')
def test_times_out_on_wait(self):
    """wait() must raise TimeoutError when the queue is not drained in time.

    Idiom fix: binds the exception with ``except TimeoutError as err``
    instead of the dated ``sys.exc_info()[1]`` pattern (a Python 2
    compatibility relic); behavior is identical on Python 2.6+ and 3.
    """
    otto = Octopus(concurrency=1)

    def handle_url_response(url, response):
        self.response = response

    otto.enqueue('http://www.google.com', handle_url_response)

    try:
        otto.wait(0.1)
    except TimeoutError as err:
        expect(err).to_have_an_error_message_of("")
    else:
        assert False, "Should not have gotten this far"
def test_can_handle_cached_responses(self):
    """A response already in the cache is delivered on enqueue, with no start()/wait()."""
    url = 'http://www.google.com'
    cached = Mock(status_code=200, body="whatever")

    otto = Octopus(concurrency=1, cache=True)
    otto.response_cache.put(url, cached)

    def remember(url, response):
        self.response = response

    otto.enqueue(url, remember)

    expect(self.response).not_to_be_null()
    expect(self.response.status_code).to_equal(200)
    expect(self.response.body).to_equal("whatever")
def otto_cached_requests(repetitions, concurrency, urls_to_retrieve):
    """Time how long Octopus takes to fetch *urls_to_retrieve* with caching enabled.

    Returns the elapsed wall-clock time in seconds.
    NOTE(review): *repetitions* is accepted but unused in the visible code
    -- confirm against callers before removing it.
    """
    message = "Retrieving URLs concurrently with Octopus with caching enabled..."
    # Bug fix: a bare ``print`` statement is a Python 2 leftover -- in
    # Python 3 it merely evaluates the function object and prints nothing.
    # ``print()`` emits the intended blank line.
    print()
    print("=" * len(message))
    print(message)
    print("=" * len(message))
    print()

    otto = Octopus(concurrency=concurrency, cache=True, auto_start=True)

    # handle_url_response is a module-level callback defined elsewhere in the file.
    for url in urls_to_retrieve:
        otto.enqueue(url, handle_url_response)

    start_time = time()
    otto.wait(0)

    return time() - start_time
def octopus_create_GET_request(urls):
    """Fetch every URL in *urls* concurrently and return the response bodies."""
    collected = []
    otto = Octopus(concurrency=4, auto_start=True, cache=True,
                   expiration_in_seconds=10, request_timeout_in_seconds=8)

    def on_response(url, response):
        # Skip bodies that are literally the text "Not found".
        if response.text == "Not found":
            print("URL Not Found: %s" % url)
        else:
            collected.append(response.text)

    for url in urls:
        otto.enqueue(url, on_response)
    otto.wait(25)

    return collected
def test_can_handle_cached_responses_when_not_cached(self):
    """With cache=True but a cold cache, repeated enqueues of one URL still resolve."""
    url = 'http://www.twitter.com'
    otto = Octopus(concurrency=1, cache=True)

    def remember(url, response):
        self.response = response

    # The same URL is enqueued four times, matching the original test.
    for _ in range(4):
        otto.enqueue(url, remember)

    otto.start()
    otto.wait(5)

    expect(self.response).not_to_be_null()
    expect(self.response.status_code).to_equal(200)
def test_can_handle_invalid_urls(self):
    """An unresolvable host produces a 599 response whose text carries the error."""
    url = 'http://kagdjdkjgka.fk'
    otto = Octopus(concurrency=1)

    def remember(url, response):
        self.response = response

    otto.enqueue(url, remember)
    otto.start()
    otto.wait(5)

    expect(self.response).not_to_be_null()
    expect(self.response.status_code).to_equal(599)
    expect(self.response.text).to_include("HTTPConnectionPool(host='kagdjdkjgka.fk', port=80)")
    expect(self.response.text).to_include('Max retries exceeded with url: /')
    expect(self.response.error).to_equal(self.response.text)
def request(urls):
    """Fetch *urls* concurrently and return the collected bodies as a JSON string.

    Bug fix: the original ended with ``return pprint(json_data)`` --
    ``pprint`` prints its argument and returns ``None``, so callers always
    received ``None``. The JSON string is still pretty-printed, but the
    function now also returns it.
    """
    data = []
    otto = Octopus(concurrency=4, auto_start=True, cache=True,
                   expiration_in_seconds=10)

    def handle_url_response(url, response):
        if "Not found" == response.text:
            print("URL Not Found: %s" % url)
        else:
            data.append(response.text)

    for url in urls:
        otto.enqueue(url, handle_url_response)
    otto.wait()

    # json.dumps with the same indent/separators produces output identical
    # to the original json.JSONEncoder(...).encode(data) call.
    json_data = json.dumps(data, indent=None, separators=(',', ':'))
    pprint(json_data)
    return json_data
def create_request(urls, method, kwargs):
    """Send *method* requests with form-encoded *kwargs* to each URL; return the bodies as JSON."""
    data = []
    otto = Octopus(concurrency=1, auto_start=True, cache=False,
                   expiration_in_seconds=20)

    def on_response(url, response):
        if response.text == "Not found":
            print("URL Not Found: %s" % url)
        else:
            data.append(response.text)

    form_headers = {"Content-type": "application/x-www-form-urlencoded"}
    for url in urls:
        otto.enqueue(url, on_response, method, data=kwargs, headers=form_headers)
    otto.wait()

    json_data = json.JSONEncoder(indent=None, separators=(',', ': ')).encode(data)
    return json_data
def test_should_call_limiter_miss_twice(self):
    """Expects exactly two lock misses across four URLs on two rate-limited domains."""
    limiter = PerDomainInMemoryLimiter(
        {'http://g1.globo.com': 1},
        {'http://globoesporte.globo.com': 1},
    )
    limiter.subscribe_to_lock_miss(self.handle_limiter_miss)
    otto = Octopus(concurrency=10, auto_start=True, limiter=limiter)

    for url in (
        'http://globoesporte.globo.com/',
        'http://globoesporte.globo.com/futebol/times/flamengo/',
        'http://g1.globo.com/',
        'http://g1.globo.com/economia/',
    ):
        otto.enqueue(url, self.handle_url_response)

    otto.wait()

    expect(self.cache_miss).to_length(2)
def create_request(urls, buscar):
    """Fetch each URL, decode its JSON body, and pair the *buscar* field with a slice of the "url" field."""
    data = []
    otto = Octopus(concurrency=10, auto_start=True, cache=True,
                   expiration_in_seconds=30)

    def on_response(url, response):
        if response.text == "Not found":
            print("URL Not Found: %s" % url)
            return
        payload = json.loads(response.text)
        target = payload["url"]
        # The trailing identifier is one or two characters wide, depending
        # on where the '/' falls near the end of the "url" field.
        if target[-3] == '/':
            data.append((payload[buscar], target[-2]))
        else:
            data.append((payload[buscar], target[-3:-1]))

    for url in urls:
        otto.enqueue(url, on_response)
    otto.wait()

    return data
def test_can_handle_more_urls_concurrently(self):
    """Four URLs fetched with concurrency=4 must all come back with status 200."""
    urls = [
        'http://www.twitter.com',
        'http://www.cnn.com',
        'http://www.bbc.com',
        'http://www.facebook.com'
    ]
    otto = Octopus(concurrency=4)

    def remember(url, response):
        self.responses[url] = response

    for url in urls:
        otto.enqueue(url, remember)
    otto.start()
    otto.wait(10)

    expect(self.responses).to_length(4)
    for url in urls:
        expect(self.responses).to_include(url)
        expect(self.responses[url].status_code).to_equal(200)
def test_should_not_get_more_than_one_url_for_same_domain_concurrently(self):
    """After all four requests finish, both redis limiter keys must be empty."""
    limiter = PerDomainRedisLimiter(
        {'http://g1.globo.com': 1},
        {'http://globoesporte.globo.com': 1},
        redis=self.redis
    )
    otto = Octopus(concurrency=10, auto_start=True, limiter=limiter)

    for url in (
        'http://globoesporte.globo.com',
        'http://globoesporte.globo.com/futebol/times/flamengo/',
        'http://g1.globo.com',
        'http://g1.globo.com/economia',
    ):
        otto.enqueue(url, self.handle_url_response)

    otto.wait(10)

    expect(self.responses).to_length(4)
    expect(self.redis.zcard('limit-for-http://g1.globo.com')).to_equal(0)
    expect(self.redis.zcard('limit-for-http://globoesporte.globo.com')).to_equal(0)
#!/usr/bin/env python3
'''
octo.py -- sample octopus http client for python3
training at www.jasaplus.com
'''
from octopus import Octopus

otto = Octopus(concurrency=2, auto_start=True)


def handle_url_response(url, response):
    # Callback invoked by Octopus for each completed request.
    print(response.text)


SAMPLE_URLS = (
    'https://raw.githubusercontent.com/brandiqa/json-examples/master/src/google_markers.json',
    'https://raw.githubusercontent.com/brandiqa/json-examples/master/src/products.json',
)
for sample_url in SAMPLE_URLS:
    otto.enqueue(sample_url, handle_url_response)

otto.wait()
def test_can_enqueue_url(self):
    """Enqueueing a URL -- even with no handler -- bumps queue_size to 1."""
    octo = Octopus()
    octo.enqueue('http://www.google.com', None)
    expect(octo.queue_size).to_equal(1)
# Fragment of a larger PageSpeed Insights batch script; `sitemapFile`,
# `strategy`, `categories`, `apiKey`, `otto`, `handle_url_response`,
# `resultsFile`, and `filePath` are all defined outside this chunk.
# Read one URL per line from the chosen sitemap file.
sitemap = open(f'./sitemaps/{sitemapFile}').readlines()
sitemap = [line.rstrip('\n') for line in sitemap]
for num, line in enumerate(sitemap, start=1):
    if num == 1:
        print(f'Processing {len(sitemap)} requests')
    # If no "strategy" parameter is included, the query by default returns desktop data.
    url = f'https://www.googleapis.com/pagespeedonline/v5/runPagespeed?url={line}&strategy={strategy}'
    for cat in categories:
        url += f'&category={cat}'
    if apiKey:
        url += f'&key={apiKey}'
    # NOTE(review): `num` is not a documented PageSpeed API parameter --
    # presumably consumed by handle_url_response to order results; confirm.
    url += f'&num={num}'
    otto.enqueue(url, handle_url_response)
otto.wait(timeout=0)  # waits until the queue is empty or the timeout has elapsed
print(f'Adding summary...')
resultsFile.close()
# Re-read the CSV results just written so averages can be computed.
getSummary = open(filePath, 'r').readlines()
getSummary = [line.rstrip('\n') for line in getSummary]
accContentfulPaint = accFirstInteractive = accPerformance = accSeo = accAccessability = accBestPractices = 0
sumResults = len(getSummary) - 1  # exclude the CSV header row
for num, line in enumerate(getSummary, start=1):
    if num == 1:
        continue  # skip the header row
    line = line.split(',')
    # [:-2] strips a two-character unit suffix (presumably " s") from the
    # timing columns -- TODO confirm against the writer's format.
    accContentfulPaint += float(line[2][:-2])
    accFirstInteractive += float(line[3][:-2])
    accPerformance += float(line[4])