def test_worker_is_working(self):
    review = ReviewFactory.create()
    worker = WorkerFactory.create()
    worker2 = WorkerFactory.create(current_url=review.domain.url)

    expect(worker.working).to_be_false()
    expect(worker2.working).to_be_true()

def test_can_get_worker_by_uuid(self):
    worker = WorkerFactory.create()
    WorkerFactory.create()

    loaded_worker = Worker.by_uuid(worker.uuid, self.db)
    expect(loaded_worker.id).to_equal(worker.id)

    invalid_worker = Worker.by_uuid(uuid4(), self.db)
    expect(invalid_worker).to_be_null()

def test_get_next_job_does_not_get_from_inactive_domains(self):
    WorkerFactory.create()
    domain = DomainFactory.create(is_active=False)
    PageFactory.create(domain=domain)

    next_job = Page.get_next_job(
        self.db,
        expiration=100,
        cache=self.sync_cache,
        lock_expiration=1
    )

    expect(next_job).to_be_null()

def test_worker_to_dict(self):
    review = ReviewFactory.create()
    worker = WorkerFactory.create(current_url=review.domain.url)

    worker_dict = worker.to_dict()

    expect(worker_dict['uuid']).to_equal(str(worker.uuid))
    expect(worker_dict['last_ping']).to_equal(str(worker.last_ping))
    expect(worker_dict['working']).to_be_true()

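# Note on the next test: the /workers/ listing is assumed to drop workers
# whose last_ping is older than the configured ping lifetime. The 300-second
# offset below is assumed to exceed that lifetime; the exact threshold lives
# in the application config, not in this test.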
@gen_test
def test_worker_removal_after_long_time_without_ping_alive(self):
    date = datetime.now() - timedelta(seconds=300)
    worker_old = WorkerFactory.create(last_ping=date)
    worker_new = WorkerFactory.create()
    self.db.flush()

    yield self.http_client.fetch(
        self.get_url('/worker/%s/alive' % str(worker_new.uuid)),
        method='POST',
        body=''
    )

    response = yield self.http_client.fetch(
        self.get_url('/workers/'),
    )

    returned_json = loads(response.body)
    expect(returned_json).not_to_be_null()
    expect(returned_json).to_length(1)
    expect(returned_json[0]['uuid']).not_to_equal(str(worker_old.uuid))

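# Note on the next test: get_next_job is expected to hand out pages in
# descending score order, which is why iteration i should see pages[19 - i].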
def test_can_get_next_job(self):
    domain = DomainFactory.create()

    pages = []
    for i in range(20):
        WorkerFactory.create()
        pages.append(PageFactory.create(
            domain=domain,
            score=float(i)
        ))

    for i in range(20):
        next_job = Page.get_next_job(
            self.db,
            expiration=100,
            cache=self.sync_cache,
            lock_expiration=100
        )
        expect(next_job).not_to_be_null()
        expect(next_job['page']).to_equal(str(pages[19 - i].uuid))

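# Note on the next test: when the synthetic "lambda" entry holds the top
# score, get_next_job is assumed to redistribute settings.lambda_score
# evenly across the existing pages, hence each of the two pages ending at
# 10000 / 2 = 5000.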
def test_increases_page_score_when_lambda_is_top_page(self):
    WorkerFactory.create()
    page = PageFactory.create()
    page2 = PageFactory.create()

    settings = Settings.instance(self.db)
    settings.lambda_score = 10000

    Page.get_next_job(
        self.db,
        expiration=100,
        cache=self.sync_cache,
        lock_expiration=1
    )

    self.db.refresh(page)
    self.db.refresh(page2)

    expect(page.score).to_equal(5000)
    expect(page2.score).to_equal(5000)

@gen_test
def test_worker_removal_when_ping_will_die(self):
    worker_old = WorkerFactory.create()
    worker_dead = WorkerFactory.create()
    self.db.flush()

    yield self.http_client.fetch(
        self.get_url('/worker/%s/dead' % str(worker_dead.uuid)),
        method='POST',
        body=''
    )

    response = yield self.http_client.fetch(
        self.get_url('/workers/'),
    )

    returned_json = loads(response.body)
    expect(returned_json).not_to_be_null()
    expect(returned_json).to_length(1)
    expect(returned_json[0]['uuid']).not_to_equal(str(worker_dead.uuid))
    expect(returned_json[0]['uuid']).to_equal(str(worker_old.uuid))

@gen_test
def test_workers_info(self):
    WorkerFactory.create(current_url='http://www.globo.com/')
    self.db.flush()

    response = yield self.http_client.fetch(
        self.get_url('/workers/info/'),
    )
    expect(response.code).to_equal(200)

    total_workers = self.db.query(Worker).count()
    inactive_workers = self.db.query(Worker).filter(Worker.current_url.is_(None)).count()

    returned_json = loads(response.body)
    expect(returned_json).to_length(3)

    expect(returned_json['total']).not_to_be_null()
    expect(returned_json['active']).not_to_be_null()
    expect(returned_json['inactive']).not_to_be_null()

    expect(returned_json['total']).to_equal(total_workers)
    expect(returned_json['active']).to_equal(total_workers - inactive_workers)
    expect(returned_json['inactive']).to_equal(inactive_workers)

@gen_test
def test_worker_complete_work(self):
    worker = WorkerFactory.create(current_url="http://www.globo.com/")
    self.db.flush()

    response = yield self.http_client.fetch(
        self.get_url('/worker/%s/complete' % str(worker.uuid)),
        method='POST',
        body=''
    )

    worker = Worker.by_uuid(worker.uuid, self.db)
    expect(worker).not_to_be_null()
    expect(response.code).to_equal(200)
    expect(response.body).to_be_like('OK')
    expect(worker.current_url).to_be_null()

@gen_test
def test_worker_start_working_invalid_review(self):
    worker = WorkerFactory.create()
    self.db.flush()

    try:
        yield self.http_client.fetch(
            self.get_url('/worker/%s/start' % str(worker.uuid)),
            method='POST',
            body=''
        )
    except HTTPError as err:
        expect(err).not_to_be_null()
        expect(err.code).to_equal(400)
        expect(err.response.reason).to_be_like('Invalid URL')
    else:
        assert False, 'Should not have got this far'

@gen_test
def test_worker_alive_can_ping_existing_worker(self):
    date = datetime.now()
    worker = WorkerFactory.create(last_ping=date)
    self.db.flush()

    response = yield self.http_client.fetch(
        self.get_url('/worker/%s/alive' % str(worker.uuid)),
        method='POST',
        body=''
    )

    worker = Worker.by_uuid(worker.uuid, self.db)
    expect(worker).not_to_be_null()
    expect(response.code).to_equal(200)
    expect(response.body).to_be_like(str(worker.uuid))
    expect(worker.last_ping).to_be_greater_than(date)

@gen_test
def test_workers_list(self):
    worker = WorkerFactory.create(current_url='http://www.globo.com/')
    self.db.flush()

    response = yield self.http_client.fetch(
        self.get_url('/workers/'),
    )
    expect(response.code).to_equal(200)

    workers = self.db.query(Worker).all()

    returned_json = loads(response.body)
    expect(returned_json).to_length(len(workers))
    expect(returned_json[0]['uuid']).to_equal(str(worker.uuid))
    expect(returned_json[0]['current_url']).to_equal('http://www.globo.com/')
    expect(returned_json[0]['working']).to_be_true()

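# Note on the next test: a Limiter of value=2 on domain_a combined with
# avg_links_per_page=10 is assumed to cap concurrency at ceil(2 / 10) = 1
# worker for that domain, so once one worker holds a domain_a URL the next
# job must come from domain_b.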
def test_can_get_next_job_when_domain_limited(self):
    self.db.query(Domain).delete()
    self.db.query(Page).delete()

    domain_a = DomainFactory.create()
    domain_b = DomainFactory.create()

    LimiterFactory.create(url=domain_a.url, value=2)

    pages_a = []
    pages_b = []
    workers = []
    for i in range(10):
        for j in range(2):
            workers.append(WorkerFactory.create())
        pages_a.append(PageFactory.create(
            domain=domain_a,
            url="%s/%d.html" % (domain_a.url, i),
            score=i * 10
        ))
        pages_b.append(PageFactory.create(
            domain=domain_b,
            url="%s/%d.html" % (domain_b.url, i),
            score=i
        ))

    # first one should not be limited
    next_job = Page.get_next_job(
        self.db,
        expiration=100,
        cache=self.sync_cache,
        lock_expiration=1,
        avg_links_per_page=10
    )
    expect(next_job).not_to_be_null()
    expect(next_job['page']).to_equal(str(pages_a[-1].uuid))
    workers[0].current_url = next_job['url']
    self.db.flush()

    # second one should be limited (2 / 10 = 0.2, rounded up = 1 job at a time)
    next_job = Page.get_next_job(
        self.db,
        expiration=100,
        cache=self.sync_cache,
        lock_expiration=1
    )
    expect(next_job).not_to_be_null()
    expect(next_job['page']).to_equal(str(pages_b[-1].uuid))

def test_can_create_worker(self):
    worker = WorkerFactory.create()

    expect(worker.id).not_to_be_null()
    expect(worker.uuid).not_to_be_null()

def test_worker_current_url(self):
    review = ReviewFactory.create()
    worker = WorkerFactory.create(current_url=review.domain.url)

    loaded_worker = self.db.query(Worker).get(worker.id)
    expect(loaded_worker.current_url).to_equal(review.domain.url)

def test_worker_model_str(self):
    worker = WorkerFactory.create()

    expect(str(worker)).to_equal('Worker %s' % str(worker.uuid))