Example #1
0
    def test_worker_is_working(self):
        # A worker created without a current_url must report working as false;
        # one created with a current_url must report it as true.
        review = ReviewFactory.create()
        idle_worker = WorkerFactory.create()
        busy_worker = WorkerFactory.create(current_url=review.domain.url)

        expect(idle_worker.working).to_be_false()
        expect(busy_worker.working).to_be_true()
Example #2
0
    def test_can_get_worker_by_uuid(self):
        # Two workers exist, so the lookup has to return the one asked for.
        target = WorkerFactory.create()
        WorkerFactory.create()

        found = Worker.by_uuid(target.uuid, self.db)
        expect(found.id).to_equal(target.id)

        # Looking up a freshly generated uuid is expected to yield nothing.
        not_found = Worker.by_uuid(uuid4(), self.db)
        expect(not_found).to_be_null()
Example #3
0
    def test_get_next_job_does_not_get_from_inactive_domains(self):
        # The only page created here belongs to a domain with is_active=False,
        # so no job is expected to come back.
        WorkerFactory.create()
        inactive_domain = DomainFactory.create(is_active=False)
        PageFactory.create(domain=inactive_domain)

        job = Page.get_next_job(
            self.db, expiration=100, cache=self.sync_cache, lock_expiration=1
        )

        expect(job).to_be_null()
Example #4
0
    def test_worker_to_dict(self):
        # to_dict() is expected to expose uuid and last_ping as strings and to
        # flag the worker as working, since it was created with a current_url.
        review = ReviewFactory.create()
        busy_worker = WorkerFactory.create(current_url=review.domain.url)

        serialized = busy_worker.to_dict()

        for field in ('uuid', 'last_ping'):
            expect(serialized[field]).to_equal(str(getattr(busy_worker, field)))
        expect(serialized['working']).to_be_true()
Example #5
0
    def test_worker_removal_after_long_time_without_ping_alive(self):
        # One worker's last ping is 300 seconds in the past; the other is new.
        stale_ping = datetime.now() - timedelta(seconds=300)
        stale_worker = WorkerFactory.create(last_ping=stale_ping)
        fresh_worker = WorkerFactory.create()
        self.db.flush()

        # Ping the fresh worker, then ask for the list of workers.
        alive_url = self.get_url('/worker/%s/alive' % str(fresh_worker.uuid))
        yield self.http_client.fetch(alive_url, method='POST', body='')

        listing = yield self.http_client.fetch(self.get_url('/workers/'))

        # Only one worker should be listed, and it must not be the stale one.
        listed_workers = loads(listing.body)
        expect(listed_workers).not_to_be_null()
        expect(listed_workers).to_length(1)
        expect(listed_workers[0]['uuid']).not_to_equal(str(stale_worker.uuid))
Example #6
0
    def test_can_get_next_job(self):
        # Create 20 workers and 20 pages on one domain with scores 0.0 .. 19.0.
        domain = DomainFactory.create()
        pages = []
        for score in range(20):
            WorkerFactory.create()
            page = PageFactory.create(domain=domain, score=float(score))
            pages.append(page)

        # Jobs are expected in reverse creation order, i.e. highest score first.
        for expected_page in reversed(pages):
            job = Page.get_next_job(
                self.db, expiration=100, cache=self.sync_cache, lock_expiration=100
            )

            expect(job).not_to_be_null()
            expect(job['page']).to_equal(str(expected_page.uuid))
Example #7
0
    def test_increases_page_score_when_lambda_is_top_page(self):
        # With lambda_score set to 10000, requesting a job is expected to leave
        # both pages with a score of 5000.
        WorkerFactory.create()
        first_page = PageFactory.create()
        second_page = PageFactory.create()

        Settings.instance(self.db).lambda_score = 10000

        Page.get_next_job(
            self.db, expiration=100, cache=self.sync_cache, lock_expiration=1
        )

        both_pages = (first_page, second_page)
        # Reload both pages from the database before checking their scores.
        for page in both_pages:
            self.db.refresh(page)

        for page in both_pages:
            expect(page.score).to_equal(5000)
Example #8
0
    def test_worker_removal_when_ping_will_die(self):
        # Two workers exist; one of them is reported dead through the API.
        surviving_worker = WorkerFactory.create()
        dying_worker = WorkerFactory.create()
        self.db.flush()

        dead_url = self.get_url('/worker/%s/dead' % str(dying_worker.uuid))
        yield self.http_client.fetch(dead_url, method='POST', body='')

        listing = yield self.http_client.fetch(self.get_url('/workers/'))

        # Only the surviving worker should still be listed.
        listed_workers = loads(listing.body)
        expect(listed_workers).not_to_be_null()
        expect(listed_workers).to_length(1)

        listed_uuid = listed_workers[0]['uuid']
        expect(listed_uuid).not_to_equal(str(dying_worker.uuid))
        expect(listed_uuid).to_equal(str(surviving_worker.uuid))
Example #9
0
    def test_workers_info(self):
        # GET /workers/info/ is expected to answer 200 with a JSON object that
        # holds exactly three entries: 'total', 'active' and 'inactive'.
        WorkerFactory.create(current_url='http://www.globo.com/')
        self.db.flush()

        response = yield self.http_client.fetch(
            self.get_url('/workers/info/'),
        )

        expect(response.code).to_equal(200)

        # Recompute the expected counters straight from the database.
        total_workers = self.db.query(Worker).count()
        # NOTE(review): `== None` looks deliberate here -- it appears to build a
        # query filter expression on the column rather than compare identity,
        # so it must not be rewritten as `is None`.
        inactive_workers = self.db.query(Worker).filter(Worker.current_url == None).count()  # noqa: E711

        returned_json = loads(response.body)
        expect(returned_json).to_length(3)
        expect(returned_json['total']).not_to_be_null()
        # The assertion has to be called: a bare `.not_to_be_null` attribute
        # reference evaluates to a method object and verifies nothing.
        expect(returned_json['active']).not_to_be_null()
        expect(returned_json['inactive']).not_to_be_null()

        expect(returned_json['total']).to_equal(total_workers)
        expect(returned_json['active']).to_equal(total_workers - inactive_workers)
        expect(returned_json['inactive']).to_equal(inactive_workers)
    def test_worker_complete_work(self):
        # Posting to /worker/<uuid>/complete is expected to answer 'OK' and to
        # leave the worker without a current_url.
        busy_worker = WorkerFactory.create(current_url="http://www.globo.com/")
        self.db.flush()

        complete_url = self.get_url('/worker/%s/complete' % str(busy_worker.uuid))
        response = yield self.http_client.fetch(
            complete_url, method='POST', body=''
        )

        # Load the worker again to observe its state after the request.
        reloaded = Worker.by_uuid(busy_worker.uuid, self.db)

        expect(reloaded).not_to_be_null()
        expect(response.code).to_equal(200)
        expect(response.body).to_be_like('OK')
        expect(reloaded.current_url).to_be_null()
    def test_worker_start_working_invalid_review(self):
        # Posting an empty body to /worker/<uuid>/start is expected to fail
        # with a 400 whose reason reads 'Invalid URL'.
        idle_worker = WorkerFactory.create()
        self.db.flush()

        start_url = self.get_url('/worker/%s/start' % str(idle_worker.uuid))
        try:
            yield self.http_client.fetch(start_url, method='POST', body='')
        except HTTPError as http_error:
            expect(http_error).not_to_be_null()
            expect(http_error.code).to_equal(400)
            expect(http_error.response.reason).to_be_like('Invalid URL')
        else:
            # Reaching this branch means the request did not raise HTTPError.
            assert False, 'Should not have got this far'
Example #12
0
    def test_worker_alive_can_ping_existing_worker(self):
        # Posting to /worker/<uuid>/alive is expected to echo the worker's uuid
        # and to move its last_ping past the timestamp it was created with.
        initial_ping = datetime.now()

        pinged_worker = WorkerFactory.create(last_ping=initial_ping)
        self.db.flush()

        alive_url = self.get_url('/worker/%s/alive' % str(pinged_worker.uuid))
        response = yield self.http_client.fetch(
            alive_url, method='POST', body=''
        )

        # Load the worker again to observe its state after the request.
        reloaded = Worker.by_uuid(pinged_worker.uuid, self.db)

        expect(reloaded).not_to_be_null()
        expect(response.code).to_equal(200)
        expect(response.body).to_be_like(str(reloaded.uuid))
        expect(reloaded.last_ping).to_be_greater_than(initial_ping)
Example #13
0
    def test_workers_list(self):
        # GET /workers/ is expected to return one JSON entry per worker in the
        # database, the first of which describes the worker created here.
        busy_worker = WorkerFactory.create(current_url='http://www.globo.com/')
        self.db.flush()

        response = yield self.http_client.fetch(self.get_url('/workers/'))

        expect(response.code).to_equal(200)

        stored_workers = self.db.query(Worker).all()

        listed_workers = loads(response.body)
        expect(listed_workers).to_length(len(stored_workers))

        first_entry = listed_workers[0]
        expect(first_entry['uuid']).to_equal(str(busy_worker.uuid))
        expect(first_entry['current_url']).to_equal('http://www.globo.com/')
        expect(first_entry['working']).to_be_true()
Example #14
0
    def test_can_get_next_job_when_domain_limited(self):
        # Start from empty Domain and Page tables.
        for model in (Domain, Page):
            self.db.query(model).delete()

        domain_a = DomainFactory.create()
        domain_b = DomainFactory.create()

        # Only domain_a gets a limiter.
        LimiterFactory.create(url=domain_a.url, value=2)

        pages_a = []
        pages_b = []
        workers = []
        for index in range(10):
            # Two workers per iteration, then one page on each domain.
            workers.append(WorkerFactory.create())
            workers.append(WorkerFactory.create())

            page_a = PageFactory.create(
                domain=domain_a,
                url="%s/%d.html" % (domain_a.url, index),
                score=index * 10
            )
            pages_a.append(page_a)

            page_b = PageFactory.create(
                domain=domain_b,
                url="%s/%d.html" % (domain_b.url, index),
                score=index
            )
            pages_b.append(page_b)

        # The first request should not be limited: the last page of domain_a
        # is expected to be handed out.
        first_job = Page.get_next_job(
            self.db,
            expiration=100,
            cache=self.sync_cache,
            lock_expiration=1,
            avg_links_per_page=10
        )

        expect(first_job).not_to_be_null()
        expect(first_job['page']).to_equal(str(pages_a[-1].uuid))

        # Mark one worker as busy with that job's url.
        workers[0].current_url = first_job['url']
        self.db.flush()

        # The second request should be limited (2 / 10 = 0.2, rounded up =
        # 1 job at a time), so the last page of domain_b is expected instead.
        second_job = Page.get_next_job(
            self.db,
            expiration=100,
            cache=self.sync_cache,
            lock_expiration=1
        )

        expect(second_job).not_to_be_null()
        expect(second_job['page']).to_equal(str(pages_b[-1].uuid))
Example #15
0
    def test_can_create_worker(self):
        # A factory-created worker is expected to have both an id and a uuid.
        created = WorkerFactory.create()

        for field in ('id', 'uuid'):
            expect(getattr(created, field)).not_to_be_null()
Example #16
0
    def test_worker_current_url(self):
        # The current_url given at creation is expected to be readable from
        # the worker fetched back through the database session by id.
        review = ReviewFactory.create()
        created = WorkerFactory.create(current_url=review.domain.url)

        worker_query = self.db.query(Worker)
        fetched = worker_query.get(created.id)
        expect(fetched.current_url).to_equal(review.domain.url)
Example #17
0
    def test_worker_model_str(self):
        # str(worker) is expected to read 'Worker <uuid>'.
        created = WorkerFactory.create()

        rendered = str(created)
        expect(rendered).to_equal('Worker %s' % str(created.uuid))