Ejemplo n.º 1
0
 def test_crawl_request_model(self):
     """
     Verify that a CrawlRequest can be saved, retrieved by name, and that
     its string representation is "<id> <name>".
     """
     owner = User.objects.create_user('testCrawlRequestUser',
                                      '*****@*****.**',
                                      'testpassword')
     model = CrawlRequest(
         user=owner,
         name='crawl_model_test',
         type=1,
         domain='http://abc.com',
         urls='http://url.com',
         description='Test Crawl Request',
         docs_all=True,
         docs_html=False,
         docs_docx=False,
         docs_pdf=False,
         docs_collected=10,
         status=1,
         storage_location='http://storage.com',
         crawler_manager_endpoint='http://end.com',
         manifest='http://manifest.com',
         num_crawlers=1,
     )
     model.save()
     # Exercised for coverage only; the return value is not asserted here.
     model.get_absolute_url()
     matches = CrawlRequest.objects.filter(name='crawl_model_test')
     self.assertEqual(1, len(matches))
     self.assertEqual("1 crawl_model_test", str(matches[0]))
Ejemplo n.º 2
0
 def test_update_job_status(self):
     """
     Test fetching a job's status from the crawler manager (mocked via
     requests.get) and persisting it through utilities.update_job_status.
     """
     with patch('main_app.utilities.requests.get') as mock_get:
         user = User.objects.create_user('testUpdateStatus',
                                         '*****@*****.**',
                                         'testpassword')
         crawl_request = CrawlRequest(user=user)
         crawl_request.crawler_manager_endpoint = "http://abc.com"
         crawl_request.name = 'test_update_status'
         crawl_request.save()
         # The mocked manager response reports progress for the first job.
         mock_response = mock.Mock()
         resp_string = '{"job_id":' + str(
             crawl_request.id) + ', "processed_count":10}'
         mock_response.text = resp_string
         mock_get.return_value = mock_response
         utilities.update_job_status(crawl_request)
         crawl_request2 = CrawlRequest(user=user)
         crawl_request2.crawler_manager_endpoint = "http://abc.com"
         crawl_request2.name = 'test_update_status_second'
         crawl_request2.save()
         # BUG FIX: the second call previously re-passed crawl_request,
         # so crawl_request2 was created and saved but never exercised.
         # Pass the second request, whose id does not match the mocked
         # response's job_id, to cover that path as well.
         utilities.update_job_status(crawl_request2)
Ejemplo n.º 3
0
 def test_api_get_job_status(self):
     """
     Test the job-status API end-point: posting a job_id returns a JSON
     payload containing that job's name.
     """
     api_user = User.objects.create_user('testStatusUser',
                                         '*****@*****.**',
                                         'testpassword')
     api_client = APIClient()
     job = CrawlRequest(user=api_user)
     job.name = 'test'
     job.urls = "http://abc.com"
     job.save()
     response = api_client.post(reverse('api_job_status'),
                                {'job_id': job.id},
                                format="json")
     payload = json.loads(response.content)
     self.assertEqual("test", payload["name"])
Ejemplo n.º 4
0
def api_create_crawl(request):
    """
    External api to submit a crawl request.

    Expects ``username``, ``name``, ``domain`` and ``urls`` in the request
    body.  Creates a CrawlRequest owned by the given user, launches the
    crawler manager for it and returns ``{"jobId": <id>}`` with HTTP 200.

    Returns HTTP 400 when a required field is missing and HTTP 404 when
    the user does not exist (previously both cases crashed with an
    unhandled KeyError / User.DoesNotExist, surfacing as HTTP 500).
    """
    logger.info('In api new job')
    required = ('username', 'name', 'domain', 'urls')
    missing = [field for field in required if field not in request.data]
    if missing:
        return Response({'error': 'missing fields: ' + ', '.join(missing)},
                        status=status.HTTP_400_BAD_REQUEST)
    username = request.data['username']
    try:
        user_obj = User.objects.get(username=username)
    except User.DoesNotExist:
        return Response({'error': 'unknown user: ' + username},
                        status=status.HTTP_404_NOT_FOUND)
    crawl_request = CrawlRequest(user=user_obj)
    crawl_request.name = request.data['name']
    crawl_request.domain = request.data['domain']
    crawl_request.urls = request.data['urls']
    crawl_request.save()
    logger.info('NewJob created: %s', crawl_request.id)
    logger.info('Received urls: %s', crawl_request.urls)
    launch_crawler_manager(crawl_request, crawl_request.id)
    # Happy-path contract is unchanged: callers still get {"jobId": <id>}.
    payload = {}
    payload['jobId'] = crawl_request.id
    return Response(payload, status=status.HTTP_200_OK)