Ejemplo n.º 1
0
 def test_api_crawl_contents(self):
     """
     Test fetching crawl contents through the API end-point.

     Patches the cloud-manifest helper (so no real cloud-storage call
     is made), authenticates against the token endpoint, then asserts
     that ``api_crawl_contents`` returns non-empty contents.
     """
     with patch('api_app.views.get_google_cloud_manifest_contents'
                ) as mock_cloud:
         mock_cloud.return_value = "Test"
         user = User.objects.create_user('testUser2', '*****@*****.**',
                                         'testpassword')
         crawl_request = CrawlRequest(user=user)
         crawl_request.storage_location = "abc/def"
         crawl_request.save()
         # Renamed from 'id' to avoid shadowing the builtin.
         job_id = crawl_request.id
         client = APIClient()
         response_auth = client.post(reverse('authenticate_user'), {
             'username': '******',
             'password': '******'
         },
                                     format="json")
         access_token = response_auth.data['token']
         self.assertNotEqual('', access_token)
         client.credentials(HTTP_AUTHORIZATION='Bearer ' + access_token)
         params = {'JOB_ID': job_id, 'complete_crawl': 0}
         response_get = client.get(reverse('api_crawl_contents'), params)
         self.assertNotEqual('', response_get.data["crawl_contents"])
Ejemplo n.º 2
0
    def test_crawl_complete(self):
        """
        Exercise the crawl-complete API end-point.

        Outbound HTTP POSTs are patched out; the test authenticates,
        posts a completion payload for a saved crawl request, and
        checks the "done" acknowledgement in the JSON response.
        """
        with patch('main_app.views.requests.post'):
            owner = User.objects.create_user('testUser4', '*****@*****.**',
                                             'testpassword')
            api_client = APIClient()
            credentials = {'username': '******', 'password': '******'}
            auth_response = api_client.post(reverse('authenticate_user'),
                                            credentials,
                                            format="json")
            token = auth_response.data['token']
            self.assertNotEqual('', token)
            api_client.credentials(HTTP_AUTHORIZATION='Bearer ' + token)

            crawl_request = CrawlRequest(user=owner)
            crawl_request.urls = "http://abc.com"
            crawl_request.save()
            User.objects.create_user('admin' + str(crawl_request.id),
                                     '*****@*****.**', 'testpassword')
            completion = {
                'job_id': crawl_request.id,
                'manifest': 'http://abc.com',
                'csv': 'http://def.com',
                'resources_count': 20,
                'uploaded_pages': 20,
                'time_taken': 150,
            }
            response = api_client.post(reverse('api_complete_crawl'),
                                       completion,
                                       format="json")
            payload = json.loads(response.content)
            self.assertEqual("done", payload["CrawlComplete"])
Ejemplo n.º 3
0
    def test_register_crawler_manager(self):
        """
        Verify registration of a crawler manager via the API.

        Authenticates, saves a crawl request, registers an endpoint for
        it, and asserts the response echoes the request's URL list.
        """
        owner = User.objects.create_user('testUser3', '*****@*****.**',
                                         'testpassword')
        api_client = APIClient()
        credentials = {'username': '******', 'password': '******'}
        auth_response = api_client.post(reverse('authenticate_user'),
                                        credentials,
                                        format="json")
        token = auth_response.data['token']
        self.assertNotEqual('', token)
        api_client.credentials(HTTP_AUTHORIZATION='Bearer ' + token)

        crawl_request = CrawlRequest(user=owner)
        crawl_request.urls = "http://abc.com"
        crawl_request.save()
        registration = {
            'job_id': crawl_request.id,
            'endpoint': 'http://def.com',
        }
        response = api_client.post(reverse('api_register_crawler_manager'),
                                   registration,
                                   format="json")
        payload = json.loads(response.content)
        self.assertEqual("http://abc.com", payload["urls"][0])
Ejemplo n.º 4
0
 def test_job_details(self):
     """
     Verify the job-details page renders for a saved crawl request.
     """
     owner = User.objects.create_user('testJobDetails',
                                      '*****@*****.**',
                                      'testpassword')
     crawl_request = CrawlRequest(user=owner)
     crawl_request.urls = "http://abc.com"
     crawl_request.save()
     browser = Client()
     browser.login(username='******', password='******')
     details_url = reverse('mainapp_jobdetails',
                           kwargs={'job_id': crawl_request.id})
     response = browser.get(details_url)
     self.assertEqual(response.status_code, 200)
     self.assertContains(response, 'Job Details for crawl job')
Ejemplo n.º 5
0
 def test_crawl_contents(self):
     """
     Test that the crawl-contents page renders for a saved request.

     Patches the cloud-manifest helper so no real cloud-storage call
     is made, logs in through the regular session client, and asserts
     a 200 response.
     """
     with patch('main_app.views.get_google_cloud_manifest_contents'
                ) as mock_cloud:
         mock_cloud.return_value = b'Test'
         user = User.objects.create_user('testCrawlContents',
                                         '*****@*****.**',
                                         'testpassword')
         crawl_request = CrawlRequest(user=user)
         crawl_request.storage_location = "abc/def"
         crawl_request.save()
         # Dropped the unused 'id = crawl_request.id' local, which
         # also shadowed the builtin 'id'.
         client = Client()
         client.login(username='******', password='******')
         response = client.get(
             reverse('mainapp_crawlcontents',
                     kwargs={'job_id': crawl_request.id}))
         self.assertEqual(response.status_code, 200)
Ejemplo n.º 6
0
def api_create_crawl(request):
    """
    External API: submit a new crawl request.

    Reads ``username``, ``name``, ``domain`` and ``urls`` from the
    request body, persists a CrawlRequest owned by that user, launches
    a crawler manager for it, and responds with ``{"jobId": <id>}``.
    """
    logger.info('In api new job')
    owner = User.objects.get(username=request.data['username'])
    crawl_request = CrawlRequest(user=owner)
    crawl_request.name = request.data['name']
    crawl_request.domain = request.data['domain']
    crawl_request.urls = request.data['urls']
    crawl_request.save()
    logger.info('NewJob created: %s', crawl_request.id)
    logger.info('Received urls: %s', crawl_request.urls)
    launch_crawler_manager(crawl_request, crawl_request.id)
    return Response({'jobId': crawl_request.id}, status=status.HTTP_200_OK)
Ejemplo n.º 7
0
 def test_api_get_job_status(self):
     """
     Test the job-status API end-point for a saved crawl request.
     """
     owner = User.objects.create_user('testStatusUser',
                                      '*****@*****.**',
                                      'testpassword')
     api_client = APIClient()
     crawl_request = CrawlRequest(user=owner)
     crawl_request.name = 'test'
     crawl_request.urls = "http://abc.com"
     crawl_request.save()
     request_body = {'job_id': crawl_request.id}
     response = api_client.post(reverse('api_job_status'),
                                request_body,
                                format="json")
     payload = json.loads(response.content)
     self.assertEqual("test", payload["name"])
Ejemplo n.º 8
0
 def test_update_job_status(self):
     """
     Test pulling a job's status from the crawler manager and storing it.

     The manager's HTTP GET is mocked to return a JSON status blob;
     ``update_job_status`` is then exercised for two distinct crawl
     requests (the second one's id will not match the mocked blob).
     """
     with patch('main_app.utilities.requests.get') as mock_get:
         user = User.objects.create_user('testUpdateStatus',
                                         '*****@*****.**',
                                         'testpassword')
         crawl_request = CrawlRequest(user=user)
         crawl_request.crawler_manager_endpoint = "http://abc.com"
         crawl_request.name = 'test_update_status'
         crawl_request.save()
         mock_response = mock.Mock()
         resp_string = '{"job_id":' + str(
             crawl_request.id) + ', "processed_count":10}'
         mock_response.text = resp_string
         mock_get.return_value = mock_response
         utilities.update_job_status(crawl_request)
         crawl_request2 = CrawlRequest(user=user)
         crawl_request2.crawler_manager_endpoint = "http://abc.com"
         crawl_request2.name = 'test_update_status_second'
         crawl_request2.save()
         # Bug fix: the second call previously re-passed crawl_request,
         # leaving crawl_request2 saved but never exercised — the second
         # request was presumably meant to cover the non-matching case.
         utilities.update_job_status(crawl_request2)
Ejemplo n.º 9
0
 def test_crawl_request_model(self):
     """
     Test the CrawlRequest model: populate, save, fetch and stringify.
     """
     owner = User.objects.create_user('testCrawlRequestUser',
                                      '*****@*****.**',
                                      'testpassword')
     crawl_instance = CrawlRequest(user=owner)
     # Same field values as before, assigned via a single loop.
     field_values = {
         'name': 'crawl_model_test',
         'type': 1,
         'domain': 'http://abc.com',
         'urls': 'http://url.com',
         'description': 'Test Crawl Request',
         'docs_all': True,
         'docs_html': False,
         'docs_docx': False,
         'docs_pdf': False,
         'docs_collected': 10,
         'status': 1,
         'storage_location': 'http://storage.com',
         'crawler_manager_endpoint': 'http://end.com',
         'manifest': 'http://manifest.com',
         'num_crawlers': 1,
     }
     for field_name, value in field_values.items():
         setattr(crawl_instance, field_name, value)
     crawl_instance.save()
     crawl_instance.get_absolute_url()
     matches = CrawlRequest.objects.filter(name='crawl_model_test')
     self.assertEqual(1, len(matches))
     self.assertEqual("1 crawl_model_test", str(matches[0]))