def test_init_not_valid_archive(self):
        """Check that opening a file which is not a valid archive raises `ArchiveError`"""

        bogus_path = os.path.join(self.test_path, 'invalid_archive')

        # Write plain text so the file on disk is not a valid archive
        with open(bogus_path, 'w') as bogus_file:
            bogus_file.write("Invalid archive file")

        # The constructor itself is expected to reject the file
        with self.assertRaisesRegex(ArchiveError, "invalid archive file"):
            Archive(bogus_path)
Esempio n. 2
0
    def test_retrieve_missing(self):
        """Check that retrieving data which was never archived raises an error

        When data cannot be retrieved from an archive (e.g., after a manual
        modification), an exception is expected so that the retrieval from
        the archive stops.
        """

        empty_archive = Archive.create(os.path.join(self.test_path, 'myarchive'))

        # Nothing was stored in the archive before this lookup
        with self.assertRaisesRegex(ArchiveError, "not found in archive"):
            empty_archive.retrieve("http://wrong", payload={}, headers={})
Esempio n. 3
0
    def test_init_metadata(self):
        """Check that the metadata of an archive is initialized as expected"""

        path = os.path.join(self.test_path, 'myarchive')
        created = Archive.create(path)

        # Timestamps taken right before and after the call bracket `created_on`
        lower_dt = datetime_to_utc(datetime_utcnow())
        created.init_metadata('marvel.com', 'marvel-comics-backend', '0.1.0',
                              'issue', {'from_date': lower_dt})
        upper_dt = datetime_to_utc(datetime_utcnow())

        # Second instance opened on the same archive file
        reopened = Archive(path)

        # The instance that set the metadata and the reopened one must agree
        for candidate in (created, reopened):
            self.assertEqual(candidate.origin, 'marvel.com')
            self.assertEqual(candidate.backend_name, 'marvel-comics-backend')
            self.assertEqual(candidate.backend_version, '0.1.0')
            self.assertEqual(candidate.category, 'issue')
            self.assertGreaterEqual(candidate.created_on, lower_dt)
            self.assertLessEqual(candidate.created_on, upper_dt)
            self.assertDictEqual(candidate.backend_params, {'from_date': lower_dt})
Esempio n. 4
0
    def test_init_archive(self):
        """Check that running the fetch method initializes the archive metadata"""

        archive = Archive.create(os.path.join(self.test_path, 'myarchive'))
        backend = MockedBackend('test', archive=archive)

        # Consume everything fetch yields; the items themselves are not inspected
        for _ in backend.fetch():
            pass

        # The archive metadata must mirror the backend that ran the fetch
        self.assertEqual(backend.archive.backend_name, backend.__class__.__name__)
        self.assertEqual(backend.archive.backend_version, backend.version)
        self.assertEqual(backend.archive.origin, backend.origin)
        self.assertEqual(backend.archive.category, MockedBackend.CATEGORY)
Esempio n. 5
0
    def test_archive(self):
        """Check that the archive attribute of a backend is initialized"""

        archive = Archive.create(os.path.join(self.test_path, 'myarchive'))

        # Archive given at construction time
        with_archive = Backend('test', archive=archive)
        self.assertEqual(with_archive.archive, archive)

        # No archive given
        without_archive = Backend('test')
        self.assertEqual(without_archive.archive, None)

        # Archive assigned after construction
        without_archive.archive = archive
        self.assertEqual(without_archive.archive, archive)
Esempio n. 6
0
    def test_fetch_from_archive(self):
        """Check that a response fetched from an archive matches the one from the API"""

        archive = Archive.create(os.path.join(self.test_path, 'myarchive'))

        httpretty.register_uri(httpretty.GET, CLIENT_SUPERMAN_URL,
                               body="good", status=200)

        # Settings shared by both clients
        common = {'sleep_time': 0.1, 'max_retries': 1, 'archive': archive}

        # First client is attached to the archive but not reading from it
        api_client = MockedClient(CLIENT_API_URL, **common)
        from_api = api_client.fetch(CLIENT_SUPERMAN_URL)

        # Second client is told to read from the archive
        archive_client = MockedClient(CLIENT_API_URL, from_archive=True, **common)
        from_archive = archive_client.fetch(CLIENT_SUPERMAN_URL)

        self.assertEqual(from_api.text, from_archive.text)
Esempio n. 7
0
    def test_retrieve(self):
        """Check that data stored in the archive can be retrieved back"""

        target = "https://example.com/tasks"
        params = {'task_id': 10}
        request_headers = {'Accept': 'application/json'}

        httpretty.register_uri(httpretty.GET, target,
                               body='{"hey": "there"}', status=200)
        live_response = requests.get(target, params=params, headers=request_headers)

        archive = Archive.create(os.path.join(self.test_path, 'myarchive'))
        archive.store(target, params, request_headers, live_response)

        # Look the entry up with the same url, payload and headers it was stored with
        archived = archive.retrieve(target, params, request_headers)

        self.assertEqual(archived.url, live_response.url)
Esempio n. 8
0
    def test_store_duplicate(self):
        """Check that storing the same data twice raises an error"""

        target = "https://example.com/tasks"
        params = {'task_id': 10}
        request_headers = {'Accept': 'application/json'}

        httpretty.register_uri(httpretty.GET, target,
                               body='{"hey": "there"}', status=200)
        live_response = requests.get(target, params=params, headers=request_headers)

        archive = Archive.create(os.path.join(self.test_path, 'myarchive'))

        # The first insertion is accepted
        archive.store(target, params, request_headers, live_response)

        # The second, identical insertion must be rejected as a duplicate
        with self.assertRaisesRegex(ArchiveError, "duplicated entry"):
            archive.store(target, params, request_headers, live_response)
Esempio n. 9
0
    def test_fetch_from_archive_exception(self):
        """Check that serialized exceptions are raised again when read from an archive"""

        archive = Archive.create(os.path.join(self.test_path, 'myarchive'))

        httpretty.register_uri(httpretty.GET, CLIENT_SPIDERMAN_URL,
                               body="bad", status=404)

        # Settings shared by both clients
        common = {'sleep_time': 0.1, 'max_retries': 1, 'archive': archive}

        # Populate the archive and check that an exception is thrown
        # when fetching data from the API
        api_client = MockedClient(CLIENT_API_URL, **common)
        with self.assertRaises(requests.exceptions.HTTPError):
            api_client.fetch(CLIENT_SPIDERMAN_URL)

        # Retrieve data from the archive and check that an exception is
        # thrown, as happened when fetching data from the API
        archive_client = MockedClient(CLIENT_API_URL, from_archive=True, **common)
        with self.assertRaises(requests.exceptions.HTTPError):
            archive_client.fetch(CLIENT_SPIDERMAN_URL)
Esempio n. 10
0
    def test_create(self):
        """Test that a new, empty archive is created

        Checks that the archive file exists at the requested path, that
        every metadata property is still unset and that both the archive
        and the metadata tables contain no rows.
        """

        archive_path = os.path.join(self.test_path, 'myarchive')
        archive = Archive.create(archive_path)

        # Archive file was created
        self.assertEqual(archive.archive_path, archive_path)
        self.assertTrue(os.path.exists(archive.archive_path))

        # Properties are not set until metadata is initialized
        self.assertIsNone(archive.created_on)
        self.assertIsNone(archive.origin)
        self.assertIsNone(archive.backend_name)
        self.assertIsNone(archive.backend_version)
        self.assertIsNone(archive.category)
        self.assertIsNone(archive.backend_params)

        # Tables are empty
        for table in (Archive.ARCHIVE_TABLE, Archive.METADATA_TABLE):
            nrows = count_number_rows(archive_path, table)
            self.assertEqual(nrows, 0)
Esempio n. 11
0
 def setUp(self):
     """Create a temporary directory and a fresh archive inside it for each test"""
     self.test_path = tempfile.mkdtemp(prefix='perceval-mozilla_')
     # The archive file lives inside the temporary directory
     self.archive = Archive.create(os.path.join(self.test_path, 'myarchive'))
    def test_store(self):
        """Test whether data is properly stored in the archive

        Three requests are performed and stored; the rows of the `archive`
        table are then read directly with `sqlite3` and compared, one by
        one, against the hashcode, response, uri, payload and headers that
        are expected for each request.
        """

        # Each entry is (uri, payload, headers)
        data_requests = [
            ("https://example.com/", {
                'q': 'issues',
                'date': '2017-01-10'
            }, {}),
            ("https://example.com/", {
                'q': 'issues',
                'date': '2018-01-01'
            }, {}),
            ("https://example.com/tasks", {
                'task_id': 10
            }, {
                'Accept': 'application/json'
            }),
        ]

        # Hashcode expected for each request, in the same order
        expected_hashcodes = [
            '0fa4ce047340780f08efca92f22027514263521d',
            '3879a6f12828b7ac3a88b7167333e86168f2f5d2',
            'ef38f574a0745b63a056e7befdb7a06e7cf1549b',
        ]

        httpretty.register_uri(httpretty.GET,
                               "https://example.com/",
                               body='{"hey": "there"}',
                               status=200)
        httpretty.register_uri(httpretty.GET,
                               "https://example.com/tasks",
                               body='{"task": "my task"}',
                               status=200)

        archive_path = os.path.join(self.test_path, 'myarchive')
        archive = Archive.create(archive_path)

        # Store data in the archive
        responses = []

        for uri, payload, headers in data_requests:
            response = requests.get(uri, params=payload, headers=headers)
            archive.store(uri, payload, headers, response)
            responses.append(response)

        # Read the rows back directly from the database file; the connection
        # is always closed so the test does not leak the handle
        db = sqlite3.connect(archive.archive_path)
        try:
            cursor = db.cursor()
            cursor.execute(
                "SELECT hashcode, data, uri, payload, headers FROM archive")
            data_stored = cursor.fetchall()
            cursor.close()
        finally:
            db.close()

        self.assertEqual(len(data_stored), len(data_requests))

        # Rows are compared positionally with the requests (the query has no
        # ORDER BY, so this relies on rows coming back in insertion order).
        # pickle.loads is only applied to data this test stored itself.
        checks = zip(data_stored, data_requests, responses, expected_hashcodes)

        for ds, dr, response, hashcode in checks:
            self.assertEqual(ds[0], hashcode)
            self.assertEqual(pickle.loads(ds[1]).url, response.url)
            self.assertEqual(ds[2], dr[0])
            self.assertEqual(pickle.loads(ds[3]), dr[1])
            self.assertEqual(pickle.loads(ds[4]), dr[2])