# Example #1
    def test_upload_archive(self):
        """A valid xz-compressed tar is extracted, uploaded to HDFS, and cleaned up."""
        tar_path = self.create_test_tar()
        pxz_process = ListenbrainzHDFSUploader().get_pxz_output(tar_path)
        extraction_dir = tempfile.mkdtemp()

        # Stream the decompressed tar straight from the pxz subprocess.
        with tarfile.open(fileobj=pxz_process.stdout, mode='r|') as archive:
            ListenbrainzHDFSUploader().upload_archive(
                extraction_dir,
                archive,
                '/test',
                schema.listen_schema,
                ListenbrainzDataUploader().process_json_listens,
            )

        # Exactly one (year) directory should exist directly under /test.
        year_dirs = next(utils.hdfs_walk('/test', depth=1))[1]
        self.assertEqual(len(year_dirs), 1)

        # The single uploaded listen should round-trip back out of HDFS.
        uploaded_df = utils.read_files_from_HDFS('/test/2020/1.parquet')
        self.assertEqual(uploaded_df.count(), 1)

        # The local temporary extraction directory must have been removed.
        self.assertFalse(utils.path_exists(extraction_dir))

        utils.delete_dir('/test', recursive=True)
    def test_upload_archive_failed(self):
        """A corrupt tar raises DumpInvalidException and leaves nothing in HDFS."""
        broken_tar = MagicMock()
        # Any extraction attempt blows up as if the archive were unreadable.
        broken_tar.extract.side_effect = tarfile.ReadError()
        broken_tar.__iter__.return_value = [MagicMock()]

        extraction_dir = tempfile.mkdtemp()
        with self.assertRaises(DumpInvalidException):
            ListenbrainzHDFSUploader().upload_archive(
                extraction_dir,
                broken_tar,
                '/test',
                schema.listen_schema,
                ListenbrainzDataUploader().process_json_listens,
            )

        # On failure nothing should have been written to the HDFS target path.
        self.assertFalse(utils.path_exists('/test'))
# Example #3
 def test_get_pxz_output(self, mock_popen):
     """get_pxz_output spawns exactly one pxz subprocess and returns its handle."""
     # NOTE(review): mock_popen is injected by a patch decorator outside this view.
     result = ListenbrainzHDFSUploader().get_pxz_output('faketar', threads=8)
     mock_popen.assert_called_once()
     self.assertEqual(result, mock_popen.return_value)
# Example #4
 def test_is_json_file(self):
     """Filenames ending in .json are recognised as JSON; others are not."""
     self.assertTrue(
         ListenbrainzHDFSUploader()._is_json_file('file.json'))
     self.assertFalse(
         ListenbrainzHDFSUploader()._is_json_file('file.txt'))
# Example #5
 def test_init(self, mock_spark_init, mock_hdfs_init):
     """Constructing the uploader connects to HDFS and initialises Spark once each."""
     # NOTE(review): both mocks are injected by patch decorators outside this view.
     uploader = ListenbrainzHDFSUploader()
     mock_hdfs_init.assert_called_once_with(config.HDFS_HTTP_URI)
     mock_spark_init.assert_called_once()