# Imports assumed from the surrounding test module; adjust the paths if the
# actual package layout differs.
import tarfile
import tempfile

from unittest.mock import MagicMock, patch

from listenbrainz_spark import config, schema, utils
from listenbrainz_spark.exceptions import DumpInvalidException
from listenbrainz_spark.hdfs import ListenbrainzHDFSUploader
from listenbrainz_spark.hdfs.upload import ListenbrainzDataUploader


def test_upload_archive(self):
    archive_path = self.create_test_tar()
    pxz = ListenbrainzHDFSUploader().get_pxz_output(archive_path)
    tmp_dump_dir = tempfile.mkdtemp()
    with tarfile.open(fileobj=pxz.stdout, mode='r|') as tar:
        ListenbrainzHDFSUploader().upload_archive(
            tmp_dump_dir,
            tar,
            '/test',
            schema.listen_schema,
            ListenbrainzDataUploader().process_json_listens,
        )

    # Exactly one year directory should have been written under /test.
    walk = utils.hdfs_walk('/test', depth=1)
    dirs = next(walk)[1]
    self.assertEqual(len(dirs), 1)

    # The single listen in the test archive ends up in 2020/1.parquet.
    df = utils.read_files_from_HDFS('/test/2020/1.parquet')
    self.assertEqual(df.count(), 1)

    # The temporary dump directory must be cleaned up after the upload.
    status = utils.path_exists(tmp_dump_dir)
    self.assertFalse(status)

    utils.delete_dir('/test', recursive=True)
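
# The create_test_tar helper used above is not shown in this file; the sketch
# below is a hypothetical implementation, assuming the fixture is an
# xz-compressed tar holding a single listens JSON file for 2020.
def create_test_tar(self):
    # Local imports keep this sketch self-contained.
    import json
    import os

    tmp_dir = tempfile.mkdtemp()
    json_path = os.path.join(tmp_dir, '1.json')
    with open(json_path, 'w') as f:
        # Hypothetical payload: one newline-delimited JSON listen.
        f.write(json.dumps({'listened_at': '2020-01-01T00:00:00Z'}) + '\n')

    archive_path = os.path.join(tmp_dir, 'test-dump.tar.xz')
    with tarfile.open(archive_path, 'w:xz') as tar:
        # pxz decompresses xz archives, so get_pxz_output can stream this.
        tar.add(json_path, arcname='dump/2020/1.json')
    return archive_path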
def test_upload_archive_failed(self):
    # Simulate a corrupt archive: extracting any member raises ReadError.
    faulty_tar = MagicMock()
    faulty_tar.extract.side_effect = tarfile.ReadError()
    member = MagicMock()
    faulty_tar.__iter__.return_value = [member]

    tmp_dump_dir = tempfile.mkdtemp()
    self.assertRaises(
        DumpInvalidException,
        ListenbrainzHDFSUploader().upload_archive,
        tmp_dump_dir,
        faulty_tar,
        '/test',
        schema.listen_schema,
        ListenbrainzDataUploader().process_json_listens,
    )

    # Nothing should have been written to HDFS for the failed upload.
    status = utils.path_exists('/test')
    self.assertFalse(status)
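
# The test above pins down the error contract of upload_archive. Below is a
# minimal sketch of that contract, assuming the uploader removes its local
# temporary directory before re-raising; it is illustrative only, not the
# actual implementation.
def _upload_archive_error_contract_sketch(tmp_dump_dir, tar):
    import shutil

    try:
        for member in tar:
            tar.extract(member)
    except tarfile.ReadError as e:
        # Assumed behaviour: clean up partial local state, then surface a
        # domain-specific error instead of the raw tarfile one.
        shutil.rmtree(tmp_dump_dir, ignore_errors=True)
        raise DumpInvalidException('could not extract dump member') from e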
# The patch target is assumed; point it at wherever the uploader module
# actually imports subprocess from.
@patch('listenbrainz_spark.hdfs.subprocess.Popen')
def test_get_pxz_output(self, mock_popen):
    pxz = ListenbrainzHDFSUploader().get_pxz_output('faketar', threads=8)
    mock_popen.assert_called_once()
    self.assertEqual(pxz, mock_popen.return_value)
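
# get_pxz_output is expected to shell out to pxz and hand back the Popen
# object, roughly as sketched below (an assumed shape; the exact flags may
# differ from the real implementation).
def _get_pxz_output_sketch(archive, threads=8):
    import subprocess

    # Decompress to stdout so callers can stream the result straight into
    # tarfile.open(fileobj=..., mode='r|'), as test_upload_archive does.
    return subprocess.Popen(
        ['pxz', '--decompress', '--stdout', archive, '-T{}'.format(threads)],
        stdout=subprocess.PIPE,
    )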
def test_is_json_file(self):
    self.assertTrue(ListenbrainzHDFSUploader()._is_json_file('file.json'))
    self.assertFalse(ListenbrainzHDFSUploader()._is_json_file('file.txt'))
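
# _is_json_file presumably just checks the file extension; a one-line sketch
# of the assumed behaviour:
def _is_json_file_sketch(filename):
    return filename.endswith('.json')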
# Patch targets are assumed; note that decorators apply bottom-up, so the one
# closest to the function supplies mock_spark_init.
@patch('listenbrainz_spark.hdfs.hdfs_connection.init_hdfs')
@patch('listenbrainz_spark.hdfs.init_spark_session')
def test_init(self, mock_spark_init, mock_hdfs_init):
    ListenbrainzHDFSUploader()
    mock_hdfs_init.assert_called_once_with(config.HDFS_HTTP_URI)
    mock_spark_init.assert_called_once()
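
# The constructor contract verified above, as a comment-only sketch (assumed:
# HDFS is initialised with config.HDFS_HTTP_URI and a Spark session is
# started; the helper names are illustrative, not the real ones):
#
#     class ListenbrainzHDFSUploader:
#         def __init__(self):
#             hdfs_connection.init_hdfs(config.HDFS_HTTP_URI)
#             init_spark_session('uploader')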