Example #1
0
    def test_package_name(self):
        """Verify the distribution package name built for several version combos."""
        # Pre-normalized inputs can be passed straight through.
        self.assertEqual(
            "spark-3.0.0-bin-hadoop3.2",
            checked_package_name("spark-3.0.0", "hadoop3.2", "hive2.3"))

        # Raw (spark, hadoop, hive) triples are normalized via checked_versions
        # first; each should yield the expected package name.
        cases = [
            (("3.2.0", "2", "2.3"), "spark-3.2.0-bin-hadoop2.7"),
            (("3.3.0", "2", "2.3"), "spark-3.3.0-bin-hadoop2"),
            (("3.2.0", "3", "2.3"), "spark-3.2.0-bin-hadoop3.2"),
            (("3.3.0", "3", "2.3"), "spark-3.3.0-bin-hadoop3"),
        ]
        for raw_triple, expected_name in cases:
            normalized = checked_versions(*raw_triple)
            self.assertEqual(expected_name, checked_package_name(*normalized))
Example #2
0
    def test_install_spark(self):
        """Download and unpack one Spark distribution, then sanity-check layout."""
        # Only a single version combination is exercised: each case requires
        # downloading a full Spark distribution, which is expensive.
        spark_version, hadoop_version, hive_version = checked_versions(
            "3.0.1", "3.2", "2.3")

        with tempfile.TemporaryDirectory() as tmp_dir:
            install_spark(
                dest=tmp_dir,
                spark_version=spark_version,
                hadoop_version=hadoop_version,
                hive_version=hive_version,
            )

            # A valid unpacked distribution contains these well-known entries.
            self.assertTrue(os.path.isdir("%s/jars" % tmp_dir))
            self.assertTrue(os.path.exists("%s/bin/spark-submit" % tmp_dir))
            self.assertTrue(os.path.exists("%s/RELEASE" % tmp_dir))
Example #3
0
    def test_checked_versions(self):
        """Check normalization and validation performed by checked_versions."""
        test_version = "3.0.1"  # Just pick one version to test.

        # Positive cases: raw input triple -> normalized
        # (spark, hadoop, hive) triple.
        positive_cases = [
            (("spark-3.0.0", "hadoop2", "hive2.3"),
             ("spark-3.0.0", "hadoop2.7", "hive2.3")),
            (("3.0.0", "2", "2.3"),
             ("spark-3.0.0", "hadoop2.7", "hive2.3")),
            (("2.4.1", "without", "2.3"),
             ("spark-2.4.1", "without-hadoop", "hive2.3")),
            (("spark-3.0.1", "without-hadoop", "hive2.3"),
             ("spark-3.0.1", "without-hadoop", "hive2.3")),
            (("spark-3.3.0", "hadoop3", "hive2.3"),
             ("spark-3.3.0", "hadoop3", "hive2.3")),
            (("spark-3.3.0", "hadoop2", "hive2.3"),
             ("spark-3.3.0", "hadoop2", "hive2.3")),
        ]
        for raw_triple, expected_triple in positive_cases:
            self.assertEqual(expected_triple, checked_versions(*raw_triple))

        # Negative cases: every unsupported Hadoop/Hive pairing is rejected.
        for hadoop_version, hive_version in UNSUPPORTED_COMBINATIONS:
            with self.assertRaisesRegex(RuntimeError, "Hive.*should.*Hadoop"):
                checked_versions(
                    spark_version=test_version,
                    hadoop_version=hadoop_version,
                    hive_version=hive_version,
                )

        # A Spark version without the expected prefix is rejected.
        with self.assertRaisesRegex(
                RuntimeError, "Spark version should start with 'spark-'"):
            checked_versions(spark_version="malformed",
                             hadoop_version=DEFAULT_HADOOP,
                             hive_version=DEFAULT_HIVE)

        # Unrecognized Hadoop / Hive version strings are rejected as well.
        with self.assertRaisesRegex(RuntimeError,
                                    "Spark distribution.*malformed.*"):
            checked_versions(spark_version=test_version,
                             hadoop_version="malformed",
                             hive_version=DEFAULT_HIVE)

        with self.assertRaisesRegex(RuntimeError,
                                    "Spark distribution.*malformed.*"):
            checked_versions(spark_version=test_version,
                             hadoop_version=DEFAULT_HADOOP,
                             hive_version="malformed")

        # Hive 1.2 builds are not distributed for this Hadoop version.
        with self.assertRaisesRegex(
                RuntimeError,
                "Spark distribution of hive1.2 is not supported"):
            checked_versions(spark_version=test_version,
                             hadoop_version="hadoop3",
                             hive_version="hive1.2")