def build_apache_spark(build_tool, hadoop_version):
    """Will build Spark against Hive v1.2.1 given the passed in build tool (either `sbt` or
    `maven`). Defaults to using `sbt`."""

    set_title_and_block("Building Spark", "BLOCK_BUILD")

    rm_r("lib_managed")

    if build_tool == "maven":
        build_spark_maven(hadoop_version)
    else:
        build_spark_sbt(hadoop_version)

def build_apache_spark(build_tool, extra_profiles):
    """Will build Spark with the extra profiles and the passed in build tool (either `sbt` or
    `maven`). Defaults to using `sbt`."""

    set_title_and_block("Building Spark", "BLOCK_BUILD")

    rm_r("lib_managed")

    if build_tool == "maven":
        build_spark_maven(extra_profiles)
    else:
        build_spark_sbt(extra_profiles)

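
# The two variants above differ in how the build is parameterized: the first
# hard-codes a Hive 1.2.1 build and threads a single Hadoop profile through,
# while the second accepts a precomputed list of extra profiles. A minimal
# sketch of a profile-mapping helper in the spirit of the get_hadoop_profiles
# calls used in the main() variants below; the name hadoop_profiles_sketch and
# the exact profile-to-flag table are illustrative assumptions, not the
# project's code:
def hadoop_profiles_sketch(hadoop_version):
    """Maps a Hadoop profile name (e.g. "hadoop3.2") to build tool flags."""
    known_profiles = {
        "hadoop2.7": ["-Phadoop-2.7"],
        "hadoop3.2": ["-Phadoop-3.2"],
    }
    if hadoop_version not in known_profiles:
        # Fail fast so a typo in HADOOP_PROFILE does not silently build defaults.
        raise ValueError("unknown Hadoop profile: %s" % hadoop_version)
    return known_profiles[hadoop_version]
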
def main():
    opts = parse_opts()

    # Ensure the user home directory (HOME) is valid and is an absolute directory
    if not USER_HOME or not os.path.isabs(USER_HOME):
        print("[error] Cannot determine your home directory as an absolute path;",
              " ensure the $HOME environment variable is set properly.")
        sys.exit(1)

    os.chdir(SPARK_HOME)

    rm_r(os.path.join(SPARK_HOME, "work"))
    rm_r(os.path.join(USER_HOME, ".ivy2", "local", "org.apache.spark"))
    rm_r(os.path.join(USER_HOME, ".ivy2", "cache", "org.apache.spark"))

    os.environ["CURRENT_BLOCK"] = str(ERROR_CODES["BLOCK_GENERAL"])

    java_exe = determine_java_executable()
    if not java_exe:
        print("[error] Cannot find a version of `java` on the system; please",
              " install one and retry.")
        sys.exit(2)

    java_version = determine_java_version(java_exe)

    # install SparkR
    if which("R"):
        run_cmd([os.path.join(SPARK_HOME, "R", "install-dev.sh")])
    else:
        print("Cannot install SparkR as R was not found in PATH")

    if os.environ.get("AMPLAB_JENKINS"):
        # if we're on the Amplab Jenkins build servers setup variables
        # to reflect the environment settings
        build_tool = os.environ.get("AMPLAB_JENKINS_BUILD_TOOL", "sbt")
        hadoop_version = os.environ.get("AMPLAB_JENKINS_BUILD_PROFILE", "hadoop2.6")
        test_env = "amplab_jenkins"
        # add path for Python3 in Jenkins if we're calling from a Jenkins machine
        os.environ["PATH"] = "/home/anaconda/envs/py3k/bin:" + os.environ.get("PATH")
    else:
        # else we're running locally and can use local settings
        build_tool = "sbt"
        hadoop_version = os.environ.get("HADOOP_PROFILE", "hadoop2.6")
        test_env = "local"

    print("[info] Using build tool", build_tool, "with Hadoop profile", hadoop_version,
          "under environment", test_env)

    changed_modules = None
    changed_files = None
    if test_env == "amplab_jenkins" and os.environ.get("AMP_JENKINS_PRB"):
        target_branch = os.environ["ghprbTargetBranch"]
        changed_files = identify_changed_files_from_git_commits("HEAD",
                                                                target_branch=target_branch)
        changed_modules = determine_modules_for_files(changed_files)
        excluded_tags = determine_tags_to_exclude(changed_modules)
    if not changed_modules:
        changed_modules = [modules.root]
        excluded_tags = []
    print("[info] Found the following changed modules:",
          ", ".join(x.name for x in changed_modules))

    # setup environment variables
    # note - the 'root' module doesn't collect environment variables for all modules, because the
    # environment variables should not be set if a module is not changed, even if running the
    # 'root' module. So here we should use changed_modules rather than test_modules.
    test_environ = {}
    for m in changed_modules:
        test_environ.update(m.environ)
    setup_test_environ(test_environ)

    test_modules = determine_modules_to_test(changed_modules)

    # license checks
    run_apache_rat_checks()

    # style checks
    if not changed_files or any(f.endswith(".scala")
                                or f.endswith("scalastyle-config.xml")
                                for f in changed_files):
        run_scala_style_checks()
    if not changed_files or any(f.endswith(".java")
                                or f.endswith("checkstyle.xml")
                                or f.endswith("checkstyle-suppressions.xml")
                                for f in changed_files):
        # run_java_style_checks()
        pass
    if not changed_files or any(f.endswith(".py") for f in changed_files):
        run_python_style_checks()
    if not changed_files or any(f.endswith(".R") for f in changed_files):
        run_sparkr_style_checks()

    # determine if docs were changed and if we're inside the amplab environment
    # note - the below commented out until *all* Jenkins workers can get `jekyll` installed
    # if "DOCS" in changed_modules and test_env == "amplab_jenkins":
    #     build_spark_documentation()

    if any(m.should_run_build_tests for m in test_modules):
        run_build_tests()

    # spark build
    build_apache_spark(build_tool, hadoop_version)

    # backwards compatibility checks
    if build_tool == "sbt":
        # Note: compatibility tests only supported in sbt for now
        detect_binary_inop_with_mima(hadoop_version)
        # Since we did not build assembly/package before running dev/mima, we need to
        # do it here because the tests still rely on it; see SPARK-13294 for details.
        build_spark_assembly_sbt(hadoop_version)

    # run the test suites
    run_scala_tests(build_tool, hadoop_version, test_modules, excluded_tags)

    modules_with_python_tests = [m for m in test_modules if m.python_test_goals]
    if modules_with_python_tests:
        run_python_tests(modules_with_python_tests, opts.parallelism)
        run_python_packaging_tests()
    if any(m.should_run_r_tests for m in test_modules):
        run_sparkr_tests()

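
# The main() variant above collects per-module environment variables into
# test_environ and hands them to setup_test_environ. A minimal sketch of what
# such a helper could look like, assuming it only needs to export the mapping
# into the current process environment so child test processes inherit it
# (the log format is an illustrative assumption):
import os


def setup_test_environ_sketch(environ):
    """Exports each key/value pair into os.environ, logging as it goes."""
    print("[info] Setup the following environment variables for tests: ")
    for (k, v) in sorted(environ.items()):
        print(k, "=", v)
        os.environ[k] = v
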
def main():
    opts = parse_opts()

    # Ensure the user home directory (HOME) is valid and is an absolute directory
    if not USER_HOME or not os.path.isabs(USER_HOME):
        print("[error] Cannot determine your home directory as an absolute path;",
              " ensure the $HOME environment variable is set properly.")
        sys.exit(1)

    os.chdir(SPARK_HOME)

    rm_r(os.path.join(SPARK_HOME, "work"))
    rm_r(os.path.join(USER_HOME, ".ivy2", "local", "org.apache.spark"))
    rm_r(os.path.join(USER_HOME, ".ivy2", "cache", "org.apache.spark"))

    os.environ["CURRENT_BLOCK"] = str(ERROR_CODES["BLOCK_GENERAL"])

    java_exe = determine_java_executable()
    if not java_exe:
        print("[error] Cannot find a version of `java` on the system; please",
              " install one and retry.")
        sys.exit(2)

    # Install SparkR
    should_only_test_modules = opts.modules is not None
    test_modules = []
    if should_only_test_modules:
        str_test_modules = [m.strip() for m in opts.modules.split(",")]
        test_modules = [m for m in modules.all_modules if m.name in str_test_modules]

    if not should_only_test_modules or modules.sparkr in test_modules:
        # If test modules are specified, we will not run the R linter.
        # SparkR needs the manual SparkR installation.
        if which("R"):
            run_cmd([os.path.join(SPARK_HOME, "R", "install-dev.sh")])
        else:
            print("Cannot install SparkR as R was not found in PATH")

    if os.environ.get("AMPLAB_JENKINS"):
        # if we're on the Amplab Jenkins build servers setup variables
        # to reflect the environment settings
        build_tool = os.environ.get("AMPLAB_JENKINS_BUILD_TOOL", "sbt")
        scala_version = os.environ.get("AMPLAB_JENKINS_BUILD_SCALA_PROFILE")
        hadoop_version = os.environ.get("AMPLAB_JENKINS_BUILD_PROFILE", "hadoop3.2")
        hive_version = os.environ.get("AMPLAB_JENKINS_BUILD_HIVE_PROFILE", "hive2.3")
        test_env = "amplab_jenkins"
        # add path for Python3 in Jenkins if we're calling from a Jenkins machine
        # TODO(sknapp): after all builds are ported to the ubuntu workers, change this to be:
        # /home/jenkins/anaconda2/envs/py36/bin
        os.environ["PATH"] = "/home/anaconda/envs/py36/bin:" + os.environ.get("PATH")
    else:
        # else we're running locally or GitHub Actions.
        build_tool = "sbt"
        scala_version = os.environ.get("SCALA_PROFILE")
        hadoop_version = os.environ.get("HADOOP_PROFILE", "hadoop3.2")
        hive_version = os.environ.get("HIVE_PROFILE", "hive2.3")
        if "GITHUB_ACTIONS" in os.environ:
            test_env = "github_actions"
        else:
            test_env = "local"

    extra_profiles = get_hadoop_profiles(hadoop_version) + get_hive_profiles(hive_version) + \
        get_scala_profiles(scala_version)

    print("[info] Using build tool", build_tool, "with profiles",
          *(extra_profiles + ["under environment", test_env]))

    changed_modules = []
    changed_files = []
    included_tags = []
    excluded_tags = []
    if should_only_test_modules:
        # We're likely in the forked repository.
        is_apache_spark_ref = os.environ.get("APACHE_SPARK_REF", "") != ""
        # We're likely in the main repo build.
        is_github_prev_sha = os.environ.get("GITHUB_PREV_SHA", "") != ""
        # Otherwise, we're in either a periodic job in GitHub Actions or somewhere else.

        # If we're running the tests in GitHub Actions, attempt to detect and test
        # only the affected modules.
        if test_env == "github_actions" and (is_apache_spark_ref or is_github_prev_sha):
            if is_apache_spark_ref:
                changed_files = identify_changed_files_from_git_commits(
                    "HEAD", target_ref=os.environ["APACHE_SPARK_REF"])
            elif is_github_prev_sha:
                changed_files = identify_changed_files_from_git_commits(
                    os.environ["GITHUB_SHA"], target_ref=os.environ["GITHUB_PREV_SHA"])

            modules_to_test = determine_modules_to_test(
                determine_modules_for_files(changed_files), deduplicated=False)

            if modules.root not in modules_to_test:
                # If root module is not found, only test the intersected modules.
                # If root module is found, just run the modules as specified initially.
                test_modules = list(set(modules_to_test).intersection(test_modules))

        changed_modules = test_modules
        if len(changed_modules) == 0:
            print("[info] There are no modules to test, exiting without testing.")
            return
    # If we're running the tests in AMPLab Jenkins, calculate the diff from the targeted branch,
    # and detect modules to test.
    elif test_env == "amplab_jenkins" and os.environ.get("AMP_JENKINS_PRB"):
        target_branch = os.environ["ghprbTargetBranch"]
        changed_files = identify_changed_files_from_git_commits(
            "HEAD", target_branch=target_branch)
        changed_modules = determine_modules_for_files(changed_files)
        test_modules = determine_modules_to_test(changed_modules)
        excluded_tags = determine_tags_to_exclude(changed_modules)

    # If there is no changed module found, test all.
    if not changed_modules:
        changed_modules = [modules.root]
    if not test_modules:
        test_modules = determine_modules_to_test(changed_modules)

    if opts.excluded_tags:
        excluded_tags.extend([t.strip() for t in opts.excluded_tags.split(",")])
    if opts.included_tags:
        included_tags.extend([t.strip() for t in opts.included_tags.split(",")])

    print("[info] Found the following changed modules:",
          ", ".join(x.name for x in changed_modules))

    # setup environment variables
    # note - the 'root' module doesn't collect environment variables for all modules, because the
    # environment variables should not be set if a module is not changed, even if running the
    # 'root' module. So here we should use changed_modules rather than test_modules.
    test_environ = {}
    for m in changed_modules:
        test_environ.update(m.environ)
    setup_test_environ(test_environ)

    if scala_version is not None:
        # If not set, assume this is default and doesn't need to change.
        switch_scala_version(scala_version)

    should_run_java_style_checks = False
    if not should_only_test_modules:
        # license checks
        run_apache_rat_checks()

        # style checks
        if not changed_files or any(f.endswith(".scala")
                                    or f.endswith("scalastyle-config.xml")
                                    for f in changed_files):
            run_scala_style_checks(extra_profiles)
        if not changed_files or any(f.endswith(".java")
                                    or f.endswith("checkstyle.xml")
                                    or f.endswith("checkstyle-suppressions.xml")
                                    for f in changed_files):
            # Run SBT Checkstyle after the build to prevent a side-effect to the build.
            should_run_java_style_checks = True
        if not changed_files or any(f.endswith("lint-python")
                                    or f.endswith("tox.ini")
                                    or f.endswith(".py")
                                    for f in changed_files):
            run_python_style_checks()
        if not changed_files or any(f.endswith(".R")
                                    or f.endswith("lint-r")
                                    or f.endswith(".lintr")
                                    for f in changed_files):
            run_sparkr_style_checks()

    # determine if docs were changed and if we're inside the amplab environment
    # note - the below commented out until *all* Jenkins workers can get the Bundler gem installed
    # if "DOCS" in changed_modules and test_env == "amplab_jenkins":
    #     build_spark_documentation()

    if any(m.should_run_build_tests for m in test_modules) and test_env != "amplab_jenkins":
        run_build_tests()

    # spark build
    build_apache_spark(build_tool, extra_profiles)

    # backwards compatibility checks
    if build_tool == "sbt":
        # Note: compatibility tests only supported in sbt for now
        if not os.environ.get("SKIP_MIMA"):
            detect_binary_inop_with_mima(extra_profiles)
        # Since we did not build assembly/package before running dev/mima, we need to
        # do it here because the tests still rely on it; see SPARK-13294 for details.
        build_spark_assembly_sbt(extra_profiles, should_run_java_style_checks)

    # run the test suites
    run_scala_tests(build_tool, extra_profiles, test_modules, excluded_tags, included_tags)

    modules_with_python_tests = [m for m in test_modules if m.python_test_goals]
    if modules_with_python_tests:
        # We only run PySpark tests with coverage report in one specific job with
        # Spark master with SBT in Jenkins.
        is_sbt_master_job = "SPARK_MASTER_SBT_HADOOP_2_7" in os.environ
        run_python_tests(
            modules_with_python_tests, opts.parallelism, with_coverage=is_sbt_master_job)
        run_python_packaging_tests()
    if any(m.should_run_r_tests for m in test_modules):
        run_sparkr_tests()

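
# The version above drives module selection from parse_opts() rather than from
# environment variables alone. Based on the attributes it reads (opts.modules,
# opts.excluded_tags, opts.included_tags, opts.parallelism), a minimal
# argparse-based sketch of such an option parser; the flag spellings and
# defaults here are assumptions for illustration, not the project's parser:
import argparse


def parse_opts_sketch():
    parser = argparse.ArgumentParser(prog="run-tests")
    parser.add_argument("--modules", default=None,
                        help="comma-separated list of module names to test")
    parser.add_argument("--excluded-tags", dest="excluded_tags", default=None,
                        help="comma-separated test tags to skip")
    parser.add_argument("--included-tags", dest="included_tags", default=None,
                        help="comma-separated test tags to run exclusively")
    parser.add_argument("--parallelism", type=int, default=8,
                        help="number of suites to run in parallel")
    return parser.parse_args()
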
def main():
    opts = parse_opts()

    # Ensure the user home directory (HOME) is valid and is an absolute directory
    if not USER_HOME or not os.path.isabs(USER_HOME):
        print("[error] Cannot determine your home directory as an absolute path;",
              " ensure the $HOME environment variable is set properly.")
        sys.exit(1)

    os.chdir(SPARK_HOME)

    rm_r(os.path.join(SPARK_HOME, "work"))
    rm_r(os.path.join(USER_HOME, ".ivy2", "local", "org.apache.spark"))
    rm_r(os.path.join(USER_HOME, ".ivy2", "cache", "org.apache.spark"))

    os.environ["CURRENT_BLOCK"] = str(ERROR_CODES["BLOCK_GENERAL"])

    java_exe = determine_java_executable()
    if not java_exe:
        print("[error] Cannot find a version of `java` on the system; please",
              " install one and retry.")
        sys.exit(2)

    java_version = determine_java_version(java_exe)

    if java_version.minor < 8:
        print("[warn] Java 8 tests will not run because JDK version is < 1.8.")

    # install SparkR
    if which("R"):
        run_cmd([os.path.join(SPARK_HOME, "R", "install-dev.sh")])
    else:
        print("Cannot install SparkR as R was not found in PATH")

    if os.environ.get("AMPLAB_JENKINS"):
        # if we're on the Amplab Jenkins build servers setup variables
        # to reflect the environment settings
        build_tool = os.environ.get("AMPLAB_JENKINS_BUILD_TOOL", "sbt")
        hadoop_version = os.environ.get("AMPLAB_JENKINS_BUILD_PROFILE", "hadoop2.6")
        test_env = "amplab_jenkins"
        # add path for Python3 in Jenkins if we're calling from a Jenkins machine
        os.environ["PATH"] = "/home/anaconda/envs/py3k/bin:" + os.environ.get("PATH")
    else:
        # else we're running locally and can use local settings
        build_tool = "sbt"
        hadoop_version = os.environ.get("HADOOP_PROFILE", "hadoop2.6")
        test_env = "local"

    print("[info] Using build tool", build_tool, "with Hadoop profile", hadoop_version,
          "under environment", test_env)

    changed_modules = None
    changed_files = None
    if test_env == "amplab_jenkins" and os.environ.get("AMP_JENKINS_PRB"):
        target_branch = os.environ["ghprbTargetBranch"]
        changed_files = identify_changed_files_from_git_commits("HEAD",
                                                                target_branch=target_branch)
        changed_modules = determine_modules_for_files(changed_files)
        excluded_tags = determine_tags_to_exclude(changed_modules)
    if not changed_modules:
        changed_modules = [modules.root]
        excluded_tags = []
    print("[info] Found the following changed modules:",
          ", ".join(x.name for x in changed_modules))

    # setup environment variables
    # note - the 'root' module doesn't collect environment variables for all modules, because the
    # environment variables should not be set if a module is not changed, even if running the
    # 'root' module. So here we should use changed_modules rather than test_modules.
    test_environ = {}
    for m in changed_modules:
        test_environ.update(m.environ)
    setup_test_environ(test_environ)

    test_modules = determine_modules_to_test(changed_modules)

    # license checks
    run_apache_rat_checks()

    # style checks
    if not changed_files or any(f.endswith(".scala")
                                or f.endswith("scalastyle-config.xml")
                                for f in changed_files):
        run_scala_style_checks()
    if not changed_files or any(f.endswith(".java")
                                or f.endswith("checkstyle.xml")
                                or f.endswith("checkstyle-suppressions.xml")
                                for f in changed_files):
        # run_java_style_checks()
        pass
    if not changed_files or any(f.endswith(".py") for f in changed_files):
        run_python_style_checks()
    if not changed_files or any(f.endswith(".R") for f in changed_files):
        run_sparkr_style_checks()

    # determine if docs were changed and if we're inside the amplab environment
    # note - the below commented out until *all* Jenkins workers can get `jekyll` installed
    # if "DOCS" in changed_modules and test_env == "amplab_jenkins":
    #     build_spark_documentation()

    if any(m.should_run_build_tests for m in test_modules):
        run_build_tests()

    # spark build
    build_apache_spark(build_tool, hadoop_version)

    # backwards compatibility checks
    if build_tool == "sbt":
        # Note: compatibility tests only supported in sbt for now
        detect_binary_inop_with_mima(hadoop_version)
        # Since we did not build assembly/package before running dev/mima, we need to
        # do it here because the tests still rely on it; see SPARK-13294 for details.
        build_spark_assembly_sbt(hadoop_version)

    # run the test suites
    run_scala_tests(build_tool, hadoop_version, test_modules, excluded_tags)

    modules_with_python_tests = [m for m in test_modules if m.python_test_goals]
    if modules_with_python_tests:
        run_python_tests(modules_with_python_tests, opts.parallelism)
        run_python_packaging_tests()
    if any(m.should_run_r_tests for m in test_modules):
        run_sparkr_tests()

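
# When only some modules changed, the PR-builder path above narrows the Scala
# test run by excluding the test tags of every untouched module
# (determine_tags_to_exclude feeding run_scala_tests). A minimal sketch of a
# helper with that contract, assuming each module object carries a test_tags
# list; that attribute name is an assumption for illustration:
def tags_to_exclude_sketch(all_modules, changed_modules):
    """Returns the test tags owned by modules that did not change."""
    tags = []
    for m in all_modules:
        if m not in changed_modules:
            tags += m.test_tags
    return tags
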
def main():
    opts = parse_opts()

    # Ensure the user home directory (HOME) is valid and is an absolute directory
    if not USER_HOME or not os.path.isabs(USER_HOME):
        print("[error] Cannot determine your home directory as an absolute path;",
              " ensure the $HOME environment variable is set properly.")
        sys.exit(1)

    os.chdir(SPARK_HOME)

    rm_r(os.path.join(SPARK_HOME, "work"))
    rm_r(os.path.join(USER_HOME, ".ivy2", "local", "org.apache.spark"))
    rm_r(os.path.join(USER_HOME, ".ivy2", "cache", "org.apache.spark"))

    # os.environ values must be strings, so store the block code as a string.
    os.environ["CURRENT_BLOCK"] = str(ERROR_CODES["BLOCK_GENERAL"])

    java_exe = determine_java_executable()
    if not java_exe:
        print("[error] Cannot find a version of `java` on the system; please",
              " install one and retry.")
        sys.exit(2)

    java_version = determine_java_version(java_exe)

    if java_version.minor < 8:
        print("[warn] Java 8 tests will not run because JDK version is < 1.8.")

    if os.environ.get("AMPLAB_JENKINS"):
        # if we're on the Amplab Jenkins build servers setup variables
        # to reflect the environment settings
        build_tool = os.environ.get("AMPLAB_JENKINS_BUILD_TOOL", "sbt")
        hadoop_version = os.environ.get("AMPLAB_JENKINS_BUILD_PROFILE", "hadoop2.3")
        test_env = "amplab_jenkins"
        # add path for Python3 in Jenkins if we're calling from a Jenkins machine
        os.environ["PATH"] = "/home/anaconda/envs/py3k/bin:" + os.environ.get("PATH")
    else:
        # else we're running locally and can use local settings
        build_tool = "sbt"
        hadoop_version = "hadoop2.3"
        test_env = "local"

    print("[info] Using build tool", build_tool, "with Hadoop profile", hadoop_version,
          "under environment", test_env)

    changed_modules = None
    changed_files = None
    if test_env == "amplab_jenkins" and os.environ.get("AMP_JENKINS_PRB"):
        target_branch = os.environ["ghprbTargetBranch"]
        changed_files = identify_changed_files_from_git_commits("HEAD",
                                                                target_branch=target_branch)
        changed_modules = determine_modules_for_files(changed_files)
    if not changed_modules:
        changed_modules = [modules.root]
    print("[info] Found the following changed modules:",
          ", ".join(x.name for x in changed_modules))

    test_modules = determine_modules_to_test(changed_modules)

    # license checks
    run_apache_rat_checks()

    # style checks
    if not changed_files or any(f.endswith(".scala") for f in changed_files):
        run_scala_style_checks()
    if not changed_files or any(f.endswith(".py") for f in changed_files):
        run_python_style_checks()

    # determine if docs were changed and if we're inside the amplab environment
    # note - the below commented out until *all* Jenkins workers can get `jekyll` installed
    # if "DOCS" in changed_modules and test_env == "amplab_jenkins":
    #     build_spark_documentation()

    # spark build
    build_apache_spark(build_tool, hadoop_version)

    # backwards compatibility checks
    detect_binary_inop_with_mima()

    # run the test suites
    run_scala_tests(build_tool, hadoop_version, test_modules)

    modules_with_python_tests = [m for m in test_modules if m.python_test_goals]
    if modules_with_python_tests:
        run_python_tests(modules_with_python_tests, opts.parallelism)
    if any(m.should_run_r_tests for m in test_modules):
        run_sparkr_tests()

def main():
    # Ensure the user home directory (HOME) is valid and is an absolute directory
    if not USER_HOME or not os.path.isabs(USER_HOME):
        print("[error] Cannot determine your home directory as an absolute path;",
              " ensure the $HOME environment variable is set properly.")
        sys.exit(1)

    os.chdir(SPARK_HOME)

    rm_r(os.path.join(SPARK_HOME, "work"))
    rm_r(os.path.join(USER_HOME, ".ivy2", "local", "org.apache.spark"))
    rm_r(os.path.join(USER_HOME, ".ivy2", "cache", "org.apache.spark"))

    # os.environ values must be strings, so store the block code as a string.
    os.environ["CURRENT_BLOCK"] = str(ERROR_CODES["BLOCK_GENERAL"])

    java_exe = determine_java_executable()
    if not java_exe:
        print("[error] Cannot find a version of `java` on the system; please",
              " install one and retry.")
        sys.exit(2)

    java_version = determine_java_version(java_exe)

    if java_version.minor < 8:
        print("[warn] Java 8 tests will not run because JDK version is < 1.8.")

    if os.environ.get("AMPLAB_JENKINS"):
        # if we're on the Amplab Jenkins build servers setup variables
        # to reflect the environment settings
        build_tool = os.environ.get("AMPLAB_JENKINS_BUILD_TOOL", "sbt")
        hadoop_version = os.environ.get("AMPLAB_JENKINS_BUILD_PROFILE", "hadoop2.3")
        test_env = "amplab_jenkins"
        # add path for Python3 in Jenkins if we're calling from a Jenkins machine
        os.environ["PATH"] = "/home/anaconda/envs/py3k/bin:" + os.environ.get("PATH")
    else:
        # else we're running locally and can use local settings
        build_tool = "sbt"
        hadoop_version = "hadoop2.3"
        test_env = "local"

    print("[info] Using build tool", build_tool, "with Hadoop profile", hadoop_version,
          "under environment", test_env)

    changed_modules = None
    changed_files = None
    if test_env == "amplab_jenkins" and os.environ.get("AMP_JENKINS_PRB"):
        target_branch = os.environ["ghprbTargetBranch"]
        changed_files = identify_changed_files_from_git_commits("HEAD",
                                                                target_branch=target_branch)
        changed_modules = determine_modules_for_files(changed_files)
    if not changed_modules:
        changed_modules = [modules.root]
    print("[info] Found the following changed modules:",
          ", ".join(x.name for x in changed_modules))

    test_modules = determine_modules_to_test(changed_modules)

    # license checks
    run_apache_rat_checks()

    # style checks
    if not changed_files or any(f.endswith(".scala") for f in changed_files):
        run_scala_style_checks()
    if not changed_files or any(f.endswith(".py") for f in changed_files):
        run_python_style_checks()

    # determine if docs were changed and if we're inside the amplab environment
    # note - the below commented out until *all* Jenkins workers can get `jekyll` installed
    # if "DOCS" in changed_modules and test_env == "amplab_jenkins":
    #     build_spark_documentation()

    # spark build
    build_apache_spark(build_tool, hadoop_version)

    # backwards compatibility checks
    detect_binary_inop_with_mima()

    # run the test suites
    run_scala_tests(build_tool, hadoop_version, test_modules)

    modules_with_python_tests = [m for m in test_modules if m.python_test_goals]
    if modules_with_python_tests:
        run_python_tests(modules_with_python_tests)
    if any(m.should_run_r_tests for m in test_modules):
        run_sparkr_tests()

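
# Every main() variant starts by clearing the work directory and the cached
# org.apache.spark artifacts via rm_r, so stale builds cannot leak into the
# test run. A minimal sketch of an "rm -r"-style helper with that behavior;
# the real helper's exact error handling may differ:
import os
import shutil


def rm_r_sketch(path):
    """Removes a file or directory tree if it exists; missing paths are a no-op."""
    if os.path.isdir(path):
        shutil.rmtree(path)
    elif os.path.exists(path):
        os.remove(path)
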
def main():
    opts = parse_opts()

    # Ensure the user home directory (HOME) is valid and is an absolute directory
    if not USER_HOME or not os.path.isabs(USER_HOME):
        print("[error] Cannot determine your home directory as an absolute path;",
              " ensure the $HOME environment variable is set properly.")
        sys.exit(1)

    os.chdir(SPARK_HOME)

    rm_r(os.path.join(SPARK_HOME, "work"))
    rm_r(os.path.join(USER_HOME, ".ivy2", "local", "org.apache.spark"))
    rm_r(os.path.join(USER_HOME, ".ivy2", "cache", "org.apache.spark"))

    os.environ["CURRENT_BLOCK"] = str(ERROR_CODES["BLOCK_GENERAL"])

    java_exe = determine_java_executable()
    if not java_exe:
        print("[error] Cannot find a version of `java` on the system; please",
              " install one and retry.")
        sys.exit(2)

    # install SparkR
    if which("R"):
        run_cmd([os.path.join(SPARK_HOME, "R", "install-dev.sh")])
    else:
        print("Cannot install SparkR as R was not found in PATH")

    if os.environ.get("AMPLAB_JENKINS"):
        # if we're on the Amplab Jenkins build servers setup variables
        # to reflect the environment settings
        build_tool = os.environ.get("AMPLAB_JENKINS_BUILD_TOOL", "sbt")
        hadoop_version = os.environ.get("AMPLAB_JENKINS_BUILD_PROFILE", "hadoop3.2")
        hive_version = os.environ.get("AMPLAB_JENKINS_BUILD_HIVE_PROFILE", "hive2.3")
        test_env = "amplab_jenkins"
        # add path for Python3 in Jenkins if we're calling from a Jenkins machine
        # TODO(sknapp): after all builds are ported to the ubuntu workers, change this to be:
        # /home/jenkins/anaconda2/envs/py36/bin
        os.environ["PATH"] = "/home/anaconda/envs/py36/bin:" + os.environ.get("PATH")
    else:
        # else we're running locally and can use local settings
        build_tool = "sbt"
        hadoop_version = os.environ.get("HADOOP_PROFILE", "hadoop2.7")
        hive_version = os.environ.get("HIVE_PROFILE", "hive2.3")
        test_env = "local"

    print("[info] Using build tool", build_tool, "with Hadoop profile", hadoop_version,
          "and Hive profile", hive_version, "under environment", test_env)
    extra_profiles = get_hadoop_profiles(hadoop_version) + get_hive_profiles(hive_version)

    changed_modules = None
    changed_files = None
    should_only_test_modules = "TEST_ONLY_MODULES" in os.environ
    included_tags = []
    if should_only_test_modules:
        str_test_modules = [m.strip() for m in os.environ.get("TEST_ONLY_MODULES").split(",")]
        test_modules = [m for m in modules.all_modules if m.name in str_test_modules]
        # Directly uses test_modules as changed modules to apply tags and environments
        # as if all specified test modules are changed.
        changed_modules = test_modules
        str_excluded_tags = os.environ.get("TEST_ONLY_EXCLUDED_TAGS", None)
        str_included_tags = os.environ.get("TEST_ONLY_INCLUDED_TAGS", None)
        excluded_tags = []
        if str_excluded_tags:
            excluded_tags = [t.strip() for t in str_excluded_tags.split(",")]
        included_tags = []
        if str_included_tags:
            included_tags = [t.strip() for t in str_included_tags.split(",")]
    elif test_env == "amplab_jenkins" and os.environ.get("AMP_JENKINS_PRB"):
        target_branch = os.environ["ghprbTargetBranch"]
        changed_files = identify_changed_files_from_git_commits("HEAD",
                                                                target_branch=target_branch)
        changed_modules = determine_modules_for_files(changed_files)
        excluded_tags = determine_tags_to_exclude(changed_modules)

    if not changed_modules:
        changed_modules = [modules.root]
        excluded_tags = []
    print("[info] Found the following changed modules:",
          ", ".join(x.name for x in changed_modules))

    # setup environment variables
    # note - the 'root' module doesn't collect environment variables for all modules, because the
    # environment variables should not be set if a module is not changed, even if running the
    # 'root' module. So here we should use changed_modules rather than test_modules.
    test_environ = {}
    for m in changed_modules:
        test_environ.update(m.environ)
    setup_test_environ(test_environ)

    should_run_java_style_checks = False
    if not should_only_test_modules:
        test_modules = determine_modules_to_test(changed_modules)

        # license checks
        run_apache_rat_checks()

        # style checks
        if not changed_files or any(f.endswith(".scala")
                                    or f.endswith("scalastyle-config.xml")
                                    for f in changed_files):
            run_scala_style_checks(extra_profiles)
        if not changed_files or any(f.endswith(".java")
                                    or f.endswith("checkstyle.xml")
                                    or f.endswith("checkstyle-suppressions.xml")
                                    for f in changed_files):
            # Run SBT Checkstyle after the build to prevent a side-effect to the build.
            should_run_java_style_checks = True
        if not changed_files or any(f.endswith("lint-python")
                                    or f.endswith("tox.ini")
                                    or f.endswith(".py")
                                    for f in changed_files):
            run_python_style_checks()
        if not changed_files or any(f.endswith(".R")
                                    or f.endswith("lint-r")
                                    or f.endswith(".lintr")
                                    for f in changed_files):
            run_sparkr_style_checks()

    # determine if docs were changed and if we're inside the amplab environment
    # note - the below commented out until *all* Jenkins workers can get `jekyll` installed
    # if "DOCS" in changed_modules and test_env == "amplab_jenkins":
    #     build_spark_documentation()

    if any(m.should_run_build_tests for m in test_modules) and test_env != "amplab_jenkins":
        run_build_tests()

    # spark build
    build_apache_spark(build_tool, extra_profiles)

    # backwards compatibility checks
    if build_tool == "sbt":
        # Note: compatibility tests only supported in sbt for now
        detect_binary_inop_with_mima(extra_profiles)
        # Since we did not build assembly/package before running dev/mima, we need to
        # do it here because the tests still rely on it; see SPARK-13294 for details.
        build_spark_assembly_sbt(extra_profiles, should_run_java_style_checks)

    # run the test suites
    run_scala_tests(build_tool, extra_profiles, test_modules, excluded_tags, included_tags)

    modules_with_python_tests = [m for m in test_modules if m.python_test_goals]
    if modules_with_python_tests:
        # We only run PySpark tests with coverage report in one specific job with
        # Spark master with SBT in Jenkins.
        is_sbt_master_job = "SPARK_MASTER_SBT_HADOOP_2_7" in os.environ
        run_python_tests(
            modules_with_python_tests, opts.parallelism, with_coverage=is_sbt_master_job)
        run_python_packaging_tests()
    if any(m.should_run_r_tests for m in test_modules):
        run_sparkr_tests()

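
# All of the change-detection branches above funnel through
# identify_changed_files_from_git_commits, called either with a target branch
# (the Jenkins PR builder) or a target ref (GitHub Actions). A minimal sketch
# of the git plumbing this implies -- a name-only diff between the patch
# commit and the target -- with the helper name suffixed to mark it as an
# illustrative assumption rather than the project's implementation:
import subprocess


def identify_changed_files_sketch(patch_sha, target_branch=None, target_ref=None):
    """Lists files that differ between patch_sha and the target branch/ref."""
    if (target_branch is None) == (target_ref is None):
        raise ValueError("specify exactly one of target_branch and target_ref")
    diff_target = target_branch if target_branch is not None else target_ref
    raw_output = subprocess.check_output(
        ["git", "diff", "--name-only", patch_sha, diff_target],
        universal_newlines=True)
    return [f.strip() for f in raw_output.splitlines() if f.strip()]
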