def make_spark_distribution(commit_id, target_dir, spark_git_repo, merge_commit_into_master=False,
                            is_yarn_mode=False, additional_make_distribution_args=""):
    """
    Download Spark, check out a specific version, and create a binary distribution.

    :param commit_id: the version to build. Can specify any of the following:
        1. A git commit hash e.g. "4af93ff3"
        2. A branch name e.g. "origin/branch-0.7"
        3. A tag name e.g. "origin/tag/v0.8.0-incubating"
        4. A pull request e.g. "origin/pr/675"
    :param target_dir: the directory to clone Spark into.
    :param spark_git_repo: the repo to clone from, e.g. the Spark GitHub mirror.
    :param merge_commit_into_master: if True, this commit_id will be merged into `master`;
        this can be useful for testing un-merged pull requests.
    :param is_yarn_mode: if True, build with the YARN profile (-Pyarn).
    :param additional_make_distribution_args: extra arguments passed through verbatim to
        make-distribution.sh.
    """
    clone_spark(target_dir, spark_git_repo)
    checkout_version(target_dir, commit_id, merge_commit_into_master)
    with cd(target_dir):
        logger.info("Building Spark at version %s; this may take a while...\n" % commit_id)
        # According to the SPARK-1520 JIRA, building with Java 7+ only causes problems when
        # running PySpark on YARN or when running on Java 6. Since we'll be building and
        # running Spark on the same machines and using standalone mode, it should be safe
        # to skip the Java version test and suppress this warning:
        if is_yarn_mode:
            run_cmd("./make-distribution.sh --skip-java-test -Pyarn " +
                    additional_make_distribution_args)
        else:
            run_cmd("./make-distribution.sh --skip-java-test " +
                    additional_make_distribution_args)
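# Example usage (a minimal sketch; the repo URL, paths, and extra build flag below are
# illustrative placeholders, and `clone_spark`, `cd`, `run_cmd`, and `logger` are assumed
# to be defined elsewhere in this module):
#
#     make_spark_distribution(
#         commit_id="origin/pr/675",
#         target_dir="/tmp/spark",
#         spark_git_repo="https://github.com/apache/spark.git",
#         merge_commit_into_master=True,  # build the PR as if merged into master
#         additional_make_distribution_args="-Phive")  # passed through verbatim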
def checkout_version(repo_dir, commit_id, merge_commit_into_master=False):
    with cd(repo_dir):
        # Fetch updates
        logger.info("Updating Spark repo...")
        run_cmd("git fetch")
        # Check out the requested commit / branch / PR
        logger.info("Cleaning Spark and checking out commit_id %s." % commit_id)
        run_cmd("git clean -f -d -x")
        if merge_commit_into_master:
            run_cmd("git reset --hard master")
            run_cmd("git merge %s -m 'Merging %s into master.'" % (commit_id, commit_id))
        else:
            run_cmd("git reset --hard %s" % commit_id)
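# Example usage (a minimal sketch; the path and refs are placeholders, and a Spark clone
# is assumed to already exist at repo_dir, e.g. one created by clone_spark):
#
#     # Hard-reset the clone to a release tag:
#     checkout_version("/tmp/spark", "origin/tag/v0.8.0-incubating")
#
#     # Or apply an un-merged pull request on top of master:
#     checkout_version("/tmp/spark", "origin/pr/675", merge_commit_into_master=True)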