"test-proc", 1, 1, zone="us-east1-b", service_acct="*****@*****.**", boot_disk_size=75, disk_image="davelab-image-latest") proc.set_log_dir("/home/gap/log/") proc.create() print "We READY TO RUN!" try: sh = StorageHelper(proc) sh.mkdir("/home/alex_waldrop_jr/test/", wait=True) sh.mkdir("/home/gap/log/", wait=True) sh.mkdir("gs://derp_test/mkdir_test_mofo_2/", wait=True) proc.run("perms_gap", "sudo chmod -R 777 /home/gap/") proc.run("perms_awal", "sudo chmod -R 777 /home/alex_waldrop_jr/") proc.wait() print "local exists: %s" % sh.path_exists("/home/alex_waldrop_jr/test/") print "local exists: %s" % sh.path_exists("/home/gap/log/") print "cloud exists: %s" % sh.path_exists( "gs://derp_test/mkdir_test_mofo_2/") print "bad exists: %s" % sh.path_exists("/home/aasdasdfk") sh.mv("gs://derp_test/dummy.txt", "/home/alex_waldrop_jr/test/", log=False, wait=True) sh.mv("/home/alex_waldrop_jr/test/dummy.txt",
class ModuleExecutor(object):

    def __init__(self, task_id, processor, workspace, docker_image=None):
        self.task_id = task_id
        self.processor = processor
        self.workspace = workspace
        self.storage_helper = StorageHelper(self.processor)
        self.docker_helper = DockerHelper(self.processor)
        self.docker_image = docker_image

    def load_input(self, inputs):

        if self.processor.get_status() is Processor.OFF:
            # Create processor if it's off
            logging.info("Creating processor '%s' for task '%s'!" %
                         (self.processor.get_name(), self.task_id))
            self.processor.create()

        # Create workspace directory structure
        self.__create_workspace()

        # List of jobs that have been started in process of loading input
        job_names = []

        # Pull docker image if necessary
        if self.docker_image is not None:
            docker_image_name = self.docker_image.get_image_name().split("/")[0]
            docker_image_name = docker_image_name.replace(":", "_")
            job_name = "docker_pull_%s" % docker_image_name
            self.docker_helper.pull(self.docker_image.get_image_name(), job_name=job_name)
            job_names.append(job_name)

        # Load input files
        # Inputs: list containing remote files, local files, and docker images
        seen = []
        count = 1
        for task_input in inputs:

            # Case: Transfer file into wrk directory if it's not already there
            if task_input.get_transferrable_path() not in seen:

                # Transfer file to workspace directory
                src_path = task_input.get_transferrable_path()
                job_name = "load_input_%s_%s_%s" % (self.task_id, task_input.get_type(), count)

                logging.debug("Input path: %s, transfer path: %s" % (task_input.get_path(), src_path))

                self.storage_helper.mv(src_path=src_path,
                                       dest_path=self.workspace.get_wrk_dir(),
                                       job_name=job_name)

                # Add transfer path to list of remote paths that have been transferred to local workspace
                seen.append(src_path)
                count += 1
                job_names.append(job_name)

            # Update path after transferring to wrk directory
            task_input.update_path(new_dir=self.workspace.get_wrk_dir())
            logging.debug("Updated path: %s" % task_input.get_path())

        # Wait for all processes to finish
        for job_name in job_names:
            self.processor.wait_process(job_name)

        # Recursively give every permission to all files we just added
        logging.info("(%s) Final workspace perm. update for task '%s'..." %
                     (self.processor.name, self.task_id))
        self.__grant_workspace_perms(job_name="grant_final_wrkspace_perms")

    def run(self, cmd):

        # Job name
        job_name = self.task_id

        # Get name of docker image where command should be run (if any)
        docker_image_name = None if self.docker_image is None else self.docker_image.get_image_name()

        # Begin running job and return stdout, stderr after job has finished running
        self.processor.run(job_name, cmd, docker_image=docker_image_name)
        return self.processor.wait_process(job_name)

    def save_output(self, outputs, final_output_types):
        # Return output files to workspace output dir

        # Get workspace places for output files
        final_output_dir = self.workspace.get_output_dir()
        tmp_output_dir = self.workspace.get_tmp_output_dir()

        count = 1
        job_names = []
        for output_file in outputs:
            if output_file.get_type() in final_output_types:
                dest_dir = final_output_dir
            else:
                dest_dir = tmp_output_dir

            # Calculate output file size
            job_name = "get_size_%s_%s_%s" % (self.task_id, output_file.get_type(), count)
            file_size = self.storage_helper.get_file_size(output_file.get_path(), job_name=job_name)
            output_file.set_size(file_size)

            # Transfer to correct output directory
            job_name = "save_output_%s_%s_%s" % (self.task_id, output_file.get_type(), count)
            curr_path = output_file.get_transferrable_path()
            self.storage_helper.mv(curr_path, dest_dir, job_name=job_name)

            # Update path of output file to reflect new location
            job_names.append(job_name)
            output_file.update_path(new_dir=dest_dir)
            count += 1

        # Wait for transfers to complete
        for job_name in job_names:
            self.processor.wait_process(job_name)

        # Wait for output files to finish transferring
        self.processor.wait()

    def save_logs(self):
        # Move log files to final output log directory
        log_files = os.path.join(self.workspace.get_wrk_log_dir(), "*")
        final_log_dir = self.workspace.get_final_log_dir()
        self.storage_helper.mv(log_files, final_log_dir,
                               job_name="return_logs",
                               log=False, wait=True)

    def __create_workspace(self):
        # Create all directories specified in task workspace
        logging.info("(%s) Creating workspace for task '%s'..." %
                     (self.processor.name, self.task_id))

        for dir_type, dir_obj in self.workspace.get_workspace().iteritems():
            self.storage_helper.mkdir(dir_obj, job_name="mkdir_%s" % dir_type, wait=True)

        # Set processor wrk, log directories
        self.processor.set_wrk_dir(self.workspace.get_wrk_dir())
        self.processor.set_log_dir(self.workspace.get_wrk_log_dir())

        # Give everyone all the permissions on working directory
        logging.info("(%s) Updating workspace permissions..." % self.processor.name)
        self.__grant_workspace_perms(job_name="grant_initial_wrkspace_perms")

        # Wait for all the above commands to complete
        logging.info("(%s) Successfully created workspace for task '%s'!" %
                     (self.processor.name, self.task_id))

    def __grant_workspace_perms(self, job_name):
        cmd = "sudo chmod -R 777 %s" % self.workspace.get_wrk_dir()
        self.processor.run(job_name=job_name, cmd=cmd)
        self.processor.wait_process(job_name)
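# Illustrative sketch (not part of the original source): how a task runner
# might drive the ModuleExecutor above. The 'task' accessors shown here
# (get_ID, get_inputs, get_command, get_output_files, get_final_output_keys)
# are assumed for illustration and may differ from the real Task interface.
#
# executor = ModuleExecutor(task_id=task.get_ID(),
#                           processor=processor,
#                           workspace=workspace,
#                           docker_image=docker_image)
# executor.load_input(task.get_inputs())       # stage inputs into the wrk dir
# out, err = executor.run(task.get_command())  # blocks until the job finishes
# executor.save_output(task.get_output_files(), task.get_final_output_keys())
# executor.save_logs()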
class GAPipeline(object):

    def __init__(self, pipeline_id,
                 graph_config,
                 resource_kit_config,
                 sample_data_config,
                 platform_config,
                 platform_module,
                 final_output_dir):

        # GAP run id
        self.pipeline_id = pipeline_id

        # Paths to config files
        self.__graph_config = graph_config
        self.__res_kit_config = resource_kit_config
        self.__sample_set_config = sample_data_config
        self.__platform_config = platform_config

        # Name of platform class where tasks will be executed
        self.__plat_module = platform_module

        # Final output directory where output is saved
        self.__final_output_dir = final_output_dir

        # Obtain pipeline name and append to final output dir
        self.graph = None
        self.resource_kit = None
        self.sample_data = None
        self.platform = None

        # Create datastore from pipeline components
        self.datastore = None

        # Task scheduler for running jobs
        self.scheduler = None

        # Helper processor for handling platform operations
        self.helper_processor = None
        self.storage_helper = None
        self.docker_helper = None

    def load(self):
        # Load resource kit
        self.resource_kit = ResourceKit(self.__res_kit_config)

        # Load the sample data
        self.sample_data = SampleSet(self.__sample_set_config)

        # Load the graph
        self.graph = Graph(self.__graph_config)

        # Load platform
        plat_module = importlib.import_module(self.__plat_module)
        plat_class = plat_module.__dict__[self.__plat_module]
        self.platform = plat_class(self.pipeline_id, self.__platform_config, self.__final_output_dir)

        # Create datastore and scheduler
        self.datastore = Datastore(self.graph, self.resource_kit, self.sample_data, self.platform)
        self.scheduler = Scheduler(self.graph, self.datastore, self.platform)

    def validate(self):
        # Assume all validations are working
        has_errors = False

        # Validate the sample set
        sample_validator = SampleValidator(self.sample_data)
        has_errors = sample_validator.validate() or has_errors
        if not has_errors:
            logging.debug("Sample sheet validated!")

        # Validate the graph
        graph_validator = GraphValidator(self.graph, self.resource_kit, self.sample_data)
        has_errors = graph_validator.validate() or has_errors
        if not has_errors:
            logging.debug("Graph validated!")

        # Validate the platform
        self.platform.validate()

        # Stop the pipeline before launching if there are any errors
        if has_errors:
            raise SystemError("One or more errors have been encountered during validation. "
                              "See the above logs for more information")

        # Create helper processor and storage/docker helpers for checking input files
        self.helper_processor = self.platform.get_helper_processor()
        self.helper_processor.create()

        self.storage_helper = StorageHelper(self.helper_processor)
        self.docker_helper = DockerHelper(self.helper_processor)

        # Validate all pipeline inputs can be found on platform
        input_validator = InputValidator(self.resource_kit, self.sample_data,
                                         self.storage_helper, self.docker_helper)
        has_errors = input_validator.validate() or has_errors

        # Stop the pipeline if there are any errors
        if has_errors:
            raise SystemError("One or more errors have been encountered during validation. "
                              "See the above logs for more information")

        # Validate that pipeline workspace can be created
        workspace = self.datastore.get_task_workspace()
        for dir_type, dir_path in workspace.get_workspace().items():
            self.storage_helper.mkdir(dir_path=str(dir_path), job_name="mkdir_%s" % dir_type, wait=True)

        logging.info("CloudConductor run validated! Beginning pipeline execution.")

    def run(self, rm_tmp_output_on_success=True):
        # Run until all tasks are complete
        self.scheduler.run()

        # Remove temporary output on success
        if rm_tmp_output_on_success:
            workspace = self.datastore.get_task_workspace()
            try:
                self.storage_helper.rm(path=workspace.get_tmp_output_dir(),
                                       job_name="rm_tmp_output",
                                       wait=True)
            except BaseException as e:
                logging.error("Unable to remove tmp output directory: %s" %
                              workspace.get_tmp_output_dir())
                if str(e) != "":
                    logging.error("Received the following err message:\n%s" % e)

    def save_progress(self):
        pass

    def publish_report(self, err=False, err_msg=None, git_version=None):
        # Create and publish GAP pipeline report
        try:
            report = self.__make_pipeline_report(err, err_msg, git_version)
            if self.platform is not None:
                self.platform.publish_report(report)
        except BaseException as e:
            logging.error("Unable to publish report!")
            if str(e) != "":
                logging.error("Received the following message:\n%s" % e)
            raise

    def clean_up(self):
        # Destroy the helper processor if it exists
        if self.helper_processor is not None:
            try:
                logging.debug("Destroying helper processor...")
                self.helper_processor.destroy(wait=False)
            except BaseException as e:
                logging.error("Unable to destroy helper processor '%s'!" %
                              self.helper_processor.get_name())
                if str(e) != "":
                    logging.error("Received the following err message:\n%s" % e)

        # Clean up the platform (let the platform decide what that means)
        if self.platform is not None:
            self.platform.clean_up()

    def __make_pipeline_report(self, err, err_msg, git_version):
        # Create a pipeline report that summarizes features of pipeline
        report = GAPReport(self.pipeline_id, err, err_msg, git_version)

        # Register helper runtime data
        if self.helper_processor is not None:
            report.set_start_time(self.helper_processor.get_start_time())
            report.set_total_runtime(self.helper_processor.get_runtime())
            report.register_task(task_name="Helper",
                                 start_time=self.helper_processor.get_start_time(),
                                 run_time=self.helper_processor.get_runtime(),
                                 cost=self.helper_processor.compute_cost())

        # Register runtime data for pipeline tasks
        if self.scheduler is not None:
            task_workers = self.scheduler.get_task_workers()
            for task_name, task_worker in task_workers.items():

                # Register data about task runtime
                task = task_worker.get_task()
                run_time = task_worker.get_runtime()
                cost = task_worker.get_cost()
                start_time = task_worker.get_start_time()
                cmd = task_worker.get_cmd()
                task_data = {"parent_task": task_name.split(".")[0]}
                report.register_task(task_name=task_name,
                                     start_time=start_time,
                                     run_time=run_time,
                                     cost=cost,
                                     cmd=cmd,
                                     task_data=task_data)

                # Register data about task output files
                if task.is_complete():
                    output_files = self.datastore.get_task_output_files(task_id=task_name)
                    for output_file in output_files:
                        file_type = output_file.get_type()
                        file_path = output_file.get_path()
                        is_final_output = file_type in task.get_final_output_keys()
                        file_size = output_file.get_size()
                        if is_final_output or err:
                            # Only declare output files if file is final output file
                            # OR file is temporary output file but pipeline failed
                            report.register_output_file(task_name, file_type, file_path,
                                                        file_size, is_final_output)

        return report
class ModuleExecutor(object):

    def __init__(self, task_id, processor, workspace, docker_image=None):
        self.task_id = task_id
        self.processor = processor
        self.workspace = workspace
        self.storage_helper = StorageHelper(self.processor)
        self.docker_helper = DockerHelper(self.processor)
        self.docker_image = docker_image

        # Create workspace directory structure
        self.__create_workspace()

    def load_input(self, inputs):

        # List of jobs that have been started in process of loading input
        job_names = []

        # Pull docker image if necessary
        if self.docker_image is not None:
            docker_image_name = self.docker_image.get_image_name().split("/")[0]
            docker_image_name = docker_image_name.replace(":", "_")
            job_name = "docker_pull_%s" % docker_image_name
            self.docker_helper.pull(self.docker_image.get_image_name(), job_name=job_name)
            job_names.append(job_name)

        # Load input files
        # Inputs: list containing remote files, local files, and docker images
        src_seen = []
        dest_seen = []
        count = 1

        batch_size = 5
        loading_counter = 0

        for task_input in inputs:

            # Don't transfer local files
            if ":" not in task_input.get_path():
                continue

            # Directory where input will be transferred
            dest_dir = self.workspace.get_wrk_dir()

            # Input filename after transfer (None = same as src)
            dest_filename = None

            # Case: Transfer file into wrk directory if it's not already there
            if task_input.get_transferrable_path() not in src_seen:

                # Get name of file that's going to be transferred
                src_path = task_input.get_transferrable_path()
                job_name = "load_input_%s_%s_%s" % (self.task_id, task_input.get_type(), count)
                logging.debug("Input path: %s, transfer path: %s" % (task_input.get_path(), src_path))

                # Generate complete transfer path
                dest_path = os.path.join(dest_dir, task_input.filename)

                # Check to see if transferring file would overwrite existing file
                if dest_path in dest_seen:
                    # Add unique tag to destination filename to prevent overwrite
                    if task_input.sample_name is not None:
                        dest_filename = "{0}_{1}".format(task_input.sample_name, task_input.filename)
                    else:
                        dest_filename = "{0}_{1}".format(Platform.generate_unique_id(), task_input.filename)
                    logging.debug("Changing filename from '{0}' to '{1}'.".format(task_input.filename, dest_filename))
                    dest_path = os.path.join(dest_dir, dest_filename)
                else:
                    dest_filename = None
                    dest_path = dest_dir

                # Log the final destination path
                logging.debug("Destination: {0}".format(dest_path))

                # Move file to dest_path
                self.storage_helper.mv(src_path=src_path,
                                       dest_path=dest_path,
                                       job_name=job_name)
                loading_counter += 1

                # Add transfer path to list of remote paths that have been transferred to local workspace
                src_seen.append(src_path)
                count += 1
                job_names.append(job_name)

                # If loading_counter has reached batch_size, clear out the queue
                if loading_counter >= batch_size:
                    logging.debug("Batch size reached on task {0}".format(self.task_id))

                    # Wait for all processes to finish
                    while len(job_names):
                        self.processor.wait_process(job_names.pop())

                    loading_counter = 0

            # Update path after transferring to wrk directory and add to list of files in working directory
            task_input.update_path(new_dir=dest_dir, new_filename=dest_filename)
            dest_seen.append(task_input.get_path())
            logging.debug("Updated path: %s" % task_input.get_path())

        # Wait for all processes to finish
        for job_name in job_names:
            self.processor.wait_process(job_name)

        # Recursively give every permission to all files we just added
        logging.info("(%s) Final workspace perm. update for task '%s'..." %
                     (self.processor.name, self.task_id))
        self.__grant_workspace_perms(job_name="grant_final_wrkspace_perms")

    def run(self, cmd, job_name=None):

        # Check or create job name
        if job_name is None:
            job_name = self.task_id

        # Get name of docker image where command should be run (if any)
        docker_image_name = None if self.docker_image is None else self.docker_image.get_image_name()

        # Begin running job and return stdout, stderr after job has finished running
        self.processor.run(job_name, cmd, docker_image=docker_image_name)
        return self.processor.wait_process(job_name)

    def save_output(self, outputs, final_output_types):
        # Return output files to workspace output dir

        # Get workspace places for output files
        final_output_dir = self.workspace.get_output_dir()
        tmp_output_dir = self.workspace.get_tmp_output_dir()

        count = 1
        job_names = []

        # List of output file paths. We create this list to ensure the files are not being overwritten
        output_filepaths = []

        for output_file in outputs:
            if output_file.get_type() in final_output_types:
                dest_dir = final_output_dir
            else:
                dest_dir = tmp_output_dir

            # Calculate output file size
            job_name = "get_size_%s_%s_%s" % (self.task_id, output_file.get_type(), count)
            file_size = self.storage_helper.get_file_size(output_file.get_path(), job_name=job_name)
            output_file.set_size(file_size)

            # Check if there already exists a file with the same name on the bucket
            destination_path = "{0}/{1}/".format(dest_dir.rstrip("/"), output_file.get_filename())
            if destination_path in output_filepaths:

                # Change the destination directory to a new subdirectory
                dest_dir = "{0}/{1}/".format(dest_dir.rstrip("/"), len(output_filepaths))

                # Regenerate the destination path
                new_destination_path = "{0}/{1}".format(dest_dir.rstrip("/"), output_file.get_filename())

                # Add the new path to the output file paths
                output_filepaths.append(new_destination_path)

            else:
                # Just add the new path to the list of output file paths
                output_filepaths.append(destination_path)

            # Transfer to correct output directory
            job_name = "save_output_%s_%s_%s" % (self.task_id, output_file.get_type(), count)
            curr_path = output_file.get_transferrable_path()
            self.storage_helper.mv(curr_path, dest_dir, job_name=job_name)

            # Update path of output file to reflect new location
            job_names.append(job_name)
            output_file.update_path(new_dir=dest_dir)
            logging.debug("(%s) Transferring file '%s' from old path '%s' to new path '%s' ('%s')" %
                          (self.task_id, output_file.get_type(), curr_path,
                           output_file.get_path(), output_file.get_transferrable_path()))
            count += 1

        # Wait for transfers to complete
        for job_name in job_names:
            self.processor.wait_process(job_name)

        # Wait for output files to finish transferring
        self.processor.wait()

    def save_logs(self):
        # Move log files to final output log directory
        log_files = os.path.join(self.workspace.get_wrk_log_dir(), "*")
        final_log_dir = self.workspace.get_final_log_dir()
        self.storage_helper.mv(log_files, final_log_dir,
                               job_name="return_logs",
                               log=False, wait=True)

    def __create_workspace(self):
        # Create all directories specified in task workspace
        logging.info("(%s) Creating workspace for task '%s'..." %
                     (self.processor.name, self.task_id))

        for dir_type, dir_obj in self.workspace.get_workspace().items():
            self.storage_helper.mkdir(dir_obj, job_name="mkdir_%s" % dir_type, wait=True)

        # Set processor wrk, log directories
        self.processor.set_wrk_dir(self.workspace.get_wrk_dir())
        self.processor.set_wrk_out_dir(self.workspace.get_wrk_out_dir())
        self.processor.set_log_dir(self.workspace.get_wrk_log_dir())

        # Give everyone all the permissions on working directory
        logging.info("(%s) Updating workspace permissions..." % self.processor.name)
        self.__grant_workspace_perms(job_name="grant_initial_wrkspace_perms")

        # Wait for all the above commands to complete
        logging.info("(%s) Successfully created workspace for task '%s'!" %
                     (self.processor.name, self.task_id))

    def __grant_workspace_perms(self, job_name):
        cmd = "sudo chmod -R 777 %s" % self.workspace.get_wrk_dir()
        self.processor.run(job_name=job_name, cmd=cmd)
        self.processor.wait_process(job_name)
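# Illustrative sketch (not part of the original source): the filename
# de-duplication rule used by load_input() above, pulled out as a standalone
# helper so it can be exercised in isolation. uuid4 stands in for
# Platform.generate_unique_id(); the function name and signature are assumed.
import os
import uuid

def resolve_dest_filename(dest_dir, filename, dest_seen, sample_name=None):
    """Return (dest_path, dest_filename) such that an incoming file never
    overwrites a file already staged in dest_dir (tracked in dest_seen)."""
    dest_path = os.path.join(dest_dir, filename)
    if dest_path not in dest_seen:
        # No collision: keep the original filename and transfer into dest_dir
        return dest_dir, None
    # Collision: prefix with the sample name if available, else a unique id
    prefix = sample_name if sample_name is not None else uuid.uuid4().hex[:8]
    dest_filename = "{0}_{1}".format(prefix, filename)
    return os.path.join(dest_dir, dest_filename), dest_filename

# Example:
#   seen = ["/data/wrk/R1.fastq.gz"]
#   resolve_dest_filename("/data/wrk", "R1.fastq.gz", seen, sample_name="s1")
#   -> ("/data/wrk/s1_R1.fastq.gz", "s1_R1.fastq.gz")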
class GAPipeline(object):

    def __init__(self, pipeline_id,
                 graph_config,
                 resource_kit_config,
                 sample_data_config,
                 platform_config,
                 platform_module,
                 final_output_dir):

        # GAP run id
        self.pipeline_id = pipeline_id

        # Paths to config files
        self.__graph_config = graph_config
        self.__res_kit_config = resource_kit_config
        self.__sample_set_config = sample_data_config
        self.__platform_config = platform_config

        # Name of platform class where tasks will be executed
        self.__plat_module = platform_module

        # Final output directory where output is saved
        self.__final_output_dir = final_output_dir

        # Obtain pipeline name and append to final output dir
        self.graph = None
        self.resource_kit = None
        self.sample_data = None
        self.platform = None

        # Create datastore from pipeline components
        self.datastore = None

        # Task scheduler for running jobs
        self.scheduler = None

        # Helper processor for handling platform operations
        self.helper_processor = None
        self.storage_helper = None
        self.docker_helper = None

    def load(self):
        # Load resource kit
        self.resource_kit = ResourceKit(self.__res_kit_config)

        # Load the sample data
        self.sample_data = SampleSet(self.__sample_set_config)

        # Load the graph
        self.graph = Graph(self.__graph_config)

        # Load platform
        plat_module = importlib.import_module(self.__plat_module)
        plat_class = plat_module.__dict__[self.__plat_module]
        self.platform = plat_class(self.pipeline_id, self.__platform_config, self.__final_output_dir)

        # Create datastore and scheduler
        self.datastore = Datastore(self.graph, self.resource_kit, self.sample_data, self.platform)
        self.scheduler = Scheduler(self.graph, self.datastore, self.platform)

    def validate(self):
        # Assume all validations are working
        has_errors = False

        # Validate the sample set
        sample_validator = SampleValidator(self.sample_data)
        has_errors = sample_validator.validate() or has_errors
        if not has_errors:
            logging.debug("Sample sheet validated!")

        # Validate the graph
        graph_validator = GraphValidator(self.graph, self.resource_kit, self.sample_data)
        has_errors = graph_validator.validate() or has_errors
        if not has_errors:
            logging.debug("Graph validated!")

        # Validate the platform
        self.platform.validate()

        # Stop the pipeline before launching if there are any errors
        if has_errors:
            raise SystemError("One or more errors have been encountered during validation. "
                              "See the above logs for more information")

        # Create helper processor and storage/docker helpers for checking input files
        self.helper_processor = self.platform.get_helper_processor()
        self.helper_processor.create()

        self.storage_helper = StorageHelper(self.helper_processor)
        self.docker_helper = DockerHelper(self.helper_processor)

        # Validate all pipeline inputs can be found on platform
        input_validator = InputValidator(self.resource_kit, self.sample_data,
                                         self.storage_helper, self.docker_helper)
        has_errors = input_validator.validate() or has_errors

        # Stop the pipeline if there are any errors
        if has_errors:
            raise SystemError("One or more errors have been encountered during validation. "
                              "See the above logs for more information")

        # Validate that pipeline workspace can be created
        workspace = self.datastore.get_task_workspace()
        for dir_type, dir_path in workspace.get_workspace().iteritems():
            self.storage_helper.mkdir(dir_path=str(dir_path), job_name="mkdir_%s" % dir_type, wait=True)

        logging.info("GAP run validated! Beginning pipeline execution.")

    def run(self, rm_tmp_output_on_success=True):
        # Run until all tasks are complete
        self.scheduler.run()

        # Remove temporary output on success
        if rm_tmp_output_on_success:
            workspace = self.datastore.get_task_workspace()
            try:
                self.storage_helper.rm(path=workspace.get_tmp_output_dir(),
                                       job_name="rm_tmp_output",
                                       wait=True)
            except BaseException, e:
                logging.error("Unable to remove tmp output directory: %s" %
                              workspace.get_tmp_output_dir())
                if e.message != "":
                    logging.error("Received the following err message:\n%s" % e.message)