コード例 #1
0
ファイル: pulsar.py プロジェクト: AAFC-BICoE/galaxy-1
 def __init__(self, pulsar_client, job_wrapper, remote_job_config):
     """Capture the local and remote path settings needed for a Pulsar job.

     :param pulsar_client: client object; stored and handed to ``PathMapper``.
     :param job_wrapper: job wrapper; stored, and asked for its default
         compute environment, which supplies the local paths.
     :param remote_job_config: mapping read for the keys
         ``configs_directory``, ``working_directory``, ``tools_directory``
         and ``system_properties`` (itself read for ``separator``).
     :raises KeyError: if one of those keys is missing.
     """
     # Function-scope import keeps this method self-contained.
     import logging

     self.pulsar_client = pulsar_client
     self.job_wrapper = job_wrapper
     # Diagnostics go through logging instead of bare ``print`` statements,
     # which wrote to stdout unconditionally and only parse on Python 2.
     logging.getLogger(__name__).debug(
         "From runner/pulsar, job_wrapper: %s", job_wrapper)
     self.local_path_config = job_wrapper.default_compute_environment()
     self.unstructured_path_rewrites = {}
     # job_wrapper.prepare is going to expunge the job backing the following
     # computations, so precalculate these paths.
     self._wrapper_input_paths = self.local_path_config.input_paths()
     self._wrapper_output_paths = self.local_path_config.output_paths()
     self.path_mapper = PathMapper(
         pulsar_client, remote_job_config,
         self.local_path_config.working_directory())
     self._config_directory = remote_job_config["configs_directory"]
     self._working_directory = remote_job_config["working_directory"]
     self._sep = remote_job_config["system_properties"]["separator"]
     self._tool_dir = remote_job_config["tools_directory"]
     # Use the mapper's rewrite of the version path when it returns a truthy
     # value; otherwise keep the local version path.
     version_path = self.local_path_config.version_path()
     new_version_path = self.path_mapper.remote_version_path_rewrite(
         version_path)
     self._version_path = new_version_path or version_path
コード例 #2
0
ファイル: path_mapper_test.py プロジェクト: nuwang/pulsar
 def _path_mapper(self, expected_path, expected_type, staging_needed=True):
     """Return a ``PathMapper`` backed by a ``TestActionMapper``.

     The action mapper is created from ``expected_path``, ``expected_type``
     and ``staging_needed``; the path mapper gets no client, the test remote
     config and this test's temporary directory as local working directory.
     """
     # Build the stub first so construction order matches the call below.
     stub_actions = TestActionMapper(expected_path, expected_type,
                                     staging_needed)
     mapper_kwargs = {
         "client": None,
         "remote_job_config": self.__test_remote_config(),
         "local_working_directory": self.temp_directory,
         "action_mapper": stub_actions,
     }
     return PathMapper(**mapper_kwargs)
コード例 #3
0
ファイル: pulsar.py プロジェクト: agrauslys/galaxy
 def __init__(self, pulsar_client, job_wrapper, remote_job_config):
     """Record the client, the job wrapper and the job's path settings.

     Local paths come from the job wrapper's default compute environment;
     remote directories and the path separator are read from
     ``remote_job_config``.
     """
     self.pulsar_client = pulsar_client
     self.job_wrapper = job_wrapper

     local_env = job_wrapper.default_compute_environment()
     self.local_path_config = local_env
     self.unstructured_path_rewrites = {}

     # These paths are computed up front because job_wrapper.prepare is
     # going to expunge the job that backs them.
     self._wrapper_input_paths = local_env.input_paths()
     self._wrapper_output_paths = local_env.output_paths()

     mapper = PathMapper(
         pulsar_client,
         remote_job_config,
         local_env.working_directory(),
     )
     self.path_mapper = mapper

     self._config_directory = remote_job_config["configs_directory"]
     self._working_directory = remote_job_config["working_directory"]
     system_properties = remote_job_config["system_properties"]
     self._sep = system_properties["separator"]
     self._tool_dir = remote_job_config["tools_directory"]

     # A truthy remote rewrite replaces the local version path.
     local_version_path = local_env.version_path()
     remote_version_path = mapper.remote_version_path_rewrite(local_version_path)
     self._version_path = remote_version_path or local_version_path
コード例 #4
0
class PulsarComputeEnvironment(ComputeEnvironment):
    """Compute environment that reports paths as rewritten by a ``PathMapper``.

    Local paths are taken from the job wrapper's default compute environment
    and passed through the path mapper; directories and the path separator
    come from the remote job configuration.
    """

    def __init__(self, pulsar_client, job_wrapper, remote_job_config):
        """Store collaborators and precompute the paths used later.

        :param pulsar_client: client object; stored and handed to
            ``PathMapper``.
        :param job_wrapper: job wrapper; supplies the default (local)
            compute environment.
        :param remote_job_config: mapping read for ``configs_directory``,
            ``working_directory``, ``tools_directory`` and
            ``system_properties`` (itself read for ``separator``).
        """
        self.pulsar_client = pulsar_client
        self.job_wrapper = job_wrapper
        self.local_path_config = job_wrapper.default_compute_environment()
        # Cache of unstructured path -> rewritten path, filled by rewriter().
        self.unstructured_path_rewrites = {}
        # job_wrapper.prepare is going to expunge the job backing the following
        # computations, so precalculate these paths.
        self._wrapper_input_paths = self.local_path_config.input_paths()
        self._wrapper_output_paths = self.local_path_config.output_paths()
        self.path_mapper = PathMapper(
            pulsar_client, remote_job_config,
            self.local_path_config.working_directory())
        self._config_directory = remote_job_config["configs_directory"]
        self._working_directory = remote_job_config["working_directory"]
        self._sep = remote_job_config["system_properties"]["separator"]
        self._tool_dir = remote_job_config["tools_directory"]
        # Use the mapper's rewrite of the version path when it is truthy;
        # otherwise keep the local one.
        version_path = self.local_path_config.version_path()
        new_version_path = self.path_mapper.remote_version_path_rewrite(
            version_path)
        self._version_path = new_version_path or version_path

    def output_paths(self):
        """Return the precomputed output paths, each paired with its rewrite."""
        return [
            self._dataset_path(
                local_output_path,
                self.path_mapper.remote_output_path_rewrite(
                    str(local_output_path)))
            for local_output_path in self._wrapper_output_paths
        ]

    def input_paths(self):
        """Return the precomputed input paths, each paired with its rewrite."""
        # This will over-copy in some cases. For instance in the case of task
        # splitting, this input will be copied even though only the work dir
        # input will actually be used.
        return [
            self._dataset_path(
                local_input_path,
                self.path_mapper.remote_input_path_rewrite(
                    str(local_input_path)))
            for local_input_path in self._wrapper_input_paths
        ]

    def _dataset_path(self, local_dataset_path, remote_path):
        """Attach ``remote_path`` and a derived extra-files path to a dataset path.

        When ``remote_path`` is falsy, the extra-files path passed along is
        ``None``.
        """
        remote_extra_files_path = None
        if remote_path:
            # NOTE(review): drops the last len(".dat") characters without
            # checking the suffix; presumably remote_path always ends in
            # ".dat" -- confirm.
            remote_extra_files_path = "%s_files" % remote_path[0:-len(".dat")]
        return local_dataset_path.with_path_for_job(remote_path,
                                                    remote_extra_files_path)

    def working_directory(self):
        """Return ``working_directory`` from the remote job config."""
        return self._working_directory

    def config_directory(self):
        """Return ``configs_directory`` from the remote job config."""
        return self._config_directory

    def new_file_path(self):
        """Return the working directory as the location for new files."""
        return self.working_directory()  # Problems with doing this?

    def sep(self):
        """Return the path separator from the remote system properties."""
        return self._sep

    def version_path(self):
        """Return the version path chosen in ``__init__``."""
        return self._version_path

    def rewriter(self, parameter_value):
        """Return the rewritten form of ``parameter_value``, or the value itself.

        Previously computed rewrites are served from
        ``unstructured_path_rewrites``; new ones are obtained from
        ``path_mapper.check_for_arbitrary_rewrite`` and cached.
        """
        unstructured_path_rewrites = self.unstructured_path_rewrites
        if parameter_value in unstructured_path_rewrites:
            # Path previously mapped, use previous mapping.
            return unstructured_path_rewrites[parameter_value]
        # ``values()`` works on both Python 2 and 3; ``itervalues()`` does
        # not exist on Python 3 dicts.
        if parameter_value in unstructured_path_rewrites.values():
            # Path is a rewritten remote path (this might never occur,
            # consider dropping check...)
            return parameter_value

        rewrite, new_unstructured_path_rewrites = self.path_mapper.check_for_arbitrary_rewrite(
            parameter_value)
        if rewrite:
            unstructured_path_rewrites.update(new_unstructured_path_rewrites)
            return rewrite
        # Did not need to rewrite, use original path or value.
        return parameter_value

    def unstructured_path_rewriter(self):
        """Return the callable used to rewrite unstructured paths."""
        return self.rewriter

    def tool_directory(self):
        """Return ``tools_directory`` from the remote job config."""
        return self._tool_dir
コード例 #5
0
ファイル: pulsar.py プロジェクト: agrauslys/galaxy
class PulsarComputeEnvironment( ComputeEnvironment ):
    """Compute environment that reports paths as rewritten by a ``PathMapper``.

    Local paths are taken from the job wrapper's default compute environment
    and passed through the path mapper; directories and the path separator
    come from the remote job configuration.
    """

    def __init__( self, pulsar_client, job_wrapper, remote_job_config ):
        """Store collaborators and precompute the paths used later.

        ``remote_job_config`` is read for ``configs_directory``,
        ``working_directory``, ``tools_directory`` and ``system_properties``
        (itself read for ``separator``).
        """
        self.pulsar_client = pulsar_client
        self.job_wrapper = job_wrapper
        self.local_path_config = job_wrapper.default_compute_environment()
        # Cache of unstructured path -> rewritten path, filled by rewriter().
        self.unstructured_path_rewrites = {}
        # job_wrapper.prepare is going to expunge the job backing the following
        # computations, so precalculate these paths.
        self._wrapper_input_paths = self.local_path_config.input_paths()
        self._wrapper_output_paths = self.local_path_config.output_paths()
        self.path_mapper = PathMapper( pulsar_client, remote_job_config, self.local_path_config.working_directory() )
        self._config_directory = remote_job_config[ "configs_directory" ]
        self._working_directory = remote_job_config[ "working_directory" ]
        self._sep = remote_job_config[ "system_properties" ][ "separator" ]
        self._tool_dir = remote_job_config[ "tools_directory" ]
        # Use the mapper's rewrite of the version path when it is truthy;
        # otherwise keep the local one.
        version_path = self.local_path_config.version_path()
        new_version_path = self.path_mapper.remote_version_path_rewrite( version_path )
        self._version_path = new_version_path or version_path

    def output_paths( self ):
        """Return the precomputed output paths, each paired with its rewrite."""
        return [
            self._dataset_path( path, self.path_mapper.remote_output_path_rewrite( str( path ) ) )
            for path in self._wrapper_output_paths
        ]

    def input_paths( self ):
        """Return the precomputed input paths, each paired with its rewrite."""
        # This will over-copy in some cases. For instance in the case of task
        # splitting, this input will be copied even though only the work dir
        # input will actually be used.
        return [
            self._dataset_path( path, self.path_mapper.remote_input_path_rewrite( str( path ) ) )
            for path in self._wrapper_input_paths
        ]

    def _dataset_path( self, local_dataset_path, remote_path ):
        """Attach ``remote_path`` and a derived extra-files path to a dataset path.

        When ``remote_path`` is falsy, the extra-files path passed along is
        ``None``.
        """
        remote_extra_files_path = None
        if remote_path:
            # NOTE(review): drops the last len(".dat") characters without
            # checking the suffix; presumably remote_path always ends in
            # ".dat" -- confirm.
            remote_extra_files_path = "%s_files" % remote_path[ 0:-len( ".dat" ) ]
        return local_dataset_path.with_path_for_job( remote_path, remote_extra_files_path )

    def working_directory( self ):
        """Return ``working_directory`` from the remote job config."""
        return self._working_directory

    def config_directory( self ):
        """Return ``configs_directory`` from the remote job config."""
        return self._config_directory

    def new_file_path( self ):
        """Return the working directory as the location for new files."""
        return self.working_directory()  # Problems with doing this?

    def sep( self ):
        """Return the path separator from the remote system properties."""
        return self._sep

    def version_path( self ):
        """Return the version path chosen in ``__init__``."""
        return self._version_path

    def rewriter( self, parameter_value ):
        """Return the rewritten form of ``parameter_value``, or the value itself.

        Previously computed rewrites are served from
        ``unstructured_path_rewrites``; new ones are obtained from
        ``path_mapper.check_for_arbitrary_rewrite`` and cached.
        """
        unstructured_path_rewrites = self.unstructured_path_rewrites
        if parameter_value in unstructured_path_rewrites:
            # Path previously mapped, use previous mapping.
            return unstructured_path_rewrites[ parameter_value ]
        # ``values()`` works on both Python 2 and 3; ``itervalues()`` does
        # not exist on Python 3 dicts.
        if parameter_value in unstructured_path_rewrites.values():
            # Path is a rewritten remote path (this might never occur,
            # consider dropping check...)
            return parameter_value

        rewrite, new_unstructured_path_rewrites = self.path_mapper.check_for_arbitrary_rewrite( parameter_value )
        if rewrite:
            unstructured_path_rewrites.update( new_unstructured_path_rewrites )
            return rewrite
        # Did not need to rewrite, use original path or value.
        return parameter_value

    def unstructured_path_rewriter( self ):
        """Return the callable used to rewrite unstructured paths."""
        return self.rewriter

    def tool_directory( self ):
        """Return ``tools_directory`` from the remote job config."""
        return self._tool_dir
コード例 #6
0
class PulsarComputeEnvironment(ComputeEnvironment):
    """Compute environment that rewrites job paths through a ``PathMapper``.

    Rewrites produced for unstructured paths, input extra-files directories
    and input metadata files are recorded in the ``path_rewrites_*``
    dictionaries on the instance.
    """

    def __init__(self, pulsar_client, job_wrapper, remote_job_config):
        """Store collaborators and read remote directories from the config.

        :param pulsar_client: client object; stored and handed to
            ``PathMapper``.
        :param job_wrapper: job wrapper; supplies the default (local)
            compute environment.
        :param remote_job_config: mapping read for ``configs_directory``,
            ``working_directory``, ``tools_directory`` and
            ``system_properties`` (itself read for ``separator``).
        """
        self.pulsar_client = pulsar_client
        self.job_wrapper = job_wrapper
        self.local_path_config = job_wrapper.default_compute_environment()

        # Each dictionary maps an original path to its rewritten path.
        self.path_rewrites_unstructured = {}
        self.path_rewrites_input_extra = {}
        self.path_rewrites_input_metadata = {}

        self.path_mapper = PathMapper(pulsar_client, remote_job_config, self.local_path_config.working_directory())
        self._config_directory = remote_job_config["configs_directory"]
        self._working_directory = remote_job_config["working_directory"]
        self._sep = remote_job_config["system_properties"]["separator"]
        self._tool_dir = remote_job_config["tools_directory"]
        # job_wrapper.prepare is going to expunge the job backing the following
        # computation, so precalculate the version path.
        version_path = self.local_path_config.version_path()
        new_version_path = self.path_mapper.remote_version_path_rewrite(version_path)
        self._version_path = new_version_path or version_path

    def output_names(self):
        """Return the output basenames reported by the job wrapper."""
        # Maybe this should use the path mapper, but the path mapper just uses basenames
        return self.job_wrapper.get_output_basenames()

    def input_path_rewrite(self, dataset):
        """Return the path mapper's rewrite of the dataset's input path.

        The local environment's rewrite is used as the starting path when it
        is not ``None``; otherwise ``dataset.file_name`` is used.
        """
        local_input_path = self.local_path_config.input_path_rewrite(dataset)
        if local_input_path is None:
            local_input_path = dataset.file_name
        return self.path_mapper.remote_input_path_rewrite(local_input_path)

    def output_path_rewrite(self, dataset):
        """Return the path mapper's rewrite of the dataset's output path.

        The local environment's rewrite is used as the starting path when it
        is not ``None``; otherwise ``dataset.file_name`` is used.
        """
        local_output_path = self.local_path_config.output_path_rewrite(dataset)
        if local_output_path is None:
            local_output_path = dataset.file_name
        return self.path_mapper.remote_output_path_rewrite(local_output_path)

    def input_extra_files_rewrite(self, dataset):
        """Return the extra-files path derived from the input path rewrite.

        The result is also recorded in ``path_rewrites_input_extra`` under
        ``dataset.extra_files_path``.
        """
        input_path_rewrite = self.input_path_rewrite(dataset)
        # NOTE(review): drops the last len(".dat") characters without checking
        # the suffix, and assumes the rewrite is a string -- confirm.
        base_input_path = input_path_rewrite[0:-len(".dat")]
        remote_extra_files_path_rewrite = "%s_files" % base_input_path
        self.path_rewrites_input_extra[dataset.extra_files_path] = remote_extra_files_path_rewrite
        return remote_extra_files_path_rewrite

    def output_extra_files_rewrite(self, dataset):
        """Return the extra-files path derived from the output path rewrite."""
        output_path_rewrite = self.output_path_rewrite(dataset)
        # NOTE(review): same unconditional ".dat"-length strip as for inputs.
        base_output_path = output_path_rewrite[0:-len(".dat")]
        return "%s_files" % base_output_path

    def input_metadata_rewrite(self, dataset, metadata_val):
        """Return the rewrite for an input metadata path, or ``None``.

        A truthy rewrite is logged and recorded in
        ``path_rewrites_input_metadata`` under ``metadata_val``.
        """
        # May technically be incorrect to not pass through local_path_config.input_metadata_rewrite
        # first but that adds untested logic that wouldn't ever be used.
        remote_input_path = self.path_mapper.remote_input_path_rewrite(metadata_val, client_input_path_type=CLIENT_INPUT_PATH_TYPES.INPUT_METADATA_PATH)
        if remote_input_path:
            # Pass arguments separately so formatting only happens when the
            # record is actually emitted.
            log.info("input_metadata_rewrite is %s from %s", remote_input_path, metadata_val)
            self.path_rewrites_input_metadata[metadata_val] = remote_input_path
            return remote_input_path

        # No rewrite...
        return None

    def unstructured_path_rewrite(self, parameter_value):
        """Return the rewrite for an unstructured path, or ``None``.

        Previously computed rewrites are served from
        ``path_rewrites_unstructured``; new ones are obtained from
        ``path_mapper.check_for_arbitrary_rewrite`` and cached.
        """
        path_rewrites_unstructured = self.path_rewrites_unstructured
        if parameter_value in path_rewrites_unstructured:
            # Path previously mapped, use previous mapping.
            return path_rewrites_unstructured[parameter_value]

        rewrite, new_unstructured_path_rewrites = self.path_mapper.check_for_arbitrary_rewrite(parameter_value)
        if rewrite:
            path_rewrites_unstructured.update(new_unstructured_path_rewrites)
            return rewrite
        # Did not need to rewrite, use original path or value.
        return None

    def working_directory(self):
        """Return ``working_directory`` from the remote job config."""
        return self._working_directory

    def config_directory(self):
        """Return ``configs_directory`` from the remote job config."""
        return self._config_directory

    def new_file_path(self):
        """Return the working directory as the location for new files."""
        return self.working_directory()  # Problems with doing this?

    def sep(self):
        """Return the path separator from the remote system properties."""
        return self._sep

    def version_path(self):
        """Return the version path chosen in ``__init__``."""
        return self._version_path

    def tool_directory(self):
        """Return ``tools_directory`` from the remote job config."""
        return self._tool_dir

    def home_directory(self):
        """Return ``None``; not implemented yet."""
        # TODO: revisit and implement this, won't break anything working in the
        # meantime.
        return None

    def tmp_directory(self):
        """Return ``None``; not implemented yet."""
        # TODO: revisit and implement this, won't break anything working in the
        # meantime.
        return None

    def galaxy_url(self):
        """Return the destination's ``galaxy_infrastructure_url`` setting."""
        return self.job_wrapper.get_destination_configuration("galaxy_infrastructure_url")