Example #1
    def _upload_job_files_to_hdfs(self, where, job_dir, job, configs, proxy_configs=None):
        mains = job.mains or []
        libs = job.libs or []
        builtin_libs = edp.get_builtin_binaries(job, configs)
        uploaded_paths = []
        hdfs_user = self.get_hdfs_user()
        job_dir_suffix = "lib" if job.type != edp.JOB_TYPE_SHELL else ""
        lib_dir = os.path.join(job_dir, job_dir_suffix)

        with remote.get_remote(where) as r:
            for main in mains:
                raw_data = dispatch.get_raw_binary(main, proxy_configs=proxy_configs, remote=r)
                if isinstance(raw_data, dict) and raw_data["type"] == "path":
                    h.copy_from_local(r, raw_data["path"], job_dir, hdfs_user)
                else:
                    h.put_file_to_hdfs(r, raw_data, main.name, job_dir, hdfs_user)
                uploaded_paths.append(job_dir + "/" + main.name)
            if libs and job_dir_suffix:
                # HDFS 2.2.0 fails to put file if the lib dir does not exist
                self.create_hdfs_dir(r, lib_dir)
            for lib in libs:
                raw_data = dispatch.get_raw_binary(lib, proxy_configs=proxy_configs, remote=r)
                if isinstance(raw_data, dict) and raw_data["type"] == "path":
                    h.copy_from_local(r, raw_data["path"], lib_dir, hdfs_user)
                else:
                    h.put_file_to_hdfs(r, raw_data, lib.name, lib_dir, hdfs_user)
                uploaded_paths.append(lib_dir + "/" + lib.name)
            for lib in builtin_libs:
                h.put_file_to_hdfs(r, lib["raw"], lib["name"], lib_dir, hdfs_user)
                uploaded_paths.append(lib_dir + "/" + lib["name"])
        return uploaded_paths
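The branch on raw_data mirrors the contract of dispatch.get_raw_binary: it returns either the file contents or, for binaries already staged on local disk, a dict describing the path. A minimal standalone sketch of that contract (fetch_binary is a hypothetical stand-in for the real dispatcher):

# Hypothetical sketch of the dispatch contract assumed above; the real
# dispatcher is dispatch.get_raw_binary.
def fetch_binary(url):
    if url.startswith("file://"):
        # Already on local disk: return a path descriptor so the caller
        # can use copyFromLocal instead of streaming the bytes over.
        return {"type": "path", "path": url[len("file://"):]}
    # Otherwise return the raw contents, to be written with
    # put_file_to_hdfs.
    return b"...contents fetched from a remote binary store..."

raw_data = fetch_binary("file:///tmp/job.jar")
if isinstance(raw_data, dict) and raw_data["type"] == "path":
    print("copy from local path:", raw_data["path"])
else:
    print("stream %d bytes to HDFS" % len(raw_data))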
Example #2
    def _upload_job_binaries(self, r, job_binaries, proxy_configs,
                             hdfs_user, job_dir):
        uploaded_paths = []
        for jb in job_binaries:
            jb_binary = jb_manager.JOB_BINARIES.get_job_binary_by_url(jb.url)
            path = jb_binary.copy_binary_to_cluster(
                jb, proxy_configs=proxy_configs, remote=r,
                context=context.ctx())

            h.copy_from_local(r, path, job_dir, hdfs_user)
            uploaded_paths.append(path)
        return uploaded_paths
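Unlike Example #1, this variant delegates retrieval through jb_manager.JOB_BINARIES, which resolves a handler from the binary's URL before copying it to the cluster. A minimal sketch of such a scheme-based lookup (the handler classes and registry here are assumptions, not sahara's actual ones):

from urllib.parse import urlparse


# Hypothetical handlers mirroring the get_job_binary_by_url(...) lookup
# used above; real handlers would fetch the binary to a node-local path.
class SwiftBinary(object):
    def copy_binary_to_cluster(self, jb, **kwargs):
        # Download from Swift to a node-local path (elided) and return it.
        return "/tmp/%s" % jb["name"]


class InternalDbBinary(object):
    def copy_binary_to_cluster(self, jb, **kwargs):
        # Read from the internal DB to a node-local path (elided).
        return "/tmp/%s" % jb["name"]


_HANDLERS = {"swift": SwiftBinary(), "internal-db": InternalDbBinary()}


def get_job_binary_by_url(url):
    # The URL scheme picks the handler, e.g. swift://container/object.
    return _HANDLERS[urlparse(url).scheme]


handler = get_job_binary_by_url("swift://container/wordcount.jar")
print(handler.copy_binary_to_cluster({"name": "wordcount.jar"}))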
Example #3
    def _upload_job_files_to_hdfs(self,
                                  where,
                                  job_dir,
                                  job,
                                  configs,
                                  proxy_configs=None):
        mains = job.mains or []
        libs = job.libs or []
        builtin_libs = edp.get_builtin_binaries(job, configs)
        uploaded_paths = []
        hdfs_user = self.get_hdfs_user()
        job_dir_suffix = 'lib' if job.type != edp.JOB_TYPE_SHELL else ''
        lib_dir = os.path.join(job_dir, job_dir_suffix)

        with remote.get_remote(where) as r:
            for main in mains:
                raw_data = dispatch.get_raw_binary(main,
                                                   proxy_configs=proxy_configs,
                                                   remote=r)
                if isinstance(raw_data, dict) and raw_data["type"] == "path":
                    h.copy_from_local(r, raw_data['path'], job_dir, hdfs_user)
                else:
                    h.put_file_to_hdfs(r, raw_data, main.name, job_dir,
                                       hdfs_user)
                uploaded_paths.append(job_dir + '/' + main.name)
            if libs and job_dir_suffix:
                # HDFS 2.2.0 fails to put file if the lib dir does not exist
                self.create_hdfs_dir(r, lib_dir)
            for lib in libs:
                raw_data = dispatch.get_raw_binary(lib,
                                                   proxy_configs=proxy_configs,
                                                   remote=r)
                if isinstance(raw_data, dict) and raw_data["type"] == "path":
                    h.copy_from_local(r, raw_data['path'], lib_dir, hdfs_user)
                else:
                    h.put_file_to_hdfs(r, raw_data, lib.name, lib_dir,
                                       hdfs_user)
                uploaded_paths.append(lib_dir + '/' + lib.name)
            for lib in builtin_libs:
                h.put_file_to_hdfs(r, lib['raw'], lib['name'], lib_dir,
                                   hdfs_user)
                uploaded_paths.append(lib_dir + '/' + lib['name'])
        return uploaded_paths
Example #4
    def test_copy_from_local(self):
        helper.copy_from_local(self.cluster, 'Galaxy', 'Earth', 'BigBang')
        self.cluster.execute_command.assert_called_once_with(
            'sudo su - -c "hadoop dfs -copyFromLocal Galaxy Earth" BigBang')
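The assertion fixes the exact shell command copy_from_local is expected to build: a hadoop dfs -copyFromLocal wrapped in sudo su - -c "..." <hdfs_user> so it runs as the HDFS user. A sketch of a helper that would pass this test (assumed implementation; only the command format comes from the assertion):

# Assumed implementation sketch that satisfies the test above; the real
# helper is h.copy_from_local in sahara's HDFS helper module.
def copy_from_local(r, local_path, hdfs_path, hdfs_user):
    # Run copyFromLocal as the HDFS user via a login shell, since the
    # connected user usually lacks HDFS write permissions.
    r.execute_command(
        'sudo su - -c "hadoop dfs -copyFromLocal %s %s" %s' % (
            local_path, hdfs_path, hdfs_user))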