def _upload_job_files_to_hdfs(self, where, job_dir, job, configs, proxy_configs=None): mains = job.mains or [] libs = job.libs or [] builtin_libs = edp.get_builtin_binaries(job, configs) uploaded_paths = [] hdfs_user = self.get_hdfs_user() lib_dir = job_dir + '/lib' with where.remote() as r: for m in mains: raw_data = d.get_raw_binary(m, proxy_configs) mfs.put_file_to_maprfs(r, raw_data, m.name, job_dir, hdfs_user) uploaded_paths.append(os.path.join(job_dir, m.name)) if len(libs) > 0: self.create_hdfs_dir(r, lib_dir) for l in libs: raw_data = d.get_raw_binary(l, proxy_configs) mfs.put_file_to_maprfs(r, raw_data, l.name, lib_dir, hdfs_user) uploaded_paths.append(os.path.join(lib_dir, l.name)) for lib in builtin_libs: mfs.put_file_to_maprfs(r, lib['raw'], lib['name'], lib_dir, hdfs_user) uploaded_paths.append(lib_dir + '/' + lib['name']) return uploaded_paths
def _upload_job_files_to_hdfs(self, where, job_dir, job, configs, proxy_configs=None): mains = job.mains or [] libs = job.libs or [] builtin_libs = edp.get_builtin_binaries(job, configs) uploaded_paths = [] hdfs_user = self.get_hdfs_user() job_dir_suffix = "lib" if job.type != edp.JOB_TYPE_SHELL else "" lib_dir = os.path.join(job_dir, job_dir_suffix) with remote.get_remote(where) as r: for main in mains: raw_data = dispatch.get_raw_binary(main, proxy_configs) h.put_file_to_hdfs(r, raw_data, main.name, job_dir, hdfs_user) uploaded_paths.append(job_dir + "/" + main.name) if len(libs) and job_dir_suffix: # HDFS 2.2.0 fails to put file if the lib dir does not exist self.create_hdfs_dir(r, lib_dir) for lib in libs: raw_data = dispatch.get_raw_binary(lib, proxy_configs) h.put_file_to_hdfs(r, raw_data, lib.name, lib_dir, hdfs_user) uploaded_paths.append(lib_dir + "/" + lib.name) for lib in builtin_libs: h.put_file_to_hdfs(r, lib["raw"], lib["name"], lib_dir, hdfs_user) uploaded_paths.append(lib_dir + "/" + lib["name"]) return uploaded_paths
def _upload_job_files_to_hdfs(self, where, job_dir, job, configs, proxy_configs=None): mains = job.mains or [] libs = job.libs or [] builtin_libs = edp.get_builtin_binaries(job, configs) uploaded_paths = [] hdfs_user = self.get_hdfs_user() lib_dir = job_dir + '/lib' with remote.get_remote(where) as r: for main in mains: raw_data = dispatch.get_raw_binary(main, proxy_configs) h.put_file_to_hdfs(r, raw_data, main.name, job_dir, hdfs_user) uploaded_paths.append(job_dir + '/' + main.name) if len(libs) > 0: # HDFS 2.2.0 fails to put file if the lib dir does not exist self.create_hdfs_dir(r, lib_dir) for lib in libs: raw_data = dispatch.get_raw_binary(lib, proxy_configs) h.put_file_to_hdfs(r, raw_data, lib.name, lib_dir, hdfs_user) uploaded_paths.append(lib_dir + '/' + lib.name) for lib in builtin_libs: h.put_file_to_hdfs(r, lib['raw'], lib['name'], lib_dir, hdfs_user) uploaded_paths.append(lib_dir + '/' + lib['name']) return uploaded_paths
def _upload_job_files_to_hdfs(self, where, job_dir, job, configs, proxy_configs=None): mains = list(job.mains) if job.mains else [] libs = list(job.libs) if job.libs else [] builtin_libs = edp.get_builtin_binaries(job, configs) uploaded_paths = [] hdfs_user = self.get_hdfs_user() job_dir_suffix = 'lib' if job.type != edp.JOB_TYPE_SHELL else '' lib_dir = os.path.join(job_dir, job_dir_suffix) with remote.get_remote(where) as r: job_binaries = mains + libs self._prepare_job_binaries(job_binaries, r) # upload mains uploaded_paths.extend(self._upload_job_binaries(r, mains, proxy_configs, hdfs_user, job_dir)) # upload libs if len(libs) and job_dir_suffix: # HDFS 2.2.0 fails to put file if the lib dir does not exist self.create_hdfs_dir(r, lib_dir) uploaded_paths.extend(self._upload_job_binaries(r, libs, proxy_configs, hdfs_user, lib_dir)) # upload buitin_libs for lib in builtin_libs: h.put_file_to_hdfs(r, lib['raw'], lib['name'], lib_dir, hdfs_user) uploaded_paths.append(lib_dir + lib['name']) return uploaded_paths
def test_get_builtin_binaries_java_available(self): job = mock.Mock(type=edp.JOB_TYPE_JAVA) configs = {edp.ADAPT_FOR_OOZIE: True} binaries = edp.get_builtin_binaries(job, configs) self.assertEqual(1, len(binaries)) binary = binaries[0] self.assertTrue(binary['name'].startswith('builtin-')) self.assertTrue(binary['name'].endswith('.jar')) self.assertIsNotNone(binary['raw'])
def _upload_job_files_to_hdfs(self, where, job_dir, job, configs, proxy_configs=None): mains = job.mains or [] libs = job.libs or [] builtin_libs = edp.get_builtin_binaries(job, configs) uploaded_paths = [] hdfs_user = self.get_hdfs_user() job_dir_suffix = 'lib' if job.type != edp.JOB_TYPE_SHELL else '' lib_dir = os.path.join(job_dir, job_dir_suffix) with remote.get_remote(where) as r: for main in mains: raw_data = dispatch.get_raw_binary(main, proxy_configs=proxy_configs, remote=r) if isinstance(raw_data, dict) and raw_data["type"] == "path": h.copy_from_local(r, raw_data['path'], job_dir, hdfs_user) else: h.put_file_to_hdfs(r, raw_data, main.name, job_dir, hdfs_user) uploaded_paths.append(job_dir + '/' + main.name) if len(libs) and job_dir_suffix: # HDFS 2.2.0 fails to put file if the lib dir does not exist self.create_hdfs_dir(r, lib_dir) for lib in libs: raw_data = dispatch.get_raw_binary(lib, proxy_configs=proxy_configs, remote=remote) if isinstance(raw_data, dict) and raw_data["type"] == "path": h.copy_from_local(r, raw_data['path'], lib_dir, hdfs_user) else: h.put_file_to_hdfs(r, raw_data, lib.name, lib_dir, hdfs_user) uploaded_paths.append(lib_dir + '/' + lib.name) for lib in builtin_libs: h.put_file_to_hdfs(r, lib['raw'], lib['name'], lib_dir, hdfs_user) uploaded_paths.append(lib_dir + '/' + lib['name']) return uploaded_paths
def _upload_job_files_to_hdfs(self, where, job_dir, job, configs, proxy_configs=None): mains = job.mains or [] libs = job.libs or [] builtin_libs = edp.get_builtin_binaries(job, configs) uploaded_paths = [] hdfs_user = self.get_hdfs_user() job_dir_suffix = 'lib' if job.type != edp.JOB_TYPE_SHELL else '' lib_dir = os.path.join(job_dir, job_dir_suffix) with remote.get_remote(where) as r: for main in mains: raw_data = dispatch.get_raw_binary( main, proxy_configs=proxy_configs, remote=r) if isinstance(raw_data, dict) and raw_data["type"] == "path": h.copy_from_local(r, raw_data['path'], job_dir, hdfs_user) else: h.put_file_to_hdfs(r, raw_data, main.name, job_dir, hdfs_user) uploaded_paths.append(job_dir + '/' + main.name) if len(libs) and job_dir_suffix: # HDFS 2.2.0 fails to put file if the lib dir does not exist self.create_hdfs_dir(r, lib_dir) for lib in libs: raw_data = dispatch.get_raw_binary( lib, proxy_configs=proxy_configs, remote=remote) if isinstance(raw_data, dict) and raw_data["type"] == "path": h.copy_from_local(r, raw_data['path'], lib_dir, hdfs_user) else: h.put_file_to_hdfs(r, raw_data, lib.name, lib_dir, hdfs_user) uploaded_paths.append(lib_dir + '/' + lib.name) for lib in builtin_libs: h.put_file_to_hdfs(r, lib['raw'], lib['name'], lib_dir, hdfs_user) uploaded_paths.append(lib_dir + '/' + lib['name']) return uploaded_paths
def _upload_job_files_to_hdfs(self, where, job_dir, job, configs, proxy_configs=None): mains = job.mains or [] libs = job.libs or [] builtin_libs = edp.get_builtin_binaries(job, configs) uploaded_paths = [] hdfs_user = self.get_hdfs_user() lib_dir = job_dir + '/lib' with where.remote() as r: for m in mains: path = jb_manager.JOB_BINARIES. \ get_job_binary_by_url(m.url). \ copy_binary_to_cluster(m, proxy_configs=proxy_configs, remote=r, context=context.ctx()) target = os.path.join(job_dir, m.name) mfs.copy_from_local(r, path, target, hdfs_user) uploaded_paths.append(target) if len(libs) > 0: self.create_hdfs_dir(r, lib_dir) for l in libs: path = jb_manager.JOB_BINARIES. \ get_job_binary_by_url(l.url). \ copy_binary_to_cluster(l, proxy_configs=proxy_configs, remote=r, context=context.ctx()) target = os.path.join(lib_dir, l.name) mfs.copy_from_local(r, path, target, hdfs_user) uploaded_paths.append(target) for lib in builtin_libs: mfs.put_file_to_maprfs(r, lib['raw'], lib['name'], lib_dir, hdfs_user) uploaded_paths.append(lib_dir + '/' + lib['name']) return uploaded_paths
def test_get_builtin_binaries_empty(self): for job_type in edp.JOB_TYPES_ALL: job = mock.Mock(type=job_type) self.assertEqual(0, len(edp.get_builtin_binaries(job, {})))
def get_builtin_binaries(job, configs, **kwargs): return edp.get_builtin_binaries(job, configs)