コード例 #1
0
    def _upload_job_files_to_hdfs(self, where, job_dir, job, configs,
                                  proxy_configs=None):
        """Upload the job's mains, libs and builtin libs via ``mfs``.

        Main binaries are written to ``job_dir``; libs and builtin libs are
        written to ``job_dir + '/lib'``. The lib directory is created
        explicitly only when the job has at least one lib.

        :param where: object whose ``remote()`` context manager yields the
            remote used for every upload
        :param job_dir: target directory for main binaries
        :param job: job whose ``mains`` and ``libs`` are uploaded
        :param configs: passed to ``edp.get_builtin_binaries``
        :param proxy_configs: passed to ``d.get_raw_binary``
        :returns: list with the path of every uploaded file
        """
        main_binaries = job.mains or []
        lib_binaries = job.libs or []
        builtin_binaries = edp.get_builtin_binaries(job, configs)
        result = []
        fs_user = self.get_hdfs_user()
        libs_path = job_dir + '/lib'

        with where.remote() as conn:
            # Main binaries live directly in the job directory.
            for binary in main_binaries:
                content = d.get_raw_binary(binary, proxy_configs)
                mfs.put_file_to_maprfs(
                    conn, content, binary.name, job_dir, fs_user)
                result.append(os.path.join(job_dir, binary.name))

            if len(lib_binaries) > 0:
                self.create_hdfs_dir(conn, libs_path)

            # User-supplied libs go under the lib directory.
            for binary in lib_binaries:
                content = d.get_raw_binary(binary, proxy_configs)
                mfs.put_file_to_maprfs(
                    conn, content, binary.name, libs_path, fs_user)
                result.append(os.path.join(libs_path, binary.name))

            # Builtin libs already carry their raw content.
            for builtin in builtin_binaries:
                mfs.put_file_to_maprfs(
                    conn, builtin['raw'], builtin['name'], libs_path,
                    fs_user)
                result.append(libs_path + '/' + builtin['name'])
        return result
コード例 #2
0
ファイル: engine.py プロジェクト: snowind/sahara
    def _upload_job_files_to_hdfs(self, where, job_dir, job, configs,
                                  proxy_configs=None):
        """Upload the job's mains, libs and builtin libs to HDFS.

        Mains are written to ``job_dir``. Libs and builtin libs are written
        to ``job_dir`` joined with ``"lib"`` for every job type except
        ``edp.JOB_TYPE_SHELL``, for which the suffix is empty.

        :param where: passed to ``remote.get_remote`` to open the remote
            used for every upload
        :param job_dir: target directory for main binaries
        :param job: job whose ``mains``, ``libs`` and ``type`` are read
        :param configs: passed to ``edp.get_builtin_binaries``
        :param proxy_configs: passed to ``dispatch.get_raw_binary``
        :returns: list with the path of every uploaded file
        """
        main_binaries = job.mains or []
        lib_binaries = job.libs or []
        builtin_binaries = edp.get_builtin_binaries(job, configs)
        result = []
        fs_user = self.get_hdfs_user()
        if job.type != edp.JOB_TYPE_SHELL:
            suffix = "lib"
        else:
            suffix = ""
        libs_path = os.path.join(job_dir, suffix)

        with remote.get_remote(where) as conn:
            for binary in main_binaries:
                content = dispatch.get_raw_binary(binary, proxy_configs)
                h.put_file_to_hdfs(
                    conn, content, binary.name, job_dir, fs_user)
                result.append(job_dir + "/" + binary.name)

            if len(lib_binaries) and suffix:
                # HDFS 2.2.0 fails to put file if the lib dir does not exist
                self.create_hdfs_dir(conn, libs_path)

            for binary in lib_binaries:
                content = dispatch.get_raw_binary(binary, proxy_configs)
                h.put_file_to_hdfs(
                    conn, content, binary.name, libs_path, fs_user)
                result.append(libs_path + "/" + binary.name)

            # Builtin libs already carry their raw content.
            for builtin in builtin_binaries:
                h.put_file_to_hdfs(
                    conn, builtin["raw"], builtin["name"], libs_path,
                    fs_user)
                result.append(libs_path + "/" + builtin["name"])
        return result
コード例 #3
0
    def _upload_job_files_to_hdfs(self, where, job_dir, job, configs,
                                  proxy_configs=None):
        """Upload the job's mains, libs and builtin libs to HDFS.

        Mains are written to ``job_dir``; libs and builtin libs are written
        to ``job_dir + '/lib'``. The lib directory is created explicitly
        only when the job has at least one lib.

        :param where: passed to ``remote.get_remote`` to open the remote
            used for every upload
        :param job_dir: target directory for main binaries
        :param job: job whose ``mains`` and ``libs`` are uploaded
        :param configs: passed to ``edp.get_builtin_binaries``
        :param proxy_configs: passed to ``dispatch.get_raw_binary``
        :returns: list with the path of every uploaded file
        """
        main_binaries = job.mains or []
        lib_binaries = job.libs or []
        builtin_binaries = edp.get_builtin_binaries(job, configs)
        result = []
        fs_user = self.get_hdfs_user()
        libs_path = job_dir + '/lib'

        with remote.get_remote(where) as conn:
            for binary in main_binaries:
                content = dispatch.get_raw_binary(binary, proxy_configs)
                h.put_file_to_hdfs(
                    conn, content, binary.name, job_dir, fs_user)
                result.append(job_dir + '/' + binary.name)

            if len(lib_binaries) > 0:
                # HDFS 2.2.0 fails to put file if the lib dir does not exist
                self.create_hdfs_dir(conn, libs_path)

            for binary in lib_binaries:
                content = dispatch.get_raw_binary(binary, proxy_configs)
                h.put_file_to_hdfs(
                    conn, content, binary.name, libs_path, fs_user)
                result.append(libs_path + '/' + binary.name)

            # Builtin libs already carry their raw content.
            for builtin in builtin_binaries:
                h.put_file_to_hdfs(
                    conn, builtin['raw'], builtin['name'], libs_path,
                    fs_user)
                result.append(libs_path + '/' + builtin['name'])
        return result
コード例 #4
0
    def _upload_job_files_to_hdfs(self,
                                  where,
                                  job_dir,
                                  job,
                                  configs,
                                  proxy_configs=None):
        """Push every binary of ``job`` through ``mfs`` and list the paths.

        Main binaries land in ``job_dir``; libs and builtin libs land in
        ``job_dir + '/lib'``, which is created beforehand only if the job
        has libs of its own.

        :param where: object whose ``remote()`` context manager yields the
            remote used for every upload
        :param job_dir: target directory for main binaries
        :param job: job whose ``mains`` and ``libs`` are uploaded
        :param configs: passed to ``edp.get_builtin_binaries``
        :param proxy_configs: passed to ``d.get_raw_binary``
        :returns: list with the path of every uploaded file
        """
        job_mains = job.mains or []
        job_libs = job.libs or []
        builtins_to_add = edp.get_builtin_binaries(job, configs)
        paths = []
        user = self.get_hdfs_user()
        lib_target = job_dir + '/lib'

        with where.remote() as rmt:
            for main_bin in job_mains:
                payload = d.get_raw_binary(main_bin, proxy_configs)
                mfs.put_file_to_maprfs(
                    rmt, payload, main_bin.name, job_dir, user)
                paths.append(os.path.join(job_dir, main_bin.name))

            if len(job_libs) > 0:
                self.create_hdfs_dir(rmt, lib_target)

            for lib_bin in job_libs:
                payload = d.get_raw_binary(lib_bin, proxy_configs)
                mfs.put_file_to_maprfs(
                    rmt, payload, lib_bin.name, lib_target, user)
                paths.append(os.path.join(lib_target, lib_bin.name))

            for entry in builtins_to_add:
                mfs.put_file_to_maprfs(
                    rmt, entry['raw'], entry['name'], lib_target, user)
                paths.append(lib_target + '/' + entry['name'])
        return paths
コード例 #5
0
ファイル: engine.py プロジェクト: madar010/mad
    def _upload_job_files_to_hdfs(self, where, job_dir, job, configs,
                                  proxy_configs=None):
        """Upload the job's mains, libs and builtin libs to HDFS.

        Mains are uploaded to ``job_dir``. Libs and builtin libs are
        uploaded to ``lib_dir``, which is ``job_dir`` joined with ``'lib'``
        for every job type except ``edp.JOB_TYPE_SHELL`` (empty suffix, so
        ``lib_dir`` is ``job_dir`` with a trailing separator).

        :param where: passed to ``remote.get_remote`` to open the remote
            used for every upload
        :param job_dir: target directory for main binaries
        :param job: job whose ``mains``, ``libs`` and ``type`` are read
        :param configs: passed to ``edp.get_builtin_binaries``
        :param proxy_configs: passed to ``self._upload_job_binaries``
        :returns: list with the path of every uploaded file
        """
        mains = list(job.mains) if job.mains else []
        libs = list(job.libs) if job.libs else []
        builtin_libs = edp.get_builtin_binaries(job, configs)
        uploaded_paths = []
        hdfs_user = self.get_hdfs_user()
        job_dir_suffix = 'lib' if job.type != edp.JOB_TYPE_SHELL else ''
        lib_dir = os.path.join(job_dir, job_dir_suffix)

        with remote.get_remote(where) as r:
            job_binaries = mains + libs
            self._prepare_job_binaries(job_binaries, r)

            # upload mains
            uploaded_paths.extend(self._upload_job_binaries(r, mains,
                                                            proxy_configs,
                                                            hdfs_user,
                                                            job_dir))
            # upload libs
            if len(libs) and job_dir_suffix:
                # HDFS 2.2.0 fails to put file if the lib dir does not exist
                self.create_hdfs_dir(r, lib_dir)
            uploaded_paths.extend(self._upload_job_binaries(r, libs,
                                                            proxy_configs,
                                                            hdfs_user,
                                                            lib_dir))
            # upload builtin_libs
            for lib in builtin_libs:
                h.put_file_to_hdfs(r, lib['raw'], lib['name'], lib_dir,
                                   hdfs_user)
                # Join with a path separator: plain concatenation produced
                # '<job_dir>/lib<name>' whenever the 'lib' suffix is in use.
                uploaded_paths.append(os.path.join(lib_dir, lib['name']))
        return uploaded_paths
コード例 #6
0
 def test_get_builtin_binaries_java_available(self):
     """A Java job with ADAPT_FOR_OOZIE enabled gets one builtin jar."""
     java_job = mock.Mock(type=edp.JOB_TYPE_JAVA)
     oozie_configs = {edp.ADAPT_FOR_OOZIE: True}

     found = edp.get_builtin_binaries(java_job, oozie_configs)

     # Exactly one binary, named 'builtin-*.jar', with content attached.
     self.assertEqual(1, len(found))
     builtin = found[0]
     jar_name = builtin['name']
     self.assertTrue(jar_name.startswith('builtin-'))
     self.assertTrue(jar_name.endswith('.jar'))
     self.assertIsNotNone(builtin['raw'])
コード例 #7
0
ファイル: test_edp.py プロジェクト: AlexanderYAPPO/sahara
 def test_get_builtin_binaries_java_available(self):
     """Check the single builtin binary returned for a Java job.

     The job is a mock of type ``edp.JOB_TYPE_JAVA`` and the configs
     enable ``edp.ADAPT_FOR_OOZIE``.
     """
     fake_job = mock.Mock(type=edp.JOB_TYPE_JAVA)
     result = edp.get_builtin_binaries(fake_job,
                                       {edp.ADAPT_FOR_OOZIE: True})
     self.assertEqual(1, len(result))

     only_binary = result[0]
     name = only_binary['name']
     self.assertTrue(name.startswith('builtin-'))
     self.assertTrue(name.endswith('.jar'))
     self.assertIsNotNone(only_binary['raw'])
コード例 #8
0
ファイル: engine.py プロジェクト: msionkin/sahara
    def _upload_job_files_to_hdfs(self,
                                  where,
                                  job_dir,
                                  job,
                                  configs,
                                  proxy_configs=None):
        """Upload the job's mains, libs and builtin libs to HDFS.

        Mains are uploaded to ``job_dir``. Libs and builtin libs are
        uploaded to ``lib_dir``, which is ``job_dir`` joined with ``'lib'``
        for every job type except ``edp.JOB_TYPE_SHELL`` (empty suffix).

        ``dispatch.get_raw_binary`` may return either raw content or a dict
        with ``"type" == "path"``; in the latter case the file at
        ``raw_data['path']`` is copied with ``h.copy_from_local`` instead of
        being written from memory.

        :param where: passed to ``remote.get_remote`` to open the remote
            used for every upload
        :param job_dir: target directory for main binaries
        :param job: job whose ``mains``, ``libs`` and ``type`` are read
        :param configs: passed to ``edp.get_builtin_binaries``
        :param proxy_configs: passed to ``dispatch.get_raw_binary``
        :returns: list with the path of every uploaded file
        """
        mains = job.mains or []
        libs = job.libs or []
        builtin_libs = edp.get_builtin_binaries(job, configs)
        uploaded_paths = []
        hdfs_user = self.get_hdfs_user()
        job_dir_suffix = 'lib' if job.type != edp.JOB_TYPE_SHELL else ''
        lib_dir = os.path.join(job_dir, job_dir_suffix)

        with remote.get_remote(where) as r:
            for main in mains:
                raw_data = dispatch.get_raw_binary(main,
                                                   proxy_configs=proxy_configs,
                                                   remote=r)
                if isinstance(raw_data, dict) and raw_data["type"] == "path":
                    h.copy_from_local(r, raw_data['path'], job_dir, hdfs_user)
                else:
                    h.put_file_to_hdfs(r, raw_data, main.name, job_dir,
                                       hdfs_user)
                uploaded_paths.append(job_dir + '/' + main.name)
            if len(libs) and job_dir_suffix:
                # HDFS 2.2.0 fails to put file if the lib dir does not exist
                self.create_hdfs_dir(r, lib_dir)
            for lib in libs:
                # Pass the open connection ``r`` (as the mains loop does),
                # not the ``remote`` module itself.
                raw_data = dispatch.get_raw_binary(lib,
                                                   proxy_configs=proxy_configs,
                                                   remote=r)
                if isinstance(raw_data, dict) and raw_data["type"] == "path":
                    h.copy_from_local(r, raw_data['path'], lib_dir, hdfs_user)
                else:
                    h.put_file_to_hdfs(r, raw_data, lib.name, lib_dir,
                                       hdfs_user)
                uploaded_paths.append(lib_dir + '/' + lib.name)
            for lib in builtin_libs:
                h.put_file_to_hdfs(r, lib['raw'], lib['name'], lib_dir,
                                   hdfs_user)
                uploaded_paths.append(lib_dir + '/' + lib['name'])
        return uploaded_paths
コード例 #9
0
ファイル: engine.py プロジェクト: rogeryu27/sahara
    def _upload_job_files_to_hdfs(self, where, job_dir, job, configs,
                                  proxy_configs=None):
        """Upload the job's mains, libs and builtin libs to HDFS.

        Mains are uploaded to ``job_dir``. Libs and builtin libs are
        uploaded to ``lib_dir``, which is ``job_dir`` joined with ``'lib'``
        for every job type except ``edp.JOB_TYPE_SHELL`` (empty suffix).

        ``dispatch.get_raw_binary`` may return either raw content or a dict
        with ``"type" == "path"``; in the latter case the file at
        ``raw_data['path']`` is copied with ``h.copy_from_local`` instead of
        being written from memory.

        :param where: passed to ``remote.get_remote`` to open the remote
            used for every upload
        :param job_dir: target directory for main binaries
        :param job: job whose ``mains``, ``libs`` and ``type`` are read
        :param configs: passed to ``edp.get_builtin_binaries``
        :param proxy_configs: passed to ``dispatch.get_raw_binary``
        :returns: list with the path of every uploaded file
        """
        mains = job.mains or []
        libs = job.libs or []
        builtin_libs = edp.get_builtin_binaries(job, configs)
        uploaded_paths = []
        hdfs_user = self.get_hdfs_user()
        job_dir_suffix = 'lib' if job.type != edp.JOB_TYPE_SHELL else ''
        lib_dir = os.path.join(job_dir, job_dir_suffix)

        with remote.get_remote(where) as r:
            for main in mains:
                raw_data = dispatch.get_raw_binary(
                    main, proxy_configs=proxy_configs, remote=r)
                if isinstance(raw_data, dict) and raw_data["type"] == "path":
                    h.copy_from_local(r, raw_data['path'],
                                      job_dir, hdfs_user)
                else:
                    h.put_file_to_hdfs(r, raw_data, main.name,
                                       job_dir, hdfs_user)
                uploaded_paths.append(job_dir + '/' + main.name)
            if len(libs) and job_dir_suffix:
                # HDFS 2.2.0 fails to put file if the lib dir does not exist
                self.create_hdfs_dir(r, lib_dir)
            for lib in libs:
                # Pass the open connection ``r`` (as the mains loop does),
                # not the ``remote`` module itself.
                raw_data = dispatch.get_raw_binary(
                    lib, proxy_configs=proxy_configs, remote=r)
                if isinstance(raw_data, dict) and raw_data["type"] == "path":
                    h.copy_from_local(r, raw_data['path'],
                                      lib_dir, hdfs_user)
                else:
                    h.put_file_to_hdfs(r, raw_data, lib.name,
                                       lib_dir, hdfs_user)
                uploaded_paths.append(lib_dir + '/' + lib.name)
            for lib in builtin_libs:
                h.put_file_to_hdfs(r, lib['raw'], lib['name'], lib_dir,
                                   hdfs_user)
                uploaded_paths.append(lib_dir + '/' + lib['name'])
        return uploaded_paths
コード例 #10
0
ファイル: base_edp_engine.py プロジェクト: madar010/mad
    def _upload_job_files_to_hdfs(self, where, job_dir, job, configs,
                                  proxy_configs=None):
        """Copy the job's mains, libs and builtin libs to the cluster FS.

        Each main and lib is first resolved through
        ``jb_manager.JOB_BINARIES.get_job_binary_by_url`` and copied to the
        cluster with ``copy_binary_to_cluster``; the resulting path is then
        copied to its target with ``mfs.copy_from_local``. Builtin libs are
        written from their raw content with ``mfs.put_file_to_maprfs``.

        :param where: object whose ``remote()`` context manager yields the
            remote used for every upload
        :param job_dir: target directory for main binaries; libs go to
            ``job_dir + '/lib'``
        :param job: job whose ``mains`` and ``libs`` are uploaded
        :param configs: passed to ``edp.get_builtin_binaries``
        :param proxy_configs: passed to ``copy_binary_to_cluster``
        :returns: list with the target path of every uploaded file
        """
        main_binaries = job.mains or []
        lib_binaries = job.libs or []
        builtin_binaries = edp.get_builtin_binaries(job, configs)
        result = []
        fs_user = self.get_hdfs_user()
        libs_path = job_dir + '/lib'

        with where.remote() as conn:
            for binary in main_binaries:
                resolved = jb_manager.JOB_BINARIES.get_job_binary_by_url(
                    binary.url)
                local_path = resolved.copy_binary_to_cluster(
                    binary, proxy_configs=proxy_configs, remote=conn,
                    context=context.ctx())
                destination = os.path.join(job_dir, binary.name)
                mfs.copy_from_local(conn, local_path, destination, fs_user)
                result.append(destination)

            if len(lib_binaries) > 0:
                self.create_hdfs_dir(conn, libs_path)

            for binary in lib_binaries:
                resolved = jb_manager.JOB_BINARIES.get_job_binary_by_url(
                    binary.url)
                local_path = resolved.copy_binary_to_cluster(
                    binary, proxy_configs=proxy_configs, remote=conn,
                    context=context.ctx())
                destination = os.path.join(libs_path, binary.name)
                mfs.copy_from_local(conn, local_path, destination, fs_user)
                result.append(destination)

            # Builtin libs already carry their raw content.
            for builtin in builtin_binaries:
                mfs.put_file_to_maprfs(
                    conn, builtin['raw'], builtin['name'], libs_path,
                    fs_user)
                result.append(libs_path + '/' + builtin['name'])
        return result
コード例 #11
0
 def test_get_builtin_binaries_empty(self):
     """With empty configs, every job type yields no builtin binaries."""
     for kind in edp.JOB_TYPES_ALL:
         fake_job = mock.Mock(type=kind)
         binaries = edp.get_builtin_binaries(fake_job, {})
         self.assertEqual(0, len(binaries))
コード例 #12
0
ファイル: edp.py プロジェクト: openstack/sahara
def get_builtin_binaries(job, configs, **kwargs):
    """Return the result of ``edp.get_builtin_binaries(job, configs)``.

    Extra keyword arguments are accepted but not forwarded.
    """
    builtin_binaries = edp.get_builtin_binaries(job, configs)
    return builtin_binaries
コード例 #13
0
def get_builtin_binaries(job, configs, **kwargs):
    """Delegate to ``edp.get_builtin_binaries`` for ``job`` and ``configs``.

    Any additional keyword arguments are ignored by this wrapper.
    """
    result = edp.get_builtin_binaries(job, configs)
    return result
コード例 #14
0
ファイル: test_edp.py プロジェクト: AlexanderYAPPO/sahara
 def test_get_builtin_binaries_empty(self):
     """Check that empty configs give zero builtin binaries per job type."""
     for current_type in edp.JOB_TYPES_ALL:
         mocked_job = mock.Mock(type=current_type)
         count = len(edp.get_builtin_binaries(mocked_job, {}))
         self.assertEqual(0, count)