def _post_configuration(pctx, instance):
    dirs = _get_hadoop_dirs(instance)
    args = {
        'hadoop_user': HADOOP_USER,
        'hadoop_group': HADOOP_GROUP,
        'hadoop_conf_dir': HADOOP_CONF_DIR,
        'oozie_conf_dir': OOZIE_CONF_DIR,
        'hadoop_name_dirs': " ".join(dirs['hadoop_name_dirs']),
        'hadoop_data_dirs': " ".join(dirs['hadoop_data_dirs']),
        'hadoop_log_dir': dirs['hadoop_log_dir'],
        'hadoop_secure_dn_log_dir': dirs['hadoop_secure_dn_log_dir'],
        'yarn_log_dir': dirs['yarn_log_dir']
    }
    post_conf_script = utils.get_file_text(
        'plugins/vanilla/hadoop2/resources/post_conf.template',
        'sahara_plugin_vanilla')
    post_conf_script = post_conf_script.format(**args)

    with instance.remote() as r:
        r.write_file_to('/tmp/post_conf.sh', post_conf_script)
        r.execute_command('chmod +x /tmp/post_conf.sh')
        r.execute_command('sudo /tmp/post_conf.sh')

        if config_helper.is_data_locality_enabled(pctx, instance.cluster):
            t_script = HADOOP_CONF_DIR + '/topology.sh'
            r.write_file_to(
                t_script,
                utils.get_file_text(
                    'plugins/vanilla/hadoop2/resources/topology.sh',
                    'sahara_plugin_vanilla'),
                run_as_root=True)
            r.execute_command('chmod +x ' + t_script, run_as_root=True)
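# A minimal sketch of the template substitution performed above. The
# placeholder names come from the `args` dict; this template body is
# hypothetical, not the actual post_conf.template resource.
post_conf_template = (
    "#!/bin/bash\n"
    "mkdir -p {hadoop_name_dirs} {hadoop_data_dirs}\n"
    "chown -R {hadoop_user}:{hadoop_group} {hadoop_log_dir}\n")
print(post_conf_template.format(
    hadoop_user='hadoop', hadoop_group='hadoop',
    hadoop_log_dir='/var/log/hadoop',
    hadoop_name_dirs='/data1/dfs/nn',
    hadoop_data_dirs='/data1/dfs/dn /data2/dfs/dn'))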
def get_config_files(self, cluster_context, configs, instance=None):
    template = 'plugins/mapr/services/hue/resources/hue_%s.template'

    # hue.ini
    hue_ini = bcf.TemplateFile("hue.ini")
    hue_ini.remote_path = self.conf_dir(cluster_context)
    hue_ini.parse(
        utils.get_file_text(template % self.version, 'sahara_plugins'))
    hue_ini.add_properties(self._get_hue_ini_props(cluster_context))
    hue_ini.add_property("thrift_version",
                         configs[self.THRIFT_VERSION.name])

    # hue.sh
    hue_sh_template = 'plugins/mapr/services/hue/' \
                      'resources/hue_sh_%s.template'
    hue_sh = bcf.TemplateFile("hue.sh")
    hue_sh.remote_path = self.home_dir(cluster_context) + '/bin'
    hue_sh.parse(
        utils.get_file_text(hue_sh_template % self.version,
                            'sahara_plugins'))
    hue_sh.add_property('hadoop_version', cluster_context.hadoop_version)
    hue_sh.mode = 777

    hue_instances = cluster_context.get_instances(HUE)
    for instance in hue_instances:
        if instance not in cluster_context.changed_instances():
            cluster_context.should_be_restarted[self] += [instance]

    return [hue_ini, hue_sh]
def generate_job_cleanup_config(cluster):
    spark_config = {
        'minimum_cleanup_megabytes': utils.get_config_value_or_default(
            "Spark", "Minimum cleanup megabytes", cluster),
        'minimum_cleanup_seconds': utils.get_config_value_or_default(
            "Spark", "Minimum cleanup seconds", cluster),
        'maximum_cleanup_seconds': utils.get_config_value_or_default(
            "Spark", "Maximum cleanup seconds", cluster)
    }
    job_conf = {
        'valid': (
            _convert_config_to_int(
                spark_config['maximum_cleanup_seconds']) > 0 and
            _convert_config_to_int(
                spark_config['minimum_cleanup_megabytes']) > 0 and
            _convert_config_to_int(
                spark_config['minimum_cleanup_seconds']) > 0)
    }
    if job_conf['valid']:
        # No trailing comma after get_file_text(): one here would silently
        # wrap the cron text in a one-element tuple.
        job_conf['cron'] = utils.get_file_text(
            'plugins/spark/resources/spark-cleanup.cron',
            'sahara_plugin_spark')
        job_cleanup_script = utils.get_file_text(
            'plugins/spark/resources/tmp-cleanup.sh.template',
            'sahara_plugin_spark')
        job_conf['script'] = job_cleanup_script.format(**spark_config)
    return job_conf
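# Hypothetical consumer sketch for generate_job_cleanup_config: a caller
# would push the rendered script and cron entry to each node. The remote
# paths and the `r` remote object are illustrative assumptions.
def push_cleanup_job(r, cluster):
    job_conf = generate_job_cleanup_config(cluster)
    if job_conf['valid']:
        r.write_file_to('/etc/hadoop/tmp-cleanup.sh', job_conf['script'],
                        run_as_root=True)
        r.write_file_to('/etc/cron.d/spark-cleanup', job_conf['cron'],
                        run_as_root=True)
        r.execute_command('sudo chmod 755 /etc/hadoop/tmp-cleanup.sh')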
def get_config_files(self, cluster_context, configs, instance=None):
    livy_conf_template = 'plugins/mapr/services/hue/' \
                         'resources/livy_conf_%s.template'
    livy_conf = bcf.TemplateFile("livy.conf")
    livy_conf.parse(
        utils.get_file_text(livy_conf_template % self.version,
                            'sahara_plugins'))
    livy_conf.remote_path = self.home_dir(cluster_context) + '/conf'

    livy_sh_template = 'plugins/mapr/services/hue/' \
                       'resources/livy_sh_%s.template'
    livy_sh = bcf.TemplateFile("livy-env.sh")
    livy_sh.remote_path = self.home_dir(cluster_context) + '/conf'
    livy_sh.parse(
        utils.get_file_text(livy_sh_template % self.version,
                            'sahara_plugins'))
    livy_sh.add_property('hadoop_version', cluster_context.hadoop_version)
    livy_sh.add_property('spark_version', spark.SparkOnYarnV201().version)
    livy_sh.mode = 777

    hue_instances = cluster_context.get_instances(HUE)
    for instance in hue_instances:
        if instance not in cluster_context.changed_instances():
            cluster_context.should_be_restarted[self] += [instance]

    return [livy_sh, livy_conf]
def generate_job_cleanup_config(cluster):
    args = {
        'minimum_cleanup_megabytes': utils.get_config_value_or_default(
            "Spark", "Minimum cleanup megabytes", cluster),
        'minimum_cleanup_seconds': utils.get_config_value_or_default(
            "Spark", "Minimum cleanup seconds", cluster),
        'maximum_cleanup_seconds': utils.get_config_value_or_default(
            "Spark", "Maximum cleanup seconds", cluster)
    }
    job_conf = {
        'valid': (args['maximum_cleanup_seconds'] > 0 and
                  args['minimum_cleanup_megabytes'] > 0 and
                  args['minimum_cleanup_seconds'] > 0)
    }
    if job_conf['valid']:
        # No trailing comma after get_file_text(): one here would silently
        # wrap the cron text in a one-element tuple.
        job_conf['cron'] = utils.get_file_text(
            'plugins/vanilla/hadoop2/resources/spark-cleanup.cron',
            'sahara_plugin_vanilla')
        job_cleanup_script = utils.get_file_text(
            'plugins/vanilla/hadoop2/resources/tmp-cleanup.sh.template',
            'sahara_plugin_vanilla')
        job_conf['script'] = job_cleanup_script.format(**args)
    return job_conf
def get_config_files(self, cluster_context, configs, instance=None):
    defaults = 'plugins/mapr/services/impala/resources/impala-env.sh.j2'

    impala_env = bcf.TemplateFile("env.sh")
    impala_env.remote_path = self.conf_dir(cluster_context)
    if instance:
        impala_env.fetch(instance)
    impala_env.parse(utils.get_file_text(defaults, 'sahara_plugin_mapr'))
    impala_env.add_properties(self._get_impala_env_props(cluster_context))

    sentry_host = cluster_context.get_instance(sentry.SENTRY)
    if sentry_host:
        sentry_mode = cluster_context._get_cluster_config_value(
            sentry.Sentry().SENTRY_STORAGE_MODE)
        ui_name = sentry.Sentry().ui_name
        sentry_version = cluster_context.get_chosen_service_version(
            ui_name)
        sentry_service = cluster_context._find_service_instance(
            ui_name, sentry_version)
        if sentry_service.supports(self, sentry_mode):
            impala_env.add_properties({
                'sentry_home': sentry_service.home_dir(cluster_context),
                'sentry_db':
                    sentry_mode == sentry.DB_STORAGE_SENTRY_MODE,
                'sentry_policy_file':
                    'maprfs://' + sentry_service.GLOBAL_POLICY_FILE,
            })
    return [impala_env]
def get_config_files(self, cluster_context, configs, instance=None):
    hbase_version = self._get_hbase_version(cluster_context)
    hive_version = self._get_hive_version(cluster_context)

    # spark-env.sh
    template = 'plugins/mapr/services/' \
               'spark/resources/spark-env.template'
    env_sh = bcf.TemplateFile('spark-env.sh')
    env_sh.remote_path = self.conf_dir(cluster_context)
    env_sh.parse(utils.get_file_text(template, 'sahara_plugin_mapr'))
    env_sh.add_property('version', self.version)

    # spark-defaults.conf
    conf = bcf.PropertiesFile('spark-defaults.conf', separator=' ')
    conf.remote_path = self.conf_dir(cluster_context)
    if instance:
        conf.fetch(instance)

    # compatibility.version
    versions = bcf.PropertiesFile('compatibility.version')
    versions.remote_path = self.home_dir(cluster_context) + '/mapr-util'
    if instance:
        versions.fetch(instance)

    if hive_version:
        versions.add_property('hive_versions', hive_version + '.0')
        conf.add_properties(self._hive_properties(cluster_context))
    if hbase_version:
        versions.add_property('hbase_versions', hbase_version)
        conf.add_property(
            'spark.executor.extraClassPath',
            '%s/lib/*' % self._hbase(
                cluster_context).home_dir(cluster_context))

    return [conf, versions, env_sh]
def test_configure_sentry(self, keymanager, cluster_get, cluster_update,
                          uuid4, cfg_log):
    cluster = get_concrete_cluster()
    manager = cluster.node_groups[0].instances[0]
    cluster_get.return_value = cluster
    db_password = '******'
    uuid4.return_value = db_password
    # The path carries no '{version}' placeholder, so the original
    # .format(version=self.version) call was a no-op and is dropped.
    create_db_script = utils.get_file_text(
        'plugins/cdh/db_resources/create_sentry_db.sql',
        'sahara_plugins')
    create_db_script = create_db_script % db_password

    self.plug_utils.configure_sentry(cluster)

    with manager.remote() as r:
        cmd_exe_sql = ('PGPASSWORD=$(sudo head -1'
                       ' /var/lib/cloudera-scm-server-db/data/'
                       'generated_password.txt) psql'
                       ' -U cloudera-scm -h localhost -p 7432 -d scm -f'
                       ' script_to_exec.sql')
        cmd_clean = 'rm script_to_exec.sql'
        self.assertEqual(create_db_script, r.write_file_to.call_args[0][1])
        r.execute_command.assert_has_calls([mock.call(cmd_exe_sql),
                                            mock.call(cmd_clean)])
def test_start_hiveserver_process(
        self, add_provisioning_step, check_cluster_exists,
        set_current_instance_id, get_oozie, _hive_create_warehouse_dir,
        _hive_copy_shared_conf, _start_mysql, _hive_create_db,
        _hive_metastore_start, is_mysql_enabled, get_hive_password):
    pctx = mock.Mock()
    path = edp.get_hive_shared_conf_path('hadoop')
    is_mysql_enabled.return_value = True
    cluster = self.instance.cluster
    self.instance.cluster.hadoop_version = '2.7.1'
    ng_cluster = self.instance.node_group.cluster
    get_oozie.return_value = None
    sql_script = pu.get_file_text(
        'plugins/vanilla/v2_7_1/resources/create_hive_db.sql',
        'sahara_plugin_vanilla')
    get_hive_password.return_value = '123'
    pwd_script = sql_script.replace('{{password}}', '123')

    rs.start_hiveserver_process(pctx, self.instance)

    set_current_instance_id.assert_called_once_with(
        self.instance.instance_id)
    _hive_create_warehouse_dir.assert_called_once_with(self.r)
    _hive_copy_shared_conf.assert_called_once_with(self.r, path)
    is_mysql_enabled.assert_called_once_with(pctx, cluster)
    get_oozie.assert_called_once_with(ng_cluster)
    _start_mysql.assert_called_once_with(self.r)
    get_hive_password.assert_called_once_with(cluster)
    self.r.write_file_to.assert_called_once_with(
        '/tmp/create_hive_db.sql', pwd_script)
    _hive_create_db.assert_called_once_with(self.r)
    _hive_metastore_start.assert_called_once_with(self.r)
def _create_script_obj(filename, template, **kwargs):
    script = cf.TemplateFile(filename)
    script.remote_path = '/tmp/'
    script.parse(u.get_file_text(
        'plugins/mapr/services/mysql/resources/%s' % template,
        'sahara_plugin_mapr'))
    for k, v in kwargs.items():
        script.add_property(k, v)
    return script
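# Hypothetical call illustrating _create_script_obj: each keyword argument
# becomes a template property. The template name and properties below are
# assumptions for illustration only.
script = _create_script_obj('create_db.sql', 'create_db.sql.template',
                            db_name='metastore', db_user='hive',
                            db_password='secret')
# script.remote_path is '/tmp/', so the rendered file lands at
# /tmp/create_db.sql when pushed to an instance.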
def run_script(instance, script, run_as=None, *args, **kwargs):
    with instance.remote() as r:
        path = '/tmp/%s.sh' % uuidutils.generate_uuid()
        script = utils.get_file_text(script, 'sahara_plugin_mapr') % kwargs
        r.write_file_to(path, script, run_as_root=(run_as == 'root'))
        r.execute_command(_run_as(run_as, 'chmod +x %s' % path))
        r.execute_command(_run_as(run_as, '%s %s' % (path, ' '.join(args))),
                          timeout=3600)
        # FIXME(aosadchyi): reuse existing remote
        remove(instance, path, run_as=run_as)
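# Hypothetical usage of run_script: the resource text is rendered with
# %-style kwargs, uploaded under a random /tmp name, executed with the
# positional args appended, then removed. The script path and arguments
# below are assumptions.
run_script(instance, 'plugins/mapr/resources/install_mysql.sh', 'root',
           'client', db_host='10.0.0.5')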
def get_config_files(self, cluster_context, configs, instance=None):
    sentry_default = \
        'plugins/mapr/services/sentry/resources/sentry-default.xml'
    global_policy_template = \
        'plugins/mapr/services/sentry/resources/global-policy.ini'

    sentry_site = cf.HadoopXML('sentry-site.xml')
    sentry_site.remote_path = self.conf_dir(cluster_context)
    if instance:
        sentry_site.fetch(instance)
    sentry_site.load_properties(configs)
    sentry_mode = configs[self.SENTRY_STORAGE_MODE.name]
    sentry_site.parse(
        utils.get_file_text(sentry_default, 'sahara_plugin_mapr'))
    sentry_site.add_properties(
        self._get_sentry_site_props(cluster_context, sentry_mode))

    global_policy = cf.TemplateFile('global-policy.ini')
    global_policy.remote_path = self.conf_dir(cluster_context)
    global_policy.parse(
        utils.get_file_text(global_policy_template, 'sahara_plugin_mapr'))

    return [sentry_site, global_policy]
def _install_swift_jar(self, cluster_context, instances):
    LOG.debug('Installing Swift jar')
    jar = u.get_file_text(Swift.HADOOP_SWIFT_JAR, 'sahara_plugins')
    path = '%s/swift.jar' % cluster_context.hadoop_lib

    @el.provision_event()
    def install_on_instance(inst):
        with inst.remote() as r:
            r.write_file_to(path, jar, run_as_root=True)

    for instance in instances:
        install_on_instance(instance)
def generate_zk_basic_config(cluster):
    args = {
        'ticktime': utils.get_config_value_or_default(
            "ZooKeeper", "tickTime", cluster),
        'initlimit': utils.get_config_value_or_default(
            "ZooKeeper", "initLimit", cluster),
        'synclimit': utils.get_config_value_or_default(
            "ZooKeeper", "syncLimit", cluster)
    }
    zoo_cfg = utils.get_file_text(
        'plugins/vanilla/hadoop2/resources/zoo_sample.cfg',
        'sahara_plugins')
    return zoo_cfg.format(**args)
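# The zoo_sample.cfg resource is expected to carry str.format placeholders
# named after the keys of `args`. A hypothetical template and its rendering:
zoo_cfg_template = ("tickTime={ticktime}\n"
                    "initLimit={initlimit}\n"
                    "syncLimit={synclimit}\n")
print(zoo_cfg_template.format(ticktime=2000, initlimit=10, synclimit=5))
# tickTime=2000
# initLimit=10
# syncLimit=5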
def test_get_ng_plugin_configs(self):
    actual_configs = c_h._get_ng_plugin_configs()

    expected_configs = []
    for json_file in json_files:
        expected_configs += json.loads(
            utils.get_file_text(path_to_config + json_file,
                                'sahara_plugin_cdh'))

    # compare names
    expected_names = set(i['name'] for i in expected_configs)
    actual_names = set(i.to_dict()['name'] for i in actual_configs)
    self.assertEqual(expected_names, actual_names)
def test_datanodes_status(self, nn):
    report = utils.get_file_text(
        'tests/unit/plugins/vanilla/hadoop2/resources/dfs-report.txt',
        'sahara_plugins')
    nn.return_value = self._get_instance(report)
    statuses = u.get_datanodes_status(None)
    expected = {
        'cluster-worker-001.novalocal': 'normal',
        'cluster-worker-002.novalocal': 'normal',
        'cluster-worker-003.novalocal': 'normal',
        'cluster-worker-004.novalocal': 'decommissioned'
    }
    self.assertEqual(expected, statuses)
def test_nodemanagers_status(self, rm):
    report = utils.get_file_text(
        'tests/unit/plugins/vanilla/hadoop2/resources/yarn-report.txt',
        'sahara_plugins')
    rm.return_value = self._get_instance(report)
    statuses = u.get_nodemanagers_status(None)
    expected = {
        'cluster-worker-001.novalocal': 'running',
        'cluster-worker-002.novalocal': 'running',
        'cluster-worker-003.novalocal': 'running',
        'cluster-worker-004.novalocal': 'decommissioned'
    }
    self.assertEqual(expected, statuses)
def from_yaml(cls, yaml_path, validator_map=None, resource_roots=None,
              package='sahara'):
    """Constructs and returns a validator from the provided yaml file.

    :param yaml_path: The relative path to a yaml file.
    :param validator_map: A map of validator name to class.
    :param resource_roots: The roots from which relative paths to
        resources (scripts and such) will be referenced. Any resource
        will be pulled from the first path in the list at which a file
        exists.
    :return: A SaharaImageValidator built to the yaml specification.
    """
    validator_map = validator_map or {}
    resource_roots = resource_roots or []
    file_text = utils.get_file_text(yaml_path, package)
    spec = yaml.safe_load(file_text)
    validator_map = cls.get_validator_map(validator_map)
    return cls.from_spec(spec, validator_map, resource_roots, package)
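# A minimal, hypothetical yaml spec of the kind from_yaml consumes; the
# validator names below are illustrative, not the plugin's actual schema.
import yaml

spec_text = """
validators:
  - package: java-1.8.0-openjdk
  - script: common/configure_extjs
"""
spec = yaml.safe_load(spec_text)
# from_yaml passes a dict like this to cls.from_spec, together with the
# resolved validator map and resource roots.
assert spec['validators'][1] == {'script': 'common/configure_extjs'}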
def get_config_files(self, cluster_context, configs, instance=None):
    default_path = 'plugins/mapr/services/maprfs/resources/cldb.conf'

    cldb_conf = bcf.PropertiesFile("cldb.conf")
    cldb_conf.remote_path = self.conf_dir(cluster_context)
    if instance:
        cldb_conf.fetch(instance)
    cldb_conf.parse(utils.get_file_text(default_path, 'sahara_plugins'))
    cldb_conf.add_properties(self._get_cldb_conf_props(cluster_context))

    warden_conf = bcf.PropertiesFile("warden.conf")
    warden_conf.remote_path = "/opt/mapr/conf/"
    if instance:
        warden_conf.fetch(instance)
    warden_conf.add_properties(
        {'service.command.mfs.heapsize.percent':
         configs[self.HEAP_SIZE_PERCENT_NAME]})

    return [cldb_conf, warden_conf]
def _configure_topology(self, cluster_context, instances):
    LOG.debug("Configuring cluster topology")

    topology_map = cluster_context.topology_map
    topology_map = ("%s %s" % item for item in topology_map.items())
    topology_map = "\n".join(topology_map) + "\n"

    data_path = "%s/topology.data" % cluster_context.mapr_home
    script = utils.get_file_text(_TOPO_SCRIPT, 'sahara_plugin_mapr')
    script_path = '%s/topology.sh' % cluster_context.mapr_home

    @el.provision_event()
    def write_topology_data(instance):
        util.write_file(instance, data_path, topology_map, owner="root")
        util.write_file(instance, script_path, script, mode="+x",
                        owner="root")

    util.execute_on_instances(instances, write_topology_data)
    LOG.info('Cluster topology successfully configured')
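# Worked example of the topology_map flattening above: a dict of host/IP to
# rack becomes one "<key> <rack>" line per entry (sample values assumed).
topology_map = {'10.0.0.1': '/rack1', 'node-1.novalocal': '/rack1'}
data = "\n".join("%s %s" % item for item in topology_map.items()) + "\n"
# -> "10.0.0.1 /rack1\nnode-1.novalocal /rack1\n"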
def start_oozie_process(pctx, instance):
    with context.set_current_instance_id(instance.instance_id):
        with instance.remote() as r:
            if config_helper.is_mysql_enabled(pctx, instance.cluster):
                _start_mysql(r)
                LOG.debug("Creating Oozie DB Schema")
                sql_script = utils.get_file_text(
                    'plugins/vanilla/hadoop2/resources/create_oozie_db.sql',
                    'sahara_plugin_vanilla')
                password = oozie_helper.get_oozie_mysql_configs(
                    instance.cluster)[
                    'oozie.service.JPAService.jdbc.password']
                sql_script = sql_script.replace("password", password)
                script_location = "create_oozie_db.sql"
                r.write_file_to(script_location, sql_script)
                r.execute_command('mysql -u root < %(script_location)s && '
                                  'rm %(script_location)s' %
                                  {"script_location": script_location})
            _oozie_share_lib(r)
            _start_oozie(r)
def load_configs(version):
    if OBJ_CONFIGS.get(version):
        return OBJ_CONFIGS[version]
    cfg_path = "plugins/ambari/resources/configs-%s.json" % version
    vanilla_cfg = jsonutils.loads(
        utils.get_file_text(cfg_path, 'sahara_plugin_ambari'))
    CONFIGS[version] = vanilla_cfg
    sahara_cfg = [hdp_repo_cfg, hdp_utils_repo_cfg, use_base_repos_cfg,
                  autoconfigs_strategy, ambari_pkg_install_timeout]
    for service, confs in vanilla_cfg.items():
        for k, v in confs.items():
            sahara_cfg.append(provisioning.Config(
                k, _get_service_name(service), _get_param_scope(k),
                default_value=v))
    sahara_cfg.extend(_get_ha_params())
    OBJ_CONFIGS[version] = sahara_cfg
    return sahara_cfg
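# load_configs memoizes per version: the configs-<version>.json resource is
# parsed once, then the built Config list is served from the OBJ_CONFIGS
# cache. Sketch, assuming an available version string such as '2.6':
configs_first = load_configs('2.6')   # parses configs-2.6.json
configs_again = load_configs('2.6')   # returned straight from OBJ_CONFIGS
assert configs_first is configs_again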
def start_hiveserver_process(pctx, instance):
    with context.set_current_instance_id(instance.instance_id):
        with instance.remote() as r:
            _hive_create_warehouse_dir(r)
            _hive_copy_shared_conf(
                r, edp.get_hive_shared_conf_path('hadoop'))

            if config_helper.is_mysql_enabled(pctx, instance.cluster):
                oozie = vu.get_oozie(instance.node_group.cluster)
                if not oozie or instance.hostname() != oozie.hostname():
                    _start_mysql(r)

                version = instance.cluster.hadoop_version
                sql_script = utils.get_file_text(
                    'plugins/vanilla/v{}/resources/create_hive_db.sql'
                    .format(version.replace('.', '_')),
                    'sahara_plugin_vanilla')
                sql_script = sql_script.replace(
                    '{{password}}', u.get_hive_password(instance.cluster))

                r.write_file_to('/tmp/create_hive_db.sql', sql_script)
                _hive_create_db(r)
                _hive_metastore_start(r)
                LOG.info("Hive Metastore server at {host} has been "
                         "started".format(host=instance.hostname()))
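# Worked example of the version-to-path mapping used above: dots in the
# Hadoop version become underscores in the resource directory name.
hadoop_version = '2.7.1'
sql_path = 'plugins/vanilla/v{}/resources/create_hive_db.sql'.format(
    hadoop_version.replace('.', '_'))
# -> 'plugins/vanilla/v2_7_1/resources/create_hive_db.sql'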
def test_start_oozie_process(self, add_provisioning_step,
                             check_cluster_exists, set_current_instance_id,
                             get_oozie_password, is_mysql_enabled,
                             _start_mysql, _oozie_share_lib, _start_oozie):
    self.instance.instance_id = '112233'
    pctx = mock.Mock()
    is_mysql_enabled.return_value = True
    sql_script = pu.get_file_text(
        'plugins/vanilla/hadoop2/resources/create_oozie_db.sql',
        'sahara_plugin_vanilla')
    get_oozie_password.return_value = '123'
    pwd_script = sql_script.replace('password', '123')

    rs.start_oozie_process(pctx, self.instance)

    set_current_instance_id.assert_called_once_with('112233')
    is_mysql_enabled.assert_called_once_with(pctx, self.instance.cluster)
    _start_mysql.assert_called_once_with(self.r)
    self.r.write_file_to.assert_called_once_with('create_oozie_db.sql',
                                                 pwd_script)
    self.r.execute_command.assert_called_once_with(
        'mysql -u root < create_oozie_db.sql && '
        'rm create_oozie_db.sql')
    _oozie_share_lib.assert_called_once_with(self.r)
    _start_oozie.assert_called_once_with(self.r)
class ConfigHelperV5110(c_h.ConfigHelper):
    path_to_config = 'plugins/cdh/v5_11_0/resources/'

    CDH5_UBUNTU_REPO = (
        'deb [arch=amd64] http://archive.cloudera.com/cdh5'
        '/ubuntu/xenial/amd64/cdh trusty-cdh5.11.0 contrib'
        '\ndeb-src http://archive.cloudera.com/cdh5/ubuntu'
        '/xenial/amd64/cdh trusty-cdh5.11.0 contrib')

    DEFAULT_CDH5_UBUNTU_REPO_KEY_URL = (
        'http://archive.cloudera.com/cdh5/ubuntu'
        '/xenial/amd64/cdh/archive.key')

    CM5_UBUNTU_REPO = (
        'deb [arch=amd64] http://archive.cloudera.com/cm5'
        '/ubuntu/xenial/amd64/cm trusty-cm5.11.0 contrib'
        '\ndeb-src http://archive.cloudera.com/cm5/ubuntu'
        '/xenial/amd64/cm trusty-cm5.11.0 contrib')

    DEFAULT_CM5_UBUNTU_REPO_KEY_URL = (
        'http://archive.cloudera.com/cm5/ubuntu'
        '/xenial/amd64/cm/archive.key')

    CDH5_CENTOS_REPO = (
        '[cloudera-cdh5]'
        '\nname=Cloudera\'s Distribution for Hadoop, Version 5'
        '\nbaseurl=http://archive.cloudera.com/cdh5/redhat/6'
        '/x86_64/cdh/5.11.0/'
        '\ngpgkey = http://archive.cloudera.com/cdh5/redhat/6'
        '/x86_64/cdh/RPM-GPG-KEY-cloudera'
        '\ngpgcheck = 1')

    CM5_CENTOS_REPO = (
        '[cloudera-manager]'
        '\nname=Cloudera Manager'
        '\nbaseurl=http://archive.cloudera.com/cm5/redhat/6'
        '/x86_64/cm/5.11.0/'
        '\ngpgkey = http://archive.cloudera.com/cm5/redhat/6'
        '/x86_64/cm/RPM-GPG-KEY-cloudera'
        '\ngpgcheck = 1')

    KEY_TRUSTEE_UBUNTU_REPO_URL = (
        'http://archive.cloudera.com/navigator-'
        'keytrustee5/ubuntu/xenial/amd64/navigator-'
        'keytrustee/cloudera.list')

    DEFAULT_KEY_TRUSTEE_UBUNTU_REPO_KEY_URL = (
        'http://archive.cloudera.com/navigator-'
        'keytrustee5/ubuntu/xenial/amd64/navigator-'
        'keytrustee/archive.key')

    KEY_TRUSTEE_CENTOS_REPO_URL = (
        'http://archive.cloudera.com/navigator-'
        'keytrustee5/redhat/6/x86_64/navigator-'
        'keytrustee/navigator-keytrustee5.repo')

    DEFAULT_SWIFT_LIB_URL = (
        'https://repository.cloudera.com/artifactory/repo/org'
        '/apache/hadoop/hadoop-openstack/2.6.0-cdh5.11.0'
        '/hadoop-openstack-2.6.0-cdh5.11.0.jar')

    SWIFT_LIB_URL = p.Config(
        'Hadoop OpenStack library URL', 'general', 'cluster', priority=1,
        default_value=DEFAULT_SWIFT_LIB_URL,
        description=("Library that adds Swift support to CDH. The file"
                     " will be downloaded by VMs."))

    HIVE_SERVER2_SENTRY_SAFETY_VALVE = utils.get_file_text(
        path_to_config + 'hive-server2-sentry-safety.xml',
        'sahara_plugin_cdh')

    HIVE_METASTORE_SENTRY_SAFETY_VALVE = utils.get_file_text(
        path_to_config + 'hive-metastore-sentry-safety.xml',
        'sahara_plugin_cdh')

    SENTRY_IMPALA_CLIENT_SAFETY_VALVE = utils.get_file_text(
        path_to_config + 'sentry-impala-client-safety.xml',
        'sahara_plugin_cdh')

    def __init__(self):
        super(ConfigHelperV5110, self).__init__()
        self.priority_one_confs = self._load_json(
            self.path_to_config + 'priority-one-confs.json')
        self._init_all_ng_plugin_configs()
def _push_configs_to_new_node(self, cluster, extra, instance):
    files_hadoop = {
        os.path.join(c_helper.HADOOP_CONF_DIR, "core-site.xml"):
            extra['xml']['core-site'],
        os.path.join(c_helper.HADOOP_CONF_DIR, "hdfs-site.xml"):
            extra['xml']['hdfs-site'],
    }

    sp_home = self._spark_home(cluster)
    files_spark = {
        os.path.join(sp_home, 'conf/spark-env.sh'): extra['sp_master'],
        os.path.join(sp_home, 'conf/slaves'): extra['sp_slaves'],
        os.path.join(sp_home, 'conf/spark-defaults.conf'):
            extra['sp_defaults']
    }

    files_init = {
        '/tmp/sahara-hadoop-init.sh': extra['setup_script'],
        'id_rsa': cluster.management_private_key,
        'authorized_keys': cluster.management_public_key
    }

    # pietro: This is required because the (secret) key is not stored in
    # .ssh which hinders password-less ssh required by spark scripts
    key_cmd = ('sudo cp $HOME/id_rsa $HOME/.ssh/; '
               'sudo chown $USER $HOME/.ssh/id_rsa; '
               'sudo chmod 600 $HOME/.ssh/id_rsa')

    storage_paths = instance.storage_paths()
    dn_path = ' '.join(c_helper.make_hadoop_path(storage_paths,
                                                 '/dfs/dn'))
    nn_path = ' '.join(c_helper.make_hadoop_path(storage_paths,
                                                 '/dfs/nn'))

    hdfs_dir_cmd = ('sudo mkdir -p %(nn_path)s %(dn_path)s && '
                    'sudo chown -R hdfs:hadoop %(nn_path)s %(dn_path)s && '
                    'sudo chmod 755 %(nn_path)s %(dn_path)s' %
                    {"nn_path": nn_path, "dn_path": dn_path})

    with utils.get_remote(instance) as r:
        r.execute_command('sudo chown -R $USER:$USER /etc/hadoop')
        r.execute_command('sudo chown -R $USER:$USER %s' % sp_home)
        r.write_files_to(files_hadoop)
        r.write_files_to(files_spark)
        r.write_files_to(files_init)
        r.execute_command('sudo chmod 0500 /tmp/sahara-hadoop-init.sh')
        r.execute_command('sudo /tmp/sahara-hadoop-init.sh '
                          '>> /tmp/sahara-hadoop-init.log 2>&1')
        r.execute_command(hdfs_dir_cmd)
        r.execute_command(key_cmd)

        if c_helper.is_data_locality_enabled(cluster):
            r.write_file_to(
                '/etc/hadoop/topology.sh',
                utils.get_file_text('plugins/spark/resources/topology.sh',
                                    'sahara_plugin_spark'))
            r.execute_command('sudo chmod +x /etc/hadoop/topology.sh')
            self._write_topology_data(r, cluster, extra)

        self._push_master_configs(r, cluster, extra, instance)
        self._push_cleanup_job(r, cluster, extra, instance)
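# Sketch of the storage-path expansion used above, assuming (not confirmed
# here) that make_hadoop_path simply appends the suffix to each storage root:
storage_paths = ['/volumes/disk1', '/volumes/disk2']
dn_path = ' '.join(p + '/dfs/dn' for p in storage_paths)
# -> '/volumes/disk1/dfs/dn /volumes/disk2/dfs/dn'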
def create_sentry_database(cluster, remote):
    db_password = get_sentry_db_password(cluster)
    create_db_script = utils.get_file_text(
        'plugins/cdh/db_resources/create_sentry_db.sql',
        'sahara_plugins')
    create_db_script = create_db_script % db_password
    remote_execute_db_script(remote, create_db_script)
def _load_config_file(self, file_path=None):
    return json.loads(utils.get_file_text(file_path, 'sahara_plugins'))
def _load_json(self, path_to_file):
    data = utils.get_file_text(path_to_file, 'sahara_plugin_cdh')
    return json.loads(data)