def _post_configuration(pctx, instance):
    dirs = _get_hadoop_dirs(instance)
    args = {
        'hadoop_user': HADOOP_USER,
        'hadoop_group': HADOOP_GROUP,
        'hadoop_conf_dir': HADOOP_CONF_DIR,
        'oozie_conf_dir': OOZIE_CONF_DIR,
        'hadoop_name_dirs': " ".join(dirs['hadoop_name_dirs']),
        'hadoop_data_dirs': " ".join(dirs['hadoop_data_dirs']),
        'hadoop_log_dir': dirs['hadoop_log_dir'],
        'hadoop_secure_dn_log_dir': dirs['hadoop_secure_dn_log_dir'],
        'yarn_log_dir': dirs['yarn_log_dir']
    }
    post_conf_script = utils.get_file_text(
        'plugins/vanilla/hadoop2/resources/post_conf.template',
        'sahara_plugin_vanilla')
    post_conf_script = post_conf_script.format(**args)

    with instance.remote() as r:
        r.write_file_to('/tmp/post_conf.sh', post_conf_script)
        r.execute_command('chmod +x /tmp/post_conf.sh')
        r.execute_command('sudo /tmp/post_conf.sh')

        if config_helper.is_data_locality_enabled(pctx, instance.cluster):
            t_script = HADOOP_CONF_DIR + '/topology.sh'
            r.write_file_to(
                t_script,
                utils.get_file_text(
                    'plugins/vanilla/hadoop2/resources/topology.sh',
                    'sahara_plugin_vanilla'),
                run_as_root=True)
            r.execute_command('chmod +x ' + t_script, run_as_root=True)
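
The snippet above shows the pattern that most of the following examples repeat: load a packaged template with get_file_text, expand it with str.format, and push the result to the instance over remote(). A minimal, self-contained sketch of the expansion step, using a made-up template string in place of post_conf.template (the real template ships inside sahara_plugin_vanilla):

# Hypothetical template text standing in for post_conf.template.
template = (
    "#!/bin/bash\n"
    "chown -R {hadoop_user}:{hadoop_group} {hadoop_log_dir}\n"
    "mkdir -p {hadoop_name_dirs} {hadoop_data_dirs}\n"
)
args = {
    'hadoop_user': 'hadoop',
    'hadoop_group': 'hadoop',
    'hadoop_log_dir': '/var/log/hadoop',
    'hadoop_name_dirs': '/mnt/1/nn',
    'hadoop_data_dirs': '/mnt/1/dn /mnt/2/dn',
}
rendered = template.format(**args)
print(rendered)  # the rendered script is what write_file_to() would push
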

Example 2

    def get_config_files(self, cluster_context, configs, instance=None):
        template = 'plugins/mapr/services/hue/resources/hue_%s.template'
        # hue.ini
        hue_ini = bcf.TemplateFile("hue.ini")
        hue_ini.remote_path = self.conf_dir(cluster_context)
        hue_ini.parse(
            utils.get_file_text(template % self.version, 'sahara_plugins'))
        hue_ini.add_properties(self._get_hue_ini_props(cluster_context))
        hue_ini.add_property("thrift_version",
                             configs[self.THRIFT_VERSION.name])

        # hue.sh
        hue_sh_template = 'plugins/mapr/services/hue/' \
                          'resources/hue_sh_%s.template'
        hue_sh = bcf.TemplateFile("hue.sh")
        hue_sh.remote_path = self.home_dir(cluster_context) + '/bin'
        hue_sh.parse(
            utils.get_file_text(hue_sh_template % self.version,
                                'sahara_plugins'))
        hue_sh.add_property('hadoop_version', cluster_context.hadoop_version)
        hue_sh.mode = 777

        hue_instances = cluster_context.get_instances(HUE)
        for instance in hue_instances:
            if instance not in cluster_context.changed_instances():
                cluster_context.should_be_restarted[self] += [instance]

        return [hue_ini, hue_sh]

Example 3

def generate_job_cleanup_config(cluster):
    spark_config = {
        'minimum_cleanup_megabytes':
        utils.get_config_value_or_default("Spark", "Minimum cleanup megabytes",
                                          cluster),
        'minimum_cleanup_seconds':
        utils.get_config_value_or_default("Spark", "Minimum cleanup seconds",
                                          cluster),
        'maximum_cleanup_seconds':
        utils.get_config_value_or_default("Spark", "Maximum cleanup seconds",
                                          cluster)
    }
    job_conf = {
        'valid':
        (_convert_config_to_int(spark_config['maximum_cleanup_seconds']) > 0
         and
         _convert_config_to_int(spark_config['minimum_cleanup_megabytes']) > 0
         and
         _convert_config_to_int(spark_config['minimum_cleanup_seconds']) > 0)
    }
    if job_conf['valid']:
        job_conf['cron'] = utils.get_file_text(
            'plugins/spark/resources/spark-cleanup.cron',
            'sahara_plugin_spark')
        job_cleanup_script = utils.get_file_text(
            'plugins/spark/resources/tmp-cleanup.sh.template',
            'sahara_plugin_spark')
        job_conf['script'] = job_cleanup_script.format(**spark_config)
    return job_conf
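
The validity check above calls _convert_config_to_int, which is not included in this listing. A plausible sketch of such a helper, assuming it simply coerces a (possibly string-typed) config value to int and falls back to a non-positive sentinel so the 'valid' flag ends up False:

def _convert_config_to_int(config_value):
    # Hedged sketch only; the real helper in the Spark plugin may differ.
    # Non-numeric or missing values become -1, which fails the "> 0" checks.
    try:
        return int(config_value)
    except (TypeError, ValueError):
        return -1
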

Example 4

    def get_config_files(self, cluster_context, configs, instance=None):
        livy_conf_template = 'plugins/mapr/services/hue/' \
                             'resources/livy_conf_%s.template'
        livy_conf = bcf.TemplateFile("livy.conf")
        livy_conf.parse(
            utils.get_file_text(livy_conf_template % self.version,
                                'sahara_plugins'))
        livy_conf.remote_path = self.home_dir(cluster_context) + '/conf'

        livy_sh_template = 'plugins/mapr/services/hue/' \
                           'resources/livy_sh_%s.template'
        livy_sh = bcf.TemplateFile("livy-env.sh")
        livy_sh.remote_path = self.home_dir(cluster_context) + '/conf'
        livy_sh.parse(
            utils.get_file_text(livy_sh_template % self.version,
                                'sahara_plugins'))
        livy_sh.add_property('hadoop_version', cluster_context.hadoop_version)
        livy_sh.add_property('spark_version', spark.SparkOnYarnV201().version)
        livy_sh.mode = 777

        hue_instances = cluster_context.get_instances(HUE)
        for instance in hue_instances:
            if instance not in cluster_context.changed_instances():
                cluster_context.should_be_restarted[self] += [instance]

        return [livy_sh, livy_conf]

Example 5

def generate_job_cleanup_config(cluster):
    args = {
        'minimum_cleanup_megabytes':
        utils.get_config_value_or_default("Spark", "Minimum cleanup megabytes",
                                          cluster),
        'minimum_cleanup_seconds':
        utils.get_config_value_or_default("Spark", "Minimum cleanup seconds",
                                          cluster),
        'maximum_cleanup_seconds':
        utils.get_config_value_or_default("Spark", "Maximum cleanup seconds",
                                          cluster)
    }
    job_conf = {
        'valid': (args['maximum_cleanup_seconds'] > 0
                  and (args['minimum_cleanup_megabytes'] > 0
                       and args['minimum_cleanup_seconds'] > 0))
    }
    if job_conf['valid']:
        job_conf['cron'] = utils.get_file_text(
            'plugins/vanilla/hadoop2/resources/spark-cleanup.cron',
            'sahara_plugin_vanilla')
        job_cleanup_script = utils.get_file_text(
            'plugins/vanilla/hadoop2/resources/tmp-cleanup.sh.template',
            'sahara_plugin_vanilla')
        job_conf['script'] = job_cleanup_script.format(**args)
    return job_conf

Example 6

    def get_config_files(self, cluster_context, configs, instance=None):
        defaults = 'plugins/mapr/services/impala/resources/impala-env.sh.j2'

        impala_env = bcf.TemplateFile("env.sh")
        impala_env.remote_path = self.conf_dir(cluster_context)
        if instance:
            impala_env.fetch(instance)
        impala_env.parse(utils.get_file_text(defaults, 'sahara_plugin_mapr'))
        impala_env.add_properties(self._get_impala_env_props(cluster_context))
        sentry_host = cluster_context.get_instance(sentry.SENTRY)
        if sentry_host:
            sentry_mode = cluster_context._get_cluster_config_value(
                sentry.Sentry().SENTRY_STORAGE_MODE)
            ui_name = sentry.Sentry().ui_name
            sentry_version = cluster_context.get_chosen_service_version(
                ui_name)
            sentry_service = cluster_context. \
                _find_service_instance(ui_name, sentry_version)
            if sentry_service.supports(self, sentry_mode):
                impala_env.add_properties({
                    'sentry_home':
                    sentry_service.home_dir(cluster_context),
                    'sentry_db':
                    sentry_mode == sentry.DB_STORAGE_SENTRY_MODE,
                    'sentry_policy_file':
                    'maprfs://' + sentry_service.GLOBAL_POLICY_FILE,
                })
        return [impala_env]

Example 7

    def get_config_files(self, cluster_context, configs, instance=None):
        hbase_version = self._get_hbase_version(cluster_context)
        hive_version = self._get_hive_version(cluster_context)
        # spark-env-sh
        template = 'plugins/mapr/services/' \
                   'spark/resources/spark-env.template'
        env_sh = bcf.TemplateFile('spark-env.sh')
        env_sh.remote_path = self.conf_dir(cluster_context)
        env_sh.parse(utils.get_file_text(template, 'sahara_plugin_mapr'))
        env_sh.add_property('version', self.version)

        # spark-defaults
        conf = bcf.PropertiesFile('spark-defaults.conf', separator=' ')
        conf.remote_path = self.conf_dir(cluster_context)
        if instance:
            conf.fetch(instance)

        # compatibility.version
        versions = bcf.PropertiesFile('compatibility.version')
        versions.remote_path = self.home_dir(cluster_context) + '/mapr-util'
        if instance:
            versions.fetch(instance)

        if hive_version:
            versions.add_property('hive_versions', hive_version + '.0')
            conf.add_properties(self._hive_properties(cluster_context))
        if hbase_version:
            versions.add_property('hbase_versions', hbase_version)
            conf.add_property(
                'spark.executor.extraClassPath', '%s/lib/*' %
                self._hbase(cluster_context).home_dir(cluster_context))
        return [conf, versions, env_sh]

Example 8

    def test_configure_sentry(self, keymanager, cluster_get,
                              cluster_update, uuid4, cfg_log):
        cluster = get_concrete_cluster()
        manager = cluster.node_groups[0].instances[0]
        cluster_get.return_value = cluster
        db_password = '******'
        uuid4.return_value = db_password
        create_db_script = utils.get_file_text(
            'plugins/cdh/db_resources/create_sentry_db.sql'.format(
                version=self.version),
            'sahara_plugins')
        create_db_script = create_db_script % db_password

        self.plug_utils.configure_sentry(cluster)

        with manager.remote() as r:
            cmd_exe_sql = ('PGPASSWORD=$(sudo head -1'
                           ' /var/lib/cloudera-scm-server-db/data/'
                           'generated_password.txt) psql'
                           ' -U cloudera-scm -h localhost -p 7432 -d scm -f'
                           ' script_to_exec.sql')
            cmd_clean = 'rm script_to_exec.sql'
            self.assertEqual(create_db_script, r.write_file_to.call_args[0][1])
            r.execute_command.assert_has_calls([mock.call(cmd_exe_sql),
                                                mock.call(cmd_clean)])

Example 9

    def test_start_hiveserver_process(
            self, add_provisioning_step, check_cluster_exists,
            set_current_instance_id, get_oozie, _hive_create_warehouse_dir,
            _hive_copy_shared_conf, _start_mysql, _hive_create_db,
            _hive_metastore_start, is_mysql_enabled, get_hive_password):
        pctx = mock.Mock()
        path = edp.get_hive_shared_conf_path('hadoop')
        is_mysql_enabled.return_value = True
        cluster = self.instance.cluster
        self.instance.cluster.hadoop_version = '2.7.1'
        ng_cluster = self.instance.node_group.cluster
        get_oozie.return_value = None
        sql_script = pu.get_file_text(
            'plugins/vanilla/v2_7_1/resources/create_hive_db.sql',
            'sahara_plugin_vanilla')
        get_hive_password.return_value = '123'
        pwd_script = sql_script.replace('{{password}}', '123')
        rs.start_hiveserver_process(pctx, self.instance)
        set_current_instance_id.assert_called_once_with(
            self.instance.instance_id)
        _hive_create_warehouse_dir.assert_called_once_with(self.r)
        _hive_copy_shared_conf.assert_called_once_with(self.r, path)
        is_mysql_enabled.assert_called_once_with(pctx, cluster)
        get_oozie.assert_called_once_with(ng_cluster)
        _start_mysql.assert_called_once_with(self.r)
        get_hive_password.assert_called_once_with(cluster)
        self.r.write_file_to.assert_called_once_with(
            '/tmp/create_hive_db.sql', pwd_script)
        _hive_create_db.assert_called_once_with(self.r)
        _hive_metastore_start.assert_called_once_with(self.r)

Example 10

def _create_script_obj(filename, template, **kwargs):
    script = cf.TemplateFile(filename)
    script.remote_path = '/tmp/'
    script.parse(u.get_file_text(
        'plugins/mapr/services/mysql/resources/%s' % template,
        'sahara_plugin_mapr'))
    for k, v in kwargs.items():
        script.add_property(k, v)
    return script

Example 11

def run_script(instance, script, run_as=None, *args, **kwargs):
    with instance.remote() as r:
        path = '/tmp/%s.sh' % uuidutils.generate_uuid()
        script = utils.get_file_text(script, 'sahara_plugin_mapr') % kwargs
        r.write_file_to(path, script, run_as_root=(run_as == 'root'))
        r.execute_command(_run_as(run_as, 'chmod +x %s' % path))
        r.execute_command(_run_as(run_as, '%s %s' % (path, ' '.join(args))),
                          timeout=3600)
        # FIXME(aosadchyi): reuse existing remote
        remove(instance, path, run_as=run_as)
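
run_script depends on a _run_as helper that is not shown here. A minimal sketch of what it presumably does, namely wrapping the command in sudo when a specific user is requested:

def _run_as(user, command):
    # Hedged sketch; the real helper lives in the MapR plugin utilities.
    if not user:
        return command
    return 'sudo -u %s %s' % (user, command)
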

Example 12

    def get_config_files(self, cluster_context, configs, instance=None):
        sentry_default = \
            'plugins/mapr/services/sentry/resources/sentry-default.xml'
        global_policy_template = \
            'plugins/mapr/services/sentry/resources/global-policy.ini'
        sentry_site = cf.HadoopXML('sentry-site.xml')
        sentry_site.remote_path = self.conf_dir(cluster_context)
        if instance:
            sentry_site.fetch(instance)
        sentry_site.load_properties(configs)
        sentry_mode = configs[self.SENTRY_STORAGE_MODE.name]
        sentry_site.parse(
            utils.get_file_text(sentry_default, 'sahara_plugin_mapr'))
        sentry_site.add_properties(
            self._get_sentry_site_props(cluster_context, sentry_mode))
        global_policy = cf.TemplateFile('global-policy.ini')
        global_policy.remote_path = self.conf_dir(cluster_context)
        global_policy.parse(
            utils.get_file_text(global_policy_template, 'sahara_plugin_mapr'))
        return [sentry_site, global_policy]

Example 13

    def _install_swift_jar(self, cluster_context, instances):
        LOG.debug('Installing Swift jar')
        jar = u.get_file_text(Swift.HADOOP_SWIFT_JAR, 'sahara_plugins')
        path = '%s/swift.jar' % cluster_context.hadoop_lib

        @el.provision_event()
        def install_on_instance(inst):
            with inst.remote() as r:
                r.write_file_to(path, jar, run_as_root=True)

        for instance in instances:
            install_on_instance(instance)

Example 14

def generate_zk_basic_config(cluster):
    args = {
        'ticktime':
        utils.get_config_value_or_default("ZooKeeper", "tickTime", cluster),
        'initlimit':
        utils.get_config_value_or_default("ZooKeeper", "initLimit", cluster),
        'synclimit':
        utils.get_config_value_or_default("ZooKeeper", "syncLimit", cluster)
    }
    zoo_cfg = utils.get_file_text(
        'plugins/vanilla/hadoop2/resources/zoo_sample.cfg', 'sahara_plugins')
    return zoo_cfg.format(**args)

Example 15

    def test_get_ng_plugin_configs(self):
        actual_configs = c_h._get_ng_plugin_configs()

        expected_configs = []
        for json_file in json_files:
            expected_configs += json.loads(
                utils.get_file_text(path_to_config + json_file,
                                    'sahara_plugin_cdh'))

        # compare names
        expected_names = set(i['name'] for i in expected_configs)
        actual_names = set(i.to_dict()['name'] for i in actual_configs)
        self.assertEqual(expected_names, actual_names)

Example 16

    def test_datanodes_status(self, nn):
        report = utils.get_file_text(
            'tests/unit/plugins/vanilla/hadoop2/resources/dfs-report.txt',
            'sahara_plugins')

        nn.return_value = self._get_instance(report)
        statuses = u.get_datanodes_status(None)

        expected = {
            'cluster-worker-001.novalocal': 'normal',
            'cluster-worker-002.novalocal': 'normal',
            'cluster-worker-003.novalocal': 'normal',
            'cluster-worker-004.novalocal': 'decommissioned'
        }

        self.assertEqual(expected, statuses)

Example 17

    def test_nodemanagers_status(self, rm):
        report = utils.get_file_text(
            'tests/unit/plugins/vanilla/hadoop2/resources/yarn-report.txt',
            'sahara_plugins')

        rm.return_value = self._get_instance(report)
        statuses = u.get_nodemanagers_status(None)

        expected = {
            'cluster-worker-001.novalocal': 'running',
            'cluster-worker-002.novalocal': 'running',
            'cluster-worker-003.novalocal': 'running',
            'cluster-worker-004.novalocal': 'decommissioned'
        }

        self.assertEqual(expected, statuses)

Example 18

    def from_yaml(cls, yaml_path, validator_map=None, resource_roots=None,
                  package='sahara'):
        """Constructs and returns a validator from the provided yaml file.

        :param yaml_path: The relative path to a yaml file.
        :param validator_map: A map of validator name to class.
        :param resource_roots: The roots from which relative paths to
            resources (scripts and such) will be referenced. Any resource will
            be pulled from the first path in the list at which a file exists.
        :return: A SaharaImageValidator built to the yaml specification.
        """
        validator_map = validator_map or {}
        resource_roots = resource_roots or []
        file_text = utils.get_file_text(yaml_path, package)
        spec = yaml.safe_load(file_text)
        validator_map = cls.get_validator_map(validator_map)
        return cls.from_spec(spec, validator_map, resource_roots, package)
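
The parsing step described in the docstring is a plain yaml.safe_load of the packaged file. A self-contained illustration with a hypothetical, minimal spec (the real image spec schema is defined by the plugin resources and is richer than this):

import yaml

# Hypothetical spec text standing in for the file loaded via get_file_text.
spec_text = """
validators:
  - package: hadoop
  - script: common/configure_topology
"""
spec = yaml.safe_load(spec_text)
print(spec['validators'])
# [{'package': 'hadoop'}, {'script': 'common/configure_topology'}]
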

Example 19

    def get_config_files(self, cluster_context, configs, instance=None):
        default_path = 'plugins/mapr/services/maprfs/resources/cldb.conf'
        cldb_conf = bcf.PropertiesFile("cldb.conf")
        cldb_conf.remote_path = self.conf_dir(cluster_context)
        if instance:
            cldb_conf.fetch(instance)
        cldb_conf.parse(utils.get_file_text(default_path, 'sahara_plugins'))
        cldb_conf.add_properties(self._get_cldb_conf_props(cluster_context))

        warden_conf = bcf.PropertiesFile("warden.conf")
        warden_conf.remote_path = "/opt/mapr/conf/"
        if instance:
            warden_conf.fetch(instance)
        warden_conf.add_properties(
            {'service.command.mfs.heapsize.percent': configs[
                self.HEAP_SIZE_PERCENT_NAME]})

        return [cldb_conf, warden_conf]

Example 20

    def _configure_topology(self, cluster_context, instances):
        LOG.debug("Configuring cluster topology")

        topology_map = cluster_context.topology_map
        topology_map = ("%s %s" % item for item in topology_map.items())
        topology_map = "\n".join(topology_map) + "\n"

        data_path = "%s/topology.data" % cluster_context.mapr_home
        script = utils.get_file_text(_TOPO_SCRIPT, 'sahara_plugin_mapr')
        script_path = '%s/topology.sh' % cluster_context.mapr_home

        @el.provision_event()
        def write_topology_data(instance):
            util.write_file(instance, data_path, topology_map, owner="root")
            util.write_file(instance, script_path, script,
                            mode="+x", owner="root")

        util.execute_on_instances(instances, write_topology_data)

        LOG.info('Cluster topology successfully configured')
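
The topology handling above turns a host-to-rack mapping into the plain "host rack" lines that a Hadoop topology script reads. A standalone sketch of just that formatting step, with purely illustrative hostnames and racks:

topology_map = {
    'cluster-worker-001.novalocal': '/rack1',
    '10.0.0.11': '/rack1',
    'cluster-worker-002.novalocal': '/rack2',
}
lines = ("%s %s" % item for item in topology_map.items())
topology_data = "\n".join(lines) + "\n"
print(topology_data)  # one "hostname /rack" pair per line
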

Example 21

    def from_yaml(cls,
                  yaml_path,
                  validator_map=None,
                  resource_roots=None,
                  package='sahara'):
        """Constructs and returns a validator from the provided yaml file.

        :param yaml_path: The relative path to a yaml file.
        :param validator_map: A map of validator name to class.
        :param resource_roots: The roots from which relative paths to
            resources (scripts and such) will be referenced. Any resource will
            be pulled from the first path in the list at which a file exists.
        :return: A SaharaImageValidator built to the yaml specification.
        """
        validator_map = validator_map or {}
        resource_roots = resource_roots or []
        file_text = utils.get_file_text(yaml_path, package)
        spec = yaml.safe_load(file_text)
        validator_map = cls.get_validator_map(validator_map)
        return cls.from_spec(spec, validator_map, resource_roots, package)

Example 22

def start_oozie_process(pctx, instance):
    with context.set_current_instance_id(instance.instance_id):
        with instance.remote() as r:
            if config_helper.is_mysql_enabled(pctx, instance.cluster):
                _start_mysql(r)
                LOG.debug("Creating Oozie DB Schema")
                sql_script = utils.get_file_text(
                    'plugins/vanilla/hadoop2/resources/create_oozie_db.sql',
                    'sahara_plugin_vanilla')

                password = oozie_helper.get_oozie_mysql_configs(
                    instance.cluster)['oozie.service.JPAService.jdbc.password']
                sql_script = sql_script.replace("password", password)

                script_location = "create_oozie_db.sql"
                r.write_file_to(script_location, sql_script)
                r.execute_command('mysql -u root < %(script_location)s && '
                                  'rm %(script_location)s' %
                                  {"script_location": script_location})

            _oozie_share_lib(r)
            _start_oozie(r)

Example 23

def load_configs(version):
    if OBJ_CONFIGS.get(version):
        return OBJ_CONFIGS[version]
    cfg_path = "plugins/ambari/resources/configs-%s.json" % version
    vanilla_cfg = jsonutils.loads(
        utils.get_file_text(cfg_path, 'sahara_plugin_ambari'))
    CONFIGS[version] = vanilla_cfg
    sahara_cfg = [
        hdp_repo_cfg, hdp_utils_repo_cfg, use_base_repos_cfg,
        autoconfigs_strategy, ambari_pkg_install_timeout
    ]
    for service, confs in vanilla_cfg.items():
        for k, v in confs.items():
            sahara_cfg.append(
                provisioning.Config(k,
                                    _get_service_name(service),
                                    _get_param_scope(k),
                                    default_value=v))

    sahara_cfg.extend(_get_ha_params())
    OBJ_CONFIGS[version] = sahara_cfg
    return sahara_cfg

Example 24

def start_hiveserver_process(pctx, instance):
    with context.set_current_instance_id(instance.instance_id):
        with instance.remote() as r:
            _hive_create_warehouse_dir(r)
            _hive_copy_shared_conf(r, edp.get_hive_shared_conf_path('hadoop'))

            if config_helper.is_mysql_enabled(pctx, instance.cluster):
                oozie = vu.get_oozie(instance.node_group.cluster)
                if not oozie or instance.hostname() != oozie.hostname():
                    _start_mysql(r)

                version = instance.cluster.hadoop_version
                sql_script = utils.get_file_text(
                    'plugins/vanilla/v{}/resources/create_hive_db.sql'.format(
                        version.replace('.', '_')), 'sahara_plugin_vanilla')

                sql_script = sql_script.replace(
                    '{{password}}', u.get_hive_password(instance.cluster))
                r.write_file_to('/tmp/create_hive_db.sql', sql_script)
                _hive_create_db(r)
                _hive_metastore_start(r)
                LOG.info("Hive Metastore server at {host} has been "
                         "started".format(host=instance.hostname()))

Example 25

    def test_start_oozie_process(
            self, add_provisioning_step, check_cluster_exists,
            set_current_instance_id, get_oozie_password, is_mysql_enabled,
            _start_mysql, _oozie_share_lib, _start_oozie):
        self.instance.instance_id = '112233'
        pctx = mock.Mock()
        is_mysql_enabled.return_value = True
        sql_script = pu.get_file_text(
            'plugins/vanilla/hadoop2/resources/create_oozie_db.sql',
            'sahara_plugin_vanilla')
        get_oozie_password.return_value = '123'
        pwd_script = sql_script.replace('password', '123')
        rs.start_oozie_process(pctx, self.instance)
        set_current_instance_id.assert_called_once_with('112233')
        is_mysql_enabled.assert_called_once_with(pctx, self.instance.cluster)
        _start_mysql.assert_called_once_with(self.r)
        self.r.write_file_to.assert_called_once_with(
            'create_oozie_db.sql', pwd_script)
        self.r.execute_command.assert_called_once_with(
            'mysql -u root < create_oozie_db.sql && '
            'rm create_oozie_db.sql')
        _oozie_share_lib.assert_called_once_with(self.r)
        _start_oozie.assert_called_once_with(self.r)

Example 26

class ConfigHelperV5110(c_h.ConfigHelper):
    path_to_config = 'plugins/cdh/v5_11_0/resources/'

    CDH5_UBUNTU_REPO = ('deb [arch=amd64] http://archive.cloudera.com/cdh5'
                        '/ubuntu/xenial/amd64/cdh trusty-cdh5.11.0 contrib'
                        '\ndeb-src http://archive.cloudera.com/cdh5/ubuntu'
                        '/xenial/amd64/cdh trusty-cdh5.11.0 contrib')

    DEFAULT_CDH5_UBUNTU_REPO_KEY_URL = (
        'http://archive.cloudera.com/cdh5/ubuntu'
        '/xenial/amd64/cdh/archive.key')

    CM5_UBUNTU_REPO = ('deb [arch=amd64] http://archive.cloudera.com/cm5'
                       '/ubuntu/xenial/amd64/cm trusty-cm5.11.0 contrib'
                       '\ndeb-src http://archive.cloudera.com/cm5/ubuntu'
                       '/xenial/amd64/cm trusty-cm5.11.0 contrib')

    DEFAULT_CM5_UBUNTU_REPO_KEY_URL = ('http://archive.cloudera.com/cm5/ubuntu'
                                       '/xenial/amd64/cm/archive.key')

    CDH5_CENTOS_REPO = ('[cloudera-cdh5]'
                        '\nname=Cloudera\'s Distribution for Hadoop, Version 5'
                        '\nbaseurl=http://archive.cloudera.com/cdh5/redhat/6'
                        '/x86_64/cdh/5.11.0/'
                        '\ngpgkey = http://archive.cloudera.com/cdh5/redhat/6'
                        '/x86_64/cdh/RPM-GPG-KEY-cloudera'
                        '\ngpgcheck = 1')

    CM5_CENTOS_REPO = ('[cloudera-manager]'
                       '\nname=Cloudera Manager'
                       '\nbaseurl=http://archive.cloudera.com/cm5/redhat/6'
                       '/x86_64/cm/5.11.0/'
                       '\ngpgkey = http://archive.cloudera.com/cm5/redhat/6'
                       '/x86_64/cm/RPM-GPG-KEY-cloudera'
                       '\ngpgcheck = 1')

    KEY_TRUSTEE_UBUNTU_REPO_URL = ('http://archive.cloudera.com/navigator-'
                                   'keytrustee5/ubuntu/xenial/amd64/navigator-'
                                   'keytrustee/cloudera.list')

    DEFAULT_KEY_TRUSTEE_UBUNTU_REPO_KEY_URL = (
        'http://archive.cloudera.com/navigator-'
        'keytrustee5/ubuntu/xenial/amd64/navigator-'
        'keytrustee/archive.key')

    KEY_TRUSTEE_CENTOS_REPO_URL = ('http://archive.cloudera.com/navigator-'
                                   'keytrustee5/redhat/6/x86_64/navigator-'
                                   'keytrustee/navigator-keytrustee5.repo')

    DEFAULT_SWIFT_LIB_URL = (
        'https://repository.cloudera.com/artifactory/repo/org'
        '/apache/hadoop/hadoop-openstack/2.6.0-cdh5.11.0'
        '/hadoop-openstack-2.6.0-cdh5.11.0.jar')

    SWIFT_LIB_URL = p.Config(
        'Hadoop OpenStack library URL',
        'general',
        'cluster',
        priority=1,
        default_value=DEFAULT_SWIFT_LIB_URL,
        description=("Library that adds Swift support to CDH. The file"
                     " will be downloaded by VMs."))

    HIVE_SERVER2_SENTRY_SAFETY_VALVE = utils.get_file_text(
        path_to_config + 'hive-server2-sentry-safety.xml', 'sahara_plugin_cdh')

    HIVE_METASTORE_SENTRY_SAFETY_VALVE = utils.get_file_text(
        path_to_config + 'hive-metastore-sentry-safety.xml',
        'sahara_plugin_cdh')

    SENTRY_IMPALA_CLIENT_SAFETY_VALVE = utils.get_file_text(
        path_to_config + 'sentry-impala-client-safety.xml',
        'sahara_plugin_cdh')

    def __init__(self):
        super(ConfigHelperV5110, self).__init__()
        self.priority_one_confs = self._load_json(self.path_to_config +
                                                  'priority-one-confs.json')
        self._init_all_ng_plugin_configs()

Example 27

    def _push_configs_to_new_node(self, cluster, extra, instance):
        files_hadoop = {
            os.path.join(c_helper.HADOOP_CONF_DIR, "core-site.xml"):
            extra['xml']['core-site'],
            os.path.join(c_helper.HADOOP_CONF_DIR, "hdfs-site.xml"):
            extra['xml']['hdfs-site'],
        }

        sp_home = self._spark_home(cluster)
        files_spark = {
            os.path.join(sp_home, 'conf/spark-env.sh'): extra['sp_master'],
            os.path.join(sp_home, 'conf/slaves'): extra['sp_slaves'],
            os.path.join(sp_home, 'conf/spark-defaults.conf'):
            extra['sp_defaults']
        }

        files_init = {
            '/tmp/sahara-hadoop-init.sh': extra['setup_script'],
            'id_rsa': cluster.management_private_key,
            'authorized_keys': cluster.management_public_key
        }

        # pietro: This is required because the (secret) key is not stored in
        # .ssh which hinders password-less ssh required by spark scripts
        key_cmd = ('sudo cp $HOME/id_rsa $HOME/.ssh/; '
                   'sudo chown $USER $HOME/.ssh/id_rsa; '
                   'sudo chmod 600 $HOME/.ssh/id_rsa')

        storage_paths = instance.storage_paths()
        dn_path = ' '.join(c_helper.make_hadoop_path(storage_paths, '/dfs/dn'))
        nn_path = ' '.join(c_helper.make_hadoop_path(storage_paths, '/dfs/nn'))

        hdfs_dir_cmd = ('sudo mkdir -p %(nn_path)s %(dn_path)s &&'
                        'sudo chown -R hdfs:hadoop %(nn_path)s %(dn_path)s &&'
                        'sudo chmod 755 %(nn_path)s %(dn_path)s' % {
                            "nn_path": nn_path,
                            "dn_path": dn_path
                        })

        with utils.get_remote(instance) as r:
            r.execute_command('sudo chown -R $USER:$USER /etc/hadoop')
            r.execute_command('sudo chown -R $USER:$USER %s' % sp_home)
            r.write_files_to(files_hadoop)
            r.write_files_to(files_spark)
            r.write_files_to(files_init)
            r.execute_command('sudo chmod 0500 /tmp/sahara-hadoop-init.sh')
            r.execute_command('sudo /tmp/sahara-hadoop-init.sh '
                              '>> /tmp/sahara-hadoop-init.log 2>&1')

            r.execute_command(hdfs_dir_cmd)
            r.execute_command(key_cmd)

            if c_helper.is_data_locality_enabled(cluster):
                r.write_file_to(
                    '/etc/hadoop/topology.sh',
                    utils.get_file_text('plugins/spark/resources/topology.sh',
                                        'sahara_plugin_spark'))
                r.execute_command('sudo chmod +x /etc/hadoop/topology.sh')

            self._write_topology_data(r, cluster, extra)
            self._push_master_configs(r, cluster, extra, instance)
            self._push_cleanup_job(r, cluster, extra, instance)

Example 28

def create_sentry_database(cluster, remote):
    db_password = get_sentry_db_password(cluster)
    create_db_script = utils.get_file_text(
        'plugins/cdh/db_resources/create_sentry_db.sql', 'sahara_plugins')
    create_db_script = create_db_script % db_password
    remote_execute_db_script(remote, create_db_script)

Example 29

    def _load_config_file(self, file_path=None):
        return json.loads(utils.get_file_text(file_path, 'sahara_plugins'))

Example 30

    def _load_json(self, path_to_file):
        data = utils.get_file_text(path_to_file, 'sahara_plugin_cdh')
        return json.loads(data)