Example #1
    def start_secondarynamenodes(self, cluster):
        snns = vu.get_secondarynamenodes(cluster)
        if len(snns) == 0:
            return
        cpo.add_provisioning_step(cluster.id, utils.start_process_event_message("SecondaryNameNodes"), len(snns))

        for snn in snns:
            self._start_secondarynamenode(snn)
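Example #2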
def start_zk_server(instances):
    utils.add_provisioning_step(instances[0].cluster_id,
                                utils.start_process_event_message("ZooKeeper"),
                                len(instances))

    with context.PluginsThreadGroup() as tg:
        for instance in instances:
            with context.set_current_instance_id(instance.instance_id):
                tg.spawn('ZK-start-processes-%s' % instance.instance_name,
                         _start_zk_processes, instance, 'start')
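
Example #2 above, like most of the snippets on this page, follows one shape: register a provisioning step sized to the number of target instances, then start one process per instance inside a thread group so that each per-instance task reports against that step. Below is a minimal, stdlib-only sketch of that shape; ProvisioningStep, _start_zk_process and start_zk_servers are illustrative stand-ins, not the Sahara cpo/utils/context APIs used in the examples.

import threading
from concurrent.futures import ThreadPoolExecutor


class ProvisioningStep:
    """Illustrative stand-in for add_provisioning_step plus event reporting."""

    def __init__(self, cluster_id, message, total):
        self.cluster_id = cluster_id
        self.message = message
        self.total = total
        self._done = 0
        self._lock = threading.Lock()

    def report(self, instance_name):
        with self._lock:
            self._done += 1
            print('%s | %s | %s (%d/%d)' % (self.cluster_id, self.message,
                                            instance_name, self._done,
                                            self.total))


def _start_zk_process(step, instance_name):
    # a real plugin would connect to the instance and start the service here
    step.report(instance_name)


def start_zk_servers(cluster_id, instance_names):
    if not instance_names:
        return
    step = ProvisioningStep(cluster_id, 'Start ZooKeeper', len(instance_names))
    with ThreadPoolExecutor() as tg:
        for name in instance_names:
            tg.submit(_start_zk_process, step, name)


start_zk_servers('cluster-1', ['zk-1', 'zk-2', 'zk-3'])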
Example #3
    def _start_slave_processes(self, sl_instances):
        if len(sl_instances) == 0:
            return
        cpo.add_provisioning_step(
            sl_instances[0].cluster_id, utils.start_process_event_message("Slave"), len(sl_instances)
        )

        with context.ThreadGroup() as tg:
            for i in sl_instances:
                tg.spawn("storm-start-sl-%s" % i.instance_name, self._start_slaves, i)
Example #4
    def start_secondarynamenodes(self, cluster):
        snns = vu.get_secondarynamenodes(cluster)
        if len(snns) == 0:
            return
        cpo.add_provisioning_step(
            snns[0].cluster_id,
            utils.start_process_event_message("SecondaryNameNodes"), len(snns))

        for snn in vu.get_secondarynamenodes(cluster):
            self._start_secondarynamenode(snn)
Example #5
    def _start_slave_processes(self, sl_instances):
        if len(sl_instances) == 0:
            return
        utils.add_provisioning_step(sl_instances[0].cluster_id,
                                    utils.start_process_event_message("Slave"),
                                    len(sl_instances))

        with context.PluginsThreadGroup() as tg:
            for i in sl_instances:
                tg.spawn('storm-start-sl-%s' % i.instance_name,
                         self._start_slaves, i)
Example #6
    def _start_datanode_processes(self, dn_instances):
        if len(dn_instances) == 0:
            return

        cpo.add_provisioning_step(
            dn_instances[0].cluster_id, utils.start_process_event_message("DataNodes"), len(dn_instances)
        )

        with context.ThreadGroup() as tg:
            for i in dn_instances:
                tg.spawn("spark-start-dn-%s" % i.instance_name, self._start_datanode, i)
Example #7
    def _start_zookeeper_processes(self, zk_instances):
        if len(zk_instances) == 0:
            return

        cpo.add_provisioning_step(
            zk_instances[0].cluster_id, utils.start_process_event_message("Zookeeper"), len(zk_instances)
        )

        with context.ThreadGroup() as tg:
            for i in zk_instances:
                tg.spawn("storm-start-zk-%s" % i.instance_name, self._start_zookeeper, i)
Example #8
    def _start_datanode_processes(self, dn_instances):
        if len(dn_instances) == 0:
            return

        cpo.add_provisioning_step(
            dn_instances[0].cluster_id,
            utils.start_process_event_message("DataNodes"), len(dn_instances))

        with context.ThreadGroup() as tg:
            for i in dn_instances:
                tg.spawn('spark-start-dn-%s' % i.instance_name,
                         self._start_datanode, i)
Example #9
    def _start_zookeeper_processes(self, zk_instances):
        if len(zk_instances) == 0:
            return

        utils.add_provisioning_step(
            zk_instances[0].cluster_id,
            utils.start_process_event_message("Zookeeper"), len(zk_instances))

        with context.PluginsThreadGroup() as tg:
            for i in zk_instances:
                tg.spawn('storm-start-zk-%s' % i.instance_name,
                         self._start_zookeeper, i)
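Example #10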
def refresh_zk_servers(cluster, to_delete_instances=None):
    instances = vu.get_zk_servers(cluster)
    if to_delete_instances:
        for instance in to_delete_instances:
            if instance in instances:
                instances.remove(instance)

    utils.add_provisioning_step(cluster.id,
                                utils.start_process_event_message("ZooKeeper"),
                                len(instances))

    with context.PluginsThreadGroup() as tg:
        for instance in instances:
            with context.set_current_instance_id(instance.instance_id):
                tg.spawn('ZK-restart-processes-%s' % instance.instance_name,
                         _start_zk_processes, instance, 'restart')
Example #11
    def test_start_dn_nm_processes(self, instances_with_services,
                                   add_provisioning_step,
                                   set_current_instance_id, _start_processes):
        ins = mock.Mock()
        ins.cluster_id = '111'
        ins.instance_id = '123'
        ins.instance_name = 'ins_1'
        instances = [ins]
        instances_with_services.return_value = instances
        mess = pu.start_process_event_message('DataNodes, NodeManagers')
        ins.node_group.node_processes = ['datanode', 'test']
        rs.start_dn_nm_processes(instances)
        instances_with_services.assert_called_once_with(
            instances, ['datanode', 'nodemanager'])
        add_provisioning_step.assert_called_once_with('111', mess, 1)
        set_current_instance_id.assert_called_once_with('123')
        _start_processes.assert_called_once_with(ins, ['datanode'])
Example #12
    def _start_tt_dn_processes(self, instances):
        tt_dn_names = ["datanode", "tasktracker"]

        instances = utils.instances_with_services(instances, tt_dn_names)

        if not instances:
            return

        cpo.add_provisioning_step(
            instances[0].cluster_id, utils.start_process_event_message("DataNodes, TaskTrackers"), len(instances)
        )

        with context.ThreadGroup() as tg:
            for i in instances:
                processes = set(i.node_group.node_processes)
                tt_dn_procs = processes.intersection(tt_dn_names)
                tg.spawn("vanilla-start-tt-dn-%s" % i.instance_name, self._start_tt_dn, i, list(tt_dn_procs))
Example #13
def start_dn_nm_processes(instances):
    filternames = ['datanode', 'nodemanager']
    instances = pu.instances_with_services(instances, filternames)

    if len(instances) == 0:
        return

    cpo.add_provisioning_step(
        instances[0].cluster_id,
        pu.start_process_event_message("DataNodes, NodeManagers"),
        len(instances))

    with context.ThreadGroup() as tg:
        for instance in instances:
            processes = set(instance.node_group.node_processes)
            processes = processes.intersection(filternames)
            tg.spawn('vanilla-start-processes-%s' % instance.instance_name,
                     _start_processes, instance, list(processes))
Example #14
def start_dn_nm_processes(instances):
    filternames = ['datanode', 'nodemanager']
    instances = pu.instances_with_services(instances, filternames)

    if len(instances) == 0:
        return

    cpo.add_provisioning_step(
        instances[0].cluster_id,
        pu.start_process_event_message("DataNodes, NodeManagers"),
        len(instances))

    with context.ThreadGroup() as tg:
        for instance in instances:
            with context.set_current_instance_id(instance.instance_id):
                processes = set(instance.node_group.node_processes)
                processes = processes.intersection(filternames)
                tg.spawn('vanilla-start-processes-%s' % instance.instance_name,
                         _start_processes, instance, list(processes))
Example #15
    def _start_tt_dn_processes(self, instances):
        tt_dn_names = ["datanode", "tasktracker"]

        instances = utils.instances_with_services(instances, tt_dn_names)

        if not instances:
            return

        cpo.add_provisioning_step(
            instances[0].cluster_id,
            utils.start_process_event_message("DataNodes, TaskTrackers"),
            len(instances))

        with context.ThreadGroup() as tg:
            for i in instances:
                processes = set(i.node_group.node_processes)
                tt_dn_procs = processes.intersection(tt_dn_names)
                tg.spawn('vanilla-start-tt-dn-%s' % i.instance_name,
                         self._start_tt_dn, i, list(tt_dn_procs))
Example #16
class StormProvider(p.ProvisioningPluginBase):
    def __init__(self):
        self.processes = {
            "Zookeeper": ["zookeeper"],
            "Storm": ["nimbus", "supervisor"]
        }

    def get_title(self):
        return "Apache Storm"

    def get_description(self):
        return (_("This plugin provides an ability to launch Storm "
                  "cluster without any management consoles."))

    def get_labels(self):
        default = {'enabled': {'status': True}, 'stable': {'status': True}}
        deprecated = {
            'enabled': {
                'status': True
            },
            'deprecated': {
                'status': True
            }
        }
        result = {'plugin_labels': copy.deepcopy(default)}
        result['version_labels'] = {
            '1.2': copy.deepcopy(default),
            '1.1.0': copy.deepcopy(default),
            '1.0.1': copy.deepcopy(deprecated),
        }
        return result

    def get_versions(self):
        return ['1.0.1', '1.1.0', '1.2']

    def get_configs(self, storm_version):
        return c_helper.get_plugin_configs()

    def get_node_processes(self, storm_version):
        return self.processes

    def validate(self, cluster):
        # validate Storm Master Node and Storm Slaves
        sm_count = sum(
            [ng.count for ng in utils.get_node_groups(cluster, "nimbus")])

        if sm_count < 1:
            raise ex.RequiredServiceMissingException("Storm nimbus")

        if sm_count >= 2:
            raise ex.InvalidComponentCountException("Storm nimbus", "1",
                                                    sm_count)

        sl_count = sum(
            [ng.count for ng in utils.get_node_groups(cluster, "supervisor")])

        if sl_count < 1:
            raise ex.InvalidComponentCountException("Storm supervisor",
                                                    _("1 or more"), sl_count)

    def update_infra(self, cluster):
        pass

    def configure_cluster(self, cluster):
        self._setup_instances(cluster)

    def start_cluster(self, cluster):
        sm_instance = utils.get_instance(cluster, "nimbus")
        sl_instances = utils.get_instances(cluster, "supervisor")
        zk_instances = utils.get_instances(cluster, "zookeeper")

        # start zookeeper processes
        self._start_zookeeper_processes(zk_instances)

        # start storm master
        if sm_instance:
            self._start_storm_master(sm_instance)

        # start storm slaves
        self._start_slave_processes(sl_instances)

        LOG.info("Cluster {cluster} has been started successfully".format(
            cluster=cluster.name))
        self._set_cluster_info(cluster)

    def get_edp_engine(self, cluster, job_type):
        if job_type in edp_engine.EdpStormEngine.get_supported_job_types():
            return edp_engine.EdpStormEngine(cluster)
        if job_type in edp_engine.EdpPyleusEngine.get_supported_job_types():
            return edp_engine.EdpPyleusEngine(cluster)
        return None

    def get_edp_job_types(self, versions=None):
        res = {}
        for vers in self.get_versions():
            if not versions or vers in versions:
                storm_engine = edp_engine.EdpStormEngine
                pyleus_engine = edp_engine.EdpPyleusEngine
                res[vers] = (storm_engine.get_supported_job_types() +
                             pyleus_engine.get_supported_job_types())
        return res

    def get_edp_config_hints(self, job_type, version):
        if edp_engine.EdpStormEngine.edp_supported(version):
            return edp_engine.EdpStormEngine.get_possible_job_config(job_type)
        if edp_engine.EdpPyleusEngine.edp_supported(version):
            return edp_engine.EdpPyleusEngine.get_possible_job_config(job_type)
        return {}

    def get_open_ports(self, node_group):
        ports_map = {'nimbus': [8080]}

        ports = []
        for process in node_group.node_processes:
            if process in ports_map:
                ports.extend(ports_map[process])

        return ports

    def _extract_configs_to_extra(self, cluster):
        st_master = utils.get_instance(cluster, "nimbus")
        zk_servers = utils.get_instances(cluster, "zookeeper")

        extra = dict()

        config_instances = ''
        if st_master is not None:
            if zk_servers is not None:
                zknames = []
                for zk in zk_servers:
                    zknames.append(zk.hostname())

            config_instances = c_helper.generate_storm_config(
                st_master.hostname(), zknames, cluster.hadoop_version)

        config = self._convert_dict_to_yaml(config_instances)
        supervisor_conf = c_helper.generate_slave_supervisor_conf()
        nimbus_ui_conf = c_helper.generate_master_supervisor_conf()
        zk_conf = c_helper.generate_zookeeper_conf()
        pyleus_conf = c_helper.generate_pyleus_config()

        for ng in cluster.node_groups:
            extra[ng.id] = {
                'st_instances': config,
                'slave_sv_conf': supervisor_conf,
                'master_sv_conf': nimbus_ui_conf,
                'zk_conf': zk_conf,
                'pyleus_conf': pyleus_conf
            }

        return extra

    @utils.event_wrapper(True,
                         step=utils.start_process_event_message("StormMaster"))
    def _start_storm_master(self, sm_instance):
        with utils.get_remote(sm_instance) as r:
            run.start_storm_nimbus_and_ui(r)
            LOG.info("Storm master at {host} has been started".format(
                host=sm_instance.hostname()))

    def _start_slave_processes(self, sl_instances):
        if len(sl_instances) == 0:
            return
        utils.add_provisioning_step(sl_instances[0].cluster_id,
                                    utils.start_process_event_message("Slave"),
                                    len(sl_instances))

        with context.PluginsThreadGroup() as tg:
            for i in sl_instances:
                tg.spawn('storm-start-sl-%s' % i.instance_name,
                         self._start_slaves, i)

    @utils.event_wrapper(True)
    def _start_slaves(self, instance):
        with instance.remote() as r:
            run.start_storm_supervisor(r)

    def _start_zookeeper_processes(self, zk_instances):
        if len(zk_instances) == 0:
            return

        utils.add_provisioning_step(
            zk_instances[0].cluster_id,
            utils.start_process_event_message("Zookeeper"), len(zk_instances))

        with context.PluginsThreadGroup() as tg:
            for i in zk_instances:
                tg.spawn('storm-start-zk-%s' % i.instance_name,
                         self._start_zookeeper, i)

    @utils.event_wrapper(True)
    def _start_zookeeper(self, instance):
        with instance.remote() as r:
            run.start_zookeeper(r)

    def _setup_instances(self, cluster, instances=None):
        extra = self._extract_configs_to_extra(cluster)

        if instances is None:
            instances = utils.get_instances(cluster)

        self._push_configs_to_nodes(cluster, extra, instances)

    def _push_configs_to_nodes(self, cluster, extra, new_instances):
        all_instances = utils.get_instances(cluster)
        utils.add_provisioning_step(cluster.id, _("Push configs to nodes"),
                                    len(all_instances))

        with context.PluginsThreadGroup() as tg:
            for instance in all_instances:
                if instance in new_instances:
                    tg.spawn('storm-configure-%s' % instance.instance_name,
                             self._push_configs_to_new_node, cluster, extra,
                             instance)
                else:
                    tg.spawn('storm-reconfigure-%s' % instance.instance_name,
                             self._push_configs_to_existing_node, cluster,
                             extra, instance)

    def _convert_dict_to_yaml(self, dict_to_convert):
        new_dict = dict_to_convert.copy()
        for key in dict_to_convert:
            if isinstance(dict_to_convert[key], six.string_types):
                new_dict[key] = "\"" + dict_to_convert[key] + "\""

        stream = yaml.dump(new_dict, default_flow_style=False)
        stream = stream.replace("\'", "")

        return stream

    @utils.event_wrapper(True)
    def _push_configs_to_new_node(self, cluster, extra, instance):
        ng_extra = extra[instance.node_group.id]

        files_supervisor = {
            '/etc/supervisor/supervisord.conf': ng_extra['slave_sv_conf']
        }
        files_storm = {
            '/usr/local/storm/conf/storm.yaml': ng_extra['st_instances']
        }
        files_zk = {
            '/opt/zookeeper/zookeeper/conf/zoo.cfg': ng_extra['zk_conf']
        }
        files_supervisor_master = {
            '/etc/supervisor/supervisord.conf': ng_extra['master_sv_conf']
        }
        file_pyleus_conf = {
            '/home/ubuntu/.pyleus.conf': ng_extra['pyleus_conf']
        }

        with utils.get_remote(instance) as r:
            node_processes = instance.node_group.node_processes
            r.write_files_to(files_storm, run_as_root=True)
            if 'zookeeper' in node_processes:
                self._push_zk_configs(r, files_zk)
            if 'nimbus' in node_processes:
                self._push_supervisor_configs(r, files_supervisor_master)
                self._push_supervisor_configs(r, file_pyleus_conf)
            if 'supervisor' in node_processes:
                self._push_supervisor_configs(r, files_supervisor)

    @utils.event_wrapper(True)
    def _push_configs_to_existing_node(self, cluster, extra, instance):
        node_processes = instance.node_group.node_processes
        need_storm_update = ('nimbus' in node_processes
                             or 'supervisor' in node_processes)
        need_zookeeper_update = 'zookeeper' in node_processes

        ng_extra = extra[instance.node_group.id]
        r = utils.get_remote(instance)

        if need_storm_update:
            storm_path = '/usr/local/storm/conf/storm.yaml'
            files_storm = {storm_path: ng_extra['st_instances']}
            r.write_files_to(files_storm)

        if need_zookeeper_update:
            zk_path = '/opt/zookeeper/zookeeper/conf/zoo.cfg'
            files_zookeeper = {zk_path: ng_extra['zk_conf']}
            self._push_zk_configs(r, files_zookeeper)

    def _set_cluster_info(self, cluster):
        st_master = utils.get_instance(cluster, "nimbus")
        info = {}

        if st_master:
            port = "8080"

            info['Storm'] = {
                'Web UI':
                'http://%s:%s' % (st_master.get_ip_or_dns_name(), port)
            }
        ctx = context.ctx()
        conductor.cluster_update(ctx, cluster, {'info': info})

    def _push_zk_configs(self, r, files):
        r.write_files_to(files, run_as_root=True)

    def _push_supervisor_configs(self, r, files):
        r.append_to_files(files, run_as_root=True)

    # Scaling

    def _get_running_topologies_names(self, cluster):
        master = utils.get_instance(cluster, "nimbus")

        cmd = ("%(storm)s -c nimbus.host=%(host)s "
               "list | grep ACTIVE | awk '{print $1}'") % (
                   {
                       "storm": "/usr/local/storm/bin/storm",
                       "host": master.hostname()
                   })

        with utils.get_remote(master) as r:
            ret, stdout = r.execute_command(cmd)
        names = stdout.split('\n')
        topology_names = names[0:len(names) - 1]
        return topology_names

    @utils.event_wrapper(True,
                         step=_("Rebalance Topology"),
                         param=('cluster', 1))
    def rebalance_topology(self, cluster):
        topology_names = self._get_running_topologies_names(cluster)
        master = utils.get_instance(cluster, "nimbus")

        for topology_name in topology_names:
            cmd = ('%(rebalance)s -c nimbus.host=%(host)s %(topology_name)s'
                   ) % ({
                       "rebalance": "/usr/local/storm/bin/storm rebalance",
                       "host": master.hostname(),
                       "topology_name": topology_name
                   })

            with utils.get_remote(master) as r:
                ret, stdout = r.execute_command(cmd)

    def validate_scaling(self, cluster, existing, additional):
        self._validate_existing_ng_scaling(cluster, existing)
        self._validate_additional_ng_scaling(cluster, additional)

    def scale_cluster(self, cluster, instances):
        self._setup_instances(cluster, instances)
        # start storm slaves
        self._start_slave_processes(instances)
        self.rebalance_topology(cluster)
        LOG.info("Storm scaling has been started.")

    def _get_scalable_processes(self):
        return ["supervisor"]

    def _validate_additional_ng_scaling(self, cluster, additional):
        scalable_processes = self._get_scalable_processes()

        for ng_id in additional:
            ng = utils.get_by_id(cluster.node_groups, ng_id)
            if not set(ng.node_processes).issubset(scalable_processes):
                raise ex.NodeGroupCannotBeScaled(
                    ng.name,
                    _("Storm plugin cannot scale nodegroup"
                      " with processes: %s") % ' '.join(ng.node_processes))

    def _validate_existing_ng_scaling(self, cluster, existing):
        scalable_processes = self._get_scalable_processes()
        for ng in cluster.node_groups:
            if ng.id in existing:
                if not set(ng.node_processes).issubset(scalable_processes):
                    raise ex.NodeGroupCannotBeScaled(
                        ng.name,
                        _("Storm plugin cannot scale nodegroup"
                          " with processes: %s") % ' '.join(ng.node_processes))
Example #17
class VersionHandler(avm.AbstractVersionHandler):
    def get_plugin_configs(self):
        return c_helper.get_plugin_configs()

    def get_node_processes(self):
        return {
            "HDFS": ["namenode", "datanode", "secondarynamenode"],
            "MapReduce": ["tasktracker", "jobtracker"],
            "JobFlow": ["oozie"],
            "Hive": ["hiveserver"]
        }

    def validate(self, cluster):
        nn_count = sum(
            [ng.count for ng in utils.get_node_groups(cluster, "namenode")])
        if nn_count != 1:
            raise ex.InvalidComponentCountException("namenode", 1, nn_count)

        snn_count = sum([
            ng.count
            for ng in utils.get_node_groups(cluster, 'secondarynamenode')
        ])
        if snn_count > 1:
            raise ex.InvalidComponentCountException('secondarynamenode',
                                                    _('0 or 1'), snn_count)

        jt_count = sum(
            [ng.count for ng in utils.get_node_groups(cluster, "jobtracker")])

        if jt_count > 1:
            raise ex.InvalidComponentCountException("jobtracker", _('0 or 1'),
                                                    jt_count)

        oozie_count = sum(
            [ng.count for ng in utils.get_node_groups(cluster, "oozie")])

        if oozie_count > 1:
            raise ex.InvalidComponentCountException("oozie", _('0 or 1'),
                                                    oozie_count)

        hive_count = sum(
            [ng.count for ng in utils.get_node_groups(cluster, "hiveserver")])
        if jt_count == 0:

            tt_count = sum([
                ng.count
                for ng in utils.get_node_groups(cluster, "tasktracker")
            ])
            if tt_count > 0:
                raise ex.RequiredServiceMissingException(
                    "jobtracker", required_by="tasktracker")

            if oozie_count > 0:
                raise ex.RequiredServiceMissingException("jobtracker",
                                                         required_by="oozie")

            if hive_count > 0:
                raise ex.RequiredServiceMissingException("jobtracker",
                                                         required_by="hive")

        if hive_count > 1:
            raise ex.InvalidComponentCountException("hive", _('0 or 1'),
                                                    hive_count)

    def configure_cluster(self, cluster):
        instances = utils.get_instances(cluster)
        self._setup_instances(cluster, instances)

    def start_namenode(self, cluster):
        nn = vu.get_namenode(cluster)
        self._start_namenode(nn)

    @cpo.event_wrapper(True,
                       step=utils.start_process_event_message("NameNode"))
    def _start_namenode(self, nn_instance):
        with remote.get_remote(nn_instance) as r:
            run.format_namenode(r)
            run.start_processes(r, "namenode")

    def start_secondarynamenodes(self, cluster):
        snns = vu.get_secondarynamenodes(cluster)
        if len(snns) == 0:
            return
        cpo.add_provisioning_step(
            cluster.id,
            utils.start_process_event_message("SecondaryNameNodes"), len(snns))

        for snn in snns:
            self._start_secondarynamenode(snn)

    @cpo.event_wrapper(True)
    def _start_secondarynamenode(self, snn):
        run.start_processes(remote.get_remote(snn), "secondarynamenode")

    def start_jobtracker(self, cluster):
        jt = vu.get_jobtracker(cluster)
        if jt:
            self._start_jobtracker(jt)

    @cpo.event_wrapper(True,
                       step=utils.start_process_event_message("JobTracker"))
    def _start_jobtracker(self, jt_instance):
        run.start_processes(remote.get_remote(jt_instance), "jobtracker")

    def start_oozie(self, cluster):
        oozie = vu.get_oozie(cluster)
        if oozie:
            self._start_oozie(cluster, oozie)

    @cpo.event_wrapper(True, step=utils.start_process_event_message("Oozie"))
    def _start_oozie(self, cluster, oozie):
        nn_instance = vu.get_namenode(cluster)

        with remote.get_remote(oozie) as r:
            if c_helper.is_mysql_enable(cluster):
                run.mysql_start(r, oozie)
                run.oozie_create_db(r)
            run.oozie_share_lib(r, nn_instance.hostname())
            run.start_oozie(r)
            LOG.info(
                _LI("Oozie service at {host} has been started").format(
                    host=nn_instance.hostname()))

    def start_hiveserver(self, cluster):
        hs = vu.get_hiveserver(cluster)
        if hs:
            self._start_hiveserver(cluster, hs)

    @cpo.event_wrapper(True,
                       step=utils.start_process_event_message("HiveServer"))
    def _start_hiveserver(self, cluster, hive_server):
        oozie = vu.get_oozie(cluster)

        with remote.get_remote(hive_server) as r:
            run.hive_create_warehouse_dir(r)
            run.hive_copy_shared_conf(r,
                                      edp.get_hive_shared_conf_path('hadoop'))

            if c_helper.is_mysql_enable(cluster):
                if not oozie or hive_server.hostname() != oozie.hostname():
                    run.mysql_start(r, hive_server)
                run.hive_create_db(r, cluster.extra['hive_mysql_passwd'])
                run.hive_metastore_start(r)
                LOG.info(
                    _LI("Hive Metastore server at {host} has been "
                        "started").format(host=hive_server.hostname()))

    def start_cluster(self, cluster):
        self.start_namenode(cluster)

        self.start_secondarynamenodes(cluster)

        self.start_jobtracker(cluster)

        self._start_tt_dn_processes(utils.get_instances(cluster))

        self._await_datanodes(cluster)

        LOG.info(
            _LI("Hadoop services in cluster {cluster} have been started").
            format(cluster=cluster.name))

        self.start_oozie(cluster)

        self.start_hiveserver(cluster)

        LOG.info(
            _LI('Cluster {cluster} has been started successfully').format(
                cluster=cluster.name))
        self._set_cluster_info(cluster)

    @cpo.event_wrapper(True,
                       step=_("Await %s start up") % "DataNodes",
                       param=('cluster', 1))
    def _await_datanodes(self, cluster):
        datanodes_count = len(vu.get_datanodes(cluster))
        if datanodes_count < 1:
            return

        l_message = _("Waiting on %s datanodes to start up") % datanodes_count
        LOG.info(l_message)
        with remote.get_remote(vu.get_namenode(cluster)) as r:
            poll_utils.plugin_option_poll(cluster, run.check_datanodes_count,
                                          c_helper.DATANODES_STARTUP_TIMEOUT,
                                          l_message, 1, {
                                              'remote': r,
                                              'count': datanodes_count
                                          })

    def _generate_hive_mysql_password(self, cluster):
        extra = cluster.extra.to_dict() if cluster.extra else {}
        password = extra.get('hive_mysql_passwd')
        if not password:
            password = six.text_type(uuid.uuid4())
            extra['hive_mysql_passwd'] = password
            conductor.cluster_update(context.ctx(), cluster, {'extra': extra})
        return password

    def _extract_configs_to_extra(self, cluster):
        oozie = vu.get_oozie(cluster)
        hive = vu.get_hiveserver(cluster)

        extra = dict()

        if hive:
            extra['hive_mysql_passwd'] = self._generate_hive_mysql_password(
                cluster)

        for ng in cluster.node_groups:
            extra[ng.id] = {
                'xml':
                c_helper.generate_xml_configs(
                    cluster, ng, extra['hive_mysql_passwd'] if hive else None),
                'setup_script':
                c_helper.generate_setup_script(
                    ng.storage_paths(),
                    c_helper.extract_environment_confs(ng.configuration()),
                    append_oozie=(oozie and oozie.node_group.id == ng.id))
            }

        if c_helper.is_data_locality_enabled(cluster):
            topology_data = th.generate_topology_map(
                cluster, CONF.enable_hypervisor_awareness)
            extra['topology_data'] = "\n".join(
                [k + " " + v for k, v in topology_data.items()]) + "\n"

        return extra

    def decommission_nodes(self, cluster, instances):
        tts = vu.get_tasktrackers(cluster)
        dns = vu.get_datanodes(cluster)
        decommission_dns = False
        decommission_tts = False

        for i in instances:
            if 'datanode' in i.node_group.node_processes:
                dns.remove(i)
                decommission_dns = True
            if 'tasktracker' in i.node_group.node_processes:
                tts.remove(i)
                decommission_tts = True

        nn = vu.get_namenode(cluster)
        jt = vu.get_jobtracker(cluster)

        if decommission_tts:
            sc.decommission_tt(jt, instances, tts)
        if decommission_dns:
            sc.decommission_dn(nn, instances, dns)

    def validate_scaling(self, cluster, existing, additional):
        self._validate_existing_ng_scaling(cluster, existing)
        self._validate_additional_ng_scaling(cluster, additional)

    def scale_cluster(self, cluster, instances):
        self._setup_instances(cluster, instances)

        run.refresh_nodes(remote.get_remote(vu.get_namenode(cluster)),
                          "dfsadmin")
        jt = vu.get_jobtracker(cluster)
        if jt:
            run.refresh_nodes(remote.get_remote(jt), "mradmin")

        self._start_tt_dn_processes(instances)

    def _start_tt_dn_processes(self, instances):
        tt_dn_names = ["datanode", "tasktracker"]

        instances = utils.instances_with_services(instances, tt_dn_names)

        if not instances:
            return

        cpo.add_provisioning_step(
            instances[0].cluster_id,
            utils.start_process_event_message("DataNodes, TaskTrackers"),
            len(instances))

        with context.ThreadGroup() as tg:
            for i in instances:
                processes = set(i.node_group.node_processes)
                tt_dn_procs = processes.intersection(tt_dn_names)
                tg.spawn('vanilla-start-tt-dn-%s' % i.instance_name,
                         self._start_tt_dn, i, list(tt_dn_procs))

    @cpo.event_wrapper(True)
    def _start_tt_dn(self, instance, tt_dn_procs):
        with instance.remote() as r:
            run.start_processes(r, *tt_dn_procs)

    @cpo.event_wrapper(True,
                       step=_("Setup instances and push configs"),
                       param=('cluster', 1))
    def _setup_instances(self, cluster, instances):
        if (CONF.use_identity_api_v3 and CONF.use_domain_for_proxy_users
                and vu.get_hiveserver(cluster)
                and c_helper.is_swift_enable(cluster)):
            cluster = proxy.create_proxy_user_for_cluster(cluster)
            instances = utils.get_instances(cluster)

        extra = self._extract_configs_to_extra(cluster)
        cluster = conductor.cluster_get(context.ctx(), cluster)
        self._push_configs_to_nodes(cluster, extra, instances)

    def _push_configs_to_nodes(self, cluster, extra, new_instances):
        all_instances = utils.get_instances(cluster)
        new_ids = set([instance.id for instance in new_instances])
        with context.ThreadGroup() as tg:
            for instance in all_instances:
                if instance.id in new_ids:
                    tg.spawn('vanilla-configure-%s' % instance.instance_name,
                             self._push_configs_to_new_node, cluster, extra,
                             instance)
                else:
                    tg.spawn('vanilla-reconfigure-%s' % instance.instance_name,
                             self._push_configs_to_existing_node, cluster,
                             extra, instance)

    def _push_configs_to_new_node(self, cluster, extra, instance):
        ng_extra = extra[instance.node_group.id]
        private_key, public_key = c_helper.get_hadoop_ssh_keys(cluster)

        files = {
            '/etc/hadoop/core-site.xml': ng_extra['xml']['core-site'],
            '/etc/hadoop/mapred-site.xml': ng_extra['xml']['mapred-site'],
            '/etc/hadoop/hdfs-site.xml': ng_extra['xml']['hdfs-site'],
            '/tmp/sahara-hadoop-init.sh': ng_extra['setup_script'],
            'id_rsa': private_key,
            'authorized_keys': public_key
        }

        key_cmd = ('sudo mkdir -p /home/hadoop/.ssh/ && '
                   'sudo mv id_rsa authorized_keys /home/hadoop/.ssh && '
                   'sudo chown -R hadoop:hadoop /home/hadoop/.ssh && '
                   'sudo chmod 600 /home/hadoop/.ssh/{id_rsa,authorized_keys}')

        with remote.get_remote(instance) as r:
            # TODO(aignatov): sudo chown is wrong solution. But it works.
            r.execute_command('sudo chown -R $USER:$USER /etc/hadoop')
            r.execute_command('sudo chown -R $USER:$USER /opt/oozie/conf')
            r.write_files_to(files)
            r.execute_command('sudo chmod 0500 /tmp/sahara-hadoop-init.sh')
            r.execute_command('sudo /tmp/sahara-hadoop-init.sh '
                              '>> /tmp/sahara-hadoop-init.log 2>&1')

            r.execute_command(key_cmd)

            if c_helper.is_data_locality_enabled(cluster):
                r.write_file_to(
                    '/etc/hadoop/topology.sh',
                    f.get_file_text(
                        'plugins/vanilla/v1_2_1/resources/topology.sh'))
                r.execute_command('sudo chmod +x /etc/hadoop/topology.sh')

            self._write_topology_data(r, cluster, extra)
            self._push_master_configs(r, cluster, extra, instance)

    def _push_configs_to_existing_node(self, cluster, extra, instance):
        node_processes = instance.node_group.node_processes
        need_update = (c_helper.is_data_locality_enabled(cluster)
                       or 'namenode' in node_processes
                       or 'jobtracker' in node_processes
                       or 'oozie' in node_processes
                       or 'hiveserver' in node_processes)

        if not need_update:
            return

        with remote.get_remote(instance) as r:
            self._write_topology_data(r, cluster, extra)
            self._push_master_configs(r, cluster, extra, instance)

    def _write_topology_data(self, r, cluster, extra):
        if c_helper.is_data_locality_enabled(cluster):
            topology_data = extra['topology_data']
            r.write_file_to('/etc/hadoop/topology.data', topology_data)

    def _push_master_configs(self, r, cluster, extra, instance):
        ng_extra = extra[instance.node_group.id]
        node_processes = instance.node_group.node_processes

        if 'namenode' in node_processes:
            self._push_namenode_configs(cluster, r)

        if 'jobtracker' in node_processes:
            self._push_jobtracker_configs(cluster, r)

        if 'oozie' in node_processes:
            self._push_oozie_configs(ng_extra, r)

        if 'hiveserver' in node_processes:
            self._push_hive_configs(ng_extra, r)

    def _push_namenode_configs(self, cluster, r):
        r.write_file_to(
            '/etc/hadoop/dn.incl',
            utils.generate_fqdn_host_names(vu.get_datanodes(cluster)))

    def _push_jobtracker_configs(self, cluster, r):
        r.write_file_to(
            '/etc/hadoop/tt.incl',
            utils.generate_fqdn_host_names(vu.get_tasktrackers(cluster)))

    def _push_oozie_configs(self, ng_extra, r):
        r.write_file_to('/opt/oozie/conf/oozie-site.xml',
                        ng_extra['xml']['oozie-site'])

    def _push_hive_configs(self, ng_extra, r):
        files = {'/opt/hive/conf/hive-site.xml': ng_extra['xml']['hive-site']}
        r.write_files_to(files)

    def _set_cluster_info(self, cluster):
        nn = vu.get_namenode(cluster)
        jt = vu.get_jobtracker(cluster)
        oozie = vu.get_oozie(cluster)
        info = {}

        if jt:
            ui_port = c_helper.get_port_from_config(
                'MapReduce', 'mapred.job.tracker.http.address', cluster)
            jt_port = c_helper.get_port_from_config('MapReduce',
                                                    'mapred.job.tracker',
                                                    cluster)

            info['MapReduce'] = {
                'Web UI': 'http://%s:%s' % (jt.management_ip, ui_port),
                'JobTracker': '%s:%s' % (jt.hostname(), jt_port)
            }

        if nn:
            ui_port = c_helper.get_port_from_config('HDFS', 'dfs.http.address',
                                                    cluster)
            nn_port = c_helper.get_port_from_config('HDFS', 'fs.default.name',
                                                    cluster)

            info['HDFS'] = {
                'Web UI': 'http://%s:%s' % (nn.management_ip, ui_port),
                'NameNode': 'hdfs://%s:%s' % (nn.hostname(), nn_port)
            }

        if oozie:
            # TODO(yrunts) change from hardcode value
            info['JobFlow'] = {
                'Oozie': 'http://%s:11000' % oozie.management_ip
            }

        ctx = context.ctx()
        conductor.cluster_update(ctx, cluster, {'info': info})

    def _get_scalable_processes(self):
        return ["datanode", "tasktracker"]

    def _validate_additional_ng_scaling(self, cluster, additional):
        jt = vu.get_jobtracker(cluster)
        scalable_processes = self._get_scalable_processes()

        for ng_id in additional:
            ng = g.get_by_id(cluster.node_groups, ng_id)
            if not set(ng.node_processes).issubset(scalable_processes):
                raise ex.NodeGroupCannotBeScaled(
                    ng.name,
                    _("Vanilla plugin cannot scale nodegroup"
                      " with processes: %s") % ' '.join(ng.node_processes))
            if not jt and 'tasktracker' in ng.node_processes:
                raise ex.NodeGroupCannotBeScaled(
                    ng.name,
                    _("Vanilla plugin cannot scale node group with "
                      "processes which have no master-processes run "
                      "in cluster"))

    def _validate_existing_ng_scaling(self, cluster, existing):
        scalable_processes = self._get_scalable_processes()
        dn_to_delete = 0
        for ng in cluster.node_groups:
            if ng.id in existing:
                if (ng.count > existing[ng.id]
                        and "datanode" in ng.node_processes):
                    dn_to_delete += ng.count - existing[ng.id]
                if not set(ng.node_processes).issubset(scalable_processes):
                    raise ex.NodeGroupCannotBeScaled(
                        ng.name,
                        _("Vanilla plugin cannot scale nodegroup"
                          " with processes: %s") % ' '.join(ng.node_processes))

        dn_amount = len(vu.get_datanodes(cluster))
        rep_factor = c_helper.get_config_value('HDFS', 'dfs.replication',
                                               cluster)

        if dn_to_delete > 0 and dn_amount - dn_to_delete < rep_factor:
            raise ex.ClusterCannotBeScaled(
                cluster.name,
                _("Vanilla plugin cannot shrink cluster because "
                  "it would be not enough nodes for replicas "
                  "(replication factor is %s)") % rep_factor)

    def get_edp_engine(self, cluster, job_type):
        if job_type in edp_engine.EdpOozieEngine.get_supported_job_types():
            return edp_engine.EdpOozieEngine(cluster)
        return None

    def get_edp_job_types(self):
        return edp_engine.EdpOozieEngine.get_supported_job_types()

    def get_edp_config_hints(self, job_type):
        return edp_engine.EdpOozieEngine.get_possible_job_config(job_type)

    def get_open_ports(self, node_group):
        cluster = node_group.cluster

        ports = []

        if "namenode" in node_group.node_processes:
            ports.append(
                c_helper.get_port_from_config('HDFS', 'dfs.http.address',
                                              cluster))
            ports.append(8020)

        if "datanode" in node_group.node_processes:
            ports.append(
                c_helper.get_port_from_config('HDFS',
                                              'dfs.datanode.http.address',
                                              cluster))
            ports.append(
                c_helper.get_port_from_config('HDFS', 'dfs.datanode.address',
                                              cluster))
            ports.append(
                c_helper.get_port_from_config('HDFS',
                                              'dfs.datanode.ipc.address',
                                              cluster))

        if "jobtracker" in node_group.node_processes:
            ports.append(
                c_helper.get_port_from_config(
                    'MapReduce', 'mapred.job.tracker.http.address', cluster))
            ports.append(8021)

        if "tasktracker" in node_group.node_processes:
            ports.append(
                c_helper.get_port_from_config(
                    'MapReduce', 'mapred.task.tracker.http.address', cluster))

        if "secondarynamenode" in node_group.node_processes:
            ports.append(
                c_helper.get_port_from_config('HDFS',
                                              'dfs.secondary.http.address',
                                              cluster))

        if "oozie" in node_group.node_processes:
            ports.append(11000)

        if "hive" in node_group.node_processes:
            ports.append(9999)
            ports.append(10000)

        return ports

    def on_terminate_cluster(self, cluster):
        proxy.delete_proxy_user_for_cluster(cluster)
Example #18
def _step_description(x):
    return {'step': gu.start_process_event_message(x), 'param': ('cluster', 0)}
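
Presumably this dictionary is meant to be unpacked into the step=/param= keyword arguments of an event wrapper, as the decorators in the other examples suggest. The self-contained sketch below only illustrates that unpacking; start_process_event_message, event_wrapper and refresh_datanodes are toy stand-ins, not the real Sahara helpers.

def start_process_event_message(name):
    # illustrative text only; Sahara's helper builds its own message
    return 'Start the following process(es): %s' % name


def _step_description(x):
    return {'step': start_process_event_message(x), 'param': ('cluster', 0)}


def event_wrapper(mark_successful, step=None, param=None):
    # toy decorator: record which step and parameter a function reports against
    def decorator(func):
        func.step = step
        func.param = param
        return func
    return decorator


@event_wrapper(True, **_step_description('DataNodes'))
def refresh_datanodes(cluster):
    pass


print(refresh_datanodes.step)   # Start the following process(es): DataNodes
print(refresh_datanodes.param)  # ('cluster', 0)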
Example #19
        if 'nodemanager' in processes:
            r.execute_command(
                'sudo su - -c  "yarn-daemon.sh start nodemanager" hadoop')


def start_hadoop_process(instance, process):
    instance.remote().execute_command(
        'sudo su - -c "hadoop-daemon.sh start %s" hadoop' % process)


def start_yarn_process(instance, process):
    instance.remote().execute_command(
        'sudo su - -c  "yarn-daemon.sh start %s" hadoop' % process)


@cpo.event_wrapper(True, step=pu.start_process_event_message("HistoryServer"))
def start_historyserver(instance):
    instance.remote().execute_command(
        'sudo su - -c "mr-jobhistory-daemon.sh start historyserver" hadoop')


@cpo.event_wrapper(True, step=pu.start_process_event_message("Oozie"))
def start_oozie_process(pctx, instance):
    with context.set_current_instance_id(instance.instance_id):
        with instance.remote() as r:
            if c_helper.is_mysql_enabled(pctx, instance.cluster):
                _start_mysql(r)
                LOG.debug("Creating Oozie DB Schema")
                sql_script = files.get_file_text(
                    'plugins/vanilla/hadoop2/resources/create_oozie_db.sql')
Example #20
class StormProvider(p.ProvisioningPluginBase):
    def __init__(self):
        self.processes = {
            "Zookeeper": ["zookeeper"],
            "Storm": ["nimbus", "supervisor"]
        }

    def get_title(self):
        return "Apache Storm"

    def get_description(self):
        return (_("This plugin provides an ability to launch Storm "
                  "cluster without any management consoles."))

    def get_versions(self):
        return ['0.9.2']

    def get_configs(self, storm_version):
        return c_helper.get_plugin_configs()

    def get_node_processes(self, storm_version):
        return self.processes

    def validate(self, cluster):
        # validate Storm Master Node and Storm Slaves
        sm_count = sum(
            [ng.count for ng in utils.get_node_groups(cluster, "nimbus")])

        if sm_count != 1:
            raise ex.RequiredServiceMissingException("Storm nimbus")

        sl_count = sum(
            [ng.count for ng in utils.get_node_groups(cluster, "supervisor")])

        if sl_count < 1:
            raise ex.InvalidComponentCountException("Storm supervisor",
                                                    _("1 or more"), sl_count)

    def update_infra(self, cluster):
        pass

    def configure_cluster(self, cluster):
        self._setup_instances(cluster)

    def start_cluster(self, cluster):
        sm_instance = utils.get_instance(cluster, "nimbus")
        sl_instances = utils.get_instances(cluster, "supervisor")
        zk_instance = utils.get_instances(cluster, "zookeeper")

        if zk_instance:
            self._start_zookeeper_processes(zk_instance)

        # start storm master
        if sm_instance:
            self._start_storm_master(sm_instance)

        # start storm slaves
        self._start_slave_processes(sl_instances)

        LOG.info(
            _LI('Cluster {cluster} has been started successfully').format(
                cluster=cluster.name))
        self._set_cluster_info(cluster)

    def get_edp_engine(self, cluster, job_type):
        if job_type in edp_engine.EdpEngine.get_supported_job_types():
            return edp_engine.EdpEngine(cluster)

        return None

    def get_edp_job_types(self, versions=[]):
        res = {}
        for vers in self.get_versions():
            if not versions or vers in versions:
                if edp_engine.EdpEngine.edp_supported(vers):
                    res[vers] = edp_engine.EdpEngine.get_supported_job_types()
        return res

    def get_edp_config_hints(self, job_type, version):
        if edp_engine.EdpEngine.edp_supported(version):
            return edp_engine.EdpEngine.get_possible_job_config(job_type)
        return {}

    def _extract_configs_to_extra(self, cluster):
        st_master = utils.get_instance(cluster, "nimbus")
        zk_servers = utils.get_instances(cluster, "zookeeper")

        extra = dict()

        config_instances = ''
        if st_master is not None:
            if zk_servers is not None:
                zknames = []
                for zk in zk_servers:
                    zknames.append(zk.hostname())

            config_instances = c_helper.generate_storm_config(
                st_master.hostname(), zknames)

        config = self._convert_dict_to_yaml(config_instances)
        supervisor_conf = c_helper.generate_slave_supervisor_conf()
        nimbus_ui_conf = c_helper.generate_master_supervisor_conf()
        zk_conf = c_helper.generate_zookeeper_conf()

        for ng in cluster.node_groups:
            extra[ng.id] = {
                'st_instances': config,
                'slave_sv_conf': supervisor_conf,
                'master_sv_conf': nimbus_ui_conf,
                'zk_conf': zk_conf
            }

        return extra

    @cpo.event_wrapper(True,
                       step=utils.start_process_event_message("StormMaster"))
    def _start_storm_master(self, sm_instance):
        with remote.get_remote(sm_instance) as r:
            run.start_storm_nimbus_and_ui(r)
            LOG.info(
                _LI("Storm master at {host} has been started").format(
                    host=sm_instance.hostname()))

    def _start_slave_processes(self, sl_instances):
        if len(sl_instances) == 0:
            return
        cpo.add_provisioning_step(sl_instances[0].cluster_id,
                                  utils.start_process_event_message("Slave"),
                                  len(sl_instances))

        with context.ThreadGroup() as tg:
            for i in sl_instances:
                tg.spawn('storm-start-sl-%s' % i.instance_name,
                         self._start_slaves, i)

    @cpo.event_wrapper(True)
    def _start_slaves(self, instance):
        with instance.remote() as r:
            run.start_storm_supervisor(r)

    def _start_zookeeper_processes(self, zk_instances):
        if len(zk_instances) == 0:
            return

        cpo.add_provisioning_step(
            zk_instances[0].cluster_id,
            utils.start_process_event_message("Zookeeper"), len(zk_instances))

        with context.ThreadGroup() as tg:
            for i in zk_instances:
                tg.spawn('storm-start-zk-%s' % i.instance_name,
                         self._start_zookeeper, i)

    @cpo.event_wrapper(True)
    def _start_zookeeper(self, instance):
        with instance.remote() as r:
            run.start_zookeeper(r)

    def _setup_instances(self, cluster, instances=None):
        extra = self._extract_configs_to_extra(cluster)

        if instances is None:
            instances = utils.get_instances(cluster)

        self._push_configs_to_nodes(cluster, extra, instances)

    def _push_configs_to_nodes(self, cluster, extra, new_instances):
        all_instances = utils.get_instances(cluster)
        cpo.add_provisioning_step(cluster.id, _("Push configs to nodes"),
                                  len(all_instances))

        with context.ThreadGroup() as tg:
            for instance in all_instances:
                if instance in new_instances:
                    tg.spawn('storm-configure-%s' % instance.instance_name,
                             self._push_configs_to_new_node, cluster, extra,
                             instance)
                else:
                    tg.spawn('storm-reconfigure-%s' % instance.instance_name,
                             self._push_configs_to_existing_node, cluster,
                             extra, instance)

    def _convert_dict_to_yaml(self, dict_to_convert):
        new_dict = dict_to_convert.copy()
        for key in dict_to_convert:
            if isinstance(dict_to_convert[key], six.string_types):
                new_dict[key] = "\"" + dict_to_convert[key] + "\""

        stream = yaml.dump(new_dict, default_flow_style=False)
        stream = stream.replace("\'", "")

        return stream

    @cpo.event_wrapper(True)
    def _push_configs_to_new_node(self, cluster, extra, instance):
        ng_extra = extra[instance.node_group.id]

        files_supervisor = {
            '/etc/supervisor/supervisord.conf': ng_extra['slave_sv_conf']
        }
        files_storm = {
            '/usr/local/storm/conf/storm.yaml': ng_extra['st_instances']
        }
        files_zk = {
            '/opt/zookeeper/zookeeper/conf/zoo.cfg': ng_extra['zk_conf']
        }
        files_supervisor_master = {
            '/etc/supervisor/supervisord.conf': ng_extra['master_sv_conf']
        }

        with remote.get_remote(instance) as r:
            node_processes = instance.node_group.node_processes
            r.write_files_to(files_storm, run_as_root=True)
            if 'zookeeper' in node_processes:
                self._push_zk_configs(r, files_zk)
            if 'nimbus' in node_processes:
                self._push_supervisor_configs(r, files_supervisor_master)
            if 'supervisor' in node_processes:
                self._push_supervisor_configs(r, files_supervisor)

    @cpo.event_wrapper(True)
    def _push_configs_to_existing_node(self, cluster, extra, instance):
        node_processes = instance.node_group.node_processes
        need_storm_update = ('nimbus' in node_processes
                             or 'supervisor' in node_processes)
        need_zookeeper_update = 'zookeeper' in node_processes

        ng_extra = extra[instance.node_group.id]
        r = remote.get_remote(instance)

        if need_storm_update:
            storm_path = '/usr/local/storm/conf/storm.yaml'
            files_storm = {storm_path: ng_extra['st_instances']}
            r.write_files_to(files_storm)

        if need_zookeeper_update:
            zk_path = '/opt/zookeeper/zookeeper-3.4.6/conf/zoo.cfg'
            files_zookeeper = {zk_path: ng_extra['zk_conf']}
            self._push_zk_configs(r, files_zookeeper)

    def _set_cluster_info(self, cluster):
        st_master = utils.get_instance(cluster, "nimbus")
        info = {}

        if st_master:
            port = "8080"

            info['Storm'] = {
                'Web UI': 'http://%s:%s' % (st_master.management_ip, port)
            }
        ctx = context.ctx()
        conductor.cluster_update(ctx, cluster, {'info': info})

    def _push_zk_configs(self, r, files):
        r.write_files_to(files, run_as_root=True)

    def _push_supervisor_configs(self, r, files):
        r.append_to_files(files, run_as_root=True)
Example #21
def _step_description(x):
    return {
        'step': gu.start_process_event_message(x),
        'param': ('cluster', 0)
    }
        if 'nodemanager' in processes:
            r.execute_command(
                'sudo su - -c  "yarn-daemon.sh start nodemanager" hadoop')


def start_hadoop_process(instance, process):
    instance.remote().execute_command(
        'sudo su - -c "hadoop-daemon.sh start %s" hadoop' % process)


def start_yarn_process(instance, process):
    instance.remote().execute_command(
        'sudo su - -c  "yarn-daemon.sh start %s" hadoop' % process)


@utils.event_wrapper(True,
                     step=utils.start_process_event_message("HistoryServer"))
def start_historyserver(instance):
    instance.remote().execute_command(
        'sudo su - -c "mr-jobhistory-daemon.sh start historyserver" hadoop')


@utils.event_wrapper(True, step=utils.start_process_event_message("Oozie"))
def start_oozie_process(pctx, instance):
    with context.set_current_instance_id(instance.instance_id):
        with instance.remote() as r:
            if config_helper.is_mysql_enabled(pctx, instance.cluster):
                _start_mysql(r)
                LOG.debug("Creating Oozie DB Schema")
                sql_script = utils.get_file_text(
                    'plugins/vanilla/hadoop2/resources/create_oozie_db.sql',
                    'sahara_plugin_vanilla')
Beispiel #23
0
# limitations under the License.

from sahara.plugins import utils
from sahara.plugins.sandbox.hadoop2 import run_scripts as run
from sahara.plugins.sandbox import utils as vu
from sahara.utils import cluster_progress_ops as cpo
from sahara.utils import files


def start_namenode(cluster, backup=None):
    nn = vu.get_namenode(cluster)
    _start_namenode(nn, backup)


@cpo.event_wrapper(
    True, step=utils.start_process_event_message('NameNode'))
def _start_namenode(nn, backup=None):
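    # Formatting wipes any existing HDFS metadata, so it is skipped when the
    # NameNode is being brought up from a backup.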
    if backup is None:
        run.format_namenode(nn)
    run.start_hadoop_process(nn, 'namenode')


def start_secondarynamenode(cluster):
    snn = vu.get_secondarynamenode(cluster)
    if snn:
        _start_secondarynamenode(snn)


@cpo.event_wrapper(
    True, step=utils.start_process_event_message("SecondaryNameNodes"))
def _start_secondarynamenode(snn):
    run.start_hadoop_process(snn, 'secondarynamenode')
Beispiel #24
0
# See the License for the specific language governing permissions and
# limitations under the License.

from sahara.plugins import utils
from sahara.plugins.vanilla.hadoop2 import run_scripts as run
from sahara.plugins.vanilla import utils as vu
from sahara.utils import cluster_progress_ops as cpo


def start_namenode(cluster):
    nn = vu.get_namenode(cluster)
    _start_namenode(nn)


@cpo.event_wrapper(
    True, step=utils.start_process_event_message('NameNode'))
def _start_namenode(nn):
    run.format_namenode(nn)
    run.start_hadoop_process(nn, 'namenode')


def start_secondarynamenode(cluster):
    snn = vu.get_secondarynamenode(cluster)
    if snn:
        _start_secondarynamenode(snn)


@cpo.event_wrapper(
    True, step=utils.start_process_event_message("SecondaryNameNodes"))
def _start_secondarynamenode(snn):
    run.start_hadoop_process(snn, 'secondarynamenode')
Beispiel #25
0
class VersionHandler(avm.AbstractVersionHandler):
    def __init__(self):
        self.pctx = {
            'env_confs': c_helper.get_env_configs(),
            'all_confs': c_helper.get_plugin_configs()
        }

    def get_plugin_configs(self):
        return self.pctx['all_confs']

    def get_node_processes(self):
        return {
            "Hadoop": [],
            "MapReduce": ["historyserver"],
            "HDFS": ["namenode", "datanode", "secondarynamenode"],
            "YARN": ["resourcemanager", "nodemanager"],
            "JobFlow": ["oozie"],
            "Hive": ["hiveserver"]
        }

    def validate(self, cluster):
        vl.validate_cluster_creating(self.pctx, cluster)

    def update_infra(self, cluster):
        pass

    def configure_cluster(self, cluster):
        c.configure_cluster(self.pctx, cluster)

    def start_namenode(self, cluster):
        nn = vu.get_namenode(cluster)
        self._start_namenode(nn)

    @cpo.event_wrapper(True,
                       step=utils.start_process_event_message('NameNode'))
    def _start_namenode(self, nn):
        run.format_namenode(nn)
        run.start_hadoop_process(nn, 'namenode')

    def start_secondarynamenodes(self, cluster):
        snns = vu.get_secondarynamenodes(cluster)
        if len(snns) == 0:
            return
        cpo.add_provisioning_step(
            snns[0].cluster_id,
            utils.start_process_event_message("SecondaryNameNodes"), len(snns))

        for snn in snns:
            self._start_secondarynamenode(snn)

    @cpo.event_wrapper(True)
    def _start_secondarynamenode(self, snn):
        run.start_hadoop_process(snn, 'secondarynamenode')

    def start_resourcemanager(self, cluster):
        rm = vu.get_resourcemanager(cluster)
        if rm:
            self._start_resourcemanager(rm)

    @cpo.event_wrapper(
        True, step=utils.start_process_event_message('ResourceManager'))
    def _start_resourcemanager(self, rm):
        run.start_yarn_process(rm, 'resourcemanager')

    def start_historyserver(self, cluster):
        hs = vu.get_historyserver(cluster)
        if hs:
            run.start_historyserver(hs)

    def start_oozie(self, cluster):
        oo = vu.get_oozie(cluster)
        if oo:
            run.start_oozie_process(self.pctx, oo)

    def start_hiveserver(self, cluster):
        hiveserver = vu.get_hiveserver(cluster)
        if hiveserver:
            run.start_hiveserver_process(self.pctx, hiveserver)

    def start_cluster(self, cluster):
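        # Bring services up in dependency order: HDFS, then YARN, then the
        # HistoryServer, Oozie and Hive.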
        self.start_namenode(cluster)

        self.start_secondarynamenodes(cluster)

        self.start_resourcemanager(cluster)

        run.start_dn_nm_processes(utils.get_instances(cluster))

        run.await_datanodes(cluster)

        self.start_historyserver(cluster)

        self.start_oozie(cluster)

        self.start_hiveserver(cluster)

        self._set_cluster_info(cluster)

    def decommission_nodes(self, cluster, instances):
        sc.decommission_nodes(self.pctx, cluster, instances)

    def validate_scaling(self, cluster, existing, additional):
        vl.validate_additional_ng_scaling(cluster, additional)
        vl.validate_existing_ng_scaling(self.pctx, cluster, existing)

    def scale_cluster(self, cluster, instances):
        sc.scale_cluster(self.pctx, cluster, instances)

    def _set_cluster_info(self, cluster):
        nn = vu.get_namenode(cluster)
        rm = vu.get_resourcemanager(cluster)
        hs = vu.get_historyserver(cluster)
        oo = vu.get_oozie(cluster)

        info = {}

        if rm:
            info['YARN'] = {
                'Web UI': 'http://%s:%s' % (rm.management_ip, '8088'),
                'ResourceManager': 'http://%s:%s' % (rm.management_ip, '8032')
            }

        if nn:
            info['HDFS'] = {
                'Web UI': 'http://%s:%s' % (nn.management_ip, '50070'),
                'NameNode': 'hdfs://%s:%s' % (nn.hostname(), '9000')
            }

        if oo:
            info['JobFlow'] = {
                'Oozie': 'http://%s:%s' % (oo.management_ip, '11000')
            }

        if hs:
            info['MapReduce JobHistory Server'] = {
                'Web UI': 'http://%s:%s' % (hs.management_ip, '19888')
            }

        ctx = context.ctx()
        conductor.cluster_update(ctx, cluster, {'info': info})

    def get_edp_engine(self, cluster, job_type):
        if job_type in edp_engine.EdpOozieEngine.get_supported_job_types():
            return edp_engine.EdpOozieEngine(cluster)
        return None

    def get_edp_job_types(self):
        return edp_engine.EdpOozieEngine.get_supported_job_types()

    def get_edp_config_hints(self, job_type):
        return edp_engine.EdpOozieEngine.get_possible_job_config(job_type)

    def get_open_ports(self, node_group):
        return c.get_open_ports(node_group)
Beispiel #26
0
        if 'nodemanager' in processes:
            r.execute_command(
                'sudo su - -c  "yarn-daemon.sh start nodemanager" hadoop')


def start_hadoop_process(instance, process):
    instance.remote().execute_command(
        'sudo su - -c "hadoop-daemon.sh start %s" hadoop' % process)


def start_yarn_process(instance, process):
    instance.remote().execute_command(
        'sudo su - -c  "yarn-daemon.sh start %s" hadoop' % process)


@cpo.event_wrapper(True, step=pu.start_process_event_message("HistoryServer"))
def start_historyserver(instance):
    instance.remote().execute_command(
        'sudo su - -c "mr-jobhistory-daemon.sh start historyserver" hadoop')


@cpo.event_wrapper(True, step=pu.start_process_event_message("Oozie"))
def start_oozie_process(pctx, instance):
    with context.set_current_instance_id(instance.instance_id):
        with instance.remote() as r:
            if c_helper.is_mysql_enabled(pctx, instance.cluster):
                _start_mysql(r)
                LOG.debug("Creating Oozie DB Schema")
                sql_script = files.get_file_text(
                    'plugins/vanilla/hadoop2/resources/create_oozie_db.sql')
Beispiel #27
0
class SparkProvider(p.ProvisioningPluginBase):
    def __init__(self):
        self.processes = {
            "HDFS": ["namenode", "datanode"],
            "Spark": ["master", "slave"]
        }

    def get_title(self):
        return "Apache Spark"

    def get_description(self):
        return _("This plugin provides an ability to launch Spark on Hadoop "
                 "CDH cluster without any management consoles.")

    def get_labels(self):
        default = {'enabled': {'status': True}, 'stable': {'status': True}}
        result = {'plugin_labels': copy.deepcopy(default)}
        result['version_labels'] = {
            version: copy.deepcopy(default)
            for version in self.get_versions()
        }
        return result

    def get_versions(self):
        return ['1.6.0', '1.3.1']

    def get_configs(self, hadoop_version):
        return c_helper.get_plugin_configs()

    def get_node_processes(self, hadoop_version):
        return self.processes

    def validate(self, cluster):
        nn_count = sum(
            [ng.count for ng in utils.get_node_groups(cluster, "namenode")])
        if nn_count != 1:
            raise ex.InvalidComponentCountException("namenode", 1, nn_count)

        dn_count = sum(
            [ng.count for ng in utils.get_node_groups(cluster, "datanode")])
        if dn_count < 1:
            raise ex.InvalidComponentCountException("datanode", _("1 or more"),
                                                    dn_count)

        rep_factor = utils.get_config_value_or_default('HDFS',
                                                       "dfs.replication",
                                                       cluster)
        if dn_count < rep_factor:
            raise ex.InvalidComponentCountException(
                'datanode',
                _('%s or more') % rep_factor, dn_count,
                _('Number of %(dn)s instances should not be less '
                  'than %(replication)s') % {
                      'dn': 'datanode',
                      'replication': 'dfs.replication'
                  })

        # validate Spark Master Node and Spark Slaves
        sm_count = sum(
            [ng.count for ng in utils.get_node_groups(cluster, "master")])

        if sm_count != 1:
            raise ex.RequiredServiceMissingException("Spark master")

        sl_count = sum(
            [ng.count for ng in utils.get_node_groups(cluster, "slave")])

        if sl_count < 1:
            raise ex.InvalidComponentCountException("Spark slave",
                                                    _("1 or more"), sl_count)

    def update_infra(self, cluster):
        pass

    def configure_cluster(self, cluster):
        self._setup_instances(cluster)

    @cpo.event_wrapper(True,
                       step=utils.start_process_event_message("NameNode"))
    def _start_namenode(self, nn_instance):
        with remote.get_remote(nn_instance) as r:
            run.format_namenode(r)
            run.start_processes(r, "namenode")

    def start_spark(self, cluster):
        sm_instance = utils.get_instance(cluster, "master")
        if sm_instance:
            self._start_spark(cluster, sm_instance)

    @cpo.event_wrapper(
        True, step=utils.start_process_event_message("SparkMasterNode"))
    def _start_spark(self, cluster, sm_instance):
        with remote.get_remote(sm_instance) as r:
            run.start_spark_master(r, self._spark_home(cluster))
            LOG.info(_LI("Spark service has been started"))

    def start_cluster(self, cluster):
        nn_instance = utils.get_instance(cluster, "namenode")
        dn_instances = utils.get_instances(cluster, "datanode")

        # Start the name node
        self._start_namenode(nn_instance)

        # start the data nodes
        self._start_datanode_processes(dn_instances)
        run.await_datanodes(cluster)

        LOG.info(_LI("Hadoop services have been started"))

        with remote.get_remote(nn_instance) as r:
            r.execute_command("sudo -u hdfs hdfs dfs -mkdir -p /user/$USER/")
            r.execute_command("sudo -u hdfs hdfs dfs -chown $USER "
                              "/user/$USER/")

        # start spark nodes
        self.start_spark(cluster)
        swift_helper.install_ssl_certs(utils.get_instances(cluster))

        LOG.info(_LI('Cluster has been started successfully'))
        self._set_cluster_info(cluster)

    def _spark_home(self, cluster):
        return utils.get_config_value_or_default("Spark", "Spark home",
                                                 cluster)

    def _extract_configs_to_extra(self, cluster):
        sp_master = utils.get_instance(cluster, "master")
        sp_slaves = utils.get_instances(cluster, "slave")

        extra = dict()

        config_master = config_slaves = ''
        if sp_master is not None:
            config_master = c_helper.generate_spark_env_configs(cluster)

        if sp_slaves is not None:
            slavenames = []
            for slave in sp_slaves:
                slavenames.append(slave.hostname())
            config_slaves = c_helper.generate_spark_slaves_configs(slavenames)
        else:
            config_slaves = "\n"

        # Any node that might be used to run spark-submit will need
        # these libs for swift integration
        config_defaults = c_helper.generate_spark_executor_classpath(cluster)

        extra['job_cleanup'] = c_helper.generate_job_cleanup_config(cluster)

        extra['sp_master'] = config_master
        extra['sp_slaves'] = config_slaves
        extra['sp_defaults'] = config_defaults

        if c_helper.is_data_locality_enabled(cluster):
            topology_data = th.generate_topology_map(
                cluster, CONF.enable_hypervisor_awareness)
            extra['topology_data'] = "\n".join(
                [k + " " + v for k, v in topology_data.items()]) + "\n"

        return extra

    def _add_instance_ng_related_to_extra(self, cluster, instance, extra):
        extra = extra.copy()
        ng = instance.node_group
        nn = utils.get_instance(cluster, "namenode")

        extra['xml'] = c_helper.generate_xml_configs(ng.configuration(),
                                                     instance.storage_paths(),
                                                     nn.hostname(), None)
        extra['setup_script'] = c_helper.generate_hadoop_setup_script(
            instance.storage_paths(),
            c_helper.extract_hadoop_environment_confs(ng.configuration()))

        return extra

    def _start_datanode_processes(self, dn_instances):
        if len(dn_instances) == 0:
            return

        cpo.add_provisioning_step(
            dn_instances[0].cluster_id,
            utils.start_process_event_message("DataNodes"), len(dn_instances))

        with context.ThreadGroup() as tg:
            for i in dn_instances:
                tg.spawn('spark-start-dn-%s' % i.instance_name,
                         self._start_datanode, i)

    @cpo.event_wrapper(mark_successful_on_exit=True)
    def _start_datanode(self, instance):
        with instance.remote() as r:
            run.start_processes(r, "datanode")

    def _setup_instances(self, cluster, instances=None):
        extra = self._extract_configs_to_extra(cluster)

        if instances is None:
            instances = utils.get_instances(cluster)

        self._push_configs_to_nodes(cluster, extra, instances)

    def _push_configs_to_nodes(self, cluster, extra, new_instances):
        all_instances = utils.get_instances(cluster)
        cpo.add_provisioning_step(cluster.id, _("Push configs to nodes"),
                                  len(all_instances))
        with context.ThreadGroup() as tg:
            for instance in all_instances:
                extra = self._add_instance_ng_related_to_extra(
                    cluster, instance, extra)
                if instance in new_instances:
                    tg.spawn('spark-configure-%s' % instance.instance_name,
                             self._push_configs_to_new_node, cluster, extra,
                             instance)
                else:
                    tg.spawn('spark-reconfigure-%s' % instance.instance_name,
                             self._push_configs_to_existing_node, cluster,
                             extra, instance)

    @cpo.event_wrapper(mark_successful_on_exit=True)
    def _push_configs_to_new_node(self, cluster, extra, instance):
        files_hadoop = {
            os.path.join(c_helper.HADOOP_CONF_DIR, "core-site.xml"):
            extra['xml']['core-site'],
            os.path.join(c_helper.HADOOP_CONF_DIR, "hdfs-site.xml"):
            extra['xml']['hdfs-site'],
        }

        sp_home = self._spark_home(cluster)
        files_spark = {
            os.path.join(sp_home, 'conf/spark-env.sh'): extra['sp_master'],
            os.path.join(sp_home, 'conf/slaves'): extra['sp_slaves'],
            os.path.join(sp_home, 'conf/spark-defaults.conf'):
            extra['sp_defaults']
        }

        files_init = {
            '/tmp/sahara-hadoop-init.sh': extra['setup_script'],
            'id_rsa': cluster.management_private_key,
            'authorized_keys': cluster.management_public_key
        }

        # pietro: This is required because the (secret) key is not stored in
        # .ssh which hinders password-less ssh required by spark scripts
        key_cmd = ('sudo cp $HOME/id_rsa $HOME/.ssh/; '
                   'sudo chown $USER $HOME/.ssh/id_rsa; '
                   'sudo chmod 600 $HOME/.ssh/id_rsa')

        storage_paths = instance.storage_paths()
        dn_path = ' '.join(c_helper.make_hadoop_path(storage_paths, '/dfs/dn'))
        nn_path = ' '.join(c_helper.make_hadoop_path(storage_paths, '/dfs/nn'))
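        # hdfs_dir_cmd creates these directories on every storage path and
        # hands ownership to the hdfs user.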

        hdfs_dir_cmd = ('sudo mkdir -p %(nn_path)s %(dn_path)s &&'
                        'sudo chown -R hdfs:hadoop %(nn_path)s %(dn_path)s &&'
                        'sudo chmod 755 %(nn_path)s %(dn_path)s' % {
                            "nn_path": nn_path,
                            "dn_path": dn_path
                        })

        with remote.get_remote(instance) as r:
            r.execute_command('sudo chown -R $USER:$USER /etc/hadoop')
            r.execute_command('sudo chown -R $USER:$USER %s' % sp_home)
            r.write_files_to(files_hadoop)
            r.write_files_to(files_spark)
            r.write_files_to(files_init)
            r.execute_command('sudo chmod 0500 /tmp/sahara-hadoop-init.sh')
            r.execute_command('sudo /tmp/sahara-hadoop-init.sh '
                              '>> /tmp/sahara-hadoop-init.log 2>&1')

            r.execute_command(hdfs_dir_cmd)
            r.execute_command(key_cmd)

            if c_helper.is_data_locality_enabled(cluster):
                r.write_file_to(
                    '/etc/hadoop/topology.sh',
                    f.get_file_text('plugins/spark/resources/topology.sh'))
                r.execute_command('sudo chmod +x /etc/hadoop/topology.sh')

            self._write_topology_data(r, cluster, extra)
            self._push_master_configs(r, cluster, extra, instance)
            self._push_cleanup_job(r, cluster, extra, instance)

    @cpo.event_wrapper(mark_successful_on_exit=True)
    def _push_configs_to_existing_node(self, cluster, extra, instance):
        node_processes = instance.node_group.node_processes
        need_update_hadoop = (c_helper.is_data_locality_enabled(cluster)
                              or 'namenode' in node_processes)
        need_update_spark = ('master' in node_processes
                             or 'slave' in node_processes)

        if need_update_spark:
            sp_home = self._spark_home(cluster)
            files = {
                os.path.join(sp_home, 'conf/spark-env.sh'):
                extra['sp_master'],
                os.path.join(sp_home, 'conf/slaves'):
                extra['sp_slaves'],
                os.path.join(sp_home, 'conf/spark-defaults.conf'):
                extra['sp_defaults']
            }
            r = remote.get_remote(instance)
            r.write_files_to(files)
            self._push_cleanup_job(r, cluster, extra, instance)
        if need_update_hadoop:
            with remote.get_remote(instance) as r:
                self._write_topology_data(r, cluster, extra)
                self._push_master_configs(r, cluster, extra, instance)

    def _write_topology_data(self, r, cluster, extra):
        if c_helper.is_data_locality_enabled(cluster):
            topology_data = extra['topology_data']
            r.write_file_to('/etc/hadoop/topology.data', topology_data)

    def _push_master_configs(self, r, cluster, extra, instance):
        node_processes = instance.node_group.node_processes
        if 'namenode' in node_processes:
            self._push_namenode_configs(cluster, r)

    def _push_cleanup_job(self, r, cluster, extra, instance):
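        # The periodic tmp-cleanup cron job is only installed or removed on
        # the Spark master node.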
        node_processes = instance.node_group.node_processes
        if 'master' in node_processes:
            if extra['job_cleanup']['valid']:
                r.write_file_to('/etc/hadoop/tmp-cleanup.sh',
                                extra['job_cleanup']['script'])
                r.execute_command("chmod 755 /etc/hadoop/tmp-cleanup.sh")
                cmd = 'sudo sh -c \'echo "%s" > /etc/cron.d/spark-cleanup\''
                r.execute_command(cmd % extra['job_cleanup']['cron'])
            else:
                r.execute_command("sudo rm -f /etc/hadoop/tmp-cleanup.sh")
                r.execute_command("sudo rm -f /etc/cron.d/spark-cleanup")

    def _push_namenode_configs(self, cluster, r):
        r.write_file_to(
            '/etc/hadoop/dn.incl',
            utils.generate_fqdn_host_names(
                utils.get_instances(cluster, "datanode")))
        r.write_file_to('/etc/hadoop/dn.excl', '')

    def _set_cluster_info(self, cluster):
        nn = utils.get_instance(cluster, "namenode")
        sp_master = utils.get_instance(cluster, "master")
        info = {}

        if nn:
            address = utils.get_config_value_or_default(
                'HDFS', 'dfs.http.address', cluster)
            port = address[address.rfind(':') + 1:]
            info['HDFS'] = {
                'Web UI': 'http://%s:%s' % (nn.get_ip_or_dns_name(), port)
            }
            info['HDFS']['NameNode'] = 'hdfs://%s:8020' % nn.hostname()

        if sp_master:
            port = utils.get_config_value_or_default('Spark',
                                                     'Master webui port',
                                                     cluster)
            if port is not None:
                info['Spark'] = {
                    'Web UI':
                    'http://%s:%s' % (sp_master.get_ip_or_dns_name(), port)
                }
        ctx = context.ctx()
        conductor.cluster_update(ctx, cluster, {'info': info})

    # Scaling

    def validate_scaling(self, cluster, existing, additional):
        self._validate_existing_ng_scaling(cluster, existing)
        self._validate_additional_ng_scaling(cluster, additional)

    def decommission_nodes(self, cluster, instances):
        sls = utils.get_instances(cluster, "slave")
        dns = utils.get_instances(cluster, "datanode")
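        # sls/dns start out as the full slave/datanode lists; instances being
        # removed are dropped below so only the surviving nodes remain.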
        decommission_dns = False
        decommission_sls = False

        for i in instances:
            if 'datanode' in i.node_group.node_processes:
                dns.remove(i)
                decommission_dns = True
            if 'slave' in i.node_group.node_processes:
                sls.remove(i)
                decommission_sls = True

        nn = utils.get_instance(cluster, "namenode")
        spark_master = utils.get_instance(cluster, "master")

        if decommission_sls:
            sc.decommission_sl(spark_master, instances, sls)
        if decommission_dns:
            sc.decommission_dn(nn, instances, dns)

    def scale_cluster(self, cluster, instances):
        master = utils.get_instance(cluster, "master")
        r_master = remote.get_remote(master)

        run.stop_spark(r_master, self._spark_home(cluster))

        self._setup_instances(cluster, instances)
        nn = utils.get_instance(cluster, "namenode")
        run.refresh_nodes(remote.get_remote(nn), "dfsadmin")
        dn_instances = [
            instance for instance in instances
            if 'datanode' in instance.node_group.node_processes
        ]
        self._start_datanode_processes(dn_instances)

        swift_helper.install_ssl_certs(instances)
        run.start_spark_master(r_master, self._spark_home(cluster))
        LOG.info(_LI("Spark master service has been restarted"))

    def _get_scalable_processes(self):
        return ["datanode", "slave"]

    def _validate_additional_ng_scaling(self, cluster, additional):
        scalable_processes = self._get_scalable_processes()

        for ng_id in additional:
            ng = ug.get_by_id(cluster.node_groups, ng_id)
            if not set(ng.node_processes).issubset(scalable_processes):
                raise ex.NodeGroupCannotBeScaled(
                    ng.name,
                    _("Spark plugin cannot scale nodegroup"
                      " with processes: %s") % ' '.join(ng.node_processes))

    def _validate_existing_ng_scaling(self, cluster, existing):
        scalable_processes = self._get_scalable_processes()
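        # Count the datanodes the resize would remove so the remaining amount
        # can be checked against the HDFS replication factor.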
        dn_to_delete = 0
        for ng in cluster.node_groups:
            if ng.id in existing:
                if ng.count > existing[ng.id] and ("datanode"
                                                   in ng.node_processes):
                    dn_to_delete += ng.count - existing[ng.id]
                if not set(ng.node_processes).issubset(scalable_processes):
                    raise ex.NodeGroupCannotBeScaled(
                        ng.name,
                        _("Spark plugin cannot scale nodegroup"
                          " with processes: %s") % ' '.join(ng.node_processes))

        dn_amount = len(utils.get_instances(cluster, "datanode"))
        rep_factor = utils.get_config_value_or_default('HDFS',
                                                       "dfs.replication",
                                                       cluster)

        if dn_to_delete > 0 and dn_amount - dn_to_delete < rep_factor:
            raise ex.ClusterCannotBeScaled(
                cluster.name,
                _("Spark plugin cannot shrink cluster because "
                  "there would be not enough nodes for HDFS "
                  "replicas (replication factor is %s)") % rep_factor)

    def get_edp_engine(self, cluster, job_type):
        if edp_engine.EdpEngine.job_type_supported(job_type):
            return edp_engine.EdpEngine(cluster)

        if shell_engine.ShellEngine.job_type_supported(job_type):
            return shell_engine.ShellEngine(cluster)

        return None

    def get_edp_job_types(self, versions=None):
        res = {}
        for vers in self.get_versions():
            if not versions or vers in versions:
                res[vers] = shell_engine.ShellEngine.get_supported_job_types()

                if edp_engine.EdpEngine.edp_supported(vers):
                    res[vers].extend(
                        edp_engine.EdpEngine.get_supported_job_types())

        return res

    def get_edp_config_hints(self, job_type, version):
        if (edp_engine.EdpEngine.edp_supported(version)
                and edp_engine.EdpEngine.job_type_supported(job_type)):
            return edp_engine.EdpEngine.get_possible_job_config(job_type)

        if shell_engine.ShellEngine.job_type_supported(job_type):
            return shell_engine.ShellEngine.get_possible_job_config(job_type)

        return {}

    def get_open_ports(self, node_group):
        cluster = node_group.cluster
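        # HDFS ports are fixed defaults; Spark ports are read from the
        # cluster configuration so user overrides are picked up.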
        ports_map = {
            'namenode': [8020, 50070, 50470],
            'datanode': [50010, 1004, 50075, 1006, 50020],
            'master': [
                int(
                    utils.get_config_value_or_default("Spark", "Master port",
                                                      cluster)),
                int(
                    utils.get_config_value_or_default("Spark",
                                                      "Master webui port",
                                                      cluster)),
            ],
            'slave': [
                int(
                    utils.get_config_value_or_default("Spark",
                                                      "Worker webui port",
                                                      cluster))
            ]
        }

        ports = []
        for process in node_group.node_processes:
            if process in ports_map:
                ports.extend(ports_map[process])

        return ports

    def recommend_configs(self, cluster, scaling=False):
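        # Only dfs.replication is wired up for automatic configuration in
        # this plugin.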
        want_to_configure = {
            'cluster_configs': {
                'dfs.replication': ('HDFS', 'dfs.replication')
            }
        }
        provider = ru.HadoopAutoConfigsProvider(
            want_to_configure, self.get_configs(cluster.hadoop_version),
            cluster, scaling)
        provider.apply_recommended_configs()
Beispiel #28
0
        if 'nodemanager' in processes:
            r.execute_command(
                'sudo su - -c  "yarn-daemon.sh start nodemanager" hadoop')


def start_hadoop_process(instance, process):
    instance.remote().execute_command(
        'sudo su - -c "hadoop-daemon.sh start %s" hadoop' % process)


def start_yarn_process(instance, process):
    instance.remote().execute_command(
        'sudo su - -c  "yarn-daemon.sh start %s" hadoop' % process)


@cpo.event_wrapper(True, step=pu.start_process_event_message("HistoryServer"))
def start_historyserver(instance):
    instance.remote().execute_command(
        'sudo su - -c "mr-jobhistory-daemon.sh start historyserver" hadoop')


@cpo.event_wrapper(True, step=pu.start_process_event_message("Oozie"))
def start_oozie_process(pctx, instance):
    with context.set_current_instance_id(instance.instance_id):
        with instance.remote() as r:
            if c_helper.is_mysql_enabled(pctx, instance.cluster):
                _start_mysql(r)
                LOG.debug("Creating Oozie DB Schema")
                sql_script = files.get_file_text(
                    'plugins/vanilla/hadoop2/resources/create_oozie_db.sql')