def validate_additional_ng_scaling(cluster, additional):
    rm = vu.get_resourcemanager(cluster)
    scalable_processes = _get_scalable_processes()

    for ng_id in additional:
        ng = u.get_by_id(cluster.node_groups, ng_id)
        if not set(ng.node_processes).issubset(scalable_processes):
            msg = _("Vanilla plugin cannot scale nodegroup with processes: %s")
            raise ex.NodeGroupCannotBeScaled(ng.name,
                                             msg % ' '.join(ng.node_processes))

        if not rm and 'nodemanager' in ng.node_processes:
            msg = _("Vanilla plugin cannot scale node group with processes "
                    "which have no master-processes run in cluster")
            raise ex.NodeGroupCannotBeScaled(ng.name, msg)
def _check_decommission(cluster, instances, check_func, option):
    utils.plugin_option_poll(cluster, is_decommissioned, option,
                             _("Wait for decommissioning"), 5, {
                                 'cluster': cluster,
                                 'check_func': check_func,
                                 'instances': instances
                             })
    def validate_job_execution(self, cluster, job, data):
        if (not self.edp_supported(cluster.hadoop_version)
                or not v_utils.get_spark_history_server(cluster)):

            raise ex.PluginInvalidDataException(
                _('Spark {base} or higher required to run {type} jobs').format(
                    base=EdpSparkEngine.edp_base_version, type=job.type))

        super(EdpSparkEngine, self).validate_job_execution(cluster, job, data)
def configure_instances(pctx, instances):
    if len(instances) == 0:
        return

    utils.add_provisioning_step(instances[0].cluster_id,
                                _("Configure instances"), len(instances))

    for instance in instances:
        with context.set_current_instance_id(instance.instance_id):
            _configure_instance(pctx, instance)
def await_datanodes(cluster):
    datanodes_count = len(vu.get_datanodes(cluster))
    if datanodes_count < 1:
        return

    l_message = _("Waiting on %s datanodes to start up") % datanodes_count
    with vu.get_namenode(cluster).remote() as r:
        utils.plugin_option_poll(
            cluster, _check_datanodes_count,
            config_helper.DATANODES_STARTUP_TIMEOUT, l_message, 1, {
                'remote': r, 'count': datanodes_count})
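
# plugin_option_poll above takes a check function, a timeout option, a
# status message, a sleep interval and the keyword arguments passed to the
# check on every call; conceptually it keeps re-running the check until it
# succeeds or the timeout elapses. A simplified, generic sketch of that
# pattern (hypothetical `poll` helper, illustration only, not the plugin
# API):
import time


def poll(check_func, timeout, sleep, kwargs):
    """Call check_func(**kwargs) every `sleep` seconds until it returns
    True; raise TimeoutError if `timeout` seconds pass first."""
    deadline = time.monotonic() + timeout
    while not check_func(**kwargs):
        if time.monotonic() > deadline:
            raise TimeoutError("condition not met in %s seconds" % timeout)
        time.sleep(sleep)
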
def validate_existing_ng_scaling(pctx, cluster, existing):
    scalable_processes = _get_scalable_processes()
    dn_to_delete = 0
    for ng in cluster.node_groups:
        if ng.id in existing:
            if ng.count > existing[ng.id] and "datanode" in ng.node_processes:
                dn_to_delete += ng.count - existing[ng.id]

            if not set(ng.node_processes).issubset(scalable_processes):
                msg = _("Vanilla plugin cannot scale nodegroup "
                        "with processes: %s")
                raise ex.NodeGroupCannotBeScaled(
                    ng.name, msg % ' '.join(ng.node_processes))

    dn_amount = len(vu.get_datanodes(cluster))
    rep_factor = cu.get_config_value(pctx, 'HDFS', 'dfs.replication', cluster)

    if dn_to_delete > 0 and dn_amount - dn_to_delete < rep_factor:
        msg = _("Vanilla plugin cannot shrink cluster because it would be "
                "not enough nodes for replicas (replication factor is %s)")
        raise ex.ClusterCannotBeScaled(cluster.name, msg % rep_factor)
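
# The shrink check above reduces to simple arithmetic: the datanodes left
# after the scale-down must still be at least dfs.replication. A minimal,
# self-contained sketch of that rule (standalone Python, not the plugin
# API):
def can_shrink_datanodes(dn_amount, dn_to_delete, rep_factor):
    """True if removing dn_to_delete datanodes still leaves at least
    rep_factor datanodes for block replicas."""
    return dn_amount - dn_to_delete >= rep_factor


# With 4 datanodes and dfs.replication = 3, deleting one is fine,
# but deleting two would leave only 2 nodes and must be rejected.
assert can_shrink_datanodes(4, 1, 3) is True
assert can_shrink_datanodes(4, 2, 3) is False
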
    def test_check_decommission(self, plugin_option_poll):
        check_func = mock.Mock()
        option = mock.Mock()
        is_dec = scaling.is_decommissioned
        mess = _("Wait for decommissioning")
        sample_dict = {
            'cluster': self.cluster,
            'check_func': check_func,
            'instances': self.instances
        }
        scaling._check_decommission(self.cluster, self.instances, check_func,
                                     option)
        plugin_option_poll.assert_called_once_with(self.cluster, is_dec,
                                                   option, mess, 5,
                                                   sample_dict)
def get_config_value(pctx, service, name, cluster=None):
    if cluster:
        for ng in cluster.node_groups:
            cl_param = ng.configuration().get(service, {}).get(name)
            if cl_param is not None:
                return cl_param

    for c in pctx['all_confs']:
        if c.applicable_target == service and c.name == name:
            return c.default_value

    raise ex.PluginNotFoundException({
        "name": name,
        "service": service
    }, _("Unable to get parameter '%(name)s' from service %(service)s"))
def validate_zookeeper_node_count(zk_ng, existing, additional):
    zk_amount = 0
    for ng in zk_ng:
        if ng.id in existing:
            zk_amount += existing[ng.id]
        else:
            zk_amount += ng.count

    for ng_id in additional:
        ng = u.get_by_id(zk_ng, ng_id)
        if "zookeeper" in ng.node_processes:
            zk_amount += ng.count

    if (zk_amount % 2) != 1:
        msg = _("Vanilla plugin cannot scale cluster because it must keep"
                " zookeeper service in odd.")
        raise ex.ClusterCannotBeScaled(zk_ng[0].cluster.name, msg)
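
# The check above sums the post-scaling size of every ZooKeeper node group
# (the requested count for resized groups, the current count otherwise,
# plus any newly added groups) and rejects the operation unless the total
# stays odd, since a ZooKeeper ensemble needs an odd number of members to
# keep quorum. A stripped-down sketch of that arithmetic (plain Python,
# hypothetical data shapes):
def scaled_zk_count(current_counts, resized, added_counts):
    total = sum(resized.get(ng_id, count)
                for ng_id, count in current_counts.items())
    return total + sum(added_counts)


# 3 existing ZooKeeper nodes resized to 4 plus one new node -> 5, still odd.
assert scaled_zk_count({'ng-1': 3}, {'ng-1': 4}, [1]) % 2 == 1
# Resizing to 4 with no additions -> even, so scaling would be rejected.
assert scaled_zk_count({'ng-1': 3}, {'ng-1': 4}, []) % 2 == 0
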
    def test_await_datanodes(self, plugin_option_poll, add_provisioning_step,
                             check_cluster_exists, get_datanodes,
                             get_namenode):
        cluster = mock.Mock()
        get_datanodes.return_value = ['node1']
        r = mock.Mock()
        remote = mock.Mock(return_value=r)
        remote.__enter__ = remote
        remote.__exit__ = mock.Mock()
        namenode = mock.Mock()
        namenode.remote.return_value = remote
        get_namenode.return_value = namenode
        mess = _('Waiting on 1 datanodes to start up')
        test_data = {'remote': r, 'count': 1}
        timeout = config_helper.DATANODES_STARTUP_TIMEOUT
        rs.await_datanodes(cluster)
        get_datanodes.assert_called_once_with(cluster)
        get_namenode.assert_called_once_with(cluster)
        plugin_option_poll.assert_called_once_with(cluster,
                                                   rs._check_datanodes_count,
                                                   timeout, mess, 1, test_data)
def scale_cluster(pctx, cluster, instances):
    config.configure_instances(pctx, instances)
    _update_include_files(cluster)
    run.refresh_hadoop_nodes(cluster)
    rm = vu.get_resourcemanager(cluster)
    if rm:
        run.refresh_yarn_nodes(cluster)

    config.configure_topology_data(pctx, cluster)
    run.start_dn_nm_processes(instances)
    swift_helper.install_ssl_certs(instances)
    config.configure_zookeeper(cluster)
    run.refresh_zk_servers(cluster)

def _get_instances_with_service(instances, service):
    return [
        instance for instance in instances
        if service in instance.node_group.node_processes
    ]


@utils.event_wrapper(True,
                     step=_("Update include files"),
                     param=('cluster', 0))
def _update_include_files(cluster, dec_instances=None):
    dec_instances = dec_instances or []
    dec_instances_ids = [instance.id for instance in dec_instances]

    instances = utils.get_instances(cluster)

    inst_filter = lambda inst: inst.id not in dec_instances_ids

    datanodes = filter(inst_filter, vu.get_datanodes(cluster))
    nodemanagers = filter(inst_filter, vu.get_nodemanagers(cluster))
    dn_hosts = utils.generate_fqdn_host_names(datanodes)
    nm_hosts = utils.generate_fqdn_host_names(nodemanagers)
    for instance in instances:
        with instance.remote() as r:
            r.execute_command(
                'sudo su - -c "echo \'%s\' > %s/dn-include" hadoop' % (
                    dn_hosts, HADOOP_CONF_DIR))
            r.execute_command(
                'sudo su - -c "echo \'%s\' > %s/nm-include" hadoop' % (
                    nm_hosts, HADOOP_CONF_DIR))
def format_namenode(instance):
    instance.remote().execute_command(
        'sudo su - -c "hdfs namenode -format" hadoop')


@utils.event_wrapper(True,
                     step=_("Refresh %s nodes") % "HDFS",
                     param=('cluster', 0))
def refresh_hadoop_nodes(cluster):
    nn = vu.get_namenode(cluster)
    nn.remote().execute_command(
        'sudo su - -c "hdfs dfsadmin -refreshNodes" hadoop')


@utils.event_wrapper(True,
                     step=_("Refresh %s nodes") % "YARN",
                     param=('cluster', 0))
def refresh_yarn_nodes(cluster):
    rm = vu.get_resourcemanager(cluster)
    rm.remote().execute_command(
        'sudo su - -c "yarn rmadmin -refreshNodes" hadoop')


def _oozie_share_lib(remote):
    LOG.debug("Sharing Oozie libs")
    # remote.execute_command('sudo su - -c "/opt/oozie/bin/oozie-setup.sh '
    #                        'sharelib create -fs hdfs://%s:8020" hadoop'
    #                        % nn_hostname)

    # TODO(alazarev) return 'oozie-setup.sh sharelib create' back
    # when #1262023 is resolved
def _get_hadoop_dirs(instance):
    dirs = {}
    storage_paths = instance.storage_paths()
    dirs['hadoop_name_dirs'] = _make_hadoop_paths(storage_paths,
                                                  '/hdfs/namenode')
    dirs['hadoop_data_dirs'] = _make_hadoop_paths(storage_paths,
                                                  '/hdfs/datanode')
    dirs['hadoop_log_dir'] = _make_hadoop_paths(storage_paths,
                                                '/hadoop/logs')[0]
    dirs['hadoop_secure_dn_log_dir'] = _make_hadoop_paths(
        storage_paths, '/hadoop/logs/secure')[0]
    dirs['yarn_log_dir'] = _make_hadoop_paths(storage_paths, '/yarn/logs')[0]

    return dirs


def _make_hadoop_paths(paths, hadoop_dir):
    return [path + hadoop_dir for path in paths]


@utils.event_wrapper(True,
                     step=_("Configure topology data"),
                     param=('cluster', 1))
def configure_topology_data(pctx, cluster):
    if config_helper.is_data_locality_enabled(pctx, cluster):
        LOG.warning("Node group awareness is not implemented in YARN yet "
                    "so enable_hypervisor_awareness set to False explicitly")
        tpl_map = th.generate_topology_map(cluster, is_node_awareness=False)
        topology_data = "\n".join([k + " " + v
                                   for k, v in tpl_map.items()]) + "\n"
        for ng in cluster.node_groups:
            for i in ng.instances:
                i.remote().write_file_to(HADOOP_CONF_DIR + "/topology.data",
                                         topology_data,
                                         run_as_root=True)

def validate_cluster_creating(pctx, cluster):
    nn_count = _get_inst_count(cluster, 'namenode')
    if nn_count != 1:
        raise ex.InvalidComponentCountException('namenode', 1, nn_count)

    snn_count = _get_inst_count(cluster, 'secondarynamenode')
    if snn_count > 1:
        raise ex.InvalidComponentCountException('secondarynamenode',
                                                _('0 or 1'), snn_count)

    rm_count = _get_inst_count(cluster, 'resourcemanager')
    if rm_count > 1:
        raise ex.InvalidComponentCountException('resourcemanager', _('0 or 1'),
                                                rm_count)

    hs_count = _get_inst_count(cluster, 'historyserver')
    if hs_count > 1:
        raise ex.InvalidComponentCountException('historyserver', _('0 or 1'),
                                                hs_count)

    nm_count = _get_inst_count(cluster, 'nodemanager')
    if rm_count == 0:
        if nm_count > 0:
            raise ex.RequiredServiceMissingException('resourcemanager',
                                                     required_by='nodemanager')

    oo_count = _get_inst_count(cluster, 'oozie')
    dn_count = _get_inst_count(cluster, 'datanode')
    if oo_count > 1:
        raise ex.InvalidComponentCountException('oozie', _('0 or 1'), oo_count)

    if oo_count == 1:
        if dn_count < 1:
            raise ex.RequiredServiceMissingException('datanode',
                                                     required_by='oozie')

        if nm_count < 1:
            raise ex.RequiredServiceMissingException('nodemanager',
                                                     required_by='oozie')

        if hs_count != 1:
            raise ex.RequiredServiceMissingException('historyserver',
                                                     required_by='oozie')

    spark_hist_count = _get_inst_count(cluster, 'spark history server')
    if spark_hist_count > 1:
        raise ex.InvalidComponentCountException('spark history server',
                                                _('0 or 1'),
                                                spark_hist_count)

    rep_factor = cu.get_config_value(pctx, 'HDFS', 'dfs.replication', cluster)
    if dn_count < rep_factor:
        raise ex.InvalidComponentCountException(
            'datanode', rep_factor, dn_count, _('Number of datanodes must '
                                                'not be less than '
                                                'dfs.replication.'))

    hive_count = _get_inst_count(cluster, 'hiveserver')
    if hive_count > 1:
        raise ex.InvalidComponentCountException('hive', _('0 or 1'),
                                                hive_count)

    zk_count = _get_inst_count(cluster, 'zookeeper')
    if zk_count > 0 and (zk_count % 2) != 1:
        raise ex.InvalidComponentCountException(
            'zookeeper', _('odd'), zk_count, _('Number of zookeeper nodes '
                                               'should be odd.'))
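
# _get_inst_count is not shown in these snippets; conceptually it just sums
# the instance count of every node group that runs the given process. A
# hypothetical reconstruction, for illustration only:
def _get_inst_count_sketch(cluster, process):
    return sum(ng.count for ng in cluster.node_groups
               if process in ng.node_processes)
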
    def get_description(self):
        return _('The Apache Vanilla plugin provides the ability to launch '
                 'an upstream Vanilla Apache Hadoop cluster without any '
                 'management consoles. It can also deploy the Oozie '
                 'component.')