Example 1
    def test_get_datanodes(self):
        cl = tu.create_cluster('cl1', 't1', 'vanilla', '2.6.0',
                               [self.ng_manager, self.ng_namenode,
                                self.ng_datanode])
        datanodes = u.get_datanodes(cl)
        self.assertEqual(2, len(datanodes))
        self.assertEqual(set(['dn1', 'dn2']),
                         set([datanodes[0].instance_id,
                              datanodes[1].instance_id]))

        cl = tu.create_cluster('cl1', 't1', 'vanilla', '2.6.0',
                               [self.ng_manager])
        self.assertEqual([], u.get_datanodes(cl))
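
Both tests exercise a lookup helper that picks out the cluster instances whose node group runs the "datanode" process. The helper itself is not part of the snippet; the following is a minimal sketch of how such a lookup can be built (the bodies below are an assumption for illustration, not the library's actual code):

def get_instances(cluster, node_process=None):
    # Collect every instance whose node group runs the given process;
    # with no process given, return all instances in the cluster.
    instances = []
    for node_group in cluster.node_groups:
        if (node_process is None
                or node_process in node_group.node_processes):
            instances.extend(node_group.instances)
    return instances


def get_datanodes(cluster):
    return get_instances(cluster, 'datanode')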
Example 2
    def test_get_datanodes(self):
        cl = tu.create_cluster(
            'cl1', 't1', 'vanilla', '1.2.1',
            [self.ng_manager, self.ng_namenode, self.ng_datanode])
        datanodes = u.get_datanodes(cl)
        self.assertEqual(2, len(datanodes))
        self.assertEqual(
            set(['dn1', 'dn2']),
            set([datanodes[0].instance_id, datanodes[1].instance_id]))

        cl = tu.create_cluster('cl1', 't1', 'vanilla', '1.2.1',
                               [self.ng_manager])
        self.assertEqual([], u.get_datanodes(cl))
Example 3
    def _validate_existing_ng_scaling(self, cluster, existing):
        scalable_processes = self._get_scalable_processes()
        dn_to_delete = 0
        for ng in cluster.node_groups:
            if ng.id in existing:
                if ng.count > existing[ng.id] and "datanode" in ng.node_processes:
                    dn_to_delete += ng.count - existing[ng.id]
                if not set(ng.node_processes).issubset(scalable_processes):
                    raise ex.NodeGroupCannotBeScaled(
                        ng.name,
                        _("Vanilla plugin cannot scale nodegroup "
                          "with processes: %s") % " ".join(ng.node_processes))

        dn_amount = len(vu.get_datanodes(cluster))
        rep_factor = c_helper.get_config_value("HDFS", "dfs.replication",
                                               cluster)

        if dn_to_delete > 0 and dn_amount - dn_to_delete < rep_factor:
            raise ex.ClusterCannotBeScaled(
                cluster.name,
                _("Vanilla plugin cannot shrink cluster because there "
                  "would not be enough nodes for replicas "
                  "(replication factor is %s)") % rep_factor)
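
The existing argument maps node group ids to the instance counts requested by the scaling operation. A hypothetical usage sketch (plugin and datanode_ng are placeholder names, not from the source):

# Request shrinking the datanode group by one instance.
existing = {ng.id: ng.count for ng in cluster.node_groups}
existing[datanode_ng.id] -= 1

# Raises NodeGroupCannotBeScaled if a group runs a process the plugin
# cannot scale, or ClusterCannotBeScaled if the surviving datanodes
# could not hold dfs.replication replicas.
plugin._validate_existing_ng_scaling(cluster, existing)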
Example 4
    def start_cluster(self, cluster):
        nn = vu.get_namenode(cluster)
        run.format_namenode(nn)
        run.start_hadoop_process(nn, 'namenode')

        for snn in vu.get_secondarynamenodes(cluster):
            run.start_hadoop_process(snn, 'secondarynamenode')

        rm = vu.get_resourcemanager(cluster)
        run.start_yarn_process(rm, 'resourcemanager')

        for dn in vu.get_datanodes(cluster):
            run.start_hadoop_process(dn, 'datanode')

        run.await_datanodes(cluster)

        for nm in vu.get_nodemanagers(cluster):
            run.start_yarn_process(nm, 'nodemanager')

        hs = vu.get_historyserver(cluster)
        if hs:
            run.start_historyserver(hs)

        oo = vu.get_oozie(cluster)
        if oo:
            run.start_oozie_process(oo)

        self._set_cluster_info(cluster)
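
run.start_hadoop_process and run.start_yarn_process are only called here, never defined. A plausible sketch, assuming the standard Hadoop daemon scripts are run as the hadoop user (the command lines are an assumption):

def start_hadoop_process(instance, process):
    # Assumed: HDFS daemons are launched via hadoop-daemon.sh.
    instance.remote().execute_command(
        'sudo su - -c "hadoop-daemon.sh start %s" hadoop' % process)


def start_yarn_process(instance, process):
    # Assumed: YARN daemons go through yarn-daemon.sh instead.
    instance.remote().execute_command(
        'sudo su - -c "yarn-daemon.sh start %s" hadoop' % process)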
Example 5
    def start_cluster(self, cluster):
        nn = vu.get_namenode(cluster)
        run.format_namenode(nn)
        run.start_hadoop_process(nn, "namenode")

        for snn in vu.get_secondarynamenodes(cluster):
            run.start_hadoop_process(snn, "secondarynamenode")

        rm = vu.get_resourcemanager(cluster)
        if rm:
            run.start_yarn_process(rm, "resourcemanager")

        for dn in vu.get_datanodes(cluster):
            run.start_hadoop_process(dn, "datanode")

        run.await_datanodes(cluster)

        for nm in vu.get_nodemanagers(cluster):
            run.start_yarn_process(nm, "nodemanager")

        hs = vu.get_historyserver(cluster)
        if hs:
            run.start_historyserver(hs)

        oo = vu.get_oozie(cluster)
        if oo:
            run.start_oozie_process(oo)

        self._set_cluster_info(cluster)
Example 6
def await_datanodes(cluster):
    datanodes_count = len(vu.get_datanodes(cluster))
    if datanodes_count < 1:
        return

    l_message = _("Waiting on %s datanodes to start up") % datanodes_count
    with vu.get_namenode(cluster).remote() as r:
        poll_utils.plugin_option_poll(
            cluster, _check_datanodes_count,
            c_helper.DATANODES_STARTUP_TIMEOUT, l_message, 1, {
                'remote': r, 'count': datanodes_count})
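
The polled predicate _check_datanodes_count is not included in the snippet. A plausible sketch, assuming it asks the namenode for the number of live datanodes via hdfs dfsadmin -report and that remote.execute_command returns an (exit_code, output) pair:

def _check_datanodes_count(remote, count):
    if count < 1:
        return True
    # Assumed command: take the first datanode count the namenode
    # reports (the label differs between Hadoop versions).
    exit_code, stdout = remote.execute_command(
        'sudo su -lc "hdfs dfsadmin -report" hadoop | '
        'grep -e "Live datanodes" -e "Datanodes available:" | '
        'grep -o "[0-9]*" | head -n 1')
    return exit_code == 0 and stdout and int(stdout) == count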
Example 7
    def _await_datanodes(self, cluster):
        datanodes_count = len(vu.get_datanodes(cluster))
        if datanodes_count < 1:
            return

        l_message = _("Waiting on %s datanodes to start up") % datanodes_count
        LOG.info(l_message)
        with remote.get_remote(vu.get_namenode(cluster)) as r:
            poll_utils.plugin_option_poll(
                cluster, run.check_datanodes_count,
                c_helper.DATANODES_STARTUP_TIMEOUT, l_message, 1, {
                    'remote': r,
                    'count': datanodes_count})
Example 8
def _update_include_files(cluster):
    instances = u.get_instances(cluster)

    datanodes = vu.get_datanodes(cluster)
    nodemanagers = vu.get_nodemanagers(cluster)
    dn_hosts = u.generate_fqdn_host_names(datanodes)
    nm_hosts = u.generate_fqdn_host_names(nodemanagers)
    for instance in instances:
        with instance.remote() as r:
            r.execute_command(
                'sudo su - -c "echo \'%s\' > %s/dn-include" hadoop' %
                (dn_hosts, HADOOP_CONF_DIR))
            r.execute_command(
                'sudo su - -c "echo \'%s\' > %s/nm-include" hadoop' %
                (nm_hosts, HADOOP_CONF_DIR))
Example 9
def _update_include_files(cluster):
    instances = u.get_instances(cluster)

    datanodes = vu.get_datanodes(cluster)
    nodemanagers = vu.get_nodemanagers(cluster)
    dn_hosts = u.generate_fqdn_host_names(datanodes)
    nm_hosts = u.generate_fqdn_host_names(nodemanagers)
    for instance in instances:
        with instance.remote() as r:
            r.execute_command(
                'sudo su - -c "echo \'%s\' > %s/dn-include" hadoop' % (
                    dn_hosts, HADOOP_CONF_DIR))
            r.execute_command(
                'sudo su - -c "echo \'%s\' > %s/nm-include" hadoop' % (
                    nm_hosts, HADOOP_CONF_DIR))
Example 10
def _update_include_files(cluster, dec_instances=None):
    dec_instances = dec_instances or []
    dec_instances_ids = [instance.id for instance in dec_instances]

    instances = u.get_instances(cluster)

    inst_filter = lambda inst: inst.id not in dec_instances_ids

    datanodes = filter(inst_filter, vu.get_datanodes(cluster))
    nodemanagers = filter(inst_filter, vu.get_nodemanagers(cluster))
    dn_hosts = u.generate_fqdn_host_names(datanodes)
    nm_hosts = u.generate_fqdn_host_names(nodemanagers)
    for instance in instances:
        with instance.remote() as r:
            r.execute_command("sudo su - -c \"echo '%s' > %s/dn-include\" hadoop" % (dn_hosts, HADOOP_CONF_DIR))
            r.execute_command("sudo su - -c \"echo '%s' > %s/nm-include\" hadoop" % (nm_hosts, HADOOP_CONF_DIR))
Example 11
def await_datanodes(cluster):
    datanodes_count = len(vu.get_datanodes(cluster))
    if datanodes_count < 1:
        return

    LOG.info("Waiting %s datanodes to start up" % datanodes_count)
    with vu.get_namenode(cluster).remote() as r:
        while True:
            if _check_datanodes_count(r, datanodes_count):
                LOG.info("Datanodes on cluster %s has been started" % cluster.name)
                return

            context.sleep(1)

            if not g.check_cluster_exists(cluster):
                LOG.info("Stop waiting datanodes on cluster %s since it has " "been deleted" % cluster.name)
                return
Example 12
def await_datanodes(cluster):
    datanodes_count = len(vu.get_datanodes(cluster))
    if datanodes_count < 1:
        return

    LOG.info("Waiting %s datanodes to start up" % datanodes_count)
    with vu.get_namenode(cluster).remote() as r:
        while True:
            if _check_datanodes_count(r, datanodes_count):
                LOG.info('Datanodes on cluster %s have been started' %
                         cluster.name)
                return

            context.sleep(1)

            if not g.check_cluster_exists(cluster):
                LOG.info('Stop waiting for datanodes on cluster %s since '
                         'it has been deleted' % cluster.name)
                return
Example 13
    def _await_datanodes(self, cluster):
        datanodes_count = len(vu.get_datanodes(cluster))
        if datanodes_count < 1:
            return

        LOG.info(_LI("Waiting %s datanodes to start up"), datanodes_count)
        with remote.get_remote(vu.get_namenode(cluster)) as r:
            while True:
                if run.check_datanodes_count(r, datanodes_count):
                    LOG.info(_LI('Datanodes on cluster %s have been started'),
                             cluster.name)
                    return

                context.sleep(1)

                if not g.check_cluster_exists(cluster):
                    LOG.info(
                        _LI('Stop waiting for datanodes on cluster %s since '
                            'it has been deleted'), cluster.name)
                    return
Example 14
def _update_include_files(cluster, dec_instances=None):
    dec_instances = dec_instances or []
    dec_instances_ids = [instance.id for instance in dec_instances]

    instances = u.get_instances(cluster)

    inst_filter = lambda inst: inst.id not in dec_instances_ids

    datanodes = filter(inst_filter, vu.get_datanodes(cluster))
    nodemanagers = filter(inst_filter, vu.get_nodemanagers(cluster))
    dn_hosts = u.generate_fqdn_host_names(datanodes)
    nm_hosts = u.generate_fqdn_host_names(nodemanagers)
    for instance in instances:
        with instance.remote() as r:
            r.execute_command(
                'sudo su - -c "echo \'%s\' > %s/dn-include" hadoop' %
                (dn_hosts, HADOOP_CONF_DIR))
            r.execute_command(
                'sudo su - -c "echo \'%s\' > %s/nm-include" hadoop' %
                (nm_hosts, HADOOP_CONF_DIR))
Example 15
    def decommission_nodes(self, cluster, instances):
        tts = vu.get_tasktrackers(cluster)
        dns = vu.get_datanodes(cluster)
        decommission_dns = False
        decommission_tts = False

        for i in instances:
            if "datanode" in i.node_group.node_processes:
                dns.remove(i)
                decommission_dns = True
            if "tasktracker" in i.node_group.node_processes:
                tts.remove(i)
                decommission_tts = True

        nn = vu.get_namenode(cluster)
        jt = vu.get_jobtracker(cluster)

        if decommission_tts:
            sc.decommission_tt(jt, instances, tts)
        if decommission_dns:
            sc.decommission_dn(nn, instances, dns)
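
sc.decommission_dn and sc.decommission_tt are external helpers here. A hedged sketch of the usual HDFS datanode decommission flow, reusing the idioms from the snippets above (the dn.excl path and the refresh command are assumptions):

def decommission_dn(nn, instances_to_delete, survived_instances):
    # Publish the hosts to exclude, then make the namenode re-read its
    # host lists so the excluded datanodes drain their blocks.
    with nn.remote() as r:
        r.write_file_to(
            '/etc/hadoop/dn.excl',
            u.generate_fqdn_host_names(instances_to_delete))
        r.execute_command(
            'sudo su - -c "hadoop dfsadmin -refreshNodes" hadoop')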
Example 16
def validate_existing_ng_scaling(cluster, existing):
    scalable_processes = _get_scalable_processes()
    dn_to_delete = 0
    for ng in cluster.node_groups:
        if ng.id in existing:
            if ng.count > existing[ng.id] and "datanode" in ng.node_processes:
                dn_to_delete += ng.count - existing[ng.id]

            if not set(ng.node_processes).issubset(scalable_processes):
                msg = ("Vanilla plugin cannot scale nodegroup "
                       "with processes: %s")
                raise ex.NodeGroupCannotBeScaled(
                    ng.name, msg % ' '.join(ng.node_processes))

    dn_amount = len(vu.get_datanodes(cluster))
    rep_factor = c_helper.get_config_value('HDFS', 'dfs.replication', cluster)

    if dn_to_delete > 0 and dn_amount - dn_to_delete < rep_factor:
        msg = ("Vanilla plugin cannot shrink cluster because it would be not "
               "enough nodes for replicas (replication factor is %s)")
        raise ex.ClusterCannotBeScaled(cluster.name, msg % rep_factor)
Example 17
    def _await_datanodes(self, cluster):
        datanodes_count = len(vu.get_datanodes(cluster))
        if datanodes_count < 1:
            return

        LOG.info(_LI("Waiting %s datanodes to start up"), datanodes_count)
        with remote.get_remote(vu.get_namenode(cluster)) as r:
            while True:
                if run.check_datanodes_count(r, datanodes_count):
                    LOG.info(
                        _LI('Datanodes on cluster %s have been started'),
                        cluster.name)
                    return

                context.sleep(1)

                if not g.check_cluster_exists(cluster):
                    LOG.info(
                        _LI('Stop waiting for datanodes on cluster %s since '
                            'it has been deleted'), cluster.name)
                    return
Example 18
    def decommission_nodes(self, cluster, instances):
        tts = vu.get_tasktrackers(cluster)
        dns = vu.get_datanodes(cluster)
        decommission_dns = False
        decommission_tts = False

        for i in instances:
            if 'datanode' in i.node_group.node_processes:
                dns.remove(i)
                decommission_dns = True
            if 'tasktracker' in i.node_group.node_processes:
                tts.remove(i)
                decommission_tts = True

        nn = vu.get_namenode(cluster)
        jt = vu.get_jobtracker(cluster)

        if decommission_tts:
            sc.decommission_tt(jt, instances, tts)
        if decommission_dns:
            sc.decommission_dn(nn, instances, dns)
Example 19
def validate_existing_ng_scaling(pctx, cluster, existing):
    scalable_processes = _get_scalable_processes()
    dn_to_delete = 0
    for ng in cluster.node_groups:
        if ng.id in existing:
            if ng.count > existing[ng.id] and "datanode" in ng.node_processes:
                dn_to_delete += ng.count - existing[ng.id]

            if not set(ng.node_processes).issubset(scalable_processes):
                msg = _("Vanilla plugin cannot scale nodegroup "
                        "with processes: %s")
                raise ex.NodeGroupCannotBeScaled(
                    ng.name, msg % ' '.join(ng.node_processes))

    dn_amount = len(vu.get_datanodes(cluster))
    rep_factor = cu.get_config_value(pctx, 'HDFS', 'dfs.replication', cluster)

    if dn_to_delete > 0 and dn_amount - dn_to_delete < rep_factor:
        msg = _("Vanilla plugin cannot shrink cluster because it would be "
                "not enough nodes for replicas (replication factor is %s)")
        raise ex.ClusterCannotBeScaled(
            cluster.name, msg % rep_factor)
Example 20
    def _push_namenode_configs(self, cluster, r):
        r.write_file_to(
            '/etc/hadoop/dn.incl',
            utils.generate_fqdn_host_names(vu.get_datanodes(cluster)))
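
The dn.incl file written here only has an effect if the namenode's configuration points at it. A hedged sketch of the assumed wiring (dfs.hosts is the standard Hadoop property for an include file; the mapping below is illustrative, not from the source):

# Assumed hdfs-site.xml setting that makes the namenode honor dn.incl.
HDFS_SITE_INCLUDE_SETTINGS = {
    'dfs.hosts': '/etc/hadoop/dn.incl',
}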
Example 21
    def _push_namenode_configs(self, cluster, r):
        r.write_file_to(
            "/etc/hadoop/dn.incl",
            utils.generate_fqdn_host_names(vu.get_datanodes(cluster)))