Example #1
 def test_get_instances(self):
     self.assertEqual(len(u.get_instances(self.c1)), 5)
     self.assertListEqual(u.get_instances(self.c1, "wrong-process"), [])
     self.assertListEqual(u.get_instances(self.c1, "nn"), self.ng1.instances)
     instances = list(self.ng2.instances)
     instances += self.ng3.instances
     self.assertListEqual(u.get_instances(self.c1, "dn"), instances)
Example #2
 def test_get_instances(self):
     self.assertEqual(len(u.get_instances(self.c1)), 5)
     self.assertListEqual(u.get_instances(self.c1, 'wrong-process'), [])
     self.assertListEqual(u.get_instances(self.c1, 'nn'),
                          self.ng1.instances)
     self.assertListEqual(u.get_instances(self.c1, 'dn'),
                          self.ng2.instances + self.ng3.instances)
Example #3
 def test_get_instances(self):
     self.assertEqual(len(u.get_instances(self.c1)), 5)
     self.assertListEqual(u.get_instances(self.c1, 'wrong-process'), [])
     self.assertListEqual(u.get_instances(self.c1, 'nn'),
                          self.ng1.instances)
     instances = list(self.ng2.instances)
     instances += self.ng3.instances
     self.assertListEqual(u.get_instances(self.c1, 'dn'), instances)
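The three test variants above pin down the contract of u.get_instances: with no process name it returns every instance in the cluster, with an unknown process it returns an empty list, and otherwise it returns the instances of the node groups running that process. A minimal sketch consistent with those tests (the real Sahara/Savanna helper may differ in signature and field names):

def get_instances(cluster, node_process=None):
    instances = []
    for node_group in cluster.node_groups:
        # No filter: take everything. Otherwise keep only node groups
        # that run the requested process (e.g. 'nn', 'dn').
        if node_process is None or node_process in node_group.node_processes:
            instances += node_group.instances
    return instances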
Example #4
def install_cluster(cluster):
    mng_instance = u.get_instance(cluster, 'manager')
    mng_ip = mng_instance.management_ip

    all_hosts = list(set([i.fqdn() for i in u.get_instances(cluster)]))

    client = c.IntelClient(mng_ip, cluster.name)

    LOG.info("Create cluster")
    client.cluster.create()

    LOG.info("Add nodes to cluster")
    rack = '/Default'
    client.nodes.add(all_hosts, rack, 'hadoop',
                     '/home/hadoop/.ssh/id_rsa')

    LOG.info("Install software")
    client.cluster.install_software(all_hosts)

    LOG.info("Configure services")
    _configure_services(client, cluster)

    LOG.info("Deploy cluster")
    client.nodes.config(force=True)

    LOG.info("Provisioning configs")
    # cinder and ephemeral drive support
    _configure_storage(client, cluster)
    # swift support
    _configure_swift(client, cluster)
    # user configs
    _add_user_params(client, cluster)

    LOG.info("Format HDFS")
    client.services.hdfs.format()
Example #5
def install_cluster(cluster):
    mng_instance = u.get_instance(cluster, 'manager')

    all_hosts = list(set([i.fqdn() for i in u.get_instances(cluster)]))

    client = c.IntelClient(mng_instance, cluster.name)

    LOG.info("Create cluster")
    client.cluster.create()

    LOG.info("Add nodes to cluster")
    rack = '/Default'
    client.nodes.add(all_hosts, rack, 'hadoop', '/home/hadoop/.ssh/id_rsa')

    LOG.info("Install software")
    client.cluster.install_software(all_hosts)

    LOG.info("Configure services")
    _configure_services(client, cluster)

    LOG.info("Deploy cluster")
    client.nodes.config(force=True)

    LOG.info("Provisioning configs")
    # cinder and ephemeral drive support
    _configure_storage(client, cluster)
    # swift support
    _configure_swift(client, cluster)
    # user configs
    _add_user_params(client, cluster)

    LOG.info("Format HDFS")
    client.services.hdfs.format()
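The two install_cluster revisions differ only in what IntelClient receives: the manager's management IP in the first, the manager instance itself in the second. Both deduplicate FQDNs with list(set(...)), which discards order; if a stable order mattered, a dict-based dedup would keep first-seen order (a sketch, not project code):

def unique_hosts(instances):
    # dict.fromkeys preserves insertion order (Python 3.7+), so this
    # dedups FQDNs without the arbitrary ordering of list(set(...)).
    return list(dict.fromkeys(i.fqdn() for i in instances))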
Example #6
    def _set_cluster_info(self, cluster):
        mng = u.get_instances(cluster, 'manager')[0]
        nn = u.get_namenode(cluster)
        jt = u.get_jobtracker(cluster)
        oozie = u.get_oozie(cluster)

        #TODO(alazarev) make port configurable (bug #1262895)
        info = {'IDH Manager': {
            'Web UI': 'https://%s:9443' % mng.management_ip
        }}

        if jt:
            #TODO(alazarev) make port configurable (bug #1262895)
            info['MapReduce'] = {
                'Web UI': 'http://%s:50030' % jt.management_ip
            }
            #TODO(alazarev) make port configurable (bug #1262895)
            info['MapReduce']['JobTracker'] = '%s:54311' % jt.hostname()
        if nn:
            #TODO(alazarev) make port configurable (bug #1262895)
            info['HDFS'] = {
                'Web UI': 'http://%s:50070' % nn.management_ip
            }
            #TODO(alazarev) make port configurable (bug #1262895)
            info['HDFS']['NameNode'] = 'hdfs://%s:8020' % nn.hostname()

        if oozie:
            #TODO(alazarev) make port configurable (bug #1262895)
            info['JobFlow'] = {
                'Oozie': 'http://%s:11000' % oozie.management_ip
            }

        ctx = context.ctx()
        conductor.cluster_update(ctx, cluster, {'info': info})
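Every entry in the info dict carries the same TODO about hard-coded ports (bug #1262895). One obvious direction, sketched here and not necessarily the fix that landed, is to collect the literals used above into a single table that could later be fed from configuration:

# Hypothetical port table gathering the literals used above.
DEFAULT_PORTS = {
    'manager_ui': 9443,
    'jobtracker_ui': 50030,
    'jobtracker': 54311,
    'namenode_ui': 50070,
    'namenode': 8020,
    'oozie': 11000,
}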
Example #7
def _get_cluster_hosts_information(host, cluster):
    for c in conductor.cluster_get_all(context.ctx()):
        if c.id == cluster.id:
            continue

        for i in u.get_instances(c):
            if i.instance_name == host:
                return g.generate_etc_hosts(c)

    return None
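_get_cluster_hosts_information scans every other cluster for an instance whose name matches the target host and, on a hit, returns that cluster's /etc/hosts payload. For context, g.generate_etc_hosts presumably produces something like the following (a hypothetical shape, not the project's actual helper):

def generate_etc_hosts(cluster):
    hosts = ''
    for instance in u.get_instances(cluster):
        # One "ip fqdn short-name" line per instance.
        hosts += '%s %s %s\n' % (instance.internal_ip, instance.fqdn(),
                                 instance.hostname())
    return hosts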
Example #8
 def _push_configs_to_nodes(self, cluster, extra, new_instances):
     all_instances = utils.get_instances(cluster)
     with context.ThreadGroup() as tg:
         for instance in all_instances:
             if instance in new_instances:
                 tg.spawn('vanilla-configure-%s' % instance.instance_name,
                          self._push_configs_to_new_node, cluster,
                          extra, instance)
             else:
                 tg.spawn('vanilla-reconfigure-%s' % instance.instance_name,
                          self._push_configs_to_existing_node, cluster,
                          extra, instance)
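context.ThreadGroup is Sahara-specific: spawn() fans a worker out per instance and leaving the with-block joins them all. The same fan-out/join pattern with the standard library looks roughly like this (a sketch under that assumption, not the project's implementation):

from concurrent.futures import ThreadPoolExecutor

def push_configs(all_instances, new_instances, configure_new, reconfigure):
    with ThreadPoolExecutor() as executor:
        futures = [executor.submit(
            configure_new if inst in new_instances else reconfigure, inst)
            for inst in all_instances]
        # The with-block already waits for completion; result() re-raises
        # the first worker exception instead of swallowing it.
        for f in futures:
            f.result()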
Example #9
    def _push_configs_to_nodes(self, cluster, instances=None):
        extra = self._extract_configs_to_extra(cluster)

        if instances is None:
            instances = utils.get_instances(cluster)

        for inst in instances:
            ng_extra = extra[inst.node_group.id]
            files = {
                '/etc/hadoop/core-site.xml': ng_extra['xml']['core-site'],
                '/etc/hadoop/mapred-site.xml': ng_extra['xml']['mapred-site'],
                '/etc/hadoop/hdfs-site.xml': ng_extra['xml']['hdfs-site'],
                '/tmp/savanna-hadoop-init.sh': ng_extra['setup_script']
            }
            with remote.get_remote(inst) as r:
                # TODO(aignatov): sudo chown is the wrong solution, but it works.
                r.execute_command(
                    'sudo chown -R $USER:$USER /etc/hadoop'
                )
                r.execute_command(
                    'sudo chown -R $USER:$USER /opt/oozie/conf'
                )
                r.write_files_to(files)
                r.execute_command(
                    'sudo chmod 0500 /tmp/savanna-hadoop-init.sh'
                )
                r.execute_command(
                    'sudo /tmp/savanna-hadoop-init.sh '
                    '>> /tmp/savanna-hadoop-init.log 2>&1')

        nn = utils.get_namenode(cluster)
        jt = utils.get_jobtracker(cluster)

        with remote.get_remote(nn) as r:
            r.write_file_to('/etc/hadoop/dn.incl',
                            utils.generate_fqdn_host_names(
                                utils.get_datanodes(cluster)))
        if jt:
            with remote.get_remote(jt) as r:
                r.write_file_to('/etc/hadoop/tt.incl',
                                utils.generate_fqdn_host_names(
                                    utils.get_tasktrackers(cluster)))

        oozie = utils.get_oozie(cluster)
        if oozie:
            with remote.get_remote(oozie) as r:
                r.write_file_to(
                    '/opt/oozie/conf/oozie-site.xml',
                    extra[oozie.node_group.id]['xml']['oozie-site'])
Example #10
def configure_cluster_for_hdfs(cluster, data_source):
    host = urlparse.urlparse(data_source.url).hostname

    etc_hosts_information = _get_cluster_hosts_information(host, cluster)
    if etc_hosts_information is None:
        # The address couldn't be resolved from any other cluster; the
        # last chance is the VM's own name resolution.
        return

    create_etc_host = 'sudo "cat /tmp/etc-hosts-update '
    create_etc_host += '/etc/hosts > /tmp/etc-hosts"'
    copy_etc_host = 'sudo "cat /tmp/etc-hosts > /etc/hosts"'

    for inst in u.get_instances(cluster):
        with inst.remote as r:
            r.write_file_to('/tmp/etc-hosts-update', etc_hosts_information)
            r.execute_command(create_etc_host)
            r.execute_command(copy_etc_host)
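The two-step dance (write to /tmp, then concatenate into place with sudo) is needed because the remote login user typically cannot write /etc/hosts directly. Note that with plain sudo the shell redirection runs unprivileged, so whether the quoting above works depends on the remote-execution wrapper; the conventional shell-side form wraps the redirect in a root shell (a sketch):

create_etc_host = ("sudo sh -c 'cat /tmp/etc-hosts-update /etc/hosts"
                   " > /tmp/etc-hosts'")
copy_etc_host = "sudo sh -c 'cat /tmp/etc-hosts > /etc/hosts'"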
Example #11
    def start_cluster(self, cluster):
        nn_instance = utils.get_namenode(cluster)
        with remote.get_remote(nn_instance) as r:
            run.format_namenode(r)
            run.start_processes(r, "namenode")

        for snn in utils.get_secondarynamenodes(cluster):
            run.start_processes(remote.get_remote(snn), "secondarynamenode")

        jt_instance = utils.get_jobtracker(cluster)
        if jt_instance:
            run.start_processes(remote.get_remote(jt_instance), "jobtracker")

        self._start_tt_dn_processes(utils.get_instances(cluster))

        self._await_datanodes(cluster)

        LOG.info("Hadoop services in cluster %s have been started" %
                 cluster.name)

        oozie = utils.get_oozie(cluster)
        if oozie:
            with remote.get_remote(oozie) as r:
                if c_helper.is_mysql_enable(cluster):
                    run.mysql_start(r, oozie)
                    run.oozie_create_db(r)
                run.oozie_share_lib(r, nn_instance.hostname())
                run.start_oozie(r)
                LOG.info("Oozie service at '%s' has been started",
                         oozie.hostname())

        hive_server = utils.get_hiveserver(cluster)
        if hive_server:
            with remote.get_remote(nn_instance) as r:
                run.hive_create_warehouse_dir(r)
            if c_helper.is_mysql_enable(cluster):
                with remote.get_remote(hive_server) as h:
                    if not oozie or hive_server.hostname() != oozie.hostname():
                        run.mysql_start(h, hive_server)
                    run.hive_create_db(h)
                    run.hive_metastore_start(h)
                LOG.info("Hive Metastore server at %s has been started",
                         hive_server.hostname())

        LOG.info('Cluster %s has been started successfully' % cluster.name)
        self._set_cluster_info(cluster)
Example #12
    def start_cluster(self, cluster):
        instances = utils.get_instances(cluster)
        nn_instance = utils.get_namenode(cluster)
        jt_instance = utils.get_jobtracker(cluster)
        oozie = utils.get_oozie(cluster)
        hive_server = utils.get_hiveserver(cluster)

        with remote.get_remote(nn_instance) as r:
            run.format_namenode(r)
            run.start_processes(r, "namenode")

        for snn in utils.get_secondarynamenodes(cluster):
            run.start_processes(remote.get_remote(snn), "secondarynamenode")

        if jt_instance:
            run.start_processes(remote.get_remote(jt_instance), "jobtracker")

        self._start_tt_dn_processes(instances)

        LOG.info("Hadoop services in cluster %s have been started" %
                 cluster.name)

        if oozie:
            with remote.get_remote(oozie) as r:
                if c_helper.is_mysql_enable(cluster):
                    run.mysql_start(r, oozie)
                    run.oozie_create_db(r)
                run.oozie_share_lib(r, nn_instance.hostname)
                run.start_oozie(r)
                LOG.info("Oozie service at '%s' has been started",
                         oozie.hostname)

        if hive_server:
            with remote.get_remote(nn_instance) as r:
                run.hive_create_warehouse_dir(r)
            if c_helper.is_mysql_enable(cluster):
                with remote.get_remote(hive_server) as h:
                    if not oozie or hive_server.hostname != oozie.hostname:
                        run.mysql_start(h, hive_server)
                    run.hive_create_db(h)
                    run.hive_metastore_start(h)
                LOG.info("Hive Metastore server at %s has been started",
                         hive_server.hostname)

        LOG.info('Cluster %s has been started successfully' % cluster.name)
        self._set_cluster_info(cluster)
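Between these two start_cluster revisions the instance lookups moved to the top of the method and hostname went from a method call to an attribute access, which suggests the Instance API changed between versions. Purely for illustration, a helper tolerant of both shapes:

def instance_hostname(instance):
    # Works whether 'hostname' is a plain attribute or a zero-arg method.
    value = instance.hostname
    return value() if callable(value) else value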
Example #13
def configure_os(cluster):
    instances = u.get_instances(cluster)
    configure_os_from_instances(cluster, instances)
Example #14
 def configure_cluster(self, cluster):
     self._push_configs_to_nodes(cluster)
     self._write_hadoop_user_keys(cluster.private_key,
                                  utils.get_instances(cluster))
Example #15
    def configure_cluster(self, cluster):
        instances = utils.get_instances(cluster)

        self._setup_instances(cluster, instances)
Example #16
 def configure_cluster(self, cluster):
     self._push_configs_to_nodes(cluster)
     self._write_hadoop_user_keys(utils.get_instances(cluster),
                                  cluster.management_private_key,
                                  cluster.management_public_key)
Example #17
def decommission_nodes(cluster, instances):
    dec_hosts = [i.fqdn() for i in instances]
    dn_hosts = [dn.fqdn() for dn in u.get_datanodes(cluster)]
    tt_hosts = [dn.fqdn() for dn in u.get_tasktrackers(cluster)]

    mng_ip = u.get_instances(cluster, 'manager')[0].management_ip
    client = c.IntelClient(mng_ip, cluster.name)

    dec_dn_hosts = []
    for dec_host in dec_hosts:
        if dec_host in dn_hosts:
            dec_dn_hosts.append(dec_host)

    if dec_dn_hosts:
        client.services.hdfs.decommission_nodes(dec_dn_hosts)

        #TODO(alazarev) make timeout configurable (bug #1262897)
        timeout = 14400  # 4 hours
        cur_time = 0
        for host in dec_dn_hosts:
            while cur_time < timeout:
                if client.services.hdfs.get_datanode_status(
                        host) == 'Decomissioned':
                    break
                context.sleep(5)
                cur_time += 5
            else:
                LOG.warn("Failed to decomission node '%s' of cluster '%s' "
                         "in %s minutes" % (host, cluster.name, timeout/60))

    client.nodes.stop(dec_hosts)

    # wait for the services to stop
    #TODO(alazarev) make timeout configurable (bug #1262897)
    timeout = 600  # 10 minutes
    cur_time = 0
    for instance in instances:
        while cur_time < timeout:
            stopped = True
            if instance.fqdn() in dn_hosts:
                code, out = instance.remote().execute_command(
                    'sudo /sbin/service hadoop-datanode status',
                    raise_when_error=False)
                if out.strip() != 'datanode is stopped':
                    stopped = False
                if out.strip() == 'datanode dead but pid file exists':
                    instance.remote().execute_command(
                        'sudo rm -f '
                        '/var/run/hadoop/hadoop-hadoop-datanode.pid')
            if instance.fqdn() in tt_hosts:
                code, out = instance.remote().execute_command(
                    'sudo /sbin/service hadoop-tasktracker status',
                    raise_when_error=False)
                if out.strip() != 'tasktracker is stopped':
                    stopped = False
            if stopped:
                break
            else:
                context.sleep(5)
                cur_time += 5
        else:
            LOG.warn("Failed to stop services on node '%s' of cluster '%s' "
                     "in %s minutes" % (instance, cluster.name, timeout/60))

    for node in dec_hosts:
        LOG.info("Deleting node '%s' on cluster '%s'" % (node, cluster.name))
        client.nodes.delete(node)
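The decommission example leans on Python's while/else twice: the else clause runs only when the loop exhausts its timeout without hitting break. Stripped of the Intel-client details, the polling idiom looks like this (names here are hypothetical):

import time

def wait_for(predicate, timeout=600, interval=5):
    # Poll until predicate() holds or the timeout elapses; mirrors the
    # while/else loops above, returning False where they LOG.warn.
    elapsed = 0
    while elapsed < timeout:
        if predicate():
            return True
        time.sleep(interval)
        elapsed += interval
    return False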