Esempio n. 1
0
    def _extract_configs_to_extra(self, cluster):
        """Build the 'extra' payload with generated configs for ``cluster``.

        The result maps every node group id to a dict with the generated
        'xml' configs and the 'setup_script'. When a Hive server instance is
        found, a freshly generated 'hive_mysql_passwd' is stored as well, and
        when data locality is enabled a 'topology_data' string is added.
        """
        nn_inst = utils.get_namenode(cluster)
        jt_inst = utils.get_jobtracker(cluster)
        oozie_inst = utils.get_oozie(cluster)
        hive_inst = utils.get_hiveserver(cluster)

        result = {}

        if hive_inst:
            result['hive_mysql_passwd'] = uuidutils.generate_uuid()

        for group in cluster.node_groups:
            xml_confs = c_helper.generate_xml_configs(
                group.configuration,
                group.storage_paths,
                nn_inst.hostname,
                jt_inst.hostname if jt_inst else None,
                oozie_inst.hostname if oozie_inst else None,
                hive_inst.hostname if hive_inst else None,
                result['hive_mysql_passwd'] if hive_inst else None)

            # The Oozie part of the script is requested only for the node
            # group that hosts the Oozie instance.
            with_oozie = (oozie_inst is not None
                          and oozie_inst.node_group.id == group.id)
            script = c_helper.generate_setup_script(
                group.storage_paths,
                c_helper.extract_environment_confs(group.configuration),
                append_oozie=with_oozie)

            result[group.id] = {'xml': xml_confs, 'setup_script': script}

        if c_helper.is_data_locality_enabled(cluster):
            topology = th.generate_topology_map(
                cluster, CONF.enable_hypervisor_awareness)
            # One "<key> <value>" line per entry, newline-terminated.
            lines = [key + " " + value for key, value in topology.items()]
            result['topology_data'] = "\n".join(lines) + "\n"

        return result
Esempio n. 2
0
    def _set_cluster_info(self, cluster):
        """Store the service endpoints of ``cluster`` in its 'info' field.

        The IDH Manager entry is always present (the first 'manager' instance
        is used); MapReduce, HDFS and JobFlow entries are added only when the
        corresponding instance is found. The result is saved through
        ``conductor.cluster_update``.
        """
        manager = u.get_instances(cluster, 'manager')[0]
        nn_inst = u.get_namenode(cluster)
        jt_inst = u.get_jobtracker(cluster)
        oozie_inst = u.get_oozie(cluster)

        # TODO(alazarev) make all ports below configurable (bug #1262895)
        endpoints = {
            'IDH Manager': {
                'Web UI': 'https://%s:9443' % manager.management_ip,
            },
        }

        if jt_inst:
            endpoints['MapReduce'] = {
                'Web UI': 'http://%s:50030' % jt_inst.management_ip,
                'JobTracker': '%s:54311' % jt_inst.hostname(),
            }

        if nn_inst:
            endpoints['HDFS'] = {
                'Web UI': 'http://%s:50070' % nn_inst.management_ip,
                'NameNode': 'hdfs://%s:8020' % nn_inst.hostname(),
            }

        if oozie_inst:
            endpoints['JobFlow'] = {
                'Oozie': 'http://%s:11000' % oozie_inst.management_ip,
            }

        conductor.cluster_update(context.ctx(), cluster, {'info': endpoints})
Esempio n. 3
0
    def _extract_configs_to_extra(self, cluster):
        """Collect generated configs and setup scripts per node group.

        Returns a dict keyed by node group id; each value holds the 'xml'
        configs produced by ``c_helper.generate_xml_configs`` and the
        'setup_script' produced by ``c_helper.generate_setup_script``.
        """
        nn_inst = utils.get_namenode(cluster)
        jt_inst = utils.get_jobtracker(cluster)
        oozie_inst = utils.get_oozie(cluster)
        hive_inst = utils.get_hiveserver(cluster)

        per_group = {}
        for group in cluster.node_groups:
            xml_confs = c_helper.generate_xml_configs(
                group.configuration,
                group.storage_paths,
                nn_inst.hostname,
                jt_inst.hostname if jt_inst else None,
                oozie_inst.hostname if oozie_inst else None,
                hive_inst.hostname if hive_inst else None)

            # The Oozie part of the script is requested only for the node
            # group that hosts the Oozie instance.
            with_oozie = (oozie_inst is not None
                          and oozie_inst.node_group.id == group.id)
            script = c_helper.generate_setup_script(
                group.storage_paths,
                c_helper.extract_environment_confs(group.configuration),
                append_oozie=with_oozie)

            per_group[group.id] = {'xml': xml_confs, 'setup_script': script}

        return per_group
Esempio n. 4
0
    def _set_cluster_info(self, cluster):
        """Store the web endpoints of ``cluster`` in its 'info' field.

        MapReduce and HDFS web UI ports are taken from the text after the
        last ':' of the configured HTTP address; the Oozie port is fixed.
        Entries are added only for instances that are found, and the result
        is saved through ``conductor.cluster_update``.
        """
        nn_inst = utils.get_namenode(cluster)
        jt_inst = utils.get_jobtracker(cluster)
        oozie_inst = utils.get_oozie(cluster)
        endpoints = {}

        if jt_inst:
            http_address = c_helper.get_config_value(
                'MapReduce', 'mapred.job.tracker.http.address', cluster)
            # Everything after the last ':' (the whole value if none).
            port = http_address.rpartition(':')[2]
            endpoints['MapReduce'] = {
                'Web UI': 'http://%s:%s' % (jt_inst.management_ip, port),
            }

        if nn_inst:
            http_address = c_helper.get_config_value(
                'HDFS', 'dfs.http.address', cluster)
            port = http_address.rpartition(':')[2]
            endpoints['HDFS'] = {
                'Web UI': 'http://%s:%s' % (nn_inst.management_ip, port),
            }

        if oozie_inst:
            endpoints['JobFlow'] = {
                'Oozie': 'http://%s:11000' % oozie_inst.management_ip,
            }

        conductor.cluster_update(context.ctx(), cluster, {'info': endpoints})
Esempio n. 5
0
    def _set_cluster_info(self, cluster):
        """Record MapReduce, HDFS and Oozie endpoints in the cluster 'info'.

        For MapReduce and HDFS the web UI port is the part of the configured
        HTTP address that follows its last ':'. Only services whose instance
        is found get an entry. The dict is saved through
        ``conductor.cluster_update``.
        """
        namenode = utils.get_namenode(cluster)
        jobtracker = utils.get_jobtracker(cluster)
        oozie_server = utils.get_oozie(cluster)
        info = {}

        if jobtracker:
            jt_address = c_helper.get_config_value(
                'MapReduce', 'mapred.job.tracker.http.address', cluster)
            # Last ':'-separated piece (the whole value when no ':').
            jt_port = jt_address.rsplit(':', 1)[-1]
            web_ui = 'http://%s:%s' % (jobtracker.management_ip, jt_port)
            info['MapReduce'] = {'Web UI': web_ui}

        if namenode:
            nn_address = c_helper.get_config_value(
                'HDFS', 'dfs.http.address', cluster)
            nn_port = nn_address.rsplit(':', 1)[-1]
            web_ui = 'http://%s:%s' % (namenode.management_ip, nn_port)
            info['HDFS'] = {'Web UI': web_ui}

        if oozie_server:
            oozie_url = 'http://%s:11000' % oozie_server.management_ip
            info['JobFlow'] = {'Oozie': oozie_url}

        ctx = context.ctx()
        conductor.cluster_update(ctx, cluster, {'info': info})
Esempio n. 6
0
    def start_cluster(self, cluster):
        """Start the Hadoop services of ``cluster`` and publish its info.

        Order of operations: the NameNode is formatted and started, then
        secondary NameNodes and DataNodes; if a JobTracker exists it and the
        TaskTrackers are started; if an Oozie instance exists its share lib
        is set up and Oozie is started. Finally ``_set_cluster_info`` is
        called.
        """
        nn_instance = utils.get_namenode(cluster)
        datanodes = utils.get_datanodes(cluster)
        jt_instance = utils.get_jobtracker(cluster)
        tasktrackers = utils.get_tasktrackers(cluster)
        oozie = utils.get_oozie(cluster)

        with remote.get_remote(nn_instance) as r:
            run.format_namenode(r)
            run.start_process(r, "namenode")

        snns = utils.get_secondarynamenodes(cluster)
        if snns:
            for snn in snns:
                run.start_process(remote.get_remote(snn), "secondarynamenode")
        for dn in datanodes:
            run.start_process(remote.get_remote(dn), "datanode")
        LOG.info("HDFS service at '%s' has been started", nn_instance.hostname)

        if jt_instance:
            run.start_process(remote.get_remote(jt_instance), "jobtracker")
            for tt in tasktrackers:
                run.start_process(remote.get_remote(tt), "tasktracker")
            LOG.info("MapReduce service at '%s' has been started",
                     jt_instance.hostname)

        if oozie:
            with remote.get_remote(oozie) as r:
                # The share lib call is given the NameNode hostname.
                run.oozie_share_lib(r, nn_instance.hostname)
                run.start_oozie(r)
                # Report the host Oozie was started on, not the NameNode.
                LOG.info("Oozie service at '%s' has been started",
                         oozie.hostname)

        # Pass the argument separately so formatting is left to the logger.
        LOG.info('Cluster %s has been started successfully', cluster.name)
        self._set_cluster_info(cluster)
Esempio n. 7
0
    def _set_cluster_info(self, cluster):
        """Save the endpoint map of ``cluster`` into its 'info' field.

        Always records the IDH Manager web UI (taken from the first 'manager'
        instance). MapReduce, HDFS and JobFlow sections are recorded only
        when a JobTracker, NameNode or Oozie instance is found. The map is
        saved through ``conductor.cluster_update``.
        """
        mng = u.get_instances(cluster, 'manager')[0]
        namenode = u.get_namenode(cluster)
        jobtracker = u.get_jobtracker(cluster)
        oozie_server = u.get_oozie(cluster)

        # TODO(alazarev) make every port in this method configurable
        # (bug #1262895)
        manager_ui = 'https://%s:9443' % mng.management_ip
        info = {'IDH Manager': {'Web UI': manager_ui}}

        if jobtracker:
            mapreduce = {'Web UI': 'http://%s:50030' % jobtracker.management_ip}
            mapreduce['JobTracker'] = '%s:54311' % jobtracker.hostname()
            info['MapReduce'] = mapreduce

        if namenode:
            hdfs = {'Web UI': 'http://%s:50070' % namenode.management_ip}
            hdfs['NameNode'] = 'hdfs://%s:8020' % namenode.hostname()
            info['HDFS'] = hdfs

        if oozie_server:
            oozie_url = 'http://%s:11000' % oozie_server.management_ip
            info['JobFlow'] = {'Oozie': oozie_url}

        ctx = context.ctx()
        conductor.cluster_update(ctx, cluster, {'info': info})
Esempio n. 8
0
def generate_savanna_configs(cluster, node_group=None):
    """Assemble the Hadoop config values derived from the cluster layout.

    HDFS settings are always present. MapReduce, Oozie and Hive settings are
    added only when the corresponding hostname (as returned by
    ``_get_hostname``) is truthy. Storage-dependent paths are computed by
    ``extract_hadoop_path`` from ``node_group.storage_paths()``, or from
    ``None`` when no node group is given.

    Returns the resulting dict of config name -> value.
    """
    nn_host = _get_hostname(utils.get_namenode(cluster))
    jt_host = _get_hostname(utils.get_jobtracker(cluster))
    oozie_host = _get_hostname(utils.get_oozie(cluster))
    hive_host = _get_hostname(utils.get_hiveserver(cluster))

    storage = None
    if node_group:
        storage = node_group.storage_paths()

    # inserting common configs depends on provisioned VMs and HDFS placement
    # TODO(aignatov): should be moved to cluster context

    configs = {
        'fs.default.name': 'hdfs://%s:8020' % nn_host,
        'dfs.name.dir': extract_hadoop_path(
            storage, '/lib/hadoop/hdfs/namenode'),
        'dfs.data.dir': extract_hadoop_path(
            storage, '/lib/hadoop/hdfs/datanode'),
        'dfs.hosts': '/etc/hadoop/dn.incl',
        'dfs.hosts.exclude': '/etc/hadoop/dn.excl',
    }

    if jt_host:
        configs.update({
            'mapred.job.tracker': '%s:8021' % jt_host,
            'mapred.system.dir': extract_hadoop_path(
                storage, '/mapred/mapredsystem'),
            'mapred.local.dir': extract_hadoop_path(
                storage, '/lib/hadoop/mapred'),
            'mapred.hosts': '/etc/hadoop/tt.incl',
            'mapred.hosts.exclude': '/etc/hadoop/tt.excl',
        })

    if oozie_host:
        configs.update({
            'hadoop.proxyuser.hadoop.hosts': "localhost," + oozie_host,
            'hadoop.proxyuser.hadoop.groups': 'hadoop',
        })
        LOG.debug('Applied Oozie configs for core-site.xml')
        configs.update(o_h.get_oozie_required_xml_configs())
        LOG.debug('Applied Oozie configs for oozie-site.xml')

    if hive_host:
        configs.update({
            'hive.warehouse.subdir.inherit.perms': True,
            'javax.jdo.option.ConnectionURL':
            'jdbc:derby:;databaseName=/opt/hive/metastore_db;create=true',
        })
        LOG.debug('Applied Hive config for hive metastore server')

    return configs
Esempio n. 9
0
    def _set_cluster_info(self, cluster):
        """Store the service endpoints of ``cluster`` in its 'info' field.

        MapReduce and HDFS web UI ports are the text after the last ':' of
        the configured HTTP addresses; the JobTracker, NameNode and Oozie
        ports are fixed values. Entries are added only for instances that
        are found, and the result is saved through
        ``conductor.cluster_update``.
        """
        nn_inst = utils.get_namenode(cluster)
        jt_inst = utils.get_jobtracker(cluster)
        oozie_inst = utils.get_oozie(cluster)
        endpoints = {}

        if jt_inst:
            http_address = c_helper.get_config_value(
                'MapReduce', 'mapred.job.tracker.http.address', cluster)
            # Everything after the last ':' (the whole value if none).
            port = http_address.rpartition(':')[2]
            endpoints['MapReduce'] = {
                'Web UI': 'http://%s:%s' % (jt_inst.management_ip, port),
                # TODO(aignatov) change from hardcode value
                'JobTracker': '%s:8021' % jt_inst.hostname(),
            }

        if nn_inst:
            http_address = c_helper.get_config_value(
                'HDFS', 'dfs.http.address', cluster)
            port = http_address.rpartition(':')[2]
            endpoints['HDFS'] = {
                'Web UI': 'http://%s:%s' % (nn_inst.management_ip, port),
                # TODO(aignatov) change from hardcode value
                'NameNode': 'hdfs://%s:8020' % nn_inst.hostname(),
            }

        if oozie_inst:
            endpoints['JobFlow'] = {
                'Oozie': 'http://%s:11000' % oozie_inst.management_ip,
            }

        conductor.cluster_update(context.ctx(), cluster, {'info': endpoints})
Esempio n. 10
0
def _configure_services(client, cluster):
    """Register the cluster's services with ``client`` and assign host roles.

    'hdfs' is added when a NameNode is found, 'mapred' when a JobTracker is
    found, 'oozie' together with 'pig' when an Oozie host is found, and
    'hive' when a Hive server host is found. Roles are then assigned to the
    hosts' FQDNs through ``client.services.<service>.add_nodes``.

    Each role instance is looked up once and reused, instead of querying the
    cluster again for every truthiness check.
    """
    namenode = u.get_namenode(cluster)
    nn_host = namenode.fqdn()
    snn = u.get_secondarynamenodes(cluster)
    snn_host = snn[0].fqdn() if snn else None
    jobtracker = u.get_jobtracker(cluster)
    jt_host = jobtracker.fqdn() if jobtracker else None
    dn_hosts = [dn.fqdn() for dn in u.get_datanodes(cluster)]
    tt_hosts = [tt.fqdn() for tt in u.get_tasktrackers(cluster)]

    oozie = u.get_oozie(cluster)
    oozie_host = oozie.fqdn() if oozie else None
    hive = u.get_hiveserver(cluster)
    hive_host = hive.fqdn() if hive else None

    services = []
    if namenode:
        services.append('hdfs')

    if jobtracker:
        services.append('mapred')

    if oozie_host:
        services.extend(['oozie', 'pig'])

    if hive_host:
        services.append('hive')

    LOG.debug("Add services: %s", ', '.join(services))
    client.services.add(services)

    LOG.debug("Assign roles to hosts")
    client.services.hdfs.add_nodes('PrimaryNameNode', [nn_host])

    client.services.hdfs.add_nodes('DataNode', dn_hosts)
    if snn:
        client.services.hdfs.add_nodes('SecondaryNameNode', [snn_host])

    if oozie_host:
        client.services.oozie.add_nodes('Oozie', [oozie_host])

    if hive_host:
        client.services.hive.add_nodes('HiveServer', [hive_host])

    if jt_host:
        client.services.mapred.add_nodes('JobTracker', [jt_host])
        client.services.mapred.add_nodes('TaskTracker', tt_hosts)
Esempio n. 11
0
def _configure_services(client, cluster):
    """Register the cluster's services with ``client`` and assign host roles.

    'hdfs' is added when a NameNode is found, 'mapred' when a JobTracker is
    found, 'oozie' together with 'pig' when an Oozie host is found, and
    'hive' when a Hive server host is found. Roles are then assigned to the
    hosts' FQDNs through ``client.services.<service>.add_nodes``.

    A cluster without a JobTracker is supported: the 'mapred' service is not
    added in that case, so its role assignment is skipped instead of failing
    on the missing instance.
    """
    namenode = u.get_namenode(cluster)
    nn_host = namenode.fqdn()
    snn = u.get_secondarynamenodes(cluster)
    snn_host = snn[0].fqdn() if snn else None
    jobtracker = u.get_jobtracker(cluster)
    # The JobTracker is optional; only dereference it when present.
    jt_host = jobtracker.fqdn() if jobtracker else None
    dn_hosts = [dn.fqdn() for dn in u.get_datanodes(cluster)]
    tt_hosts = [tt.fqdn() for tt in u.get_tasktrackers(cluster)]

    oozie = u.get_oozie(cluster)
    oozie_host = oozie.fqdn() if oozie else None
    hive = u.get_hiveserver(cluster)
    hive_host = hive.fqdn() if hive else None

    services = []
    if namenode:
        services.append('hdfs')

    if jobtracker:
        services.append('mapred')

    if oozie_host:
        services.extend(['oozie', 'pig'])

    if hive_host:
        services.append('hive')

    LOG.debug("Add services: %s", ', '.join(services))
    client.services.add(services)

    LOG.debug("Assign roles to hosts")
    client.services.hdfs.add_nodes('PrimaryNameNode', [nn_host])

    client.services.hdfs.add_nodes('DataNode', dn_hosts)
    if snn:
        client.services.hdfs.add_nodes('SecondaryNameNode', [snn_host])

    if oozie_host:
        client.services.oozie.add_nodes('Oozie', [oozie_host])

    if hive_host:
        client.services.hive.add_nodes('HiveServer', [hive_host])

    # Same condition that decides whether the 'mapred' service was added.
    if jobtracker:
        client.services.mapred.add_nodes('JobTracker', [jt_host])
        client.services.mapred.add_nodes('TaskTracker', tt_hosts)
Esempio n. 12
0
    def scale_cluster(self, cluster, instances):
        """Set up the new ``instances`` and bring them into the cluster.

        After ``_setup_instances`` runs, the node lists are refreshed on the
        NameNode ("dfsadmin") and, when a JobTracker exists, on the
        JobTracker ("mradmin"); then ``_start_tt_dn_processes`` is called
        for the new instances.
        """
        self._setup_instances(cluster, instances)

        nn_remote = remote.get_remote(utils.get_namenode(cluster))
        run.refresh_nodes(nn_remote, "dfsadmin")

        jobtracker = utils.get_jobtracker(cluster)
        if jobtracker:
            run.refresh_nodes(remote.get_remote(jobtracker), "mradmin")

        self._start_tt_dn_processes(instances)
Esempio n. 13
0
    def scale_cluster(self, cluster, instances):
        """Prepare ``instances`` and make the running cluster pick them up.

        Runs ``_setup_instances``, refreshes nodes on the NameNode with
        "dfsadmin" and, if a JobTracker is found, on the JobTracker with
        "mradmin", and finally calls ``_start_tt_dn_processes`` for the new
        instances.
        """
        self._setup_instances(cluster, instances)

        namenode = utils.get_namenode(cluster)
        run.refresh_nodes(remote.get_remote(namenode), "dfsadmin")

        jt_inst = utils.get_jobtracker(cluster)
        if jt_inst:
            jt_remote = remote.get_remote(jt_inst)
            run.refresh_nodes(jt_remote, "mradmin")

        self._start_tt_dn_processes(instances)
Esempio n. 14
0
def generate_savanna_configs(cluster, node_group=None):
    """Build the config values that depend on where services are placed.

    The HDFS section is unconditional. The MapReduce, Oozie and Hive
    sections are included only when ``_get_hostname`` yields a truthy
    hostname for the JobTracker, Oozie or Hive server respectively. Paths
    come from ``extract_hadoop_path`` applied to
    ``node_group.storage_paths()`` (or to ``None`` without a node group).

    Returns a dict of config name -> value.
    """
    namenode_host = _get_hostname(utils.get_namenode(cluster))
    jobtracker_host = _get_hostname(utils.get_jobtracker(cluster))
    oozie_host = _get_hostname(utils.get_oozie(cluster))
    hive_host = _get_hostname(utils.get_hiveserver(cluster))

    paths = node_group.storage_paths() if node_group else None

    # inserting common configs depends on provisioned VMs and HDFS placement
    # TODO(aignatov): should be moved to cluster context

    result = {}
    result['fs.default.name'] = 'hdfs://%s:8020' % namenode_host
    result['dfs.name.dir'] = extract_hadoop_path(
        paths, '/lib/hadoop/hdfs/namenode')
    result['dfs.data.dir'] = extract_hadoop_path(
        paths, '/lib/hadoop/hdfs/datanode')
    result['dfs.hosts'] = '/etc/hadoop/dn.incl'
    result['dfs.hosts.exclude'] = '/etc/hadoop/dn.excl'

    if jobtracker_host:
        result['mapred.job.tracker'] = '%s:8021' % jobtracker_host
        result['mapred.system.dir'] = extract_hadoop_path(
            paths, '/mapred/mapredsystem')
        result['mapred.local.dir'] = extract_hadoop_path(
            paths, '/lib/hadoop/mapred')
        result['mapred.hosts'] = '/etc/hadoop/tt.incl'
        result['mapred.hosts.exclude'] = '/etc/hadoop/tt.excl'

    if oozie_host:
        result['hadoop.proxyuser.hadoop.hosts'] = "localhost," + oozie_host
        result['hadoop.proxyuser.hadoop.groups'] = 'hadoop'
        LOG.debug('Applied Oozie configs for core-site.xml')
        result.update(o_h.get_oozie_required_xml_configs())
        LOG.debug('Applied Oozie configs for oozie-site.xml')

    if hive_host:
        result['hive.warehouse.subdir.inherit.perms'] = True
        result['javax.jdo.option.ConnectionURL'] = (
            'jdbc:derby:;databaseName=/opt/hive/metastore_db;create=true')
        LOG.debug('Applied Hive config for hive metastore server')

    return result
Esempio n. 15
0
    def start_cluster(self, cluster):
        """Start the Hadoop, Oozie and Hive services of ``cluster``.

        Order of operations: the NameNode is formatted and started, then
        secondary NameNodes and DataNodes; if a JobTracker exists it and the
        TaskTrackers are started; if an Oozie instance exists it is set up
        and started; if a Hive server exists its warehouse dir is created
        and, when MySQL is enabled, its database and metastore are started.
        Finally ``_set_cluster_info`` is called.
        """
        nn_instance = utils.get_namenode(cluster)
        datanodes = utils.get_datanodes(cluster)
        jt_instance = utils.get_jobtracker(cluster)
        tasktrackers = utils.get_tasktrackers(cluster)
        oozie = utils.get_oozie(cluster)
        hive_server = utils.get_hiveserver(cluster)

        with remote.get_remote(nn_instance) as r:
            run.format_namenode(r)
            run.start_process(r, "namenode")

        snns = utils.get_secondarynamenodes(cluster)
        if snns:
            for snn in snns:
                run.start_process(remote.get_remote(snn), "secondarynamenode")
        for dn in datanodes:
            run.start_process(remote.get_remote(dn), "datanode")
        LOG.info("HDFS service at '%s' has been started",
                 nn_instance.hostname)

        if jt_instance:
            run.start_process(remote.get_remote(jt_instance), "jobtracker")
            for tt in tasktrackers:
                run.start_process(remote.get_remote(tt), "tasktracker")
            LOG.info("MapReduce service at '%s' has been started",
                     jt_instance.hostname)

        if oozie:
            with remote.get_remote(oozie) as r:
                if c_helper.is_mysql_enable(cluster):
                    run.mysql_start(r, oozie)
                    run.oozie_create_db(r)
                # The share lib call is given the NameNode hostname.
                run.oozie_share_lib(r, nn_instance.hostname)
                run.start_oozie(r)
                # Report the host Oozie was started on, not the NameNode.
                LOG.info("Oozie service at '%s' has been started",
                         oozie.hostname)

        if hive_server:
            with remote.get_remote(nn_instance) as r:
                run.hive_create_warehouse_dir(r)
            if c_helper.is_mysql_enable(cluster):
                with remote.get_remote(hive_server) as h:
                    # When Hive shares a host with Oozie, MySQL was already
                    # started there by the Oozie branch above.
                    if not oozie or hive_server.hostname != oozie.hostname:
                        run.mysql_start(h, hive_server)
                    run.hive_create_db(h)
                    run.hive_metastore_start(h)
                LOG.info("Hive Metastore server at %s has been started",
                         hive_server.hostname)

        # Pass the argument separately so formatting is left to the logger.
        LOG.info('Cluster %s has been started successfully', cluster.name)
        self._set_cluster_info(cluster)
Esempio n. 16
0
def run_job(job_execution):
    """Submit ``job_execution`` to the cluster's Oozie server and start it.

    If the target cluster's status is not 'Active', ``job_execution`` is
    returned unchanged. Otherwise the workflow directory and files are
    uploaded through the JobTracker instance, the workflow XML is generated
    and uploaded, the job is added and run through the Oozie client, and the
    job execution is updated with the Oozie job id and start time.

    Returns the (possibly updated) job execution object.
    """
    ctx = context.ctx()

    cluster = conductor.cluster_get(ctx, job_execution.cluster_id)
    if cluster.status != 'Active':
        return job_execution

    job = conductor.job_get(ctx, job_execution.job_id)
    input_source = conductor.data_source_get(ctx, job_execution.input_id)
    output_source = conductor.data_source_get(ctx, job_execution.output_id)
    # TODO(nprivalova): should be removed after all features implemented
    validate(input_source, output_source, job)

    plugin = plugin_base.PLUGINS.get_plugin(cluster.plugin_name)
    hdfs_user = plugin.get_hdfs_user()

    # Resolve the JobTracker once and reuse it for every step below.
    jobtracker = u.get_jobtracker(cluster)
    wf_dir = create_workflow_dir(jobtracker, job, hdfs_user)
    upload_job_files(jobtracker, wf_dir, job, hdfs_user)

    creator = workflow_factory.get_creator(job)

    # Do other job type specific setup here, for example
    # uploading hive configuration
    creator.configure_workflow_if_needed(cluster, wf_dir)

    wf_xml = creator.get_workflow_xml(job_execution.job_configs, input_source,
                                      output_source)

    path_to_workflow = upload_workflow_file(jobtracker, wf_dir, wf_xml,
                                            hdfs_user)

    jt_path = '%s:8021' % jobtracker.hostname
    nn_path = 'hdfs://%s:8020' % u.get_namenode(cluster).hostname

    client = o.OozieClient(cluster['info']['JobFlow']['Oozie'] + "/oozie/")
    # NOTE(review): "user.name" is hard-coded while the HDFS steps above use
    # the plugin's hdfs_user -- confirm whether these are meant to differ.
    job_parameters = {
        "jobTracker": jt_path,
        "nameNode": nn_path,
        "user.name": "hadoop",
        "oozie.wf.application.path": "%s%s" % (nn_path, path_to_workflow),
        "oozie.use.system.libpath": "true"
    }

    oozie_job_id = client.add_job(x.create_hadoop_xml(job_parameters))
    client.run_job(oozie_job_id)
    job_execution = conductor.job_execution_update(
        ctx, job_execution, {
            'oozie_job_id': oozie_job_id,
            'start_time': datetime.datetime.now()
        })

    return job_execution
Esempio n. 17
0
def run_job(job_execution):
    """Submit ``job_execution`` to the cluster's Oozie server and start it.

    If the target cluster's status is not 'Active', ``job_execution`` is
    returned unchanged. Otherwise the workflow directory and files are
    uploaded through the JobTracker instance, the workflow XML is generated
    and uploaded, the job is added and run through the Oozie client, and the
    job execution is updated with the Oozie job id and start time.

    Returns the (possibly updated) job execution object.
    """
    ctx = context.ctx()

    cluster = conductor.cluster_get(ctx, job_execution.cluster_id)
    if cluster.status != 'Active':
        return job_execution

    job = conductor.job_get(ctx, job_execution.job_id)
    input_source = conductor.data_source_get(ctx, job_execution.input_id)
    output_source = conductor.data_source_get(ctx, job_execution.output_id)
    # TODO(nprivalova): should be removed after all features implemented
    validate(input_source, output_source, job)

    plugin = plugin_base.PLUGINS.get_plugin(cluster.plugin_name)
    hdfs_user = plugin.get_hdfs_user()

    # Resolve the JobTracker once and reuse it for every step below.
    jobtracker = u.get_jobtracker(cluster)
    wf_dir = create_workflow_dir(jobtracker, job, hdfs_user)
    upload_job_files(jobtracker, wf_dir, job, hdfs_user)

    creator = workflow_factory.get_creator(job)

    # Do other job type specific setup here, for example
    # uploading hive configuration
    creator.configure_workflow_if_needed(cluster, wf_dir)

    wf_xml = creator.get_workflow_xml(job_execution.job_configs,
                                      input_source, output_source)

    path_to_workflow = upload_workflow_file(jobtracker,
                                            wf_dir, wf_xml, hdfs_user)

    jt_path = '%s:8021' % jobtracker.hostname
    nn_path = 'hdfs://%s:8020' % u.get_namenode(cluster).hostname

    client = o.OozieClient(cluster['info']['JobFlow']['Oozie'] + "/oozie/")
    # NOTE(review): "user.name" is hard-coded while the HDFS steps above use
    # the plugin's hdfs_user -- confirm whether these are meant to differ.
    job_parameters = {
        "jobTracker": jt_path,
        "nameNode": nn_path,
        "user.name": "hadoop",
        "oozie.wf.application.path": "%s%s" % (nn_path, path_to_workflow),
        "oozie.use.system.libpath": "true",
    }

    oozie_job_id = client.add_job(x.create_hadoop_xml(job_parameters))
    client.run_job(oozie_job_id)
    job_execution = conductor.job_execution_update(
        ctx, job_execution,
        {'oozie_job_id': oozie_job_id,
         'start_time': datetime.datetime.now()})

    return job_execution
Esempio n. 18
0
    def _push_configs_to_nodes(self, cluster, instances=None):
        """Write generated configs to the nodes and run their setup script.

        ``instances`` defaults to all instances of ``cluster``. Each one
        receives the core/mapred/hdfs site files and the init script of its
        node group, and the script is then executed. Afterwards the DataNode
        include file is written on the NameNode, the TaskTracker include
        file on the JobTracker (if any), and oozie-site.xml on the Oozie
        instance (if any).
        """
        extra = self._extract_configs_to_extra(cluster)

        targets = instances
        if targets is None:
            targets = utils.get_instances(cluster)

        # TODO(aignatov): sudo chown is wrong solution. But it works.
        ownership_cmds = (
            'sudo chown -R $USER:$USER /etc/hadoop',
            'sudo chown -R $USER:$USER /opt/oozie/conf',
        )

        for instance in targets:
            group_extra = extra[instance.node_group.id]
            xml = group_extra['xml']
            files = {
                '/etc/hadoop/core-site.xml': xml['core-site'],
                '/etc/hadoop/mapred-site.xml': xml['mapred-site'],
                '/etc/hadoop/hdfs-site.xml': xml['hdfs-site'],
                '/tmp/savanna-hadoop-init.sh': group_extra['setup_script'],
            }
            with remote.get_remote(instance) as r:
                for command in ownership_cmds:
                    r.execute_command(command)
                r.write_files_to(files)
                r.execute_command(
                    'sudo chmod 0500 /tmp/savanna-hadoop-init.sh')
                r.execute_command(
                    'sudo /tmp/savanna-hadoop-init.sh '
                    '>> /tmp/savanna-hadoop-init.log 2>&1')

        namenode = utils.get_namenode(cluster)
        jobtracker = utils.get_jobtracker(cluster)

        with remote.get_remote(namenode) as r:
            datanodes = utils.get_datanodes(cluster)
            r.write_file_to('/etc/hadoop/dn.incl',
                            utils.generate_fqdn_host_names(datanodes))

        if jobtracker:
            with remote.get_remote(jobtracker) as r:
                tasktrackers = utils.get_tasktrackers(cluster)
                r.write_file_to('/etc/hadoop/tt.incl',
                                utils.generate_fqdn_host_names(tasktrackers))

        oozie_inst = utils.get_oozie(cluster)
        if oozie_inst:
            with remote.get_remote(oozie_inst) as r:
                oozie_xml = extra[oozie_inst.node_group.id]['xml']
                r.write_file_to('/opt/oozie/conf/oozie-site.xml',
                                oozie_xml['oozie-site'])
Esempio n. 19
0
    def _push_configs_to_nodes(self, cluster, instances=None):
        """Push generated Hadoop configs to instances and initialise them.

        When ``instances`` is None every instance of ``cluster`` is used.
        For each instance the site XML files and the init script of its node
        group are written and the script is run. The include files are then
        written on the NameNode and (if present) the JobTracker, and
        oozie-site.xml is written on the Oozie instance when one exists.
        """
        extra = self._extract_configs_to_extra(cluster)

        if instances is None:
            instances = utils.get_instances(cluster)

        for node in instances:
            node_extra = extra[node.node_group.id]
            site_xml = node_extra['xml']
            files = dict([
                ('/etc/hadoop/core-site.xml', site_xml['core-site']),
                ('/etc/hadoop/mapred-site.xml', site_xml['mapred-site']),
                ('/etc/hadoop/hdfs-site.xml', site_xml['hdfs-site']),
                ('/tmp/savanna-hadoop-init.sh', node_extra['setup_script']),
            ])
            with remote.get_remote(node) as r:
                # TODO(aignatov): sudo chown is wrong solution. But it works.
                r.execute_command('sudo chown -R $USER:$USER /etc/hadoop')
                r.execute_command('sudo chown -R $USER:$USER /opt/oozie/conf')
                r.write_files_to(files)
                r.execute_command(
                    'sudo chmod 0500 /tmp/savanna-hadoop-init.sh')
                # Script output is appended to a log file on the node.
                r.execute_command(
                    'sudo /tmp/savanna-hadoop-init.sh '
                    '>> /tmp/savanna-hadoop-init.log 2>&1')

        nn_inst = utils.get_namenode(cluster)
        jt_inst = utils.get_jobtracker(cluster)

        with remote.get_remote(nn_inst) as r:
            dn_names = utils.generate_fqdn_host_names(
                utils.get_datanodes(cluster))
            r.write_file_to('/etc/hadoop/dn.incl', dn_names)

        if jt_inst:
            with remote.get_remote(jt_inst) as r:
                tt_names = utils.generate_fqdn_host_names(
                    utils.get_tasktrackers(cluster))
                r.write_file_to('/etc/hadoop/tt.incl', tt_names)

        oozie = utils.get_oozie(cluster)
        if oozie:
            with remote.get_remote(oozie) as r:
                oozie_site = extra[oozie.node_group.id]['xml']['oozie-site']
                r.write_file_to('/opt/oozie/conf/oozie-site.xml', oozie_site)
Esempio n. 20
0
    def scale_cluster(self, cluster, instances):
        """Configure the new ``instances`` and start their worker processes.

        Configs and hadoop user keys are pushed to the new instances, the
        node lists are refreshed on the NameNode ("dfsadmin") and, if a
        JobTracker exists, on it ("mradmin"). Then, on each new instance,
        "datanode" and "tasktracker" are started when listed in its node
        group's processes.
        """
        self._push_configs_to_nodes(cluster, instances=instances)
        self._write_hadoop_user_keys(cluster.private_key, instances)

        nn_remote = remote.get_remote(utils.get_namenode(cluster))
        run.refresh_nodes(nn_remote, "dfsadmin")

        jobtracker = utils.get_jobtracker(cluster)
        if jobtracker:
            run.refresh_nodes(remote.get_remote(jobtracker), "mradmin")

        for instance in instances:
            with remote.get_remote(instance) as r:
                for process in ("datanode", "tasktracker"):
                    if process in instance.node_group.node_processes:
                        run.start_process(r, process)
Esempio n. 21
0
    def scale_cluster(self, cluster, instances):
        """Bring freshly added ``instances`` into the running cluster.

        Pushes configs and hadoop user keys to them, refreshes nodes on the
        NameNode with "dfsadmin" and on the JobTracker (when found) with
        "mradmin", and starts "datanode" / "tasktracker" on every new
        instance whose node group lists that process.
        """
        self._push_configs_to_nodes(cluster, instances=instances)
        self._write_hadoop_user_keys(cluster.private_key, instances)

        namenode = utils.get_namenode(cluster)
        run.refresh_nodes(remote.get_remote(namenode), "dfsadmin")

        jt_inst = utils.get_jobtracker(cluster)
        if jt_inst:
            jt_remote = remote.get_remote(jt_inst)
            run.refresh_nodes(jt_remote, "mradmin")

        worker_processes = ("datanode", "tasktracker")
        for node in instances:
            with remote.get_remote(node) as r:
                for name in worker_processes:
                    if name in node.node_group.node_processes:
                        run.start_process(r, name)
Esempio n. 22
0
    def start_cluster(self, cluster):
        """Start the Hadoop, Oozie and Hive services of ``cluster``.

        Order of operations: the NameNode is formatted and started, then
        secondary NameNodes and the JobTracker (if any); TaskTracker and
        DataNode processes are started through ``_start_tt_dn_processes``
        and ``_await_datanodes`` is called. If an Oozie instance exists it
        is set up and started; if a Hive server exists its warehouse dir is
        created and, when MySQL is enabled, its database and metastore are
        started. Finally ``_set_cluster_info`` is called.
        """
        nn_instance = utils.get_namenode(cluster)
        with remote.get_remote(nn_instance) as r:
            run.format_namenode(r)
            run.start_processes(r, "namenode")

        for snn in utils.get_secondarynamenodes(cluster):
            run.start_processes(remote.get_remote(snn), "secondarynamenode")

        jt_instance = utils.get_jobtracker(cluster)
        if jt_instance:
            run.start_processes(remote.get_remote(jt_instance), "jobtracker")

        self._start_tt_dn_processes(utils.get_instances(cluster))

        self._await_datanodes(cluster)

        # Pass arguments separately so formatting is left to the logger.
        LOG.info("Hadoop services in cluster %s have been started",
                 cluster.name)

        oozie = utils.get_oozie(cluster)
        if oozie:
            with remote.get_remote(oozie) as r:
                if c_helper.is_mysql_enable(cluster):
                    run.mysql_start(r, oozie)
                    run.oozie_create_db(r)
                # The share lib call is given the NameNode hostname.
                run.oozie_share_lib(r, nn_instance.hostname())
                run.start_oozie(r)
                # Report the host Oozie was started on, not the NameNode.
                LOG.info("Oozie service at '%s' has been started",
                         oozie.hostname())

        hive_server = utils.get_hiveserver(cluster)
        if hive_server:
            with remote.get_remote(nn_instance) as r:
                run.hive_create_warehouse_dir(r)
            if c_helper.is_mysql_enable(cluster):
                with remote.get_remote(hive_server) as h:
                    # When Hive shares a host with Oozie, MySQL was already
                    # started there by the Oozie branch above.
                    if not oozie or hive_server.hostname() != oozie.hostname():
                        run.mysql_start(h, hive_server)
                    run.hive_create_db(h)
                    run.hive_metastore_start(h)
                LOG.info("Hive Metastore server at %s has been started",
                         hive_server.hostname())

        LOG.info('Cluster %s has been started successfully', cluster.name)
        self._set_cluster_info(cluster)
Esempio n. 23
0
    def start_cluster(self, cluster):
        """Start the Hadoop, Oozie and Hive services of ``cluster``.

        Order of operations: the NameNode is formatted and started, then
        secondary NameNodes and the JobTracker (if any); TaskTracker and
        DataNode processes are started through ``_start_tt_dn_processes``.
        If an Oozie instance exists it is set up and started; if a Hive
        server exists its warehouse dir is created and, when MySQL is
        enabled, its database and metastore are started. Finally
        ``_set_cluster_info`` is called.
        """
        instances = utils.get_instances(cluster)
        nn_instance = utils.get_namenode(cluster)
        jt_instance = utils.get_jobtracker(cluster)
        oozie = utils.get_oozie(cluster)
        hive_server = utils.get_hiveserver(cluster)

        with remote.get_remote(nn_instance) as r:
            run.format_namenode(r)
            run.start_processes(r, "namenode")

        for snn in utils.get_secondarynamenodes(cluster):
            run.start_processes(remote.get_remote(snn), "secondarynamenode")

        if jt_instance:
            run.start_processes(remote.get_remote(jt_instance), "jobtracker")

        self._start_tt_dn_processes(instances)

        # Pass arguments separately so formatting is left to the logger.
        LOG.info("Hadoop services in cluster %s have been started",
                 cluster.name)

        if oozie:
            with remote.get_remote(oozie) as r:
                if c_helper.is_mysql_enable(cluster):
                    run.mysql_start(r, oozie)
                    run.oozie_create_db(r)
                # The share lib call is given the NameNode hostname.
                run.oozie_share_lib(r, nn_instance.hostname)
                run.start_oozie(r)
                # Report the host Oozie was started on, not the NameNode.
                LOG.info("Oozie service at '%s' has been started",
                         oozie.hostname)

        if hive_server:
            with remote.get_remote(nn_instance) as r:
                run.hive_create_warehouse_dir(r)
            if c_helper.is_mysql_enable(cluster):
                with remote.get_remote(hive_server) as h:
                    # When Hive shares a host with Oozie, MySQL was already
                    # started there by the Oozie branch above.
                    if not oozie or hive_server.hostname != oozie.hostname:
                        run.mysql_start(h, hive_server)
                    run.hive_create_db(h)
                    run.hive_metastore_start(h)
                LOG.info("Hive Metastore server at %s has been started",
                         hive_server.hostname)

        LOG.info('Cluster %s has been started successfully', cluster.name)
        self._set_cluster_info(cluster)
Esempio n. 24
0
def run_job(ctx, job_execution):
    """Upload the workflow for ``job_execution`` and submit it to Oozie.

    Nothing is submitted unless the target cluster's status is "Active";
    otherwise ``job_execution`` is returned untouched.

    :param ctx: context handed to the conductor calls.
    :param job_execution: job execution record; its ``cluster_id``,
        ``job_id``, ``input_id``, ``output_id`` and ``job_configs`` are read.
    :returns: the result of ``conductor.job_execution_update`` (called with
        the new ``oozie_job_id`` and ``start_time``), or the unchanged
        ``job_execution`` when the cluster is not active.
    """
    cluster = conductor.cluster_get(ctx, job_execution.cluster_id)
    if cluster.status != "Active":
        return job_execution

    job = conductor.job_get(ctx, job_execution.job_id)
    # NOTE(review): this call uses context.ctx() rather than the ``ctx``
    # argument used by every other conductor call here - confirm intent.
    job_origin = conductor.job_origin_get(context.ctx(), job.job_origin_id)
    input_source = conductor.data_source_get(ctx, job_execution.input_id)
    output_source = conductor.data_source_get(ctx, job_execution.output_id)
    # TODO(nprivalova): should be removed after all features implemented
    validate(input_source, output_source, job)

    # Resolve the jobtracker once instead of re-scanning the cluster for
    # every step that needs it.
    jobtracker = u.get_jobtracker(cluster)

    wf_dir = create_workflow_dir(jobtracker, job)
    upload_job_files(jobtracker, wf_dir, job_origin)

    creator = workflow_factory.get_creator(job.type, job_origin)

    # Do other job type specific setup here, for example
    # uploading hive configuration
    creator.configure_workflow_if_needed(cluster, wf_dir)

    wf_xml = creator.get_workflow_xml(job_execution.job_configs,
                                      input_source, output_source)

    path_to_workflow = upload_workflow_file(jobtracker, wf_dir, wf_xml)

    jt_path = "%s:8021" % jobtracker.hostname
    nn_path = "hdfs://%s:8020" % u.get_namenode(cluster).hostname

    client = o.OozieClient(cluster["info"]["JobFlow"]["Oozie"] + "/oozie/")
    job_parameters = {
        "jobTracker": jt_path,
        "nameNode": nn_path,
        "user.name": "hadoop",
        "oozie.wf.application.path": "%s%s" % (nn_path, path_to_workflow),
        "oozie.use.system.libpath": "true",
    }

    oozie_job_id = client.add_job(x.create_hadoop_xml(job_parameters))
    client.run_job(oozie_job_id)
    job_execution = conductor.job_execution_update(
        ctx, job_execution,
        {"oozie_job_id": oozie_job_id,
         "start_time": datetime.datetime.now()})

    return job_execution
Esempio n. 25
0
def run_job(ctx, job_execution):
    """Upload the workflow for ``job_execution`` and submit it to Oozie.

    Nothing is submitted unless the target cluster's status is 'Active';
    otherwise ``job_execution`` is returned untouched.  For jobs of type
    'Hive' the hive site file is uploaded next to the workflow first.

    :param ctx: context handed to the conductor calls.
    :param job_execution: job execution record; its ``cluster_id``,
        ``job_id``, ``input_id`` and ``output_id`` are read.
    :returns: the result of ``conductor.job_execution_update`` (called with
        the new ``oozie_job_id`` and ``start_time``), or the unchanged
        ``job_execution`` when the cluster is not active.
    """
    cluster = conductor.cluster_get(ctx, job_execution.cluster_id)
    if cluster.status != 'Active':
        return job_execution

    job = conductor.job_get(ctx, job_execution.job_id)
    # NOTE(review): this call uses context.ctx() rather than the ``ctx``
    # argument used by every other conductor call here - confirm intent.
    job_origin = conductor.job_origin_get(context.ctx(), job.job_origin_id)
    input_source = conductor.data_source_get(ctx, job_execution.input_id)
    output_source = conductor.data_source_get(ctx, job_execution.output_id)
    # TODO(nprivalova): should be removed after all features implemented
    validate(input_source, output_source, job)

    # Resolve the jobtracker once instead of re-scanning the cluster for
    # every step that needs it.
    jobtracker = u.get_jobtracker(cluster)

    wf_dir = create_workflow_dir(jobtracker, job)
    upload_job_files(jobtracker, wf_dir, job_origin)

    if job.type == 'Hive':
        upload_hive_site(cluster, wf_dir)

    wf_xml = build_workflow_for_job(job.type, job_execution, job_origin,
                                    input_source, output_source)
    path_to_workflow = upload_workflow_file(jobtracker, wf_dir, wf_xml)

    jt_path = '%s:8021' % jobtracker.hostname
    nn_path = 'hdfs://%s:8020' % u.get_namenode(cluster).hostname

    client = o.OozieClient(cluster['info']['JobFlow']['Oozie'] + "/oozie/")
    job_parameters = {
        "jobTracker": jt_path,
        "nameNode": nn_path,
        "user.name": "hadoop",
        "oozie.wf.application.path": "%s%s" % (nn_path, path_to_workflow),
        "oozie.use.system.libpath": "true",
    }

    oozie_job_id = client.add_job(x.create_hadoop_xml(job_parameters))
    client.run_job(oozie_job_id)
    job_execution = conductor.job_execution_update(
        ctx, job_execution,
        {'oozie_job_id': oozie_job_id,
         'start_time': datetime.datetime.now()})

    return job_execution
Esempio n. 26
0
    def _extract_configs_to_extra(self, cluster):
        """Generate the per-node-group configuration payload for ``cluster``.

        :param cluster: cluster whose node groups are processed.
        :returns: dict keyed by node group id; every value is a dict with
            the generated ``'xml'`` configs and the ``'setup_script'``.
        """
        namenode = utils.get_namenode(cluster)
        jobtracker = utils.get_jobtracker(cluster)
        oozie_instance = utils.get_oozie(cluster)

        configs_by_group = {}
        for group in cluster.node_groups:
            # The jobtracker and Oozie hostnames are optional: None is
            # passed when the cluster has no such instance.
            xml_configs = c_helper.generate_xml_configs(
                group.configuration,
                group.storage_paths,
                namenode.hostname,
                jobtracker.hostname if jobtracker else None,
                oozie_instance.hostname if oozie_instance else None)
            setup_script = c_helper.generate_setup_script(
                group.storage_paths,
                c_helper.extract_environment_confs(group.configuration))
            configs_by_group[group.id] = {
                'xml': xml_configs,
                'setup_script': setup_script,
            }

        return configs_by_group
Esempio n. 27
0
    def _await_datanodes(self, cluster):
        """Block until the datanodes of ``cluster`` are reported as started.

        Returns immediately when the cluster has no datanodes.  Otherwise
        the count is polled through a remote to the namenode, sleeping one
        second between polls, and the wait is abandoned as soon as
        ``g.check_cluster_exists`` reports the cluster as gone.
        """
        expected = len(utils.get_datanodes(cluster))
        if expected < 1:
            return

        LOG.info("Waiting %s datanodes to start up" % expected)
        with remote.get_remote(utils.get_namenode(cluster)) as r:
            while not run.check_datanodes_count(r, expected):
                context.sleep(1)

                if not g.check_cluster_exists(cluster):
                    LOG.info(
                        'Stop waiting datanodes on cluster %s since it has '
                        'been deleted' % cluster.name)
                    return

            # The loop only falls through once the expected count is seen.
            LOG.info('Datanodes on cluster %s has been started' %
                     cluster.name)
Esempio n. 28
0
    def _extract_configs_to_extra(self, cluster):
        """Generate the per-node-group configuration payload for ``cluster``.

        The hostnames of all Oozie instances of the cluster are handed to
        the XML config generator as a list.

        :param cluster: cluster whose node groups are processed.
        :returns: dict keyed by node group id; every value is a dict with
            the generated ``'xml'`` configs and the ``'setup_script'``.
        """
        namenode = utils.get_namenode(cluster)
        jobtracker = utils.get_jobtracker(cluster)
        oozie_hosts = [inst.hostname for inst in utils.get_oozies(cluster)]

        payload = {}
        for group in cluster.node_groups:
            xml_configs = c_helper.generate_xml_configs(
                group.configuration,
                group.storage_paths,
                namenode.hostname,
                # None when the cluster has no jobtracker.
                jobtracker.hostname if jobtracker else None,
                oozie_hosts)
            setup_script = c_helper.generate_setup_script(
                group.storage_paths,
                c_helper.extract_environment_confs(group.configuration))
            payload[group.id] = {
                'xml': xml_configs,
                'setup_script': setup_script,
            }

        return payload
Esempio n. 29
0
    def decommission_nodes(self, cluster, instances):
        """Decommission ``instances`` from the services of ``cluster``.

        Each instance is removed from the cluster's tasktracker and/or
        datanode list according to its node group's processes; the
        surviving lists are then handed to the ``sc`` decommission helpers
        for the services that were actually affected.

        :param cluster: cluster the instances belong to.
        :param instances: instances being removed.
        """
        surviving_tts = utils.get_tasktrackers(cluster)
        surviving_dns = utils.get_datanodes(cluster)
        touches_dns = touches_tts = False

        for instance in instances:
            processes = instance.node_group.node_processes
            if 'datanode' in processes:
                surviving_dns.remove(instance)
                touches_dns = True
            if 'tasktracker' in processes:
                surviving_tts.remove(instance)
                touches_tts = True

        namenode = utils.get_namenode(cluster)
        jobtracker = utils.get_jobtracker(cluster)

        # Tasktrackers are handled before datanodes.
        if touches_tts:
            sc.decommission_tt(jobtracker, instances, surviving_tts)
        if touches_dns:
            sc.decommission_dn(namenode, instances, surviving_dns)
Esempio n. 30
0
    def _await_datanodes(self, cluster):
        """Wait for the datanodes of ``cluster`` to come up.

        A cluster without datanodes returns at once.  Otherwise the
        datanode count is checked through a remote to the namenode once per
        second until it matches, or until the cluster no longer exists.
        """
        wanted = len(utils.get_datanodes(cluster))
        if not wanted:
            return

        LOG.info("Waiting %s datanodes to start up" % wanted)
        with remote.get_remote(utils.get_namenode(cluster)) as r:
            while True:
                if run.check_datanodes_count(r, wanted):
                    outcome = 'Datanodes on cluster %s has been started'
                    break

                context.sleep(1)

                if not g.check_cluster_exists(cluster):
                    outcome = ('Stop waiting datanodes on cluster %s since '
                               'it has been deleted')
                    break

            # Both exits from the loop log exactly one message and return.
            LOG.info(outcome % cluster.name)
Esempio n. 31
0
    def decommission_nodes(self, cluster, instances):
        """Remove ``instances`` from the tasktracker/datanode pools.

        The node group processes of every instance decide which pool it is
        dropped from.  A service is only decommissioned (through ``sc``)
        when at least one of the instances ran its process.

        :param cluster: cluster the instances belong to.
        :param instances: instances being removed.
        """
        tt_pool = utils.get_tasktrackers(cluster)
        dn_pool = utils.get_datanodes(cluster)
        leaving_dns = []
        leaving_tts = []

        for node in instances:
            if 'datanode' in node.node_group.node_processes:
                dn_pool.remove(node)
                leaving_dns.append(node)
            if 'tasktracker' in node.node_group.node_processes:
                tt_pool.remove(node)
                leaving_tts.append(node)

        namenode = utils.get_namenode(cluster)
        jobtracker = utils.get_jobtracker(cluster)

        # A non-empty "leaving" list means that service lost an instance.
        if leaving_tts:
            sc.decommission_tt(jobtracker, instances, tt_pool)
        if leaving_dns:
            sc.decommission_dn(namenode, instances, dn_pool)
Esempio n. 32
0
def run_job(ctx, job_execution):
    """Upload the workflow for ``job_execution`` and submit it to Oozie.

    Nothing is submitted unless the target cluster's status is 'Active'.

    :param ctx: context handed to the conductor calls.
    :param job_execution: job execution record; its ``cluster_id``,
        ``job_id``, ``input_id`` and ``output_id`` are read.
    :returns: the result of ``conductor.job_execution_update`` (called with
        the new ``oozie_job_id`` and ``start_time``), or the unchanged
        ``job_execution`` when the cluster is not active.
    """
    cluster = conductor.cluster_get(ctx, job_execution.cluster_id)
    if cluster.status != 'Active':
        # Return the same kind of object as the success path; the bare
        # status value used to leak out of this branch only.
        return job_execution

    job = conductor.job_get(ctx, job_execution.job_id)
    # NOTE(review): this call uses context.ctx() rather than the ``ctx``
    # argument used by every other conductor call here - confirm intent.
    job_origin = conductor.job_origin_get(context.ctx(), job.job_origin_id)
    input_source = conductor.data_source_get(ctx, job_execution.input_id)
    output_source = conductor.data_source_get(ctx, job_execution.output_id)
    # TODO(nprivalova): should be removed after all features implemented
    validate(input_source, output_source, job)

    # Resolve the jobtracker once instead of re-scanning the cluster for
    # every step that needs it.
    jobtracker = u.get_jobtracker(cluster)

    wf_dir = create_workflow_dir(jobtracker, job)
    upload_job_file(jobtracker, wf_dir, job_origin, job)

    wf_xml = build_workflow_for_job(job.type, input_source, output_source)
    path_to_workflow = upload_workflow_file(jobtracker, wf_dir, wf_xml)

    jt_path = '%s:8021' % jobtracker.hostname
    nn_path = 'hdfs://%s:8020' % u.get_namenode(cluster).hostname

    client = o.OozieClient(cluster['info']['JobFlow']['Oozie'] + "/oozie/")
    job_parameters = {
        "jobTracker": jt_path,
        "nameNode": nn_path,
        "user.name": "hadoop",
        "oozie.wf.application.path": "%s%s" % (nn_path, path_to_workflow),
        "oozie.use.system.libpath": "true"
    }

    oozie_job_id = client.add_job(x.create_hadoop_xml(job_parameters))
    client.run_job(oozie_job_id)
    job_execution = conductor.job_execution_update(
        ctx, job_execution, {
            'oozie_job_id': oozie_job_id,
            'start_time': datetime.datetime.now()
        })

    return job_execution
Esempio n. 33
0
    def _set_cluster_info(self, cluster):
        """Publish the service URLs of ``cluster`` as its ``info``.

        An entry is written for each of jobtracker, namenode and Oozie that
        the cluster has; the resulting dict is stored through
        ``conductor.cluster_update``.
        """
        namenode = utils.get_namenode(cluster)
        jobtracker = utils.get_jobtracker(cluster)
        oozie_instance = utils.get_oozie(cluster)

        # (instance, info section, entry label, URL template)
        endpoints = (
            (jobtracker, 'MapReduce', 'Web UI', 'http://%s:50030'),
            (namenode, 'HDFS', 'Web UI', 'http://%s:50070'),
            (oozie_instance, 'JobFlow', 'Oozie', 'http://%s:11000'),
        )

        info = {}
        for instance, section, label, url_template in endpoints:
            if instance:
                info[section] = {label: url_template % instance.management_ip}

        conductor.cluster_update(context.ctx(), cluster, {'info': info})
Esempio n. 34
0
    def _set_cluster_info(self, cluster):
        """Fill ``cluster.info`` with the service URLs of ``cluster``.

        The jobtracker and namenode entries are written only when the
        instance exists and has a management IP.  The 'JobFlow' section is
        always reset and then receives one entry per Oozie instance that
        has a management IP.  ``cluster.info`` is modified in place;
        nothing else is called to store it here.
        """
        namenode = utils.get_namenode(cluster)
        jobtracker = utils.get_jobtracker(cluster)

        details = cluster.info

        if jobtracker and jobtracker.management_ip:
            mapreduce_ui = 'http://%s:50030' % jobtracker.management_ip
            details['MapReduce'] = {'Web UI': mapreduce_ui}
        if namenode and namenode.management_ip:
            hdfs_ui = 'http://%s:50070' % namenode.management_ip
            details['HDFS'] = {'Web UI': hdfs_ui}

        # Assign the empty section first and fill it afterwards, keeping
        # the assign-then-mutate order on ``cluster.info``.
        details['JobFlow'] = {}
        for oozie_instance in utils.get_oozies(cluster):
            if not oozie_instance.management_ip:
                continue
            label = 'Oozie: %s' % oozie_instance.hostname
            url = 'http://%s:11000' % oozie_instance.management_ip
            details['JobFlow'][label] = url
Esempio n. 35
0
    def _set_cluster_info(self, cluster):
        """Store the web endpoints of ``cluster`` in its ``info`` field.

        Sections are added for the jobtracker, the namenode and Oozie when
        the cluster has the respective instance, and the dict is saved
        with ``conductor.cluster_update``.
        """
        namenode = utils.get_namenode(cluster)
        jobtracker = utils.get_jobtracker(cluster)
        oozie_instance = utils.get_oozie(cluster)
        endpoints = {}

        if jobtracker:
            mapreduce_ui = 'http://%s:50030' % jobtracker.management_ip
            endpoints['MapReduce'] = {'Web UI': mapreduce_ui}

        if namenode:
            hdfs_ui = 'http://%s:50070' % namenode.management_ip
            endpoints['HDFS'] = {'Web UI': hdfs_ui}

        if oozie_instance:
            oozie_url = 'http://%s:11000' % oozie_instance.management_ip
            endpoints['JobFlow'] = {'Oozie': oozie_url}

        conductor.cluster_update(context.ctx(), cluster, {'info': endpoints})
Esempio n. 36
0
    def _set_cluster_info(self, cluster):
        """Store the service endpoints of ``cluster`` in its ``info`` field.

        Ports for the MapReduce and HDFS entries are read with
        ``c_helper.get_port_from_config``; the Oozie port is hard-coded.
        The resulting dict is saved with ``conductor.cluster_update``.
        """
        namenode = utils.get_namenode(cluster)
        jobtracker = utils.get_jobtracker(cluster)
        oozie_instance = utils.get_oozie(cluster)
        endpoints = {}

        if jobtracker:
            http_port = c_helper.get_port_from_config(
                'MapReduce', 'mapred.job.tracker.http.address', cluster)
            rpc_port = c_helper.get_port_from_config(
                'MapReduce', 'mapred.job.tracker', cluster)

            web_ui = 'http://%s:%s' % (jobtracker.management_ip, http_port)
            tracker = '%s:%s' % (jobtracker.hostname(), rpc_port)
            endpoints['MapReduce'] = {'Web UI': web_ui, 'JobTracker': tracker}

        if namenode:
            http_port = c_helper.get_port_from_config(
                'HDFS', 'dfs.http.address', cluster)
            fs_port = c_helper.get_port_from_config(
                'HDFS', 'fs.default.name', cluster)

            web_ui = 'http://%s:%s' % (namenode.management_ip, http_port)
            fs_url = 'hdfs://%s:%s' % (namenode.hostname(), fs_port)
            endpoints['HDFS'] = {'Web UI': web_ui, 'NameNode': fs_url}

        if oozie_instance:
            # TODO(yrunts) change from hardcode value
            oozie_url = 'http://%s:11000' % oozie_instance.management_ip
            endpoints['JobFlow'] = {'Oozie': oozie_url}

        conductor.cluster_update(context.ctx(), cluster, {'info': endpoints})
Esempio n. 37
0
    def _extract_configs_to_extra(self, cluster):
        """Generate the configuration payload for ``cluster``.

        :param cluster: cluster whose node groups are processed.
        :returns: dict with one entry per node group id (generated ``'xml'``
            configs and ``'setup_script'``), plus ``'hive_mysql_passwd'``
            when the cluster has a Hive server and ``'topology_data'`` when
            data locality is enabled.
        """
        namenode = utils.get_namenode(cluster)
        jobtracker = utils.get_jobtracker(cluster)
        oozie_instance = utils.get_oozie(cluster)
        hive_server = utils.get_hiveserver(cluster)

        payload = {}

        # A fresh password is generated only when a Hive server exists.
        hive_passwd = None
        if hive_server:
            hive_passwd = uuidutils.generate_uuid()
            payload['hive_mysql_passwd'] = hive_passwd

        for group in cluster.node_groups:
            xml_configs = c_helper.generate_xml_configs(
                group.configuration,
                group.storage_paths,
                namenode.hostname,
                jobtracker.hostname if jobtracker else None,
                oozie_instance.hostname if oozie_instance else None,
                hive_server.hostname if hive_server else None,
                hive_passwd)
            # append_oozie is set only for the node group of the Oozie
            # instance.
            hosts_oozie = (oozie_instance is not None
                           and oozie_instance.node_group.id == group.id)
            setup_script = c_helper.generate_setup_script(
                group.storage_paths,
                c_helper.extract_environment_confs(group.configuration),
                append_oozie=hosts_oozie)
            payload[group.id] = {
                'xml': xml_configs,
                'setup_script': setup_script,
            }

        if c_helper.is_data_locality_enabled(cluster):
            topology = th.generate_topology_map(
                cluster, CONF.enable_hypervisor_awareness)
            # One "<key> <value>" line per entry, newline-terminated.
            topology_lines = [key + " " + value
                              for key, value in topology.items()]
            payload['topology_data'] = "\n".join(topology_lines) + "\n"

        return payload
Esempio n. 38
0
    def _set_cluster_info(self, cluster):
        """Publish the service endpoints of ``cluster`` as its ``info``.

        MapReduce and HDFS ports come from ``c_helper.get_port_from_config``
        while the Oozie port is a fixed value.  The dict is stored through
        ``conductor.cluster_update``.
        """
        def configured_port(service, option):
            # Look up the port configured for ``option`` of ``service``.
            return c_helper.get_port_from_config(service, option, cluster)

        namenode = utils.get_namenode(cluster)
        jobtracker = utils.get_jobtracker(cluster)
        oozie_instance = utils.get_oozie(cluster)
        info = {}

        if jobtracker:
            ui_port = configured_port('MapReduce',
                                      'mapred.job.tracker.http.address')
            jt_port = configured_port('MapReduce', 'mapred.job.tracker')
            info['MapReduce'] = {
                'Web UI': 'http://%s:%s' % (jobtracker.management_ip,
                                            ui_port),
                'JobTracker': '%s:%s' % (jobtracker.hostname(), jt_port),
            }

        if namenode:
            ui_port = configured_port('HDFS', 'dfs.http.address')
            nn_port = configured_port('HDFS', 'fs.default.name')
            info['HDFS'] = {
                'Web UI': 'http://%s:%s' % (namenode.management_ip, ui_port),
                'NameNode': 'hdfs://%s:%s' % (namenode.hostname(), nn_port),
            }

        if oozie_instance:
            # TODO(yrunts) change from hardcode value
            info['JobFlow'] = {
                'Oozie': 'http://%s:11000' % oozie_instance.management_ip,
            }

        conductor.cluster_update(context.ctx(), cluster, {'info': info})
Esempio n. 39
0
    def start_cluster(self, cluster):
        """Start the Hadoop services of ``cluster`` and record its info.

        HDFS is started first (namenode format and start, secondary
        namenodes, datanodes).  MapReduce (jobtracker and tasktrackers) is
        started only when the cluster has a jobtracker, and Oozie only when
        it has an Oozie instance.

        :param cluster: cluster whose service instances are resolved
            through ``utils``.
        """
        nn_instance = utils.get_namenode(cluster)
        datanodes = utils.get_datanodes(cluster)
        jt_instance = utils.get_jobtracker(cluster)
        tasktrackers = utils.get_tasktrackers(cluster)
        oozie = utils.get_oozie(cluster)

        with remote.get_remote(nn_instance) as r:
            run.format_namenode(r)
            run.start_process(r, "namenode")

        snns = utils.get_secondarynamenodes(cluster)
        if snns:
            for snn in snns:
                run.start_process(remote.get_remote(snn), "secondarynamenode")
        for dn in datanodes:
            run.start_process(remote.get_remote(dn), "datanode")
        LOG.info("HDFS service at '%s' has been started",
                 nn_instance.hostname)

        if jt_instance:
            run.start_process(remote.get_remote(jt_instance), "jobtracker")
            for tt in tasktrackers:
                run.start_process(remote.get_remote(tt), "tasktracker")
            LOG.info("MapReduce service at '%s' has been started",
                     jt_instance.hostname)

        if oozie:
            with remote.get_remote(oozie) as r:
                run.oozie_share_lib(r, nn_instance.hostname)
                run.start_oozie(r)
                # Report the host Oozie was started on; the namenode
                # hostname is only an input to the share-lib step above.
                LOG.info("Oozie service at '%s' has been started",
                         oozie.hostname)

        LOG.info('Cluster %s has been started successfully', cluster.name)
        self._set_cluster_info(cluster)