Example 1
def reinstall_spark():
    spark_master_host = leadership.leader_get('master-fqdn')
    peers = []
    zks = []
    if is_state('zookeeper.ready'):
        # If ZK is available we are in HA; we do not want reconfigurations when
        # a leader fails, since HA takes care of this.
        spark_master_host = ''
        zk = RelationBase.from_state('zookeeper.ready')
        zks = zk.zookeepers()
        # We need to reconfigure Spark when in HA and peers change; ignore
        # otherwise.
        peers = get_spark_peers()

    deployment_matrix = {
        'spark_master': spark_master_host,
        'yarn_ready': is_state('hadoop.yarn.ready'),
        'hdfs_ready': is_state('hadoop.hdfs.ready'),
        'zookeepers': zks,
        'peers': peers,
    }

    if not data_changed('deployment_matrix', deployment_matrix):
        return

    hookenv.status_set('maintenance', 'configuring spark')
    hadoop = (RelationBase.from_state('hadoop.yarn.ready') or
              RelationBase.from_state('hadoop.hdfs.ready'))
    if install_spark(hadoop, zks):
        if is_state('hadoop.yarn.ready'):
            set_deployment_mode_state('spark.yarn.installed')
        else:
            set_deployment_mode_state('spark.standalone.installed')

        report_status()
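Note: the snippets in this listing are shown without the charms.reactive decorators that bind them to states. As a hedged sketch (the state names are assumptions for illustration, not the charm's actual triggers), a handler like reinstall_spark above would typically be wired up along these lines:

from charms.reactive import when, when_not

@when('leadership.set.master-fqdn')   # set by the leadership layer once master-fqdn is published
@when_not('spark.started')            # assumed gate state; the real charm may key on other states
def reinstall_spark_handler():
    reinstall_spark()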
Example 2
def install_spark_yarn():
    """
    Called in 'yarn-*' mode after Juju has elected a leader. The
    'hadoop.yarn.ready' state must be set.
    """
    hosts = {
        'spark-master': leadership.leader_get('master-fqdn'),
    }
    hadoop = (RelationBase.from_state('hadoop.yarn.ready') or
              RelationBase.from_state('hadoop.hdfs.ready'))
    rms = hadoop.resourcemanagers()
    hosts['resourcemanager'] = rms[0]

    # Probably don't need to check this since yarn.ready implies hdfs.ready
    # for us, but it doesn't hurt.
    if is_state('hadoop.hdfs.ready'):
        nns = hadoop.namenodes()
        hosts['namenode'] = nns[0]

    # Always include native hadoop libs in yarn mode; add cuda libs if present.
    extra_libs = ["/usr/lib/hadoop/lib/native"]
    if is_state('cuda.installed'):
        extra_libs.append("/usr/local/cuda/lib64")

    spark = Spark()
    spark.configure(hosts, zk_units=None, peers=None, extra_libs=extra_libs)
    set_deployment_mode_state('spark.yarn.installed')
Example 3
def install_spark_yarn():
    """
    Called in 'yarn-*' mode after Juju has elected a leader. The
    'hadoop.yarn.ready' state must be set.
    """
    hosts = {
        'spark-master': leadership.leader_get('master-fqdn'),
    }
    hadoop = (RelationBase.from_state('hadoop.yarn.ready')
              or RelationBase.from_state('hadoop.hdfs.ready'))
    rms = hadoop.resourcemanagers()
    hosts['resourcemanager'] = rms[0]

    # Probably don't need to check this since yarn.ready implies hdfs.ready
    # for us, but it doesn't hurt.
    if is_state('hadoop.hdfs.ready'):
        nns = hadoop.namenodes()
        hosts['namenode'] = nns[0]

    # Always include native hadoop libs in yarn mode; add cuda libs if present.
    extra_libs = ["/usr/lib/hadoop/lib/native"]
    if is_state('cuda.installed'):
        extra_libs.append("/usr/local/cuda/lib64")

    spark = Spark()
    spark.configure(hosts, zk_units=None, peers=None, extra_libs=extra_libs)
    set_deployment_mode_state('spark.yarn.installed')
Example 4
def reconfigure_spark():
    config = hookenv.config()
    mode = config['spark_execution_mode']
    hookenv.status_set('maintenance',
                       'Changing default execution mode to {}'.format(mode))

    hadoop = (RelationBase.from_state('hadoop.yarn.ready') or
              RelationBase.from_state('hadoop.hdfs.ready'))

    install_spark(hadoop)
    report_status()
Example 5
def configure_admin():
    remove_state("jenkins.configured.admin")
    api = Api()

    status_set("maintenance", "Configuring Jenkins public url")
    configuration = Configuration()
    needs_restart = configuration.set_url()
    if needs_restart:
        status_set("maintenance", "Restarting Jenkins")
        service_restart('jenkins')
        api.wait()

    status_set("maintenance", "Configuring proxy settings")
    configuration.configure_proxy()
    service_restart('jenkins')
    api.wait()

    status_set("maintenance", "Configuring admin user")
    users = Users()
    users.configure_admin()

    api.reload()
    api.wait()  # Wait for the service to be fully up
    # Inform any extension that the username/password changed
    if get_state("extension.connected"):
        extension_relation = (RelationBase.from_state("extension.connected"))
        extension_relation.joined()

    set_state("jenkins.configured.admin")
Example 6
def reconfigure_spark():
    config = hookenv.config()
    mode = config['spark_execution_mode']
    hookenv.status_set('maintenance',
                       'changing default execution mode to {}'.format(mode))

    hadoop = (RelationBase.from_state('hadoop.yarn.ready') or
              RelationBase.from_state('hadoop.hdfs.ready'))

    zks = None
    if is_state('zookeeper.ready'):
        zk = RelationBase.from_state('zookeeper.ready')
        zks = zk.zookeepers()

    if install_spark(hadoop, zks):
        report_status()
Example 7
def set_java_home():
    java = (RelationBase.from_state('java.ready')
            or RelationBase.from_state('hadoop-plugin.java.ready'))
    java_home = java.java_home()
    unitdata.kv().set('java_home', java_home)
    unitdata.kv().set('java_version', java.java_version())
    if data_changed('java_home', java_home):
        utils.re_edit_in_place(
            '/etc/environment', {
                r'#? *JAVA_HOME *=.*': 'JAVA_HOME={}'.format(java_home),
            },
            append_non_matches=True)

        # If we've potentially setup services with the previous
        # version of Java, set a flag that a layer can use to trigger
        # a restart of those services.
        if is_state('bigtop.available'):
            set_state('bigtop.java.changed')
Example 8
def handle_peers():
    '''
    We use HBase peers to keep track of the RegionServer IP addresses in a
    cluster. Use get_nodes() from the appropriate peer relation to retrieve
    a list of peer tuples, e.g.:
        [('hbase/0', '172.31.5.161'), ('hbase/2', '172.31.5.11')]

    Depending on the state, this handler will add or remove peer IP addresses
    from the regionservers config file.
    '''
    if is_state('hbpeer.departed'):
        hbpeer = RelationBase.from_state('hbpeer.departed')
        is_departing = True
        message = 'removing hbase peer(s)'
    else:
        hbpeer = RelationBase.from_state('hbpeer.joined')
        is_departing = False
        message = 'adding hbase peer(s)'

    # Make sure we have a valid relation object
    if hbpeer:
        nodes = hbpeer.get_nodes()
    else:
        hookenv.log('Ignoring unknown HBase peer state')
        return

    hookenv.status_set('maintenance', message)
    hbase = HBase()
    ip_addrs = [node[1] for node in nodes]
    hookenv.log('{}: {}'.format(message, ip_addrs))
    hbase.update_regionservers(ip_addrs, remove=is_departing)

    # NB: the rs conf file will always change when handling peer updates, but
    # we still include this condition to keep the files_changed kv current.
    if any_file_changed(['/etc/hbase/conf/regionservers']):
        hbase.restart()

    # Dismiss appropriate state now that we've handled the peer
    if is_departing:
        hbpeer.dismiss_departed()
    else:
        hbpeer.dismiss_joined()
    report_status()
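Several handlers in this listing (handle_peers above, configure_flume further down) restart a service only when a rendered config file actually changed on disk. A minimal sketch of that idiom, with a hypothetical config path and service name:

from charmhelpers.core import host
from charms.reactive.helpers import any_file_changed

def restart_if_changed():
    # any_file_changed() hashes the listed files and compares the hashes with
    # those recorded on the previous call; it returns True the first time and
    # whenever any file's contents differ.
    if any_file_changed(['/etc/example/example.conf']):  # hypothetical path
        host.service_restart('example')                  # hypothetical service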
Example 9
def reinstall_spark():
    spark_master_host = leadership.leader_get('master-fqdn')
    deployment_matrix = {
        'spark_master': spark_master_host,
        'yarn_ready': is_state('hadoop.yarn.ready'),
        'hdfs_ready': is_state('hadoop.hdfs.ready'),
    }

    if not data_changed('deployment_matrix', deployment_matrix):
        return

    hookenv.status_set('maintenance', 'Configuring Spark')
    hadoop = (RelationBase.from_state('hadoop.yarn.ready') or
              RelationBase.from_state('hadoop.hdfs.ready'))
    install_spark(hadoop)
    if is_state('hadoop.yarn.ready'):
        set_deployment_mode_state('spark.yarn.installed')
    else:
        set_deployment_mode_state('spark.standalone.installed')

    report_status()
Example 10
def install_hive(hadoop):
    '''
    Anytime our dependencies are available, check to see if we have a valid
    reason to (re)install. These include:
    - initial install
    - HBase has joined/departed
    '''
    # Hive cannot handle '-' in the metastore db name, and MySQL uses the
    # service name to name the db.
    if "-" in hookenv.service_name():
        hookenv.status_set('blocked', "application name may not contain '-'; "
                                      "redeploy with a different name")
        return

    # Get hbase connection dict if it's available
    if is_state('hbase.ready'):
        hbase = RelationBase.from_state('hbase.ready')
        hbserver = hbase.hbase_servers()[0]
    else:
        hbserver = None

    # Use this to determine if we need to reinstall
    deployment_matrix = {
        'hbase': hbserver,
    }

    # Handle nuances when installing versus re-installing
    if not is_state('hive.installed'):
        prefix = "installing"

        # On initial install, prime our kv with the current deployment matrix.
        # Subsequent calls will use this to determine if a reinstall is needed.
        data_changed('deployment_matrix', deployment_matrix)
    else:
        prefix = "configuring"

        # Return if our matrix has not changed
        if not data_changed('deployment_matrix', deployment_matrix):
            return

    hookenv.status_set('maintenance', '{} hive'.format(prefix))
    hookenv.log("{} hive with: {}".format(prefix, deployment_matrix))
    hive = Hive()
    hive.install(hbase=hbserver)
    hive.restart()
    hive.open_ports()
    set_state('hive.installed')
    report_status()

    # set app version string for juju status output
    hive_version = get_package_version('hive') or 'unknown'
    hookenv.application_version_set(hive_version)
Example 11
def install_spark_yarn():
    """
    Called in 'yarn-*' mode after Juju has elected a leader. The
    'hadoop.yarn.ready' state must be set.
    """
    hosts = {
        'spark-master': leadership.leader_get('master-fqdn'),
    }
    hadoop = (RelationBase.from_state('hadoop.yarn.ready') or
              RelationBase.from_state('hadoop.hdfs.ready'))
    rms = hadoop.resourcemanagers()
    hosts['resourcemanager'] = rms[0]

    # Probably don't need to check this since yarn.ready implies hdfs.ready
    # for us, but it doesn't hurt.
    if is_state('hadoop.hdfs.ready'):
        nns = hadoop.namenodes()
        hosts['namenode'] = nns[0]

    spark = Spark()
    spark.configure(hosts, zk_units=None, peers=None)
    set_deployment_mode_state('spark.yarn.installed')
Example 12
def configure_admin():
    remove_state("jenkins.configured.admin")
    status_set("maintenance", "Configuring admin user")
    users = Users()
    users.configure_admin()
    api = Api()
    api.reload()
    api.wait()  # Wait for the service to be fully up
    # Inform any extension that the username/password changed
    if get_state("extension.connected"):
        extension_relation = (RelationBase.from_state("extension.connected"))
        extension_relation.joined()

    set_state("jenkins.configured.admin")
Example 13
def restart_services():
    dc = get_dist_config()
    spark = Spark(dc)
    peers = RelationBase.from_state('sparkpeers.joined')
    is_scaled = peers and len(peers.get_nodes()) > 0
    is_master = spark.is_master()
    is_slave = not is_master or not is_scaled
    master_url = spark.get_master()
    master_ip = spark.get_master_ip()
    if data_changed('insightedge.master_url', master_url):
        stop_datagrid_services()
        start_datagrid_services(master_url, master_ip, is_master, is_slave)
    set_state('insightedge.ready')
    hookenv.status_set('active', 'ready')
Example 14
def configure_flume(sink):
    flume = Flume()
    flume.configure_flume({'agents': sink.agents()})
    if any_file_changed([flume.config_file]):
        # the port is currently hard-coded in the rsyslog-forwarder-ha charm
        # must run as root to listen on low-number UDP port
        hookenv.status_set('maintenance', 'Configuring Flume')
        hookenv.open_port(hookenv.config('source_port'))
        flume.restart(user='******')
        set_state('flume-syslog.started')

    syslog = RelationBase.from_state('syslog.joined')
    if syslog is None:
        hookenv.status_set('active', 'Ready')
    else:
        count = syslog.client_count()
        hookenv.status_set('active', 'Ready (Syslog sources: %s)' % count)
Example 15
def configure_flume(sink):
    flume = Flume()
    flume.configure_flume({'agents': sink.agents()})
    if any_file_changed([flume.config_file]):
        # the port is currently hard-coded in the rsyslog-forwarder-ha charm
        # must run as root to listen on low-number UDP port
        hookenv.status_set('maintenance', 'Configuring Flume')
        hookenv.open_port(hookenv.config('source_port'))
        flume.restart(user='******')
        set_state('flume-syslog.started')

    syslog = RelationBase.from_state('syslog.joined')
    if syslog is None:
        hookenv.status_set('active', 'Ready')
    else:
        hookenv.status_set('active', 'Ready (Syslog sources: {})'
                                     .format(syslog.client_count()))
Example 16
def reinstall_spark(force=False):
    """
    Gather the state of our deployment and (re)install when leaders, hadoop,
    sparkpeers, or zookeepers change. In the future this should also
    fire when Cassandra or any other storage comes or goes. Config changed
    events will also call this method, but that is invoked with a separate
    handler below.

    Use a deployment-matrix dict to track changes and (re)install as needed.
    """
    spark_master_host = leadership.leader_get('master-fqdn')
    if not spark_master_host:
        hookenv.status_set('maintenance', 'juju leader not elected yet')
        return

    mode = hookenv.config()['spark_execution_mode']
    peers = None
    zks = None

    # If mode is standalone and ZK is ready, we are in HA. Do not consider
    # the master_host from juju leadership in our matrix. ZK handles this.
    if (mode == 'standalone' and is_state('zookeeper.ready')):
        spark_master_host = ''
        zk = RelationBase.from_state('zookeeper.ready')
        zks = zk.zookeepers()
        # peers are only used to set our MASTER_URL in standalone HA mode
        peers = get_spark_peers()

    # Construct a deployment matrix
    sample_data = hookenv.resource_get('sample-data')
    deployment_matrix = {
        'hdfs_ready': is_state('hadoop.hdfs.ready'),
        'peers': peers,
        'sample_data': host.file_hash(sample_data) if sample_data else None,
        'spark_master': spark_master_host,
        'yarn_ready': is_state('hadoop.yarn.ready'),
        'zookeepers': zks,
    }

    # No-op if we are not forcing a reinstall or our matrix is unchanged.
    if not (force or data_changed('deployment_matrix', deployment_matrix)):
        report_status()
        return

    # (Re)install based on our execution mode
    hookenv.status_set('maintenance',
                       'configuring spark in {} mode'.format(mode))
    hookenv.log("Configuring spark with deployment matrix: {}".format(
        deployment_matrix))

    if mode.startswith('yarn') and is_state('hadoop.yarn.ready'):
        install_spark_yarn()
    elif mode.startswith('local') or mode == 'standalone':
        install_spark_standalone(zks, peers)
    else:
        # Something's wrong (probably requested yarn without yarn.ready).
        remove_state('spark.started')
        report_status()
        return

    # restart services to pick up possible config changes
    spark = Spark()
    spark.stop()
    spark.start()

    set_state('spark.started')
    report_status()
Example 17
def get_spark_peers():
    nodes = [(hookenv.local_unit(), hookenv.unit_private_ip())]
    sparkpeer = RelationBase.from_state('sparkpeers.joined')
    if sparkpeer:
        nodes.extend(sorted(sparkpeer.get_nodes()))
    return nodes
Example 18
def reinstall_spark(force=False):
    """
    Gather the state of our deployment and (re)install when leaders, hadoop,
    sparkpeers, or zookeepers change. In the future this should also
    fire when Cassandra or any other storage comes or goes. Config changed
    events will also call this method, but that is invoked with a separate
    handler below.

    Use a deployment-matrix dict to track changes and (re)install as needed.
    """
    spark_master_host = leadership.leader_get('master-fqdn')
    if not spark_master_host:
        hookenv.status_set('maintenance', 'juju leader not elected yet')
        return

    mode = hookenv.config()['spark_execution_mode']
    peers = None
    zks = None

    # If mode is standalone and ZK is ready, we are in HA. Do not consider
    # the master_host from juju leadership in our matrix. ZK handles this.
    if (mode == 'standalone' and is_state('zookeeper.ready')):
        spark_master_host = ''
        zk = RelationBase.from_state('zookeeper.ready')
        zks = zk.zookeepers()
        # peers are only used to set our MASTER_URL in standalone HA mode
        peers = get_spark_peers()

    # Construct a deployment matrix
    sample_data = hookenv.resource_get('sample-data')
    deployment_matrix = {
        'hdfs_ready': is_state('hadoop.hdfs.ready'),
        'peers': peers,
        'sample_data': host.file_hash(sample_data) if sample_data else None,
        'spark_master': spark_master_host,
        'yarn_ready': is_state('hadoop.yarn.ready'),
        'zookeepers': zks,
    }

    # No-op if we are not forcing a reinstall or our matrix is unchanged.
    if not (force or data_changed('deployment_matrix', deployment_matrix)):
        report_status()
        return

    # (Re)install based on our execution mode
    hookenv.status_set('maintenance', 'configuring spark in {} mode'.format(mode))
    hookenv.log("Configuring spark with deployment matrix: {}".format(deployment_matrix))

    if mode.startswith('yarn') and is_state('hadoop.yarn.ready'):
        install_spark_yarn()
    elif mode.startswith('local') or mode == 'standalone':
        install_spark_standalone(zks, peers)
    else:
        # Something's wrong (probably requested yarn without yarn.ready).
        remove_state('spark.started')
        report_status()
        return

    # restart services to pick up possible config changes
    spark = Spark()
    spark.stop()
    spark.start()

    set_state('spark.started')
    report_status()
Example 19
        if chosen:
            role = utils.ha_node_state(local_hostname) or "down"
        else:
            role = "extra"
        if not degraded:
            extra = "HA {}, with {} fail-over".format(role, failover)
        else:
            missing = " and ".join(
                filter(
                    None,
                    [
                        "NameNode" if not clustered else None,
                        "JournalNodes" if not quorum else None,
                        "active" if not active else None,
                        "standby" if not standby else None,
                    ],
                )
            )
            extra = "HA degraded {} (missing: {}), with {} fail-over".format(role, missing, failover)
    hookenv.status_set(
        "active",
        "Ready ({count} DataNode{s}, {extra})".format(count=num_slaves, s="s" if num_slaves > 1 else "", extra=extra),
    )


if __name__ == "__main__":
    if is_state("datanode.joined"):
        report_status(RelationBase.from_state("datanode.joined"))
    else:
        report_blocked()
Example 20
def reinstall_spark():
    """
    This is tricky. We want to fire on config or leadership changes, or when
    hadoop, sparkpeers, or zookeepers come and go. In the future this should
    fire when Cassandra or any other storage comes or goes. In practice this
    method fires whenever bigtop is ready and Juju has elected a master; we
    then build a deployment-matrix and (re)install as things change.
    """
    spark_master_host = leadership.leader_get('master-fqdn')
    if not spark_master_host:
        hookenv.status_set('maintenance', 'juju leader not elected yet')
        return

    mode = hookenv.config()['spark_execution_mode']
    peers = None
    zks = None

    # If mode is standalone and ZK is ready, we are in HA. Do not consider
    # the master_host from juju leadership in our matrix. ZK handles this.
    if (mode == 'standalone' and is_state('zookeeper.ready')):
        spark_master_host = ''
        zk = RelationBase.from_state('zookeeper.ready')
        zks = zk.zookeepers()
        # peers are only used to set our MASTER_URL in standalone HA mode
        peers = get_spark_peers()

    deployment_matrix = {
        'spark_master': spark_master_host,
        'yarn_ready': is_state('hadoop.yarn.ready'),
        'hdfs_ready': is_state('hadoop.hdfs.ready'),
        'zookeepers': zks,
        'peers': peers,
    }

    # If neither config nor our matrix is changing, there is nothing to do.
    if not (is_state('config.changed') or
            data_changed('deployment_matrix', deployment_matrix)):
        return

    # (Re)install based on our execution mode
    hookenv.status_set('maintenance', 'configuring spark in {} mode'.format(mode))
    hookenv.log("Configuring spark with deployment matrix: {}".format(deployment_matrix))

    if mode.startswith('yarn') and is_state('hadoop.yarn.ready'):
        install_spark_yarn()
    elif mode.startswith('local') or mode == 'standalone':
        install_spark_standalone(zks, peers)
    else:
        # Something's wrong (probably requested yarn without yarn.ready).
        remove_state('spark.started')
        report_status()
        return

    # restart services to pick up possible config changes
    spark = Spark()
    spark.stop()
    spark.start()

    set_state('spark.started')
    report_status()
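All of the reinstall_spark variants above short-circuit on data_changed to avoid pointless reinstalls. A minimal, stand-alone sketch of that helper's behaviour (the key name and matrix contents are illustrative):

from charms.reactive.helpers import data_changed

def maybe_reconfigure(matrix):
    # data_changed() serialises and hashes the value, compares it with the hash
    # stored in the unit's kv store under the given key, and returns True on
    # the first call or whenever the value has changed since the last call.
    if not data_changed('deployment_matrix', matrix):
        return False  # nothing changed since the previous hook run
    # ... (re)configure services here ...
    return True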