Exemple #1
0
def _run_service(ctx, stack_name, service):
    """Run an ``hpcc-run.sh`` action for dafilesrv and hpcc-init on the master.

    ``service`` is passed as the trailing argument of ``hpcc-run.sh`` for both
    components. When ``ctx.obj['test']`` is truthy nothing is executed: the
    master address is replaced by ``127.0.0.1`` and the command lines are
    printed instead.

    If ``get_master_ip`` raises ``KeyError`` the message is printed in red and
    the click context is aborted.
    """
    dry_run = ctx.obj['test']

    if dry_run:
        master_ip = '127.0.0.1'
    else:
        # @TODO: a cache mechanism would be better
        try:
            master_ip = get_master_ip(stack_name)
        except KeyError as err:
            print(click.style(str(err), fg='red'))
            ctx.abort()

    # @TODO: after we finalize the AMI, we don't need to switch to the
    # user's directory
    template = 'sudo bash -c "/opt/HPCCSystems/sbin/hpcc-run.sh -a {} {}"'
    # Both commands are built up front, dafilesrv first, then hpcc-init.
    remote_commands = [
        RemoteCommand(
            master_ip,
            template.format(component, service),
            identity_file=ctx.obj['identity'],
            ssh_user=ctx.obj['username'],
        )
        for component in ('dafilesrv', 'hpcc-init')
    ]

    for remote in remote_commands:
        if dry_run:
            print('not executing `{}`'.format(remote.command_line))
        else:
            remote.start()
Exemple #2
0
def main(argv=None):
    """Start a match on every machine listed in ``machines.json``.

    ``argv`` defaults to ``sys.argv[1:]`` and must hold at least two items:
    ``argv[0]`` is used as the number of games and ``argv[1]`` as the time
    control. Each machine entry must provide ``hostname``, ``nps`` and
    ``cores``; the time control is scaled by ``NPS_BASE / nps`` before the
    remote match command is started on that host.

    Returns ``None`` when every usable machine was started, or ``2`` as soon
    as a command fails to start. Exits with status 1 on bad usage or when the
    machine file cannot be read or parsed.
    """
    global NPS_BASE, REMOTE_MATCH_CMD, REMOTE_MATCH_DIR

    if argv is None:
        argv = sys.argv[1:]

    if len(argv) < 2:
        # NOTE(review): the usage text lists <time control> first, but the
        # code below reads games from argv[0] and the time control from
        # argv[1] -- confirm which order is intended.
        print("syntax: match <time control> <games>", file=sys.stderr)
        sys.exit(1)

    try:
        with open('machines.json', 'r') as machine_file:
            data = machine_file.read()
    except (OSError, ValueError):
        # ValueError covers undecodable file contents (UnicodeDecodeError).
        print("machine file not found or could not be opened", file=sys.stderr)
        sys.exit(1)

    try:
        machine_list = json.loads(data)
    except ValueError:
        print("failed to parse machine file", file=sys.stderr)
        sys.exit(1)

    games = argv[0]
    tc = argv[1]

    # start the matches
    for machine in machine_list:
        try:
            host = machine['hostname']
            nps = float(machine['nps'])
            cores = int(machine['cores'])
        except (KeyError, TypeError, ValueError):
            # Missing keys, non-dict entries and non-numeric values all mean
            # the entry is unusable; skip it rather than abort the whole run.
            print(
                "warning: expected hostname, nps and cores for machine, not found",
                file=sys.stderr)
            continue
        if not nps > 0:
            # A zero, negative or NaN speed cannot be used as a divisor.
            print("warning: nps must be positive for machine, skipping",
                  file=sys.stderr)
            continue

        # Parse and scale time control
        factor = float(NPS_BASE) / nps
        new_tc = scaleTC(tc, factor)

        # Keep the command string in its own name so the error handler can
        # still report it after the RemoteCommand object has been created.
        command_line = ' '.join(
            (REMOTE_MATCH_CMD, str(games), str(new_tc), str(cores)))

        # execute the remote or local match script
        try:
            print("starting : host=%s tc=%s" % (host, new_tc))
            remote = RemoteCommand(host,
                                   command_line,
                                   capture=True,
                                   directory=REMOTE_MATCH_DIR)
            remote.start()
        except Exception:
            print("error starting command " + command_line, file=sys.stderr)
            traceback.print_exc()
            return 2
Exemple #3
0
def ip(ctx):
    """Print the internal IP address of each host.

    The first address reported by ``hostname -I`` on the host is echoed.
    Here we assume the internal ip is bound to eth0.
    """
    first_address_cmd = "hostname -I | awk '{print $1}'"
    for target in ctx.obj['host_list']:
        lookup = RemoteCommand(
            target,
            first_address_cmd,
            ignore_known_hosts=True,
            capture=True,
        )
        lookup.start()
        address = lookup.output
        click.echo(address)
Exemple #4
0
def get_workload_distribution(node):
    """Count selected activity lines in a node's Roxie log.

    Four ``grep ... | wc -l`` commands are run against
    ``/var/log/HPCCSystems/myroxie/roxie.log`` on the address returned by
    ``node.get_ip()``, one per pattern below.

    Returns a dict mapping each label to the captured output of its command
    (the raw text printed by ``wc -l``, not converted to an integer).
    """
    count_template = "grep '{}' /var/log/HPCCSystems/myroxie/roxie.log | wc -l"
    labelled_patterns = (
        ("IndexReadActivity", "=> kind=23CRoxieIndexReadActivity"),
        ("FetchActivity", "=> kind=19CRoxieFetchActivity"),
        ("IndexReadActivityAttempt", "-> kind=23CRoxieIndexReadActivity"),
        ("FetchActivityAttempt", "-> kind=19CRoxieFetchActivity"),
    )

    # Build every command first, then start them in the same order.
    counters = [
        (label,
         RemoteCommand(node.get_ip(),
                       count_template.format(pattern),
                       capture=True,
                       silent=True))
        for label, pattern in labelled_patterns
    ]
    for _, counter in counters:
        counter.start()

    return {label: counter.output for label, counter in counters}
Exemple #5
0
def get_metrics(node):
    """Fetch the control metrics of a node as a name -> value dict.

    Runs ``testsocket`` with a ``<control:metrics/>`` request on the node,
    parses the captured output as XML and collects the ``name`` and ``value``
    attributes of every ``Metric`` element that has a ``name``.
    """
    query = RemoteCommand(
        node.get_ip(),
        "sudo /opt/HPCCSystems/bin/testsocket {} '<control:metrics/>'".format(
            node.get_ip()),
        capture=True,
        silent=True)
    query.start()

    document = etree.fromstring(query.output)
    metric_nodes = document.xpath("//Control/Endpoint/Metrics/Metric[@name]")
    # Later duplicates of a name override earlier ones, as with plain
    # item assignment.
    return {entry.get('name'): entry.get('value') for entry in metric_nodes}
Exemple #6
0
def send_r_cmd(host, settings, r_cmd):
    """Start ``r_cmd`` on ``host`` over SSH without waiting for it to finish.

    ``settings`` must provide the keys ``'port'``, ``'id_file'`` and
    ``'user'``, which are used as the SSH port, identity file and login user.
    Output is captured, and the module-level ``REMOTE_SILENT`` flag is passed
    as the ``silent`` option.

    Returns the started ``RemoteCommand``. Because the command is started
    asynchronously, the caller is responsible for waiting on it before
    reading its output.
    """
    rport = settings['port']
    id_file = settings['id_file']
    ruser = settings['user']
    # ``async`` became a reserved keyword in Python 3.7, so passing it as a
    # keyword argument is a SyntaxError there; executor renamed the option
    # to ``asynchronous``.
    cmd = RemoteCommand(host,
                        r_cmd,
                        capture=True,
                        ssh_user=ruser,
                        port=rport,
                        batch_mode=False,
                        identity_file=id_file,
                        asynchronous=True,
                        silent=REMOTE_SILENT)
    cmd.start()
    return cmd
Exemple #7
0
def _wait_until_complete(master_ip, identity, username):
    """Block until ``pgrep -f checkpoint.py`` reports nothing on the master.

    The check is repeated over SSH (using ``identity`` as the identity file
    and ``username`` as the login user) every five seconds for as long as
    the command prints at least one line.
    """
    while True:
        probe = RemoteCommand(master_ip,
                              "pgrep -f checkpoint.py",
                              identity_file=identity,
                              ssh_user=username,
                              capture=True,
                              # pgrep exits non-zero when nothing matches.
                              check=False)
        probe.start()
        matching_pids = probe.output.splitlines()
        if not matching_pids:
            return
        print("Data service still processing")
        time.sleep(5)
Exemple #8
0
def _wait_until_complete(master_ip, conf):
    """Block until ``pgrep -f checkpoint.py`` reports nothing on the master.

    The SSH identity file and user are looked up in ``conf`` on every
    iteration. The check is repeated every five seconds for as long as the
    command prints at least one line.
    """
    while True:
        probe = RemoteCommand(
            master_ip,
            "pgrep -f checkpoint.py",
            identity_file=conf.get(HaasConfigurationKey.HAAS_SSH_KEY),
            ssh_user=conf.get(HaasConfigurationKey.HAAS_SSH_USER),
            capture=True,
            # pgrep exits non-zero when nothing matches.
            check=False)
        probe.start()
        matching_pids = probe.output.splitlines()
        if not matching_pids:
            return
        print("Data service still processing")
        time.sleep(5)
def check_remote(remote, data, timeout=60, verbose=False):
    """Check whether an SSH connection to a remote can be established.

    Runs ``/bin/true`` on ``data['User']@data['Hostname']`` at port
    ``data['Port']`` with ``timeout`` as the connect timeout.

    ``remote`` is only used as a label in the progress output. When
    ``verbose`` is true a "Checking ..." line is printed and completed with
    "Success" or "Failed"; otherwise nothing is printed.

    Returns ``True`` if the command ran, ``False`` if the connection failed
    (``RemoteConnectFailed``).
    """
    if verbose:
        print("Checking {} ({}s)...".format(remote, timeout),
              end=' ',
              flush=True)
    cmd = RemoteCommand('{}@{}'.format(data['User'], data['Hostname']),
                        '/bin/true',
                        connect_timeout=timeout,
                        port=data['Port'])
    try:
        cmd.start()
        cmd.wait()
    except RemoteConnectFailed:
        # The result word completes the "Checking ..." line, so it is only
        # meaningful when that prefix was printed.
        if verbose:
            print("Failed")
        return False
    if verbose:
        print("Success")
    return True
Exemple #10
0
def progress(ctx, stack_name):
    """Report whether the data service is running and, if so, follow its log.

    The cluster topology is parsed from ``stack_name`` and the SSH settings
    are read from the configuration named by ``ctx.obj['config']``. A probe
    command is run on the master; if its captured output is exactly ``'0'``
    no service is considered running, otherwise ``/tmp/haas_data.out`` is
    tailed on the master.
    """
    topology = ClusterTopology.parse(stack_name)
    conf = HaasConfigurationManager().get(ctx.obj['config'])

    def ssh_options():
        # Looked up afresh for every command, identity file first.
        return {
            'identity_file': conf.get(HaasConfigurationKey.HAAS_SSH_KEY),
            'ssh_user': conf.get(HaasConfigurationKey.HAAS_SSH_USER),
        }

    # NOTE(review): the `{}` after --name is never filled in with .format(),
    # so the remote shell receives a literal `{}` -- confirm whether a name
    # (e.g. stack_name) was meant to be substituted here.
    probe_script = 'source ~/haas/scripts/init.sh; python /home/osr/haas/scripts/checkpoint.py --name {} available; echo $?'
    probe = RemoteCommand(topology.get_master_ip(),
                          probe_script,
                          **ssh_options(),
                          capture=True)
    probe.start()

    if probe.output == '0':
        print("No service is running")
        return

    print('Data service is running....')
    follow_log = RemoteCommand(topology.get_master_ip(),
                               "tail -f /tmp/haas_data.out",
                               **ssh_options(),
                               check=False)
    follow_log.start()
Exemple #11
0
def progress(ctx, stack_name):
    """Check progress of checkpointing operation.

    A probe command is prepared for the master of ``stack_name``. When
    ``ctx.obj['test']`` is truthy the command is only printed. Otherwise it
    is run, and unless its captured output is exactly ``'0'`` the file
    ``/tmp/haas_data.out`` is tailed on the master.
    """
    master_ip = get_master_ip(stack_name)
    ssh_options = {
        'identity_file': ctx.obj['identity'],
        'ssh_user': ctx.obj['username'],
    }

    # NOTE(review): the `{}` after --name is never filled in with .format(),
    # so the remote shell receives a literal `{}` -- confirm whether a name
    # (e.g. stack_name) was meant to be substituted here.
    probe = RemoteCommand(
        master_ip,
        'python3 /opt/haas/checkpoint.py --name {} available; echo $?',
        **ssh_options,
        capture=True)

    if ctx.obj['test']:
        print('not executing `{}`'.format(probe.command))
        return

    probe.start()
    if probe.output == '0':
        print("No service is running")
        return

    print('Data service is running....')
    follow_log = RemoteCommand(master_ip,
                               "tail -f /tmp/haas_data.out",
                               **ssh_options,
                               check=False)
    follow_log.start()