def _run_service(ctx, stack_name, service):
    if ctx.obj['test']:
        master_ip = '127.0.0.1'
    else:
        # @TODO: a cache mechanism would be better
        try:
            master_ip = get_master_ip(stack_name)
        except KeyError as e:
            print(click.style(str(e), fg='red'))
            ctx.abort()

    # @TODO: after we finalize the AMI, we don't need to switch to the
    # user's directory
    cmd = RemoteCommand(
        master_ip,
        'sudo bash -c "/opt/HPCCSystems/sbin/hpcc-run.sh -a '
        'dafilesrv {}"'.format(service),
        identity_file=ctx.obj['identity'],
        ssh_user=ctx.obj['username'],
    )
    cmd2 = RemoteCommand(
        master_ip,
        'sudo bash -c "/opt/HPCCSystems/sbin/hpcc-run.sh -a '
        'hpcc-init {}"'.format(service),
        identity_file=ctx.obj['identity'],
        ssh_user=ctx.obj['username'],
    )
    if ctx.obj['test']:
        print('not executing `{}`'.format(cmd.command_line))
        print('not executing `{}`'.format(cmd2.command_line))
    else:
        cmd.start()
        cmd2.start()
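# get_master_ip() is not shown in the snippet above. A minimal sketch is
# given here, assuming the master address is kept per stack in a JSON file;
# the file name and layout are assumptions, not the project's actual lookup.
import json

def get_master_ip_sketch(stack_name, state_file='stacks.json'):
    with open(state_file) as fh:
        stacks = json.load(fh)
    # An unknown stack name raises KeyError, matching the handler above.
    return stacks[stack_name]['master_ip']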
def main(argv=None):
    global NPS_BASE, REMOTE_MATCH_CMD, REMOTE_MATCH_DIR

    if argv is None:
        argv = sys.argv[1:]
    if len(argv) < 2:
        print("syntax: match <games> <time control>", file=sys.stderr)
        exit(1)

    try:
        with open('machines.json', 'r') as machineFile:
            data = machineFile.read()
    except IOError:
        print("machine file not found or could not be opened", file=sys.stderr)
        exit(1)

    # machine list
    machineList = []
    try:
        machineList = json.loads(data)
    except ValueError:
        print("failed to parse machine file", file=sys.stderr)
        exit(1)

    games = argv[0]
    tc = argv[1]

    # start the matches
    for machine in machineList:
        try:
            host = machine['hostname']
            nps = float(machine['nps'])
            cores = int(machine['cores'])
        except KeyError:
            print("warning: expected hostname, nps and cores for machine, not found",
                  file=sys.stderr)
            continue

        # parse and scale the time control so each machine gets an
        # equivalent effective search budget relative to NPS_BASE
        factor = float(NPS_BASE) / nps
        new_tc = scaleTC(tc, factor)

        # execute the remote or local match script
        try:
            cmd = REMOTE_MATCH_CMD + ' ' + str(games) + ' ' + new_tc + ' ' + str(cores)
            print("starting : host=%s tc=%s" % (host, new_tc))
            cmd = RemoteCommand(host, cmd, capture=True, directory=REMOTE_MATCH_DIR)
            cmd.start()
        except Exception:
            print("error starting command " + str(cmd), file=sys.stderr)
            traceback.print_exc(file=sys.stderr)
            return 2
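# scaleTC() is referenced above but not shown. A minimal sketch, assuming a
# cutechess-style time control of the form "moves/base+inc" (e.g. "40/60+0.5")
# whose base time and increment are multiplied by the NPS factor; the real
# implementation may parse a different format.
def scale_tc_sketch(tc, factor):
    moves = None
    if '/' in tc:
        moves, tc = tc.split('/', 1)
    inc = '0'
    if '+' in tc:
        tc, inc = tc.split('+', 1)
    base = float(tc) * factor
    inc = float(inc) * factor
    scaled = '%.2f+%.2f' % (base, inc)
    if moves is not None:
        scaled = moves + '/' + scaled
    return scaled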
def ip(ctx):
    """Print the internal IP address of each host.

    Here we assume the internal IP is the one bound to eth0.
    """
    for host in ctx.obj['host_list']:
        cmd = RemoteCommand(host,
                            "hostname -I | awk '{print $1}'",
                            ignore_known_hosts=True,
                            capture=True)
        cmd.start()
        click.echo(cmd.output)
def get_workload_distribution(node):
    grep_cmd = "grep '{}' /var/log/HPCCSystems/myroxie/roxie.log | wc -l"
    index_cmd = RemoteCommand(
        node.get_ip(),
        grep_cmd.format("=> kind=23CRoxieIndexReadActivity"),
        capture=True,
        silent=True)
    fetch_cmd = RemoteCommand(
        node.get_ip(),
        grep_cmd.format("=> kind=19CRoxieFetchActivity"),
        capture=True,
        silent=True)
    index_attempt_cmd = RemoteCommand(
        node.get_ip(),
        grep_cmd.format("-> kind=23CRoxieIndexReadActivity"),
        capture=True,
        silent=True)
    fetch_attempt_cmd = RemoteCommand(
        node.get_ip(),
        grep_cmd.format("-> kind=19CRoxieFetchActivity"),
        capture=True,
        silent=True)

    index_cmd.start()
    fetch_cmd.start()
    index_attempt_cmd.start()
    fetch_attempt_cmd.start()

    return {
        "IndexReadActivity": index_cmd.output,
        "FetchActivity": fetch_cmd.output,
        "IndexReadActivityAttempt": index_attempt_cmd.output,
        "FetchActivityAttempt": fetch_attempt_cmd.output,
    }
def get_metrics(node):
    cmd = RemoteCommand(
        node.get_ip(),
        "sudo /opt/HPCCSystems/bin/testsocket {} '<control:metrics/>'".format(
            node.get_ip()),
        capture=True,
        silent=True)
    cmd.start()
    output_xml = etree.fromstring(cmd.output)
    metrics = {}
    for metric in output_xml.xpath("//Control/Endpoint/Metrics/Metric[@name]"):
        metrics[metric.get('name')] = metric.get('value')
    return metrics
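# Hypothetical usage of get_workload_distribution() and get_metrics(): walk a
# list of Roxie nodes and print a small per-node summary. The `roxie_nodes`
# iterable of node objects exposing get_ip() is an assumption, not part of
# the original code.
def print_roxie_summary(roxie_nodes):
    for node in roxie_nodes:
        workload = get_workload_distribution(node)
        metrics = get_metrics(node)
        print(node.get_ip())
        for activity, count in workload.items():
            # RemoteCommand captures raw text, so strip the trailing newline
            print("  {}: {}".format(activity, count.strip()))
        print("  metrics reported: {}".format(len(metrics)))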
def send_r_cmd(host, settings, r_cmd):
    rport = settings['port']
    id_file = settings['id_file']
    ruser = settings['user']
    # Note: `async` became a reserved keyword in Python 3.7, so this keyword
    # argument only parses on older interpreters.
    cmd = RemoteCommand(host,
                        r_cmd,
                        capture=True,
                        ssh_user=ruser,
                        port=rport,
                        batch_mode=False,
                        identity_file=id_file,
                        async=True,
                        silent=REMOTE_SILENT)
    cmd.start()
    return cmd
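# Hypothetical caller for send_r_cmd(): the settings dict is assumed to carry
# the 'port', 'id_file' and 'user' keys read above. Because the command is
# started with async=True, the sketch fans out to every host first and only
# then waits for each command to finish and collects its captured output.
def run_on_all(hosts, settings, r_cmd):
    pending = [send_r_cmd(host, settings, r_cmd) for host in hosts]
    results = {}
    for host, cmd in zip(hosts, pending):
        cmd.wait()                # block until this remote command finishes
        results[host] = cmd.output
    return results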
def _wait_until_complete(master_ip, identity, username):
    while True:
        cmd = RemoteCommand(master_ip,
                            "pgrep -f checkpoint.py",
                            identity_file=identity,
                            ssh_user=username,
                            capture=True,
                            check=False)
        cmd.start()
        pid_list = cmd.output
        # print(pid_list, len(pid_list.splitlines()))
        if len(pid_list.splitlines()) > 0:
            print("Data service still processing")
            time.sleep(5)
        else:
            break
def _wait_until_complete(master_ip, conf):
    while True:
        cmd = RemoteCommand(
            master_ip,
            "pgrep -f checkpoint.py",
            identity_file=conf.get(HaasConfigurationKey.HAAS_SSH_KEY),
            ssh_user=conf.get(HaasConfigurationKey.HAAS_SSH_USER),
            capture=True,
            check=False)
        cmd.start()
        pid_list = cmd.output
        # print(pid_list, len(pid_list.splitlines()))
        if len(pid_list.splitlines()) > 0:
            print("Data service still processing")
            time.sleep(5)
        else:
            break
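# A bounded variant of the wait loop above, sketched under the same
# assumptions (same RemoteCommand arguments and the time/HaasConfigurationKey
# imports of the surrounding module); it gives up after `timeout` seconds
# instead of polling forever. The timeout handling is an addition, not part
# of the original code.
def _wait_until_complete_or_timeout(master_ip, conf, timeout=600):
    deadline = time.time() + timeout
    while time.time() < deadline:
        cmd = RemoteCommand(
            master_ip,
            "pgrep -f checkpoint.py",
            identity_file=conf.get(HaasConfigurationKey.HAAS_SSH_KEY),
            ssh_user=conf.get(HaasConfigurationKey.HAAS_SSH_USER),
            capture=True,
            check=False)
        cmd.start()
        if not cmd.output.splitlines():
            return True               # no checkpoint.py process left
        print("Data service still processing")
        time.sleep(5)
    return False                      # gave up waiting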
def check_remote(remote, data, timeout=60, verbose=False):
    if verbose:
        print("Checking {} ({}s)...".format(remote, timeout), end=' ', flush=True)
    cmd = RemoteCommand('{}@{}'.format(data['User'], data['Hostname']),
                        '/bin/true',
                        connect_timeout=timeout,
                        port=data['Port'])
    try:
        cmd.start()
        cmd.wait()
    except RemoteConnectFailed:
        print("Failed")
        return False
    print("Success")
    return True
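# Hypothetical call site for check_remote(): the data mapping mirrors the keys
# read above ('User', 'Hostname', 'Port'), e.g. parsed from an ssh_config-style
# inventory; the host entry itself is an invented example.
hosts = {
    'build-box': {'User': 'ci', 'Hostname': 'build.example.org', 'Port': 22},
}
reachable = {name: check_remote(name, data, timeout=30, verbose=True)
             for name, data in hosts.items()}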
def progress(ctx, stack_name):
    topology = ClusterTopology.parse(stack_name)
    conf = HaasConfigurationManager().get(ctx.obj['config'])
    cmd = RemoteCommand(
        topology.get_master_ip(),
        'source ~/haas/scripts/init.sh; '
        'python /home/osr/haas/scripts/checkpoint.py '
        '--name {} available; echo $?'.format(stack_name),
        identity_file=conf.get(HaasConfigurationKey.HAAS_SSH_KEY),
        ssh_user=conf.get(HaasConfigurationKey.HAAS_SSH_USER),
        capture=True)
    cmd.start()
    if cmd.output == '0':
        print("No service is running")
    else:
        print('Data service is running....')
        RemoteCommand(topology.get_master_ip(),
                      "tail -f /tmp/haas_data.out",
                      identity_file=conf.get(HaasConfigurationKey.HAAS_SSH_KEY),
                      ssh_user=conf.get(HaasConfigurationKey.HAAS_SSH_USER),
                      check=False).start()
def progress(ctx, stack_name):
    '''Check progress of the checkpointing operation.'''
    master_ip = get_master_ip(stack_name)
    cmd = RemoteCommand(master_ip,
                        'python3 /opt/haas/checkpoint.py '
                        '--name {} available; echo $?'.format(stack_name),
                        identity_file=ctx.obj['identity'],
                        ssh_user=ctx.obj['username'],
                        capture=True)
    if ctx.obj['test']:
        print('not executing `{}`'.format(cmd.command))
    else:
        cmd.start()
        if cmd.output == '0':
            print("No service is running")
        else:
            print('Data service is running....')
            RemoteCommand(master_ip,
                          "tail -f /tmp/haas_data.out",
                          identity_file=ctx.obj['identity'],
                          ssh_user=ctx.obj['username'],
                          check=False).start()