def login_aws():
    """
    Fetch Docker login credentials for the AWS ECR registry.

    Builds an ECR client for us-east-1 from the key pair returned by
    get_aws_keys(), requests an authorization token and decodes it.

    Returns:
        A (username, password) pair decoded from the first entry of the
        token response's 'authorizationData' list.
    """
    aws_public, aws_secret = get_aws_keys()
    ecr_client = boto3.client('ecr',
                              region_name='us-east-1',
                              aws_access_key_id=aws_public,
                              aws_secret_access_key=aws_secret)

    token = ecr_client.get_authorization_token()  # TODO: use registryIds
    encoded = token['authorizationData'][0]['authorizationToken']

    # The decoded token has the form "<username>:<password>". Split on the
    # first colon only, so a password that itself contains ':' still
    # unpacks into exactly two values instead of raising ValueError.
    username, password = base64.b64decode(encoded).decode().split(':', 1)
    return username, password
def cleanup_aws(docker_repo):
    """
    Delete every untagged image from an ECR repository.

    Args:
        docker_repo: repository URI or path; only the segment after the
            last '/' is used as the ECR repository name.

    Returns:
        List of the image identifiers that ECR reports as deleted. Empty
        when the repository has no untagged images.
    """
    aws_public, aws_secret = get_aws_keys()
    ecr_client = boto3.client('ecr',
                              region_name='us-east-1',
                              aws_access_key_id=aws_public,
                              aws_secret_access_key=aws_secret)

    docker_repo_name = docker_repo.split("/")[-1]

    # list_images returns its results in pages; walk all of them so that
    # untagged images beyond the first page are cleaned up as well.
    paginator = ecr_client.get_paginator('list_images')
    pages = paginator.paginate(repositoryName=docker_repo_name,
                               filter={'tagStatus': 'UNTAGGED'})
    image_ids = [image_id for page in pages for image_id in page['imageIds']]

    # batch_delete_image accepts at most 100 image ids per request, so
    # delete in chunks and collect what each request reports as deleted.
    batch_size = 100
    deleted = []
    for start in range(0, len(image_ids), batch_size):
        resp = ecr_client.batch_delete_image(
            repositoryName=docker_repo_name,
            imageIds=image_ids[start:start + batch_size])
        deleted.extend(resp['imageIds'])

    return deleted
def doctor():
    """
    Checks and repairs your environment in case of errors.
    Attempts to provide information to debug your local machine.
    """
    # The function runs three independent checks (environment setup script,
    # installed vs. published version, API keys) and only afterwards prints
    # one summary line per check.

    # Check environment pre-reqs
    click.secho("Running the environment setup script for your OS...")
    env_setup_cmd = None
    env_setup_status = -1
    env_setup_err = ''
    if sys.platform in ("linux", "linux2"):
        env_setup_cmd = 'sudo apt update && sudo apt install -y libcurl4 curl && ' \
                        'sudo curl https://raw.githubusercontent.com/numerai/numerai-cli/master/scripts/setup-ubu.sh ' \
                        '| sudo bash'

    elif sys.platform == "darwin":
        env_setup_cmd = 'curl https://raw.githubusercontent.com/numerai/numerai-cli/master/scripts/setup-mac.sh | bash'

    elif is_win10():
        env_setup_cmd = 'powershell -command "$Script = Invoke-WebRequest ' \
                        '\'https://raw.githubusercontent.com/numerai/numerai-cli/master/scripts/setup-win10.ps1\'; ' \
                        '$ScriptBlock = [ScriptBlock]::Create($Script.Content); Invoke-Command -ScriptBlock $ScriptBlock"'

    elif is_win8():
        # TODO: check if more is needed?
        env_setup_cmd = 'docker info'

    else:
        env_setup_status = 1
        env_setup_err = f"Unrecognized Operating System {sys.platform}, " \
                        f"cannot run environment setup script, skipping..."

    if env_setup_cmd is not None:
        res = subprocess.run(env_setup_cmd,
                             shell=True,
                             stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE)
        env_setup_status = res.returncode
        # stderr is captured as bytes; decode it so it is echoed as text
        # (and can be styled) rather than as a raw bytes object.
        env_setup_err = res.stderr.decode(errors='replace')

    # Check official (non-dev) version
    click.secho(f"Checking your numerai-cli version...")
    res = subprocess.run('pip3 show numerai-cli',
                         stdout=subprocess.PIPE,
                         stderr=subprocess.PIPE,
                         shell=True)
    # Parse the real stdout instead of the repr of the CompletedProcess;
    # curr_ver is None when pip does not report the package at all.
    curr_ver = _installed_version(res.stdout.decode(errors='replace'))

    url = f"https://pypi.org/pypi/numerai-cli/json"
    with request.urlopen(url) as response:
        releases = json.load(response)["releases"]
    # Pick the newest non-dev release by numeric ordering; comparing the raw
    # strings would rank e.g. "0.10.0" below "0.9.0".
    latest_ver = max((ver for ver in releases if 'dev' not in ver),
                     key=_version_key,
                     default=None)

    # Check keys
    click.secho("Checking your API keys...")
    nodes_config = load_or_init_nodes()
    used_providers = [nodes_config[n]['provider'] for n in nodes_config]

    invalid_providers = []
    # Any failure while loading or validating keys marks the provider as
    # invalid, but Ctrl-C / interpreter exit must still propagate.
    try:
        check_numerai_validity(*get_numerai_keys())
    except Exception:
        invalid_providers.append('numerai')
    if 'aws' in used_providers:
        try:
            check_aws_validity(*get_aws_keys())
        except Exception:
            invalid_providers.append('aws')

    if env_setup_status != 0:
        click.secho(f"✖ Environment setup incomplete:", fg='red')
        click.secho(env_setup_err, fg='red')
        click.secho(
            f"Ensure your OS is supported and read the Troubleshooting wiki: "
            f"https://github.com/numerai/numerai-cli/wiki/Troubleshooting",
            fg='red')
    else:
        click.secho("✓ Environment setup with Docker and Python", fg='green')

    # A missing installation is reported like an outdated one: the suggested
    # `pip3 install -U` command resolves both situations.
    needs_upgrade = curr_ver is None or (
        latest_ver is not None
        and _version_key(curr_ver) < _version_key(latest_ver))
    if needs_upgrade:
        click.secho(
            f"✖ numerai-cli needs an upgrade"
            f"(run `pip3 install -U numerai-cli` to fix)",
            fg='red')
    else:
        click.secho("✓ numerai-cli is up to date", fg='green')

    if invalid_providers:
        click.secho(
            f"✖ Invalid provider keys: {invalid_providers}"
            f"(run `numerai setup` to fix)",
            fg='red')
    else:
        click.secho("✓ API Keys working", fg='green')

    click.secho(
        "\nIf you need help troubleshooting or want to report a bug please read the"
        "\nTroubleshooting and Feedback section of the readme:"
        "\nhttps://github.com/numerai/numerai-cli#troubleshooting-and-feedback",
        fg='yellow')


def _installed_version(pip_show_output):
    """Return the `Version:` field of `pip show` output, or None if absent."""
    for line in pip_show_output.splitlines():
        if line.startswith('Version:'):
            return line.split(':', 1)[1].strip()
    return None


def _version_key(version):
    """Convert a dotted version string into a tuple of ints for ordering.

    Only the leading digits of each dot-separated component are used (a
    component without leading digits counts as 0) and trailing zero
    components are dropped, so "0.3" and "0.3.0" compare equal.
    """
    key = []
    for part in version.strip().split('.'):
        digits = ''
        for char in part:
            if char not in '0123456789':
                break
            digits += char
        key.append(int(digits) if digits else 0)
    while key and key[-1] == 0:
        key.pop()
    return tuple(key)
def monitor_aws(node, config, num_lines, log_type, follow_tail, verbose):
    """
    Print CloudWatch logs for a node deployed on AWS.

    For LOG_TYPE_WEBHOOK the api and webhook log groups from `config` are
    printed and the function returns. For LOG_TYPE_CLUSTER it waits for the
    log stream of the most recent ECS task, prints it, optionally keeps
    following it, and finally waits up to five minutes for a first log event
    if none has been seen. Any other log type is a no-op.

    Args:
        node: node name; used for the task lookup and the log stream prefix.
        config: mapping providing 'api_log_group', 'webhook_log_group' and
            'cluster_log_group'.
        num_lines: limit passed to the initial log print.
        log_type: LOG_TYPE_WEBHOOK or LOG_TYPE_CLUSTER.
        follow_tail: when truthy, keep polling for new events until the task
            is reported as STOPPED.
        verbose: forwarded to get_recent_task_status_aws.

    Raises:
        SystemExit: with code 1 when the task is STOPPED and has no log
            stream.
    """
    aws_public, aws_secret = get_aws_keys()
    logs_client = boto3.client('logs',
                               region_name='us-east-1',
                               aws_access_key_id=aws_public,
                               aws_secret_access_key=aws_secret)
    ecs_client = boto3.client('ecs',
                              region_name='us-east-1',
                              aws_access_key_id=aws_public,
                              aws_secret_access_key=aws_secret)

    if log_type == LOG_TYPE_WEBHOOK:
        get_name_and_print_logs(logs_client, config['api_log_group'], num_lines)
        get_name_and_print_logs(logs_client, config['webhook_log_group'], num_lines)
        return

    # Everything below is specific to cluster logs; bail out explicitly for
    # any other log type instead of running with `family` undefined.
    if log_type != LOG_TYPE_CLUSTER:
        return

    family = config['cluster_log_group']

    # wait until log stream has been created
    i = 0
    name = None
    while name is None:
        i += 1
        task = get_recent_task_status_aws(ecs_client, node, verbose)
        if task is None:
            # No recent task to follow: fall back to printing the log group.
            get_name_and_print_logs(logs_client, family, num_lines)
            return
        task_id = task["taskArn"].split('/')[-1]

        streams = logs_client.describe_log_streams(
            logGroupName=family,
            logStreamNamePrefix=f"ecs/{node}/{task_id}")
        streams = [
            stream for stream in streams['logStreams']
            if stream['logStreamName'].endswith(task_id)
        ]

        msg = f"Task status: {task['lastStatus']}."

        if task['lastStatus'] == "STOPPED":
            if not streams:
                click.secho(f"{msg} No log file, did you deploy?", fg='yellow')
                # SystemExit is what exit(1) raises, without depending on
                # the `site`-provided builtin being available.
                raise SystemExit(1)
            # The stopped task already has a stream: record its name so the
            # calls below read from it instead of receiving None.
            name = streams[0]['logStreamName']
            click.secho(f"{msg} Checking for log events...", fg='green')

        elif not streams:
            click.secho(
                f"{msg} Waiting for log file to be created..."
                f"{'.' * i}\r",
                fg='yellow',
                nl=False)
            time.sleep(2)

        else:
            name = streams[0]['logStreamName']
            click.secho(f"\n{msg} Log file created: {name}", fg='green')

    # print out the logs
    next_token, num_events = print_logs(logs_client, family, name, limit=num_lines)
    total_events = num_events
    while follow_tail:
        # Only continue from the previous token once something was printed;
        # until then keep asking without a token.
        next_token, num_events = print_logs(
            logs_client,
            family,
            name,
            next_token=(next_token if total_events > 0 else None))
        total_events += num_events
        if total_events == 0:
            click.secho(f"Waiting for log events...\r", fg='yellow', nl=False)

        task = get_recent_task_status_aws(ecs_client, node, verbose)
        if task is None:
            # The status helper can return None (handled above as well);
            # there is nothing left to follow in that case.
            break
        if task['lastStatus'] == "STOPPED":
            click.secho(f"\nTask is stopping...", fg='yellow')
            containers = task['containers']
            if containers and 'reason' in containers[0]:
                container = containers[0]
                click.secho(
                    f"Container Exit code: {container['exitCode']}\n"
                    f"Reason: {container['reason']}",
                    fg='red')
            break

    # Nothing printed so far: poll for a first event for at most 5 minutes.
    start = time.time()
    while total_events == 0:
        click.secho(f"No log events yet, still waiting...\r", fg='yellow', nl=False)
        next_token, num_events = print_logs(logs_client, family, name)
        total_events += num_events
        if (time.time() - start) > 60 * 5:
            click.secho(
                f"\nTimeout after 5 minutes, please run the `numerai node status`"
                f"command for this model or visit the log console:\n"
                f"https://console.aws.amazon.com/cloudwatch/home?"
                f"region=us-east-1#logsV2:log-groups/log-group/$252Ffargate$252Fservice$252F{node}"
                f"/log-events/{name.replace('/', '$252F')}",
                fg='red')
            break
    return