def _get_vpc_id_or_die(ec2, subnet_id):
    """Return the VPC ID of the subnet identified by ``subnet_id``.

    The subnet is looked up through ``ec2.subnets.filter`` using a
    ``subnet-id`` filter, and exactly one match is required.

    Args:
        ec2: Object exposing ``subnets.filter(Filters=...)``.
        subnet_id: ID of the subnet to look up.

    Returns:
        The ``vpc_id`` attribute of the single matching subnet.

    Raises:
        AssertionError: If the lookup does not yield exactly one subnet
            (``cli_logger.doassert`` is given the same condition first).
    """
    subnet_filter = {"Name": "subnet-id", "Values": [subnet_id]}
    matches = list(ec2.subnets.filter(Filters=[subnet_filter]))
    found_exactly_one = len(matches) == 1

    # TODO: better error message
    cli_logger.doassert(found_exactly_one, "Subnet ID not found: {}",
                        subnet_id)
    assert found_exactly_one, "Subnet ID not found: {}".format(subnet_id)

    return matches[0].vpc_id
def _create_security_group(config, vpc_id, group_name):
    """Create a security group in ``vpc_id`` and return it.

    The group is created through the EC2 client and then fetched again with
    ``_get_security_group`` so the caller receives the looked-up object.

    Args:
        config: Cluster configuration, forwarded to ``_client`` and
            ``_get_security_group``.
        vpc_id: ID of the VPC the group is created in.
        group_name: Name given to the new security group.

    Returns:
        The security group returned by ``_get_security_group``.

    Raises:
        AssertionError: If the group cannot be found after creation
            (``cli_logger.doassert`` is given the same condition first).
    """
    client = _client("ec2", config)
    client.create_security_group(
        Description="Auto-created security group for Ray workers",
        GroupName=group_name,
        VpcId=vpc_id)
    security_group = _get_security_group(config, vpc_id, group_name)

    # Validate before logging: the log calls below read attributes of
    # `security_group`, so a failed lookup must surface as the intended
    # "Failed to create security group" error instead of an unrelated
    # AttributeError.
    cli_logger.doassert(security_group,
                        "Failed to create security group")  # err msg
    assert security_group, "Failed to create security group"

    cli_logger.verbose(
        "Created new security group {}",
        cf.bold(security_group.group_name),
        _tags=dict(id=security_group.id))
    cli_logger.old_info(
        logger, "_create_security_group: Created new security group {} ({})",
        security_group.group_name, security_group.id)

    return security_group
def _set_ssh_ip_if_required(self):
    """Resolve and cache the node IP used for SSH, if not already cached.

    Does nothing when ``self.ssh_ip`` is already set. Otherwise waits for
    the IP via ``self.wait_for_ip`` with a deadline of
    ``NODE_START_WAIT_S`` seconds from now, stores it on ``self.ssh_ip``
    and then makes sure ``self.ssh_control_path`` exists.

    Raises:
        AssertionError: If ``wait_for_ip`` returns ``None``
            (``cli_logger.doassert`` is given the same condition first).
    """
    if self.ssh_ip is not None:
        return

    # We assume that this never changes.
    #   I think that's reasonable.
    wait_until = time.time() + NODE_START_WAIT_S
    with LogTimer(self.log_prefix + "Got IP"):
        node_ip = self.wait_for_ip(wait_until)
        got_ip = node_ip is not None

        cli_logger.doassert(got_ip, "Could not get node IP.")  # todo: msg
        assert got_ip, "Unable to find IP of node"

    self.ssh_ip = node_ip

    # This should run before any SSH commands and therefore ensure that
    #   the ControlPath directory exists, allowing SSH to maintain
    #   persistent sessions later on.
    try:
        os.makedirs(self.ssh_control_path, mode=0o700, exist_ok=True)
    except OSError as err:
        # Failing to create the directory is only reported, not re-raised.
        reason = str(err)
        cli_logger.warning("{}", reason)  # todo: msg
        cli_logger.old_warning(logger, "{}", reason)
def _configure_key_pair(config):
    """Make sure ``config`` names an EC2 key pair and a private key file.

    When ``config["auth"]`` already holds ``ssh_private_key``, the head and
    worker node sections must both carry ``KeyName`` and ``config`` is
    returned as is. Otherwise up to ``MAX_NUM_KEYS`` candidate key pairs
    (produced by ``key_pair``) are tried: a candidate is accepted when both
    the EC2 key and the local key file exist, and it is created when
    neither exists. The chosen key name and path are written into
    ``config``.

    Args:
        config: Cluster configuration dict; updated in place.

    Returns:
        The same ``config`` object.

    Raises:
        ValueError: If no candidate ends up with a usable EC2 key.
        AssertionError: If a required ``KeyName`` is missing, or the
            private key file of the chosen key pair does not exist.
    """
    if "ssh_private_key" in config["auth"]:
        _set_config_info(keypair_src="config")

        # todo: verify schema beforehand?
        head_has_key_name = "KeyName" in config["head_node"]
        cli_logger.doassert(
            head_has_key_name,
            "`KeyName` missing for head node.")  # todo: err msg
        workers_have_key_name = "KeyName" in config["worker_nodes"]
        cli_logger.doassert(
            workers_have_key_name,
            "`KeyName` missing for worker nodes.")  # todo: err msg

        assert head_has_key_name
        assert workers_have_key_name
        return config

    _set_config_info(keypair_src="default")

    ec2 = _resource("ec2", config)

    # Try a few times to get or create a good key pair.
    MAX_NUM_KEYS = 30
    provider = config["provider"]
    requested_name = provider.get("key_pair", {}).get("key_name")
    region = provider["region"]
    for attempt in range(MAX_NUM_KEYS):
        key_name, key_path = key_pair(attempt, region, requested_name)
        key = _get_key(key_name, config)
        have_local_file = os.path.exists(key_path)

        # Found a good key.
        if key and have_local_file:
            break

        # Only one half of the pair exists: move on to the next candidate.
        if key or have_local_file:
            continue

        # We can safely create a new key.
        cli_logger.verbose(
            "Creating new key pair {} for use as the default.",
            cf.bold(key_name))
        cli_logger.old_info(
            logger, "_configure_key_pair: "
            "Creating new key pair {}", key_name)
        key = ec2.create_key_pair(KeyName=key_name)

        # We need to make sure to _create_ the file with the right
        # permissions. In order to do that we need to change the default
        # os.open behavior to include the mode we want.
        private_opener = partial(os.open, mode=0o600)
        with open(key_path, "w", opener=private_opener) as key_file:
            key_file.write(key.key_material)
        break

    if not key:
        exhausted_msg = (
            "No matching local key file for any of the key pairs in this "
            "account with ids from 0..{}. "
            "Consider deleting some unused keys pairs from your account.")
        cli_logger.abort(exhausted_msg, key_name)  # todo: err msg
        raise ValueError(exhausted_msg.format(key_name))

    key_file_present = os.path.exists(key_path)
    cli_logger.doassert(
        key_file_present, "Private key file " + cf.bold("{}") +
        " not found for " + cf.bold("{}"), key_path, key_name)  # todo: err msg
    assert key_file_present, \
        "Private key file {} not found for {}".format(key_path, key_name)

    cli_logger.old_info(
        logger, "_configure_key_pair: "
        "KeyName not specified for nodes, using {}", key_name)

    config["auth"]["ssh_private_key"] = key_path
    for section in ("head_node", "worker_nodes"):
        config[section]["KeyName"] = key_name

    return config
def _configure_iam_role(config):
    """Ensure the head node config carries an IAM instance profile.

    If ``config["head_node"]`` already contains ``IamInstanceProfile`` the
    config is returned unchanged. Otherwise the profile named
    ``DEFAULT_RAY_INSTANCE_PROFILE`` is looked up (and created when
    missing). If that profile has no roles, the role named
    ``DEFAULT_RAY_IAM_ROLE`` is looked up (and created when missing), the
    AmazonEC2FullAccess and AmazonS3FullAccess policies are attached to it
    and it is added to the profile. Finally the profile ARN is stored in
    ``config["head_node"]["IamInstanceProfile"]``.

    Args:
        config: Cluster configuration dict; updated in place.

    Returns:
        The same ``config`` object.

    Raises:
        AssertionError: If the instance profile or the role is still
            missing after the creation attempt.
    """
    if "IamInstanceProfile" in config["head_node"]:
        # The user supplied a profile; only record where it came from.
        _set_config_info(head_instance_profile_src="config")
        return config
    _set_config_info(head_instance_profile_src="default")

    profile = _get_instance_profile(DEFAULT_RAY_INSTANCE_PROFILE, config)

    if profile is None:
        cli_logger.verbose(
            "Creating new IAM instance profile {} for use as the default.",
            cf.bold(DEFAULT_RAY_INSTANCE_PROFILE))
        cli_logger.old_info(
            logger, "_configure_iam_role: "
            "Creating new instance profile {}", DEFAULT_RAY_INSTANCE_PROFILE)
        client = _client("iam", config)
        client.create_instance_profile(
            InstanceProfileName=DEFAULT_RAY_INSTANCE_PROFILE)
        # Fetch the profile again so the checks below see the lookup result.
        profile = _get_instance_profile(DEFAULT_RAY_INSTANCE_PROFILE, config)
        time.sleep(15)  # wait for propagation

    cli_logger.doassert(profile is not None,
                        "Failed to create instance profile.")  # todo: err msg
    assert profile is not None, "Failed to create instance profile"

    if not profile.roles:
        # The profile has no role yet: find or create the default one.
        role = _get_role(DEFAULT_RAY_IAM_ROLE, config)
        if role is None:
            cli_logger.verbose(
                "Creating new IAM role {} for "
                "use as the default instance role.",
                cf.bold(DEFAULT_RAY_IAM_ROLE))
            cli_logger.old_info(logger, "_configure_iam_role: "
                                "Creating new role {}", DEFAULT_RAY_IAM_ROLE)
            iam = _resource("iam", config)
            # Trust policy: allows the EC2 service to assume the role.
            iam.create_role(
                RoleName=DEFAULT_RAY_IAM_ROLE,
                AssumeRolePolicyDocument=json.dumps({
                    "Statement": [
                        {
                            "Effect": "Allow",
                            "Principal": {
                                "Service": "ec2.amazonaws.com"
                            },
                            "Action": "sts:AssumeRole",
                        },
                    ],
                }))
            role = _get_role(DEFAULT_RAY_IAM_ROLE, config)

            cli_logger.doassert(role is not None,
                                "Failed to create role.")  # todo: err msg
            assert role is not None, "Failed to create role"
        role.attach_policy(
            PolicyArn="arn:aws:iam::aws:policy/AmazonEC2FullAccess")
        role.attach_policy(
            PolicyArn="arn:aws:iam::aws:policy/AmazonS3FullAccess")
        profile.add_role(RoleName=role.name)
        time.sleep(15)  # wait for propagation

    cli_logger.old_info(
        logger, "_configure_iam_role: "
        "Role not specified for head node, using {}", profile.arn)
    config["head_node"]["IamInstanceProfile"] = {"Arn": profile.arn}

    return config
def log_to_cli(config):
    """Print a summary of the resolved AWS node configuration.

    Emits, inside a ``cli_logger`` group, the IAM profile, EC2 key pair,
    VPC subnets, security groups and AMI taken from ``config``. A value is
    printed once when the head and worker sections agree and separately
    otherwise. Each value is tagged with its recorded source from
    ``_log_info`` when that source is one of the allowed tags.

    Args:
        config: Cluster configuration dict with ``head_node`` and
            ``worker_nodes`` sections.
    """
    provider_name = PROVIDER_PRETTY_NAMES.get("aws")

    cli_logger.doassert(provider_name is not None,
                        "Could not find a pretty name for the AWS provider.")

    with cli_logger.group("{} config", provider_name):

        def same_everywhere(key):
            """Return True if head and worker nodes share ``key``'s value."""
            return config["head_node"][key] == config["worker_nodes"][key]

        # `allowed_tags` defaults to an immutable tuple rather than a list so
        # the default can never be shared and mutated across calls.
        def print_info(resource_string,
                       key,
                       head_src_key,
                       workers_src_key,
                       allowed_tags=("default",),
                       list_value=False):
            """Print one config entry for the head and worker nodes.

            Args:
                resource_string: Human-readable label of the resource.
                key: Key read from the head and worker node sections.
                head_src_key: ``_log_info`` key holding the head value's
                    source.
                workers_src_key: ``_log_info`` key holding the workers
                    value's source.
                allowed_tags: Sources that are shown as a tag.
                list_value: Render the value with
                    ``cli_logger.render_list`` when true.
            """
            head_tags = {}
            workers_tags = {}

            if _log_info[head_src_key] in allowed_tags:
                head_tags[_log_info[head_src_key]] = True
            if _log_info[workers_src_key] in allowed_tags:
                workers_tags[_log_info[workers_src_key]] = True

            head_value_str = config["head_node"][key]
            if list_value:
                head_value_str = cli_logger.render_list(head_value_str)

            if same_everywhere(key):
                cli_logger.labeled_value(  # todo: handle plural vs singular?
                    resource_string + " (head & workers)",
                    "{}",
                    head_value_str,
                    _tags=head_tags)
            else:
                workers_value_str = config["worker_nodes"][key]
                if list_value:
                    workers_value_str = cli_logger.render_list(
                        workers_value_str)

                cli_logger.labeled_value(
                    resource_string + " (head)",
                    "{}",
                    head_value_str,
                    _tags=head_tags)
                cli_logger.labeled_value(
                    resource_string + " (workers)",
                    "{}",
                    workers_value_str,
                    _tags=workers_tags)

        tags = {"default": _log_info["head_instance_profile_src"] == "default"}
        cli_logger.labeled_value(
            "IAM Profile",
            "{}",
            _arn_to_name(config["head_node"]["IamInstanceProfile"]["Arn"]),
            _tags=tags)

        print_info("EC2 Key pair", "KeyName", "keypair_src", "keypair_src")
        print_info(
            "VPC Subnets",
            "SubnetIds",
            "head_subnet_src",
            "workers_subnet_src",
            list_value=True)
        print_info(
            "EC2 Security groups",
            "SecurityGroupIds",
            "head_security_group_src",
            "workers_security_group_src",
            list_value=True)
        print_info(
            "EC2 AMI",
            "ImageId",
            "head_ami_src",
            "workers_ami_src",
            allowed_tags=("dlami",))

    cli_logger.newline()
# Demo of cli_logger output helpers; statement order defines the output order.

# Message with a rendered list as its format argument.
cli_logger.print("List: {}", cli_logger.render_list([1, 2, 3]))
cli_logger.newline()

# One message through each of the level-specific output methods.
cli_logger.very_verbose("Very verbose")
cli_logger.verbose("Verbose")
cli_logger.verbose_warning("Verbose warning")
cli_logger.verbose_error("Verbose error")
cli_logger.print("Info")
cli_logger.success("Success")
cli_logger.warning("Warning")
cli_logger.error("Error")
cli_logger.newline()

# Any exception raised by abort() is deliberately swallowed so the
# remaining demo statements still run.
try:
    cli_logger.abort("Abort")
except Exception:
    pass

# Same for a failing doassert().
try:
    cli_logger.doassert(False, "Assert")
except Exception:
    pass
cli_logger.newline()

# NOTE(review): presumably the first argument pre-answers the prompt so the
# demo does not block on input -- confirm against cli_logger.confirm.
cli_logger.confirm(True, "example")
cli_logger.newline()

# Context managers offered by cli_logger, each wrapping a single print.
with cli_logger.indented():
    cli_logger.print("Indented")
with cli_logger.group("Group"):
    cli_logger.print("Group contents")
with cli_logger.timed("Timed (unimplemented)"):
    cli_logger.print("Timed contents")
# NOTE(review): "Verbtaim" in the label below looks like a typo for
# "Verbatim"; left unchanged here because it is runtime output.
with cli_logger.verbatim_error_ctx("Verbtaim error"):
    cli_logger.print("Error contents")