# Example 1
class AWSBase(CloudDeploymentBase):
    """
    Base class for AWS deployments, common to both IPI and UPI flavors.
    """

    # default storage class for StorageCluster CRD on AWS platform
    DEFAULT_STORAGECLASS = "gp2"

    def __init__(self):
        """
        This would be base for both IPI and UPI deployment
        """
        super(AWSBase, self).__init__()
        self.aws = AWSUtil(self.region)
        # dict of cluster prefixes with special handling rules (for existence
        # check or during a cluster cleanup)
        self.cluster_prefixes_special_rules = CLUSTER_PREFIXES_SPECIAL_RULES

    def host_network_update(self):
        """
        Update security group rules for HostNetwork.

        Authorizes TCP ingress on the workers' security group for the Ceph
        ports (OSDs, MONs and dashboard), allowing traffic from instances in
        the same security group.
        """
        cluster_id = get_infra_id(self.cluster_path)
        worker_pattern = f"{cluster_id}-worker*"
        worker_instances = self.aws.get_instances_by_name_pattern(
            worker_pattern)
        # all workers share the same security group, so the first one is
        # representative
        security_groups = worker_instances[0]["security_groups"]
        sg_id = security_groups[0]["GroupId"]
        security_group = self.aws.ec2_resource.SecurityGroup(sg_id)
        # The ports are not 100 % clear yet. Taken from doc:
        # https://docs.google.com/document/d/1c23ooTkW7cdbHNRbCTztprVU6leDqJxcvFZ1ZvK2qtU/edit#
        # (from_port, to_port, description) for each TCP ingress rule
        ingress_rules = [
            (6800, 7300, "Ceph OSDs"),
            (3300, 3300, "Ceph MONs rule1"),
            (6789, 6789, "Ceph MONs rule2"),
            (8443, 8443, "Ceph Dashboard rule1"),
            (8080, 8080, "Ceph Dashboard rule2"),
        ]
        security_group.authorize_ingress(
            DryRun=False,
            IpPermissions=[
                {
                    "FromPort": from_port,
                    "ToPort": to_port,
                    "IpProtocol": "tcp",
                    "UserIdGroupPairs": [
                        {
                            "Description": description,
                            "GroupId": sg_id,
                        },
                    ],
                }
                for from_port, to_port, description in ingress_rules
            ],
        )

    def add_node(self):
        # TODO: Implement later
        super(AWSBase, self).add_node()

    def check_cluster_existence(self, cluster_name_prefix):
        """
        Check cluster existence according to cluster name prefix

        Args:
            cluster_name_prefix (str): The cluster name prefix to look for

        Returns:
            bool: True if a cluster with the same name prefix already exists,
                False otherwise

        """
        # match by prefix, not exact name
        cluster_name_pattern = cluster_name_prefix + "*"
        instances = self.aws.get_instances_by_name_pattern(
            cluster_name_pattern)
        instance_objs = [
            self.aws.get_ec2_instance(ins.get("id")) for ins in instances
        ]
        # terminated instances are leftovers of deleted clusters and do not
        # count as an existing cluster
        non_terminated_instances = [
            ins for ins in instance_objs
            if ins.state.get("Code") != constants.INSTANCE_TERMINATED
        ]
        if non_terminated_instances:
            logger.error(
                f"Non terminated EC2 instances with the same name prefix were"
                f" found: {[ins.id for ins in non_terminated_instances]}")
            return True
        return False
# Example 2
def get_clusters(time_to_delete, region_name, prefixes_hours_to_spare):
    """
    Get all cluster names that their EC2 instances running time is greater
    than the specified time to delete

    Args:
        time_to_delete (int): The maximum time in seconds that is allowed
            for clusters to continue running
        region_name (str): The name of the AWS region to delete the resources from
        prefixes_hours_to_spare (dict): Dictionaries of the cluster prefixes to spare
            along with the maximum time in hours that is allowed for spared
            clusters to continue running

    Returns:
        tuple: List of the cluster names (e.g ebenahar-cluster-gqtd4) to be provided to the
            ci-cleanup script, a list of VPCs that are part of cloudformation,
            and a list of remaining clusters

    """
    def determine_cluster_deletion(ec2_instances, cluster_name):
        """
        Return True if any running instance of the cluster exceeded its
        allowed running time, False otherwise (including spared clusters).
        """
        for instance in ec2_instances:
            allowed_running_time = time_to_delete
            do_not_delete = False
            if instance.state["Name"] == "running":
                for prefix, hours in prefixes_hours_to_spare.items():
                    # case insensitive 'startswith'
                    if bool(re.match(prefix, cluster_name, re.I)):
                        if hours == 'never':
                            do_not_delete = True
                        else:
                            allowed_running_time = int(hours) * 60 * 60
                        break
                if do_not_delete:
                    logger.info(
                        "%s marked as 'do not delete' and will not be "
                        "destroyed", cluster_name)
                    return False
                else:
                    launch_time = instance.launch_time
                    # use the launch time's tzinfo so the subtraction mixes
                    # no naive/aware datetimes
                    current_time = datetime.datetime.now(launch_time.tzinfo)
                    running_time = current_time - launch_time
                    logger.info(
                        f"Instance {[tag['Value'] for tag in instance.tags if tag['Key'] == 'Name'][0]} "
                        f"(id: {instance.id}) running time is {running_time} hours while the allowed"
                        f" running time for it is {allowed_running_time/3600} hours"
                    )
                    if running_time.total_seconds() > allowed_running_time:
                        return True
        return False

    aws = AWS(region_name=region_name)
    clusters_to_delete = list()
    remaining_clusters = list()
    cloudformation_vpc_names = list()
    vpcs = aws.ec2_client.describe_vpcs()['Vpcs']
    vpc_ids = [vpc['VpcId'] for vpc in vpcs]
    vpc_objs = [aws.ec2_resource.Vpc(vpc_id) for vpc_id in vpc_ids]
    for vpc_obj in vpc_objs:
        vpc_tags = vpc_obj.tags
        if vpc_tags:
            cloudformation_vpc_name = [
                tag['Value'] for tag in vpc_tags
                if tag['Key'] == defaults.AWS_CLOUDFORMATION_TAG
            ]
            if cloudformation_vpc_name:
                # cloudformation based VPCs are handled separately below
                cloudformation_vpc_names.append(cloudformation_vpc_name[0])
                continue
            vpc_name = [
                tag['Value'] for tag in vpc_tags if tag['Key'] == 'Name'
            ][0]
            cluster_name = vpc_name.replace('-vpc', '')
            # Materialize the boto3 ResourceCollection: a collection object
            # is always truthy, so testing it directly would make the
            # "no instances" branch below unreachable (bugfix).
            vpc_instances = list(vpc_obj.instances.all())
            if not vpc_instances:
                clusters_to_delete.append(cluster_name)
                continue

            # Append to clusters_to_delete if cluster should be deleted
            if determine_cluster_deletion(vpc_instances, cluster_name):
                clusters_to_delete.append(cluster_name)
            else:
                remaining_clusters.append(cluster_name)
        else:
            logger.info("No tags found for VPC")

    # Get all cloudformation based clusters to delete
    cf_clusters_to_delete = list()
    for vpc_name in cloudformation_vpc_names:
        instance_dicts = aws.get_instances_by_name_pattern(
            f"{vpc_name.replace('-vpc', '')}*")
        ec2_instances = [
            aws.get_ec2_instance(instance_dict['id'])
            for instance_dict in instance_dicts
        ]
        if not ec2_instances:
            continue
        cluster_io_tag = None
        for instance in ec2_instances:
            cluster_io_tag = [
                tag['Key'] for tag in instance.tags
                if 'kubernetes.io/cluster' in tag['Key']
            ]
            if cluster_io_tag:
                break
        if not cluster_io_tag:
            logger.warning(
                "Unable to find valid cluster IO tag from ec2 instance tags "
                "for VPC %s. This is probably not an OCS cluster VPC!",
                vpc_name)
            continue
        cluster_name = cluster_io_tag[0].replace('kubernetes.io/cluster/', '')
        if determine_cluster_deletion(ec2_instances, cluster_name):
            cf_clusters_to_delete.append(cluster_name)
        else:
            remaining_clusters.append(cluster_name)

    return clusters_to_delete, cf_clusters_to_delete, remaining_clusters
# Example 3
class AWSBase(Deployment):
    """
    Base class for AWS deployments, common to both IPI and UPI flavors.
    """

    def __init__(self):
        """
        This would be base for both IPI and UPI deployment
        """
        super(AWSBase, self).__init__()
        self.region = config.ENV_DATA['region']
        self.aws = AWSUtil(self.region)
        # prefer an explicitly configured cluster name; fall back to the
        # name derived from the cluster path
        if config.ENV_DATA.get('cluster_name'):
            self.cluster_name = config.ENV_DATA['cluster_name']
        else:
            self.cluster_name = get_cluster_name(self.cluster_path)

    def create_ebs_volumes(self, worker_pattern, size=100):
        """
        Add new ebs volumes to the workers

        Args:
            worker_pattern (str):  Worker name pattern e.g.:
                cluster-55jx2-worker*
            size (int): Size in GB (default: 100)
        """
        worker_instances = self.aws.get_instances_by_name_pattern(
            worker_pattern)
        # create/attach volumes for all workers in parallel
        with parallel() as p:
            for worker in worker_instances:
                logger.info(f"Creating and attaching {size} GB "
                            f"volume to {worker['name']}")
                p.spawn(
                    self.aws.create_volume_and_attach,
                    availability_zone=worker['avz'],
                    instance_id=worker['id'],
                    name=f"{worker['name']}_extra_volume",
                    size=size,
                )

    def add_volume(self, size=100):
        """
        Add a new volume to all the workers

        Args:
            size (int): Size of volume in GB (default: 100)
        """
        cluster_id = get_infra_id(self.cluster_path)
        worker_pattern = f'{cluster_id}-worker*'
        logger.info(f'Worker pattern: {worker_pattern}')
        self.create_ebs_volumes(worker_pattern, size)

    def host_network_update(self):
        """
        Update security group rules for HostNetwork.

        Authorizes TCP ingress on the workers' security group for the Ceph
        ports (OSDs, MONs and dashboard), allowing traffic from instances in
        the same security group.
        """
        cluster_id = get_infra_id(self.cluster_path)
        worker_pattern = f'{cluster_id}-worker*'
        worker_instances = self.aws.get_instances_by_name_pattern(
            worker_pattern)
        # all workers share the same security group, so the first one is
        # representative
        security_groups = worker_instances[0]['security_groups']
        sg_id = security_groups[0]['GroupId']
        security_group = self.aws.ec2_resource.SecurityGroup(sg_id)
        # The ports are not 100 % clear yet. Taken from doc:
        # https://docs.google.com/document/d/1c23ooTkW7cdbHNRbCTztprVU6leDqJxcvFZ1ZvK2qtU/edit#
        # (from_port, to_port, description) for each TCP ingress rule
        ingress_rules = [
            (6800, 7300, 'Ceph OSDs'),
            (3300, 3300, 'Ceph MONs rule1'),
            (6789, 6789, 'Ceph MONs rule2'),
            (8443, 8443, 'Ceph Dashboard rule1'),
            (8080, 8080, 'Ceph Dashboard rule2'),
        ]
        security_group.authorize_ingress(
            DryRun=False,
            IpPermissions=[
                {
                    'FromPort': from_port,
                    'ToPort': to_port,
                    'IpProtocol': 'tcp',
                    'UserIdGroupPairs': [
                        {
                            'Description': description,
                            'GroupId': sg_id,
                        },
                    ],
                }
                for from_port, to_port, description in ingress_rules
            ])

    def add_node(self):
        # TODO: Implement later
        super(AWSBase, self).add_node()

    def check_cluster_existence(self, cluster_name_prefix):
        """
        Check cluster existence according to cluster name prefix

        Args:
            cluster_name_prefix (str): The cluster name prefix to look for

        Returns:
            bool: True if a cluster with the same name prefix already exists,
                False otherwise

        """
        # Append a wildcard so the lookup really matches by prefix; the bare
        # prefix would only hit an exact instance-name match (bugfix).
        cluster_name_pattern = cluster_name_prefix + "*"
        instances = self.aws.get_instances_by_name_pattern(
            cluster_name_pattern)
        instance_objs = [
            self.aws.get_ec2_instance(ins.get('id')) for ins in instances
        ]
        # terminated instances are leftovers of deleted clusters and do not
        # count as an existing cluster
        non_terminated_instances = [
            ins for ins in instance_objs
            if ins.state.get('Code') != constants.INSTANCE_TERMINATED
        ]
        if non_terminated_instances:
            logger.error(
                f"Non terminated EC2 instances with the same name prefix were"
                f" found: {[ins.id for ins in non_terminated_instances]}")
            return True
        return False
# Example 4
class AWSBase(CloudDeploymentBase):
    """
    Base class for AWS deployments, common to both IPI and UPI flavors.
    """

    # default storage class for StorageCluster CRD on AWS platform
    DEFAULT_STORAGECLASS = "gp2"

    def __init__(self):
        """
        This would be base for both IPI and UPI deployment
        """
        super(AWSBase, self).__init__()
        self.aws = AWSUtil(self.region)

    def host_network_update(self):
        """
        Update security group rules for HostNetwork.

        Authorizes TCP ingress on the workers' security group for the Ceph
        ports (OSDs, MONs and dashboard), allowing traffic from instances in
        the same security group.
        """
        cluster_id = get_infra_id(self.cluster_path)
        worker_pattern = f'{cluster_id}-worker*'
        worker_instances = self.aws.get_instances_by_name_pattern(
            worker_pattern)
        # all workers share the same security group, so the first one is
        # representative
        security_groups = worker_instances[0]['security_groups']
        sg_id = security_groups[0]['GroupId']
        security_group = self.aws.ec2_resource.SecurityGroup(sg_id)
        # The ports are not 100 % clear yet. Taken from doc:
        # https://docs.google.com/document/d/1c23ooTkW7cdbHNRbCTztprVU6leDqJxcvFZ1ZvK2qtU/edit#
        # (from_port, to_port, description) for each TCP ingress rule
        ingress_rules = [
            (6800, 7300, 'Ceph OSDs'),
            (3300, 3300, 'Ceph MONs rule1'),
            (6789, 6789, 'Ceph MONs rule2'),
            (8443, 8443, 'Ceph Dashboard rule1'),
            (8080, 8080, 'Ceph Dashboard rule2'),
        ]
        security_group.authorize_ingress(
            DryRun=False,
            IpPermissions=[
                {
                    'FromPort': from_port,
                    'ToPort': to_port,
                    'IpProtocol': 'tcp',
                    'UserIdGroupPairs': [
                        {
                            'Description': description,
                            'GroupId': sg_id,
                        },
                    ],
                }
                for from_port, to_port, description in ingress_rules
            ])

    def add_node(self):
        # TODO: Implement later
        super(AWSBase, self).add_node()

    def check_cluster_existence(self, cluster_name_prefix):
        """
        Check cluster existence according to cluster name prefix

        Args:
            cluster_name_prefix (str): The cluster name prefix to look for

        Returns:
            bool: True if a cluster with the same name prefix already exists,
                False otherwise

        """
        # match by prefix, not exact name
        cluster_name_pattern = cluster_name_prefix + "*"
        instances = self.aws.get_instances_by_name_pattern(
            cluster_name_pattern)
        instance_objs = [
            self.aws.get_ec2_instance(ins.get('id')) for ins in instances
        ]
        # terminated instances are leftovers of deleted clusters and do not
        # count as an existing cluster
        non_terminated_instances = [
            ins for ins in instance_objs
            if ins.state.get('Code') != constants.INSTANCE_TERMINATED
        ]
        if non_terminated_instances:
            logger.error(
                f"Non terminated EC2 instances with the same name prefix were"
                f" found: {[ins.id for ins in non_terminated_instances]}")
            return True
        return False