Example 1
    def create_rhel_instance(self):
        """
        This function does the following:
        1. Create RHEL worker instances and copy the required AWS tags from
           the existing worker instances to the new RHEL instances
        2. Copy the IAM role from an existing worker to the new RHEL workers

        """
        cluster_id = get_infra_id(self.cluster_path)
        num_workers = int(os.environ.get("num_workers", 3))
        logging.info(f"Creating {num_workers} RHEL workers")
        for i in range(num_workers):
            self.gather_worker_data(f"no{i}")
            logging.info(f"Creating {i + 1}/{num_workers} worker")
            response = self.client.run_instances(
                BlockDeviceMappings=[
                    {
                        "DeviceName": config.ENV_DATA["root_disk"],
                        "Ebs": {
                            "DeleteOnTermination": True,
                            "VolumeSize": config.ENV_DATA["root_disk_size"],
                            "VolumeType": "gp2",
                        },
                    },
                ],
                ImageId=config.ENV_DATA["rhel_worker_ami"],
                SubnetId=self.worker_subnet,
                InstanceType=config.ENV_DATA["rhel_worker_instance_type"],
                MaxCount=1,
                MinCount=1,
                Monitoring={"Enabled": False},
                SecurityGroupIds=[
                    self.worker_security_group[0]["GroupId"],
                ],
                KeyName="openshift-dev",
            )
            inst_id = response["Instances"][0]["InstanceId"]
            worker_ec2 = boto3.resource("ec2", region_name=self.region)
            worker_instance = worker_ec2.Instance(inst_id)
            worker_instance.wait_until_running()
            worker_name = f"{cluster_id}-rhel-worker-{i}"
            self.rhel_worker_list[worker_name] = worker_instance
            worker_ec2.create_tags(
                Resources=[inst_id],
                Tags=[
                    {
                        "Key": "Name",
                        "Value": f"{worker_name}"
                    },
                    {
                        "Key": self.worker_tag[0],
                        "Value": self.worker_tag[1]
                    },
                ],
            )
            logging.info(f"Worker IAM role: {self.worker_iam_role}")
            self.client.associate_iam_instance_profile(
                IamInstanceProfile=self.worker_iam_role,
                InstanceId=inst_id,
            )
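
The Name tags written here are what the RHEL worker lookup functions further below match with a wildcard filter. A minimal, self-contained sketch of that relationship, using an illustrative infra id:

    from fnmatch import fnmatch

    infra_id = "mycluster-x7k2p"            # hypothetical get_infra_id() result
    name_tag = f"{infra_id}-rhel-worker-0"  # Name tag written above
    assert fnmatch(name_tag, f"{infra_id}*rhel-worker*")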
Example 2
    def _prepare_upi_rhel_node(self, node_conf):
        """
        Handle RHEL worker instance creation
        1. Create a RHEL worker instance and copy the required AWS tags from
           the existing worker instances to the new RHEL instance
        2. Copy IAM role from existing worker to new RHEL worker

        """
        cluster_id = get_infra_id(self.cluster_path)
        node_id = node_conf['node_id']
        if not node_conf.get('zone'):
            # random.randint is inclusive on both ends and zones are
            # zero-indexed, so the upper bound must be num_zone - 1
            num_zone = get_az_count()
            zone = random.randint(0, num_zone - 1)
        else:
            zone = node_conf.get('zone')
        logger.info("Creating RHEL worker node")
        self.gather_worker_data(f'no{zone}')
        response = self.client.run_instances(
            BlockDeviceMappings=[
                {
                    'DeviceName': node_conf['root_disk'],
                    'Ebs': {
                        'DeleteOnTermination': True,
                        'VolumeSize': node_conf['root_disk_size'],
                        'VolumeType': 'gp2'
                    },
                },
            ],
            ImageId=node_conf['rhel_worker_ami'],
            SubnetId=self.worker_subnet,
            InstanceType=node_conf['rhel_worker_instance_type'],
            MaxCount=1,
            MinCount=1,
            Monitoring={
                'Enabled': False
            },
            SecurityGroupIds=[
                self.worker_security_group[0]['GroupId'],
            ],
            KeyName='openshift-dev'
        )
        inst_id = response['Instances'][0]['InstanceId']
        worker_ec2 = boto3.resource('ec2', region_name=self.region)
        worker_instance = worker_ec2.Instance(inst_id)
        worker_instance.wait_until_running()
        worker_name = f'{cluster_id}-rhel-worker-{node_id}'
        worker_ec2.create_tags(
            Resources=[inst_id],
            Tags=[
                {'Key': 'Name', 'Value': f'{worker_name}'},
                {'Key': self.worker_tag[0], 'Value': self.worker_tag[1]}
            ]
        )
        logger.info(f'Worker IAM role: {self.worker_iam_role}')
        self.client.associate_iam_instance_profile(
            IamInstanceProfile=self.worker_iam_role,
            InstanceId=inst_id,
        )
        return worker_instance
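
A hypothetical node_conf for the call above; the keys are the ones the function reads, the values are illustrative placeholders:

    node_conf = {
        'node_id': 3,                                # suffix for the Name tag
        'zone': 1,                                   # optional; picked at random when omitted
        'root_disk': '/dev/sda1',                    # assumed root device name
        'root_disk_size': 120,                       # GiB
        'rhel_worker_ami': 'ami-0123456789abcdef0',  # placeholder AMI ID
        'rhel_worker_instance_type': 'm5.xlarge',    # illustrative type
    }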
Example 3
    def add_volume(self, size=100):
        """
        Add a new volume to all the workers

        Args:
            size (int): Size of volume in GB (default: 100)
        """
        cluster_id = get_infra_id(self.cluster_path)
        worker_pattern = f'{cluster_id}-worker*'
        logger.info(f'Worker pattern: {worker_pattern}')
        self.create_ebs_volumes(worker_pattern, size)
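
A hypothetical call, from within the deployment class:

    # attach a new 256 GB volume to every worker
    self.add_volume(size=256)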
Example 4
def get_node_data_aws():
    """
    Retrieve the bootstrap public IP and master node private IPs running in AWS

    Raises:
        NodeNotFoundError: If we are unable to find the bootstrap node or IP

    Returns:
        dict: bootstrap and master node IP data

    """
    session = boto3.Session()
    credentials = session.get_credentials().get_frozen_credentials()
    ec2_driver = get_driver(Provider.EC2)
    driver = ec2_driver(
        credentials.access_key, credentials.secret_key,
        region=config.ENV_DATA['region']
    )
    cluster_path = config.ENV_DATA['cluster_path']
    infra_id = get_infra_id(cluster_path)
    bootstrap_name = f"{infra_id}-bootstrap"
    master_pattern = f"{infra_id}-master"
    data = dict()
    try:
        bootstrap_node = [
            node for node in driver.list_nodes()
            if bootstrap_name == node.name
        ][0]
        bootstrap_ip = bootstrap_node.public_ips[0]
        logger.info(
            "Found bootstrap node %s with IP %s", bootstrap_name, bootstrap_ip
        )
        data['bootstrap_ip'] = bootstrap_ip

    except IndexError:
        raise NodeNotFoundError(
            f"Unable to find bootstrap node with name {bootstrap_name}"
        )
    master_nodes = [
        node for node in driver.list_nodes()
        if master_pattern in node.name
    ]
    master_ips = [master.private_ips[0] for master in master_nodes]
    data['master_ips'] = [ip for ip in master_ips if ip is not None]
    if len(data['master_ips']) < config.ENV_DATA['master_replicas']:
        logger.warning('IP data was not found for all master nodes')
    logger.debug(data)
    return data
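
The shape of the returned dict, with placeholder addresses:

    node_data = {
        'bootstrap_ip': '54.200.0.10',                          # public IP, illustrative
        'master_ips': ['10.0.0.11', '10.0.0.12', '10.0.0.13'],  # private IPs
    }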
Example 5
    def get_rhel_worker_instances(self):
        """
        Get the list of RHEL worker instance IDs

        Returns:
            list: instance IDs of the RHEL workers

        """
        rhel_workers = []
        worker_pattern = get_infra_id(self.cluster_path) + "*rhel-worker*"
        worker_filter = [{'Name': 'tag:Name', 'Values': [worker_pattern]}]

        response = self.client.describe_instances(Filters=worker_filter)
        for reservation in response['Reservations']:
            # a reservation may hold more than one instance
            for instance in reservation['Instances']:
                rhel_workers.append(instance['InstanceId'])
        return rhel_workers
Example 6
    def deploy(self, log_level=""):
        self.flexy_instance.deploy(log_level)
        self.test_cluster()
        # Get all compute instances and attach one new disk to each
        pattern = "-".join(
            [get_infra_id(config.ENV_DATA["cluster_path"]), "compute"]
        )
        for instance in self.utils.get_instances_with_pattern(pattern):
            vol = self.utils.create_volume(
                name=f"{pattern}-disk0-{instance.name[-1]}",
                size=config.FLEXY["volume_size"],
            )
            # wait till the volume is available
            sample = TimeoutSampler(
                300, 10, self.utils.check_expected_vol_status, vol, "available"
            )
            if not sample.wait_for_func_status(True):
                logger.error("Volume failed to reach 'available' state")
                raise exceptions.PSIVolumeNotInExpectedState
            # attach the volume to the instance
            self.utils.attach_volume(vol, instance.id)
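
For readers outside this codebase, a minimal sketch of the polling behavior TimeoutSampler provides here; this is a simplified stand-in, not the project's actual implementation:

    import time

    def wait_for_func_status(timeout, sleep, func, *args, expected=True):
        """Poll func(*args) until it returns `expected` or `timeout` elapses."""
        deadline = time.time() + timeout
        while time.time() < deadline:
            if func(*args) == expected:
                return True
            time.sleep(sleep)
        return False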
Example 7
def get_rhel_worker_instances(cluster_path):
    """
    Get the list of RHEL worker instance IDs

    Args:
        cluster_path (str): The cluster path

    Returns:
        list: instance IDs of the RHEL workers

    """
    aws = AWS()
    rhel_workers = []
    worker_pattern = get_infra_id(cluster_path) + "*rhel-worker*"
    worker_filter = [{'Name': 'tag:Name', 'Values': [worker_pattern]}]

    response = aws.ec2_client.describe_instances(Filters=worker_filter)
    for reservation in response['Reservations']:
        # a reservation may hold more than one instance; with no matching
        # reservations this returns an empty list, per the docstring
        for instance in reservation['Instances']:
            rhel_workers.append(instance['InstanceId'])
    return rhel_workers
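
A hypothetical usage; the cluster path and returned IDs are placeholders:

    workers = get_rhel_worker_instances('/home/user/clusters/mycluster')
    # e.g. ['i-0abc123def4567890', 'i-0fedcba9876543210'], or [] when none exist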
Example 8
    def generate_cluster_info(self):
        """
        Generates the cluster information file
        """
        logger.info("Generating cluster information file")

        # get kubeconfig and upload to httpd server
        kubeconfig = os.path.join(self.cluster_path,
                                  config.RUN.get('kubeconfig_location'))
        remote_path = os.path.join(config.ENV_DATA.get('path_to_upload'),
                                   f"{config.RUN.get('run_id')}_kubeconfig")
        upload_file(config.ENV_DATA.get('httpd_server'), kubeconfig,
                    remote_path, config.ENV_DATA.get('httpd_server_user'),
                    config.ENV_DATA.get('httpd_server_password'))

        # Form the kubeconfig url path. str.lstrip strips a character set,
        # not a prefix, so remove the httpd document root explicitly.
        httpd_doc_root = '/var/www/html/'
        kubeconfig_url_path = os.path.join(
            'http://', config.ENV_DATA.get('httpd_server'),
            remote_path[len(httpd_doc_root):]
            if remote_path.startswith(httpd_doc_root) else remote_path)
        config.ENV_DATA['kubeconfig_url'] = kubeconfig_url_path

        # get the infra_id
        infra_id = get_infra_id(self.cluster_path)
        config.ENV_DATA['infra_id'] = infra_id

        # get the cluster id
        cluster_id = get_cluster_id(self.cluster_path)
        config.ENV_DATA['cluster_id'] = cluster_id

        # fetch the installer version
        installer_version_str = run_cmd(
            f"{config.RUN['bin_dir']}/openshift-install version")
        installer_version = installer_version_str.split()[1]
        config.ENV_DATA['installer_version'] = installer_version

        # get the major and minor version of OCP
        version_obj = Version(installer_version)
        ocp_version_x = version_obj.major
        ocp_version_y = version_obj.minor
        config.ENV_DATA['ocp_version_x'] = ocp_version_x
        config.ENV_DATA['ocp_version_y'] = ocp_version_y

        # generate the cluster info yaml file
        terraform_var_template = "cluster_info.yaml.j2"
        terraform_var_template_path = os.path.join("ocp-deployment",
                                                   terraform_var_template)
        terraform_config_str = self._templating.render_template(
            terraform_var_template_path, config.ENV_DATA)
        terraform_var_yaml = os.path.join(self.cluster_path,
                                          constants.TERRAFORM_DATA_DIR,
                                          constants.SCALEUP_TERRAFORM_DATA_DIR,
                                          "cluster_info.yaml")

        with open(terraform_var_yaml, "w") as f:
            f.write(terraform_config_str)

        # config.ENV_DATA['dns_server'] = config.ENV_DATA['dns']
        template_vars = (f"\"dns_server: {config.ENV_DATA['dns']}"
                         f"\\nremove_rhcos_worker: 'yes'\\n\"")

        replace_content_in_file(terraform_var_yaml, "PLACEHOLDER",
                                template_vars)
        logger.info(f"cluster yaml file: {terraform_var_yaml}")
Example 9
    def host_network_update(self):
        """
        Update security group rules for HostNetwork
        """
        cluster_id = get_infra_id(self.cluster_path)
        worker_pattern = f"{cluster_id}-worker*"
        worker_instances = self.aws.get_instances_by_name_pattern(
            worker_pattern
        )
        security_groups = worker_instances[0]["security_groups"]
        sg_id = security_groups[0]["GroupId"]
        security_group = self.aws.ec2_resource.SecurityGroup(sg_id)
        # The ports are not 100% clear yet. Taken from doc:
        # https://docs.google.com/document/d/1c23ooTkW7cdbHNRbCTztprVU6leDqJxcvFZ1ZvK2qtU/edit#
        security_group.authorize_ingress(
            DryRun=False,
            IpPermissions=[
                {
                    "FromPort": 6800,
                    "ToPort": 7300,
                    "IpProtocol": "tcp",
                    "UserIdGroupPairs": [
                        {"Description": "Ceph OSDs", "GroupId": sg_id},
                    ],
                },
                {
                    "FromPort": 3300,
                    "ToPort": 3300,
                    "IpProtocol": "tcp",
                    "UserIdGroupPairs": [
                        {"Description": "Ceph MONs rule1", "GroupId": sg_id},
                    ],
                },
                {
                    "FromPort": 6789,
                    "ToPort": 6789,
                    "IpProtocol": "tcp",
                    "UserIdGroupPairs": [
                        {"Description": "Ceph MONs rule2", "GroupId": sg_id},
                    ],
                },
                {
                    "FromPort": 8443,
                    "ToPort": 8443,
                    "IpProtocol": "tcp",
                    "UserIdGroupPairs": [
                        {"Description": "Ceph Dashboard rule1", "GroupId": sg_id},
                    ],
                },
                {
                    "FromPort": 8080,
                    "ToPort": 8080,
                    "IpProtocol": "tcp",
                    "UserIdGroupPairs": [
                        {"Description": "Ceph Dashboard rule2", "GroupId": sg_id},
                    ],
                },
            ],
        )
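
One caveat: EC2 rejects rules that already exist, so re-running this update raises a ClientError. A hedged sketch of a tolerant wrapper (the error code is the documented EC2 one; the function name is ours):

    from botocore.exceptions import ClientError

    def authorize_ingress_idempotent(security_group, ip_permissions):
        """Add ingress rules, ignoring ones that already exist."""
        try:
            security_group.authorize_ingress(
                DryRun=False, IpPermissions=ip_permissions
            )
        except ClientError as err:
            # EC2 returns InvalidPermission.Duplicate for pre-existing rules
            if err.response['Error']['Code'] != 'InvalidPermission.Duplicate':
                raise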