Esempio n. 1
0
def respawn_elastigroup(elastigroup_id: str, stack_name: str, region: str, batch_size: int):
    '''
    Respawn all instances in the ElastiGroup.

    Starts a deploy through ``elastigroup_api.deploy`` and then polls
    ``elastigroup_api.deploy_status`` every two seconds until every deploy
    returned by the API has either reached 100% progress or a status listed
    in ``ELASTIGROUP_TERMINATED_DEPLOY_STATUS``.

    :param elastigroup_id: ID of the ElastiGroup to redeploy.
    :param stack_name: stack name; used to look up the Spotinst account and in log output.
    :param region: region used to look up the Spotinst account.
    :param batch_size: deploy batch size; ``None`` or a value below 1 falls
        back to ``DEFAULT_BATCH_SIZE``.
    '''

    if batch_size is None or batch_size < 1:
        batch_size = DEFAULT_BATCH_SIZE

    spotinst_account = elastigroup_api.get_spotinst_account_data(region, stack_name)

    info('Redeploying the cluster for ElastiGroup {} (ID {})'.format(stack_name, elastigroup_id))

    deploy_output = elastigroup_api.deploy(batch_size=batch_size, grace_period=600, elastigroup_id=elastigroup_id,
                                           spotinst_account_data=spotinst_account)

    deploy_count = len(deploy_output)
    # Finished deploys are tracked by ID. A plain counter would count the same
    # finished deploy again on every polling round, ending the wait too early
    # or overshooting the total and never terminating.
    finished_deploy_ids = set()
    with Action('Waiting for deploy to complete. Total of {} deploys'.format(deploy_count)) as act:
        while True:
            for deploy in deploy_output:
                if deploy['id'] in finished_deploy_ids:
                    # already reported as finished, no need to query it again
                    continue
                deploy_status = elastigroup_api.deploy_status(deploy['id'], elastigroup_id, spotinst_account)
                for ds in deploy_status:
                    if ds['id'] != deploy['id']:
                        continue
                    if ds['progress']['value'] >= 100\
                            or ds['status'].lower() in ELASTIGROUP_TERMINATED_DEPLOY_STATUS:
                        finished_deploy_ids.add(ds['id'])
                        info('Deploy {} finished with status {}'.format(ds['id'], ds['status']))

            if all(deploy['id'] in finished_deploy_ids for deploy in deploy_output):
                break
            time.sleep(2)
            act.progress()
Esempio n. 2
0
def update_security_group(region_name: str, security_group: str, trusted_addresses: set):
    '''
    Restrict matching security groups to the trusted networks.

    The trusted networks are consolidated with ever shorter prefix lengths
    until at most 50 remain. For every security group in the region whose
    name contains ``security_group``, grants outside that list are revoked
    and TCP port 443 is authorized for each remaining network.

    :param region_name: region to connect to.
    :param security_group: substring that selects the security groups by name.
    :param trusted_addresses: networks that should keep access.
    '''
    allowed = trusted_addresses
    prefix_length = 31
    # FIXME: the network count depends on the existing entries and the port count!
    while len(allowed) > 50:
        allowed = consolidate_networks(allowed, prefix_length)
        prefix_length -= 1
    info("{}/{} Prefixlen: {}, {} networks: {}".format(region_name, security_group, prefix_length, len(allowed), allowed))

    connection = boto.ec2.connect_to_region(region_name)
    for group in connection.get_all_security_groups():
        if security_group not in group.name:
            continue

        for rule in group.rules:
            info(
                "Entrys from {}: {} {} {} {}".format(
                    group.name, rule.ip_protocol, rule.from_port, rule.to_port, rule.grants
                )
            )
            # Build the full list of grants first, before any of them is revoked.
            existing_grants = [IPNetwork("{}".format(entry)) for entry in rule.grants]
            for existing in existing_grants:
                if existing in allowed:
                    continue
                warning("Remove {} from security group {}".format(existing, group.name))
                group.revoke(
                    ip_protocol=rule.ip_protocol, from_port=rule.from_port, to_port=rule.to_port, cidr_ip=existing
                )

        with Action("Updating security group {}..".format(group.name)) as act:
            for cidr in sorted(allowed):
                try:
                    group.authorize(ip_protocol="tcp", from_port=443, to_port=443, cidr_ip=cidr)
                except boto.exception.EC2ResponseError as e:
                    # a rule that is already present is fine, anything else is a real error
                    if "already exists" not in e.message:
                        raise
                act.progress()
def run_linter(spec_file, verbose: bool = False):
    '''
    Lint an OpenAPI spec and return the sorted list of issues.

    The spec is loaded as YAML, passed through ``compatibility_layer`` and
    validated. If validation raises, a single issue describing the error is
    returned. Otherwise every module-level function whose name starts with
    ``lint_`` is called with the spec and the resolver; each item it yields
    is either a location or a ``(location, message)`` tuple. The function's
    docstring becomes the guideline of the resulting issue.

    :param spec_file: YAML stream or string accepted by ``yaml.safe_load``.
    :param verbose: print progress information while linting.
    '''
    spec = compatibility_layer(yaml.safe_load(spec_file))
    if verbose:
        info('Validating OpenAPI spec..')
    try:
        resolver = validate_spec(spec)
    except Exception as e:
        validation_failure = Issue(
            location='',
            message='Error during Swagger schema validation:\n{}'.format(e),
            guideline='Must: Provide API Reference Definition using OpenAPI'
        )
        return [validation_failure]

    # collect all "rules" defined as functions starting with "lint_"
    lint_functions = [obj for key, obj in globals().items() if key.startswith('lint_')]
    found = []
    for lint in lint_functions:
        if verbose:
            info('Linting {}..'.format(lint.__name__.split('_', 1)[-1]))
        for entry in lint(spec, resolver):
            # a bare location carries no message
            location, message = entry if isinstance(entry, tuple) else (entry, None)
            found.append(Issue(location=location, message=message or '', guideline=lint.__doc__))
    found.sort()
    return found
Esempio n. 4
0
def create(stack_name, version, dry_run, instance_type, master_nodes,
           worker_nodes, max_worker_nodes, appdynamics_access_key):
    '''
    Create a new Kubernetes cluster (using current AWS credentials)

    With ``dry_run`` set, the cluster variables are printed as YAML and
    neither the etcd cluster nor the Senza stack is created; subnet tagging
    and user data rendering still run.
    '''

    variables = get_cluster_variables(stack_name, version,
                                      appdynamics_access_key)
    info('Cluster name is:             {}'.format(
        variables['webhook_cluster_name']))
    info('API server endpoint will be: {}'.format(variables['api_server']))
    if dry_run:
        print(yaml.safe_dump(variables))

    # TODO: register mint bucket with "kube-secretary" app
    if not (has_etcd_cluster() or dry_run):
        deploy_etcd_cluster(variables['hosted_zone'], variables['etcd_bucket'],
                            variables['region'])

    tag_subnets()
    userdata_master = get_user_data('userdata-master.yaml', variables)
    userdata_worker = get_user_data('userdata-worker.yaml', variables)

    if dry_run:
        return

    senza_create = ['senza', 'create', 'senza-definition.yaml', version]
    senza_create.extend([
        'StackName={}'.format(stack_name),
        'UserDataMaster={}'.format(userdata_master),
        'UserDataWorker={}'.format(userdata_worker),
        'KmsKey=*',
        'MasterNodes={}'.format(master_nodes),
        'WorkerNodes={}'.format(worker_nodes),
        'MaximumWorkerNodes={}'.format(max_worker_nodes),
        'InstanceType={}'.format(instance_type),
    ])
    subprocess.check_call(senza_create)

    # wait up to 15m for stack to be created
    senza_wait = ['senza', 'wait', '--timeout=900', stack_name, version]
    subprocess.check_call(senza_wait)
    wait_for_api_server(variables['api_server'])
Esempio n. 5
0
def get_trusted_addresses(config: dict):
    '''
    Collect the set of CIDR blocks that should be trusted.

    The result contains every network listed under ``global.trusted_networks``
    plus a ``/32`` entry for each IPv4 address that the per-account
    ``odd-<region>`` and ``nat-<az>`` host names resolve to. Host names that
    cannot be resolved are reported on their ``Action`` as ``n/a`` and skipped.

    :param config: configuration with optional ``global`` and ``accounts``
        sections; per-account settings override the global ones.
    :return: set of CIDR strings.
    '''
    accounts = config.get('accounts', {})
    global_cfg = config.get('global', {})

    addresses = set()

    for name, cidr in global_cfg.get('trusted_networks', {}).items():
        info('Adding trusted network {} ({})'.format(name, cidr))
        addresses.add(cidr)

    for account_name, _cfg in accounts.items():
        # effective account configuration: global settings overridden per account
        cfg = {}
        cfg.update(global_cfg)
        if _cfg:
            cfg.update(_cfg)
        for region in cfg['regions']:
            account_domain = cfg.get('domain').format(account_name=account_name)
            domains = {'odd-{}.{}'.format(region, account_domain)}
            for az in get_az_names(region):
                domains.add('nat-{}.{}'.format(az, account_domain))
            for domain in sorted(domains):
                with Action('Checking {}'.format(domain)) as act:
                    try:
                        ai = socket.getaddrinfo(domain, 443, family=socket.AF_INET, type=socket.SOCK_STREAM)
                    except (OSError, UnicodeError):
                        # Resolution is best-effort: a missing host is reported and
                        # skipped. Only resolver/encoding failures are caught so
                        # that Ctrl-C and programming errors still propagate.
                        ai = []
                        act.error('n/a')
                    for _, _, _, _, ip_port in ai:
                        ip, _ = ip_port
                        addresses.add('{}/32'.format(ip))

    return addresses
Esempio n. 6
0
def delete_old_services(config, application, version, release, execute):
    '''Delete old releases

    Every service labelled with the application, except the one named
    ``<application>-<release>``, gets a delete change request. The command
    aborts before deleting anything if the target service does not exist.
    Each change request is either approved and executed (``execute``) or its
    ID is printed.
    '''
    namespace = config.get('kubernetes_namespace')
    kubectl_login(config)

    label_selector = 'application={}'.format(application)
    services = kubectl_get(namespace, 'services', '-l', label_selector)['items']
    target_service_name = '{}-{}'.format(application, release)

    # names in descending order, which is also the order of deletion
    service_names = sorted((svc['metadata']['name'] for svc in services), reverse=True)
    services_to_delete = [candidate for candidate in service_names if candidate != target_service_name]

    if target_service_name not in service_names:
        error('Service {} was not found.'.format(target_service_name))
        raise click.Abort()

    for service_name in services_to_delete:
        info('Deleting service {}..'.format(service_name))
        path = '/kubernetes-clusters/{}/namespaces/{}/services/{}'.format(
            config.get('kubernetes_cluster'), config.get('kubernetes_namespace'), service_name)
        change_request_id = request(config, requests.delete, path).json()['id']

        if not execute:
            print(change_request_id)
        else:
            approve_and_execute(config, change_request_id)
Esempio n. 7
0
def output(output):
    '''Example for all possible echo formats

    The messages are only shown if the output format is TEXT.
    '''
    table_columns = ['id', 'name']
    table_rows = [
        {'id': 1, 'name': 'Test #1'},
        {'id': 2, 'name': 'Test #2'},
    ]
    with OutputFormat(output):
        action('This is a ok:')
        ok()
        action('This is a ok with message:')
        ok('all is fine')
        action('This is a warning:')
        warning('please check this')
        with Action('Start with working..') as act:
            # placeholder for the real work; just tick the progress indicator four times
            for _ in range(4):
                act.progress()
        print_table(table_columns, table_rows)
        info('Only FYI')
        action('This is a error:')
        error('this is wrong, please fix')
        action('This is a fatal error:')
        fatal_error('this is a fuckup')
        info('I\'am not printed, the process a dead')
Esempio n. 8
0
def delete(config, type, resource, execute):
    '''Delete a Kubernetes resource or Cloud Formation stack

    For ``type == 'kubernetes'`` the resource must be given as ``KIND/NAME``;
    any other type is treated as a Cloud Formation stack name. The resulting
    change request is approved and executed when ``execute`` is set,
    otherwise its ID is printed.
    '''

    if type != 'kubernetes':
        info('Deleting Cloud Formation stack {}..'.format(resource))
        path = '/aws-accounts/{}/regions/{}/cloudformation-stacks/{}'.format(
            config.get('aws_account'), config.get('aws_region'), resource)
    else:
        pieces = resource.split('/')
        if len(pieces) != 2:
            error('Kubernetes resource must be KIND/NAME')
            raise click.Abort()

        kind, name = pieces

        info('Deleting Kubernetes {} {}..'.format(kind, name))
        path = '/kubernetes-clusters/{}/namespaces/{}/{}/{}'.format(
            config.get('kubernetes_cluster'), config.get('kubernetes_namespace'), kind, name)

    change_request_id = request(config, requests.delete, path).json()['id']

    if not execute:
        print(change_request_id)
    else:
        approve_and_execute(config, change_request_id)
Esempio n. 9
0
def create(stack_name, version, dry_run):
    '''
    Create a new Kubernetes cluster (using current AWS credentials)

    With ``dry_run`` set, the cluster variables are printed as YAML and
    neither the etcd cluster nor the Senza stack is created; subnet tagging
    and user data rendering still run.
    '''

    variables = get_cluster_variables(stack_name, version)
    info('Cluster name is:             {}'.format(
        variables['webhook_cluster_name']))
    info('API server endpoint will be: {}'.format(variables['api_server']))
    if dry_run:
        print(yaml.safe_dump(variables))

    if not (has_etcd_cluster() or dry_run):
        deploy_etcd_cluster(variables['hosted_zone'])

    tag_subnets()
    userdata_master = get_user_data('userdata-master.yaml', variables)
    userdata_worker = get_user_data('userdata-worker.yaml', variables)

    if dry_run:
        return

    senza_create = [
        'senza', 'create', 'senza-definition.yaml', version,
        'StackName={}'.format(stack_name),
        'UserDataMaster={}'.format(userdata_master),
        'UserDataWorker={}'.format(userdata_worker),
        'KmsKey=*',
    ]
    subprocess.check_call(senza_create)

    # wait up to 15m for stack to be created
    senza_wait = ['senza', 'wait', '--timeout=900', stack_name, version]
    subprocess.check_call(senza_wait)
    wait_for_api_server(variables['api_server'])
Esempio n. 10
0
def update(stack_name, version, force):
    '''
    Update Kubernetes cluster

    Renders fresh master and worker user data and, unless ``force`` is set,
    stops early when the decoded worker user data equals the one of the
    current launch configuration. Otherwise the Senza stack is updated and
    the node updates are performed with the new worker user data.
    '''
    current_user_data = get_launch_configuration_user_data(stack_name, version)
    shared_secret = get_worker_shared_secret(current_user_data)
    variables = get_cluster_variables(stack_name, version, shared_secret)
    userdata_master = get_user_data('userdata-master.yaml', variables)
    userdata_worker = get_user_data('userdata-worker.yaml', variables)

    # TODO: handle master nodes as well
    if not force:
        if decode_user_data(current_user_data) == decode_user_data(userdata_worker):
            info('Worker user data did not change, not updating anything.')
            return

    # this will only update the Launch Configuration
    senza_update = [
        'senza', 'update', 'senza-definition.yaml', version,
        'StackName={}'.format(stack_name),
        'UserDataMaster={}'.format(userdata_master),
        'UserDataWorker={}'.format(userdata_worker),
        'KmsKey=*',
    ]
    subprocess.check_call(senza_update)

    # wait for CF update to complete..
    subprocess.check_call(['senza', 'wait', stack_name, version])
    perform_node_updates(stack_name, version, userdata_worker)
Esempio n. 11
0
def scale_deployment(config, application, version, release, replicas, execute):
    '''Scale a single deployment

    Builds a resources update that sets the replica count of the deployment
    named ``<application>-<version>-<release>`` and submits it as a change
    request. The change request is approved and executed when ``execute``
    is set, otherwise its ID is printed.
    '''
    kubectl_login(config)

    deployment_name = '{}-{}-{}'.format(application, version, release)

    info('Scaling deployment {} to {} replicas..'.format(
        deployment_name, replicas))
    resources_update = ResourcesUpdate()
    resources_update.set_number_of_replicas(deployment_name, replicas)

    # The namespace is looked up once, here where it is used; an earlier
    # lookup before the login was overwritten without ever being read.
    cluster_id = config.get('kubernetes_cluster')
    namespace = config.get('kubernetes_namespace')
    path = '/kubernetes-clusters/{}/namespaces/{}/resources'.format(
        cluster_id, namespace)
    response = request(config,
                       requests.patch,
                       path,
                       json=resources_update.to_dict())
    change_request_id = response.json()['id']

    if execute:
        approve_and_execute(config, change_request_id)
    else:
        print(change_request_id)
def find_taupage_amis(regions: list) -> dict:
    '''
    Find latest Taupage AMI for each region

    "Latest" is the available, non-public, EBS-backed image matching
    ``*Taupage-AMI-*`` whose name sorts last.

    :param regions: region names to search.
    :return: mapping of region name to the selected image object.
    :raises Exception: if a region has no matching image.
    '''
    filter_spec = (
        ('name', '*Taupage-AMI-*'),
        ('is-public', 'false'),
        ('state', 'available'),
        ('root-device-type', 'ebs'),
    )
    amis_by_region = {}
    for region in regions:
        with Action('Finding latest Taupage AMI in {}..'.format(region)):
            ec2 = boto3.resource('ec2', region)
            filters = [{'Name': key, 'Values': [value]} for key, value in filter_spec]
            candidates = list(ec2.images.filter(Filters=filters))
            if not candidates:
                raise Exception('No Taupage AMI found')
            candidates.sort(key=lambda image: image.name)
            newest = candidates[-1]
            amis_by_region[region] = newest
        # report the name only after the Action has finished its own output
        info(newest.name)
    return amis_by_region
Esempio n. 13
0
def output(output):
    '''Example for all possible echo formats

    The messages are only shown if the output format is TEXT.
    '''
    with OutputFormat(output):
        action('This is a ok:')
        ok()
        action('This is a ok with message:')
        ok('all is fine')
        action('This is a warning:')
        warning('please check this')
        with Action('Start with working..') as act:
            # placeholder for the real work; advance the progress indicator four times
            ticks = 4
            while ticks:
                act.progress()
                ticks -= 1
        demo_rows = [
            {'id': 1, 'name': 'Test #1'},
            {'id': 2, 'name': 'Test #2'},
        ]
        print_table(['id', 'name'], demo_rows)
        info('Only FYI')
        action('This is a error:')
        error('this is wrong, please fix')
        action('This is a fatal error:')
        fatal_error('this is a fuckup')
        info('I\'am not printed, the process a dead')
Esempio n. 14
0
def update(stack_name, version, force):
    '''
    Update Kubernetes cluster

    Renders fresh master and worker user data and, unless ``force`` is set,
    stops early when both match the existing launch configurations.
    Otherwise the Senza stack is updated, then master nodes are updated, the
    API server is awaited and finally the worker nodes are updated.
    '''
    current_master = get_launch_configuration_user_data(
        stack_name, version, 'Master')
    current_worker = get_launch_configuration_user_data(
        stack_name, version, 'Worker')
    shared_secret = get_worker_shared_secret(current_worker)
    variables = get_cluster_variables(stack_name, version, shared_secret)
    user_data_master = get_user_data('userdata-master.yaml', variables)
    user_data_worker = get_user_data('userdata-worker.yaml', variables)

    if not force:
        if same_user_data(current_master, user_data_master) and same_user_data(
                current_worker, user_data_worker):
            info(
                'Neither worker nor master user data did change, not updating anything.'
            )
            return

    # this will only update the Launch Configuration
    senza_update = [
        'senza', 'update', 'senza-definition.yaml', version,
        'StackName={}'.format(stack_name),
        'UserDataMaster={}'.format(user_data_master),
        'UserDataWorker={}'.format(user_data_worker),
        'KmsKey=*',
    ]
    subprocess.check_call(senza_update)

    # wait for CF update to complete..
    senza_wait = ['senza', 'wait', '--timeout=600', stack_name, version]
    subprocess.check_call(senza_wait)

    # masters first; workers only once the API server is reachable again
    perform_node_updates(stack_name, version, 'Master', user_data_master)
    wait_for_api_server(variables['api_server'])
    perform_node_updates(stack_name, version, 'Worker', user_data_worker)
Esempio n. 15
0
def create(definition, region, version, parameter, disable_rollback, dry_run, force):
    '''Create a new Cloud Formation stack from the given Senza definition file

    The stack is named ``<StackName>-<version>`` and tagged with ``Name``,
    ``StackName`` and ``StackVersion`` in addition to the tags from
    ``SenzaInfo``. With ``dry_run`` set, the template is still generated but
    no stack is created.

    Raises ``click.UsageError`` if the stack name exceeds 128 characters,
    the operator SNS topic cannot be resolved, or the stack already exists.
    '''

    region = get_region(region)
    check_credentials(region)
    account_info = AccountArguments(region=region)
    args = parse_args(definition, region, version, parameter, account_info)

    with Action('Generating Cloud Formation template..'):
        data = evaluate(definition.copy(), args, account_info, force)
        cfjson = json.dumps(data, sort_keys=True, indent=4)

    senza_info = definition["SenzaInfo"]
    stack_name = "{0}-{1}".format(senza_info["StackName"], version)
    if len(stack_name) > 128:
        raise click.UsageError('Stack name "{}" cannot exceed 128 characters. '.format(stack_name) +
                               ' Please choose another name/version.')

    # every template parameter is passed on with the value parsed from the command line (or None)
    parameters = [[name, getattr(args, name, None)]
                  for name, _spec in data.get("Parameters", {}).items()]

    tags = {}
    for tag in senza_info.get('Tags', []):
        for key, value in tag.items():
            # As the SenzaInfo is not evaluated, we explicitly evaluate the values here
            # NOTE(review): `info` is passed as the second argument here - confirm this is the intended object
            tags[key] = evaluate_template(value, info, [], args)

    tags["Name"] = stack_name
    tags["StackName"] = senza_info["StackName"]
    tags["StackVersion"] = version

    topics = None
    if "OperatorTopicId" in senza_info:
        topic = senza_info["OperatorTopicId"]
        topic_arn = resolve_topic_arn(region, topic)
        if not topic_arn:
            raise click.UsageError('SNS topic "{}" does not exist'.format(topic))
        topics = [topic_arn]

    capabilities = get_required_capabilities(data)

    cf = boto.cloudformation.connect_to_region(region)

    with Action('Creating Cloud Formation stack {}..'.format(stack_name)):
        try:
            if not dry_run:
                cf.create_stack(stack_name, template_body=cfjson, parameters=parameters, tags=tags,
                                notification_arns=topics, disable_rollback=disable_rollback, capabilities=capabilities)
            else:
                info('**DRY-RUN** {}'.format(topics))
        except boto.exception.BotoServerError as e:
            if e.error_code != 'AlreadyExistsException':
                raise
            raise click.UsageError('Stack {} already exists. Please choose another version.'.format(stack_name))
Esempio n. 16
0
def wait_for_deployment(config, application, version, release, timeout,
                        interval):
    '''Wait for all pods to become ready

    Polls the pods labelled with the application, version and release every
    ``interval`` seconds. Returns as soon as at least one pod exists and all
    pods are in phase ``Running`` with every reported container ready.
    Raises ``click.Abort`` once ``timeout`` seconds have passed.
    '''
    namespace = config.get('kubernetes_namespace')
    kubectl_login(config)
    deployment_name = '{}-{}-{}'.format(application, version, release)
    deadline = time.time() + timeout
    while time.time() < deadline:
        label_selector = 'application={},version={},release={}'.format(
            application, version, release)
        pods = kubectl_get(namespace, 'pods', '-l', label_selector)['items']

        ready_count = 0
        for pod in pods:
            status = pod['status']
            if status.get('phase') != 'Running':
                continue
            if all(container.get('ready')
                   for container in status.get('containerStatuses', [])):
                ready_count += 1

        # an empty pod list never counts as "ready"
        if pods and ready_count >= len(pods):
            return

        remaining = deadline - time.time()
        info('Waiting up to {:.0f} more secs for deployment '
             '{} ({}/{} pods ready)..'.format(remaining, deployment_name,
                                              ready_count, len(pods)))
        time.sleep(interval)
    raise click.Abort()
Esempio n. 17
0
def update(stack_name, version, dry_run, force, instance_type, master_nodes,
           worker_nodes, postpone, max_worker_nodes, appdynamics_access_key):
    '''
    Update Kubernetes cluster

    Renders fresh master and worker user data and, unless ``force`` is set,
    stops early when both match the existing launch configurations.
    An ``instance_type`` of ``'current'`` and node counts of ``-1`` are
    replaced by the values currently in use. With ``dry_run`` set, the
    cluster variables are printed and the stack is left untouched; with
    ``postpone`` set, the stack is updated but the node updates are skipped.
    '''
    current_master = get_launch_configuration_user_data(
        stack_name, version, 'Master')
    current_worker = get_launch_configuration_user_data(
        stack_name, version, 'Worker')
    shared_secret = get_worker_shared_secret(current_worker)
    variables = get_cluster_variables(stack_name, version,
                                      appdynamics_access_key,
                                      shared_secret)
    if dry_run:
        print(yaml.safe_dump(variables))
    user_data_master = get_user_data('userdata-master.yaml', variables)
    user_data_worker = get_user_data('userdata-worker.yaml', variables)

    if instance_type == 'current':
        worker_launch_config = get_launch_configuration(stack_name, version,
                                                        'Worker')
        instance_type = worker_launch_config['InstanceType']

    if not force:
        if same_user_data(current_master, user_data_master) and same_user_data(
                current_worker, user_data_worker):
            info(
                'Neither worker nor master user data did change, not updating anything.'
            )
            return

    # -1 means "keep the current number of nodes"
    if master_nodes == -1:
        master_nodes = get_current_master_nodes(stack_name, version)

    if worker_nodes == -1:
        worker_nodes = get_current_worker_nodes(stack_name, version)

    if dry_run:
        return

    # this will only update the Launch Configuration
    senza_update = [
        'senza', 'update', 'senza-definition.yaml', version,
        'StackName={}'.format(stack_name),
        'UserDataMaster={}'.format(user_data_master),
        'UserDataWorker={}'.format(user_data_worker),
        'KmsKey=*',
        'MasterNodes={}'.format(master_nodes),
        'WorkerNodes={}'.format(worker_nodes),
        'MaximumWorkerNodes={}'.format(max_worker_nodes),
        'InstanceType={}'.format(instance_type),
    ]
    subprocess.check_call(senza_update)

    # wait for CF update to complete..
    senza_wait = ['senza', 'wait', '--timeout=600', stack_name, version]
    subprocess.check_call(senza_wait)

    if postpone:
        return

    # masters first; workers only once the API server is reachable again
    perform_node_updates(stack_name, version, 'Master',
                         user_data_master, variables)
    wait_for_api_server(variables['api_server'])
    perform_node_updates(stack_name, version, 'Worker',
                         user_data_worker, variables)
Esempio n. 18
0
def setup_security_groups(use_dmz: bool, cluster_name: str, node_ips: dict,
                          result: dict) -> dict:
    '''
    Allow traffic between regions (or within a VPC, if `use_dmz' is False)

    For every region in ``node_ips`` a security group named after the cluster
    is created in the first VPC returned by ``describe_vpcs``. It allows all
    traffic from within the group itself, TCP port 7001 from every node's
    public IP when ``use_dmz`` is set, and SSH from the
    "Odd (SSH Bastion Host)" security group if that group can be found.

    :param use_dmz: whether the public IPs of all nodes must be allowed.
    :param cluster_name: name used for the security group and its Name tag.
    :param node_ips: mapping of region name to an iterable of address dicts;
        the ``PublicIp`` key is read when ``use_dmz`` is set.
    :param result: dict that receives the ``create_security_group`` response
        per region; it is updated in place.
    :return: the ``result`` dict that was passed in.
    '''
    for region in node_ips:
        with Action('Configuring Security Group in {}..'.format(region)):
            ec2 = boto3.client('ec2', region)
            resp = ec2.describe_vpcs()
            # TODO: support more than one VPC..
            vpc = resp['Vpcs'][0]
            sg_name = cluster_name
            sg = ec2.create_security_group(GroupName=sg_name,
                                           VpcId=vpc['VpcId'],
                                           Description='Allow Cassandra nodes to talk to each other on Secure Transport port 7001')
            result[region] = sg

            ec2.create_tags(Resources=[sg['GroupId']],
                            Tags=[{'Key': 'Name', 'Value': sg_name}])

            ip_permissions = []
            if use_dmz:
                # NOTE: we need to allow ALL public IPs (from all regions)
                for ip in itertools.chain(*node_ips.values()):
                    ip_permissions.append({
                        'IpProtocol': 'tcp',
                        'FromPort': 7001,  # port range: From-To
                        'ToPort':   7001,
                        'IpRanges': [{
                            'CidrIp': '{}/32'.format(ip['PublicIp'])
                        }]
                    })
            # if internal subnets are used we just allow access from
            # within the SG, which we also need in multi-region setup
            # (for the nodetool?)
            ip_permissions.append({'IpProtocol': '-1',
                                   'UserIdGroupPairs': [{'GroupId': sg['GroupId']}]})

            # if we can find the Odd security group, authorize SSH access from it
            try:
                resp = ec2.describe_security_groups(GroupNames=['Odd (SSH Bastion Host)'])
                odd_sg = resp['SecurityGroups'][0]

                ip_permissions.append({
                    'IpProtocol': 'tcp',
                    'FromPort': 22,  # port range: From-To
                    'ToPort': 22,
                    'UserIdGroupPairs': [{
                        'GroupId': odd_sg['GroupId']
                    }]
                })
            except ClientError:
                info("Could not find Odd bastion host in region {}, skipping Security Group rule.".format(region))

            ec2.authorize_security_group_ingress(GroupId=sg['GroupId'],
                                                 IpPermissions=ip_permissions)

    # honour the declared return type; the dict is also updated in place
    return result
Esempio n. 19
0
def setup_security_groups(use_dmz: bool, cluster_name: str, node_ips: dict,
                          result: dict) -> dict:
    '''
    Allow traffic between regions (or within a VPC, if `use_dmz' is False)

    For every region in ``node_ips`` a security group named after the cluster
    is created in the first VPC returned by ``describe_vpcs``. It allows all
    traffic from within the group itself, TCP port 7001 from every node's
    public IP when ``use_dmz`` is set, and SSH from the
    "Odd (SSH Bastion Host)" security group if that group can be found.

    :param use_dmz: whether the public IPs of all nodes must be allowed.
    :param cluster_name: name used for the security group and its Name tag.
    :param node_ips: mapping of region name to an iterable of address dicts;
        the ``PublicIp`` key is read when ``use_dmz`` is set.
    :param result: dict that receives the ``create_security_group`` response
        per region; it is updated in place.
    :return: the ``result`` dict that was passed in.
    '''
    for region in node_ips:
        with Action('Configuring Security Group in {}..'.format(region)):
            ec2 = boto3.client('ec2', region)
            resp = ec2.describe_vpcs()
            # TODO: support more than one VPC..
            vpc = resp['Vpcs'][0]
            sg_name = cluster_name
            sg = ec2.create_security_group(GroupName=sg_name,
                                           VpcId=vpc['VpcId'],
                                           Description='Allow Cassandra nodes to talk to each other on Secure Transport port 7001')
            result[region] = sg

            ec2.create_tags(Resources=[sg['GroupId']],
                            Tags=[{'Key': 'Name', 'Value': sg_name}])

            ip_permissions = []
            if use_dmz:
                # NOTE: we need to allow ALL public IPs (from all regions)
                for ip in itertools.chain(*node_ips.values()):
                    ip_permissions.append({
                        'IpProtocol': 'tcp',
                        'FromPort': 7001,  # port range: From-To
                        'ToPort':   7001,
                        'IpRanges': [{
                            'CidrIp': '{}/32'.format(ip['PublicIp'])
                        }]
                    })
            # if internal subnets are used we just allow access from
            # within the SG, which we also need in multi-region setup
            # (for the nodetool?)
            ip_permissions.append({'IpProtocol': '-1',
                                   'UserIdGroupPairs': [{'GroupId': sg['GroupId']}]})

            # if we can find the Odd security group, authorize SSH access from it
            try:
                resp = ec2.describe_security_groups(GroupNames=['Odd (SSH Bastion Host)'])
                odd_sg = resp['SecurityGroups'][0]

                ip_permissions.append({
                    'IpProtocol': 'tcp',
                    'FromPort': 22,  # port range: From-To
                    'ToPort': 22,
                    'UserIdGroupPairs': [{
                        'GroupId': odd_sg['GroupId']
                    }]
                })
            except ClientError:
                info("Could not find Odd bastion host in region {}, skipping Security Group rule.".format(region))

            ec2.authorize_security_group_ingress(GroupId=sg['GroupId'],
                                                 IpPermissions=ip_permissions)

    # honour the declared return type; the dict is also updated in place
    return result
Esempio n. 20
0
def update_security_group(file, region_name, security_group):
    '''Update a Security Group and allow access from all trusted networks, NAT instances and bastion hosts'''
    settings = yaml.safe_load(file)
    trusted = get_trusted_addresses(settings)

    # show the collected addresses, one per line, before applying them
    info('\n'.join(sorted(trusted)))

    # NOTE(review): this call uses the name of this very function with a
    # different argument list - presumably it is meant to reach a helper of
    # the same name defined elsewhere; confirm how the name is bound.
    update_security_group(region_name, security_group, trusted)
Esempio n. 21
0
def perform_implicit_flow(config: dict):
    '''
    Obtain a new token through the implicit flow in the user's browser.

    A local redirect server is started on the first free port in the range
    8081-8181, the browser is opened on the authorize URL and requests are
    handled until the server has received query parameters.

    :param config: must provide ``business_partner_id``, ``client_id`` and
        ``authorize_url``.
    :return: the query parameters collected by the redirect server.
    :raises AuthenticationFailed: if no port in the range could be bound.
    '''

    # Must match redirect URIs in client configuration (http://localhost:8081-8181)
    port_number = 8081
    max_port_number = port_number + 100

    httpd = None
    while httpd is None:
        try:
            httpd = ClientRedirectServer(('127.0.0.1', port_number))
        except socket.error:
            # Give up once the last allowed port has failed; going past it
            # would produce a redirect URI the client configuration rejects.
            if port_number >= max_port_number:
                raise AuthenticationFailed('Failed to launch local server')
            port_number += 1

    params = {'response_type':          'token',
              'business_partner_id':    config['business_partner_id'],
              'client_id':              config['client_id'],
              'redirect_uri':           'http://localhost:{}'.format(port_number)}

    param_list = ['{}={}'.format(key, value) for key, value in sorted(params.items())]
    param_string = '&'.join(param_list)
    parsed_authorize_url = urlparse(config['authorize_url'])
    browser_url = urlunsplit((parsed_authorize_url.scheme, parsed_authorize_url.netloc, parsed_authorize_url.path,
                              param_string, ''))

    # Redirect stdout and stderr to the null device while the browser starts.
    # In Linux, a message is outputted to stdout when opening the browser.
    saved_stdout = os.dup(1)
    saved_stderr = os.dup(2)
    devnull = os.open(os.devnull, os.O_RDWR)
    try:
        os.dup2(devnull, 1)
        os.dup2(devnull, 2)
        webbrowser.open(browser_url, new=1, autoraise=True)
    finally:
        os.dup2(saved_stdout, 1)
        os.dup2(saved_stderr, 2)
        # release the helper descriptors, they are not needed once restored
        for fd in (saved_stdout, saved_stderr, devnull):
            os.close(fd)

    info('Your browser has been opened to visit:\n\n\t{}\n'.format(browser_url))

    while not httpd.query_params:
        # Handle first request, which will redirect to Javascript
        # Handle next request, with token
        httpd.handle_request()

    return httpd.query_params
Esempio n. 22
0
def perform_implicit_flow(config: dict):
    '''Obtain a new token through the browser and return the redirect's query parameters.

    A local ``ClientRedirectServer`` is bound to the first free port in the
    range 8081-8181, the authorize URL is opened in the user's browser and
    requests are handled until the server has collected query parameters.

    :param config: must provide ``business_partner_id``, ``client_id`` and ``authorize_url``
    :return: the ``query_params`` gathered by the local redirect server
    :raises AuthenticationFailed: if no port in the range could be bound
    '''
    # Must match redirect URIs in client configuration (http://localhost:8081-8181)
    first_port = 8081
    last_port = first_port + 100

    # Try every port of the registered range exactly once (last_port inclusive).
    for port_number in range(first_port, last_port + 1):
        try:
            httpd = ClientRedirectServer(('127.0.0.1', port_number))
        except socket.error:
            continue
        break
    else:
        raise AuthenticationFailed('Failed to launch local server')

    params = {'response_type':          'token',
              'business_partner_id':    config['business_partner_id'],
              'client_id':              config['client_id'],
              'redirect_uri':           'http://localhost:{}'.format(port_number)}

    param_list = ['{}={}'.format(key, value) for key, value in sorted(params.items())]
    param_string = '&'.join(param_list)
    parsed_authorize_url = urlparse(config['authorize_url'])
    browser_url = urlunsplit((parsed_authorize_url.scheme, parsed_authorize_url.netloc, parsed_authorize_url.path,
                              param_string, ''))

    # Redirect stdout and stderr. In Linux, a message is outputted to stdout when opening the browser
    # (and then a message to stderr because it can't write).
    saved_stdout = os.dup(1)
    saved_stderr = os.dup(2)
    devnull = os.open(os.devnull, os.O_RDWR)
    try:
        # Point both descriptors at /dev/null instead of leaving one of them closed.
        os.dup2(devnull, 1)
        os.dup2(devnull, 2)
        webbrowser.open(browser_url, new=1, autoraise=True)
    finally:
        os.dup2(saved_stdout, 1)
        os.dup2(saved_stderr, 2)
        # The temporary descriptors are no longer needed; closing them avoids leaking fds.
        for fd in (devnull, saved_stdout, saved_stderr):
            os.close(fd)

    info('Your browser has been opened to visit:\n\n\t{}\n'.format(browser_url))

    while not httpd.query_params:
        # Handle first request, which will redirect to Javascript
        # Handle next request, with token
        httpd.handle_request()

    return httpd.query_params
Esempio n. 23
0
def get_base_ami_id(ec2_conn, cfg: dict):
    '''Return the ID of the base AMI whose name sorts last.

    When the first search yields nothing, ``permit_base_image`` is called and
    the search is repeated once.

    :raises Exception: if no AMI is found even after the retry
    '''
    candidates = search_base_ami_ids(ec2_conn, cfg)
    if not candidates:
        permit_base_image(ec2_conn, cfg)
        candidates = search_base_ami_ids(ec2_conn, cfg)
    if not candidates:
        raise Exception("No AMI found")

    # Sort by name and take the last entry.
    by_name = sorted(candidates, key=lambda image: image.name)
    newest = by_name[-1]
    info('Most recent AMI is "{}" ({})'.format(newest.name, newest.id))
    return newest.id
def pick_seed_node_ips(node_ips: dict, seed_count: int) -> dict:
    '''
    Select the leading {seed_count} entries of every region as seed nodes.

    Returns a dict mapping each region to its selected entries; the chosen
    `_defaultIp` values are reported through info().
    '''
    seed_nodes = {}
    for region in node_ips:
        chosen = node_ips[region][:seed_count]
        seed_nodes[region] = chosen

        addresses = ', '.join(entry['_defaultIp'] for entry in chosen)
        info('Our seed nodes in {} will be: {}'.format(region, addresses))
    return seed_nodes
Esempio n. 25
0
def pick_seed_node_ips(node_ips: dict, seed_count: int) -> dict:
    '''
    For each region keep only the first {seed_count} entries as seed nodes.

    The `_defaultIp` of every selected entry is logged per region and the
    region -> selected entries mapping is returned.
    '''
    message = 'Our seed nodes in {} will be: {}'
    seed_nodes = {}
    for region, region_ips in node_ips.items():
        selected = region_ips[:seed_count]
        seed_nodes[region] = selected

        default_ips = []
        for node in selected:
            default_ips.append(node['_defaultIp'])
        info(message.format(region, ', '.join(default_ips)))
    return seed_nodes
Esempio n. 26
0
def apply(config, template_or_directory, parameter, execute):
    '''Apply CloudFormation or Kubernetes resource'''
    # `template_or_directory` is either a single template file or a directory
    # whose non-hidden *.yaml files are all applied. Each rendered template is
    # submitted as a change request, which is then either approved and executed
    # (`execute` truthy) or has its ID printed.

    if os.path.isdir(template_or_directory):
        # os.listdir() returns entries in arbitrary order; sort them so that the
        # templates are always applied in the same, predictable order.
        template_paths = [os.path.join(template_or_directory, entry)
                          for entry in sorted(os.listdir(template_or_directory))
                          if entry.endswith('.yaml') and not entry.startswith('.')]
    else:
        template_paths = [template_or_directory]

    context = parse_parameters(parameter)
    namespace = config.get('kubernetes_namespace')

    # try to find previous release of a service.
    services = kubectl_get(namespace, 'services', '-l', 'application={}'.format(context['application']))

    context["prev_release"] = get_prev_release(services['items'], context['release'])

    for template_path in template_paths:
        with open(template_path, 'r') as fd:
            data = _render_template(fd, context)

        if not isinstance(data, dict):
            error('Invalid YAML contents in {}'.format(template_path))
            raise click.Abort()

        # The API path gets its own name so the template path stays available for messages.
        if 'kind' in data:
            info('Applying Kubernetes manifest {}..'.format(template_path))
            cluster_id = config.get('kubernetes_cluster')
            api_path = '/kubernetes-clusters/{}/namespaces/{}/resources'.format(cluster_id, namespace)
            response = request(config, requests.post, api_path, json=data)
        elif 'Resources' in data:
            info('Applying Cloud Formation template {}..'.format(template_path))
            aws_account = config.get('aws_account')
            aws_region = config.get('aws_region')
            stack_name = data.get('Metadata', {}).get('StackName')
            if not stack_name:
                error('Cloud Formation template requires Metadata/StackName property')
                raise click.Abort()
            api_path = '/aws-accounts/{}/regions/{}/cloudformation-stacks/{}'.format(
                aws_account, aws_region, stack_name)
            response = request(config, requests.put, api_path, json=data)
        else:
            error('Neither a Kubernetes manifest nor a Cloud Formation template: {}'.format(template_path))
            raise click.Abort()

        change_request_id = response.json()['id']

        if execute:
            approve_and_execute(config, change_request_id)
        else:
            print(change_request_id)
Esempio n. 27
0
def respawn_auto_scaling_group(asg_name: str, region: str, inplace: bool=False):
    '''Respawn all EC2 instances in the Auto Scaling Group whose launch configuration is not up-to-date'''
    autoscaling = boto3.client('autoscaling', region)
    group = get_auto_scaling_group(autoscaling, asg_name)

    # Split the group's instances by whether they use the group's current launch configuration.
    outdated, up_to_date = get_instances_to_terminate(group, group['LaunchConfigurationName'])
    total = len(outdated) + len(up_to_date)
    info('{}/{} instances need to be updated in {}'.format(len(outdated), total, asg_name))

    if not outdated:
        info('Nothing to do')
        return
    do_respawn_auto_scaling_group(asg_name, group, region, outdated, up_to_date, inplace)
Esempio n. 28
0
def switch_deployment(config, application, version, release, ratio, execute):
    '''Switch to new release'''
    # `ratio` has the form "TARGET/TOTAL": the deployment named
    # APPLICATION-VERSION-RELEASE is scaled to TARGET replicas, the first other
    # deployment (by descending name) receives TOTAL - TARGET replicas and all
    # remaining deployments are scaled to 0.
    namespace = config.get('kubernetes_namespace')
    kubectl_login(config)

    target_replicas, total = ratio.split('/')
    target_replicas = int(target_replicas)
    total = int(total)

    data = kubectl_get(namespace, 'deployments', '-l',
                       'application={}'.format(application))
    deployments = data['items']
    target_deployment_name = '{}-{}-{}'.format(application, version, release)

    if not any(deployment['metadata']['name'] == target_deployment_name
               for deployment in deployments):
        error("Deployment {} does not exist!".format(target_deployment_name))
        # Raise SystemExit directly: the exit() builtin is injected by the
        # `site` module for interactive use and is not guaranteed to exist.
        raise SystemExit(1)

    resources_update = ResourcesUpdate()
    remaining_replicas = total - target_replicas
    for deployment in sorted(deployments,
                             key=lambda d: d['metadata']['name'],
                             reverse=True):
        deployment_name = deployment['metadata']['name']
        if deployment_name == target_deployment_name:
            replicas = target_replicas
        else:
            # maybe spread across all other deployments?
            replicas = remaining_replicas
            remaining_replicas = 0

        info('Scaling deployment {} to {} replicas..'.format(
            deployment_name, replicas))
        resources_update.set_number_of_replicas(deployment_name, replicas)

    cluster_id = config.get('kubernetes_cluster')
    path = '/kubernetes-clusters/{}/namespaces/{}/resources'.format(
        cluster_id, namespace)
    response = request(config,
                       requests.patch,
                       path,
                       json=resources_update.to_dict())
    change_request_id = response.json()['id']

    if execute:
        approve_and_execute(config, change_request_id)
    else:
        print(change_request_id)
Esempio n. 29
0
def create(definition, region, version, parameter, disable_rollback, dry_run, force):
    '''Create a new Cloud Formation stack from the given Senza definition file'''
    # Flow: resolve the region, check credentials, render the template, derive
    # the stack name, parameters, tags and notification topics, then create the
    # stack (or only log the topics when `dry_run` is set).

    region = get_region(region)
    check_credentials(region)
    args = parse_args(definition, region, version, parameter)

    with Action('Generating Cloud Formation template..'):
        template = evaluate(definition.copy(), args, force)
        template_json = json.dumps(template, sort_keys=True, indent=4)

    senza_info = definition["SenzaInfo"]
    stack_name = "{0}-{1}".format(senza_info["StackName"], version)
    if len(stack_name) > 128:
        too_long = 'Stack name "{}" cannot exceed 128 characters. '.format(stack_name)
        raise click.UsageError(too_long + ' Please choose another name/version.')

    # One [name, value] pair per template parameter; the value comes from the parsed arguments.
    stack_parameters = [[name, getattr(args, name, None)]
                        for name in template.get("Parameters", {})]

    tags = {
        "Name": stack_name,
        "StackName": senza_info["StackName"],
        "StackVersion": version
    }

    topics = None
    if "OperatorTopicId" in senza_info:
        topic = senza_info["OperatorTopicId"]
        topic_arn = resolve_topic_arn(region, topic)
        if not topic_arn:
            raise click.UsageError('SNS topic "{}" does not exist'.format(topic))
        topics = [topic_arn]

    capabilities = get_required_capabilities(template)

    cf = boto.cloudformation.connect_to_region(region)

    with Action('Creating Cloud Formation stack {}..'.format(stack_name)):
        try:
            if not dry_run:
                cf.create_stack(stack_name, template_body=template_json, parameters=stack_parameters, tags=tags,
                                notification_arns=topics, disable_rollback=disable_rollback,
                                capabilities=capabilities)
            else:
                info('**DRY-RUN** {}'.format(topics))
        except boto.exception.BotoServerError as e:
            if e.error_code != 'AlreadyExistsException':
                raise
            raise click.UsageError('Stack {} already exists. Please choose another version.'.format(stack_name))
Esempio n. 30
0
def main():
    '''Keep /etc/hosts populated with the aliases listed in /etc/cnames until interrupted.

    Every minute each ``alias=target`` line of /etc/cnames is resolved and the
    resulting ``ip alias`` entries are appended to the original /etc/hosts
    contents. The original file is backed up first and restored on exit.
    '''
    # parser = argparse.ArgumentParser()
    # parser.add_argument('from')
    # parser.add_argument('to')
    # parser.add_argument('file')
    # args = parser.parse_args()

    hosts_file = Path('/etc/hosts')

    with hosts_file.open() as fd:
        old_contents = fd.read()

    backup_file = hosts_file.with_suffix('.local-cname-backup')
    with backup_file.open('w') as fd:
        fd.write(old_contents)

    try:
        while True:
            entries = []
            cname_file = Path('/etc/cnames')
            with cname_file.open() as fd:
                for line in fd:
                    line = line.strip()
                    # Blank lines and comments carry no mapping; skip them instead of crashing.
                    if not line or line.startswith('#'):
                        continue
                    cname_from, cname_to = (part.strip() for part in line.split('='))
                    print('resolving:' + cname_to)
                    with Action('Resolving {} ..'.format(cname_to)):
                        results = socket.getaddrinfo(cname_to,
                                                     80,
                                                     type=socket.SOCK_STREAM)
                        for family, _, _, _, sockaddr in results:
                            if family in (socket.AF_INET, socket.AF_INET6):
                                entries.append((cname_from, sockaddr[0]))

            info('Current entries:')
            for hostname, ip in entries:
                info('{} -> {}'.format(hostname, ip))

            with Action('Writing {} ..'.format(hosts_file)):
                with hosts_file.open('w') as fd:
                    fd.write(old_contents)
                    # Make sure the marker starts on its own line even if the
                    # original file did not end with a newline.
                    if old_contents and not old_contents.endswith('\n'):
                        fd.write('\n')
                    fd.write(
                        '#### Start of entries generated by local-cnames\n')
                    for hostname, ip in entries:
                        fd.write('{} {}\n'.format(ip, hostname))

            time.sleep(60)
    except KeyboardInterrupt:
        # ignore, do not print stacktrace
        pass
    finally:
        # Restore the original hosts file from the backup.
        backup_file.rename(hosts_file)
Esempio n. 31
0
File: api.py Progetto: pfigue/zign
def get_named_token(scope, realm, name, user, password, url=None,
                    insecure=False, refresh=False, use_keyring=True, prompt=False):
    '''get named access token, return existing if still valid

    :param name: token name; when set and ``refresh`` is false an existing token is returned if available,
                 and a newly obtained token is stored under this name
    :param url: token service URL; falls back to the stored configuration and, with ``prompt``, to user input
    :param insecure: disable TLS certificate verification for the reachability check and the token request
    :param use_keyring: store the working password in the keyring after a successful request
    :param prompt: interactively ask for a missing URL/password and retry after failed authentication
    :return: the result of ``get_new_token`` (or the existing token)
    :raises AuthenticationFailed: if authentication fails and ``prompt`` is false
    '''

    if name and not refresh:
        existing_token = get_existing_token(name)
        if existing_token:
            return existing_token

    config = get_config()

    url = url or config.get('url')

    while not url and prompt:
        url = click.prompt('Please enter the OAuth access token service URL')
        if not url.startswith('http'):
            url = 'https://{}'.format(url)

        try:
            requests.get(url, timeout=5, verify=not insecure)
        except Exception:
            # Any failure means the URL is unusable; ask again. Deliberately not a
            # bare "except:" so that Ctrl-C / SystemExit still abort the prompt loop.
            error('Could not reach {}'.format(url))
            url = None

        config['url'] = url

    stups_cli.config.store_config(config, 'zign')

    password = password or keyring.get_password(KEYRING_KEY, user)

    while True:
        if not password and prompt:
            password = click.prompt('Password for {}'.format(user), hide_input=True)

        try:
            result = get_new_token(realm, scope, user, password, url=url, insecure=insecure)
            break
        except AuthenticationFailed as e:
            if prompt:
                error(e)
                info('Please check your username and password and try again.')
                # Forget the rejected password so the next iteration prompts again.
                password = None
            else:
                raise

    if result and use_keyring:
        keyring.set_password(KEYRING_KEY, user, password)

    if name:
        store_token(name, result)

    return result
Esempio n. 32
0
def main():
    '''Let the user pick an account, then log in to pierone and to AWS for that account.'''
    account = choice('Select account to login', mai.get_accounts())  # type: str

    with Action("Login to pierone..") as pierone_action:
        pierone_ok = pierone.login()
        if not pierone_ok:
            pierone_action.fatal_error('Failed')

    with Action("Login to AWS..") as aws_action:
        aws_ok = mai.login(account)
        if not aws_ok:
            aws_action.fatal_error('Failed')

    info('Logged in to {}'.format(account))
Esempio n. 33
0
def print_help():
    '''Print the version banner followed by the overview of zkubectl wrapper commands.'''
    banner = 'Zalando Kubectl {}\n'.format(zalando_kubectl.__version__)
    click.secho(banner, bold=True)

    wrapper_help = '''Available wrapper commands:
  zkubectl help                               Show this help message and exit
  zkubectl configure --cluster-registry=URL   Set the Cluster Registry URL
  zkubectl list                               Shortcut for "list-clusters"
  zkubectl list-clusters                      List all Kubernetes cluster in "ready" state
  zkubectl login CLUSTER_ALIAS_ID_OR_URL      Login to a specific cluster
  zkubectl dashboard                          Open the Kubernetes dashboard UI in the browser

All other commands are forwarded to kubectl:
            '''
    info(wrapper_help)
def print_success_message(options: dict):
    '''Report success and write the post-installation instructions to stdout.

    The instructions are filled in from ``options`` plus the joined region
    list, the per-datacenter replication settings and a generated password.
    '''
    info('Cluster initialization completed successfully!')

    regions = options['regions']
    regions_list = ' '.join(regions)

    # prepare alter keyspace params in the format: 'eu-central': N [, ...]
    dc_entries = []
    for region in regions:
        datacenter = re.sub('-[0-9]+$', '', region)
        dc_entries.append("'{}': {}".format(datacenter, options['cluster_size']))
    dc_list = ', '.join(dc_entries)

    instructions = '''
The Cassandra cluster {cluster_name} was created with {cluster_size} nodes
in each of the following AWS regions: {regions_list}

You might need to update the Security Group named {cluster_name}
(in all regions!) to allow access to Cassandra from the Odd host (port 22),
from your application (port 9042) and optionally to allow access to Jolokia
(port 8778) and/or Prometheus Node Exporter (port 9100) from your monitoring
tool.

You should now login to any of the cluster nodes to change the replication
settings of system_auth keyspace and to create the admin superuser, using the
following commands:

$ docker exec -ti taupageapp bash

(docker)$ cqlsh -u cassandra -p cassandra \\
  -e "ALTER KEYSPACE system_auth WITH replication = {{
        'class': 'NetworkTopologyStrategy', {dc_list}
      }};
      CREATE USER admin WITH PASSWORD '$ADMIN_PASSWORD' SUPERUSER;"

Then login with the newly created admin account and disable the default
superuser account:

(docker)$ cqlsh -u admin -p $ADMIN_PASSWORD

cqlsh> ALTER USER cassandra WITH PASSWORD '{random_pw}' NOSUPERUSER;

You can then also create non-superuser application roles and data keyspace(s).

In general, follow the documentation on setting up authentication, depending
on your Cassandra version:

  http://docs.datastax.com/en/cassandra/3.0/cassandra/configuration/secureConfigNativeAuth.html
  http://docs.datastax.com/en/cassandra/2.1/cassandra/security/security_config_native_authenticate_t.html
'''
    message = instructions.format(**options,
                                  regions_list=regions_list,
                                  dc_list=dc_list,
                                  random_pw=generate_password())
    sys.stdout.write(message)
Esempio n. 35
0
File: cli.py Progetto: jmirc/senza
def update(definition, region, version, parameter, disable_rollback, dry_run, force):
    '''Update an existing Cloud Formation stack from the given Senza definition file'''
    # The generated template arguments are passed to update_stack() without
    # their 'Tags' entry; a dry run only logs the notification ARNs.
    stack = create_cf_template(definition, region, version, parameter, force)
    cf = boto3.client('cloudformation', region)

    action_title = 'Updating Cloud Formation stack {}..'.format(stack['StackName'])
    with Action(action_title) as act:
        try:
            if not dry_run:
                del stack['Tags']
                cf.update_stack(**stack)
            else:
                info('**DRY-RUN** {}'.format(stack['NotificationARNs']))
        except ClientError as err:
            act.fatal_error('ClientError: {}'.format(pformat(err.response)))
Esempio n. 36
0
File: cli.py Progetto: chutium/senza
def create(definition, region, version, parameter, disable_rollback, dry_run, force):
    '''Create a new Cloud Formation stack from the given Senza definition file'''
    # Steps: render the template, assemble name/parameters/tags/topics, then
    # create the stack unless this is a dry run.

    region = get_region(region)
    args = parse_args(definition, region, version, parameter)

    with Action('Generating Cloud Formation template..'):
        rendered = evaluate(definition.copy(), args, force)
        cfjson = json.dumps(rendered, sort_keys=True, indent=4)

    base_name = definition["SenzaInfo"]["StackName"]
    stack_name = "{0}-{1}".format(base_name, version)

    # Pair every template parameter name with the value found on the parsed arguments.
    parameters = []
    for param_name in rendered.get("Parameters", {}):
        param_value = getattr(args, param_name, None)
        parameters.append([param_name, param_value])

    tags = {
        "Name": stack_name,
        "StackName": definition["SenzaInfo"]["StackName"],
        "StackVersion": version
    }

    if "OperatorTopicId" not in definition["SenzaInfo"]:
        topics = None
    else:
        topic = definition["SenzaInfo"]["OperatorTopicId"]
        topic_arn = resolve_topic_arn(region, topic)
        if not topic_arn:
            raise click.UsageError('SNS topic "{}" does not exist'.format(topic))
        topics = [topic_arn]

    capabilities = get_required_capabilities(rendered)

    cf = boto.cloudformation.connect_to_region(region)

    with Action('Creating Cloud Formation stack {}..'.format(stack_name)):
        try:
            if dry_run:
                info('**DRY-RUN** {}'.format(topics))
            else:
                cf.create_stack(stack_name, template_body=cfjson, parameters=parameters, tags=tags,
                                notification_arns=topics, disable_rollback=disable_rollback,
                                capabilities=capabilities)
        except boto.exception.BotoServerError as server_error:
            already_exists = server_error.error_code == 'AlreadyExistsException'
            if not already_exists:
                raise
            raise click.UsageError('Stack {} already exists. Please choose another version.'.format(stack_name))
Esempio n. 37
0
def launch_normal_nodes(options: dict):
    '''Launch every non-seed node, pausing one minute before each launch.

    In each region the first ``options['seed_count']`` IPs are skipped; the
    remaining IPs are launched with ``is_seed=False``, cycling through the
    region's subnets.
    '''
    # TODO: parallelize by region?
    for region, ips in options['node_ips'].items():
        subnets = options['subnets'][region]
        for index, ip in enumerate(ips):
            if index < options['seed_count']:
                continue

            # avoid starting all nodes at the same time
            info("Sleeping for one minute before launching next node..")
            time.sleep(60)
            launch_instance(region, ip,
                            ami=options['taupage_amis'][region],
                            subnet_id=subnets[index % len(subnets)]['SubnetId'],
                            security_group_id=options['security_groups'][region]['GroupId'],
                            is_seed=False,
                            options=options)
Esempio n. 38
0
def _scale_deployment(config, name, namespace, replicas, execute):
    '''Scale a single deployment'''
    # Submits a resources update as a PATCH change request, then either approves
    # and executes it or prints its ID.
    info('Scaling deployment {} to {} replicas..'.format(name, replicas))
    update = ResourcesUpdate()
    update.set_number_of_replicas(name, replicas)

    path = '/kubernetes-clusters/{}/namespaces/{}/resources'.format(
        config.get('kubernetes_cluster'), namespace)
    change_request_id = request(config, requests.patch, path, json=update.to_dict()).json()['id']

    if not execute:
        print(change_request_id)
        return
    approve_and_execute(config, change_request_id)
Esempio n. 39
0
def launch_normal_nodes(options: dict):
    '''Start the non-seed nodes of all regions one at a time.

    IPs at positions below ``options['seed_count']`` are left alone; before
    each other node is launched the function sleeps for 60 seconds.
    '''
    # TODO: parallelize by region?
    for region, region_ips in options['node_ips'].items():
        region_subnets = options['subnets'][region]
        for position, address in enumerate(region_ips):
            is_normal_node = position >= options['seed_count']
            if is_normal_node:
                # avoid starting all nodes at the same time
                info("Sleeping for one minute before launching next node..")
                time.sleep(60)
                launch_instance(
                    region, address,
                    ami=options['taupage_amis'][region],
                    subnet_id=region_subnets[position % len(region_subnets)]['SubnetId'],
                    security_group_id=options['security_groups'][region]['GroupId'],
                    is_seed=False,
                    options=options,
                )
Esempio n. 40
0
def _open_dashboard_in_browser():
    '''Wait briefly for the local "kubectl proxy" to answer, then open the dashboard URL in the browser.

    The URL is polled up to 20 times; the browser is opened afterwards even if
    no attempt succeeded.
    '''
    import webbrowser
    # sleep some time to make sure "kubectl proxy" runs
    url = 'http://localhost:8001/api/v1/namespaces/kube-system/services/kubernetes-dashboard/proxy'
    with Action('Waiting for local kubectl proxy..') as act:
        for _ in range(20):
            time.sleep(0.1)
            try:
                requests.get(url, timeout=2)
            except Exception:
                # Proxy not reachable yet: keep waiting. Not a bare "except:" so
                # that Ctrl-C / SystemExit can still interrupt the wait.
                act.progress()
            else:
                break
    info('\nOpening {} ..'.format(url))
    webbrowser.open(url)
Esempio n. 41
0
def test_echo():
    '''Smoke test: call each output helper once after starting an action.

    Only fatal_error() has an asserted outcome (it must raise SystemExit);
    the other helpers are merely expected to run without raising.
    '''
    # action followed by a success marker
    action('Action..')
    ok()

    # action followed by a (non-fatal) error message
    action('Action..')
    error(' some error')

    # action followed by a fatal error, which must terminate via SystemExit
    action('Action..')
    with pytest.raises(SystemExit):
        fatal_error(' some fatal error')  # noqa

    # action followed by a warning message
    action('Action..')
    warning(' some warning')

    # plain informational message without a preceding action
    info('Some info')
Esempio n. 42
0
def test_echo():
    '''Exercise the output helpers once each; only fatal_error() is asserted (raises SystemExit).'''
    # success path
    action('Action..')
    ok()

    # error message after an action
    action('Action..')
    error(' some error')

    # fatal_error() is expected to exit the process, hence the SystemExit check
    action('Action..')
    with pytest.raises(SystemExit):
        fatal_error(' some fatal error')  # noqa

    # warning message after an action
    action('Action..')
    warning(' some warning')

    # standalone info message
    info('Some info')
Esempio n. 43
0
def launch_seed_nodes(options: dict):
    '''Launch the seed nodes of every region, sleeping a minute between launches.

    No sleep happens once the expected total number of seeds
    (``seed_count`` times the number of regions) has been launched.
    '''
    total_seed_count = options['seed_count'] * len(options['regions'])
    seeds_launched = 0
    for region, ips in options['seed_nodes'].items():
        subnets = options['subnets'][region]
        for index, ip in enumerate(ips):
            launch_instance(region, ip,
                            ami=options['taupage_amis'][region],
                            subnet_id=subnets[index % len(subnets)]['SubnetId'],
                            security_group_id=options['security_groups'][region]['GroupId'],
                            is_seed=True,
                            options=options)
            seeds_launched += 1
            if seeds_launched >= total_seed_count:
                # that was the last expected seed: nothing to wait for
                continue
            info("Sleeping for a minute before launching next SEED node..")
            time.sleep(60)
Esempio n. 44
0
def launch_seed_nodes(options: dict):
    '''Start all seed nodes region by region with a 60 second pause between them.

    The pause is skipped once ``seed_count`` * number-of-regions seeds have
    been launched.
    '''
    seeds_remaining = options['seed_count'] * len(options['regions'])
    for region, seed_ips in options['seed_nodes'].items():
        region_subnets = options['subnets'][region]
        for position, address in enumerate(seed_ips):
            launch_instance(
                region, address,
                ami=options['taupage_amis'][region],
                subnet_id=region_subnets[position % len(region_subnets)]['SubnetId'],
                security_group_id=options['security_groups'][region]['GroupId'],
                is_seed=True,
                options=options,
            )
            seeds_remaining -= 1
            if seeds_remaining > 0:
                info("Sleeping for a minute before launching next SEED node..")
                time.sleep(60)
Esempio n. 45
0
def status(config):
    """Check system status"""
    # Fetches /status and prints the reported workers and queues as two tables,
    # each sorted by the rows' 'name' value.
    data = get('/status').json()

    def by_name(row):
        return row.get('name')

    info('Workers:')
    workers = sorted(data.get('workers', []), key=by_name)
    print_table(['name', 'check_invocations', 'last_execution_time'], workers)

    info('Queues:')
    queues = sorted(data.get('queues', []), key=by_name)
    print_table(['name', 'size'], queues)
Esempio n. 46
0
File: cli.py Progetto: jmirc/senza
def create(definition, region, version, parameter, disable_rollback, dry_run, force):
    '''Create a new Cloud Formation stack from the given Senza definition file'''
    # A dry run only logs the notification ARNs; an "already exists" error is
    # reported as a fatal error of the action, any other ClientError propagates.
    stack = create_cf_template(definition, region, version, parameter, force)
    cf = boto3.client('cloudformation', region)

    title = 'Creating Cloud Formation stack {}..'.format(stack['StackName'])
    with Action(title) as act:
        try:
            if not dry_run:
                cf.create_stack(DisableRollback=disable_rollback, **stack)
            else:
                info('**DRY-RUN** {}'.format(stack['NotificationARNs']))
        except ClientError as err:
            if err.response['Error']['Code'] != 'AlreadyExistsException':
                raise
            act.fatal_error('Stack {} already exists. Please choose another version.'.format(stack['StackName']))
Esempio n. 47
0
def update(definition, region, version, parameter, disable_rollback, dry_run,
           force):
    '''Update an existing Cloud Formation stack from the given Senza definition file'''
    # In a dry run only the notification ARNs are logged; otherwise the
    # 'Tags' entry is removed before the arguments go to update_stack().
    template_args = create_cf_template(definition, region, version, parameter, force)
    client = boto3.client('cloudformation', region)

    description = 'Updating Cloud Formation stack {}..'.format(template_args['StackName'])
    with Action(description) as act:
        try:
            if dry_run:
                info('**DRY-RUN** {}'.format(template_args['NotificationARNs']))
            else:
                template_args.pop('Tags')
                client.update_stack(**template_args)
        except ClientError as client_error:
            details = pformat(client_error.response)
            act.fatal_error('ClientError: {}'.format(details))
Esempio n. 48
0
def delete_deployment(config, deployment, execute):
    '''Delete deployment by first scaling down to 0, deleting the deployment
    resource and any replicaset resources owned by the deployment.

    Returns early (after logging an error) if the deployment has not reached
    0 replicas within DEFAULT_RESOURCE_DELETION_TIMEOUT seconds. Each deletion
    is submitted as a change request that is either approved and executed
    (``execute`` truthy) or has its ID printed.
    '''
    cluster_id = config.get('kubernetes_cluster')
    name = deployment['metadata']['name']
    namespace = deployment['metadata']['namespace']

    # scale deployment to 0 before deleting
    _scale_deployment(config, name, namespace, 0, execute)

    # wait for deployment to be scaled down to 0
    timeout = DEFAULT_RESOURCE_DELETION_TIMEOUT
    poll_interval = 2  # seconds between polls, so the wait is not a busy loop
    maxtime = time.time() + timeout
    while get_replicas(name, namespace) > 0:
        if time.time() > maxtime:
            error('Timed out after {:d}s waiting for deployment to scale down'.format(timeout))
            return
        time.sleep(poll_interval)

    # get replicasets owned by the deployment
    replicasets = kubectl_get(namespace, 'replicasets')
    owned_rs = get_owned_replicasets(deployment, replicasets['items'])

    # delete deployment
    info('Deleting deployment {}..'.format(name))
    path = '/kubernetes-clusters/{}/namespaces/{}/deployments/{}'.format(
        cluster_id, namespace, name)
    response = request(config, requests.delete, path)
    change_request_id = response.json()['id']

    if execute:
        approve_and_execute(config, change_request_id)
    else:
        print(change_request_id)

    # delete replicasets
    for rs in owned_rs:
        rs_name = rs['metadata']['name']
        info('Deleting replicaset {}..'.format(rs_name))
        path = '/kubernetes-clusters/{}/namespaces/{}/replicasets/{}'.format(
            cluster_id, namespace, rs_name)
        response = request(config, requests.delete, path)
        change_request_id = response.json()['id']

        if execute:
            approve_and_execute(config, change_request_id)
        else:
            print(change_request_id)
Esempio n. 49
0
def create(definition: str, image_version: str, keep_stacks: int, traffic: int,
           verbose: bool, senza_parameters: list, app_version: Optional[str],
           stack_version: Optional[str], disable_rollback: bool):
    # Requests a new stack from Lizzy, then follows the deployment states until
    # the iterator ends; any final state other than CF:CREATE_COMPLETE is fatal.
    senza_parameters = senza_parameters or []

    config = Configuration()
    access_token = fetch_token(config.token_url, config.scopes,
                               config.credentials_dir)
    lizzy = Lizzy(config.lizzy_url, access_token)

    with Action('Requesting new stack..') as action:
        try:
            stack_id = lizzy.new_stack(image_version, keep_stacks, traffic,
                                       definition, stack_version, app_version,
                                       disable_rollback, senza_parameters)
        except requests.RequestException as e:
            action.fatal_error('Deployment failed: {}.'.format(e))

    info('Stack ID: {}'.format(stack_id))

    # Final states that have a dedicated failure message.
    failure_messages = {
        'CF:ROLLBACK_COMPLETE':
            'Stack was rollback after deployment. Check you application log for possible reasons.',
        'LIZZY:REMOVED':
            'Stack was removed before deployment finished.',
    }

    with Action('Waiting for new stack...') as action:
        if verbose:
            print()  # ensure that new states will not be printed on the same line as the action

        last_state = None
        for state in lizzy.wait_for_deployment(stack_id):
            state_changed = state != last_state
            if state_changed and verbose:
                click.echo(' {}'.format(state))
            else:
                action.progress()
            last_state = state

        if last_state in failure_messages:
            fatal_error(failure_messages[last_state])
        elif last_state != 'CF:CREATE_COMPLETE':
            fatal_error('Deployment failed: {}'.format(last_state))

    info('Deployment Successful')
def validate_artifact_version(options: dict) -> dict:
    """
    Resolve the docker image and image version to use from the CLI options.

    With an explicit ``docker_image`` the version is the text after its last
    ``:``; combining it with ``artifact_name`` is a usage error. Without one,
    the image is built from ``artifact_name`` (defaulting to
    ``planb-cassandra-3.0``, which is also written back into ``options``) and
    the version reported by ``get_latest_docker_image_version``.

    :return: a copy of ``options`` with ``docker_image`` and ``image_version`` set
    :raises click.UsageError: if both ``artifact_name`` and ``docker_image`` are given
    """
    explicit_image = options['docker_image']

    if explicit_image:
        if options['artifact_name']:
            raise click.UsageError("""Conflicting options: --artifact-name and
--docker-image cannot be specified at the same time""")
        resolved_image = explicit_image
        resolved_version = explicit_image.split(':')[-1]
    else:
        if not options['artifact_name']:
            options['artifact_name'] = 'planb-cassandra-3.0'
        artifact = options['artifact_name']
        resolved_version = get_latest_docker_image_version(artifact)
        resolved_image = 'registry.opensource.zalan.do/stups/{}:{}'.format(
            artifact, resolved_version)
        info('Using docker image: {}'.format(resolved_image))

    result = dict(options)
    result.update(docker_image=resolved_image, image_version=resolved_version)
    return result
Esempio n. 51
0
def saml_login(user, url):
    """
    Authenticate against the SAML identity provider, retrying until it succeeds.

    The password is looked up in the system keyring first; if none is stored,
    or the stored one is rejected, the user is prompted for it. The password
    that finally worked is written back to the keyring.

    :param user: user name to authenticate as
    :param url: URL of the identity provider
    :return: the ``(saml_xml, roles)`` pair returned by ``authenticate``
    """
    # The keyring entry must be specific to the user/URL pair. The previous
    # template had no placeholders, so every login shared one entry.
    ring_user = '{}@{}'.format(user, url)
    saml_password = keyring.get_password('mai', ring_user)

    saml_xml = None
    while not saml_xml:
        if not saml_password:
            saml_password = click.prompt('Please enter your SAML password', hide_input=True)

        with Action('Authenticating against {url}..', url=url) as act:
            try:
                saml_xml, roles = authenticate(url, user, saml_password)
            except aws_saml_login.saml.AuthenticationFailed:
                act.error('Authentication Failed')
                info('Please check your username/password and try again.')
                # forget the rejected password so the next round prompts again
                saml_password = None

    keyring.set_password('mai', ring_user, saml_password)
    return saml_xml, roles
Esempio n. 52
0
def configure_dns(account_name, cfg):
    """
    Ensure the account's hosted zone exists, delegate it and set its SOA TTL.

    :param account_name: substituted into the ``domain`` template from ``cfg``
    :param cfg: configuration mapping; ``domain`` is required,
                ``domain_soa_ttl`` is optional and defaults to ``"60"``
    :return: the formatted DNS domain name (without trailing dot)
    """
    dns_domain = cfg.get("domain").format(account_name=account_name)
    zone_name = dns_domain + "."
    wanted = zone_name.lower()

    # No region is passed here; the client uses the session defaults.
    conn = boto3.client("route53")

    def find_zones():
        # list_hosted_zones_by_name() treats DNSName only as the starting point
        # of a name-ordered listing, so it can return unrelated zones when ours
        # does not exist. Keep exact matches only, otherwise the creation is
        # skipped and a foreign zone gets modified below.
        listed = conn.list_hosted_zones_by_name(DNSName=zone_name)["HostedZones"]
        return [candidate for candidate in listed if candidate["Name"].lower() == wanted]

    zones = find_zones()
    if not zones:
        with Action("Creating hosted zone.."):
            conn.create_hosted_zone(
                Name=zone_name,
                CallerReference="sevenseconds-" + dns_domain,
                HostedZoneConfig={"Comment": "Public Hosted Zone"},
            )
        zones = find_zones()
    zone = zones[0]

    nameservers = conn.get_hosted_zone(Id=zone["Id"])["DelegationSet"]["NameServers"]
    info("Hosted zone for {} has nameservers {}".format(dns_domain, nameservers))
    with Action("Set up DNS Delegation..") as act:
        try:
            configure_dns_delegation(account_name, nameservers, cfg)
        except Exception:
            # Delegation is best effort: report the failure and carry on.
            # KeyboardInterrupt/SystemExit are deliberately not swallowed.
            act.error("DNS Delegation not possible")

    soa_ttl = cfg.get("domain_soa_ttl", "60")
    with Action("Set SOA-TTL to {}..".format(soa_ttl)):
        rr_list = conn.list_resource_record_sets(
            HostedZoneId=zone["Id"], StartRecordType="SOA", StartRecordName=zone["Name"]
        )
        # Re-submit the existing SOA record data unchanged; only the TTL differs.
        rr = rr_list["ResourceRecordSets"][0]["ResourceRecords"]
        changebatch = {
            "Comment": "updated SOA TTL",
            "Changes": [
                {
                    "Action": "UPSERT",
                    "ResourceRecordSet": {
                        "Name": zone["Name"],
                        "Type": "SOA",
                        "TTL": int(soa_ttl),
                        "ResourceRecords": rr,
                    },
                }
            ],
        }
        conn.change_resource_record_sets(HostedZoneId=zone["Id"], ChangeBatch=changebatch)
    return dns_domain
Esempio n. 53
0
def find_taupage_amis(regions: list) -> dict:
    '''
    Look up, per region, the Taupage AMI whose name sorts last.

    :param regions: region names to search
    :return: mapping of region name to the selected EC2 image object
    :raises Exception: if a region has no matching image
    '''
    taupage_filters = [
        {'Name': 'name', 'Values': ['*Taupage-AMI-*']},
        {'Name': 'is-public', 'Values': ['false']},
        {'Name': 'state', 'Values': ['available']},
        {'Name': 'root-device-type', 'Values': ['ebs']},
    ]

    amis_by_region = {}
    for region in regions:
        with Action('Finding latest Taupage AMI in {}..'.format(region)):
            ec2 = boto3.resource('ec2', region)
            candidates = list(ec2.images.filter(Filters=taupage_filters))
            if not candidates:
                raise Exception('No Taupage AMI found')
            # sort by name and take the last entry
            candidates.sort(key=lambda image: image.name)
            newest = candidates[-1]
            amis_by_region[region] = newest
        info(newest.name)
    return amis_by_region
Esempio n. 54
0
def create(definition: str, image_version: str, keep_stacks: int,
           traffic: int, verbose: bool, senza_parameters: list,
           app_version: Optional[str], stack_version: Optional[str],
           disable_rollback: bool):
    """
    Deploy a new stack through Lizzy and block until the deployment ends.

    A request failure, or any final state other than ``CF:CREATE_COMPLETE``,
    is reported as a fatal error.

    :param verbose: print each new state on its own line instead of a progress marker
    :param senza_parameters: extra parameters; ``None``/empty becomes ``[]``
    """
    senza_parameters = senza_parameters if senza_parameters else []

    config = Configuration()
    access_token = fetch_token(config.token_url, config.scopes, config.credentials_dir)
    client = Lizzy(config.lizzy_url, access_token)

    with Action('Requesting new stack..') as requesting:
        try:
            stack_id = client.new_stack(image_version, keep_stacks, traffic,
                                        definition, stack_version, app_version,
                                        disable_rollback, senza_parameters)
        except requests.RequestException as exc:
            requesting.fatal_error('Deployment failed: {}.'.format(exc))

    info('Stack ID: {}'.format(stack_id))

    with Action('Waiting for new stack...') as waiting:
        if verbose:
            print()  # start state output on a fresh line, below the action title

        last_seen = None
        for state in client.wait_for_deployment(stack_id):
            is_new_state = state != last_seen
            if is_new_state and verbose:
                click.echo(' {}'.format(state))
            else:
                waiting.progress()
            last_seen = state

        # Translate the final state into a failure message (None means success).
        if last_seen == 'CF:ROLLBACK_COMPLETE':
            failure = 'Stack was rollback after deployment. Check you application log for possible reasons.'
        elif last_seen == 'LIZZY:REMOVED':
            failure = 'Stack was removed before deployment finished.'
        elif last_seen != 'CF:CREATE_COMPLETE':
            failure = 'Deployment failed: {}'.format(last_seen)
        else:
            failure = None

        if failure is not None:
            fatal_error(failure)

    info('Deployment Successful')
Esempio n. 55
0
def print_success_message(options: dict):
    """
    Announce a finished cluster setup and print follow-up instructions.

    :param options: must provide ``cluster_name``, ``cluster_size``, ``regions``
                    and ``user_data['environment']['ADMIN_PASSWORD']``
    """
    info('Cluster initialization completed successfully!')

    template = '''
The Cassandra cluster {cluster_name} was created with {cluster_size} nodes
in each of the following AWS regions: {regions_list}

You can now login to any of the cluster nodes with the superuser
account using the following command:

$ cqlsh -u cassandra -p '{admin_password}'

From there you can create non-superuser roles and otherwise configure
the cluster.

You might also need to update the Security Groups named {cluster_name}
(in all regions!) to allow access to Cassandra from your application (port 9042)
and optionally to allow access to Jolokia (port 8778) and/or
Prometheus Node Exporter (port 9100) from your monitoring tool.
'''
    regions_list = ' '.join(options['regions'])
    admin_password = options['user_data']['environment']['ADMIN_PASSWORD']
    sys.stdout.write(template.format(regions_list=regions_list,
                                     admin_password=admin_password,
                                     **options))
Esempio n. 56
0
def update_stack_from_template(region: str, template: dict, dry_run: bool):
    """
    Update a Cloud Formation stack from a generated template.

    The ``Tags`` entry is removed from ``template`` (in place) before the
    remaining entries are passed as keyword arguments to ``update_stack``.
    A "No updates are to be performed." response is reported as ``NO UPDATE``;
    any other ``ClientError`` is fatal.

    :param region: region of the Cloud Formation endpoint
    :param template: ``update_stack`` keyword arguments plus a ``Tags`` entry
    :param dry_run: only log the template's ``NotificationARNs``, change nothing
    """
    cf = BotoClientProxy('cloudformation', region)
    del template['Tags']

    title = 'Updating Cloud Formation stack {StackName}..'.format_map(template)
    with Action(title) as act:
        try:
            if not dry_run:
                cf.update_stack(**template)
            else:
                info('**DRY-RUN** {}'.format(template['NotificationARNs']))
        except ClientError as err:
            details = err.response
            message = details['Error']['Message']
            if message != 'No updates are to be performed.':
                act.fatal_error('ClientError: {}'.format(pformat(details)))
            else:
                act.ok('NO UPDATE')
Esempio n. 57
0
def configure_cloudtrail(account_name, region, cfg, dry_run):
    """
    Make sure a correctly configured, active ``Default`` trail exists in ``region``.

    If the ``Default`` trail exists, its settings are corrected and logging is
    started when needed. Otherwise every existing trail is deleted and a new
    ``Default`` trail is created and started.

    :param account_name: not used by this function
    :param region: region passed to the CloudTrail client
    :param cfg: configuration mapping; without a ``cloudtrail`` section nothing
                is done, otherwise ``s3_bucket_name`` and ``s3_key_prefix`` are read
    :param dry_run: when true, no modifying API call is made
    """
    if "cloudtrail" not in cfg:
        info("Found no Cloudtrail Section in Configfile. Skipping CloudTrail configuration")
        return
    cloudtrail = boto3.client("cloudtrail", region)
    trails = cloudtrail.describe_trails()["trailList"]
    name = "Default"
    trail = find_trail(trails, name)
    kwargs = dict(
        Name=name,
        S3BucketName=cfg["cloudtrail"]["s3_bucket_name"],
        S3KeyPrefix=cfg["cloudtrail"]["s3_key_prefix"],
        IncludeGlobalServiceEvents=True,
    )
    if trail:
        with Action("[{}] Check CloudTrail..".format(region)) as act:
            if not dry_run:
                # S3KeyPrefix is absent from the description of a trail that
                # has no prefix, so it must not be read with [].
                if (
                    trail["IncludeGlobalServiceEvents"] != kwargs["IncludeGlobalServiceEvents"]
                    or trail.get("S3KeyPrefix", "") != kwargs["S3KeyPrefix"]
                    or trail["S3BucketName"] != kwargs["S3BucketName"]
                ):
                    act.error("wrong configuration")
                    cloudtrail.update_trail(**kwargs)
                status = cloudtrail.get_trail_status(Name=name)
                if not status["IsLogging"]:
                    act.error("was not active")
                    cloudtrail.start_logging(Name=name)
    else:
        # No trail called "Default": remove whatever else is there, then
        # create the expected one.
        for stale_trail in trails:
            delname = stale_trail.get("Name")
            with Action("[{}] Deleting invalid trail {}..".format(region, delname)):
                if not dry_run:
                    cloudtrail.stop_logging(Name=delname)
                    cloudtrail.delete_trail(Name=delname)
        with Action("[{}] Enabling CloudTrail..".format(region)):
            if not dry_run:
                cloudtrail.create_trail(**kwargs)
                cloudtrail.start_logging(Name=name)
Esempio n. 58
0
File: cli.py Progetto: mpare-net/mai
def saml_login(user, url):
    """
    Authenticate against the SAML identity provider with password and OTP.

    The password comes from the system keyring when one is stored; otherwise,
    or after a rejected attempt, the user is prompted for it. A one-time
    password is requested before every attempt. The password that finally
    worked is written back to the keyring.

    :param user: user name to authenticate as
    :param url: URL of the identity provider
    :return: the ``(saml_xml, roles)`` pair returned by ``authenticate``
    """
    # The keyring entry must be specific to the user/URL pair. The previous
    # template had no placeholders, so every login shared one entry.
    ring_user = "{}@{}".format(user, url)
    saml_password = keyring.get_password("mai", ring_user)

    saml_xml = None
    while not saml_xml:
        if not saml_password:
            saml_password = click.prompt("Please enter your SAML password", hide_input=True)

        # asked on every attempt, so it never needs resetting after a failure
        saml_otp = click.prompt("Please enter your One-Time-Password (eg. Google Authenticate or Yubikey)")

        with Action("Authenticating against {url}..", url=url) as act:
            try:
                saml_xml, roles = authenticate(url, user, saml_password, saml_otp)
            except aws_saml_login.saml.AuthenticationFailed:
                act.error("Authentication Failed")
                info("Please check your username/password/token and try again.")
                # forget the rejected password so the next round prompts again
                saml_password = None

    keyring.set_password("mai", ring_user, saml_password)
    return saml_xml, roles
Esempio n. 59
0
def all_stacks_in_final_state(related_stacks_refs: list, region: str, timeout: Optional[int], interval: int):
    ''' Wait until all related stacks are in a final state, then yield once so
    the caller's code block can run. Without a timeout nothing is waited for
    and the generator yields immediately.

    A stack counts as "not final" while its status ends with ``_IN_PROGRESS``.
    The process exits with status 1 if no related stack is found or if the
    stacks are still not final when the timeout expires.

    :param related_stacks_refs: Related stacks to wait
    :param region: region where stacks are present
    :param timeout: maximum number of seconds to wait; ``None`` or a value
                    below 1 disables waiting
    :param interval: seconds to sleep after each stack found in progress
    '''
    if timeout is None or timeout < 1:
        yield
        return

    wait_timeout = datetime.datetime.utcnow() + datetime.timedelta(seconds=timeout)

    all_in_final_state = False
    while not all_in_final_state and wait_timeout > datetime.datetime.utcnow():
        # assume all stacks are ready
        all_in_final_state = True
        related_stacks = list(get_stacks(related_stacks_refs, region))

        if not related_stacks:
            error("Stack not found!")
            exit(1)

        for related_stack in related_stacks:
            current_stack_status = related_stack.StackStatus
            if current_stack_status.endswith('_IN_PROGRESS'):
                # some operation in progress, let's wait some time to try again
                all_in_final_state = False
                info(
                    "Waiting for stack {} ({}) to perform requested operation..".format(
                        related_stack.StackName, current_stack_status))
                time.sleep(interval)

    # Decide on the observed stack state, not on a second look at the clock:
    # stacks that turned final just as the deadline passed must still count as
    # ready, and a loop that ended on the deadline must never yield.
    if not all_in_final_state:
        info("Timeout reached, requested operation not executed.")
        exit(1)

    yield
Esempio n. 60
0
def create(definition: dict, version: str, parameter: tuple,
           region: str,
           disable_rollback: bool,
           dry_run: bool,
           force: bool,
           tag: List[str],
           timeout: int,
           keep_stacks: Optional[int],
           traffic: int,
           verbose: bool,
           remote: str,
           parameter_file: Optional[str]
           ):
    """
    Create a new Cloud Formation stack from the given Senza definition file

    Steps, in order:

    1. request the new stack from Lizzy (``dry_run`` stops after this step
       and exits with status 0);
    2. wait for the deployment and fail unless it ends in ``CREATE_COMPLETE``;
    3. if ``traffic`` is not ``None``, request a traffic change to the new stack;
    4. if ``keep_stacks`` is not ``None``, delete old stacks of the same name,
       keeping the ``keep_stacks + 1`` most recently created ones, retrying
       for at most ``timeout`` seconds.

    :param parameter: stack parameters; extended with the contents of
                      ``parameter_file`` when one is given
    :param force: when false, only a warning about missing artifact checks is printed
    :param tag: passed to Lizzy as ``tags``
    :param verbose: echo each new deployment state instead of a progress marker
    :param remote: passed to ``setup_lizzy_client``
    """
    lizzy = setup_lizzy_client(remote)
    # `or []` is redundant here: list() of an empty tuple is already []
    parameter = list(parameter) or []
    if parameter_file:
        parameter.extend(read_parameter_file(parameter_file))

    if not force:  # pragma: no cover
        # supporting artifact checking would imply copying a large amount of code
        # from senza, so it should be considered out of scope until senza
        # and lizzy client are merged
        warning("WARNING: "
                "Artifact checking is still not supported by lizzy-client.")

    with Action('Requesting new stack..') as action:
        new_stack, output = lizzy.new_stack(keep_stacks, traffic,
                                            definition, version,
                                            disable_rollback, parameter,
                                            region=region,
                                            dry_run=dry_run,
                                            tags=tag)

    # new_stack must provide 'stack_name' and 'version' for the stack ID
    stack_id = '{stack_name}-{version}'.format_map(new_stack)
    print(output)

    info('Stack ID: {}'.format(stack_id))

    if dry_run:
        # nothing is waited for, switched or deleted in a dry run
        info("Post deployment steps skipped")
        exit(0)

    with Action('Waiting for new stack...') as action:
        if verbose:
            print()  # ensure that new states will not be printed on the same line as the action

        last_state = None
        for state in lizzy.wait_for_deployment(stack_id, region=region):
            if state != last_state and verbose:
                click.echo(' {}'.format(state))
            else:
                action.progress()
            last_state = state

        # TODO be prepared to handle all final AWS CF states
        if last_state == 'ROLLBACK_COMPLETE':
            fatal_error(
                'Stack was rollback after deployment. Check your application log for possible reasons.')
        elif last_state != 'CREATE_COMPLETE':
            fatal_error('Deployment failed: {}'.format(last_state))

    info('Deployment Successful')

    if traffic is not None:
        with Action('Requesting traffic change..'):
            # both error handlers are called with fatal=False, so the cleanup
            # below is still attempted after a failed traffic change
            try:
                lizzy.traffic(stack_id, traffic, region=region)
            except requests.ConnectionError as e:
                connection_error(e, fatal=False)
            except requests.HTTPError as e:
                agent_error(e, fatal=False)

    # TODO unit test this
    if keep_stacks is not None:
        # keep_stacks + 1 stacks survive the cleanup
        versions_to_keep = keep_stacks + 1
        # start at 1 so the loop body runs at least once
        stacks_to_remove_counter = 1
        end_time = datetime.datetime.utcnow() + datetime.timedelta(seconds=timeout)
        # repeat until every surplus stack has been deleted or time runs out
        while stacks_to_remove_counter > 0 and datetime.datetime.utcnow() <= end_time:
            try:
                all_stacks = lizzy.get_stacks([new_stack['stack_name']],
                                              region=region)
            except requests.ConnectionError as e:
                connection_error(e, fatal=False)
                error("Failed to fetch old stacks. "
                      "Old stacks WILL NOT BE DELETED")
                exit(1)
            except requests.HTTPError as e:
                agent_error(e, fatal=False)
                error("Failed to fetch old stacks. "
                      "Old stacks WILL NOT BE DELETED")
                exit(1)
            else:
                # oldest first; everything except the newest versions_to_keep
                # entries is a deletion candidate
                sorted_stacks = sorted(all_stacks,
                                       key=lambda stack: stack['creation_time'])
                stacks_to_remove = sorted_stacks[:-versions_to_keep]
                stacks_to_remove_counter = len(stacks_to_remove)
                with Action('Deleting old stacks..'):
                    print()
                    for old_stack in stacks_to_remove:
                        old_stack_id = '{stack_name}-{version}'.format_map(
                            old_stack)
                        if old_stack['status'] in COMPLETE_STATES:
                            click.echo(' {}'.format(old_stack_id))
                            try:
                                lizzy.delete(old_stack_id, region=region)
                                # only a delete call that did not raise counts
                                stacks_to_remove_counter -= 1
                            except requests.ConnectionError as e:
                                connection_error(e, fatal=False)
                            except requests.HTTPError as e:
                                agent_error(e, fatal=False)
                        else:
                            # stack not in COMPLETE_STATES: leave it for a later pass
                            click.echo(' > {} current status is {} trying '
                                       'again later'.format(old_stack_id,
                                                            old_stack['status']))
                if stacks_to_remove_counter > 0:
                    # something is left over; pause before listing stacks again
                    time.sleep(5)

        if datetime.datetime.utcnow() > end_time:
            click.echo('Timeout waiting for related stacks to be ready.')