Example #1
0
def main():
    """Deploy a DC/OS cluster on Azure, run the integration tests and exit.

    Reads its configuration from ``check_environment()``, deploys an ACS
    template through ``DcosAzureResourceGroup.deploy_acs_template``, waits
    for the deployment, opens a tunnel to the public master load balancer
    FQDN on port 2200 and runs ``integration_test`` through it.

    The resource group is deleted only when the test result is ``0``;
    otherwise it is left in place for troubleshooting.

    Raises:
        SystemExit: always, with the integration test result as status.
            The status is forced to ``0`` when ``options.ci_flags`` is
            truthy.
    """
    options = check_environment()
    aw = AzureWrapper(options.location, options.subscription_id,
                      options.client_id, options.client_secret,
                      options.tenant_id)
    dcos_resource_group = DcosAzureResourceGroup.deploy_acs_template(
        azure_wrapper=aw,
        template_url=options.template_url,
        group_name=options.name,
        public_key=options.public_ssh_key,
        master_prefix=options.master_prefix,
        agent_prefix=options.agent_prefix,
        admin_name=options.linux_user,
        oauth_enabled=options.oauth_enabled,
        vm_size=options.vm_size,
        agent_count=options.num_agents,
        name_suffix=options.name_suffix,
        vm_diagnostics_enabled=options.vm_diagnostics_enabled)
    # Pessimistic default: only a completed test run can set a passing status.
    result = 1
    dcos_resource_group.wait_for_deployment()
    dcos_dns = dcos_resource_group.public_master_lb_fqdn
    master_list = [
        ip.private_ip for ip in dcos_resource_group.get_master_ips()
    ]
    # The tunnel goes to the public load balancer; the tests themselves are
    # pointed at the first master's private IP.
    with tunnel(options.linux_user,
                load_string(options.ssh_key_path),
                dcos_dns,
                port=2200) as t:
        result = integration_test(
            tunnel=t,
            dcos_dns=master_list[0],
            master_list=master_list,
            agent_list=[
                ip.private_ip
                for ip in dcos_resource_group.get_private_agent_ips()
            ],
            public_agent_list=[
                ip.private_ip
                for ip in dcos_resource_group.get_public_agent_ips()
            ],
            test_cmd=options.test_cmd)
    if result == 0:
        log.info('Test successful! Deleting Azure resource group')
        dcos_resource_group.delete()
    else:
        # Adjacent literals are concatenated verbatim, so the separating
        # space has to be part of the first literal.
        logging.warning(
            'Test exited with an error; Resource group preserved for troubleshooting. '
            'See https://github.com/mesosphere/cloudcleaner project for cleanup policies'
        )
    if options.ci_flags:
        result = 0  # Wipe the return code so that tests can be muted in CI
    sys.exit(result)
Example #2
0
def main():
    """Deploy a DC/OS cluster on Azure, run the integration tests and exit.

    Reads its configuration from ``check_environment()``, deploys an ACS
    template through ``DcosAzureResourceGroup.deploy_acs_template``, waits
    for the deployment, opens a tunnel to the public master load balancer
    FQDN on port 2200 and runs ``integration_test`` through it, passing that
    same FQDN as ``dcos_dns``.

    The resource group is deleted only when the test result is ``0``;
    otherwise it is left in place for troubleshooting.

    Raises:
        SystemExit: always, with the integration test result as status.
            The status is forced to ``0`` when ``options.ci_flags`` is
            truthy.
    """
    options = check_environment()
    aw = AzureWrapper(
        options.location,
        options.subscription_id,
        options.client_id,
        options.client_secret,
        options.tenant_id)
    dcos_resource_group = DcosAzureResourceGroup.deploy_acs_template(
        azure_wrapper=aw,
        template_url=options.template_url,
        group_name=options.name,
        public_key=options.public_ssh_key,
        master_prefix=options.master_prefix,
        agent_prefix=options.agent_prefix,
        admin_name=options.linux_user,
        oauth_enabled=options.oauth_enabled,
        vm_size=options.vm_size,
        agent_count=options.num_agents,
        name_suffix=options.name_suffix,
        vm_diagnostics_enabled=options.vm_diagnostics_enabled)
    # Pessimistic default: only a completed test run can set a passing status.
    result = 1
    dcos_resource_group.wait_for_deployment()
    dcos_dns = dcos_resource_group.public_master_lb_fqdn
    master_list = [ip.private_ip for ip in dcos_resource_group.get_master_ips()]
    with tunnel(options.linux_user, load_string(options.ssh_key_path),
                dcos_dns, port=2200) as t:
        result = integration_test(
            tunnel=t,
            dcos_dns=dcos_dns,
            master_list=master_list,
            agent_list=[ip.private_ip for ip in dcos_resource_group.get_private_agent_ips()],
            public_agent_list=[ip.private_ip for ip in dcos_resource_group.get_public_agent_ips()],
            test_cmd=options.test_cmd)
    if result == 0:
        log.info('Test successful! Deleting Azure resource group')
        dcos_resource_group.delete()
    else:
        # Adjacent literals are concatenated verbatim, so the separating
        # space has to be part of the first literal.
        logging.warning('Test exited with an error; Resource group preserved for troubleshooting. '
                        'See https://github.com/mesosphere/cloudcleaner project for cleanup policies')
    if options.ci_flags:
        result = 0  # Wipe the return code so that tests can be muted in CI
    sys.exit(result)
Example #3
0
def main():
    """Deploy a DC/OS cluster on Azure, run the integration tests and exit.

    Reads its configuration from ``check_environment()``, deploys an ACS
    template through ``DcosAzureResourceGroup.deploy_acs_template``, waits
    for the deployment, opens a tunnel to the ``masterFQDN`` deployment
    output on port 2200 and runs ``integration_test`` through it.

    When ``options.azure_cleanup`` is truthy the resource group is pushed
    onto the ``ExitStack``, so its exit handler runs when the ``with`` block
    ends, after the tunnel has been closed.

    Raises:
        SystemExit: always, with the integration test result as status.
            The status is forced to ``0`` when ``options.ci_flags`` is
            truthy.
    """
    options = check_environment()
    aw = AzureWrapper(options.location, options.subscription_id,
                      options.client_id, options.client_secret,
                      options.tenant_id)
    dcos_resource_group = DcosAzureResourceGroup.deploy_acs_template(
        azure_wrapper=aw,
        template_url=options.template_url,
        group_name=options.name,
        public_key=options.public_ssh_key,
        master_prefix=options.master_prefix,
        agent_prefix=options.agent_prefix,
        admin_name=options.linux_user,
        oauth_enabled=options.oauth_enabled,
        vm_size=options.vm_size,
        agent_count=options.num_agents,
        name_suffix=options.name_suffix,
        vm_diagnostics_enabled=options.vm_diagnostics_enabled)
    # Pessimistic default: stays 1 unless integration_test returns a value.
    result = 1
    with ExitStack() as stack:
        if options.azure_cleanup:
            # Registered first so it unwinds last (ExitStack is LIFO).
            # NOTE(review): presumably the group's __exit__ deletes the
            # resource group; confirm against DcosAzureResourceGroup.
            stack.push(dcos_resource_group)
        dcos_resource_group.wait_for_deployment()
        t = stack.enter_context(
            tunnel(options.linux_user,
                   load_string(options.ssh_key_path),
                   dcos_resource_group.outputs['masterFQDN'],
                   port=2200))
        # Fetch the master list once so dcos_dns and master_list are
        # guaranteed to come from the same snapshot.
        master_list = dcos_resource_group.get_master_ips()
        result = integration_test(
            tunnel=t,
            dcos_dns=master_list[0],
            master_list=master_list,
            agent_list=dcos_resource_group.get_private_ips(),
            public_agent_list=dcos_resource_group.get_public_ips(),
            test_cmd=options.test_cmd)
    if result == 0:
        log.info('Test successful!')
    else:
        logging.warning('Test exited with an error')
    if options.ci_flags:
        result = 0  # Wipe the return code so that tests can be muted in CI
    sys.exit(result)
Example #4
0
def main():
    """Deploy a DC/OS ARM template into a new Azure resource group and test it.

    Flow:
      1. Build service-principal credentials and the template link from the
         ``AZURE_*`` environment variables.
      2. Create a resource group with a random ``testing…`` name and record
         the group and deployment names in ``azure-cluster.json``.
      3. Validate the template, start the deployment and poll until done.
      4. Collect the private IPs of every network interface (stand-alone or
         belonging to a VM scale set), bucketed by whether the resource name
         contains ``master``, ``private`` or ``public``.
      5. Run ``integration_test`` through a tunnel to the ``masterFQDN``
         deployment output on port 2200.
      6. Delete the resource group unless ``AZURE_CLEANUP`` equals
         ``'false'``.

    Environment variables read: ``AZURE_LOCATION`` (default ``'East US'``),
    ``AZURE_CLIENT_ID``, ``AZURE_CLIENT_SECRET``, ``AZURE_TENANT_ID``,
    ``AZURE_SUBSCRIPTION_ID``, ``AZURE_TEMPLATE_URL``, ``DCOS_PYTEST_CMD``,
    ``CI_FLAGS`` and ``AZURE_CLEANUP``.

    Raises:
        SystemExit: status 1 when template validation reports an error,
            status 2 when the run ends without being marked successful.
        Exception: any error raised while deploying or testing is printed
            and re-raised once the cleanup step has run.
    """
    validate_env()
    location = os.getenv('AZURE_LOCATION', 'East US')
    credentials = azure.common.credentials.ServicePrincipalCredentials(
        client_id=os.environ['AZURE_CLIENT_ID'],
        secret=os.environ['AZURE_CLIENT_SECRET'],
        tenant=os.environ['AZURE_TENANT_ID'])
    subscription_id = os.environ['AZURE_SUBSCRIPTION_ID']
    template = TemplateLink(uri=os.environ['AZURE_TEMPLATE_URL'])
    group_name = 'testing' + ''.join(random.choice('01234567890abcdef') for _ in range(10))
    deployment_name = 'deployment{}'.format(uuid.uuid4().hex)

    rmc = ResourceManagementClient(credentials, subscription_id)

    template_parameters = get_env_params()

    # Output resource group
    print("Resource group name: {}".format(group_name))
    print("Deployment name: {}".format(deployment_name))

    azure_cluster = {'resource_group_name': group_name, 'deployment_name': deployment_name}

    pkgpanda.util.write_json('azure-cluster.json', azure_cluster)

    # Create a new resource group
    print("Creating new resource group in location: {}".format(location))
    if rmc.resource_groups.check_existence(group_name):
        # NOTE(review): execution continues after this message, so a
        # pre-existing group would be deployed into and later deleted by the
        # cleanup step below. Confirm whether this should abort instead.
        print("ERROR: Group name already exists / taken: {}".format(group_name))

    rmc.resource_groups.create_or_update(group_name, ResourceGroup(location=location))

    # Defined before the try block so the final status check can never hit
    # an unbound name.
    test_successful = False

    try:
        deployment_properties = DeploymentProperties(
            template_link=template,
            mode=DeploymentMode.incremental,
            parameters=template_parameters)

        # Use RPC against azure to validate the ARM template is well-formed
        result = rmc.deployments.validate(group_name, deployment_name, properties=deployment_properties)
        if result.error:
            print("Template verification failed\n{}".format(result.error), file=sys.stderr)
            sys.exit(1)

        # Actually create a template deployment
        print("Creating template deployment ...")
        deploy_poller = rmc.deployments.create_or_update(group_name, deployment_name, deployment_properties)

        def retry_if_assertion_error(exception):
            """Return True if we should retry (in this case when it's an AssertionError), False otherwise"""
            return isinstance(exception, AssertionError)

        @retry(retry_on_exception=retry_if_assertion_error, stop_max_attempt_number=45)
        def poll_deploy():
            res = deploy_poller.result(timeout=60)
            print("Current deploy state: {}".format(res.properties.provisioning_state))
            # Raised explicitly rather than via `assert`, which is stripped
            # under `python -O` and would end the retry loop immediately.
            if not deploy_poller.done():
                raise AssertionError("Not done deploying.")

        print("Waiting for template to deploy.")
        try:
            poll_deploy()
        except Exception:
            print("Current deploy status:\n{}".format(deploy_poller.result(0)))
            raise
        print("Template deployed successfully")

        if not deploy_poller.done():
            raise AssertionError("Deployment failed / polling didn't reach deployment done.")
        deployment_result = deploy_poller.result()
        print(deployment_result.properties.outputs)
        master_lb = deployment_result.properties.outputs['masterFQDN']['value']

        print("Template deployed using SSH private key: https://mesosphere.onelogin.com/notes/18444")
        print("For troubleshooting, master0 can be reached using: ssh -p 2200 {}@{}".format(
            get_value('linuxAdminUsername'), master_lb))

        # Run test now, so grab IPs
        nmc = NetworkManagementClient(credentials, subscription_id)
        ip_buckets = {
            'master': [],
            'private': [],
            'public': []}

        resource_filter = ("resourceType eq 'Microsoft.Network/networkInterfaces' or "
                           "resourceType eq 'Microsoft.Compute/virtualMachineScaleSets'")
        for resource in rmc.resource_groups.list_resources(group_name, filter=resource_filter):
            if resource.type == 'Microsoft.Network/networkInterfaces':
                nics = [nmc.network_interfaces.get(group_name, resource.name)]
            elif resource.type == 'Microsoft.Compute/virtualMachineScaleSets':
                nics = list(nmc.network_interfaces.list_virtual_machine_scale_set_network_interfaces(
                    virtual_machine_scale_set_name=resource.name, resource_group_name=group_name))
            else:
                # Raising a bare string is itself a TypeError and would hide
                # this message, so raise a real exception.
                raise ValueError('Unexpected resourceType: {}'.format(resource.type))

            # A resource lands in every bucket whose name occurs in its own.
            for bucket_name, bucket in ip_buckets.items():
                if bucket_name in resource.name:
                    for nic in nics:
                        for config in nic.ip_configurations:
                            bucket.append(config.private_ip_address)

        print('Detected IP configuration: {}'.format(ip_buckets))

        with tunnel(get_value('linuxAdminUsername'), pkgpanda.util.load_string('ssh_key'), master_lb, port=2200) as t:
            default_test_cmd = get_add_env() + " py.test -vv -rs -m 'not ccm' "
            # NOTE(review): the return value of integration_test is discarded;
            # if it reports failure through an exit status instead of raising,
            # a failing run would still be marked successful. Confirm.
            integration_test(
                tunnel=t,
                dcos_dns=ip_buckets['master'][0],
                master_list=ip_buckets['master'],
                agent_list=ip_buckets['private'],
                public_agent_list=ip_buckets['public'],
                test_cmd=os.getenv('DCOS_PYTEST_CMD', default_test_cmd) + os.getenv('CI_FLAGS', ''))
        test_successful = True
    except Exception as ex:
        traceback.print_exc()
        print("ERROR: exception {}".format(ex))
        raise
    finally:
        if os.getenv('AZURE_CLEANUP') == 'false':
            print("Cluster must be cleaned up manually")
            print("Cluster details: {}".format(azure_cluster))
        else:
            # TODO(cmaloney): The old code had a retry around this:
            # @retry(wait_exponential_multiplier=1000, wait_exponential_max=60*1000, stop_max_delay=(30*60*1000))
            poller = rmc.resource_groups.delete(group_name)

            # poll for the delete to complete
            print("Deleting resource group: {} ...".format(group_name))

            # Check every 5 seconds, for at most one hour.
            @retry(wait_fixed=(5 * 1000), stop_max_delay=(60 * 60 * 1000))
            def wait_for_delete():
                # Explicit raise so the wait still works under `python -O`.
                if not poller.done():
                    raise AssertionError("Timed out waiting for delete")

            print("Waiting for delete ...")
            wait_for_delete()

            print("Clean up successful")

    if test_successful:
        print("Azure test deployment succeeded")
    else:
        print("ERROR: Azure test deployment failed", file=sys.stderr)
        sys.exit(2)
Example #5
0
def main():
    """Deploy a DC/OS ARM template into a new Azure resource group and test it.

    Flow:
      1. Build service-principal credentials and the template link from the
         ``AZURE_*`` environment variables.
      2. Create a resource group with a random ``testing…`` name and record
         the group and deployment names in ``azure-cluster.json``.
      3. Validate the template, start the deployment and poll until done.
      4. Collect the private IPs of every network interface (stand-alone or
         belonging to a VM scale set), bucketed by whether the resource name
         contains ``master``, ``private`` or ``public``.
      5. Run ``integration_test`` through a tunnel to the ``masterFQDN``
         deployment output on port 2200.
      6. Delete the resource group unless ``AZURE_CLEANUP`` equals
         ``'false'``.

    Environment variables read: ``AZURE_LOCATION`` (default ``'East US'``),
    ``AZURE_CLIENT_ID``, ``AZURE_CLIENT_SECRET``, ``AZURE_TENANT_ID``,
    ``AZURE_SUBSCRIPTION_ID``, ``AZURE_TEMPLATE_URL``, ``DCOS_PYTEST_CMD``,
    ``CI_FLAGS`` and ``AZURE_CLEANUP``.

    Raises:
        SystemExit: status 1 when template validation reports an error,
            status 2 when the run ends without being marked successful.
        Exception: any error raised while deploying or testing is printed
            and re-raised once the cleanup step has run.
    """
    validate_env()
    location = os.getenv('AZURE_LOCATION', 'East US')
    credentials = azure.common.credentials.ServicePrincipalCredentials(
        client_id=os.environ['AZURE_CLIENT_ID'],
        secret=os.environ['AZURE_CLIENT_SECRET'],
        tenant=os.environ['AZURE_TENANT_ID'])
    subscription_id = os.environ['AZURE_SUBSCRIPTION_ID']
    template = TemplateLink(uri=os.environ['AZURE_TEMPLATE_URL'])
    group_name = 'testing' + ''.join(random.choice('01234567890abcdef') for _ in range(10))
    deployment_name = 'deployment{}'.format(uuid.uuid4().hex)

    rmc = ResourceManagementClient(credentials, subscription_id)

    template_parameters = get_env_params()

    # Output resource group
    print("Resource group name: {}".format(group_name))
    print("Deployment name: {}".format(deployment_name))

    azure_cluster = {
        'resource_group_name': group_name,
        'deployment_name': deployment_name}
    pkgpanda.util.write_json('azure-cluster.json', azure_cluster)

    # Create a new resource group
    print("Creating new resource group in location: {}".format(location))
    if rmc.resource_groups.check_existence(group_name):
        # NOTE(review): execution continues after this message, so a
        # pre-existing group would be deployed into and later deleted by the
        # cleanup step below. Confirm whether this should abort instead.
        print("ERROR: Group name already exists / taken: {}".format(group_name))
    rmc.resource_groups.create_or_update(
        group_name,
        ResourceGroup(location=location))

    test_successful = False

    try:
        deployment_properties = DeploymentProperties(
            template_link=template,
            mode=DeploymentMode.incremental,
            parameters=template_parameters)

        # Use RPC against azure to validate the ARM template is well-formed
        result = rmc.deployments.validate(group_name, deployment_name, properties=deployment_properties)
        if result.error:
            print("Template verification failed\n{}".format(result.error), file=sys.stderr)
            sys.exit(1)

        # Actually create a template deployment
        print("Creating template deployment ...")
        deploy_poller = rmc.deployments.create_or_update(group_name, deployment_name, deployment_properties)

        # Stop after 45 attempts (each one takes up to one minute)
        @retry(stop_max_attempt_number=45)
        def poll_deploy():
            res = deploy_poller.result(timeout=60)
            print("Current deploy state: {}".format(res.properties.provisioning_state))
            # Raised explicitly rather than via `assert`, which is stripped
            # under `python -O` and would end the retry loop immediately.
            if not deploy_poller.done():
                raise AssertionError("Not done deploying.")

        print("Waiting for template to deploy ...")
        try:
            poll_deploy()
        except Exception:
            print("Current deploy status:\n{}".format(deploy_poller.result(0)))
            raise
        print("Template deployed successfully")

        if not deploy_poller.done():
            raise AssertionError("Deployment failed / polling didn't reach deployment done.")
        deployment_result = deploy_poller.result()
        print(deployment_result.properties.outputs)
        master_lb = deployment_result.properties.outputs['masterFQDN']['value']

        print("Template deployed using SSH private key: https://mesosphere.onelogin.com/notes/18444")
        print("For troubleshooting, master0 can be reached using: ssh -p 2200 {}@{}".format(
            get_value('linuxAdminUsername'), master_lb))

        # Run test now, so grab IPs
        nmc = NetworkManagementClient(credentials, subscription_id)
        ip_buckets = {
            'master': [],
            'private': [],
            'public': []}

        resource_filter = ("resourceType eq 'Microsoft.Network/networkInterfaces' or "
                           "resourceType eq 'Microsoft.Compute/virtualMachineScaleSets'")
        for resource in rmc.resource_groups.list_resources(group_name, filter=resource_filter):
            if resource.type == 'Microsoft.Network/networkInterfaces':
                nics = [nmc.network_interfaces.get(group_name, resource.name)]
            elif resource.type == 'Microsoft.Compute/virtualMachineScaleSets':
                nics = list(nmc.network_interfaces.list_virtual_machine_scale_set_network_interfaces(
                    virtual_machine_scale_set_name=resource.name, resource_group_name=group_name))
            else:
                # Raising a bare string is itself a TypeError and would hide
                # this message, so raise a real exception.
                raise ValueError('Unexpected resourceType: {}'.format(resource.type))

            # A resource lands in every bucket whose name occurs in its own.
            for bucket_name, bucket in ip_buckets.items():
                if bucket_name in resource.name:
                    for nic in nics:
                        for config in nic.ip_configurations:
                            bucket.append(config.private_ip_address)

        print('Detected IP configuration: {}'.format(ip_buckets))

        with tunnel(get_value('linuxAdminUsername'), pkgpanda.util.load_string('ssh_key'), master_lb, port=2200) as t:
            default_test_cmd = get_add_env() + " py.test -vv -s -rs -m 'not ccm' "
            # NOTE(review): the return value of integration_test is discarded;
            # if it reports failure through an exit status instead of raising,
            # a failing run would still be marked successful. Confirm.
            integration_test(
                tunnel=t,
                dcos_dns=ip_buckets['master'][0],
                master_list=ip_buckets['master'],
                agent_list=ip_buckets['private'],
                public_agent_list=ip_buckets['public'],
                test_cmd=os.getenv('DCOS_PYTEST_CMD', default_test_cmd) + os.getenv('CI_FLAGS', ''))
        test_successful = True
    except Exception as ex:
        traceback.print_exc()
        print("ERROR: exception {}".format(ex))
        raise
    finally:
        if os.getenv('AZURE_CLEANUP') == 'false':
            print("Cluster must be cleaned up manually")
            print("Cluster details: {}".format(azure_cluster))
        else:
            # Send a delete request
            # TODO(cmaloney): The old code had a retry around this:
            # @retry(wait_exponential_multiplier=1000, wait_exponential_max=60*1000, stop_max_delay=(30*60*1000))
            poller = rmc.resource_groups.delete(group_name)

            # poll for the delete to complete
            print("Deleting resource group: {} ...".format(group_name))

            # Check every 5 seconds, for at most one hour.
            @retry(wait_fixed=(5 * 1000), stop_max_delay=(60 * 60 * 1000))
            def wait_for_delete():
                # Explicit raise so the wait still works under `python -O`.
                if not poller.done():
                    raise AssertionError("Timed out waiting for delete")

            print("Waiting for delete ...")
            wait_for_delete()

            print("Clean up successful")

    if test_successful:
        print("Azure test deployment succeeded")
    else:
        print("ERROR: Azure test deployment failed", file=sys.stderr)
        sys.exit(2)