def create_instance(name, config, region, key_name, ssh_key, instance_data,
                    deploypass, loaned_to, loan_bug, create_ami,
                    ignore_subnet_check, max_attempts):
    """Launch an instance from ``config["ami"]``, tag it and assimilate it.

    Steps, in order: build the block device mapping and the network
    interfaces, call ``run_instances`` (retrying on ``BotoServerError``),
    wait for the instance to reach the "running" state, tag it, run
    ``assimilate_instance`` (retrying on failure) and, when ``create_ami``
    is set, stop the instance, turn its root volume into an AMI and
    terminate the instance.

    Args:
        name: short host name; also written to the "Name" tag.
        config: dict read for "domain", "ami", "type" and "instance_type",
            plus the optional "device_map", "subnet_ids",
            "security_group_ids", "use_public_ip", "dns_search_domain",
            "disable_api_termination" and "instance_profile_name" keys.
            "distro" and "tags" are read as well when ``create_ami`` is set.
        region: region name handed to ``get_aws_connection``.
        key_name: key pair name passed to ``run_instances``.
        ssh_key: forwarded to ``assimilate_instance``.
        instance_data: dict; a shallow copy is extended with "name",
            "domain" and "hostname" (the caller's dict is not modified).
        deploypass: used as "password" in the user-data template and
            forwarded to ``assimilate_instance``.
        loaned_to: when truthy, stored in the "moz-loaned-to" tag.
        loan_bug: when truthy, stored in the "moz-bug" tag.
        create_ami: when truthy, assimilation runs with ``reboot=False`` and
            an AMI is generated afterwards, after which the instance is
            terminated.
        ignore_subnet_check: forwarded to ``make_instance_interfaces``.
        max_attempts: maximum number of tries for each of the two retry
            loops; a falsy value means "retry forever".

    Raises:
        boto.exception.BotoServerError: the error of the last
            ``run_instances`` attempt once ``max_attempts`` is exhausted.
    """
    conn = get_aws_connection(region)
    # The same token is sent with every retry so that we don't request the
    # same thing twice.
    token = str(uuid.uuid4())[:16]

    instance_data = instance_data.copy()
    instance_data['name'] = name
    instance_data['domain'] = config['domain']
    instance_data['hostname'] = '{name}.{domain}'.format(
        name=name, domain=config['domain'])

    ami = conn.get_all_images(image_ids=[config["ami"]])[0]
    bdm = None
    if 'device_map' in config:
        bdm = BlockDeviceMapping()
        for device, device_info in config['device_map'].items():
            bd = BlockDeviceType()
            if device_info.get('size'):
                bd.size = device_info['size']
            # Overwrite root device size for HVM instances, since they cannot
            # be resized online
            if ami.virtualization_type == "hvm" and \
                    ami.root_device_name == device:
                bd.size = ami.block_device_mapping[ami.root_device_name].size
            # Volumes are deleted on termination unless the config says
            # exactly ``False``.
            if device_info.get("delete_on_termination") is not False:
                bd.delete_on_termination = True
            if device_info.get("ephemeral_name"):
                bd.ephemeral_name = device_info["ephemeral_name"]
            if device_info.get("volume_type"):
                bd.volume_type = device_info["volume_type"]
                if device_info["volume_type"] == "io1" \
                        and device_info.get("iops"):
                    bd.iops = device_info["iops"]

            bdm[device] = bd

    interfaces = make_instance_interfaces(
        region, instance_data['hostname'], ignore_subnet_check,
        config.get('subnet_ids'), config.get('security_group_ids', []),
        config.get("use_public_ip"))

    attempt = 1
    while True:
        try:
            puppet_master = pick_puppet_master(
                instance_data.get('puppet_masters'))
            user_data = user_data_from_template(config['type'], {
                "puppet_server": puppet_master,
                "fqdn": instance_data['hostname'],
                "hostname": instance_data['name'],
                "domain": instance_data['domain'],
                "dns_search_domain": config.get('dns_search_domain'),
                "password": deploypass,
                "moz_instance_type": config['type'],
                "region_dns_atom": get_region_dns_atom(region)})

            reservation = conn.run_instances(
                image_id=config['ami'],
                key_name=key_name,
                instance_type=config['instance_type'],
                block_device_map=bdm,
                client_token=token,
                disable_api_termination=config.get('disable_api_termination'),
                user_data=user_data,
                instance_profile_name=config.get('instance_profile_name'),
                network_interfaces=interfaces,
            )
            break
        except boto.exception.BotoServerError:
            log.exception("Cannot start an instance")
            # Out of attempts: surface the real error instead of falling
            # through with ``reservation`` unbound.
            if max_attempts and attempt >= max_attempts:
                raise
        attempt += 1
        time.sleep(10)

    instance = reservation.instances[0]
    log.info("instance %s created, waiting to come up", instance)
    # Wait for the instance to come up
    wait_for_status(instance, "state", "running", "update")
    instance.add_tag('Name', name)
    instance.add_tag('FQDN', instance_data['hostname'])
    instance.add_tag('created', time.strftime("%Y-%m-%d %H:%M:%S %Z",
                                              time.gmtime()))
    instance.add_tag('moz-type', config['type'])
    if loaned_to:
        instance.add_tag("moz-loaned-to", loaned_to)
    if loan_bug:
        instance.add_tag("moz-bug", loan_bug)

    log.info("assimilating %s", instance)
    instance.add_tag('moz-state', 'pending')

    # Don't reboot if need to create ami
    reboot = not create_ami
    attempt = 1
    assimilated = False
    while True:
        try:
            assimilate_instance(instance=instance, config=config,
                                ssh_key=ssh_key, instance_data=instance_data,
                                deploypass=deploypass, reboot=reboot)
            assimilated = True
            break
        except NetworkError as e:
            # it takes a while for the machine to start/reboot so the
            # NetworkError exception is quite common, just log the error,
            # without the full stack trace
            log.warning("cannot connect; instance may still be starting  %s (%s, %s) - %s,"
                        "retrying in %d sec ...", instance_data['hostname'], instance.id,
                        instance.private_ip_address, e, FAILURE_TIMEOUT)
        except (Exception, SystemExit):
            # Any other failure. SystemExit is still retried because the
            # assimilation helpers may abort that way (assumption: they are
            # Fabric based -- TODO confirm). KeyboardInterrupt is
            # deliberately NOT caught so an operator can stop the loop.
            log.warning("problem assimilating %s (%s, %s), retrying in "
                        "%d sec ...", instance_data['hostname'], instance.id,
                        instance.private_ip_address, FAILURE_TIMEOUT,
                        exc_info=True)
        if max_attempts and attempt >= max_attempts:
            break
        attempt += 1
        time.sleep(FAILURE_TIMEOUT)

    if not assimilated:
        # Never advertise (or snapshot) an instance that was not set up:
        # leave it tagged as 'pending' for inspection.
        log.error("giving up assimilating %s (%s) after %d attempts; "
                  "moz-state left as pending", instance_data['hostname'],
                  instance.id, attempt)
        return

    instance.add_tag('moz-state', 'ready')
    if create_ami:
        ami_name = "spot-%s-%s" % (
            config['type'], time.strftime("%Y-%m-%d-%H-%M", time.gmtime()))
        log.info("Generating AMI %s", ami_name)
        ami_cleanup(mount_point="/", distro=config["distro"])
        root_bd = instance.block_device_mapping[instance.root_device_name]
        volume = instance.connection.get_all_volumes(
            volume_ids=[root_bd.volume_id])[0]
        # The instance has to be stopped to flush EBS caches
        instance.stop()
        wait_for_status(instance, 'state', 'stopped', 'update')
        ami = volume_to_ami(volume=volume, ami_name=ami_name,
                            arch=instance.architecture,
                            virtualization_type=instance.virtualization_type,
                            kernel_id=instance.kernel,
                            root_device_name=instance.root_device_name,
                            tags=config["tags"])
        log.info("AMI %s (%s) is ready", ami_name, ami.id)
        log.warning("Terminating %s", instance)
        instance.terminate()
def create_instance(name, config, region, key_name, ssh_key, instance_data,
                    deploypass, loaned_to, loan_bug, create_ami,
                    ignore_subnet_check, max_attempts):
    """Launch an instance from ``config["ami"]``, tag it and assimilate it.

    Steps, in order: build the block device mapping and the network
    interfaces, call ``run_instances`` (retrying on ``BotoServerError``),
    wait for the instance to reach the "running" state, tag it, run
    ``assimilate_instance`` (retrying on failure) and, when ``create_ami``
    is set, stop the instance, turn its root volume into an AMI and
    terminate the instance.

    Args:
        name: short host name; also written to the "Name" tag.
        config: dict read for "domain", "ami", "type" and "instance_type",
            plus the optional "device_map", "subnet_ids",
            "security_group_ids", "use_public_ip", "dns_search_domain",
            "disable_api_termination" and "instance_profile_name" keys.
            "distro" and "tags" are read as well when ``create_ami`` is set.
        region: region name handed to ``get_aws_connection``.
        key_name: key pair name passed to ``run_instances``.
        ssh_key: forwarded to ``assimilate_instance``.
        instance_data: dict; a shallow copy is extended with "name",
            "domain" and "hostname" (the caller's dict is not modified).
        deploypass: used as "password" in the user-data template and
            forwarded to ``assimilate_instance``.
        loaned_to: when truthy, stored in the "moz-loaned-to" tag.
        loan_bug: when truthy, stored in the "moz-bug" tag.
        create_ami: when truthy, assimilation runs with ``reboot=False`` and
            an AMI is generated afterwards, after which the instance is
            terminated.
        ignore_subnet_check: forwarded to ``make_instance_interfaces``.
        max_attempts: maximum number of tries for each of the two retry
            loops; a falsy value means "retry forever".

    Raises:
        boto.exception.BotoServerError: the error of the last
            ``run_instances`` attempt once ``max_attempts`` is exhausted.
    """
    conn = get_aws_connection(region)
    # The same token is sent with every retry so that we don't request the
    # same thing twice.
    token = str(uuid.uuid4())[:16]

    instance_data = instance_data.copy()
    instance_data['name'] = name
    instance_data['domain'] = config['domain']
    instance_data['hostname'] = '{name}.{domain}'.format(
        name=name, domain=config['domain'])

    ami = conn.get_all_images(image_ids=[config["ami"]])[0]
    bdm = None
    if 'device_map' in config:
        bdm = BlockDeviceMapping()
        for device, device_info in config['device_map'].items():
            bd = BlockDeviceType()
            if device_info.get('size'):
                bd.size = device_info['size']
            # Overwrite root device size for HVM instances, since they cannot
            # be resized online
            if ami.virtualization_type == "hvm" and \
                    ami.root_device_name == device:
                bd.size = ami.block_device_mapping[ami.root_device_name].size
            # Volumes are deleted on termination unless the config says
            # exactly ``False``.
            if device_info.get("delete_on_termination") is not False:
                bd.delete_on_termination = True
            if device_info.get("ephemeral_name"):
                bd.ephemeral_name = device_info["ephemeral_name"]
            if device_info.get("volume_type"):
                bd.volume_type = device_info["volume_type"]
                if device_info["volume_type"] == "io1" \
                        and device_info.get("iops"):
                    bd.iops = device_info["iops"]

            bdm[device] = bd

    interfaces = make_instance_interfaces(region, instance_data['hostname'],
                                          ignore_subnet_check,
                                          config.get('subnet_ids'),
                                          config.get('security_group_ids', []),
                                          config.get("use_public_ip"))

    attempt = 1
    while True:
        try:
            puppet_master = pick_puppet_master(
                instance_data.get('puppet_masters'))
            user_data = user_data_from_template(
                config['type'], {
                    "puppet_server": puppet_master,
                    "fqdn": instance_data['hostname'],
                    "hostname": instance_data['name'],
                    "domain": instance_data['domain'],
                    "dns_search_domain": config.get('dns_search_domain'),
                    "password": deploypass,
                    "moz_instance_type": config['type'],
                    "region_dns_atom": get_region_dns_atom(region)
                })

            reservation = conn.run_instances(
                image_id=config['ami'],
                key_name=key_name,
                instance_type=config['instance_type'],
                block_device_map=bdm,
                client_token=token,
                disable_api_termination=config.get('disable_api_termination'),
                user_data=user_data,
                instance_profile_name=config.get('instance_profile_name'),
                network_interfaces=interfaces,
            )
            break
        except boto.exception.BotoServerError:
            log.exception("Cannot start an instance")
            # Out of attempts: surface the real error instead of falling
            # through with ``reservation`` unbound.
            if max_attempts and attempt >= max_attempts:
                raise
        attempt += 1
        time.sleep(10)

    instance = reservation.instances[0]
    log.info("instance %s created, waiting to come up", instance)
    # Wait for the instance to come up
    wait_for_status(instance, "state", "running", "update")
    instance.add_tag('Name', name)
    instance.add_tag('FQDN', instance_data['hostname'])
    instance.add_tag('created',
                     time.strftime("%Y-%m-%d %H:%M:%S %Z", time.gmtime()))
    instance.add_tag('moz-type', config['type'])
    if loaned_to:
        instance.add_tag("moz-loaned-to", loaned_to)
    if loan_bug:
        instance.add_tag("moz-bug", loan_bug)

    log.info("assimilating %s", instance)
    instance.add_tag('moz-state', 'pending')

    # Don't reboot if need to create ami
    reboot = not create_ami
    attempt = 1
    assimilated = False
    while True:
        try:
            assimilate_instance(instance=instance,
                                config=config,
                                ssh_key=ssh_key,
                                instance_data=instance_data,
                                deploypass=deploypass,
                                reboot=reboot)
            assimilated = True
            break
        except NetworkError as e:
            # it takes a while for the machine to start/reboot so the
            # NetworkError exception is quite common, just log the error,
            # without the full stack trace
            log.warning(
                "cannot connect; instance may still be starting  %s (%s, %s) - %s,"
                "retrying in %d sec ...", instance_data['hostname'],
                instance.id, instance.private_ip_address, e, FAILURE_TIMEOUT)
        except (Exception, SystemExit):
            # Any other failure. SystemExit is still retried because the
            # assimilation helpers may abort that way (assumption: they are
            # Fabric based -- TODO confirm). KeyboardInterrupt is
            # deliberately NOT caught so an operator can stop the loop.
            log.warning(
                "problem assimilating %s (%s, %s), retrying in "
                "%d sec ...",
                instance_data['hostname'],
                instance.id,
                instance.private_ip_address,
                FAILURE_TIMEOUT,
                exc_info=True)
        if max_attempts and attempt >= max_attempts:
            break
        attempt += 1
        time.sleep(FAILURE_TIMEOUT)

    if not assimilated:
        # Never advertise (or snapshot) an instance that was not set up:
        # leave it tagged as 'pending' for inspection.
        log.error("giving up assimilating %s (%s) after %d attempts; "
                  "moz-state left as pending", instance_data['hostname'],
                  instance.id, attempt)
        return

    instance.add_tag('moz-state', 'ready')
    if create_ami:
        ami_name = "spot-%s-%s" % (
            config['type'], time.strftime("%Y-%m-%d-%H-%M", time.gmtime()))
        log.info("Generating AMI %s", ami_name)
        ami_cleanup(mount_point="/", distro=config["distro"])
        root_bd = instance.block_device_mapping[instance.root_device_name]
        volume = instance.connection.get_all_volumes(
            volume_ids=[root_bd.volume_id])[0]
        # The instance has to be stopped to flush EBS caches.
        # The pause beforehand prevents the shutdown from occasionally
        # interfering with the capture of Windows AMIs.
        time.sleep(15)
        instance.stop()
        wait_for_status(instance, 'state', 'stopped', 'update')
        ami = volume_to_ami(volume=volume,
                            ami_name=ami_name,
                            arch=instance.architecture,
                            virtualization_type=instance.virtualization_type,
                            kernel_id=instance.kernel,
                            root_device_name=instance.root_device_name,
                            tags=config["tags"])
        log.info("AMI %s (%s) is ready", ami_name, ami.id)
        log.warning("Terminating %s", instance)
        instance.terminate()
# Example no. 3
# 0
def do_request_instance(region, moz_instance_type, price, ami, instance_config,
                        instance_type, availability_zone, slaveset, is_spot,
                        all_instances, dryrun):
    """Request one spot or on-demand instance and record a "started" metric.

    Args:
        region: region name; also the key into ``instance_config``.
        moz_instance_type: instance flavour used for naming, user data and
            the metric name.
        price: forwarded to the request helper (logged for spot requests).
        ami: image object; ``id``, ``virtualization_type`` and
            ``root_device_type`` are read from it.
        instance_config: dict keyed by region; the region entry is read for
            "subnet_ids", "domain", "device_map", "ssh_key" and the optional
            "security_group_ids" and "instance_profile_name".
        instance_type: instance type string; dots are replaced by dashes in
            the metric name.
        availability_zone: zone used to pick a subnet.
        slaveset: forwarded to ``get_available_slave_name`` and
            ``jacuzzi_suffix``.
        is_spot: truthy for a spot request, falsy for on-demand.
        all_instances: forwarded to ``get_available_slave_name``.
        dryrun: when truthy, nothing is requested.

    Returns:
        False when no slave name or no subnet is available, True on a dry
        run, otherwise whatever the spot / on-demand request helper returns.
    """
    name = get_available_slave_name(region,
                                    moz_instance_type,
                                    slaveset,
                                    is_spot=is_spot,
                                    all_instances=all_instances)
    if not name:
        # Arguments are passed lazily so the message is only formatted when
        # debug logging is enabled (as every other log call here does).
        log.debug("No slave name available for %s, %s, %s",
                  region, moz_instance_type, slaveset)
        return False

    # Looked up only after a name was found, exactly where the first
    # per-region access used to happen.
    region_config = instance_config[region]

    subnet_id = get_avail_subnet(region, region_config["subnet_ids"],
                                 availability_zone)
    if not subnet_id:
        log.debug("No free IP available for %s in %s", moz_instance_type,
                  availability_zone)
        return False

    fqdn = "{}.{}".format(name, region_config["domain"])
    if is_spot:
        log.debug("Spot request for %s (%s)", fqdn, price)
    else:
        log.debug("Starting %s", fqdn)

    if dryrun:
        log.info("Dry run. skipping")
        return True

    spec = NetworkInterfaceSpecification(
        associate_public_ip_address=True,
        subnet_id=subnet_id,
        delete_on_termination=True,
        groups=region_config.get("security_group_ids"))
    nc = NetworkInterfaceCollection(spec)

    user_data = user_data_from_template(moz_instance_type, fqdn, region)
    bdm = create_block_device_mapping(ami, region_config['device_map'])
    if is_spot:
        rv = do_request_spot_instance(
            region, price, ami.id, instance_type,
            region_config["ssh_key"], user_data, bdm, nc,
            region_config.get("instance_profile_name"),
            moz_instance_type, name, fqdn)
    else:
        rv = do_request_ondemand_instance(
            region, price, ami.id, instance_type,
            region_config["ssh_key"], user_data, bdm, nc,
            region_config.get("instance_profile_name"),
            moz_instance_type, name, fqdn)
    if rv:
        template_values = dict(
            region=region,
            moz_instance_type=moz_instance_type,
            instance_type=instance_type.replace(".", "-"),
            life_cycle_type="spot" if is_spot else "ondemand",
            virtualization=ami.virtualization_type,
            root_device_type=ami.root_device_type,
            jacuzzi_type=jacuzzi_suffix(slaveset),
        )
        # Kept separate from ``name`` (the slave name) to avoid rebinding it.
        metric_name = "started.{region}.{moz_instance_type}.{instance_type}" \
            ".{life_cycle_type}.{virtualization}.{root_device_type}" \
            ".{jacuzzi_type}"
        gr_log.add(metric_name.format(**template_values), 1, collect=True)
    return rv
def do_request_instance(region, moz_instance_type, price, ami, instance_config,
                        instance_type, availability_zone, is_spot,
                        all_instances, dryrun):
    """Request one spot or on-demand instance and record a "started" metric.

    Returns False when no slave name or no subnet is available, True on a
    dry run, and otherwise whatever the spot / on-demand request helper
    returns. A truthy result additionally bumps a ``started.*`` counter via
    ``gr_log``.
    """
    slave_name = get_available_slave_name(
        region, moz_instance_type,
        is_spot=is_spot, all_instances=all_instances)
    if not slave_name:
        log.debug("No slave name available for %s, %s",
                  region, moz_instance_type)
        return False

    # Per-region settings, fetched once a name is known.
    region_config = instance_config[region]

    subnet = get_avail_subnet(
        region, region_config["subnet_ids"], availability_zone)
    if not subnet:
        log.debug("No free IP available for %s in %s", moz_instance_type,
                  availability_zone)
        return False

    domain = region_config["domain"]
    fqdn = "{}.{}".format(slave_name, domain)
    if not is_spot:
        log.debug("Starting %s", fqdn)
    else:
        log.debug("Spot request for %s (%s)", fqdn, price)

    if dryrun:
        log.info("Dry run. skipping")
        return True

    interfaces = NetworkInterfaceCollection(
        NetworkInterfaceSpecification(
            associate_public_ip_address=True,
            subnet_id=subnet,
            delete_on_termination=True,
            groups=region_config.get("security_group_ids")))

    template_vars = {
        "moz_instance_type": moz_instance_type,
        "hostname": slave_name,
        "domain": domain,
        "fqdn": fqdn,
        "region_dns_atom": get_region_dns_atom(region),
        "puppet_server": "",  # intentionally empty
        "password": "",  # intentionally empty
    }
    user_data = user_data_from_template(moz_instance_type, template_vars)

    bdm = create_block_device_mapping(ami, region_config['device_map'])

    # Both helpers take the same positional arguments; only the chosen one
    # is looked up.
    if is_spot:
        request = do_request_spot_instance
    else:
        request = do_request_ondemand_instance
    rv = request(
        region, price, ami.id, instance_type,
        region_config["ssh_key"], user_data, bdm, interfaces,
        region_config.get("instance_profile_name"),
        moz_instance_type, slave_name, fqdn)
    if not rv:
        return rv

    metric_fields = {
        "region": region,
        "moz_instance_type": moz_instance_type,
        "instance_type": instance_type.replace(".", "-"),
        "life_cycle_type": "spot" if is_spot else "ondemand",
        "virtualization": ami.virtualization_type,
        "root_device_type": ami.root_device_type,
    }
    metric_template = (
        "started.{region}.{moz_instance_type}.{instance_type}"
        ".{life_cycle_type}.{virtualization}.{root_device_type}")
    gr_log.add(metric_template.format(**metric_fields), 1, collect=True)
    return rv