コード例 #1
0
def main():
    """Collect instances and volumes from the selected regions and report.

    Command-line options:
        -r/--region   region to inspect; may be given several times.
                      DEFAULT_REGIONS is used when the option is absent.
        -v/--verbose  log at DEBUG level instead of ERROR.
        --events-dir  cloudtrail logs event directory, handed unchanged to
                      generate_report().
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-r", "--region", dest="regions", action="append",
                        help="optional list of regions")
    # The flag raises the log level to DEBUG; the former help text claimed
    # that it suppressed logging, which is the opposite.
    parser.add_argument("-v", "--verbose", action="store_true",
                        help="Enable debug logging")
    parser.add_argument("--events-dir", dest="events_dir",
                        help="cloudtrail logs event directory")
    args = parser.parse_args()

    logging.basicConfig(format="%(asctime)s - %(levelname)s - %(message)s")
    log.setLevel(logging.DEBUG if args.verbose else logging.ERROR)

    if not args.regions:
        args.regions = DEFAULT_REGIONS
    all_instances = []
    all_volumes = []
    for region in args.regions:
        conn = get_aws_connection(region)
        all_instances.extend(get_all_instances(conn))
        all_volumes.extend(conn.get_all_volumes())

    # NOTE: ``conn`` is the connection of the last region iterated above.
    generate_report(connection=conn,
                    regions=args.regions,
                    instances=all_instances,
                    volumes=all_volumes,
                    events_dir=args.events_dir)
コード例 #2
0
def sanity_check(regions):
    """Cancel spot requests that are stuck or have no backing instance.

    Spot instance requests are collected from every region in ``regions``:

    * "open"/"failed" requests are cancelled when their status code is in
      CANCEL_STATUS_CODES; a status code found in neither
      CANCEL_STATUS_CODES nor IGNORABLE_STATUS_CODES is logged as an error.
    * "active" requests created more than 30 minutes ago are cancelled when
      their instance ID is not among the instances returned by
      aws_get_all_instances().

    Each cancelled request is tagged with "moz-cancel-reason" first.
    """
    spot_requests = []
    for r in regions:
        conn = get_aws_connection(r)
        region_spot_requests = conn.get_all_spot_instance_requests()
        if region_spot_requests:
            spot_requests.extend(region_spot_requests)

    # A set keeps the per-request membership test O(1); the list used
    # before made the loop below quadratic.
    instance_ids = set(i.id for i in aws_get_all_instances(regions))
    max_age = 30 * 60  # seconds

    for req in spot_requests:
        if req.state in ("open", "failed"):
            if req.status.code in CANCEL_STATUS_CODES:
                log.info("Cancelling request %s", req)
                retry_aws_request(req.add_tag, "moz-cancel-reason",
                                  req.status.code)
                req.cancel()
            elif req.status.code not in IGNORABLE_STATUS_CODES:
                log.error("Uknown status for request %s: %s", req,
                          req.status.code)
        # Cancel all active requests older than 30 minutes that have no
        # known instance
        elif (req.state == "active" and
              parse_aws_time(req.create_time) + max_age < time.time() and
              req.instance_id not in instance_ids):
            log.info("Cancelling request %s: %s is not running", req,
                     req.instance_id)
            retry_aws_request(req.add_tag, "moz-cancel-reason",
                              "no-running-instances")
            req.cancel()
コード例 #3
0
def verify(hosts, config, region, ignore_subnet_check=False):
    """Validate names, DNS records and IP availability for ``hosts``.

    Every problem is logged as an error. All hosts are examined before a
    RuntimeError is raised if at least one check failed. The subnet check
    is skipped when ``ignore_subnet_check`` is true.
    """
    failed = False
    conn = get_aws_connection(region)
    for host in hosts:
        fqdn = "%s.%s" % (host, config["domain"])

        log.info("Checking name conflicts for %s", host)
        if not name_available(conn, host):
            log.error("%s has been already taken", host)
            failed = True
            continue

        log.debug("Getting IP for %s", fqdn)
        ip = get_ip(fqdn)
        if not ip:
            # Without an address none of the remaining checks can run.
            log.error("%s has no DNS entry", fqdn)
            failed = True
            continue

        log.debug("Getting PTR for %s", fqdn)
        if get_ptr(ip) != fqdn:
            log.error("Bad PTR for %s", host)
            failed = True

        log.debug("Checking %s availablility", ip)
        if not ip_available(region, ip):
            log.error("IP %s reserved for %s, but not available", ip, host)
            failed = True

        if ignore_subnet_check:
            continue
        subnet_id = get_subnet_id(get_vpc(region), ip)
        if subnet_id not in config['subnet_ids']:
            log.error("IP %s does not belong to assigned subnets", ip)
            failed = True

    if failed:
        raise RuntimeError("Sanity check failed")
コード例 #4
0
def sanity_check(regions):
    """Cancel spot requests that are stuck or have no backing instance.

    Spot instance requests are collected from every region in ``regions``:

    * "open"/"failed" requests are cancelled when their status code is in
      CANCEL_STATUS_CODES; a status code found in neither
      CANCEL_STATUS_CODES nor IGNORABLE_STATUS_CODES is logged as an error.
    * "active" requests created more than 30 minutes ago are cancelled when
      their instance ID is not among the instances returned by
      aws_get_all_instances().

    Each cancelled request is tagged with "moz-cancel-reason" first.
    """
    spot_requests = []
    for r in regions:
        conn = get_aws_connection(r)
        region_spot_requests = conn.get_all_spot_instance_requests()
        if region_spot_requests:
            spot_requests.extend(region_spot_requests)

    # Membership is tested once per request, so use a set (O(1) lookups)
    # rather than the list that was built here before.
    instance_ids = set(i.id for i in aws_get_all_instances(regions))
    max_age = 30 * 60  # seconds

    for req in spot_requests:
        if req.state in ("open", "failed"):
            if req.status.code in CANCEL_STATUS_CODES:
                log.info("Cancelling request %s", req)
                retry_aws_request(req.add_tag, "moz-cancel-reason",
                                  req.status.code)
                req.cancel()
            elif req.status.code not in IGNORABLE_STATUS_CODES:
                log.error("Uknown status for request %s: %s", req,
                          req.status.code)
        # Cancel all active requests older than 30 minutes that have no
        # known instance
        elif (req.state == "active" and
              parse_aws_time(req.create_time) + max_age < time.time() and
              req.instance_id not in instance_ids):
            log.info("Cancelling request %s: %s is not running", req,
                     req.instance_id)
            retry_aws_request(req.add_tag, "moz-cancel-reason",
                              "no-running-instances")
            req.cancel()
コード例 #5
0
def verify(hosts, config, region, ignore_subnet_check=False):
    """Run the name, DNS, PTR, IP and subnet checks for each host.

    Failures are logged and remembered; once every host has been looked
    at, a RuntimeError is raised if any check failed. Passing
    ``ignore_subnet_check=True`` skips the subnet membership check.
    """
    all_ok = True
    ec2 = get_aws_connection(region)
    for host in hosts:
        fqdn = "%s.%s" % (host, config["domain"])
        log.info("Checking name conflicts for %s", host)
        if not name_available(ec2, host):
            log.error("%s has been already taken", host)
            all_ok = False
            continue

        log.debug("Getting IP for %s", fqdn)
        ip = get_ip(fqdn)
        if not ip:
            log.error("%s has no DNS entry", fqdn)
            all_ok = False
            continue

        # From here on an IP is known, so all remaining checks are run and
        # each one may flag a failure independently.
        log.debug("Getting PTR for %s", fqdn)
        ptr = get_ptr(ip)
        if ptr != fqdn:
            log.error("Bad PTR for %s", host)
            all_ok = False

        log.debug("Checking %s availablility", ip)
        if not ip_available(region, ip):
            log.error("IP %s reserved for %s, but not available", ip, host)
            all_ok = False

        if not ignore_subnet_check:
            vpc = get_vpc(region)
            if get_subnet_id(vpc, ip) not in config['subnet_ids']:
                log.error("IP %s does not belong to assigned subnets", ip)
                all_ok = False

    if not all_ok:
        raise RuntimeError("Sanity check failed")
コード例 #6
0
def main():
    """Update the AMI status from the available self-owned images.

    Images owned by "self" in state "available" are gathered from every
    selected region (DEFAULT_REGIONS unless -r/--region is given), converted
    with amis_to_dict() and passed to update_ami_status().  -q/--quiet
    lowers logging from INFO to ERROR.
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument("-r", "--region", dest="regions",
                            action="append", help="optional list of regions")
    arg_parser.add_argument("-q", "--quiet", action="store_true",
                            help="Supress logging messages")
    opts = arg_parser.parse_args()

    logging.basicConfig(format="%(asctime)s - %(levelname)s - %(message)s")
    log.setLevel(logging.ERROR if opts.quiet else logging.INFO)

    region_names = opts.regions if opts.regions else DEFAULT_REGIONS
    available_images = []
    for region_name in region_names:
        ec2 = get_aws_connection(region_name)
        found = ec2.get_all_images(owners=["self"],
                                   filters={"state": "available"})
        available_images.extend(found)
    update_ami_status(amis_to_dict(available_images))
コード例 #7
0
def get_ami(region, moz_instance_type):
    """Return the self-owned AMI that sorts last by its "moz-created" tag.

    Only images tagged ``moz-type == moz_instance_type`` are considered.
    Raises IndexError when no such image exists.
    """
    def created_tag(image):
        return image.tags.get("moz-created")

    candidates = list(get_aws_connection(region).get_all_images(
        owners=["self"],
        filters={"tag:moz-type": moz_instance_type}))
    candidates.sort(key=created_tag)
    return candidates[-1]
コード例 #8
0
def main():
    """Apply an action to the instances named on the command line.

    Instances are looked up in every selected region (DEFAULT_REGIONS unless
    -r/--region is given) and matched against the ``hosts`` arguments by
    their "Name" tag or instance ID.  Instances without a private IP
    address are skipped.  The chosen action is dispatched to start(),
    stop(), restart(), enable(), disable(), terminate() or status().
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-r", "--region", dest="regions", action="append",
                        help="optional list of regions")
    parser.add_argument("action",
                        choices=["stop", "start", "restart", "enable",
                                 "disable", "terminate", "status"],
                        help="action to be performed")
    parser.add_argument("-m", "--comments", help="reason to disable")
    parser.add_argument("-n", "--dry-run", action="store_true",
                        help="Dry run mode")
    parser.add_argument("-q", "--quiet", action="store_true",
                        help="Supress logging messages")
    parser.add_argument("hosts", metavar="host", nargs="+",
                        help="hosts to be processed")
    parser.add_argument("-f", "--force", action="store_true",
                        help="Force action without prompting")
    args = parser.parse_args()

    logging.basicConfig(format="%(asctime)s - %(levelname)s - %(message)s")
    log.setLevel(logging.ERROR if args.quiet else logging.INFO)

    # One entry per action; the lambdas defer the lookup of the handler
    # functions until an instance actually has to be processed.
    handlers = {
        "start": lambda inst: start(inst, args.dry_run),
        "stop": lambda inst: stop(inst, args.dry_run),
        "restart": lambda inst: restart(inst, args.dry_run),
        "enable": lambda inst: enable(inst, args.dry_run),
        "disable": lambda inst: disable(inst, args.dry_run, args.comments),
        "terminate": lambda inst: terminate(inst, args.dry_run, args.force),
        "status": lambda inst: status(inst),
    }
    handler = handlers.get(args.action)

    wanted = args.hosts
    for region in (args.regions or DEFAULT_REGIONS):
        ec2 = get_aws_connection(region)
        for instance in ec2.get_only_instances():
            name = instance.tags.get('Name', '')
            instance_id = instance.id
            if not instance.private_ip_address:
                # Terminated instances have no IP address assigned
                log.debug("Skipping (terminated?) %s (%s)...", name,
                          instance_id)
                continue
            if name not in wanted and instance_id not in wanted:
                continue
            log.info("Found %s (%s)...", name, instance_id)
            if handler is not None:
                handler(instance)
def main():
    """Terminate running instances launched from the given AMI IDs.

    Running instances whose image ID is one of the ``amis`` arguments are
    collected from every selected region (DEFAULT_REGIONS unless
    -r/--region is given).  The list is logged, the user has to answer "y"
    to two prompts, and after an extra 60 second wait every instance is
    terminated.  Nothing happens when no instance matches.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-r", "--region", action="append", dest="regions")
    parser.add_argument("amis", metavar="AMI", nargs="+", help="AMI IDs")
    parser.add_argument("-v", "--verbose", action="store_true",
                        help="Verbose logging")
    args = parser.parse_args()

    logging.basicConfig(format="%(asctime)s - %(levelname)s - %(message)s")
    log.setLevel(logging.DEBUG if args.verbose else logging.INFO)

    doomed = []
    for region in (args.regions or DEFAULT_REGIONS):
        log.debug("working in %s", region)
        ec2 = get_aws_connection(region)
        found = ec2.get_only_instances(filters={
            "image-id": args.amis,
            "instance-state-name": "running",
        })
        log.debug("got %s instances:\n%s", len(found), found)
        if found:
            doomed.extend(found)

    if not doomed:
        return

    log.info("Preparing to terminate the following %s instances:",
             len(doomed))
    for instance in doomed:
        log.info("%s (%s)", instance.id, instance.tags.get("Name"))

    # Both prompts must be answered with exactly "y" to continue.
    prompts = (
        "Are you sure you want to kill these? ^ y/N >",
        "ARE YOU SURE YOU WANT TO KILL THESE? ^"
        " LAST WARNING!!! y/N >",
    )
    for prompt in prompts:
        if raw_input(prompt) != "y":
            log.info("Exiting without any changes!")
            return

    log.warn("The instances mentioned above are about to be terminated")
    log.warn("Waiting extra 60 seconds to make sure...")
    time.sleep(60)
    log.warn("Starting...")
    for instance in doomed:
        log.warn("Terminating %s...", instance)
        instance.terminate()
        log.warn("Done.")
コード例 #10
0
def main():
    """Find running instances of the given AMIs and terminate them.

    Searches every selected region (DEFAULT_REGIONS unless -r/--region is
    given) for running instances whose image ID is among the ``amis``
    arguments.  If any are found they are listed, two "y" confirmations
    are requested, an extra 60 seconds are waited, and then each instance
    is terminated.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-r", "--region", action="append", dest="regions")
    parser.add_argument("amis", metavar="AMI", nargs="+", help="AMI IDs")
    parser.add_argument("-v", "--verbose", action="store_true",
                        help="Verbose logging")
    args = parser.parse_args()

    logging.basicConfig(format="%(asctime)s - %(levelname)s - %(message)s")
    level = logging.DEBUG if args.verbose else logging.INFO
    log.setLevel(level)

    def confirmed(question):
        """Ask ``question``; log and return False unless the answer is "y"."""
        if raw_input(question) == "y":
            return True
        log.info("Exiting without any changes!")
        return False

    targets = []
    for region_name in (args.regions or DEFAULT_REGIONS):
        log.debug("working in %s", region_name)
        running = get_aws_connection(region_name).get_only_instances(
            filters={"image-id": args.amis,
                     "instance-state-name": "running"})
        log.debug("got %s instances:\n%s", len(running), running)
        if running:
            targets.extend(running)

    if not targets:
        return

    log.info("Preparing to terminate the following %s instances:",
             len(targets))
    for target in targets:
        log.info("%s (%s)", target.id, target.tags.get("Name"))

    if not confirmed("Are you sure you want to kill these? ^ y/N >"):
        return
    if not confirmed("ARE YOU SURE YOU WANT TO KILL THESE? ^"
                     " LAST WARNING!!! y/N >"):
        return

    log.warn("The instances mentioned above are about to be terminated")
    log.warn("Waiting extra 60 seconds to make sure...")
    time.sleep(60)
    log.warn("Starting...")
    for target in targets:
        log.warn("Terminating %s...", target)
        target.terminate()
        log.warn("Done.")
コード例 #11
0
def main():
    """Launch an instance from a JSON configuration and pass it to create_ami().

    The configuration is read from ``<AMI_CONFIGS_DIR>/<--config>.json`` and
    the section for ``--region`` is selected.  The process exits with
    status 1 when that lookup raises KeyError.  The instance is created
    with create_instance() using INSTANCE_NAME and ``--key-name``.
    """
    args = docopt(__doc__)

    logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(message)s")

    try:
        config_path = "%s/%s.json" % (AMI_CONFIGS_DIR, args['--config'])
        # Use a context manager so the file handle is closed right away;
        # it used to be left open until garbage collection.
        with open(config_path) as config_file:
            config = json.load(config_file)[args['--region']]
    except KeyError:
        log.error("unknown configuration")
        exit(1)

    connection = get_aws_connection(args['--region'])
    host_instance = create_instance(connection, args['INSTANCE_NAME'], config,
                                    args['--key-name'])
    create_ami(host_instance, args['--config'], config)
コード例 #12
0
def main():
    """Launch an instance from a JSON configuration and pass it to create_ami().

    The configuration is read from ``<AMI_CONFIGS_DIR>/<--config>.json`` and
    the section for ``--region`` is selected.  The process exits with
    status 1 when that lookup raises KeyError.  The instance is created
    with create_instance() using INSTANCE_NAME and ``--key-name``.
    """
    args = docopt(__doc__)

    logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(message)s")

    try:
        config_path = "%s/%s.json" % (AMI_CONFIGS_DIR, args['--config'])
        # The previous bare open() call never closed the file; the ``with``
        # block releases the handle as soon as the JSON has been parsed.
        with open(config_path) as config_file:
            config = json.load(config_file)[args['--region']]
    except KeyError:
        log.error("unknown configuration")
        exit(1)

    connection = get_aws_connection(args['--region'])
    host_instance = create_instance(connection, args['INSTANCE_NAME'], config,
                                    args['--key-name'])
    create_ami(host_instance, args['--config'], config)
def main():
    """Queue A, PTR and CNAME checks for every instance of one region.

    Instances lacking a "Name" tag, a "FQDN" tag or a private IP address
    are skipped with a warning.  For the rest, [fqdn, ip] pairs are handed
    to check_A and check_PTR, and [fqdn, "<Name>.build.mozilla.org"] pairs
    to check_CNAME, through a worker pool.

    Command-line options:
        -r/--region   region to inspect (required).
        -v/--verbose  log at DEBUG level instead of WARNING.
    """
    parser = argparse.ArgumentParser()
    # Help texts used to describe an optional region list and a "suppress
    # logging" switch, neither of which matches the options' behavior.
    parser.add_argument("-r", "--region", dest="region", required=True,
                        help="region to check (required)")
    parser.add_argument("-v", "--verbose", action="store_true",
                        help="Enable debug logging")

    args = parser.parse_args()

    logging.basicConfig(format="%(asctime)s - %(levelname)s - %(message)s")
    if args.verbose:
        log.setLevel(logging.DEBUG)
    else:
        log.setLevel(logging.WARNING)

    conn = get_aws_connection(args.region)

    pool = Pool()
    res = conn.get_all_instances()
    # Flatten reservations into instances.  reduce() without an initial
    # value raised TypeError when the region had no reservations at all.
    instances = [inst for reservation in res
                 for inst in reservation.instances]
    a_checks = []
    ptr_checks = []
    cname_checks = []
    for i in instances:
        # TODO: ignore EB
        name = i.tags.get("Name")
        if not name:
            log.warning("%s has no Name tag, skipping...", i)
            continue
        fqdn = i.tags.get("FQDN")
        if not fqdn:
            log.warning("%s has no FQDN tag, skipping...", i)
            continue
        ip = i.private_ip_address
        if not ip:
            log.warning("%s no ip assigned, skipping...", i)
            continue
        cname = "%s.build.mozilla.org" % name
        a_checks.append([fqdn, ip])
        ptr_checks.append([fqdn, ip])
        cname_checks.append([fqdn, cname])
    pool.map(check_A, a_checks)
    pool.map(check_PTR, ptr_checks)
    pool.map(check_CNAME, cname_checks)
    pool.close()
    pool.join()
コード例 #14
0
def do_request_ondemand_instance(region, price, ami_id, instance_type, ssh_key,
                                 user_data, bdm, nc, profile,
                                 moz_instance_type, name, fqdn):
    """Start one on-demand instance and tag it.

    ``price`` is accepted but not used here.  The instance is launched with
    the shutdown behavior "terminate".  Returns the result of
    tag_ondemand_instance() for the first instance of the reservation.
    """
    launch_options = dict(
        image_id=ami_id,
        key_name=ssh_key,
        instance_type=instance_type,
        user_data=user_data,
        block_device_map=bdm,
        network_interfaces=nc,
        instance_profile_name=profile,
        # terminate the instances on shutdown
        instance_initiated_shutdown_behavior="terminate",
    )
    reservation = get_aws_connection(region).run_instances(**launch_options)
    new_instance = reservation.instances[0]
    return tag_ondemand_instance(new_instance, name, fqdn, moz_instance_type)
コード例 #15
0
def do_request_ondemand_instance(region, price, ami_id, instance_type, ssh_key,
                                 user_data, bdm, nc, profile,
                                 moz_instance_type, name, fqdn):
    """Run a single on-demand instance in ``region`` and tag the result.

    The ``price`` argument is not used by this function.  The launched
    instance terminates itself on shutdown.  The value returned by
    tag_ondemand_instance() is passed back to the caller.
    """
    ec2 = get_aws_connection(region)
    run_kwargs = {
        "image_id": ami_id,
        "key_name": ssh_key,
        "instance_type": instance_type,
        "user_data": user_data,
        "block_device_map": bdm,
        "network_interfaces": nc,
        "instance_profile_name": profile,
        # terminate the instances on shutdown
        "instance_initiated_shutdown_behavior": "terminate",
    }
    launched = ec2.run_instances(**run_kwargs).instances
    return tag_ondemand_instance(launched[0], name, fqdn, moz_instance_type)
コード例 #16
0
def main():
    """Queue A, PTR and CNAME checks for every instance of one region.

    Instances lacking a "Name" tag, a "FQDN" tag or a private IP address
    are skipped with a warning.  For the rest, [fqdn, ip] pairs are handed
    to check_A and check_PTR, and [fqdn, "<Name>.build.mozilla.org"] pairs
    to check_CNAME, through a worker pool.

    Command-line options:
        -r/--region   region to inspect (required).
        -v/--verbose  log at DEBUG level instead of WARNING.
    """
    parser = argparse.ArgumentParser()
    # The old help strings ("optional list of regions", "Supress logging
    # messages") contradicted what these two options actually do.
    parser.add_argument("-r", "--region", dest="region", required=True,
                        help="region to check (required)")
    parser.add_argument("-v", "--verbose", action="store_true",
                        help="Enable debug logging")

    args = parser.parse_args()

    logging.basicConfig(format="%(asctime)s - %(levelname)s - %(message)s")
    if args.verbose:
        log.setLevel(logging.DEBUG)
    else:
        log.setLevel(logging.WARNING)

    conn = get_aws_connection(args.region)

    pool = Pool()
    res = conn.get_all_instances()
    # A nested comprehension copes with an empty reservation list, whereas
    # reduce() without an initial value raised TypeError in that case.
    instances = [inst for reservation in res
                 for inst in reservation.instances]
    a_checks = []
    ptr_checks = []
    cname_checks = []
    for i in instances:
        # TODO: ignore EB
        name = i.tags.get("Name")
        if not name:
            log.warning("%s has no Name tag, skipping...", i)
            continue
        fqdn = i.tags.get("FQDN")
        if not fqdn:
            log.warning("%s has no FQDN tag, skipping...", i)
            continue
        ip = i.private_ip_address
        if not ip:
            log.warning("%s no ip assigned, skipping...", i)
            continue
        cname = "%s.build.mozilla.org" % name
        a_checks.append([fqdn, ip])
        ptr_checks.append([fqdn, ip])
        cname_checks.append([fqdn, cname])
    pool.map(check_A, a_checks)
    pool.map(check_PTR, ptr_checks)
    pool.map(check_CNAME, cname_checks)
    pool.close()
    pool.join()
コード例 #17
0
def do_request_spot_instance(region, price, ami_id, instance_type, ssh_key,
                             user_data, bdm, nc, profile, moz_instance_type,
                             name, fqdn):
    """Place one spot instance request and tag it, retrying transient errors.

    The request is tagged with "moz-type", "Name" and "FQDN".  Tagging is
    attempted up to 10 times; between attempts the function sleeps, starting
    at 5 seconds and growing by a factor of 1.5 up to 30 seconds.  Only the
    error codes "InvalidSpotInstanceRequestID.NotFound" and
    "RequestLimitExceeded" are retried.

    Returns True once all tags have been added.  Any other boto error, or a
    retryable error on the last attempt, is re-raised to the caller.
    """
    conn = get_aws_connection(region)
    sir = conn.request_spot_instances(
        price=str(price),
        image_id=ami_id,
        count=1,
        instance_type=instance_type,
        key_name=ssh_key,
        user_data=user_data,
        block_device_map=bdm,
        network_interfaces=nc,
        instance_profile_name=profile,
    )
    # Sleep for a little bit to prevent us hitting
    # InvalidSpotInstanceRequestID.NotFound right away
    time.sleep(0.5)
    max_tries = 10
    sleep_time = 5
    # Retryable error code -> debug message logged before sleeping.
    retryable = {
        "InvalidSpotInstanceRequestID.NotFound": "waiting for spot request",
        "RequestLimitExceeded":
            "request limit exceeded; sleeping and trying again",
    }
    for i in range(max_tries):
        try:
            sir[0].add_tag("moz-type", moz_instance_type)
            # Name will be used to determine available slave names
            sir[0].add_tag("Name", name)
            sir[0].add_tag("FQDN", fqdn)
            return True
        except (EC2ResponseError, BotoServerError) as e:
            # One handler for both classes: the separate EC2ResponseError
            # handler used to swallow every non-retryable error (looping
            # again without any delay) and hid request-limit errors from
            # the BotoServerError handler below it.
            if e.code not in retryable or i >= max_tries - 1:
                raise
            log.debug(retryable[e.code])
            time.sleep(sleep_time)
            sleep_time = min(30, sleep_time * 1.5)
コード例 #18
0
def aws_get_reservations(regions):
    """
    Sum the instance counts of active reserved instances in ``regions``.

    Returns a dict mapping (availability zone, ec2 instance type) -> count
    """
    log.debug("getting reservations for %s", regions)
    counts = {}
    for region in regions:
        ec2 = get_aws_connection(region)
        active = ec2.get_all_reserved_instances(filters={'state': 'active'})
        for reservation in active:
            key = (reservation.availability_zone, reservation.instance_type)
            counts[key] = counts.get(key, 0) + reservation.instance_count
    return counts
コード例 #19
0
def do_request_spot_instance(region, price, ami_id, instance_type, ssh_key,
                             user_data, bdm, nc, profile, moz_instance_type,
                             name, fqdn):
    """Place one spot instance request and tag it, retrying transient errors.

    The request is tagged with "moz-type", "Name" and "FQDN".  Tagging is
    attempted up to 10 times; between attempts the function sleeps, starting
    at 5 seconds and growing by a factor of 1.5 up to 30 seconds.  Only the
    error codes "InvalidSpotInstanceRequestID.NotFound" and
    "RequestLimitExceeded" are retried.

    Returns True once all tags have been added.  Any other boto error, or a
    retryable error on the last attempt, is re-raised to the caller.
    """
    conn = get_aws_connection(region)
    sir = conn.request_spot_instances(
        price=str(price),
        image_id=ami_id,
        count=1,
        instance_type=instance_type,
        key_name=ssh_key,
        user_data=user_data,
        block_device_map=bdm,
        network_interfaces=nc,
        instance_profile_name=profile,
    )
    # Sleep for a little bit to prevent us hitting
    # InvalidSpotInstanceRequestID.NotFound right away
    time.sleep(0.5)
    max_tries = 10
    sleep_time = 5
    # Retryable error code -> debug message logged before sleeping.
    retryable = {
        "InvalidSpotInstanceRequestID.NotFound": "waiting for spot request",
        "RequestLimitExceeded":
            "request limit exceeded; sleeping and trying again",
    }
    for i in range(max_tries):
        try:
            sir[0].add_tag("moz-type", moz_instance_type)
            # Name will be used to determine available slave names
            sir[0].add_tag("Name", name)
            sir[0].add_tag("FQDN", fqdn)
            return True
        except (EC2ResponseError, BotoServerError) as e:
            # Previously an EC2ResponseError with any other code fell out
            # of its handler without re-raising, so the loop retried at
            # once and the function could end up returning None.  Errors
            # that are not retryable, or that persist on the last attempt,
            # are now propagated.
            if e.code not in retryable or i >= max_tries - 1:
                raise
            log.debug(retryable[e.code])
            time.sleep(sleep_time)
            sleep_time = min(30, sleep_time * 1.5)
コード例 #20
0
def aws_get_all_instances(regions):
    """
    Returns a list of all instances in the given regions

    Results are memoized per region in the module-level
    ``_aws_instances_cache``; a region already present there is served
    from the cache without querying AWS again.
    """
    log.debug("fetching all instances for %s", regions)
    retval = []
    for region in regions:
        if region in _aws_instances_cache:
            log.debug("aws_get_all_instances - cache hit for %s", region)
            retval.extend(_aws_instances_cache[region])
            continue

        conn = get_aws_connection(region)
        # Flatten the reservations of this region into a list of instances.
        region_instances = []
        for r in conn.get_all_instances():
            region_instances.extend(r.instances)
        # The message used to name a different function
        # ("aws_get_running_instances"), which was misleading in logs.
        log.debug("aws_get_all_instances - caching %s", region)
        _aws_instances_cache[region] = region_instances
        retval.extend(region_instances)
    return retval
コード例 #21
0
def main():
    """Print "<private ip> <hostname>" for instances matching the arguments.

    Every positional argument is compiled as a regular expression and
    searched in the instance's "FQDN" tag (falling back to the "Name" tag,
    then to an empty string).  Instances without a private IP address are
    not printed.  A line is printed once for each pattern that matches.

    Command-line options:
        -r/--region  region to use (default "us-east-1").
    """
    from optparse import OptionParser
    parser = OptionParser()
    parser.add_option("-r", "--region", dest="region", help="region to use",
                      default="us-east-1")

    options, args = parser.parse_args()
    if not args:
        parser.error("at least one instance name is required")

    hosts_re = [re.compile(x) for x in args]

    conn = get_aws_connection(options.region)

    res = conn.get_all_instances()
    # A comprehension replaces the builtin reduce(), which exists only on
    # Python 2, and needs no special case for an empty result.
    instances = [inst for reservation in (res or [])
                 for inst in reservation.instances]
    for i in instances:
        # The hostname does not depend on the pattern; look it up once.
        hostname = i.tags.get('FQDN', i.tags.get('Name', ''))
        if not i.private_ip_address:
            continue
        for mask in hosts_re:
            if mask.search(hostname):
                # Single-argument print() behaves the same on Python 2
                # and Python 3, unlike the print statement.
                print("%s %s" % (i.private_ip_address, hostname))
コード例 #22
0
def main():
    """Collect available self-owned AMIs and pass them to update_ami_status().

    Regions come from -r/--region (repeatable) or DEFAULT_REGIONS.  With
    -q/--quiet the log level is ERROR, otherwise INFO.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("-r", "--region", dest="regions", action="append",
                     help="optional list of regions")
    cli.add_argument("-q", "--quiet", action="store_true",
                     help="Supress logging messages")
    options = cli.parse_args()

    logging.basicConfig(format="%(asctime)s - %(levelname)s - %(message)s")
    log_level = logging.ERROR if options.quiet else logging.INFO
    log.setLevel(log_level)

    selected_regions = options.regions
    if not selected_regions:
        selected_regions = DEFAULT_REGIONS

    amis = []
    for region in selected_regions:
        region_images = get_aws_connection(region).get_all_images(
            owners=["self"], filters={"state": "available"})
        amis.extend(region_images)
    update_ami_status(amis_to_dict(amis))
コード例 #23
0
def main():
    """Collect instances and volumes from the selected regions and report.

    Command-line options:
        -r/--region   region to inspect; may be given several times.
                      DEFAULT_REGIONS is used when the option is absent.
        -v/--verbose  log at DEBUG level instead of ERROR.
        --events-dir  cloudtrail logs event directory, handed unchanged to
                      generate_report().
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-r",
                        "--region",
                        dest="regions",
                        action="append",
                        help="optional list of regions")
    # --verbose switches logging to DEBUG; its help text used to say that
    # it suppressed log messages, which was wrong.
    parser.add_argument("-v",
                        "--verbose",
                        action="store_true",
                        help="Enable debug logging")
    parser.add_argument("--events-dir",
                        dest="events_dir",
                        help="cloudtrail logs event directory")
    args = parser.parse_args()

    logging.basicConfig(format="%(asctime)s - %(levelname)s - %(message)s")
    log.setLevel(logging.DEBUG if args.verbose else logging.ERROR)

    if not args.regions:
        args.regions = DEFAULT_REGIONS
    all_instances = []
    all_volumes = []
    for region in args.regions:
        conn = get_aws_connection(region)
        all_instances.extend(get_all_instances(conn))
        all_volumes.extend(conn.get_all_volumes())

    # NOTE: ``conn`` is the connection of the last region iterated above.
    generate_report(connection=conn,
                    regions=args.regions,
                    instances=all_instances,
                    volumes=all_volumes,
                    events_dir=args.events_dir)
コード例 #24
0
def do_request_spot_instance(region, secrets, moz_instance_type, price, ami,
                             instance_config, cached_cert_dir, instance_type,
                             availability_zone, slaveset, active_network_ids, dryrun):
    """Request one spot instance bound to a free network interface.

    Steps, in order:

    1. Pick an interface with get_available_interface(); return False when
       none is returned or when it has no "FQDN" tag.
    2. When ``dryrun`` is true, log and return True without requesting
       anything.
    3. Build the user data script from the FQDN and the value returned by
       get_puppet_certs(); append an lvm-init section when
       ``instance_config[region]`` has a truthy "lvm" entry.
    4. Build the block device mapping from
       ``instance_config[region]['device_map']``.
    5. Place the spot request and tag it with "moz-type".

    Returns True after the request has been placed and tagged.  An
    AssertionError is raised when a configured root device size is smaller
    than the AMI's root device (non-HVM AMIs only).
    """
    conn = get_aws_connection(region)
    interface = get_available_interface(
        conn=conn, moz_instance_type=moz_instance_type,
        availability_zone=availability_zone,
        slaveset=slaveset,
        active_network_ids=active_network_ids)
    if not interface:
        log.warn("No free network interfaces left in %s" % region)
        return False

    # TODO: check DNS
    fqdn = interface.tags.get("FQDN")
    if not fqdn:
        log.warn("interface %s has no FQDN", interface)
        return False

    log.debug("Spot request for %s (%s)", fqdn, price)

    if dryrun:
        log.info("Dry run. skipping")
        return True

    # Attach the chosen, already existing interface to the new instance.
    spec = NetworkInterfaceSpecification(
        network_interface_id=interface.id)
    nc = NetworkInterfaceCollection(spec)
    ip = interface.private_ip_address
    # NOTE(review): ``certs`` is interpolated verbatim into the shell script
    # below, so it is presumably a shell snippet - confirm against
    # get_puppet_certs().
    certs = get_puppet_certs(ip, secrets, cached_cert_dir)
    user_data = """
FQDN="%(fqdn)s"
cd /var/lib/puppet/ssl || exit 1
%(certs)s
cd -
""" % dict(fqdn=fqdn, certs=certs)
    if instance_config[region].get("lvm"):
        # Write the whole region configuration as JSON for lvm-init and
        # run it.
        user_data += """
mkdir -p /etc/lvm-init/
cat <<EOF > /etc/lvm-init/lvm-init.json
%s
EOF
/sbin/lvm-init
""" % json.dumps(instance_config[region])

    bdm = BlockDeviceMapping()
    for device, device_info in instance_config[region]['device_map'].items():
        bd = BlockDeviceType()
        if device_info.get('size'):
            bd.size = device_info['size']
        if ami.root_device_name == device:
            ami_size = ami.block_device_mapping[device].size
            if ami.virtualization_type == "hvm":
                # Overwrite root device size for HVM instances, since they
                # cannot be resized online
                bd.size = ami_size
            elif device_info.get('size'):
                # make sure that size is enough for this AMI
                assert ami_size <= device_info['size'], \
                    "Instance root device size cannot be smaller than AMI " \
                    "root device"
        # Devices are deleted on termination unless the configuration sets
        # "delete_on_termination" to False explicitly.
        if device_info.get("delete_on_termination") is not False:
            bd.delete_on_termination = True
        if device_info.get("ephemeral_name"):
            bd.ephemeral_name = device_info["ephemeral_name"]

        bdm[device] = bd

    sir = conn.request_spot_instances(
        price=str(price),
        image_id=ami.id,
        count=1,
        instance_type=instance_type,
        key_name=instance_config[region]["ssh_key"],
        user_data=user_data,
        block_device_map=bdm,
        network_interfaces=nc,
        instance_profile_name=instance_config[region].get("instance_profile_name"),
    )
    # count=1 above, so only the first returned request is tagged.
    sir[0].add_tag("moz-type", moz_instance_type)
    return True
コード例 #25
0
def main():
    """Run one lifecycle action against every matching EC2 instance.

    Instances are matched by their ``Name`` tag or by instance id against
    the ``hosts`` given on the command line, in each requested region
    (``DEFAULT_REGIONS`` when none is given).  Instances without a private
    IP address are skipped.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-r", "--region", dest="regions", action="append",
        help="optional list of regions")
    parser.add_argument(
        "action",
        choices=["stop", "start", "restart", "enable", "disable",
                 "terminate", "status"],
        help="action to be performed")
    parser.add_argument("-m", "--comments", help="reason to disable")
    parser.add_argument(
        "-n", "--dry-run", action="store_true", help="Dry run mode")
    parser.add_argument(
        "-q", "--quiet", action="store_true",
        help="Supress logging messages")
    parser.add_argument(
        "hosts", metavar="host", nargs="+", help="hosts to be processed")
    parser.add_argument(
        "-f", "--force", action="store_true",
        help="Force action without prompting")

    args = parser.parse_args()

    logging.basicConfig(format="%(asctime)s - %(levelname)s - %(message)s")
    log.setLevel(logging.ERROR if args.quiet else logging.INFO)

    regions = args.regions or DEFAULT_REGIONS
    wanted = args.hosts

    # One entry per accepted "action" choice; the lambdas defer the name
    # lookup of each handler until it is actually needed.
    handlers = {
        "start": lambda inst: start(inst, args.dry_run),
        "stop": lambda inst: stop(inst, args.dry_run),
        "restart": lambda inst: restart(inst, args.dry_run),
        "enable": lambda inst: enable(inst, args.dry_run),
        "disable": lambda inst: disable(inst, args.dry_run, args.comments),
        "terminate": lambda inst: terminate(inst, args.dry_run, args.force),
        "status": lambda inst: status(inst),
    }

    for region in regions:
        conn = get_aws_connection(region)
        for instance in conn.get_only_instances():
            name = instance.tags.get('Name', '')
            instance_id = instance.id
            if not instance.private_ip_address:
                # Terminated instances have no IP address assigned
                log.debug("Skipping (terminated?) %s (%s)..." %
                          (name, instance_id))
                continue
            if name not in wanted and instance_id not in wanted:
                continue

            log.info("Found %s (%s)..." % (name, instance_id))
            handlers[args.action](instance)
コード例 #26
0
        session.commit()

if __name__ == '__main__':
    # Script entry point: record spot statistics into a SQLite database and
    # cancel low-priced requests, region by region.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-r", "--region", dest="regions", action="append",
        help="optional list of regions")
    parser.add_argument(
        "-q", "--quiet", action="store_true",
        help="Supress logging messages")
    parser.add_argument("-d", "--db", default="spots.db")
    args = parser.parse_args()

    logging.basicConfig(format="%(asctime)s - %(levelname)s - %(message)s")
    log.setLevel(logging.WARNING if args.quiet else logging.DEBUG)

    # Create the schema on first use, then open a session on the database.
    engine = create_engine('sqlite:///%s' % args.db)
    Base.metadata.create_all(bind=engine)
    Session = sessionmaker(bind=engine)
    session = Session()

    if not args.regions:
        args.regions = DEFAULT_REGIONS

    for region in args.regions:
        conn = get_aws_connection(region)
        update_spot_stats(conn, session)
        cancel_low_price(conn)
コード例 #27
0
def request_spot_instances(all_instances, moz_instance_type, start_count,
                           regions, region_priorities, spot_config, dryrun,
                           slaveset, latest_ami_percentage):
    """Request up to ``start_count`` spot instances of ``moz_instance_type``.

    The work happens in two passes.  The first pass walks ``regions`` and
    works out, per region, how many more requests fit under the configured
    limit and which AMI(s) they should use.  The second pass walks the spot
    choices returned by ``get_spot_choices`` and places requests through
    ``do_request_spot_instances`` until ``start_count`` is reached.

    Args:
        all_instances: instances used to validate active spot requests (by
            ``id``) and to count how many run each AMI (by ``image_id``).
        moz_instance_type: key into ``spot_config["rules"]`` and into the
            per-region ``spot_config["limits"]``.
        start_count: maximum number of instances to start in this call.
        regions: region names to consider.
        region_priorities: accepted for interface compatibility; not used
            by this function.
        spot_config: dict with optional ``"rules"`` and ``"limits"`` keys.
        dryrun: passed through to ``do_request_spot_instances``.
        slaveset: passed through to ``do_request_spot_instances``.
        latest_ami_percentage: when below 100 and more than one AMI is
            available, new instances are split between the last two AMIs
            returned by ``get_spot_amis`` using
            ``find_prev_latest_amis_needed``.

    Returns:
        The number of instances started, as reported by
        ``do_request_spot_instances``; 0 when nothing could be started.
    """
    started = 0
    spot_rules = spot_config.get("rules", {}).get(moz_instance_type)
    if not spot_rules:
        log.warning("No spot rules found for %s", moz_instance_type)
        return 0

    instance_config = load_instance_config(moz_instance_type)
    connections = [get_aws_connection(r) for r in regions]
    spot_choices = get_spot_choices(connections, spot_rules,
                                    "Linux/UNIX (Amazon VPC)")
    if not spot_choices:
        log.warning("No spot choices for %s", moz_instance_type)
        return 0

    to_start = defaultdict(list)
    active_instance_ids = set(i.id for i in all_instances)

    # count the number of instances for each image id
    ami_distribution = defaultdict(int)
    for instance in all_instances:
        ami_distribution[instance.image_id] += 1

    for region in regions:
        # Check if spots are enabled in this region for this type
        region_limit = spot_config.get("limits",
                                       {}).get(region,
                                               {}).get(moz_instance_type)
        if not region_limit:
            log.debug("No spot limits defined for %s in %s, skipping...",
                      moz_instance_type, region)
            continue

        # check the limits
        active_requests = get_spot_requests_for_moztype(
            region=region, moz_instance_type=moz_instance_type)
        log.debug("%i active spot requests for %s %s", len(active_requests),
                  region, moz_instance_type)
        # Filter out requests for instances that don't exist
        active_requests = [
            r for r in active_requests if r.instance_id is not None
            and r.instance_id in active_instance_ids
        ]
        log.debug("%i real active spot requests for %s %s",
                  len(active_requests), region, moz_instance_type)
        active_count = len(active_requests)
        can_be_started = region_limit - active_count
        if can_be_started < 1:
            log.debug(
                "Not starting. Active spot request count in %s region "
                "hit limit of %s. Active count: %s", region, region_limit,
                active_count)
            continue

        to_be_started_latest = min(can_be_started, start_count - started)
        spot_amis = get_spot_amis(region=region,
                                  tags={"moz-type": moz_instance_type})
        if not spot_amis:
            # Without this guard spot_amis[-1] raises IndexError and aborts
            # the whole run because of a single region without AMIs.
            log.warning("No spot AMIs found for %s in %s, skipping...",
                        moz_instance_type, region)
            continue
        ami_latest = spot_amis[-1]
        if len(spot_amis) > 1 and latest_ami_percentage < 100:
            # get the total number of running instances with both the latest
            # and previous ami types, so that we can decide how many of each
            # type to launch.
            ami_prev = spot_amis[-2]
            prev_ami_count = ami_distribution[ami_prev.id]
            latest_ami_count = ami_distribution[ami_latest.id]
            ami_prev_to_start, ami_latest_to_start = \
                find_prev_latest_amis_needed(
                    latest_ami_percentage, prev_ami_count, latest_ami_count,
                    to_be_started_latest)
            to_start[region].append({
                "ami": ami_prev,
                "instances": ami_prev_to_start
            })
            to_start[region].append({
                "ami": ami_latest,
                "instances": ami_latest_to_start
            })
        else:
            to_start[region].append({
                "ami": ami_latest,
                "instances": to_be_started_latest
            })
    if not to_start:
        log.debug("Nothing to start for %s", moz_instance_type)
        return 0

    for choice in spot_choices:
        region = choice.region
        if region not in to_start:
            log.debug("Skipping %s for %s", choice, region)
            continue
        if not usable_spot_choice(choice):
            log.debug("Skipping %s for %s - unusable", choice, region)
            continue
        for to_start_entry in to_start[region]:
            need = min(to_start_entry["instances"], start_count - started)
            if need <= 0:
                continue
            log.debug("Need %s of %s in %s", need, moz_instance_type,
                      choice.availability_zone)

            log.debug("Using %s", choice)
            launched = do_request_spot_instances(
                amount=need,
                region=region,
                moz_instance_type=moz_instance_type,
                ami=to_start_entry["ami"],
                instance_config=instance_config,
                dryrun=dryrun,
                spot_choice=choice,
                slaveset=slaveset,
                all_instances=all_instances,
            )
            started += launched
            # Use up this entry's quota so that a later spot choice in the
            # same region only requests what is still missing instead of
            # the full amount again (which would exceed the region limit).
            to_start_entry["instances"] -= launched

        if started >= start_count:
            break

    return started
コード例 #28
0
def get_all_spot_requests(region):
    """Return all spot instance requests reported for ``region``."""
    log.info("getting all spot requests for %s", region)
    return get_aws_connection(region).get_all_spot_instance_requests()
コード例 #29
0
def request_spot_instances(all_instances, moz_instance_type, start_count,
                           regions, region_priorities, spot_config, dryrun,
                           latest_ami_percentage):
    """Request up to ``start_count`` spot instances of ``moz_instance_type``.

    The work happens in two passes.  The first pass walks ``regions`` and
    works out, per region, how many more requests fit under the configured
    limit and which AMI(s) they should use.  The second pass walks the spot
    choices returned by ``get_spot_choices`` and places requests through
    ``do_request_spot_instances`` until ``start_count`` is reached.

    Args:
        all_instances: instances used to validate active spot requests (by
            ``id``) and to count how many run each AMI (by ``image_id``).
        moz_instance_type: key into ``spot_config["rules"]`` and into the
            per-region ``spot_config["limits"]``; also selects the product
            description via ``get_product_description``.
        start_count: maximum number of instances to start in this call.
        regions: region names to consider.
        region_priorities: accepted for interface compatibility; not used
            by this function.
        spot_config: dict with optional ``"rules"`` and ``"limits"`` keys.
        dryrun: passed through to ``do_request_spot_instances``.
        latest_ami_percentage: when below 100 and more than one AMI is
            available, new instances are split between the last two AMIs
            returned by ``get_spot_amis`` using
            ``find_prev_latest_amis_needed``.

    Returns:
        The number of instances started, as reported by
        ``do_request_spot_instances``; 0 when nothing could be started.
    """
    started = 0
    spot_rules = spot_config.get("rules", {}).get(moz_instance_type)
    if not spot_rules:
        log.warning("No spot rules found for %s", moz_instance_type)
        return 0

    instance_config = load_instance_config(moz_instance_type)
    connections = [get_aws_connection(r) for r in regions]
    product_description = get_product_description(moz_instance_type)
    spot_choices = get_spot_choices(connections, spot_rules,
                                    product_description)
    if not spot_choices:
        log.warning("No spot choices for %s", moz_instance_type)
        log.warning("%s: market price too expensive in all available regions; spot instances needed: %i",
                    moz_instance_type, start_count)
        return 0

    to_start = defaultdict(list)
    active_instance_ids = set(i.id for i in all_instances)

    # count the number of instances for each image id
    ami_distribution = defaultdict(int)
    for instance in all_instances:
        ami_distribution[instance.image_id] += 1

    for region in regions:
        # Check if spots are enabled in this region for this type
        region_limit = spot_config.get("limits", {}).get(region, {}).get(
            moz_instance_type)
        if not region_limit:
            log.debug("No spot limits defined for %s in %s, skipping...",
                      moz_instance_type, region)
            continue

        # check the limits
        active_requests = get_spot_requests_for_moztype(
            region=region, moz_instance_type=moz_instance_type)
        log.debug("%i active spot requests for %s %s", len(active_requests),
                  region, moz_instance_type)
        # Filter out requests for instances that don't exist
        active_requests = [r for r in active_requests if r.instance_id is not
                           None and r.instance_id in active_instance_ids]
        log.debug("%i real active spot requests for %s %s",
                  len(active_requests), region, moz_instance_type)
        active_count = len(active_requests)
        can_be_started = region_limit - active_count
        if can_be_started < 1:
            log.debug("Not starting. Active spot request count in %s region "
                      "hit limit of %s. Active count: %s", region,
                      region_limit, active_count)
            continue

        to_be_started_latest = min(can_be_started, start_count - started)
        spot_amis = get_spot_amis(region=region,
                                  tags={"moz-type": moz_instance_type})
        if not spot_amis:
            # Without this guard spot_amis[-1] raises IndexError and aborts
            # the whole run because of a single region without AMIs.
            log.warning("No spot AMIs found for %s in %s, skipping...",
                        moz_instance_type, region)
            continue
        ami_latest = spot_amis[-1]
        if len(spot_amis) > 1 and latest_ami_percentage < 100:
            # get the total number of running instances with both the latest
            # and previous ami types, so that we can decide how many of each
            # type to launch.
            ami_prev = spot_amis[-2]
            prev_ami_count = ami_distribution[ami_prev.id]
            latest_ami_count = ami_distribution[ami_latest.id]
            ami_prev_to_start, ami_latest_to_start = \
                find_prev_latest_amis_needed(
                    latest_ami_percentage,
                    prev_ami_count,
                    latest_ami_count,
                    to_be_started_latest
                )
            to_start[region].append(
                {"ami": ami_prev, "instances": ami_prev_to_start})
            to_start[region].append(
                {"ami": ami_latest, "instances": ami_latest_to_start})
        else:
            to_start[region].append(
                {"ami": ami_latest, "instances": to_be_started_latest})
    if not to_start:
        log.debug("Nothing to start for %s", moz_instance_type)
        return 0

    for choice in spot_choices:
        region = choice.region
        if region not in to_start:
            log.debug("Skipping %s for %s", choice, region)
            continue
        if not usable_spot_choice(choice):
            log.debug("Skipping %s for %s - unusable", choice, region)
            continue
        for to_start_entry in to_start[region]:
            need = min(to_start_entry["instances"], start_count - started)
            if need <= 0:
                continue
            log.debug("Need %s of %s in %s", need, moz_instance_type,
                      choice.availability_zone)

            log.debug("Using %s", choice)
            launched = do_request_spot_instances(
                amount=need,
                region=region,
                moz_instance_type=moz_instance_type,
                ami=to_start_entry["ami"],
                instance_config=instance_config, dryrun=dryrun,
                spot_choice=choice,
                all_instances=all_instances,
            )
            started += launched
            # Use up this entry's quota so that a later spot choice in the
            # same region only requests what is still missing instead of
            # the full amount again (which would exceed the region limit).
            to_start_entry["instances"] -= launched

        if started >= start_count:
            break

    return started
コード例 #30
0
def main():
    """Create a spot AMI from the root volume of an existing instance.

    Steps, in order:
      1. Parse arguments and load the per-region AMI and instance configs.
      2. Pick the most recently launched, non-loaned instance tagged
         ``moz-state=ready`` (stopped instances first, running ones as a
         fallback) and snapshot its root volume.
      3. Start a temporary host instance, attach a volume created from the
         snapshot and mount it.
      4. Remove host-specific files from the mounted tree and install
         either the public ``rc.local`` or the spot setup script.
      5. Terminate the host, snapshot the cleaned volume, register the
         snapshot as an AMI and tag the AMI.
      6. Delete the temporary volume and the first snapshot.

    Raises:
        SystemExit: when no suitable instance is found to clone.  argparse
            also exits on invalid arguments or an unknown configuration.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-r", "--region", dest="region", required=True,
                        help="Region")
    parser.add_argument("-q", "--quiet", action="store_true",
                        help="Supress logging messages")
    parser.add_argument("-c", "--ami-config", required=True, help="AMI config")
    parser.add_argument("-i", "--instance-config", required=True,
                        help="Instance config")
    parser.add_argument("--ssh-key", required=True, help="SSH key name")
    parser.add_argument("--user", help="Login name")
    parser.add_argument("--public", action="store_true", default=False,
                        help="Generate a public AMI (no secrets)")

    args = parser.parse_args()
    try:
        # Context managers make sure the config files are closed again.
        with open("%s/%s.json" % (AMI_CONFIGS_DIR, args.ami_config)) as f:
            ami_config = json.load(f)[args.region]
        with open("%s/%s" % (INSTANCE_CONFIGS_DIR,
                             args.instance_config)) as f:
            moz_type_config = json.load(f)[args.region]
    except KeyError:
        parser.error("unknown configuration")

    logging.basicConfig(format="%(asctime)s - %(levelname)s - %(message)s")
    if not args.quiet:
        log.setLevel(logging.DEBUG)
    else:
        log.setLevel(logging.ERROR)

    conn = get_aws_connection(args.region)

    dated_target_name = "spot-%s-%s" % (
        args.ami_config, time.strftime("%Y-%m-%d-%H-%M", time.gmtime()))
    filters = {
        "tag:moz-state": "ready",
        "instance-state-name": "stopped"
    }
    for tag, value in moz_type_config["tags"].items():
        filters["tag:%s" % tag] = value
    using_stopped_instance = True
    res = conn.get_all_instances(filters=filters)
    if not res:
        # No stopped instance matches: fall back to running ones.
        filters["instance-state-name"] = "running"
        res = conn.get_all_instances(filters=filters)
        using_stopped_instance = False
    # Flatten the reservations and skip loaned instances
    candidates = [inst for r in res for inst in r.instances
                  if not inst.tags.get("moz-loaned-to")]
    if not candidates:
        # Previously this surfaced as an opaque TypeError/IndexError.
        raise SystemExit(
            "no suitable instance found to clone in %s" % args.region)
    source_instance = sorted(candidates,
                             key=lambda inst: inst.launch_time)[-1]
    log.debug("Selected instance to clone: %s", source_instance)
    root_device_name = source_instance.root_device_name
    v_id = source_instance.block_device_mapping[root_device_name].volume_id
    v = conn.get_all_volumes(volume_ids=[v_id])[0]
    snap1 = v.create_snapshot("temporary snapshot of %s" % v_id)

    wait_for_status(snap1, "status", "completed", "update")
    host_instance = run_instance(
        connection=conn, instance_name="tmp", config=ami_config,
        key_name=args.ssh_key, user=args.user,
        subnet_id=random.choice(moz_type_config["subnet_ids"]))

    env.host_string = host_instance.private_ip_address
    env.user = '******'
    env.abort_on_prompts = True
    env.disable_known_hosts = True
    int_dev_name = ami_config['target']['int_dev_name']
    # Read outside the retry loop below so that a missing key fails right
    # away instead of being swallowed and retried forever.
    aws_dev_name = ami_config['target']['aws_dev_name']
    mount_dev = int_dev_name
    mount_point = ami_config['target']['mount_point']
    virtualization_type = ami_config.get("virtualization_type")
    if virtualization_type == "hvm":
        # For HVM the first partition of the device is mounted.
        mount_dev = "%s1" % mount_dev
    tmp_v = conn.create_volume(size=snap1.volume_size,
                               zone=host_instance.placement,
                               snapshot=snap1)
    wait_for_status(tmp_v, "status", "available", "update")
    while True:
        try:
            tmp_v.attach(host_instance.id, aws_dev_name)
            break
        except Exception:
            # "except Exception" keeps Ctrl-C working in this endless loop.
            log.debug('hit error waiting for volume to be attached')
            time.sleep(10)
    while True:
        try:
            tmp_v.update()
            if tmp_v.status == 'in-use' and \
                    run('ls %s' % int_dev_name).succeeded:
                break
        except (Exception, SystemExit):
            # SystemExit is still caught on purpose: presumably a failing
            # Fabric ``run`` aborts through it -- TODO confirm.
            # KeyboardInterrupt is no longer swallowed.
            log.debug('hit error waiting for volume to be attached')
        # Sleep on every unsuccessful pass; the original only slept after
        # an exception and otherwise polled in a tight loop.
        time.sleep(10)
    run('mkdir -p %s' % mount_point)
    run('mount {dev} {mount_point}'.format(dev=mount_dev,
                                           mount_point=mount_point))
    with cd(mount_point):
        # Remove host-specific state from the cloned file system.
        run("rm -f root/*.sh")
        run("rm -f root/*.log")
        run("rm -f root/userdata")
        run("rm -f root/*.done")
        run("rm -f etc/spot_setup.done")
        run("rm -f var/lib/puppet/ssl/private_keys/*")
        run("rm -f var/lib/puppet/ssl/certs/*")
        if not using_stopped_instance or args.public:
            run("rm -rf builds/slave")
        else:
            run("rm -f builds/slave/buildbot.tac")
        run("echo localhost > etc/hostname")
        run("sed -i -e 's/127.0.0.1.*/127.0.0.1 localhost/g' etc/hosts")
        if args.public:
            # put rc.local
            put("%s/%s/etc/rc.local" % (AMI_CONFIGS_DIR, args.ami_config),
                "etc/rc.local", mirror_local_mode=True)
            run("rm -rf home/cltbld/.ssh")
            run("rm -rf root/.ssh/*")
            run("rm -rf builds/gapi.data")
            run("rm -rf builds/mock_mozilla/*/root/home/mock_mozilla")
        else:
            put("%s/spot_setup.sh" % AMI_CONFIGS_DIR,
                "etc/spot_setup.sh", mirror_local_mode=True)
            # replace puppet init with our script
            if ami_config["distro"] == "ubuntu":
                put("%s/spot_setup.conf" % AMI_CONFIGS_DIR,
                    "etc/init/puppet.conf", mirror_local_mode=True)
            else:
                run("echo '/etc/spot_setup.sh' > etc/init.d/puppet")
    # create snapshot2
    log.info('Terminating %s', host_instance)
    host_instance.terminate()
    # The volume becomes "available" again once the host is gone.
    wait_for_status(tmp_v, "status", "available", "update")
    log.info('Creating a snapshot')
    snap2 = tmp_v.create_snapshot(dated_target_name)
    wait_for_status(snap2, "status", "completed", "update")
    snap2.add_tag("Name", dated_target_name)

    bdm = BlockDeviceMapping()
    bdm[root_device_name] = BlockDeviceType(snapshot_id=snap2.id)

    log.info('Creating AMI')

    if virtualization_type == "hvm":
        kernel_id = None
    else:
        kernel_id = source_instance.kernel

    ami_id = conn.register_image(
        dated_target_name,
        dated_target_name,
        architecture=ami_config["arch"],
        kernel_id=kernel_id,
        root_device_name=root_device_name,
        block_device_map=bdm,
        virtualization_type=virtualization_type,
    )
    log.info('Waiting...')
    while True:
        try:
            ami = conn.get_image(ami_id)
            ami.add_tag('Name', dated_target_name)
            # NOTE(review): mktime() interprets the UTC struct as local
            # time; left unchanged because other code may compare against
            # values produced the same way -- confirm before changing.
            ami.add_tag('moz-created', int(time.mktime(time.gmtime())))
            for tag, value in moz_type_config["tags"].items():
                ami.add_tag(tag, value)
            log.info('AMI created')
            log.info('ID: {id}, name: {name}'.format(id=ami.id, name=ami.name))
            break
        except Exception:
            # "except Exception" keeps Ctrl-C working in this endless loop.
            log.info('Wating for AMI')
            time.sleep(10)
    # Step 7: Cleanup
    log.info('Cleanup...')
    tmp_v.delete()
    snap1.delete()
コード例 #31
0
ファイル: free_ips.py プロジェクト: bdacode/build-cloud-tools
# Collect the private IP addresses of the configured subnets that are not
# currently assigned to any network interface in the VPC.
parser = argparse.ArgumentParser()
parser.add_argument("-c", "--config", required=True,
                    type=argparse.FileType('r'),
                    help="instance configuration to use")
parser.add_argument("-r", "--region", help="region to use",
                    default="us-east-1")
parser.add_argument("-n", "--number", type=int, required=True,
                    help="How many IPs you need")
args = parser.parse_args()

try:
    config = json.load(args.config)[args.region]
except KeyError:
    parser.error("unknown configuration")

conn = get_aws_connection(args.region)
vpc = get_vpc(args.region)

interfaces = vpc.get_all_network_interfaces()
used_ips = [i.private_ip_address for i in interfaces]
# Set copy for constant-time membership tests in the loop below; the list
# is kept under its original name.
used_ip_set = set(used_ips)

subnets = vpc.get_all_subnets(subnet_ids=config["subnet_ids"])
blocks = [s.cidr_block for s in subnets]

available_ips = []
for b in blocks:
    # skip the first 4 IPs (indexes 0-3, they are sometimes "reserved") and
    # the last one (broadcast)
    for ip in list(IP(b))[4:-1]:
        if str(ip) not in used_ip_set:
            available_ips.append(ip)
コード例 #32
0
    parser.add_option("-s", "--key-name", dest="key_name", help="SSH key name")
    parser.add_option('--keep-volume', dest='keep_volume', action='store_true',
                      help="Don't delete target volume")
    parser.add_option('--keep-host-instance', dest='keep_host_instance',
                      action='store_true', help="Don't delete host instance")
    parser.add_option('--user', dest='user', default='root')

    options, args = parser.parse_args()

    logging.basicConfig(level=logging.INFO)

    if not args:
        parser.error("at least one instance name is required")

    if not options.config:
        parser.error("config name is required")

    if not options.key_name:
        parser.error("SSH key name name is required")

    try:
        config = json.load(open("%s/%s.json" % (AMI_CONFIGS_DIR,
                                                options.config)))[options.region]
    except KeyError:
        parser.error("unknown configuration")

    connection = get_aws_connection(options.region)
    host_instance = run_instance(connection, args[0], config, options.key_name,
                                 options.user)
    create_ami(host_instance, options, config)
コード例 #33
0
def create_instance(name, config, region, key_name, ssh_key, instance_data,
                    deploypass, loaned_to, loan_bug, create_ami,
                    ignore_subnet_check, max_attempts):
    """Creates an AMI instance with the given name and config. The config must
    specify things like ami id.

    The instance is launched, tagged, assimilated through
    ``assimilate_instance`` and finally tagged ``moz-state=ready``.  When
    ``create_ami`` is set, an AMI is generated from the root volume and the
    instance is terminated afterwards.

    Args:
        name: short host name; used for the ``Name`` tag and the FQDN.
        config: dict of settings.  Keys read here include ``domain``,
            ``ami``, ``type``, ``instance_type`` and, optionally,
            ``device_map``, ``subnet_ids``, ``security_group_ids``,
            ``use_public_ip``, ``dns_search_domain``,
            ``disable_api_termination`` and ``instance_profile_name``;
            ``distro`` and ``tags`` are read when ``create_ami`` is set.
        region: region passed to ``get_aws_connection``.
        key_name: key pair name passed to ``run_instances``.
        ssh_key: passed through to ``assimilate_instance``.
        instance_data: dict; copied and extended with ``name``, ``domain``
            and ``hostname``.
        deploypass: passed to the user data template and to
            ``assimilate_instance``.
        loaned_to: if truthy, stored in the ``moz-loaned-to`` tag.
        loan_bug: if truthy, stored in the ``moz-bug`` tag.
        create_ami: if truthy, skip the reboot during assimilation, then
            generate an AMI and terminate the instance.
        ignore_subnet_check: passed to ``make_instance_interfaces``.
        max_attempts: maximum number of tries for launching and for
            assimilating; a falsy value retries without limit.

    Raises:
        RuntimeError: if the instance could not be started within
            ``max_attempts`` tries.
    """
    conn = get_aws_connection(region)
    # Make sure we don't request the same things twice
    token = str(uuid.uuid4())[:16]

    instance_data = instance_data.copy()
    instance_data['name'] = name
    instance_data['domain'] = config['domain']
    instance_data['hostname'] = '{name}.{domain}'.format(
        name=name, domain=config['domain'])

    ami = conn.get_all_images(image_ids=[config["ami"]])[0]
    bdm = None
    if 'device_map' in config:
        bdm = BlockDeviceMapping()
        for device, device_info in config['device_map'].items():
            bd = BlockDeviceType()
            if device_info.get('size'):
                bd.size = device_info['size']
            # Overwrite root device size for HVM instances, since they cannot
            # be resized online
            if ami.virtualization_type == "hvm" and \
                    ami.root_device_name == device:
                bd.size = ami.block_device_mapping[ami.root_device_name].size
            # Anything but an explicit False means "delete on termination".
            if device_info.get("delete_on_termination") is not False:
                bd.delete_on_termination = True
            if device_info.get("ephemeral_name"):
                bd.ephemeral_name = device_info["ephemeral_name"]
            if device_info.get("volume_type"):
                bd.volume_type = device_info["volume_type"]
                if device_info["volume_type"] == "io1" \
                        and device_info.get("iops"):
                    bd.iops = device_info["iops"]

            bdm[device] = bd

    interfaces = make_instance_interfaces(
        region, instance_data['hostname'], ignore_subnet_check,
        config.get('subnet_ids'), config.get('security_group_ids', []),
        config.get("use_public_ip"))

    reservation = None
    keep_going, attempt = True, 1
    while keep_going:
        try:
            puppet_master = pick_puppet_master(instance_data.get('puppet_masters'))
            user_data = user_data_from_template(config['type'], {
                "puppet_server": puppet_master,
                "fqdn": instance_data['hostname'],
                "hostname": instance_data['name'],
                "domain": instance_data['domain'],
                "dns_search_domain": config.get('dns_search_domain'),
                "password": deploypass,
                "moz_instance_type": config['type'],
                "region_dns_atom": get_region_dns_atom(region)})

            reservation = conn.run_instances(
                image_id=config['ami'],
                key_name=key_name,
                instance_type=config['instance_type'],
                block_device_map=bdm,
                client_token=token,
                disable_api_termination=config.get('disable_api_termination'),
                user_data=user_data,
                instance_profile_name=config.get('instance_profile_name'),
                network_interfaces=interfaces,
            )
            break
        except boto.exception.BotoServerError:
            log.exception("Cannot start an instance")
        time.sleep(10)
        if max_attempts:
            attempt += 1
            keep_going = max_attempts >= attempt

    if reservation is None:
        # All attempts failed; the original fell through to an unbound
        # ``reservation`` and died with a NameError.
        raise RuntimeError(
            "Cannot start instance %s after %s attempts" %
            (instance_data['hostname'], max_attempts))

    instance = reservation.instances[0]
    log.info("instance %s created, waiting to come up", instance)
    # Wait for the instance to come up
    wait_for_status(instance, "state", "running", "update")
    instance.add_tag('Name', name)
    instance.add_tag('FQDN', instance_data['hostname'])
    instance.add_tag('created', time.strftime("%Y-%m-%d %H:%M:%S %Z",
                                              time.gmtime()))
    instance.add_tag('moz-type', config['type'])
    if loaned_to:
        instance.add_tag("moz-loaned-to", loaned_to)
    if loan_bug:
        instance.add_tag("moz-bug", loan_bug)

    log.info("assimilating %s", instance)
    instance.add_tag('moz-state', 'pending')

    assimilated = False
    keep_going, attempt = True, 1
    while keep_going:
        try:
            # Don't reboot if need to create ami
            reboot = not create_ami
            assimilate_instance(instance=instance, config=config,
                                ssh_key=ssh_key, instance_data=instance_data,
                                deploypass=deploypass, reboot=reboot)
            assimilated = True
            break
        except NetworkError as e:
            # it takes a while for the machine to start/reboot so the
            # NetworkError exception is quite common, just log the error,
            # without the full stack trace
            log.warning("cannot connect; instance may still be starting  %s (%s, %s) - %s,"
                        "retrying in %d sec ...", instance_data['hostname'], instance.id,
                        instance.private_ip_address, e, FAILURE_TIMEOUT)
            time.sleep(FAILURE_TIMEOUT)

        except (Exception, SystemExit):
            # any other exception.  SystemExit is still caught on purpose:
            # presumably assimilate_instance can abort through it
            # (Fabric-style) -- TODO confirm.  KeyboardInterrupt is no
            # longer swallowed, so an endless retry can be interrupted.
            log.warning("problem assimilating %s (%s, %s), retrying in "
                        "%d sec ...", instance_data['hostname'], instance.id,
                        instance.private_ip_address, FAILURE_TIMEOUT, exc_info=True)
            time.sleep(FAILURE_TIMEOUT)
        if max_attempts:
            attempt += 1
            keep_going = max_attempts >= attempt

    if not assimilated:
        # Do not advertise a half-configured instance as ready and do not
        # build an AMI from it; the 'pending' tag stays for inspection.
        log.error("giving up assimilating %s (%s) after %s attempts; "
                  "moz-state stays 'pending'", instance_data['hostname'],
                  instance.id, max_attempts)
        return

    instance.add_tag('moz-state', 'ready')
    if create_ami:
        ami_name = "spot-%s-%s" % (
            config['type'], time.strftime("%Y-%m-%d-%H-%M", time.gmtime()))
        log.info("Generating AMI %s", ami_name)
        ami_cleanup(mount_point="/", distro=config["distro"])
        root_bd = instance.block_device_mapping[instance.root_device_name]
        volume = instance.connection.get_all_volumes(
            volume_ids=[root_bd.volume_id])[0]
        # The instance has to be stopped to flush EBS caches
        instance.stop()
        wait_for_status(instance, 'state', 'stopped', 'update')
        ami = volume_to_ami(volume=volume, ami_name=ami_name,
                            arch=instance.architecture,
                            virtualization_type=instance.virtualization_type,
                            kernel_id=instance.kernel,
                            root_device_name=instance.root_device_name,
                            tags=config["tags"])
        log.info("AMI %s (%s) is ready", ami_name, ami.id)
        log.warning("Terminating %s", instance)
        instance.terminate()
コード例 #34
0
def create_instance(name, config, region, key_name, ssh_key, instance_data,
                    deploypass, loaned_to, loan_bug, create_ami,
                    ignore_subnet_check, max_attempts):
    """Launch an EC2 instance, tag and assimilate it, optionally bake an AMI.

    The instance is started from ``config["ami"]``, tagged, and handed to
    ``assimilate_instance``.  When ``create_ami`` is set, the assimilated
    instance is stopped, its root volume is turned into a new AMI and the
    instance is terminated.

    Args:
        name: Short host name; also used for the ``Name`` tag.
        config: Instance configuration dict.  ``domain``, ``ami``,
            ``instance_type`` and ``type`` are always read; ``distro`` and
            ``tags`` are read when ``create_ami`` is set.  ``device_map``,
            ``user_data_file``, ``subnet_ids``, ``security_group_ids``,
            ``use_public_ip``, ``dns_search_domain``,
            ``disable_api_termination`` and ``instance_profile_name`` are
            optional.
        region: AWS region name to launch in.
        key_name: Key name passed to ``run_instances``.
        ssh_key: Passed through to ``assimilate_instance``.
        instance_data: Dict of extra instance data.  It is copied, the
            caller's dict is not modified.
        deploypass: Substituted as ``password`` into the user data template
            and passed through to ``assimilate_instance``.
        loaned_to: If truthy, stored in the ``moz-loaned-to`` tag.
        loan_bug: If truthy, stored in the ``moz-bug`` tag.
        create_ami: If truthy, skip the reboot during assimilation, create
            an AMI from the root volume and terminate the instance.
        ignore_subnet_check: Passed through to ``make_instance_interfaces``.
        max_attempts: Maximum number of tries for launching and, separately,
            for assimilating.  A falsy value means "retry forever".

    Raises:
        boto.exception.BotoServerError: The last launch error, once
            ``max_attempts`` launch attempts have failed.
        Exception: The last assimilation error, once ``max_attempts``
            assimilation attempts have failed.  The instance is then left
            tagged ``moz-state=pending``.
    """
    conn = get_aws_connection(region)
    # Make sure we don't request the same things twice: the same client token
    # is sent with every launch retry.
    token = str(uuid.uuid4())[:16]

    instance_data = instance_data.copy()
    instance_data['name'] = name
    instance_data['domain'] = config['domain']
    instance_data['hostname'] = '{name}.{domain}'.format(
        name=name, domain=config['domain'])

    ami = conn.get_all_images(image_ids=[config["ami"]])[0]
    bdm = None
    if 'device_map' in config:
        bdm = BlockDeviceMapping()
        for device, device_info in config['device_map'].items():
            bd = BlockDeviceType()
            if device_info.get('size'):
                bd.size = device_info['size']
            # Overwrite root device size for HVM instances, since they cannot
            # be resized online
            if ami.virtualization_type == "hvm" and \
                    ami.root_device_name == device:
                bd.size = ami.block_device_mapping[ami.root_device_name].size
            # Volumes are deleted with the instance unless the config
            # explicitly says False.
            if device_info.get("delete_on_termination") is not False:
                bd.delete_on_termination = True
            if device_info.get("ephemeral_name"):
                bd.ephemeral_name = device_info["ephemeral_name"]

            bdm[device] = bd

    interfaces = make_instance_interfaces(region, instance_data['hostname'],
                                          ignore_subnet_check,
                                          config.get('subnet_ids'),
                                          config.get('security_group_ids', []),
                                          config.get("use_public_ip"))

    # The user data does not depend on the launch attempt, so render it once
    # and close the template file deterministically.
    if 'user_data_file' in config:
        with open(config['user_data_file']) as user_data_file:
            user_data = user_data_file.read()
    else:
        user_data = get_user_data_tmpl(config['type'])
    if user_data:
        user_data = user_data.format(
            puppet_server=instance_data.get('default_puppet_server'),
            fqdn=instance_data['hostname'],
            hostname=instance_data['name'],
            domain=instance_data['domain'],
            dns_search_domain=config.get('dns_search_domain'),
            password=deploypass,
            moz_instance_type=config['type'],
            region_dns_atom=get_region_dns_atom(region),
        )

    attempt = 0
    while True:
        attempt += 1
        last_attempt = bool(max_attempts) and attempt >= max_attempts
        try:
            reservation = conn.run_instances(
                image_id=config['ami'],
                key_name=key_name,
                instance_type=config['instance_type'],
                block_device_map=bdm,
                client_token=token,
                disable_api_termination=config.get('disable_api_termination'),
                user_data=user_data,
                instance_profile_name=config.get('instance_profile_name'),
                network_interfaces=interfaces,
            )
            break
        except boto.exception.BotoServerError:
            log.exception("Cannot start an instance")
            if last_attempt:
                # Out of attempts: surface the real AWS error instead of
                # falling through with no reservation to work on.
                raise
        time.sleep(10)

    instance = reservation.instances[0]
    log.info("instance %s created, waiting to come up", instance)
    # Wait for the instance to come up
    wait_for_status(instance, "state", "running", "update")
    instance.add_tag('Name', name)
    instance.add_tag('FQDN', instance_data['hostname'])
    instance.add_tag('created',
                     time.strftime("%Y-%m-%d %H:%M:%S %Z", time.gmtime()))
    instance.add_tag('moz-type', config['type'])
    if loaned_to:
        instance.add_tag("moz-loaned-to", loaned_to)
    if loan_bug:
        instance.add_tag("moz-bug", loan_bug)

    log.info("assimilating %s", instance)
    instance.add_tag('moz-state', 'pending')

    # Don't reboot if need to create ami
    reboot = not create_ami
    attempt = 0
    while True:
        attempt += 1
        last_attempt = bool(max_attempts) and attempt >= max_attempts
        try:
            assimilate_instance(instance=instance,
                                config=config,
                                ssh_key=ssh_key,
                                instance_data=instance_data,
                                deploypass=deploypass,
                                reboot=reboot)
            break
        except NetworkError as e:
            # it takes a while for the machine to start/reboot so the
            # NetworkError exception is quite common, just log the error,
            # without the full stack trace
            log.warning(
                "cannot connect; instance may still be starting  %s (%s, %s) - %s,"
                "retrying in %d sec ...", instance_data['hostname'],
                instance.id, instance.private_ip_address, e, FAILURE_TIMEOUT)
            if last_attempt:
                log.error("giving up on assimilating %s (%s) after %d attempts",
                          instance_data['hostname'], instance.id, attempt)
                raise
        except Exception:
            # Any other error.  ``Exception`` rather than a bare ``except`` so
            # that KeyboardInterrupt/SystemExit still stop the retry loop.
            log.warning(
                "problem assimilating %s (%s, %s), retrying in "
                "%d sec ...",
                instance_data['hostname'],
                instance.id,
                instance.private_ip_address,
                FAILURE_TIMEOUT,
                exc_info=True)
            if last_attempt:
                # Never tag (or snapshot) an instance that was not assimilated.
                log.error("giving up on assimilating %s (%s) after %d attempts",
                          instance_data['hostname'], instance.id, attempt)
                raise
        time.sleep(FAILURE_TIMEOUT)

    instance.add_tag('moz-state', 'ready')
    if create_ami:
        ami_name = "spot-%s-%s" % (
            config['type'], time.strftime("%Y-%m-%d-%H-%M", time.gmtime()))
        log.info("Generating AMI %s", ami_name)
        ami_cleanup(mount_point="/", distro=config["distro"])
        root_bd = instance.block_device_mapping[instance.root_device_name]
        volume = instance.connection.get_all_volumes(
            volume_ids=[root_bd.volume_id])[0]
        # The instance has to be stopped to flush EBS caches
        instance.stop()
        wait_for_status(instance, 'state', 'stopped', 'update')
        ami = volume_to_ami(volume=volume,
                            ami_name=ami_name,
                            arch=instance.architecture,
                            virtualization_type=instance.virtualization_type,
                            kernel_id=instance.kernel,
                            root_device_name=instance.root_device_name,
                            tags=config["tags"])
        log.info("AMI %s (%s) is ready", ami_name, ami.id)
        log.warning("Terminating %s", instance)
        instance.terminate()
コード例 #35
0
def aws_get_spot_requests(region, moz_instance_type):
    """Return the open and active spot requests of one moz-type in a region.

    The requests are fetched with a ``tag:moz-type`` filter and then narrowed
    down to those whose ``state`` is ``"open"`` or ``"active"``.
    """
    wanted_states = ("open", "active")
    connection = get_aws_connection(region)
    tagged_requests = connection.get_all_spot_instance_requests(
        filters={"tag:moz-type": moz_instance_type})

    live_requests = []
    for spot_request in tagged_requests:
        if spot_request.state in wanted_states:
            live_requests.append(spot_request)
    return live_requests
コード例 #36
0
def create_instance(name, config, region, key_name, instance_data,
                    deploypass, loaned_to, loan_bug):
    """Launch an EC2 instance on a VPC subnet, tag it and assimilate it.

    Launching and assimilation are both retried forever, 10 seconds apart.
    The instance is tagged ``moz-state=pending`` before assimilation and
    ``moz-state=ready`` once ``assimilate`` has succeeded.

    Args:
        name: Short host name; also used for the ``Name`` tag.
        config: Instance configuration dict.  ``domain``, ``ami``,
            ``instance_type``, ``type`` and ``subnet_ids`` are read;
            ``device_map``, ``security_group_ids``, ``use_public_ip``,
            ``disable_api_termination`` and ``instance_profile_name`` are
            optional.
        region: AWS region name to launch in.
        key_name: Key name passed to ``run_instances``.
        instance_data: Dict of extra instance data.  It is copied, the
            caller's dict is not modified.
        deploypass: Passed through to ``assimilate``.
        loaned_to: If truthy, stored in the ``moz-loaned-to`` tag.
        loan_bug: If truthy, stored in the ``moz-bug`` tag.
    """
    conn = get_aws_connection(region)
    vpc = get_vpc(region)
    # Make sure we don't request the same things twice: the same client token
    # is sent with every launch retry.
    token = str(uuid.uuid4())[:16]

    instance_data = instance_data.copy()
    instance_data['name'] = name
    instance_data['hostname'] = '{name}.{domain}'.format(
        name=name, domain=config['domain'])

    ami = conn.get_all_images(image_ids=[config["ami"]])[0]
    bdm = None
    if 'device_map' in config:
        bdm = BlockDeviceMapping()
        for device, device_info in config['device_map'].items():
            bd = BlockDeviceType()
            if device_info.get('size'):
                bd.size = device_info['size']
            # Overwrite root device size for HVM instances, since they cannot
            # be resized online
            if ami.virtualization_type == "hvm" and \
                    ami.root_device_name == device:
                bd.size = ami.block_device_mapping[ami.root_device_name].size
            # Volumes are deleted with the instance unless the config
            # explicitly says False.
            if device_info.get("delete_on_termination") is not False:
                bd.delete_on_termination = True
            if device_info.get("ephemeral_name"):
                bd.ephemeral_name = device_info["ephemeral_name"]

            bdm[device] = bd

    # Prefer the address get_ip() reports for the hostname, but only if it
    # sits in one of the configured subnets and is still available.
    ip_address = get_ip(instance_data['hostname'])
    subnet_id = None

    if ip_address:
        s_id = get_subnet_id(vpc, ip_address)
        if s_id in config['subnet_ids']:
            if ip_available(conn, ip_address):
                subnet_id = s_id
            else:
                log.warning("%s already assigned", ip_address)

    if not ip_address or not subnet_id:
        # No usable fixed address: pick a random configured subnet and pass
        # no private address.
        ip_address = None
        subnet_id = choice(config.get('subnet_ids'))
    interface = NetworkInterfaceSpecification(
        subnet_id=subnet_id, private_ip_address=ip_address,
        delete_on_termination=True,
        groups=config.get('security_group_ids', []),
        associate_public_ip_address=config.get("use_public_ip")
    )
    interfaces = NetworkInterfaceCollection(interface)

    while True:
        try:
            reservation = conn.run_instances(
                image_id=config['ami'],
                key_name=key_name,
                instance_type=config['instance_type'],
                block_device_map=bdm,
                client_token=token,
                disable_api_termination=bool(config.get('disable_api_termination')),
                network_interfaces=interfaces,
                instance_profile_name=config.get("instance_profile_name"),
            )
            break
        except boto.exception.BotoServerError:
            log.exception("Cannot start an instance")
        time.sleep(10)

    instance = reservation.instances[0]
    log.info("instance %s created, waiting to come up", instance)
    # Wait for the instance to come up
    wait_for_status(instance, "state", "running", "update")
    instance.add_tag('Name', name)
    instance.add_tag('FQDN', instance_data['hostname'])
    instance.add_tag('created', time.strftime("%Y-%m-%d %H:%M:%S %Z",
                                              time.gmtime()))
    instance.add_tag('moz-type', config['type'])
    if loaned_to:
        instance.add_tag("moz-loaned-to", loaned_to)
    if loan_bug:
        instance.add_tag("moz-bug", loan_bug)

    log.info("assimilating %s", instance)
    instance.add_tag('moz-state', 'pending')
    while True:
        try:
            assimilate(instance.private_ip_address, config, instance_data,
                       deploypass)
            break
        except Exception:
            # ``Exception`` rather than a bare ``except`` so that Ctrl-C and
            # SystemExit can still break out of this endless retry loop; the
            # traceback is logged so the cause of the failure is not lost.
            log.warning("problem assimilating %s (%s), retrying in 10 sec ...",
                        instance_data['hostname'], instance.id,
                        exc_info=True)
            time.sleep(10)
    instance.add_tag('moz-state', 'ready')
コード例 #37
0
def request_spot_instances(moz_instance_type, start_count, regions, secrets,
                           region_priorities, spot_config, dryrun,
                           cached_cert_dir, slaveset):
    """Request up to ``start_count`` spot instances of one moz-type.

    For every region that has a spot limit configured for the type, the
    number of instances that may still be started is computed from the limit
    and the currently open/active spot requests.  The spot choices returned
    by ``get_spot_choices`` are then tried in order until ``start_count``
    instances have been started or the choices run out.

    Args:
        moz_instance_type: Instance type name; selects the spot rules, the
            per-region limits and the instance config file.
        start_count: Number of instances wanted in total.
        regions: Iterable of AWS region names to consider.
        secrets: Passed through to ``do_request_spot_instances``.
        region_priorities: Accepted for interface compatibility; not used
            by this function.
        spot_config: Dict with optional ``rules`` (by type) and ``limits``
            (by region, then by type) sections.
        dryrun: Passed through to ``do_request_spot_instances``.
        cached_cert_dir: Passed through to ``do_request_spot_instances``.
        slaveset: Passed through to ``do_request_spot_instances``.

    Returns:
        The sum of the counts reported by ``do_request_spot_instances``;
        0 when there are no rules, no spot choices or no region with room.
    """
    started = 0
    spot_rules = spot_config.get("rules", {}).get(moz_instance_type)
    if not spot_rules:
        log.warning("No spot rules found for %s", moz_instance_type)
        return 0

    # Close the config file deterministically instead of leaking the handle.
    config_path = os.path.join(INSTANCE_CONFIGS_DIR, moz_instance_type)
    with open(config_path) as config_file:
        instance_config = json.load(config_file)
    connections = []
    for region in regions:
        conn = get_aws_connection(region)
        connections.append(conn)
    spot_choices = get_spot_choices(connections, spot_rules, "Linux/UNIX (Amazon VPC)")
    if not spot_choices:
        log.warning("No spot choices for %s", moz_instance_type)
        return 0

    to_start = {}
    active_network_ids = {}
    for region in regions:
        # Check if spots are enabled in this region for this type
        region_limit = spot_config.get("limits", {}).get(region, {}).get(
            moz_instance_type)
        if not region_limit:
            log.debug("No spot limits defined for %s in %s, skipping...",
                      moz_instance_type, region)
            continue

        # check the limits
        # Count how many unique network interfaces are active
        # Sometimes we have multiple requests for the same interface
        active_requests = aws_get_spot_requests(region=region, moz_instance_type=moz_instance_type)
        active_network_ids[region] = set(r.launch_specification.networkInterfaceId for r in active_requests)
        active_count = len(active_network_ids[region])
        log.debug("%s: %i running spot instances in %s", moz_instance_type, active_count, region)
        can_be_started = region_limit - active_count
        if can_be_started < 1:
            log.debug("Not starting. Active spot request count in %s region "
                      "hit limit of %s. Active count: %s", region,
                      region_limit, active_count)
            continue

        to_be_started = min(can_be_started, start_count - started)
        ami = get_ami(region=region, moz_instance_type=moz_instance_type)
        to_start[region] = {"ami": ami, "instances": to_be_started}

    if not to_start:
        log.debug("Nothing to start for %s", moz_instance_type)
        return 0

    for spot_choice in spot_choices:
        region = spot_choice.region
        if region not in to_start:
            log.debug("Skipping %s for %s", spot_choice, region)
            continue
        if not usable_choice(spot_choice):
            log.debug("Skipping %s for %s - unusable", spot_choice, region)
            continue
        remaining_in_region = to_start[region]["instances"]
        need = min(remaining_in_region, start_count - started)
        if need < 1:
            # Earlier choices already used up this region's budget.
            log.debug("Skipping %s for %s - nothing left to start",
                      spot_choice, region)
            continue
        log.debug("Need %s of %s in %s", need, moz_instance_type,
                  spot_choice.availability_zone)

        log.debug("Using %s", spot_choice)
        launched = do_request_spot_instances(
            amount=need,
            region=region, secrets=secrets,
            moz_instance_type=moz_instance_type,
            ami=to_start[region]["ami"],
            instance_config=instance_config, dryrun=dryrun,
            cached_cert_dir=cached_cert_dir,
            spot_choice=spot_choice,
            slaveset=slaveset,
            active_network_ids=active_network_ids[region],
        )
        started += launched
        # Charge the launch against the region's budget so that several
        # choices in the same region cannot together exceed its limit.
        to_start[region]["instances"] = remaining_in_region - launched

        if started >= start_count:
            break

    return started
コード例 #38
0
def aws_stop_idle(credentials, regions, masters_json, moz_types,
                  dryrun=False, concurrency=8):
    """Try to stop buildbot instances and log how many stopped per moz-type.

    The buildbot instances of the given ``moz_types`` are collected from
    every region (all EC2 regions when ``regions`` is empty), shuffled, and
    handed to ``concurrency`` worker threads.  Each worker calls
    ``aws_safe_stop_instance``; instances for which it returns a truthy value
    are counted by their ``moz-type`` tag and the totals are logged.

    ``credentials`` and ``masters_json`` are passed through to
    ``aws_safe_stop_instance``, as is ``dryrun``.  When ``dryrun`` is set the
    stopped instances are not refreshed with ``update()`` before counting.
    """
    if not regions:
        # Look at all regions
        log.debug("loading all regions")
        regions = [region.name for region in boto.ec2.regions()]

    min_running_by_type = 0

    candidates = []
    impaired_ids = []

    for region_name in regions:
        log.debug("looking at region %s", region_name)
        conn = get_aws_connection(region_name)
        instances = get_buildbot_instances(conn, moz_types)
        impaired_statuses = conn.get_all_instance_status(
            filters={'instance-status.status': 'impaired'})
        impaired_ids.extend(status.id for status in impaired_statuses)

        by_type = {}
        for instance in instances:
            # TODO: Check if launch_time is too old, and terminate the instance
            # if it is
            # NB can't turn this on until aws_create_instance is working
            # properly (with ssh keys)
            moz_type = instance.tags['moz-type']
            if moz_type not in by_type:
                by_type[moz_type] = []
            by_type[moz_type].append(instance)

        # Make sure min_running_by_type are kept running
        for same_type in by_type.values():
            for kept in same_type[:min_running_by_type]:
                log.debug("%s - keep running (min %s instances of type %s)",
                          kept.tags['Name'], min_running_by_type,
                          kept.tags['moz-type'])
                instances.remove(kept)

        candidates.extend(instances)

    random.shuffle(candidates)

    q = Queue()
    to_stop = Queue()
    for candidate in candidates:
        q.put(candidate)

    def worker():
        # Drain the work queue; exit once nothing arrives within the timeout.
        while True:
            try:
                instance = q.get(timeout=0.1)
            except Empty:
                return
            try:
                stopped = aws_safe_stop_instance(instance, impaired_ids,
                                                 credentials, masters_json,
                                                 dryrun=dryrun)
                if stopped:
                    to_stop.put(instance)
            except Exception:
                log.warning("%s - unable to stop" % instance.tags.get('Name'),
                            exc_info=True)

    # Workaround for http://bugs.python.org/issue11108
    time.strptime("19000102030405", "%Y%m%d%H%M%S")
    threads = []
    for _ in range(concurrency):
        thread = threading.Thread(target=worker)
        thread.start()
        threads.append(thread)

    # Join with a short timeout so the main thread can still react to Ctrl-C
    # while workers are running.
    while threads:
        for thread in list(threads):
            try:
                if not thread.is_alive():
                    thread.join()
                    threads.remove(thread)
                else:
                    thread.join(timeout=0.5)
            except KeyboardInterrupt:
                raise SystemExit(1)

    stopped_by_type = {}
    while not to_stop.empty():
        stopped_instance = to_stop.get()
        if not dryrun:
            stopped_instance.update()
        if 'moz-type' not in stopped_instance.tags:
            log.info("%s - has no moz-type! (%s)" % (stopped_instance.tags.get('Name'), stopped_instance.id))

        moz_type = stopped_instance.tags.get('moz-type', 'notype')
        stopped_by_type[moz_type] = stopped_by_type.get(moz_type, 0) + 1

    for moz_type, count in sorted(stopped_by_type.items()):
        log.info("%s - stopped %s", moz_type, count)