def main(argv):
    if len(argv) == 1:
        print "Usage: %s [region]+" % argv[0]
        print "  region: all or some of %s" % " ".join(Ec2Region.All())
        sys.exit(1)

    # Note: Not sure if I want to parameterize the cluster name too. It can be
    # generated dynamically.

    if argv[1] == "all":
        regions = Ec2Region.All()
    else:
        regions = argv[1:]

    # EC2 instance types
    #
    # 4 vCPUs, 7.5 GiB RAM, EBS only, $0.209 per Hour
    # "The specified instance type can only be used in a VPC. A subnet ID or network interface ID is required to carry out the request."
    # ec2_type = "c4.xlarge"
    #
    # 4 vCPUs, 7.5 GiB RAM, 2 x 40 SSD, $0.21 per Hour
    # ec2_type = "c3.xlarge"
    #
    # For fast development
    ec2_type = "c3.4xlarge"
def MeasureMetadataXdcTraffic(q):
	Cons.P("regions: %s" % ",".join(Ec2Region.All()))

	req_attrs = {
			"regions": Ec2Region.All()

			# Partial replication metadata is exchanged
			, "acorn-youtube.replication_type": "partial"

			# Objects are fully replicated
			, "acorn_options.full_replication": "true"

			, "acorn-youtube.fn_youtube_reqs": "tweets-010"

			, "acorn-youtube.youtube_extra_data_size": "10240"

			# Request all
			, "acorn-youtube.max_requests": "-1"

			, "acorn-youtube.simulation_time_dur_in_ms": "1800000"
			}
	_EnqReq(q, req_attrs)

	# Full replication, of course without any acorn metadata exchange
	req_attrs["acorn-youtube.replication_type"] = "full"
	req_attrs["acorn_options.use_attr_user"] = "******"
	req_attrs["acorn_options.use_attr_topic"] = "false"
	_EnqReq(q, req_attrs)
def ByRepModels(q):
	# UT
	req_attrs = {
			"init_script": "acorn-server"
			, "regions": Ec2Region.All()

			# Partial replication metadata is exchanged
			, "acorn-youtube.replication_type": "partial"

			, "acorn-youtube.fn_youtube_reqs": "tweets-010"

			# Default is 10240
			#, "acorn-youtube.youtube_extra_data_size": "10240"

			# Default is -1 (request all)
			#, "acorn-youtube.max_requests": "-1"
			, "acorn-youtube.max_requests": "100000"

			# Default is 1800000
			#, "acorn-youtube.simulation_time_dur_in_ms": "1800000"
			, "acorn-youtube.simulation_time_dur_in_ms": "10000"

			# Default is true, true
			, "acorn_options.use_attr_user": "******"
			, "acorn_options.use_attr_topic": "true"
			}
	_EnqReq(q, req_attrs)
def ByJobIdTermSelfLast():
    job_id = Ec2Util.JobId()
    Cons.P("Terminating running instances of job_id %s" % job_id)

    _TermInst.Init(term_by_job_id_self_last=True)

    tags = {}
    tags["job_id"] = job_id

    tis = []
    for r in Ec2Region.All():
        tis.append(_TermInst(r, tags))

    threads = []
    for ti in tis:
        t = threading.Thread(target=ti.Run)
        t.daemon = True
        threads.append(t)
        t.start()

    for t in threads:
        t.join()
    print ""

    Cons.P(_TermInst.Header())
    for ti in tis:
        ti.PrintResult()
def RunTermInst(tags):
    threads = []

    sys.stdout.write("Terminating running instances:")
    sys.stdout.flush()

    tis = []
    for r in Ec2Region.All():
        tis.append(TermInst(r, tags))

    for ti in tis:
        t = threading.Thread(target=ti.Run)
        t.daemon = True
        threads.append(t)
        t.start()

    for t in threads:
        t.join()
    print ""

    Cons.P(
        Util.BuildHeader(_fmt, "Region"
                         " InstanceId"
                         " PrevState"
                         " CurrState"))

    for ti in tis:
        ti.PrintResult()
def main(argv):
    if len(argv) != 3:
        raise RuntimeError("Usage: %s ami-id-in-us-east-1 name\n" \
          "  E.g.: %s ami-01d12c17 mutant-server-170129-1600" \
          % (argv[0], argv[0]))

    region_ami = {"us-east-1": argv[1]}

    regions = Ec2Region.All()
    regions.remove("us-east-1")
    for r in regions:
        cmd = "aws ec2 copy-image" \
          " --source-image-id %s" \
          " --source-region us-east-1" \
          " --region %s" \
          " --name %s" \
          % (argv[1], r, argv[2])
        out = Util.RunSubp(cmd)
        found_ami_id = False
        for line in out.split("\n"):
            if "\"ImageId\": \"ami-" in line:
                t = line.split("\"ImageId\": \"")
                if len(t) != 2:
                    raise RuntimeError("Unexpected line=[%s]" % line)
                # ami-a46623c4"
                # 012345678901
                region_ami[r] = t[1][0:11 + 1]
                found_ami_id = True
                break
        if not found_ami_id:
            raise RuntimeError("Unexpected output=[%s]" % out)

    print "{\n%s\n}" % ("\n, ".join(
        ["\"%s\": \"%s\"" % (k, v) for (k, v) in sorted(region_ami.items())]))
def main(argv):
	iscs = []
	for r in Ec2Region.All():
		iscs.append(ImageSnapshotCleaner(region=r))
	for i in iscs:
		i.GetImages()
	for i in iscs:
		i.Join()

	for i in iscs:
		i.GetSnapshots()
	for i in iscs:
		i.Join()

	for i in iscs:
		i.PrintWhatToKeepAndDelete()

	num_AMIs_to_delete = 0
	for i in iscs:
		num_AMIs_to_delete += len(i.imgs_myproj_to_delete)
	if num_AMIs_to_delete == 0:
		Cons.P("Nothing to delete")
		return

	confirm = raw_input("Would you like to proceed (Y/N)? ")
	if confirm.lower() != "y":
		return

	Cons.P("")
	Cons.P("Deregistering Amis and deleting snapshots ...")
	for i in iscs:
		i.DeleteOldAmisSnapshots()
	for i in iscs:
		i.Join()
    def __init__(self, az_or_region):
        # An availability zone name ends with a letter (e.g. us-east-1a);
        # a region name ends with a digit (e.g. us-east-1).
        if re.match(r".*[a-z]$", az_or_region):
            self.az = az_or_region
            self.region_name = self.az[:-1]
        else:
            self.az = None
            self.region_name = az_or_region
        self.ami_id = Ec2Region.GetLatestAmiId(self.region_name)
def Run(tags=None):
    sys.stdout.write("desc_instances:")
    sys.stdout.flush()

    diprs = []
    for r in Ec2Region.All():
        diprs.append(DescInstPerRegion(r, tags))

    threads = []
    for dipr in diprs:
        t = threading.Thread(target=dipr.Run)
        threads.append(t)
        t.daemon = True
        t.start()

    for t in threads:
        t.join()
    print ""

    num_insts = 0
    for dipr in diprs:
        num_insts += dipr.NumInsts()
    if num_insts == 0:
        Cons.P("No instances found.")
        return

    print ""
    Cons.P(
        Util.BuildHeader(
            _fmt,
            "job_id"
            " Placement:AvailabilityZone"
            " InstanceId"
            #" InstanceType"
            #" LaunchTime"
            #" PrivateIpAddress"
            " PublicIpAddress"
            " State:Name"
            #" Tag:Name"
        ))

    results = []
    for dipr in diprs:
        results += dipr.GetResults()
    for r in sorted(results):
        Cons.P(r)
def main(argv):
    with Cons.MTnnl("Checking:"):
        checks = []
        for r in Ec2Region.All():
            checks.append(Check(r))

        threads = []
        for c in checks:
            t = threading.Thread(target=c.Run)
            threads.append(t)
            t.start()

        for t in threads:
            t.join()
        print ""

        for c in checks:
            Cons.P("%-14s %2d" % (c.region, c.max_inst))
def GetServerPubIpsByJobId(job_id):
	threads = []

	dis = []
	for r in Ec2Region.All():
		dis.append(_DescInst(r, {"job_id": job_id}))

	for di in dis:
		t = threading.Thread(target=di.Run)
		threads.append(t)
		t.start()
	for t in threads:
		t.join()

	ips = []
	for di in dis:
		ips.extend(di.GetIPs())
	return ips
def GetByTags(tags):
    threads = []

    dis = []
    for r in Ec2Region.All():
        dis.append(DescInst(r, tags))

    for di in dis:
        t = threading.Thread(target=di.Run)
        threads.append(t)
        t.start()

    for t in threads:
        t.join()

    ips = []
    for di in dis:
        ip = di.GetIp()
        if ip is None:
            continue
        ips.append(ip)
    return ips
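
# The per-region fan-out above (one worker object per Ec2Region, one thread per worker,
# join, then collect results) is repeated by several of these helpers. A minimal
# generic sketch of the pattern, not part of the original code; worker_factory is any
# callable that takes a region name and returns an object with a Run() method.
def _FanOutPerRegion(worker_factory):
    workers = [worker_factory(r) for r in Ec2Region.All()]
    threads = []
    for w in workers:
        t = threading.Thread(target=w.Run)
        t.daemon = True
        threads.append(t)
        t.start()
    for t in threads:
        t.join()
    return workers

# For example, GetByTags() above could build its per-region describers with:
#   dis = _FanOutPerRegion(lambda r: DescInst(r, tags))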
def GetInstDescs(tags=None):
    sys.stdout.write("desc_instances:")
    sys.stdout.flush()

    dis = []
    for r in Ec2Region.All():
        dis.append(DescInstPerRegion(r, tags))

    threads = []
    for di in dis:
        t = threading.Thread(target=di.Run)
        threads.append(t)
        t.start()

    for t in threads:
        t.join()
    print ""

    inst_descs = []
    for di in dis:
        inst_descs += di.GetInstDesc()
    return inst_descs
def ByTags(tags, job_id_none_requested):
    Cons.Pnnl("Terminating running instances:")
    _TermInst.Init(job_id_none_requested)

    tis = []
    for r in Ec2Region.All():
        tis.append(_TermInst(r, tags))

    threads = []
    for ti in tis:
        t = threading.Thread(target=ti.Run)
        t.daemon = True
        threads.append(t)
        t.start()

    for t in threads:
        t.join()
    print ""

    Cons.P(_TermInst.Header())
    for ti in tis:
        ti.PrintResult()
def main(argv):
    iscs = []
    for r in Ec2Region.All():
        iscs.append(ImageSnapshotCleaner(region=r))
    for i in iscs:
        i.GetImages()
    for i in iscs:
        i.Join()

    for i in iscs:
        i.GetSnapshots()
    for i in iscs:
        i.Join()

    for i in iscs:
        i.PrintWhatToKeepAndDelete()

    Cons.P("")
    Cons.P("Deregistering Amis and deleting snapshots ...")
    for i in iscs:
        i.DeleteOldAmisSnapshots()
    for i in iscs:
        i.Join()
    def _DescInst(self):
        if self.mode == "run_until_stopped":
            self.dio.P("\n")
        self.dio.P("Describing instances:")

        DescInstPerRegion.Reset()

        dis = []
        for r in Ec2Region.All():
            dis.append(DescInstPerRegion(r, self.dio))

        self.per_region_threads = []
        for di in dis:
            t = threading.Thread(target=di.Run)
            self.per_region_threads.append(t)
            t.daemon = True
            t.start()

        # Exit immediately when requested
        for t in self.per_region_threads:
            while t.isAlive():
                if self.stop_requested:
                    return
                t.join(0.1)

        self.dio.P("\n")

        num_insts = 0
        with _num_nodes_per_region_lock:
            for di in dis:
                num_insts += len(di.Instances())
                # Decrement slowly, 0.2 at a time, so that the perceived capacity
                # doesn't jump up all at once. Increase immediately to whatever the
                # boto library reports.
                n = _num_nodes_per_region.get(di.region)
                if n is None:
                    n = len(di.Instances())
                else:
                    if len(di.Instances()) < n:
                        n -= 0.2
                    else:
                        n = len(di.Instances())
                _num_nodes_per_region[di.region] = n

        if num_insts == 0:
            self.dio.P("No instances found.\n")
        else:
            self.dio.P("#"
                       " job_id"
                       " (Placement:AvailabilityZone"
                       " InstanceId"
                       " PublicIpAddress"
                       " State:Name) ...\n")

            # Group by job_id. Only for those with job_ids
            #   { job_id: {region: Inst} }
            jobid_inst = {}
            # Instances without any job_id
            #   { region: [Inst] }
            nojobid_inst = {}
            num_nojobid_inst = 0
            for di in dis:
                for i in di.Instances():
                    if i.job_id is not None:
                        if i.job_id not in jobid_inst:
                            jobid_inst[i.job_id] = {}
                        jobid_inst[i.job_id][i.region] = i
                    else:
                        if i.region not in nojobid_inst:
                            nojobid_inst[i.region] = []
                        nojobid_inst[i.region].append(i)
                        num_nojobid_inst += 1

            ClusterCleaner.Clean(jobid_inst)

            for job_id, v in sorted(jobid_inst.iteritems()):
                self.dio.P("%s %d" % (job_id, len(v)))
                for k1, i in sorted(v.iteritems()):
                    #msg = " (%s %s %s %s)" % (i.az, i.inst_id, i.public_ip, i.state)
                    msg = " (%s %s %s)" % (i.az, i.public_ip, i.state)
                    if self.dio.LastLineWidth() + len(
                            msg) > DIO.max_column_width:
                        self.dio.P("\n  ")
                    self.dio.P(msg)
                self.dio.P("\n")

            if len(nojobid_inst) > 0:
                self.dio.P("%-13s %d" % ("no-job-id", num_nojobid_inst))
                for region, insts in sorted(nojobid_inst.iteritems()):
                    for i in insts:
                        msg = " (%s %s %s)" % (i.az, i.public_ip, i.state)
                        if self.dio.LastLineWidth() + len(
                                msg) > DIO.max_column_width:
                            self.dio.P("\n  ")
                        self.dio.P(msg)
                self.dio.P("\n")

        if self.mode == "run_until_stopped":
            self.dio.P("Time since the last msg: %s" %
                       (datetime.datetime.now() - self.desc_inst_start_time))
            self.dio.Flush()
    def _ReqSpotInst(self):
        # This is run as root
        #
        # http://unix.stackexchange.com/questions/4342/how-do-i-get-sudo-u-user-to-use-the-users-env
        user_data = \
      """#!/bin/bash
cd /home/ubuntu/work
rm -rf /home/ubuntu/work/acorn-tools
sudo -i -u ubuntu bash -c 'git clone https://github.com/hobinyoon/acorn-tools.git /home/ubuntu/work/acorn-tools'
sudo -i -u ubuntu /home/ubuntu/work/acorn-tools/ec2/ec2-init.py {0} {1} {2} {3}
"""
        user_data = user_data.format(self.tags["init_script"], self.jr_sqs_url,
                                     self.jr_sqs_msg_receipt_handle,
                                     self.num_regions)

        ls = {
            'ImageId': Ec2Region.GetLatestAmiId(region=self.region, name=self.ami_name)
            #, 'KeyName': 'string'
            , 'SecurityGroups': ["cass-server"]
            , 'UserData': base64.b64encode(user_data)
            #, 'AddressingType': 'string'
            , 'InstanceType': self.inst_type
            , 'EbsOptimized': True
            , 'Placement': {'AvailabilityZone': self.az}
        }

        while True:
            try:
                r = BotoClient.Get(self.region).request_spot_instances(
                    SpotPrice=str(self.max_price),
                    #ClientToken='string',
                    InstanceCount=1,
                    Type='one-time',
                    #ValidFrom=datetime(2015, 1, 1),
                    #ValidUntil=datetime(2015, 1, 1),
                    #LaunchGroup='string',
                    #AvailabilityZoneGroup='string',

                    # https://aws.amazon.com/blogs/aws/new-ec2-spot-blocks-for-defined-duration-workloads/
                    #BlockDurationMinutes=123,
                    LaunchSpecification=ls,
                )
                self.log.P("SpotInstReqResp: %s" % pprint.pformat(r))
                if len(r["SpotInstanceRequests"]) != 1:
                    raise RuntimeError("len(r[\"SpotInstanceRequests\"])=%d" %
                                       len(r["SpotInstanceRequests"]))
                self.spot_req_id = r["SpotInstanceRequests"][0][
                    "SpotInstanceRequestId"]
                self.log.P("region=%s spot_req_id=%s" %
                           (self.region, self.spot_req_id))
                break
            except botocore.exceptions.ClientError as e:
                if e.response["Error"]["Code"] == "RequestLimitExceeded":
                    self.log.P("region=%s error=%s" % (self.region, e))
                    time.sleep(5)
                else:
                    raise e
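
    # A minimal follow-up sketch, not part of the original class: once spot_req_id is
    # set, describe_spot_instance_requests() can be polled until the request is
    # fulfilled and an InstanceId is assigned. Assumes BotoClient.Get(region) returns
    # a boto3 EC2 client, as in _ReqSpotInst() above.
    def _WaitSpotReqFulfilled(self):
        while True:
            r = BotoClient.Get(self.region).describe_spot_instance_requests(
                SpotInstanceRequestIds=[self.spot_req_id])
            req = r["SpotInstanceRequests"][0]
            inst_id = req.get("InstanceId")
            if inst_id is not None:
                self.log.P("region=%s spot_req_id=%s fulfilled inst_id=%s"
                           % (self.region, self.spot_req_id, inst_id))
                return inst_id
            time.sleep(5)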
def _DescInst(dio):
    if _mode == "run_until_stopped":
        dio.P("\n")
    dio.P("# Describing instances:")

    DescInstPerRegion.Reset()

    region_desc_inst = {}
    for r in Ec2Region.All():
        region_desc_inst[r] = DescInstPerRegion(r, dio)

    if _stop_requested:
        return

    threads = []
    for r, di in region_desc_inst.iteritems():
        t = threading.Thread(target=di.Run)
        threads.append(t)
        t.daemon = True
        t.start()

    # Exit immediately when requested
    for t in threads:
        while t.isAlive():
            if _stop_requested:
                return
            t.join(0.1)

    dio.P("\n#\n")

    num_insts = 0
    for r, di in region_desc_inst.iteritems():
        num_insts += len(di.Instances())

    if num_insts == 0:
        dio.P("No instances found.\n")
    else:
        # Header
        fmt = "%-15s %13s %-10s %6.4f %2s %19s %15s %13s"
        dio.P(
            Util.BuildHeader(
                fmt, "az"
                " job_id"
                " inst_type"
                " cur_spot_price"
                " name"
                " InstanceId"
                " PublicIpAddress"
                " State:Name") + "\n")

        for r, di in sorted(region_desc_inst.iteritems()):
            for i in di.Instances():
                # Note: could be grouped by job_id later
                dio.P((fmt + "\n") %
                      (i.az, i.job_id, i.inst_type,
                       SpotPrice.GetCur(i.az, i.inst_type),
                       i.name.replace("server", "s").replace(
                           "client", "c"), i.inst_id, i.public_ip, i.state))
    if _mode == "run_once":
        sys.exit(0)

        # Note: JobCleaner could use this node info

    if _stop_requested:
        return

    if _mode == "run_until_stopped":
        # Since the last JobContConsole output
        dio.P("# Time since the last msg: %s" %
              (datetime.datetime.now() - _desc_inst_start_time))
        dio.Flush()
import os
import sys
import threading
import time
import traceback

sys.path.insert(0, "%s/../../util/python" % os.path.dirname(__file__))
import Cons
import Util

sys.path.insert(0, "%s/.." % os.path.dirname(__file__))
import BotoClient
import Ec2Region

import ClusterCleaner
import JobControllerLog

# Initialize all values to None
_num_nodes_per_region = dict.fromkeys(Ec2Region.All())
_num_nodes_per_region_lock = threading.Lock()


def CanLaunchAnotherCluster():
    # Returns True when all regions have less than 12 instances.
    with _num_nodes_per_region_lock:
        for r in Ec2Region.All():
            v = _num_nodes_per_region.get(r)
            if v is None:
                return False
            if v >= 12:
                return False
        #JobControllerLog.P("%s %s" % (Util.FileLine(), pprint.pformat(_num_nodes_per_region)))

        # You can launch another cluster now
        for r in Ec2Region.All():
            _num_nodes_per_region[r] += 1
        return True
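

# A minimal usage sketch, not part of the original module: a launcher could poll
# CanLaunchAnotherCluster() before starting each new cluster, backing off while every
# region is at the 12-instance cap. launch_fn is a hypothetical callable that starts
# one cluster.
def _LaunchWhenCapacityAvailable(launch_fn):
    while not CanLaunchAnotherCluster():
        time.sleep(10)
    launch_fn()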