def cancel_unmigrated_requests(tenants):
    """
    There are two cases to handle here. Either there are no idle jobs, so
    all requests should be cancelled. Or there are idle jobs but the
    existing requests could not be migrated to them. In this case, any
    orphaned requests should also be cancelled.
    """
    for tenant in tenants:
        # Start by grabbing all of the open spot requests for this tenant.
        ids_to_check = ProvisionerConfig().simulator.get_open_requests()
        # Get the set of idle job numbers.
        idle_job_numbers = []
        for job in tenant.jobs:
            if job.sim_status == 'IDLE':
                idle_job_numbers.append(job.id)
        # Now get all of the orphaned requests.
        reqs = get_orphaned_requests(tenant, ids_to_check, idle_job_numbers)
        # Build a list of request ids we can pass to the simulator.
        reqs_to_cancel = [req['request_id'] for req in reqs]
        # Cancel only the orphaned requests, not every open request
        # (the original passed ids_to_check here, which would also have
        # cancelled requests that were successfully migrated).
        try:
            if len(reqs_to_cancel) > 0:
                logger.debug("Cancelling unmigrated requests: %s"
                             % reqs_to_cancel)
                ProvisionerConfig().simulator.cancel_spot_instance_requests(
                    reqs_to_cancel)
        except Exception as e:
            logger.exception("Error removing spot instance requests.")
            raise e
def update_database(tenants):
    """
    Record when an instance is started in the database. This should also
    try and record when an instance is terminated. In future work this
    should probably calculate the cost of the instance as well.
    """
    for tenant in tenants:
        conn = boto.connect_ec2(tenant.access_key, tenant.secret_key)
        try:
            # First get all operating instances (instances probably are
            # not yet tagged, so don't filter them yet).
            reservations = conn.get_all_instances()
            instance_spot_ids = []
            for r in reservations:
                for i in r.instances:
                    if i.spot_instance_request_id is not None:
                        # Fulfilled spot requests are keyed by their spot
                        # request id.
                        instance_spot_ids.append(
                            "'%s'" % i.spot_instance_request_id)
                    else:
                        # On-demand instances use the instance id instead.
                        instance_spot_ids.append("'%s'" % i.id)
            # Get the entry in the instance_request table for each of
            # these requests.
            check_for_new_instances(reservations, instance_spot_ids,
                                    conn, tenant)
            check_for_terminated_instances(reservations)
        except Exception:
            logger.exception("Error updating database. Or, more likely, "
                             "the instance wasn't yet registered by "
                             "Amazon, so skip this error this time.")
def launch_spot_request(conn, request, tenant, job):
    try:
        # All three currently point at the cost-aware request; earlier
        # assignments from job.DrAFTS and job.DrAFTSAvg were immediately
        # overwritten (dead code) and have been dropped.
        cost_aware_req = job.cost_aware
        drafts_req = job.cost_aware
        drafts_avg = job.cost_aware
        mapping = None
        my_req_ids = ProvisionerConfig().simulator.request_spot_instances(
            price=request.bid,
            image_id=request.ami,
            subnet_id=tenant.subnets[request.zone],
            count=request.count,
            key_name=tenant.key_pair,
            security_group_ids=[tenant.security_group],
            instance_type=request.instance_type,
            user_data=customise_cloudinit(tenant, job),
            block_device_map=mapping,
            job=job)
        for req in my_req_ids:
            # Tag each request with the tenant name.
            tag_requests(req, tenant.name, conn)
            ProvisionerConfig().dbconn.execute(
                ("insert into instance_request (tenant, instance_type, "
                 "price, job_runner_id, request_type, request_id, "
                 "subnet, cost_aware_ins, cost_aware_bid, "
                 "cost_aware_subnet, drafts_ins, drafts_bid, "
                 "drafts_subnet, selected_avg_price, "
                 "cost_aware_avg_price, drafts_avg_price, drafts_avg_ins, "
                 "drafts_avg_bid, drafts_avg_subnet, "
                 "drafts_avg_avg_price) "
                 "values ('%s', '%s', %s, '%s', '%s', '%s', %s, %s, %s, "
                 "%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)") %
                (tenant.db_id, request.instance.db_id, request.OraclePrice,
                 job.id, "spot", req, tenant.subnets_db_id[request.zone],
                 cost_aware_req.instance.db_id, cost_aware_req.bid,
                 tenant.subnets_db_id[cost_aware_req.zone],
                 drafts_req.instance.db_id, drafts_req.DrAFTS,
                 tenant.subnets_db_id[drafts_req.zone], request.AvgPrice,
                 cost_aware_req.AvgPrice, drafts_req.AvgPrice,
                 drafts_avg.instance.db_id, drafts_avg.DrAFTS,
                 tenant.subnets_db_id[drafts_avg.zone],
                 drafts_avg.AvgPrice))
        return my_req_ids
    except boto.exception.EC2ResponseError:
        logger.exception("There was an error communicating with EC2.")
def get_orphaned_requests(tenant, ids_to_check, idle_job_numbers):
    """
    Check if there are any requests that don't belong to a job in the
    idle queue.
    """
    res = []
    if len(ids_to_check) > 0:
        try:
            # Add quotes and commas to the list items for psql.
            sir_ids = ', '.join("'%s'" % item for item in ids_to_check)
            logger.debug("Checking requests %s against idle jobs %s"
                         % (sir_ids, idle_job_numbers))
            # Get any requests that do not belong to an idle job.
            rows = []
            if len(idle_job_numbers) > 0:
                rows = ProvisionerConfig().dbconn.execute(
                    ("select instance_request.id, instance_type.type, "
                     "instance_request.job_runner_id, "
                     "instance_request.request_id from instance_request, "
                     "instance_type where "
                     "instance_request.instance_type = instance_type.id "
                     "and job_runner_id not in (%s) and "
                     "request_id in (%s) and request_type = 'spot' and "
                     "tenant = %s") %
                    (",".join("'%s'" % num for num in idle_job_numbers),
                     sir_ids, tenant.db_id))
            else:
                rows = ProvisionerConfig().dbconn.execute(
                    ("select instance_request.id, instance_type.type, "
                     "instance_request.job_runner_id, "
                     "instance_request.request_id from instance_request, "
                     "instance_type where "
                     "instance_request.instance_type = instance_type.id "
                     "and request_id in (%s) and request_type = 'spot' "
                     "and tenant = %s") % (sir_ids, tenant.db_id))
            # The rows object closes after being returned, so build a
            # plain dict for each row instead.
            for row in rows:
                res.append({
                    'id': row['id'],
                    'type': row['type'],
                    'job_runner_id': row['job_runner_id'],
                    'request_id': row['request_id']
                })
                logger.warn("Orphaned request %s" % row['request_id'])
        except psycopg2.Error:
            logger.exception("Error finding orphaned requests.")
    return res
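# Each entry in the list returned by get_orphaned_requests is a plain
# dict; the values below are illustrative only:
#
#   {'id': 42, 'type': 'm3.medium', 'job_runner_id': '1234',
#    'request_id': 'sir-12ab34cd'}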
def get_instance_types():
    """
    Get the set of available instance types from the database.
    """
    instances = []
    try:
        # This is a module-level function, so use the shared config
        # connection (the original referenced self.dbconn, which does not
        # exist here).
        rows = ProvisionerConfig().dbconn.execute(
            "select * from instance_type where available = True")
        for row in rows:
            instances.append(aws.Instance(
                row['id'], row['type'], row['ondemand_price'],
                row['cpus'], row['memory'], row['disk'], row['ami']))
    except psycopg2.Error:
        logger.exception("Error getting instance types from database.")
    return instances
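# Hedged usage sketch: narrowing the available types to those that can
# fit a job's request. The job attributes (req_cpus, req_memory) are
# illustrative names, and the instance attributes are assumed to mirror
# the aws.Instance constructor arguments above:
#
#   candidates = [i for i in get_instance_types()
#                 if i.cpus >= job.req_cpus and i.memory >= job.req_memory]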
def launch_ondemand_request(conn, request, tenant, job):
    try:
        # Map the root volume and four ephemeral drives.
        mapping = BlockDeviceMapping()
        sda1 = BlockDeviceType()
        eph0 = BlockDeviceType()
        eph1 = BlockDeviceType()
        eph2 = BlockDeviceType()
        eph3 = BlockDeviceType()
        sda1.size = 10
        eph0.ephemeral_name = 'ephemeral0'
        eph1.ephemeral_name = 'ephemeral1'
        eph2.ephemeral_name = 'ephemeral2'
        eph3.ephemeral_name = 'ephemeral3'
        mapping['/dev/sda1'] = sda1
        mapping['/dev/sdb'] = eph0
        mapping['/dev/sdc'] = eph1
        mapping['/dev/sdd'] = eph2
        mapping['/dev/sde'] = eph3
        # Issue a run_instances command for this request.
        res = conn.run_instances(
            min_count=request.count,
            max_count=request.count,
            key_name=tenant.key_pair,
            image_id=request.ami,
            security_group_ids=[tenant.security_group],
            user_data=customise_cloudinit(tenant, job),
            instance_type=request.instance_type,
            subnet_id=tenant.subnet,
            block_device_map=mapping)
        my_req_ids = [req.id for req in res.instances]
        for req in my_req_ids:
            # Tag each request with the tenant name.
            tag_requests(req, tenant.name, conn)
            # Update the database to include the new request.
            ProvisionerConfig().dbconn.execute(
                ("insert into instance_request (tenant, instance_type, "
                 "price, job_runner_id, request_type, request_id, "
                 "subnet) values ('%s', '%s', %s, %s, '%s', '%s', %s)") %
                (tenant.db_id, request.instance.db_id,
                 request.instance.ondemand, job.id, "ondemand", req,
                 tenant.subnet_id))
        return
    except boto.exception.EC2ResponseError:
        logger.exception("There was an error communicating with EC2.")
def tag_requests(req, tag, conn):
    """
    Tag any requests that have just been made with the tenant name.
    """
    # Retry a few times, as the request may not yet be registered
    # with EC2.
    for _ in range(0, 3):
        try:
            conn.create_tags([req], {"tenant": tag})
            conn.create_tags([req], {"Name": 'worker@%s' % tag})
            break
        except (boto.exception.BotoClientError,
                boto.exception.BotoServerError):
            # Back off briefly and try again.
            time.sleep(2)
        except boto.exception.EC2ResponseError:
            logger.exception("There was an error communicating with EC2.")
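# Hedged usage sketch of tagging a freshly created request; the request
# id 'sir-12ab34cd' is illustrative:
#
#   conn = boto.connect_ec2(tenant.access_key, tenant.secret_key)
#   tag_requests('sir-12ab34cd', tenant.name, conn)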
def cancel_unmigrated_requests(tenants):
    """
    There are two cases to handle here. Either there are no idle jobs, so
    all requests should be cancelled. Or there are idle jobs but the
    existing requests could not be migrated to them. In this case, any
    orphaned requests should also be cancelled.
    """
    for tenant in tenants:
        # Start by grabbing all of the open spot requests for this tenant.
        conn = boto.connect_ec2(tenant.access_key, tenant.secret_key)
        reqs = conn.get_all_spot_instance_requests(filters={
            "tag-value": tenant.name,
            "state": "open"
        })
        # Get a list of ids that can be used in a db query.
        ids_to_check = ["%s" % r.id for r in reqs]
        # Get the set of idle job numbers.
        idle_job_numbers = []
        for job in tenant.jobs:
            if job.status == '1':
                idle_job_numbers.append(job.id)
        # Now get all of the orphaned requests.
        reqs = get_orphaned_requests(tenant, ids_to_check, idle_job_numbers)
        # Build a list of request ids we can pass to boto.
        reqs_to_cancel = [req['request_id'] for req in reqs]
        # Cancel only the orphaned requests, not every open request
        # (the original passed ids_to_check here, which would also have
        # cancelled requests that were successfully migrated).
        try:
            if len(reqs_to_cancel) > 0:
                logger.debug("Cancelling unmigrated requests: %s"
                             % reqs_to_cancel)
                conn.cancel_spot_instance_requests(reqs_to_cancel)
        except Exception as e:
            logger.exception("Error removing spot instance requests.")
            raise e
def update_database(tenants):
    """
    Record when an instance is started in the database. This should also
    try and record when an instance is terminated. In future work this
    should probably calculate the cost of the instance as well.
    """
    for tenant in tenants:
        try:
            # First get all operating instances from the simulator
            # (instances probably are not yet tagged, so don't filter
            # them yet).
            reservations = ProvisionerConfig().simulator.get_all_instances()
            instance_spot_ids = (
                ProvisionerConfig().simulator.get_spot_instances())
            check_for_terminated_instances(reservations)
        except psycopg2.Error:
            logger.exception("Error updating database.")
def get_existing_requests(self, tenant, job):
    """
    Get all of the outstanding requests from the db for this job.
    """
    existing_requests = []
    try:
        rows = ProvisionerConfig().dbconn.execute(
            ("select instance_request.instance_type, "
             "instance_request.request_type, "
             "instance_type.type, "
             "instance_request.subnet, subnet_mapping.zone "
             "from instance_request, subnet_mapping, instance_type "
             "where job_runner_id = '%s' and "
             "instance_request.tenant = %s and "
             "instance_request.instance_type = instance_type.id and "
             "subnet_mapping.id = instance_request.subnet") %
            (job.id, tenant.db_id))
        for row in rows:
            existing_requests.append(aws.Request(
                None, row['type'], row['zone'], None, None))
    except psycopg2.Error:
        logger.exception("Error getting outstanding requests.")
    return existing_requests
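# Hedged usage sketch: the returned aws.Request objects can be used to
# avoid requesting the same zone twice for one job. The .zone attribute
# name is an assumption mirroring the constructor's zone argument:
#
#   existing = self.get_existing_requests(tenant, job)
#   requested_zones = set(r.zone for r in existing)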
def migrate_request_to_job(request, job):
    """
    Check if an instance can be repurposed to another job and update the
    database. Returns True on a successful migration.
    """
    # Check to see if the job can be fulfilled by the requested instance.
    if check_requirements(request['type'], job):
        next_idle_job_id = job.id
        try:
            logger.debug(
                ("Migrating instance request %s, from job "
                 "%s to job %s.") % (request['id'],
                                     request['job_runner_id'],
                                     next_idle_job_id))
            # Point the request at its new job.
            ProvisionerConfig().dbconn.execute(
                ("update instance_request set job_runner_id = '%s' "
                 "where id = %s") % (next_idle_job_id, request['id']))
            # Record the migration.
            ProvisionerConfig().dbconn.execute(
                ("insert into request_migration "
                 "(request_id, from_job, to_job, migration_time) "
                 "values (%s, %s, %s, NOW())") %
                (request['id'], request['job_runner_id'],
                 next_idle_job_id))
            return True
        except psycopg2.Error:
            logger.exception("Error performing migration in database.")
    return False
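# Hedged usage sketch of the migration pass this function supports:
# try to repurpose each orphaned request to an idle job before falling
# back to cancellation. The loop shape is illustrative:
#
#   for request in get_orphaned_requests(tenant, ids_to_check, idle_ids):
#       for job in tenant.idle_jobs:
#           if migrate_request_to_job(request, job):
#               break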
def get_global_queue(self):
    """
    Poll condor_q -global and return a set of Jobs.
    """
    cmd = [
        'condor_q', '-global',
        '-format', '%s:', 'GlobalJobId',
        '-format', '%s:', 'ClusterId',
        '-format', '%s:', 'JobStatus',
        '-format', '%s:', 'QDate',
        '-format', '%s:', 'RequestCpus',
        '-format', '%s:', 'RequestMemory',
        '-format', '%s:', 'RequestDisk',
        '-format', '%s', 'JobDescription',
        '-format', '%s\n', 'ExitStatus'
    ]
    output = subprocess.Popen(cmd, stdout=subprocess.PIPE).communicate()[0]
    queue = filter(None, output.split("\n"))
    jobs = []
    if len(queue) > 0:
        # Record the time of the first job if this is it.
        if ProvisionerConfig().first_job_time is None:
            logger.debug("Simulation: first job time set")
            utc = timezone('UTC')
            ProvisionerConfig().first_job_time = datetime.datetime.now(utc)
        for line in queue:
            if "All queues are empty" in line:
                break
            try:
                split = line.split(":")
                # Grab the address of the tenant from the global id.
                tenant_addr = ""
                if "#" in split[0]:
                    tenant_addr = split[0].split("#")[0]
                # RequestMemory is either a number (in MB) or a string
                # expression about the requested memory, so only convert
                # it if it is a number.
                req_memory = 0
                try:
                    req_memory = int(split[5])
                    if req_memory > 1024:
                        # Change it to use GB, like instance types.
                        req_memory = req_memory / 1024
                except Exception:
                    pass
                # RequestDisk behaves the same way; it is also in MB,
                # I believe.
                req_disk = 0
                try:
                    req_disk = int(split[6])
                    if req_disk > 1024:
                        # Change it to use GB, like instance types.
                        req_disk = req_disk / 1024
                except Exception:
                    pass
                # Decipher the description of the job as well (name, etc.).
                description = {}
                if "=" in split[7]:
                    description = self.process_job_description(split[7])
                # Create the job: tenant address, cluster id, status,
                # queue time, requested cpus, memory, disk, description.
                j = Job(tenant_addr, split[1], split[2], split[3],
                        split[4], req_memory, req_disk, description)
                jobs.append(j)
            except Exception as e:
                logger.exception("Something has gone wrong while "
                                 "processing the job queue.")
                raise e
    return jobs
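# For reference, each line emitted by the condor_q invocation above is a
# colon-separated record (values illustrative):
#
#   submit.example.com#1234.0#1490000000:1234:1:1490000000:1:2048:10240:name=worker:0
#
# which line.split(":") maps to GlobalJobId, ClusterId, JobStatus, QDate,
# RequestCpus, RequestMemory, RequestDisk, JobDescription, ExitStatus.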
def __init__(self, *args, **kwargs):
    """
    Load provisioner configuration based on the settings in a config
    file.
    """
    # Override defaults with kwargs.
    config_file = 'ggprovisioner/provisioner.ini'
    cloudinit_file = "cloudinit.cfg"
    if 'config_file' in kwargs:
        config_file = kwargs['config_file']
    if 'cloudinit_file' in kwargs:
        cloudinit_file = kwargs['cloudinit_file']
    # We need to pull cloudinit from the DB in the future.
    self.cloudinit_file = cloudinit_file
    # Read config from a file.
    config = ConfigParser.ConfigParser()
    config.read(config_file)
    # Get DB connection info.
    user = config.get('Database', 'user')
    password = config.get('Database', 'password')
    host = config.get('Database', 'host')
    port = config.get('Database', 'port')
    database = config.get('Database', 'database')
    # Create a connection and keep it as a config attribute.
    try:
        engine = sqlalchemy.create_engine(
            'postgresql://%s:%s@%s:%s/%s' %
            (user, password, host, port, database),
            isolation_level="AUTOCOMMIT")
        self.dbconn = engine.connect()
    except psycopg2.Error:
        logger.exception("Failed to connect to database.")
    # Get some provisioner specific config settings.
    self.ondemand_price_threshold = float(
        config.get('Provision', 'ondemand_price_threshold'))
    self.max_requests = int(config.get('Provision', 'max_requests'))
    self.run_rate = int(config.get('Provision', 'run_rate'))
    self.DrAFTS = config.get('Provision', 'DrAFTS') == 'True'
    self.DrAFTSAvgPrice = (
        config.get('Provision', 'DrAFTSAvgPrice') == 'True')
    self.DrAFTSProfiles = (
        config.get('Provision', 'DrAFTSProfiles') == 'True')
    self.instance_types = []
    # Simulation settings.
    self.simulate = config.get('Simulation', 'Simulate') == 'True'
    self.neg_time = int(config.get('Simulation', 'NegTime'))
    self.job_number = int(config.get('Simulation', 'JobNumber'))
    self.idle_time = int(config.get('Simulation', 'IdleTime'))
    self.terminate = config.get('Simulation', 'Terminate')
    self.overhead_time = int(config.get('Simulation', 'OverheadTime'))
    self.simulate_jobs = config.get('Simulation', 'JobFile')
    self.run_name = config.get('Simulation', 'RunName')
    # Things for the simulator.
    self.first_job_time = None
    self.sim_time = datetime.datetime.strptime('2017-03-25T03:14:00Z',
                                               '%Y-%m-%dT%H:%M:%SZ')
    self.jobs_file = self.simulate_jobs.split("/")[-1]
    # Each canned workload has its own job file and simulated start time.
    if self.job_number == 100:
        self.jobs_file = '100jobs.json'
        self.simulate_jobs = '/home/ubuntu/100jobs.json'
        self.sim_time = datetime.datetime.strptime(
            '2017-03-23T21:35:00Z', '%Y-%m-%dT%H:%M:%SZ')
    elif self.job_number == 200:
        self.jobs_file = '200jobs.json'
        self.simulate_jobs = '/home/ubuntu/200jobs.json'
        self.sim_time = datetime.datetime.strptime(
            '2017-03-27T20:45:00Z', '%Y-%m-%dT%H:%M:%SZ')
    elif self.job_number == 500:
        self.jobs_file = '500jobs.json'
        self.simulate_jobs = '/home/ubuntu/500jobs.json'
        self.sim_time = datetime.datetime.strptime(
            '2017-03-28T21:55:00Z', '%Y-%m-%dT%H:%M:%SZ')
    elif self.job_number == 1000:
        self.jobs_file = '1000jobs.json'
        self.simulate_jobs = '/home/ubuntu/1000jobs.json'
        self.sim_time = datetime.datetime.strptime(
            '2017-03-25T03:14:00Z', '%Y-%m-%dT%H:%M:%SZ')
    if self.idle_time == 120:
        self.idle_time = 128
    self.sim_time = self.sim_time.replace(tzinfo=pytz.utc)
    self.run_id = random.randint(500, 10000)
    self.relative_time = None
    self.simulate_time = self.sim_time
    if self.simulate:
        self.simulator = (
            ggprovisioner.cloud.simaws.aws_simulator.AWSSimulator())
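# Hedged sketch of the provisioner.ini layout this constructor expects.
# The section and option names are taken verbatim from the config.get()
# calls above; the values are illustrative only:
#
#   [Database]
#   user = provisioner
#   password = secret
#   host = localhost
#   port = 5432
#   database = provisioner
#
#   [Provision]
#   ondemand_price_threshold = 0.9
#   max_requests = 3
#   run_rate = 60
#   DrAFTS = False
#   DrAFTSAvgPrice = False
#   DrAFTSProfiles = False
#
#   [Simulation]
#   Simulate = True
#   NegTime = 120
#   JobNumber = 100
#   IdleTime = 120
#   Terminate = True
#   OverheadTime = 60
#   JobFile = /home/ubuntu/100jobs.json
#   RunName = test-run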
def load_from_db():
    """
    Load all of the tenant data. This should let us iterate over the
    tenants and apply their preferences to the idle queue etc. It should
    also let us shut down unnecessary requests across the board, as this
    loads their AWS credentials.
    """
    tenant_list = []
    # Pull all of the tenant data from the database.
    try:
        # Only get those that are subscribed.
        tenant_columns = [
            'id', 'name', 'public_address', 'condor_address',
            'public_ip', 'zone', 'vpc', 'security_group', 'domain'
        ]
        tenant_settings_columns = [
            'max_bid_price', 'bid_percent', 'timeout_threshold'
        ]
        tenant_selection = ', '.join('tenant.' + c for c in tenant_columns)
        tenant_settings_selection = ', '.join(
            'tenant_settings.' + c for c in tenant_settings_columns)
        aws_creds_columns = ['access_key_id', 'secret_key', 'key_pair']
        aws_creds_selection = ', '.join(
            'aws_credentials.' + c for c in aws_creds_columns)
        subnet_columns = ['subnet', 'id as subnet_id']
        subnet_selection = ', '.join(
            'subnet_mapping.' + c for c in subnet_columns)
        full_selection = ', '.join(
            (tenant_selection, tenant_settings_selection,
             aws_creds_selection, subnet_selection))
        rows = ProvisionerConfig().dbconn.execute(
            "SELECT " + full_selection +
            " FROM tenant, tenant_settings, aws_credentials, subnet_mapping"
            " WHERE tenant_settings.tenant = tenant.id AND"
            " tenant.credentials = aws_credentials.id AND"
            " tenant.subscribed = TRUE AND subnet_mapping.tenant"
            " = tenant.id AND subnet_mapping.zone = tenant.zone")
        # Create a tenant object for each row returned.
        for row in rows:
            t = Tenant(row['id'], row['name'], row['public_address'],
                       row['condor_address'], row['public_ip'],
                       row['zone'], row['subnet'], row['subnet_id'],
                       row['vpc'], row['security_group'], row['domain'],
                       row['max_bid_price'], row['bid_percent'],
                       row['timeout_threshold'], row['access_key_id'],
                       row['secret_key'], row['key_pair'])
            # Pull the subnets for the tenant too.
            subnets = {}
            subnets_db_id = {}
            subs = ProvisionerConfig().dbconn.execute(
                "select * from subnet_mapping where tenant = %s"
                % t.db_id)
            # Create a dict for the subnets and add that to the tenant.
            # The database id of each subnet is also needed to store
            # instance requests in the database: hello, subnets_db_id.
            for sn in subs:
                subnets.update({sn['zone']: sn['subnet']})
                subnets_db_id.update({sn['zone']: sn['id']})
            t.subnets = subnets
            t.subnets_db_id = subnets_db_id
            tenant_list.append(t)
    except psycopg2.Error:
        logger.exception("Failed to get tenant data.")
    return tenant_list
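# Hedged sketch of a provisioning cycle built from the helpers in this
# section; the exact call order is illustrative:
#
#   tenants = load_from_db()
#   update_database(tenants)
#   cancel_unmigrated_requests(tenants)
#   stop_over_requesting(tenants)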
def launch_spot_request(conn, request, tenant, job):
    try:
        logger.debug("%s = %s. tenant's vpc = %s"
                     % (request.zone, tenant.subnets[request.zone],
                        tenant.vpc))
        # All three currently point at the cost-aware request.
        cost_aware_req = job.cost_aware
        drafts_req = job.cost_aware
        drafts_avg = job.cost_aware
        # Map the root volume and four ephemeral drives.
        mapping = BlockDeviceMapping()
        sda1 = BlockDeviceType()
        eph0 = BlockDeviceType()
        eph1 = BlockDeviceType()
        eph2 = BlockDeviceType()
        eph3 = BlockDeviceType()
        sda1.size = 10
        eph0.ephemeral_name = 'ephemeral0'
        eph1.ephemeral_name = 'ephemeral1'
        eph2.ephemeral_name = 'ephemeral2'
        eph3.ephemeral_name = 'ephemeral3'
        mapping['/dev/sda1'] = sda1
        mapping['/dev/sdb'] = eph0
        mapping['/dev/sdc'] = eph1
        mapping['/dev/sdd'] = eph2
        mapping['/dev/sde'] = eph3
        inst_req = conn.request_spot_instances(
            price=request.bid,
            image_id=request.ami,
            subnet_id=tenant.subnets[request.zone],
            count=request.count,
            key_name=tenant.key_pair,
            security_group_ids=[tenant.security_group],
            instance_type=request.instance_type,
            user_data=customise_cloudinit(tenant, job),
            block_device_map=mapping)
        my_req_ids = [req.id for req in inst_req]
        for req in my_req_ids:
            insert_launch_stats(req, request, tenant)
            # Tag each request with the tenant name.
            tag_requests(req, tenant.name, conn)
            ProvisionerConfig().dbconn.execute(
                ("insert into instance_request (tenant, instance_type, "
                 "price, job_runner_id, request_type, request_id, "
                 "subnet, cost_aware_ins, cost_aware_bid, "
                 "cost_aware_subnet, drafts_ins, drafts_bid, "
                 "drafts_subnet, selected_avg_price, "
                 "cost_aware_avg_price, drafts_avg_price, drafts_avg_ins, "
                 "drafts_avg_bid, drafts_avg_subnet, "
                 "drafts_avg_avg_price) "
                 "values ('%s', '%s', %s, %s, '%s', '%s', %s, %s, %s, "
                 "%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)") %
                (tenant.db_id, request.instance.db_id, request.price,
                 job.id, "spot", req, tenant.subnets_db_id[request.zone],
                 cost_aware_req.instance.db_id, cost_aware_req.bid,
                 tenant.subnets_db_id[cost_aware_req.zone],
                 drafts_req.instance.db_id, drafts_req.DrAFTS,
                 tenant.subnets_db_id[drafts_req.zone], request.AvgPrice,
                 cost_aware_req.AvgPrice, drafts_req.AvgPrice,
                 drafts_avg.instance.db_id, drafts_avg.DrAFTS,
                 tenant.subnets_db_id[drafts_avg.zone],
                 drafts_avg.AvgPrice))
        return my_req_ids
    except boto.exception.EC2ResponseError:
        logger.exception("There was an error communicating with EC2.")
def stop_over_requesting(tenants):
    """
    Stop too many requests being made for an individual job. This limits
    the frequency of new requests being made for an individual job.
    Future work would be to look at launching many requests instantly,
    and then cancelling the rest once one is fulfilled.
    """
    for tenant in tenants:
        # Stop excess instances being requested in a five minute round.
        logger.debug("Tenant: %s. Request rate: %s"
                     % (tenant.name, tenant.request_rate))
        if ProvisionerConfig().simulate:
            open_reqs = ProvisionerConfig().simulator.requests
        for job in list(tenant.idle_jobs):
            if ProvisionerConfig().simulate:
                if job.sim_status != "IDLE":
                    continue
            # Check to see if we are requesting too frequently.
            count = 0
            try:
                if ProvisionerConfig().simulate:
                    open_reqs = ProvisionerConfig().simulator.requests
                    for openreq in open_reqs:
                        if openreq.job_runner_id == job.id:
                            # Found an existing request for this job.
                            diff = (ProvisionerConfig().simulate_time -
                                    openreq.request_time).total_seconds()
                            if diff <= tenant.request_rate:
                                count = 1
                else:
                    rows = ProvisionerConfig().dbconn.execute(
                        ("select count(*) from instance_request "
                         "where job_runner_id = '%s' and "
                         "request_time >= Now() - "
                         "'%s second'::interval and tenant = %s;") %
                        (job.id, tenant.request_rate, tenant.db_id))
                    for row in rows:
                        count = row['count']
            except psycopg2.Error:
                logger.exception("Error getting number of outstanding "
                                 "requests within time frame.")
            if count > 0:
                tenant.idle_jobs.remove(job)
                logger.debug("Too many requests. Removed job %s" % job.id)
                continue
            # Now check to see if we already have too many outstanding
            # requests for this job.
            try:
                if ProvisionerConfig().simulate:
                    count = 0
                    open_reqs = ProvisionerConfig().simulator.requests
                    for openreq in open_reqs:
                        if openreq.job_runner_id == job.id:
                            count = count + 1
                    if count > ProvisionerConfig().max_requests:
                        logger.warn("Too many outstanding requests, "
                                    "removing idle job: %s" % repr(job))
                        tenant.idle_jobs.remove(job)
                else:
                    rows = ProvisionerConfig().dbconn.execute(
                        ("select count(*) from instance_request "
                         "where job_runner_id = '%s' and tenant = %s;") %
                        (job.id, tenant.db_id))
                    count = 0
                    for row in rows:
                        count = row['count']
                    if count > ProvisionerConfig().max_requests:
                        logger.warn("Too many outstanding requests, "
                                    "removing idle job: %s" % repr(job))
                        tenant.idle_jobs.remove(job)
            except psycopg2.Error:
                logger.exception("Error getting number of outstanding "
                                 "requests.")