def cancel_unmigrated_requests(tenants):
    """
    There are two cases to handle here. Either there are no idle jobs, so
    all requests should be cancelled. Or there are idle jobs but the
    existing requests could not be migrated to them. In this case, any
    orphaned requests should also be cancelled.
    """
    for tenant in tenants:
        # Start by grabbing all of the open spot requests for this tenant.
        ids_to_check = ProvisionerConfig().simulator.get_open_requests()
        # Get the set of idle job numbers.
        idle_job_numbers = []
        for job in tenant.jobs:
            if job.sim_status == 'IDLE':
                idle_job_numbers.append(job.id)
        # Now get all of the orphaned requests.
        reqs = get_orphaned_requests(tenant, ids_to_check, idle_job_numbers)
        # Build a list of request ids we can pass to the simulator.
        reqs_to_cancel = [req['request_id'] for req in reqs]
        # Cancel only the orphaned requests, not every open request
        # (the original passed ids_to_check here, which would also have
        # cancelled requests that were successfully migrated).
        try:
            if len(reqs_to_cancel) > 0:
                logger.debug("Cancelling unmigrated requests: %s"
                             % reqs_to_cancel)
                ProvisionerConfig().simulator.cancel_spot_instance_requests(
                    reqs_to_cancel)
        except Exception as e:
            logger.exception("Error removing spot instance requests.")
            raise e
def update_database(tenants):
    """
    Record when an instance is started in the database. This should also
    try and record when an instance is terminated. In future work this
    should probably calculate the cost of the instance as well.
    """
    for tenant in tenants:
        conn = boto.connect_ec2(tenant.access_key, tenant.secret_key)
        try:
            # First get all operating instances (instances probably are
            # not yet tagged, so don't filter them yet).
            reservations = conn.get_all_instances()
            instance_spot_ids = []
            for r in reservations:
                for i in r.instances:
                    if i.spot_instance_request_id is not None:
                        # Fulfilled spot requests are keyed by their spot
                        # request id.
                        instance_spot_ids.append(
                            "'%s'" % i.spot_instance_request_id)
                    else:
                        # On-demand instances use the instance id instead.
                        instance_spot_ids.append("'%s'" % i.id)
            # Get the entry in the instance_request table for each of
            # these requests.
            check_for_new_instances(reservations, instance_spot_ids,
                                    conn, tenant)
            check_for_terminated_instances(reservations)
        except Exception:
            logger.exception("Error updating database. Or, more likely, "
                             "the instance wasn't yet registered by "
                             "Amazon, so skip this error this time.")
def launch_spot_request(conn, request, tenant, job):
    try:
        # All three currently point at the cost-aware request; earlier
        # assignments from job.DrAFTS and job.DrAFTSAvg were immediately
        # overwritten (dead code) and have been dropped.
        cost_aware_req = job.cost_aware
        drafts_req = job.cost_aware
        drafts_avg = job.cost_aware
        mapping = None
        my_req_ids = ProvisionerConfig().simulator.request_spot_instances(
            price=request.bid,
            image_id=request.ami,
            subnet_id=tenant.subnets[request.zone],
            count=request.count,
            key_name=tenant.key_pair,
            security_group_ids=[tenant.security_group],
            instance_type=request.instance_type,
            user_data=customise_cloudinit(tenant, job),
            block_device_map=mapping,
            job=job)
        for req in my_req_ids:
            # Tag each request with the tenant name.
            tag_requests(req, tenant.name, conn)
            ProvisionerConfig().dbconn.execute(
                ("insert into instance_request (tenant, instance_type, "
                 "price, job_runner_id, request_type, request_id, "
                 "subnet, cost_aware_ins, cost_aware_bid, "
                 "cost_aware_subnet, drafts_ins, drafts_bid, "
                 "drafts_subnet, selected_avg_price, "
                 "cost_aware_avg_price, drafts_avg_price, drafts_avg_ins, "
                 "drafts_avg_bid, drafts_avg_subnet, "
                 "drafts_avg_avg_price) "
                 "values ('%s', '%s', %s, '%s', '%s', '%s', %s, %s, %s, "
                 "%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)") %
                (tenant.db_id, request.instance.db_id, request.OraclePrice,
                 job.id, "spot", req, tenant.subnets_db_id[request.zone],
                 cost_aware_req.instance.db_id, cost_aware_req.bid,
                 tenant.subnets_db_id[cost_aware_req.zone],
                 drafts_req.instance.db_id, drafts_req.DrAFTS,
                 tenant.subnets_db_id[drafts_req.zone], request.AvgPrice,
                 cost_aware_req.AvgPrice, drafts_req.AvgPrice,
                 drafts_avg.instance.db_id, drafts_avg.DrAFTS,
                 tenant.subnets_db_id[drafts_avg.zone],
                 drafts_avg.AvgPrice))
        return my_req_ids
    except boto.exception.EC2ResponseError:
        logger.exception("There was an error communicating with EC2.")
def get_orphaned_requests(tenant, ids_to_check, idle_job_numbers):
    """
    Check if there are any requests that don't belong to a job in the
    idle queue.
    """
    res = []
    if len(ids_to_check) > 0:
        try:
            # Add quotes and commas to the list items for psql.
            sir_ids = ', '.join("'%s'" % item for item in ids_to_check)
            logger.debug("Checking requests %s against idle jobs %s"
                         % (sir_ids, idle_job_numbers))
            # Get any requests that do not belong to an idle job.
            rows = []
            if len(idle_job_numbers) > 0:
                rows = ProvisionerConfig().dbconn.execute(
                    ("select instance_request.id, instance_type.type, "
                     "instance_request.job_runner_id, "
                     "instance_request.request_id from instance_request, "
                     "instance_type where "
                     "instance_request.instance_type = instance_type.id "
                     "and job_runner_id not in (%s) and "
                     "request_id in (%s) and request_type = 'spot' and "
                     "tenant = %s") %
                    (",".join("'%s'" % num for num in idle_job_numbers),
                     sir_ids, tenant.db_id))
            else:
                rows = ProvisionerConfig().dbconn.execute(
                    ("select instance_request.id, instance_type.type, "
                     "instance_request.job_runner_id, "
                     "instance_request.request_id from instance_request, "
                     "instance_type where "
                     "instance_request.instance_type = instance_type.id "
                     "and request_id in (%s) and request_type = 'spot' "
                     "and tenant = %s") % (sir_ids, tenant.db_id))
            # The rows object closes after being returned, so build a
            # plain dict for each row instead.
            for row in rows:
                res.append({
                    'id': row['id'],
                    'type': row['type'],
                    'job_runner_id': row['job_runner_id'],
                    'request_id': row['request_id']
                })
                logger.warn("Orphaned request %s" % row['request_id'])
        except psycopg2.Error:
            logger.exception("Error finding orphaned requests.")
    return res
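# Each entry in the list returned by get_orphaned_requests is a plain
# dict; the values below are illustrative only:
#
#   {'id': 42, 'type': 'm3.medium', 'job_runner_id': '1234',
#    'request_id': 'sir-12ab34cd'}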
def get_instance_types():
    """
    Get the set of available instance types from the database.
    """
    instances = []
    try:
        # This is a module-level function, so use the shared config
        # connection (the original referenced self.dbconn, which does not
        # exist here).
        rows = ProvisionerConfig().dbconn.execute(
            "select * from instance_type where available = True")
        for row in rows:
            instances.append(aws.Instance(
                row['id'], row['type'], row['ondemand_price'],
                row['cpus'], row['memory'], row['disk'], row['ami']))
    except psycopg2.Error:
        logger.exception("Error getting instance types from database.")
    return instances
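# Hedged usage sketch: narrowing the available types to those that can
# fit a job's request. The job attributes (req_cpus, req_memory) are
# illustrative names, and the instance attributes are assumed to mirror
# the aws.Instance constructor arguments above:
#
#   candidates = [i for i in get_instance_types()
#                 if i.cpus >= job.req_cpus and i.memory >= job.req_memory]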
def launch_ondemand_request(conn, request, tenant, job):
    try:
        # Map the root volume and four ephemeral drives.
        mapping = BlockDeviceMapping()
        sda1 = BlockDeviceType()
        eph0 = BlockDeviceType()
        eph1 = BlockDeviceType()
        eph2 = BlockDeviceType()
        eph3 = BlockDeviceType()
        sda1.size = 10
        eph0.ephemeral_name = 'ephemeral0'
        eph1.ephemeral_name = 'ephemeral1'
        eph2.ephemeral_name = 'ephemeral2'
        eph3.ephemeral_name = 'ephemeral3'
        mapping['/dev/sda1'] = sda1
        mapping['/dev/sdb'] = eph0
        mapping['/dev/sdc'] = eph1
        mapping['/dev/sdd'] = eph2
        mapping['/dev/sde'] = eph3
        # Issue a run_instances command for this request.
        res = conn.run_instances(
            min_count=request.count,
            max_count=request.count,
            key_name=tenant.key_pair,
            image_id=request.ami,
            security_group_ids=[tenant.security_group],
            user_data=customise_cloudinit(tenant, job),
            instance_type=request.instance_type,
            subnet_id=tenant.subnet,
            block_device_map=mapping)
        my_req_ids = [req.id for req in res.instances]
        for req in my_req_ids:
            # Tag each request with the tenant name.
            tag_requests(req, tenant.name, conn)
            # Update the database to include the new request.
            ProvisionerConfig().dbconn.execute(
                ("insert into instance_request (tenant, instance_type, "
                 "price, job_runner_id, request_type, request_id, "
                 "subnet) values ('%s', '%s', %s, %s, '%s', '%s', %s)") %
                (tenant.db_id, request.instance.db_id,
                 request.instance.ondemand, job.id, "ondemand", req,
                 tenant.subnet_id))
        return
    except boto.exception.EC2ResponseError:
        logger.exception("There was an error communicating with EC2.")
def tag_requests(req, tag, conn):
    """
    Tag any requests that have just been made with the tenant name.
    """
    # Retry a few times, as the request may not yet be registered
    # with EC2.
    for _ in range(0, 3):
        try:
            conn.create_tags([req], {"tenant": tag})
            conn.create_tags([req], {"Name": 'worker@%s' % tag})
            break
        except (boto.exception.BotoClientError,
                boto.exception.BotoServerError):
            # Back off briefly and try again.
            time.sleep(2)
        except boto.exception.EC2ResponseError:
            logger.exception("There was an error communicating with EC2.")
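# Hedged usage sketch of tagging a freshly created request; the request
# id 'sir-12ab34cd' is illustrative:
#
#   conn = boto.connect_ec2(tenant.access_key, tenant.secret_key)
#   tag_requests('sir-12ab34cd', tenant.name, conn)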
def cancel_unmigrated_requests(tenants):
    """
    There are two cases to handle here. Either there are no idle jobs, so
    all requests should be cancelled. Or there are idle jobs but the
    existing requests could not be migrated to them. In this case, any
    orphaned requests should also be cancelled.
    """
    for tenant in tenants:
        # Start by grabbing all of the open spot requests for this tenant.
        conn = boto.connect_ec2(tenant.access_key, tenant.secret_key)
        reqs = conn.get_all_spot_instance_requests(filters={
            "tag-value": tenant.name,
            "state": "open"
        })
        # Get a list of ids that can be used in a db query.
        ids_to_check = ["%s" % r.id for r in reqs]
        # Get the set of idle job numbers.
        idle_job_numbers = []
        for job in tenant.jobs:
            if job.status == '1':
                idle_job_numbers.append(job.id)
        # Now get all of the orphaned requests.
        reqs = get_orphaned_requests(tenant, ids_to_check, idle_job_numbers)
        # Build a list of request ids we can pass to boto.
        reqs_to_cancel = [req['request_id'] for req in reqs]
        # Cancel only the orphaned requests, not every open request
        # (the original passed ids_to_check here, which would also have
        # cancelled requests that were successfully migrated).
        try:
            if len(reqs_to_cancel) > 0:
                logger.debug("Cancelling unmigrated requests: %s"
                             % reqs_to_cancel)
                conn.cancel_spot_instance_requests(reqs_to_cancel)
        except Exception as e:
            logger.exception("Error removing spot instance requests.")
            raise e
def update_database(tenants):
    """
    Record when an instance is started in the database. This should also
    try and record when an instance is terminated. In future work this
    should probably calculate the cost of the instance as well.
    """
    for tenant in tenants:
        try:
            # First get all operating instances from the simulator
            # (instances probably are not yet tagged, so don't filter
            # them yet).
            reservations = ProvisionerConfig().simulator.get_all_instances()
            instance_spot_ids = (
                ProvisionerConfig().simulator.get_spot_instances())
            check_for_terminated_instances(reservations)
        except psycopg2.Error:
            logger.exception("Error updating database.")
def get_existing_requests(self, tenant, job):
    """
    Get all of the outstanding requests from the db for this job.
    """
    existing_requests = []
    try:
        rows = ProvisionerConfig().dbconn.execute(
            ("select instance_request.instance_type, "
             "instance_request.request_type, "
             "instance_type.type, "
             "instance_request.subnet, subnet_mapping.zone "
             "from instance_request, subnet_mapping, instance_type "
             "where job_runner_id = '%s' and "
             "instance_request.tenant = %s and "
             "instance_request.instance_type = instance_type.id and "
             "subnet_mapping.id = instance_request.subnet") %
            (job.id, tenant.db_id))
        for row in rows:
            existing_requests.append(aws.Request(
                None, row['type'], row['zone'], None, None))
    except psycopg2.Error:
        logger.exception("Error getting outstanding requests.")
    return existing_requests
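# Hedged usage sketch: the returned aws.Request objects can be used to
# avoid requesting the same zone twice for one job. The .zone attribute
# name is an assumption mirroring the constructor's zone argument:
#
#   existing = self.get_existing_requests(tenant, job)
#   requested_zones = set(r.zone for r in existing)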
def migrate_request_to_job(request, job):
    """
    Check if an instance can be repurposed to another job and update the
    database. Returns True on a successful migration.
    """
    # Check to see if the job can be fulfilled by the requested instance.
    if check_requirements(request['type'], job):
        next_idle_job_id = job.id
        try:
            logger.debug(
                ("Migrating instance request %s, from job "
                 "%s to job %s.") % (request['id'],
                                     request['job_runner_id'],
                                     next_idle_job_id))
            # Point the request at its new job.
            ProvisionerConfig().dbconn.execute(
                ("update instance_request set job_runner_id = '%s' "
                 "where id = %s") % (next_idle_job_id, request['id']))
            # Record the migration.
            ProvisionerConfig().dbconn.execute(
                ("insert into request_migration "
                 "(request_id, from_job, to_job, migration_time) "
                 "values (%s, %s, %s, NOW())") %
                (request['id'], request['job_runner_id'],
                 next_idle_job_id))
            return True
        except psycopg2.Error:
            logger.exception("Error performing migration in database.")
    return False
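# Hedged usage sketch of the migration pass this function supports:
# try to repurpose each orphaned request to an idle job before falling
# back to cancellation. The loop shape is illustrative:
#
#   for request in get_orphaned_requests(tenant, ids_to_check, idle_ids):
#       for job in tenant.idle_jobs:
#           if migrate_request_to_job(request, job):
#               break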
def get_global_queue(self):
    """
    Poll condor_q -global and return a set of Jobs.
    """
    cmd = [
        'condor_q', '-global',
        '-format', '%s:', 'GlobalJobId',
        '-format', '%s:', 'ClusterId',
        '-format', '%s:', 'JobStatus',
        '-format', '%s:', 'QDate',
        '-format', '%s:', 'RequestCpus',
        '-format', '%s:', 'RequestMemory',
        '-format', '%s:', 'RequestDisk',
        '-format', '%s', 'JobDescription',
        '-format', '%s\n', 'ExitStatus'
    ]
    output = subprocess.Popen(cmd, stdout=subprocess.PIPE).communicate()[0]
    queue = filter(None, output.split("\n"))
    jobs = []
    if len(queue) > 0:
        # Record the time of the first job if this is it.
        if ProvisionerConfig().first_job_time is None:
            logger.debug("Simulation: first job time set")
            utc = timezone('UTC')
            ProvisionerConfig().first_job_time = datetime.datetime.now(utc)
        for line in queue:
            if "All queues are empty" in line:
                break
            try:
                split = line.split(":")
                # Grab the address of the tenant from the global id.
                tenant_addr = ""
                if "#" in split[0]:
                    tenant_addr = split[0].split("#")[0]
                # RequestMemory is either a number (in MB) or a string
                # expression about the requested memory, so only convert
                # it if it is a number.
                req_memory = 0
                try:
                    req_memory = int(split[5])
                    if req_memory > 1024:
                        # Change it to use GB, like instance types.
                        req_memory = req_memory / 1024
                except Exception:
                    pass
                # RequestDisk behaves the same way; it is also in MB,
                # I believe.
                req_disk = 0
                try:
                    req_disk = int(split[6])
                    if req_disk > 1024:
                        # Change it to use GB, like instance types.
                        req_disk = req_disk / 1024
                except Exception:
                    pass
                # Decipher the description of the job as well (name, etc.).
                description = {}
                if "=" in split[7]:
                    description = self.process_job_description(split[7])
                # Create the job: tenant address, cluster id, status,
                # queue time, requested cpus, memory, disk, description.
                j = Job(tenant_addr, split[1], split[2], split[3],
                        split[4], req_memory, req_disk, description)
                jobs.append(j)
            except Exception as e:
                logger.exception("Something has gone wrong while "
                                 "processing the job queue.")
                raise e
    return jobs
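# For reference, each line emitted by the condor_q invocation above is a
# colon-separated record (values illustrative):
#
#   submit.example.com#1234.0#1490000000:1234:1:1490000000:1:2048:10240:name=worker:0
#
# which line.split(":") maps to GlobalJobId, ClusterId, JobStatus, QDate,
# RequestCpus, RequestMemory, RequestDisk, JobDescription, ExitStatus.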
def __init__(self, *args, **kwargs):
    """
    Load provisioner configuration based on the settings in a config
    file.
    """
    # Override defaults with kwargs.
    config_file = 'ggprovisioner/provisioner.ini'
    cloudinit_file = "cloudinit.cfg"
    if 'config_file' in kwargs:
        config_file = kwargs['config_file']
    if 'cloudinit_file' in kwargs:
        cloudinit_file = kwargs['cloudinit_file']
    # We need to pull cloudinit from the DB in the future.
    self.cloudinit_file = cloudinit_file
    # Read config from a file.
    config = ConfigParser.ConfigParser()
    config.read(config_file)
    # Get DB connection info.
    user = config.get('Database', 'user')
    password = config.get('Database', 'password')
    host = config.get('Database', 'host')
    port = config.get('Database', 'port')
    database = config.get('Database', 'database')
    # Create a connection and keep it as a config attribute.
    try:
        engine = sqlalchemy.create_engine(
            'postgresql://%s:%s@%s:%s/%s' %
            (user, password, host, port, database),
            isolation_level="AUTOCOMMIT")
        self.dbconn = engine.connect()
    except psycopg2.Error:
        logger.exception("Failed to connect to database.")
    # Get some provisioner specific config settings.
    self.ondemand_price_threshold = float(
        config.get('Provision', 'ondemand_price_threshold'))
    self.max_requests = int(config.get('Provision', 'max_requests'))
    self.run_rate = int(config.get('Provision', 'run_rate'))
    self.DrAFTS = config.get('Provision', 'DrAFTS') == 'True'
    self.DrAFTSAvgPrice = (
        config.get('Provision', 'DrAFTSAvgPrice') == 'True')
    self.DrAFTSProfiles = (
        config.get('Provision', 'DrAFTSProfiles') == 'True')
    self.instance_types = []
    # Simulation settings.
    self.simulate = config.get('Simulation', 'Simulate') == 'True'
    self.neg_time = int(config.get('Simulation', 'NegTime'))
    self.job_number = int(config.get('Simulation', 'JobNumber'))
    self.idle_time = int(config.get('Simulation', 'IdleTime'))
    self.terminate = config.get('Simulation', 'Terminate')
    self.overhead_time = int(config.get('Simulation', 'OverheadTime'))
    self.simulate_jobs = config.get('Simulation', 'JobFile')
    self.run_name = config.get('Simulation', 'RunName')
    # Things for the simulator.
    self.first_job_time = None
    self.sim_time = datetime.datetime.strptime('2017-03-25T03:14:00Z',
                                               '%Y-%m-%dT%H:%M:%SZ')
    self.jobs_file = self.simulate_jobs.split("/")[-1]
    # Each canned workload has its own job file and simulated start time.
    if self.job_number == 100:
        self.jobs_file = '100jobs.json'
        self.simulate_jobs = '/home/ubuntu/100jobs.json'
        self.sim_time = datetime.datetime.strptime(
            '2017-03-23T21:35:00Z', '%Y-%m-%dT%H:%M:%SZ')
    elif self.job_number == 200:
        self.jobs_file = '200jobs.json'
        self.simulate_jobs = '/home/ubuntu/200jobs.json'
        self.sim_time = datetime.datetime.strptime(
            '2017-03-27T20:45:00Z', '%Y-%m-%dT%H:%M:%SZ')
    elif self.job_number == 500:
        self.jobs_file = '500jobs.json'
        self.simulate_jobs = '/home/ubuntu/500jobs.json'
        self.sim_time = datetime.datetime.strptime(
            '2017-03-28T21:55:00Z', '%Y-%m-%dT%H:%M:%SZ')
    elif self.job_number == 1000:
        self.jobs_file = '1000jobs.json'
        self.simulate_jobs = '/home/ubuntu/1000jobs.json'
        self.sim_time = datetime.datetime.strptime(
            '2017-03-25T03:14:00Z', '%Y-%m-%dT%H:%M:%SZ')
    if self.idle_time == 120:
        self.idle_time = 128
    self.sim_time = self.sim_time.replace(tzinfo=pytz.utc)
    self.run_id = random.randint(500, 10000)
    self.relative_time = None
    self.simulate_time = self.sim_time
    if self.simulate:
        self.simulator = (
            ggprovisioner.cloud.simaws.aws_simulator.AWSSimulator())
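# Hedged sketch of the provisioner.ini layout this constructor expects.
# The section and option names are taken verbatim from the config.get()
# calls above; the values are illustrative only:
#
#   [Database]
#   user = provisioner
#   password = secret
#   host = localhost
#   port = 5432
#   database = provisioner
#
#   [Provision]
#   ondemand_price_threshold = 0.9
#   max_requests = 3
#   run_rate = 60
#   DrAFTS = False
#   DrAFTSAvgPrice = False
#   DrAFTSProfiles = False
#
#   [Simulation]
#   Simulate = True
#   NegTime = 120
#   JobNumber = 100
#   IdleTime = 120
#   Terminate = True
#   OverheadTime = 60
#   JobFile = /home/ubuntu/100jobs.json
#   RunName = test-run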
def load_from_db():
    """
    Load all of the tenant data. This should let us iterate over the
    tenants and apply their preferences to the idle queue etc. It should
    also let us shut down unnecessary requests across the board, as this
    loads their AWS credentials.
    """
    tenant_list = []
    # Pull all of the tenant data from the database.
    try:
        # Only get those that are subscribed.
        tenant_columns = [
            'id', 'name', 'public_address', 'condor_address',
            'public_ip', 'zone', 'vpc', 'security_group', 'domain'
        ]
        tenant_settings_columns = [
            'max_bid_price', 'bid_percent', 'timeout_threshold'
        ]
        tenant_selection = ', '.join('tenant.' + c for c in tenant_columns)
        tenant_settings_selection = ', '.join(
            'tenant_settings.' + c for c in tenant_settings_columns)
        aws_creds_columns = ['access_key_id', 'secret_key', 'key_pair']
        aws_creds_selection = ', '.join(
            'aws_credentials.' + c for c in aws_creds_columns)
        subnet_columns = ['subnet', 'id as subnet_id']
        subnet_selection = ', '.join(
            'subnet_mapping.' + c for c in subnet_columns)
        full_selection = ', '.join(
            (tenant_selection, tenant_settings_selection,
             aws_creds_selection, subnet_selection))
        rows = ProvisionerConfig().dbconn.execute(
            "SELECT " + full_selection +
            " FROM tenant, tenant_settings, aws_credentials, subnet_mapping"
            " WHERE tenant_settings.tenant = tenant.id AND"
            " tenant.credentials = aws_credentials.id AND"
            " tenant.subscribed = TRUE AND subnet_mapping.tenant"
            " = tenant.id AND subnet_mapping.zone = tenant.zone")
        # Create a tenant object for each row returned.
        for row in rows:
            t = Tenant(row['id'], row['name'], row['public_address'],
                       row['condor_address'], row['public_ip'],
                       row['zone'], row['subnet'], row['subnet_id'],
                       row['vpc'], row['security_group'], row['domain'],
                       row['max_bid_price'], row['bid_percent'],
                       row['timeout_threshold'], row['access_key_id'],
                       row['secret_key'], row['key_pair'])
            # Pull the subnets for the tenant too.
            subnets = {}
            subnets_db_id = {}
            subs = ProvisionerConfig().dbconn.execute(
                "select * from subnet_mapping where tenant = %s"
                % t.db_id)
            # Create a dict for the subnets and add that to the tenant.
            # The database id of each subnet is also needed to store
            # instance requests in the database: hello, subnets_db_id.
            for sn in subs:
                subnets.update({sn['zone']: sn['subnet']})
                subnets_db_id.update({sn['zone']: sn['id']})
            t.subnets = subnets
            t.subnets_db_id = subnets_db_id
            tenant_list.append(t)
    except psycopg2.Error:
        logger.exception("Failed to get tenant data.")
    return tenant_list
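# Hedged sketch of a provisioning cycle built from the helpers in this
# section; the exact call order is illustrative:
#
#   tenants = load_from_db()
#   update_database(tenants)
#   cancel_unmigrated_requests(tenants)
#   stop_over_requesting(tenants)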
def launch_spot_request(conn, request, tenant, job):
    try:
        logger.debug("%s = %s. tenant's vpc = %s"
                     % (request.zone, tenant.subnets[request.zone],
                        tenant.vpc))
        # All three currently point at the cost-aware request.
        cost_aware_req = job.cost_aware
        drafts_req = job.cost_aware
        drafts_avg = job.cost_aware
        # Map the root volume and four ephemeral drives.
        mapping = BlockDeviceMapping()
        sda1 = BlockDeviceType()
        eph0 = BlockDeviceType()
        eph1 = BlockDeviceType()
        eph2 = BlockDeviceType()
        eph3 = BlockDeviceType()
        sda1.size = 10
        eph0.ephemeral_name = 'ephemeral0'
        eph1.ephemeral_name = 'ephemeral1'
        eph2.ephemeral_name = 'ephemeral2'
        eph3.ephemeral_name = 'ephemeral3'
        mapping['/dev/sda1'] = sda1
        mapping['/dev/sdb'] = eph0
        mapping['/dev/sdc'] = eph1
        mapping['/dev/sdd'] = eph2
        mapping['/dev/sde'] = eph3
        inst_req = conn.request_spot_instances(
            price=request.bid,
            image_id=request.ami,
            subnet_id=tenant.subnets[request.zone],
            count=request.count,
            key_name=tenant.key_pair,
            security_group_ids=[tenant.security_group],
            instance_type=request.instance_type,
            user_data=customise_cloudinit(tenant, job),
            block_device_map=mapping)
        my_req_ids = [req.id for req in inst_req]
        for req in my_req_ids:
            insert_launch_stats(req, request, tenant)
            # Tag each request with the tenant name.
            tag_requests(req, tenant.name, conn)
            ProvisionerConfig().dbconn.execute(
                ("insert into instance_request (tenant, instance_type, "
                 "price, job_runner_id, request_type, request_id, "
                 "subnet, cost_aware_ins, cost_aware_bid, "
                 "cost_aware_subnet, drafts_ins, drafts_bid, "
                 "drafts_subnet, selected_avg_price, "
                 "cost_aware_avg_price, drafts_avg_price, drafts_avg_ins, "
                 "drafts_avg_bid, drafts_avg_subnet, "
                 "drafts_avg_avg_price) "
                 "values ('%s', '%s', %s, %s, '%s', '%s', %s, %s, %s, "
                 "%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)") %
                (tenant.db_id, request.instance.db_id, request.price,
                 job.id, "spot", req, tenant.subnets_db_id[request.zone],
                 cost_aware_req.instance.db_id, cost_aware_req.bid,
                 tenant.subnets_db_id[cost_aware_req.zone],
                 drafts_req.instance.db_id, drafts_req.DrAFTS,
                 tenant.subnets_db_id[drafts_req.zone], request.AvgPrice,
                 cost_aware_req.AvgPrice, drafts_req.AvgPrice,
                 drafts_avg.instance.db_id, drafts_avg.DrAFTS,
                 tenant.subnets_db_id[drafts_avg.zone],
                 drafts_avg.AvgPrice))
        return my_req_ids
    except boto.exception.EC2ResponseError:
        logger.exception("There was an error communicating with EC2.")
def stop_over_requesting(tenants):
    """
    Stop too many requests being made for an individual job. This limits
    the frequency of new requests being made for an individual job.
    Future work would be to look at launching many requests instantly,
    and then cancelling the rest once one is fulfilled.
    """
    for tenant in tenants:
        # Stop excess instances being requested in a five minute round.
        logger.debug("Tenant: %s. Request rate: %s"
                     % (tenant.name, tenant.request_rate))
        if ProvisionerConfig().simulate:
            open_reqs = ProvisionerConfig().simulator.requests
        for job in list(tenant.idle_jobs):
            if ProvisionerConfig().simulate:
                if job.sim_status != "IDLE":
                    continue
            # Check to see if we are requesting too frequently.
            count = 0
            try:
                if ProvisionerConfig().simulate:
                    open_reqs = ProvisionerConfig().simulator.requests
                    for openreq in open_reqs:
                        if openreq.job_runner_id == job.id:
                            # Found an existing request for this job.
                            diff = (ProvisionerConfig().simulate_time -
                                    openreq.request_time).total_seconds()
                            if diff <= tenant.request_rate:
                                count = 1
                else:
                    rows = ProvisionerConfig().dbconn.execute(
                        ("select count(*) from instance_request "
                         "where job_runner_id = '%s' and "
                         "request_time >= Now() - "
                         "'%s second'::interval and tenant = %s;") %
                        (job.id, tenant.request_rate, tenant.db_id))
                    for row in rows:
                        count = row['count']
            except psycopg2.Error:
                logger.exception("Error getting number of outstanding "
                                 "requests within time frame.")
            if count > 0:
                tenant.idle_jobs.remove(job)
                logger.debug("Too many requests. Removed job %s" % job.id)
                continue
            # Now check to see if we already have too many outstanding
            # requests for this job.
            try:
                if ProvisionerConfig().simulate:
                    count = 0
                    open_reqs = ProvisionerConfig().simulator.requests
                    for openreq in open_reqs:
                        if openreq.job_runner_id == job.id:
                            count = count + 1
                    if count > ProvisionerConfig().max_requests:
                        logger.warn("Too many outstanding requests, "
                                    "removing idle job: %s" % repr(job))
                        tenant.idle_jobs.remove(job)
                else:
                    rows = ProvisionerConfig().dbconn.execute(
                        ("select count(*) from instance_request "
                         "where job_runner_id = '%s' and tenant = %s;") %
                        (job.id, tenant.db_id))
                    count = 0
                    for row in rows:
                        count = row['count']
                    if count > ProvisionerConfig().max_requests:
                        logger.warn("Too many outstanding requests, "
                                    "removing idle job: %s" % repr(job))
                        tenant.idle_jobs.remove(job)
            except psycopg2.Error:
                logger.exception("Error getting number of outstanding "
                                 "requests.")