Example no. 1
 def cancel_spot_instance_requests(self, to_kill):
     """
    Remove the requests whose ids appear in to_kill.
     """
     for kill in to_kill:
         logger.debug("SIMULATION: Killing requests %s" % kill)
         self.requests = [r for r in self.requests if r.reqid != kill]
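A minimal, self-contained illustration of how this filtering behaves; the Req stub and the request ids below are invented for the example.

# Standalone sketch of the request-filtering above; Req and the ids
# are hypothetical.
import collections

Req = collections.namedtuple("Req", ["reqid"])
requests = [Req("sim-req-1"), Req("sim-req-2"), Req("sim-req-3")]

for kill in ["sim-req-1", "sim-req-3"]:
    requests = [r for r in requests if r.reqid != kill]

assert [r.reqid for r in requests] == ["sim-req-2"]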
Example no. 2
def cancel_unnecessary_requests(tenants):
    """
    Make sure spot requests are closed if there are no idle jobs in the
    queue.
    """
    for tenant in tenants:
        # start by grabbing all of the open spot requests for this tenant
        conn = boto.connect_ec2(tenant.access_key, tenant.secret_key)
        reqs = conn.get_all_spot_instance_requests(filters={
            "tag-value": tenant.name,
            "state": "open"
        })
        # That should be sufficient, but since stray spot requests are
        # costly, double-check and cancel everything if there are no
        # idle jobs.
        status = []
        for job in tenant.jobs:
            # Skip any jobs that have already been fulfilled
            if job.fulfilled is False:
                status.append(job.status)

        # If there are no jobs currently idle, terminate any outstanding
        # spot requests (an empty status list also counts as no idle jobs)
        if not any(stat == '1' for stat in status):
            # Index the open requests by id
            id_to_req = request_ids_dict(reqs)
            # Build a list of request ids to cancel
            to_cancel = list(id_to_req.keys())
            # Cancel these requests
            if len(to_cancel) > 0:
                logger.error("This should be deprecated if the other " +
                             "cancel function is working correctly.")
                logger.debug("Cancelling spot requests: %s" % to_cancel)
                conn.cancel_spot_instance_requests(to_cancel)
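request_ids_dict is not shown in these examples. A plausible one-line sketch, assuming boto spot request objects expose an id attribute (which the other examples rely on):

def request_ids_dict(reqs):
    """Map each spot request id to its request object (a sketch)."""
    return dict((r.id, r) for r in reqs)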
Example no. 3
def cancel_unmigrated_requests(tenants):
    """
    There are two cases to handle here. Either there are no idle jobs, so
    all requests should be cancelled.
    Or there are idle jobs but the existing requests could not be migrated
    to them. In this case, any orphaned requests should also be cancelled.
    """
    for tenant in tenants:
        # start by grabbing all of the open spot requests for this tenant
        ids_to_check = ProvisionerConfig().simulator.get_open_requests()

        # Get the set of idle job numbers
        idle_job_numbers = []
        for job in tenant.jobs:
            if job.sim_status == 'IDLE':
                idle_job_numbers.append(job.id)

        # now get all of the orphaned requests
        reqs = get_orphaned_requests(tenant, ids_to_check, idle_job_numbers)
        reqs_to_cancel = []
        # build a nice list we can pass to boto
        for req in reqs:
            reqs_to_cancel.append(req['request_id'])

        # now cancel all of these requests
        try:
            if len(reqs_to_cancel) > 0:
                logger.debug("Cancelling unmigrated requests: %s" %
                             reqs_to_cancel)
                ProvisionerConfig().simulator.cancel_spot_instance_requests(
                    reqs_to_cancel)
        except Exception as e:
            logger.exception("Error removing spot instance requests.")
            raise e
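get_orphaned_requests is referenced throughout but never defined in these examples. A hedged sketch of its likely shape, filtering the open request ids down to those whose job is no longer idle; the instance_request table and its columns are taken from Example no. 22, ProvisionerConfig is the codebase's config singleton, and the exact query is an assumption:

def get_orphaned_requests(tenant, ids_to_check, idle_job_numbers):
    """Return open requests whose job is no longer idle (a sketch)."""
    orphaned = []
    rows = ProvisionerConfig().dbconn.execute(
        "select id, request_id, job_runner_id, instance_type as type "
        "from instance_request where tenant = %s" % tenant.db_id)
    for row in rows:
        if (row['request_id'] in ids_to_check and
                row['job_runner_id'] not in idle_job_numbers):
            orphaned.append(row)
    return orphaned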
Example no. 4
    def deploy_job(self, job):
        current_time = ProvisionerConfig().simulate_time
        instance_types = ProvisionerConfig().instance_types
        for resource in self.resources:
            if resource.state == "IDLE":
                for instance in instance_types:
                    # check that it fits this instance
                    if (resource.type == instance.type
                            and self.check_requirements(instance, job)):

                        # the job fits, so place it on this resource
                        resource.job_id = job.id
                        # work out when the job will finish: convert the
                        # job's exec time for this instance type
                        exec_seconds = self.exec_time(job, resource.type)

                        logger.debug("SIMULATION CONDOR: Deploying " +
                                     "job %s to resource %s for %s" %
                                     (job.id, resource.id, exec_seconds))
                        # record the job's request time
                        req_time = job.req_time
                        ProvisionerConfig().dbconn.execute(
                            ("insert into jobs (test, job_id, start_time, "
                             "req_time) values ('%s', %s, '%s', '%s');" %
                             (ProvisionerConfig().run_name, int(
                                 job.id), self.get_fake_time(), req_time)))

                        resource.job_finish = current_time + \
                            datetime.timedelta(seconds=exec_seconds)
                        resource.state = "EXECUTING"
                        job.sim_status = "EXECUTING"
                        self.executing_jobs = self.executing_jobs + [job.id]
                        return
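check_requirements is not defined in these examples. A hedged sketch of the fit test implied by the call sites; job.req_cpus, req_memory and req_disk appear elsewhere in these examples, while the instance attribute names (cpus, memory, disk) are assumptions:

def check_requirements(self, instance, job):
    """A sketch: an instance fits a job if it meets the job's requested
    cpus, memory and disk. The instance attribute names are assumed."""
    return (int(instance.cpus) >= int(job.req_cpus)
            and float(instance.memory) >= float(job.req_memory)
            and float(instance.disk) >= float(job.req_disk))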
Example no. 5
def migrate_requests(tenants):
    """
    If requests exist for a job that is no longer in the idle queue
    (e.g. it has been fulfilled or scheduled on other resources)
    then migrate any outstanding requests to another job in the idle queue.
    If there are no other jobs in the idle queue, cancel
    all existing requests tagged by a tenant.
    """
    for tenant in tenants:
        conn = boto.connect_ec2(tenant.access_key, tenant.secret_key)
        reqs = conn.get_all_spot_instance_requests(filters={
            "tag-value": tenant.name,
            "state": "open"
        })

        # Get a list of ids that can be used in a db query
        ids_to_check = []
        for r in reqs:
            ids_to_check.append("%s" % r.id)

        logger.debug("Open requests: %s" % ids_to_check)

        # Get the set of idle job numbers. Using tenant.idle_jobs may not
        # work, as some jobs are removed from that list for various reasons
        # (e.g. they recently had a request made for them), so instead
        # use the full jobs list and check for the idle state.
        idle_job_numbers = []
        potential_jobs = []
        for job in tenant.jobs:
            if job.status == '1':
                idle_job_numbers.append(job.id)
                potential_jobs.append(job)

        # Get requests that do not belong to idle jobs
        reqs = get_orphaned_requests(tenant, ids_to_check, idle_job_numbers)

        # if there are any requests, try to reassign them to another job
        if len(reqs) > 0:
            for req in reqs:
                for job in potential_jobs:
                    # try to migrate it. if it works, then go to the next
                    # request. otherwise try the next job.
                    if migrate_request_to_job(req, job):
                        # Remove it from idle jobs so it doesn't also get
                        # a request made for it this round
                        if job in tenant.idle_jobs:
                            tenant.idle_jobs.remove(job)
                        break
Example no. 6
def process_resources(tenants):
    """
    Manage all of the existing AWS resources and requests.
    """
    # Update the DB with newly fulfilled instances
    logger.debug("Processing sim requests.")
    update_database(tenants)

    # Migrate any requests that still exist for a resource that is not
    # going to use them
    migrate_reqs = True
    if migrate_reqs:
        migrate_requests(tenants)

    # Stop any unnecessary spot requests (still launching without any idle
    # jobs)
    cancel_unmigrated_requests(tenants)
Example no. 7
    def process_idle_jobs(self, tenants):
        """
        Filter out fulfilled jobs and stop resources from being
        requested too frequently.
        """

        t1 = datetime.datetime.now()
        ignore_fulfilled_jobs(tenants)
        t2 = datetime.datetime.now()
        # Stop resources being requested too frequently
        stop_over_requesting(tenants)

        t3 = datetime.datetime.now()

        ig_time = (t2 - t1).total_seconds()
        over_time = (t3 - t2).total_seconds()
        logger.debug("SIMULATION load times: ignore (%s), over req (%s)" %
                     (ig_time, over_time))
Example no. 8
    def request_spot_instances(self, price, image_id, subnet_id, count,
                               key_name, security_group_ids, instance_type,
                               user_data, block_device_map, job):
        # Create a request rather than an instance; the simulator turns
        # requests into instances once the fulfilment delay has elapsed.
        simid = "%s-sim-req-%s" % (ProvisionerConfig().run_name, self.reqid)

        sleep_time = float(random.choice(self.fulfilled_time_dist))
        self.reqid = self.reqid + 1
        new_request = SimRequest(price, subnet_id, instance_type, simid,
                                 int(sleep_time), job.id)
        self.requests.append(new_request)
        # The overhead sleep now happens elsewhere so that requests are
        # issued as a batch.
        logger.debug("SIMULATION: creating new request %s - sleep for %s" %
                     (new_request, sleep_time))

        return [simid]
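SimRequest itself is not shown. A sketch reconstructed from the fields the simulator reads elsewhere in these examples (price, subnet, type, reqid, sleep_time, request_time, ready_time, job_runner_id); the constructor signature matches the call above, the body is an assumption, and ProvisionerConfig is the codebase's config singleton:

import datetime

class SimRequest(object):
    """A sketch of the simulated spot request record."""

    def __init__(self, price, subnet, instance_type, reqid, sleep_time,
                 job_runner_id):
        self.price = price
        self.subnet = subnet
        self.type = instance_type
        self.reqid = reqid
        self.sleep_time = sleep_time
        self.job_runner_id = job_runner_id
        # treat the request as fulfilled once ready_time passes
        self.request_time = ProvisionerConfig().simulate_time
        self.ready_time = self.request_time + datetime.timedelta(
            seconds=sleep_time)

    def __repr__(self):
        return "SimRequest(%s, %s, %s)" % (self.reqid, self.type,
                                           self.price)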
Example no. 9
def cancel_unmigrated_requests(tenants):
    """
    There are two cases to handle here. Either there are no idle jobs, so
    all requests should be cancelled.
    Or there are idle jobs but the existing requests could not be migrated
    to them. In this case, any orphaned requests should also be cancelled.
    """
    for tenant in tenants:
        # start by grabbing all of the open spot requests for this tenant
        conn = boto.connect_ec2(tenant.access_key, tenant.secret_key)
        reqs = conn.get_all_spot_instance_requests(filters={
            "tag-value": tenant.name,
            "state": "open"
        })

        # Get a list of ids that can be used in a db query
        ids_to_check = []
        for r in reqs:
            ids_to_check.append("%s" % r.id)

        # Get the set of idle job numbers
        idle_job_numbers = []
        for job in tenant.jobs:
            if job.status == '1':
                idle_job_numbers.append(job.id)

        # now get all of the orphaned requests
        reqs = get_orphaned_requests(tenant, ids_to_check, idle_job_numbers)
        reqs_to_cancel = []
        # build a nice list we can pass to boto
        for req in reqs:
            reqs_to_cancel.append(req['request_id'])

        # now cancel all of these requests
        try:
            if len(reqs_to_cancel) > 0:
                logger.debug("Cancelling unmigrated requests: %s" %
                             reqs_to_cancel)
                conn.cancel_spot_instance_requests(reqs_to_cancel)
        except Exception as e:
            logger.exception("Error removing spot instance requests.")
            raise e
Example no. 10
def request_resources(tenant):
    """
    Request the resources that have been selected for each job
    """
    conn = None
    output_string = "Name: %s\n" % tenant.name
    output_string = "%sTenant: %s\n" % (output_string, tenant.name)
    instance_req_string = ""
    req_cpus = 0
    req_instances = 0

    for job in tenant.idle_jobs:
        if job.fulfilled is False:
            request = job.launch
            if request is None:
                logger.debug("Failed to find request object for job %s" % job)
                continue
            # increment some counters
            req_instances += int(request.count)
            req_cpus += int(job.req_cpus)
            # Launch any on-demand requests
            if request.ondemand:
                # launch the ondemand request
                launch_ondemand_request(conn, request, tenant, job)
                instance_req_string = (
                    ("%sONDEMAND_INSTANCE_REQUEST" +
                     "\t%s\t%s\t%s\t%s\t%s\n") %
                    (instance_req_string, tenant.name,
                     request.instance_type, request.bid, job.id,
                     "ondemand"))
            else:
                # launch the spot request
                # TODO batch request instances of the same type
                req_ids = launch_spot_request(conn, request, tenant, job)
                for req in req_ids:
                    instance_req_string = (
                        ("%sSPOT_INSTANCE_REQUEST" +
                         "\t%s\t%s\t%s\t%s\t%s\tDrAFTS: %s\t%s\n") %
                        (instance_req_string, tenant.name,
                         request.instance_type, request.bid, job.id,
                         "spot", request.DrAFTS, req))
Example no. 11
    def load_drafts_data(self):
        """
        To speed this up, load in all the drafts data once per
        provisioning cycle
        """
        cur_time = datetime.datetime.utcnow()
        if ProvisionerConfig().simulate:
            cur_time = ProvisionerConfig().simulator.get_fake_time()

        minus_ten = cur_time - datetime.timedelta(seconds=600)
        query = ("select * from drafts_price where timestamp < "
                 "'%s'::TIMESTAMP and timestamp > '%s'::TIMESTAMP") % (
            cur_time.strftime("%Y-%m-%d %H:%M"),
            minus_ten.strftime("%Y-%m-%d %H:%M"))
        self.drafts_data = []
        logger.debug('getting drafts data: ' + query)
        rows = ProvisionerConfig().dbconn.execute(query)
        for row in rows:
            data = {'time': row['time'], 'price': row['price'],
                    'zone': row['zone'], 'type': row['type']}
            self.drafts_data.append(data)
Example no. 12
def instance_acquired(inst, request, tenant, conn):
    """
    A new instance has been acquired, so insert a record into the instance
    table and tag it with the tenant name
    """
    launch_time = datetime.datetime.strptime(inst.launch_time,
                                             "%Y-%m-%dT%H:%M:%S.000Z")
    # insert it into the database
    ProvisionerConfig().dbconn.execute(
        ("insert into instance (request_id, instance_id, fulfilled_time, " +
         "public_dns, private_dns) values ('%s', '%s', '%s', '%s', '%s')") %
        (request['id'], inst.id, launch_time, inst.public_dns_name,
         inst.private_dns_name))
    logger.debug("An instance has been acquired. " +
                 "Tenant={0}; Request={1}, Instance={2}".format(
                     tenant.name, repr(request), repr(inst)))

    # Update the launch stats table too.
    update_launch_stats(inst, request, conn)

    # now tag the request
    api.tag_requests(inst.id, tenant.name, conn)

    # if the job is still in the idle queue, we should remove it as the
    # instance was now launched for it
    for job in tenant.jobs:
        logger.debug("Checking {0} vs {1}".format(repr(job), repr(request)))
        if int(job.id) == int(request['job_runner_id']):
            logger.debug("Launched an instance for job %s - removing it." %
                         request['job_runner_id'])
            job.fulfilled = True
Example no. 13
    def get_global_queue(self):
        """
        Read in the jobs that should have started prior to the
        current sim time.
        Create a new job object for each then return a list of them.
        """

        if self.job_data is None:
            with open(ProvisionerConfig().jobs_file) as data_file:
                logger.debug("SIMULATION: READING DATA")
                self.job_data = json.load(data_file)

        # NOTE: this does not currently work for multiple tenants because
        # the queue is cached on self.jobs. Change it back to re-reading
        # the full file each cycle if multiple tenants are needed.

        # Work out how many seconds have passed since starting the test
        rel_time = (ProvisionerConfig().simulate_time -
                    ProvisionerConfig().sim_time).total_seconds()
        to_delete = []
        for j in self.job_data:

            if int(j['relative_time']) < rel_time:
                to_delete.append(j)
                description = {}
                description['instype'] = j['instance_type']
                description['duration'] = float(j['duration'])
                req_time = ProvisionerConfig().sim_time + \
                    datetime.timedelta(seconds=int(j['relative_time']))
                newjob = Job('tenant_addr',
                             "%s%s" % (j['id'], ProvisionerConfig().run_id), 1,
                             req_time, 1, 1, 1, description)

                self.jobs.append(newjob)
            else:
                break
        for j in to_delete:
            self.job_data.remove(j)

        return self.jobs
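The loader above implies a simple jobs-file schema. An illustrative entry; the keys come from the code, the values are invented:

# Illustrative jobs_file content; keys match the loader, values invented.
sample_jobs = [
    {"id": "101", "relative_time": "0", "instance_type": "m3.medium",
     "duration": "3600"},
    {"id": "102", "relative_time": "60", "instance_type": "c3.large",
     "duration": "1800"},
]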
Example no. 14
def instance_acquired(inst, request, tenant, conn):
    """
    A new instance has been acquired, so insert a record into the instance
    table and tag it with the tenant name
    """

    launch_time = ProvisionerConfig().simulator.get_fake_time()
    # insert it into the database
    ProvisionerConfig().dbconn.execute(
        ("insert into instance (request_id, instance_id, fulfilled_time, " +
         "public_dns, private_dns) values ('%s', '%s', '%s', '%s', '%s')") %
        (request['id'], inst.id, launch_time, 'pubdns', 'privdns'))
    logger.debug("An instance has been acquired. " +
                 "Tenant={0}; Request={1}, Instance={2}".format(
                     tenant.name, repr(request), repr(inst)))

    # now tag the request
    api.tag_requests(inst.id, tenant.name, conn)

    # if the job is still in the idle queue, we should remove it as the
    # instance was now launched for it
    for job in tenant.jobs:
        logger.debug("Checking {0} vs {1}".format(repr(job), repr(request)))
        if job.id == request['job_runner_id']:
            logger.debug("Launched an instance for job %s - removing it." %
                         request['job_runner_id'])
            job.fulfilled = True
Example no. 15
    def process_global_queue(self, jobs, tenants):
        """
        Associate each job with a tenant and add them to their local list of
        jobs.
        """
        for tenant in tenants:
            tenant.jobs = []
            tenant.idle_jobs = []
            # Go through the jobs and add to idle_jobs only those that
            # have been idle longer than the tenant's required idle time
            # and are in the idle state
            for job in jobs:
                tenant.jobs.append(job)

                job_idle_at = job.req_time + \
                    datetime.timedelta(seconds=tenant.idle_time)
                if (int(job.status) == 1
                        and job_idle_at < ProvisionerConfig().simulate_time):
                    tenant.idle_jobs.append(job)
            logger.debug("SIMULATION: job len = %s" % len(tenant.jobs))
Example no. 16
    def load_jobs(self, tenants):
        """
        Read in the condor queue and manage the removal of jobs that should
        not be processed.
        """
        # Assess the global queue

        # Clear out the lists then reload them.
        for t in tenants:
            t.idle_jobs = []
            t.jobs = []
        t1 = datetime.datetime.now()
        all_jobs = self.get_global_queue()
        t2 = datetime.datetime.now()
        if ProvisionerConfig().simulate:
            if ProvisionerConfig().relative_time is None:

                self.job_data = None
                utc = timezone('UTC')
                ProvisionerConfig().relative_time = datetime.datetime.now(utc)

        # Associate the jobs from the global queue with each of the tenants
        self.process_global_queue(all_jobs, tenants)
        t3 = datetime.datetime.now()

        ignore_fulfilled_jobs(tenants)
        t4 = datetime.datetime.now()
        # Stop resources being requested too frequently
        stop_over_requesting(tenants)
        t5 = datetime.datetime.now()

        queue_time = (t2 - t1).total_seconds()
        process_time = (t3 - t2).total_seconds()
        ignore_time = (t4 - t3).total_seconds()
        stop_time = (t5 - t4).total_seconds()
        logger.debug("SIMULATION load times: queue (%s), process (%s), "
                     "ignore (%s), stop (%s)" %
                     (queue_time, process_time, ignore_time, stop_time))
Example no. 17
def migrate_request_to_job(request, job):
    """
    Check if an instance can be repurposed to another job and update the
    database.
    """
    # Check to see if the job can be fulfilled by the requested instance
    if check_requirements(request['type'], job):
        next_idle_job_id = job.id
        try:
            logger.debug(
                "Migrating instance request %s from job %s to job %s."
                % (request['id'], request['job_runner_id'], next_idle_job_id))
            ProvisionerConfig().dbconn.execute(
                ("update instance_request set job_runner_id = '%s' " +
                 "where id = %s") % (next_idle_job_id, request['id']))
            ProvisionerConfig().dbconn.execute(
                ("insert into request_migration " +
                 "(request_id, from_job, to_job, migration_time) " +
                 "values (%s, %s, %s, NOW())") %
                (request['id'], request['job_runner_id'], next_idle_job_id))
            return True
        except psycopg2.Error:
            logger.exception("Error performing migration in database.")
            return False
Example no. 18
    def run_condor(self, tenants):
        """
        Be the condor agent. This will manage putting jobs on
        the resources etc.
        """
        logger.debug("SIMULATION CONDOR: starting.")
        instance_types = ProvisionerConfig().instance_types

        current_time = ProvisionerConfig().simulate_time

        # logger.debug("SIMULATION CONDOR: loaded tenants.")
        # now i need to add status to each of the jobs
        # Run through the jobs and set their states so they
        # are ignored by other things
        for t in tenants:
            for job in list(t.jobs):
                if job.id in self.finished_jobs:
                    job.sim_status = "FINISHED"
                    if job in t.idle_jobs:
                        t.idle_jobs.remove(job)
                    t.jobs.remove(job)

                elif job.id in self.executing_jobs:
                    job.sim_status = "EXECUTING"
                    if job in t.idle_jobs:
                        t.idle_jobs.remove(job)
        for t in tenants:
            for job in t.jobs:
                for resource in self.resources:
                    if (job.id == resource.job_id
                            and resource.job_finish is not None
                            and resource.job_finish < current_time):
                        # Mark it as all done
                        job.sim_status = "FINISHED"
                        resource.state = "IDLE"
                        if job.id in self.executing_jobs:
                            self.executing_jobs.remove(job.id)
                        if job.id not in self.finished_jobs:
                            self.finished_jobs = self.finished_jobs + \
                                [job.id]
                            logger.debug("SIMULATION CONDOR: Finished " +
                                         "job %s." % (job.id))
                            ProvisionerConfig().dbconn.execute(
                                ("update jobs set end_time = '%s' " +
                                 "where job_id = %s and test = '%s';") %
                                (ProvisionerConfig().simulate_time,
                                 int(job.id),
                                 ProvisionerConfig().run_name))

        logger.debug("SIMULATION CONDOR: deploying new jobs.")
        for t in tenants:
            for job in t.jobs:
                # deploy any idle jobs onto idle resources
                if job.sim_status == "IDLE":
                    self.deploy_job(job)
Example no. 19
    def get_timeout_ondemand(self, job, tenant, instances):
        """
        Check to see if the job now requires an ondemand instance due to
        timing out.
        """
        cur_time = datetime.datetime.now()
        cur_time = calendar.timegm(cur_time.timetuple())
        time_idle = 0
        if ProvisionerConfig().simulate:
            cur_time = ProvisionerConfig().simulate_time
            time_idle = (ProvisionerConfig().simulate_time -
                         job.req_time).total_seconds()
        else:
            time_idle = cur_time - int(job.req_time)

        res_instance = None
        # if the tenant has set a timeout and the job has been idle longer than
        # this
        if tenant.timeout > 0 and time_idle > tenant.timeout:
            # sort the eligible instances by their ondemand price (odp)
            sorted_instances = sorted(instances, key=lambda k: k.odp)
            res_instance = sorted_instances[0]
            logger.debug("Selecting ondemand instance: %s" %
                         str(res_instance))
        return res_instance
Example no. 20
    def check_ondemand_needed(self, tenant, sorted_instances, job):
        # Check to see if an ondemand instance is required due to timeout
        needed = False
        launch_instance = self.get_timeout_ondemand(job, tenant,
                                                    sorted_instances)
        cheapest = sorted_instances[0]

        # check to see if it timed out
        if (launch_instance is not None and
                launch_instance.odp < tenant.max_bid_price):
            job.launch = aws.Request(
                launch_instance, launch_instance.type, "", launch_instance.ami,
                1, launch_instance.odp, True)
            logger.debug("Selected to launch on demand due to timeout: %s" %
                         str(job.launch))
            needed = True

        # check if the job is flagged as needing on-demand
        elif job.ondemand:
            needed = True

        # if the cheapest option is ondemand
        elif cheapest.ondemand and cheapest.odp < tenant.max_bid_price:
            job.launch = cheapest
            logger.debug("Selected to launch on demand due to ondemand "
                         "being cheapest: %s" % repr(cheapest))
            needed = True

        # or if the cheapest spot option is close in price to ondemand,
        # then use ondemand.
        elif (cheapest.price >
                (ProvisionerConfig().ondemand_price_threshold *
                    float(cheapest.odp)) and
                cheapest.price < tenant.max_bid_price):
            job.launch = cheapest
            logger.debug("Selected to launch on demand due to spot price "
                         "being close to ondemand price: %s" %
                         repr(cheapest))
            needed = True

        return needed
Example no. 21
def migrate_instance():
    """
    A placeholder for where the migration of instances will fit in to this.
    """
    logger.debug("Migration not yet supported.")
Example no. 22
def launch_spot_request(conn, request, tenant, job):
    try:
        logger.debug("%s = %s. tenants vpc = %s" %
                     (request.zone, tenant.subnets[request.zone], tenant.vpc))

        cost_aware_req = job.cost_aware
        drafts_req = job.cost_aware
        drafts_avg = job.cost_aware
        mapping = BlockDeviceMapping()
        sda1 = BlockDeviceType()
        eph0 = BlockDeviceType()
        eph1 = BlockDeviceType()
        eph2 = BlockDeviceType()
        eph3 = BlockDeviceType()
        sda1.size = 10
        eph0.ephemeral_name = 'ephemeral0'
        eph1.ephemeral_name = 'ephemeral1'
        eph2.ephemeral_name = 'ephemeral2'
        eph3.ephemeral_name = 'ephemeral3'
        mapping['/dev/sda1'] = sda1
        mapping['/dev/sdb'] = eph0
        mapping['/dev/sdc'] = eph1
        mapping['/dev/sdd'] = eph2
        mapping['/dev/sde'] = eph3

        inst_req = None

        inst_req = conn.request_spot_instances(
            price=request.bid,
            image_id=request.ami,
            subnet_id=tenant.subnets[request.zone],
            count=request.count,
            key_name=tenant.key_pair,
            security_group_ids=[tenant.security_group],
            instance_type=request.instance_type,
            user_data=customise_cloudinit(tenant, job),
            block_device_map=mapping)
        my_req_ids = [req.id for req in inst_req]
        # address = ""
        for req in my_req_ids:
            insert_launch_stats(req, request, tenant)
            # tag each request
            tag_requests(req, tenant.name, conn)
            ProvisionerConfig().dbconn.execute((
                "insert into instance_request (tenant, instance_type, " +
                "price, job_runner_id, request_type, request_id, " +
                "subnet, cost_aware_ins, cost_aware_bid, cost_aware_subnet," +
                " drafts_ins, drafts_bid, drafts_subnet, selected_avg_price,"
                " cost_aware_avg_price, drafts_avg_price, drafts_avg_ins, " +
                "drafts_avg_bid, drafts_avg_subnet, drafts_avg_avg_price) " +
                "values ('%s', '%s', %s, %s, '%s', '%s', %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
            ) % (tenant.db_id, request.instance.db_id, request.price, job.id,
                 "spot", req, tenant.subnets_db_id[request.zone],
                 cost_aware_req.instance.db_id, cost_aware_req.bid,
                 tenant.subnets_db_id[cost_aware_req.zone],
                 drafts_req.instance.db_id, drafts_req.DrAFTS,
                 tenant.subnets_db_id[drafts_req.zone], request.AvgPrice,
                 cost_aware_req.AvgPrice, drafts_req.AvgPrice,
                 drafts_avg.instance.db_id, drafts_avg.DrAFTS,
                 tenant.subnets_db_id[drafts_avg.zone], drafts_avg.AvgPrice))

        return my_req_ids
    except boto.exception.EC2ResponseError:
        logger.exception("There was an error communicating with EC2.")
        return []
Example no. 23
    def get_global_queue(self):
        """
        Poll condor_q -global and return a set of Jobs.
        """
        cmd = [
            'condor_q', '-global', '-format', '%s:', 'GlobalJobId', '-format',
            '%s:', 'ClusterId', '-format', '%s:', 'JobStatus', '-format',
            '%s:', 'QDate', '-format', '%s:', 'RequestCpus', '-format', '%s:',
            'RequestMemory', '-format', '%s:', 'RequestDisk', '-format', '%s',
            'JobDescription', '-format', '%s\n', 'ExitStatus'
        ]

        output = subprocess.Popen(cmd, stdout=subprocess.PIPE).communicate()[0]
        queue = output.split("\n")
        queue = [line for line in queue if line]

        jobs = []
        if len(queue) > 0:
            # set the time of the first job if this is it
            if ProvisionerConfig().first_job_time is None:
                logger.debug("Simulation: first job time set")
                utc = timezone('UTC')
                ProvisionerConfig().first_job_time = datetime.datetime.now(utc)
            for line in queue:
                if "All queues are empty" in line:
                    break
                try:
                    split = line.split(":")
                    tenant_addr = ""
                    # Grab the address of the tenant from the global id
                    if "#" in split[0]:
                        tenant_addr = split[0].split("#")[0]
                    # Req memory is either a number or a string talking about
                    # requested memory, so check if it is a number
                    req_memory = 0
                    try:
                        req_memory = int(split[5])
                        if req_memory > 1024:
                            # change it to use GB like instance types.
                            req_memory = req_memory / 1024
                    except Exception:
                        pass
                    # Req disk is the same as memory. Again it is
                    # in mb I believe
                    req_disk = 0
                    try:
                        req_disk = int(split[6])
                        if req_disk > 1024:
                            # change it to use GB like instance types.
                            req_disk = req_disk / 1024
                    except Exception:
                        pass
                    # Decipher the description of the job as well (name, etc.)
                    description = {}
                    if "=" in split[7]:
                        description = self.process_job_description(split[7])
                    # Create the job: tenant address, job id, status,
                    # queue time, requested cpus, memory, disk, description
                    j = Job(tenant_addr, split[1], split[2], split[3],
                            split[4], req_memory, req_disk, description)
                    jobs.append(j)
                except Exception as e:
                    logger.exception("Something has gone wrong while "
                                     "processing the job queue.")
                    raise e
        return jobs
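process_job_description is called above but not shown. A hedged sketch, assuming the JobDescription field is a string of key=value pairs; the ';' delimiter is an assumption:

    def process_job_description(self, raw):
        """A sketch: parse 'key1=val1;key2=val2' into a dict. The
        delimiter is an assumption."""
        description = {}
        for part in raw.split(";"):
            if "=" in part:
                key, _, value = part.partition("=")
                description[key.strip()] = value.strip()
        return description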
Example no. 24
    def get_potential_instances(self, eligible_instances, job, tenant):
        """
        Make a list of all <type,zone> and <type,ondemand> pairs then order
        them.
        """

        unsorted_instances = []
        # Add an entry for each instance type as ondemand, or each spot
        # price so we can sort everything and pick the cheapest.
        for ins in eligible_instances:
            unsorted_instances.append(aws.Request(
                ins, ins.type, "", ins.ami, 1, 0, True,
                ins.ondemand, ins.ondemand, ins.ondemand, ins.ondemand,
                ins.ondemand))
            # Don't bother adding spot prices if it is an ondemand request:
            if not job.ondemand:
                DrAFTS = None
                AvgPrice = None
                OraclePrice = None
            for zone, price in ins.spot.items():
                    if (ProvisionerConfig().DrAFTS or
                            ProvisionerConfig().DrAFTSProfiles):
                        DrAFTS, OraclePrice = self.get_DrAFTS_bid(
                            ins.type, zone, job, price)
                        if DrAFTS is None or OraclePrice is None:
                            # Try again; if the values still aren't found,
                            # the price doesn't exist, so use a large
                            # sentinel value to push this option down
                            DrAFTS, OraclePrice = self.get_DrAFTS_bid(
                                ins.type, zone, job, price)
                            if DrAFTS is None:
                                DrAFTS = 1000
                            if OraclePrice is None:
                                OraclePrice = 1000
                        if ProvisionerConfig().DrAFTS:
                            unsorted_instances.append(aws.Request(
                                ins, ins.type, zone, ins.ami, 1, 0, False,
                                ins.ondemand, DrAFTS, 0, 0, 0))
                        elif ProvisionerConfig().DrAFTSProfiles:
                            unsorted_instances.append(aws.Request(
                                ins, ins.type, zone, ins.ami, 1, 0, False,
                                ins.ondemand, OraclePrice, 0, 0, 0))
                    else:
                        unsorted_instances.append(aws.Request(
                            ins, ins.type, zone, ins.ami, 1, 0, False,
                            ins.ondemand, price, 0, 0, 0))
                    logger.debug('%s, %s spot: %s drafts: %s profile: %s' % (
                        ins.type, zone, price, DrAFTS, OraclePrice))

        # Now sort all of these instances by price
        sorted_instances = []
        if ProvisionerConfig().DrAFTS:
            # Sort by the DrAFTS price and then by the current spot
            # price, so the cheapest AZ ends up at the top of the list.
            sorted_instances = sorted(unsorted_instances,
                                      key=lambda k: (k.DrAFTS, k.price))
        elif ProvisionerConfig().DrAFTSProfiles:
            sorted_instances = sorted(unsorted_instances,
                                      key=lambda k: (k.OraclePrice, k.price))
        else:
            sorted_instances = sorted(
                unsorted_instances, key=lambda k: k.price)
        return sorted_instances
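The two-level sort key means ties on the DrAFTS bound fall back to the current spot price. A standalone illustration with invented values:

# Standalone illustration of the two-level sort; values are invented.
reqs = [{"DrAFTS": 0.10, "price": 0.05},
        {"DrAFTS": 0.10, "price": 0.03},
        {"DrAFTS": 0.08, "price": 0.09}]
ordered = sorted(reqs, key=lambda k: (k["DrAFTS"], k["price"]))
# ordered: the 0.08 entry first, then the two 0.10 entries by spot price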
Example no. 25
    def get_DrAFTS_bid(self, ins, zone, job, cur_price):
        """
        Pull the DrAFTS price for this instance type.
        This will get the nearest value greater than 1 hour.
        """
        # example: http://128.111.84.183/vpc/us-east-1a-c3.2xlarge.pgraph
        try:
            ret_drafts = None
            ret_oracle = None

            if ProvisionerConfig().drafts_stored_db:
                # map the AZ onto the DrAFTS zone naming
                mapped_zone = self.drafts_mapping[zone]
                logger.debug('drafts zone: %s' % mapped_zone)
                for row in self.drafts_data:
                    if (row['type'] == ins and mapped_zone == row['zone'] and
                            float(row['price']) > float(cur_price)):
                        time = row['time']
                        cost = row['price']
                        if ret_drafts is None and float(time) > 1:
                            ret_drafts = Decimal(str(cost))
                        if (ret_oracle is None and float(time) >
                                (float(job.duration) / 3600)):
                            ret_oracle = Decimal(str(cost))

                return ret_drafts, ret_oracle
            else:
                # use the mapping between AZs to pick a zone name
                mapped_zone = self.drafts_mapping[zone]

                addr = 'http://128.111.84.183/vpc/%s-%s.pgraph' % (
                    mapped_zone, ins)
                req = requests.get(addr)
                output = req.text
                # Split the result by line
                lines = output.split("\n")
                ret_drafts = None
                # define these outside the loop so that if a line fails
                # to parse, the previous time and cost can still be used
                cost = None
                time = None
                for line in lines:
                    # Extract the time and cost
                    try:
                        time = line.split(" ")[0]
                        cost = line.split(" ")[1]
                    except Exception as y:
                        logger.error("drafts: Failed here: %s %s" % (y, line))
                    # Split the line in half to get the time and cost
                    if float(time) > 1:
                        # this is the one we want to use
                        ret_drafts = Decimal(str(cost))
                        break
                # now do the oracle ones
                ret_oracle = None
                last = False
                for line in lines:
                    # Extract the time and cost
                    try:
                        if len(line) > 5:
                            time = line.split(" ")[0]
                            cost = line.split(" ")[1]
                        else:
                            last = True
                            logger.debug("No prediction long enough in "
                                         "%s, using last one. %s %s" % (addr,
                                                                        time,
                                                                        cost))
                    except Exception as z:
                        logger.error("oracle: failed here: %s %s" % (z, line))
                    # Split the line in half to get the time and cost
                    if last or float(time) > (float(job.duration) / 3600):
                        # this is the one we want to use
                        ret_oracle = Decimal(str(cost))
                        break
                return ret_drafts, ret_oracle
        except Exception as e:
            logger.debug("Failed to find DrAFTS price for %s. %s" % (ins, e))
        return None, None
Example no. 26
    def simulate(self, _tenants):
        """
        Check the state of the simulation and tidy up job and
        resource state.
        """
        self.tenants = _tenants
        jobs_list = []
        idle_jobs = []

        for t in self.tenants:
            for job in t.jobs:
                jobs_list = jobs_list + [job.id]
                if (job.sim_status == "IDLE"
                        and job.id not in self.finished_jobs
                        and job.id not in self.executing_jobs):
                    idle_jobs = idle_jobs + [job.id]

        # get some counts to print out
        terminated_time_instances = []
        terminated_price_instances = []
        starting_instances = []
        unclaimed_instances = []
        idle_instances = []
        executing_instances = []

        for res in self.resources:
            if res.state == "IDLE":
                idle_instances = idle_instances + [res.id]
            elif res.state == "EXECUTING":
                executing_instances = executing_instances + [res.id]
            elif res.state == "STARTING" or res.state == "CONTEXTUALIZING":
                starting_instances = starting_instances + [res.id]
            elif res.state == "UNCLAIMED":
                unclaimed_instances = unclaimed_instances + [res.id]
            elif res.state == "TERMINATED":
                if 'time' in res.reason:
                    terminated_time_instances = terminated_time_instances + \
                        [res.id]
                if 'price' in res.reason:
                    terminated_price_instances = terminated_price_instances + \
                        [res.id]

        logger.debug("\nSIMULATION OVERVIEW: requests (cur: %s -- total: %s), "
                     "resources (%s), jobs (%s)\n" %
                     (len(self.requests), self.reqid - 1, len(
                         self.resources), len(jobs_list)))

        logger.debug("\nSIMULATION JOB OVERVIEW: idle (%s), executing (%s), "
                     "finished (%s)\n" %
                     (len(idle_jobs), len(
                         self.executing_jobs), len(self.finished_jobs)))

        logger.debug(
            "\nSIMULATION RESOURCE OVERVIEW: starting (%s), "
            "idle (%s), unclaimed (%s), executing (%s), "
            "terminated-time (%s), terminated-price (%s)\n" %
            (len(starting_instances), len(idle_instances),
             len(unclaimed_instances), len(executing_instances),
             len(terminated_time_instances), len(terminated_price_instances)))
        total_run_seconds = (ProvisionerConfig().simulate_time -
                             ProvisionerConfig().sim_time).total_seconds()
        logger.debug("\nSIMULATION TIME OVERVIEW: start time (%s), "
                     "current time (%s), seconds simulated (%s)" %
                     (ProvisionerConfig().sim_time,
                      ProvisionerConfig().simulate_time, total_run_seconds))
        if total_run_seconds > self.kill_time:
            sys.exit()
        # Run through the jobs and set their states so they are
        # ignored by other things
        for t in self.tenants:
            for job in t.jobs:
                if job.id in self.finished_jobs:
                    job.sim_status = "FINISHED"
                    if job in t.idle_jobs:
                        t.idle_jobs.remove(job)
                    if job.id in self.executing_jobs:
                        self.executing_jobs.remove(job.id)

                    t.jobs.remove(job)
                elif job.id in self.executing_jobs:
                    job.sim_status = "EXECUTING"
                    if job in t.idle_jobs:
                        t.idle_jobs.remove(job)

        # try cleaning up the instance state too
        for res in self.resources:
            if (res.state == "EXECUTING"
                    and res.job_id not in self.executing_jobs):
                # somehow this one should have finished...
                # try to just wrap it up now
                res.job_id = None
                res.job_finish = None
                res.state = "IDLE"
                logger.debug("SIMULATION: Found an executing resource " +
                             "that should be idle.")
Example no. 27
    def run_aws(self):
        """
        This is the aws loop. Check if instances should be fulfilled etc.
        """
        logger.debug("SIMULATION AWS: starting.")

        current_time = ProvisionerConfig().simulate_time
        logger.debug("SIMULATION AWS: running.")

        with self.lock:
            self.turn = 0
            # check if any requests should be fulfilled
            for request in list(self.requests):
                if current_time >= request.ready_time:
                    # start a resource for this
                    insid = "%s-sim-ins-%s" % (ProvisionerConfig().run_name,
                                               self.insid)
                    self.insid = self.insid + 1

                    logger.debug("SIMULATION AWS: creating a new resource " +
                                 "for request %s, has slept %s" %
                                 (request, request.sleep_time))
                    new_resource = SimResource(
                        request.price, request.subnet, request.type,
                        request.request_time, request.reqid, insid,
                        random.choice(self.contextualise_time_dist),
                        request.job_runner_id)
                    self.resources = self.resources + [new_resource]
                    # and remove the request since it is done
                    self.instance_acquired(new_resource)
                    self.requests.remove(request)

            # Now check to see if any instances should have booted by now.
            # This handles working out when the instance joins the HTCondor
            # queue and when jobs get dispatched.
            for resource in self.resources:
                # First check if the resource is in the contextualizing state.
                if (resource.state == 'CONTEXTUALIZING'
                        and current_time >= resource.context_time):
                    # Switch it to UNCLAIMED and schedule a negotiation time
                    resource.state = 'UNCLAIMED'
                    wait_time = int(random.choice(self.negotiate_time_dist))
                    resource.claimed_time = current_time + \
                        datetime.timedelta(seconds=wait_time)

                elif resource.state == 'UNCLAIMED':
                    # check if the timer has passed:
                    logger.debug(
                        'SIMULATION: resource unclaimed, %s seconds '
                        'until the next negotiation' %
                        (resource.claimed_time - current_time).total_seconds())
                    if (resource.claimed_time -
                            current_time).total_seconds() <= 0:
                        # check if any jobs are in an idle state
                        new_claim = self.check_claim(resource)
                        if new_claim:
                            resource.state = 'IDLE'
                            logger.debug('Set resource to idle')
                            continue
                        else:
                            # otherwise, reset the negotiation timer so the
                            # resource tries to be claimed again later
                            resource.claimed_time = current_time + \
                                datetime.timedelta(seconds=int(
                                    random.choice(self.negotiate_time_dist)))
                            resource.state = 'UNCLAIMED'
                            logger.debug(
                                'SIMULATION no idle job found, setting ' +
                                'back to UNCLAIMED')

            # check if any instances should terminate due to time
            terminate_resources = {}
            for resource in self.resources:
                if ProvisionerConfig().terminate == "hourly":
                    if (resource.state != 'EXECUTING' and (int(
                        (current_time - resource.launch_time).total_seconds() %
                            3600) > 3480) and resource.state != 'TERMINATED'):
                        # kill idle resources in the final two minutes of
                        # each billing hour (past 3480 s into the hour)
                        logger.debug("SIMULATION AWS. Terminating resource "
                                     "due to time: %s" % resource)
                        # terminate the job
                        resource.reason = "time related"
                        resource.state = "TERMINATED"
                        resource.terminate_time = self.get_fake_time()
                elif ProvisionerConfig().terminate == "1hour":
                    # kill idle resources after 3480 seconds (just under
                    # one hour)
                    if (resource.state != 'EXECUTING' and
                        (current_time - resource.launch_time).total_seconds() >
                            3480 and resource.state != 'TERMINATED'):
                        logger.debug("SIMULATION AWS. Terminating resource "
                                     "due to time: %s" % resource)
                        # terminate the job
                        resource.reason = "time related"
                        resource.state = "TERMINATED"
                        resource.terminate_time = self.get_fake_time()
                elif ProvisionerConfig().terminate == "idle":
                    if (resource.state == 'IDLE'
                            and resource.state != 'TERMINATED' and
                        (int((current_time -
                              resource.launch_time).total_seconds()) > 600)):
                        # now checking this when killing anything over
                        # 3480 secs...
                        logger.debug(
                            "SIMULATION AWS. Terminating resource due "
                            "to time: %s" % resource)
                        # terminate the job
                        resource.reason = "time related"
                        resource.state = "TERMINATED"
                        resource.terminate_time = self.get_fake_time()

                # Sort out the job that was running on this instance,
                # put it back to idle.
                if resource.state != "TERMINATED":
                    # only run the price check every 60 simulated seconds
                    if (ProvisionerConfig().simulate_time -
                            ProvisionerConfig().sim_time
                        ).total_seconds() % 60 == 0:
                        for t in self.tenants:
                            if resource.type in terminate_resources:
                                if terminate_resources[resource.type] is False:
                                    continue
                            if (float(resource.price) < float(
                                    self.get_spot_prices(resource, t))):
                                logger.debug(
                                    "SIMULATION: terminating resource "
                                    "due to price %s" % resource)

                                for job in t.jobs:
                                    if job.id == resource.job_id:

                                        job.sim_status = 'IDLE'
                                        if job.id in self.executing_jobs:
                                            self.executing_jobs.remove(job.id)
                                        if job not in t.idle_jobs:
                                            t.idle_jobs = t.idle_jobs + [job]
                                # now terminate the instance
                                logger.debug(
                                    'terminating instance due '
                                    'to price: %s %s' %
                                    (resource.price,
                                     self.get_spot_prices(resource, t)))
                                resource.state = "TERMINATED"
                                resource.reason = ("spot instance termination "
                                                   "due to spot price")
                                resource.terminate_time = self.get_fake_time()
                            else:
                                terminate_resources[resource.type] = False
Example no. 28
    def select_instance_type(self, instances):
        """
        Select the instance to launch for each idle job.
        """

        for tenant in self.tenants:
            for job in list(tenant.idle_jobs):
                if ProvisionerConfig().simulate:
                    time.sleep(ProvisionerConfig().overhead_time)
                # Get the set of instance types that can be used for this job
                eligible_instances = self.restrict_instances(job)
                if len(eligible_instances) == 0:
                    logger.error("Failed to find any eligible instances "
                                 "for job %s" % job)
                    continue
                # get all potential pairs and sort them
                sorted_instances = self.get_potential_instances(
                    eligible_instances, job, tenant)
                if len(sorted_instances) == 0:
                    logger.error("Failed to find any sorted instances "
                                 "for job %s" % job)
                    continue

                # work out if an ondemand instance is needed
                job.ondemand = self.check_ondemand_needed(tenant,
                                                          sorted_instances,
                                                          job)

                # If ondemand is required, redo the sorted list with only
                # ondemand requests and set that to be the launched instance
                if job.ondemand:
                    sorted_instances = self.get_potential_instances(
                        eligible_instances, job, tenant)

                    job.launch = sorted_instances[0]
                    logger.debug("Launching ondemand for this job. %s" %
                                 str(job.launch))
                    continue

                # otherwise we are now looking at launching a spot request
                # print out the options we are looking at
                self.print_cheapest_options(sorted_instances)
                # filter out a job if it has had too many requests made
                existing_requests = self.get_existing_requests(tenant, job)
                if len(existing_requests) >= ProvisionerConfig().max_requests:
                    tenant.idle_jobs.remove(job)
                    continue

                # Find the top request that hasn't already been requested
                # (e.g. zone+type pair is not in existing_requests)
                for req in sorted_instances:
                    if len(existing_requests) > 0:
                        # Skip this type if a matching request already
                        # exists
                        exists = False
                        for existing in existing_requests:
                            if (req.instance_type == existing.instance_type and
                                    req.zone == existing.zone):
                                exists = True
                        if exists:
                            continue
                    # Launch this type.
                    # Hmm, this is getting more complicated with
                    # multiple provisioning models.
                    if req.price < tenant.max_bid_price:
                        req.bid = self.get_bid_price(job, tenant, req)
                        job.launch = req
                        job.cost_aware = req
                        break
                    else:
                        logger.error(("Unable to launch request %s as "
                                      "the price is higher than max bid "
                                      "%s.") % (str(req),
                                                tenant.max_bid_price))
Example no. 29
                        else:
                            last = True
                            logger.debug("No prediction long enough in "
                                         "%s, using last one. %s %s" % (addr,
                                                                        time,
                                                                        cost))
                    except Exception as z:
                        logger.error("oracle: failed here: %s %s" % (z, line))
                    # Split the line in half to get the time and cost
                    if last or float(time) > (float(job.duration) / 3600):
                        # this is the one we want to use
                        ret_oracle = Decimal(str(cost))
                        break
                return ret_drafts, ret_oracle
        except Exception as e:
            logger.debug("Failed to find DrAFTS price for %s. %s" % (ins, e))
        return None, None

    def print_cheapest_options(self, sorted_instances):
        # Print out the top three options
        logger.info("Top three to select from:")
        top_three = 3
        for ins in sorted_instances:
            if top_three == 0:
                break
            top_three -= 1
            if ProvisionerConfig().DrAFTS:
                logger.info("DrAFTS: %s %s %s %s" %
                            (ins.instance_type, ins.zone, ins.price,
                             ins.DrAFTS))
            if ProvisionerConfig().DrAFTSAvgPrice:
                logger.info("DrAFTS Oracle Price: %s %s %s %s" %
                            (ins.instance_type, ins.zone, ins.price,
                             ins.OraclePrice))
Example no. 30
    def run(self):
        """
        Run the provisioner. This should execute periodically and
        determine what actions need to be taken.
        """
        self.run_iterations = 0
        if ProvisionerConfig().simulate:
            self.sched = SimScheduler()
            ProvisionerConfig().load_instance_types()
            self.load_drafts_data()
            while True:
                self.run_iterations = self.run_iterations + 1
                # Load jobs
                t1 = datetime.datetime.now()
                self.load_tenants_and_jobs()

                t2 = datetime.datetime.now()
                # Simulate the world (mostly tidy things up and print stats)
                ProvisionerConfig().simulator.simulate(self.tenants)
                t3 = datetime.datetime.now()

                self.sched.process_idle_jobs(self.tenants)
                tx = datetime.datetime.now()
                # Simulate Condor
                ProvisionerConfig().simulator.run_condor(self.tenants)
                t4 = datetime.datetime.now()
                # Simulate AWS
                ProvisionerConfig().simulator.run_aws()
                t5 = datetime.datetime.now()
                # Check if it should finish executing (e.g. jobs and
                # resources all terminated)
                if ProvisionerConfig().simulator.check_finished():
                    break

                self.manage_resources()
                t6 = datetime.datetime.now()
                if ((ProvisionerConfig().simulate_time -
                    ProvisionerConfig().sim_time).total_seconds() %
                    ProvisionerConfig().run_rate == 0):

                    self.provision_resources()
                t7 = datetime.datetime.now()

                load_time = (t2 - t1).total_seconds()
                sim_time = (t3 - t2).total_seconds()
                proc_idle_time = (tx - t3).total_seconds()
                condor_time = (t4 - tx).total_seconds()
                aws_time = (t5 - t4).total_seconds()
                manage_time = (t6 - t5).total_seconds()
                prov_time = (t7 - t6).total_seconds()

                # Otherwise, step through time
                ProvisionerConfig().simulate_time = ProvisionerConfig(
                ).simulate_time + datetime.timedelta(seconds=2)
                logger.debug("RUN ID: %s. SIMULATION: advancing time "
                             "2 second" % ProvisionerConfig().run_id)

                logger.debug("SIMULATION times: load (%s), sim (%s),"
                             " proc_idle (%s), condor (%s), aws (%s),"
                             " manage (%s), prov (%s)" % (
                                 load_time, sim_time, proc_idle_time,
                                 condor_time,
                                 aws_time, manage_time, prov_time))

        else:
            self.sched = CondorScheduler()
            while True:
                self.run_iterations = self.run_iterations + 1
                # Get the tenants from the database and process the current
                # condor_q. Also assign those jobs to each tenant.
                start_time = datetime.datetime.now()
                self.load_tenants_and_jobs()
                # provisioning will fail if there are no tenants
                if len(self.tenants) > 0:
                    # Handle all of the existing requests. This will cancel
                    # or migrate excess requests and update the database to
                    # reflect the state of the environment
                    self.manage_resources()

                    # Work out the price for each instance type and acquire
                    # resources for jobs
                    self.provision_resources()

                # wait "run_rate" seconds before trying again
                end_time = datetime.datetime.now()
                diff = (end_time - start_time).total_seconds()
                logger.debug("SCRIMP (SIMULATION) run loop: "
                             "%s seconds. Now sleeping %s seconds." % (
                                 diff, ProvisionerConfig().run_rate))
                if diff < ProvisionerConfig().run_rate:
                    time.sleep(ProvisionerConfig().run_rate - diff)
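The non-simulated branch keeps iterations on a fixed cadence by sleeping off the remainder of run_rate. A minimal, standalone sketch of the same pattern; do_work and the run_rate value are placeholders:

# Minimal sketch of the fixed-cadence loop pattern used above.
import datetime
import time

def do_work():
    pass  # placeholder for the load/manage/provision steps

run_rate = 60  # seconds per iteration (illustrative)
while True:
    start = datetime.datetime.now()
    do_work()
    elapsed = (datetime.datetime.now() - start).total_seconds()
    if elapsed < run_rate:
        time.sleep(run_rate - elapsed)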