Example 1
    def __init__(self, db_name=None, queue_name=None, host=None,
                port=27017, user=None, password=None, queue_delay=.1,
                poll_delay=.2):
        """Constructs a subscriber.

        Keyword arguments:
        db_name -- MongoDB database name (required)
        queue_name -- name of the MongoDB collection in which the queue exists
        host -- hostname or IP address of the MongoDB service (localhost if unspecified)
        port -- MongoDB port (default 27017)
        user -- user with read/write permission on the MongoDB collection
        password -- password for user
        queue_delay -- sleep throttle so that multiple subscribers can pull
                        from queued jobs (default .1 seconds)
        poll_delay -- number of seconds to sleep before checking again when
                        the queue is empty (default .2 seconds)

        """

        self.db_name = db_name
        self.queue_name = queue_name
        self.host = host
        self.port = port
        self.user = user
        self.password = password

        JobQueue.__init__(self, "subscriber", self.db_name, self.queue_name, host=self.host,
                port=self.port, user=self.user, password=self.password)

        self.queue_delay = queue_delay
        self.poll_delay = poll_delay
        self.fk_func_map = {}
        self.th_func_map = {}
        self.mul_func_map = {}
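
The docstring above documents everything needed to construct a subscriber. A minimal usage sketch, assuming the enclosing class is named Subscriber (the class name is not shown in the excerpt):

    sub = Subscriber(
        db_name='jobs',           # required
        queue_name='work_queue',  # MongoDB collection holding the queue
        host='localhost',
        port=27017,
        queue_delay=.1,           # throttle between pulls
        poll_delay=.2,            # sleep when the queue is empty
    )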
Example 2
    def test_publish(self):
        jq = JobQueue(self.db, collection_name=K.collection)
        job = {'message': 'hello world!'}
        jq.pub(job)
        self.assertEqual(jq.queue_count(), 1)
        jq.clear_queue()
        jq.q = None  # erase the queue
        self.assertRaises(Exception, jq.pub, job)

    def test_equal_priority(self):
        num_jobs = 10

        jobq = JobQueue(self.db.name)

        jobs = []
        for i in range(num_jobs):
            job = Job(self.db.name, self.job)
            job.pending()
            jobq.add_job_to_pending_queue(job)
            jobs.append(job)

        # should be FIFO
        for i in range(num_jobs):
            job = jobq.pop_job_from_pending_queue()
            self.assertEqual(jobs[i].job_id, job.job_id)
Example 4
    def run(self):
        debug('RUN')

        # fix for some broken saved jobs
        for i, j in enumerate(self.jobs):
            if isinstance(j, (str, np.string_)):
                self.jobs[i] = eval(j)

        # check to see if anything is left to do
        work_to_do = False
        for j in self.jobs:
            if j['status'] == 0 or j['status'] == 'Q':
                work_to_do = True
                break
        debug("WTD=%s" % work_to_do)
        if not work_to_do:
            return True

        # There is work to be done. Create a JobQueue and send stuff to it
        jobq = JobQueue(self, limit=self.qlimit)
        for j in self.jobs:
            if j['status'] == 0:
                debug("adding job %s" % j['num'])
                jobq.add(j)

        # Tell the JobQueue to start running jobs
        jobq.start()

        # join returns when JobQueue has run all the jobs
        res = jobq.join()
        if res == []:
            return True
        self.wqueue = res
        return False
Example 5
    def test_valid(self):
        jq = JobQueue(self.db)
        jq.db['jobqueue'].drop()
        jq._create(capped=False)
        self.assertFalse(jq.valid())
        self.assertRaises(Exception, jq._create)
        jq.clear_queue()

    def test_random_priority(self):
        num_jobs = 100

        jobq = JobQueue(self.db.name)

        jobs = []
        priorities = list(range(num_jobs))
        random.shuffle(priorities)
        for i in range(num_jobs):
            job = Job(self.db.name, {'priority': priorities[i]})
            job.pending()
            jobq.add_job_to_pending_queue(job)
            jobs.append(job)

        jobs = sorted(jobs)
        for i in range(num_jobs):
            job = jobq.pop_job_from_pending_queue()
            self.assertEqual(jobs[i].job_id, job.job_id)
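
The sorted(jobs) call implies that Job defines an ordering, presumably by its priority value. A minimal sketch of such a comparison, assuming a priority attribute (hypothetical; the real Job class is not shown):

    import functools

    @functools.total_ordering
    class Job:
        # sketch only: models just the ordering used by sorted(jobs)
        def __init__(self, priority):
            self.priority = priority

        def __eq__(self, other):
            return self.priority == other.priority

        def __lt__(self, other):
            # assumes ascending priority matches pop order
            return self.priority < other.priority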
Example 7
    def test_next(self):
        jq = JobQueue(self.db)
        self.assertRaises(Exception, jq.next)
        job = {'message': 'hello world!'}
        jq.pub(job)
        row = jq.next()
        self.assertEqual(row['data']['message'], 'hello world!')
        jq.clear_queue()
Example 8
File: hosts.py Project: zoidy/puq
    def run(self):
        jobq = JobQueue(self, limit=10, polltime=1)
        for j in self.jobs:
            if j['status'] == 0 or j['status'] == 'Q':
                debug("adding job %s" % j['num'])
                jobq.add(j)
        jobq.start()
        return jobq.join() == []
Example 9
    def test_publish(self):
        jq = JobQueue(self.db)
        job = {'message': 'hello world!'}
        jq.pub(job)
        self.assertEqual(jq.queue_count(), 1)
        jq.clear_queue()
        jq.q = None  # erase the queue
        self.assertRaises(Exception, jq.pub, job)
Example 10
def test_jobqueue():
    signal.signal(signal.SIGALRM, lambda signum, frame: pytest.fail())
    signal.alarm(5)

    with pytest.raises(ValueError):
        JobQueue(0)

    source = list(range(1024))
    queue = Queue()

    jq = JobQueue(8)

    for n in source:
        jq.put(store, (n, queue))

    while not jq.queue.empty():
        time.sleep(.1)

    assert (set(source) == set(queue.queue))
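
The test references a store helper that is not shown. From the call shape, jq.put(func, args) presumably schedules func(*args) on a worker thread, so a plausible definition (an assumption, not the project's actual code) is:

    def store(n, queue):
        # hypothetical worker body: push the value onto the shared
        # result queue so the test can compare input and output sets
        queue.put(n)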
Example 11
    def test_iter(self):
        NUM_JOBS = 3
        num_jobs_queued = [NUM_JOBS]

        def iterator_wait():
            num_jobs_queued[0] -= 1
            return num_jobs_queued[0] < 0

        jq = JobQueue(self.db,
                      iterator_wait=iterator_wait,
                      collection_name=K.collection)
        for ii in range(1, NUM_JOBS + 1):
            job = {'message': 'I am # ' + str(ii)}
            jq.pub(job)
        num_jobs_done = 0
        for job in jq:
            # print(job['data']['message'])
            num_jobs_done += 1
            record = jq.q.find_one({'_id': job['_id']})
            self.assertEqual(record['status'], jq.WORKING)
        self.assertEqual(num_jobs_done, NUM_JOBS)
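
The test implies that iterator_wait is a callback the queue consults between polls while iterating, with iteration ending once it returns True. A sketch of a reusable bounded-wait factory under that assumption:

    def make_bounded_wait(max_polls):
        state = {'remaining': max_polls}

        def iterator_wait():
            state['remaining'] -= 1
            return state['remaining'] < 0  # True -> stop iterating

        return iterator_wait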
Example 13
    def __init__(self, id, logger, conf):

        self.id = id
        self.logger = logger
        self.conf = conf
        self.job_queue = JobQueue(logger)
        self.done_queue = JobQueue(logger)
        self.scheduler = FairFitScheduler(logger, conf)

        # self.scheduler = FirstFitScheduler(logger, conf)

        self.scheduler.attach_job_queue(self.job_queue)
        self.scheduler.attach_done_queue(self.done_queue)
        self.users = self.scheduler.users
        self.resources = self.scheduler.resources
        self.servers = self.scheduler.servers
        self.peers = self.scheduler.peers
Example 17
    def test_next(self):
        jq = JobQueue(self.db, collection_name=K.collection)
        self.assertRaises(Exception, jq.next)
        job = {'message': 'hello world!'}
        jq.pub(job)
        row = jq.next()
        self.assertEqual(row['data']['message'], 'hello world!')
        self.assertEqual(jq.queue_count(), 0)
Example 18
    def __init__(self, db_name=None, queue_name=None, host=None,
                port=27017, user=None, password=None):
        """Constructs a publisher.

        Keyword arguments:
        db_name -- MongoDB database name (required)
        queue_name -- name of the MongoDB collection in which the queue exists
        host -- hostname or IP address of the MongoDB service (localhost if unspecified)
        port -- MongoDB port (default 27017)
        user -- user with read/write permission on the MongoDB collection
        password -- password for user

        """

        self.db_name = db_name
        self.queue_name = queue_name
        self.host = host
        self.port = port
        self.user = user
        self.password = password

        JobQueue.__init__(self, "publisher", self.db_name, self.queue_name, host=self.host,
                port=self.port, user=self.user, password=self.password)
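
As with the subscriber in Example 1, the enclosing class name is not shown. A minimal construction sketch, assuming it is called Publisher:

    pub = Publisher(db_name='jobs', queue_name='work_queue',
                    host='localhost', port=27017,
                    user='worker', password='secret')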
Example 19
    if config.get('debug'):
        jobs.logger.addHandler(logging.StreamHandler())
        jobs.logger.setLevel(logging.DEBUG)

    postgresql = config.get('postgresql', {})
    threads = postgresql.get('threads', 1)
    pg_uri = postgresql.get('uri')

    kafka = config.get('kafka', {})
    topics = kafka.pop('topics', '')
    if isinstance(topics, str):
        topics = (topics, )

    kafka['value_deserializer'] = json_deserialize
    if 'auto_offset_reset' not in kafka:
        kafka['auto_offset_reset'] = 'earliest'

    print('Setting up PostgreSQL...')
    pool = ThreadedConnectionPool(1, threads, pg_uri)
    with db.get_conn(pool) as c:
        db.run_ddl(c)

    print('Connecting to Kafka...')
    consumer = KafkaConsumer(*topics, **kafka)

    jq = JobQueue(threads)
    print('\nWebChecker-pg service is running\n')
    for message in consumer:
        jq.put(jobs.commit_message, (pool, message))
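
jobs.commit_message is not shown; from the call shape it presumably takes the connection pool and the deserialized Kafka message. A sketch under that assumption, with a hypothetical persistence helper:

    def commit_message(pool, message):
        # hypothetical body: persist the deserialized message value
        # using a pooled PostgreSQL connection
        with db.get_conn(pool) as conn:
            db.save_message(conn, message.value)  # hypothetical helper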
Example 20
    def test_pendingqueue_underflow(self):
        jobq = JobQueue(self.db.name)
        self.assertIsNone(jobq.pop_job_from_pending_queue())
Example 21
class Server:
    """Server simulation"""

    id = ''
    job_queue = None
    done_queue = None
    scheduler = None
    users = None
    resources = None
    peers = None
    nextid = 0
    migrated_jobs = 0
    returned_jobs = 0
    conf = None

    def __init__(self, id, logger, conf):

        self.id = id
        self.logger = logger
        self.conf = conf
        self.job_queue = JobQueue(logger)
        self.done_queue = JobQueue(logger)
        self.scheduler = FairFitScheduler(logger, conf)

        # self.scheduler = FirstFitScheduler(logger, conf)

        self.scheduler.attach_job_queue(self.job_queue)
        self.scheduler.attach_done_queue(self.done_queue)
        self.users = self.scheduler.users
        self.resources = self.scheduler.resources
        self.servers = self.scheduler.servers
        self.peers = self.scheduler.peers

    def submit(self, user_id, length, maxprice, vgrid):

        self.logger.info('%s received job from %s', self.id, user_id)

        # handle job

        job_id = user_id + '-' + str(self.nextid)
        job = {}
        job['JOB_ID'] = job_id
        self.nextid += 1
        job['OWNER'] = job['USER_CERT'] = user_id
        job['CPUTIME'] = length
        job['MAXPRICE'] = maxprice
        job['CPUCOUNT'] = 1
        job['NODECOUNT'] = 1
        job['MEMORY'] = 1
        job['DISK'] = 1
        job['ARCHITECTURE'] = 'X86'
        job['RUNTIMEENVIRONMENT'] = []
        job['RECEIVED_TIMESTAMP'] = time.gmtime()
        job['QUEUED_TIMESTAMP'] = time.gmtime()
        job['MIGRATE_COUNT'] = str(0)
        job['VGRID'] = vgrid

        # Enqueue job

        qlen = self.job_queue.queue_length()
        self.job_queue.enqueue_job(job, qlen)

        # Update user list with this job

        user_conf = {'USER_ID': user_id}

        # This will leave existing users unchanged while new users are created correctly

        user = self.scheduler.update_users(user_conf)
        self.scheduler.update_seen(user)

        # link job to user for continued monitoring

        user['QUEUE_HIST'].pop(0)
        user['QUEUE_HIST'].append(job)
        user['QUEUE_CNT'] += 1

        return True

    def request(self, res_id, length, minprice, vgrid):

        res_conf = {'RESOURCE_ID': res_id}
        res = self.scheduler.find_resource(res_conf)
        if not res:

            # create basic configuration

            res = {'RESOURCE_ID': res_id}

            # So far unused attributes

            res['CPUCOUNT'] = 1
            res['NODECOUNT'] = 1
            res['MEMORY'] = 1
            res['DISK'] = 1
            res['ARCHITECTURE'] = 'X86'
            res['RUNTIMEENVIRONMENT'] = []

            # UpdateResources need MINPRICE

            res['MINPRICE'] = minprice
            res['CPUTIME'] = length
            res['VGRID'] = vgrid
            res = self.scheduler.update_resources(res)

        # Update/add these in any case

        res['MINPRICE'] = minprice
        res['CPUTIME'] = length

        self.scheduler.update_seen(res)

        # Job price and diff fields are automatically set during scheduling

        job = self.scheduler.schedule(res)

        if job:
            self.logger.info(
                '%s scheduled job %s to %s (%s, %s, %s)',
                self.id,
                job['JOB_ID'],
                res_id,
                res['LOAD'],
                res['CUR_PRICE'],
                res['LOAD_MULTIPLY'],
            )
        else:
            self.logger.info(
                '%s scheduled empty job to %s (%s, %s, %s)',
                self.id,
                res_id,
                res['LOAD'],
                res['CUR_PRICE'],
                res['LOAD_MULTIPLY'],
            )

        self.scheduler.update_price(res)

        return job

    def return_finished(self, res_id, job):

        # Hand back finished job to server

        self.scheduler.finished_job(res_id, job)

    def sleep(self):
        self.logger.debug('%s sleeping', self.id)

    def migrate_jobs(self):

        # Migrate all jobs that can be executed cheaper at a remote resource

        local_jobs = self.job_queue.queue_length()
        migrate_count = 0

        if not self.peers:
            return 0

        # Use previously collected resource statuses for price directed migration

        for i in range(local_jobs):

            # queue shrinks as we migrate jobs so i may go out of range

            next_i = i - migrate_count
            job = self.job_queue.get_job(next_i)
            job_id = job['JOB_ID']

            # self.logger.debug("migrate_jobs: inspecting job %s", job_id)

            if 'SCHEDULE_HINT' not in job:

                # self.logger.debug("new job %s not marked yet", job_id)

                pass
            elif job['SCHEDULE_HINT'].startswith('MIGRATE '):
                server = job['SCHEDULE_HINT'].replace('MIGRATE ', '')
                server_conf = self.peers[server]['obj']
                self.logger.info('%s migrating job %s to %s', self.id, job_id,
                                 server)
                success = self.migrate_job(job, server_conf)
                if success:
                    job = self.job_queue.dequeue_job(next_i)
                    migrate_count += 1
                else:
                    self.logger.error(
                        'Migration to %s failed! leaving job %s at index %d',
                        server, job['JOB_ID'], next_i)
            else:

                # self.logger.debug("%s not marked for migration", job_id)

                pass

            # Limit number of migrated jobs to avoid thrashing

            if migrate_count >= self.conf.migrate_limit:
                break

        self.logger.info('%s actually migrated %d jobs', self.id,
                         migrate_count)

        self.migrated_jobs += migrate_count

        return migrate_count

    def migrate_job(self, job, server):
        del job['SCHEDULE_HINT']

        # Add or increment migration counter

        migrate_count = int(job['MIGRATE_COUNT']) + 1
        job['MIGRATE_COUNT'] = str(migrate_count)

        qlen = server.job_queue.queue_length()
        server.job_queue.enqueue_job(job, qlen)
        return True

    def return_result(self):

        # Return migrated jobs to source

        done_jobs = self.done_queue.queue_length()
        return_count = 0
        local_count = 0

        # Use previously collected resource statuses for price directed migration

        for i in range(done_jobs):

            # queue shrinks as we migrate jobs so i may go out of range

            next_i = i - (return_count + local_count)
            job = self.done_queue.get_job(next_i)
            job_id = job['JOB_ID']

            # self.logger.info("return_result: inspecting job %s", job_id)

            # Check if job returned to owner

            if self.scheduler.returned_job(job):
                job = self.done_queue.dequeue_job(next_i)

                # don't include local jobs in return counter

                local_count += 1
                continue

            # Otherwise try to pass it on to a closer peer

            # use propagated user migration cost to find shortest
            # path to user

            owner = self.scheduler.find_owner(job)
            user_conf = {'USER_ID': owner}
            user = self.scheduler.find_user(user_conf)

            # We may have treated a job that we don't have local owner
            # information for. If so, just leave job for later return.

            if not user:
                self.logger.info(
                    "return_result: don't know %s - delay return of %s", owner,
                    job_id)
                continue

            peer_id = self.scheduler.user_direction(user)
            peer_dict = self.peers[peer_id]
            peer = peer_dict['obj']

            # found peer - move job there

            if self.return_job(job, peer):
                job = self.done_queue.dequeue_job(next_i)
                return_count += 1
                break

            # Limit number of returned jobs to avoid flooding

            if return_count >= self.conf.migrate_limit:
                break

        self.logger.info('%s actually returned %d local and %d remote jobs',
                         self.id, local_count, return_count)

        self.returned_jobs += return_count

        return return_count

    def return_job(self, job, server):
        qlen = server.done_queue.queue_length()
        server.done_queue.enqueue_job(job, qlen)
        return True

    def refresh_servers(self):
        """
        Update information system in scheduler
        """

        # Update local status
        # self.logger.info("refresh_servers: %s updating local status", self.id)

        self.scheduler.update_local_server()

        # Remove users and resources no longer available with this server
        # self.logger.info("refresh_servers: %s pruning users and resources", self.id)

        self.scheduler.prune_peer_resources(self.id, self.resources)
        self.scheduler.prune_peer_users(self.id, self.users)

        # self.logger.info("refresh_servers: %s removing stale data", self.id)

        self.scheduler.remove_stale_data()

        # Update the server information for all peers.

        for (peer_id, peer_dict) in self.peers.items():

            # self.logger.info("refresh_servers: %s, peer %s", self.id, peer_id)

            peer = peer_dict['obj']
            self.refresh_peer_status(peer)

        return True

    def refresh_peer_status(self, peer):

        # Extract peer status from ConfigParser
        # object, peer.
        # Use contents to update local version of
        # peer status information in scheduler.

        peer_servers = {}
        peer_resources = {}
        peer_users = {}

        for (name, server) in peer.servers.items():

            # self.logger.debug("refresh_peer_status: %s", name)

            peer_servers[name] = self.scheduler._clone_dict(server)

        for (name, resource) in peer.resources.items():

            # self.logger.debug("refresh_peer_status: %s", name)

            peer_resources[name] = self.scheduler._clone_dict(resource)
        for (name, user) in peer.users.items():

            # self.logger.debug("refresh_peer_status: %s", name)

            peer_users[name] = self.scheduler._clone_dict(user)

        self.scheduler.update_peer_status(peer.id, peer_servers,
                                          peer_resources, peer_users)

    def exchange_status(self):

        # migrate every time for now
        # Migrate using previous status and scheduling
        # information.

        self.migrate_jobs()
        self.return_result()

        self.refresh_servers()

        # print self.id, self.resources.keys()

    def simulate(self, timestep):

        # Handle resource and user requests

        # rand = random.random()

        # self.sleep()

        # communicate every time for now

        comm_freq = 1
        if timestep % comm_freq == 0:

            # Update local and remote information

            self.exchange_status()

            # Make sure jobs don't get stuck

            self.scheduler.filter_jobs()
        qlen = self.job_queue.queue_length()
        self.logger.info('%s: %d jobs in queue', self.id, qlen)
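
A driver sketch showing how two such servers might be wired together and stepped, assuming a standard logger and a minimal conf object carrying the migrate_limit attribute the class reads (the {'obj': server} peer shape is taken from migrate_jobs above):

    import logging

    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger('simulation')

    class Conf:
        migrate_limit = 2  # minimal stand-in for the real config object

    conf = Conf()
    a = Server('server-a', logger, conf)
    b = Server('server-b', logger, conf)

    # peers map server ids to {'obj': server} wrappers
    a.peers['server-b'] = {'obj': b}
    b.peers['server-a'] = {'obj': a}

    a.submit('user-1', 10, 5, 'default')
    for t in range(3):
        a.simulate(t)
        b.simulate(t)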
Example 23
    def test_init(self):
        jq = JobQueue(self.db)
        self.assertTrue(jq.valid())
        self.assertRaises(Exception, jq._create)
        jq.clear_queue()
Example 25
    def test_valid(self):
        jq = JobQueue(self.db, collection_name=K.collection)
        jq.db[K.collection].drop()
        jq._create(capped=False)
        self.assertFalse(jq.valid())
        self.assertRaises(Exception, jq._create)
Example 26
    def test_init(self):
        jq = JobQueue(self.db, collection_name=K.collection)
        self.assertTrue(jq.valid())
        self.assertRaises(Exception, jq._create)