Example #1
0
 def test_lock_blocking(self):
     """A blocking acquire fails while the key is held, then succeeds.

     The outer lock is taken with ``expire=0.3``. The first contender waits
     with ``blocking_timeout=0.2`` and must raise ``UnableToGetLock``; a
     second contender using the same timeout must then get the lock.
     """
     lock_name = 'test_key'
     with redis.lock(lock_name, expire=0.3, nowait=True):
         contender_got_lock = False
         try:
             with redis.lock(lock_name, blocking_timeout=0.2):
                 contender_got_lock = True
         except UnableToGetLock:
             # Expected outcome for the first contender.
             pass
         assert not contender_got_lock, "Shouldn't be able to acquire lock"

         # should succeed, rather than throwing UnableToGetLock
         # (presumably because the outer lock's expiry elapses during the
         # second wait -- confirm against the lock implementation)
         with redis.lock(lock_name, blocking_timeout=0.2):
             pass
Example #2
0
 def wrapped(**kwargs):
     """Call ``func(**kwargs)`` while holding a non-blocking lock.

     The lock key is ``<module>:<name>:<md5>``, where the digest covers the
     ``name=repr(value)`` pairs of ``kwargs`` sorted and joined with ``&``.

     Returns whatever ``func`` returns. If the lock cannot be acquired, a
     warning is logged and ``None`` is returned implicitly.
     """
     module_name = func.__module__
     func_name = func.__name__
     # Sorting makes the key independent of keyword-argument order.
     arg_pairs = ['{0}={1}'.format(name, repr(value))
                  for name, value in sorted(kwargs.iteritems())]
     args_digest = md5('&'.join(arg_pairs)).hexdigest()
     key = '{0}:{1}:{2}'.format(module_name, func_name, args_digest)
     try:
         with redis.lock(key, expire=300, nowait=True):
             return func(**kwargs)
     except UnableToGetLock:
         # Note: an UnableToGetLock raised from inside func is caught here too.
         current_app.logger.warn('Unable to get lock for %s', key)
Example #3
0
 def wrapped(**kwargs):
     """Call ``func(**kwargs)`` while holding a non-blocking lock.

     The lock key is ``<func name>:<md5>``, where the digest covers the
     sorted ``name=repr(value)`` pairs of ``kwargs`` joined with ``&``.

     Returns the result of ``func``. When the lock is unavailable, a warning
     is logged and the function falls through, returning ``None``.
     """
     func_name = func.__name__
     rendered_args = []
     for name, value in sorted(kwargs.iteritems()):
         rendered_args.append("{0}={1}".format(name, repr(value)))
     args_digest = md5("&".join(rendered_args)).hexdigest()
     key = "{0}:{1}".format(func_name, args_digest)
     try:
         with redis.lock(key, timeout=1, expire=300, nowait=True):
             return func(**kwargs)
     except UnableToGetLock:
         # Also reached if func itself raises UnableToGetLock.
         current_app.logger.warn("Unable to get lock for %s", key)
Example #4
0
    def test_lock_cant_unlock_others(self):
        """Releasing an expired lock must not release a newer holder's lock.

        ``initial_lock`` is acquired with ``expire=0.2`` and left to lapse;
        ``lock2`` then takes the same key. Exiting ``initial_lock`` afterwards
        must leave ``lock2`` in place, which is checked by verifying that a
        third non-blocking acquire still raises ``UnableToGetLock``.
        """
        KEY = 'test_key'
        initial_lock = redis.lock(KEY, expire=0.2, nowait=True)
        initial_lock.__enter__()
        # expire the current lock
        sleep(0.3)

        lock2 = redis.lock(KEY, nowait=True)
        lock2.__enter__()
        # Everything after lock2 is acquired runs under try/finally so that
        # lock2 is released even if exiting the stale lock raises; otherwise
        # the shared 'test_key' would stay locked for later tests.
        try:
            initial_lock.__exit__(None, None, None)

            # ensure that didn't unlock lock2
            try:
                with redis.lock(KEY, nowait=True):
                    assert False, "Shouldn't be able to acquire lock"
            except UnableToGetLock:
                pass
        finally:
            lock2.__exit__(None, None, None)
Example #5
0
 def wrapped(**kwargs):
     """Call ``func(**kwargs)`` while holding a non-blocking lock.

     The lock key is ``<func name>:<args>``, where ``<args>`` is the sorted
     ``name=value`` pairs of ``kwargs`` joined with ``&``. The values are
     embedded as-is (no ``repr`` and no hashing).

     Returns the result of ``func``; if the lock is unavailable a warning is
     logged and ``None`` is returned implicitly.
     """
     func_name = func.__name__
     arg_pairs = ['{0}={1}'.format(name, value)
                  for name, value in sorted(kwargs.iteritems())]
     key = '{0}:{1}'.format(func_name, '&'.join(arg_pairs))
     try:
         with redis.lock(key, timeout=1, expire=300, nowait=True):
             return func(**kwargs)
     except UnableToGetLock:
         # Also reached if func itself raises UnableToGetLock.
         current_app.logger.warn('Unable to get lock for %s', key)
Example #6
0
    def test_lock_failure_doesnt_refresh(self):
        """A failed acquire must not extend the existing key's expiry.

        The key is written directly with ``px=250``, a non-blocking acquire
        is attempted (and must fail), and after sleeping 0.3 the key must be
        gone.
        """
        lock_name = 'test_key'

        # Occupy the key directly, bypassing the lock helper.
        assert redis.redis.set(lock_name, "initial", px=250)

        acquired = False
        try:
            with redis.lock(lock_name, nowait=True, timeout=1):
                acquired = True
        except UnableToGetLock:
            # Expected: the key is already taken.
            pass
        assert not acquired, "Shouldn't be able to acquire lock"

        # Ensure the initial set is expired, but a refresh wouldn't.
        sleep(0.3)
        assert redis.redis.get(lock_name) is None
Example #7
0
 def wrapped(**kwargs):
     """Call ``func(**kwargs)`` while holding a non-blocking lock.

     The lock key has the form ``<module>:<name>:<md5>``; the digest is taken
     over the sorted ``name=repr(value)`` pairs of ``kwargs`` joined with
     ``&``, so calls with equal keyword arguments share a key.

     Returns the result of ``func``. When the lock cannot be obtained, a
     warning is logged and ``None`` is returned implicitly.
     """
     module_name, func_name = func.__module__, func.__name__
     sorted_items = sorted(kwargs.iteritems())
     signature = '&'.join(
         '{0}={1}'.format(name, repr(value)) for name, value in sorted_items
     )
     key = '{0}:{1}:{2}'.format(
         module_name, func_name, md5(signature).hexdigest())
     try:
         with redis.lock(key, expire=300, nowait=True):
             return func(**kwargs)
     except UnableToGetLock:
         # Also reached if func itself raises UnableToGetLock.
         current_app.logger.warn('Unable to get lock for %s', key)
Example #8
0
    def process(self, fp):
        """Store the coverage entries parsed from ``fp``.

        Each entry from ``self.get_coverage(fp)`` is first inserted inside a
        nested transaction. If that raises ``IntegrityError``, the entry is
        passed through ``self.merge_coverage`` under a per-job, per-file
        redis lock and the merged object is added instead. The session is
        committed once per entry.

        Returns the list produced by ``get_coverage``. Merged replacements
        are added to the session but do not replace entries in that list.
        """
        results = self.get_coverage(fp)

        for coverage in results:
            try:
                # begin_nested() keeps a failed insert from discarding the
                # rest of the session (a SAVEPOINT, assuming SQLAlchemy).
                with db.session.begin_nested():
                    db.session.add(coverage)
            except IntegrityError:
                # Presumably a row for this job/file already exists; merge
                # under a lock scoped to that job and filename.
                job_hex = coverage.job_id.hex
                filename_digest = md5(coverage.filename).hexdigest()
                lock_key = 'coverage:{job_id}:{file_hash}'.format(
                    job_id=job_hex,
                    file_hash=filename_digest,
                )
                with redis.lock(lock_key):
                    merged = self.merge_coverage(coverage)
                    db.session.add(merged)
            db.session.commit()

        return results
Example #9
0
    def process(self, fp):
        """Persist every coverage record extracted from ``fp``.

        Records come from ``self.get_coverage(fp)``. A record whose insert
        raises ``IntegrityError`` is merged via ``self.merge_coverage`` while
        holding a redis lock keyed on the job id and an md5 of the filename.
        A commit is issued after each record, whichever path was taken.

        Returns the original list from ``get_coverage``.
        """
        results = self.get_coverage(fp)

        for record in results:
            try:
                with db.session.begin_nested():
                    db.session.add(record)
            except IntegrityError:
                key_parts = {
                    'job_id': record.job_id.hex,
                    'file_hash': md5(record.filename).hexdigest(),
                }
                lock_key = 'coverage:{job_id}:{file_hash}'.format(**key_parts)
                # Hold the lock only for the merge; the commit happens after
                # the lock has been released.
                with redis.lock(lock_key):
                    record = self.merge_coverage(record)
                    db.session.add(record)
            db.session.commit()

        return results
    def post(self):
        """Allocate the next jobstep and respond with its allocation context.

        Responds with an empty list when ``find_next_jobstep`` returns None,
        and with a 503 error when the ``jobstep:allocate`` lock is already
        held. If building the response fails, the jobstep is marked
        finished/aborted and a 503 error is returned.
        """
        try:
            with redis.lock('jobstep:allocate', nowait=True):
                jobstep = self.find_next_jobstep()

                if jobstep is None:
                    # Should 204, but flask/werkzeug throws StopIteration (bug!) for tests
                    return self.respond([])

                jobstep.status = Status.allocated
                db.session.add(jobstep)
                db.session.flush()
        except redis.UnableToGetLock:
            return error('Another allocation is in progress', http_code=503)

        # The lock is released by now; only the response is built below.
        try:
            plan, step = JobPlan.get_build_step_for_job(jobstep.job_id)

            assert plan and step

            payload = self.serialize(jobstep)
            payload['project'] = self.serialize(jobstep.project)
            resources = {
                'cpus': jobstep.data.get('cpus', 4),
                'mem': jobstep.data.get('mem', 8 * 1024),
            }
            payload['resources'] = resources
            payload['cmd'] = step.get_allocation_command(jobstep)

            return self.respond([payload])
        except Exception:
            # Any failure (including the assertion above) aborts the jobstep.
            jobstep.status = Status.finished
            jobstep.result = Result.aborted
            db.session.add(jobstep)
            db.session.flush()

            logging.exception(
                'Exception occurred while allocating job step for project %s',
                jobstep.project.slug)

            return error('Internal error while attempting allocation',
                         http_code=503)
Example #11
0
    def post(self):
        """Allocate pending jobsteps that fit within the offered resources.

        The request body is JSON with a ``resources`` object carrying
        ``cpus`` and ``mem`` (MB). Up to 10 candidate jobsteps are examined
        under the ``jobstep:allocate`` lock; each one whose requested
        cpus/mem still fit in the remaining offer is marked allocated.

        Returns:
            A response listing the serialized allocated jobsteps (possibly
            empty), or an error response when ``resources`` is missing or
            malformed, or (503) when another allocation holds the lock.
        """
        args = json.loads(request.data)

        try:
            resources = args['resources']
        except KeyError:
            return error('Missing resources attribute')

        # Validate the offer up front: a missing or non-numeric cpus/mem value
        # previously escaped as an unhandled KeyError/TypeError/ValueError.
        try:
            total_cpus = int(resources['cpus'])
            total_mem = int(resources['mem'])  # MB
        except (KeyError, TypeError, ValueError):
            return error('Missing or invalid cpus/mem in resources attribute')

        try:
            with redis.lock('jobstep:allocate', nowait=True):
                available_allocations = self.find_next_jobsteps(limit=10)
                to_allocate = []
                for jobstep in available_allocations:
                    req_cpus = jobstep.data.get('cpus', 4)
                    req_mem = jobstep.data.get('mem', 8 * 1024)

                    if total_cpus >= req_cpus and total_mem >= req_mem:
                        # Deduct from the offer so later candidates only see
                        # what is left.
                        total_cpus -= req_cpus
                        total_mem -= req_mem

                        jobstep.status = Status.allocated
                        db.session.add(jobstep)

                        to_allocate.append(jobstep)
                    else:
                        logging.info(
                            'Not allocating %s due to lack of offered resources',
                            jobstep.id.hex)

                if not to_allocate:
                    # Should 204, but flask/werkzeug throws StopIteration (bug!) for tests
                    return self.respond([])

                db.session.flush()
        except redis.UnableToGetLock:
            return error('Another allocation is in progress', http_code=503)

        context = []

        for jobstep, jobstep_data in zip(to_allocate,
                                         self.serialize(to_allocate)):
            try:
                jobplan, buildstep = JobPlan.get_build_step_for_job(
                    jobstep.job_id)

                assert jobplan and buildstep

                jobstep_data['project'] = self.serialize(jobstep.project)
                jobstep_data['resources'] = {
                    'cpus': jobstep.data.get('cpus', 4),
                    'mem': jobstep.data.get('mem', 8 * 1024),
                }
                jobstep_data['cmd'] = buildstep.get_allocation_command(jobstep)
            except Exception:
                # A jobstep whose response entry cannot be built is aborted
                # and left out of the response; the others still go out.
                jobstep.status = Status.finished
                jobstep.result = Result.aborted
                db.session.add(jobstep)
                db.session.flush()

                logging.exception(
                    'Exception occurred while allocating job step %s for project %s',
                    jobstep.id.hex, jobstep.project.slug)
            else:
                context.append(jobstep_data)

        return self.respond(context)
    def post(self):
        """Allocate pending jobsteps that fit within the offered resources.

        The request body is JSON with a ``resources`` object carrying
        ``cpus`` and ``mem`` (MB). Everything after input validation is
        timed under the ``jobstep_allocate`` stats timer. Up to 10 candidate
        jobsteps are examined under the ``jobstep:allocate`` lock; each one
        whose requested cpus/mem still fit in the remaining offer is marked
        allocated.

        Returns:
            A response listing the serialized allocated jobsteps (possibly
            empty), or an error response when ``resources`` is missing or
            malformed, or (503) when another allocation holds the lock.
        """
        args = json.loads(request.data)

        try:
            resources = args['resources']
        except KeyError:
            return error('Missing resources attribute')

        # Validate the offer up front: a missing or non-numeric cpus/mem value
        # previously escaped as an unhandled KeyError/TypeError/ValueError.
        try:
            total_cpus = int(resources['cpus'])
            total_mem = int(resources['mem'])  # MB
        except (KeyError, TypeError, ValueError):
            return error('Missing or invalid cpus/mem in resources attribute')

        with statsreporter.stats().timer('jobstep_allocate'):
            try:
                with redis.lock('jobstep:allocate', nowait=True):
                    available_allocations = self.find_next_jobsteps(limit=10)
                    to_allocate = []
                    for jobstep in available_allocations:
                        req_cpus = jobstep.data.get('cpus', 4)
                        req_mem = jobstep.data.get('mem', 8 * 1024)

                        if total_cpus >= req_cpus and total_mem >= req_mem:
                            # Deduct from the offer so later candidates only
                            # see what is left.
                            total_cpus -= req_cpus
                            total_mem -= req_mem

                            jobstep.status = Status.allocated
                            db.session.add(jobstep)

                            to_allocate.append(jobstep)
                        else:
                            logging.info('Not allocating %s due to lack of offered resources', jobstep.id.hex)

                    if not to_allocate:
                        # Should 204, but flask/werkzeug throws StopIteration (bug!) for tests
                        return self.respond([])

                    db.session.flush()
            except UnableToGetLock:
                return error('Another allocation is in progress', http_code=503)

            context = []

            for jobstep, jobstep_data in zip(to_allocate, self.serialize(to_allocate)):
                try:
                    jobplan, buildstep = JobPlan.get_build_step_for_job(jobstep.job_id)

                    assert jobplan and buildstep

                    jobstep_data['project'] = self.serialize(jobstep.project)
                    jobstep_data['resources'] = {
                        'cpus': jobstep.data.get('cpus', 4),
                        'mem': jobstep.data.get('mem', 8 * 1024),
                    }
                    jobstep_data['cmd'] = buildstep.get_allocation_command(jobstep)
                except Exception:
                    # A jobstep whose response entry cannot be built is marked
                    # infra_failed and left out of the response.
                    jobstep.status = Status.finished
                    jobstep.result = Result.infra_failed
                    db.session.add(jobstep)
                    db.session.flush()

                    logging.exception(
                        'Exception occurred while allocating job step %s for project %s',
                        jobstep.id.hex, jobstep.project.slug)
                else:
                    context.append(jobstep_data)

            return self.respond(context)
Example #13
0
    def post(self, command_id):
        """Update a command's status/return code and optionally expand it.

        Arguments are read from ``self.post_parser``: ``date``, ``output``,
        ``status`` and ``return_code`` are the fields used here.

        When the request carries output or marks the command finished, the
        whole update runs while holding an ``expand:<command_id>`` lock, and
        the command may be expanded via the expander class returned by
        ``self.get_expander(command.type)``.

        Returns:
            ``('', 404)`` if the command does not exist; an error response if
            the command is already finished, if output is required but
            missing, or if expander validation fails with an AssertionError;
            ``('', 500)`` on any other validation exception; otherwise the
            serialized command.
        """
        args = self.post_parser.parse_args()

        # Caller-supplied timestamp wins; otherwise use the current UTC time.
        current_datetime = args.date or datetime.utcnow()

        # We need to lock this resource to ensure the command doesn't get expanded
        # twice in the time it's checking the attr + writing the updated value
        if args.output or args.status == 'finished':
            lock = redis.lock('expand:{}'.format(command_id), expire=15, nowait=True)
        else:
            lock = None

        # NOTE(review): with nowait=True this presumably raises if the lock is
        # already held, and nothing here catches that -- confirm intended.
        if lock:
            lock.__enter__()

        try:
            command = Command.query.get(command_id)
            if command is None:
                return '', 404

            if command.status == Status.finished:
                return error("Command already marked as finished")

            if args.return_code is not None:
                command.return_code = args.return_code

            if args.status:
                command.status = Status[args.status]

                # if we've finished this job, lets ensure we have set date_finished
                if command.status == Status.finished and command.date_finished is None:
                    command.date_finished = current_datetime
                elif command.status != Status.finished and command.date_finished:
                    command.date_finished = None

                # Same bookkeeping for date_started: set once the command has
                # left the queued state, cleared if it is queued again.
                if command.status != Status.queued and command.date_started is None:
                    command.date_started = current_datetime
                elif command.status == Status.queued and command.date_started:
                    command.date_started = None

            db.session.add(command)
            db.session.flush()

            if args.output or args.status == 'finished':
                # don't expand a jobstep that already failed
                if command.jobstep.result in (Result.aborted, Result.failed, Result.infra_failed):
                    statsreporter.stats().incr('command_expansion_aborted')
                    # Returns before the commit below; the flushed changes are
                    # not committed on this path.
                    return self.respond(command)
                expander_cls = self.get_expander(command.type)
                if expander_cls is not None:
                    # Expandable command types must come with output.
                    if not args.output:
                        db.session.rollback()
                        return error("Missing output for command of type %s" % command.type)

                    expander = expander_cls(
                        project=command.jobstep.project,
                        data=args.output,
                    )

                    try:
                        expander.validate()
                    except AssertionError as e:
                        # Validation failures surface their message to the caller.
                        db.session.rollback()
                        return error('%s' % e)
                    except Exception:
                        db.session.rollback()
                        return '', 500

                    self.expand_command(command, expander, args.output)

            db.session.commit()

        finally:
            # Release the lock on every exit path, including early returns.
            if lock:
                lock.__exit__(None, None, None)

        return self.respond(command)
Example #14
0
    def post(self):
        """Allocate pending jobsteps that fit within the offered resources.

        The JSON request body must contain ``resources``; its ``cpus`` and
        ``mem`` (MB) values default to 0 when absent. Up to 10 candidates are
        considered under the ``jobstep:allocate`` lock, and each one that
        still fits in the remaining offer is marked allocated.

        Responds with the serialized allocated jobsteps (possibly an empty
        list), an error if ``resources`` is missing, or a 503 error when the
        lock is already held.
        """
        args = json.loads(request.data)

        try:
            resources = args['resources']
        except KeyError:
            return error('Missing resources attribute')

        # cpu and mem as 0 are treated by changes-client
        # as having no enforced limit
        cpus_left = int(resources.get('cpus', 0))
        mem_left = int(resources.get('mem', 0))  # MB

        with statsreporter.stats().timer('jobstep_allocate'):
            try:
                with redis.lock('jobstep:allocate', nowait=True):
                    candidates = self.find_next_jobsteps(limit=10)
                    to_allocate = []
                    for jobstep in candidates:
                        need_cpus = jobstep.data.get('cpus', 4)
                        need_mem = jobstep.data.get('mem', 8 * 1024)

                        fits = cpus_left >= need_cpus and mem_left >= need_mem
                        if not fits:
                            logging.info(
                                'Not allocating %s due to lack of offered resources',
                                jobstep.id.hex)
                            continue

                        cpus_left -= need_cpus
                        mem_left -= need_mem

                        jobstep.status = Status.allocated
                        db.session.add(jobstep)

                        to_allocate.append(jobstep)
                        # The JobSteps returned are pending_allocation, and the initial state for a Mesos JobStep is
                        # pending_allocation, so we can determine how long it was pending by how long ago it was
                        # created.
                        waited = datetime.utcnow() - jobstep.date_created
                        pending_seconds = waited.total_seconds()
                        statsreporter.stats().log_timing(
                            'duration_pending_allocation',
                            pending_seconds * 1000)

                    if not to_allocate:
                        # Should 204, but flask/werkzeug throws StopIteration (bug!) for tests
                        return self.respond([])

                    db.session.flush()
            except UnableToGetLock:
                return error('Another allocation is in progress',
                             http_code=503)

            context = []

            serialized = self.serialize(to_allocate)
            for jobstep, entry in zip(to_allocate, serialized):
                try:
                    jobplan, buildstep = JobPlan.get_build_step_for_job(
                        jobstep.job_id)

                    assert jobplan and buildstep

                    entry['project'] = self.serialize(jobstep.project)
                    entry['resources'] = {
                        'cpus': jobstep.data.get('cpus', 4),
                        'mem': jobstep.data.get('mem', 8 * 1024),
                    }
                    entry['cmd'] = buildstep.get_allocation_command(jobstep)
                except Exception:
                    # Mark the jobstep infra_failed and leave it out of the
                    # response; the remaining jobsteps are still returned.
                    jobstep.status = Status.finished
                    jobstep.result = Result.infra_failed
                    db.session.add(jobstep)
                    db.session.flush()

                    logging.exception(
                        'Exception occurred while allocating job step %s for project %s',
                        jobstep.id.hex, jobstep.project.slug)
                else:
                    context.append(entry)

            return self.respond(context)
Example #15
0
    def new_post(self, args):
        """
        New POST code path that just allocates a list of jobstep IDs.
        This new method of allocation works by first sending a GET request
        to get a priority sorted list of possible jobsteps. The scheduler can
        then allocate these as it sees fit, and sends a POST request with
        the list of jobstep IDs it actually decided to allocate.

        We maintain the old POST code for now so that current schedulers
        continue to work (anything without a jobstep_ids arg goes to the old
        POST method). But it will likely get removed once it's out of use.

        Args:
            args: JSON dict of args to the POST request. This must include a
                jobstep_ids field, which is a list of jobstep ID hexs to
                allocate, and optionally a cluster that these jobsteps are in.

        Returns:
            A response of ``{'allocated': jobstep_ids}`` on success, or an
            error response when ``jobstep_ids`` is missing, an ID is not a
            valid UUID, a jobstep belongs to a different cluster, a jobstep
            is not pending allocation (409), the allocation lock is held
            (409), or the commit raises ``IntegrityError`` (409).
        """
        try:
            jobstep_ids = args['jobstep_ids']
        except KeyError:
            return error('Missing jobstep_ids attribute')

        for jobstep_id in jobstep_ids:
            try:
                UUID(jobstep_id)
            except (ValueError, TypeError, AttributeError):
                # UUID() raises ValueError for malformed strings, but
                # AttributeError/TypeError for non-string JSON values
                # (numbers, null, lists); all of them are invalid IDs.
                err = "Invalid jobstep id sent to jobstep_allocate: %s"
                logging.warning(err, jobstep_id, exc_info=True)
                return error(err % (jobstep_id,))

        cluster = args.get('cluster')

        with statsreporter.stats().timer('jobstep_allocate_post'):
            try:
                # Allocation is serialized per cluster when one is given.
                lock_key = 'jobstep:allocate'
                if cluster:
                    lock_key = lock_key + ':' + cluster
                with redis.lock(lock_key, nowait=True):
                    # NOTE(review): IDs with no matching row are skipped here
                    # yet still echoed back as allocated -- confirm intended.
                    jobsteps = JobStep.query.filter(
                        JobStep.id.in_(jobstep_ids))

                    for jobstep in jobsteps:
                        if jobstep.cluster != cluster:
                            # Roll back status changes made earlier in this loop.
                            db.session.rollback()
                            err = 'Jobstep is in cluster %s but tried to allocate in cluster %s (id=%s, project=%s)'
                            err_args = (jobstep.cluster, cluster,
                                        jobstep.id.hex, jobstep.project.slug)
                            logging.warning(err, *err_args)
                            return error(err % err_args)
                        if jobstep.status != Status.pending_allocation:
                            db.session.rollback()
                            err = 'Jobstep %s for project %s was already allocated'
                            err_args = (jobstep.id.hex, jobstep.project.slug)
                            logging.warning(err, *err_args)
                            return error(err % err_args, http_code=409)

                        jobstep.status = Status.allocated
                        db.session.add(jobstep)
                        # The JobSteps returned are pending_allocation, and the initial state for a Mesos JobStep is
                        # pending_allocation, so we can determine how long it was pending by how long ago it was
                        # created.
                        pending_seconds = (
                            datetime.utcnow() -
                            jobstep.date_created).total_seconds()
                        statsreporter.stats().log_timing(
                            'duration_pending_allocation',
                            pending_seconds * 1000)

                    db.session.commit()

                    return self.respond({'allocated': jobstep_ids})
            except UnableToGetLock:
                return error('Another allocation is in progress',
                             http_code=409)
            except IntegrityError:
                err = 'Could not commit allocation'
                logging.warning(err, exc_info=True)
                return error(err, http_code=409)
Example #16
0
    def post(self):
        """Allocate pending jobsteps that fit within the offered resources.

        Requests carrying a non-empty ``jobstep_ids`` are delegated to
        ``new_post``. Otherwise the JSON body must contain ``resources``
        (``cpus`` and ``mem`` in MB, each defaulting to 0) and may name a
        ``cluster``. Up to 10 candidates are considered under a lock that is
        per-cluster when a cluster is given. Resource requirements and the
        allocation command come from each jobstep's build step.

        Responds with the serialized allocated jobsteps (possibly an empty
        list), an error if ``resources`` is missing, or a 503 error when the
        lock is already held.
        """
        args = json.loads(request.data)

        # TODO(nate): get rid of old allocation code once scheduler is updated to use this
        if args.get('jobstep_ids'):
            return self.new_post(args)

        try:
            resources = args['resources']
        except KeyError:
            return error('Missing resources attribute')

        cluster = args.get('cluster')

        # cpu and mem as 0 are treated by changes-client
        # as having no enforced limit
        cpus_left = int(resources.get('cpus', 0))
        mem_left = int(resources.get('mem', 0))  # MB

        with statsreporter.stats().timer('jobstep_allocate'):
            try:
                lock_key = 'jobstep:allocate'
                if cluster:
                    lock_key = lock_key + ':' + cluster
                with redis.lock(lock_key, nowait=True):
                    candidates = self.find_next_jobsteps(
                        limit=10, cluster=cluster)
                    to_allocate = []
                    for jobstep in candidates:
                        jobplan, buildstep = JobPlan.get_build_step_for_job(
                            jobstep.job_id)
                        assert jobplan and buildstep
                        limits = buildstep.get_resource_limits()
                        need_cpus = limits.get('cpus', 4)
                        need_mem = limits.get('memory', 8 * 1024)

                        fits = cpus_left >= need_cpus and mem_left >= need_mem
                        if not fits:
                            logging.info(
                                'Not allocating %s due to lack of offered resources',
                                jobstep.id.hex)
                            continue

                        cpus_left -= need_cpus
                        mem_left -= need_mem
                        allocation_cmd = buildstep.get_allocation_command(
                            jobstep)

                        jobstep.status = Status.allocated
                        db.session.add(jobstep)

                        # We keep the data from the BuildStep to be sure we're using the same resource values.
                        alloc_data = _AllocData(cpus=need_cpus,
                                                memory=need_mem,
                                                command=allocation_cmd)
                        to_allocate.append((jobstep, alloc_data))
                        # The JobSteps returned are pending_allocation, and the initial state for a Mesos JobStep is
                        # pending_allocation, so we can determine how long it was pending by how long ago it was
                        # created.
                        waited = datetime.utcnow() - jobstep.date_created
                        pending_seconds = waited.total_seconds()
                        statsreporter.stats().log_timing(
                            'duration_pending_allocation',
                            pending_seconds * 1000)

                    if not to_allocate:
                        # Should 204, but flask/werkzeug throws StopIteration (bug!) for tests
                        return self.respond([])

                    db.session.flush()
            except UnableToGetLock:
                return error('Another allocation is in progress',
                             http_code=503)

            context = []

            for jobstep, alloc_data in to_allocate:
                try:
                    entry = self.serialize(jobstep)

                    entry['project'] = self.serialize(jobstep.project)
                    entry['resources'] = {
                        'cpus': alloc_data.cpus,
                        'mem': alloc_data.memory,
                    }
                    entry['cmd'] = alloc_data.command
                except Exception:
                    # Mark the jobstep infra_failed and leave it out of the
                    # response; the remaining jobsteps are still returned.
                    jobstep.status = Status.finished
                    jobstep.result = Result.infra_failed
                    db.session.add(jobstep)
                    db.session.flush()

                    logging.exception(
                        'Exception occurred while allocating job step %s for project %s',
                        jobstep.id.hex, jobstep.project.slug)
                else:
                    context.append(entry)

            return self.respond(context)
Example #17
0
    def post(self):
        """
        Allocates a list of jobstep IDs.
        This method of allocation works by first sending a GET request
        to get a priority sorted list of possible jobsteps. The scheduler can
        then allocate these as it sees fit, and sends a POST request with
        the list of jobstep IDs it actually decided to allocate.

        The JSON request body must include ``jobstep_ids`` (a list of jobstep
        ID hexs) and may include ``cluster``.

        Returns:
            A response of ``{'allocated': jobstep_ids}`` on success, or an
            error response when ``jobstep_ids`` is missing, an ID is not a
            valid UUID, a jobstep belongs to a different cluster, a jobstep
            is not pending allocation (409), the allocation lock is held
            (409), or the commit raises ``IntegrityError`` (409).
        """
        args = json.loads(request.data)

        try:
            jobstep_ids = args['jobstep_ids']
        except KeyError:
            return error('Missing jobstep_ids attribute')

        for jobstep_id in jobstep_ids:
            try:
                UUID(jobstep_id)
            except (ValueError, TypeError, AttributeError):
                # UUID() raises ValueError for malformed strings, but
                # AttributeError/TypeError for non-string JSON values
                # (numbers, null, lists); all of them are invalid IDs.
                err = "Invalid jobstep id sent to jobstep_allocate: %s"
                logging.warning(err, jobstep_id, exc_info=True)
                return error(err % (jobstep_id,))

        cluster = args.get('cluster')

        with statsreporter.stats().timer('jobstep_allocate_post'):
            try:
                # Allocation is serialized per cluster when one is given.
                lock_key = 'jobstep:allocate'
                if cluster:
                    lock_key = lock_key + ':' + cluster
                with redis.lock(lock_key, nowait=True):
                    # NOTE(review): IDs with no matching row are skipped here
                    # yet still echoed back as allocated -- confirm intended.
                    jobsteps = JobStep.query.filter(JobStep.id.in_(jobstep_ids))

                    for jobstep in jobsteps:
                        if jobstep.cluster != cluster:
                            # Roll back status changes made earlier in this loop.
                            db.session.rollback()
                            err = 'Jobstep is in cluster %s but tried to allocate in cluster %s (id=%s, project=%s)'
                            err_args = (jobstep.cluster, cluster, jobstep.id.hex, jobstep.project.slug)
                            logging.warning(err, *err_args)
                            return error(err % err_args)
                        if jobstep.status != Status.pending_allocation:
                            db.session.rollback()
                            err = 'Jobstep %s for project %s was already allocated'
                            err_args = (jobstep.id.hex, jobstep.project.slug)
                            logging.warning(err, *err_args)
                            return error(err % err_args, http_code=409)

                        jobstep.status = Status.allocated
                        db.session.add(jobstep)
                        # The JobSteps returned are pending_allocation, and the initial state for a Mesos JobStep is
                        # pending_allocation, so we can determine how long it was pending by how long ago it was
                        # created.
                        pending_seconds = (datetime.utcnow() - jobstep.date_created).total_seconds()
                        statsreporter.stats().log_timing('duration_pending_allocation', pending_seconds * 1000)

                    db.session.commit()

                    return self.respond({'allocated': jobstep_ids})
            except UnableToGetLock:
                return error('Another allocation is in progress', http_code=409)
            except IntegrityError:
                err = 'Could not commit allocation'
                logging.warning(err, exc_info=True)
                return error(err, http_code=409)
Example #18
0
    def post(self):
        """Hand out pending jobsteps against a resource offer.

        A request with a non-empty ``jobstep_ids`` is forwarded to
        ``new_post``. Otherwise ``resources`` is required in the JSON body
        (``cpus`` and ``mem`` in MB, both defaulting to 0) and ``cluster`` is
        optional. Candidates (at most 10) are taken while holding the
        allocation lock, which is suffixed with the cluster name when one is
        supplied. Per-jobstep limits and the command are read from the build
        step.

        Responds with the list of serialized allocations (possibly empty),
        an error when ``resources`` is absent, or a 503 error if the lock
        cannot be taken.
        """
        args = json.loads(request.data)

        # TODO(nate): get rid of old allocation code once scheduler is updated to use this
        if args.get('jobstep_ids'):
            return self.new_post(args)

        try:
            resources = args['resources']
        except KeyError:
            return error('Missing resources attribute')

        cluster = args.get('cluster')

        # cpu and mem as 0 are treated by changes-client
        # as having no enforced limit
        offered_cpus = int(resources.get('cpus', 0))
        offered_mem = int(resources.get('mem', 0))  # MB

        with statsreporter.stats().timer('jobstep_allocate'):
            try:
                lock_key = 'jobstep:allocate'
                if cluster:
                    lock_key = lock_key + ':' + cluster
                with redis.lock(lock_key, nowait=True):
                    pending_jobsteps = self.find_next_jobsteps(limit=10, cluster=cluster)
                    to_allocate = []
                    for jobstep in pending_jobsteps:
                        jobplan, buildstep = JobPlan.get_build_step_for_job(jobstep.job_id)
                        assert jobplan and buildstep
                        limits = buildstep.get_resource_limits()
                        wanted_cpus = limits.get('cpus', 4)
                        wanted_mem = limits.get('memory', 8 * 1024)

                        if offered_cpus >= wanted_cpus and offered_mem >= wanted_mem:
                            # Shrink the offer by what this jobstep consumes.
                            offered_cpus -= wanted_cpus
                            offered_mem -= wanted_mem
                            command = buildstep.get_allocation_command(jobstep)

                            jobstep.status = Status.allocated
                            db.session.add(jobstep)

                            # We keep the data from the BuildStep to be sure we're using the same resource values.
                            snapshot = _AllocData(cpus=wanted_cpus,
                                                  memory=wanted_mem,
                                                  command=command)
                            to_allocate.append((jobstep, snapshot))
                            # The JobSteps returned are pending_allocation, and the initial state for a Mesos JobStep is
                            # pending_allocation, so we can determine how long it was pending by how long ago it was
                            # created.
                            pending_seconds = (datetime.utcnow() - jobstep.date_created).total_seconds()
                            pending_ms = pending_seconds * 1000
                            statsreporter.stats().log_timing('duration_pending_allocation', pending_ms)
                        else:
                            logging.info('Not allocating %s due to lack of offered resources', jobstep.id.hex)

                    if not to_allocate:
                        # Should 204, but flask/werkzeug throws StopIteration (bug!) for tests
                        return self.respond([])

                    db.session.flush()
            except UnableToGetLock:
                return error('Another allocation is in progress', http_code=503)

            context = []

            for jobstep, alloc_data in to_allocate:
                try:
                    jobstep_data = self.serialize(jobstep)

                    jobstep_data['project'] = self.serialize(jobstep.project)
                    resource_info = {
                        'cpus': alloc_data.cpus,
                        'mem': alloc_data.memory,
                    }
                    jobstep_data['resources'] = resource_info
                    jobstep_data['cmd'] = alloc_data.command
                except Exception:
                    # Serialization failed: mark this jobstep infra_failed and
                    # omit it from the response.
                    jobstep.status = Status.finished
                    jobstep.result = Result.infra_failed
                    db.session.add(jobstep)
                    db.session.flush()

                    logging.exception(
                        'Exception occurred while allocating job step %s for project %s',
                        jobstep.id.hex, jobstep.project.slug)
                else:
                    context.append(jobstep_data)

            return self.respond(context)
Example #19
0
    def post(self):
        """
        Allocate pending jobsteps against the resources offered in the request.

        The request body is parsed as JSON and must contain a ``resources``
        mapping; its ``cpus`` and ``mem`` entries are read as integers and
        default to 0.  While holding the ``jobstep:allocate`` redis lock,
        candidates from ``find_next_jobsteps(limit=10)`` are marked
        ``Status.allocated`` whenever the remaining offer covers the ``cpus``
        and ``mem`` recorded in the jobstep's data (defaults: 4 and 8 * 1024).

        Responds with the serialized allocated jobsteps, each extended with
        ``project``, ``resources`` and ``cmd``; with an empty list when
        nothing was allocated; with an error when ``resources`` is missing;
        or with a 503 error when ``UnableToGetLock`` is raised.  A jobstep
        whose payload cannot be built is marked finished/infra_failed and
        left out of the response.
        """
        payload = json.loads(request.data)

        try:
            offered = payload['resources']
        except KeyError:
            return error('Missing resources attribute')

        # cpu and mem as 0 are treated by changes-client
        # as having no enforced limit
        cpus_left = int(offered.get('cpus', 0))
        mem_left = int(offered.get('mem', 0))  # MB

        with statsreporter.stats().timer('jobstep_allocate'):
            try:
                with redis.lock('jobstep:allocate', nowait=True):
                    candidates = self.find_next_jobsteps(limit=10)
                    to_allocate = []
                    for candidate in candidates:
                        need_cpus = candidate.data.get('cpus', 4)
                        need_mem = candidate.data.get('mem', 8 * 1024)

                        fits = cpus_left >= need_cpus and mem_left >= need_mem
                        if not fits:
                            logging.info('Not allocating %s due to lack of offered resources', candidate.id.hex)
                            continue

                        # Charge this jobstep against what is left of the offer.
                        cpus_left -= need_cpus
                        mem_left -= need_mem

                        candidate.status = Status.allocated
                        db.session.add(candidate)

                        to_allocate.append(candidate)
                        # The JobSteps returned are pending_allocation, and the initial state for a Mesos JobStep is
                        # pending_allocation, so we can determine how long it was pending by how long ago it was
                        # created.
                        time_pending = datetime.utcnow() - candidate.date_created
                        statsreporter.stats().log_timing('duration_pending_allocation',
                                                         time_pending.total_seconds() * 1000)

                    if not to_allocate:
                        # Should 204, but flask/werkzeug throws StopIteration (bug!) for tests
                        return self.respond([])

                    db.session.flush()
            except UnableToGetLock:
                return error('Another allocation is in progress', http_code=503)

            context = []
            serialized = self.serialize(to_allocate)

            for jobstep, jobstep_data in zip(to_allocate, serialized):
                try:
                    jobplan, buildstep = JobPlan.get_build_step_for_job(jobstep.job_id)

                    assert jobplan and buildstep

                    jobstep_data['project'] = self.serialize(jobstep.project)
                    jobstep_data['resources'] = {
                        'cpus': jobstep.data.get('cpus', 4),
                        'mem': jobstep.data.get('mem', 8 * 1024),
                    }
                    jobstep_data['cmd'] = buildstep.get_allocation_command(jobstep)
                except Exception:
                    # Building the payload failed: record an infra failure for
                    # this jobstep and leave it out of the response.
                    jobstep.status = Status.finished
                    jobstep.result = Result.infra_failed
                    db.session.add(jobstep)
                    db.session.flush()

                    logging.exception(
                        'Exception occurred while allocating job step %s for project %s',
                        jobstep.id.hex, jobstep.project.slug)
                    continue

                context.append(jobstep_data)

            return self.respond(context)
Example #20
0
    def post(self, command_id):
        """
        Update a command's return code/status and expand it when applicable.

        Arguments come from ``self.post_parser``.  When the request carries
        output or sets the status to ``'finished'``, the update is performed
        while holding the ``expand:<command_id>`` redis lock, and afterwards
        the expander for the command's type (if ``get_expander`` returns one)
        is validated and applied.

        Returns ``('', 404)`` when the command does not exist, an error when
        the command is already finished, when output is required but missing,
        or when validation raises ``AssertionError``, and ``('', 500)`` when
        validation raises anything else.  Otherwise the session is committed
        and the command is returned via ``self.respond``.
        """
        args = self.post_parser.parse_args()

        now = args.date
        if not now:
            now = datetime.utcnow()

        wants_expansion = args.output or args.status == 'finished'

        # We need to lock this resource to ensure the command doesn't get expanded
        # twice in the time it's checking the attr + writing the updated value
        lock = None
        if wants_expansion:
            lock = redis.lock('expand:{}'.format(command_id),
                              expire=3,
                              nowait=True)

        if lock:
            lock.__enter__()

        try:
            command = Command.query.get(command_id)
            if command is None:
                return '', 404

            if command.status == Status.finished:
                return error("Command already marked as finished")

            if args.return_code is not None:
                command.return_code = args.return_code

            if args.status:
                command.status = Status[args.status]

                # if we've finished this job, lets ensure we have set date_finished
                if command.status == Status.finished:
                    if command.date_finished is None:
                        command.date_finished = now
                elif command.date_finished:
                    command.date_finished = None

                # date_started is only kept once the command has left the queue
                if command.status == Status.queued:
                    if command.date_started:
                        command.date_started = None
                elif command.date_started is None:
                    command.date_started = now

            db.session.add(command)
            db.session.flush()
        finally:
            if lock:
                lock.__exit__(None, None, None)

        expander_cls = None
        if wants_expansion:
            expander_cls = self.get_expander(command.type)

        if expander_cls is not None:
            if not args.output:
                db.session.rollback()
                return error("Missing output for command of type %s" %
                             command.type)

            expander = expander_cls(
                project=command.jobstep.project,
                data=args.output,
            )

            try:
                expander.validate()
            except AssertionError as e:
                db.session.rollback()
                return error('%s' % e)
            except Exception:
                db.session.rollback()
                return '', 500

            self.expand_command(command, expander, args.output)

        db.session.commit()

        return self.respond(command)