Ejemplo n.º 1
0
 def __init__(self, directory):
     """Set up scheduler state, then load persisted projects and jobs
     from *directory*."""
     super(Scheduler, self).__init__()
     self.directory = directory
     self.logger = logging.getLogger('ciserver.Scheduler')
     # Job bookkeeping: ids, queued jobs, and jobs currently running.
     self.next_id = 1000
     self.job_queue = collections.deque()
     self.running = {}
     # Event plumbing used to wake up the scheduler loop.
     self.event_queue = []
     self.have_events = Event()
     # Capacity accounting (per project / per blueprint / global).
     self.projects = {}
     self.project_usage = {}
     self.blueprint_usage = {}
     self.total_vms = 0
     # API credentials are class attributes installed via set_api_parameters().
     self.client = RavelloClient(self.api_url, self.api_user, self.api_password)
     self.load_projects()
     self.load_job_queue()
Ejemplo n.º 2
0
 def __init__(self, scheduler, job):
     """Create a runner for *job* under *scheduler*.

     Validates that the job's controller, repository and results
     backends are of the supported types; raises ValueError otherwise.
     """
     super(JobRunner, self).__init__()
     self.scheduler = scheduler
     self.job = job
     self.client = RavelloClient(self.api_url, self.api_user, self.api_password)
     self.logger = logging.getLogger('ciserver.JobRunner')
     # Of course this could be made extensible to support multiple
     # controller types, repositories and result types.
     ctrl = self.job['environment']['controller']
     if ctrl['type'] != 'ssh':
         # Fixed typo in the message ("Unkown" -> "Unknown").
         raise ValueError('Unknown controller type: %s' % ctrl['type'])
     repo = self.job['project']['repo']
     if repo['type'] != 'git':
         raise ValueError('Unknown repository type: %s' % repo['type'])
     results = self.job['project']['results']
     if results['type'] != 'github':
         # Fixed copy-pasted message: this is the results type, not the
         # repository type.
         raise ValueError('Unknown results type: %s' % results['type'])
Ejemplo n.º 3
0
class JobRunner(Greenlet):
    """Greenlet that runs a single CI test job.

    The runner provisions (or, for now, locates) an application instance
    for the job's blueprint, runs the test command on the controller VM
    over ssh, and publishes the outcome to GitHub.
    """

    def __init__(self, scheduler, job):
        """Create a runner for *job* under *scheduler*.

        Validates that the job's controller, repository and results
        backends are of the supported types; raises ValueError otherwise.
        """
        super(JobRunner, self).__init__()
        self.scheduler = scheduler
        self.job = job
        self.client = RavelloClient(self.api_url, self.api_user, self.api_password)
        self.logger = logging.getLogger('ciserver.JobRunner')
        # Of course this could be made extensible to support multiple
        # controller types, repositories and result types.
        ctrl = self.job['environment']['controller']
        if ctrl['type'] != 'ssh':
            # Fixed typo in the message ("Unkown" -> "Unknown").
            raise ValueError('Unknown controller type: %s' % ctrl['type'])
        repo = self.job['project']['repo']
        if repo['type'] != 'git':
            raise ValueError('Unknown repository type: %s' % repo['type'])
        results = self.job['project']['results']
        if results['type'] != 'github':
            # Fixed copy-pasted message: this is the results type, not the
            # repository type.
            raise ValueError('Unknown results type: %s' % results['type'])

    @classmethod
    def set_api_parameters(cls, url, user, password):
        """Install the Ravello API credentials as class attributes."""
        cls.api_url = url
        cls.api_user = user
        cls.api_password = password

    def provision_blueprint(self):
        """Provision a Blueprint. Return the application instance ID."""
        # We don't really deploy the new Blueprint. As a POC hack we look for
        # a running app instance that was created from the Blueprint that we're
        # interested in.
        name = self.job['environment']['blueprint']
        apps = self.client.get_applications_metadata()
        for app in apps:
            if app.blueprintName == name:
                break
        else:
            raise JobFailed('Blueprint not found: %s' % name)
        self.appid = app.id

    def run_tests(self):
        """Run the tests via the ssh controller.

        Sets self.status (exit code) and self.output (test output);
        raises JobFailed on ssh timeouts/EOF or when the controller VM
        cannot be found.
        """
        jobid = self.job['id']
        logger = self.logger
        ctrl = self.job['environment']['controller']
        vms = self.client.meta_vms(self.appid).vms
        for vm in vms:
            if vm.name.lower() == ctrl['host'].lower():
                break
        else:
            # BUG FIX: this previously interpolated an undefined local
            # 'name', raising NameError instead of the intended JobFailed.
            raise JobFailed('Controller VM not found: %s' % ctrl['host'])
        host = vm.vmDynamicMD.fullyQualifiedDomainName
        repo = self.job['project']['repo']
        logger.debug('[job %s] Running tests via "ssh" controller', jobid)
        logger.debug('[job %s] Controller node = %s', jobid, host)
        ssh = winpexpect.spawn('ssh-agent sh', timeout=30)
        try:
            # Forward the repository key.
            keyfile = os.path.join(self.scheduler.directory, 'keys', repo['key'])
            ssh.expect('[$#]')
            ssh.send('ssh-add %s\n' % keyfile)
            ssh.expect('[$#]')
            ssh.send('ssh -A ravello@%s\n' % host)
            # Install a more distinctive prompt that hopefully does not occur
            # in the output of any command we run.
            ssh.expect('[#$]')
            prompt = 'CITestHost: '
            ssh.send('PS1="%s"\n' % prompt)
            ssh.expect(prompt)  # echo
            ssh.expect(prompt)  # prompt
            # Check out the commit in a temporary directory
            url = repo['url']
            parsed = urlparse.urlsplit(url)
            logger.debug('[job %s] cloning source code from %s', jobid, url)
            # Random work-directory name so concurrent jobs can't collide.
            dirname = os.urandom(8).encode('hex')
            ssh.send('mkdir %s\n' % dirname)
            ssh.expect(prompt)
            ssh.send('cd %s\n' % dirname)
            ssh.expect(prompt)
            ssh.send('git clone %s\n' % url)
            ssh.expect(prompt)
            # The clone is the only entry in the fresh directory.
            ssh.send('cd *\n')
            ssh.expect(prompt)
            commit = self.job['commit']['id']
            logger.debug('[job %s] checkout commit %s', jobid, commit)
            ssh.send('git checkout %s\n' % commit)
            ssh.expect(prompt)
            # And run the tests!
            command = ctrl['command']
            logger.debug('[job %s] running test command "%s"', jobid, command)
            ssh.send('%s\n' % command)
            # Tests may run long; widen the timeout just for this command.
            ssh.settimeout(600)
            ssh.expect('\r\n')  # echo
            ssh.expect(prompt)
            output = ssh.before[:ssh.before.rfind('\r\n')]  # strip prompt
            ssh.settimeout(30)
            ssh.send('echo $?\n')
            ssh.expect('\r\n')  # echo
            ssh.expect('\r\n')  # end of output
            status = int(ssh.before)
            ssh.expect(prompt)  # end of output
            # Clean up the work directory on the controller.
            ssh.send('cd ../..\n')
            ssh.expect(prompt)
            ssh.send('rm -rf %s\n' % dirname)
            ssh.expect(prompt)
            ssh.send(ssh.cchar('VEOF'))  # exit ssh
            ssh.expect('[$#]')
            ssh.send(ssh.cchar('VEOF'))  # exit ssh-agent
            ssh.wait(10)
        except (TIMEOUT, EOF):
            logger.debug('[job %s] failed to run the test', jobid)
            ssh.terminate()
            raise JobFailed('Failed to run test job')
        logger.debug('[job %s] test return code: %s', jobid, status)
        self.status = status
        self.output = output

    commit_ok = 'This commit passed all tests in environment "%(environment)s".'
    issue_title = 'CI Test Error for commit %(commit)s'
    issue_body = textwrap.dedent("""\
        Test suite error for commit %(commit)s for environment "%(environment)s".

        The output of the test suite is:

        %(output)s
        """)

    def publish_results(self):
        """Publish the test outcome to GitHub.

        A passing run adds a comment to the commit; a failing run opens
        an issue containing the test output.
        """
        env = self.job['environment']
        commit = self.job['commit']
        results = self.job['project']['results']
        subst = { 'commit': commit['id'], 'environment': env['blueprint'] }
        # Indent the output by four spaces so it renders as a code block.
        subst['output'] = '    ' + self.output.replace('\r\n', '\r\n    ')
        client = GithubClient(results['username'], results['repository'],
                              results['token'])
        client.connect()
        if self.status == 0:
            message = self.commit_ok % subst
            client.add_comment_to_commit(commit['id'], message)
        else:
            title = self.issue_title % subst
            body = self.issue_body % subst
            client.add_issue(title, body)
        client.close()

    def _run(self):
        """Greenlet entry point: run the job and report back to the scheduler."""
        logger = self.logger
        jobid = self.job['id']
        logger.debug('[job %s] Running tests for project %s', jobid,
                     self.job['project']['name'])
        try:
            self.provision_blueprint()
            self.run_tests()
            self.publish_results()
        except JobFailed as e:
            # BUG FIX: indexing the exception (e[0]/e[1]) raised IndexError
            # for the common single-argument JobFailed; use e.args safely.
            message = e.args[0] if e.args else 'Job failed'
            detail = e.args[1] if len(e.args) > 1 else ''
            self.logger.debug('[job %s] Failed with: %s', jobid, message)
            self.scheduler.job_done(self.job, 'FAILED', message, detail)
        except Exception as e:
            lines = ['An uncaught exception occurred\n']
            lines += traceback.format_exception(*sys.exc_info())
            detail = ''.join(lines)
            self.logger.debug('[job %s] Uncaught exception', jobid)
            self.logger.debug(detail)
            self.scheduler.job_done(self.job, 'FAILED', 'Uncaught exception', detail)
        else:
            self.logger.debug('[job %s] Completed successfully', jobid)
            self.scheduler.job_done(self.job, 'OK', 'Job completed successfully')
Ejemplo n.º 4
0
class Scheduler(Greenlet):
    """Job scheduler.

    Maintains a queue of test jobs and starts a JobRunner for each job
    that fits under the per-project, per-blueprint and global VM caps.
    """

    # Keep max_instances_per_blueprint to 1 until the "do not really
    # provision blueprint" hack/optimization is removed.
    max_vms_per_project = 10
    max_total_vms = 100
    max_instances_per_blueprint = 1

    def __init__(self, directory):
        """Set up scheduler state, then load persisted projects and jobs
        from *directory*."""
        super(Scheduler, self).__init__()
        self.directory = directory
        self.next_id = 1000
        self.event_queue = []
        self.have_events = Event()
        self.projects = {}
        self.job_queue = collections.deque()
        self.running = {}
        self.project_usage = {}
        self.blueprint_usage = {}
        self.total_vms = 0
        self.logger = logging.getLogger('ciserver.Scheduler')
        self.client = RavelloClient(self.api_url, self.api_user, self.api_password)
        self.load_projects()
        self.load_job_queue()

    @classmethod
    def set_api_parameters(cls, url, user, password):
        """Install the Ravello API credentials as class attributes."""
        cls.api_url = url
        cls.api_user = user
        cls.api_password = password

    def load_projects(self):
        """Load project definitions from <directory>/projects/*.js."""
        dirname = os.path.join(self.directory, 'projects')
        for fname in os.listdir(dirname):
            if not fname.endswith('.js'):
                continue
            absname = os.path.join(dirname, fname)
            try:
                project = Project.load(absname)
            except (IOError, TypeError, ValueError):
                # Best effort: skip unreadable/corrupt project files.
                self.logger.debug('Could not load project: %s', fname)
                continue
            self.projects[project['name']] = project
        self.logger.debug('Loaded %s projects', len(self.projects))

    def load_job_queue(self):
        """Load persisted jobs from <directory>/jobs/*.js, requeueing NEW
        ones and advancing next_id past every job seen."""
        dirname = os.path.join(self.directory, 'jobs')
        # Sorted order preserves the original submission order (file names
        # are zero-padded job ids).
        for fname in sorted(os.listdir(dirname)):
            if not fname.endswith('.js'):
                continue
            absname = os.path.join(dirname, fname)
            try:
                job = TestJob.load(absname)
            except (IOError, TypeError, ValueError):
                self.logger.debug('Could not load job: %s', fname)
                continue
            if job['status'] == 'NEW':
                self.job_queue.append(job)
            self.next_id = max(self.next_id, job['id']+1)
        self.logger.debug('Loaded %s jobs', len(self.job_queue))

    def schedule_run(self, project, request):
        """Queue one job per (environment, commit) pair for *project*.

        Returns False when the project is unknown, True otherwise.
        """
        if project not in self.projects:
            return False
        project = self.projects[project]
        for env in project['environments']:
            for commit in request['commits']:
                job = TestJob()
                job['project'] = project
                job['request'] = request
                job['environment'] = env
                job['commit'] = commit
                self.add_job(job)
        return True

    def add_job(self, job):
        """Post an AddJob event and wake the scheduler loop."""
        self.event_queue.append(('AddJob', (job,)))
        self.have_events.set()

    def job_done(self, job, result, message, detail=''):
        """Post a JobDone event (called by JobRunner) and wake the loop."""
        self.event_queue.append(('JobDone', (job, result, message, detail)))
        self.have_events.set()

    def _job_filename(self, job):
        """Return the persistence path for *job*."""
        return os.path.join(self.directory, 'jobs', '%010d.js' % job['id'])

    def _get_blueprint_allocation(self, name):
        """Return the number of started VMs for the application created
        from blueprint *name*, or -1 if no such application exists."""
        apps = self.client.get_applications_metadata()
        for app in apps:
            if app.blueprintName == name:
                return app.numStartedVms
        return -1

    def _process_events(self):
        """Drain the event queue, updating persistent job state and the
        capacity accounting."""
        logger = self.logger
        while self.event_queue:
            event, args = self.event_queue.pop(0)
            logger.debug('Handling event %s', event)
            if event == 'AddJob':
                job = args[0]
                job['id'] = self.next_id
                self.next_id += 1
                vms = self._get_blueprint_allocation(job['environment']['blueprint'])
                if vms == -1:
                    logger.error('Could not add job %s', job['id'])
                    continue
                job['vms'] = vms
                job['status'] = 'NEW'
                fname = self._job_filename(job)
                try:
                    job.save(fname)
                except IOError:
                    logger.error('Could not add job %s', job['id'])
                    continue
                self.job_queue.append(job)
            elif event == 'JobDone':
                job, result, message, detail = args
                job['status'] = 'DONE'
                job['result'] = { 'result': result, 'message': message, 'detail': detail }
                fname = self._job_filename(job)
                try:
                    job.save(fname)
                except IOError:
                    logger.error('Could not update job %s', job['id'])
                assert job['id'] in self.running
                del self.running[job['id']]
                # Release the job's capacity.
                self.project_usage[job['project']['name']] -= job['vms']
                self.blueprint_usage[job['environment']['blueprint']] -= 1
                # BUG FIX: total_vms was incremented when the job started
                # but never released, so the scheduler eventually wedged
                # against max_total_vms.
                self.total_vms -= job['vms']
        logger.debug('Done processing events')

    def _run_jobs(self):
        """A very simple scheduler that can enforce per project caps
        on VMs and blueprints, and a cap on total VMs.
        """
        logger = self.logger
        while True:
            if not self.job_queue:
                logger.debug('Job queue empty')
                break
            logger.debug('Trying to schedule job from job queue')
            # Find the first job whose project is below its cap and
            # whose blueprint is runnable.
            for ix, job in enumerate(self.job_queue):
                project = job['project']['name']
                if self.project_usage.get(project, 0) + job['vms'] \
                            > self.max_vms_per_project:
                    continue
                blueprint = job['environment']['blueprint']
                if self.blueprint_usage.get(blueprint, 0) + 1 \
                            > self.max_instances_per_blueprint:
                    continue
                break
            else:
                # BUG FIX: without this for/else the loop fell through and
                # ran the *last examined* job even when every queued job
                # exceeded its caps.
                logger.debug('No queued job fits under its caps')
                break
            # If we are running against the global cap just wait. Don't be
            # smart and try to run other vms as that could starve the current
            # candidate.
            logger.debug('Job candidate: %s', job['id'])
            if self.total_vms + job['vms'] > self.max_total_vms:
                logger.debug('Would run over global cap, not running any jobs')
                break
            logger.debug('Still below global cap, running job %s', job['id'])
            # Update allocations
            if project not in self.project_usage:
                self.project_usage[project] = 0
            self.project_usage[project] += job['vms']
            if blueprint not in self.blueprint_usage:
                self.blueprint_usage[blueprint] = 0
            self.blueprint_usage[blueprint] += 1
            self.total_vms += job['vms']
            del self.job_queue[ix]
            self.running[job['id']] = job
            # And finally run the job
            runner = JobRunner(self, job)
            runner.start()
            logger.debug('Created JobRunner to run job')

    def _run(self):
        """Greenlet entry point: wait for events, process them, and try
        to start runnable jobs."""
        self.have_events.set()
        self.logger.debug('Entering _run() loop')
        while True:
            self.have_events.wait()
            self.have_events.clear()
            self._process_events()
            self._run_jobs()
        self.logger.debug('_run() loop exited')