def test_url_from_a_matrix_job(self):
    # A matrix job URL carries the axis combination as its last path
    # element; that combination string is what job_from_url must return.
    matrix_url = u'https://jenkins.ceph.com/job/ceph-dev-build/ARCH=x86_64,AVAILABLE_ARCH=x86_64,AVAILABLE_DIST=xenial,DIST=xenial,MACHINE_SIZE=huge/'
    parsed = util.job_from_url(matrix_url)
    assert parsed == "ARCH=x86_64,AVAILABLE_ARCH=x86_64,AVAILABLE_DIST=xenial,DIST=xenial,MACHINE_SIZE=huge"
def test_url_with_job_in_the_name(self):
    # The substring "job" appearing inside the job name itself must not
    # confuse the parser (which also splits on the '/job/' path marker).
    job_url = u'https://jenkins.ceph.com/job/jenkins-job-builder'
    parsed = util.job_from_url(job_url)
    assert parsed == 'jenkins-job-builder'
def test_url_with_trailing_slash__gets_trimmed(self):
    # A trailing slash on the URL is stripped before the job name is
    # extracted, so the result matches the slash-less variant.
    job_url = u'https://jenkins.ceph.com/job/jenkins-job-builder/'
    parsed = util.job_from_url(job_url)
    assert parsed == 'jenkins-job-builder'
def check_queue():
    """
    Specifically checks for the status of the Jenkins queue. The docs are
    sparse here, but
    ``jenkins/core/src/main/resources/hudson/model/queue/CauseOfBlockage``
    has the specific reasons this check needs:

    * *BecauseLabelIsBusy* Waiting for next available executor on {0}
    * *BecauseLabelIsOffline* All nodes of label \u2018{0}\u2019 are offline
    * *BecauseNodeIsBusy* Waiting for next available executor on {0}
    * *BecauseNodeLabelIsOffline* There are no nodes with the label \u2018{0}\u2019
    * *BecauseNodeIsOffline* {0} is offline

    The distinction is the need for a label or a node. In the case of a node,
    it will get matched directly to the nodes in the configuration, in case of
    a label it will go through the configured nodes and pick the first matched
    to its labels.

    Label needed example
    --------------------
    Jenkins queue reports that 'All nodes of label x86_64 are offline'. The
    configuration has::

        nodes: {
            'centos6': {
                ...
                'labels': ['x86_64', 'centos', 'centos6']
            }
        }

    Since 'x86_64' exists in the 'centos6' configured node, it will be sent
    off to create that one.

    Node needed example
    -------------------
    Jenkins reports that 'wheezy is offline'. The configuration has a few
    labels configured::

        nodes: {
            'wheezy': {
                ...
            }
            'centos6': {
                ...
            }
        }

    Since the key 'wheezy' matches the node required by the build system to
    continue it goes off to create it.
    """
    jenkins_url = pecan.conf.jenkins['url']
    jenkins_user = pecan.conf.jenkins['user']
    jenkins_token = pecan.conf.jenkins['token']
    conn = jenkins.Jenkins(jenkins_url, jenkins_user, jenkins_token)
    result = conn.get_queue_info()
    # mapping of configured node name -> how many instances we need to ask for
    needed_nodes = {}
    if result:
        for task in result:
            if task['why'] is None:
                # this may happen when multiple tasks are getting piled up
                # (for a PR for example) and there is no 'why' yet. So the API
                # has a `None` for it which would break logic to infer what is
                # needed to get it unstuck
                continue
            if util.is_stuck(task['why']):
                logger.info('found stuck task with name: %s' % task['task']['name'])
                logger.info('reason was: %s' % task['why'])
                node_name = util.match_node(task['why'])
                if not node_name:
                    # this usually happens when jenkins is waiting on an
                    # executor on a static slave whose labels are not a subset
                    # of a node in the mita config
                    logger.warning('unable to match a suitable node')
                    logger.warning('will infer from job labels')
                    job_url = task['task']['url']
                    job_name = util.job_from_url(job_url)
                    try:
                        conn.get_job_info(job_name)
                        logger.info("Job info found for: %s", job_name)
                        node_name = util.match_node_from_job_config(job_url)
                    except jenkins.NotFoundException:
                        # only a *missing* job means "this is a matrix
                        # sub-job"; the broader JenkinsException would also
                        # swallow auth/connection failures and misclassify
                        # them, so the catch is kept narrow
                        logger.warning('No job info found for: %s', job_name)
                        logger.warning('Will assume the job is a matrix job')
                        node_name = util.match_node_from_matrix_job_name(
                            job_name)
                    if not node_name:
                        logger.warning(
                            'completely unable to match a node to provide')
                        continue
                    logger.info('inferred node as: %s' % str(node_name))
                if node_name:
                    logger.info('matched a node name to config: %s' % node_name)
                    # TODO: this should talk to the pecan app over HTTP using
                    # the `app.conf.pecan_app` configuration entry, and then
                    # follow this logic:
                    # * got asked to create a new node -> check for an entry
                    #   in the DB for a node that matches the characteristics
                    #   of it.
                    # * if there is one already:
                    #   - check that if it has been running for more than N
                    #     (configurable) minutes (defaults to 8):
                    #     * if it has, it means that it is probable busy
                    #       already, so:
                    #       - create a new node in the cloud backend matching
                    #         the characteristics needed
                    #     * if it hasn't, it means that it is still getting
                    #       provisioned so:
                    #       - skip - do a log warning
                    #     * if there is more than one, and it has been more
                    #       than N (8) minutes since they got launched it is
                    #       possible that they are configured *incorrectly*
                    #       and we should not keep launching more, so log the
                    #       warning and skip.
                    #   - now ask Jenkins about machines that have been idle
                    #     for N (configurable) minutes, and see if matches a
                    #     record in the DB for the characteristics that we
                    #     are being asked to create.
                    #     * if found/matched:
                    #       - log the warnings again, something is not
                    #         working right.
                    needed_nodes[node_name] = needed_nodes.get(node_name, 0) + 1
                else:
                    logger.warning(
                        'could not match a node name to config for labels')
            else:
                logger.info('no tasks were found in "stuck" state')
    elif result == []:
        logger.info('the Jenkins queue is empty, nothing to do')
    else:
        # get_queue_info returned something unexpected (presumably None or an
        # error payload) -- log it rather than guessing
        logger.warning('attempted to get queue info but got: %s' % result)

    # At this point we might have a bag of nodes that we need to create, go
    # over that mapping and ask as many as Jenkins needs:
    node_endpoint = get_mita_api('nodes')
    for node_name, count in needed_nodes.items():
        configured_node = pecan.conf.nodes[node_name]
        configured_node['name'] = node_name
        configured_node['count'] = count
        requests.post(node_endpoint, data=json.dumps(configured_node))
def check_queue():
    """
    Specifically checks for the status of the Jenkins queue. The docs are
    sparse here, but
    ``jenkins/core/src/main/resources/hudson/model/queue/CauseOfBlockage``
    has the specific reasons this check needs:

    * *BecauseLabelIsBusy* Waiting for next available executor on {0}
    * *BecauseLabelIsOffline* All nodes of label \u2018{0}\u2019 are offline
    * *BecauseNodeIsBusy* Waiting for next available executor on {0}
    * *BecauseNodeLabelIsOffline* There are no nodes with the label \u2018{0}\u2019
    * *BecauseNodeIsOffline* {0} is offline

    The distinction is the need for a label or a node. In the case of a node,
    it will get matched directly to the nodes in the configuration, in case of
    a label it will go through the configured nodes and pick the first matched
    to its labels.

    Label needed example
    --------------------
    Jenkins queue reports that 'All nodes of label x86_64 are offline'. The
    configuration has::

        nodes: {
            'centos6': {
                ...
                'labels': ['x86_64', 'centos', 'centos6']
            }
        }

    Since 'x86_64' exists in the 'centos6' configured node, it will be sent
    off to create that one.

    Node needed example
    -------------------
    Jenkins reports that 'wheezy is offline'. The configuration has a few
    labels configured::

        nodes: {
            'wheezy': {
                ...
            }
            'centos6': {
                ...
            }
        }

    Since the key 'wheezy' matches the node required by the build system to
    continue it goes off to create it.
    """
    jenkins_url = pecan.conf.jenkins['url']
    jenkins_user = pecan.conf.jenkins['user']
    jenkins_token = pecan.conf.jenkins['token']
    conn = jenkins.Jenkins(jenkins_url, jenkins_user, jenkins_token)
    result = conn.get_queue_info()
    # mapping of configured node name -> how many instances we need to ask for
    needed_nodes = {}
    if result:
        for task in result:
            if task['why'] is None:
                # this may happen when multiple tasks are getting piled up
                # (for a PR for example) and there is no 'why' yet. So the API
                # has a `None` for it which would break logic to infer what is
                # needed to get it unstuck
                continue
            if util.is_stuck(task['why']):
                logger.info('found stuck task with name: %s' % task['task']['name'])
                logger.info('reason was: %s' % task['why'])
                node_name = util.match_node(task['why'])
                if not node_name:
                    # this usually happens when jenkins is waiting on an
                    # executor on a static slave whose labels are not a subset
                    # of a node in the mita config
                    logger.warning('unable to match a suitable node')
                    logger.warning('will infer from job labels')
                    job_url = task['task']['url']
                    job_name = util.job_from_url(job_url)
                    try:
                        conn.get_job_info(job_name)
                        logger.info("Job info found for: %s", job_name)
                        node_name = util.match_node_from_job_config(job_url)
                    except jenkins.NotFoundException:
                        # a missing job means this is a matrix sub-job, whose
                        # node must be inferred from the job name itself
                        logger.warning('No job info found for: %s', job_name)
                        logger.warning('Will assume the job is a matrix job')
                        node_name = util.match_node_from_matrix_job_name(
                            job_name)
                    if not node_name:
                        logger.warning(
                            'completely unable to match a node to provide')
                        continue
                    logger.info('inferred node as: %s' % str(node_name))
                if node_name:
                    logger.info('matched a node name to config: %s' % node_name)
                    # TODO: this should talk to the pecan app over HTTP using
                    # the `app.conf.pecan_app` configuration entry, and then
                    # follow this logic:
                    # * got asked to create a new node -> check for an entry
                    #   in the DB for a node that matches the characteristics
                    #   of it.
                    # * if there is one already:
                    #   - check that if it has been running for more than N
                    #     (configurable) minutes (defaults to 8):
                    #     * if it has, it means that it is probable busy
                    #       already, so:
                    #       - create a new node in the cloud backend matching
                    #         the characteristics needed
                    #     * if it hasn't, it means that it is still getting
                    #       provisioned so:
                    #       - skip - do a log warning
                    #     * if there is more than one, and it has been more
                    #       than N (8) minutes since they got launched it is
                    #       possible that they are configured *incorrectly*
                    #       and we should not keep launching more, so log the
                    #       warning and skip.
                    #   - now ask Jenkins about machines that have been idle
                    #     for N (configurable) minutes, and see if matches a
                    #     record in the DB for the characteristics that we
                    #     are being asked to create.
                    #     * if found/matched:
                    #       - log the warnings again, something is not
                    #         working right.
                    needed_nodes[node_name] = needed_nodes.get(node_name, 0) + 1
                else:
                    logger.warning(
                        'could not match a node name to config for labels')
            else:
                logger.info('no tasks were found in "stuck" state')
    elif result == []:
        logger.info('the Jenkins queue is empty, nothing to do')
    else:
        # get_queue_info returned something unexpected (presumably None or an
        # error payload) -- log it rather than guessing
        logger.warning('attempted to get queue info but got: %s' % result)

    # At this point we might have a bag of nodes that we need to create, go
    # over that mapping and ask as many as Jenkins needs:
    node_endpoint = get_mita_api('nodes')
    for node_name, count in needed_nodes.items():
        configured_node = pecan.conf.nodes[node_name]
        configured_node['name'] = node_name
        configured_node['count'] = count
        requests.post(node_endpoint, data=json.dumps(configured_node))