Ejemplo n.º 1
0
 def test_busy_label_no_match(self):
     """A busy-label reason for 'x86_64' must not resolve to any node."""
     reason = BecauseLabelIsBusy % 'x86_64'
     assert util.match_node(reason) is None
Ejemplo n.º 2
0
 def test_single_nodelabel_is_offline(self):
     """A missing-label reason for the single label 'debian' resolves to 'wheezy'."""
     reason = BecauseNodeLabelIsOffline % 'debian'
     assert util.match_node(reason) == 'wheezy'
Ejemplo n.º 3
0
 def test_busy_node_no_match(self):
     """A busy-node reason for 'solaris' must not resolve to any node."""
     reason = BecauseNodeIsBusy % 'solaris'
     assert util.match_node(reason) is None
Ejemplo n.º 4
0
 def test_busy_label(self):
     """A busy-label reason for 'amd64' resolves to the 'wheezy' node."""
     reason = BecauseLabelIsBusy % 'amd64'
     assert util.match_node(reason) == 'wheezy'
Ejemplo n.º 5
0
def check_queue():
    """
    Inspect the Jenkins build queue and request a node for every stuck task.

    The docs are sparse here, but
    ``jenkins/core/src/main/resources/hudson/model/queue/CauseOfBlockage`` has
    the specific reasons this check needs:

    * *BecauseLabelIsBusy* Waiting for next available executor on {0}

    * *BecauseLabelIsOffline* All nodes of label \u2018{0}\u2019 are offline

    * *BecauseNodeIsBusy* Waiting for next available executor on {0}

    * *BecauseNodeLabelIsOffline* There are no nodes with the label \u2018{0}\u2019

    * *BecauseNodeIsOffline* {0} is offline

    The distinction is the need for a label or a node. In the case of a node,
    it will get matched directly to the nodes in the configuration, in case of a label
    it will go through the configured nodes and pick the first matched to its labels.

    Label needed example
    --------------------
    Jenkins queue reports that 'All nodes of label x86_64 are offline'. The
    configuration has::

        nodes: {
            'centos6': {
                ...
                'labels': ['x86_64', 'centos', 'centos6']
            }
        }

    Since 'x86_64' exists in the 'centos6' configured node, it will be sent off
    to create that one.

    Node needed example
    -------------------
    Jenkins reports that 'wheezy is offline'. The configuration has a few
    nodes configured::

        nodes: {
            'wheezy': {
                ...
            }
            'centos6': {
                ...
            }
        }

    Since the key 'wheezy' matches the node required by the build system to
    continue it goes off to create it.

    Processing
    ----------
    For every queued task whose ``why`` is considered stuck by
    ``util.is_stuck``, a node name is resolved with ``util.match_node``. If
    that yields nothing, the name is inferred from the ``builtOn`` value of
    the job's build numbered ``nextBuildNumber - 1`` via
    ``util.from_offline_executor``. The matching ``pecan.conf.nodes`` entry
    (with its ``name`` set) is then POSTed as JSON to the mita ``nodes``
    endpoint. Tasks that cannot be resolved are logged and skipped.

    :returns: ``None``; the outcome is reported through log messages only.
    """
    jenkins_url = pecan.conf.jenkins['url']
    jenkins_user = pecan.conf.jenkins['user']
    jenkins_token = pecan.conf.jenkins['token']
    conn = jenkins.Jenkins(jenkins_url, jenkins_user, jenkins_token)
    result = conn.get_queue_info()

    if result:
        found_stuck = False
        for task in result:
            why = task.get('why')
            if why is None:
                # this may happen when multiple tasks are getting piled up (for a PR for example)
                # and there is no 'why' yet. So the API has a `None` for it which would break logic
                # to infer what is needed to get it unstuck
                continue
            if not util.is_stuck(why):
                continue

            found_stuck = True
            logger.info('found stuck task with name: %s', task['task']['name'])
            logger.info('reason was: %s', why)
            node_name = util.match_node(why)
            if not node_name:
                logger.warning('unable to match a suitable node')
                logger.warning('will infer from builtOn')
                # Take the path segment that follows the last '/job/' marker.
                # Splitting on the bare word 'job' would cut through job names
                # that contain it (e.g. 'my-job-name'). The leading '/' keeps
                # relative URLs such as 'job/foo/' working as well.
                job_url = '/' + task['task']['url']
                job_name = job_url.split('/job/')[-1].split('/')[0]
                job_id = conn.get_job_info(job_name)['nextBuildNumber'] - 1
                logger.info('determined job name as: %s', job_name)
                logger.info('will look for build info on: %s id: %s', job_name, job_id)
                try:
                    build = conn.get_build_info(job_name, job_id)
                except jenkins.NotFoundException:
                    logger.warning('there are no builds available for job')
                    continue
                logger.info('found a build')
                node_name = util.from_offline_executor(build['builtOn'])
                if not node_name:
                    logger.warning('completely unable to match a node to provide')
                    logger.warning(str(build))
                    continue

            # From here on node_name is always truthy: every path that could
            # leave it empty has already moved on to the next task.
            logger.info('inferred node as: %s', node_name)
            logger.info('matched a node name to config: %s', node_name)
            # TODO: this should talk to the pecan app over HTTP using
            # the `app.conf.pecan_app` configuration entry, and then follow this logic:
            # * got asked to create a new node -> check for an entry in the DB for a node that
            # matches the characteristics of it.
            #  * if there is one already:
            #    - check that if it has been running for more than N (configurable) minutes (defaults to 8):
            #      * if it has, it means that it is probable busy already, so:
            #        - create a new node in the cloud backend matching the characteristics needed
            #      * if it hasn't, it means that it is still getting provisioned so:
            #        - skip - do a log warning
            #  * if there is more than one, and it has been more than
            #    N (8) minutes since they got launched it is possible
            #    that they are configured *incorrectly* and we should not
            #    keep launching more, so log the warning and skip.
            #    - now ask Jenkins about machines that have been idle
            #      for N (configurable) minutes, and see if matches
            #      a record in the DB for the characteristics that we are
            #      being asked to create.
            #      * if found/matched:
            #        - log the warnings again, something is not working right.
            node_endpoint = get_mita_api('nodes')
            configured_node = pecan.conf.nodes[node_name]
            configured_node['name'] = node_name
            requests.post(node_endpoint, data=json.dumps(configured_node))

        if not found_stuck:
            # Reported once per check, and only when it is actually true,
            # instead of once for every task that happens not to be stuck.
            logger.info('no tasks where fund in "stuck" state')
    elif result == []:
        logger.info('the Jenkins queue is empty, nothing to do')
    else:
        logger.warning('attempted to get queue info but got: %s', result)
Ejemplo n.º 6
0
 def test_busy_node(self):
     """A busy-node reason naming 'wheezy' resolves to the 'wheezy' node."""
     reason = BecauseNodeIsBusy % 'wheezy'
     assert util.match_node(reason) == 'wheezy'
Ejemplo n.º 7
0
 def test_multi_nodelabel_is_offline_no_match(self):
     """The label expression 'fast&&debian&&amd64' must not resolve to any node."""
     reason = BecauseNodeLabelIsOffline % 'fast&&debian&&amd64'
     assert util.match_node(reason) is None
Ejemplo n.º 8
0
 def test_busy_node(self):
     """A busy-node reason naming 'wheezy' resolves to the 'wheezy' node."""
     reason = BecauseNodeIsBusy % 'wheezy'
     assert util.match_node(reason) == 'wheezy'
Ejemplo n.º 9
0
 def test_single_nodelabel_is_offline(self):
     """A missing-label reason for the single label 'debian' resolves to 'wheezy'."""
     reason = BecauseNodeLabelIsOffline % 'debian'
     assert util.match_node(reason) == 'wheezy'
Ejemplo n.º 10
0
 def test_multi_nodelabel_is_offline(self):
     """The label expression 'debian&&amd64' resolves to the 'wheezy' node."""
     reason = BecauseNodeLabelIsOffline % 'debian&&amd64'
     assert util.match_node(reason) == 'wheezy'
Ejemplo n.º 11
0
 def test_busy_label_no_match(self):
     """A busy-label reason for 'x86_64' must not resolve to any node."""
     reason = BecauseLabelIsBusy % 'x86_64'
     assert util.match_node(reason) is None
Ejemplo n.º 12
0
 def test_busy_label(self):
     """A busy-label reason for 'amd64' resolves to the 'wheezy' node."""
     reason = BecauseLabelIsBusy % 'amd64'
     assert util.match_node(reason) == 'wheezy'
Ejemplo n.º 13
0
 def test_busy_node_no_match(self):
     """A busy-node reason for 'solaris' must not resolve to any node."""
     reason = BecauseNodeIsBusy % 'solaris'
     assert util.match_node(reason) is None
Ejemplo n.º 14
0
 def test_multi_nodelabel_is_offline(self):
     """The label expression 'debian&&amd64' resolves to the 'wheezy' node."""
     reason = BecauseNodeLabelIsOffline % 'debian&&amd64'
     assert util.match_node(reason) == 'wheezy'
Ejemplo n.º 15
0
Archivo: async.py Proyecto: dmick/mita
def check_queue():
    """
    Inspect the Jenkins build queue and request a node for every stuck task.

    The docs are sparse here, but
    ``jenkins/core/src/main/resources/hudson/model/queue/CauseOfBlockage`` has
    the specific reasons this check needs:

    * *BecauseLabelIsBusy* Waiting for next available executor on {0}

    * *BecauseLabelIsOffline* All nodes of label \u2018{0}\u2019 are offline

    * *BecauseNodeIsBusy* Waiting for next available executor on {0}

    * *BecauseNodeLabelIsOffline* There are no nodes with the label \u2018{0}\u2019

    * *BecauseNodeIsOffline* {0} is offline

    The distinction is the need for a label or a node. In the case of a node,
    it will get matched directly to the nodes in the configuration, in case of a label
    it will go through the configured nodes and pick the first matched to its labels.

    Label needed example
    --------------------
    Jenkins queue reports that 'All nodes of label x86_64 are offline'. The
    configuration has::

        nodes: {
            'centos6': {
                ...
                'labels': ['x86_64', 'centos', 'centos6']
            }
        }

    Since 'x86_64' exists in the 'centos6' configured node, it will be sent off
    to create that one.

    Node needed example
    -------------------
    Jenkins reports that 'wheezy is offline'. The configuration has a few
    nodes configured::

        nodes: {
            'wheezy': {
                ...
            }
            'centos6': {
                ...
            }
        }

    Since the key 'wheezy' matches the node required by the build system to
    continue it goes off to create it.

    Processing
    ----------
    For every queued task whose ``why`` is considered stuck by
    ``util.is_stuck``, a node name is resolved with ``util.match_node``. If
    that yields nothing, the name is inferred from the ``builtOn`` value of
    the job's build numbered ``nextBuildNumber - 1`` via
    ``util.from_offline_executor``. The matching ``pecan.conf.nodes`` entry
    (with its ``name`` set) is then POSTed as JSON to the mita ``nodes``
    endpoint. Tasks that cannot be resolved are logged and skipped.

    :returns: ``None``; the outcome is reported through log messages only.
    """
    jenkins_url = pecan.conf.jenkins['url']
    jenkins_user = pecan.conf.jenkins['user']
    jenkins_token = pecan.conf.jenkins['token']
    conn = jenkins.Jenkins(jenkins_url, jenkins_user, jenkins_token)
    result = conn.get_queue_info()

    if result:
        found_stuck = False
        for task in result:
            why = task.get('why')
            if why is None:
                # this may happen when multiple tasks are getting piled up (for a PR for example)
                # and there is no 'why' yet. So the API has a `None` for it which would break logic
                # to infer what is needed to get it unstuck
                continue
            if not util.is_stuck(why):
                continue

            found_stuck = True
            logger.info('found stuck task with name: %s', task['task']['name'])
            logger.info('reason was: %s', why)
            node_name = util.match_node(why)
            if not node_name:
                logger.warning('unable to match a suitable node')
                logger.warning('will infer from builtOn')
                # Take the path segment that follows the last '/job/' marker.
                # Splitting on the bare word 'job' would cut through job names
                # that contain it (e.g. 'my-job-name'). The leading '/' keeps
                # relative URLs such as 'job/foo/' working as well.
                job_url = '/' + task['task']['url']
                job_name = job_url.split('/job/')[-1].split('/')[0]
                job_id = conn.get_job_info(job_name)['nextBuildNumber'] - 1
                logger.info('determined job name as: %s', job_name)
                logger.info('will look for build info on: %s id: %s', job_name, job_id)
                try:
                    build = conn.get_build_info(job_name, job_id)
                except jenkins.NotFoundException:
                    logger.warning('there are no builds available for job')
                    continue
                logger.info('found a build')
                node_name = util.from_offline_executor(build['builtOn'])
                if not node_name:
                    logger.warning('completely unable to match a node to provide')
                    logger.warning(str(build))
                    continue

            # From here on node_name is always truthy: every path that could
            # leave it empty has already moved on to the next task.
            logger.info('inferred node as: %s', node_name)
            logger.info('matched a node name to config: %s', node_name)
            # TODO: this should talk to the pecan app over HTTP using
            # the `app.conf.pecan_app` configuration entry, and then follow this logic:
            # * got asked to create a new node -> check for an entry in the DB for a node that
            # matches the characteristics of it.
            #  * if there is one already:
            #    - check that if it has been running for more than N (configurable) minutes (defaults to 8):
            #      * if it has, it means that it is probable busy already, so:
            #        - create a new node in the cloud backend matching the characteristics needed
            #      * if it hasn't, it means that it is still getting provisioned so:
            #        - skip - do a log warning
            #  * if there is more than one, and it has been more than
            #    N (8) minutes since they got launched it is possible
            #    that they are configured *incorrectly* and we should not
            #    keep launching more, so log the warning and skip.
            #    - now ask Jenkins about machines that have been idle
            #      for N (configurable) minutes, and see if matches
            #      a record in the DB for the characteristics that we are
            #      being asked to create.
            #      * if found/matched:
            #        - log the warnings again, something is not working right.
            node_endpoint = get_mita_api('nodes')
            configured_node = pecan.conf.nodes[node_name]
            configured_node['name'] = node_name
            requests.post(node_endpoint, data=json.dumps(configured_node))

        if not found_stuck:
            # Reported once per check, and only when it is actually true,
            # instead of once for every task that happens not to be stuck.
            logger.info('no tasks where fund in "stuck" state')
    elif result == []:
        logger.info('the Jenkins queue is empty, nothing to do')
    else:
        logger.warning('attempted to get queue info but got: %s', result)
Ejemplo n.º 16
0
 def test_multi_nodelabel_is_offline_no_match(self):
     """The label expression 'fast&&debian&&amd64' must not resolve to any node."""
     reason = BecauseNodeLabelIsOffline % 'fast&&debian&&amd64'
     assert util.match_node(reason) is None
Ejemplo n.º 17
0
 def test_busy_label(self):
     """A busy-label reason for 'amd64' resolves to the 'wheezy' node."""
     reason = BecauseLabelIsBusy % "amd64"
     assert util.match_node(reason) == "wheezy"