Beispiel #1
0
 def disable_node(self, fqdn):
     '''Mark every node document registered under ``fqdn`` as disabled.

     Queries the project's ``slaves_by_hostname`` view with ``fqdn`` as key
     and, for each row returned, loads the document, sets its ``enabled``
     field to ``False`` and saves it back.
     '''
     view_path = '_design/' + settings.project_name + '/_view/slaves_by_hostname'
     for match in self.db.view(view_path, key=fqdn):
         log.write('Disabling node %s' % fqdn)
         doc = self.db[match.id]
         doc['enabled'] = False
         self.db.save(doc)
Beispiel #2
0
 def disable_node(self, fqdn):
     '''Mark every node document registered under ``fqdn`` as disabled.

     Queries the project's ``nodes_by_hostname`` view with ``fqdn`` as key.
     Each matching document gets ``enabled`` set to ``False`` and is saved;
     a document that can no longer be found is skipped.
     '''
     view_path = '_design/%s/_view/nodes_by_hostname' % settings.project_name
     matches = self.db.view(view_path, key=fqdn)
     for match in matches:
         log.write('Disabling node %s' % fqdn)
         try:
             doc = self.db[match.id]
             doc['enabled'] = False
             self.db.save(doc)
         except couchdb.http.ResourceNotFound:
             # the document is not there any more (presumably deleted in
             # the meantime), so there is nothing left to disable
             continue
Beispiel #3
0
def node_recon(nodelist, interactive=True):
    """Collect system information from hosts and create or update their db entries.

    For each host in ``nodelist`` an ``execnet`` ssh gateway is opened, the
    ``dirt.tasks.system_info`` module is executed remotely and the value it
    sends back is stored under ``sys_info`` in the node document keyed by the
    reported ``fqdn``. Nodes already known to the database are re-enabled;
    new nodes are created with ``enabled`` taken from
    ``settings.node_enable_default``. Hosts that cannot be found are logged
    and skipped.

    :param nodelist: iterable of host names to contact over ssh
    :param interactive: accepted for interface compatibility; not used by
        this implementation
    """
    import execnet
    from dirt.tasks import system_info
    from dirt.core.db import db

    nodes = db.get_nodes()
    for node in nodelist:
        log.write("Connecting to host %s" % node)
        try:
            gw = execnet.makegateway("ssh=%s" % node)
        except execnet.HostNotFound:
            log.write("Host not found: %s" % node)
            continue
        log.write("Connected to host %s" % node)

        try:
            ch = gw.remote_exec(system_info)
            sys_info = ch.receive()
        finally:
            # the gateway is only needed for this single exchange; shut it
            # down so ssh connections do not accumulate across the host list
            gw.exit()

        # update the db
        fqdn = sys_info["fqdn"]
        if fqdn in nodes:
            d = nodes[fqdn]
            d["sys_info"] = sys_info
            d["enabled"] = True
        else:
            d = {"type": "node", "fqdn": fqdn, "sys_info": sys_info}
            log.write("Adding new node %(fqdn)s to database" % d)
            d["enabled"] = settings.node_enable_default
        db.save(d)
Beispiel #4
0
    def push_results(self, results, id, node_id):
        '''Store a finished task's results in its document and release the node.

        The node document ``node_id`` is marked inactive first. Then any
        ``attachments`` in ``results`` are uploaded to the task document
        ``id``, the results and a ``completed`` timestamp are saved, and
        attachments carrying a ``link_name`` are listed under
        ``results['attach_links']``. If the task failed and
        ``settings.notify_list`` is non-empty, a notification email is sent.

        :param results: dictionary returned by the task; must contain
            ``success`` and may contain ``attachments`` and ``reason``
        :param id: id of the task document
        :param node_id: id of the node document that ran the task
        :raises KeyError: re-raised after logging when an expected key is
            missing
        '''
        # node disengaged
        node = self.db[node_id]
        node['active'] = False
        self.db.save(node)
        try:
            has_attachments = 'attachments' in results

            # upload attachments
            doc = self.db[id]
            if has_attachments:
                for attachment in results['attachments']:
                    self.db.put_attachment(doc, attachment['contents'], filename=attachment['filename'])
                    log.write('Task %s: file %s attached' % (id, attachment['filename']))

            # re-read the document before writing the results into it
            doc = self.db[id]
            doc['results'] = results
            doc['completed'] = time.time()
            if has_attachments:
                # if a link name is specified, put a link next to results on the web page
                for attachment in results['attachments']:
                    if 'link_name' in attachment:
                        doc['results'].setdefault('attach_links', []).append({
                            'id': attachment['filename'],
                            'name': attachment['link_name'],
                        })
                # contents were uploaded above; do not store them inline too
                del doc['results']['attachments']
            self.db.save(doc)
            log.write('Task %s pushed to db' % id)

            # email notification for failed test
            if results['success'] == False and len(settings.notify_list) > 0:
                if 'kwargs' in doc and 'testname' in doc['kwargs']:
                    doctype = doc['kwargs']['testname']
                else:
                    doctype = doc['name']
                reason = results['reason'] if 'reason' in results else 'n/a'
                message = '''An automated build test run by the %s server on host %s failed.\n\nType: %s\nDocument ID: %s\nNode: %s\nReason: %s\n\nThis is an automated email. Please do not reply.''' % (settings.project_name, socket.getfqdn(), doctype, id, node['fqdn'], reason)
                dirt.core.yelling.email(settings.notify_list, '[%s] task failure' % settings.project_name, message)

        except couchdb.ResourceNotFound:
            log.write('Cannot push results to db, document %s not found.' % id)
        except KeyError as key:
            log.write('Cannot push results to db, %s key missing in document %s' % (key, id))
            raise
        except IndexError:
            # the previous message formatted an undefined ``taskid`` name, so
            # the handler itself failed with NameError
            log.write('Cannot push results to db, invalid task id for document %s' % id)
Beispiel #5
0
def serve_forever():
    '''Dispatch every task from the database to a node.

    Installs ``signal_handler`` for SIGINT, then walks the tasks returned by
    ``db.get_tasks()``. For each task, nodes are drawn from the load
    balancer until the task is neither ``new`` nor ``retry``: a node that
    meets the task's requirements gets the task executed remotely, while a
    node that does not is recorded in the task's ``nodes_tried`` list and
    the task is aborted.
    '''
    signal.signal(signal.SIGINT, signal_handler)
    log.write('dirt is running...')

    nodes = settings.load_balancer(db)
    tasks = db.get_tasks()

    for task_id in tasks:
        task_status = 'new'
        while task_status in ('new', 'retry'):
            node = nodes.next()

            if not dbi.check_requirements(db, task_id, node):
                # remember the rejected node on the task and give up on it
                task_doc = db[task_id]
                task_doc.setdefault('nodes_tried', []).append(node['fqdn'])
                db.save(task_doc)
                task_status = 'abort'
                continue

            log.write('%s -> %s' % (task_id, node['fqdn']))
            task_status = remote.remote_execute(db, node, task_id)
            if task_status == 'abort':
                log.write('Task %s aborted' % task_id)
Beispiel #6
0
def signal_handler(signal, frame):
    '''Handle SIGINT: clear this master's running tasks from the db and exit.

    For every node, each ``alloc`` entry whose ``master`` equals this host's
    fqdn has the ``started`` and ``node`` fields removed from its task
    document, and the entry is then dropped from the node document.
    Allocations belonging to other masters are left untouched. The process
    exits with status 0 afterwards.

    :param signal: signal number passed by the ``signal`` module (unused)
    :param frame: current stack frame passed by the ``signal`` module (unused)
    '''
    log.write('Caught SIGINT (Ctrl-C), Exiting.')

    # clear any currently-running tasks from db
    log.write('Clearing running tasks from database')
    this_master = socket.getfqdn()
    nodes = db.get_nodes()
    for node in nodes:
        # only clear tasks from this master
        mine = [alloc for alloc in nodes[node].get('alloc', [])
                if alloc['master'] == this_master]
        if not mine:
            continue

        for alloc in mine:
            doc = db[alloc['task']]
            doc.pop('started', None)
            doc.pop('node', None)
            db.save(doc)

        # Remove the cleared entries by value from a fresh copy of the node
        # document. Popping by the index seen in the stale snapshot shifts
        # the remaining entries, which hits the wrong allocation or raises
        # IndexError once more than one entry has to go.
        node_doc = db[nodes[node]['_id']]
        node_doc['alloc'] = [alloc for alloc in node_doc.get('alloc', [])
                             if alloc not in mine]
        db.save(node_doc)
    sys.exit(0)
Beispiel #7
0
def remote_execute(db, node, id):
    '''Launch task ``id`` on ``node`` through ``execnet``.

    The node is flagged ``active`` in the database, then the ``ping`` task
    is run on it. If the ping answers, the task module named in the task
    document is executed remotely, the document receives its start time and
    the host name, and ``db.push_results`` is registered as the channel
    callback.

    Returns ``'executed'`` when the task was started, ``'retry'`` when the
    host could not be reached or the ping failed (the node is disabled in
    that case), and ``'abort'`` when the task module cannot be imported.
    '''
    import time
    import execnet
    hostname = node['fqdn']
    try:
        # first, check if node is alive
        node['active'] = True
        node_id = node['_id']
        db.save(node)
        ping_module = __import__('dirt.tasks.ping', fromlist=['dirt.tasks'])
        gateway = execnet.makegateway('ssh=%s' % hostname)
        channel = gateway.remote_exec(ping_module)
        if not channel.receive():
            log.write('Error connecting with host %s' % hostname)
            db.disable_node(hostname)
            return 'retry'

        try:
            task_doc = db[id]
            taskname = task_doc['name']
            task_module = __import__('tasks.%s' % taskname, fromlist=['tasks'])
            channel = gateway.remote_exec(task_module)
            # send keyword arguments to remote process
            if 'kwargs' in task_doc:
                channel.send(task_doc['kwargs'])
            task_doc['started'] = time.time()
            task_doc['slave'] = hostname
            db.db.save(task_doc)
            # the callback receives only the results; bind the ids here
            channel.setcallback(
                callback=lambda results: db.push_results(results, id=id, node_id=node_id))
        except ImportError:
            log.write('Task %s not found' % taskname)
            # node disengaged
            idle_node = db[node_id]
            idle_node['active'] = False
            db.save(idle_node)
            # update doc
            failed_doc = db[id]
            failed_doc['started'] = failed_doc['completed'] = time.time()
            failed_doc['results'] = {'success': False, 'reason': 'task module not found'}
            db.save(failed_doc)
            return 'abort'
    except execnet.HostNotFound:
        log.write('Host %s not responding' % hostname)
        db.disable_node(hostname)
        return 'retry'
    return 'executed'
Beispiel #8
0
 def __init__(self, host, dbname):
     '''Connect to the CouchDB database ``dbname`` on server ``host``.

     The server must report version 1.1.0 or later, otherwise an error is
     logged and the process exits with status 1. If the server answers
     ``Unauthorized``, credentials are read from the terminal and the
     connection is attempted once more. Any other failure is logged and
     re-raised.

     :param host: URL of the CouchDB server
     :param dbname: name of the database to open
     '''
     import getpass

     couch = couchdb.Server(host)
     try:
         try:
             if couch.version() < '1.1.0':
                 log.write('Error: couchdb version >= 1.1.0 required')
                 sys.exit(1)
             self.db = couch[dbname]
         except couchdb.http.Unauthorized:
             sys.stdout.write('Authentication required for CouchDB database at ' + host + '/' + dbname + '\n')
             # NOTE(review): the credential prompt on this line was mangled in
             # the reviewed source; it is restored as a username/password pair
             # with the password read without echo -- confirm against the
             # original implementation.
             couch.resource.credentials = (raw_input('Username: '), getpass.getpass('Password: '))
             if couch.version() < '1.1.0':
                 log.write('Error: couchdb version >= 1.1.0 required')
                 sys.exit(1)
             self.db = couch[dbname]
         log.write('Connected to db at %s/%s' % (host, dbname))
     except Exception:
         # sys.exit() raises SystemExit, which is not an Exception subclass,
         # so the version-check exits above pass through this handler
         log.write('Error connecting to database')
         raise
Beispiel #9
0
def node_recon(nodelist, db, interactive=True):
    '''Collect system information from hosts and create or update their db entries.

    For each host in ``nodelist`` an ``execnet`` ssh gateway is opened, the
    ``dirt.tasks.system_info`` module is executed remotely and the value it
    sends back is stored under ``sys_info`` in the node document keyed by the
    reported ``fqdn``. Known nodes are re-enabled. For a new node the
    ``enabled`` flag and ``password`` are either asked for on the terminal
    (``interactive``) or taken from ``node_enable_default`` and
    ``node_password_default``.

    :param nodelist: iterable of host names to contact over ssh
    :param db: database wrapper providing ``get_nodes()`` and ``save()``
    :param interactive: prompt for the settings of new nodes when true
    '''
    import execnet
    from dirt.tasks import system_info
    nodes = db.get_nodes()
    for node in nodelist:
        log.write('Connecting to host %s' % node)
        try:
            gw = execnet.makegateway('ssh=%s' % node)
        except execnet.HostNotFound:
            log.write('Host not found: %s' % node)
            continue
        log.write('Connected to host %s' % node)

        try:
            ch = gw.remote_exec(system_info)
            sys_info = ch.receive()
        finally:
            # the gateway is only needed for this single exchange; shut it
            # down so ssh connections do not accumulate across the host list
            gw.exit()

        # update the db
        fqdn = sys_info['fqdn']
        if fqdn in nodes:
            d = nodes[fqdn]
            d['sys_info'] = sys_info
            d['enabled'] = True
        else:
            d = {'type': 'slave', 'fqdn': fqdn, 'sys_info': sys_info, 'active': False}
            log.write('Adding new node %(fqdn)s to database' % d)
            if interactive:
                enable = raw_input('Enable node? [True|False] ')
                # only the exact answer 'True' enables the node
                d['enabled'] = (enable == 'True')
                pw = raw_input('Node password? ')
                d['password'] = pw
            else:
                # NOTE(review): these two names are not defined in this
                # function; presumably module-level settings -- confirm
                d['enabled'] = node_enable_default
                d['password'] = node_password_default
        db.save(d)
Beispiel #10
0
def remote_execute(db, node, id):
    """Launch task ``id`` on ``node`` through ``execnet``.

    An allocation entry (this master's fqdn, the database name and the task
    id) is appended to the node document, then the ``ping`` task is run on
    the host. If the ping answers, the task module named in the task
    document is executed remotely, the document receives its start time and
    the host name, and ``db.push_results`` is registered as the channel
    callback together with the gateway.

    Returns ``"executed"`` when the task was started, ``"retry"`` when the
    host could not be reached or the ping failed (the node is disabled in
    that case), and ``"abort"`` when the task module cannot be imported.
    """
    import time
    import execnet
    import socket

    hostname = node["fqdn"]
    try:
        # store node's state in the db
        node_id = node["_id"]
        node = db[node_id]  # refresh
        alloc = {"master": socket.getfqdn(), "db": db.db.name, "task": id}
        node.setdefault("alloc", []).append(alloc)
        db.save(node)

        # check if node is alive then remote_exec the task module
        # NOTE(review): the allocation stored above stays on the node
        # document on both "retry" paths -- confirm whether it should be
        # released there as well.
        ping_module = __import__("dirt.tasks.ping", fromlist=["dirt.tasks"])
        gw = execnet.makegateway("ssh=%s" % hostname)
        ch = gw.remote_exec(ping_module)
        if not ch.receive():
            log.write("Error connecting with host %s" % hostname)
            # nothing will run through this gateway; do not leave it open
            gw.exit()
            db.disable_node(hostname)
            return "retry"

        try:
            doc = db[id]
            taskname = doc["name"]
            task_module = __import__("tasks.%s" % taskname, fromlist=["tasks"])
            ch = gw.remote_exec(task_module)

            # send keyword arguments to remote process
            if "kwargs" in doc:
                ch.send(doc["kwargs"])

            doc["started"] = time.time()
            doc["node"] = hostname
            db.db.save(doc)

            # the callback receives only the results; bind the rest here
            push_args = {"id": id, "node_id": node_id, "gateway": gw}
            ch.setcallback(callback=lambda results: db.push_results(results, **push_args))

        except ImportError:
            log.write("Task %s not found" % taskname)

            # no callback was registered, so nobody else closes the gateway
            gw.exit()

            # node disengaged: drop this task's allocation. Filtering avoids
            # popping from the list while iterating over its old indices,
            # which raised IndexError or skipped entries.
            node = db[node_id]
            node["alloc"] = [entry for entry in node.get("alloc", [])
                             if entry["task"] != id]
            db.save(node)

            # update doc with failure
            doc = db[id]
            doc["started"] = doc["completed"] = time.time()
            doc["results"] = {"success": False, "reason": "task module %s not found" % taskname}
            db.save(doc)
            return "abort"
    except execnet.HostNotFound:
        log.write("Host %s not responding" % hostname)
        db.disable_node(hostname)
        return "retry"
    return "executed"
Beispiel #11
0
    def push_results(self, results, id, node_id, gateway):
        '''Store a finished task's results, release the node and report status.

        Steps, in order:

        1. remove the first ``alloc`` entry for task ``id`` from the node
           document ``node_id`` and close ``gateway``;
        2. upload any ``attachments`` in ``results``, save the results and a
           ``completed`` timestamp in the task document, and email
           ``settings.notify_list`` if the task failed;
        3. once the ``pytunia/tasks_by_record`` view shows no unfinished
           task for the commit, POST a success/failure status for that
           commit to the GitHub API.

        If the task document cannot be found, the problem is logged and the
        GitHub update is skipped.

        :param results: dictionary returned by the task; must contain
            ``success`` and may contain ``attachments`` and ``reason``
        :param id: id of the task document
        :param node_id: id of the node document that ran the task
        :param gateway: connection to the remote host; its ``exit()`` is
            called here
        :raises KeyError: re-raised after logging when an expected key is
            missing while storing the results
        '''
        # node disengaged
        node = self.db[node_id]
        for index, alloc in enumerate(node['alloc']):
            if alloc['task'] == id:
                node['alloc'].pop(index)
                break
        self.db.save(node)

        # close remote connection
        gateway.exit()

        try:
            has_attachments = 'attachments' in results

            # upload attachments
            doc = self.db[id]
            if has_attachments:
                for attachment in results['attachments']:
                    self.db.put_attachment(doc, attachment['contents'], filename=attachment['filename'])
                    log.write('Task %s: file %s attached' % (id, attachment['filename']))

            # re-read the document before writing the results into it
            doc = self.db[id]
            doc['results'] = results
            doc['completed'] = time.time()
            if has_attachments:
                # if a link name is specified, put a link next to results on the web page
                for attachment in results['attachments']:
                    if 'link_name' in attachment:
                        doc['results'].setdefault('attach_links', []).append({
                            'id': attachment['filename'],
                            'name': attachment['link_name']
                        })
                # contents were uploaded above; do not store them inline too
                del doc['results']['attachments']
            self.db.save(doc)
            log.write('Task %s pushed to db' % id)

            # email notification for failed test
            if results['success'] == False and len(settings.notify_list) > 0:
                if 'kwargs' in doc and 'testname' in doc['kwargs']:
                    doctype = doc['kwargs']['testname']
                else:
                    doctype = doc['name']
                reason = results['reason'] if 'reason' in results else 'n/a'
                message = '''An automated build test run by the %s server on host %s failed.\n\nType: %s\nRecord ID: %s\nDocument ID: %s\nNode: %s\nReason: %s\n\nThis is an automated email. Please do not reply.''' % (settings.project_name, socket.getfqdn(), doctype, doc['record_id'], id, node['fqdn'], reason)
                dirt.core.yelling.email(settings.notify_list, '[%s] task failure' % settings.project_name, message)

        except couchdb.ResourceNotFound:
            log.write('Cannot push results to db, document %s not found.' % id)
            # without the task document there is no commit to report on
            return
        except KeyError as key:
            log.write('Cannot push results to db, %s key missing in document %s' % (key, id))
            raise
        except IndexError:
            # the previous message formatted an undefined ``taskid`` name, so
            # the handler itself failed with NameError
            log.write('Cannot push results to db, invalid task id for document %s' % id)
            return

        # update status on github if all tests are done
        sha = doc['kwargs']['sha']
        overall_success = True
        all_finished = True
        reason = 'Unknown'
        for row in self.db.view('pytunia/tasks_by_record', startkey=[sha,1], endkey=[sha,1,{}], include_docs=True):
            if not 'completed' in row.value:
                all_finished = False
                break
            if 'results' in row.value and not row.value['results']['success']:
                overall_success = False
                # a failed result may lack a reason; keep the default then
                reason = row.value['results'].get('reason', reason)
                break

        if not all_finished:
            return

        try:
            github_oauth_token = settings.github_oauth_token
            user, repo = doc['kwargs']['git_url'].split(':')[1].split('/')
            repo = repo.split('.')[0]
            url = '/repos/%s/%s/statuses/%s' % (user, repo, sha)
            target_url = settings.results_base_url + sha
            if overall_success:
                status = 'success'
                description = 'Build %s passed' % sha[:7]
            else:
                status = 'failure'
                description = 'Build %s failed: %s' % (sha[:7], reason)

            params = {'access_token': github_oauth_token}
            data = {
                'state': status,
                'target_url': target_url,
                'description': description
            }

            conn = httplib.HTTPSConnection('api.github.com')
            try:
                conn.request('POST', url + '?' + urllib.urlencode(params), json.dumps(data))
                resp = conn.getresponse()

                if resp.status != 201:
                    # the format arguments must be one tuple; passing them
                    # separately raised TypeError
                    log.write('Error %i updating status on commit %s: %s' % (resp.status, sha, resp.read()))
            finally:
                conn.close()

        except NameError as err:
            # still best-effort, but leave a trace instead of failing silently
            log.write('Skipping github status update: %s' % err)