def disable_node(self, fqdn):
    '''mark every node document registered under ``fqdn`` as disabled.

    the hostname is looked up in the project's ``slaves_by_hostname`` view;
    for each row returned the action is logged, the document's ``enabled``
    flag is set to ``False`` and the document is saved back to the database.
    '''
    view_path = '_design/' + settings.project_name + '/_view/slaves_by_hostname'
    matches = self.db.view(view_path, key=fqdn)
    for match in matches:
        log.write('Disabling node %s' % fqdn)
        node_doc = self.db[match.id]
        node_doc['enabled'] = False
        self.db.save(node_doc)
def disable_node(self, fqdn):
    '''set a node's ``enabled`` flag to false

    every row returned by the project's ``nodes_by_hostname`` view for
    ``fqdn`` is loaded, flagged ``enabled = False`` and saved. the update is
    best-effort: a ``ResourceNotFound`` raised while updating a row is not
    fatal, but it is logged rather than silently swallowed.
    '''
    view_path = '_design/%s/_view/nodes_by_hostname' % settings.project_name
    for row in self.db.view(view_path, key=fqdn):
        log.write('Disabling node %s' % fqdn)
        try:
            node = self.db[row.id]
            node['enabled'] = False
            self.db.save(node)
        except couchdb.http.ResourceNotFound:
            # already gone? nothing left to disable, but leave a trace
            log.write('Node %s (document %s) not found, nothing to disable' % (fqdn, row.id))
def node_recon(nodelist, interactive=True):
    """grab system information from a list of hosts and create or update
    nodes' db entries.

    for each host in ``nodelist`` an ssh gateway is opened with ``execnet``
    and the ``system_info`` task is run on it. the result is stored under
    ``sys_info`` in the node document matching the reported ``fqdn``:
    known nodes are re-enabled, unknown nodes are created with
    ``settings.node_enable_default``. hosts that raise ``HostNotFound`` are
    logged and skipped.

    ``interactive`` is accepted for interface compatibility; this
    implementation does not read it.
    """
    import execnet
    from dirt.tasks import system_info
    from dirt.core.db import db

    nodes = db.get_nodes()
    for node in nodelist:
        log.write("Connecting to host %s" % node)
        try:
            gw = execnet.makegateway("ssh=%s" % node)
        except execnet.HostNotFound:
            log.write("Host not found: %s" % node)
            continue
        log.write("Connected to host %s" % node)

        try:
            ch = gw.remote_exec(system_info)
            sys_info = ch.receive()
        finally:
            # the gateway is only needed for this one exchange; close it so
            # an ssh connection is not leaked for every host visited
            gw.exit()

        # update the db
        fqdn = sys_info["fqdn"]
        if fqdn in nodes:
            d = nodes[fqdn]
            d["sys_info"] = sys_info
            d["enabled"] = True
        else:
            d = {"type": "node", "fqdn": fqdn, "sys_info": sys_info}
            log.write("Adding new node %(fqdn)s to database" % d)
            d["enabled"] = settings.node_enable_default
        db.save(d)
def push_results(self, results, id, node_id):
    '''update task document with results

    steps, in order:

    1. clear the ``active`` flag on node document ``node_id``.
    2. upload every entry of ``results['attachments']`` (if present) as an
       attachment of task document ``id``.
    3. store ``results`` and a ``completed`` timestamp on the task document;
       attachments carrying a ``link_name`` are listed under
       ``results['attach_links']`` and the raw ``attachments`` list is
       dropped before saving. note that ``results`` itself is mutated.
    4. if ``results['success']`` is false and ``settings.notify_list`` is
       non-empty, send a failure notification email.

    a missing task document is logged and ignored; a ``KeyError`` is logged
    and re-raised; an ``IndexError`` is logged.
    '''
    # node disengaged
    node = self.db[node_id]
    node['active'] = False
    self.db.save(node)

    try:
        # upload attachments
        doc = self.db[id]
        if 'attachments' in results:
            for attachment in results['attachments']:
                self.db.put_attachment(doc, attachment['contents'], filename=attachment['filename'])
                log.write('Task %s: file %s attached' % (id, attachment['filename']))

        # reload the document after the uploads before modifying it
        doc = self.db[id]
        doc['results'] = results
        doc['completed'] = time.time()

        if 'attachments' in results:
            # if a link name is specified, put a link next to results on the web page
            for attachment in results['attachments']:
                if 'link_name' in attachment:
                    doc['results'].setdefault('attach_links', []).append({
                        'id': attachment['filename'],
                        'name': attachment['link_name']
                    })
            # attachment contents were uploaded above; do not store them inline
            del doc['results']['attachments']

        self.db.save(doc)
        log.write('Task %s pushed to db' % id)

        # email notification for failed test
        # (``== False`` is kept on purpose: a ``None`` success is not a failure)
        if results['success'] == False and settings.notify_list:
            if 'kwargs' in doc and 'testname' in doc['kwargs']:
                doctype = doc['kwargs']['testname']
            else:
                doctype = doc['name']
            reason = results.get('reason', 'n/a')
            message = (
                'An automated build test run by the %s server on host %s failed.\n\n'
                'Type: %s\nDocument ID: %s\nNode: %s\nReason: %s\n\n'
                'This is an automated email. Please do not reply.'
            ) % (settings.project_name, socket.getfqdn(), doctype, id, node['fqdn'], reason)
            dirt.core.yelling.email(settings.notify_list, '[%s] task failure' % settings.project_name, message)
    except couchdb.ResourceNotFound:
        log.write('Cannot push results to db, document %s not found.' % id)
    except KeyError as key:
        log.write('Cannot push results to db, %s key missing in document %s' % (key, id))
        raise
    except IndexError:
        # the original message interpolated an undefined ``taskid`` and
        # raised NameError from inside this handler
        log.write('Cannot push results to db, invalid task id for document %s' % id)
def serve_forever():
    '''pair each task from the database with a node and execute it.

    installs ``signal_handler`` for SIGINT, then walks the task ids produced
    by ``db.get_tasks()``. for every task, nodes are drawn from the load
    balancer until the task's status is neither ``new`` nor ``retry``:

    * if the node satisfies the task's requirements the task is executed
      remotely and the returned status decides whether to try another node;
    * otherwise the node's fqdn is appended to the task's ``nodes_tried``
      list and the task is aborted.
    '''
    signal.signal(signal.SIGINT, signal_handler)
    log.write('dirt is running...')

    nodes = settings.load_balancer(db)
    tasks = db.get_tasks()
    for task_id in tasks:
        status = 'new'
        while status in ('new', 'retry'):
            candidate = nodes.next()

            if not dbi.check_requirements(db, task_id, candidate):
                # remember the node that was rejected and give up on the task
                task_doc = db[task_id]
                task_doc.setdefault('nodes_tried', []).append(candidate['fqdn'])
                db.save(task_doc)
                status = 'abort'
                continue

            log.write('%s -> %s' % (task_id, candidate['fqdn']))
            status = remote.remote_execute(db, candidate, task_id)
            if status == 'abort':
                log.write('Task %s aborted' % task_id)
def signal_handler(signal, frame):
    '''handle SIGINTs gracefully, clearing running tasks from the db

    for every node returned by ``db.get_nodes()``, each allocation whose
    ``master`` is this host has its task document reset (``started`` and
    ``node`` removed) and is then dropped from the node's ``alloc`` list.
    the process exits with status 0 afterwards.

    ``signal`` and ``frame`` are the standard signal-handler arguments and
    are not used.
    '''
    log.write('Caught SIGINT (Ctrl-C), Exiting.')

    # clear any currently-running tasks from db
    log.write('Clearing running tasks from database')
    this_master = socket.getfqdn()
    nodes = db.get_nodes()
    for node in nodes:
        if 'alloc' not in nodes[node]:
            continue

        # only clear tasks from this master
        mine = [alloc for alloc in nodes[node]['alloc'] if alloc['master'] == this_master]
        if not mine:
            continue

        for alloc in mine:
            doc = db[alloc['task']]
            if 'started' in doc:
                del doc['started']
            if 'node' in doc:
                del doc['node']
            db.save(doc)

        # remove the cleared allocations in one pass. the original popped by
        # index from a freshly fetched list while iterating the stale one,
        # so the indices drifted after the first removal
        node_doc = db[nodes[node]['_id']]
        node_doc['alloc'] = [alloc for alloc in node_doc['alloc'] if alloc not in mine]
        db.save(node_doc)

    sys.exit(0)
def remote_execute(db, node, id):
    '''start a task on a remote host via ``execnet`` and set task start time
    and node hostname in the database.

    we first run the ``ping`` task to ensure the node is alive, and if that
    fails disable it in the db.

    returns one of:

    * ``'executed'`` -- the task module was started; ``db.push_results`` is
      registered as the channel callback and receives the results.
    * ``'abort'`` -- the task module could not be imported; the task
      document is marked as failed.
    * ``'retry'`` -- the host was not found or did not answer the ping; the
      node is disabled.
    '''
    import time
    import execnet

    hostname = node['fqdn']
    try:
        # first, check if node is alive
        node['active'] = True
        node_id = node['_id']
        db.save(node)

        ping_module = __import__('dirt.tasks.ping', fromlist=['dirt.tasks'])
        gw = execnet.makegateway('ssh=%s' % hostname)
        ch = gw.remote_exec(ping_module)
        if ch.receive():
            try:
                doc = db[id]
                taskname = doc['name']
                task_module = __import__('tasks.%s' % taskname, fromlist=['tasks'])
                ch = gw.remote_exec(task_module)

                # send keyword arguments to remote process
                if 'kwargs' in doc:
                    ch.send(doc['kwargs'])

                doc['started'] = time.time()
                doc['slave'] = hostname
                db.db.save(doc)

                # use lambda to provide arguments to callback
                push_args = {'id': id, 'node_id': node_id}
                ch.setcallback(callback=lambda results: db.push_results(results, **push_args))
            except ImportError:
                log.write('Task %s not found' % taskname)
                # nothing will run on this gateway; do not leak the connection
                gw.exit()

                # node disengaged
                node = db[node_id]
                node['active'] = False
                db.save(node)

                # update doc
                doc = db[id]
                doc['started'] = doc['completed'] = time.time()
                doc['results'] = {'success': False, 'reason': 'task module not found'}
                db.save(doc)
                return 'abort'
        else:
            # ping failed; the gateway is of no further use
            gw.exit()
            log.write('Error connecting with host %s' % hostname)
            # NOTE(review): the node's ``active`` flag set above is not
            # cleared on this path -- confirm that is intended
            db.disable_node(hostname)
            return 'retry'
    except execnet.HostNotFound:
        log.write('Host %s not responding' % hostname)
        db.disable_node(hostname)
        return 'retry'

    return 'executed'
def __init__(self, host, dbname):
    '''connect to database ``dbname`` on the CouchDB server at ``host``.

    the server must report version 1.1.0 or newer, otherwise an error is
    logged and the process exits with status 1. if the server answers
    ``Unauthorized`` the user is prompted for credentials and the connection
    is attempted once more. any other failure is logged and re-raised.

    on success the database handle is stored in ``self.db``.
    '''
    import getpass

    couch = couchdb.Server(host)

    def open_db():
        # version check and lookup, shared by the first and the
        # authenticated attempt (plain string comparison, as before)
        if couch.version() < '1.1.0':
            log.write('Error: couchdb version >= 1.1.0 required')
            sys.exit(1)
        return couch[dbname]

    try:
        try:
            self.db = open_db()
        except couchdb.http.Unauthorized:
            print('Authentication required for CouchDB database at ' + host + '/' + dbname)
            # NOTE(review): the original credential prompt was redacted in
            # this copy of the file; it is reconstructed here as a username
            # prompt plus a non-echoing password prompt -- confirm against
            # the upstream source
            couch.resource.credentials = (raw_input('Username: '), getpass.getpass('Password: '))
            self.db = open_db()
        log.write('Connected to db at %s/%s' % (host, dbname))
    except Exception:
        log.write('Error connecting to database')
        # the original had an unreachable ``sys.exit(1)`` after this raise
        raise
def node_recon(nodelist, db, interactive=True):
    '''grab system information from a list of hosts and create or update
    slave nodes' db entries.

    for each host in ``nodelist`` an ssh gateway is opened with ``execnet``
    and the ``system_info`` task is run on it. the result is stored under
    ``sys_info`` in the node document matching the reported ``fqdn``.
    known nodes are re-enabled. unknown nodes are created as inactive
    ``slave`` documents; their ``enabled`` flag and ``password`` are asked
    for on the console when ``interactive`` is true and otherwise taken
    from ``node_enable_default`` / ``node_password_default``. hosts that
    raise ``HostNotFound`` are logged and skipped.
    '''
    import execnet
    from dirt.tasks import system_info

    nodes = db.get_nodes()
    for node in nodelist:
        log.write('Connecting to host %s' % node)
        try:
            gw = execnet.makegateway('ssh=%s' % node)
        except execnet.HostNotFound:
            log.write('Host not found: %s' % node)
            continue
        log.write('Connected to host %s' % node)

        try:
            ch = gw.remote_exec(system_info)
            sys_info = ch.receive()
        finally:
            # the gateway is only needed for this one exchange; close it so
            # an ssh connection is not leaked for every host visited
            gw.exit()

        # update the db
        fqdn = sys_info['fqdn']
        if fqdn in nodes:
            d = nodes[fqdn]
            d['sys_info'] = sys_info
            d['enabled'] = True
        else:
            d = {'type': 'slave', 'fqdn': fqdn, 'sys_info': sys_info, 'active': False}
            log.write('Adding new node %(fqdn)s to database' % d)
            if interactive:
                enable = raw_input('Enable node? [True|False] ')
                # only the exact answer 'True' enables the node
                d['enabled'] = (enable == 'True')
                d['password'] = raw_input('Node password? ')
            else:
                d['enabled'] = node_enable_default
                d['password'] = node_password_default
        db.save(d)
def remote_execute(db, node, id):
    """start a task on a remote host via ``execnet`` and set task start time
    and node hostname in the database.

    the task is first recorded in the node document's ``alloc`` list
    (master fqdn, database name, task id). we then run the ``ping`` task to
    ensure the node is alive, and if that fails disable it in the db.

    returns one of:

    * ``"executed"`` -- the task module was started; ``db.push_results`` is
      registered as the channel callback and is handed the gateway.
    * ``"abort"`` -- the task module could not be imported; the allocation
      is removed and the task document is marked as failed.
    * ``"retry"`` -- the host was not found or did not answer the ping; the
      node is disabled.
    """
    import time
    import execnet
    import socket

    hostname = node["fqdn"]
    try:
        # store node's state in the db
        node_id = node["_id"]
        node = db[node_id]  # refresh
        alloc = {"master": socket.getfqdn(), "db": db.db.name, "task": id}
        node.setdefault("alloc", []).append(alloc)
        db.save(node)

        # check if node is alive then remote_exec the task module
        ping_module = __import__("dirt.tasks.ping", fromlist=["dirt.tasks"])
        gw = execnet.makegateway("ssh=%s" % hostname)
        ch = gw.remote_exec(ping_module)
        if ch.receive():
            try:
                doc = db[id]
                taskname = doc["name"]
                task_module = __import__("tasks.%s" % taskname, fromlist=["tasks"])
                ch = gw.remote_exec(task_module)

                # send keyword arguments to remote process
                if "kwargs" in doc:
                    ch.send(doc["kwargs"])

                doc["started"] = time.time()
                doc["node"] = hostname
                db.db.save(doc)

                # use lambda to provide arguments to callback
                push_args = {"id": id, "node_id": node_id, "gateway": gw}
                ch.setcallback(callback=lambda results: db.push_results(results, **push_args))
            except ImportError:
                log.write("Task %s not found" % taskname)
                # nothing will run on this gateway; do not leak the connection
                gw.exit()

                # node disengaged: drop this task's allocation. the original
                # popped while iterating over the stale index range, which
                # raises IndexError once an entry has been removed
                node = db[node_id]
                node["alloc"] = [entry for entry in node["alloc"] if entry["task"] != id]
                db.save(node)

                # update doc with failure
                doc = db[id]
                doc["started"] = doc["completed"] = time.time()
                doc["results"] = {"success": False, "reason": "task module %s not found" % taskname}
                db.save(doc)
                return "abort"
        else:
            # ping failed; the gateway is of no further use
            gw.exit()
            log.write("Error connecting with host %s" % hostname)
            # NOTE(review): the allocation recorded above is not removed on
            # this path -- confirm that is intended
            db.disable_node(hostname)
            return "retry"
    except execnet.HostNotFound:
        log.write("Host %s not responding" % hostname)
        db.disable_node(hostname)
        return "retry"

    return "executed"
def push_results(self, results, id, node_id, gateway):
    '''update task document with results

    steps, in order:

    1. remove the first allocation for task ``id`` from node document
       ``node_id`` and close ``gateway``.
    2. upload every entry of ``results['attachments']`` (if present) as an
       attachment of the task document.
    3. store ``results`` and a ``completed`` timestamp on the task document;
       attachments carrying a ``link_name`` are listed under
       ``results['attach_links']`` and the raw ``attachments`` list is
       dropped before saving. note that ``results`` itself is mutated.
    4. if ``results['success']`` is false and ``settings.notify_list`` is
       non-empty, send a failure notification email.
    5. if the task carries a ``sha`` in its ``kwargs`` and the tasks in the
       ``pytunia/tasks_by_record`` view for that sha are done, post the
       overall build status for the commit to the github statuses api.

    a missing task document is logged and ignored; a ``KeyError`` in steps
    2-4 is logged and re-raised; an ``IndexError`` is logged.
    '''
    # node disengaged
    node = self.db[node_id]
    for index, alloc in enumerate(node['alloc']):
        if alloc['task'] == id:
            del node['alloc'][index]
            break
    self.db.save(node)

    # close remote connection
    gateway.exit()

    doc = None
    try:
        # upload attachments
        doc = self.db[id]
        if 'attachments' in results:
            for attachment in results['attachments']:
                self.db.put_attachment(doc, attachment['contents'], filename=attachment['filename'])
                log.write('Task %s: file %s attached' % (id, attachment['filename']))

        # reload the document after the uploads before modifying it
        doc = self.db[id]
        doc['results'] = results
        doc['completed'] = time.time()

        if 'attachments' in results:
            # if a link name is specified, put a link next to results on the web page
            for attachment in results['attachments']:
                if 'link_name' in attachment:
                    doc['results'].setdefault('attach_links', []).append({
                        'id': attachment['filename'],
                        'name': attachment['link_name']
                    })
            # attachment contents were uploaded above; do not store them inline
            del doc['results']['attachments']

        self.db.save(doc)
        log.write('Task %s pushed to db' % id)

        # email notification for failed test
        # (``== False`` is kept on purpose: a ``None`` success is not a failure)
        if results['success'] == False and settings.notify_list:
            if 'kwargs' in doc and 'testname' in doc['kwargs']:
                doctype = doc['kwargs']['testname']
            else:
                doctype = doc['name']
            reason = results.get('reason', 'n/a')
            message = (
                'An automated build test run by the %s server on host %s failed.\n\n'
                'Type: %s\nRecord ID: %s\nDocument ID: %s\nNode: %s\nReason: %s\n\n'
                'This is an automated email. Please do not reply.'
            ) % (settings.project_name, socket.getfqdn(), doctype, doc['record_id'], id, node['fqdn'], reason)
            dirt.core.yelling.email(settings.notify_list, '[%s] task failure' % settings.project_name, message)
    except couchdb.ResourceNotFound:
        log.write('Cannot push results to db, document %s not found.' % id)
    except KeyError as key:
        log.write('Cannot push results to db, %s key missing in document %s' % (key, id))
        raise
    except IndexError:
        # the original message interpolated an undefined ``taskid`` and
        # raised NameError from inside this handler
        log.write('Cannot push results to db, invalid task id for document %s' % id)

    if doc is None:
        # the task document was never loaded, so there is nothing to report
        return

    # update status on github if all tests are done
    kwargs = doc.get('kwargs', {})
    if 'sha' not in kwargs:
        # task is not tied to a commit (kwargs are optional on task documents)
        return
    sha = kwargs['sha']

    overall_success = True
    all_finished = True
    reason = 'Unknown'
    for row in self.db.view('pytunia/tasks_by_record', startkey=[sha,1], endkey=[sha,1,{}], include_docs=True):
        if not 'completed' in row.value:
            all_finished = False
            break
        if 'results' in row.value and not row.value['results']['success']:
            overall_success = False
            # ``reason`` is optional in results; keep 'Unknown' when absent
            reason = row.value['results'].get('reason', reason)
            break

    if not all_finished:
        return

    try:
        github_oauth_token = settings.github_oauth_token
        user, repo = kwargs['git_url'].split(':')[1].split('/')
        repo = repo.split('.')[0]
        url = '/repos/%s/%s/statuses/%s' % (user, repo, sha)
        target_url = settings.results_base_url + sha

        if overall_success:
            status = 'success'
            description = 'Build %s passed' % sha[:7]
        else:
            status = 'failure'
            description = 'Build %s failed: %s' % (sha[:7], reason)

        params = {'access_token': github_oauth_token}
        data = {
            'state': status,
            'target_url': target_url,
            'description': description
        }
        conn = httplib.HTTPSConnection('api.github.com')
        try:
            conn.request('POST', url + '?' + urllib.urlencode(params), json.dumps(data))
            resp = conn.getresponse()
            if resp.status != 201:
                # arguments must be a single tuple; the original passed them
                # as separate arguments to log.write and failed to format
                log.write('Error %i updating status on commit %s: %s' % (resp.status, sha, resp.read()))
        finally:
            conn.close()
    except NameError:
        # NOTE(review): kept from the original; presumably tolerates github
        # support (httplib/urllib/json) not being available in this module --
        # confirm which name is expected to be missing
        pass