Esempio n. 1
0
def agent(num, input):
    """
    Worker loop that checks resource availability and access.

    Runs forever: each iteration takes one resource hostname from `input`,
    looks the node up, tries a setup on it and stores the outcome.

    :param num: identifier of this agent, only used in log messages
    :param input: object whose ``get()`` returns the next resource hostname
                  (presumably a blocking queue -- confirm against the caller)

    Relies on the module-level names `logger`, `Query`, `r` and `s`.
    """
    logger.info("Agent %s starting", num)

    while True:
        resource = input.get()  # resource hostname

        # Load node information from the testbed/facility DB
        # (e.g. with SFA or local API); the node status comes from there.
        node = Query('Nodes').hostname(resource).execute().first()

        if not node:
            # There is no node object to read a hostname or state from, so
            # log the hostname we were asked about and move on to the next
            # item instead of dereferencing the missing node.
            logger.info("Node disappeared : %s", resource)
            continue

        # NOTE(review): availability/status are computed but not included in
        # the record stored below; kept so the node checks still run.
        if not node.enabled:
            availability = 0
            status = "disabled"
        elif not node.is_running():
            availability = 0
            status = "down"
        else:
            availability = 1
            status = "up"

        # Node access status (e.g. ssh): try a setup on the node and report
        # the result. This is attempted even for nodes that are marked as
        # disabled or not running.
        result = r.setup(resource)
        print(result)
        if not result['status']:
            logger.info("%s : Failed SSH access (%s)", resource, result['message'])
        else:
            logger.info("%s : Setup complete", resource)

        s.resource({
            "hostname": node.hostname,
            "state": node.boot_state,
            "access": result,
        })
Esempio n. 2
0
def _job_timestamp():
    """Return the current local time formatted as 'YYYY-MM-DD HH:MM:SS'."""
    return datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S')


def _job_error(message, stderr):
    """Build the job-record fields describing a job that ended in error."""
    return {
        'completed': _job_timestamp(),
        'jobstatus': 'error',
        'message': message,
        'returnstatus': 1,
        'stdout': '',
        'stderr': stderr,
    }


def process_job(num, input):
    """
    Worker loop that executes queued jobs on remote nodes.

    Connects to RethinkDB once, then runs forever: each iteration takes a job
    id from `input`, loads the job from the 'jobs' table, marks it as running,
    sets up the target node and, for 'ping' and 'traceroute' jobs, runs the
    matching remote script and collects the outcome in `upd`.

    :param num: identifier of this agent, only used in log messages
    :param input: object whose ``get()`` returns the next job id
                  (presumably a blocking queue -- confirm against the caller)
    :raises SystemExit: if the RethinkDB connection cannot be established

    Relies on the module-level names `logger`, `r`, `Config`, `remote` and
    `remote_worker`.
    """
    logger.info("Agent %s starting" % num)

    try:
        c = r.connect(host=Config.rethinkdb["host"],
                      port=Config.rethinkdb["port"],
                      db=Config.rethinkdb['db'])
    except r.RqlDriverError:
        logger.error("Can't connect to RethinkDB")
        raise SystemExit("Can't connect to RethinkDB")

    while True:
        job = input.get()
        logger.info("Agent %s processing job %s" % (num, job))

        j = r.table('jobs').get(job).run(c)

        logger.info("Job: %s" % (j,))

        if j is None:
            # No document for this id: there is nothing to run, and indexing
            # the missing job below would kill the worker with a TypeError.
            logger.error("Job %s not found, skipping" % (job,))
            continue

        r.table('jobs').get(job).update({
            'started': _job_timestamp(),
            'jobstatus': 'running',
            'message': 'executing job'
        }).run(c)

        result = remote.setup(j['node'])
        if not result['status']:
            logger.info("%s : Failed SSH access (%s)" % (j['node'], result['message']))
            upd = _job_error('node not reachable', result['message'])

        else:
            if 'arg' not in j['parameters']:
                j['parameters']['arg'] = ""

            # ping and traceroute are handled identically: the remote script
            # is named after the command and takes the same arguments.
            if j['command'] in ('ping', 'traceroute'):
                remote_command = '%s.py %s %s' % (
                    j['command'], j['parameters']['arg'], j['parameters']['dst'])

                try:
                    ret = remote_worker(j['node'], remote_command)
                except Exception as msg:
                    logger.error("EXEC error: %s" % (msg,))
                    upd = _job_error('job error', "execution error %s" % (msg,))
                    logger.error("execution error %s" % (msg,))
                else:
                    upd = {
                        'completed': _job_timestamp(),
                        'jobstatus': ret['jobstatus'],
                        'message': ret['message'],
                        'returnstatus': ret['returnstatus'],
                        'stdout': ret['stdout'],
                        'stderr': ret['stderr']
                    }
                    logger.info("Command executed, result: %s" % (upd,))

        # NOTE(review): `upd` is never written back to the 'jobs' table in the
        # code visible here, and other command values leave it unset; confirm
        # whether the remainder of this function is missing from this listing.
Esempio n. 3
0
                else:
                    upd = {
                        'completed': datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'),
                        'jobstatus': ret['jobstatus'],
                        'message': ret['message'],
                        'returnstatus': ret['returnstatus'],
                        'stdout': ret['stdout'],
                        'stderr': ret['stderr']
                    }
                    logger.info("Command executed, result: %s" % (upd))

            elif j['command'] == 'iperf':

                ##
                # setup second node
                result_dst = remote.setup(j['parameters']['dst'])

                if not result_dst['status']:

                    logger.error("%s : Failed SSH access (%s)" % (j['parameters']['dst'], result_dst['message']))
                    upd = {
                        'completed': datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'),
                        'jobstatus': 'error',
                        'message': 'job error',
                        'returnstatus': 1,
                        'stdout': '',
                        'stderr': "Node %s not responding" % (j['parameters']['dst'])
                    }
                    logger.error("Node %s not responding" % (j['parameters']['dst']))

                else: