Beispiel #1
0
def init_sar_iostat_top():
    """
    Start system metrics collection; the agent process calls this on startup.

    Creates the module-level ``sys_logger`` (log file
    ``system_metrics_gather_debug.out`` in the current working directory) and
    then starts one ``common.FuncThread`` per collector, in this order:
    1. top_gather - TOP output collection
    2. iostat_gather - iostat output collection
    3. sar_gather - SAR data collection
    4. docker_stat_gather - docker stat of all active containers

    """
    global sys_logger
    sys_logger = loggersetup("%s/system_metrics_gather_debug.out" % os.getcwd())
    sys_logger.debug("Starting system metrics gather threads")

    # (log message, collector function) pairs; start-up order is significant
    # only in that it matches the order of the log messages.
    collectors = (
        ("Starting top gather", top_gather),
        ("Starting iostat gather", iostat_gather),
        ("Starting SAR gather", sar_gather),
        ("Starting docker stat gather", docker_stat_gather),
    )
    for message, collector in collectors:
        sys_logger.debug(message)
        worker = common.FuncThread(collector, True)
        worker.start()
Beispiel #2
0
def perf_strace_gather(testid, perf_config=None, strace_config=None):
    """
    Configure the profilers requested in the test details; the agent invokes this on test startup.

    Every profiler that has a configuration is started on its own
    ``common.FuncThread`` (``perf_gather`` / ``strace_gather``). A profiler
    whose configuration is ``None`` is skipped and a debug line is logged, so
    the declared ``None`` defaults are safe to rely on.

    :param testid: test identifier, forwarded to the gather functions
    :param perf_config: mapping with 'delay' and 'duration' and, optionally,
        'process'; ``None`` disables perf
    :param strace_config: mapping with 'delay', 'duration' and 'process';
        ``None`` disables strace
    """
    sys_logger.debug("Starting Profilers setup for test ID : " + str(testid))

    if perf_config is not None:
        sys_logger.debug("Perf configuration details")
        # %s formatting keeps the message text unchanged for string values and
        # additionally tolerates non-string values (e.g. an int delay).
        if "process" in perf_config:
            sys_logger.debug("Delay - %s Duration - %s Process - %s" %
                             (perf_config['delay'], perf_config['duration'],
                              perf_config['process']))
        else:
            sys_logger.debug("Delay - %s Duration - %s" %
                             (perf_config['delay'], perf_config['duration']))

        t1 = common.FuncThread(perf_gather, True, testid, perf_config)
        t1.start()
    else:
        sys_logger.debug("Perf not configured ")

    if strace_config is not None:
        sys_logger.debug("Strace configuration details")
        sys_logger.debug("Delay - %s Duration - %s Process - %s" %
                         (strace_config['delay'], strace_config['duration'],
                          strace_config['process']))
        t2 = common.FuncThread(strace_gather, True, testid, strace_config)
        t2.start()
    else:
        sys_logger.debug("Strace not configured ")
Beispiel #3
0
    def __init__(self, db, cfg, lctx):
        """
        Keep references to the collaborators and create the worker threads.

        :param db: object exposing ``tests_to_run``; stored as ``self.dbinstance``
        :param cfg: configuration object providing ``DHPORT`` and ``CPORT``
        :param lctx: logging context stored on the instance
        """
        self.dbinstance, self.cfg = db, cfg
        self.testmap = db.tests_to_run

        client_log = LOG.getLogger("clientlog", "DH")
        self.cl = client.TCPClient(client_log)
        self.ev = envelope.DaytonaEnvelope()

        self.HOST = common.get_local_ip()
        self.PORT, self.CPORT = cfg.DHPORT, cfg.CPORT

        # The threads are only constructed here; no start() call is made.
        self.scheduler_thread = common.FuncThread(self.dispatch, True)
        self.testmon_thread = common.FuncThread(self.testmon, True)

        self.lctx = lctx
Beispiel #4
0
def docker_stat_gather(self):
    """
    Periodically collect docker stats, after verifying that a supported docker is present.

    Runs the ``docker_version`` command and parses the first ``<major>.<minor>``
    number out of its output. If the command cannot be run or parsed, or the
    version is below 10.0, an error is logged and the function returns, which
    ends the gather thread. Otherwise it loops forever, starting a
    ``collect_docker_stats`` thread every ``system_metrics_interval`` seconds.

    :param self: unused here; presumably the extra positional argument that
        ``common.FuncThread`` passes to its target -- TODO confirm
    """
    # Checking docker version
    try:
        p1 = subprocess.Popen(docker_version,
                              stdout=subprocess.PIPE,
                              stderr=subprocess.PIPE)
        raw_version = p1.communicate()[0].strip()
        if not isinstance(raw_version, str):
            # Python 3 pipes yield bytes; a str regex cannot search bytes.
            raw_version = raw_version.decode("utf-8", "replace")
        version = float(re.findall(r"\d+\.\d+", raw_version)[0])
    except Exception:
        # Docker is not installed (or its output is unusable), abort this thread
        sys_logger.error("Docker not installed !! ")
        sys_logger.error("Aborting docker stat gather thread !! ")
        # Plain return ends the thread. quit() is the interactive helper from
        # the site module: it closes sys.stdin for the whole process and is
        # missing when Python runs with -S.
        return

    if version < 10.0:
        # Docker version less than 10 is not supported
        sys_logger.error("Docker version less than 10, not supported !! ")
        sys_logger.error("Aborting docker stat gather thread !! ")
        return

    # Starting docker stats
    # Spawning different thread for collecting docker stat as it takes some time to collect the stats
    while True:
        thread = common.FuncThread(collect_docker_stats, True)
        thread.start()
        time.sleep(float(system_metrics_interval))
Beispiel #5
0
    def __init__(self, db, cfg, lctx):
        """
        Scheduler constructor: records its collaborators, builds the TCP client
        and message envelope, and creates the dispatch and test-monitor threads.

        :param db: object exposing ``tests_to_run``; stored as ``self.dbinstance``
        :param cfg: configuration object providing ``DHPORT`` and ``CPORT``
        :param lctx: logging context stored on the instance
        """
        self.dbinstance, self.cfg = db, cfg
        self.testmap = db.tests_to_run

        # Networking helpers used to talk to agents.
        transport_log = LOG.getLogger("clientlog", "DH")
        self.cl = client.TCPClient(transport_log)
        self.ev = envelope.DaytonaEnvelope()
        self.HOST = common.get_local_ip()
        self.PORT, self.CPORT = cfg.DHPORT, cfg.CPORT

        # Worker threads are constructed here; no start() call is made.
        self.scheduler_thread = common.FuncThread(self.dispatch, True)
        self.testmon_thread = common.FuncThread(self.testmon, True)

        self.lctx = lctx
def init_sar_iostat_top():
    """
    Set up the module-level ``sys_logger`` and start the four metrics threads.

    The log file is ``system_metrics_gather_debug.out`` in the current working
    directory. Threads are started for top, iostat, SAR and docker stat
    gathering, in that order, each preceded by a debug log line.
    """
    global sys_logger
    sys_logger = loggersetup("%s/system_metrics_gather_debug.out" % os.getcwd())
    sys_logger.debug("Starting system metrics gather threads")

    # One (label, target) pair per collector; the label is spliced into the
    # "Starting <label> gather" debug line.
    gatherers = (
        ("top", top_gather),
        ("iostat", iostat_gather),
        ("SAR", sar_gather),
        ("docker stat", docker_stat_gather),
    )
    for label, target in gatherers:
        sys_logger.debug("Starting " + label + " gather")
        gather_thread = common.FuncThread(target, True)
        gather_thread.start()
Beispiel #7
0
    def execute(self, command, paramcsv, actionID):
        """
        Run the action mapped to ``command``, synchronously or on a background thread.

        ``self.conf.actionMap[command.strip()]`` is read as a dotted
        ``module.function.syncflag`` string. The module is imported, the
        function looked up, and then:

        * syncflag ``"T"``: the function is called inline and its return value
          is embedded in the reply;
        * anything else: the function is started on a ``common.FuncThread`` and
          a ``(thread, actionID, test, start_time)`` tuple is appended to
          ``self.async_actions``.

        :param command: daytona command name; must be a key of the action map
        :param paramcsv: parameter string handed to the action unchanged; when it
            parses as an int it is also used as a key into ``action.running_tests``
        :param actionID: identifier echoed back in the reply
        :return: ``"actionID=<id>,<ret>,SYNC EXEC"`` (``%`` separated for
            ``DAYTONA_CLI``) or ``"actionID=<id>,SUCCESS,ASYNC EXEC"``
        :raises KeyError: if ``command`` is not in the action map
        """
        #based on SYNCFLAG release from here
        #send actionID for currently being executed action based on this we can stream resp
        #keep exec details over time in a buffer with actionID mapped
        #send actionID NULL and hold return till exec is complete
        action_name = command.strip()
        mapping = self.conf.actionMap[action_name]
        mapping_parts = mapping.split(".")
        module = mapping_parts[0]
        function = mapping_parts[1]
        sync = mapping_parts[2]

        t2 = testobj.testDefn()
        try:
            param = int(paramcsv)
            action.action_lock.acquire()
            try:
                t2 = action.running_tests[param].tobj
            finally:
                # Always release: a missing test id raises here and used to
                # leave action_lock held forever.
                action.action_lock.release()
        except Exception:
            # Best effort: paramcsv is not a test id, or no such running test.
            # Keep the empty test definition created above.
            pass

        m = __import__(module)
        f = getattr(m, function)
        if sync == "T":  #wait for func to complete and return the ret
            self.lctx.debug("Executing SYNC ACTION for " + action_name +
                            " : " + mapping + ":" + str(actionID))
            ret = f(self, self, command, paramcsv, actionID, sync)
            self.lctx.debug("ACTION completed for " + action_name + " : " +
                            mapping + ":" + str(actionID))
            if command == "DAYTONA_CLI":
                return "actionID=" + str(
                    actionID) + "%" + ret + "%" + "SYNC EXEC"
            else:
                return "actionID=" + str(
                    actionID) + "," + ret + "," + "SYNC EXEC"
        else:
            #callback will be called after completion
            #actionID = uuid.uuid4()
            self.lctx.debug("Executing ASYNC ACTION for " + action_name +
                            " : " + mapping + ":" + str(actionID))
            t1 = common.FuncThread(f, True, self, command, paramcsv, actionID,
                                   sync)
            x = (t1, actionID, t2, time.time())
            self.lock.acquire()
            try:
                self.async_actions.append(x)
                self.lctx.debug("async_actions size :" +
                                str(len(self.async_actions)))
            finally:
                self.lock.release()
            t1.start()
            self.lctx.debug("Executing ACTION for " + action_name + " : " +
                            mapping + ":" + str(actionID))
            return "actionID=" + str(
                actionID) + "," + "SUCCESS," + "ASYNC EXEC"
Beispiel #8
0
  def execute(self, command, paramcsv, actionID):
    """
    Run the action mapped to ``command``, synchronously or on a background thread.

    The action map entry for the stripped command is a dotted
    ``module.function.syncflag`` string. With syncflag ``"T"`` the function is
    called inline and its result embedded in the reply; otherwise it is started
    on a ``common.FuncThread`` and recorded in ``self.async_actions``.

    Returns ``"actionID=<id>,<ret>,SYNC EXEC"`` or
    ``"actionID=<id>,SUCCESS,ASYNC EXEC"``.
    """
    #based on SYNCFLAG release from here
    #send actionID for currently being executed action based on this we can stream resp
    #keep exec details over time in a buffer with actionID mapped
    #send actionID NULL and hold return till exec is complete
    key = command.strip()
    mapped = self.conf.actionMap[key]
    pieces = mapped.split(".")
    module = pieces[0]
    function = pieces[1]
    sync = pieces[2]
    for value in (command, paramcsv, actionID):
      self.lctx.debug(value)

    # The serialized test is the first CSV field, except for file downloads
    # where it is the fourth.
    t2 = testobj.testDefn()
    tst = ""
    if paramcsv != "":
      p = paramcsv.split(",")
      tst = p[3] if command == "DAYTONA_FILE_DOWNLOAD" else p[0]
    if tst != "":
      t2.deserialize(tst)

    m = __import__(module)
    f = getattr(m, function)

    if sync == "T":  #wait for func to complete and return the ret
      self.lctx.debug("Executing SYNC ACTION for " + key + " : " + mapped + ":" + str(actionID))
      ret = f(self, self, command, paramcsv, actionID, sync)
      self.lctx.debug("ACTION completed for " + key + " : " + mapped + ":" + str(actionID))
      return "actionID=" + str(actionID) + "," + ret + "," + "SYNC EXEC"

    #callback will be called after completion
    #actionID = uuid.uuid4()
    self.lctx.debug("Executing ASYNC ACTION for " + key + " : " + mapped + ":" + str(actionID))
    worker = common.FuncThread(f, True, self, command, paramcsv, actionID, sync)
    record = (worker, actionID, t2, time.time())
    self.lock.acquire()
    self.async_actions.append(record)
    self.lctx.debug("async_actions size :" + str(len(self.async_actions)))
    self.lock.release()
    worker.start()
    self.lctx.debug("Executing ACTION for " + key + " : " + mapped + ":" + str(actionID))
    return "actionID=" + str(actionID) + "," + "SUCCESS," + "ASYNC EXEC"
Beispiel #9
0
    def execute(self, command, paramcsv, actionID):
        """
        Run the action mapped to ``command``, synchronously or on a background thread.

        The action map entry for the stripped command is a dotted
        ``module.function.syncflag`` string. With syncflag ``"T"`` the function
        is called inline; otherwise it is started on a ``common.FuncThread``.
        An asynchronous action is recorded in ``self.async_actions`` only when
        the command is ``DAYTONA_START_TEST`` and the host type (second CSV
        field of ``paramcsv``) is ``"EXEC"``.

        :param command: daytona command name; must be a key of the action map
        :param paramcsv: comma separated parameters; for ``DAYTONA_START_TEST``
            the first two fields are the test id and the host type
        :param actionID: identifier echoed back in the reply
        :return: ``"actionID=<id>,<ret>,SYNC EXEC"`` (``%`` separated for
            ``DAYTONA_CLI``) or ``"actionID=<id>,SUCCESS,ASYNC EXEC"``
        """
        # based on SYNCFLAG release from here
        # send actionID for currently being executed action based on this we can stream resp
        # keep exec details over time in a buffer with actionID mapped
        # send actionID NULL and hold return till exec is complete
        action_name = command.strip()
        mapping = self.conf.actionMap[action_name]
        mapping_parts = mapping.split(".")
        module = mapping_parts[0]
        function = mapping_parts[1]
        sync = mapping_parts[2]

        t2 = testobj.testDefn()
        # Must be bound for every command: the async branch below reads it, and
        # only DAYTONA_START_TEST assigns a real value.
        hosttype = None
        if command == "DAYTONA_START_TEST":
            fields = paramcsv.split(",")
            testid = int(fields[0])
            hosttype = fields[1]
            current_test = action.get_test(testid)
            if current_test:
                t2 = current_test.tobj

        m = __import__(module)
        f = getattr(m, function)
        if sync == "T":  # wait for func to complete and return the ret
            self.lctx.debug("Executing SYNC ACTION for " + action_name + " : " + mapping + ":" + str(actionID))
            ret = f(self, self, command, paramcsv, actionID, sync)
            self.lctx.debug("ACTION completed for " + action_name + " : " + mapping + ":" + str(actionID))
            if command == "DAYTONA_CLI":
                return "actionID=" + str(actionID) + "%" + ret + "%" + "SYNC EXEC"
            else:
                return "actionID=" + str(actionID) + "," + ret + "," + "SYNC EXEC"
        else:
            self.lctx.debug("Executing ASYNC ACTION for " + action_name + " : " + mapping + ":" + str(actionID))
            t1 = common.FuncThread(f, True, self, command, paramcsv, actionID, sync)
            if hosttype == "EXEC":
                x = (t1, actionID, t2, time.time())
                self.lock.acquire()
                try:
                    self.async_actions.append(x)
                    self.lctx.debug("async_actions size :" + str(len(self.async_actions)))
                finally:
                    self.lock.release()
            t1.start()
            self.lctx.debug("Executing ACTION for " + action_name + " : " + mapping + ":" + str(actionID))
            return "actionID=" + str(actionID) + "," + "SUCCESS," + "ASYNC EXEC"
Beispiel #10
0
    def testmon(self, *mon):
        """
        Monitor loop: every 2 seconds poll the exec host of each running test and act on its status.

        For each entry of ``self.running_tests`` a ``DAYTONA_GET_STATUS``
        message is sent to the test's exec host. Depending on the reply:

        * RUNNING / INIT / SETUP / MONITOR_ON / MONITOR_OFF: the test is left
          alone, unless ``checkTestRunning`` no longer finds it (error);
        * TESTEND / TIMEOUT: the first time it is seen (end_status still
          "running") the status is updated and ``self.process_results`` is
          started on a new thread;
        * FINISHED, ABORT, TESTNA or an unknown status: the entry is removed;
        * FAILED, an unreachable exec host or a missing test: error handling.

        Error handling sends ``DAYTONA_STOP_MONITOR`` and ``DAYTONA_ABORT_TEST``
        to the exec host and to every stat host that answers a heartbeat with
        ALIVE, then removes the entry. Only one test is acted upon per pass:
        the scan stops at the first test that needs removal or error handling.

        The loop never returns.

        :param mon: unused in this method
        """
        process_results_threads = defaultdict()
        while True:
            d = "TSMON [R] : |"
            remove = False
            error = False

            for k in self.running_tests:
                if self.running_tests[k] is not None:
                    t = self.running_tests[k]

                    # Sanity check: the test must survive a serialize /
                    # deserialize round trip with the same test id.
                    serialize_str = t.serialize()
                    t2 = testobj.testDefn()
                    t2.deserialize(serialize_str)
                    if t.testobj.TestInputData.testid != t2.testobj.TestInputData.testid:
                        lctx.error("testobj not same")
                        t.updateStatus("running", "failed")
                        remove = True
                        break  # out of for loop

                    try:
                        ret = self.cl.send(
                            t.testobj.TestInputData.exechostname, self.CPORT,
                            self.ev.construct(
                                "DAYTONA_GET_STATUS",
                                str(t2.testobj.TestInputData.testid)))
                        status = ret.split(",")[1]
                        lctx.debug(status)
                    except Exception as e:
                        lctx.debug(e)
                        t.updateStatus("running", "failed")
                        error = True
                        break  # out of for loop

                    if status in [
                            "RUNNING", "INIT", "SETUP", "MONITOR_ON",
                            "MONITOR_OFF"
                    ]:
                        found = checkTestRunning(
                            t.testobj.TestInputData.testid)
                        if not found:
                            error = True
                            break
                        d = d + str(t.testobj.TestInputData.testid) + "|"
                    elif status in ["TESTEND", "TIMEOUT"]:
                        d = d + "*" + str(
                            t.testobj.TestInputData.testid) + "*|"
                        end_status = t.testobj.TestInputData.end_status
                        if end_status == "running":
                            lctx.debug(end_status)
                            if status == "TIMEOUT":
                                t.testobj.TestInputData.timeout_flag = True
                                t.updateStatus("running", "timeout")
                            else:
                                t.updateStatus("running", "completed")
                            # end_status is re-read on purpose: the thread gets
                            # the value as it is after updateStatus().
                            pt = common.FuncThread(
                                self.process_results, True, t,
                                t.testobj.TestInputData.end_status)
                            process_results_threads[
                                t.testobj.TestInputData.testid] = (pt, t)
                            pt.start()
                        elif end_status in ("collating", "completed",
                                            "finished clean"):
                            d = d + "*" + str(
                                t.testobj.TestInputData.testid) + "*|"
                        else:
                            remove = True
                            t.updateStatus("running", "failed")
                            lctx.error("ERROR : Unknown test status for : " +
                                       str(t.testobj.TestInputData.testid) +
                                       ":" + str(status))
                            break  # out of for loop
                    elif status.strip() == "FINISHED":
                        d = "TSMON [F] : |*" + str(
                            t.testobj.TestInputData.testid) + "*|"
                        remove = True
                        break
                    elif status.strip() in ["FAILED", "ABORT", "TESTNA"]:
                        if status.strip() == "FAILED":
                            error = True
                        else:  # ABORT or TESTNA
                            remove = True
                        t.updateStatus("", "failed")
                        lctx.error("TEST " + status.strip() +
                                   " : Cleaning test from running queue")
                        break  # out of for loop
                    else:
                        remove = True
                        t.updateStatus("running", "failed")
                        lctx.error("ERROR : Unknown test status for : " +
                                   str(t.testobj.TestInputData.testid) + ":" +
                                   str(status))
                        break  # out of for loop

                lctx.info(d)
                d = ""

            if error:
                # Tell the exec host to stop, but only if it is still alive.
                retsend = None
                ip = t.testobj.TestInputData.exechostname
                try:
                    retsend = self.cl.send(
                        ip, self.CPORT,
                        self.ev.construct("DAYTONA_HEARTBEAT", ""))
                except Exception as e:
                    # Host unreachable: nothing to stop there.
                    lctx.debug(e)

                if retsend and retsend.split(",")[1] == "ALIVE":
                    retsend = self.cl.send(
                        ip, self.CPORT,
                        self.ev.construct("DAYTONA_STOP_MONITOR",
                                          str(t.testobj.TestInputData.testid)))
                    retsend = self.cl.send(
                        ip, self.CPORT,
                        self.ev.construct("DAYTONA_ABORT_TEST",
                                          str(t.testobj.TestInputData.testid)))

                # Same for every stat host.
                for s in t.testobj.TestInputData.stathostname.split(','):
                    if len(s.strip()) == 0:
                        break
                    # Reset per host: otherwise a failed heartbeat would be
                    # judged by the previous host's reply.
                    retsend = None
                    try:
                        retsend = self.cl.send(
                            s.strip(), self.CPORT,
                            self.ev.construct("DAYTONA_HEARTBEAT", ""))
                    except Exception as e:
                        lctx.debug(e)
                    if retsend and retsend.split(",")[1] == "ALIVE":
                        retsend = self.cl.send(
                            s.strip(), self.CPORT,
                            self.ev.construct(
                                "DAYTONA_STOP_MONITOR",
                                str(t.testobj.TestInputData.testid)))

                        retsend = self.cl.send(
                            s.strip(), self.CPORT,
                            self.ev.construct(
                                "DAYTONA_ABORT_TEST",
                                str(t.testobj.TestInputData.testid)))

            if error or remove:
                # Drop the entry belonging to test ``t``. Only a matching entry
                # is removed; an unrelated test is never deleted as a fallback.
                self.lock.acquire()
                try:
                    for k in self.running_tests:
                        entry = self.running_tests[k]
                        if entry is not None and entry.testobj.TestInputData.testid == t.testobj.TestInputData.testid:
                            lctx.debug("removing entry for this test")
                            self.running_tests.pop(k)
                            break
                finally:
                    self.lock.release()

            time.sleep(2)
Beispiel #11
0
    def dispatch(self, *args):
        """
        Scheduler loop: every 2 seconds try to move one waiting test per key of
        ``self.testmap`` into ``self.running_tests``.

        For each key ``k`` of ``self.testmap`` that has no truthy entry in
        ``self.running_tests``, the first queued test is considered:

        1. A ``DAYTONA_HEARTBEAT`` is sent to the test's exec host, followed by
           a ``DAYTONA_HANDSHAKE`` when the host answers ALIVE.
        2. If the handshake returns ``"SUCCESS"`` the host name and its resolved
           address are added to ``server.serv.registered_hosts``, the test is
           popped from the map, stored in ``self.running_tests[k]``, moved from
           status "waiting" to "setup", and ``self.trigger`` is started for it
           on a new thread.
        3. If anything in step 1 raises (malformed heartbeat reply, failed
           handshake, send error) the test is popped from the map and moved
           from "waiting" to "failed".

        A heartbeat reply whose status is not ALIVE raises nothing, so the test
        stays queued and is retried on the next pass.

        The loop never returns.

        :param args: unused in this method
        """
        # Keeps (thread, test) per test id for every trigger thread started here.
        dispatch_threads = defaultdict()
        while True:
            for k in self.testmap:
                # found == True means key k already has a running test.
                found = False
                try:
                    if (self.running_tests[k]):
                        found = True
                except KeyError:
                    lctx.debug("Found spot for test")

                if found == True:
                    continue

                # Peek at the first queued test for this key without removing it.
                try:
                    tmp_t = self.testmap[k][0]
                except Exception as e:
                    lctx.debug("No test object found in map")
                    continue

                if tmp_t == None:
                    continue

                alive = False

                h = tmp_t.testobj.TestInputData.exechostname
                try:
                    ret = self.cl.send(
                        h, self.CPORT,
                        self.ev.construct("DAYTONA_HEARTBEAT", ""))
                    status = ""
                    st = ret.split(",")
                    # The status field is only read from replies with more than
                    # two comma separated fields; anything shorter is treated
                    # as "no heartbeat".
                    if len(st) > 2:
                        status = st[1]
                    else:
                        raise Exception(
                            "Remove host not avaliable - No Heartbeat ",
                            tmp_t.testobj.TestInputData.testid)

                    if "ALIVE" == status:
                        # Handshake payload: scheduler host, scheduler port,
                        # test id, exec host name.
                        ret = self.cl.send(
                            h, self.CPORT,
                            self.ev.construct(
                                "DAYTONA_HANDSHAKE",
                                self.HOST + "," + str(self.PORT) + "," +
                                str(tmp_t.testobj.TestInputData.testid) + "," +
                                h))
                        if ret == "SUCCESS":
                            alive = True
                            lctx.debug(
                                "Handshake successful in scheduler, adding ip/hostname to reg hosts"
                            )
                            # Register the host both by name and by resolved address.
                            server.serv.registered_hosts[h] = h
                            addr = socket.gethostbyname(h)
                            lctx.debug(addr)
                            server.serv.registered_hosts[addr] = addr
                        else:
                            raise Exception("Unable to handshake with agent:" +
                                            h)

                except Exception as e:
                    # Host unusable: fail the queued test and drop it from the map.
                    lctx.error(e)
                    alive = False
                    found = False
                    # pause the dbmon here as we dont want the same test to be picked again after we pop
                    self.dbinstance.mon_thread[0].pause()
                    self.dbinstance.lock.acquire()
                    t = self.testmap[k].pop(0)
                    t.updateStatus("waiting", "failed")
                    self.dbinstance.lock.release()
                    lctx.debug("Removed test from map : " +
                               str(t.testobj.TestInputData.testid))
                    self.dbinstance.mon_thread[0].resume()
                    continue
                    # todo : add host to reg list if handshake successful

                if alive == True and found == False:
                    # for each framework pick one and move it to running, iff running has an empty slot.
                    lctx.debug("-------Found empty slot in running Q-------")

                    # pause the dbmon here as we dont want the same test to be picked again after we pop
                    self.dbinstance.mon_thread[0].pause()
                    self.dbinstance.lock.acquire()
                    t = self.testmap[k].pop(0)
                    self.dbinstance.lock.release()

                    lctx.info("< %s" % (t.testobj.TestInputData.testid))

                    # running_tests is guarded by self.lock here.
                    self.lock.acquire()
                    self.running_tests[k] = t
                    self.lock.release()

                    t.updateStatus("waiting", "setup")
                    self.dbinstance.mon_thread[0].resume()

                    # Run self.trigger for this test on its own thread.
                    try:
                        trigger_thread = common.FuncThread(
                            self.trigger, True, t)
                        dispatch_threads[t.testobj.TestInputData.testid] = (
                            trigger_thread, t)
                        trigger_thread.start()
                    except Exception as e:
                        lctx.error("Trigger error : " +
                                   str(t.testobj.TestInputData.testid))
                        # todo : remove testid from running tests
                        lctx.debug(e)

            # Log a one-line summary of the test ids currently in running_tests.
            try:
                d = "DISPATCH [S/R] : "
                for k in self.running_tests:
                    d = d + " |" + str(
                        self.running_tests[k].testobj.TestInputData.testid)
            except:
                lctx.error("ERROR : Dispatch Q empty")

            lctx.debug(d)
            d = ""

            time.sleep(2)
Beispiel #12
0
    def testmon(self, *mon):
        """
        Monitor loop: every 2 seconds poll the exec host of each running test and act on its status.

        For each entry of ``self.running_tests`` a ``DAYTONA_GET_STATUS``
        message carrying the serialized test is sent to the test's exec host.
        Depending on the reply:

        * TESTRUNNING / TESTSETUP: the test is left alone;
        * TESTEND: the first time it is seen (end_status still "running") the
          status is set to "completed" and ``self.process_results`` is started
          on a new thread;
        * TESTFINISHED: the entry is removed from the running queue;
        * send failure or any unknown status: the test is marked failed and
          its entry removed.

        Only one test is removed per pass: the scan stops at the first test
        that needs removal. The loop never returns.

        :param mon: unused in this method
        """
        process_results_threads = defaultdict()
        while True:
            d = "TSMON [R] : |"
            remove = False

            for k in self.running_tests:
                if self.running_tests[k] is not None:
                    t = self.running_tests[k]

                    # Sanity check: the test must survive a serialize /
                    # deserialize round trip with the same test id.
                    serialize_str = t.serialize()
                    t2 = testobj.testDefn()
                    t2.deserialize(serialize_str)
                    if (t.testobj.TestInputData.testid !=
                            t2.testobj.TestInputData.testid):
                        lctx.error("testobj not same")
                        t.updateStatus("running", "failed")
                        remove = True
                        break  #out of for loop

                    try:
                        ret = self.cl.send(
                            t.testobj.TestInputData.exechostname, self.CPORT,
                            self.ev.construct("DAYTONA_GET_STATUS",
                                              serialize_str))
                        status = ret.split(",")[1]
                        lctx.debug(status)
                    except Exception as e:
                        lctx.debug(e)
                        t.updateStatus("running", "failed")
                        remove = True
                        break  #out of for loop

                    if "TESTRUNNING" == status or "TESTSETUP" == status:
                        d = d + str(t.testobj.TestInputData.testid) + "|"
                    elif "TESTEND" == status.strip():
                        d = d + "*" + str(
                            t.testobj.TestInputData.testid) + "*|"
                        end_status = t.testobj.TestInputData.end_status
                        if end_status == "running":
                            lctx.debug(end_status)
                            lctx.debug("Updating status to completed in DB")
                            t.updateStatus("running", "completed")
                            # end_status is re-read on purpose: the thread gets
                            # the value as it is after updateStatus().
                            pt = common.FuncThread(
                                self.process_results, True, t,
                                t.testobj.TestInputData.end_status)
                            process_results_threads[
                                t.testobj.TestInputData.testid] = (pt, t)
                            pt.start()
                        elif end_status in ("collating", "completed",
                                            "finished clean"):
                            d = d + "*" + str(
                                t.testobj.TestInputData.testid) + "*|"
                        else:
                            remove = True
                            t.updateStatus("running", "failed")
                            lctx.error("ERROR : Unknown test status for : " +
                                       str(t.testobj.TestInputData.testid) +
                                       ":" + str(status))
                            break  #out of for loop
                    elif "TESTFINISHED" == status.strip():
                        d = "TSMON [F] : |*" + str(
                            t.testobj.TestInputData.testid) + "*|"
                        remove = True
                        break
                    else:
                        remove = True
                        t.updateStatus("running", "failed")
                        lctx.error("ERROR : Unknown test status for : " +
                                   str(t.testobj.TestInputData.testid) + ":" +
                                   str(status))
                        break  #out of for loop

                lctx.info(d)
                d = ""

            if remove:
                # Drop the entry belonging to test ``t``. Only a matching entry
                # is removed; an unrelated test is never deleted as a fallback.
                self.lock.acquire()
                try:
                    for k in self.running_tests:
                        entry = self.running_tests[k]
                        if entry is not None and entry.testobj.TestInputData.testid == t.testobj.TestInputData.testid:
                            lctx.debug("removing entry for this test")
                            self.running_tests.pop(k)
                            break
                finally:
                    self.lock.release()

            time.sleep(2)
Beispiel #13
0
 def startMon(self):
   """Create the monitor thread for ``self.mon``, record it in ``self.mon_thread`` and start it."""
   monitor = common.FuncThread(self.mon, True)
   self.mon_thread.append(monitor)
   monitor.start()
Beispiel #14
0
    def execute(self, command, paramcsv, actionID):
        """
        Map a daytona command to the procedure that must run when that message
        is received, and run it.

        The mapping is read from ``self.conf.actionMap`` (saved in the
        action.map file; the procedures are implemented in action.py). Each
        entry is a dotted ``module.function.syncflag`` string. With syncflag
        ``"T"`` the procedure is called inline and its result is returned in
        the reply; otherwise it is executed separately on a new thread.

        Notes carried over from the original implementation:
        # based on SYNCFLAG release from here
        # send actionID for currently being executed action based on this we can stream resp
        # keep exec details over time in a buffer with actionID mapped
        # send actionID NULL and hold return till exec is complete

        """
        action_key = command.strip()
        spec = self.conf.actionMap[action_key]
        spec_parts = spec.split(".")
        module = spec_parts[0]
        function = spec_parts[1]
        sync = spec_parts[2]

        # Only DAYTONA_START_TEST carries a test id / host type in paramcsv.
        t2 = testobj.testDefn()
        hosttype = None
        if command == "DAYTONA_START_TEST":
            fields = paramcsv.split(",")
            testid = int(fields[0])
            hosttype = fields[1]
            current_test = action.get_test(testid)
            if current_test:
                t2 = current_test.tobj

        f = getattr(__import__(module), function)

        if sync == "T":  # wait for func to complete and return the ret
            self.lctx.debug("Executing SYNC ACTION for " + action_key +
                            " : " + spec + ":" + str(actionID))
            ret = f(self, self, command, paramcsv, actionID, sync)
            self.lctx.debug("ACTION completed for " + action_key + " : " +
                            spec + ":" + str(actionID))
            # CLI replies are '%' separated, everything else ',' separated.
            separator = "%" if command == "DAYTONA_CLI" else ","
            return ("actionID=" + str(actionID) + separator + ret +
                    separator + "SYNC EXEC")

        self.lctx.debug("Executing ASYNC ACTION for " + action_key + " : " +
                        spec + ":" + str(actionID))
        worker = common.FuncThread(f, True, self, command, paramcsv, actionID,
                                   sync)
        if hosttype == "EXEC":
            record = (worker, actionID, t2, time.time())
            self.lock.acquire()
            self.async_actions.append(record)
            self.lctx.debug("async_actions size :" +
                            str(len(self.async_actions)))
            self.lock.release()
        worker.start()
        self.lctx.debug("Executing ACTION for " + action_key + " : " + spec +
                        ":" + str(actionID))
        return "actionID=" + str(actionID) + "," + "SUCCESS," + "ASYNC EXEC"
Beispiel #15
0
    def testmon(self, *mon):
        """
        Testmon continuously monitors all the running tests. On every pass (one pass every two seconds) it sends
        DAYTONA_GET_STATUS for each test in self.running_tests to the exec host of that test and reacts to the
        reported status:

        # RUNNING           : keep monitoring, unless checkTestRunning() does not find the test any more; then the
        #                     test is terminated on all hosts and removed from the running queue
        # TESTEND / TIMEOUT : if the test end status is still "running", update it to "completed" / "timeout",
        #                     start a process_results thread for the test and remove it from the running queue
        # FAILED            : terminate the test on all hosts and remove it from the running queue
        # TESTNA / unknown  : mark the test as failed and remove it from the running queue

        A failed status request is handled like FAILED. The pass stops at the first test which needs termination
        or removal, so at most one test is taken out of the running queue per pass.

        The positional arguments are not used. This procedure loops forever and never returns.
        """
        process_results_threads = defaultdict()
        while True:
            d = "TSMON [R] : |"
            remove = False
            error = False
            # Reset on every pass so that the termination steps below never act on values left over from an
            # earlier pass
            t = None
            test_logger = None
            status = "UNKNOWN"

            # Continuously iterate over running test list for checking test status
            for k in self.running_tests:
                if self.running_tests[k] is not None:
                    t = self.running_tests[k]
                    # Stays "UNKNOWN" when the status request below fails before an answer is parsed
                    status = "UNKNOWN"

                    serialize_str = t.serialize()
                    t2 = testobj.testDefn()
                    t2.deserialize(serialize_str)

                    # Initiating test logger for capturing test life cycle on scheduler, all logs are logged in
                    # file <testid>.log
                    test_logger = LOG.gettestlogger(t2, "EXEC")
                    if t.testobj.TestInputData.testid != t2.testobj.TestInputData.testid:
                        lctx.error("testobj not same")
                        t.updateStatus("running", "failed")
                        remove = True
                        break  # out of for loop

                    testid = t.testobj.TestInputData.testid

                    try:
                        # Send DAYTONA_GET_STATUS message on exec host mentioned in test for checking test status
                        ret = self.cl.send(
                            t.testobj.TestInputData.exechostname, self.CPORT,
                            self.ev.construct("DAYTONA_GET_STATUS",
                                              str(t2.testobj.TestInputData.testid)))
                        status = ret.split(",")[1]
                        lctx.debug(status)
                        test_logger.info("Test status : " + status)
                    except Exception as e:
                        lctx.debug(e)
                        t.updateStatus("running", "failed")
                        error = True
                        break  # out of for loop

                    if status == "RUNNING":
                        # If the test is in running state, then we need to verify that user hasn't terminated this
                        # test from UI. If user has terminated then testmon will stop test execution on all exec host
                        # and stat host
                        found = checkTestRunning(testid)
                        if not found:
                            error = True
                            break
                        d = d + str(testid) + "|"
                    elif status in ["TESTEND", "TIMEOUT"]:
                        # If test ends on exec host or if test timeout occurs then trigger graceful shutdown of this
                        # test. Testmon invokes a new thread for this test for logs download and test cleanup from
                        # all hosts
                        d = d + "*" + str(testid) + "*|"
                        end_status = t.testobj.TestInputData.end_status
                        if end_status == "running":
                            lctx.debug(end_status)
                            if status == "TIMEOUT":
                                t.testobj.TestInputData.timeout_flag = True
                                t.updateStatus("running", "timeout")
                            else:
                                t.updateStatus("running", "completed")

                            # process_results download log files and perform cleanup on all other hosts. The end
                            # status is read again here, after updateStatus() has been called
                            pt = common.FuncThread(
                                self.process_results, True, t,
                                t.testobj.TestInputData.end_status)
                            process_results_threads[testid] = (pt, t)
                            pt.start()
                            remove = True
                            break
                        elif end_status in ("collating", "completed",
                                            "finished clean"):
                            d = d + "*" + str(testid) + "*|"
                        else:
                            remove = True
                            t.updateStatus("running", "failed")
                            lctx.error("ERROR : Unknown test status for : " +
                                       str(testid) + ":" + str(status))
                            break  # out of for loop

                    elif status.strip() in ["FAILED", "TESTNA"]:
                        # Test termination if test fails or test is not even running on the host
                        if status.strip() == "FAILED":
                            error = True
                        else:
                            remove = True
                        t.updateStatus("", "failed")
                        lctx.error("TEST " + status.strip() +
                                   " : Cleaning test from running queue")
                        break  # out of for loop
                    else:
                        # Test termination on receiving any unknown test state
                        remove = True
                        t.updateStatus("running", "failed")
                        lctx.error("ERROR : Unknown test status for : " +
                                   str(testid) + ":" + str(status))
                        break  # out of for loop

                lctx.info(d)
                d = ""

            # Two modes of test termination:
            if error:
                # If error is set then testmon will perform below steps:
                # 1. Send test ABORT on exec host if is alive, this will stop execution script, perform logs cleanup
                #    and test termination on the host
                # 2. Send test cleanup on all other stat host for performing logs cleanup and test termination on
                #    the host
                # 3. Remove test from the scheduler running queue
                self._testmon_terminate_on_hosts(t, status, test_logger)

            if error or remove:
                # If only remove flag is set, then testmon will only delete this test from the running queue of
                # scheduler
                self._testmon_remove_running(t)

            time.sleep(2)

    def _testmon_host_alive(self, host):
        """
        Return True if host answers DAYTONA_HEARTBEAT with ALIVE in the second response field. A failed send, an
        empty response or a response without a second field all count as not alive.
        """
        try:
            retsend = self.cl.send(
                host, self.CPORT, self.ev.construct("DAYTONA_HEARTBEAT", ""))
        except Exception as e:
            lctx.debug(e)
            return False
        if not retsend:
            return False
        fields = retsend.split(",")
        return len(fields) > 1 and fields[1] == "ALIVE"

    def _testmon_terminate_on_hosts(self, t, status, test_logger):
        """
        Send DAYTONA_ABORT_TEST to the exec host and DAYTONA_CLEANUP_TEST to the stat hosts of test t. Only hosts
        which answer the heartbeat are contacted, and a failing host is logged and skipped so that the caller can
        still remove the test from the running queue.
        """
        test_logger.error("Bad test status " + status + " - Terminating test")
        testid = str(t.testobj.TestInputData.testid)

        ip = t.testobj.TestInputData.exechostname
        if self._testmon_host_alive(ip):
            try:
                self.cl.send(ip, self.CPORT,
                             self.ev.construct("DAYTONA_ABORT_TEST", testid))
                test_logger.error("Test Aborted on exec host " + ip)
            except Exception as e:
                lctx.error(e)

        for s in t.testobj.TestInputData.stathostname.split(','):
            if len(s.strip()) == 0:
                break
            # Liveness is checked per host, the answer of a previous host is never reused
            if self._testmon_host_alive(s.strip()):
                try:
                    self.cl.send(
                        s.strip(), self.CPORT,
                        self.ev.construct("DAYTONA_CLEANUP_TEST", testid))
                    test_logger.error("Test Aborted on stat host " + s)
                except Exception as e:
                    lctx.error(e)

    def _testmon_remove_running(self, t):
        """
        Remove the entry of test t from self.running_tests while holding self.lock. Entries which are None or
        belong to another test are left untouched.
        """
        testid = t.testobj.TestInputData.testid
        self.lock.acquire()
        try:
            for k in self.running_tests:
                running = self.running_tests[k]
                if running is not None and running.testobj.TestInputData.testid == testid:
                    lctx.debug("removing entry for this test")
                    self.running_tests.pop(k)
                    # Stop right after the pop, the dict must not be iterated further once it has changed
                    break
        finally:
            self.lock.release()
Beispiel #16
0
    def dispatch(self, *args):
        """
        This is dispatch queue of scheduler where tests from different frameworks wait in the waiting queue for
        scheduler to bind them with a trigger thread. This procedure continuously iterates over testmap, which maps
        a framework id to the list of waiting tests of that framework. It allows one test per framework: a
        framework which already has a test in the dispatch queue or in the running queue is skipped.

        For a framework with an open spot the first waiting test is checked with DAYTONA_HEARTBEAT and
        DAYTONA_HANDSHAKE against its exec host:
        # handshake succeeds            : exec host is added to server.serv.registered_hosts, the test is popped from
        #                                 testmap, put in the dispatch queue, moved from "waiting" to "setup" and a
        #                                 trigger thread is started for it
        # any step raises               : the test is popped from testmap and moved from "waiting" to "failed"
        # heartbeat status is not ALIVE : nothing is changed, the test stays in testmap and is checked again on the
        #                                 next pass

        After every pass the test ids present in the dispatch queue are logged and the procedure sleeps for two
        seconds. The positional arguments are not used. This procedure loops forever and never returns.
        """
        dispatch_threads = defaultdict()
        while True:
            # Continuously iterate on testmap for initiating any test execution
            for k in self.testmap:
                # iterating for all frameworkid k in testmap which contains list of waiting tests for a particular
                # framework

                # If test for a particular framework is already in running or dispatch queue then this new test
                # need to wait until previous test gets finish, hence we do nothing and just continue
                if k in self.dispatch_queue or k in self.running_tests:
                    continue
                lctx.debug("Found spot for test")

                # Proceed if spot is available for executing test for this framework
                try:
                    tmp_t = self.testmap[k][0]
                except Exception:
                    lctx.debug("No test object found in map")
                    continue

                if tmp_t is None:
                    continue

                alive = False

                h = tmp_t.testobj.TestInputData.exechostname

                # Initiating test logger for capturing test life cycle on scheduler, all logs are logged in file
                # <testid>.log
                test_logger = LOG.init_testlogger(tmp_t, "EXEC")
                test_logger.info("Test execution starts")
                try:
                    # Sending heartbeat on exec host to check if it agent is up on exec host
                    retsend = self.cl.send(
                        h, self.CPORT,
                        self.ev.construct("DAYTONA_HEARTBEAT", ""))

                    if retsend and len(retsend.split(",")) > 2:
                        status = retsend.split(",")[1]
                    else:
                        raise Exception(
                            "Execution host not avaliable - No Heartbeat ",
                            tmp_t.testobj.TestInputData.testid)

                    if "ALIVE" == status:
                        test_logger.info(
                            "HeartBeat received from execution host " + h)
                        # Sending DAYTONA_HANDSHAKE for verifying connectivity between scheduler and agent on exec
                        # host using custom daytona ports
                        ret = self.cl.send(
                            h, self.CPORT,
                            self.ev.construct(
                                "DAYTONA_HANDSHAKE", "handshake1," +
                                self.HOST + "," + str(self.PORT) + "," +
                                str(tmp_t.testobj.TestInputData.testid) + "," +
                                h))
                        if ret == "SUCCESS":
                            alive = True
                            test_logger.info(
                                "Handshake successful with execution host " +
                                h)
                            lctx.debug(
                                "Handshake successful in scheduler, adding ip/hostname to reg hosts"
                            )
                            server.serv.registered_hosts[h] = h
                            addr = socket.gethostbyname(h)
                            lctx.debug(addr)
                            server.serv.registered_hosts[addr] = addr
                        else:
                            raise Exception(
                                "Unable to handshake with agent on executuion host "
                                + h)

                except Exception as e:
                    lctx.error(e)
                    test_logger.error(e)
                    # pause the dbmon here as we dont want the same test to be picked again after we pop
                    self.dbinstance.mon_thread[0].pause()
                    try:
                        self.dbinstance.lock.acquire()
                        try:
                            t = self.testmap[k].pop(0)
                            t.updateStatus("waiting", "failed")
                        finally:
                            self.dbinstance.lock.release()
                        lctx.debug("Removed test from map : " +
                                   str(t.testobj.TestInputData.testid))
                    finally:
                        # Resume even if the pop or status update fails, otherwise dbmon would stay paused
                        self.dbinstance.mon_thread[0].resume()
                    LOG.removeLogger(tmp_t)
                    continue

                if alive:
                    # for each framework pick one and move it to running, iff running has an empty slot.
                    lctx.debug(
                        "-------Found empty slot in dispatch and running Q-------"
                    )

                    # pause the dbmon here as we dont want the same test to be picked again after we pop
                    self.dbinstance.mon_thread[0].pause()
                    try:
                        self.dbinstance.lock.acquire()
                        try:
                            t = self.testmap[k].pop(0)
                        finally:
                            self.dbinstance.lock.release()

                        lctx.info("< %s" % t.testobj.TestInputData.testid)

                        # put the test in dispatch queue
                        self.dispatchQ__lock.acquire()
                        try:
                            self.dispatch_queue[k] = t
                        finally:
                            self.dispatchQ__lock.release()

                        t.updateStatus("waiting", "setup")
                    finally:
                        # Resume even if one of the steps above fails, otherwise dbmon would stay paused
                        self.dbinstance.mon_thread[0].resume()

                    try:
                        # Bind a separate trigger thread for this test to start test execution
                        trigger_thread = common.FuncThread(
                            self.trigger, True, t)
                        dispatch_threads[t.testobj.TestInputData.testid] = (
                            trigger_thread, t)
                        trigger_thread.start()
                    except Exception as e:
                        lctx.error("Trigger error : " +
                                   str(t.testobj.TestInputData.testid))
                        test_logger.error("Test setup failed " +
                                          str(t.testobj.TestInputData.testid))
                        LOG.removeLogger(tmp_t)
                        self.dispatchQ__lock.acquire()
                        try:
                            del self.dispatch_queue[k]
                        finally:
                            self.dispatchQ__lock.release()
                        lctx.debug(e)

            # Log list of test currently present in dispatch queue in scheduler debug file
            d = "DISPATCH [S/R] : "
            try:
                for k in self.dispatch_queue:
                    d = d + " |" + str(
                        self.dispatch_queue[k].testobj.TestInputData.testid)
            except Exception:
                lctx.debug("ERROR : Dispatch Q empty")

            lctx.debug(d)

            time.sleep(2)