class RunControlServer:

    def __init__(self):

        # Define names of lock and last_used_setup files
        self.lock_file = "run/lock"
        self.lus_file = "setup/last_used_setup"

        # Redefine print to send output to log file
        sys.stdout = Logger()

        # Create lock file
        if (self.create_lock_file() == "error"): exit(1)

        # Define what setup to use at startup (can be changed interactively)
        initial_setup = self.get_initial_setup()

        print "=== Starting PADME Run Control server with %s setup"%initial_setup
        #self.write_log("=== Starting PADME Run Control server with %s setup"%initial_setup)

        # Create run
        self.run = Run()
        if (self.run.change_setup(initial_setup) == "error"):
            print "ERROR - Error while changing run setup to %s"%initial_setup
            #self.write_log("ERROR - Error while changing run setup to %s"%initial_setup)
            exit(1)

        # Start in idle state
        self.current_state = "idle"

        # Create handler for PadmeDB
        self.db = PadmeDB()

        # Create useful regular expressions
        self.re_get_board_config = re.compile("^get_board_config (\d+)$")
        self.re_get_board_log_file = re.compile("^get_board_log_file (\d+)$")
        self.re_change_setup = re.compile("^change_setup (\w+)$")

        # Create a TCP/IP socket
        self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)

        # Bind the socket to the port
        server_address = ('localhost', 10000)
        print "Starting server socket on %s port %s"%server_address
        #self.write_log("Starting server socket on %s port %s"%server_address)
        try:
            self.sock.bind(server_address) # Listen for incoming connections
        except:
            print "ERROR - Could not bind to socket: %s"%str(sys.exc_info()[0])
            #self.write_log("ERROR - Could not bind to socket: %s"%str(sys.exc_info()[0]))
            exit(1)
        self.sock.listen(1)

        # Define SIGINT handler
        signal.signal(signal.SIGINT,self.sigint_handler)

        # Setup main interface
        self.main_loop()

        # Clean up before exiting
        self.sock.close()

    def create_lock_file(self):
        """Take the server lock by creating self.lock_file holding our pid.

        The file is created with O_CREAT|O_EXCL, so testing for an existing
        lock and creating ours is a single atomic step: two servers started
        at the same moment cannot both obtain the lock.

        Returns "ok" when the lock file was created, "error" (after printing
        the reason) when something already exists at that path.

        Raises OSError when the file cannot be created for any other reason
        (for example a missing parent directory).
        """

        try:
            fd = os.open(self.lock_file,os.O_WRONLY|os.O_CREAT|os.O_EXCL,0o666)
        except OSError:
            if not os.path.exists(self.lock_file):
                # Creation failed for a reason other than an existing lock
                raise
            if os.path.isfile(self.lock_file):
                # Report the content of the existing lock (last line wins)
                pid = 0
                with open(self.lock_file,"r") as lf:
                    for ll in lf: pid = ll
                print "ERROR - Lock file %s found for pid %s"%(self.lock_file,pid)
            else:
                print "ERROR - Lock file %s found but it is not a file"%self.lock_file
            return "error"

        # Write our own pid into the freshly created lock file
        with os.fdopen(fd,"w") as lf:
            lf.write("%d"%os.getpid())

        return "ok"

    def get_initial_setup(self):

        setup = "test"

        lus = ""
        if (os.path.exists(self.lus_file)):
            if (os.path.isfile(self.lus_file)):
                lusf = open(self.lus_file,"r")
                lus = lusf.read().strip("\n")
                lusf.close()
                if (lus == ""):
                    print "WARNING - File with last used setup %s exists but it is empty - Using default setup %s"%(self.lus_file,setup)
                    #self.write_log("WARNING - File with last used setup %s exists but it is empty - Using default setup %s"%(self.lus_file,setup))
                else:
                    setup = lus
            else:
                print "WARNING - File with last used setup %s exists but it is not a file - Using default setup %s"%(self.lus_file,setup)
                #self.write_log("WARNING - File with last used setup %s exists but it is not a file - Using default setup %s"%(self.lus_file,setup))
        else:
            print "WARNING - Could not find file with last used setup %s - Using default setup %s"%(self.lus_file,setup)
            #self.write_log("WARNING - Could not find file with last used setup %s - Using default setup %s"%(self.lus_file,setup))

        return setup

    def sigint_handler(self,signal,frame):

        print "RunControlSever received SIGINT: exiting"

        # If a run is initialized/running, abort it as cleanly as possible
        if ( self.current_state == "initialized" or self.current_state == "running" ):

            self.run.run_comment_end = "Run aborted because of SIGINT"
            print "Aborting run on SIGINT"
            #self.write_log("Aborting run on SIGINT")
            if (self.run.run_number):
                self.db.set_run_status(self.run.run_number,4) # Status 4: run aborted
                self.db.set_run_time_stop(self.run.run_number,self.now_str())
                self.db.set_run_comment_end(self.run.run_number,self.run.run_end_comment)
            open(self.run.quit_file,'w').close()
            for adc in (self.run.adcboard_list):
                if adc.stop_daq():
                    print "ADC board %02d - Terminated correctly"%adc.board_id
                    #self.write_log("ADC board %02d - Terminated correctly"%adc.board_id)
                else:
                    print "ADC board %02d - WARNING: problems while terminating DAQ"%adc.board_id
                    #self.write_log("ADC board %02d - WARNING: problems while terminating DAQ"%adc.board_id)
                    if (self.run.run_number):
                        self.db.set_run_status(self.run.run_number,6) # Status 6: run ended with errors

            # Clean up run directory
            for adc in (self.run.adcboard_list):
                if (os.path.exists(adc.initok_file)):   os.remove(adc.initok_file)
                if (os.path.exists(adc.initfail_file)): os.remove(adc.initfail_file)
            if(os.path.exists(self.run.start_file)): os.remove(self.run.start_file)
            if(os.path.exists(self.run.quit_file)):  os.remove(self.run.quit_file)


        if os.path.exists(self.lock_file): os.remove(self.lock_file)

        # Now we can exit
        exit(0)

    def main_loop(self):

        while True:

            # Wait for a connection
            print "Waiting for a connection"
            #self.write_log('waiting for a connection')
            (self.connection,client_address) = self.sock.accept()
            print "Connection from %s"%str(client_address)
            #self.write_log('connection from '+str(client_address))

            while True:

                # Handle connection according to curren status of RunControl
                if self.current_state == "idle":
                    new_state = self.state_idle()
                elif self.current_state == "initialized":
                    new_state = self.state_initialized()
                elif self.current_state == "running":
                    new_state = self.state_running()
                elif self.current_state == "initfail":
                    new_state = self.state_initfail()
                else:
                    print "ERROR: unknown state %s - ABORTING"%self.current_state
                    #self.write_log("ERROR: unknown state %s - ABORTING"%self.current_state)
                    new_state = "exit"

            # See if status changed
                if new_state == "idle" or new_state == "initialized" or new_state == "running" or new_state == "initfail":
                    self.current_state = new_state
                elif new_state == "client_close":
                    self.connection.close()
                    break
                elif new_state == "exit":
                    print "=== RunControlSever received exit command: exiting"
                    self.connection.close()
                    if os.path.exists(self.lock_file): os.remove(self.lock_file)
                    exit(0)
                else:
                    print "=== RunControlServer = ERROR: unknown new state %s - ABORTING"%new_state
                    #self.write_log("ERROR: unknown new state %s - ABORTING"%new_state)
                    self.connection.close()
                    if os.path.exists(self.lock_file): os.remove(self.lock_file)
                    exit(1)

    def write_log(self,msg):
        print self.now_str()+" "+msg

    def now_str(self):
        """Return the current local time formatted as YYYY-MM-DD HH:MM:SS."""
        timestamp_format = "%Y-%m-%d %H:%M:%S"
        return time.strftime(timestamp_format,time.localtime())

    def state_idle(self):

        # Receive and process commands for "idle" state
        while True:

            cmd = self.get_command()
            print "Received command %s"%cmd
            #self.write_log('Received command '+cmd)
            if (cmd == "client_close"):
                return "client_close"
            elif (cmd == "get_state"):
                self.send_answer(self.current_state)
            elif (cmd == "get_setup"):
                self.send_answer(self.run.setup)
            elif (cmd == "get_setup_list"):
                self.send_answer(self.get_setup_list())
            elif (cmd == "get_board_list"):
                self.send_answer(str(self.run.boardid_list))
            elif (cmd == "get_run_number"):
                self.send_answer(str(self.db.get_last_run_in_db()))
            elif (cmd == "new_run"):
                res = self.new_run()
                if (res == "client_close"):
                    return "client_close"
                elif (res == "error"):
                    #self.write_log('ERROR while initializing new run')
                    print "ERROR while initializing new run"
                elif (res == "initialized"):
                    return "initialized"
                elif (res == "initfail"):
                    return "initfail"
                else:
                    #self.write_log("ERROR: new_run returned unknown answer "+res+" (?)")
                    print "ERROR: new_run returned unknown answer %s (?)"%res
            elif (cmd == "exit"):
                self.send_answer("exiting")
                return "exit"
            elif (cmd == "help"):
                msg = """Available commands:
help\t\tShow this help
get_state\tShow current state of RunControl
get_setup\tShow current setup name
get_setup_list\tShow list of available setups
get_board_list\tShow list of boards in use with current setup
get_board_config <b>\tShow current configuration of board <b>
get_run_number\tReturn last run number in DB
change_setup <setup>\tChange run setup to <setup>
new_run\t\tInitialize system for a new run
exit\t\tTell RunControl server to exit (use with extreme care!)"""
                self.send_answer(msg)
            else:

                # See if command can be handled by a regular expression
                found_re = False

                m = self.re_get_board_config.match(cmd)
                if (m):
                    self.send_answer(self.get_board_config(int(m.group(1))))
                    found_re = True

                m = self.re_change_setup.match(cmd)
                if (m):
                    self.send_answer(self.change_setup(m.group(1)))
                    found_re = True

                # No regular expression matched: command is unknown
                if not found_re:
                    self.send_answer("unknown command")
                    #self.write_log('command '+cmd+' is unknown')
                    print "Command %s is unknown"%cmd

    def state_initialized(self):

        # Receive and process commands for "initialized" state
        while True:

            cmd = self.get_command()
            #self.write_log('received command '+cmd)
            print "Received command %s"%cmd
            if (cmd == "client_close"):
                return "client_close"
            elif (cmd == "get_state"):
                self.send_answer(self.current_state)
            elif (cmd == "get_setup"):
                self.send_answer(self.run.setup)
            elif (cmd == "get_board_list"):
                self.send_answer(str(self.run.boardid_list))
            elif (cmd == "get_run_number"):
                self.send_answer(str(self.run.run_number))
            elif (cmd == "abort_run"):
                return self.abort_run()
            elif (cmd == "start_run"):
                return self.start_run()
            elif (cmd == "exit"):
                self.send_answer("exiting")
                return "exit"
            elif (cmd == "help"):
                msg = """Available commands:
help\t\tShow this help
get_state\tShow current state of RunControl
get_setup\tShow current setup name
get_board_list\tShow list of boards in use with current setup
get_board_config <b>\tShow current configuration of board <b>
get_board_log_file <b>\tGet name of log file for board <b>
get_run_number\tReturn current run number
start_run\t\tStart run
abort_run\t\tAbort run
exit\t\tTell RunControl server to exit (use with extreme care!)"""
                self.send_answer(msg)

            else:

                # See if command can be handled by a regular expression
                found_re = False

                m = self.re_get_board_config.match(cmd)
                if (m):
                    self.send_answer(self.get_board_config(int(m.group(1))))
                    found_re = True

                m = self.re_get_board_log_file.match(cmd)
                if (m):
                    self.send_answer(self.get_board_log_file(int(m.group(1))))
                    found_re = True

                # No regular expression matched: command is unknown
                if not found_re:
                    self.send_answer("unknown command")
                    #self.write_log('command '+cmd+' is unknown')
                    print "Command %s is unknown"%cmd

    def state_running(self):

        # Receive and process commands for "running" state
        while True:

            cmd = self.get_command()
            #self.write_log('received command '+cmd)
            print "Received command %s"%cmd
            if (cmd == "client_close"):
                return "client_close"
            elif (cmd == "get_state"):
                self.send_answer(self.current_state)
            elif (cmd == "get_setup"):
                self.send_answer(self.run.setup)
            elif (cmd == "get_board_list"):
                self.send_answer(str(self.run.boardid_list))
            elif (cmd == "get_run_number"):
                self.send_answer(str(self.run.run_number))
            elif (cmd == "stop_run"):
                return self.stop_run()
            elif (cmd == "exit"):
                self.send_answer("exiting")
                return "exit"
            elif (cmd == "help"):
                msg = """Available commands:
help\t\tShow this help
get_state\tShow current state of RunControl
get_setup\tShow current setup name
get_board_list\tShow list of boards in use with current setup
get_board_config <b>\tShow current configuration of board <b>
get_board_log_file <b>\tGet name of log file for board <b>
get_run_number\tReturn current run number
stop_run\t\tStop the run
exit\t\tTell RunControl server to exit (use with extreme care!)"""
                self.send_answer(msg)

            else:

                # See if command can be handled by a regular expression
                found_re = False

                m = self.re_get_board_config.match(cmd)
                if (m):
                    self.send_answer(self.get_board_config(int(m.group(1))))
                    found_re = True

                m = self.re_get_board_log_file.match(cmd)
                if (m):
                    self.send_answer(self.get_board_log_file(int(m.group(1))))
                    found_re = True

                # No regular expression matched: command is unknown
                if not found_re:
                    self.send_answer("unknown command")
                    #self.write_log('command '+cmd+' is unknown')
                    print "Command %s is unknown"%cmd
        return "idle"

    def state_initfail(self):
        """Handle the "initfail" state: read no command, go back to "idle"."""
        next_state = "idle"
        return next_state

    def get_command(self):

        # First get length of string
        l = ""
        for i in range(5): # Max 99999 characters
            ch = self.connection.recv(1)
            if ch:
                l += ch
            else:
                #self.write_log('no more data from client')
                print "Client closed connection"
                return "client_close"
        ll = int(l)

        # Then read the right amount of characters from the socket
        cmd = ""
        for i in range(ll):
            ch = self.connection.recv(1)
            if ch:
                cmd += ch
            else:
                #self.write_log('no more data from client')
                print "Client closed connection"
                return "client_close"

        return cmd

    def send_answer(self,answer):

        if len(answer)<100000:
            #self.write_log("Sending answer "+answer)
            print "Sending answer %s"%answer
            self.connection.sendall("%5.5d"%len(answer)+answer)
        else:
            #self.write_log('answer too long: cannot send')
            print "Answer is too long: cannot send"

    def get_board_config(self,brdid):
        if brdid in self.run.boardid_list:
            return self.run.adcboard_list[self.run.boardid_list.index(brdid)].format_config()
        else:
            return "ERROR: board id %d does not exist"%brdid

    def get_board_log_file(self,brdid):
        if brdid in self.run.boardid_list:
            return self.run.adcboard_list[self.run.boardid_list.index(brdid)].log_file
        else:
            return "ERROR: board id %d does not exist"%brdid

    def read_setup_list(self,setup_main_dir="setup"):
        """Return the sorted names of the directories found in setup_main_dir.

        Every directory directly inside setup_main_dir counts as one
        available setup; plain files and deeper levels are ignored.  An empty
        list is returned when setup_main_dir does not exist.

        setup_main_dir defaults to "setup" (relative to the current working
        directory).
        """

        # os.walk yields the top directory first: take its sub-directories
        # and stop there instead of descending through the whole tree
        for top,dirs,files in os.walk(setup_main_dir):
            return sorted(dirs)

        # os.walk yielded nothing: directory is missing or unreadable
        return []

    def get_setup_list(self):

        return str(self.read_setup_list())

    def change_setup(self,setup):
        """Switch the run to setup (or reload it when it is already in use).

        Returns the setup name on success.  Returns "error" when setup is not
        one of the names given by read_setup_list(), or when
        self.run.change_setup() itself reports "error", so the client is
        never told that a setup was loaded when it was not.
        """

        # Check if requested setup is known
        if setup not in self.read_setup_list():
            print "change_setup - ERROR: request to set unknown setup %s"%setup
            return "error"

        # Change (or reload) setup
        if setup == self.run.setup:
            print "change_setup - reloading setup %s"%setup
        else:
            print "change_setup - changing setup from %s to %s"%(self.run.setup,setup)

        if self.run.change_setup(setup) == "error":
            print "change_setup - ERROR: could not change setup to %s"%setup
            return "error"

        return setup

    def new_run(self):

        # Retrieve run number - next=next run from DB, dummy=dummy run (i.e. run nr=0)
        # Return run number used (0 for dummy run) or "error" for invalid answer
        newrun_number = 0
        self.send_answer("run_number")
        ans = self.get_command()
        if (ans=="next"):
            newrun_number = self.db.get_last_run_in_db()+1
        elif (ans=="dummy"):
            newrun_number = 0
        elif (ans == "error"):
            #self.write_log("run_number - client returned error")
            print "run_number - client returned error"
            return "error"
        elif (ans=="client_close"):
            return "client_close"
        else:
            #self.write_log("run_number - invalid option %s received"%ans)
            print "run_number - invalid option %s received"%ans
            self.send_answer("error")
            return "error"
        self.send_answer(str(newrun_number))

        # Retrieve run type (TEST,DAQ,COSMIC)
        # Return run type used or "error" for invalid answer
        newrun_type = ""
        self.send_answer("run_type")
        ans = self.get_command()
        if (ans=="TEST" or ans=="DAQ" or ans=="COSMIC"):
            newrun_type = ans
        elif (ans == "error"):
            self.write_log("run_type - client returned error")
            return "error"
        elif (ans=="client_close"):
            return "client_close"
        else:
#            self.write_log("run_type - invalid option %s received"%ans)
            print "run_type - invalid option %s received"%ans
            self.send_answer("error")
            return "error"
        self.send_answer(newrun_type)

        newrun_user = ""
        self.send_answer("shift_crew")
        ans = self.get_command()
        if (ans=="client_close"): return "client_close"
        newrun_user = ans

        newrun_comment = ""
        self.send_answer("run_comment")
        ans = self.get_command()
        if (ans=="client_close"): return "client_close"
        newrun_comment = ans

        #self.write_log("Run number: "+str(newrun_number))
        #self.write_log("Run type: "+newrun_type)
        #self.write_log("Run crew: "+newrun_user)
        #self.write_log("Run comment: "+newrun_comment)
        print "Run number:  %d"%newrun_number
        print "Run type:    %s"%newrun_type
        print "Run crew:    %s"%newrun_user
        print "Run comment: %s"%newrun_comment

        # Set run configuration according to user's request
        self.run.change_run(newrun_number)
        self.run.run_type = newrun_type
        self.run.run_user = newrun_user
        self.run.run_comment = newrun_comment

        # Check if requested run number was not used before
        # Saves the day if more than one RunControl program is running at the same time (DON'T DO THAT!!!)
        if (self.run.run_number):
            run_is_in_db = self.db.is_run_in_db(self.run.run_number)
            if (run_is_in_db):
                #self.write_log("ERROR - Run "+str(self.run.run_number)+" is already in the DB: cannot use it again")
                #self.write_log("Please check if someone else is using this RunControl before retrying")
                print "ERROR - Run %d is already in the DB: cannot use it again"%self.run.run_number
                print "Please check if someone else is using this RunControl before retrying"
                self.send_answer("error_init")
                return "error"

        # Create run structure in the DB
        #self.write_log("Initializing Run "+str(self.run.run_number))
        print "Initializing Run %d"%self.run.run_number
        self.run.create_run()
        if (self.run.run_number): self.db.set_run_time_init(self.run.run_number,self.now_str())

        # Create directory to host log files
        #self.write_log("Creating log directory "+self.run.log_dir)
        print "Creating log directory %s"%self.run.log_dir
        self.run.create_log_dir()

        # Write run and boards configuration files
        #self.write_log("Writing configuration file "+self.run.config_file)
        print "Writing configuration file %s"%self.run.config_file
        self.run.write_config()
        for adc in (self.run.adcboard_list):
            #self.write_log("Writing configuration file "+adc.config_file)
            print "Writing configuration file %s"%adc.config_file
            adc.write_config()

        # Start DAQ for all boards
        self.send_answer("start_init")
        for adc in (self.run.adcboard_list):

            p_id = adc.start_daq()
            if p_id:
                #self.write_log("ADC board %02d - Started DAQ with process id %d"%(adc.board_id,p_id))
                print "ADC board %02d - Started DAQ with process id %d"%(adc.board_id,p_id)
                self.send_answer("adc "+str(adc.board_id)+" init")
                adc.status = "init"
            else:
                #self.write_log("ADC board %02d - ERROR: could not start DAQ"%adc.board_id)
                print "ADC board %02d - ERROR: could not start DAQ"%adc.board_id
                self.send_answer("adc "+str(adc.board_id)+" fail")
                adc.status = "fail"

        # Wait for all boards to finish initialization
        n_try = 0
        while(1):
            all_boards_init = 1
            all_boards_ready = 1
            for adc in (self.run.adcboard_list):
                # Check if any board changed status
                if (adc.status == "init"):
                    if (os.path.exists(adc.initok_file)):
                        # Initialization ended OK
                        #self.write_log("ADC board %02d - Initialized and ready for DAQ"%adc.board_id)
                        print "ADC board %02d - Initialized and ready for DAQ"%adc.board_id
                        self.send_answer("adc "+str(adc.board_id)+" ready")
                        adc.status = "ready"
                    elif (os.path.exists(adc.initfail_file)):
                        # Problem during initialization
                        #self.write_log("ADC board %02d - *** Initialization failed ***"%adc.board_id)
                        print "ADC board %02d - *** Initialization failed ***"%adc.board_id
                        self.send_answer("adc "+str(adc.board_id)+" fail")
                        adc.status = "fail"
                    else:
                        # This board is still initializing
                        all_boards_init = 0
                # Check if any board is in fail status
                if (adc.status == "fail"): all_boards_ready = 0

            if (all_boards_init == 0):
                # Some boards are still initializing: keep waiting
                n_try += 1
                if (n_try>=10):
                    #self.write_log("*** ERROR *** One or more boards did not initialize within 10sec. Cannot start run")
                    print "*** ERROR *** One or more boards did not initialize within 10sec. Cannot start run"
                    if (self.run.run_number): self.db.set_run_status(self.run.run_number,5) # Status 5: run with problems at initialization
                    self.send_answer("init_timeout")
                    return "error"
                time.sleep(1)
            elif (all_boards_ready):
                #self.write_log("All boards completed initialization: DAQ run can be started")
                print "All boards completed initialization: DAQ run can be started"
                if (self.run.run_number): self.db.set_run_status(self.run.run_number,1) # Status 1: run correctly initialized
                self.send_answer("init_ready")
                return "initialized"
            else:
                #self.write_log("*** ERROR *** One or more boards failed the initialization. Cannot start run")
                print "*** ERROR *** One or more boards failed the initialization. Cannot start run"
                if (self.run.run_number): self.db.set_run_status(self.run.run_number,5) # Status 5: run with problems at initialization
                self.send_answer("init_fail")
                return "initfail"

    def start_run(self):

        #self.write_log("Starting run")
        print "Starting run"
        if (self.run.run_number):
            self.db.set_run_time_start(self.run.run_number,self.now_str())
            self.db.set_run_status(self.run.run_number,2) # Status 2: run started

        # Create "start the run" tag file
        open(self.run.start_file,'w').close()

        self.send_answer("run_started")

        # RunControl is now in "running" mode
        return "running"

    def stop_run(self):

        self.send_answer("run_comment_end")
        ans = self.get_command()
        if (ans=="client_close"): return "client_close"
        #self.write_log("End of Run comment: "+ans)
        print "End of Run comment: %s"%ans
        self.run.run_comment_end = ans

        #self.write_log("Stopping run")
        print "Stopping run"
        if (self.run.run_number): self.db.set_run_status(self.run.run_number,3) # Status 3: run stopped normally

        return self.terminate_run()

    def abort_run(self):

        self.run.run_comment_end = "Run aborted"

        #self.write_log("Aborting run")
        print "Aborting run"
        if (self.run.run_number): self.db.set_run_status(self.run.run_number,4) # Status 4: run aborted

        return self.terminate_run()

    def terminate_run(self):

        if (self.run.run_number):
            self.db.set_run_time_stop(self.run.run_number,self.now_str())
            self.db.set_run_comment_end(self.run.run_number,self.run.run_end_comment)

        # Create "stop the run" tag file
        open(self.run.quit_file,'w').close()

        # Run stop_daq procedure for each ADC board
        terminate_ok = True
        for adc in (self.run.adcboard_list):
            if adc.stop_daq():
                self.send_answer("adc %d terminate_ok"%adc.board_id)
                #self.write_log("ADC board %02d - Terminated correctly"%adc.board_id)
                print "ADC board %02d - Terminated correctly"%adc.board_id
            else:
                terminate_ok = False
                self.send_answer("adc %d terminate_error"%adc.board_id)
                #self.write_log("ADC board %02d - WARNING: problems while terminating DAQ"%adc.board_id)
                print "ADC board %02d - WARNING: problems while terminating DAQ"%adc.board_id
                if (self.run.run_number): self.db.set_run_status(self.run.run_number,6) # Status 6: run ended with errors

        # Clean up run directory
        for adc in (self.run.adcboard_list):
            if (os.path.exists(adc.initok_file)):   os.remove(adc.initok_file)
            if (os.path.exists(adc.initfail_file)): os.remove(adc.initfail_file)
        if(os.path.exists(self.run.start_file)): os.remove(self.run.start_file)
        if(os.path.exists(self.run.quit_file)):  os.remove(self.run.quit_file)

        if terminate_ok:
            self.send_answer("terminate_ok")
        else:
            self.send_answer("terminate_error")

        # At the end of this procedure RunControl is back to "idle" mode
        return "idle"

    # NOTE: a method with this same name is already defined earlier in this
    # class; being the later definition, this one is the one in effect.
    def now_str(self):
        """Return the current local time formatted as YYYY-MM-DD HH:MM:SS."""
        local_now = time.localtime()
        return time.strftime("%Y-%m-%d %H:%M:%S",local_now)
# Example #2
# 0
class RunControlServer:
    def __init__(self):
        """Initialize the Run Control server and serve clients until told to exit.

        Takes the lock file, loads the initial setup, opens the listening
        socket on localhost port 10000, installs the SIGINT handler and then
        enters main_loop(). If the lock cannot be taken the process exits with
        code 1; any later start-up failure removes the lock file before exiting
        with code 1.
        """

        # Define names of lock and last_used_setup files
        self.lock_file = "run/lock"
        self.lus_file = "setup/last_used_setup"

        # Redefine print to send output to log file
        sys.stdout = Logger()

        # Create lock file (on failure the lock belongs to someone else: do not remove it)
        if self.create_lock_file() == "error":
            exit(1)

        # Define what setup to use at startup (can be changed interactively)
        initial_setup = self.get_initial_setup()

        print("=== Starting PADME Run Control server with %s setup" % initial_setup)

        # Create run
        self.run = Run()
        if self.run.change_setup(initial_setup) == "error":
            print("ERROR - Error while changing run setup to %s" % initial_setup)
            if os.path.exists(self.lock_file):
                os.remove(self.lock_file)
            exit(1)

        # Start in idle state
        self.current_state = "idle"

        # Create handler for PadmeDB
        self.db = PadmeDB()

        # Create useful regular expressions (raw strings keep \d and \w from
        # being treated as string escapes)
        self.re_get_board_config = re.compile(r"^get_board_config (\d+)$")
        self.re_get_board_log_file = re.compile(r"^get_board_log_file (\d+)$")
        self.re_change_setup = re.compile(r"^change_setup (\w+)$")

        # Create a TCP/IP socket
        self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)

        # Bind the socket to the port
        server_address = ('localhost', 10000)
        print("Starting server socket on %s port %s" % server_address)
        try:
            self.sock.bind(server_address)
        except socket.error:
            # Only socket failures are handled here, so that SystemExit and
            # KeyboardInterrupt are never swallowed by this handler
            print("ERROR - Could not bind to socket: %s" % str(sys.exc_info()[0]))
            self.sock.close()
            if os.path.exists(self.lock_file):
                os.remove(self.lock_file)
            exit(1)

        # Listen for incoming connections
        self.sock.listen(1)

        # Define SIGINT handler
        signal.signal(signal.SIGINT, self.sigint_handler)

        # Setup main interface
        self.main_loop()

        # Clean up before exiting
        self.sock.close()

    def create_lock_file(self):
        """Take the server lock file unless another live process already owns it.

        If the lock file exists, the pid stored in it (last non-blank line) is
        checked with "ps -p <pid>". A live, non-zombie process with that pid
        blocks the start-up. A missing process, a zombie, or a lock file which
        does not contain a valid pid are treated as a stale lock, which is then
        overwritten.

        Returns "ok" when the lock file now contains our pid, "error" when the
        lock is held by a running process or the lock path is not a regular file.
        """

        # Check if lock file exists
        if os.path.exists(self.lock_file):

            if not os.path.isfile(self.lock_file):
                print("ERROR - Lock file %s found but it is not a file" % self.lock_file)
                return "error"

            with open(self.lock_file, "r") as lf:
                entries = [ll.strip() for ll in lf if ll.strip()]
            pid_text = entries[-1] if entries else ""

            print("Lock file %s found for pid %s - checking status" % (self.lock_file, pid_text))

            # The content of the lock file ends up in a shell command line:
            # accept it only if it is a plain positive integer
            try:
                locked_pid = int(pid_text)
            except ValueError:
                locked_pid = -1

            if locked_pid <= 0:
                print("WARNING - Lock file %s does not contain a valid pid - treating it as stale" % self.lock_file)
            else:
                ppinfo = os.popen("ps -p %d" % locked_pid)
                pinfo = ppinfo.readlines()
                ppinfo.close()
                # ps prints a header line plus one line if the process exists
                if len(pinfo) == 2:
                    if pinfo[1].find("<defunct>") > -1:
                        print("There is zombie process with this pid. The RunControlServer is probably dead. Proceeding cautiously...")
                    else:
                        print("ERROR - there is already a RunControlServer running with pid %s" % pid_text)
                        return "error"
                else:
                    print("No RunControlServer process found. As you were...")

        # Create our own lock file
        pid = os.getpid()
        with open(self.lock_file, "w") as lf:
            lf.write("%d" % pid)

        return "ok"

    def get_initial_setup(self):
        """Return the name of the setup to load at start-up.

        The name is read from the last-used-setup file (self.lus_file) with
        leading/trailing newlines removed. If that file is missing, is not a
        regular file or is empty, a warning is printed and the default setup
        "test" is returned.
        """

        setup = "test"

        if not os.path.exists(self.lus_file):
            print("WARNING - Could not find file with last used setup %s - Using default setup %s" % (
                self.lus_file, setup))
            return setup

        if not os.path.isfile(self.lus_file):
            print("WARNING - File with last used setup %s exists but it is not a file - Using default setup %s" % (
                self.lus_file, setup))
            return setup

        # "with" guarantees the file is closed even if read() fails
        with open(self.lus_file, "r") as lusf:
            lus = lusf.read().strip("\n")

        if lus == "":
            print("WARNING - File with last used setup %s exists but it is empty - Using default setup %s" % (
                self.lus_file, setup))
            return setup

        return lus

    def sigint_handler(self, signal, frame):
        """Handle SIGINT: abort any initialized/running run, drop the lock file and exit.

        The run is closed in the DB (status 4, stop time, end comment) when its
        run number is non-zero, the "quit" tag file is created, stop_daq() is
        called on every ADC board and the run directory is cleaned up. No answer
        is sent to the client from here. The process always ends with exit code 0.

        signal, frame -- standard signal handler arguments (not used)
        """

        print("RunControlSever received SIGINT: exiting")

        # If a run is initialized/running, abort it as cleanly as possible
        if self.current_state in ("initialized", "running"):

            run = self.run
            run.run_comment_end = "Run aborted because of SIGINT"
            print("Aborting run on SIGINT")

            if run.run_number:
                # Status 4: run aborted
                self.db.set_run_status(run.run_number, 4)
                self.db.set_run_time_stop(run.run_number, self.now_str())
                self.db.set_run_comment_end(run.run_number, run.run_comment_end)

            # Create "stop the run" tag file
            open(run.quit_file, 'w').close()

            for board in run.adcboard_list:
                if not board.stop_daq():
                    print("ADC board %02d - WARNING: problems while terminating DAQ" % board.board_id)
                    if run.run_number:
                        # Status 6: run ended with errors
                        self.db.set_run_status(run.run_number, 6)
                    continue
                print("ADC board %02d - Terminated correctly" % board.board_id)

            # Clean up run directory
            for board in run.adcboard_list:
                for tag_file in (board.initok_file, board.initfail_file):
                    if os.path.exists(tag_file):
                        os.remove(tag_file)
            for tag_file in (run.start_file, run.quit_file):
                if os.path.exists(tag_file):
                    os.remove(tag_file)

        if os.path.exists(self.lock_file):
            os.remove(self.lock_file)

        # Now we can exit
        exit(0)

    def main_loop(self):
        """Accept client connections and drive the RunControl state machine.

        For each accepted connection the handler of the current state is called
        repeatedly. A handler returns either a new state ("idle", "initialized",
        "running", "initfail"), "client_close" (close this connection and wait
        for the next client) or "exit" (remove the lock file, exit with code 0).
        Any other value is an error: the lock file is removed and the process
        exits with code 1.
        """

        # Name of the method handling each known state
        handler_names = {
            "idle": "state_idle",
            "initialized": "state_initialized",
            "running": "state_running",
            "initfail": "state_initfail",
        }

        while True:

            # Wait for a connection
            print("Waiting for a connection")
            (self.connection, client_address) = self.sock.accept()
            print("Connection from %s" % str(client_address))

            while True:

                # Handle connection according to current status of RunControl
                handler_name = handler_names.get(self.current_state)
                if handler_name is None:
                    print("ERROR: unknown state %s - ABORTING" % self.current_state)
                    new_state = "exit"
                else:
                    new_state = getattr(self, handler_name)()

                # See if status changed
                if new_state in ("idle", "initialized", "running", "initfail"):
                    self.current_state = new_state
                    continue

                if new_state == "client_close":
                    self.connection.close()
                    break

                # Anything else terminates the server
                if new_state == "exit":
                    print("=== RunControlSever received exit command: exiting")
                    exit_code = 0
                else:
                    print("=== RunControlServer = ERROR: unknown new state %s - ABORTING" % new_state)
                    exit_code = 1
                self.connection.close()
                if os.path.exists(self.lock_file):
                    os.remove(self.lock_file)
                exit(exit_code)

    def write_log(self, msg):
        """Print msg preceded by the current timestamp and a single space."""
        print(" ".join((self.now_str(), msg)))

    def now_str(self):
        """Return the current local time formatted as "YYYY-MM-DD HH:MM:SS"."""
        timestamp_format = "%Y-%m-%d %H:%M:%S"
        return time.strftime(timestamp_format, time.localtime())

    def state_idle(self):

        # Receive and process commands for "idle" state
        while True:

            cmd = self.get_command()
            print "Received command %s" % cmd
            #self.write_log('Received command '+cmd)
            if (cmd == "client_close"):
                return "client_close"
            elif (cmd == "get_state"):
                self.send_answer(self.current_state)
            elif (cmd == "get_setup"):
                self.send_answer(self.run.setup)
            elif (cmd == "get_setup_list"):
                self.send_answer(self.get_setup_list())
            elif (cmd == "get_board_list"):
                self.send_answer(str(self.run.boardid_list))
            elif (cmd == "get_run_number"):
                self.send_answer(str(self.db.get_last_run_in_db()))
            elif (cmd == "new_run"):
                res = self.new_run()
                if (res == "client_close"):
                    return "client_close"
                elif (res == "error"):
                    #self.write_log('ERROR while initializing new run')
                    print "ERROR while initializing new run"
                elif (res == "initialized"):
                    return "initialized"
                elif (res == "initfail"):
                    return "initfail"
                else:
                    #self.write_log("ERROR: new_run returned unknown answer "+res+" (?)")
                    print "ERROR: new_run returned unknown answer %s (?)" % res
            elif (cmd == "exit"):
                self.send_answer("exiting")
                return "exit"
            elif (cmd == "help"):
                msg = """Available commands:
help\t\tShow this help
get_state\tShow current state of RunControl
get_setup\tShow current setup name
get_setup_list\tShow list of available setups
get_board_list\tShow list of boards in use with current setup
get_board_config <b>\tShow current configuration of board <b>
get_run_number\tReturn last run number in DB
change_setup <setup>\tChange run setup to <setup>
new_run\t\tInitialize system for a new run
exit\t\tTell RunControl server to exit (use with extreme care!)"""
                self.send_answer(msg)
            else:

                # See if command can be handled by a regular expression
                found_re = False

                m = self.re_get_board_config.match(cmd)
                if (m):
                    self.send_answer(self.get_board_config(int(m.group(1))))
                    found_re = True

                m = self.re_change_setup.match(cmd)
                if (m):
                    self.send_answer(self.change_setup(m.group(1)))
                    found_re = True

                # No regular expression matched: command is unknown
                if not found_re:
                    self.send_answer("unknown command")
                    #self.write_log('command '+cmd+' is unknown')
                    print "Command %s is unknown" % cmd

    def state_initialized(self):

        # Receive and process commands for "initialized" state
        while True:

            cmd = self.get_command()
            #self.write_log('received command '+cmd)
            print "Received command %s" % cmd
            if (cmd == "client_close"):
                return "client_close"
            elif (cmd == "get_state"):
                self.send_answer(self.current_state)
            elif (cmd == "get_setup"):
                self.send_answer(self.run.setup)
            elif (cmd == "get_board_list"):
                self.send_answer(str(self.run.boardid_list))
            elif (cmd == "get_run_number"):
                self.send_answer(str(self.run.run_number))
            elif (cmd == "abort_run"):
                return self.abort_run()
            elif (cmd == "start_run"):
                return self.start_run()
            elif (cmd == "exit"):
                self.send_answer("exiting")
                return "exit"
            elif (cmd == "help"):
                msg = """Available commands:
help\t\tShow this help
get_state\tShow current state of RunControl
get_setup\tShow current setup name
get_board_list\tShow list of boards in use with current setup
get_board_config <b>\tShow current configuration of board <b>
get_board_log_file <b>\tGet name of log file for board <b>
get_run_number\tReturn current run number
start_run\t\tStart run
abort_run\t\tAbort run
exit\t\tTell RunControl server to exit (use with extreme care!)"""
                self.send_answer(msg)

            else:

                # See if command can be handled by a regular expression
                found_re = False

                m = self.re_get_board_config.match(cmd)
                if (m):
                    self.send_answer(self.get_board_config(int(m.group(1))))
                    found_re = True

                m = self.re_get_board_log_file.match(cmd)
                if (m):
                    self.send_answer(self.get_board_log_file(int(m.group(1))))
                    found_re = True

                # No regular expression matched: command is unknown
                if not found_re:
                    self.send_answer("unknown command")
                    #self.write_log('command '+cmd+' is unknown')
                    print "Command %s is unknown" % cmd

    def state_running(self):

        # Receive and process commands for "running" state
        while True:

            cmd = self.get_command()
            #self.write_log('received command '+cmd)
            print "Received command %s" % cmd
            if (cmd == "client_close"):
                return "client_close"
            elif (cmd == "get_state"):
                self.send_answer(self.current_state)
            elif (cmd == "get_setup"):
                self.send_answer(self.run.setup)
            elif (cmd == "get_board_list"):
                self.send_answer(str(self.run.boardid_list))
            elif (cmd == "get_run_number"):
                self.send_answer(str(self.run.run_number))
            elif (cmd == "stop_run"):
                return self.stop_run()
            elif (cmd == "exit"):
                self.send_answer("exiting")
                return "exit"
            elif (cmd == "help"):
                msg = """Available commands:
help\t\tShow this help
get_state\tShow current state of RunControl
get_setup\tShow current setup name
get_board_list\tShow list of boards in use with current setup
get_board_config <b>\tShow current configuration of board <b>
get_board_log_file <b>\tGet name of log file for board <b>
get_run_number\tReturn current run number
stop_run\t\tStop the run
exit\t\tTell RunControl server to exit (use with extreme care!)"""
                self.send_answer(msg)

            else:

                # See if command can be handled by a regular expression
                found_re = False

                m = self.re_get_board_config.match(cmd)
                if (m):
                    self.send_answer(self.get_board_config(int(m.group(1))))
                    found_re = True

                m = self.re_get_board_log_file.match(cmd)
                if (m):
                    self.send_answer(self.get_board_log_file(int(m.group(1))))
                    found_re = True

                # No regular expression matched: command is unknown
                if not found_re:
                    self.send_answer("unknown command")
                    #self.write_log('command '+cmd+' is unknown')
                    print "Command %s is unknown" % cmd
        return "idle"

    def state_initfail(self):
        """Handle the "initfail" state: no command is read, RunControl goes straight back to "idle"."""
        next_state = "idle"
        return next_state

    def get_command(self):
        """Read one length-prefixed command from the client connection.

        A message is a 5-character decimal length header (max 99999) followed by
        that many characters of command text.

        Returns the command text, or "client_close" when the client closed the
        connection before a full message arrived or when the length header is
        not a valid non-negative number. In the latter case the stream cannot
        be re-synchronized, so the caller is expected to drop the connection.
        """

        def read_exact(count):
            # Read exactly `count` characters; None means the peer closed first
            chunks = []
            missing = count
            while missing > 0:
                chunk = self.connection.recv(missing)
                if not chunk:
                    return None
                chunks.append(chunk)
                missing -= len(chunk)
            return "".join(chunks)

        # First get length of string
        header = read_exact(5)  # Max 99999 characters
        if header is None:
            print("Client closed connection")
            return "client_close"

        try:
            length = int(header)
        except ValueError:
            length = -1
        if length < 0:
            print("ERROR - Invalid command length header %r: dropping client connection" % header)
            return "client_close"

        # Then read the right amount of characters from the socket
        cmd = read_exact(length)
        if cmd is None:
            print("Client closed connection")
            return "client_close"

        return cmd

    def send_answer(self, answer):
        """Send answer to the client, preceded by its length as a 5-digit zero-padded header.

        Answers of 100000 characters or more do not fit the header: they are
        not sent and only a message is printed.
        """

        if len(answer) >= 100000:
            print("Answer is too long: cannot send")
            return

        print("Sending answer %s" % answer)
        header = "%5.5d" % len(answer)
        self.connection.sendall(header + answer)

    def get_board_config(self, brdid):
        """Return the formatted configuration of board brdid, or an "ERROR: ..." string if the id is unknown."""

        if brdid not in self.run.boardid_list:
            return "ERROR: board id %d does not exist" % brdid

        # boardid_list and adcboard_list are accessed with the same index
        position = self.run.boardid_list.index(brdid)
        return self.run.adcboard_list[position].format_config()

    def get_board_log_file(self, brdid):
        """Return the log_file attribute of board brdid, or an "ERROR: ..." string if the id is unknown."""

        if brdid not in self.run.boardid_list:
            return "ERROR: board id %d does not exist" % brdid

        # boardid_list and adcboard_list are accessed with the same index
        position = self.run.boardid_list.index(brdid)
        return self.run.adcboard_list[position].log_file

    def read_setup_list(self):
        """Return the sorted list of available setups.

        A setup is any directory found directly inside the "setup" directory
        (relative to the current working directory). An empty list is returned
        if that directory does not exist.
        """

        # Get list of available setups
        setup_main_dir = "setup"

        # Only the first entry produced by os.walk() describes setup_main_dir
        # itself: take that one and do not descend into the whole tree
        top, dirs, files = next(os.walk(setup_main_dir), (setup_main_dir, [], []))

        return sorted(dirs)

    def get_setup_list(self):
        """Return the list of available setups converted to its string representation."""
        available_setups = self.read_setup_list()
        return str(available_setups)

    def change_setup(self, setup):
        """Change (or reload) the run setup on request of the client.

        setup -- name of the requested setup

        Returns the setup name on success. Returns "error" if the setup is not
        in the list of available setups or if the Run object reports "error"
        while loading it.
        """

        # Check if requested setup is known
        if setup not in self.read_setup_list():
            print("change_setup - ERROR: request to set unknown setup %s" % setup)
            return "error"

        # Change (or reload) setup
        if setup == self.run.setup:
            print("change_setup - reloading setup %s" % setup)
        else:
            print("change_setup - changing setup from %s to %s" % (
                self.run.setup, setup))

        # The Run object signals a failure by returning "error": do not tell
        # the client that the setup was changed in that case
        if self.run.change_setup(setup) == "error":
            print("change_setup - ERROR: could not load setup %s" % setup)
            return "error"

        return setup

    def new_run(self):
        """Negotiate the parameters of a new run with the client and initialize the DAQ.

        The server sends four requests in sequence ("run_number", "run_type",
        "shift_crew", "run_comment") and reads one answer for each of them.
        It then configures the Run object, creates the run, writes the
        configuration files, starts the DAQ on every ADC board and polls once
        per second, for at most 10 tries, for the initok/initfail tag files
        of the boards.

        Returns:
          "client_close" -- the client closed the connection during the dialogue
          "error"        -- invalid/error answer from the client, run number
                            already in the DB, or initialization timeout
          "initialized"  -- all boards are ready, the run can be started
          "initfail"     -- all boards finished initializing but at least one failed
        """

        # Retrieve run number - next=next run from DB, dummy=dummy run (i.e. run nr=0)
        # Return run number used (0 for dummy run) or "error" for invalid answer
        newrun_number = 0
        self.send_answer("run_number")
        ans = self.get_command()
        if (ans == "next"):
            newrun_number = self.db.get_last_run_in_db() + 1
        elif (ans == "dummy"):
            newrun_number = 0
        elif (ans == "error"):
            print "run_number - client returned error"
            return "error"
        elif (ans == "client_close"):
            return "client_close"
        else:
            print "run_number - invalid option %s received" % ans
            self.send_answer("error")
            return "error"
        self.send_answer(str(newrun_number))

        # Retrieve run type (TEST,DAQ,COSMIC)
        # Return run type used or "error" for invalid answer
        newrun_type = ""
        self.send_answer("run_type")
        ans = self.get_command()
        if (ans == "TEST" or ans == "DAQ" or ans == "COSMIC"):
            newrun_type = ans
        elif (ans == "error"):
            # NOTE(review): this is the only message in this method reported via
            # write_log (i.e. with a timestamp prefix) instead of print
            self.write_log("run_type - client returned error")
            return "error"
        elif (ans == "client_close"):
            return "client_close"
        else:
            print "run_type - invalid option %s received" % ans
            self.send_answer("error")
            return "error"
        self.send_answer(newrun_type)

        # Retrieve shift crew: any answer is accepted as is
        newrun_user = ""
        self.send_answer("shift_crew")
        ans = self.get_command()
        if (ans == "client_close"): return "client_close"
        newrun_user = ans

        # Retrieve start of run comment: any answer is accepted as is
        newrun_comment = ""
        self.send_answer("run_comment")
        ans = self.get_command()
        if (ans == "client_close"): return "client_close"
        newrun_comment = ans

        print "Run number:  %d" % newrun_number
        print "Run type:    %s" % newrun_type
        print "Run crew:    %s" % newrun_user
        print "Run comment: %s" % newrun_comment

        # Set run configuration according to user's request
        self.run.change_run(newrun_number)
        self.run.run_type = newrun_type
        self.run.run_user = newrun_user
        self.run.run_comment = newrun_comment

        # Check if requested run number was not used before
        # Saves the day if more than one RunControl program is running at the same time (DON'T DO THAT!!!)
        if (self.run.run_number):
            run_is_in_db = self.db.is_run_in_db(self.run.run_number)
            if (run_is_in_db):
                print "ERROR - Run %d is already in the DB: cannot use it again" % self.run.run_number
                print "Please check if someone else is using this RunControl before retrying"
                self.send_answer("error_init")
                return "error"

        # Create run structure in the DB
        print "Initializing Run %d" % self.run.run_number
        self.run.create_run()
        if (self.run.run_number):
            self.db.set_run_time_init(self.run.run_number, self.now_str())

        # Create directory to host log files
        print "Creating log directory %s" % self.run.log_dir
        self.run.create_log_dir()

        # Write run and boards configuration files
        print "Writing configuration file %s" % self.run.config_file
        self.run.write_config()
        for adc in (self.run.adcboard_list):
            print "Writing configuration file %s" % adc.config_file
            adc.write_config()

        # Start DAQ for all boards
        # Each board is tagged "init" if its DAQ process started, "fail" otherwise
        self.send_answer("start_init")
        for adc in (self.run.adcboard_list):

            p_id = adc.start_daq()
            if p_id:
                print "ADC board %02d - Started DAQ with process id %d" % (
                    adc.board_id, p_id)
                self.send_answer("adc " + str(adc.board_id) + " init")
                adc.status = "init"
            else:
                print "ADC board %02d - ERROR: could not start DAQ" % adc.board_id
                self.send_answer("adc " + str(adc.board_id) + " fail")
                adc.status = "fail"

        # Wait for all boards to finish initialization
        n_try = 0
        while (1):
            # all_boards_init: no board is still in "init" status
            # all_boards_ready: no board is in "fail" status
            all_boards_init = 1
            all_boards_ready = 1
            for adc in (self.run.adcboard_list):
                # Check if any board changed status
                if (adc.status == "init"):
                    if (os.path.exists(adc.initok_file)):
                        # Initialization ended OK
                        print "ADC board %02d - Initialized and ready for DAQ" % adc.board_id
                        self.send_answer("adc " + str(adc.board_id) + " ready")
                        adc.status = "ready"
                    elif (os.path.exists(adc.initfail_file)):
                        # Problem during initialization
                        print "ADC board %02d - *** Initialization failed ***" % adc.board_id
                        self.send_answer("adc " + str(adc.board_id) + " fail")
                        adc.status = "fail"
                    else:
                        # This board is still initializing
                        all_boards_init = 0
                # Check if any board is in fail status
                if (adc.status == "fail"): all_boards_ready = 0

            if (all_boards_init == 0):
                # Some boards are still initializing: keep waiting
                n_try += 1
                if (n_try >= 10):
                    # NOTE(review): stop_daq() is not called on this path for the
                    # boards which were started above - confirm that this is intended
                    print "*** ERROR *** One or more boards did not initialize within 10sec. Cannot start run"
                    if (self.run.run_number):
                        self.db.set_run_status(
                            self.run.run_number,
                            5)  # Status 5: run with problems at initialization
                    self.send_answer("init_timeout")
                    return "error"
                time.sleep(1)
            elif (all_boards_ready):
                print "All boards completed initialization: DAQ run can be started"
                if (self.run.run_number):
                    self.db.set_run_status(
                        self.run.run_number,
                        1)  # Status 1: run correctly initialized
                self.send_answer("init_ready")
                return "initialized"
            else:
                print "*** ERROR *** One or more boards failed the initialization. Cannot start run"
                if (self.run.run_number):
                    self.db.set_run_status(
                        self.run.run_number,
                        5)  # Status 5: run with problems at initialization
                self.send_answer("init_fail")
                return "initfail"

    def start_run(self):
        """Start an initialized run.

        Records start time and status 2 in the DB (only when the run number is
        non-zero), creates the "start" tag file and answers "run_started" to
        the client.

        Returns the string "running".
        """

        print("Starting run")

        run = self.run
        if run.run_number:
            self.db.set_run_time_start(run.run_number, self.now_str())
            # Status 2: run started
            self.db.set_run_status(run.run_number, 2)

        # Create "start the run" tag file
        start_tag = open(run.start_file, 'w')
        start_tag.close()

        self.send_answer("run_started")

        # RunControl is now in "running" mode
        return "running"

    def stop_run(self):
        """Stop the current run on request of the client.

        Asks the client for the end-of-run comment ("run_comment_end"), stores
        it in the Run object, sets status 3 in the DB (only when the run number
        is non-zero) and hands over to terminate_run().

        Returns "client_close" if the client closed the connection instead of
        sending the comment, otherwise the result of terminate_run().
        """

        self.send_answer("run_comment_end")
        end_comment = self.get_command()
        if end_comment == "client_close":
            return "client_close"

        print("End of Run comment: %s" % end_comment)
        self.run.run_comment_end = end_comment

        print("Stopping run")
        if self.run.run_number:
            # Status 3: run stopped normally
            self.db.set_run_status(self.run.run_number, 3)

        return self.terminate_run()

    def abort_run(self):
        """Abort the current run.

        Sets the end-of-run comment to "Run aborted", sets status 4 in the DB
        (only when the run number is non-zero) and hands over to
        terminate_run(), whose result is returned.
        """

        run = self.run
        run.run_comment_end = "Run aborted"

        print("Aborting run")
        if run.run_number:
            # Status 4: run aborted
            self.db.set_run_status(run.run_number, 4)

        return self.terminate_run()

    def terminate_run(self):
        """Stop the DAQ on all ADC boards, clean up the run directory and return to idle.

        When the run number is non-zero the stop time and the end-of-run comment
        are written to the DB. The outcome for each board is sent to the client
        ("adc <id> terminate_ok" or "adc <id> terminate_error"), followed by a
        global "terminate_ok" / "terminate_error" answer.

        Returns the string "idle".
        """

        run = self.run

        # DB bookkeeping is skipped when the run number is 0 (or not set)
        if run.run_number:
            self.db.set_run_time_stop(run.run_number, self.now_str())
            self.db.set_run_comment_end(run.run_number, run.run_comment_end)

        # Create "stop the run" tag file
        open(run.quit_file, 'w').close()

        # Run stop_daq procedure for each ADC board
        all_stopped = True
        for board in run.adcboard_list:
            if not board.stop_daq():
                all_stopped = False
                self.send_answer("adc %d terminate_error" % board.board_id)
                print("ADC board %02d - WARNING: problems while terminating DAQ" % board.board_id)
                if run.run_number:
                    # Status 6: run ended with errors
                    self.db.set_run_status(run.run_number, 6)
                continue
            self.send_answer("adc %d terminate_ok" % board.board_id)
            print("ADC board %02d - Terminated correctly" % board.board_id)

        # Clean up run directory: per-board init tags first, then start/quit tags
        for board in run.adcboard_list:
            for tag_file in (board.initok_file, board.initfail_file):
                if os.path.exists(tag_file):
                    os.remove(tag_file)
        for tag_file in (run.start_file, run.quit_file):
            if os.path.exists(tag_file):
                os.remove(tag_file)

        self.send_answer("terminate_ok" if all_stopped else "terminate_error")

        # At the end of this procedure RunControl is back to "idle" mode
        return "idle"

    def now_str(self):
        """Return the current local time formatted as "YYYY-MM-DD HH:MM:SS"."""
        # NOTE(review): a method with this same name and body is already defined
        # earlier in this class; being the later definition, this is the one in effect
        timestamp_format = "%Y-%m-%d %H:%M:%S"
        return time.strftime(timestamp_format, time.localtime())
# Example #3
class Run:
    def __init__(self):
        """Define installation paths, control-file names and the DB handle.

        Locations come from the environment variables USER, PADME_DAQ_DIR,
        PADME_RC_TUNNEL_BASE_PORT, HOME and PADME, each with a fallback value.
        All run parameters are then reset through set_default_config().
        """

        env = os.environ

        # Account under which the RunControl runs
        self.user_account = env.get('USER', "daq")

        # Location of DAQ main directory (default: current dir)
        self.daq_dir = env.get('PADME_DAQ_DIR', ".")

        # Base port number for network tunnels
        self.base_port_number = int(env.get('PADME_RC_TUNNEL_BASE_PORT', "31400"))

        # Id file for passwordless ssh command execution
        self.ssh_id_file = env.get('HOME', "~") + "/.ssh/id_rsa_daq"

        # Location of padme-fw software (default: PADME_DAQ_DIR/padme-fw)
        self.padme_fw = env.get('PADME', self.daq_dir + "/padme-fw")

        # Executables to use in production
        self.daq_executable = self.padme_fw + "/PadmeDAQ/PadmeDAQ.exe"
        self.merger_executable = self.padme_fw + "/Level1/PadmeMerger.exe"
        self.level1_executable = self.padme_fw + "/Level1/PadmeLevel1.exe"
        self.trigger_executable = self.padme_fw + "/PadmeTrig/PadmeTrig.exe"

        # Directory containing setup subdirectories
        self.setup_root_dir = self.daq_dir + "/setup"

        # Files where the current and last run name will be written
        self.current_run_file = self.daq_dir + "/run/current_run"
        self.last_run_file = self.daq_dir + "/run/last_run"

        # Directory containing rawdata directories for each run
        self.rawdata_root_dir = "%s/local/rawdata" % self.daq_dir

        # Name and position of control files
        self.control_dir = "%s/local/run" % self.daq_dir
        self.start_file = "%s/start" % self.control_dir
        self.quit_file = "%s/quit" % self.control_dir
        self.trig_start_file = "%s/start_trig" % self.control_dir
        self.trig_stop_file = "%s/stop_trig" % self.control_dir
        self.initok_file_head = "%s/initok" % self.control_dir
        self.initfail_file_head = "%s/initfail" % self.control_dir
        self.lock_file_head = "%s/lock" % self.control_dir

        # Connect to database services
        self.db = PadmeDB()

        # Do not define a default setup
        self.setup = ""

        # Run final status defaults to 3 (stopped normally)
        self.final_status = 3

        # Needs daq_dir and rawdata_root_dir, so it must come last
        self.set_default_config()

    def change_run(self):
        """Derive all per-run names from the run number and configure processes.

        Returns True on success. Returns False when the (non-zero) run number
        is already present in the DB or when the run cannot be created there.
        """

        # Check if requested run number was not used before
        # Saves the day if more than one RunControl program is running at the same time (DON'T DO THAT!!!)
        if self.run_number and self.db.is_run_in_db(self.run_number):
            print("Run::change_run - ERROR - Run %d is already in the DB: cannot use it again" % self.run_number)
            print("Please check if someone else is using this RunControl before retrying")
            return False

        # Run name is built from run number and (UTC) start time
        start_stamp = time.strftime("%Y%m%d_%H%M%S", time.gmtime())
        self.run_name = "run_%7.7d_%s" % (self.run_number, start_stamp)

        # Write run name to current_run file for monitoring
        with open(self.current_run_file, "w") as run_tag:
            run_tag.write(self.run_name + "\n")

        # Directories and files of this run
        self.run_dir = "%s/runs/%s" % (self.daq_dir, self.run_name)
        self.config_dir = self.run_dir + "/cfg"
        self.config_file = self.run_name + ".cfg"
        self.config_file_head = self.run_name
        self.log_dir = self.run_dir + "/log"
        self.log_file_head = self.run_name
        self.stream_dir = self.daq_dir + "/local/streams/" + self.run_name
        self.stream_head = self.run_name
        self.rawdata_dir = self.rawdata_root_dir + "/" + self.run_name
        self.rawdata_head = self.run_name

        # Make sure Merger runs on a different node after each run
        self.merger_node = self.next_merger_node()

        # Configure Merger, Trigger, ADC boards and Level1 processes for this run
        self.runconfig_merger(self.merger)
        self.runconfig_trigger(self.trigger)
        for board in self.adcboard_list:
            self.runconfig_adcboard(board)
        for lvl1_proc in self.level1_list:
            self.runconfig_level1(lvl1_proc)

        # Run number 0 is not a real run: nothing to create in the DB
        if not self.run_number:
            return True

        print("Creating Run %d structure in DB" % self.run_number)
        if self.create_run_in_db() == "error":
            print("Run::change_run - ERROR - Cannot create Run in the DB")
            return False

        return True

    def next_merger_node(self):

        # If merger node is currently not defined, set it to first in list or localhost
        if not self.merger_node:
            if self.merger_node_list:
                return self.merger_node_list[0]
            else:
                return "localhost"

        # If the list of nodes was not defined, keep using always the same node
        if not self.merger_node_list: return self.merger_node

        # Find current node in node list and use next one
        use_next = False
        for node in self.merger_node_list:
            if use_next: return node
            if node == self.merger_node:
                use_next = True

        # Current node was the last in the list, so we restart with the first node in the list
        if use_next: return self.merger_node_list[0]

        # Handle misconfigurations (should never happen)
        print "Run::next_merger_node - WARNING: current merger host %s is not in the merger node list %s" % (
            self.merger_node, self.merger_node_list)
        print "                        Will use first node in list: %s" % self.merger_node_list[
            0]
        return self.merger_node_list[0]

    def set_default_config(self):

        # Clean up Run configuration and set all run parameters to default

        self.adcboard_list = []

        self.trigger = None

        self.merger = None

        self.level1_list = []

        self.run_number = 0
        self.run_name = "run_%7.7d_%s" % (
            self.run_number, time.strftime("%Y%m%d_%H%M%S", time.gmtime()))
        self.run_type = "TEST"
        self.run_user = "******"
        self.run_comment_start = "Generic start of run"
        self.run_comment_end = "Generic end of run"

        self.run_dir = "%s/runs/%s" % (self.daq_dir, self.run_name)

        self.config_dir = "%s/cfg" % self.run_dir
        self.config_file = "%s.cfg" % self.run_name
        self.config_file_head = self.run_name

        self.log_dir = "%s/log" % self.run_dir
        self.log_file_head = self.run_name

        self.stream_dir = "%s/local/streams/%s" % (self.daq_dir, self.run_name)
        self.stream_head = self.run_name

        self.rawdata_dir = "%s/%s" % (self.rawdata_root_dir, self.run_name)
        self.rawdata_head = self.run_name

        self.trigger_node = "localhost"

        self.merger_node = "localhost"
        self.merger_node_list = []

        self.level1_nproc = 1
        self.level1_maxevt = 10000

        self.total_daq_time = 0

    def read_setup(self):

        # Define regular expressions used in file parsing
        re_empty = re.compile("^\s*$")
        re_comment = re.compile("^\s*#")
        re_param = re.compile("^\s*(\w+)\s+(.+?)\s*$")
        re_boardid = re.compile("\d+")
        re_board_link = re.compile("^\s*(\d+)\s+(\S+)\s+(\d+)\s+(\d+)\s*$")

        # Read general run configuration from file
        setup_file = "%s/%s/run.cfg" % (self.setup_root_dir, self.setup)
        if (not os.path.isfile(setup_file)):
            print "Run - ERROR: setup file %s not found" % setup_file
            return "error"
        f = open(setup_file)
        self.boardid_list = []
        self.board_link_list = []
        for l in f:
            if (re_empty.search(l) or re_comment.search(l)): continue
            m = re_param.search(l)
            if (m):
                (p_name, p_value) = m.group(1, 2)
                if (p_name == "boardid_list"):
                    # Get sorted list of board ids while removing duplicates
                    s_boards = sorted(list(set(re_boardid.findall(p_value))),
                                      key=int)
                    for s_bid in s_boards:
                        # Convert to int as findall returns strings
                        self.boardid_list.append(int(s_bid))
                elif (p_name == "board_link"):
                    mm = re_board_link.search(p_value)
                    if (mm):
                        self.board_link_list.append(mm.group(1, 2, 3, 4))
                    else:
                        print "Run - WARNING: unable to decode board_link parameter while reading setup file %s" % (
                            setup_file, )
                        print l
                elif (p_name == "total_daq_time"):
                    self.total_daq_time = int(p_value)
                elif (p_name == "trigger_node"):
                    self.trigger_node = p_value
                #elif (p_name == "trigger_mask"):
                #    self.trigger_mask = p_value
                elif (p_name == "merger_node"):
                    self.merger_node = p_value
                elif (p_name == "merger_node_list"):
                    self.merger_node_list = p_value.split()
                elif (p_name == "level1_nproc"):
                    self.level1_nproc = int(p_value)
                elif (p_name == "level1_maxevt"):
                    self.level1_maxevt = int(p_value)
                else:
                    print "Run - WARNING: unknown parameter %s found while reading setup file %s" % (
                        p_name, setup_file)
            else:
                print "Run - WARNING: unknown line format found while reading setup file %s" % (
                    p_name, setup_file)
                print l
        f.close()

        return "ok"

    def config_list(self):

        cfg_list = []

        cfg_list.append(["user_account", self.user_account])
        cfg_list.append(["daq_dir", self.daq_dir])
        cfg_list.append(["base_port_number", self.base_port_number])
        cfg_list.append(["ssh_id_file", self.ssh_id_file])

        cfg_list.append(["daq_executable", self.daq_executable])
        cfg_list.append(["trigger_executable", self.trigger_executable])
        cfg_list.append(["merger_executable", self.merger_executable])
        cfg_list.append(["level1_executable", self.level1_executable])

        cfg_list.append(["start_file", self.start_file])
        cfg_list.append(["quit_file", self.quit_file])
        cfg_list.append(["trig_start_file", self.trig_start_file])
        cfg_list.append(["trig_stop_file", self.trig_stop_file])
        cfg_list.append(["initok_file_head", self.initok_file_head])
        cfg_list.append(["initfail_file_head", self.initfail_file_head])
        cfg_list.append(["lock_file_head", self.lock_file_head])
        cfg_list.append(["rawdata_dir", self.rawdata_dir])

        cfg_list.append(["run_number", str(self.run_number)])
        cfg_list.append(["run_name", self.run_name])
        cfg_list.append(["run_dir", self.run_dir])
        cfg_list.append(["run_type", self.run_type])
        cfg_list.append(["run_user", self.run_user])
        cfg_list.append(["run_comment_start", self.run_comment_start])
        cfg_list.append(["setup", self.setup])

        s_board_list = ""
        for b in self.boardid_list:
            if (s_board_list):
                s_board_list += " %d" % b
            else:
                s_board_list = "%d" % b
        cfg_list.append(["board_list", s_board_list])

        for b in self.boardid_list:
            for link in self.board_link_list:
                (board, host, port, node) = link
                if b == int(board):
                    board_link = "%s %s %s %s" % (board, host, port, node)
                    cfg_list.append([
                        "board_link",
                        "%s %s %s %s" % (board, host, port, node)
                    ])

        cfg_list.append(["config_dir", self.config_dir])
        cfg_list.append(["config_file", self.config_file])
        cfg_list.append(["config_file_head", self.config_file_head])

        cfg_list.append(["log_dir", self.log_dir])
        cfg_list.append(["log_file_head", self.log_file_head])

        cfg_list.append(["stream_dir", self.stream_dir])
        cfg_list.append(["stream_head", self.stream_head])

        cfg_list.append(["rawdata_dir", self.rawdata_dir])
        cfg_list.append(["rawdata_head", self.rawdata_head])

        cfg_list.append(["trigger_node", self.trigger_node])

        if self.merger_node:
            cfg_list.append(["merger_node", self.merger_node])

        if self.merger_node_list:
            cfg_list.append(
                ["merger_node_list", " ".join(self.merger_node_list)])

        cfg_list.append(["level1_nproc", str(self.level1_nproc)])
        cfg_list.append(["level1_maxevt", str(self.level1_maxevt)])

        cfg_list.append(["total_daq_time", str(self.total_daq_time)])

        return cfg_list

    def format_config(self):

        cfgstring = ""
        for cfg in self.config_list():
            cfgstring += "%-30s %s\n" % (cfg[0], cfg[1])
        return cfgstring

    def create_run_in_db(self):
        """Create the run and the structures of all its processes in the DB.

        Returns "ok", or "error" as soon as one of the process structures
        cannot be created.
        """

        db = self.db
        run_number = self.run_number

        # Create run in DB
        db.create_run(run_number, self.run_name, self.run_type)
        db.set_run_time_create(run_number, db.now_str())
        db.set_run_user(run_number, self.run_user)
        db.set_run_comment_start(run_number, db.now_str(), self.run_comment_start)

        # Add all configuration parameters
        for entry in self.config_list():
            db.add_cfg_para_run(run_number, entry[0], entry[1])

        # Create board structures in DB: DAQ process first, then ZSUP process
        for board in self.adcboard_list:
            if board.create_proc_daq() == "error":
                print("Run::create_run - ERROR - Cannot create DAQ process for board %d in the DB" % board.board_id)
                return "error"
            if board.create_proc_zsup() == "error":
                print("Run::create_run - ERROR - Cannot create ZSUP process for board %d in the DB" % board.board_id)
                return "error"

        # Create Trigger structure in DB
        trigger_result = self.trigger.create_trigger()
        if trigger_result == "error":
            print("Run::create_run - ERROR - Cannot create Trigger process in the DB")
            return "error"

        # Create Merger structure in DB
        merger_result = self.merger.create_merger()
        if merger_result == "error":
            print("Run::create_run - ERROR - Cannot create Merger process in the DB")
            return "error"

        # Create Level1 structures in DB
        for lvl1_proc in self.level1_list:
            if lvl1_proc.create_level1() == "error":
                print("Run::create_run - ERROR - Cannot create Level1 process for level1 %d in the DB" % lvl1_proc.level1_id)
                return "error"

        return "ok"

    def create_log_dir(self):

        # Create log directory for this run (make sure the full tree is there)
        if not os.path.exists(self.log_dir): os.makedirs(self.log_dir, 0755)

    def write_config(self):
        """Write the configuration files of the run and of all its processes.

        The run configuration goes to <config_dir>/<config_file>. Merger,
        Trigger, ADC boards and Level1 processes then write their own files
        through their write_config() methods.
        """

        # Create config directory for this run (make sure the full tree is there)
        if not os.path.exists(self.config_dir):
            os.makedirs(self.config_dir, 0o755)

        print("Writing configuration file %s" % self.config_file)
        # The with statement closes the file even if the write fails
        with open(self.config_dir + "/" + self.config_file, "w") as f:
            f.write(self.format_config())

        print("Writing configuration file %s for Merger" % self.merger.config_file)
        self.merger.write_config()

        print("Writing configuration file %s for Trigger" % self.trigger.config_file)
        self.trigger.write_config()

        for adc in self.adcboard_list:
            print("Writing configuration files %s and %s for ADC board %d" % (
                adc.config_file_daq, adc.config_file_zsup, adc.board_id))
            adc.write_config()

        for lvl1 in self.level1_list:
            print("Writing configuration files %s for Level1 %d" % (
                lvl1.config_file, lvl1.level1_id))
            lvl1.write_config()

    def print_config(self):

        print self.format_config()

    def create_fifos(self):
        """Create the stream directories and the FIFO files used by the run.

        Processes with node_id 0 are served with local os calls. For all
        other nodes an ssh command is printed and then executed with
        os.system.
        """

        trigger = self.trigger
        merger = self.merger

        def make_local_stream_dir():
            # Create the stream directory (full tree) unless it is already there
            if not os.path.exists(self.stream_dir):
                os.makedirs(self.stream_dir, 0o755)

        def run_via_ssh(node_ip, remote_command, ssh_options="-i"):
            # Print and execute a command on another node
            command = "ssh %s %s %s '( %s )'" % (
                ssh_options, self.ssh_id_file, node_ip, remote_command)
            print(command)
            os.system(command)

        # Create stream directory and fifo file for Trigger
        if trigger.node_id == 0:
            make_local_stream_dir()
            os.mkfifo(trigger.output_stream)
        else:
            run_via_ssh(trigger.node_ip, "mkdir -p %s ; mkfifo %s" % (
                self.stream_dir, trigger.output_stream))

        # Create stream directories and fifo files for ADCBoards
        for board in self.adcboard_list:
            if board.node_id == 0:
                make_local_stream_dir()
                os.mkfifo(board.output_stream_daq)
                os.mkfifo(board.output_stream_zsup)
            else:
                run_via_ssh(board.node_ip, "mkdir -p %s ; mkfifo %s %s" % (
                    self.stream_dir, board.output_stream_daq,
                    board.output_stream_zsup))

        # Create stream directory on Merger
        if merger.node_id == 0:
            make_local_stream_dir()
        else:
            run_via_ssh(merger.node_ip, "mkdir -p %s" % self.stream_dir)

        # Create fifo files to connect Merger to Level1 processes
        for lvl1_proc in self.level1_list:
            if lvl1_proc.node_id == 0:
                os.mkfifo(lvl1_proc.input_stream)
            else:
                run_via_ssh(lvl1_proc.node_ip,
                            "mkfifo %s" % lvl1_proc.input_stream)

        # If Trigger process runs on a node which is different from the Merger
        # we must replicate the Trigger FIFO file on the Merger
        if trigger.node_id != merger.node_id:
            if merger.node_id == 0:
                os.mkfifo(trigger.output_stream)
            else:
                run_via_ssh(merger.node_ip,
                            "mkfifo %s" % trigger.output_stream, "-n -i")

        # If an ADCBoard process runs on a node which is different from the Merger
        # we must replicate the ZSUP FIFO file on the Merger
        if merger.node_id == 0:
            for board in self.adcboard_list:
                if board.node_id != merger.node_id:
                    os.mkfifo(board.output_stream_zsup)
        else:
            # More efficient if we give a single (long) ssh command.
            # Each stream name is preceded by a blank, as in the original command.
            remote_streams = "".join([
                " %s" % board.output_stream_zsup
                for board in self.adcboard_list
                if board.node_id != merger.node_id
            ])
            if remote_streams:
                run_via_ssh(merger.node_ip,
                            "mkfifo %s" % remote_streams, "-n -i")

    def create_receivers(self):

        # Keep track of receiver processes and handles: needed for final celeanup
        self.proc_rcv = []
        self.hand_rcv = []

        # If an ADCBoard process runs on a node which is different from the Merger
        # we create the receiving end of a network tunnel
        for adc in (self.adcboard_list):

            # Check if ADCBoard and Merger run on different nodes
            if adc.node_id != self.merger.node_id:

                # Define port for network tunnel
                port_number = self.base_port_number + adc.board_id
                print "Creating receiving end of network tunnel for board %d on port %d" % (
                    adc.board_id, port_number)

                # Define log file and open it
                log_file = "%s/%s_nc_%d_recv.log" % (
                    self.log_dir, self.log_file_head, port_number)
                log_handle = open(log_file, "w")
                self.hand_rcv.append(log_handle)

                # Open receiving end of tunnel on Merger node
                command = "nc -l -k -v --recv-only %s %d > %s < /dev/zero" % (
                    self.merger.node_ip, port_number, adc.output_stream_zsup)
                #command = "nc --udp -l -v --recv-only %s %d > %s < /dev/zero"%(self.merger.node_ip,port_number,adc.output_stream_zsup)
                if self.merger.node_id != 0:
                    command = "ssh -f -i %s %s '( %s )'" % (
                        self.ssh_id_file, self.merger.node_ip, command)
                print command
                try:
                    proc = subprocess.Popen(shlex.split(command),
                                            stdout=log_handle,
                                            stderr=subprocess.STDOUT,
                                            bufsize=1)
                    self.proc_rcv.append(proc)
                except OSError as e:
                    print "Run::create_receivers - ERROR: Execution failed: %s", e
                time.sleep(0.5)

        # If the Trigger process runs on a node which is different from the Merger
        # we create the receiving end of a network tunnel
        if self.trigger.node_id != self.merger.node_id:

            # Define port for network tunnel
            port_number = self.base_port_number + 99
            print "Creating receiving end of network tunnel for trigger on port %d" % port_number

            # Define log file and open it
            log_file = "%s/%s_nc_%d_recv.log" % (
                self.log_dir, self.log_file_head, port_number)
            log_handle = open(log_file, "w")
            self.hand_rcv.append(log_handle)

            # Open receiving end of tunnel on Merger node
            #command = "nc -l -k -v --recv-only %s %d > %s < /dev/zero"%(self.merger.node_ip,port_number,self.trigger.output_stream)
            command = "nc --udp -l -v --recv-only %s %d > %s < /dev/zero" % (
                self.merger.node_ip, port_number, self.trigger.output_stream)
            if self.merger.node_id != 0:
                command = "ssh -f -i %s %s '( %s )'" % (
                    self.ssh_id_file, self.merger.node_ip, command)
            print command
            try:
                proc = subprocess.Popen(shlex.split(command),
                                        stdout=log_handle,
                                        stderr=subprocess.STDOUT,
                                        bufsize=1)
                self.proc_rcv.append(proc)
            except OSError as e:
                print "Run::create_receivers - ERROR: Execution failed: %s", e
            time.sleep(0.5)

    def create_senders(self):

        # Keep track of sender processes and handles: needed for final celeanup
        self.proc_snd = []
        self.hand_snd = []

        # If an ADCBoard process runs on a node which is different from the Merger
        # we create the sending end of a network tunnel
        for adc in (self.adcboard_list):

            # Check if ADCBoard and Merger run on different nodes
            if adc.node_id != self.merger.node_id:

                # Define port for network tunnel
                port_number = self.base_port_number + adc.board_id
                print "Creating sending end of network tunnel for board %d on port %d" % (
                    adc.board_id, port_number)

                # Define log file and open it
                log_file = "%s/%s_nc_%d_send.log" % (
                    self.log_dir, self.log_file_head, port_number)
                log_handle = open(log_file, "w")
                self.hand_snd.append(log_handle)

                # Open sending end of tunnel on DAQ node. Add some code to wait for receiving end to appear before proceeding.
                command = "while ! nc -z %s %d ; do sleep 1 ; done ; nc -v --send-only %s %d < %s > /dev/null" % (
                    self.merger.node_ip, port_number, self.merger.node_ip,
                    port_number, adc.output_stream_zsup)
                #command = "nc -v --udp --send-only %s %d < %s > /dev/null"%(self.merger.node_ip,port_number,adc.output_stream_zsup)
                if adc.node_id != 0:
                    command = "ssh -f -i %s %s '( %s )'" % (
                        self.ssh_id_file, adc.node_ip, command)
                print command
                try:
                    proc = subprocess.Popen(shlex.split(command),
                                            stdout=log_handle,
                                            stderr=subprocess.STDOUT,
                                            bufsize=1)
                    self.proc_snd.append(proc)
                except OSError as e:
                    print "Run::create_senders - ERROR: Execution failed: %s", e
                time.sleep(0.5)

        # If the Trigger process runs on a node which is different from the Merger
        # we create the sending end of a network tunnel
        if self.trigger.node_id != self.merger.node_id:

            # Define port for network tunnel
            port_number = self.base_port_number + 99
            print "Creating sending end of network tunnel for trigger on port %d" % port_number

            # Define log file and open it
            log_file = "%s/%s_nc_%d_send.log" % (
                self.log_dir, self.log_file_head, port_number)
            log_handle = open(log_file, "w")
            self.hand_snd.append(log_handle)

            # Open sending end of tunnel on Trigger node. Add some code to wait for receiving end to appear before proceeding.
            #command = "while ! nc -z %s %d ; do sleep 1 ; done ; nc -v --send-only %s %d < %s > /dev/null"%(self.merger.node_ip,port_number,self.merger.node_ip,port_number,self.trigger.output_stream)
            command = "nc -v --udp --send-only %s %d < %s > /dev/null" % (
                self.merger.node_ip, port_number, self.trigger.output_stream)
            if adc.node_id != 0:
                command = "ssh -f -i %s %s '( %s )'" % (
                    self.ssh_id_file, self.trigger.node_ip, command)
            print command
            try:
                proc = subprocess.Popen(shlex.split(command),
                                        stdout=log_handle,
                                        stderr=subprocess.STDOUT,
                                        bufsize=1)
                self.proc_snd.append(proc)
            except OSError as e:
                print "Run::create_senders - ERROR: Execution failed: %s", e
            time.sleep(0.5)

    def create_merger_input_list(self):

        print "Creating merger input list file %s" % self.merger.input_list
        f = open(self.merger.input_list, "w")
        for adcboard in self.adcboard_list:
            f.write("%d %s\n" %
                    (adcboard.board_id, adcboard.output_stream_zsup))
        f.write("%d %s\n" % (99, self.trigger.output_stream))
        f.close()

    def create_merger_output_list(self):

        print "Creating merger output list file %s" % self.merger.output_list
        f = open(self.merger.output_list, "w")
        for lvl1 in self.level1_list:
            f.write("%s\n" % (lvl1.input_stream))
        f.close()

    def create_level1_output_dirs(self):

        for level1 in self.level1_list:
            print "Creating output dir %s for level1 %d" % (level1.output_dir,
                                                            level1.level1_id)
            if level1.node_id == 0:
                if not os.path.exists(level1.output_dir):
                    os.makedirs(level1.output_dir, 0755)
            else:
                command = "ssh -i %s %s '( mkdir -p %s )'" % (
                    self.ssh_id_file, level1.node_ip, level1.output_dir)
                print command
                os.system(command)

    def change_setup(self, setup):
        """Switch the run to a new setup and recreate all process handlers.

        The configuration is reset to default, the setup file is read and new
        ADCBoard, Trigger, Merger and Level1 handlers are created and
        configured.

        Returns "error" if the setup cannot be read, the setup name otherwise.
        """

        # Reset run configuration to its default values
        self.set_default_config()

        # Read new setup
        self.setup = setup
        if self.read_setup() == "error":
            return "error"

        # Create new set of ADC board processes (DAQ and ZSUP) handlers
        node_ids = self.daq_nodes_id_list = []
        for board_id in self.boardid_list:
            print("Run - Configuring ADC board %d" % board_id)
            board = ADCBoard(board_id)
            self.configure_adcboard(board)
            self.adcboard_list.append(board)
            node_ids.append(board.node_id)

        # Get unique list of DAQ nodes (needed to create start/stop files)
        self.daq_nodes_id_list = list(set(node_ids))

        # Store ip addresses of DAQ nodes in a dictionary
        ip_by_node = self.daq_nodes_ip_list = {}
        for node_id in self.daq_nodes_id_list:
            ip_by_node[node_id] = self.db.get_node_daq_ip(node_id)

        # Create new Trigger process handler
        self.trigger = Trigger()
        self.configure_trigger(self.trigger)

        # Create new Merger process handler
        self.merger = Merger()
        self.configure_merger(self.merger)

        # Create new set of Level1 process handlers
        for lvl1_id in range(self.level1_nproc):
            print("Run - Configuring Level1 process %d" % lvl1_id)
            lvl1_proc = Level1(lvl1_id)
            self.configure_level1(lvl1_proc)
            self.level1_list.append(lvl1_proc)

        return setup

    def configure_adcboard(self, adcboard):
        """Configure the DAQ/ZSUP processes of an ADC board after changing setup.

        The board is reset to its defaults, then executable, lock/control/
        status files, connection information and total DAQ time are set and
        finally the board reads its own settings for the current setup.
        """

        def board_file(head, kind):
            # File name of the form <head>_b<board id>_<kind>
            return "%s_b%02d_%s" % (head, adcboard.board_id, kind)

        # Reset ADC board to default configuration
        adcboard.set_default_config()

        # Set executable
        adcboard.executable = self.daq_executable

        # Lock files (will contain PID of processes)
        adcboard.lock_file_daq = board_file(self.lock_file_head, "daq")
        adcboard.lock_file_zsup = board_file(self.lock_file_head, "zsup")

        # Control files needed to start/stop DAQ (will disappear)
        adcboard.start_file = self.start_file
        adcboard.quit_file = self.quit_file

        # Status files for initialization
        adcboard.initok_file_daq = board_file(self.initok_file_head, "daq")
        adcboard.initok_file_zsup = board_file(self.initok_file_head, "zsup")
        adcboard.initfail_file_daq = board_file(self.initfail_file_head, "daq")
        adcboard.initfail_file_zsup = board_file(self.initfail_file_head, "zsup")

        # Define board connection information from the links of this board
        for (board, host, port, node) in self.board_link_list:
            if int(board) != adcboard.board_id:
                continue
            adcboard.node_id = self.db.get_node_id(host)
            adcboard.node_ip = self.db.get_node_daq_ip(adcboard.node_id)
            adcboard.conet2_link = int(port)
            adcboard.conet2_slot = int(node)

        # Define total DAQ time (default: 0, i.e. run forever)
        # In most cases the default is what you want
        adcboard.total_daq_time = self.total_daq_time

        # Read ADC settings from board setup file
        adcboard.read_setup(self.setup)

    def configure_trigger(self, trigger):
        """Apply the current setup to the Trigger process handler.

        The handler is reset to its default configuration, then given the
        trigger executable, its lock file, the trigger start/stop tag files,
        its initok/initfail status files, the node it runs on (resolved
        through the DB from trigger_node) and the total DAQ time. Finally
        the handler reads its own settings for the current setup.
        """

        # Always start from the default trigger configuration
        trigger.set_default_config()

        trigger.executable = self.trigger_executable

        # Lock file (it will hold the PID of the trigger process)
        trigger.lock_file = "%s_trigger" % self.lock_file_head

        # Tag files used to start/stop trigger generation; note that the
        # handler's quit file is the run's trigger *stop* file
        trigger.start_file = self.trig_start_file
        trigger.quit_file = self.trig_stop_file

        # Files reporting success/failure of the trigger initialization
        trigger.initok_file = "%s_trigger" % self.initok_file_head
        trigger.initfail_file = "%s_trigger" % self.initfail_file_head

        # Node hosting the Trigger process and its DAQ IP address
        trigger.node_id = self.db.get_node_id(self.trigger_node)
        trigger.node_ip = self.db.get_node_daq_ip(trigger.node_id)

        # The trigger mask is currently not taken from the run:
        #trigger.trigger_mask = self.trigger_mask

        # Total DAQ time (default: 0, i.e. run forever), which is what is
        # wanted in most cases
        trigger.total_daq_time = self.total_daq_time

        # Let the handler load its settings for the current setup
        trigger.read_setup(self.setup)

    def configure_merger(self, merger):
        """Apply the current setup to the Merger process handler.

        The handler is first reset to its default configuration and then
        pointed at the merger executable defined for this run.
        """

        # The reset must come first: the executable is assigned after it
        merger.set_default_config()
        merger.executable = self.merger_executable

    def configure_level1(self, level1):
        """Apply the current setup to one Level1 process handler.

        The handler is first reset to its default configuration, then given
        the Level1 executable and the maximum number of events to write in
        a single file.
        """

        # The reset must come first: both values are assigned after it
        level1.set_default_config()

        level1.executable = self.level1_executable

        # Upper limit on the number of events written to a single file
        level1.max_events = self.level1_maxevt

    def runconfig_adcboard(self, adcboard):
        """Set the run-dependent parameters of one ADC board handler.

        Assigns the run number, the process mode ("FAKE" when the run type
        is "FAKE", "DAQ" for any other run type) and the per-board config
        file, log file and stream names for both the DAQ and ZSUP stages.
        The ZSUP input stream is the DAQ output stream.
        """

        adcboard.run_number = self.run_number

        adcboard.process_mode = "FAKE" if self.run_type == "FAKE" else "DAQ"

        # Board tag shared by all per-board file names, e.g. "b05"
        board_tag = "b%02d" % adcboard.board_id

        # Configuration files
        cfg_args = (self.config_dir, self.config_file_head, board_tag)
        adcboard.config_file_daq = "%s/%s_%s_daq.cfg" % cfg_args
        adcboard.config_file_zsup = "%s/%s_%s_zsup.cfg" % cfg_args

        # Log files
        log_args = (self.log_dir, self.log_file_head, board_tag)
        adcboard.log_file_daq = "%s/%s_%s_daq.log" % log_args
        adcboard.log_file_zsup = "%s/%s_%s_zsup.log" % log_args

        # Data streams: DAQ writes the stream that ZSUP reads
        stream_args = (self.stream_dir, self.stream_head, board_tag)
        adcboard.output_stream_daq = "%s/%s_%s_daq" % stream_args
        adcboard.input_stream_zsup = adcboard.output_stream_daq
        adcboard.output_stream_zsup = "%s/%s_%s_zsup" % stream_args

    def runconfig_trigger(self, trigger):
        """Set the run-dependent parameters of the Trigger process handler.

        Assigns the run number and the names of the trigger config file,
        log file and output stream for the current run.
        """

        trigger.run_number = self.run_number

        cfg_args = (self.config_dir, self.config_file_head)
        trigger.config_file = "%s/%s_trigger.cfg" % cfg_args

        log_args = (self.log_dir, self.log_file_head)
        trigger.log_file = "%s/%s_trigger.log" % log_args

        stream_args = (self.stream_dir, self.stream_head)
        trigger.output_stream = "%s/%s_trigger" % stream_args

    def runconfig_merger(self, merger):
        """Set the run-dependent parameters of the Merger process handler.

        Assigns the run number, the node id and DAQ IP address of the merger
        node (both obtained from the DB), and the names of the merger config
        file, log file and input/output list files for the current run.
        """

        merger.run_number = self.run_number

        # Node hosting the Merger, resolved through the DB
        merger.node_id = self.db.get_node_id(self.merger_node)
        merger.node_ip = self.db.get_node_daq_ip(merger.node_id)

        cfg_args = (self.config_dir, self.config_file_head)
        merger.config_file = "%s/%s_merger.cfg" % cfg_args

        log_args = (self.log_dir, self.log_file_head)
        merger.log_file = "%s/%s_merger.log" % log_args

        # The input/output lists live next to the configuration file
        list_args = (self.config_dir, self.config_file_head)
        merger.input_list = "%s/%s_merger_input.list" % list_args
        merger.output_list = "%s/%s_merger_output.list" % list_args

    def runconfig_level1(self, level1):
        """Set the run-dependent parameters of one Level1 process handler.

        Assigns the run number, the node id and DAQ IP address (looked up in
        the DB for the merger node), the per-process config file, log file
        and input stream names, and the raw data output directory and file
        header.
        """

        level1.run_number = self.run_number

        # The node is looked up in the DB using the Merger node name
        level1.node_id = self.db.get_node_id(self.merger_node)
        level1.node_ip = self.db.get_node_daq_ip(level1.node_id)

        # Process tag shared by all per-process names, e.g. "lvl1_02"
        proc_tag = "lvl1_%02d" % level1.level1_id

        cfg_args = (self.config_dir, self.config_file_head, proc_tag)
        level1.config_file = "%s/%s_%s.cfg" % cfg_args

        log_args = (self.log_dir, self.log_file_head, proc_tag)
        level1.log_file = "%s/%s_%s.log" % log_args

        stream_args = (self.stream_dir, self.stream_head, proc_tag)
        level1.input_stream = "%s/%s_%s" % stream_args

        # Raw data files go to the raw data directory
        level1.output_dir = self.rawdata_dir
        level1.output_header = "%s_%s" % (self.rawdata_head, proc_tag)

    def start(self):
        """Start the run on all DAQ nodes and enable the triggers.

        The start tag file is created on every node of daq_nodes_id_list
        (directly for node 0, through ssh for any other node). After a
        5 second pause the trigger start tag file is created on the trigger
        node in the same way. If a run number is set, the run start time and
        the RUNNING status are then recorded in the DB. The exit status of
        the ssh commands is not checked.
        """

        # Remote command used to create a tag file on another node
        touch_cmd = "ssh -n -i %s %s '( touch %s )'"

        # Create the "start the run" tag file on all DAQ nodes
        print("Starting DAQs")
        for node_id in self.daq_nodes_id_list:
            if node_id != 0:
                cmd = touch_cmd % (self.ssh_id_file,
                                   self.daq_nodes_ip_list[node_id],
                                   self.start_file)
                print(cmd)
                os.system(cmd)
            else:
                # Node 0 is handled locally: create an empty file
                open(self.start_file, 'w').close()

        # Give the DAQ processes 5sec before enabling triggers
        time.sleep(5)

        # Enable triggers
        print("Enabling triggers")
        if self.trigger.node_id != 0:
            cmd = touch_cmd % (self.ssh_id_file, self.trigger.node_ip,
                               self.trig_start_file)
            print(cmd)
            os.system(cmd)
        else:
            open(self.trig_start_file, 'w').close()

        # Nothing to record in the DB when no run number is set
        if not self.run_number:
            return

        # Update run status in DB
        self.db.set_run_time_start(self.run_number, self.db.now_str())
        self.db.set_run_status(self.run_number, self.db.DB_RUN_STATUS_RUNNING)

    def stop(self):
        """Disable the triggers, stop all DAQ nodes and finalize the run.

        The trigger stop tag file is created on the trigger node (directly
        for node 0, through ssh otherwise). After a 1 second pause the quit
        tag file is created on every node of daq_nodes_id_list in the same
        way. The run name is then written to the last_run file and, if a run
        number is set, the final status, stop time and end-of-run comment
        are recorded in the DB. The exit status of the ssh commands is not
        checked.
        """

        # Remote command used to create a tag file on another node
        touch_cmd = "ssh -n -i %s %s '( touch %s )'"

        # Disable triggers
        print("Disabling triggers")
        if self.trigger.node_id != 0:
            cmd = touch_cmd % (self.ssh_id_file, self.trigger.node_ip,
                               self.trig_stop_file)
            print(cmd)
            os.system(cmd)
        else:
            # Node 0 is handled locally: create an empty file
            open(self.trig_stop_file, 'w').close()

        # Wait 1sec before telling all processes to stop
        time.sleep(1)

        # Create the "stop the run" tag file on all DAQ nodes
        for node_id in self.daq_nodes_id_list:
            if node_id != 0:
                cmd = touch_cmd % (self.ssh_id_file,
                                   self.daq_nodes_ip_list[node_id],
                                   self.quit_file)
                print(cmd)
                os.system(cmd)
            else:
                open(self.quit_file, 'w').close()

        # Write run name to last_run file for monitoring
        with open(self.last_run_file, "w") as last_run:
            last_run.write("%s\n" % self.run_name)

        # Nothing to record in the DB when no run number is set
        if not self.run_number:
            return

        # Finalize run in DB
        self.db.set_run_status(self.run_number, self.final_status)
        self.db.set_run_time_stop(self.run_number, self.db.now_str())
        self.db.set_run_comment_end(self.run_number, self.db.now_str(),
                                    self.run_comment_end)

    def clean_up(self):
        """Remove run control files and shut down the nc transfer processes.

        For node 0 the files are removed directly with os.remove (only if
        they exist); for any other node an "ssh ... rm -f" command is
        printed and run through os.system. The exit status of those
        commands is not checked. Afterwards the receiving nc processes are
        stopped, the nc processes that have exited are reaped and the nc
        log file handlers are closed.
        """

        # Clean up control directories at end of run
        print "Cleaning up run directories"

        # Remove the "start/stop the run" tag files on all DAQ nodes
        for node_id in self.daq_nodes_id_list:
            if (node_id == 0):
                if (os.path.exists(self.start_file)):
                    os.remove(self.start_file)
                if (os.path.exists(self.quit_file)): os.remove(self.quit_file)
            else:
                command = "ssh -n -i %s %s '( rm -f %s %s )'" % (
                    self.ssh_id_file, self.daq_nodes_ip_list[node_id],
                    self.start_file, self.quit_file)
                print command
                os.system(command)

        # Remove the "start/stop the triggers" tag files on Trigger node
        if (self.trigger.node_id == 0):
            if (os.path.exists(self.trig_start_file)):
                os.remove(self.trig_start_file)
            if (os.path.exists(self.trig_stop_file)):
                os.remove(self.trig_stop_file)
        else:
            command = "ssh -n -i %s %s '( rm -f %s %s )'" % (
                self.ssh_id_file, self.trigger.node_ip, self.trig_start_file,
                self.trig_stop_file)
            print command
            os.system(command)

        # Remove initok/initfail files for all ADC nodes
        # (one ssh command per board, not per node)
        for adc in (self.adcboard_list):
            if (adc.node_id == 0):
                if (os.path.exists(adc.initok_file_daq)):
                    os.remove(adc.initok_file_daq)
                if (os.path.exists(adc.initok_file_zsup)):
                    os.remove(adc.initok_file_zsup)
                if (os.path.exists(adc.initfail_file_daq)):
                    os.remove(adc.initfail_file_daq)
                if (os.path.exists(adc.initfail_file_zsup)):
                    os.remove(adc.initfail_file_zsup)
            else:
                command = "ssh -n -i %s %s '( rm -f %s %s %s %s)'" % (
                    self.ssh_id_file, adc.node_ip, adc.initok_file_daq,
                    adc.initok_file_zsup, adc.initfail_file_daq,
                    adc.initfail_file_zsup)
                print command
                os.system(command)

        # Remove initok/initfail files on Trigger node
        if (self.trigger.node_id == 0):
            if (os.path.exists(self.trigger.initok_file)):
                os.remove(self.trigger.initok_file)
            if (os.path.exists(self.trigger.initfail_file)):
                os.remove(self.trigger.initfail_file)
        else:
            command = "ssh -n -i %s %s '( rm -f %s %s)'" % (
                self.ssh_id_file, self.trigger.node_ip,
                self.trigger.initok_file, self.trigger.initfail_file)
            print command
            os.system(command)

        # Stop all receiving nc processes on the merger nodes (clumsy but could not find another way)
        # Local merger: terminate the processes held in proc_rcv.
        # Remote merger: list the processes of the current user (USER from
        # the environment, "daq" if unset), keep the lines containing
        # "recv-only" and pass the second column of each line to kill.
        # The \$2 is sent with its backslash so that it is awk, and not the
        # remote shell, that expands $2 inside the double quotes.
        # NOTE(review): unlike the other ssh commands in this method, this
        # one is run without the -n option - confirm this is intended.
        if (self.merger.node_id == 0):
            for proc in self.proc_rcv:
                proc.terminate()
        else:
            command = "ssh -i %s %s '( ps -fu %s | grep recv-only | grep -v bash | grep -v grep | awk \"{print \$2}\" | xargs kill )'" % (
                self.ssh_id_file, self.merger.node_ip, os.getenv(
                    'USER', "daq"))
            print command
            os.system(command)

        # Now we can clean up all nc processes
        # A process is waited for only if poll() already reports it as
        # finished; otherwise a problem is printed and it is left alone.
        # NOTE(review): presumably these are subprocess.Popen objects; if
        # so, terminate() above only sends the signal, so poll() may still
        # return None here for a process that is about to exit - confirm.
        # NOTE(review): "is not None" is the idiomatic form of "!= None".
        for proc in self.proc_snd:
            if proc.poll() != None:
                proc.wait()
            else:
                print "Run::clean_up - Problem closing sending nc process"
        for proc in self.proc_rcv:
            if proc.poll() != None:
                proc.wait()
            else:
                print "Run::clean_up - Problem closing receiving nc process"

        # Close all receiving/sending nc log files
        for handler in self.hand_rcv:
            handler.close()
        for handler in self.hand_snd:
            handler.close()