Beispiel #1
0
    def __init__(self):
        """Initialize Depman from ``sys.argv`` and start the simulation.

        Expected command line (every position shifts by one when ``-i`` is
        present)::

            [-i] -np <num_cores> <restart_exec> <exec> <p1> <p2> [params...]

        * ``-i`` enables fault-injection mode.
        * ``<restart_exec>`` and ``<exec>`` are prefixed with the
          module-level ``simrun_path``.
        * ``<p1>`` and ``<p2>`` are forwarded to the executable with the
          remaining parameters, and their integer product is stored as
          ``self.cells``.

        The process exits with status 1 when fewer than eight ``sys.argv``
        entries are supplied, when ``-np`` is not at the expected position,
        or when ``self.scc_env_check()`` returns a false value.
        """
        # Parse arguments. The message lists what this variant really takes
        # (there is no hostfile / -nue option here).
        if len(sys.argv) < 8:
            sys.exit("Please provide enough arguments "
                     "(-np <cores>, restart executable, executable, parameters)")

        offset = 1  # index of the first positional argument

        # Optional leading '-i' switches on fault injection and shifts
        # every following argument by one position.
        self.fault_injection = False
        if sys.argv[1] == '-i':
            self.fault_injection = True
            offset = 2

        if sys.argv[offset] != '-np':
            print("ERROR: -np argument not specified")
            sys.exit(1)
        self.num_cores = int(sys.argv[offset + 1])

        if not self.scc_env_check():
            print("ERROR: SCCKit not found in PATH")
            sys.exit(1)

        # Set executables: both paths are relative to simrun_path.
        self.restart_exec = simrun_path + sys.argv[offset + 2]
        main_exec = simrun_path + sys.argv[offset + 3]
        # Command line of the simulation: executable followed by its parameters.
        self.exec_list = [main_exec] + sys.argv[(offset + 4):]

        # The first two executable parameters multiply to the cell count.
        self.cells = int(sys.argv[offset + 4]) * int(sys.argv[offset + 5])
        self.update_cellcount()

        # Configure logging (the log file is truncated on every start).
        logging.basicConfig(format='%(asctime)s %(levelname)s: %(message)s',
                            datefmt='%d/%m/%Y %I:%M:%S %p',
                            filename='infoli.log', filemode='w',
                            level=logging.DEBUG)

        # Set simulation directory as attribute.
        self.sim_dir = sim_dump_location

        # (infoli-specific) previous maximum recoverable simulation step
        self.prev_globalmax = 0
        # min_step was 120000 for infoli
        self.min_step = 0
        self.checkpoints = []     # locations of checkpoints

        # The depman lock is held by the master thread while a simulation
        # is running.
        self.lock = Lock()

        # Start the simulation before creating the diagnostics.
        self.simrun(self.exec_list)

        # Wait for the task to be initially spawned at the SCC.
        # TODO: no error detection at this time
        sleep(4)

        # Create the DVFS object.
        self.dvfs = dvfs(self)

        # Instantiate only the diagnostics enabled in the module-level
        # `diagnostics` collection.
        self.diagnostics = []
        if 'processExit' in diagnostics:
            self.diagnostics.append(processExit(self))

        # Initialize the countermeasure procedure.
        self.current_counter_proc = []

        # Initialize MTTF and MTTR estimation.
        self.timestamp = 0  # initialized for when no diagnostics ever fail
        self.mttf_values = deque([], moving_avg_N)
        self.mttr_values = []
        self.failure_timestamp = time()

        # Start the fault injection manager if requested.
        if self.fault_injection:
            self.injector = injectorManager(self.diagnostics)
            logging.info("Fault Injection module initialized")

        # Set the killfoli SIGINT handler.
        signal(SIGINT, self.sigint_handler)
Beispiel #2
0
    def __init__(self):
        """Initialize Depman from ``sys.argv`` and start the simulation.

        Expected command line (every position shifts by one when ``-i`` is
        present)::

            [-i] -nue <num_cores> -f <hostfile> <restart_exec> <exec> <p1> <p2> [params...]

        * ``-i`` enables fault-injection mode.
        * ``<hostfile>`` is resolved against the current working directory;
          every non-blank line is stripped and prefixed with ``'rck'`` to
          form a core name.
        * ``<exec>`` and everything after it become ``self.exec_list``.
        * ``int(<p1>) * int(<p2>)`` is stored as ``self.cells``.

        The process exits when too few arguments are supplied, when ``-nue``
        or ``-f`` is not at the expected position, when
        ``self.scc_env_check()`` returns a false value, when the hostfile
        lists fewer cores than requested, or when ``safe_location`` cannot
        be created.
        """
        usage_error = ("Please provide enough arguments (nue, hostfile, "
                       "restart executable, executable, parameters)")

        # Parse arguments. This first check only guarantees that the leading
        # flags can be inspected safely.
        if len(sys.argv) < 8:
            sys.exit(usage_error)

        offset = 1  # index of the first positional argument

        # Optional leading '-i' switches on fault injection and shifts
        # every following argument by one position.
        self.fault_injection = False
        if sys.argv[1] == '-i':
            self.fault_injection = True
            offset = 2

        # The highest index read below is offset + 7 (second cell
        # dimension); fail cleanly instead of raising IndexError later.
        if len(sys.argv) < offset + 8:
            sys.exit(usage_error)

        if sys.argv[offset] != '-nue':
            print("ERROR: -nue argument not specified")
            sys.exit(1)
        self.num_cores = int(sys.argv[offset + 1])

        if sys.argv[offset + 2] != '-f':
            print("ERROR: -f argument not specified")
            sys.exit(1)
        self.hostfile = sys.argv[offset + 3]

        if not self.scc_env_check():
            print("ERROR: SCCKit not found in PATH")
            sys.exit(1)

        # Set executables.
        self.restart_exec = sys.argv[offset + 4]
        self.exec_list = sys.argv[(offset + 5):]

        # Parse the initial list of cores from the hosts file. The context
        # manager closes the file even if reading fails.
        hostfile_path = os.path.join(os.getcwd(), self.hostfile)
        with open(hostfile_path, 'r') as hostfd:
            host_lines = hostfd.read().splitlines()
        # Attribute retained (file already closed) in case other code
        # still refers to it.
        self.hostfd = hostfd
        # Skip blank/whitespace-only lines so they do not yield a bare 'rck'.
        self.initial_cores = ['rck' + line.strip()
                              for line in host_lines if line.strip()]
        self.cores = self.initial_cores[:]

        # The first two executable parameters multiply to the cell count.
        self.cells = int(sys.argv[offset + 6]) * int(sys.argv[offset + 7])
        self.update_cellcount()

        if len(self.cores) < self.num_cores:
            print("ERROR: less cores in host file than requested")
            sys.exit(1)
        elif len(self.cores) > self.num_cores:
            print("WARNING: hostfile contains more cores than requested, some may not be used")
            # Keep the first num_cores entries; the surplus stays unused.
            self.cores = self.cores[:self.num_cores]

        # Configure logging (the log file is truncated on every start).
        logging.basicConfig(format='%(asctime)s %(levelname)s: %(message)s',
                            datefmt='%d/%m/%Y %I:%M:%S %p',
                            filename='infoli.log', filemode='w',
                            level=logging.DEBUG)

        # Set simulation directory as attribute.
        self.sim_dir = sim_dump_location

        # (infoli-specific) previous maximum recoverable simulation step
        self.prev_globalmax = 0
        # min_step was 120000 for infoli
        self.min_step = 0
        self.checkpoints = []     # locations of checkpoints

        # Create the safe location if it doesn't exist.
        if not os.path.exists(safe_location):
            try:
                os.makedirs(safe_location)
            except OSError:
                logging.error("Error during safe location creation")
                sys.exit(1)
            else:
                logging.info("Safe location directory successfully created")

        # The depman lock is held by the master thread while a simulation
        # is running.
        self.lock = Lock()

        # Start the simulation before creating the diagnostics.
        self.rccerun(self.exec_list)

        # Wait for the task to be initially spawned at the SCC.
        # TODO: no error detection at this time
        sleep(4)

        # Create the DVFS object.
        self.dvfs = dvfs(self)
        # Initialize voltage files for the DVFS procedure. The script's
        # output is discarded via os.devnull: an unread PIPE could block
        # the child once the pipe buffer fills up.
        with open(os.devnull, 'w') as devnull:
            call('./execute_sccBmc.sh', stdout=devnull)

        # Instantiate only the diagnostics enabled in the module-level
        # `diagnostics` collection.
        self.diagnostics = []
        if 'processExit' in diagnostics:
            self.diagnostics.append(processExit(self))

        print(self.diagnostics)

        # Initialize the countermeasure procedure.
        self.current_counter_proc = []

        # Initialize MTTF and MTTR estimation.
        self.timestamp = 0  # initialized for when no diagnostics ever fail
        self.mttf_values = deque([], moving_avg_N)
        self.mttr_values = []
        self.failure_timestamp = time()

        # Start the fault injection manager if requested.
        if self.fault_injection:
            self.injector = injectorManager(self.diagnostics)
            logging.info("Fault Injection module initialized")

        # Set the killfoli SIGINT handler.
        signal(SIGINT, self.sigint_handler)

        # Launch the energy monitor as a background process and keep its
        # handle on the instance.
        print('run monitor3V3SCC')
        monitor = './monitor3V3SCC'
        self.monitorEnergy = Popen(shlex.split(monitor))