def __init__(self):
    '''
    Depman initialization.

    Reads the command line from sys.argv, launches the simulation through
    simrun() and prepares DVFS, diagnostics, MTTF/MTTR bookkeeping and the
    optional fault injector.

    Expected sys.argv layout:
        [-i] -np <num_cores> <restart_exec> <exec> <n1> <n2> [...]

    '-i' switches fault injection on.  Both executable names are prefixed
    with simrun_path, and self.cells is int(<n1>) * int(<n2>).

    Exits the process when fewer than 8 argv entries are present, when
    '-np' is not found at its expected position, or when scc_env_check()
    fails.
    '''
    argv = sys.argv
    if len(argv) < 8:
        sys.exit("Please provide enough arguments (nue, hostfile, restart executable, executable, parameters)")

    # An optional leading '-i' selects fault injection mode and shifts every
    # following argument one position to the right.
    self.fault_injection = argv[1] == '-i'
    base = 2 if self.fault_injection else 1

    if argv[base] != '-np':
        print("ERROR: -np argument not specified")
        sys.exit(1)
    self.num_cores = int(argv[base + 1])

    if not self.scc_env_check():
        print("ERROR: SCCKit not found in PATH")
        sys.exit(1)

    # Executables: the restart binary and the simulation command line that
    # is later handed to simrun(). No '-f <hostfile>' option is read here.
    self.restart_exec = simrun_path + argv[base + 2]
    sim_exec = simrun_path + argv[base + 3]
    sim_params = argv[base + 4:]
    self.exec_list = [sim_exec] + sim_params

    # The first two simulation parameters multiply into the cell count.
    self.cells = int(sim_params[0]) * int(sim_params[1])
    self.update_cellcount()

    # Logging goes to infoli.log, truncated on every start.
    log_settings = {
        'format': '%(asctime)s %(levelname)s: %(message)s',
        'datefmt': '%d/%m/%Y %I:%M:%S %p',
        'filename': 'infoli.log',
        'filemode': 'w',
        'level': logging.DEBUG,
    }
    logging.basicConfig(**log_settings)

    # Simulation directory and checkpoint bookkeeping.
    self.sim_dir = sim_dump_location
    # (infoli-specific) previous maximum recoverable simulation step
    self.prev_globalmax = 0
    # min step was 120000 for infoli
    self.min_step = 0
    # locations of checkpoints
    self.checkpoints = []

    # The depman lock is held by the master thread while a simulation is running
    self.lock = Lock()

    # Launch the simulation, then give the task time to be spawned on the
    # SCC.  TODO: no error detection at this time
    self.simrun(self.exec_list)
    sleep(4)

    # DVFS object is created before the diagnostics list exists.
    self.dvfs = dvfs(self)

    self.diagnostics = []
    if 'processExit' in diagnostics:
        self.diagnostics.append(processExit(self))

    # Countermeasure procedure currently in progress (none yet).
    self.current_counter_proc = []

    # MTTF and MTTR estimation; timestamp stays 0 when no diagnostic ever fails.
    self.timestamp = 0
    self.mttf_values = deque(maxlen=moving_avg_N)
    self.mttr_values = []
    self.failure_timestamp = time()

    # Start the fault injection manager only when '-i' was given.
    if self.fault_injection:
        self.injector = injectorManager(self.diagnostics)
        logging.info("Fault Injection module initialized")

    # Set the killfoli sigint handler
    signal(SIGINT, self.sigint_handler)
def __init__(self):
    '''
    Depman initialization.

    Parses the command line from sys.argv, reads the hostfile, launches the
    simulation through rccerun() and sets up DVFS, diagnostics, MTTF/MTTR
    bookkeeping, the optional fault injector and the energy monitor process.

    Expected sys.argv layout:
        [-i] -nue <num_cores> -f <hostfile> <restart_exec> <exec> <n1> <n2> [...]

    '-i' enables fault injection.  <hostfile> is resolved against the
    current working directory; every non-blank line of it, stripped and
    prefixed with 'rck', becomes one entry of self.initial_cores.
    self.cells is int(<n1>) * int(<n2>).

    Exits the process (SystemExit) when too few arguments are given, when
    '-nue' or '-f' is missing, when scc_env_check() fails, when the hostfile
    lists fewer cores than requested, or when the safe location cannot be
    created.
    '''
    argv = sys.argv

    # An optional leading '-i' selects fault injection mode and shifts every
    # following argument one position to the right.
    self.fault_injection = len(argv) > 1 and argv[1] == '-i'
    offset = 2 if self.fault_injection else 1

    # The highest index read below is argv[offset + 7], so offset + 8 entries
    # are required; checking this up front avoids a late IndexError.
    if len(argv) < offset + 8:
        sys.exit("Please provide enough arguments (nue, hostfile, restart executable, executable, parameters)")

    if argv[offset] != '-nue':
        print("ERROR: -nue argument not specified")
        sys.exit(1)
    self.num_cores = int(argv[offset + 1])

    if argv[offset + 2] != '-f':
        print("ERROR: -f argument not specified")
        sys.exit(1)
    self.hostfile = argv[offset + 3]

    if not self.scc_env_check():
        print("ERROR: SCCKit not found in PATH")
        sys.exit(1)

    # set executables
    self.restart_exec = argv[offset + 4]
    self.exec_list = argv[offset + 5:]

    # Parse the initial list of cores from the hosts file.  The handle is
    # still stored on self.hostfd (closed afterwards), but 'with' guarantees
    # it is closed even if reading fails.
    hostfile_path = os.path.join(os.getcwd(), self.hostfile)
    with open(hostfile_path, 'r') as self.hostfd:
        host_lines = self.hostfd.read().splitlines()
    # Strip before filtering so whitespace-only lines do not turn into a
    # bogus core named 'rck'.
    self.initial_cores = ['rck' + line.strip() for line in host_lines if line.strip()]
    self.cores = self.initial_cores[:]

    self.cells = int(argv[offset + 6]) * int(argv[offset + 7])
    self.update_cellcount()

    if len(self.cores) < self.num_cores:
        print("ERROR: less cores in host file than requested")
        sys.exit(1)
    elif len(self.cores) > self.num_cores:
        print("WARNING: hostfile contains more cores than requested, some may not be used")
        # Keep the first num_cores entries.  Slicing from num_cores onwards
        # would drop the requested cores and could leave fewer cores than
        # requested.  The full list remains in self.initial_cores.
        self.cores = self.cores[:self.num_cores]

    # configure logging
    logging.basicConfig(format='%(asctime)s %(levelname)s: %(message)s',
                        datefmt='%d/%m/%Y %I:%M:%S %p',
                        filename='infoli.log', filemode='w',
                        level=logging.DEBUG)

    # set simulation directory as attribute
    self.sim_dir = sim_dump_location
    # (infoli-specific) previous maximum recoverable simulation step
    self.prev_globalmax = 0
    # min step was 120000 for infoli
    self.min_step = 0
    # locations of checkpoints
    self.checkpoints = []

    # create the safe location if it doesn't exist
    if not os.path.exists(safe_location):
        try:
            os.makedirs(safe_location)
        except OSError:
            logging.error("Error during safe location creation")
            # sys.exit rather than the interactive-only site helper exit()
            sys.exit(1)
        else:
            logging.info("Safe location directory successfully created")

    # The depman lock is held by the master thread while a simulation is running
    self.lock = Lock()

    # Start the simulation, then wait for the task to be initially spawned at
    # the SCC.  TODO: no error detection at this time
    self.rccerun(self.exec_list)
    sleep(4)

    # creating DVFS object
    self.dvfs = dvfs(self)

    # Initialize voltage files for the DVFS procedure.  Output is discarded
    # via os.devnull: call() never reads a PIPE, so a chatty script could
    # fill the pipe buffer and block forever.
    with open(os.devnull, 'w') as devnull:
        call('./execute_sccBmc.sh', stdout=devnull)

    self.diagnostics = []
    if 'processExit' in diagnostics:
        self.diagnostics.append(processExit(self))
    print(self.diagnostics)

    # Initialize the countermeasure procedure
    self.current_counter_proc = []

    # Initialize MTTF and MTTR estimation
    self.timestamp = 0  # initialized for when no diagnostics ever fail
    self.mttf_values = deque([], moving_avg_N)
    self.mttr_values = []
    self.failure_timestamp = time()

    # Start the fault injection manager if requested
    if self.fault_injection:
        self.injector = injectorManager(self.diagnostics)
        logging.info("Fault Injection module initialized")

    # Set the killfoli sigint handler
    signal(SIGINT, self.sigint_handler)

    # Launch the energy monitor as a background process and keep its handle.
    print('run monitor3V3SCC')
    monitor = './monitor3V3SCC'
    self.monitorEnergy = Popen(shlex.split(monitor))