def _write_probe(self, path, faults, benchmarks, fault_cores=None, bench_cores=None):
    """
    Writes the probe workload file, if the write_probe option is enabled.

    One entry is emitted for every fault and benchmark command, each with a low
    fixed duration (by default, 5 secs), so each command type appears once.

    :param path: The path to the probe workload file
    :param faults: The list of fault program paths
    :param benchmarks: The list of benchmark commands/paths
    :param fault_cores: The list of core IDs strings on which each fault is allowed to run
    :param bench_cores: The list of core IDs strings on which each benchmark is allowed to run
    """
    probe_writer = CSVWriter(path)
    fixed_duration = 5
    timestamp = 0
    seq_num = 0
    # One probe entry per fault command; the duration is embedded in the args
    for pos, fault_cmd in enumerate(faults):
        cores = fault_cores[pos] if fault_cores is not None else None
        entry = Task(args=fault_cmd.format(fixed_duration), duration=fixed_duration,
                     timestamp=timestamp, isFault=True, seqNum=seq_num, cores=cores)
        probe_writer.write_entry(entry)
        timestamp += fixed_duration
        seq_num += 1
    # One probe entry per benchmark command, appended after the faults
    for pos, bench_cmd in enumerate(benchmarks):
        cores = bench_cores[pos] if bench_cores is not None else None
        entry = Task(args=bench_cmd, duration=fixed_duration, timestamp=timestamp,
                     isFault=False, seqNum=seq_num, cores=cores)
        probe_writer.write_entry(entry)
        timestamp += fixed_duration
        seq_num += 1
    probe_writer.close()
def _pregen_benchmarks(self, benchmarks, bench_p=None, bench_cores=None, span_limit=36000, size_limit=None):
    """
    Generates and returns a list of benchmark tasks

    The internal logic of the algorithm is identical as in generate().

    :param benchmarks: The list of benchmark commands/paths to be used
    :param bench_p: A list of probabilities for each benchmark command
    :param bench_cores: List in which each element is a string of core IDs on which the benchmark
        is allowed to run
    :param span_limit: The time limit of the workload
    :param size_limit: The size limit of the workload
    :return: A list of benchmark-related Task objects
    """
    generated = []
    span = 0
    last_duration = 0
    while (size_limit is None or len(generated) < size_limit) and span < span_limit:
        gap = self.benchTimeGenerator.pick()
        # When overlap is disabled, stretch the inter-arrival time past the
        # duration of the previous benchmark
        while not self._bench_overlap and last_duration >= gap:
            gap += self.benchTimeGenerator.pick()
        span += gap
        last_duration = self.benchDurGenerator.pick()
        task = Task(duration=int(last_duration), timestamp=int(span), isFault=False)
        # Draw a random benchmark entry according to the given probabilities
        picked = choice(len(benchmarks), p=bench_p)
        task.args = benchmarks[picked].format(task.duration)
        if bench_cores is not None:
            task.cores = bench_cores[picked]
        task.seqNum = len(generated)
        generated.append(task)
    return generated
def read_entry(self):
    """
    Reads one Task entry from the CSV file

    :return: a Task object, or None when no stream is open or the file is exhausted
    """
    if self._reader is None:
        return None
    try:
        raw_entry = next(self._reader)
    except (StopIteration, IOError):
        # These exceptions correspond to the stream reaching the end of the file
        self._rfile.close()
        return None
    # After reading the line, we strip all eventually present spaces and tabs
    cleaned = {key.strip(): value.strip() for key, value in raw_entry.items()}
    cleaned = self._resolve_none_entries(cleaned)
    # We convert the dict read from the line to a Task object
    task = Task.dict_to_task(cleaned)
    if task is None:
        CSVReader.logger.error(
            "Input workload entry is malformed: please check that the fields are named "
            "like the attributes of the Task class.")
    return task
def listen(self):
    """
    Listens for incoming fault injection requests and executes them
    """
    InjectorEngine.logger.info("FINJ Injection Engine v%s started" % VER_ID)
    signal.signal(signal.SIGINT, self._signalhandler)
    signal.signal(signal.SIGTERM, self._signalhandler)
    self._subman.start_subprocesses()
    self._server.start()
    self._pool.start()
    while True:
        # Block until a new request arrives
        addr, msg = self._server.pop_msg_queue()
        msg_type = msg[MessageBuilder.FIELD_TYPE]
        from_master = self._master is not None and addr == self._master
        if msg_type in (MessageBuilder.COMMAND_START_SESSION, MessageBuilder.COMMAND_END_SESSION):
            # Session commands are processed accordingly
            self._update_session(addr, msg)
        elif msg_type == MessageBuilder.COMMAND_SET_TIME and from_master:
            # The set time is sent by the master after a successful ack and defines
            # when the 'workload' is started
            self._pool.reset_session(msg[MessageBuilder.FIELD_TIME], time())
        elif msg_type == MessageBuilder.COMMAND_CORRECT_TIME and from_master:
            # Clock correction request issued by the master
            self._pool.correct_time(msg[MessageBuilder.FIELD_TIME])
        elif msg_type == MessageBuilder.COMMAND_TERMINATE:
            # Processing a termination command
            self._check_for_termination(addr, msg)
        elif msg_type == MessageBuilder.COMMAND_START and addr == self._master:
            # A new command issued by the current session master is queued on the thread pool
            self._pool.submit_task(Task.msg_to_task(msg))
        elif msg_type == MessageBuilder.COMMAND_GREET:
            reply = MessageBuilder.status_greet(time(), self._pool.active_tasks(),
                                                self._master is not None)
            self._server.send_msg(addr, reply)
        else:
            InjectorEngine.logger.warning(
                'Invalid command sent from non-master host %s', formatipport(addr))
def write_entry(self, entry):
    """
    Writes a Task to the output file

    :param entry: the Task object that is to be converted and written to CSV
    :return: True if successful, False otherwise
    """
    if self._writer is None:
        CSVWriter.logger.error('No open file stream to write to')
        return False
    if not isinstance(entry, Task):
        CSVWriter.logger.error('Input Task to write_entry is malformed')
        return False
    try:
        row = self._trim_none_values(Task.task_to_dict(entry))
        self._writer.writerow(row)
        self._wfile.flush()
    except (StopIteration, IOError):
        # The underlying stream can no longer be written to
        self._wfile.close()
        return False
    return True
def __init__(self, path):
    """
    Constructor for the class

    Opens the output CSV stream and writes the header row. On failure, the
    writer is set to None and any partially-opened file handle is closed.

    :param path: Path of the output file
    """
    super().__init__(path)
    # The fields of the output file always correspond to those of the Task class
    self._fieldnames = sorted(list(vars(Task())))
    fieldict = {k: k for k in self._fieldnames}
    self._wfile = None
    try:
        # newline='' is required by the csv module: without it, platforms that
        # translate line endings (e.g. Windows) produce spurious blank rows
        self._wfile = open(self._path, 'w', newline='')
        self._writer = csv.DictWriter(self._wfile, fieldnames=self._fieldnames,
                                      delimiter=CSVWriter.DELIMITER_CHAR,
                                      quotechar=CSVWriter.QUOTE_CHAR,
                                      restval=CSVWriter.NONE_VALUE)
        self._writer.writerow(fieldict)
    except (FileNotFoundError, IOError):
        CSVWriter.logger.error('Cannot write workload to path %s' % self._path)
        # Close the handle if the stream was opened before the failure occurred,
        # so that it does not leak (the original code left it open)
        if self._wfile is not None:
            self._wfile.close()
        self._writer = None
def generate(self, faults, benchmarks, fault_p=None, bench_p=None, fault_cores=None, bench_cores=None,
             span_limit=36000, size_limit=None):
    """
    Generates a full workload consisting of benchmark and fault program tasks, in CSV format.

    :param faults: The list of fault program paths to be used. It is HIGHLY suggested that each entry
        contain a Python formatting field ({}) in order to allow embedding the duration of the task in
        the command. This implies that all fault programs should accept duration specifications (in
        seconds) in their arguments. This is a VERY important fail-safe, and prevents orphan process
        situations in unexpected scenarios.
    :param benchmarks: The list of benchmark program commands/paths to be used.
    :param fault_p: Optional. It is a list containing a probability for each fault entry, and must thus
        be of the same length as faults.
    :param bench_p: Optional. It is a list containing a probability for each benchmark entry, and must
        thus be of the same length as benchmarks.
    :param fault_cores: Optional. It is a list in which each element is a string of core IDs on which
        the fault program is allowed to run. The formatting is that of NUMACTL.
    :param bench_cores: Optional. It is a list in which each element is a string of core IDs on which
        the benchmark program is allowed to run. The formatting is that of NUMACTL.
    :param span_limit: The time limit for the workload's duration, expressed in seconds
    :param size_limit: Optional. The size limit of the workload, in terms of tasks. When both span_limit
        and size_limit are active, the generation stops as soon as whichever limit is reached first.
    :raises AttributeError: If span_limit is None.
    """
    # Argument correctness checks: malformed probability/core lists fall back to defaults
    if span_limit is None:
        raise AttributeError('Span limit cannot be None!')
    if fault_p is None or len(fault_p) != len(faults):
        fault_p = [1 / len(faults)] * len(faults)
    if bench_p is None or len(bench_p) != len(benchmarks):
        bench_p = [1 / len(benchmarks)] * len(benchmarks)
    if fault_cores is not None and (not isinstance(fault_cores, (list, tuple)) or
                                    len(fault_cores) != len(faults)):
        fault_cores = None
    if bench_cores is not None and (not isinstance(bench_cores, (list, tuple)) or
                                    len(bench_cores) != len(benchmarks)):
        bench_cores = None
    # The list of benchmark tasks is generated and stored beforehand
    bench_list = self._pregen_benchmarks(benchmarks, bench_p, bench_cores, span_limit, size_limit)
    writer = CSVWriter(self._path)
    cur_size = 0
    cur_span = 0
    cur_dur = 0
    # Index cursor over bench_list: replaces the original O(n) pop(0) calls
    bench_idx = 0
    while (size_limit is None or cur_size < size_limit) and cur_span < span_limit:
        # We draw a new inter-arrival time for the next fault
        next_ttf = self.faultTimeGenerator.pick()
        # If faults cannot overlap, the inter-arrival time is forced to be beyond the duration
        # of the previous fault. This could slightly alter the final distribution
        while not self._fault_overlap and cur_dur >= next_ttf:
            next_ttf += self.faultTimeGenerator.pick()
        cur_span += next_ttf
        # We draw a new duration value for the fault
        cur_dur = self.faultDurGenerator.pick()
        # We build the corresponding Task object, and draw a random fault entry from the faults list
        t = Task(duration=int(cur_dur), timestamp=int(cur_span), isFault=True)
        t_idx = choice(len(faults), p=fault_p)
        t.args = faults[t_idx].format(t.duration)
        if fault_cores is not None:
            t.cores = fault_cores[t_idx]
        # At each generated fault, we first write all benchmarks that come earlier. This ensures
        # that the final workload is timestamp-ordered
        while bench_idx < len(bench_list) and bench_list[bench_idx].timestamp < t.timestamp:
            b = bench_list[bench_idx]
            bench_idx += 1
            b.seqNum = cur_size
            writer.write_entry(b)
            cur_size += 1
        # We write the fault's task and bind a sequence number to it
        t.seqNum = cur_size
        writer.write_entry(t)
        cur_size += 1
    # BUGFIX: benchmarks with timestamps later than the last generated fault used to be
    # silently dropped. They are now flushed, while still honoring the size limit
    # (their timestamps already respect span_limit by construction in _pregen_benchmarks)
    while bench_idx < len(bench_list) and (size_limit is None or cur_size < size_limit):
        b = bench_list[bench_idx]
        bench_idx += 1
        b.seqNum = cur_size
        writer.write_entry(b)
        cur_size += 1
    writer.close()
    if self._probe:
        self._write_probe(self._probe_path, faults, benchmarks, fault_cores, bench_cores)