def get_data(self, outfile):
    """Parse the collected power output and write it to ``outfile`` as CSV.

    Must be called after collection has been stopped; raises RuntimeError
    if the collection process is still running.  Returns a
    ``MeasurementsCsv`` wrapping the generated file.
    """
    if self.process:
        raise RuntimeError('`get_data` called before `stop`')
    stdout, stderr = self.output

    active_sites = [chan.site for chan in self.active_channels]
    want_output = 'output' in active_sites
    want_usb = 'USB' in active_sites

    with csvwriter(outfile) as writer:
        # Column headers for the requested sites only.
        headers = []
        if want_output:
            headers.append('output_power')
        if want_usb:
            headers.append('USB_power')
        writer.writerow(headers)

        # Every collected line is a "main_output usb_output" measurement
        # pair; both are always collected, and any site the user did not
        # request is simply dropped here.
        for line in stdout.splitlines():
            main_value, usb_value = line.split()
            row = []
            if want_output:
                row.append(main_value)
            if want_usb:
                row.append(usb_value)
            writer.writerow(row)

    return MeasurementsCsv(outfile, self.active_channels, self.sample_rate_hz)
def write(self):
    """Serialise the parallel-stats records to ``self.filepath`` as CSV."""
    header = ['cluster', 'number_of_cores', 'total_time',
              '%time', '%running_time']
    with csvwriter(self.filepath) as writer:
        writer.writerow(header)
        writer.writerows(self.values)
def get_data(self, outfile):
    """Convert the raw CSV capture into ``outfile`` with standard units.

    The 'timestamp ms' column is kept in milliseconds; every other active
    channel value is scaled from milli-units down to standard units.
    Returns a ``MeasurementsCsv`` over the generated file, or ``None``
    (with a warning) if the raw capture is empty.
    """
    if os.stat(self.raw_data_file).st_size == 0:
        self.logger.warning('"{}" appears to be empty'.format(
            self.raw_data_file))
        return

    all_channels = [c.label for c in self.list_channels()]
    active_channels = [c.label for c in self.active_channels]
    active_indexes = [all_channels.index(label) for label in active_channels]

    with csvreader(self.raw_data_file, skipinitialspace=True) as reader:
        with csvwriter(outfile) as writer:
            writer.writerow(active_channels)
            header = next(reader)
            ts_index = header.index('timestamp ms')
            for row in reader:
                converted = []
                for idx in active_indexes:
                    value = float(row[idx])
                    # Timestamps stay in ms; everything else is recorded
                    # in milli-units and scaled to standard units.
                    converted.append(value if idx == ts_index else value / 1000)
                writer.writerow(converted)

    return MeasurementsCsv(outfile, self.active_channels, self.sample_rate_hz)
def _write_outputs(self, outputs, output):
    """Write every metric from ``outputs`` into results.csv under ``output``.

    Extra classifier columns come either from the union of classifiers seen
    across all metrics (``use_all_classifiers``) or from the configured
    ``extra_columns`` list.  Raises RuntimeError for an unknown output kind.
    """
    if self.use_all_classifiers:
        seen = set()
        for out in outputs:
            for metric in out.metrics:
                seen.update(metric.classifiers.keys())
        extra_columns = list(seen)
    elif self.extra_columns:
        extra_columns = self.extra_columns
    else:
        extra_columns = []

    outfile = output.get_path('results.csv')
    with csvwriter(outfile) as writer:
        writer.writerow(['id', 'workload', 'iteration', 'metric', ] +
                        extra_columns + ['value', 'units'])

        for o in outputs:
            if o.kind == 'job':
                prefix = [o.id, o.label, o.iteration]
            elif o.kind == 'run':
                # Run-level metrics are not attached to any job, so 'id'
                # and 'iteration' stay blank and the run name fills the
                # 'label' field.
                prefix = [None, o.info.run_name, None]
            else:
                raise RuntimeError(
                    'Output of kind "{}" unrecognised by csvproc'.format(o.kind))

            for metric in o.result.metrics:
                classifier_values = [str(metric.classifiers.get(col, ''))
                                     for col in extra_columns]
                writer.writerow(prefix + [metric.name] + classifier_values +
                                [str(metric.value), metric.units or ''])
def get_data(self, outfile):  # pylint: disable=R0914
    """Decode the raw binary capture and write active channels to ``outfile``.

    The raw file is a sequence of fixed-size records of native-order
    unsigned 32-bit integers (``attributes_per_sample`` values per port);
    values are converted from milli-units to standard units.  Short or
    misaligned records are tolerated once (e.g. a truncated final row)
    and warned about thereafter.  Returns a ``MeasurementsCsv`` over the
    generated file.
    """
    all_channels = [c.label for c in self.list_channels()]
    active_channels = [c.label for c in self.active_channels]
    active_indexes = [all_channels.index(ac) for ac in active_channels]

    num_of_ports = len(self.resistor_values)
    struct_format = '{}I'.format(num_of_ports * self.attributes_per_sample)
    not_a_full_row_seen = False
    self.raw_data_file = os.path.join(self.raw_output_directory, '0000000000')

    self.logger.debug('Parsing raw data file: {}'.format(self.raw_data_file))
    with open(self.raw_data_file, 'rb') as bfile:
        with csvwriter(outfile) as writer:
            writer.writerow(active_channels)
            while True:
                data = bfile.read(num_of_ports * self.bytes_per_sample)
                # BUG FIX: read() on a file opened 'rb' returns bytes, so
                # the original ``data == ''`` EOF test could never be true
                # and the loop never terminated at end of file.
                if not data:
                    break
                try:
                    unpacked_data = struct.unpack(struct_format, data)
                    row = [unpacked_data[i] / 1000 for i in active_indexes]
                    writer.writerow(row)
                except struct.error:
                    # First short record is silently tolerated; any
                    # subsequent one suggests genuine misalignment.
                    if not_a_full_row_seen:
                        self.logger.warning(
                            'possibly missaligned caiman raw data, row contained {} bytes'
                            .format(len(data)))
                        continue
                    else:
                        not_a_full_row_seen = True
    return MeasurementsCsv(outfile, self.active_channels,
                           self.sample_rate_hz)
def get_data(self, outfile):  # pylint: disable=R0914
    """Run the AEP parser over the raw capture and produce ``outfile``.

    Parsed values are in micro-units (micro-seconds / micro-watts) and are
    scaled to standard units.  Cleans up the temporary output directory
    before returning a ``MeasurementsCsv`` over the generated file.
    """
    self.logger.debug("Parse data and compute consumed energy")
    self.parser.prepare(self.output_file_raw, self.output_file,
                        self.output_file_figure)
    self.parser.parse_aep()
    self.parser.unprepare()

    all_channels = [c.label for c in self.list_channels()]
    active_channels = [c.label for c in self.active_channels]
    active_indexes = [all_channels.index(label) for label in active_channels]

    first_row = True
    with csvreader(self.output_file, delimiter=' ') as reader:
        with csvwriter(outfile) as writer:
            for row in reader:
                if first_row:
                    # Replace the parser's header line with our labels.
                    writer.writerow(active_channels)
                    first_row = False
                    continue
                # Skip truncated rows.
                if len(row) < len(active_channels):
                    continue
                # All data are in micro (seconds/watt); scale them down.
                writer.writerow([float(row[i]) / 1000000
                                 for i in active_indexes])

    self.output_fd_error.close()
    shutil.rmtree(self.output_directory)

    return MeasurementsCsv(outfile, self.active_channels, self.sample_rate_hz)
def get_data(self, output_file):
    """Pull the on-target CSV, keep only the active channels, and rewrite it.

    Raises HostError if a requested channel is missing from the pulled
    file.  Returns a ``MeasurementsCsv`` with a fixed 10Hz sample rate.
    """
    temp_file = tempfile.mktemp()
    self.target.pull(self.on_target_file, temp_file)
    self.target.remove(self.on_target_file)

    with csvreader(temp_file) as reader:
        headings = next(reader)

        # Map each active channel onto its column in the pulled csv.
        select_columns = []
        for chan in self.active_channels:
            try:
                select_columns.append(headings.index(chan.name))
            except ValueError:
                raise HostError('Channel "{}" is not in {}'.format(chan.name, temp_file))

        with csvwriter(output_file) as writer:
            writer.writerow(['{}_{}'.format(chan.site, chan.kind)
                             for chan in self.active_channels])
            for row in reader:
                writer.writerow([row[i] for i in select_columns])

    return MeasurementsCsv(output_file, self.active_channels, sample_rate_hz=10)
def _process_without_pandas(self, measurements_csv):
    """Pure-python fallback for deriving fps statistics from frame data.

    Frames with non-zero flags are skipped.  Computes per-frame fps
    (keeping only values above ``drop_threshold``), overall fps across
    the vsync window, and writes the per-frame values to a derived csv.
    Returns the derived metrics plus the csv artifact.
    """
    per_frame_fps = []
    start_vsync = None
    end_vsync = None
    frame_count = 0

    for frame_data in measurements_csv.iter_values():
        if frame_data.Flags_flags != 0:
            continue
        frame_count += 1

        if start_vsync is None:
            start_vsync = frame_data.Vsync_time_us
        end_vsync = frame_data.Vsync_time_us

        frame_time = frame_data.FrameCompleted_time_us - frame_data.IntendedVsync_time_us
        # NOTE(review): 1e9 implies nanosecond timestamps despite the
        # "_us" field suffix -- confirm against the collector's units.
        pff = 1e9 / frame_time
        if pff > self.drop_threshold:
            per_frame_fps.append([pff])

    if frame_count:
        duration = end_vsync - start_vsync
        fps = (1e6 * frame_count) / float(duration)
    else:
        duration = 0
        fps = 0

    csv_file = self._get_csv_file_name(measurements_csv.path)
    with csvwriter(csv_file) as writer:
        writer.writerow(['fps'])
        writer.writerows(per_frame_fps)

    return [DerivedMetric('fps', fps, 'fps'),
            DerivedMetric('total_frames', frame_count, 'frames'),
            MeasurementsCsv(csv_file)]
def get_data(self, outfile):  # pylint: disable=R0914
    """Fetch per-site DAQ traces and merge them into a single CSV.

    Pulls the raw trace files into a temp directory, opens one reader per
    active site, and interleaves the per-site samples into ``outfile``
    with a synthesized time column.  Raises HostError if a (non-"Time")
    site has no trace file.  Returns a ``MeasurementsCsv`` over the
    generated file.
    """
    self.tempdir = tempfile.mkdtemp(prefix='daq-raw-')
    self.daq_client.get_data(self.tempdir)
    # Map each site (file basename without extension) to its trace path.
    raw_file_map = {}
    for entry in os.listdir(self.tempdir):
        site = os.path.splitext(entry)[0]
        path = os.path.join(self.tempdir, entry)
        raw_file_map[site] = path
        self._raw_files.append(path)
    active_sites = unique([c.site for c in self.active_channels])
    # Keep every opened file handle so the finally block can close them.
    file_handles = []
    try:
        site_readers = {}
        for site in active_sites:
            try:
                site_file = raw_file_map[site]
                reader, fh = create_reader(site_file)
                site_readers[site] = reader
                file_handles.append(fh)
            except KeyError:
                # "Time" sites have no trace file; their values are
                # synthesized below from the sample rate instead.
                if not site.startswith("Time"):
                    message = 'Could not get DAQ trace for {}; Obtained traces are in {}'
                    raise HostError(message.format(site, self.tempdir))
        # The first row is the headers
        channel_order = ['Time_time']
        for site, reader in site_readers.items():
            channel_order.extend(
                ['{}_{}'.format(site, kind) for kind in next(reader)])

        def _read_rows():
            # Interleave one sample per site per output row; sites that
            # run out early are padded with a (None, None) pair.
            row_iter = zip_longest(*site_readers.values(), fillvalue=(None, None))
            for raw_row in row_iter:
                raw_row = list(chain.from_iterable(raw_row))
                # Prepend the synthesized timestamp (column 0 ==
                # 'Time_time' in channel_order).
                raw_row.insert(0, _read_rows.row_time_s)
                yield raw_row
                # Advance the clock by one sample period after each row.
                _read_rows.row_time_s += 1.0 / self.sample_rate_hz
        # Function attribute holds the running clock; initialised here,
        # after the definition but before the generator first runs.
        _read_rows.row_time_s = self.target_boottime_clock_at_start

        with csvwriter(outfile) as writer:
            field_names = [c.label for c in self.active_channels]
            writer.writerow(field_names)
            for raw_row in _read_rows():
                # Reorder each merged row into active-channel order.
                row = [raw_row[channel_order.index(f)] for f in field_names]
                writer.writerow(row)
        return MeasurementsCsv(outfile, self.active_channels,
                               self.sample_rate_hz)
    finally:
        for fh in file_handles:
            fh.close()
def get_data(self, outfile):  # pylint: disable=R0914
    """Fetch DAQ traces from the server and merge them into ``outfile``.

    Retrieves the per-site raw trace files into a temp directory, opens a
    reader per active site, and interleaves their samples row by row into
    a single CSV.  Raises HostError if a site has no trace.  Returns a
    ``MeasurementsCsv`` over the generated file.
    """
    tempdir = tempfile.mkdtemp(prefix='daq-raw-')
    self.execute('get_data', output_directory=tempdir)

    # Map each site (file basename without extension) to its trace path.
    raw_file_map = {}
    for entry in os.listdir(tempdir):
        site_name = os.path.splitext(entry)[0]
        trace_path = os.path.join(tempdir, entry)
        raw_file_map[site_name] = trace_path
        self._raw_files.append(trace_path)

    active_sites = unique([chan.site for chan in self.active_channels])
    file_handles = []
    try:
        site_readers = {}
        for site in active_sites:
            try:
                reader, fh = create_reader(raw_file_map[site])
                site_readers[site] = reader
                file_handles.append(fh)
            except KeyError:
                message = 'Could not get DAQ trace for {}; Obtained traces are in {}'
                raise HostError(message.format(site, tempdir))

        # The first row of every trace holds its column headers.
        channel_order = []
        for site, reader in site_readers.items():
            channel_order.extend(
                ['{}_{}'.format(site, kind) for kind in next(reader)])

        def _next_merged_row():
            # One sample from every site; exhausted sites contribute a
            # (None, None) pair so shorter traces are padded.
            merged = []
            for site_reader in site_readers.values():
                try:
                    merged.extend(next(site_reader))
                except StopIteration:
                    merged.extend([None, None])
            return merged

        with csvwriter(outfile) as writer:
            field_names = [chan.label for chan in self.active_channels]
            writer.writerow(field_names)
            merged_row = _next_merged_row()
            # Stop once every site is exhausted (all cells falsy).
            while any(merged_row):
                writer.writerow([merged_row[channel_order.index(name)]
                                 for name in field_names])
                merged_row = _next_merged_row()
        return MeasurementsCsv(outfile, self.active_channels,
                               self.sample_rate_hz)
    finally:
        for fh in file_handles:
            fh.close()
def get_data(self, outfile):
    """Dump the matched gem5 stats for the ROI into ``outfile`` as CSV.

    Active sites are translated through ``site_mapping`` to gem5 stat
    names before matching.  Returns a ``MeasurementsCsv`` over the file.
    """
    active_sites = [chan.site for chan in self.active_channels]
    sites_to_match = [self.site_mapping.get(site, site)
                      for site in active_sites]
    with csvwriter(outfile) as writer:
        # Header row: one label per active channel.
        writer.writerow([chan.label for chan in self.active_channels])
        match_iter = self.target.gem5stats.match_iter(sites_to_match,
                                                      [self.roi_label],
                                                      self._base_stats_dump)
        for record, _ in match_iter:
            writer.writerow([record[stat] for stat in sites_to_match])
    return MeasurementsCsv(outfile, self.active_channels, self.sample_rate_hz)
def write(self):
    """Write per-state residency stats to ``self.filepath`` as CSV.

    One row per state (sorted), one column per core; ``None`` stats are
    written as 0, formatted to ``self.precision`` decimal places.
    """
    with csvwriter(self.filepath) as writer:
        headers = ['state'] + ['{} CPU{}'.format(c, i)
                               for i, c in enumerate(self.core_names)]
        writer.writerow(headers)
        # The format spec depends only on self.precision, so build it
        # once rather than rebuilding it on every loop iteration.
        fmt = '{{:.{}f}}'.format(self.precision)
        for state in sorted(self.state_stats):
            stats = self.state_stats[state]
            writer.writerow([state] + [fmt.format(s if s is not None else 0)
                                       for s in stats])
def process_run_output(self, output, target_info):
    """Aggregate per-iteration power-state reports into two run-level CSVs.

    Emits parallel-stats.csv and power-state-stats.csv under ``output``
    and registers both as 'export' artifacts.  Warns and returns early if
    no reports were generated.
    """
    if not self.iteration_reports:
        self.logger.warning('No power state reports generated.')
        return

    parallel_rows = []
    powerstate_rows = []
    for iteration_id, reports in self.iteration_reports.items():
        job_id, workload, iteration = iteration_id
        key = [job_id, workload, iteration]

        for record in reports['parallel-stats'].values:
            parallel_rows.append(key + record)

        # Kept after the loop as well: its core_names feed the header.
        powerstate_report = reports['power-state-stats']
        for state in sorted(powerstate_report.state_stats):
            stats = powerstate_report.state_stats[state]
            formatted = ['{:.3f}'.format(s if s is not None else 0)
                         for s in stats]
            powerstate_rows.append(key + [state] + formatted)

    outpath = output.get_path('parallel-stats.csv')
    with csvwriter(outpath) as writer:
        writer.writerow(['id', 'workload', 'iteration', 'cluster',
                         'number_of_cores', 'total_time', '%time',
                         '%running_time'])
        writer.writerows(parallel_rows)
    output.add_artifact('run-parallel-stats', outpath, kind='export')

    outpath = output.get_path('power-state-stats.csv')
    with csvwriter(outpath) as writer:
        headers = ['id', 'workload', 'iteration', 'state']
        headers += ['{} CPU{}'.format(core, i)
                    for i, core in enumerate(powerstate_report.core_names)]
        writer.writerow(headers)
        writer.writerows(powerstate_rows)
    output.add_artifact('run-power-state-stats', outpath, kind='export')
def process_run_output(self, output, target_info):
    """Collect per-iteration power-state results into run-level CSV exports.

    Writes parallel-stats.csv and power-state-stats.csv to ``output`` and
    adds each as an 'export' artifact.  Does nothing (beyond a warning)
    when no iteration reports exist.
    """
    if not self.iteration_reports:
        self.logger.warning('No power state reports generated.')
        return

    parallel_rows = []
    powerstate_rows = []
    for (job_id, workload, iteration), reports in self.iteration_reports.items():
        parallel_report = reports['parallel-stats']
        powerstate_report = reports['power-state-stats']

        for record in parallel_report.values:
            parallel_rows.append([job_id, workload, iteration] + record)

        for state in sorted(powerstate_report.state_stats):
            stats = powerstate_report.state_stats[state]
            powerstate_rows.append(
                [job_id, workload, iteration, state] +
                ['{:.3f}'.format(s if s is not None else 0) for s in stats])

    outpath = output.get_path('parallel-stats.csv')
    with csvwriter(outpath) as writer:
        writer.writerow(['id', 'workload', 'iteration', 'cluster',
                         'number_of_cores', 'total_time',
                         '%time', '%running_time'])
        writer.writerows(parallel_rows)
    output.add_artifact('run-parallel-stats', outpath, kind='export')

    outpath = output.get_path('power-state-stats.csv')
    with csvwriter(outpath) as writer:
        # Core columns come from the last iteration's report.
        core_columns = ['{} CPU{}'.format(c, i)
                        for i, c in enumerate(powerstate_report.core_names)]
        writer.writerow(['id', 'workload', 'iteration', 'state'] + core_columns)
        writer.writerows(powerstate_rows)
    output.add_artifact('run-power-state-stats', outpath, kind='export')
def write_frames(self, outfile, columns=None):
    """Write collected frame data to ``outfile`` as CSV.

    When ``columns`` is given, only those header columns are emitted, in
    the requested order; a ValueError is raised for an unknown column.
    Otherwise the full header and all frame fields are written.
    """
    if columns is None:
        header = self.header
        frames = self.frames
    else:
        indexes = []
        for col in columns:
            if col not in self.header:
                msg = 'Invalid column "{}"; must be in {}'
                raise ValueError(msg.format(col, self.header))
            indexes.append(self.header.index(col))
        header = columns
        frames = [[frame[i] for i in indexes] for frame in self.frames]

    with csvwriter(outfile) as writer:
        if header:
            writer.writerow(header)
        writer.writerows(frames)
def _write_outputs(self, outputs, output):
    """Dump all metrics from ``outputs`` into results.csv under ``output``.

    Classifier columns are either the union of all classifiers seen across
    the metrics (when ``use_all_classifiers`` is set) or the configured
    ``extra_columns``.  Unknown output kinds raise RuntimeError.
    """
    extra_columns = []
    if self.use_all_classifiers:
        all_classifiers = set()
        for entry in outputs:
            for metric in entry.metrics:
                all_classifiers.update(metric.classifiers.keys())
        extra_columns = list(all_classifiers)
    elif self.extra_columns:
        extra_columns = self.extra_columns

    outfile = output.get_path('results.csv')
    with csvwriter(outfile) as writer:
        writer.writerow(['id', 'workload', 'iteration', 'metric', ] +
                        extra_columns + ['value', 'units'])
        for entry in outputs:
            if entry.kind == 'job':
                id_fields = [entry.id, entry.label, entry.iteration]
            elif entry.kind == 'run':
                # Run-level metrics belong to no job: blank id/iteration,
                # run name as the label.
                id_fields = [None, entry.info.run_name, None]
            else:
                raise RuntimeError(
                    'Output of kind "{}" unrecognised by csvproc'.format(
                        entry.kind))
            for metric in entry.result.metrics:
                writer.writerow(
                    id_fields + [metric.name] +
                    [str(metric.classifiers.get(col, ''))
                     for col in extra_columns] +
                    [str(metric.value), metric.units or ''])
def write_measurements_csv(measurements, filepath):
    """Write a mapping of column-name -> value-list to ``filepath`` as CSV.

    Columns are emitted in sorted key order; shorter columns are padded
    (with ``None``, rendered as empty cells) to the longest column.
    """
    headers = sorted(measurements)
    with csvwriter(filepath) as writer:
        writer.writerow(headers)
        rows = zip_longest(*(measurements[h] for h in headers))
        writer.writerows(rows)