def read_json_file(path, progress_bar=DUMMY_PROGRESS):
    # read lines from json file
    with open(path, "r") as inputfile:
        try:
            json_data = json.load(inputfile)
        except JSONDecodeError as error:
            # the file is not valid JSON; check whether it is in JSON Lines format instead
            inputfile.seek(0)
            lines = [line.strip() for line in inputfile]
            is_jsonlines = any(line.startswith('{') for line in lines) and \
                           all(line.startswith('{') or line == "" for line in lines)
            if is_jsonlines:
                return read_jsonlines_file(path, progress_bar=progress_bar)
            else:
                raise FileFormatError(str(error)) from error

    # create an experiment object to save the data loaded from the file
    experiment = Experiment()

    if "callpaths" not in json_data:
        try:
            _read_new_json_file(experiment, json_data, progress_bar)
        except KeyError as err:
            raise FileFormatError(str(err)) from err
    else:
        try:
            _read_legacy_json_file(experiment, json_data, progress_bar)
        except KeyError as err:
            raise FileFormatError(str(err)) from err

    call_tree = create_call_tree(experiment.callpaths, progress_bar)
    experiment.call_tree = call_tree

    io_helper.validate_experiment(experiment, progress_bar)
    return experiment
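# Hedged format note (added for illustration, not part of the original module):
# read_json_file accepts two layouts and dispatches on the top-level keys. A
# document containing a "callpaths" key is treated as the legacy layout and
# handled by _read_legacy_json_file; anything else goes to _read_new_json_file.
# If the document is not valid JSON at all but every non-empty line starts with
# '{', the file is re-read as JSON Lines via read_jsonlines_file.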
def read_talpas_file(path, progress_bar=DUMMY_PROGRESS):
    # create an experiment object to save the data loaded from the file
    experiment = Experiment()
    complete_data = {}
    parameters = None

    progress_bar.total += os.path.getsize(path)
    # read talpas file into complete_data
    with open(path) as file:
        progress_bar.step('Reading file')
        for ln, line in enumerate(file):
            progress_bar.update(len(line))
            if line.isspace():
                continue
            line = line.replace(';', ',')
            try:
                data = json.loads(line)
            except JSONDecodeError as error:
                raise FileFormatError(
                    f'Decoding of line {ln} failed: {str(error).replace(",", ";")}. Line: "{line}"'
                ) from error
            try:
                key = Callpath(data['callpath']), Metric(data['metric'])
                if parameters is None:
                    # ensures a uniform order of parameters
                    parameters = [Parameter(p) for p in data['parameters'].keys()]
                coordinate = Coordinate(data['parameters'][p.name] for p in parameters)
                io_helper.append_to_repetition_dict(complete_data, key, coordinate,
                                                    data['value'], progress_bar)
            except KeyError as error:
                raise FileFormatError(
                    f'Missing property in line {ln}: {str(error)}. Line: "{line}"'
                ) from error

    # create experiment
    io_helper.repetition_dict_to_experiment(complete_data, experiment, progress_bar)
    for p in parameters:
        experiment.add_parameter(p)

    call_tree = create_call_tree(experiment.callpaths, progress_bar)
    experiment.call_tree = call_tree

    io_helper.validate_experiment(experiment, progress_bar)
    return experiment
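# Hedged input sketch (illustration only; derived from the parser above, not
# from a format specification): each non-empty line is a JSON object in which
# ';' is used in place of ',', e.g. a line such as
#   {"callpath": "main"; "metric": "time"; "parameters": {"p": 16}; "value": 0.25}
# is rewritten to valid JSON by line.replace(';', ',') before json.loads; the
# error messages restore the semicolons so line numbers stay unambiguous.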
def read_cube_file(dir_name, scaling_type, pbar=DUMMY_PROGRESS, selected_metrics=None):
    # read the paths of the cube files in the given directory dir_name
    path = Path(dir_name)
    if not path.is_dir():
        raise FileFormatError(f'Cube file path must point to a directory: {dir_name}')
    cubex_files = list(path.glob('*/[!.]*.cubex'))
    if not cubex_files:
        raise FileFormatError(f'No cube files were found in: {dir_name}')
    pbar.total += len(cubex_files) + 6

    # iterate over all folders and read the cube profiles in them
    experiment = Experiment()
    pbar.step("Reading cube files")
    parameter_names_initial = []
    parameter_names = []
    parameter_values = []
    parameter_dict = defaultdict(set)
    progress_step_size = 5 / len(cubex_files)
    for path_id, path in enumerate(cubex_files):
        pbar.update(progress_step_size)
        folder_name = path.parent.name
        logging.debug(f"Cube file: {path} Folder: {folder_name}")

        # extract the parameter substring from the folder name
        par_start = folder_name.find(".") + 1
        par_end = folder_name.find(".r")
        par_end = None if par_end == -1 else par_end
        parameters = folder_name[par_start:par_end]

        # set scaling flag for experiment
        if path_id == 0:
            if scaling_type == "weak" or scaling_type == "strong":
                experiment.scaling = scaling_type

        param_list = re.split('([0-9.,]+)', parameters)
        param_list.remove("")

        parameter_names = [n for i, n in enumerate(param_list) if i % 2 == 0]
        parameter_value = [float(n.replace(',', '.').rstrip('.'))
                           for i, n in enumerate(param_list) if i % 2 == 1]

        # check that the parameters match across all folders
        if path_id == 0:
            parameter_names_initial = parameter_names
        elif parameter_names != parameter_names_initial:
            raise FileFormatError(
                f"Parameters must be the same and in the same order: "
                f"{parameter_names} is not {parameter_names_initial}.")

        for n, v in zip(parameter_names, parameter_value):
            parameter_dict[n].add(v)
        parameter_values.append(parameter_value)

    # determine non-constant parameters and add them to experiment
    parameter_selection_mask = []
    for i, p in enumerate(parameter_names):
        if len(parameter_dict[p]) > 1:
            experiment.add_parameter(Parameter(p))
            parameter_selection_mask.append(i)

    # check number of parameters; if > 1 use weak scaling instead,
    # since summing values for strong scaling does not work for more than 1 parameter
    if scaling_type == 'strong' and len(experiment.parameters) > 1:
        warnings.warn("Strong scaling only works for one parameter. Using weak scaling instead.")
        scaling_type = 'weak'
        experiment.scaling = scaling_type

    pbar.step("Reading cube files")
    show_warning_skipped_metrics = set()
    aggregated_values = defaultdict(list)

    # import data from cube files
    # optimize import memory usage by reordering files and grouping by coordinate
    num_points = 0
    reordered_files = sorted(zip(cubex_files, parameter_values), key=itemgetter(1))
    for parameter_value, point_group in groupby(reordered_files, key=itemgetter(1)):
        num_points += 1
        # create coordinate
        coordinate = Coordinate(parameter_value[i] for i in parameter_selection_mask)
        experiment.add_coordinate(coordinate)

        aggregated_values.clear()
        for path, _ in point_group:
            pbar.update()
            with CubexParser(str(path)) as parsed:
                callpaths = make_callpath_mapping(parsed.get_root_cnodes())
                # iterate over all metrics
                for cube_metric in parsed.get_metrics():
                    pbar.update(0)
                    # NOTE: here we could choose which metrics to extract
                    if selected_metrics and cube_metric.name not in selected_metrics:
                        continue
                    try:
                        metric_values = parsed.get_metric_values(metric=cube_metric, cache=False)
                        # create the metrics
                        metric = Metric(cube_metric.name)
                        for cnode_id in metric_values.cnode_indices:
                            pbar.update(0)
                            cnode = parsed.get_cnode(cnode_id)
                            callpath = callpaths[cnode_id]
                            # NOTE: here we can use clustering algorithm to select only certain node level values
                            # create the measurements
                            cnode_values = metric_values.cnode_values(cnode, convert_to_exclusive=True)
                            # in case of weak scaling collect all mpi process values
                            if scaling_type == "weak":
                                # do NOT use a generator, it is slower
                                aggregated_values[(callpath, metric)].extend(map(float, cnode_values))
                            # in case of strong scaling calculate the sum over all mpi process values
                            elif scaling_type == "strong":
                                aggregated_values[(callpath, metric)].append(float(sum(cnode_values)))
                    # take care of missing metrics
                    except MissingMetricError as e:  # @UnusedVariable
                        show_warning_skipped_metrics.add(e.metric.name)
                        logging.info(
                            f'The cubex file {Path(*path.parts[-2:])} does not contain data '
                            f'for the metric "{e.metric.name}"')

        # add measurements to experiment
        for (callpath, metric), values in aggregated_values.items():
            pbar.update(0)
            experiment.add_measurement(Measurement(coordinate, callpath, metric, values))

    pbar.step("Unify calltrees")
    to_delete = []
    # determine common callpaths for a common calltree
    # add common callpaths and metrics to experiment
    for key, value in pbar(experiment.measurements.items(), len(experiment.measurements), scale=0.1):
        if len(value) < num_points:
            to_delete.append(key)
        else:
            (callpath, metric) = key
            experiment.add_callpath(callpath)
            experiment.add_metric(metric)
    for key in to_delete:
        pbar.update(0)
        del experiment.measurements[key]

    # determine calltree
    call_tree = io_helper.create_call_tree(experiment.callpaths, pbar, progress_scale=0.1)
    experiment.call_tree = call_tree

    if show_warning_skipped_metrics:
        warnings.warn("The following metrics were skipped because they contained no data: "
                      f"{', '.join(show_warning_skipped_metrics)}. For more details see log.")

    io_helper.validate_experiment(experiment, pbar)
    pbar.update()
    return experiment
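# Hedged layout sketch (illustration only; inferred from the glob pattern and
# the folder-name parsing above, folder and file names are assumptions):
# read_cube_file expects one sub-directory per measurement run, each holding a
# .cubex profile, e.g.
#   experiments/
#       scorep.p4.n100.r1/profile.cubex
#       scorep.p8.n100.r1/profile.cubex
# The text after the first '.' and before '.r' ("p4.n100") is split into
# parameter names (p, n) and values (4, 100); the prefix before the first '.'
# and the repetition suffix ".r1" are ignored.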
def read_extrap3_experiment(path, progress_bar=DUMMY_PROGRESS):
    progress_bar.total += os.path.getsize(path)
    with open(path, "rb") as file:
        ioHelper = IoHelper(file)
        try:
            qualifier = ioHelper.readString()
            if qualifier != "EXTRAP_EXPERIMENT":
                raise FileFormatError(
                    "This is not an Extra-P 3 Experiment File. Qualifier was " + str(qualifier))
        except struct.error as err:
            raise FileFormatError("This is not an Extra-P 3 Experiment File.") from err

        try:
            exp = Experiment()
            id_mappings = _Mappings()
            versionNumber = ioHelper.readString()
            prefix = ioHelper.readString()
            progress_bar.step('Load Extra-P 3 experiment')
            last_pos = 0
            is_sparse = __Ref(False)
            while prefix:
                pos = file.tell()
                progress_bar.update(pos - last_pos)
                last_pos = pos
                # logging.debug("Deserialize " + str(prefix))
                # noinspection PyNoneFunctionAssignment
                if prefix == 'Parameter':
                    p = deserialize_parameter(id_mappings, ioHelper)
                    exp.add_parameter(p)
                elif prefix == 'Metric':
                    m = deserialize_metric(ioHelper)
                    SAFE_RETURN_None(m)
                    exp.add_metric(m)
                elif prefix == 'Region':
                    deserialize_region(id_mappings, ioHelper)
                elif prefix == 'Callpath':
                    c = deserialize_callpath(id_mappings, ioHelper)
                    SAFE_RETURN_None(c)
                    exp.add_callpath(c)
                    progress_bar.total += 100
                elif prefix == 'Coordinate':
                    c = deserialize_coordinate(exp, id_mappings, ioHelper)
                    SAFE_RETURN_None(c)
                    exp.add_coordinate(c)
                elif prefix == 'ModelComment':
                    deserialize_modelcomment(ioHelper)
                    # SAFE_RETURN_None(comment)
                    # exp.addModelComment(comment)
                elif prefix == 'SingleParameterSimpleModelGenerator':
                    generator = deserialize_SingleParameterSimpleModelGenerator(exp, is_sparse, ioHelper)
                    SAFE_RETURN_None(generator)
                    exp.add_modeler(generator)
                elif prefix == 'SingleParameterRefiningModelGenerator':
                    generator = deserialize_SingleParameterModelGenerator(exp, is_sparse, ioHelper)
                    SAFE_RETURN_None(generator)
                    exp.add_modeler(generator)
                elif prefix == 'MultiParameterSimpleModelGenerator':
                    generator = deserialize_MultiParameterModelGenerator(exp, is_sparse, ioHelper)
                    SAFE_RETURN_None(generator)
                    exp.add_modeler(generator)
                elif prefix == 'MultiParameterSparseModelGenerator':
                    generator = deserialize_MultiParameterModelGenerator(exp, is_sparse, ioHelper)
                    SAFE_RETURN_None(generator)
                    exp.add_modeler(generator)
                elif prefix == 'ExperimentPoint':
                    point = deserialize_ExperimentPoint(exp, id_mappings, ioHelper)
                    SAFE_RETURN_None(point)
                    exp.add_measurement(point)
                elif prefix == 'Model':
                    model, generator_id = deserialize_Model(exp, id_mappings, is_sparse, ioHelper)
                    SAFE_RETURN_None(model)
                    exp.modelers[generator_id].models[(model.callpath, model.metric)] = model
                else:
                    raise FileFormatError("Unknown object: " + prefix + ". Can not load experiment.")

                prefix = ioHelper.readString()
        except struct.error as err:
            raise FileFormatError(str(err)) from err
        pos = file.tell()
        progress_bar.update(pos - last_pos)

    # remove empty modelers
    exp.modelers = [m for m in exp.modelers if len(m.models) > 0]
    # add measurements to models
    for modeler in exp.modelers:
        for key, model in modeler.models.items():
            model.measurements = exp.measurements.get(key)

    call_tree = io_helper.create_call_tree(exp.callpaths, progress_bar, True, progress_scale=100)
    exp.call_tree = call_tree

    io_helper.validate_experiment(exp, progress_bar)
    return exp
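# Hedged format note (illustration only; derived from the reader above): an
# Extra-P 3 experiment file is a binary stream of string-prefixed records. It
# must start with the qualifier "EXTRAP_EXPERIMENT" followed by a version
# string; after that, each record is announced by its type name (Parameter,
# Metric, Region, Callpath, Coordinate, ModelComment, one of the model
# generators, ExperimentPoint, or Model) until an empty prefix ends the stream.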
def read_text_file(path, progress_bar=DUMMY_PROGRESS):
    # read text file into list
    with open(path) as file:
        lines = file.readlines()

    # remove empty lines
    lines_no_space = [l for l in lines if not l.isspace()]
    # remove line breaks
    lines_no_space = [l.replace("\n", "") for l in lines_no_space]

    # create an experiment object to save the data loaded from the text file
    experiment = Experiment()

    # variables for parsing
    number_parameters = 0
    last_metric = None
    last_callpath = Callpath("")
    coordinate_id = 0

    if len(lines_no_space) == 0:
        raise FileFormatError(f'File contains no data: "{path}"')

    # parse text to extrap objects
    for i, line in enumerate(progress_bar(lines)):
        if line.isspace() or line.startswith('#'):
            continue  # allow comments
        line = re_whitespace.sub(' ', line)
        # get field name
        field_separator_idx = line.find(" ")
        field_name = line[:field_separator_idx]
        field_value = line[field_separator_idx + 1:].strip()

        if field_name == "METRIC":
            # create a new metric if it does not already exist
            metric_name = field_value
            test_metric = Metric(metric_name)
            if test_metric not in experiment.metrics:
                metric = test_metric
                experiment.add_metric(metric)
                last_metric = metric
            else:
                # reuse the equal, already registered metric
                last_metric = test_metric
            # reset the coordinate id, since moving to a new region
            coordinate_id = 0

        elif field_name == "REGION":
            # create a new region if it does not already exist
            callpath_name = field_value
            callpath = Callpath(callpath_name)
            experiment.add_callpath(callpath)
            last_callpath = callpath
            # reset the coordinate id, since moving to a new region
            coordinate_id = 0

        elif field_name == "DATA":
            if last_metric is None:
                last_metric = Metric("")
            # create a new data set
            data_string = field_value
            data_list = data_string.split(" ")
            values = [float(d) for d in data_list]
            if 1 <= number_parameters <= 4:
                # create one measurement per repetition
                if coordinate_id >= len(experiment.coordinates):
                    raise FileFormatError(
                        f'Too many DATA lines ({coordinate_id}) for the number of POINTS '
                        f'({len(experiment.coordinates)}) in line {i}.')
                measurement = Measurement(experiment.coordinates[coordinate_id],
                                          last_callpath, last_metric, values)
                experiment.add_measurement(measurement)
                coordinate_id += 1
            elif number_parameters >= 5:
                raise FileFormatError("This input format supports a maximum of 4 parameters.")
            else:
                raise FileFormatError("This file has no parameters.")

        elif field_name == "PARAMETER":
            # create new parameters
            parameters = field_value.split(' ')
            experiment.parameters += [Parameter(p) for p in parameters]
            number_parameters = len(experiment.parameters)

        elif field_name == "POINTS":
            coordinate_string = field_value.strip()
            if '(' in coordinate_string:
                coordinate_string = coordinate_string.replace(") (", ")(")
                coordinate_string = coordinate_string[1:-1]
                coordinate_strings = coordinate_string.split(')(')
            else:
                coordinate_strings = coordinate_string.split(' ')
            # create a new point
            if number_parameters == 1:
                coordinates = [Coordinate(float(c)) for c in coordinate_strings]
                experiment.coordinates.extend(coordinates)
            elif 1 < number_parameters < 5:
                for coordinate_string in coordinate_strings:
                    coordinate_string = coordinate_string.strip()
                    values = coordinate_string.split(" ")
                    coordinate = Coordinate(float(v) for v in values)
                    experiment.coordinates.append(coordinate)
            elif number_parameters >= 5:
                raise FileFormatError("This input format supports a maximum of 4 parameters.")
            else:
                raise FileFormatError("This file has no parameters.")

        else:
            raise FileFormatError(f'Encountered wrong field: "{field_name}" in line {i}: {line}')

    if last_metric == Metric(''):
        experiment.metrics.append(last_metric)
    if last_callpath == Callpath(''):
        experiment.callpaths.append(last_callpath)

    # create the call tree and add it to the experiment
    call_tree = create_call_tree(experiment.callpaths, progress_bar, progress_scale=10)
    experiment.call_tree = call_tree

    io_helper.validate_experiment(experiment, progress_bar)
    return experiment
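# Hedged input sketch (illustration only; derived from the parser above, not
# from a format specification): a minimal two-parameter text file could look
# like this, with one DATA line of repetitions per POINTS coordinate:
#   PARAMETER p n
#   POINTS (2 10)(4 10)(8 10)
#   METRIC time
#   REGION main
#   DATA 1.1 1.2 1.0
#   DATA 2.3 2.2 2.4
#   DATA 4.9 5.1 5.0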
def read_jsonlines_file(path, progress_bar=DUMMY_PROGRESS):
    # create an experiment object to save the data loaded from the file
    experiment = Experiment()
    complete_data = {}
    parameters = None
    default_callpath = Callpath('<root>')
    default_metric = Metric('<default>')

    progress_bar.total += os.path.getsize(path)
    # read jsonlines file into complete_data
    with open(path) as file:
        progress_bar.step('Reading file')
        for ln, line in enumerate(file):
            progress_bar.update(len(line))
            if line.isspace():
                continue
            try:
                data = json.loads(line)
            except JSONDecodeError as error:
                raise FileFormatError(
                    f'Decoding of line {ln} failed: {str(error)}. Line: "{line}"') from error
            try:
                if 'callpath' in data:
                    callpath = Callpath(data['callpath'])
                else:
                    callpath = default_callpath
                if 'metric' in data:
                    metric = Metric(data['metric'])
                else:
                    metric = default_metric
                key = callpath, metric
                if parameters is None:
                    # ensures a uniform order of parameters
                    parameters = [Parameter(p) for p in data['params'].keys()]
                coordinate = Coordinate(data['params'][p.name] for p in parameters)
                io_helper.append_to_repetition_dict(complete_data, key, coordinate,
                                                    data['value'], progress_bar)
            except KeyError as error:
                raise FileFormatError(
                    f'Missing property in line {ln}: {str(error)}. Line: "{line}"') from error

    # create experiment
    io_helper.repetition_dict_to_experiment(complete_data, experiment, progress_bar)
    for p in parameters:
        experiment.add_parameter(p)

    experiment.call_tree = create_call_tree(experiment.callpaths, progress_bar)

    io_helper.validate_experiment(experiment, progress_bar)
    return experiment
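# Hedged usage sketch (illustration only, not part of the original module):
# every reader in this file returns an Experiment with its call tree set and
# validated, so a driver could dispatch on the file suffix. The helper name
# and the suffix-to-reader mapping below are assumptions for this sketch.
def _example_load_experiment(path):
    """Hypothetical helper: pick a reader based on the file suffix."""
    name = str(path)
    if name.endswith('.jsonl'):
        return read_jsonlines_file(path)
    if name.endswith('.json'):
        return read_json_file(path)
    return read_text_file(path)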