Esempio n. 1
0
        def make_measurement(c, ms: Sequence[Measurement]):
            """Combine the measurements ``ms`` into one measurement at coordinate ``c``.

            A single measurement is shallow-copied and only moved to the new
            coordinate. Several measurements are merged into a new
            ``Measurement`` that takes callpath and metric from the first one:
            its central value is the average of the medians (when
            ``self.use_median`` is set) or of the means, and is stored as both
            ``mean`` and ``median``.

            :param c: Value(s) passed to ``Coordinate`` for the result.
            :param ms: The measurements to combine.
            :return: The relocated copy or the merged measurement.
            """
            if len(ms) == 1:
                relocated = copy.copy(ms[0])
                relocated.coordinate = Coordinate(c)
                return relocated

            merged = Measurement(Coordinate(c), ms[0].callpath, ms[0].metric, None)

            central_attr = 'median' if self.use_median else 'mean'
            central = np.mean([getattr(m, central_attr) for m in ms])
            merged.mean = merged.median = central

            spread_attrs = ('maximum', 'minimum', 'std')
            if merged.mean == 0:
                # No scale to normalise against: average the raw statistics.
                for attr in spread_attrs:
                    setattr(merged, attr, np.mean([getattr(m, attr) for m in ms]))
            else:
                # Average every statistic relative to the mean of its own
                # measurement, then rescale to the merged mean. NaN ratios are
                # ignored by nanmean.
                # NOTE(review): an individual m.mean of 0 is not guarded here.
                for attr in spread_attrs:
                    ratios = [getattr(m, attr) / m.mean for m in ms]
                    setattr(merged, attr, np.nanmean(ratios) * merged.mean)

            return merged
Esempio n. 2
0
    def test_get_matching_hypotheses(self):
        """Check which building blocks ``get_matching_hypotheses`` returns for two inputs.

        For measurements at coordinates 15 and 0.1 every returned block must
        consist of exactly one simple term that is not a logarithm. For
        measurements at coordinates 31 and 1 the result must contain a block
        with two simple terms and a block whose first term is a logarithm.
        """
        modeler = SingleParameterModeler()
        modeler.hypotheses_building_blocks.append(CompoundTerm.create(1, 1, 1))

        first_measurements = [
            Measurement(Coordinate(15), None, None, 15),
            Measurement(Coordinate(0.1), None, None, 0.1),
        ]
        for candidate in modeler.get_matching_hypotheses(first_measurements):
            self.assertEqual(len(candidate.simple_terms), 1)
            self.assertNotEqual(candidate.simple_terms[0].term_type, 'logarithm')

        second_measurements = [
            Measurement(Coordinate(31), None, None, 31),
            Measurement(Coordinate(1), None, None, 1),
        ]
        candidates = modeler.get_matching_hypotheses(second_measurements)
        self.assertIn(2, (len(c.simple_terms) for c in candidates))
        self.assertIn('logarithm', (c.simple_terms[0].term_type for c in candidates))
Esempio n. 3
0
    def test_modeling(self):
        """Model exact values of ``200 + 10 * term`` and expect the same function back.

        One function is built per exponent triple (the triple is passed to
        ``CompoundTerm.create``), evaluated at the points 2, 4, 8, 16 and 32,
        and the resulting measurements are handed to a fresh
        ``SingleParameterModeler``.
        """
        exponent_triples = [
            (0, 1, 1), (0, 1, 2), (1, 4, 0), (1, 3, 0), (1, 4, 1), (1, 3, 1), (1, 4, 2),
            (1, 3, 2), (1, 2, 0), (1, 2, 1), (1, 2, 2), (2, 3, 0), (3, 4, 0), (2, 3, 1),
            (3, 4, 1), (4, 5, 0), (2, 3, 2), (3, 4, 2), (1, 1, 0), (1, 1, 1), (1, 1, 2),
            (5, 4, 0), (5, 4, 1), (4, 3, 0), (4, 3, 1), (3, 2, 0), (3, 2, 1), (3, 2, 2),
            (5, 3, 0), (7, 4, 0), (2, 1, 0), (2, 1, 1), (2, 1, 2), (9, 4, 0), (7, 3, 0),
            (5, 2, 0), (5, 2, 1), (5, 2, 2), (8, 3, 0), (11, 4, 0), (3, 1, 0), (3, 1, 1),
        ]
        sample_points = [2, 4, 8, 16, 32]

        for triple in exponent_triples:
            term = CompoundTerm.create(*triple)
            term.coefficient = 10
            expected = SingleParameterFunction(term)
            expected.constant_coefficient = 200

            values = expected.evaluate(np.array(sample_points))
            measurements = []
            for point, value in zip(sample_points, values):
                measurements.append(Measurement(Coordinate(point), None, None, value))

            models = SingleParameterModeler().model([measurements])
            self.assertEqual(1, len(models))
            self.assertApproxFunction(expected, models[0].hypothesis.function)
Esempio n. 4
0
    def test_modeling_4p(self):
        """Model exact values of a four-parameter product term and expect the same function back.

        Every run of four consecutive exponent triples defines one compound
        term per parameter (indices 0 to 3). The function
        ``20000 + 10 * product`` is evaluated on the full grid
        ``{2, 4, 8, 10, 12}^4`` and modeled with a fresh
        ``MultiParameterModeler``.
        """
        exponents = [
            (0, 1, 1), (0, 1, 2), (1, 4, 0), (1, 3, 0), (1, 4, 1), (1, 3, 1), (1, 4, 2),
            (1, 3, 2), (1, 2, 0), (1, 2, 1), (1, 2, 2), (2, 3, 0), (3, 4, 0), (2, 3, 1),
            (3, 4, 1), (4, 5, 0), (2, 3, 2), (3, 4, 2), (1, 1, 0), (1, 1, 1), (1, 1, 2),
            (5, 4, 0), (5, 4, 1), (4, 3, 0), (4, 3, 1), (3, 2, 0), (3, 2, 1), (3, 2, 2),
            (5, 3, 0), (7, 4, 0), (2, 1, 0), (2, 1, 1), (2, 1, 2), (9, 4, 0), (7, 3, 0),
            (5, 2, 0), (5, 2, 1), (5, 2, 2), (8, 3, 0), (11, 4, 0), (3, 1, 0), (3, 1, 1),
        ]

        # One row per parameter, one column per grid point.
        grid = itertools.product([2, 4, 8, 10, 12], repeat=4)
        points = np.array(list(zip(*grid)))

        for window in zip(exponents, exponents[1:], exponents[2:], exponents[3:]):
            per_parameter = [CompoundTerm.create(*triple) for triple in window]
            # enumerate pairs every compound term with its parameter index.
            term = MultiParameterTerm(*enumerate(per_parameter))
            term.coefficient = 10
            expected = MultiParameterFunction(term)
            expected.constant_coefficient = 20000

            values = expected.evaluate(points)
            measurements = []
            for point, value in zip(zip(*points), values):
                measurements.append(Measurement(Coordinate(point), None, None, value))

            models = MultiParameterModeler().model([measurements])
            self.assertEqual(1, len(models))
            self.assertApproxFunction(expected, models[0].hypothesis.function)
Esempio n. 5
0
    def test_modeling_plus(self):
        """Model exact values of an additive two-parameter function and expect it back.

        Every pair of consecutive exponent triples defines the function
        ``200 + 10 * termX(parameter 0) + 20 * termY(parameter 1)``, which is
        evaluated on the 5 x 5 grid over 2, 4, 8, 16, 32 and modeled with a
        fresh ``MultiParameterModeler``.
        """
        exponents = [
            (0, 1, 1), (0, 1, 2), (1, 4, 0), (1, 3, 0), (1, 4, 1), (1, 3, 1), (1, 4, 2),
            (1, 3, 2), (1, 2, 0), (1, 2, 1), (1, 2, 2), (2, 3, 0), (3, 4, 0), (2, 3, 1),
            (3, 4, 1), (4, 5, 0), (2, 3, 2), (3, 4, 2), (1, 1, 0), (1, 1, 1), (1, 1, 2),
            (5, 4, 0), (5, 4, 1), (4, 3, 0), (4, 3, 1), (3, 2, 0), (3, 2, 1), (3, 2, 2),
            (5, 3, 0), (7, 4, 0), (2, 1, 0), (2, 1, 1), (2, 1, 2), (9, 4, 0), (7, 3, 0),
            (5, 2, 0), (5, 2, 1), (5, 2, 2), (8, 3, 0), (11, 4, 0), (3, 1, 0), (3, 1, 1),
        ]
        axis = [2, 4, 8, 16, 32]

        for first_exponents, second_exponents in zip(exponents, exponents[1:]):
            first_term = MultiParameterTerm((0, CompoundTerm.create(*first_exponents)))
            first_term.coefficient = 10
            second_term = MultiParameterTerm((1, CompoundTerm.create(*second_exponents)))
            second_term.coefficient = 20
            expected = MultiParameterFunction(first_term, second_term)
            expected.constant_coefficient = 200

            # Parameter 0 cycles through the axis, parameter 1 holds each value
            # for one full cycle.
            points = [np.tile(axis, len(axis)), np.repeat(axis, len(axis))]

            values = expected.evaluate(np.array(points))
            measurements = []
            for point, value in zip(zip(*points), values):
                measurements.append(Measurement(Coordinate(point), None, None, value))

            models = MultiParameterModeler().model([measurements])
            self.assertEqual(1, len(models))
            self.assertApproxFunction(expected, models[0].hypothesis.function)
def deserialize_ExperimentPoint(experiment, id_mappings, ioHelper):
    """Read one serialized experiment point and return it as a ``Measurement``.

    The record is read in a single call with the pattern ``'qqdddddddddqq'``
    and holds, in this order: coordinate id, sample count, mean, mean CI
    start, mean CI end, standard deviation, median, median CI start, median CI
    end, minimum, maximum, metric id and callpath id. The sample count and the
    confidence interval bounds are read but not stored.

    :param experiment: Provides ``metrics``, indexed by the metric id.
    :param id_mappings: Provides ``coordinate_mapping`` and
        ``callpath_mapping``, indexed by the respective ids.
    :param ioHelper: Reader offering ``read_pattern``.
    :return: The measurement with minimum, maximum, mean, median and std set.
    """
    (coordinate_id, _sample_count,
     mean_value, _mean_ci_start, _mean_ci_end,
     std_value,
     median_value, _median_ci_start, _median_ci_end,
     min_value, max_value,
     metric_id, callpath_id) = ioHelper.read_pattern('qqdddddddddqq')

    # Resolve the ids read from the stream to the already loaded objects.
    coordinate = id_mappings.coordinate_mapping[coordinate_id]
    metric = experiment.metrics[metric_id]
    callpath = id_mappings.callpath_mapping[callpath_id]

    # The measurement is created without raw values; the statistics are
    # assigned afterwards.
    measurement = Measurement(coordinate, callpath, metric, None)
    measurement.minimum = min_value
    measurement.maximum = max_value
    measurement.mean = mean_value
    measurement.median = median_value
    measurement.std = std_value
    return measurement
Esempio n. 7
0
def repetition_dict_to_experiment(complete_data, experiment, progress_bar=DUMMY_PROGRESS):
    """Fill ``experiment`` from a mapping of ``(callpath, metric)`` to per-coordinate values.

    :param complete_data: Mapping whose keys are ``(callpath, metric)`` pairs
        and whose values map each coordinate to the values measured there.
    :param experiment: Experiment that receives the callpaths, metrics,
        coordinates and measurements.
    :param progress_bar: Progress reporter; advanced once per key.
    """
    progress_bar.step('Creating experiment')
    for key, values_by_coordinate in complete_data.items():
        progress_bar.update()
        callpath, metric = key
        experiment.add_callpath(callpath)
        experiment.add_metric(metric)
        for coordinate, values in values_by_coordinate.items():
            experiment.add_coordinate(coordinate)
            experiment.add_measurement(
                Measurement(coordinate, callpath, metric, values))
Esempio n. 8
0
    def compute_cost(self, training_measurements: Sequence[Measurement], validation_measurement: Measurement):
        """
        Add the cost of one validation measurement to the accumulated costs of
        the single-parameter model (leave-one-out cross-validation).

        The function is evaluated at the first coordinate component of the
        validation measurement and compared with its measured value. The
        result updates ``_RSS``, ``_RE``, ``_rRSS`` and ``_SMAPE`` and marks
        the costs as calculated. The relative costs are skipped when the
        measured value is 0; SMAPE is skipped when measured and predicted
        value are both 0.

        :param training_measurements: Measurements used for training; only
            their number is used here.
        :param validation_measurement: The measurement that was left out.
        """
        coordinate_value = validation_measurement.coordinate[0]
        predicted = self.function.evaluate(coordinate_value)
        actual = validation_measurement.value(self._use_median)

        error = predicted - actual
        self._RSS += error * error

        if actual != 0:
            relative_error = error / actual
            self._RE += numpy.abs(relative_error) / (len(training_measurements) + 1)
            self._rRSS += relative_error * relative_error

        magnitude = abs(actual) + abs(predicted)
        if magnitude != 0:
            # NOTE(review): normalised by len(training_measurements), while
            # _RE above uses len(training_measurements) + 1 - confirm intended.
            symmetric_error = abs(error) / magnitude * 2
            self._SMAPE += symmetric_error / len(training_measurements) * 100

        self._costs_are_calculated = True
Esempio n. 9
0
def _read_new_json_file(experiment, json_data, progress_bar):
    """Populate ``experiment`` from JSON data with "parameters" and "measurements" entries.

    ``json_data["parameters"]`` is iterated and every item becomes a
    ``Parameter``. ``json_data["measurements"]`` maps callpath names to
    mappings of metric names to lists of entries; every entry provides a
    ``'point'`` (the coordinate) and ``'values'``.

    :param experiment: Experiment that receives all created objects.
    :param json_data: The decoded JSON document.
    :param progress_bar: Callable wrapping the iteration over the callpaths.
    """
    for parameter_name in json_data["parameters"]:
        experiment.add_parameter(Parameter(parameter_name))

    entries_by_callpath = json_data["measurements"]
    for callpath_name, entries_by_metric in progress_bar(entries_by_callpath.items()):
        for metric_name, entries in entries_by_metric.items():
            for entry in entries:
                coordinate = Coordinate(entry['point'])
                experiment.add_coordinate(coordinate)

                callpath = Callpath(callpath_name)
                experiment.add_callpath(callpath)

                metric = Metric(metric_name)
                experiment.add_metric(metric)

                experiment.add_measurement(
                    Measurement(coordinate, callpath, metric, entry['values']))
Esempio n. 10
0
def read_cube_file(dir_name,
                   scaling_type,
                   pbar=DUMMY_PROGRESS,
                   selected_metrics=None):
    """Read the cube profiles below ``dir_name`` into a new ``Experiment``.

    Cube files are searched one directory level below ``dir_name`` (glob
    ``*/[!.]*.cubex``). The parameter names and values of each file are parsed
    from the name of its folder: the text after the first ``"."`` and before
    the first ``".r"`` (if there is one) is split into alternating name and
    number tokens. Only parameters that take more than one value are added to
    the experiment and used in the coordinates.

    :param dir_name: Directory that contains one sub-folder per cube file.
    :param scaling_type: ``"weak"`` collects every value of a cnode,
        ``"strong"`` collects the sum of the values of a cnode. With any other
        value no values are collected. ``"strong"`` is replaced by ``"weak"``
        (with a warning) if more than one parameter varies.
    :param pbar: Progress bar; its ``total`` is increased by the number of
        cube files plus 6.
    :param selected_metrics: If truthy, only metrics whose name is contained
        in it are read.
    :return: The experiment, after ``io_helper.validate_experiment`` ran on it.
    :raises FileFormatError: If ``dir_name`` is not a directory, no cube files
        are found, or the folders do not use the same parameter names in the
        same order.
    """
    # read the paths of the cube files in the given directory with dir_name
    path = Path(dir_name)
    if not path.is_dir():
        raise FileFormatError(
            f'Cube file path must point to a directory: {dir_name}')
    cubex_files = list(path.glob('*/[!.]*.cubex'))
    if not cubex_files:
        raise FileFormatError(f'No cube files were found in: {dir_name}')
    pbar.total += len(cubex_files) + 6
    # iterate over all folders and read the cube profiles in them
    experiment = Experiment()

    pbar.step("Reading cube files")
    parameter_names_initial = []
    parameter_names = []
    parameter_values = []  # one list of values per cube file, same order as cubex_files
    parameter_dict = defaultdict(set)  # parameter name -> set of all values seen
    # this first pass over the files advances the progress bar by 5 in total
    progress_step_size = 5 / len(cubex_files)
    for path_id, path in enumerate(cubex_files):
        pbar.update(progress_step_size)
        folder_name = path.parent.name
        logging.debug(f"Cube file: {path} Folder: {folder_name}")

        # create the parameters
        # the parameter part starts after the first "." and ends before the first ".r" (if present)
        par_start = folder_name.find(".") + 1
        par_end = folder_name.find(".r")
        par_end = None if par_end == -1 else par_end
        parameters = folder_name[par_start:par_end]

        # set scaling flag for experiment
        if path_id == 0:
            if scaling_type == "weak" or scaling_type == "strong":
                experiment.scaling = scaling_type

        # split into alternating name/number tokens; the capturing group keeps the numbers
        param_list = re.split('([0-9.,]+)', parameters)
        # drop the first empty token produced by the split (raises ValueError if there is none)
        param_list.remove("")

        # even positions hold the names, odd positions the values
        parameter_names = [n for i, n in enumerate(param_list) if i % 2 == 0]
        # "," is treated as decimal separator, trailing "." characters are stripped
        parameter_value = [
            float(n.replace(',', '.').rstrip('.'))
            for i, n in enumerate(param_list) if i % 2 == 1
        ]

        # all folders must use the same parameter names in the same order as the first folder
        if path_id == 0:
            parameter_names_initial = parameter_names
        elif parameter_names != parameter_names_initial:
            raise FileFormatError(
                f"Parameters must be the same and in the same order: {parameter_names} is not {parameter_names_initial}."
            )

        for n, v in zip(parameter_names, parameter_value):
            parameter_dict[n].add(v)
        parameter_values.append(parameter_value)

    # determine non-constant parameters and add them to experiment
    # parameter_selection_mask holds the positions of these parameters
    parameter_selection_mask = []
    for i, p in enumerate(parameter_names):
        if len(parameter_dict[p]) > 1:
            experiment.add_parameter(Parameter(p))
            parameter_selection_mask.append(i)

    # check number of parameters, if > 1 use weak scaling instead
    # since sum values for strong scaling does not work for more than 1 parameter
    if scaling_type == 'strong' and len(experiment.parameters) > 1:
        warnings.warn(
            "Strong scaling only works for one parameter. Using weak scaling instead."
        )
        scaling_type = 'weak'
        experiment.scaling = scaling_type

    pbar.step("Reading cube files")

    show_warning_skipped_metrics = set()
    # (callpath, metric) -> values collected for the coordinate currently processed
    aggregated_values = defaultdict(list)

    # import data from cube files
    # optimize import memory usage by reordering files and grouping by coordinate
    num_points = 0
    # sorting by the parameter values makes files with equal values adjacent, as groupby requires
    reordered_files = sorted(zip(cubex_files, parameter_values),
                             key=itemgetter(1))
    for parameter_value, point_group in groupby(reordered_files,
                                                key=itemgetter(1)):
        num_points += 1
        # create coordinate
        coordinate = Coordinate(parameter_value[i]
                                for i in parameter_selection_mask)
        experiment.add_coordinate(coordinate)

        # start with an empty collection for every coordinate
        aggregated_values.clear()
        for path, _ in point_group:
            pbar.update()
            with CubexParser(str(path)) as parsed:
                callpaths = make_callpath_mapping(parsed.get_root_cnodes())
                # iterate over all metrics
                for cube_metric in parsed.get_metrics():
                    pbar.update(0)
                    # NOTE: here we could choose which metrics to extract
                    if selected_metrics and cube_metric.name not in selected_metrics:
                        continue
                    try:
                        metric_values = parsed.get_metric_values(
                            metric=cube_metric, cache=False)
                        # create the metrics
                        metric = Metric(cube_metric.name)

                        for cnode_id in metric_values.cnode_indices:
                            pbar.update(0)
                            cnode = parsed.get_cnode(cnode_id)
                            callpath = callpaths[cnode_id]
                            # NOTE: here we can use clustering algorithm to select only certain node level values
                            # create the measurements
                            cnode_values = metric_values.cnode_values(
                                cnode, convert_to_exclusive=True)

                            # in case of weak scaling calculate mean and median over all mpi process values
                            if scaling_type == "weak":
                                # do NOT use generator it is slower
                                aggregated_values[(callpath, metric)].extend(
                                    map(float, cnode_values))

                                # in case of strong scaling calculate the sum over all mpi process values
                            elif scaling_type == "strong":
                                aggregated_values[(callpath, metric)].append(
                                    float(sum(cnode_values)))

                    # Take care of missing metrics
                    # the metric is remembered for the summary warning at the end and logged with its file
                    except MissingMetricError as e:  # @UnusedVariable
                        show_warning_skipped_metrics.add(e.metric.name)
                        logging.info(
                            f'The cubex file {Path(*path.parts[-2:])} does not contain data for the metric "{e.metric.name}"'
                        )

        # add measurements to experiment
        # one measurement per (callpath, metric) for the current coordinate
        for (callpath, metric), values in aggregated_values.items():
            pbar.update(0)
            experiment.add_measurement(
                Measurement(coordinate, callpath, metric, values))

    pbar.step("Unify calltrees")
    to_delete = []
    # determine common callpaths for common calltree
    # add common callpaths and metrics to experiment
    # entries with fewer items than coordinates are collected for removal
    for key, value in pbar(experiment.measurements.items(),
                           len(experiment.measurements),
                           scale=0.1):
        if len(value) < num_points:
            to_delete.append(key)
        else:
            (callpath, metric) = key
            experiment.add_callpath(callpath)
            experiment.add_metric(metric)
    # deletion happens after the loop so the mapping is not changed while it is iterated
    for key in to_delete:
        pbar.update(0)
        del experiment.measurements[key]

    # determine calltree
    call_tree = io_helper.create_call_tree(experiment.callpaths,
                                           pbar,
                                           progress_scale=0.1)
    experiment.call_tree = call_tree

    if show_warning_skipped_metrics:
        warnings.warn(
            "The following metrics were skipped because they contained no data: "
            f"{', '.join(show_warning_skipped_metrics)}. For more details see log."
        )

    io_helper.validate_experiment(experiment, pbar)
    pbar.update()
    return experiment
Esempio n. 11
0
    def test_compare(self):
        """Model tabulated values and compare the result with tabulated expected functions.

        Every entry of ``data`` is a triple ``(orig, values, expected)``:

        * ``orig`` is not used by the test; presumably it holds the unrounded
          exponents and coefficients the values were generated from.
        * ``values`` are the measured values at the points 4, 8, 16, 32, 64, 128.
        * ``expected`` is ``(exponents, (constant, coefficient))``. If
          ``exponents`` is ``None`` the expected function is constant,
          otherwise ``exponents`` is passed to ``CompoundTerm.create``.

        Crossvalidation is switched off and the functions are compared with
        ``places=3``.
        """
        points = [4, 8, 16, 32, 64, 128]
        data = [
            ((None, (12.279235119728051 + 112.3997486813747, 0)), [
                124.67898380110276, 124.67898380110276, 124.67898380110276,
                124.67898380110276, 124.67898380110276, 124.67898380110276
            ], (None, (124.679, 0.0))),
            (((0, Fraction(1, 1)), (392.837968713381, 683.8645895889935)), [
                1760.5671478913678, 2444.4317374803613, 3128.296327069355,
                3812.1609166583485, 4496.025506247342, 5179.890095836336
            ], ((0.0, 1.0), (392.838, 683.865))),
            (((0, Fraction(2, 1)), (138.69179452369758, 112.44445041582443)), [
                588.4695961869953, 1150.6918482661176, 1937.8030011768885,
                2949.803054919308, 4186.692009493378, 5648.469864899094
            ], ((0.0, 2.0), (138.692, 112.444))),
            (((Fraction(1, 4), 0), (231.8031252715932, 757.5927278025262)), [
                1303.2010356851542, 1505.917143334448, 1746.9885808766455,
                2033.672449625761, 2374.5989460987153, 2780.0311613973026
            ], ((0.25, 0.0), (231.803, 757.593))),
            (((Fraction(1, 3), 0), (147.40207355905747, 740.6554582848072)), [
                1323.1193271863492, 1628.712990128672, 2013.7368787841829,
                2498.836580813641, 3110.023906698286, 3880.071684009308
            ], ((0.333333, 0.0), (147.402, 740.655))),
            (((Fraction(1, 4), Fraction(1, 1)),
              (662.1669933486077, 136.57938776640577)), [
                  1048.4718383883378, 1351.2616987705137, 1754.802095479854,
                  2286.378790293861, 2979.9960635869884, 3877.9422853175015
              ], ((0.25, 1.0), (662.167, 136.579))),
            (((Fraction(1, 3), Fraction(1, 1)),
              (535.6622118860412, 447.75148218635366)), [
                  1957.1845595719178, 3222.171105004163, 5048.714352111772,
                  7643.273950315424, 11281.697784358528, 16331.344702676097
              ], ((0.333333, 1.0), (535.662, 447.751))),
            (((Fraction(1, 4), Fraction(2, 1)),
              (412.5706706079675, 996.343695251814)), [
                  6048.741737048133, 15493.363821169984, 32295.568918666017,
                  59655.521239685964, 101863.64986653095, 164625.65164339435
              ], ((0.25, 2.0), (412.571, 996.344))),
            (((Fraction(1, 3), Fraction(2, 1)),
              (93.11229615417925, 20.367438006670188)), [
                  222.43746622492063, 459.72618027424267, 914.2759402192239,
                  1709.6769220384463, 3026.0233691146855, 5122.739616052577
              ], ((0.333333, 2.0), (93.1123, 20.3674))),
            (((Fraction(1, 2), 0), (939.8019758412179, 402.94640866510485)), [
                1745.6947931714276, 2079.5065279286637, 2551.5876105016373,
                3219.2110800161095, 4163.373245162056, 5498.620184191001
            ], ((0.5, 0.0), (939.802, 402.946))),
            (((Fraction(1, 2), Fraction(1, 1)),
              (198.49843369241415, 330.31007853365884)), [
                  1519.7387478270496, 3001.272390797349, 5483.459690230956,
                  9541.078290708863, 16053.382203308038, 26357.722033338472
              ], ((0.5, 1.0), (198.498, 330.31))),
            (((Fraction(1, 2), Fraction(2, 1)),
              (364.8953574839538, 955.112891429775)), [
                  8005.798488922153, 24678.100241316602, 61492.120408989555,
                  135438.25582322088, 275437.4080892591, 529852.4683831728
              ], ((0.5, 2.0), (364.895, 955.113))),
            (((Fraction(2, 3), 0), (210.3330694987003, 216.92681699057178)), [
                756.9543955249287, 1078.0403374609873, 1587.732499462487,
                2396.8183736036135, 3681.1621413478483, 5719.930789353846
            ], ((0.666667, 0.0), (210.333, 216.927))),
            (((Fraction(3, 4), 0), (584.9013580111865, 547.3819137326248)), [
                2133.1312104080216, 3188.7032237497583, 4963.956667872185,
                7949.565182530901, 12970.740177185866, 21415.31628391976
            ], ((0.75, 0.0), (584.901, 547.382))),
            (((Fraction(2, 3), Fraction(1, 1)),
              (953.7431095545323, 838.6830078923111)), [
                  5180.440612885216, 11017.939204262264, 22254.963733492266,
                  43220.718142861355, 81467.31186721638, 150062.28747711863
              ], ((0.666667, 1.0), (953.743, 838.683))),
            (((Fraction(3, 4), Fraction(1, 1)),
              (355.50475595529707, 203.8586065472728)), [
                  1508.7031806978325, 3264.666020281983, 6878.980165468027,
                  14069.422473092825, 28032.266949776153, 54659.84835672009
              ], ((0.75, 1.0), (355.505, 203.859))),
            (((Fraction(4, 5), 0), (836.4136945625079, 988.9778707606993)), [
                3834.433979810851, 6056.270190754812, 9924.711720732794,
                16660.0596267337, 28386.981453862616, 48804.738258536
            ], ((0.8, 0.0), (836.414, 988.978))),
            (((Fraction(2, 3), Fraction(2, 1)),
              (30.370684174349353, 735.0460670350777)), [
                  7439.17078417381, 26492.029097437142, 74706.39628779484,
                  185250.37318416082, 423416.90529637906, 914811.6843285251
              ], ((0.666667, 2.0), (30.3707, 735.046))),
            (((Fraction(3, 4), Fraction(2, 1)),
              (868.3557741916711, 651.1510359543278)), [
                  8235.288783790882, 28745.079790529722, 84215.68837634563,
                  219888.5845432864, 531287.5324653349, 1215054.5573746935
              ], ((0.75, 2.0), (868.356, 651.151))),
            (((Fraction(1, 1), 0), (218.35982887307853, 796.5944762009765)), [
                3404.7377336769846, 6591.11563848089, 12963.871448088703,
                25709.383067304327, 51200.406305735574, 102182.45278259806
            ], ((1.0, 0.0), (218.36, 796.594))),
            (((Fraction(1, 1), Fraction(1, 1)),
              (729.8185276288646, 193.81268721358396)), [
                  2280.320025337536, 5381.323020754879, 13133.830509298237,
                  31739.8484818023, 75153.8904176451, 174385.9862710001
              ], ((1.0, 1.0), (729.819, 193.813))),
            (((Fraction(1, 1), Fraction(2, 1)),
              (640.8857481060144, 219.18401331861853)), [
                  4147.829961203911, 16422.13470704655, 56751.993157672354,
                  175988.09640300085, 505640.8524342031, 1375363.0172824815
              ], ((1.0, 2.0), (640.886, 219.184))),
            (((Fraction(5, 4), 0), (41.41439439883205, 336.0107050284518)), [
                1942.1779790139603, 4562.217551923606, 10793.75695530929,
                25614.93894716142, 60865.84910208294, 144707.1154351916
            ], ((1.25, 0.0), (41.4144, 336.011))),
            (((Fraction(5, 4), Fraction(1, 1)),
              (334.34344019665406, 396.1666489172391)), [
                  4816.457423065935, 16324.82895624065, 51043.67450160326,
                  151094.0866783297, 430617.2857956476, 1194290.5953048149
              ], ((1.25, 1.0), (334.343, 396.167))),
            (((Fraction(4, 3), 0), (646.7639733950962, 836.1733802023176)), [
                5956.133986838953, 14025.538056632176, 34359.162151911856,
                85596.68418849679, 214707.14930518836, 540045.1348296632
            ], ((1.33333, 0.0), (646.764, 836.173))),
            (((Fraction(4, 3), Fraction(1, 1)),
              (961.3235324936308, 976.4308028867101)), [
                  13361.221801905767, 47830.002071055715, 158430.21598980148,
                  496957.254308979, 1500759.03676648, 4410090.312337113
              ], ((1.33333, 1.0), (961.324, 976.431))),
            (((Fraction(3, 2), 0), (993.5060588040174, 789.8477910359313)), [
                7312.288387091468, 18865.72139149913, 51543.76468510362,
                143971.22872036492, 405395.57506920083, 1144815.2873512912
            ], ((1.5, 0.0), (993.506, 789.848))),
            (((Fraction(3, 2), Fraction(1, 1)),
              (306.3138276450713, 176.2989470011036)), [
                  3127.096979662729, 12273.883197935773, 45438.844259927595,
                  159873.90543152104, 541896.6790150353, 1787463.339791056
              ], ((1.5, 1.0), (306.314, 176.299))),
            (((Fraction(3, 2), Fraction(2, 1)),
              (623.7521800036756, 545.819243769916)), [
                  18089.967980640988, 111778.0688886881, 559542.6578003977,
                  2470719.679039657, 10061164.053347098, 38731727.88533937
              ], ((1.5, 2.0), (623.752, 545.819))),
            (((Fraction(5, 3), 0), (674.515344060474, 93.52470814254)), [
                1617.1853318529588, 3667.3060046217547, 10176.033429851634,
                30839.954953419994, 96443.81648202146, 304723.0940893776
            ], ((1.66667, 0.0), (674.515, 93.5247))),
            (((Fraction(7, 4), 0), (192.79185213528302, 921.0172026961501)), [
                10612.912005989885, 35241.758587692566, 118082.9937972425,
                396726.5846888438, 1333968.1715455244, 4486460.534003467
            ], ((1.75, 0.0), (192.792, 921.017))),
            (((Fraction(2, 1), 0), (601.3899361738712, 695.3677746959734)), [
                11727.274331309445, 45104.92751671617, 178615.54025834304,
                712657.9912248506, 2848827.7950908807, 11393507.010555001
            ], ((2.0, 0.0), (601.39, 695.368))),
            (((Fraction(2, 1), Fraction(1, 1)),
              (95.64610607936808, 399.1728717576563)), [
                  12869.17800232437, 76736.83748354937, 408848.66678591946,
                  2043860.7495052798, 9810168.14242224, 45780433.96224817
              ], ((2.0, 1.0), (95.6461, 399.173))),
            (((Fraction(2, 1), Fraction(2, 1)),
              (910.0933475245264, 649.3303470713025)), [
                  42467.23556008789, 374924.3732605948, 2660567.19495158,
                  16623766.97837287, 95748565.7510935, 521293702.00774235
              ], ((2.0, 2.0), (910.093, 649.33))),
            (((Fraction(9, 4), 0), (991.6538373014305, 196.89404893784854)), [
                5446.857587036748, 22184.29382918959, 101801.40689347989,
                480526.35622160026, 2282055.9737017835, 10851623.32968404
            ], ((2.25, 0.0), (991.654, 196.894))),
            (((Fraction(7, 3), 0), (454.80058623925424, 800.545149328823)), [
                20787.379981321064, 102924.57970032863, 516870.1273219162,
                2603024.963156712, 13116586.527189683, 66101616.62275291
            ], ((2.33333, 0.0), (454.801, 800.545))),
            (((Fraction(5, 2), 0), (444.6771885244102, 788.914623707213)), [
                25689.945147155224, 143253.478519879, 808293.2518647105,
                4570326.31979187, 25851599.066826478, 146236657.2404956
            ], ((2.5, 0.0), (444.677, 788.915))),
            (((Fraction(5, 2), Fraction(1, 1)),
              (561.6799261002398, 118.03818227888634)), [
                  8116.123591948965, 64663.26005666098, 484046.0745404187,
                  3419312.620222673, 23207812.621413387, 153160603.80521256
              ], ((2.5, 1.0), (561.68, 118.038))),
            # NOTE(review): the expected constant is 0.0 here although orig lists 98.87 - confirm this is intended
            (((Fraction(5, 2), Fraction(2, 1)),
              (98.86684880610103, 982.9958000733947)), [
                  125922.32925820061, 1601570.0898857696, 16105502.055251304,
                  142353096.47013444, 1159589128.4318285, 8928380108.544924
              ], ((2.5, 2.0), (0.0, 982.996))),
            (((Fraction(8, 3), 0), (119.32012560773262, 194.108049319692)), [
                7945.26627894892, 49810.980751448864, 315641.6975316356,
                2003561.5353809511, 12721184.440340932, 80773847.93606871
            ], ((2.66667, 0.0), (119.32, 194.108))),
            (((Fraction(11, 4), 0), (335.21389505653997, 713.4600389668701)), [
                32622.72952123845, 217538.8630750938, 1461501.3736992066,
                9829850.300849823, 66125167.21631561, 444833408.7346114
            ], ((2.75, 0.0), (335.214, 713.46))),
            (((Fraction(3, 1), 0), (854.0891091206599, 475.68703018220896)), [
                31298.059040782035, 244405.84856241164, 1949268.1647354485,
                15588166.694119744, 124699354.92919411, 997588860.809789
            ], ((3.0, 0.0), (854.089, 475.687))),
            (((Fraction(3, 1), Fraction(1, 1)),
              (498.14812816021515, 788.4374477528575)), [
                  101418.14144052597, 1211538.0678765494, 12918257.292110976,
                  129178089.58795632, 1240105375.9704788, 11574312691.156733
              ], ((3.0, 1.0), (498.148, 788.437))),
        ]
        # a single modeler is shared by all entries; crossvalidation is disabled for it
        modeler = SingleParameterModeler()
        modeler.use_crossvalidation = False
        for orig, values, (exponents, coeff) in data:
            # build the expected function: constant only, or constant plus one compound term
            if exponents:
                term = CompoundTerm.create(*exponents)
                term.coefficient = coeff[1]
                function = SingleParameterFunction(term)
            else:
                function = SingleParameterFunction()
            function.constant_coefficient = coeff[0]

            measurements = [
                Measurement(Coordinate(p), None, None, v)
                for p, v in zip(points, values)
            ]
            models = modeler.model([measurements])
            self.assertEqual(1, len(models))
            self.assertApproxFunction(function,
                                      models[0].hypothesis.function,
                                      places=3)
Esempio n. 12
0
def read_text_file(path, progress_bar=DUMMY_PROGRESS):
    """Read an experiment from a file in the line-based text format.

    Every line that is neither blank nor a ``#`` comment starts with a field
    name followed by its value. The recognised fields are ``PARAMETER``,
    ``POINTS``, ``REGION``, ``METRIC`` and ``DATA``. Each ``DATA`` line holds
    the values of one measurement; it is assigned to the metric and callpath
    currently in effect and to the next coordinate in ``POINTS`` order.

    :param path: Path of the text file to read.
    :param progress_bar: Callable that wraps an iterable; it wraps the lines
        while parsing and is passed on to ``create_call_tree`` and
        ``io_helper.validate_experiment``.
    :return: The experiment built from the file.
    :raises FileFormatError: If the file contains no data, contains an unknown
        field, has a ``POINTS``/``DATA`` line while zero or more than four
        parameters are declared, or has more ``DATA`` lines than points
        since the last ``METRIC``/``REGION`` line.
    """
    # read text file into list
    with open(path) as file:
        lines = file.readlines()

    # a file consisting only of blank lines (or of no lines) holds no data
    if all(line.isspace() for line in lines):
        raise FileFormatError(f'File contains no data: "{path}"')

    # create an experiment object to save the data loaded from the text file
    experiment = Experiment()

    # variables for parsing
    number_parameters = 0
    last_metric = None
    last_callpath = Callpath("")
    coordinate_id = 0

    # parse text to extrap objects
    for i, line in enumerate(progress_bar(lines)):
        if line.isspace() or line.startswith('#'):
            continue  # allow comments
        line = re_whitespace.sub(' ', line)
        # get field name
        field_separator_idx = line.find(" ")
        field_name = line[:field_separator_idx]
        field_value = line[field_separator_idx + 1:].strip()

        if field_name == "METRIC":
            # create a new metric if not already exists
            metric = Metric(field_value)
            if metric not in experiment.metrics:
                experiment.add_metric(metric)
            # Always switch to the metric named on this line. It compares
            # equal to the registered one, so re-selecting an earlier metric
            # no longer attributes data to the most recently created metric.
            last_metric = metric
            # reset the coordinate id, since moving to a new metric
            coordinate_id = 0

        elif field_name == "REGION":
            # create a new region if not already exists
            callpath = Callpath(field_value)
            experiment.add_callpath(callpath)
            last_callpath = callpath

            # reset the coordinate id, since moving to a new region
            coordinate_id = 0

        elif field_name == "DATA":
            if last_metric is None:
                # no METRIC line seen so far: fall back to the unnamed metric
                last_metric = Metric("")
            # create a new data set
            values = [float(d) for d in field_value.split(" ")]
            if 1 <= number_parameters <= 4:
                # create one measurement per repetition

                if coordinate_id >= len(experiment.coordinates):
                    raise FileFormatError(
                        f'To many DATA lines ({coordinate_id}) for the number of POINTS '
                        f'({len(experiment.coordinates)}) in line {i}.')
                measurement = Measurement(
                    experiment.coordinates[coordinate_id], last_callpath,
                    last_metric, values)
                experiment.add_measurement(measurement)
                coordinate_id += 1
            elif number_parameters >= 5:
                raise FileFormatError(
                    "This input format supports a maximum of 4 parameters.")
            else:
                raise FileFormatError("This file has no parameters.")

        elif field_name == "PARAMETER":
            # create a new parameter
            parameters = field_value.split(' ')
            experiment.parameters += [Parameter(p) for p in parameters]
            number_parameters = len(experiment.parameters)

        elif field_name == "POINTS":
            coordinate_string = field_value.strip()
            if '(' in coordinate_string:
                # "(a b) (c d)" -> ["a b", "c d"]
                coordinate_string = coordinate_string.replace(") (", ")(")
                coordinate_string = coordinate_string[1:-1]
                coordinate_strings = coordinate_string.split(')(')
            else:
                coordinate_strings = coordinate_string.split(' ')
            # create a new point
            if number_parameters == 1:
                experiment.coordinates.extend(
                    Coordinate(float(c)) for c in coordinate_strings)
            elif 1 < number_parameters < 5:
                for point_string in coordinate_strings:
                    point_values = point_string.strip().split(" ")
                    coordinate = Coordinate(float(v) for v in point_values)
                    experiment.coordinates.append(coordinate)
            elif number_parameters >= 5:
                raise FileFormatError(
                    "This input format supports a maximum of 4 parameters.")
            else:
                raise FileFormatError("This file has no parameters.")
        else:
            raise FileFormatError(
                f'Encountered wrong field: "{field_name}" in line {i}: {line}')

    # When the metric / callpath in effect at the end of the file is the
    # unnamed one, make sure it is registered with the experiment. The
    # membership checks avoid a duplicate entry when the file declared an
    # empty METRIC or REGION explicitly.
    if last_metric == Metric('') and last_metric not in experiment.metrics:
        experiment.metrics.append(last_metric)
    if (last_callpath == Callpath('')
            and last_callpath not in experiment.callpaths):
        experiment.callpaths.append(last_callpath)
    # create the call tree and add it to the experiment
    call_tree = create_call_tree(experiment.callpaths,
                                 progress_bar,
                                 progress_scale=10)
    experiment.call_tree = call_tree

    io_helper.validate_experiment(experiment, progress_bar)

    return experiment
Esempio n. 13
0
def _read_legacy_json_file(experiment, json_data, progress_bar):
    """Populate *experiment* from JSON data in the legacy layout.

    Parameters, callpaths, metrics and coordinates are read from lists of
    entries and registered in ascending order of their "id" field.
    Measurements reference those entries by id; an id is turned into a list
    index by subtracting one (this presumes ids start at 1 without gaps).
    All values sharing the same (coordinate, callpath, metric) ids are
    collected into a single Measurement.

    :param experiment: Experiment object that receives the loaded data.
    :param json_data: Parsed JSON document (mapping) in the legacy layout.
    :param progress_bar: Callable that wraps each iterable being processed.
    """

    def sorted_by_id(section):
        # entries of one JSON section, ordered by their "id" field
        return sorted(json_data[section], key=lambda entry: entry["id"])

    # read parameters
    parameter_entries = sorted_by_id("parameters")
    logging.debug(f"Number of parameters: {len(parameter_entries)}")
    for number, entry in enumerate(progress_bar(parameter_entries), start=1):
        name = entry["name"]
        experiment.add_parameter(Parameter(name))
        logging.debug(f"Parameter {number}: " + name)

    # read callpaths
    callpath_entries = sorted_by_id("callpaths")
    logging.debug(f"Number of callpaths: {len(callpath_entries)}")
    for number, entry in enumerate(progress_bar(callpath_entries), start=1):
        name = entry["name"]
        experiment.add_callpath(Callpath(name))
        logging.debug(f"Callpath {number}: " + name)

    # read metrics
    metric_entries = sorted_by_id("metrics")
    logging.debug(f"Number of metrics: {len(metric_entries)}")
    for number, entry in enumerate(progress_bar(metric_entries), start=1):
        name = entry["name"]
        experiment.add_metric(Metric(name))
        logging.debug(f"Metric {number}: " + name)

    # read coordinates
    coordinate_entries = sorted_by_id("coordinates")
    logging.debug(f"Number of coordinates: {len(coordinate_entries)}")
    for number, entry in enumerate(progress_bar(coordinate_entries), start=1):
        # order the values of a coordinate by the id of their parameter
        pairs = sorted(entry["parameter_value_pairs"],
                       key=lambda pair: pair["parameter_id"])
        point = Coordinate(float(pair["parameter_value"]) for pair in pairs)
        experiment.add_coordinate(point)
        logging.debug(f"Coordinate {number}: {point}")

    # read measurements, grouping the values by the ids they belong to
    aggregate_data = {}
    measurement_entries = json_data["measurements"]
    logging.debug(f"Number of measurements: {len(measurement_entries)}")
    for entry in progress_bar(measurement_entries):
        coordinate_idx = int(entry["coordinate_id"]) - 1
        callpath_idx = int(entry["callpath_id"]) - 1
        metric_idx = int(entry["metric_id"]) - 1
        value = float(entry["value"])
        group_key = (coordinate_idx, callpath_idx, metric_idx)
        aggregate_data.setdefault(group_key, []).append(value)

    # create one measurement per group of ids
    for group_key in progress_bar(aggregate_data):
        coordinate_idx, callpath_idx, metric_idx = group_key
        experiment.add_measurement(Measurement(
            experiment.coordinates[coordinate_idx],
            experiment.callpaths[callpath_idx],
            experiment.metrics[metric_idx],
            aggregate_data[group_key]))