def _build_uniform_regression_data_format(x_pts, y_pts, model):
    """Assemble the uniform regression data dictionary for a single model.

    The points are stored in the model properties under the 'x_pts' and 'y_pts'
    keys and the callable found under 'data_gen' is invoked with the whole
    dictionary unpacked as keyword arguments; its result replaces the original
    'data_gen' value. The supplied ``model`` dictionary is updated in place and
    the very same object is returned.

    Arguments:
        x_pts(list): the list of x points coordinates
        y_pts(list): the list of y points coordinates
        model(dict): the regression model properties

    Raises:
        InvalidPointsException: if the points count is too low or their coordinates
            lists have different lengths
        DictionaryKeysValidationFailed: in case the data format dictionary is incorrect

    Return:
        dict: the uniform data dictionary
    """
    # Requirements first: the points and the model dictionary must pass the checks
    tools.check_points(len(x_pts), len(y_pts), tools.MIN_POINTS_COUNT)
    tools.validate_dictionary_keys(model, ['data_gen'], ['x', 'y'])

    model.update({'x_pts': x_pts, 'y_pts': y_pts})

    # Swap the generator factory for the generator object it produces
    generator_factory = model['data_gen']
    model['data_gen'] = generator_factory(**model)
    return model
def postprocess(profile, **configuration):
    """Entry point invoked from perun core that runs the regression analysis.

    The computed models are stored under ``profile['global']['models']``: the
    analysis result becomes the value when the key is missing, otherwise the
    existing list is extended with it.

    Arguments:
        profile(dict): the profile to analyze
        configuration: the perun and options context; the 'method',
            'regression_models' and 'steps' keys are read from it

    Return:
        tuple: the OK postprocess status, an empty message and a dictionary
            holding the updated profile under the 'profile' key
    """
    # The configuration has to pass the key validation before anything is computed
    tools.validate_dictionary_keys(configuration, ['method', 'regression_models', 'steps'], [])

    # Run the regression analysis on the data supplied by the provider
    resources = data_provider.data_provider_mapper(profile)
    analysis = compute(
        resources,
        configuration['method'],
        configuration['regression_models'],
        steps=configuration['steps'],
    )

    # Attach the results to the global section of the profile
    global_section = profile['global']
    if 'models' in global_section:
        global_section['models'].extend(analysis)
    else:
        global_section['models'] = analysis
    return PostprocessStatus.OK, "", {'profile': profile}
def _transform_to_output_data(data, extra_keys=None):
    """Reduce the data dictionary to its output form.

    Computational details are dropped: only the 'model', 'r_square',
    'x_interval_start', 'x_interval_end', 'method' and 'uid' keys (plus any
    requested extra keys) are copied over, and only when they are present in the
    data dictionary. The coefficients are always included and are stored as a
    list of ``{'name': ..., 'value': ...}`` records named 'b0', 'b1', ... in
    their original order.

    :param dict data: the data dictionary with results
    :param list of str extra_keys: the extra keys to include
    :raises DictionaryKeysValidationFailed: in case the data format dictionary is incorrect
    :returns dict: the output dictionary
    """
    tools.validate_dictionary_keys(
        data, ['model', 'coeffs', 'r_square', 'x_interval_start', 'x_interval_end'], [])

    # Keys copied verbatim from the data dictionary (when available)
    copied_keys = ['model', 'r_square', 'x_interval_start', 'x_interval_end', 'method', 'uid']
    if extra_keys is not None:
        copied_keys.extend(extra_keys)

    output = {}
    for key in copied_keys:
        if key in data:
            output[key] = data[key]

    # Coefficients get default positional names
    output['coeffs'] = [
        {'name': 'b{0}'.format(position), 'value': value}
        for position, value in enumerate(data['coeffs'])
    ]
    return output
def coefficients_to_points(model, coeffs, x_interval_start, x_interval_end, **_):
    """Turn regression coefficients into points that can be plotted as a curve.

    The 'plot_model' transformation data of the model is enriched with the
    coefficient values (keyed by the coefficient names) and the interval bounds,
    and the callable stored under its 'computation' key is invoked with all of
    these as keyword arguments. Coefficient records lacking a name or a value
    fall back to 'invalid_coeff' and 0 respectively.

    Arguments:
        model(str): the model name
        coeffs(list): the model coefficients
        x_interval_start(int or float): the left bound of the x interval
        x_interval_end(int or float): the right bound of the x interval

    Raises:
        DictionaryKeysValidationFailed: if some dictionary checking fails
        TypeError: if the required function arguments are not in the unpacked
            dictionary input

    Return:
        dict: dictionary with 'plot_x' and 'plot_y' arrays
    """
    # Fetch the transformation record and make sure it can be computed
    plot_data = get_transformation_data_for(model, 'plot_model')
    tools.validate_dictionary_keys(plot_data, ['computation'], [])

    # Feed the coefficients and interval bounds into the record
    for coefficient in coeffs:
        name = coefficient.get('name', 'invalid_coeff')
        plot_data[name] = coefficient.get('value', 0)
    plot_data['x_interval_start'] = x_interval_start
    plot_data['x_interval_end'] = x_interval_end

    # Run the transformation and verify that it produced the points
    compute_points = plot_data['computation']
    points = compute_points(**plot_data)
    tools.validate_dictionary_keys(points, ['plot_x', 'plot_y'], [])
    return points
def compute_moving_average(data_gen, configuration):
    """Run the moving average analysis on every chunk of resources.

    Each chunk yielded by the generator is analysed separately; the resulting
    model is tagged with the chunk's uid and with the 'moving_average' method
    name before it is collected.

    :param iter data_gen: the generator object with collected data (data provider generators)
    :param dict configuration: the perun and option context
    :return: list of dict: the computation results
    """
    # The required keys depend on the selected moving method
    required_keys = _METHOD_REQUIRED_KEYS[configuration['moving_method']]
    tools.validate_dictionary_keys(configuration, required_keys, [])

    models = []
    for x_pts, y_pts, uid in data_gen:
        model = moving_average(x_pts, y_pts, configuration)
        model.update({'uid': uid, 'method': 'moving_average'})
        models.append(model)
    return models
def postprocess(profile, **configuration):
    """Entry point invoked from perun core that runs the regression analysis.

    The data provider receives the profile together with the whole
    configuration, and the computed models are handed over to
    ``tools.add_models_to_profile`` whose return value is used as the resulting
    profile.

    :param dict profile: the profile to analyze
    :param configuration: the perun and options context; the 'method',
        'regression_models' and 'steps' keys are read from it
    :returns tuple: the OK postprocess status, an empty message and a dictionary
        holding the resulting profile under the 'profile' key
    """
    # The configuration has to pass the key validation before anything is computed
    tools.validate_dictionary_keys(configuration, ['method', 'regression_models', 'steps'], [])

    # Run the regression analysis on the data supplied by the provider
    resources = data_provider.data_provider_mapper(profile, **configuration)
    analysis = methods.compute(
        resources,
        configuration['method'],
        configuration['regression_models'],
        steps=configuration['steps'],
    )

    # Attach the results to the profile
    updated_profile = tools.add_models_to_profile(profile, analysis)
    return PostprocessStatus.OK, "", {'profile': updated_profile}
def compute_regressogram(data_gen, configuration):
    """Run the regressogram analysis on every chunk of resources.

    Each chunk yielded by the generator is analysed separately; the resulting
    model is tagged with the chunk's uid and with the 'regressogram' method name
    before it is collected.

    :param iter data_gen: the generator object with collected data (data provider generators)
    :param dict configuration: the perun and option context
    :return: list of dict: the computation results
    """
    # The configuration has to pass the key validation first
    tools.validate_dictionary_keys(configuration, _REQUIRED_KEYS, [])

    results = []
    for x_pts, y_pts, uid in data_gen:
        # A non-empty (truthy) 'bucket_number' wins; otherwise the value stored
        # under 'bucket_method' is passed on instead
        buckets = configuration.get('bucket_number') or configuration['bucket_method']
        model = regressogram(x_pts, y_pts, configuration['statistic_function'], buckets)
        model.update({'uid': uid, 'method': 'regressogram'})
        results.append(model)
    return results
def derived_const(analysis, const_ref, **_):
    """Derive a constant model from every computed linear regression model.

    The derivation assumes that a linear model with a very small slope (b1) and a
    small R^2 resembles a constant model, so the constant model fitness is taken
    as ``1 - R^2`` of the linear model and then adjusted by a modifier computed
    from the slope and the 'b1_threshold' value:

      1. slope above the threshold - the modifier is ``(slope / threshold) / 10``,
         increased by 1 when it is below 1; the fitness is divided by it
      2. slope at or below the threshold - the modifier is
         ``(1 / threshold) * (threshold - slope) + 1``; the fitness is multiplied
         by it

    The fitness is finally clamped to the <0, 1> interval. A non-positive
    'b1_threshold' in the template is replaced by the default threshold; this
    replacement is written back into ``const_ref`` itself.

    :param list of dict analysis: computed regression models
    :param dict const_ref: the constant model template from _MODELS dictionary
    :returns iterable: generator which produces constant model for every computed linear model
    """
    # Only the models required by the template are of interest
    linear_models = _filter_by_models(analysis, const_ref['required'])

    # Fall back to the default threshold when the configured one is invalid
    if const_ref['b1_threshold'] <= 0:
        const_ref['b1_threshold'] = _DEFAULT_THRESHOLD

    for linear in linear_models:
        # Every key used below must be present in the linear model record
        tools.validate_dictionary_keys(linear, [
            'r_square', 'coeffs', 'y_sum', 'pts_num', 'x_interval_start', 'x_interval_end',
            'uid', 'method'
        ], [])

        # Work on a (shallow) duplicate of the template
        const = const_ref.copy()
        threshold = const['b1_threshold']
        fitness = 1 - linear['r_square']
        slope = abs(linear['coeffs'][1])

        if slope > threshold:
            # Steep slope: the modifier lowers the fitness of the constant model
            modifier = (slope / threshold) / 10
            if modifier < 1:
                modifier += 1
            fitness /= modifier
        else:
            # Flat slope: the modifier raises the fitness of the constant model
            modifier = (1 / threshold) * (threshold - slope) + 1
            fitness *= modifier

        # Keep the fitness within the <0, 1> interval
        if fitness > 1:
            fitness = 1
        elif fitness < 0:
            fitness = 0

        # Fill in the constant model record; b0 is the mean of the y values
        const.update({
            'r_square': fitness,
            'x_interval_start': linear['x_interval_start'],
            'x_interval_end': linear['x_interval_end'],
            'coeffs': [linear['y_sum'] / linear['pts_num'], 0],
            'uid': linear['uid'],
            'method': linear['method'],
        })
        yield const