Example #1
0
def _build_uniform_regression_data_format(x_pts, y_pts, model):
    """Builds the uniform regression data dictionary out of the model properties and the
    regression data points.

    The regression computation works with this uniform dictionary, which keeps the
    computational methods and models generic and easy to extend.

    Note that the supplied model dictionary is updated in place: the points are stored under
    the 'x_pts' and 'y_pts' keys and the 'data_gen' callable is replaced by the result of
    calling it with the whole dictionary unpacked as keyword arguments.

    Arguments:
        x_pts(list): the list of x points coordinates
        y_pts(list): the list of y points coordinates
        model(dict): the regression model properties
    Raises:
        InvalidPointsException: if the points count is too low or their coordinates list have
                                different lengths
        DictionaryKeysValidationFailed: in case the data format dictionary is incorrect
    Return:
        dict: the uniform data dictionary (the very same object as the model argument)

    """
    # Verify the points collection and the model dictionary keys first
    tools.check_points(len(x_pts), len(y_pts), tools.MIN_POINTS_COUNT)
    tools.validate_dictionary_keys(model, ['data_gen'], ['x', 'y'])

    # The points have to be attached before the generator is created, since the generator
    # factory receives the complete dictionary (points included) as keyword arguments
    model.update({'x_pts': x_pts, 'y_pts': y_pts})
    generator_factory = model['data_gen']
    model['data_gen'] = generator_factory(**model)
    return model
Example #2
0
def postprocess(profile, **configuration):
    """Entry point invoked from the perun core that performs the postprocess actions.

    The computed models are stored under the 'models' key of the profile 'global' section.
    When the key already exists, the new models are appended to the stored ones.

    Arguments:
        profile(dict): the profile to analyze
        configuration: the perun and options context
    Return:
        tuple: the OK postprocess status, an empty message and a dictionary carrying the
               updated profile under the 'profile' key
    """
    # The configuration has to provide everything the computation asks for
    required_keys = ['method', 'regression_models', 'steps']
    tools.validate_dictionary_keys(configuration, required_keys, [])

    # Run the regression analysis on top of the profile data
    data_source = data_provider.data_provider_mapper(profile)
    computed_models = compute(
        data_source,
        configuration['method'],
        configuration['regression_models'],
        steps=configuration['steps'])

    # Save the results - either extend the already stored models or start a new collection
    global_section = profile['global']
    if 'models' in global_section:
        global_section['models'].extend(computed_models)
    else:
        global_section['models'] = computed_models

    return PostprocessStatus.OK, "", {'profile': profile}
Example #3
0
def _transform_to_output_data(data, extra_keys=None):
    """Converts the data dictionary into its output form, dropping the computational details
    and keys that are irrelevant for the result and its further manipulation.

    The output contains the 'coeffs' key and - as long as they are present in the data
    dictionary - the 'model', 'r_square', 'x_interval_start', 'x_interval_end', 'method' and
    'uid' keys together with any requested extra keys. Keys that are missing in the data
    dictionary are skipped. Coefficients are stored as a list of 'name' / 'value' records
    with the default names 'b0', 'b1'...

    :param dict data: the data dictionary with results
    :param list of str extra_keys: the extra keys to include
    :raises DictionaryKeysValidationFailed: in case the data format dictionary is incorrect
    :returns dict: the output dictionary
    """
    required_keys = ['model', 'coeffs', 'r_square', 'x_interval_start', 'x_interval_end']
    tools.validate_dictionary_keys(data, required_keys, [])

    # Keys that are copied over verbatim, provided that the data dictionary contains them
    copied_keys = ['model', 'r_square', 'x_interval_start', 'x_interval_end', 'method', 'uid']
    if extra_keys is not None:
        copied_keys.extend(extra_keys)

    output = {}
    for key in copied_keys:
        if key in data:
            output[key] = data[key]

    # Coefficients are set last, so they replace any 'coeffs' value copied via the extra keys
    output['coeffs'] = [
        {'name': 'b{0}'.format(position), 'value': value}
        for position, value in enumerate(data['coeffs'])
    ]
    return output
Example #4
0
def coefficients_to_points(model, coeffs, x_interval_start, x_interval_end,
                           **_):
    """ Converts the coefficients computed by the regression analysis into points that can
        be plotted as a function / curve.

    The transformation data obtained for the model are extended by the coefficients (stored
    under their names) and by the interval bounds, and then handed over - unpacked as keyword
    arguments - to the 'computation' function found in the transformation data. A coefficient
    record without the 'name' key is stored as 'invalid_coeff', a record without the 'value'
    key contributes the value 0.

    Arguments:
        model(str): the model name
        coeffs(list): the model coefficients
        x_interval_start(int or float): the left bound of the x interval
        x_interval_end(int or float): the right bound of the x interval
    Raises:
        DictionaryKeysValidationFailed: if some dictionary checking fails
        TypeError: if the required function arguments are not in the unpacked dictionary input
    Return:
        dict: dictionary with 'plot_x' and 'plot_y' arrays
    """
    # Fetch the plotting transformation record of the model and make sure it is usable
    plot_data = get_transformation_data_for(model, 'plot_model')
    tools.validate_dictionary_keys(plot_data, ['computation'], [])

    # Store every coefficient under its name, falling back to safe defaults
    for coefficient in coeffs:
        coeff_name = coefficient.get('name', 'invalid_coeff')
        plot_data[coeff_name] = coefficient.get('value', 0)
    plot_data['x_interval_start'] = x_interval_start
    plot_data['x_interval_end'] = x_interval_end

    # Run the transformation and verify that it produced the plot coordinates
    transform = plot_data['computation']
    points = transform(**plot_data)
    tools.validate_dictionary_keys(points, ['plot_x', 'plot_y'], [])

    return points
Example #5
0
def compute_moving_average(data_gen, configuration):
    """
    Wrapper that runs the moving average analysis on every chunk of resources.

    Each computed model is tagged with the 'uid' of its chunk and with the 'method' key set
    to 'moving_average'.

    :param iter data_gen: the generator object with collected data (data provider generators)
    :param dict configuration: the perun and option context
    :return: list of dict: the computation results
    """
    # every moving average method requires its own set of configuration keys
    required_keys = _METHOD_REQUIRED_KEYS[configuration['moving_method']]
    tools.validate_dictionary_keys(configuration, required_keys, [])

    def _tagged_model(x_pts, y_pts, uid):
        """Computes the model of a single chunk and tags it with its uid and method name."""
        model = moving_average(x_pts, y_pts, configuration)
        model['uid'] = uid
        model['method'] = 'moving_average'
        return model

    # one output dictionary per chunk of resources
    return [_tagged_model(x_pts, y_pts, uid) for x_pts, y_pts, uid in data_gen]
Example #6
0
def postprocess(profile, **configuration):
    """Entry point invoked from the perun core that performs the postprocess actions.

    :param dict profile: the profile to analyze
    :param configuration: the perun and options context
    :returns tuple: the OK postprocess status, an empty message and a dictionary carrying the
        updated profile under the 'profile' key
    """
    # The configuration has to provide everything the computation asks for
    required_keys = ['method', 'regression_models', 'steps']
    tools.validate_dictionary_keys(configuration, required_keys, [])

    # Run the regression analysis; the data provider receives the whole configuration as well
    data_source = data_provider.data_provider_mapper(profile, **configuration)
    computed_models = methods.compute(
        data_source,
        configuration['method'],
        configuration['regression_models'],
        steps=configuration['steps'])

    # Save the results into the profile
    updated_profile = tools.add_models_to_profile(profile, computed_models)

    return PostprocessStatus.OK, "", {'profile': updated_profile}
Example #7
0
def compute_regressogram(data_gen, configuration):
    """
    Wrapper that runs the regressogram analysis on every chunk of resources.

    Each result is tagged with the 'uid' of its chunk and with the 'method' key set to
    'regressogram'.

    :param iter data_gen: the generator object with collected data (data provider generators)
    :param dict configuration: the perun and option context
    :return: list of dict: the computation results
    """
    # the configuration has to contain the keys required by the method
    tools.validate_dictionary_keys(configuration, _REQUIRED_KEYS, [])

    def _analyse_chunk(x_pts, y_pts, uid):
        """Computes the regressogram of a single chunk and tags it with uid and method name."""
        # A truthy user-supplied number of buckets takes precedence; otherwise the value of
        # 'bucket_method' (the method for its estimation) is handed over instead
        buckets = configuration.get('bucket_number') or configuration['bucket_method']
        result = regressogram(x_pts, y_pts, configuration['statistic_function'], buckets)
        result['uid'] = uid
        result['method'] = 'regressogram'
        return result

    # one output dictionary per chunk of resources
    return [_analyse_chunk(x_pts, y_pts, uid) for x_pts, y_pts, uid in data_gen]
Example #8
0
def derived_const(analysis, const_ref, **_):
    """Derives a constant model from every computed linear regression model.

    The implementation assumes that a linear model with a very small b1 (slope) coefficient
    and a small R^2 coefficient resembles the constant model, so it can be used to estimate
    the constant model parameters.

    The slope is compared with a threshold and the deviation from the threshold produces
    a modification coefficient that is applied to 1 - (linear)R^2:

    1. slope is bigger than threshold
     - the modification coefficient is the ratio of the slope and the threshold divided
       by 10, increased by 1 if the outcome is below 1
     - 1 - (linear)R^2 is divided by the coefficient

    2. slope is not bigger than threshold
     - the modification coefficient is the difference of the threshold and the slope
       multiplied by the inverted value of the threshold, increased by 1
     - 1 - (linear)R^2 is multiplied by the coefficient

    The outcome is truncated into the <0, 1> interval and stored as 'r_square' of the
    constant model. A non-positive 'b1_threshold' in the template is replaced (in the template
    itself) by the default threshold.

    :param list of dict analysis: computed regression models
    :param dict const_ref: the constant model template from _MODELS dictionary
    :returns iterable: generator which produces constant model for every computed linear model
    """
    # Only the models required by the constant model template are of interest
    linear_models = _filter_by_models(analysis, const_ref['required'])
    # An invalid threshold falls back to the default one
    if const_ref['b1_threshold'] <= 0:
        const_ref['b1_threshold'] = _DEFAULT_THRESHOLD

    for linear in linear_models:
        # The linear model has to carry everything needed for the derivation
        tools.validate_dictionary_keys(linear, [
            'r_square', 'coeffs', 'y_sum', 'pts_num', 'x_interval_start',
            'x_interval_end', 'uid', 'method'
        ], [])

        # Every constant model starts as a fresh copy of the template
        const = const_ref.copy()
        threshold = const['b1_threshold']
        fitness = 1 - linear['r_square']
        slope = abs(linear['coeffs'][1])

        if slope > threshold:
            # Slope above the threshold - the modifier reduces the fitness of the const model
            modifier = (slope / threshold) / 10
            if modifier < 1:
                modifier += 1
            fitness /= modifier
        else:
            # Slope within the threshold - the modifier increases the fitness
            fitness *= (1 / threshold) * (threshold - slope) + 1

        # Fill in the const model record; the fitness is clamped into the <0, 1> interval
        const.update({
            'r_square': min(max(fitness, 0), 1),
            'x_interval_start': linear['x_interval_start'],
            'x_interval_end': linear['x_interval_end'],
            'coeffs': [linear['y_sum'] / linear['pts_num'], 0],
            'uid': linear['uid'],
            'method': linear['method'],
        })
        yield const