Exemple #1
0
def cyclise(xs: Vector,
            ys: Vector,
            x_period: float = 360.,
            flatten_xs_array: bool = False) -> Tuple[Vector, Vector]:
    '''
    Given periodic data (x and y values) with a known period, which potentially needs to be flattened,
    returns the cyclised data: the last point is duplicated at the front of the data,
    with its x value shifted back by one period.

    :param xs: (Vector) x values
    :param ys: (Vector) y values
    :param x_period: (Float) period of the data (xs, ys)
    :param flatten_xs_array: (Boolean) whether or not xs should be flattened (see `flatten`)
    :rtype: Tuple of cyclic xs, cyclic ys
    :raises AssertionError: if `xs` or `ys` is not a `Vector`.
    :raises Exception: if building the cyclised vectors fails. The exception carries
    `[flat_xs, ys, x_period]` and is chained to the original error.
    '''
    assert all(isinstance(an_array, Vector) for an_array in (xs, ys))

    flat_xs = flatten(xs) if flatten_xs_array else xs

    try:
        return (
            # Last x value, moved one period back, followed by all x values
            concatenate([vector([flat_xs[-1] - x_period]), flat_xs]),
            # Matching y value (unchanged by the shift), followed by all y values
            concatenate([vector([ys[-1]]), ys]),
        )
    except Exception as error:
        # Only trap ordinary errors (not KeyboardInterrupt / SystemExit) and keep
        # the original cause attached for debugging.
        raise Exception([flat_xs, ys, x_period]) from error
Exemple #2
0
def flatten(xs: Union[Vector, List[Any]]) -> Vector:
    '''
    Collapses a sequence of sequences by keeping only the leading item of each entry.

    :param xs: Vector of lists, or vector of vectors.
    :rtype: Vector made of `entry[0]` for every `entry` of `xs`, in order.
    '''
    leading_items = []
    for entry in xs:
        leading_items.append(entry[0])
    return vector(leading_items)
Exemple #3
0
def vector_if_necessary(x: Union[List, Tuple, Vector]) -> Vector:
    """
    Return `x` untouched when it already is a vector, otherwise cast it to one.

    :param x: (List/Tuple/Vector): Iterable.
    :rtype: Vector (the very same object when `x` was already a `Vector`)
    """
    return x if isinstance(x, Vector) else vector(x)
Exemple #4
0
def cst(x: Any) -> Any:
    '''
    Constant function equal to one, whose return shape follows its input:
    a vector of 1.0 (one per element of `x`) for a vector, the scalar 1.0 for a float.

    :param x: (Any) a `Vector`, or a value whose exact type is `float` or `np_float`.
    :rtype: (Any) vector of 1.0 when `x` is a vector, 1.0 when `x` is a float.
    :raises Exception: carrying `type(x)` for any other input type.
    '''
    if isinstance(x, Vector):
        ones = [1.0 for _element in x]
        return vector(ones)

    # Exact type match on purpose: subclasses are not accepted here.
    if type(x) in (float, np_float):
        return 1.0

    raise Exception(type(x))
Exemple #5
0
def plot(xs: Vector, ys: Vector, fit_function: Callable[[Any], float]) -> None:
    '''
    Draws the raw data together with the fitted function, then shows the figure.
    The fit is sampled on `NUMBER_POINTS_FIT` evenly spaced points between the first and last x values.

    :param xs: (Vector) x values.
    :param ys: (Vector) y values.
    :param fit_function: (Callable) fit function.
    '''
    # Imported here so that matplotlib is only required when plotting
    import matplotlib.pyplot as plt # type: ignore

    sample_xs = linspace(xs[0], xs[-1], NUMBER_POINTS_FIT)

    # Raw data
    plt.plot(xs, ys, label='ys')

    # Fitted curve on the fine grid
    sample_ys = vector(evaluate(fit_function, sample_xs))
    plt.plot(sample_xs, sample_ys, label='fit')

    plt.legend()
    plt.show()
Exemple #6
0
def vector_rmsd(ys1: Vector,
                ys2: Vector,
                weights: Optional[Vector] = None,
                should_align: bool = True) -> float:
    '''
    Computes the RMSD (root mean square deviation) between two vectors of equal length,
    optionally weighted, and optionally after aligning them
    (each vector is shifted by its own average, i.e. centred around 0).

    :param ys1: (Vector) first vector.
    :param ys2: (Vector) second vector.
    :param weights: (Optional) RMSD weights, forwarded to the averaging step.
    :param should_align: (Boolean) whether or not to align the two vectors
    (that is to say centre both of them around 0 before comparing them)
    :rtype: Float
    :raises AssertionError: if the two vectors do not have the same length.
    :raises TypeError: if the length of an input cannot be taken (inputs are printed first).
    '''
    try:
        assert len(ys1) == len(ys2), [ys1, ys2]
    except TypeError:
        # Show the offending inputs before letting the error propagate
        print([ys1, ys2])
        raise

    first, second = vector(ys1), vector(ys2)

    def centred(values: Vector) -> Vector:
        '''
        Shifts `values` by their average when `should_align` is set,
        and by 0. otherwise.

        :param values: (Vector) vector.
        :rtype: Vector
        '''
        if should_align:
            return values - np_average(values)
        return values - 0.

    # `d2` is defined elsewhere; presumably the element-wise squared difference.
    deviations = d2(centred(first), centred(second))
    mean_deviation = np_average(deviations, weights=weights)
    return sqrt_np(mean_deviation)
Exemple #7
0
def normalised_anti_gradient(
        xs: Vector,
        ys: Vector,
        scale: float = 1.0,
        use_interpolated_gradient: bool = False,
        max_abs_gradient: Optional[float] = None) -> Tuple[Vector, Vector]:
    '''
    Given data (x and y values), returns the normalised anti-gradient, that is to say a number between zero and `scale` (defaults to 1)
    which is equal to `scale` when the gradient is null, and equal to zero when the gradient is maximal.

    Special cases:
    - if the gradient is null everywhere (flat data), the anti-gradient is `scale` everywhere;
    - if every point has the maximal absolute gradient (the anti-gradient would be zero everywhere),
      the anti-gradient is also set to `scale` everywhere.

    :param xs: (Vector) x values.
    :param ys: (Vector) y values.
    :param scale: (Float) maximum value of the normalised anti-gradient. Defaults to 1.
    :param use_interpolated_gradient: (Boolean) whether or not to compute the gradient on a fine grid (150 points)
    of the interpolated data and interpolate it back onto `xs`.
    :param max_abs_gradient: (Float) maximum absolute gradient that should be tolerated. Will throw an exception if the gradient exceeds this value.
    Useful for catching outliers.
    :rtype: Tuple of (xs, scaled normalised anti-gradient).
    :raises AssertionError: if `xs` or `ys` is not a `Vector`.
    :raises Discountinuity_Error: if `max_abs_gradient` is given and exceeded by any point.
    :raises Exception: if the fine grid or the absolute gradient cannot be computed (chained to the original error).
    '''
    assert all(isinstance(an_array, Vector) for an_array in (xs, ys))

    if use_interpolated_gradient:
        try:
            fine_xs = linspace(xs[0], xs[-1], 150)
        except Exception as error:
            raise Exception(xs[0], xs[-1]) from error
        interpolated_Es = interpolating_fct(xs, ys)(fine_xs)
        fine_d_ys = interpolating_fct(fine_xs,
                                      gradient(interpolated_Es))(fine_xs)
        fine_d_xs = gradient(fine_xs)
        # The y increments are divided by the x increments to obtain dy/dx
        fine_total_gradient = fine_d_ys / fine_d_xs
        # Bring the gradient back from the fine grid onto the original x values
        total_gradient = interpolating_fct(fine_xs, fine_total_gradient)(xs)
        gradient_inputs = [fine_d_xs, fine_d_ys]
    else:
        d_xs = gradient(xs)
        d_ys = gradient(ys)
        # The spacing of xs can be uneven, hence the division by the x increments
        total_gradient = d_ys / d_xs
        gradient_inputs = [d_xs, d_ys]

    try:
        absolute_gradient = np_abs(total_gradient)
    except Exception as error:
        # `gradient_inputs` is bound in both branches above, so reporting the failure
        # can never itself fail with a NameError.
        raise Exception(gradient_inputs) from error

    if max_abs_gradient is not None:
        if any(d > max_abs_gradient for d in absolute_gradient):
            raise Discountinuity_Error(
                'Found possible discontinuity (rate of change > {0}) in gradient for {1}.\nIncrease the value of max_abs_gradient or add points to the fit.'
                .format(
                    max_abs_gradient,
                    dict(xs=xs, ys=ys, gradient=absolute_gradient),
                ))

    largest_gradient = np_max(absolute_gradient)

    if largest_gradient == 0.:
        # Flat data: dividing by the maximum would be 0 / 0 (NaN everywhere).
        # A null gradient maps to an anti-gradient of one by definition.
        anti_gradient = vector([1. for _ in absolute_gradient])
    else:
        anti_gradient = (1.0 - (absolute_gradient / largest_gradient))

    if sum(anti_gradient) == 0.:
        # Every point has the maximal gradient: fall back to uniform values
        anti_gradient = vector([1. for _ in anti_gradient])

    assert sum(anti_gradient) != 0., anti_gradient

    return (
        xs,
        scale * anti_gradient,
    )
Exemple #8
0
def best_fit(
    xs: Sequence[float],
    ys: Sequence[float],
    unit: str = 'rad',
    should_plot: bool = False,
    optimise_final_terms: bool = True,
    debug: Optional[Any] = None,
    rmsd_weights: Optional[Vector] = None,
    penalty_function: Penalty_Function = DEFAULT_PENALTY_FUNCTION,
) -> Tuple[List[Term], WEIGHTED_RMSD, UNWEIGHTED_RMSD]:
    '''
    Given data (x and y values), tries fits with an increasing number of kept terms, selects the one with the lowest
    (weighted RMSD + penalty), optionally optimises its terms, and returns a tuple containing the kept terms,
    the weighted RMSD between the data and the fit, and the unweighted RMSD.

    :param xs: (Vector) x values.
    :param ys: (Vector) y values.
    :param unit: (String) unit to use (either 'rad' or 'deg')
    :param should_plot: (Boolean) whether or not to plot the resulting fit.
    :param optimise_final_terms: (Optional) whether or not to optimise final term values (see `optimise_fourier_terms`).
    When False, the selected terms are returned as they are, together with their RMSDs.
    :param debug: (Optional) stream to write debug information to.
    :param rmsd_weights: (Optional) weights used to focus the RMSD to certain section of the curve.
    :param penalty_function: (Optional) the penalty function to use (defaults to `DEFAULT_PENALTY_FUNCTION`).
    :rtype: (Tuple) List of kept terms, weighted RMSD, unweighted RMSD.
    Returns `([], inf, inf)` when no data is provided.
    :raises AssertionError: if `unit` is invalid or `ys` contains infinite values.
    '''
    assert unit in ['rad', 'deg'], unit

    # Type casting to numpy arrays (vector)
    xs, ys = map(vector, (xs, ys))
    if rmsd_weights is not None:
        rmsd_weights = vector(rmsd_weights)

    assert not isinf(ys).any(), ys

    if len(xs) == 0:
        return (
            [],
            float('inf'),
            float('inf'),
        )

    max_keep_n = min(MAX_NUM_TERMS, len(xs) // 2)

    # All the fitting is carried out in radians; terms are converted back on return
    xs_in_rad = (xs if unit == 'rad' else radians(xs)) # pylint: disable=no-member

    def rmsds_of(terms: List[Term]) -> Tuple[float, float]:
        '''
        Returns the (weighted, unweighted) RMSDs between the data and the Fourier series built from `terms`.
        The series is evaluated on the x values in radians, which is the unit the terms were fitted in.

        :param terms: (List) terms of the Fourier series.
        :rtype: Tuple of weighted RMSD, unweighted RMSD
        '''
        fitted_ys = fourier_series_fct(terms)(xs_in_rad)
        weighted, unweighted = (
            vector_rmsd(ys, fitted_ys, weights=weights)
            for weights in (rmsd_weights, None)
        )
        return (weighted, unweighted)

    # Each candidate is a (terms, weighted RMSD, penalty) tuple; the best one minimises RMSD + penalty.
    # NOTE(review): with fewer than two points `range(0, max_keep_n)` is empty and
    # `sorted_all_fits[0]` below raises IndexError -- confirm whether callers rely on that.
    sorted_all_fits = sorted(
        [
            rmsd_score_with_n_terms(
                xs_in_rad,
                ys,
                keep_n=keep_n,
                should_plot=should_plot,
                weights=rmsd_weights,
                penalty_function=penalty_function,
                debug=debug,
            )
            for keep_n in range(0, max_keep_n)
        ],
        key=lambda fit: fit[1] + fit[2]
    )

    if debug is not None:
        debug.write('\n'.join(map(str, [(terms, rmsd, penalty, rmsd + penalty) for (terms, rmsd, penalty) in sorted_all_fits])) + '\n')

    best_fit_terms, best_fit_rmsd, _best_fit_penalty = sorted_all_fits[0]

    if optimise_final_terms:
        final_terms, final_weighted_rmsd, final_unweighted_rmsd = optimise_fourier_terms(
            best_fit_terms,
            vector_if_necessary(xs_in_rad),
            vector_if_necessary(ys),
            rmsd_weights=rmsd_weights,
        )

        rmsd_to_compare = (final_weighted_rmsd if rmsd_weights is not None else final_unweighted_rmsd)

        # The optimisation is expected to never degrade the fit. An explicit test is used
        # (rather than `assert`) so that the check survives `python -O`.
        if not rmsd_to_compare <= best_fit_rmsd:
            if debug is not None:
                # NOTE(review): when a debug stream is given, the degraded optimised terms are
                # reported but still returned (behaviour kept as is) -- confirm this is intended.
                debug.write(str([rmsd_to_compare, best_fit_rmsd]))
            else:
                # Discard the optimisation and fall back on the non-optimised terms
                final_terms = best_fit_terms
                final_weighted_rmsd, final_unweighted_rmsd = rmsds_of(best_fit_terms)
    else:
        # No optimisation requested: return the selected terms and their RMSDs
        final_terms = best_fit_terms
        final_weighted_rmsd, final_unweighted_rmsd = rmsds_of(best_fit_terms)

    return (
        final_terms if unit == 'rad' else in_degrees(final_terms),
        final_weighted_rmsd,
        final_unweighted_rmsd,
    )
Exemple #9
0
def rmsd_score_with_n_terms(
    xs: Union[List, Vector],
    ys: Union[List, Vector],
    keep_n: int = 1,
    should_plot: bool = False,
    max_frequency: Optional[int] = None,
    weights: Optional[Vector] = None,
    penalty_function: Penalty_Function = DEFAULT_PENALTY_FUNCTION,
    debug: Optional[Any] = None,
) -> Tuple[List[Term], float, float]:
    '''
    Given data (x and y values) and a number of terms to keep `keep_n`,
    returns a tuple containing the kept terms, the weighted RMSD between the data and the fit function with `keep_n` terms,
    and the value of the penalty function `penalty_function`.

    The constant term is always kept, followed by the `keep_n` cosine/sine terms with the largest absolute `k_n`.
    When several terms have the same absolute `k_n`, cosine terms are preferred over sine terms, and lower frequencies
    over higher ones (the sort is stable).

    :param xs: (Vector) x values.
    :param ys: (Vector) y values.
    :param keep_n: (Integer) number of terms to keep (in addition to the constant term).
    :param should_plot: (Boolean) whether or not to plot the resulting fit.
    :param max_frequency: (Optional) maximum integer frequency to use for the Fourier terms.
    Should be less than half the number of fitted points (cf thesis).
    Defaults to the smaller of `len(xs) // 2` and `MAX_FREQUENCY`.
    :param weights: (Optional) weights used to focus the RMSD to certain section of the curve.
    :param penalty_function: (Optional) the penalty function to use (defaults to `DEFAULT_PENALTY_FUNCTION`).
    :param debug: (Optional) stream to write debug information to (currently unused by this function).
    :rtype: (Tuple) List of kept terms, weighted RMSD, penalty value
    :raises AssertionError: if `max_frequency` is too large for the number of points,
    or if `keep_n` is negative or larger than the number of available terms.
    '''
    Es_np = ys if isinstance(ys, Vector) else vector(ys)

    if max_frequency is None:
        max_frequency = min(len(xs) // 2, MAX_FREQUENCY)

    Ns = range(1, max_frequency + 1)

    assert 2 * max_frequency <= len(xs), 'Inappropriate max_frequency 2 * {0} > {1}'.format(
        max_frequency,
        len(xs),
    )

    A0 = Term(0, a0(xs, Es_np), 'cst')
    As = [Term(n, an(xs, Es_np, n, A0.k_n), 'cos') for n in Ns]
    Bs = [Term(n, bn(xs, Es_np, n, A0.k_n), 'sin') for n in Ns]
    harmonic_terms = As + Bs

    assert keep_n <= len(harmonic_terms), 'Not enough terms to keep: {0} > {1}'.format(
        keep_n,
        len(harmonic_terms),
    )

    # Rank by decreasing magnitude and take the first `keep_n`. Slicing (rather than filtering
    # on a magnitude threshold) guarantees exactly `keep_n` terms even when magnitudes are tied,
    # e.g. when every coefficient is zero for constant data.
    ranked_terms = sorted(
        harmonic_terms,
        key=lambda term: abs(term.k_n),
        reverse=True,
    )
    # `max(..., 0)` stops a negative `keep_n` from being read as a slice from the end;
    # the assertion below then rejects it.
    kept_terms = [A0] + ranked_terms[:max(keep_n, 0)]

    assert len(kept_terms) == keep_n + 1, kept_terms # We always keep A0

    fourier_series = fourier_series_fct(
        kept_terms,
    )

    if should_plot:
        plot(xs, ys, fourier_series)

    return (
        kept_terms,
        vector_rmsd(
            Es_np,
            vector(evaluate(fourier_series, xs)),
            weights=weights,
        ),
        penalty_function(kept_terms),
    )
Exemple #10
0
 def correct_weights(rmsd_weights: Optional[Vector]) -> Optional[Vector]:
     '''
     Inverts RMSD weights (`1.0 - weight`) while clamping each result to a minimum of 0.05.

     :param rmsd_weights: (Optional) weights to correct.
     :rtype: (Optional) vector of corrected weights, or None if no weights were given.
     '''
     # Floor applied to every corrected weight, so that none drops below it
     MIN_VALUE = 0.05
     if rmsd_weights is None:
         return None
     return vector([max(1.0 - x, MIN_VALUE) for x in rmsd_weights])