Example 1
def compare_vectors(vec, vector_distance, Standardization=True):
    """Comparison of vectors.

    The type _VectorDistance implements standardization procedures.
    The objective of standardization is to avoid the dependence on
    the variable type (chosen among symbolic, ordinal, numeric and circular)
    and, for numeric variables, on the choice of the measurement units
    by converting the original variables to dimensionless variables.

    :Parameters:
     - `vec` (_Vectors) : vectors to be compared
     - `vector_distance` (_VectorDistance) : variable types, weights and
       distance type used to standardize the variables
     - `Standardization` (bool) : standardize the variables or not
       (default: True)

    :Returns:
      An object of type _DistanceMatrix is returned.

    :Examples:

    .. doctest::
        :options: +SKIP

        >>> compare_vectors(vec, vector_distance)

    .. seealso::
        :func:`~openalea.stat_tool.vectors.VectorDistance`,
        :func:`~openalea.stat_tool.cluster.Clustering`,
        :func:`~openalea.stat_tool.comparison.Compare`
     """
    error.CheckType([vec, vector_distance], [_Vectors, _VectorDistance])
    error.CheckType([Standardization], [bool])

    return vec.compare(vector_distance, Standardization)
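
A minimal usage sketch (not part of the original module), assuming the star
import exposes the Vectors and VectorDistance factories shown elsewhere in
this listing, and using made-up data:

from openalea.stat_tool import *

# illustrative sample: four individuals described by two numeric variables
vec = Vectors([[1, 10], [2, 20], [3, 15], [4, 30]])
# one type per variable; "N" stands for NUMERIC (see VectorDistance below)
vector_distance = VectorDistance("N", "N")
# standardized pairwise distances, returned as a _DistanceMatrix
dmatrix = compare_vectors(vec, vector_distance)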
Example 2
def Convolution(*args):
    """Construction of an object of type convolution from elementary
    distributions or from an ASCII file.

    The distribution of the sum of independent random variables is the
    convolution of the distributions of these elementary random variables.

    :Parameters:
      * dist1, dist2, ...(distribution, mixture, convolution, compound) -
        elementary distributions,
      * file_name (string).

    :Returns:
        If the construction succeeds, the returned object is of type
        convolution, otherwise no object is returned.

    :Examples:

    .. doctest::
        :options: +SKIP

        >>> Convolution(dist1, dist2, ...)
        >>> Convolution(file_name)

    .. plot::
        :width: 50%
        :include-source:

        from openalea.stat_tool import *
        sum_dist = Binomial(0,10,0.5)
        dist = Binomial(0,15,0.2)
        c = Convolution(sum_dist, dist)
        c.plot()


    .. seealso::
        :func:`~openalea.stat_tool.output.Save`,
        :func:`~openalea.stat_tool.estimate.Estimate`,
        :func:`~openalea.stat_tool.simulate.Simulate`.
    """
    error.CheckArgumentsLength(args, 1)

    possible_types = [_DiscreteParametricModel, _DiscreteMixture,
                      _Compound, _Convolution]

    # filename
    if len(args) == 1:
        error.CheckType([args[0]], [str], arg_id=[1])
        result = _Convolution(args[0])
    # build from list of distributions
    else:
        arguments = []
        #check that all arguments are correct
        for i, arg in enumerate(args):
            error.CheckType([arg], [possible_types], variable_pos=[i+1])
            arguments.append(arg)
        result = _Convolution(arguments)

    return result
Example 3
def compare_histo(histo, *args, **kargs):
    """Comparison of frequency distributions.

    :Parameters:
      * `histo1`, `histo2`, ... (histogram, mixture_data, convolution_data, compound_data),
      * `type` (string): variable type ("NUMERIC" ("N"), "ORDINAL" ("O") or "SYMBOLIC" ("S")).

    :Keywords:
      - FileName (string) : name of the result file
      - Format (string) : format of the result file: "ASCII" (default format) or "SpreadSheet".
        This optional argument can only be used in conjunction with the optional argument FileName.

    :Returns:
      The comparison result.

    :Examples:

    .. doctest::
        :options: +SKIP

        >>> compare_histo(histo1, histo2, ..., type, FileName="result",
        ... Format="ASCII")

    .. seealso::
        :func:`~openalea.stat_tool.comparison.Compare`

    """
    utype = args[-1]
    if utype not in variable_type.keys():
        raise KeyError("%s not found. Allowed keys are %s"
                       % (utype, variable_type.keys()))


    utype = variable_type[args[-1]]

    error.CheckType([histo],
                        [[_DiscreteDistributionData, _DiscreteMixtureData,
                          _ConvolutionData, _CompoundData]])

    histos = args[0:-1]
    for h in histos:
        error.CheckType([h],
                        [[_DiscreteDistributionData, _DiscreteMixtureData,
                          _ConvolutionData, _CompoundData]])
    filename = kargs.get("Filename", None)
    format = error.ParseKargs(kargs, "Format", "ASCII",
                                  possible=format_type)

    ret = histo.compare(histos, utype, filename, format)

    return ret
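
A hedged sketch with concrete data: it assumes Binomial and Simulate are
exported by openalea.stat_tool (as in the plot examples of this listing) and
that Simulate(dist, n) returns a frequency distribution of n draws.

from openalea.stat_tool import *

# two simulated samples from slightly different binomial distributions
histo1 = Simulate(Binomial(0, 10, 0.5), 200)   # assumed Simulate(dist, size)
histo2 = Simulate(Binomial(0, 10, 0.3), 200)
# compare them as numeric samples ("N"); no result file is written here
result = compare_histo(histo1, histo2, "N")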
Example 4
def Binomial(inf_bound, sup_bound=I_DEFAULT, \
             proba=D_DEFAULT):
    """
    Construction of a binomial distribution

    :param int inf_bound: lower bound to the range of possible values (shift parameter)
    :param int sup_bound: upper bound to the range of possible values
    :param float proba: probability of `success`

    .. plot::
        :width: 50%
        :include-source:

        from openalea.stat_tool.distribution import Binomial
        b = Binomial(0,10,0.5)
        b.plot(legend_size=8)

    """
    # todo: seg fault when passing -1 as first arguments if there
    # is no assert here below
    # memory leak ?
    # todo:  returns error if ((inf_bound < min_inf_bound) ||
    # (inf_bound > MAX_INF_BOUND)) {

    error.CheckType([inf_bound, sup_bound, proba], [int, int, [int, float]])
    assert inf_bound >= 0
    assert inf_bound < sup_bound
    assert (sup_bound - inf_bound) <= MAX_DIFF_BOUND
    assert proba <= 1. and proba > 0

    param = D_DEFAULT

    return(_DiscreteParametricModel(BINOMIAL.real,
        inf_bound, sup_bound, param, proba))
Example 5
def ToDistanceMatrix(distance_matrix):
    """
    Cast an object of type CLUSTER into an object of type DISTANCE_MATRIX.


    :Parameters:
      * distance_matrix

    :Returns:
        An object of type distance_matrix is returned.

    :Examples:

    .. doctest::
        :options: +SKIP

        >>> ToDistanceMatrix(distance_matrix)

    .. seealso::
        :func:`~openalea.stat_tool.cluster.Clustering`,

    """
    error.CheckType([distance_matrix], [[_Cluster, _DistanceMatrix]])

    try:
        return _DistanceMatrix(distance_matrix)
    except:
        raise TypeError("Input arguments must be of type Cluster")
Example 6
    def estimate_parametric(histo,
                            ident,
                            MinInfBound=0,
                            InfBoundStatus="Free"):
        """ Estimate a parametric discrete distribution (binomial,
        Poisson or negative binomial distribution with an additional shift
        parameter)

        :Parameters:
          * histo (histogram, mixture_data, convolution_data, compound_data),
          * ident ("BINOMIAL", "POISSON", "NEGATIVE_BINOMIAL", "UNIFORM")
          * MinInfBound (int): lower bound to the range of possible values (0 - default value - or 1).
          * InfBoundStatus (string): shifting or not of the distribution:
            "Free" (default value) or "Fixed".

        :Usage:

        .. doctest::
            :options: +SKIP

            >>> estimate_parametric(histo, ident, MinInfBound=0, InfBoundStatus="Free")
            >>> Estimate(histo, "NB", MinInfBound=1, InfBoundStatus="Fixed")

        """

        error.CheckType([ident, MinInfBound, InfBoundStatus], [str, int, str])

        flag = bool(InfBoundStatus == "Free")

        try:
            ident_id = dist_type[ident]
        except KeyError:
            raise KeyError("Valid type are %s" % (str(dist_type.keys())))

        return histo.parametric_estimation(ident_id, MinInfBound, flag)
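Example 7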
def SelectStep(obj, *args):
    """Change the internal step of a vector or a sequence

    :param obj: the vector or sequence object
    :param args: the new step, optionally preceded by the variable index

    :Example:

    .. doctest::
        :options: +SKIP

        >>> seq = Sequences([])
        >>> SelectStep(seq, 100)
        >>> Plot(seq)

    .. todo:: shall we move this function to sequence_analysis package?

    """
    error.CheckArgumentsLength(args, 1, 2)

    try:
        nb_variable = obj.nb_variable
    except AttributeError:
        raise TypeError(
            "object has no nb_variable. Check that it is a Vector or Sequence")

    if len(args) == 2:
        variable, step = args
        error.CheckType([step], [[int, float]])
        error.CheckType([variable], [[int]])
    elif len(args) == 1 and nb_variable == 1:
        variable = 1
        step = args[0]
        error.CheckType([step], [[int, float]])
    else:
        if nb_variable != 1:
            raise SyntaxError(
                "Wrong number of arguments. The number of variables is "
                "greater than 1 (%s), so you must provide both a variable "
                "and a step, as in SelectStep(object, 1, 100)" % nb_variable)
        else:
            raise ValueError("UnknownError")

    #obj.get_marginal_histogram(variable)
    ret = obj.select_step(variable, step)
    return ret
Example 8
def VarianceAnalysis(*args, **kargs):
    """
    One-way variance analysis.

    :Examples:

    .. doctest::
        :options: +SKIP

        >>> VarianceAnalysis(vec, class_variable, response_variable,
        ... type, FileName="result", Format="SpreadSheet")

    :Parameters:

      * vec (_Vectors),
      * class_variable (int): index of the class or group variable,
      * response_variable (int): index of the response variable,
      * type (string): type of the response variable ("NUMERIC" ("N") or
        "ORDINAL" ("O")).

    :Keywords:

      * FileName (string): name of the result file,
      * Format (string): format of the result file: "ASCII" (default format)
        or "SpreadSheet". This optional argument can only be used in conjunction with the optional argument FileName.

    :Returns:

        The variance analysis result as a string

    """
    error.CheckArgumentsLength(args, 4, 4)
    error.CheckKargs(kargs, possible_kargs = ["FileName", "Format"])

    #kargs
    filename = error.ParseKargs(kargs, "FileName", default="result")
    format = error.ParseKargs(kargs, "Format", default="O",
                              possible=variance_type)

    #args
    vec = args[0]
    class_variable = args[1]
    response_variable = args[2]
    utype = args[3]
    error.CheckType([vec, class_variable, response_variable, utype],
                    [_Vectors, int, int, str])

    try:
        utype = variance_type[args[3]]
    except KeyError:
        raise KeyError("Possible type are : " + str(variance_type.keys()))


    return vec.variance_analysis(class_variable, response_variable, utype,
                                 filename, format)
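
A hedged sketch with invented data, assuming the multivariate Vectors
constructor accepts a list of rows: variable 1 plays the role of the group
variable and variable 2 that of a numeric response.

from openalea.stat_tool import *

# three groups with two observations each (illustrative values)
vec = Vectors([[1, 12], [1, 15], [2, 20], [2, 24], [3, 30], [3, 28]])
# one-way ANOVA of variable 2 against variable 1, response treated as
# numeric ("N"); with the defaults a report is also written to "result"
summary = VarianceAnalysis(vec, 1, 2, "N")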
Example 9
def _Vectors_mixture_estimation(self, model,
                                nb_iteration=I_DEFAULT,
                                force_param=None):
    """Estimate a mixture from _Vectors given initial model or number of
    components, the maximal number of iterations and a flag for using parametric
    observation distributions or not, within a given family
    """
    if force_param is None:
        force_param = []

    error.CheckType([nb_iteration, force_param], [int, list])

    # model is a MultivariateMixture class
    error.CheckType([model], [[int, _MultivariateMixture]])
    if type(model) == int:
        return _Vectors.mixture_estimation_nb_component(self, model,
                                            nb_iteration, force_param)
    else:
        return _Vectors.mixture_estimation_model(self, model,
                                            nb_iteration, force_param)
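
This helper is typically bound to _Vectors as a method; the sketch below is
an assumption, not part of the original module. It calls the function
directly with `model` given as a number of components and assumes the
Vectors factory returns a _Vectors instance.

from openalea.stat_tool import *

# illustrative bivariate sample falling into two well-separated groups
vec = Vectors([[0, 1], [1, 0], [0, 2], [6, 5], [5, 6], [6, 6]])
# ask for a 2-component multivariate mixture, capping the iterations;
# force_param is left at its default (empty list)
mixt = _Vectors_mixture_estimation(vec, 2, nb_iteration=100)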
Example 10
def ContingencyTable(*args, **kargs):
    """
    Computation of a contingency table.

    :Parameters:

      * vec (_Vectors),
      * variable1, variable2 (int): variable indices,

    :Keywords:

      * FileName (string): name of the result file,
      * Format (string): format of the result file: "ASCII" (default format) or "SpreadSheet".
        This optional argument can only be used in conjunction with the optional argument FileName.

    :Returns:

        The contingency table result as a string

    :Examples:

    .. doctest::
        :options: +SKIP

        >>> ContingencyTable(vec, variable1, variable2, FileName="result", Format="SpreadSheet")


    """
    error.CheckArgumentsLength(args, 3, 3)
    error.CheckKargs(kargs, possible_kargs = ["FileName", "Format"])

    possible_v = [str(f) for f in OutputFormat.values.values()] # possible output formats
    #kargs
    filename = error.ParseKargs(kargs, "FileName", default="result")
    format = error.ParseKargs(kargs, "Format", default="ASCII",
                              possible=possible_v)

    #args
    vec = args[0]
    variable1 = args[1]
    variable2 = args[2]
    error.CheckType([vec, variable1, variable2], [_Vectors, int, int])

    of = "OutputFormat." + format + ".real"
    of = eval(of)
    return vec.contingency_table(variable1, variable2, filename, of)
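
A hedged sketch with made-up data: two discrete variables are crossed; with
the default keywords an ASCII report is also written to the file "result".

from openalea.stat_tool import *

# six individuals described by two discrete variables (illustrative)
vec = Vectors([[0, 1], [0, 1], [1, 0], [1, 1], [0, 0], [1, 0]])
# contingency table of variable 1 versus variable 2, returned as a string
table = ContingencyTable(vec, 1, 2)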
Example 11
def ComputeRankCorrelation(*args, **kargs):
    """ComputeRankCorrelation

    Computation of the rank correlation matrix.

    :Usage:

    >>> vec = Vectors([1,2,3,4,5,4,3,2,1])
    >>> ComputeRankCorrelation(vec, Type="Spearman", FileName='')

    :Arguments:

    * vec (vectors).

    :Optional Arguments:

    * Type (string): type of rank correlation coefficient:
      "Spearman" (the default) or "Kendall".

    :Returned Object:

    No object returned.
    """

    func_map = {
            "Spearman": 0,
            "Kendall": 1
            }

    error.CheckArgumentsLength(args, 1, 1)
    error.CheckKargs(kargs, possible_kargs = ["Type", "FileName"])

    #kargs
    utype = error.ParseKargs(kargs, "Type", default="Spearman",
                             possible=func_map)
    filename = error.ParseKargs(kargs, "FileName", default=None)

    #args
    vec = args[0]

    error.CheckType([vec], [_Vectors])

    _a = vec.rank_correlation_computation(utype, filename)
Example 12
def MergeVariable(obj, *args, **kargs):
    """
    Merging of variables.

    :Parameters:

        * vec1, vec2, ... (_Vectors),
        * seq1, seq2, ... (_Sequences, _DiscreteSequences, _MarkovData, _SemiMarkovData).

    :Keywords:

      * RefSample (int): reference sample to define individual identifiers
        (the default: no reference sample).

    :Returns:

        If the arguments are of type _Vectors and if the number of vectors is the same
        for each sample, an object of type _Vectors is returned.

        If the arguments are of type _Sequences, _DiscreteSequences, _MarkovData, _SemiMarkovData,
        if all the variables are of type STATE, and if the number and the lengths of sequences
        are the same for each sample, an object of type _Sequences or _DiscreteSequences is returned.

        The returned object is of type _DiscreteSequences if all the variables are of type STATE,
        if the possible values for each variable are consecutive from 0 and if the number of
        possible values for each variable is < 15.

    :Examples:

    .. doctest::
        :options: +SKIP

        >>> MergeVariable(histo1, histo2)
        >>> MergeVariable(vec1, vec2,..., RefSample=2)
        >>> MergeVariable(seq1, seq2,..., RefSample=2)

    .. seealso::
        :func:`~openalea.stat_tool.cluster.Cluster`,
        :func:`~openalea.stat_tool.data_transform.Shift`,
        :func:`~openalea.stat_tool.cluster.Transcode`,
        :func:`~openalea.stat_tool.data_transform.ValueSelect`,
        :func:`~openalea.stat_tool.data_transform.Merge`,
        :func:`~openalea.stat_tool.data_transform.SelectIndividual`,
        :func:`~openalea.stat_tool.data_transform.SelectVariable`,
        :func:`~openalea.stat_tool.cluster.AddAbsorbingRun`,
        :func:`~openalea.stat_tool.cluster.Cumulate`,
        :func:`~openalea.stat_tool.cluster.Difference`,
        :func:`~openalea.stat_tool.cluster.IndexExtract`,
        :func:`~openalea.stat_tool.cluster.LengthSelect`,
        :func:`~openalea.stat_tool.cluster.MovingAverage`,
        :func:`~openalea.stat_tool.cluster.RecurrenceTimeSequences`,
        :func:`~openalea.stat_tool.cluster.RemoveRun`,
        :func:`~openalea.stat_tool.cluster.Reverse`,
        :func:`~openalea.stat_tool.cluster.SegmentationExtract`,
        :func:`~openalea.stat_tool.cluster.VariableScaling`,
    """
    #todo: manage the markovian_sequences conversion if input
    # is made of Sequences
    arg1 = args[0]
    for arg in args:
        error.CheckType([arg], [type(arg1)])

    RefSample = kargs.get("RefSample", -1)
    error.CheckType([RefSample], [int])

    return obj.merge_variable(list(args), RefSample)
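
A hedged sketch of the _Vectors case, with invented univariate samples over
the same five individuals.

from openalea.stat_tool import *

# two univariate samples with the same number of vectors
vec1 = Vectors([1, 2, 3, 4, 5])
vec2 = Vectors([10, 20, 30, 40, 50])
# the result is a single bivariate _Vectors object
merged = MergeVariable(vec1, vec2)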
Example 13
    def display(self, *args, **kargs):
        format_map = {'c': 'c', 'l': 'l', 'Column': 'c', 'Line': 'l'}
        viewpoint_map = {
            'v': 'v',
            "Data": "d",
            "Survival": 's',
            "SegmentProfile": 'q',
            "StateProfile": 'p'
        }

        segmentations_map = {
            "DynamicProgramming": FORWARD_DYNAMIC_PROGRAMMING,
            "ForwardBackwardSampling": FORWARD_BACKWARD_SAMPLING
        }

        state_seq_map = {
            "GeneralizedViterbi": GENERALIZED_VITERBI,
            "ForwardBackwardSampling": FORWARD_BACKWARD_SAMPLING
        }

        # Detail level
        Detail = error.ParseKargs(kargs, "Detail", 1, [1, 2])
        exhaustive = (Detail == 2)
        Format = error.ParseKargs(kargs, "Format", "c", format_map)
        ViewPoint = error.ParseKargs(kargs, "ViewPoint", "v", viewpoint_map)
        NbStateSequence = error.ParseKargs(kargs, "NbStateSequence",
                                           NB_STATE_SEQUENCE)

        error.CheckType([NbStateSequence], [[int, float]])
        NbSegmentation = error.ParseKargs(kargs, "NbSegmentation",
                                          NB_SEGMENTATION)
        error.CheckType([NbSegmentation], [[int, float]])

        StateSequence = error.ParseKargs(kargs, "StateSequence",
                                         "GeneralizedViterbi", state_seq_map)
        Segmentation = error.ParseKargs(kargs, "Segmentation",
                                        "DynamicProgramming",
                                        segmentations_map)
        #todo it seems that by default, segmentation = FORWARD_DYNAMIC_PROGRAMMING ,

        # !! in AML, Output is not set by default, i.e. equivalent to None.
        # ParseKargs does not accept None since we provide the list of
        # possible keys in output_display (which does not contain None),
        # so we first need to check the presence of Output in the kargs
        # and then give a default value != None. Be aware that this default
        # value is a dummy variable that is not used.
        try:
            from openalea.sequence_analysis.enums import output_display
        except:
            from openalea.stat_tool.enums import output_display

        if kargs.get('Output'):
            try:
                Output = None
                Output = error.ParseKargs(kargs, "Output", 'Segment',
                                          output_display)
            except:
                print 'warning could not import output_display from sequence_analysis'
        else:
            # output_display was already imported above
            Output = None

        if Output is None:
            if ViewPoint == 'q':
                Output = output_display['Segment']
            elif ViewPoint == 'p':
                Output = output_display['State']
        elif (ViewPoint == 'q' and Output not in [output_display['ChangePoint'], output_display['Segment']]) \
            or (ViewPoint == 'p'  and Output not in [output_display['State'], output_display['InState'], output_display['OutState']]):
            raise ValueError(
                " INCOMPATIBLE_OPTIONS between ViewPoint and Output")

        #check arguments compatibilities

        if Detail == 2 and ViewPoint not in ['v', 'd']:
            raise ValueError("incompatible options")
        if Format == 'l' and ViewPoint != 'd':
            raise ValueError("incompatible options")
        """if segmentations_option or nb_segmentation_option)  and \
           (view_point!='q' or args[0] not in
            (
                (args[0].tag() != AMObjType::SEQUENCES)
                && (args[0].tag() != AMObjType::MARKOVIAN_SEQUENCES)
                && (args[0].tag() != AMObjType::VARIABLE_ORDER_MARKOV_DATA)
                && (args[0].tag() != AMObjType::SEMI_MARKOV_DATA)
                && (args[0].tag() != AMObjType::NONHOMOGENEOUS_MARKOV_DATA)
            )
        )
  if (((state_sequences_option) || (nb_state_sequence_option)) && ((view_point != 'p') ||
       ((args[0].tag() != AMObjType::HIDDEN_VARIABLE_ORDER_MARKOV) &&
        (args[0].tag() != AMObjType::HIDDEN_SEMI_MARKOV) &&
        (args[0].tag() != AMObjType::MARKOVIAN_SEQUENCES) &&
        (args[0].tag() != AMObjType::VARIABLE_ORDER_MARKOV_DATA) &&
        (args[0].tag() != AMObjType::SEMI_MARKOV_DATA) &&
        (args[0].tag() != AMObjType::NONHOMOGENEOUS_MARKOV_DATA)))) {
    status = false;
    genAMLError(ERRORMSG(INCOMPATIBLE_OPTIONS_s) , "Display");
  }
  if ((output_option) && ((view_point != 'q') ||
       ((args[0].tag() != AMObjType::SEQUENCES) && (args[0].tag() != AMObjType::MARKOVIAN_SEQUENCES) &&
        (args[0].tag() != AMObjType::VARIABLE_ORDER_MARKOV_DATA) &&
        (args[0].tag() != AMObjType::SEMI_MARKOV_DATA) &&
        (args[0].tag() != AMObjType::NONHOMOGENEOUS_MARKOV_DATA))) &&
      ((view_point != 'p') || ((args[0].tag() != AMObjType::HIDDEN_SEMI_MARKOV) &&
        (args[0].tag() != AMObjType::MARKOVIAN_SEQUENCES) &&
        (args[0].tag() != AMObjType::VARIABLE_ORDER_MARKOV_DATA) &&
        (args[0].tag() != AMObjType::SEMI_MARKOV_DATA) &&
        (args[0].tag() != AMObjType::NONHOMOGENEOUS_MARKOV_DATA)))) {
    status = false;
    genAMLError(ERRORMSG(INCOMPATIBLE_OPTIONS_s) , "Display");
  }
        """

        # ---------------- ViewPoint
        # 1-Survival
        if ViewPoint == 's':
            from openalea.stat_tool.enums import histogram_types
            from openalea.stat_tool.enums import model_distribution_types

            if type(self) in model_distribution_types:
                output = self.survival_ascii_write()
            elif type(self) in histogram_types:
                output = self.survival_ascii_write()
            else:
                raise ValueError("""(%s) has no survival point. Use another
                Viewpoint or use a first argument in DISTRIBUTION or MIXTURE or
                CONVOLUTION or COMPOUND or FREQUENCY_DISTRIBUTION or
                MIXTURE_DATA or CONVOLUTION_DATA or COMPOUND_DATA""" %
                                 str(type(self)))

        # Data
        elif ViewPoint == "d":
            try:
                #todo checkType
                # Markovian_Sequences, VOMData, SMData,
                # or Nonhomogenous_Markov_data
                output = self.ascii_data_write(exhaustive, Format)

            except Exception, e:
                #for vectors only
                #todo checkType
                try:
                    output = self.ascii_data_write(exhaustive)

                except AttributeError:
                    raise AttributeError("""
                        %s has no 'data' viewpoint""" % (str(type(self))))
Example 14
def Compound(*args, **kargs):
    """
    Construction of a compound of distributions from a sum distribution and an
    elementary distribution or from an ASCII file.

    A compound (or stopped-sum) distribution is defined as the distribution
    of the sum of n independent and identically distributed random variables :math:`X_i`
    where `n` is the value taken by the random variable `N`. The distribution of N is referred
    to as the sum distribution while the distribution of the :math:`X_i` is referred to as
    the elementary distribution.

    :param sum_dist: sum distribution
    :param dist: elementary distribution
    :param string filename:

    :type sum_dist: :class:`distribution`, :class:`mixture`, :class:`convolution`, :class:`compound`
    :type dist: :class:`distribution`, :class:`mixture`, :class:`convolution`, :class:`compound`

    :Returns:

        If the construction succeeds, an object of type `COMPOUND` is returned,
        otherwise no object is returned.

    :Examples:

    .. doctest::
        :options: +SKIP

        >>> Compound(sum_dist, dist)
        >>> Compound(sum_dist, dist, Threshold=0.999)
        >>> Compound(filename)

    .. plot::
        :width: 50%
        :include-source:

        from openalea.stat_tool import *
        sum_dist = Binomial(0,10,0.5)
        dist = Binomial(0,15,0.2)
        c = Compound(sum_dist, dist)
        c.plot()


    .. seealso::
        :func:`~openalea.stat_tool.output.Save`,
        :func:`~openalea.stat_tool.estimate.Estimate`,
        :func:`~openalea.stat_tool.simulate.Simulate`
    """
    error.CheckArgumentsLength(args, 1, 2)
    error.CheckKargs(kargs, possible_kargs=["Threshold"])

    Threshold = kargs.get("Threshold", None)

    # filename
    if len(args) == 1:
        error.CheckType([args[0]], [str])
        result = _Compound(args[0])

    possible_types = [
        _DiscreteParametricModel, _DiscreteMixture, _Compound, _Convolution
    ]

    # build from two objects and optional threshold
    if len(args) == 2:
        error.CheckType([args[0], args[1]], [possible_types, possible_types],
                        variable_pos=[1, 2])

        if Threshold is not None:
            result = _Compound([args[0], args[1]], Threshold)
        else:
            result = _Compound([args[0], args[1]])

    return result
Example 15
def VectorDistance(*args, **kargs):
    """
    Construction of an object of type vector_distance from the types (and
    optionally the weights) of the variables, or from an ASCII file.

    The type _VectorDistance implements standardization procedures. The objective of
    standardization is to avoid the dependence on the variable type
    (chosen among symbolic, ordinal, numeric and circular) and, for numeric variables,
    on the choice of the measurement units by converting the original variables to
    unitless variables.

    :Parameters:
      - `type1`, `type2`, ... (string):
        variable types ("NUMERIC" ("N"), "ORDINAL" ("O") or "SYMBOLIC" ("S")),
      - `weight1`, `weight2`, ... (float): weights of variables,
      - `file_name` (string).

    :Keywords:

      * Distance (string): distance type: "ABSOLUTE_VALUE" (default) or "QUADRATIC".
        This optional argument is only relevant in the multivariate case.

    :Returns:
        If the construction succeeds, an object of type vector_distance is returned.

    :Examples:

    .. doctest::
        :options: +SKIP

        >>> VectorDistance(type1, type2,..., Distance="QUADRATIC")
        >>> VectorDistance(weight1, type1, weight2, type2,..., Distance="QUADRATIC")
        >>> VectorDistance(file_name)

    .. seealso::
        :func:`~openalea.stat_tool.comparison.Compare`
    """

    error_arguments = ["",
                       """If first argument is a number, following
    argument must be in ["N", "O", "S"]. Check documentation by typing
    VectorDistance? .""",
    ""]

    distance = error.ParseKargs(kargs, "Distance", "ABSOLUTE_VALUE",
                                distance_type)


    # Case VectorDistance("O", "N", "S")
    if args[0] in variable_type.keys():
        # check that all following arguments (if any) are correct
        types = []
        for arg in args:
            # check that the arguments are correct
            if arg not in variable_type.keys():
                raise ValueError(error_arguments[1])
            else:
                types.append(variable_type[arg])
        # assign uniform weights since none were provided
        weights = [1./len(types) for _elem in types]

        return _VectorDistance(types, weights, distance)
    # same as above but with weights VectorDistance(0.5, "N", 0.5, "S")
    if isinstance(args[0], int) or isinstance(args[0], float):
        types = list(args[1:len(args):2])
        weights = list(args[0:len(args):2])
        assert len(types)==len(weights)

        # check that types are strings
        error.CheckType(types, [str]*len(types))
        # check that weights are integer or floats
        error.CheckType(weights, [[int, float]]*len(weights))

        # convert to vector_distance_type
        types = [variable_type[t] for t in types]

        return _VectorDistance(types, weights, distance)
    # filename case
    elif isinstance(args[0], str) and len(args)==1 and \
            args[0] not in variable_type.keys():
        return _VectorDistance(args[0])
Example 16
def Regression(vec, utype, explanatory, response, *args, **kargs):
    """
    Simple regression (with a single explanatory variable).

    :Parameters:
    
      * vec : vectors
        vectors
      * type : string
        `"Linear"` or `"MovingAverage"` or `"NearestNeighbors"`
      * explanatory_variable : int
        index of the explanatory variable
      * response_variable : int 
        index of the response variable
      * filter : list of float
        filter values on the half width i.e. from one extremity to the central value
        (with the constraint filter[i] + filter[m] = 1),
      * frequencies : list of float
        frequencies defining the filter,
      * dist : distribution, mixture, convolution, compound 
        symmetric distribution, whose size of the support is even, defining the filter 
        (for instance Distribution("BINOMIAL",0,4,0.5)),
      * span : float
        proportion of individuals in each neighbourhood. 

    :Keywords:
    
      * Algorithm : string
          - `"Averaging"` (default) 
          - `"LeastSquares"`
            
        This optional argument can only be used if the second mandatory argument
        specifying the regression type is "MovingAverage".

      * Weighting : bool 
        weighting or not of the neighbors according to their distance to the 
        computed point (default value: True). This optional argument can only
        be used if the second mandatory argument specifying the regression type 
        is "NearestNeighbors". 

    :Returns:
    
        An object of type regression is returned.

    :Examples:

    .. doctest::
        :options: +SKIP
    
        >>> Regression(vec, "Linear", explanatory_variable, response_variable)
        >>> Regression(vec, "MovingAverage", explanatory_variable, 
        ...    response_variable, filter, Algorithm="LeastSquares")
        >>> Regression(vec, "MovingAverage", explanatory_variable,
        ...    response_variable, frequencies, Algorithm="LeastSquares")
        >>> Regression(vec, "MovingAverage", explanatory_variable, 
        ...    response_variable, dist, Algorithm="LeastSquares")
        >>> Regression(vec, "NearestNeighbors", explanatory_variable, 
        ...    response_variable, span, Weighting=False)
    
    .. seealso::
        :func:`~openalea.stat_tool.output.Plot`

    """
    STAT_MINIMUM_SPAN = 0.05  # from aml not stat_tool or sequence headers

    error.CheckType([vec, utype, explanatory, response],
                    [_Vectors, str, int, int])

    possible_types = [
        "Linear", "NearestNeighbors", "NearestNeighbours", "MovingAverage"
    ]

    Algorithm = error.ParseKargs(kargs, "Algorithm", 'Averaging', algo_map)
    Weighting = error.ParseKargs(kargs, "Weighting", True, bool_type)

    if utype == "Linear":
        error.CheckArgumentsLength(args, 0, 0)
        return vec.linear_regression(explanatory, response)

    elif utype == "MovingAverage":
        error.CheckArgumentsLength(args, 1, 1)
        param = args[0]
        #todo add CheckType for int and models
        # param is a list of float, int
        if isinstance(args[0], list):
            # todo: check that sum equals 1
            return vec.moving_average_regression_values(
                explanatory, response, param, Algorithm)
        # or a set of distributions
        # todo: test case of compound, convolution, mixture
        else:
            error.CheckType([param], [[
                _DiscreteParametricModel, _DiscreteMixture, _Convolution,
                _Compound
            ]])
            return vec.moving_average_regression_distribution(
                explanatory, response, param, Algorithm)

    elif utype in ["NearestNeighbors", "NearestNeighbours"]:
        error.CheckArgumentsLength(args, 1, 1)
        span = args[0]
        error.CheckType([span], [[float, int]])

        assert span >= STAT_MINIMUM_SPAN
        #todo: check this assert

        return vec.nearest_neighbours_regression(explanatory, response,
                                                 float(span), Weighting)
    else:
        raise TypeError("Bad Regression type. Must be in %s" % possible_types)
Example 17
    def plot(self, *args, **kargs):

        Title = kargs.get("Title", "")

        params = kargs.get("Params", ())
        groups = kargs.get("Groups", ())

        possible_modes = {'Blocking': False, 'NonBlocking': True}
        Mode = error.ParseKargs(kargs,
                                'Mode',
                                'Blocking',
                                possible=possible_modes)

        viewpoint_map = {
            'v': 'v',
            "Data": "d",
            "Survival": 's',
            "SegmentProfile": 'q',
            "StateProfile": 'p'
        }
        ViewPoint = error.ParseKargs(kargs,
                                     "ViewPoint",
                                     "v",
                                     possible=viewpoint_map)

        #todo: check the compatibilities between options
        """
        if ((output_option) && ((view_point != 'q') ||
       ((args[0].tag() != AMObjType::SEQUENCES) && (args[0].tag()
       != AMObjType::MARKOVIAN_SEQUENCES) &&
        (args[0].tag() != AMObjType::VARIABLE_ORDER_MARKOV_DATA) &&
        (args[0].tag() != AMObjType::SEMI_MARKOV_DATA) &&
        (args[0].tag() != AMObjType::NONHOMOGENEOUS_MARKOV_DATA))) &&
      ((view_point != 'p') || ((args[0].tag() != AMObjType::HIDDEN_SEMI_MARKOV)
      &&
        (args[0].tag() != AMObjType::MARKOVIAN_SEQUENCES) &&
        (args[0].tag() != AMObjType::VARIABLE_ORDER_MARKOV_DATA) &&
        (args[0].tag() != AMObjType::SEMI_MARKOV_DATA) &&
        (args[0].tag() != AMObjType::NONHOMOGENEOUS_MARKOV_DATA)))) {
    status = false;
    genAMLError(ERRORMSG(INCOMPATIBLE_OPTIONS_s) , "Plot");
  }


  if ((config) && (view_point != 'p') && ((args[0].tag() ==
      AMObjType::MARKOVIAN_SEQUENCES) ||
       (args[0].tag() == AMObjType::HIDDEN_VARIABLE_ORDER_MARKOV)
           || (args[0].tag() == AMObjType::HIDDEN_SEMI_MARKOV) ||
       (args[0].tag() == AMObjType::VARIABLE_ORDER_MARKOV_DATA)
        || (args[0].tag() == AMObjType::SEMI_MARKOV_DATA))) {
    variable = args[1].val.i;

    switch (args[0].tag()) {

    case AMObjType::MARKOVIAN_SEQUENCES : {
      seq = (MarkovianSequences*)((STAT_model*)args[0].val.p)->pt;
      if ((variable <= seq->get_nb_variable()) && (seq->get_characteristics(variable - 1))) {
        status = false;
        genAMLError(ERRORMSG(K_NB_ARG_ERR_s) , "Plot");
      }
      break;
    }

    case AMObjType::HIDDEN_VARIABLE_ORDER_MARKOV : {
      hmarkov = (HiddenVariableOrderMarkov*)((STAT_model*)args[0].val.p)->pt;
      if ((variable <= hmarkov->get_nb_output_process()) &&
          (hmarkov->get_nonparametric_process(variable))) {
        status = false;
        genAMLError(ERRORMSG(K_NB_ARG_ERR_s) , "Plot");
      }
      break;
    }

    case AMObjType::HIDDEN_SEMI_MARKOV : {
      hsmarkov = (HiddenSemiMarkov*)((STAT_model*)args[0].val.p)->pt;
      if ((variable <= hsmarkov->get_nb_output_process()) &&
          (hsmarkov->get_nonparametric_process(variable))) {
        status = false;
        genAMLError(ERRORMSG(K_NB_ARG_ERR_s) , "Plot");
      }
      break;
    }

    case AMObjType::VARIABLE_ORDER_MARKOV_DATA : {
      seq = (VariableOrderMarkovData*)((STAT_model*)args[0].val.p)->pt;
      if ((variable < seq->get_nb_variable()) && (seq->get_characteristics(variable))) {
        status = false;
        genAMLError(ERRORMSG(K_NB_ARG_ERR_s) , "Plot");
      }
      break;
    }

    case AMObjType::SEMI_MARKOV_DATA : {
      seq = (SemiMarkovData*)((STAT_model*)args[0].val.p)->pt;
      if ((variable < seq->get_nb_variable()) && (seq->get_characteristics(variable))) {
        status = false;
        genAMLError(ERRORMSG(K_NB_ARG_ERR_s) , "Plot");
      }
      break;
    }
    }
  }

        """
        try:
            from openalea.sequence_analysis.enums import output_display
        except:
            from openalea.stat_tool.enums import output_display

        if kargs.get('Output'):
            try:
                Output = None
                Output = error.ParseKargs(kargs, "Output", 'Segment',
                                          output_display)
            except:
                print 'warning could not import output_display from sequence_analysis'
        else:
            # output_display was already imported above
            Output = None

        if Output is None:
            if ViewPoint == 'q':
                Output = output_display['Segment']
            elif ViewPoint == 'p':
                Output = output_display['State']
        elif (ViewPoint == 'q' and Output not in [output_display['ChangePoint'], output_display['Segment']]) \
            or (ViewPoint == 'p'  and Output not in [output_display['State'], output_display['InState'], output_display['OutState']]):
            raise ValueError(
                " INCOMPATIBLE_OPTIONS between ViewPoint and Output")

        #calling the plot functions from here
        try:
            if ViewPoint == 's':
                from openalea.stat_tool.enums import histogram_types
                from openalea.stat_tool.enums import model_distribution_types
                #todo is *params needed or not?
                if type(self) in model_distribution_types:
                    #equivalent to dist->suvival_plot_write(error, Plot_prefix, title)
                    plotable = self.survival_get_plotable(*params)
                elif type(self) in histogram_types:
                    #equivalent to histo->survival_plot_write(error , Plot_prefix , title)
                    plotable = self.survival_get_plotable(*params)
                else:
                    raise ValueError("""(%s) has no survival point. Use another
                        Viewpoint or use a first argument in DISTRIBUTION or MIXTURE or
                        CONVOLUTION or COMPOUND or FREQUENCY_DISTRIBUTION or
                        MIXTURE_DATA or CONVOLUTION_DATA or COMPOUND_DATA""" %
                                     str(type(self)))

            elif ViewPoint == 'p':
                #print 'viewpoint = state-profile'
                Plot_prefix = ''
                plotable = None
                from openalea.sequence_analysis._sequence_analysis import \
                    _HiddenVariableOrderMarkov, _HiddenSemiMarkov
                if type(self) == _HiddenVariableOrderMarkov:
                    plotable = self.state_profile_plotable_write(args[0])
                elif type(self) == _HiddenSemiMarkov:
                    if len(args) == 0:
                        raise SyntaxError(
                            "expect an identifier, e.g. Plot(hsmc25, 1, ViewPoint='StateProfile')"
                        )
                    elif len(args) == 1:
                        identifier = args[0]
                    else:
                        raise SyntaxError(
                            "expect only one identifier, e.g. Plot(hsmc25, 1, ViewPoint='StateProfile')"
                        )
                    plotable = self.state_profile_plotable_write(
                        identifier, Output)
                else:
                    #todo 3 args required
                    from openalea.sequence_analysis._sequence_analysis import _MarkovianSequences, _VariableOrderMarkovData, _SemiMarkovData, _NonhomogeneousMarkovData
                    assert type(self) in [
                        _MarkovianSequences, _VariableOrderMarkovData,
                        _SemiMarkovData, _NonhomogeneousMarkovData
                    ]
                    if type(args[1]) == _HiddenVariableOrderMarkov:
                        plotable = args[1].state_profile_plotable_write2(
                            self, args[0])
                    elif type(args[1]) == _HiddenSemiMarkov:
                        plotable = args[1].state_profile_plotable_write2(
                            self, args[0], Output)
                    else:
                        raise TypeError(
                            "expect HiddenVariableOrderMarkov or HiddenSemiMarkov"
                        )

                if plotable is None:
                    try:
                        plotable = self.stateprofile_get_plotable(*params)
                    except:
                        pass

            elif ViewPoint == 'q':
                from openalea.sequence_analysis._sequence_analysis import _Sequences, _MarkovianSequences, _VariableOrderMarkovData, _SemiMarkovData
                if type(self) not in [
                        _Sequences, _MarkovianSequences,
                        _VariableOrderMarkovData, _SemiMarkovData
                ]:
                    raise TypeError(
                        'object must be in SEQUENCES or MARKOVIAN_SEQUENCES or VARIABLE_ORDER_MARKOV_DATA or SEMI-MARKOV_DATA'
                    )

                try:
                    self.nb_variable
                except:
                    raise ValueError(
                        "object has no nb_variable. check that it is a sequence"
                    )
                nb_variable = self.nb_variable
                assert len(args) >= 2
                error.CheckType([args[0], args[1]], [[int], [int]])
                #construct model_type
                from openalea.sequence_analysis.enums import model_type
                types = []
                for i in range(0, nb_variable):
                    error.CheckType([args[i + 2]], [str])
                    if i == 0:
                        types.append(model_type[args[i + 2]])
                        #Multinomial or Poisson or Ordinal or Gaussian or
                        # Mean or Variance or MeanVariance
                        if args[i + 2] in ["Mean", "MeanVariance"]:
                            for j in range(1, nb_variable):
                                types.append(types[i])
                            break
                    else:
                        # Multinomial or Poisson or Ordinal or Gaussian
                        # or Variance
                        types.append(model_type[args[i + 2]])
                #seq->segment_profile_plot_write(
                #         error , Plot_prefix , args[1].val.i ,
                #           args[2].val.i , model_type , output , title);

                plotable = self.segment_profile_plotable_write(
                    args[0], args[1], types, Output)

            #data viewPoint
            elif ViewPoint == 'd':
                from openalea.sequence_analysis._sequence_analysis import _SemiMarkovData, _MarkovianSequences, _Sequences, _NonHomogeneousMarkovData, _Tops
                if type(self) in [
                        _SemiMarkovData, _MarkovianSequences, _Sequences,
                        _NonHomogeneousMarkovData, _Tops
                ]:
                    #status = seq->plot_data_write(error , Plot_prefix , title);
                    plotable = self.get_plotable_data(*params)
            elif ViewPoint == 'v':
                # plot_write(error , Plot_prefix , title);

                if args:
                    #sequence case:
                    #todo: make it looser: observation, intensity INTENSITY?
                    choices = [
                        "SelfTransition", "Observation", "Intensity",
                        "FirstOccurrence", "Recurrence", "Sojourn", "Counting"
                    ]
                    if args[0] in choices:
                        multiplotset = self.get_plotable()
                        viewpoints = [x for x in multiplotset.viewpoint]
                        plotable = []
                        try:
                            from openalea.sequence_analysis import enums
                        except:
                            raise ImportError(
                                "sequence analysis not installed !!")

                        if len(args) == 1:
                            variable = 0
                        elif len(args) == 2:
                            variable = args[1]
                        for index, xx in enumerate(viewpoints):
                            if xx == enums.markovian_sequence_type[args[0]]:
                                if multiplotset.variable[index] == variable:
                                    plotable.append(multiplotset[index])
                    elif len(args) == 1 and type(args[0]) == str:
                        raise SyntaxError(
                            "first argument must be in %s and second arg (int) may be provided."
                            % choices)
                    elif len(args) == 1 and type(args[0]) == int:
                        from openalea.stat_tool._stat_tool import _Vectors
                        if type(self) == _Vectors:
                            #Plot(vector, 1)
                            multiplotset = self.get_plotable()
                            viewpoints = [x for x in multiplotset.viewpoint]
                            plotable = []
                            try:
                                from openalea.sequence_analysis import enums
                            except:
                                raise ImportError(
                                    "sequence analysis not installed !!")
                            plotable = [multiplotset[args[0]]]
                        else:
                            #Plot(hist1, hist2, hist3)
                            plotable = self.get_plotable_list()
                    elif len(args) == 1:
                        #e.g., list of histograms
                        plotable = self.get_plotable_list(list(args), *params)
                    else:
                        plotable = self.get_plotable_list(list(args), *params)
                else:
                    plotable = self.get_plotable(*params)
            plotter = plot.get_plotter()
        except:
            import warnings
            warnings.warn("Cannot use new plotter. Use old style plot.")
            plotable = None

        if plot.DISABLE_PLOT:
            return

        if (plotable is not None):
            plotter.plot(plotable, Title, groups, *args, **kargs)
        else:
            self.old_plot(*args, **kargs)
Example 18
def Clustering(matrix, utype, *args, **kargs):
    """
    Application of clustering methods (either partitioning methods or hierarchical methods)
    to dissimilarity matrices between patterns.

    In the case where the composition of clusters is a priori fixed,
    the function Clustering simply performs an evaluation of the a priori fixed
    partition.

    :Parameters:
      * `dissimilarity_matrix` (distance_matrix) - dissimilarity matrix between patterns,
      * `nb_cluster` (int) - number of clusters,
      * `clusters` (list(list(int))) - cluster composition.

    :Keywords:
      * `Prototypes` (list(int)): cluster prototypes.
      * `Algorithm` (string): "Agglomerative", "Divisive" or "Ordering"
      * `Criterion` (string): "FarthestNeighbor" or "Averaging"
      * `FileName` (string): name of the result file
      * `Format` (string) : "ASCII" or "SpreadSheet"

    :Returns:
        If the second mandatory argument is "Partition" and
        if 2 < nb_cluster < (number of patterns), an object of type clusters
        is returned.

    :Examples:

    .. doctest::
        :options: +SKIP

        >>> Clustering(dissimilarity_matrix, "Partition", nb_cluster, Prototypes=[1, 3, 12])
        >>> Clustering(dissimilarity_matrix, "Partition", clusters)
        >>> Clustering(dissimilarity_matrix, "Hierarchy", Algorithm="Agglomerative")
        >>> Clustering(dissimilarity_matrix, "Hierarchy", Algorithm="Divisive")

    .. seealso::
        :func:`~openalea.stat_tool.data_transform.SelectIndividual`,
        `Symmetrize`,
        :func:`~openalea.stat_tool.comparison.Compare`,
        :func:`~openalea.stat_tool.cluster.ToDistanceMatrix`.

    .. note:: if type="Partition", Algorithm must be "Divisive" or "Ordering"
       (not "Agglomerative").

    .. note:: Criterion can only be used when Algorithm is "Agglomerative".
    """
    #TODO: check this case :
    #Clustering(dissimilarity_matrix, "Partition", clusters)

    error.CheckType([matrix], [_DistanceMatrix])

    Algorithm = error.ParseKargs(kargs,
                                 "Algorithm",
                                 default="Divisive",
                                 possible=algorithm_type)
    # Switch for each type of clustering
    # first the partition case
    if utype == "Partition":
        error.CheckArgumentsLength(args, 1, 1)
        error.CheckKargs(kargs, ["Algorithm", "Prototypes", "Initialization"])
        Initialization = error.ParseKargs(kargs,
                                          "Initialization",
                                          1,
                                          possible=[1, 2])

        if Algorithm == algorithm_type["Agglomerative"]:
            raise ValueError("""If partition is on, Algorithm cannot
                    be agglomerative""")

        if (isinstance(args[0], int)):  #int case
            # if Prototypes is empty, the wrapping will send an
            # int * = 0 to the prototyping function, as expected
            Prototypes = kargs.get("Prototypes", [])
            nb_cluster = args[0]
            return matrix.partitioning_prototype(nb_cluster, Prototypes,
                                                 Initialization, Algorithm)
        elif isinstance(args[0], list):  # array case
            #todo:: array of what kind of object?
            #need a test
            return matrix.partitioning_clusters(args[0])
        else:
            raise TypeError("""
            With Partition as second argument, the third one must be either
            an int or an array.""")

    elif utype == "Hierarchy":
        error.CheckKargs(kargs,
                         ["Algorithm", "FileName", "Criterion", "Format"])

        Algorithm = error.ParseKargs(kargs,
                                     "Algorithm",
                                     default="Agglomerative",
                                     possible=algorithm_type)

        Criterion = error.ParseKargs(kargs,
                                     "Criterion",
                                     "Averaging",
                                     possible=criterion_type)

        # fixme: is it correct to set "" as the default filename?
        # if set to None, the prototype does not match
        filename = kargs.get("FileName", None)
        format = error.ParseKargs(kargs,
                                  "Format",
                                  "ASCII",
                                  possible=format_type)
        #check options
        if Algorithm != algorithm_type["Agglomerative"] and \
            kargs.get("Criterion"):

            raise ValueError("""
                In the Hierarchy case, if Algorithm is different from
                AGGLOMERATIVE, then Criterion cannot be used.""")
        return matrix.hierarchical_clustering(Algorithm, Criterion, filename,
                                              format)

    else:
        raise KeyError("Second argument must be 'Partitioning' or 'Hierarchy'")
Example 19
def Cluster(obj, utype, *args, **kargs):
    """Clustering of values.

    In the case of the clustering of values of a frequency distribution on the
    basis of an information measure criterion (argument `Information`), both the
    information measure ratio and the selected optimal step are given in the
    shell window.

    The clustering mode `Step` (and its variant `Information`) is naturally
    adapted to numeric variables while the clustering mode `Limit` applies to
    both symbolic (nominal) and numeric variables. In the case of a symbolic
    variable, the function `Cluster` with the mode `Limit` can be seen as a
    dedicated interface of the more general function `Transcode`.

    :Parameters:

      * `histo` (`_FrequencyDistribution`, `_DiscreteMixtureData`, `_ConvolutionData`, `_CompoundData`),
      * `step` (int) - step for the clustering of values
      * `information_ratio` (float) - proportion of the information measure of \
        the original sample for determining the clustering step,
      * `limits` (list(int)) - first values corresponding to the new classes \
        1, ..., nb_class - 1. By convention, the first value corresponding \
        to the first class is 0,
      * `vec1` (`_Vector`) - values,
      * `vecn` (`_Vectors`) - vectors,
      * `variable` (int) - variable index,
      * `seq1` (`_Sequences`) - univariate sequences,
      * `seqn` (`_Sequences`) - multivariate sequences,
      * `discrete_seq1` (`_DiscreteSequences`, `_Markov`, `_SemiMarkovData`) -
        discrete univariate sequences,
      * `discrete_seqn` (`_DiscreteSequences`, `_Markov`, `_SemiMarkovData`) -
        discrete multivariate sequences.

    :Keywords:

      * `AddVariable` (bool) : addition (instead of simple replacement) of the variable
        corresponding to the clustering of values (default value: False).
        This optional argument can only be used if the first argument is of
        type `_DiscreteSequences`, `_Markov` or `_SemiMarkovData`. The addition
        of the clustered variable is particularly useful if one wants to evaluate
        a lumpability hypothesis.

    :Returns:

      * If `step` > 0, or if 0 <  `information_ratio` <  1, or if 0 < limits[1]
        < limits[2] < ... < limits[nb_class - 1] < (maximum possible value of histo),
        an object of type _FrequencyDistribution is returned.
      * If variable is a valid index of a variable and if `step` > 0, or
        if 0 < limits[1] < limits[2] < ... < limits[nb_class - 1] < (maximum possible
        value taken by the selected variable of `vec1` or `vecn`), an object of type
        `_Vectors` is returned.
      * If variable is a valid index of a variable of type STATE and if `step` > 0, or \
        if 0 < limits[1] < limits[2] < ... < limits[nb_class - 1] < (maximum
        possible value taken by the selected variable of `seq1`, `seqn`, `discrete_seq1`
        or `discrete_seqn`), an object of type `_Sequences` or `_DiscreteSequences`
        is returned.
      * In the case of a first argument of type `_Sequences`, an object of type
        `_DiscreteSequences` is returned if all the variables are of type STATE,
        if the possible values taken by each variable are consecutive from 0 and
        if the number of possible values for each variable is < 15.

    :Examples:

    .. doctest::
        :options: +SKIP

        >>> Cluster(histo, "Step", step)
        >>> Cluster(histo, "Information", information_ratio)
        >>> Cluster(histo, "Limit", limits)
        >>> Cluster(vec1, "Step", step)
        >>> Cluster(vecn, "Step", variable, step)
        >>> Cluster(vec1, "Limit", limits)
        >>> Cluster(vecn, "Limit", variable, limits)
        >>> Cluster(seq1, "Step", step)
        >>> Cluster(seqn, "Step", variable, step)
        >>> Cluster(discrete_seq1, "Step", step, AddVariable=True)
        >>> Cluster(discrete_seqn, "Step", variable, step, AddVariable=True)
        >>> Cluster(seq1, "Limit", limits)
        >>> Cluster(seqn, "Limit", variable, limits)
        >>> Cluster(discrete_seq1, "Limit", limits, AddVariable=True)
        >>> Cluster(discrete_seqn, "Limit", variable, limits, AddVariable=True)

    .. seealso::
        :func:`~openalea.stat_tool.data_transform.Merge`,
        :func:`~openalea.stat_tool.data_transform.Shift`,
        :func:`~openalea.stat_tool.data_transform.ValueSelect`,
        :func:`~openalea.stat_tool.data_transform.MergeVariable`,
        :func:`~openalea.stat_tool.data_transform.SelectIndividual`,
        :func:`~openalea.stat_tool.data_transform.SelectVariable`,
        :func:`~openalea.stat_tool.cluster.Transcode`,
        :func:`~openalea.stat_tool.data_transform.AddAbsorbingRun`,
        :func:`~openalea.stat_tool.data_transform.Cumulate`,
        :func:`~openalea.stat_tool.data_transform.Difference`,
        :func:`~openalea.stat_tool.data_transform.IndexExtract`,
        :func:`~openalea.stat_tool.data_transform.LengthSelect`,
        :func:`~vplants.sequence_analysis.data_transform.MovingAverage`,
        :func:`~openalea.stat_tool.data_transform.RecurrenceTimeSequences`,
        :func:`~openalea.stat_tool.data_transform.RemoveRun`,
        :func:`~openalea.stat_tool.data_transform.Reverse`,
        :func:`~openalea.stat_tool.data_transform.SegmentationExtract`,
        :func:`~openalea.stat_tool.data_transform.VariableScaling`.
    """

    # fixme: what about the Mode in the Step case ?
    # check markovian_sequences call in Sequences
    AddVariable = error.ParseKargs(kargs,
                                   "AddVariable",
                                   False,
                                   possible=[False, True])

    possible_r = [str(f) for f in mode_type]  # possible rounding modes
    RoundingVariable = error.ParseKargs(kargs,
                                        "Round",
                                        "ROUND",
                                        possible=possible_r)

    error.CheckArgumentsLength(args, 1, 2)

    # search for the function name
    if hasattr(obj, cluster_type[utype]):
        func = getattr(obj, cluster_type[utype])
    else:
        raise KeyError("""Possible actions are 'Step', 'Information' or
        'Limit'. 'Information' cannot be used with Vectors objects""")

    # check if nb_variable is available (vectors, sequences)
    if hasattr(obj, 'nb_variable'):
        nb_variable = obj.nb_variable
    else:
        nb_variable = 1

    #check types
    if nb_variable == 1:
        if len(args) == 1:
            if utype == "Step":
                error.CheckType([args[0]], [int])
            if utype == "Limit":
                error.CheckType([args[0]], [list])
            if utype == "Information":
                error.CheckType([args[0]], [[int, float]])
            # the bound method has a different signature depending on the
            # underlying type, so try the candidate signatures in turn
            try:
                ret = func(args[0])  # frequency distribution case
            except Exception:
                try:
                    ret = func(1, args[0])  # vectors case (variable index 1)
                except Exception:
                    ret = func(1, args[0], AddVariable)  # sequences case
        else:
            raise ValueError("""Extra arguments provided (to specify a variable
            index?). Consider removing them: nb_variable equals 1, so no
            variable index is expected""")

    else:
        if len(args) == 2:
            if utype == "Step":
                error.CheckType([args[0]], [int])
                error.CheckType([args[1]], [[int, float]])
            if utype == "Limit":
                error.CheckType([args[0]], [int])
                error.CheckType([args[1]], [list])
            try:
                ret = func(*args)  # vectors case
            except Exception:
                ret = func(args[0], args[1],
                           mode_type[RoundingVariable].real)  # sequences case
        else:
            raise ValueError("""Wrong number of arguments: since nb_variable is
            greater than 1, a variable index followed by the clustering
            parameter is expected""")

    if hasattr(ret, 'markovian_sequences'):
        ret = ret.markovian_sequences()

    return ret
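# A minimal usage sketch of the three clustering modes dispatched above,
# assuming the enclosing function is the Cluster constructor shown in its
# doctests and that `histo` is a frequency distribution built elsewhere
# (the numeric values are purely illustrative).
def _cluster_usage_sketch(histo):
    by_step = Cluster(histo, "Step", 2)            # regroup values two by two
    by_info = Cluster(histo, "Information", 0.8)   # keep 80% of the information measure
    by_limit = Cluster(histo, "Limit", [2, 4, 6])  # explicit class limits
    return by_step, by_info, by_limit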
Esempio n. 20
0
def Vectors(*args, **kargs):
    """
    Construction of a set of vectors from a multidimensional array,
    from a set of sequences or from an ASCII file.

    The data structure of type list(list(int)) should be built so that the
    innermost lists all have the same size (one entry per variable).

    :Parameters:
      - `list` (list(list(int))) : a list of vectors, each given as a list of values,
      - `seq` (sequences, discrete_sequences, markov_data, semi-markov_data),
      - `file_name` (string) : path to an ASCII file.

    :Keywords:
      - Identifiers (list(int)): explicit identifiers of vectors.
        This optional argument can only be used if the first mandatory argument is of
        type list(list(int)).
      - IndexVariable (bool): taking into account of the implicit index parameter as
        a supplementary variable (default value: False). This optional argument can
        only be used if the first mandatory argument is of type `sequences`,
        `discrete_sequences`, `markov_data` or `semi-markov_data`.

    :Returns:
       If the construction succeeds, an object of type vectors is returned,
       otherwise no object is returned.

    :Examples:

    .. doctest::
        :options: +SKIP

        >>> Vectors(list, Identifiers=[1, 8, 12])
        >>> Vectors(seq, IndexVariable=True)
        >>> Vectors(file_name)

    .. seealso::
        :func:`~openalea.stat_tool.output.Save`,
        :func:`~openalea.stat_tool.data_transform.ExtractHistogram`,
        :func:`~openalea.stat_tool.cluster.Cluster`,
        :func:`~openalea.stat_tool.data_transform.Merge`,
        :func:`~openalea.stat_tool.data_transform.MergeVariable`,
        :func:`~openalea.stat_tool.data_transform.SelectIndividual`,
        :func:`~openalea.stat_tool.data_transform.SelectVariable`,
        :func:`~openalea.stat_tool.data_transform.Shift`,
        :func:`~openalea.stat_tool.cluster.Transcode`,
        :func:`~openalea.stat_tool.data_transform.ValueSelect`,
        :func:`~openalea.stat_tool.comparison.Compare`,
        :func:`~openalea.stat_tool.comparison.ComputeRankCorrelation`,
        :func:`~openalea.stat_tool.comparison.ContingencyTable`,
        :func:`~openalea.stat_tool.comparison.Regression`,
        :func:`~openalea.stat_tool.comparison.VarianceAnalysis`
    """
    error.CheckArgumentsLength(args, 1, 1)
    error.CheckKargs(kargs, possible_kargs = ["Identifiers", 
                                              "IndexVariable"])

    obj = args[0]
    ret = None

    import openalea.core.path

    if isinstance(obj, str):
        # constructor from a filename
        ret = _Vectors(args[0])
    elif isinstance(obj, openalea.core.path.path):
        # constructor from a path
        ret = _Vectors(str(args[0]))
    elif isinstance(obj, list):
        # Normal usage is Vectors([[1, 2, 3], [1, 2, 3], [4, 5, 6]]).
        # If only one variable is required, then normal usage is
        # Vectors([[1, 2, 3]]). Yet, to simplify usage, if there is only
        # one variable, the following test allows us to use Vectors([1, 2, 3]).
        if type(obj[0]) != list:
            obj = [obj]
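        # e.g. Vectors([1, 2, 3]) is rewritten here as Vectors([[1, 2, 3]])
        # before the per-variable type detection below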



        # 0 for int, 1 for float. By default all variables are int.
        # We loop over all vectors and, as soon as one value of a variable
        # is found to be a float, that variable's type is set to float.
        InputTypes = [0] * len(obj[0])
        nb_variables = len(obj[0])
        for vec in obj:
            for index, var in enumerate(vec):
                assert type(var) in [int, float], "wrong types var=%s and its type is %s" % (var, type(var))
                if type(var)==float:
                    InputTypes[index]=1
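        # e.g. (illustrative) obj == [[1, 2.0, 3], [4, 5.5, 6]] yields
        # InputTypes == [0, 1, 0]: only the second variable is passed to
        # _Vectors as a float variable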


        # from a list and an optional argument

        # first, get the Identifiers and check its type
        identifiers = error.ParseKargs(kargs, "Identifiers")
        if identifiers:
            error.CheckType([identifiers], [[list]], variable_pos=[2])

            if len(identifiers) != len(obj):
                raise ValueError("""Identifiers must be a list whose length
                equals the number of vectors""")
            ret = _Vectors(obj, identifiers, InputTypes)
        else:

            # create default identifiers [1, 2, ...], one per vector
            identifiers = []
            for i, vec in enumerate(obj):
                identifiers.append(i + 1)

            ret = _Vectors(obj, identifiers, InputTypes)
    else:
        # from a sequence
        index_variable = error.ParseKargs(kargs, "IndexVariable", False,
                                          [True, False])
        error.CheckType([index_variable], [bool], variable_pos=[2])
        ret = obj.build_vectors(index_variable)


    return ret
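# A minimal usage sketch for the constructor above (the data values and the
# identifiers are purely illustrative).
def _vectors_usage_sketch():
    # three vectors with two variables each; the second variable is detected
    # as a float by the type-detection loop above
    return Vectors([[1, 2.5], [4, 5.0], [7, 8.1]], Identifiers=[10, 20, 30])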
Esempio n. 21
0
    def estimate_DiscreteMixture(histo, *args, **kargs):
        """ Estimate a finite  mixture of discrete distributions


        :Parameters:

          * histo (histogram, mixture_data, convolution_data, compound_data),
          * distributions (list) : a list of distribution objects
                                   or distribution labels (string): 'B', 'NB', 'U', 'P', ...
          * unknown (string): type of unknown distribution: "Sum" or "Elementary".

        :Keywords:

          * MinInfBound (int): lower bound to the range of possible values (0 -default- or 1). \
                               This optional argument cannot be used in conjunction \
                               with the optional argument InitialDistribution.
          * InfBoundStatus (string): shifting or not of the distribution: "Free" (default value) or "Fixed".
          * DistInfBoundStatus (string): shifting or not of the subsequent components of \
                                         the mixture: "Free" (default value) or "Fixed".
          * NbComponent (string): estimation of the number of components of the mixture: \
                                  "Fixed" (default value) or "Estimated". Le number of estimated \
                                  components is comprised between\
                                  1 and a maximum number which is given by the number of specified \
                                  parametric distributions in the mandatory arguments \
                                  (all of these distributions are assumed to be unknown).
          * Penalty (string): type of penalty function for model selection: \
                              "AIC" (Akaike Information Criterion), \
                              "AICc" (corrected Akaike Information Criterion), \
                              "BIC" (Bayesian Information Criterion - default value) or \
                              "BICc" (corrected Bayesian Information Criterion).

                              This optional argument can only be used if the optional argument
                              NbComponent is set at "Estimated".

        :Examples:

        .. doctest::
            :options: +SKIP

            >>> estimate_DiscreteMixture(histo, "MIXTURE", "B", dist, ...,
                             MinInfBound=1, InfBoundStatus="Fixed",
                             DistInfBoundStatus="Fixed")
            >>> estimate_DiscreteMixture(histo, "MIXTURE", "B", "NB", ...,
                               MinInfBound=1, InfBoundStatus="Fixed",
                               DistInfBoundStatus="Fixed",
                               NbComponent="Estimated", Penalty="AIC")
            >>> Estimate(histo, "MIXTURE", "B", dist, MinInfBound=1, InfBoundStatus="Fixed",
                    DistInfBoundStatus="Fixed")
            >>> Estimate(histo, "MIXTURE", "B", "NB",
                    MinInfBound=1, InfBoundStatus="Fixed",
                    DistInfBoundStatus="Fixed",
                    NbComponent="Estimated", Penalty="AIC")


        """
        #alias

        #error.CheckArgumentsLength(args, 1, 1)

        # get user arguments
        # list of distributions can be either a list or several arguments
        # e.g.: estimate_DiscreteMixture(["B","B"]) or estimate_DiscreteMixture("B", "B")
        if len(args) == 1 and type(args[0]) == list:
            distributions = args[0]
        else:
            distributions = list(args)

        InfBoundStatus = kargs.get("InfBoundStatus", "Free")
        DistInfBoundStatus = kargs.get("DistInfBoundStatus", "Free")
        NbComponent = kargs.get("NbComponent", "Fixed")

        MinInfBound = kargs.get("MinInfBound", 0)
        Penalty = error.ParseKargs(kargs, "Penalty", "AIC",
                                   likelihood_penalty_type)

        #should be before the conversion to booleans
        error.CheckType([
            MinInfBound, InfBoundStatus, DistInfBoundStatus, NbComponent,
            Penalty
        ], [int, str, str, str, LikelihoodPenaltyType])

        # transform into boolean when needed
        InfBoundStatus = bool(InfBoundStatus == "Free")
        DistInfBoundStatus = bool(DistInfBoundStatus == "Free")
        NbComponent = bool(NbComponent == "Estimated")
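        # e.g. InfBoundStatus="Fixed" becomes False here, and the default
        # NbComponent="Fixed" becomes False (the number of components is known)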

        estimate = []  # list of bool
        pcomponent = []  # list of distribution
        ident = []  # list of distribution identifier

        # Parse the list of distributions; each item may be a distribution,
        # compound, mixture or convolution object, or simply a string such as
        # "B", "NB", ...

        for dist in distributions:

            if isinstance(dist, str):
                dist_authorised = [
                    "BINOMIAL", "B", "POISSON", "P", "NB", "NEGATIVE_BINOMIAL"
                ]
                if dist not in dist_authorised:
                    raise ValueError("""If distribution is a string, then it
                        must be in %s. You provided %s""" %
                                     (dist_authorised, dist))
                #todo: check that poisson is allowed

                pcomponent.append(_DiscreteParametric(0, dist_type[dist]))
                ident.append(dist_type[dist])
                estimate.append(True)
            elif isinstance(dist, _DiscreteParametricModel):
                pcomponent.append(_DiscreteParametric(dist))
                ident.append(None)
                estimate.append(False)
            elif type(dist) in [_DiscreteMixture, _Convolution, _Compound]:
                pcomponent.append(_Distribution(dist))
                ident.append(None)
                estimate.append(False)
            else:
                raise ValueError("""In the case of a MIXTURE estimation,
                argument related to distributions must be either string, or
                Distribution, Mixture, Convolution, Compound. %s provided""" %
                                 dist)
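        # e.g. (illustrative) distributions == ["B", known_dist], with known_dist
        # a _DiscreteParametricModel, yields estimate == [True, False]: only the
        # binomial component is re-estimated, the known one is kept fixed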

        # check parameters
        # Penalty is only meaningful when NbComponent is "Estimated"; check for
        # an explicit keyword, since ParseKargs always returns a default value
        if not NbComponent and "Penalty" in kargs:
            raise TypeError("""
            Penalty can only be used with NbComponent set to 'Estimated'""")

        if not NbComponent:  # "FIXED"
            imixt = _DiscreteMixture(pcomponent)
            ret = histo.discrete_mixture_estimation1(imixt, estimate,
                                                     MinInfBound,
                                                     InfBoundStatus,
                                                     DistInfBoundStatus)

            return ret
        else:  # "ESTIMATED"
            ret = histo.discrete_mixture_estimation2(ident, MinInfBound,
                                                     InfBoundStatus,
                                                     DistInfBoundStatus,
                                                     Penalty)
            return ret
Esempio n. 22
0
    def estimate_compound(histo, *args, **kargs):
        """estimate a compound


        :Usage:

        .. doctest::
            :options: +SKIP

            >>> Estimate(histo, "COMPOUND", dist, unknown,
                    Parametric=False, MinInfBound=0)
            >>> Estimate(histo, "COMPOUND", dist, unknown,
                    InitialDistribution=initial_dist, Parametric=False)
        """

        if len(args) < 2:
            raise ValueError("expect at least three arguments")

        known_distribution = args[0]
        ##if isinstance(known_distribution, _DiscreteParametricModel):
        #    known_distribution = _DiscreteParametric(known_distribution)
        #elif type(known_distribution) in [_DiscreteMixture, _Convolution, _Compound]:
        #    known_distribution = _Distribution(known_distribution)
        #else:
        #    raise TypeError("""
        #    argument "known_distribution" must be of type _DiscreteMixture,
        #     _COnvolution, _Compound or _DiscreteParametricModel""")

        Type = args[1]
        error.CheckType([Type], [str])

        Weight = kargs.get("Weight", -1)
        NbIteration = kargs.get("NbIteration", -1)
        InitialDistribution = kargs.get("InitialDistribution", None)
        MinInfBound = kargs.get("MinInfBound", 0)

        Estimator = error.ParseKargs(kargs, "Estimator", "Likelihood",
                                     estimator_type)
        Penalty = error.ParseKargs(kargs, "Penalty", "SecondDifference",
                                   smoothing_penalty_type)
        Outside = error.ParseKargs(kargs, "Outside", "Zero", outside_type)

        if MinInfBound and InitialDistribution:
            raise ValueError("""MinInfBound and InitialDistribution cannot be
                             used together.""")
        #if Estimator != _stat_tool.PENALIZED_LIKELIHOOD:
        #    if Penalty or Weight or Outside:
        #        raise ValueError("""Estimator cannot be used with O
        #            utside or Weight or Penalty option""")

        try:
            if Type:
                Type = compound_type[Type]
        except KeyError:
            raise AttributeError("Bad type. Possible types are %s" %
                                 (str(compound_type.keys())))

        #The second argument is either a string (e.g.,"Sum") or an unknown
        #distribution.
        unknown_distribution = None

        if InitialDistribution:
            unknown_distribution = InitialDistribution
            if isinstance(unknown_distribution, _Distribution):
                unknown_distribution = _DiscreteParametric(
                    unknown_distribution)
            elif type(unknown_distribution) in \
                [_DiscreteMixture, _Convolution, _Compound]:
                unknown_distribution = _Distribution(unknown_distribution)
            else:
                raise TypeError("""
                    argument "known_distribution" must be of type
                     _DiscreteMixture, _COnvolution, _Compound or _DiscreteParametricModel"""
                                )
            if Type == 's':

                return histo.compound_estimation1(unknown_distribution,
                                                  known_distribution, Type,
                                                  Estimator, NbIteration,
                                                  Weight, Penalty, Outside)
            elif Type == 'e':

                return histo.compound_estimation1(known_distribution,
                                                  unknown_distribution, Type,
                                                  Estimator, NbIteration,
                                                  Weight, Penalty, Outside)
            else:
                raise KeyError("should not enter here.")
        else:
            return histo.compound_estimation2(known_distribution, Type,
                                              MinInfBound, Estimator,
                                              NbIteration, Weight, Penalty,
                                              Outside)
Esempio n. 23
0
def Mixture(*args):
    """Construction of a mixture of distributions from elementary distributions
    and associated weights or from an ASCII file.

    A mixture is a parametric model of classification where each elementary
    distribution or component represents a class with its associated weight.

    :Parameters:
      * `weight1`, `weight2`, ... (float) - weights of each component.
         These weights should sum to one (they constitute a discrete
         distribution).
      * `dist1`, `dist2`, ... (`_DiscreteParametricModel`, `_DiscreteMixture`, `_Convolution`,
        `_Compound`) elementary distributions (or components).
      * `filename` (string) -

    :Returns:
        If the construction succeeds, an object of type mixture is returned,
        otherwise no object is returned.

    :Examples:

    .. doctest::
        :options: +SKIP

        >>> Mixture(weight1, dist1, weight2, dist2,...)
        >>> Mixture(filename)

    .. seealso::
        :func:`~openalea.stat_tool.output.Save`,
        :func:`~openalea.stat_tool.estimate.Estimate`,
        :func:`~openalea.stat_tool.simulate.Simulate`.

    """
    error.CheckArgumentsLength(args, 1)

    types = [
        _DiscreteParametricModel, _DiscreteMixture, _Compound, _Convolution
    ]

    # filename
    if (len(args) == 1):
        error.CheckType([args[0]], [str], arg_id=[1])
        result = _DiscreteMixture(args[0])

    # build list of weights and distributions
    else:
        nb_param = len(args)
        if ((nb_param % 2) != 0):
            raise TypeError("Number of parameters must be pair")

        # Extract weights and distributions
        weights = []
        dists = []
        for i in xrange(nb_param / 2):
            weights.append(args[i * 2])
            error.CheckType([args[i * 2 + 1]], [types], arg_id=[i * 2 + 1])
            error.CheckType([args[i * 2]], [float], arg_id=[i * 2])
            #dists.append(_Distribution(args[i * 2 + 1]))
            dists.append((args[i * 2 + 1]))

        result = _DiscreteMixture(weights, dists)

    return result
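# A minimal usage sketch (dist1 and dist2 are assumed to be elementary discrete
# distributions built elsewhere, e.g. with the Distribution() constructor of
# this module; the weights are illustrative and sum to one).
def _mixture_usage_sketch(dist1, dist2):
    return Mixture(0.4, dist1, 0.6, dist2)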
Esempio n. 24
0
def Extract(obj, *args, **kargs):
    """
    Common method to redirect extract function call
    See`ExtractHistogram` or `ExtractDistribution`
    """

    ret = None

    if type(obj) in [_DiscreteMixture, _DiscreteMixtureData]:
        assert len(args) >= 1
        error.CheckType([args[0]], [str])
        if args[0] == 'Mixture':
            assert len(args) == 1
            ret = obj.extract_mixture()
        elif args[0] == 'Component':
            assert len(args) == 2
            error.CheckType([args[1]], [int])
            ret = obj.extract_component(args[1])
        elif args[0] == 'Weight':
            assert len(args) == 1
            ret = obj.extract_weight()
        else:
            raise ValueError("Excepted Component, Weight or Mixture")
    elif type(obj) in [_Convolution, _ConvolutionData]:
        assert len(args) >= 1
        error.CheckType([args[0]], [[str, int]])
        if args[0] == 'Elementary' or isinstance(args[0], int):
            if len(args) == 1:
                error.CheckType([args[0]], [int])
                ret = obj.extract_elementary(args[0])
            elif len(args) == 2:
                error.CheckType([args[0], args[1]], [str, int])
                ret = obj.extract_elementary(args[1])
        elif args[0] == 'Convolution':
            error.CheckType([args[0]], [[str, int]])
            ret = obj.extract_convolution()
        else:
            raise ValueError("Excepted \"Elementaty\", or index")

    elif type(obj) in [_Compound, _CompoundData]:
        assert len(args) == 1
        if args[0] == 'Sum':
            ret = obj.extract_sum()
        elif args[0] == 'Elementary':
            ret = obj.extract_elementary()
        elif args[0] == 'Compound':
            ret = obj.extract_compound()
        else:
            raise ValueError("Excepted Sum, Elementary or Compound")

    elif isinstance(obj, _Vectors):
        # _Vectors with one variable

        try:
            nb_var = obj.nb_variable
            if (nb_var > 1):
                try:
                    variable = args[0]
                except IndexError:
                    raise TypeError("""Extract with vectors object need 1
                     arguments (variable) if nb variable>1""")
            else:
                variable = 1

            return obj.extract(variable)

        except AttributeError:
            raise ValueError("unknown issue while extracting vectors")
    else:
        # related to Top, Renewal, Markov , ...
        try:
            from openalea.sequence_analysis.data_transform import Extract \
                as newExtract
            ret = newExtract(obj, *args, **kargs)
        except ValueError:
            pass

    return ret
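# A hedged sketch of the dispatch above for a mixture object (`mixt` is assumed
# to be a _DiscreteMixture or _DiscreteMixtureData built elsewhere).
def _extract_usage_sketch(mixt):
    weight = Extract(mixt, 'Weight')         # weight distribution
    second = Extract(mixt, 'Component', 2)   # second component
    mixture = Extract(mixt, 'Mixture')       # the mixture distribution itself
    return weight, second, mixture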
Esempio n. 25
0
def Distribution(utype, *args):
    """
    Construction of a parametric discrete distribution (either binomial,
    Poisson, negative binomial or uniform) from the name and the parameters
    of the distribution or from an ASCII file.

    A supplementary shift parameter (argument inf_bound) is required with
    respect to the usual definitions of these discrete distributions.
    Constraints over parameters are given in the file syntax corresponding
    to the distribution type (cf. File Syntax).

    :Parameters:
      * `inf_bound` (int) : lower bound to the range of possible values
        (shift parameter),
      * `sup_bound` (int) : upper bound to the range of possible values \
      (only relevant for binomial or uniform distributions),
      * `param` (int, real) : parameter of either the Poisson distribution or \
      the negative binomial distribution.
      * `proba` (int, float) : probability of success \
      (only relevant for binomial or negative binomial distributions),
      * `file_name` (string).

      .. note:: the names of the parametric discrete distributions can be
        summarized by their first letters:

        * "B" ("BINOMIAL"),
        * "P" ("POISSON"),
        * "NB" ("NEGATIVE_BINOMIAL"),
        * "U" ("UNIFORM"),
        * "M" ("MULTINOMIAL"),


    :Returns:
        If the construction succeeds, an object of type distribution is
        returned, otherwise no object is returned.

    :Examples:

    .. doctest::
        :options: +SKIP

        >>> Distribution("BINOMIAL", inf_bound, sup_bound, proba)
        >>> Distribution("POISSON", inf_bound, param)
        >>> Distribution("NEGATIVE_BINOMIAL", inf_bound, param, proba)
        >>> Distribution("UNIFORM", inf_bound, sup_bound)
        >>> Distribution(file_name)

    .. seealso::
        :func:`~openalea.stat_tool.output.Save`,
        :func:`~openalea.stat_tool.estimate.Estimate`,
        :func:`~openalea.stat_tool.simulate.Simulate`.
    """
    # Constructor from a filename, a frequency distribution or a parametric model
    if len(args) == 0:
        error.CheckType([utype],
                        [[str, _DiscreteDistributionData, _DiscreteParametricModel]],
                        arg_id=[1])
        result = _DiscreteParametricModel(utype)
    # from a distribution name and its parameters
    else:
        error.CheckArgumentsLength(args, 1)
        if utype in ["B",  "BINOMIAL"]:
            result = Binomial(*args)
        elif utype in ["P", "POISSON"]:
            result = Poisson(*args)
        elif utype in ["M", "MULTINOMIAL"]:
            raise NotImplementedError("Multinomial not yet implemented")
        elif utype in ["NB", "NEGATIVE_BINOMIAL"]:
            result = NegativeBinomial(*args)
        elif utype in ["U", "UNIFORM"]:
            result = Uniform(*args)
        else:
            raise KeyError(" %s not found. Allowed keys are %s"
                           % (utype, distribution_identifier_type.keys()))

    return result
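# A minimal usage sketch mirroring the doctests above; the parameter values
# are purely illustrative.
def _distribution_usage_sketch():
    b = Distribution("BINOMIAL", 0, 10, 0.5)             # inf_bound, sup_bound, proba
    p = Distribution("POISSON", 0, 2.5)                  # inf_bound, param
    nb = Distribution("NEGATIVE_BINOMIAL", 0, 1.0, 0.3)  # inf_bound, param, proba
    u = Distribution("UNIFORM", 2, 8)                    # inf_bound, sup_bound
    return b, p, nb, u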
Esempio n. 26
0
def SelectVariable(obj, variables, Mode="Keep"):
    """
    Selection of variables.

    :Parameters:

      * vec (vectors),
      * seq (sequences, discrete_sequences, markov_data, semi-markov_data),
      * variable (int): variable index.
      * variables (array(int)): variable indices.

    :Keywords:

      * Mode (string): conservation or rejection of the selected variables: "Keep" (default) or "Reject".

    :Returns:

      If either variable or variables[1], ..., variables[n] are valid indices of variables,
      an object of type vectors (respectively sequences or discrete_sequences) is returned,
      otherwise no object is returned. In the case of a first argument of type sequences,
      the returned object is of type discrete_sequences if all the variables are of type STATE,
      if the possible values for each variable are consecutive from 0 and if the number of
      possible values for each variable is < 15.

    :Examples:

    .. doctest::
        :options: +SKIP

        >>> SelectVariable(vec, variable, Mode="Reject")
        >>> SelectVariable(vec, variables, Mode="Reject")
        >>> SelectVariable(seq, variable, Mode="Reject")
        >>> SelectVariable(seq, variables, Mode="Reject")

    .. seealso::
        `AddAbsorbingRun`,
        :func:`~openalea.stat_tool.cluster.Cluster`,
        :func:`~openalea.stat_tool.cumulate.Cumulate`,
        `Difference`,
        `IndexExtract`,
        `LengthSelect`,
        :func:`~openalea.stat_tool.data_transform.Merge`,
        :func:`~openalea.stat_tool.data_transform.MergeVariable`,
        `MovingAverage`,
        `RecurrenceTimeSequences`,
        `RemoveRun`,
        `Reverse`,
        :func:`~openalea.stat_tool.data_transform.SelectIndividual`,
        :func:`~openalea.stat_tool.data_transform.Shift`,
        :func:`~openalea.stat_tool.cluster.Transcode`,
        :func:`~openalea.stat_tool.data_transform.ValueSelect`,
        `SegmentationExtract`,
        `VariableScaling`.
    """

    error.CheckType([variables, Mode], [[int, list], str])
    #todo: check that Mode is in ["Keep", "Reject"]

    keep = bool(Mode == "Keep" or Mode == "keep")

    if isinstance(variables, int):
        variables = [variables]

    ret = obj.select_variable(variables, keep)

    return ret
Esempio n. 27
0
def ComparisonTest(utype, histo1, histo2):
    r"""
    Test of comparison of frequency distributions.

    The objective is to compare two independent random samples in order to decide
    whether they have been drawn from the same population or not.
    In the case of samples from normal populations, the Fisher-Snedecor ("F") test
    makes it possible to test whether the two variances are significantly different.
    The normal distribution assumption should be checked, for instance by examining
    the shape coefficients (skewness and kurtosis). The test statistic is:

    .. math::

        F_{n_1-1,n_2-1} = \frac
            {
            \frac{\displaystyle\sum_{i=1}^{n_1}\left( x_{1i}-m_1 \right)^2}{n_1-1}
            }
            {
            \frac{\displaystyle\sum_{i=1}^{n_2}\left( x_{2i}-m_2 \right)^2}{n_2-1}
            }

    where :math:`m_1` and :math:`m_2` are the means of the samples.

    The Fisher-Snedecor variable :math:`F_{n_1-1,n_2-1}` with :math:`n_1-1` degrees
    of freedom and :math:`n_2-1` degrees of freedom can
    be interpreted as the ratio of the variance estimators of the two samples.
    In practice, the larger estimated variance is put in the numerator. Hence
    :math:`F_{n_1-1,n_2-1} \geq 1`. The critical region is of the form
    :math:`F_{n_1-1,n_2-1} > f` (one-sided test).

    In the case of samples from normal populations with equal variances,
    the Student ("T") test makes it possible to test whether the two means are
    significantly different. The test statistic is:

    .. math::
        T_{n_1+n_2-2} = \frac{m_1 - m_2}{
        \sqrt{\left(
            \displaystyle\sum_{i=1}^{n_1}\left( x_{1i}-m_1 \right)^2
            +
            \displaystyle\sum_{i=1}^{n_2}\left( x_{2i}-m_2 \right)^2
            \right)
            \left( \frac{1}{n_1} + \frac{1}{n_2}\right)
        }
        } \sqrt{n_1 + n_2 - 2}

    The critical region is of the form :math:`\left| T_{n_1+n_2-2}\right| > t`
    (two-sided test). For sufficiently large sample
    sizes, this test of sample mean comparison can be used for samples from non-normal
    populations with unequal variances. This test is said to be robust.

    The Wilcoxon-Mann-Whitney ("W") test is a distribution-free test relying on
    the homogeneity of the ranking of the two samples (the ranks of one sample should
    not cluster at either or both ends of the range). It can be seen as the
    non-parametric analog of Student's t test and can be applied to compare
    two sets of observations measured on an interval scale when the data are
    assumed to be non-normally distributed, or to compare two sets of
    observations measured on an ordinal scale.

    :Parameters:
       * type(string) : type of test "F" (Fisher-Snedecor), "T" (Student)
         or "W" (Wilcoxon-Mann-Whitney)
       * histo1, histo2 (Histogram, MixtureData, ConvolutionData, CompoundData)

    :Returns:
       A string containing the result of the tests

    :Examples:

    .. doctest::
        :options: +SKIP

        >>> ComparisonTest(type, histo1, histo2)

    """
    error.CheckType([histo1, histo2],
                        [[_DiscreteDistributionData, _DiscreteMixtureData,
                          _ConvolutionData, _CompoundData]]*2)

    utype = utype.lower()
    #todo: move this dict to enumerate.py ?
    type_dict = {
        "f": "f_comparison",
        "t": "t_comparison",
        "w": "wmw_comparison",
        }

    if utype not in type_dict:
        raise TypeError("Unknown test type %s. Possible types are %s"
                        % (utype, type_dict.keys()))

    func = getattr(histo1, type_dict[utype])
    ret = func(histo2)

    return ret
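# A hedged usage sketch (histo1 and histo2 are assumed to be frequency
# distributions built elsewhere).
def _comparison_test_sketch(histo1, histo2):
    f_result = ComparisonTest("F", histo1, histo2)  # variance comparison
    t_result = ComparisonTest("T", histo1, histo2)  # mean comparison
    w_result = ComparisonTest("W", histo1, histo2)  # rank-based comparison
    return f_result, t_result, w_result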
Esempio n. 28
0
def SelectIndividual(obj, identifiers, Mode="Keep"):
    """
    Selection of vectors, sequences, tops or patterns (in a dissimilarity matrix).

    :Parameters:

      * vec (vectors),
      * seq (sequences, discrete_sequences, markov_data, semi-markov_data),
      * top (tops),
      * dist_matrix (distance_matrix),
      * identifiers (array(int)): identifiers.

    :Keywords:

       Mode (string): conservation or rejection of the selected individuals: "Keep" (default) or "Reject".

    :Returns:

        If identifiers[1], ..., identifiers[n] are valid identifiers of vectors (respectively
        sequences, tops or patterns compared in a dissimilarity matrix), an object of type vectors
        (respectively sequences or discrete_sequences, tops or distance_matrix) is returned,
        otherwise no object is returned. In the case of a first argument of type sequences,
        discrete_sequences, markov_data, semi-markov_data, the returned object is of type
        discrete_sequences if all the variables are of type STATE, if the possible values for
        each variable are consecutive from 0 and if the number of possible values for each variable
        is < 15.

    :Examples:

    .. doctest::
        :options: +SKIP

        >>> SelectIndividual(vec, identifiers, Mode="Reject")
        >>> SelectIndividual(seq, identifiers, Mode="Reject")
        >>> SelectIndividual(top, identifiers, Mode="Reject")
        >>> SelectIndividual(dist_matrix, identifiers, Mode="Reject")

    .. seealso::
        :func:`~openalea.stat_tool.cluster.Cluster`,
        :func:`~openalea.stat_tool.data_transform.Merge`,
        :func:`~openalea.stat_tool.data_transform.Shift`,
        :func:`~openalea.stat_tool.cluster.Transcode`,
        :func:`~openalea.stat_tool.data_transform.ValueSelect`,
        :func:`~openalea.stat_tool.data_transform.MergeVariable`,
        :func:`~openalea.stat_tool.data_transform.SelectVariable`,
        `AddAbsorbingRun`,
        `Cumulate`,
        `Difference`,
        `IndexExtract`,
        `LengthSelect`,
        `MovingAverage`,
        `RecurrenceTimeSequences`,
        `RemoveSeries`,
        `Reverse`,
        `SegmentationExtract`,
        `VariableScaling`,
        `RemoveApicalInternodes`,
        `Symmetrize`.

    """
    error.CheckType([identifiers, Mode], [list, str])

    #todo: CHECK THAT Mode is in ["Keep", "Reject"]
    keep = bool(Mode == "Keep" or Mode == "keep")

    ret = None
    try:
        ret = obj.select_individual(identifiers, keep)
    except Exception:
        raise Exception("Could not run select_individual on the input object.")

    #if ret:
    #    try:
    # if obj is a sequence, returns markovian_sequences
    #        return ret.markovian_sequences()
    #    except AttributeError:
    #        return ret
    #else:
    #    raise Exception("Must not enter here")
    # the code above prevent tests to succeed.
    return ret
Esempio n. 29
0
def ValueSelect(obj, *args, **kargs):
    """Selection of individuals according to the values taken by a variable

    :Parameters:

      * histo (histogram, mixture_data, convolution_data, compound_data),
      * value (int): value,
      * min_value (int): minimum value,
      * max_value (int): maximum value,
      * vec1 (vectors): values,
      * vecn (vectors): vectors,
      * variable (int): variable index,
      * seq1 (sequences, discrete_sequences, markov_data, semi-markov_data): univariate sequences,
      * seqn (sequences, discrete_sequences, markov_data, semi-markov_data): multivariate sequences.

    :Keywords:

      * Mode (string): conservation or rejection of selected individuals: "Keep" (the default) or "Reject".

    :Returns:

        If the range of values defined either by value or by min_value and max_value
        (with 0 < min_value < max_value) makes it possible to select individuals, an object of
        type HISTOGRAM is returned (respectively vectors, sequences or discrete_sequences),
        otherwise no object is returned. In the case of a first argument of type sequences,
        discrete_sequences, markov_data or semi-markov_data, the returned object is of type
        discrete_sequences if all the variables are of type STATE, if the possible values for each
        variable are consecutive from 0 and if the number of possible values for each variable is < 15.

    :Examples:

    .. doctest::
        :options: +SKIP

        >>> ValueSelect(histo, value, Mode="Reject")
        >>> ValueSelect(histo, min_value, max_value, Mode="Reject")
        >>> ValueSelect(vec1, value, Mode="Reject")
        >>> ValueSelect(vec1, min_value, max_value, Mode="Reject")
        >>> ValueSelect(vecn, variable, value, Mode="Reject")
        >>> ValueSelect(vecn, variable, min_value, max_value, Mode="Reject")
        >>> ValueSelect(seq1, value, Mode="Reject")
        >>> ValueSelect(seq1, min_value, max_value, Mode="Reject")
        >>> ValueSelect(seqn, variable, value, Mode="Reject")
        >>> ValueSelect(seqn, variable, min_value, max_value, Mode="Reject")

    .. seealso::
        :func:`~openalea.stat_tool.cluster.Cluster`,
        :func:`~openalea.stat_tool.data_transform.Merge`,
        :func:`~openalea.stat_tool.data_transform.Shift`,
        :func:`~openalea.stat_tool.data_transform.Transcode`,
        :func:`~openalea.stat_tool.data_transform.SelectIndividual`,
        :func:`~openalea.stat_tool.data_transform.MergeVariable`,
        :func:`~openalea.stat_tool.data_transform.SelectVariable`,
        `Cumulate`,
        `Difference`,
        `IndexExtract`,
        `LengthSelect`,
        `MovingAverage`,
        `RecurrenceTimeSequences`,
        `RemoveRun`,
        `Reverse`,
        `SegmentationExtract`,
        `VariableScaling`.
    """
    error.CheckArgumentsLength(args, 1, 3)
    Mode = error.ParseKargs(kargs, "Mode", "Keep", keep_type)
    #keep = bool(Mode == "Keep" or Mode == "keep")
    keep = bool(Mode == "Keep")
    # Test for vectors
    try:
        nb_variable = obj.nb_variable
    except AttributeError:
        nb_variable = 0

    if len(args) == 3:
        variable, umin, umax = args

    elif len(args) == 2:
        # 2 cases (min_value, max_value) or (variable, value)
        if nb_variable:
            variable, umin = args
            umax = umin
        else:
            umin, umax = args

    elif len(args) == 1:
        value = args[0]
        error.CheckType([value], [[int, tuple, list]])
        if isinstance(value, tuple) and len(value) == 2:
            umin, umax = value
        else:
            umin = umax = value
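    # e.g. ValueSelect(histo, (2, 5)) gives umin, umax == 2, 5, while
    # ValueSelect(histo, 3) gives umin == umax == 3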

    if (nb_variable):  # Vectors, sequences
        return obj.value_select(variable, umin, umax, keep)
    else:
        return obj.value_select(umin, umax, keep)
Esempio n. 30
0
    def old_plot(self, *args, **kargs):
        """ Old AML style plot """
        #todo: to be replace by correct enumerate but depends on sequence_analysis
        output_type = {"ChangePoint": 0, "Segment": 1}
        title = kargs.get("Title", "")
        ViewPoint = kargs.get("ViewPoint", "")
        suffix = kargs.get("Suffix", "")
        params = kargs.get("Params", ())
        output = kargs.get("Output", 0)

        data = bool(ViewPoint.lower() == "data")
        survival = bool(ViewPoint.lower() == "survival")
        stateprofile = bool(ViewPoint.lower() == "stateprofile")
        segmentprofile = bool(ViewPoint.lower() == "segmentprofile")

        import tempfile
        prefix = tempfile.mktemp()

        if (data):
            try:
                self.plot_data_write(prefix, title)
            except AttributeError:
                raise AttributeError("%s has not 'data' viewpoint" %
                                     (str(type(self))))
        elif (survival):
            try:
                self.survival_plot_write(prefix, title)
            except AttributeError:
                raise AttributeError("%s has not 'survival' viewpoint" %
                                     (str(type(self))))

        elif (stateprofile):
            try:
                self.state_profile_plot_write(prefix, title, *params)
            except AttributeError:
                raise AttributeError("%s has not 'state_profile' viewpoint" %
                                     (str(type(self))))
        elif (segmentprofile):
            try:

                error.CheckType([args[0], args[1]], [int, int])
                if len(args) > 2:
                    error.CheckType([args[2]], [[list, str]])
                    models = []
                    for model in args[2]:
                        try:
                            from openalea.sequence_analysis.enums import model_type
                            models.append(model_type[model])
                        except (ImportError, KeyError):
                            pass
                else:
                    models = [3]  #Gaussian todo: check this is correct
                # Output may be given as a label ("ChangePoint"/"Segment") or
                # already as its integer code
                output = output_type.get(output, output)
                self.segment_profile_write(prefix, args[0], args[1], models,
                                           output, title)
            except AttributeError:
                raise AttributeError("%s has not 'segment_profile' viewpoint" %
                                     (str(type(self))))
        elif (args):
            self.plot_write(prefix, title, list(args))
        else:
            self.plot_write(prefix, title)

        plot_file = prefix + suffix + ".plot"

        f = open(plot_file, "a")
        f.write("pause -1")
        f.close()
        if ("win32" in sys.platform):
            # replace file separators
            f = open(plot_file, "r")
            ct = f.read()
            f.close()
            ctrp = ct.replace('\\', '\\\\')
            ctrp = ctrp.replace(',\\\\', ',\\')
            f = open(plot_file, "w")
            f.write(ctrp)
            f.close()
            print plot_file, "\n"

        try:
            import Gnuplot
            command = Gnuplot.GnuplotOpts.gnuplot_command
        except ImportError:
            if ("win32" in sys.platform):
                command = "pgnuplot.exe"
            else:
                command = "gnuplot"

        if (not plot.DISABLE_PLOT):
            os.system("%s %s" % (command, plot_file))