def Convolution(*args):
    """Build a convolution from elementary distributions or an ASCII file.

    The distribution of the sum of independent random variables is the
    convolution of the distributions of these elementary random variables.

    :Parameters:
      * dist1, dist2, ... (distribution, mixture, convolution, compound) -
        elementary distributions,
      * file_name (string).

    :Returns:
        If the construction succeeds, the returned object is of type
        convolution, otherwise no object is returned.

    :Examples:

    .. doctest::
        :options: +SKIP

        >>> Convolution(dist1, dist2, ...)
        >>> Convolution(file_name)

    .. plot::
        :width: 50%
        :include-source:

        from openalea.stat_tool import *
        sum_dist = Binomial(0,10,0.5)
        dist = Binomial(0,15,0.2)
        c = Convolution(sum_dist, dist)
        c.plot()

    .. seealso::
        :func:`~openalea.stat_tool.output.Save`,
        :func:`~openalea.stat_tool.estimate.Estimate`,
        :func:`~openalea.stat_tool.simulate.Simulate`.
    """
    error.CheckArgumentsLength(args, 1)

    accepted = [_DiscreteParametricModel, _DiscreteMixture,
                _Compound, _Convolution]

    # A single argument is interpreted as the name of an ASCII file.
    if len(args) == 1:
        error.CheckType([args[0]], [str], arg_id=[1])
        return _Convolution(args[0])

    # Several arguments: each one must be an elementary distribution.
    components = []
    for position, component in enumerate(args, 1):
        error.CheckType([component], [accepted], variable_pos=[position])
        components.append(component)
    return _Convolution(components)
def VarianceAnalysis(*args, **kargs):
    """One-way variance analysis.

    :Examples:

    .. doctest::
        :options: +SKIP

        >>> VarianceAnalysis(vec, class_variable, response_variable,
        ...                  type, FileName="result", Format="SpreadSheet")

    :Parameters:
      * vec (_Vectors),
      * class_variable (int): index of the class or group variable,
      * response_variable (int): index of the response variable,
      * type (string): type of the response variable
        ("NUMERIC" ("N") or "ORDINAL" ("O")).

    :Keywords:
      * FileName (string): name of the result file,
      * Format (string): format of the result file: "ASCII" (default format)
        or "SpreadSheet". This optional argument can only be used in
        conjunction with the optional argument FileName.

    :Returns:
        The variance analysis result as a string
    """
    error.CheckArgumentsLength(args, 4, 4)
    error.CheckKargs(kargs, possible_kargs=["FileName", "Format"])

    # Optional (keyword) arguments.
    filename = error.ParseKargs(kargs, "FileName", default="result")
    # NOTE(review): "Format" is validated against variance_type with the
    # default "O", i.e. the response-type table rather than an output-format
    # table; this looks like a copy-paste slip - confirm before changing.
    output_format = error.ParseKargs(kargs, "Format", default="O",
                                     possible=variance_type)

    # Mandatory (positional) arguments.
    vec = args[0]
    class_variable = args[1]
    response_variable = args[2]
    response_type = args[3]
    error.CheckType([vec, class_variable, response_variable, response_type],
                    [_Vectors, int, int, str])

    # Translate the user-level type name through variance_type.
    try:
        type_code = variance_type[args[3]]
    except KeyError:
        raise KeyError("Possible type are : " + str(variance_type.keys()))

    return vec.variance_analysis(class_variable, response_variable,
                                 type_code, filename, output_format)
def Simulate(obj, *args):
    """Generation of a random sample from a distribution.

    :Parameters:
      * `dist` (distribution),
      * `mixt` (mixture)
      * `convol` (convolution)
      * `compound` (compound),
      * `size` (int): sample size.

    :Returns:
        If the first argument is of type distribution and if
        0 < size < 1000000, an object of type HISTOGRAM is returned,
        otherwise no object is returned.
        If the first argument is of type mixture and if 0 < size < 1000000,
        an object of type mixture_data is returned, otherwise no object is
        returned.
        If the first argument is of type convolution and if
        0 < size < 1000000, an object of type convolution_data is returned,
        otherwise no object is returned.
        If the first argument is of type compound and if 0 < size < 1000000,
        an object of type compound_data is returned, otherwise no object is
        returned.
        The returned object of type HISTOGRAM, mixture_data, convolution_data
        or compound_data contains both the simulated sample and the model
        used for simulation.

    :Example:

    .. doctest::
        :options: +SKIP

        >>> Simulate(dist, size)
        >>> Simulate(mixt, size)
        >>> Simulate(convol, size)
        >>> Simulate(compound, size)

    :See Also:
        Distribution, Mixture, Convolution, Compound, ExtractHistogram.
    """
    error.CheckArgumentsLength(args, 1, 1)
    size = args[0]

    try:
        return obj.simulate(size)
    except ImportError:
        # Fallback to the sequence_analysis implementation.  The model must
        # be forwarded together with the size: the previous code passed only
        # the size, so the fallback received an int in place of the model.
        from openalea.sequence_analysis.simulate import Simulate as newSimulate
        return newSimulate(obj, size)
def ContingencyTable(*args, **kargs):
    """Computation of a contingency table.

    :Parameters:
      * vec (_Vectors),
      * variable1, variable2 (int): variable indices,

    :Keywords:
      * FileName (string): name of the result file,
      * Format (string): format of the result file: "ASCII" (default format)
        or "SpreadSheet". This optional argument can only be used in
        conjunction with the optional argument FileName.

    :Returns:
        The contingency table result as a string

    :Examples:

    .. doctest::
        :options: +SKIP

        >>> ContingencyTable(vec, variable1, variable2,
        ...                  FileName="result", Format="SpreadSheet")
    """
    error.CheckArgumentsLength(args, 3, 3)
    error.CheckKargs(kargs, possible_kargs=["FileName", "Format"])

    # Names of the output formats known to OutputFormat.
    possible_v = [str(f) for f in OutputFormat.values.values()]

    # Optional (keyword) arguments.
    filename = error.ParseKargs(kargs, "FileName", default="result")
    format_name = error.ParseKargs(kargs, "Format", default="ASCII",
                                   possible=possible_v)

    # Mandatory (positional) arguments.
    vec = args[0]
    variable1 = args[1]
    variable2 = args[2]
    error.CheckType([vec, variable1, variable2], [_Vectors, int, int])

    # Resolve the enum member by attribute lookup instead of eval() on a
    # string assembled from a caller-supplied keyword value.
    output_format = getattr(OutputFormat, format_name).real

    return vec.contingency_table(variable1, variable2, filename,
                                 output_format)
def SelectStep(obj, *args):
    """Change the internal step of a vector or a sequence.

    :param obj: the vector or sequence object
    :param argument 1: the new step (preceded by the variable index when
        two arguments are given)

    :Example:

    .. doctest::
        :options: +SKIP

        >>> seq = Sequences([])
        >>> SelectStep(seq, 100)
        >>> Plot(seq)

    .. todo:: shall we move this function to sequence_analysis package?
    """
    error.CheckArgumentsLength(args, 1, 2)

    try:
        nb_variable = obj.nb_variable
    except AttributeError:
        raise TypeError(
            "object has no nb_variable. Check that it is a Vector or Sequence")

    nb_args = len(args)
    if nb_args == 2:
        # Explicit (variable, step) pair.
        variable, step = args
        error.CheckType([step], [[int, float]])
        error.CheckType([variable], [[int]])
    elif nb_args == 1 and nb_variable == 1:
        # Univariate object: the only variable is implied.
        variable, step = 1, args[0]
        error.CheckType([step], [[int, float]])
    elif nb_variable != 1:
        raise SyntaxError(
            "Wrong number of arguments. The number of variable is greater "
            "than 1 (%s) therefore you must provide a variable and a step "
            "like in SelectStep(object, 1, 100)" % nb_variable)
    else:
        raise ValueError("UnknownError")

    return obj.select_step(variable, step)
def ComputeRankCorrelation(*args, **kargs):
    """Computation of the rank correlation matrix.

    :Usage:

    >>> vec = Vectors([1,2,3,4,5,4,3,2,1])
    >>> ComputeRankCorrelation(vec, Type="Spearman", FileName='')

    :Arguments:
      * vec (vectors).

    :Optional Arguments:
      * Type (string): type of rank correlation coefficient:
        "Spearman" (the default) or "Kendall".
      * FileName: forwarded as second argument to
        ``vec.rank_correlation_computation`` (default: None).

    :Returned Object:
        No object returned.
    """
    # User-level coefficient names and the codes handed to ParseKargs.
    coefficient_codes = {"Spearman": 0, "Kendall": 1}

    error.CheckArgumentsLength(args, 1, 1)
    error.CheckKargs(kargs, possible_kargs=["Type", "FileName"])

    # Optional (keyword) arguments.
    utype = error.ParseKargs(kargs, "Type", default="Spearman",
                             possible=coefficient_codes)
    filename = error.ParseKargs(kargs, "FileName", default=None)

    # Mandatory (positional) argument.
    vec = args[0]
    error.CheckType([vec], [_Vectors])

    # The result of the computation is deliberately not returned.
    vec.rank_correlation_computation(utype, filename)
def Distribution(utype, *args):
    """Construction of a parametric discrete distribution.

    The distribution (either binomial, Poisson, negative binomial or
    uniform) is built from the name and the parameters of the distribution
    or from an ASCII file.

    A supplementary shift parameter (argument inf_bound) is required with
    respect to the usual definitions of these discrete distributions.
    Constraints over parameters are given in the file syntax corresponding
    to the type distribution (cf. File Syntax).

    :Parameters:
      * `inf_bound` (int) : lower bound to the range of possible values
        (shift parameter),
      * `sup_bound` (int) : upper bound to the range of possible values
        (only relevant for binomial or uniform distributions),
      * `param` (int, real) : parameter of either the Poisson distribution
        or the negative binomial distribution.
      * `proba` (int, float) : probability of success
        (only relevant for binomial or negative binomial distributions),
      * `file_name` (string).

    .. note:: the names of the parametric discrete distributions can be
        summarized by their first letters:

        * "B" ("BINOMIAL"),
        * "P" ("POISSON"),
        * "NB" ("NEGATIVE_BINOMIAL"),
        * "U" ("UNIFORM"),
        * "M" ("MULTINOMIAL"),

    :Returns:
        If the construction succeeds, an object of type distribution is
        returned, otherwise no object is returned.

    :Examples:

    .. doctest::
        :options: +SKIP

        >>> Distribution("BINOMIAL", inf_bound, sup_bound, proba)
        >>> Distribution("POISSON", inf_bound, param)
        >>> Distribution("NEGATIVE_BINOMIAL", inf_bound, param, proba)
        >>> Distribution("UNIFORM", inf_bound, sup_bound)
        >>> Distribution(file_name)

    .. seealso::
        :func:`~openalea.stat_tool.output.Save`,
        :func:`~openalea.stat_tool.estimate.Estimate`
        :func:`~openalea.stat_tool.simulate.Simulate`.
    """
    # No parameter: build from a file name, a histogram or an existing model.
    if len(args) == 0:
        error.CheckType(
            [utype],
            [[str, _DiscreteDistributionData, _DiscreteParametricModel]],
            arg_id=[1])
        return _DiscreteParametricModel(utype)

    # Otherwise utype names the family and args carries its parameters.
    error.CheckArgumentsLength(args, 1)

    if utype in ["B", "BINOMIAL"]:
        return Binomial(*args)
    if utype in ["P", "POISSON"]:
        return Poisson(*args)
    if utype in ["M", "MULTINOMIAL"]:
        raise NotImplementedError("Multinomial not yet implemented")
    if utype in ["NB", "NEGATIVE_BINOMIAL"]:
        return NegativeBinomial(*args)
    if utype in ["U", "UNIFORM"]:
        return Uniform(*args)

    raise KeyError(" %s not found. Allowed keys are %s"
                   % (utype, distribution_identifier_type.keys()))
def Mixture(*args):
    """Construction of a mixture of distributions.

    The mixture is built from elementary distributions and associated
    weights or from an ASCII file.

    A mixture is a parametric model of classification where each elementary
    distribution or component represents a class with its associated weight.

    :Parameters:
      * `weight1`, `weight2`, ... (float) - weights of each component.
        These weights should sum to one (they constitute a discrete
        distribution).
      * `dist1`, `dist2`, ... (`_DiscreteParametricModel`,
        `_DiscreteMixture`, `_Convolution`, `_Compound`) elementary
        distributions (or components).
      * `filename` (string) -

    :Returns:
        If the construction succeeds, an object of type mixture is returned,
        otherwise no object is returned.

    :Raises:
        TypeError if several arguments are given and their number is odd.

    :Examples:

    .. doctest::
        :options: +SKIP

        >>> Mixture(weight1, dist1, weight2, dist2,...)
        >>> Mixture(filename)

    .. seealso::
        :func:`~openalea.stat_tool.output.Save`,
        :func:`~openalea.stat_tool.estimate.Estimate`,
        :func:`~openalea.stat_tool.simulate.Simulate`.
    """
    error.CheckArgumentsLength(args, 1)

    types = [_DiscreteParametricModel, _DiscreteMixture,
             _Compound, _Convolution]

    # A single argument is interpreted as a file name.
    if len(args) == 1:
        error.CheckType([args[0]], [str], arg_id=[1])
        return _DiscreteMixture(args[0])

    # Otherwise the arguments alternate: weight, distribution, weight, ...
    nb_param = len(args)
    if nb_param % 2 != 0:
        raise TypeError("Number of parameters must be pair")

    weights = []
    dists = []
    # range() with floor division behaves the same on Python 2 and 3,
    # unlike the former xrange(nb_param / 2).
    for i in range(nb_param // 2):
        weight = args[2 * i]
        dist = args[2 * i + 1]
        weights.append(weight)
        error.CheckType([dist], [types], arg_id=[2 * i + 1])
        error.CheckType([weight], [float], arg_id=[2 * i])
        dists.append(dist)

    return _DiscreteMixture(weights, dists)
def Vectors(*args, **kargs):
    """Construction of a set of vectors.

    The set is built from a multidimensional array, from a set of sequences
    or from an ASCII file.

    The data structure of type list(list(int)) should be constituted at the
    most internal level of arrays of constant size.

    :Parameters:
      - `list` (list(list(int))) :
      - `seq` (sequences, discrete_sequences, markov_data, semi-markov_data)
      - `file_name` (string) :

    :Keywords:
      - Identifiers (list(int)): explicit identifiers of vectors.
        This optional argument can only be used if the first mandatory
        argument is of type list(list(int)). When it is not given (or is
        empty), the identifiers 1, 2, ..., len(list) are used.
      - IndexVariable (bool): taking into account of the implicit index
        parameter as a supplementary variable (default value: False).
        This optional argument can only be used if the first mandatory
        argument is of type `sequences`, `discrete_sequences`,
        `markov_data` or `semi-markov_data`.

    :Returns:
        If the construction succeeds, an object of type vectors is returned,
        otherwise no object is returned.

    :Raises:
        ValueError if `Identifiers` does not have one entry per vector.

    :Examples:

    .. doctest::
        :options: +SKIP

        >>> Vectors(list, Identifiers=[1, 8, 12])
        >>> Vectors(seq, IndexVariable=True)
        >>> Vectors(file_name)

    .. seealso::
        :func:`~openalea.stat_tool.output.Save`,
        :func:`~openalea.stat_tool.data_transform.ExtractHistogram`,
        :func:`~openalea.stat_tool.cluster.Cluster`,
        :func:`~openalea.stat_tool.data_transform.Merge`,
        :func:`~openalea.stat_tool.data_transform.MergeVariable`,
        :func:`~openalea.stat_tool.data_transform.SelectIndividual`,
        :func:`~openalea.stat_tool.data_transform.SelectVariable`,
        :func:`~openalea.stat_tool.data_transform.Shift`,
        :func:`~openalea.stat_tool.cluster.Transcode`,
        :func:`~openalea.stat_tool.data_transform.ValueSelect`,
        :func:`~openalea.stat_tool.comparison.Compare`,
        :func:`~openalea.stat_tool.comparison.ComputeRankCorrelation`,
        :func:`~openalea.stat_tool.comparison.ContingencyTable`,
        :func:`~openalea.stat_tool.comparison.Regression`,
        :func:`~openalea.stat_tool.comparison.VarianceAnalysis`
    """
    error.CheckArgumentsLength(args, 1, 1)
    error.CheckKargs(kargs, possible_kargs=["Identifiers", "IndexVariable"])

    obj = args[0]

    import openalea.core.path

    # Constructor from a file name.
    if isinstance(obj, str):
        return _Vectors(obj)

    # Constructor from a path object.
    if isinstance(obj, openalea.core.path.path):
        return _Vectors(str(obj))

    if isinstance(obj, list):
        # Normal usage is Vectors([[1,2,3], [1,2,3], [4,5,6]]).  To simplify
        # the single-vector case, Vectors([1,2,3]) is accepted and wrapped.
        if type(obj[0]) != list:
            obj = [obj]

        # Per-variable type flag: 0 for int, 1 for float.  A variable is
        # flagged float as soon as one float value is found for it.
        input_types = [0] * len(obj[0])
        for vec in obj:
            for index, var in enumerate(vec):
                assert type(var) in [int, float], \
                    "wrong types var=%s and its type is %s" % (var, type(var))
                if type(var) == float:
                    input_types[index] = 1

        identifiers = error.ParseKargs(kargs, "Identifiers")
        if identifiers:
            error.CheckType([identifiers], [[list]], variable_pos=[2])
            if len(identifiers) != len(obj):
                raise ValueError("""Identifiers must be a list, which size equals vectors's length""")
        else:
            # Default identifiers: 1, 2, ..., number of vectors.
            identifiers = list(range(1, len(obj) + 1))

        return _Vectors(obj, identifiers, input_types)

    # Anything else is treated as a sequence-like object.
    index_variable = error.ParseKargs(kargs, "IndexVariable", False,
                                      [True, False])
    error.CheckType([index_variable], [bool], variable_pos=[2])
    return obj.build_vectors(index_variable)
def Shift(obj, *args):
    """Shifting of values.

    :Parameters:
      * histo (histogram, mixture_data, convolution_data, compound_data),
      * param (int): shifting parameter,
      * vec1 (vectors): values,
      * vecn (vectors): vectors,
      * variable (int): variable index,
      * seq1 (sequences): univariate sequences,
      * seqn (sequences): multivariate sequences.

    :Returns:
        If the shifting makes that the lower bound to the possible values is
        positive, an object of type HISTOGRAM (respectively _Vectors,
        _Sequences) is returned. In the case of a first argument of type
        sequences, the returned object is of type discrete_sequences if all
        the variables are of type STATE, if the possible values for each
        variable are consecutive from 0 and if the number of possible values
        for each variable is 15.
        NOTE(review): a comparison operator seems to be missing before
        "15" in the sentence above - confirm against the reference manual.

    :Examples:

    .. doctest::
        :options: +SKIP

        >>> Shift(histo, param)
        >>> Shift(vec1, param)
        >>> Shift(vecn, variable, param)
        >>> Shift(seq1, param)
        >>> Shift(seqn, variable, param)

    .. seealso::
        :func:`~openalea.stat_tool.cluster.Cluster`,
        :func:`~openalea.stat_tool.data_transform.Merge`,
        :func:`~openalea.stat_tool.data_transform.Transcode`,
        :func:`~openalea.stat_tool.data_transform.SelectIndividual`,
        :func:`~openalea.stat_tool.data_transform.MergeVariable`,
        :func:`~openalea.stat_tool.data_transform.SelectVariable`
        :func:`~openalea.stat_tool.data_transform.AddAbsorbingRun`,
        :func:`~openalea.stat_tool.data_transform.Cumulate`,
        :func:`~openalea.stat_tool.data_transform.Difference`,
        :func:`~openalea.stat_tool.data_transform.Lengthselect`,
        :func:`~openalea.stat_tool.data_transform.MovingAverage`,
        :func:`~openalea.stat_tool.data_transform.IndexExtract`,
        :func:`~openalea.stat_tool.data_transform.RecurrenceTimeSequences`,
        :func:`~openalea.stat_tool.data_transform.RemoveRun`,
        :func:`~openalea.stat_tool.data_transform.Reverse`,
        :func:`~openalea.stat_tool.data_transform.SegmentationExtract`,
        :func:`~openalea.stat_tool.data_transform.ValueSelect`,
        :func:`~openalea.stat_tool.data_transform.VariableScaling`.
    """
    error.CheckArgumentsLength(args, 1, 3)

    # Objects without an nb_variable attribute are handled as histograms.
    nb_variable = getattr(obj, "nb_variable", 0)

    if nb_variable == 1:
        # Univariate vectors/sequences: the variable index is implied.
        return obj.shift(1, args[0])

    if nb_variable > 1:
        # Multivariate case: (variable, param).
        variable, param = args[0], args[1]
        return obj.shift(variable, param)

    return obj.shift(args[0])
def ValueSelect(obj, *args, **kargs):
    """Selection of individuals according to the values taken by a variable.

    :Parameters:
      * histo (histogram, mixture_data, convolution_data, compound_data),
      * value (int): value,
      * min_value (int): minimum value,
      * max_value (int): maximum value,
      * vec1 (vectors): values,
      * vecn (vectors): vectors,
      * variable (int): variable index,
      * seq1 (sequences, discrete_sequences, markov_data, semi-markov_data):
        univariate sequences,
      * seqn (sequences, discrete_sequences, markov_data, semi-markov_data):
        multivariate sequences.

    :Keywords:
      * Mode (string): conservation or rejection of selected individuals:
        "Keep" (the default) or "Reject".

    :Returns:
        If either value 0 or if 0 < min_value < max_value and if the range
        of values defined either by value or by min_value and max_value
        enables to select individuals, an object of type HISTOGRAM is
        returned (respectively vectors, sequences or discrete_sequences),
        otherwise no object is returned. In the case of a first argument of
        type sequences, discrete_sequences, markov_data or semi-markov_data,
        the returned object is of type discrete_sequences if all the
        variables are of type STATE, if the possible values for each
        variable are consecutive from 0 and if the number of possible values
        for each variable is < 15.
        NOTE(review): the condition "value 0" above seems to have lost its
        comparison operator - confirm against the reference manual.

    :Raises:
        ValueError if a single positional argument is given for an object
        with more than one variable (the variable index is then required).

    .. note:: for an object exposing ``nb_variable``, two positional
        arguments are read as (variable, value); a single positional
        argument is accepted only when ``nb_variable`` is 1, in which case
        variable 1 is used.

    :Examples:

    .. doctest::
        :options: +SKIP

        >>> ValueSelect(histo, value, Mode="Reject")
        >>> ValueSelect(histo, min_value, max_value, Mode="Reject")
        >>> ValueSelect(vec1, value, Mode="Reject")
        >>> ValueSelect(vec1, min_value, max_value, Mode="Reject")
        >>> ValueSelect(vecn, variable, value, Mode="Reject")
        >>> ValueSelect(vecn, variable, min_value, max_value, Mode="Reject")
        >>> ValueSelect(seq1, value, Mode="Reject")
        >>> ValueSelect(seq1, min_value, max_value, Mode="Reject")
        >>> ValueSelect(seqn, variable, value, Mode="Reject")
        >>> ValueSelect(seqn, variable, min_value, max_value, Mode="Reject")

    .. seealso::
        :func:`~openalea.stat_tool.cluster.Cluster`,
        :func:`~openalea.stat_tool.data_transform.Merge`,
        :func:`~openalea.stat_tool.data_transform.Shift`,
        :func:`~openalea.stat_tool.data_transform.Transcode`,
        :func:`~openalea.stat_tool.data_transform.SelectIndividual`,
        :func:`~openalea.stat_tool.data_transform.MergeVariable`,
        :func:`~openalea.stat_tool.data_transform.SelectVariable`
        `Cumulate`, `Difference`, `IndexExtract`, `LengthSelect`,
        `MovingAverage`, `RecurrenceTimeSequences`, `RemoveRun`, `Reverse`,
        `SegmentationExtract`, `VariableScaling`.
    """
    error.CheckArgumentsLength(args, 1, 3)

    Mode = error.ParseKargs(kargs, "Mode", "Keep", keep_type)
    keep = bool(Mode == "Keep")

    # Vectors and sequences expose nb_variable; histograms do not.
    try:
        nb_variable = obj.nb_variable
    except AttributeError:
        nb_variable = 0

    variable_given = False
    nb_args = len(args)
    if nb_args == 3:
        variable, umin, umax = args
        variable_given = True
    elif nb_args == 2:
        # Two cases: (variable, value) or (min_value, max_value).
        if nb_variable:
            variable, umin = args
            umax = umin
            variable_given = True
        else:
            umin, umax = args
    elif nb_args == 1:
        value = args[0]
        error.CheckType([value], [[int, tuple, list]])
        if isinstance(value, tuple) and len(value) == 2:
            umin, umax = value
        else:
            umin = umax = value

    if not nb_variable:
        # Histogram-like object: no variable index.
        return obj.value_select(umin, umax, keep)

    if not variable_given:
        # Previously `variable` was left unbound here, so the call failed
        # with UnboundLocalError.  A univariate object has only variable 1.
        if nb_variable != 1:
            raise ValueError(
                "A variable index must be provided since the number of "
                "variables is greater than 1 (%s)" % nb_variable)
        variable = 1

    # Vectors, sequences.
    return obj.value_select(variable, umin, umax, keep)
def Cluster(obj, utype, *args, **kargs):
    """Clustering of values.

    In the case of the clustering of values of a frequency distribution on
    the basis of an information measure criterion (argument `Information`),
    both the information measure ratio and the selected optimal step are
    given in the shell window. The clustering mode `Step` (and its variant
    `Information`) is naturally adapted to numeric variables while the
    clustering mode `Limit` applies to both symbolic (nominal) and numeric
    variables. In the case of a symbolic variable, the function `Cluster`
    with the mode `Limit` can be seen as a dedicated interface of the more
    general function `Transcode`.

    :Parameters:
      * `histo` (`_FrequencyDistribution`, `_DiscreteMixtureData`,
        `_ConvolutionData`, `_CompoundData`),
      * `step` (int) - step for the clustering of values
      * `information_ratio` (float) - proportion of the information measure
        of the original sample for determining the clustering step,
      * `limits` (list(int)) - first values corresponding to the new classes
        classes 1, ..., nb_class - 1. By convention, the first value
        corresponding to the first class is 0,
      * `vec1` (`_Vector`) - values,
      * `vecn` (`_Vectors`) - vectors,
      * `variable` (int) - variable index,
      * `seq1` (`_Sequences`) - univariate sequences,
      * `seqn` (`_Sequences`) - multivariate sequences,
      * `discrete_seq1` (`_DiscreteSequences`, `_Markov`, `_SemiMarkovData`)
        - discrete univariate sequences,
      * `discrete_seqn` (`_DiscreteSequences`, `_Markov`, `_SemiMarkovData`)
        - discrete multivariate sequences.

    :Keywords:
      * `AddVariable` (bool) : addition (instead of simple replacement) of
        the variable corresponding to the clustering of values (default
        value: False). This optional argument can only be used if the first
        argument is of type `_DiscreteSequences`, `_Markov` or
        `_SemiMarkovData`. The addition of the clustered variable is
        particularly useful if one wants to evaluate a lumpability
        hypothesis.
      * `Round` (string) : one of the names found in ``mode_type``
        (default "ROUND"); it is only used by the multivariate fallback
        call.

    :Returns:
      * If `step` > 0, or if 0 < `information_ratio` < 1, or if
        0 < limits[1] < limits[2] < ... < limits[nb_class - 1] < (maximum
        possible value of histo), an object of type _FrequencyDistribution
        is returned.
      * If variable is a valid index of a variable and if `step` > 0, or if
        0 < limits[1] < limits[2] < ... < limits[nb_class - 1] < (maximum
        possible value taken by the selected variable of `vec1` or `vecn`),
        an object of type `_Vectors` is returned.
      * If variable is a valid index of a variable of type STATE and if
        `step` > 0, or if 0 < limits[1] < limits[2] < ... <
        limits[nb_class - 1] < (maximum possible value taken by the selected
        variable of `seq1`, `seqn`, `discrete_seq1` or `discrete_seqn`), an
        object of type `_Sequences` or `_DiscreteSequences` is returned.
      * In the case of a first argument of type `_Sequences`, an object of
        type `_DiscreteSequences` is returned if all the variables are of
        type STATE, if the possible values taken by each variable are
        consecutive from 0 and if the number of possible values for each
        variable is < 15.

    :Raises:
        KeyError if the object has no method for the requested mode;
        ValueError if the number of positional arguments does not match the
        number of variables. Errors raised by the last attempted wrapper
        call are propagated.

    :Examples:

    .. doctest::
        :options: +SKIP

        >>> Cluster(histo, "Step", step)
        >>> Cluster(histo, "Information", information_ratio)
        >>> Cluster(histo, "Limit", limits)
        >>> Cluster(vec1, "Step", step)
        >>> Cluster(vecn, "Step", variable, step)
        >>> Cluster(vec1, "Limit", limits)
        >>> Cluster(vecn, "Limit", variable, limits)
        >>> Cluster(seq1, "Step", step)
        >>> Cluster(seqn, "Step", variable, step)
        >>> Cluster(discrete_seq1, "Step", step, AddVariable=True)
        >>> Cluster(discrete_seqn, "Step", variable, step, AddVariable=True)
        >>> Cluster(seq1, "Limit", limits)
        >>> Cluster(seqn, "Limit", variable, limits)
        >>> Cluster(discrete_seq1, "Limit", limits, AddVariable=True)
        >>> Cluster(discrete_seqn, "Limit", variable, limits, AddVariable=True)

    .. seealso::
        :func:`~openalea.stat_tool.data_transform.Merge`,
        :func:`~openalea.stat_tool.data_transform.Shift`,
        :func:`~openalea.stat_tool.data_transform.ValueSelect`,
        :func:`~openalea.stat_tool.data_transform.MergeVariable`,
        :func:`~openalea.stat_tool.data_transform.SelectIndividual`,
        :func:`~openalea.stat_tool.data_transform.SelectVariable`,
        :func:`~openalea.stat_tool.cluster.Transcode`,
        :func:`~openalea.stat_tool.data_transform.AddAbsorbingRun`,
        :func:`~openalea.stat_tool.data_transform.Cumulate`,
        :func:`~openalea.stat_tool.data_transform.Difference`,
        :func:`~openalea.stat_tool.data_transform.IndexExtract`,
        :func:`~openalea.stat_tool.data_transform.LengthSelect`,
        :func:`~vplants.sequence_analysis.data_transform.MovingAverage`,
        :func:`~openalea.stat_tool.data_transform.RecurrenceTimeSequences`,
        :func:`~openalea.stat_tool.data_transform.Removerun`,
        :func:`~openalea.stat_tool.data_transform.Reverse`,
        :func:`~openalea.stat_tool.data_transform.SegmentationExtract`,
        :func:`~openalea.stat_tool.data_transform.VariableScaling`.
    """
    # fixme: what about the Mode in the Step case ?
    # check markovian_sequences call in Sequences
    AddVariable = error.ParseKargs(kargs, "AddVariable", False,
                                   possible=[False, True])

    possible_r = [str(f) for f in mode_type]  # possible rounding modes
    RoundingVariable = error.ParseKargs(kargs, "Round", "ROUND",
                                        possible=possible_r)

    error.CheckArgumentsLength(args, 1, 2)

    # Look up the method implementing the requested clustering mode.
    if hasattr(obj, cluster_type[utype]):
        func = getattr(obj, cluster_type[utype])
    else:
        raise KeyError("""Possible action are : 'Step', 'Information' or 'Limit'.
        Information cannot be used with Vectors objects""")

    # nb_variable is only available on vectors and sequences.
    if hasattr(obj, 'nb_variable'):
        nb_variable = obj.nb_variable
    else:
        nb_variable = 1

    if nb_variable == 1:
        if len(args) != 1:
            raise ValueError("""Extra arguments provided (to specify variable value ?). Consider removing it. Be aware that nb_variable equals 1""")

        if utype == "Step":
            error.CheckType([args[0]], [int])
        if utype == "Limit":
            error.CheckType([args[0]], [list])
        if utype == "Information":
            error.CheckType([args[0]], [[int, float]])

        # The wrapped signature depends on the kind of object, so the
        # candidate calls are tried in turn.  The last attempt is not
        # guarded: its error propagates instead of being swallowed, which
        # previously left `ret` unbound.
        try:
            ret = func(args[0])  # histogram case
        except Exception:
            try:
                ret = func(1, args[0])  # vector case
            except Exception:
                ret = func(1, args[0], AddVariable)  # sequences case
    else:
        if len(args) != 2:
            raise ValueError(
                "Wrong number of arguments. nb_variable equals %s, "
                "therefore both a variable index and a step (or limits) "
                "must be provided" % nb_variable)

        if utype == "Step":
            error.CheckType([args[0]], [int])
            error.CheckType([args[1]], [[int, float]])
        if utype == "Limit":
            error.CheckType([args[0]], [int])
            error.CheckType([args[1]], [list])

        try:
            ret = func(*args)
        except Exception:
            # sequences case
            ret = func(args[0], args[1], mode_type[RoundingVariable].real)

    if hasattr(ret, 'markovian_sequences'):
        ret = ret.markovian_sequences()

    return ret
def Clustering(matrix, utype, *args, **kargs):
    """Application of clustering methods to dissimilarity matrices.

    Clustering methods (either partitioning methods or hierarchical methods)
    are applied to dissimilarity matrices between patterns.

    In the case where the composition of clusters is a priori fixed, the
    function Clustering simply performs an evaluation of the a priori fixed
    partition.

    :Parameters:
      * `dissimilarity_matrix` (distance_matrix) - dissimilarity matrix
        between patterns,
      * `nb_cluster` (int) - number of clusters,
      * `clusters` (list(list(int))) - cluster composition.

    :Keywords:
      * `Prototypes` (list(int)): cluster prototypes.
      * `Initialization` (int): 1 (default) or 2; "Partition" case only.
      * `Algorithm` (string): "Agglomerative", "Divisive" or "Ordering"
      * `Criterion` (string): "FarthestNeighbor" or "Averaging"
      * `FileName` (string): filename
      * `Format` (string) : "ASCII" or "SpreadSheet"

    :Returns:
        If the second mandatory argument is "Partition" and if
        2 < nb_cluster < (number of patterns), an object of type clusters
        is returned

    :Raises:
        ValueError for an inconsistent Algorithm/Criterion combination,
        TypeError if the "Partition" argument is neither an int nor a list,
        KeyError if the second argument is neither "Partition" nor
        "Hierarchy".

    :Examples:

    .. doctest::
        :options: +SKIP

        >>> Clustering(dissimilarity_matrix, "Partition", nb_cluster, Prototypes=[1, 3, 12])
        >>> Clustering(dissimilarity_matrix, "Partition", clusters)
        >>> Clustering(dissimilarity_matrix, "Hierarchy", Algorithm="Agglomerative")
        >>> Clustering(dissimilarity_matrix, "Hierarchy", Algorithm="Divisive")

    .. seealso::
        :func:`~openalea.stat_tool.data_transform.SelectIndividual`,
        `Symmetrize`,
        :func:`~openalea.stat_tool.comparison.Compare`,
        :func:`~openalea.stat_tool.cluster.ToDistanceMatrix`.

    .. note:: if type=Partition, Algorithm must be 1 (divisive) or
        2 (ordering).

    .. note:: if type!=Divisive criterion must be provided
    """
    # TODO: check this case:
    # Clustering(dissimilarity_matrix, "Partition", clusters)
    error.CheckType([matrix], [_DistanceMatrix])

    Algorithm = error.ParseKargs(kargs, "Algorithm", default="Divisive",
                                 possible=algorithm_type)

    # First the partition case.
    if utype == "Partition":
        error.CheckArgumentsLength(args, 1, 1)
        error.CheckKargs(kargs, ["Algorithm", "Prototypes", "Initialization"])
        Initialization = error.ParseKargs(kargs, "Initialization", 1,
                                          possible=[1, 2])

        if Algorithm == algorithm_type["Agglomerative"]:
            raise ValueError("""If partition is on, Algorithm cannot be agglomerative""")

        if isinstance(args[0], int):
            # int case: if Prototypes is empty, the wrapping will send an
            # int * = 0 to the prototyping function, as expected.
            Prototypes = kargs.get("Prototypes", [])
            nb_cluster = args[0]
            return matrix.partitioning_prototype(nb_cluster, Prototypes,
                                                 Initialization, Algorithm)
        elif isinstance(args[0], list):
            # array case
            # todo: array of what kind of object? need a test
            return matrix.partitioning_clusters(args[0])
        else:
            raise TypeError(""" With Partition as second argument, the third one must be either an int or an array.""")

    elif utype == "Hierarchy":
        error.CheckKargs(kargs,
                         ["Algorithm", "FileName", "Criterion", "Format"])

        Algorithm = error.ParseKargs(kargs, "Algorithm",
                                     default="Agglomerative",
                                     possible=algorithm_type)
        Criterion = error.ParseKargs(kargs, "Criterion", "Averaging",
                                     possible=criterion_type)

        # The keyword validated above is "FileName"; the previous code read
        # "Filename", so a user-supplied file name was never used.  The old
        # spelling is still looked up as a fallback.
        # fixme: is it correct to default the filename to None ?
        filename = kargs.get("FileName", kargs.get("Filename", None))
        output_format = error.ParseKargs(kargs, "Format", "ASCII",
                                         possible=format_type)

        # Criterion only makes sense for the agglomerative algorithm.
        if Algorithm != algorithm_type["Agglomerative"] and \
                kargs.get("Criterion"):
            raise ValueError(""" In the Hierarchy case, if Algorithm is different from AGGLOMERATIVE, then Criterion cannot be used.""")

        return matrix.hierarchical_clustering(Algorithm, Criterion,
                                              filename, output_format)

    else:
        raise KeyError("Second argument must be 'Partition' or 'Hierarchy'")
def Regression(vec, utype, explanatory, response, *args, **kargs):
    """Simple regression (with a single explanatory variable).

    :Parameters:

      * vec : vectors
          vectors
      * type : string
          `"Linear"` or `"MovingAverage"` or `"NearestNeighbors"`
          (the spelling `"NearestNeighbours"` is accepted as well)
      * explanatory_variable : int
          index of the explanatory variable
      * response_variable : int
          index of the response variable
      * filter : list of float
          filter values on the half width i.e. from one extremity to the
          central value (with the constraint filter[i] + filter[m] = 1),
      * frequencies : list of float
          frequencies defining the filter,
      * dist : distribution, mixture, convolution, compound
          symmetric distribution, whose size of the support is even,
          defining the filter
          (for instance Distribution("BINOMIAL",0,4,0.5)),
      * span : float
          proportion of individuals in each neighbourhood
          (must be at least 0.05).

    :Keywords:

      * Algorithm : string
          - `"Averaging"` (default)
          - `"LeastSquares"`

          This optional argument can only be used if the second mandatory
          argument specifying the regression type is "MovingAverage".

      * Weighting : bool
          weighting or not of the neighbors according to their distance
          to the computed point (default value: True).
          This optional argument can only be used if the second mandatory
          argument specifying the regression type is "NearestNeighbors".

    :Returns:

        An object of type regression is returned.

    :Raises:

      * TypeError: if the regression type is not one of the supported names.
      * AssertionError: if `span` is smaller than the minimum span (0.05).
      * Argument count, argument type and keyword value checks are
        delegated to the `error` helpers, which raise their own exceptions.

    :Examples:

    .. doctest::
        :options: +SKIP

        >>> Regression(vec, "Linear", explanatory_variable, response_variable)
        >>> Regression(vec, "MovingAverage", explanatory_variable,
        ...     response_variable, filter, Algorithm="LeastSquares")
        >>> Regression(vec, "MovingAverage", explanatory_variable,
        ...     response_variable, frequencies, Algorithm="LeastSquares")
        >>> Regression(vec, "MovingAverage", explanatory_variable,
        ...     response_variable, dist, Algorithm="LeastSquares")
        >>> Regression(vec, "NearestNeighbors", explanatory_variable,
        ...     response_variable, span, Weighting=False)

    .. seealso::
        :func:`~openalea.stat_tool.output.Plot`
    """
    STAT_MINIMUM_SPAN = 0.05  # from aml not stat_tool or sequence headers

    error.CheckType([vec, utype, explanatory, response],
                    [_Vectors, str, int, int])

    possible_types = [
        "Linear",
        "NearestNeighbors", "NearestNeighbours",
        "MovingAverage",
    ]

    # Both keywords are parsed up front, whatever the regression type is;
    # each one is only forwarded by the branch that uses it.
    Algorithm = error.ParseKargs(kargs, "Algorithm", 'Averaging', algo_map)
    Weighting = error.ParseKargs(kargs, "Weighting", True, bool_type)

    if utype == "Linear":
        error.CheckArgumentsLength(args, 0, 0)
        return vec.linear_regression(explanatory, response)

    elif utype == "MovingAverage":
        error.CheckArgumentsLength(args, 1, 1)
        param = args[0]
        # todo: add CheckType for int and models

        # param is a list of float, int
        if isinstance(param, list):
            # todo: check that sum equals 1
            return vec.moving_average_regression_values(
                explanatory, response, param, Algorithm)

        # or a set of distributions
        # todo: test case of compound, convolution, mixture
        error.CheckType([param], [[
            _DiscreteParametricModel,
            _DiscreteMixture,
            _Convolution,
            _Compound,
        ]])
        return vec.moving_average_regression_distribution(
            explanatory, response, param, Algorithm)

    elif utype in ["NearestNeighbors", "NearestNeighbours"]:
        error.CheckArgumentsLength(args, 1, 1)
        span = args[0]
        error.CheckType([span], [[float, int]])

        # Explicit check instead of a bare ``assert``: assert statements are
        # removed under ``python -O``, which would silently disable this
        # validation.  AssertionError is kept so the exception type seen by
        # existing callers does not change.  ``not >=`` (rather than ``<``)
        # also rejects NaN, exactly as the original assert did.
        if not span >= STAT_MINIMUM_SPAN:
            raise AssertionError(
                "span must be >= %s (got %s)" % (STAT_MINIMUM_SPAN, span))

        return vec.nearest_neighbours_regression(explanatory, response,
                                                 float(span), Weighting)

    else:
        raise TypeError("Bad Regression type. Must be in %s" % possible_types)
def Compound(*args, **kargs):
    """Construction of a compound of distributions from a sum distribution
    and an elementary distribution or from an ASCII file.

    A compound (or stopped-sum) distribution is defined as the distribution
    of the sum of n independent and identically distributed random variables
    :math:`X_i` where `n` is the value taken by the random variable `N`.
    The distribution of N is referred to as the sum distribution while the
    distribution of the :math:`X_i` is referred to as the elementary
    distribution.

    :param sum_dist: sum distribution
    :param dist: elementary distribution
    :param string filename:
    :param Threshold: optional keyword; when given (i.e. not None) together
        with two distributions it is forwarded as second argument to the
        underlying constructor. It is not used when building from a file
        name. (Presumably a cumulative-probability cutoff -- to be
        confirmed against the wrapped constructor.)

    :type sum_dist: :class:`distribution`, :class:`mixture`,
        :class:`convolution`, :class:`compound`
    :type dist: :class:`distribution`, :class:`mixture`,
        :class:`convolution`, :class:`compound`

    :Returns:

        If the construction succeeds, an object of type `COMPOUND` is
        returned, otherwise no object is returned.

    :Examples:

    .. doctest::
        :options: +SKIP

        >>> Compound(sum_dist, dist)
        >>> Compound(sum_dist, dist, Threshold=0.999)
        >>> Compound(filename)

    .. plot::
        :width: 50%
        :include-source:

        from openalea.stat_tool import *
        sum_dist = Binomial(0,10,0.5)
        dist = Binomial(0,15,0.2)
        c = Compound(sum_dist, dist)
        c.plot()

    .. seealso::
        :func:`~openalea.stat_tool.output.Save`,
        :func:`~openalea.stat_tool.estimate.Estimate`,
        :func:`~openalea.stat_tool.simulate.Simulate`
    """
    # Exactly one (file name) or two (sum_dist, dist) positional arguments.
    error.CheckArgumentsLength(args, 1, 2)
    error.CheckKargs(kargs, possible_kargs=["Threshold"])

    Threshold = kargs.get("Threshold", None)

    if len(args) == 1:
        # filename
        error.CheckType([args[0]], [str])
        result = _Compound(args[0])
    else:
        # build from two objects and optional threshold
        possible_types = [
            _DiscreteParametricModel,
            _DiscreteMixture,
            _Compound,
            _Convolution,
        ]
        error.CheckType([args[0], args[1]],
                        [possible_types, possible_types],
                        variable_pos=[1, 2])

        # Compare against None rather than relying on truthiness so that an
        # explicitly supplied falsy value (0, 0.0) is passed on instead of
        # being silently replaced by the constructor default.
        if Threshold is not None:
            result = _Compound([args[0], args[1]], Threshold)
        else:
            result = _Compound([args[0], args[1]])

    return result