def ComputeAutoCorrelation(obj, *args, **kargs):
    """Compute the autocorrelation function of a state or of an output value.

    :Usage:

        ComputeAutoCorrelation(obj, value, MaxLag=...)
        ComputeAutoCorrelation(obj, variable, value, MaxLag=...)

    :Arguments:

    * obj: a variable-order Markov chain, a hidden variable-order Markov
      chain or variable-order Markov data,
    * value (int): with a single positional argument, the value handed to
      ``obj.state_autocorrelation_computation``,
    * variable, value (int, int): with two positional arguments, the variable
      index and the value handed to
      ``obj.output_autocorrelation_computation``.

    :Optional Arguments:

    * MaxLag (int): maximum lag (default: ``MAX_LAG``).

    :Returned Object:

    Whatever the selected computation method of *obj* returns.
    """
    accepted_models = [_VariableOrderMarkov,
                       _HiddenVariableOrderMarkov,
                       _VariableOrderMarkovData]
    error.CheckType([obj], [accepted_models])
    error.CheckArgumentsLength(args, 1, 2)

    nb_args = len(args)
    state_only = nb_args == 1
    if state_only:
        # a lone positional argument is the state; the variable index is
        # set to 1 only so that the common type check below can run
        variable, value = 1, args[0]
    elif nb_args == 2:
        variable, value = args

    max_lag = error.ParseKargs(kargs, "MaxLag", MAX_LAG)
    error.CheckType([variable, value, max_lag], [int, int, int])

    if state_only:
        return obj.state_autocorrelation_computation(value, max_lag)
    elif nb_args == 2:
        return obj.output_autocorrelation_computation(variable, value,
                                                      max_lag)
def Split(obj, step):
    """Call ``obj.split(step)`` after validating the arguments.

    :Arguments:

    * obj: markovian sequences, variable-order Markov data, semi-Markov data
      or non-homogeneous Markov data,
    * step (int): forwarded unchanged to ``obj.split``.

    :Returned Object:

    The value returned by ``obj.split(step)``.

    .. todo:: document the meaning of *step*.
    """
    markovian_inputs = [
        _MarkovianSequences,
        _VariableOrderMarkovData,
        _SemiMarkovData,
        _NonHomogeneousMarkovData,
    ]
    error.CheckType([obj], [markovian_inputs])
    error.CheckType([step], [int])

    result = obj.split(step)
    return result
def _estimate_hidden_variable_order_markov(obj, *args, **kargs):
    """Estimate a hidden variable-order Markov chain from *obj*.

    :Arguments:

    * obj: the data object providing the
      ``hidden_variable_order_markov_estimation`` and
      ``hidden_variable_order_markov_stochastic_estimation`` methods,
    * args[0] (_HiddenVariableOrderMarkov): initial model.

    :Optional Arguments:

    * Algorithm (string): key of ``sub_markovian_algorithms``
      (default: 'EM'),
    * GlobalInitialTransition (bool, default True),
    * CommonDispersion (bool, default False),
    * NbIteration (int, default 80),
    * Counting (bool, default True),
    * StateSequence (bool, default True),
    * Parameter (int or float), MinNbStateSequence (int),
      MaxNbStateSequence (int): only accepted together with the "MCEM"
      algorithm.

    :Returned Object:

    The model returned by the selected estimation method of *obj*.

    :Raises:

    * ValueError: if an MCEM-only option is given with another algorithm or
      if the selected algorithm is not handled here.
    """
    from openalea.sequence_analysis._sequence_analysis import (
        MIN_NB_STATE_SEQUENCE,
        MAX_NB_STATE_SEQUENCE,
        NB_STATE_SEQUENCE_PARAMETER)
    from openalea.stat_tool._stat_tool import (
        FORWARD,
        FORWARD_BACKWARD_SAMPLING,
        FORWARD_DYNAMIC_PROGRAMMING)

    GlobalInitialTransition = kargs.get("GlobalInitialTransition", True)
    CommonDispersion = kargs.get("CommonDispersion", False)
    NbIteration = kargs.get("NbIteration", 80)
    Counting = kargs.get("Counting", True)
    StateSequence = kargs.get("StateSequence", True)
    Parameter = kargs.get("Parameter", NB_STATE_SEQUENCE_PARAMETER)
    MinNbSequence = kargs.get("MinNbStateSequence", MIN_NB_STATE_SEQUENCE)
    MaxNbSequence = kargs.get("MaxNbStateSequence", MAX_NB_STATE_SEQUENCE)
    Algorithm = error.ParseKargs(kargs, "Algorithm", 'EM',
                                 sub_markovian_algorithms)

    error.CheckType([CommonDispersion, Counting, GlobalInitialTransition,
                     NbIteration, MinNbSequence, MaxNbSequence, Parameter,
                     StateSequence],
                    [bool, bool, bool, int, int, int, [int, float], bool])
    error.CheckType([args[0]], [_HiddenVariableOrderMarkov])

    # TODO: once Chain is public and exported in
    # export_variable_order_markov, reject GlobalInitialTransition for
    # models of type 'e'.

    if Algorithm != sub_markovian_algorithms["MCEM"]:
        # These options only make sense for the MCEM algorithm.
        for option in ("Parameter", "MaxNbStateSequence",
                       "MinNbStateSequence"):
            if kargs.get(option):
                # The original "% is" format spec raised TypeError here.
                raise ValueError(
                    "If %s is provided, Algorithm must be MCEM" % option)

    if Algorithm == FORWARD:
        return obj.hidden_variable_order_markov_estimation(
            args[0], GlobalInitialTransition, CommonDispersion,
            Counting, StateSequence, NbIteration)

    if Algorithm == FORWARD_BACKWARD_SAMPLING:
        return obj.hidden_variable_order_markov_stochastic_estimation(
            args[0], GlobalInitialTransition, CommonDispersion,
            MinNbSequence, MaxNbSequence, Parameter, Counting,
            StateSequence, NbIteration)

    # Previously the algorithm was printed and an unbound local was returned.
    raise ValueError("Algorithm %s is not supported for the estimation of "
                     "a hidden variable-order Markov chain" % Algorithm)
def SaveMTG(obj, Filename=None, Type=None):
    """Save sequences in MTG format.

    :param obj: the sequences to write (one of the types listed in
        ``ms_vomd_smd_nhmd``).
    :param str Filename: name of the output file.
    :param list Type: one key of ``sub_variable_type`` per variable; the
        original documentation describes them as "S" (Symbolic) or "N"
        (Numeric) characters.

    Nothing is returned; the file is written by ``obj.mtg_write``.
    """
    from openalea.stat_tool.enums import sub_variable_type

    error.CheckType([obj], [ms_vomd_smd_nhmd])
    error.CheckType([Filename, Type], [str, list])

    # translate every user-level code into its enumerate value
    variable_types = [sub_variable_type[code] for code in Type]
    obj.mtg_write(Filename, variable_types)
def ComputeWhiteNoiseCorrelation(obj, itype):
    """ComputeWhiteNoiseCorrelation

    Computation of the sample autocorrelation or cross-correlation function
    induced on a white noise sequence by filtering.

    :Usage:

    .. doctest::
        :options: +SKIP

        >>> ComputeWhiteNoiseCorrelation(cf, order)
        >>> ComputeWhiteNoiseCorrelation(cf, filter)
        >>> ComputeWhiteNoiseCorrelation(cf, frequencies)
        >>> ComputeWhiteNoiseCorrelation(cf, dist)

    :Arguments:

    * cf (correlation): sample autocorrelation or cross-correlation function
      (in the Pearson sense),
    * order (int): order of differencing,
    * filter (array(real)): filter values on a half width i.e. from one
      extremity to the central value (with the constraint that
      filteri + filterm = 1),
    * frequencies (array(int)): frequencies defining the filter,
    * dist (distribution, mixture, convolution, compound): symmetric
      distribution whose size of the support is even defining the filter
      (for instance Distribution("BINOMIAL", 0, 4, 0.5)).

    :Returned Object:

    The correlation object *obj* itself, after the white-noise correlation
    method matching the type of the second argument has been called on it.

    :Background:

    The application of linear filters for trend removal induces an
    autocorrelation structure. The effect of a given linear filter on the
    autocorrelation structure of the residual sequence can be roughly
    described as follows: the number of non-zero induced autocorrelation
    coefficients increases with the width of the filter while their
    numerical magnitudes decrease.

    .. seealso::

        :func:`~openalea.sequence_analysis.correlation.ComputeCorrelation`.
    """
    error.CheckType([obj], [_Correlation])
    filter_types = [int, list, _DiscreteParametricModel, _DiscreteMixture,
                    _Convolution, _Compound]
    error.CheckType([itype], [filter_types])

    # pick the method from the kind of filter description: an order, an
    # explicit list (filter values or frequencies) or a distribution
    if isinstance(itype, int):
        compute = obj.white_noise_correlation_order
    elif isinstance(itype, list):
        compute = obj.white_noise_correlation_filter
    else:
        compute = obj.white_noise_correlation_dist

    compute(itype)
    return obj
def _estimate_semi_markov(obj, *args, **kargs):
    """Estimate a semi-Markov chain from *obj*.

    :Arguments:

    * obj: object providing ``semi_markov_estimation``,
    * args[0]: key of ``stochastic_process_type`` selecting the process
      type.

    :Optional Arguments:

    * NbIteration (int, default ``I_DEFAULT``),
    * Counting (bool, default True),
    * Estimator (string): key of ``estimator_semi_markov_type``
      (default "CompleteLikelihood"),
    * OccupancyMean (string): key of ``mean_computation_map``
      (default 'Computed').

    :Returned Object:

    The value returned by ``obj.semi_markov_estimation``.

    :Raises:

    * ValueError: when a forbidden option is detected (see the notes in the
      code).
    """
    process_type = error.CheckDictKeys(args[0], stochastic_process_type)

    nb_iteration = kargs.get("NbIteration", I_DEFAULT)
    counting = kargs.get("Counting", True)
    estimator = error.ParseKargs(kargs, "Estimator", "CompleteLikelihood",
                                 estimator_semi_markov_type)
    occupancy_mean = error.ParseKargs(kargs, "OccupancyMean", 'Computed',
                                      mean_computation_map)

    error.CheckType([counting, nb_iteration], [bool, int])

    # NOTE(review): process_type comes from CheckDictKeys and is later used
    # through ``.real``; comparing it with the string 'e' may therefore
    # always be unequal -- confirm against CheckDictKeys.
    options_forbidden = (process_type != 'e'
                         or estimator == PARTIAL_LIKELIHOOD)

    # NOTE(review): the lookup key below is the NbIteration *value*, not the
    # string "NbIteration", so this guard probably never fires. Kept as is
    # because fixing it changes which calls are accepted -- confirm intent.
    if options_forbidden and kargs.get(nb_iteration):
        raise ValueError("Forbidden options Estimate NbIteration")
    if options_forbidden and kargs.get("OccupancyMean"):
        raise ValueError("Forbidden options Estimate OccupancyMean")

    return obj.semi_markov_estimation(process_type.real, estimator, counting,
                                      nb_iteration, occupancy_mean)
def RemoveApicalInternodes(obj, internode):
    """RemoveApicalInternodes

    Removal of the apical internodes of the parent shoot of a 'top'.

    :Usage:

    .. doctest::
        :options: +SKIP

        >>> RemoveApicalInternodes(top, nb_internode)

    :Arguments:

    * top (tops),
    * nb_internode (int): number of removed internodes.

    :Returned Object:

    If nb_internode > 0 and if the removed internodes do not bear offspring
    shoots, an object of type tops is returned, otherwise no object is
    returned.

    .. seealso::

        :func:`~openalea.stat_tool.data_transform.SelectIndividual`,
        :func:`~openalea.stat_tool.data_transform.Merge`,
        :func:`~openalea.sequence_analysis.data_transform.Reverse`.
    """
    error.CheckType([obj, internode], [_Tops, int])

    # NOTE(review): the work is delegated to ``obj.shift``; confirm that this
    # is the binding that removes apical internodes.
    shortened = obj.shift(internode)
    return shortened
def RenewalData(*args):
    """Build renewal data from a set of sequences.

    :Usage:

        RenewalData(seq, begin_index, end_index)

    :Arguments:

    * seq (sequences, markovian sequences): input sequences,
    * begin_index (int), end_index (int): forwarded to
      ``seq.extract_renewal_data``.

    :Returned Object:

    A ``_RenewalData`` object built from the extracted data.

    .. note:: the variable passed to ``extract_renewal_data`` is always
        ``seq.nb_variable``.
    """
    # Three positional arguments are read below; the original only required
    # two and then failed with IndexError on ``args[2]``.
    error.CheckArgumentsLength(args, 3)

    seq, begin_index, end_index = args[0], args[1], args[2]
    error.CheckType([seq], [[_Sequences, _MarkovianSequences]])
    error.CheckType([begin_index, end_index], [int, int])

    variable = seq.nb_variable
    timev = seq.extract_renewal_data(variable, begin_index, end_index)
    return _RenewalData(timev)
def NbEventSelect(obj, imin, imax):
    """NbEventSelect

    Selection of data items of type {time interval between two observation
    dates, number of events occurring between these two observation dates}
    according to a number of events criterion.

    :Usage:

    .. doctest::
        :options: +SKIP

        >>> NbEventSelect(timev, min_nb_event, max_nb_event)

    :param TimeEvents,RenewalData obj: the time-events data,
    :param int imin: minimum number of events,
    :param int imax: maximum number of events.

    :Returned Object:

    If 0 <= min_nb_event < max_nb_event and if the range of number of events
    defined by min_nb_event and max_nb_event enables to select data items of
    type {time interval between two observation dates, number of events}, an
    object of type time_events is returned, otherwise no object is returned.

    .. seealso::

        :func:`~openalea.stat_tool.data_transform.Merge`,
        :func:`~openalea.sequence_analysis.data_transform.TimeScaling`,
        :func:`~openalea.sequence_analysis.data_transform.TimeSelect`.
    """
    time_events_types = [_TimeEvents, _RenewalData]
    error.CheckType([obj, imin, imax], [time_events_types, int, int])

    selection = obj.nb_event_select(imin, imax)
    return selection
def LumpabilityTest(obj, *args, **kargs):
    """Run ``obj.lumpability_test`` and raise if it reports a failure.

    :Arguments:

    * obj: markovian sequences, variable-order Markov data, semi-Markov data
      or non-homogeneous Markov data,
    * args[0] (list): the symbols handed to ``obj.lumpability_test``.

    :Optional Arguments:

    * Order (int): default 1.

    :Returned Object:

    Nothing is returned.

    :Raises:

    * TypeError: if ``obj.lumpability_test`` returns ``False``.

    .. todo:: documentation
    """
    error.CheckArgumentsLength(args, 1, 1)
    markovian_inputs = [_MarkovianSequences, _VariableOrderMarkovData,
                        _SemiMarkovData, _NonHomogeneousMarkovData]
    error.CheckType([obj], [markovian_inputs])

    symbol = args[0]
    Order = kargs.get("Order", 1)
    error.CheckType([symbol, Order], [list, int])

    status = obj.lumpability_test(symbol, Order)
    # only the literal False is treated as a failure
    if status is False:
        raise TypeError("warning: false status returned by lumpability test")
def ComputeInitialRun(obj):
    """Call ``obj.initial_run_computation()`` after a type check.

    :Arguments:

    * obj: markovian sequences, variable-order Markov data, semi-Markov data
      or non-homogeneous Markov data.

    :Returned Object:

    The value returned by ``obj.initial_run_computation()``.

    .. todo:: documentation
    """
    markovian_inputs = [
        _MarkovianSequences,
        _VariableOrderMarkovData,
        _SemiMarkovData,
        _NonHomogeneousMarkovData,
    ]
    error.CheckType([obj], [markovian_inputs])

    result = obj.initial_run_computation()
    return result
def SemiMarkov(filename=None, length=DEFAULT_LENGTH, counting=True,
               cumul_threshold=0):
    """SemiMarkov constructor

    Construction of a semi-Markov chain from an ASCII file.

    :Usage:

    ::

        SemiMarkov(filename, length=40, counting=True, cumul_threshold=40)

    .. todo:: make the parameter input names consistent over all modules
        e.g, length and Length should be only denoted either length or
        Length exclusively. For backward compatibility, Length should be
        used ?

    :Arguments:

    * filename (string): path of an existing file.

    :Optional Arguments:

    * length (int): length of sequences for the computation of the
      intensity and counting characteristic distributions
      (default value: ``DEFAULT_LENGTH``),
    * counting (bool): computation of counting characteristic distributions
      (default value: True),
    * cumul_threshold (int, float): forwarded to the underlying constructor
      (default value: 0).

    :Returned Object:

    If the construction succeeds, an object of type semi-markov is returned.

    :Raises:

    * IOError: if *filename* is not an existing file.

    :Background:

    A semi-Markov chain is constructed from a first-order Markov chain
    representing transition between distinct states and state occupancy
    distributions associated to the non-absorbing states. The state
    occupancy distributions are defined as object of type distribution with
    the additional constraint that the minimum time spent in a given state
    is at least 1 (inf_bound >= 1).

    .. seealso::

        :class:`~openalea.stat_tool.output.Save`,
        :func:`~openalea.sequence_analysis.compare.Compare`,
        :func:`~openalea.sequence_analysis.simulate.Simulate`.
    """
    # todo:: cumul_threshold has no default value in the cpp code
    # (semi_markov.cpp); is zero a good default value ?
    error.CheckType([filename, length, counting, cumul_threshold],
                    [str, int, bool, [int, float]])

    file_exists = os.path.isfile(filename)
    if file_exists:
        return _SemiMarkov(filename, length, counting, cumul_threshold)
    raise IOError("Invalid filename")
def TransformPosition(obj, step=None):
    """Call ``obj.transform_position(step)`` and return the result.

    :Arguments:

    * obj (sequences): input sequences,
    * step (int): forwarded to ``obj.transform_position``.

    :Returned Object:

    The transformed sequences, converted with ``markovian_sequences()`` when
    the result provides that method.

    .. todo:: documentation
    """
    error.CheckType([obj, step], [_Sequences, int])

    ret = obj.transform_position(step)

    # Convert the *result*: the original converted ``obj`` itself and
    # returned the untransformed input, discarding ``ret``.
    if hasattr(ret, 'markovian_sequences'):
        return ret.markovian_sequences()
    return ret
def RemoveIndexParameter(obj):
    """Call ``obj.remove_index_parameter()`` after a type check.

    :Arguments:

    * obj: sequences, markovian sequences, variable-order Markov data,
      semi-Markov data or non-homogeneous Markov data.

    :Returned Object:

    The result of ``obj.remove_index_parameter()``; when *obj* is an instance
    of ``_Sequences`` the result is additionally converted with
    ``markovian_sequences()``.

    .. todo:: documentation
    """
    accepted = [_Sequences, _MarkovianSequences, _VariableOrderMarkovData,
                _SemiMarkovData, _NonHomogeneousMarkovData]
    error.CheckType([obj], [accepted])

    stripped = obj.remove_index_parameter()
    if isinstance(obj, _Sequences):
        return stripped.markovian_sequences()
    return stripped
def HiddenVariableOrderMarkov(filename=None, Length=DEFAULT_LENGTH,
                              CumulThreshold=OCCUPANCY_THRESHOLD):
    """HiddenVariableOrderMarkov

    Build a hidden variable-order Markov chain from a file.

    :param str filename: path of an existing file,
    :param int Length: forwarded to the underlying constructor
        (default ``DEFAULT_LENGTH``),
    :param float CumulThreshold: forwarded to the underlying constructor
        (default ``OCCUPANCY_THRESHOLD``).

    :raises IOError: if *filename* is not an existing file.

    .. todo:: documentation
    """
    error.CheckType([filename, Length, CumulThreshold], [str, int, float])

    if os.path.isfile(filename):
        return _HiddenVariableOrderMarkov(filename, Length, CumulThreshold)
    raise IOError("Invalid filename %s" % filename)
def NonhomogeneousMarkov(filename, length=DEFAULT_LENGTH):
    """NonhomogeneousMarkov constructor

    :param str filename: path of an existing file,
    :param int length: forwarded to the underlying constructor
        (default ``DEFAULT_LENGTH``).

    :raises IOError: if *filename* is not an existing file.

    :Usage:

    .. doctest::
        :options: +SKIP

        >>> nm = NonhomogeneousMarkov("filename.dat")
        >>> nm = NonhomogeneousMarkov("filename.dat", 10)
    """
    error.CheckType([filename, length], [str, int])

    # guard clause: refuse anything that is not a regular file
    if not os.path.isfile(filename):
        raise IOError("bad file name")
    return _NonHomogeneousMarkov(filename, length)
def VariableOrderMarkov(*args, **kargs):
    """VariableOrderMarkov

    Build a variable-order Markov chain from a file.

    :Usage:

    .. doctest::
        :options: +SKIP

        >>> VariableOrderMarkov(filename)

    :Arguments:

    * filename (string): path of an existing file.

    :Optional Arguments:

    * Length: forwarded to the underlying constructor
      (default ``DEFAULT_LENGTH``).

    :Raises:

    * IOError: if *filename* is not an existing file.
    """
    error.CheckArgumentsLength(args, 1, 1)
    error.CheckType([args[0]], [str])

    filename = args[0]
    Length = kargs.get("Length", DEFAULT_LENGTH)

    if not os.path.isfile(filename):
        raise IOError("bad file name %s" % filename)
    return _VariableOrderMarkov(filename, Length)
def IndexParameterType(obj):
    """Return the name of the index parameter type of *obj*.

    The value of ``obj.index_parameter_type`` is looked up among the values
    of ``index_parameter_type_map`` and the matching key is returned.

    :Arguments:

    * obj: sequences, markovian sequences, variable-order Markov data,
      semi-Markov data or non-homogeneous Markov data.

    :Returned Object:

    The first key of ``index_parameter_type_map`` whose value equals
    ``obj.index_parameter_type``, or ``None`` when no value matches.

    .. doctest::
        :options: +SKIP

        >>> obj.index_parameter_type
        3
        >>> IndexParameterType(obj)
        openalea.sequence_analysis._sequence_analysis.IndexParameterType.POSITON
    """
    error.CheckType([obj], [[_Sequences, _MarkovianSequences,
                             _VariableOrderMarkovData, _SemiMarkovData,
                             _NonHomogeneousMarkovData]])
    from openalea.sequence_analysis.enums import index_parameter_type_map

    current = obj.index_parameter_type
    # items() works on Python 2 and 3, unlike the Python-2-only iteritems()
    for key, value in index_parameter_type_map.items():
        if value == current:
            return key
    return None
def ComputeCorrelation(obj, *args, **kargs):
    """Computation of sample autocorrelation or cross-correlation functions.

    :Examples:

    .. doctest::
        :options: +SKIP

        >>> ComputeCorrelation(seq1, MaxLag=10, Type="Spearman", Normalization="Exact")
        >>> ComputeCorrelation(seqn, variable, MaxLag=10, Type="Spearman", Normalization="Exact")
        >>> ComputeCorrelation(seqn, variable1, variable2, MaxLag=10, Type="Spearman", Normalization="Exact")

    :Arguments:

    * seq1 (sequences, discrete_sequences, markov_data, semi-markov_data):
      univariate sequences,
    * seqn (sequences, discrete_sequences, markov_data, semi-markov_data):
      multivariate sequences,
    * variable (int): variable index (computation of a sample
      autocorrelation function).
    * variable1, variable2 (int): variable indices (computation of a sample
      cross-correlation function).

    :Optional Arguments:

    * Type (string): type of correlation coefficient: "Pearson" (linear
      correlation coefficient - default value), "Spearman" or "Kendall"
      (rank correlation coefficients).
    * MaxLag (int): maximum lag. A default value is computed from the
      sequence length distribution,
    * Normalization (string): normalization of the correlation
      coefficients: "Approximated" (usual convention for time series
      analysis) or "Exact" (highly recommended for sample of short
      sequences). The value used by this function when the argument is
      omitted is "Exact". This optional argument can only be used if the
      optional argument Type is set at "Pearson" or "Spearman".
    * IndividualMean (bool): forwarded to the computation
      (default value: False).

    :Returned Object:

    If variable, or variable1 and variable2 are valid indices of variables
    (and are different if two indices are given) and if
    0 <= MaxLag < (maximum length of sequences), then an object of type
    correlation is returned, otherwise no object is returned.

    :Raises:

    * TypeError: for multivariate sequences, if the number of positional
      arguments is not 1 or 2.

    :Background:

    In the univariate case or if only variable is given, a sample
    autocorrelation function is computed. If variable1 and variable2 are
    given, a sample cross-correlation function is computed.

    .. seealso::

        :func:`~openalea.sequence_analysis.correlation.ComputePartialAutoCorrelation`,
        :func:`~openalea.sequence_analysis.correlation.ComputeWhiteNoiseCorrelation`
    """
    error.CheckType([obj], [[
        _Sequences,
        _MarkovianSequences,
        _VariableOrderMarkovData,
        _SemiMarkovData,
        _NonHomogeneousMarkovData,
    ]])

    if obj.nb_variable == 1:
        # univariate case: positional arguments are not needed
        variable1 = 1
        variable2 = 1
    else:
        # Check the count before indexing, so that a missing variable raises
        # the intended TypeError instead of an IndexError.
        if len(args) not in (1, 2):
            raise TypeError("1 or 2 non-optional arguments required")

        # todo: check that 0 < variable <= nb_variable for both indices
        error.CheckType([args[0]], [int])
        variable1 = args[0]
        variable2 = variable1
        if len(args) == 2:
            error.CheckType([args[1]], [int])
            variable2 = args[1]

    max_lag = error.ParseKargs(kargs, "MaxLag", I_DEFAULT)
    itype = error.ParseKargs(kargs, "Type", "Pearson", type_dict)
    normalization = error.ParseKargs(kargs, "Normalization", "Exact",
                                     norm_type)
    IndividualMean = error.ParseKargs(kargs, "IndividualMean", False)

    # todo: reject Normalization with a "Kendall" coefficient and
    # IndividualMean with anything but "Pearson", as documented above.

    return obj.correlation_computation(variable1, variable2, itype, max_lag,
                                       normalization, IndividualMean)
def ComputePartialAutoCorrelation(obj, *args, **kargs):
    """ComputePartialAutoCorrelation

    Computation of sample partial autocorrelation functions.

    :Usage:

    .. doctest::
        :options: +SKIP

        >>> ComputePartialAutoCorrelation(seq1, MaxLag=10, Type="Kendall")
        >>> ComputePartialAutoCorrelation(seqn, variable, MaxLag=10, Type="Kendall")

    :Arguments:

    * seq1 (**sequences**, discrete_sequences, markov_data,
      semi-markov_data): univariate sequences,
    * seqn (sequences, discrete_sequences, markov_data, semi-markov_data):
      multivariate sequences,
    * variable (int): variable index (1 when omitted).

    :Optional Arguments:

    * MaxLag (int): maximum lag (default value: ``MAX_LAG``),
    * Type (string): type of correlation coefficient: "Pearson" (linear
      correlation coefficient - the default) or "Kendall" (rank correlation
      coefficient).

    :Returned Object:

    If variable is a valid variable index and if
    1 <= MaxLag < (maximum length of sequences), an object of type
    correlation is returned, otherwise no object is returned.

    :Background:

    The partial autocorrelation coefficient at lag k measures the
    correlation between :math:`x_t` and :math:`x_{t+k}` not accounted for by
    :math:`x_{t+1}, ..., x_{t+k-1}` (or after adjusting for the effects of
    :math:`x_{t+1}, ..., x_{t+k-1}`).

    .. seealso::

        :func:`~openalea.sequence_analysis.correlation.ComputeCorrelation`
    """
    sequence_types = [
        _Sequences,
        _MarkovianSequences,
        _VariableOrderMarkovData,
        _SemiMarkovData,
        _NonHomogeneousMarkovData,
    ]
    error.CheckType([obj], [sequence_types])
    error.CheckArgumentsLength(args, 0, 1)

    # the variable index is optional and defaults to the first variable
    variable = args[0] if len(args) != 0 else 1

    max_lag = error.ParseKargs(kargs, "MaxLag", MAX_LAG)
    Type = error.ParseKargs(kargs, "Type", "Pearson", type_dict)

    error.CheckType([variable, max_lag], [int, int])
    _check_nb_variable(obj, variable)
    # todo: check that Type is Pearson or Kendall

    return obj.partial_autocorrelation_computation(variable, Type, max_lag)
def Renewal(*args, **kargs):
    """Renewal

    Construction of a (either ordinary or equilibrium) renewal process from
    an inter-event distribution or from an ASCII file.

    :Usage:

    .. doctest::
        :options: +SKIP

        >>> Renewal("BINOMIAL", inf_bound, sup_bound, proba, Type="Equilibrium", ObservationTime=40)
        >>> Renewal("POISSON", inf_bound, param, Type="Equilibrium", ObservationTime=40)
        >>> Renewal("NEGATIVE_BINOMIAL", inf_bound, param, proba, Type="Equilibrium", ObservationTime=40)
        >>> Renewal(inter_event, Type="Equilibrium", ObservationTime=40)
        >>> Renewal(file_name, Type="Equilibrium", ObservationTime=40)

    :Arguments:

    * inf_bound (int): lower bound to the range of possible values (shift
      parameter),
    * sup_bound (int): upper bound to the range of possible values (only
      relevant for binomial or uniform distributions),
    * param (int, real): parameter of either the Poisson distribution or the
      negative binomial distribution.
    * proba (int, real): probability of 'success' (only relevant for
      binomial or negative binomial distributions).

    .. note:: the names of the parametric discrete distributions can be
        summarized by their first letters: "B" ("BINOMIAL"), "P" ("POISSON"),
        "NB" ("NEGATIVE_BINOMIAL").

    * inter_event (distribution, mixture, convolution, compound):
      inter-event distribution,
    * file_name (string).

    :Optional Arguments:

    * Type (string): type of renewal process: "Ordinary" or "Equilibrium"
      (the default).
    * ObservationTime (int): length of the observation period for the
      computation of the intensity and counting distributions
      (default value: ``DEFAULT_TIME``),
    * Scale (float): when given with a named distribution, the inter-event
      distribution is rescaled before the renewal process is built.

    :Returned Object:

    If the construction succeeds, an object of type renewal is returned.

    :Raises:

    * IOError: if a single string argument is not an existing file,
    * NotImplementedError: if the distribution name is not supported,
    * TypeError: if the first argument matches none of the supported forms.

    :Background:

    A renewal process is built from a discrete distribution termed the
    inter-event distribution which represents the time interval between
    consecutive events. Two types of renewal processes are available:

    * ordinary renewal process where the start of the observation period
      coincides with the occurrence time of an event (synchronism
      assumption),
    * equilibrium or stationary renewal process where the start of the
      observation period is independent of the process which generates the
      data (asynchronism assumption).

    In the case where the arguments are the name and the parameters of the
    inter-event distribution, the constraints on parameters described in the
    definition of the syntactic form of the type distribution apply
    (cf. File Syntax).

    .. seealso::

        :func:`~openalea.stat_tool.output.Save`,
        :func:`~openalea.sequence_analysis.simulate.Simulate` (renewal process)

    .. todo:: ident should correspond to Binomial, B, NegativeBinomial and
        so on
    """
    # todo: move this enum to enumerate.py
    type_map = {
        "Equilibrium": 'e',
        "Ordinary": 'o',
    }

    Type = error.ParseKargs(kargs, "Type", "Equilibrium", type_map)
    ObservationTime = kargs.get("ObservationTime", DEFAULT_TIME)
    Scale = kargs.get("Scale", None)  # todo: check default values !

    accepted_first_argument = [str]
    accepted_first_argument.extend(model_distribution_types)
    error.CheckType([args[0]], [accepted_first_argument])

    first = args[0]

    # a filename constructor: exactly one argument, which is a string
    # ------------------ todo ----------- not tested
    if len(args) == 1 and isinstance(first, str):
        if not os.path.isfile(first):
            raise IOError("bad file name")
        return _Renewal(first)

    # otherwise, a constructor from a named parametric distribution
    if isinstance(first, str):
        if first == "BINOMIAL" or first == "B":
            error.CheckArgumentsLength(args, 4, 4)
            error.CheckType([args[1], args[2], args[3]],
                            [int, int, [int, float]])
            inf_bound = args[1]
            sup_bound = args[2]
            probability = args[3]
            parameter = -1
        elif first == "NEGATIVE_BINOMIAL" or first == "NB":
            error.CheckArgumentsLength(args, 4, 4)
            error.CheckType([args[1], args[2], args[3]],
                            [int, [int, float], [int, float]])
            inf_bound = args[1]
            sup_bound = -1
            parameter = args[2]
            probability = args[3]
        elif first == "POISSON" or first == "P":
            # The documented form is ("POISSON", inf_bound, param): a Poisson
            # distribution has no probability. The original demanded a 4th
            # argument; that legacy form is still accepted.
            error.CheckArgumentsLength(args, 3, 4)
            error.CheckType([args[1], args[2]], [int, [float, int]])
            inf_bound = args[1]
            sup_bound = -1
            parameter = args[2]
            if len(args) == 4:
                error.CheckType([args[3]], [[int, float]])
                probability = args[3]
            else:
                # NOTE(review): -1 is the "unused" marker already used above
                # for sup_bound and parameter -- confirm it is also the
                # expected marker for an unused probability.
                probability = -1
        else:
            # NotImplemented is a constant, not an exception class; calling
            # it raised an unrelated TypeError.
            raise NotImplementedError("""case not implemented. First arg must be
                a valid filename or a "BINOMIAL", "NEGATIVE_BINOMIAL, or
                "POISSON" """)

        # if all keys in distribution_identifier_type are used, this lookup
        # can move before the branches above and the NotImplementedError can
        # be removed
        ident = distribution_identifier_type[first]
        RENEWAL_THRESHOLD = 1.
        inter_event = _DiscreteParametric(ident, inf_bound, sup_bound,
                                          parameter, probability,
                                          RENEWAL_THRESHOLD)

        if Scale:
            error.CheckType([Scale], [float])
            inter_event = _DiscreteParametric(inter_event, Scale)
        return _Renewal(inter_event, Type, ObservationTime)

    # finally, a constructor from an existing distribution object
    if type(first) in model_distribution_types:
        return _Renewal(_DiscreteParametric(first), Type, ObservationTime)

    # Previously this fell through to an unbound local variable.
    raise TypeError("first argument must be a file name, a distribution "
                    "name or a distribution object")
def _sequences_to_markovian(sequence):
    """Return ``sequence.markovian_sequences()`` when possible, else *sequence*."""
    if hasattr(sequence, 'markovian_sequences'):
        try:
            sequence = sequence.markovian_sequences()
        except Exception:
            # deliberate best effort: keep the plain sequences object when
            # the conversion is refused
            pass
    return sequence


def Sequences(obj, **kargs):
    """Construction of a set of sequences from multidimensional arrays of
    integers, from data generated by a renewal process or from an ASCII file.

    The data structure of type array(array(array(int))) should be
    constituted at the most internal level of arrays of constant size. If the
    optional argument IndexParameter is set at "Position" or "Time", the data
    structure of type array(array(array(int))) is constituted at the most
    internal level of arrays of size 1 + n (index parameter, n variables
    attached to the explicit index parameter). If the optional argument
    IndexParameter is set at "Position", only the index parameter of the last
    array of size 1 + n is considered and the first component of successive
    elementary arrays (representing the index parameter) should be
    increasing. If the optional argument IndexParameter is set at "Time", the
    first component of successive elementary arrays should be strictly
    increasing.

    :Parameters:

    * array1 (array(array(int))): input data for univariate sequences
    * arrayn (array(array(array(int)))): input data for multivariate
      sequences,
    * timev (renewal_data),
    * file_name (string).

    :Optional Parameters:

    * Identifiers (array(int)): explicit identifiers of sequences. This
      optional argument can only be used if the first argument is of type
      array(array(int / array(int))).
    * VertexIdentifiers (array(array(int))): explicit identifiers of
      vectors.
    * IndexParameterType (string): key of ``index_parameter_type_map``
      (default: "IMPLICIT_TYPE"); only POSITION, TIME and IMPLICIT_TYPE are
      accepted when the first argument is a list.
    * IndexParameter (array(array(int))): explicit index parameters, one
      list per sequence (one more element than the sequence length when the
      type is POSITION). Consecutive integers are generated when omitted.
    * OldFormat (bool): forwarded to the file reader (file input only,
      default False).
    * Univariate (bool): force the univariate interpretation of a list of
      short lists of equal lengths (default False).
    * Verbose (bool): print how a list input was interpreted
      (default False).

    :Returns:

    If the construction succeeds, an object of type sequences or
    discrete_sequences is returned, otherwise no object is returned. The
    returned object is of type discrete_sequences if all the variables are
    of type STATE, if the possible values for each variable are consecutive
    from 0 and if the number of possible values for each variable is <= 15.

    :Examples:

    .. doctest::

        >>> # Single univariate sequence case (array1).
        >>> seq1 = Sequences([1, 2, 3], Identifiers=[8])
        >>> seq1.nb_sequence
        1
        >>> seq1.nb_variable
        1
        >>> # General case arrayn
        >>> seq = Sequences([
        ...     [[1,2],[3,4]],
        ...     [[21,22],[23,24]],
        ...     [[31,32],[33,34], [35,36] ]],
        ...     Identifiers = [1,8,12],
        ...     VertexIdentifiers = [[1,2],[3,4],[5,6,7]])
        >>> seq.nb_sequence
        3
        >>> seq.nb_variable
        2
        >>> seq.max_length
        3

    .. doctest::
        :options: +SKIP

        >>> Sequences(timev)
        >>> Sequences(file_name)

    .. seealso::

        :class:`~openalea.stat_tool.output.Save`,
        :func:`~openalea.sequence_analysis.data_transform.AddAbsorbingRun`,
        :func:`~openalea.stat_tool.cluster.Cluster`,
        :func:`~openalea.sequence_analysis.data_transform.Cumulate`,
        :func:`~openalea.sequence_analysis.data_transform.Difference`,
        :func:`~openalea.sequence_analysis.data_transform.IndexParameterExtract`,
        :func:`~openalea.sequence_analysis.data_transform.LengthSelect`,
        :func:`~openalea.stat_tool.data_transform.Merge`,
        :func:`~openalea.stat_tool.data_transform.MergeVariable`,
        :func:`~openalea.sequence_analysis.data_transform.MovingAverage`,
        :func:`~openalea.sequence_analysis.data_transform.RecurrenceTimeSequences`,
        :func:`~openalea.sequence_analysis.data_transform.RemoveRun`,
        :func:`~openalea.sequence_analysis.data_transform.Reverse`,
        :func:`~openalea.sequence_analysis.data_transform.SegmentationExtract`,
        :func:`~openalea.stat_tool.data_transform.SelectIndividual`,
        :func:`~openalea.stat_tool.data_transform.SelectVariable`,
        :func:`~openalea.stat_tool.data_transform.Shift`,
        :func:`~openalea.stat_tool.cluster.Transcode`,
        :func:`~openalea.stat_tool.data_transform.ValueSelect`,
        :func:`~openalea.sequence_analysis.data_transform.VariableScaling`.
        :func:`~openalea.stat_tool.data_transform.ExtractHistogram`,
        :func:`~openalea.sequence_analysis.data_transform.ExtractVectors`,
        :func:`~openalea.sequence_analysis.correlation.ComputeCorrelation`,
        :func:`~openalea.sequence_analysis.correlation.ComputePartialAutoCorrelation`,
        :func:`~openalea.sequence_analysis.data_transform.ComputeSelfTransition`,
        :func:`~openalea.sequence_analysis.compare.Compare`,
        :func:`~openalea.sequence_analysis.estimate.Estimate`,
        :func:`~openalea.sequence_analysis.data_transform.ComputeStateSequences`,
        :func:`~openalea.sequence_analysis.simulate.Simulate`.
    """
    import numpy

    error.CheckType([obj], [[str, _RenewalData, list]])

    # ---- constructor from a file name
    if isinstance(obj, str):
        filename = obj
        if not os.path.isfile(filename):
            raise IOError("bad file name %s" % filename)
        OldFormat = error.ParseKargs(kargs, "OldFormat", False, bool_type)
        sequence = _sequences_to_markovian(_Sequences(filename, OldFormat))
        try:
            # accessing the attribute is the validity probe
            sequence.nb_sequence
        except ValueError:
            raise ValueError("File read but issue while parsing. Returned sequence is not valid")
        return sequence

    # ---- constructor from renewal data
    if isinstance(obj, _RenewalData):
        return _sequences_to_markovian(_Sequences(obj))

    # ---- constructor from a list: normalise the input into an array of
    # arrays of arrays
    # case 1: general case, input = [[[1,2],[3,4]],[[1,2],[3,4],[5,6]]],
    #         nothing to do
    # case 2: univariate single sequence, input = [1,2,3,4,5,6] so it
    #         becomes [[[1],[2],[3],...]]
    # case 3: univariate sequences, input = [[1,2],[3,4],[5,6,7]]
    #         (i.e, different vector sizes)
    # case 4: multivariate sequence, input = [[1,2],[3,4],[5,6]]
    Verbose = error.ParseKargs(kargs, "Verbose", False)
    Univariate = error.ParseKargs(kargs, "Univariate", False)

    # Exact type comparisons (not isinstance) are kept on purpose: they are
    # what the original used to tell the cases apart.
    if type(obj) == list:
        recognised = True
        first_sequence = obj[0]
        if type(first_sequence) in [int, float]:
            obj = [[[x] for x in obj]]
            if Verbose:
                print('this is a single univariate sequence')
        elif type(first_sequence) == list:
            # either a single multivariate sequence or the general case of
            # several multivariate sequences
            if type(first_sequence[0]) == list:
                if Verbose:
                    print('this is the general case, nothing to do')
            elif type(first_sequence[0]) in [int, float]:
                lengths = numpy.array([len(x) for x in obj])
                same_length = lengths.var() == 0
                if same_length and Verbose:
                    print('this is the ambiguous case')
                # equal and short lists are read as the vectors of one
                # multivariate sequence unless Univariate is requested
                if same_length and lengths[0] < 5 and Univariate == False:
                    if Verbose:
                        print('this is 1 single multivariate sequence')
                    obj = [obj]
                else:
                    if Verbose:
                        print('this is univariate sequences')
                    obj = [[[y] for y in x] for x in obj]
            else:
                recognised = False
        else:
            recognised = False

        if not recognised:
            # NOTE(review): the original only printed this error object and
            # carried on; that behaviour is kept. It is reported here for
            # every unrecognised shape because the original nesting of the
            # final ``else`` could not be recovered.
            print(SyntaxError('wrong syntax for input object'))

    # 0 for int, 1 for float. By default all variables are int; a variable
    # becomes float as soon as one float value is found for it.
    InputTypes = [0] * len(obj[0][0])
    for seq in obj:
        for vec in seq:
            for index, var in enumerate(vec):
                assert type(var) in [int, float], "wrong types var=%s and its type is %s" % (var, type(var))
                if type(var) == float:
                    InputTypes[index] = 1

    from openalea.sequence_analysis._sequence_analysis import TIME, POSITION, \
        IMPLICIT_TYPE

    IndexParameterType = error.ParseKargs(kargs, "IndexParameterType",
                                          "IMPLICIT_TYPE",
                                          index_parameter_type_map)
    IndexParameter = error.ParseKargs(kargs, "IndexParameter", [])
    Identifiers = error.ParseKargs(kargs, "Identifiers", [])
    VertexIdentifiers = error.ParseKargs(kargs, "VertexIdentifiers", [])

    # build up a list of unique identifiers if none is provided
    if len(Identifiers) > 0:
        # provided identifiers must all be strictly positive
        assert len([x for x in Identifiers if x <= 0]) == 0
    else:
        # standard identifiers list [0, 1, 2, ...]
        Identifiers.extend(range(len(obj)))

    # build up a list of unique vertex identifiers if none is provided
    if len(VertexIdentifiers) == 0:
        # consecutive identifiers running over all the sequences
        index = 0
        for i, seq in enumerate(obj):
            VertexIdentifiers.append([])
            for vec in seq:
                VertexIdentifiers[i].append(index)
                index += 1
    # NOTE(review): for provided VertexIdentifiers the original asserted
    # ``x <= 0`` on each per-sequence *list*. That never fires on Python 2
    # (list versus int ordering) and raises TypeError on Python 3, so no
    # element-wise sign check is done here. Confirm whether strictly
    # positive vertex identifiers are required before adding one.

    # check unicity of vertex identifiers
    idents = [ident for seq in VertexIdentifiers for ident in seq]
    assert len(set(idents)) == len(idents), "ERROR, VertexIdentifiers must be made of unique identifiers (for each vector)"

    # check unicity of identifiers
    idents = list(Identifiers)
    assert len(set(idents)) == len(idents), "ERROR, Identifiers must be made of unique identifiers (for each sequence)"

    if len(IndexParameter) == 0:
        index = 0
        for i, seq in enumerate(obj):
            IndexParameter.append([])
            for vec in seq:
                IndexParameter[i].append(index)
                index += 1
            if IndexParameterType == POSITION:
                # POSITION needs one extra, final index per sequence
                IndexParameter[i].append(index)
                index += 1

    for i, seq in enumerate(obj):
        if IndexParameterType == POSITION:
            assert len(seq) == len(IndexParameter[i]) - 1, "ERROR, wrong IndexParameterLength. When ParameterType=POSITION, ParameterIndex length must be equla to the sequence length +1"
        else:
            assert len(seq) == len(IndexParameter[i]), "ERROR, wrong IndexParameterLength. ParameterIndex length must be equal to the sequence length."

    valid_param = [POSITION, TIME, IMPLICIT_TYPE]
    if IndexParameterType not in valid_param:
        raise ValueError("""IndexParameter can be only %s if first
            argument is a list""" % valid_param)

    sequence = _Sequences(obj, Identifiers, VertexIdentifiers,
                          IndexParameter, InputTypes, IndexParameterType)
    return _sequences_to_markovian(sequence)
def _estimate_variable_order_markov(obj, *args, **kargs):
    """Estimate a variable-order Markov chain from `obj`.

    The estimation routine is selected from the type of the first
    positional argument:

    * ``str``: a key of ``stochastic_process_type``. Without ``Order`` the
      order is estimated with ``obj.variable_order_markov_estimation1``;
      with ``Order`` the fixed-order routine
      ``obj.variable_order_markov_estimation2`` is called,
    * ``_VariableOrderMarkov``: initial model handed to
      ``obj.variable_order_markov_estimation3``,
    * ``list``: symbols handed to ``obj.lumpability_estimation``.

    :Optional Parameters:

    * Order (int): fixed order. Disables the order estimation and cannot be
      combined with a (truthy) Algorithm, Estimator, GlobalSample, MinOrder
      or Threshold option,
    * MaxOrder (int): default ``ORDER``,
    * MinOrder (int): default 0,
    * Threshold (int, float): default chosen according to Algorithm,
    * Algorithm (string): key of ``algorithm`` (default "LocalBIC"),
    * Estimator (string): key of ``estimator`` (default "Laplace"),
    * Penalty (string): key of ``likelihood_penalty_type`` (default "BIC"),
    * GlobalInitialTransition (bool): default True,
    * GlobalSample (bool): default True,
    * Counting (bool): default True.

    :Returns:

    The object returned by the selected estimation routine.

    :Raises:

    * TypeError: no positional argument was given,
    * ValueError: incompatible combination of optional arguments,
    * KeyError: the first positional argument has an unsupported type.
    """
    # Fail early with a clear message instead of an IndexError on args[0].
    if not args:
        raise TypeError(
            "_estimate_variable_order_markov expects a process type (str), "
            "a variable-order Markov model or a list of symbols as first "
            "argument")

    from openalea.sequence_analysis._sequence_analysis import \
        LOCAL_BIC_THRESHOLD,\
        CTM_KT_THRESHOLD,\
        CTM_BIC_THRESHOLD,\
        CONTEXT_THRESHOLD,\
        CTM_BIC,\
        CTM_KT,\
        CONTEXT,\
        LOCAL_BIC

    Order = kargs.get("Order", None)
    MaxOrder = kargs.get("MaxOrder", ORDER)
    MinOrder = kargs.get("MinOrder", 0)
    Threshold = kargs.get("Threshold", LOCAL_BIC_THRESHOLD)

    error.CheckType([Threshold, MaxOrder, MinOrder], [[int, float], int, int])

    Algorithm = error.ParseKargs(kargs, "Algorithm", "LocalBIC", algorithm)
    Estimator = error.ParseKargs(kargs, "Estimator", "Laplace", estimator)
    Penalty = error.ParseKargs(kargs, "Penalty", "BIC",
                               likelihood_penalty_type)

    GlobalInitialTransition = kargs.get("GlobalInitialTransition", True)
    GlobalSample = kargs.get("GlobalSample", True)
    Counting = kargs.get("Counting", True)

    error.CheckType([Counting, GlobalSample, GlobalInitialTransition],
                    [bool, bool, bool])

    first = args[0]

    # first argument is a string: the type of stochastic process
    if isinstance(first, str):
        Type = error.CheckDictKeys(first, stochastic_process_type)

        # check validity of the input arguments following AML's code:
        # when no threshold is supplied, use the one matching the algorithm
        if Algorithm != LOCAL_BIC and not kargs.get("Threshold"):
            if Algorithm == CTM_BIC:
                Threshold = CTM_BIC_THRESHOLD
            elif Algorithm == CTM_KT:
                Threshold = CTM_KT_THRESHOLD
            elif Algorithm == CONTEXT:
                Threshold = CONTEXT_THRESHOLD
        if Algorithm == CTM_KT and kargs.get("Estimator"):
            raise ValueError(
                "Forbidden combinaison of Algorithm and Estimator")

        # a user-supplied Order switches off the order estimation
        order_estimation = Order is None
        if not order_estimation:
            MaxOrder = Order
            options = ["Algorithm", "Estimator", "GlobalSample", "MinOrder",
                       "Threshold"]
            for option in options:
                if kargs.get(option):
                    raise ValueError(
                        "Order and %s cannot be used together" % option)

        # NOTE(review): Type comes from CheckDictKeys and is later used as
        # Type.real, so it may never compare equal to the string 'e' --
        # confirm against stochastic_process_type.
        if Type == 'e' and kargs.get("GlobalInitialTransition"):
            raise ValueError(""" Type e and GlobalInitialTransition cannot be used together""")

        if order_estimation:
            markov = obj.variable_order_markov_estimation1(
                Type.real, MinOrder, MaxOrder, Algorithm.real, Threshold,
                Estimator.real, GlobalInitialTransition, GlobalSample,
                Counting)
        else:
            markov = obj.variable_order_markov_estimation2(
                Type, MaxOrder, GlobalInitialTransition, Counting)

    # first argument is an initial variable-order Markov model
    elif isinstance(first, _VariableOrderMarkov):
        vom = first
        # can be implemented once Chain class is public and exported
        # in export_variable_order_markov
        # if vom.type == 'e' and kargs.get("GlobalInitialTransition"):
        #     raise ValueError("""
        #     Type e and GlobalInitialTransition cannot be used together""")
        markov = obj.variable_order_markov_estimation3(
            vom, GlobalInitialTransition, Counting)

    # first argument is a list of symbols
    elif isinstance(first, list):
        symbol = first
        markov = obj.lumpability_estimation(symbol, Penalty, Order, Counting)

    else:
        # KeyError is kept for backward compatibility with existing callers.
        raise KeyError(
            "first argument must be a string, a variable-order Markov model "
            "or a list (got %s)" % type(first).__name__)

    return markov
def _estimate_hidden_semi_markov(obj, *args, **kargs):
    """Estimate a hidden semi-Markov chain from `obj`.

    Two call forms are supported, selected by the first positional argument:

    * ``("Ordinary", nb_state, "LeftRight" | "Irreducible")`` or
      ``("Equilibrium", nb_state)``: estimation from a model structure,
    * ``(hsmc,)`` with ``hsmc`` a ``_HiddenSemiMarkov``: estimation from an
      initial model.

    :Optional Parameters:

    * CommonDispersion (bool): default False,
    * NbIteration (int): default ``I_DEFAULT``,
    * Counting (bool): default True,
    * StateSequence (bool): default True,
    * Parameter (int, float), MinNbStateSequence (int),
      MaxNbStateSequence (int): only accepted with Algorithm "MCEM",
    * Algorithm (string): key of ``sub_markovian_algorithms``
      (default "EM"),
    * Estimator (string): key of ``estimator_semi_markov_type``
      (default "CompleteLikelihood"),
    * InitialOccupancyMean (int, float): default ``D_DEFAULT``,
    * OccupancyMean (string): key of ``mean_computation_map``
      (default "Computed").

    :Returns:

    The estimated model, or None when `Algorithm` matches neither
    ``NO_COMPUTATION`` nor ``FORWARD_BACKWARD_SAMPLING``.

    :Raises:

    * ValueError: incompatible combination of arguments,
    * AttributeError: the process type is neither "Ordinary" nor
      "Equilibrium".

    .. doctest::
        :options: +SKIP

        >>> hsmc21 = Estimate(seq21, "HIDDEN_SEMI-MARKOV", hsmc0)
    """
    from openalea.sequence_analysis._sequence_analysis import \
        MIN_NB_STATE_SEQUENCE, \
        MAX_NB_STATE_SEQUENCE, \
        NB_STATE_SEQUENCE_PARAMETER

    from openalea.stat_tool._stat_tool import \
        NO_COMPUTATION, \
        FORWARD, \
        FORWARD_BACKWARD_SAMPLING, \
        KAPLAN_MEIER

    # GlobalInitialTransition = kargs.get("GlobalInitialTransition", True)
    CommonDispersion = kargs.get("CommonDispersion", False)
    NbIteration = kargs.get("NbIteration", I_DEFAULT)
    Counting = kargs.get("Counting", True)
    StateSequence = kargs.get("StateSequence", True)
    Parameter = kargs.get("Parameter", NB_STATE_SEQUENCE_PARAMETER)
    MinNbSequence = kargs.get("MinNbStateSequence", MIN_NB_STATE_SEQUENCE)
    MaxNbSequence = kargs.get("MaxNbStateSequence", MAX_NB_STATE_SEQUENCE)
    Algorithm = error.ParseKargs(kargs, "Algorithm", 'EM',
                                 sub_markovian_algorithms)
    Estimator = error.ParseKargs(kargs, "Estimator", 'CompleteLikelihood',
                                 estimator_semi_markov_type)
    InitialOccupancyMean = kargs.get("InitialOccupancyMean", D_DEFAULT)
    MeanComputation = error.ParseKargs(kargs, "OccupancyMean", 'Computed',
                                       mean_computation_map)

    error.CheckType([CommonDispersion, Counting, NbIteration,
                     MinNbSequence, MaxNbSequence, Parameter, StateSequence,
                     InitialOccupancyMean],
                    [bool, bool, int, int, int, [int, float], bool,
                     [float, int]])

    # The state-sequence sampling options only make sense with MCEM.
    if Algorithm != sub_markovian_algorithms["MCEM"]:
        options = ["Parameter", "MaxNbStateSequence", "MinNbStateSequence"]
        for option in options:
            if kargs.get(option):
                # "%s" (not "% i") so that the intended ValueError is raised
                raise ValueError(
                    "If %s is provided, Algorithm must be MCEM" % option)

    if Algorithm != sub_markovian_algorithms["EM"]:
        if Estimator == KAPLAN_MEIER:
            raise ValueError(
                "Estimator= KaplanMeier and Algorithm = MCEM not possible")

    error.CheckType([args[0]], [[str, _HiddenSemiMarkov]])

    # NOTE(review): Algorithm is compared with NO_COMPUTATION below while
    # FORWARD is imported but never used -- confirm which constant
    # sub_markovian_algorithms["EM"] maps to.
    if isinstance(args[0], str):
        error.CheckType([args[1]], [int])
        NbState = args[1]

        if args[0] == "Ordinary":
            error.CheckArgumentsLength(args, 3, 3)
            error.CheckType([args[2]], [str])
            Type = 'o'
            if args[2] not in ["LeftRight", "Irreducible"]:
                raise ValueError(
                    "third argument must be LeftRight or Irreducible.")
            LeftRight = args[2] == "LeftRight"
        elif args[0] == "Equilibrium":
            error.CheckArgumentsLength(args, 2, 2)
            Type = 'e'
            LeftRight = False
        else:
            raise AttributeError("type must be Ordinary or Equilibrium")

        # The option must be looked up by its name, not by its value.
        if ((Type != 'e') or (Estimator == PARTIAL_LIKELIHOOD) or
                (Algorithm != NO_COMPUTATION)) and \
                kargs.get("InitialOccupancyMean"):
            raise ValueError("Incompatible user arguments")

        if Algorithm == NO_COMPUTATION:
            return obj.hidden_semi_markov_estimation_model(
                Type, NbState, LeftRight, InitialOccupancyMean,
                CommonDispersion, Estimator, Counting, StateSequence,
                NbIteration, MeanComputation)
        elif Algorithm == FORWARD_BACKWARD_SAMPLING:
            return obj.hidden_semi_markov_stochastic_estimation_model(
                Type, NbState, LeftRight, InitialOccupancyMean,
                CommonDispersion, MinNbSequence, MaxNbSequence, Parameter,
                Estimator, Counting, StateSequence, NbIteration)

    elif isinstance(args[0], _HiddenSemiMarkov):
        #todo: add these lines once Chain is public
        #if ((( (args[0].type == 'o')) or
        #    (Estimator == PARTIAL_LIKELIHOOD) or
        #    (Algorithm != FORWARD_BACKWARD)) and \
        #    kargs.get("InitialOccupancyMean")):
        #        raise ValueError("Incompatible arguments")
        hsmarkov = args[0]

        if Algorithm == NO_COMPUTATION:
            return obj.hidden_semi_markov_estimation(
                hsmarkov, CommonDispersion, Estimator, Counting,
                StateSequence, NbIteration, MeanComputation)
        elif Algorithm == FORWARD_BACKWARD_SAMPLING:
            return obj.hidden_semi_markov_stochastic_estimation(
                hsmarkov, CommonDispersion, MinNbSequence, MaxNbSequence,
                Parameter, Estimator, Counting, StateSequence, NbIteration)
def Tops(*args, **kargs):
    """Construction of a set of sequences from multidimensional arrays of
    integers, from data generated by a renewal process or from an ASCII file.

    The data structure of type array(array(array(int))) should be constituted
    at the most internal level of arrays of constant size. If the optional
    argument IndexParameter is set at "Position" or "Time", the data
    structure of type array(array(array(int))) is constituted at the most
    internal level of arrays of size 1+n (index parameter, n variables
    attached to the explicit index parameter). If the optional argument
    IndexParameter is set at "Position", only the index parameter of the last
    array of size 1+n is considered and the first component of successive
    elementary arrays (representing the index parameter) should be
    increasing. If the optional argument IndexParameter is set at "Time", the
    first component of successive elementary arrays should be strictly
    increasing.

    :Parameters:

    * array1 (array(array(int))): input data for univariate sequences
    * arrayn (array(array(array(int)))): input data for multivariate
      sequences,
    * timev (renewal_data), file_name (string).

    :Optional Parameters:

    * Identifiers (array(int)): explicit identifiers of sequences. This
      optional argument can only be used if the first argument is of type
      array(array(int/array(int))). When omitted, the identifiers
      0, 1, ..., len(array) - 1 are used.
    * IndexParameter (string): type of the explicit index parameter:
      "Position" or "Time" (the default: implicit discrete index parameter
      starting at 0). This optional argument can only be used if the first
      argument is of type array(array(int/array(int))).
    * Format (string): "Current" or "Old" (default "Old"); only read when
      the first argument is a file name.

    :Returns:

    If the construction succeeds, an object of type sequences or
    discrete_sequences is returned, otherwise no object is returned. The
    returned object is of type discrete_sequences if all the variables are
    of type STATE, if the possible values for each variable are consecutive
    from 0 and if the number of possible values for each variable is <= 15.

    :Raises:

    * IOError: the first argument is a string that is not an existing file,
    * NotImplementedError: the first argument is a ``_Sequences`` object,
    * ValueError: the first argument is a list and IndexParameter is not
      given,
    * TypeError: the first argument has an unsupported type.

    :Examples:

    .. doctest::
        :options: +SKIP

        >>> Tops(array1, Identifiers=[1, 8, 12])
        >>> Tops(arrayn, Identifiers=[1, 8, 12], IndexParameter="Position")
        >>> Tops(timev)
        >>> Tops(file_name)

    .. seealso::
        :class:`~openalea.stat_tool.output.Save`,
        :func:`~openalea.sequence_analysis.data_transform.AddAbsorbingRun`,
        :func:`~openalea.stat_tool.cluster.Cluster`,
        :func:`~openalea.sequence_analysis.data_transform.Cumulate`,
        :func:`~openalea.sequence_analysis.data_transform.Difference`,
        :func:`~openalea.sequence_analysis.data_transform.IndexParameterExtract`,
        :func:`~openalea.sequence_analysis.data_transform.LengthSelect`,
        :func:`~openalea.stat_tool.data_transform.Merge`,
        :func:`~openalea.stat_tool.data_transform.MergeVariable`,
        :func:`~openalea.sequence_analysis.data_transform.MovingAverage`,
        :func:`~openalea.sequence_analysis.data_transform.RecurrenceTimeSequences`,
        :func:`~openalea.sequence_analysis.data_transform.RemoveRun`,
        :func:`~openalea.sequence_analysis.data_transform.Reverse`,
        :func:`~openalea.sequence_analysis.data_transform.SegmentationExtract`,
        :func:`~openalea.stat_tool.data_transform.SelectIndividual`,
        :func:`~openalea.stat_tool.data_transform.SelectVariable`,
        :func:`~openalea.stat_tool.data_transform.Shift`,
        :func:`~openalea.stat_tool.cluster.Transcode`,
        :func:`~openalea.stat_tool.data_transform.ValueSelect`,
        :func:`~openalea.sequence_analysis.data_transform.VariableScaling`,
        :func:`~openalea.stat_tool.data_transform.ExtractHistogram`,
        :func:`~openalea.sequence_analysis.data_transform.ExtractVectors`,
        :func:`~openalea.sequence_analysis.correlation.ComputeCorrelation`,
        :func:`~openalea.sequence_analysis.correlation.ComputePartialAutoCorrelation`,
        :func:`~openalea.sequence_analysis.data_transform.ComputeSelfTransition`,
        :func:`~openalea.sequence_analysis.compare.Compare`,
        :func:`~openalea.sequence_analysis.estimate.Estimate`,
        :func:`ComputeStateTops`,
        :func:`~openalea.sequence_analysis.simulate.Simulate`.
    """
    error.CheckArgumentsLength(args, 1, 1)

    index_parameter = error.ParseKargs(kargs, "IndexParameter",
                                       "IMPLICIT_TYPE",
                                       index_parameter_type_map)
    Identifiers = error.ParseKargs(kargs, "Identifiers", None)

    data = args[0]

    if isinstance(data, str):
        #todo: add True, False instead or as well as Current, Old
        #todo: !!! OldFormat set to True does not work in CPP code
        OldFormat = error.ParseKargs(kargs, "Format", "Old",
                                     {"Current": False, "Old": True})
        filename = data
        if os.path.isfile(filename):
            return _Tops(filename, OldFormat)
        raise IOError("bad file name")

    elif isinstance(data, _Sequences):
        # NotImplemented is a comparison sentinel, not an exception class;
        # raising it would produce an unrelated TypeError.
        raise NotImplementedError(
            "construction of Tops from Sequences is not implemented")
        #return _Tops(args[0])

    elif isinstance(data, list):
        # None means "not provided": default identifiers are built below.
        error.CheckType([Identifiers], [[list, type(None)]])
        # NOTE(review): the list form is rejected unless IndexParameter is
        # given explicitly, although the docstring describes it as
        # optional -- confirm the intended behaviour.
        if not kargs.get("IndexParameter"):
            raise ValueError("wrong arguments ?")
        if not Identifiers:
            # materialise the range so that a real list is handed over
            Identifiers = list(range(len(data)))
        return _Tops(data, Identifiers, index_parameter)

    else:
        raise TypeError("""Expected a valid filename or a list of lists (e.g., [[1,0],[0,1]])""")
def _estimate_renewal_interval_data(obj, **kargs):
    """Estimate a renewal process from inter-event interval data.

    :Parameters:

    * obj: object providing ``estimation`` and ``estimation_inter_event``.

    :Optional Parameters:

    * Estimator (string): key of ``estimator_type`` (default "Likelihood");
      only Likelihood and PenalizedLikelihood are meaningful here,
    * NbIteration (int): default ``I_DEFAULT``,
    * InitialInterEvent (_DiscreteParametricModel, _DiscreteMixture,
      _Convolution, _Compound): initial inter-event distribution,
    * Penalty (string): key of ``smoothing_penalty_type``
      (default "SecondDifference"),
    * Weight (int, float): default ``D_DEFAULT``,
    * Outside (string): key of ``outside_type`` (default "Zero"),
    * InterEventMean (string): key of ``mean_computation_map``
      (default "Computed").

    :Returns:

    The result of ``obj.estimation_inter_event`` when an initial
    inter-event distribution is given, of ``obj.estimation`` otherwise.

    :Raises:

    * ValueError: incompatible combination of options.

    .. todo:: to be completed and validated with tests

    see stat_func4 in aml
    """
    #only LIKELIHOOD and PENALIZED_LIKELIHOOD
    Estimator = error.ParseKargs(kargs, "Estimator", 'Likelihood',
                                 estimator_type)

    NbIteration = kargs.get("NbIteration", I_DEFAULT)
    error.CheckType([NbIteration], [int])

    # distribution
    InitialInterEvent = kargs.get("InitialInterEvent", None)
    error.CheckType([InitialInterEvent], [[type(None),
                                           _DiscreteParametricModel,
                                           _DiscreteMixture,
                                           _Convolution, _Compound]])
    # Cast only when a distribution was actually supplied; None must stay
    # None so that the plain estimation is used below.
    if InitialInterEvent is not None:
        if isinstance(InitialInterEvent, _DiscreteParametricModel):
            InitialInterEvent = _DiscreteParametric(InitialInterEvent)
        else:
            InitialInterEvent = _Distribution(InitialInterEvent)
    #cast initialInterEvent to parametric ?

    Penalty = error.ParseKargs(kargs, "Penalty", "SecondDifference",
                               smoothing_penalty_type)

    Weight = kargs.get("Weight", D_DEFAULT)
    error.CheckType([Weight], [[int, float]])

    Outside = error.ParseKargs(kargs, "Outside", "Zero", outside_type)

    InterEventMean = error.ParseKargs(kargs, "InterEventMean", 'Computed',
                                      mean_computation_map)

    if Estimator == estimator_type['PenalizedLikelihood']:
        if kargs.get("InterEventMean") is None:
            InterEventMean = ONE_STEP_LATE
        elif InterEventMean == COMPUTED:
            raise ValueError(""" Incompatible options Estimator and InterEventMean""")
    else:
        # the smoothing options are reserved to the penalized likelihood
        if kargs.get("Penalty"):
            raise ValueError("""Incompatible options Penalty with type o""")
        if kargs.get("Weight"):
            raise ValueError("""Incompatible options Weight with type o""")
        if kargs.get("Outside"):
            raise ValueError("""Incompatible options Outside with type o""")

    # The same calls are issued whether or not obj is a
    # _FrequencyDistribution, so no distinction is made on its type.
    if InitialInterEvent is not None:
        renew = obj.estimation_inter_event(InitialInterEvent, Estimator,
                                           NbIteration, InterEventMean,
                                           Weight, Penalty, Outside)
    else:
        renew = obj.estimation(Estimator, NbIteration, InterEventMean,
                               Weight, Penalty, Outside)

    return renew
def _estimate_renewal_count_data(obj, itype, **kargs):
    """Estimate a renewal process from count data.

    :Parameters:

    * obj (_TimeEvents, _RenewalData): the count data,
    * itype (string): "Ordinary" or "Equilibrium".

    :Optional Parameters:

    * Estimator (string): key of ``estimator_type`` (default "Likelihood"),
    * NbIteration (int): default ``I_DEFAULT``,
    * InitialInterEvent (_DiscreteParametricModel, _DiscreteMixture,
      _Convolution, _Compound): initial inter-event distribution,
    * EquilibriumEstimator (string): key of ``estimator_semi_markov_type``
      (default "CompleteLikelihood"); only with itype "Equilibrium",
    * InterEventMean (string): key of ``mean_computation_map``
      (default "Computed"); only with itype "Equilibrium",
    * Penalty (string): key of ``smoothing_penalty_type``
      (default "SecondDifference"),
    * Outside (string): key of ``outside_type`` (default "Zero"),
    * Weight (int, float): default -1.

    :Returns:

    The result of ``obj.estimation_inter_event_type`` when an initial
    inter-event distribution is given, of ``obj.estimation_type`` otherwise.

    :Raises:

    * AttributeError: `itype` is neither "Ordinary" nor "Equilibrium",
    * Exception: an equilibrium-only option is used with another type,
    * ValueError: incompatible combination of estimator options.
    """
    error.CheckType([obj, itype], [[_TimeEvents, _RenewalData], str])

    # map the user-level name to the one-letter process type code
    if itype == "Ordinary":
        Type = 'o'
    elif itype == "Equilibrium":
        Type = 'e'
    else:
        raise AttributeError("type must be Ordinary or Equilibrium")

    Estimator = error.ParseKargs(kargs, "Estimator", 'Likelihood',
                                 estimator_type)

    NbIteration = kargs.get("NbIteration", I_DEFAULT)
    error.CheckType([NbIteration], [int])

    InitialInterEvent = kargs.get("InitialInterEvent", None)
    error.CheckType([InitialInterEvent], [[type(None),
                                           _DiscreteParametricModel,
                                           _DiscreteMixture,
                                           _Convolution, _Compound]])

    EquilibriumEstimator = error.ParseKargs(kargs, "EquilibriumEstimator",
                                            'CompleteLikelihood',
                                            estimator_semi_markov_type)

    InterEventMean = error.ParseKargs(kargs, "InterEventMean", 'Computed',
                                      mean_computation_map)

    Penalty = error.ParseKargs(kargs, "Penalty", "SecondDifference",
                               smoothing_penalty_type)

    Outside = error.ParseKargs(kargs, "Outside", "Zero", outside_type)

    Weight = kargs.get("Weight", -1.)
    error.CheckType([Weight], [[int, float]])

    # These two options are reserved to the equilibrium process; the
    # messages now state the actual constraint.
    if Type != 'e':
        if kargs.get("EquilibriumEstimator"):
            raise Exception(
                "EquilibriumEstimator can only be used with type='e'")
        if kargs.get("InterEventMean"):
            raise Exception(
                "InterEventMean can only be used with type='e'")

    if Estimator == estimator_type['PenalizedLikelihood']:
        if kargs.get("InterEventMean") is None:
            InterEventMean = ONE_STEP_LATE
        elif InterEventMean == COMPUTED:
            raise ValueError(""" Incompatible options Estimator and InterEventMean""")
    else:
        # the smoothing options are reserved to the penalized likelihood
        if kargs.get("Penalty"):
            raise ValueError("""Incompatible options Penalty with type o""")
        if kargs.get("Weight"):
            raise ValueError("""Incompatible options Weight with type o""")
        if kargs.get("Outside"):
            raise ValueError("""Incompatible options Outside with type o""")

    if InitialInterEvent:
        #cast from InitialInterEvent to Mixture, Compound should be done
        if isinstance(InitialInterEvent, _DiscreteParametricModel):
            InitialInterEvent = _DiscreteParametric(InitialInterEvent)
        else:
            InitialInterEvent = _Distribution(InitialInterEvent)
        renew = obj.estimation_inter_event_type(Type, InitialInterEvent,
                                                Estimator, NbIteration,
                                                EquilibriumEstimator,
                                                InterEventMean, Weight,
                                                Penalty, Outside)
    else:
        renew = obj.estimation_type(Type, Estimator, NbIteration,
                                    EquilibriumEstimator, InterEventMean,
                                    Weight, Penalty, Outside)

    return renew
def TopParameters(*args, **kargs):
    """TopParameters

    Build the 'top' parameters either from the three numeric parameters or
    from an ASCII file.

    :Usage:

        TopParameters(proba, axillary_proba, rhythm_ratio, MaxPosition=40)
        TopParameters(file_name, MaxPosition=40)

    :Arguments:

    * proba (int, real): growth probability of the parent shoot,
    * axillary_proba (int, real): growth probability of the offspring
      shoots,
    * rhythm_ratio (int, real): growth rhythm ratio offspring shoots /
      parent shoot,
    * file_name (string).

    :Optional Arguments:

    MaxPosition (int): maximum position for the computation of the
    distributions of the number of internodes of offspring shoots
    (default value: ``DEFAULT_MAX_POSITION``; must be lower than
    ``MAX_POSITION``).

    :Returned Object:

    If the construction succeeds, an object of type top_parameters is
    returned. An IOError is raised when the file does not exist and an
    AssertionError when a parameter is out of range.

    :Background:

    The aim of the model of 'tops' is to relate the growth of offspring
    shoots to the growth of their parent shoot in the case of immediate
    branching. In the case where the arguments are the three 'top'
    parameters, the constraints over these parameters are described in the
    definition of the syntactic form of the type top_parameters (cf. File
    Syntax).

    :Example:

    .. plot::
        :include-source:
        :width: 50%

        from openalea.sequence_analysis import TopParameters
        top = TopParameters(0.4, 0.5,0.6)
        top.plot()

    .. seealso::
        :func:`~openalea.stat_tool.output.Save`,
        :func:`~openalea.sequence_analysis.simulate.Simulate`.
    """
    error.CheckArgumentsLength(args, 1, 3)

    MaxPosition = kargs.get("MaxPosition", DEFAULT_MAX_POSITION)
    assert MaxPosition < MAX_POSITION

    # file name form: a single positional argument
    if len(args) == 1:
        error.CheckType([args[0], MaxPosition], [str, int])
        path = args[0]
        if not os.path.isfile(path):
            raise IOError("bad file name")
        return _TopParameters(path, MaxPosition)

    # numeric form: exactly three positional arguments
    error.CheckArgumentsLength(args, 3, 3)
    error.CheckType([args[0], args[1], args[2], MaxPosition],
                    [[int, float], [float, int], [float, int], int])

    probability, axillary_probability, rhythm_ratio = args

    # range constraints on the three parameters
    assert TOP_MIN_PROBABILITY <= probability <= 1
    assert TOP_MIN_PROBABILITY <= axillary_probability <= 1
    assert MIN_RHYTHM_RATIO <= rhythm_ratio <= 1. / MIN_RHYTHM_RATIO

    return _TopParameters(probability, axillary_probability, rhythm_ratio,
                          MaxPosition)
def _SetMTGVidDictionary(self, VidDict, TreeId=None, ValidityCheck=False):
    """Set the dictionaries corresponding to the tree -> MTG and
    MTG -> tree vertex identifiers correspondences.

    :Usage:

        _SetMTGVidDictionary(self, VidDict, TreeId=None, ValidityCheck=False)

    :Parameters:

      `VidDict` (list or dict) - Dictionary or list of dictionaries
      with the vids of a MTG as keys and the corresponding vertex
      identifiers of the trees in self as values

      `TreeId` (int) - Identifier of the tree whose MTG ids must be set
      (all trees in self if None)

      `ValidityCheck` (bool) - Check whether the dictionary values
      correspond to valid tree vertex identifiers and whether the keys
      are integers

    :Remarks:

      If TreeId is None, VidDict must be a list of dictionaries
      with length self.NbTrees(). Otherwise, VidDict must be a single
      dictionary.
      If TreeId is None and VidDict is empty, all the correspondences are
      erased.
      A warning is issued when an existing correspondence is overwritten.

    :Raises:

      ValueError - wrong number of dictionaries, or the same tree vertex
      is the value of several MTG vertices.

      KeyError - ValidityCheck is True and the root of a tree has no
      associated MTG vertex.

    :Examples:

    .. doctest::
        :options: +SKIP

        >>> _SetMTGVidDictionary(self, VidDict, TreeId=None, ValidityCheck=False)

    .. seealso::
        :func:`~openalea.tree_statistic.trees.Trees.MTGVertexId`,
        :func:`~openalea.tree_statistic.trees.Trees.TreeVertexId`.
    """
    import warnings

    msg = "Correspondence between MTG and tree vertex identifiers "
    msg += "was previously defined already. This correspondence "
    msg += "will be overwritten."

    nb_trees = self.NbTrees()

    if ((TreeId is None) and (self.__mtg_to_tree_vid is not None)
            and (len(self.__mtg_to_tree_vid) > 0)):
        warnings.warn(msg, Warning)

    # lazily create the containers: one dictionary per tree
    if self.__mtg_to_tree_vid is None:
        self.__mtg_to_tree_vid = [{} for _ in range(nb_trees)]
    if self.__tree_to_mtg_vid is None:
        self.__tree_to_mtg_vid = [{} for _ in range(nb_trees)]
    if self.__tree_to_mtg_tid is None:
        self.__tree_to_mtg_tid = {}
    if self.__mtg_to_tree_tid is None:
        self.__mtg_to_tree_tid = {}

    if TreeId is not None:
        self._valid_tree(TreeId)
        # expand the single dictionary into one dictionary per tree so
        # that the code below can always index VidDict by tree
        single_dict = dict(VidDict)
        VidDict = []
        for t in range(nb_trees):
            if TreeId == t:
                VidDict.append(single_dict)
                if len(self.__mtg_to_tree_vid[t]) > 0:
                    warnings.warn(msg, Warning)
            else:
                VidDict.append({})
    elif len(VidDict) != nb_trees:
        if len(VidDict) == 0:
            # erase dictionaries; nothing is left to check or to copy
            self.__mtg_to_tree_vid = None
            self.__tree_to_mtg_vid = None
            self.__tree_to_mtg_tid = None
            self.__mtg_to_tree_tid = None
            return
        msg = "Bad number of dictionaries: " + str(len(VidDict))
        msg += " - should be " + str(nb_trees)
        raise ValueError(msg)

    # trees whose correspondence has to be (re)defined
    targets = [t for t in range(nb_trees)
               if (TreeId is None) or (TreeId == t)]

    if ValidityCheck:
        for t in targets:
            for v in VidDict[t].values():
                self._valid_vid(t, v)
            for k in VidDict[t].keys():
                check_error.CheckType([k], [int])

    for t in targets:
        # copy dictionary MTG->Tree
        self.__mtg_to_tree_vid[t] = dict(VidDict[t])
        # build dictionary Tree->MTG
        tree_to_mtg = {}
        self.__tree_to_mtg_vid[t] = tree_to_mtg
        for k in VidDict[t].keys():
            v = VidDict[t][k]
            if v in tree_to_mtg:
                msg = "Tree vertex " + str(v)
                msg += " already present in dictionary for "
                msg += "tree " + str(t)
                raise ValueError(msg)
            tree_to_mtg[v] = k
        # update dictionaries MTGComponentRoot <--> Tree Roots
        tr = self._ctrees().Tree(t).Root() # tree root
        try:
            v = tree_to_mtg[tr] # MTGComponentRoot
        except KeyError:
            if ValidityCheck:
                raise
            # fall back on the smallest MTG vertex identifier
            v = sorted(VidDict[t].keys())[0]
        self.__mtg_to_tree_tid[v] = t
        self.__tree_to_mtg_tid[t] = v
def TimeEvents(*args, **kargs):
    """TimeEvents

    Construction of data of type {time interval between two observation
    dates, number of events occurring between these two observation dates}
    from time sequences, from an object of type HISTOGRAM or from an ASCII
    file.

    :Usage:

    .. doctest::
        :options: +SKIP

        >>> TimeEvents(seq1, begin_date, end_date, PreviousDate=3, NextDate=8)
        >>> TimeEvents(seqn, variable, begin_date, end_date, PreviousDate=3,\
            NextDate=8)
        >>> TimeEvents(histo, time)
        >>> TimeEvents(file_name)
        >>> h = Histogram([1,1,1,2,2,2])
        >>> t = TimeEvents(h, 2)

    :Arguments:

    * seq1 (sequences): univariate time sequences (with an explicit index
      parameter of type TIME),
    * seqn (sequences): multivariate time sequences (with an explicit index
      parameter of type TIME),
    * variable (int): variable index,
    * begin_date (int): initial observation date,
    * end_date (int): final observation date,
    * histo (histogram, mixture_data, convolution_data, compound_data):
      number of events frequency distribution,
    * time (int): time interval between two observation dates (length of
      the observation period),
    * file_name (string).

    :Optional Arguments:

    * PreviousDate (int): date preceding the initial observation date to
      check the increasing character of the number of events. This optional
      argument can only be used if the first mandatory argument is of type
      sequences (default: -1).
    * NextDate (int): date following the final observation date to check
      the increasing character of the number of events. This optional
      argument can only be used if the first mandatory argument is of type
      sequences (default: -1).

    :Returned Object:

    If the construction succeeds, an object of type time_events is
    returned. An IOError is raised when `file_name` is not an existing
    file.

    .. seealso::
        :func:`Save`,
        :func:`~openalea.stat_tool.data_transform.ExtractHistogram`,
        :func:`~openalea.stat_tool.data_transform.Merge`,
        :func:`~openalea.sequence_analysis.time_events.NbEventSelect`,
        :func:`~openalea.sequence_analysis.data_transform.TimeScaling`,
        :func:`~openalea.sequence_analysis.data_transform.TimeSelect`.

    .. todo:: fix the build_time_events method to allows constructor with
        histogram issue: this method is in stat_tool and returns a time
        events so stat_tool requires to know sequence_analysis...
    """
    PreviousDate = kargs.get("PreviousDate", -1)
    NextDate = kargs.get("NextDate", -1)

    # With no positional argument, fall through to the last branch so that
    # the argument-length check reports the error.
    first = args[0] if args else None

    if len(args) == 1 and isinstance(first, str):
        filename = first
        if os.path.isfile(filename):
            time_events = _TimeEvents(filename)
        else:
            raise IOError("bad file name")

    elif isinstance(first, _Sequences):
        seq = first
        if seq.nb_variable != 1:
            # multivariate: TimeEvents(seqn, variable, begin_date, end_date)
            error.CheckArgumentsLength(args, 4, 4)
            variable = args[1]
            begin_date = args[2]
            end_date = args[3]
        else:
            # univariate: TimeEvents(seq1, begin_date, end_date)
            variable = 1
            begin_date = args[1]
            end_date = args[2]
        error.CheckType([variable, begin_date, end_date], [int, int, int])
        time_events = seq.extract_time_events(variable, begin_date, end_date,
                                              PreviousDate, NextDate)

    else:
        # should work with Histogram, Mixture_data, Conv_data, comp_data
        error.CheckArgumentsLength(args, 2, 2)
        error.CheckType([args[0], args[1]],
                        [[_DiscreteDistributionData, _DiscreteMixtureData,
                          _ConvolutionData, _CompoundData], int])
        distribution = args[0]
        time = args[1]
        time_events = _TimeEvents(distribution, time)

    return time_events