Esempio n. 1
0
def power_law_fit(data, xmin=None, method="auto", return_alpha_only=False):
    """Fitting a power-law distribution to empirical data

    @param data: the data to fit, a list containing numeric values (integers,
      or fractional values when the data comes from a continuous scale).
    @param xmin: the lower bound for fitting the power-law. If C{None} or
      negative, the optimal xmin value will be estimated as well. Zero means
      that the smallest possible xmin value will be used.
    @param method: the fitting method to use (case-insensitive). The
      following methods are implemented so far:

        - C{continuous}, C{hill}: exact maximum likelihood estimation
          when the input data comes from a continuous scale. This is
          known as the Hill estimator. The statistical error of
          this estimator is M{(alpha-1) / sqrt(n)}, where alpha is the
          estimated exponent and M{n} is the number of data points above
          M{xmin}. The estimator is known to exhibit a small finite
          sample-size bias of order M{O(n^-1)}, which is small when
          M{n > 100}. igraph will try to compensate for the finite sample
          size if n is small.

        - C{discrete}: exact maximum likelihood estimation when the
          input comes from a discrete scale (see Clauset et al among the
          references).

        - C{auto}: exact maximum likelihood estimation where the continuous
          method is used if the input vector contains at least one fractional
          value and the discrete method is used if the input vector contains
          integers only.

    @param return_alpha_only: deprecated. When true, a deprecation notice is
      issued through igraph's C{deprecated} helper and only the fitted
      exponent is returned instead of the full result object.

    @return: a L{FittedPowerLaw} object. The fitted C{xmin} value and the
      power-law exponent can be queried from the C{xmin} and C{alpha}
      properties of the returned object. If C{return_alpha_only} is true,
      the C{alpha} property of that object is returned instead.

    @raise ValueError: if C{method} is not one of the names listed above.

    @newfield ref: Reference
    @ref: MEJ Newman: Power laws, Pareto distributions and Zipf's law.
      Contemporary Physics 46, 323-351 (2005)
    @ref: A Clauset, CR Shalizi, MEJ Newman: Power-law distributions
      in empirical data. E-print (2007). arXiv:0706.1062"""
    # A missing or negative lower bound is handed to the fitting routine as
    # -1, i.e. the same value that is used when xmin has to be estimated.
    if xmin is None or xmin < 0:
        xmin = -1

    method = method.lower()
    if method not in ("continuous", "hill", "discrete", "auto"):
        raise ValueError("unknown method: %s" % method)

    # Imported only after the arguments have been validated, so an invalid
    # method is reported without having to load the C extension first.
    from igraph._igraph import _power_law_fit

    # "discrete" and "auto" are not distinguished at this level: both pass
    # force_continuous=False and leave the choice to the fitting routine.
    force_continuous = method in ("continuous", "hill")
    fit = FittedPowerLaw(*_power_law_fit(data, xmin, force_continuous))
    if not return_alpha_only:
        return fit

    # Legacy behaviour: tell the caller it is going away, then return the
    # bare exponent.
    from igraph import deprecated

    deprecated(
        "The return_alpha_only keyword argument of power_law_fit is "
        "deprecated from igraph 0.7 and will be removed in igraph 0.8"
    )
    return fit.alpha
def power_law_fit(data, xmin=None, method="auto", return_alpha_only=False):
    """Fitting a power-law distribution to empirical data

    @param data: the data to fit, a list containing numeric values (integers,
      or fractional values when the data comes from a continuous scale).
    @param xmin: the lower bound for fitting the power-law. If C{None} or
      negative, the optimal xmin value will be estimated as well. Zero means
      that the smallest possible xmin value will be used.
    @param method: the fitting method to use (case-insensitive). The
      following methods are implemented so far:

        - C{continuous}, C{hill}: exact maximum likelihood estimation
          when the input data comes from a continuous scale. This is
          known as the Hill estimator. The statistical error of
          this estimator is M{(alpha-1) / sqrt(n)}, where alpha is the
          estimated exponent and M{n} is the number of data points above
          M{xmin}. The estimator is known to exhibit a small finite
          sample-size bias of order M{O(n^-1)}, which is small when
          M{n > 100}. igraph will try to compensate for the finite sample
          size if n is small.

        - C{discrete}: exact maximum likelihood estimation when the
          input comes from a discrete scale (see Clauset et al among the
          references).

        - C{auto}: exact maximum likelihood estimation where the continuous
          method is used if the input vector contains at least one fractional
          value and the discrete method is used if the input vector contains
          integers only.

    @param return_alpha_only: deprecated. When true, a deprecation notice is
      issued through igraph's C{deprecated} helper and only the fitted
      exponent is returned instead of the full result object.

    @return: a L{FittedPowerLaw} object. The fitted C{xmin} value and the
      power-law exponent can be queried from the C{xmin} and C{alpha}
      properties of the returned object. If C{return_alpha_only} is true,
      the C{alpha} property of that object is returned instead.

    @raise ValueError: if C{method} is not one of the names listed above.

    @newfield ref: Reference
    @ref: MEJ Newman: Power laws, Pareto distributions and Zipf's law.
      Contemporary Physics 46, 323-351 (2005)
    @ref: A Clauset, CR Shalizi, MEJ Newman: Power-law distributions
      in empirical data. E-print (2007). arXiv:0706.1062"""
    # None and negative bounds are both normalised to -1 before the call
    # into the fitting routine (the "estimate xmin as well" case).
    if xmin is None or xmin < 0:
        xmin = -1

    method = method.lower()
    if method not in ("continuous", "hill", "discrete", "auto"):
        raise ValueError("unknown method: %s" % method)

    # Deferred until the arguments are known to be valid, so a bad method
    # name surfaces as ValueError without loading the C extension.
    from igraph._igraph import _power_law_fit

    # Only the continuous/Hill variants force the continuous estimator;
    # "discrete" and "auto" are passed on identically from here.
    force_continuous = method in ("continuous", "hill")
    fit = FittedPowerLaw(*_power_law_fit(data, xmin, force_continuous))
    if not return_alpha_only:
        return fit

    # Deprecated code path: warn, then return just the exponent.
    from igraph import deprecated
    deprecated(
        "The return_alpha_only keyword argument of power_law_fit is "
        "deprecated from igraph 0.7 and will be removed in igraph 0.8"
    )
    return fit.alpha
Esempio n. 3
0
def power_law_fit(data, xmin=None, method="auto", return_alpha_only=True):
    """Fitting a power-law distribution to empirical data

    @param data: the data to fit, a list containing numeric values (integers,
      or fractional values when the data comes from a continuous scale).
    @param xmin: the lower bound for fitting the power-law. If C{None} or
      negative, the optimal xmin value will be estimated as well. Zero means
      that the smallest possible xmin value will be used.
    @param method: the fitting method to use (case-insensitive). The
      following methods are implemented so far:

        - C{continuous}, C{hill}: exact maximum likelihood estimation
          when the input data comes from a continuous scale. This is
          known as the Hill estimator. The statistical error of
          this estimator is M{(alpha-1) / sqrt(n)}, where alpha is the
          estimated exponent and M{n} is the number of data points above
          M{xmin}. The estimator is known to exhibit a small finite
          sample-size bias of order M{O(n^-1)}, which is small when
          M{n > 100}. igraph will try to compensate for the finite sample
          size if n is small.

        - C{discrete}: exact maximum likelihood estimation when the
          input comes from a discrete scale (see Clauset et al among the
          references).

        - C{auto}: exact maximum likelihood estimation where the continuous
          method is used if the input vector contains at least one fractional
          value and the discrete method is used if the input vector contains
          integers only.

    @param return_alpha_only: whether to return the fitted exponent only.
      When this argument is C{True}, the function will return the fitted power-law
      exponent only and emit a C{PendingDeprecationWarning}. When C{False},
      the function will return a L{FittedPowerLaw} object with much more
      details. The default value is C{True} for the time being for sake of
      compatibility with earlier releases, but it will be changed to C{False}
      from igraph 0.7 onwards.

    @return: the fitted exponent or a L{FittedPowerLaw} object, depending on the
      value of C{return_alpha_only}.

    @raise ValueError: if C{method} is not one of the names listed above.

    @newfield ref: Reference
    @ref: MEJ Newman: Power laws, Pareto distributions and Zipf's law.
      Contemporary Physics 46, 323-351 (2005)
    @ref: A Clauset, CR Shalizi, MEJ Newman: Power-law distributions
      in empirical data. E-print (2007). arXiv:0706.1062"""
    # A missing or negative lower bound is handed to the fitting routine as
    # -1, i.e. the same value that is used when xmin has to be estimated.
    if xmin is None or xmin < 0:
        xmin = -1

    method = method.lower()
    if method not in ("continuous", "hill", "discrete", "auto"):
        raise ValueError("unknown method: %s" % method)

    # Imported only after the arguments have been validated, so an invalid
    # method is reported without having to load the C extension first.
    from igraph._igraph import _power_law_fit

    # "discrete" and "auto" are not distinguished at this level: both pass
    # force_continuous=False and leave the choice to the fitting routine.
    force_continuous = method in ("continuous", "hill")
    fit = FittedPowerLaw(*_power_law_fit(data, xmin, force_continuous))
    if not return_alpha_only:
        return fit

    from warnings import warn

    # stacklevel=2 attributes the warning to the code that called
    # power_law_fit(); warn() is invoked directly from this function, so a
    # higher level would point one frame too far up the stack.
    warn(
        "power_law_fit will return a FittedPowerLaw object from igraph "
        "0.7 onwards. Better prepare for that by setting return_alpha_only "
        "to False when calling power_law_fit()",
        PendingDeprecationWarning,
        stacklevel=2,
    )
    return fit.alpha