def test_moments_poisson_arrival(self):
    """Check `stats.moment` on a `PoissonProcess(3.0)` source.

    The result must be a numpy array for both requested counts, and the
    first two moments must match 1/3 and 2/9 to 10 decimal places.
    """
    process = PoissonProcess(3.0)

    # Type checks first, for one and then two requested moments.
    for count in (1, 2):
        self.assertIsInstance(stats.moment(process, count), np.ndarray)

    # Value checks against the closed-form moments.
    assert_almost_equal(stats.moment(process, 1), [1 / 3], 10)
    assert_almost_equal(stats.moment(process, 2), [1 / 3, 2 / 9], 10)
def test_moments_map_arrival(self):
    """Check `stats.moment` on a `MAP.exponential(3.0)` source.

    The result must be a numpy array for both requested counts, and the
    first two moments must match 1/3 and 2/9 to 10 decimal places.
    """
    arrival = MAP.exponential(3.0)

    # Type checks first, for one and then two requested moments.
    for count in (1, 2):
        self.assertIsInstance(stats.moment(arrival, count), np.ndarray)

    # Value checks against the closed-form moments.
    assert_almost_equal(stats.moment(arrival, 1), [1 / 3], 10)
    assert_almost_equal(stats.moment(arrival, 2), [1 / 3, 2 / 9], 10)
def test_moments_exp_distribution(self):
    """Check `stats.moment` on an `Exp(3.0)` distribution.

    The result must be a numpy array for both requested counts, and the
    first two moments must match 1/3 and 2/9 to 10 decimal places.
    """
    distribution = Exp(3.0)

    # Type checks first, for one and then two requested moments.
    for count in (1, 2):
        self.assertIsInstance(stats.moment(distribution, count), np.ndarray)

    # Value checks against the closed-form moments.
    assert_almost_equal(stats.moment(distribution, 1), [1 / 3], 10)
    assert_almost_equal(stats.moment(distribution, 2), [1 / 3, 2 / 9], 10)
def test_moments_ph_distribution(self):
    """Check `stats.moment` on a `PhaseType.exponential(3.0)` distribution.

    The result must be a numpy array when one or two moments are
    requested; the first moment must equal 1/3 and the first three must
    equal 1/3, 2/9 and 2/9, all to 10 decimal places.
    """
    distribution = PhaseType.exponential(3.0)

    # Type checks first, for one and then two requested moments.
    for count in (1, 2):
        self.assertIsInstance(stats.moment(distribution, count), np.ndarray)

    # Value checks: a single moment, then the first three at once.
    assert_almost_equal(stats.moment(distribution, 1), [1 / 3], 10)
    assert_almost_equal(
        stats.moment(distribution, 3), [1 / 3, 2 / 9, 2 / 9], 10)
def test_moments_const_samples(self):
    """Check `stats.moment` on traces made of one repeated value.

    For a list of ten 1.0 samples and a list of ten 2.0 samples the
    result must be a numpy array, and the moments must be the matching
    powers of the constant.
    """
    ones = [1.0] * 10
    twos = [2.0] * 10

    # Type checks for every (trace, number of moments) combination.
    for trace in (ones, twos):
        for count in (1, 2):
            self.assertIsInstance(stats.moment(trace, count), np.ndarray)

    # (trace, number of moments, expected values)
    expectations = (
        (ones, 1, [1.0]),
        (ones, 2, [1, 1]),
        (twos, 1, [2.0]),
        (twos, 2, [2, 4]),
    )
    for trace, count, expected in expectations:
        assert_almost_equal(stats.moment(trace, count), expected)
def test_moments_exp_samples(self):
    """Check `stats.moment` on a random exponential trace.

    20000 samples are drawn with scale 1/5 from numpy's global random
    generator (no seed is set here); the estimated moments are compared
    with 0.2, 0.08 and 6/125 to 2 decimal places.
    """
    scale = 1 / 5.0
    trace = np.random.exponential(scale, 20000)

    self.assertIsInstance(stats.moment(trace, 1), np.ndarray)

    # Loose tolerance: these are sample estimates, not exact values.
    assert_almost_equal(stats.moment(trace, 1), [1 / 5], 2)
    assert_almost_equal(stats.moment(trace, 3), [0.2, 0.08, 6 / 125], 2)
def fit_map_horvath(ph, lags):
    """Find D1 matrix using a D0 as subgenerator of the given PH and lags.

    D0 is taken from ``ph.subgenerator`` unchanged; only D1 is fitted.

    Only the lag-1 coefficient is used: when more than one lag is passed,
    every coefficient after the first is ignored.

    With no lags, row ``i`` of D1 is ``(-D0 · 1)[i] * ph.init_probs``.

    With one lag, D1 is the solution of a bounded (non-negative) linear
    least-squares problem ``A x = b`` solved with
    `scipy.optimize.lsq_linear`, where ``x`` holds the entries of the
    time-normalized D1. The rows of ``A`` encode:

    - row sums of D1 equal ``-D0 · 1``;
    - a constraint built from ``pi · (-D0)^-1`` and ``pi`` through the
      `cbdiag` helper (presumably the stationarity condition
      ``gamma · D1 = pi`` -- confirm against `cbdiag`);
    - one row tying the solution to the requested lag-1 value.

    If `MAP` rejects the fitted matrices with `ValueError`, the lag is
    halved and the fit is retried; once ``|lag| < 1e-5`` the lag
    constraint is dropped and the no-lag construction is used.

    Args:
        ph (`pyqunet.distributions.PH`): a PH distribution approximating D0
        lags (array-like): a list of lag-k auto-correlation coefficients

    Returns:
        a `MAP` built from ``ph.subgenerator`` and the fitted D1.
    """
    if len(lags) > 1:
        # Only the first coefficient takes part in the fit.
        return fit_map_horvath(ph, [lags[0]])

    N = ph.order
    D0 = ph.subgenerator
    En = np.ones((N, 1))
    pi = ph.init_probs

    if len(lags) == 0:
        # No correlation requested: scale the initial distribution by the
        # exit rate of every state.
        D1_row_sum = (-D0).dot(En).reshape(N)
        D1 = np.asarray([row_sum * pi for row_sum in D1_row_sum])
        return MAP(D0, D1)

    lag1 = lags[0]

    # `mu` is the normalization factor returned with the normalized
    # moments; the system below is built in that normalized time scale and
    # the solution is scaled back by `/ mu` at the end.
    _, mu = stats.normalize_moments(stats.moment(ph, 2))
    D0ni = np.linalg.inv(-D0) / mu
    D0ni2 = D0ni.dot(D0ni)
    rate = ph.param * mu

    # Equality constraints shared by every candidate D1.
    d = (-D0 * mu).dot(En).reshape((N, 1))
    gamma = pi.dot(D0ni).reshape((1, N))
    block_gamma = cbdiag(N, [(0, gamma)])
    block_eye = np.hstack([np.eye(N)] * N)

    # Extra row carrying the lag-1 constraint.
    delta = pow(rate, 2) * pi.dot(D0ni2)
    f = D0ni.dot(En)
    # noinspection PyTypeChecker
    v = lag1 * (2 * pow(rate, 2) * pi.dot(D0ni2).dot(En).reshape(1)[0] - 1) + 1
    c = np.hstack([f_i * delta for f_i in f])

    A = np.vstack([block_eye, block_gamma, c])
    b = np.vstack([d, pi.reshape((N, 1)), [[v]]]).reshape(2 * N + 1)

    ret = scipy.optimize.lsq_linear(
        A, b, (0, np.inf), tol=1e-10, method='bvls')
    # noinspection PyUnresolvedReferences
    x = ret.x
    assert isinstance(x, np.ndarray)
    D1 = x.reshape((N, N)).transpose() / mu

    try:
        return MAP(D0, D1, rtol=1e-3, atol=1e-4)
    except ValueError:
        # The requested lag could not be matched by a valid MAP: give up
        # on correlation when the lag is already negligible (in absolute
        # value, so negative lags are retried too), otherwise retry with
        # half the lag.
        if np.abs(lag1) < 1e-5:
            return fit_map_horvath(ph, [])
        return fit_map_horvath(ph, [lag1 * 0.5])
def fit_map(source, order, method='opt', verbose=False, options=None,
            **kwargs):
    """Fit a MAP of a given order from a trace or from another arrival
    process (in the latter case, it must provide methods `moment(k)`,
    `lag(k)` and `generate(n)`).

    Four methods are supported:

    - 'opt': non-linear optimization over moments and lag-k coefficients
    - 'indi': independent fitting of D0 as PH (see `fit_ph`) and of D1
      using lag-k coefficients (see `fit_map_horvath`)
    - 'em': EM-procedure run on a trace
    - 'opt-cdf': fitting delegated to `fit_map_cdf`

    If `source` is an arrival process, e.g. another `pyqunet.arrivals.MAP`,
    then with 'opt' the MAP is fitted from the moments and lag-k
    correlations returned by `stats.moment` and `stats.lag`, while with
    'em' a trace is generated with `source.generate` and the EM algorithm
    is used. Behaviour of 'indi' depends on the selected PH-fitting
    method, see options.

    Args:
        source (array-like): a list of samples, or an arrival process
        order: an order of MAP to fit
        method: 'opt', 'indi', 'em' or 'opt-cdf'
        verbose: if `True`, progress will be printed to the screen
            (used by 'em' and passed to `fit_ph` by 'indi')
        options (dict): provides additional method-related options:
            - x0: initial guess, default: `None` (OPT, OPT-CDF)
            - loss: loss function, see `scipy.optimize.least_squares`
                (OPT)
            - numMoments: number of moments to use for fitting,
                default: 3 (OPT)
            - numLags: number of lag-k to use for fitting, default: 2
                (OPT, INDI)
            - maxIter: max number of iterations, default: 200 (EM)
            - stopCond: stop condition, default: 1e-7 (EM)
            - numSamples: number of samples to generate into a trace,
                default: 20'000 (EM, when source provides `generate`)
            - phFitMethod: 'opt' or 'gfit', default: 'opt' (INDI)
            - numComponents: default: 3 (OPT-CDF)
            - weights: default: `None` (OPT-CDF)

            With 'indi' the same dictionary is also passed to `fit_ph`,
            so its options apply to the PH-fitting step.
        **kwargs: only `numLags` is read, as a fallback for 'indi' when
            `options` has no 'numLags' entry.

    Returns:
        Markovian arrival process, `pyqunet.arrivals.MAP`

    Raises:
        ValueError: if `method` is not one of the supported names.
    """
    if options is None:
        options = {}

    if method == 'opt':
        x0 = options.get('x0', None)  # initial guess
        loss = options.get('loss', None)  # 'cauchy', ...
        num_moments = options.get('numMoments', 3)
        num_lags = options.get('numLags', 2)
        moments = stats.moment(source, num_moments)
        lags = stats.lag(source, num_lags)
        return fit_map_nonlinear_opt(moments, lags, order, x0, loss)

    if method == 'em':
        max_iter = options.get('maxIter', 200)
        stop_cond = options.get('stopCond', 1e-7)
        num_samples = options.get('numSamples', 20000)
        # EM works on samples: draw them when the source can generate
        # them, otherwise treat the source itself as the trace.
        if hasattr(source, 'generate'):
            trace = list(source.generate(num_samples))
        else:
            trace = list(source)
        d0, d1 = MAPFromTrace(trace, order, max_iter, stop_cond,
                              initial=None, retlogli=False, verbose=verbose)
        return MAP(d0, d1)

    if method == 'indi':
        ph_fit_method = options.get('phFitMethod', 'opt')
        # `options` takes precedence over the keyword argument.
        num_lags = options.get('numLags', kwargs.get('numLags', 2))
        lags = stats.lag(source, num_lags)
        ph = fit_ph(source, order, ph_fit_method, verbose, options)
        return fit_map_horvath(ph, lags)

    if method == 'opt-cdf':
        x0 = options.get('x0', None)  # initial guess
        num_components = options.get('numComponents', 3)
        weights = options.get('weights', None)
        return fit_map_cdf(source, num_components, weights, order, x0=x0)

    raise ValueError(
        "method '{}' not supported, use 'opt', 'em', 'indi' or "
        "'opt-cdf'".format(method))
def fit_ph(source, order, method='opt', verbose=False, options=None):
    """Fit a phase-type (PH) distribution of the requested order.

    `source` is either a trace (a sequence of samples) or another
    distribution; a distribution has to offer `moment(k)` and
    `generate(n)`.

    Supported methods:

    - 'opt': non-linear optimization on the moments obtained from
      `stats.moment(source, numMoments)`;
    - 'gfit': G-Fit on a trace. If `source` has a `generate` attribute,
      `numSamples` samples are drawn from it, otherwise `source` itself
      is used as the trace.

    Args:
        source (array-like): a list of samples, or a distribution
        order: an order of PH to fit
        method: 'opt' or 'gfit'
        verbose: if `True`, progress will be printed to the screen (GFIT)
        options (dict): provides additional method-related options:
            - x0: initial guess, default: `None` (OPT, GFIT)
            - loss: loss function, see `scipy.optimize.least_squares`
                (OPT)
            - numMoments: number of moments to use for fitting,
                default: 3 (OPT)
            - weights: sample weights vector, default: `None` (GFIT)
            - maxIter: max number of iterations, default: 200 (GFIT)
            - stopCond: stop condition, default: 1e-7 (GFIT)
            - numSamples: number of samples to generate into a trace,
                default: 20'000 (GFIT, used when source is a distribution)

    Returns:
        phase-type distribution, `pyqunet.distributions.PH`

    Raises:
        ValueError: if `method` is neither 'opt' nor 'gfit'.
    """
    opts = {} if options is None else options

    if method == 'opt':
        initial_guess = opts.get('x0', None)
        loss_name = opts.get('loss', None)  # 'cauchy', ...
        moments_count = opts.get('numMoments', 3)
        fitted_moments = stats.moment(source, moments_count)
        return fit_ph_nonlinear_opt(
            fitted_moments, order, initial_guess, loss_name)

    if method == 'gfit':
        initial_guess = opts.get('x0', None)
        sample_weights = opts.get('weights', None)
        iterations_limit = opts.get('maxIter', 200)
        stop_threshold = opts.get('stopCond', 1e-7)
        trace_size = opts.get('numSamples', 20000)

        # A source that can generate samples is turned into a trace first.
        can_generate = hasattr(source, 'generate')
        samples = source.generate(trace_size) if can_generate else source
        trace = list(samples)

        tau, s = PHFromTrace(
            trace, order, sample_weights, iterations_limit, stop_threshold,
            initial_guess, result='vecmat', retlogli=False, verbose=verbose)
        return PhaseType(s, tau)

    raise ValueError(
        "method '{}' not supported, use 'opt', 'gfit'".format(method))