def test_mean():
    """utils.mean raises on empty input and returns the arithmetic mean."""
    with pytest.raises(ValueError):
        utils.mean([])
    vals = [random.normalvariate(0, 1) for _ in range(1000)]
    # Sample mean of 1000 draws from N(0, 1) should be near 0.  abs() is
    # required here: without it the assertion would also pass for a badly
    # wrong, very negative mean.
    assert abs(utils.mean(vals) - 0) < 0.1
    assert utils.mean([1, 1, 1]) == 1
def probability(tree, instance, attr, val):
    """
    Return the probability of a particular value of an attribute in the
    instance.

    The instance is categorized into the tree and the probability is read
    off the resulting concept.  If the instance currently contains the
    target attribute, a shallow copy without it is used so categorization
    cannot "peek" at the answer.

    :param tree: a categorization tree supporting ``categorize``
    :param instance: {a1: v1, a2: v2, ...} instance to query with
    :param attr: the target attribute
    :param val: the target value; a dict denotes a component attribute,
        in which case the probabilities of the structure-mapped
        sub-attributes are averaged
    :returns: the probability of attr having val in the given tree
    :rtype: float
    """
    if attr in instance:
        # Drop the target attribute before categorizing.
        instance = {a: instance[a] for a in instance if not a == attr}
    concept = tree.categorize(instance)

    if isinstance(val, dict):
        # Component value: structure-map the instance onto the concept and
        # average the probabilities of the mapped sub-attributes.
        structure_mapper = StructureMapper(concept)
        temp_instance = structure_mapper.transform(instance)
        mapping = structure_mapper.get_mapping()
        # A literal prefix test replaces the old anchored regex
        # search('^' + mapping[attr], ...): it is equivalent for literal
        # prefixes and, unlike the regex, cannot blow up when the mapped
        # name contains regex metacharacters (e.g. names starting with
        # '?').  Non-string keys (relation tuples) are skipped rather than
        # raising a TypeError from the regex engine.
        prefix = mapping[attr]
        probs = [concept.get_probability(sub_attr, temp_instance[sub_attr])
                 for sub_attr in temp_instance
                 if isinstance(sub_attr, str) and sub_attr.startswith(prefix)]
        return mean(probs)
    else:
        return concept.get_probability(attr, val)
def probability(tree, instance, attr, val):
    """
    Returns the probability of a particular value of an attribute in the
    instance. One of the scoring functions for incremental_evaluation.

    If the instance currently contains the target attribute a shallow copy
    is created to allow the attribute to be predicted.

    .. warning:: This is an older function in the library and we are not
        quite sure how to set it up for component values under the new
        representation and so for the time being it will raise an
        Exception if it encounters a component.

    :param tree: A category tree to evaluate.
    :type tree: :class:`CobwebTree <concept_formation.cobweb.CobwebTree>`,
        :class:`Cobweb3Tree <concept_formation.cobweb3.Cobweb3Tree>`, or
        :class:`TrestleTree <concept_formation.trestle.TrestleTree>`
    :param instance: An instance to use query the tree with
    :type instance: {a1:v1, a2:v2, ...}
    :param attr: A target instance attribute to evaluate probability on
    :type attr: :ref:`Attribute<attributes>`
    :param val: The target value of the given attr
    :type val: A :ref:`Nominal<val-nom>` or :ref:`Numeric<val-num>` value.
    :returns: The probability of the given instance attribute value in the
        given tree
    :rtype: float
    """
    if attr in instance:
        # Drop the target attribute before categorizing so the tree has to
        # predict it rather than read it.
        instance = {a: instance[a] for a in instance if not a == attr}
    concept = tree.categorize(instance)
    if isinstance(val, dict):
        # Component attributes are unsupported under the new representation.
        # (The old structure-mapping code that used to follow this raise was
        # unreachable and has been removed.)
        raise Exception(
            "Probability cannot be estimated on component attributes!")
    return concept.probability(attr, val)
def lowess(x, y, f=1./3., iter=3, confidence=0.95):
    """
    Performs Lowess smoothing

    Code adapted from: https://gist.github.com/agramfort/850437

    lowess(x, y, f=2./3., iter=3) -> yest

    Lowess smoother: Robust locally weighted regression. The lowess
    function fits a nonparametric regression curve to a scatterplot. The
    arrays x and y contain an equal number of elements; each pair (x[i],
    y[i]) defines a data point in the scatterplot. The function returns the
    estimated (smooth) values of y. The smoothing span is given by f. A
    larger value for f will result in a smoother curve. The number of
    robustifying iterations is given by iter (note: the parameter name
    shadows the builtin, kept for backward compatibility). The function
    will run faster with a smaller number of iterations.

    :returns: (yest, lower, upper) — the smoothed estimate and the lower /
        upper confidence bounds at each x.

    .. todo:: double check that the confidence bounds are correct
    """
    n = len(x)
    r = int(np.ceil(f * n))
    # Distance to the r-th nearest neighbour of each point sets the local
    # bandwidth for the tricube weights.
    bandwidth = [np.sort(np.abs(x - x[i]))[r] for i in range(n)]
    w = np.clip(np.abs((x[:, None] - x[None, :]) / bandwidth), 0.0, 1.0)
    w = (1 - w ** 3) ** 3
    yest = np.zeros(n)
    delta = np.ones(n)
    for _ in range(iter):
        # Weighted linear regression at every point, robustified by delta.
        for i in range(n):
            weights = delta * w[:, i]
            b = np.array([np.sum(weights * y), np.sum(weights * y * x)])
            A = np.array([[np.sum(weights), np.sum(weights * x)],
                          [np.sum(weights * x), np.sum(weights * x * x)]])
            beta = linalg.solve(A, b)
            yest[i] = beta[0] + beta[1] * x[i]
        residuals = y - yest
        s = np.median(np.abs(residuals))
        # Bisquare down-weighting of outliers for the next iteration.
        delta = np.clip(residuals / (6.0 * s), -1, 1)
        delta = (1 - delta ** 2) ** 2

    # Confidence half-widths from the residuals pooled at each x value.
    # (Previously this clobbered the bandwidth list `h` and rebound `n`.)
    half_width = np.zeros(n)
    for x_idx, x_val in enumerate(x):
        r2 = np.array([v * v for i, v in enumerate(residuals)
                       if x[i] == x_val])
        group_n = len(r2)
        se = sqrt(mean(r2)) / sqrt(len(r2))
        # t.ppf is the public API; the old code called the private t._ppf,
        # which skips argument validation and may break across SciPy
        # versions.
        half_width[x_idx] = se * t.ppf((1 + confidence) / 2., group_n - 1)

    return yest, yest - half_width, yest + half_width
def probability(tree, instance, attr, val):
    """
    Returns the probability of a particular value of an attribute in the
    instance. One of the scoring functions for incremental_evaluation.

    If the instance currently contains the target attribute a shallow copy
    is created to allow the attribute to be predicted.

    .. warning:: This is an older function in the library and we are not
        quite sure how to set it up for component values under the new
        representation and so for the time being it will raise an
        Exception if it encounters a component.

    :param tree: A category tree to evaluate.
    :type tree: :class:`CobwebTree <concept_formation.cobweb.CobwebTree>`,
        :class:`Cobweb3Tree <concept_formation.cobweb3.Cobweb3Tree>`, or
        :class:`TrestleTree <concept_formation.trestle.TrestleTree>`
    :param instance: An instance to use query the tree with
    :type instance: {a1:v1, a2:v2, ...}
    :param attr: A target instance attribute to evaluate probability on
    :type attr: :ref:`Attribute<attributes>`
    :param val: The target value of the given attr
    :type val: A :ref:`Nominal<val-nom>` or :ref:`Numeric<val-num>` value.
    :returns: The probability of the given instance attribute value in the
        given tree
    :rtype: float
    """
    if attr in instance:
        # Drop the target attribute before categorizing so the tree has to
        # predict it rather than read it.
        instance = {a: instance[a] for a in instance if not a == attr}
    concept = tree.categorize(instance)
    if isinstance(val, dict):
        # Component attributes are unsupported under the new representation.
        # (The old structure-mapping code that used to follow this raise was
        # unreachable and has been removed.)
        raise Exception(
            "Probability cannot be estimated on component attributes!")
    return concept.probability(attr, val)
def test_cv_mean():
    """ContinuousValue.update_batch produces the same mean as utils.mean."""
    for _ in range(10):
        values = [random.normalvariate(0, 1) for _ in range(100)]
        cv = ContinuousValue()
        cv.update_batch(values)
        # abs() is required: without it the assertion would also pass when
        # the difference is merely negative, however large in magnitude.
        assert abs(cv.mean - utils.mean(values)) < 0.00000000001
def test_cv_mean(self):
    """ContinuousValue.update_batch produces the same mean as utils.mean."""
    for _ in range(10):
        values = [random.normalvariate(0, 1) for _ in range(100)]
        cv = ContinuousValue()
        cv.update_batch(values)
        # abs() is required: without it the assertion would also pass when
        # the difference is merely negative, however large in magnitude.
        assert abs(cv.mean - utils.mean(values)) < 0.00000000001