Example #1
0
 def test_standard_deviation(self):
     """
     Check calculate.standard_deviation against two known results and
     confirm that a list containing a string is rejected.

     The float results are compared with assertAlmostEqual rather than
     assertEqual so the test does not depend on the last bit of a
     floating point calculation.
     """
     self.assertAlmostEqual(calculate.standard_deviation([2, 3, 3, 4]),
                            0.70710678118654757, places=12)
     self.assertAlmostEqual(calculate.standard_deviation([-2, 3, 3, 40]),
                            16.867127793432999, places=12)
     # A non-numeric member must raise rather than be silently skipped
     self.assertRaises(ValueError, calculate.standard_deviation,
                       ['a', 2, 3, 3, 4])
Example #2
0
 def test_standard_deviation(self):
     """
     Check calculate.standard_deviation against two known results and
     confirm that a list containing a string is rejected.

     The float results are compared with assertAlmostEqual rather than
     assertEqual so the test does not depend on the last bit of a
     floating point calculation.
     """
     self.assertAlmostEqual(
         calculate.standard_deviation([2, 3, 3, 4]),
         0.70710678118654757,
         places=12
     )
     self.assertAlmostEqual(
         calculate.standard_deviation([-2, 3, 3, 40]),
         16.867127793432999,
         places=12
     )
     # A non-numeric member must raise rather than be silently skipped
     self.assertRaises(
         ValueError,
         calculate.standard_deviation,
         ['a', 2, 3, 3, 4]
     )
def summary_stats(data_list):
    """
    Accepts a sample of numbers and prints a plain-text report of a
    variety of descriptive statistics.

    The report is written to standard output; nothing is returned.
    """
    mean = calculate.mean(data_list)
    median = calculate.median(data_list)
    mode = calculate.mode(data_list)
    n = len(data_list)
    max_ = max(data_list)
    min_ = min(data_list)
    range_ = calculate.range(data_list)
    standard_deviation = calculate.standard_deviation(data_list)

    # print is called as a function so this parses on Python 3; with a
    # single parenthesized string it prints the same text on Python 2.
    print("""
Summary statistics
==================

n:        %s
max:        %s
min:        %s
range:        %s
mean:        %s
median:        %s
mode:        %s
std:        %s
""" % (n, max_, min_, range_, mean, median, mode, standard_deviation))
def variation_coefficient(data_list):
    """
    Accepts a list of values and returns the variation coefficient:
    the standard deviation of the values divided by their mean.

    Because the result is normalized by the mean, it can be used to
    compare the spread of sets that are measured in different units.

    Note that it uses our "population" standard deviation as part of the
    calculation, not a "sample" standard deviation.

    Raises a ValueError if any of the values cannot be converted to a
    float.

    h3. Example usage

        >>> import calculate
        >>> calculate.variation_coefficient([1, 2, -2, 4, -3])
        6.442049363362563

    h3. Documentation

        * "coefficient of variation":http://en.wikipedia.org/wiki/\
Coefficient_of_variation
    """
    # Coerce every value to a float up front; a string that is not a
    # number fails here, before any statistics are computed
    try:
        numbers = [float(value) for value in data_list]
    except ValueError:
        raise ValueError('Input values must contain numbers')
    return calculate.standard_deviation(numbers) / calculate.mean(numbers)
Example #5
0
    def has_sound_spike(self):
        """
        Report whether there has been a voltage spike in the past 10 seconds.

        The mean and standard deviation are taken over the voltages of the
        signals from the past 10 minutes. A signal from the past 10 seconds
        counts as a spike when its voltage is at least the mean plus two
        standard deviations.

        Returns the voltage of the highest such signal, or False when there
        are no signals in the 10 minute window or none of them is a spike.
        This method only reports the spike; it does not send anything itself.
        """
        # Read the clock once so both windows share the same end point
        now = timezone.localtime(timezone.now())
        ten_minutes = now - datetime.timedelta(minutes=10)
        ten_seconds = now - datetime.timedelta(seconds=10)
        signals_past_ten_min = self.signal_set.filter(
            timestamp__lt=now,
            timestamp__gte=ten_minutes)
        # count is a method and must be called: comparing the bound method
        # itself to 0 never tests whether any signals were found
        if signals_past_ten_min.count() == 0:
            return False

        voltages = list(
            signals_past_ten_min.values_list(
                'voltage', flat=True).order_by('voltage'))
        avg = calculate.mean(voltages)
        std_dev = calculate.standard_deviation(voltages)
        # Spike threshold: two standard deviations above the mean
        threshold = (std_dev * 2) + avg

        # Highest voltage first, so the first entry is the biggest spike
        spikes = list(
            signals_past_ten_min.filter(
                timestamp__gte=ten_seconds,
                voltage__gte=threshold).values_list(
                    'voltage', flat=True).order_by('-voltage'))
        if not spikes:
            return False
        return spikes[0]
Example #6
0
def summary_stats(data_list):
    """
    Prints a table of descriptive statistics for a sample of numbers.

    The figures come from the calculate module plus the len, max and min
    builtins. The table is laid out by ptable.indent and written to
    standard output; nothing is returned.
    """
    # Gather every figure first, before any formatting happens
    average = calculate.mean(data_list)
    midpoint = calculate.median(data_list)
    most_common = calculate.mode(data_list)
    count = len(data_list)
    highest = max(data_list)
    lowest = min(data_list)
    spread = calculate.range(data_list)
    deviation = calculate.standard_deviation(data_list)
    coefficient = calculate.variation_coefficient(data_list)

    # Label/value pairs in the order they appear in the table
    figures = (
        ('n', count),
        ('mean', average),
        ('median', midpoint),
        ('mode', most_common),
        ('maximum', highest),
        ('minimum', lowest),
        ('range', spread),
        ('standard deviation', deviation),
        ('variation coefficient', coefficient),
    )
    rows = [['Statistic', 'Value']]
    for label, value in figures:
        rows.append([label, str(value)])

    print(ptable.indent(
        rows,
        hasHeader=True,
        separateRows=False,
        prefix="| ", postfix=" |",
    ))
    def has_sound_spike(self):
        """
        Report whether there has been a voltage spike in the past 10 seconds.

        The mean and standard deviation are taken over the voltages of the
        signals from the past 10 minutes. A signal from the past 10 seconds
        counts as a spike when its voltage is at least the mean plus two
        standard deviations.

        Returns the voltage of the highest such signal, or False when there
        are no signals in the 10 minute window or none of them is a spike.
        This method only reports the spike; it does not send anything itself.
        """
        # Read the clock once so both windows share the same end point
        now = timezone.localtime(timezone.now())
        ten_minutes = now - datetime.timedelta(minutes=10)
        ten_seconds = now - datetime.timedelta(seconds=10)
        signals_past_ten_min = self.signal_set.filter(timestamp__lt=now, timestamp__gte=ten_minutes)
        # count is a method and must be called: comparing the bound method
        # itself to 0 never tests whether any signals were found
        if signals_past_ten_min.count() == 0:
            return False

        voltages = list(signals_past_ten_min.values_list("voltage", flat=True).order_by("voltage"))
        avg = calculate.mean(voltages)
        std_dev = calculate.standard_deviation(voltages)
        # Spike threshold: two standard deviations above the mean
        threshold = (std_dev * 2) + avg
        recent_spikes = signals_past_ten_min.filter(timestamp__gte=ten_seconds, voltage__gte=threshold)

        # Highest voltage first, so the first entry is the biggest spike
        spike_voltages = list(recent_spikes.values_list("voltage", flat=True).order_by("-voltage"))
        if not spike_voltages:
            return False
        return spike_voltages[0]
def standard_deviation_distance(obj_list, point_attribute_name='point'):
    """
    Accepts a geoqueryset, list of objects or list of dictionaries, each
    expected to carry a Point, and returns the standard deviation of the
    distances between those points and their mean center.

    In other words, it measures the average variation in how far the
    points sit from the mean center. Unlike a standard deviation ellipse,
    it does not have a direction.

    The Point is looked up under the name 'point' by default. If your
    field or key is called something else, pass that name as the kwarg
    'point_attribute_name'.

    Whether to read by key or by attribute is decided from the first item
    only, so the input must not be empty and should not mix dictionaries
    with other objects.

    h3. Example usage

        >> import calculate
        >> calculate.standard_deviation_distance(qs)
        0.046301584704149731

    h3. Dependencies

        * "django":http://www.djangoproject.com/
        * "geodjango":http://www.geodjango.org/

    h3. Documentation

        * "standard deviation distance":http://www.spatialanalysisonline.com/\
output/html/Directionalanalysisofpointdatasets.html
    """
    # Dictionaries are read by key, anything else by attribute
    if isinstance(obj_list[0], dict):
        def read_point(item):
            return item.get(point_attribute_name)
    else:
        def read_point(item):
            return getattr(item, point_attribute_name)

    center = calculate.mean_center(
        obj_list,
        point_attribute_name=point_attribute_name
    )
    distances = []
    for item in obj_list:
        distances.append(read_point(item).distance(center))
    return calculate.standard_deviation(distances)
Example #9
0
def standard_deviation_distance(obj_list, point_attribute_name='point'):
    """
    Returns the standard deviation distance of a set of points: the
    standard deviation of each point's distance from the mean center.

    Accepts a geoqueryset, list of objects or list of dictionaries whose
    members are expected to hold a Point. Unlike a standard deviation
    ellipse, the result does not have a direction.

    By default the Point is expected under the name 'point'. Change the
    kwarg 'point_attribute_name' if your field or key is called something
    else.

    The first item alone decides whether members are read by key or by
    attribute, so an empty input is not supported.

    h3. Example usage

        >> import calculate
        >> calculate.standard_deviation_distance(qs)
        0.046301584704149731

    h3. Dependencies

        * "django":http://www.djangoproject.com/
        * "geodjango":http://www.geodjango.org/

    h3. Documentation

        * "standard deviation distance":http://www.spatialanalysisonline.com/\
output/html/Directionalanalysisofpointdatasets.html
    """
    # Pick the accessor once, based on the type of the first member
    sample = obj_list[0]
    if isinstance(sample, dict):
        fetch = lambda member, name: member.get(name)
    else:
        fetch = getattr

    center = calculate.mean_center(
        obj_list,
        point_attribute_name=point_attribute_name
    )
    return calculate.standard_deviation([
        fetch(member, point_attribute_name).distance(center)
        for member in obj_list
    ])
def standard_deviation_distance(geoqueryset, point_attribute_name='point'):
    """
    Accepts a geoqueryset, expected to contain objects with Point
    properties, and returns the standard deviation of the distances
    between those points and their mean center.

    The standard deviation distance is the average variation in the
    distance of points from the mean center. Unlike a standard deviation
    ellipse, it does not have a direction.

    By default, the Point field on your model is expected to be called
    'point'. If it is called something else, change the kwarg
    'point_attribute_name' to whatever your field might be called.

    Raises a TypeError if the first parameter is not a GeoQuerySet.

    h3. Example usage

        >> import calculate
        >> calculate.standard_deviation_distance(qs)
        0.046301584704149731

    h3. Dependencies

        * "django":http://www.djangoproject.com/
        * "geodjango":http://www.geodjango.org/
        * "numpy":http://numpy.scipy.org/

    h3. Documentation

        * "standard deviation distance":http://www.spatialanalysisonline.com/\
output/html/Directionalanalysisofpointdatasets.html
    """
    if isinstance(geoqueryset, GeoQuerySet):
        center = calculate.mean_center(
            geoqueryset,
            point_attribute_name=point_attribute_name
        )
        # Distance of every point from the mean center
        spread = []
        for record in geoqueryset:
            location = getattr(record, point_attribute_name)
            spread.append(location.distance(center))
        return calculate.standard_deviation(spread)

    # Anything other than a GeoQuerySet is refused outright
    message = 'First parameter must be a Django GeoQuerySet. You submitted a %s object'
    raise TypeError(message % type(geoqueryset))
def standard_deviation_distance(geoqueryset, point_attribute_name='point'):
    """
    Returns the standard deviation distance of the points in a geoqueryset:
    the standard deviation of each point's distance from the mean center.

    The geoqueryset is expected to contain objects with Point properties.
    Unlike a standard deviation ellipse, the result does not have a
    direction.

    By default, the Point field on your model is expected to be called
    'point'. If it is called something else, change the kwarg
    'point_attribute_name' to whatever your field might be called.

    Raises a TypeError if the first parameter is not a GeoQuerySet.

    h3. Example usage

        >> import calculate
        >> calculate.standard_deviation_distance(qs)
        0.046301584704149731

    h3. Dependencies

        * "django":http://www.djangoproject.com/
        * "geodjango":http://www.geodjango.org/
        * "numpy":http://numpy.scipy.org/

    h3. Documentation

        * "standard deviation distance":http://www.spatialanalysisonline.com/\
output/html/Directionalanalysisofpointdatasets.html
    """
    # Refuse anything that is not a GeoQuerySet before doing any work
    if not isinstance(geoqueryset, GeoQuerySet):
        submitted = type(geoqueryset)
        raise TypeError(
            'First parameter must be a Django GeoQuerySet. You submitted a %s object'
            % submitted
        )

    center = calculate.mean_center(
        geoqueryset,
        point_attribute_name=point_attribute_name
    )
    # Lazily pull the Point off each row, then measure it against the center
    points = (getattr(row, point_attribute_name) for row in geoqueryset)
    return calculate.standard_deviation(
        [point.distance(center) for point in points]
    )