def test_standard_deviation(self):
    """
    Check calculate.standard_deviation against two known results and
    confirm that non-numeric input is rejected.

    The expected values are floats, so they are compared with
    assertAlmostEqual rather than exact equality; an exact comparison
    can fail on a harmless difference in the last bits of the result.
    """
    self.assertAlmostEqual(
        calculate.standard_deviation([2, 3, 3, 4]),
        0.70710678118654757
    )
    self.assertAlmostEqual(
        calculate.standard_deviation([-2, 3, 3, 40]),
        16.867127793432999
    )
    # A string mixed into the values is expected to raise ValueError.
    self.assertRaises(
        ValueError,
        calculate.standard_deviation,
        ['a', 2, 3, 3, 4]
    )
def test_standard_deviation(self):
    """
    Verify calculate.standard_deviation on known samples and on input
    that contains a non-numeric value.

    Float results are checked with assertAlmostEqual so the test does
    not depend on the exact binary representation of the result.
    """
    expected_by_sample = (
        ([2, 3, 3, 4], 0.70710678118654757),
        ([-2, 3, 3, 40], 16.867127793432999),
    )
    for sample, expected in expected_by_sample:
        self.assertAlmostEqual(
            calculate.standard_deviation(sample),
            expected
        )
    # Input containing a string is expected to raise ValueError.
    self.assertRaises(
        ValueError,
        calculate.standard_deviation,
        ['a', 2, 3, 3, 4]
    )
def summary_stats(data_list):
    """
    Accepts a sample of numbers and prints a variety of descriptive
    statistics about it: n, max, min, range, mean, median, mode and
    standard deviation.

    The report is written with print; nothing is returned.
    """
    mean = calculate.mean(data_list)
    median = calculate.median(data_list)
    mode = calculate.mode(data_list)
    n = len(data_list)
    max_ = max(data_list)
    min_ = min(data_list)
    range_ = calculate.range(data_list)
    standard_deviation = calculate.standard_deviation(data_list)

    # One field per line, with a blank line before and after the report.
    template = "\n".join([
        "",
        "Summary statistics",
        "==================",
        "n: %s",
        "max: %s",
        "min: %s",
        "range: %s",
        "mean: %s",
        "median: %s",
        "mode: %s",
        "std: %s",
        "",
    ])
    # Call form of print with a single argument, so this is valid on
    # Python 3 and still prints the same text on Python 2.
    print(template % (
        n, max_, min_, range_, mean, median, mode, standard_deviation
    ))
def variation_coefficient(data_list):
    """
    Accepts a list of values and returns the variation coefficient: the
    standard deviation of the values divided by their mean.

    Because it is normalized by the mean, it can be used to compare the
    spread of sets that are measured in different units.

    Note that it uses our "population" standard deviation as part of the
    calculation, not a "sample" standard deviation.

    Raises ValueError if a value cannot be converted to a float. The
    final step divides by the mean, so a sample whose mean is zero is
    not handled here.

    h3. Example usage

        >>> import calculate
        >>> calculate.variation_coefficient([1, 2, -2, 4, -3])
        6.442049363362563

    h3. Documentation

        * "coefficient of variation":http://en.wikipedia.org/wiki/Coefficient_of_variation

    """
    # Coerce everything to float up front; this doubles as the check
    # that no stray strings made it into the input.
    try:
        numbers = [float(value) for value in data_list]
    except ValueError:
        raise ValueError('Input values must contain numbers')
    return calculate.standard_deviation(numbers) / calculate.mean(numbers)
def has_sound_spike(self):
    """
    Look for a voltage spike among the most recent signals.

    Takes the mean and standard deviation of the voltages recorded in
    the past 10 minutes, then looks for signals from the past 10 seconds
    whose voltage is at least the mean plus two standard deviations.

    Returns the highest such voltage if there is one, otherwise False.
    False is also returned when there are no signals at all in the past
    10 minutes. This method only reports the spike; acting on it (for
    example sending a notification) is left to the caller.
    """
    # Sample the clock once so both windows are measured from the same
    # instant.
    now = timezone.localtime(timezone.now())
    ten_minutes = now - datetime.timedelta(minutes=10)
    ten_seconds = now - datetime.timedelta(seconds=10)

    signals_past_ten_min = self.signal_set.filter(
        timestamp__lt=now,
        timestamp__gte=ten_minutes
    )
    voltages = list(
        signals_past_ten_min.values_list(
            'voltage', flat=True
        ).order_by('voltage')
    )
    # Guard against an empty window. Testing the fetched list replaces
    # the earlier check on the `count` method itself (it was never
    # called), which did not actually detect an empty queryset.
    if not voltages:
        return False

    avg = calculate.mean(voltages)
    std_dev = calculate.standard_deviation(voltages)
    twice_std_dev = (std_dev * 2) + avg

    # Highest voltage first, so the first element is the peak.
    spikes = list(
        signals_past_ten_min.filter(
            timestamp__gte=ten_seconds,
            voltage__gte=twice_std_dev
        ).values_list('voltage', flat=True).order_by('-voltage')
    )
    if spikes:
        return spikes[0]
    return False
def summary_stats(data_list):
    """
    Accepts a sample of numbers and prints a table of descriptive
    statistics: n, mean, median, mode, maximum, minimum, range,
    standard deviation and variation coefficient.

    The table is rendered with ptable.indent and written with print;
    nothing is returned.
    """
    # Gather every figure before any formatting happens.
    avg = calculate.mean(data_list)
    middle = calculate.median(data_list)
    most_common = calculate.mode(data_list)
    count = len(data_list)
    highest = max(data_list)
    lowest = min(data_list)
    spread = calculate.range(data_list)
    std_dev = calculate.standard_deviation(data_list)
    var_coeff = calculate.variation_coefficient(data_list)

    # Label/value pairs in the order they appear in the table.
    figures = (
        ('n', count),
        ('mean', avg),
        ('median', middle),
        ('mode', most_common),
        ('maximum', highest),
        ('minimum', lowest),
        ('range', spread),
        ('standard deviation', std_dev),
        ('variation coefficient', var_coeff),
    )
    rows = [['Statistic', 'Value']]
    for label, value in figures:
        rows.append([label, str(value)])

    rendered = ptable.indent(
        rows,
        hasHeader=True,
        separateRows=False,
        prefix="| ",
        postfix=" |",
    )
    print(rendered)
def has_sound_spike(self):
    """
    Report whether the latest signals contain a voltage spike.

    The baseline is the mean and standard deviation of all voltages from
    the past 10 minutes. A spike is any signal from the past 10 seconds
    whose voltage is at least the mean plus two standard deviations.

    Returns the voltage of the highest spike, or False when there is no
    spike or no signal was recorded in the past 10 minutes. Nothing is
    sent from here; the caller decides what to do with the result.
    """
    # One clock reading keeps the two windows aligned with each other.
    now = timezone.localtime(timezone.now())
    ten_minutes = now - datetime.timedelta(minutes=10)
    ten_seconds = now - datetime.timedelta(seconds=10)

    signals_past_ten_min = self.signal_set.filter(
        timestamp__lt=now, timestamp__gte=ten_minutes
    )
    voltages = list(
        signals_past_ten_min.values_list("voltage", flat=True).order_by("voltage")
    )
    # An empty window has no baseline to compare against. The previous
    # check referenced the `count` method without calling it, so it
    # never caught this case.
    if not voltages:
        return False

    avg = calculate.mean(voltages)
    std_dev = calculate.standard_deviation(voltages)
    twice_std_dev = (std_dev * 2) + avg

    signals_past_10_secs = signals_past_ten_min.filter(
        timestamp__gte=ten_seconds, voltage__gte=twice_std_dev
    )
    # Sorted descending, so the first value is the highest signal.
    spike_voltages = list(
        signals_past_10_secs.values_list("voltage", flat=True).order_by("-voltage")
    )
    if not spike_voltages:
        return False
    return spike_voltages[0]
def standard_deviation_distance(obj_list, point_attribute_name='point'):
    """
    Accepts a geoqueryset, list of objects or list of dictionaries whose
    items each carry a Point, and returns the standard deviation
    distance of those points as a float.

    The standard deviation distance is the average variation in the
    distance of points from the mean center. Unlike a standard deviation
    ellipse, it does not have a direction.

    The Point is looked up under the name 'point' by default. If your
    field or key is called something else, pass its name in the kwarg
    'point_attribute_name'.

    The first item decides whether items are read as dictionaries or as
    objects, so the input must not be empty.

    h3. Example usage

        >> import calculate
        >> calculate.standard_deviation_distance(qs)
        0.046301584704149731

    h3. Dependencies

        * "django":http://www.djangoproject.com/
        * "geodjango":http://www.geodjango.org/

    h3. Documentation

        * "standard deviation distance":http://www.spatialanalysisonline.com/output/html/Directionalanalysisofpointdatasets.html

    """
    # Pick how to pull the Point out of an item: by key for
    # dictionaries, by attribute for everything else.
    if isinstance(obj_list[0], dict):
        def read_point(item):
            return item.get(point_attribute_name)
    else:
        def read_point(item):
            return getattr(item, point_attribute_name)

    center = calculate.mean_center(
        obj_list,
        point_attribute_name=point_attribute_name
    )
    spread = []
    for item in obj_list:
        spread.append(read_point(item).distance(center))
    return calculate.standard_deviation(spread)
def standard_deviation_distance(obj_list, point_attribute_name='point'):
    """
    Returns, as a float, the standard deviation distance of a set of
    points supplied as a geoqueryset, a list of objects or a list of
    dictionaries.

    The standard deviation distance is the average variation in the
    distance of points from the mean center. Unlike a standard deviation
    ellipse, it does not have a direction.

    Each item is expected to hold its Point under the name given by the
    kwarg 'point_attribute_name', which defaults to 'point'.

    The type of the first item determines whether items are treated as
    dictionaries or as objects; an empty input is not handled.

    h3. Example usage

        >> import calculate
        >> calculate.standard_deviation_distance(qs)
        0.046301584704149731

    h3. Dependencies

        * "django":http://www.djangoproject.com/
        * "geodjango":http://www.geodjango.org/

    h3. Documentation

        * "standard deviation distance":http://www.spatialanalysisonline.com/output/html/Directionalanalysisofpointdatasets.html

    """
    # Dictionaries are read with .get(), anything else with getattr().
    items_are_dicts = isinstance(obj_list[0], dict)

    def point_of(entry):
        if items_are_dicts:
            return entry.get(point_attribute_name)
        return getattr(entry, point_attribute_name)

    mean_point = calculate.mean_center(
        obj_list, point_attribute_name=point_attribute_name
    )
    distances = [point_of(entry).distance(mean_point) for entry in obj_list]
    return calculate.standard_deviation(distances)
def standard_deviation_distance(geoqueryset, point_attribute_name='point'):
    """
    Accepts a geoqueryset whose objects each have a Point property, and
    returns the standard deviation distance of those points as a float.

    The standard deviation distance is the average variation in the
    distance of points from the mean center. Unlike a standard deviation
    ellipse, it does not have a direction.

    The Point field on your model is expected to be called 'point'. If
    it is called something else, pass its name in the kwarg
    'point_attribute_name'.

    Raises TypeError if the first parameter is not a GeoQuerySet.

    h3. Example usage

        >> import calculate
        >> calculate.standard_deviation_distance(qs)
        0.046301584704149731

    h3. Dependencies

        * "django":http://www.djangoproject.com/
        * "geodjango":http://www.geodjango.org/
        * "numpy":http://numpy.scipy.org/

    h3. Documentation

        * "standard deviation distance":http://www.spatialanalysisonline.com/output/html/Directionalanalysisofpointdatasets.html

    """
    if isinstance(geoqueryset, GeoQuerySet):
        center = calculate.mean_center(
            geoqueryset,
            point_attribute_name=point_attribute_name
        )
        spread = []
        for record in geoqueryset:
            spread.append(getattr(record, point_attribute_name).distance(center))
        return calculate.standard_deviation(spread)

    # Anything other than a GeoQuerySet is rejected outright.
    message = 'First parameter must be a Django GeoQuerySet. You submitted a %s object' % type(geoqueryset)
    raise TypeError(message)
def standard_deviation_distance(geoqueryset, point_attribute_name='point'):
    """
    Returns, as a float, the standard deviation distance of the points
    held by the objects in a geoqueryset.

    The standard deviation distance is the average variation in the
    distance of points from the mean center. Unlike a standard deviation
    ellipse, it does not have a direction.

    Each object's Point is read from the attribute named by the kwarg
    'point_attribute_name', which defaults to 'point'.

    Raises TypeError if the first parameter is not a GeoQuerySet.

    h3. Example usage

        >> import calculate
        >> calculate.standard_deviation_distance(qs)
        0.046301584704149731

    h3. Dependencies

        * "django":http://www.djangoproject.com/
        * "geodjango":http://www.geodjango.org/
        * "numpy":http://numpy.scipy.org/

    h3. Documentation

        * "standard deviation distance":http://www.spatialanalysisonline.com/output/html/Directionalanalysisofpointdatasets.html

    """
    # Reject anything that is not a GeoQuerySet before doing any work.
    if not isinstance(geoqueryset, GeoQuerySet):
        submitted = type(geoqueryset)
        raise TypeError(
            'First parameter must be a Django GeoQuerySet. You submitted a %s object' % submitted
        )

    mean_point = calculate.mean_center(
        geoqueryset, point_attribute_name=point_attribute_name
    )
    distances = []
    for obj in geoqueryset:
        point = getattr(obj, point_attribute_name)
        distances.append(point.distance(mean_point))
    return calculate.standard_deviation(distances)