Example #1
0
def least_squares_fit(x, y):
    """given training values for x and y,
    find the least-squares values of alpha and beta"""
    beta = stats.correlation(x, y) * \
        stats.standard_deviation(y) / stats.standard_deviation(x)
    alpha = stats.mean(y) - beta * stats.mean(x)
    return alpha, beta
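The two lines above are the closed-form least-squares solution: the slope is correlation(x, y) times stdev(y) / stdev(x), and the intercept is mean(y) minus the slope times mean(x). A minimal self-contained sketch of the same formula, using only the standard library (statistics.correlation needs Python 3.10+; this is our illustration, not part of the example's stats module):

import statistics

def simple_fit(x, y):
    # slope from the correlation / stdev-ratio identity, intercept from the means
    beta = statistics.correlation(x, y) * statistics.stdev(y) / statistics.stdev(x)
    alpha = statistics.mean(y) - beta * statistics.mean(x)
    return alpha, beta

xs = [1, 2, 3, 4, 5]
ys = [2.1, 3.9, 6.2, 7.8, 10.1]   # roughly y = 2x
print(simple_fit(xs, ys))         # intercept near 0, slope near 2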
Example #2
0
def least_squares_fit(x, y):
    """given training values for x and y,
    find the least-squares values of alpha and beta"""
    beta = stats.correlation(x, y) * \
        stats.standard_deviation(y) / stats.standard_deviation(x)
    alpha = stats.mean(y) - beta * stats.mean(x)
    return alpha, beta
Example #3
0
def least_squares_fit(xs: Vector, ys: Vector) -> Tuple[float, float]:
    """
    Given a dataset represented by xs and ys, return the alpha and beta that give the
    least-squares fit for the function y_i = alpha * x_i + beta
    """
    alpha = correlation(xs,
                        ys) * standard_deviation(ys) / standard_deviation(xs)
    beta = mean(ys) - alpha * mean(xs)
    return alpha, beta
Example #4
0
def scale(data_matrix):
    num_rows, num_cols = shape(data_matrix)
    means = [mean(get_column(data_matrix,j))
             for j in range(num_cols)]
    stdevs = [standard_deviation(get_column(data_matrix,j))
              for j in range(num_cols)]
    return means, stdevs
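The means and stdevs returned by scale are typically used next to standardize each column to mean 0 and standard deviation 1. A minimal sketch of that follow-up step over plain lists of rows (rescale_columns is our name and not part of the source's matrix helpers):

def rescale_columns(rows, means, stdevs):
    # z-score each value; leave a column unchanged if its stdev is zero
    return [[(value - m) / s if s > 0 else value
             for value, m, s in zip(row, means, stdevs)]
            for row in rows]

rows = [[1.0, 10.0], [2.0, 20.0], [3.0, 30.0]]
print(rescale_columns(rows, [2.0, 20.0], [1.0, 10.0]))   # [[-1.0, -1.0], [0.0, 0.0], [1.0, 1.0]]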
Example #5
0
    def test_standard_deviation(self):
        """Assume degree of freedom = 0"""
        l = [1, 2, 3, 4, 5, 5, 4, 3, 3, 1, 2]
        my_value = st.standard_deviation(l)
        numpy_value = np.std(l)

        self.assertAlmostEqual(my_value, numpy_value)
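The degrees-of-freedom note matters because np.std defaults to the population formula (ddof=0), while many hand-rolled standard_deviation functions divide by n - 1; the comparison only makes sense if st.standard_deviation uses ddof=0 as well. A quick contrast on the same data (our own check, independent of the st module):

import numpy as np

data = [1, 2, 3, 4, 5, 5, 4, 3, 3, 1, 2]
print(np.std(data))           # population standard deviation (ddof=0)
print(np.std(data, ddof=1))   # sample standard deviation (ddof=1), slightly larger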
Example #6
0
def scale(data_matrix):
    # return the mean and standard deviation of each column
    num_rows, num_cols = shape(data_matrix)
    means = [mean(get_column(data_matrix, j)) for j in range(num_cols)]
    stdevs = [
        standard_deviation(get_column(data_matrix, j)) for j in range(num_cols)
    ]
    return means, stdevs
Example #7
0
def _test_exectime_bounded_linear_growth(execTimeTS):
    '''
    Test that, when the number of samples DCGM collects is limited, the total amount of time
    used to retrieve each field grows linearly.
    '''
    tolerance = 0.60

    for fieldId, series in execTimeTS.fieldVals.items():
        tailStart = int(0.4 * len(series))
        tailLen = len(series) - tailStart

        # take a linear regression of the execution timeseries
        # if its corr. coeff. is not high (1.0 is highest)
        # OR
        # if its slope is much different from the actual start -> end slope
        # THEN something is wrong.

        # calc the lin. regr. slope
        # taken from https://en.wikipedia.org/wiki/Simple_linear_regression#Fitting_the_regression_line
        x = execTimeTS.timestamps[tailStart:]
        y = series[tailStart:]
        if y[-1] == 0:
            logger.info("Skipping fieldId %d with exec times of 0" % fieldId)
            continue

        #logger.info("x %s, y %s" % (str(x), str(y)))
        rxy = stats.correlation_coefficient(x, y)
        sx = stats.standard_deviation(x)
        sy = stats.standard_deviation(y)

        assert (rxy >= 0.90), (
            'execution time for field %s did not have a strong linear correlation. '
            % fieldId + 'Its correlation coefficient was %.4f' % rxy)
        logger.debug('corr. coeff. for field %s: %s' % (fieldId, rxy))

        linRegSlope = rxy * (sy / sx)
        slope = (y[-1] - y[0]) / float(x[-1] - x[0])

        minSlope = (1 - tolerance) * linRegSlope
        maxSlope = (1 + tolerance) * linRegSlope
        assert (minSlope <= slope <= maxSlope), (
            'execution time growth for field %s was not linear. ' % fieldId +
            'It had an overall slope of %s but the linear regression slope was %s. '
            % (slope, linRegSlope) + 'Tolerated min: %s, tolerated max: %s' %
            (minSlope, maxSlope))
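The slope used in the assertion, rxy * (sy / sx), is algebraically the same as the usual covariance-over-variance least-squares slope. A standalone check of that identity (our sketch, not part of the DCGM test suite; statistics.correlation and statistics.covariance need Python 3.10+):

import statistics

x = [0, 1, 2, 3, 4, 5]
y = [1.0, 3.1, 4.9, 7.2, 9.0, 11.1]

slope_from_r = statistics.correlation(x, y) * statistics.stdev(y) / statistics.stdev(x)
slope_from_cov = statistics.covariance(x, y) / statistics.variance(x)
print(slope_from_r, slope_from_cov)   # the two values agree up to rounding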
Example #8
0
def scale(data_matrix):
    """returns the mean and standard deviations of each column"""
    num_rows, num_cols = algebra.shape(data_matrix)
    means = [stats.mean(algebra.get_column(data_matrix, j))
             for j in range(num_cols)]

    stddevs = [stats.standard_deviation(algebra.get_column(data_matrix, j))
               for j in range(num_cols)]
    return means, stddevs
Example #9
0
def scale(data_matrix):
    """returns the mean and standard deviations of each column"""
    num_rows, num_cols = algebra.shape(data_matrix)
    means = [
        stats.mean(algebra.get_column(data_matrix, j)) for j in range(num_cols)
    ]

    stddevs = [
        stats.standard_deviation(algebra.get_column(data_matrix, j))
        for j in range(num_cols)
    ]
    return means, stddevs
Example #10
0
def scale(data: List[Vector]) -> Tuple[Vector, Vector]:
    """
    Given a list of data points, return 
    1) a vector of their means across features and 
    2) a vector of their stddevs across features
    """
    assert data is not None and len(data) > 0
    num_features = len(data[0])
    means = vector_mean(data)
    # for each feature, compute the standard deviation of that feature's values across all data points
    # we could one-call this if we wrote a vector_stddev function
    stdevs = [standard_deviation([vector[i] for vector in data]) for i in range(num_features)]
    return means, stdevs
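Assuming vector_mean and standard_deviation behave as in the surrounding module (with standard_deviation being the sample, n - 1, version), a tiny usage sketch with three two-feature points would look like this; the expected values are our own arithmetic, not output taken from the source:

data = [[1.0, 20.0], [2.0, 22.0], [3.0, 24.0]]
means, stdevs = scale(data)
print(means)    # [2.0, 22.0]
print(stdevs)   # [1.0, 2.0]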
Example #11
0
                    [200 + random.random() for _ in range(50)])

    print("bootstrap_statistic(close_to_100, median, 100):")
    print(bootstrap_statistic(close_to_100, median, 100))
    print("bootstrap_statistic(far_from_100, median, 100):")
    print(bootstrap_statistic(far_from_100, median, 100))
    print()

    random.seed(0) # so that you get the same results as me

    bootstrap_betas = bootstrap_statistic(list(zip(x, daily_minutes_good)),
                                          estimate_sample_beta,
                                          100)

    bootstrap_standard_errors = [
        standard_deviation([beta[i] for beta in bootstrap_betas])
        for i in range(4)]

    print("bootstrap standard errors", bootstrap_standard_errors)
    print()

    print("p_value(30.63, 1.174)", p_value(30.63, 1.174))
    print("p_value(0.972, 0.079)", p_value(0.972, 0.079))
    print("p_value(-1.868, 0.131)", p_value(-1.868, 0.131))
    print("p_value(0.911, 0.990)", p_value(0.911, 0.990))
    print()

    print("regularization")

    random.seed(0)
    for alpha in [0.0, 0.01, 0.1, 1, 10]:
Example #12
0
def scale(data):
    num_rows, num_cols = shape(data)
    means = [mean(get_column(data, j)) for j in range(num_cols)]
    stdevs = [standard_deviation(get_column(data, j)) for j in range(num_cols)]
    return means, stdevs
Example #13
0
median = st.median(A)
print("A's median = ", median)

quantile = st.quantile(A, 0.2)
print("A's 20% quantile = ", quantile)

quantile = st.quantile(A, 0.9)
print("A's 90% quantile = ", quantile)

mode = st.mode(A)
print("A's mode = ", mode)

data_range = st.data_range(A)
print("A's range = ", data_range)

variance = st.variance(A)
print("A's variance = ", variance)

standard_deviation = st.standard_deviation(A)
print("A's standard deviation = ", standard_deviation)

interquartile_range = st.interquartile_range(A)
print("A's interquartile range of 25% ~ 75% = ", interquartile_range)

x = [-2, -1, 0, 1, 2]
y = [2, 1, 0, 1, 2]

correlation = st.correlation(x, y)
print("correlation = ", correlation)
Example #14
0
def scale(data_matrix):
	'''returns mean and sd of each column'''
	num_rows, num_cols = lin_alg.shape(data_matrix)
	means = [stats.mean(lin_alg.get_col(data_matrix, j)) for j in range(num_cols)]
	stdevs = [stats.standard_deviation(lin_alg.get_col(data_matrix, j)) for j in range(num_cols)]
	return means, stdevs
Example #15
0
import sys

# allows import of project files (idk how else to do this)
sys.path.insert(1, '..')
from utils.webassign import array_from_shitstring
from stats import mean, deviation_from_mean, variance, standard_deviation

youngs_mod = array_from_shitstring(" 116.6 115.8 114.9 115.3 115.6 ")
youngs_mod.sort()
print("Young's Mod: {}".format(youngs_mod))

sample_mean = mean(youngs_mod)
print("Young's Mod (mean): {}".format(sample_mean))

print("Deviation from mean:")
deviation_list = deviation_from_mean(youngs_mod)
for data_point, deviation in zip(youngs_mod, deviation_list):
    print("Sample: {0}, deviation from mean: {1}".format(
        data_point, deviation))

my_variance = variance(youngs_mod)
print("Sample variance: {}".format(my_variance))
print("Standard deviation: {}".format(standard_deviation(youngs_mod)))
Example #16
0
def register_sample (app, app_id, tag, day_of_the_week = None):

	user = User.objects.filter(app = app,
							   app_id = app_id).first()

	## Clear all the events of that tag
	## Then recreate them from the new data
	events = Event.objects.filter(user = user, tag = tag)
	if day_of_the_week is not None:
		events = events.filter(day_of_week = day_of_the_week)
	for event in events:
		event.is_active = False
		event.save()

	app = importlib.import_module(user.app + "." + user.app)
	event_times = getattr(app, "%s_times" % tag)(app_id, day_of_the_week = day_of_the_week)
	if len(event_times) < 2: 
		return
	if len(event_times[0]) < 2: 
		return

	pmf = stats.event_pmf(event_times, 1440)
	pmf_average = stats.average(pmf)

	if pmf_average < minimum_pmf_mean(tag):
		## All weak probabilities. Only outlier events.
		return

	pmf_variance = stats.variance(pmf, average = pmf_average)
	pmf_std = stats.standard_deviation(pmf, variance = pmf_variance)

	in_event = False
	event_start_minutes = []
	event_end_minutes = []
	event_probabilites = []
	for minute in range(0,1440):
		if pmf[minute] > pmf_average + pmf_variance:
			if in_event is False:
				event_start_minutes.append(minute)
				in_event = True
		else:
			if in_event is True:
				event_end_minutes.append(minute)
				in_event = False


	if len(event_start_minutes) > len(event_end_minutes): ## Assume the last event started at night and ends in the morning
		event_start_minutes[0] = event_start_minutes[len(event_start_minutes) - 1]
		del event_start_minutes[len(event_start_minutes) - 1]

	## If events are too close together, combine them.
	for index in range(0, len(event_end_minutes)):
		if index + 1 >= len(event_start_minutes):
			break

		event_end_time = event_end_minutes[index]
		next_event_start_time = event_start_minutes[index + 1]
		time_between_event = next_event_start_time - event_end_time
		if time_between_event < minimum_time_between_event(tag):
			del event_end_minutes[index]
			del event_start_minutes[index + 1]


	for index in range(0, len(event_start_minutes)):
		start_minute = event_start_minutes[index]
		end_minute = event_end_minutes[index]

		if start_minute < end_minute:
			event_probability_set = pmf[start_minute:end_minute]
		else:
			event_probability_set = pmf[start_minute:1439]
			event_probability_set.extend(pmf[0:end_minute])
		event_average_probablity = stats.average(event_probability_set)
		event_probability_variance = stats.variance(event_probability_set, average = event_average_probablity)

		fringe_start_time = start_minute - fringe_time_for_event(tag)
		if fringe_start_time < 0:
			fringe_start_time = 1440 + fringe_start_time

		fringe_end_time = end_minute + fringe_time_for_event(tag)
		if fringe_end_time > 1440:
			fringe_end_time = fringe_end_time - 1440

		if fringe_end_time > fringe_start_time:
			fringe_pmf = pmf[fringe_start_time:fringe_end_time]
		else:
			fringe_pmf = pmf[fringe_start_time:1439]
			fringe_pmf.extend(pmf[0:fringe_end_time])

		fringe_average_probability = stats.average(fringe_pmf)
		fringe_variance = stats.variance(fringe_pmf, average = fringe_average_probability)

		start_hour = float(start_minute)/60.0
		end_hour = float(end_minute)/60.0

		e = Event.objects.create(user = user,
								 tag = tag,
								 start_time = start_hour, 
								 end_time = end_hour,
								 day_of_week = day_of_the_week,
								 probability = event_average_probablity,
								 probability_variance = event_probability_variance,
								 fringe_probability = fringe_average_probability,
								 fringe_variance = fringe_variance)
		e.save()
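The core of the function above is a run-detection pass: minutes whose probability exceeds a threshold (here the pmf's mean plus its variance) are grouped into (start, end) events. A toy standalone sketch of that idea (our own, unrelated to the Django models and stats helpers above):

def detect_runs(pmf, threshold):
    # group consecutive above-threshold minutes into (start, end) pairs
    runs, start = [], None
    for minute, p in enumerate(pmf):
        if p > threshold and start is None:
            start = minute
        elif p <= threshold and start is not None:
            runs.append((start, minute))
            start = None
    if start is not None:   # a run still open at the end of the day
        runs.append((start, len(pmf)))
    return runs

print(detect_runs([0.1, 0.1, 0.9, 0.8, 0.1, 0.7, 0.1], threshold=0.5))   # [(2, 4), (5, 6)]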
Example #17
0
def test_stats_stdev():
    """Test for Standard Deviation"""
    assert stats.standard_deviation([2, 4, 6, 8, 10]) == 3.16
    assert stats.standard_deviation([1, 2, 3, 4, 5]) == 1.58
    assert stats.standard_deviation([5, 10, 15, 20, 25]) == 7.91
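These exact-equality asserts can only pass if stats.standard_deviation rounds its result; the expected values match the sample (n - 1) standard deviation rounded to two decimals. A hypothetical implementation consistent with them (our guess at the tested behavior, not the project's actual code):

import math

def standard_deviation(values):
    # sample standard deviation, rounded to two decimal places
    m = sum(values) / len(values)
    var = sum((v - m) ** 2 for v in values) / (len(values) - 1)
    return round(math.sqrt(var), 2)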
Example #18
0
import stats

my_list = [4, 1, 5, 7, 6, 8, 9, 10, 8, 3, 3, 8, 12]

my_mean = stats.mean(my_list)
print('The mean is: ' + str(my_mean))

my_median = stats.median(my_list)
print('The median is: ' + str(my_median))

my_range = stats.range(my_list)
print('The range is: ' + str(my_range))

my_std_dev = stats.standard_deviation(my_list)
print('The standard deviation is: ' + str(my_std_dev))
Example #19
0
"""
This file contains some default imports and commonly used functions so that you
don't have to write lots of code over and over for each problem.
"""

# CONFIG
PROJECT_ROOT = '..'  # relative location pointing to utils/ and stats.py

# REST OF FILE
import sys

# allows import of project files (idk how else to do this)
sys.path.insert(1, PROJECT_ROOT)
from utils.webassign import array_from_shitstring_floats
from stats import get_range, variance, standard_deviation

o2_consumption = array_from_shitstring_floats(
    '29.6	 49.4	 31.0	 28.4	 28.8	 25.4	 34.0	 29.8	 23.8	 30.1')
print("O2 Consumption: {}".format(o2_consumption))

print("Sample range: {}".format(get_range(o2_consumption)))
print("Sample variance: {}".format(variance(o2_consumption)))
print("Standard deviation: {}".format(standard_deviation(o2_consumption)))
Example #20
0
def least_squares_fit(x, y):
    # given training values for x and y, find the least-squares values of alpha and beta
    beta = correlation(x, y) * standard_deviation(y) / standard_deviation(x)
    alpha = mean(y) - beta * mean(x)
    return alpha, beta
Example #21
0
                    [random.random() for _ in range(50)] +
                    [200 + random.random() for _ in range(50)])

    print("bootstrap_statistic(close_to_100, median, 100):")
    print(bootstrap_statistic(close_to_100, median, 100))
    print("bootstrap_statistic(far_from_100, median, 100):")
    print(bootstrap_statistic(far_from_100, median, 100))
    print()

    random.seed(0)  # so that you get the same results as me

    bootstrap_betas = bootstrap_statistic(list(zip(x, daily_minutes_good)),
                                          estimate_sample_beta, 100)

    bootstrap_standard_errors = [
        standard_deviation([beta[i] for beta in bootstrap_betas])
        for i in range(4)
    ]

    print("bootstrap standard errors", bootstrap_standard_errors)
    print()

    print("p_value(30.63, 1.174)", p_value(30.63, 1.174))
    print("p_value(0.972, 0.079)", p_value(0.972, 0.079))
    print("p_value(-1.868, 0.131)", p_value(-1.868, 0.131))
    print("p_value(0.911, 0.990)", p_value(0.911, 0.990))
    print()

    print("regularization")

    random.seed(0)
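For context, bootstrap_statistic (whose implementation is not shown in this snippet) follows the standard bootstrap recipe: resample the data with replacement and apply the statistic to each resample. A generic sketch of that idea (our own simplified version; the source's function may differ):

import random

def bootstrap_sample(data):
    # sample len(data) points with replacement
    return [random.choice(data) for _ in data]

def bootstrap_statistic(data, stats_fn, num_samples):
    # evaluate stats_fn on num_samples bootstrap resamples of data
    return [stats_fn(bootstrap_sample(data)) for _ in range(num_samples)]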
Example #22
0
don't have to write lots of code over and over for each problem.
"""

# CONFIG
PROJECT_ROOT = '..'  # relative location pointing to utils/ and stats.py

# REST OF FILE
import sys

# allows import of project files (idk how else to do this)
sys.path.insert(1, PROJECT_ROOT)
from utils.webassign import array_from_shitstring
from stats import variance, standard_deviation

data = array_from_shitstring(
    "85  	105  	130  	160  	180  	195  	134  	145  	214  	105  	145  151  	153  	135  	87  	99  	94  	119  	129"
)  # put your data here
data.sort()
print("Oxidation induction time (min): {}".format(data))

data_variance = variance(data)
data_standard_deviation = standard_deviation(data)
print("Sample variance: {}".format(data_variance))
print("Standard deviation: {}".format(data_standard_deviation))

data_to_hours = [value / 60 for value in data]
data_variance_in_hours = variance(data_to_hours)
standard_deviation_in_hours = standard_deviation(data_to_hours)
print("Sample variance (hrs): {}".format(data_variance_in_hours))
print("Standard deviation (hrs): {}".format(standard_deviation_in_hours))