def least_squares_fit(x, y):
    """Given training values for x and y, return the least-squares
    (alpha, beta) for the line y = alpha + beta * x."""
    # Slope: correlation scaled by the ratio of the two standard deviations.
    beta = (stats.correlation(x, y)
            * stats.standard_deviation(y)
            / stats.standard_deviation(x))
    # Intercept: chosen so the fitted line passes through the point of means.
    alpha = stats.mean(y) - beta * stats.mean(x)
    return alpha, beta
def least_squares_fit(xs: Vector, ys: Vector) -> Tuple[float, float]:
    """
    Least-squares fit of y_i = alpha * x_i + beta.

    Given a dataset represented by xs and ys, return the (alpha, beta)
    pair that minimizes the squared error of the fit.
    """
    spread_x = standard_deviation(xs)
    spread_y = standard_deviation(ys)
    # Slope (called alpha here) follows from the correlation and the spreads.
    alpha = correlation(xs, ys) * spread_y / spread_x
    # Intercept (called beta here) makes the line pass through the mean point.
    beta = mean(ys) - alpha * mean(xs)
    return alpha, beta
def scale(data_matrix):
    """Return two lists: the mean and the standard deviation of each column."""
    _num_rows, num_cols = shape(data_matrix)
    means = []
    stdevs = []
    for j in range(num_cols):
        means.append(mean(get_column(data_matrix, j)))
        stdevs.append(standard_deviation(get_column(data_matrix, j)))
    return means, stdevs
def test_standard_deviation(self):
    """Standard deviation should match numpy's population std (ddof = 0)."""
    data = [1, 2, 3, 4, 5, 5, 4, 3, 3, 1, 2]
    my_value = st.standard_deviation(data)
    numpy_value = np.std(data)
    # assertEquals is deprecated (use assertEqual-family methods), and exact
    # float equality is fragile: the two implementations may accumulate in a
    # different order, so compare approximately instead.
    self.assertAlmostEqual(my_value, numpy_value)
def scale(data_matrix):
    """Return the mean and the standard deviation of every column."""
    num_cols = shape(data_matrix)[1]
    column_indices = range(num_cols)
    means = [mean(get_column(data_matrix, j)) for j in column_indices]
    stdevs = [standard_deviation(get_column(data_matrix, j))
              for j in column_indices]
    return means, stdevs
def _test_exectime_bounded_linear_growth(execTimeTS):
    '''
    Test that when the number of samples that DCGM collects is limited,
    the total time spent retrieving each field grows linearly.

    execTimeTS: timeseries object exposing .timestamps and per-field
    .fieldVals — exact type assumed from usage; TODO confirm against caller.
    Raises AssertionError when a field's growth is not strongly linear.
    '''
    tolerance = 0.60  # accept +/-60% deviation from the regression slope
    for fieldId, series in execTimeTS.fieldVals.items():
        # Only examine the tail (last ~60%) of the series, skipping startup noise.
        tailStart = int(0.4 * len(series))
        tailLen = len(series) - tailStart
        # take a linear regression of the execution timeseries
        # if its corr. coeff. is not high (1.0 is highest)
        # OR
        # if its slope is much different from the actual start -> end slope
        # THEN something is wrong.
        # calc the lin. regr. slope
        # taken from https://en.wikipedia.org/wiki/Simple_linear_regression#Fitting_the_regression_line
        x = execTimeTS.timestamps[tailStart:]
        y = series[tailStart:]
        if y[-1] == 0:
            # A flat all-zero tail carries no growth information; skip it.
            logger.info("Skipping fieldId %d with exec times of 0" % fieldId)
            continue
        #logger.info("x %s, y %s" % (str(x), str(y)))
        rxy = stats.correlation_coefficient(x, y)
        sx = stats.standard_deviation(x)
        sy = stats.standard_deviation(y)
        assert (rxy >= 0.90), (
            'execution time for field %s did not have a strong linear correlation. ' % fieldId +
            'Its correlation coefficient was %.4f' % rxy)
        logger.debug('corr. coeff. for field %s: %s' % (fieldId, rxy))
        # Regression slope = r * (sy / sx); compare it to the crude slope taken
        # from the endpoints of the tail.
        linRegSlope = rxy * (sy / sx)
        slope = (y[-1] - y[0]) / float(x[-1] - x[0])
        minSlope = (1 - tolerance) * linRegSlope
        maxSlope = (1 + tolerance) * linRegSlope
        assert (minSlope <= slope <= maxSlope), (
            'execution time growth for field %s was not linear. ' % fieldId +
            'It had an overall slope of %s but the linear regression slope was %s. ' % (slope, linRegSlope) +
            'Tolerated min: %s, tolerated max: %s' % (minSlope, maxSlope))
def scale(data_matrix):
    """Return two parallel lists: each column's mean and standard deviation."""
    _rows, cols = algebra.shape(data_matrix)
    means, stddevs = [], []
    for col in range(cols):
        means.append(stats.mean(algebra.get_column(data_matrix, col)))
        stddevs.append(stats.standard_deviation(algebra.get_column(data_matrix, col)))
    return means, stddevs
def scale(data_matrix):
    """Return the per-column means and the per-column standard deviations."""
    num_cols = algebra.shape(data_matrix)[1]
    cols = range(num_cols)
    means = [stats.mean(algebra.get_column(data_matrix, j)) for j in cols]
    stddevs = [stats.standard_deviation(algebra.get_column(data_matrix, j))
               for j in cols]
    return means, stddevs
def scale(data: List[Vector]) -> Tuple[Vector, Vector]:
    """
    For a non-empty list of equal-length data points, return
    1) the vector of per-feature means and
    2) the vector of per-feature standard deviations.
    """
    assert data is not None and len(data) > 0
    num_features = len(data[0])
    means = vector_mean(data)
    # Collect each feature's values across all vectors, then take its stddev
    # (a dedicated vector_stddev helper would make this a one-liner).
    stdevs = []
    for feature_idx in range(num_features):
        feature_column = [vector[feature_idx] for vector in data]
        stdevs.append(standard_deviation(feature_column))
    return means, stdevs
# NOTE(review): this fragment continues a call that begins above this chunk.
    [200 + random.random() for _ in range(50)])

print("bootstrap_statistic(close_to_100, median, 100):")
print(bootstrap_statistic(close_to_100, median, 100))
print("bootstrap_statistic(far_from_100, median, 100):")
print(bootstrap_statistic(far_from_100, median, 100))
print()

random.seed(0)  # so that you get the same results as me
bootstrap_betas = bootstrap_statistic(list(zip(x, daily_minutes_good)),
                                      estimate_sample_beta, 100)
# Standard error of each of the 4 coefficients, estimated as the stddev of
# that coefficient across the bootstrap resamples.
bootstrap_standard_errors = [
    standard_deviation([beta[i] for beta in bootstrap_betas])
    for i in range(4)]
print("bootstrap standard errors", bootstrap_standard_errors)
print()

print("p_value(30.63, 1.174)", p_value(30.63, 1.174))
print("p_value(0.972, 0.079)", p_value(0.972, 0.079))
print("p_value(-1.868, 0.131)", p_value(-1.868, 0.131))
print("p_value(0.911, 0.990)", p_value(0.911, 0.990))
print()

print("regularization")

random.seed(0)
# NOTE(review): the body of this loop continues below this chunk.
for alpha in [0.0, 0.01, 0.1, 1, 10]:
def scale(data):
    """Return the mean and standard deviation of each column of `data`."""
    num_cols = shape(data)[1]
    means, stdevs = [], []
    for j in range(num_cols):
        means.append(mean(get_column(data, j)))
        stdevs.append(standard_deviation(get_column(data, j)))
    return means, stdevs
# Print a battery of summary statistics for the dataset A (defined above)
# using the st (stats) module.
median = st.median(A)
print("A's median = ", median)
quantile = st.quantile(A, 0.2)
print("A's 20% quantile = ", quantile)
quantile = st.quantile(A, 0.9)
print("A's 90% quantile = ", quantile)
mode = st.mode(A)
print("A's mode = ", mode)
data_range = st.data_range(A)
print("A's range = ", data_range)
variance = st.variance(A)
print("A's variance = ", variance)
# NOTE: this assignment shadows any `standard_deviation` name imported earlier.
standard_deviation = st.standard_deviation(A)
print("A's standard deviation = ", standard_deviation)
interquartile_range = st.interquartile_range(A)
print("A's interquartile range of 25% ~ 75% = ", interquartile_range)

# Symmetric example (y = |x|): linearly uncorrelated despite a clear relation.
x = [-2, -1, 0, 1, 2]
y = [2, 1, 0, 1, 2]
correlation = st.correlation(x, y)
print("correlation = ", correlation)
def scale(data_matrix):
    """Return each column's mean and each column's standard deviation."""
    _n_rows, n_cols = lin_alg.shape(data_matrix)
    col_range = range(n_cols)
    means = [stats.mean(lin_alg.get_col(data_matrix, j)) for j in col_range]
    stdevs = [stats.standard_deviation(lin_alg.get_col(data_matrix, j))
              for j in col_range]
    return means, stdevs
import sys

# allows import of project files (idk how else to do this)
sys.path.insert(1, '..')

from utils.webassign import array_from_shitstring
from stats import mean, deviation_from_mean, variance, standard_deviation

# Parse the Young's modulus measurements and sort them ascending.
youngs_mod = array_from_shitstring(" 116.6 115.8 114.9 115.3 115.6 ")
youngs_mod.sort()
print("Young's Mod: {}".format(youngs_mod))

sample_mean = mean(youngs_mod)
print("Young's Mod (mean): {}".format(sample_mean))

# Report each sample next to its deviation from the mean.
print("Deviation from mean:")
deviation_list = deviation_from_mean(youngs_mod)
for data_point, deviation in zip(youngs_mod, deviation_list):
    print("Sample: {0}, deviation from mean: {1}".format(data_point, deviation))

my_variance = variance(youngs_mod)
print("Sample variance: {}".format(my_variance))
print("Standard deviation: {}".format(standard_deviation(youngs_mod)))
def register_sample (app, app_id, tag, day_of_the_week = None):
    """
    Rebuild the Event rows for one user/tag from freshly sampled data.

    Deactivates the user's existing events for `tag` (optionally restricted to
    `day_of_the_week`), recomputes a probability mass function over the 1440
    minutes of a day, and creates a new Event for every span whose probability
    rises above (mean + variance) of the pmf. Returns None in all cases.
    """
    user = User.objects.filter(app = app, app_id = app_id).first()
    ## Clear all the events of that tag
    ## Then recreate them from the new data
    events = Event.objects.filter(user = user, tag = tag)
    if day_of_the_week is not None:
        events = events.filter(day_of_week = day_of_the_week)
    for event in events:
        event.is_active = False
        event.save()
    # Dynamically load the per-app module and ask it for this tag's sample times.
    app = importlib.import_module(user.app + "." + user.app)
    event_times = getattr(app, "%s_times" % tag)(app_id, day_of_the_week = day_of_the_week)
    # Not enough data to say anything meaningful.
    if len(event_times) < 2:
        return
    if len(event_times[0]) < 2:
        return
    # Probability mass function over each minute of the day (1440 buckets).
    pmf = stats.event_pmf(event_times, 1440)
    pmf_average = stats.average(pmf)
    if pmf_average < minimum_pmf_mean(tag):
        ## All weak probabilities. Only outlier events.
        return
    pmf_variance = stats.variance(pmf, average = pmf_average)
    # NOTE(review): pmf_std is computed but never used below — confirm intent.
    pmf_std = stats.standard_deviation(pmf, variance = pmf_variance)
    # Walk the day minute-by-minute: open an event when the pmf rises above
    # mean + variance, close it when the pmf falls back below.
    in_event = False
    event_start_minutes = []
    event_end_minutes = []
    event_probabilites = []  # NOTE(review): never populated — confirm intent
    for minute in range(0,1440):
        if pmf[minute] > pmf_average + pmf_variance:
            if in_event is False:
                event_start_minutes.append(minute)
                in_event = True
        else:
            if in_event is True:
                event_end_minutes.append(minute)
                in_event = False
    if len(event_start_minutes) > len(event_end_minutes):
        ## Assume the last event started at night and ends in the morning
        event_start_minutes[0] = event_start_minutes[len(event_start_minutes) - 1]
        del event_start_minutes[len(event_start_minutes) - 1]
    ## If events are too close together, combined them.
    for index in range(0, len(event_end_minutes)):
        if index + 1 >= len(event_start_minutes):
            break
        event_end_time = event_end_minutes[index]
        next_event_start_time = event_start_minutes[index + 1]
        time_between_event = next_event_start_time - event_end_time
        if time_between_event < minimum_time_between_event(tag):
            # NOTE(review): deletes from the lists being indexed — confirm the
            # indices stay aligned after a merge.
            del event_end_minutes[index]
            del event_start_minutes[index + 1]
    # Persist one Event per detected span, plus probability stats for a fringe
    # window padded around it (both span and fringe may wrap past midnight).
    for index in range(0, len(event_start_minutes)):
        start_minute = event_start_minutes[index]
        end_minute = event_end_minutes[index]
        if start_minute < end_minute:
            event_probability_set = pmf[start_minute:end_minute]
        else:
            # Span wraps around midnight: stitch the two slices together.
            event_probability_set = pmf[start_minute:1439]
            event_probability_set.extend(pmf[0:end_minute])
        event_average_probablity = stats.average(event_probability_set)
        event_probability_variance = stats.variance(event_probability_set, average = event_average_probablity)
        fringe_start_time = start_minute - fringe_time_for_event(tag)
        if fringe_start_time < 0:
            fringe_start_time = 1440 + fringe_start_time
        fringe_end_time = end_minute + fringe_time_for_event(tag)
        if fringe_end_time > 1440:
            fringe_end_time = fringe_end_time - 1440
        if fringe_end_time > fringe_start_time:
            fringe_pmf = pmf[fringe_start_time:fringe_end_time]
        else:
            fringe_pmf = pmf[fringe_start_time:1439]
            fringe_pmf.extend(pmf[0:fringe_end_time])
        fringe_average_probability = stats.average(fringe_pmf)
        fringe_variance = stats.variance(fringe_pmf, average = fringe_average_probability)
        # Events are stored in fractional hours.
        start_hour = float(start_minute)/60.0
        end_hour = float(end_minute)/60.0
        e = Event.objects.create(user = user, tag = tag, start_time = start_hour, end_time = end_hour, day_of_week = day_of_the_week, probability = event_average_probablity, probability_variance = event_probability_variance, fringe_probability = fringe_average_probability, fringe_variance = fringe_variance)
        e.save()
def test_stats_stdev():
    """Test for Standard Deviation."""
    # Expected values suggest the implementation returns results rounded to
    # two decimal places — TODO confirm against stats.standard_deviation.
    cases = [
        ([2, 4, 6, 8, 10], 3.16),
        ([1, 2, 3, 4, 5], 1.58),
        ([5, 10, 15, 20, 25], 7.91),
    ]
    for data, expected in cases:
        assert stats.standard_deviation(data) == expected
import stats

# Sample data for the descriptive-statistics demo.
my_list = [4, 1, 5, 7, 6, 8, 9, 10, 8, 3, 3, 8, 12]

# Compute and report each statistic in turn.
my_mean = stats.mean(my_list)
print(f'The mean is: {my_mean}')
my_median = stats.median(my_list)
print(f'The median is: {my_median}')
my_range = stats.range(my_list)
print(f'The range is: {my_range}')
my_std_dev = stats.standard_deviation(my_list)
print(f'The standard deviation is: {my_std_dev}')
""" This file contains some default imports and commonly used functions so that you don't have to write lots of code over and over for each problem. """ # CONFIG PROJECT_ROOT = '..' # relative location pointing to utils/ and stats.py # REST OF FILE import sys # allows import of project files (idk how else to do this) sys.path.insert(1, PROJECT_ROOT) from utils.webassign import array_from_shitstring_floats from stats import get_range, variance, standard_deviation o2_consumption = array_from_shitstring_floats( '29.6 49.4 31.0 28.4 28.8 25.4 34.0 29.8 23.8 30.1') print("O2 Consumption: {}".format(o2_consumption)) print("Sample range: {}".format(get_range(o2_consumption))) print("Sample variance: {}".format(variance(o2_consumption))) print("Standard deviation: {}".format(standard_deviation(o2_consumption)))
def least_squares_fit(x, y):
    """Given training data x and y, return the least-squares (alpha, beta)."""
    # beta (slope): correlation scaled by the ratio of standard deviations.
    beta = (correlation(x, y)
            * standard_deviation(y)
            / standard_deviation(x))
    # alpha (intercept): forces the fitted line through the mean point.
    alpha = mean(y) - beta * mean(x)
    return alpha, beta
# NOTE(review): this fragment continues a call that begins above this chunk.
    [random.random() for _ in range(50)] +
    [200 + random.random() for _ in range(50)])

print("bootstrap_statistic(close_to_100, median, 100):")
print(bootstrap_statistic(close_to_100, median, 100))
print("bootstrap_statistic(far_from_100, median, 100):")
print(bootstrap_statistic(far_from_100, median, 100))
print()

random.seed(0)  # so that you get the same results as me
bootstrap_betas = bootstrap_statistic(list(zip(x, daily_minutes_good)),
                                      estimate_sample_beta, 100)
# Standard error of each of the 4 coefficients, estimated as the stddev of
# that coefficient across the bootstrap resamples.
bootstrap_standard_errors = [
    standard_deviation([beta[i] for beta in bootstrap_betas])
    for i in range(4)
]
print("bootstrap standard errors", bootstrap_standard_errors)
print()

print("p_value(30.63, 1.174)", p_value(30.63, 1.174))
print("p_value(0.972, 0.079)", p_value(0.972, 0.079))
print("p_value(-1.868, 0.131)", p_value(-1.868, 0.131))
print("p_value(0.911, 0.990)", p_value(0.911, 0.990))
print()

print("regularization")

random.seed(0)
don't have to write lots of code over and over for each problem. """

# CONFIG
PROJECT_ROOT = '..'  # relative location pointing to utils/ and stats.py

# REST OF FILE
import sys

# allows import of project files (idk how else to do this)
sys.path.insert(1, PROJECT_ROOT)

from utils.webassign import array_from_shitstring
from stats import variance, standard_deviation

# Oxidation induction times in minutes, sorted ascending before reporting.
data = array_from_shitstring(
    "85 105 130 160 180 195 134 145 214 105 145 151 153 135 87 99 94 119 129"
)  # put your data here
data.sort()
print("Oxidation induction time (min): {}".format(data))

data_variance = variance(data)
data_standard_deviation = standard_deviation(data)
print("Sample variance: {}".format(data_variance))
print("Standard deviation: {}".format(data_standard_deviation))

# Re-express the data in hours and recompute the spread statistics.
data_to_hours = [value / 60 for value in data]
data_variance_in_hours = variance(data_to_hours)
standard_deviation_in_hours = standard_deviation(data_to_hours)
print("Sample variance (hrs): {}".format(data_variance_in_hours))
print("Standard deviation (hrs): {}".format(standard_deviation_in_hours))