def execute(self):
    """Run the command-line regression/prediction-interval report.

    Parses two positional arguments (``CSVFILE`` and ``ESTVAL``), loads the
    rows with ``io.read_csv_file``, asks which columns to use as X and Y via
    ``io.choose_from_list`` and prints, in order: the X and Y data, the
    regression parameters returned by ``statistics.beta_0`` /
    ``statistics.beta_1``, the standard deviation around the regression, the
    projection ``beta_0 + beta_1 * ESTVAL``, the t values used for the 70 and
    90 percent intervals, and for each interval the range together with the
    upper (UPI) and lower (LPI) prediction-interval bounds.

    Exits the process with status 1 when the CSV file yields no data.
    """
    # Single-argument print(...) calls are used throughout: they produce the
    # same text under the Python 2 print statement and the Python 3 function.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        'CSVFILE', help='path to csv file with historical data.')
    # type=float lets argparse report a malformed value as a usage error
    # instead of failing later with an uncaught ValueError.
    parser.add_argument(
        'ESTVAL', type=float, help='an estimated value')
    args = parser.parse_args()

    file_path = args.CSVFILE
    estimated_value = args.ESTVAL

    csv_data = io.read_csv_file(file_path)
    if not csv_data:
        print('ERROR: No data')
        sys.exit(1)

    columns = csv_data[0].keys()
    x_column = io.choose_from_list('X Column:', columns)
    y_column = io.choose_from_list('Y Column:', columns)

    # Empty cells are skipped before conversion.
    # NOTE(review): X and Y are filtered independently, so a row that has only
    # one of the two cells filled shifts the pairing of later rows;
    # probe.trim_to_equal_length presumably only equalizes the lengths --
    # confirm this is the intended handling of incomplete rows.
    x_data = [float(each[x_column]) for each in csv_data if each[x_column]]
    y_data = [float(each[y_column]) for each in csv_data if each[y_column]]
    print('')
    x_data, y_data = probe.trim_to_equal_length(x_data, y_data)
    print('X DATA: {}'.format(x_data))
    print('Y DATA: {}'.format(y_data))
    print('')

    beta_0 = statistics.beta_0(x_data, y_data)
    print(u'\u03B20: {}'.format(beta_0))
    beta_1 = statistics.beta_1(x_data, y_data)
    print(u'\u03B21: {}'.format(beta_1))

    integ = integration.Integrator(20, 0.000001)
    tdist = statistics.make_t_distribution(len(x_data) - 2)

    def itdist(x):
        # Integral of the t-distribution from minus infinity up to x.
        return integ.integrate_minus_infinity_to(tdist, x)

    std_dev = statistics.standard_deviation_around_regression(x_data, y_data)
    print('StdDev:  {}'.format(std_dev))

    projection = beta_0 + beta_1 * estimated_value
    print('Projection:  {}'.format(projection))

    # 0.85 and 0.95 are the cumulative probabilities behind the two-sided
    # 70 and 90 percent labels: (1 + 0.70) / 2 and (1 + 0.90) / 2.
    print('t(70 percent):  {}'.format(
        integration.approximate_inverse(itdist, 0.85)))
    print('t(90 percent):  {}'.format(
        integration.approximate_inverse(itdist, 0.95)))

    range70 = statistics.prediction_range(
        estimated_value, 0.85, x_data, y_data)
    range90 = statistics.prediction_range(
        estimated_value, 0.95, x_data, y_data)

    # Range is the half-width itself; the upper bound adds it to the
    # projection and the lower bound subtracts it.  (The values were
    # previously printed one label off.)
    for percent, half_width in ((70, range70), (90, range90)):
        print('Range({} percent) = {} UPI = {} LPI = {}'.format(
            percent,
            half_width,
            projection + half_width,
            projection - half_width))
def prediction_range(x_k, alpha, xvalues, yvalues):
    """Return the prediction range around the regression estimate at x_k.

    The result is ``t * sigma * sqrt(1 + 1/n + (x_k - x_avg)**2 / S)`` where
    ``t`` is the value at which the integrated t-distribution reaches
    ``alpha``, ``sigma`` is the standard deviation around the regression,
    ``n`` is ``len(xvalues)``, ``x_avg`` is the mean of ``xvalues`` and ``S``
    is the sum of squared deviations of ``xvalues`` from ``x_avg``.

    Arguments:
        x_k(float): An estimated value
        alpha(float): The t-distribution alpha value.
        xvalues(list): A list of values
        yvalues(list): A list of values

    Returns:
        float: The prediction range

    Raises:
        RuntimeError: If either list holds fewer than three values.
    """
    if min(len(xvalues), len(yvalues)) < 3:
        raise RuntimeError('Too few values to compute prediction interval')

    count = len(xvalues)
    t_density = make_t_distribution(count - 2)
    integrator = integration.Integrator(20, 0.00001)

    def cumulative(upper):
        # Integral of the t-distribution from minus infinity up to `upper`.
        return integrator.integrate_minus_infinity_to(t_density, upper)

    sigma = standard_deviation_around_regression(xvalues, yvalues)
    t_value = integration.approximate_inverse(cumulative, alpha)
    x_avg = mean(xvalues)

    scale = t_value * sigma
    squared_spread = sum((value - x_avg) ** 2 for value in xvalues)
    radicand = 1 + 1.0 / count
    radicand += (x_k - x_avg) ** 2 / squared_spread
    return scale * math.sqrt(radicand)
def get_normal_distribution_buckets(self, num_segments):
    """Build the bucket map for a normal distribution split into segments.

    The boundary between bucket ``i`` and bucket ``i + 1`` is the point at
    which the integral of ``statistics.normal_distribution`` from minus
    infinity reaches ``i / num_segments``.  The first bucket has no lower
    bound and the last bucket has no upper bound (both given as ``None``).

    Arguments:
        num_segments(float): The number of buckets required.

    Returns:
        dict: A hash map with one SegmentRange key per bucket, each mapped
            to 0.
    """
    probability_step = 1.0 / num_segments
    quadrature = integration.Integrator(20, 1E-10)

    def cumulative(upper):
        # Integral of the normal distribution from minus infinity to `upper`.
        return quadrature.integrate_minus_infinity_to(
            statistics.normal_distribution, upper)

    # Interior boundaries only; the outermost bounds stay open (None).
    cut_points = [
        integration.approximate_inverse(cumulative, index * probability_step)
        for index in range(1, int(num_segments))
    ]
    lower_bounds = [None] + cut_points
    upper_bounds = cut_points + [None]

    buckets = {}
    for lower, upper in zip(lower_bounds, upper_bounds):
        buckets[SegmentRange(lower, upper)] = 0
    return buckets
def get_normal_distribution_buckets(self, num_items, num_segments):
    """Divide the normal distribution into segments sized by item count.

    ``self.get_segment_allocation(num_items, num_segments)`` supplies the
    number of items expected in each segment.  Each segment's upper bound is
    the point at which the integral of ``statistics.normal_distribution``
    from minus infinity reaches the cumulative share of items allocated so
    far.  The first segment has no lower bound and the last segment has no
    upper bound (both given as ``None``).

    Arguments:
        num_items(int): The number of items to be tested.
        num_segments(int): The number of segments to divide the normal
            distribution into.

    Returns:
        list: A list of SegmentRange objects for each segment.

    Raises:
        ValueError: If num_segments is not greater than zero.
    """
    # This check used to be written as ``assert(condition, message)``, which
    # asserts a non-empty tuple and therefore could never fail.  An explicit
    # raise also keeps the validation active under ``python -O``.
    if num_segments <= 0:
        raise ValueError(
            "number of segments is less than 1: {}".format(num_segments))

    integrator = integration.Integrator(20, 1E-10)

    def func(x):
        # Integral of the normal distribution from minus infinity to x.
        return integrator.integrate_minus_infinity_to(
            statistics.normal_distribution, x)

    results = []
    cumulative_probability = 0
    previous_upper_bound = None
    segment_allocation = self.get_segment_allocation(
        num_items, num_segments)
    # The last segment is handled after the loop: it is closed with an open
    # upper bound instead of an inverse lookup.
    for items_in_segment in segment_allocation[:-1]:
        cumulative_probability += items_in_segment / float(num_items)
        upper_bound = integration.approximate_inverse(
            func, cumulative_probability)
        results.append(SegmentRange(previous_upper_bound, upper_bound))
        previous_upper_bound = upper_bound
    results.append(SegmentRange(previous_upper_bound, None))
    return results