def main(): """The application entry point""" print 'EXERCISE 1A' print '===========' print 'This program takes a CSV file, asks you to select a row from that' print 'file, and then computes the mean and standard deviation of the' print 'values in that row.' print file_path = io.get_and_confirm_input('Enter csv file with values: ') data = io.read_csv_file(file_path) if not data: raise RuntimeError('No data found in file {}'.format(file_path)) column = io.choose_from_list( 'Which column would you like to use:', data[0].keys()) if column not in data[0]: raise RuntimeError('Invalid column {}'.format(column)) values = linked_list.LinkedList() for each in data: values.insert(each[column]) for each in values: print each print 'Mean: ', statistics.mean(values) print 'Std Dev: ', statistics.standard_deviation(values)
def execute(self): print '8A: Load CSV file and sort by selected column' print file_name = io.prompt_existant_file_name('CSV file to sort: ') data = io.read_csv_file(file_name) if not data: print 'ERROR: File contains no data.' sys.exit(1) column_names = data[0].keys() sort_column = io.choose_from_list('Column to sort on', column_names) try: for each in data: each[sort_column] = float(each[sort_column]) except ValueError: print 'ERROR: Column {} contains non-integer value.'.format( sort_column) sys.exit(1) sorted_data = sorted(data, key=lambda item: item[sort_column]) table = display_table.DisplayTable(column_names) for each in sorted_data: table.add_row(each.values()) table.display()
def execute(self): parser = argparse.ArgumentParser() parser.add_argument( 'CSVFILE', help='path to csv file with historical data.') parser.add_argument( 'ESTVAL', help='an estimated value') args = parser.parse_args() file_path = args.CSVFILE estimated_value = float(args.ESTVAL) csv_data = io.read_csv_file(file_path) if not csv_data: print 'ERROR: No data' sys.exit(1) columns = csv_data[0].keys() x_column = io.choose_from_list('X Column:', columns) y_column = io.choose_from_list('Y Column:', columns) x_data = [float(each[x_column]) for each in csv_data if each[x_column]] y_data = [float(each[y_column]) for each in csv_data if each[y_column]] print x_data, y_data = probe.trim_to_equal_length(x_data, y_data) print 'X DATA: {}'.format(x_data) print 'Y DATA: {}'.format(y_data) print beta_0 = statistics.beta_0(x_data, y_data) print u'\u03B20: {}'.format(beta_0) beta_1 = statistics.beta_1(x_data, y_data) print u'\u03B21: {}'.format(beta_1) integ = integration.Integrator(20, 0.000001) tdist = statistics.make_t_distribution(len(x_data) - 2) itdist = lambda x: integ.integrate_minus_infinity_to(tdist, x) std_dev = ( statistics.standard_deviation_around_regression(x_data, y_data) ) print "StdDev: ", std_dev projection = beta_0 + beta_1 * estimated_value print 'Projection: ', projection print 't(70 percent): ', integration.approximate_inverse(itdist, 0.85) print 't(90 percent): ', integration.approximate_inverse(itdist, 0.95) range70 = statistics.prediction_range( estimated_value, 0.85, x_data, y_data ) range90 = statistics.prediction_range( estimated_value, 0.95, x_data, y_data ) print 'Range(70 percent) =', projection + range70, \ 'UPI =', projection - range70, 'LPI =', range70 print 'Range(90 percent) =', projection + range90, \ 'UPI =', projection - range90, 'LPI =', range90
def execute(self): """Prompt the user for a CSV file and column selection. Then, perform chi-squared test on the data given. """ print 'PSP Exercise 9A' print 'This program performs the chi-squared test on data given.' print file_path = self.get_file_name() data = io.read_csv_file(file_path) test_data = self.get_test_column(data) q_val, p_val = chi_squared.ChiSquaredTest().execute(test_data) print 'Q: ', q_val print 'P: ', 1.0 - p_val
def execute(self): """Run the program""" parser = argparse.ArgumentParser() parser.add_argument('CSVFILE', help='path to csv file with data.') args = parser.parse_args() csv_data = io.read_csv_file(args.CSVFILE) if not csv_data: print 'ERROR: Invalid csv data file.' sys.exit(1) columns = csv_data[0].keys() x_column = io.choose_from_list('X Column:', columns) y_column = io.choose_from_list('Y Column:', columns) x_data = [float(each[x_column]) for each in csv_data if each[x_column]] y_data = [float(each[y_column]) for each in csv_data if each[x_column]] print 'R:', statistics.correlation(x_data, y_data) print 'T:', statistics.t_value(x_data, y_data) print 'Significance:', statistics.significance(x_data, y_data)
def execute(self): """Calculate and display the relative size table report""" data = io.read_csv_file(self.file_path) if not data: print "NO DATA" return if not all([self.are_valid_keys(each.keys()) for each in data]): raise RuntimeError( 'Invalid data columns {}'.format(data[0].keys())) normalized_data = self.get_normalized_data(data) normalized_by_category = self.group_by_category(normalized_data) results = {} for category, items in normalized_by_category.iteritems(): if len(items) < 2: results[category] = [items[0]] * 5 else: results[category] = statistics.size_ranges(items) self.print_table(results)
def from_csv_file(cls, filename):
    """Build an instance from the historical data stored in a CSV file.

    Arguments:
        filename(str): A file name

    Returns:
        HistoricalData: An instance of cls whose keyword arguments are the
            values collected from the corresponding CSV columns.
    """
    rows = io.read_csv_file(filename)

    # Constructor keyword -> CSV column heading, in collection order.
    field_columns = (
        ('planned_sizes', "Planned A+M Size"),
        ('proxy_sizes', "Proxy Size Estimate"),
        ('actual_sizes', "Actual A+M Size"),
        ('planned_times', "Planned Time"),
        ('actual_times', "Actual Time"),
    )

    init_kwargs = {}
    for field, heading in field_columns:
        init_kwargs[field] = collect(heading, rows)
    return cls(**init_kwargs)
def execute(self): parser = argparse.ArgumentParser() parser.add_argument( 'CSVFILE', help='path to csv file with historical data.') args = parser.parse_args() file_path = args.CSVFILE csv_data = io.read_csv_file(file_path) if not csv_data: print 'ERROR: No data' sys.exit(1) columns = csv_data[0].keys() x_column = io.choose_from_list('X Column:', columns) y_column = io.choose_from_list('Y Column:', columns) x_data = [float(each[x_column]) for each in csv_data] y_data = [float(each[y_column]) for each in csv_data] #x_data, y_data = statistics.remove_outliers(x_data, y_data) print print 'X DATA: {}'.format(x_data) print 'Y DATA: {}'.format(y_data) print beta_0 = statistics.beta_0(x_data, y_data) print u'\u03B20: {}'.format(beta_0) warnings = statistics.beta_0_warnings(beta_0) if warnings: print 'WARNINGS:' print '\n'.join(warnings) print beta_1 = statistics.beta_1(x_data, y_data) print u'\u03B21: {}'.format(beta_1) warnings = statistics.beta_1_warnings(beta_1) if warnings: print 'WARNINGS:' print '\n'.join(warnings) print