def main():
    """Exercise the Data accessors and the analysis helpers on 'cars.csv'.

    Each result is printed under a short label, followed by a spacer line.
    Every print is written as a single-argument ``print(...)`` call so the
    function parses under Python 3 and still prints the same text under
    Python 2.

    NOTE(review): the analysis functions are called here as
    ``(data, headers)``, while ``main(argv)`` in this file passes
    ``(headers, data)`` -- confirm which order the analysis module expects.
    """
    d = Data('cars.csv')

    # --- raw (unconverted) accessors ---
    print("Raw Headers")
    print(d.get_raw_headers())
    print("\n\n")

    print("Raw number of columns")
    print(d.get_raw_num_columns())
    print("\n\n")

    print("Raw number of rows")
    print(d.get_raw_num_rows())
    print("\n\n")

    print("13th row")
    print(d.get_raw_row(13))
    print("\n\n")

    print("Value at row 6, header 'Car'")
    print(d.get_raw_value(6, 'Car'))
    print("\n\n")

    # --- matrix accessors ---
    print("Matrix data")
    print(d.matrix_data)
    print("\n\n")

    print("Headers")
    print(d.get_headers())
    print("\n\n")

    print("Number of cols")
    print(d.get_num_columns())
    print("\n\n")

    print("5th row")
    print(d.get_row(5))
    print("\n\n")

    print("Get value")
    print(d.get_value(5, 'Horsepower'))
    print("\n\n")

    print("get_data function")
    print(d.get_data(['Origin', 'Horsepower']))
    print("\n\n")

    # --- analysis helpers ---
    print("data range")
    print(analysis.data_range(d, ['Origin', 'Horsepower']))
    print("\n\n")

    print("mean of horsepower and origin")
    print(analysis.mean(d, ['Horsepower', 'Origin']))
    print("\n\n")

    print("standard deviation for horsepower and origin")
    print(analysis.stdev(d, ['Horsepower', 'Origin']))
    print("\n")  # the original uses a shorter spacer here; kept as-is

    print("normalized columns origin and horsepower")
    print(analysis.normalize_columns_separately(d, ['Origin', 'Horsepower']))
    print("\n\n")

    print("normalized together origin and horsepower")
    print(analysis.normalize_columns_together(d, ['Origin', 'Horsepower']))
    print("\n\n")

    print("median of columns origin, horspower and weight")
    print(analysis.median(d, ['Origin', 'Horsepower', 'Weight']))

    print(d.get_data(['Origin', 'Horsepower']).shape)
def main(argv):
    """Run the analysis functions and Data extensions on a CSV file.

    Args:
        argv: Command-line argument list. ``argv[0]`` is used as the script
            name in the usage message and ``argv[1]`` is passed to ``Data``
            as the CSV filename.

    Raises:
        SystemExit: with status 0, after printing a usage message, when
            fewer than two arguments are supplied.
    """
    # Local import: sys.exit is the supported way to terminate a program.
    # The bare exit() builtin is injected by the site module for interactive
    # use and is absent under ``python -S`` or in frozen builds.
    import sys

    # test command line arguments
    if len(argv) < 2:
        print('Usage: python %s <csv filename>' % (argv[0]))
        sys.exit(0)

    # create a data object, which reads in the data
    dobj = Data(argv[1])
    headers = dobj.get_headers()

    def columns():
        # Build a fresh list on every call, exactly like the inline
        # [headers[0], headers[2]] literals this replaces, so callee-side
        # mutation or later changes to ``headers`` behave the same.
        return [headers[0], headers[2]]

    # test the five analysis functions
    print(columns())
    print("Data range by column:", analysis.data_range(columns(), dobj))
    print("Mean:", analysis.mean(columns(), dobj))
    print("Standard deviation:", analysis.stdev(columns(), dobj))
    print("Normalize columns separately:",
          analysis.normalize_columns_separately(columns(), dobj))
    print("Normalize columns together:",
          analysis.normalize_columns_together(columns(), dobj))

    # Extension 1
    print("Median:", analysis.median(columns(), dobj))

    # Extension 2
    print("Median Separately:", analysis.median_separately(columns(), dobj))

    # Extension 3
    print("just few rows:", dobj.limit_rows())

    # Extension 4
    print(
        "just a few columns. I changed the limit to 2 for demonstration purposes:",
        dobj.limit_columns())

    # Extension 5 (third positional argument True; the label suggests it
    # requests an overall range -- confirm against analysis.data_range)
    print("Data range overall:", analysis.data_range(columns(), dobj, True))

    # Extension 6: print the last row, add a row, print the last row again.
    print(
        "The next two print statements get the last row of data. I add a row of data in between,"
        "so they are different.")
    print(dobj.get_row(-1))
    dobj.add_point([1, 2, 3])
    print(dobj.get_row(-1))
# # Run multiple linear regression on the Data objects
# analysis.testRegression(dataClean)
# analysis.testRegression(dataGood)
# analysis.testRegression(dataNoisy)

# Load the stock CSV into a Data object.
data = Data(filename='GOOG-NASDAQ_TSLA.csv')

# Column headers handed to every analysis call below. Kept as a tuple and
# copied with list(...) per call so each function receives its own fresh list.
_columns = ('Open', 'Close', 'Volume')

# (label, analysis function name) pairs, printed in this order. The function
# is looked up only when its line is printed, one statistic at a time.
_summary_stats = (
    ("Mean: ", "mean"),
    ("Standard deviation: ", "stdev"),
    ("Ranges: ", "dataRange"),
    ("Normalized columns: ", "normalizeColumnsSeparately"),
    ("Normalized globally: ", "normalizeColumnsTogether"),
    ("Variance: ", "variance"),
    ("Median: ", "median"),
    ("Mode value: ", "modeValue"),
    ("Mode frequency: ", "modeFreq"),
)

# Print the descriptive statistics.
print("\n\nDescriptive statistics of Tesla's stock data (daily open and close prices and trading volume:")
for _label, _func_name in _summary_stats:
    print(_label, getattr(analysis, _func_name)(list(_columns), data))
# The final statistic carries an extra trailing newline argument.
print("Range value: ", analysis.rangeDiff(list(_columns), data), "\n")

data.printData(20)

# Exercise the mutating methods of the Data object: overwrite one value,
# overwrite 'Close' with the 'Open' column, append a copy of 'Volume' as the
# new column 'Volume2', then append one raw row and one numeric row.
data.set_value(0.0001, 5, 'Open')
open_column = data.get_column('Open')
data.set_column(open_column, 'Close')
volume_column = data.get_column('Volume')
data.add_column(volume_column, 'Volume2', 'numeric')
data.add_raw_row(['6/28/10', 2.0, 3.0, 4.0, 5.0, 1000.0, 3])
data.add_row([1.0, 2.0, 3.0, 4.0, 5.0, 6])

# Print the data again after the edits. The original author's note describes
# this as showing the whole five-year data set; the argument is still 20, the
# same as the earlier call -- TODO confirm what printData's argument means.
data.printData(20)