def main():
    """Exercise the Data accessors and the analysis helpers on 'cars.csv'.

    Loads 'cars.csv' into a Data object and prints, under a short label,
    the result of each raw accessor, each numeric accessor and each
    analysis function in turn.

    Every print uses the single-argument call form, which produces the
    same output on Python 2 and Python 3 (the statement form is a syntax
    error on Python 3).
    """
    d = Data('cars.csv')

    # --- raw (string) view of the file ---
    print("Raw Headers")
    print(d.get_raw_headers())
    print("\n\n")

    print("Raw number of columns")
    print(d.get_raw_num_columns())
    print("\n\n")

    print("Raw number of rows")
    print(d.get_raw_num_rows())
    print("\n\n")

    print("13th row")
    print(d.get_raw_row(13))
    print("\n\n")

    print("Value at row 6, header 'Car'")
    print(d.get_raw_value(6, 'Car'))
    print("\n\n")

    # --- matrix view ---
    print("Matrix data")
    print(d.matrix_data)
    print("\n\n")

    print("Headers")
    print(d.get_headers())
    print("\n\n")

    print("Number of cols")
    print(d.get_num_columns())
    print("\n\n")

    print("5th row")
    print(d.get_row(5))
    print("\n\n")

    print("Get value")
    print(d.get_value(5, 'Horsepower'))
    print("\n\n")

    print("get_data function")
    print(d.get_data(['Origin', 'Horsepower']))
    print("\n\n")

    # --- analysis functions ---
    print("data range")
    print(analysis.data_range(d, ['Origin', 'Horsepower']))
    print("\n\n")

    print("mean of horsepower and origin")
    print(analysis.mean(d, ['Horsepower', 'Origin']))
    print("\n\n")

    print("standard deviation for horsepower and origin")
    print(analysis.stdev(d, ['Horsepower', 'Origin']))
    print("\n")

    print("normalized columns origin and horsepower")
    print(analysis.normalize_columns_separately(d, ['Origin', 'Horsepower']))
    print("\n\n")

    print("normalized together origin and horsepower")
    print(analysis.normalize_columns_together(d, ['Origin', 'Horsepower']))
    print("\n\n")

    print("median of columns origin, horspower and weight")
    print(analysis.median(d, ['Origin', 'Horsepower', 'Weight']))

    print(d.get_data(['Origin', 'Horsepower']).shape)
def main(argv):
    """Print a summary of a CSV data set and run the basic analysis functions.

    Args:
        argv: Command-line argument list. argv[0] is used as the program
            name in the usage message; argv[1] is the CSV filename handed
            to data.Data.

    When fewer than two arguments are supplied, a usage message is printed
    and the process exits with status 0.
    """
    # test command line arguments
    if len(argv) < 2:
        print('Usage: python %s <csv filename>' % (argv[0]))
        exit(0)

    # create a data object, which reads in the data
    dobj = data.Data(argv[1])

    # print out information about the data
    print('Number of rows: ', dobj.get_num_points())
    print('Number of columns: ', dobj.get_num_dimensions())

    # print out the headers; join() also copes with an empty header list
    print("\nHeaders:")
    headers = dobj.get_headers()
    print(", ".join(headers))

    # print out the types (no loop variable, so builtin `type` is untouched)
    print("\nTypes")
    print(", ".join(dobj.get_types()))

    # print out a single row
    print("\nPrinting row index 2")
    print(dobj.get_row(2))

    # print out all of the data: first column as-is, the remaining columns
    # right-aligned in 10 characters and truncated to 3
    print("\nData")
    print("headers:", headers)
    for i in range(dobj.get_num_points()):
        cells = [str(dobj.get_value(headers[0], i))]
        cells.extend(
            "%10.3s" % (dobj.get_value(header, i)) for header in headers[1:]
        )
        print("".join(cells))

    print("\n\n\n\nselect_columns")
    columns = ('thing1', 'thing3')
    # Each call gets its own list, as the original literals provided.
    print("Selected columns:", dobj.select_columns(list(columns)))
    print("Data range:", analysis.data_range(list(columns), dobj))
    print("Mean:", analysis.mean(list(columns), dobj))
    print("Standard deviation:", analysis.stdev(list(columns), dobj))
    print("Normalize columns separately:",
          analysis.normalize_columns_separately(list(columns), dobj))
    print("Normalize columns together:",
          analysis.normalize_columns_together(list(columns), dobj))
def main(argv):
    """Run the analysis functions and Data extensions on two columns of a CSV.

    Args:
        argv: Command-line argument list. argv[0] is the program name used
            in the usage message; argv[1] is the CSV filename given to Data.

    The columns analysed are the first and third entries of
    dobj.get_headers(). With fewer than two arguments a usage message is
    printed and the process exits with status 0.
    """
    # Guard: a filename is required.
    if len(argv) < 2:
        print('Usage: python %s <csv filename>' % (argv[0]))
        exit(0)

    # Reading the file happens inside the Data constructor.
    dobj = Data(argv[1])
    headers = dobj.get_headers()

    def picked():
        # Build the column selection anew on every use so each callee
        # receives its own list, evaluated at call time.
        return [headers[0], headers[2]]

    # The five core analysis functions.
    print(picked())
    print("Data range by column:", analysis.data_range(picked(), dobj))
    print("Mean:", analysis.mean(picked(), dobj))
    print("Standard deviation:", analysis.stdev(picked(), dobj))
    print("Normalize columns separately:",
          analysis.normalize_columns_separately(picked(), dobj))
    print("Normalize columns together:",
          analysis.normalize_columns_together(picked(), dobj))

    # Extension 1
    print("Median:", analysis.median(picked(), dobj))

    # Extension 2
    print("Median Separately:", analysis.median_separately(picked(), dobj))

    # Extension 3
    print("just few rows:", dobj.limit_rows())

    # Extension 4
    print(
        "just a few columns. I changed the limit to 2 for demonstration purposes:",
        dobj.limit_columns())

    # Extension 5: third positional argument True is passed through to
    # analysis.data_range.
    print("Data range overall:", analysis.data_range(picked(), dobj, True))

    # Extension 6: show the last row before and after appending a point.
    print(
        "The next two print statements get the last row of data. I add a row of data in between,"
        "so they are different.")
    print(dobj.get_row(-1))
    dobj.add_point([1, 2, 3])
    print(dobj.get_row(-1))
def test(filename):
    """Add two columns to a Data object and print several analysis results.

    Args:
        filename: Passed to the Data constructor and also, unchanged, as
            the second argument of every `an` analysis call.

    NOTE(review): the analysis calls receive `filename`, not the `data`
    object built here -- confirm that this is what the `an` functions
    expect.
    """
    data = Data(filename)

    # Fifteen values per new column.
    enum_values = ['a'] * 9 + ['aa', 'aaa', 'a', 'a', 'a', 'aa']
    numeric_values = [1] * 5 + [2] * 5 + [4, 3, 3, 4, 5]
    data.addColumn('enumstuff3', 'enum', enum_values)
    data.addColumn('numberstuff3', 'numeric', numeric_values)

    print(data.get_data())
    # The string form is computed but its result is discarded.
    data.__str__()

    def first_two():
        # Re-query the headers on each use; the first two are analysed.
        return [data.get_headers()[0], data.get_headers()[1]]

    print(an.data_range(first_two(), filename))
    print(an.mean(first_two(), filename))
    print(an.stdev(first_two(), filename))
    # NOTE(review): "seperately" is spelled as the call site has it;
    # presumably it matches the name defined in `an` -- verify.
    print(an.normalize_columns_seperately(first_two(), filename))
    print(an.normalize_columns_together(first_two(), filename))
# Dump the numeric matrix held by the data object.
print(dobj.numeric_matrix)

# print out the types; join() avoids rebinding the builtin `type` and
# also handles an empty type list
print("\nTypes:")
types = dobj.get_types()
s = ", ".join(types)
print(s)

# Run each analysis function over all headers and print its result.
r = analysis.data_range(headers, dobj)
print("Data Range:\n ", r)

mean = analysis.mean(headers, dobj)
print("Mean: \n", mean)

std = analysis.stdev(headers, dobj)
print("Standard Deviation: \n", std)

nor_m1 = analysis.normalize_columns_separately(headers, dobj)
print("Normalized Columns Separately: \n", nor_m1)

nor_m2 = analysis.normalize_columns_together(headers, dobj)
print("Normalized Columns Together: \n", nor_m2)

# Disabled add-column demo, kept for reference:
#dobj.add_colummn('new col','numeric', [1,2,3,4,5,6,7,8,9,10,11,12,13,14])
#print("\nAdd new column: 'new col','numeric', [1,2,3,4,5,6,7,8,9,10,11,12,13,14]")
#print("----- New Matrix: -----")
#print(dobj.get_whole_matrix())
# Disabled regression demo, kept for reference:
# # Load the data files into a Data object
# dataClean = Data(filename='data-clean.csv')
# dataGood = Data(filename='data-good.csv')
# dataNoisy = Data(filename='data-noisy.csv')
#
# # Run multiple linear regression on the Data objects
# analysis.testRegression(dataClean)
# analysis.testRegression(dataGood)
# analysis.testRegression(dataNoisy)

data = Data(filename='GOOG-NASDAQ_TSLA.csv')

# Columns every statistic below is computed over. A new list is built for
# each call so no callee shares a list object with another.
stock_columns = ('Open', 'Close', 'Volume')

# Print one labelled line per analysis function.
print("\n\nDescriptive statistics of Tesla's stock data (daily open and close prices and trading volume:")
print("Mean: ", analysis.mean(list(stock_columns), data))
print("Standard deviation: ", analysis.stdev(list(stock_columns), data))
print("Ranges: ", analysis.dataRange(list(stock_columns), data))
print("Normalized columns: ",
      analysis.normalizeColumnsSeparately(list(stock_columns), data))
print("Normalized globally: ",
      analysis.normalizeColumnsTogether(list(stock_columns), data))
print("Variance: ", analysis.variance(list(stock_columns), data))
print("Median: ", analysis.median(list(stock_columns), data))
print("Mode value: ", analysis.modeValue(list(stock_columns), data))
print("Mode frequency: ", analysis.modeFreq(list(stock_columns), data))
print("Range value: ", analysis.rangeDiff(list(stock_columns), data), "\n")

data.printData(20)

# Exercise the mutators: set one value, overwrite 'Close' with the 'Open'
# column, then add 'Volume2' from the 'Volume' column.
data.set_value(0.0001, 5, 'Open')
data.set_column(data.get_column('Open'), 'Close')
data.add_column(data.get_column('Volume'), 'Volume2', 'numeric')
def main():
    """Summarise a CSV data set, run the analysis functions, and export it.

    The CSV filename is read from sys.argv[1]. The function prints the
    file name, row/column counts, headers, numeric headers and types,
    then the result of each analysis function. It finally adds a column
    named 'new col', prints the resulting matrix and writes headers,
    types and matrix rows to 'foo.csv'.

    When fewer than two command-line arguments are present, a usage
    message is printed and the process exits with status 0.
    """
    numpy.set_printoptions(suppress=True)
    print("\n----- Database Info -----")

    if len(sys.argv) < 2:
        print('Usage: python %s <csv filename>' % (sys.argv[0]))
        exit(0)

    # create a data object, which reads in the data
    dobj = data.Data(sys.argv[1])
    print("\nName: ", dobj.get_filename())

    # print out information about the data
    print('Number of rows: ', dobj.get_num_points())
    print('Number of numeric columns: ', dobj.get_num_dimensions())

    # print out the headers; join() also copes with an empty list
    print("\nHeaders:")
    headers = dobj.get_headers()
    print(", ".join(headers))

    # print out the numeric headers
    print("\nNumeric Headers:")
    nheaders = dobj.get_numericheaders()
    print(", ".join(nheaders))

    # print out the types (no loop variable, so builtin `type` is untouched)
    print("\nTypes:")
    print(", ".join(dobj.get_types()))

    print("Data Range:\n ", analysis.data_range(headers, dobj))
    print("Mean: \n", analysis.mean(headers, dobj))
    print("Standard Deviation: \n", analysis.stdev(headers, dobj))

    # Normalisation is only run when every header is a numeric header.
    if headers == nheaders:
        print("Normalized Columns Separately: \n",
              analysis.normalize_columns_separately(headers, dobj))
        print("Normalized Columns Together: \n",
              analysis.normalize_columns_together(headers, dobj))

    print("Sum:\n", analysis.sumup(headers, dobj))
    print("Variance:\n", analysis.variance(headers, dobj))

    # EXTENSION5 ADD COLUMN
    dobj.add_colummn('new col', 'numeric',
                     [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14])
    print(
        "\nAdd new column: 'new col','numeric', [1,2,3,4,5,6,7,8,9,10,11,12,13,14]"
    )
    print("----- New Matrix: -----")
    m = dobj.get_whole_matrix()
    print(m)
    print('Number of rows: ', dobj.get_num_points())
    print('Number of numeric columns: ', dobj.get_num_dimensions())
    print("---------------------------------")

    # EXTENSION6 WRITE TO A CSV file
    # newline='' lets the csv writer control line endings itself; without
    # it, platforms that translate '\n' get an extra blank line per row.
    with open('foo.csv', 'w', newline='') as outputfile:
        wr = csv.writer(outputfile, delimiter=',')
        wr.writerow(dobj.get_headers())
        wr.writerow(dobj.get_types())
        wr.writerows(numpy.asarray(m))