Ejemplo n.º 1
0
def main():
    """Exercise the Data class and the analysis helpers on 'cars.csv'.

    Prints, in order: the raw accessors of the Data object (headers, column
    and row counts, one raw row, one raw value), its ``matrix_data``
    attribute and the matching accessors, and finally the result of each
    ``analysis`` function for a handful of columns.

    Every ``print`` is written as a call with exactly one argument, which
    emits the same text under Python 2 and Python 3.
    """
    d = Data('cars.csv')

    # --- raw (unconverted) accessors -------------------------------------
    print("Raw Headers")
    print(d.get_raw_headers())
    print("\n\n")
    print("Raw number of columns")
    print(d.get_raw_num_columns())
    print("\n\n")
    print("Raw number of rows")
    print(d.get_raw_num_rows())
    print("\n\n")
    print("13th row")
    print(d.get_raw_row(13))
    print("\n\n")
    print("Value at row 6, header 'Car'")
    print(d.get_raw_value(6, 'Car'))
    print("\n\n")

    # --- matrix data and its accessors -----------------------------------
    print("Matrix data")
    print(d.matrix_data)
    print("\n\n")
    print("Headers")
    print(d.get_headers())
    print("\n\n")
    print("Number of cols")
    print(d.get_num_columns())
    print("\n\n")
    print("5th row")
    print(d.get_row(5))
    print("\n\n")
    print("Get value")
    print(d.get_value(5, 'Horsepower'))
    print("\n\n")
    print("get_data function")
    print(d.get_data(['Origin', 'Horsepower']))
    print("\n\n")

    # --- analysis functions (this module passes the Data object first) ---
    print("data range")
    print(analysis.data_range(d, ['Origin', 'Horsepower']))
    print("\n\n")
    print("mean of horsepower and origin")
    print(analysis.mean(d, ['Horsepower', 'Origin']))
    print("\n\n")
    print("standard deviation for horsepower and origin")
    print(analysis.stdev(d, ['Horsepower', 'Origin']))
    print("\n")
    print("normalized columns origin and horsepower")
    print(analysis.normalize_columns_separately(d, ['Origin', 'Horsepower']))
    print("\n\n")
    print("normalized together origin and horsepower")
    print(analysis.normalize_columns_together(d, ['Origin', 'Horsepower']))
    print("\n\n")
    print("median of columns origin, horspower and weight")
    print(analysis.median(d, ['Origin', 'Horsepower', 'Weight']))
    print(d.get_data(['Origin', 'Horsepower']).shape)
Ejemplo n.º 2
0
def main(argv):
    """Print a summary of a CSV file and run the analysis functions on it.

    Args:
        argv: Command-line style argument list. ``argv[0]`` is the program
            name shown in the usage line and ``argv[1]`` is the CSV filename
            handed to ``data.Data``.

    Raises:
        SystemExit: With status 0, after printing a usage line, when no
            filename was supplied.
    """
    # test command line arguments
    if len(argv) < 2:
        print('Usage: python %s <csv filename>' % (argv[0],))
        # Raise directly instead of calling the site-provided exit() helper,
        # which is meant for interactive sessions and may not be defined.
        raise SystemExit(0)

    # create a data object, which reads in the data
    dobj = data.Data(argv[1])

    # print out information about the data
    print('Number of rows:    ', dobj.get_num_points())
    print('Number of columns: ', dobj.get_num_dimensions())

    # print out the headers (join also copes with an empty header list)
    print("\nHeaders:")
    headers = dobj.get_headers()
    print(", ".join(headers))

    # print out the types
    print("\nTypes")
    print(", ".join(dobj.get_types()))

    # print out a single row
    print("\nPrinting row index 2")
    print(dobj.get_row(2))

    # print out all of the data, one line per row: the first column as-is,
    # every other column truncated to 3 characters and right-aligned in 10
    print("\nData")
    print("headers:", headers)
    for i in range(dobj.get_num_points()):
        cells = [str(dobj.get_value(headers[0], i))]
        # 1-tuple operand so a tuple-valued cell is formatted, not unpacked
        cells.extend("%10.3s" % (dobj.get_value(header, i),)
                     for header in headers[1:])
        print("".join(cells))

    print("\n\n\n\nselect_columns")

    columns = ['thing1', 'thing3']
    print("Selected columns:", dobj.select_columns(columns))

    print("Data range:", analysis.data_range(columns, dobj))
    print("Mean:", analysis.mean(columns, dobj))
    print("Standard deviation:", analysis.stdev(columns, dobj))
    print("Normalize columns separately:",
          analysis.normalize_columns_separately(columns, dobj))
    print("Normalize columns together:",
          analysis.normalize_columns_together(columns, dobj))
Ejemplo n.º 3
0
def main(argv):
    """Demonstrate the analysis functions and Data extensions on a CSV file.

    The first and third headers reported by the Data object are used as the
    column selection for every analysis call.

    Args:
        argv: Command-line style argument list. ``argv[0]`` is the program
            name shown in the usage line and ``argv[1]`` is the CSV filename
            handed to ``Data``.

    Raises:
        SystemExit: With status 0, after printing a usage line, when no
            filename was supplied.
    """
    # test command line arguments
    if len(argv) < 2:
        print('Usage: python %s <csv filename>' % (argv[0],))
        # Raise directly instead of calling the site-provided exit() helper,
        # which is meant for interactive sessions and may not be defined.
        raise SystemExit(0)

    # create a data object, which reads in the data
    dobj = Data(argv[1])
    headers = dobj.get_headers()
    columns = [headers[0], headers[2]]

    # test the five analysis functions
    print(columns)
    print("Data range by column:", analysis.data_range(columns, dobj))
    print("Mean:", analysis.mean(columns, dobj))
    print("Standard deviation:", analysis.stdev(columns, dobj))
    print("Normalize columns separately:",
          analysis.normalize_columns_separately(columns, dobj))
    print("Normalize columns together:",
          analysis.normalize_columns_together(columns, dobj))

    # Extension 1
    print("Median:", analysis.median(columns, dobj))

    # Extension 2
    print("Median Separately:", analysis.median_separately(columns, dobj))

    # Extension 3
    print("just  few rows:", dobj.limit_rows())

    # Extension 4
    print(
        "just a few columns. I changed the limit to 2 for demonstration purposes:",
        dobj.limit_columns())

    # Extension 5: same call as above with an extra positional True
    print("Data range overall:", analysis.data_range(columns, dobj, True))

    # Extension 6: show the last row, append a point, show the last row again
    print(
        "The next two print statements get the last row of data. I add a row of data in between, "
        "so they are different.")
    print(dobj.get_row(-1))
    dobj.add_point([1, 2, 3])
    print(dobj.get_row(-1))
Ejemplo n.º 4
0
def test(filename):
    """Smoke-test column insertion and the ``an`` analysis calls.

    Loads ``filename`` into a ``Data`` object, appends one 'enum' column and
    one 'numeric' column of 15 values each, prints the data, and then prints
    the result of each analysis function for the first two headers.

    Args:
        filename: Value handed to ``Data``; the same value is also passed as
            the second argument of every ``an`` call.
    """
    data = Data(filename)

    enum_values = ['a'] * 9 + ['aa', 'aaa', 'a', 'a', 'a', 'aa']
    data.addColumn('enumstuff3', 'enum', enum_values)

    numeric_values = [1] * 5 + [2] * 5 + [4, 3, 3, 4, 5]
    data.addColumn('numberstuff3', 'numeric', numeric_values)

    def leading_pair():
        # Query the headers afresh for each element, as every call site does.
        return [data.get_headers()[0], data.get_headers()[1]]

    print(data.get_data())
    data.__str__()  # return value is discarded

    print(an.data_range(leading_pair(), filename))
    print(an.mean(leading_pair(), filename))
    print(an.stdev(leading_pair(), filename))
    print(an.normalize_columns_seperately(leading_pair(), filename))
    print(an.normalize_columns_together(leading_pair(), filename))
Ejemplo n.º 5
0
    # NOTE(review): this snippet begins mid-function; `dobj`, `headers` and
    # `analysis` are bound somewhere above the visible lines.

    # Dump the Data object's numeric_matrix attribute.
    print(dobj.numeric_matrix)

    # print out the types as one comma-separated line
    # NOTE(review): the loop variable `type` shadows the builtin of the same
    # name for the rest of this scope.
    print("\nTypes:")
    types = dobj.get_types()
    s = types[0]
    for type in types[1:]:
        s += ", " + type
    print(s)

    # Run each analysis function over `headers` and print its result under a
    # label.
    r = analysis.data_range(headers, dobj)
    print("Data Range:\n ", r)
    mean = analysis.mean(headers, dobj)
    print("Mean: \n", mean)

    std = analysis.stdev(headers, dobj)
    print("Standard Deviation: \n", std)

    # Disabled duplicate of the standard-deviation step above.
    #std = analysis.stdev(headers, dobj)
    #print("Standard Deviation: \n", std)

    nor_m1 = analysis.normalize_columns_separately(headers, dobj)
    print("Normalized Columns Separately: \n", nor_m1)

    nor_m2 = analysis.normalize_columns_together(headers, dobj)
    print("Normalized Columns Together: \n", nor_m2)

    # Disabled: add-a-column demonstration.
    #dobj.add_colummn('new col','numeric', [1,2,3,4,5,6,7,8,9,10,11,12,13,14])
    #print("\nAdd new column: 'new col','numeric', [1,2,3,4,5,6,7,8,9,10,11,12,13,14]")
    #print("----- New Matrix: -----")
    #print(dobj.get_whole_matrix())
Ejemplo n.º 6
0
    # NOTE(review): this snippet begins mid-function; `Data` and `analysis`
    # are bound somewhere above the visible lines.

    # Disabled: regression test over three other data files.
    # # Load the data files into a Data object
    # dataClean = Data(filename='data-clean.csv')
    # dataGood = Data(filename='data-good.csv')
    # dataNoisy = Data(filename='data-noisy.csv')
    #
    # # Run multiple linear regression on the Data objects
    # analysis.testRegression(dataClean)
    # analysis.testRegression(dataGood)
    # analysis.testRegression(dataNoisy)

    data = Data(filename='GOOG-NASDAQ_TSLA.csv')

    # print out some analyses, each over the 'Open', 'Close' and 'Volume'
    # columns
    # NOTE(review): the first message opens a parenthesis that is never
    # closed.
    print("\n\nDescriptive statistics of Tesla's stock data (daily open and close prices and trading volume:")
    print("Mean: ", analysis.mean(['Open', 'Close', 'Volume'], data))
    print("Standard deviation: ", analysis.stdev(['Open', 'Close', 'Volume'], data))
    print("Ranges: ", analysis.dataRange(['Open', 'Close', 'Volume'], data))
    print("Normalized columns: ", analysis.normalizeColumnsSeparately(['Open', 'Close', 'Volume'], data))
    print("Normalized globally: ", analysis.normalizeColumnsTogether(['Open', 'Close', 'Volume'], data))
    print("Variance: ", analysis.variance(['Open', 'Close', 'Volume'], data))
    print("Median: ", analysis.median(['Open', 'Close', 'Volume'], data))
    print("Mode value: ", analysis.modeValue(['Open', 'Close', 'Volume'], data))
    print("Mode frequency: ", analysis.modeFreq(['Open', 'Close', 'Volume'], data))
    print("Range value: ", analysis.rangeDiff(['Open', 'Close', 'Volume'], data), "\n")

    # presumably prints the first 20 rows — TODO confirm against
    # Data.printData
    data.printData(20)

    # manipulate the data to show their efficacy
    # presumably the arguments are (value, row, header) — TODO confirm
    # against Data.set_value
    data.set_value(0.0001, 5, 'Open')
    # Pass the 'Open' column's values to set_column under the name 'Close'.
    data.set_column(data.get_column('Open'), 'Close')
    # Pass the 'Volume' column's values to add_column as 'Volume2', typed
    # 'numeric'.
    data.add_column(data.get_column('Volume'), 'Volume2', 'numeric')
Ejemplo n.º 7
0
def main():
    """Print a report on the CSV named by ``sys.argv[1]`` and export it.

    The report covers the file name, row and numeric-column counts, headers,
    numeric headers, types, and the result of each ``analysis`` function over
    all headers. A column named 'new col' is then added and the resulting
    whole matrix is printed and written, below a header row and a type row,
    to 'foo.csv' in the current directory.

    Raises:
        SystemExit: With status 0, after printing a usage line, when no
            filename was supplied on the command line.
    """
    numpy.set_printoptions(suppress=True)
    print("\n----- Database Info -----")
    if len(sys.argv) < 2:
        print('Usage: python %s <csv filename>' % (sys.argv[0],))
        # Raise directly instead of calling the site-provided exit() helper,
        # which is meant for interactive sessions and may not be defined.
        raise SystemExit(0)

    # create a data object, which reads in the data
    dobj = data.Data(sys.argv[1])
    print("\nName: ", dobj.get_filename())
    # print out information about the data
    print('Number of rows:    ', dobj.get_num_points())
    print('Number of numeric columns: ', dobj.get_num_dimensions())

    # print out the headers (join also copes with an empty list)
    print("\nHeaders:")
    headers = dobj.get_headers()
    print(", ".join(headers))

    # print out the numeric headers
    print("\nNumeric Headers:")
    nheaders = dobj.get_numericheaders()
    print(", ".join(nheaders))

    # print out the types
    print("\nTypes:")
    print(", ".join(dobj.get_types()))

    print("Data Range:\n ", analysis.data_range(headers, dobj))
    print("Mean: \n", analysis.mean(headers, dobj))
    print("Standard Deviation: \n", analysis.stdev(headers, dobj))

    # Normalization is only attempted when the header list equals the
    # numeric header list.
    if headers == nheaders:
        print("Normalized Columns Separately: \n",
              analysis.normalize_columns_separately(headers, dobj))
        print("Normalized Columns Together: \n",
              analysis.normalize_columns_together(headers, dobj))

    print("Sum:\n", analysis.sumup(headers, dobj))

    print("Variance:\n", analysis.variance(headers, dobj))

    # EXTENSION5 ADD COLUMN
    dobj.add_colummn('new col', 'numeric',
                     [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14])
    print(
        "\nAdd new column: 'new col','numeric', [1,2,3,4,5,6,7,8,9,10,11,12,13,14]"
    )
    print("----- New Matrix: -----")
    m = dobj.get_whole_matrix()
    print(m)
    print('Number of rows:    ', dobj.get_num_points())
    print('Number of numeric columns: ', dobj.get_num_dimensions())
    print("---------------------------------")

    # EXTENSION6 WRITE TO A CSV file
    rows = numpy.asarray(m)
    # newline='' lets the csv writer control line endings itself; without it
    # an extra blank line follows every row on platforms that use \r\n.
    with open('foo.csv', 'w', newline='') as outputfile:
        wr = csv.writer(outputfile, delimiter=',')
        wr.writerow(dobj.get_headers())
        wr.writerow(dobj.get_types())
        wr.writerows(rows)