Example #1
0
def test_join_optimizable_3(optimize=False):
    """Time a join of the people table with the batting table on playerID.

    Registers the ``people`` and ``batting`` tables in a fresh CSV catalog,
    defines an index named ``pid_idx`` on ``playerID`` for each, opens both
    tables through ``CSVDataTable.CSVTable`` and joins them on ``playerID``,
    passing the template ``{"playerID": "willite01"}`` as the third argument
    to ``join``. The table metadata, the join result and the elapsed time are
    printed.

    :param optimize: Echoed in the starting banner.
        NOTE(review): the flag is not forwarded to ``join``; confirm whether
        ``join`` is supposed to receive it.
    :return: None. Everything is reported through ``print``.
    """
    cleanup()
    # The banner names the function itself so that it matches the
    # "Complete ..." banner printed at the end.
    print_test_separator("Starting test_join_optimizable_3, optimize = " +
                         str(optimize))

    cat = CSVCatalog.CSVCatalog()

    people_columns = [
        CSVCatalog.ColumnDefinition("playerID", "text", True),
        CSVCatalog.ColumnDefinition("nameLast", "text", True),
        CSVCatalog.ColumnDefinition("nameFirst", column_type="text"),
        CSVCatalog.ColumnDefinition("birthCity", "text"),
        CSVCatalog.ColumnDefinition("birthCountry", "text"),
        CSVCatalog.ColumnDefinition("throws", column_type="text"),
    ]
    t = cat.create_table("people", "/Users/Yan/Desktop/hw3/data/People.csv",
                         people_columns)
    t.define_index("pid_idx", "INDEX", ['playerID'])
    print("People table metadata = \n", json.dumps(t.describe_table(),
                                                   indent=2))

    batting_columns = [
        CSVCatalog.ColumnDefinition("playerID", "text", True),
        CSVCatalog.ColumnDefinition("H", "number", True),
        CSVCatalog.ColumnDefinition("AB", column_type="number"),
        CSVCatalog.ColumnDefinition("teamID", "text", True),
        CSVCatalog.ColumnDefinition("yearID", "text", True),
        CSVCatalog.ColumnDefinition("stint",
                                    column_type="number",
                                    not_null=True),
    ]
    t = cat.create_table("batting", "/Users/Yan/Desktop/hw3/data/Batting.csv",
                         batting_columns)
    print("Batting table metadata = \n",
          json.dumps(t.describe_table(), indent=2))
    # NOTE(review): unlike the people table, this index is defined after the
    # metadata has been printed, so it presumably does not appear in the
    # output above - confirm that this ordering is intended.
    t.define_index("pid_idx", "INDEX", ['playerID'])

    people_tbl = CSVDataTable.CSVTable("people")
    batting_tbl = CSVDataTable.CSVTable("batting")

    print("Loaded people table = \n", people_tbl)
    print("Loaded batting table = \n", batting_tbl)

    tmp = {"playerID": "willite01"}

    # perf_counter is monotonic and high resolution, so the measurement is
    # not disturbed by system clock adjustments the way time.time() can be.
    start_time = time.perf_counter()
    join_result = people_tbl.join(batting_tbl, ['playerID'], tmp)
    elapsed_time = time.perf_counter() - start_time

    print("Result = \n", join_result)
    print("\n\nElapsed time = ", elapsed_time)

    print_test_separator("Complete test_join_optimizable_3")
Example #2
0
def _time_template_lookups(table, template, fields, tries):
    """Call ``table.find_by_template`` ``tries`` times and return the seconds taken.

    The result of the first call is printed as JSON; that print is part of
    the timed region.
    """
    start_time = time.perf_counter()
    for attempt in range(tries):
        result = table.find_by_template(template, fields)
        if attempt == 0:
            print("Sample result = ", json.dumps(result))
    return time.perf_counter() - start_time


def test_find_by_template():
    """Compare lookup times for an indexed and a non-indexed template field.

    Registers the ``people`` table in a fresh CSV catalog with an index named
    ``id_idx`` on ``nameLast``, opens it through ``CSVDataTable.CSVTable`` and
    then runs ``find_by_template`` 1000 times with a template on ``nameLast``
    (the indexed column) and 1000 times with a template on ``nameFirst`` (no
    index defined here), printing a sample result and the elapsed time for
    each run.

    :return: None. Everything is reported through ``print``.
    """
    cleanup()
    print_test_separator("Starting test_find_by_template")

    cat = CSVCatalog.CSVCatalog()

    people_columns = [
        CSVCatalog.ColumnDefinition("playerID", "text", True),
        CSVCatalog.ColumnDefinition("nameLast", "text", True),
        CSVCatalog.ColumnDefinition("nameFirst", column_type="text"),
        CSVCatalog.ColumnDefinition("birthCity", "text"),
        CSVCatalog.ColumnDefinition("birthCountry", "text"),
        CSVCatalog.ColumnDefinition("throws", column_type="text"),
    ]
    t = cat.create_table("people", "/Users/Yan/Desktop/hw3/data/People.csv",
                         people_columns)
    t.define_index("id_idx", "INDEX", ['nameLast'])
    print("People table metadata = \n", json.dumps(t.describe_table(),
                                                   indent=2))

    people_tbl = CSVDataTable.CSVTable("people")
    print("Loaded people table = \n", people_tbl)

    tries = 1000
    fields = ['playerID', 'nameLast', 'nameFirst']

    # The banners are printed before the timer starts so that only the
    # lookups (and the single sample print) are measured.
    templ = {"nameLast": "Williams"}
    print("Starting test on find using indexed field, templ = ",
          json.dumps(templ))
    elapsed = _time_template_lookups(people_tbl, templ, fields, tries)
    print("Elapsed time for ", tries, "lookups = ", elapsed)

    templ = {"nameFirst": "Ted"}
    print("\n\nStarting test on find using NON-indexed field, tmpl = ",
          json.dumps(templ))
    elapsed = _time_template_lookups(people_tbl, templ, fields, tries)
    print("Elapsed time for ", tries, "lookups = ", elapsed)

    print_test_separator("Complete test_find_by_template")