def test_create_table_5(): """ Modifies a preexisting/precreated table definition. :return: """ print_test_separator("Starting test_create_table_5") # DO NOT CALL CLEANUP. Want to access preexisting table. cat = CSVCatalog.CSVCatalog() t = cat.get_table("batting") print("Initial status of table = \n", json.dumps(t.describe_table(), indent=2)) t.add_column_definition(CSVCatalog.ColumnDefinition("HR", "number")) t.add_column_definition(CSVCatalog.ColumnDefinition("G", "number")) t.define_index("team_year_idx", ['teamID', 'yearID'], "INDEX") print("Modified status of table = \n", json.dumps(t.describe_table(), indent=2)) print_test_separator("Success test_create_table_5")
def test_create_table_3(): """ Creates a table that includes several column definitions. :return: """ print_test_separator("Starting test_create_table_3") cleanup() cat = CSVCatalog.CSVCatalog() cds = [] cds.append(CSVCatalog.ColumnDefinition("playerID", "text", True)) cds.append(CSVCatalog.ColumnDefinition("nameLast", "text", True)) cds.append(CSVCatalog.ColumnDefinition("nameFirst", column_type="text")) t = cat.create_table("people", "../data/People.csv", cds) print("People table", json.dumps(t.describe_table(), indent=2)) print_test_separator("Complete test_create_table_3")
def test_create_table_5_prep(): """ Creates a table that includes several column definitions and a primary key. :return: """ print_test_separator("Starting test_create_table_5_prep") cleanup() cat = CSVCatalog.CSVCatalog() cds = [] cds.append(CSVCatalog.ColumnDefinition("playerID", "text", True)) cds.append(CSVCatalog.ColumnDefinition("teamID", "text", True)) cds.append(CSVCatalog.ColumnDefinition("yearID", column_type="text", not_null=True)) cds.append(CSVCatalog.ColumnDefinition("stint", column_type="number", not_null=True)) cds.append(CSVCatalog.ColumnDefinition("H", column_type="number", not_null=False)) cds.append(CSVCatalog.ColumnDefinition("AB", column_type="number", not_null=False)) t = cat.create_table("batting", "../data/Batting.csv", cds) t.define_primary_key(['playerID', 'teamID', 'yearID', 'stint']) print("Batting table", json.dumps(t.describe_table(), indent=2)) print_test_separator("Completed test_create_table_5_prep")
def test_create_table_4_fail(): """ Creates a table that includes several column definitions and a primary key. The primary key references an undefined column, which is an error. NOTE: You should check for other errors. You do not need to check in the CSV file for uniqueness but should test other possible failures. :return: """ print_test_separator("Starting test_create_table_4_fail") cleanup() cat = CSVCatalog.CSVCatalog() cds = [] cds.append(CSVCatalog.ColumnDefinition("playerID", "text", True)) cds.append(CSVCatalog.ColumnDefinition("teamID", "text", True)) cds.append(CSVCatalog.ColumnDefinition("yearID", column_type="text", not_null=True)) cds.append(CSVCatalog.ColumnDefinition("stint", column_type="number", not_null=True)) cds.append(CSVCatalog.ColumnDefinition("H", column_type="number", not_null=False)) cds.append(CSVCatalog.ColumnDefinition("AB", column_type="number", not_null=False)) t = cat.create_table("batting", "../data/Batting.csv", cds) try: t.define_primary_key(['playerID', 'teamID', 'yearID', 'HR']) print("Batting table", json.dumps(t.describe_table(), indent=2)) print_test_separator("FAILURES test_create_table_4_fail") except Exception as e: print("Exception e = ", e) print_test_separator("SUCCESS test_create_table_4_fail should fail.")
def cleanup(): """ Deletes previously created information to enable re-running tests. :return: None """ cat = CSVCatalog.CSVCatalog() cat.drop_table("people") cat.drop_table("batting") cat.drop_table("teams")
def test_create_table_1(): """ Simple create of table definition. No columns or indexes. :return: """ cleanup() print_test_separator("Starting test_create_table_1") cat = CSVCatalog.CSVCatalog() t = cat.create_table("people", "../data/People.csv") print("People table", json.dumps(t.describe_table())) print_test_separator("Complete test_create_table_1")
def test_create_table_3_fail(): """ Creates a table that includes several column definitions. This test should fail because one of the defined columns is not in the underlying CSV file. :return: """ print_test_separator("Starting test_create_table_3_fail") cleanup() cat = CSVCatalog.CSVCatalog() cds = [] cds.append(CSVCatalog.ColumnDefinition("playerID", "text", True)) cds.append(CSVCatalog.ColumnDefinition("nameLast", "text", True)) cds.append(CSVCatalog.ColumnDefinition("nameFirst", column_type="text")) cds.append(CSVCatalog.ColumnDefinition("canary")) try: t = cat.create_table("people", "../data/People.csv", cds) print_test_separator("FAILURE test_create_table_3") print("People table", json.dumps(t.describe_table(), indent=2)) except Exception as e: print("Exception e = ", e) print_test_separator("Complete test_create_table_3_fail successfully")
def test_find_by_template(): cleanup() print_test_separator("Starting test_find_by_template") cat = CSVCatalog.CSVCatalog() cds = [] cds = [] cds.append(CSVCatalog.ColumnDefinition("playerID", "text", True)) cds.append(CSVCatalog.ColumnDefinition("nameLast", "text", True)) cds.append(CSVCatalog.ColumnDefinition("nameFirst", column_type="text")) cds.append(CSVCatalog.ColumnDefinition("birthCity", "text")) cds.append(CSVCatalog.ColumnDefinition("birthCountry", "text")) cds.append(CSVCatalog.ColumnDefinition("throws", column_type="text")) t = cat.create_table("people", "../data/People.csv", cds) t.define_index("id_idx", ['nameLast'], "INDEX") print("People table metadata = \n", json.dumps(t.describe_table(), indent=2)) people_tbl = CSVTable.CSVTable("people") print("Loaded people table = \n", people_tbl) tries = 100 start_time = time.time() templ = {"nameLast": "Williams"} print("Starting test on find using indexed field, tmpl = ", json.dumps(templ)) for i in range(0, tries): result = people_tbl.find_by_template( templ, ['playerID', 'nameLast', 'nameFirst']) if i == 0: print("Sample result = ", json.dumps(result)) end_time = time.time() print("Elapsed time for ", tries, "lookups = ", end_time - start_time) tries = 1000 start_time = time.time() templ = {"nameFirst": "Ted"} print("\n\nStarting test on find using NON-indexed field, tmpl = ", json.dumps(templ)) for i in range(0, tries): result = people_tbl.find_by_template( templ, ['playerID', 'nameLast', 'nameFirst']) if i == 0: print("Sample result = ", json.dumps(result)) end_time = time.time() print("Elapsed time for ", tries, "lookups = ", end_time - start_time) print_test_separator("Complete test_finf_by_template")
def test_create_table_2_fail(): """ Creates a table, and then attempts to create a table with the same name. Second create should fail. :return: """ print_test_separator("Starting test_create_table_2_fail") cleanup() cat = CSVCatalog.CSVCatalog() t = cat.create_table("people", "../data/People.csv") try: t = cat.create_table("people", "../data/People.csv") except Exception as e: print("Second created failed with e = ", e) print("Second create should fail.") print_test_separator("Successful end for test_create_table_2_fail") return print_test_separator("INCORRECT end for test_create_table_2_fail")
def test_join_optimizable_2(optimize=False): """ Calling this with optimize=True turns on optimizations in the JOIN code. :return: """ cleanup() print_test_separator("Starting test_optimizable_2, optimize = " + str(optimize)) cat = CSVCatalog.CSVCatalog() cds = [] cds = [] cds.append(CSVCatalog.ColumnDefinition("playerID", "text", True)) cds.append(CSVCatalog.ColumnDefinition("nameLast", "text", True)) cds.append(CSVCatalog.ColumnDefinition("nameFirst", column_type="text")) cds.append(CSVCatalog.ColumnDefinition("birthCity", "text")) cds.append(CSVCatalog.ColumnDefinition("birthCountry", "text")) cds.append(CSVCatalog.ColumnDefinition("throws", column_type="text")) t = cat.create_table( "people", data_dir + "People.csv", cds) print("People table metadata = \n", json.dumps(t.describe_table(), indent=2)) t.define_index("pid_idx", ['playerID'], "INDEX") cds = [] cds.append(CSVCatalog.ColumnDefinition("playerID", "text", True)) cds.append(CSVCatalog.ColumnDefinition("H", "number", True)) cds.append(CSVCatalog.ColumnDefinition("AB", column_type="number")) cds.append(CSVCatalog.ColumnDefinition("teamID", "text", True)) cds.append(CSVCatalog.ColumnDefinition("yearID", "text", True)) cds.append(CSVCatalog.ColumnDefinition("stint", column_type="number", not_null=True)) t = cat.create_table( "batting", data_dir + "Batting.csv", cds) print("Batting table metadata = \n", json.dumps(t.describe_table(), indent=2)) people_tbl = CSVTable.CSVTable("people") batting_tbl = CSVTable.CSVTable("batting") print("Loaded people table = \n", people_tbl) print("Loaded batting table = \n",batting_tbl) start_time = time.time() join_result = people_tbl.join(batting_tbl,['playerID'], None, optimize=optimize) end_time = time.time() print("Result = \n", join_result) elapsed_time = end_time - start_time print("\n\nElapsed time = ", elapsed_time) print_test_separator("Complete test_join_optimizable_2")
def test_join_not_optimized(optimize=False): """ :return: """ print_test_separator("Starting test_optimizable_1, optimize = " + str(optimize)) print("\n\nDude. This takes 30 minutes. Trust me.\n\n") return cleanup() print_test_separator("Starting test_optimizable_1, optimize = " + str(optimize)) cat = CSVCatalog.CSVCatalog() cds = [] cds = [] cds.append(CSVCatalog.ColumnDefinition("playerID", "text", True)) cds.append(CSVCatalog.ColumnDefinition("nameLast", "text", True)) cds.append(CSVCatalog.ColumnDefinition("nameFirst", column_type="text")) cds.append(CSVCatalog.ColumnDefinition("birthCity", "text")) cds.append(CSVCatalog.ColumnDefinition("birthCountry", "text")) cds.append(CSVCatalog.ColumnDefinition("throws", column_type="text")) t = cat.create_table( "people", data_dir + "People.csv", cds) print("People table metadata = \n", json.dumps(t.describe_table(), indent=2)) cds = [] cds.append(CSVCatalog.ColumnDefinition("playerID", "text", True)) cds.append(CSVCatalog.ColumnDefinition("H", "number", True)) cds.append(CSVCatalog.ColumnDefinition("AB", column_type="number")) cds.append(CSVCatalog.ColumnDefinition("teamID", "text", True)) cds.append(CSVCatalog.ColumnDefinition("yearID", "text", True)) cds.append(CSVCatalog.ColumnDefinition("stint", column_type="number", not_null=True)) t = cat.create_table( "batting", data_dir + "Batting.csv", cds) print("Batting table metadata = \n", json.dumps(t.describe_table(), indent=2)) people_tbl = CSVTable.CSVTable("people") batting_tbl = CSVTable.CSVTable("batting") print("Loaded people table = \n", people_tbl) print("Loaded batting table = \n",batting_tbl) start_time = time.time() tmp = { "playerID": "willite01"} join_result = people_tbl.join(batting_tbl,['playerID'], tmp, optimize=optimize) end_time = time.time() print("Result = \n", join_result) elapsed_time = end_time - start_time print("\n\nElapsed time = ", elapsed_time) print_test_separator("Complete test_join_optimizable")