def test_join_optimizable_3(optimize=False):
    """Time a people/batting join on ``playerID`` filtered to one player.

    Resets state with ``cleanup()``, creates the "people" and "batting"
    table definitions in the catalog (each with a ``pid_idx`` index on
    ``playerID``), loads both tables, then runs a single join with the
    template ``{"playerID": "willite01"}`` and prints the result together
    with the wall-clock time of the join call.

    :param optimize: Flag echoed in the starting banner.
    :return: None
    """
    # NOTE(review): `optimize` only appears in the banner below; it is not
    # passed to join(). Confirm whether it should be forwarded.
    cleanup()
    print_test_separator(
        "Starting test_optimizable_3, optimize = {}".format(optimize))

    catalog = CSVCatalog.CSVCatalog()

    # --- people table definition ---
    people_columns = [
        CSVCatalog.ColumnDefinition("playerID", "text", True),
        CSVCatalog.ColumnDefinition("nameLast", "text", True),
        CSVCatalog.ColumnDefinition("nameFirst", column_type="text"),
        CSVCatalog.ColumnDefinition("birthCity", "text"),
        CSVCatalog.ColumnDefinition("birthCountry", "text"),
        CSVCatalog.ColumnDefinition("throws", column_type="text"),
    ]
    people_def = catalog.create_table(
        "people", "/Users/Yan/Desktop/hw3/data/People.csv", people_columns)
    people_def.define_index("pid_idx", "INDEX", ['playerID'])
    print("People table metadata = \n",
          json.dumps(people_def.describe_table(), indent=2))

    # --- batting table definition ---
    batting_columns = [
        CSVCatalog.ColumnDefinition("playerID", "text", True),
        CSVCatalog.ColumnDefinition("H", "number", True),
        CSVCatalog.ColumnDefinition("AB", column_type="number"),
        CSVCatalog.ColumnDefinition("teamID", "text", True),
        CSVCatalog.ColumnDefinition("yearID", "text", True),
        CSVCatalog.ColumnDefinition(
            "stint", column_type="number", not_null=True),
    ]
    batting_def = catalog.create_table(
        "batting", "/Users/Yan/Desktop/hw3/data/Batting.csv", batting_columns)
    # Unlike the people table, the metadata is printed before the index is
    # defined; this ordering is kept as-is.
    print("Batting table metadata = \n",
          json.dumps(batting_def.describe_table(), indent=2))
    batting_def.define_index("pid_idx", "INDEX", ['playerID'])

    # --- load both tables ---
    people_tbl = CSVDataTable.CSVTable("people")
    batting_tbl = CSVDataTable.CSVTable("batting")
    print("Loaded people table = \n", people_tbl)
    print("Loaded batting table = \n", batting_tbl)

    # --- timed join ---
    started = time.time()
    where_template = {"playerID": "willite01"}
    join_result = people_tbl.join(batting_tbl, ['playerID'], where_template)
    finished = time.time()

    print("Result = \n", join_result)
    print("\n\nElapsed time = ", finished - started)
    print_test_separator("Complete test_join_optimizable_3")
def test_find_by_template():
    """Time repeated ``find_by_template`` lookups on the people table.

    Resets state with ``cleanup()``, creates the "people" table definition
    with an ``id_idx`` index on ``nameLast``, loads the table, and then runs
    1000 lookups for each of two templates: one on ``nameLast`` (the indexed
    column) and one on ``nameFirst`` (no index defined here). For each
    template the first result and the total elapsed wall-clock time are
    printed.

    :return: None
    """
    cleanup()
    print_test_separator("Starting test_find_by_template")

    catalog = CSVCatalog.CSVCatalog()
    people_columns = [
        CSVCatalog.ColumnDefinition("playerID", "text", True),
        CSVCatalog.ColumnDefinition("nameLast", "text", True),
        CSVCatalog.ColumnDefinition("nameFirst", column_type="text"),
        CSVCatalog.ColumnDefinition("birthCity", "text"),
        CSVCatalog.ColumnDefinition("birthCountry", "text"),
        CSVCatalog.ColumnDefinition("throws", column_type="text"),
    ]
    people_def = catalog.create_table(
        "people", "/Users/Yan/Desktop/hw3/data/People.csv", people_columns)
    people_def.define_index("id_idx", "INDEX", ['nameLast'])
    print("People table metadata = \n",
          json.dumps(people_def.describe_table(), indent=2))

    people_tbl = CSVDataTable.CSVTable("people")
    print("Loaded people table = \n", people_tbl)

    tries = 1000
    # (banner, template) pairs, run in this order: indexed column first,
    # then the column without an index.
    scenarios = (
        ("Starting test on find using indexed field, templ = ",
         {"nameLast": "Williams"}),
        ("\n\nStarting test on find using NON-indexed field, tmpl = ",
         {"nameFirst": "Ted"}),
    )

    for banner, templ in scenarios:
        # The clock starts before the banner is printed, so the banner
        # output is included in the measured interval.
        started = time.time()
        print(banner, json.dumps(templ))
        for attempt in range(tries):
            result = people_tbl.find_by_template(
                templ, ['playerID', 'nameLast', 'nameFirst'])
            # Show the result only once per scenario.
            if attempt == 0:
                print("Sample result = ", json.dumps(result))
        finished = time.time()
        print("Elapsed time for ", tries, "lookups = ", finished - started)

    print_test_separator("Complete test_find_by_template")