def t10(): "Test the time oj JOIN with and without optimization!! " rows, cols = load("../CSVFile/People.csv") t = CSVDataTable.CSVDataTable(table_name="People", column_names=cols, primary_key_columns=['playerID']) t.import_data(rows) print("T = ", t) rows, cols = load("../CSVFile/BattingSmall.csv") t2 = CSVDataTable.CSVDataTable(table_name="BattingSmall", column_names=cols, primary_key_columns=['playerID', 'teamID', 'yearID', 'stint']) t2.import_data(rows) print("T = ", t) print("\n\n With optimization\n") start_time = time.time() for i in range(0,1): j = t2.join(t, ['playerID'], w_clause={"People.nameLast": "Williams", "People.birthCity": "San Diego"}, p_clause=['playerID', "People.nameLast", "People.nameFirst", "BattingSmall.teamID", "BattingSmall.yearID", "BattingSmall.stint", "BattingSmall.H", "BattingSmall.AB"], optimize=True) end_time = time.time() print("Elapsed time = ", end_time-start_time) print("\n\n Without optimization\n") start_time = time.time() for i in range(0, 1): j = t2.join(t, ['playerID'], w_clause={"People.nameLast": "Williams", "People.birthCity": "San Diego"}, p_clause=['playerID', "People.nameLast", "People.nameFirst", "BattingSmall.teamID", "BattingSmall.yearID", "BattingSmall.stint", "BattingSmall.H", "BattingSmall.AB"], optimize=False) end_time = time.time() print("Elapsed time = ", end_time - start_time)
def t4(): rows, cols = load( "/Users/wangxinquan/Desktop/2sem/W4111Database/xw2566_HW3/Data/People.csv" ) t = CSVDataTable.CSVDataTable(table_name="People", column_names=cols, primary_key_columns=['playerID']) t.import_data(rows) print("T = ", t) rows, cols = load( "/Users/wangxinquan/Desktop/2sem/W4111Database/xw2566_HW3/Data/Batting.csv" ) t2 = CSVDataTable.CSVDataTable( table_name="Batting", column_names=cols, primary_key_columns=['playerID', 'teamID', "yearID", 'stint']) t2.import_data(rows) print("T = ", t2) j = t2.join(t, ['playerID'], w_clause={ "People.nameLast": "Williams", "People.birthCity": "San Diego" }, p_clause=[ 'playerID', "People.nameLast", "People.nameFirst", "Batting.teamID", "Batting.yearID", "Batting.stint", "Batting.H", "Batting.AB" ], optimize=True) print("Result = ", j) print("All rows =", json.dumps(j.get_rows(), indent=2))
def t4(): """ Test join function. :return: """ print("test JOIN function. ") rows, cols = load("../CSVFile/People.csv") t = CSVDataTable.CSVDataTable(table_name="People", column_names=cols, primary_key_columns=['playerID']) t.import_data(rows) print("T = ", t) rows, cols = load("../CSVFile/BattingSmall.csv") t2 = CSVDataTable.CSVDataTable(table_name="BattingSmall", column_names=cols, primary_key_columns=['playerID', 'teamID', 'yearID', 'stint']) t2.import_data(rows) print("T = ", t) j = t2.join(t, ['playerID'], w_clause={"People.nameLast": "Williams", "People.birthCity": "San Diego"}, p_clause=['playerID', "People.nameLast", "People.nameFirst", "BattingSmall.teamID", "BattingSmall.yearID", "BattingSmall.stint", "BattingSmall.H", "BattingSmall.AB"], optimize=True) print("Result = ", j) print(j.get_rows()) print("All rows = ", json.dumps(j.get_rows(), indent=2))
def test_save_and_load(): print("\n\n*************TEST TEN, SAVE AND LOAD**************") rows, cols = load("CSVFile/offices.csv") print("\n ***READING CSV TO LOAD OFFICES.CSV") t=CSVDataTable.CSVDataTable(table_name = "Offices",column_names = cols, primary_key_columns=["officeCode"]) t.import_data(rows) print("\n ***SAVING DOWN THE CSVDATATABLE") t.save() print("TABLE INFO AND ITS INDEXES\n") print(t) for k,v in t._indexes.items(): print(v.to_json()) print("\n ***LOADING CSV DATATABLE***") t2 = CSVDataTable.CSVDataTable(table_name = "Offices",loadit=True) print("\n **TABLE LOADED CONFIRM THEY ARE THE SAME**\n") print(t2) print("\n ***CONFIRM INDEX INTEGRITY***") for k,v in t2._indexes.items(): print(v.to_json())
def t1(): """ Test import_data and find_by_template functions. :return: """ print("Test import_data(which including add_rows(), and test find_by_tmp not use index. ") rows, cols = load("../CSVFile/People.csv") t = CSVDataTable.CSVDataTable(table_name="People", column_names=cols, primary_key_columns=['playerID']) t.import_data(rows) print("T = ", t) tmp = {'playerID': 'willite01'} start = time.time() for i in range(0, 1000): r = t.find_by_template(tmp, fields=None, use_index=False).get_rows() if i == 0: print("Row = ", r) end = time.time() elapsed = end - start print("Time : ", elapsed) t.save()
def test_index_or_no_index(): logging.debug("-------- test_index_or_no_index ---------") rows, cols = load( "/home/zhida/Desktop/Database/code/HW3/CSVFile/beach-weather-stations-automated-sensors-1.csv" ) t = CSVDataTable.CSVDataTable(table_name="automated-sensors", column_names=cols, primary_key_columns=["Measurement ID"], loadit=None) t.import_data(rows=rows) t.add_index("Station Name", ["Station Name"], "INDEX") t.add_index("Wet Bulb Temperature", ["Wet Bulb Temperature"], "INDEX") print("\n\n\n") time1 = time.time() for i in range(1000): t.find_by_template(tmp={"Wet Bulb Temperature": '7.0'}, use_index=False, debug_log=False) time2 = time.time() print("[-No index-] -> run find_by_template(), 1000 iterations\n") print("takes time = ", str(time2 - time1) + " second\n") time1 = time.time() for i in range(1000): t.find_by_template(tmp={"Wet Bulb Temperature": '7.0'}, use_index=True, debug_log=False) time2 = time.time() print("[-Use index-] -> run find_by_template(), 1000 iterations\n") print("takes time = ", str(time2 - time1) + " second")
def t2(): """ Test add_index function, use index. :return: """ print("Test add_index function, find_by_template use index.") rows, cols = load("../CSVFile/People.csv") t = CSVDataTable.CSVDataTable(table_name="People", column_names=cols, primary_key_columns=['playerID']) t.import_data(rows) print("T = ", t) tmp = {'playerID': 'willite01', "nameLast": "Williams"} t.add_index("LASTNAME", ['nameLast', 'nameFirst'], "INDEX") start = time.time() for i in range(0, 1000): r = t.find_by_template(tmp, fields=None, use_index=True).get_rows() if i == 0: print("Row = ", r) end = time.time() elapsed = end - start print("Time : ", elapsed)
def test_find(): print("\n\n*************TEST TWO, FIND BY TEMPLATE AND COMPARE RUN TIME WITH INDEX vs WITHOUT**************") tmp = {"playerID":"willite01"} rows, cols = load("CSVFile/people.csv") t=CSVDataTable.CSVDataTable(table_name = "People",column_names = cols, primary_key_columns=['playerID']) t.import_data(rows) print("\n***TESTING RUN WITHOUT INDEX***") start = time.time() for i in range(0,5): r = t.find_by_template(tmp,fields=None,use_index=False) if i ==0: print("Row = ",r) end = time.time() elapsed = end - start print("Elapsed time=", elapsed) print("\n***TESTING RUNNING WITH INDEX***") start = time.time() for i in range(0,5): r=t.find_by_template(tmp,fields=None,use_index=True) if i==0: print("Row = ",r) end = time.time() elapsed = end-start print("Elapsed time = ",elapsed)
def test_save_db4(): logging.debug("-------- test_save_db ---------") rows, cols = load( "/home/zhida/Desktop/Database/code/HW3/CSVFile/beach-weather-stations-automated-sensors-1.csv" ) t = CSVDataTable.CSVDataTable(table_name="automated-sensors", column_names=cols, primary_key_columns=["Measurement ID"], loadit=None) t.import_data(rows=rows) t.add_index("Station Name", ["Station Name"], "INDEX") t.save() print("save DB success!\n") print("\n\n\n\n\n\n") time1 = time.time() for i in range(100): t.find_by_template(tmp={ "Station Name": "Oak Street Weather Station", "Wet Bulb Temperature": 55 }, use_index=False) time2 = time.time() print("takes time = ", str(time2 - time1) + " second") time1 = time.time() for i in range(100): t.find_by_template(tmp={ "Station Name": "Oak Street Weather Station", "Wet Bulb Temperature": 55 }, use_index=True) time2 = time.time() print("takes time = ", str(time2 - time1) + " second")
def test_insert_and_import_data(): print("\n\n*************TEST ONE, TEST LOAD FUNCTION**************") print("*** HERE WE TEST THE IMPORT_DATA FUNCTION WHICH LEVEREAGES INSERT. LATER WE ALSO TEST LOADIT = TRUE AND THE LOAD FUNCTION***") rows, cols = load("CSVFile/people.csv") t=CSVDataTable.CSVDataTable(table_name = "People",column_names = cols, primary_key_columns=['playerID']) t.import_data(rows) print("T = ",t)
def test_specific_project(): rows, cols = load("CSVFile/People.csv") t=CSVDataTable.CSVDataTable(table_name = "People",column_names = cols, primary_key_columns=["playerID"]) t.import_data(rows) print("T = ",t) p = ["playerID","People.nameLast","Batting.H"] print("Specific template = ", t._get_specific_project())
def test_index(): print("\n\n*************TEST FOUR CREATING AN INDEX**************") t = CSVDataTable.CSVDataTable(table_name="Test",column_names=['last_name','first_name','uni'],loadit=None) i = CSVDataTable.Index(index_name="Bob", index_columns = ["last_name","first_name"],kind="INDEX", table = t) print(i) r = {"last_name": "Ferguson", "first_name": "Donald", "uni":"sure"} kv = i.compute_key(r) i.add_to_index(row=r,rid="2") print("KV = ",kv, "added to index")
def t9(): "Test delete function. Check if row id is correct after deletion. " rows, cols = load("../CSVFile/rings.csv") t = CSVDataTable.CSVDataTable(table_name="Rings", column_names=cols, primary_key_columns=['uni']) t.import_data(rows) r = t.delete({"last_name": "Baggins"}) print("delete:", r) print("\n\nafter delete, T = ", t)
def test_add_index(): print("\n\n*************TEST SIX, CREATE WITH INDEX AND THEN ADD ANOTHER INDEX**************") rows, cols = load("CSVFile/offices.csv") t=CSVDataTable.CSVDataTable(table_name = "Offices",column_names = cols, primary_key_columns=["officeCode"]) print("\n***ORIGINALLY ***\n") print(t) cols = ["city","state","postalCode"] t.add_index(name="location",column_list=cols,kind="INDEX") print("\n***ADDING ANOTHER INDEX, LOCATION ***\n") print(t)
def test_incomplete_index_info(): ###CHECK FOR not complete index info ADD_INDEX FUNCTION print("\n\n*************TEST EIGHT, NOT ENOUGH INFORMATION TO CREATE INDEX THROWS AN ERROR**************") rows, cols = load("CSVFile/offices.csv") t=CSVDataTable.CSVDataTable(table_name = "Offices",column_names = cols, primary_key_columns=["officeCode"]) cols = ["city","state","postalCode"] try: t.add_index("location",cols) except Exception as e: print("\n We caught an error, you did not provide enough information to create the key" + "\n",e)
def t8(): rows, cols = load( "/Users/wangxinquan/Desktop/2sem/W4111Database/xw2566_HW3/Data/offices.csv" ) t = CSVDataTable.CSVDataTable(table_name="offices", column_names=cols, primary_key_columns=['officeCode']) t.import_data(rows) t.save()
def test_join(): ###CHECK FOR not complete index info ADD_INDEX FUNCTION print("\n\n*************TEST ELEVEN JOIN AND ALSO SEE THAT IT SWAPS TABLESX**************") rows, cols = load("CSVFile/People.csv") t=CSVDataTable.CSVDataTable(table_name = "People",column_names = cols, primary_key_columns=["playerID"]) t.import_data(rows) print("T = ",t) rows, cols = load("CSVFile/Batting.csv") t2=CSVDataTable.CSVDataTable(table_name = "Batting",column_names = cols, primary_key_columns=["playerID","teamID","yearID","stint"]) t2.import_data(rows) print("T2 = ",t2) j = t2.join(t, ['playerID'], w_clause ={"People.nameLast":"Williams", "People.birthCity": "San Diego"}, p_clause =['playerID',"People.nameLast","People.nameFirst","Batting.teamID","Batting.yearID","Batting.stint","Batting.H","Batting.AB"] ,optimize = True) print("Result = " ,j) print("All rows = ", json.dumps(j._rows, indent=2))
def test2(): new_r, cols = load( "/Users/wangxinquan/Desktop/2sem/W4111Database/xw2566_HW3/Data/offices.csv" ) t = CSVDataTable.CSVDataTable(table_name="offices.", column_names=cols, primary_key_columns=['officeCode'], loadit=None) t.import_data(new_r) print("t = ", t)
def test_conflict(): print("\n\n*************TEST FIVE CREATING CONFLICTING INDEX THROWS ERROR WHEN UNIQUE**************") t= CSVDataTable.CSVDataTable(table_name="Test",column_names=['last_name','first_name','uni'],loadit=None) i = CSVDataTable.Index(index_name="Bob", index_columns = ["last_name","first_name"],kind="UNIQUE",table=t) r = {"last_name": "Ferguson", "first_name": "Donald", "uni":"sure"} kv = i.compute_key(r) i.add_to_index(row=r,rid="2") try: i.add_to_index(row=r, rid="3") except Exception as e: print("We caught an error: ", e)
def test_create_table(): logging.debug("-------- test_create_table ---------") t = CSVDataTable.CSVDataTable( table_name="students", column_names=["uni", "first_name", "last_name"], primary_key_columns=["uni"], loadit=None) print(t) print("\n\n\n\n\n\n")
def t3(): rows, cols = load( "/Users/wangxinquan/Desktop/2sem/W4111Database/xw2566_HW3/Data/Batting.csv" ) t2 = CSVDataTable.CSVDataTable( table_name="Batting", column_names=cols, primary_key_columns=['playerID', 'teamID', "yearID", 'stint']) t2.import_data(rows) print("T = ", t2) tmp = {"last_name": "Baggins", "first_name": "Bilbo"} print("Specific template = ", t2._get_specific_where(tmp))
def t9(): rows, cols = load( "/Users/wangxinquan/Desktop/2sem/W4111Database/xw2566_HW3/Data/offices.csv" ) t = CSVDataTable.CSVDataTable(table_name="offices", column_names=cols, primary_key_columns=['officeCode']) t.import_data(rows) r = t.delete_by_template({"officeCode": "11"}) print(r)
def test_load_data(): logging.debug("-------- test_load_data ---------") rows, cols = load("/home/zhida/Desktop/Database/code/HW3/CSVFile/city.csv") t = CSVDataTable.CSVDataTable(table_name="city", column_names=cols, primary_key_columns=["id"], loadit=None) t.import_data(rows=rows) print("t = ", json.dumps(t._rows, indent=2)) print("\n\n\n\n\n\n")
def test_save_db2(): logging.debug("-------- test_save_db ---------") rows, cols = load("/home/zhida/Desktop/Database/code/HW3/CSVFile/city.csv") t = CSVDataTable.CSVDataTable(table_name="city", column_names=cols, primary_key_columns=["id"], loadit=None) t.import_data(rows=rows) t.save() print("save DB success!\n") print("\n\n\n\n\n\n")
def t5(): rows, cols = load( "/Users/wangxinquan/Desktop/2sem/W4111Database/xw2566_HW3/Data/People.csv" ) t = CSVDataTable.CSVDataTable(table_name="People", column_names=cols, primary_key_columns=['playerID']) t.import_data(rows) print("T = ", t) p = ["playerID", "People.nameLast", "Batting.H"] print("Specific template = ", t._get_specific_project(p))
def t5(): """ Test get_specific_project function :return: """ print("Test get_specific_project function.") rows, cols = load("../CSVFile/People.csv") t = CSVDataTable.CSVDataTable(table_name="People", column_names=cols, primary_key_columns=['playerID']) t.import_data(rows) print("T = ", t) p = ["playerID", "People.nameLast", "Batting.H"] print("Specific template = ", t._get_specific_project(p))
def test_delete(): print("\n\n*************TEST NINE, DELETE WORKS SUCCESSFULLY**************") rows, cols = load("CSVFile/offices.csv") t=CSVDataTable.CSVDataTable(table_name = "Offices",column_names = cols, primary_key_columns=["officeCode"]) t.import_data(rows) print("\n **ORIGINAL META DATA BEFORE WE RUN DELETE, 10 ROWS**\n") print(t) print("\n **DELETING CAMBRIDGE**\n") r = t.delete({"city":"Cambridge"}) print("\n **DATA AFTER DELETING CAMBRIDGE, 9 ROWS**\n") print(t)
def make_test_table2(b_index=True): t = CSVDataTable.CSVDataTable(table_name="teams", column_names=["uni", "bat", "teamID"], primary_key_columns=["uni"], loadit=None) t.insert({"uni": "zz", "bat": 2, "teamID": "spurs"}) t.insert({"uni": "yy", "bat": 3, "teamID": "spurs"}) t.insert({"uni": "xx", "bat": 2, "teamID": "lakers"}) if b_index: index_columns = ["teamID"] t.add_index(index_name="Team", column_list=index_columns, kind="INDEX") return t
def test_duplicate_index_name(): print("\n\n*************TEST SEVEN, CREATING AN INDEX WITH A DUPLICATE NAME, AND CREATING A SECOND PRIMARY KEY THROWS AN ERROR**************") rows, cols = load("CSVFile/offices.csv") t=CSVDataTable.CSVDataTable(table_name = "Offices",column_names = cols, primary_key_columns=["officeCode"]) print("\n***ORIGINAL ***\n") print(t) cols = ["city","state","postalCode"] try: t.add_index("PRIMARY",['city'],"INDEX") except Exception as e: print("*****You tried to add a duplicate name index*****"+ "\n",e) try: t.add_index("RANDOM",['state'],"PRIMARY") except Exception as e: print("\n*****You tried to make more than one primary keys*****"+ "\n",e)
def t8(): print("Test insert() function. ") rows, cols = load("../CSVFile/rings.csv") t = CSVDataTable.CSVDataTable(table_name="Rings", column_names=cols, primary_key_columns=['uni']) t.import_data(rows) t.save() print("Before insert T = ", t) new_row = {"last_name": "LoL", "first_name": "New", "uni": "LN11"} t.insert(new_row) print("After inset()\n T = ", t) print("Find the inseted row:") tmp = {"last_name": "LoL", "first_name": "New"} r = t.find_by_template(tmp, fields=None, use_index=True).get_rows() print(r)