def test_create_it(): tbl = CSVDataTable( "People", { "directory": "/Users/wangxinquan/Desktop/2sem/W4111Database/HW1_xw2566/data", "file_name": "People.csv" }, ['playerID'], None) tbl.load() print("First table = ", tbl)
def test_matches(): tbl = CSVDataTable( "People", { "directory": "/Users/wangxinquan/Desktop/2sem/W4111Database/HW1_xw2566/data", "file_name": "People.csv" }, ['playerID'], None) tbl.load() print("First table = ", tbl) tmp = {"nameLast": "Aardsma", "nameFirst": "Dave"} result = tbl.matches_template(tmp, tbl._rows[0]) print("Match = ", result)
def test_key(): tbl = CSVDataTable( "People", { "directory": "/Users/wangxinquan/Desktop/2sem/W4111Database/HW1_xw2566/data", "file_name": "People.csv" }, ['playerID'], None) tbl.load() print("First table = ", tbl) result = tbl.find_by_primary_key(['willite01'], field_list=['playerID', 'nameLast']) print("\n\nResult = ", result)
def setUp(self): connect_info = { 'directory': '../data/', 'file_name': 'demographics_by_zip.csv' } self.cnx = { 'directory': '../data/', 'file_name': 'unit_test_temp.csv' } copyfile(connect_info['directory'] + connect_info['file_name'], connect_info['directory'] + 'unit_test_temp.csv') self.worker_test= CSVDataTable('stats_by_zip_test',connect_info=self.cnx,key_columns=['zipcode']) self.worker_test.load()
def test_tmp1(): tbl = CSVDataTable( "People", { "directory": "/Users/wangxinquan/Desktop/2sem/W4111Database/HW1_xw2566/data", "file_name": "People.csv" }, ['playerID'], None) tbl.load() print("First table = ", tbl) tmp = {"nameLast": "Williams", "throws": "R"} result = tbl.find_by_template( tmp, field_list=['playerID', 'nameLast', 'birthCity', 'throws']) print("\n\n After find by template, Result = ", result) tmp2 = {'birthCity': 'San Diego'} result2 = result.find_by_template(tmp2) print("\n\n After find by template find by template, Result = ", result2)
def top_ten_hitters_csv(): # init the files batting_file = CSVDataTablePkg.CSVDataTable("Batting", "Batting.csv", ["playerID", "teamID", "yearID", "stint"]); batting_file.load(); people_file = CSVDataTablePkg.CSVDataTable("People", "People.csv", ["playerID"]); people_file.load(); # save the processed ids processed_player_ids = list(); # save the record records = list(); # check the unique playerID in Batting for row in batting_file.table_data: play_id = row[PLAYER_ID]; if play_id not in processed_player_ids: processed_player_ids.append(play_id); result_batting = batting_file.find_by_template({PLAYER_ID: play_id}, [YEAR_ID, BAT, HIT]); # check yearID>=1960 for this player flag = False; if len(result_batting) > 0: total_bats = 0; total_hits = 0; for r in result_batting: if int(r[YEAR_ID]) >= 1960: flag = True; # compute total AB and total H total_bats += int(r[BAT]); total_hits += int(r[HIT]); # if this is a valid playerID if flag: # compute the batting average if total_bats > 200: batting_avg = total_hits / total_bats; # look up for the name result_name = people_file.find_by_primary_key([play_id], [PLAYER_ID, NAME_FIRST, NAME_LAST]); first_name = result_name[0][NAME_FIRST]; last_name = result_name[0][NAME_LAST]; record = dict(zip([PLAYER_ID, NAME_FIRST, NAME_LAST, TOTAL_BAT, TOTAL_HIT, BATTING_AVG], [play_id, first_name, last_name, total_bats, total_hits, batting_avg])); records.append(record); sorted_records = sorted(records, key=lambda k: k[BATTING_AVG], reverse=True); return sorted_records[0:10];
def test_update(): tbl = CSVDataTable( "offices", { "directory": "/Users/wangxinquan/Desktop/2sem/W4111Database/HW1_xw2566/data", "file_name": "offices.csv" }, ['officeCode'], None) tbl.load() tmp = {'city': "Paris"} new_v = {"officeCode": 13, "state": "Mars", "country": "Jupiter"} tbl.update_by_template(tmp, new_v) result = tbl.find_by_template(tmp) print("\n\nAfter update ... ", result)
def test_delete(): tbl = CSVDataTable( "offices", { "directory": "/Users/wangxinquan/Desktop/2sem/W4111Database/HW1_xw2566/data", "file_name": "offices.csv" }, ['officeCode'], None) tbl.load() print("First table = ", tbl) tmp = {"city": "Paris"} result1 = tbl.find_by_template(tmp) print("\n\nbefore delete =", result1) result = tbl.delete_by_template(tmp) print("\n\nI deleted ... ", result, "rows") result2 = tbl.find_by_template(tmp) print("\n\nafter delete =", result2)
def test_insert(): tbl = CSVDataTable( "offices", { "directory": "/Users/wangxinquan/Desktop/2sem/W4111Database/HW1_xw2566/data", "file_name": "offices.csv" }, ['officeCode'], None) tbl.load() print("First table = ", tbl) tmp = {"city": "Tokyo"} result1 = tbl.find_by_template(tmp) print("\n\nbefore insert =", result1) new_r = {'officeCode': '300', 'city': 'Tokyo'} result = tbl.insert(new_r) result2 = tbl.find_by_template(tmp) print("\n\nafter insert =", result2)
# Charles Blackmon-Luca, ccb2158 import sys sys.path.append("../src/") from CSVDataTable import CSVDataTable with open("../test_output/fbt_test.txt", "w+") as file: # open csv as table db = CSVDataTable("People", key_columns=["playerID"], loadit=True) file.write("Loading CSVDataTable:\n\n%s\n" % db) # find all 1871ers tmp = {"birthYear" : "1871"} file.write("Find by template : %s\n\n" % tmp) db = db.find_by_template(tmp) file.write("Result:\n\n%s\n" % db) # limit our fields list fields = ["nameFirst", "nameLast", "deathYear"] file.write("Limit to fields : %s\n\n" % fields) db = db.find_by_template({}, field_list=fields) file.write("Result:\n\n%s\n" % db)
# Charles Blackmon-Luca, ccb2158 import sys from time import time sys.path.append("../src/") from CSVDataTable import CSVDataTable with open("../test_output/index_test.txt", "w+") as file: # open csv as table db = CSVDataTable("People", key_columns=["playerID"], loadit=True) file.write("Loading CSVDataTable:\n\n%s\n" % db) # add year index db.add_index("birthYear", "INDEX", ["birthYear"]) file.write("New index: %s\n" % db._indexes["birthYear"]) # compare FBT with and without index tmp = {"birthYear": "1971", "birthCity": "Denver"} file.write("FBT using template: %s\n" % tmp) start = time() for i in range(1000): db.find_by_template(tmp, index_allowed=False) end = time() file.write("Time to execute 1000 FBTs without indexes: %s s\n" % (end - start)) start = time() for i in range(1000): db.find_by_template(tmp, index_allowed=True)
class TestStringMethods(unittest.TestCase): def setUp(self): connect_info = { 'directory': '../data/', 'file_name': 'demographics_by_zip.csv' } self.cnx = { 'directory': '../data/', 'file_name': 'unit_test_temp.csv' } copyfile(connect_info['directory'] + connect_info['file_name'], connect_info['directory'] + 'unit_test_temp.csv') self.worker_test= CSVDataTable('stats_by_zip_test',connect_info=self.cnx,key_columns=['zipcode']) self.worker_test.load() def test_wrong_keys_in_query_raises_error(self): with self.assertRaises(ValueError): self.worker_test.find_by_primary_key([10003,1]) def test_find_by_keys_works(self): self.assertEqual(self.worker_test.find_by_primary_key(['10004'],field_list=['zipcode'])._rows,[{'zipcode': '10004'}]) def test_find_by_template_works(self): self.assertEqual(self.worker_test.find_by_template({'zipcode': '10004'}, field_list=['cnt_participants'])._rows,[{'cnt_participants': '0'}]) def test_delete_by_template_works(self): self.worker_test.delete_by_template({'zipcode': '10007'}) out = self.worker_test.find_by_template({'zipcode': '10007'}) self.assertEqual(None, out) def test_delete_by_key_works(self): self.worker_test.delete_by_key(['10005']) out = self.worker_test.find_by_primary_key(['10005']) self.assertEqual(None, out) def test_update_by_template_works(self): self.worker_test.update_by_template({'zipcode': '10006'},{'cnt_participants':'3'}) out = self.worker_test.find_by_template({'zipcode': '10006'},field_list=['cnt_participants']) self.assertEqual([{'cnt_participants': '3'}], out._rows) def test_update_by_key_works(self): self.worker_test.update_by_key(['10006'],{'cnt_participants':'6'}) out = self.worker_test.find_by_primary_key(['10006'],field_list=['cnt_participants']) self.assertEqual([{'cnt_participants': '6'}], out._rows) def test_none_returned_on_no_result_query(self): self.assertEqual(self.worker_test.find_by_primary_key(['-99999']),None) def test_invalid_field_name(self): with self.assertRaises(ValueError): self.worker_test.find_by_template({'zipcode': '10004'},field_list=['test']) def test_not_implemented_error(self): with self.assertRaises(NotImplementedError): self.worker_test.find_by_template({'zipcode': '10004'},field_list=['zipcode']).find_by_primary_key([-99999]) def tearDown(self): os.remove(self.cnx['directory'] + self.cnx['file_name'])
# Charles Blackmon-Luca, ccb2158 import sys sys.path.append("../src/") from CSVDataTable import CSVDataTable with open("../test_output/insert_test.txt", "w+") as file: # open csv as table db = CSVDataTable("ssol_dummy", key_columns=["Uni"], loadit=True) file.write("Loading CSVDataTable:\n\n%s\n" % db) # add new entry and find it row = {"Uni" : "ccb2158", "First_Name" : "Charles", "Last_Name": "Luca"} db.insert(row) file.write("Insert row: %s\n\n" % row) file.write("Row in table:\n\n%s\n" % db.find_by_template({"Uni" : "ccb2158"})) # try to insert duplicate entry file.write("Insert row: %s\n\n" % {"Uni" : "bn0523"}) try: db.insert(row) except Exception as e: file.write("%s\n" % str(e)) # remove rows added file.write("Delete by template: %s\n\n" % {"Uni" : "ccb2158"})
from CSVDataTable import CSVDataTable import json # Initialize the Batting table batting_csvtable = CSVDataTable("batting", "Batting.csv", ["playerID", "teamID", "yearID", "stint"]) batting_csvtable.load() # Test load # the specified primary key fields for the DataTable are not a subset of the columns in the underlying file/table # batting_csvtable = CSVDataTable("batting", "Batting.csv", ["playersff", "teamID", "yearID", "stint"]) # batting_csvtable.load() # Test find_by_primary_key # If the fields is None ans = batting_csvtable.find_by_primary_key(["abbeych01", "WAS", "1897", "1"]) print(json.dumps(ans, indent=2)) # If the fields is not None ans = batting_csvtable.find_by_primary_key(["abbeych01", "WAS", "1897", "1"], ["playerID", "lgID", "G", "BB", "SH"]) print(json.dumps(ans, indent=2)) # If the fields has values that don't exist in csv file, in the case, I use "CCCCC" to test my program ans = batting_csvtable.find_by_primary_key(["abbeych01", "WAS", "1897", "1"], ["playerID", "lgID", "G", "BB", "CCCCC"]) print(json.dumps(ans, indent=2)) # Test find_by_template # If the input t is correct and fields is None ans = batting_csvtable.find_by_template({"teamID": "CHN", "yearID": "2006"}) print(json.dumps(ans, indent=2)) # If the input t is correct and fields is not None ans = batting_csvtable.find_by_template({"teamID": "CHN", "yearID": "2006"}, ["playerID", "yearID", "teamID", "RBI", "G"])