def test_create_it():
    tbl = CSVDataTable(
        "People", {
            "directory":
            "/Users/wangxinquan/Desktop/2sem/W4111Database/HW1_xw2566/data",
            "file_name": "People.csv"
        }, ['playerID'], None)
    tbl.load()
    print("First table = ", tbl)
def test_matches():
    tbl = CSVDataTable(
        "People", {
            "directory":
            "/Users/wangxinquan/Desktop/2sem/W4111Database/HW1_xw2566/data",
            "file_name": "People.csv"
        }, ['playerID'], None)
    tbl.load()
    print("First table = ", tbl)
    tmp = {"nameLast": "Aardsma", "nameFirst": "Dave"}
    result = tbl.matches_template(tmp, tbl._rows[0])
    print("Match = ", result)
def test_key():

    tbl = CSVDataTable(
        "People", {
            "directory":
            "/Users/wangxinquan/Desktop/2sem/W4111Database/HW1_xw2566/data",
            "file_name": "People.csv"
        }, ['playerID'], None)
    tbl.load()
    print("First table = ", tbl)

    result = tbl.find_by_primary_key(['willite01'],
                                     field_list=['playerID', 'nameLast'])
    print("\n\nResult = ", result)
Ejemplo n.º 4
0
 def setUp(self):
       connect_info = {
              'directory': '../data/',
              'file_name': 'demographics_by_zip.csv'
          }
     
       self.cnx = {
              'directory': '../data/',
              'file_name': 'unit_test_temp.csv'
          }        
      
       copyfile(connect_info['directory'] + connect_info['file_name'], connect_info['directory'] + 'unit_test_temp.csv')
       
       self.worker_test= CSVDataTable('stats_by_zip_test',connect_info=self.cnx,key_columns=['zipcode'])      
       self.worker_test.load()
def test_tmp1():

    tbl = CSVDataTable(
        "People", {
            "directory":
            "/Users/wangxinquan/Desktop/2sem/W4111Database/HW1_xw2566/data",
            "file_name": "People.csv"
        }, ['playerID'], None)
    tbl.load()
    print("First table = ", tbl)
    tmp = {"nameLast": "Williams", "throws": "R"}
    result = tbl.find_by_template(
        tmp, field_list=['playerID', 'nameLast', 'birthCity', 'throws'])
    print("\n\n After find by template, Result = ", result)

    tmp2 = {'birthCity': 'San Diego'}
    result2 = result.find_by_template(tmp2)
    print("\n\n After find by template find by template, Result = ", result2)
Ejemplo n.º 6
0
def top_ten_hitters_csv():
    # init the files
    batting_file = CSVDataTablePkg.CSVDataTable("Batting", "Batting.csv",
                                                ["playerID", "teamID", "yearID", "stint"]);
    batting_file.load();
    people_file = CSVDataTablePkg.CSVDataTable("People", "People.csv", ["playerID"]);
    people_file.load();

    # save the processed ids
    processed_player_ids = list();
    # save the record
    records = list();
    # check the unique playerID in Batting
    for row in batting_file.table_data:
        play_id = row[PLAYER_ID];
        if play_id not in processed_player_ids:
            processed_player_ids.append(play_id);
            result_batting = batting_file.find_by_template({PLAYER_ID: play_id}, [YEAR_ID, BAT, HIT]);
            # check yearID>=1960 for this player
            flag = False;
            if len(result_batting) > 0:
                total_bats = 0;
                total_hits = 0;
                for r in result_batting:
                    if int(r[YEAR_ID]) >= 1960:
                        flag = True;
                    # compute total AB and total H
                    total_bats += int(r[BAT]);
                    total_hits += int(r[HIT]);
            # if this is a valid playerID
            if flag:
                # compute the batting average
                if total_bats > 200:
                    batting_avg = total_hits / total_bats;
                    # look up for the name
                    result_name = people_file.find_by_primary_key([play_id], [PLAYER_ID, NAME_FIRST, NAME_LAST]);
                    first_name = result_name[0][NAME_FIRST];
                    last_name = result_name[0][NAME_LAST];
                    record = dict(zip([PLAYER_ID, NAME_FIRST, NAME_LAST, TOTAL_BAT, TOTAL_HIT, BATTING_AVG],
                                      [play_id, first_name, last_name, total_bats, total_hits, batting_avg]));
                    records.append(record);

    sorted_records = sorted(records, key=lambda k: k[BATTING_AVG], reverse=True);
    return sorted_records[0:10];
def test_update():
    tbl = CSVDataTable(
        "offices", {
            "directory":
            "/Users/wangxinquan/Desktop/2sem/W4111Database/HW1_xw2566/data",
            "file_name": "offices.csv"
        }, ['officeCode'], None)
    tbl.load()

    tmp = {'city': "Paris"}
    new_v = {"officeCode": 13, "state": "Mars", "country": "Jupiter"}

    tbl.update_by_template(tmp, new_v)

    result = tbl.find_by_template(tmp)
    print("\n\nAfter update ... ", result)
def test_delete():

    tbl = CSVDataTable(
        "offices", {
            "directory":
            "/Users/wangxinquan/Desktop/2sem/W4111Database/HW1_xw2566/data",
            "file_name": "offices.csv"
        }, ['officeCode'], None)
    tbl.load()
    print("First table = ", tbl)

    tmp = {"city": "Paris"}
    result1 = tbl.find_by_template(tmp)
    print("\n\nbefore delete =", result1)

    result = tbl.delete_by_template(tmp)
    print("\n\nI deleted ... ", result, "rows")

    result2 = tbl.find_by_template(tmp)
    print("\n\nafter delete =", result2)
def test_insert():

    tbl = CSVDataTable(
        "offices", {
            "directory":
            "/Users/wangxinquan/Desktop/2sem/W4111Database/HW1_xw2566/data",
            "file_name": "offices.csv"
        }, ['officeCode'], None)
    tbl.load()
    print("First table = ", tbl)

    tmp = {"city": "Tokyo"}
    result1 = tbl.find_by_template(tmp)
    print("\n\nbefore insert =", result1)

    new_r = {'officeCode': '300', 'city': 'Tokyo'}
    result = tbl.insert(new_r)

    result2 = tbl.find_by_template(tmp)
    print("\n\nafter insert =", result2)
Ejemplo n.º 10
0
# Charles Blackmon-Luca, ccb2158

import sys

sys.path.append("../src/")

from CSVDataTable import CSVDataTable

with open("../test_output/fbt_test.txt", "w+") as file:

    # open csv as table
    db = CSVDataTable("People", key_columns=["playerID"], loadit=True)
    file.write("Loading CSVDataTable:\n\n%s\n" % db)
    
    # find all 1871ers
    tmp = {"birthYear" : "1871"}
    file.write("Find by template : %s\n\n" % tmp)
    db = db.find_by_template(tmp)
    file.write("Result:\n\n%s\n" % db)
    
    # limit our fields list
    fields = ["nameFirst", "nameLast", "deathYear"]
    file.write("Limit to fields : %s\n\n" % fields)
    db = db.find_by_template({}, field_list=fields)
    file.write("Result:\n\n%s\n" % db)
Ejemplo n.º 11
0
# Charles Blackmon-Luca, ccb2158

import sys
from time import time

sys.path.append("../src/")

from CSVDataTable import CSVDataTable

with open("../test_output/index_test.txt", "w+") as file:

    # open csv as table
    db = CSVDataTable("People", key_columns=["playerID"], loadit=True)
    file.write("Loading CSVDataTable:\n\n%s\n" % db)

    # add year index
    db.add_index("birthYear", "INDEX", ["birthYear"])
    file.write("New index: %s\n" % db._indexes["birthYear"])

    # compare FBT with and without index
    tmp = {"birthYear": "1971", "birthCity": "Denver"}
    file.write("FBT using template: %s\n" % tmp)
    start = time()
    for i in range(1000):
        db.find_by_template(tmp, index_allowed=False)
    end = time()
    file.write("Time to execute 1000 FBTs without indexes: %s s\n" %
               (end - start))
    start = time()
    for i in range(1000):
        db.find_by_template(tmp, index_allowed=True)
Ejemplo n.º 12
0
class TestStringMethods(unittest.TestCase):

   def setUp(self):
         connect_info = {
                'directory': '../data/',
                'file_name': 'demographics_by_zip.csv'
            }
       
         self.cnx = {
                'directory': '../data/',
                'file_name': 'unit_test_temp.csv'
            }        
        
         copyfile(connect_info['directory'] + connect_info['file_name'], connect_info['directory'] + 'unit_test_temp.csv')
         
         self.worker_test= CSVDataTable('stats_by_zip_test',connect_info=self.cnx,key_columns=['zipcode'])      
         self.worker_test.load()

   def test_wrong_keys_in_query_raises_error(self):
        with self.assertRaises(ValueError):
            self.worker_test.find_by_primary_key([10003,1])
            
   def test_find_by_keys_works(self):
        self.assertEqual(self.worker_test.find_by_primary_key(['10004'],field_list=['zipcode'])._rows,[{'zipcode': '10004'}])
            
   def test_find_by_template_works(self):
        self.assertEqual(self.worker_test.find_by_template({'zipcode': '10004'},
                                                              field_list=['cnt_participants'])._rows,[{'cnt_participants': '0'}])
   
   def test_delete_by_template_works(self):
        self.worker_test.delete_by_template({'zipcode': '10007'})
        out = self.worker_test.find_by_template({'zipcode': '10007'})
        self.assertEqual(None, out)
        
   def test_delete_by_key_works(self):
        self.worker_test.delete_by_key(['10005'])
        out = self.worker_test.find_by_primary_key(['10005'])
        self.assertEqual(None, out)
    
   def test_update_by_template_works(self):
        self.worker_test.update_by_template({'zipcode': '10006'},{'cnt_participants':'3'})
        out = self.worker_test.find_by_template({'zipcode': '10006'},field_list=['cnt_participants'])
        self.assertEqual([{'cnt_participants': '3'}], out._rows)
              
   def test_update_by_key_works(self):
        self.worker_test.update_by_key(['10006'],{'cnt_participants':'6'})
        out = self.worker_test.find_by_primary_key(['10006'],field_list=['cnt_participants'])
        self.assertEqual([{'cnt_participants': '6'}], out._rows)
        
   def test_none_returned_on_no_result_query(self):
        self.assertEqual(self.worker_test.find_by_primary_key(['-99999']),None)
        
   def test_invalid_field_name(self):
        with self.assertRaises(ValueError):
            self.worker_test.find_by_template({'zipcode': '10004'},field_list=['test'])

   def test_not_implemented_error(self):
        with self.assertRaises(NotImplementedError):
            self.worker_test.find_by_template({'zipcode': '10004'},field_list=['zipcode']).find_by_primary_key([-99999])

   def tearDown(self):
        os.remove(self.cnx['directory'] + self.cnx['file_name'])
Ejemplo n.º 13
0
# Charles Blackmon-Luca, ccb2158

import sys

sys.path.append("../src/")

from CSVDataTable import CSVDataTable

with open("../test_output/insert_test.txt", "w+") as file:

    # open csv as table
    db = CSVDataTable("ssol_dummy", key_columns=["Uni"], loadit=True)
    file.write("Loading CSVDataTable:\n\n%s\n" % db)
    
    # add new entry and find it
    row = {"Uni" : "ccb2158",
           "First_Name" : "Charles",
           "Last_Name": "Luca"}
    db.insert(row)
    file.write("Insert row: %s\n\n" % row)
    file.write("Row in table:\n\n%s\n" % db.find_by_template({"Uni" : "ccb2158"}))
    
    # try to insert duplicate entry
    file.write("Insert row: %s\n\n" % {"Uni" : "bn0523"})
    try:
        db.insert(row)
    except Exception as e:
        file.write("%s\n" % str(e))
        
    # remove rows added
    file.write("Delete by template: %s\n\n" % {"Uni" : "ccb2158"})
Ejemplo n.º 14
0
from CSVDataTable import CSVDataTable
import json

# Initialize the Batting table
batting_csvtable = CSVDataTable("batting", "Batting.csv", ["playerID", "teamID", "yearID", "stint"])
batting_csvtable.load()

# Test load
# the specified primary key fields for the DataTable are not a subset of the columns in the underlying file/table
# batting_csvtable = CSVDataTable("batting", "Batting.csv", ["playersff", "teamID", "yearID", "stint"])
# batting_csvtable.load()

# Test find_by_primary_key
# If the fields is None
ans = batting_csvtable.find_by_primary_key(["abbeych01", "WAS", "1897", "1"])
print(json.dumps(ans, indent=2))
# If the fields is not None
ans = batting_csvtable.find_by_primary_key(["abbeych01", "WAS", "1897", "1"], ["playerID", "lgID", "G", "BB", "SH"])
print(json.dumps(ans, indent=2))
# If the fields has values that don't exist in csv file, in the case, I use "CCCCC" to test my program
ans = batting_csvtable.find_by_primary_key(["abbeych01", "WAS", "1897", "1"], ["playerID", "lgID", "G", "BB", "CCCCC"])
print(json.dumps(ans, indent=2))


# Test find_by_template
# If the input t is correct and fields is None
ans = batting_csvtable.find_by_template({"teamID": "CHN", "yearID": "2006"})
print(json.dumps(ans, indent=2))
# If the input t is correct and fields is not None
ans = batting_csvtable.find_by_template({"teamID": "CHN", "yearID": "2006"}, ["playerID", "yearID", "teamID",
                                                                              "RBI", "G"])