Esempio n. 1
0
def t10():

    "Test the time oj JOIN with and without optimization!! "

    rows, cols = load("../CSVFile/People.csv")
    t = CSVDataTable.CSVDataTable(table_name="People", column_names=cols, primary_key_columns=['playerID'])
    t.import_data(rows)
    print("T = ", t)

    rows, cols = load("../CSVFile/BattingSmall.csv")
    t2 = CSVDataTable.CSVDataTable(table_name="BattingSmall", column_names=cols, primary_key_columns=['playerID', 'teamID', 'yearID', 'stint'])
    t2.import_data(rows)
    print("T = ", t)

    print("\n\n With optimization\n")
    start_time = time.time()
    for i in range(0,1):
        j = t2.join(t, ['playerID'],
                    w_clause={"People.nameLast": "Williams", "People.birthCity": "San Diego"},
                    p_clause=['playerID', "People.nameLast", "People.nameFirst", "BattingSmall.teamID",
                              "BattingSmall.yearID", "BattingSmall.stint", "BattingSmall.H", "BattingSmall.AB"],
                    optimize=True)
    end_time = time.time()
    print("Elapsed time = ", end_time-start_time)

    print("\n\n Without optimization\n")
    start_time = time.time()
    for i in range(0, 1):
        j = t2.join(t, ['playerID'],
                    w_clause={"People.nameLast": "Williams", "People.birthCity": "San Diego"},
                    p_clause=['playerID', "People.nameLast", "People.nameFirst", "BattingSmall.teamID",
                              "BattingSmall.yearID", "BattingSmall.stint", "BattingSmall.H", "BattingSmall.AB"],
                    optimize=False)
    end_time = time.time()
    print("Elapsed time = ", end_time - start_time)
Esempio n. 2
0
def t4():
    rows, cols = load(
        "/Users/wangxinquan/Desktop/2sem/W4111Database/xw2566_HW3/Data/People.csv"
    )
    t = CSVDataTable.CSVDataTable(table_name="People",
                                  column_names=cols,
                                  primary_key_columns=['playerID'])
    t.import_data(rows)
    print("T = ", t)

    rows, cols = load(
        "/Users/wangxinquan/Desktop/2sem/W4111Database/xw2566_HW3/Data/Batting.csv"
    )
    t2 = CSVDataTable.CSVDataTable(
        table_name="Batting",
        column_names=cols,
        primary_key_columns=['playerID', 'teamID', "yearID", 'stint'])
    t2.import_data(rows)
    print("T = ", t2)

    j = t2.join(t, ['playerID'],
                w_clause={
                    "People.nameLast": "Williams",
                    "People.birthCity": "San Diego"
                },
                p_clause=[
                    'playerID', "People.nameLast", "People.nameFirst",
                    "Batting.teamID", "Batting.yearID", "Batting.stint",
                    "Batting.H", "Batting.AB"
                ],
                optimize=True)
    print("Result = ", j)
    print("All rows =", json.dumps(j.get_rows(), indent=2))
Esempio n. 3
0
def t4():
    """
    Test join function.
    :return:
    """

    print("test JOIN function. ")

    rows, cols = load("../CSVFile/People.csv")
    t = CSVDataTable.CSVDataTable(table_name="People", column_names=cols, primary_key_columns=['playerID'])
    t.import_data(rows)
    print("T = ", t)

    rows, cols = load("../CSVFile/BattingSmall.csv")
    t2 = CSVDataTable.CSVDataTable(table_name="BattingSmall", column_names=cols, primary_key_columns=['playerID', 'teamID', 'yearID', 'stint'])
    t2.import_data(rows)
    print("T = ", t)

    j = t2.join(t, ['playerID'],
                w_clause={"People.nameLast": "Williams", "People.birthCity": "San Diego"},
                p_clause=['playerID', "People.nameLast", "People.nameFirst", "BattingSmall.teamID",
                          "BattingSmall.yearID", "BattingSmall.stint", "BattingSmall.H", "BattingSmall.AB"], optimize=True)

    print("Result = ", j)
    print(j.get_rows())
    print("All rows = ", json.dumps(j.get_rows(), indent=2))
Esempio n. 4
0
def test_save_and_load():
    print("\n\n*************TEST TEN, SAVE AND LOAD**************")
    rows, cols = load("CSVFile/offices.csv")

    print("\n ***READING CSV TO LOAD OFFICES.CSV")
    t=CSVDataTable.CSVDataTable(table_name = "Offices",column_names = cols, primary_key_columns=["officeCode"])
    t.import_data(rows)

    print("\n ***SAVING DOWN THE CSVDATATABLE")
    t.save()


    print("TABLE INFO AND ITS INDEXES\n")
    print(t)
    for k,v in t._indexes.items():
        print(v.to_json())

    print("\n ***LOADING CSV DATATABLE***")
    t2 = CSVDataTable.CSVDataTable(table_name = "Offices",loadit=True)

    print("\n **TABLE LOADED CONFIRM THEY ARE THE SAME**\n")
    print(t2)
    print("\n ***CONFIRM INDEX INTEGRITY***")
    for k,v in t2._indexes.items():
        print(v.to_json())
Esempio n. 5
0
def t1():
    """
    Test import_data and find_by_template functions.
    :return:
    """
    print("Test import_data(which including add_rows(), and test find_by_tmp not use index. ")

    rows, cols = load("../CSVFile/People.csv")
    t = CSVDataTable.CSVDataTable(table_name="People", column_names=cols, primary_key_columns=['playerID'])
    t.import_data(rows)
    print("T = ", t)

    tmp = {'playerID': 'willite01'}

    start = time.time()
    for i in range(0, 1000):
        r = t.find_by_template(tmp, fields=None, use_index=False).get_rows()
        if i == 0:
            print("Row = ", r)

    end = time.time()
    elapsed = end - start
    print("Time : ", elapsed)

    t.save()
Esempio n. 6
0
def test_index_or_no_index():
    logging.debug("-------- test_index_or_no_index ---------")

    rows, cols = load(
        "/home/zhida/Desktop/Database/code/HW3/CSVFile/beach-weather-stations-automated-sensors-1.csv"
    )
    t = CSVDataTable.CSVDataTable(table_name="automated-sensors",
                                  column_names=cols,
                                  primary_key_columns=["Measurement ID"],
                                  loadit=None)
    t.import_data(rows=rows)
    t.add_index("Station Name", ["Station Name"], "INDEX")
    t.add_index("Wet Bulb Temperature", ["Wet Bulb Temperature"], "INDEX")
    print("\n\n\n")

    time1 = time.time()
    for i in range(1000):
        t.find_by_template(tmp={"Wet Bulb Temperature": '7.0'},
                           use_index=False,
                           debug_log=False)
    time2 = time.time()
    print("[-No index-] -> run find_by_template(), 1000 iterations\n")
    print("takes time = ", str(time2 - time1) + " second\n")

    time1 = time.time()
    for i in range(1000):
        t.find_by_template(tmp={"Wet Bulb Temperature": '7.0'},
                           use_index=True,
                           debug_log=False)
    time2 = time.time()
    print("[-Use index-] -> run find_by_template(), 1000 iterations\n")
    print("takes time = ", str(time2 - time1) + " second")
Esempio n. 7
0
def t2():
    """
    Test add_index function, use index.
    :return:
    """

    print("Test add_index function, find_by_template use index.")

    rows, cols = load("../CSVFile/People.csv")
    t = CSVDataTable.CSVDataTable(table_name="People", column_names=cols, primary_key_columns=['playerID'])
    t.import_data(rows)
    print("T = ", t)

    tmp = {'playerID': 'willite01', "nameLast": "Williams"}
    t.add_index("LASTNAME", ['nameLast', 'nameFirst'], "INDEX")

    start = time.time()

    for i in range(0, 1000):
        r = t.find_by_template(tmp, fields=None, use_index=True).get_rows()
        if i == 0:
            print("Row = ", r)

    end = time.time()
    elapsed = end - start
    print("Time : ", elapsed)
Esempio n. 8
0
def test_find():
    print("\n\n*************TEST TWO,  FIND BY TEMPLATE AND COMPARE RUN TIME WITH INDEX vs WITHOUT**************")
    tmp = {"playerID":"willite01"}
    rows, cols = load("CSVFile/people.csv")
    t=CSVDataTable.CSVDataTable(table_name = "People",column_names = cols, primary_key_columns=['playerID'])
    t.import_data(rows)

    print("\n***TESTING RUN WITHOUT INDEX***")
    start = time.time()
    for i in range(0,5):
        r = t.find_by_template(tmp,fields=None,use_index=False)
        if i ==0:
            print("Row = ",r)
    end = time.time()
    elapsed = end - start
    print("Elapsed time=", elapsed)

    print("\n***TESTING RUNNING WITH INDEX***")
    start = time.time()
    for i in range(0,5):
        r=t.find_by_template(tmp,fields=None,use_index=True)
        if i==0:
            print("Row = ",r)
    end = time.time()
    elapsed = end-start
    print("Elapsed time = ",elapsed)
Esempio n. 9
0
def test_save_db4():

    logging.debug("-------- test_save_db ---------")
    rows, cols = load(
        "/home/zhida/Desktop/Database/code/HW3/CSVFile/beach-weather-stations-automated-sensors-1.csv"
    )
    t = CSVDataTable.CSVDataTable(table_name="automated-sensors",
                                  column_names=cols,
                                  primary_key_columns=["Measurement ID"],
                                  loadit=None)
    t.import_data(rows=rows)
    t.add_index("Station Name", ["Station Name"], "INDEX")
    t.save()
    print("save DB success!\n")
    print("\n\n\n\n\n\n")

    time1 = time.time()
    for i in range(100):
        t.find_by_template(tmp={
            "Station Name": "Oak Street Weather Station",
            "Wet Bulb Temperature": 55
        },
                           use_index=False)
    time2 = time.time()
    print("takes time = ", str(time2 - time1) + " second")

    time1 = time.time()
    for i in range(100):
        t.find_by_template(tmp={
            "Station Name": "Oak Street Weather Station",
            "Wet Bulb Temperature": 55
        },
                           use_index=True)
    time2 = time.time()
    print("takes time = ", str(time2 - time1) + " second")
Esempio n. 10
0
def test_insert_and_import_data():
    print("\n\n*************TEST ONE, TEST LOAD FUNCTION**************")
    print("*** HERE WE TEST THE IMPORT_DATA FUNCTION WHICH LEVEREAGES INSERT. LATER WE ALSO TEST LOADIT = TRUE AND THE LOAD FUNCTION***")
    rows, cols = load("CSVFile/people.csv")
    t=CSVDataTable.CSVDataTable(table_name = "People",column_names = cols, primary_key_columns=['playerID'])
    t.import_data(rows)
    print("T = ",t)
Esempio n. 11
0
def test_specific_project():
    rows, cols = load("CSVFile/People.csv")
    t=CSVDataTable.CSVDataTable(table_name = "People",column_names = cols, primary_key_columns=["playerID"])
    t.import_data(rows)
    print("T = ",t)

    p = ["playerID","People.nameLast","Batting.H"]
    print("Specific template = ", t._get_specific_project())
Esempio n. 12
0
def test_index():
    print("\n\n*************TEST FOUR CREATING AN INDEX**************")
    t = CSVDataTable.CSVDataTable(table_name="Test",column_names=['last_name','first_name','uni'],loadit=None)
    i = CSVDataTable.Index(index_name="Bob", index_columns = ["last_name","first_name"],kind="INDEX", table = t)
    print(i)
    r = {"last_name": "Ferguson", "first_name": "Donald", "uni":"sure"}
    kv = i.compute_key(r)
    i.add_to_index(row=r,rid="2")
    print("KV = ",kv, "added to index")
Esempio n. 13
0
def t9():

    "Test delete function. Check if row id is correct after deletion. "
    rows, cols = load("../CSVFile/rings.csv")
    t = CSVDataTable.CSVDataTable(table_name="Rings", column_names=cols, primary_key_columns=['uni'])
    t.import_data(rows)
    r = t.delete({"last_name": "Baggins"})
    print("delete:", r)
    print("\n\nafter delete, T = ", t)
Esempio n. 14
0
def test_add_index():
    print("\n\n*************TEST SIX, CREATE WITH INDEX AND THEN ADD ANOTHER INDEX**************")
    rows, cols = load("CSVFile/offices.csv")
    t=CSVDataTable.CSVDataTable(table_name = "Offices",column_names = cols, primary_key_columns=["officeCode"])
    print("\n***ORIGINALLY ***\n")
    print(t)
    cols = ["city","state","postalCode"]
    t.add_index(name="location",column_list=cols,kind="INDEX")
    print("\n***ADDING ANOTHER INDEX, LOCATION ***\n")
    print(t)
Esempio n. 15
0
def test_incomplete_index_info():
    ###CHECK FOR not complete index info ADD_INDEX FUNCTION 
    print("\n\n*************TEST EIGHT, NOT ENOUGH INFORMATION TO CREATE INDEX THROWS AN ERROR**************")
    rows, cols = load("CSVFile/offices.csv")
    t=CSVDataTable.CSVDataTable(table_name = "Offices",column_names = cols, primary_key_columns=["officeCode"])
    cols = ["city","state","postalCode"]
    try:
        t.add_index("location",cols)
    except Exception as e:
        print("\n We caught an error, you did  not provide enough information to create the key" + "\n",e)
Esempio n. 16
0
def t8():
    rows, cols = load(
        "/Users/wangxinquan/Desktop/2sem/W4111Database/xw2566_HW3/Data/offices.csv"
    )
    t = CSVDataTable.CSVDataTable(table_name="offices",
                                  column_names=cols,
                                  primary_key_columns=['officeCode'])

    t.import_data(rows)

    t.save()
Esempio n. 17
0
def test_join():
    ###CHECK FOR not complete index info ADD_INDEX FUNCTION 
    print("\n\n*************TEST ELEVEN JOIN AND ALSO SEE THAT IT SWAPS TABLESX**************")
    rows, cols = load("CSVFile/People.csv")
    t=CSVDataTable.CSVDataTable(table_name = "People",column_names = cols, primary_key_columns=["playerID"])
    t.import_data(rows)
    print("T = ",t)

    rows, cols = load("CSVFile/Batting.csv")
    t2=CSVDataTable.CSVDataTable(table_name = "Batting",column_names = cols, primary_key_columns=["playerID","teamID","yearID","stint"])
    t2.import_data(rows)
    print("T2 = ",t2)

    j = t2.join(t, ['playerID'],
        w_clause ={"People.nameLast":"Williams", "People.birthCity": "San Diego"},
        p_clause =['playerID',"People.nameLast","People.nameFirst","Batting.teamID","Batting.yearID","Batting.stint","Batting.H","Batting.AB"]
        ,optimize = True)

    print("Result = " ,j)
    print("All rows = ", json.dumps(j._rows, indent=2))
Esempio n. 18
0
def test2():
    new_r, cols = load(
        "/Users/wangxinquan/Desktop/2sem/W4111Database/xw2566_HW3/Data/offices.csv"
    )
    t = CSVDataTable.CSVDataTable(table_name="offices.",
                                  column_names=cols,
                                  primary_key_columns=['officeCode'],
                                  loadit=None)

    t.import_data(new_r)
    print("t = ", t)
Esempio n. 19
0
def test_conflict():
    print("\n\n*************TEST FIVE CREATING CONFLICTING INDEX THROWS ERROR WHEN UNIQUE**************")
    t= CSVDataTable.CSVDataTable(table_name="Test",column_names=['last_name','first_name','uni'],loadit=None)
    i = CSVDataTable.Index(index_name="Bob", index_columns = ["last_name","first_name"],kind="UNIQUE",table=t)
    r = {"last_name": "Ferguson", "first_name": "Donald", "uni":"sure"}
    kv = i.compute_key(r)
    i.add_to_index(row=r,rid="2")
    try:
        i.add_to_index(row=r, rid="3")
    except Exception as e:
        print("We caught an error: ", e)
Esempio n. 20
0
def test_create_table():

    logging.debug("-------- test_create_table ---------")
    t = CSVDataTable.CSVDataTable(
        table_name="students",
        column_names=["uni", "first_name", "last_name"],
        primary_key_columns=["uni"],
        loadit=None)

    print(t)
    print("\n\n\n\n\n\n")
Esempio n. 21
0
def t3():
    rows, cols = load(
        "/Users/wangxinquan/Desktop/2sem/W4111Database/xw2566_HW3/Data/Batting.csv"
    )
    t2 = CSVDataTable.CSVDataTable(
        table_name="Batting",
        column_names=cols,
        primary_key_columns=['playerID', 'teamID', "yearID", 'stint'])
    t2.import_data(rows)
    print("T = ", t2)
    tmp = {"last_name": "Baggins", "first_name": "Bilbo"}
    print("Specific template = ", t2._get_specific_where(tmp))
Esempio n. 22
0
def t9():
    rows, cols = load(
        "/Users/wangxinquan/Desktop/2sem/W4111Database/xw2566_HW3/Data/offices.csv"
    )
    t = CSVDataTable.CSVDataTable(table_name="offices",
                                  column_names=cols,
                                  primary_key_columns=['officeCode'])

    t.import_data(rows)

    r = t.delete_by_template({"officeCode": "11"})
    print(r)
Esempio n. 23
0
def test_load_data():

    logging.debug("-------- test_load_data ---------")
    rows, cols = load("/home/zhida/Desktop/Database/code/HW3/CSVFile/city.csv")
    t = CSVDataTable.CSVDataTable(table_name="city",
                                  column_names=cols,
                                  primary_key_columns=["id"],
                                  loadit=None)
    t.import_data(rows=rows)

    print("t = ", json.dumps(t._rows, indent=2))
    print("\n\n\n\n\n\n")
Esempio n. 24
0
def test_save_db2():

    logging.debug("-------- test_save_db ---------")
    rows, cols = load("/home/zhida/Desktop/Database/code/HW3/CSVFile/city.csv")
    t = CSVDataTable.CSVDataTable(table_name="city",
                                  column_names=cols,
                                  primary_key_columns=["id"],
                                  loadit=None)
    t.import_data(rows=rows)
    t.save()
    print("save DB success!\n")
    print("\n\n\n\n\n\n")
Esempio n. 25
0
def t5():

    rows, cols = load(
        "/Users/wangxinquan/Desktop/2sem/W4111Database/xw2566_HW3/Data/People.csv"
    )
    t = CSVDataTable.CSVDataTable(table_name="People",
                                  column_names=cols,
                                  primary_key_columns=['playerID'])
    t.import_data(rows)
    print("T = ", t)

    p = ["playerID", "People.nameLast", "Batting.H"]

    print("Specific template = ", t._get_specific_project(p))
Esempio n. 26
0
def t5():
    """
    Test get_specific_project function
    :return:
    """
    print("Test get_specific_project function.")

    rows, cols = load("../CSVFile/People.csv")
    t = CSVDataTable.CSVDataTable(table_name="People", column_names=cols, primary_key_columns=['playerID'])
    t.import_data(rows)
    print("T = ", t)

    p = ["playerID", "People.nameLast", "Batting.H"]
    print("Specific template = ", t._get_specific_project(p))
Esempio n. 27
0
def test_delete():
    print("\n\n*************TEST NINE, DELETE WORKS SUCCESSFULLY**************")
    rows, cols = load("CSVFile/offices.csv")
    t=CSVDataTable.CSVDataTable(table_name = "Offices",column_names = cols, primary_key_columns=["officeCode"])
    t.import_data(rows)
    print("\n **ORIGINAL META DATA BEFORE WE RUN DELETE, 10 ROWS**\n")
    print(t)
    
    print("\n **DELETING CAMBRIDGE**\n")
    r = t.delete({"city":"Cambridge"})


    print("\n **DATA AFTER DELETING CAMBRIDGE, 9 ROWS**\n")
    print(t)
Esempio n. 28
0
def make_test_table2(b_index=True):
    t = CSVDataTable.CSVDataTable(table_name="teams",
                                  column_names=["uni", "bat", "teamID"],
                                  primary_key_columns=["uni"],
                                  loadit=None)
    t.insert({"uni": "zz", "bat": 2, "teamID": "spurs"})
    t.insert({"uni": "yy", "bat": 3, "teamID": "spurs"})
    t.insert({"uni": "xx", "bat": 2, "teamID": "lakers"})

    if b_index:
        index_columns = ["teamID"]
        t.add_index(index_name="Team", column_list=index_columns, kind="INDEX")

    return t
Esempio n. 29
0
def test_duplicate_index_name():
    print("\n\n*************TEST SEVEN, CREATING AN INDEX WITH A DUPLICATE NAME, AND CREATING A SECOND PRIMARY KEY THROWS AN ERROR**************")
    rows, cols = load("CSVFile/offices.csv")
    t=CSVDataTable.CSVDataTable(table_name = "Offices",column_names = cols, primary_key_columns=["officeCode"])
    print("\n***ORIGINAL ***\n")
    print(t)
    cols = ["city","state","postalCode"]
    try:
        t.add_index("PRIMARY",['city'],"INDEX")
    except Exception as e:
        print("*****You tried to add a duplicate name index*****"+ "\n",e)
    try:
        t.add_index("RANDOM",['state'],"PRIMARY")
    except Exception as e:
        print("\n*****You tried to make more than one primary keys*****"+ "\n",e)
Esempio n. 30
0
def t8():

    print("Test insert() function. ")

    rows, cols = load("../CSVFile/rings.csv")
    t = CSVDataTable.CSVDataTable(table_name="Rings", column_names=cols, primary_key_columns=['uni'])
    t.import_data(rows)
    t.save()
    print("Before insert T = ", t)
    new_row = {"last_name": "LoL", "first_name": "New", "uni": "LN11"}
    t.insert(new_row)
    print("After inset()\n T = ", t)
    print("Find the inseted row:")
    tmp = {"last_name": "LoL", "first_name": "New"}
    r = t.find_by_template(tmp, fields=None, use_index=True).get_rows()
    print(r)