Esempio n. 1
0
def test_rename_preserves_data():
    """Check that a renamed column carries the same values as the original."""
    original = Relation("../country.csv")
    renamed = original.rename("governmentform", "formofgov")

    # Compare the column under its old name with the column under its new name.
    values_before = original["governmentform"]
    values_after = renamed["formofgov"]
    assert values_before.equals(values_after)
Esempio n. 2
0
def test_cartesian_product2():
    """Cartesian product of a one-column and a two-column relation.

    Three students crossed with three (grade, course) rows must give nine
    rows, with every student paired with every (grade, course) row in order.
    """
    students = Relation(
        pd.DataFrame(data={'student': ['Abby', 'Billy', 'Carson']}))
    results = Relation(
        pd.DataFrame(data={
            'grade': ['A', 'A', 'B'],
            'course': ['Math', 'Science', 'Math'],
        }))

    expected = Relation(
        pd.DataFrame(data={
            'student': [
                'Abby', 'Abby', 'Abby',
                'Billy', 'Billy', 'Billy',
                'Carson', 'Carson', 'Carson',
            ],
            'grade': [
                'A', 'A', 'B',
                'A', 'A', 'B',
                'A', 'A', 'B',
            ],
            'course': [
                'Math', 'Science', 'Math',
                'Math', 'Science', 'Math',
                'Math', 'Science', 'Math',
            ],
        }))

    crossed = students.cartesian_product(results)
    assert crossed.equals(expected)
Esempio n. 3
0
def test_cartesian_product1():
    """Cartesian product of a 3-row relation with a 2-row relation.

    Each (country, continent) row must be paired with each
    (gnp, population) row, giving six rows in left-major order.
    """
    countries = Relation(
        pd.DataFrame(data={
            'country': ['USA', 'Canada', 'France'],
            'continent': ['North America', 'North America', 'Europe'],
        }))
    figures = Relation(
        pd.DataFrame(data={'gnp': [1234, 5678], 'population': [100, 250]}))

    expected = Relation(
        pd.DataFrame(data={
            'country': [
                'USA', 'USA',
                'Canada', 'Canada',
                'France', 'France',
            ],
            'continent': [
                'North America', 'North America',
                'North America', 'North America',
                'Europe', 'Europe',
            ],
            'gnp': [1234, 5678, 1234, 5678, 1234, 5678],
            'population': [100, 250, 100, 250, 100, 250],
        }))

    crossed = countries.cartesian_product(figures)
    assert crossed.equals(expected)
Esempio n. 4
0
def test_product_1():
    """Cartesian product of two relations that share a column name.

    Both inputs have an ``animal`` column; the expected result carries it
    twice, as ``animal_x`` (left input) and ``animal_y`` (right input).
    """
    owners = Relation(
        pd.DataFrame(data={
            'animal': ['zebra', 'x-ray fish'],
            'name': ['adam', 'dina'],
            'color': ['red', 'purple'],
        }))
    ages = Relation(
        pd.DataFrame(data={'animal': ['zebra', 'x-ray fish'], 'age': [7, 678]}))

    expected = Relation(
        pd.DataFrame(data={
            'animal_x': ['zebra', 'zebra', 'x-ray fish', 'x-ray fish'],
            'name': ['adam', 'adam', 'dina', 'dina'],
            'color': ['red', 'red', 'purple', 'purple'],
            'animal_y': ['zebra', 'x-ray fish', 'zebra', 'x-ray fish'],
            'age': [7, 678, 7, 678],
        }))

    crossed = owners.cartesian_product(ages)
    assert crossed.equals(expected)
Esempio n. 5
0
def test_groupby_2():
    """Cross a 3-row relation with a single-row relation.

    Despite its name, this test calls ``cartesian_product``: every
    (student, classes) row is paired with the single ``grade`` row, so the
    result keeps three rows and gains a constant ``grade`` column.
    """
    expected = Relation(
        pd.DataFrame(data={
            'student': ['amanda', 'sam', 'tony'],
            'classes': ['science', 'chinese', 'math'],
            'grade': ['A', 'A', 'A'],
        }))
    print(expected)

    enrolments = Relation(
        pd.DataFrame(data={
            'student': ['amanda', 'sam', 'tony'],
            'classes': ['science', 'chinese', 'math'],
        }))
    single_grade = Relation(pd.DataFrame(data={'grade': ['A']}))

    crossed = enrolments.cartesian_product(single_grade)
    print(crossed)
    assert crossed.equals(expected)
Esempio n. 6
0
def test_rename_nonexistent_cols():
    """Renaming a column that does not exist must be a no-op.

    The result must not gain the new column name and must compare equal to
    the relation it was derived from.
    """
    original = Relation("../country.csv")
    result = original.rename("a_fake_column", "something_nice")

    assert "something_nice" not in result.columns
    assert original.equals(result)
Esempio n. 7
0
def test_union_remove_duplicates():
    """A relation unioned with itself must keep its original row count."""
    countries = Relation("../country.csv")
    republics = countries.query("governmentform == 'Republic'")

    # If union dropped no duplicates, the self-union would be twice as long.
    self_union = republics.union(republics)
    assert len(republics) == len(self_union)
Esempio n. 8
0
def test_foo():
    """Outer-join two CSV-backed relations and compare with a third CSV.

    NOTE(review): the expected CSV is read with the default separator and is
    left as a plain DataFrame, while both inputs are pipe-separated and
    wrapped in ``Relation`` -- confirm that this asymmetry is intentional.
    """
    left_frame = pd.read_csv(
        "tests/test_outer_join/test_outer_join_1.csv", sep="|")
    left = Relation(left_frame)

    right_frame = pd.read_csv(
        "tests/test_outer_join/test_outer_join_2.csv", sep="|")
    right = Relation(right_frame)

    joined = left.outerjoin(right)
    expected = pd.read_csv("tests/test_outer_join/test_outer_join_3.csv")

    assert joined.equals(expected)
Esempio n. 9
0
def test_union_on_continent():
    """Union of the African and European rows has the summed row count."""
    countries = Relation("../country.csv")
    africa = countries.query("continent == 'Africa'")
    europe = countries.query("continent == 'Europe'")

    combined = africa.union(europe)
    expected_rows = len(africa) + len(europe)
    assert expected_rows == len(combined)
Esempio n. 10
0
def test_antijoin(r):
    """Anti-join North American countries against Caribbean countries.

    Both sides are projected to (name, region) and the result is compared
    with ``tests/antijoin.csv`` after the index has been reset on each side.
    The ``r`` argument is accepted but its value is not used; ``country`` is
    a module-level relation defined outside this function.
    """
    north_america = country.query('continent == "North America"').project(
        ['name', 'region'])
    caribbean = country.query('region == "Caribbean"').project(
        ['name', 'region'])

    remaining = north_america.antijoin(caribbean)
    actual = remaining.reset_index().drop(columns=["index"])

    loaded = Relation("tests/antijoin.csv", sep="|")
    loaded = loaded.reset_index().drop(columns=["index"])
    expected = Relation(pd.DataFrame(data=loaded))

    assert actual.equals(expected)
Esempio n. 11
0
def test_rename_removes_col():
    """Renaming swaps the header in the result and leaves the source alone."""
    source = Relation("../country.csv")
    renamed = source.rename("indepyear", "yearofindep")

    # The source relation must still expose only the old header ...
    assert "indepyear" in source.columns
    assert "yearofindep" not in source.columns

    # ... while the renamed relation exposes only the new one.
    assert "indepyear" not in renamed.columns
    assert "yearofindep" in renamed.columns
Esempio n. 12
0
def test_rename_1():
    """Rename the only column of a single-column relation."""
    source = Relation(pd.DataFrame(data={'hello': ['green', 'blue']}))
    expected = Relation(pd.DataFrame(data={'color': ['green', 'blue']}))

    renamed = source.rename("hello", "color")

    assert renamed.equals(expected)
Esempio n. 13
0
def test_rename_2():
    """Rename the second of two columns; the first must be left untouched."""
    source = Relation(
        pd.DataFrame(data={
            'color': ['green', 'blue'],
            'hello': ['banana', 'cookie'],
        }))
    expected = Relation(
        pd.DataFrame(data={
            'color': ['green', 'blue'],
            'food': ['banana', 'cookie'],
        }))

    renamed = source.rename("hello", "food")

    assert renamed.equals(expected)
Esempio n. 14
0
def test_foo():
    """Project a (name, age) relation onto ``name`` only.

    The result is checked twice: against an expected relation built in
    memory and against one loaded from a pipe-separated CSV file.
    """
    people = Relation(
        pd.DataFrame(data={"name": ["Carol", "Bob"], "age": [86, 4]}))
    expected_inline = Relation(pd.DataFrame(data={"name": ["Carol", "Bob"]}))
    expected_from_file = Relation('tests/test_project_expected_1.csv', sep='|')

    names_only = people.project(["name"])

    assert names_only.equals(expected_inline)
    assert names_only.equals(expected_from_file)
Esempio n. 15
0
def test_groupby_1():
    """Group by ``animal`` and count ``name`` within each group.

    The expected relation lists the animals in alphabetical order with the
    counts in a ``count_name`` column.
    """
    pets = Relation(
        pd.DataFrame(data={
            'animal': ['zebra', 'yak', 'yak', 'x-ray fish'],
            'name': ['adam', 'bob', 'charlie', 'dina'],
        }))
    expected = Relation(
        pd.DataFrame(data={
            'animal': ['x-ray fish', 'yak', 'zebra'],
            'count_name': [1, 2, 1],
        }))

    counted = pets.groupby(['animal']).count('name')
    assert counted.equals(expected)
Esempio n. 16
0
def r5():
    """Return a five-row Relation of class names and their start times."""
    schedule = pd.DataFrame(data={
        "Class": ["History", "Art", "Drama", "History", "Art"],
        "Start Time": ["10:00", "9:00", "12:00", "13:00", "18:00"],
    })
    return Relation(schedule)
Esempio n. 17
0
def r3():
    """Return a three-row Relation of names and birth years."""
    people = pd.DataFrame(data={
        "name": ["Larry", "Bob", "Lucy"],
        "Birthyear": [1999, 2015, 1994],
    })
    return Relation(people)
Esempio n. 18
0
def r6():
    """Return a six-row Relation of (Year, Revenue) pairs with repeated years."""
    revenue_by_year = pd.DataFrame(data={
        "Year": [1996, 1996, 1997, 1998, 1998, 1998],
        "Revenue": [2000, 3400, 1200, 500, 650, 200],
    })
    return Relation(revenue_by_year)
Esempio n. 19
0
def test_select_2(r):
    """Select the rows with ``height > 180`` and compare with a CSV fixture.

    Args:
        r: the relation under test, supplied by the caller.

    The expected rows are loaded from the pipe-separated file
    ``tests/test_select_expected_1.csv``; that file is the only expected
    value this test asserts against.
    """
    selected = r.select("height > 180")
    expected = Relation("tests/test_select_expected_1.csv", sep="|")

    assert selected.equals(expected)
Esempio n. 20
0
def test_foo(r):
    """Extend with a computed ``birth`` column, then project and truncate.

    ``birth`` is computed as ``2019 - age``; the result is projected to
    (name, age, birth), limited to the first ten rows, and compared with a
    two-row expected relation.
    """
    birth_year = 2019 - r.age
    extended = r.extend("birth", birth_year)
    actual = extended.project(["name", "age", "birth"]).head(10)

    expected = Relation(
        pd.DataFrame(data={
            "name": ["Carol", "Bob"],
            "age": [86, 4],
            "birth": [1933, 2015],
        }))

    assert actual.equals(expected)
Esempio n. 21
0
def test_semi_join_right(r_1, r_2):
    """Semi-join ``r_2`` against ``r_1`` and expect the green/blue colors."""
    joined = r_2.semi_join(r_1)

    expected = Relation(pd.DataFrame(data={'color': ['green', 'blue']}))
    assert joined.equals(expected)
Esempio n. 22
0
def test_groupby03(r6):
    """Group ``r6`` by ``Year`` and sum ``Revenue`` within each year."""
    expected = Relation(
        pd.DataFrame(data={
            "Year": [1996, 1997, 1998],
            "sum_Revenue": [5400, 1200, 1350],
        }))

    totals = r6.groupby("Year").sum("Revenue")
    assert totals.equals(expected)
Esempio n. 23
0
def makeContact():
    """Return a two-row Relation with ``id``, ``email`` and ``phone`` columns."""
    rows = pd.DataFrame(data={
        "id": [0, 1],
        "email": ["*****@*****.**", "*****@*****.**"],
        "phone": ["555-107-1234", "868-402-7539"],
    })
    return Relation(rows)
Esempio n. 24
0
def test_outerjoin01(r, r2):
    """Outer-join ``r`` with ``r2`` and expect four (name, age) rows.

    NOTE(review): the method is spelled ``outerJoin`` here -- confirm the
    spelling against the Relation API.
    """
    expected = Relation(
        pd.DataFrame(data={
            "name": ["Carol", "Bob", "Larry", "Lucy"],
            "age": [86, 4, 20, 25],
        }))

    joined = r.outerJoin(r2)
    assert joined.equals(expected)
Esempio n. 25
0
def test_semi_join_left(r_1, r_2):
    """Semi-join ``r_1`` against ``r_2`` and expect the red/purple colors."""
    joined = r_1.semi_join(r_2)

    expected = Relation(pd.DataFrame(data={'color': ['red', 'purple']}))
    assert joined.equals(expected)
Esempio n. 26
0
def r():
    """Return a four-row Relation of students: first name, degree and age."""
    students = pd.DataFrame(data={
        "firstName": ["Alex", "John", "Clark", "Haley"],
        "degree": ["Math", "Drama", "Computer Science", "Math"],
        "age": [20, 21, 19, 19],
    })
    return Relation(students)
Esempio n. 27
0
def r1():
    """Return a three-row Relation with ``name``, ``age`` and ``height``."""
    people = pd.DataFrame(data={
        "name": ["Jaejoong", "Junsu", "Yoochun"],
        "age": [33, 32, 33],
        "height": [179, 178, 180],
    })
    return Relation(people)
Esempio n. 28
0
def test_groupby1():
    """Group countries by ``continent`` and count ``country`` per group."""
    countries = Relation(
        pd.DataFrame(data={
            'country': ['USA', 'Canada', 'France'],
            'continent': ['North America', 'North America', 'Europe'],
        }))
    expected = Relation(
        pd.DataFrame(data={
            'continent': ['Europe', 'North America'],
            'count_country': [1, 2],
        }))

    counted = countries.groupby('continent').count('country')

    assert counted.equals(expected)
Esempio n. 29
0
def test_groupby3():
    """Group enrolments by ``student`` and count ``course`` per student."""
    enrolments = Relation(
        pd.DataFrame(data={
            'course': ['DBMS', 'OSA', 'ML', 'OSA', 'OSA'],
            'student': ['Abby', 'Abby', 'Abby', 'Bob', 'Carson'],
        }))
    expected = Relation(
        pd.DataFrame(data={
            'student': ['Abby', 'Bob', 'Carson'],
            'count_course': [3, 1, 1],
        }))

    counted = enrolments.groupby('student').count('course')

    assert counted.equals(expected)
Esempio n. 30
0
def test_product(r1, r2):
    """Compare ``r1.product(r2)`` with the pipe-separated expected CSV.

    Both relations are printed before the assertion so they appear in the
    captured output.
    """
    actual = r1.product(r2)
    expected = Relation("tests/test_product_expected.csv", sep="|")

    for shown in (expected, actual):
        print(shown)

    assert actual.equals(expected)