def test_rename_preserves_data():
    '''
    Renaming a column must leave that column's values untouched: the
    values under the old header in the source relation have to equal the
    values under the new header in the renamed relation.
    '''
    original = Relation("../country.csv")
    renamed = original.rename("governmentform", "formofgov")

    values_before = original["governmentform"]
    values_after = renamed["formofgov"]
    assert values_before.equals(values_after)
def test_cartesian_product2():
    '''
    Cross a three-row, one-column relation with a three-row, two-column
    relation and compare the outcome with a hand-written nine-row table.
    '''
    students = pd.DataFrame(data={'student': ['Abby', 'Billy', 'Carson']})
    grades = pd.DataFrame(data={
        'grade': ['A', 'A', 'B'],
        'course': ['Math', 'Science', 'Math'],
    })
    left = Relation(students)
    right = Relation(grades)
    actual = left.cartesian_product(right)

    # Every student is paired with every (grade, course) row, left-major.
    expected_rows = {
        'student': [
            'Abby', 'Abby', 'Abby',
            'Billy', 'Billy', 'Billy',
            'Carson', 'Carson', 'Carson',
        ],
        'grade': ['A', 'A', 'B', 'A', 'A', 'B', 'A', 'A', 'B'],
        'course': [
            'Math', 'Science', 'Math',
            'Math', 'Science', 'Math',
            'Math', 'Science', 'Math',
        ],
    }
    expected = Relation(pd.DataFrame(data=expected_rows))
    assert actual.equals(expected)
def test_cartesian_product1():
    '''
    Cross a three-row country table with a two-row statistics table and
    compare the outcome with a hand-written six-row table.
    '''
    countries = pd.DataFrame(data={
        'country': ['USA', 'Canada', 'France'],
        'continent': ['North America', 'North America', 'Europe'],
    })
    stats = pd.DataFrame(data={'gnp': [1234, 5678], 'population': [100, 250]})
    left = Relation(countries)
    right = Relation(stats)
    actual = left.cartesian_product(right)

    # Each country row is repeated once per statistics row.
    expected_rows = {
        'country': ['USA', 'USA', 'Canada', 'Canada', 'France', 'France'],
        'continent': [
            'North America', 'North America',
            'North America', 'North America',
            'Europe', 'Europe',
        ],
        'gnp': [1234, 5678, 1234, 5678, 1234, 5678],
        'population': [100, 250, 100, 250, 100, 250],
    }
    expected = Relation(pd.DataFrame(data=expected_rows))
    assert actual.equals(expected)
def test_product_1():
    '''
    Cartesian product of two relations that share the column name
    ``animal``; the expected table carries the clashing column as
    ``animal_x`` (left side) and ``animal_y`` (right side).
    '''
    expected_rows = {
        'animal_x': ['zebra', 'zebra', 'x-ray fish', 'x-ray fish'],
        'name': ['adam', 'adam', 'dina', 'dina'],
        'color': ['red', 'red', 'purple', 'purple'],
        'animal_y': ['zebra', 'x-ray fish', 'zebra', 'x-ray fish'],
        'age': [7, 678, 7, 678],
    }
    expected = Relation(pd.DataFrame(data=expected_rows))

    left_rows = {
        'animal': ['zebra', 'x-ray fish'],
        'name': ['adam', 'dina'],
        'color': ['red', 'purple'],
    }
    left = Relation(pd.DataFrame(data=left_rows))

    right_rows = {'animal': ['zebra', 'x-ray fish'], 'age': [7, 678]}
    right = Relation(pd.DataFrame(data=right_rows))

    actual = left.cartesian_product(right)
    assert actual.equals(expected)
def test_groupby_2():
    '''
    Cross a three-row student/class table with a single-row grade table
    and check that every student row picks up the one grade.

    Note: despite its name, this test exercises ``cartesian_product``,
    not ``groupby``.
    '''
    expected_rows = {
        'student': ['amanda', 'sam', 'tony'],
        'classes': ['science', 'chinese', 'math'],
        'grade': ['A', 'A', 'A'],
    }
    expected = Relation(pd.DataFrame(data=expected_rows))
    print(expected)

    enrolment_rows = {
        'student': ['amanda', 'sam', 'tony'],
        'classes': ['science', 'chinese', 'math'],
    }
    enrolment = Relation(pd.DataFrame(data=enrolment_rows))

    single_grade = Relation(pd.DataFrame(data={'grade': ['A']}))

    actual = enrolment.cartesian_product(single_grade)
    print(actual)
    assert actual.equals(expected)
def test_rename_nonexistent_cols():
    '''
    Renaming a column that does not exist must be a no-op: the target
    name is not introduced as a new column and the result equals the
    source relation.
    '''
    original = Relation("../country.csv")
    result = original.rename("a_fake_column", "something_nice")

    # No phantom column may appear under the requested new name.
    assert "something_nice" not in result.columns
    assert original.equals(result)
def test_union_remove_duplicates():
    '''
    Union must drop duplicate rows: a relation unioned with itself has
    to keep the same number of rows as the relation alone.
    '''
    countries = Relation("../country.csv")
    republics = countries.query("governmentform == 'Republic'")
    self_union = republics.union(republics)

    rows_before = len(republics)
    rows_after = len(self_union)
    assert rows_before == rows_after
def test_foo():
    '''
    Outer-join two relations loaded from pipe-separated CSV files and
    compare the result with the contents of a third CSV file.
    '''
    left_frame = pd.read_csv(
        "tests/test_outer_join/test_outer_join_1.csv", sep="|")
    left = Relation(left_frame)

    right_frame = pd.read_csv(
        "tests/test_outer_join/test_outer_join_2.csv", sep="|")
    right = Relation(right_frame)

    joined = left.outerjoin(right)

    # NOTE(review): unlike the two inputs, the expected file is read with
    # the default separator and is not wrapped in Relation — confirm that
    # this is intentional.
    expected = pd.read_csv("tests/test_outer_join/test_outer_join_3.csv")
    assert joined.equals(expected)
def test_union_on_continent():
    '''
    The union of the rows for Africa and the rows for Europe must have
    as many rows as the two selections added together.
    '''
    countries = Relation("../country.csv")
    africa = countries.query("continent == 'Africa'")
    europe = countries.query("continent == 'Europe'")
    combined = africa.union(europe)

    expected_rows = len(africa) + len(europe)
    assert expected_rows == len(combined)
def test_antijoin(r):
    '''
    Anti-join the (name, region) projection of North American countries
    against the same projection of Caribbean countries and compare with
    the pipe-separated fixture ``tests/antijoin.csv``.

    The incoming ``r`` argument is overwritten before it is read; the
    data comes from the module-level ``country`` relation instead.
    '''
    north_america = country.query('continent == "North America"').project(
        ['name', 'region'])
    caribbean = country.query('region == "Caribbean"').project(
        ['name', 'region'])
    remaining = north_america.antijoin(caribbean)
    # Renumber the rows and discard the old index column so that only
    # the data columns take part in the comparison.
    r = remaining.reset_index().drop(columns=["index"])

    fixture = Relation("tests/antijoin.csv", sep="|")
    fixture_rows = fixture.reset_index().drop(columns=["index"])
    expected = Relation(pd.DataFrame(data=fixture_rows))
    assert r.equals(expected)
def test_rename_removes_col():
    '''
    Renaming must swap the header in the result only: the source keeps
    the old name, while the renamed relation carries just the new one.
    '''
    original = Relation("../country.csv")
    renamed = original.rename("indepyear", "yearofindep")

    # The source relation is left untouched.
    assert "indepyear" in original.columns
    assert "yearofindep" not in original.columns

    # The result has the new header in place of the old one.
    assert "indepyear" not in renamed.columns
    assert "yearofindep" in renamed.columns
def test_rename_1():
    '''
    Rename the only column of a relation from ``hello`` to ``color`` and
    compare with a relation built directly under the new name.
    '''
    expected = Relation(pd.DataFrame(data={'color': ['green', 'blue']}))

    source = Relation(pd.DataFrame(data={'hello': ['green', 'blue']}))
    renamed = source.rename("hello", "color")
    assert renamed.equals(expected)
def test_rename_2():
    '''
    Rename one of two columns (``hello`` to ``food``) and check that the
    result equals a relation built with the new header, the other
    column being unchanged.
    '''
    expected_rows = {
        'color': ['green', 'blue'],
        'food': ['banana', 'cookie'],
    }
    expected = Relation(pd.DataFrame(data=expected_rows))

    source_rows = {
        'color': ['green', 'blue'],
        'hello': ['banana', 'cookie'],
    }
    source = Relation(pd.DataFrame(data=source_rows))
    renamed = source.rename("hello", "food")
    assert renamed.equals(expected)
def test_foo():
    '''
    Project a (name, age) relation onto ``name`` and compare the result
    with two references: one built in memory and one loaded from a
    pipe-separated CSV file.
    '''
    source_frame = pd.DataFrame(
        data={"name": ["Carol", "Bob"], "age": [86, 4]})
    names_frame = pd.DataFrame(data={"name": ["Carol", "Bob"]})

    source = Relation(source_frame)
    expected_in_memory = Relation(names_frame)
    expected_from_file = Relation('tests/test_project_expected_1.csv', sep='|')

    projected = source.project(["name"])
    assert projected.equals(expected_in_memory)
    assert projected.equals(expected_from_file)
def test_groupby_1():
    '''
    Group by ``animal`` and count ``name``; the expected table lists the
    animals in alphabetical order with a ``count_name`` column.
    '''
    expected_rows = {
        'animal': ['x-ray fish', 'yak', 'zebra'],
        'count_name': [1, 2, 1],
    }
    expected = Relation(pd.DataFrame(data=expected_rows))

    pet_rows = {
        'animal': ['zebra', 'yak', 'yak', 'x-ray fish'],
        'name': ['adam', 'bob', 'charlie', 'dina'],
    }
    pets = Relation(pd.DataFrame(data=pet_rows))

    grouped = pets.groupby(['animal'])
    counts = grouped.count('name')
    assert counts.equals(expected)
def r5():
    '''
    Build a five-row Relation of class names and start times
    (presumably consumed as a pytest fixture — confirm at the call site).
    '''
    schedule = pd.DataFrame(data={
        "Class": ["History", "Art", "Drama", "History", "Art"],
        "Start Time": ["10:00", "9:00", "12:00", "13:00", "18:00"],
    })
    return Relation(schedule)
def r3():
    '''
    Build a three-row Relation of names and birth years
    (presumably consumed as a pytest fixture — confirm at the call site).
    '''
    people = pd.DataFrame(data={
        "name": ["Larry", "Bob", "Lucy"],
        "Birthyear": [1999, 2015, 1994],
    })
    return Relation(people)
def r6():
    '''
    Build a six-row Relation of yearly revenue figures, with several
    rows per year (presumably consumed as a pytest fixture — confirm at
    the call site).
    '''
    revenue = pd.DataFrame(data={
        "Year": [1996, 1996, 1997, 1998, 1998, 1998],
        "Revenue": [2000, 3400, 1200, 500, 650, 200],
    })
    return Relation(revenue)
def test_select_2(r):
    '''
    Selecting the rows with ``height > 180`` must equal the relation
    stored in the pipe-separated file ``tests/test_select_expected_1.csv``.

    Args:
        r: the relation to filter; it must expose a ``height`` column
           (presumably injected as a pytest fixture — confirm).
    '''
    selected = r.select("height > 180")

    # NOTE(review): the expected rows are presumably
    # Changmin (age 31, height 186) and Yunho (age 33, height 184) —
    # confirm against the CSV file.
    r_expected = Relation("tests/test_select_expected_1.csv", sep="|")
    assert selected.equals(r_expected)
def test_foo(r):
    '''
    Extend the relation with a ``birth`` column computed as
    ``2019 - age``, keep (name, age, birth), take at most ten rows and
    compare with the hand-written expectation.

    Args:
        r: relation exposing ``name`` and ``age``
           (presumably injected as a pytest fixture — confirm).
    '''
    with_birth = r.extend("birth", 2019 - r.age)
    trimmed = with_birth.project(["name", "age", "birth"])
    r = trimmed.head(10)

    expected_rows = {
        "name": ["Carol", "Bob"],
        "age": [86, 4],
        "birth": [1933, 2015],
    }
    expected = Relation(pd.DataFrame(data=expected_rows))
    assert r.equals(expected)
def test_semi_join_right(r_1, r_2):
    '''
    Semi-join with ``r_2`` on the left and ``r_1`` on the right; the
    expected result is a single ``color`` column holding green and blue.
    '''
    expected = Relation(pd.DataFrame(data={'color': ['green', 'blue']}))

    joined = r_2.semi_join(r_1)
    assert joined.equals(expected)
def test_groupby03(r6):
    '''
    Group ``r6`` by ``Year`` and sum ``Revenue``; the expected table has
    one row per year with the totals in a ``sum_Revenue`` column.
    '''
    expected_rows = {
        "Year": [1996, 1997, 1998],
        "sum_Revenue": [5400, 1200, 1350],
    }
    expected = Relation(pd.DataFrame(data=expected_rows))

    totals = r6.groupby("Year").sum("Revenue")
    assert totals.equals(expected)
def makeContact():
    '''
    Build a two-row contact Relation with ``id``, ``email`` and
    ``phone`` columns.
    '''
    contact_rows = {
        "id": [0, 1],
        "email": ["*****@*****.**", "*****@*****.**"],
        "phone": ["555-107-1234", "868-402-7539"],
    }
    frame = pd.DataFrame(data=contact_rows)
    return Relation(frame)
def test_outerjoin01(r, r2):
    '''
    Outer-join ``r`` with ``r2`` and compare with a four-row
    (name, age) table.
    '''
    expected_rows = {
        "name": ["Carol", "Bob", "Larry", "Lucy"],
        "age": [86, 4, 20, 25],
    }
    expected = Relation(pd.DataFrame(data=expected_rows))

    joined = r.outerJoin(r2)
    assert joined.equals(expected)
def test_semi_join_left(r_1, r_2):
    '''
    Semi-join with ``r_1`` on the left and ``r_2`` on the right; the
    expected result is a single ``color`` column holding red and purple.
    '''
    expected = Relation(pd.DataFrame(data={'color': ['red', 'purple']}))

    joined = r_1.semi_join(r_2)
    assert joined.equals(expected)
def r():
    '''
    Build a four-row Relation of students with ``firstName``,
    ``degree`` and ``age`` columns (presumably consumed as a pytest
    fixture — confirm at the call site).
    '''
    students = pd.DataFrame(data={
        "firstName": ["Alex", "John", "Clark", "Haley"],
        "degree": ["Math", "Drama", "Computer Science", "Math"],
        "age": [20, 21, 19, 19],
    })
    return Relation(students)
def r1():
    '''
    Build a three-row Relation with ``name``, ``age`` and ``height``
    columns (presumably consumed as a pytest fixture — confirm at the
    call site).
    '''
    people = pd.DataFrame(data={
        "name": ["Jaejoong", "Junsu", "Yoochun"],
        "age": [33, 32, 33],
        "height": [179, 178, 180],
    })
    return Relation(people)
def test_groupby1():
    '''
    Group three countries by ``continent`` and count ``country``; the
    expected table lists the continents alphabetically with the totals
    in a ``count_country`` column.
    '''
    country_rows = {
        'country': ['USA', 'Canada', 'France'],
        'continent': ['North America', 'North America', 'Europe'],
    }
    countries = Relation(pd.DataFrame(data=country_rows))
    counts = countries.groupby('continent').count('country')

    expected_rows = {
        'continent': ['Europe', 'North America'],
        'count_country': [1, 2],
    }
    expected = Relation(pd.DataFrame(data=expected_rows))
    assert counts.equals(expected)
def test_groupby3():
    '''
    Group course enrolments by ``student`` and count ``course``; the
    expected table has one row per student with the totals in a
    ``count_course`` column.
    '''
    enrolment_rows = {
        'course': ['DBMS', 'OSA', 'ML', 'OSA', 'OSA'],
        'student': ['Abby', 'Abby', 'Abby', 'Bob', 'Carson'],
    }
    enrolments = Relation(pd.DataFrame(data=enrolment_rows))
    counts = enrolments.groupby('student').count('course')

    expected_rows = {
        'student': ['Abby', 'Bob', 'Carson'],
        'count_course': [3, 1, 1],
    }
    expected = Relation(pd.DataFrame(data=expected_rows))
    assert counts.equals(expected)
def test_product(r1, r2):
    '''
    Take the product of ``r1`` and ``r2`` and compare it with the
    pipe-separated fixture ``tests/test_product_expected.csv``.

    Both relations are printed (expected first, then actual) before the
    comparison.
    '''
    actual = r1.product(r2)
    expected = Relation("tests/test_product_expected.csv", sep="|")

    print(expected)
    print(actual)
    assert actual.equals(expected)