Esempio n. 1
0
def test_groupby_1():
    """Group by 'animal', count 'name', and compare with a hand-built relation."""
    expected = Relation(
        pd.DataFrame(
            data={
                'animal': ['x-ray fish', 'yak', 'zebra'],
                'count_name': [1, 2, 1],
            }
        )
    )

    source = Relation(
        pd.DataFrame(
            data={
                'animal': ['zebra', 'yak', 'yak', 'x-ray fish'],
                'name': ['adam', 'bob', 'charlie', 'dina'],
            }
        )
    )

    # 'yak' appears twice in the input, so its count is 2; the others are 1.
    grouped = source.groupby(['animal']).count('name')
    assert grouped.equals(expected)
Esempio n. 2
0
def test_groupby3():
    """Group enrolments by 'student' and count the 'course' entries per student."""
    enrolments = pd.DataFrame(
        data={
            'course': ['DBMS', 'OSA', 'ML', 'OSA', 'OSA'],
            'student': ['Abby', 'Abby', 'Abby', 'Bob', 'Carson'],
        }
    )
    counted = Relation(enrolments).groupby('student').count('course')

    # Abby has three rows in the input; Bob and Carson have one each.
    expected_frame = pd.DataFrame(
        data={
            'student': ['Abby', 'Bob', 'Carson'],
            'count_course': [3, 1, 1],
        }
    )
    expected = Relation(expected_frame)

    assert counted.equals(expected)
Esempio n. 3
0
def test_groupby1():
    """Group countries by 'continent' and count the 'country' entries in each."""
    countries = pd.DataFrame(
        data={
            'country': ['USA', 'Canada', 'France'],
            'continent': ['North America', 'North America', 'Europe'],
        }
    )
    per_continent = Relation(countries).groupby('continent').count('country')

    # Two of the three input rows are in North America, one is in Europe.
    expected_frame = pd.DataFrame(
        data={
            'continent': ['Europe', 'North America'],
            'count_country': [1, 2],
        }
    )
    expected = Relation(expected_frame)

    assert per_continent.equals(expected)
Esempio n. 4
0
def test_groupby2():
    """Group by 'major' and take the maximum 'gradyear' within each group.

    The query is not meant to be meaningful; it only exercises the
    groupby/max functionality.
    """
    students = pd.DataFrame(
        data={
            'major': ['CS', 'CS', 'Math', 'Math', 'DS'],
            'gradyear': [2016, 2019, 2015, 2020, 2020],
        }
    )
    latest = Relation(students).groupby('major').max('gradyear')

    expected_frame = pd.DataFrame(
        data={
            'major': ['CS', 'DS', 'Math'],
            'max_gradyear': [2019, 2020, 2020],
        }
    )
    expected = Relation(expected_frame)

    assert latest.equals(expected)
Esempio n. 5
0
def test_union_compatibility():
    """Assert that union only accepts compatible relations.

    Attempts an illegal union between a filtered relation and a
    grouped/counted relation built from the same source. The test passes
    only if the union raises ``ValueError`` with the expected message; it
    fails if a different exception is raised or if no exception is raised.
    """
    df = Relation("../country.csv")
    monarchy_df = df.query("governmentform == 'Monarchy'")
    groupby_gov_df = df.groupby(['governmentform']).count('name')
    try:
        monarchy_df.union(groupby_gov_df)
    except ValueError as exc:
        assert str(exc) == "Relations must be Union compatible"
    except Exception as exc:
        # Any other error means the incompatibility was not reported properly.
        pytest.fail(
            f"Illegal union returned an unknown exception: {type(exc)} : {exc}"
        )
    else:
        # Without this branch the test would pass silently when the illegal
        # union is accepted, which is exactly the failure it must detect.
        pytest.fail("Illegal union did not raise ValueError")
Esempio n. 6
0
def test_groupby_2():
    """Group by both 'student' and 'grade', then count 'classes' per pair."""
    expected = Relation(
        pd.DataFrame(
            data={
                'student': ['amanda', 'sam', 'sam', 'tony'],
                'grade': ['A', 'A', 'C', 'B'],
                'count_classes': [2, 2, 1, 1],
            }
        )
    )
    print(expected)

    source = Relation(
        pd.DataFrame(
            data={
                'student': ['amanda', 'amanda', 'sam', 'sam', 'sam', 'tony'],
                'classes': ['math', 'science', 'math', 'science', 'chinese', 'math'],
                'grade': ['C', 'A', 'A', 'B'][1:2] * 2 + ['C', 'A', 'A', 'B'],
            }
        )
    ) if False else Relation(
        pd.DataFrame(
            data={
                'student': ['amanda', 'amanda', 'sam', 'sam', 'sam', 'tony'],
                'classes': ['math', 'science', 'math', 'science', 'chinese', 'math'],
                'grade': ['A', 'A', 'C', 'A', 'A', 'B'],
            }
        )
    )

    counted = source.groupby(['student', 'grade']).count('classes')
    print(counted)
    assert counted.equals(expected)