Example #1
0
class Equals(object):
    """Timing cases for ``DataFrame.equals`` on several kinds of frames.

    ``setup`` builds float, object and duplicate-column frames, each paired
    with a copy whose bottom-right cell has been replaced by NaN.
    """

    def setup(self):
        """Create the square frames that the ``time_*`` methods compare."""
        size = 10**3
        # Position of the single cell that is overwritten in each "_nan" copy.
        corner = (-1, -1)

        floats = DataFrame(np.random.randn(size, size))
        floats_with_nan = floats.copy()
        floats_with_nan.iloc[corner] = np.nan
        self.float_df = floats
        self.float_df_nan = floats_with_nan

        objects = DataFrame('foo', index=range(size), columns=range(size))
        objects_with_nan = objects.copy()
        objects_with_nan.iloc[corner] = np.nan
        self.object_df = objects
        self.object_df_nan = objects_with_nan

        # Same data as the object frame, but every column is labelled 'A'.
        duplicated = objects.copy()
        duplicated.columns = ['A'] * duplicated.shape[1]
        duplicated_with_nan = duplicated.copy()
        duplicated_with_nan.iloc[corner] = np.nan
        self.nonunique_cols = duplicated
        self.nonunique_cols_nan = duplicated_with_nan

    def time_frame_float_equal(self):
        """Compare the float frame with itself."""
        frame = self.float_df
        frame.equals(frame)

    def time_frame_float_unequal(self):
        """Compare the float frame with its NaN-modified copy."""
        left, right = self.float_df, self.float_df_nan
        left.equals(right)

    def time_frame_nonunique_equal(self):
        """Compare the duplicate-column frame with itself."""
        frame = self.nonunique_cols
        frame.equals(frame)

    def time_frame_nonunique_unequal(self):
        """Compare the duplicate-column frame with its NaN-modified copy."""
        left, right = self.nonunique_cols, self.nonunique_cols_nan
        left.equals(right)

    def time_frame_object_equal(self):
        """Compare the object frame with itself."""
        frame = self.object_df
        frame.equals(frame)

    def time_frame_object_unequal(self):
        """Compare the object frame with its NaN-modified copy."""
        left, right = self.object_df, self.object_df_nan
        left.equals(right)
def test_abundance_in_group_multi_group_no_abund_col():
    """Check abundance_in_group with two group columns and no abundance column."""
    column_order = ['genus', 'species', 'abundance']
    expected = DataFrame(
        {
            'genus': ['a', 'a', 'd', 'f'],
            'species': ['b', 'c', 'e', 'g'],
            'abundance': [4, 1, 1, 1],
        },
        columns=column_order,
    )
    observed = abundance_in_group(comp_data, ['genus', 'species'])
    assert expected.equals(observed)
def test_richness_in_group_multiple_groups():
    """Check richness_in_group when grouping by more than one column."""
    column_order = ['site', 'year', 'richness']
    expected = DataFrame(
        {
            'site': [1, 1, 2, 3, 3, 4],
            'year': [1, 2, 1, 1, 2, 2],
            'richness': [1, 1, 1, 2, 1, 1],
        },
        columns=column_order,
    )
    observed = richness_in_group(comp_data, ['site', 'year'], ['spid'])
    assert expected.equals(observed)
Example #4
0
    def test_equals(self):
        s1 = pd.Series([1, 2, 3], index=[0, 2, 1])
        s2 = s1.copy()
        self.assertTrue(s1.equals(s2))

        s1[1] = 99
        self.assertFalse(s1.equals(s2))

        # NaNs compare as equal
        s1 = pd.Series([1, np.nan, 3, np.nan], index=[0, 2, 1, 3])
        s2 = s1.copy()
        self.assertTrue(s1.equals(s2))

        s2[0] = 9.9
        self.assertFalse(s1.equals(s2))

        idx = MultiIndex.from_tuples([(0, 'a'), (1, 'b'), (2, 'c')])
        s1 = Series([1, 2, np.nan], index=idx)
        s2 = s1.copy()
        self.assertTrue(s1.equals(s2))

        # Add object dtype column with nans
        index = np.random.random(10)
        df1 = DataFrame(np.random.random(10,), index=index, columns=['floats'])
        df1['text'] = 'the sky is so blue. we could use more chocolate.'.split()
        df1['start'] = date_range('2000-1-1', periods=10, freq='T')
        df1['end'] = date_range('2000-1-1', periods=10, freq='D')
        df1['diff'] = df1['end'] - df1['start']
        df1['bool'] = (np.arange(10) % 3 == 0)
        df1.ix[::2] = nan
        df2 = df1.copy()
        self.assertTrue(df1['text'].equals(df2['text']))
        self.assertTrue(df1['start'].equals(df2['start']))
        self.assertTrue(df1['end'].equals(df2['end']))
        self.assertTrue(df1['diff'].equals(df2['diff']))
        self.assertTrue(df1['bool'].equals(df2['bool']))
        self.assertTrue(df1.equals(df2))
        self.assertFalse(df1.equals(object))

        # different dtype
        different = df1.copy()
        different['floats'] = different['floats'].astype('float32')
        self.assertFalse(df1.equals(different))

        # different index
        different_index = -index
        different = df2.set_index(different_index)
        self.assertFalse(df1.equals(different))

        # different columns
        different = df2.copy()
        different.columns = df2.columns[::-1]
        self.assertFalse(df1.equals(different))

        # DatetimeIndex
        index = pd.date_range('2000-1-1', periods=10, freq='T')
        df1 = df1.set_index(index)
        df2 = df1.copy()
        self.assertTrue(df1.equals(df2))

        # MultiIndex
        df3 = df1.set_index(['text'], append=True)
        df2 = df1.set_index(['text'], append=True)
        self.assertTrue(df3.equals(df2))

        df2 = df1.set_index(['floats'], append=True)
        self.assertFalse(df3.equals(df2))

        # NaN in index
        df3 = df1.set_index(['floats'], append=True)
        df2 = df1.set_index(['floats'], append=True)
        self.assertTrue(df3.equals(df2))
def test_abundance_in_group_abund_col():
    """Check abundance_in_group with one group column and an abundance column."""
    expected = DataFrame(
        {
            'site': [1, 2, 3, 4],
            'abundance': [3, 5, 12, 10],
        },
        columns=['site', 'abundance'],
    )
    observed = abundance_in_group(comp_data, ['site'], ['counts'])
    assert expected.equals(observed)
def test_abundance_in_group_no_abund_col():
    """Check abundance_in_group when no abundance column is passed."""
    expected = DataFrame(
        {
            'site': [1, 2, 3, 4],
            'abundance': [2, 1, 3, 1],
        },
        columns=['site', 'abundance'],
    )
    observed = abundance_in_group(comp_data, ['site'])
    assert expected.equals(observed)
def test_richness_in_group_multiple_spid_single_group():
    """Check richness_in_group with several species id columns and one group."""
    expected = DataFrame(
        {
            'site': [1, 2, 3, 4],
            'richness': [1, 1, 3, 1],
        },
        columns=['site', 'richness'],
    )
    observed = richness_in_group(comp_data, ['site'], ['genus', 'species'])
    assert expected.equals(observed)
def test_richness_in_group_single_spid_single_group():
    """Check richness_in_group with one species id column and one group."""
    expected = DataFrame(
        {
            'site': [1, 2, 3, 4],
            'richness': [1, 1, 3, 1],
        },
        columns=['site', 'richness'],
    )
    observed = richness_in_group(comp_data, ['site'], ['spid'])
    assert expected.equals(observed)