class Equals(object):
    """ASV-style timing benchmarks for ``DataFrame.equals`` across layouts:
    float frames, object-dtype frames, and frames with non-unique columns,
    each paired with a copy that differs only in a trailing NaN."""

    def setup(self):
        """Build the equal/unequal frame pairs used by the timing methods."""
        size = 10 ** 3

        # Float frames: identical pair plus one with a NaN in the last cell.
        self.float_df = DataFrame(np.random.randn(size, size))
        self.float_df_nan = self.float_df.copy()
        self.float_df_nan.iloc[-1, -1] = np.nan

        # Object-dtype frames filled with a constant string.
        self.object_df = DataFrame('foo', index=range(size), columns=range(size))
        self.object_df_nan = self.object_df.copy()
        self.object_df_nan.iloc[-1, -1] = np.nan

        # Same object frame, but every column shares the single label 'A'.
        self.nonunique_cols = self.object_df.copy()
        self.nonunique_cols.columns = ['A'] * len(self.nonunique_cols.columns)
        self.nonunique_cols_nan = self.nonunique_cols.copy()
        self.nonunique_cols_nan.iloc[-1, -1] = np.nan

    def time_frame_float_equal(self):
        self.float_df.equals(self.float_df)

    def time_frame_float_unequal(self):
        self.float_df.equals(self.float_df_nan)

    def time_frame_nonunique_equal(self):
        self.nonunique_cols.equals(self.nonunique_cols)

    def time_frame_nonunique_unequal(self):
        self.nonunique_cols.equals(self.nonunique_cols_nan)

    def time_frame_object_equal(self):
        self.object_df.equals(self.object_df)

    def time_frame_object_unequal(self):
        self.object_df.equals(self.object_df_nan)
def get_answer1(df: pd.DataFrame) -> float:
    """Run the seat-occupation cellular automaton until it stabilises and
    return the number of occupied ('#') cells.

    Rules per sweep: an empty seat 'L' with zero occupied neighbours becomes
    '#'; an occupied seat '#' with four or more occupied neighbours becomes
    'L'; floor '.' never changes.

    Parameters
    ----------
    df : pd.DataFrame
        Raw puzzle input; converted to a grid via ``df2grid``.

    Returns
    -------
    float
        Count of '#' cells once the grid stops changing (or after the
        iteration cap is reached).
    """
    df = df2grid(df)
    # Handle that df is not square for final input: pad with one row of floor.
    if df.shape[0] != df.shape[1]:
        pad = pd.DataFrame('.', columns=df.columns, index=[max(df.index) + 1])
        # DataFrame.append was deprecated and removed in pandas 2.0.
        df = pd.concat([df, pad])
    max_iterations = 1000
    for iteration in range(max_iterations):
        if iteration % 10 == 0:  # progress heartbeat every 10 sweeps
            print(iteration)
        df_next = df.copy()
        for cell in itertools.product(list(df.index), list(df.columns)):
            if df.iloc[cell] == '.':
                continue
            # Collect the neighbouring cell values as one string.
            neighbours = ''.join(df.iloc[n] for n in get_neighbor_idx(cell, df))
            occupied = neighbours.count('#')
            if df.iloc[cell] == 'L' and occupied == 0:
                df_next.iloc[cell] = '#'
            elif df.iloc[cell] == '#' and occupied >= 4:
                df_next.iloc[cell] = 'L'
        # Check for convergence: no seat changed this sweep.
        if df.equals(df_next):
            break
        # Update grid for the next sweep.
        df = df_next.copy()
    print(iteration)
    return (df == '#').sum().sum()
def test_equals_subclass(self):
    # https://github.com/pandas-dev/pandas/pull/34402
    # equals() must accept a subclass on either side of the comparison
    base = DataFrame({"a": [1, 2, 3]})
    sub = tm.SubclassedDataFrame({"a": [1, 2, 3]})
    assert base.equals(sub)
    assert sub.equals(base)
def test_richness_in_group_multiple_groups():
    """Test richness_in_group with a multiple groups"""
    expected = DataFrame(
        {'site': [1, 1, 2, 3, 3, 4],
         'year': [1, 2, 1, 1, 2, 2],
         'richness': [1, 1, 1, 2, 1, 1]},
        columns=['site', 'year', 'richness'])
    result = richness_in_group(comp_data, ['site', 'year'], ['spid'])
    assert expected.equals(result)
def test_calculate_max_drawdown2():
    profits = [
        0.011580, 0.010048, 0.011340, 0.012161, 0.010416, 0.010009, 0.020024,
        -0.024662, -0.022350, 0.020496, -0.029859, -0.030511, 0.010041,
        0.010872, -0.025782, 0.010400, 0.012374, 0.012467, 0.114741,
        0.010303, 0.010088, -0.033961, 0.010680, 0.010886, -0.029274,
        0.011178, 0.010693, 0.010711
    ]
    dates = [Arrow(2020, 1, 1).shift(days=day) for day in range(len(profits))]
    frame = DataFrame(zip(profits, dates), columns=['profit', 'open_date'])
    # sort by profit and reset index
    frame = frame.sort_values('profit').reset_index(drop=True)
    snapshot = frame.copy()
    drawdown, high_date, low_date = calculate_max_drawdown(
        frame, date_col='open_date', value_col='profit')
    # Ensure df has not been altered.
    assert frame.equals(snapshot)
    assert isinstance(drawdown, float)
    # High must be before low
    assert high_date < low_date
    assert drawdown == 0.091755

    # With only winning trades a ValueError must be raised.
    frame = DataFrame(zip(profits[:5], dates[:5]),
                      columns=['profit', 'open_date'])
    with pytest.raises(ValueError,
                       match='No losing trade, therefore no drawdown.'):
        calculate_max_drawdown(frame, date_col='open_date', value_col='profit')
def test_panda_data_frame(self):
    # Round-trip a random frame through map_value and back.
    source = DataFrame(random.randint(0, 100, size=(100, 4)),
                       columns=list('ABCD'))
    mapped = map_value(source)
    self.assertEqual(DEFDataFrame, type(mapped))
    round_tripped = map_value(mapped)
    self.assertTrue(source.equals(round_tripped))
def test_distribution_table(self):
    # Top-19 word counts expected from the question's distribution table.
    rows = [
        ('language', 12), ('pleasure', 8), ('quick', 8), ('quiz', 7),
        ('common', 6), ('fox', 6), ('one', 6), ('grammar', 5),
        ('pain', 5), ('vex', 5), ('bad', 4), ('blind', 4),
        ('existence', 4), ('far', 4), ('jump', 4), ('like', 4),
        ('little', 4), ('text', 4), ('word', 4),
    ]
    expected = DataFrame(data=rows, columns=['Word', 'Count'])
    actual = self.question.distribution_table(top=19)
    self.assertTrue(expected.equals(actual))
def test_abundance_in_group_multi_group_no_abund_col():
    """Test abundance_in_group w/multiple group columns and no abundance column"""
    expected = DataFrame(
        {'genus': ['a', 'a', 'd', 'f'],
         'species': ['b', 'c', 'e', 'g'],
         'abundance': [4, 1, 1, 1]},
        columns=['genus', 'species', 'abundance'])
    result = abundance_in_group(comp_data, ['genus', 'species'])
    assert expected.equals(result)
def test_richness_in_group_single_spid_single_group():
    """Test richness_in_group with a single species identifier column, one group"""
    expected = DataFrame({'site': [1, 2, 3, 4], 'richness': [1, 1, 3, 1]},
                         columns=['site', 'richness'])
    assert expected.equals(richness_in_group(comp_data, ['site'], ['spid']))
def test_abundance_in_group_no_abund_col():
    """Test abundance_in_group with no abundance column provided"""
    expected = DataFrame({'site': [1, 2, 3, 4], 'abundance': [2, 1, 3, 1]},
                         columns=['site', 'abundance'])
    assert expected.equals(abundance_in_group(comp_data, ['site']))
def pandas_assert(actual: pd.DataFrame, expected: pd.DataFrame):
    """Compare two DataFrames piecewise so a failure names what differs.

    Checks columns, shape, dtypes, index, string rendering and finally the
    cell contents, in that order, raising AssertionError with a short
    description at the first mismatch.
    """
    exp, act = expected, actual
    assert list(exp.columns) == list(act.columns), 'different columns'
    assert exp.shape == act.shape, 'different shapes'
    assert list(exp.dtypes) == list(act.dtypes), 'different dtypes'
    assert list(exp.index) == list(act.index), 'different indexes'
    assert df_to_str(exp) == df_to_str(act), 'different str representation'
    assert exp.equals(act), 'contents are not equal'
def test_distribution_table__count_percent(self):
    # Expected bins with both count and percentage columns requested.
    bins = [(0.5, 1.5, 2, 0.4),
            (1.5, 2.5, 0, 0),
            (2.5, 3.5, 1, 0.2),
            (3.5, 4.5, 1, 0.2),
            (4.5, 5.5, 1, 0.2)]
    expected = DataFrame(
        bins, columns=['From Value', 'To Value', 'Count', 'Percentage'])
    actual = self.attribute.distribution_table(count=True, percent=True)
    self.assertTrue(expected.equals(actual))
def _groups(groups: pd.DataFrame) -> pd.DataFrame: key = ['eid', 'gid'] attributes = [ 'name', 'group', 'score_W', 'score_D', 'score_L', 'M', 'N', 'file_from', 'file_date', 'file_name', 'remarks' ] assert groups.equals(groups.sort_values(key)) return (groups.loc[:, key + attributes])
def test_abundance_in_group_abund_col():
    """Test abundance_in_group with a single group and an abundance column"""
    expected = DataFrame({'site': [1, 2, 3, 4], 'abundance': [3, 5, 12, 10]},
                         columns=['site', 'abundance'])
    result = abundance_in_group(comp_data, ['site'], ['counts'])
    assert expected.equals(result)
def test_distribution_table__no_significance(self):
    # Value counts expected without any significance column.
    expected = DataFrame(
        data=[('apples', 3), ('bananas', 2), ('cherries', 1)],
        columns=['Value', 'Count'])
    self.assertTrue(expected.equals(self.question.distribution_table()))
def test_richness_in_group_multiple_spid_single_group():
    """Test richness_in_group with a multiple species id columns, one group"""
    expected = DataFrame({'site': [1, 2, 3, 4], 'richness': [1, 1, 3, 1]},
                         columns=['site', 'richness'])
    result = richness_in_group(comp_data, ['site'], ['genus', 'species'])
    assert expected.equals(result)
def test_distribution_table__no_significance(self):
    # Likert-scale counts expected without any significance column.
    rows = [('1 - strongly disagree', 2),
            ('2 - disagree', 4),
            ('3 - neither agree nor disagree', 6),
            ('4 - agree', 0),
            ('5 - strongly agree', 3)]
    expected = DataFrame(data=rows, columns=['Value', 'Count'])
    self.assertTrue(expected.equals(self.question.distribution_table()))
def _print_if_new_data(self, df: pd.DataFrame, data_path: Path, message: str):
    # Inform user that new data exists: print the message whenever df
    # differs from what is currently stored at data_path (or nothing is
    # stored there yet).
    try:
        previous = self._load(data_path)
    except FileNotFoundError:
        previous = None
    if not df.equals(previous):
        print(message)
def test_round_trip_equals(setup_path):
    # GH 9330
    frame = DataFrame({"B": [1, 2], "A": ["x", "y"]})
    with ensure_clean_path(setup_path) as path:
        frame.to_hdf(path, "df", format="table")
        reloaded = read_hdf(path, "df")
        tm.assert_frame_equal(frame, reloaded)
        # equals() must hold in both directions after the round trip
        assert frame.equals(reloaded)
        assert reloaded.equals(frame)
def test_ne(self) -> None:
    # A store compared with itself yields no True cell anywhere.
    self_compare = self.test_store != self.test_store
    assert_that(self_compare.values.any().any(), is_(False))

    # Comparing against the scalar "a" flags every cell not equal to "a".
    result = self.test_store != "a"
    expected = DataFrame({
        "a": [False, True, True],
        "b": [True, True, True],
        "c": [True, True, True],
    })
    assert_that(expected.equals(result), is_(True))
def test_eq(self) -> None:
    # A store compared with itself is True in every cell.
    self_compare = self.test_store == self.test_store
    assert_that(self_compare.values.all().all(), is_(True))

    # Comparing against the scalar "a" flags only the cells equal to "a".
    result = self.test_store == "a"
    expected = DataFrame({
        "a": [True, False, False],
        "b": [False, False, False],
        "c": [False, False, False],
    })
    assert_that(expected.equals(result), is_(True))
def test_distribution_table__no_significance(self):
    # Occupation counts expected without any significance column.
    expected = DataFrame(
        data=[('actor', 3), ('bartender', 2), ('cook', 1), ('designer', 0)],
        columns=['Value', 'Count'])
    self.assertTrue(expected.equals(self.attribute.distribution_table()))
def test_df_to_s3_and_s3_to_file():
    # Upload a frame via S3.from_df, download it back with to_file, and
    # verify the CSV round trip preserves the data.
    s3 = S3(file_name="testing_s3_class.csv", s3_key="bulk/")
    frame = DataFrame({"col1": [1, 2], "col2": [3, 4]})
    s3.from_df(frame, sep="\t")
    local_path = os.path.join(s3.file_dir, s3.file_name)
    s3.to_file()
    assert frame.equals(read_csv(local_path, sep="\t"))
    os.remove(local_path)
def test_abundance_in_group_multi_group_abund_col():
    """Test abundance_in_group w/multiple group columns and an abundance column"""
    expected = DataFrame(
        {'genus': ['a', 'a', 'd', 'f'],
         'species': ['b', 'c', 'e', 'g'],
         'abundance': [13, 4, 3, 10]},
        columns=['genus', 'species', 'abundance'])
    result = abundance_in_group(comp_data, ['genus', 'species'], ['counts'])
    assert expected.equals(result)
def test_richness_in_group_multiple_groups():
    """Test richness_in_group with a multiple groups"""
    cols = ['site', 'year', 'richness']
    expected = DataFrame({'site': [1, 1, 2, 3, 3, 4],
                          'year': [1, 2, 1, 1, 2, 2],
                          'richness': [1, 1, 1, 2, 1, 1]},
                         columns=cols)
    assert expected.equals(
        richness_in_group(comp_data, ['site', 'year'], ['spid']))
def test_int_nptype(df: DataFrame, df2: DataFrame) -> None:
    # Each numpy integer dtype must round-trip identically through the
    # lambdas.astype accessor and plain Series.astype.
    int_dtypes = (np.int8, np.int16, np.int32, np.int64,
                  np.uint8, np.uint16, np.uint32, np.uint64)
    for dtype in int_dtypes:
        df["Survived"] = df["Survived"].astype(dtype)
        df2 = df2.lambdas.astype(Survived=dtype)
        assert df.equals(df2)
def test_int_string(df: DataFrame, df2: DataFrame) -> None:
    # Each string-named integer dtype must round-trip identically through
    # the lambdas.astype accessor and plain Series.astype.
    int_dtypes = ("int", "int8", "int16", "int32", "int64",
                  "uint8", "uint16", "uint32", "uint64")
    for dtype in int_dtypes:
        df["Survived"] = df["Survived"].astype(dtype)
        df2 = df2.lambdas.astype(Survived=dtype)
        assert df.equals(df2)
def test_distribution_table__count_only(self):
    # Expected bin edges and counts for the default (count-only) table.
    bins = [(-0.5, 0.5, 0), (0.5, 1.5, 2), (1.5, 2.5, 0),
            (2.5, 3.5, 1), (3.5, 4.5, 1), (4.5, 5.5, 1)]
    expected = DataFrame(bins, columns=['From Value', 'To Value', 'Count'])
    self.assertTrue(expected.equals(self.question.distribution_table()))
def add_shape_data(
        census_data: pd.DataFrame,
        shapes_key: str,
        path: Path = None,
        gdf: gpd.GeoDataFrame = None) -> tuple[pd.DataFrame, gpd.GeoDataFrame]:
    """Attach a shape attribute to each census row via a point-in-polygon join.

    Builds points from census_data's ``long``/``lat`` columns, spatially joins
    them against polygons (read from *path* or taken from *gdf*), and merges
    the *shapes_key* column back onto census_data.

    Returns a tuple of (merged frame, points GeoDataFrame). On a cache hit the
    second element is an empty GeoDataFrame instead of the points.
    """
    if path is not None:
        # Cache key derived from the frame's shape hash, the shape column and
        # the shapefile name.  NOTE(review): hash(census_data.shape) only
        # reflects dimensions, not contents — different data of the same shape
        # shares a cache file; confirm this is intended.
        uid = Path(
            f"{hash(census_data.shape)}_{shapes_key}_{path.stem}.feather")
        if uid.is_file():
            # Cache hit: reload, sanity-check against the input, return early.
            data = pd.read_feather(uid).set_index("index")
            assert census_data.equals(data[census_data.columns])
            return data, gpd.GeoDataFrame()
    else:
        uid = None  # no path → caching disabled
    # Build point geometries from the census long/lat columns.
    idx = pd.Series(census_data.index, name="object_index")
    points_data = gpd.GeoDataFrame(idx,
                                   geometry=gpd.points_from_xy(
                                       census_data.long, census_data.lat),
                                   crs=constants.WGS_84)
    # Source of the polygon shapes: file on disk, or an in-memory GeoDataFrame.
    if path is not None:
        all_shapes = gpd.read_file(path)
    elif gdf is not None:
        all_shapes = gdf
    else:
        raise ValueError(
            "A path to a shapefile or a GeoDataFrame must be passed")
    # Reproject the polygons to match the points' CRS before joining.
    shapes = all_shapes[[shapes_key, "geometry"]].to_crs(epsg=constants.WGS_84)
    # Point-in-polygon left join; every census row survives even if no
    # polygon contains it.  NOTE(review): sjoin's op= keyword is deprecated
    # in newer geopandas (renamed to predicate=) — confirm the pinned version.
    spatial_merged = gpd.sjoin(points_data, shapes, how="left",
                               op="within").set_index("object_index")
    merged = census_data.merge(spatial_merged[[shapes_key]], how="left",
                               left_index=True, right_index=True)
    # The merge must not have reordered or altered the original columns.
    assert census_data.equals(merged[census_data.columns])
    if path is not None and uid is not None:
        # Persist for the next call with the same cache key.
        merged.reset_index(drop=False).to_feather(uid)
    return merged, points_data
def test_distribution_table__percent_only(self):
    # Expected bin edges and percentages when only percent is requested.
    bins = [(-0.5, 0.5, 0), (0.5, 1.5, 0.4), (1.5, 2.5, 0),
            (2.5, 3.5, .2), (3.5, 4.5, .2), (4.5, 5.5, .2)]
    expected = DataFrame(bins,
                         columns=['From Value', 'To Value', 'Percentage'])
    actual = self.question.distribution_table(count=False, percent=True)
    self.assertTrue(expected.equals(actual))
def test_equals_different_blocks(self):
    # GH 9330
    base = DataFrame({"A": ["x", "y"], "B": [1, 2], "C": ["w", "z"]})
    rearranged = base.reset_index()[["A", "B", "C"]]
    # this assert verifies that the above operations have
    # induced a block rearrangement
    assert base._mgr.blocks[0].dtype != rearranged._mgr.blocks[0].dtype
    # do the real tests
    tm.assert_frame_equal(base, rearranged)
    assert base.equals(rearranged)
    assert rearranged.equals(base)
def test_convert_with_a_model(self):
    # A plain two-attribute object must convert to a one-row frame whose
    # columns mirror the attribute names.
    class Model:
        def __init__(self, p1, p2):
            self.p1 = p1
            self.p2 = p2

    converted = self.__converter.convert(Model(1, "1"))
    expected_df = DataFrame({"p1": [1], "p2": ["1"]})
    self.assertTrue(expected_df.equals(converted))
def test_duplicate_column_name(setup_path):
    # Duplicate column labels: fixed format must refuse them, table format
    # must round-trip them intact.
    frame = DataFrame(columns=["a", "a"], data=[[0, 0]])
    with ensure_clean_path(setup_path) as path:
        msg = "Columns index has to be unique for fixed format"
        with pytest.raises(ValueError, match=msg):
            frame.to_hdf(path, "df", format="fixed")

        frame.to_hdf(path, "df", format="table")
        reloaded = read_hdf(path, "df")
        tm.assert_frame_equal(frame, reloaded)
        assert frame.equals(reloaded)
        assert reloaded.equals(frame)
def test_richness_in_group_single_spid_single_group():
    """Test richness_in_group with a single species identifier column, one group"""
    cols = ['site', 'richness']
    expected = DataFrame({'site': [1, 2, 3, 4],
                          'richness': [1, 1, 3, 1]}, columns=cols)
    result = richness_in_group(comp_data, ['site'], ['spid'])
    assert expected.equals(result)
def test_equals(self):
    """Exercise Series/DataFrame.equals across dtypes, indexes and NaNs."""
    # Identical Series (non-monotonic index) compare equal; mutating one
    # breaks equality.
    s1 = pd.Series([1, 2, 3], index=[0, 2, 1])
    s2 = s1.copy()
    self.assertTrue(s1.equals(s2))
    s1[1] = 99
    self.assertFalse(s1.equals(s2))

    # NaNs compare as equal
    s1 = pd.Series([1, np.nan, 3, np.nan], index=[0, 2, 1, 3])
    s2 = s1.copy()
    self.assertTrue(s1.equals(s2))
    s2[0] = 9.9
    self.assertFalse(s1.equals(s2))

    # Series with a MultiIndex and a NaN value still compares equal to its copy.
    idx = MultiIndex.from_tuples([(0, 'a'), (1, 'b'), (2, 'c')])
    s1 = Series([1, 2, np.nan], index=idx)
    s2 = s1.copy()
    self.assertTrue(s1.equals(s2))

    # Add object dtype column with nans
    # Build a frame mixing floats, text, datetimes, timedeltas and bools,
    # then blank out every other row (legacy .ix indexing — pre-1.0 pandas).
    index = np.random.random(10)
    df1 = DataFrame(np.random.random(10,), index=index, columns=['floats'])
    df1['text'] = 'the sky is so blue. we could use more chocolate.'.split()
    df1['start'] = date_range('2000-1-1', periods=10, freq='T')
    df1['end'] = date_range('2000-1-1', periods=10, freq='D')
    df1['diff'] = df1['end'] - df1['start']
    df1['bool'] = (np.arange(10) % 3 == 0)
    df1.ix[::2] = nan
    df2 = df1.copy()
    # Column-wise and frame-wide equality must both hold for the copy.
    self.assertTrue(df1['text'].equals(df2['text']))
    self.assertTrue(df1['start'].equals(df2['start']))
    self.assertTrue(df1['end'].equals(df2['end']))
    self.assertTrue(df1['diff'].equals(df2['diff']))
    self.assertTrue(df1['bool'].equals(df2['bool']))
    self.assertTrue(df1.equals(df2))
    # Comparing against a non-frame object is simply unequal, not an error.
    self.assertFalse(df1.equals(object))

    # different dtype
    different = df1.copy()
    different['floats'] = different['floats'].astype('float32')
    self.assertFalse(df1.equals(different))

    # different index
    different_index = -index
    different = df2.set_index(different_index)
    self.assertFalse(df1.equals(different))

    # different columns
    different = df2.copy()
    different.columns = df2.columns[::-1]
    self.assertFalse(df1.equals(different))

    # DatetimeIndex
    index = pd.date_range('2000-1-1', periods=10, freq='T')
    df1 = df1.set_index(index)
    df2 = df1.copy()
    self.assertTrue(df1.equals(df2))

    # MultiIndex
    df3 = df1.set_index(['text'], append=True)
    df2 = df1.set_index(['text'], append=True)
    self.assertTrue(df3.equals(df2))

    # Appending a different level column makes the frames unequal.
    df2 = df1.set_index(['floats'], append=True)
    self.assertFalse(df3.equals(df2))

    # NaN in index
    # (the 'floats' column contains NaNs from the .ix blanking above)
    df3 = df1.set_index(['floats'], append=True)
    df2 = df1.set_index(['floats'], append=True)
    self.assertTrue(df3.equals(df2))
def test_abundance_in_group_no_abund_col():
    """Test abundance_in_group with no abundance column provided"""
    cols = ['site', 'abundance']
    expected = DataFrame({'site': [1, 2, 3, 4],
                          'abundance': [2, 1, 3, 1]}, columns=cols)
    result = abundance_in_group(comp_data, ['site'])
    assert expected.equals(result)
def test_richness_in_group_multiple_spid_single_group():
    """Test richness_in_group with a multiple species id columns, one group"""
    cols = ['site', 'richness']
    expected = DataFrame({'site': [1, 2, 3, 4],
                          'richness': [1, 1, 3, 1]}, columns=cols)
    assert expected.equals(
        richness_in_group(comp_data, ['site'], ['genus', 'species']))
def test_abundance_in_group_abund_col():
    """Test abundance_in_group with a single group and an abundance column"""
    cols = ['site', 'abundance']
    expected = DataFrame({'site': [1, 2, 3, 4],
                          'abundance': [3, 5, 12, 10]}, columns=cols)
    assert expected.equals(
        abundance_in_group(comp_data, ['site'], ['counts']))