Example #1
class Equals(object):

    def setup(self):
        N = 10**3
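        # Each frame below gets a "_nan" twin that differs only in its
        # bottom-right cell, so the *_unequal benchmarks compare frames
        # that are identical except for a single element.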
        self.float_df = DataFrame(np.random.randn(N, N))
        self.float_df_nan = self.float_df.copy()
        self.float_df_nan.iloc[-1, -1] = np.nan

        self.object_df = DataFrame('foo', index=range(N), columns=range(N))
        self.object_df_nan = self.object_df.copy()
        self.object_df_nan.iloc[-1, -1] = np.nan

        self.nonunique_cols = self.object_df.copy()
        self.nonunique_cols.columns = ['A'] * len(self.nonunique_cols.columns)
        self.nonunique_cols_nan = self.nonunique_cols.copy()
        self.nonunique_cols_nan.iloc[-1, -1] = np.nan

    def time_frame_float_equal(self):
        self.float_df.equals(self.float_df)

    def time_frame_float_unequal(self):
        self.float_df.equals(self.float_df_nan)

    def time_frame_nonunique_equal(self):
        self.nonunique_cols.equals(self.nonunique_cols)

    def time_frame_nonunique_unequal(self):
        self.nonunique_cols.equals(self.nonunique_cols_nan)

    def time_frame_object_equal(self):
        self.object_df.equals(self.object_df)

    def time_frame_object_unequal(self):
        self.object_df.equals(self.object_df_nan)
Example #2
def get_answer1(df: pd.DataFrame) -> float:
    df = df2grid(df)

    # handle the case where df is not square for the final input:
    # pad it with an extra row of '.' cells
    if df.shape[0] != df.shape[1]:
        df = pd.concat(
            [df, pd.DataFrame('.', columns=df.columns, index=[max(df.index) + 1])])

    Maxitt = 1000
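    # Repeatedly apply the seat rules until the grid stops changing;
    # DataFrame.equals detects the fixed point below.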
    for ii in range(Maxitt):
        if ii % 10 == 0:
            print(ii)
        df_next = df.copy()
        for x in itertools.product(list(df.index), list(df.columns)):
            if df.iloc[x] == '.':
                continue
            else:
                xn_list = get_neighbor_idx(x, df)
                nn = ''
                for xn_i in xn_list:
                    nn += df.iloc[xn_i]
                if df.iloc[x] == 'L' and nn.count('#') == 0:
                    df_next.iloc[x] = '#'
                elif df.iloc[x] == '#' and nn.count('#') >= 4:
                    df_next.iloc[x] = 'L'

        #check for convergence
        if df.equals(df_next):
            break

        # update
        df = df_next.copy()

    print(ii)
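    # count all '#' cells in the final grid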
    return (df == '#').sum().sum()
Example #3
 def test_equals_subclass(self):
     # https://github.com/pandas-dev/pandas/pull/34402
     # allow subclass in both directions
     df1 = DataFrame({"a": [1, 2, 3]})
     df2 = tm.SubclassedDataFrame({"a": [1, 2, 3]})
     assert df1.equals(df2)
     assert df2.equals(df1)
Example #4
def test_richness_in_group_multiple_groups():
    """Test richness_in_group with multiple groups"""
    richness = DataFrame({'site': [1, 1, 2, 3, 3, 4],
                          'year': [1, 2, 1, 1, 2, 2],
                          'richness': [1, 1, 1, 2, 1, 1]},
                         columns=['site', 'year', 'richness'])
    assert richness.equals(richness_in_group(comp_data, ['site', 'year'], ['spid']))
Example #5
def test_calculate_max_drawdown2():
    values = [
        0.011580, 0.010048, 0.011340, 0.012161, 0.010416, 0.010009, 0.020024,
        -0.024662, -0.022350, 0.020496, -0.029859, -0.030511, 0.010041,
        0.010872, -0.025782, 0.010400, 0.012374, 0.012467, 0.114741, 0.010303,
        0.010088, -0.033961, 0.010680, 0.010886, -0.029274, 0.011178, 0.010693,
        0.010711
    ]

    dates = [Arrow(2020, 1, 1).shift(days=i) for i in range(len(values))]
    df = DataFrame(zip(values, dates), columns=['profit', 'open_date'])
    # sort by profit and reset index
    df = df.sort_values('profit').reset_index(drop=True)
    df1 = df.copy()
    drawdown, h, low = calculate_max_drawdown(df,
                                              date_col='open_date',
                                              value_col='profit')
    # Ensure df has not been altered.
    assert df.equals(df1)

    assert isinstance(drawdown, float)
    # High must be before low
    assert h < low
    assert drawdown == 0.091755

    df = DataFrame(zip(values[:5], dates[:5]), columns=['profit', 'open_date'])
    with pytest.raises(ValueError,
                       match='No losing trade, therefore no drawdown.'):
        calculate_max_drawdown(df, date_col='open_date', value_col='profit')
Example #6
 def test_panda_data_frame(self):
     df_1 = DataFrame(random.randint(0, 100, size=(100, 4)),
                      columns=list('ABCD'))
     df_2 = map_value(df_1)
     self.assertEqual(DEFDataFrame, type(df_2))
     df_3 = map_value(df_2)
     self.assertTrue(df_1.equals(df_3))
Example #7
    def test_distribution_table(self):

        expected = DataFrame(data=[
            ('language', 12),
            ('pleasure', 8),
            ('quick', 8),
            ('quiz', 7),
            ('common', 6),
            ('fox', 6),
            ('one', 6),
            ('grammar', 5),
            ('pain', 5),
            ('vex', 5),
            ('bad', 4),
            ('blind', 4),
            ('existence', 4),
            ('far', 4),
            ('jump', 4),
            ('like', 4),
            ('little', 4),
            ('text', 4),
            ('word', 4),
        ],
                             columns=['Word', 'Count'])
        actual = self.question.distribution_table(top=19)
        self.assertTrue(expected.equals(actual))
Example #8
def test_abundance_in_group_multi_group_no_abund_col():
    """Test abundance_in_group w/multiple group columns and no abundance column"""
    abundance = DataFrame({'genus': ['a', 'a', 'd', 'f'],
                           'species': ['b', 'c', 'e', 'g'],
                           'abundance': [4, 1, 1, 1]},
                           columns=['genus', 'species', 'abundance'])
    assert abundance.equals(abundance_in_group(comp_data, ['genus', 'species']))
Example #9
def test_richness_in_group_single_spid_single_group():
    """Test richness_in_group with a single species identifier column, one group"""
    richness = DataFrame({
        'site': [1, 2, 3, 4],
        'richness': [1, 1, 3, 1]
    },
                         columns=['site', 'richness'])
    assert richness.equals(richness_in_group(comp_data, ['site'], ['spid']))
Example #10
def test_abundance_in_group_no_abund_col():
    """Test abundance_in_group with no abundance column provided"""
    abundance = DataFrame({
        'site': [1, 2, 3, 4],
        'abundance': [2, 1, 3, 1]
    },
                          columns=['site', 'abundance'])
    assert abundance.equals(abundance_in_group(comp_data, ['site']))
Example #11
def pandas_assert(actual: pd.DataFrame, expected: pd.DataFrame):
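    # Check the cheap structural properties first so a failure message points
    # at the specific difference before the full equals() comparison runs.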
    assert list(expected.columns) == list(actual.columns), 'different columns'
    assert expected.shape == actual.shape, 'different shapes'
    assert list(expected.dtypes) == list(actual.dtypes), 'different dtypes'
    assert list(expected.index) == list(actual.index), 'different indexes'
    assert df_to_str(expected) == df_to_str(
        actual), 'different str representation'
    assert expected.equals(actual), 'contents are not equal'
Example #12
    def test_distribution_table__count_percent(self):

        expected = DataFrame(
            [(0.5, 1.5, 2, 0.4), (1.5, 2.5, 0, 0), (2.5, 3.5, 1, 0.2),
             (3.5, 4.5, 1, 0.2), (4.5, 5.5, 1, 0.2)],
            columns=['From Value', 'To Value', 'Count', 'Percentage'])
        actual = self.attribute.distribution_table(count=True, percent=True)
        self.assertTrue(expected.equals(actual))
Example #13
def _groups(groups: pd.DataFrame) -> pd.DataFrame:
    key = ['eid', 'gid']
    attributes = [
        'name', 'group', 'score_W', 'score_D', 'score_L', 'M', 'N',
        'file_from', 'file_date', 'file_name', 'remarks'
    ]
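    # the input must already be sorted by (eid, gid); comparing against a
    # sorted copy with equals() enforces that precondition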
    assert groups.equals(groups.sort_values(key))
    return (groups.loc[:, key + attributes])
Example #14
def test_abundance_in_group_abund_col():
    """Test abundance_in_group with a single group and an abundance column"""
    abundance = DataFrame({
        'site': [1, 2, 3, 4],
        'abundance': [3, 5, 12, 10]
    },
                          columns=['site', 'abundance'])
    assert abundance.equals(abundance_in_group(comp_data, ['site'],
                                               ['counts']))
Example #15
    def test_distribution_table__no_significance(self):

        expected = DataFrame(data=[
            ('apples', 3),
            ('bananas', 2),
            ('cherries', 1),
        ], columns=['Value', 'Count'])
        actual = self.question.distribution_table()
        self.assertTrue(expected.equals(actual))
Example #16
def test_richness_in_group_multiple_spid_single_group():
    """Test richness_in_group with multiple species id columns, one group"""
    richness = DataFrame({
        'site': [1, 2, 3, 4],
        'richness': [1, 1, 3, 1]
    },
                         columns=['site', 'richness'])
    assert richness.equals(
        richness_in_group(comp_data, ['site'], ['genus', 'species']))
Example #17
    def test_distribution_table__no_significance(self):

        expected = DataFrame(data=[('1 - strongly disagree', 2),
                                   ('2 - disagree', 4),
                                   ('3 - neither agree nor disagree', 6),
                                   ('4 - agree', 0),
                                   ('5 - strongly agree', 3)],
                             columns=['Value', 'Count'])
        actual = self.question.distribution_table()
        self.assertTrue(expected.equals(actual))
Example #18
    def _print_if_new_data(self, df: pd.DataFrame, data_path: Path,
                           message: str):
        # Inform user that new data exists
        try:
            orig_df = self._load(data_path)
        except FileNotFoundError:
            orig_df = None

        if not df.equals(orig_df):
            print(message)
Example #19
def test_round_trip_equals(setup_path):
    # GH 9330
    df = DataFrame({"B": [1, 2], "A": ["x", "y"]})

    with ensure_clean_path(setup_path) as path:
        df.to_hdf(path, "df", format="table")
        other = read_hdf(path, "df")
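        # the HDF5 round trip should give back an equal frame; check
        # equals() in both directions on top of assert_frame_equal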
        tm.assert_frame_equal(df, other)
        assert df.equals(other)
        assert other.equals(df)
Example #20
 def test_ne(self) -> None:
     result = self.test_store != self.test_store
     assert_that(result.values.any().any(), is_(False))
     result = self.test_store != "a"
     expected = DataFrame({
         "a": [False, True, True],
         "b": [True, True, True],
         "c": [True, True, True],
     })
     assert_that(expected.equals(result), is_(True))
Example #21
 def test_eq(self) -> None:
     result = self.test_store == self.test_store
     assert_that(result.values.all().all(), is_(True))
     result = self.test_store == "a"
     expected = DataFrame({
         "a": [True, False, False],
         "b": [False, False, False],
         "c": [False, False, False],
     })
     assert_that(expected.equals(result), is_(True))
Example #22
    def test_distribution_table__no_significance(self):

        expected = DataFrame(data=[
            ('actor', 3),
            ('bartender', 2),
            ('cook', 1),
            ('designer', 0)
        ], columns=['Value', 'Count'])
        actual = self.attribute.distribution_table()
        self.assertTrue(expected.equals(actual))
Example #23
def test_df_to_s3_and_s3_to_file():
    s3 = S3(file_name="testing_s3_class.csv", s3_key="bulk/")
    df = DataFrame({"col1": [1, 2], "col2": [3, 4]})
    s3.from_df(df, sep="\t")

    file_path = os.path.join(s3.file_dir, s3.file_name)

    s3.to_file()

    assert df.equals(read_csv(file_path, sep="\t"))
    os.remove(file_path)
Example #24
def test_abundance_in_group_multi_group_abund_col():
    """Test abundance_in_group w/multiple group columns and an abundance column"""
    abundance = DataFrame(
        {
            'genus': ['a', 'a', 'd', 'f'],
            'species': ['b', 'c', 'e', 'g'],
            'abundance': [13, 4, 3, 10]
        },
        columns=['genus', 'species', 'abundance'])
    assert abundance.equals(
        abundance_in_group(comp_data, ['genus', 'species'], ['counts']))
Example #25
def test_richness_in_group_multiple_groups():
    """Test richness_in_group with multiple groups"""
    richness = DataFrame(
        {
            'site': [1, 1, 2, 3, 3, 4],
            'year': [1, 2, 1, 1, 2, 2],
            'richness': [1, 1, 1, 2, 1, 1]
        },
        columns=['site', 'year', 'richness'])
    assert richness.equals(
        richness_in_group(comp_data, ['site', 'year'], ['spid']))
Example #26
def test_int_nptype(df: DataFrame, df2: DataFrame) -> None:

    dtypes = [
        np.int8, np.int16, np.int32, np.int64, np.uint8, np.uint16, np.uint32,
        np.uint64
    ]

    for dtype in dtypes:
        df["Survived"] = df["Survived"].astype(dtype)
        df2 = df2.lambdas.astype(Survived=dtype)
        assert df.equals(df2)
Example #27
def test_int_string(df: DataFrame, df2: DataFrame) -> None:

    dtypes = [
        "int", "int8", "int16", "int32", "int64", "uint8", "uint16", "uint32",
        "uint64"
    ]

    for dtype in dtypes:
        df["Survived"] = df["Survived"].astype(dtype)
        df2 = df2.lambdas.astype(Survived=dtype)
        assert df.equals(df2)
Example #28
    def test_distribution_table__count_only(self):

        expected = DataFrame([
            (-0.5, 0.5, 0),
            (0.5, 1.5, 2),
            (1.5, 2.5, 0),
            (2.5, 3.5, 1),
            (3.5, 4.5, 1),
            (4.5, 5.5, 1)
        ], columns=['From Value', 'To Value', 'Count'])
        self.assertTrue(expected.equals(self.question.distribution_table()))
Example #29
def add_shape_data(
        census_data: pd.DataFrame,
        shapes_key: str,
        path: Path = None,
        gdf: gpd.GeoDataFrame = None) -> tuple[pd.DataFrame, gpd.GeoDataFrame]:
    if path is not None:
        uid = Path(
            f"{hash(census_data.shape)}_{shapes_key}_{path.stem}.feather")
        if uid.is_file():
            data = pd.read_feather(uid).set_index("index")
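            # the cached result must still line up with the current census data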
            assert census_data.equals(data[census_data.columns])
            return data, gpd.GeoDataFrame()
    else:
        uid = None

    idx = pd.Series(census_data.index, name="object_index")
    points_data = gpd.GeoDataFrame(idx,
                                   geometry=gpd.points_from_xy(
                                       census_data.long, census_data.lat),
                                   crs=constants.WGS_84)

    if path is not None:
        all_shapes = gpd.read_file(path)
    elif gdf is not None:
        all_shapes = gdf
    else:
        raise ValueError(
            "A path to a shapefile or a GeoDataFrame must be passed")
    shapes = all_shapes[[shapes_key, "geometry"]].to_crs(epsg=constants.WGS_84)

    spatial_merged = gpd.sjoin(points_data, shapes, how="left",
                               op="within").set_index("object_index")
    merged = census_data.merge(spatial_merged[[shapes_key]],
                               how="left",
                               left_index=True,
                               right_index=True)
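    # the left merge must not drop, reorder, or alter the original census columns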
    assert census_data.equals(merged[census_data.columns])
    if path is not None and uid is not None:
        merged.reset_index(drop=False).to_feather(uid)

    return merged, points_data
Example #30
    def test_distribution_table__percent_only(self):

        expected = DataFrame([
            (-0.5, 0.5, 0),
            (0.5, 1.5, 0.4),
            (1.5, 2.5, 0),
            (2.5, 3.5, .2),
            (3.5, 4.5, .2),
            (4.5, 5.5, .2)
        ], columns=['From Value', 'To Value', 'Percentage'])
        actual = self.question.distribution_table(count=False, percent=True)
        self.assertTrue(expected.equals(actual))
Example #31
    def test_equals_different_blocks(self):
        # GH 9330
        df0 = DataFrame({"A": ["x", "y"], "B": [1, 2], "C": ["w", "z"]})
        df1 = df0.reset_index()[["A", "B", "C"]]
        # this assert verifies that the above operations have
        # induced a block rearrangement
        assert df0._mgr.blocks[0].dtype != df1._mgr.blocks[0].dtype

        # do the real tests
        tm.assert_frame_equal(df0, df1)
        assert df0.equals(df1)
        assert df1.equals(df0)
Example #32
    def test_convert_with_a_model(self):
        class Model:
            def __init__(self, p1, p2):
                self.p1 = p1
                self.p2 = p2

        model = Model(1, "1")

        df = self.__converter.convert(model)

        expected_df = DataFrame({"p1": [1], "p2": ["1"]})

        self.assertTrue(expected_df.equals(df))
Example #33
def test_duplicate_column_name(setup_path):
    df = DataFrame(columns=["a", "a"], data=[[0, 0]])

    with ensure_clean_path(setup_path) as path:
        msg = "Columns index has to be unique for fixed format"
        with pytest.raises(ValueError, match=msg):
            df.to_hdf(path, "df", format="fixed")

        df.to_hdf(path, "df", format="table")
        other = read_hdf(path, "df")

        tm.assert_frame_equal(df, other)
        assert df.equals(other)
        assert other.equals(df)
Example #34
def test_richness_in_group_single_spid_single_group():
    """Test richness_in_group with a single species identifier column, one group"""
    richness = DataFrame({'site': [1, 2, 3, 4], 'richness': [1, 1, 3, 1]},
                         columns=['site', 'richness'])
    assert richness.equals(richness_in_group(comp_data, ['site'], ['spid']))
Example #35
    def test_equals(self):
        s1 = pd.Series([1, 2, 3], index=[0, 2, 1])
        s2 = s1.copy()
        self.assertTrue(s1.equals(s2))

        s1[1] = 99
        self.assertFalse(s1.equals(s2))

        # NaNs compare as equal
        s1 = pd.Series([1, np.nan, 3, np.nan], index=[0, 2, 1, 3])
        s2 = s1.copy()
        self.assertTrue(s1.equals(s2))

        s2[0] = 9.9
        self.assertFalse(s1.equals(s2))

        idx = MultiIndex.from_tuples([(0, 'a'), (1, 'b'), (2, 'c')])
        s1 = Series([1, 2, np.nan], index=idx)
        s2 = s1.copy()
        self.assertTrue(s1.equals(s2))

        # Add object dtype column with nans
        index = np.random.random(10)
        df1 = DataFrame(np.random.random(10,), index=index, columns=['floats'])
        df1['text'] = 'the sky is so blue. we could use more chocolate.'.split()
        df1['start'] = date_range('2000-1-1', periods=10, freq='T')
        df1['end'] = date_range('2000-1-1', periods=10, freq='D')
        df1['diff'] = df1['end'] - df1['start']
        df1['bool'] = (np.arange(10) % 3 == 0)
        df1.iloc[::2] = np.nan
        df2 = df1.copy()
        self.assertTrue(df1['text'].equals(df2['text']))
        self.assertTrue(df1['start'].equals(df2['start']))
        self.assertTrue(df1['end'].equals(df2['end']))
        self.assertTrue(df1['diff'].equals(df2['diff']))
        self.assertTrue(df1['bool'].equals(df2['bool']))
        self.assertTrue(df1.equals(df2))
        self.assertFalse(df1.equals(object))

        # different dtype
        different = df1.copy()
        different['floats'] = different['floats'].astype('float32')
        self.assertFalse(df1.equals(different))

        # different index
        different_index = -index
        different = df2.set_index(different_index)
        self.assertFalse(df1.equals(different))

        # different columns
        different = df2.copy()
        different.columns = df2.columns[::-1]
        self.assertFalse(df1.equals(different))

        # DatetimeIndex
        index = pd.date_range('2000-1-1', periods=10, freq='T')
        df1 = df1.set_index(index)
        df2 = df1.copy()
        self.assertTrue(df1.equals(df2))

        # MultiIndex
        df3 = df1.set_index(['text'], append=True)
        df2 = df1.set_index(['text'], append=True)
        self.assertTrue(df3.equals(df2))

        df2 = df1.set_index(['floats'], append=True)
        self.assertFalse(df3.equals(df2))

        # NaN in index
        df3 = df1.set_index(['floats'], append=True)
        df2 = df1.set_index(['floats'], append=True)
        self.assertTrue(df3.equals(df2))
Example #36
def test_abundance_in_group_no_abund_col():
    """Test abundance_in_group with no abundance column provided"""
    abundance = DataFrame({'site': [1, 2, 3, 4],
                           'abundance': [2, 1, 3, 1]},
                           columns=['site', 'abundance'])
    assert abundance.equals(abundance_in_group(comp_data, ['site']))
Example #37
def test_richness_in_group_multiple_spid_single_group():
    """Test richness_in_group with multiple species id columns, one group"""
    richness = DataFrame({'site': [1, 2, 3, 4], 'richness': [1, 1, 3, 1]},
                         columns=['site', 'richness'])
    assert richness.equals(richness_in_group(comp_data, ['site'], ['genus', 'species']))
Example #38
def test_abundance_in_group_abund_col():
    """Test abundance_in_group with a single group and an abundance column"""
    abundance = DataFrame({'site': [1, 2, 3, 4],
                           'abundance': [3, 5, 12, 10]},
                           columns=['site', 'abundance'])
    assert abundance.equals(abundance_in_group(comp_data, ['site'], ['counts']))